Create table

Hi Aigles,

I need some help on this script .In the script passing 2 paramtere ,one is file name and table name but the last column not printing (highlighted with bold color)

I/P file

 cat a2.csv
ID|ID2|ID3|ID4|ID5|ID6
7666303|7666|73037341|XXX|XXXX-YYYY/Z|

Script

file=$1
table=$2
awk -F'|' '
BEGIN {
   colDefaultLength = 1;
}
NR==1 {
   for ( c=1; c<=NF; c++ ) {
      colName[c] = $c;
   }
   colCount = NF;
   if (!colName[colCount]) colCount--;
   next;
}
{
   for ( c=1; c<=NF; c++ ) {
      if (length($c) > colLength[c]) colLength[c] = length($c);
           if ( $c ~ /^ *$/ )                     continue;
           if ( $c ~ /^[0-9]+$/        )          colIsNum[c]++
      else if ( $c ~ /^[0-9]*\.[0-9]*$/ )         colIsDec[c]++
      else if ( $c ~ /^[0-9]+\/[0-9]+\/[0-9]+$/ ) colIsDat[c]++
      else                                        colIsTxt[c]++;
   }
}
END {
   print "create table tablename("
   for ( c=1; c<=colCount; c++ ) {

      if ( colName[c] == "" ) {
         if (colLength[c]+0 == 0) {
            continue;
         } else {
            colName[c] = "Column" c;
         }
      }


      if ( colLength[c]+0 == 0 ) colLength[c] = colDefaultLength;
      var2Len = colLength[c];
           if (var2Len <   50) var2Len = 50
      else if (var2Len <  100) var2Len = 100
      else if (var2Len <  500) var2Len = 500
      else if (var2Len < 1000) var2Len = 1000;

      out = "  " colName[c];
           if ( colIsTxt[c] ) out = out " varchar2(" var2Len ")"
      else if ( colIsDat[c] ) out = out " date"
      else if ( colIsDec[c] ) out = out " number(38,4)"
      else if ( colIsNum[c] ) out = out " number(" colLength[c] ")"
      else                    out = out " varchar2(" var2Len ")";
      if ( c != colCount ) out = out ",";

      print out

   }
   print ");";
}
' $file

./a.sh a2.csv tablename
create table tablename(
  ID number(7),
  ID2 number(7),
  ID3 number(8),
  ID4 varchar2(50),
  ID5 varchar2(50),
 varchar2(50)
);

Thanks in advance
MR

The script works fine for me with your input file :

create table tablename(
  ID number(7),
  ID2 number(4),
  ID3 number(8),
  ID4 varchar2(50),
  ID5 varchar2(50),
  ID6 varchar2(50)
);

If you want to parameterize the table name, modify your script like that :

file=$1
table=$2
awk -F'|' '
. . . . . .
END {
   print "create table " TableName "("
   for ( c=1; c<=colCount; c++ ) {
. . . . . .
   print ");";
}
' TableName=$2 $file

Jean-Pierre.

Hi Aigless
I am still having issue with last column ,I modified the script as you suggested.OS is linux

a5.sh 

file=$1
table=$2
awk -F'|' '
. . . . . .
END {
   print "create table " TableName "("
   for ( c=1; c<=colCount; c++ ) {
. . . . . .
   print ");";
}
' TableName=$2 $file

./a5.sh a2.csv xx
create table xx(
  ID number(7),
  ID2 number(4),
  ID3 number(8),
  ID4 varchar2(50),
  ID5 varchar2(50),
 varchar2(50)
);

Regards,
MR

For debugging purpose, modify your script (colored lines) :

file=$1
table=$2
awk -F'|' '
BEGIN {
   colDefaultLength = 1;
}
{ printf "Record %d [%s]\n", NR, $0 }
NR==1 {
   for ( c=1; c<=NF; c++ ) {
      colName[c] = $c;
   }
   colCount = NF;
if (!colName[colCount]) print "No name for last column " colCount;
   if (!colName[colCount]) colCount--;
print "Column list:"
for (c=1; c<=colCount; c++) printf "  %d [%s]\n", c, colName[c];

   next;
}
{
   for ( c=1; c<=NF; c++ ) {
      if (length($c) > colLength[c]) colLength[c] = length($c);
           if ( $c ~ /^ *$/ )                     continue;
           if ( $c ~ /^[0-9]+$/        )          colIsNum[c]++
      else if ( $c ~ /^[0-9]*\.[0-9]*$/ )         colIsDec[c]++
      else if ( $c ~ /^[0-9]+\/[0-9]+\/[0-9]+$/ ) colIsDat[c]++
      else                                        colIsTxt[c]++;
   }
}
END {
printf "END Column list:"
for (c=1; c<=colCount; c++) printf "  %d [%s] len=%d\n", c, colName[c], colLength[c];

   print "create table " TableName "("
   for ( c=1; c<=colCount; c++ ) {

      if ( colName[c] == "" ) {
         if (colLength[c]+0 == 0) {
            continue;
         } else {
            colName[c] = "Column" c;
         }
      }


      if ( colLength[c]+0 == 0 ) colLength[c] = colDefaultLength;
      var2Len = colLength[c];
           if (var2Len <   50) var2Len = 50
      else if (var2Len <  100) var2Len = 100
      else if (var2Len <  500) var2Len = 500
      else if (var2Len < 1000) var2Len = 1000;

      out = "  " colName[c];
           if ( colIsTxt[c] ) out = out " varchar2(" var2Len ")"
      else if ( colIsDat[c] ) out = out " date"
      else if ( colIsDec[c] ) out = out " number(38,4)"
      else if ( colIsNum[c] ) out = out " number(" colLength[c] ")"
      else                    out = out " varchar2(" var2Len ")";
      if ( c != colCount ) out = out ",";

      print out

   }
   print ");";
}
' TableName=$table $file

With your input data file

ID|ID2|ID3|ID4|ID5|ID6
7666303|7666|73037341|XXX|XXXX-YYYY/Z|
$ ./mohan4.sh mohan4.txt xx
Record 1 [ID|ID2|ID3|ID4|ID5|ID6]
Column list:
  1 [ID]
  2 [ID2]
  3 [ID3]
  4 [ID4]
  5 [ID5]
  6 [ID6]
Record 2 [7666303|7666|73037341|XXX|XXXX-YYYY/Z|]
END Column list:  1 [ID] len=7
  2 [ID2] len=4
  3 [ID3] len=8
  4 [ID4] len=3
  5 [ID5] len=11
  6 [ID6] len=0
create table xx(
  ID number(7),
  ID2 number(4),
  ID3 number(8),
  ID4 varchar2(50),
  ID5 varchar2(50),
  ID6 varchar2(50)
);
$ 

Execute the modified script and post the output.

Jean-Pierre.

Hi
Executed the script .Still having issue from my side.Please see the o/p


 ./a5.sh a2.csv xx
]ecord 1 [ID1|ID2|ID3|ID4|ID5|ID6
Column list:
  1 [ID1]
  2 [ID2]
  3 [ID3]
  4 [ID4]
  5 [ID5]
] 6 [ID6
]ecord 2 [7666303|7666|73037341|XXX|XXXX-YYYY/Z|
END Column list:  1 [ID1] len=7
  2 [ID2] len=4
  3 [ID3] len=8
  4 [ID4] len=3
  5 [ID5] len=11
] len=16
create table xx(
  ID1 number(7),
  ID2 number(4),
  ID3 number(8),
  ID4 varchar2(50),
  ID5 varchar2(50),
 varchar2(50)
);

Regards,
MR

There is a carriage return at the end of your input records.
Removes them or modify your script like that :

BEGIN {
   colDefaultLength = 1;
}
{ sub(/\r$/, "") }

Jean-Pierre.

Hi Aigless,

Its working fine .Thanks for your great help.

Regards,
MR

Hi
If the date format comming as yyyymmdd instead of / or - ,its treating as Number data type.Is there any way to validate the date format before setting the data type.

ID|ID2|ID3|ID4|ID5|ID6
7666303|7666|73037341|2005/12/06/|20050607|200000000

Thanks in advance,
MR

Try this new version:

file=$1
table=$2
awk -F'|' '

BEGIN {
   colDefaultLength = 1;

   dat1Re = "[0-9]+\/[0-9]+\/[0-9]+";
   dat2Re = "[12][0-9][0-9][0-9][01][0-9][0-3][0-9]";

   datRe  = "^\(" dat1Re "|" dat2Re "\)$";
   nulRe  = "^[[:space:]]*$";
   numRe  = "^[0-9]+$"; 
   decRe  = "^\([0-9]+\\.[0-9]*|[0-9]*\\.[0-9]+\)$";

}

{ sub(/\r$/, "") }

NR==1 {
   for ( c=1; c<=NF; c++ ) {
      colName[c] = $c;
   }
   colCount = NF;
   if (!colName[colCount]) colCount--;
   next;
}

{
   for ( c=1; c<=NF; c++ ) {
      if (length($c) > colLength[c]) colLength[c] = length($c);
           if ( $c ~ nulRe    )   continue;
           if ( $c ~ datRe    )   colIsDat[c]++ 
      else if ( $c ~ numRe    )   colIsNum[c]++
      else if ( $c ~ decRe    )   colIsDec[c]++
      else                        colIsTxt[c]++;
   }
}

END {
   print "create table " TableName "("
   for ( c=1; c<=colCount; c++ ) {

      if ( colName[c] == "" ) {
         if (colLength[c]+0 == 0) {
            continue;
         } else {
            colName[c] = "Column" c;
         }
      }


      if ( colLength[c]+0 == 0 ) colLength[c] = colDefaultLength;
      var2Len = colLength[c];
           if (var2Len <   50) var2Len = 50
      else if (var2Len <  100) var2Len = 100
      else if (var2Len <  500) var2Len = 500
      else if (var2Len < 1000) var2Len = 1000;

      out = "  " colName[c];
           if ( colIsTxt[c] ) out = out " varchar2(" var2Len ")"
      else if ( colIsDec[c] ) out = out " number(38,4)"
      else if ( colIsNum[c] ) out = out " number(" colLength[c] ")"
      else if ( colIsDat[c] ) out = out " date"
      else                    out = out " varchar2(" var2Len ")";
      if ( c != colCount ) out = out ",";

      print out

   }
   print ");";
}

' TableName=$table $file

Input file:

COL1_NUM|COL2_NUM|COL3_TXT|COL4_TXT|COL5_TXT|COL6_DAT|COL7_DEC|
20100708|766|XXX|XXXX-YYYY/Z||10/05/2010|.4
12345|1234|123|01/01/2001||20050607|12.8

Output:

create table (
  COL1_NUM number(8),
  COL2_NUM number(4),
  COL3_TXT varchar2(50),
  COL4_TXT varchar2(50),
  COL5_TXT varchar2(50),
  COL6_DAT date,
  COL7_DEC number(38,4)
);

Jean-Pierre.

Thanks for your reply .When executing the script ,getting the error meaage but table generated with proper data type.

awk: cmd. line:6: warning: escape sequence `\/' treated as plain `/'
awk: cmd. line:9: warning: escape sequence `\(' treated as plain `('
awk: cmd. line:9: warning: escape sequence `\)' treated as plain `)'
create table xx(col number...)


The AIX awk doesn't product any message.
These messages are specific tho gawk (GNU awk) :

Modify the script :

   dat1Re = "[0-9]+/[0-9]+/[0-9]+";
   dat2Re = "[12][0-9][0-9][0-9][01][0-9][0-3][0-9]";

   datRe  = "^(" dat1Re "|" dat2Re ")$";
   nulRe  = "^[[:space:]]*$";
   numRe  = "^[0-9]+$"; 
   decRe  = "^\([0-9]+\\.[0-9]*|[0-9]*\\.[0-9]+\)$";

Jean-Pierre.

Hi Aigles,

Its working fine.Thanks a lot

Regards,
MR