how to read strings from a txt and put them into a spreadsheet?

i have hundreds of thousands of txt files as below,

         RADARSAT 1 SCENE DESCRIPTION
                    
SCENE_ID           c0005098
MDA ORDER NUMBER   
GEOGRAPHICAL AREA  CIS ScanSar Canada
SCENE START TIME   APR 02 1997 23:05:10.222
SCENE STOP TIME    APR 02 1997 23:02:49.695
ORBIT              7366 ASCENDING
ORBIT DATA TYPE    PREDICTED
APPLICATION LUT    ICE
BEAM MODE          (A) W1 W2 W3 S7
PRODUCT TYPE       SCANSAR WIDE (SCW)
FORMAT             RADARSAT CEOS
# OF IMAGE LINES   9280
# OF IMAGE PIXELS  5504
PIXEL SPACING      100.000 m
SCENE CENTRE       63�17'N   82�32'W
CORNER COORDINATES:
                    
       58�39'34.57" N      59�35'47.62" N
       85�27'10.00" W      76�00'25.94" W
  
       66�42'13.92" N      67�47'07.99" N
       90�46'18.26" W      78�18'44.20" W
  
************************* IMPORTANT *********************
ALL DESCENDING SCANSAR PRODUCTS ARE EAST-WEST INVERTED
For information on RADARSAT CEOS format see README.TXT
(c) Canadian Space Agency 1997 Agence spatiale canadienne. Processed and distributed
by MDA Geospatial Services Inc.

I'm trying to read information from these txts, such as, scene_id, coordinates, formats, etc. and put these information into a spreadsheet.

The problem is these information are in different formats. for example, scene_id is strings, MDA ORDER NUMBER is Null, start time and stop time is date and time format, etc. The most troubling problem is how to read the coordinates and keep them as coordinates in the spreadsheet without changing them into strings, especially the degree simpols.

The spreadsheet would look like this:

SCENE_ID	MDA ORDER NUMBER	GEOGRAPHICAL AREA	START DATE	START TIME	STOP TIME	ORBIT	etc......
c0005098	Null			CIS ScanSar Canada	1997-04-02	23:05:10	23:02:50	7366 ASCENDING	etc....

Thanks so much!

Hi sunnydanniel,

Put here the spreadsheet you want as output. It will be helpful for anyone who would like to help you.

Regards,
Birei

1 Like

That's a pretty ugly input file, unless it was originally tab-separated and ruined by copy-pasting it into a web postbox, but:

$ cat radar.awk

BEGIN {
        OFS=","
        # excel expects \r\n terminators for csv, etc
        ORS="\r\n"
        A[1]="SCENE_ID"
        A[2]="GEOGRAPHICAL AREA"
        A[3]="SCENE START TIME"
        A[4]="SCENE STOP TIME"
        A[5]="ORBIT DATA TYPE"
        A[6]="ORBIT"
        A[7]="APPLICATION LUT"
        A[8]="BEAM MODE"
        A[9]="PRODUCT TYPE"
        A[10]="FORMAT"
        A[11]="# OF IMAGE LINES"
        A[12]="# OF IMAGE PIXELS"
        A[13]="PIXEL SPACING"
        A[14]="SCENE CENTRE"
}

{       for(N=1; A[N]; N++)
        {
                R="^" A[N];
                if($0 ~ R)
                {
                        sub(R, "");
                        sub(/^[ \t]*/, "");
                        D[N]=$0
                        break
                }
        }
}

/^CORNER COORDINATES:/ {

        for(M=1; M<=2; M++)
        {
                getline

                for(N=1; N<=2; N++)
                {
                        getline
                        B[++L]=$1" "$2;
                        B[++L]=$3" "$4;
                }
        }
}

END {
        for(N=1; A[N]; N++)     $N=A[N];
        for(M=1; M<=L; M++)     $(N++)="Corner "M

        print;

        for(N=1; A[N]; N++)
        {
                if(!D[N]) D[N]="Null";
                $N=D[N];
        }

        for(M=1; M<=L; M++)     $(N++)=B[M];
        print
}

$ awk -f radar.awk data
SCENE_ID,GEOGRAPHICAL AREA,SCENE START TIME,SCENE STOP TIME,ORBIT DATA TYPE,ORBIT,APPLICATION LUT,BEAM MODE,PRODUCT TYPE,FORMAT,# OF IMAGE LINES,# OF IMAGE PIXELS,PIXEL SPACING,SCENE CENTRE,Corner 1,Corner 2,Corner 3,Corner 4,Corner 5,Corner 6,Corner 7,Corner 8
c0005098,CIS ScanSar Canada,APR 02 1997 23:05:10.222,APR 02 1997 23:02:49.695,PREDICTED,7366 ASCENDING,ICE,(A) W1 W2 W3 S7,SCANSAR WIDE (SCW),RADARSAT CEOS,9280,5504,100.000 m,63�17'N   82�32'W,58�39'34.57" N,59�35'47.62" N,85�27'10.00" W,76�00'25.94" W,66�42'13.92" N,67�47'07.99" N,90�46'18.26" W,78�18'44.20" W

$

It was able to import into Excel with degree symbols intact. Adjust OFS to taste.

1 Like

Thank you so much for your answer!
But i have some questions about your script.
The first line,

cat radar.awk

do you save the file as radar.awk or it has something to do with the awk command?
after that, i can understand your loop, but is it in perl? i tried copy paste and pops a number of errors.
and how the data is stored into a new file or only a change to the original text file?

thanks again!

Like shown, it's a textfile named radar.awk

It's not perl, it's awk.

You run it as shown,

awk -f radar.awk filename

The data is printed to the screen, as shown.

awk -f radar.awk filename

would print

SCENE_ID,GEOGRAPHICAL AREA,SCENE START TIME,SCENE STOP TIME,ORBIT DATA TYPE,ORBIT,APPLICATION LUT,BEAM MODE,PRODUCT TYPE,FORMAT,# OF IMAGE LINES,# OF IMAGE PIXELS,PIXEL SPACING,SCENE CENTRE,Corner 1,Corner 2,Corner 3,Corner 4,Corner 5,Corner 6,Corner 7,Corner 8
c0005098,CIS ScanSar Canada,APR 02 1997 23:05:10.222,APR 02 1997 23:02:49.695,PREDICTED,7366 ASCENDING,ICE,(A) W1 W2 W3 S7,SCANSAR WIDE (SCW),RADARSAT CEOS,9280,5504,100.000 m,63�17'N   82�32'W,58�39'34.57" N,59�35'47.62" N,85�27'10.00" W,76�00'25.94" W,66�42'13.92" N,67�47'07.99" N,90�46'18.26" W,78�18'44.20" W

---------- Post updated at 01:07 PM ---------- Previous update was at 01:03 PM ----------

Since you say you have millions of files, I made an improved version:

# radar2.awk

BEGIN {
        OFS=","
        # excel expects \r\n terminators for csv, etc
        ORS="\r\n"
        A[1]="SCENE_ID"
        A[2]="MDA ORDER NUMBER"
        A[3]="GEOGRAPHICAL AREA"
        A[4]="SCENE START TIME"
        A[5]="SCENE STOP TIME"
        A[6]="ORBIT"
        A[7]="ORBIT DATA TYPE"
        A[8]="APPLICATION LUT"
        A[9]="BEAM MODE"
        A[10]="PRODUCT TYPE"
        A[11]="FORMAT"
        A[12]="# OF IMAGE LINES"
        A[13]="# OF IMAGE PIXELS"
        A[14]="PIXEL SPACING"
        A[15]="SCENE CENTRE"
}

function printline()
{
        OLD=$0

        for(N=1; A[N]; N++)
        {
                if(!D[N]) D[N]="Null";
                $N=D[N];
        }

        for(M=1; M<=L; M++)     $(N++)=B[M];
        print

        $0=OLD
}

# Clear out old data whe files change
(FNR == 1) && (NR != 1) {
        printline()
        for(N=1; A[N]; N++) D[N]=""
}

/^CORNER COORDINATES:/ {
        L=0
        for(M=1; M<=2; M++)
        {
                getline

                if(FNR==1) next;

                for(N=1; N<=2; N++)
                {
                        getline
                        if(FNR==1) next;
                        B[++L]=$1" "$2;
                        B[++L]=$3" "$4;
                }
        }
}

# Check lines for data
(FNR>2) {
        N=FNR-2
        R="^" A[N];
        if($0 ~ R)
        {
                sub(R, "");
                sub(/^[ \t]*/, "");
                D[N]=$0
        }
        else print "!!!FAIL!!!", FNR, R, $0;
}

END {
        if(NR < 1)
        {
                for(N=1; A[N]; N++)     $N=A[N];
                for(M=1; M<=8; M++)     $(N++)="Corner "M;
                print;
        }
        else
                printline();
}
awk -f radar2.awk file1 file2 ...

would print

c0005098,CIS ScanSar Canada,APR 02 1997 23:05:10.222,APR 02 1997 23:02:49.695,PREDICTED,7366 ASCENDING,ICE,(A) W1 W2 W3 S7,SCANSAR WIDE (SCW),RADARSAT CEOS,9280,5504,100.000 m,63�17'N   82�32'W,58�39'34.57" N,59�35'47.62" N,85�27'10.00" W,76�00'25.94" W,66�42'13.92" N,67�47'07.99" N,90�46'18.26" W,78�18'44.20" W
line from file2
line from file3
...

...all this assuming the lines come in the exact same order in every sat file. If this isn't true, it may break down.

1 Like

it works great!!!

thank you!