Replace floating-point by integer in awk

smadonald1 · January 31, 2008, 6:07am

Hi,
I am trying to write a script to extract multiple sets of data from a chemistry output file. The problem section is in the following format...

Geometry "geometry" -> "geometry"
1 Pd 46.0000 -0.19290971 0.00535260 0.02297606
2 P 15.0000 -0.14710910 -2.36148276 0.03834032
3 P 15.0000 -0.15274409 2.36634164 -0.06817369
4 C-t 6.0000 -2.28301257 -0.04175538 0.06012368
5 H-t 1.0000 -2.64711929 -0.61127738 -0.80775979
6 H-t 1.0000 -2.59746921 -0.53769963 0.98862569
7 H-t 1.0000 -2.72455591 0.96040010 0.02865251
8 O-t 8.0000 2.07206484 -0.64945821 -0.99276006
9 H-t 1.0000 2.35209406 -0.10074612 -1.74408343
10 H-t 1.0000 2.22690284 -0.00778417 0.03690656
11 O-t 8.0000 2.04896854 0.63726649 1.06014962
12 H-t 1.0000 2.29703540 0.08837100 1.82240885
Atomic Mass

My problem...
I need a way to convert the 3rd column (the "integer" in floating-point format) into a plain integer BEFORE the print command, so that I can then use these integers as indices into an array built with the split function.

Is there a way of doing this in my script...

#! /usr/bin/awk -f

#create array to convert integers to element symbols
# 1=H, 2=He, etc.
BEGIN {
    # Comma-separated element symbols in order of atomic number; after
    # split(), PSE[1] is "H ", PSE[2] is "He", and so on.
    atomlist = "H ,He,Li,Be,B ,C ,N ,O ,F ,Ne,Na,Mg,Al,Si,P ,S ,Cl,Ar,K ,Ca,Sc,Ti,V,Cr,Mn,Fe,Co,Ni,Cu,Zn,Ga,Ge,As,Se,Br,Kr,Rb,Sr,Y,Zr,Nb,Mo,Tc,Ru,Rh,Pd,Ag,Cd,In,Sn,Sb,Te,I ,Xe,Cs,Ba,La,Ce,Pr,Nd,Pm,Sm,Eu,Gd,Tb,Dy,Ho,Er,Tm,Yb,Lu,Hf,Ta,W,Re,Os,Ir,Pt,Au,Hg,Tl,Pb,Bi,Po,At,Rn,Fr,Ra,Ac,Th,Pa,U,Np,Pu,Am,Cm,Bk,Cf,Es,Fm,Md,No,Lr,Rf,Db,Sg,Bh,Hs,Mt";
    split(atomlist, PSE, ",");

    # e is the next free slot in the energy array E, i the next free slot
    # in the per-atom arrays; both are 1-based.
    e = 1;
    i = 1;
}

# A geometry header line starts a new step: count the step and restart the
# per-step atom tally (natoms).
/Geometry "geometry" -> "geometry"/ {
    steps++
    natoms = 0
}

# Record an energy value: on lines that start with "@" and whose second
# field contains a digit, append the third field to E.
/^@/ && $2 ~ /[0-9]+/ {
    E[e++] = $3
}

# Extract only columns 3-6 of each atom line in a geometry section, i.e.
# between the geometry header and the "Atomic Mass" line.
# The atomic number in $3 still needs converting from floating-point text
# to an integer before it can be used as an index into PSE.
/Geometry "geometry" -> "geometry"/,/Atomic Mass/ {
    # Only lines whose first field contains a digit are treated as atom lines.
    if ( $1 ~ /[0-9]+/ ) {
        ++natoms;
        S[i] = $3;    # <-- problem line here: $3 is still text such as "46.0000"
        X[i] = $4;
        Y[i] = $5;
        Z[i++] = $6   # advance to the next atom slot after the last column
    }
}

END {
    # Write one record per geometry step: the atom count, the energy, then
    # one "symbol x y z" line per atom.
    # NOTE(review): natoms is the count from the last geometry section, so
    # this assumes every step has the same number of atoms and that E holds
    # one energy per step -- confirm against the input files.
    total = steps * natoms;
    i = 1;    # from here on i indexes the energies in E
    # A for loop (rather than do-while) prints nothing when no geometry was read.
    for (k = 1; k <= total; k++) {
        # The first atom of each step is preceded by the two header lines.
        if ((k - 1) % natoms == 0)
            printf("%i\n%22.8f\n", natoms, E[i++]);
        printf("%s %8.6f %8.6f %8.6f\n", PSE[S[k]], X[k], Y[k], Z[k]);
    }
}

At the moment everything works except for that one column of data, because using the floating-point value as the array index produces no result.

Any help would be fantastic!

Tytalus · January 31, 2008, 6:21am

printf should help (note that %.0f rounds to the nearest integer rather than truncating), e.g.

 # echo  2.52690284 | awk '{printf "%.0f",$1}'
3

radoulov · January 31, 2008, 6:27am

Could you post an example of the desired result?

vgersh99 · January 31, 2008, 6:30am

S[i] = int($3)

smadonald1 · January 31, 2008, 6:31am

Thanks,
Unfortunately, if I use printf at this stage, the value goes straight to the output, and I need all the data in a specific format, not just that one column.

Remember also that this integer will eventually end up converted to a string by the array...

I need a way without using printf

smadonald1 · January 31, 2008, 6:35am

Thanks vgersh99 !!!

The S [i]= int($3) command worked a charm!

the output is perfect...

36
-572.44987938
Pd -0.212918 0.004511 0.022713
P -0.152320 -2.360683 0.037839
P -0.157447 2.366475 -0.066462
C -2.300322 -0.042021 0.060348
H -2.660893 -0.611970 -0.808329
H -2.613078 -0.538682 0.988993
H -2.739315 0.960898 0.028249
O 2.117646 -0.659949 -0.986743
H 2.366548 -0.108745 -1.747134
H 2.271505 -0.009514 0.035132
O 2.092218 0.645113 1.053500
H 2.310793 0.094472 1.823721
C 1.370822 -3.099086 0.827188
H 1.386621 -2.849606 1.898103
H 2.242302 -2.652521 0.329789
H 1.377317 -4.194074 0.706847
C -0.117670 -3.069906 -1.689405
H 0.749056 -2.628859 -2.199087
H -0.033101 -4.168041 -1.668271
H -1.037221 -2.783124 -2.220210
C -1.540751 -3.321685 0.853492
H -1.384633 -4.405730 0.739767
H -2.503214 -3.046919 0.398931
H -1.577146 -3.072302 1.924305
C -0.226934 3.158669 1.623556
H 0.602118 2.742269 2.211242
H -0.133838 4.254254 1.555691
H -1.179894 2.900727 2.108129
C -1.476324 3.302192 -1.019132
H -2.472726 3.070857 -0.616182
H -1.306484 4.388200 -0.953136
H -1.448164 2.993615 -2.074466
C 1.418810 3.057762 -0.791369
H 2.253202 2.621749 -0.225615
H 1.430697 4.156742 -0.716978
H 1.494764 2.762780 -1.848113
36
-572.44987938
Pd -0.212918 0.004511 0.022713
P -0.152320 -2.360683 0.037839
P -0.157447 2.366475 -0.066462
C -2.300322 -0.042021 0.060348
H -2.660893 -0.611970 -0.808329
H -2.613078 -0.538682 0.988993
H -2.739315 0.960898 0.028249
O 2.117646 -0.659949 -0.986743
H 2.366548 -0.108745 -1.747134
H 2.271505 -0.009514 0.035132
O 2.092218 0.645113 1.053500
H 2.310793 0.094472 1.823721
C 1.370822 -3.099086 0.827188
H 1.386621 -2.849606 1.898103
H 2.242302 -2.652521 0.329789
H 1.377317 -4.194074 0.706847
C -0.117670 -3.069906 -1.689405
H 0.749056 -2.628859 -2.199087
H -0.033101 -4.168041 -1.668271
H -1.037221 -2.783124 -2.220210
C -1.540751 -3.321685 0.853492
H -1.384633 -4.405730 0.739767
H -2.503214 -3.046919 0.398931
H -1.577146 -3.072302 1.924305
C -0.226934 3.158669 1.623556
H 0.602118 2.742269 2.211242
H -0.133838 4.254254 1.555691
H -1.179894 2.900727 2.108129
C -1.476324 3.302192 -1.019132
H -2.472726 3.070857 -0.616182
H -1.306484 4.388200 -0.953136
H -1.448164 2.993615 -2.074466
C 1.418810 3.057762 -0.791369
H 2.253202 2.621749 -0.225615
H 1.430697 4.156742 -0.716978
H 1.494764 2.762780 -1.848113

etc...

Thanks again

radoulov · January 31, 2008, 6:36am

As far as this part is concerned:

$ cat file
Geometry "geometry" -> "geometry"
1 Pd 46.0000 -0.19290971 0.00535260 0.02297606
2 P 15.0000 -0.14710910 -2.36148276 0.03834032
3 P 15.0000 -0.15274409 2.36634164 -0.06817369
4 C-t 6.0000 -2.28301257 -0.04175538 0.06012368
5 H-t 1.0000 -2.64711929 -0.61127738 -0.80775979
6 H-t 1.0000 -2.59746921 -0.53769963 0.98862569
7 H-t 1.0000 -2.72455591 0.96040010 0.02865251
8 O-t 8.0000 2.07206484 -0.64945821 -0.99276006
9 H-t 1.0000 2.35209406 -0.10074612 -1.74408343
10 H-t 1.0000 2.22690284 -0.00778417 0.03690656
11 O-t 8.0000 2.04896854 0.63726649 1.06014962
12 H-t 1.0000 2.29703540 0.08837100 1.82240885
Atomic Mass
$ nawk 'BEGIN {
# pse[n] is the symbol of the element with atomic number n
split("H, He, Li, Be, B, C, N, O, F, Ne, Na, Mg, Al, Si, P, S, Cl, Ar, K, Ca, Sc, Ti, V, Cr, Mn, Fe, Co, Ni, Cu, Zn, Ga, Ge, As, Se, Br, Kr, Rb, Sr, Y, Zr, Nb, Mo, Tc, Ru, Rh, Pd, Ag, Cd, In, Sn, Sb, Te, I, Xe, Cs, Ba, La, Ce, Pr, Nd, Pm, Sm, Eu, Gd, Tb, Dy, Ho, Er, Tm, Yb, Lu, Hf, Ta, W, Re, Os, Ir, Pt, Au, Hg, Tl, Pb, Bi, Po, At, Rn, Fr, Ra, Ac, Th, Pa, U, Np, Pu, Am, Cm, Bk, Cf, Es, Fm, Md, No, Lr, Rf, Db, Sg, Bh, Hs, Mt",pse,", ") }
# replace column 3 by the element symbol only when it is a known atomic
# number; every other line is printed untouched
(int($3) in pse) { $3 = pse[int($3)] }
1' file
Geometry "geometry" -> "geometry"
1 Pd Pd -0.19290971 0.00535260 0.02297606
2 P P -0.14710910 -2.36148276 0.03834032
3 P P -0.15274409 2.36634164 -0.06817369
4 C-t C -2.28301257 -0.04175538 0.06012368
5 H-t H -2.64711929 -0.61127738 -0.80775979
6 H-t H -2.59746921 -0.53769963 0.98862569
7 H-t H -2.72455591 0.96040010 0.02865251
8 O-t O 2.07206484 -0.64945821 -0.99276006
9 H-t H 2.35209406 -0.10074612 -1.74408343
10 H-t H 2.22690284 -0.00778417 0.03690656
11 O-t O 2.04896854 0.63726649 1.06014962
12 H-t H 2.29703540 0.08837100 1.82240885
Atomic Mass

fpmurphy · January 31, 2008, 6:39am

The following shortened version of your script should show you how to do it:

#!/usr/bin/awk -f

BEGIN {
    i = 1;
    # Comma-separated element symbols in order of atomic number; after
    # split(), PSE[1] is "H ", PSE[2] is "He", and so on. The literal must
    # stay on one line: awk strings cannot contain a raw newline.
    atomlist = "H ,He,Li,Be,B ,C ,N ,O ,F ,Ne,Na,Mg,Al,Si,P ,S ,Cl,Ar,K ,Ca,Sc,Ti,V,Cr,Mn,Fe,Co,Ni,Cu,Zn,Ga,Ge,As,Se,Br,Kr,Rb,Sr,Y,Zr,Nb,Mo,Tc,Ru,Rh,Pd,Ag,Cd,In,Sn,Sb,Te,I,Xe,Cs,Ba,La,Ce,Pr,Nd,Pm,Sm,Eu,Gd,Tb,Dy,Ho,Er,Tm,Yb,Lu,Hf,Ta,W,Re,Os,Ir,Pt,Au,Hg,Tl,Pb,Bi,Po,At,Rn,Fr,Ra,Ac,Th,Pa,U,Np,Pu,Am,Cm,Bk,Cf,Es,Fm,Md,No,Lr,Rf,Db,Sg,Bh,Hs,Mt";
    split(atomlist,PSE,",")
}

# For every atom line (first field contains a digit) between the geometry
# header and the "Atomic Mass" line, print the element symbol followed by
# the atomic number.
/Geometry "geometry" -> "geometry"/,/Atomic Mass/ {
     if ( $1 ~ /[0-9]+/ ) {
         # int() turns the text "46.0000" into the number 46; an
         # integer-valued number becomes the subscript "46" when used to
         # index PSE, so the lookup matches the keys created by split().
         y = int($3);
         printf("%s %d\n",PSE[y], y);
     }
}

~
~