Sort a file & refine data column & row format

ckaramsetty · July 9, 2011, 10:55am

cat file1.txt

field1 "user1":
field2:"data-cde"
field3:"data-pqr"
field4:"data-mno"
 
field1 "user1":
field2:"data-dcb"
field3:"data-mxz"
field4:"data-zul"
 
field1 "user2":
field2:"data-cqz"
field3:"data-xoq"
field4:"data-pos"

Now I need to have the data like below.
I have given only 3 sets of data; my file contains 1000 sets of data.
I need the data in the format below. Your help is highly appreciated.

field1 field2 field3 field4
user1 data-cde data-pqr data-mno
user1 data-dcb data-mxz data-zul
user2 data-cqz data-xoq data-pos

Thanks & Regards
Chandrasekhar K

bartus11 · July 9, 2011, 11:02am

Try:

# Paragraph mode (RS empty): each blank-line separated group is one record.
# Quotes are dropped, newlines become spaces, and the values are then fields 2, 5, 7 and 9.
awk -F'[ :]' -v RS= 'BEGIN { print "field1 field2 field3 field4" } { gsub(/"/, ""); gsub(/\n/, " "); print $2, $5, $7, $9 }' file

birei · July 9, 2011, 3:58pm

Hi,

Using 'perl':

$ cat script.pl
use warnings;                                                                                                                                                                       
use strict;                                                                                                                                                                         
                                                                                                                                                                                    
@ARGV == 1 or die "Usage: perl $0 <input-file>\n";

## Fields of the record currently being read: field name => data.
my %field;
## Set once the header line has been printed.
my $printed_header = 0;

while ( my $line = <> ) {
        if ( $line =~ /^\s*$/ ) {
        ## Blank line: the record read so far (if any) is complete.
                flush_record();
                next;
        }

        ## Data found, save it in the hash.
        chomp $line;

        ## $f -> field name.
        ## $d -> data.
        my ($f,$d);

        if ( $line =~ /^field\d+:/ ) {
        ## All fields but first one. Split in two pieces only, so that a
        ## colon inside the data is kept.
                ($f,$d) = split /:/, $line, 2;
        } else {
        ## Field 1: name and data are separated by white space and the
        ## data is followed by a colon.
                ($f,$d) = split ' ', $line;
                $d =~ s/:\s*$// if defined $d;
        }

        ## A line without both a name and data cannot be stored.
        unless ( defined $f && defined $d ) {
                warn "Skipping malformed line $.: $line\n";
                next;
        }

        $d =~ tr/"//d;
        $field{ $f } = $d;
}

## The input may not end with a blank line: print what is still pending.
flush_record();

## Print the pending record, preceded by the header the first time, and
## forget it. Does nothing when no data has been read since the last call,
## so repeated or trailing blank lines do not produce empty or duplicated
## rows.
sub flush_record {
        return unless %field;

        unless ( $printed_header ) {
                print_header();
                $printed_header = 1;
        }

        print_data();
        %field = ();
        return;
}
                                                                                                                                                                                    
## Print the data of the current record on one line, ordered by field name.
## Every value is followed by a single space.
sub print_data {
        my $row = '';
        $row .= "$field{$_} " for sort keys %field;
        print "$row\n";
}

## Print the field names of the current record on one line, sorted.
## Every name is followed by a single space.
sub print_header {
        my @names = sort keys %field;
        print join( '', map( "$_ ", @names ) ), "\n";
}
$ perl script.pl infile
field1 field2 field3 field4 
user1 data-cde data-pqr data-mno 
user1 data-dcb data-mxz data-zul 
user2 data-cqz data-xoq data-pos

Regards,
Birei

ltomuno · July 10, 2011, 12:58am

# Paragraph mode (RS empty); the field names themselves act as separators.
# After removing quotes and colons and joining the lines, $2..$5 hold the
# four values (each but the last followed by a space), so they are printed
# without an extra separator. The header uses single spaces like the rows.
awk -F'field[1-4]' -v RS= 'BEGIN{print "field1 field2 field3 field4"}{gsub("\"","");gsub(":","");gsub("\n"," ");sub(" ","",$2);print $2 $3 $4 $5}' file1.txt

drl · July 10, 2011, 8:17am

Hi.

Using the regularity of the posted data with common utilities:

#!/usr/bin/env bash

# @(#) s1	Demonstrate flattening of fields with sed.

# Utility functions: print-as-echo, print-line-with-visual-space, debug.
# export PATH="/usr/local/bin:/usr/bin:/bin"
# pe: print all arguments back to back (no separator), then a newline.
pe() {
  local arg
  for arg in "$@"; do
    printf '%s' "$arg"
  done
  printf '\n'
}
# pl: print an empty line, a "-----" rule, then the arguments joined by spaces.
pl() { printf '\n-----\n%s\n' "$*"; }
# db: debug printer, writes " db, " followed by the arguments to stderr.
db() {
  local arg
  {
    printf ' db, '
    for arg in "$@"; do
      printf '%s' "$arg"
    done
    printf '\n'
  } >&2
}
# Debug output is switched off: this later definition replaces the one
# above with a no-op.
db() { :; }
# Run the local "context" helper, if installed, with the utility names used here.
C=$HOME/bin/context && [ -f "$C" ] && "$C" sed paste

# Input file: first argument, or "data1" when none is given.
FILE=${1-data1}

pl " Input data file $FILE:"
cat "$FILE"

pl " Results:"
echo "field1 field2 field3 field4"
# 1) drop lines holding only blanks, 2) drop the field name and the
# character after it, 3) drop quotes and colons; paste then joins every
# four lines into one space-separated row.
sed -e '/^[[:blank:]]*$/d' -e 's/field..//' -e 's/[":]//g' "$FILE" |
paste -d" " - - - -

exit 0

producing:

% ./s1

Environment: LC_ALL = C, LANG = C
(Versions displayed with local utility "version")
OS, ker|rel, machine: Linux, 2.6.26-2-amd64, x86_64
Distribution        : Debian GNU/Linux 5.0.8 (lenny) 
GNU bash 3.2.39
GNU sed version 4.1.5
paste (GNU coreutils) 6.10

-----
 Input data file data1:
field1 "user1":
field2:"data-cde"
field3:"data-pqr"
field4:"data-mno"
 
field1 "user1":
field2:"data-dcb"
field3:"data-mxz"
field4:"data-zul"
 
field1 "user2":
field2:"data-cqz"
field3:"data-xoq"
field4:"data-pos"

-----
 Results:
field1 field2 field3 field4
user1 data-cde data-pqr data-mno
user1 data-dcb data-mxz data-zul
user2 data-cqz data-xoq data-pos

See man pages for details ... cheers, drl

( edit 1: minor typo )

ckaramsetty · August 4, 2011, 4:00am

My actual data looks like below

I have given only the format; I can't give the exact data of my requirement due to some reasons. This set of data has about 5000 lines.

I need to come up with information in below

exact format of my data set :
Line<space>Number1<space>"somedata":
LineNumber2:"somedata"
LineNumber3:"somedata"
LineNumber4:"somedata"

------------------------------------

ab cd "somedata1":
efgh:"somedata2"
ijkl:"somedata3"
monp:"somedata4"

ab cd "somedata5":
efgh:"somedata6"
ijkl:"somedata7"
monp:"somedata8"
I need to get the output as

  abcd   efgh   ijkl   monp

somedata1 somedata2 somedata3 somedata4
somedata5 somedata6 somedata7 somedata8

I would be happy if I could get a script which can give this info.

From my file I will need to get 4000 lines of data.

Thanks in advance.

Chubler_XL · August 5, 2011, 12:06am

How about this awk script:

awk '
# Print the pending header (names seen for the first time) and the pending
# row of values, then reset both. Empty parts are skipped, so repeated or
# trailing blank lines do not produce empty output lines.
function emit() {
    if (keystr != "") print substr(keystr, 2)
    if (vals != "") print substr(vals, 2)
    vals = keystr = ""
}
# A blank line ends the current record.
NF == 0 {
    emit()
    next
}
{
    # Quotes and colons become spaces: the last field is then the value and
    # all fields before it, joined together, form the column name.
    gsub(/[":]/, " ", $0)
    key = $1
    for (i = 2; i < NF; i++) key = key $i
    if (!(key in have)) {
        keystr = keystr " " key
        have[key]
    }
    # After the loop i is NF for lines with two or more fields.
    vals = vals " " $i
}
# Input without a final blank line: print what is still pending, including
# the header when the file holds a single record.
END { emit() }' infile

ckaramsetty · September 8, 2011, 6:50am

First I used the perl command to modify the file,
and then the awk command.

------------------------------------------

-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------

cat file1.txt

field1 data "user1":
field2:"data-cde"
field3:"data-pqr"
field4:"data-mno"

field1 data "user1":
field2:"data-dcb"
field3:"data-mxz"
field4:"data-zul"

field1 data "user2":
field2:"data-cqz"
field3:"data-xoq"
field4:"data-pos"

-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------

Used the perl command and modified the file1.txt data

# Edit file1.txt in place: turn the leading "field1 data " into the single
# key "field1_data:" so that every line has the form key:"value".
perl -pi -e 's/field1 data /field1_data:/g' file1.txt

so, now modified file1.txt

cat file1.txt
field1_data:"user1":
field2:"data-cde"
field3:"data-pqr"
field4:"data-mno"

field1_data:"user1":
field2:"data-dcb"
field3:"data-mxz"
field4:"data-zul"

field1_data:"user2":
field2:"data-cqz"
field3:"data-xoq"
field4:"data-pos"

-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------

then use the awk script to get sorted info

# Sort_a_File.awk has no "#!" line, so run it through awk -f.
awk -f Sort_a_File.awk file1.txt > Sorted_Info.txt

-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------

cat Sort_a_File.awk
# Sort_a_File.awk
# Turn records of the form
#     field1_data:"user1":
#     field2:"data-cde"
#     field3:"data-pqr"
#     field4:"data-mno"
# separated by blank lines into one output line per record:
#     user1 data-cde data-pqr data-mno
# Run as: awk -f Sort_a_File.awk file1.txt

# -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
# Return the text between the first and the second colon of line,
# without double quotes and without a trailing ";".
# a is a local work array.
# -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
function Extract_Arg(line,    a) {
    split(line, a, ":")
    sub(/;$/, "", a[2])
    gsub(/"/, "", a[2])

    return a[2]
}

# -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
# Print the record collected so far, if any, and start a new one.
# Clearing the columns keeps values from leaking into the next record.
# -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
function Emit_Row() {
    if (!pending)
        return
    print col1 " " col2 " " col3 " " col4
    col1 = col2 = col3 = col4 = ""
    pending = 0
}

# -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
# MAIN BODY
# -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Check for each data value that we want to save
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
index($0, "field1_data:") > 0 { col1 = Extract_Arg($0); pending = 1; next }
index($0, "field2:") > 0      { col2 = Extract_Arg($0); pending = 1; next }
index($0, "field3:") > 0      { col3 = Extract_Arg($0); pending = 1; next }
index($0, "field4:") > 0      { col4 = Extract_Arg($0); pending = 1; next }

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Output the data line: any other line (normally the blank separator)
# ends the current record
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
index($0, "field4") == 0 { Emit_Row() }

# The last record is not followed by a blank line: print it at the end.
END { Emit_Row() }

-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------

cat Sorted_Info.txt

user1 data-cde data-pqr data-mno
user1 data-dcb data-mxz data-zul
user2 data-cqz data-xoq data-pos

The above file does not contain the header;
you may add the header before sorting the data.

Hope this helps.

Regards
Chandrasekhar K