Numbering by field

I'm not really sure how to explain this, but I will try. In the attached file, for consecutive records that share the same value in $4 and have $5="-", the last record is numbered 1, the one above it is 2, and so on.

However, $4=$4 and $5="-" then the first record is 1 and the one below that is 2, etc...

"-" example:

chr10 90694830 90695123 ACTA2 - 10
chr10 90697817 90697999 ACTA2 - 9
chr10 90699263 90699455 ACTA2 - 8
chr10 90700985 90701147 ACTA2 - 7
chr10 90701541 90701626 ACTA2 - 6
chr10 90703553 90703664 ACTA2 - 5
chr10 90707014 90707143 ACTA2 - 4
chr10 90708558 90708710 ACTA2 - 3
chr10 90712487 90712580 ACTA2 - 2
chr10 90750695 90751147 ACTA2 - 1 
"+" example:
chr11 119076985 119077322 CBL + 1
chr11 119103157 119103405 CBL + 2
chr11 119142444 119142591 CBL + 3
chr11 119144577 119144734 CBL + 4
chr11 119145541 119145663 CBL + 5
chr11 119146706 119146844 CBL + 6
chr11 119148466 119148554 CBL + 7
chr11 119148875 119149007 CBL + 8
chr11 119149219 119149423 CBL + 9
chr11 119155678 119155810 CBL + 10
chr11 119155898 119156276 CBL + 11
chr11 119158561 119158656 CBL + 12
chr11 119167627 119167744 CBL + 13
chr11 119168093 119168191 CBL + 14
chr11 119169067 119169250 CBL + 15
chr11 119170204 119178859 CBL + 16 

Thank you :).

You said

However, $4=$4 and $5="-"

But, did you mean

However, $4=$4 and $5="+"

Yes, you are correct. Thank you very much :slight_smile: and I apologize for any confusion.


awk '
# Number the records of each gene ($4) according to its strand ($5):
#   "+" records are numbered 1, 2, 3, ... in input order and printed at once;
#   "-" records are buffered and printed when their group ends, numbered
#   N, N-1, ..., 1 so that the last record of the group gets 1.
# A group is a run of records sharing the same $4 and $5.
# Records whose $5 is neither "+" nor "-" are not printed and do not
# disturb the group that is currently being numbered.

# Print the buffered "-" group (if any) with descending numbers, then empty it.
function flush_minus(    j) {
    for (j = 1; j <= pending; j++)
        print buf[j] " " (pending - j + 1)
    pending = 0
}

$5 == "-" {
    # A different gene, or a switch of strand, closes the previous group.
    if ($4 != prev_gene || prev_sign != "-")
        flush_minus()
    buf[++pending] = $0
    prev_gene = $4
    prev_sign = "-"
}

$5 == "+" {
    # Anything still buffered belongs before this record in the output.
    flush_minus()
    if ($4 != prev_gene || prev_sign != "+")
        count = 0
    print $0 " " ++count
    prev_gene = $4
    prev_sign = "+"
}

# The input may end inside a "-" group that has not been printed yet.
END { flush_minus() }
' input.txt

I seem to be getting the below error with the code. Thank you :).

awk -f exon.awk
awk: exon.awk:2: awk 'BEGIN { prev="";cur="";cursign="";prevsign="";}
awk: exon.awk:2: ^ invalid char ''' in expression
awk: exon.awk:2: awk 'BEGIN { prev="";cur="";cursign="";prevsign="";}
awk: exon.awk:2: ^ syntax error 

exon.awk

 #!/bin/awk -f
awk 'BEGIN { prev="";cur="";cursign="";prevsign="";}
# NOTE: the "awk" word and opening quote on the line above, and the closing
# quote with the file names on the last line, are shell syntax. They are fine
# when this text is run as a shell command, but they must be removed when the
# program is saved as exon.awk and run with "awk -f exon.awk".
#
# Numbers the records of each gene ($4): "+" records count up from 1 and are
# printed immediately; "-" records are buffered in a[] and printed when the
# group ends, counting down so that the last record of the group gets 1.
{
    if ($5 != "")
        cursign = $5;       # strand of the most recent record that had one
    cur = $4;
    #print "prev =" prev " cur= " cur " cursign= " cursign " prevsign=" prevsign
    if ($5 == "-") {
        if (cur != prev) {
            if (prevsign == "+" || prevsign == "") {
                # Previous record was not "-": start buffering a new group.
                i = 1;
                a[i] = $0
            } else {
                # Previous group was also "-": print it counting down to 1,
                # then start buffering the new group.
                k = i;
                for (j = 1; j <= k; j++) {
                    print a[j] " " i
                    i = i - 1;
                }
                i = 1;
                a[i] = $0
            }
        } else {
            # Same gene as the previous record: append to the buffered group.
            i = i + 1
            a[i] = $0
        }
        prev = cur;
    }
    if ($5 == "+") {
        if (cur != prev) {
            if (prevsign == "-" || prevsign == "") {
                # Print whatever is buffered before the first "+" record of
                # this gene; when i is still unset the loop does not run.
                k = i;
                for (j = 1; j <= k; j++) {
                    print a[j] " " i
                    i = i - 1;
                }
                i = 1;
                print $0 " " i
            } else {
                i = 1;
                print $0 " " i
            }
        } else {
            i = i + 1
            print $0 " " i
        }
        prev = cur;
    }
    prevsign = $5;
}
END {
    # The input ended inside a "-" group that has not been printed yet.
    if (cursign == "-") {
        k = i;
        for (j = 1; j <= k; j++) {
            print a[j] " " i
            i = i - 1;
        }
    }
}
' output.txt > exon_number.txt

It has to do with the way you are executing awk. If you run the awk command on the command line, it works fine. I am not an expert on how to run it by placing the code in a file either. Maybe some experts here can suggest how to run it!

Yes your code runs fine ysrv1..
Some users read the man pages before trying anything; some don't, and invent impossible syntax...

To get your code to run from a program file, meaning awk is invoked as awk -f awkprgfile...

the code copied into awkprgfile should have its first line modified and its last line removed or commented out, giving these first and last lines:

BEGIN { prev="";cur="";cursign="";prevsign="";}
.
.
#  ' input.txt

and so to run:

 awk -f awkprgfile  input.txt 

Optimized code

awk '
# Two-pass variant: buffer every record while reading, then number them in END.
#   "+" records of a gene are numbered 1, 2, 3, ... in input order;
#   "-" records of a gene are numbered N, N-1, ..., 1, where N is the number
#   of "-" records seen for that gene.
# Records whose $5 is neither "+" nor "-" are not printed.
{
    line[NR]   = $0         # full record, kept for printing later
    gene[NR]   = $4
    strand[NR] = $5
    if ($5 == "-")
        left[$4]++          # "-" records of this gene still to be printed
}
END {
    prev = ""
    # NR holds the number of records read, so the loop ends on the last one.
    for (r = 1; r <= NR; r++) {
        if (gene[r] != prev)
            up = 1          # new gene: "+" numbering restarts at 1
        if (strand[r] == "-")
            print line[r] " " (left[gene[r]]--)
        else if (strand[r] == "+")
            print line[r] " " (up++)
        prev = gene[r]
    }
}
' input.txt