How to get index values for multiple matches in the same line with awk?

chilicuil · July 14, 2015, 1:48am

Hi,

I know that

echo "bob alice robert alice" | awk '{print index($0,"alice")}'
5

This outputs the index of the first "alice" match. Is there any way to get the indexes of all matches? E.g.:

echo "bob alice robert alice" | awk 'unknown magic'
5:18

Thanks for your time.

Akshay_Hegde · July 14, 2015, 2:21am

[akshay@localhost tmp]$ cat position.awk
# position.awk - report every column at which a literal string occurs.
#
# Usage: awk -f position.awk -vsearch='word_to_be_searched' infile
#
# For each input line that contains `search`, prints the file name, the
# line number within that file (FNR) and the 1-based start column of every
# occurrence. Scanning resumes one character after the start of a hit, so
# overlapping occurrences are reported too.

# Print the invocation hint and stop before any input is read.
function usage()
{
	print "\n\tawk -f position.awk -vsearch='word_to_be_searched' infile\n"
	exit
}
BEGIN{
	# Compare against the empty string instead of testing truthiness:
	# `!search` is also true for numeric-looking values such as "0",
	# which made it impossible to search for them.
	if(search == "")usage();
}
index($0,search){
   # Standard input is labelled "stdin"; depending on the awk
   # implementation FILENAME is "-" or empty in that case.
   printf ("FILENAME : %s, LINE :  %s, Column : ", ((FILENAME=="-" || FILENAME=="")?"stdin":FILENAME), FNR )
   # `string` is the part of the line not scanned yet, `m` the number of
   # characters already cut off in front of it.
   string=$0; m=0;
   while((n=index(string, search))>0)
   {
      # n is relative to `string`; adding it to m gives the column in $0.
      m+=n;
      printf "%s ", m
      # Drop everything up to and including the first character of the hit.
      string=substr(string, n+1)
   }
   print ""
}

OR

[akshay@localhost tmp]$ cat position.awk
# position.awk - list the start columns of a literal string, one report
# line per input line that contains it.
#
# Usage: awk -f position.awk -vsearch='word_to_be_searched' infile
#
# Occurrences are counted without overlap: scanning resumes right after
# the end of each hit.

# Print the invocation hint and stop before any input is read.
function usage()
{
	print "\n\tawk -f position.awk -vsearch='word_to_be_searched' infile\n"
	exit
}
# `search == ""` rather than `!search`: the latter is also true for
# numeric-looking values such as "0", which could then never be searched.
# sl caches the length of the search string for the main rule.
BEGIN{if(search == "")usage(); sl = length(search) }
{
    si = 0          # characters already consumed from the front of the line
    pos = ""        # space-separated list of the columns found so far
    rest = $0       # scan a copy so that $0 itself is left untouched
    while ( sind = index(rest,search) )
    {
        # sind is relative to `rest`; si turns it into a column of $0.
        pos = pos (pos?" ":"") (si + sind)
        # Continue right behind the hit.
        rest = substr(rest,sind + sl)
        si += (sind + sl - 1)
    }
}
# Only lines with at least one hit produce output. Standard input is
# labelled "stdin"; depending on the awk implementation FILENAME is "-"
# or empty in that case.
pos { printf ("FILENAME : %s, LINE :  %s, Column : %s\n", ((FILENAME=="-" || FILENAME=="")?"stdin":FILENAME), FNR, pos ) }

[akshay@localhost tmp]$ echo "bob alice robert alice" | awk -f position.awk -vsearch='alice'
FILENAME : stdin, LINE :  1, Column : 5 18

[akshay@localhost tmp]$ cat testfile
bob alice robert alice
bob 1 alice robert alice
bob     2 alice robert alice
bob        3 alice robert alice

[akshay@localhost tmp]$ awk -f position.awk -vsearch='alice' testfile
FILENAME : testfile, LINE :  1, Column : 5 18 
FILENAME : testfile, LINE :  2, Column : 7 20 
FILENAME : testfile, LINE :  3, Column : 11 24 
FILENAME : testfile, LINE :  4, Column : 14 27

RudiC · July 14, 2015, 5:04am

In case you expect EXACTLY two matches, try

# For every line of `file`, print the start column of the first "alice"
# and the start column of the last one.
awk -v word=alice '
{
    # A single greedy match that stretches from the first occurrence of
    # the word to the last one on the line.
    match($0, word ".*" word)
    first = RSTART
    # The match ends with the last occurrence, so step back by its length.
    last = RSTART + RLENGTH - length(word)
    print first, last
}' file
5 18
7 20
11 24
14 27

Don_Cragun · July 14, 2015, 5:19am

You could also try something like:

#!/bin/ksh
# tester - for each of two sample lines, print the 1-based start columns
# of every non-overlapping occurrence of the literal string $1, joined
# by ":". A line without any occurrence yields an empty output line.
#
# Usage: tester search_string
printf '%s\n' 'bob alice robert alice' 'aaaa bbbb aaaa' | awk -v s="$1" '
BEGIN {
        # Refuse an empty search string: several awk implementations let
        # index() report a hit at offset 1 for it, and since the step
        # below would then not advance i, the loop would never end.
        if (s == "") {
                print "usage: tester search_string" | "cat 1>&2"
                exit 2
        }
}
{       d = ""
        # i is the column where the next scan starts, x the offset of the
        # hit inside that remainder; after a hit, continue behind its end.
        for(i = 1; x = index(substr($0, i), s); i = i + x + length(s) - 1) {
                printf("%s%d", d, i + x - 1)
                d = ":"
        }
        print ""
}'

If you want to try this on a Solaris/SunOS system, change awk to /usr/xpg4/bin/awk .

If you save this in a file named tester and make it executable, then various invocations show how it works:

$ ./tester alice
5:18

$ ./tester a
5:18
1:2:3:4:11:12:13:14
$ ./tester "b a"
3
9
$ ./tester b
1:3:13
6:7:8:9
$ ./tester aa

1:3:11:13
$

RudiC · July 14, 2015, 5:59am

In case your awk version allows for string field separators, try

# Print the start column of every SRCH occurrence on each line of `file`.
# The line is split on SRCH, so the pieces are the text between hits.
# Note: split() treats a separator longer than one character as a regular
# expression, and LNS assumes every hit is exactly length(SRCH) long, so
# SRCH should not contain regex metacharacters.
awk -vSRCH=alice '
BEGIN   {LNS=length(SRCH)
        }
        {n=split ($0, TMP, SRCH)
         m=0
         # m accumulates the length of the pieces in front of hit number i;
         # the subscript is required: length(TMP) is not the piece length.
         for (i=1; i<n; i++)    {m+=length(TMP[i])
                                 printf "%d ", (i-1)*LNS + m + 1
                                }
         printf "\n"
        }
' file

---------- Post updated at 11:59 ---------- Previous update was at 11:21 ----------

Of course, the above is stupid, as you could use FS and normal field splitting ab initio:

# Print the start column of every "alice" on each line of `file`.
# The word itself is the field separator, so each field is the text
# between two hits and a hit follows every field except the last one.
awk -F"alice" '
BEGIN { width = length(FS) }
{
    col = 1                         # column where the current field starts
    for (k = 1; k < NF; k++) {
        col += length($k)           # skip the field: now at the hit
        printf "%d ", col
        col += width                # skip the hit: now at the next field
    }
    printf "\n"
}
' file

malcomex999 · July 14, 2015, 6:17am

Try this if you don't want to reset the index position for each line...

# Print the position of every "alice" in `file`. Positions keep counting
# across lines: `len` carries the number of characters in all previous
# lines, each line end counting as one character.
awk '{ rest = $0; col = len
       # Scan the raw line instead of the fields: rebuilding columns from
       # field lengths plus one is wrong as soon as words are separated by
       # more than one blank, and it finds only one hit per field.
       while ((hit = index(rest, "alice")) != 0)
       { col += hit
         printf ("%d ", col)
         rest = substr(rest, hit + 1)
       }
       printf "\n"
       len += length($0) + 1
}' file

And this if you want to reset the index position for each line...

# Print the start column of every "alice" on each line of `file`;
# columns start again at 1 on every line.
awk '{ rest = $0; col = 0
       # Scan the raw line instead of the fields: rebuilding columns from
       # field lengths plus one is wrong as soon as words are separated by
       # more than one blank, and it finds only one hit per field.
       while ((hit = index(rest, "alice")) != 0)
       { col += hit
         printf ("%d ", col)
         rest = substr(rest, hit + 1)
       }
       printf "\n"
}' file

chilicuil · July 14, 2015, 1:39pm

I wasn't expecting such high-quality answers; thanks for all of them. In the end I used a modified version of Akshay's answer:

[akshay@localhost tmp]$ cat position.awk
function usage()
{
     print "\n\tawk -f position.awk -vsearch='word_to_be_searched' infile\n"
     exit
}

# Without a search pattern there is nothing to do. Compare against the
# empty string: `!search` would also reject numeric-looking patterns
# such as "0".
BEGIN{ if(search == "")usage(); }

# Case-insensitive search: `search` is used as a regular expression on the
# lower-cased line, so letters in `search` must be lower case to match.
# Every hit is printed as line:column:text, the text being cut from the
# original (not lower-cased) line.
match(tolower($0),search) {
    # `string` is the part of the line not scanned yet, `m` the number of
    # characters already cut off in front of it.
    string=tolower($0); m=0;
    # Stop once nothing is left to scan: a pattern that can match the
    # empty string still matches an empty remainder, which substr() can
    # no longer shrink, so the loop would not terminate.
    while(string != "" && (n=match(string,search))>0)
    {
       # n is relative to `string`; adding it to m gives the column in $0.
       m+=n;
       printf ("%s:%s:%s\n", FNR, m, substr($0, m, RLENGTH))
       # Drop everything up to and including the first character of the hit.
       string=substr(string, n+1)
    }
}

I wanted to print the resulting match and separate every entry:

1:5:alice
1:18:alice