Hello, I have a very large file (DB_file.txt) containing coordinate positions.
Example contents of DB_file.txt:
chr1 1000 2000 chr1 10000 11000 3 281 0 0.459585414218248 0 0 0 1218 1 1.88650643678468 0 0 0 0.000000425582664277006 3.87833663319237 0.000000425597761316606 0.201115300038799 0.743522481235195 0.822936566866982
chr1 1000 2000 chr1 11000 12000 4 281 0 0.459585414218248 0 0 0 459 1 0.710924839477969 0 0 0 0.000000355375953311341 1.75327208538672 0.000000192399150742434 0.068194541602096 0.101318817290739 0.81707019750747
chr1 1000 2000 chr1 12000 13000 5 281 0 0.459585414218248 0 0 0 1415 1 2.19163104109222 0 0 0 0.000000310867653966458 3.44281554060217 0.000000377804900731428 0.128881860609444 0.263803805858084 0.81707019750747
chr1 1000 2000 chr1 13000 14000 1 281 0 0.459585414218248 0 0 0 437 1 0.676850010570529 0 0 0 0.000000285200375223444 1.49690344452761 0.000000164265976667856 0.335040184988013 0.776177861586765 0.837324774202159
chr1 1000 2000 chr1 14000 15000 1 281 0 0.459585414218248 0 0 0 530 1 0.820893605497437 0 0 0 0.000000271516726063383 1.60662404023105 0.000000176306406449545 0.322230259464951 0.799436461434159 0.850447555722604
chr1 1000 2000 chr1 16000 17000 1 281 0 0.459585414218248 0 0 0 490 1 0.758939371120272 0 0 0 0.000000253854147173018 1.41695624386548 0.000000155492795574155 0.343542270137564 0.75754917047966 0.828368055880824
chr1 1000 2000 chr1 18000 19000 2 281 0 0.459585414218248 0 0 0 130 1 0.201351261725787 0 0 0 0.000000231422263301451 0.691389012939271 0.0000000758710869983566 0.119714987783608 0.152817308925748 0.81707019750747
I have a pattern file with the following:
chr1 1000 2000 chr1 10000 11000
chr1 1000 2000 chr1 11000 12000
chr1 1000 2000 chr1 12000 13000
For a single query, I am using the command:
# Print every DB_file.txt line whose first six fields equal this one query.
awk '
  $1 == "chr1" && $2 == "1000" && $3 == "2000" &&
  $4 == "chr1" && $5 == "38000" && $6 == "39000"
' DB_file.txt
How can I do this for all the patterns in a file?
Hi,
try this:
awk '
# First file (pattern.txt): remember each six-field coordinate key.
# The commas join the fields with SUBSEP, so neighbouring fields cannot run
# together; plain concatenation would make "chr1" "1000" and "chr11" "000"
# produce the same key.
NR == FNR {A[$1,$2,$3,$4,$5,$6]; next}
# Second file (DB_file.txt): print lines whose first six fields are a stored key.
($1,$2,$3,$4,$5,$6) in A
' pattern.txt DB_file.txt
--- Post updated at 12:41 ---
If the pattern file and the DB file use the same format (same field separators), you can match on the whole pattern line instead:
awk '
# First file (pattern.txt): store each whole pattern line, and record every
# distinct pattern length. Keeping only the length of the last pattern would
# silently drop patterns of any other length.
NR == FNR {A[$0]; L[length($0)]; next}
# Second file (DB_file.txt): print the line if one of the recorded prefix
# lengths yields a stored pattern AND the prefix ends on a field boundary
# (end of line or whitespace), so "... 11000" does not match "... 110000".
{
  for (n in L) {
    len = n + 0
    if ((substr($0, 1, len) in A) &&
        (length($0) == len || substr($0, len + 1, 1) ~ /[ \t]/)) {
      print
      next
    }
  }
}
' pattern.txt DB_file.txt
Hello, thanks for the reply. With the above code, only the line matching the last line of the pattern file gets printed. Could you kindly check?
chr1 1000 2000 chr1 12000 13000 5 281 0 0.459585414218248 0 0 0 1415 1 2.19163104109222 0 0 0 0.000000310867653966458 3.44281554060217 0.000000377804900731428 0.128881860609444 0.263803805858084 0.81707019750747
cat pattern.txt
chr1 1000 2000 chr1 10000 11000
chr1 1000 2000 chr1 11000 12000
chr1 1000 2000 chr1 12000 13000
cat DB_file.txt
chr1 1000 2000 chr1 10000 11000 3 281 0 0.459585414218248 0 0 0 1218 1 1.88650643678468 0 0 0 0.000000425582664277006 3.87833663319237 0.000000425597761316606 0.201115300038799 0.743522481235195 0.822936566866982
chr1 1000 2000 chr1 11000 12000 4 281 0 0.459585414218248 0 0 0 459 1 0.710924839477969 0 0 0 0.000000355375953311341 1.75327208538672 0.000000192399150742434 0.068194541602096 0.101318817290739 0.81707019750747
chr1 1000 2000 chr1 12000 13000 5 281 0 0.459585414218248 0 0 0 1415 1 2.19163104109222 0 0 0 0.000000310867653966458 3.44281554060217 0.000000377804900731428 0.128881860609444 0.263803805858084 0.81707019750747
chr1 1000 2000 chr1 13000 14000 1 281 0 0.459585414218248 0 0 0 437 1 0.676850010570529 0 0 0 0.000000285200375223444 1.49690344452761 0.000000164265976667856 0.335040184988013 0.776177861586765 0.837324774202159
chr1 1000 2000 chr1 14000 15000 1 281 0 0.459585414218248 0 0 0 530 1 0.820893605497437 0 0 0 0.000000271516726063383 1.60662404023105 0.000000176306406449545 0.322230259464951 0.799436461434159 0.850447555722604
chr1 1000 2000 chr1 16000 17000 1 281 0 0.459585414218248 0 0 0 490 1 0.758939371120272 0 0 0 0.000000253854147173018 1.41695624386548 0.000000155492795574155 0.343542270137564 0.75754917047966 0.828368055880824
chr1 1000 2000 chr1 18000 19000 2 281 0 0.459585414218248 0 0 0 130 1 0.201351261725787 0 0 0 0.000000231422263301451 0.691389012939271 0.0000000758710869983566 0.119714987783608 0.152817308925748 0.81707019750747
cat test1.sh
#!/bin/bash
# Print every line of the DB file whose first six fields match a line of the
# pattern file.
# Arguments: $1 - pattern file (default: pattern.txt)
#            $2 - DB file      (default: DB_file.txt)
# Outputs:   matching DB lines on stdout, in DB file order.
match_patterns() {
  awk '
    # Pattern file: remember each six-field key. The commas join the fields
    # with SUBSEP, so "chr1" "1000" and "chr11" "000" stay distinct keys
    # (plain concatenation would merge them).
    NR == FNR {A[$1,$2,$3,$4,$5,$6]; next}
    # DB file: print lines whose first six fields form a stored key.
    ($1,$2,$3,$4,$5,$6) in A
  ' "$1" "$2"
}

match_patterns "${1:-pattern.txt}" "${2:-DB_file.txt}"
./test1.sh
chr1 1000 2000 chr1 10000 11000 3 281 0 0.459585414218248 0 0 0 1218 1 1.88650643678468 0 0 0 0.000000425582664277006 3.87833663319237 0.000000425597761316606 0.201115300038799 0.743522481235195 0.822936566866982
chr1 1000 2000 chr1 11000 12000 4 281 0 0.459585414218248 0 0 0 459 1 0.710924839477969 0 0 0 0.000000355375953311341 1.75327208538672 0.000000192399150742434 0.068194541602096 0.101318817290739 0.81707019750747
chr1 1000 2000 chr1 12000 13000 5 281 0 0.459585414218248 0 0 0 1415 1 2.19163104109222 0 0 0 0.000000310867653966458 3.44281554060217 0.000000377804900731428 0.128881860609444 0.263803805858084 0.81707019750747
cat test2.sh
#!/bin/bash
# Print every line of the DB file that starts with a whole line of the pattern
# file, where the matched prefix must end on a field boundary.
# Arguments: $1 - pattern file (default: pattern.txt)
#            $2 - DB file      (default: DB_file.txt)
# Outputs:   matching DB lines on stdout, in DB file order.
match_prefix() {
  awk '
    # Pattern file: store each whole line and record every distinct pattern
    # length. Keeping only the last length would drop patterns of any other
    # length.
    NR == FNR {A[$0]; L[length($0)]; next}
    # DB file: try each recorded length; accept only if the prefix is a stored
    # pattern and is followed by whitespace or the end of the line, so that
    # "... 11000" does not match "... 110000".
    {
      for (n in L) {
        len = n + 0
        if ((substr($0, 1, len) in A) &&
            (length($0) == len || substr($0, len + 1, 1) ~ /[ \t]/)) {
          print
          next
        }
      }
    }
  ' "$1" "$2"
}

match_prefix "${1:-pattern.txt}" "${2:-DB_file.txt}"
./test2.sh
chr1 1000 2000 chr1 10000 11000 3 281 0 0.459585414218248 0 0 0 1218 1 1.88650643678468 0 0 0 0.000000425582664277006 3.87833663319237 0.000000425597761316606 0.201115300038799 0.743522481235195 0.822936566866982
chr1 1000 2000 chr1 11000 12000 4 281 0 0.459585414218248 0 0 0 459 1 0.710924839477969 0 0 0 0.000000355375953311341 1.75327208538672 0.000000192399150742434 0.068194541602096 0.101318817290739 0.81707019750747
chr1 1000 2000 chr1 12000 13000 5 281 0 0.459585414218248 0 0 0 1415 1 2.19163104109222 0 0 0 0.000000310867653966458 3.44281554060217 0.000000377804900731428 0.128881860609444 0.263803805858084 0.81707019750747
1 Like