How to extract information from a file?

Hi, I have a file like this:

<Iteration>
      <Iteration_iter-num>3</Iteration_iter-num>
      <Iteration_query-ID>lcl|3_0</Iteration_query-ID>
      <Iteration_query-def>G383C4U01EQA0A length=197</Iteration_query-def>
      <Iteration_query-len>197</Iteration_query-len>
      <Iteration_stat>
        <Statistics>
          <Statistics_db-num>31601460</Statistics_db-num>
          <Statistics_db-len>10937649309</Statistics_db-len>
          <Statistics_hsp-len>0</Statistics_hsp-len>
          <Statistics_eff-space>0</Statistics_eff-space>
          <Statistics_kappa>0.041</Statistics_kappa>
          <Statistics_lambda>0.267</Statistics_lambda>
          <Statistics_entropy>0.14</Statistics_entropy>
        </Statistics>
      </Iteration_stat>
      <Iteration_message>No hits found</Iteration_message>
    </Iteration>
    <Iteration>
      <Iteration_iter-num>4</Iteration_iter-num>
      <Iteration_query-ID>lcl|4_0</Iteration_query-ID>
      <Iteration_query-def>G383C4U01AUSDH length=64</Iteration_query-def>
      <Iteration_query-len>64</Iteration_query-len>
      <Iteration_stat>
        <Statistics>
          <Statistics_db-num>31601460</Statistics_db-num>
          <Statistics_db-len>10937649309</Statistics_db-len>
          <Statistics_hsp-len>0</Statistics_hsp-len>
          <Statistics_eff-space>0</Statistics_eff-space>
          <Statistics_kappa>0.041</Statistics_kappa>
          <Statistics_lambda>0.267</Statistics_lambda>
          <Statistics_entropy>0.14</Statistics_entropy>
        </Statistics>
      </Iteration_stat>
      <Iteration_message>No hits found</Iteration_message>
    </Iteration>
    <Iteration>
      <Iteration_iter-num>5</Iteration_iter-num>
      <Iteration_query-ID>lcl|5_0</Iteration_query-ID>
      <Iteration_query-def>G383C4U01DPLAS length=224</Iteration_query-def>
      <Iteration_query-len>224</Iteration_query-len>
      <Iteration_hits>
        <Hit>
          <Hit_num>1</Hit_num>
          <Hit_id>gi|460414860|ref|XP_004252780.1|</Hit_id>
          <Hit_def>PREDICTED: exocyst complex component SEC3A-like [Solanum lycopersicum]</Hit_def>
          <Hit_accession>XP_004252780</Hit_accession>
          <Hit_len>888</Hit_len>
          <Hit_hsps>
            <Hsp>
              <Hsp_num>1</Hsp_num>
              <Hsp_bit-score>60.077</Hsp_bit-score>
              <Hsp_score>144</Hsp_score>
              <Hsp_evalue>1.95683e-09</Hsp_evalue>
              <Hsp_query-from>61</Hsp_query-from>
              <Hsp_query-to>222</Hsp_query-to>
              <Hsp_hit-from>30</Hsp_hit-from>
              <Hsp_hit-to>84</Hsp_hit-to>
              <Hsp_query-frame>1</Hsp_query-frame>
              <Hsp_identity>36</Hsp_identity>
              <Hsp_positive>37</Hsp_positive>
              <Hsp_gaps>2</Hsp_gaps>
              <Hsp_align-len>56</Hsp_align-len>
              <Hsp_qseq>IRVAKSRGIWESTAN--RSPNAKPRFVAISTKAKATTN*KHFSES*KYSTGGVLEP</Hsp_qseq>
              <Hsp_hseq>IRVAKSRGIWAKTGKLGRSHTAKPRVIAISTKAKGQRT-KAFLHVLKYSTGGVLEP</Hsp_hseq>
              <Hsp_midline>IRVAKSRGIW  T    RS  AKPR +AISTKAK     K F    KYSTGGVLEP</Hsp_midline>
            </Hsp>
          </Hit_hsps>
        </Hit>
        <Hit>
          <Hit_num>2</Hit_num>
          <Hit_id>gi|225458426|ref|XP_002283704.1|</Hit_id>
          <Hit_def>PREDICTED: exocyst complex component SEC3A isoform 1 [Vitis vinifera] >gi|302142418|emb|CBI19621.3| unnamed protein product [Vitis vinifera]</Hit_def>
          <Hit_accession>XP_002283704</Hit_accession>
          <Hit_len>886</Hit_len>
          <Hit_hsps>
            <Hsp>
              <Hsp_num>1</Hsp_num>
              <Hsp_bit-score>56.6102</Hsp_bit-score>
              <Hsp_score>135</Hsp_score>
              <Hsp_evalue>3.26752e-08</Hsp_evalue>
              <Hsp_query-from>61</Hsp_query-from>
              <Hsp_query-to>222</Hsp_query-to>
              <Hsp_hit-from>30</Hsp_hit-from>
              <Hsp_hit-to>83</Hsp_hit-to>
              <Hsp_query-frame>1</Hsp_query-frame>
              <Hsp_identity>34</Hsp_identity>
              <Hsp_positive>37</Hsp_positive>
              <Hsp_gaps>1</Hsp_gaps>
              <Hsp_align-len>55</Hsp_align-len>
              <Hsp_qseq>IRVAKSRGIWESTANRSPN-AKPRFVAISTKAKATTN*KHFSES*KYSTGGVLEP</Hsp_qseq>
              <Hsp_hseq>IRVAKSRGIWGKSGKLGRNMAKPRVLALSTKAKAQRT-KAFLRVLKYSTGGVLEP</Hsp_hseq>
              <Hsp_midline>IRVAKSRGIW  +     N AKPR +A+STKAKA    K F    KYSTGGVLEP</Hsp_midline>
            </Hsp>
          </Hit_hsps>
        </Hit>
        <Hit>
          <Hit_num>3</Hit_num>
          <Hit_id>gi|359492097|ref|XP_003634363.1|</Hit_id>
          <Hit_def>PREDICTED: exocyst complex component SEC3A isoform 2 [Vitis vinifera]</Hit_def>
          <Hit_accession>XP_003634363</Hit_accession>
          <Hit_len>887</Hit_len>
          <Hit_hsps>
            <Hsp>
              <Hsp_num>1</Hsp_num>
              <Hsp_bit-score>56.6102</Hsp_bit-score>
              <Hsp_score>135</Hsp_score>
              <Hsp_evalue>3.26763e-08</Hsp_evalue>
              <Hsp_query-from>61</Hsp_query-from>
              <Hsp_query-to>222</Hsp_query-to>
              <Hsp_hit-from>30</Hsp_hit-from>
              <Hsp_hit-to>83</Hsp_hit-to>
              <Hsp_query-frame>1</Hsp_query-frame>
              <Hsp_identity>34</Hsp_identity>
              <Hsp_positive>37</Hsp_positive>
              <Hsp_gaps>1</Hsp_gaps>
              <Hsp_align-len>55</Hsp_align-len>
              <Hsp_qseq>IRVAKSRGIWESTANRSPN-AKPRFVAISTKAKATTN*KHFSES*KYSTGGVLEP</Hsp_qseq>
              <Hsp_hseq>IRVAKSRGIWGKSGKLGRNMAKPRVLALSTKAKAQRT-KAFLRVLKYSTGGVLEP</Hsp_hseq>
              <Hsp_midline>IRVAKSRGIW  +     N AKPR +A+STKAKA    K F    KYSTGGVLEP</Hsp_midline>
            </Hsp>
          </Hit_hsps>
        </Hit>
        <Hit>
          <Hit_num>4</Hit_num>
          <Hit_id>gi|255538520|ref|XP_002510325.1|</Hit_id>
          <Hit_def>exocyst complex component sec3, putative [Ricinus communis] >gi|223551026|gb|EEF52512.1| exocyst complex component sec3, putative [Ricinus communis]</Hit_def>
          <Hit_accession>XP_002510325</Hit_accession>
          <Hit_len>889</Hit_len>
          <Hit_hsps>
            <Hsp>
              <Hsp_num>1</Hsp_num>
              <Hsp_bit-score>53.9138</Hsp_bit-score>
              <Hsp_score>128</Hsp_score>
              <Hsp_evalue>2.91784e-07</Hsp_evalue>
              <Hsp_query-from>61</Hsp_query-from>
              <Hsp_query-to>222</Hsp_query-to>
              <Hsp_hit-from>30</Hsp_hit-from>
              <Hsp_hit-to>83</Hsp_hit-to>
              <Hsp_query-frame>1</Hsp_query-frame>
              <Hsp_identity>32</Hsp_identity>
              <Hsp_positive>36</Hsp_positive>
              <Hsp_gaps>1</Hsp_gaps>
              <Hsp_align-len>55</Hsp_align-len>
              <Hsp_qseq>IRVAKSRGIWESTANRSPN-AKPRFVAISTKAKATTN*KHFSES*KYSTGGVLEP</Hsp_qseq>
              <Hsp_hseq>IRVAKSRGIWGKSGKLGRQMAKPRVLALSTKSKGTRT-KAFLRVLKYSTGGVLEP</Hsp_hseq>
              <Hsp_midline>IRVAKSRGIW  +       AKPR +A+STK+K T   K F    KYSTGGVLEP</Hsp_midline>
            </Hsp>
          </Hit_hsps>
        </Hit>
        <Hit>
          <Hit_num>5</Hit_num>
          <Hit_id>gi|449460129|ref|XP_004147798.1|</Hit_id>
          <Hit_def>PREDICTED: exocyst complex component SEC3A-like [Cucumis sativus]</Hit_def>
          <Hit_accession>XP_004147798</Hit_accession>
          <Hit_len>883</Hit_len>
          <Hit_hsps>
            <Hsp>
              <Hsp_num>1</Hsp_num>
              <Hsp_bit-score>52.7582</Hsp_bit-score>
              <Hsp_score>125</Hsp_score>
              <Hsp_evalue>7.46528e-07</Hsp_evalue>
              <Hsp_query-from>61</Hsp_query-from>
              <Hsp_query-to>222</Hsp_query-to>
              <Hsp_hit-from>30</Hsp_hit-from>
              <Hsp_hit-to>84</Hsp_hit-to>
              <Hsp_query-frame>1</Hsp_query-frame>
              <Hsp_identity>32</Hsp_identity>
              <Hsp_positive>35</Hsp_positive>
              <Hsp_gaps>2</Hsp_gaps>
              <Hsp_align-len>56</Hsp_align-len>
              <Hsp_qseq>IRVAKSRGIWESTA--NRSPNAKPRFVAISTKAKATTN*KHFSES*KYSTGGVLEP</Hsp_qseq>
              <Hsp_hseq>IRVAKSRGIWGKSGMLGRQQMAKPRVLALSTKEKGPRT-KAFLRVLKYSTGGVLEP</Hsp_hseq>
              <Hsp_midline>IRVAKSRGIW  +    R   AKPR +A+STK K     K F    KYSTGGVLEP</Hsp_midline>
            </Hsp>
          </Hit_hsps>
        </Hit>
      </Iteration_hits>
      <Iteration_stat>
        <Statistics>
          <Statistics_db-num>31601460</Statistics_db-num>
          <Statistics_db-len>10937649309</Statistics_db-len>
          <Statistics_hsp-len>0</Statistics_hsp-len>
          <Statistics_eff-space>0</Statistics_eff-space>

Every query record starts with <Iteration> and ends with </Iteration>.

I want to extract only the records for certain queries, for example those listed in a file B:

G383C4U01AUSDH 
G383C4U01DPLAS
..

How could I do this?

Thanks.

Quick and simple way:

# Split the input into one record per "<" and cut each record at ">",
# so $1 holds the tag name and $2 the text that follows the tag.
awk -v RS='<' -v FS='>' '
    $1 == "Iteration_query-def" { print $2 }
' iteration.xml

Actually, both files are very large, and the queries listed in file B are not contiguous.

In what way does my solution not work for you?

In what way does your data differ from what you posted?

If you mean that you only want to extract information from between <Iteration> tags:

# Print the text of each <Iteration_query-def> that lies inside an
# <Iteration>...</Iteration> element. RS="<" yields one record per tag and
# FS=">" puts the tag name in $1 and the text after the tag in $2.
# The tag name is compared exactly: a prefix match such as /^Iteration/ also
# fires on Iteration_iter-num, Iteration_query-def, etc. (and /^\/Iteration/
# on their closing tags), so P would not track the enclosing element.
awk '
    $1 == "Iteration"                { P = 1 }
    P && $1 == "Iteration_query-def" { print $2 }
    $1 == "/Iteration"               { P = 0 }
' RS="<" FS=">" iteration.xml

Sorry, I think I didn't explain it clearly.

I have an XML file A like this:

<Iteration>
      <Iteration_iter-num>3</Iteration_iter-num>
      <Iteration_query-ID>lcl|3_0</Iteration_query-ID>
      <Iteration_query-def>G383C4U01EQA0A length=197</Iteration_query-def>
      <Iteration_query-len>197</Iteration_query-len>
      <Iteration_stat>
        <Statistics>
          <Statistics_db-num>31601460</Statistics_db-num>
          <Statistics_db-len>10937649309</Statistics_db-len>
          <Statistics_hsp-len>0</Statistics_hsp-len>
          <Statistics_eff-space>0</Statistics_eff-space>
          <Statistics_kappa>0.041</Statistics_kappa>
          <Statistics_lambda>0.267</Statistics_lambda>
          <Statistics_entropy>0.14</Statistics_entropy>
        </Statistics>
      </Iteration_stat>
      <Iteration_message>No hits found</Iteration_message>
    </Iteration>
    <Iteration>
      <Iteration_iter-num>4</Iteration_iter-num>
      <Iteration_query-ID>lcl|4_0</Iteration_query-ID>
      <Iteration_query-def>G383C4U01AUSDH length=64</Iteration_query-def>
      <Iteration_query-len>64</Iteration_query-len>
      <Iteration_stat>
        <Statistics>
          <Statistics_db-num>31601460</Statistics_db-num>
          <Statistics_db-len>10937649309</Statistics_db-len>
          <Statistics_hsp-len>0</Statistics_hsp-len>
          <Statistics_eff-space>0</Statistics_eff-space>
          <Statistics_kappa>0.041</Statistics_kappa>
          <Statistics_lambda>0.267</Statistics_lambda>
          <Statistics_entropy>0.14</Statistics_entropy>
        </Statistics>
      </Iteration_stat>
      <Iteration_message>No hits found</Iteration_message>
    </Iteration>
    <Iteration>
      <Iteration_iter-num>5</Iteration_iter-num>
      <Iteration_query-ID>lcl|5_0</Iteration_query-ID>
      <Iteration_query-def>G383C4U01DPLAS length=224</Iteration_query-def>
      <Iteration_query-len>224</Iteration_query-len>
      <Iteration_hits>
        <Hit>
          <Hit_num>1</Hit_num>
          <Hit_id>gi|460414860|ref|XP_004252780.1|</Hit_id>
          <Hit_def>PREDICTED: exocyst complex component SEC3A-like [Solanum lycopersicum]</Hit_def>
          <Hit_accession>XP_004252780</Hit_accession>
          <Hit_len>888</Hit_len>
          <Hit_hsps>
            <Hsp>
              <Hsp_num>1</Hsp_num>
              <Hsp_bit-score>60.077</Hsp_bit-score>
              <Hsp_score>144</Hsp_score>
              <Hsp_evalue>1.95683e-09</Hsp_evalue>
              <Hsp_query-from>61</Hsp_query-from>
              <Hsp_query-to>222</Hsp_query-to>
              <Hsp_hit-from>30</Hsp_hit-from>
              <Hsp_hit-to>84</Hsp_hit-to>
              <Hsp_query-frame>1</Hsp_query-frame>
              <Hsp_identity>36</Hsp_identity>
              <Hsp_positive>37</Hsp_positive>
              <Hsp_gaps>2</Hsp_gaps>
              <Hsp_align-len>56</Hsp_align-len>
              <Hsp_qseq>IRVAKSRGIWESTAN--RSPNAKPRFVAISTKAKATTN*KHFSES*KYSTGGVLEP</Hsp_qseq>
              <Hsp_hseq>IRVAKSRGIWAKTGKLGRSHTAKPRVIAISTKAKGQRT-KAFLHVLKYSTGGVLEP</Hsp_hseq>
              <Hsp_midline>IRVAKSRGIW  T    RS  AKPR +AISTKAK     K F    KYSTGGVLEP</Hsp_midline>
            </Hsp>
          </Hit_hsps>
        </Hit>
        <Hit>
          <Hit_num>2</Hit_num>
          <Hit_id>gi|225458426|ref|XP_002283704.1|</Hit_id>
          <Hit_def>PREDICTED: exocyst complex component SEC3A isoform 1 [Vitis vinifera] >gi|302142418|emb|CBI19621.3| unnamed protein product [Vitis vinifera]</Hit_def>
          <Hit_accession>XP_002283704</Hit_accession>
          <Hit_len>886</Hit_len>
          <Hit_hsps>
            <Hsp>
              <Hsp_num>1</Hsp_num>
              <Hsp_bit-score>56.6102</Hsp_bit-score>
              <Hsp_score>135</Hsp_score>
              <Hsp_evalue>3.26752e-08</Hsp_evalue>
              <Hsp_query-from>61</Hsp_query-from>
              <Hsp_query-to>222</Hsp_query-to>
              <Hsp_hit-from>30</Hsp_hit-from>
              <Hsp_hit-to>83</Hsp_hit-to>
              <Hsp_query-frame>1</Hsp_query-frame>
              <Hsp_identity>34</Hsp_identity>
              <Hsp_positive>37</Hsp_positive>
              <Hsp_gaps>1</Hsp_gaps>
              <Hsp_align-len>55</Hsp_align-len>
              <Hsp_qseq>IRVAKSRGIWESTANRSPN-AKPRFVAISTKAKATTN*KHFSES*KYSTGGVLEP</Hsp_qseq>
              <Hsp_hseq>IRVAKSRGIWGKSGKLGRNMAKPRVLALSTKAKAQRT-KAFLRVLKYSTGGVLEP</Hsp_hseq>
              <Hsp_midline>IRVAKSRGIW  +     N AKPR +A+STKAKA    K F    KYSTGGVLEP</Hsp_midline>
            </Hsp>
          </Hit_hsps>
        </Hit>
        <Hit>
          <Hit_num>3</Hit_num>
          <Hit_id>gi|359492097|ref|XP_003634363.1|</Hit_id>
          <Hit_def>PREDICTED: exocyst complex component SEC3A isoform 2 [Vitis vinifera]</Hit_def>
          <Hit_accession>XP_003634363</Hit_accession>
          <Hit_len>887</Hit_len>
          <Hit_hsps>
            <Hsp>
              <Hsp_num>1</Hsp_num>
              <Hsp_bit-score>56.6102</Hsp_bit-score>
              <Hsp_score>135</Hsp_score>
              <Hsp_evalue>3.26763e-08</Hsp_evalue>
              <Hsp_query-from>61</Hsp_query-from>
              <Hsp_query-to>222</Hsp_query-to>
              <Hsp_hit-from>30</Hsp_hit-from>
              <Hsp_hit-to>83</Hsp_hit-to>
              <Hsp_query-frame>1</Hsp_query-frame>
              <Hsp_identity>34</Hsp_identity>
              <Hsp_positive>37</Hsp_positive>
              <Hsp_gaps>1</Hsp_gaps>
              <Hsp_align-len>55</Hsp_align-len>
              <Hsp_qseq>IRVAKSRGIWESTANRSPN-AKPRFVAISTKAKATTN*KHFSES*KYSTGGVLEP</Hsp_qseq>
              <Hsp_hseq>IRVAKSRGIWGKSGKLGRNMAKPRVLALSTKAKAQRT-KAFLRVLKYSTGGVLEP</Hsp_hseq>
              <Hsp_midline>IRVAKSRGIW  +     N AKPR +A+STKAKA    K F    KYSTGGVLEP</Hsp_midline>
            </Hsp>
          </Hit_hsps>
        </Hit>
        <Hit>
          <Hit_num>4</Hit_num>
          <Hit_id>gi|255538520|ref|XP_002510325.1|</Hit_id>
          <Hit_def>exocyst complex component sec3, putative [Ricinus communis] >gi|223551026|gb|EEF52512.1| exocyst complex component sec3, putative [Ricinus communis]</Hit_def>
          <Hit_accession>XP_002510325</Hit_accession>
          <Hit_len>889</Hit_len>
          <Hit_hsps>
            <Hsp>
              <Hsp_num>1</Hsp_num>
              <Hsp_bit-score>53.9138</Hsp_bit-score>
              <Hsp_score>128</Hsp_score>
              <Hsp_evalue>2.91784e-07</Hsp_evalue>
              <Hsp_query-from>61</Hsp_query-from>
              <Hsp_query-to>222</Hsp_query-to>
              <Hsp_hit-from>30</Hsp_hit-from>
              <Hsp_hit-to>83</Hsp_hit-to>
              <Hsp_query-frame>1</Hsp_query-frame>
              <Hsp_identity>32</Hsp_identity>
              <Hsp_positive>36</Hsp_positive>
              <Hsp_gaps>1</Hsp_gaps>
              <Hsp_align-len>55</Hsp_align-len>
              <Hsp_qseq>IRVAKSRGIWESTANRSPN-AKPRFVAISTKAKATTN*KHFSES*KYSTGGVLEP</Hsp_qseq>
              <Hsp_hseq>IRVAKSRGIWGKSGKLGRQMAKPRVLALSTKSKGTRT-KAFLRVLKYSTGGVLEP</Hsp_hseq>
              <Hsp_midline>IRVAKSRGIW  +       AKPR +A+STK+K T   K F    KYSTGGVLEP</Hsp_midline>
            </Hsp>
          </Hit_hsps>
        </Hit>
        <Hit>
          <Hit_num>5</Hit_num>
          <Hit_id>gi|449460129|ref|XP_004147798.1|</Hit_id>
          <Hit_def>PREDICTED: exocyst complex component SEC3A-like [Cucumis sativus]</Hit_def>
          <Hit_accession>XP_004147798</Hit_accession>
          <Hit_len>883</Hit_len>
          <Hit_hsps>
            <Hsp>
              <Hsp_num>1</Hsp_num>
              <Hsp_bit-score>52.7582</Hsp_bit-score>
              <Hsp_score>125</Hsp_score>
              <Hsp_evalue>7.46528e-07</Hsp_evalue>
              <Hsp_query-from>61</Hsp_query-from>
              <Hsp_query-to>222</Hsp_query-to>
              <Hsp_hit-from>30</Hsp_hit-from>
              <Hsp_hit-to>84</Hsp_hit-to>
              <Hsp_query-frame>1</Hsp_query-frame>
              <Hsp_identity>32</Hsp_identity>
              <Hsp_positive>35</Hsp_positive>
              <Hsp_gaps>2</Hsp_gaps>
              <Hsp_align-len>56</Hsp_align-len>
              <Hsp_qseq>IRVAKSRGIWESTA--NRSPNAKPRFVAISTKAKATTN*KHFSES*KYSTGGVLEP</Hsp_qseq>
              <Hsp_hseq>IRVAKSRGIWGKSGMLGRQQMAKPRVLALSTKEKGPRT-KAFLRVLKYSTGGVLEP</Hsp_hseq>
              <Hsp_midline>IRVAKSRGIW  +    R   AKPR +A+STK K     K F    KYSTGGVLEP</Hsp_midline>
            </Hsp>
          </Hit_hsps>
        </Hit>
      </Iteration_hits>
      <Iteration_stat>
        <Statistics>
          <Statistics_db-num>31601460</Statistics_db-num>
          <Statistics_db-len>10937649309</Statistics_db-len>
          <Statistics_hsp-len>0</Statistics_hsp-len>
          <Statistics_eff-space>0</Statistics_eff-space>

....

and a file B containing the names of interest:

G383C4U01AUSDH 
G383C4U01DPLAS
..

I want to get a file C like this:

   <Iteration>
      <Iteration_iter-num>4</Iteration_iter-num>
      <Iteration_query-ID>lcl|4_0</Iteration_query-ID>
      <Iteration_query-def>G383C4U01AUSDH length=64</Iteration_query-def>
      <Iteration_query-len>64</Iteration_query-len>
      <Iteration_stat>
        <Statistics>
          <Statistics_db-num>31601460</Statistics_db-num>
          <Statistics_db-len>10937649309</Statistics_db-len>
          <Statistics_hsp-len>0</Statistics_hsp-len>
          <Statistics_eff-space>0</Statistics_eff-space>
          <Statistics_kappa>0.041</Statistics_kappa>
          <Statistics_lambda>0.267</Statistics_lambda>
          <Statistics_entropy>0.14</Statistics_entropy>
        </Statistics>
      </Iteration_stat>
      <Iteration_message>No hits found</Iteration_message>
    </Iteration>
    <Iteration>
      <Iteration_iter-num>5</Iteration_iter-num>
      <Iteration_query-ID>lcl|5_0</Iteration_query-ID>
      <Iteration_query-def>G383C4U01DPLAS length=224</Iteration_query-def>
      <Iteration_query-len>224</Iteration_query-len>
      <Iteration_hits>
        <Hit>
          <Hit_num>1</Hit_num>
          <Hit_id>gi|460414860|ref|XP_004252780.1|</Hit_id>
          <Hit_def>PREDICTED: exocyst complex component SEC3A-like [Solanum lycopersicum]</Hit_def>
          <Hit_accession>XP_004252780</Hit_accession>
          <Hit_len>888</Hit_len>
          <Hit_hsps>
            <Hsp>
              <Hsp_num>1</Hsp_num>
              <Hsp_bit-score>60.077</Hsp_bit-score>
              <Hsp_score>144</Hsp_score>
              <Hsp_evalue>1.95683e-09</Hsp_evalue>
              <Hsp_query-from>61</Hsp_query-from>
              <Hsp_query-to>222</Hsp_query-to>
              <Hsp_hit-from>30</Hsp_hit-from>
              <Hsp_hit-to>84</Hsp_hit-to>
              <Hsp_query-frame>1</Hsp_query-frame>
              <Hsp_identity>36</Hsp_identity>
              <Hsp_positive>37</Hsp_positive>
              <Hsp_gaps>2</Hsp_gaps>
              <Hsp_align-len>56</Hsp_align-len>
              <Hsp_qseq>IRVAKSRGIWESTAN--RSPNAKPRFVAISTKAKATTN*KHFSES*KYSTGGVLEP</Hsp_qseq>
              <Hsp_hseq>IRVAKSRGIWAKTGKLGRSHTAKPRVIAISTKAKGQRT-KAFLHVLKYSTGGVLEP</Hsp_hseq>
              <Hsp_midline>IRVAKSRGIW  T    RS  AKPR +AISTKAK     K F    KYSTGGVLEP</Hsp_midline>
            </Hsp>
          </Hit_hsps>
        </Hit>
        <Hit>
          <Hit_num>2</Hit_num>
          <Hit_id>gi|225458426|ref|XP_002283704.1|</Hit_id>
          <Hit_def>PREDICTED: exocyst complex component SEC3A isoform 1 [Vitis vinifera] >gi|302142418|emb|CBI19621.3| unnamed protein product [Vitis vinifera]</Hit_def>
          <Hit_accession>XP_002283704</Hit_accession>
          <Hit_len>886</Hit_len>
          <Hit_hsps>
            <Hsp>
              <Hsp_num>1</Hsp_num>
              <Hsp_bit-score>56.6102</Hsp_bit-score>
              <Hsp_score>135</Hsp_score>
              <Hsp_evalue>3.26752e-08</Hsp_evalue>
              <Hsp_query-from>61</Hsp_query-from>
              <Hsp_query-to>222</Hsp_query-to>
              <Hsp_hit-from>30</Hsp_hit-from>
              <Hsp_hit-to>83</Hsp_hit-to>
              <Hsp_query-frame>1</Hsp_query-frame>
              <Hsp_identity>34</Hsp_identity>
              <Hsp_positive>37</Hsp_positive>
              <Hsp_gaps>1</Hsp_gaps>
              <Hsp_align-len>55</Hsp_align-len>
              <Hsp_qseq>IRVAKSRGIWESTANRSPN-AKPRFVAISTKAKATTN*KHFSES*KYSTGGVLEP</Hsp_qseq>
              <Hsp_hseq>IRVAKSRGIWGKSGKLGRNMAKPRVLALSTKAKAQRT-KAFLRVLKYSTGGVLEP</Hsp_hseq>
              <Hsp_midline>IRVAKSRGIW  +     N AKPR +A+STKAKA    K F    KYSTGGVLEP</Hsp_midline>
            </Hsp>
          </Hit_hsps>
        </Hit>
        <Hit>
          <Hit_num>3</Hit_num>
          <Hit_id>gi|359492097|ref|XP_003634363.1|</Hit_id>
          <Hit_def>PREDICTED: exocyst complex component SEC3A isoform 2 [Vitis vinifera]</Hit_def>
          <Hit_accession>XP_003634363</Hit_accession>
          <Hit_len>887</Hit_len>
          <Hit_hsps>
            <Hsp>
              <Hsp_num>1</Hsp_num>
              <Hsp_bit-score>56.6102</Hsp_bit-score>
              <Hsp_score>135</Hsp_score>
              <Hsp_evalue>3.26763e-08</Hsp_evalue>
              <Hsp_query-from>61</Hsp_query-from>
              <Hsp_query-to>222</Hsp_query-to>
              <Hsp_hit-from>30</Hsp_hit-from>
              <Hsp_hit-to>83</Hsp_hit-to>
              <Hsp_query-frame>1</Hsp_query-frame>
              <Hsp_identity>34</Hsp_identity>
              <Hsp_positive>37</Hsp_positive>
              <Hsp_gaps>1</Hsp_gaps>
              <Hsp_align-len>55</Hsp_align-len>
              <Hsp_qseq>IRVAKSRGIWESTANRSPN-AKPRFVAISTKAKATTN*KHFSES*KYSTGGVLEP</Hsp_qseq>
              <Hsp_hseq>IRVAKSRGIWGKSGKLGRNMAKPRVLALSTKAKAQRT-KAFLRVLKYSTGGVLEP</Hsp_hseq>
              <Hsp_midline>IRVAKSRGIW  +     N AKPR +A+STKAKA    K F    KYSTGGVLEP</Hsp_midline>
            </Hsp>
          </Hit_hsps>
        </Hit>
        <Hit>
          <Hit_num>4</Hit_num>
          <Hit_id>gi|255538520|ref|XP_002510325.1|</Hit_id>
          <Hit_def>exocyst complex component sec3, putative [Ricinus communis] >gi|223551026|gb|EEF52512.1| exocyst complex component sec3, putative [Ricinus communis]</Hit_def>
          <Hit_accession>XP_002510325</Hit_accession>
          <Hit_len>889</Hit_len>
          <Hit_hsps>
            <Hsp>
              <Hsp_num>1</Hsp_num>
              <Hsp_bit-score>53.9138</Hsp_bit-score>
              <Hsp_score>128</Hsp_score>
              <Hsp_evalue>2.91784e-07</Hsp_evalue>
              <Hsp_query-from>61</Hsp_query-from>
              <Hsp_query-to>222</Hsp_query-to>
              <Hsp_hit-from>30</Hsp_hit-from>
              <Hsp_hit-to>83</Hsp_hit-to>
              <Hsp_query-frame>1</Hsp_query-frame>
              <Hsp_identity>32</Hsp_identity>
              <Hsp_positive>36</Hsp_positive>
              <Hsp_gaps>1</Hsp_gaps>
              <Hsp_align-len>55</Hsp_align-len>
              <Hsp_qseq>IRVAKSRGIWESTANRSPN-AKPRFVAISTKAKATTN*KHFSES*KYSTGGVLEP</Hsp_qseq>
              <Hsp_hseq>IRVAKSRGIWGKSGKLGRQMAKPRVLALSTKSKGTRT-KAFLRVLKYSTGGVLEP</Hsp_hseq>
              <Hsp_midline>IRVAKSRGIW  +       AKPR +A+STK+K T   K F    KYSTGGVLEP</Hsp_midline>
            </Hsp>
          </Hit_hsps>
        </Hit>
        <Hit>
          <Hit_num>5</Hit_num>
          <Hit_id>gi|449460129|ref|XP_004147798.1|</Hit_id>
          <Hit_def>PREDICTED: exocyst complex component SEC3A-like [Cucumis sativus]</Hit_def>
          <Hit_accession>XP_004147798</Hit_accession>
          <Hit_len>883</Hit_len>
          <Hit_hsps>
            <Hsp>
              <Hsp_num>1</Hsp_num>
              <Hsp_bit-score>52.7582</Hsp_bit-score>
              <Hsp_score>125</Hsp_score>
              <Hsp_evalue>7.46528e-07</Hsp_evalue>
              <Hsp_query-from>61</Hsp_query-from>
              <Hsp_query-to>222</Hsp_query-to>
              <Hsp_hit-from>30</Hsp_hit-from>
              <Hsp_hit-to>84</Hsp_hit-to>
              <Hsp_query-frame>1</Hsp_query-frame>
              <Hsp_identity>32</Hsp_identity>
              <Hsp_positive>35</Hsp_positive>
              <Hsp_gaps>2</Hsp_gaps>
              <Hsp_align-len>56</Hsp_align-len>
              <Hsp_qseq>IRVAKSRGIWESTA--NRSPNAKPRFVAISTKAKATTN*KHFSES*KYSTGGVLEP</Hsp_qseq>
              <Hsp_hseq>IRVAKSRGIWGKSGMLGRQQMAKPRVLALSTKEKGPRT-KAFLRVLKYSTGGVLEP</Hsp_hseq>
              <Hsp_midline>IRVAKSRGIW  +    R   AKPR +A+STK K     K F    KYSTGGVLEP</Hsp_midline>
            </Hsp>
          </Hit_hsps>
        </Hit>
      </Iteration_hits>
      <Iteration_stat>
        <Statistics>
          <Statistics_db-num>31601460</Statistics_db-num>
          <Statistics_db-len>10937649309</Statistics_db-len>
          <Statistics_hsp-len>0</Statistics_hsp-len>
          <Statistics_eff-space>0</Statistics_eff-space>
...

Something more like this, then:

$ cat iteration.awk

# iteration.awk
# Print every <Iteration>...</Iteration> element whose query name -- the first
# word of its <Iteration_query-def> text -- is listed in the file named by the
# variable bfile (one name per line).
#
# Usage: awk -v bfile="b" -f iteration.awk a.xml

BEGIN {
	# Read the name list while RS and FS still have their defaults, so that $1
	# is the name with surrounding blanks removed.
	while ((rc = (getline < bfile)) > 0) {
		sub(/\r$/, "")        # tolerate CRLF line endings in the list
		if (NF) D[$1] = 1     # skip blank lines: an empty key would match an empty query-def
	}
	if (rc < 0) {
		# Report an unreadable list instead of silently printing nothing.
		print "iteration.awk: cannot read name list \"" bfile "\"" > "/dev/stderr"
		exit 1
	}
	close(bfile)

	# From here on: one record per tag, $1 = tag name, $2 = text after the tag.
	RS = "<"; FS = ">"
}

# Record of the form "Iteration_query-def>NAME length=N": mark the element as
# selected when NAME is in the list.
$1 == "Iteration_query-def" {
	split($2, Q, " ")
	# "in" tests membership without creating D[Q[1]]; a plain D[Q[1]] lookup
	# would add one array element per non-listed query on large inputs.
	if (Q[1] in D) M = 1
}

# Opening tag: start buffering. This rule must stay ahead of the buffering
# rule so the opening tag itself is captured.
$1 == "Iteration" { P = 1 }

# Buffer the record, putting back the "<" that was consumed as separator.
P { R = R "<" $0 }

# Closing tag: emit the buffered element if it was selected, then reset.
$1 == "/Iteration" {
	if (M) print R
	M = P = R = ""
}

# Input ended inside a selected element: print what was buffered.
END { if (M) print R }

$ awk -v bfile="b" -f iteration.awk a.xml

Thanks a lot, I will try it.

Try this; it worked with the given sample:

awk '
    # Print the buffered block if it was selected, then start a new buffer.
    function dothis() {
        if (f) print s
        s = f = ""
    }

    # First file (fileB): remember each listed name with all whitespace removed.
    FNR == NR {
        gsub(/[[:space:]]+/, "", $1)
        if ($1 != "") A[$1] = 1    # a blank line must not register an empty name
        next
    }

    # Second file (fileA): decide on the <Iteration_query-def> line only.
    # With FS=">" that line has $1 ending in "<Iteration_query-def" and $2
    # starting with the query name. Testing every line would let a line with
    # an empty $2 (such as "<Iteration>") or unrelated text select the block.
    !f && $1 ~ /<Iteration_query-def$/ {
        split($2, S, / /)
        gsub(/[[:space:]]+/, "", S[1])
        f = (S[1] in A)
    }

    # Buffer every line; a closing </Iteration> tag ends the current block.
    {
        s = s ? s ORS $0 : $0
        if (/<\/Iteration>/) dothis()
    }

    # Flush whatever is left when the input ends inside a block.
    END { dothis() }
' FS=">" fileB fileA
1 Like

Thanks!! It works perfectly.