The awk
below when run using the contents of file
, works great with the desired output of
expName
barcodeSampleInfo barcodedSamples
.
However, when the complete file is used (attached) I get different output. It looks like the same data is there but the ordering is off. Both data sets are html
and I am not sure why the difference. Thank you :).
file
{"barcodeId": "IonXpress", "barcodedSamples": {"MEV34": {"barcodeSampleInfo": {"IonXpress_004": {"controlSequenceType": "", "description": "", "externalId": "", "hotSpotRegionBedFile": "", "nucleotideType": "DNA", "reference": "hg19", "targetRegionBedFile": "/results/uploads/BED/6/hg19/unmerged/detail/LCHv2_IDP.bed"}}, "barcodes": ["IonXpress_004"]}, "MEV35": {"barcodeSampleInfo": {"IonXpress_005": {"controlSequenceType": "", "description": "", "externalId": "", "hotSpotRegionBedFile": "", "nucleotideType": "DNA", "reference": "hg19", "targetRegionBedFile": "/results/uploads/BED/6/hg19/unmerged/detail/LCHv2_IDP.bed"}}, "barcodes": ["IonXpress_005"]}, "MEV36": {"barcodeSampleInfo": {"IonXpress_006": {"controlSequenceType": "", "description": "", "externalId": "", "hotSpotRegionBedFile": "", "nucleotideType": "DNA", "reference": "hg19", "targetRegionBedFile": "/results/uploads/BED/6/hg19/unmerged/detail/LCHv2_IDP.bed"}}, "barcodes": ["IonXpress_006"]}}, "chipDescription": "540", "chipInstrumentType": "S5", "chipType": "540", "date": "2016-09-20T15:14:38+00:00", "expName": "R_2016_09_20_10_12_41_user_S5-00580-6-Medexome", "flows": 500
{"barcodeId": "IonXpress", "barcodedSamples": {"MEV45": {"barcodeSampleInfo": {"IonXpress_007": {"controlSequenceType": "", "description": "", "externalId": "", "hotSpotRegionBedFile": "", "nucleotideType": "DNA", "reference": "hg19", "targetRegionBedFile": "/results/uploads/BED/6/hg19/unmerged/detail/LCHv2_IDP.bed"}}, "barcodes": ["IonXpress_007"]}, "MEV46": {"barcodeSampleInfo": {"IonXpress_008": {"controlSequenceType": "", "description": "", "externalId": "", "hotSpotRegionBedFile": "", "nucleotideType": "DNA", "reference": "hg19", "targetRegionBedFile": "/results/uploads/BED/6/hg19/unmerged/detail/LCHv2_IDP.bed"}}, "barcodes": ["IonXpress_008"]}, "MEV47": {"barcodeSampleInfo": {"IonXpress_009": {"controlSequenceType": "", "description": "", "externalId": "", "hotSpotRegionBedFile": "", "nucleotideType": "DNA", "reference": "hg19", "targetRegionBedFile": "/results/uploads/BED/6/hg19/unmerged/detail/LCHv2_IDP.bed"}}, "barcodes": ["IonXpress_009"]}}, "chipDescription": "540", "chipInstrumentType": "S5", "chipType": "540", "date": "2016-09-01T18:22:00+00:00", "expName": "R_2016_09_01_13_20_02_user_S5-00580-5-Medexome", "flows": 500,
{"meta": {"limit": 20, "next": null, "offset": 0, "previous": null, "total_count": 8}, "objects": [{"barcodeId": "IonXpress", "barcodedSamples": {"MEV37": {"barcodeSampleInfo": {"IonXpress_007": {"controlSequenceType": "", "description": "", "externalId": "", "hotSpotRegionBedFile": "", "nucleotideType": "DNA", "reference": "hg19", "targetRegionBedFile": "/results/uploads/BED/6/hg19/unmerged/detail/LCHv2_IDP.bed"}}, "barcodes": ["IonXpress_007"]}, "MEV38": {"barcodeSampleInfo": {"IonXpress_008": {"controlSequenceType": "", "description": "", "externalId": "", "hotSpotRegionBedFile": "", "nucleotideType": "DNA", "reference": "hg19", "targetRegionBedFile": "/results/uploads/BED/6/hg19/unmerged/detail/LCHv2_IDP.bed"}}, "barcodes": ["IonXpress_008"]}, "MEV39": {"barcodeSampleInfo": {"IonXpress_009": {"controlSequenceType": "", "description": "", "externalId": "", "hotSpotRegionBedFile": "", "nucleotideType": "DNA", "reference": "hg19", "targetRegionBedFile": "/results/uploads/BED/6/hg19/unmerged/detail/LCHv2_IDP.bed"}}, "barcodes": ["IonXpress_009"]}}, "chipDescription": "540", "chipInstrumentType": "S5", "chipType": "540", "date": "2016-09-20T17:49:30+00:00", "expName": "R_2016_09_20_12_47_36_user_S5-00580-7-Medexome", "flows": 500
output from file (desired)
R_2016_09_20_10_12_41_user_S5-00580-6-Medexome
IonXpress_004 MEV34
IonXpress_005 MEV35
IonXpress_006 MEV36
R_2016_09_01_13_20_02_user_S5-00580-5-Medexome
IonXpress_007 MEV45
IonXpress_008 MEV46
IonXpress_009 MEV47
R_2016_09_20_12_47_36_user_S5-00580-7-Medexome
IonXpress_007 MEV37
IonXpress_008 MEV38
IonXpress_009 MEV39
awk
awk -F"[]\":{}, ]*" '
BEGIN {for (n=split ("expName", T); n>0; n--) SRCH[T[n]] = n
}
{for (i=1; i<NF; i++) if ($i in SRCH) print $(i+1)
}
{for (i=1; i<NF; i++) if ($i =="barcodeSampleInfo") print $(i+1)" " $(i-1)
}
' index.html > out
output using the complete file (attached)
R_2016_09_20_12_47_36_user_S5-00580-7-Medexome
R_2016_09_20_10_12_41_user_S5-00580-6-Medexome
R_2016_09_01_13_20_02_user_S5-00580-5-Medexome
R_2016_09_01_10_24_52_user_S5-00580-4-Medexome
R_2016_08_03_10_42_57_user_S5-00580-2-Medical_Exome
R_2016_08_03_14_04_54_user_S5-00580-3-Medical_Exome
R_2016_07_23_08_40_18_user_S5-00580-1-IQOQ_RUN_Sample_2
R_2016_07_22_17_09_29_user_S5-00580-0-Test_Fragment_Run
IonXpress_007 MEV37
IonXpress_008 MEV38
IonXpress_009 MEV39
IonXpress_004 MEV34
IonXpress_005 MEV35
IonXpress_006 MEV36
IonXpress_007 MEV45
IonXpress_008 MEV46
IonXpress_009 MEV47
IonXpress_004 MEV42
IonXpress_005 MEV43
IonXpress_006 MEV44
IonXpress_001 MEC1
IonXpress_002 MEV40
IonXpress_003 MEV41
IonXpress_001 MEC1
IonXpress_002 MEV40
IonXpress_003 MEV41