XML: parse the data between two tags, together with the enclosing (upper) tags

Hi Guys

Here is my Input :

<?xml version="1.0" encoding="UTF-8"?>
            <xn:MeContext id="01736">
                <xn:VsDataContainer id="01736">
                    <xn:attributes>
                        <xn:vsDataType>vsDataMeContext</xn:vsDataType>
                        <xn:vsDataFormatVersion>EricssonSpecificAttributes.13.25</xn:vsDataFormatVersion>
                        <es:vsDataMeContext>
                            <es:userLabel>01736</es:userLabel>
                            <es:ipAddress>107.69.51.30</es:ipAddress>
                            <es:neMIMversion>vD.1.44</es:neMIMversion>
                            <es:lostSynchronisation>SYNCHRONISED</es:lostSynchronisation>
                            <es:bcrLastChange>1394456163399</es:bcrLastChange>
                            <es:bctLastChange>1394403616793</es:bctLastChange>
                            <es:multiStandardRbs6k>false</es:multiStandardRbs6k>
                            <es:mixedModeRadio>false</es:mixedModeRadio>
                            <es:mirrorMIBversion>D.1.42.M.2.30</es:mirrorMIBversion>
                            <es:stnNodes></es:stnNodes>
                        </es:vsDataMeContext>
                    </xn:attributes>
			<xn:VsDataContainer id="01736_A_111">
                            <xn:attributes>
                                <xn:vsDataType>vsDataEUtranCellFDD</xn:vsDataType>
                                <es:vsDataEUtranCellFDD>
                                    <es:pZeroNominalPusch>-100</es:pZeroNominalPusch>
                                    <es:pZeroNominalPucch>-116</es:pZeroNominalPucch>
                                    <es:dlInterferenceManagementActive>false</es:dlInterferenceManagementActive>
                                    <es:ulInterferenceManagementActive>true</es:ulInterferenceManagementActive>
                                    <es:cellBarred>0</es:cellBarred>
                                    <es:pMaxServingCell>23</es:pMaxServingCell>
                                    <es:systemInformationBlock3>
                                    <es:qHyst>4</es:qHyst>
                                    <es:tEvaluation>240</es:tEvaluation>
                                    <es:tHystNormal>240</es:tHystNormal>
                                    <es:nCellChangeMedium>16</es:nCellChangeMedium>
                                    <es:nCellChangeHigh>16</es:nCellChangeHigh>
                                    <es:qHystSfMedium>0</es:qHystSfMedium>
                                    </es:systemInformationBlock3>
                                    <es:systemInformationBlock6>
                                    <es:tReselectionUtra>2</es:tReselectionUtra>
                                    <es:tReselectionUtraSfMedium>100</es:tReselectionUtraSfMedium>
                                    <es:tReselectionUtraSfHigh>100</es:tReselectionUtraSfHigh>
                                    </es:systemInformationBlock6>
                                    <es:systemInformationBlock7>
                                    <es:tReselectionGeran>2</es:tReselectionGeran>
                                    <es:tReselectionGeranSfMedium>100</es:tReselectionGeranSfMedium>
                                    <es:tReselectionGeranSfHigh>100</es:tReselectionGeranSfHigh>
                                    </es:systemInformationBlock7>
                                    <es:systemInformationBlock8>
                                    <es:searchWindowSizeCdma>8</es:searchWindowSizeCdma>
                                    <es:tReselectionCdmaHrpd>2</es:tReselectionCdmaHrpd>
                                    <es:tReselectionCdmaHrpdSfMedium>100</es:tReselectionCdmaHrpdSfMedium>
                                    <es:tReselectionCdmaHrpdSfHigh>100</es:tReselectionCdmaHrpdSfHigh>
                                    </es:systemInformationBlock8>
                                    <es:mappingInfo>
                                    <es:mappingInfoSIB4>2</es:mappingInfoSIB4>
                                    <es:mappingInfoSIB5>3</es:mappingInfoSIB5>
                                    <es:mappingInfoSIB6>4</es:mappingInfoSIB6>
                                    <es:mappingInfoSIB7>5</es:mappingInfoSIB7>
                                    <es:mappingInfoSIB8>6</es:mappingInfoSIB8>
                                    <es:mappingInfoSIB3>1</es:mappingInfoSIB3>
                                    <es:mappingInfoSIB10>1</es:mappingInfoSIB10>
                                    <es:mappingInfoSIB11>0</es:mappingInfoSIB11>
                                    <es:mappingInfoSIB12>7</es:mappingInfoSIB12>
                                    </es:mappingInfo>
                                    <es:qQualMinOffset>0</es:qQualMinOffset>
                                    <es:rateShapingActive>false</es:rateShapingActive>
                                    <es:threshServingLow>6</es:threshServingLow>
                                    <es:ulSrsEnable>true</es:ulSrsEnable>
                                    <es:acBarringForEmergency>false</es:acBarringForEmergency>
 				</es:vsDataEUtranCellFDD>
    			 </xn:attributes>
                       </xn:VsDataContainer>
             </xn:VsDataContainer>
	 </xn:MeContext>
    </configData>
    <fileFooter dateTime="2014-03-10T10:43:09Z"/>
</bulkCmConfigDataFile>

The output I want is all the data between the two tags:

01736 01736_A_111 vsDataEUtranCellFDD pZeroNominalPusch -100
01736 01736_A_111 vsDataEUtranCellFDD pZeroNominalPucch -116
------
------
01736 01736_A_111 vsDataEUtranCellFDD systemInformationBlock3 qHyst 4
01736 01736_A_111 vsDataEUtranCellFDD systemInformationBlock3 tEvaluation 240
----
----
01736 01736_A_111 vsDataEUtranCellFDD acBarringForEmergency false

I want every value, each one prefixed with the ids of its enclosing tags (here 01736 and 01736_A_111).

Does the raw XML actually look like that, or are you copy-pasting prettied-up XML from internet explorer?

No, it's a huge XML file, so I just copied and pasted part of it into Notepad.

---------- Post updated at 10:59 AM ---------- Previous update was at 10:56 AM ----------

I have tried the nawk command below, but it gives different output, and I would also have to list every value by hand:

nawk -F'[\"\>\<]' '
# Print a header, remember the most recent id/value of each tag of interest,
# and emit one row whenever a loadBalancing line is seen.
BEGIN                        { print "Nbr" }
/MeContext id/               { a = $3 }
/VsDataContainer id/         { b = $3 }
/cellIndividualOffsetEUtran/ { c = $3 }
/isRemoveAllowed/            { d = $3 }
/isHoAllowed/                { e = $3 }
/loadBalancing/              { print a, b, c, d, e, $3 }
' filename
	

---------- Post updated at 11:40 AM ---------- Previous update was at 10:59 AM ----------

Can anyone help me with this?

The prettied up version is pretty useless. Can you post what it actually looks like?

Bumping up posts or double posting is not permitted in these forums.

Please read the rules, which you agreed to when you registered, if you have not already done so.

You may receive an infraction for this. If so, don't worry, just try to follow the rules more carefully. The infraction will expire in the near future

Thank You.

The UNIX and Linux Forums.

Please find attachment!!!!

I like to think this may be easier using xmllint (with --xpath)

I don't know about xmllint.

Can you show me some sample output?

I may have found a solution that could help you -- but you have removed your attachment so I cannot tell.

Please find the attachment.

Sorry this one

An improved version of my generic xml-extraction awk program:

$ cat xmlt.awk

# One-time setup: record/field splitting and the fixed leading output columns.
BEGIN {
        # Retention depth: addprop() stamps each property with (POS+2)-DEP and
        # delprop() discards it once that stamp exceeds the current depth.
        DEP=4;
        POS=0;  # Position in tag stack
        RS="<"; # Input record separator: every "<" starts a new record
        # Input field separator.  The original listed \t twice; \r is used
        # here instead so CR-terminated lines split the same way as in the
        # [ \r\n\t] classes used by the tag rules.
        FS="[ \r\n\t>/]";

        # Hardcode the first two things in the output order.
        # ORDER maps both index -> property name and property name -> index.
        ORDER[++O]="XN:MECONTEXT:ID";
        ORDER["XN:MECONTEXT:ID"]=O

        ORDER[++O]="XN:VSDATACONTAINER:ID";
        ORDER["XN:VSDATACONTAINER:ID"]=O
}

# Decides whether a property should be appended to the list of output
# columns.  Returns 1 only when the current tag path (global TSS) contains
# XN:VSDATACONTAINER and the property name contains "DATA"; otherwise 0.
# PROPVAL is accepted but not consulted.
function catchthis(PROPNAME, PROPVAL) {
        return((TSS ~ /XN:VSDATACONTAINER/) && (PROPNAME ~ /DATA/));
}

# With RS="<" the text before the very first "<" arrives as record 1;
# drop that record when it is empty.
NR == 1 && $0 == "" {
        next
}

# Skip XML comments: keep reading records until one contains the "-->"
# terminator, then replace the comment with a self-closing placeholder tag
# followed by whatever text came after the terminator.
/^!--/ {
        I = index($0, "-->");
        while(I == 0)
        {
                if(getline <= 0) exit;
                I = index($0, "-->");
        }
        # Strip out comment
        $0 = "--XMLCOMMENT-- />" substr($0, I+3);
}

# Ignore records that start with "?" or "!" (XML declaration, DOCTYPE and
# similar markup); comments were already rewritten by the rule above.
/^[?!]/ {
        next
}

# These should be special variables for match() but aren't.
# String before match: everything in STR that precedes the most recent
# successful match(), i.e. characters 1 .. RSTART-1.
# The start position is the literal 1; the original used the global N, which
# is unset at first and is later left at O+1 by the close-tag rule's loop.
function rbefore(STR)   { return(substr(STR, 1, RSTART-1)); }
# Returns the single character of STR at which the most recent match() began.
function rmid(STR) {
        return substr(STR, RSTART, 1);
}
# Returns the full text in STR covered by the most recent match().
function rall(STR) {
        return substr(STR, RSTART, RLENGTH);
}
# Returns the remainder of STR that follows the most recent match().
function rafter(STR) {
        return substr(STR, RSTART + RLENGTH);
}

# Stores one "name SUBSEP value" pair held in OUT as A[PFIX":"name]=value,
# with the whole key upper-cased.  An empty OUT stores nothing.
# Always returns "" so the caller can reset its accumulator in one step.
# TA is a local scratch array.
function aquote(OUT, A, PFIX, TA) {
        if(!OUT) return("");

        if(PFIX) PFIX = PFIX ":";
        split(OUT, TA, SUBSEP);
        A[toupper(PFIX TA[1])] = TA[2];

        return("");
}

# Intended to be less stupid about quoted text in XML/HTML.
# Splits a='b' c='d' e='f' into A[PFIX":"a]=b, A[PFIX":"c]=d, etc.
#
#   STR   attribute text of one tag (everything between the tag name and ">")
#   A     array that receives the upper-cased PFIX:name -> value entries
#   PFIX  prefix for every key (the caller passes the tag name)
#   X, OUT, Q, RMID are locals.  Q (the open quote character, if any) and
#   RMID were globals before, so a quote left open in one tag carried over
#   into the parsing of the next tag; as locals they start empty on each call.
#
# The scanner regex now includes \r so the "\r" branch below can actually
# fire, and the quote characters are written as octal escapes (\047 = ',
# \042 = ") because \x escapes are not part of POSIX awk.
# Note: this function calls match(), so RSTART/RLENGTH are overwritten.
function qsplit(STR, A, PFIX, X, OUT, Q, RMID) {
        while(STR && match(STR, /([ \r\n\t]+)|[\047\042=]/))
        {
                OUT = OUT rbefore(STR);

                RMID=rmid(STR);
                if((RMID == "'") || (RMID == "\""))     # Quote characters
                {
                        if(!Q)          Q=RMID;         # Begin quote section
                        else if(Q == RMID)      Q="";   # End quote section
                        else                    OUT = OUT RMID; # Quoted quote
                } else if(RMID == "=") {
                        # Inside quotes "=" is data; outside it separates name from value
                        if(Q)   OUT=OUT RMID; else OUT=OUT SUBSEP;
                } else if((RMID=="\r")||(RMID=="\n")||(RMID=="\t")||(RMID==" ")) {
                        if(Q)   OUT = OUT rall(STR); # Literal quoted whitespace
                        else    OUT = aquote(OUT, A, PFIX); # Unquoted WS, next block
                }
                STR=rafter(STR); # Strip off the text we've processed already.
        }

        aquote(OUT STR, A, PFIX); # Process any text we haven't already.
}

# Call before increment (i.e. before the tag is pushed on the stack).
# Copies every entry of AIN into the global PROP table and stamps it in KEEP
# with (POS+2)-DEP, the value delprop() later compares against POS.
# A property not yet listed in ORDER is appended to the output order when
# catchthis() accepts it.  X is a local loop variable; S is unused.
function addprop(AIN,X,S) {
        for(X in AIN)
        {
                if(!(X in ORDER) && catchthis(X, AIN[X]))
                {
                        O++;
                        ORDER[O]=X;
                        ORDER[X]=O;
                }

                KEEP[X]=(POS+2)-DEP;
                PROP[X]=AIN[X];
        }
}

# Call before decrement (i.e. before a tag is popped off the stack).
# Forgets every property whose KEEP stamp is greater than the current stack
# position POS, removing it from both PROP and KEEP.
# TA, N and M are unused; X is the local loop variable.
function delprop(TA, N, M,X) {
        for(X in KEEP)
        {
                if(KEEP[X] <= POS) continue;

                delete PROP[X];
                delete KEEP[X];
        }
}

# Non-close tag: an opening or self-closing tag plus the text that follows it.
# Collects the tag's attributes and trailing text into TA, hands them to
# addprop(), and pushes the tag on the stack unless it was self-closing.
!/^\// {

        # $1 is the tag name; remove it from $0 so only attributes,
        # the closing ">" and the trailing text remain.
        TAG=$1;                         sub(/^[^ \r\n\t>\/]*/, "");
        match($0, /\/?>/);
        # Remember now whether the tag ended in "/>".  qsplit() below calls
        # match() itself and overwrites RLENGTH, so testing RLENGTH after it
        # (as the original did) misreads self-closing tags with attributes.
        SELFCLOSED=(RLENGTH == 2);
        TDATA=rbefore($0);              CDATA=rafter($0);

        # Flatten and strip whitespace
        gsub(/[ \r\n\t]+/, " ", CDATA);
        gsub(/^[ \r\n\t]+/, "", CDATA); gsub(/[ \r\n\t]+$/, "", CDATA);

        # Start from an empty attribute table for this tag
        for(X in TA) delete TA[X];
        qsplit(TDATA, TA, TAG);
        # Text following the tag is recorded under the pseudo-attribute TAG:CDATA
        if(length(CDATA))
                TA[toupper(TAG)":""CDATA"]=CDATA

        addprop(TA);

        if(!SELFCLOSED) # Found > instead of self-closing />
        {
                TS[++POS]=toupper(TAG);
                TSS=TSS"/"toupper(TAG);
        }

#       Debug aid:
#       for(X in TA) printf("%s[%s]=%s\n", TAG, X, TA[X]);
}

# Close tags.
# Because FS contains "/", $1 is empty and $2 is the name of the closing tag.
# On </xn:VsDataContainer> one output row is printed with the current value
# of every property listed in ORDER; afterwards the tag stack is unwound down
# to (and including) the matching open tag.
/^\// {

        CLOSING=toupper($2);

        # Search down the stack for the matching open tag
        for(TPOS=POS; (TPOS>0) && (CLOSING != TS[TPOS]); TPOS--);

        if(CLOSING == "XN:VSDATACONTAINER")
        {
                OUT=""
                PFIX=""
                # OI is this rule's own index, so other globals are not clobbered
                for(OI=1; OI<=O; OI++)
                {
                        KEY=ORDER[OI];
                        # A missing or empty property prints as "!NAME".
                        # Tested with "in" and a string comparison, because
                        # the original truth test (!PROP[...]) also treated a
                        # numeric-looking attribute value of 0 as missing.
                        if((KEY in PROP) && (PROP[KEY] != "")) VAL=PROP[KEY];
                        else VAL="!"KEY;
                        OUT=OUT PFIX VAL;
                        PFIX=OFS
                }
                print OUT;
        }

        if(TPOS <= 0) print "Went under for "$2" pos="POS
        else
        {
                # Pop everything above the matching tag, and the tag itself
                TPOS--;
                while(TPOS < POS)
                {
                        delprop();
                        sub(/\/[^\/]*$/, "", TSS); POS--;
                }
        }
}

$ awk -f xmlt.awk OFS="\t" Enodeb_MO_Export_10_47.xml
CCL01736        1       vsDataENodeBFunction    EricssonSpecificAttributes.13.25CCL01736        SubNetwork=ONRM_ROOT_MO_R,MeContext=CCL01736,ManagedElement=1,vsDataTransportNetwork=1,vsDataSctp=1     0       32      1       0       310    410      3       30      1440    30      true    SubNetwork=ONRM_ROOT_MO_R,MeContext=CCL01736,ManagedElement=1,vsDataIpSystem=1,vsDataIpAccessHostEt=1   false  1true    true    1       -2000000000     -2000000000     -2000000000     -2000000000     100     true    0       false

Thanks a lot, man! :slight_smile: