Hi Don,
My Apologies for confusing you again AWK commands are perfectly working fine and it splits file correctly as expected
Hope I am not confusing you further
1) If my input file name is sampletest_111.xml after AWK command file name will be like sampletest_111.xml.0001
2)sampletest_111.xml.0001 is renamed to Extrfile111.xml
3)when there are multiple input files AWK is spliting files and creating unique files but
below piece of code is not renaming files in a sequence its just appending to 1 file
Output Expected:Extrfile111.xml,Extrfile1112.xml etc i mean unique name
for f in ../Inbound/sampletest_*
do TMP="${f/sampletest_/Extrfile}"
mv "$f" "${TMP%.*}"
done
Total code :
#!/bin/sh
#pass all Input files to array
FileList=($(ls | grep "sampletest*\\_[0-9]"))
echo "$FileList"
#loop array for Input files
for x in "${FileList[@]}"
do
#for each element in array
#File Split Begin
awk -f xml_tag_handler.awk -f File_split.awk OUT=$x"" ROWS="500" $x $x
mv $x ../
done
rm Response.xml Extr*.xml
for f in sampletest_*
echo "$f"
do TMP="${f/sampletest_/Extrfile}"
mv "$f" "${TMP%.*}"
done
# add all files to array
arr=($(ls | grep "Extrfile[0-9]*.xml"))
#loop array
for i in "${arr[@]}"
do
#for each element in array
echo "$i"
sed -i '/<com1:URI>/c\<com1:URI>file:///tmp/karthik/'$i'</com1:URI>' soaprequest.xml
#WebService Call Begin
sleep 5
curl --header "Content-Type: text/xml;charset=UTF-8" --data @soaprequest.xml {WSDLURL} --insecure >> Response.xml
echo ":Webservice call Begin"
done
sed -i '/<com1:URI>/c\<com1:URI>file:///tmp/karthik/'$i'</com1:URI>' soaprequest.xml
echo ":Webservice call End"
NEW_VAR=$(awk -v sq="'" -F'<ns11:Job_Id>' '
{ for(i = 2; i <= NF; i++) {
sub(/<.*/, "", $i)
printf("%s%s", cnt++ ? "," : sq, $i)
}
}
END { print sq
}' Response.xml
)
printf 'NEW_VAR has been assigned the value: %s\n' "$NEW_VAR"
#End Web Service Call
xml_tag_handler.awk:
###############################################################################
BEGIN {
FS=">"; OFS=">";
RS="<"; ORS="<"
}
# After match("qwertyuiop", /rty/)
# rbefore("qwertyuiop") is "qwe",
# rmid("qwertyuipo") is "r"
# rall("qwertyuiop") is "rty"
# rafter("qwertyuiop") is "uiop"
# !?!?!
# function rbefore(STR) { return(substr(STR, N, RSTART-1)); }# before match
function rbefore(STR) { return(substr(STR, 0, RSTART-1)); }# before match
function rmid(STR) { return(substr(STR, RSTART, 1)); } # First char match
function rall(STR) { return(substr(STR, RSTART, RLENGTH)); }# Entire match
function rafter(STR) { return(substr(STR, RSTART+RLENGTH)); }# after match
function aquote(OUT, A, PFIX, TA) { # Turns Q SUBSEP R into A[PFIX":"Q]=R
if(OUT)
{
if(PFIX) PFIX=PFIX":"
split(OUT, TA, SUBSEP);
A[toupper(PFIX) toupper(TA[1])]=TA[2];
}
return("");
}
# Intended to be less stupid about quoted text in XML/HTML.
# Splits a='b' c='d' e='f' into A[PFIX":"a]=b, A[PFIX":"c]=d, etc.
function qsplit(STR, A, PFIX, X, OUT) {
while(STR && match(STR, /([ \n\t]+)|[\x27\x22=]/))
{
OUT = OUT rbefore(STR);
RMID=rmid(STR);
if((RMID == "'") || (RMID == "\"")) # Quote characters
{
if(!Q) Q=RMID; # Begin quote section
else if(Q == RMID) Q=""; # End quote section
else OUT = OUT RMID; # Quoted quote
} else if(RMID == "=") {
if(Q) OUT=OUT RMID; else OUT=OUT SUBSEP;
} else if((RMID=="\r")||(RMID=="\n")||(RMID=="\t")||(RMID==" ")) {
if(Q) OUT = OUT rall(STR); # Literal quoted whitespace
else OUT = aquote(OUT, A, PFIX); # Unquoted WS, next block
}
STR=rafter(STR); # Strip off the text we've processed already.
}
aquote(OUT STR, A, PFIX); # Process any text we haven't already.
}
{ SPEC=0 ; TAG="" }
NR==1 {
if(ORS == RS) print;
next } # The first "line" is blank when RS=<
/^[!?]/ { SPEC=1 } # XML specification junk
# Handle open-tags
(!SPEC) && match($1, /^[^\/ \r\n\t>]+/) {
CTAG=""
TAG=substr(toupper($1), RSTART, RLENGTH);
if((!SPEC) && !($1 ~ /\/$/))
{
TAGS=TAG "%" TAGS;
DEP++;
LTAGS=TAGS
}
for(X in ARGS) delete ARGS[X];
qsplit(rafter($1), ARGS, "", "", "");
}
# Handle close-tags
(!SPEC) && /^[\/]/ {
sub(/^\//, "", $1);
LTAGS=TAGS
CTAG=toupper($1)
TAG=""
# sub("^.*" toupper($1) "%", "", TAGS);
sub("^" toupper($1) "%", "", TAGS);
$1="/"$1
DEP=split(TAGS, TA, "%")-1;
# Update TAG with tag on top of stack, if any
# if(DEP < 0) { DEP=0; TAG="" }
# else { TAG=TA[DEP]; }
}
File_split.awk
BEGIN {
ORS=""
#OUT="x."
ROWS=5
ROWTAG="^RECIPIENT[0-9]*$"
HDRTAG="^DOCUMENTSET$"
FTRTAG="^DOCUMENTSET$"
}
# First pass, remember headers and footers
NR==FNR {
if(!HDREND)
{
HDR=HDR RS $1 OFS $2
if(TAG ~ HDRTAG) HDREND=FNR
next
}
if(FTRSTART || (CTAG ~ FTRTAG))
{
FTR=FTR RS $1 OFS $2
if(CTAG ~ FTRTAG) FTRSTART=FNR
}
next
}
# Skip header and footer
(FNR <= HDREND) || (FNR >= FTRSTART) { next }
# Close output file once enough DOCUMENT records
((XNR%(ROWS+1)) == 0) {
# printf("FNR==%d XNR==%d FILE=%s\n", FNR, XNR, FILE)>"/dev/stderr"
if(!length(OUT)) FBASE=FILENAME "."
else FBASE = OUT "."
if(FILE) {
print FTR > FILE
close(FILE);
}
FILE=sprintf("%s%04d", FBASE,++FILENUM);
print HDR > FILE
XNR++
}
{ print RS $0 > FILE }
CTAG ~ ROWTAG { XNR++ }
END { if(FILE) print FTR > FILE }
#8 in the same thread got the sample xml structure for your reference