Parsing XML isn't trivial, but we get asked for it all the time, so:

# yanx.awk v0.0.8, Tyler Montbriand, 2017.  Yet another noncompliant XML parser
# XML is a pain to process in the shell, but people need it all the time.
# I've been using and improving this kludge since 2014 or so.  It parses and
# stacks tags and digests parameters, allowing simple XML processing and
# extraction to be managed with a handful of lines addendum.
# I've restricted my use of GNU features enough that this script will run on
# busybox's awk.  I think it works with mawk except -e is unsupported.
# You can work around that by running multiple files, i.e.
# mawk -f yanx.awk -f mystuff.awk inputfile
# Basic use:
# Fed this XML, <body><html a="b">Your Web Browser Hates This</html></body>
# yanx will read it token-by-token as so:
#     Line 1:  Empty, skipped
#     Line 2:  $1="body"
#     Line 3:  $1="html a="b"", $2="Your Web Browser Hates This"
#     Line 4:  $1="/html"
#     Line 5:  $1="/body", $2="\n"
# The script sets a few new "special" variables along the way.
# TAG           The name of the current tag, uppercased.
# CTAG          If close-tag, name in uppercase.
# TAGS          List of nested tags, like HTML%BODY%, including current tag
# LTAGS         List of nested tags, not including current tag
# ARGS          Array of tag parameters, uppercased.  i.e. ARGS["HREF"]
# DEP           How many tags deep it's nested, including current tag.
# Examples:
# # Rewrite cdata of all divs
# awk -f yanx.awk -e 'TAGS ~ /^DIV%/ { $2="quux froob" } 1' input
# # Extract href's from every link
# awk -f yanx.awk -e 'TAGS~/^A%/ && ("HREF" in ARGS) {
#       print ARGS["HREF"] }' ORS="\n" input
# Known Bugs:
# A short XML script can't possibly handle DTD, etc.  Entities a la &lt;
# are not translated either.
# I've done my best to make it swallow <!--, <? ?> and other such fancy
# XML syntax without choking, but that doesn't mean it handles them
# properly either.
# It's an XML parser, not an HTML parser.  It probably won't swallow a
# wild-from-the-internet HTML web page without some cleanup first:
# javascript, tags inside comments, etc will be mangled instead of ignored.
# Last: Because of its design, when printing raw HTML, yanx adds an extra <
# to the end of the file.  This is because < belongs at the beginning of
# a token but awk is told it's printed at the end.  There is no equivalent
# "line prefix" variable that I know of, if you want it to print smarter
# you'll have to print the <'s yourself, by setting ORS="" and
# printing lines like print "<" $0

# The separators must be in place before the first record is read, so they
# are assigned in a BEGIN rule.  Records are split on "<" and fields on ">",
# which makes $1 the text inside a tag and $2 the character data after it.
BEGIN {
        FS=">"; OFS=">";
        RS="<"; ORS="<"
}

# After match("qwertyuiop", /rty/)
#       rbefore("qwertyuiop") is "qwe",
#       rmid("qwertyuiop")    is "r"
#       rall("qwertyuiop")    is "rty"
#       rafter("qwertyuiop")  is "uiop"

# Text of STR that precedes the most recent match().
# substr() positions are 1-based.  A start of 0 is not portable: awks that
# follow the POSIX wording count the nonexistent position 0 against the
# length and return one character too few, so the start is given as 1.
# With no match (RSTART == 0) the length is negative and "" is returned.
function rbefore(STR)   { return(substr(STR, 1, RSTART-1)); }# before match
function rmid(STR) {
        # One character, taken from where the latest match() began (RSTART).
        return substr(STR, RSTART, 1)
}
function rall(STR) {
        # The whole span covered by the latest match(): RLENGTH characters
        # beginning at RSTART.
        return substr(STR, RSTART, RLENGTH)
}
function rafter(STR) {
        # Everything that follows the latest match(), i.e. from the first
        # position past its end up to the end of STR.
        return substr(STR, RSTART + RLENGTH)
}

# Store one attribute in A.
#   OUT   "name" SUBSEP "value", as assembled by the caller
#   A     destination array; the key is uppercased
#   PFIX  optional prefix; when non-empty the key becomes PFIX ":" name
#   TA    scratch array (local)
# Always returns "", so a caller can write OUT = aquote(OUT, ...) to store
# the pair and reset its buffer in one step.
function aquote(OUT, A, PFIX, TA) { # Turns Q SUBSEP R into A[PFIX":"Q]=R
                if(OUT == "") return "";   # Nothing collected: don't create A[""]
                if(PFIX) PFIX=PFIX":"
                split(OUT, TA, SUBSEP);
                A[toupper(PFIX) toupper(TA[1])]=TA[2];
                return "";
}


# Intended to be less stupid about quoted text in XML/HTML.
# Splits a='b' c='d' e='f' into A[PFIX":"a]=b, A[PFIX":"c]=d, etc.
#   STR   attribute text to split
#   A     array that receives the attributes (filled through aquote())
#   PFIX  key prefix handed on to aquote()
#   X     unused; kept so existing five-argument calls still line up
#   OUT   buffer for the name SUBSEP value pair being assembled
#   Q     local: the quote character currently open, or "" outside quotes
#   RMID  local: first character of the current match
# Q and RMID are declared as extra parameters so they are local; quote state
# can no longer leak from one call into the next.
function qsplit(STR, A, PFIX, X, OUT, Q, RMID) {
        # \r is in the whitespace class because the branch below tests for it.
        while(STR && match(STR, /([ \r\n\t]+)|[\x27\x22=]/)) {
                OUT = OUT rbefore(STR);
                RMID = rmid(STR);       # Character that decides the branch

                if((RMID == "'") || (RMID == "\"")) {   # Quote characters
                        if(!Q)          Q=RMID;         # Begin quote section
                        else if(Q == RMID)      Q="";   # End quote section
                        else                    OUT = OUT RMID; # Quoted quote
                } else if(RMID == "=") {
                        if(Q)   OUT=OUT RMID; else OUT=OUT SUBSEP;
                } else if((RMID=="\r")||(RMID=="\n")||(RMID=="\t")||(RMID==" ")) {
                        if(Q)   OUT = OUT rall(STR); # Literal quoted whitespace
                        else    OUT = aquote(OUT, A, PFIX); # Unquoted WS, next block
                }
                STR=rafter(STR); # Strip off the text we've processed already.
        }

        aquote(OUT STR, A, PFIX); # Process any text we haven't already.
}

# Every record starts out as an ordinary token with no tag name yet.
{
        SPEC = 0
        TAG = ""
}

# The first "line" is blank when RS=<.  While ORS still equals RS, printing
# that empty record emits just the separator; either way the record is then
# dropped.
NR == 1 {
        if (RS == ORS)
                print
        next
}

# XML specification junk: records beginning with ! or ?
/^[!?]/ {
        SPEC = 1
}

# Handle open-tags
# Runs for ordinary (non-SPEC) records whose first field starts with a tag
# name.  Sets TAG to the uppercased name, pushes it onto the front of TAGS
# unless the tag is self-closing, and rebuilds ARGS from the attribute text.
# NOTE(review): the header also documents DEP, LTAGS and CTAG, but nothing
# in this rule as received maintains them -- confirm against the original.
(!SPEC) && match($1, /^[^\/ \r\n\t>]+/) {
        TAG=substr(toupper($1), RSTART, RLENGTH);
        TAGARGS=rafter($1);     # Attribute text; $1 itself is left untouched

        if($1 ~ /\/$/)          # Self-closing tag such as <br/>
                sub(/\/$/, "", TAGARGS);  # Keep the "/" out of the last attribute
        else
                TAGS=TAG "%" TAGS;

        for(X in ARGS) delete ARGS[X];

        qsplit(TAGARGS, ARGS, "", "", "");
}

# Handle close-tags
# Pops the closed tag off the front of TAGS and recomputes DEP.
(!SPEC) && /^[\/]/ {
        # Work on a copy.  Assigning to $1 would rebuild $0 without the "/",
        # so the record would no longer print as a close-tag.
        CLOSING=toupper(substr($1, 2));
        sub(/[ \r\n\t]+$/, "", CLOSING);        # Allow </name >
#        sub("^.*" toupper($1) "%", "", TAGS);
        # Literal prefix comparison instead of a dynamic regex, so characters
        # in the tag name are never interpreted as regex operators.
        if(index(TAGS, CLOSING "%") == 1)
                TAGS=substr(TAGS, length(CLOSING)+2);
        DEP=split(TAGS, TA, "%")-1;
        if(DEP < 0) DEP=0;      # split() of an empty TAGS returns 0
        # Update TAG with tag on top of stack, if any
#       if(DEP < 0) {   DEP=0;  TAG=""  }
#       else { TAG=TA[DEP]; }
}

You can use it with this:

# xmlsplit.awk
# Runs after yanx.awk and splits the input into numbered files, each holding
# the header, up to ROWS DOCUMENT elements, and the footer.  The input file
# is named twice on the command line: the first pass collects the header and
# footer, the second pass writes the pieces.
#   X     output file name prefix
#   ROWS  DOCUMENT elements per output file
# NOTE(review): this copy of the script had lost its rule header, most
# braces, and the lines that set F/FTRSTART/H/HDREND and advance XNR.  They
# are reconstructed here from how those names are used below -- confirm
# against the original post.

# Every "<" is printed explicitly below, so nothing is appended per print.
BEGIN { ORS="" }

# First pass, remember headers and footers
NR==FNR {
        if(F || TAG == "FOOTER") {
                if(!F) {                # First footer record: note where it starts
                        F=1
                        FTRSTART=FNR
                }
                FTR=FTR "<" $0
        }
        else if((!H) && (TAG == "DOCUMENT")) {
                H=1                     # Header ends at the first DOCUMENT
                HDREND=FNR
        }
        else if(!H)     HDR=HDR "<" $0
        next
}

# Skip header and footer
# FTRSTART stays 0 when no FOOTER tag was seen; then only the header is skipped.
(FNR < HDREND) || (FTRSTART && (FNR >= FTRSTART)) { next }

# Close output file once enough DOCUMENT records
# XNR is 0 at the start and a multiple of ROWS+1 after every ROWS finished
# documents.  The increment below steps past that value once the new file
# is open.
((XNR%(ROWS+1)) == 0) {
        if(FILE) {
                print FTR > FILE
                close(FILE)             # Don't hold every output file open
        }
        FILE=sprintf("%s%04d", X,++FILENUM);
        print HDR > FILE
        XNR++
}

{       print "<" $0 > FILE     }

# Count a DOCUMENT once its close-tag has been written.  TAG is empty on
# close-tag records; the "/" is optional because $1 may or may not still
# carry it by the time this rule runs.
(TAG == "") && (toupper($1) ~ /^\/?DOCUMENT[ \r\n\t]*$/) { XNR++ }

END {
        if(FILE) {
                print FTR > FILE
                close(FILE)
        }
}

Like this:

# Yes, it's fed inputfile twice
awk -f yanx.awk -f xmlsplit.awk X="x." ROWS="10" input input

With this input:

<?xml version="1.0" encoding="UTF-8"?><Recipient>



To produce output like this:

$ cat x.0001

<?xml version="1.0" encoding="UTF-8"?><Recipient>


$ cat x.0010

<?xml version="1.0" encoding="UTF-8"?><Recipient>




