Skip to content

Commit

Permalink
Enhanced the loader to handle documents that start with a processing instruction such as <?xml-model ...?> instead of an XML prolog
Browse files Browse the repository at this point in the history
This occurs in test infoset data for the iCalendar DFDL schema.
Xerces tolerated this before, so our constructing loader
should tolerate it as well.

DAFFODIL-2527
  • Loading branch information
mbeckerle committed May 28, 2021
1 parent 04187ae commit 3d800f5
Show file tree
Hide file tree
Showing 2 changed files with 98 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,48 @@ class DaffodilConstructingLoader private[xml] (uri: URI,
/**
 * Builds the processing-instruction node for `target`.
 *
 * The instruction text is first passed through this loader's
 * `text(pos, txt)` and the resulting node's string value is what gets
 * stored in the `ProcInstr`.
 */
override def procInstr(pos: Int, target: String, txt: String) = {
  val piText = text(pos, txt).text
  ProcInstr(target, piText)
}

/**
 * Interprets the pseudo-attributes of an `<?xml ...?>` declaration.
 *
 * Only `version`, `encoding` and `standalone` are accepted. Every problem is
 * reported through `reportSyntaxError`; a value that was rejected is returned
 * as `None`, exactly like a value that was never supplied.
 *
 * @param m the attributes that were parsed from the declaration
 * @return a triple of (version, encoding, standalone)
 */
private def parseXMLPrologAttributes(m: MetaData): (Option[String], Option[String], Option[Boolean]) = {

  // A null lookup result is treated as "attribute not present".
  val version: Option[String] = m("version") match {
    case null => None
    case Text("1.0") => Some("1.0")
    case _ =>
      reportSyntaxError("cannot deal with versions != 1.0")
      None
  }

  val encoding: Option[String] = m("encoding") match {
    case null => None
    case Text(enc) if isValidIANAEncoding(enc) => Some(enc)
    case Text(enc) =>
      reportSyntaxError("\"" + enc + "\" is not a valid encoding")
      None
    case _ =>
      // A value that is present but not a single text node used to fall
      // through this match and raise a MatchError; report it like the
      // other malformed attributes instead.
      reportSyntaxError("the 'encoding' value must be plain text")
      None
  }

  val standalone: Option[Boolean] = m("standalone") match {
    case null => None
    case Text("yes") => Some(true)
    case Text("no") => Some(false)
    case _ =>
      reportSyntaxError("either 'yes' or 'no' expected")
      None
  }

  // Each accepted attribute accounts for one entry of m; anything left over
  // is either an unknown attribute or one whose value was rejected above.
  val accepted = Seq(version, encoding, standalone).count(_.isDefined)
  if (m.length != accepted) {
    reportSyntaxError(
      "only 'version', 'encoding', and 'standalone' attributes are expected in xml prolog. Found: " + m)
  }

  (version, encoding, standalone)
}

/**
* Override of document to make it tolerant of the start of the file
* being whitespace instead of a "<" character
Expand All @@ -320,12 +362,28 @@ class DaffodilConstructingLoader private[xml] (uri: URI,
if ('<' == ch) {
nextch()
if ('?' == ch) {
// It's an XML Prolog
nextch()
val info_prolog = prolog()
doc.version = info_prolog._1
doc.encoding = info_prolog._2
doc.standAlone = info_prolog._3
// It's probably an XML prolog, but
// there are cases where there is no XML Prolog, but a starting
// PI of <?xml-model href="...."?>
// So we have to recognize as a general PI, then look and see if
// it is a prolog.
val name = xName
xSpace()
val (md, scp) = xAttributes(TopScope)
if (scp != TopScope)
reportSyntaxError("no xmlns definitions allowed.")
xToken('?')
xToken('>')
if (name == "xml") {
val info_prolog = parseXMLPrologAttributes(md)
doc.version = info_prolog._1
doc.encoding = info_prolog._2
doc.standAlone = info_prolog._3
} else {
// not an xml prolog. It's some other PI
// do nothing. We're just skipping those PIs
}
children = content(TopScope)
} else {
val ts = new NodeBuffer()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,41 @@ class TestXMLLoader {
/**
 * Loads a small DFDL schema whose text begins with an ordinary
 * `<?xml version="1.0"?>` declaration and checks that the root element's
 * targetNamespace attribute is available on the loaded result.
 */
@Test
def test_schemaLoad(): Unit = {
  // The literal must be opened exactly once: the stale pre-change first line
  // (a second opening triple quote) has been removed.
  val data =
    """<?xml version="1.0"?>
      |<xs:schema
      |targetNamespace="http://example.com"
      |xmlns:ex="http://example.com"
      |xmlns:xs="http://www.w3.org/2001/XMLSchema"
      |xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
      |xmlns:dfdl="http://www.ogf.org/dfdl/dfdl-1.0/">
      | <xs:include schemaLocation="org/apache/daffodil/xsd/DFDLGeneralFormat.dfdl.xsd"/>
      | <xs:annotation>
      | <xs:appinfo source="http://www.ogf.org/dfdl/">
      | <dfdl:format lengthKind="delimited" ref="ex:GeneralFormat"/>
      | </xs:appinfo>
      | </xs:annotation>
      | <xs:element name="e1">
      | <xs:complexType>
      | <xs:sequence>
      | <xs:element name="s1" type="xs:int"/>
      | </xs:sequence>
      | </xs:complexType>
      | </xs:element>
      |</xs:schema>
      |""".stripMargin
  val loader = new DaffodilXMLLoader()
  val ss = StringSchemaSource(data)
  val root =
    loader.load(ss, None, false)
  assertEquals("http://example.com", (root \ "@targetNamespace").text)
}

@Test
def test_startsWithPINotProlog(): Unit = {
val data =
"""<?xml-model href="../Schematron/iCalendar.sch" type="application/xml" schematypens="http://purl.oclc.org/dsdl/schematron"?>
|<xs:schema
|targetNamespace="http://example.com"
|xmlns:ex="http://example.com"
|xmlns:xs="http://www.w3.org/2001/XMLSchema"
|xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
Expand Down

0 comments on commit 3d800f5

Please sign in to comment.