diff --git a/daffodil-lib/src/main/scala/org/apache/daffodil/xml/DaffodilConstructingLoader.scala b/daffodil-lib/src/main/scala/org/apache/daffodil/xml/DaffodilConstructingLoader.scala index 5f8d8040d3..ac55be074b 100644 --- a/daffodil-lib/src/main/scala/org/apache/daffodil/xml/DaffodilConstructingLoader.scala +++ b/daffodil-lib/src/main/scala/org/apache/daffodil/xml/DaffodilConstructingLoader.scala @@ -304,6 +304,48 @@ class DaffodilConstructingLoader private[xml] (uri: URI, override def procInstr(pos: Int, target: String, txt: String) = ProcInstr(target, text(pos, txt).text) + private def parseXMLPrologAttributes(m: MetaData): (Option[String], Option[String], Option[Boolean]) = { + + var info_ver: Option[String] = None + var info_enc: Option[String] = None + var info_stdl: Option[Boolean] = None + + var n = 0 + m("version") match { + case null => + case Text("1.0") => + info_ver = Some("1.0"); n += 1 + case _ => reportSyntaxError("cannot deal with versions != 1.0") + } + + m("encoding") match { + case null => + case Text(enc) => + if (!isValidIANAEncoding(enc)) + reportSyntaxError("\"" + enc + "\" is not a valid encoding") + else { + info_enc = Some(enc) + n += 1 + } + } + + m("standalone") match { + case null => + case Text("yes") => + info_stdl = Some(true); n += 1 + case Text("no") => + info_stdl = Some(false); n += 1 + case _ => reportSyntaxError("either 'yes' or 'no' expected") + } + + if (m.length - n != 0) { + reportSyntaxError( + "only 'version', 'encoding', and 'standalone' attributes are expected in xml prolog. Found: " + m) + } + + (info_ver, info_enc, info_stdl) + } + /** * Override of document to make it tolerant of the start of the file * being whitespace instead of a "<" character @@ -320,12 +362,28 @@ class DaffodilConstructingLoader private[xml] (uri: URI, if ('<' == ch) { nextch() if ('?' == ch) { - // It's an XML Prolog nextch() - val info_prolog = prolog() - doc.version = info_prolog._1 - doc.encoding = info_prolog._2 - doc.standAlone = info_prolog._3 + // It's probably an XML prolog, but + // there are cases where there is no XML Prolog, but a starting + // PI of + // So we have to recognize as a general PI, then look and see if + // it is a prolog. + val name = xName + xSpace() + val (md, scp) = xAttributes(TopScope) + if (scp != TopScope) + reportSyntaxError("no xmlns definitions allowed.") + xToken('?') + xToken('>') + if (name == "xml") { + val info_prolog = parseXMLPrologAttributes(md) + doc.version = info_prolog._1 + doc.encoding = info_prolog._2 + doc.standAlone = info_prolog._3 + } else { + // not an xml prolog. It's some other PI + // do nothing. We're just skipping those PIs + } children = content(TopScope) } else { val ts = new NodeBuffer() diff --git a/daffodil-lib/src/test/scala/org/apache/daffodil/xml/test/unit/TestXMLLoader.scala b/daffodil-lib/src/test/scala/org/apache/daffodil/xml/test/unit/TestXMLLoader.scala index d1884493bd..9f123f5161 100644 --- a/daffodil-lib/src/test/scala/org/apache/daffodil/xml/test/unit/TestXMLLoader.scala +++ b/daffodil-lib/src/test/scala/org/apache/daffodil/xml/test/unit/TestXMLLoader.scala @@ -31,7 +31,41 @@ class TestXMLLoader { @Test def test_schemaLoad(): Unit = { val data = - """ + | + | + | + | + | + | + | + | + | + | + | + | + | + | + | + |""".stripMargin + val loader = new DaffodilXMLLoader() + val ss = StringSchemaSource(data) + val root = + loader.load(ss, None, false) + assertEquals("http://example.com", (root \ "@targetNamespace").text) + } + + @Test + def test_startsWithPINotProlog(): Unit = { + val data = + """ + |