diff --git a/.gitignore b/.gitignore
index 84516529a..64a0c8561 100644
--- a/.gitignore
+++ b/.gitignore
@@ -105,3 +105,6 @@ tests/scripts/*.log
tests/scripts/*.trs
tests/test-suite.log
Testing/
+
+# Local CMake build directory (created by build_fuzzer.sh)
+build/
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4254f89bb..6425cd1ec 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -177,8 +177,14 @@ install(
# Process subdirectories
add_subdirectory(doc)
add_subdirectory(src)
-add_subdirectory(tests)
-add_subdirectory(samples)
+if(NOT XERCES_BUILD_FUZZERS)  # tested by value, not DEFINED, so -DXERCES_BUILD_FUZZERS=OFF really is off
+  add_subdirectory(tests)
+  add_subdirectory(samples)
+endif()
+# Fuzz targets are opt-in: requested for a local libFuzzer build or for an OSS-Fuzz build.
+if(XERCES_BUILD_FUZZERS OR XERCES_BUILD_FOR_OSS_FUZZ)
+  add_subdirectory(fuzzers)
+endif()
# Display configuration summary
message(STATUS "")
diff --git a/build_fuzzer.sh b/build_fuzzer.sh
new file mode 100755
index 000000000..adbfe8e1a
--- /dev/null
+++ b/build_fuzzer.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+set -e  # clean rebuild of the libFuzzer targets with clang; stop at the first failing step
+cd -- "$(dirname -- "$0")"  # work next to this script so rm -rf never hits the caller's ./build
+rm -rf build
+mkdir build
+cd build
+CC=clang CXX=clang++ CXXFLAGS="-std=c++14" cmake .. -DXERCES_BUILD_FUZZERS=1 -Wfatal-errors
+CC=clang CXX=clang++ CXXFLAGS="-std=c++14" make -j8  # env kept: the ExternalProject configures at build time
diff --git a/cmake/XercesIntTypes.cmake b/cmake/XercesIntTypes.cmake
index 6ad7dd313..85c94e1b9 100644
--- a/cmake/XercesIntTypes.cmake
+++ b/cmake/XercesIntTypes.cmake
@@ -58,14 +58,14 @@ set(HAVE_OFF_T ${SIZEOF_OFF_T})
set(HAVE_SIZE_T ${SIZEOF_SIZE_T})
set(HAVE_SSIZE_T ${SSIZEOF_SSIZE_T})
set(HAVE_WCHAR_T ${WCHAROF_WCHAR_T})
-if(SIZEOF_SIZE_T)
+if(HAVE_SIZEOF_SIZE_T)
set(XERCES_SIZE_T size_t)
set(XERCES_SIZE_MAX SIZE_MAX)
else()
set(XERCES_SIZE_T "unsigned long")
set(XERCES_SIZE_MAX ULONG_MAX)
endif()
-if(SIZEOF_SSIZE_T)
+if(HAVE_SIZEOF_SSIZE_T)
set(XERCES_SSIZE_T ssize_t)
set(XERCES_SSIZE_MAX SSIZE_MAX)
else()
diff --git a/doc/build.xml b/doc/build.xml
index 3f706b959..aedb4bde9 100644
--- a/doc/build.xml
+++ b/doc/build.xml
@@ -572,14 +572,16 @@
AIX PowerPC |
IBM XL C++ |
./configure CXX=xlC_r CC=xlc_r
- gmake libxerces_c_la_LDFLAGS=-qmkshrobj |
+ gmake libxerces_c_la_LDFLAGS=-qmkshrobj
+ (for xlC v11-v13, libxerces_c_la_LDFLAGS is not needed, but CXXFLAGS=-rtti is required because RTTI is otherwise disabled by default)
| AIX PowerPC-64 |
IBM XL C++ |
export OBJECT_MODE=64
./configure CXX=xlC_r CC=xlc_r CXXFLAGS=-q64 CFLAGS=-q64
- gmake libxerces_c_la_LDFLAGS=-qmkshrobj |
+ gmake libxerces_c_la_LDFLAGS=-qmkshrobj
+ (for xlC v11-v13, libxerces_c_la_LDFLAGS is not needed, but CXXFLAGS="-q64 -rtti" is required because RTTI is otherwise disabled by default)
| HP-UX IA-64-32 |
diff --git a/fuzzers/CMakeLists.txt b/fuzzers/CMakeLists.txt
new file mode 100644
index 000000000..d3dd27307
--- /dev/null
+++ b/fuzzers/CMakeLists.txt
@@ -0,0 +1,90 @@
+# CMake build for xerces-c
+#
+# Written by Roger Leigh
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# add_fuzzer_executable(<name> <source>...)
+# Creates fuzz target <name> from the given sources and links it against xerces-c.
+# With XERCES_BUILD_FUZZERS the sources get -fsanitize=fuzzer-no-link and libFuzzer is linked;
+# otherwise, with XERCES_BUILD_FOR_OSS_FUZZ, $ENV{LIB_FUZZING_ENGINE} (read at configure time) is linked.
+function(add_fuzzer_executable name)
+  add_executable(${name} ${ARGN})
+  target_compile_definitions(${name} PRIVATE HAVE_CONFIG_H=1)
+  # The project binary dir is searched for config.h; the remaining entries locate the sources' headers.
+  target_include_directories(${name} PRIVATE
+    ${PROJECT_BINARY_DIR}
+    ${PROJECT_SOURCE_DIR}/src
+    ${PROJECT_BINARY_DIR}/src
+    ${CMAKE_CURRENT_SOURCE_DIR})
+  # Plain (keyword-less) link signature on purpose: this file extends the link line the same way later.
+  target_link_libraries(${name} xerces-c)
+  if(XERCES_BUILD_FUZZERS)
+    target_compile_options(${name} PRIVATE -fsanitize=fuzzer-no-link)
+    target_link_libraries(${name} -fsanitize=fuzzer)
+  elseif(XERCES_BUILD_FOR_OSS_FUZZ)
+    target_link_libraries(${name} $ENV{LIB_FUZZING_ENGINE})
+  endif()
+  set_target_properties(${name} PROPERTIES FOLDER "Fuzzers")
+endfunction()
+
+# Raw-input fuzz target: parse_target.cpp passes libFuzzer's byte buffer to parseInMemory().
+add_fuzzer_executable(fuzz_parser_target
+  src/xerces_fuzz_common.cpp
+  src/parse_target.cpp
+)
+
+include(ExternalProject)
+# Install prefix for libprotobuf-mutator; its headers and static libraries are consumed from here.
+set(EXTERNAL_INSTALL_LOCATION ${CMAKE_CURRENT_BINARY_DIR}/external)
+# Git ref to fetch. "master" is what ExternalProject uses when GIT_TAG is omitted; set a tag to pin.
+set(XERCES_FUZZ_LPM_GIT_TAG "master" CACHE STRING "libprotobuf-mutator git branch or tag to build")
+ExternalProject_Add(libprotobuf-mutator
+  GIT_REPOSITORY https://github.com/google/libprotobuf-mutator.git
+  GIT_TAG ${XERCES_FUZZ_LPM_GIT_TAG}
+  GIT_SHALLOW 1
+  CMAKE_ARGS -DCMAKE_BUILD_TYPE=Release -DLIB_PROTO_MUTATOR_DOWNLOAD_PROTOBUF=ON -DLIB_PROTO_MUTATOR_TESTING=false -DCMAKE_INSTALL_PREFIX=${EXTERNAL_INSTALL_LOCATION}
+)
+# protoc is taken from the protobuf tree inside libprotobuf-mutator's build directory, not from the install prefix.
+set(PROTOC_PATH ${CMAKE_CURRENT_BINARY_DIR}/libprotobuf-mutator-prefix/src/libprotobuf-mutator-build/external.protobuf/bin/protoc)
+# Generate xml.pb.cc/.h from src/xml.proto; reruns when the .proto changes and is safe to rerun.
+add_custom_command(
+  OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/genfiles/xml.pb.cc ${CMAKE_CURRENT_BINARY_DIR}/genfiles/xml.pb.h
+  DEPENDS libprotobuf-mutator ${CMAKE_CURRENT_SOURCE_DIR}/src/xml.proto
+  WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/src
+  COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/genfiles
+  COMMAND ${PROTOC_PATH} xml.proto --cpp_out=${CMAKE_CURRENT_BINARY_DIR}/genfiles
+  VERBATIM
+)
+add_custom_target(xml_proto_files ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/genfiles/xml.pb.cc)
+# Proto-driven fuzz target; link lines stay keyword-less to match add_fuzzer_executable's signature.
+add_fuzzer_executable(fuzz_parser_target_proto
+  src/xerces_fuzz_common.cpp
+  src/parse_target_proto.cpp
+  src/xmlProtoConverter.cpp
+  ${CMAKE_CURRENT_BINARY_DIR}/genfiles/xml.pb.cc
+)
+add_dependencies(fuzz_parser_target_proto xml_proto_files libprotobuf-mutator)
+target_include_directories(fuzz_parser_target_proto PRIVATE
+  ${EXTERNAL_INSTALL_LOCATION}/include
+  ${EXTERNAL_INSTALL_LOCATION}/include/libprotobuf-mutator/
+  ${EXTERNAL_INSTALL_LOCATION}/../libprotobuf-mutator-prefix/src/libprotobuf-mutator-build/external.protobuf/include
+  ${CMAKE_CURRENT_BINARY_DIR}/genfiles
+)
+target_link_libraries(fuzz_parser_target_proto
+  ${EXTERNAL_INSTALL_LOCATION}/lib/libprotobuf-mutator-libfuzzer.a
+  ${EXTERNAL_INSTALL_LOCATION}/lib/libprotobuf-mutator.a
+  ${CMAKE_CURRENT_BINARY_DIR}/libprotobuf-mutator-prefix/src/libprotobuf-mutator-build/external.protobuf/src/external.protobuf-build/libprotobuf.a)
diff --git a/fuzzers/src/parse_target.cpp b/fuzzers/src/parse_target.cpp
new file mode 100755
index 000000000..5e976765a
--- /dev/null
+++ b/fuzzers/src/parse_target.cpp
@@ -0,0 +1,28 @@
+/*
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+################################################################################
+*/
+#include "xerces_fuzz_common.h"
+
+#include "xercesc/framework/MemBufInputSource.hpp"
+#include "xercesc/parsers/SAXParser.hpp"
+#include "xercesc/util/OutOfMemoryException.hpp"
+
+using namespace xercesc_3_2;
+// libFuzzer entry point: forwards the raw input bytes to the shared in-memory parse harness.
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+    parseInMemory(data, size);
+    return 0;
+}
diff --git a/fuzzers/src/parse_target_proto.cpp b/fuzzers/src/parse_target_proto.cpp
new file mode 100644
index 000000000..cc2c83f08
--- /dev/null
+++ b/fuzzers/src/parse_target_proto.cpp
@@ -0,0 +1,45 @@
+/*
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+################################################################################
+*/
+#include "xerces_fuzz_common.h"
+#include "xmlProtoConverter.h"
+
+#include "xercesc/framework/MemBufInputSource.hpp"
+#include "xercesc/parsers/SAXParser.hpp"
+#include "xercesc/util/OutOfMemoryException.hpp"
+
+#include "xml.pb.h"
+
+#include "libprotobuf-mutator/src/libfuzzer/libfuzzer_macro.h"
+
+#include
+
+namespace {
+ protobuf_mutator::protobuf::LogSilencer log_silincer;
+ void ignore(void* ctx, const char* msg, ...) {}
+
+ template
+ std::unique_ptr MakeUnique(T* obj, D del) {
+ return {obj, del};
+ }
+}
+
+using namespace xercesc_3_2;
+// Proto fuzzer entry point: render the mutated XmlDocument to XML text, then parse that text.
+DEFINE_PROTO_FUZZER(const xmlProtoFuzzer::XmlDocument& xmlDocument) {
+    const std::string rendered = xmlProtoFuzzer::ProtoConverter().protoToString(xmlDocument);
+    parseInMemory(reinterpret_cast<const uint8_t *>(rendered.data()), rendered.size());
+}
diff --git a/fuzzers/src/xerces_fuzz_common.cpp b/fuzzers/src/xerces_fuzz_common.cpp
new file mode 100755
index 000000000..a76b383a8
--- /dev/null
+++ b/fuzzers/src/xerces_fuzz_common.cpp
@@ -0,0 +1,47 @@
+/*
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+################################################################################
+*/
+#include "xerces_fuzz_common.h"
+
+using namespace xercesc_3_2;
+static bool initialized = false;
+
+void parseInMemory(const uint8_t *Data, size_t Size)
+{
+    // Parses Data[0, Size) as one XML document with a SAXParser set to auto validation, no
+    // namespaces and no schema processing. XMLException and OutOfMemoryException are swallowed
+    // so they do not end the process. The platform is initialized once and never terminated.
+    if (!initialized)
+    {
+        XMLPlatformUtils::Initialize();
+        initialized = true;
+    }
+    try
+    {
+        // Automatic storage instead of new/delete: both objects are released even when
+        // parse() throws. The source is declared first so that the parser is destroyed first.
+        MemBufInputSource memBufIS((const XMLByte *)Data, Size, "prodInfo", false);
+        SAXParser parser;
+        parser.setValidationScheme(SAXParser::Val_Auto);
+        parser.setDoNamespaces(false);
+        parser.setDoSchema(false);
+        parser.setHandleMultipleImports(true);
+        parser.setValidationSchemaFullChecking(false);
+        parser.parse(memBufIS);
+    }
+    catch (const XMLException &) {}          // input rejected by Xerces
+    catch (const OutOfMemoryException &) {}  // allocation failure reported by Xerces
+}
diff --git a/fuzzers/src/xerces_fuzz_common.h b/fuzzers/src/xerces_fuzz_common.h
new file mode 100644
index 000000000..9eaf88bd7
--- /dev/null
+++ b/fuzzers/src/xerces_fuzz_common.h
@@ -0,0 +1,23 @@
+/*
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+################################################################################
+*/
+#pragma once
+#include "xercesc/framework/MemBufInputSource.hpp"
+#include "xercesc/parsers/SAXParser.hpp"
+#include "xercesc/util/OutOfMemoryException.hpp"
+#include "xercesc/util/PlatformUtils.hpp"
+#include "xercesc/util/XMLException.hpp"
+void parseInMemory(const uint8_t *Data, size_t Size);
\ No newline at end of file
diff --git a/fuzzers/src/xml.proto b/fuzzers/src/xml.proto
new file mode 100755
index 000000000..75e54b6f0
--- /dev/null
+++ b/fuzzers/src/xml.proto
@@ -0,0 +1,339 @@
+/*
+ * Copyright (C) 2019 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+syntax = "proto3";
+
+message Misc {  // one prolog/markup item handled by ProtoConverter::visit(Misc)
+ oneof misc_oneof {
+ string comment = 1;
+ ProcessingInstruction inst = 2;  // rendered through visit(ProcessingInstruction)
+ }
+}
+
+message PEReference {
+ string name = 1;
+}
+
+message ElementDecl {
+ enum ContentSpec {
+ EMPTY = 0;
+ ANY = 1;
+ FUZZ = 2;  // written as the literal keyword FUZZ
+ MIXED = 3;  // "(#PCDATA|a|b)" built from cdata as given; "*" is appended when cdata is non-empty
+ CHILDREN = 4;  // "(a, b)" built from the sanitized cdata entries
+ }
+ string name = 1;
+ ContentSpec spec = 2;
+ repeated string cdata = 3;  // only read for MIXED and CHILDREN
+}
+
+message AttrType {
+ enum Type {
+ CDATA = 0;
+ ID = 1;
+ IDREF = 2;
+ IDREFS = 3;
+ ENTITY = 4;
+ ENTITIES = 5;
+ NMTOKEN = 6;
+ NMTOKENS = 7;
+ }
+ Type ty = 1;
+}
+
+message EnumeratedType {
+ repeated string names = 1;
+}
+
+message AttrListDecl {  // NOTE(review): no ProtoConverter::visit overload takes this type (AttListDecl is the rendered one); looks unused, confirm
+ string name = 1;
+ AttrType atype = 2;
+ EnumeratedType etype = 3;
+ DefaultDecl def = 4;
+}
+
+message ExternalId {
+ enum Type {
+ SYSTEM = 0;
+ PUBLIC = 1;
+ FUZZ = 2;
+ }
+ Type type = 1;
+ string system = 2;
+ string pub = 3;
+}
+
+message AttValue {
+ enum Type {
+ ENTITY = 0;
+ CHAR = 1;
+ FUZZ = 2;
+ }
+ Type type = 1;
+ repeated string value = 2;
+}
+
+message DefaultDecl {
+ enum Type {
+ REQUIRED = 0;
+ IMPLIED = 1;
+ FIXED = 2;
+ FUZZ = 3;
+ }
+ Type type = 1;
+ AttValue att = 2;
+}
+
+message AttDef {
+ // TODO: Add enumerated type
+ enum Type {
+ CDATA = 0;
+ ID = 1;
+ IDREF = 2;
+ IDREFS = 3;
+ ENTITY = 4;
+ ENTITIES = 5;
+ NMTOKEN = 6;
+ NMTOKENS = 7;
+ FUZZ = 8;
+ }
+ string name = 1;
+ Type type = 2;
+ DefaultDecl def = 3;
+}
+
+message AttListDecl {
+ string name = 1;
+ repeated AttDef attdefs = 2;
+}
+
+message NotationDecl {
+ string name = 1;
+ oneof notation_oneof {
+ ExternalId ext = 2;
+ string pub = 3;
+ string fuzz = 4;
+ }
+}
+
+message EntityValue {
+ enum Type {
+ ENTITY = 0;
+ CHAR = 1;
+ PEREF = 2;
+ FUZZ = 3;
+ }
+ Type type = 1;
+ repeated string name = 2;
+}
+
+message NDataDecl {
+ string name = 1;
+}
+
+message EntityDef {
+ oneof entity_oneof {
+ ExternalId ext = 1;
+ EntityValue val = 2;
+ }
+ NDataDecl ndata = 3;
+}
+
+message PEDef {
+ oneof pedef_oneof {
+ EntityValue val = 1;
+ ExternalId id = 2;
+ }
+}
+
+message EntityDecl {
+ enum Type {
+ GEDECL = 0;
+ PEDECL = 1;
+ }
+ Type type = 1;
+ string name = 2;
+ EntityDef ent = 3;
+ PEDef pedef = 4;
+}
+
+message ConditionalSect {
+ enum Type {
+ INCLUDE = 0;
+ IGNORE = 1;
+ FUZZ = 2;
+ }
+ Type type = 1;
+ ExtSubsetDecl ext = 2;
+ // TODO: Make this recursive
+ // See https://www.w3.org/TR/xml/#NT-conditionalSect
+ repeated string ignores = 3;
+}
+
+message OneExtSubsetDecl {
+ oneof extsubset_oneof {
+ MarkupDecl m = 1;
+ ConditionalSect c = 2;
+ }
+}
+
+message ExtSubsetDecl {
+ repeated OneExtSubsetDecl decls = 1;
+}
+
+message MarkupDecl {
+ oneof markup_oneof {
+ ElementDecl e = 1;
+ AttListDecl a = 2;
+ NotationDecl n = 3;
+ Misc m = 4;
+ EntityDecl entity = 5;
+ ExtSubsetDecl ext = 6;
+ }
+}
+
+message DocTypeDecl {
+ string name = 1;
+ ExternalId ext = 2;
+ repeated MarkupDecl mdecl = 3;
+}
+
+message Prolog {
+ XmlDeclaration decl = 1;
+ DocTypeDecl doctype = 2;
+ repeated Misc misc = 3;
+}
+
+message KeyValue {  // one attribute, written as NAME="VALUE"
+ enum XmlNamespace {
+ ATTRIBUTES = 0;  // the values below FUZZ each select a fixed name: xml:attributes, xml:base, ...
+ BASE = 1;
+ CATALOG = 2;
+ ID = 3;
+ LANG = 4;
+ LINK = 5;
+ SPACE = 6;
+ SPECIAL = 7;
+ TEST = 8;
+ FUZZ = 9;  // name comes from key; an "xmlns:" prefix is added when the serialized size is odd
+ }
+ XmlNamespace type = 1;
+ string key = 2;  // only read for FUZZ
+ string value = 3;  // reduced to letters and digits before it is written
+}
+
+message ProcessingInstruction {
+ string name = 1;
+ repeated KeyValue kv = 2;
+}
+
+message CData {
+ string data = 1;
+}
+
+message Content {
+ // TODO: Add other content types
+ oneof content_oneof {
+ string str = 1;
+ Element e = 2;
+ CData c = 3;
+ }
+}
+
+message Element {
+ enum Type {
+ PREDEFINED = 0;  // adds the uri from getUri() to the start tag and a child from getPredefined()
+ FUZZ = 1;  // no predefined uri or child
+ }
+ enum Id {
+ XIINCLUDE = 0;
+ XIFALLBACK = 1;
+ // Attributes of xinclude
+ XIHREF = 2;
+ XIPARSE = 3;
+ XIXPOINTER = 4;
+ XIENCODING = 5;
+ XIACCEPT = 6;
+ XIACCEPTLANG = 7;
+ }
+ Type type = 1;
+ Id id = 2;  // only read when type is PREDEFINED
+ string name = 3;  // tag name; reduced to letters and digits before it is written
+ repeated KeyValue kv = 4;  // attributes of the start tag, in order
+ Content content = 5;  // a single content item per element
+ string childprop = 6;  // attribute value passed to getPredefined() for PREDEFINED elements
+}
+
+message VersionNum {
+ enum Type {
+ STANDARD = 0;  // always written as "1.0"
+ FUZZ = 1;  // written as "major.minor" from the fields below
+ }
+ Type type = 1;
+ uint64 major = 2;  // only read for FUZZ
+ uint64 minor = 3;  // only read for FUZZ
+}
+
+message Encodings {
+ enum Enc {
+ BIG5 = 0;
+ EUCJP = 1;
+ EUCKR = 2;
+ GB18030 = 3;
+ ISO2022JP = 4;
+ ISO2022KR = 5;
+ ISO88591 = 6;
+ ISO88592 = 7;
+ ISO88593 = 8;
+ ISO88594 = 9;
+ ISO88595 = 10;
+ ISO88596 = 11;
+ ISO88597 = 12;
+ ISO88598 = 13;
+ ISO88599 = 14;
+ SHIFTJIS = 15;
+ TIS620 = 16;
+ USASCII = 17;
+ UTF8 = 18;
+ UTF16 = 19;
+ UTF16BE = 20;
+ UTF16LE = 21;
+ WINDOWS31J = 22;
+ WINDOWS1255 = 23;
+ WINDOWS1256 = 24;
+ FUZZ = 25;
+ }
+ Enc name = 1;
+ string fuzz = 2;
+}
+
+message XmlDeclaration {
+ VersionNum ver = 1;
+ Encodings enc = 2;
+ enum Standalone {
+ YES = 0;
+ NO = 1;
+ }
+ Standalone standalone = 3;
+ string fuzz = 4;
+}
+
+message XmlDocument {  // root message handed to ProtoConverter::protoToString()
+ Prolog p = 1;  // rendered first
+ repeated Element e = 2;  // rendered after the prolog, in order
+}
+
+package xmlProtoFuzzer;
\ No newline at end of file
diff --git a/fuzzers/src/xmlProtoConverter.cpp b/fuzzers/src/xmlProtoConverter.cpp
new file mode 100644
index 000000000..f8a47dee2
--- /dev/null
+++ b/fuzzers/src/xmlProtoConverter.cpp
@@ -0,0 +1,758 @@
+/*
+ * Copyright (C) 2019 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "xmlProtoConverter.h"
+
+#include
+
+using namespace std;
+using namespace xmlProtoFuzzer;
+
+string ProtoConverter::removeNonAscii(string const& _utf8)
+{
+    // Despite the name, this keeps only characters that std::isalnum accepts (letters and
+    // digits) and returns "fuzz" when nothing is left.
+    string asciiStr{_utf8};
+    auto isDropped = [](unsigned char c) { return !std::isalnum(c); };  // unsigned: <cctype> is undefined for negative char
+    asciiStr.erase(remove_if(asciiStr.begin(), asciiStr.end(), isDropped), asciiStr.end());
+    return asciiStr.empty() ? "fuzz" : asciiStr;
+}
+
+void ProtoConverter::visit(Misc const& _x)
+{
+ switch (_x.misc_oneof_case())
+ {
+ case Misc::kComment:
+ m_output << "\n";
+ break;
+ case Misc::kInst:
+ visit(_x.inst());
+ break;
+ case Misc::MISC_ONEOF_NOT_SET:
+ break;
+ }
+}
+
+void ProtoConverter::visit(Prolog const& _x)
+{
+    visit(_x.decl());  // emit order: XML declaration, doctype, then every Misc item
+    visit(_x.doctype());
+    for (int i = 0; i < _x.misc_size(); ++i)
+        visit(_x.misc(i));
+}
+
+void ProtoConverter::visit(KeyValue const& _x)
+{
+    // Writes NAME="VALUE" plus a trailing space. NAME is a fixed xml:* name, or for FUZZ the
+    // sanitized key (prefixed "xmlns:" when the serialized size is odd). VALUE is sanitized.
+    if (!KeyValue::XmlNamespace_IsValid(_x.type()))
+        return;
+    string attrName;
+    switch (_x.type())
+    {
+    case KeyValue::ATTRIBUTES:
+        attrName = "xml:attributes";
+        break;
+    case KeyValue::BASE:
+        attrName = "xml:base";
+        break;
+    case KeyValue::CATALOG:
+        attrName = "xml:catalog";
+        break;
+    case KeyValue::ID:
+        attrName = "xml:id";
+        break;
+    case KeyValue::LANG:
+        attrName = "xml:lang";
+        break;
+    case KeyValue::LINK:
+        attrName = "xml:link";
+        break;
+    case KeyValue::SPACE:
+        attrName = "xml:space";
+        break;
+    case KeyValue::SPECIAL:
+        attrName = "xml:special";
+        break;
+    case KeyValue::TEST:
+        attrName = "xml:test";
+        break;
+    case KeyValue::FUZZ:
+        attrName = (_x.ByteSizeLong() % 2 ? "xmlns:" : "") + removeNonAscii(_x.key());
+        break;
+    case KeyValue_XmlNamespace_KeyValue_XmlNamespace_INT_MIN_SENTINEL_DO_NOT_USE_:
+    case KeyValue_XmlNamespace_KeyValue_XmlNamespace_INT_MAX_SENTINEL_DO_NOT_USE_:
+        return;
+    }
+    m_output << attrName << "=\"" << removeNonAscii(_x.value()) << "\" ";
+}
+
+void ProtoConverter::visit(ProcessingInstruction const& _x)
+{
+ m_output << "" << removeNonAscii(_x.name()) << " ";
+ for (auto const& prop: _x.kv())
+ visit(prop);
+ m_output << "?>\n";
+}
+
+void ProtoConverter::visit(Content const& _x)
+{
+    // Writes whichever alternative of the oneof is set: the string as given, a nested
+    // Element, or a CData message. The alternative is followed by a newline.
+    // An unset oneof writes nothing at all.
+    auto const which = _x.content_oneof_case();
+    if (which == Content::CONTENT_ONEOF_NOT_SET)
+        return;
+
+    if (which == Content::kStr)
+        m_output << _x.str();
+    else if (which == Content::kE)
+        visit(_x.e());
+    else if (which == Content::kC)
+        visit(_x.c());
+
+    // Shared terminator for all three alternatives.
+    m_output << "\n";
+}
+
+void ProtoConverter::visit(ElementDecl const& _x)
+{
+ if (!ElementDecl::ContentSpec_IsValid(_x.spec()))
+ return;
+
+ m_output << "";
+ break;
+ case ElementDecl::ANY:
+ m_output << "ANY>";
+ break;
+ case ElementDecl::FUZZ:
+ m_output << "FUZZ>";
+ break;
+ case ElementDecl::MIXED:
+ m_output << "(#PCDATA";
+ for (auto const& pcdata: _x.cdata())
+ m_output << "|" << pcdata;
+ m_output << ")";
+ if (_x.cdata_size() > 0)
+ m_output << "*";
+ m_output << ">";
+ break;
+ case ElementDecl::CHILDREN:
+ {
+ m_output << "(";
+ string delim = "";
+ for (auto const& str: _x.cdata()) {
+ m_output << delim << removeNonAscii(str);
+ delim = ", ";
+ }
+ m_output << ")>";
+ break;
+ }
+ case ElementDecl_ContentSpec_ElementDecl_ContentSpec_INT_MIN_SENTINEL_DO_NOT_USE_:
+ case ElementDecl_ContentSpec_ElementDecl_ContentSpec_INT_MAX_SENTINEL_DO_NOT_USE_:
+ break;
+ }
+}
+
+void ProtoConverter::visit(AttValue const& _x)
+{
+ if (!isValid(_x))
+ return;
+
+ m_output << "\"";
+ string prefix;
+ switch (_x.type())
+ {
+ case AttValue::ENTITY:
+ prefix = "&";
+ break;
+ case AttValue::CHAR:
+ if (_x.ByteSizeLong() % 2)
+ prefix = "";
+ else
+ // TODO: Value that follows this must be a
+ // sequence of hex digits.
+ prefix = "";
+ break;
+ case AttValue::FUZZ:
+ prefix = "fuzz";
+ break;
+ case AttValue_Type_AttValue_Type_INT_MIN_SENTINEL_DO_NOT_USE_:
+ case AttValue_Type_AttValue_Type_INT_MAX_SENTINEL_DO_NOT_USE_:
+ break;
+ }
+ for (auto const& name: _x.value())
+ m_output << prefix << removeNonAscii(name) << ";";
+ m_output << "\"";
+}
+
+void ProtoConverter::visit(DefaultDecl const& _x)
+{
+    // Writes #REQUIRED, #IMPLIED, "#FIXED " followed by the attribute value, or #FUZZ.
+    if (!isValid(_x))
+        return;
+
+    auto const kind = _x.type();
+    if (kind == DefaultDecl::REQUIRED)
+    {
+        m_output << "#REQUIRED";
+    }
+    else if (kind == DefaultDecl::IMPLIED)
+    {
+        m_output << "#IMPLIED";
+    }
+    else if (kind == DefaultDecl::FIXED)
+    {
+        m_output << "#FIXED ";
+        visit(_x.att());
+    }
+    else if (kind == DefaultDecl::FUZZ)
+    {
+        m_output << "#FUZZ";
+    }
+}
+
+void ProtoConverter::visit(AttDef const& _x)
+{
+    // Writes " NAME TYPE " followed by the default declaration; invalid types write nothing.
+    if (!isValid(_x)) return;
+    m_output << " " << removeNonAscii(_x.name()) << " ";
+    char const* typeKeyword = "";
+    switch (_x.type())
+    {
+    case AttDef::CDATA:
+        typeKeyword = "CDATA ";
+        break;
+    case AttDef::ID:
+        typeKeyword = "ID ";
+        break;
+    case AttDef::IDREF:
+        typeKeyword = "IDREF ";
+        break;
+    case AttDef::IDREFS:
+        typeKeyword = "IDREFS ";
+        break;
+    case AttDef::ENTITY:
+        typeKeyword = "ENTITY ";
+        break;
+    case AttDef::ENTITIES:
+        typeKeyword = "ENTITIES ";
+        break;
+    case AttDef::NMTOKEN:
+        typeKeyword = "NMTOKEN ";
+        break;
+    case AttDef::NMTOKENS:
+        typeKeyword = "NMTOKENS ";
+        break;
+    case AttDef::FUZZ:
+        typeKeyword = "FUZZ ";
+        break;
+    case AttDef_Type_AttDef_Type_INT_MIN_SENTINEL_DO_NOT_USE_:
+    case AttDef_Type_AttDef_Type_INT_MAX_SENTINEL_DO_NOT_USE_: break;
+    }
+    m_output << typeKeyword;
+    visit(_x.def());
+}
+
+void ProtoConverter::visit(AttListDecl const& _x)
+{
+ m_output << "";
+}
+
+void ProtoConverter::visit(NotationDecl const& _x)
+{
+ m_output << "";
+}
+
+void ProtoConverter::visit(NDataDecl const& _x)
+{
+ m_output << " NDATA " << _x.name();  // NOTE(review): name is written as given, not through removeNonAscii(); confirm this is intended
+}
+
+void ProtoConverter::visit(EntityDef const& _x)
+{
+    // An entity value is written on its own. An external id is followed by the NDATA
+    // declaration when the serialized size is odd. An unset oneof writes nothing.
+    auto const which = _x.entity_oneof_case();
+    if (which == EntityDef::kVal)
+    {
+        visit(_x.val());
+        return;
+    }
+    if (which != EntityDef::kExt)
+        return;
+    visit(_x.ext());
+    if (_x.ByteSizeLong() % 2)
+        visit(_x.ndata());
+}
+
+void ProtoConverter::visit(PEDef const& _x)
+{
+    // Writes the entity value or the external id, whichever the oneof holds.
+    // An unset oneof writes nothing.
+    auto const which = _x.pedef_oneof_case();
+    if (which == PEDef::kVal)
+    {
+        visit(_x.val());
+    }
+    else if (which == PEDef::kId)
+    {
+        visit(_x.id());
+    }
+}
+
+void ProtoConverter::visit(EntityValue const& _x)
+{
+ if (!isValid(_x))
+ return;
+
+ m_output << "\"";
+ string prefix;
+ switch (_x.type())
+ {
+ case EntityValue::ENTITY:
+ prefix = "&";
+ break;
+ case EntityValue::CHAR:
+ if (_x.ByteSizeLong() % 2)
+ prefix = "";
+ else
+ prefix = "";
+ break;
+ case EntityValue::PEREF:
+ prefix = "%";
+ break;
+ case EntityValue::FUZZ:
+ prefix = "fuzz";
+ break;
+ case EntityValue_Type_EntityValue_Type_INT_MIN_SENTINEL_DO_NOT_USE_:
+ case EntityValue_Type_EntityValue_Type_INT_MAX_SENTINEL_DO_NOT_USE_:
+ break;
+ }
+ for (auto const& ref: _x.name())
+ m_output << prefix << ref << ";";
+ m_output << "\"";
+}
+
+void ProtoConverter::visit(EntityDecl const& _x)
+{
+ if (!isValid(_x))
+ return;
+
+ m_output << "";
+}
+
+void ProtoConverter::visit(ConditionalSect const& _x)
+{
+ if (!isValid(_x))
+ return;
+
+ switch (_x.type())
+ {
+ case ConditionalSect::INCLUDE:
+ m_output << "";
+ break;
+ case ConditionalSect::IGNORE:
+ m_output << "";
+ m_output << "]]>";
+ break;
+ case ConditionalSect::FUZZ:
+ m_output << "";
+ break;
+ case ConditionalSect_Type_ConditionalSect_Type_INT_MIN_SENTINEL_DO_NOT_USE_:
+ case ConditionalSect_Type_ConditionalSect_Type_INT_MAX_SENTINEL_DO_NOT_USE_:
+ break;
+ }
+}
+
+
+void ProtoConverter::visit(OneExtSubsetDecl const& _x)
+{
+    // Writes the markup declaration or the conditional section, whichever the oneof holds.
+    // An unset oneof writes nothing.
+    auto const which = _x.extsubset_oneof_case();
+    if (which == OneExtSubsetDecl::kM)
+    {
+        visit(_x.m());
+    }
+    else if (which == OneExtSubsetDecl::kC)
+    {
+        visit(_x.c());
+    }
+}
+
+
+void ProtoConverter::visit(ExtSubsetDecl const& _x)
+{
+    for (int i = 0; i < _x.decls_size(); ++i)  // declarations are written in message order
+        visit(_x.decls(i));
+}
+
+void ProtoConverter::visit(CData const& _x)
+{
+ m_output << "";
+}
+
+void ProtoConverter::visit(MarkupDecl const& _x)
+{
+    // Forwards to the visit() overload for whichever alternative of the oneof is set:
+    // ElementDecl, AttListDecl, NotationDecl, Misc, EntityDecl or a nested
+    // ExtSubsetDecl. Exactly one branch can match.
+    // An unset oneof writes nothing.
+    auto const which = _x.markup_oneof_case();
+
+    if (which == MarkupDecl::kE)
+        visit(_x.e());
+
+    else if (which == MarkupDecl::kA)
+        visit(_x.a());
+
+    else if (which == MarkupDecl::kN)
+        visit(_x.n());
+
+    else if (which == MarkupDecl::kM)
+        visit(_x.m());
+
+    else if (which == MarkupDecl::kEntity)
+        visit(_x.entity());
+
+    else if (which == MarkupDecl::kExt)
+        visit(_x.ext());
+}
+
+/// Returns the predefined child element for an Element_Id.
+/// @param _x selects which predefined form is produced
+/// @param _prop attribute value for the ids that take one; it is sanitized before use
+/// @return string of the form name attribute=\"value\", or an empty string when _x is
+/// not one of the enumerators handled below
+string ProtoConverter::getPredefined(Element_Id _x, string const& _prop)
+{
+    // Each case returns directly; assigning without break made every id end at the last case.
+    switch (_x)
+    {
+    case Element::XIINCLUDE:
+    case Element::XIFALLBACK:
+    case Element::XIHREF:
+        return "xi:include href=\"fuzz.xml\"";
+    case Element::XIPARSE:
+        return "xi:include parse=\"xml\"";
+    case Element::XIXPOINTER:
+        return "xi:include xpointer=\"" + removeNonAscii(_prop) + "\"";
+    case Element::XIENCODING:
+        return "xi:include encoding=\"" + removeNonAscii(_prop) + "\"";
+    case Element::XIACCEPT:
+        return "xi:include accept=\"" + removeNonAscii(_prop) + "\"";
+    case Element::XIACCEPTLANG:
+        return "xi:include accept-language=\"" + removeNonAscii(_prop) + "\"";
+    case Element_Id_Element_Id_INT_MIN_SENTINEL_DO_NOT_USE_:
+    case Element_Id_Element_Id_INT_MAX_SENTINEL_DO_NOT_USE_:
+        return "xi:fuzz xifuzz=\"fuzz\"";
+    }
+    return string{};
+}
+
+/// Returns uri string for a given Element_Id type
+string ProtoConverter::getUri(Element_Id _x)
+{
+    // Every id maps to s_XInclude. The switch lists each id explicitly; values outside
+    // the enum fall out of it and get the same uri, so every path now returns a value.
+    switch (_x)
+    {
+    case Element::XIINCLUDE:
+    case Element::XIFALLBACK:
+    case Element::XIHREF:
+    case Element::XIPARSE:
+    case Element::XIXPOINTER:
+    case Element::XIENCODING:
+    case Element::XIACCEPT:
+    case Element::XIACCEPTLANG:
+    case Element_Id_Element_Id_INT_MIN_SENTINEL_DO_NOT_USE_:
+    case Element_Id_Element_Id_INT_MAX_SENTINEL_DO_NOT_USE_:
+        break;
+    }
+    return s_XInclude;
+}
+
+void ProtoConverter::visit(Element const& _x)
+{
+ if (!isValid(_x))
+ return;
+
+ // Predefined child node
+ string child = {};
+ // Predefined uri for child node
+ string pUri = {};
+ // Element name
+ string name = removeNonAscii(_x.name());
+
+ switch (_x.type())
+ {
+ case Element::PREDEFINED:
+ child = getPredefined(_x.id(), _x.childprop());
+ pUri = getUri(_x.id());
+ break;
+ case Element::FUZZ:
+ case Element_Type_Element_Type_INT_MIN_SENTINEL_DO_NOT_USE_:
+ case Element_Type_Element_Type_INT_MAX_SENTINEL_DO_NOT_USE_:
+ break;
+ }
+
+ //
+ //
+ //
+
+ // Start name tag: Must be Ascii?
+ m_output << "<" << name << " ";
+
+ // Add uri to name tag
+ if (!pUri.empty())
+ m_output << pUri << " ";
+ for (auto const& prop: _x.kv())
+ visit(prop);
+ m_output << ">\n";
+
+ // Add attribute
+ if (!child.empty())
+ m_output << "<" << child << "/>\n";
+
+ // Add content
+ visit(_x.content());
+
+ // Close name tag
+ m_output << "" << name << ">\n";
+}
+
+void ProtoConverter::visit(ExternalId const& _x)
+{
+    // Writes SYSTEM "sys", PUBLIC "pub" "sys" or FUZZ "pub"; every literal is sanitized
+    // first. Invalid types write nothing.
+    if (!isValid(_x))
+        return;
+
+    auto const kind = _x.type();
+    if (kind == ExternalId::SYSTEM)
+    {
+        m_output << "SYSTEM \"" << removeNonAscii(_x.system()) << "\"";
+    }
+    else if (kind == ExternalId::PUBLIC)
+    {
+        m_output << "PUBLIC \"" << removeNonAscii(_x.pub()) << "\" \""
+                 << removeNonAscii(_x.system()) << "\"";
+    }
+    else if (kind == ExternalId::FUZZ)
+    {
+        m_output << "FUZZ \"" << removeNonAscii(_x.pub()) << "\"";
+    }
+}
+
+void ProtoConverter::visit(DocTypeDecl const& _x)
+{
+ m_output << "\n";
+}
+
+void ProtoConverter::visit(VersionNum const& _x)
+{
+    // STANDARD writes the quoted literal "1.0". Any other type that passes isValid()
+    // writes the quoted "major.minor" pair taken from the message fields.
+    // Invalid types write nothing.
+    if (!isValid(_x))
+        return;
+    if (_x.type() == VersionNum::STANDARD)
+    {
+        m_output << "\"1.0\"";
+    }
+    else
+    {
+        m_output << "\"" << _x.major() << "." << _x.minor() << "\"";
+    }
+
+}
+
+void ProtoConverter::visit(Encodings const& _x)
+{
+    if (!Encodings::Enc_IsValid(_x.name()))
+        return;
+    // Pick the encoding label first, then write the whole encoding="..." attribute in one go.
+    string label;
+    switch (_x.name())
+    {
+    case Encodings::BIG5:
+        label = "BIG5";
+        break;
+    case Encodings::EUCJP:
+        label = "EUC-JP";
+        break;
+    case Encodings::EUCKR:
+        label = "EUC-KR";
+        break;
+    case Encodings::GB18030:
+        label = "GB18030";
+        break;
+    case Encodings::ISO2022JP:
+        label = "ISO-2022-JP";
+        break;
+    case Encodings::ISO2022KR:
+        label = "ISO-2022-KR";
+        break;
+    case Encodings::ISO88591:
+        label = "ISO-8859-1";
+        break;
+    case Encodings::ISO88592:
+        label = "ISO-8859-2";
+        break;
+    case Encodings::ISO88593:
+        label = "ISO-8859-3";
+        break;
+    case Encodings::ISO88594:
+        label = "ISO-8859-4";
+        break;
+    case Encodings::ISO88595:
+        label = "ISO-8859-5";
+        break;
+    case Encodings::ISO88596:
+        label = "ISO-8859-6";
+        break;
+    case Encodings::ISO88597:
+        label = "ISO-8859-7";
+        break;
+    case Encodings::ISO88598:
+        label = "ISO-8859-8";
+        break;
+    case Encodings::ISO88599:
+        label = "ISO-8859-9";
+        break;
+    case Encodings::SHIFTJIS:
+        label = "SHIFT_JIS";
+        break;
+    case Encodings::TIS620:
+        label = "TIS-620";
+        break;
+    case Encodings::USASCII:
+        label = "US-ASCII";
+        break;
+    case Encodings::UTF8:
+        label = "UTF-8";
+        break;
+    case Encodings::UTF16:
+        label = "UTF-16";
+        break;
+    case Encodings::UTF16BE:
+        label = "UTF-16BE";
+        break;
+    case Encodings::UTF16LE:
+        label = "UTF-16LE";
+        break;
+    case Encodings::WINDOWS31J:
+        label = "WINDOWS-31J";
+        break;
+    case Encodings::WINDOWS1255:
+        label = "WINDOWS-1255";
+        break;
+    case Encodings::WINDOWS1256:
+        label = "WINDOWS-1256";
+        break;
+    case Encodings::FUZZ:
+        label = removeNonAscii(_x.fuzz());
+        break;
+    case Encodings_Enc_Encodings_Enc_INT_MIN_SENTINEL_DO_NOT_USE_:
+    case Encodings_Enc_Encodings_Enc_INT_MAX_SENTINEL_DO_NOT_USE_:
+        break;
+    }
+    m_output << " encoding=\"" << label << "\"";
+}
+
+void ProtoConverter::visit(XmlDeclaration const& _x)
+{
+ m_output << R"(\n";
+}
+
+void ProtoConverter::visit(XmlDocument const& _x)
+{
+    visit(_x.p());  // prolog first, then the elements in message order
+    for (int i = 0; i < _x.e_size(); ++i)
+        visit(_x.e(i));
+}
+
+string ProtoConverter::protoToString(XmlDocument const& _x)
+{
+ visit(_x);  // renders the whole document into m_output
+ return m_output.str();  // NOTE(review): m_output is not cleared here, so a second call on the same converter presumably returns both documents; confirm
+}
diff --git a/fuzzers/src/xmlProtoConverter.h b/fuzzers/src/xmlProtoConverter.h
new file mode 100644
index 000000000..501dde36c
--- /dev/null
+++ b/fuzzers/src/xmlProtoConverter.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright (C) 2019 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <sstream>
+#include <string>
+
+#include "xml.pb.h"
+
+namespace xmlProtoFuzzer {
+/// Turns an XmlDocument protobuf message into text by visiting each
+/// sub-message and appending its textual form to an internal stream.
+/// Instances are neither copyable nor movable.
+class ProtoConverter
+{
+public:
+    ProtoConverter() = default;
+
+    ProtoConverter(ProtoConverter const&) = delete;
+
+    ProtoConverter(ProtoConverter&&) = delete;
+
+    /// Visits the given document and returns the accumulated output.
+    std::string protoToString(XmlDocument const&);
+
+private:
+    // One overload per message type; each appends to m_output.
+    void visit(Prolog const&);
+
+    void visit(ProcessingInstruction const&);
+
+    void visit(ExternalId const&);
+
+    void visit(DocTypeDecl const&);
+
+    void visit(VersionNum const&);
+
+    void visit(Encodings const&);
+
+    void visit(Misc const&);
+
+    void visit(KeyValue const&);
+
+    void visit(Element const&);
+
+    void visit(ElementDecl const&);
+
+    void visit(AttValue const&);
+
+    void visit(DefaultDecl const&);
+
+    void visit(AttDef const&);
+
+    void visit(AttListDecl const&);
+
+    void visit(NotationDecl const&);
+
+    void visit(EntityDecl const&);
+
+    void visit(EntityValue const&);
+
+    void visit(EntityDef const&);
+
+    void visit(PEDef const&);
+
+    void visit(NDataDecl const&);
+
+    void visit(ConditionalSect const&);
+
+    void visit(OneExtSubsetDecl const&);
+
+    void visit(ExtSubsetDecl const&);
+
+    void visit(MarkupDecl const&);
+
+    void visit(CData const&);
+
+    void visit(Content const&);
+
+    void visit(XmlDeclaration const&);
+
+    void visit(XmlDocument const&);
+
+    /// Returns whether messageType.type() is a declared value of the
+    /// message's nested `Type` enum (delegates to T::Type_IsValid).
+    template <typename T>
+    bool isValid(T const& messageType) {
+        return T::Type_IsValid(messageType.type());
+    }
+
+    std::string removeNonAscii(std::string const&);
+    std::string getUri(Element_Id _x);
+    std::string getPredefined(Element_Id _x, std::string const&);
+
+    /// Accumulates the text produced by the visit() overloads.
+    std::ostringstream m_output;
+
+    /// Namespace declaration attribute for the XInclude prefix `xi`.
+    static constexpr auto s_XInclude = "xmlns:xi=\"http://www.w3.org/2001/XInclude\"";
+};
+}
+
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index c29aa257b..cccfbda05 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -1272,6 +1272,10 @@ add_library(xerces-c
${libxerces_c_SOURCES}
${libxerces_c_RESOURCES})
target_link_libraries(xerces-c ${libxerces_c_DEPS})
+if (XERCES_BUILD_FUZZERS)
+  # Compile the library with libFuzzer instrumentation only (no fuzzer
+  # runtime is linked here). PUBLIC also applies the option to targets
+  # that link against xerces-c.
+  target_compile_options(xerces-c PUBLIC -fsanitize=fuzzer-no-link)
+endif()
if(XERCES_USE_NETACCESSOR_CURL)
target_include_directories(xerces-c SYSTEM PRIVATE ${CURL_INCLUDE_DIRS})
endif()
@@ -1289,11 +1293,13 @@ elseif(UNIX)
# set the version in the filename, and create the symlink at install
# time. Note: could be dropped when the SONAME is updated and
# libtool compatibility is no longer required.
- set_target_properties(xerces-c PROPERTIES OUTPUT_NAME "xerces-c-${INTERFACE_VERSION_D}")
- file(GENERATE
- OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/InstallLibrarySymlink.cmake"
- CONTENT "execute_process(COMMAND ln -sf \"$\" \"\$ENV{DESTDIR}${CMAKE_INSTALL_FULL_LIBDIR}/libxerces-c.so\")")
- install(SCRIPT "${CMAKE_CURRENT_BINARY_DIR}/InstallLibrarySymlink.cmake")
+    # The unversioned symlink only makes sense for a shared library, so the
+    # versioned output name and the install-time symlink are both skipped
+    # for static builds.
+    if(BUILD_SHARED_LIBS)
+      set_target_properties(xerces-c PROPERTIES OUTPUT_NAME "xerces-c-${INTERFACE_VERSION_D}")
+      # file(GENERATE) is used so the generator expression naming the built
+      # library file is expanded into the install script.
+      file(GENERATE
+        OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/InstallLibrarySymlink.cmake"
+        CONTENT "execute_process(COMMAND ln -sf \"$<TARGET_FILE_NAME:xerces-c>\" \"\$ENV{DESTDIR}${CMAKE_INSTALL_FULL_LIBDIR}/libxerces-c${CMAKE_SHARED_LIBRARY_SUFFIX}\")")
+      install(SCRIPT "${CMAKE_CURRENT_BINARY_DIR}/InstallLibrarySymlink.cmake")
+    endif()
else()
# Not used for the common cases, though this would be the default if
# not for libtool compatibility.
diff --git a/src/xercesc/util/Janitor.hpp b/src/xercesc/util/Janitor.hpp
index 24ff372a0..cf06e6762 100644
--- a/src/xercesc/util/Janitor.hpp
+++ b/src/xercesc/util/Janitor.hpp
@@ -154,10 +154,10 @@ private :
MFPT fToCall;
};
-
+#if defined(__GNUC__) || (! defined(_AIX) && ! defined(__hpux) && ! defined(__sun))
XERCES_TEMPLATE_EXTERN template class XMLUTIL_EXPORT ArrayJanitor;
XERCES_TEMPLATE_EXTERN template class XMLUTIL_EXPORT ArrayJanitor;
-
+#endif
XERCES_CPP_NAMESPACE_END
diff --git a/src/xercesc/util/NetAccessors/Curl/CurlURLInputStream.cpp b/src/xercesc/util/NetAccessors/Curl/CurlURLInputStream.cpp
index 8d9befd06..5ed659389 100644
--- a/src/xercesc/util/NetAccessors/Curl/CurlURLInputStream.cpp
+++ b/src/xercesc/util/NetAccessors/Curl/CurlURLInputStream.cpp
@@ -56,6 +56,7 @@ XERCES_CPP_NAMESPACE_BEGIN
CurlURLInputStream::CurlURLInputStream(const XMLURL& urlSource, const XMLNetHTTPInfo* httpInfo/*=0*/)
: fMulti(0)
, fEasy(0)
+ , fHeadersList(0)
, fMemoryManager(urlSource.getMemoryManager())
, fURLSource(urlSource)
, fTotalBytesRead(0)
@@ -69,23 +70,23 @@ CurlURLInputStream::CurlURLInputStream(const XMLURL& urlSource, const XMLNetHTTP
, fPayloadLen(0)
, fContentType(0)
{
- // Allocate the curl multi handle
- fMulti = curl_multi_init();
+ // Allocate the curl multi handle
+ fMulti = curl_multi_init();
- // Allocate the curl easy handle
- fEasy = curl_easy_init();
+ // Allocate the curl easy handle
+ fEasy = curl_easy_init();
- // Set URL option
+ // Set URL option
TranscodeToStr url(fURLSource.getURLText(), "ISO8859-1", fMemoryManager);
- curl_easy_setopt(fEasy, CURLOPT_URL, (char*)url.str());
+ curl_easy_setopt(fEasy, CURLOPT_URL, (char*)url.str());
// Set up a way to recieve the data
- curl_easy_setopt(fEasy, CURLOPT_WRITEDATA, this); // Pass this pointer to write function
- curl_easy_setopt(fEasy, CURLOPT_WRITEFUNCTION, staticWriteCallback); // Our static write function
+ curl_easy_setopt(fEasy, CURLOPT_WRITEDATA, this); // Pass this pointer to write function
+ curl_easy_setopt(fEasy, CURLOPT_WRITEFUNCTION, staticWriteCallback); // Our static write function
- // Do redirects
- curl_easy_setopt(fEasy, CURLOPT_FOLLOWLOCATION, (long)1);
- curl_easy_setopt(fEasy, CURLOPT_MAXREDIRS, (long)6);
+ // Do redirects
+ curl_easy_setopt(fEasy, CURLOPT_FOLLOWLOCATION, (long)1);
+ curl_easy_setopt(fEasy, CURLOPT_MAXREDIRS, (long)6);
// Add username and password if authentication is required
const XMLCh *username = urlSource.getUser();
@@ -117,8 +118,6 @@ CurlURLInputStream::CurlURLInputStream(const XMLURL& urlSource, const XMLNetHTTP
// Add custom headers
if(httpInfo->fHeaders) {
- struct curl_slist *headersList = 0;
-
const char *headersBuf = httpInfo->fHeaders;
const char *headersBufEnd = httpInfo->fHeaders + httpInfo->fHeadersLen;
@@ -133,7 +132,7 @@ CurlURLInputStream::CurlURLInputStream(const XMLURL& urlSource, const XMLNetHTTP
memcpy(header.get(), headerStart, length);
header.get()[length] = 0;
- headersList = curl_slist_append(headersList, header.get());
+ fHeadersList = curl_slist_append(fHeadersList, header.get());
headersBuf += 2;
headerStart = headersBuf;
@@ -141,8 +140,7 @@ CurlURLInputStream::CurlURLInputStream(const XMLURL& urlSource, const XMLNetHTTP
}
++headersBuf;
}
- curl_easy_setopt(fEasy, CURLOPT_HTTPHEADER, headersList);
- curl_slist_free_all(headersList);
+ curl_easy_setopt(fEasy, CURLOPT_HTTPHEADER, fHeadersList);
}
// Set up the payload
@@ -155,16 +153,16 @@ CurlURLInputStream::CurlURLInputStream(const XMLURL& urlSource, const XMLNetHTTP
}
}
- // Add easy handle to the multi stack
- curl_multi_add_handle(fMulti, fEasy);
+ // Add easy handle to the multi stack
+ curl_multi_add_handle(fMulti, fEasy);
// Start reading, to get the content type
- while(fBufferHeadPtr == fBuffer)
- {
- int runningHandles = 0;
+ while(fBufferHeadPtr == fBuffer)
+ {
+ int runningHandles = 0;
readMore(&runningHandles);
- if(runningHandles == 0) break;
- }
+ if(runningHandles == 0) break;
+ }
// Find the content type
char *contentType8 = 0;
@@ -176,16 +174,18 @@ CurlURLInputStream::CurlURLInputStream(const XMLURL& urlSource, const XMLNetHTTP
CurlURLInputStream::~CurlURLInputStream()
{
- // Remove the easy handle from the multi stack
- curl_multi_remove_handle(fMulti, fEasy);
+ // Remove the easy handle from the multi stack
+ curl_multi_remove_handle(fMulti, fEasy);
- // Cleanup the easy handle
- curl_easy_cleanup(fEasy);
+ // Cleanup the easy handle
+ curl_easy_cleanup(fEasy);
- // Cleanup the multi handle
- curl_multi_cleanup(fMulti);
+ // Cleanup the multi handle
+ curl_multi_cleanup(fMulti);
if(fContentType) fMemoryManager->deallocate(fContentType);
+
+ if(fHeadersList) curl_slist_free_all(fHeadersList);
}
diff --git a/src/xercesc/util/NetAccessors/Curl/CurlURLInputStream.hpp b/src/xercesc/util/NetAccessors/Curl/CurlURLInputStream.hpp
index ea528c1eb..f75857b92 100644
--- a/src/xercesc/util/NetAccessors/Curl/CurlURLInputStream.hpp
+++ b/src/xercesc/util/NetAccessors/Curl/CurlURLInputStream.hpp
@@ -96,24 +96,25 @@ private :
// that readBytes must return.
// -----------------------------------------------------------------------
- CURLM* fMulti;
- CURL* fEasy;
-
+ CURLM* fMulti;
+ CURL* fEasy;
+ curl_slist* fHeadersList;
+
MemoryManager* fMemoryManager;
- XMLURL fURLSource;
+ XMLURL fURLSource;
XMLSize_t fTotalBytesRead;
- XMLByte* fWritePtr;
+ XMLByte* fWritePtr;
XMLSize_t fBytesRead;
XMLSize_t fBytesToRead;
- bool fDataAvailable;
+ bool fDataAvailable;
// Overflow buffer for when curl writes more data to us
// than we've asked for.
- XMLByte fBuffer[CURL_MAX_WRITE_SIZE];
- XMLByte* fBufferHeadPtr;
- XMLByte* fBufferTailPtr;
+ XMLByte fBuffer[CURL_MAX_WRITE_SIZE];
+ XMLByte* fBufferHeadPtr;
+ XMLByte* fBufferTailPtr;
// Upload data
const char* fPayload;
diff --git a/src/xercesc/util/XMLChar.cpp b/src/xercesc/util/XMLChar.cpp
index 2d8b13c8e..46a61dab9 100644
--- a/src/xercesc/util/XMLChar.cpp
+++ b/src/xercesc/util/XMLChar.cpp
@@ -8837,7 +8837,7 @@ XMLByte XMLChar1_1::fgCharCharsTable1_1[0x10000] =
#include
-static XMLCh gTmpCharTable[0xFFFF];
+static XMLCh gTmpCharTable[0x10000];
static void initOneTable(const XMLCh* const theTable
, const XMLByte theMask)
diff --git a/tests/Fuzzers/parse_target.cpp b/tests/Fuzzers/parse_target.cpp
new file mode 100755
index 000000000..5e976765a
--- /dev/null
+++ b/tests/Fuzzers/parse_target.cpp
@@ -0,0 +1,28 @@
+/*
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+################################################################################
+*/
+#include "xerces_fuzz_common.h"
+
+#include "xercesc/framework/MemBufInputSource.hpp"
+#include "xercesc/parsers/SAXParser.hpp"
+#include "xercesc/util/OutOfMemoryException.hpp"
+
+using namespace xercesc_3_2;
+
+// libFuzzer entry point: feeds the raw input bytes to the shared in-memory
+// parsing helper. Always reports success to the fuzzing engine.
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size)
+{
+    parseInMemory(Data, Size);
+
+    return 0;
+}
diff --git a/tests/Fuzzers/parse_target_proto.cpp b/tests/Fuzzers/parse_target_proto.cpp
new file mode 100644
index 000000000..b1fd33cbe
--- /dev/null
+++ b/tests/Fuzzers/parse_target_proto.cpp
@@ -0,0 +1,45 @@
+/*
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+################################################################################
+*/
+#include "xerces_fuzz_common.h"
+#include "xmlProtoConverter.h"
+
+#include "xercesc/framework/MemBufInputSource.hpp"
+#include "xercesc/parsers/SAXParser.hpp"
+#include "xercesc/util/OutOfMemoryException.hpp"
+
+#include "genfiles/xml.pb.h"
+
+#include "src/libfuzzer/libfuzzer_macro.h"
+
+#include
+
+namespace {
+    // Kept alive for the whole process; presumably suppresses protobuf log
+    // output while fuzzing -- confirm against libprotobuf-mutator.
+    protobuf_mutator::protobuf::LogSilencer log_silencer;
+
+    // Variadic no-op callback; not referenced elsewhere in this file.
+    void ignore(void* ctx, const char* msg, ...) {}
+
+    // Wraps a raw pointer and a custom deleter in a std::unique_ptr so the
+    // deleter type is deduced at the call site.
+    template <class T, class D>
+    std::unique_ptr<T, D> MakeUnique(T* obj, D del) {
+        return {obj, del};
+    }
+}
+
+using namespace xercesc_3_2;
+
+// Structure-aware entry point: render the mutated protobuf message as text,
+// then run the resulting bytes through the shared parsing helper.
+DEFINE_PROTO_FUZZER(const xmlProtoFuzzer::XmlDocument& xmlDocument) {
+    xmlProtoFuzzer::ProtoConverter converter;
+    std::string xmlData = converter.protoToString(xmlDocument);
+    const uint8_t *bytes = (const uint8_t *)xmlData.c_str();
+    parseInMemory(bytes, xmlData.size());
+}
diff --git a/tests/Fuzzers/xerces_fuzz_common.cpp b/tests/Fuzzers/xerces_fuzz_common.cpp
new file mode 100755
index 000000000..a76b383a8
--- /dev/null
+++ b/tests/Fuzzers/xerces_fuzz_common.cpp
@@ -0,0 +1,47 @@
+/*
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+################################################################################
+*/
+#include "xerces_fuzz_common.h"
+
+using namespace xercesc_3_2;
+static bool initialized = false;
+
+// Parses `Size` bytes starting at `Data` as an XML document with a SAXParser
+// configured for automatic validation, namespaces off and schema processing
+// off. The parse result is discarded; the function exists to exercise the
+// parser.
+void parseInMemory(const uint8_t *Data, size_t Size)
+{
+    // The platform layer is initialised once and intentionally never
+    // terminated, so repeated calls reuse it.
+    if (!initialized)
+    {
+        XMLPlatformUtils::Initialize();
+        initialized = true;
+    }
+
+    static const char *gMemBufId = "prodInfo";
+
+    // Automatic storage instead of new/delete: both objects are destroyed
+    // even if parse() throws. The parser is declared last so it is
+    // destroyed first, matching the original delete order.
+    MemBufInputSource memBufIS(
+        (const XMLByte *)Data, Size, gMemBufId, false);
+    SAXParser parser;
+    parser.setValidationScheme(SAXParser::Val_Auto);
+    parser.setDoNamespaces(false);
+    parser.setDoSchema(false);
+    parser.setHandleMultipleImports(true);
+    parser.setValidationSchemaFullChecking(false);
+
+    parser.parse(memBufIS);
+}
diff --git a/tests/Fuzzers/xerces_fuzz_common.h b/tests/Fuzzers/xerces_fuzz_common.h
new file mode 100644
index 000000000..9eaf88bd7
--- /dev/null
+++ b/tests/Fuzzers/xerces_fuzz_common.h
@@ -0,0 +1,23 @@
+/*
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+################################################################################
+*/
+#pragma once
+
+// size_t and uint8_t are used unqualified in the declaration below.
+#include <stddef.h>
+#include <stdint.h>
+
+#include "xercesc/parsers/SAXParser.hpp"
+#include "xercesc/framework/MemBufInputSource.hpp"
+#include "xercesc/util/OutOfMemoryException.hpp"
+
+// Parses the Size bytes at Data as an in-memory XML document.
+// Shared by the raw and the protobuf-based fuzz targets.
+void parseInMemory(const uint8_t *Data, size_t Size);
\ No newline at end of file
diff --git a/tests/Fuzzers/xml.proto b/tests/Fuzzers/xml.proto
new file mode 100755
index 000000000..75e54b6f0
--- /dev/null
+++ b/tests/Fuzzers/xml.proto
@@ -0,0 +1,339 @@
+/*
+ * Copyright (C) 2019 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+syntax = "proto3";
+
+message Misc {
+ oneof misc_oneof {
+ string comment = 1;
+ ProcessingInstruction inst = 2;
+ }
+}
+
+message PEReference {
+ string name = 1;
+}
+
+message ElementDecl {
+ enum ContentSpec {
+ EMPTY = 0;
+ ANY = 1;
+ FUZZ = 2;
+ MIXED = 3;
+ CHILDREN = 4;
+ }
+ string name = 1;
+ ContentSpec spec = 2;
+ repeated string cdata = 3;
+}
+
+message AttrType {
+ enum Type {
+ CDATA = 0;
+ ID = 1;
+ IDREF = 2;
+ IDREFS = 3;
+ ENTITY = 4;
+ ENTITIES = 5;
+ NMTOKEN = 6;
+ NMTOKENS = 7;
+ }
+ Type ty = 1;
+}
+
+message EnumeratedType {
+ repeated string names = 1;
+}
+
+message AttrListDecl {
+ string name = 1;
+ AttrType atype = 2;
+ EnumeratedType etype = 3;
+ DefaultDecl def = 4;
+}
+
+// External identifier. In tests/Fuzzers/xmlProtoConverter.cpp:
+//   SYSTEM emits  SYSTEM "<system>"
+//   PUBLIC emits  PUBLIC "<pub>" "<system>"
+//   FUZZ   emits  FUZZ "<pub>"  (a deliberately unknown keyword)
+// Both strings are reduced to alphanumerics before being written.
+message ExternalId {
+  enum Type {
+    SYSTEM = 0;
+    PUBLIC = 1;
+    FUZZ = 2;
+  }
+  Type type = 1;
+  string system = 2;
+  string pub = 3;
+}
+
+message AttValue {
+ enum Type {
+ ENTITY = 0;
+ CHAR = 1;
+ FUZZ = 2;
+ }
+ Type type = 1;
+ repeated string value = 2;
+}
+
+// Attribute default declaration. The converter writes #REQUIRED, #IMPLIED,
+// #FIXED followed by the serialised `att`, or the unknown keyword #FUZZ.
+message DefaultDecl {
+  enum Type {
+    REQUIRED = 0;
+    IMPLIED = 1;
+    FIXED = 2;
+    FUZZ = 3;
+  }
+  Type type = 1;
+  // Only serialised when type is FIXED.
+  AttValue att = 2;
+}
+
+message AttDef {
+ // TODO: Add enumerated type
+ enum Type {
+ CDATA = 0;
+ ID = 1;
+ IDREF = 2;
+ IDREFS = 3;
+ ENTITY = 4;
+ ENTITIES = 5;
+ NMTOKEN = 6;
+ NMTOKENS = 7;
+ FUZZ = 8;
+ }
+ string name = 1;
+ Type type = 2;
+ DefaultDecl def = 3;
+}
+
+message AttListDecl {
+ string name = 1;
+ repeated AttDef attdefs = 2;
+}
+
+message NotationDecl {
+ string name = 1;
+ oneof notation_oneof {
+ ExternalId ext = 2;
+ string pub = 3;
+ string fuzz = 4;
+ }
+}
+
+message EntityValue {
+ enum Type {
+ ENTITY = 0;
+ CHAR = 1;
+ PEREF = 2;
+ FUZZ = 3;
+ }
+ Type type = 1;
+ repeated string name = 2;
+}
+
+message NDataDecl {
+ string name = 1;
+}
+
+message EntityDef {
+ oneof entity_oneof {
+ ExternalId ext = 1;
+ EntityValue val = 2;
+ }
+ NDataDecl ndata = 3;
+}
+
+message PEDef {
+ oneof pedef_oneof {
+ EntityValue val = 1;
+ ExternalId id = 2;
+ }
+}
+
+message EntityDecl {
+ enum Type {
+ GEDECL = 0;
+ PEDECL = 1;
+ }
+ Type type = 1;
+ string name = 2;
+ EntityDef ent = 3;
+ PEDef pedef = 4;
+}
+
+message ConditionalSect {
+ enum Type {
+ INCLUDE = 0;
+ IGNORE = 1;
+ FUZZ = 2;
+ }
+ Type type = 1;
+ ExtSubsetDecl ext = 2;
+ // TODO: Make this recursive
+ // See https://www.w3.org/TR/xml/#NT-conditionalSect
+ repeated string ignores = 3;
+}
+
+message OneExtSubsetDecl {
+ oneof extsubset_oneof {
+ MarkupDecl m = 1;
+ ConditionalSect c = 2;
+ }
+}
+
+message ExtSubsetDecl {
+ repeated OneExtSubsetDecl decls = 1;
+}
+
+message MarkupDecl {
+ oneof markup_oneof {
+ ElementDecl e = 1;
+ AttListDecl a = 2;
+ NotationDecl n = 3;
+ Misc m = 4;
+ EntityDecl entity = 5;
+ ExtSubsetDecl ext = 6;
+ }
+}
+
+message DocTypeDecl {
+ string name = 1;
+ ExternalId ext = 2;
+ repeated MarkupDecl mdecl = 3;
+}
+
+message Prolog {
+ XmlDeclaration decl = 1;
+ DocTypeDecl doctype = 2;
+ repeated Misc misc = 3;
+}
+
+// A single attribute. For ATTRIBUTES..TEST the converter writes a fixed
+// name of the form xml:<lowercase enum name> and ignores `key`; for FUZZ
+// the attribute name is taken from `key`, optionally prefixed with "xmlns:".
+message KeyValue {
+  enum XmlNamespace {
+    ATTRIBUTES = 0;
+    BASE = 1;
+    CATALOG = 2;
+    ID = 3;
+    LANG = 4;
+    LINK = 5;
+    SPACE = 6;
+    SPECIAL = 7;
+    TEST = 8;
+    FUZZ = 9;
+  }
+  XmlNamespace type = 1;
+  // Attribute name; only read when type is FUZZ.
+  string key = 2;
+  // Attribute value; reduced to alphanumerics by the converter.
+  string value = 3;
+}
+
+message ProcessingInstruction {
+ string name = 1;
+ repeated KeyValue kv = 2;
+}
+
+message CData {
+ string data = 1;
+}
+
+message Content {
+ // TODO: Add other content types
+ oneof content_oneof {
+ string str = 1;
+ Element e = 2;
+ CData c = 3;
+ }
+}
+
+// One XML element. With type PREDEFINED the converter additionally emits an
+// XInclude namespace declaration on the start tag and a predefined child
+// selected by `id`; with FUZZ only name, attributes and content are used.
+message Element {
+  enum Type {
+    PREDEFINED = 0;
+    FUZZ = 1;
+  }
+  enum Id {
+    XIINCLUDE = 0;
+    XIFALLBACK = 1;
+    // Attributes of xinclude
+    XIHREF = 2;
+    XIPARSE = 3;
+    XIXPOINTER = 4;
+    XIENCODING = 5;
+    XIACCEPT = 6;
+    XIACCEPTLANG = 7;
+  }
+  Type type = 1;
+  Id id = 2;
+  // Tag name; reduced to alphanumerics by the converter.
+  string name = 3;
+  // Attributes written inside the start tag.
+  repeated KeyValue kv = 4;
+  Content content = 5;
+  // Value passed to the predefined child's attribute (PREDEFINED only).
+  string childprop = 6;
+}
+
+message VersionNum {
+ enum Type {
+ STANDARD = 0;
+ FUZZ = 1;
+ }
+ Type type = 1;
+ uint64 major = 2;
+ uint64 minor = 3;
+}
+
+message Encodings {
+ enum Enc {
+ BIG5 = 0;
+ EUCJP = 1;
+ EUCKR = 2;
+ GB18030 = 3;
+ ISO2022JP = 4;
+ ISO2022KR = 5;
+ ISO88591 = 6;
+ ISO88592 = 7;
+ ISO88593 = 8;
+ ISO88594 = 9;
+ ISO88595 = 10;
+ ISO88596 = 11;
+ ISO88597 = 12;
+ ISO88598 = 13;
+ ISO88599 = 14;
+ SHIFTJIS = 15;
+ TIS620 = 16;
+ USASCII = 17;
+ UTF8 = 18;
+ UTF16 = 19;
+ UTF16BE = 20;
+ UTF16LE = 21;
+ WINDOWS31J = 22;
+ WINDOWS1255 = 23;
+ WINDOWS1256 = 24;
+ FUZZ = 25;
+ }
+ Enc name = 1;
+ string fuzz = 2;
+}
+
+message XmlDeclaration {
+ VersionNum ver = 1;
+ Encodings enc = 2;
+ enum Standalone {
+ YES = 0;
+ NO = 1;
+ }
+ Standalone standalone = 3;
+ string fuzz = 4;
+}
+
+// Root message consumed by the proto fuzz target: a prolog followed by the
+// document's elements, serialised in that order.
+message XmlDocument {
+  Prolog p = 1;
+  repeated Element e = 2;
+}
+
+package xmlProtoFuzzer;
\ No newline at end of file
diff --git a/tests/Fuzzers/xmlProtoConverter.cpp b/tests/Fuzzers/xmlProtoConverter.cpp
new file mode 100644
index 000000000..f8a47dee2
--- /dev/null
+++ b/tests/Fuzzers/xmlProtoConverter.cpp
@@ -0,0 +1,758 @@
+/*
+ * Copyright (C) 2019 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "xmlProtoConverter.h"
+
+#include <algorithm>
+#include <cctype>
+
+using namespace std;
+using namespace xmlProtoFuzzer;
+
+/// Returns a copy of _utf8 with every character that is not an ASCII letter
+/// or digit removed. If nothing is left, the placeholder "fuzz" is returned
+/// so callers never emit an empty name or value.
+string ProtoConverter::removeNonAscii(string const& _utf8)
+{
+    string asciiStr{_utf8};
+    asciiStr.erase(
+        remove_if(asciiStr.begin(), asciiStr.end(),
+            // Take the byte as unsigned char: passing a negative char to the
+            // <cctype> classifiers is undefined behaviour.
+            [](unsigned char c) -> bool { return !std::isalnum(c); }),
+        asciiStr.end());
+    return asciiStr.empty() ? "fuzz" : asciiStr;
+}
+
+
+void ProtoConverter::visit(Misc const& _x)
+{
+ switch (_x.misc_oneof_case())
+ {
+ case Misc::kComment:
+ m_output << "\n";
+ break;
+ case Misc::kInst:
+ visit(_x.inst());
+ break;
+ case Misc::MISC_ONEOF_NOT_SET:
+ break;
+ }
+}
+
+// Prolog = XML declaration, then the doctype declaration, then any trailing
+// comments / processing instructions.
+void ProtoConverter::visit(Prolog const& _x)
+{
+    visit(_x.decl());
+    visit(_x.doctype());
+    auto const& trailing = _x.misc();
+    for (auto it = trailing.begin(); it != trailing.end(); ++it)
+    {
+        visit(*it);
+    }
+}
+
+// Writes one attribute as `name="value" `. The name is fixed for the
+// well-known xml:* kinds and derived from the message's key for FUZZ.
+void ProtoConverter::visit(KeyValue const& _x)
+{
+    if (!KeyValue::XmlNamespace_IsValid(_x.type()))
+        return;
+
+    string attrName;
+    switch (_x.type())
+    {
+    case KeyValue::ATTRIBUTES: attrName = "xml:attributes"; break;
+    case KeyValue::BASE:       attrName = "xml:base";       break;
+    case KeyValue::CATALOG:    attrName = "xml:catalog";    break;
+    case KeyValue::ID:         attrName = "xml:id";         break;
+    case KeyValue::LANG:       attrName = "xml:lang";       break;
+    case KeyValue::LINK:       attrName = "xml:link";       break;
+    case KeyValue::SPACE:      attrName = "xml:space";      break;
+    case KeyValue::SPECIAL:    attrName = "xml:special";    break;
+    case KeyValue::TEST:       attrName = "xml:test";       break;
+    case KeyValue::FUZZ:
+        // The parity of the encoded size decides whether the key is
+        // written as a namespace declaration or as a plain attribute.
+        if (_x.ByteSizeLong() % 2)
+            attrName = "xmlns:";
+        attrName += removeNonAscii(_x.key());
+        break;
+    case KeyValue_XmlNamespace_KeyValue_XmlNamespace_INT_MIN_SENTINEL_DO_NOT_USE_:
+    case KeyValue_XmlNamespace_KeyValue_XmlNamespace_INT_MAX_SENTINEL_DO_NOT_USE_:
+        // Sentinels produce no output at all.
+        return;
+    }
+    m_output << attrName << "=\"" << removeNonAscii(_x.value()) << "\" ";
+}
+
+/// Writes a processing instruction: `<?name attr="value" ... ?>` followed by
+/// a newline. The target name is reduced to alphanumerics.
+void ProtoConverter::visit(ProcessingInstruction const& _x)
+{
+    // The opening delimiter must be "<?" to pair with the "?>" written below.
+    m_output << "<?" << removeNonAscii(_x.name()) << " ";
+    for (auto const& prop: _x.kv())
+        visit(prop);
+    m_output << "?>\n";
+}
+
+// Element content is exactly one of: raw text, a nested element, or a CDATA
+// section. Each variant is followed by a newline; an unset oneof writes
+// nothing.
+void ProtoConverter::visit(Content const& _x)
+{
+    auto const which = _x.content_oneof_case();
+    if (which == Content::CONTENT_ONEOF_NOT_SET)
+        return;
+
+    if (which == Content::kStr)
+    {
+        m_output << _x.str() << "\n";
+    }
+    else if (which == Content::kE)
+    {
+        visit(_x.e());
+        m_output << "\n";
+    }
+    else if (which == Content::kC)
+    {
+        visit(_x.c());
+        m_output << "\n";
+    }
+}
+
+void ProtoConverter::visit(ElementDecl const& _x)
+{
+ if (!ElementDecl::ContentSpec_IsValid(_x.spec()))
+ return;
+
+ m_output << "";
+ break;
+ case ElementDecl::ANY:
+ m_output << "ANY>";
+ break;
+ case ElementDecl::FUZZ:
+ m_output << "FUZZ>";
+ break;
+ case ElementDecl::MIXED:
+ m_output << "(#PCDATA";
+ for (auto const& pcdata: _x.cdata())
+ m_output << "|" << pcdata;
+ m_output << ")";
+ if (_x.cdata_size() > 0)
+ m_output << "*";
+ m_output << ">";
+ break;
+ case ElementDecl::CHILDREN:
+ {
+ m_output << "(";
+ string delim = "";
+ for (auto const& str: _x.cdata()) {
+ m_output << delim << removeNonAscii(str);
+ delim = ", ";
+ }
+ m_output << ")>";
+ break;
+ }
+ case ElementDecl_ContentSpec_ElementDecl_ContentSpec_INT_MIN_SENTINEL_DO_NOT_USE_:
+ case ElementDecl_ContentSpec_ElementDecl_ContentSpec_INT_MAX_SENTINEL_DO_NOT_USE_:
+ break;
+ }
+}
+
+/// Writes a quoted attribute value made of references. Every entry of
+/// value() becomes `<prefix><name>;`, where the prefix depends on the type:
+/// "&" for entity references, "&#" or "&#x" for character references and
+/// the literal "fuzz" for the FUZZ kind.
+void ProtoConverter::visit(AttValue const& _x)
+{
+    if (!isValid(_x))
+        return;
+
+    m_output << "\"";
+    string prefix;
+    switch (_x.type())
+    {
+    case AttValue::ENTITY:
+        prefix = "&";
+        break;
+    case AttValue::CHAR:
+        // The parity of the encoded size picks decimal vs. hexadecimal form.
+        if (_x.ByteSizeLong() % 2)
+            prefix = "&#";
+        else
+            // TODO: Value that follows this must be a
+            // sequence of hex digits.
+            prefix = "&#x";
+        break;
+    case AttValue::FUZZ:
+        prefix = "fuzz";
+        break;
+    case AttValue_Type_AttValue_Type_INT_MIN_SENTINEL_DO_NOT_USE_:
+    case AttValue_Type_AttValue_Type_INT_MAX_SENTINEL_DO_NOT_USE_:
+        break;
+    }
+    for (auto const& name: _x.value())
+        m_output << prefix << removeNonAscii(name) << ";";
+    m_output << "\"";
+}
+
+// Writes the default-declaration keyword of an attribute definition; FIXED
+// is additionally followed by the attribute value.
+void ProtoConverter::visit(DefaultDecl const& _x)
+{
+    if (!isValid(_x))
+        return;
+
+    auto const kind = _x.type();
+    if (kind == DefaultDecl::REQUIRED)
+    {
+        m_output << "#REQUIRED";
+    }
+    else if (kind == DefaultDecl::IMPLIED)
+    {
+        m_output << "#IMPLIED";
+    }
+    else if (kind == DefaultDecl::FIXED)
+    {
+        m_output << "#FIXED ";
+        visit(_x.att());
+    }
+    else if (kind == DefaultDecl::FUZZ)
+    {
+        m_output << "#FUZZ";
+    }
+    // Sentinel values write nothing.
+}
+
+// Writes one attribute definition: ` name TYPE default`, where TYPE is the
+// attribute-type keyword (or the unknown keyword FUZZ).
+void ProtoConverter::visit(AttDef const& _x)
+{
+    if (!isValid(_x))
+        return;
+
+    m_output << " " << removeNonAscii(_x.name()) << " ";
+
+    // Keyword including its trailing blank; stays empty for sentinels.
+    char const* keyword = "";
+    switch (_x.type())
+    {
+    case AttDef::CDATA:    keyword = "CDATA ";    break;
+    case AttDef::ID:       keyword = "ID ";       break;
+    case AttDef::IDREF:    keyword = "IDREF ";    break;
+    case AttDef::IDREFS:   keyword = "IDREFS ";   break;
+    case AttDef::ENTITY:   keyword = "ENTITY ";   break;
+    case AttDef::ENTITIES: keyword = "ENTITIES "; break;
+    case AttDef::NMTOKEN:  keyword = "NMTOKEN ";  break;
+    case AttDef::NMTOKENS: keyword = "NMTOKENS "; break;
+    case AttDef::FUZZ:     keyword = "FUZZ ";     break;
+    case AttDef_Type_AttDef_Type_INT_MIN_SENTINEL_DO_NOT_USE_:
+    case AttDef_Type_AttDef_Type_INT_MAX_SENTINEL_DO_NOT_USE_:
+        break;
+    }
+    m_output << keyword;
+
+    visit(_x.def());
+}
+
+void ProtoConverter::visit(AttListDecl const& _x)
+{
+ m_output << "";
+}
+
+void ProtoConverter::visit(NotationDecl const& _x)
+{
+ m_output << "";
+}
+
+// Appends the NDATA clause of an entity definition; the name is written
+// exactly as stored in the message.
+void ProtoConverter::visit(NDataDecl const& _x)
+{
+    m_output << " NDATA ";
+    m_output << _x.name();
+}
+
+// An entity definition is either an external id (optionally followed by an
+// NDATA clause) or a literal entity value. An unset oneof writes nothing.
+void ProtoConverter::visit(EntityDef const& _x)
+{
+    auto const which = _x.entity_oneof_case();
+    if (which == EntityDef::kExt)
+    {
+        visit(_x.ext());
+        // The parity of the encoded size decides whether NDATA is appended.
+        bool const withNData = (_x.ByteSizeLong() % 2) != 0;
+        if (withNData)
+            visit(_x.ndata());
+    }
+    else if (which == EntityDef::kVal)
+    {
+        visit(_x.val());
+    }
+}
+
+// A parameter-entity definition is either a literal entity value or an
+// external id. An unset oneof writes nothing.
+void ProtoConverter::visit(PEDef const& _x)
+{
+    auto const which = _x.pedef_oneof_case();
+    if (which == PEDef::kVal)
+    {
+        visit(_x.val());
+    }
+    else if (which == PEDef::kId)
+    {
+        visit(_x.id());
+    }
+}
+
+/// Writes a quoted entity value made of references. Every entry of name()
+/// becomes `<prefix><name>;`: "&" for entity references, "&#" or "&#x" for
+/// character references, "%" for parameter-entity references and the
+/// literal "fuzz" for the FUZZ kind. Names are written as stored.
+void ProtoConverter::visit(EntityValue const& _x)
+{
+    if (!isValid(_x))
+        return;
+
+    m_output << "\"";
+    string prefix;
+    switch (_x.type())
+    {
+    case EntityValue::ENTITY:
+        prefix = "&";
+        break;
+    case EntityValue::CHAR:
+        // Same decimal / hexadecimal split as in visit(AttValue const&).
+        if (_x.ByteSizeLong() % 2)
+            prefix = "&#";
+        else
+            prefix = "&#x";
+        break;
+    case EntityValue::PEREF:
+        prefix = "%";
+        break;
+    case EntityValue::FUZZ:
+        prefix = "fuzz";
+        break;
+    case EntityValue_Type_EntityValue_Type_INT_MIN_SENTINEL_DO_NOT_USE_:
+    case EntityValue_Type_EntityValue_Type_INT_MAX_SENTINEL_DO_NOT_USE_:
+        break;
+    }
+    for (auto const& ref: _x.name())
+        m_output << prefix << ref << ";";
+    m_output << "\"";
+}
+
+void ProtoConverter::visit(EntityDecl const& _x)
+{
+ if (!isValid(_x))
+ return;
+
+ m_output << "";
+}
+
+void ProtoConverter::visit(ConditionalSect const& _x)
+{
+ if (!isValid(_x))
+ return;
+
+ switch (_x.type())
+ {
+ case ConditionalSect::INCLUDE:
+ m_output << "";
+ break;
+ case ConditionalSect::IGNORE:
+ m_output << "";
+ m_output << "]]>";
+ break;
+ case ConditionalSect::FUZZ:
+ m_output << "";
+ break;
+ case ConditionalSect_Type_ConditionalSect_Type_INT_MIN_SENTINEL_DO_NOT_USE_:
+ case ConditionalSect_Type_ConditionalSect_Type_INT_MAX_SENTINEL_DO_NOT_USE_:
+ break;
+ }
+}
+
+
+// One external-subset entry: either a markup declaration or a conditional
+// section. An unset oneof writes nothing.
+void ProtoConverter::visit(OneExtSubsetDecl const& _x)
+{
+    auto const which = _x.extsubset_oneof_case();
+    if (which == OneExtSubsetDecl::kM)
+    {
+        visit(_x.m());
+    }
+    else if (which == OneExtSubsetDecl::kC)
+    {
+        visit(_x.c());
+    }
+}
+
+
+// Writes every entry of the external subset in message order.
+void ProtoConverter::visit(ExtSubsetDecl const& _x)
+{
+    auto const& entries = _x.decls();
+    for (auto it = entries.begin(); it != entries.end(); ++it)
+    {
+        visit(*it);
+    }
+}
+
+void ProtoConverter::visit(CData const& _x)
+{
+ m_output << "";
+}
+
+// Dispatches a markup declaration to the overload for whichever variant of
+// the oneof is populated. An unset oneof writes nothing.
+void ProtoConverter::visit(MarkupDecl const& _x)
+{
+    auto const which = _x.markup_oneof_case();
+    if (which == MarkupDecl::kE)
+        visit(_x.e());
+    else if (which == MarkupDecl::kA)
+        visit(_x.a());
+    else if (which == MarkupDecl::kN)
+        visit(_x.n());
+    else if (which == MarkupDecl::kM)
+        visit(_x.m());
+    else if (which == MarkupDecl::kEntity)
+        visit(_x.entity());
+    else if (which == MarkupDecl::kExt)
+        visit(_x.ext());
+}
+
+/// Returns predefined element from an Element_Id enum
+/// @param _x is an enum that holds the desired type of predefined value
+/// @param _prop is a string that holds the value of the desired type
+/// @return string holding the predefined value of the form
+/// name attribute=\"value\"
+///
+/// Each case returns directly. Previously no case ended in `break`, so every
+/// recognised id fell through and produced the final "xi:fuzz" string.
+string ProtoConverter::getPredefined(Element_Id _x, string const& _prop)
+{
+    switch (_x)
+    {
+    case Element::XIINCLUDE:
+    case Element::XIFALLBACK:
+    case Element::XIHREF:
+        return "xi:include href=\"fuzz.xml\"";
+    case Element::XIPARSE:
+        return "xi:include parse=\"xml\"";
+    case Element::XIXPOINTER:
+        return "xi:include xpointer=\"" + removeNonAscii(_prop) + "\"";
+    case Element::XIENCODING:
+        return "xi:include encoding=\"" + removeNonAscii(_prop) + "\"";
+    case Element::XIACCEPT:
+        return "xi:include accept=\"" + removeNonAscii(_prop) + "\"";
+    case Element::XIACCEPTLANG:
+        return "xi:include accept-language=\"" + removeNonAscii(_prop) + "\"";
+    case Element_Id_Element_Id_INT_MIN_SENTINEL_DO_NOT_USE_:
+    case Element_Id_Element_Id_INT_MAX_SENTINEL_DO_NOT_USE_:
+        return "xi:fuzz xifuzz=\"fuzz\"";
+    }
+    // A numeric value outside the declared enumerators yields no child,
+    // as before.
+    return {};
+}
+
+/// Returns uri string for a given Element_Id type
+///
+/// Every id currently maps to the XInclude namespace declaration. The switch
+/// is kept so that a newly added enumerator triggers a compiler diagnostic
+/// here; the single return after it guarantees a value on every path.
+string ProtoConverter::getUri(Element_Id _x)
+{
+    if (!Element::Id_IsValid(_x))
+        return s_XInclude;
+
+    switch (_x)
+    {
+    case Element::XIINCLUDE:
+    case Element::XIFALLBACK:
+    case Element::XIHREF:
+    case Element::XIPARSE:
+    case Element::XIXPOINTER:
+    case Element::XIENCODING:
+    case Element::XIACCEPT:
+    case Element::XIACCEPTLANG:
+    case Element_Id_Element_Id_INT_MIN_SENTINEL_DO_NOT_USE_:
+    case Element_Id_Element_Id_INT_MAX_SENTINEL_DO_NOT_USE_:
+        break;
+    }
+    return s_XInclude;
+}
+
+/// Serializes an element: start tag (name, optional namespace declaration,
+/// attributes), optional predefined child, content, and end tag.
+/// Nothing is emitted when isValid(_x) rejects the element's type.
+void ProtoConverter::visit(Element const& _x)
+{
+    if (!isValid(_x))
+        return;
+
+    // Predefined child node
+    string child = {};
+    // Predefined uri for child node
+    string pUri = {};
+    // Element name
+    string name = removeNonAscii(_x.name());
+
+    switch (_x.type())
+    {
+    case Element::PREDEFINED:
+        child = getPredefined(_x.id(), _x.childprop());
+        pUri = getUri(_x.id());
+        break;
+    case Element::FUZZ:
+    case Element_Type_Element_Type_INT_MIN_SENTINEL_DO_NOT_USE_:
+    case Element_Type_Element_Type_INT_MAX_SENTINEL_DO_NOT_USE_:
+        break;
+    }
+
+    // Layout of the generated markup:
+    //   start tag:  name, optional uri, attributes
+    //   optional self-closing predefined child
+    //   content
+    //   end tag
+
+    // Start name tag: Must be Ascii?
+    m_output << "<" << name << " ";
+
+    // Add uri to name tag
+    if (!pUri.empty())
+        m_output << pUri << " ";
+    for (auto const& prop: _x.kv())
+        visit(prop);
+    m_output << ">\n";
+
+    // Add predefined child element
+    if (!child.empty())
+        m_output << "<" << child << "/>\n";
+
+    // Add content
+    visit(_x.content());
+
+    // Close name tag; the end tag must open with "</" to match the start tag
+    m_output << "</" << name << ">\n";
+}
+
+/// Writes an external identifier as a keyword followed by quoted literals:
+/// SYSTEM with the system literal, PUBLIC with the public and system
+/// literals, or FUZZ with the public literal. Each literal is passed through
+/// removeNonAscii. Nothing is written when isValid(_x) rejects the type or
+/// the type is one of the sentinel values.
+void ProtoConverter::visit(ExternalId const& _x)
+{
+    if (!isValid(_x))
+        return;
+
+    auto const kind = _x.type();
+    switch (kind)
+    {
+    case ExternalId_Type_ExternalId_Type_INT_MIN_SENTINEL_DO_NOT_USE_:
+    case ExternalId_Type_ExternalId_Type_INT_MAX_SENTINEL_DO_NOT_USE_:
+        return;
+    case ExternalId::SYSTEM:
+        m_output << "SYSTEM " << "\"" << removeNonAscii(_x.system()) << "\"";
+        return;
+    case ExternalId::PUBLIC:
+        m_output << "PUBLIC " << "\"" << removeNonAscii(_x.pub()) << "\"";
+        m_output << " " << "\"" << removeNonAscii(_x.system()) << "\"";
+        return;
+    case ExternalId::FUZZ:
+        m_output << "FUZZ " << "\"" << removeNonAscii(_x.pub()) << "\"";
+        return;
+    }
+}
+
+/// Visits a DocTypeDecl message.
+/// As written, this only appends a newline to m_output and never reads _x.
+/// NOTE(review): presumably the document type declaration markup is missing
+/// from this body; confirm against the original converter source.
+void ProtoConverter::visit(DocTypeDecl const& _x)
+{
+ m_output << "\n";
+}
+
+/// Writes a quoted version number: the fixed text "1.0" for STANDARD, or
+/// major() "." minor() for FUZZ and the sentinel values. Nothing is written
+/// when isValid(_x) rejects the type.
+void ProtoConverter::visit(VersionNum const& _x)
+{
+    if (!isValid(_x))
+        return;
+
+    auto const kind = _x.type();
+    switch (kind)
+    {
+    case VersionNum::FUZZ:
+    case VersionNum_Type_VersionNum_Type_INT_MIN_SENTINEL_DO_NOT_USE_:
+    case VersionNum_Type_VersionNum_Type_INT_MAX_SENTINEL_DO_NOT_USE_:
+        m_output << "\"" << _x.major();
+        m_output << "." << _x.minor() << "\"";
+        return;
+    case VersionNum::STANDARD:
+        m_output << "\"1.0\"";
+        return;
+    }
+}
+
+/// Writes an encoding pseudo-attribute: the text ` encoding="`, the encoding
+/// name, and a closing quote. Fixed encodings use a literal label, FUZZ uses
+/// removeNonAscii(fuzz()), and the sentinel values leave the name empty.
+/// Nothing is written when Encodings::Enc_IsValid rejects name().
+void ProtoConverter::visit(Encodings const& _x)
+{
+    auto const enc = _x.name();
+    if (!Encodings::Enc_IsValid(enc))
+        return;
+
+    // Label for the fixed encodings; stays empty for FUZZ and the sentinels.
+    char const* label = "";
+    switch (enc)
+    {
+    case Encodings::BIG5:        label = "BIG5"; break;
+    case Encodings::EUCJP:       label = "EUC-JP"; break;
+    case Encodings::EUCKR:       label = "EUC-KR"; break;
+    case Encodings::GB18030:     label = "GB18030"; break;
+    case Encodings::ISO2022JP:   label = "ISO-2022-JP"; break;
+    case Encodings::ISO2022KR:   label = "ISO-2022-KR"; break;
+    case Encodings::ISO88591:    label = "ISO-8859-1"; break;
+    case Encodings::ISO88592:    label = "ISO-8859-2"; break;
+    case Encodings::ISO88593:    label = "ISO-8859-3"; break;
+    case Encodings::ISO88594:    label = "ISO-8859-4"; break;
+    case Encodings::ISO88595:    label = "ISO-8859-5"; break;
+    case Encodings::ISO88596:    label = "ISO-8859-6"; break;
+    case Encodings::ISO88597:    label = "ISO-8859-7"; break;
+    case Encodings::ISO88598:    label = "ISO-8859-8"; break;
+    case Encodings::ISO88599:    label = "ISO-8859-9"; break;
+    case Encodings::SHIFTJIS:    label = "SHIFT_JIS"; break;
+    case Encodings::TIS620:      label = "TIS-620"; break;
+    case Encodings::USASCII:     label = "US-ASCII"; break;
+    case Encodings::UTF8:        label = "UTF-8"; break;
+    case Encodings::UTF16:       label = "UTF-16"; break;
+    case Encodings::UTF16BE:     label = "UTF-16BE"; break;
+    case Encodings::UTF16LE:     label = "UTF-16LE"; break;
+    case Encodings::WINDOWS31J:  label = "WINDOWS-31J"; break;
+    case Encodings::WINDOWS1255: label = "WINDOWS-1255"; break;
+    case Encodings::WINDOWS1256: label = "WINDOWS-1256"; break;
+    case Encodings::FUZZ:
+    case Encodings_Enc_Encodings_Enc_INT_MIN_SENTINEL_DO_NOT_USE_:
+    case Encodings_Enc_Encodings_Enc_INT_MAX_SENTINEL_DO_NOT_USE_:
+        break;
+    }
+
+    m_output << " encoding=\"";
+    if (enc == Encodings::FUZZ)
+        m_output << removeNonAscii(_x.fuzz());
+    else
+        m_output << label;
+    m_output << "\"";
+}
+
+/// Visits an XmlDeclaration message.
+/// NOTE(review): the statement below looks truncated. It opens a raw string
+/// literal that is not terminated on this line, and _x is never read.
+/// Restore the full body from the original converter source before building.
+void ProtoConverter::visit(XmlDeclaration const& _x)
+{
+ m_output << R"(\n";
+}
+
+/// Serializes a whole document: first the message returned by p(), then
+/// each entry of e() in iteration order.
+void ProtoConverter::visit(XmlDocument const& _x)
+{
+    visit(_x.p());
+
+    auto const& elements = _x.e();
+    for (auto it = elements.begin(); it != elements.end(); ++it)
+        visit(*it);
+}
+
+/// Converts an XmlDocument message to text.
+/// The document is visited, which appends to m_output, and the accumulated
+/// contents of m_output are returned.
+string ProtoConverter::protoToString(XmlDocument const& _x)
+{
+    visit(_x);
+    string serialized = m_output.str();
+    return serialized;
+}
diff --git a/tests/Fuzzers/xmlProtoConverter.h b/tests/Fuzzers/xmlProtoConverter.h
new file mode 100644
index 000000000..501dde36c
--- /dev/null
+++ b/tests/Fuzzers/xmlProtoConverter.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright (C) 2019 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <sstream>
+#include <string>
+
+#include "xml.pb.h"
+
+namespace xmlProtoFuzzer {
+/// Converts an XmlDocument protobuf message into text.
+/// Each visit() overload handles one message type and appends to the
+/// m_output string stream. Copy and move construction are disabled.
+class ProtoConverter
+{
+public:
+    ProtoConverter() = default;
+
+    ProtoConverter(ProtoConverter const&) = delete;
+
+    ProtoConverter(ProtoConverter&&) = delete;
+
+    /// Visits the given document and returns the text held by m_output.
+    std::string protoToString(XmlDocument const&);
+
+private:
+    // One visit() overload per message type of the document grammar.
+    void visit(Prolog const&);
+
+    void visit(ProcessingInstruction const&);
+
+    void visit(ExternalId const&);
+
+    void visit(DocTypeDecl const&);
+
+    void visit(VersionNum const&);
+
+    void visit(Encodings const&);
+
+    void visit(Misc const&);
+
+    void visit(KeyValue const&);
+
+    void visit(Element const&);
+
+    void visit(ElementDecl const&);
+
+    void visit(AttValue const&);
+
+    void visit(DefaultDecl const&);
+
+    void visit(AttDef const&);
+
+    void visit(AttListDecl const&);
+
+    void visit(NotationDecl const&);
+
+    void visit(EntityDecl const&);
+
+    void visit(EntityValue const&);
+
+    void visit(EntityDef const&);
+
+    void visit(PEDef const&);
+
+    void visit(NDataDecl const&);
+
+    void visit(ConditionalSect const&);
+
+    void visit(OneExtSubsetDecl const&);
+
+    void visit(ExtSubsetDecl const&);
+
+    void visit(MarkupDecl const&);
+
+    void visit(CData const&);
+
+    void visit(Content const&);
+
+    void visit(XmlDeclaration const&);
+
+    void visit(XmlDocument const&);
+
+    /// Reports whether the type() field of a message holds a value accepted
+    /// by the message's Type_IsValid function.
+    /// T must provide a static Type_IsValid and a type() accessor.
+    template <typename T>
+    bool isValid(T const& messageType) {
+        return T::Type_IsValid(messageType.type());
+    }
+
+    /// Presumably returns the input with non-ASCII bytes removed; see the
+    /// definition for the exact filtering rule.
+    std::string removeNonAscii(std::string const&);
+    /// Returns the uri string for a predefined element identifier.
+    std::string getUri(Element_Id _x);
+    /// Returns the predefined element text for an identifier and property.
+    std::string getPredefined(Element_Id _x, std::string const&);
+
+    /// Accumulates the generated text.
+    std::ostringstream m_output;
+
+    /// XInclude namespace declaration text.
+    static constexpr auto s_XInclude = "xmlns:xi=\"http://www.w3.org/2001/XInclude\"";
+};
+}
+