diff --git a/.gitignore b/.gitignore
index 84516529a..64a0c8561 100644
--- a/.gitignore
+++ b/.gitignore
@@ -105,3 +105,6 @@ tests/scripts/*.log
 tests/scripts/*.trs
 tests/test-suite.log
 Testing/
+
+# CMake build directory created by build_fuzzer.sh
+build/
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4254f89bb..6425cd1ec 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -177,8 +177,18 @@ install(
 # Process subdirectories
 add_subdirectory(doc)
 add_subdirectory(src)
-add_subdirectory(tests)
-add_subdirectory(samples)
+# The fuzzer switches are tested for truth, not for being DEFINED, so that
+# -DXERCES_BUILD_FUZZERS=OFF behaves like leaving the option unset.
+# A local fuzzer build skips the tests and samples.
+if (NOT XERCES_BUILD_FUZZERS)
+  add_subdirectory(tests)
+  add_subdirectory(samples)
+endif()
+
+# Fuzz targets are built for local fuzzing and for OSS-Fuzz.
+if (XERCES_BUILD_FUZZERS OR XERCES_BUILD_FOR_OSS_FUZZ)
+  add_subdirectory(fuzzers)
+endif()
 
 # Display configuration summary
 message(STATUS "")
diff --git a/build_fuzzer.sh b/build_fuzzer.sh
new file mode 100755
index 000000000..adbfe8e1a
--- /dev/null
+++ b/build_fuzzer.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+set -e
+rm -rf build
+mkdir build
+cd build
+CC=clang CXX=clang++ CXXFLAGS="-std=c++14" cmake .. -DXERCES_BUILD_FUZZERS=1 -Wfatal-errors
+CC=clang CXX=clang++ CXXFLAGS="-std=c++14" make -j8
+
diff --git a/cmake/XercesIntTypes.cmake b/cmake/XercesIntTypes.cmake
index 6ad7dd313..85c94e1b9 100644
--- a/cmake/XercesIntTypes.cmake
+++ b/cmake/XercesIntTypes.cmake
@@ -58,14 +58,14 @@ set(HAVE_OFF_T ${SIZEOF_OFF_T})
 set(HAVE_SIZE_T ${SIZEOF_SIZE_T})
 set(HAVE_SSIZE_T ${SSIZEOF_SSIZE_T})
 set(HAVE_WCHAR_T ${WCHAROF_WCHAR_T})
-if(SIZEOF_SIZE_T)
+if(HAVE_SIZEOF_SIZE_T)
   set(XERCES_SIZE_T size_t)
   set(XERCES_SIZE_MAX SIZE_MAX)
 else()
   set(XERCES_SIZE_T "unsigned long")
   set(XERCES_SIZE_MAX ULONG_MAX)
 endif()
-if(SIZEOF_SSIZE_T)
+if(HAVE_SIZEOF_SSIZE_T)
   set(XERCES_SSIZE_T ssize_t)
   set(XERCES_SSIZE_MAX SSIZE_MAX)
 else()
diff --git a/doc/build.xml b/doc/build.xml
index 3f706b959..aedb4bde9 100644
--- a/doc/build.xml
+++ b/doc/build.xml
@@ -572,14 +572,16 @@ AIX PowerPC IBM XL C++ ./configure CXX=xlC_r CC=xlc_r
- gmake libxerces_c_la_LDFLAGS=-qmkshrobj + gmake libxerces_c_la_LDFLAGS=-qmkshrobj
+ (for xlC v11-v13, libxerces_c_la_LDFLAGS is not needed, but CXXFLAGS=-rtti is required because RTTI is otherwise disabled by default) AIX PowerPC-64 IBM XL C++ export OBJECT_MODE=64
./configure CXX=xlC_r CC=xlc_r CXXFLAGS=-q64 CFLAGS=-q64
- gmake libxerces_c_la_LDFLAGS=-qmkshrobj + gmake libxerces_c_la_LDFLAGS=-qmkshrobj
+ (for xlC v11-v13, libxerces_c_la_LDFLAGS is not needed, but CXXFLAGS="-q64 -rtti" is required because RTTI is otherwise disabled by default)
 HP-UX IA-64-32
diff --git a/fuzzers/CMakeLists.txt b/fuzzers/CMakeLists.txt
new file mode 100644
index 000000000..d3dd27307
--- /dev/null
+++ b/fuzzers/CMakeLists.txt
@@ -0,0 +1,95 @@
+# CMake build for the xerces-c fuzz targets
+#
+# Written by Roger Leigh
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Definitions required for building
+add_definitions(
+  -DHAVE_CONFIG_H=1
+)
+# Search the project binary dir for config.h
+include_directories(
+  ${PROJECT_BINARY_DIR}
+  ${PROJECT_SOURCE_DIR}/src
+  ${PROJECT_BINARY_DIR}/src
+  ${CMAKE_CURRENT_SOURCE_DIR}
+)
+
+# add_fuzzer_executable(<name> <source>...)
+# Builds a fuzz target linked against xerces-c. A local build
+# (XERCES_BUILD_FUZZERS) links libFuzzer; an OSS-Fuzz build links the engine
+# named by $ENV{LIB_FUZZING_ENGINE}.
+macro(add_fuzzer_executable name)
+  add_executable(${name} ${ARGN})
+  target_link_libraries(${name} xerces-c)
+  if (XERCES_BUILD_FUZZERS)
+    target_compile_options(${name} PUBLIC -fsanitize=fuzzer-no-link)
+    target_link_libraries(${name} -fsanitize=fuzzer)
+  elseif(XERCES_BUILD_FOR_OSS_FUZZ)
+    target_link_libraries(${name} $ENV{LIB_FUZZING_ENGINE})
+  endif()
+  set_target_properties(${name} PROPERTIES FOLDER "Fuzzers")
+endmacro()
+
+add_fuzzer_executable(fuzz_parser_target
+  src/xerces_fuzz_common.cpp
+  src/parse_target.cpp
+)
+
+include(ExternalProject)
+
+set(EXTERNAL_INSTALL_LOCATION ${CMAKE_CURRENT_BINARY_DIR}/external)
+
+ExternalProject_Add(libprotobuf-mutator
+  GIT_SHALLOW 1
+  GIT_REPOSITORY https://github.com/google/libprotobuf-mutator.git
+  CMAKE_ARGS -DCMAKE_BUILD_TYPE=Release -DLIB_PROTO_MUTATOR_DOWNLOAD_PROTOBUF=ON -DLIB_PROTO_MUTATOR_TESTING=false -DCMAKE_INSTALL_PREFIX=${EXTERNAL_INSTALL_LOCATION}
+)
+
+set(PROTOC_PATH ${CMAKE_CURRENT_BINARY_DIR}/libprotobuf-mutator-prefix/src/libprotobuf-mutator-build/external.protobuf/bin/protoc)
+set(PROTO_GEN_DIR ${CMAKE_CURRENT_BINARY_DIR}/genfiles)
+
+# Generate xml.pb.cc/xml.pb.h with the protoc built by libprotobuf-mutator.
+# make_directory succeeds when the directory already exists; a bare mkdir fails.
+add_custom_command(
+  OUTPUT ${PROTO_GEN_DIR}/xml.pb.cc ${PROTO_GEN_DIR}/xml.pb.h
+  DEPENDS libprotobuf-mutator ${CMAKE_CURRENT_SOURCE_DIR}/src/xml.proto
+  WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/src
+  COMMAND ${CMAKE_COMMAND} -E make_directory ${PROTO_GEN_DIR}
+  COMMAND ${PROTOC_PATH} xml.proto --cpp_out=${PROTO_GEN_DIR}
+)
+add_custom_target(xml_proto_files ALL DEPENDS ${PROTO_GEN_DIR}/xml.pb.cc)
+
+add_fuzzer_executable(fuzz_parser_target_proto
+  src/xerces_fuzz_common.cpp
+  src/parse_target_proto.cpp
+  src/xmlProtoConverter.cpp
+  ${PROTO_GEN_DIR}/xml.pb.cc
+)
+add_dependencies(fuzz_parser_target_proto xml_proto_files)
+add_dependencies(fuzz_parser_target_proto libprotobuf-mutator)
+target_include_directories(fuzz_parser_target_proto PUBLIC
+  ${EXTERNAL_INSTALL_LOCATION}/include
+  ${EXTERNAL_INSTALL_LOCATION}/include/libprotobuf-mutator/
+  ${EXTERNAL_INSTALL_LOCATION}/../libprotobuf-mutator-prefix/src/libprotobuf-mutator-build/external.protobuf/include
+  ${PROTO_GEN_DIR}
+)
+target_link_libraries(fuzz_parser_target_proto
+  ${EXTERNAL_INSTALL_LOCATION}/lib/libprotobuf-mutator-libfuzzer.a
+  ${EXTERNAL_INSTALL_LOCATION}/lib/libprotobuf-mutator.a
+  ${CMAKE_CURRENT_BINARY_DIR}/libprotobuf-mutator-prefix/src/libprotobuf-mutator-build/external.protobuf/src/external.protobuf-build/libprotobuf.a)
+
diff --git a/fuzzers/src/parse_target.cpp b/fuzzers/src/parse_target.cpp
new file mode 100755
index 000000000..5e976765a
--- /dev/null
+++ b/fuzzers/src/parse_target.cpp
@@ -0,0 +1,28 @@
+/*
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+################################################################################
+*/
+#include "xerces_fuzz_common.h"
+
+#include "xercesc/framework/MemBufInputSource.hpp"
+#include "xercesc/parsers/SAXParser.hpp"
+#include "xercesc/util/OutOfMemoryException.hpp"
+
+// libFuzzer entry point: hands the raw input bytes to the SAX parser.
+// The return value is always 0.
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+    parseInMemory(Data, Size);
+    return 0;
+}
diff --git a/fuzzers/src/parse_target_proto.cpp b/fuzzers/src/parse_target_proto.cpp
new file mode 100644
index 000000000..cc2c83f08
--- /dev/null
+++ b/fuzzers/src/parse_target_proto.cpp
@@ -0,0 +1,40 @@
+/*
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+################################################################################
+*/
+#include "xerces_fuzz_common.h"
+#include "xmlProtoConverter.h"
+
+#include "xercesc/framework/MemBufInputSource.hpp"
+#include "xercesc/parsers/SAXParser.hpp"
+#include "xercesc/util/OutOfMemoryException.hpp"
+
+#include "xml.pb.h"
+
+#include "libprotobuf-mutator/src/libfuzzer/libfuzzer_macro.h"
+
+#include <string>
+
+namespace {
+    // Kept alive for the whole process; presumably silences protobuf logging - confirm.
+    protobuf_mutator::protobuf::LogSilencer log_silencer;
+}
+
+// Structure-aware entry point: the mutated XmlDocument message is rendered to
+// XML text by ProtoConverter and that text is handed to the SAX parser.
+DEFINE_PROTO_FUZZER(const xmlProtoFuzzer::XmlDocument& xmlDocument) {
+    const std::string xmlData = xmlProtoFuzzer::ProtoConverter().protoToString(xmlDocument);
+    parseInMemory(reinterpret_cast<const uint8_t *>(xmlData.data()), xmlData.size());
+}
diff --git a/fuzzers/src/xerces_fuzz_common.cpp b/fuzzers/src/xerces_fuzz_common.cpp
new file mode 100755
index 000000000..a76b383a8
--- /dev/null
+++ b/fuzzers/src/xerces_fuzz_common.cpp
@@ -0,0 +1,47 @@
+/*
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+################################################################################
+*/
+#include "xerces_fuzz_common.h"
+
+XERCES_CPP_NAMESPACE_USE
+static bool initialized = false;
+
+// Parses the Size bytes at Data as one XML document with a SAXParser (automatic
+// validation; namespace and schema processing off). The buffer is not adopted.
+void parseInMemory(const uint8_t *Data, size_t Size)
+{
+    // The Xerces runtime is initialized on the first call and is never
+    // terminated, because this function runs once per fuzz input.
+    if (!initialized)
+    {
+        XMLPlatformUtils::Initialize();
+        initialized = true;
+    }
+    static const char *gMemBufId = "prodInfo";
+
+    // Automatic storage releases both objects even if parse() throws. The input
+    // source is declared first so that the parser is destroyed before it.
+    MemBufInputSource memBufIS(
+        reinterpret_cast<const XMLByte *>(Data), Size, gMemBufId, false);
+    SAXParser parser;
+    parser.setValidationScheme(SAXParser::Val_Auto);
+    parser.setDoNamespaces(false);
+    parser.setDoSchema(false);
+    parser.setHandleMultipleImports(true);
+    parser.setValidationSchemaFullChecking(false);
+
+    parser.parse(memBufIS);
+}
diff --git a/fuzzers/src/xerces_fuzz_common.h b/fuzzers/src/xerces_fuzz_common.h
new file mode 100644
index 000000000..9eaf88bd7
--- /dev/null
+++ b/fuzzers/src/xerces_fuzz_common.h
@@ -0,0 +1,27 @@
+/*
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+################################################################################
+*/
+#pragma once
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "xercesc/parsers/SAXParser.hpp"
+#include "xercesc/framework/MemBufInputSource.hpp"
+#include "xercesc/util/OutOfMemoryException.hpp"
+
+// Parses the Size bytes at Data as one XML document; used by the fuzz targets.
+void parseInMemory(const uint8_t *Data, size_t Size);
diff --git a/fuzzers/src/xml.proto b/fuzzers/src/xml.proto
new file mode 100755
index 000000000..75e54b6f0
--- /dev/null
+++ b/fuzzers/src/xml.proto
@@ -0,0 +1,339 @@
+/*
+ * Copyright (C) 2019 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +syntax = "proto3"; + +message Misc { + oneof misc_oneof { + string comment = 1; + ProcessingInstruction inst = 2; + } +} + +message PEReference { + string name = 1; +} + +message ElementDecl { + enum ContentSpec { + EMPTY = 0; + ANY = 1; + FUZZ = 2; + MIXED = 3; + CHILDREN = 4; + } + string name = 1; + ContentSpec spec = 2; + repeated string cdata = 3; +} + +message AttrType { + enum Type { + CDATA = 0; + ID = 1; + IDREF = 2; + IDREFS = 3; + ENTITY = 4; + ENTITIES = 5; + NMTOKEN = 6; + NMTOKENS = 7; + } + Type ty = 1; +} + +message EnumeratedType { + repeated string names = 1; +} + +message AttrListDecl { + string name = 1; + AttrType atype = 2; + EnumeratedType etype = 3; + DefaultDecl def = 4; +} + +message ExternalId { + enum Type { + SYSTEM = 0; + PUBLIC = 1; + FUZZ = 2; + } + Type type = 1; + string system = 2; + string pub = 3; +} + +message AttValue { + enum Type { + ENTITY = 0; + CHAR = 1; + FUZZ = 2; + } + Type type = 1; + repeated string value = 2; +} + +message DefaultDecl { + enum Type { + REQUIRED = 0; + IMPLIED = 1; + FIXED = 2; + FUZZ = 3; + } + Type type = 1; + AttValue att = 2; +} + +message AttDef { + // TODO: Add enumerated type + enum Type { + CDATA = 0; + ID = 1; + IDREF = 2; + IDREFS = 3; + ENTITY = 4; + ENTITIES = 5; + NMTOKEN = 6; + NMTOKENS = 7; + FUZZ = 8; + } + string name = 1; + Type type = 2; + DefaultDecl def = 3; +} + +message AttListDecl { + string name = 1; + repeated AttDef attdefs = 2; +} + +message NotationDecl { + string name = 1; + oneof notation_oneof { + ExternalId ext = 2; + string pub = 3; + string fuzz = 4; + } +} + +message EntityValue { + enum Type { + ENTITY = 0; + CHAR = 1; + PEREF = 2; + FUZZ = 3; + } + Type type = 1; + repeated string name = 2; +} + +message NDataDecl { + string name = 1; +} + +message EntityDef { + oneof entity_oneof { + ExternalId ext = 1; + EntityValue val = 2; + } + NDataDecl ndata = 3; +} + +message PEDef { + oneof pedef_oneof { + EntityValue val = 1; + ExternalId id = 2; + } +} + 
+message EntityDecl { + enum Type { + GEDECL = 0; + PEDECL = 1; + } + Type type = 1; + string name = 2; + EntityDef ent = 3; + PEDef pedef = 4; +} + +message ConditionalSect { + enum Type { + INCLUDE = 0; + IGNORE = 1; + FUZZ = 2; + } + Type type = 1; + ExtSubsetDecl ext = 2; + // TODO: Make this recursive + // See https://www.w3.org/TR/xml/#NT-conditionalSect + repeated string ignores = 3; +} + +message OneExtSubsetDecl { + oneof extsubset_oneof { + MarkupDecl m = 1; + ConditionalSect c = 2; + } +} + +message ExtSubsetDecl { + repeated OneExtSubsetDecl decls = 1; +} + +message MarkupDecl { + oneof markup_oneof { + ElementDecl e = 1; + AttListDecl a = 2; + NotationDecl n = 3; + Misc m = 4; + EntityDecl entity = 5; + ExtSubsetDecl ext = 6; + } +} + +message DocTypeDecl { + string name = 1; + ExternalId ext = 2; + repeated MarkupDecl mdecl = 3; +} + +message Prolog { + XmlDeclaration decl = 1; + DocTypeDecl doctype = 2; + repeated Misc misc = 3; +} + +message KeyValue { + enum XmlNamespace { + ATTRIBUTES = 0; + BASE = 1; + CATALOG = 2; + ID = 3; + LANG = 4; + LINK = 5; + SPACE = 6; + SPECIAL = 7; + TEST = 8; + FUZZ = 9; + } + XmlNamespace type = 1; + string key = 2; + string value = 3; +} + +message ProcessingInstruction { + string name = 1; + repeated KeyValue kv = 2; +} + +message CData { + string data = 1; +} + +message Content { + // TODO: Add other content types + oneof content_oneof { + string str = 1; + Element e = 2; + CData c = 3; + } +} + +message Element { + enum Type { + PREDEFINED = 0; + FUZZ = 1; + } + enum Id { + XIINCLUDE = 0; + XIFALLBACK = 1; + // Attributes of xinclude + XIHREF = 2; + XIPARSE = 3; + XIXPOINTER = 4; + XIENCODING = 5; + XIACCEPT = 6; + XIACCEPTLANG = 7; + } + Type type = 1; + Id id = 2; + string name = 3; + repeated KeyValue kv = 4; + Content content = 5; + string childprop = 6; +} + +message VersionNum { + enum Type { + STANDARD = 0; + FUZZ = 1; + } + Type type = 1; + uint64 major = 2; + uint64 minor = 3; +} + +message Encodings { + 
enum Enc {
+    BIG5 = 0;
+    EUCJP = 1;
+    EUCKR = 2;
+    GB18030 = 3;
+    ISO2022JP = 4;
+    ISO2022KR = 5;
+    ISO88591 = 6;
+    ISO88592 = 7;
+    ISO88593 = 8;
+    ISO88594 = 9;
+    ISO88595 = 10;
+    ISO88596 = 11;
+    ISO88597 = 12;
+    ISO88598 = 13;
+    ISO88599 = 14;
+    SHIFTJIS = 15;
+    TIS620 = 16;
+    USASCII = 17;
+    UTF8 = 18;
+    UTF16 = 19;
+    UTF16BE = 20;
+    UTF16LE = 21;
+    WINDOWS31J = 22;
+    WINDOWS1255 = 23;
+    WINDOWS1256 = 24;
+    FUZZ = 25;
+  }
+  Enc name = 1;
+  string fuzz = 2;
+}
+
+message XmlDeclaration {
+  VersionNum ver = 1;
+  Encodings enc = 2;
+  enum Standalone {
+    YES = 0;
+    NO = 1;
+  }
+  Standalone standalone = 3;
+  string fuzz = 4;
+}
+
+message XmlDocument {
+  Prolog p = 1;
+  repeated Element e = 2;
+}
+
+package xmlProtoFuzzer;
\ No newline at end of file
diff --git a/fuzzers/src/xmlProtoConverter.cpp b/fuzzers/src/xmlProtoConverter.cpp
new file mode 100644
index 000000000..f8a47dee2
--- /dev/null
+++ b/fuzzers/src/xmlProtoConverter.cpp
@@ -0,0 +1,775 @@
+/*
+ * Copyright (C) 2019 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "xmlProtoConverter.h"
+
+#include <algorithm>
+#include <cctype>
+
+using namespace std;
+using namespace xmlProtoFuzzer;
+
+/// Returns _utf8 reduced to the characters std::isalnum accepts.
+/// @return the filtered string, or "fuzz" when nothing is left, so the result
+/// is never empty
+string ProtoConverter::removeNonAscii(string const& _utf8)
+{
+    string asciiStr{_utf8};
+    asciiStr.erase(remove_if(asciiStr.begin(), asciiStr.end(), [](unsigned char c) {
+        // The <cctype> classifiers are undefined for negative arguments, which
+        // is what a plain char holds for bytes >= 0x80 where char is signed.
+        return !std::isalnum(c);
+    }), asciiStr.end());
+    return asciiStr.empty() ? "fuzz" : asciiStr;
+}
+
+
+/// Emits one Misc item (a comment or a processing instruction).
+// NOTE(review): the comment branch writes only "\n" and never reads
+// _x.comment(); the markup looks stripped from this patch - confirm.
+void ProtoConverter::visit(Misc const& _x)
+{
+    switch (_x.misc_oneof_case())
+    {
+    case Misc::kComment:
+        m_output << "\n";
+        break;
+    case Misc::kInst:
+        visit(_x.inst());
+        break;
+    case Misc::MISC_ONEOF_NOT_SET:
+        break;
+    }
+}
+
+/// Emits the prolog: XML declaration, doctype declaration, then the Misc
+/// items, in that order.
+void ProtoConverter::visit(Prolog const& _x)
+{
+    visit(_x.decl());
+    visit(_x.doctype());
+    for (auto const& misc: _x.misc())
+        visit(misc);
+}
+
+/// Emits one attribute followed by a space. Predefined types use a fixed
+/// "xml:" name; FUZZ uses the sanitized key, prefixed with "xmlns:" when the
+/// serialized size of the message is odd. Values are always sanitized.
+/// Nothing is emitted for an invalid type.
+void ProtoConverter::visit(KeyValue const& _x)
+{
+    if (!KeyValue::XmlNamespace_IsValid(_x.type()))
+        return;
+
+    switch (_x.type())
+    {
+    case KeyValue::ATTRIBUTES:
+        m_output << "xml:attributes=\"" << removeNonAscii(_x.value()) << "\" ";
+        break;
+    case KeyValue::BASE:
+        m_output << "xml:base=\"" << removeNonAscii(_x.value()) << "\" ";
+        break;
+    case KeyValue::CATALOG:
+        m_output << "xml:catalog=\"" << removeNonAscii(_x.value()) << "\" ";
+        break;
+    case KeyValue::ID:
+        m_output << "xml:id=\"" << removeNonAscii(_x.value()) << "\" ";
+        break;
+    case KeyValue::LANG:
+        m_output << "xml:lang=\"" << removeNonAscii(_x.value()) << "\" ";
+        break;
+    case KeyValue::LINK:
+        m_output << "xml:link=\"" << removeNonAscii(_x.value()) << "\" ";
+        break;
+    case KeyValue::SPACE:
+        m_output << "xml:space=\"" << removeNonAscii(_x.value()) << "\" ";
+        break;
+    case KeyValue::SPECIAL:
+        m_output << "xml:special=\"" << removeNonAscii(_x.value()) << "\" ";
+        break;
+    case KeyValue::TEST:
+        m_output << "xml:test=\"" << removeNonAscii(_x.value()) << "\" ";
+        break;
+    case KeyValue::FUZZ:
+        if (_x.ByteSizeLong() % 2)
+            m_output << "xmlns:" << removeNonAscii(_x.key()) << "=\"" << removeNonAscii(_x.value()) << "\" ";
+        else
+            m_output << removeNonAscii(_x.key()) << "=\"" << removeNonAscii(_x.value()) << "\" ";
+        break;
+    case KeyValue_XmlNamespace_KeyValue_XmlNamespace_INT_MIN_SENTINEL_DO_NOT_USE_:
+    case KeyValue_XmlNamespace_KeyValue_XmlNamespace_INT_MAX_SENTINEL_DO_NOT_USE_:
+        break;
+    }
+}
+
+// NOTE(review): only "\n" is written and _x is never read; the markup looks
+// stripped from this patch - confirm.
+void ProtoConverter::visit(ProcessingInstruction const& _x)
+{
+    m_output << "\n";
+}
+
+void
ProtoConverter::visit(Content const& _x) +{ + switch (_x.content_oneof_case()) + { + case Content::kStr: + m_output << _x.str() << "\n"; + break; + case Content::kE: + visit(_x.e()); + m_output << "\n"; + break; + case Content::kC: + visit(_x.c()); + m_output << "\n"; + break; + case Content::CONTENT_ONEOF_NOT_SET: + break; + } +} + +void ProtoConverter::visit(ElementDecl const& _x) +{ + if (!ElementDecl::ContentSpec_IsValid(_x.spec())) + return; + + m_output << ""; + break; + case ElementDecl::ANY: + m_output << "ANY>"; + break; + case ElementDecl::FUZZ: + m_output << "FUZZ>"; + break; + case ElementDecl::MIXED: + m_output << "(#PCDATA"; + for (auto const& pcdata: _x.cdata()) + m_output << "|" << pcdata; + m_output << ")"; + if (_x.cdata_size() > 0) + m_output << "*"; + m_output << ">"; + break; + case ElementDecl::CHILDREN: + { + m_output << "("; + string delim = ""; + for (auto const& str: _x.cdata()) { + m_output << delim << removeNonAscii(str); + delim = ", "; + } + m_output << ")>"; + break; + } + case ElementDecl_ContentSpec_ElementDecl_ContentSpec_INT_MIN_SENTINEL_DO_NOT_USE_: + case ElementDecl_ContentSpec_ElementDecl_ContentSpec_INT_MAX_SENTINEL_DO_NOT_USE_: + break; + } +} + +void ProtoConverter::visit(AttValue const& _x) +{ + if (!isValid(_x)) + return; + + m_output << "\""; + string prefix; + switch (_x.type()) + { + case AttValue::ENTITY: + prefix = "&"; + break; + case AttValue::CHAR: + if (_x.ByteSizeLong() % 2) + prefix = "&#"; + else + // TODO: Value that follows this must be a + // sequence of hex digits. 
+ prefix = "&#x"; + break; + case AttValue::FUZZ: + prefix = "fuzz"; + break; + case AttValue_Type_AttValue_Type_INT_MIN_SENTINEL_DO_NOT_USE_: + case AttValue_Type_AttValue_Type_INT_MAX_SENTINEL_DO_NOT_USE_: + break; + } + for (auto const& name: _x.value()) + m_output << prefix << removeNonAscii(name) << ";"; + m_output << "\""; +} + +void ProtoConverter::visit(DefaultDecl const& _x) +{ + if (!isValid(_x)) + return; + + switch (_x.type()) + { + case DefaultDecl::REQUIRED: + m_output << "#REQUIRED"; + break; + case DefaultDecl::IMPLIED: + m_output << "#IMPLIED"; + break; + case DefaultDecl::FIXED: + m_output << "#FIXED "; + visit(_x.att()); + break; + case DefaultDecl::FUZZ: + m_output << "#FUZZ"; + break; + case DefaultDecl_Type_DefaultDecl_Type_INT_MIN_SENTINEL_DO_NOT_USE_: + case DefaultDecl_Type_DefaultDecl_Type_INT_MAX_SENTINEL_DO_NOT_USE_: + break; + } +} + +void ProtoConverter::visit(AttDef const& _x) +{ + if (!isValid(_x)) + return; + + m_output << " " << removeNonAscii(_x.name()) << " "; + switch (_x.type()) + { + case AttDef::CDATA: + m_output << "CDATA "; + break; + case AttDef::ID: + m_output << "ID "; + break; + case AttDef::IDREF: + m_output << "IDREF "; + break; + case AttDef::IDREFS: + m_output << "IDREFS "; + break; + case AttDef::ENTITY: + m_output << "ENTITY "; + break; + case AttDef::ENTITIES: + m_output << "ENTITIES "; + break; + case AttDef::NMTOKEN: + m_output << "NMTOKEN "; + break; + case AttDef::NMTOKENS: + m_output << "NMTOKENS "; + break; + case AttDef::FUZZ: + m_output << "FUZZ "; + break; + case AttDef_Type_AttDef_Type_INT_MIN_SENTINEL_DO_NOT_USE_: + case AttDef_Type_AttDef_Type_INT_MAX_SENTINEL_DO_NOT_USE_: + break; + } + visit(_x.def()); +} + +void ProtoConverter::visit(AttListDecl const& _x) +{ + m_output << ""; +} + +void ProtoConverter::visit(NotationDecl const& _x) +{ + m_output << ""; +} + +void ProtoConverter::visit(NDataDecl const& _x) +{ + m_output << " NDATA " << _x.name(); +} + +void ProtoConverter::visit(EntityDef const& 
_x) +{ + switch (_x.entity_oneof_case()) + { + case EntityDef::kExt: + visit(_x.ext()); + if (_x.ByteSizeLong() % 2) + visit(_x.ndata()); + break; + case EntityDef::kVal: + visit(_x.val()); + break; + case EntityDef::ENTITY_ONEOF_NOT_SET: + break; + } +} + +void ProtoConverter::visit(PEDef const& _x) +{ + switch (_x.pedef_oneof_case()) + { + case PEDef::kVal: + visit(_x.val()); + break; + case PEDef::kId: + visit(_x.id()); + break; + case PEDef::PEDEF_ONEOF_NOT_SET: + break; + } +} + +void ProtoConverter::visit(EntityValue const& _x) +{ + if (!isValid(_x)) + return; + + m_output << "\""; + string prefix; + switch (_x.type()) + { + case EntityValue::ENTITY: + prefix = "&"; + break; + case EntityValue::CHAR: + if (_x.ByteSizeLong() % 2) + prefix = "&#"; + else + prefix = "&#x"; + break; + case EntityValue::PEREF: + prefix = "%"; + break; + case EntityValue::FUZZ: + prefix = "fuzz"; + break; + case EntityValue_Type_EntityValue_Type_INT_MIN_SENTINEL_DO_NOT_USE_: + case EntityValue_Type_EntityValue_Type_INT_MAX_SENTINEL_DO_NOT_USE_: + break; + } + for (auto const& ref: _x.name()) + m_output << prefix << ref << ";"; + m_output << "\""; +} + +void ProtoConverter::visit(EntityDecl const& _x) +{ + if (!isValid(_x)) + return; + + m_output << ""; +} + +void ProtoConverter::visit(ConditionalSect const& _x) +{ + if (!isValid(_x)) + return; + + switch (_x.type()) + { + case ConditionalSect::INCLUDE: + m_output << ""; + break; + case ConditionalSect::IGNORE: + m_output << ""; + m_output << "]]>"; + break; + case ConditionalSect::FUZZ: + m_output << ""; + break; + case ConditionalSect_Type_ConditionalSect_Type_INT_MIN_SENTINEL_DO_NOT_USE_: + case ConditionalSect_Type_ConditionalSect_Type_INT_MAX_SENTINEL_DO_NOT_USE_: + break; + } +} + + +void ProtoConverter::visit(OneExtSubsetDecl const& _x) +{ + switch (_x.extsubset_oneof_case()) + { + case OneExtSubsetDecl::kM: + visit(_x.m()); + break; + case OneExtSubsetDecl::kC: + visit(_x.c()); + break; + case 
OneExtSubsetDecl::EXTSUBSET_ONEOF_NOT_SET:
+        break;
+    }
+}
+
+
+/// Emits every declaration of the external subset in order.
+void ProtoConverter::visit(ExtSubsetDecl const& _x)
+{
+    for (auto const& decl: _x.decls())
+        visit(decl);
+}
+
+// NOTE(review): an empty string is written and _x is never read; the CDATA
+// markup looks stripped from this patch - confirm.
+void ProtoConverter::visit(CData const& _x)
+{
+    m_output << "";
+}
+
+/// Dispatches to the visitor of whichever declaration the oneof holds; an
+/// unset oneof emits nothing.
+void ProtoConverter::visit(MarkupDecl const& _x)
+{
+    switch (_x.markup_oneof_case())
+    {
+    case MarkupDecl::kE:
+        visit(_x.e());
+        break;
+    case MarkupDecl::kA:
+        visit(_x.a());
+        break;
+    case MarkupDecl::kN:
+        visit(_x.n());
+        break;
+    case MarkupDecl::kM:
+        visit(_x.m());
+        break;
+    case MarkupDecl::kEntity:
+        visit(_x.entity());
+        break;
+    case MarkupDecl::kExt:
+        visit(_x.ext());
+        break;
+    case MarkupDecl::MARKUP_ONEOF_NOT_SET:
+        break;
+    }
+}
+
+/// Returns predefined element from an Element_Id enum
+/// @param _x is an enum that holds the desired type of predefined value
+/// @param _prop is a string that holds the value of the desired type; it is
+/// sanitized with removeNonAscii() before use
+/// @return string holding the predefined value of the form
+/// name attribute=\"value\", or an empty string when _x is not a declared
+/// enumerator
+string ProtoConverter::getPredefined(Element_Id _x, string const& _prop)
+{
+    // Each case returns its own element; no case may fall through to the next.
+    switch (_x)
+    {
+    case Element::XIINCLUDE:
+    case Element::XIFALLBACK:
+    case Element::XIHREF:
+        return "xi:include href=\"fuzz.xml\"";
+    case Element::XIPARSE:
+        return "xi:include parse=\"xml\"";
+    case Element::XIXPOINTER:
+        return "xi:include xpointer=\"" + removeNonAscii(_prop) + "\"";
+    case Element::XIENCODING:
+        return "xi:include encoding=\"" + removeNonAscii(_prop) + "\"";
+    case Element::XIACCEPT:
+        return "xi:include accept=\"" + removeNonAscii(_prop) + "\"";
+    case Element::XIACCEPTLANG:
+        return "xi:include accept-language=\"" + removeNonAscii(_prop) + "\"";
+    case Element_Id_Element_Id_INT_MIN_SENTINEL_DO_NOT_USE_:
+    case Element_Id_Element_Id_INT_MAX_SENTINEL_DO_NOT_USE_:
+        return "xi:fuzz xifuzz=\"fuzz\"";
+    }
+    // No case matched: the caller emits no predefined child for an empty string.
+    return {};
+}
+
+/// Returns uri string for a given Element_Id type.
+/// Every id, valid or not, maps to the XInclude namespace declaration.
+string ProtoConverter::getUri(Element_Id _x)
+{
+    if (!Element::Id_IsValid(_x))
+        return s_XInclude;
+
+    switch (_x)
+    {
+    case Element::XIINCLUDE:
+    case Element::XIFALLBACK:
+    case Element::XIHREF:
+    case Element::XIPARSE:
+    case Element::XIXPOINTER:
+    case Element::XIENCODING:
+    case Element::XIACCEPT:
+    case Element::XIACCEPTLANG:
+    case Element_Id_Element_Id_INT_MIN_SENTINEL_DO_NOT_USE_:
+    case Element_Id_Element_Id_INT_MAX_SENTINEL_DO_NOT_USE_:
+        break;
+    }
+    // Returning after the switch guarantees a value on every path.
+    return s_XInclude;
+}
+
+void ProtoConverter::visit(Element const& _x)
+{
+    if (!isValid(_x))
+        return;
+
+    // Predefined child node
+    string child = {};
+    // Predefined uri for child node
+    string pUri = {};
+    // Element name
+    string name = removeNonAscii(_x.name());
+
+    switch (_x.type())
+    {
+    case Element::PREDEFINED:
+        child = getPredefined(_x.id(), _x.childprop());
+        pUri = getUri(_x.id());
+        break;
+    case Element::FUZZ:
+    case Element_Type_Element_Type_INT_MIN_SENTINEL_DO_NOT_USE_:
+    case Element_Type_Element_Type_INT_MAX_SENTINEL_DO_NOT_USE_:
+        break;
+    }
+
+    //
+    //
+    //
+
+    // Start name tag: Must be Ascii?
+ m_output << "<" << name << " "; + + // Add uri to name tag + if (!pUri.empty()) + m_output << pUri << " "; + for (auto const& prop: _x.kv()) + visit(prop); + m_output << ">\n"; + + // Add attribute + if (!child.empty()) + m_output << "<" << child << "/>\n"; + + // Add content + visit(_x.content()); + + // Close name tag + m_output << "\n"; +} + +void ProtoConverter::visit(ExternalId const& _x) +{ + if (!isValid(_x)) + return; + + switch (_x.type()) + { + case ExternalId::SYSTEM: + m_output << "SYSTEM " << "\"" << removeNonAscii(_x.system()) << "\""; + break; + case ExternalId::PUBLIC: + m_output << "PUBLIC " << "\"" << removeNonAscii(_x.pub()) << "\"" + << " " << "\"" << removeNonAscii(_x.system()) << "\""; + break; + case ExternalId::FUZZ: + m_output << "FUZZ " << "\"" << removeNonAscii(_x.pub()) << "\""; + break; + case ExternalId_Type_ExternalId_Type_INT_MIN_SENTINEL_DO_NOT_USE_: + case ExternalId_Type_ExternalId_Type_INT_MAX_SENTINEL_DO_NOT_USE_: + break; + } +} + +void ProtoConverter::visit(DocTypeDecl const& _x) +{ + m_output << "\n"; +} + +void ProtoConverter::visit(VersionNum const& _x) +{ + if (!isValid(_x)) + return; + + switch (_x.type()) + { + case VersionNum::STANDARD: + m_output << "\"1.0\""; + break; + case VersionNum::FUZZ: + case VersionNum_Type_VersionNum_Type_INT_MIN_SENTINEL_DO_NOT_USE_: + case VersionNum_Type_VersionNum_Type_INT_MAX_SENTINEL_DO_NOT_USE_: + m_output << "\"" << _x.major() << "." 
<< _x.minor() << "\""; + break; + } +} + +void ProtoConverter::visit(Encodings const& _x) +{ + if (!Encodings::Enc_IsValid(_x.name())) + return; + + m_output << " encoding=\""; + switch (_x.name()) + { + case Encodings::BIG5: + m_output << "BIG5"; + break; + case Encodings::EUCJP: + m_output << "EUC-JP"; + break; + case Encodings::EUCKR: + m_output << "EUC-KR"; + break; + case Encodings::GB18030: + m_output << "GB18030"; + break; + case Encodings::ISO2022JP: + m_output << "ISO-2022-JP"; + break; + case Encodings::ISO2022KR: + m_output << "ISO-2022-KR"; + break; + case Encodings::ISO88591: + m_output << "ISO-8859-1"; + break; + case Encodings::ISO88592: + m_output << "ISO-8859-2"; + break; + case Encodings::ISO88593: + m_output << "ISO-8859-3"; + break; + case Encodings::ISO88594: + m_output << "ISO-8859-4"; + break; + case Encodings::ISO88595: + m_output << "ISO-8859-5"; + break; + case Encodings::ISO88596: + m_output << "ISO-8859-6"; + break; + case Encodings::ISO88597: + m_output << "ISO-8859-7"; + break; + case Encodings::ISO88598: + m_output << "ISO-8859-8"; + break; + case Encodings::ISO88599: + m_output << "ISO-8859-9"; + break; + case Encodings::SHIFTJIS: + m_output << "SHIFT_JIS"; + break; + case Encodings::TIS620: + m_output << "TIS-620"; + break; + case Encodings::USASCII: + m_output << "US-ASCII"; + break; + case Encodings::UTF8: + m_output << "UTF-8"; + break; + case Encodings::UTF16: + m_output << "UTF-16"; + break; + case Encodings::UTF16BE: + m_output << "UTF-16BE"; + break; + case Encodings::UTF16LE: + m_output << "UTF-16LE"; + break; + case Encodings::WINDOWS31J: + m_output << "WINDOWS-31J"; + break; + case Encodings::WINDOWS1255: + m_output << "WINDOWS-1255"; + break; + case Encodings::WINDOWS1256: + m_output << "WINDOWS-1256"; + break; + case Encodings::FUZZ: + m_output << removeNonAscii(_x.fuzz()); + break; + case Encodings_Enc_Encodings_Enc_INT_MIN_SENTINEL_DO_NOT_USE_: + case Encodings_Enc_Encodings_Enc_INT_MAX_SENTINEL_DO_NOT_USE_: + break; + 
} + m_output << "\""; +} + +void ProtoConverter::visit(XmlDeclaration const& _x) +{ + m_output << R"(\n"; +} + +void ProtoConverter::visit(XmlDocument const& _x) +{ + visit(_x.p()); + for (auto const& element: _x.e()) + visit(element); +} + +string ProtoConverter::protoToString(XmlDocument const& _x) +{ + visit(_x); + return m_output.str(); +} diff --git a/fuzzers/src/xmlProtoConverter.h b/fuzzers/src/xmlProtoConverter.h new file mode 100644 index 000000000..501dde36c --- /dev/null +++ b/fuzzers/src/xmlProtoConverter.h @@ -0,0 +1,106 @@ +/* + * Copyright (C) 2019 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + +#include "xml.pb.h" + +namespace xmlProtoFuzzer { +class ProtoConverter +{ +public: + ProtoConverter() = default; + + ProtoConverter(ProtoConverter const&) = delete; + + ProtoConverter(ProtoConverter&&) = delete; + + std::string protoToString(XmlDocument const&); + +private: + void visit(Prolog const&); + + void visit(ProcessingInstruction const&); + + void visit(ExternalId const&); + + void visit(DocTypeDecl const&); + + void visit(VersionNum const&); + + void visit(Encodings const&); + + void visit(Misc const&); + + void visit(KeyValue const&); + + void visit(Element const&); + + void visit(ElementDecl const&); + + void visit(AttValue const&); + + void visit(DefaultDecl const&); + + void visit(AttDef const&); + + void visit(AttListDecl const&); + + void visit(NotationDecl const&); + + void visit(EntityDecl const&); + + void visit(EntityValue const&); + + void visit(EntityDef const&); + + void visit(PEDef const&); + + void visit(NDataDecl const&); + + void visit(ConditionalSect const&); + + void visit(OneExtSubsetDecl const&); + + void visit(ExtSubsetDecl const&); + + void visit(MarkupDecl const&); + + void visit(CData const&); + + void visit(Content const&); + + void visit(XmlDeclaration const&); + + void visit(XmlDocument const&); + + template + bool isValid(T const& messageType) { + return T::Type_IsValid(messageType.type()); + } + + std::string removeNonAscii(std::string const&); + std::string getUri(Element_Id _x); + std::string getPredefined(Element_Id _x, std::string const&); + + std::ostringstream m_output; + + static constexpr auto s_XInclude = "xmlns:xi=\"http://www.w3.org/2001/XInclude\""; +}; +} + diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index c29aa257b..cccfbda05 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1272,6 +1272,10 @@ add_library(xerces-c ${libxerces_c_SOURCES} ${libxerces_c_RESOURCES}) target_link_libraries(xerces-c ${libxerces_c_DEPS}) +if (XERCES_BUILD_FUZZERS) + 
target_compile_options(xerces-c PUBLIC -fsanitize=fuzzer-no-link) + #target_link_libraries(xerces-c -fsanitize=fuzzer-no-link) +endif() if(XERCES_USE_NETACCESSOR_CURL) target_include_directories(xerces-c SYSTEM PRIVATE ${CURL_INCLUDE_DIRS}) endif() @@ -1289,11 +1293,13 @@ elseif(UNIX) # set the version in the filename, and create the symlink at install # time. Note: could be dropped when the SONAME is updated and # libtool compatibility is no longer required. - set_target_properties(xerces-c PROPERTIES OUTPUT_NAME "xerces-c-${INTERFACE_VERSION_D}") - file(GENERATE - OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/InstallLibrarySymlink.cmake" - CONTENT "execute_process(COMMAND ln -sf \"$\" \"\$ENV{DESTDIR}${CMAKE_INSTALL_FULL_LIBDIR}/libxerces-c.so\")") - install(SCRIPT "${CMAKE_CURRENT_BINARY_DIR}/InstallLibrarySymlink.cmake") + if(BUILD_SHARED_LIBS) + set_target_properties(xerces-c PROPERTIES OUTPUT_NAME "xerces-c-${INTERFACE_VERSION_D}") + file(GENERATE + OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/InstallLibrarySymlink.cmake" + CONTENT "execute_process(COMMAND ln -sf \"$\" \"\$ENV{DESTDIR}${CMAKE_INSTALL_FULL_LIBDIR}/libxerces-c${CMAKE_SHARED_LIBRARY_SUFFIX}\")") + install(SCRIPT "${CMAKE_CURRENT_BINARY_DIR}/InstallLibrarySymlink.cmake") + endif() else() # Not used for the common cases, though this would be the default if # not for libtool compatibility. diff --git a/src/xercesc/util/Janitor.hpp b/src/xercesc/util/Janitor.hpp index 24ff372a0..cf06e6762 100644 --- a/src/xercesc/util/Janitor.hpp +++ b/src/xercesc/util/Janitor.hpp @@ -154,10 +154,10 @@ private : MFPT fToCall; }; - +#if defined(__GNUC__) || (! defined(_AIX) && ! defined(__hpux) && ! 
defined(__sun)) XERCES_TEMPLATE_EXTERN template class XMLUTIL_EXPORT ArrayJanitor; XERCES_TEMPLATE_EXTERN template class XMLUTIL_EXPORT ArrayJanitor; - +#endif XERCES_CPP_NAMESPACE_END diff --git a/src/xercesc/util/NetAccessors/Curl/CurlURLInputStream.cpp b/src/xercesc/util/NetAccessors/Curl/CurlURLInputStream.cpp index 8d9befd06..5ed659389 100644 --- a/src/xercesc/util/NetAccessors/Curl/CurlURLInputStream.cpp +++ b/src/xercesc/util/NetAccessors/Curl/CurlURLInputStream.cpp @@ -56,6 +56,7 @@ XERCES_CPP_NAMESPACE_BEGIN CurlURLInputStream::CurlURLInputStream(const XMLURL& urlSource, const XMLNetHTTPInfo* httpInfo/*=0*/) : fMulti(0) , fEasy(0) + , fHeadersList(0) , fMemoryManager(urlSource.getMemoryManager()) , fURLSource(urlSource) , fTotalBytesRead(0) @@ -69,23 +70,23 @@ CurlURLInputStream::CurlURLInputStream(const XMLURL& urlSource, const XMLNetHTTP , fPayloadLen(0) , fContentType(0) { - // Allocate the curl multi handle - fMulti = curl_multi_init(); + // Allocate the curl multi handle + fMulti = curl_multi_init(); - // Allocate the curl easy handle - fEasy = curl_easy_init(); + // Allocate the curl easy handle + fEasy = curl_easy_init(); - // Set URL option + // Set URL option TranscodeToStr url(fURLSource.getURLText(), "ISO8859-1", fMemoryManager); - curl_easy_setopt(fEasy, CURLOPT_URL, (char*)url.str()); + curl_easy_setopt(fEasy, CURLOPT_URL, (char*)url.str()); // Set up a way to recieve the data - curl_easy_setopt(fEasy, CURLOPT_WRITEDATA, this); // Pass this pointer to write function - curl_easy_setopt(fEasy, CURLOPT_WRITEFUNCTION, staticWriteCallback); // Our static write function + curl_easy_setopt(fEasy, CURLOPT_WRITEDATA, this); // Pass this pointer to write function + curl_easy_setopt(fEasy, CURLOPT_WRITEFUNCTION, staticWriteCallback); // Our static write function - // Do redirects - curl_easy_setopt(fEasy, CURLOPT_FOLLOWLOCATION, (long)1); - curl_easy_setopt(fEasy, CURLOPT_MAXREDIRS, (long)6); + // Do redirects + curl_easy_setopt(fEasy, 
CURLOPT_FOLLOWLOCATION, (long)1); + curl_easy_setopt(fEasy, CURLOPT_MAXREDIRS, (long)6); // Add username and password if authentication is required const XMLCh *username = urlSource.getUser(); @@ -117,8 +118,6 @@ CurlURLInputStream::CurlURLInputStream(const XMLURL& urlSource, const XMLNetHTTP // Add custom headers if(httpInfo->fHeaders) { - struct curl_slist *headersList = 0; - const char *headersBuf = httpInfo->fHeaders; const char *headersBufEnd = httpInfo->fHeaders + httpInfo->fHeadersLen; @@ -133,7 +132,7 @@ CurlURLInputStream::CurlURLInputStream(const XMLURL& urlSource, const XMLNetHTTP memcpy(header.get(), headerStart, length); header.get()[length] = 0; - headersList = curl_slist_append(headersList, header.get()); + fHeadersList = curl_slist_append(fHeadersList, header.get()); headersBuf += 2; headerStart = headersBuf; @@ -141,8 +140,7 @@ CurlURLInputStream::CurlURLInputStream(const XMLURL& urlSource, const XMLNetHTTP } ++headersBuf; } - curl_easy_setopt(fEasy, CURLOPT_HTTPHEADER, headersList); - curl_slist_free_all(headersList); + curl_easy_setopt(fEasy, CURLOPT_HTTPHEADER, fHeadersList); } // Set up the payload @@ -155,16 +153,16 @@ CurlURLInputStream::CurlURLInputStream(const XMLURL& urlSource, const XMLNetHTTP } } - // Add easy handle to the multi stack - curl_multi_add_handle(fMulti, fEasy); + // Add easy handle to the multi stack + curl_multi_add_handle(fMulti, fEasy); // Start reading, to get the content type - while(fBufferHeadPtr == fBuffer) - { - int runningHandles = 0; + while(fBufferHeadPtr == fBuffer) + { + int runningHandles = 0; readMore(&runningHandles); - if(runningHandles == 0) break; - } + if(runningHandles == 0) break; + } // Find the content type char *contentType8 = 0; @@ -176,16 +174,18 @@ CurlURLInputStream::CurlURLInputStream(const XMLURL& urlSource, const XMLNetHTTP CurlURLInputStream::~CurlURLInputStream() { - // Remove the easy handle from the multi stack - curl_multi_remove_handle(fMulti, fEasy); + // Remove the easy handle from 
the multi stack + curl_multi_remove_handle(fMulti, fEasy); - // Cleanup the easy handle - curl_easy_cleanup(fEasy); + // Cleanup the easy handle + curl_easy_cleanup(fEasy); - // Cleanup the multi handle - curl_multi_cleanup(fMulti); + // Cleanup the multi handle + curl_multi_cleanup(fMulti); if(fContentType) fMemoryManager->deallocate(fContentType); + + if(fHeadersList) curl_slist_free_all(fHeadersList); } diff --git a/src/xercesc/util/NetAccessors/Curl/CurlURLInputStream.hpp b/src/xercesc/util/NetAccessors/Curl/CurlURLInputStream.hpp index ea528c1eb..f75857b92 100644 --- a/src/xercesc/util/NetAccessors/Curl/CurlURLInputStream.hpp +++ b/src/xercesc/util/NetAccessors/Curl/CurlURLInputStream.hpp @@ -96,24 +96,25 @@ private : // that readBytes must return. // ----------------------------------------------------------------------- - CURLM* fMulti; - CURL* fEasy; - + CURLM* fMulti; + CURL* fEasy; + curl_slist* fHeadersList; + MemoryManager* fMemoryManager; - XMLURL fURLSource; + XMLURL fURLSource; XMLSize_t fTotalBytesRead; - XMLByte* fWritePtr; + XMLByte* fWritePtr; XMLSize_t fBytesRead; XMLSize_t fBytesToRead; - bool fDataAvailable; + bool fDataAvailable; // Overflow buffer for when curl writes more data to us // than we've asked for. 
- XMLByte fBuffer[CURL_MAX_WRITE_SIZE]; - XMLByte* fBufferHeadPtr; - XMLByte* fBufferTailPtr; + XMLByte fBuffer[CURL_MAX_WRITE_SIZE]; + XMLByte* fBufferHeadPtr; + XMLByte* fBufferTailPtr; // Upload data const char* fPayload; diff --git a/src/xercesc/util/XMLChar.cpp b/src/xercesc/util/XMLChar.cpp index 2d8b13c8e..46a61dab9 100644 --- a/src/xercesc/util/XMLChar.cpp +++ b/src/xercesc/util/XMLChar.cpp @@ -8837,7 +8837,7 @@ XMLByte XMLChar1_1::fgCharCharsTable1_1[0x10000] = #include -static XMLCh gTmpCharTable[0xFFFF]; +static XMLCh gTmpCharTable[0x10000]; static void initOneTable(const XMLCh* const theTable , const XMLByte theMask) diff --git a/tests/Fuzzers/parse_target.cpp b/tests/Fuzzers/parse_target.cpp new file mode 100755 index 000000000..5e976765a --- /dev/null +++ b/tests/Fuzzers/parse_target.cpp @@ -0,0 +1,28 @@ +/* +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +################################################################################ +*/ +#include "xerces_fuzz_common.h" + +#include "xercesc/framework/MemBufInputSource.hpp" +#include "xercesc/parsers/SAXParser.hpp" +#include "xercesc/util/OutOfMemoryException.hpp" + +using namespace xercesc_3_2; + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { + parseInMemory(Data, Size); + return 0; +} diff --git a/tests/Fuzzers/parse_target_proto.cpp b/tests/Fuzzers/parse_target_proto.cpp new file mode 100644 index 000000000..b1fd33cbe --- /dev/null +++ b/tests/Fuzzers/parse_target_proto.cpp @@ -0,0 +1,45 @@ +/* +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +################################################################################ +*/ +#include "xerces_fuzz_common.h" +#include "xmlProtoConverter.h" + +#include "xercesc/framework/MemBufInputSource.hpp" +#include "xercesc/parsers/SAXParser.hpp" +#include "xercesc/util/OutOfMemoryException.hpp" + +#include "genfiles/xml.pb.h" + +#include "src/libfuzzer/libfuzzer_macro.h" + +#include + +namespace { + protobuf_mutator::protobuf::LogSilencer log_silincer; + void ignore(void* ctx, const char* msg, ...) 
{} + + template + std::unique_ptr MakeUnique(T* obj, D del) { + return {obj, del}; + } +} + +using namespace xercesc_3_2; + +DEFINE_PROTO_FUZZER(const xmlProtoFuzzer::XmlDocument& xmlDocument) { + std::string xmlData = xmlProtoFuzzer::ProtoConverter().protoToString(xmlDocument); + parseInMemory((const uint8_t *)xmlData.c_str(), xmlData.size()); +} diff --git a/tests/Fuzzers/xerces_fuzz_common.cpp b/tests/Fuzzers/xerces_fuzz_common.cpp new file mode 100755 index 000000000..a76b383a8 --- /dev/null +++ b/tests/Fuzzers/xerces_fuzz_common.cpp @@ -0,0 +1,47 @@ +/* +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +################################################################################ +*/ +#include "xerces_fuzz_common.h" + +using namespace xercesc_3_2; +static bool initialized = false; + +void parseInMemory(const uint8_t *Data, size_t Size) +{ + if (!initialized) + { + XMLPlatformUtils::Initialize(); + initialized = true; + } + SAXParser::ValSchemes valScheme = SAXParser::Val_Auto; + bool doNamespaces = false; + bool doSchema = false; + bool schemaFullChecking = false; + SAXParser *parser = new SAXParser; + parser->setValidationScheme(valScheme); + parser->setDoNamespaces(doNamespaces); + parser->setDoSchema(doSchema); + parser->setHandleMultipleImports(true); + parser->setValidationSchemaFullChecking(schemaFullChecking); + static const char *gMemBufId = "prodInfo"; + + MemBufInputSource *memBufIS = new MemBufInputSource( + (const XMLByte *)Data, Size, gMemBufId, false); + parser->parse(*memBufIS); + delete parser; + delete memBufIS; + //XMLPlatformUtils::Terminate(); +} diff --git a/tests/Fuzzers/xerces_fuzz_common.h b/tests/Fuzzers/xerces_fuzz_common.h new file mode 100644 index 000000000..9eaf88bd7 --- /dev/null +++ b/tests/Fuzzers/xerces_fuzz_common.h @@ -0,0 +1,23 @@ +/* +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +################################################################################ +*/ +#pragma once + +#include "xercesc/parsers/SAXParser.hpp" +#include "xercesc/framework/MemBufInputSource.hpp" +#include "xercesc/util/OutOfMemoryException.hpp" + +void parseInMemory(const uint8_t *Data, size_t Size); \ No newline at end of file diff --git a/tests/Fuzzers/xml.proto b/tests/Fuzzers/xml.proto new file mode 100755 index 000000000..75e54b6f0 --- /dev/null +++ b/tests/Fuzzers/xml.proto @@ -0,0 +1,339 @@ +/* + * Copyright (C) 2019 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +syntax = "proto3"; + +message Misc { + oneof misc_oneof { + string comment = 1; + ProcessingInstruction inst = 2; + } +} + +message PEReference { + string name = 1; +} + +message ElementDecl { + enum ContentSpec { + EMPTY = 0; + ANY = 1; + FUZZ = 2; + MIXED = 3; + CHILDREN = 4; + } + string name = 1; + ContentSpec spec = 2; + repeated string cdata = 3; +} + +message AttrType { + enum Type { + CDATA = 0; + ID = 1; + IDREF = 2; + IDREFS = 3; + ENTITY = 4; + ENTITIES = 5; + NMTOKEN = 6; + NMTOKENS = 7; + } + Type ty = 1; +} + +message EnumeratedType { + repeated string names = 1; +} + +message AttrListDecl { + string name = 1; + AttrType atype = 2; + EnumeratedType etype = 3; + DefaultDecl def = 4; +} + +message ExternalId { + enum Type { + SYSTEM = 0; + PUBLIC = 1; + FUZZ = 2; + } + Type type = 1; + string system = 2; + string pub = 3; +} + +message AttValue { + enum Type { + ENTITY = 0; + CHAR = 1; + FUZZ = 2; + } + Type type = 1; + repeated string value = 2; +} + +message DefaultDecl { + enum Type { + REQUIRED = 0; + IMPLIED = 1; + FIXED = 2; + FUZZ = 3; + } + Type type = 1; + AttValue att = 2; +} + +message AttDef { + // TODO: Add enumerated type + enum Type { + CDATA = 0; + ID = 1; + IDREF = 2; + IDREFS = 3; + ENTITY = 4; + ENTITIES = 5; + NMTOKEN = 6; + NMTOKENS = 7; + FUZZ = 8; + } + string name = 1; + Type type = 2; + DefaultDecl def = 3; +} + +message AttListDecl { + string name = 1; + repeated AttDef attdefs = 2; +} + +message NotationDecl { + string name = 1; + oneof notation_oneof { + ExternalId ext = 2; + string pub = 3; + string fuzz = 4; + } +} + +message EntityValue { + enum Type { + ENTITY = 0; + CHAR = 1; + PEREF = 2; + FUZZ = 3; + } + Type type = 1; + repeated string name = 2; +} + +message NDataDecl { + string name = 1; +} + +message EntityDef { + oneof entity_oneof { + ExternalId ext = 1; + EntityValue val = 2; + } + NDataDecl ndata = 3; +} + +message PEDef { + oneof pedef_oneof { + EntityValue val = 1; + ExternalId id = 2; + } +} + 
+message EntityDecl { + enum Type { + GEDECL = 0; + PEDECL = 1; + } + Type type = 1; + string name = 2; + EntityDef ent = 3; + PEDef pedef = 4; +} + +message ConditionalSect { + enum Type { + INCLUDE = 0; + IGNORE = 1; + FUZZ = 2; + } + Type type = 1; + ExtSubsetDecl ext = 2; + // TODO: Make this recursive + // See https://www.w3.org/TR/xml/#NT-conditionalSect + repeated string ignores = 3; +} + +message OneExtSubsetDecl { + oneof extsubset_oneof { + MarkupDecl m = 1; + ConditionalSect c = 2; + } +} + +message ExtSubsetDecl { + repeated OneExtSubsetDecl decls = 1; +} + +message MarkupDecl { + oneof markup_oneof { + ElementDecl e = 1; + AttListDecl a = 2; + NotationDecl n = 3; + Misc m = 4; + EntityDecl entity = 5; + ExtSubsetDecl ext = 6; + } +} + +message DocTypeDecl { + string name = 1; + ExternalId ext = 2; + repeated MarkupDecl mdecl = 3; +} + +message Prolog { + XmlDeclaration decl = 1; + DocTypeDecl doctype = 2; + repeated Misc misc = 3; +} + +message KeyValue { + enum XmlNamespace { + ATTRIBUTES = 0; + BASE = 1; + CATALOG = 2; + ID = 3; + LANG = 4; + LINK = 5; + SPACE = 6; + SPECIAL = 7; + TEST = 8; + FUZZ = 9; + } + XmlNamespace type = 1; + string key = 2; + string value = 3; +} + +message ProcessingInstruction { + string name = 1; + repeated KeyValue kv = 2; +} + +message CData { + string data = 1; +} + +message Content { + // TODO: Add other content types + oneof content_oneof { + string str = 1; + Element e = 2; + CData c = 3; + } +} + +message Element { + enum Type { + PREDEFINED = 0; + FUZZ = 1; + } + enum Id { + XIINCLUDE = 0; + XIFALLBACK = 1; + // Attributes of xinclude + XIHREF = 2; + XIPARSE = 3; + XIXPOINTER = 4; + XIENCODING = 5; + XIACCEPT = 6; + XIACCEPTLANG = 7; + } + Type type = 1; + Id id = 2; + string name = 3; + repeated KeyValue kv = 4; + Content content = 5; + string childprop = 6; +} + +message VersionNum { + enum Type { + STANDARD = 0; + FUZZ = 1; + } + Type type = 1; + uint64 major = 2; + uint64 minor = 3; +} + +message Encodings { + 
enum Enc { + BIG5 = 0; + EUCJP = 1; + EUCKR = 2; + GB18030 = 3; + ISO2022JP = 4; + ISO2022KR = 5; + ISO88591 = 6; + ISO88592 = 7; + ISO88593 = 8; + ISO88594 = 9; + ISO88595 = 10; + ISO88596 = 11; + ISO88597 = 12; + ISO88598 = 13; + ISO88599 = 14; + SHIFTJIS = 15; + TIS620 = 16; + USASCII = 17; + UTF8 = 18; + UTF16 = 19; + UTF16BE = 20; + UTF16LE = 21; + WINDOWS31J = 22; + WINDOWS1255 = 23; + WINDOWS1256 = 24; + FUZZ = 25; + } + Enc name = 1; + string fuzz = 2; +} + +message XmlDeclaration { + VersionNum ver = 1; + Encodings enc = 2; + enum Standalone { + YES = 0; + NO = 1; + } + Standalone standalone = 3; + string fuzz = 4; +} + +message XmlDocument { + Prolog p = 1; + repeated Element e = 2; +} + +package xmlProtoFuzzer; \ No newline at end of file diff --git a/tests/Fuzzers/xmlProtoConverter.cpp b/tests/Fuzzers/xmlProtoConverter.cpp new file mode 100644 index 000000000..f8a47dee2 --- /dev/null +++ b/tests/Fuzzers/xmlProtoConverter.cpp @@ -0,0 +1,758 @@ +/* + * Copyright (C) 2019 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "xmlProtoConverter.h" + +#include + +using namespace std; +using namespace xmlProtoFuzzer; + +string ProtoConverter::removeNonAscii(string const& _utf8) +{ + string asciiStr{_utf8}; + asciiStr.erase(remove_if(asciiStr.begin(), asciiStr.end(), [=](char c) -> bool { + return !(std::isalpha(c) || std::isdigit(c)); + }), asciiStr.end()); + return asciiStr.empty() ? 
"fuzz" : asciiStr; +} + + +void ProtoConverter::visit(Misc const& _x) +{ + switch (_x.misc_oneof_case()) + { + case Misc::kComment: + m_output << "\n"; + break; + case Misc::kInst: + visit(_x.inst()); + break; + case Misc::MISC_ONEOF_NOT_SET: + break; + } +} + +void ProtoConverter::visit(Prolog const& _x) +{ + visit(_x.decl()); + visit(_x.doctype()); + for (auto const& misc: _x.misc()) + visit(misc); +} + +void ProtoConverter::visit(KeyValue const& _x) +{ + if (!KeyValue::XmlNamespace_IsValid(_x.type())) + return; + + switch (_x.type()) + { + case KeyValue::ATTRIBUTES: + m_output << "xml:attributes=\"" << removeNonAscii(_x.value()) << "\" "; + break; + case KeyValue::BASE: + m_output << "xml:base=\"" << removeNonAscii(_x.value()) << "\" "; + break; + case KeyValue::CATALOG: + m_output << "xml:catalog=\"" << removeNonAscii(_x.value()) << "\" "; + break; + case KeyValue::ID: + m_output << "xml:id=\"" << removeNonAscii(_x.value()) << "\" "; + break; + case KeyValue::LANG: + m_output << "xml:lang=\"" << removeNonAscii(_x.value()) << "\" "; + break; + case KeyValue::LINK: + m_output << "xml:link=\"" << removeNonAscii(_x.value()) << "\" "; + break; + case KeyValue::SPACE: + m_output << "xml:space=\"" << removeNonAscii(_x.value()) << "\" "; + break; + case KeyValue::SPECIAL: + m_output << "xml:special=\"" << removeNonAscii(_x.value()) << "\" "; + break; + case KeyValue::TEST: + m_output << "xml:test=\"" << removeNonAscii(_x.value()) << "\" "; + break; + case KeyValue::FUZZ: + if (_x.ByteSizeLong() % 2) + m_output << "xmlns:" << removeNonAscii(_x.key()) << "=\"" << removeNonAscii(_x.value()) << "\" "; + else + m_output << removeNonAscii(_x.key()) << "=\"" << removeNonAscii(_x.value()) << "\" "; + break; + case KeyValue_XmlNamespace_KeyValue_XmlNamespace_INT_MIN_SENTINEL_DO_NOT_USE_: + case KeyValue_XmlNamespace_KeyValue_XmlNamespace_INT_MAX_SENTINEL_DO_NOT_USE_: + break; + } +} + +void ProtoConverter::visit(ProcessingInstruction const& _x) +{ + m_output << "\n"; +} + +void 
ProtoConverter::visit(Content const& _x) +{ + switch (_x.content_oneof_case()) + { + case Content::kStr: + m_output << _x.str() << "\n"; + break; + case Content::kE: + visit(_x.e()); + m_output << "\n"; + break; + case Content::kC: + visit(_x.c()); + m_output << "\n"; + break; + case Content::CONTENT_ONEOF_NOT_SET: + break; + } +} + +void ProtoConverter::visit(ElementDecl const& _x) +{ + if (!ElementDecl::ContentSpec_IsValid(_x.spec())) + return; + + m_output << ""; + break; + case ElementDecl::ANY: + m_output << "ANY>"; + break; + case ElementDecl::FUZZ: + m_output << "FUZZ>"; + break; + case ElementDecl::MIXED: + m_output << "(#PCDATA"; + for (auto const& pcdata: _x.cdata()) + m_output << "|" << pcdata; + m_output << ")"; + if (_x.cdata_size() > 0) + m_output << "*"; + m_output << ">"; + break; + case ElementDecl::CHILDREN: + { + m_output << "("; + string delim = ""; + for (auto const& str: _x.cdata()) { + m_output << delim << removeNonAscii(str); + delim = ", "; + } + m_output << ")>"; + break; + } + case ElementDecl_ContentSpec_ElementDecl_ContentSpec_INT_MIN_SENTINEL_DO_NOT_USE_: + case ElementDecl_ContentSpec_ElementDecl_ContentSpec_INT_MAX_SENTINEL_DO_NOT_USE_: + break; + } +} + +void ProtoConverter::visit(AttValue const& _x) +{ + if (!isValid(_x)) + return; + + m_output << "\""; + string prefix; + switch (_x.type()) + { + case AttValue::ENTITY: + prefix = "&"; + break; + case AttValue::CHAR: + if (_x.ByteSizeLong() % 2) + prefix = "&#"; + else + // TODO: Value that follows this must be a + // sequence of hex digits. 
+ prefix = "&#x"; + break; + case AttValue::FUZZ: + prefix = "fuzz"; + break; + case AttValue_Type_AttValue_Type_INT_MIN_SENTINEL_DO_NOT_USE_: + case AttValue_Type_AttValue_Type_INT_MAX_SENTINEL_DO_NOT_USE_: + break; + } + for (auto const& name: _x.value()) + m_output << prefix << removeNonAscii(name) << ";"; + m_output << "\""; +} + +void ProtoConverter::visit(DefaultDecl const& _x) +{ + if (!isValid(_x)) + return; + + switch (_x.type()) + { + case DefaultDecl::REQUIRED: + m_output << "#REQUIRED"; + break; + case DefaultDecl::IMPLIED: + m_output << "#IMPLIED"; + break; + case DefaultDecl::FIXED: + m_output << "#FIXED "; + visit(_x.att()); + break; + case DefaultDecl::FUZZ: + m_output << "#FUZZ"; + break; + case DefaultDecl_Type_DefaultDecl_Type_INT_MIN_SENTINEL_DO_NOT_USE_: + case DefaultDecl_Type_DefaultDecl_Type_INT_MAX_SENTINEL_DO_NOT_USE_: + break; + } +} + +void ProtoConverter::visit(AttDef const& _x) +{ + if (!isValid(_x)) + return; + + m_output << " " << removeNonAscii(_x.name()) << " "; + switch (_x.type()) + { + case AttDef::CDATA: + m_output << "CDATA "; + break; + case AttDef::ID: + m_output << "ID "; + break; + case AttDef::IDREF: + m_output << "IDREF "; + break; + case AttDef::IDREFS: + m_output << "IDREFS "; + break; + case AttDef::ENTITY: + m_output << "ENTITY "; + break; + case AttDef::ENTITIES: + m_output << "ENTITIES "; + break; + case AttDef::NMTOKEN: + m_output << "NMTOKEN "; + break; + case AttDef::NMTOKENS: + m_output << "NMTOKENS "; + break; + case AttDef::FUZZ: + m_output << "FUZZ "; + break; + case AttDef_Type_AttDef_Type_INT_MIN_SENTINEL_DO_NOT_USE_: + case AttDef_Type_AttDef_Type_INT_MAX_SENTINEL_DO_NOT_USE_: + break; + } + visit(_x.def()); +} + +void ProtoConverter::visit(AttListDecl const& _x) +{ + m_output << ""; +} + +void ProtoConverter::visit(NotationDecl const& _x) +{ + m_output << ""; +} + +void ProtoConverter::visit(NDataDecl const& _x) +{ + m_output << " NDATA " << _x.name(); +} + +void ProtoConverter::visit(EntityDef const& 
_x) +{ + switch (_x.entity_oneof_case()) + { + case EntityDef::kExt: + visit(_x.ext()); + if (_x.ByteSizeLong() % 2) + visit(_x.ndata()); + break; + case EntityDef::kVal: + visit(_x.val()); + break; + case EntityDef::ENTITY_ONEOF_NOT_SET: + break; + } +} + +void ProtoConverter::visit(PEDef const& _x) +{ + switch (_x.pedef_oneof_case()) + { + case PEDef::kVal: + visit(_x.val()); + break; + case PEDef::kId: + visit(_x.id()); + break; + case PEDef::PEDEF_ONEOF_NOT_SET: + break; + } +} + +void ProtoConverter::visit(EntityValue const& _x) +{ + if (!isValid(_x)) + return; + + m_output << "\""; + string prefix; + switch (_x.type()) + { + case EntityValue::ENTITY: + prefix = "&"; + break; + case EntityValue::CHAR: + if (_x.ByteSizeLong() % 2) + prefix = "&#"; + else + prefix = "&#x"; + break; + case EntityValue::PEREF: + prefix = "%"; + break; + case EntityValue::FUZZ: + prefix = "fuzz"; + break; + case EntityValue_Type_EntityValue_Type_INT_MIN_SENTINEL_DO_NOT_USE_: + case EntityValue_Type_EntityValue_Type_INT_MAX_SENTINEL_DO_NOT_USE_: + break; + } + for (auto const& ref: _x.name()) + m_output << prefix << ref << ";"; + m_output << "\""; +} + +void ProtoConverter::visit(EntityDecl const& _x) +{ + if (!isValid(_x)) + return; + + m_output << ""; +} + +void ProtoConverter::visit(ConditionalSect const& _x) +{ + if (!isValid(_x)) + return; + + switch (_x.type()) + { + case ConditionalSect::INCLUDE: + m_output << ""; + break; + case ConditionalSect::IGNORE: + m_output << ""; + m_output << "]]>"; + break; + case ConditionalSect::FUZZ: + m_output << ""; + break; + case ConditionalSect_Type_ConditionalSect_Type_INT_MIN_SENTINEL_DO_NOT_USE_: + case ConditionalSect_Type_ConditionalSect_Type_INT_MAX_SENTINEL_DO_NOT_USE_: + break; + } +} + + +void ProtoConverter::visit(OneExtSubsetDecl const& _x) +{ + switch (_x.extsubset_oneof_case()) + { + case OneExtSubsetDecl::kM: + visit(_x.m()); + break; + case OneExtSubsetDecl::kC: + visit(_x.c()); + break; + case 
OneExtSubsetDecl::EXTSUBSET_ONEOF_NOT_SET: + break; + } +} + + +void ProtoConverter::visit(ExtSubsetDecl const& _x) +{ + for (auto const& decl: _x.decls()) + visit(decl); +} + +void ProtoConverter::visit(CData const& _x) +{ + m_output << ""; +} + +void ProtoConverter::visit(MarkupDecl const& _x) +{ + switch (_x.markup_oneof_case()) + { + case MarkupDecl::kE: + visit(_x.e()); + break; + case MarkupDecl::kA: + visit(_x.a()); + break; + case MarkupDecl::kN: + visit(_x.n()); + break; + case MarkupDecl::kM: + visit(_x.m()); + break; + case MarkupDecl::kEntity: + visit(_x.entity()); + break; + case MarkupDecl::kExt: + visit(_x.ext()); + break; + case MarkupDecl::MARKUP_ONEOF_NOT_SET: + break; + } +} + +/// Returns predefined element from an Element_Id enum +/// @param _x is an enum that holds the desired type of predefined value +/// @param _prop is a string that holds the value of the desired type +/// @return string holding the predefined value of the form +/// name attribute=\"value\" +string ProtoConverter::getPredefined(Element_Id _x, string const& _prop) +{ + string output{}; + switch (_x) + { + case Element::XIINCLUDE: + case Element::XIFALLBACK: + case Element::XIHREF: + output = "xi:include href=\"fuzz.xml\""; + case Element::XIPARSE: + output = "xi:include parse=\"xml\""; + case Element::XIXPOINTER: + output = "xi:include xpointer=\"" + removeNonAscii(_prop) + "\""; + case Element::XIENCODING: + output = "xi:include encoding=\"" + removeNonAscii(_prop) + "\""; + case Element::XIACCEPT: + output = "xi:include accept=\"" + removeNonAscii(_prop) + "\""; + case Element::XIACCEPTLANG: + output = "xi:include accept-language=\"" + removeNonAscii(_prop) + "\""; + case Element_Id_Element_Id_INT_MIN_SENTINEL_DO_NOT_USE_: + case Element_Id_Element_Id_INT_MAX_SENTINEL_DO_NOT_USE_: + output = "xi:fuzz xifuzz=\"fuzz\""; + } + return output; +} + +/// Returns uri string for a given Element_Id type +string ProtoConverter::getUri(Element_Id _x) +{ + if 
(!Element::Id_IsValid(_x))
+        return s_XInclude;
+
+    switch (_x)
+    {
+    case Element::XIINCLUDE:
+    case Element::XIFALLBACK:
+    case Element::XIHREF:
+    case Element::XIPARSE:
+    case Element::XIXPOINTER:
+    case Element::XIENCODING:
+    case Element::XIACCEPT:
+    case Element::XIACCEPTLANG:
+    case Element_Id_Element_Id_INT_MIN_SENTINEL_DO_NOT_USE_:
+    case Element_Id_Element_Id_INT_MAX_SENTINEL_DO_NOT_USE_:
+        return s_XInclude;
+    }
+
+    // A value that passes Id_IsValid but matches no case above would otherwise
+    // run off the end of a value-returning function (undefined behaviour).
+    return s_XInclude;
+}
+
+/// Writes one element to the output stream: a start tag with the
+/// ASCII-filtered name, an optional namespace uri and the kv() entries, then
+/// an optional predefined self-closing child, the content and a final newline.
+/// Messages with an invalid type produce no output.
+void ProtoConverter::visit(Element const& _x)
+{
+    if (!isValid(_x))
+        return;
+
+    // Predefined child node
+    string child = {};
+    // Predefined uri for child node
+    string pUri = {};
+    // Element name
+    string name = removeNonAscii(_x.name());
+
+    switch (_x.type())
+    {
+    case Element::PREDEFINED:
+        child = getPredefined(_x.id(), _x.childprop());
+        pUri = getUri(_x.id());
+        break;
+    case Element::FUZZ:
+    case Element_Type_Element_Type_INT_MIN_SENTINEL_DO_NOT_USE_:
+    case Element_Type_Element_Type_INT_MAX_SENTINEL_DO_NOT_USE_:
+        break;
+    }
+
+    // Output order: start tag, optional predefined child, content, closing line.
+
+    // Start name tag: Must be Ascii?
+    m_output << "<" << name << " ";
+
+    // Add uri to name tag
+    if (!pUri.empty())
+        m_output << pUri << " ";
+    for (auto const& prop: _x.kv())
+        visit(prop);
+    m_output << ">\n";
+
+    // Add the predefined child as a self-closing element, if there is one
+    if (!child.empty())
+        m_output << "<" << child << "/>\n";
+
+    // Add content
+    visit(_x.content());
+
+    // Close name tag
+    // NOTE(review): only a newline is written here. The closing tag text
+    // looks lost - confirm against the original source.
+    m_output << "\n";
+}
+
+/// Writes an external identifier: a keyword chosen from type() followed by
+/// the double-quoted, ASCII-filtered literal(s). Messages with an invalid
+/// type produce no output.
+void ProtoConverter::visit(ExternalId const& _x)
+{
+    if (!isValid(_x))
+        return;
+
+    // Wraps the ASCII-filtered text in double quotes.
+    auto const quoted = [this](string const& _text) {
+        return "\"" + removeNonAscii(_text) + "\"";
+    };
+
+    switch (_x.type())
+    {
+    case ExternalId::SYSTEM:
+        m_output << "SYSTEM " << quoted(_x.system());
+        break;
+    case ExternalId::PUBLIC:
+        m_output << "PUBLIC " << quoted(_x.pub()) << " " << quoted(_x.system());
+        break;
+    case ExternalId::FUZZ:
+        m_output << "FUZZ " << quoted(_x.pub());
+        break;
+    case ExternalId_Type_ExternalId_Type_INT_MIN_SENTINEL_DO_NOT_USE_:
+    case ExternalId_Type_ExternalId_Type_INT_MAX_SENTINEL_DO_NOT_USE_:
+        break;
+    }
+}
+
+/// Handles a DocTypeDecl message.
+/// NOTE(review): only a newline is written and _x is unused. The declaration
+/// text looks lost - confirm against the original source.
+void ProtoConverter::visit(DocTypeDecl const& _x)
+{
+    m_output << "\n";
+}
+
+/// Writes a double-quoted version number; STANDARD always yields "1.0".
+/// Messages with an invalid type produce no output.
+void ProtoConverter::visit(VersionNum const& _x)
+{
+    if (!isValid(_x))
+        return;
+
+    switch (_x.type())
+    {
+    case VersionNum::STANDARD:
+        m_output << "\"1.0\"";
+        break;
+    case VersionNum::FUZZ:
+    case VersionNum_Type_VersionNum_Type_INT_MIN_SENTINEL_DO_NOT_USE_:
+    case VersionNum_Type_VersionNum_Type_INT_MAX_SENTINEL_DO_NOT_USE_:
+        m_output << "\"" << _x.major() << "."
<< _x.minor() << "\"";
+        break;
+    }
+}
+
+/// Writes an ` encoding="..."` attribute for the encoding selected by name().
+/// Predefined encodings use a fixed label; FUZZ uses the ASCII-filtered fuzz()
+/// string. Messages whose name() is not a valid value produce no output.
+void ProtoConverter::visit(Encodings const& _x)
+{
+    auto const encoding = _x.name();
+    if (!Encodings::Enc_IsValid(encoding))
+        return;
+
+    // Fixed label of each predefined encoding; stays null where none applies.
+    char const* label = nullptr;
+    switch (encoding)
+    {
+    case Encodings::BIG5:        label = "BIG5"; break;
+    case Encodings::EUCJP:       label = "EUC-JP"; break;
+    case Encodings::EUCKR:       label = "EUC-KR"; break;
+    case Encodings::GB18030:     label = "GB18030"; break;
+    case Encodings::ISO2022JP:   label = "ISO-2022-JP"; break;
+    case Encodings::ISO2022KR:   label = "ISO-2022-KR"; break;
+    case Encodings::ISO88591:    label = "ISO-8859-1"; break;
+    case Encodings::ISO88592:    label = "ISO-8859-2"; break;
+    case Encodings::ISO88593:    label = "ISO-8859-3"; break;
+    case Encodings::ISO88594:    label = "ISO-8859-4"; break;
+    case Encodings::ISO88595:    label = "ISO-8859-5"; break;
+    case Encodings::ISO88596:    label = "ISO-8859-6"; break;
+    case Encodings::ISO88597:    label = "ISO-8859-7"; break;
+    case Encodings::ISO88598:    label = "ISO-8859-8"; break;
+    case Encodings::ISO88599:    label = "ISO-8859-9"; break;
+    case Encodings::SHIFTJIS:    label = "SHIFT_JIS"; break;
+    case Encodings::TIS620:      label = "TIS-620"; break;
+    case Encodings::USASCII:     label = "US-ASCII"; break;
+    case Encodings::UTF8:        label = "UTF-8"; break;
+    case Encodings::UTF16:       label = "UTF-16"; break;
+    case Encodings::UTF16BE:     label = "UTF-16BE"; break;
+    case Encodings::UTF16LE:     label = "UTF-16LE"; break;
+    case Encodings::WINDOWS31J:  label = "WINDOWS-31J"; break;
+    case Encodings::WINDOWS1255: label = "WINDOWS-1255"; break;
+    case Encodings::WINDOWS1256: label = "WINDOWS-1256"; break;
+    case Encodings::FUZZ:
+    case Encodings_Enc_Encodings_Enc_INT_MIN_SENTINEL_DO_NOT_USE_:
+    case Encodings_Enc_Encodings_Enc_INT_MAX_SENTINEL_DO_NOT_USE_:
+        break;
+    }
+
+    m_output << " encoding=\"";
+    if (label != nullptr)
+        m_output << label;
+    else if (encoding == Encodings::FUZZ)
+        m_output << removeNonAscii(_x.fuzz());
+    m_output << "\"";
+}
+
+/// Handles an XmlDeclaration message.
+/// NOTE(review): the statement below is an unterminated raw string literal.
+/// The declaration text looks lost and must be restored from the original
+/// source before this compiles.
+void ProtoConverter::visit(XmlDeclaration const& _x)
+{
+    m_output << R"(\n";
+}
+
+/// Visits p() of the document and then each entry of e(), in order.
+void ProtoConverter::visit(XmlDocument const& _x)
+{
+    visit(_x.p());
+    for (auto const& node: _x.e())
+        visit(node);
+}
+
+/// Converts the given document and returns everything written to m_output.
+/// m_output is not reset here, so repeated calls on one converter append.
+string ProtoConverter::protoToString(XmlDocument const& _x)
+{
+    visit(_x);
+    return m_output.str();
+}
diff --git a/tests/Fuzzers/xmlProtoConverter.h b/tests/Fuzzers/xmlProtoConverter.h
new file mode 100644
index 000000000..501dde36c
--- /dev/null
+++ b/tests/Fuzzers/xmlProtoConverter.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright (C) 2019 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <sstream>
+#include <string>
+
+#include "xml.pb.h"
+
+namespace xmlProtoFuzzer {
+/// Converts an XmlDocument protobuf message into text by visiting each
+/// message type and appending to an internal output stream.
+/// Instances can be neither copied nor moved.
+class ProtoConverter
+{
+public:
+    ProtoConverter() = default;
+
+    ProtoConverter(ProtoConverter const&) = delete;
+
+    ProtoConverter(ProtoConverter&&) = delete;
+
+    /// Visits the document and returns the accumulated output as a string.
+    std::string protoToString(XmlDocument const&);
+
+private:
+    // One visitor overload per message type; each appends to m_output.
+    void visit(Prolog const&);
+
+    void visit(ProcessingInstruction const&);
+
+    void visit(ExternalId const&);
+
+    void visit(DocTypeDecl const&);
+
+    void visit(VersionNum const&);
+
+    void visit(Encodings const&);
+
+    void visit(Misc const&);
+
+    void visit(KeyValue const&);
+
+    void visit(Element const&);
+
+    void visit(ElementDecl const&);
+
+    void visit(AttValue const&);
+
+    void visit(DefaultDecl const&);
+
+    void visit(AttDef const&);
+
+    void visit(AttListDecl const&);
+
+    void visit(NotationDecl const&);
+
+    void visit(EntityDecl const&);
+
+    void visit(EntityValue const&);
+
+    void visit(EntityDef const&);
+
+    void visit(PEDef const&);
+
+    void visit(NDataDecl const&);
+
+    void visit(ConditionalSect const&);
+
+    void visit(OneExtSubsetDecl const&);
+
+    void visit(ExtSubsetDecl const&);
+
+    void visit(MarkupDecl const&);
+
+    void visit(CData const&);
+
+    void visit(Content const&);
+
+    void visit(XmlDeclaration const&);
+
+    void visit(XmlDocument const&);
+
+    /// Returns true when messageType.type() is accepted by T::Type_IsValid.
+    /// @tparam T message class exposing type() and a static Type_IsValid()
+    template <typename T>
+    bool isValid(T const& messageType) {
+        return T::Type_IsValid(messageType.type());
+    }
+
+    // Presumably returns the input with non-ASCII characters removed; the
+    // definition is outside this view - confirm.
+    std::string removeNonAscii(std::string const&);
+    /// Returns the namespace attribute text used for the given element id.
+    std::string getUri(Element_Id _x);
+    /// Returns the predefined `name attribute="value"` text for an element id.
+    std::string getPredefined(Element_Id _x, std::string const&);
+
+    /// Accumulates everything the visitors write.
+    std::ostringstream m_output;
+
+    /// XInclude namespace declaration added to predefined elements.
+    static constexpr auto s_XInclude = "xmlns:xi=\"http://www.w3.org/2001/XInclude\"";
+};
+}
+