diff --git a/CMakeLists.txt b/CMakeLists.txt index 613c3ec9..e12c6d0a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -106,11 +106,10 @@ if(BUILD_CUSTOM_OPENSSL) LINK_DIRECTORIES("${BUILD_CUSTOM_OPENSSL}/lib") endif() if(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Linux") - set(LIBRARIES ${LIBRARIES} pthread ssl crypto pcre protobuf xml2) + set(LIBRARIES ${LIBRARIES} pthread ssl crypto pcre2-8 protobuf xml2) endif() if(BUILD_REDHAT) - set(LIBRARIES ${LIBRARIES} ssl crypto pcre protobuf xml2) - add_definitions(-DWAFLZ_PCRE_INFO_FLAGS_MISSING) + set(LIBRARIES ${LIBRARIES} ssl crypto pcre2-8 protobuf xml2) endif() # ------------------------------------------------------------------------------ # special build case for CUSTOM_CAPLENMAX @@ -133,7 +132,7 @@ if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin") # -------------------------------------------------------- INCLUDE_DIRECTORIES(/usr/local/opt/openssl/include) INCLUDE_DIRECTORIES(/usr/local/opt/protobuf/include) - INCLUDE_DIRECTORIES(/usr/local/opt/pcre/include) + INCLUDE_DIRECTORIES(/usr/local/opt/pcre2/include) INCLUDE_DIRECTORIES(/usr/local/opt/libxml2/include) INCLUDE_DIRECTORIES(/usr/local/opt/rapidjson/include) # -------------------------------------------------------- @@ -141,7 +140,7 @@ if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin") # -------------------------------------------------------- LINK_DIRECTORIES(/usr/local/opt/openssl/lib) LINK_DIRECTORIES(/usr/local/opt/protobuf/lib) - LINK_DIRECTORIES(/usr/local/opt/pcre/lib) + LINK_DIRECTORIES(/usr/local/opt/pcre2/lib) LINK_DIRECTORIES(/usr/local/opt/libxml2/lib) # -------------------------------------------------------- # if redis @@ -205,11 +204,11 @@ if(BUILD_UBUNTU) fail_if_not_found_library(libssl.a) endif() fail_if_not_found_library(libcrypto.a) - fail_if_not_found_library(libpcre.a) + fail_if_not_found_library(libpcre2-8.a) fail_if_not_found_library(libprotobuf.a) # -------------------------------------------------------- # if rate-limiting check for kv db libs - # -------------------------------------------------------- + # -------------------------------------------------------- if(BUILD_REDIS) fail_if_not_found_library(libhiredis.a) endif() diff --git a/README.md b/README.md index f11cb7cb..154e7f93 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,7 @@ The open source standard implementation of the [ModSecurity Rules Engine](https: ##### Package Requirements ```sh -$ sudo apt-get install -y libssl-dev libpcre3-dev libxml2-dev libicu-dev protobuf-compiler libprotobuf-dev liblzma-dev python3-pip +$ sudo apt-get install -y libssl-dev libpcre2-dev libxml2-dev libicu-dev protobuf-compiler libprotobuf-dev liblzma-dev python3-pip ``` ##### Python Package Requirements @@ -59,7 +59,7 @@ sudo make install ##### Package Requirements (with Homebrew) ```sh -$ brew install cmake openssl protobuf libxml2 pcre dpkg rapidjson jq +$ brew install cmake openssl protobuf libxml2 pcre2 dpkg rapidjson jq ``` ##### Python Package Requirements @@ -172,4 +172,3 @@ $ curl -s "http://localhost:12345/index.html" -H"Host:" | jq '.' ## License This project is licensed under the terms of the Apache 2.0 open source license. Please refer to the `LICENSE-2.0.txt` file for the full terms. - diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 7d820ab8..db1a645c 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -194,7 +194,7 @@ SET_SOURCE_FILES_PROPERTIES( PROPERTIES LANGUAGE C ) set(CMAKE_C_FLAGS "-std=c99") -set(CMAKE_CXX_FLAGS "-Wall -Weffc++ -Werror -std=c++11") +set(CMAKE_CXX_FLAGS "-Wall -Werror -std=c++11") # ------------------------------------------------------------------------------ # headers # ------------------------------------------------------------------------------ @@ -280,7 +280,7 @@ target_include_directories(waflzcore PUBLIC target_link_libraries(waflz ssl crypto - pcre + pcre2-8 protobuf xml2 ) @@ -322,4 +322,3 @@ install(FILES ${HDRS} DESTINATION include/waflz COMPONENT Headers ) - diff --git a/src/modsecurity/config_parser.cc b/src/modsecurity/config_parser.cc index 56073698..5211d799 100644 --- a/src/modsecurity/config_parser.cc +++ b/src/modsecurity/config_parser.cc @@ -27,7 +27,8 @@ #include #include #include -#include +#define PCRE2_CODE_UNIT_WIDTH 8 +#include #include #include #include @@ -2461,37 +2462,57 @@ void config_parser::show_status(void) //! ---------------------------------------------------------------------------- int32_t get_pcre_match_list(const char *a_regex, const char *a_str, match_list_t &ao_match_list) { - pcre *l_re; - const char *l_error; - int l_erroffset; - l_re = pcre_compile(a_regex, // the pattern - PCRE_ANCHORED,// options - &l_error, // for error message - &l_erroffset, // for error offset - 0); // use default character tables - if(!l_re) - { - NDBG_PRINT("pcre_compile failed (offset: %d), %s\n", l_erroffset, l_error); + PCRE2_SIZE l_erroffset = 0; + PCRE2_SIZE l_length = strlen(a_regex); + PCRE2_SPTR l_pattern = (PCRE2_SPTR) a_regex; + uint32_t options = PCRE2_ANCHORED; + int l_errorcode; + pcre2_code *l_re; + l_re = pcre2_compile(l_pattern, // the pattern + l_length, // length of the pattern + options, // default options + &l_errorcode, // for error code + &l_erroffset, // for error offset + nullptr); // use default compile context + if(l_re == nullptr) + { + PCRE2_UCHAR l_buffer[256]; + pcre2_get_error_message(l_errorcode, l_buffer, sizeof(l_buffer)); + NDBG_PRINT("pcre2_compile failed (offset: %d), %s\n", (int) l_erroffset, l_buffer); + return WAFLZ_STATUS_ERROR; + } + int l_rc; + l_rc = pcre2_jit_compile(l_re, PCRE2_JIT_COMPLETE); + if (l_rc != 0) + { + NDBG_PRINT("pcre2_jit_compile failed, %d\n", l_rc); + pcre2_code_free(l_re); return WAFLZ_STATUS_ERROR; } uint32_t l_offset = 0; + PCRE2_SPTR l_str = (PCRE2_SPTR) a_str; uint32_t l_len = strlen(a_str); - int l_rc; - int l_ovector[100]; + pcre2_match_data *l_match_data = pcre2_match_data_create_from_pattern(l_re, nullptr); + if (l_match_data == nullptr) + { + NDBG_PRINT("pcre2_match_data_create_from_pattern failed\n"); + pcre2_code_free(l_re); + return WAFLZ_STATUS_ERROR; + } while (l_offset < l_len) { - l_rc = pcre_exec(l_re, // Compiled pattern - 0, // Study - a_str, // str - l_len, // str len - l_offset, // str offset - 0, // options - l_ovector, // output vector for substr info - sizeof(l_ovector)); // num elements in output vector + l_rc = pcre2_match(l_re, // Compiled pattern + l_str, // str + l_len, // str len + l_offset, // str offset + 0, // options + l_match_data, // output vector for substr info + nullptr); // use default match context if(l_rc < 0) { break; } + PCRE2_SIZE *l_ovector = pcre2_get_ovector_pointer(l_match_data); for(int i_match = 0; i_match < l_rc; ++i_match) { std::string l_match; @@ -2501,6 +2522,8 @@ int32_t get_pcre_match_list(const char *a_regex, const char *a_str, match_list_t } l_offset = l_ovector[1]; } + pcre2_match_data_free(l_match_data); // Release memory used for the match + pcre2_code_free(l_re); return WAFLZ_STATUS_OK; } //! ---------------------------------------------------------------------------- diff --git a/src/op/regex.cc b/src/op/regex.cc index 62b9a297..0e467b8a 100644 --- a/src/op/regex.cc +++ b/src/op/regex.cc @@ -12,7 +12,8 @@ //! ---------------------------------------------------------------------------- #include "waflz/def.h" #include "regex.h" -#include "pcre.h" +#define PCRE2_CODE_UNIT_WIDTH 8 +#include "pcre2.h" #include "support/ndebug.h" #include #include @@ -24,18 +25,19 @@ //! ---------------------------------------------------------------------------- #define _WAFLZ_PCRE_MATCH_LIMIT 1000 #define _WAFLZ_PCRE_MATCH_LIMIT_RECURSION 1000 +#define _WAFLZ_PCRE_GLOBAL_BUFFER_SIZE 256 namespace ns_waflz { +PCRE2_UCHAR g_buffer[_WAFLZ_PCRE_GLOBAL_BUFFER_SIZE]; //! ---------------------------------------------------------------------------- //! \details: TODO //! \return: TODO //! \param: TODO //! ---------------------------------------------------------------------------- regex::regex(void): - m_regex(NULL), - m_regex_study(NULL), - m_regex_str(), - m_err_ptr(NULL), + m_regex(nullptr), + m_ctx(nullptr), + m_err_ptr(0), m_err_off(-1) {} //! ---------------------------------------------------------------------------- @@ -45,19 +47,15 @@ regex::regex(void): //! ---------------------------------------------------------------------------- regex::~regex() { - if(m_regex) + if(m_regex != nullptr) { - pcre_free(m_regex); - m_regex = NULL; + pcre2_code_free(m_regex); + m_regex = nullptr; } - if(m_regex_study) + if (m_ctx != nullptr) { -#ifdef PCRE_STUDY_JIT_COMPILE - pcre_free_study(m_regex_study); -#else - pcre_free(m_regex_study); -#endif - m_regex_study = NULL; + pcre2_match_context_free(m_ctx); + m_ctx = nullptr; } } //! ---------------------------------------------------------------------------- @@ -67,7 +65,7 @@ regex::~regex() //! ---------------------------------------------------------------------------- void regex::get_err_info(const char** a_reason, int& a_offset) { - *a_reason = m_err_ptr; + *a_reason = (char*) g_buffer; a_offset = m_err_off; } //! ---------------------------------------------------------------------------- @@ -87,46 +85,30 @@ int32_t regex::init(const char* a_buf, uint32_t a_len) // ------------------------------------------------- // compile // ------------------------------------------------- - m_regex = pcre_compile(m_regex_str.c_str(), - PCRE_DUPNAMES|PCRE_DOTALL|PCRE_MULTILINE, - &m_err_ptr, - &m_err_off, - NULL); - if(!m_regex) + m_regex = pcre2_compile((PCRE2_SPTR) m_regex_str.c_str(), + m_regex_str.length(), + PCRE2_DUPNAMES|PCRE2_DOTALL|PCRE2_MULTILINE, + &m_err_ptr, + &m_err_off, + nullptr); + if(m_regex == nullptr) { + pcre2_get_error_message(m_err_ptr, g_buffer, sizeof(g_buffer)); return WAFLZ_STATUS_ERROR; } - // ------------------------------------------------- - // study - // ------------------------------------------------- - m_regex_study = pcre_study(m_regex, - s_pcre_study_options, - &m_err_ptr); - // ------------------------------------------------- - // if regex_study NULL not compiled with JIT - // check m_err_ptr for error - // ------------------------------------------------- - if(m_err_ptr) + int l_rc; + l_rc = pcre2_jit_compile(m_regex, PCRE2_JIT_COMPLETE); + if (l_rc != 0) { return WAFLZ_STATUS_ERROR; } - // ------------------------------------------------- - // create study if nul - // ------------------------------------------------- - if(!m_regex_study) + m_ctx = pcre2_match_context_create(nullptr); + if (m_ctx == nullptr) { - m_regex_study = (pcre_extra*)calloc(1, sizeof(pcre_extra)); + return WAFLZ_STATUS_ERROR; } - // ------------------------------------------------- - // set match limits - // ------------------------------------------------- - m_regex_study->flags |= PCRE_EXTRA_MATCH_LIMIT; - m_regex_study->match_limit = _WAFLZ_PCRE_MATCH_LIMIT; - // ------------------------------------------------- - // set recursion limit - // ------------------------------------------------- - m_regex_study->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION; - m_regex_study->match_limit_recursion = _WAFLZ_PCRE_MATCH_LIMIT_RECURSION; + pcre2_set_match_limit(m_ctx, _WAFLZ_PCRE_MATCH_LIMIT); + pcre2_set_recursion_limit(m_ctx, _WAFLZ_PCRE_MATCH_LIMIT_RECURSION); return WAFLZ_STATUS_OK; } //! ---------------------------------------------------------------------------- @@ -148,29 +130,28 @@ int regex::compare(const char* a_buf, uint32_t a_len, std::string* ao_captured) // ----------------------------------------- // match first only // ----------------------------------------- - int l_ovecsize = 2; - int l_ovector[2] = {0}; int l_s; - l_s = pcre_exec(m_regex, - m_regex_study, - a_buf, - a_len, - 0, - 0, - l_ovector, - l_ovecsize); - if(l_s == PCRE_ERROR_MATCHLIMIT || - l_s == PCRE_ERROR_RECURSIONLIMIT) - { - return WAFLZ_STATUS_ERROR; + pcre2_match_data* l_match_data = nullptr; + if (m_regex != nullptr) { + l_match_data = pcre2_match_data_create_from_pattern(m_regex, nullptr); + if (l_match_data == nullptr) + { + return WAFLZ_STATUS_ERROR; + } } - // ----------------------------------------- - // Match succeeded but ovector too small - // ----------------------------------------- - if(l_s == 0) + l_s = pcre2_match(m_regex, + (PCRE2_SPTR) a_buf, + a_len, + 0, + 0, + l_match_data, + m_ctx); + if(l_s == PCRE2_ERROR_MATCHLIMIT || l_s == PCRE2_ERROR_RECURSIONLIMIT) { - l_s = l_ovecsize / 2; + pcre2_match_data_free(l_match_data); // Release memory used for the match + return WAFLZ_STATUS_ERROR; } + PCRE2_SIZE* l_ovector = pcre2_get_ovector_pointer(l_match_data); // ----------------------------------------- // optional save first capture... // ----------------------------------------- @@ -180,6 +161,7 @@ int regex::compare(const char* a_buf, uint32_t a_len, std::string* ao_captured) ao_captured->assign(a_buf + l_ovector[0], (l_ovector[1] - l_ovector[0])); } + pcre2_match_data_free(l_match_data); // Release memory used for the match return l_s; } //! ---------------------------------------------------------------------------- @@ -193,32 +175,38 @@ int regex::compare_all(const char* a_buf, uint32_t a_len, data_list_t* ao_captur // No check for empty input // Input can be empty. e.g empty headers // ----------------------------------------- - int l_ovecsize = 30; - int l_ovector[30] = {0}; int l_s; int l_offset = 0; int l_ret_val = 0; + pcre2_match_data* l_match_data = nullptr; + if (m_regex != nullptr) { + l_match_data = pcre2_match_data_create_from_pattern(m_regex, nullptr); + if (l_match_data == nullptr) + { + return WAFLZ_STATUS_ERROR; + } + } // ----------------------------------------- // Get all matches // ----------------------------------------- do { - l_s = pcre_exec(m_regex, - m_regex_study, - a_buf, - a_len, - l_offset, - 0, - l_ovector, - l_ovecsize); - if(l_s == PCRE_ERROR_MATCHLIMIT || - l_s == PCRE_ERROR_RECURSIONLIMIT) + l_s = pcre2_match(m_regex, + (PCRE2_SPTR) a_buf, + a_len, + l_offset, + 0, + l_match_data, + m_ctx); + if(l_s == PCRE2_ERROR_MATCHLIMIT || l_s == PCRE2_ERROR_RECURSIONLIMIT) { + pcre2_match_data_free(l_match_data); // Release memory used for the match return WAFLZ_STATUS_ERROR; } // --------------------------------- // loop over matches // --------------------------------- + PCRE2_SIZE* l_ovector = pcre2_get_ovector_pointer(l_match_data); for(int i_t = 0; i_t < l_s; ++i_t) { l_ret_val++; @@ -245,6 +233,7 @@ int regex::compare_all(const char* a_buf, uint32_t a_len, data_list_t* ao_captur } } } while (l_s > 0); + pcre2_match_data_free(l_match_data); // Release memory used for the match return l_ret_val; } //! ---------------------------------------------------------------------------- @@ -259,35 +248,23 @@ void regex::display(void) // ------------------------------------------------- int l_s; UNUSED(l_s); -#define _DISPLAY_PCRE_PROP(_what) do { \ - int l_opt; \ - l_s = pcre_fullinfo(m_regex, m_regex_study, _what, &l_opt); \ - NDBG_OUTPUT(":%s: %d\n", #_what, l_opt); \ - } while(0) #define _DISPLAY_PCRE_PROP_U(_what) do { \ uint32_t l_opt; \ - l_s = pcre_fullinfo(m_regex, m_regex_study, _what, &l_opt); \ + l_s = pcre2_pattern_info(m_regex, _what, &l_opt); \ NDBG_OUTPUT(":%s: %u\n", #_what, l_opt); \ } while(0) #define _DISPLAY_PCRE_PROP_UL(_what) do { \ size_t l_opt; \ - l_s = pcre_fullinfo(m_regex, m_regex_study, _what, &l_opt); \ + l_s = pcre2_pattern_info(m_regex, _what, &l_opt); \ NDBG_OUTPUT(":%s: %lu\n", #_what, l_opt); \ } while(0) - _DISPLAY_PCRE_PROP(PCRE_INFO_BACKREFMAX); - _DISPLAY_PCRE_PROP(PCRE_INFO_CAPTURECOUNT); - _DISPLAY_PCRE_PROP(PCRE_INFO_JIT); - _DISPLAY_PCRE_PROP_UL(PCRE_INFO_JITSIZE); - _DISPLAY_PCRE_PROP(PCRE_INFO_MINLENGTH); -#ifndef WAFLZ_PCRE_INFO_FLAGS_MISSING - _DISPLAY_PCRE_PROP_U(PCRE_INFO_MATCHLIMIT); -#endif - _DISPLAY_PCRE_PROP(PCRE_INFO_OPTIONS); - _DISPLAY_PCRE_PROP(PCRE_INFO_SIZE); - _DISPLAY_PCRE_PROP_UL(PCRE_INFO_STUDYSIZE); -#ifndef WAFLZ_PCRE_INFO_FLAGS_MISSING - _DISPLAY_PCRE_PROP_UL(PCRE_INFO_RECURSIONLIMIT); -#endif - _DISPLAY_PCRE_PROP(PCRE_INFO_REQUIREDCHAR); + _DISPLAY_PCRE_PROP_U(PCRE2_INFO_BACKREFMAX); + _DISPLAY_PCRE_PROP_U(PCRE2_INFO_CAPTURECOUNT); + _DISPLAY_PCRE_PROP_UL(PCRE2_INFO_JITSIZE); + _DISPLAY_PCRE_PROP_U(PCRE2_INFO_MINLENGTH); + _DISPLAY_PCRE_PROP_U(PCRE2_INFO_MATCHLIMIT); + _DISPLAY_PCRE_PROP_U(PCRE2_INFO_ARGOPTIONS); + _DISPLAY_PCRE_PROP_U(PCRE2_INFO_SIZE); + _DISPLAY_PCRE_PROP_UL(PCRE2_INFO_RECURSIONLIMIT); } } diff --git a/src/op/regex.h b/src/op/regex.h index 78242f7f..9f35b7f8 100644 --- a/src/op/regex.h +++ b/src/op/regex.h @@ -13,7 +13,8 @@ //! includes //! ---------------------------------------------------------------------------- #include "waflz/def.h" -#include +#define PCRE2_CODE_UNIT_WIDTH 8 +#include #include //! ---------------------------------------------------------------------------- //! fwd decl's @@ -49,20 +50,12 @@ class regex // ------------------------------------------------- // private members // ------------------------------------------------- - pcre* m_regex; - pcre_extra* m_regex_study; + pcre2_code* m_regex; + pcre2_match_context* m_ctx; std::string m_regex_str; // err info - const char* m_err_ptr; - int m_err_off; - // ------------------------------------------------- - // private static - // ------------------------------------------------- -#ifdef PCRE_STUDY_JIT_COMPILE - static const int s_pcre_study_options = PCRE_STUDY_JIT_COMPILE; -#else - static const int s_pcre_study_options = 0; -#endif + int m_err_ptr; + PCRE2_SIZE m_err_off; }; } #endif diff --git a/tests/blackbox/wjc/test_bb_wjc.py b/tests/blackbox/wjc/test_bb_wjc.py index abda6878..15dd6801 100644 --- a/tests/blackbox/wjc/test_bb_wjc.py +++ b/tests/blackbox/wjc/test_bb_wjc.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 '''Test WAF Access settings''' -#TODO: make so waflz_server only runs once and then can post to it +#TODO: make so waflz_server only runs once and then can post to it # ------------------------------------------------------------------------------ # Imports # ------------------------------------------------------------------------------ @@ -41,7 +41,7 @@ def test_bb_wjc_bad_regex(setup_wjc): #print(l_sp_stderr) #print('return code: %d'%(l_sp.returncode)) assert l_sp.returncode != 0 - assert l_sp_stderr == b'init failed for regex: \'**dogs[]\' in general_settings ignore list. Reason: nothing to repeat -offset: 0\n' + assert l_sp_stderr == b'init failed for regex: \'**dogs[]\' in general_settings ignore list. Reason: quantifier does not follow a repeatable item -offset: 0\n' # ------------------------------------------------------------------------------ # test output with bad regex # ------------------------------------------------------------------------------ diff --git a/tests/whitebox/op/wb_regex.cc b/tests/whitebox/op/wb_regex.cc index 914ccc4e..928a7713 100644 --- a/tests/whitebox/op/wb_regex.cc +++ b/tests/whitebox/op/wb_regex.cc @@ -62,6 +62,6 @@ TEST_CASE( "pcre obj test", "[regex]" ) { } uint64_t l_dt_s; l_dt_s = ns_waflz::get_delta_time_ms(l_t_s); - REQUIRE(l_dt_s < 200); + REQUIRE(l_dt_s < 400); } }