diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index a275113f3..fbf49aba6 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -68,9 +68,8 @@ jobs: runs-on: ubuntu-24.04 outputs: - COMMIT_MESSAGE: ${{ steps.setup.outputs.COMMIT_MSG }} + COMMIT_MESSAGE: ${{ steps.getcommitpush.outputs.COMMIT_MSG || steps.getcommitpr.outputs.COMMIT_MSG }} FULL_RUN: ${{ steps.setuppush.outputs.FULL_RUN || steps.setuppr.outputs.FULL_RUN || steps.setupmanual.outputs.FULL_RUN || steps.setupschedule.outputs.FULL_RUN }} - steps: - name: Checkout uses: actions/checkout@v5 @@ -79,11 +78,13 @@ jobs: fetch-depth: 2 - name: Get Commit Message - run: echo "COMMIT_MSG=$(git log -1 --pretty=%B HEAD | tr '\n' ' ')" >> $GITHUB_ENV + id: getcommitpush + run: echo "COMMIT_MSG=$(git log -1 --pretty=%B HEAD | tr '\n' ' ')" >> $GITHUB_OUTPUT if: ${{ github.event_name == 'push' }} - name: Get Commit Message - run: echo "COMMIT_MSG=$(git log -1 --pretty=%B HEAD^2 | tr '\n' ' ')" >> $GITHUB_ENV + id: getcommitpr + run: echo "COMMIT_MSG=$(git log -1 --pretty=%B HEAD^2 | tr '\n' ' ')" >> $GITHUB_OUTPUT if: ${{ github.event_name == 'pull_request' }} - name: Display and Setup Build Args (Push) diff --git a/.github/workflows/conda.yml b/.github/workflows/conda.yml index 6b1a3452f..e3aadfa0a 100644 --- a/.github/workflows/conda.yml +++ b/.github/workflows/conda.yml @@ -1,5 +1,8 @@ name: Conda End-to-end Test +env: + SANITIZER_CI_SCHEDULE: '25 6 * * 5' + on: push: branches: @@ -18,9 +21,16 @@ on: - README.md - "docs/**" workflow_dispatch: + inputs: + sanitizer: + description: "Run sanitized build" + required: false + type: boolean + default: false schedule: # Run conda CI on Monday and Thursday at 1:25am EST (06:25 UTC) - cron: '25 6 * * 1,4' + - cron: '25 6 * * 5' # Run conda sanitized builds on Fridays at 1:25 am EST (06:25 UTC); must equal SANITIZER_CI_SCHEDULE concurrency: group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} @@ -31,7 +41,32 @@ permissions: checks: write 
jobs: + initialize: + runs-on: ubuntu-24.04 + outputs: + SANITIZER: ${{ steps.setupmanual.outputs.SANITIZER || steps.setupschedule.outputs.SANITIZER || 'false' }} # 'false' on push/pull_request, where neither setup step runs + steps: + - name: Display and Setup Build Args (Manual) + id: setupmanual + run: | + echo "Sanitizer: $SANITIZER" + echo "SANITIZER=$SANITIZER" >> $GITHUB_OUTPUT + env: + SANITIZER: ${{ github.event.inputs.sanitizer }} + if: ${{ github.event_name == 'workflow_dispatch' }} + + - name: Display and Setup Build Args (Schedule) + id: setupschedule + run: | + echo "Sanitizer: $SANITIZER" + echo "SANITIZER=$SANITIZER" >> $GITHUB_OUTPUT + env: + SANITIZER: ${{ github.event.schedule == env.SANITIZER_CI_SCHEDULE }} + if: ${{ github.event_name == 'schedule' }} + build: + needs: + - initialize strategy: matrix: os: @@ -81,7 +116,7 @@ jobs: if: ${{ runner.os == 'Windows' }} - name: Python Build Steps - run: make build-conda + run: make build-conda ${{ needs.initialize.outputs.SANITIZER == 'true' && 'ASAN="ON" UBSAN="ON"' || '' }} shell: micromamba-shell {0} if: ${{ runner.os != 'Windows' }} @@ -95,7 +130,12 @@ jobs: - name: Python Test Steps run: make test shell: micromamba-shell {0} - if: ${{ runner.os != 'Windows' }} + if: ${{ runner.os != 'Windows' && needs.initialize.outputs.SANITIZER != 'true' }} + + - name: Python Test Steps (Sanitizer) + run: make test-sanitizer + shell: micromamba-shell {0} + if: ${{ runner.os != 'Windows' && needs.initialize.outputs.SANITIZER == 'true' }} - name: Python Test Steps ( Windows ) run: make test diff --git a/CMakeLists.txt b/CMakeLists.txt index 881c06912..07536e58c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -67,6 +67,8 @@ option(CSP_MANYLINUX "Build for python's manylinux setup" OFF) option(CSP_USE_VCPKG "Build with vcpkg dependencies" ON) option(CSP_USE_CCACHE "Build with ccache caching" OFF) option(CSP_USE_LD_CLASSIC_MAC "On macOS, link with ld_classic" OFF) +option(CSP_ENABLE_ASAN "Build with address sanitizer" OFF) +option(CSP_ENABLE_UBSAN "Build with undefined behavior sanitizer" OFF) 
# Extension options option(CSP_BUILD_KAFKA_ADAPTER "Build kafka adapter" ON) @@ -213,6 +215,18 @@ else() endif() endif() +if(CSP_ENABLE_ASAN) + message(STATUS "Enabling Address Sanitizer") + add_compile_options(-fsanitize=address -fno-omit-frame-pointer) + add_link_options(-fsanitize=address -fno-omit-frame-pointer) +endif() + +if(CSP_ENABLE_UBSAN) + message(STATUS "Enabling Undefined Behavior Sanitizer") + add_compile_options(-fsanitize=undefined -fno-omit-frame-pointer) + add_link_options(-fsanitize=undefined -fno-omit-frame-pointer) +endif() + ################################################################################################################################################### # Messages # diff --git a/Makefile b/Makefile index 631070ad5..60fdd95c8 100644 --- a/Makefile +++ b/Makefile @@ -5,6 +5,9 @@ EXTRA_ARGS := ######### .PHONY: requirements develop build build-debug build-conda install +ASAN := +UBSAN := + requirements: ## install python dev and runtime dependencies ifeq ($(OS),Windows_NT) Powershell.exe -executionpolicy bypass -noprofile .\ci\scripts\windows\make_requirements.ps1 @@ -18,13 +21,13 @@ develop: requirements ## install dependencies and build library python -m pip install -e .[develop] build: ## build the library - python setup.py build build_ext --inplace + CSP_ENABLE_ASAN=$(ASAN) CSP_ENABLE_UBSAN=$(UBSAN) python setup.py build build_ext --inplace build-debug: ## build the library ( DEBUG ) - May need a make clean when switching from regular build to build-debug and vice versa - SKBUILD_CONFIGURE_OPTIONS="" DEBUG=1 python setup.py build build_ext --inplace + CSP_ENABLE_ASAN=$(ASAN) CSP_ENABLE_UBSAN=$(UBSAN) SKBUILD_CONFIGURE_OPTIONS="" DEBUG=1 python setup.py build build_ext --inplace build-conda: ## build the library in Conda - python setup.py build build_ext --csp-no-vcpkg --inplace + CSP_ENABLE_ASAN=$(ASAN) CSP_ENABLE_UBSAN=$(UBSAN) python setup.py build build_ext --csp-no-vcpkg --inplace install: ## install library python -m pip 
install . @@ -78,12 +81,26 @@ checks: check ######### # TESTS # ######### -.PHONY: test-py test-cpp coverage-py test tests +.PHONY: test-py test-cpp test-py-sanitizer coverage-py test test-sanitizer tests TEST_ARGS := test-py: ## Clean and Make unit tests python -m pytest -v csp/tests --junitxml=junit.xml $(TEST_ARGS) +test-py-sanitizer: ## Clean and Make unit tests with sanitizers enabled + @if [ "$$(uname -s)" = "Darwin" ]; then \ + ASAN_OPTIONS=detect_leaks=0,detect_stack_use_after_return=true,use_odr_indicator=1,strict_init_order=true,strict_string_checks=true \ + DYLD_INSERT_LIBRARIES=$$($(CXX) -print-file-name=libclang_rt.asan_osx_dynamic.dylib) \ + python -m pytest -v csp/tests --junitxml=junit.xml $(TEST_ARGS); \ + elif [ "$$(uname -s)" = "Linux" ]; then \ + ASAN_OPTIONS=detect_leaks=0,detect_stack_use_after_return=true,use_odr_indicator=1,strict_init_order=true,strict_string_checks=true \ + LD_PRELOAD=$$($(CXX) -print-file-name=libasan.so) \ + python -m pytest -v csp/tests --junitxml=junit.xml $(TEST_ARGS); \ + else \ + echo "Unsupported platform: $$(uname -s)"; \ + exit 1; \ + fi + test-cpp: ## Make C++ unit tests ifneq ($(OS),Windows_NT) for f in ./csp/tests/bin/*; do $$f; done || (echo "TEST FAILED" && exit 1) @@ -96,6 +113,8 @@ coverage-py: test: test-cpp test-py ## run the tests +test-sanitizer: test-cpp test-py-sanitizer ## run the tests with sanitizers enabled + # Alias tests: test diff --git a/cpp/cmake/modules/Findcsp_autogen.cmake b/cpp/cmake/modules/Findcsp_autogen.cmake index 3e8284bb9..c44a5c163 100644 --- a/cpp/cmake/modules/Findcsp_autogen.cmake +++ b/cpp/cmake/modules/Findcsp_autogen.cmake @@ -26,8 +26,32 @@ function(csp_autogen MODULE_NAME DEST_FILENAME HEADER_NAME_OUTVAR SOURCE_NAME_OU set(CSP_AUTOGEN_PYTHONPATH ${PROJECT_BINARY_DIR}/lib:${CMAKE_SOURCE_DIR}:$$PYTHONPATH) endif() + if(CSP_ENABLE_ASAN) + if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") + # Clang - use DYLD_INSERT_LIBRARIES + execute_process( + COMMAND ${CMAKE_CXX_COMPILER} 
-print-file-name=libclang_rt.asan_osx_dynamic.dylib + OUTPUT_VARIABLE ASAN_LIB_PATH + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + set(PRELOAD_CMD "DYLD_INSERT_LIBRARIES=${ASAN_LIB_PATH}") + elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU") + # GCC - use LD_PRELOAD + execute_process( + COMMAND ${CMAKE_CXX_COMPILER} -print-file-name=libasan.so + OUTPUT_VARIABLE ASAN_LIB_PATH + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + set(PRELOAD_CMD "LD_PRELOAD=${ASAN_LIB_PATH}") + endif() + # Turn off leak checks as we are using PyMalloc when we run autogen + set(ASAN_PRELOAD_CMD "ASAN_OPTIONS=detect_leaks=0" ${PRELOAD_CMD}) + else() + set(ASAN_PRELOAD_CMD "") + endif() + add_custom_command(OUTPUT "${CSP_AUTOGEN_CPP_OUT}" "${CSP_AUTOGEN_H_OUT}" - COMMAND ${CMAKE_COMMAND} -E env "PYTHONPATH=${CSP_AUTOGEN_PYTHONPATH}" ${Python_EXECUTABLE} ${CSP_AUTOGEN_MODULE_PATH} -m ${MODULE_NAME} -d ${CSP_AUTOGEN_DESTINATION_FOLDER} -o ${DEST_FILENAME} ${CSP_AUTOGEN_EXTRA_ARGS} + COMMAND ${CMAKE_COMMAND} -E env "PYTHONPATH=${CSP_AUTOGEN_PYTHONPATH}" ${ASAN_PRELOAD_CMD} ${Python_EXECUTABLE} ${CSP_AUTOGEN_MODULE_PATH} -m ${MODULE_NAME} -d ${CSP_AUTOGEN_DESTINATION_FOLDER} -o ${DEST_FILENAME} ${CSP_AUTOGEN_EXTRA_ARGS} COMMENT "generating csp c++ types from module ${MODULE_NAME}" DEPENDS mkdir_autogen_${MODULE_TARGETNAME} ${CSP_AUTOGEN_MODULE_PATH} diff --git a/cpp/csp/core/DynamicBitSet.h b/cpp/csp/core/DynamicBitSet.h index 5fb120f06..2da4b512b 100644 --- a/cpp/csp/core/DynamicBitSet.h +++ b/cpp/csp/core/DynamicBitSet.h @@ -141,7 +141,8 @@ class DynamicBitSet { node_type * old = m_nodes; m_nodes = new node_type[ newNodes ]; - memcpy( m_nodes, old, m_numNodes * sizeof( node_type ) ); + if( likely( m_numNodes > 0 ) ) + memcpy( m_nodes, old, m_numNodes * sizeof( node_type ) ); memset( m_nodes + m_numNodes, 0, ( newNodes - m_numNodes ) * sizeof( node_type ) ); m_numNodes = newNodes; diff --git a/cpp/csp/core/Exception.cpp b/cpp/csp/core/Exception.cpp index e4480754b..a06c504b5 100644 --- a/cpp/csp/core/Exception.cpp 
+++ b/cpp/csp/core/Exception.cpp @@ -34,7 +34,7 @@ static void printBacktrace( char ** messages, int size, std::ostream & dest ) { char *begin_name = 0, *begin_offset = 0; char tmp[1024]; - strncpy( tmp, messages[i], sizeof(tmp) ); + strncpy( tmp, messages[i], sizeof(tmp) - 1 ); tmp[ sizeof( tmp ) - 1 ] = 0; // find parentheses and +address offset surrounding the mangled name: diff --git a/cpp/csp/python/cspbaselibimpl.cpp b/cpp/csp/python/cspbaselibimpl.cpp index 9289e8024..4e54a5115 100644 --- a/cpp/csp/python/cspbaselibimpl.cpp +++ b/cpp/csp/python/cspbaselibimpl.cpp @@ -274,6 +274,14 @@ DECLARE_CPPNODE( exprtk_impl ) csp.make_passive( inputs ); } + virtual ~exprtk_impl() + { + // Need to release the expression before clearing values/symbol table + // https://github.com/ArashPartow/exprtk/blob/cc1b800c2bd1ac3ac260478c915d2aec6f4eb41c/readme.txt#L909 + s_expr.release(); + s_valuesContainer.clear(); + } + INVOKE() { if( use_trigger ) diff --git a/cpp/csp/python/npstatsimpl.cpp b/cpp/csp/python/npstatsimpl.cpp index 53de3b6b3..d195cfb4c 100644 --- a/cpp/csp/python/npstatsimpl.cpp +++ b/cpp/csp/python/npstatsimpl.cpp @@ -1301,12 +1301,12 @@ DECLARE_CPPNODE( _np_arg_min_max ) PyArray_Descr *descr; PyArray_DescrConverter( date_type, &descr ); Py_XDECREF( date_type ); - DateTime * values = new DateTime[s_elem.size()]; + + PyObject * out = PyArray_NewFromDescr( &PyArray_Type, descr, s_shp.m_dims.size(), &s_shp.m_dims[0], NULL, NULL, 0, NULL ); + DateTime * values = static_cast<DateTime *>( PyArray_DATA( ( PyArrayObject * )out ) ); for( size_t i = 0; i < s_elem.size(); ++i ) values[i] = s_elem[i].compute_dt(); - - PyObject * out = PyArray_NewFromDescr( &PyArray_Type, descr, s_shp.m_dims.size(), &s_shp.m_dims[0], NULL, values, 0, NULL ); - PyArray_ENABLEFLAGS( ( PyArrayObject * ) out, NPY_ARRAY_OWNDATA ); + RETURN( PyObjectPtr::own( out ) ); } } diff --git a/cpp/tests/engine/test_tick_buffer.cpp b/cpp/tests/engine/test_tick_buffer.cpp index 443639f3f..8d97c0422 100644 --- 
a/cpp/tests/engine/test_tick_buffer.cpp +++ b/cpp/tests/engine/test_tick_buffer.cpp @@ -98,4 +98,8 @@ TEST( TickBufferTest, test_flatten ) ASSERT_EQ( values_wrap[ i ], i + 3 ); ASSERT_EQ( values_nowrap[ i ], i ); } + + free( values_wrap ); + free( values_nowrap ); + free( values_single ); } diff --git a/csp/tests/test_engine.py b/csp/tests/test_engine.py index d33261197..8e138e149 100644 --- a/csp/tests/test_engine.py +++ b/csp/tests/test_engine.py @@ -14,6 +14,7 @@ import numpy as np import psutil +import pytest import csp from csp import PushMode, ts @@ -1195,6 +1196,10 @@ def list_comprehension_bug_graph(): rv = csp.run(list_comprehension_bug_graph, starttime=datetime(2020, 1, 1))["Bucket"] self.assertEqual([v[1][0] for v in rv[10:]], list(range(20))) + @pytest.mark.skipif( + os.environ.get("ASAN_OPTIONS") is not None, + reason="Test skipped when AddressSanitizer is enabled, RSS usage is much larger than usual", + ) def test_alarm_leak(self): """this was a leak in Scheduler.cpp""" diff --git a/setup.py b/setup.py index 2f1d6c818..c237df53e 100644 --- a/setup.py +++ b/setup.py @@ -23,6 +23,8 @@ ("CSP_BUILD_KAFKA_ADAPTER", "1"), ("CSP_BUILD_PARQUET_ADAPTER", "1"), ("CSP_BUILD_WS_CLIENT_ADAPTER", "1"), + ("CSP_ENABLE_ASAN", "0"), + ("CSP_ENABLE_UBSAN", "0"), # NOTE: # - omit vcpkg, need to test for presence # - omit ccache, need to test for presence