diff --git a/.github/workflows/clibmouse_pr.yml b/.github/workflows/clibmouse_pr.yml new file mode 100644 index 000000000000..b91a29237a61 --- /dev/null +++ b/.github/workflows/clibmouse_pr.yml @@ -0,0 +1,343 @@ +name: PR Sanity +run-name: ${{ github.actor }} is running PR sanity check 🚀 +env: + # Force the stdout and stderr streams to be unbuffered + PYTHONUNBUFFERED: 1 + +on: # yamllint disable-line rule:truthy + pull_request: + types: + - synchronize + - reopened + - opened + branches: + - Kusto-phase3 + paths-ignore: + - 'docker/docs/**' + - 'docs/**' + - 'website/**' +jobs: + CheckLabels: + runs-on: [self-hosted, style-checker] + steps: + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Labels check + run: | + cd "$GITHUB_WORKSPACE/tests/ci" + python3 run_check.py + PythonUnitTests: + needs: CheckLabels + runs-on: [self-hosted, style-checker] + steps: + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Python unit tests + run: | + cd "$GITHUB_WORKSPACE/tests/ci" + python3 -m unittest discover -s . 
-p '*_test.py' + DockerHubPushAarch64: + needs: CheckLabels + runs-on: [self-hosted, style-checker-aarch64] + steps: + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Images check + run: | + cd "$GITHUB_WORKSPACE/tests/ci" + python3 docker_images_check.py --suffix aarch64 + - name: Upload images files to artifacts + uses: actions/upload-artifact@v2 + with: + name: changed_images_aarch64 + path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json + DockerHubPushAmd64: + needs: CheckLabels + runs-on: [self-hosted, style-checker] + steps: + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Images check + run: | + cd "$GITHUB_WORKSPACE/tests/ci" + python3 docker_images_check.py --suffix amd64 + - name: Upload images files to artifacts + uses: actions/upload-artifact@v2 + with: + name: changed_images_amd64 + path: ${{ runner.temp }}/docker_images_check/changed_images_amd64.json + DockerHubPush: + needs: [DockerHubPushAmd64, DockerHubPushAarch64, PythonUnitTests] + runs-on: [self-hosted, style-checker] + steps: + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Download changed aarch64 images + uses: actions/download-artifact@v2 + with: + name: changed_images_aarch64 + path: ${{ runner.temp }} + - name: Download changed amd64 images + uses: actions/download-artifact@v2 + with: + name: changed_images_amd64 + path: ${{ runner.temp }} + - name: Images check + run: | + cd "$GITHUB_WORKSPACE/tests/ci" + python3 docker_manifests_merge.py --suffix amd64 --suffix aarch64 + - name: Upload images files to artifacts + uses: actions/upload-artifact@v2 + with: + name: changed_images + path: ${{ runner.temp 
}}/changed_images.json + StyleCheck: + needs: DockerHubPush + runs-on: [self-hosted, style-checker] + if: ${{ success() || failure() }} + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{ runner.temp }}/style_check + ROBOT_CLICKHOUSE_SSH_KEY<> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/build_check + IMAGES_PATH=${{runner.temp}}/images_path + REPO_COPY=${{runner.temp}}/build_check/ClickHouse + CACHES_PATH=${{runner.temp}}/../ccaches + BUILD_NAME=package_release + EOF + - name: Download changed images + uses: actions/download-artifact@v3 + with: + name: changed_images + path: ${{ env.IMAGES_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + fetch-depth: 0 # for performance artifact + submodules: true + - name: Build + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" + - name: Upload build URLs to artifacts + if: ${{ success() || failure() }} + uses: actions/upload-artifact@v3 + with: + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + BuilderBinRelease: + needs: [DockerHubPush] + runs-on: [self-hosted, builder] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/build_check + IMAGES_PATH=${{runner.temp}}/images_path + REPO_COPY=${{runner.temp}}/build_check/ClickHouse + CACHES_PATH=${{runner.temp}}/../ccaches + BUILD_NAME=binary_release + EOF + - name: Download changed images + uses: actions/download-artifact@v2 + with: + name: changed_images + path: ${{ env.IMAGES_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - 
name: Check out repository code + uses: actions/checkout@v2 + - name: Build + run: | + git -C "$GITHUB_WORKSPACE" submodule sync --recursive + git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" + - name: Upload build URLs to artifacts + if: ${{ success() || failure() }} + uses: actions/upload-artifact@v2 + with: + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json + - name: Cleanup + if: always() + run: | + # shellcheck disable=SC2046 + docker kill $(docker ps -q) ||: + # shellcheck disable=SC2046 + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" + BuilderBinClangTidy: + needs: [DockerHubPush, StyleCheck] + runs-on: [self-hosted, builder] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/build_check + IMAGES_PATH=${{runner.temp}}/images_path + REPO_COPY=${{runner.temp}}/build_check/ClickHouse + CACHES_PATH=${{runner.temp}}/../ccaches + BUILD_NAME=binary_tidy + EOF + - name: Download changed images + uses: actions/download-artifact@v3 + with: + name: changed_images + path: ${{ env.IMAGES_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true + - name: Build + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" + - name: Upload build URLs to artifacts + if: ${{ success() || failure() }} + uses: actions/upload-artifact@v3 + with: + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + 
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" + UnitTestsReleaseClang: + needs: [BuilderBinRelease] + runs-on: [self-hosted, fuzzer-unit-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/unit_tests_asan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Unit tests (release-clang) + REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Unit test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 unit_tests_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + # shellcheck disable=SC2046 + docker kill $(docker ps -q) ||: + # shellcheck disable=SC2046 + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr "$TEMP_PATH" + FunctionalStatelessTestRelease: + needs: [BuilderDebRelease] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_release + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (release) + REPO_COPY=${{runner.temp}}/stateless_release/ClickHouse + KILL_TIMEOUT=10800 + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all 
--quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" diff --git a/contrib/googletest-cmake/CMakeLists.txt b/contrib/googletest-cmake/CMakeLists.txt index 90fdde0c1859..94c35656987d 100644 --- a/contrib/googletest-cmake/CMakeLists.txt +++ b/contrib/googletest-cmake/CMakeLists.txt @@ -1,15 +1,30 @@ -set (SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/googletest/googletest") +set (SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/googletest") -add_library(_gtest "${SRC_DIR}/src/gtest-all.cc") +add_library(_gtest "${SRC_DIR}/googletest/src/gtest-all.cc") set_target_properties(_gtest PROPERTIES VERSION "1.0.0") target_compile_definitions (_gtest PUBLIC GTEST_HAS_POSIX_RE=0) -target_include_directories(_gtest SYSTEM PUBLIC "${SRC_DIR}/include") -target_include_directories(_gtest PRIVATE "${SRC_DIR}") +target_include_directories(_gtest SYSTEM PUBLIC "${SRC_DIR}/googletest/include") +target_include_directories(_gtest PRIVATE "${SRC_DIR}/googletest") -add_library(_gtest_main "${SRC_DIR}/src/gtest_main.cc") +add_library(_gtest_main "${SRC_DIR}/googletest/src/gtest_main.cc") set_target_properties(_gtest_main PROPERTIES VERSION "1.0.0") target_link_libraries(_gtest_main PUBLIC _gtest) add_library(_gtest_all INTERFACE) target_link_libraries(_gtest_all INTERFACE _gtest _gtest_main) add_library(ch_contrib::gtest_all ALIAS _gtest_all) + +add_library(_gmock "${SRC_DIR}/googlemock/src/gmock-all.cc") +set_target_properties(_gmock PROPERTIES VERSION "1.0.0") +target_compile_definitions (_gmock PUBLIC GTEST_HAS_POSIX_RE=0) +target_include_directories(_gmock SYSTEM PUBLIC "${SRC_DIR}/googlemock/include") +target_include_directories(_gmock PRIVATE "${SRC_DIR}/googlemock") +target_link_libraries(_gmock PUBLIC _gtest) + +add_library(_gmock_main "${SRC_DIR}/googlemock/src/gmock_main.cc") +set_target_properties(_gmock_main PROPERTIES VERSION "1.0.0") +target_link_libraries(_gmock_main PUBLIC _gmock) + +add_library(_gmock_all INTERFACE) +target_link_libraries(_gmock_all INTERFACE 
_gmock _gmock_main) +add_library(ch_contrib::gmock_all ALIAS _gmock_all) diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt index 9b01e6920a45..36e8b051c548 100644 --- a/programs/keeper/CMakeLists.txt +++ b/programs/keeper/CMakeLists.txt @@ -56,7 +56,8 @@ if (BUILD_STANDALONE_KEEPER) ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/WriteBufferFromNuraftBuffer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/ZooKeeperDataReader.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/SettingsFields.cpp + # this will need to be uncommented once dbms is no longer a dependency of standalone clickhouse-keeper + # ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/SettingsFields.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/BaseSettings.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/Field.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/SettingsEnums.cpp @@ -136,7 +137,7 @@ if (BUILD_STANDALONE_KEEPER) clickhouse_common_config_no_zookeeper_log loggers_no_text_log clickhouse_common_io - clickhouse_parsers # Otherwise compression will not built. FIXME. + clickhouse_parsers dbms # Otherwise compression will not built. FIXME. 
${LINK_RESOURCE_LIB_STANDALONE_KEEPER} ) diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index ed3297ed7cb4..ff140702a19b 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -58,15 +58,17 @@ int mainEntryClickHouseKeeper(int argc, char ** argv) } } -#ifdef KEEPER_STANDALONE_BUILD +// this will need to be uncommented once dbms is no longer a dependency of standalone clickhouse-keeper +// +// #ifdef KEEPER_STANDALONE_BUILD -// Weak symbols don't work correctly on Darwin -// so we have a stub implementation to avoid linker errors -void collectCrashLog( - Int32, UInt64, const String &, const StackTrace &) -{} +// // Weak symbols don't work correctly on Darwin +// // so we have a stub implementation to avoid linker errors +// void collectCrashLog( +// Int32, UInt64, const String &, const StackTrace &) +// {} -#endif +// #endif namespace DB { diff --git a/programs/server/users.xml b/programs/server/users.xml index 5e2ff51bf4de..9810feb9a53c 100644 --- a/programs/server/users.xml +++ b/programs/server/users.xml @@ -5,6 +5,15 @@ + + random diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index aef7dc6a38e1..f1dc754766b9 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -599,6 +599,7 @@ if (ENABLE_TESTS) ) target_link_libraries(unit_tests_dbms PRIVATE + ch_contrib::gmock_all ch_contrib::gtest_all clickhouse_functions clickhouse_aggregate_functions diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 96aff9aa304b..3742e3dd895b 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -319,7 +319,8 @@ void ClientBase::setupSignalHandler() ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_multi_statements) const { - std::unique_ptr parser; + ParserQuery parser(end, global_context->getSettings().allow_settings_after_format_in_insert); + ParserKQLStatement kql_parser(end, global_context->getSettings().allow_settings_after_format_in_insert); ASTPtr res; const 
auto & settings = global_context->getSettingsRef(); @@ -330,15 +331,24 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu const Dialect & dialect = settings.dialect; - if (dialect == Dialect::kusto) - parser = std::make_unique(end, global_context->getSettings().allow_settings_after_format_in_insert); - else - parser = std::make_unique(end, global_context->getSettings().allow_settings_after_format_in_insert); + const auto * begin = pos; if (is_interactive || ignore_error) { String message; - res = tryParseQuery(*parser, pos, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth); + if (dialect == Dialect::kusto) + res = tryParseQuery(kql_parser, pos, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth); + else if (dialect == Dialect::kusto_auto) + { + res = tryParseQuery(parser, pos, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth); + if (!res) + { + pos = begin; + res = tryParseQuery(kql_parser, pos, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth); + } + } + else + res = tryParseQuery(parser, pos, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth); if (!res) { @@ -348,7 +358,22 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu } else { - res = parseQueryAndMovePosition(*parser, pos, end, "", allow_multi_statements, max_length, settings.max_parser_depth); + if (dialect == Dialect::kusto) + res = parseQueryAndMovePosition(kql_parser, pos, end, "", allow_multi_statements, max_length, settings.max_parser_depth); + else if (dialect == Dialect::kusto_auto) + { + try + { + res = parseQueryAndMovePosition(parser, pos, end, "", allow_multi_statements, max_length, settings.max_parser_depth); + } + catch (...) 
+ { + pos = begin; + res = parseQueryAndMovePosition(kql_parser, pos, end, "", allow_multi_statements, max_length, settings.max_parser_depth); /* KQL fallback: parse through pos (rewound to begin just above) rather than the local copy begin, so the caller's position is advanced past the statement, matching the interactive branch */ + } + } + else + res = parseQueryAndMovePosition(parser, pos, end, "", allow_multi_statements, max_length, settings.max_parser_depth); } if (is_interactive) @@ -2036,21 +2061,9 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text) bool ClientBase::processQueryText(const String & text) { - auto trimmed_input = trim(text, [](char c) { return isWhitespaceASCII(c) || c == ';'; }); - - if (exit_strings.end() != exit_strings.find(trimmed_input)) + if (exit_strings.end() != exit_strings.find(trim(text, [](char c) { return isWhitespaceASCII(c) || c == ';'; }))) return false; - if (trimmed_input.starts_with("\\i")) - { - size_t skip_prefix_size = std::strlen("\\i"); - auto file_name = trim( - trimmed_input.substr(skip_prefix_size, trimmed_input.size() - skip_prefix_size), - [](char c) { return isWhitespaceASCII(c); }); - - return processMultiQueryFromFile(file_name); - } - if (!is_multiquery) { assert(!query_fuzzer_runs); @@ -2293,17 +2306,6 @@ void ClientBase::runInteractive() } -bool ClientBase::processMultiQueryFromFile(const String & file_name) -{ - String queries_from_file; - - ReadBufferFromFile in(file_name); - readStringUntilEOF(queries_from_file, in); - - return executeMultiQuery(queries_from_file); -} - - void ClientBase::runNonInteractive() { if (delayed_interactive) @@ -2311,13 +2313,23 @@ void ClientBase::runNonInteractive() if (!queries_files.empty()) { + auto process_multi_query_from_file = [&](const String & file) + { + String queries_from_file; + + ReadBufferFromFile in(file); + readStringUntilEOF(queries_from_file, in); + + return executeMultiQuery(queries_from_file); + }; + for (const auto & queries_file : queries_files) { for (const auto & interleave_file : interleave_queries_files) - if (!processMultiQueryFromFile(interleave_file)) + if (!process_multi_query_from_file(interleave_file)) return; 
- if (!processMultiQueryFromFile(queries_file)) + if (!process_multi_query_from_file(queries_file)) return; } diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index bce4f7c0000a..25c6a86b59fb 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -165,7 +165,8 @@ IMPLEMENT_SETTING_ENUM(MsgPackUUIDRepresentation , ErrorCodes::BAD_ARGUMENTS, IMPLEMENT_SETTING_ENUM(Dialect, ErrorCodes::BAD_ARGUMENTS, {{"clickhouse", Dialect::clickhouse}, - {"kusto", Dialect::kusto}}) + {"kusto", Dialect::kusto}, + {"kusto_auto", Dialect::kusto_auto}}) IMPLEMENT_SETTING_ENUM(LocalFSReadMethod, ErrorCodes::BAD_ARGUMENTS, {{"mmap", LocalFSReadMethod::mmap}, diff --git a/src/Core/SortCursor.h b/src/Core/SortCursor.h index 3c412fa1f17d..f0724b6dc7e6 100644 --- a/src/Core/SortCursor.h +++ b/src/Core/SortCursor.h @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Core/callOnTypeIndex.h b/src/Core/callOnTypeIndex.h index 39ce37c4c139..d1d4dd39a9d0 100644 --- a/src/Core/callOnTypeIndex.h +++ b/src/Core/callOnTypeIndex.h @@ -157,6 +157,7 @@ inline bool callOnBasicTypes(TypeIndex type_num1, TypeIndex type_num2, F && f) class DataTypeDate; class DataTypeDate32; +class DataTypeInterval; class DataTypeString; class DataTypeFixedString; class DataTypeUUID; @@ -201,6 +202,8 @@ bool callOnIndexAndDataType(TypeIndex number, F && f, ExtraArgs && ... 
args) case TypeIndex::DateTime: return f(TypePair(), std::forward(args)...); case TypeIndex::DateTime64: return f(TypePair(), std::forward(args)...); + case TypeIndex::Interval: return f(TypePair(), std::forward(args)...); + case TypeIndex::String: return f(TypePair(), std::forward(args)...); case TypeIndex::FixedString: return f(TypePair(), std::forward(args)...); diff --git a/src/DataTypes/DataTypeInterval.h b/src/DataTypes/DataTypeInterval.h index 83d89a734606..d3d33f1280ba 100644 --- a/src/DataTypes/DataTypeInterval.h +++ b/src/DataTypes/DataTypeInterval.h @@ -25,7 +25,7 @@ class DataTypeInterval final : public DataTypeNumberBase IntervalKind getKind() const { return kind; } - explicit DataTypeInterval(IntervalKind kind_) : kind(kind_) {} + explicit DataTypeInterval(IntervalKind kind_ = {}) : kind(kind_) {} std::string doGetName() const override { return fmt::format("Interval{}", kind.toString()); } const char * getFamilyName() const override { return "Interval"; } diff --git a/src/DataTypes/IDataType.h b/src/DataTypes/IDataType.h index e5bdbeca69e3..76eb46a037ab 100644 --- a/src/DataTypes/IDataType.h +++ b/src/DataTypes/IDataType.h @@ -371,6 +371,7 @@ struct WhichDataType constexpr bool isDateTime() const { return idx == TypeIndex::DateTime; } constexpr bool isDateTime64() const { return idx == TypeIndex::DateTime64; } constexpr bool isDateOrDate32() const { return isDate() || isDate32(); } + constexpr bool isDateOrDate32OrDateTimeOrDateTime64() const { return isDate() || isDate32() || isDateTime() || isDateTime64(); } constexpr bool isString() const { return idx == TypeIndex::String; } constexpr bool isFixedString() const { return idx == TypeIndex::FixedString; } @@ -556,6 +557,7 @@ inline bool isNullableOrLowCardinalityNullable(const DataTypePtr & data_type) template constexpr bool IsDataTypeDecimal = false; template constexpr bool IsDataTypeNumber = false; template constexpr bool IsDataTypeDateOrDateTime = false; +template constexpr bool IsDataTypeInterval 
= false; template constexpr bool IsDataTypeEnum = false; template constexpr bool IsDataTypeDecimalOrNumber = IsDataTypeDecimal || IsDataTypeNumber; @@ -571,6 +573,8 @@ class DataTypeDate32; class DataTypeDateTime; class DataTypeDateTime64; +class DataTypeInterval; + template constexpr bool IsDataTypeDecimal> = true; template <> inline constexpr bool IsDataTypeDecimal = true; @@ -581,6 +585,8 @@ template <> inline constexpr bool IsDataTypeDateOrDateTime = tru template <> inline constexpr bool IsDataTypeDateOrDateTime = true; template <> inline constexpr bool IsDataTypeDateOrDateTime = true; +template <> inline constexpr bool IsDataTypeInterval = true; + template class DataTypeEnum; diff --git a/src/DataTypes/registerDataTypeDateTime.cpp b/src/DataTypes/registerDataTypeDateTime.cpp index 5a3e10656b43..2a4f97c0e300 100644 --- a/src/DataTypes/registerDataTypeDateTime.cpp +++ b/src/DataTypes/registerDataTypeDateTime.cpp @@ -1,4 +1,3 @@ - #include #include #include @@ -32,7 +31,7 @@ String getExceptionMessage( template std::conditional_t, T> -getArgument(const ASTPtr & arguments, size_t argument_index, const char * argument_name [[maybe_unused]], const std::string context_data_type_name) +getArgument(const ASTPtr & arguments, size_t argument_index, const char * argument_name [[maybe_unused]], const std::string & context_data_type_name) { using NearestResultType = NearestFieldType; const auto field_type = Field::TypeToEnum::value; diff --git a/src/Functions/CMakeLists.txt b/src/Functions/CMakeLists.txt index ec4dc7a1292e..9e4cf5a3fc5a 100644 --- a/src/Functions/CMakeLists.txt +++ b/src/Functions/CMakeLists.txt @@ -103,6 +103,8 @@ endif() add_subdirectory(JSONPath) list (APPEND PRIVATE_LIBS clickhouse_functions_jsonpath) +add_subdirectory(Kusto) +list (APPEND OBJECT_LIBS $) # Signed integer overflow on user-provided data inside boost::geometry - ignore. 
set_source_files_properties("pointInPolygon.cpp" PROPERTIES COMPILE_FLAGS -fno-sanitize=signed-integer-overflow) diff --git a/src/Functions/DivisionUtils.h b/src/Functions/DivisionUtils.h index b12ab08bbfd7..cf155b4badba 100644 --- a/src/Functions/DivisionUtils.h +++ b/src/Functions/DivisionUtils.h @@ -70,6 +70,7 @@ struct DivideIntegralImpl { using ResultType = typename NumberTraits::ResultOfIntegerDivision::Type; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = true; static const constexpr bool allow_string_integer = false; template @@ -122,6 +123,7 @@ struct ModuloImpl using IntegerBType = typename NumberTraits::ToInteger::Type; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = true; static const constexpr bool allow_string_integer = false; template diff --git a/src/Functions/FunctionBinaryArithmetic.h b/src/Functions/FunctionBinaryArithmetic.h index b951fe89c5fc..f1a891e485c6 100644 --- a/src/Functions/FunctionBinaryArithmetic.h +++ b/src/Functions/FunctionBinaryArithmetic.h @@ -100,6 +100,9 @@ template constexpr bool IsIntegralOrExtendedOrDecimal = IsIntegralOrExtended || IsDataTypeDecimal; +template constexpr bool IsInterval = false; +template <> inline constexpr bool IsInterval = true; + template constexpr bool IsFloatingPoint = false; template <> inline constexpr bool IsFloatingPoint = true; template <> inline constexpr bool IsFloatingPoint = true; @@ -124,6 +127,7 @@ struct BinaryOperationTraits { using T0 = typename LeftDataType::FieldType; using T1 = typename RightDataType::FieldType; + private: /// it's not correct for Decimal using Op = Operation; @@ -203,8 +207,6 @@ template static void NO_INLINE process(const A * __restrict a, const B * __restrict b, ResultType * __restrict c, size_t size, const NullMap * right_nullmap = nullptr) @@ -637,6 +639,8 @@ class FunctionBinaryArithmetic : public IFunction { static constexpr const bool is_plus = 
IsOperation::plus; static constexpr const bool is_minus = IsOperation::minus; + static constexpr const bool is_modulo = IsOperation::modulo; + static constexpr const bool is_modulo_or_zero = IsOperation::modulo_or_zero; static constexpr const bool is_multiply = IsOperation::multiply; static constexpr const bool is_division = IsOperation::division; @@ -674,8 +678,31 @@ class FunctionBinaryArithmetic : public IFunction }); } + static ColumnsWithTypeAndName switchArgumentOrder(const ColumnsWithTypeAndName & arguments) + { + auto new_arguments = arguments; + + /// Interval argument must be second. + if (isDateOrDate32(arguments[1].type) || isDateTime(arguments[1].type) || isDateTime64(arguments[1].type)) + std::swap(new_arguments[0], new_arguments[1]); + + /// Change interval argument type to its representation + if (WhichDataType(new_arguments[1].type).isInterval()) + new_arguments[1].type = std::make_shared>(); + + return new_arguments; + } + + static FunctionOverloadResolverPtr getFunctionForDateTimeArithmetic(const DataTypePtr & type0, const DataTypePtr & type1, ContextPtr context) + { + if (isDateTime64(type0) && isDateTime64(type1) && is_minus) + return FunctionFactory::instance().get("dateTime64Diff", context); + + return {}; + } + static FunctionOverloadResolverPtr - getFunctionForIntervalArithmetic(const DataTypePtr & type0, const DataTypePtr & type1, ContextPtr context) + getFunctionForDateTimeIntervalArithmetic(const DataTypePtr & type0, const DataTypePtr & type1, ContextPtr context) { bool first_is_date_or_datetime = isDateOrDate32(type0) || isDateTime(type0) || isDateTime64(type0); bool second_is_date_or_datetime = isDateOrDate32(type1) || isDateTime(type1) || isDateTime64(type1); @@ -986,18 +1013,37 @@ class FunctionBinaryArithmetic : public IFunction ColumnPtr executeDateTimeIntervalPlusMinus(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, const FunctionOverloadResolverPtr & function_builder) const { - 
ColumnsWithTypeAndName new_arguments = arguments; + const auto new_arguments = switchArgumentOrder(arguments); + auto function = function_builder->build(new_arguments); + return function->execute(new_arguments, result_type, input_rows_count); + } - /// Interval argument must be second. - if (isDateOrDate32(arguments[1].type) || isDateTime(arguments[1].type) || isDateTime64(arguments[1].type)) - std::swap(new_arguments[0], new_arguments[1]); + ColumnPtr + executeInterval(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const size_t input_rows_count) const + { + const auto convert_argument = [this, &input_rows_count](const ColumnWithTypeAndName & argument) + { + if (const WhichDataType which_data_type(*argument.type); which_data_type.isInterval()) + { + const ColumnsWithTypeAndName conversion_args{ + argument, + createConstColumnWithTypeAndName( + DataTypeNumber().getName(), "target_type")}; - /// Change interval argument type to its representation - if (WhichDataType(new_arguments[1].type).isInterval()) - new_arguments[1].type = std::make_shared>(); + const auto converted = executeFunctionCall(context, "cast", conversion_args, input_rows_count); + return asArgument(converted, argument.name); + } - auto function = function_builder->build(new_arguments); - return function->execute(new_arguments, result_type, input_rows_count); + return argument; + }; + + const ColumnsWithTypeAndName adjusted_args{convert_argument(arguments.front()), convert_argument(arguments.back())}; + const auto intermediate = executeFunctionCall(context, name, adjusted_args, input_rows_count); + + const ColumnsWithTypeAndName conversion_args + = {asArgument(intermediate, "intermediate"), + createConstColumnWithTypeAndName(result_type->getName(), "target_type")}; + return executeFunctionCall(context, "accurateCastOrNull", conversion_args, input_rows_count).first; } ColumnPtr executeDateTimeTupleOfIntervalsPlusMinus(const ColumnsWithTypeAndName & arguments, const 
DataTypePtr & result_type, @@ -1225,21 +1271,23 @@ class FunctionBinaryArithmetic : public IFunction return getReturnTypeImplStatic(new_arguments, context); } - /// Special case when the function is plus or minus, one of arguments is Date/DateTime and another is Interval. - if (auto function_builder = getFunctionForIntervalArithmetic(arguments[0], arguments[1], context)) + if (auto function_builder = getFunctionForDateTimeArithmetic(arguments[0], arguments[1], context)) { ColumnsWithTypeAndName new_arguments(2); - for (size_t i = 0; i < 2; ++i) new_arguments[i].type = arguments[i]; - /// Interval argument must be second. - if (isDateOrDate32(new_arguments[1].type) || isDateTime(new_arguments[1].type) || isDateTime64(new_arguments[1].type)) - std::swap(new_arguments[0], new_arguments[1]); + return function_builder->build(new_arguments)->getResultType(); + } - /// Change interval argument to its representation - new_arguments[1].type = std::make_shared>(); + /// Special case when the function is plus or minus, one of arguments is Date/DateTime and another is Interval. 
+ if (auto function_builder = getFunctionForDateTimeIntervalArithmetic(arguments[0], arguments[1], context)) + { + ColumnsWithTypeAndName new_arguments(2); + for (size_t i = 0; i < 2; ++i) + new_arguments[i].type = arguments[i]; + new_arguments = switchArgumentOrder(new_arguments); auto function = function_builder->build(new_arguments); return function->getResultType(); } @@ -1306,6 +1354,7 @@ class FunctionBinaryArithmetic : public IFunction { using LeftDataType = std::decay_t; using RightDataType = std::decay_t; + using ConcreteOp = Op; if constexpr ((std::is_same_v || std::is_same_v) || (std::is_same_v || std::is_same_v)) @@ -1313,7 +1362,7 @@ class FunctionBinaryArithmetic : public IFunction if constexpr (std::is_same_v && std::is_same_v) { - if constexpr (!Op::allow_fixed_string) + if constexpr (!ConcreteOp::allow_fixed_string) return false; else { @@ -1325,7 +1374,7 @@ class FunctionBinaryArithmetic : public IFunction } } - if constexpr (!Op::allow_string_integer) + if constexpr (!ConcreteOp::allow_string_integer) return false; else if constexpr (!IsIntegral) return false; @@ -1335,21 +1384,6 @@ class FunctionBinaryArithmetic : public IFunction type_res = std::make_shared(); return true; } - else if constexpr (std::is_same_v || std::is_same_v) - { - if constexpr (std::is_same_v && - std::is_same_v) - { - if constexpr (is_plus || is_minus) - { - if (left.getKind() == right.getKind()) - { - type_res = std::make_shared(left.getKind()); - return true; - } - } - } - } else { using ResultDataType = typename BinaryOperationTraits::ResultDataType; @@ -1396,6 +1430,42 @@ class FunctionBinaryArithmetic : public IFunction tz = &left; type_res = std::make_shared(*tz); } + else if constexpr (IsInterval || IsInterval) + { + if constexpr (!ConcreteOp::allow_interval) + return false; + + const auto nested_type = std::invoke( + [&]() -> std::shared_ptr + { + static constexpr auto is_left_interval = IsInterval; + static constexpr auto is_right_interval = IsInterval; + if 
constexpr ( + is_left_interval && !is_right_interval + && (is_division || is_modulo || is_modulo_or_zero || is_multiply)) + return std::make_shared(left.getKind()); + else if constexpr (!is_left_interval && is_right_interval && is_multiply) + return std::make_shared(right.getKind()); + else if constexpr ( + is_left_interval && is_right_interval + && (is_division || is_minus || is_modulo || is_modulo_or_zero || is_plus)) + { + if (left.getKind() != right.getKind()) + return {}; + else if constexpr (!is_division) + return std::make_shared(left.getKind()); + else + return std::make_shared(); + } + else + return {}; + }); + + if (nested_type) + type_res = makeNullable(nested_type); + + return static_cast(nested_type); + } else type_res = std::make_shared(); return true; @@ -1741,8 +1811,11 @@ class FunctionBinaryArithmetic : public IFunction return executeImpl(new_arguments, result_type, input_rows_count); } + if (auto function_builder = getFunctionForDateTimeArithmetic(arguments[0].type, arguments[1].type, context)) + return function_builder->build(arguments)->execute(arguments, result_type, input_rows_count); + /// Special case when the function is plus or minus, one of arguments is Date/DateTime and another is Interval. 
- if (auto function_builder = getFunctionForIntervalArithmetic(arguments[0].type, arguments[1].type, context)) + if (auto function_builder = getFunctionForDateTimeIntervalArithmetic(arguments[0].type, arguments[1].type, context)) { return executeDateTimeIntervalPlusMinus(arguments, result_type, input_rows_count, function_builder); } @@ -1802,6 +1875,7 @@ class FunctionBinaryArithmetic : public IFunction { using LeftDataType = std::decay_t; using RightDataType = std::decay_t; + using ConcreteOp = Op; if constexpr ((std::is_same_v || std::is_same_v) || (std::is_same_v || std::is_same_v)) @@ -1809,13 +1883,13 @@ class FunctionBinaryArithmetic : public IFunction if constexpr (std::is_same_v && std::is_same_v) { - if constexpr (!Op::allow_fixed_string) + if constexpr (!ConcreteOp::allow_fixed_string) return false; else return (res = executeFixedString(arguments)) != nullptr; } - if constexpr (!Op::allow_string_integer) + if constexpr (!ConcreteOp::allow_string_integer) return false; else if constexpr (!IsIntegral) return false; @@ -1826,6 +1900,13 @@ class FunctionBinaryArithmetic : public IFunction else if constexpr (std::is_same_v) return (res = executeStringInteger(arguments, left, right)) != nullptr; } + else if constexpr (IsInterval || IsInterval) + { + if constexpr (!ConcreteOp::allow_interval) + return false; + + return (res = executeInterval(arguments, result_type, input_rows_count)) != nullptr; + } else return (res = executeNumeric(arguments, left, right, right_nullmap)) != nullptr; }); diff --git a/src/Functions/FunctionHelpers.cpp b/src/Functions/FunctionHelpers.cpp index c981f6662196..22621f2c08dd 100644 --- a/src/Functions/FunctionHelpers.cpp +++ b/src/Functions/FunctionHelpers.cpp @@ -1,3 +1,5 @@ +#include "FunctionFactory.h" + #include #include #include @@ -312,4 +314,16 @@ bool isDecimalOrNullableDecimal(const DataTypePtr & type) return isDecimal(assert_cast(type.get())->getNestedType()); } +std::pair executeFunctionCall( + const ContextPtr & context, 
const std::string & name, const ColumnsWithTypeAndName & arguments, const size_t input_rows_count) +{ + const auto function = FunctionFactory::instance().get(name, context)->build(arguments); + const auto & result_data_type = function->getResultType(); + return {function->execute(arguments, result_data_type, input_rows_count), result_data_type}; +} + +ColumnWithTypeAndName asArgument(const std::pair & column_with_type, const std::string_view name) +{ + return {column_with_type.first, column_with_type.second, std::string(name)}; +} } diff --git a/src/Functions/FunctionHelpers.h b/src/Functions/FunctionHelpers.h index 18a4e5840809..670fa005f0f2 100644 --- a/src/Functions/FunctionHelpers.h +++ b/src/Functions/FunctionHelpers.h @@ -10,6 +10,7 @@ #include #include #include +#include namespace DB @@ -174,4 +175,15 @@ struct NullPresence NullPresence getNullPresense(const ColumnsWithTypeAndName & args); bool isDecimalOrNullableDecimal(const DataTypePtr & type); + +template +ColumnWithTypeAndName createConstColumnWithTypeAndName(const typename T::FieldType & value, const std::string & name, Args&&... 
args) +{ + return {T().createColumnConst(1, toField(value)), std::make_shared(std::forward(args)...), name}; +} + +std::pair executeFunctionCall( + const ContextPtr & context, const std::string & name, const ColumnsWithTypeAndName & arguments, size_t input_rows_count); + +ColumnWithTypeAndName asArgument(const std::pair & column_with_type, std::string_view name); } diff --git a/src/Functions/FunctionUnaryArithmetic.h b/src/Functions/FunctionUnaryArithmetic.h index cf2f6efc3432..eacc3d81ac66 100644 --- a/src/Functions/FunctionUnaryArithmetic.h +++ b/src/Functions/FunctionUnaryArithmetic.h @@ -239,11 +239,12 @@ class FunctionUnaryArithmetic : public IFunction return false; result = std::make_shared(type.getN()); } - else if constexpr (std::is_same_v) + else if constexpr (std::is_same_v && !is_sign_function) { - if constexpr (!IsUnaryOperation::negate) + if constexpr (!Op::allow_interval) return false; - result = std::make_shared(type.getKind()); + + result = std::make_shared(type.getKind()); } else { diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 01e057e19a1e..5ed7728cdab4 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -117,17 +117,17 @@ REGISTER_FUNCTION(Conversion) factory.registerFunction(); factory.registerFunction(); - factory.registerFunction>(); - factory.registerFunction>(); - factory.registerFunction>(); - factory.registerFunction>(); - factory.registerFunction>(); - factory.registerFunction>(); - factory.registerFunction>(); - factory.registerFunction>(); - factory.registerFunction>(); - factory.registerFunction>(); - factory.registerFunction>(); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); + 
factory.registerFunction(); } } diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index 55003044ff5b..9362ec311aa5 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -2480,6 +2480,17 @@ using FunctionToDate32 = FunctionConvert; using FunctionToDateTime32 = FunctionConvert; using FunctionToDateTime64 = FunctionConvert; +using FunctionToIntervalNanosecond = FunctionConvert; +using FunctionToIntervalMicrosecond = FunctionConvert; +using FunctionToIntervalMillisecond = FunctionConvert; +using FunctionToIntervalSecond = FunctionConvert; +using FunctionToIntervalMinute = FunctionConvert; +using FunctionToIntervalHour = FunctionConvert; +using FunctionToIntervalDay = FunctionConvert; +using FunctionToIntervalWeek = FunctionConvert; +using FunctionToIntervalMonth = FunctionConvert; +using FunctionToIntervalQuarter = FunctionConvert; +using FunctionToIntervalYear = FunctionConvert; using FunctionToUUID = FunctionConvert>; using FunctionToIPv4 = FunctionConvert>; using FunctionToIPv6 = FunctionConvert>; @@ -2842,8 +2853,8 @@ class FunctionCast final : public FunctionCastBase { TypeIndex from_type_index = from_type->getTypeId(); WhichDataType which(from_type_index); - bool can_apply_accurate_cast = (cast_type == CastType::accurate || cast_type == CastType::accurateOrNull) - && (which.isInt() || which.isUInt() || which.isFloat()); + const bool can_apply_accurate_cast = (cast_type == CastType::accurate || cast_type == CastType::accurateOrNull) + && (which.isInt() || which.isUInt() || which.isFloat() || which.isInterval()); if (requested_result_is_nullable && checkAndGetDataType(from_type.get())) { @@ -2855,8 +2866,30 @@ class FunctionCast final : public FunctionCastBase } else if (!can_apply_accurate_cast) { - FunctionPtr function = FunctionTo::Type::create(context); - return createFunctionAdaptor(function, from_type); + if constexpr (std::is_same_v) + { + const auto to_interval_function = 
std::invoke( + [interval_kind = to_type->getKind()] + { + switch (interval_kind) + { +#define DECLARE_CASE(NAME) \ + case IntervalKind::NAME: \ + return FunctionToInterval##NAME::create(); + FOR_EACH_INTERVAL_KIND(DECLARE_CASE) +#undef DECLARE_CASE + } + + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected interval kind"); + }); + + return createFunctionAdaptor(to_interval_function, from_type); + } + else + { + FunctionPtr function = FunctionTo::Type::create(context); + return createFunctionAdaptor(function, from_type); + } } auto wrapper_cast_type = cast_type; @@ -2870,7 +2903,8 @@ class FunctionCast final : public FunctionCastBase using LeftDataType = typename Types::LeftType; using RightDataType = typename Types::RightType; - if constexpr (IsDataTypeNumber && IsDataTypeNumber) + if constexpr ((IsDataTypeNumber || IsDataTypeInterval) + && (IsDataTypeNumber || IsDataTypeInterval)) { if (wrapper_cast_type == CastType::accurate) { @@ -3881,6 +3915,7 @@ class FunctionCast final : public FunctionCastBase std::is_same_v || std::is_same_v || std::is_same_v || + std::is_same_v || std::is_same_v || std::is_same_v || std::is_same_v) diff --git a/src/Functions/FunctionsStringSearchToString.h b/src/Functions/FunctionsStringSearchToString.h index 978a84de472d..6f0e9b2a6449 100644 --- a/src/Functions/FunctionsStringSearchToString.h +++ b/src/Functions/FunctionsStringSearchToString.h @@ -85,4 +85,63 @@ class FunctionsStringSearchToString : public IFunction } }; + +template +class KqlStringSearchToString : public IFunction +{ +public: + static constexpr auto name = Name::name; + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 3; } + + bool useDefaultImplementationForConstants() const override { return true; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } + + bool 
isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if (!isString(arguments[0])) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", arguments[0]->getName(), getName()); + + if (!isString(arguments[1])) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", arguments[1]->getName(), getName()); + + if (!isUnsignedInteger(arguments[2])) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", arguments[2]->getName(), getName()); + + return std::make_shared(); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + { + const ColumnPtr column = arguments[0].column; + const ColumnPtr column_needle = arguments[1].column; + const auto capture = arguments[2].column->getUInt(0); + + const ColumnConst * col_needle = typeid_cast(&*column_needle); + if (!col_needle) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Second argument of function {} must be constant string", getName()); + + if (const ColumnString * col = checkAndGetColumn(column.get())) + { + auto col_res = ColumnString::create(); + + ColumnString::Chars & vec_res = col_res->getChars(); + ColumnString::Offsets & offsets_res = col_res->getOffsets(); + Impl::vector(col->getChars(), col->getOffsets(), col_needle->getValue(), static_cast(capture), vec_res, offsets_res); + + return col_res; + } + else + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", arguments[0].column->getName(), getName()); + } +}; } diff --git a/src/Functions/GCDLCMImpl.h b/src/Functions/GCDLCMImpl.h index df531363c31f..567853bd62c0 100644 --- a/src/Functions/GCDLCMImpl.h +++ b/src/Functions/GCDLCMImpl.h @@ -23,6 +23,7 @@ 
struct GCDLCMImpl { using ResultType = typename NumberTraits::ResultOfAdditionMultiplication::Type; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = true; static const constexpr bool allow_string_integer = false; template diff --git a/src/Functions/IsOperation.h b/src/Functions/IsOperation.h index 39f9114f5e09..830d77f58e35 100644 --- a/src/Functions/IsOperation.h +++ b/src/Functions/IsOperation.h @@ -15,6 +15,7 @@ template struct DivideIntegralOrZeroImpl; template struct LeastBaseImpl; template struct GreatestBaseImpl; template struct ModuloImpl; +template struct ModuloOrZeroImpl; template struct PositiveModuloImpl; template struct EqualsOp; template struct NotEqualsOp; @@ -54,6 +55,7 @@ struct IsOperation static constexpr bool div_int = IsSameOperation::value; static constexpr bool div_int_or_zero = IsSameOperation::value; static constexpr bool modulo = IsSameOperation::value; + static constexpr bool modulo_or_zero = IsSameOperation::value; static constexpr bool positive_modulo = IsSameOperation::value; static constexpr bool least = IsSameOperation::value; static constexpr bool greatest = IsSameOperation::value; diff --git a/src/Functions/Kusto/CMakeLists.txt b/src/Functions/Kusto/CMakeLists.txt new file mode 100644 index 000000000000..3c534905d227 --- /dev/null +++ b/src/Functions/Kusto/CMakeLists.txt @@ -0,0 +1,8 @@ +include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake") +add_headers_and_sources(clickhouse_functions_kusto .) 
+add_library(clickhouse_functions_kusto OBJECT ${clickhouse_functions_kusto_sources} ${clickhouse_functions_kusto_headers}) +target_link_libraries(clickhouse_functions_kusto PRIVATE dbms clickhouse_functions_gatherutils) + +if (OMIT_HEAVY_DEBUG_SYMBOLS) + target_compile_options(clickhouse_functions_kusto PRIVATE "-g0") +endif() diff --git a/src/Functions/Kusto/KqlArrayIif.cpp b/src/Functions/Kusto/KqlArrayIif.cpp new file mode 100644 index 000000000000..bbee6b64bdc9 --- /dev/null +++ b/src/Functions/Kusto/KqlArrayIif.cpp @@ -0,0 +1,147 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + +class FunctionKqlArrayIif : public KqlFunctionBase +{ +public: + static constexpr auto name = "kql_ArrayIif"; + static FunctionPtr create(ContextPtr context) { return std::make_shared(std::move(context)); } + explicit FunctionKqlArrayIif(ContextPtr context_) : context(std::move(context_)) { } + ~FunctionKqlArrayIif() override = default; + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 3; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + bool isVariadic() const override { return true; } + bool useDefaultImplementationForConstants() const override { return true; } + + bool useDefaultImplementationForNulls() const override { return false; } + bool useDefaultImplementationForNothing() const override { return false; } + + static bool isDataTypeBoolORBoolConvertible(std::string_view datatype_name); + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override; + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override; + +private: + ContextPtr context; +}; + +bool 
FunctionKqlArrayIif::isDataTypeBoolORBoolConvertible(std::string_view datatype_name) +{ + if (datatype_name.find("Int") != datatype_name.npos || + datatype_name.find("Float") != datatype_name.npos || + datatype_name.find("Decimal") != datatype_name.npos || + datatype_name.find("Bool") != datatype_name.npos) + return true; + return false; +} + +DataTypePtr FunctionKqlArrayIif::getReturnTypeImpl(const DataTypes & arguments) const +{ + const auto * array_type0 = typeid_cast(arguments[0].get()); + if (!array_type0) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be an array but it has type {}", getName(), + arguments[0]->getName()); + + DataTypePtr nested_type1, nested_type2; + + const auto * array_type1 = typeid_cast(arguments[1].get()); + if (!array_type1) + nested_type1 = makeNullable(arguments[1]->getPtr()); + else + nested_type1 = makeNullable(array_type1->getNestedType()); + + const auto * array_type2 = typeid_cast(arguments[2].get()); + if (!array_type2) + nested_type2 = makeNullable(arguments[2]->getPtr()); + else + nested_type2 = makeNullable(array_type2->getNestedType()); + + if (nested_type1->getName() != nested_type2->getName()) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Last two arguments for function {} must have same nested data type", getName()); + + DataTypes types = {nested_type1, nested_type2}; + + return std::make_shared(getLeastSupertype(types)); +} + +ColumnPtr FunctionKqlArrayIif::executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const size_t input_rows_count) const +{ + const DataTypePtr & elem_type = static_cast(*result_type).getNestedType(); + auto out = ColumnArray::create(elem_type->createColumn()); + + if (input_rows_count == 0) + return out; + + IColumn & out_data = out->getData(); + IColumn::Offsets & out_offsets = out->getOffsets(); + size_t total_length = 0; + for (size_t i = 0; i < input_rows_count; i++) + { + Field array0; + 
arguments[0].column->get(i, array0); + total_length += array0.get().size(); + } + + out_data.reserve(total_length); + out_offsets.resize(input_rows_count); + IColumn::Offset current_offset = 0; + + for (size_t i = 0; i < input_rows_count; i++) + { + Field array0; + arguments[0].column->get(i, array0); + size_t len0 = array0.get().size(); + for (size_t k = 0; k < len0; k++) + { + if (!isDataTypeBoolORBoolConvertible(array0.get().at(k).getTypeName())) + out_data.insert(Field()); + else + { + Field temp; + std::string dump = array0.get().at(k).dump(); + dump = dump.substr(dump.find('_') + 1); + if (dump == "0" || dump == "-0") + arguments[2].column->get(i, temp); + else + arguments[1].column->get(i, temp); + if (temp.getTypeName() == "Array") + { + if (k < temp.get().size()) + out_data.insert(temp.get().at(k)); + else + out_data.insert(Field()); + } + else + out_data.insert(temp); + } + } + current_offset += len0; + out_offsets[i] = current_offset; + } + return out; +} + +REGISTER_FUNCTION(KqlArrayIif) +{ + factory.registerFunction(); +} + +} diff --git a/src/Functions/Kusto/KqlArraySort.cpp b/src/Functions/Kusto/KqlArraySort.cpp new file mode 100644 index 000000000000..5be36328cc37 --- /dev/null +++ b/src/Functions/Kusto/KqlArraySort.cpp @@ -0,0 +1,264 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int ILLEGAL_COLUMN; +} + +template +class FunctionKqlArraySort : public KqlFunctionBase +{ +public: + static constexpr auto name = Name::name; + explicit FunctionKqlArraySort(ContextPtr context_) : context(context_) { } + static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } + + String getName() const override { return name; } + + bool isVariadic() const override { return true; } + size_t getNumberOfArguments() const override { return 0; } + bool 
useDefaultImplementationForConstants() const override { return true; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + if (arguments.empty()) + throw Exception( + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Function {} needs at least one argument; passed {}.", + getName(), + arguments.size()); + + auto array_count = arguments.size(); + + if (!isArray(arguments.at(array_count - 1).type)) + --array_count; + + DataTypes nested_types; + for (size_t index = 0; index < array_count; ++index) + { + const DataTypeArray * array_type = checkAndGetDataType(arguments[index].type.get()); + if (!array_type) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Argument {} of function {} must be array. Found {} instead.", + index + 1, + getName(), + arguments[0].type->getName()); + + nested_types.emplace_back(array_type->getNestedType()); + } + + DataTypes data_types(array_count); + + for (size_t i = 0; i < array_count; ++i) + data_types[i] = std::make_shared(makeNullable(nested_types[i])); + + return std::make_shared(data_types); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + { + size_t array_count = arguments.size(); + const auto & last_arg = arguments[array_count - 1]; + + size_t input_rows_count_local = input_rows_count; + + bool null_last = true; + if (!isArray(last_arg.type)) + { + --array_count; + null_last = check_condition(last_arg, context, input_rows_count_local); + } + + ColumnsWithTypeAndName new_args; + ColumnPtr first_array_column; + std::unordered_set null_indices; + DataTypes nested_types; + + String sort_function = is_desc ? 
"arrayReverseSort" : "arraySort"; + + for (size_t i = 0; i < array_count; ++i) + { + ColumnPtr holder = arguments[i].column->convertToFullColumnIfConst(); + + const ColumnArray * column_array = checkAndGetColumn(holder.get()); + const DataTypeArray * array_type = checkAndGetDataType(arguments[i].type.get()); + + if (!column_array) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Argument {} of function {} must be array. Found column {} instead.", + i + 1, + getName(), + holder->getName()); + + nested_types.emplace_back(makeNullable(array_type->getNestedType())); + if (i == 0) + { + first_array_column = holder; + new_args.push_back(arguments[i]); + } + else if (!column_array->hasEqualOffsets(static_cast(*first_array_column))) + { + null_indices.insert(i); + } + else + new_args.push_back(arguments[i]); + } + + auto zipped + = FunctionFactory::instance().get("arrayZip", context)->build(new_args)->execute(new_args, result_type, input_rows_count_local); + + ColumnsWithTypeAndName sort_arg({{zipped, std::make_shared(result_type), "zipped"}}); + auto sorted_tuple + = FunctionFactory::instance().get(sort_function, context)->build(sort_arg)->execute(sort_arg, result_type, input_rows_count_local); + + auto null_type = std::make_shared(std::make_shared()); + + Columns tuple_columns(array_count); + size_t sorted_index = 0; + for (size_t i = 0; i < array_count; ++i) + { + if (null_indices.contains(i)) + { + auto fun_array = FunctionFactory::instance().get("array", context); + + DataTypePtr arg_type + = std::make_shared(makeNullable(nested_types[i])); + + ColumnsWithTypeAndName null_array_arg({ + {null_type->createColumnConstWithDefaultValue(input_rows_count_local), null_type, "NULL"}, + }); + + tuple_columns[i] = fun_array->build(null_array_arg)->execute(null_array_arg, arg_type, input_rows_count_local); + tuple_columns[i] = tuple_columns[i]->convertToFullColumnIfConst(); + } + else + { + ColumnsWithTypeAndName untuple_args( + {{ColumnWithTypeAndName(sorted_tuple, 
std::make_shared(result_type), "sorted")}, + {DataTypeUInt8().createColumnConst(1, toField(UInt8(sorted_index + 1))), std::make_shared(), ""}}); + auto tuple_coulmn = FunctionFactory::instance() + .get("tupleElement", context) + ->build(untuple_args) + ->execute(untuple_args, result_type, input_rows_count_local); + + auto out_tmp = ColumnArray::create(nested_types[i]->createColumn()); + + size_t array_size = tuple_coulmn->size(); + const auto * arr = checkAndGetColumn(tuple_coulmn.get()); + + for (size_t j = 0; j < array_size; ++j) + { + Field arr_field; + arr->get(j, arr_field); + out_tmp->insert(arr_field); + } + + tuple_columns[i] = std::move(out_tmp); + + ++sorted_index; + } + } + + if (!null_last) + { + Columns adjusted_columns(array_count); + + ColumnWithTypeAndName arg_of_index{nullptr, std::make_shared(nested_types[0]), "array"}; + arg_of_index.column = tuple_columns[0]; + + auto inside_null_type = nested_types[0]; + ColumnsWithTypeAndName indexof_args({ + arg_of_index, + {inside_null_type->createColumnConstWithDefaultValue(input_rows_count_local), inside_null_type, "NULL"}, + }); + + auto null_index_datetype = std::make_shared(); + + ColumnWithTypeAndName slice_index{nullptr, null_index_datetype, ""}; + slice_index.column = FunctionFactory::instance() + .get("indexOf", context) + ->build(indexof_args) + ->execute(indexof_args, result_type, input_rows_count_local); + + auto null_index_in_array = slice_index.column->get64(0); + if (null_index_in_array > 0) + { + ColumnWithTypeAndName slice_index_len{nullptr, null_index_datetype, ""}; + slice_index_len.column = DataTypeUInt64().createColumnConst(1, toField(UInt64(null_index_in_array - 1))); + + auto fun_slice = FunctionFactory::instance().get("arraySlice", context); + + for (size_t i = 0; i < array_count; ++i) + { + if (null_indices.contains(i)) + { + adjusted_columns[i] = std::move(tuple_columns[i]); + } + else + { + DataTypePtr arg_type = std::make_shared(nested_types[i]); + + ColumnsWithTypeAndName 
slice_args_left( + {{ColumnWithTypeAndName(tuple_columns[i], arg_type, "array")}, + {DataTypeUInt8().createColumnConst(1, toField(UInt8(1))), std::make_shared(), ""}, + slice_index_len}); + + ColumnsWithTypeAndName slice_args_right( + {{ColumnWithTypeAndName(tuple_columns[i], arg_type, "array")}, slice_index}); + ColumnWithTypeAndName arr_left{ + fun_slice->build(slice_args_left)->execute(slice_args_left, arg_type, input_rows_count_local), arg_type, ""}; + ColumnWithTypeAndName arr_right{ + fun_slice->build(slice_args_right)->execute(slice_args_right, arg_type, input_rows_count_local), arg_type, ""}; + + ColumnsWithTypeAndName arr_cancat({arr_right, arr_left}); + auto out_tmp = FunctionFactory::instance() + .get("arrayConcat", context) + ->build(arr_cancat) + ->execute(arr_cancat, arg_type, input_rows_count_local); + adjusted_columns[i] = std::move(out_tmp); + } + } + return ColumnTuple::create(adjusted_columns); + } + } + return ColumnTuple::create(tuple_columns); + } + +private: + ContextPtr context; +}; + +struct NameKqlArraySortAsc +{ + static constexpr auto name = "kql_array_sort_asc"; +}; + +struct NameKqlArraySortDesc +{ + static constexpr auto name = "kql_array_sort_desc"; +}; + +using FunctionKqlArraySortAsc = FunctionKqlArraySort; +using FunctionKqlArraySortDesc = FunctionKqlArraySort; + +REGISTER_FUNCTION(KqlArraySort) +{ + factory.registerFunction(); + factory.registerFunction(); +} + +} diff --git a/src/Functions/Kusto/KqlFunctionBase.h b/src/Functions/Kusto/KqlFunctionBase.h new file mode 100644 index 000000000000..efdf8982f4b1 --- /dev/null +++ b/src/Functions/Kusto/KqlFunctionBase.h @@ -0,0 +1,32 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include "Functions/array/FunctionArrayMapped.h" + +namespace DB +{ + +class KqlFunctionBase : public IFunction +{ +public: + static bool check_condition (const ColumnWithTypeAndName & condition, ContextPtr context, size_t input_rows_count) + { + ColumnsWithTypeAndName 
if_columns( + { + condition, + {DataTypeUInt8().createColumnConst(1, toField(UInt8(1))), std::make_shared(), ""}, + {DataTypeUInt8().createColumnConst(1, toField(UInt8(2))), std::make_shared(), ""} + }); + auto if_res = FunctionFactory::instance().get("if", context)->build(if_columns)->execute(if_columns, std::make_shared(), input_rows_count); + auto result = if_res->getUInt(0); + return (result == 1); + } +}; + +} diff --git a/src/Functions/Kusto/KqlIndexOf.cpp b/src/Functions/Kusto/KqlIndexOf.cpp new file mode 100644 index 000000000000..57c97100aab8 --- /dev/null +++ b/src/Functions/Kusto/KqlIndexOf.cpp @@ -0,0 +1,151 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + +class FunctionKqlIndexOf : public KqlFunctionBase +{ +public: + static constexpr auto name = "kql_indexof"; + explicit FunctionKqlIndexOf(ContextPtr context_) : context(context_) { } + static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } + + String getName() const override { return name; } + + bool isVariadic() const override { return true; } + size_t getNumberOfArguments() const override { return 0; } + bool useDefaultImplementationForConstants() const override { return true; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + if (arguments.size() < 2 || 5 < arguments.size()) + throw Exception( + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be 2 to 5.", + getName(), + arguments.size()); + + if (arguments.size() >= 3) + { + for (size_t i = 3; i < arguments.size(); ++i) + if (!isInteger(arguments.at(i).type)) + throw 
Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type of argument of function {}", getName()); + } + + return std::make_shared(std::make_shared()); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + { + int64_t occurrence = 1; + + auto null_type = std::make_shared(std::make_shared()); + auto null_column = null_type->createColumnConstWithDefaultValue(1); + auto not_found_column = DataTypeUInt64().createColumnConst(1, toField(UInt64(0))); + + ColumnPtr column_source = arguments[0].column; + ColumnPtr column_lookup = arguments[1].column; + ColumnPtr column_start_pos = DataTypeUInt64().createColumnConst(input_rows_count, toField(UInt64(1))); + ColumnPtr column_length = DataTypeInt64().createColumnConst(input_rows_count, toField(Int64(-1))); + + if (!isString(arguments[0].type)) + column_source = FunctionFactory::instance() + .get("toString", context) + ->build({arguments[0]}) + ->execute({arguments[0]}, std::make_shared(), input_rows_count); + + if (!isString(arguments[1].type)) + column_lookup = FunctionFactory::instance() + .get("toString", context) + ->build({arguments[1]}) + ->execute({arguments[1]}, std::make_shared(), input_rows_count); + + if (arguments.size() >= 3) + { + auto input_start_column = ColumnUInt64::create(); + for (size_t j = 0; j < input_rows_count; ++j) + { + StringRef source = column_source->getDataAt(j); + auto start_pos = arguments[2].column->getInt(j); + if (start_pos < 0) + { + start_pos = source.size + start_pos; + if (start_pos < 0) + start_pos = 0; + } + ++start_pos; + input_start_column->insertValue(start_pos); + } + column_start_pos = std::move(input_start_column); + } + + if (arguments.size() >= 4) + column_length = arguments[3].column; + + if (arguments.size() == 5) + occurrence = arguments[4].column->getInt(0); //must be a constant + + if (occurrence < 0) + return null_column; + + ColumnPtr last_pos = not_found_column; + for (auto i 
= 0; i < occurrence; ++i) + { + ColumnsWithTypeAndName position_args( + {{ColumnWithTypeAndName(column_source, std::make_shared(), "source")}, + {ColumnWithTypeAndName(column_lookup, std::make_shared(), "lookup")}, + {ColumnWithTypeAndName(column_start_pos, std::make_shared(), "start_pos")}}); + auto pos = FunctionFactory::instance() + .get("position", context) + ->build(position_args) + ->execute(position_args, result_type, input_rows_count); + last_pos = pos; + + auto new_pos_column = ColumnUInt64::create(); + for (size_t j = 0; j < input_rows_count; ++j) + { + new_pos_column->insertValue(pos->getInt(j) + 1); + } + column_start_pos = std::move(new_pos_column); + } + + auto null_map = ColumnUInt8::create(input_rows_count); + auto result_column = ColumnInt64::create(); + for (size_t i = 0; i < input_rows_count; ++i) + { + auto length = column_length->getInt(i); + null_map->getData()[i] = length < -1; + + auto pos_val = last_pos->get64(i); + if (length > -1 && last_pos->get64(i) > UInt64(length) + 1) + pos_val = 0; + result_column->insertValue(Int64(pos_val) -1); // used for kql, so returned index is 0 based + } + return ColumnNullable::create(std::move(result_column), std::move(null_map)); + } + +private: + ContextPtr context; +}; + +REGISTER_FUNCTION(KqlIndexOf) +{ + factory.registerFunction(); +} + +} diff --git a/src/Functions/Kusto/KqlRange.cpp b/src/Functions/Kusto/KqlRange.cpp new file mode 100644 index 000000000000..aef968a636be --- /dev/null +++ b/src/Functions/Kusto/KqlRange.cpp @@ -0,0 +1,865 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +namespace DB +{ +namespace ErrorCodes +{ + extern const int ARGUMENT_OUT_OF_BOUND; + extern const int ILLEGAL_COLUMN; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +static constexpr size_t max_array_size_as_field = 1000000; // the value from 
ColumnArray.cpp + +class FunctionKqlRange : public IFunction +{ +public: + static constexpr auto name = "kql_range"; + + const size_t max_elements; + static FunctionPtr create(ContextPtr context_) { return std::make_shared(std::move(context_)); } + explicit FunctionKqlRange(ContextPtr context) : max_elements(context->getSettingsRef().function_range_max_elements_in_block) { } + +private: + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 0; } + bool isVariadic() const override { return true; } + bool useDefaultImplementationForConstants() const override { return true; } + bool useDefaultImplementationForNulls() const override { return false; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if (arguments.size() < 2 || 3 < arguments.size()) + throw Exception( + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be 2 or 3.", + getName(), + arguments.size()); + + const auto & start = arguments[0]; + const auto & end = arguments[1]; + + WhichDataType start_type(*start); + WhichDataType end_type(*end); + + auto return_type = start; + if (start_type.isNullable()) + { + const auto * nullable_type = checkAndGetDataType(start.get()); + if (nullable_type) + { + return_type = nullable_type->getNestedType(); + start_type = WhichDataType(nullable_type->getNestedType()); + } + } + if (end_type.isNullable()) + { + const auto * nullable_type = checkAndGetDataType(end.get()); + if (nullable_type) + { + end_type = WhichDataType(nullable_type->getNestedType()); + } + } + if ((!start_type.isDateTime64()) && !start_type.isInterval() && !isNumber(start_type)) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type of first argument of function {}, expected DateTime64, Interval or 
Number", + getName()); + + if ((start_type.isDateTime64() && !end_type.isDateTime64()) || (!start_type.isDateTime64() && end_type.isDateTime64())) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Type not match of first and second argument of function {}", getName()); + + if ((start_type.isInterval() && !end_type.isInterval()) || (!start_type.isInterval() && end_type.isInterval())) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Type not match of first and second argument of function {}", getName()); + + if (arguments.size() == 3) + { + const auto & step = arguments[2]; + const WhichDataType step_type(*step); + if (!isNumber(step_type) && !step_type.isInterval()) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type of third argument of function {}, expected Interval or Number", + getName()); + } + + if (start_type.isDateTime64() || start_type.isInterval()) + return std::make_shared(return_type); + DataTypePtr common_type = getLeastSupertype(arguments); + return std::make_shared(common_type); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + { + DataTypePtr elem_type = checkAndGetDataType(result_type.get())->getNestedType(); + WhichDataType which(elem_type); + ColumnPtr res; + const auto & start = arguments[0]; + WhichDataType start_type(*start.type); + + ColumnsWithTypeAndName new_args; + + for (size_t i = 0; i < arguments.size(); ++i) + { + const auto & arg_col = arguments[i]; + WhichDataType arg_type(*arg_col.type); + + if (arg_type.isNullable()) + { + const auto * nullable_type = checkAndGetDataType(arg_col.type.get()); + const auto & nested_type = nullable_type->getNestedType(); + const auto * nullable_column = checkAndGetColumn(*arguments[i].column); + ColumnPtr nested_column = nullable_column->getNestedColumnPtr(); + ColumnWithTypeAndName new_arg{nullptr, nested_type, "new_arg"}; + new_arg.column = nested_column; + 
new_args.push_back(new_arg); + + if (i == 0) + start_type = nested_type; + } + else + new_args.push_back(arguments[i]); + } + + if (start_type.isDateTime64()) + { + return executeDateTime64(new_args, result_type, input_rows_count); + } + + if (start_type.isInterval()) + { + return executeInterval(new_args, result_type, input_rows_count); + } + + Columns columns_holder(3); + ColumnRawPtrs column_ptrs(3); + + for (size_t i = 0; i < new_args.size(); ++i) + { + if (i <= 1) + columns_holder[i] = castColumn(new_args[i], elem_type)->convertToFullColumnIfConst(); + else + columns_holder[i] = castColumn(new_args[i], elem_type); + + column_ptrs[i] = columns_holder[i].get(); + } + + /// Step is one by default. + if (new_args.size() == 2) + { + /// Convert a column with constant 1 to the result type. + if (start_type.isFloat32()) + columns_holder[2] = castColumn( + {DataTypeFloat32().createColumnConst(input_rows_count, 1.0), std::make_shared(), {}}, elem_type); + else if (start_type.isFloat64()) + columns_holder[2] = castColumn( + {DataTypeFloat64().createColumnConst(input_rows_count, 1.0), std::make_shared(), {}}, elem_type); + else if (start_type.isUInt8() || start_type.isUInt16() || start_type.isUInt32() || start_type.isUInt64()) + columns_holder[2] = castColumn( + {DataTypeUInt8().createColumnConst(input_rows_count, 1), std::make_shared(), {}}, elem_type); + else + columns_holder[2] + = castColumn({DataTypeInt8().createColumnConst(input_rows_count, 1), std::make_shared(), {}}, elem_type); + + column_ptrs[2] = columns_holder[2].get(); + } + + bool is_start_const = isColumnConst(*column_ptrs[0]); + bool is_step_const = isColumnConst(*column_ptrs[2]); + + if (is_start_const && is_step_const) + { + UInt64 start_uint = assert_cast(*column_ptrs[0]).getUInt(0); + UInt64 step_uint = assert_cast(*column_ptrs[2]).getUInt(0); + Int64 start_int = assert_cast(*column_ptrs[0]).getInt(0); + Int64 step_int = assert_cast(*column_ptrs[2]).getInt(0); + Float32 start_float32 = 
assert_cast(*column_ptrs[0]).getFloat32(0); + Float32 step_float32 = assert_cast(*column_ptrs[2]).getFloat32(0); + Float64 start_float64 = assert_cast(*column_ptrs[0]).getFloat64(0); + Float64 step_float64 = assert_cast(*column_ptrs[2]).getFloat64(0); + + if ((res = executeConstStartStep(column_ptrs[1], start_uint, step_uint, input_rows_count)) + || (res = executeConstStartStep(column_ptrs[1], start_uint, step_uint, input_rows_count)) + || (res = executeConstStartStep( + column_ptrs[1], static_cast(start_uint), static_cast(step_uint), input_rows_count)) + || (res = executeConstStartStep(column_ptrs[1], start_uint, step_uint, input_rows_count)) + || (res = executeConstStartStep(column_ptrs[1], start_int, step_int, input_rows_count)) + || (res = executeConstStartStep(column_ptrs[1], start_int, step_int, input_rows_count)) + || (res = executeConstStartStep( + column_ptrs[1], static_cast(start_uint), static_cast(step_uint), input_rows_count)) + || (res = executeConstStartStep(column_ptrs[1], start_int, step_int, input_rows_count)) + || (res = executeConstStartStep(column_ptrs[1], start_float32, step_float32, input_rows_count)) + || (res = executeConstStartStep(column_ptrs[1], start_float64, step_float64, input_rows_count))) + { + } + } + else if (is_start_const && !is_step_const) + { + UInt64 start_uint = assert_cast(*column_ptrs[0]).getUInt(0); + Int64 start_int = assert_cast(*column_ptrs[0]).getInt(0); + Float32 start_float32 = assert_cast(*column_ptrs[0]).getFloat32(0); + Float64 start_float64 = assert_cast(*column_ptrs[0]).getFloat64(0); + + if ((res = executeConstStart(column_ptrs[1], column_ptrs[2], start_uint, input_rows_count)) + || (res = executeConstStart(column_ptrs[1], column_ptrs[2], start_uint, input_rows_count)) + || (res = executeConstStart(column_ptrs[1], column_ptrs[2], static_cast(start_uint), input_rows_count)) + || (res = executeConstStart(column_ptrs[1], column_ptrs[2], start_uint, input_rows_count)) + || (res = executeConstStart(column_ptrs[1], 
column_ptrs[2], start_int, input_rows_count)) + || (res = executeConstStart(column_ptrs[1], column_ptrs[2], start_int, input_rows_count)) + || (res = executeConstStart(column_ptrs[1], column_ptrs[2], static_cast(start_uint), input_rows_count)) + || (res = executeConstStart(column_ptrs[1], column_ptrs[2], start_int, input_rows_count)) + || (res = executeConstStart(column_ptrs[1], column_ptrs[2], start_float32, input_rows_count)) + || (res = executeConstStart(column_ptrs[1], column_ptrs[2], start_float64, input_rows_count))) + { + } + } + else if (!is_start_const && is_step_const) + { + UInt64 step_uint = assert_cast(*column_ptrs[2]).getUInt(0); + Int64 step_int = assert_cast(*column_ptrs[2]).getInt(0); + Float32 step_float32 = assert_cast(*column_ptrs[2]).getFloat32(0); + Float64 step_float64 = assert_cast(*column_ptrs[2]).getFloat64(0); + + if ((res = executeConstStep(column_ptrs[0], column_ptrs[1], step_uint, input_rows_count)) + || (res = executeConstStep(column_ptrs[0], column_ptrs[1], step_uint, input_rows_count)) + || (res = executeConstStep(column_ptrs[0], column_ptrs[1], step_uint, input_rows_count)) + || (res = executeConstStep(column_ptrs[0], column_ptrs[1], static_cast(step_uint), input_rows_count)) + || (res = executeConstStep(column_ptrs[0], column_ptrs[1], step_int, input_rows_count)) + || (res = executeConstStep(column_ptrs[0], column_ptrs[1], step_int, input_rows_count)) + || (res = executeConstStep(column_ptrs[0], column_ptrs[1], static_cast(step_uint), input_rows_count)) + || (res = executeConstStep(column_ptrs[0], column_ptrs[1], step_int, input_rows_count)) + || (res = executeConstStep(column_ptrs[0], column_ptrs[1], step_float32, input_rows_count)) + || (res = executeConstStep(column_ptrs[0], column_ptrs[1], step_float64, input_rows_count))) + { + } + } + else + { + if ((res = executeGeneric(column_ptrs[0], column_ptrs[1], column_ptrs[2], input_rows_count)) + || (res = executeGeneric(column_ptrs[0], column_ptrs[1], column_ptrs[2], 
input_rows_count)) + || (res = executeGeneric(column_ptrs[0], column_ptrs[1], column_ptrs[2], input_rows_count)) + || (res = executeGeneric(column_ptrs[0], column_ptrs[1], column_ptrs[2], input_rows_count)) + || (res = executeGeneric(column_ptrs[0], column_ptrs[1], column_ptrs[2], input_rows_count)) + || (res = executeGeneric(column_ptrs[0], column_ptrs[1], column_ptrs[2], input_rows_count)) + || (res = executeGeneric(column_ptrs[0], column_ptrs[1], column_ptrs[2], input_rows_count)) + || (res = executeGeneric(column_ptrs[0], column_ptrs[1], column_ptrs[2], input_rows_count)) + || (res = executeGeneric(column_ptrs[0], column_ptrs[1], column_ptrs[2], input_rows_count)) + || (res = executeGeneric(column_ptrs[0], column_ptrs[1], column_ptrs[2], input_rows_count))) + { + } + } + + if (!res) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, "Illegal columns {} of argument of function {}", column_ptrs[0]->getName(), getName()); + + return res; + } + + template + ColumnPtr executeConstStartStep(const IColumn * end_arg, const T start, const T step, const size_t input_rows_count) const + { + auto end_column = checkAndGetColumn>(end_arg); + if (!end_column) + return nullptr; + + const auto & end_data = end_column->getData(); + + //The maximum number of values in KQL is 1,048,576 (2^20). + size_t total_elements = max_elements < 1048576 ? max_elements : 1048576; + if (total_elements > max_array_size_as_field) + total_elements = max_array_size_as_field; + + size_t total_values = 0; + size_t pre_values = 0; + + for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) + { + if (start < end_data[row_idx] && step == 0) + throw Exception( + ErrorCodes::ARGUMENT_OUT_OF_BOUND, "A call to function {} overflows, the 3rd argument step can't be zero", getName()); + + if (step > 0 && start <= end_data[row_idx]) + { + pre_values += start >= end_data[row_idx] ? 
0 : static_cast((end_data[row_idx] - start) / (step) + 1); + } + + if (step < 0 && start >= end_data[row_idx]) + { + pre_values += start <= end_data[row_idx] ? 0 : static_cast((start - end_data[row_idx]) / (-step) + 1); + } + + if (pre_values < total_values) + throw Exception( + ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "A call to function {} overflows, investigate the values of arguments you are passing", + getName()); + + total_values = pre_values; + if (total_values > total_elements) + total_values = total_elements; + } + + auto data_col = ColumnVector::create(total_values); + auto offsets_col = ColumnArray::ColumnOffsets::create(end_column->size()); + + auto & out_data = data_col->getData(); + auto & out_offsets = offsets_col->getData(); + + IColumn::Offset offset{}; + for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) + { + if (step > 0) + { + T st = start; + T ed = end_data[row_idx]; + while (st <= ed) + { + out_data[offset++] = st; + if (offset >= total_values) + break; + if (st > st + step) + throw Exception( + ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "A call to function {} overflows, investigate the values of arguments you are passing", + getName()); + + st += step; + } + } + else + { + T st = start; + T ed = end_data[row_idx]; + while (st >= ed) + { + out_data[offset++] = st; + if (offset >= total_values) + break; + if (st < st + step) + throw Exception( + ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "A call to function {} overflows, investigate the values of arguments you are passing", + getName()); + + st += step; + } + } + out_offsets[row_idx] = offset; + } + + return ColumnArray::create(std::move(data_col), std::move(offsets_col)); + } + + template + ColumnPtr executeConstStep(const IColumn * start_arg, const IColumn * end_arg, const T step, const size_t input_rows_count) const + { + auto start_column = checkAndGetColumn>(start_arg); + auto end_column = checkAndGetColumn>(end_arg); + if (!end_column || !start_column) + return nullptr; + + const auto & 
start_data = start_column->getData(); + const auto & end_data = end_column->getData(); + + //The maximum number of values in KQL is 1,048,576 (2^20). + size_t total_elements = max_elements < 1048576 ? max_elements : 1048576; + if (total_elements > max_array_size_as_field) + total_elements = max_array_size_as_field; + + size_t total_values = 0; + size_t pre_values = 0; + + for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) + { + if (start_data[row_idx] < end_data[row_idx] && step == 0) + throw Exception( + ErrorCodes::ARGUMENT_OUT_OF_BOUND, "A call to function {} overflows, the 3rd argument step can't be zero", getName()); + + if (step > 0 && start_data[row_idx] <= end_data[row_idx]) + { + pre_values += start_data[row_idx] >= end_data[row_idx] + ? 0 + : static_cast((end_data[row_idx] - start_data[row_idx]) / (step) + 1); + } + + if (step < 0 && start_data[row_idx] >= end_data[row_idx]) + { + pre_values += start_data[row_idx] <= end_data[row_idx] + ? 0 + : static_cast((start_data[row_idx] - end_data[row_idx]) / (-step) + 1); + } + + if (pre_values < total_values) + throw Exception( + ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "A call to function {} overflows, investigate the values of arguments you are passing", + getName()); + + total_values = pre_values; + if (total_values > total_elements) + total_values = total_elements; + } + + auto data_col = ColumnVector::create(total_values); + auto offsets_col = ColumnArray::ColumnOffsets::create(end_column->size()); + + auto & out_data = data_col->getData(); + auto & out_offsets = offsets_col->getData(); + + IColumn::Offset offset{}; + for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) + { + if (step > 0) + { + T st = start_data[row_idx]; + T ed = end_data[row_idx]; + while (st <= ed) + { + out_data[offset++] = st; + if (offset >= total_values) + break; + if (st > st + step) + throw Exception( + ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "A call to function {} overflows, investigate the values of arguments you 
are passing", + getName()); + st += step; + } + } + else + { + T st = start_data[row_idx]; + T ed = end_data[row_idx]; + while (st >= ed) + { + out_data[offset++] = st; + if (offset >= total_values) + break; + if (st < st + step) + throw Exception( + ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "A call to function {} overflows, investigate the values of arguments you are passing", + getName()); + st += step; + } + } + out_offsets[row_idx] = offset; + } + + return ColumnArray::create(std::move(data_col), std::move(offsets_col)); + } + + template + ColumnPtr executeConstStart(const IColumn * end_arg, const IColumn * step_arg, const T start, const size_t input_rows_count) const + { + auto end_column = checkAndGetColumn>(end_arg); + auto step_column = checkAndGetColumn>(step_arg); + if (!end_column || !step_column) + return nullptr; + + const auto & end_data = end_column->getData(); + const auto & step_data = step_column->getData(); + + //The maximum number of values in KQL is 1,048,576 (2^20). + size_t total_elements = max_elements < 1048576 ? max_elements : 1048576; + if (total_elements > max_array_size_as_field) + total_elements = max_array_size_as_field; + + size_t total_values = 0; + size_t pre_values = 0; + + for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) + { + if (start < end_data[row_idx] && step_data[row_idx] == 0) + throw Exception( + ErrorCodes::ARGUMENT_OUT_OF_BOUND, "A call to function {} overflows, the 3rd argument step can't be zero", getName()); + + if (step_data[row_idx] > 0 && start <= end_data[row_idx]) + { + pre_values += start >= end_data[row_idx] ? 0 : static_cast((end_data[row_idx] - start) / (step_data[row_idx]) + 1); + } + + if (step_data[row_idx] < 0 && start >= end_data[row_idx]) + { + pre_values += start <= end_data[row_idx] ? 
0 : static_cast((start - end_data[row_idx]) / (-step_data[row_idx]) + 1); + } + + if (pre_values < total_values) + throw Exception( + ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "A call to function {} overflows, investigate the values of arguments you are passing", + getName()); + + total_values = pre_values; + if (total_values > total_elements) + total_values = total_elements; + } + + auto data_col = ColumnVector::create(total_values); + auto offsets_col = ColumnArray::ColumnOffsets::create(end_column->size()); + + auto & out_data = data_col->getData(); + auto & out_offsets = offsets_col->getData(); + + IColumn::Offset offset{}; + for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) + { + if (step_data[row_idx] > 0) + { + T st = start; + T ed = end_data[row_idx]; + while (st <= ed) + { + out_data[offset++] = st; + if (offset >= total_values) + break; + if (st > st + step_data[row_idx]) + throw Exception( + ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "A call to function {} overflows, investigate the values of arguments you are passing", + getName()); + st += step_data[row_idx]; + } + } + else + { + T st = start; + T ed = end_data[row_idx]; + while (st >= ed) + { + out_data[offset++] = st; + if (offset >= total_values) + break; + if (st < st + step_data[row_idx]) + throw Exception( + ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "A call to function {} overflows, investigate the values of arguments you are passing", + getName()); + st += step_data[row_idx]; + } + } + out_offsets[row_idx] = offset; + } + + return ColumnArray::create(std::move(data_col), std::move(offsets_col)); + } + + template + ColumnPtr + executeGeneric(const IColumn * start_col, const IColumn * end_col, const IColumn * step_col, const size_t input_rows_count) const + { + auto start_column = checkAndGetColumn>(start_col); + auto end_column = checkAndGetColumn>(end_col); + auto step_column = checkAndGetColumn>(step_col); + + if (!start_column || !end_column || !step_column) + return nullptr; + + //The maximum 
number of values in KQL is 1,048,576 (2^20). + size_t total_elements = max_elements < 1048576 ? max_elements : 1048576; + if (total_elements > max_array_size_as_field) + total_elements = max_array_size_as_field; + + const auto & start_data = start_column->getData(); + const auto & end_start = end_column->getData(); + const auto & step_data = step_column->getData(); + + size_t total_values = 0; + size_t pre_values = 0; + + for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) + { + if (start_data[row_idx] < end_start[row_idx] && step_data[row_idx] == 0) + throw Exception( + ErrorCodes::ARGUMENT_OUT_OF_BOUND, "A call to function {} overflows, the 3rd argument step can't be zero", getName()); + + if (step_data[row_idx] > 0 && start_data[row_idx] <= end_start[row_idx]) + { + pre_values += start_data[row_idx] >= end_start[row_idx] + ? 0 + : static_cast((end_start[row_idx] - start_data[row_idx]) / (step_data[row_idx]) + 1); + } + + if (step_data[row_idx] < 0 && start_data[row_idx] >= end_start[row_idx]) + { + pre_values += start_data[row_idx] <= end_start[row_idx] + ? 
0 + : static_cast((start_data[row_idx] - end_start[row_idx]) / (-step_data[row_idx]) + 1); + } + + if (pre_values < total_values) + throw Exception{ + ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "A call to function {} overflows, investigate the values of arguments you are passing", + getName()}; + + total_values = pre_values; + if (total_values > total_elements) + total_values = total_elements; + } + + auto data_col = ColumnVector::create(total_values); + auto offsets_col = ColumnArray::ColumnOffsets::create(end_column->size()); + + auto & out_data = data_col->getData(); + auto & out_offsets = offsets_col->getData(); + + IColumn::Offset offset{}; + for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) + { + if (step_data[row_idx] > 0) + { + auto st = start_data[row_idx]; + auto ed = end_start[row_idx]; + while (st <= ed) + { + out_data[offset++] = st; + if (offset >= total_values) + break; + if (st > st + step_data[row_idx]) + throw Exception( + ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "A call to function {} overflows, investigate the values of arguments you are passing", + getName()); + st += step_data[row_idx]; + } + } + else + { + auto st = start_data[row_idx]; + auto ed = end_start[row_idx]; + while (st >= ed) + { + out_data[offset++] = st; + if (offset >= total_values) + break; + if (st < st + step_data[row_idx]) + throw Exception( + ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "A call to function {} overflows, investigate the values of arguments you are passing", + getName()); + st += step_data[row_idx]; + } + } + out_offsets[row_idx] = offset; + } + + return ColumnArray::create(std::move(data_col), std::move(offsets_col)); + } + + ColumnPtr executeDateTime64(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const + { + DataTypePtr elem_type = checkAndGetDataType(result_type.get())->getNestedType(); + const auto & start_col = arguments[0].column; + const auto & end_col = arguments[1].column; + Int64 step_value = 
3600000000000; + + const auto & start_data = typeid_cast &>(*start_col).getData(); + const auto & end_data = typeid_cast &>(*end_col).getData(); + + //The maximum number of values in KQL is 1,048,576 (2^20). + size_t total_elements = max_elements < 1048576 ? max_elements : 1048576; + if (total_elements > max_array_size_as_field) + total_elements = max_array_size_as_field; + + size_t total_values = 0; + size_t pre_values = 0; + for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) + { + if (arguments.size() > 2) + step_value = arguments[2].column->getInt(row_idx); + + if (start_data[row_idx] < end_data[row_idx] && step_value == 0) + throw Exception( + ErrorCodes::ARGUMENT_OUT_OF_BOUND, "A call to function {} overflows, the 3rd argument step can't be zero", getName()); + + if (step_value > 0 && start_data[row_idx] <= end_data[row_idx]) + { + pre_values += start_data[row_idx] >= end_data[row_idx] + ? 0 + : static_cast((end_data[row_idx] - start_data[row_idx]) / (step_value) + 1); + } + + if (step_value < 0 && start_data[row_idx] >= end_data[row_idx]) + { + pre_values += start_data[row_idx] <= end_data[row_idx] + ? 
0 + : static_cast((start_data[row_idx] - end_data[row_idx]) / (-step_value) + 1); + } + + if (pre_values < total_values) + throw Exception( + ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "A call to function {} overflows, investigate the values of arguments you are passing", + getName()); + + total_values = pre_values; + if (total_values > total_elements) + total_values = total_elements; + } + + auto data_col = ColumnDecimal::create(total_values, 9); + auto offsets_col = ColumnArray::ColumnOffsets::create(end_col->size()); + + auto & out_data = data_col->getData(); + auto & out_offsets = offsets_col->getData(); + IColumn::Offset offset{}; + for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) + { + if (arguments.size() > 2) + step_value = arguments[2].column->getInt(row_idx); + if (step_value > 0) + { + for (size_t st = start_data[row_idx], ed = end_data[row_idx]; st <= ed; st += step_value) + { + out_data[offset++] = st; + if (offset >= total_values) + break; + if (st > st + step_value) + throw Exception( + ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "A call to function {} overflows, investigate the values of arguments you are passing", + getName()); + } + } + else + { + for (size_t st = start_data[row_idx], ed = end_data[row_idx]; st >= ed; st += step_value) + { + out_data[offset++] = st; + if (offset >= total_values) + break; + if (st < st + step_value) + throw Exception( + ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "A call to function {} overflows, investigate the values of arguments you are passing", + getName()); + } + } + out_offsets[row_idx] = offset; + } + return ColumnArray::create(std::move(data_col), std::move(offsets_col)); + } + + ColumnPtr executeInterval(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const + { + DataTypePtr elem_type = checkAndGetDataType(result_type.get())->getNestedType(); + const auto & start_col = arguments[0].column; + const auto & end_col = arguments[1].column; + Int64 step_value = 
3600000000000; + + const auto & start_data = typeid_cast &>(*start_col).getData(); + const auto & end_data = typeid_cast &>(*end_col).getData(); + + //The maximum number of values in KQL is 1,048,576 (2^20). + size_t total_elements = max_elements < 1048576 ? max_elements : 1048576; + if (total_elements > max_array_size_as_field) + total_elements = max_array_size_as_field; + + size_t total_values = 0; + size_t pre_values = 0; + for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) + { + if (arguments.size() > 2) + step_value = arguments[2].column->getInt(row_idx); + + if (start_data[row_idx] < end_data[row_idx] && step_value == 0) + throw Exception( + ErrorCodes::ARGUMENT_OUT_OF_BOUND, "A call to function {} overflows, the 3rd argument step can't be zero", getName()); + + if (step_value > 0 && start_data[row_idx] <= end_data[row_idx]) + { + pre_values += start_data[row_idx] >= end_data[row_idx] + ? 0 + : static_cast((end_data[row_idx] - start_data[row_idx]) / (step_value) + 1); + } + + if (step_value < 0 && start_data[row_idx] >= end_data[row_idx]) + { + pre_values += start_data[row_idx] <= end_data[row_idx] + ? 
0 + : static_cast((start_data[row_idx] - end_data[row_idx]) / (-step_value) + 1); + } + if (pre_values < total_values) + throw Exception( + ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "A call to function {} overflows, investigate the values of arguments you are passing", + getName()); + + total_values = pre_values; + if (total_values > total_elements) + total_values = total_elements; + } + + auto out = ColumnArray::create(std::make_shared(IntervalKind::Nanosecond)->createColumn()); + IColumn & out_data = out->getData(); + IColumn::Offsets & out_offsets = out->getOffsets(); + + out_data.reserve(input_rows_count * total_values); + out_offsets.resize(input_rows_count); + IColumn::Offset current_offset = 0; + for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) + { + if (arguments.size() > 2) + step_value = arguments[2].column->getInt(row_idx); + if (step_value > 0) + { + for (size_t st = start_data[row_idx], ed = end_data[row_idx]; st <= ed; st += step_value) + { + out_data.insert(Field(st)); + current_offset++; + if (current_offset >= total_values) + break; + if (st > st + step_value) + throw Exception( + ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "A call to function {} overflows, investigate the values of arguments you are passing", + getName()); + } + } + else + { + for (size_t st = start_data[row_idx], ed = end_data[row_idx]; st >= ed; st += step_value) + { + out_data.insert(Field(st)); + current_offset++; + if (current_offset >= total_values) + break; + if (st < st + step_value) + throw Exception( + ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "A call to function {} overflows, investigate the values of arguments you are passing", + getName()); + } + } + out_offsets[row_idx] = current_offset; + } + + return out; + } +}; + +REGISTER_FUNCTION(KqlRange) +{ + factory.registerFunction(); +} + +} diff --git a/src/Functions/Kusto/kqlBin.cpp b/src/Functions/Kusto/kqlBin.cpp new file mode 100644 index 000000000000..400ab106441e --- /dev/null +++ b/src/Functions/Kusto/kqlBin.cpp @@ 
-0,0 +1,135 @@ +#include +#include +#include +#include +#include +#include + +namespace +{ +DB::ColumnWithTypeAndName +interpretAsInterval(const DB::ContextPtr & context, const DB::ColumnWithTypeAndName & argument, const size_t input_rows_count) +{ + static constexpr auto NANOSECONDS_PER_SECOND = 1'000'000'000U; + + const DB::ColumnsWithTypeAndName multiply_args{ + argument, DB::createConstColumnWithTypeAndName(NANOSECONDS_PER_SECOND, argument.name)}; + const auto product = executeFunctionCall(context, "multiply", multiply_args, input_rows_count); + + const DB::ColumnsWithTypeAndName to_interval_args{asArgument(product, argument.name)}; + const auto interval = executeFunctionCall(context, "toIntervalNanosecond", to_interval_args, input_rows_count); + + return asArgument(interval, argument.name); +} +} + +namespace DB +{ +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + +class FunctionKqlBin : public IFunction +{ +public: + static constexpr auto name = "kql_bin"; + static FunctionPtr create(ContextPtr context) { return std::make_shared(std::move(context)); } + + explicit FunctionKqlBin(ContextPtr context_) : context(std::move(context_)) { } + ~FunctionKqlBin() override = default; + + ColumnPtr + executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override; + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return 2; } + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override; + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + +private: + ContextPtr context; +}; + +ColumnPtr +FunctionKqlBin::executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const size_t input_rows_count) const +{ + const auto intermediate = std::invoke( + [this, &arguments, &input_rows_count] + { + const auto & round_to_argument 
= arguments.back(); + const auto & value_argument = arguments.front(); + const WhichDataType round_to_which_data_type(*round_to_argument.type); + const WhichDataType value_which_data_type(*value_argument.type); + + const auto & adjusted_round_to + = (value_which_data_type.isDateOrDate32OrDateTimeOrDateTime64() || value_which_data_type.isInterval()) && !round_to_which_data_type.isInterval() + ? interpretAsInterval(context, round_to_argument, input_rows_count) + : round_to_argument; + + if (value_which_data_type.isDateOrDate32OrDateTimeOrDateTime64()) + { + const auto adjusted_args = std::invoke( + [this, &adjusted_round_to, &input_rows_count, &value_argument, &value_which_data_type]() -> ColumnsWithTypeAndName + { + if (value_which_data_type.isDateTime64()) + return {value_argument, adjusted_round_to}; + + const ColumnsWithTypeAndName to_datetime64_args{ + value_argument, + createConstColumnWithTypeAndName(9, "scale"), + createConstColumnWithTypeAndName("UTC", "timezone")}; + + const auto as_datetime64 = executeFunctionCall(context, "toDateTime64", to_datetime64_args, input_rows_count); + return {asArgument(as_datetime64, "as_datetime64"), adjusted_round_to}; + }); + + return executeFunctionCall(context, "toStartOfIntervalOrNull", adjusted_args, input_rows_count); + } + + const ColumnsWithTypeAndName adjusted_args{value_argument, adjusted_round_to}; + const auto quotient = executeFunctionCall(context, "divide", adjusted_args, input_rows_count); + + const ColumnsWithTypeAndName floor_args{asArgument(quotient, adjusted_round_to.name)}; + const auto floored = executeFunctionCall(context, "floor", floor_args, input_rows_count); + + const ColumnsWithTypeAndName multiply_args{asArgument(floored, adjusted_round_to.name), adjusted_round_to}; + return executeFunctionCall(context, "multiply", multiply_args, input_rows_count); + }); + + const ColumnsWithTypeAndName conversion_args{ + asArgument(intermediate, "intermediate"), 
createConstColumnWithTypeAndName(result_type->getName(), "target_type")}; + return executeFunctionCall(context, "accurateCastOrNull", conversion_args, input_rows_count).first; +} + +DataTypePtr FunctionKqlBin::getReturnTypeImpl(const DataTypes & arguments) const +{ + const auto nested_type = std::invoke( + [this, &arguments]() -> DataTypePtr + { + const auto & value_argument = arguments.front(); + const auto & round_to_argument = arguments.back(); + if (const WhichDataType value_which_data_type(*value_argument); value_which_data_type.isInterval() || isNumber(value_which_data_type)) + { + const WhichDataType round_to_which_data_type(*round_to_argument); + return isNumber(value_which_data_type) && (round_to_which_data_type.isFloat() || round_to_which_data_type.isDecimal()) + ? round_to_argument + : value_argument; + } + else if (value_which_data_type.isDateOrDate32OrDateTimeOrDateTime64()) + return std::make_shared(9, "UTC"); + + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of first argument of function {}, expected Date, Date32, DateTime, DateTime64, Interval or Number", + value_argument->getName(), + getName()); + }); + + return makeNullable(nested_type); +} + +REGISTER_FUNCTION(KqlBin) +{ + factory.registerFunction(); +} +} diff --git a/src/Functions/Kusto/kqlBinAt.cpp b/src/Functions/Kusto/kqlBinAt.cpp new file mode 100644 index 000000000000..55c386e3429e --- /dev/null +++ b/src/Functions/Kusto/kqlBinAt.cpp @@ -0,0 +1,98 @@ +#include +#include +#include +#include +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + +class FunctionKqlBinAt : public IFunction +{ +public: + static constexpr auto name = "kql_bin_at"; + static FunctionPtr create(ContextPtr context) { return std::make_shared(std::move(context)); } + + explicit FunctionKqlBinAt(ContextPtr context_) : context(std::move(context_)) { } + ~FunctionKqlBinAt() override = default; + + ColumnPtr + executeImpl(const 
ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override; + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return 3; } + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override; + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + +private: + ContextPtr context; +}; + +ColumnPtr FunctionKqlBinAt::executeImpl( + const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const size_t input_rows_count) const +{ + const auto get_or_convert_argument = [this, &input_rows_count](const ColumnWithTypeAndName & argument) + { + if (const WhichDataType which_data_type(*argument.type); which_data_type.isDateOrDate32() || which_data_type.isDateTime()) + { + const ColumnsWithTypeAndName to_datetime64_args{ + argument, + createConstColumnWithTypeAndName(9, "scale"), + createConstColumnWithTypeAndName("UTC", "timezone")}; + + return asArgument(executeFunctionCall(context, "toDateTime64", to_datetime64_args, input_rows_count), argument.name); + } + + return argument; + }; + + const auto & value_argument = get_or_convert_argument(arguments.front()); + const auto & round_to_argument = arguments[1]; + const auto & offset_argument = get_or_convert_argument(arguments.back()); + + const ColumnsWithTypeAndName subtraction_args{value_argument, offset_argument}; + const auto difference = executeFunctionCall(context, "minus", subtraction_args, input_rows_count); + + const ColumnsWithTypeAndName bin_args{asArgument(difference, "difference"), round_to_argument}; + const auto bin_result = executeFunctionCall(context, "kql_bin", bin_args, input_rows_count); + + const ColumnsWithTypeAndName addition_args{offset_argument, asArgument(bin_result, "bin_result")}; + const auto sum = executeFunctionCall(context, "plus", addition_args, input_rows_count); + + const 
ColumnsWithTypeAndName cast_args{ + asArgument(sum, "sum"), createConstColumnWithTypeAndName(result_type->getName(), "type")}; + return executeFunctionCall(context, "cast", cast_args, input_rows_count).first; +} + +DataTypePtr FunctionKqlBinAt::getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const +{ + const auto & value_type = *arguments.front().type; + const auto & offset_type = *arguments.back().type; + + WhichDataType value_which_data_type(value_type); + WhichDataType offset_which_data_type(offset_type); + if ((value_which_data_type.isDateOrDate32OrDateTimeOrDateTime64() && offset_which_data_type.isDateOrDate32OrDateTimeOrDateTime64()) + || (value_which_data_type.isInterval() && offset_which_data_type.isInterval()) + || (isNumber(value_which_data_type) && isNumber(offset_which_data_type))) + { + const ColumnsWithTypeAndName bin_args{arguments.front(), arguments[1]}; + return FunctionFactory::instance().get("kql_bin", context)->build(bin_args)->getResultType(); + } + + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of third argument of function {}, expected {}", + offset_type.getName(), + getName(), + value_type.getFamilyName()); +} + +REGISTER_FUNCTION(KqlBinAt) +{ + factory.registerFunction(); +} +} diff --git a/src/Functions/Kusto/kqlDateTime.cpp b/src/Functions/Kusto/kqlDateTime.cpp new file mode 100644 index 000000000000..63b9e6572f2d --- /dev/null +++ b/src/Functions/Kusto/kqlDateTime.cpp @@ -0,0 +1,99 @@ +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace DB::ErrorCodes +{ +extern const int LOGICAL_ERROR; +} + +namespace +{ +enum class InputPolicy +{ + Arbitrary, + Constant +}; + +constexpr const char * getDateTimeParsingFunction(const InputPolicy input_policy) +{ + if (input_policy == InputPolicy::Arbitrary) + return "parseDateTime64BestEffortOrNull"; + else if (input_policy == InputPolicy::Constant) + return "parseDateTime64BestEffort"; + + throw 
DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Unhandled input policy: {}", magic_enum::enum_name(input_policy)); +} + +constexpr const char * getFunctionName(const InputPolicy input_policy) +{ + if (input_policy == InputPolicy::Arbitrary) + return "kql_todatetime"; + else if (input_policy == InputPolicy::Constant) + return "kql_datetime"; + + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Unhandled input policy: {}", magic_enum::enum_name(input_policy)); +} +} + +namespace DB +{ +template +class FunctionKqlDateTime : public IFunction +{ +public: + static constexpr auto name = getFunctionName(input_policy); + static FunctionPtr create(ContextPtr context) { return std::make_shared(std::move(context)); } + + explicit FunctionKqlDateTime(ContextPtr context_) : context(std::move(context_)) { } + ~FunctionKqlDateTime() override = default; + + ColumnPtr + executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override; + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return 1; } + DataTypePtr getReturnTypeImpl(const DataTypes &) const override { return makeNullable(std::make_shared(9, "UTC")); } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + +private: + ContextPtr context; +}; + +template +ColumnPtr FunctionKqlDateTime::executeImpl( + const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const size_t input_rows_count) const +{ + const auto & argument = arguments.front(); + const ColumnsWithTypeAndName conversion_args{ + argument, + createConstColumnWithTypeAndName(9, "scale"), + createConstColumnWithTypeAndName("UTC", "timezone")}; + + const auto * const conversion_function + = WhichDataType(*argument.type).isStringOrFixedString() ? 
getDateTimeParsingFunction(input_policy) : "toDateTime64"; + const auto converted = executeFunctionCall(context, conversion_function, conversion_args, input_rows_count); + + const ColumnsWithTypeAndName addition_args{ + asArgument(converted, "converted"), + createConstColumnWithTypeAndName(50, "interval_50", IntervalKind::Nanosecond)}; + const auto sum = executeFunctionCall(context, "plus", addition_args, input_rows_count); + + const ColumnsWithTypeAndName to_start_of_interval_args{ + asArgument(sum, "sum"), createConstColumnWithTypeAndName(100, "interval_100", IntervalKind::Nanosecond)}; + const auto [rounded_column, _] = executeFunctionCall(context, "toStartOfInterval", to_start_of_interval_args, input_rows_count); + + return wrapInNullable(rounded_column, conversion_args, result_type, input_rows_count); +} + +REGISTER_FUNCTION(KqlDateTime) +{ + factory.registerFunction>(); + factory.registerFunction>(); +} +} diff --git a/src/Functions/Kusto/kqlHasAnyIp.h b/src/Functions/Kusto/kqlHasAnyIp.h new file mode 100644 index 000000000000..055215d9a5ef --- /dev/null +++ b/src/Functions/Kusto/kqlHasAnyIp.h @@ -0,0 +1,72 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int BAD_ARGUMENTS; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + + +enum class ArgumentPolicy +{ + Single, + Variadic +}; + +template +DataTypePtr getReturnType(const ColumnsWithTypeAndName & arguments, const std::string & name) +{ + const auto args_length = arguments.size(); + + if (args_length < 2) + { + throw Exception( + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be 2 or more.", + name, + toString(arguments.size())); + } + + if (!isStringOrFixedString(arguments.at(0).type)) + { + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type 
of argument of function {}", name); + } + + if (!isStringOrFixedString(arguments.at(1).type) && !isArray(arguments.at(1).type)) + { + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type of argument of function {}", name); + } + + if (isStringOrFixedString(arguments.at(1).type)) + { + if constexpr (ap == ArgumentPolicy::Variadic) + { + const auto are_arguments_valid = std::ranges::all_of( + arguments | std::views::drop(2), [](const auto & argument) { return isStringOrFixedString(argument.type); }); + if (!are_arguments_valid) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type of argument of function {}", name); + } + } + + else if (ap == ArgumentPolicy::Single || !isArray(arguments.at(1).type)) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Illegal type of argument of function {}", name); + } + + return std::make_shared(); +} +} diff --git a/src/Functions/Kusto/kqlHasAnyIpv4.cpp b/src/Functions/Kusto/kqlHasAnyIpv4.cpp new file mode 100644 index 000000000000..1154200c831e --- /dev/null +++ b/src/Functions/Kusto/kqlHasAnyIpv4.cpp @@ -0,0 +1,132 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static std::vector extractIpsFromArguments( + const DB::ColumnsWithTypeAndName & arguments, const DB::DataTypePtr & result_type, const DB::ContextPtr & context, size_t row) +{ + std::vector ips; + const auto is_ipv4_string = [&, result_type](const DB::ColumnsWithTypeAndName & args) + { return DB::FunctionFactory::instance().get("isIPv4String", context)->build(args)->execute(args, result_type, 1); }; + if (DB::isStringOrFixedString(arguments.at(1).type)) + { + std::ranges::copy_if( + arguments | std::views::drop(1) + | std::views::transform([&row](const DB::ColumnWithTypeAndName & arg) { return arg.column->getDataAt(row).toString(); }), + std::back_inserter(ips), + [&is_ipv4_string](const std::string & arg) + { + const DB::ColumnsWithTypeAndName is_ipv4_string_args + 
= {DB::createConstColumnWithTypeAndName(arg, "ip")}; + const auto is_ipv4 = is_ipv4_string(is_ipv4_string_args); + return is_ipv4->getUInt(0); + }); + } + + else if (isArray(arguments.at(1).type)) + { + DB::Field array0; + arguments[1].column->get(row, array0); + const auto len0 = array0.get().size(); + + for (size_t j = 0; j < len0; ++j) + { + if (const auto & value = array0.get().at(j); value.getType() == DB::Field::Types::String) + { + const auto value_as_string = toString(value); + const DB::ColumnsWithTypeAndName is_ipv4_string_args + = {DB::createConstColumnWithTypeAndName(value_as_string, "ip")}; + const auto is_ipv4 = is_ipv4_string(is_ipv4_string_args); + if (is_ipv4->getUInt(0) == 1) + { + ips.push_back(value_as_string); + } + } + } + } + return ips; +} + +namespace DB +{ +template +class FunctionKqlHasIpv4Generic : public IFunction +{ +public: + static constexpr auto name = Name::name; + static FunctionPtr create(ContextPtr context) { return std::make_shared(std::move(context)); } + + explicit FunctionKqlHasIpv4Generic(ContextPtr context_) : context(std::move(context_)) { } + ~FunctionKqlHasIpv4Generic() override = default; + + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return ap == ArgumentPolicy::Variadic ? 0 : 2; } + bool isVariadic() const override { return ap == ArgumentPolicy::Variadic ? 
true : false; } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + return getReturnType(arguments, getName()); + } + + ColumnPtr + executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const size_t input_rows_count) const override + { + auto result = ColumnUInt8::create(); + auto & result_column = result->getData(); + + for (size_t i = 0; i < input_rows_count; ++i) + { + bool res = false; + const auto ips = extractIpsFromArguments(arguments, result_type, context, i); + + std::string source = arguments[0].column->getDataAt(i).toString(); + const std::regex ip_finder("([^[:alnum:]]|^)([0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3})([^[:alnum:]]|$)"); + std::smatch matches; + + while (!res && std::regex_search(source, matches, ip_finder)) + { + res = std::ranges::any_of( + matches, + [&ips](const std::ssub_match & m) + { return std::ranges::any_of(ips, std::bind_front(std::equal_to(), m)); }); + + source = matches.suffix().str(); + } + result_column.push_back(UInt8(res)); + } + return result; + } + +private: + ContextPtr context; +}; + +struct NameKqlHasAnyIpv4 +{ + static constexpr auto name = "kql_has_any_ipv4"; +}; + +struct NameKqlHasIpv4 +{ + static constexpr auto name = "kql_has_ipv4"; +}; + +using FunctionKqlHasAnyIpv4 = FunctionKqlHasIpv4Generic; +using FunctionKqlHasIpv4 = FunctionKqlHasIpv4Generic; +REGISTER_FUNCTION(KqlHasIpv4Generic) +{ + factory.registerFunction(); + factory.registerFunction(); +} +} diff --git a/src/Functions/Kusto/kqlHasAnyIpv4Prefix.cpp b/src/Functions/Kusto/kqlHasAnyIpv4Prefix.cpp new file mode 100644 index 000000000000..eeac29c30c5d --- /dev/null +++ b/src/Functions/Kusto/kqlHasAnyIpv4Prefix.cpp @@ -0,0 +1,144 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include 
+#include +#include + +static std::vector extractIpsFromArguments(const DB::ColumnsWithTypeAndName & arguments, size_t row) +{ + std::vector ips; + if (DB::isStringOrFixedString(arguments.at(1).type)) + { + std::ranges::copy_if( + arguments | std::views::drop(1) + | std::views::transform([&row](const DB::ColumnWithTypeAndName & arg) { return arg.column->getDataAt(row).toString(); }), + std::back_inserter(ips), + [](const std::string & arg) + { + const auto n = std::ranges::count(arg, '.'); + return n == 3 || (arg.back() == '.' && n <= 2); + }); + } + + else if (isArray(arguments.at(1).type)) + { + DB::Field array0; + arguments[1].column->get(row, array0); + const auto len0 = array0.get().size(); + + for (size_t j = 0; j < len0; ++j) + { + if (const auto & value = array0.get().at(j); value.getType() == DB::Field::Types::String) + { + const auto value_as_string = toString(value); + + const auto n = std::ranges::count(value_as_string, '.'); + if (n == 3 || (value_as_string.back() == '.' && n <= 2)) + { + ips.push_back(value_as_string); + } + } + } + } + return ips; +} + +namespace DB +{ +template +class FunctionKqlHasIpv4PrefixGeneric : public IFunction +{ +public: + static constexpr auto name = Name::name; + static FunctionPtr create(ContextPtr context) { return std::make_shared(std::move(context)); } + + explicit FunctionKqlHasIpv4PrefixGeneric(ContextPtr context_) : context(std::move(context_)) { } + ~FunctionKqlHasIpv4PrefixGeneric() override = default; + + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return ap == ArgumentPolicy::Variadic ? 0 : 2; } + bool isVariadic() const override { return ap == ArgumentPolicy::Variadic ? 
true : false; } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + return getReturnType(arguments, getName()); + } + + ColumnPtr + executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const size_t input_rows_count) const override + { + auto result = ColumnUInt8::create(); + auto & result_column = result->getData(); + + for (size_t i = 0; i < input_rows_count; ++i) + { + bool res = false; + const auto ips = extractIpsFromArguments(arguments, i); + + std::string source = arguments[0].column->getDataAt(i).toString(); + const std::regex ip_finder("([^[:alnum:]]|^)([0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3})([^[:alnum:]]|$)"); + std::smatch matches; + + while (!res && std::regex_search(source, matches, ip_finder)) + { + for (size_t j = 0; j < matches.size(); ++j) + { + const auto match_as_str = matches[j].str(); + + const ColumnsWithTypeAndName is_ipv4_string_args + = {createConstColumnWithTypeAndName(match_as_str, "ip")}; + + const auto is_ipv4 = FunctionFactory::instance() + .get("isIPv4String", context) + ->build(is_ipv4_string_args) + ->execute(is_ipv4_string_args, result_type, 1); + + if (is_ipv4->getUInt(0) == 1) + { + res = std::ranges::any_of( + ips, + [&match_as_str](const std::string & str) -> bool + { return std::memcmp(str.c_str(), match_as_str.c_str(), std::min(str.size(), match_as_str.size())) == 0; }); + } + } + source = matches.suffix().str(); + } + result_column.push_back(UInt8(res)); + } + + return result; + } + +private: + ContextPtr context; +}; + +struct NameKqlHasAnyIpv4Prefix +{ + static constexpr auto name = "kql_has_any_ipv4_prefix"; +}; + +struct NameKqlHasIpv4Prefix +{ + static constexpr auto name = "kql_has_ipv4_prefix"; +}; + +using FunctionKqlHasAnyIpv4Prefix = FunctionKqlHasIpv4PrefixGeneric; +using FunctionKqlHasIpv4Prefix 
= FunctionKqlHasIpv4PrefixGeneric; + +REGISTER_FUNCTION(KqlHasIpv4PrefixGeneric) +{ + factory.registerFunction(); + factory.registerFunction(); +} +} diff --git a/src/Functions/Kusto/kqlToString.cpp b/src/Functions/Kusto/kqlToString.cpp new file mode 100644 index 000000000000..c1eddc93de9d --- /dev/null +++ b/src/Functions/Kusto/kqlToString.cpp @@ -0,0 +1,94 @@ +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace DB +{ +class FunctionKqlToString : public IFunction +{ +public: + static constexpr auto name = "kql_tostring"; + static FunctionPtr create(ContextPtr context) { return std::make_shared(std::move(context)); } + + explicit FunctionKqlToString(ContextPtr context_) : context(std::move(context_)) { } + ~FunctionKqlToString() override = default; + + ColumnPtr + executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override; + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return 1; } + DataTypePtr getReturnTypeImpl(const DataTypes &) const override { return std::make_shared(); } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + +private: + ContextPtr context; +}; + +ColumnPtr +FunctionKqlToString::executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, const size_t input_rows_count) const +{ + const auto & argument = arguments.front(); + if (WhichDataType which_data_type(*argument.type); which_data_type.isInterval()) + { + static const auto TICKS_PER_DAY = ParserKQLTimespan::parse("1d").value(); + static const auto TICKS_PER_HOUR = ParserKQLTimespan::parse("1h").value(); + static const auto TICKS_PER_MINUTE = ParserKQLTimespan::parse("1m").value(); + static const auto TICKS_PER_SECOND = ParserKQLTimespan::parse("1s").value(); + + const auto & in_column = *argument.column; + auto out_column = 
ColumnString::create(); + auto & chars = out_column->getChars(); + auto & offsets = out_column->getOffsets(); + for (size_t i = 0; i < input_rows_count; ++i) + { + const auto value = in_column.getInt(i); + const auto abs_ticks = std::abs(value / 100); + + std::string timespan_as_string = value < 0 ? "-" : ""; + if (abs_ticks >= TICKS_PER_DAY) + timespan_as_string.append(std::format("{}.", abs_ticks / TICKS_PER_DAY)); + + timespan_as_string.append(std::format( + "{:02}:{:02}:{:02}", + (abs_ticks / TICKS_PER_HOUR) % 24, + (abs_ticks / TICKS_PER_MINUTE) % 60, + (abs_ticks / TICKS_PER_SECOND) % 60)); + + if (const auto fractional_second = abs_ticks % TICKS_PER_SECOND) + timespan_as_string.append(std::format(".{:07}", fractional_second)); + + const auto chars_old_length = chars.size(); + const auto str_length_with_terminator = timespan_as_string.length() + 1; + chars.resize(chars.size() + str_length_with_terminator); + std::copy(timespan_as_string.c_str(), timespan_as_string.c_str() + str_length_with_terminator, chars.data() + chars_old_length); + offsets.push_back(chars.size()); + } + + return out_column; + } + else if (which_data_type.isDateOrDate32() || which_data_type.isDateTime() || which_data_type.isDateTime64()) + { + const ColumnsWithTypeAndName to_datetime64_args{argument, createConstColumnWithTypeAndName(7, "scale")}; + const auto as_datetime64 = executeFunctionCall(context, "toDateTime64", to_datetime64_args, input_rows_count); + + const ColumnsWithTypeAndName format_datetime_args + { + asArgument(as_datetime64, "as_datetime64"), createConstColumnWithTypeAndName("%FT%T.%fZ", "format_string") + }; + return executeFunctionCall(context, "formatDateTime", format_datetime_args, input_rows_count).first; + } + + return executeFunctionCall(context, "toString", arguments, input_rows_count).first; +} + +REGISTER_FUNCTION(KqlToString) +{ + factory.registerFunction(); +} +} diff --git a/src/Functions/Kusto/kqlToTimespan.cpp b/src/Functions/Kusto/kqlToTimespan.cpp new 
file mode 100644 index 000000000000..39923de9d187 --- /dev/null +++ b/src/Functions/Kusto/kqlToTimespan.cpp @@ -0,0 +1,89 @@ +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + +class FunctionKqlToTimespan : public IFunction +{ +public: + static constexpr auto name = "kql_totimespan"; + static FunctionPtr create(ContextPtr context) { return std::make_shared(std::move(context)); } + + explicit FunctionKqlToTimespan(ContextPtr context_) : context(std::move(context_)) { } + ~FunctionKqlToTimespan() override = default; + + ColumnPtr + executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override; + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return 1; } + DataTypePtr getReturnTypeImpl(const DataTypes &) const override; + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + bool useDefaultImplementationForConstants() const override { return true; } + +private: + ContextPtr context; +}; + +ColumnPtr FunctionKqlToTimespan::executeImpl( + const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const size_t input_rows_count) const +{ + const auto & argument = arguments.front(); + if (WhichDataType(*argument.type).isInterval()) + return wrapInNullable(argument.column, arguments, result_type, input_rows_count); + + const auto * in_column = typeid_cast(argument.column.get()); + if (!in_column) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of first argument of function {}, expected String", + argument.type->getName(), + getName()); + + auto out_column = result_type->createColumn(); + auto & out_column_as_nullable = assert_cast(*out_column); + auto & out_nested_column = assert_cast(out_column_as_nullable.getNestedColumn()); + 
+ const auto size = in_column->size(); + auto & out_data = out_nested_column.getData(); + auto & out_null_map = out_column_as_nullable.getNullMapData(); + out_data.resize(size); + out_null_map.resize(size); + + const auto & in_chars = in_column->getChars(); + const auto & in_offsets = in_column->getOffsets(); + const auto * in_chars_data = reinterpret_cast(in_chars.data()); + size_t start = 0; + for (size_t i = 0; i < size; ++i) + { + const auto & offset = in_offsets[i]; + std::optional ticks; + const auto success = ParserKQLTimespan::tryParse({in_chars_data + start, offset - start - 1}, ticks); + out_data[i] = ticks.value_or(0) * 100; + out_null_map[i] = !ticks.has_value() || !success; + + start = offset; + } + + return out_column; +} + +DataTypePtr FunctionKqlToTimespan::getReturnTypeImpl(const DataTypes &) const +{ + return makeNullable(std::make_shared(IntervalKind::Nanosecond)); +} + +REGISTER_FUNCTION(KqlToTimespan) +{ + factory.registerFunction(); +} +} diff --git a/src/Functions/abs.cpp b/src/Functions/abs.cpp index a7afdbacad6f..84d6aae9b955 100644 --- a/src/Functions/abs.cpp +++ b/src/Functions/abs.cpp @@ -11,6 +11,7 @@ struct AbsImpl { using ResultType = std::conditional_t, A, typename NumberTraits::ResultOfAbs::Type>; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = false; static const constexpr bool allow_string_integer = false; static inline NO_SANITIZE_UNDEFINED ResultType apply(A a) diff --git a/src/Functions/bitAnd.cpp b/src/Functions/bitAnd.cpp index 28f61ec66e13..62d136c2534c 100644 --- a/src/Functions/bitAnd.cpp +++ b/src/Functions/bitAnd.cpp @@ -16,7 +16,8 @@ template struct BitAndImpl { using ResultType = typename NumberTraits::ResultOfBit::Type; - static constexpr const bool allow_fixed_string = true; + static const constexpr bool allow_fixed_string = true; + static const constexpr bool allow_interval = false; static const constexpr bool allow_string_integer = false; template diff --git 
a/src/Functions/bitBoolMaskAnd.cpp b/src/Functions/bitBoolMaskAnd.cpp index 11c0c1d1b7d6..4e354b5e64ac 100644 --- a/src/Functions/bitBoolMaskAnd.cpp +++ b/src/Functions/bitBoolMaskAnd.cpp @@ -22,6 +22,7 @@ struct BitBoolMaskAndImpl { using ResultType = UInt8; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = false; static const constexpr bool allow_string_integer = false; template diff --git a/src/Functions/bitBoolMaskOr.cpp b/src/Functions/bitBoolMaskOr.cpp index 7940bf3e2caa..42a69a4dc5eb 100644 --- a/src/Functions/bitBoolMaskOr.cpp +++ b/src/Functions/bitBoolMaskOr.cpp @@ -22,6 +22,7 @@ struct BitBoolMaskOrImpl { using ResultType = UInt8; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = false; static const constexpr bool allow_string_integer = false; template diff --git a/src/Functions/bitCount.cpp b/src/Functions/bitCount.cpp index d425dd1dca2b..6b5ac8cbf3a4 100644 --- a/src/Functions/bitCount.cpp +++ b/src/Functions/bitCount.cpp @@ -13,7 +13,8 @@ template struct BitCountImpl { using ResultType = UInt8; - static constexpr bool allow_fixed_string = false; + static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = false; static const constexpr bool allow_string_integer = false; static inline ResultType apply(A a) diff --git a/src/Functions/bitHammingDistance.cpp b/src/Functions/bitHammingDistance.cpp index 75928c2a8af7..eb4060301054 100644 --- a/src/Functions/bitHammingDistance.cpp +++ b/src/Functions/bitHammingDistance.cpp @@ -9,6 +9,7 @@ struct BitHammingDistanceImpl { using ResultType = UInt8; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = false; static const constexpr bool allow_string_integer = false; template diff --git a/src/Functions/bitNot.cpp b/src/Functions/bitNot.cpp index f8bfad644949..12e294688f47 100644 --- a/src/Functions/bitNot.cpp +++ 
b/src/Functions/bitNot.cpp @@ -18,6 +18,7 @@ struct BitNotImpl { using ResultType = typename NumberTraits::ResultOfBitNot::Type; static const constexpr bool allow_fixed_string = true; + static const constexpr bool allow_interval = false; static const constexpr bool allow_string_integer = false; static inline ResultType apply(A a) diff --git a/src/Functions/bitOr.cpp b/src/Functions/bitOr.cpp index acdad33f38c0..92bc84474db4 100644 --- a/src/Functions/bitOr.cpp +++ b/src/Functions/bitOr.cpp @@ -15,7 +15,8 @@ template struct BitOrImpl { using ResultType = typename NumberTraits::ResultOfBit::Type; - static constexpr const bool allow_fixed_string = true; + static const constexpr bool allow_fixed_string = true; + static const constexpr bool allow_interval = false; static const constexpr bool allow_string_integer = false; template diff --git a/src/Functions/bitRotateLeft.cpp b/src/Functions/bitRotateLeft.cpp index c72466b8d495..eb165dd73e23 100644 --- a/src/Functions/bitRotateLeft.cpp +++ b/src/Functions/bitRotateLeft.cpp @@ -17,6 +17,7 @@ struct BitRotateLeftImpl { using ResultType = typename NumberTraits::ResultOfBit::Type; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = false; static const constexpr bool allow_string_integer = false; template diff --git a/src/Functions/bitRotateRight.cpp b/src/Functions/bitRotateRight.cpp index 045758f9a311..d04d386ffea8 100644 --- a/src/Functions/bitRotateRight.cpp +++ b/src/Functions/bitRotateRight.cpp @@ -17,6 +17,7 @@ struct BitRotateRightImpl { using ResultType = typename NumberTraits::ResultOfBit::Type; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = false; static const constexpr bool allow_string_integer = false; template diff --git a/src/Functions/bitShiftLeft.cpp b/src/Functions/bitShiftLeft.cpp index 7b3748edb5c9..13686a7ae5b3 100644 --- a/src/Functions/bitShiftLeft.cpp +++ b/src/Functions/bitShiftLeft.cpp @@ -17,6 
+17,7 @@ struct BitShiftLeftImpl { using ResultType = typename NumberTraits::ResultOfBit::Type; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = false; static const constexpr bool allow_string_integer = true; template diff --git a/src/Functions/bitShiftRight.cpp b/src/Functions/bitShiftRight.cpp index 108847f13ed6..e9b4740127d1 100644 --- a/src/Functions/bitShiftRight.cpp +++ b/src/Functions/bitShiftRight.cpp @@ -18,6 +18,7 @@ struct BitShiftRightImpl { using ResultType = typename NumberTraits::ResultOfBit::Type; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = false; static const constexpr bool allow_string_integer = true; template diff --git a/src/Functions/bitSwapLastTwo.cpp b/src/Functions/bitSwapLastTwo.cpp index 4ca57f9b1037..783b1d422762 100644 --- a/src/Functions/bitSwapLastTwo.cpp +++ b/src/Functions/bitSwapLastTwo.cpp @@ -19,7 +19,8 @@ template struct BitSwapLastTwoImpl { using ResultType = UInt8; - static constexpr const bool allow_fixed_string = false; + static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = false; static const constexpr bool allow_string_integer = false; static inline ResultType NO_SANITIZE_UNDEFINED apply([[maybe_unused]] A a) diff --git a/src/Functions/bitTest.cpp b/src/Functions/bitTest.cpp index 4c9c6aa2dfb0..c7341323ba34 100644 --- a/src/Functions/bitTest.cpp +++ b/src/Functions/bitTest.cpp @@ -18,6 +18,7 @@ struct BitTestImpl { using ResultType = UInt8; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = false; static const constexpr bool allow_string_integer = false; template diff --git a/src/Functions/bitWrapperFunc.cpp b/src/Functions/bitWrapperFunc.cpp index f08511765133..aef81d86f0e4 100644 --- a/src/Functions/bitWrapperFunc.cpp +++ b/src/Functions/bitWrapperFunc.cpp @@ -19,7 +19,8 @@ template struct BitWrapperFuncImpl { 
using ResultType = UInt8; - static constexpr const bool allow_fixed_string = false; + static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = false; static const constexpr bool allow_string_integer = false; static inline ResultType NO_SANITIZE_UNDEFINED apply(A a [[maybe_unused]]) diff --git a/src/Functions/bitXor.cpp b/src/Functions/bitXor.cpp index 78c4c64d06ec..984754d06051 100644 --- a/src/Functions/bitXor.cpp +++ b/src/Functions/bitXor.cpp @@ -15,7 +15,8 @@ template struct BitXorImpl { using ResultType = typename NumberTraits::ResultOfBit::Type; - static constexpr bool allow_fixed_string = true; + static const constexpr bool allow_fixed_string = true; + static const constexpr bool allow_interval = false; static const constexpr bool allow_string_integer = false; template diff --git a/src/Functions/dateTime64Diff.cpp b/src/Functions/dateTime64Diff.cpp new file mode 100644 index 000000000000..f03bd189d042 --- /dev/null +++ b/src/Functions/dateTime64Diff.cpp @@ -0,0 +1,102 @@ +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + +class FunctionDateTime64Diff : public IFunction +{ + using ColumnDateTime64 = ColumnDecimal; + +public: + static constexpr auto name = "dateTime64Diff"; + static FunctionPtr create(ContextPtr context) { return std::make_shared(std::move(context)); } + + explicit FunctionDateTime64Diff(ContextPtr context_) : context(std::move(context_)) { } + ~FunctionDateTime64Diff() override = default; + + ColumnPtr + executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override; + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return 2; } + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override; + bool isSuitableForShortCircuitArgumentsExecution(const 
DataTypesWithConstInfo & /*arguments*/) const override { return false; } + +private: + ContextPtr context; +}; + +ColumnPtr FunctionDateTime64Diff::executeImpl( + const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const size_t input_rows_count) const +{ + const auto & lhs_arg = arguments.front(); + const auto & rhs_arg = arguments.back(); + const auto * lhs_type = checkAndGetDataType(lhs_arg.type.get()); + const auto * rhs_type = checkAndGetDataType(rhs_arg.type.get()); + if (!lhs_type || !rhs_type) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Unexpected arguments of function {}", getName()); + + const auto common_scale = std::max(lhs_type->getScale(), rhs_type->getScale()); + const auto scale_arg = createConstColumnWithTypeAndName(common_scale, "scale"); + const auto convert_to_decimal = [this, &input_rows_count, &scale_arg](const ColumnWithTypeAndName & argument) + { + const ColumnsWithTypeAndName cast_args{argument, scale_arg}; + return executeFunctionCall(context, "toDecimal64", cast_args, input_rows_count); + }; + + const auto lhs_arg_as_decimal = convert_to_decimal(lhs_arg); + const auto rhs_arg_as_decimal = convert_to_decimal(rhs_arg); + const ColumnsWithTypeAndName subtraction_args{asArgument(lhs_arg_as_decimal, "lhs"), asArgument(rhs_arg_as_decimal, "rhs")}; + const auto difference = executeFunctionCall(context, "minus", subtraction_args, input_rows_count); + + const ColumnsWithTypeAndName to_decimal128_args{asArgument(difference, "difference"), scale_arg}; + const auto as_decimal128 = executeFunctionCall(context, "toDecimal128", to_decimal128_args, input_rows_count); + + const ColumnsWithTypeAndName scale_args{ + asArgument(as_decimal128, "difference"), createConstColumnWithTypeAndName(1'000'000'000, "multiplier")}; + const auto scaled = executeFunctionCall(context, "multiply", scale_args, input_rows_count); + + const ColumnsWithTypeAndName to_int64_args{asArgument(scaled, "scaled")}; + const auto as_int64 = 
executeFunctionCall(context, "toInt64", to_int64_args, input_rows_count); + + const ColumnsWithTypeAndName cast_args{ + asArgument(as_int64, "as_int64"), createConstColumnWithTypeAndName(result_type->getName(), "target_type")}; + return executeFunctionCall(context, "cast", cast_args, input_rows_count).first; +} + +DataTypePtr FunctionDateTime64Diff::getReturnTypeImpl(const DataTypes & arguments) const +{ + const auto & lhs = arguments.front(); + const auto & rhs = arguments.back(); + if (!WhichDataType(*lhs).isDateTime64()) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of first argument of function {}, expected DateTime64", + lhs->getName(), + getName()); + + if (!WhichDataType(*rhs).isDateTime64()) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of second argument of function {}, expected DateTime64", + rhs->getName(), + getName()); + + return std::make_shared(IntervalKind::Nanosecond); +} + +REGISTER_FUNCTION(DateTime64Diff) +{ + factory.registerFunction(); +} +} diff --git a/src/Functions/divide.cpp b/src/Functions/divide.cpp index ca552256cd16..2d35e9017f36 100644 --- a/src/Functions/divide.cpp +++ b/src/Functions/divide.cpp @@ -13,6 +13,7 @@ struct DivideFloatingImpl { using ResultType = typename NumberTraits::ResultOfFloatingPointDivision::Type; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = true; static const constexpr bool allow_string_integer = false; template diff --git a/src/Functions/extract.cpp b/src/Functions/extract.cpp index 74c5a2fdd366..24aa95d2b127 100644 --- a/src/Functions/extract.cpp +++ b/src/Functions/extract.cpp @@ -70,4 +70,63 @@ REGISTER_FUNCTION(Extract) factory.registerFunction(); } +struct KqlExtractImpl +{ + static void vector( + const ColumnString::Chars & data, + const ColumnString::Offsets & offsets, + const std::string & pattern, + unsigned capture, + ColumnString::Chars & res_data, + ColumnString::Offsets & 
res_offsets) + { + res_data.reserve(data.size() / 5); + res_offsets.resize(offsets.size()); + + const Regexps::Regexp regexp = Regexps::createRegexp(pattern); + + OptimizedRegularExpression::MatchVec matches; + matches.reserve(capture + 1); + size_t prev_offset = 0; + size_t res_offset = 0; + + for (size_t i = 0; i < offsets.size(); ++i) + { + size_t cur_offset = offsets[i]; + + unsigned count + = regexp.match(reinterpret_cast(&data[prev_offset]), cur_offset - prev_offset - 1, matches, capture + 1); + if (count > capture && matches[capture].offset != std::string::npos) + { + const auto & match = matches[capture]; + res_data.resize(res_offset + match.length + 1); + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], &data[prev_offset + match.offset], match.length); + res_offset += match.length; + } + else + { + res_data.resize(res_offset + 1); + } + + res_data[res_offset] = 0; + ++res_offset; + res_offsets[i] = res_offset; + + prev_offset = cur_offset; + } + } +}; + +struct NameKqlExtract +{ + static constexpr auto name = "kql_extract"; +}; + +using FunctionKqlExtract = KqlStringSearchToString; + +REGISTER_FUNCTION(KqlExtract) +{ + factory.registerFunction(); +} + } diff --git a/src/Functions/factorial.cpp b/src/Functions/factorial.cpp index 4e96391bccda..b1ea9a7fb9a5 100644 --- a/src/Functions/factorial.cpp +++ b/src/Functions/factorial.cpp @@ -18,6 +18,7 @@ struct FactorialImpl using ResultType = UInt64; static const constexpr bool allow_decimal = false; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = true; static const constexpr bool allow_string_integer = false; static inline NO_SANITIZE_UNDEFINED ResultType apply(A a) diff --git a/src/Functions/greatest.cpp b/src/Functions/greatest.cpp index 93fd7e24853f..a9d4637884af 100644 --- a/src/Functions/greatest.cpp +++ b/src/Functions/greatest.cpp @@ -12,6 +12,7 @@ struct GreatestBaseImpl { using ResultType = NumberTraits::ResultOfGreatest; static const 
constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = true; static const constexpr bool allow_string_integer = false; template @@ -43,6 +44,7 @@ struct GreatestSpecialImpl { using ResultType = make_unsigned_t; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = true; static const constexpr bool allow_string_integer = false; template diff --git a/src/Functions/intDivOrZero.cpp b/src/Functions/intDivOrZero.cpp index 96ff6ea80fc4..708186dadfb4 100644 --- a/src/Functions/intDivOrZero.cpp +++ b/src/Functions/intDivOrZero.cpp @@ -10,6 +10,7 @@ struct DivideIntegralOrZeroImpl { using ResultType = typename NumberTraits::ResultOfIntegerDivision::Type; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = true; static const constexpr bool allow_string_integer = false; template diff --git a/src/Functions/intExp10.cpp b/src/Functions/intExp10.cpp index 909afc4df17e..0b3ae45b4de0 100644 --- a/src/Functions/intExp10.cpp +++ b/src/Functions/intExp10.cpp @@ -17,7 +17,8 @@ template struct IntExp10Impl { using ResultType = UInt64; - static constexpr const bool allow_fixed_string = false; + static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = false; static const constexpr bool allow_string_integer = false; static inline ResultType apply([[maybe_unused]] A a) diff --git a/src/Functions/intExp2.cpp b/src/Functions/intExp2.cpp index 7d04f329e3f9..6d953bfed579 100644 --- a/src/Functions/intExp2.cpp +++ b/src/Functions/intExp2.cpp @@ -18,7 +18,8 @@ template struct IntExp2Impl { using ResultType = UInt64; - static constexpr const bool allow_fixed_string = false; + static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = false; static const constexpr bool allow_string_integer = false; static inline ResultType apply([[maybe_unused]] A a) diff --git 
a/src/Functions/least.cpp b/src/Functions/least.cpp index f5680d4d468f..2de1805dc2cc 100644 --- a/src/Functions/least.cpp +++ b/src/Functions/least.cpp @@ -12,6 +12,7 @@ struct LeastBaseImpl { using ResultType = NumberTraits::ResultOfLeast; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = true; static const constexpr bool allow_string_integer = false; template @@ -43,6 +44,7 @@ struct LeastSpecialImpl { using ResultType = std::make_signed_t; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = true; static const constexpr bool allow_string_integer = false; template diff --git a/src/Functions/minus.cpp b/src/Functions/minus.cpp index 04877a42b18d..32e8e053df67 100644 --- a/src/Functions/minus.cpp +++ b/src/Functions/minus.cpp @@ -10,6 +10,7 @@ struct MinusImpl { using ResultType = typename NumberTraits::ResultOfSubtraction::Type; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = true; static const constexpr bool allow_string_integer = false; template diff --git a/src/Functions/moduloOrZero.cpp b/src/Functions/moduloOrZero.cpp index 3551ae74c5f1..dae58a7f7dd1 100644 --- a/src/Functions/moduloOrZero.cpp +++ b/src/Functions/moduloOrZero.cpp @@ -4,14 +4,12 @@ namespace DB { -namespace -{ - template struct ModuloOrZeroImpl { using ResultType = typename NumberTraits::ResultOfModulo::Type; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = true; static const constexpr bool allow_string_integer = false; template @@ -39,8 +37,6 @@ struct ModuloOrZeroImpl struct NameModuloOrZero { static constexpr auto name = "moduloOrZero"; }; using FunctionModuloOrZero = BinaryArithmeticOverloadResolver; -} - REGISTER_FUNCTION(ModuloOrZero) { factory.registerFunction(); diff --git a/src/Functions/multiply.cpp b/src/Functions/multiply.cpp index 4dc8cd10f317..535c0a1758de 100644 --- 
a/src/Functions/multiply.cpp +++ b/src/Functions/multiply.cpp @@ -11,6 +11,7 @@ struct MultiplyImpl { using ResultType = typename NumberTraits::ResultOfAdditionMultiplication::Type; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = true; static const constexpr bool allow_string_integer = false; template diff --git a/src/Functions/negate.cpp b/src/Functions/negate.cpp index 731c0766e9db..8ae3eb08ac57 100644 --- a/src/Functions/negate.cpp +++ b/src/Functions/negate.cpp @@ -9,7 +9,8 @@ template struct NegateImpl { using ResultType = std::conditional_t, A, typename NumberTraits::ResultOfNegate::Type>; - static constexpr const bool allow_fixed_string = false; + static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = true; static const constexpr bool allow_string_integer = false; static inline NO_SANITIZE_UNDEFINED ResultType apply(A a) diff --git a/src/Functions/plus.cpp b/src/Functions/plus.cpp index cd9cf6cec5c3..4f792e569ce8 100644 --- a/src/Functions/plus.cpp +++ b/src/Functions/plus.cpp @@ -10,6 +10,7 @@ struct PlusImpl { using ResultType = typename NumberTraits::ResultOfAdditionMultiplication::Type; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = true; static const constexpr bool allow_string_integer = false; static const constexpr bool is_commutative = true; diff --git a/src/Functions/roundAge.cpp b/src/Functions/roundAge.cpp index d2503bb6938a..aaa1131da639 100644 --- a/src/Functions/roundAge.cpp +++ b/src/Functions/roundAge.cpp @@ -10,7 +10,8 @@ template struct RoundAgeImpl { using ResultType = UInt8; - static constexpr const bool allow_fixed_string = false; + static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = false; static const constexpr bool allow_string_integer = false; static inline ResultType apply(A x) diff --git a/src/Functions/roundDuration.cpp 
b/src/Functions/roundDuration.cpp index 62d35ea194d2..4fa78ceec835 100644 --- a/src/Functions/roundDuration.cpp +++ b/src/Functions/roundDuration.cpp @@ -10,7 +10,8 @@ template struct RoundDurationImpl { using ResultType = UInt16; - static constexpr const bool allow_fixed_string = false; + static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = false; static const constexpr bool allow_string_integer = false; static inline ResultType apply(A x) diff --git a/src/Functions/roundToExp2.cpp b/src/Functions/roundToExp2.cpp index ef29b5eaf4ac..9f00bc24fe3b 100644 --- a/src/Functions/roundToExp2.cpp +++ b/src/Functions/roundToExp2.cpp @@ -63,7 +63,8 @@ template struct RoundToExp2Impl { using ResultType = T; - static constexpr const bool allow_fixed_string = false; + static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = false; static const constexpr bool allow_string_integer = false; static inline T apply(T x) diff --git a/src/Functions/sign.cpp b/src/Functions/sign.cpp index 60ad6ba53654..63a9f31af396 100644 --- a/src/Functions/sign.cpp +++ b/src/Functions/sign.cpp @@ -10,6 +10,7 @@ struct SignImpl { using ResultType = Int8; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = true; static const constexpr bool allow_string_integer = false; static inline NO_SANITIZE_UNDEFINED ResultType apply(A a) diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index c0220f1aed2a..0e462447fa43 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -23,12 +24,17 @@ namespace ErrorCodes extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int ARGUMENT_OUT_OF_BOUND; extern const int DECIMAL_OVERFLOW; + extern const int LOGICAL_ERROR; } namespace { - constexpr auto function_name = "toStartOfInterval"; 
+ enum class ExecutionErrorPolicy + { + Null, + Throw + }; template struct Transform; @@ -36,22 +42,22 @@ namespace template <> struct Transform { - static UInt16 execute(UInt16 d, Int64 years, const DateLUTImpl & time_zone, Int64) + static UInt16 execute(UInt16 d, Int64 years, const DateLUTImpl & time_zone, Int64, const char*) { return time_zone.toStartOfYearInterval(DayNum(d), years); } - static UInt16 execute(Int32 d, Int64 years, const DateLUTImpl & time_zone, Int64) + static UInt16 execute(Int32 d, Int64 years, const DateLUTImpl & time_zone, Int64, const char*) { return time_zone.toStartOfYearInterval(ExtendedDayNum(d), years); } - static UInt16 execute(UInt32 t, Int64 years, const DateLUTImpl & time_zone, Int64) + static UInt16 execute(UInt32 t, Int64 years, const DateLUTImpl & time_zone, Int64, const char*) { return time_zone.toStartOfYearInterval(time_zone.toDayNum(t), years); } - static UInt16 execute(Int64 t, Int64 years, const DateLUTImpl & time_zone, Int64 scale_multiplier) + static UInt16 execute(Int64 t, Int64 years, const DateLUTImpl & time_zone, Int64 scale_multiplier, const char*) { return time_zone.toStartOfYearInterval(time_zone.toDayNum(t / scale_multiplier), years); } @@ -60,22 +66,22 @@ namespace template <> struct Transform { - static UInt16 execute(UInt16 d, Int64 quarters, const DateLUTImpl & time_zone, Int64) + static UInt16 execute(UInt16 d, Int64 quarters, const DateLUTImpl & time_zone, Int64, const char*) { return time_zone.toStartOfQuarterInterval(DayNum(d), quarters); } - static UInt16 execute(Int32 d, Int64 quarters, const DateLUTImpl & time_zone, Int64) + static UInt16 execute(Int32 d, Int64 quarters, const DateLUTImpl & time_zone, Int64, const char*) { return time_zone.toStartOfQuarterInterval(ExtendedDayNum(d), quarters); } - static UInt16 execute(UInt32 t, Int64 quarters, const DateLUTImpl & time_zone, Int64) + static UInt16 execute(UInt32 t, Int64 quarters, const DateLUTImpl & time_zone, Int64, const char*) { return 
time_zone.toStartOfQuarterInterval(time_zone.toDayNum(t), quarters); } - static UInt16 execute(Int64 t, Int64 quarters, const DateLUTImpl & time_zone, Int64 scale_multiplier) + static UInt16 execute(Int64 t, Int64 quarters, const DateLUTImpl & time_zone, Int64 scale_multiplier, const char*) { return time_zone.toStartOfQuarterInterval(time_zone.toDayNum(t / scale_multiplier), quarters); } @@ -84,22 +90,22 @@ namespace template <> struct Transform { - static UInt16 execute(UInt16 d, Int64 months, const DateLUTImpl & time_zone, Int64) + static UInt16 execute(UInt16 d, Int64 months, const DateLUTImpl & time_zone, Int64, const char*) { return time_zone.toStartOfMonthInterval(DayNum(d), months); } - static UInt16 execute(Int32 d, Int64 months, const DateLUTImpl & time_zone, Int64) + static UInt16 execute(Int32 d, Int64 months, const DateLUTImpl & time_zone, Int64, const char*) { return time_zone.toStartOfMonthInterval(ExtendedDayNum(d), months); } - static UInt16 execute(UInt32 t, Int64 months, const DateLUTImpl & time_zone, Int64) + static UInt16 execute(UInt32 t, Int64 months, const DateLUTImpl & time_zone, Int64, const char*) { return time_zone.toStartOfMonthInterval(time_zone.toDayNum(t), months); } - static UInt16 execute(Int64 t, Int64 months, const DateLUTImpl & time_zone, Int64 scale_multiplier) + static UInt16 execute(Int64 t, Int64 months, const DateLUTImpl & time_zone, Int64 scale_multiplier, const char*) { return time_zone.toStartOfMonthInterval(time_zone.toDayNum(t / scale_multiplier), months); } @@ -108,22 +114,22 @@ namespace template <> struct Transform { - static UInt16 execute(UInt16 d, Int64 weeks, const DateLUTImpl & time_zone, Int64) + static UInt16 execute(UInt16 d, Int64 weeks, const DateLUTImpl & time_zone, Int64, const char*) { return time_zone.toStartOfWeekInterval(DayNum(d), weeks); } - static UInt16 execute(Int32 d, Int64 weeks, const DateLUTImpl & time_zone, Int64) + static UInt16 execute(Int32 d, Int64 weeks, const DateLUTImpl & time_zone, 
Int64, const char*) { return time_zone.toStartOfWeekInterval(ExtendedDayNum(d), weeks); } - static UInt16 execute(UInt32 t, Int64 weeks, const DateLUTImpl & time_zone, Int64) + static UInt16 execute(UInt32 t, Int64 weeks, const DateLUTImpl & time_zone, Int64, const char*) { return time_zone.toStartOfWeekInterval(time_zone.toDayNum(t), weeks); } - static UInt16 execute(Int64 t, Int64 weeks, const DateLUTImpl & time_zone, Int64 scale_multiplier) + static UInt16 execute(Int64 t, Int64 weeks, const DateLUTImpl & time_zone, Int64 scale_multiplier, const char*) { return time_zone.toStartOfWeekInterval(time_zone.toDayNum(t / scale_multiplier), weeks); } @@ -132,22 +138,22 @@ namespace template <> struct Transform { - static UInt32 execute(UInt16 d, Int64 days, const DateLUTImpl & time_zone, Int64) + static UInt32 execute(UInt16 d, Int64 days, const DateLUTImpl & time_zone, Int64, const char*) { return static_cast(time_zone.toStartOfDayInterval(ExtendedDayNum(d), days)); } - static UInt32 execute(Int32 d, Int64 days, const DateLUTImpl & time_zone, Int64) + static UInt32 execute(Int32 d, Int64 days, const DateLUTImpl & time_zone, Int64, const char*) { return static_cast(time_zone.toStartOfDayInterval(ExtendedDayNum(d), days)); } - static UInt32 execute(UInt32 t, Int64 days, const DateLUTImpl & time_zone, Int64) + static UInt32 execute(UInt32 t, Int64 days, const DateLUTImpl & time_zone, Int64, const char*) { return static_cast(time_zone.toStartOfDayInterval(time_zone.toDayNum(t), days)); } - static Int64 execute(Int64 t, Int64 days, const DateLUTImpl & time_zone, Int64 scale_multiplier) + static Int64 execute(Int64 t, Int64 days, const DateLUTImpl & time_zone, Int64 scale_multiplier, const char*) { return time_zone.toStartOfDayInterval(time_zone.toDayNum(t / scale_multiplier), days); } @@ -156,16 +162,16 @@ namespace template <> struct Transform { - static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } + static UInt32 
execute(UInt16, Int64, const DateLUTImpl &, Int64, const char* function_name) { throwDateIsNotSupported(function_name); } - static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } + static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64, const char* function_name) { throwDateIsNotSupported(function_name); } - static UInt32 execute(UInt32 t, Int64 hours, const DateLUTImpl & time_zone, Int64) + static UInt32 execute(UInt32 t, Int64 hours, const DateLUTImpl & time_zone, Int64, const char*) { return time_zone.toStartOfHourInterval(t, hours); } - static Int64 execute(Int64 t, Int64 hours, const DateLUTImpl & time_zone, Int64 scale_multiplier) + static Int64 execute(Int64 t, Int64 hours, const DateLUTImpl & time_zone, Int64 scale_multiplier, const char*) { return time_zone.toStartOfHourInterval(t / scale_multiplier, hours); } @@ -174,16 +180,16 @@ namespace template <> struct Transform { - static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } + static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64, const char* function_name) { throwDateIsNotSupported(function_name); } - static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } + static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64, const char* function_name) { throwDateIsNotSupported(function_name); } - static UInt32 execute(UInt32 t, Int64 minutes, const DateLUTImpl & time_zone, Int64) + static UInt32 execute(UInt32 t, Int64 minutes, const DateLUTImpl & time_zone, Int64, const char*) { return time_zone.toStartOfMinuteInterval(t, minutes); } - static Int64 execute(Int64 t, Int64 minutes, const DateLUTImpl & time_zone, Int64 scale_multiplier) + static Int64 execute(Int64 t, Int64 minutes, const DateLUTImpl & time_zone, Int64 scale_multiplier, const char*) { return time_zone.toStartOfMinuteInterval(t / scale_multiplier, minutes); } @@ 
-192,16 +198,16 @@ namespace template <> struct Transform { - static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } + static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64, const char* function_name) { throwDateIsNotSupported(function_name); } - static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } + static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64, const char* function_name) { throwDateIsNotSupported(function_name); } - static UInt32 execute(UInt32 t, Int64 seconds, const DateLUTImpl & time_zone, Int64) + static UInt32 execute(UInt32 t, Int64 seconds, const DateLUTImpl & time_zone, Int64, const char*) { return time_zone.toStartOfSecondInterval(t, seconds); } - static Int64 execute(Int64 t, Int64 seconds, const DateLUTImpl & time_zone, Int64 scale_multiplier) + static Int64 execute(Int64 t, Int64 seconds, const DateLUTImpl & time_zone, Int64 scale_multiplier, const char*) { return time_zone.toStartOfSecondInterval(t / scale_multiplier, seconds); } @@ -210,13 +216,13 @@ namespace template <> struct Transform { - static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } + static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64, const char* function_name) { throwDateIsNotSupported(function_name); } - static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } + static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64, const char* function_name) { throwDateIsNotSupported(function_name); } - static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64) { throwDateTimeIsNotSupported(function_name); } + static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64, const char* function_name) { throwDateTimeIsNotSupported(function_name); } - static Int64 execute(Int64 t, Int64 milliseconds, const DateLUTImpl &, Int64 
scale_multiplier) + static Int64 execute(Int64 t, Int64 milliseconds, const DateLUTImpl &, Int64 scale_multiplier, const char*) { if (scale_multiplier < 1000) { @@ -247,13 +253,13 @@ namespace template <> struct Transform { - static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } + static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64, const char* function_name) { throwDateIsNotSupported(function_name); } - static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } + static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64, const char* function_name) { throwDateIsNotSupported(function_name); } - static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64) { throwDateTimeIsNotSupported(function_name); } + static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64, const char* function_name) { throwDateTimeIsNotSupported(function_name); } - static Int64 execute(Int64 t, Int64 microseconds, const DateLUTImpl &, Int64 scale_multiplier) + static Int64 execute(Int64 t, Int64 microseconds, const DateLUTImpl &, Int64 scale_multiplier, const char*) { if (scale_multiplier < 1000000) { @@ -284,13 +290,13 @@ namespace template <> struct Transform { - static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } + static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64, const char* function_name) { throwDateIsNotSupported(function_name); } - static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } + static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64, const char* function_name) { throwDateIsNotSupported(function_name); } - static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64) { throwDateTimeIsNotSupported(function_name); } + static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64, const char* function_name) { 
throwDateTimeIsNotSupported(function_name); } - static Int64 execute(Int64 t, Int64 nanoseconds, const DateLUTImpl &, Int64 scale_multiplier) + static Int64 execute(Int64 t, Int64 nanoseconds, const DateLUTImpl &, Int64 scale_multiplier, const char*) { if (scale_multiplier < 1000000000) { @@ -310,12 +316,23 @@ namespace } }; +template class FunctionToStartOfInterval : public IFunction { public: static FunctionPtr create(ContextPtr) { return std::make_shared(); } - static constexpr auto name = function_name; + static constexpr auto name = std::invoke( + [] + { + if (execution_error_policy == ExecutionErrorPolicy::Null) + return "toStartOfIntervalOrNull"; + else if (execution_error_policy == ExecutionErrorPolicy::Throw) + return "toStartOfInterval"; + + throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Unhandled execution policy"); + }); + String getName() const override { return name; } bool isVariadic() const override { return true; } @@ -381,36 +398,43 @@ class FunctionToStartOfInterval : public IFunction getName(), arguments.size()); } - if (result_type_is_date) - return std::make_shared(); - else if (result_type_is_datetime) - return std::make_shared(extractTimeZoneNameFromFunctionArguments(arguments, 2, 0)); - else - { - auto scale = 0; + auto return_type = std::invoke( + [&arguments, &interval_type, &result_type_is_date, &result_type_is_datetime]() -> std::shared_ptr + { + if (result_type_is_date) + return std::make_shared(); + else if (result_type_is_datetime) + return std::make_shared(extractTimeZoneNameFromFunctionArguments(arguments, 2, 0)); + else + { + auto scale = 0; - if (interval_type->getKind() == IntervalKind::Nanosecond) - scale = 9; - else if (interval_type->getKind() == IntervalKind::Microsecond) - scale = 6; - else if (interval_type->getKind() == IntervalKind::Millisecond) - scale = 3; + if (interval_type->getKind() == IntervalKind::Nanosecond) + scale = 9; + else if (interval_type->getKind() == IntervalKind::Microsecond) + scale = 6; + else if 
(interval_type->getKind() == IntervalKind::Millisecond) + scale = 3; - return std::make_shared(scale, extractTimeZoneNameFromFunctionArguments(arguments, 2, 0)); - } + return std::make_shared(scale, extractTimeZoneNameFromFunctionArguments(arguments, 2, 0)); + } + }); + + if constexpr (execution_error_policy == ExecutionErrorPolicy::Null) + return makeNullable(return_type); + return return_type; } bool useDefaultImplementationForConstants() const override { return true; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2}; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /* input_rows_count */) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const size_t) const override { const auto & time_column = arguments[0]; const auto & interval_column = arguments[1]; const auto & time_zone = extractTimeZoneFromFunctionArguments(arguments, 2, 0); - auto result_column = dispatchForColumns(time_column, interval_column, result_type, time_zone); - return result_column; + return dispatchForColumns(time_column, interval_column, result_type, time_zone); } bool hasInformationAboutMonotonicity() const override @@ -470,13 +494,12 @@ class FunctionToStartOfInterval : public IFunction throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column for second argument of function {}, must be an interval of time.", getName()); const auto * interval_column_const_int64 = checkAndGetColumnConst(interval_column.column.get()); if (!interval_column_const_int64) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, - "Illegal column for second argument of function {}, must be a const interval of time.", - getName()); - Int64 num_units = interval_column_const_int64->getValue(); - if (num_units <= 0) - throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Value for second argument of function {} must be positive.", getName()); + throw Exception( + 
ErrorCodes::ILLEGAL_COLUMN, + "Illegal column for second argument of function {}, must be a const interval of time.", + getName()); + const auto num_units = interval_column_const_int64->getValue(); switch (interval_type->getKind()) { case IntervalKind::Nanosecond: @@ -513,18 +536,54 @@ class FunctionToStartOfInterval : public IFunction using ToFieldType = typename ToDataType::FieldType; const auto & time_data = time_column_type.getData(); - size_t size = time_data.size(); + const auto size = time_data.size(); auto result_col = result_type->createColumn(); - auto *col_to = assert_cast(result_col.get()); - auto & result_data = col_to->getData(); - result_data.resize(size); + auto [result_null_map_data, result_value_data] = std::invoke( + [&result_col]() -> std::pair + { + if constexpr (execution_error_policy == ExecutionErrorPolicy::Null) + { + auto & nullable_column = assert_cast(*result_col); + auto & nested_column = assert_cast(nullable_column.getNestedColumn()); + return {&nullable_column.getNullMapData(), nested_column.getData()}; + } + else if constexpr (execution_error_policy == ExecutionErrorPolicy::Throw) + { + auto & target_column = assert_cast(*result_col); + return {nullptr, target_column.getData()}; + } + }); + + if constexpr (execution_error_policy == ExecutionErrorPolicy::Null) + result_null_map_data->resize(size, true); + + result_value_data.resize(size); + if (num_units <= 0) + { + if constexpr (execution_error_policy == ExecutionErrorPolicy::Null) + return result_col; + else if constexpr (execution_error_policy == ExecutionErrorPolicy::Throw) + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Value for second argument of function {} must be positive.", getName()); + } - Int64 scale_multiplier = DecimalUtils::scaleMultiplier(scale); + const auto scale_multiplier = DecimalUtils::scaleMultiplier(scale); for (size_t i = 0; i != size; ++i) - result_data[i] = static_cast( - Transform::execute(time_data[i], num_units, time_zone, scale_multiplier)); + { 
+ try + { + result_value_data[i] + = static_cast(Transform::execute(time_data[i], num_units, time_zone, scale_multiplier, name)); + if constexpr (execution_error_policy == ExecutionErrorPolicy::Null) + (*result_null_map_data)[i] = false; + } + catch (...) + { + if constexpr (execution_error_policy == ExecutionErrorPolicy::Throw) + throw; + } + } return result_col; } @@ -534,7 +593,8 @@ class FunctionToStartOfInterval : public IFunction REGISTER_FUNCTION(ToStartOfInterval) { - factory.registerFunction(); + factory.registerFunction>(); + factory.registerFunction>(); } } diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index c233060e646d..affb0b39babb 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -381,12 +381,26 @@ static std::tuple executeQueryImpl( /// Parse the query from string. try { - if (settings.dialect == Dialect::kusto && !internal) + const Dialect & dialect = settings.dialect; + + if (dialect == Dialect::kusto && !internal) { ParserKQLStatement parser(end, settings.allow_settings_after_format_in_insert); + ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); + } + else if (dialect == Dialect::kusto_auto && !internal) + { + try { + ParserQuery parser(end, settings.allow_settings_after_format_in_insert); /// TODO: parser should fail early when max_query_size limit is reached. ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); + } + catch (...) + { + ParserKQLStatement parser(end, settings.allow_settings_after_format_in_insert); + ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); + } } else { diff --git a/src/Parsers/CMakeLists.txt b/src/Parsers/CMakeLists.txt index 73d46593e042..2738f049875b 100644 --- a/src/Parsers/CMakeLists.txt +++ b/src/Parsers/CMakeLists.txt @@ -4,8 +4,9 @@ add_headers_and_sources(clickhouse_parsers .) 
add_headers_and_sources(clickhouse_parsers ./Access) add_headers_and_sources(clickhouse_parsers ./MySQL) add_headers_and_sources(clickhouse_parsers ./Kusto) +add_headers_and_sources(clickhouse_parsers ./Kusto/KustoFunctions) add_library(clickhouse_parsers ${clickhouse_parsers_headers} ${clickhouse_parsers_sources}) -target_link_libraries(clickhouse_parsers PUBLIC clickhouse_common_io clickhouse_common_access string_utils) +target_link_libraries(clickhouse_parsers PUBLIC clickhouse_common_io clickhouse_common_access ch_contrib::roaring string_utils) if (USE_DEBUG_HELPERS) # CMake generator expression will do insane quoting when it encounters special character like quotes, spaces, etc. diff --git a/src/Parsers/CommonParsers.cpp b/src/Parsers/CommonParsers.cpp index 275679d61f07..22242c7f5708 100644 --- a/src/Parsers/CommonParsers.cpp +++ b/src/Parsers/CommonParsers.cpp @@ -38,4 +38,26 @@ bool ParserKeyword::parseImpl(Pos & pos, [[maybe_unused]] ASTPtr & node, Expecte return true; } +bool ParserSequence::parseImpl(Pos & pos, [[maybe_unused]] ASTPtr & node, Expected & expected) +{ + expected.add(pos, sequence.c_str()); + + Tokens keyword_tokens(sequence.c_str(), sequence.c_str() + sequence.length()); + Pos keyword_tokens_pos(keyword_tokens, pos.max_depth); + + while (!keyword_tokens_pos->isEnd() && !pos->isEnd() && keyword_tokens_pos->type == pos->type) + { + const auto keyword_token_length = keyword_tokens_pos->end - keyword_tokens_pos->begin; + + if (const auto pos_token_length = pos->end - pos->begin; + keyword_token_length != pos_token_length || strncasecmp(keyword_tokens_pos->begin, pos->begin, keyword_token_length) != 0) + break; + + ++keyword_tokens_pos; + ++pos; + } + + return keyword_tokens_pos->isEnd(); +} + } diff --git a/src/Parsers/CommonParsers.h b/src/Parsers/CommonParsers.h index f6c5c9c05207..6eef791bfad3 100644 --- a/src/Parsers/CommonParsers.h +++ b/src/Parsers/CommonParsers.h @@ -27,6 +27,20 @@ class ParserKeyword : public IParserBase bool 
parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; +class ParserSequence : public IParserBase +{ +public: + explicit ParserSequence(std::string sequence_) : sequence(std::move(sequence_)) { } + + const char * getName() const override { return sequence.c_str(); } + +protected: + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + +private: + std::string sequence; + Tokens tokens{sequence.c_str(), sequence.c_str() + sequence.length()}; +}; class ParserToken : public IParserBase { diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index f2660e5a40c6..729132c4a3be 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -42,7 +42,7 @@ #include #include - +#include namespace DB { @@ -105,62 +105,70 @@ bool ParserSubquery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ParserSelectWithUnionQuery select; ParserExplainQuery explain; - - if (pos->type != TokenType::OpeningRoundBracket) - return false; - ++pos; - ASTPtr result_node = nullptr; + ParserKeyword s_kql("KQL"); - if (ASTPtr select_node; select.parse(pos, select_node, expected)) + if (s_kql.ignore(pos, expected)) { - result_node = std::move(select_node); + if (!ParserKQLTaleFunction().parse(pos, result_node, expected)) + return false; } - else if (ASTPtr explain_node; explain.parse(pos, explain_node, expected)) + else { - const auto & explain_query = explain_node->as(); + if (pos->type != TokenType::OpeningRoundBracket) + return false; + ++pos; - if (explain_query.getTableFunction() || explain_query.getTableOverride()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "EXPLAIN in a subquery cannot have a table function or table override"); + if (ASTPtr select_node; select.parse(pos, select_node, expected)) + { + result_node = std::move(select_node); + } + else if (ASTPtr explain_node; explain.parse(pos, explain_node, expected)) + { + const auto & explain_query = 
explain_node->as(); - /// Replace subquery `(EXPLAIN SELECT ...)` - /// with `(SELECT * FROM viewExplain("", "", SELECT ...))` + if (explain_query.getTableFunction() || explain_query.getTableOverride()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "EXPLAIN in a subquery cannot have a table function or table override"); - String kind_str = ASTExplainQuery::toString(explain_query.getKind()); + /// Replace subquery `(EXPLAIN SELECT ...)` + /// with `(SELECT * FROM viewExplain("", "", SELECT ...))` - String settings_str; - if (ASTPtr settings_ast = explain_query.getSettings()) - { - if (!settings_ast->as()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "EXPLAIN settings must be a SET query"); - settings_str = queryToString(settings_ast); - } + String kind_str = ASTExplainQuery::toString(explain_query.getKind()); - const ASTPtr & explained_ast = explain_query.getExplainedQuery(); - if (explained_ast) - { - auto view_explain = makeASTFunction("viewExplain", - std::make_shared(kind_str), - std::make_shared(settings_str), - explained_ast); - result_node = buildSelectFromTableFunction(view_explain); + String settings_str; + if (ASTPtr settings_ast = explain_query.getSettings()) + { + if (!settings_ast->as()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "EXPLAIN settings must be a SET query"); + settings_str = queryToString(settings_ast); + } + + const ASTPtr & explained_ast = explain_query.getExplainedQuery(); + if (explained_ast) + { + auto view_explain = makeASTFunction("viewExplain", + std::make_shared(kind_str), + std::make_shared(settings_str), + explained_ast); + result_node = buildSelectFromTableFunction(view_explain); + } + else + { + auto view_explain = makeASTFunction("viewExplain", + std::make_shared(kind_str), + std::make_shared(settings_str)); + result_node = buildSelectFromTableFunction(view_explain); + } } else { - auto view_explain = makeASTFunction("viewExplain", - std::make_shared(kind_str), - std::make_shared(settings_str)); - result_node = 
buildSelectFromTableFunction(view_explain); + return false; } - } - else - { - return false; - } - if (pos->type != TokenType::ClosingRoundBracket) - return false; - ++pos; + if (pos->type != TokenType::ClosingRoundBracket) + return false; + ++pos; + } node = std::make_shared(); node->children.push_back(result_node); @@ -170,6 +178,16 @@ bool ParserSubquery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) bool ParserIdentifier::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { + /// 'kql(' is used for subuquery in Kusto, should not be treated as an identifier if kql followed by ( + ParserKeyword s_kql("KQL"); + if (s_kql.ignore(pos, expected)) + { + if (pos->type == TokenType::OpeningRoundBracket) + { --pos; + return false; + } + --pos; + } /// Identifier in backquotes or in double quotes if (pos->type == TokenType::QuotedIdentifier) { diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 054a22a0c3a7..a13d3066667d 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -661,6 +661,26 @@ class Layer } else { + /// enable using subscript operator for kql_array_sort + if (cur_op.function_name == "arrayElement" && !operands.empty()) + { + auto* first_arg_as_node = operands.front()->as(); + if (first_arg_as_node) + { + if (first_arg_as_node->name == "kql_array_sort_asc" || first_arg_as_node->name == "kql_array_sort_desc") + { + cur_op.function_name = "tupleElement"; + cur_op.type = OperatorType::TupleElement; + } + else if (first_arg_as_node->name == "arrayElement" && !first_arg_as_node->arguments->children.empty()) + { + auto *arg_inside = first_arg_as_node->arguments->children[0]->as(); + if (arg_inside && (arg_inside->name == "kql_array_sort_asc" || arg_inside->name == "kql_array_sort_desc")) + first_arg_as_node->name = "tupleElement"; + } + } + } + function = makeASTFunction(cur_op); if (!popLastNOperands(function->children[0]->children, cur_op.arity)) @@ -2492,8 
+2512,9 @@ Action ParserExpressionImpl::tryParseOperand(Layers & layers, IParser::Pos & pos { layers.back()->pushOperand(std::move(tmp)); } - else if (pos->type == TokenType::OpeningRoundBracket) + else if (pos->type == TokenType::OpeningRoundBracket || String(pos->begin , pos->end) == "kql") { + if (subquery_parser.parse(pos, tmp, expected)) { layers.back()->pushOperand(std::move(tmp)); diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md new file mode 100644 index 000000000000..cd7ce8f2fcb9 --- /dev/null +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -0,0 +1,1512 @@ +## KQL implemented features +# March XX, 2023 +## Operator +## Bugfixes +## Functions +- [has_ipv4()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/has-ipv4-function) + `print has_ipv4('10:00:00 192.168.1.1 GET /index.html 404', '192.168.1.1')` +- [has_any_ipv4()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/has-any-ipv4-function) + `print has_any_ipv4('10:00:00 192.168.1.1 GET /index.html 404', '127.0.0.1', '192.168.1.1')` +- [has_ipv4_prefix()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/has-ipv4-prefix-function) + `print has_ipv4_prefix('10:00:00 192.168.1.1 GET /index.html 404', '192.168.')` +- [has_any_ipv4_prefix()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/has-any-ipv4-prefix-function) + `print has_any_ipv4_prefix('10:00:00 192.168.1.1 GET /index.html 404', '127.', '192.168.1.')` + +# February XX, 2023 +## Operator + +- [project-away](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/projectawayoperator) + ``` + print '1-- remove one column'; + Customers | project-away FirstName; + print '2-- remove two columns'; + Customers | project-away FirstName, LastName; + print '3-- remove columns by one wildcard'; + Customers | project-away *Name; + print '4-- remove columns by two wildcards'; + Customers | project-away *Name, *tion; + print '5-- remove columns by 
one wildcard, one regular column'; + Customers | project-away *Name, Age; + print '6-- remove columns by one wildcard, two regular column'; + Customers | project-away *Name, Age, Education; + print '7-- remove columns by two wildcard, two regular column'; + Customers | project-away *irstName, Age, *astName, Education; + print '8-- remove one column from previous piple result'; + Customers | where Age< 30 | limit 2 | project-away FirstName; + print '9-- remove one column from summized piple result'; + Customers|summarize sum(Age), avg(Age) by FirstName | project-away sum_Age; + print '10-- remove columns after extend'; + Customers|extend FullName = strcat(FirstName,' ',LastName) | project-away FirstName, LastName; + ``` + + +## Bugfixes +- Fixed count operator issue (2112): + ``` + Customers|project FirstName|where FirstName != 'Peter'|sort by FirstName asc nulls first|count + ``` +- Fixed KQL sub-query issues: + - Multiple columns in sub-query. + Multiple columns in a sub-query work in KQL ADX (only the first column is effective) but did not work in ClickHouse. This issue is fixed, e.g. + ``` + Customers | where FirstName in ((Customers|project FirstName, LastName)) + ``` + limitation: `select *` does not work in a sub-query because there is an individual column. + - Negative operators in sub-query + Fixed the issue where negative operators did not work in a KQL sub-query, e.g. + ``` + Customers | where FirstName in ((Customers|project FirstName, LastName|where FirstName !has 'Peter')) + ``` + - Case-insensitive compare in sub-query + Fixed the case-insensitive compare issue for multiple pipes in a sub-query, e.g. + ``` + Customers | where FirstName in~ ((Customers|where FirstName !has 'Peter'|project FirstName, LastName)) + ``` + - Check functional test `tests/queries/0_stateless/02366_kql_test_subquery.sql` for details.
+ +- KQL - has operator fails to return result when needle has separator character +- strcat_delim fails when encountered with escaped double quotes (2159) +- summarize throw exception if Aggregation is missing (2113) +- todecimal() doesn't work with column arguments (1413) +- extract_json value cast to boolean causes exception (1490) +- [isempty() and isnotempty() not accepting non-quoted strings] +## Functions +- [abs()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/abs-function) + `print abs(-5)` +- [acos()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/acosfunction) +- [asin()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/asinfunction) + print asin(0.5) +- [atan()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/atanfunction) + print atan(0.5) +- [atan2()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/atan2function) + print atan2(1,1) +- [ceiling()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/ceilingfunction) + `print c1 = ceiling(-1.1), c2 = ceiling(0), c3 = ceiling(0.9)` +- [cos()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/cosfunction) + print cos(1) +- [cot()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/cotfunction) + print cot(1) +- [degrees()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/degreesfunction) + print degrees(pi()/4) +- [exp()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/exp-function) + `print exp(2)` +- [exp2()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/exp2-function) + `print exp2(2)` +- [exp10()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/exp10-function) + `print exp10(3)` +- [gamma()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/gammafunction) +- [isfinite()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/isfinitefunction) + print isfinite(1.0/0.0) 
+- [isinf()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/isinffunction) + print isinf(1.0/0.0) +- [log()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/log-function) + `print log(5)` +- [log2()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/log2-function) + `print log2(5)` +- [log10()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/log10-function) + `print log10(5)` +- [loggamma()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/loggammafunction) + print loggamma(5) +- [max_of()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/max-offunction) + print result = max_of(10, 1, -3, 17) +- [min_of()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/min-offunction) + print result = min_of(10, 1, -3, 17) +- [pi()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/pifunction) + print pi() +- [pow()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/powfunction) + `print pow(2, 3)` +- [radians()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/radiansfunction) + print radians0 = radians(90), radians1 = radians(180), radians2 = radians(360) +- [rand()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/randfunction) + print rand(1000) +- [round()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/roundfunction) + print round(2.15, 1) +- [sign()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/signfunction) + print s1 = sign(-42), s2 = sign(0), s3 = sign(11.2) +- [sin()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/sinfunction) +- [sqrt()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/sqrtfunction) + `print sqrt(256)` +- [tan()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/tanfunction) +- [variance()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/variance-aggfunction)
+ `Customers | summarize variance(Age);` +- [variancep()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/variancep-aggfunction) + `Customers | summarize variancep(Age);` +- [varianceif()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/varianceif-aggfunction) + `Customers | summarize varianceif(Age, Age < 30)` +- lookup(): not a KQL function; it is an IBM-specific suggested implementation. +Supports simple keys only. Does not support RANGE_HASHED keys. + `print lookup('dictionary_table', 'value', '1')` + `print lookup('dictionary_table', 'value', '100', 'default')` +# January XX, 2023 +## Functions +- [range()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/rangefunction) +Difference from ADX: + Returns an empty array [] if the range is empty, while ADX returns NULL + The maximum number of elements in the array is 1000000 (limitation of ClickHouse), 1,048,576 in ADX +``` +print '-- range function int, int, int --'; +print range(1, 10, 2); +print '-- range function int, int --'; +print range(1, 10); +print '-- range function float, float, float --'; +print range(1.2, 10.3, 2.2); +print '-- range function positive float, float, int --'; +print range(1.2, 10.3, 2); +print '-- range function positive float, int, float --'; +print range(1.2, 10, 2.2); +print '-- range function positive integer, int, float --'; +print range(1, 10, 2.2); +print '-- range function positive integer, float, float --'; +print range(1, 10.5, 2.2); +print '-- range function positive float, int, int --'; +print range(1.2, 10, 2); +print '-- range function positive int, int, negative int --'; +print range(12, 3, -2); +print '-- range function positive float, int, negative float --'; +print range(12.8, 3, -2.3); +print '-- range function datetime, datetime, timespan --'; +print range(datetime('2001-01-01'), datetime('2001-01-02'), 5h); +print '-- range function datetime, datetime, negative timespan --'; +print range(datetime('2001-01-03'),
datetime('2001-01-02'), -5h); +print '-- range function datetime, datetime --'; +print range(datetime('2001-01-01'), datetime('2001-01-02')); +print '-- range function timespan, timespan, timespan --'; +print range(1h, 5h, 2h); +print '-- range function timespan, timespan --'; +print range(1h, 5h); +print '-- range function timespan, timespan, negative timespan --'; +print range(11h, 5h, -2h); +print '-- range function float timespan, timespan, timespan --'; +print range(1.5h, 5h, 2h); +print '-- range function endofday, endofday, timespan --'; +print range(endofday(datetime(2017-01-01 10:10:17)), endofday(datetime(2017-01-03 10:10:17)), 1d); +``` + +## Improvement +- [dcount()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/dcount-aggfunction) and [dcountif()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/dcountif-aggfunction) + dcount and dcountif now accept the additional accuracy parameter which is the base-2 logarithm of the number of cells in [HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog).
+## Case Insensitive Operators +- [in~](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/inoperator) + `print t = 'a' in~ ('A', 'b', 'c')` + `Customers | where FirstName in~ ((Customers | project FirstName | where FirstName == 'Peter'))` +- [!in~](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/not-in-operator) + `print t = 'a' !in~ (dynamic(['A', 'b', 'c']))` + `Customers | where FirstName !in~ ('peter', 'apple')` +- [=~](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/equals-operator) + `Customers | where FirstName =~ 'peter' and LastName =~ 'naRA'` +- [!~](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/not-equals-operator) + `Customers | where FirstName !~ 'nEyMaR' and LastName =~ 'naRA'` +## Aggregate Functions +- [take_any()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/take-any-aggfunction) + ``` + Note: * is not currently a supported argument. + ``` +- [take_anyif()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/take-anyif-aggfunction) +- [dcount() and dcountif()] +## Operator +- [range](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/rangeoperator) + `range LastWeek from ago(7d) to now() step 1d` + `range Steps from 1 to 8 step 3` +- [top-nested](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/topnestedoperator) + + ``` + CREATE TABLE sales + (salesdate String,salesperson String,region String,amount UInt32) ENGINE = Memory; + + INSERT INTO sales VALUES ( '12/31/1995','Robert','ON-Ontario',1); + INSERT INTO sales VALUES ( '12/31/1995','Joseph','ON-Ontario',2); + INSERT INTO sales VALUES ( '12/31/1995','Joseph','QC-Quebec',3); + INSERT INTO sales VALUES ( '12/31/1995','Joseph','MA-Manitoba',4); + INSERT INTO sales VALUES ( '12/31/1995','Steven','QC-Quebec',5); + INSERT INTO sales VALUES ( '03/29/1996','Joseph','ON-Ontario',6); + INSERT INTO sales VALUES ( '03/29/1996','Robert','QC-Quebec',7); + INSERT INTO sales 
VALUES ( '03/29/1996','Joseph','ON-Ontario',8); + INSERT INTO sales VALUES ( '03/29/1996','Joseph','BC-British Columbia',9); + INSERT INTO sales VALUES ( '03/29/1996','Joseph','QC-Quebec',10); + INSERT INTO sales VALUES ( '03/29/1996','Joseph','MA-Manitoba',11); + INSERT INTO sales VALUES ( '03/29/1996','Steven','ON-Ontario',12); + INSERT INTO sales VALUES ( '03/29/1996','Steven','QC-Quebec',13); + INSERT INTO sales VALUES ( '03/29/1996','Steven','MA-Manitoba',14); + INSERT INTO sales VALUES ( '03/30/1996','Robert','ON-Ontario',15); + INSERT INTO sales VALUES ( '03/30/1996','Robert','QC-Quebec',16); + INSERT INTO sales VALUES ( '03/30/1996','Robert','MA-Manitoba',17); + INSERT INTO sales VALUES ( '03/30/1996','Joseph','ON-Ontario',18); + INSERT INTO sales VALUES ( '03/30/1996','Joseph','BC-British Columbia',19); + INSERT INTO sales VALUES ( '03/30/1996','Joseph','QC-Quebec',20); + INSERT INTO sales VALUES ( '03/30/1996','Joseph','MA-Manitoba',21); + INSERT INTO sales VALUES ( '03/30/1996','Steven','ON-Ontario',22); + INSERT INTO sales VALUES ( '03/30/1996','Steven','QC-Quebec',23); + INSERT INTO sales VALUES ( '03/30/1996','Steven','MA-Manitoba',24); + INSERT INTO sales VALUES ( '03/31/1996','Robert','MA-Manitoba',25); + INSERT INTO sales VALUES ( '03/31/1996','Thomas','ON-Ontario',26); + INSERT INTO sales VALUES ( '03/31/1996','Thomas','BC-British Columbia',27); + INSERT INTO sales VALUES ( '03/31/1996','Thomas','QC-Quebec',28); + INSERT INTO sales VALUES ( '03/31/1996','Thomas','MA-Manitoba',29); + INSERT INTO sales VALUES ( '03/31/1996','Steven','ON-Ontario',30); + + print '-- top 3 regions by sales--'; + sales | top-nested 3 of region by sum(amount); + + print '-- top 2 salespeople in each of these regions?--'; + sales | top-nested 3 of region by sum(amount), top-nested 2 of salesperson by sum(amount); + + print '--top 3 and other regions by sales--'; + sales | top-nested 3 of region with others = 'all other region' by sum(amount); + + print '--top 3 and other 
regions by sales and top 2 and other salespeople in each of these regions--'; + sales | top-nested 3 of region with others = 'all other region' by sum(amount), top-nested 2 of salesperson with others = 'all other person' by sum(amount); + + print '--top 3 and other regions by sales and top 2 salespeople in each of these regions--'; + sales | top-nested 3 of region with others = 'all other region' by sum(amount), top-nested 2 of salesperson by sum(amount) + + print '--top 3 regions by sales and top 2 and other salespeople in each of these regions--'; + sales | top-nested 3 of region by sum(amount), top-nested 2 of salesperson with others = 'all other person' by sum(amount); + + print '--top 3 regions by difference between max sales and min sales--'; + sales | top-nested 3 of region by sum(amount) - min(amount); + + print '-- top 3 regions using abbreviations by sales--'; + sales | top-nested 3 of substring(region, 0, 2) by sum(amount); + + print '-- all top regions by sales--'; + sales | top-nested of region by sum(amount); + ``` + +## Bugs +- [KQL Phase 2 - base64_encode_fromguid encodes strings as opposed to binary] +- [KQL Phase 2: summarize with bin and format_datetime] +- [make_datetime creates wrong date time] +- [KQL Phase 2: summarize using bin has different result than Azure Data Explorer using the same sample data] +- [KQL Phase 3: datetime should be rounded in certain cases] +- [kql_bin does not accept DateTime type] +- [KQL Phase 2 - totimespan should return null when conversion fails.] +- [reverse() with datetime and timespan arguments needs to be improved.] 
+- [String operator has throws exception when needle has white space or separator characters] + + +# December 7, 2022 + +## Functions +- [count_distinct](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/count-distinct-aggfunction) + `Customers | summarize count_distinct(Education);` +- [count_distinctif](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/count-distinctif-aggfunction) + `Customers | summarize count_distinctif(Education, Age > 30);` +- [iff](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/ifffunction) + `Customers | extend t = iff(Age <= 10, "smaller", "bigger");` +- [iif](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/iiffunction) + `Customers | extend t = iif(Age <= 10, "smaller", "bigger");` +## bug fixed +- [indexOf function doesn't work for extended parameters] +- [Create generic function for time arithmetic] +- [KQL Phase 2: tolong should return the number of ticks when supplied with a timespan] + +# November 23, 2022 + +## Operator +- [join](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/joinoperator?pivots=azuredataexplorer) + ``` + CREATE TABLE X (Key String, Value1 Int64) ENGINE = Memory; + INSERT INTO X VALUES ('a',1); + INSERT INTO X VALUES ('b',2); + INSERT INTO X VALUES ('b',3); + INSERT INTO X VALUES ('c',4); + + CREATE TABLE Y (Key String, Value2 Int64) ENGINE = Memory; + INSERT INTO Y VALUES ('b',10); + INSERT INTO Y VALUES ('c',20); + INSERT INTO Y VALUES ('c',30); + INSERT INTO Y VALUES ('d',40); + + Join flavor : + + Default join is innerunique + X | join Y on $left.Key == $right.Key ; + X | join kind=innerunique Y on Key ; + + Inner-join + X | join kind=inner Y on Key ; + + Left outer-join + X | join kind=leftouter Y on Key ; + + Right outer-join + X | join kind=rightouter Y on Key ; + + Full outer-join + X | join kind=fullouter Y on Key ; + + Left anti-join + X | join kind=leftanti Y on Key ; + + Right anti-join + X | join kind=rightanti Y on 
Key ; + + Left semi-join + X | join kind=leftsemi Y on Key ; + + Right semi-join + X | join kind=rightsemi Y on Key ; + ``` + **Deviation from ADX** + Because of the limitations between KQL and SQL, the result may be different from ADX (KQL-CH takes the result of ClickHouse). + - columns + ADX : common columns are duplicated in output + KQL-CH : only one column for common columns + - column name + ADX : column with same name (not common) ->column1 + KQL-CH : column with same name (not common) -> right_.column + - filters + ADX: Kusto is optimized to push filters that come after the join, towards the appropriate join side, left or right, when possible + KQL-CH: because the KQL domain does not know the schema of the tables, the push needs to be done manually by the user, like: + ``` + t1|join kind = innerunique t2 on key | where value == 'val1.2' + ``` + needs to be changed as follows by the user (if the user wants): + ``` + t1| where value == 'val1.2' | join kind = innerunique t2 on key + ``` + - semi join flavor + ADX : only returns left side or right side columns + KQL-CH : returns columns from both sides + - Join hints : not supported yet +- [lookup](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/lookupoperator) + lookup is a subset of join, only supporting kind=leftouter and kind=inner.
if kind unspecified, kind=leftouter + ``` + DROP TABLE IF EXISTS FactTable; + CREATE TABLE FactTable (Row String, Personal String, Family String) ENGINE = Memory; + INSERT INTO FactTable VALUES ('1', 'Bill', 'Gates'); + INSERT INTO FactTable VALUES ('2', 'Bill', 'Clinton'); + INSERT INTO FactTable VALUES ('3', 'Bill', 'Clinton'); + INSERT INTO FactTable VALUES ('4', 'Steve', 'Ballmer'); + INSERT INTO FactTable VALUES ('5', 'Tim', 'Cook'); + + DROP TABLE IF EXISTS DimTable; + CREATE TABLE DimTable (Personal String, Family String, Alias String) ENGINE = Memory; + INSERT INTO DimTable VALUES ('Bill', 'Gates', 'billg'); + INSERT INTO DimTable VALUES ('Bill', 'Clinton', 'billc'); + INSERT INTO DimTable VALUES ('Steve', 'Ballmer', 'steveb'); + INSERT INTO DimTable VALUES ('Tim', 'Cook', 'timc'); + + FactTable | lookup kind=leftouter DimTable on Personal, Family + + FactTable | lookup kind=inner DimTable on Personal, Family + ``` + +## Bugs fixed + - [Incorrect Regx conversion] + - [KQL phase 2 - timespan calculation results in exception] + - [KQL phase 2 - format_timespan returns incorrect results] + - [Bin function should support time intervals less than 1 second] + - [KQL Phase 2: datetime subtraction results in exception] + - [Timespan() doesn't parse bareword arguments.] + - [KQL-phase2 distinct operator does not support alias] + +# November 7, 2022 +## Improvement +- [array_sort_asc](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/arraysortascfunction) and [array_sort_desc](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/arraysortdescfunction) + ``` + Returns the same number of arrays as in the input, with the first array sorted in ascending order, and the remaining arrays ordered to match the reordered first array. + + null will be returned for every array that differs in length from the first one. 
+ ``` + Because array in ClickHouse is not nullable, so an array with a single NULL ( `[NULL]`) is returned instead of a null if array that differs in length from the first one: + ``` + array_sort_asc(dynamic([2, 1, 3]), dynamic([20, 40, 30]), dynamic([100, 200])) -> [1,2,3,NULL],[10,20,30,40],[NULL] + ``` + the result can be used as a condition + ``` + DROP TABLE IF EXISTS visit; + CREATE TABLE visit(pageid UInt8, ip_country Array(Nullable(String)), hit Array(Int64),duration Array(Int64)) ENGINE = Memory; + INSERT INTO visit VALUES (1,['CA', 'US','FR','Eng'], [11,16,12,20],[100,500,300,200]); + INSERT INTO visit VALUES (2,['Japan', 'Gem','FR','Eng'], [31,22,33,10],[510,410,310,210]); + INSERT INTO visit VALUES (3,['CA', 'Gem','Japan','Eng'], [25,10,23,11],[120,110,130]); + INSERT INTO visit VALUES (4,['CA', 'Gem',null,'Eng'], [5,10,3,2],[220,320,310,150]); + INSERT INTO visit VALUES (5,['FR', null,'US','Eng'], [16,12,23,10],[210,250,110,260]); + + visit | project *, array_sort_asc(ip_country, hit, duration) + ┌─pageid─┬─ip_country─────────────────┬─hit───────────┬─duration──────────┬─kql_array_sort_asc(ip_country, hit, duration)────────────────┐ + │ 2 │ ['Japan','Gem','FR','Eng'] │ [31,22,33,10] │ [510,410,310,210] │ (['Eng','FR','Gem','Japan'],[10,33,22,31],[210,310,410,510]) │ + └────────┴────────────────────────────┴───────────────┴───────────────────┴──────────────────────────────────────────────────────────────┘ + ┌─pageid─┬─ip_country─────────────┬─hit───────────┬─duration──────────┬─kql_array_sort_asc(ip_country, hit, duration)────────────┐ + │ 1 │ ['CA','US','FR','Eng'] │ [11,16,12,20] │ [100,500,300,200] │ (['CA','Eng','FR','US'],[11,20,12,16],[100,200,300,500]) │ + └────────┴────────────────────────┴───────────────┴───────────────────┴──────────────────────────────────────────────────────────┘ + ┌─pageid─┬─ip_country──────────────┬─hit────────┬─duration──────────┬─kql_array_sort_asc(ip_country, hit, duration)──────────┐ + │ 4 │ ['CA','Gem',NULL,'Eng'] │ 
[5,10,3,2] │ [220,320,310,150] │ (['CA','Eng','Gem',NULL],[5,2,10,3],[220,150,320,310]) │ + └────────┴─────────────────────────┴────────────┴───────────────────┴────────────────────────────────────────────────────────┘ + ┌─pageid─┬─ip_country─────────────────┬─hit───────────┬─duration──────┬─kql_array_sort_asc(ip_country, hit, duration)─────┐ + │ 3 │ ['CA','Gem','Japan','Eng'] │ [25,10,23,11] │ [120,110,130] │ (['CA','Eng','Gem','Japan'],[25,11,10,23],[NULL]) │ + └────────┴────────────────────────────┴───────────────┴───────────────┴───────────────────────────────────────────────────┘ + ┌─pageid─┬─ip_country─────────────┬─hit───────────┬─duration──────────┬─kql_array_sort_asc(ip_country, hit, duration)────────────┐ + │ 5 │ ['FR',NULL,'US','Eng'] │ [16,12,23,10] │ [210,250,110,260] │ (['Eng','FR','US',NULL],[10,16,23,12],[260,210,110,250]) │ + └────────┴────────────────────────┴───────────────┴───────────────────┴──────────────────────────────────────────────────────────┘ + + visit | where isnull((array_sort_asc(ip_country, hit, duration))[2][0]) + ┌─pageid─┬─ip_country─────────────────┬─hit───────────┬─duration──────┐ + │ 3 │ ['CA','Gem','Japan','Eng'] │ [25,10,23,11] │ [120,110,130] │ + └────────┴────────────────────────────┴───────────────┴───────────────┘ + ``` + + the following behaviours are same as Azure Data Explorer + if no alias specified, the functions return a single tuple includes arrays. can use array sbscripon to access the element inside. 
for example: + ``` + print array_sort_asc(dynamic([2, 1, 3]), dynamic([20, 40, 30]), dynamic([100, 200]))[0] -> [1,2,3] + ``` + if a single alias is used, the first array is returned as a column: + ``` + print t = array_sort_asc(dynamic([2, 1, 3]), dynamic([20, 40, 30]), dynamic([100, 200])) + ┌─t───────┐ + │ [1,2,3] │ + └─────────┘ + ``` + if n aliases are used, the first n arrays are returned as columns: + ``` + print 5, (t,w) = array_sort_asc(dynamic([2, 1, 3]), dynamic([20, 40, 30]), dynamic([100, 200])) + ┌─5─┬─t───────┬─w──────────┐ + │ 5 │ [1,2,3] │ [40,20,30] │ + └───┴─────────┴────────────┘ + ``` +## New Functions +- [case](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/casefunction) + `Customers | extend t = case(Age <= 10, "A", Age <= 20, "B", Age <= 30, "C", "D");` +## Bug fixed +- [summarize crash if aggregation function is missing] + ``` + fixed by throwing an exception: + + Exception on client: + Code: 62. DB::Exception: Syntax error near keyword "by". 
(SYNTAX_ERROR) + ``` +- [make_datetime creates wrong date time] + +- [todecimal() doesn't work with column arguments] + + + +# October 25, 2022 +## New Operators +- [count](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/countoperator) +`Customers | count;` +`Customers | where Age< 30 | count;` +`Customers | where Age< 30 | limit 2 | count;` +`Customers | where Age< 30 | limit 2 | count | project Count;` + +- [top](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/topoperator) +`Customers | top 3 by Age;` +`Customers | top 3 by Age desc;` +`Customers | top 3 by Age asc | order by FirstName;` +`Customers | top 3 by FirstName desc nulls first;` +`Customers | top 3 by FirstName desc nulls last;` +`Customers | top 3 by Age | top 2 by FirstName;` + +- [top-hitters](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/tophittersoperator) +`Customers | top-hitters a = 2 of Age by extra;` +`Customers | top-hitters 2 of Age;` +`Customers | top-hitters 2 of Age by extra | top-hitters 2 of Age | order by Age;` +`Customers | top-hitters 2 of Age by extra | where Age > 30;` +`Customers | top-hitters 2 of Age by extra | where approximate_sum_extra < 200;` +`Customers | top-hitters 2 of Age | where approximate_count_Age > 2;` + +## Bugs fixed +- [parse_version needs to return null when parameter is empty string] +- [Different expressions with the same alias in function substring] +- [parse_version needs to return null when parameter is empty string] +- [parse_url() output mismatch for empty string] +- [array_sum and array_length return incorrect results] + +# October 9, 2022 + +## operator +- [distinct](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/distinctoperator) + `Customers | distinct *` + `Customers | distinct Occupation` + `Customers | distinct Occupation, Education` + `Customers | where Age <30 | distinct Occupation, Education` + `Customers | where Age <30 | order by Age| distinct Occupation, Education` + 
+## String functions +- [reverse](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/reversefunction) + `print reverse(123)` + `print reverse(123.34)` + `print reverse('clickhouse')` + `print reverse(3h)` + `print reverse(datetime(2017-1-1 12:23:34))` + +- [parse_command_line](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parse-command-line) + `print parse_command_line('echo \"hello world!\" print$?', \"Windows\")` + +- [parse_csv](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parsecsvfunction) + `print result=parse_csv('aa,b,cc')` + `print result_multi_record=parse_csv('record1,a,b,c\nrecord2,x,y,z')` + +- [parse_json](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parsejsonfunction) + `print parse_json( dynamic([1, 2, 3]))` + `print parse_json('{"a":123.5, "b":"{\\"c\\":456}"}')` + +- [extract_json](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/extractjsonfunction) + `print extract_json( "$.a" , '{"a":123, "b":"{\\"c\\":456}"}' , typeof(int))` + +- [parse_version](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parse-versionfunction) + `print parse_version('1')` + `print parse_version('1.2.3.40')` + +## Bug fixed +- [correct array index in expression] + array index should start with 0 +- [Summarize should generate alias or use correct columns] + - if bin is used, the column should be in the select list if no alias is included + - if no column is included in the aggregate function (like count()), it should have an alias of function name + '_', e.g. count_ + - if a column name is included in the aggregate function, the alias should be function name + "_" + column name, like count(Age) -> count_Age + - if the argument of an aggregate function is an expression, Columns1 ... 
Columnsn should be used as alias + ``` + Customers | summarize count() by bin(Age, 10) + ┌─Age─┬─count_─┐ + │ 40 │ 2 │ + │ 20 │ 6 │ + │ 30 │ 4 │ + └─────┴────────┘ + Customers | summarize count(Age) by bin(Age, 10) + ┌─Age─┬─count_Age─┐ + │ 40 │ 2 │ + │ 20 │ 6 │ + │ 30 │ 4 │ + └─────┴───────────┘ + Customers | summarize count(Age+1) by bin(Age+1, 10) + ┌─Columns1─┬─count_─┐ + │ 40 │ 2 │ + │ 20 │ 6 │ + │ 30 │ 4 │ + └──────────┴────────┘ + ``` +- [extend doesn't replace existing columns] + +- [throw exception if use quoted string as alias] + +- [repeat() doesn't work with count argument as negative value] + +- [substring() doesn't work right with negative offsets] +- [endofmonth() doesn't return correct result] + +- [split() outputs array instead of string] + +- [split() returns empty string when arg goes out of bound] + +- [split() doesn't work with negative index] + + +# September 26, 2022 +## Bug fixed : +["select * from kql" results in syntax error] +[Parsing ipv4 with arrayStringConcat throws exception] +[CH Client crashes on invalid function name] +[extract() doesn't work right with 4th argument i.e typeof()] +[parse_ipv6_mask return incorrect results] +[timespan returns wrong output in seconds] +[timespan doesn't work for nanoseconds and tick] +[totimespan() doesn't work for nanoseconds and tick timespan unit] +[data types should throw exception in certain cases] +[decimal does not support scientific notation] +[extend statement causes client core dumping] +[extend crashes with array sorting] +[Core dump happens when WHERE keyword doesn't follow field name] +[Null values are missing in the result of `make_list_with_nulls'] +[trim functions use non-unique aliases] +[format_ipv4_mask returns incorrect mask value] + +# September 12, 2022 +## Extend operator +https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/extendoperator +`T | extend T | extend duration = endTime - startTime` +`T | project endTime, startTime | extend duration = endTime - 
startTime` +## Array functions +- [array_reverse](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/array-reverse-function) + `print array_reverse(dynamic(["this", "is", "an", "example"])) == dynamic(["example","an","is","this"])` + +- [array_rotate_left](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/array_rotate_leftfunction) + `print array_rotate_left(dynamic([1,2,3,4,5]), 2) == dynamic([3,4,5,1,2])` + `print array_rotate_left(dynamic([1,2,3,4,5]), -2) == dynamic([4,5,1,2,3])` + +- [array_rotate_right](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/array_rotate_rightfunction) + `print array_rotate_right(dynamic([1,2,3,4,5]), -2) == dynamic([3,4,5,1,2])` + `print array_rotate_right(dynamic([1,2,3,4,5]), 2) == dynamic([4,5,1,2,3])` + +- [array_shift_left](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/array_shift_leftfunction) + `print array_shift_left(dynamic([1,2,3,4,5]), 2) == dynamic([3,4,5,null,null])` + `print array_shift_left(dynamic([1,2,3,4,5]), -2) == dynamic([null,null,1,2,3])` + `print array_shift_left(dynamic([1,2,3,4,5]), 2, -1) == dynamic([3,4,5,-1,-1])` + `print array_shift_left(dynamic(['a', 'b', 'c']), 2) == dynamic(['c','',''])` + +- [array_shift_right](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/array_shift_rightfunction) + `print array_shift_right(dynamic([1,2,3,4,5]), -2) == dynamic([3,4,5,null,null])` + `print array_shift_right(dynamic([1,2,3,4,5]), 2) == dynamic([null,null,1,2,3])` + `print array_shift_right(dynamic([1,2,3,4,5]), -2, -1) == dynamic([3,4,5,-1,-1])` + `print array_shift_right(dynamic(['a', 'b', 'c']), -2) == dynamic(['c','',''])` + +- [pack_array](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/packarrayfunction) + `print x = 1, y = x * 2, z = y * 2, pack_array(x,y,z)` + + Please note that only arrays of elements of the same type may be created at this time. 
The underlying reasons are explained under the release note section of the `dynamic` data type. + +- [repeat](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/repeatfunction) + `print repeat(1, 0) == dynamic([])` + `print repeat(1, 3) == dynamic([1, 1, 1])` + `print repeat("asd", 3) == dynamic(['asd', 'asd', 'asd'])` + `print repeat(timespan(1d), 3) == dynamic([86400, 86400, 86400])` + `print repeat(true, 3) == dynamic([true, true, true])` + +- [zip](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/zipfunction) + `print zip(dynamic([1,3,5]), dynamic([2,4,6]))` + + Please note that only arrays of the same type are supported in our current implementation. The underlying reasons are explained under the release note section of the `dynamic` data type. + +## Data types + - [dynamic](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/scalar-data-types/dynamic) + `print isnull(dynamic(null))` + `print dynamic(1) == 1` + `print dynamic(timespan(1d)) == 86400` + `print dynamic([1, 2, 3])` + `print dynamic([[1], [2], [3]])` + `print dynamic(['a', "b", 'c'])` + + According to the KQL specifications `dynamic` is a literal, which means that no function calls are permitted. Expressions producing literals such as `datetime` and `timespan` and their aliases (ie. `date` and `time`, respectively) along with nested `dynamic` literals are allowed. + + Please note that our current implementation supports only scalars and arrays made up of elements of the same type. Support for mixed types and property bags is deferred for now, based on our understanding of the required effort and discussion with representatives of the QRadar team. 
+ +## Mathematical functions + - [isnan](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/isnanfunction) + `print isnan(double(nan)) == true` + `print isnan(4.2) == false` + `print isnan(4) == false` + `print isnan(real(+inf)) == false` + +## Set functions +Please note that functions returning arrays with set semantics may return them in any particular order, which may be subject to change in the future. + + - [jaccard_index](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/jaccard-index-function) + `print jaccard_index(dynamic([1, 1, 2, 2, 3, 3]), dynamic([1, 2, 3, 4, 4, 4])) == 0.75` + `print jaccard_index(dynamic([1, 2, 3]), dynamic([])) == 0` + `print jaccard_index(dynamic([]), dynamic([1, 2, 3, 4])) == 0` + `print isnan(jaccard_index(dynamic([]), dynamic([])))` + `print jaccard_index(dynamic([1, 2, 3]), dynamic([4, 5, 6, 7])) == 0` + `print jaccard_index(dynamic(['a', 's', 'd']), dynamic(['f', 'd', 's', 'a'])) == 0.75` + `print jaccard_index(dynamic(['Chewbacca', 'Darth Vader', 'Han Solo']), dynamic(['Darth Sidious', 'Darth Vader'])) == 0.25` + + - [set_difference](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/setdifferencefunction) + `print set_difference(dynamic([1, 1, 2, 2, 3, 3]), dynamic([1, 2, 3])) == dynamic([])` + `print array_sort_asc(set_difference(dynamic([1, 4, 2, 3, 5, 4, 6]), dynamic([1, 2, 3])))[1] == dynamic([4, 5, 6])` + `print set_difference(dynamic([4]), dynamic([1, 2, 3])) == dynamic([4])` + `print array_sort_asc(set_difference(dynamic([1, 2, 3, 4, 5]), dynamic([5]), dynamic([2, 4])))[1] == dynamic([1, 3])` + `print array_sort_asc(set_difference(dynamic([1, 2, 3]), dynamic([])))[1] == dynamic([1, 2, 3])` + `print array_sort_asc(set_difference(dynamic(['a', 's', 'd']), dynamic(['a', 'f'])))[1] == dynamic(['d', 's'])` + `print array_sort_asc(set_difference(dynamic(['Chewbacca', 'Darth Vader', 'Han Solo']), dynamic(['Darth Sidious', 'Darth Vader'])))[1] == dynamic(['Chewbacca', 'Han Solo'])` + 
+ - [set_has_element](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/sethaselementfunction) + `print set_has_element(dynamic(["this", "is", "an", "example"]), "example") == true` + `print set_has_element(dynamic(["this", "is", "an", "example"]), "examples") == false` + `print set_has_element(dynamic([1, 2, 3]), 2) == true` + `print set_has_element(dynamic([1, 2, 3, 4.2]), 4) == false` + + - [set_intersect](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/setintersectfunction) + `print array_sort_asc(set_intersect(dynamic([1, 1, 2, 2, 3, 3]), dynamic([1, 2, 3])))[1] == dynamic([1, 2, 3])` + `print array_sort_asc(set_intersect(dynamic([1, 4, 2, 3, 5, 4, 6]), dynamic([1, 2, 3])))[1] == dynamic([1, 2, 3])` + `print set_intersect(dynamic([4]), dynamic([1, 2, 3])) == dynamic([])` + `print set_intersect(dynamic([1, 2, 3, 4, 5]), dynamic([1, 3, 5]), dynamic([2, 5])) == dynamic([5])` + `print set_intersect(dynamic([1, 2, 3]), dynamic([])) == dynamic([])` + `print set_intersect(dynamic(['a', 's', 'd']), dynamic(['a', 'f'])) == dynamic(['a'])` + `print set_intersect(dynamic(['Chewbacca', 'Darth Vader', 'Han Solo']), dynamic(['Darth Sidious', 'Darth Vader'])) == dynamic(['Darth Vader'])` + + - [set_union](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/setunionfunction) + `print array_sort_asc(set_union(dynamic([1, 1, 2, 2, 3, 3]), dynamic([1, 2, 3])))[1] == dynamic([1, 2, 3])` + `print array_sort_asc(set_union(dynamic([1, 4, 2, 3, 5, 4, 6]), dynamic([1, 2, 3])))[1] == dynamic([1, 2, 3, 4, 5, 6])` + `print array_sort_asc(set_union(dynamic([4]), dynamic([1, 2, 3])))[1] == dynamic([1, 2, 3, 4])` + `print array_sort_asc(set_union(dynamic([1, 3, 4]), dynamic([5]), dynamic([2, 4])))[1] == dynamic([1, 2, 3, 4, 5])` + `print array_sort_asc(set_union(dynamic([1, 2, 3]), dynamic([])))[1] == dynamic([1, 2, 3])` + `print array_sort_asc(set_union(dynamic(['a', 's', 'd']), dynamic(['a', 'f'])))[1] == dynamic(['a', 'd', 'f', 's'])` + `print 
array_sort_asc(set_union(dynamic(['Chewbacca', 'Darth Vader', 'Han Solo']), dynamic(['Darth Sidious', 'Darth Vader'])))[1] == dynamic(['Chewbacca', 'Darth Sidious', 'Darth Vader', 'Han Solo'])` + +# August 29, 2022 + +## **mv-expand operator** +https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/mvexpandoperator +Note: **expand on array columns only** +- test cases + ``` + CREATE TABLE T + ( + a UInt8, + b Array(String), + c Array(Int8), + d Array(Int8) + ) ENGINE = Memory; + + INSERT INTO T VALUES (1, ['Salmon', 'Steak','Chicken'],[1,2,3,4],[5,6,7,8]) + + T | mv-expand c + T | mv-expand c, d + T | mv-expand b | mv-expand c + T | mv-expand c to typeof(bool) + T | mv-expand with_itemindex=index b, c, d + T | mv-expand array_concat(c,d) + T | mv-expand x = c, y = d + T | mv-expand xy = array_concat(c, d) + T | mv-expand with_itemindex=index c,d to typeof(bool) + ``` + +## **make-series operator** +https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/make-seriesoperator + +- test case make-series on datetime column + ``` + CREATE TABLE T + ( + Supplier Nullable(String), + Fruit String , + Price Float64, + Purchase Date + ) ENGINE = Memory; + + INSERT INTO T VALUES ('Aldi','Apple',4,'2016-09-10'); + INSERT INTO T VALUES ('Costco','Apple',2,'2016-09-11'); + INSERT INTO T VALUES ('Aldi','Apple',6,'2016-09-10'); + INSERT INTO T VALUES ('Costco','Snargaluff',100,'2016-09-12'); + INSERT INTO T VALUES ('Aldi','Apple',7,'2016-09-12'); + INSERT INTO T VALUES ('Aldi','Snargaluff',400,'2016-09-11'); + INSERT INTO T VALUES ('Costco','Snargaluff',104,'2016-09-12'); + INSERT INTO T VALUES ('Aldi','Apple',5,'2016-09-12'); + INSERT INTO T VALUES ('Aldi','Snargaluff',600,'2016-09-11'); + INSERT INTO T VALUES ('Costco','Snargaluff',200,'2016-09-10'); + ``` + Have from and to + ``` + T | make-series PriceAvg = avg(Price) default=0 on Purchase from datetime(2016-09-10) to datetime(2016-09-13) step 1d by Supplier, Fruit + ``` + Has from , without to + ``` + T | 
make-series PriceAvg = avg(Price) default=0 on Purchase from datetime(2016-09-10) step 1d by Supplier, Fruit + ``` + Without from , has to + ``` + T | make-series PriceAvg = avg(Price) default=0 on Purchase to datetime(2016-09-13) step 1d by Supplier, Fruit + ``` + Without from , without to + ``` + T | make-series PriceAvg = avg(Price) default=0 on Purchase step 1d by Supplier, Fruit + ``` + Without by clause + ``` + T | make-series PriceAvg = avg(Price) default=0 on Purchase step 1d + ``` + Without aggregation alias + ``` + T | make-series avg(Price) default=0 on Purchase step 1d by Supplier, Fruit + ``` + Has group expression alias + ``` + T | make-series avg(Price) default=0 on Purchase step 1d by Supplier_Name = Supplier, Fruit + ``` + Use different step value + ``` + T | make-series PriceAvg = avg(Price) default=0 on Purchase from datetime(2016-09-10) to datetime(2016-09-13) step 3d by Supplier, Fruit + ``` +- test case make-series on numeric column + ``` + CREATE TABLE T2 + ( + Supplier Nullable(String), + Fruit String , + Price Int32, + Purchase Int32 + ) ENGINE = Memory; + + INSERT INTO T2 VALUES ('Aldi','Apple',4,10); + INSERT INTO T2 VALUES ('Costco','Apple',2,11); + INSERT INTO T2 VALUES ('Aldi','Apple',6,10); + INSERT INTO T2 VALUES ('Costco','Snargaluff',100,12); + INSERT INTO T2 VALUES ('Aldi','Apple',7,12); + INSERT INTO T2 VALUES ('Aldi','Snargaluff',400,11); + INSERT INTO T2 VALUES ('Costco','Snargaluff',104,12); + INSERT INTO T2 VALUES ('Aldi','Apple',5,12); + INSERT INTO T2 VALUES ('Aldi','Snargaluff',600,11); + INSERT INTO T2 VALUES ('Costco','Snargaluff',200,10); + ``` + Have from and to + ``` + T2 | make-series PriceAvg=avg(Price) default=0 on Purchase from 10 to 15 step 1.0 by Supplier, Fruit; + ``` + Has from , without to + ``` + T2 | make-series PriceAvg=avg(Price) default=0 on Purchase from 10 step 1.0 by Supplier, Fruit; + ``` + Without from , has to + ``` + T2 | make-series PriceAvg=avg(Price) default=0 on Purchase to 18 step 4.0 by 
Supplier, Fruit; + ``` + Without from , without to + ``` + T2 | make-series PriceAvg=avg(Price) default=0 on Purchase step 2.0 by Supplier, Fruit; + ``` + Without by clause + ``` + T2 | make-series PriceAvg=avg(Price) default=0 on Purchase step 2.0; + ``` + +## Aggregate Functions +- [bin](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binfunction) + `print bin(4.5, 1)` + `print bin(time(16d), 7d)` + `print bin(datetime(1970-05-11 13:45:07), 1d)` +- [stdev](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/stdev-aggfunction) + `Customers | summarize t = stdev(Age) by FirstName` + +- [stdevif](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/stdevif-aggfunction) + `Customers | summarize t = stdevif(Age, Age < 10) by FirstName` + +- [binary_all_and](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-all-and-aggfunction) + `Customers | summarize t = binary_all_and(Age) by FirstName` + +- [binary_all_or](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-all-or-aggfunction) + `Customers | summarize t = binary_all_or(Age) by FirstName` + +- [binary_all_xor](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-all-xor-aggfunction) + `Customers | summarize t = binary_all_xor(Age) by FirstName` + +- [percentiles](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction) + `Customers | summarize percentiles(Age, 30, 40, 50, 60, 70) by FirstName` + +- [percentilesw](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction) + `DataTable | summarize t = percentilesw(Bucket, Frequency, 50, 75, 99.9)` + +- [percentile](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction) + `Customers | summarize t = percentile(Age, 50) by FirstName` + +- [percentilew](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction) + `DataTable | summarize t = 
percentilew(Bucket, Frequency, 50)` + +## Dynamic functions +- [array_sort_asc](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arraysortascfunction) + **Only support the constant dynamic array.** + **Returns an array. So, each element of the input has to be of same datatype.** + `print t = array_sort_asc(dynamic([null, 'd', 'a', 'c', 'c']))` + `print t = array_sort_asc(dynamic([4, 1, 3, 2]))` + `print t = array_sort_asc(dynamic(['b', 'a', 'c']), dynamic(['q', 'p', 'r']))` + `print t = array_sort_asc(dynamic(['q', 'p', 'r']), dynamic(['clickhouse','hello', 'world']))` + `print t = array_sort_asc( dynamic(['d', null, 'a', 'c', 'c']) , false)` + `print t = array_sort_asc( dynamic(['d', null, 'a', 'c', 'c']) , 1 > 2)` + `print t = array_sort_asc( dynamic([null, 'd', null, null, 'a', 'c', 'c', null, null, null]) , false)` + `print t = array_sort_asc( dynamic([null, null, null]) , false)` + `print t = array_sort_asc(dynamic([2, 1, null,3]), dynamic([20, 10, 40, 30]), 1 > 2)` + `print t = array_sort_asc(dynamic([2, 1, null,3]), dynamic([20, 10, 40, 30, 50, 3]), 1 > 2)` + +- [array_sort_desc](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arraysortdescfunction) **(only support the constant dynamic array)** + + `print t = array_sort_desc(dynamic([null, 'd', 'a', 'c', 'c']))` + `print t = array_sort_desc(dynamic([4, 1, 3, 2]))` + `print t = array_sort_desc(dynamic(['b', 'a', 'c']), dynamic(['q', 'p', 'r']))` + `print t = array_sort_desc(dynamic(['q', 'p', 'r']), dynamic(['clickhouse','hello', 'world']))` + `print t = array_sort_desc( dynamic(['d', null, 'a', 'c', 'c']) , false)` + `print t = array_sort_desc( dynamic(['d', null, 'a', 'c', 'c']) , 1 > 2)` + `print t = array_sort_desc( dynamic([null, 'd', null, null, 'a', 'c', 'c', null, null, null]) , false)` + `print t = array_sort_desc( dynamic([null, null, null]) , false)` + `print t = array_sort_desc(dynamic([2, 1, null, 3]), dynamic([20, 10, 40, 30]), 1 > 2)` + `print t = 
array_sort_desc(dynamic([2, 1, null,3, null]), dynamic([20, 10, 40, 30, 50, 3]), 1 > 2)` + +- [array_concat](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arrayconcatfunction) + `print array_concat(dynamic([1, 2, 3]), dynamic([4, 5]), dynamic([6, 7, 8, 9])) == dynamic([1, 2, 3, 4, 5, 6, 7, 8, 9])` + +- [array_iff / array_iif](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arrayifffunction) + `print array_iif(dynamic([true, false, true]), dynamic([1, 2, 3]), dynamic([4, 5, 6])) == dynamic([1, 5, 3])` + `print array_iif(dynamic([true, false, true]), dynamic([1, 2, 3, 4]), dynamic([4, 5, 6])) == dynamic([1, 5, 3])` + `print array_iif(dynamic([true, false, true, false]), dynamic([1, 2, 3, 4]), dynamic([4, 5, 6])) == dynamic([1, 5, 3, null])` + `print array_iif(dynamic([1, 0, -1, 44, 0]), dynamic([1, 2, 3, 4]), dynamic([4, 5, 6])) == dynamic([1, 5, 3, 4, null])` + `print t = array_iif(dynamic([true, false, true, false, true]), dynamic([1.1, 2.2, 3.3, 4.4, 5.5]), 999.99);` + `print t = array_iif(dynamic([true, false, true, false, true]), 90, dynamic([1, 3]));` + +- [array_slice](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arrayslicefunction) + `print array_slice(dynamic([1,2,3]), 1, 2) == dynamic([2, 3])` + `print array_slice(dynamic([1,2,3,4,5]), 2, -1) == dynamic([3, 4, 5])` + `print array_slice(dynamic([1,2,3,4,5]), -3, -2) == dynamic([3, 4])` + +- [array_split](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arraysplitfunction) + `print array_split(dynamic([1,2,3,4,5]), 2) == dynamic([[1,2],[3,4,5]])` + `print array_split(dynamic([1,2,3,4,5]), dynamic([1,3])) == dynamic([[1],[2,3],[4,5]])` + +## DateTimeFunctions + +- [ago](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/agofunction) + `print ago(2h)` + +- [endofday](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/endofdayfunction) + `print endofday(datetime(2017-01-01 10:10:17), -1)` + `print 
endofday(datetime(2017-01-01 10:10:17), 1)` + `print endofday(datetime(2017-01-01 10:10:17))` + +- [endofmonth](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/endofmonthfunction) + `print endofmonth(datetime(2017-01-01 10:10:17), -1)` + `print endofmonth(datetime(2017-01-01 10:10:17), 1)` + `print endofmonth(datetime(2017-01-01 10:10:17))` + +- [endofweek](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/endofweekfunction) + `print endofweek(datetime(2017-01-01 10:10:17), 1)` + `print endofweek(datetime(2017-01-01 10:10:17), -1)` + `print endofweek(datetime(2017-01-01 10:10:17))` + +- [endofyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/endofyearfunction) + `print endofyear(datetime(2017-01-01 10:10:17), -1)` + `print endofyear(datetime(2017-01-01 10:10:17), 1)` + `print endofyear(datetime(2017-01-01 10:10:17))` + +- [make_datetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/make-datetimefunction) + `print make_datetime(2017,10,01)` + `print make_datetime(2017,10,01,12,10)` + `print make_datetime(2017,10,01,12,11,0.1234567)` + +- [datetime_diff](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/datetime-difffunction) + `print datetime_diff('year',datetime(2017-01-01),datetime(2000-12-31))` + `print datetime_diff('quarter',datetime(2017-07-01),datetime(2017-03-30))` + `print datetime_diff('minute',datetime(2017-10-30 23:05:01),datetime(2017-10-30 23:00:59))` + +- [unixtime_microseconds_todatetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/unixtime-microseconds-todatetimefunction) + `print unixtime_microseconds_todatetime(1546300800000000)` + +- [unixtime_milliseconds_todatetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/unixtime-milliseconds-todatetimefunction) + `print unixtime_milliseconds_todatetime(1546300800000)` + +- 
[unixtime_nanoseconds_todatetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/unixtime-nanoseconds-todatetimefunction) + `print unixtime_nanoseconds_todatetime(1546300800000000000)` + +- [datetime_part](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/datetime-partfunction) + `print datetime_part('day', datetime(2017-10-30 01:02:03.7654321))` + +- [datetime_add](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/datetime-addfunction) + `print datetime_add('day',1,datetime(2017-10-30 01:02:03.7654321))` + +- [format_timespan](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/format-timespanfunction) + `print format_timespan(time(1d), 'd-[hh:mm:ss]')` + `print format_timespan(time('12:30:55.123'), 'ddddd-[hh:mm:ss.ffff]')` + +- [format_datetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/format-datetimefunction) + `print format_datetime(todatetime('2009-06-15T13:45:30.6175425'), 'yy-M-dd [H:mm:ss.fff]')` + `print format_datetime(datetime(2015-12-14 02:03:04.12345), 'y-M-d h:m:s tt')` + +- [todatetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/todatetimefunction) + `print todatetime('2014-05-25T08:20:03.123456Z')` + `print todatetime('2014-05-25 20:03.123')` + +- [totimespan](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/totimespanfunction) + `print totimespan('0.01:34:23')` + `print totimespan(1d)` + +# August 15, 2022 + **double quote support** + ``print res = strcat("double ","quote")`` +## Aggregate functions + - [bin_at](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binatfunction) + `print res = bin_at(6.5, 2.5, 7)` + `print res = bin_at(1h, 1d, 12h)` + `print res = bin_at(datetime(2017-05-15 10:20:00.0), 1d, datetime(1970-01-01 12:00:00.0))` + `print res = bin_at(datetime(2017-05-17 10:20:00.0), 7d, datetime(2017-06-04 00:00:00.0))` + + - 
[array_index_of](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arrayindexoffunction) + *Supports only basic lookup. Does not support the start_index, length and occurrence parameters* + `print output = array_index_of(dynamic(['John', 'Denver', 'Bob', 'Marley']), 'Marley')` + `print output = array_index_of(dynamic([1, 2, 3]), 2)` + - [array_sum](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/array-sum-function) + `print output = array_sum(dynamic([2, 5, 3]))` + `print output = array_sum(dynamic([2.5, 5.5, 3]))` + - [array_length](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arraylengthfunction) + `print output = array_length(dynamic(['John', 'Denver', 'Bob', 'Marley']))` + `print output = array_length(dynamic([1, 2, 3]))` + +## Conversion +- [tobool / toboolean](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/toboolfunction) + `print tobool(true) == true` + `print toboolean(false) == false` + `print tobool(0) == false` + `print toboolean(19819823) == true` + `print tobool(-2) == true` + `print isnull(toboolean('a'))` + `print tobool('true') == true` + `print toboolean('false') == false` + +- [todouble / toreal](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/todoublefunction) + `print todouble(4) == 4` + `print toreal(4.2) == 4.2` + `print isnull(todouble('a'))` + `print toreal('-0.3') == -0.3` + +- [toint](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/tointfunction) + `print isnull(toint('a'))` + `print toint(4) == 4` + `print toint('4') == 4` + `print isnull(toint(4.2))` + +- [tostring](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/tostringfunction) + `print tostring(123) == '123'` + `print tostring('asd') == 'asd'` + +## Data Types + - [dynamic](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/scalar-data-types/dynamic) + *Supports only 1D arrays* + `print output = dynamic(['a', 'b', 'c'])` + `print output = dynamic([1, 2, 3])` + +- 
[bool, boolean](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/scalar-data-types/bool) + `print bool(1)` + `print boolean(0)` + +- [datetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/scalar-data-types/datetime) + `print datetime(2015-12-31 23:59:59.9)` + `print datetime('2015-12-31 23:59:59.9')` + `print datetime("2015-12-31")` + +- [guid](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/scalar-data-types/guid) + `print guid(74be27de-1e4e-49d9-b579-fe0b331d3642)` + `print guid('74be27de-1e4e-49d9-b579-fe0b331d3642')` + `print guid('74be27de1e4e49d9b579fe0b331d3642')` + +- [int](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/scalar-data-types/int) + `print int(1)` + +- [long](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/scalar-data-types/long) + `print long(16)` + +- [real](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/scalar-data-types/real) + `print real(1)` + +- [timespan, time](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/scalar-data-types/timespan) + **Note** the timespan is used for calculating datetime, so the output is in seconds. e.g. 
time(1h) = 3600 + `print 1d` + `print 30m` + `print time('0.12:34:56.7')` + `print time(2h)` + `print timespan(2h)` + + +## StringFunctions + +- [base64_encode_fromguid](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/base64-encode-fromguid-function) +`print Quine = base64_encode_fromguid('ae3133f2-6e22-49ae-b06a-16e6a9b212eb')` +- [base64_decode_toarray](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/base64_decode_toarrayfunction) +`print base64_decode_toarray('S3VzdG8=')` +- [base64_decode_toguid](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/base64-decode-toguid-function) +`print base64_decode_toguid('YWUzMTMzZjItNmUyMi00OWFlLWIwNmEtMTZlNmE5YjIxMmVi')` +- [replace_regex](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/replace-regex-function) +`print replace_regex('Hello, World!', '.', '\\0\\0')` +- [has_any_index](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/has-any-index-function) +`print idx = has_any_index('this is an example', dynamic(['this', 'example']))` +- [translate](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/translatefunction) +`print translate('krasp', 'otsku', 'spark')` +- [trim](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/trimfunction) +`print trim('--', '--https://bing.com--')` +- [trim_end](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/trimendfunction) +`print trim_end('.com', 'bing.com')` +- [trim_start](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/trimstartfunction) +`print trim_start('[^\\w]+', strcat('- ','Te st1','// $'))` + +## DateTimeFunctions +- [startofyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/startofyearfunction) + `print startofyear(datetime(2017-01-01 10:10:17), -1)` + `print startofyear(datetime(2017-01-01 10:10:17), 0)` + `print startofyear(datetime(2017-01-01 10:10:17), 1)` +- 
[weekofyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/weekofyearfunction) + `print week_of_year(datetime(2020-12-31))` + `print week_of_year(datetime(2020-06-15))` + `print week_of_year(datetime(1970-01-01))` + `print week_of_year(datetime(2000-01-01))` + +- [startofweek](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/startofweekfunction) + `print startofweek(datetime(2017-01-01 10:10:17), -1)` + `print startofweek(datetime(2017-01-01 10:10:17), 0)` + `print startofweek(datetime(2017-01-01 10:10:17), 1)` + +- [startofmonth](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/startofmonthfunction) + `print startofmonth(datetime(2017-01-01 10:10:17), -1)` + `print startofmonth(datetime(2017-01-01 10:10:17), 0)` + `print startofmonth(datetime(2017-01-01 10:10:17), 1)` + +- [startofday](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/startofdayfunction) + `print startofday(datetime(2017-01-01 10:10:17), -1)` + `print startofday(datetime(2017-01-01 10:10:17), 0)` + `print startofday(datetime(2017-01-01 10:10:17), 1)` + +- [monthofyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/monthofyearfunction) + `print monthofyear(datetime("2015-12-14"))` + +- [hourofday](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/hourofdayfunction) + `print hourofday(datetime(2015-12-14 18:54:00))` + +- [getyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/getyearfunction) + `print getyear(datetime(2015-10-12))` + +- [getmonth](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/getmonthfunction) + `print getmonth(datetime(2015-10-12))` + +- [dayofyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/dayofyearfunction) + `print dayofyear(datetime(2015-12-14))` + +- [dayofmonth](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/dayofmonthfunction) + `print dayofmonth(datetime(2015-12-14))` + +- 
[unixtime_seconds_todatetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/unixtime-seconds-todatetimefunction) + `print unixtime_seconds_todatetime(1546300800)` + +- [dayofweek](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/dayofweekfunction) + `print dayofweek(datetime(2015-12-20))` + +- [now](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/nowfunction) + `print now()` + `print now(2d)` + `print now(-2h)` + `print now(5microseconds)` + `print now(5seconds)` + `print now(6minutes)` + `print now(-2d) ` + `print now(time(1d))` + + +## Binary functions +- [binary_and](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-andfunction) + `print binary_and(15, 3) == 3` + `print binary_and(1, 2) == 0` +- [binary_not](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-notfunction) + `print binary_not(1) == -2` +- [binary_or](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-orfunction) + `print binary_or(3, 8) == 11` + `print binary_or(1, 2) == 3` +- [binary_shift_left](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-shift-leftfunction) + `print binary_shift_left(1, 1) == 2` + `print binary_shift_left(1, 64) == 1` +- [binary_shift_right](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-shift-rightfunction) + `print binary_shift_right(1, 1) == 0` + `print binary_shift_right(1, 64) == 1` +- [binary_xor](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-xorfunction) + `print binary_xor(1, 3) == 2` +- [bitset_count_ones](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/bitset-count-onesfunction) + `print bitset_count_ones(42) == 3` + +## IP functions +- [format_ipv4](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/format-ipv4-function) + `print format_ipv4('192.168.1.255', 24) == '192.168.1.0'` + `print format_ipv4(3232236031, 24) == '192.168.1.0'` +- 
[format_ipv4_mask](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/format-ipv4-mask-function) + `print format_ipv4_mask('192.168.1.255', 24) == '192.168.1.0/24'` + `print format_ipv4_mask(3232236031, 24) == '192.168.1.0/24'` +- [ipv4_compare](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-comparefunction) + `print ipv4_compare('127.0.0.1', '127.0.0.1') == 0` + `print ipv4_compare('192.168.1.1', '192.168.1.255') < 0` + `print ipv4_compare('192.168.1.1/24', '192.168.1.255/24') == 0` + `print ipv4_compare('192.168.1.1', '192.168.1.255', 24) == 0` +- [ipv4_is_match](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-is-matchfunction) + `print ipv4_is_match('127.0.0.1', '127.0.0.1') == true` + `print ipv4_is_match('192.168.1.1', '192.168.1.255') == false` + `print ipv4_is_match('192.168.1.1/24', '192.168.1.255/24') == true` + `print ipv4_is_match('192.168.1.1', '192.168.1.255', 24) == true` +- [ipv6_compare](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv6-comparefunction) + `print ipv6_compare('::ffff:7f00:1', '127.0.0.1') == 0` + `print ipv6_compare('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7995') < 0` + `print ipv6_compare('192.168.1.1/24', '192.168.1.255/24') == 0` + `print ipv6_compare('fe80::85d:e82c:9446:7994/127', 'fe80::85d:e82c:9446:7995/127') == 0` + `print ipv6_compare('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7995', 127) == 0` +- [ipv6_is_match](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv6-is-matchfunction) + `print ipv6_is_match('::ffff:7f00:1', '127.0.0.1') == true` + `print ipv6_is_match('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7995') == false` + `print ipv6_is_match('192.168.1.1/24', '192.168.1.255/24') == true` + `print ipv6_is_match('fe80::85d:e82c:9446:7994/127', 'fe80::85d:e82c:9446:7995/127') == true` + `print ipv6_is_match('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7995', 127) == true` +- 
[parse_ipv4_mask](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parse-ipv4-maskfunction) + `print parse_ipv4_mask('127.0.0.1', 24) == 2130706432` + `print parse_ipv4_mask('192.1.168.2', 31) == 3221334018` + `print parse_ipv4_mask('192.1.168.3', 31) == 3221334018` + `print parse_ipv4_mask('127.2.3.4', 32) == 2130838276` +- [parse_ipv6_mask](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parse-ipv6-maskfunction) + `print parse_ipv6_mask('127.0.0.1', 24) == '0000:0000:0000:0000:0000:ffff:7f00:0000'` + `print parse_ipv6_mask('fe80::85d:e82c:9446:7994', 120) == 'fe80:0000:0000:0000:085d:e82c:9446:7900'` + +# August 1, 2022 + +**The config setting to allow modify dialect setting**. + - Set dialect setting in server configuration XML at user level(` users.xml `). This sets the ` dialect ` at server startup and CH will do query parsing for all users with ` default ` profile according to dialect value. + + For example: + ` + + + random + kusto_auto + ` + + - Query can be executed with HTTP client as below once dialect is set in users.xml + ` echo "KQL query" | curl -sS "http://localhost:8123/?" --data-binary @- ` + + - To execute the query using clickhouse-client , Update clickhouse-client.xml as below and connect clickhouse-client with --config-file option (` clickhouse-client --config-file= `) + + ` + kusto_auto + ` + + OR + pass dialect setting with '--'. 
For example : + ` clickhouse-client --dialect='kusto_auto' -q "KQL query" ` + +- **strcmp** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/strcmpfunction) + `print strcmp('abc','ABC')` + +- **parse_url** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parseurlfunction) + `print Result = parse_url('scheme://username:password@www.google.com:1234/this/is/a/path?k1=v1&k2=v2#fragment')` + +- **parse_urlquery** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parseurlqueryfunction) + `print Result = parse_urlquery('k1=v1&k2=v2&k3=v3')` + +- **print operator** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/printoperator) + `print x=1, s=strcat('Hello', ', ', 'World!')` + +- **Aggregate Functions:** + - [make_list()](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/makelist-aggfunction) + `Customers | summarize t = make_list(FirstName) by FirstName` + `Customers | summarize t = make_list(FirstName, 10) by FirstName` + - [make_list_if()](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/makelistif-aggfunction) + `Customers | summarize t = make_list_if(FirstName, Age > 10) by FirstName` + `Customers | summarize t = make_list_if(FirstName, Age > 10, 10) by FirstName` + - [make_list_with_nulls()](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/make-list-with-nulls-aggfunction) + `Customers | summarize t = make_list_with_nulls(Age) by FirstName` + - [make_set()](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/makeset-aggfunction) + `Customers | summarize t = make_set(FirstName) by FirstName` + `Customers | summarize t = make_set(FirstName, 10) by FirstName` + - [make_set_if()](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/makesetif-aggfunction) + `Customers | summarize t = make_set_if(FirstName, Age > 10) by FirstName` + `Customers | summarize t = make_set_if(FirstName, Age > 10, 10) by FirstName` + +## IP functions + +- 
**The following functions now support arbitrary expressions as their argument:** + - [ipv4_is_private](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-is-privatefunction) + - [ipv4_is_in_range](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-is-in-range-function) + - [ipv4_netmask_suffix](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-netmask-suffix-function) + +# July 17, 2022 + +## Renamed dialect from sql_dialect to dialect + +`set dialect='clickhouse'` +`set dialect='kusto'` +`set dialect='kusto_auto'` + +## IP functions +- [parse_ipv4](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parse-ipv4function) + `"Customers | project parse_ipv4('127.0.0.1')"` +- [parse_ipv6](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parse-ipv6function) + `"Customers | project parse_ipv6('127.0.0.1')"` + +Please note that the functions listed below only take constant parameters for now. Further improvement is to be expected to support expressions. 
+ +- [ipv4_is_private](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-is-privatefunction) + `"Customers | project ipv4_is_private('192.168.1.6/24')"` + `"Customers | project ipv4_is_private('192.168.1.6')"` +- [ipv4_is_in_range](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-is-in-range-function) + `"Customers | project ipv4_is_in_range('127.0.0.1', '127.0.0.1')"` + `"Customers | project ipv4_is_in_range('192.168.1.6', '192.168.1.1/24')"` +- [ipv4_netmask_suffix](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-netmask-suffix-function) + `"Customers | project ipv4_netmask_suffix('192.168.1.1/24')"` + `"Customers | project ipv4_netmask_suffix('192.168.1.1')"` + +## string functions +- **support subquery for `in` operator** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/in-cs-operator) + (the subquery needs to be wrapped in double parentheses) + + `Customers | where Age in ((Customers|project Age|where Age < 30))` + Note: the case-insensitive variant is not supported yet +- **has_all** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/has-all-operator) + `Customers|where Occupation has_all ('Skilled','abcd')` + note: subqueries are not supported yet +- **has_any** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/has-anyoperator) + `Customers|where Occupation has_any ('Skilled','abcd')` + note: subqueries are not supported yet +- **countof** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/countoffunction) + `Customers | project countof('The cat sat on the mat', 'at')` + `Customers | project countof('The cat sat on the mat', 'at', 'normal')` + `Customers | project countof('The cat sat on the mat', 'at', 'regex')` +- **extract** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/extractfunction) +`Customers | project extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 0, 'The price of PINEAPPLE ice cream is 20')` +`Customers | project 
extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 1, 'The price of PINEAPPLE ice cream is 20')` +`Customers | project extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 2, 'The price of PINEAPPLE ice cream is 20')` +`Customers | project extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 3, 'The price of PINEAPPLE ice cream is 20')` +`Customers | project extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 2, 'The price of PINEAPPLE ice cream is 20', typeof(real))` + +- **extract_all** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/extractallfunction) + + `Customers | project extract_all('(\\w)(\\w+)(\\w)','The price of PINEAPPLE ice cream is 20')` + note: captureGroups is not supported yet + +- **split** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/splitfunction) + `Customers | project split('aa_bb', '_')` + `Customers | project split('aaa_bbb_ccc', '_', 1)` + `Customers | project split('', '_')` + `Customers | project split('a__b', '_')` + `Customers | project split('aabbcc', 'bb')` + +- **strcat_delim** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/strcat-delimfunction) + `Customers | project strcat_delim('-', '1', '2', 'A')` + `Customers | project strcat_delim('-', '1', '2', strcat('A','b'))` + note: only string arguments are supported for now. 
+ +- **indexof** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/indexoffunction) + `Customers | project indexof('abcdefg','cde')` + `Customers | project indexof('abcdefg','cde',2)` + `Customers | project indexof('abcdefg','cde',6)` + note: length and occurrence are not supported yet + + + + +# July 4, 2022 + +## sql_dialect + +- default is `clickhouse` + `set sql_dialect='clickhouse'` +- only process kql + `set sql_dialect='kusto'` +- process both kql and CH sql + `set sql_dialect='kusto_auto'` +## KQL() function + + - create table + `CREATE TABLE kql_table4 ENGINE = Memory AS select *, now() as new_column From kql(Customers | project LastName,Age);` + verify the content of `kql_table4` + `select * from kql_table4` + + - insert into table + create a tmp table: + ``` + CREATE TABLE temp + ( + FirstName Nullable(String), + LastName String, + Age Nullable(UInt8) + ) ENGINE = Memory; + ``` + `INSERT INTO temp select * from kql(Customers|project FirstName,LastName,Age);` + verify the content of `temp` + `select * from temp` + + - Select from kql() + `Select * from kql(Customers|project FirstName)` + +## KQL operators: + - Tabular expression statements + `Customers` + - Select Column + `Customers | project FirstName,LastName,Occupation` + - Limit returned results + `Customers | project FirstName,LastName,Occupation | take 1 | take 3` + - sort, order + `Customers | order by Age desc , FirstName asc` + - Filter + `Customers | where Occupation == 'Skilled Manual'` + - summarize + `Customers |summarize max(Age) by Occupation` + +## KQL string operators and functions + - contains + `Customers |where Education contains 'degree'` + - !contains + `Customers |where Education !contains 'degree'` + - contains_cs + `Customers |where Education contains_cs 'Degree'` + - !contains_cs + `Customers |where Education !contains_cs 'Degree'` + - endswith + `Customers | where FirstName endswith 'RE'` + - !endswith + `Customers | where FirstName !endswith 'RE'` + - endswith_cs + 
`Customers | where FirstName endswith_cs 're'` + - !endswith_cs + `Customers | where FirstName !endswith_cs 're'` + - == + `Customers | where Occupation == 'Skilled Manual'` + - != + `Customers | where Occupation != 'Skilled Manual'` + - has + `Customers | where Occupation has 'skilled'` + - !has + `Customers | where Occupation !has 'skilled'` + - has_cs + `Customers | where Occupation has_cs 'Skilled'` + - !has_cs + `Customers | where Occupation !has_cs 'Skilled'` + - hasprefix + `Customers | where Occupation hasprefix 'Ab'` + - !hasprefix + `Customers | where Occupation !hasprefix 'Ab'` + - hasprefix_cs + `Customers | where Occupation hasprefix_cs 'ab'` + - !hasprefix_cs + `Customers | where Occupation !hasprefix_cs 'ab'` + - hassuffix + `Customers | where Occupation hassuffix 'Ent'` + - !hassuffix + `Customers | where Occupation !hassuffix 'Ent'` + - hassuffix_cs + `Customers | where Occupation hassuffix_cs 'ent'` + - !hassuffix_cs + `Customers | where Occupation !hassuffix_cs 'ent'` + - in + `Customers |where Education in ('Bachelors','High School')` + - !in + `Customers | where Education !in ('Bachelors','High School')` + - matches regex + `Customers | where FirstName matches regex 'P.*r'` + - startswith + `Customers | where FirstName startswith 'pet'` + - !startswith + `Customers | where FirstName !startswith 'pet'` + - startswith_cs + `Customers | where FirstName startswith_cs 'pet'` + - !startswith_cs + `Customers | where FirstName !startswith_cs 'pet'` + + - base64_encode_tostring() + `Customers | project base64_encode_tostring('Kusto1') | take 1` + - base64_decode_tostring() + `Customers | project base64_decode_tostring('S3VzdG8x') | take 1` + - isempty() + `Customers | where isempty(LastName)` + - isnotempty() + `Customers | where isnotempty(LastName)` + - isnotnull() + `Customers | where isnotnull(FirstName)` + - isnull() + `Customers | where isnull(FirstName)` + - url_decode() + `Customers | project url_decode('https%3A%2F%2Fwww.test.com%2Fhello%20word') | 
take 1` + - url_encode() + `Customers | project url_encode('https://www.test.com/hello word') | take 1` + - substring() + `Customers | project name_abbr = strcat(substring(FirstName,0,3), ' ', substring(LastName,2))` + - strcat() + `Customers | project name = strcat(FirstName, ' ', LastName)` + - strlen() + `Customers | project FirstName, strlen(FirstName)` + - strrep() + `Customers | project strrep(FirstName,2,'_')` + - toupper() + `Customers | project toupper(FirstName)` + - tolower() + `Customers | project tolower(FirstName)` + + ## Aggregate Functions + - arg_max() + - arg_min() + - avg() + - avgif() + - count() + - countif() + - max() + - maxif() + - min() + - minif() + - sum() + - sumif() + - dcount() + - dcountif() + - bin diff --git a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp new file mode 100644 index 000000000000..156a6fde6d5b --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp @@ -0,0 +1,374 @@ +#include "KQLFunctionFactory.h" + +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include + +namespace DB::ErrorCodes +{ +extern const int NOT_IMPLEMENTED; +extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +extern const int SYNTAX_ERROR; +extern const int UNKNOWN_FUNCTION; +} + +namespace +{ +constexpr DB::TokenType determineClosingPair(const DB::TokenType token_type) +{ + if (token_type == DB::TokenType::OpeningCurlyBrace) + return DB::TokenType::ClosingCurlyBrace; + else if (token_type == DB::TokenType::OpeningRoundBracket) + return DB::TokenType::ClosingRoundBracket; + else if (token_type == DB::TokenType::OpeningSquareBracket) + return DB::TokenType::ClosingSquareBracket; + + throw DB::Exception(DB::ErrorCodes::NOT_IMPLEMENTED, "Unhandled token: {}", magic_enum::enum_name(token_type)); +} + +constexpr bool isClosingBracket(const DB::TokenType token_type) +{ + return token_type == DB::TokenType::ClosingCurlyBrace || 
token_type == DB::TokenType::ClosingRoundBracket + || token_type == DB::TokenType::ClosingSquareBracket; +} + +constexpr bool isOpeningBracket(const DB::TokenType token_type) +{ + return token_type == DB::TokenType::OpeningCurlyBrace || token_type == DB::TokenType::OpeningRoundBracket + || token_type == DB::TokenType::OpeningSquareBracket; +} +} + +namespace DB +{ +bool IParserKQLFunction::convert(String & out, IParser::Pos & pos) +{ + return wrapConvertImpl( + pos, + IncreaseDepthTag{}, + [&] + { + bool res = convertImpl(out, pos); + if (!res) + out = ""; + return res; + }); +} + +bool IParserKQLFunction::directMapping( + String & out, IParser::Pos & pos, const std::string_view ch_fn, const Interval & argument_count_interval) +{ + const auto fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + out.append(ch_fn.data(), ch_fn.length()); + out.push_back('('); + + int argument_count = 0; + const auto begin = pos; + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + if (pos != begin) + out.append(", "); + + if (const auto argument = getOptionalArgument(fn_name, pos)) + { + ++argument_count; + out.append(*argument); + } + + if (pos->type == TokenType::ClosingRoundBracket) + { + if (!argument_count_interval.IsWithinBounds(argument_count)) + throw Exception( + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "{}: between {} and {} arguments are expected, but {} were provided", + fn_name, + argument_count_interval.Min(), + argument_count_interval.Max(), + argument_count); + + out.push_back(')'); + return true; + } + } + + out.clear(); + pos = begin; + return false; +} + +String IParserKQLFunction::generateUniqueIdentifier() +{ + // This particular random generator hits each number exactly once before looping over. + // Because of this, it's sufficient for queries consisting of up to 2^16 (= 65536) distinct function calls. 
+ // Reference: https://www.pcg-random.org/using-pcg-cpp.html#insecure-generators + static pcg16_once_insecure random_generator; + return std::to_string(random_generator()); +} + +String IParserKQLFunction::getArgument(const String & function_name, DB::IParser::Pos & pos, const ArgumentState argument_state) +{ + if (auto optional_argument = getOptionalArgument(function_name, pos, argument_state)) + return std::move(*optional_argument); + + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Required argument was not provided in {}", function_name); +} + +std::vector IParserKQLFunction::getArguments( + const String & function_name, DB::IParser::Pos & pos, const ArgumentState argument_state, const Interval & argument_count_interval) +{ + std::vector arguments; + while (auto argument = getOptionalArgument(function_name, pos, argument_state)) + { + arguments.push_back(std::move(*argument)); + } + if (!argument_count_interval.IsWithinBounds(static_cast(arguments.size()))) + throw Exception( + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "{}: between {} and {} arguments are expected, but {} were provided", + function_name, + argument_count_interval.Min(), + argument_count_interval.Max(), + arguments.size()); + + return arguments; +} + +String IParserKQLFunction::getConvertedArgument(const String & fn_name, IParser::Pos & pos) +{ + int32_t round_bracket_count = 0, square_bracket_count = 0; + if (pos->type == TokenType::ClosingRoundBracket || pos->type == TokenType::ClosingSquareBracket) + return {}; + + if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Need more argument(s) in function: {}", fn_name); + + std::vector tokens; + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + if (pos->type == TokenType::OpeningRoundBracket) + ++round_bracket_count; + if (pos->type == TokenType::ClosingRoundBracket) + 
--round_bracket_count; + + if (pos->type == TokenType::OpeningSquareBracket) + ++square_bracket_count; + if (pos->type == TokenType::ClosingSquareBracket) + --square_bracket_count; + + if (!KQLOperators::convert(tokens, pos)) + { + if (pos->type == TokenType::BareWord) + { + tokens.push_back(IParserKQLFunction::getExpression(pos)); + } + else if ( + pos->type == TokenType::Comma || pos->type == TokenType::ClosingRoundBracket + || pos->type == TokenType::ClosingSquareBracket) + { + if (pos->type == TokenType::Comma) + break; + if (pos->type == TokenType::ClosingRoundBracket && round_bracket_count == -1) + break; + if (pos->type == TokenType::ClosingSquareBracket && square_bracket_count == 0) + break; + tokens.push_back(String(pos->begin, pos->end)); + } + else + { + String token; + if (pos->type == TokenType::QuotedIdentifier) + token = "'" + escapeSingleQuotes(String(pos->begin + 1, pos->end - 1)) + "'"; + else if (pos->type == TokenType::OpeningSquareBracket) + { + ++pos; + String array_index; + while (!pos->isEnd() && pos->type != TokenType::ClosingSquareBracket) + { + array_index += getExpression(pos); + ++pos; + } + token = std::format("[ {0} >=0 ? {0} + 1 : {0}]", array_index); + } + else + token = String(pos->begin, pos->end); + + tokens.push_back(token); + } + } + + ++pos; + if (pos->type == TokenType::Comma || pos->type == TokenType::ClosingRoundBracket || pos->type == TokenType::ClosingSquareBracket) + { + if (pos->type == TokenType::Comma) + break; + if (pos->type == TokenType::ClosingRoundBracket && round_bracket_count == -1) + break; + if (pos->type == TokenType::ClosingSquareBracket && square_bracket_count == 0) + break; + } + } + + String converted_arg; + for (const auto & token : tokens) + converted_arg.append((converted_arg.empty() ? 
"" : " ") + token); + + return converted_arg; +} + +std::optional +IParserKQLFunction::getOptionalArgument(const String & function_name, DB::IParser::Pos & pos, const ArgumentState argument_state) +{ + if (const auto type = pos->type; type != DB::TokenType::Comma && type != DB::TokenType::OpeningRoundBracket) + return {}; + + ++pos; + if (const auto type = pos->type; type == DB::TokenType::ClosingRoundBracket || type == DB::TokenType::ClosingSquareBracket) + return {}; + + if (argument_state == ArgumentState::Parsed) + return getConvertedArgument(function_name, pos); + + if (argument_state != ArgumentState::Raw) + throw Exception( + ErrorCodes::NOT_IMPLEMENTED, + "Argument extraction is not implemented for {}::{}", + magic_enum::enum_type_name(), + magic_enum::enum_name(argument_state)); + + const auto * begin = pos->begin; + std::stack scopes; + while (!pos->isEnd() && (!scopes.empty() || (pos->type != DB::TokenType::Comma && pos->type != DB::TokenType::ClosingRoundBracket))) + { + const auto token_type = pos->type; + if (isOpeningBracket(token_type)) + scopes.push(token_type); + else if (isClosingBracket(token_type)) + { + if (scopes.empty() || determineClosingPair(scopes.top()) != token_type) + throw Exception( + DB::ErrorCodes::SYNTAX_ERROR, "Unmatched token: {} when parsing {}", magic_enum::enum_name(token_type), function_name); + + scopes.pop(); + } + + ++pos; + } + + return std::string(begin, pos->begin); +} + +String IParserKQLFunction::getKQLFunctionName(IParser::Pos & pos) +{ + String fn_name(pos->begin, pos->end); + ++pos; + if (pos->type != TokenType::OpeningRoundBracket) + { + --pos; + return ""; + } + return fn_name; +} + +String IParserKQLFunction::kqlCallToExpression( + const std::string_view function_name, const std::initializer_list params, const uint32_t max_depth) +{ + return kqlCallToExpression(function_name, std::span(params), max_depth); +} + +String IParserKQLFunction::kqlCallToExpression( + const std::string_view function_name, const 
std::span params, const uint32_t max_depth) +{ + const auto params_str = std::accumulate( + std::cbegin(params), + std::cend(params), + String(), + [](String acc, const std::string_view param) + { + if (!acc.empty()) + acc.append(", "); + + acc.append(param.data(), param.length()); + return acc; + }); + + const auto kql_call = std::format("{}({})", function_name, params_str); + DB::Tokens call_tokens(kql_call.c_str(), kql_call.c_str() + kql_call.length()); + DB::IParser::Pos tokens_pos(call_tokens, max_depth); + return DB::IParserKQLFunction::getExpression(tokens_pos); +} + +void IParserKQLFunction::validateEndOfFunction(const String & fn_name, IParser::Pos & pos) +{ + if (pos->type != TokenType::ClosingRoundBracket) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Too many arguments in function: {}", fn_name); +} + +String IParserKQLFunction::getExpression(IParser::Pos & pos) +{ + String arg(pos->begin, pos->end); + if (pos->type == TokenType::BareWord) + { + const auto fun = KQLFunctionFactory::get(arg); + if (String new_arg; fun && fun->convert(new_arg, pos)) + { + validateEndOfFunction(arg, pos); + arg = std::move(new_arg); + } + else + { + if (!fun) + { + ++pos; + if (pos->type == TokenType::OpeningRoundBracket) + { + if (Poco::toLower(arg) != "and" && Poco::toLower(arg) != "or") + throw Exception(ErrorCodes::UNKNOWN_FUNCTION, "{} is not a supported kusto function", arg); + } + --pos; + } + + if (std::optional ticks; ParserKQLTimespan::tryParse(extractTokenWithoutQuotes(pos), ticks) && ticks) + arg = kqlTicksToInterval(ticks); + } + } + else if (pos->type == TokenType::QuotedIdentifier) + arg = "'" + escapeSingleQuotes(String(pos->begin + 1, pos->end - 1)) + "'"; + else if (pos->type == TokenType::OpeningSquareBracket) + { + ++pos; + String array_index; + while (!pos->isEnd() && pos->type != TokenType::ClosingSquareBracket) + { + array_index += getExpression(pos); + ++pos; + } + arg = std::format("[ {0} >=0 ? 
{0} + 1 : {0}]", array_index); + } + + return arg; +} + +String IParserKQLFunction::escapeSingleQuotes(const String & input) +{ + String output; + for (const auto & ch : input) + { + if (ch == '\'') + output += ch; + output += ch; + } + return output; +} +} diff --git a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h new file mode 100644 index 000000000000..147436551f97 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h @@ -0,0 +1,91 @@ +#pragma once + +#include + +#include + +namespace DB +{ +class Interval +{ +public: + using Representation = int; + + Interval(const Representation min_, const Representation max_) : max(max_), min(min_) { } + + Representation Max() const { return max; } + Representation Min() const { return min; } + bool IsWithinBounds(const Representation value) const { return min <= value && value <= max; } + + static constexpr auto max_bound = std::numeric_limits::max(); + static constexpr auto min_bound = std::numeric_limits::min(); + +private: + Representation max = max_bound; + Representation min = min_bound; +}; + +class IParserKQLFunction +{ +public: + enum class ArgumentState + { + Parsed, + Raw + }; + + template + ALWAYS_INLINE static bool wrapConvertImpl(IParser::Pos & pos, const F & func) + { + IParser::Pos begin = pos; + bool res = func(); + if (!res) + pos = begin; + return res; + } + + struct IncreaseDepthTag + { + }; + + template + ALWAYS_INLINE static bool wrapConvertImpl(IParser::Pos & pos, IncreaseDepthTag, const F & func) + { + IParser::Pos begin = pos; + pos.increaseDepth(); + bool res = func(); + pos.decreaseDepth(); + if (!res) + pos = begin; + return res; + } + + bool convert(String & out, IParser::Pos & pos); + virtual const char * getName() const = 0; + virtual ~IParserKQLFunction() = default; + + static String generateUniqueIdentifier(); + static String getArgument(const String & function_name, DB::IParser::Pos & pos, ArgumentState 
argument_state = ArgumentState::Parsed); + static std::vector getArguments( + const String & function_name, + DB::IParser::Pos & pos, + ArgumentState argument_state = ArgumentState::Parsed, + const Interval & argument_count_interval = {0, Interval::max_bound}); + static String getConvertedArgument(const String & fn_name, IParser::Pos & pos); + static String getExpression(IParser::Pos & pos); + static String getKQLFunctionName(IParser::Pos & pos); + static std::optional + getOptionalArgument(const String & function_name, DB::IParser::Pos & pos, ArgumentState argument_state = ArgumentState::Parsed); + static String + kqlCallToExpression(std::string_view function_name, std::initializer_list params, uint32_t max_depth); + static String kqlCallToExpression(std::string_view function_name, std::span params, uint32_t max_depth); + static String escapeSingleQuotes(const String & input); + +protected: + virtual bool convertImpl(String & out, IParser::Pos & pos) = 0; + + static bool directMapping( + String & out, IParser::Pos & pos, std::string_view ch_fn, const Interval & argument_count_interval = {0, Interval::max_bound}); + static void validateEndOfFunction(const String & fn_name, IParser::Pos & pos); +}; +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp new file mode 100644 index 000000000000..d3c878ed2993 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp @@ -0,0 +1,550 @@ +#include "KQLAggregationFunctions.h" + +#include + +#include + +namespace DB +{ + +bool ArgMax::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "argMax"); +} + +bool ArgMin::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "argMin"); +} + +bool Avg::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "avg"); +} + +bool AvgIf::convertImpl(String & out, IParser::Pos & pos) +{ + return 
directMapping(out, pos, "avgIf"); +} + +bool BinaryAllAnd::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "groupBitAnd"); +} + +bool BinaryAllOr::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "groupBitOr"); +} + +bool BinaryAllXor::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "groupBitXor"); +} + +bool BuildSchema::convertImpl(String & out, IParser::Pos & pos) +{ + String res = String(pos->begin, pos->end); + out = res; + return false; +} + +bool Count::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "count"); +} + +bool CountIf::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "countIf"); +} + +bool DCount::convertImpl(String & out, IParser::Pos & pos) +{ + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + ++pos; + String value = getConvertedArgument(fn_name, pos); + if (pos->type == TokenType::Comma) + { + ++pos; + const auto accuracy = getConvertedArgument(fn_name, pos); + out = "count(DISTINCT " + value + " , " + accuracy + ")"; + } + else + out = "count(DISTINCT " + value + ")"; + return true; +} + +bool DCountIf::convertImpl(String & out, IParser::Pos & pos) /* Rewrites KQL dcountif(expr, predicate [, accuracy]) as countIf(DISTINCT expr , predicate); returns false when no function name can be read at pos. */ +{ + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + ++pos; + String value = getConvertedArgument(fn_name, pos); + ++pos; /* advance past the token the previous argument stopped on (expected to be the comma) */ + String condition = getConvertedArgument(fn_name, pos); + if (pos->type == TokenType::Comma) + { + ++pos; + [[maybe_unused]] const auto accuracy = getConvertedArgument(fn_name, pos); /* parsed only so that pos moves past the optional third argument; it is not forwarded */ + out = "countIf(DISTINCT " + value + " , " + condition + ")"; /* must stay countIf: count(DISTINCT a , b , c) would treat the predicate as one more counted column instead of a filter */ + } + else + out = "countIf(DISTINCT " + value + " , " + condition + ")"; + return true; +} + +bool MakeBag::convertImpl(String & out, IParser::Pos & pos) +{ + String res = String(pos->begin, pos->end); + out = res; + return false; +} + +bool MakeBagIf::convertImpl(String & out, IParser::Pos & pos) +{ 
+ String res = String(pos->begin, pos->end); + out = res; + return false; +} + +bool MakeList::convertImpl(String & out, IParser::Pos & pos) +{ + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + ++pos; + const auto expr = getConvertedArgument(fn_name, pos); + if (pos->type == TokenType::Comma) + { + ++pos; + const auto max_size = getConvertedArgument(fn_name, pos); + out = "groupArrayIf(" + max_size + ")(" + expr + " , " + expr + " IS NOT NULL)"; + } + else + out = "groupArrayIf(" + expr + " , " + expr + " IS NOT NULL)"; + return true; +} + +bool MakeListIf::convertImpl(String & out, IParser::Pos & pos) +{ + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + ++pos; + const auto expr = getConvertedArgument(fn_name, pos); + ++pos; + const auto predicate = getConvertedArgument(fn_name, pos); + if (pos->type == TokenType::Comma) + { + ++pos; + const auto max_size = getConvertedArgument(fn_name, pos); + out = "groupArrayIf(" + max_size + ")(" + expr + " , " + predicate + " )"; + } + else + out = "groupArrayIf(" + expr + " , " + predicate + " )"; + return true; +} + +bool MakeListWithNulls::convertImpl(String & out, IParser::Pos & pos) +{ + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + ++pos; + const auto column_name = getConvertedArgument(fn_name, pos); + out = "arrayConcat(groupArray(" + column_name + "), arrayMap(x -> null, range(0, toUInt32(count(*)-length( groupArray(" + column_name + + "))),1)))"; + return true; +} + +bool MakeSet::convertImpl(String & out, IParser::Pos & pos) +{ + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + ++pos; + const auto expr = getConvertedArgument(fn_name, pos); + if (pos->type == TokenType::Comma) + { + ++pos; + const auto max_size = getConvertedArgument(fn_name, pos); + out = "groupUniqArray(" + max_size + ")(" + expr + ")"; + } + else + out = "groupUniqArray(" + expr + ")"; + return true; 
+} + +bool MakeSetIf::convertImpl(String & out, IParser::Pos & pos) +{ + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + ++pos; + const auto expr = getConvertedArgument(fn_name, pos); + ++pos; + const auto predicate = getConvertedArgument(fn_name, pos); + if (pos->type == TokenType::Comma) + { + ++pos; + const auto max_size = getConvertedArgument(fn_name, pos); + out = "groupUniqArrayIf(" + max_size + ")(" + expr + " , " + predicate + " )"; + } + else + out = "groupUniqArrayIf(" + expr + " , " + predicate + " )"; + return true; +} + +bool Max::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "max"); +} + +bool MaxIf::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "maxIf"); +} + +bool Min::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "min"); +} + +bool MinIf::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "minIf"); +} + +bool Percentile::convertImpl(String & out, IParser::Pos & pos) +{ + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + + ++pos; + String column_name = getConvertedArgument(fn_name, pos); + trim(column_name); + + ++pos; + String value = getConvertedArgument(fn_name, pos); + trim(value); + + out = "quantile(" + value + "/100)(" + column_name + ")"; + return true; +} + +bool Percentilew::convertImpl(String & out, IParser::Pos & pos) +{ + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + + ++pos; + String bucket_column = getConvertedArgument(fn_name, pos); + trim(bucket_column); + + ++pos; + String frequency_column = getConvertedArgument(fn_name, pos); + trim(frequency_column); + + ++pos; + String value = getConvertedArgument(fn_name, pos); + trim(value); + + out = "quantileExactWeighted(" + value + "/100)(" + bucket_column + "," + frequency_column + ")"; + return true; +} + +bool Percentiles::convertImpl(String 
& out, IParser::Pos & pos) +{ + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + + ++pos; + String column_name = getConvertedArgument(fn_name, pos); + trim(column_name); + String expr = "quantiles("; + String value; + while (pos->type != TokenType::ClosingRoundBracket) + { + if (pos->type != TokenType::Comma) + { + value = String(pos->begin, pos->end); + expr = expr + value + "/100"; + ++pos; + if (pos->type != TokenType::ClosingRoundBracket) + expr += ", "; + } + else + ++pos; + } + out = expr + ")(" + column_name + ")"; + return true; +} + +bool PercentilesArray::convertImpl(String & out, IParser::Pos & pos) /* Rewrites KQL percentiles_array(column, p1, p2, ...) or percentiles_array(column, dynamic([p1, p2, ...])) as quantiles(p1/100, p2/100, ...)(column); returns false when no function name can be read at pos. */ +{ + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + + ++pos; + String column_name = getConvertedArgument(fn_name, pos); + trim(column_name); + String expr = "quantiles("; + String value; + while (!pos->isEnd() && pos->type != TokenType::ClosingRoundBracket) /* the isEnd() guard stops the scan on input that lacks the closing bracket */ + { + if (pos->type != TokenType::Comma && String(pos->begin, pos->end) != "dynamic" && pos->type != TokenType::OpeningRoundBracket + && pos->type != TokenType::OpeningSquareBracket && pos->type != TokenType::ClosingSquareBracket) + { + value = String(pos->begin, pos->end); + expr = expr + value + "/100"; + + if (pos->type != TokenType::Comma && pos->type != TokenType::OpeningRoundBracket && pos->type != TokenType::OpeningSquareBracket /* always true here: the enclosing condition already excluded these token types and pos has not moved */ + && pos->type != TokenType::ClosingSquareBracket) + expr += ", "; + ++pos; + } + else + { + ++pos; /* separators, the dynamic keyword and its brackets carry no percentile value */ + } + } + ++pos; /* peek at the next token: with the dynamic(...) form the loop stopped on the inner closing bracket */ + if (pos->type != TokenType::ClosingRoundBracket) + --pos; + + if (expr.ends_with(", ")) /* strip the separator left after the last value; with no values nothing is stripped, so the function name stays intact */ + expr.resize(expr.size() - 2); + expr = expr + ")(" + column_name + ")"; + out = expr; + return true; +} + +bool Percentilesw::convertImpl(String & out, IParser::Pos & pos) +{ + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + + ++pos; + String bucket_column = getConvertedArgument(fn_name, pos); + trim(bucket_column); + + ++pos; + String frequency_column = getConvertedArgument(fn_name, pos); + 
trim(frequency_column); + + String expr = "quantilesExactWeighted("; + String value; + + while (pos->type != TokenType::ClosingRoundBracket) + { + if (pos->type != TokenType::Comma) + { + value = String(pos->begin, pos->end); + expr = expr + value + "/100"; + ++pos; + if (pos->type != TokenType::ClosingRoundBracket) + expr += ", "; + } + else + ++pos; + } + expr = expr + ")(" + bucket_column + "," + frequency_column + ")"; + out = expr; + return true; +} + +bool PercentileswArray::convertImpl(String & out, IParser::Pos & pos) /* Rewrites KQL percentilesw_array(bucket, frequency, p1, p2, ...) or the dynamic([p1, p2, ...]) form as quantilesExactWeighted(p1/100, p2/100, ...)(bucket,frequency); returns false when no function name can be read at pos. */ +{ + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + + ++pos; + String bucket_column = getConvertedArgument(fn_name, pos); + trim(bucket_column); + + ++pos; + String frequency_column = getConvertedArgument(fn_name, pos); + trim(frequency_column); + + String expr = "quantilesExactWeighted("; + String value; + while (!pos->isEnd() && pos->type != TokenType::ClosingRoundBracket) /* the isEnd() guard stops the scan on input that lacks the closing bracket */ + { + if (pos->type != TokenType::Comma && String(pos->begin, pos->end) != "dynamic" && pos->type != TokenType::OpeningRoundBracket + && pos->type != TokenType::OpeningSquareBracket && pos->type != TokenType::ClosingSquareBracket) + { + value = String(pos->begin, pos->end); + expr = expr + value + "/100"; + + if (pos->type != TokenType::Comma && pos->type != TokenType::OpeningRoundBracket && pos->type != TokenType::OpeningSquareBracket /* always true here: the enclosing condition already excluded these token types and pos has not moved */ + && pos->type != TokenType::ClosingSquareBracket) + expr += ", "; + ++pos; + } + else + { + ++pos; /* separators, the dynamic keyword and its brackets carry no percentile value */ + } + } + ++pos; /* peek at the next token: with the dynamic(...) form the loop stopped on the inner closing bracket */ + if (pos->type != TokenType::ClosingRoundBracket) + --pos; + + if (expr.ends_with(", ")) /* strip the separator left after the last value; with no values nothing is stripped, so the function name stays intact */ + expr.resize(expr.size() - 2); + expr = expr + ")(" + bucket_column + "," + frequency_column + ")"; + out = expr; + return true; +} + +bool Stdev::convertImpl(String & out, IParser::Pos & pos) +{ + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + ++pos; + const auto expr = getConvertedArgument(fn_name, pos); + out = "sqrt(varSamp(" + expr + "))"; + return true; +} + +bool StdevIf::convertImpl(String & 
out, IParser::Pos & pos) +{ + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + ++pos; + const auto expr = getConvertedArgument(fn_name, pos); + if (pos->type != TokenType::Comma) + return false; + + ++pos; + const auto predicate = getConvertedArgument(fn_name, pos); + out = "sqrt(varSampIf(" + expr + ", " + predicate + "))"; + return true; +} + +bool Sum::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "sum"); +} + +bool SumIf::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "sumIf"); +} + +bool TakeAny::convertImpl(String & out, IParser::Pos & pos) +{ + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + + String expr; + String arg; + const auto begin = pos; + while (pos->type != TokenType::ClosingRoundBracket) + { + if (pos != begin) + expr.append(", "); + ++pos; + arg = getConvertedArgument(fn_name, pos); + expr = expr + "any(" + arg + ")"; + } + out = expr; + return true; +} + +bool TakeAnyIf::convertImpl(String & out, IParser::Pos & pos) +{ + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + ++pos; + const auto expr = getConvertedArgument(fn_name, pos); + if (pos->type != TokenType::Comma) + return false; + + ++pos; + const auto predicate = getConvertedArgument(fn_name, pos); + out = "anyIf(" + expr + ", " + predicate + ")"; + return true; +} + +bool Variance::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "varSamp"); +} + +bool VarianceIf::convertImpl(String & out, IParser::Pos & pos) +{ + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + const String expr = getArgument(fn_name, pos); + const String predicate = getArgument(fn_name, pos); + out = std::format("varSampIf({}, {})", expr, predicate); + + return true; +} + +bool VarianceP::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, 
"varPop"); +} + +bool CountDistinct::convertImpl(String & out, IParser::Pos & pos) +{ + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + const String expr = getArgument(fn_name, pos); + out = std::format("count(DISTINCT {})", expr); + + return true; +} + + +bool CountDistinctIf::convertImpl(String & out, IParser::Pos & pos) +{ + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + const String expr = getArgument(fn_name, pos); + const String predicate = getArgument(fn_name, pos); + out = std::format("countIf(DISTINCT {}, {})", expr, predicate); + + return true; +} + +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.h new file mode 100644 index 000000000000..74185812c7e2 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.h @@ -0,0 +1,288 @@ +#pragma once + +#include "IParserKQLFunction.h" + +namespace DB +{ +class ArgMax : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "arg_max()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ArgMin : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "arg_min()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Avg : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "avg()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class AvgIf : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "avgif()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class BinaryAllAnd : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "binary_all_and()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class BinaryAllOr : public IParserKQLFunction +{ 
+protected: + const char * getName() const override { return "binary_all_or()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class BinaryAllXor : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "binary_all_xor()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class BuildSchema : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "buildschema()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Count : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "count()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class CountIf : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "countif()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DCount : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "dcount()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DCountIf : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "dcountif()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class MakeBag : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "make_bag()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class MakeBagIf : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "make_bag_if()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class MakeList : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "make_list()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class MakeListIf : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "make_list_if()"; } + 
bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class MakeListWithNulls : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "make_list_with_nulls()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class MakeSet : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "make_set()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class MakeSetIf : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "make_set_if()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Max : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "max()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class MaxIf : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "maxif()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Min : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "min()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class MinIf : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "minif()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Percentile : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "percentile()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Percentilew : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "percentilew()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Percentiles : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "percentiles()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class PercentilesArray : 
public IParserKQLFunction +{ +protected: + const char * getName() const override { return "percentiles_array()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Percentilesw : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "percentilesw()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class PercentileswArray : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "percentilesw_array()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Stdev : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "stdev()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class StdevIf : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "stdevif()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Sum : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "sum()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SumIf : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "sumif()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class TakeAny : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "take_any()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class TakeAnyIf : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "take_anyif()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Variance : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "variance()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class VarianceIf : public IParserKQLFunction +{ +protected: + const char * getName() const override { 
return "varianceif()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class VarianceP : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "variancep()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class CountDistinct : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "count_distinct()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class CountDistinctIf : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "count_distinctif()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +} + diff --git a/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.cpp new file mode 100644 index 000000000000..5779f0ed3708 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.cpp @@ -0,0 +1,84 @@ +#include "KQLBinaryFunctions.h" + +#include + +namespace DB +{ + +bool BinaryAnd::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto lhs = getArgument(function_name, pos); + const auto rhs = getArgument(function_name, pos); + out = std::format("bitAnd(cast({0}, 'Int64'), cast({1}, 'Int64'))", lhs, rhs); + return true; +} + +bool BinaryNot::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto value = getArgument(function_name, pos); + out = std::format("bitNot(cast({0}, 'Int64'))", value); + return true; +} + +bool BinaryOr::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto lhs = getArgument(function_name, pos); + const auto rhs = getArgument(function_name, pos); + out = 
std::format("bitOr(cast({0}, 'Int64'), cast({1}, 'Int64'))", lhs, rhs); + return true; +} + +bool BinaryShiftLeft::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto value = getArgument(function_name, pos); + const auto count = getArgument(function_name, pos); + out = std::format("if({1} < 0, null, bitShiftLeft(cast({0}, 'Int64'), {1}))", value, count); + return true; +} + +bool BinaryShiftRight::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto value = getArgument(function_name, pos); + const auto count = getArgument(function_name, pos); + out = std::format("if({1} < 0, null, bitShiftRight(cast({0}, 'Int64'), {1}))", value, count); + return true; +} + +bool BinaryXor::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto lhs = getArgument(function_name, pos); + const auto rhs = getArgument(function_name, pos); + out = std::format("bitXor(cast({0}, 'Int64'), cast({1}, 'Int64'))", lhs, rhs); + return true; +} + +bool BitsetCountOnes::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "bitCount"); +} + +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.h new file mode 100644 index 000000000000..591c0fd236e6 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.h @@ -0,0 +1,57 @@ +#pragma once + +#include "IParserKQLFunction.h" + +namespace DB +{ +class BinaryAnd : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "binary_and()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class BinaryNot : public IParserKQLFunction +{ +protected: + const char * getName() 
const override { return "binary_not()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class BinaryOr : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "binary_or()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class BinaryShiftLeft : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "binary_shift_left()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class BinaryShiftRight : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "binary_shift_right()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class BinaryXor : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "binary_xor()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class BitsetCountOnes : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "bitset_count_ones()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +} + diff --git a/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp new file mode 100644 index 000000000000..235fbcfde611 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp @@ -0,0 +1,98 @@ +#include "KQLCastingFunctions.h" + +#include + +namespace DB +{ +bool ToBool::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto param = getArgument(function_name, pos); + out = std::format( + "multiIf(toString({0}) = 'true', true, " + "toString({0}) = 'false', false, toInt64OrNull(toString({0})) != 0)", + param); + return true; +} + +bool ToDateTime::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "kql_todatetime"); +} + +bool 
ToDouble::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto param = getArgument(function_name, pos); + out = std::format("toFloat64OrNull(toString({0})) / if(toTypeName({0}) = 'IntervalNanosecond', 100, 1)", param); + return true; +} + +bool ToInt::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto param = getArgument(function_name, pos); + out = std::format("toInt32OrNull(toString({0})) / if(toTypeName({0}) = 'IntervalNanosecond', 100, 1)", param); + return true; +} + +bool ToLong::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto param = getArgument(function_name, pos); + out = std::format("toInt64OrNull(toString({0})) / if(toTypeName({0}) = 'IntervalNanosecond', 100, 1)", param); + return true; +} + +bool ToString::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto argument = getArgument(function_name, pos); + out = std::format("ifNull(kql_tostring({0}), '')", argument); + return true; +} + +bool ToTimeSpan::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "kql_totimespan"); +} + +bool ToDecimal::convertImpl(String & out, IParser::Pos & pos) +{ + const auto fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + if (pos->type == TokenType::QuotedIdentifier || pos->type == TokenType::StringLiteral || pos->type == TokenType::Number) + { + --pos; + const auto arg = getArgument(fn_name, pos); + const auto scale = std::format("if (position({0}::String,'e') = 0,(countSubstrings({0}::String,'.') = 1 ? 
length(substr({0}::String, position({0}::String,'.') + 1)): 0), toUInt64(multiIf ((position({0}::String,'e+') as x) > 0, substr({0}::String, x + 2),(position({0}::String, 'e-') as y) > 0, substr({0}::String, y + 2), position({0}::String, 'e-') = 0 AND position({0}::String, 'e+') =0 AND position({0}::String, 'e') > 0,substr({0}::String, position({0}::String, 'e') + 1), 0::String)))",arg); + out = std::format("toTypeName({0}) = 'String' OR toTypeName({0}) = 'FixedString' ? toDecimal128OrNull({0}::String , ({1}::UInt8)) : toDecimal128OrNull({0}::String , ({1}::UInt8))", arg, scale); + } + else + { + --pos; + const auto arg = getArgument(fn_name, pos); + out = std::format("toDecimal128OrNull({0}::Nullable(String), 17) / if(toTypeName({0}) = 'IntervalNanosecond', 100, 1)", arg); + } + + return true; +} +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.h new file mode 100644 index 000000000000..72d5602dfcb3 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.h @@ -0,0 +1,62 @@ +#pragma once + +#include "IParserKQLFunction.h" + +namespace DB +{ +class ToBool : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "tobool()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class ToDateTime : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "todatetime()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class ToDouble : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "todouble()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class ToInt : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "toint()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class ToLong : public IParserKQLFunction +{ +protected: + const char * getName() 
/// Shared translation for the KQL scalar literal constructors that map onto a single ClickHouse type
/// (used in this file for Bool, Int32, Int64 and Float64).
///
/// @param out        receives the generated ClickHouse expression
/// @param pos        token cursor positioned at the KQL function call
/// @param type_name  ClickHouse type name inserted into the generated accurateCastOrNull calls
/// @return false when `pos` does not point at a KQL function call, true otherwise
/// @throws DB::Exception (BAD_ARGUMENTS) when the first argument token is a quoted identifier or string literal
bool mapToAccurateCast(std::string & out, DB::IParser::Pos & pos, const std::string_view type_name)
{
    const auto kql_name = DB::IParserKQLFunction::getKQLFunctionName(pos);
    if (kql_name.empty())
        return false;

    // Look one token ahead to reject quoted input, then step back so that getArgument
    // starts from the same position it would have without the peek.
    ++pos;
    const auto first_token = pos->type;
    const bool is_quoted = first_token == DB::TokenType::QuotedIdentifier || first_token == DB::TokenType::StringLiteral;
    if (is_quoted)
        throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "String cannot be parsed as a literal of type {}", type_name);
    --pos;

    const auto value = DB::IParserKQLFunction::getArgument(kql_name, pos);

    // The generated SQL raises via throwIf when the value is an IntervalNanosecond or when the cast
    // turns a non-null value into null; otherwise it yields the accurate cast itself.
    out = std::format(
        "if(toTypeName({0}) = 'IntervalNanosecond' or isNull(accurateCastOrNull({0}, '{1}')) != isNull({0}), "
        "accurateCastOrNull(throwIf(true, 'Failed to parse {1} literal'), '{1}'), accurateCastOrNull({0}, '{1}'))",
        value,
        type_name);
    return true;
}
/// Translates the KQL `decimal(...)` literal into `toDecimal128(<arg>::String, <scale>)` or `NULL`.
///
/// @param out  receives the generated ClickHouse expression
/// @param pos  token cursor positioned at the KQL function call
/// @return false when `pos` does not point at a KQL function call, true otherwise
/// @throws Exception (BAD_ARGUMENTS) when the argument is quoted, or contains letters without being
///         either `NULL` (any case) or of the form `<digits>e[+-]<digits>`
bool DatatypeDecimal::convertImpl(String & out, IParser::Pos & pos)
{
    constexpr int precision = 34;

    const String fn_name = getKQLFunctionName(pos);
    if (fn_name.empty())
        return false;

    // Peek at the first argument token to reject quoted input, then restore the cursor for getArgument.
    ++pos;
    if (pos->type == TokenType::QuotedIdentifier || pos->type == TokenType::StringLiteral)
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Failed to parse String as decimal Literal: {}", fn_name);
    --pos;

    const String arg = getArgument(fn_name, pos);

    // Evaluate each classification once; the original matched the regex twice and re-tested a flag
    // that could no longer be true after the first throw.
    static const std::regex exponent_form{"^[0-9]+e[+-]?[0-9]+"};
    const bool is_exponent_form = std::regex_match(arg, exponent_form);
    const bool is_null = Poco::toUpper(arg) == "NULL";
    // isalpha is only defined for values representable as unsigned char, so widen before calling it.
    const bool has_letters = std::any_of(arg.begin(), arg.end(), [](unsigned char c) { return ::isalpha(c) != 0; });

    if (has_letters && !is_null && !is_exponent_form)
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Failed to parse String as decimal Literal: {}", fn_name);

    // A NULL expression yields NULL rather than an exception.
    if (is_null)
    {
        out = "NULL";
        return true;
    }

    int scale = 0;
    if (is_exponent_form)
    {
        // The regex guarantees at least one digit after the optional sign, so indexing past 'e' is safe.
        const auto exponent_pos = arg.find('e');
        const bool has_sign = arg[exponent_pos + 1] == '+' || arg[exponent_pos + 1] == '-';
        scale = std::stoi(arg.substr(exponent_pos + (has_sign ? 2 : 1)));
    }
    else if (const auto dot_pos = arg.find('.'); dot_pos != String::npos)
    {
        // Same value the original derived from substr(0, dot_pos - 1), but without the unsigned
        // wrap-around that occurred when the literal started with '.'.
        // NOTE(review): the "- 1" is kept from the original; confirm whether the whole integer part
        // (dot_pos digits) was meant to be subtracted from the precision.
        const int counted_digits = dot_pos > 0 ? static_cast<int>(dot_pos - 1) : 0;
        scale = std::max(precision - counted_digits, 0);
    }

    out = std::format("toDecimal128({}::String,{})", arg, scale);
    return true;
}
/// Shared translation for the KQL `endof<period>()` functions.
///
/// The result is expressed as "start of the period at offset + 1, minus one tick", delegating to the
/// KQL `startof<period>` and `timespan` translations.
///
/// @param out     receives the generated ClickHouse expression
/// @param pos     token cursor positioned at the KQL function call
/// @param period  period name; it is lower-cased and appended to "startof"
/// @return false when `pos` does not point at a KQL function call, true otherwise
bool mapToEndOfPeriod(std::string & out, DB::IParser::Pos & pos, const std::string_view period)
{
    using KQL = DB::IParserKQLFunction;

    const auto kql_name = KQL::getKQLFunctionName(pos);
    if (kql_name.empty())
        return false;

    // Both arguments are fetched in Raw state because they are handed to kqlCallToExpression below.
    const auto datetime_arg = KQL::getArgument(kql_name, pos, KQL::ArgumentState::Raw);
    const auto offset_arg = KQL::getOptionalArgument(kql_name, pos, KQL::ArgumentState::Raw);

    const auto start_function = std::format("startof{}", Poco::toLower(std::string(period)));
    const auto next_offset = std::format("{} + 1", offset_arg.value_or("0"));

    const auto next_period_start = KQL::kqlCallToExpression(start_function, {datetime_arg, next_offset}, pos.max_depth);
    const auto one_tick = KQL::kqlCallToExpression("timespan", {"1tick"}, pos.max_depth);

    out = std::format("minus({}, {})", next_period_start, one_tick);
    return true;
}
/// Translates KQL `datetime_part(part, datetime)` into `formatDateTime(<datetime>, '<specifier>')`.
///
/// @param out  receives the generated ClickHouse expression
/// @param pos  token cursor positioned at the KQL function call
/// @return false when `pos` does not point at a KQL function call, true otherwise
/// @throws Exception (SYNTAX_ERROR) when the part name is not recognised or the datetime argument is missing
bool DatetimePart::convertImpl(String & out, IParser::Pos & pos)
{
    // Upper-cased part name -> formatDateTime specifier.
    // YEAR uses %Y (calendar year) rather than %G (ISO week-numbering year), and HOUR uses %H
    // (00-23) rather than %I (01-12): the previous specifiers returned the wrong value around
    // New Year and for every afternoon hour respectively.
    static const std::unordered_map<std::string_view, std::string_view> FORMAT_BY_PART{
        {"YEAR", "%Y"},
        {"QUARTER", "%Q"},
        {"MONTH", "%m"},
        {"WEEK_OF_YEAR", "%V"},
        {"DAY", "%e"},
        {"DAYOFYEAR", "%j"},
        {"HOUR", "%H"},
        {"MINUTE", "%M"},
        {"SECOND", "%S"}};

    const String fn_name = getKQLFunctionName(pos);
    if (fn_name.empty())
        return false;

    ++pos;
    String part = Poco::toUpper(getConvertedArgument(fn_name, pos));
    trim(part);

    // Strip one pair of surrounding quotes. The size check keeps front()/erase() away from empty
    // and single-character strings, where the original invoked undefined behaviour or threw
    // std::out_of_range.
    if (part.size() >= 2 && (part.front() == '\"' || part.front() == '\''))
    {
        part.erase(0, 1);
        part.erase(part.size() - 1);
    }

    String date;
    if (pos->type == TokenType::Comma)
    {
        ++pos;
        date = getConvertedArgument(fn_name, pos);
    }

    const auto format_it = FORMAT_BY_PART.find(part);
    if (format_it == FORMAT_BY_PART.cend())
        throw Exception(ErrorCodes::SYNTAX_ERROR, "Unexpected argument {} for {}", part, fn_name);

    // Without this check the output would be `formatDateTime(, '...')`, which is not valid SQL.
    if (date.empty())
        throw Exception(ErrorCodes::SYNTAX_ERROR, "Missing datetime argument for {}", fn_name);

    out = std::format("formatDateTime({}, '{}')", date, format_it->second);
    return true;
}
/// Translates KQL `dayofweek(datetime)` into `(toDayOfWeek(<datetime>) % 7) * <one day>`,
/// where the one-day factor comes from translating the KQL call `timespan(1d)`.
///
/// @param out  receives the generated ClickHouse expression
/// @param pos  token cursor positioned at the KQL function call
/// @return false when `pos` does not point at a KQL function call, true otherwise
bool DayOfWeek::convertImpl(String & out, IParser::Pos & pos)
{
    const auto kql_name = getKQLFunctionName(pos);
    if (kql_name.empty())
        return false;

    const auto datetime_arg = getArgument(kql_name, pos);
    const auto one_day = kqlCallToExpression("timespan", {"1d"}, pos.max_depth);

    // The modulo folds toDayOfWeek's result into the range 0..6 before scaling by one day.
    out = std::format("(toDayOfWeek({}) % 7) * {}", datetime_arg, one_day);
    return true;
}
/// Translates KQL `format_timespan(timespan, format)` into a ClickHouse `concat(...)` expression.
///
/// The format string is scanned character by character. Runs of the same format character
/// ("streaks") become numeric parts computed from the timespan; delimiter characters are emitted
/// as string literals.
///
/// @param out  receives the generated ClickHouse expression
/// @param pos  token cursor positioned at the KQL function call
/// @return false when `pos` does not point at a KQL function call, true otherwise
/// @throws Exception (ILLEGAL_TYPE_OF_ARGUMENT) when the format is not a non-empty single-quoted literal
/// @throws Exception (BAD_ARGUMENTS) when the format contains a character that is neither a delimiter
///         nor a known format character
bool FormatTimeSpan::convertImpl(String & out, IParser::Pos & pos)
{
    static const std::unordered_set<char> ALLOWED_DELIMITERS{' ', '/', '-', ':', ',', '.', '_', '[', ']'};

    // Per format character: {unit handed to the KQL timespan() translation, optional modulus,
    // whether to truncate the digits to the streak length, maximum characters consumed per part,
    // optional padding function}.
    // The tuple element types were lost in extraction and are restored from how the values are used.
    static const std::unordered_map<char, std::tuple<std::string, std::optional<int>, bool, int, std::optional<std::string>>>
        ATTRIBUTES_BY_FORMAT_CHARACTER{
            {'d', {"1d", std::nullopt, false, 8, "leftPad"}},
            {'f', {"1tick", 10'000'000, true, 7, "rightPad"}},
            {'F', {"1tick", 10'000'000, true, 7, std::nullopt}},
            {'h', {"1h", 24, false, 2, "leftPad"}},
            {'H', {"1h", 24, false, 2, "leftPad"}},
            {'m', {"1m", 60, false, 2, "leftPad"}},
            {'s', {"1s", 60, false, 2, "leftPad"}}};

    const auto fn_name = getKQLFunctionName(pos);
    if (fn_name.empty())
        return false;

    const auto timespan = getArgument(fn_name, pos);
    const auto format = getArgument(fn_name, pos);
    if (std::ssize(format) < 3 || format.front() != format.back() || format.front() != '\'')
        throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Expected non-empty string literal as the second argument to {}", fn_name);

    std::string current_streak;
    std::string delimited_parts;

    // Single place that joins concat() arguments. Emitting the separator only between parts fixes
    // formats that begin with a delimiter, which previously produced "concat(, '...'".
    const auto append_part = [&delimited_parts](const std::string & part)
    {
        if (!delimited_parts.empty())
            delimited_parts.append(", ");
        delimited_parts.append(part);
    };

    // Consumes the pending streak, emitting one part per max_length characters.
    const auto convert_streak = [&current_streak, &timespan, &append_part, &pos]
    {
        while (!current_streak.empty())
        {
            const auto attributes_it = ATTRIBUTES_BY_FORMAT_CHARACTER.find(current_streak.front());
            if (attributes_it == ATTRIBUTES_BY_FORMAT_CHARACTER.cend())
                throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected format character: {}", current_streak.front());

            const auto & [timespan_unit, modulus, should_truncate, max_length, pad_function] = attributes_it->second;
            const auto streak_length = std::ssize(current_streak);
            const auto part_length = std::min(streak_length, static_cast<decltype(streak_length)>(max_length));
            current_streak.erase(current_streak.cbegin(), current_streak.cbegin() + part_length);

            auto expression = std::format("intDiv({}, {})", timespan, kqlCallToExpression("timespan", {timespan_unit}, pos.max_depth));
            expression = std::format("toString({})", modulus ? std::format("modulo({}, {})", expression, *modulus) : expression);
            if (should_truncate)
                expression = std::format("substring({}, 1, {})", expression, part_length);

            // Pad only when the rendered value is shorter than the requested width.
            append_part(
                pad_function ? std::format("if(length({1}) < {2}, {0}({1}, {2}, '0'), {1})", *pad_function, expression, part_length)
                             : expression);
        }
    };

    // Walk the format without its surrounding quotes.
    for (const auto & c : std::string_view(format.cbegin() + 1, format.cend() - 1))
    {
        if (ALLOWED_DELIMITERS.contains(c))
        {
            convert_streak();
            append_part(std::format("'{}'", c));
        }
        else if (ATTRIBUTES_BY_FORMAT_CHARACTER.contains(c))
        {
            // A change of format character ends the current streak.
            if (!current_streak.empty() && current_streak.back() != c)
                convert_streak();

            current_streak.push_back(c);
        }
        else
            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected character '{}' in format string of {}", c, fn_name);
    }

    convert_streak();
    out = "concat(" + delimited_parts + ", '')";
    return true;
}
/// Translates KQL `make_datetime(year, month, day [, hour [, minute [, second]]])`.
///
/// The generated SQL yields a DateTime64 value when every component is in range and the date
/// itself is accepted by toModifiedJulianDayOrNull, and `null` otherwise. Omitted hour, minute
/// and second default to "0".
///
/// @param out  receives the generated ClickHouse expression
/// @param pos  token cursor positioned at the KQL function call
/// @return false when `pos` does not point at a KQL function call, true otherwise
bool MakeDateTime::convertImpl(String & out, IParser::Pos & pos)
{
    const auto fn_name = getKQLFunctionName(pos);
    if (fn_name.empty())
        return false;

    const auto year = getArgument(fn_name, pos);
    const auto month = getArgument(fn_name, pos);
    const auto day = getArgument(fn_name, pos);
    const auto hour = getOptionalArgument(fn_name, pos);
    const auto minute = getOptionalArgument(fn_name, pos);
    const auto second = getOptionalArgument(fn_name, pos);

    // Placeholders: {0} year, {1} month, {2} day, {3} hour, {4} minute, {5} second.
    // The hour is validated against 0..23; it was previously checked against 0..59 (the minute
    // range), which let hours 24-59 through to makeDateTime64 instead of producing null.
    // The fractional part of the seconds is passed separately, scaled by 1e7.
    out = std::format(
        "if({0} between 1900 and 2261 and {1} between 1 and 12 and {3} between 0 and 23 and {4} between 0 and 59 and {5} >= 0 and {5} < 60 "
        " and isNotNull(toModifiedJulianDayOrNull(concat(leftPad(toString({0}), 4, '0'), '-', leftPad(toString({1}), 2, '0'), '-', leftPad(toString({2}), 2, '0')))), "
        "toDateTime64OrNull(toString(makeDateTime64({0}, {1}, {2}, {3}, {4}, truncate({5}), ({5} - truncate({5})) * 1e7, 7, 'UTC')), 9), null)",
        year,
        month,
        day,
        hour.value_or("0"),
        minute.value_or("0"),
        second.value_or("0"));

    return true;
}
" + " + *offset : ""); + + return true; +} + +bool StartOfDay::convertImpl(String & out, IParser::Pos & pos) +{ + return mapToStartOfPeriod(out, pos, "Day"); +} + +bool StartOfMonth::convertImpl(String & out, IParser::Pos & pos) +{ + return mapToStartOfPeriod(out, pos, "Month"); +} + +bool StartOfWeek::convertImpl(String & out, IParser::Pos & pos) +{ + return mapToStartOfPeriod(out, pos, "Week"); +} + +bool StartOfYear::convertImpl(String & out, IParser::Pos & pos) +{ + return mapToStartOfPeriod(out, pos, "Year"); +} + +bool UnixTimeMicrosecondsToDateTime::convertImpl(String & out, IParser::Pos & pos) +{ + const auto fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + const auto value = getArgument(fn_name, pos); + out = std::format("kql_todatetime(fromUnixTimestamp64Micro({}, 'UTC'))", value); + + return true; +} + +bool UnixTimeMillisecondsToDateTime::convertImpl(String & out, IParser::Pos & pos) +{ + const auto fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + const auto value = getArgument(fn_name, pos); + out = std::format("kql_todatetime(fromUnixTimestamp64Milli({}, 'UTC'))", value); + + return true; +} + +bool UnixTimeNanosecondsToDateTime::convertImpl(String & out, IParser::Pos & pos) +{ + const auto fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + const auto value = getArgument(fn_name, pos); + out = std::format("kql_todatetime(fromUnixTimestamp64Nano({}, 'UTC'))", value); + + return true; +} + +bool UnixTimeSecondsToDateTime::convertImpl(String & out, IParser::Pos & pos) +{ + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + if (pos->type == TokenType::QuotedIdentifier || pos->type == TokenType::StringLiteral) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "{} accepts only long, int and double type of arguments", fn_name); + + const auto expression = getConvertedArgument(fn_name, pos); + out = std::format( + 
"if(toTypeName(assumeNotNull({0})) in ['Int32', 'Int64', 'Float64', 'UInt32', 'UInt64'], " + "kql_todatetime({0}), kql_todatetime(throwIf(true, '{1} only accepts int, long and double type of arguments')))", + expression, + fn_name); + + return true; +} + +bool WeekOfYear::convertImpl(String & out, IParser::Pos & pos) +{ + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + ++pos; + const String time_str = getConvertedArgument(fn_name, pos); + out = std::format("toWeek({},3,'UTC')", time_str); + return true; +} + +bool MonthOfYear::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "toMonth"); +} + +} + diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h new file mode 100644 index 000000000000..35b99795faaa --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h @@ -0,0 +1,235 @@ +#pragma once + +#include "IParserKQLFunction.h" + +namespace DB +{ +class Ago : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "ago()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DatetimeAdd : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "datetime_add()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DatetimePart : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "datetime_part()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DatetimeDiff : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "datetime_diff()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DayOfMonth : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "dayofmonth()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class 
DayOfWeek : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "dayofweek()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DayOfYear : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "dayofyear()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class EndOfDay : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "endofday()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class EndOfMonth : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "endofmonth()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class EndOfWeek : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "endofweek()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class EndOfYear : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "endofyear()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class FormatDateTime : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "format_datetime()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class FormatTimeSpan : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "format_timespan()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class GetMonth : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "getmonth()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class GetYear : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "getyear()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class HoursOfDay : public IParserKQLFunction +{ +protected: + const 
char * getName() const override { return "hourofday()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class MakeTimeSpan : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "make_timespan()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class MakeDateTime : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "make_datetime()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Now : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "now()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class StartOfDay : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "startofday()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class StartOfMonth : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "startofmonth()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class StartOfWeek : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "startofweek()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class StartOfYear : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "startofyear()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class UnixTimeMicrosecondsToDateTime : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "unixtime_microseconds_todatetime()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class UnixTimeMillisecondsToDateTime : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "unixtime_milliseconds_todatetime()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class UnixTimeNanosecondsToDateTime : 
public IParserKQLFunction +{ +protected: + const char * getName() const override { return "unixtime_nanoseconds_todatetime()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class UnixTimeSecondsToDateTime : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "unixtime_seconds_todatetime()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class WeekOfYear : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "week_of_year()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class MonthOfYear : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "monthofyear()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +void inline getTokens(String format, std::vector & res) +{ + String str = format; + String token; + auto pos = str.find_first_not_of("abcdefghijklmnopqrstuvwxyzQWERTYUIOPASDFGHJKLZXCVBNM"); + while (pos != String::npos) + { + if (pos != 0) + { + // Found a token + token = str.substr(0, pos); + res.insert(res.begin(), token); + } + str.erase(0, pos+1); // Always remove pos+1 to get rid of delimiter + pos = str.find_first_not_of("abcdefghijklmnopqrstuvwxyzQWERTYUIOPASDFGHJKLZXCVBNM"); + } + // Cover the last (or only) token + if (str.length() > 0) + { + token = str; + res.insert(res.begin(), token); + } +} + +} + diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp new file mode 100644 index 000000000000..1e84355c2f9e --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp @@ -0,0 +1,357 @@ +#include "KQLDynamicFunctions.h" + +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int SYNTAX_ERROR; +} + +bool ArrayConcat::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "arrayConcat"); +} + +bool ArrayIif::convertImpl(String & 
/// array_concat(...) is emitted as arrayConcat(...) with the same arguments.
bool ArrayConcat::convertImpl(String & out, IParser::Pos & pos)
{
    return directMapping(out, pos, "arrayConcat");
}

/// array_iif(...) is emitted as kql_ArrayIif(...) with the same arguments.
bool ArrayIif::convertImpl(String & out, IParser::Pos & pos)
{
    return directMapping(out, pos, "kql_ArrayIif");
}

/// array_index_of(array, needle): indexOf() result shifted down by one.
bool ArrayIndexOf::convertImpl(String & out, IParser::Pos & pos)
{
    const auto function_name = getKQLFunctionName(pos);
    if (function_name.empty())
        return false;

    const auto haystack = getArgument(function_name, pos);
    const auto value = getArgument(function_name, pos);
    out = std::format("minus(indexOf({0}, {1}), 1)", haystack, value);

    return true;
}

/// array_length(array): emitted as arrayLastIndex with an always-true predicate.
bool ArrayLength::convertImpl(String & out, IParser::Pos & pos)
{
    const auto name = getKQLFunctionName(pos);
    if (name.empty())
        return false;

    const auto source = getArgument(name, pos);
    out = "arrayLastIndex(x -> true, " + source + ")";

    return true;
}

/// array_reverse(...) is emitted as arrayReverse(...) with the same arguments.
bool ArrayReverse::convertImpl(String & out, IParser::Pos & pos)
{
    return directMapping(out, pos, "arrayReverse");
}

/// array_rotate_left(array, count): rebuilds the array by indexing it with a rotated position.
bool ArrayRotateLeft::convertImpl(String & out, IParser::Pos & pos)
{
    const auto name = getKQLFunctionName(pos);
    if (name.empty())
        return false;

    const auto source = getArgument(name, pos);
    const auto shift = getArgument(name, pos);

    /// {0} is the array expression, {1} the rotation count.
    static constexpr auto rotation_template
        = "arrayMap(x -> {0}[moduloOrZero(x + length({0}) + moduloOrZero({1}, toInt64(length({0}))), length({0})) + 1], "
          "range(0, length({0})))";
    out = std::format(rotation_template, source, shift);

    return true;
}

/// array_rotate_right(array, count) is rewritten as array_rotate_left(array, -1 * count).
bool ArrayRotateRight::convertImpl(String & out, IParser::Pos & pos)
{
    const auto name = getKQLFunctionName(pos);
    if (name.empty())
        return false;

    /// Raw arguments: they are handed to the KQL translation of array_rotate_left below.
    const auto source = getArgument(name, pos, ArgumentState::Raw);
    const auto shift = getArgument(name, pos, ArgumentState::Raw);
    const auto negated_shift = "-1 * " + shift;
    out = kqlCallToExpression("array_rotate_left", {source, negated_shift}, pos.max_depth);

    return true;
}
/// array_shift_left(array, count[, fill]): shifts the elements and pads with the fill value.
/// When no fill argument is given, "null" is substituted into the generated expression.
bool ArrayShiftLeft::convertImpl(String & out, IParser::Pos & pos)
{
    const auto name = getKQLFunctionName(pos);
    if (name.empty())
        return false;

    const auto source = getArgument(name, pos);
    const auto shift = getArgument(name, pos);
    const auto fill_argument = getOptionalArgument(name, pos);

    const auto fill_expression = fill_argument.value_or("null");
    /// Suffix that keeps the aliases fill_value_* / element_type_* distinct per call.
    const auto alias_suffix = generateUniqueIdentifier();

    /// {0} array, {1} shift count, {2} fill expression, {3} alias suffix.
    static constexpr auto shift_template
        = "arrayResize(if({1} > 0, arraySlice({0}, {1} + 1), arrayConcat(arrayWithConstant(abs({1}), fill_value_{3}), {0})), "
          "length({0}), if(isNull({2}) and (extract(toTypeName({0}), 'Array\\((.*)\\)') as element_type_{3}) = 'String', "
          "defaultValueOfTypeName(if(element_type_{3} = 'Nothing', 'Nullable(Nothing)', element_type_{3})), {2}) as fill_value_{3})";
    out = std::format(shift_template, source, shift, fill_expression, alias_suffix);

    return true;
}

/// array_shift_right(array, count[, fill]) is rewritten as array_shift_left with a negated count.
bool ArrayShiftRight::convertImpl(String & out, IParser::Pos & pos)
{
    const auto name = getKQLFunctionName(pos);
    if (name.empty())
        return false;

    /// Raw arguments: they are handed to the KQL translation of array_shift_left below.
    const auto source = getArgument(name, pos, ArgumentState::Raw);
    const auto shift = getArgument(name, pos, ArgumentState::Raw);
    const auto fill_argument = getOptionalArgument(name, pos, ArgumentState::Raw);

    const auto negated_shift = "-1 * " + shift;

    /// Forward the fill value only when the caller supplied one.
    if (fill_argument)
        out = kqlCallToExpression("array_shift_left", {source, negated_shift, *fill_argument}, pos.max_depth);
    else
        out = kqlCallToExpression("array_shift_left", {source, negated_shift}, pos.max_depth);

    return true;
}
/// array_slice(array, start, end): emits arraySlice with an offset and a length computed
/// from the two indices; negative indices are resolved against the array length.
bool ArraySlice::convertImpl(String & out, IParser::Pos & pos)
{
    const auto name = getKQLFunctionName(pos);
    if (name.empty())
        return false;

    const auto source = getArgument(name, pos);
    const auto first = getArgument(name, pos);
    const auto last = getArgument(name, pos);
    const auto alias_suffix = generateUniqueIdentifier();

    /// {0} array, {1} start index, {2} end index, {3} suffix of the offset_* alias.
    static constexpr auto slice_template
        = "arraySlice({0}, plus(1, if({1} >= 0, {1}, arrayMax([-length({0}), {1}]) + length({0}))) as offset_{3}, "
          " plus(1, if({2} >= 0, {2}, arrayMax([-length({0}), {2}]) + length({0}))) - offset_{3} + 1)";
    out = std::format(slice_template, source, first, last, alias_suffix);

    return true;
}

/// array_sort_asc(...) is emitted as kql_array_sort_asc(...) with the same arguments.
bool ArraySortAsc::convertImpl(String & out, IParser::Pos & pos)
{
    return directMapping(out, pos, "kql_array_sort_asc");
}

/// array_sort_desc(...) is emitted as kql_array_sort_desc(...) with the same arguments.
bool ArraySortDesc::convertImpl(String & out, IParser::Pos & pos)
{
    return directMapping(out, pos, "kql_array_sort_desc");
}

/// array_split(array, indices): cuts the array at the given indices.
/// Yields [array] when the normalised index list is empty.
bool ArraySplit::convertImpl(String & out, IParser::Pos & pos)
{
    const auto name = getKQLFunctionName(pos);
    if (name.empty())
        return false;

    const auto source = getArgument(name, pos);
    const auto split_points = getArgument(name, pos);
    const auto alias_suffix = generateUniqueIdentifier();

    /// {0} array, {1} split indices, {2} suffix of the indices_* alias.
    static constexpr auto split_template
        = "if(empty(arrayMap(x -> if(x >= 0, x, arrayMax([0, x + length({0})::Int64])), flatten([{1}])) as indices_{2}), [{0}], "
          "arrayConcat([arraySlice({0}, 1, indices_{2}[1])], arrayMap(i -> arraySlice({0}, indices_{2}[i] + 1, "
          "if(i = length(indices_{2}), length({0})::Int64, indices_{2}[i + 1]::Int64) - indices_{2}[i]), "
          "range(1, length(indices_{2}) + 1))))";
    out = std::format(split_template, source, split_points, alias_suffix);

    return true;
}
/// array_sum(array): sums the elements as Float64 when the non-null element type name
/// matches one of the listed numeric/Bool/Nothing patterns, otherwise yields null.
bool ArraySum::convertImpl(String & out, IParser::Pos & pos)
{
    const auto function_name = getKQLFunctionName(pos);
    if (function_name.empty())
        return false;

    const auto argument = getArgument(function_name, pos);

    /// The template only references {0}; the unique identifier that used to be passed
    /// as a second argument was never substituted, so it is no longer generated.
    out = std::format(
        "if(multiSearchAny(extract(toTypeName(arrayMap(x -> assumeNotNull(x), arrayFilter(x -> isNotNull(x), {0}))), "
        "'Array\\((.*)\\)'), ['Bool', 'Decimal', 'Float', 'Int', 'Nothing', 'UInt']), "
        "arraySum(x -> toFloat64OrDefault(x), {0}), null)",
        argument);

    return true;
}

/// bag_keys(): not translated. Copies the current token into `out` and reports failure.
bool BagKeys::convertImpl(String & out, IParser::Pos & pos)
{
    out = String(pos->begin, pos->end);
    return false;
}

/// bag_merge(): not translated. Copies the current token into `out` and reports failure.
bool BagMerge::convertImpl(String & out, IParser::Pos & pos)
{
    out = String(pos->begin, pos->end);
    return false;
}

/// bag_remove_keys(): not translated. Copies the current token into `out` and reports failure.
bool BagRemoveKeys::convertImpl(String & out, IParser::Pos & pos)
{
    out = String(pos->begin, pos->end);
    return false;
}

/// jaccard_index(lhs, rhs): length of the set intersection divided by length of the set union.
bool JaccardIndex::convertImpl(String & out, IParser::Pos & pos)
{
    const auto function_name = getKQLFunctionName(pos);
    if (function_name.empty())
        return false;

    /// Raw arguments: both are re-used in two nested KQL calls.
    const auto lhs = getArgument(function_name, pos, ArgumentState::Raw);
    const auto rhs = getArgument(function_name, pos, ArgumentState::Raw);
    out = std::format(
        "divide(length({0}), length({1}))",
        kqlCallToExpression("set_intersect", {lhs, rhs}, pos.max_depth),
        kqlCallToExpression("set_union", {lhs, rhs}, pos.max_depth));

    return true;
}

/// pack(): not translated. Copies the current token into `out` and reports failure.
bool Pack::convertImpl(String & out, IParser::Pos & pos)
{
    out = String(pos->begin, pos->end);
    return false;
}

/// pack_all(): not translated. Copies the current token into `out` and reports failure.
bool PackAll::convertImpl(String & out, IParser::Pos & pos)
{
    out = String(pos->begin, pos->end);
    return false;
}

/// pack_array(...) is emitted as array(...); the interval passed requires at least one argument.
bool PackArray::convertImpl(String & out, IParser::Pos & pos)
{
    return directMapping(out, pos, "array", {1, Interval::max_bound});
}
/// repeat(value, count): an array holding `value` abs(count) times, or [NULL] when count < 0.
/// Throws SYNTAX_ERROR when the count argument is empty after removing spaces.
bool Repeat::convertImpl(String & out, IParser::Pos & pos)
{
    const auto function_name = getKQLFunctionName(pos);
    if (function_name.empty())
        return false;

    String value = getArgument(function_name, pos);
    String count = getArgument(function_name, pos);

    /// std::erase replaces the unqualified remove/erase pair, which relied on ADL and on
    /// <algorithm> being included transitively.
    /// NOTE(review): this also removes spaces inside a quoted `value` — confirm that is intended.
    std::erase(value, ' ');
    std::erase(count, ' ');

    if (count.empty())
        throw Exception(ErrorCodes::SYNTAX_ERROR, "number of arguments do not match in function: {}", function_name);

    out = std::format("if({1} < 0, [NULL], arrayWithConstant(abs({1}), {0}))", value, count);

    return true;
}

/// set_difference(lhs, rhs1[, rhs2...]): distinct elements of lhs absent from the union of the rest.
bool SetDifference::convertImpl(String & out, IParser::Pos & pos)
{
    const auto function_name = getKQLFunctionName(pos);
    if (function_name.empty())
        return false;

    const auto lhs = getArgument(function_name, pos);

    /// One further array is mandatory; any additional ones are optional.
    std::vector arrays{getArgument(function_name, pos, ArgumentState::Raw)};
    while (auto next_array = getOptionalArgument(function_name, pos, ArgumentState::Raw))
        arrays.push_back(*next_array);

    const auto rhs = kqlCallToExpression("set_union", std::vector(arrays.cbegin(), arrays.cend()), pos.max_depth);

    out = std::format("arrayFilter(x -> not has({1}, x), arrayDistinct({0}))", lhs, rhs);

    return true;
}

/// set_has_element(...) is emitted as has(...) with the same arguments.
bool SetHasElement::convertImpl(String & out, IParser::Pos & pos)
{
    return directMapping(out, pos, "has");
}

/// set_intersect(...) is emitted as arrayIntersect(...) with the same arguments.
bool SetIntersect::convertImpl(String & out, IParser::Pos & pos)
{
    return directMapping(out, pos, "arrayIntersect");
}

/// set_union(...) is emitted as arrayDistinct(arrayConcat(...)).
bool SetUnion::convertImpl(String & out, IParser::Pos & pos)
{
    if (!directMapping(out, pos, "arrayConcat"))
        return false;

    out = std::format("arrayDistinct({0})", out);

    return true;
}

/// treepath(): not translated. Copies the current token into `out` and reports failure.
bool TreePath::convertImpl(String & out, IParser::Pos & pos)
{
    out = String(pos->begin, pos->end);
    return false;
}
/// zip(a1, a2, ...): accepts 2 to 16 arrays, resizes each to the longest length (padding
/// with null) and emits one inner array per position.
bool Zip::convertImpl(String & out, IParser::Pos & pos)
{
    const auto name = getKQLFunctionName(pos);
    if (name.empty())
        return false;

    const auto arrays = getArguments(name, pos, ArgumentState::Parsed, {2, 16});
    const auto alias_suffix = generateUniqueIdentifier();
    const auto array_count = arrays.size();

    /// Comma-separated length(...) terms; each one also binds the alias arg<i>_<suffix>.
    String length_terms;
    for (size_t index = 0; index < array_count; ++index)
    {
        if (index > 0)
            length_terms += ", ";

        length_terms += std::format(
            "length(if(match(toTypeName({0}), 'Array\\(Nullable\\(.*\\)\\)'), {0}, "
            "cast({0}, concat('Array(', extract(toTypeName(if(length({0}) = 0, [NULL], {0})), 'Array\\((.*)\\)'), ')'))) as "
            "arg{1}_{2})",
            arrays[index],
            index,
            alias_suffix);
    }

    /// The first arrayResize defines max_length_<suffix>; the others refer to it.
    String resized = std::format("arrayResize(arg0_{1}, arrayMax([{0}]) as max_length_{1}, null)", length_terms, alias_suffix);
    for (size_t index = 1; index < array_count; ++index)
        resized += std::format(", arrayResize(arg{0}_{1}, max_length_{1}, null)", index, alias_suffix);

    out = std::format("arrayMap(t -> [untuple(t)], arrayZip({0}))", resized);

    return true;
}

/// range(...) is emitted as kql_range(...) with the same arguments.
bool Range::convertImpl(String & out, IParser::Pos & pos)
{
    return directMapping(out, pos, "kql_range");
}
/// Converter for KQL array_concat().
class ArrayConcat : public IParserKQLFunction
{
protected:
    const char * getName() const override { return "array_concat()"; }
    bool convertImpl(String &out,IParser::Pos &pos) override;
};

/// Converter for KQL array_iif().
class ArrayIif : public IParserKQLFunction
{
protected:
    const char * getName() const override { return "array_iif()"; }
    bool convertImpl(String &out,IParser::Pos &pos) override;
};

/// Converter for KQL array_index_of().
class ArrayIndexOf : public IParserKQLFunction
{
protected:
    const char * getName() const override { return "array_index_of()"; }
    bool convertImpl(String &out,IParser::Pos &pos) override;
};

/// Converter for KQL array_length().
class ArrayLength : public IParserKQLFunction
{
protected:
    const char * getName() const override { return "array_length()"; }
    bool convertImpl(String &out,IParser::Pos &pos) override;
};

/// Converter for KQL array_reverse().
class ArrayReverse : public IParserKQLFunction
{
protected:
    const char * getName() const override { return "array_reverse()"; }
    bool convertImpl(String &out,IParser::Pos &pos) override;
};

/// Converter for KQL array_rotate_left().
class ArrayRotateLeft : public IParserKQLFunction
{
protected:
    const char * getName() const override { return "array_rotate_left()"; }
    bool convertImpl(String &out,IParser::Pos &pos) override;
};

/// Converter for KQL array_rotate_right().
class ArrayRotateRight : public IParserKQLFunction
{
protected:
    const char * getName() const override { return "array_rotate_right()"; }
    bool convertImpl(String &out,IParser::Pos &pos) override;
};

/// Converter for KQL array_shift_left().
class ArrayShiftLeft : public IParserKQLFunction
{
protected:
    const char * getName() const override { return "array_shift_left()"; }
    bool convertImpl(String &out,IParser::Pos &pos) override;
};

/// Converter for KQL array_shift_right().
class ArrayShiftRight : public IParserKQLFunction
{
protected:
    const char * getName() const override { return "array_shift_right()"; }
    bool convertImpl(String &out,IParser::Pos &pos) override;
};

/// Converter for KQL array_slice().
class ArraySlice : public IParserKQLFunction
{
protected:
    const char * getName() const override { return "array_slice()"; }
    bool convertImpl(String &out,IParser::Pos &pos) override;
};

/// Converter for KQL array_sort_asc().
class ArraySortAsc : public IParserKQLFunction
{
protected:
    const char * getName() const override { return "array_sort_asc()"; }
    bool convertImpl(String &out,IParser::Pos &pos) override;
};

/// Converter for KQL array_sort_desc().
class ArraySortDesc : public IParserKQLFunction
{
protected:
    const char * getName() const override { return "array_sort_desc()"; }
    bool convertImpl(String &out,IParser::Pos &pos) override;
};

/// Converter for KQL array_split().
class ArraySplit : public IParserKQLFunction
{
protected:
    const char * getName() const override { return "array_split()"; }
    bool convertImpl(String &out,IParser::Pos &pos) override;
};

/// Converter for KQL array_sum().
class ArraySum : public IParserKQLFunction
{
protected:
    const char * getName() const override { return "array_sum()"; }
    bool convertImpl(String &out,IParser::Pos &pos) override;
};

/// Converter for KQL bag_keys().
class BagKeys : public IParserKQLFunction
{
protected:
    const char * getName() const override { return "bag_keys()"; }
    bool convertImpl(String &out,IParser::Pos &pos) override;
};

/// Converter for KQL bag_merge().
class BagMerge : public IParserKQLFunction
{
protected:
    const char * getName() const override { return "bag_merge()"; }
    bool convertImpl(String &out,IParser::Pos &pos) override;
};

/// Converter for KQL bag_remove_keys().
class BagRemoveKeys : public IParserKQLFunction
{
protected:
    const char * getName() const override { return "bag_remove_keys()"; }
    bool convertImpl(String &out,IParser::Pos &pos) override;
};

/// Converter for KQL jaccard_index().
class JaccardIndex : public IParserKQLFunction
{
protected:
    const char * getName() const override { return "jaccard_index()"; }
    bool convertImpl(String &out,IParser::Pos &pos) override;
};

/// Converter for KQL pack().
class Pack : public IParserKQLFunction
{
protected:
    const char * getName() const override { return "pack()"; }
    bool convertImpl(String &out,IParser::Pos &pos) override;
};

/// Converter for KQL pack_all().
class PackAll : public IParserKQLFunction
{
protected:
    const char * getName() const override { return "pack_all()"; }
    bool convertImpl(String &out,IParser::Pos &pos) override;
};

/// Converter for KQL pack_array().
class PackArray : public IParserKQLFunction
{
protected:
    const char * getName() const override { return "pack_array()"; }
    bool convertImpl(String &out,IParser::Pos &pos) override;
};

/// Converter for KQL repeat().
class Repeat : public IParserKQLFunction
{
protected:
    const char * getName() const override { return "repeat()"; }
    bool convertImpl(String &out,IParser::Pos &pos) override;
};

/// Converter for KQL set_difference().
class SetDifference : public IParserKQLFunction
{
protected:
    const char * getName() const override { return "set_difference()"; }
    bool convertImpl(String &out,IParser::Pos &pos) override;
};

/// Converter for KQL set_has_element().
class SetHasElement : public IParserKQLFunction
{
protected:
    const char * getName() const override { return "set_has_element()"; }
    bool convertImpl(String &out,IParser::Pos &pos) override;
};

/// Converter for KQL set_intersect().
class SetIntersect : public IParserKQLFunction
{
protected:
    const char * getName() const override { return "set_intersect()"; }
    bool convertImpl(String &out,IParser::Pos &pos) override;
};

/// Converter for KQL set_union().
class SetUnion : public IParserKQLFunction
{
protected:
    const char * getName() const override { return "set_union()"; }
    bool convertImpl(String &out,IParser::Pos &pos) override;
};

/// Converter for KQL treepath().
class TreePath : public IParserKQLFunction
{
protected:
    const char * getName() const override { return "treepath()"; }
    bool convertImpl(String &out,IParser::Pos &pos) override;
};

/// Converter for KQL zip().
class Zip : public IParserKQLFunction
{
protected:
    const char * getName() const override { return "zip()"; }
    bool convertImpl(String &out,IParser::Pos &pos) override;
};

/// Converter for KQL range().
class Range : public IParserKQLFunction
{
protected:
    const char * getName() const override { return "range()"; }
    bool convertImpl(String &out,IParser::Pos &pos) override;
};
/// Identifier of every KQL function the factory knows about.
/// KQL_FUNCTIONS maps spelled names (including aliases) to these values and
/// KQLFunctionFactory::get switches on them. `none` makes get() return nullptr.
enum class KQLFunction : uint16_t
{
    none,
    // Date and time
    ago, datetime_add, datetime_part, datetime_diff, dayofmonth, dayofweek, dayofyear,
    endofday, endofweek, endofyear, endofmonth, monthofyear, format_datetime, format_timespan,
    getmonth, getyear, hourofday, make_timespan, make_datetime, now,
    startofday, startofmonth, startofweek, startofyear, todatetime, totimespan,
    unixtime_microseconds_todatetime, unixtime_milliseconds_todatetime,
    unixtime_nanoseconds_todatetime, unixtime_seconds_todatetime, week_of_year,

    // Strings
    base64_encode_tostring, base64_encode_fromguid, base64_decode_tostring, base64_decode_toarray,
    base64_decode_toguid, countof, extract, extract_all, extract_json, has_any_index, indexof,
    isempty, isnan, isnotempty, isnotnull, isnull, parse_command_line, parse_csv, parse_json,
    parse_url, parse_urlquery, parse_version, replace_regex, reverse, split, strcat, strcat_delim,
    strcmp, strlen, strrep, substring, tolower, toupper, translate, trim, trim_end, trim_start,
    url_decode, url_encode,

    // Dynamic values (arrays, bags, sets)
    array_concat, array_iif, array_index_of, array_length, array_reverse, array_rotate_left,
    array_rotate_right, array_shift_left, array_shift_right, array_slice, array_sort_asc,
    array_sort_desc, array_split, array_sum, bag_keys, bag_merge, bag_remove_keys, jaccard_index,
    pack, pack_all, pack_array, repeat, set_difference, set_has_element, set_intersect, set_union,
    treepath, zip,

    // Conversions
    tobool, todouble, toint, tolong, tostring, todecimal,

    // Aggregations
    arg_max, arg_min, avg, avgif, binary_all_and, binary_all_or, binary_all_xor, buildschema,
    count, countif, dcount, dcountif, make_bag, make_bag_if, make_list, make_list_if,
    make_list_with_nulls, make_set, make_set_if, max, maxif, min, minif, percentile, percentilew,
    percentiles, percentiles_array, percentilesw, percentilesw_array, stdev, stdevif, sum, sumif,
    take_any, take_anyif, variance, varianceif, variancep, count_distinct, count_distinctif,

    // Time series
    series_fir, series_iir, series_fit_line, series_fit_line_dynamic, series_fit_2lines,
    series_fit_2lines_dynamic, series_outliers, series_periods_detect, series_periods_validate,
    series_stats_dynamic, series_stats, series_fill_backward, series_fill_const,
    series_fill_forward, series_fill_linear,

    // IP addresses
    ipv4_compare, ipv4_is_in_range, ipv4_is_match, ipv4_is_private, ipv4_netmask_suffix,
    parse_ipv4, parse_ipv4_mask, has_ipv6, has_any_ipv6, has_ipv6_prefix, has_any_ipv6_prefix,
    ipv6_compare, ipv6_is_match, parse_ipv6, parse_ipv6_mask, format_ipv4, format_ipv4_mask,
    has_ipv4, has_any_ipv4, has_ipv4_prefix, has_any_ipv4_prefix,

    // Bitwise
    binary_and, binary_not, binary_or, binary_shift_left, binary_shift_right, binary_xor,
    bitset_count_ones,

    // General (`kase` stands for the KQL name "case", which is a C++ keyword)
    bin, bin_at, kase, iff, iif, lookup,

    // Data type literals, plus range
    datatype_bool, datatype_datetime, datatype_dynamic, datatype_guid, datatype_int,
    datatype_long, datatype_real, datatype_timespan, datatype_decimal, range,

    // Mathematics
    abs, acos, asin, atan, atan2, ceiling, cos, cot, degrees, exp, exp2, exp10, gamma, isfinite,
    isinf, log, log2, log10, loggamma, max_of, min_of, pi, pow, radians, rand, round, sign, sin,
    sqrt, tan
};
series_fit_line, + series_fit_line_dynamic, + series_fit_2lines, + series_fit_2lines_dynamic, + series_outliers, + series_periods_detect, + series_periods_validate, + series_stats_dynamic, + series_stats, + series_fill_backward, + series_fill_const, + series_fill_forward, + series_fill_linear, + + ipv4_compare, + ipv4_is_in_range, + ipv4_is_match, + ipv4_is_private, + ipv4_netmask_suffix, + parse_ipv4, + parse_ipv4_mask, + has_ipv6, + has_any_ipv6, + has_ipv6_prefix, + has_any_ipv6_prefix, + ipv6_compare, + ipv6_is_match, + parse_ipv6, + parse_ipv6_mask, + format_ipv4, + format_ipv4_mask, + has_ipv4, + has_any_ipv4, + has_ipv4_prefix, + has_any_ipv4_prefix, + + binary_and, + binary_not, + binary_or, + binary_shift_left, + binary_shift_right, + binary_xor, + bitset_count_ones, + + bin, + bin_at, + kase, + iff, + iif, + lookup, + + datatype_bool, + datatype_datetime, + datatype_dynamic, + datatype_guid, + datatype_int, + datatype_long, + datatype_real, + datatype_timespan, + datatype_decimal, + range, + + abs, + acos, + asin, + atan, + atan2, + ceiling, + cos, + cot, + degrees, + exp, + exp2, + exp10, + gamma, + isfinite, + isinf, + log, + log2, + log10, + loggamma, + max_of, + min_of, + pi, + pow, + radians, + rand, + round, + sign, + sin, + sqrt, + tan +}; + +const std::unordered_map KQL_FUNCTIONS{ + {"ago", KQLFunction::ago}, + {"datetime_add", KQLFunction::datetime_add}, + {"datetime_part", KQLFunction::datetime_part}, + {"datetime_diff", KQLFunction::datetime_diff}, + {"dayofmonth", KQLFunction::dayofmonth}, + {"dayofweek", KQLFunction::dayofweek}, + {"dayofyear", KQLFunction::dayofyear}, + {"endofday", KQLFunction::endofday}, + {"endofweek", KQLFunction::endofweek}, + {"endofyear", KQLFunction::endofyear}, + {"endofmonth", KQLFunction::endofmonth}, + + {"format_datetime", KQLFunction::format_datetime}, + {"format_timespan", KQLFunction::format_timespan}, + {"getmonth", KQLFunction::getmonth}, + {"getyear", KQLFunction::getyear}, + {"hourofday", 
KQLFunction::hourofday}, + {"make_timespan", KQLFunction::make_timespan}, + {"make_datetime", KQLFunction::make_datetime}, + {"now", KQLFunction::now}, + {"startofday", KQLFunction::startofday}, + {"startofmonth", KQLFunction::startofmonth}, + {"startofweek", KQLFunction::startofweek}, + {"startofyear", KQLFunction::startofyear}, + {"todatetime", KQLFunction::todatetime}, + {"totimespan", KQLFunction::totimespan}, + {"unixtime_microseconds_todatetime", KQLFunction::unixtime_microseconds_todatetime}, + {"unixtime_milliseconds_todatetime", KQLFunction::unixtime_milliseconds_todatetime}, + {"unixtime_nanoseconds_todatetime", KQLFunction::unixtime_nanoseconds_todatetime}, + {"unixtime_seconds_todatetime", KQLFunction::unixtime_seconds_todatetime}, + {"week_of_year", KQLFunction::week_of_year}, + {"monthofyear", KQLFunction::monthofyear}, + {"base64_encode_tostring", KQLFunction::base64_encode_tostring}, + {"base64_encode_fromguid", KQLFunction::base64_encode_fromguid}, + {"base64_decode_tostring", KQLFunction::base64_decode_tostring}, + {"base64_decode_toarray", KQLFunction::base64_decode_toarray}, + {"base64_decode_toguid", KQLFunction::base64_decode_toguid}, + {"countof", KQLFunction::countof}, + {"extract", KQLFunction::extract}, + {"extract_all", KQLFunction::extract_all}, + {"extract_json", KQLFunction::extract_json}, + {"extractjson", KQLFunction::extract_json}, + {"has_any_index", KQLFunction::has_any_index}, + {"indexof", KQLFunction::indexof}, + {"isempty", KQLFunction::isempty}, + {"isnan", KQLFunction::isnan}, + {"isnotempty", KQLFunction::isnotempty}, + {"notempty", KQLFunction::isnotempty}, + {"isnotnull", KQLFunction::isnotnull}, + {"notnull", KQLFunction::isnotnull}, + {"isnull", KQLFunction::isnull}, + {"parse_command_line", KQLFunction::parse_command_line}, + {"parse_csv", KQLFunction::parse_csv}, + {"parse_json", KQLFunction::parse_json}, + {"parse_url", KQLFunction::parse_url}, + {"parse_urlquery", KQLFunction::parse_urlquery}, + {"parse_version", 
KQLFunction::parse_version}, + {"replace_regex", KQLFunction::replace_regex}, + {"reverse", KQLFunction::reverse}, + {"split", KQLFunction::split}, + {"strcat", KQLFunction::strcat}, + {"strcat_delim", KQLFunction::strcat_delim}, + {"strcmp", KQLFunction::strcmp}, + {"strlen", KQLFunction::strlen}, + {"strrep", KQLFunction::strrep}, + {"substring", KQLFunction::substring}, + {"tolower", KQLFunction::tolower}, + {"toupper", KQLFunction::toupper}, + {"translate", KQLFunction::translate}, + {"trim", KQLFunction::trim}, + {"trim_end", KQLFunction::trim_end}, + {"trim_start", KQLFunction::trim_start}, + {"url_decode", KQLFunction::url_decode}, + {"url_encode", KQLFunction::url_encode}, + + {"array_concat", KQLFunction::array_concat}, + {"array_iff", KQLFunction::array_iif}, + {"array_iif", KQLFunction::array_iif}, + {"array_index_of", KQLFunction::array_index_of}, + {"array_length", KQLFunction::array_length}, + {"array_reverse", KQLFunction::array_reverse}, + {"array_rotate_left", KQLFunction::array_rotate_left}, + {"array_rotate_right", KQLFunction::array_rotate_right}, + {"array_shift_left", KQLFunction::array_shift_left}, + {"array_shift_right", KQLFunction::array_shift_right}, + {"array_slice", KQLFunction::array_slice}, + {"array_sort_asc", KQLFunction::array_sort_asc}, + {"array_sort_desc", KQLFunction::array_sort_desc}, + {"array_split", KQLFunction::array_split}, + {"array_sum", KQLFunction::array_sum}, + {"bag_keys", KQLFunction::bag_keys}, + {"bag_merge", KQLFunction::bag_merge}, + {"bag_remove_keys", KQLFunction::bag_remove_keys}, + {"jaccard_index", KQLFunction::jaccard_index}, + {"pack", KQLFunction::pack}, + {"pack_all", KQLFunction::pack_all}, + {"pack_array", KQLFunction::pack_array}, + {"repeat", KQLFunction::repeat}, + {"set_difference", KQLFunction::set_difference}, + {"set_has_element", KQLFunction::set_has_element}, + {"set_intersect", KQLFunction::set_intersect}, + {"set_union", KQLFunction::set_union}, + {"treepath", KQLFunction::treepath}, + 
{"zip", KQLFunction::zip}, + + {"tobool", KQLFunction::tobool}, + {"toboolean", KQLFunction::tobool}, + {"todouble", KQLFunction::todouble}, + {"toint", KQLFunction::toint}, + {"tolong", KQLFunction::tolong}, + {"toreal", KQLFunction::todouble}, + {"tostring", KQLFunction::tostring}, + {"totimespan", KQLFunction::totimespan}, + {"todecimal", KQLFunction::todecimal}, + + {"arg_max", KQLFunction::arg_max}, + {"arg_min", KQLFunction::arg_min}, + {"avg", KQLFunction::avg}, + {"avgif", KQLFunction::avgif}, + {"binary_all_and", KQLFunction::binary_all_and}, + {"binary_all_or", KQLFunction::binary_all_or}, + {"binary_all_xor", KQLFunction::binary_all_xor}, + {"buildschema", KQLFunction::buildschema}, + {"count", KQLFunction::count}, + {"countif", KQLFunction::countif}, + {"dcount", KQLFunction::dcount}, + {"dcountif", KQLFunction::dcountif}, + {"make_bag", KQLFunction::make_bag}, + {"make_bag_if", KQLFunction::make_bag_if}, + {"make_list", KQLFunction::make_list}, + {"make_list_if", KQLFunction::make_list_if}, + {"make_list_with_nulls", KQLFunction::make_list_with_nulls}, + {"make_set", KQLFunction::make_set}, + {"make_set_if", KQLFunction::make_set_if}, + {"max", KQLFunction::max}, + {"maxif", KQLFunction::maxif}, + {"min", KQLFunction::min}, + {"minif", KQLFunction::minif}, + {"percentile", KQLFunction::percentile}, + {"percentilew", KQLFunction::percentilew}, + {"percentiles", KQLFunction::percentiles}, + {"percentiles_array", KQLFunction::percentiles_array}, + {"percentilesw", KQLFunction::percentilesw}, + {"percentilesw_array", KQLFunction::percentilesw_array}, + {"stdev", KQLFunction::stdev}, + {"stdevif", KQLFunction::stdevif}, + {"sum", KQLFunction::sum}, + {"sumif", KQLFunction::sumif}, + {"take_any", KQLFunction::take_any}, + {"take_anyif", KQLFunction::take_anyif}, + {"variance", KQLFunction::variance}, + {"varianceif", KQLFunction::varianceif}, + {"variancep", KQLFunction::variancep}, + {"count_distinct", KQLFunction::count_distinct}, + {"count_distinctif", 
KQLFunction::count_distinctif}, + + {"series_fir", KQLFunction::series_fir}, + {"series_iir", KQLFunction::series_iir}, + {"series_fit_line", KQLFunction::series_fit_line}, + {"series_fit_line_dynamic", KQLFunction::series_fit_line_dynamic}, + {"series_fit_2lines", KQLFunction::series_fit_2lines}, + {"series_fit_2lines_dynamic", KQLFunction::series_fit_2lines_dynamic}, + {"series_outliers", KQLFunction::series_outliers}, + {"series_periods_detect", KQLFunction::series_periods_detect}, + {"series_periods_validate", KQLFunction::series_periods_validate}, + {"series_stats_dynamic", KQLFunction::series_stats_dynamic}, + {"series_stats", KQLFunction::series_stats}, + {"series_fill_backward", KQLFunction::series_fill_backward}, + {"series_fill_const", KQLFunction::series_fill_const}, + {"series_fill_forward", KQLFunction::series_fill_forward}, + {"series_fill_linear", KQLFunction::series_fill_linear}, + + {"ipv4_compare", KQLFunction::ipv4_compare}, + {"ipv4_is_in_range", KQLFunction::ipv4_is_in_range}, + {"ipv4_is_match", KQLFunction::ipv4_is_match}, + {"ipv4_is_private", KQLFunction::ipv4_is_private}, + {"ipv4_netmask_suffix", KQLFunction::ipv4_netmask_suffix}, + {"parse_ipv4", KQLFunction::parse_ipv4}, + {"parse_ipv4_mask", KQLFunction::parse_ipv4_mask}, + {"ipv6_compare", KQLFunction::ipv6_compare}, + {"ipv6_is_match", KQLFunction::ipv6_is_match}, + {"parse_ipv6", KQLFunction::parse_ipv6}, + {"parse_ipv6_mask", KQLFunction::parse_ipv6_mask}, + {"format_ipv4", KQLFunction::format_ipv4}, + {"format_ipv4_mask", KQLFunction::format_ipv4_mask}, + {"has_ipv4", KQLFunction::has_ipv4}, + {"has_any_ipv4", KQLFunction::has_any_ipv4}, + {"has_ipv4_prefix", KQLFunction::has_ipv4_prefix}, + {"has_any_ipv4_prefix", KQLFunction::has_any_ipv4_prefix}, + + {"binary_and", KQLFunction::binary_and}, + {"binary_not", KQLFunction::binary_not}, + {"binary_or", KQLFunction::binary_or}, + {"binary_shift_left", KQLFunction::binary_shift_left}, + {"binary_shift_right", 
KQLFunction::binary_shift_right}, + {"binary_xor", KQLFunction::binary_xor}, + {"bitset_count_ones", KQLFunction::bitset_count_ones}, + + {"bin", KQLFunction::bin}, + {"floor", KQLFunction::bin}, + {"bin_at", KQLFunction::bin_at}, + {"case", KQLFunction::kase}, + {"iff", KQLFunction::iff}, + {"iif", KQLFunction::iif}, + {"lookup", KQLFunction::lookup}, + + {"bool", KQLFunction::datatype_bool}, + {"boolean", KQLFunction::datatype_bool}, + {"datetime", KQLFunction::datatype_datetime}, + {"date", KQLFunction::datatype_datetime}, + {"dynamic", KQLFunction::datatype_dynamic}, + {"guid", KQLFunction::datatype_guid}, + {"int", KQLFunction::datatype_int}, + {"long", KQLFunction::datatype_long}, + {"real", KQLFunction::datatype_real}, + {"double", KQLFunction::datatype_real}, + {"timespan", KQLFunction::datatype_timespan}, + {"time", KQLFunction::datatype_timespan}, + {"decimal", KQLFunction::datatype_decimal}, + {"range", KQLFunction::range}, + + {"abs", KQLFunction::abs}, + {"acos", KQLFunction::acos}, + {"asin", KQLFunction::asin}, + {"atan", KQLFunction::atan}, + {"atan2", KQLFunction::atan2}, + {"ceiling", KQLFunction::ceiling}, + {"cos", KQLFunction::cos}, + {"cot", KQLFunction::cot}, + {"degrees", KQLFunction::degrees}, + {"exp", KQLFunction::exp}, + {"exp2", KQLFunction::exp2}, + {"exp10", KQLFunction::exp10}, + {"gamma", KQLFunction::gamma}, + {"isfinite", KQLFunction::isfinite}, + {"isinf", KQLFunction::isinf}, + {"log", KQLFunction::log}, + {"log2", KQLFunction::log2}, + {"log10", KQLFunction::log10}, + {"loggamma", KQLFunction::loggamma}, + {"max_of", KQLFunction::max_of}, + {"min_of", KQLFunction::min_of}, + {"pi", KQLFunction::pi}, + {"pow", KQLFunction::pow}, + {"radians", KQLFunction::radians}, + {"rand", KQLFunction::rand}, + {"round", KQLFunction::round}, + {"sign", KQLFunction::sign}, + {"sin", KQLFunction::sin}, + {"sqrt", KQLFunction::sqrt}, + {"tan", KQLFunction::tan}}; +} + +namespace DB +{ +std::unique_ptr KQLFunctionFactory::get(const String & 
kql_function) +{ + const auto kql_function_it = KQL_FUNCTIONS.find(kql_function); + if (kql_function_it == KQL_FUNCTIONS.end()) + return nullptr; + + const auto& kql_function_id = kql_function_it->second; + switch (kql_function_id) + { + case KQLFunction::none: + return nullptr; + + case KQLFunction::ago: + return std::make_unique(); + + case KQLFunction::datetime_add: + return std::make_unique(); + + case KQLFunction::datetime_part: + return std::make_unique(); + + case KQLFunction::datetime_diff: + return std::make_unique(); + + case KQLFunction::dayofmonth: + return std::make_unique(); + + case KQLFunction::dayofweek: + return std::make_unique(); + + case KQLFunction::dayofyear: + return std::make_unique(); + + case KQLFunction::endofday: + return std::make_unique(); + + case KQLFunction::endofweek: + return std::make_unique(); + + case KQLFunction::endofyear: + return std::make_unique(); + + case KQLFunction::endofmonth: + return std::make_unique(); + + case KQLFunction::monthofyear: + return std::make_unique(); + + case KQLFunction::format_datetime: + return std::make_unique(); + + case KQLFunction::format_timespan: + return std::make_unique(); + + case KQLFunction::getmonth: + return std::make_unique(); + + case KQLFunction::getyear: + return std::make_unique(); + + case KQLFunction::hourofday: + return std::make_unique(); + + case KQLFunction::make_timespan: + return std::make_unique(); + + case KQLFunction::make_datetime: + return std::make_unique(); + + case KQLFunction::now: + return std::make_unique(); + + case KQLFunction::startofday: + return std::make_unique(); + + case KQLFunction::startofmonth: + return std::make_unique(); + + case KQLFunction::startofweek: + return std::make_unique(); + + case KQLFunction::startofyear: + return std::make_unique(); + + case KQLFunction::unixtime_microseconds_todatetime: + return std::make_unique(); + + case KQLFunction::unixtime_milliseconds_todatetime: + return std::make_unique(); + + case 
KQLFunction::unixtime_nanoseconds_todatetime: + return std::make_unique(); + + case KQLFunction::unixtime_seconds_todatetime: + return std::make_unique(); + + case KQLFunction::week_of_year: + return std::make_unique(); + + case KQLFunction::base64_encode_tostring: + return std::make_unique(); + + case KQLFunction::base64_encode_fromguid: + return std::make_unique(); + + case KQLFunction::base64_decode_tostring: + return std::make_unique(); + + case KQLFunction::base64_decode_toarray: + return std::make_unique(); + + case KQLFunction::base64_decode_toguid: + return std::make_unique(); + + case KQLFunction::countof: + return std::make_unique(); + + case KQLFunction::extract: + return std::make_unique(); + + case KQLFunction::extract_all: + return std::make_unique(); + + case KQLFunction::extract_json: + return std::make_unique(); + + case KQLFunction::has_any_index: + return std::make_unique(); + + case KQLFunction::indexof: + return std::make_unique(); + + case KQLFunction::isempty: + return std::make_unique(); + + case KQLFunction::isnan: + return std::make_unique(); + + case KQLFunction::isnotempty: + return std::make_unique(); + + case KQLFunction::isnotnull: + return std::make_unique(); + + case KQLFunction::isnull: + return std::make_unique(); + + case KQLFunction::parse_command_line: + return std::make_unique(); + + case KQLFunction::parse_csv: + return std::make_unique(); + + case KQLFunction::parse_json: + return std::make_unique(); + + case KQLFunction::parse_url: + return std::make_unique(); + + case KQLFunction::parse_urlquery: + return std::make_unique(); + + case KQLFunction::parse_version: + return std::make_unique(); + + case KQLFunction::replace_regex: + return std::make_unique(); + + case KQLFunction::reverse: + return std::make_unique(); + + case KQLFunction::split: + return std::make_unique(); + + case KQLFunction::strcat: + return std::make_unique(); + + case KQLFunction::strcat_delim: + return std::make_unique(); + + case KQLFunction::strcmp: + 
return std::make_unique(); + + case KQLFunction::strlen: + return std::make_unique(); + + case KQLFunction::strrep: + return std::make_unique(); + + case KQLFunction::substring: + return std::make_unique(); + + case KQLFunction::tolower: + return std::make_unique(); + + case KQLFunction::toupper: + return std::make_unique(); + + case KQLFunction::translate: + return std::make_unique(); + + case KQLFunction::trim: + return std::make_unique(); + + case KQLFunction::trim_end: + return std::make_unique(); + + case KQLFunction::trim_start: + return std::make_unique(); + + case KQLFunction::url_decode: + return std::make_unique(); + + case KQLFunction::url_encode: + return std::make_unique(); + + case KQLFunction::array_concat: + return std::make_unique(); + + case KQLFunction::array_iif: + return std::make_unique(); + + case KQLFunction::array_index_of: + return std::make_unique(); + + case KQLFunction::array_length: + return std::make_unique(); + + case KQLFunction::array_reverse: + return std::make_unique(); + + case KQLFunction::array_rotate_left: + return std::make_unique(); + + case KQLFunction::array_rotate_right: + return std::make_unique(); + + case KQLFunction::array_shift_left: + return std::make_unique(); + + case KQLFunction::array_shift_right: + return std::make_unique(); + + case KQLFunction::array_slice: + return std::make_unique(); + + case KQLFunction::array_sort_asc: + return std::make_unique(); + + case KQLFunction::array_sort_desc: + return std::make_unique(); + + case KQLFunction::array_split: + return std::make_unique(); + + case KQLFunction::array_sum: + return std::make_unique(); + + case KQLFunction::bag_keys: + return std::make_unique(); + + case KQLFunction::bag_merge: + return std::make_unique(); + + case KQLFunction::bag_remove_keys: + return std::make_unique(); + + case KQLFunction::jaccard_index: + return std::make_unique(); + + case KQLFunction::pack: + return std::make_unique(); + + case KQLFunction::pack_all: + return 
std::make_unique(); + + case KQLFunction::pack_array: + return std::make_unique(); + + case KQLFunction::repeat: + return std::make_unique(); + + case KQLFunction::set_difference: + return std::make_unique(); + + case KQLFunction::set_has_element: + return std::make_unique(); + + case KQLFunction::set_intersect: + return std::make_unique(); + + case KQLFunction::set_union: + return std::make_unique(); + + case KQLFunction::treepath: + return std::make_unique(); + + case KQLFunction::zip: + return std::make_unique(); + + case KQLFunction::tobool: + return std::make_unique(); + + case KQLFunction::todatetime: + return std::make_unique(); + + case KQLFunction::todouble: + return std::make_unique(); + + case KQLFunction::toint: + return std::make_unique(); + + case KQLFunction::tolong: + return std::make_unique(); + + case KQLFunction::tostring: + return std::make_unique(); + + case KQLFunction::totimespan: + return std::make_unique(); + + case KQLFunction::todecimal: + return std::make_unique(); + + case KQLFunction::arg_max: + return std::make_unique(); + + case KQLFunction::arg_min: + return std::make_unique(); + + case KQLFunction::avg: + return std::make_unique(); + + case KQLFunction::avgif: + return std::make_unique(); + + case KQLFunction::binary_all_and: + return std::make_unique(); + + case KQLFunction::binary_all_or: + return std::make_unique(); + + case KQLFunction::binary_all_xor: + return std::make_unique(); + + case KQLFunction::buildschema: + return std::make_unique(); + + case KQLFunction::count: + return std::make_unique(); + + case KQLFunction::countif: + return std::make_unique(); + + case KQLFunction::dcount: + return std::make_unique(); + + case KQLFunction::dcountif: + return std::make_unique(); + + case KQLFunction::make_bag: + return std::make_unique(); + + case KQLFunction::make_bag_if: + return std::make_unique(); + + case KQLFunction::make_list: + return std::make_unique(); + + case KQLFunction::make_list_if: + return std::make_unique(); + + 
case KQLFunction::make_list_with_nulls: + return std::make_unique(); + + case KQLFunction::make_set: + return std::make_unique(); + + case KQLFunction::make_set_if: + return std::make_unique(); + + case KQLFunction::max: + return std::make_unique(); + + case KQLFunction::maxif: + return std::make_unique(); + + case KQLFunction::min: + return std::make_unique(); + + case KQLFunction::minif: + return std::make_unique(); + + case KQLFunction::percentile: + return std::make_unique(); + + case KQLFunction::percentilew: + return std::make_unique(); + + case KQLFunction::percentiles: + return std::make_unique(); + + case KQLFunction::percentiles_array: + return std::make_unique(); + + case KQLFunction::percentilesw: + return std::make_unique(); + + case KQLFunction::percentilesw_array: + return std::make_unique(); + + case KQLFunction::stdev: + return std::make_unique(); + + case KQLFunction::stdevif: + return std::make_unique(); + + case KQLFunction::sum: + return std::make_unique(); + + case KQLFunction::sumif: + return std::make_unique(); + + case KQLFunction::take_any: + return std::make_unique(); + + case KQLFunction::take_anyif: + return std::make_unique(); + + case KQLFunction::variance: + return std::make_unique(); + + case KQLFunction::varianceif: + return std::make_unique(); + + case KQLFunction::variancep: + return std::make_unique(); + + case KQLFunction::count_distinct: + return std::make_unique(); + + case KQLFunction::count_distinctif: + return std::make_unique(); + + case KQLFunction::series_fir: + return std::make_unique(); + + case KQLFunction::series_iir: + return std::make_unique(); + + case KQLFunction::series_fit_line: + return std::make_unique(); + + case KQLFunction::series_fit_line_dynamic: + return std::make_unique(); + + case KQLFunction::series_fit_2lines: + return std::make_unique(); + + case KQLFunction::series_fit_2lines_dynamic: + return std::make_unique(); + + case KQLFunction::series_outliers: + return std::make_unique(); + + case 
KQLFunction::series_periods_detect: + return std::make_unique(); + + case KQLFunction::series_periods_validate: + return std::make_unique(); + + case KQLFunction::series_stats_dynamic: + return std::make_unique(); + + case KQLFunction::series_stats: + return std::make_unique(); + + case KQLFunction::series_fill_backward: + return std::make_unique(); + + case KQLFunction::series_fill_const: + return std::make_unique(); + + case KQLFunction::series_fill_forward: + return std::make_unique(); + + case KQLFunction::series_fill_linear: + return std::make_unique(); + + case KQLFunction::ipv4_compare: + return std::make_unique(); + + case KQLFunction::ipv4_is_in_range: + return std::make_unique(); + + case KQLFunction::ipv4_is_match: + return std::make_unique(); + + case KQLFunction::ipv4_is_private: + return std::make_unique(); + + case KQLFunction::ipv4_netmask_suffix: + return std::make_unique(); + + case KQLFunction::parse_ipv4: + return std::make_unique(); + + case KQLFunction::parse_ipv4_mask: + return std::make_unique(); + + case KQLFunction::has_ipv6: + return std::make_unique(); + + case KQLFunction::has_any_ipv6: + return std::make_unique(); + + case KQLFunction::has_ipv6_prefix: + return std::make_unique(); + + case KQLFunction::has_any_ipv6_prefix: + return std::make_unique(); + + case KQLFunction::ipv6_compare: + return std::make_unique(); + + case KQLFunction::ipv6_is_match: + return std::make_unique(); + + case KQLFunction::parse_ipv6: + return std::make_unique(); + + case KQLFunction::parse_ipv6_mask: + return std::make_unique(); + + case KQLFunction::format_ipv4: + return std::make_unique(); + + case KQLFunction::format_ipv4_mask: + return std::make_unique(); + + case KQLFunction::has_ipv4: + return std::make_unique(); + + case KQLFunction::has_any_ipv4: + return std::make_unique(); + + case KQLFunction::has_ipv4_prefix: + return std::make_unique(); + + case KQLFunction::has_any_ipv4_prefix: + return std::make_unique(); + + case KQLFunction::binary_and: + 
return std::make_unique(); + + case KQLFunction::binary_not: + return std::make_unique(); + + case KQLFunction::binary_or: + return std::make_unique(); + + case KQLFunction::binary_shift_left: + return std::make_unique(); + + case KQLFunction::binary_shift_right: + return std::make_unique(); + + case KQLFunction::binary_xor: + return std::make_unique(); + + case KQLFunction::bitset_count_ones: + return std::make_unique(); + + case KQLFunction::bin: + return std::make_unique(); + + case KQLFunction::bin_at: + return std::make_unique(); + + case KQLFunction::kase: + return std::make_unique(); + + case KQLFunction::iff: + return std::make_unique(); + + case KQLFunction::iif: + return std::make_unique(); + + case KQLFunction::lookup: + return std::make_unique(); + + case KQLFunction::datatype_bool: + return std::make_unique(); + + case KQLFunction::datatype_datetime: + return std::make_unique(); + + case KQLFunction::datatype_dynamic: + return std::make_unique(); + + case KQLFunction::datatype_guid: + return std::make_unique(); + + case KQLFunction::datatype_int: + return std::make_unique(); + + case KQLFunction::datatype_long: + return std::make_unique(); + + case KQLFunction::datatype_real: + return std::make_unique(); + + case KQLFunction::datatype_timespan: + return std::make_unique(); + + case KQLFunction::datatype_decimal: + return std::make_unique(); + + case KQLFunction::range: + return std::make_unique(); + + case KQLFunction::abs: + return std::make_unique(); + + case KQLFunction::acos: + return std::make_unique(); + + case KQLFunction::asin: + return std::make_unique(); + + case KQLFunction::atan: + return std::make_unique(); + + case KQLFunction::atan2: + return std::make_unique(); + + case KQLFunction::cos: + return std::make_unique(); + + case KQLFunction::cot: + return std::make_unique(); + + case KQLFunction::ceiling: + return std::make_unique(); + + case KQLFunction::degrees: + return std::make_unique(); + + case KQLFunction::exp: + return 
std::make_unique(); + + case KQLFunction::exp2: + return std::make_unique(); + + case KQLFunction::exp10: + return std::make_unique(); + + case KQLFunction::gamma: + return std::make_unique(); + + case KQLFunction::isfinite: + return std::make_unique(); + + case KQLFunction::isinf: + return std::make_unique(); + + case KQLFunction::log: + return std::make_unique(); + + case KQLFunction::log2: + return std::make_unique(); + + case KQLFunction::log10: + return std::make_unique(); + + case KQLFunction::loggamma: + return std::make_unique(); + + case KQLFunction::max_of: + return std::make_unique(); + + case KQLFunction::min_of: + return std::make_unique(); + + case KQLFunction::pi: + return std::make_unique(); + + case KQLFunction::pow: + return std::make_unique(); + + case KQLFunction::radians: + return std::make_unique(); + + case KQLFunction::rand: + return std::make_unique(); + + case KQLFunction::round: + return std::make_unique(); + + case KQLFunction::sign: + return std::make_unique(); + + case KQLFunction::sin: + return std::make_unique(); + + case KQLFunction::sqrt: + return std::make_unique(); + + case KQLFunction::tan: + return std::make_unique(); + } +} +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h new file mode 100644 index 000000000000..f9aea3b57987 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h @@ -0,0 +1,12 @@ +#pragma once + +#include "IParserKQLFunction.h" + +namespace DB +{ +class KQLFunctionFactory +{ +public: + static std::unique_ptr get(const String & kql_function); +}; +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp new file mode 100644 index 000000000000..c1a4d610c9d0 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp @@ -0,0 +1,63 @@ +#include "KQLGeneralFunctions.h" + +namespace DB::ErrorCodes +{ +extern const int SYNTAX_ERROR; 
+} + +namespace DB +{ +bool Bin::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "kql_bin"); +} + +bool BinAt::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "kql_bin_at"); +} + +bool Case::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "multiIf"); +} + +bool Iff::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "If"); +} + +bool Iif::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "If"); +} +/* Translates KQL lookup(): counts the arguments of the call and maps 3 arguments to dictGet and 4 to dictGetOrDefault; any other count throws SYNTAX_ERROR. Returns false when no function name is found at pos. */ +bool Lookup::convertImpl(String & out, IParser::Pos & pos) +{ + auto temp_pos = pos; + const String fn_name = getKQLFunctionName(temp_pos); + + if (fn_name.empty()) + return false; + int num_of_args = 0; + temp_pos = pos; + ++temp_pos; + ++temp_pos; + + /* Counting runs on the temp_pos copy so that pos is still at the start of the call for directMapping(); only the count is needed, so each converted argument is discarded. */ + + while (!temp_pos->isEnd() && temp_pos->type != TokenType::PipeMark && temp_pos->type != TokenType::Semicolon) + { + [[maybe_unused]] const auto converted_argument = getConvertedArgument(fn_name, temp_pos); + ++num_of_args; + ++temp_pos; + } + if (num_of_args == 3) + return directMapping(out, pos, "dictGet"); + else if (num_of_args == 4) + return directMapping(out, pos, "dictGetOrDefault"); + else + throw Exception(ErrorCodes::SYNTAX_ERROR, "number of arguments do not match in function: {}", fn_name); +} + +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.h new file mode 100644 index 000000000000..b5acf02ab18d --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.h @@ -0,0 +1,49 @@ +#pragma once + +#include "IParserKQLFunction.h" + +namespace DB +{ +class Bin : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "bin()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class BinAt : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "bin_at()"; } + bool convertImpl(String &out,IParser::Pos &pos)
override; +}; + +class Case : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "case()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Iff : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "iff()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; +class Iif : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "iif()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Lookup : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "lookup()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +} + diff --git a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp new file mode 100644 index 000000000000..99bb28e17fa7 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp @@ -0,0 +1,308 @@ +#include "KQLIPFunctions.h" + +#include + +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + +bool Ipv4Compare::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto lhs = getArgument(function_name, pos, ArgumentState::Raw); + const auto rhs = getArgument(function_name, pos, ArgumentState::Raw); + const auto mask = getOptionalArgument(function_name, pos, ArgumentState::Parsed); + out = std::format( + "if(isNull({0} as lhs_ip_{5}) or isNull({1} as lhs_mask_{5}) " + "or isNull({2} as rhs_ip_{5}) or isNull({3} as rhs_mask_{5}), null, " + "sign(toInt64(tupleElement(IPv4CIDRToRange(assumeNotNull(lhs_ip_{5}), " + "toUInt8(min2({4}, min2(assumeNotNull(lhs_mask_{5}), assumeNotNull(rhs_mask_{5})))) as mask_{5}), 1)::UInt32)" + " - toInt64(tupleElement(IPv4CIDRToRange(assumeNotNull(rhs_ip_{5}), mask_{5}), 1)::UInt32)))", + 
kqlCallToExpression("parse_ipv4", {lhs}, pos.max_depth), + kqlCallToExpression("ipv4_netmask_suffix", {lhs}, pos.max_depth), + kqlCallToExpression("parse_ipv4", {rhs}, pos.max_depth), + kqlCallToExpression("ipv4_netmask_suffix", {rhs}, pos.max_depth), + mask.value_or("32"), + generateUniqueIdentifier()); + return true; +} + +bool Ipv4IsInRange::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto ip_address = getArgument(function_name, pos, ArgumentState::Parsed); + const auto ip_range = getArgument(function_name, pos, ArgumentState::Raw); + out = std::format( + "if(isNull(IPv4StringToNumOrNull({0}) as ip_{3}) " + "or isNull({1} as range_start_ip_{3}) or isNull({2} as range_mask_{3}), null, " + "bitXor(range_start_ip_{3}, bitAnd(ip_{3}, bitNot(toUInt32(intExp2(32 - range_mask_{3}) - 1)))) = 0)", + ip_address, + kqlCallToExpression("parse_ipv4", {ip_range}, pos.max_depth), + kqlCallToExpression("ipv4_netmask_suffix", {ip_range}, pos.max_depth), + generateUniqueIdentifier()); + return true; +} + +bool Ipv4IsMatch::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto lhs = getArgument(function_name, pos, ArgumentState::Raw); + const auto rhs = getArgument(function_name, pos, ArgumentState::Raw); + const auto mask = getOptionalArgument(function_name, pos, ArgumentState::Raw); + out = std::format("equals({}, 0)", kqlCallToExpression("ipv4_compare", {lhs, rhs, mask.value_or("32")}, pos.max_depth)); + return true; +} + +bool Ipv4IsPrivate::convertImpl(String & out, IParser::Pos & pos) +{ + static const std::array s_private_subnets{"10.0.0.0/8", "172.16.0.0/12", "192.168.0.0/16"}; + + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto ip_address = getArgument(function_name, pos); + const auto 
unique_identifier = generateUniqueIdentifier(); + /* Assign to out instead of appending, like every other converter in this file; the later += calls then extend only the text produced here. */ + out = std::format( + "multiIf(length(splitByChar('/', {0}) as tokens_{1}) > 2 or isNull(toIPv4OrNull(tokens_{1}[1]) as nullable_ip_{1}) " + "or length(tokens_{1}) = 2 and isNull(toUInt8OrNull(tokens_{1}[-1]) as mask_{1}), null, " + "ignore(assumeNotNull(nullable_ip_{1}) as ip_{1}, " + "IPv4CIDRToRange(ip_{1}, assumeNotNull(mask_{1})) as range_{1}, IPv4NumToString(tupleElement(range_{1}, 1)) as begin_{1}, " + "IPv4NumToString(tupleElement(range_{1}, 2)) as end_{1}), null, ", + ip_address, + unique_identifier); + for (int i = 0; i < std::ssize(s_private_subnets); ++i) + { + if (i > 0) + out += " or "; + + const auto & subnet = s_private_subnets[i]; + out += std::format( + "length(tokens_{1}) = 1 and isIPAddressInRange(IPv4NumToString(ip_{1}), '{0}') or " + "length(tokens_{1}) = 2 and isIPAddressInRange(begin_{1}, '{0}') and isIPAddressInRange(end_{1}, '{0}')", + subnet, + unique_identifier); + } + + out.push_back(')'); + return true; +} +/* Translates KQL ipv4_netmask_suffix(): the generated expression is null when the argument has more than one '/' or its first token is not an IPv4 string, 32 when there is no '/suffix', null when the suffix does not parse as UInt8, and otherwise the suffix capped at 32. */ +bool Ipv4NetmaskSuffix::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto ip_address = getArgument(function_name, pos); + out = std::format( + "multiIf(length(splitByChar('/', {0}) as tokens_{1}) > 2 or not isIPv4String(tokens_{1}[1]), null, " + "length(tokens_{1}) = 1, 32, isNull(toUInt8OrNull(tokens_{1}[-1]) as mask_{1}), null, toUInt8(min2(mask_{1}, 32)))", + ip_address, + generateUniqueIdentifier()); + return true; +} + +bool ParseIpv4::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto ip_address = getArgument(function_name, pos); + out = std::format( + "multiIf(length(splitByChar('/', {0}) as tokens_{1}) = 1, IPv4StringToNumOrNull(tokens_{1}[1]) as ip_{1}, " + "length(tokens_{1}) = 2 and isNotNull(ip_{1}) and isNotNull(toUInt8OrNull(tokens_{1}[-1]) as
mask_{1}), " + "tupleElement(IPv4CIDRToRange(assumeNotNull(ip_{1}), assumeNotNull(mask_{1})), 1)::UInt32, null)", + ip_address, + generateUniqueIdentifier()); + return true; +} + +bool ParseIpv4Mask::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto ip_address = getArgument(function_name, pos); + const auto mask = getArgument(function_name, pos); + out = std::format( + "if(isNull(toIPv4OrNull({0}) as ip_{2}) or isNull(toUInt8OrNull(toString({1})) as mask_{2}), null, " + "toUInt32(tupleElement(IPv4CIDRToRange(assumeNotNull(ip_{2}), arrayMax([0, arrayMin([32, assumeNotNull(mask_{2})])])), 1)))", + ip_address, + mask, + generateUniqueIdentifier()); + return true; +} +/* The four has_*ipv6* converters below have no translation yet: each one always throws NOT_IMPLEMENTED, and the message carries getName() so the failing KQL function can be identified. */ +bool HasIpv6::convertImpl([[maybe_unused]] String & out, [[maybe_unused]] IParser::Pos & pos) +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "KQL function {} is not implemented", getName()); +} + +bool HasAnyIpv6::convertImpl([[maybe_unused]] String & out, [[maybe_unused]] IParser::Pos & pos) +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "KQL function {} is not implemented", getName()); +} + +bool HasIpv6Prefix::convertImpl([[maybe_unused]] String & out, [[maybe_unused]] IParser::Pos & pos) +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "KQL function {} is not implemented", getName()); +} + +bool HasAnyIpv6Prefix::convertImpl([[maybe_unused]] String & out, [[maybe_unused]] IParser::Pos & pos) +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "KQL function {} is not implemented", getName()); +} + +bool Ipv6Compare::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto lhs = getArgument(function_name, pos); + const auto rhs = getArgument(function_name, pos); + const auto mask = getOptionalArgument(function_name, pos); + const auto calculated_mask = mask.value_or("128"); + out = std::format( + "if(length(splitByChar('/', {1}) as lhs_tokens_{0}) > 2 or length(splitByChar('/', {2}) as rhs_tokens_{0}) > 2 " + "or
isNull(IPv6StringToNumOrNull(lhs_tokens_{0}[1]) as lhs_ipv6_{0}) or length(lhs_tokens_{0}) = 2 " + "and isNull((if(isIPv4String(lhs_tokens_{0}[1]), 96, 0) + toUInt8OrNull(lhs_tokens_{0}[-1])) as lhs_suffix_{0}) " + "or isNull(IPv6StringToNumOrNull(rhs_tokens_{0}[1]) as rhs_ipv6_{0}) or length(rhs_tokens_{0}) = 2 " + "and isNull((if(isIPv4String(rhs_tokens_{0}[1]), 96, 0) + toUInt8OrNull(rhs_tokens_{0}[-1])) as rhs_suffix_{0}) " + "or isNull(toUInt8(min2({3}, min2(ifNull(lhs_suffix_{0}, 128), ifNull(rhs_suffix_{0}, 128)))) as suffix_{0}) " + "or isNull(bitShiftLeft(bitShiftRight(bitNot(reinterpretAsFixedString(0::UInt128)), (128 - suffix_{0}) as zeroes_{0}), " + "zeroes_{0}) as mask_{0}) or isNull(bitAnd(lhs_ipv6_{0}, mask_{0}) as lhs_base_{0}) " + "or isNull(bitAnd(rhs_ipv6_{0}, mask_{0}) as rhs_base_{0}), null, " + "multiIf(lhs_base_{0} < rhs_base_{0}, -1, lhs_base_{0} > rhs_base_{0}, 1, 0))", + generateUniqueIdentifier(), + lhs, + rhs, + calculated_mask); + return true; +} + +bool Ipv6IsMatch::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto lhs = getArgument(function_name, pos, ArgumentState::Raw); + const auto rhs = getArgument(function_name, pos, ArgumentState::Raw); + const auto mask = getOptionalArgument(function_name, pos, ArgumentState::Raw); + out = std::format("equals({}, 0)", kqlCallToExpression("ipv6_compare", {lhs, rhs, mask.value_or("128")}, pos.max_depth)); + return true; +} + +bool ParseIpv6::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto ip_address = getArgument(function_name, pos); + out = std::format( + "if(length(splitByChar('/', {0}) as tokens_{1}) > 2 or isNull(IPv6StringToNumOrNull(tokens_{1}[1]) as ip_{1}) " + "or length(tokens_{1}) = 2 and isNull(toUInt8OrNull(tokens_{1}[-1]) as mask_{1}), null, " + 
"arrayStringConcat(flatten(extractAllGroups(lower(hex(tupleElement(IPv6CIDRToRange(assumeNotNull(ip_{1}), toUInt8(ifNull(mask_{1} " + "+ if(isIPv4String(tokens_{1}[1]), 96, 0), 128))), 1))), '([\\da-f]{{4}})')), ':'))", + ip_address, + generateUniqueIdentifier()); + return true; +} + +bool ParseIpv6Mask::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto ip_address = getArgument(function_name, pos, ArgumentState::Raw); + const auto mask = getArgument(function_name, pos, ArgumentState::Raw); + const auto unique_identifier = generateUniqueIdentifier(); + out = std::format( + "if(empty({0} as ipv4_{3}), {1}, {2})", + kqlCallToExpression("format_ipv4", {"trim_start('::', " + ip_address + ")", mask + " - 96"}, pos.max_depth), + kqlCallToExpression("parse_ipv6", {"strcat(tostring(parse_ipv6(" + ip_address + ")), '/', tostring(" + mask + "))"}, pos.max_depth), + kqlCallToExpression("parse_ipv6", {"ipv4_" + unique_identifier}, pos.max_depth), + unique_identifier); + return true; +} + +bool FormatIpv4::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto ip_address = getArgument(function_name, pos, ArgumentState::Raw); + const auto mask = getOptionalArgument(function_name, pos, ArgumentState::Parsed); + out = std::format( + "ifNull(if(isNotNull(toUInt32OrNull(toString({0})) as param_as_uint32_{3}) and toTypeName({0}) = 'String' or ({1}) < 0 " + "or isNull(ifNull(param_as_uint32_{3}, {2}) as ip_as_number_{3}), null, " + "IPv4NumToString(bitAnd(ip_as_number_{3}, bitNot(toUInt32(intExp2(32 - ({1})) - 1))))), '')", + ParserKQLBase::getExprFromToken(ip_address, pos.max_depth), + mask.value_or("32"), + kqlCallToExpression("parse_ipv4", {"tostring(" + ip_address + ")"}, pos.max_depth), + generateUniqueIdentifier()); + return true; +} + +bool 
FormatIpv4Mask::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto ip_address = getArgument(function_name, pos, ArgumentState::Raw); + const auto mask = getOptionalArgument(function_name, pos, ArgumentState::Raw); + const auto calculated_mask = mask.value_or("32"); + out = std::format( + "if(empty({1} as formatted_ip_{2}) or position(toTypeName({0}), 'Int') = 0 or not {0} between 0 and 32, '', " + "concat(formatted_ip_{2}, '/', toString(toInt64(min2({0}, ifNull({3} as suffix_{2}, 32))))))", + ParserKQLBase::getExprFromToken(calculated_mask, pos.max_depth), + kqlCallToExpression("format_ipv4", {ip_address, calculated_mask}, pos.max_depth), + generateUniqueIdentifier(), + kqlCallToExpression("ipv4_netmask_suffix", {"tostring(" + ip_address + ")"}, pos.max_depth)); + return true; +} + +bool HasIpv4::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "kql_has_ipv4"); +} + +bool HasAnyIpv4::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "kql_has_any_ipv4"); +} + +bool HasIpv4Prefix::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "kql_has_ipv4_prefix"); +} + +bool HasAnyIpv4Prefix::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "kql_has_any_ipv4_prefix"); +} +} + diff --git a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.h new file mode 100644 index 000000000000..9f1b4a1ec9d9 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.h @@ -0,0 +1,154 @@ +#pragma once + +#include "IParserKQLFunction.h" + +namespace DB +{ +class Ipv4Compare : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "ipv4_compare()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class Ipv4IsInRange : public IParserKQLFunction 
+{ +protected: + const char * getName() const override { return "ipv4_is_in_range()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class Ipv4IsMatch : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "ipv4_is_match()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class Ipv4IsPrivate : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "ipv4_is_private()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class Ipv4NetmaskSuffix : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "ipv4_netmask_suffix()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class ParseIpv4 : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "parse_ipv4()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class ParseIpv4Mask : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "parse_ipv4_mask()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class Ipv6Compare : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "ipv6_compare()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class HasIpv6 : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "has_ipv6()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class HasAnyIpv6 : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "has_any_ipv6()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class HasIpv6Prefix : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "has_ipv6_prefix()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class HasAnyIpv6Prefix : 
public IParserKQLFunction +{ +protected: + const char * getName() const override { return "has_any_ipv6_prefix()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class Ipv6IsMatch : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "ipv6_is_match()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class ParseIpv6 : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "parse_ipv6()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class ParseIpv6Mask : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "parse_ipv6_mask()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class FormatIpv4 : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "format_ipv4()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class FormatIpv4Mask : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "format_ipv4_mask()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class HasIpv4 : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "has_ipv4()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class HasAnyIpv4 : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "has_any_ipv4()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class HasIpv4Prefix : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "has_ipv4_prefix()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class HasAnyIpv4Prefix : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "has_any_ipv4_prefix()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; 
+} + diff --git a/src/Parsers/Kusto/KustoFunctions/KQLMathematicalFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLMathematicalFunctions.cpp new file mode 100644 index 000000000000..fea0645fa63e --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLMathematicalFunctions.cpp @@ -0,0 +1,214 @@ +#include "KQLMathematicalFunctions.h" + +#include + +namespace DB +{ + +bool Abs::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "abs"); +} + +bool Acos::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "acos"); +} + +bool Asin::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "asin"); +} + +bool Atan::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "atan"); +} + +bool Atan2::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "atan2"); +} + +bool Ceiling::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "ceil"); +} + +bool Cos::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "cos"); +} + +bool Cot::convertImpl(String & out, IParser::Pos & pos) +{ + const auto fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + /* cot(x) is emitted as (1/tan(x)); the outer parentheses keep the quotient a single operand, otherwise "2 / cot(x)" would turn into "2 / 1/tan(x)", i.e. (2 / 1) / tan(x). */ + out = "(1/tan(" + getArgument(fn_name, pos) + "))"; + return true; +} + +bool Degrees::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "degrees"); +} + +bool Exp::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "exp"); +} + +bool Exp2::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "exp2"); +} + +bool Exp10::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "exp10"); +} + +bool Gamma::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "tgamma"); +} + +bool IsFinite::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, 
"isFinite"); +} + +bool IsInfinite::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "isInfinite"); +} + +bool IsNan::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto argument = getArgument(function_name, pos); + out = std::format( + "if(toTypeName({0}) in ['Float64', 'Nullable(Float64)'], isNaN({0}), throwIf(true, 'Expected argument of data type real'))", + argument); + + return true; +} + +bool Log::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "log"); +} + +bool Log2::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "log2"); +} + +bool Log10::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "log10"); +} + +bool LogGamma::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "lgamma"); +} + +bool MaxOf::convertImpl(String & out, IParser::Pos & pos) +{ + const auto fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + + out.append("arrayReduce('max', ["); + const auto arguments = getArguments(fn_name, pos, ArgumentState::Parsed, {2, 64}); + + for (size_t i = 0; i < arguments.size(); i++) + { + out.append(arguments[i]); + if (i < arguments.size() - 1) + out.append(", "); + } + out.append("])"); + return true; +} + +bool MinOf::convertImpl(String & out, IParser::Pos & pos) +{ + const auto fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + + out.append("arrayReduce('min', ["); + const auto arguments = getArguments(fn_name, pos, ArgumentState::Parsed, {2, 64}); + + for (size_t i = 0; i < arguments.size(); i++) + { + out.append(arguments[i]); + if (i < arguments.size() - 1) + out.append(", "); + } + out.append("])"); + return true; +} + +bool Pi::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "pi"); +} + +bool 
Pow::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "pow"); +} + +bool Radians::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "radians"); +} + +bool Rand::convertImpl(String & out, IParser::Pos & pos) +{ + const auto fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + const auto arg = getOptionalArgument(fn_name, pos).value_or("0"); + out.append("if(" + arg + " < 2, randCanonical(), moduloOrZero(rand()," + arg + "))"); + return true; +} + +bool Round::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "round"); +} + +bool Sign::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "sign"); +} + +bool Sin::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "sin"); +} + +bool Sqrt::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "sqrt"); +} + +bool Tan::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "tan"); +} + +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLMathematicalFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLMathematicalFunctions.h new file mode 100644 index 000000000000..5934fadac4f8 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLMathematicalFunctions.h @@ -0,0 +1,224 @@ +#pragma once + +#include "IParserKQLFunction.h" + +namespace DB +{ +class Abs : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "abs()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class Acos : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "acos()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class Asin : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "asin()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class 
Atan : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "atan()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class Atan2 : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "atan2()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class Ceiling : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "ceiling()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class Cos : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "cos()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class Cot : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "cot()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class Degrees : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "degrees()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class Exp : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "exp()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class Exp2 : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "exp2()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class Exp10 : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "exp10()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class Gamma : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "gamma()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class IsFinite : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "isfinite()"; } + bool 
convertImpl(String & out, IParser::Pos & pos) override; +}; + +class IsInfinite : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "isinf()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class IsNan : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "isnan()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class Log : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "log()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class Log2 : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "log2()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class Log10 : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "log10()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class LogGamma : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "loggamma()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class MaxOf : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "max_of()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class MinOf : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "min_of()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class Pi : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "pi()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class Pow : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "pow()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class Radians : public IParserKQLFunction +{ +protected: + const char 
* getName() const override { return "radians()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class Rand : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "rand()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class Round : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "round()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class Sign : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "sign()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class Sin : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "sin()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class Sqrt : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "sqrt()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class Tan : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "tan()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp new file mode 100644 index 000000000000..0294bc74f36a --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp @@ -0,0 +1,736 @@ +#include "KQLStringFunctions.h" + +#include + +#include + +namespace DB::ErrorCodes +{ +extern const int SYNTAX_ERROR; +extern const int UNKNOWN_TYPE; +extern const int BAD_ARGUMENTS; +} + +namespace DB +{ + +bool Base64EncodeToString::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "base64Encode"); +} + +bool Base64EncodeFromGuid::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if 
(function_name.empty()) + return false; + + const auto argument = getArgument(function_name, pos); + out = std::format( + "if(toTypeName({0}) not in ['UUID', 'Nullable(UUID)'], toString(throwIf(true, 'Expected guid as argument')), " + "base64Encode(UUIDStringToNum(toString({0}), 2)))", + argument, + generateUniqueIdentifier()); + return true; +} + +bool Base64DecodeToString::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "base64Decode"); +} + +bool Base64DecodeToArray::convertImpl(String & out, IParser::Pos & pos) +{ + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + const String str = getConvertedArgument(fn_name, pos); + + out = std::format("arrayMap(x -> (reinterpretAsUInt8(x)), splitByRegexp ('',base64Decode({})))", str); + + return true; +} + +bool Base64DecodeToGuid::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto argument = getArgument(function_name, pos); + out = std::format("toUUIDOrNull(UUIDNumToString(toFixedString(base64Decode({}), 16), 2))", argument); + + return true; +} + +bool CountOf::convertImpl(String & out, IParser::Pos & pos) +{ + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + const String source = getConvertedArgument(fn_name, pos); + + ++pos; + const String search = getConvertedArgument(fn_name, pos); + + String kind = "'normal'"; + if (pos->type == TokenType::Comma) + { + ++pos; + kind = getConvertedArgument(fn_name, pos); + } + if (kind == "'normal'") + out = "countSubstrings(" + source + ", " + search + ")"; + else if (kind == "'regex'") + out = "countMatches(" + source + ", " + search + ")"; + else /* assert() is a no-op when NDEBUG is defined, so an unknown kind is rejected explicitly instead of silently taking the regex branch */ + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported kind {} for {}, expected 'normal' or 'regex'", kind, fn_name); + return true; +} + +bool Extract::convertImpl(String & out, IParser::Pos & pos) +{ + ParserKeyword s_kql("typeof"); + ParserToken 
open_bracket(TokenType::OpeningRoundBracket); + ParserToken close_bracket(TokenType::ClosingRoundBracket); + Expected expected; + + std::unordered_map type_cast + = {{"bool", "Boolean"}, + {"boolean", "Boolean"}, + {"datetime", "DateTime"}, + {"date", "DateTime"}, + {"guid", "UUID"}, + {"int", "Int32"}, + {"long", "Int64"}, + {"real", "Float64"}, + {"double", "Float64"}, + {"string", "String"}, + {"decimal", "Decimal"}}; + + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + String regex = getConvertedArgument(fn_name, pos); + + ++pos; + String capture_group = getConvertedArgument(fn_name, pos); + + ++pos; + String source = getConvertedArgument(fn_name, pos); + + String type_literal; + + if (pos->type == TokenType::Comma) + { + ++pos; + + if (s_kql.ignore(pos, expected)) + { + if (!open_bracket.ignore(pos, expected)) + throw Exception(ErrorCodes::SYNTAX_ERROR, "Syntax error near typeof"); + + type_literal = String(pos->begin, pos->end); + + if (type_cast.find(type_literal) == type_cast.end()) + throw Exception(ErrorCodes::UNKNOWN_TYPE, "{} is not a supported kusto data type for extract", type_literal); + + type_literal = type_cast[type_literal]; + ++pos; + + if (!close_bracket.ignore(pos, expected)) + throw Exception(ErrorCodes::SYNTAX_ERROR, "Syntax error near typeof"); + } + } + + out = std::format("kql_extract({}, {}, {})", source, regex, capture_group); + if (type_literal == "Decimal") + { + out = std::format("countSubstrings({0}, '.') > 1 ? 
NULL: {0}, length(substr({0}, position({0},'.') + 1)))", out); + out = std::format("toDecimal128OrNull({0})", out); + } + else + { + if (type_literal == "Boolean") + out = std::format("toInt64OrNull({})", out); + + if (!type_literal.empty()) + out = "accurateCastOrNull(" + out + ", '" + type_literal + "')"; + } + return true; +} + +bool ExtractAll::convertImpl(String & out, IParser::Pos & pos) +{ + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + const String regex = getConvertedArgument(fn_name, pos); + + ++pos; + const String second_arg = getConvertedArgument(fn_name, pos); + + String third_arg; + if (pos->type == TokenType::Comma) + { + ++pos; + third_arg = getConvertedArgument(fn_name, pos); + } + + if (!third_arg.empty()) // currently the captureGroups not supported + return false; + + out = "extractAllGroups(" + second_arg + ", " + regex + ")"; + return true; +} + +bool ExtractJson::convertImpl(String & out, IParser::Pos & pos) +{ + String datatype = "String"; + ParserKeyword s_kql("typeof"); + ParserToken open_bracket(TokenType::OpeningRoundBracket); + ParserToken close_bracket(TokenType::ClosingRoundBracket); + Expected expected; + + std::unordered_map type_cast + = {{"bool", "Boolean"}, + {"boolean", "Boolean"}, + {"datetime", "DateTime"}, + {"date", "DateTime"}, + {"dynamic", "Array"}, + {"guid", "UUID"}, + {"int", "Int32"}, + {"long", "Int64"}, + {"real", "Float64"}, + {"double", "Float64"}, + {"string", "String"}, + {"decimal", "Decimal"}}; + + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + const String json_datapath = getConvertedArgument(fn_name, pos); + ++pos; + const String json_datasource = getConvertedArgument(fn_name, pos); + if (pos->type == TokenType::Comma) + { + ++pos; + if (s_kql.ignore(pos, expected)) + { + if (!open_bracket.ignore(pos, expected)) + throw Exception(ErrorCodes::SYNTAX_ERROR, "Syntax error near typeof"); + + datatype = 
String(pos->begin, pos->end); + + if (type_cast.find(datatype) == type_cast.end()) + throw Exception(ErrorCodes::UNKNOWN_TYPE, "{} is not a supported kusto data type for {}", datatype, fn_name); + datatype = type_cast[datatype]; + ++pos; + + if (!close_bracket.ignore(pos, expected)) + throw Exception(ErrorCodes::SYNTAX_ERROR, "Syntax error near typeof"); + } + } + const auto json_val = std::format("JSON_VALUE({0},{1})", json_datasource, json_datapath); + if (datatype == "Decimal") + { + out = std::format("countSubstrings({0}, '.') > 1 ? NULL: length(substr({0}, position({0},'.') + 1)))", json_val); + out = std::format("toDecimal128OrNull({0}::String ,{1})", json_val, out); + } + else + { + if (datatype == "Boolean") + out = std::format("if(toInt64OrNull({}) > 0, true, false)", json_val); + else if (!datatype.empty()) + out = std::format("accurateCastOrNull({},'{}')", json_val, datatype); + } + return true; +} + +bool HasAnyIndex::convertImpl(String & out, IParser::Pos & pos) +{ + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + const String source = getConvertedArgument(fn_name, pos); + + ++pos; + const String lookup = getConvertedArgument(fn_name, pos); + String src_array = std::format("splitByChar(' ',{})", source); + out = std::format( + "if (empty({1}), -1, indexOf(arrayMap(x -> (x in {0}), if (empty({1}), [''], arrayMap(x -> (toString(x)), {1}))), 1) - 1)", + src_array, + lookup); + return true; +} + +bool IndexOf::convertImpl(String & out, IParser::Pos & pos) +{ + String start_index = "0", length = "-1", occurrence = "1"; + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + ++pos; + const String source = getConvertedArgument(fn_name, pos); + + ++pos; + const String lookup = getConvertedArgument(fn_name, pos); + + if (pos->type == TokenType::Comma) + { + ++pos; + start_index = getConvertedArgument(fn_name, pos); + + if (pos->type == TokenType::Comma) + { + ++pos; + length 
= getConvertedArgument(fn_name, pos); + + if (pos->type == TokenType::Comma) + { + ++pos; + occurrence = getConvertedArgument(fn_name, pos); + } + } + } + + out = std::format("kql_indexof({},{},{},{},{})", source, lookup, start_index, length, occurrence); + return true; +} + +bool IsEmpty::convertImpl(String & out, IParser::Pos & pos) +{ + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + const auto arg = getArgument(fn_name, pos, ArgumentState::Raw); + out.append("empty(" + kqlCallToExpression("tostring", {arg}, pos.max_depth) + ")"); + return true; +} + +bool IsNotEmpty::convertImpl(String & out, IParser::Pos & pos) +{ + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + const auto arg = getArgument(fn_name, pos, ArgumentState::Raw); + out.append("notEmpty(" + kqlCallToExpression("tostring", {arg}, pos.max_depth) + ")"); + return true; +} + +bool IsNotNull::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "isNotNull"); +} + +bool ParseCommandLine::convertImpl(String & out, IParser::Pos & pos) +{ + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + const String json_string = getConvertedArgument(fn_name, pos); + + ++pos; + const String type = getConvertedArgument(fn_name, pos); + + if (type != "'windows'") + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Supported type argument is windows for {}", fn_name); + + out = std::format( + "if(empty({0}) OR hasAll(splitByChar(' ', {0}) , ['']) , arrayMap(x->null, splitByChar(' ', '')), splitByChar(' ', {0}))", + json_string); + return true; +} + +bool IsNull::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "isNull"); +} + +bool ParseCSV::convertImpl(String & out, IParser::Pos & pos) +{ + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + String csv_string = getConvertedArgument(fn_name, 
pos); + + out = std::format( + "if(position({0} ,'\n')::UInt8, (splitByChar(',', substring({0}, 1, position({0},'\n') -1))), (splitByChar(',', substring({0}, 1, " + "length({0})))))", + csv_string); + return true; +} + +bool ParseJson::convertImpl(String & out, IParser::Pos & pos) +{ + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + if (String(pos->begin, pos->end) == "dynamic") + { + --pos; + auto arg = getArgument(fn_name, pos); + auto result = kqlCallToExpression("dynamic", {arg}, pos.max_depth); + out = std::format("{}", result); + } + else + { + auto arg = getConvertedArgument(fn_name, pos); + out = std::format("if (isValidJSON({0}) , JSON_QUERY({0}, '$') , toJSONString({0}))", arg); + } + return true; +} + +bool ParseURL::convertImpl(String & out, IParser::Pos & pos) +{ + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + const String url = getConvertedArgument(fn_name, pos); + + const String scheme = std::format(R"(concat('"Scheme":"', protocol({0}),'"'))", url); + const String host = std::format(R"(concat('"Host":"', domain({0}),'"'))", url); + String port = std::format(R"(concat('"Port":"', toString(port({0})),'"'))", url); + const String path = std::format(R"(concat('"Path":"', path({0}),'"'))", url); + const String username_pwd = std::format("netloc({0})", url); + const String query_string = std::format("queryString({0})", url); + const String fragment = std::format(R"(concat('"Fragment":"',fragment({0}),'"'))", url); + const String username = std::format( + R"(concat('"Username":"', arrayElement(splitByChar(':',arrayElement(splitByChar('@',{0}) ,1)),1),'"'))", username_pwd); + const String password = std::format( + R"(concat('"Password":"', arrayElement(splitByChar(':',arrayElement(splitByChar('@',{0}) ,1)),2),'"'))", username_pwd); + String query_parameters + = std::format(R"(concat('"Query Parameters":', concat('{{"', replace(replace({}, '=', 
'":"'),'&','","') ,'"}}')))", query_string); + + bool all_space = true; + for (char ch : url) + { + if (ch == '\'' || ch == '\"') + continue; + if (ch != ' ') + { + all_space = false; + break; + } + } + + if (all_space) + { + port = R"('"Port":""')"; + query_parameters = "'\"Query Parameters\":{}'"; + } + out = std::format( + "concat('{{',{},',',{},',',{},',',{},',',{},',',{},',',{},',',{},'}}')", + scheme, + host, + port, + path, + username, + password, + query_parameters, + fragment); + return true; +} + +bool ParseURLQuery::convertImpl(String & out, IParser::Pos & pos) +{ + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + ++pos; + const String query = getConvertedArgument(fn_name, pos); + + const String query_string = std::format("if (position({},'?') > 0, queryString({}), {})", query, query, query); + const String query_parameters + = std::format(R"(concat('"Query Parameters":', concat('{{"', replace(replace({}, '=', '":"'),'&','","') ,'"}}')))", query_string); + out = std::format("concat('{{',{},'}}')", query_parameters); + return true; +} + +bool ParseVersion::convertImpl(String & out, IParser::Pos & pos) +{ + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + String arg; + ++pos; + arg = getConvertedArgument(fn_name, pos); + out = std::format( + "length(splitByChar('.', {0})) > 4 OR length(splitByChar('.', {0})) < 1 OR match({0}, '.*[a-zA-Z]+.*') = 1 OR empty({0}) OR " + "hasAll(splitByChar('.', {0}) , ['']) ? 
toDecimal128OrNull('NULL' , 0) : " + "toDecimal128OrNull(substring(arrayStringConcat(arrayMap(x -> leftPad(x, 8, '0'), arrayMap(x -> if(empty(x), '0', x), " + "arrayResize(splitByChar('.', {0}), 4)))), 8),0)", + arg); + return true; +} + +bool ReplaceRegex::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "replaceRegexpAll"); +} + +bool Reverse::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto argument = getArgument(function_name, pos, ArgumentState::Raw); + out = std::format("reverse({})", kqlCallToExpression("tostring", {argument}, pos.max_depth)); + + return true; +} + +bool Split::convertImpl(String & out, IParser::Pos & pos) +{ + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + const String source = getConvertedArgument(fn_name, pos); + + ++pos; + const String delimiter = getConvertedArgument(fn_name, pos); + auto split_res = std::format("empty({0}) ? 
splitByString(' ' , {1}) : splitByString({0} , {1})", delimiter, source); + int requested_index = -1; + + if (pos->type == TokenType::Comma) + { + ++pos; + auto arg = getConvertedArgument(fn_name, pos); + // remove space between minus and value + arg.erase(remove_if(arg.begin(), arg.end(), isspace), arg.end()); + requested_index = std::stoi(arg); + requested_index += 1; + out = std::format( + "multiIf(length({0}) >= {1} AND {1} > 0 , arrayPushBack([],arrayElement({0}, {1})) , {1}=0 ,{0} , arrayPushBack([] " + ",arrayElement(NULL,1)))", + split_res, + requested_index); + } + else + out = split_res; + return true; +} + +bool StrCat::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto arguments = getArguments(function_name, pos, ArgumentState::Raw); + + out.append("concat("); + for (const auto & argument : arguments) + { + out.append(kqlCallToExpression("tostring", {argument}, pos.max_depth)); + out.append(", "); + } + + out.append("'')"); + return true; +} + +bool StrCatDelim::convertImpl(String & out, IParser::Pos & pos) +{ + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + const auto arguments = getArguments(fn_name, pos, ArgumentState::Raw, {2, 64}); + const String & delimiter = arguments[0]; + + String args; + args = "concat("; + for (size_t i = 1; i < arguments.size(); i++) + { + args += kqlCallToExpression("tostring", {arguments[i]}, pos.max_depth); + if (i < arguments.size() - 1) + args += ", " + delimiter + ", "; + } + args += ")"; + out = std::move(args); + return true; +} + +bool StrCmp::convertImpl(String & out, IParser::Pos & pos) +{ + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + const String string1 = getConvertedArgument(fn_name, pos); + ++pos; + const String string2 = getConvertedArgument(fn_name, pos); + + out = std::format("multiIf({0} == {1}, 0, 
{0} < {1}, -1, 1)", string1, string2); + return true; +} + +bool StrLen::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "lengthUTF8"); +} + +bool StrRep::convertImpl(String & out, IParser::Pos & pos) +{ + const String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + + const auto arguments = getArguments(fn_name, pos, ArgumentState::Raw, {2, 3}); + const String & value = arguments[0]; + const String & multiplier = arguments[1]; + + if (arguments.size() == 2) + out = "repeat(" + value + " , " + multiplier + ")"; + else if (arguments.size() == 3) + { + const String & delimiter = arguments[2]; + const String repeated_str + = "repeat(concat(" + kqlCallToExpression("tostring", {value}, pos.max_depth) + " , " + delimiter + ")," + multiplier + ")"; + out = "substr(" + repeated_str + ", 1, length(" + repeated_str + ") - length(" + delimiter + "))"; + } + return true; +} + +bool SubString::convertImpl(String & out, IParser::Pos & pos) +{ + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + + ++pos; + String source = getConvertedArgument(fn_name, pos); + + ++pos; + String starting_index = getConvertedArgument(fn_name, pos); + + if (pos->type == TokenType::Comma) + { + ++pos; + auto length = getConvertedArgument(fn_name, pos); + + if (starting_index.empty()) + throw Exception(ErrorCodes::SYNTAX_ERROR, "number of arguments do not match in function: {}", fn_name); + else + out = "if(toInt64(length(" + source + ")) <= 0, '', substr(" + source + ", " + "((" + starting_index + "% toInt64(length(" + + source + ")) + toInt64(length(" + source + "))) % toInt64(length(" + source + "))) + 1, " + length + ") )"; + } + else + out = "if(toInt64(length(" + source + ")) <= 0, '', substr(" + source + "," + "((" + starting_index + "% toInt64(length(" + source + + ")) + toInt64(length(" + source + "))) % toInt64(length(" + source + "))) + 1))"; + + return true; +} + +bool 
ToLower::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "lower"); +} + +bool ToUpper::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "upper"); +} + +bool Translate::convertImpl(String & out, IParser::Pos & pos) +{ + const String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + + ++pos; + String from = getConvertedArgument(fn_name, pos); + ++pos; + String to = getConvertedArgument(fn_name, pos); + ++pos; + String source = getConvertedArgument(fn_name, pos); + + String len_diff = std::format("length({}) - length({})", from, to); + String to_str = std::format( + "multiIf(length({1}) = 0, {0}, {2} > 0, concat({1},repeat(substr({1},length({1}),1),toUInt16({2}))),{2} < 0 , " + "substr({1},1,length({0})),{1})", + from, + to, + len_diff); + out = std::format("if (length({3}) = 0,'',translate({0},{1},{2}))", source, from, to_str, to); + return true; +} + +bool Trim::convertImpl(String & out, IParser::Pos & pos) +{ + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + const auto regex = getArgument(fn_name, pos, ArgumentState::Raw); + const auto source = getArgument(fn_name, pos, ArgumentState::Raw); + out = kqlCallToExpression("trim_start", {regex, std::format("trim_end({0}, {1})", regex, source)}, pos.max_depth); + + return true; +} + +bool TrimEnd::convertImpl(String & out, IParser::Pos & pos) +{ + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + const auto regex = getArgument(fn_name, pos); + const auto source = getArgument(fn_name, pos); + out = std::format("replaceRegexpOne({0}, concat({1}, '$'), '')", source, regex); + + return true; +} + +bool TrimStart::convertImpl(String & out, IParser::Pos & pos) +{ + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + const auto regex = getArgument(fn_name, pos); + const auto source = getArgument(fn_name, pos); + 
out = std::format("replaceRegexpOne({0}, concat('^', {1}), '')", source, regex); + + return true; +} +/* KQL url_decode(): direct mapping to decodeURLComponent. */ +bool URLDecode::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "decodeURLComponent"); +} +/* KQL url_encode(): direct mapping to encodeURLComponent. */ +bool URLEncode::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "encodeURLComponent"); +} + +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.h new file mode 100644 index 000000000000..5597fec54afe --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.h @@ -0,0 +1,274 @@ +#pragma once + +#include "IParserKQLFunction.h" +/* Declarations of the KQL string-function converters. Every class derives from IParserKQLFunction, returns its KQL function name from getName() and only declares convertImpl() here. */ +namespace DB +{ +class Base64EncodeToString : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "base64_encode_tostring()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Base64EncodeFromGuid : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "base64_encode_fromguid()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Base64DecodeToString : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "base64_decode_tostring()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Base64DecodeToArray : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "base64_decode_toarray()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Base64DecodeToGuid : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "base64_decode_toguid()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class CountOf : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "countof()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Extract : public 
IParserKQLFunction +{ +protected: + const char * getName() const override { return "extract()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ExtractAll : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "extract_all()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; +/* Unlike its siblings, this converter reports two KQL spellings from getName(): extract_json() and extractjson(). */ +class ExtractJson : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "extract_json(), extractjson()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class HasAnyIndex : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "has_any_index()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class IndexOf : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "indexof()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; +/* The next four declarations cover the emptiness / null predicates: isempty(), isnotempty(), isnotnull(), isnull(). */ +class IsEmpty : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "isempty()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class IsNotEmpty : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "isnotempty()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class IsNotNull : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "isnotnull()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class IsNull : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "isnull()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ParseCommandLine : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "parse_command_line()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ParseCSV : public IParserKQLFunction +{ +protected: + const char * 
getName() const override { return "parse_csv()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; +/* Converter declaration for KQL parse_json(); like the other classes in this header it only overrides getName() and convertImpl(). */ +class ParseJson : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "parse_json()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ParseURL : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "parse_url()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ParseURLQuery : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "parse_urlquery()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ParseVersion : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "parse_version()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ReplaceRegex : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "replace_regex()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Reverse : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "reverse()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Split : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "split()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; +/* The str* family follows: strcat(), strcat_delim(), strcmp(), strlen(), strrep(). */ +class StrCat : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "strcat()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class StrCatDelim : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "strcat_delim()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class StrCmp : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "strcmp()"; } + bool 
convertImpl(String &out,IParser::Pos &pos) override; +}; + +class StrLen : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "strlen()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class StrRep : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "strrep()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; +/* Converter declaration for KQL substring(). */ +class SubString : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "substring()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ToLower : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "tolower()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ToUpper : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "toupper()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Translate : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "translate()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; +/* The trim family: trim(), trim_end(), trim_start(). */ +class Trim : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "trim()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class TrimEnd : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "trim_end()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class TrimStart : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "trim_start()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class URLDecode : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "url_decode()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class URLEncode : public IParserKQLFunction +{ 
+protected: + const char * getName() const override { return "url_encode()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +} + diff --git a/src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.cpp new file mode 100644 index 000000000000..d5be8e262a84 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.cpp @@ -0,0 +1,111 @@ +#include "KQLTimeSeriesFunctions.h" + +namespace DB +{ +/* Every series_* converter in this file has the same placeholder body: it copies the text of the current token into out and returns false. NOTE(review): presumably false tells the caller that no translation was produced - confirm against IParserKQLFunction. */ +bool SeriesFir::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SeriesIir::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SeriesFitLine::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SeriesFitLineDynamic::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SeriesFit2lines::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SeriesFit2linesDynamic::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SeriesOutliers::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SeriesPeriodsDetect::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SeriesPeriodsValidate::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SeriesStatsDynamic::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool 
SeriesStats::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} +/* The series_fill_* converters use the same placeholder body: copy the current token text into out, return false. */ +bool SeriesFillBackward::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SeriesFillConst::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SeriesFillForward::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SeriesFillLinear::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.h new file mode 100644 index 000000000000..999a27f6b391 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.h @@ -0,0 +1,113 @@ +#pragma once + +#include "IParserKQLFunction.h" +/* Declarations of the KQL series_* converters; each class overrides getName() with its KQL function name and declares convertImpl(). */ +namespace DB +{ +class SeriesFir : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_fir()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SeriesIir : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_iir()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SeriesFitLine : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_fit_line()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SeriesFitLineDynamic : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_fit_line_dynamic()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SeriesFit2lines : public IParserKQLFunction +{ +protected: + const char * getName() const override { 
return "series_fit_2lines()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SeriesFit2linesDynamic : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_fit_2lines_dynamic()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; +/* Converter declaration for KQL series_outliers(). */ +class SeriesOutliers : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_outliers()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SeriesPeriodsDetect : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_periods_detect()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SeriesPeriodsValidate : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_periods_validate()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SeriesStatsDynamic : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_stats_dynamic()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SeriesStats : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_stats()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; +/* The series_fill_* declarations follow: backward, const, forward, linear. */ +class SeriesFillBackward : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_fill_backward()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SeriesFillConst : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_fill_const()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SeriesFillForward : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_fill_forward()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class 
SeriesFillLinear : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_fill_linear()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +} + diff --git a/src/Parsers/Kusto/ParserKQLCount.cpp b/src/Parsers/Kusto/ParserKQLCount.cpp new file mode 100644 index 000000000000..90e848583aa6 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLCount.cpp @@ -0,0 +1,33 @@ +#include +#include +#include +namespace DB +{ +namespace ErrorCodes +{ + extern const int SYNTAX_ERROR; +} +/* Parses the KQL count operator. Consumes the count keyword, throws SYNTAX_ERROR unless the next token is the end, a pipe or a semicolon, then converts the fixed KQL text Count = count() with getExprFromToken, re-tokenizes the result and installs the parsed list as the SELECT expression of node. NOTE(review): the include targets above and the template argument of node->as() are missing in this copy of the patch (angle-bracket text looks stripped); the original sources must be consulted before applying it. */ +bool ParserKQLCount::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (!ParserKeyword("count").ignore(pos, expected)) + return false; + + if (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + throw Exception(ErrorCodes::SYNTAX_ERROR, "Syntax error near count operator"); + + ASTPtr select_expression_list; + String converted_columns = getExprFromToken("Count = count()", pos.max_depth); + + Tokens token_converted_columns(converted_columns.c_str(), converted_columns.c_str() + converted_columns.size()); + IParser::Pos pos_converted_columns(token_converted_columns, pos.max_depth); + + if (!ParserNotEmptyExpressionList(true).parse(pos_converted_columns, select_expression_list, expected)) + return false; + + node->as()->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_expression_list)); + + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLCount.h b/src/Parsers/Kusto/ParserKQLCount.h new file mode 100644 index 000000000000..49132e090d7b --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLCount.h @@ -0,0 +1,16 @@ +#pragma once + +#include +#include + +namespace DB +{ +/* Parser class for the KQL count operator; getName() reports KQL count. */ +class ParserKQLCount : public ParserKQLBase +{ +protected: + const char * getName() const override { return "KQL count"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLDistinct.cpp b/src/Parsers/Kusto/ParserKQLDistinct.cpp new 
file mode 100644 index 000000000000..dcac882f5422 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLDistinct.cpp @@ -0,0 +1,29 @@ +#include +#include +#include +namespace DB +{ +/* Parses the KQL distinct operator: consumes the distinct keyword, converts the remaining tokens with getExprFromToken, re-tokenizes that text and parses it as a non-empty expression list, which becomes the SELECT expression; finally sets the distinct flag on node. */ +bool ParserKQLDistinct::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (!ParserKeyword("distinct").ignore(pos, expected)) + return false; + + ASTPtr select_expression_list; + String expr; + + expr = getExprFromToken(pos); + + Tokens tokens(expr.c_str(), expr.c_str() + expr.size()); + IParser::Pos new_pos(tokens, pos.max_depth); + + if (!ParserNotEmptyExpressionList(false).parse(new_pos, select_expression_list, expected)) + return false; + + node->as()->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_expression_list)); + node->as()->distinct = true; + + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLDistinct.h b/src/Parsers/Kusto/ParserKQLDistinct.h new file mode 100644 index 000000000000..eb997893d3e6 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLDistinct.h @@ -0,0 +1,17 @@ +#pragma once + +#include +#include + +namespace DB +{ +/* Parser class for the KQL distinct operator; getName() reports KQL distinct. */ +class ParserKQLDistinct : public ParserKQLBase +{ + +protected: + const char * getName() const override { return "KQL distinct"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLExtend.cpp b/src/Parsers/Kusto/ParserKQLExtend.cpp new file mode 100644 index 000000000000..255ca9b7a7b5 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLExtend.cpp @@ -0,0 +1,67 @@ +#include "Utilities.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +/* ParserKQLExtend::parseImpl below handles the KQL extend operator: it consumes the extend keyword and converts the rest of the operator text with getExprFromToken before re-tokenizing it. */ +namespace DB +{ +bool ParserKQLExtend ::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (!ParserKeyword("extend").ignore(pos, expected)) + return false; + + const auto extend_expr = getExprFromToken(pos); + Tokens ntokens(extend_expr.c_str(), 
extend_expr.c_str() + extend_expr.size()); + IParser::Pos npos(ntokens, pos.max_depth); + + ASTPtr expression_list; + if (!ParserNotEmptyExpressionList(false).parse(npos, expression_list, expected) || !npos->isEnd()) + return false; +/* Expressions that carry no alias get a generated one: Column1, Column2, ... The counter only advances for expressions that actually needed a name. */ + int column_index = 1; + std::ranges::for_each( + expression_list->children, + [&column_index](const ASTPtr & expression) + { + if (const auto alias = expression->tryGetAlias(); !alias.empty()) + return; + + expression->setAlias(std::format("Column{}", column_index)); + ++column_index; + }); +/* Build an asterisk node whose transformer list holds one transformer (an EXCEPT transformer, judging by the variable name; the concrete AST class names are missing in this copy). Its children are filled with one node per extended column name, and the asterisk is inserted in front of the extended expressions. */ + auto asterisk = std::make_shared(); + asterisk->transformers = std::make_shared(); + const auto & columns_except_transformer + = asterisk->children.emplace_back(asterisk->transformers)->children.emplace_back(std::make_shared()); + + std::ranges::transform( + expression_list->children, + std::back_inserter(columns_except_transformer->children), + [](const ASTPtr & child) { return std::make_shared(child->getAliasOrColumnName()); }); + + expression_list->children.insert(expression_list->children.cbegin(), std::move(asterisk)); + + node->as()->setExpression(ASTSelectQuery::Expression::SELECT, std::move(expression_list)); + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLExtend.h b/src/Parsers/Kusto/ParserKQLExtend.h new file mode 100644 index 000000000000..67bd55773be4 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLExtend.h @@ -0,0 +1,18 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ +/* Parser class for the KQL extend operator; getName() reports KQL extend. */ +class ParserKQLExtend : public ParserKQLBase +{ + +protected: + const char * getName() const override { return "KQL extend"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLFilter.cpp b/src/Parsers/Kusto/ParserKQLFilter.cpp index 3a399bdccdb1..7df3ea7959ef 100644 --- a/src/Parsers/Kusto/ParserKQLFilter.cpp +++ b/src/Parsers/Kusto/ParserKQLFilter.cpp @@ -8,8 +8,11 @@ namespace DB { -bool ParserKQLFilter :: parseImpl(Pos & pos, ASTPtr & node, Expected & 
expected) +bool ParserKQLFilter::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { + if (!ParserKeyword("filter").ignore(pos, expected) && !ParserKeyword("where").ignore(pos, expected)) + return false; + String expr = getExprFromToken(pos); ASTPtr where_expression; diff --git a/src/Parsers/Kusto/ParserKQLJoin.cpp b/src/Parsers/Kusto/ParserKQLJoin.cpp new file mode 100644 index 000000000000..dc7ef7cdd913 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLJoin.cpp @@ -0,0 +1,296 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int SYNTAX_ERROR; +} +/* Parses the KQL join operator: optional kind=<flavor>, then the right side (a bare identifier or a parenthesized expression), then the on-attributes. The pieces are assembled into SQL text further down and re-parsed. default_join is a sentinel only: later code compares join_kind against it and replaces it with INNER JOIN, so its spelling (UNINQUE) is not emitted by the code visible in this function. */ +bool ParserKQLJoin ::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (!ParserKeyword("join").ignore(pos, expected)) + return false; + + ASTPtr sub_query_node; + String str_right_table; + String str_attributes; + std::vector attribute_list; + std::vector left_columns; + const String default_join = "UNINQUE INNER JOIN"; + String join_kind = default_join; + String kql_join_kind = "innerunique"; + + ParserKeyword s_kind("kind"); + ParserToken equals(TokenType::Equals); + ParserToken open_bracket(TokenType::OpeningRoundBracket); + ParserIdentifier id_right_table; + + size_t paren_count = 0; + ASTPtr ast_right_table; +/* KQL join flavor -> SQL join keyword. anti and leftantisemi are synonyms of leftanti; rightantisemi is a synonym of rightanti. */ + std::unordered_map join_type + = {{"innerunique", default_join}, + {"inner", "INNER JOIN"}, + {"leftouter", "LEFT OUTER JOIN"}, + {"rightouter", "RIGHT OUTER JOIN"}, + {"fullouter", "FULL OUTER JOIN"}, + + {"leftanti", "LEFT ANTI JOIN"}, + {"anti", "LEFT ANTI JOIN"}, + {"leftantisemi", "LEFT ANTI JOIN"}, + + {"rightanti", "RIGHT ANTI JOIN"}, + {"rightantisemi", "RIGHT ANTI JOIN"}, + + {"leftsemi", "LEFT SEMI JOIN"}, + {"rightsemi", "RIGHT SEMI JOIN"}}; +/* Optional kind=<flavor>: a missing equals sign or an unknown flavor raises SYNTAX_ERROR. */ + if (s_kind.ignore(pos)) + { + if (!equals.ignore(pos)) + throw Exception(ErrorCodes::SYNTAX_ERROR, "Invalid kind for join operator"); + + String join_word(pos->begin, 
pos->end); + if (join_type.find(join_word) == join_type.end()) + throw Exception(ErrorCodes::SYNTAX_ERROR, "Invalid value of kind for join operator"); + + join_kind = join_type[join_word]; + kql_join_kind = join_word; + ++pos; + } +/* keyword_on_pos starts equal to right_table_start_pos; the scan loop further down moves it to the first top-level on token, so equality afterwards means no on keyword was found. */ + Pos right_table_start_pos = pos; + Pos keyword_on_pos = pos; + + bool has_bracket = open_bracket.ignore(pos); + + if (!has_bracket) + { + if (!id_right_table.parse(pos, ast_right_table, expected)) + return false; + } + else + paren_count = 1; + + Pos attributes_start_pos = pos; + bool attributes_on_column = false; +/* parse_attribute handles one join attribute spanning [start_pos, end_pos]. Surrounding round brackets are skipped first. A single BareWord token is a column shared by both sides. Anything else must have the form left_.x == right_.y (or $left.x == $right.y); when x and y differ the whole text is kept and attributes_on_column is set, otherwise only the column name is recorded. */ + auto parse_attribute = [&](Pos & start_pos, Pos & end_pos) + { + while (start_pos < end_pos && start_pos->type == TokenType::OpeningRoundBracket) + ++start_pos; + while (start_pos < end_pos && end_pos->type == TokenType::ClosingRoundBracket) + --end_pos; + + if (start_pos == end_pos) + { + if (start_pos->type != TokenType::BareWord) + return false; + attribute_list.push_back(String(start_pos->begin, end_pos->end)); + left_columns.push_back(String(start_pos->begin, end_pos->end)); + } + else + { + String left_column, right_column; + auto get_coulmn = [&]() + { + String left_alias = "left_.", right_alias = "right_."; + String left_alias2 = "$left.", right_alias2 = "$right."; + + auto attribute_str = String(start_pos->begin, end_pos->end); + + if (attribute_str.substr(0, left_alias.length()) != left_alias + && attribute_str.substr(0, left_alias2.length()) != left_alias2) + return false; + + auto r_begin = attribute_str.find("=="); + if (r_begin == std::string::npos) + return false; + if (attribute_str.substr(0, left_alias.length()) == left_alias) + left_column = attribute_str.substr(left_alias.length(), r_begin - left_alias.length()); + else + left_column = attribute_str.substr(left_alias2.length(), r_begin - left_alias2.length()); + + r_begin += 2; + while (r_begin < attribute_str.length() && attribute_str[r_begin] <= 0x20) + ++r_begin; + + if (attribute_str.substr(r_begin, right_alias.length()) != right_alias + && 
attribute_str.substr(r_begin, right_alias2.length()) != right_alias2) + return false; + + right_column = attribute_str.substr(r_begin + right_alias.length()); + return true; + }; + + if (!get_coulmn()) + return false; + trim(left_column); + trim(right_column); + left_columns.push_back(left_column); + + if (left_column != right_column) + { + attributes_on_column = true; + attribute_list.push_back(String(start_pos->begin, end_pos->end)); + } + else + attribute_list.push_back(left_column); + } + + return true; + }; +/* update_attributes closes the attribute that ends on the token just before pos, hands it to parse_attribute, and moves attributes_start_pos to the token after pos. An empty or malformed attribute raises SYNTAX_ERROR. */ + auto update_attributes = [&] + { + auto temp_pos = pos; + --temp_pos; + + if (temp_pos < attributes_start_pos || !parse_attribute(attributes_start_pos, temp_pos)) + throw Exception(ErrorCodes::SYNTAX_ERROR, "Attributes error for join or lookup operator"); + attributes_start_pos = pos; + ++attributes_start_pos; + }; +/* Scan until the end, a semicolon or a top-level pipe. The first top-level on token marks the start of the attributes; each top-level comma after it closes one attribute. NOTE(review): paren_count is a size_t, so a closing bracket seen while it is 0 wraps around instead of going negative, after which none of the paren_count == 0 checks can succeed - confirm whether unbalanced input can reach this loop. */ + while (!pos->isEnd() && pos->type != TokenType::Semicolon) + { + if (pos->type == TokenType::OpeningRoundBracket) + ++paren_count; + if (pos->type == TokenType::ClosingRoundBracket) + --paren_count; + if (pos->type == TokenType::PipeMark && paren_count == 0) + break; + + if (String(pos->begin, pos->end) == "on" && paren_count == 0) + { + if (keyword_on_pos == right_table_start_pos) + { + keyword_on_pos = pos; + attributes_start_pos = pos; + ++attributes_start_pos; + } + } + + if (pos->type == TokenType::Comma && right_table_start_pos < keyword_on_pos && paren_count == 0) + { + update_attributes(); + } + ++pos; + } +/* The last attribute has no trailing comma, so it is closed here. */ + update_attributes(); + + if (keyword_on_pos <= right_table_start_pos) + throw Exception(ErrorCodes::SYNTAX_ERROR, "Missing right table or 'on' for join or lookup operator"); +/* The right-table text runs from right_table_start_pos to the token before on. A single token is used as is; a longer span gets the prefix kql in front of it. */ + --keyword_on_pos; + if (right_table_start_pos == keyword_on_pos) + str_right_table = String(right_table_start_pos->begin, keyword_on_pos->end); + else + str_right_table = std::format("kql{}", String(right_table_start_pos->begin, keyword_on_pos->end)); + + ++keyword_on_pos; + ++keyword_on_pos; + --pos; + if (pos < keyword_on_pos) + throw 
Exception(ErrorCodes::SYNTAX_ERROR, "Missing attributes for join or lookup operator"); +/* innerunique (the default flavor): the left input is first wrapped in SELECT DISTINCT ON (left join columns) * and the join itself then runs as a plain INNER JOIN. */ + String query_join; + if (join_kind == default_join) + { + join_kind = "INNER JOIN"; + String distinct_column; + for (auto const & col : left_columns) + distinct_column = distinct_column.empty() ? col : distinct_column + "," + col; + + String distinct_query = std::format("(SELECT DISTINCT ON ({}) * FROM dum_tbl)", distinct_column); + if (!parseSQLQueryByString(std::make_unique(), distinct_query, sub_query_node, pos.max_depth)) + return false; + if (!setSubQuerySource(sub_query_node, node, true, false)) + return false; + node = std::move(sub_query_node); + } +/* If at least one attribute compares differently named columns, an ON clause is generated: plain column names become left_.c == right_.c, and the first $left. / $right. occurrence in an attribute is rewritten to left_. / right_. Otherwise all attributes are plain names and a USING list is enough. */ + if (attributes_on_column) + { + auto replace = [&](std::string & str, const std::string & from, const std::string & to) + { + size_t start_pos = str.find(from); + if (start_pos != std::string::npos) + str.replace(start_pos, from.length(), to); + }; + + for (auto str : attribute_list) + { + if (str.substr(0, 6) != "left_." && str.substr(0, 6) != "$left.") + str = std::format("left_.{0} == right_.{0}", str); + else if (str.substr(0, 6) == "$left.") + { + replace(str, "$left.", "left_."); + replace(str, "$right.", "right_."); + } + + str_attributes = str_attributes.empty() ? str : str_attributes + " and " + str; + } + query_join = std::format("SELECT * FROM tbl {} {} ON {}", join_kind, str_right_table, str_attributes); + } + else + { + for (auto const & str : attribute_list) + str_attributes = str_attributes.empty() ? 
str : str_attributes + "," + str; + + query_join = std::format("SELECT * FROM tbl {} {} USING {}", join_kind, str_right_table, str_attributes); + } + + if (!parseSQLQueryByString(std::make_unique(), query_join, sub_query_node, pos.max_depth)) + return false; +/* Give the second table expression of the parsed join (a subquery or a plain table name) the alias right_. The previous pipeline stage is then attached as the source through setSubQuerySource with the alias left_; the only difference between the two calls below is the fourth argument, which is true for innerunique. */ + ASTPtr table_expr; + if (sub_query_node->as()->tables() + && sub_query_node->as()->tables()->as()->children.size() > 1) + { + table_expr = sub_query_node->as()->tables()->as()->children[1]; + if (table_expr->as()->table_expression->as()->subquery) + table_expr->as()->table_expression->as()->subquery->as()->alias + = "right_"; + else if (table_expr->as()->table_expression->as()->database_and_table_name) + { + table_expr + = table_expr->as()->table_expression->as()->database_and_table_name; + if (auto * ast_with_alias = dynamic_cast(table_expr.get())) + ast_with_alias->alias = "right_"; + } + } + if (kql_join_kind == "innerunique") + { + if (!setSubQuerySource(sub_query_node, node, false, true, "left_")) + return false; + } + else + { + if (!setSubQuerySource(sub_query_node, node, false, false, "left_")) + return false; + } + + node = std::move(sub_query_node); + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLJoin.h b/src/Parsers/Kusto/ParserKQLJoin.h new file mode 100644 index 000000000000..185f91bdeb2f --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLJoin.h @@ -0,0 +1,17 @@ +#pragma once + +#include +#include + +namespace DB +{ +/* Parser class for the KQL join operator; getName() reports KQL Join. */ +class ParserKQLJoin : public ParserKQLBase +{ +protected: + const char * getName() const override { return "KQL Join"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLLimit.cpp b/src/Parsers/Kusto/ParserKQLLimit.cpp index bb8e08fd3786..921e95c33aa0 100644 --- a/src/Parsers/Kusto/ParserKQLLimit.cpp +++ b/src/Parsers/Kusto/ParserKQLLimit.cpp @@ -2,15 +2,15 @@ #include #include #include -#include -#include -#include namespace DB { -bool ParserKQLLimit :: parseImpl(Pos & pos, ASTPtr & node, 
Expected & expected) +bool ParserKQLLimit::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { + if (!ParserKeyword("limit").ignore(pos, expected) && !ParserKeyword("take").ignore(pos, expected)) + return false; + ASTPtr limit_length; auto expr = getExprFromToken(pos); diff --git a/src/Parsers/Kusto/ParserKQLLookup.cpp b/src/Parsers/Kusto/ParserKQLLookup.cpp new file mode 100644 index 000000000000..72b508b7ef9d --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLLookup.cpp @@ -0,0 +1,80 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int SYNTAX_ERROR; +} +/* Rewrites a KQL lookup operator into the text of an equivalent join operator, stored in query as: join kind=<flavor> <rest of the operator>. Only kind=leftouter (the default) and kind=inner are accepted; any other value, or a kind keyword without an equals sign, raises SYNTAX_ERROR. Returns false when the text is not a lookup operator or nothing follows the keyword. pos is taken by value, so the position of the caller is left untouched. */ +bool ParserKQLLookup::updatePipeLine(Pos pos, String & query) +{ + if (!ParserKeyword("lookup").ignore(pos)) + return false; + + if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon) + return false; + + String join_kind = "kind=leftouter"; + ParserKeyword s_kind("kind"); + ParserToken equals(TokenType::Equals); + auto end_pos = pos; + + if (s_kind.ignore(pos)) + { + if (!equals.ignore(pos)) + throw Exception(ErrorCodes::SYNTAX_ERROR, "Invalid kind for lookup operator"); + + if (ParserKeyword("leftouter").ignore(pos)) + join_kind = "kind=leftouter"; + else if (ParserKeyword("inner").ignore(pos)) + join_kind = "kind=inner"; + else + throw Exception(ErrorCodes::SYNTAX_ERROR, "Invalid value of kind for lookup operator"); + } + Pos right_table_start_pos = pos; +/* Advance to the end, a semicolon or a top-level pipe, remembering the last token consumed in end_pos. NOTE(review): paren_count is a size_t, so a closing bracket seen while it is 0 wraps around and the top-level pipe check can no longer succeed - confirm whether unbalanced input can reach this loop. */ + size_t paren_count = 0; + while (!pos->isEnd() && pos->type != TokenType::Semicolon) + { + if (pos->type == TokenType::OpeningRoundBracket) + ++paren_count; + if (pos->type == TokenType::ClosingRoundBracket) + --paren_count; + if (pos->type == TokenType::PipeMark && paren_count == 0) + break; + end_pos = pos; + ++pos; + } + + String right_expr = (right_table_start_pos <= end_pos) ? 
String(right_table_start_pos->begin, end_pos->end) : ""; + if (right_expr.empty()) + throw Exception(ErrorCodes::SYNTAX_ERROR, "lookup operator need right table"); + + query = std::format("join {} {} ", join_kind, right_expr); + + return true; +} +/* parseImpl ignores all of its arguments and always returns true; the visible work for lookup happens in updatePipeLine. */ +bool ParserKQLLookup::parseImpl(Pos & /*pos*/, ASTPtr & /*node*/, Expected & /*expected*/) //(Pos & pos, ASTPtr & node, Expected & expected) +{ + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLLookup.h b/src/Parsers/Kusto/ParserKQLLookup.h new file mode 100644 index 000000000000..57cf7e0d3627 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLLookup.h @@ -0,0 +1,18 @@ +#pragma once + +#include +#include + +namespace DB +{ +/* Parser class for the KQL lookup operator; besides parseImpl it overrides updatePipeLine. */ +class ParserKQLLookup : public ParserKQLBase +{ +protected: + const char * getName() const override { return "KQL lookup"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + bool updatePipeLine (Pos pos, String & query) override; + +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLMVExpand.cpp b/src/Parsers/Kusto/ParserKQLMVExpand.cpp new file mode 100644 index 000000000000..dd95b52b7c42 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLMVExpand.cpp @@ -0,0 +1,307 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace DB::ErrorCodes +{ +extern const int UNKNOWN_TYPE; +} + +namespace DB +{ +/* Lookup table from KQL type names to the type names used on the SQL side; both bool/boolean, datetime/date and real/double map to the same target. */ +std::unordered_map ParserKQLMVExpand::type_cast = +{ {"bool", "Boolean"}, + {"boolean", "Boolean"}, + {"datetime", "DateTime"}, + {"date", "DateTime"}, + {"guid", "UUID"}, + {"int", "Int32"}, + {"long", "Int64"}, + {"real", "Float64"}, + {"double", "Float64"}, + {"string", "String"} +}; + +bool ParserKQLMVExpand::parseColumnArrayExprs(ColumnArrayExprs & column_array_exprs, Pos & pos, Expected & expected) +{ + ParserToken equals(TokenType::Equals); + ParserToken open_bracket(TokenType::OpeningRoundBracket); + ParserToken close_bracket(TokenType::ClosingRoundBracket); + ParserToken 
comma(TokenType::Comma); + + ParserKeyword s_to("to"); + ParserKeyword s_type("typeof"); + uint16_t bracket_count = 0; + Pos expr_begin_pos = pos; + Pos expr_end_pos = pos; + + String alias; + String column_array_expr; + String to_type; + --expr_end_pos; + + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + if (pos->type == TokenType::OpeningRoundBracket) + ++bracket_count; + + if (pos->type == TokenType::ClosingRoundBracket) + --bracket_count; + + if (String(pos->begin,pos->end) == "=") + { + --pos; + alias = String(pos->begin, pos->end); + ++pos; + ++pos; + expr_begin_pos = pos; + } + + auto add_columns = [&] + { + column_array_expr = getExprFromToken(String(expr_begin_pos->begin, expr_end_pos->end), pos.max_depth); + + if (alias.empty()) + { + alias = expr_begin_pos == expr_end_pos ? column_array_expr : String(expr_begin_pos->begin,expr_begin_pos->end) + "_"; + } + column_array_exprs.push_back(ColumnArrayExpr(alias, column_array_expr, to_type)); + }; + + if (s_to.ignore(pos, expected)) + { + --pos; + --pos; + expr_end_pos = pos; + ++pos; + ++pos; + + column_array_expr = String(expr_begin_pos->begin, expr_end_pos->end); + + if (!s_type.ignore(pos, expected)) + return false; + if (!open_bracket.ignore(pos, expected)) + return false; + to_type = String(pos->begin, pos->end); + + if (!type_cast.contains(to_type)) + throw Exception(ErrorCodes::UNKNOWN_TYPE, "{} is not a supported kusto data type for mv-expand", to_type); + + ++pos; + if (!close_bracket.ignore(pos, expected)) + return false; + --pos; + } + + if ((pos->type == TokenType::Comma && bracket_count == 0) || String(pos->begin, pos->end) == "limit" || pos->type == TokenType::Semicolon) + { + if (column_array_expr.empty()) + { + expr_end_pos = pos; + --expr_end_pos; + } + add_columns(); + expr_begin_pos = pos; + expr_end_pos = pos; + ++expr_begin_pos; + + alias.clear(); + column_array_expr.clear(); + to_type.clear(); + + if (pos->type == TokenType::Semicolon) 
+ break; + } + + if (String(pos->begin, pos->end) == "limit") + break; + ++pos; + if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon) + { + if (expr_end_pos < expr_begin_pos) + { + expr_end_pos = pos; + --expr_end_pos; + } + add_columns(); + break; + } + } + return true; +} + +bool ParserKQLMVExpand::parserMVExpand(KQLMVExpand & kql_mv_expand, Pos & pos, Expected & expected) +{ + ParserKeyword s_bagexpansion("bagexpansion"); + ParserKeyword s_kind("kind"); + ParserKeyword s_with_itemindex("with_itemindex"); + ParserKeyword s_limit("limit"); + + ParserToken equals(TokenType::Equals); + ParserToken comma(TokenType::Comma); + + auto & column_array_exprs = kql_mv_expand.column_array_exprs; + auto & bagexpansion = kql_mv_expand.bagexpansion; + auto & with_itemindex = kql_mv_expand.with_itemindex; + auto & limit = kql_mv_expand.limit; + + if (s_bagexpansion.ignore(pos, expected)) + { + if (!equals.ignore(pos, expected)) + return false; + bagexpansion = String(pos->begin, pos->end); + ++pos; + } + else if (s_kind.ignore(pos, expected)) + { + if (!equals.ignore(pos, expected)) + return false; + bagexpansion = String(pos->begin, pos->end); + ++pos; + } + + if (s_with_itemindex.ignore(pos, expected)) + { + if (!equals.ignore(pos, expected)) + return false; + with_itemindex = String(pos->begin, pos->end); + ++pos; + } + + if (!parseColumnArrayExprs(column_array_exprs, pos, expected)) + return false; + + if (s_limit.ignore(pos, expected)) + limit = String(pos->begin, pos->end); + + return true; +} + +bool ParserKQLMVExpand::genQuery(KQLMVExpand & kql_mv_expand, ASTPtr & select_node, int32_t max_depth) +{ + String expand_str; + String cast_type_column_remove, cast_type_column_rename ; + String cast_type_column_restore, cast_type_column_restore_name ; + String row_count_str; + String extra_columns; + String input = "dummy_input"; + for (auto column : kql_mv_expand.column_array_exprs) + { + if (column.alias == column.column_array_expr) + 
expand_str = expand_str.empty() ? String("ARRAY JOIN ") + column.alias : expand_str + "," + column.alias; + else + { + expand_str = expand_str.empty() ? std::format("ARRAY JOIN {} AS {} ", column.column_array_expr, column.alias): expand_str + std::format(", {} AS {}", column.column_array_expr, column.alias); + extra_columns = extra_columns + ", " + column.alias; + } + + if (!column.to_type.empty()) + { + cast_type_column_remove = cast_type_column_remove.empty() ? " Except " + column.alias : cast_type_column_remove + " Except " + column.alias ; + String rename_str; + + if (type_cast[column.to_type] == "Boolean") + rename_str = std::format("accurateCastOrNull(toInt64OrNull(toString({0})),'{1}') as {0}_ali",column.alias, type_cast[column.to_type]); + else + rename_str = std::format("accurateCastOrNull({0},'{1}') as {0}_ali",column.alias, type_cast[column.to_type]); + + cast_type_column_rename = cast_type_column_rename.empty() ? rename_str : cast_type_column_rename + "," + rename_str; + cast_type_column_restore = cast_type_column_restore.empty() ? std::format(" Except {}_ali ", column.alias) : cast_type_column_restore + std::format(" Except {}_ali ", column.alias); + cast_type_column_restore_name = cast_type_column_restore_name.empty() ? std::format("{0}_ali as {0}", column.alias) : cast_type_column_restore_name + std::format(", {0}_ali as {0}", column.alias); + } + + if (!kql_mv_expand.with_itemindex.empty()) + { + row_count_str = row_count_str.empty() ? 
"length("+column.alias+")" : row_count_str + ", length("+column.alias+")"; + } + } + + String columns = "*"; + if (!row_count_str.empty()) + { + expand_str += std::format(", range(0, arrayMax([{}])) AS {} ", row_count_str, kql_mv_expand.with_itemindex); + columns = kql_mv_expand.with_itemindex + " , " + columns; + } + + if (!kql_mv_expand.limit.empty()) + expand_str += " LIMIT " + kql_mv_expand.limit; + + auto query = std::format("(Select {} {} From {} {})", columns, extra_columns, input, expand_str); + + ASTPtr sub_query_node; + Expected expected; + + if (cast_type_column_remove.empty()) + { + query = std::format("Select {} {} From {} {}", columns, extra_columns, input, expand_str); + if (!parseSQLQueryByString(std::make_unique(), query, sub_query_node, max_depth)) + return false; + if (!setSubQuerySource(sub_query_node, select_node, false, false)) + return false; + select_node = std::move(sub_query_node); + } + else + { + query = std::format("(Select {} {} From {} {})", columns, extra_columns, input, expand_str); + if (!parseSQLQueryByString(std::make_unique(), query, sub_query_node, max_depth)) + return false; + if (!setSubQuerySource(sub_query_node, select_node, true, false)) + return false; + select_node = std::move(sub_query_node); + + auto rename_query = std::format("(Select * {}, {} From {})", cast_type_column_remove, cast_type_column_rename, "query"); + if (!parseSQLQueryByString(std::make_unique(), rename_query, sub_query_node, max_depth)) + return false; + if (!setSubQuerySource(sub_query_node, select_node, true, true)) + return false; + + select_node = std::move(sub_query_node); + query = std::format("Select * {}, {} from {}", cast_type_column_restore, cast_type_column_restore_name, "rename_query"); + + if (!parseSQLQueryByString(std::make_unique(), query, sub_query_node, max_depth)) + return false; + sub_query_node->as()->setExpression(ASTSelectQuery::Expression::TABLES, std::move(select_node)); + select_node = std::move(sub_query_node); + } + return 
true; +} + +bool ParserKQLMVExpand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (!ParserSequence("mv-expand").ignore(pos, expected)) + return false; + + ASTPtr setting; + ASTPtr select_expression_list; + auto begin = pos; + + KQLMVExpand kql_mv_expand; + if (!parserMVExpand(kql_mv_expand, pos, expected)) + return false; + if (!genQuery(kql_mv_expand, node, pos.max_depth)) + return false; + + const String setting_str = "enable_unaligned_array_join = 1"; + Tokens token_settings(setting_str.c_str(), setting_str.c_str() + setting_str.size()); + IParser::Pos pos_settings(token_settings, pos.max_depth); + + if (!ParserSetQuery(true).parse(pos_settings, setting, expected)) + return false; + node->as()->setExpression(ASTSelectQuery::Expression::SETTINGS, std::move(setting)); + + pos = begin; + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLMVExpand.h b/src/Parsers/Kusto/ParserKQLMVExpand.h new file mode 100644 index 000000000000..ee5719dd4f8a --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLMVExpand.h @@ -0,0 +1,40 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLMVExpand : public ParserKQLBase +{ + +protected: + static std::unordered_map type_cast; + + struct ColumnArrayExpr + { + String alias; + String column_array_expr; + String to_type; + ColumnArrayExpr(String alias_, String column_array_expr_, String to_type_) + :alias(alias_), column_array_expr(column_array_expr_), to_type(to_type_){} + }; + using ColumnArrayExprs = std::vector; + + struct KQLMVExpand + { + ColumnArrayExprs column_array_exprs; + String bagexpansion; + String with_itemindex; + String limit; + }; + + static bool parseColumnArrayExprs(ColumnArrayExprs & column_array_exprs, Pos & pos, Expected & expected); + static bool parserMVExpand(KQLMVExpand & kql_mv_expand, Pos & pos, Expected & expected); + static bool genQuery(KQLMVExpand & kql_mv_expand, ASTPtr & select_node, int32_t max_depth); + + const char * getName() const override { return "KQL 
mv-expand"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; +} diff --git a/src/Parsers/Kusto/ParserKQLMakeSeries.cpp b/src/Parsers/Kusto/ParserKQLMakeSeries.cpp new file mode 100644 index 000000000000..735304ba50a3 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLMakeSeries.cpp @@ -0,0 +1,422 @@ +#include "ParserKQLTimespan.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace DB +{ + +bool ParserKQLMakeSeries :: parseAggregationColumns(AggregationColumns & aggregation_columns, Pos & pos) +{ + std::unordered_set allowed_aggregation + ({ + "avg", + "avgif", + "count", + "countif", + "dcount", + "dcountif", + "max", + "maxif", + "min", + "minif", + "percentile", + "take_any", + "stdev", + "sum", + "sumif", + "variance" + }); + + Expected expected; + ParserKeyword s_default("default"); + ParserToken equals(TokenType::Equals); + ParserToken open_bracket(TokenType::OpeningRoundBracket); + ParserToken close_bracket(TokenType::ClosingRoundBracket); + ParserToken comma(TokenType::Comma); + + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + String alias; + String aggregation_fun; + String column; + double default_value = 0; + + String first_token(pos->begin,pos->end); + + ++pos; + if (equals.ignore(pos, expected)) + { + alias = std::move(first_token); + aggregation_fun = String(pos->begin,pos->end); + ++pos; + } + else + aggregation_fun = std::move(first_token); + + if (allowed_aggregation.find(aggregation_fun) == allowed_aggregation.end()) + return false; + + if (open_bracket.ignore(pos, expected)) + column = String(pos->begin,pos->end); + else + return false; + + ++pos; + if (!close_bracket.ignore(pos, expected)) + return false; + + if (s_default.ignore(pos, expected)) + { + if (!equals.ignore(pos, expected)) + return false; + + default_value = std::stod(String(pos->begin,pos->end)); + ++pos; + } + if (alias.empty()) + alias = 
std::format("{}_{}", aggregation_fun, column); + aggregation_columns.push_back(AggregationColumn(alias, aggregation_fun, column, default_value)); + + if (!comma.ignore(pos, expected)) + break; + } + return true; +} + +bool ParserKQLMakeSeries :: parseFromToStepClause(FromToStepClause & from_to_step, Pos & pos) +{ + auto begin = pos; + auto from_pos = begin; + auto to_pos = begin; + auto step_pos = begin; + auto end_pos = begin; + + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + if (String(pos->begin, pos->end) == "from") + from_pos = pos; + if (String(pos->begin, pos->end) == "to") + to_pos = pos; + if (String(pos->begin, pos->end) == "step") + step_pos = pos; + if (String(pos->begin, pos->end) == "by") + { + end_pos = pos; + break; + } + ++pos; + } + + if (end_pos == begin) + end_pos = pos; + + if (String(step_pos->begin, step_pos->end) != "step") + return false; + + if (String(from_pos->begin, from_pos->end) == "from") + { + ++from_pos; + auto end_from_pos = (to_pos != begin) ? 
to_pos : step_pos; + --end_from_pos; + from_to_step.from_str = String(from_pos->begin, end_from_pos->end); + } + + if (String(to_pos->begin, to_pos->end) == "to") + { ++to_pos; + --step_pos; + from_to_step.to_str = String(to_pos->begin, step_pos->end); + ++step_pos; + } + --end_pos; + ++step_pos; + from_to_step.step_str = String(step_pos->begin, end_pos->end); + + if (std::optional ticks; String(step_pos->begin, step_pos->end) == "time" || String(step_pos->begin, step_pos->end) == "timespan" + || ParserKQLTimespan::tryParse(from_to_step.step_str, ticks)) + { + // TODO: this is a hack of the ugliest kind that can only be fixed by supporting arbitrary expressions in make-series + static constexpr std::string_view wrapper = "toIntervalNanosecond("; + const auto timespan = getExprFromToken(from_to_step.step_str, pos.max_depth); + const auto value = timespan.substr(wrapper.length(), timespan.length() - wrapper.length() - 1); + + from_to_step.is_timespan = true; + from_to_step.step = std::stod(value) * 1e-9; + } + else + from_to_step.step = std::stod(from_to_step.step_str); + + return true; +} + +bool ParserKQLMakeSeries :: makeSeries(KQLMakeSeries & kql_make_series, ASTPtr & select_node, const uint32_t & max_depth) +{ + const uint64_t era_diff = 62135596800; // this magic number is the differicen is second form 0001-01-01 (Azure start time ) and 1970-01-01 (CH start time) + + String start_str, end_str; + String sub_query, main_query; + + auto & aggregation_columns = kql_make_series.aggregation_columns; + auto & from_to_step = kql_make_series.from_to_step; + auto & subquery_columns = kql_make_series.subquery_columns; + auto & axis_column = kql_make_series.axis_column; + auto & group_expression = kql_make_series.group_expression; + auto step = from_to_step.step; + + if (!kql_make_series.from_to_step.from_str.empty()) + start_str = getExprFromToken(kql_make_series.from_to_step.from_str, max_depth); + + if (!kql_make_series.from_to_step.to_str.empty()) + end_str = 
getExprFromToken(from_to_step.to_str, max_depth); + + auto date_type_cast = [&] (String & src) + { + Tokens tokens(src.c_str(), src.c_str() + src.size()); + IParser::Pos pos(tokens, max_depth); + String res; + while (!pos->isEnd()) + { + String tmp = String(pos->begin, pos->end); + if (tmp == "kql_datetime" || tmp == "kql_todatetime") + { + ++pos; + auto datetime_start_pos = pos; + auto datetime_end_pos = pos; + auto paren_count = 0; + while (!pos->isEnd()) + { + if (pos->type == TokenType::OpeningRoundBracket) + ++paren_count; + if (pos->type == TokenType::ClosingRoundBracket) + --paren_count; + + if (pos->type == TokenType::ClosingRoundBracket && paren_count == 0) + { + ++datetime_start_pos; + datetime_end_pos = pos; + --datetime_end_pos; + tmp = std::format("toDateTime64({}, 9, 'UTC')",String(datetime_start_pos->begin, datetime_end_pos->end)); + break; + } + + ++pos; + } + } + res = res.empty() ? tmp : res + " " + tmp; + ++pos; + } + return res; + }; + + start_str = date_type_cast(start_str); + end_str = date_type_cast(end_str); + + String bin_str, start, end; + + uint64_t diff = 0; + String axis_column_format; + String axis_str; + + auto get_group_expression_alias = [&] + { + std::vector group_expression_tokens; + Tokens tokens(group_expression.c_str(), group_expression.c_str() + group_expression.size()); + IParser::Pos pos(tokens, max_depth); + while (!pos->isEnd()) + { + if (String(pos->begin, pos->end) == "AS") + { + if (!group_expression_tokens.empty()) + group_expression_tokens.pop_back(); + ++pos; + group_expression_tokens.push_back(String(pos->begin, pos->end)); + } + else + group_expression_tokens.push_back(String(pos->begin, pos->end)); + ++pos; + } + String res; + for (auto const & token : group_expression_tokens) + res = res + token + " "; + return res; + }; + + auto group_expression_alias = get_group_expression_alias(); + + if (from_to_step.is_timespan) + { + axis_column_format = std::format("toFloat64(toDateTime64({}, 9, 'UTC'))", axis_column); + } 
+ else + axis_column_format = std::format("toFloat64({})", axis_column); + + if (!start_str.empty()) // has from + { + bin_str = std::format(" toFloat64({0}) + (toInt64((({1} - toFloat64({0})) / {2})) * {2}) AS {3}_ali", + start_str, axis_column_format, step, axis_column); + start = std::format("toUInt64({})", start_str); + } + else + { + if (from_to_step.is_timespan) + diff = era_diff; + bin_str = std::format(" toFloat64(toInt64(({0} + {1}) / {2}) * {2}) AS {3}_ali ", axis_column_format, diff, step, axis_column); + } + + if (!end_str.empty()) + end = std::format("toUInt64({})", end_str); + + String range, condition; + + if (!start_str.empty() && !end_str.empty()) + { + range = std::format("range({}, {}, toUInt64({}))", start, end, step); + condition = std::format("where toInt64({0}) >= {1} and toInt64({0}) < {2}", axis_column_format, start, end); + } + else if (start_str.empty() && !end_str.empty()) + { + range = std::format("range(low, {} + {}, toUInt64({}))", end, diff, step); + condition = std::format("where toInt64({0}) - {1} < {2}", axis_column_format, diff, end); + } + else if (!start_str.empty() && end_str.empty()) + { + range = std::format("range({}, high, toUInt64({}))", start, step); + condition = std::format("where toInt64({}) >= {}", axis_column_format, start); + } + else + { + range = std::format("range(low, high, toUInt64({}))", step); + condition = " "; + } + + auto range_len = std::format("length({})", range); + + String sub_sub_query; + if (group_expression.empty()) + sub_sub_query = std::format(" (Select {0}, {1} FROM {2} {4} GROUP BY {3}_ali ORDER BY {3}_ali) ", subquery_columns, bin_str, "table_name", axis_column, condition); + else + sub_sub_query = std::format(" (Select {0}, {1}, {2} FROM {3} {5} GROUP BY {0}, {4}_ali ORDER BY {4}_ali) ", group_expression, subquery_columns, bin_str, "table_name", axis_column, condition); + + ASTPtr sub_query_node; + + if (!ParserSimpleCHSubquery(select_node).parseByString(sub_sub_query, sub_query_node, 
max_depth)) + return false; + select_node->as()->setExpression(ASTSelectQuery::Expression::TABLES, std::move(sub_query_node)); + + if (!group_expression.empty()) + main_query = std::format("{} ", group_expression_alias); + + auto axis_and_agg_alias_list = axis_column; + auto final_axis_agg_alias_list =std::format("tupleElement(zipped,1) AS {}", axis_column); + int idx = 2; + for (auto agg_column : aggregation_columns) + { + String agg_group_column = std::format("arrayConcat(groupArray({}_ali) as ga, arrayMap(x -> ({}),range(0,toUInt32({} - length(ga) < 0 ? 0 : {} - length(ga)),1))) as {}", + agg_column.alias, agg_column.default_value, range_len, range_len, agg_column.alias); + main_query = main_query.empty() ? agg_group_column : main_query + ", " + agg_group_column; + + axis_and_agg_alias_list += ", " + agg_column.alias; + final_axis_agg_alias_list += std::format(", tupleElement(zipped,{}) AS {}", idx, agg_column.alias); + } + + if (from_to_step.is_timespan) + axis_str = std::format("arrayDistinct(arrayConcat(groupArray(toDateTime64({0}_ali - {1}, 9, 'UTC')), arrayMap(x->(toDateTime64(x - {1}, 9, 'UTC')), {2}))) as {0}", + axis_column, diff, range); + else + axis_str = std::format("arrayDistinct(arrayConcat(groupArray({0}_ali), arrayMap(x->(toFloat64(x)), {1}))) as {0}", + axis_column, range); + + main_query += ", " + axis_str; + auto sub_group_by = group_expression.empty() ? 
"" : std::format("GROUP BY {}", group_expression_alias); + + sub_query = std::format("( SELECT toUInt64(min({}_ali)) AS low, toUInt64(max({}_ali))+ {} AS high, arraySort(arrayZip({})) as zipped, {} FROM {} {} )", + axis_column, axis_column,step, axis_and_agg_alias_list, main_query, sub_sub_query, sub_group_by); + + if (group_expression.empty()) + main_query = std::format("{}", final_axis_agg_alias_list); + else + main_query = std::format("{},{}", group_expression_alias, final_axis_agg_alias_list); + + if (!ParserSimpleCHSubquery(select_node).parseByString(sub_query, sub_query_node, max_depth)) + return false; + select_node->as()->setExpression(ASTSelectQuery::Expression::TABLES, std::move(sub_query_node)); + + kql_make_series.sub_query = std::move(sub_query); + kql_make_series.main_query = std::move(main_query); + + return true; +} + +bool ParserKQLMakeSeries :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (!ParserSequence("make-series").ignore(pos, expected)) + return false; + + auto begin = pos; + ParserKeyword s_on("on"); + ParserKeyword s_by("by"); + + ParserToken equals(TokenType::Equals); + ParserToken comma(TokenType::Comma); + + ASTPtr select_expression_list; + + KQLMakeSeries kql_make_series; + auto & aggregation_columns = kql_make_series.aggregation_columns; + auto & from_to_step = kql_make_series.from_to_step; + auto & subquery_columns = kql_make_series.subquery_columns; + auto & axis_column = kql_make_series.axis_column; + auto & group_expression = kql_make_series.group_expression; + + //const auto make_series_parameters = getMakeSeriesParameters(pos); + + if (!parseAggregationColumns(aggregation_columns, pos)) + return false; + + if (!s_on.ignore(pos, expected)) + return false; + + axis_column = String(pos->begin, pos->end); + ++pos; + + if (!parseFromToStepClause(from_to_step, pos)) + return false; + + if (s_by.ignore(pos, expected)) + { + group_expression = getExprFromToken(pos); + if (group_expression.empty()) + return false; + } 
+ + for (auto agg_column : aggregation_columns) + { + String column_str = std::format("{}({}) AS {}_ali", agg_column.aggregation_fun, agg_column.column, agg_column.alias); + if (subquery_columns.empty()) + subquery_columns = column_str; + else + subquery_columns += ", "+ column_str; + } + + makeSeries(kql_make_series, node, pos.max_depth); + + Tokens token_main_query(kql_make_series.main_query.c_str(), kql_make_series.main_query.c_str() + kql_make_series.main_query.size()); + IParser::Pos pos_main_query(token_main_query, pos.max_depth); + + if (!ParserNotEmptyExpressionList(true).parse(pos_main_query, select_expression_list, expected)) + return false; + node->as()->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_expression_list)); + + pos = begin; + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLMakeSeries.h b/src/Parsers/Kusto/ParserKQLMakeSeries.h new file mode 100644 index 000000000000..0f0caece6873 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLMakeSeries.h @@ -0,0 +1,53 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLMakeSeries : public ParserKQLBase +{ + +protected: + struct AggregationColumn + { + String alias; + String aggregation_fun; + String column; + double default_value; + AggregationColumn(String alias_, String aggregation_fun_, String column_, double default_value_) + :alias(alias_), aggregation_fun(aggregation_fun_), column(column_), default_value(default_value_){} + }; + using AggregationColumns = std::vector; + + struct FromToStepClause + { + String from_str; + String to_str; + String step_str; + bool is_timespan = false; + double step; + }; + + struct KQLMakeSeries + { + AggregationColumns aggregation_columns; + FromToStepClause from_to_step; + String axis_column; + String group_expression; + String subquery_columns; + String sub_query; + String main_query; + }; + + static bool makeSeries(KQLMakeSeries & kql_make_series, ASTPtr & select_node, const uint32_t & max_depth); + static bool 
parseAggregationColumns(AggregationColumns & aggregation_columns, Pos & pos); + static bool parseFromToStepClause(FromToStepClause & from_to_step, Pos & pos); + + const char * getName() const override { return "KQL make-series"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index 1575cffcc394..e86eea254cb7 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -1,71 +1,324 @@ -#include -#include -#include -#include +#include "ParserKQLOperators.h" +#include +#include +#include +#include #include +#include +#include "KustoFunctions/IParserKQLFunction.h" +#include "ParserKQLStatement.h" -namespace DB +#include +#include + +namespace DB::ErrorCodes +{ +extern const int LOGICAL_ERROR; +extern const int SYNTAX_ERROR; +} + +namespace +{ +enum class WildcardsPos : uint8_t +{ + none, + left, + right, + both +}; + +enum class KQLOperatorValue : uint16_t +{ + none, + contains, + not_contains, + contains_cs, + not_contains_cs, + endswith, + not_endswith, + endswith_cs, + not_endswith_cs, + equal, //=~ + not_equal, //!~ + equal_cs, //= + not_equal_cs, //!= + has, + not_has, + has_all, + has_any, + has_cs, + not_has_cs, + hasprefix, + not_hasprefix, + hasprefix_cs, + not_hasprefix_cs, + hassuffix, + not_hassuffix, + hassuffix_cs, + not_hassuffix_cs, + in_cs, //in + not_in_cs, //!in + in, //in~ + not_in, //!in~ + matches_regex, + startswith, + not_startswith, + startswith_cs, + not_startswith_cs, +}; + +const std::unordered_map KQLOperator = { + {"contains", KQLOperatorValue::contains}, + {"!contains", KQLOperatorValue::not_contains}, + {"contains_cs", KQLOperatorValue::contains_cs}, + {"!contains_cs", KQLOperatorValue::not_contains_cs}, + {"endswith", KQLOperatorValue::endswith}, + {"!endswith", KQLOperatorValue::not_endswith}, + {"endswith_cs", KQLOperatorValue::endswith_cs}, + 
{"!endswith_cs", KQLOperatorValue::not_endswith_cs}, + {"=~", KQLOperatorValue::equal}, + {"!~", KQLOperatorValue::not_equal}, + {"==", KQLOperatorValue::equal_cs}, + {"!=", KQLOperatorValue::not_equal_cs}, + {"has", KQLOperatorValue::has}, + {"!has", KQLOperatorValue::not_has}, + {"has_all", KQLOperatorValue::has_all}, + {"has_any", KQLOperatorValue::has_any}, + {"has_cs", KQLOperatorValue::has_cs}, + {"!has_cs", KQLOperatorValue::not_has_cs}, + {"hasprefix", KQLOperatorValue::hasprefix}, + {"!hasprefix", KQLOperatorValue::not_hasprefix}, + {"hasprefix_cs", KQLOperatorValue::hasprefix_cs}, + {"!hasprefix_cs", KQLOperatorValue::not_hasprefix_cs}, + {"hassuffix", KQLOperatorValue::hassuffix}, + {"!hassuffix", KQLOperatorValue::not_hassuffix}, + {"hassuffix_cs", KQLOperatorValue::hassuffix_cs}, + {"!hassuffix_cs", KQLOperatorValue::not_hassuffix_cs}, + {"in", KQLOperatorValue::in_cs}, + {"!in", KQLOperatorValue::not_in_cs}, + {"in~", KQLOperatorValue::in}, + {"!in~", KQLOperatorValue::not_in}, + {"matches regex", KQLOperatorValue::matches_regex}, + {"startswith", KQLOperatorValue::startswith}, + {"!startswith", KQLOperatorValue::not_startswith}, + {"startswith_cs", KQLOperatorValue::startswith_cs}, + {"!startswith_cs", KQLOperatorValue::not_startswith_cs}, +}; + +void rebuildSubqueryForInOperator(DB::ASTPtr & node, bool useLowerCase) +{ + //A sub-query for in operator in kql can have multiple columns, but only takes the first column. + //A sub-query for in operator in ClickHouse can not have multiple columns + //So only take the first column if there are multiple columns. + //select * not working for subquery. 
(a tabular statement without project) + + const auto selectColumns = node->children[0]->children[0]->as()->select(); + while (selectColumns->children.size() > 1) + selectColumns->children.pop_back(); + + if (useLowerCase) + { + auto args = std::make_shared(); + args->children.push_back(selectColumns->children[0]); + auto func_lower = std::make_shared(); + func_lower->name = "lower"; + func_lower->children.push_back(selectColumns->children[0]); + func_lower->arguments = args; + if (selectColumns->children[0]->as()) + func_lower->alias = std::move(selectColumns->children[0]->as()->alias); + else if (selectColumns->children[0]->as()) + func_lower->alias = std::move(selectColumns->children[0]->as()->alias); + + auto funcs = std::make_shared(); + funcs->children.push_back(func_lower); + selectColumns->children[0] = std::move(funcs); + } +} + +std::string applyFormatString(const std::string_view format_string, const std::string & haystack, const std::string & needle) { + return std::vformat(format_string, std::make_format_args(haystack, needle)); +} -namespace ErrorCodes +std::string constructHasOperatorTranslation(const KQLOperatorValue kql_op, const std::string & haystack, const std::string & needle) { - extern const int SYNTAX_ERROR; + if (kql_op != KQLOperatorValue::has && kql_op != KQLOperatorValue::not_has && kql_op != KQLOperatorValue::has_cs + && kql_op != KQLOperatorValue::not_has_cs && kql_op != KQLOperatorValue::has_all && kql_op != KQLOperatorValue::has_any) + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Unexpected operator: {}", magic_enum::enum_name(kql_op)); + + const auto tokens = std::invoke( + [&needle] + { + std::vector result; + size_t pos = 0; + size_t start = 0; + size_t length = 0; + DB::SplitTokenExtractor token_extractor; + while (pos < needle.length() && token_extractor.nextInString(needle.c_str(), needle.length(), &pos, &start, &length)) + result.emplace_back(needle.c_str() + start, length); + + return result; + }); + + const auto 
is_case_sensitive = kql_op == KQLOperatorValue::has_cs || kql_op == KQLOperatorValue::not_has_cs; + const auto has_token_suffix = is_case_sensitive ? "" : "CaseInsensitive"; + const auto has_all_tokens = std::accumulate( + tokens.cbegin(), + tokens.cend(), + std::string(), + [&has_token_suffix, &haystack](auto acc, const auto & token) + { return std::move(acc) + std::format("hasToken{}({}, '{}') and ", has_token_suffix, haystack, token); }); + + const auto is_negation = kql_op == KQLOperatorValue::not_has || kql_op == KQLOperatorValue::not_has_cs; + return std::format( + "{4}ifNull(hasToken{3}OrNull({0}, {1}), {2} position{3}({0}, {1}) > 0)", + haystack, + needle, + has_all_tokens, + has_token_suffix, + is_negation ? "not " : ""); +} } -String KQLOperators::genHasAnyAllOpExpr(std::vector &tokens, IParser::Pos &token_pos,String kql_op, String ch_op) +String genHasAnyAllOpExpr( + std::vector & tokens, + DB::IParser::Pos & token_pos, + const std::string & kql_op, + const std::function & translate) { - String new_expr; - Expected expected; - ParserToken s_lparen(TokenType::OpeningRoundBracket); + std::string new_expr; + DB::Expected expected; + DB::ParserToken s_lparen(DB::TokenType::OpeningRoundBracket); ++token_pos; if (!s_lparen.ignore(token_pos, expected)) - throw Exception(ErrorCodes::SYNTAX_ERROR, "Syntax error near {}", kql_op); + throw DB::Exception(DB::ErrorCodes::SYNTAX_ERROR, "Syntax error near {}", kql_op); auto haystack = tokens.back(); - - String logic_op = (kql_op == "has_all") ? " and " : " or "; - - while (!token_pos->isEnd() && token_pos->type != TokenType::PipeMark && token_pos->type != TokenType::Semicolon) + const auto * const logic_op = (kql_op == "has_all") ? 
" and " : " or "; + while (!token_pos->isEnd() && token_pos->type != DB::TokenType::PipeMark && token_pos->type != DB::TokenType::Semicolon) { - auto tmp_arg = String(token_pos->begin, token_pos->end); - if (token_pos->type == TokenType::Comma) - new_expr = new_expr + logic_op; + auto tmp_arg = DB::IParserKQLFunction::getExpression(token_pos); + if (token_pos->type == DB::TokenType::Comma) + new_expr += logic_op; else - new_expr = new_expr + ch_op + "(" + haystack + "," + tmp_arg + ")"; + new_expr += translate(haystack, tmp_arg); ++token_pos; - if (token_pos->type == TokenType::ClosingRoundBracket) + if (token_pos->type == DB::TokenType::ClosingRoundBracket) break; - } tokens.pop_back(); return new_expr; } -String KQLOperators::genInOpExpr(IParser::Pos &token_pos, String kql_op, String ch_op) +String genEqOpExprCis(std::vector & tokens, DB::IParser::Pos & token_pos, const DB::String & ch_op) { - String new_expr; + DB::String tmp_arg(token_pos->begin, token_pos->end); + + if (tokens.empty() || tmp_arg != "~") + return tmp_arg; + + DB::String new_expr; + new_expr += "lower(" + tokens.back() + ")"; + new_expr += ch_op; + ++token_pos; + new_expr += " lower(" + DB::String(token_pos->begin, token_pos->end) + ")" + " "; + tokens.pop_back(); + + return new_expr; +} + +String genInOpExprCis(std::vector & tokens, DB::IParser::Pos & token_pos, const DB::String & kql_op, const DB::String & ch_op) +{ + DB::ParserKQLTaleFunction kqlfun_p; - ParserToken s_lparen(TokenType::OpeningRoundBracket); + DB::ParserToken s_lparen(DB::TokenType::OpeningRoundBracket); - ASTPtr select; - Expected expected; + DB::ASTPtr select; + DB::Expected expected; + DB::String new_expr; ++token_pos; if (!s_lparen.ignore(token_pos, expected)) - throw Exception(ErrorCodes::SYNTAX_ERROR, "Syntax error near {}", kql_op); + throw DB::Exception(DB::ErrorCodes::SYNTAX_ERROR, "Syntax error near {}", kql_op); + + if (tokens.empty()) + throw DB::Exception(DB::ErrorCodes::SYNTAX_ERROR, "Syntax error near {}", 
kql_op); + + new_expr = "lower(" + tokens.back() + ") "; + tokens.pop_back(); + auto pos = token_pos; + if (kqlfun_p.parse(pos, select, expected)) + { + rebuildSubqueryForInOperator(select, true); + new_expr += ch_op + " (" + serializeAST(*select) + ")"; + token_pos = pos; + return new_expr; + } --token_pos; --token_pos; - return ch_op; + new_expr += ch_op + "("; + bool has_dynamic = false; + while (!token_pos->isEnd() && token_pos->type != DB::TokenType::PipeMark && token_pos->type != DB::TokenType::Semicolon) + { + auto tmp_arg = DB::String(token_pos->begin, token_pos->end); + if (tmp_arg == "dynamic") + has_dynamic = true; + if (token_pos->type != DB::TokenType::Comma && token_pos->type != DB::TokenType::ClosingRoundBracket + && token_pos->type != DB::TokenType::OpeningRoundBracket && token_pos->type != DB::TokenType::OpeningSquareBracket + && token_pos->type != DB::TokenType::ClosingSquareBracket && tmp_arg != "~" && tmp_arg != "dynamic") + new_expr = new_expr + "lower(" + tmp_arg + ")"; + ++token_pos; + if (token_pos->type == DB::TokenType::ClosingRoundBracket) + break; + else if (token_pos->type == DB::TokenType::Comma) + new_expr += ", "; + } + if (has_dynamic) + ++token_pos; + new_expr += ")"; + return new_expr; +} + +std::string genInOpExpr(DB::IParser::Pos & token_pos, const std::string & kql_op, const std::string & ch_op) +{ + DB::ParserKQLTaleFunction kqlfun_p; + DB::ParserToken s_lparen(DB::TokenType::OpeningRoundBracket); + + DB::ASTPtr select; + DB::Expected expected; + + ++token_pos; + if (!s_lparen.ignore(token_pos, expected)) + throw DB::Exception(DB::ErrorCodes::SYNTAX_ERROR, "Syntax error near {}", kql_op); + + auto pos = token_pos; + if (kqlfun_p.parse(pos, select, expected)) + { + rebuildSubqueryForInOperator(select, false); + auto new_expr = ch_op + " (" + serializeAST(*select) + ")"; + token_pos = pos; + return new_expr; + } + + --token_pos; + --token_pos; + return ch_op; } -String KQLOperators::genHaystackOpExpr(std::vector 
&tokens,IParser::Pos &token_pos,String kql_op, String ch_op, WildcardsPos wildcards_pos, WildcardsPos space_pos) +std::string genHaystackOpExpr( + std::vector & tokens, + DB::IParser::Pos & token_pos, + const std::string & kql_op, + const std::function & translate, + WildcardsPos wildcards_pos, + WildcardsPos space_pos = WildcardsPos::none) { - String new_expr, left_wildcards, right_wildcards, left_space, right_space; + std::string new_expr, left_wildcards, right_wildcards, left_space, right_space; switch (wildcards_pos) { @@ -73,7 +326,7 @@ String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos break; case WildcardsPos::left: - left_wildcards ="%"; + left_wildcards = "%"; break; case WildcardsPos::right: @@ -81,7 +334,7 @@ String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos break; case WildcardsPos::both: - left_wildcards ="%"; + left_wildcards = "%"; right_wildcards = "%"; break; } @@ -92,7 +345,7 @@ String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos break; case WildcardsPos::left: - left_space =" "; + left_space = " "; break; case WildcardsPos::right: @@ -100,260 +353,277 @@ String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos break; case WildcardsPos::both: - left_space =" "; + left_space = " "; right_space = " "; break; } ++token_pos; - if (!tokens.empty() && ((token_pos)->type == TokenType::StringLiteral || token_pos->type == TokenType::QuotedIdentifier)) - new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + left_space + String(token_pos->begin + 1,token_pos->end - 1) + right_space + right_wildcards + "')"; - else if (!tokens.empty() && ((token_pos)->type == TokenType::BareWord)) + if (!tokens.empty() && (token_pos->type == DB::TokenType::StringLiteral || token_pos->type == DB::TokenType::QuotedIdentifier)) + new_expr = translate( + tokens.back(), + "'" + left_wildcards + left_space + DB::IParserKQLFunction::escapeSingleQuotes(String(token_pos->begin + 1, token_pos->end - 
1)) + + right_space + right_wildcards + "'"); + else if (!tokens.empty() && token_pos->type == DB::TokenType::BareWord) { - auto tmp_arg = String(token_pos->begin, token_pos->end); - new_expr = ch_op +"(" + tokens.back() +", concat('" + left_wildcards + left_space + "', " + tmp_arg +", '"+ right_space + right_wildcards + "'))"; + auto tmp_arg = DB::IParserKQLFunction::getExpression(token_pos); + new_expr = translate( + tokens.back(), "concat('" + left_wildcards + left_space + "', " + tmp_arg + ", '" + right_space + right_wildcards + "')"); } else - throw Exception(ErrorCodes::SYNTAX_ERROR, "Syntax error near {}", kql_op); + throw DB::Exception(DB::ErrorCodes::SYNTAX_ERROR, "Syntax error near {}", kql_op); + tokens.pop_back(); return new_expr; } -bool KQLOperators::convert(std::vector &tokens,IParser::Pos &pos) +namespace DB +{ +bool KQLOperators::convert(std::vector & tokens, IParser::Pos & pos) { + if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon) + return false; + auto begin = pos; + auto token = String(pos->begin, pos->end); - if (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + String op = token; + if (token == "!") + { + ++pos; + if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon) + throw Exception(ErrorCodes::SYNTAX_ERROR, "Invalid negative operator"); + op = "!" 
+ String(pos->begin, pos->end); + } + else if (token == "matches") { - KQLOperatorValue op_value = KQLOperatorValue::none; - - auto token = String(pos->begin,pos->end); - - String op = token; - if (token == "!") - { - ++pos; - if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon) - throw Exception(ErrorCodes::SYNTAX_ERROR, "Invalid negative operator"); - op ="!"+String(pos->begin,pos->end); - } - else if (token == "matches") - { - ++pos; - if (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) - { - if (String(pos->begin,pos->end) == "regex") - op +=" regex"; - else - --pos; - } - } - else - { - op = token; - } - ++pos; if (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { - if (String(pos->begin,pos->end) == "~") - op +="~"; + if (String(pos->begin, pos->end) == "regex") + op += " regex"; else --pos; } + } + else + { + op = token; + } + + ++pos; + if (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + if (String(pos->begin, pos->end) == "~") + op += "~"; else --pos; + } + else + --pos; - if (KQLOperator.find(op) == KQLOperator.end()) - { - pos = begin; - return false; - } - - op_value = KQLOperator[op]; + const auto op_it = KQLOperator.find(op); + if (op_it == KQLOperator.end()) + { + pos = begin; + return false; + } - String new_expr; + String new_expr; - if (op_value == KQLOperatorValue::none) - tokens.push_back(op); - else - { - auto last_op = tokens.back(); - auto last_pos = pos; - - switch (op_value) - { - case KQLOperatorValue::contains: - new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::both); - break; - - case KQLOperatorValue::not_contains: - new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike", WildcardsPos::both); - break; - - case KQLOperatorValue::contains_cs: - new_expr = genHaystackOpExpr(tokens, pos, op, "like", WildcardsPos::both); - break; - - case 
KQLOperatorValue::not_contains_cs: - new_expr = genHaystackOpExpr(tokens, pos, op, "not like", WildcardsPos::both); - break; - - case KQLOperatorValue::endswith: - new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::left); - break; - - case KQLOperatorValue::not_endswith: - new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike", WildcardsPos::left); - break; - - case KQLOperatorValue::endswith_cs: - new_expr = genHaystackOpExpr(tokens, pos, op, "endsWith", WildcardsPos::none); - break; - - case KQLOperatorValue::not_endswith_cs: - new_expr = genHaystackOpExpr(tokens, pos, op, "not endsWith", WildcardsPos::none); - break; - - case KQLOperatorValue::equal: - break; - - case KQLOperatorValue::not_equal: - break; - - case KQLOperatorValue::equal_cs: - new_expr = "=="; - break; - - case KQLOperatorValue::not_equal_cs: - new_expr = "!="; - break; - case KQLOperatorValue::has: - new_expr = genHaystackOpExpr(tokens, pos, op, "hasTokenCaseInsensitive", WildcardsPos::none); - break; - - case KQLOperatorValue::not_has: - new_expr = genHaystackOpExpr(tokens, pos, op, "not hasTokenCaseInsensitive", WildcardsPos::none); - break; - - case KQLOperatorValue::has_all: - new_expr = genHasAnyAllOpExpr(tokens, pos, "has_all", "hasTokenCaseInsensitive"); - break; - - case KQLOperatorValue::has_any: - new_expr = genHasAnyAllOpExpr(tokens, pos, "has_any", "hasTokenCaseInsensitive"); - break; - - case KQLOperatorValue::has_cs: - new_expr = genHaystackOpExpr(tokens, pos, op, "hasToken", WildcardsPos::none); - break; - - case KQLOperatorValue::not_has_cs: - new_expr = genHaystackOpExpr(tokens, pos, op, "not hasToken", WildcardsPos::none); - break; - - case KQLOperatorValue::hasprefix: - new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::right); - new_expr += " or "; - tokens.push_back(last_op); - new_expr += genHaystackOpExpr(tokens, last_pos, op, "ilike", WildcardsPos::both, WildcardsPos::left); - break; - - case KQLOperatorValue::not_hasprefix: - 
new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike", WildcardsPos::right); - new_expr += " and "; - tokens.push_back(last_op); - new_expr += genHaystackOpExpr(tokens, last_pos, op, "not ilike", WildcardsPos::both, WildcardsPos::left); - break; - - case KQLOperatorValue::hasprefix_cs: - new_expr = genHaystackOpExpr(tokens, pos, op, "startsWith", WildcardsPos::none); - new_expr += " or "; - tokens.push_back(last_op); - new_expr += genHaystackOpExpr(tokens, last_pos, op, "like", WildcardsPos::both, WildcardsPos::left); - break; - - case KQLOperatorValue::not_hasprefix_cs: - new_expr = genHaystackOpExpr(tokens, pos, op, "not startsWith", WildcardsPos::none); - new_expr += " and "; - tokens.push_back(last_op); - new_expr += genHaystackOpExpr(tokens, last_pos, op, "not like", WildcardsPos::both, WildcardsPos::left); - break; - - case KQLOperatorValue::hassuffix: - new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::left); - new_expr += " or "; - tokens.push_back(last_op); - new_expr += genHaystackOpExpr(tokens, last_pos, op, "ilike", WildcardsPos::both, WildcardsPos::right); - break; - - case KQLOperatorValue::not_hassuffix: - new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike", WildcardsPos::left); - new_expr += " and "; - tokens.push_back(last_op); - new_expr += genHaystackOpExpr(tokens, last_pos, op, "not ilike", WildcardsPos::both, WildcardsPos::right); - break; - - case KQLOperatorValue::hassuffix_cs: - new_expr = genHaystackOpExpr(tokens, pos, op, "endsWith", WildcardsPos::none); - new_expr += " or "; - tokens.push_back(last_op); - new_expr += genHaystackOpExpr(tokens, last_pos, op, "like", WildcardsPos::both, WildcardsPos::right); - break; - - case KQLOperatorValue::not_hassuffix_cs: - new_expr = genHaystackOpExpr(tokens, pos, op, "not endsWith", WildcardsPos::none); - new_expr += " and "; - tokens.push_back(last_op); - new_expr += genHaystackOpExpr(tokens, last_pos, op, "not like", WildcardsPos::both, WildcardsPos::right); - break; - 
- case KQLOperatorValue::in_cs: - new_expr = genInOpExpr(pos,op,"in"); - break; - - case KQLOperatorValue::not_in_cs: - new_expr = genInOpExpr(pos,op,"not in"); - break; - - case KQLOperatorValue::in: - break; - - case KQLOperatorValue::not_in: - break; - - case KQLOperatorValue::matches_regex: - new_expr = genHaystackOpExpr(tokens, pos, op, "match", WildcardsPos::none); - break; - - case KQLOperatorValue::startswith: - new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::right); - break; - - case KQLOperatorValue::not_startswith: - new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike", WildcardsPos::right); - break; - - case KQLOperatorValue::startswith_cs: - new_expr = genHaystackOpExpr(tokens, pos, op, "startsWith", WildcardsPos::none); - break; - - case KQLOperatorValue::not_startswith_cs: - new_expr = genHaystackOpExpr(tokens, pos, op, "not startsWith", WildcardsPos::none); - break; - - default: - break; - } - - tokens.push_back(new_expr); - } + const auto & op_value = op_it->second; + if (op_value == KQLOperatorValue::none) + { + tokens.push_back(op); return true; } - pos = begin; - return false; -} -} + if (tokens.empty()) + throw Exception(ErrorCodes::SYNTAX_ERROR, "Syntax error near {}", op); + + auto last_op = tokens.back(); + auto last_pos = pos; + + switch (op_value) + { + case KQLOperatorValue::contains: + new_expr = genHaystackOpExpr(tokens, pos, op, std::bind_front(&applyFormatString, "ilike({0}, {1})"), WildcardsPos::both); + break; + + case KQLOperatorValue::not_contains: + new_expr = genHaystackOpExpr(tokens, pos, op, std::bind_front(&applyFormatString, "not ilike({0}, {1})"), WildcardsPos::both); + break; + + case KQLOperatorValue::contains_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, std::bind_front(&applyFormatString, "like({0}, {1})"), WildcardsPos::both); + break; + + case KQLOperatorValue::not_contains_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, std::bind_front(&applyFormatString, "not like({0}, {1})"), 
WildcardsPos::both); + break; + + case KQLOperatorValue::endswith: + new_expr = genHaystackOpExpr(tokens, pos, op, std::bind_front(&applyFormatString, "ilike({0}, {1})"), WildcardsPos::left); + break; + + case KQLOperatorValue::not_endswith: + new_expr = genHaystackOpExpr(tokens, pos, op, std::bind_front(&applyFormatString, "not ilike({0}, {1})"), WildcardsPos::left); + break; + + case KQLOperatorValue::endswith_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, std::bind_front(&applyFormatString, "endsWith({0}, {1})"), WildcardsPos::none); + break; + + case KQLOperatorValue::not_endswith_cs: + new_expr + = genHaystackOpExpr(tokens, pos, op, std::bind_front(&applyFormatString, "not endsWith({0}, {1})"), WildcardsPos::none); + break; + + case KQLOperatorValue::equal: + new_expr = genEqOpExprCis(tokens, pos, "=="); + break; + + case KQLOperatorValue::not_equal: + new_expr = genEqOpExprCis(tokens, pos, "!="); + break; + + case KQLOperatorValue::equal_cs: + new_expr = "=="; + break; + + case KQLOperatorValue::not_equal_cs: + new_expr = "!="; + break; + case KQLOperatorValue::has: + new_expr = genHaystackOpExpr(tokens, pos, op, std::bind_front(&constructHasOperatorTranslation, op_value), WildcardsPos::none); + break; + + case KQLOperatorValue::not_has: + new_expr = genHaystackOpExpr(tokens, pos, op, std::bind_front(&constructHasOperatorTranslation, op_value), WildcardsPos::none); + break; + + case KQLOperatorValue::has_all: + case KQLOperatorValue::has_any: + new_expr = genHasAnyAllOpExpr(tokens, pos, op, std::bind_front(&constructHasOperatorTranslation, op_value)); + break; + case KQLOperatorValue::has_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, std::bind_front(&constructHasOperatorTranslation, op_value), WildcardsPos::none); + break; + + case KQLOperatorValue::not_has_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, std::bind_front(&constructHasOperatorTranslation, op_value), WildcardsPos::none); + break; + + case KQLOperatorValue::hasprefix: + new_expr = 
genHaystackOpExpr(tokens, pos, op, std::bind_front(&applyFormatString, "ilike({0}, {1})"), WildcardsPos::right); + new_expr += " or "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr( + tokens, last_pos, op, std::bind_front(&applyFormatString, "ilike({0}, {1})"), WildcardsPos::both, WildcardsPos::left); + break; + + case KQLOperatorValue::not_hasprefix: + new_expr = genHaystackOpExpr(tokens, pos, op, std::bind_front(&applyFormatString, "not ilike({0}, {1})"), WildcardsPos::right); + new_expr += " and "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr( + tokens, last_pos, op, std::bind_front(&applyFormatString, "not ilike({0}, {1})"), WildcardsPos::both, WildcardsPos::left); + break; + + case KQLOperatorValue::hasprefix_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, std::bind_front(&applyFormatString, "startsWith({0}, {1})"), WildcardsPos::none); + new_expr += " or "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr( + tokens, last_pos, op, std::bind_front(&applyFormatString, "like({0}, {1})"), WildcardsPos::both, WildcardsPos::left); + break; + + case KQLOperatorValue::not_hasprefix_cs: + new_expr + = genHaystackOpExpr(tokens, pos, op, std::bind_front(&applyFormatString, "not startsWith({0}, {1})"), WildcardsPos::none); + new_expr += " and "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr( + tokens, last_pos, op, std::bind_front(&applyFormatString, "not like({0}, {1})"), WildcardsPos::both, WildcardsPos::left); + break; + + case KQLOperatorValue::hassuffix: + new_expr = genHaystackOpExpr(tokens, pos, op, std::bind_front(&applyFormatString, "ilike({0}, {1})"), WildcardsPos::left); + new_expr += " or "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr( + tokens, last_pos, op, std::bind_front(&applyFormatString, "ilike({0}, {1})"), WildcardsPos::both, WildcardsPos::right); + break; + + case KQLOperatorValue::not_hassuffix: + new_expr = genHaystackOpExpr(tokens, pos, op, 
std::bind_front(&applyFormatString, "not ilike({0}, {1})"), WildcardsPos::left); + new_expr += " and "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr( + tokens, last_pos, op, std::bind_front(&applyFormatString, "not ilike({0}, {1})"), WildcardsPos::both, WildcardsPos::right); + break; + + case KQLOperatorValue::hassuffix_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, std::bind_front(&applyFormatString, "endsWith({0}, {1})"), WildcardsPos::none); + new_expr += " or "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr( + tokens, last_pos, op, std::bind_front(&applyFormatString, "like({0}, {1})"), WildcardsPos::both, WildcardsPos::right); + break; + + case KQLOperatorValue::not_hassuffix_cs: + new_expr + = genHaystackOpExpr(tokens, pos, op, std::bind_front(&applyFormatString, "not endsWith({0}, {1})"), WildcardsPos::none); + new_expr += " and "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr( + tokens, last_pos, op, std::bind_front(&applyFormatString, "not like({0}, {1})"), WildcardsPos::both, WildcardsPos::right); + break; + + case KQLOperatorValue::in_cs: + new_expr = genInOpExpr(pos, op, "in"); + break; + + case KQLOperatorValue::not_in_cs: + new_expr = genInOpExpr(pos, op, "not in"); + break; + + case KQLOperatorValue::in: + new_expr = genInOpExprCis(tokens, pos, op, "in"); + break; + + case KQLOperatorValue::not_in: + new_expr = genInOpExprCis(tokens, pos, op, "not in"); + break; + + case KQLOperatorValue::matches_regex: + new_expr = genHaystackOpExpr(tokens, pos, op, std::bind_front(&applyFormatString, "match({0}, {1})"), WildcardsPos::none); + break; + + case KQLOperatorValue::startswith: + new_expr = genHaystackOpExpr(tokens, pos, op, std::bind_front(&applyFormatString, "ilike({0}, {1})"), WildcardsPos::right); + break; + + case KQLOperatorValue::not_startswith: + new_expr = genHaystackOpExpr(tokens, pos, op, std::bind_front(&applyFormatString, "not ilike({0}, {1})"), WildcardsPos::right); + break; + + case 
KQLOperatorValue::startswith_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, std::bind_front(&applyFormatString, "startsWith({0}, {1})"), WildcardsPos::none); + break; + + case KQLOperatorValue::not_startswith_cs: + new_expr + = genHaystackOpExpr(tokens, pos, op, std::bind_front(&applyFormatString, "not startsWith({0}, {1})"), WildcardsPos::none); + break; + + default: + break; + } + + tokens.push_back(new_expr); + return true; +} +} diff --git a/src/Parsers/Kusto/ParserKQLOperators.h b/src/Parsers/Kusto/ParserKQLOperators.h index 9796ae10c07c..684be75401aa 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.h +++ b/src/Parsers/Kusto/ParserKQLOperators.h @@ -1,106 +1,12 @@ #pragma once -#include -#include -#include +#include + namespace DB { - class KQLOperators { public: - bool convert(std::vector &tokens,IParser::Pos &pos); -protected: - - enum class WildcardsPos:uint8_t - { - none, - left, - right, - both - }; - - enum class KQLOperatorValue : uint16_t - { - none, - contains, - not_contains, - contains_cs, - not_contains_cs, - endswith, - not_endswith, - endswith_cs, - not_endswith_cs, - equal, //=~ - not_equal,//!~ - equal_cs, //= - not_equal_cs,//!= - has, - not_has, - has_all, - has_any, - has_cs, - not_has_cs, - hasprefix, - not_hasprefix, - hasprefix_cs, - not_hasprefix_cs, - hassuffix, - not_hassuffix, - hassuffix_cs, - not_hassuffix_cs, - in_cs, //in - not_in_cs, //!in - in, //in~ - not_in ,//!in~ - matches_regex, - startswith, - not_startswith, - startswith_cs, - not_startswith_cs, - }; - - std::unordered_map KQLOperator = - { - {"contains" , KQLOperatorValue::contains}, - {"!contains" , KQLOperatorValue::not_contains}, - {"contains_cs" , KQLOperatorValue::contains_cs}, - {"!contains_cs" , KQLOperatorValue::not_contains_cs}, - {"endswith" , KQLOperatorValue::endswith}, - {"!endswith" , KQLOperatorValue::not_endswith}, - {"endswith_cs" , KQLOperatorValue::endswith_cs}, - {"!endswith_cs" , KQLOperatorValue::not_endswith_cs}, - {"=~" , 
KQLOperatorValue::equal}, - {"!~" , KQLOperatorValue::not_equal}, - {"==" , KQLOperatorValue::equal_cs}, - {"!=" , KQLOperatorValue::not_equal_cs}, - {"has" , KQLOperatorValue::has}, - {"!has" , KQLOperatorValue::not_has}, - {"has_all" , KQLOperatorValue::has_all}, - {"has_any" , KQLOperatorValue::has_any}, - {"has_cs" , KQLOperatorValue::has_cs}, - {"!has_cs" , KQLOperatorValue::not_has_cs}, - {"hasprefix" , KQLOperatorValue::hasprefix}, - {"!hasprefix" , KQLOperatorValue::not_hasprefix}, - {"hasprefix_cs" , KQLOperatorValue::hasprefix_cs}, - {"!hasprefix_cs" , KQLOperatorValue::not_hasprefix_cs}, - {"hassuffix" , KQLOperatorValue::hassuffix}, - {"!hassuffix" , KQLOperatorValue::not_hassuffix}, - {"hassuffix_cs" , KQLOperatorValue::hassuffix_cs}, - {"!hassuffix_cs" , KQLOperatorValue::not_hassuffix_cs}, - {"in" , KQLOperatorValue::in_cs}, - {"!in" , KQLOperatorValue::not_in_cs}, - {"in~" , KQLOperatorValue::in}, - {"!in~" , KQLOperatorValue::not_in}, - {"matches regex" , KQLOperatorValue::matches_regex}, - {"startswith" , KQLOperatorValue::startswith}, - {"!startswith" , KQLOperatorValue::not_startswith}, - {"startswith_cs" , KQLOperatorValue::startswith_cs}, - {"!startswith_cs" , KQLOperatorValue::not_startswith_cs}, - }; - static String genHaystackOpExpr(std::vector &tokens,IParser::Pos &token_pos,String kql_op, String ch_op, WildcardsPos wildcards_pos, WildcardsPos space_pos = WildcardsPos::none); - static String genInOpExpr(IParser::Pos &token_pos,String kql_op, String ch_op); - static String genHasAnyAllOpExpr(std::vector &tokens,IParser::Pos &token_pos,String kql_op, String ch_op); + static bool convert(std::vector & tokens, IParser::Pos & pos); }; - } diff --git a/src/Parsers/Kusto/ParserKQLPrint.cpp b/src/Parsers/Kusto/ParserKQLPrint.cpp new file mode 100644 index 000000000000..21256ebf1562 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLPrint.cpp @@ -0,0 +1,28 @@ +#include +#include +#include + +#include "Utilities.h" + +namespace DB +{ + +bool 
ParserKQLPrint::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (!ParserKeyword("print").ignore(pos, expected)) + return false; + + const String expr = getExprFromToken(pos); + + Tokens tokens(expr.c_str(), expr.c_str() + expr.size()); + IParser::Pos new_pos(tokens, pos.max_depth); + + ASTPtr select_expression_list; + if (!ParserNotEmptyExpressionList(true).parse(new_pos, select_expression_list, expected)) + return false; + node->as()->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_expression_list)); + + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLPrint.h b/src/Parsers/Kusto/ParserKQLPrint.h new file mode 100644 index 000000000000..c962d7fa4282 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLPrint.h @@ -0,0 +1,16 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLPrint : public ParserKQLBase +{ +protected: + const char * getName() const override { return "KQL project"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLProject.cpp b/src/Parsers/Kusto/ParserKQLProject.cpp index e978323d8215..8dabec12fb92 100644 --- a/src/Parsers/Kusto/ParserKQLProject.cpp +++ b/src/Parsers/Kusto/ParserKQLProject.cpp @@ -6,6 +6,9 @@ namespace DB bool ParserKQLProject :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { + if (!ParserKeyword("project").ignore(pos, expected)) + return false; + ASTPtr select_expression_list; String expr; @@ -14,7 +17,7 @@ bool ParserKQLProject :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected Tokens tokens(expr.c_str(), expr.c_str()+expr.size()); IParser::Pos new_pos(tokens, pos.max_depth); - if (!ParserNotEmptyExpressionList(true).parse(new_pos, select_expression_list, expected)) + if (!ParserNotEmptyExpressionList(false).parse(new_pos, select_expression_list, expected)) return false; node->as()->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_expression_list)); diff 
--git a/src/Parsers/Kusto/ParserKQLProjectAway.cpp b/src/Parsers/Kusto/ParserKQLProjectAway.cpp new file mode 100644 index 000000000000..171d9f9b792e --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLProjectAway.cpp @@ -0,0 +1,101 @@ +#include +#include +#include +#include +#include +#include +#include "Utilities.h" +namespace DB +{ +namespace ErrorCodes +{ + extern const int SYNTAX_ERROR; +} +bool ParserKQLProjectAway::parseImpl(Pos & pos, ASTPtr & node, Expected & /*expected*/) +{ + size_t bracket_count = 0; + auto begin_pos = pos; + String regular_column_str; + std::vector wildcard_columns; + std::vector regular_columns; + ASTPtr sub_query_node; + + auto append_columns = [®ular_columns, &wildcard_columns](Pos & begin, Pos & end) + { + const auto column = String(begin->begin, end->end); + const auto regex_column = wildcardToRegex(column); + if (regex_column == column) + regular_columns.push_back(column); + else + wildcard_columns.push_back("'" + regex_column + "'"); + }; + + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + if (pos->type == TokenType::OpeningRoundBracket) + ++bracket_count; + + if (pos->type == TokenType::ClosingRoundBracket) + --bracket_count; + + if (bracket_count == 0 and pos->type == TokenType::Comma) + { + auto end_pos = pos; + --end_pos; + append_columns(begin_pos, end_pos); + begin_pos = pos; + ++begin_pos; + } + ++pos; + } + + --pos; + append_columns(begin_pos, pos); + + if (wildcard_columns.empty() && regular_columns.empty()) + throw Exception(ErrorCodes::SYNTAX_ERROR, "Syntax error: Missing projected away expressions"); + + if (!regular_columns.empty()) + { + if (regular_columns.size() == 1) + regular_column_str = regular_columns[0]; + else + { + regular_column_str = "(" + regular_columns[0]; + for (size_t i = 1; i < regular_columns.size(); ++i) + { + regular_column_str += "," + regular_columns[i]; + } + regular_column_str += ")"; + } + } + + size_t wildcard_columns_index = 
regular_columns.empty() ? 1 : 0; + + for (size_t i = wildcard_columns_index; i < wildcard_columns.size(); ++i) + { + String project_away_query = std::format("(SELECT * EXCEPT {} FROM dummy_input)", wildcard_columns[i]); + if (!parseSQLQueryByString(std::make_unique(), project_away_query, sub_query_node, pos.max_depth)) + return false; + if (!setSubQuerySource(sub_query_node, node, true, i != wildcard_columns_index)) + return false; + node = std::move(sub_query_node); + } + + String last_away = std::format("SELECT * EXCEPT {} from dummy", regular_columns.empty() ? wildcard_columns[0] : regular_column_str); + + if (!parseSQLQueryByString(std::make_unique(), last_away, sub_query_node, pos.max_depth)) + return false; + if (wildcard_columns_index < wildcard_columns.size()) + sub_query_node->as()->setExpression(ASTSelectQuery::Expression::TABLES, std::move(node)); + else + { + if (!setSubQuerySource(sub_query_node, node, false, false)) + return false; + } + node = std::move(sub_query_node); + + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLProjectAway.h b/src/Parsers/Kusto/ParserKQLProjectAway.h new file mode 100644 index 000000000000..f49f51754dd6 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLProjectAway.h @@ -0,0 +1,16 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLProjectAway : public ParserKQLBase +{ +protected: + const char * getName() const override { return "KQL project-away"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index 04ee36705a9a..d1d4440a2d62 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -1,24 +1,172 @@ +#include "Utilities.h" + +#include #include #include +#include +#include +#include +#include #include -#include -#include -#include +#include +#include +#include +#include #include -#include -#include +#include #include +#include 
+#include +#include +#include +#include +#include +#include +#include +#include +#include #include -#include +#include +#include +#include +#include +#include +#include +#include + #include -#include -#include -#include namespace DB { -String ParserKQLBase :: getExprFromToken(const String & text, const uint32_t & max_depth) +namespace ErrorCodes +{ + extern const int SYNTAX_ERROR; +} + +bool ParserKQLBase::parseByString(const String & expr, ASTPtr & node, const uint32_t max_depth) +{ + Expected expected; + + Tokens tokens(expr.c_str(), expr.c_str() + expr.size()); + IParser::Pos pos(tokens, max_depth); + return parse(pos, node, expected); +} + +bool ParserKQLBase::parseSQLQueryByString(ParserPtr && parser, String & query, ASTPtr & select_node, int32_t max_depth) +{ + Expected expected; + Tokens token_subquery(query.c_str(), query.c_str() + query.size()); + IParser::Pos pos_subquery(token_subquery, max_depth); + if (!parser->parse(pos_subquery, select_node, expected)) + return false; + return true; +}; + +bool ParserKQLBase::setSubQuerySource( + const ASTPtr & select_query, + const ASTPtr & source, + const bool dest_is_subquery, + const bool src_is_subquery, + const String & alias, + const int32_t table_index) +{ + ASTPtr table_expr; + auto apply_alias = [&]() + { + if (!alias.empty()) + { + if (table_expr->as()->table_expression->as()->subquery) + table_expr->as() + ->table_expression->as() + ->subquery->as() + ->alias + = alias; + else if (table_expr->as()->table_expression->as()->database_and_table_name) + { + table_expr + = table_expr->as()->table_expression->as()->database_and_table_name; + if (auto * ast_with_alias = dynamic_cast(table_expr.get())) + ast_with_alias->alias = alias; + } + } + }; + if (!dest_is_subquery) + { + if (!select_query || !select_query->as()->tables() + || select_query->as()->tables()->as()->children.empty()) + return false; + table_expr = select_query->as()->tables()->as()->children[table_index]; + + if (!src_is_subquery) + { + 
table_expr->as()->table_expression + = source->as()->tables()->children[0]->as()->table_expression; + } + else + { + table_expr->as()->table_expression + = source->children[0]->as()->table_expression; + } + apply_alias(); + return true; + } + + if (!select_query || select_query->as()->children.empty() + || !select_query->as()->children[table_index]->as()->table_expression + || select_query->as() + ->children[table_index] + ->as() + ->table_expression->as() + ->subquery->children.empty() + || select_query->as() + ->children[table_index] + ->as() + ->table_expression->as() + ->subquery->children[0] + ->as() + ->list_of_selects->children.empty() + || select_query->as() + ->children[table_index] + ->as() + ->table_expression->as() + ->subquery->children[0] + ->as() + ->list_of_selects->children[0] + ->as() + ->tables() + ->as() + ->children.empty()) + return false; + + table_expr = select_query->as() + ->children[table_index] + ->as() + ->table_expression->as() + ->subquery->children[0] + ->as() + ->list_of_selects->children[0] + ->as() + ->tables() + ->as() + ->children[0]; + + if (!src_is_subquery) + { + table_expr->as()->table_expression + = source->as()->tables()->children[0]->as()->table_expression; + } + else + { + table_expr->as()->table_expression + = source->children[0]->as()->table_expression; + } + apply_alias(); + + return true; +} + +String ParserKQLBase::getExprFromToken(const String & text, const uint32_t max_depth) { Tokens tokens(text.c_str(), text.c_str() + text.size()); IParser::Pos pos(tokens, max_depth); @@ -26,7 +174,7 @@ String ParserKQLBase :: getExprFromToken(const String & text, const uint32_t & m return getExprFromToken(pos); } -String ParserKQLBase :: getExprFromPipe(Pos & pos) +String ParserKQLBase::getExprFromPipe(Pos & pos) { uint16_t bracket_count = 0; auto begin = pos; @@ -36,7 +184,7 @@ String ParserKQLBase :: getExprFromPipe(Pos & pos) if (end->type == TokenType::OpeningRoundBracket) ++bracket_count; - if (end->type == 
TokenType::OpeningRoundBracket) + if (end->type == TokenType::ClosingRoundBracket) --bracket_count; if (end->type == TokenType::PipeMark && bracket_count == 0) @@ -45,57 +193,164 @@ String ParserKQLBase :: getExprFromPipe(Pos & pos) ++end; } --end; - return String(begin->begin, end->end); + return (begin <= end) ? String(begin->begin, end->end) : ""; } -String ParserKQLBase :: getExprFromToken(Pos & pos) +String ParserKQLBase::getExprFromToken(Pos & pos) { String res; - std::vector tokens; - String alias; + std::vector comma_pos; + std::vector columns; + size_t paren_count = 0; - while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + comma_pos.push_back(pos); + while (!pos->isEnd() && pos->type != TokenType::Semicolon) { - String token = String(pos->begin,pos->end); + if (pos->type == TokenType::PipeMark && paren_count == 0) + break; + + if (pos->type == TokenType::OpeningRoundBracket) + ++paren_count; + if (pos->type == TokenType::ClosingRoundBracket) + --paren_count; - if (token == "=") + if (pos->type == TokenType::Comma && paren_count == 0) { ++pos; - if (String(pos->begin,pos->end) != "~") + comma_pos.push_back(pos); + --pos; + } + ++pos; + } + + auto set_columns = [&](Pos & start_pos, Pos & end_pos) + { + bool has_alias = false; + auto equal_pos = start_pos; + auto columms_start_pos = start_pos; + auto it_pos = start_pos; + if (String(it_pos->begin, it_pos->end) == "=") + throw Exception(ErrorCodes::SYNTAX_ERROR, "Invalid equal symbol (=)"); + + while (it_pos < end_pos) + { + if (String(it_pos->begin, it_pos->end) == "=") { - alias = tokens.back(); - tokens.pop_back(); + ++it_pos; + if (String(it_pos->begin, it_pos->end) != "~") + { + if (has_alias) + throw Exception(ErrorCodes::SYNTAX_ERROR, "Invalid equal symbol (=)"); + has_alias = true; + } + --it_pos; + equal_pos = it_pos; } - --pos; + ++it_pos; } - else if (!KQLOperators().convert(tokens,pos)) + + if (has_alias) { - tokens.push_back(token); + columms_start_pos = 
equal_pos; + ++columms_start_pos; } + String column_str; + String function_name; + std::vector tokens; - if (pos->type == TokenType::Comma && !alias.empty()) + while (columms_start_pos < end_pos) { - tokens.pop_back(); - tokens.push_back("AS"); - tokens.push_back(alias); - tokens.push_back(","); - alias.clear(); + if (!KQLOperators().convert(tokens, columms_start_pos)) + { + if (columms_start_pos->type == TokenType::BareWord && function_name.empty()) + function_name = String(columms_start_pos->begin, columms_start_pos->end); + + auto expr = IParserKQLFunction::getExpression(columms_start_pos); + tokens.push_back(expr); + } + ++columms_start_pos; } - ++pos; - } - if (!alias.empty()) + for (const auto & token : tokens) + column_str = column_str.empty() ? token : column_str + " " + token; + + if (has_alias) + { + --equal_pos; + if (start_pos == equal_pos) + { + String new_column_str; + if (start_pos->type != TokenType::BareWord) + throw Exception(ErrorCodes::SYNTAX_ERROR, "{} is not a valid alias", std::string_view(start_pos->begin, start_pos->end)); + + if (function_name == "array_sort_asc" || function_name == "array_sort_desc") + new_column_str = std::format("{0}[1] AS {1}", column_str, String(start_pos->begin, start_pos->end)); + else + new_column_str = std::format("{0} AS {1}", column_str, String(start_pos->begin, start_pos->end)); + + columns.push_back(new_column_str); + } + else + { + String whole_alias(start_pos->begin, equal_pos->end); + + if (function_name != "array_sort_asc" && function_name != "array_sort_desc") + throw Exception(ErrorCodes::SYNTAX_ERROR, "{} is not a valid alias", whole_alias); + + if (start_pos->type != TokenType::OpeningRoundBracket && equal_pos->type != TokenType::ClosingRoundBracket) + throw Exception(ErrorCodes::SYNTAX_ERROR, "{} is not a valid alias for {}", whole_alias, function_name); + + String alias_inside; + bool comma_meet = false; + size_t index = 1; + ++start_pos; + while (start_pos < equal_pos) + { + if (start_pos->type == 
TokenType::Comma) + { + alias_inside.clear(); + if (comma_meet) + throw Exception(ErrorCodes::SYNTAX_ERROR, "{} has invalid alias for {}", whole_alias, function_name); + comma_meet = true; + } + else + { + if (!alias_inside.empty() || start_pos->type != TokenType::BareWord) + throw Exception(ErrorCodes::SYNTAX_ERROR, "{} has invalid alias for {}", whole_alias, function_name); + + alias_inside = String(start_pos->begin, start_pos->end); + auto new_column_str = std::format("{0}[{1}] AS {2}", column_str, index, alias_inside); + columns.push_back(new_column_str); + comma_meet = false; + ++index; + } + ++start_pos; + } + } + } + else + columns.push_back(column_str); + }; + + size_t cloumn_size = comma_pos.size(); + for (size_t i = 0; i < cloumn_size; ++i) { - tokens.push_back("AS"); - tokens.push_back(alias); + if (i == cloumn_size - 1) + set_columns(comma_pos[i], pos); + else + { + auto end_pos = comma_pos[i + 1]; + --end_pos; + set_columns(comma_pos[i], end_pos); + } } - for (auto const &token : tokens) - res = res.empty()? token : res +" " + token; + for (const auto & token : columns) + res = res.empty() ? 
token : res + "," + token; return res; } -std::unique_ptr ParserKQLQuery::getOperator(String & op_name) +std::unique_ptr ParserKQLQuery::getOperator(const std::string_view op_name) { if (op_name == "filter" || op_name == "where") return std::make_unique(); @@ -103,48 +358,51 @@ std::unique_ptr ParserKQLQuery::getOperator(String & op_name) return std::make_unique(); else if (op_name == "project") return std::make_unique(); + else if (op_name == "distinct") + return std::make_unique(); + else if (op_name == "extend") + return std::make_unique(); else if (op_name == "sort by" || op_name == "order by") return std::make_unique(); else if (op_name == "summarize") return std::make_unique(); else if (op_name == "table") return std::make_unique(); - else - return nullptr; + else if (op_name == "make-series") + return std::make_unique(); + else if (op_name == "mv-expand") + return std::make_unique(); + else if (op_name == "print") + return std::make_unique(); + else if (op_name == "count") + return std::make_unique(); + else if (op_name == "top") + return std::make_unique(); + else if (op_name == "top-hitters") + return std::make_unique(); + else if (op_name == "lookup") + return std::make_unique(); + else if (op_name == "join") + return std::make_unique(); + else if (op_name == "top-nested") + return std::make_unique(); + else if (op_name == "range") + return std::make_unique(); + else if (op_name == "project-away") + return std::make_unique(); + + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "No such KQL operator exists: {}", op_name); } -bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +bool ParserKQLQuery::getOperations(Pos & pos, Expected & expected, OperationsPos & operation_pos) { - struct KQLOperatorDataFlowState - { - String operator_name; - bool need_input; - bool gen_output; - int8_t backspace_steps; // how many steps to last token of previous pipe - }; - - auto select_query = std::make_shared(); - node = select_query; - ASTPtr tables; 
- - std::unordered_map kql_parser = - { - { "filter", {"filter", false, false, 3}}, - { "where", {"filter", false, false, 3}}, - { "limit", {"limit", false, true, 3}}, - { "take", {"limit", false, true, 3}}, - { "project", {"project", false, false, 3}}, - { "sort by", {"order by", false, false, 4}}, - { "order by", {"order by", false, false, 4}}, - { "table", {"table", false, false, 3}}, - { "summarize", {"summarize", true, true, 3}} - }; - - std::vector> operation_pos; - String table_name(pos->begin, pos->end); - operation_pos.push_back(std::make_pair("table", pos)); + if (table_name == "print" || table_name == "range") + operation_pos.emplace_back(table_name, pos); + else + operation_pos.push_back(std::make_pair("table", pos)); + ++pos; uint16_t bracket_count = 0; @@ -152,165 +410,173 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { if (pos->type == TokenType::OpeningRoundBracket) ++bracket_count; - if (pos->type == TokenType::OpeningRoundBracket) + if (pos->type == TokenType::ClosingRoundBracket) --bracket_count; if (pos->type == TokenType::PipeMark && bracket_count == 0) { ++pos; + auto op_pos_begin = pos; String kql_operator(pos->begin, pos->end); - if (kql_operator == "order" || kql_operator == "sort") + + auto validate_kql_operator = [&] { - ++pos; - ParserKeyword s_by("by"); - if (s_by.ignore(pos,expected)) + if (kql_operator == "order" || kql_operator == "sort") { - kql_operator = "order by"; - --pos; + ++pos; + ParserKeyword s_by("by"); + if (s_by.ignore(pos, expected)) + { + kql_operator = "order by"; + --pos; + } } - } - if (pos->type != TokenType::BareWord || kql_parser.find(kql_operator) == kql_parser.end()) + else + { + ++pos; + ParserToken s_dash(TokenType::Minus); + if (s_dash.ignore(pos, expected)) + kql_operator = {op_pos_begin->begin, pos->end}; + else + --pos; + } + + try + { + getOperator(kql_operator); + return true; + } + catch (const Exception &) + { + return false; + } + }; + + if 
(!validate_kql_operator()) return false; ++pos; - operation_pos.push_back(std::make_pair(kql_operator, pos)); + operation_pos.emplace_back(kql_operator, op_pos_begin); } else ++pos; } + return true; +} - auto kql_operator_str = operation_pos.back().first; - auto npos = operation_pos.back().second; - if (!npos.isValid()) - return false; +bool ParserKQLQuery::preProcess(String & source, Pos & pos) +{ + bool need_preprocess = false; + auto begin = pos; + while (!pos->isEnd() && pos->type != TokenType::Semicolon) + { + if (pos->type == TokenType::HereDoc) + need_preprocess = true; - auto kql_operator_p = getOperator(kql_operator_str); + ++pos; + } - if (!kql_operator_p) + auto end = pos; + --end; + source = String(begin->begin, end->end); + + auto replace = [&](std::string & str, const std::string & from, const std::string & to) + { + size_t start_pos = str.find(from); + if (start_pos != std::string::npos) + { + str.replace(start_pos, from.length(), to); + return true; + } return false; + }; - if (operation_pos.size() == 1) + if (need_preprocess) { - if (!kql_operator_p->parse(npos, node, expected)) - return false; + while (replace(source, "$left", "left_")) + ; + while (replace(source, "$right", "right_")) + ; } - else if (operation_pos.size() == 2 && operation_pos.front().first == "table") + + return need_preprocess; +} + +bool ParserKQLQuery::parseImpl(Pos & original_pos, ASTPtr & node, Expected & expected) +{ + auto pos = original_pos; + if (String pre_processed_query; preProcess(pre_processed_query, original_pos)) { - if (!kql_operator_p->parse(npos, node, expected)) - return false; - npos = operation_pos.front().second; - if (!ParserKQLTable().parse(npos, node, expected)) - return false; + Tokens tokens(pre_processed_query.data(), pre_processed_query.data() + pre_processed_query.size(), original_pos.max_depth); + IParser::Pos n_pos(tokens, original_pos.max_depth); + return executeImpl(n_pos, node, expected); } - else - { - String project_clause, order_clause, 
where_clause, limit_clause; - auto last_pos = operation_pos.back().second; - auto last_op = operation_pos.back().first; + return executeImpl(pos, node, expected); +} - auto set_main_query_clause =[&](String & op, Pos & op_pos) - { - auto op_str = ParserKQLBase::getExprFromPipe(op_pos); - if (op == "project") - project_clause = op_str; - else if (op == "where" || op == "filter") - where_clause = where_clause.empty() ? std::format("({})", op_str) : where_clause + std::format("AND ({})", op_str); - else if (op == "limit" || op == "take") - limit_clause = op_str; - else if (op == "order by" || op == "sort by") - order_clause = order_clause.empty() ? op_str : order_clause + "," + op_str; - }; - - set_main_query_clause(last_op, last_pos); - - operation_pos.pop_back(); - - if (kql_parser[last_op].need_input) - { - if (!kql_operator_p->parse(npos, node, expected)) - return false; - } - else +bool ParserKQLQuery::executeImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + OperationsPos operation_pos; + + if (!getOperations(pos, expected, operation_pos)) + return false; + + std::string substitute_query; + std::optional substitute_query_tokens; + for (int i = 0; i < std::ssize(operation_pos); ++i) + { + const auto & [cur_op, cur_pos] = operation_pos[i]; + const auto parser = getOperator(cur_op); + if (std::string temp; parser->updatePipeLine(cur_pos, temp)) { - while (!operation_pos.empty()) + if (i > 0) + temp.insert(0, std::string_view(operation_pos.front().second->begin, operation_pos[i].second->begin)); + + if (i < std::ssize(operation_pos) - 1) { - auto prev_op = operation_pos.back().first; - auto prev_pos = operation_pos.back().second; - - if (kql_parser[prev_op].gen_output) - break; - if (!project_clause.empty() && prev_op == "project") - break; - set_main_query_clause(prev_op, prev_pos); - operation_pos.pop_back(); - last_op = prev_op; - last_pos = prev_pos; + temp.append(" | "); + temp.append(operation_pos[i + 1].second->begin); } - } - - if 
(!operation_pos.empty()) - { - for (auto i = 0; i< kql_parser[last_op].backspace_steps; ++i) - --last_pos; - String sub_query = std::format("({})", String(operation_pos.front().second->begin, last_pos->end)); - Tokens token_subquery(sub_query.c_str(), sub_query.c_str() + sub_query.size()); - IParser::Pos pos_subquery(token_subquery, pos.max_depth); + substitute_query = std::move(temp); + substitute_query_tokens.emplace(substitute_query.c_str(), substitute_query.c_str() + substitute_query.size()); + IParser::Pos substitute_query_tokens_pos(*substitute_query_tokens, pos.max_depth); - if (!ParserKQLSubquery().parse(pos_subquery, tables, expected)) - return false; - select_query->setExpression(ASTSelectQuery::Expression::TABLES, std::move(tables)); - } - else - { - if (!ParserKQLTable().parse(last_pos, node, expected)) + operation_pos.clear(); + if (!getOperations(substitute_query_tokens_pos, expected, operation_pos)) return false; } + } - auto set_query_clasue =[&](String op_str, String op_calsue) + for (auto & [cur_op, cur_pos] : operation_pos) + { + auto enclosing_select_query = std::make_shared(); + if (node) { - auto oprator = getOperator(op_str); - if (oprator) - { - Tokens token_clause(op_calsue.c_str(), op_calsue.c_str() + op_calsue.size()); - IParser::Pos pos_clause(token_clause, pos.max_depth); - if (!oprator->parse(pos_clause, node, expected)) - return false; - } - return true; - }; + auto subquery = std::make_shared(); + auto table_expression = std::make_shared(); + table_expression->subquery = subquery; - if (!select_query->select()) - { - if (project_clause.empty()) - project_clause = "*"; - if (!set_query_clasue("project", project_clause)) - return false; - } + auto tables_in_select_query_element = std::make_shared(); + tables_in_select_query_element->table_expression = table_expression; - if (!order_clause.empty()) - if (!set_query_clasue("order by", order_clause)) - return false; + auto nested_query_wrapper = std::make_shared(); + 
nested_query_wrapper->children.emplace_back(std::move(tables_in_select_query_element)) + ->children.emplace_back(std::move(table_expression)) + ->children.emplace_back(std::move(subquery)) + ->children.emplace_back(wrapInSelectWithUnion(node)); - if (!where_clause.empty()) - if (!set_query_clasue("where", where_clause)) - return false; - - if (!limit_clause.empty()) - if (!set_query_clasue("limit", limit_clause)) - return false; - return true; - } + enclosing_select_query->setExpression(ASTSelectQuery::Expression::TABLES, std::move(nested_query_wrapper)); + } - if (!select_query->select()) - { - auto expr = String("*"); - Tokens tokens(expr.c_str(), expr.c_str()+expr.size()); - IParser::Pos new_pos(tokens, pos.max_depth); - if (!std::make_unique()->parse(new_pos, node, expected)) + node = std::move(enclosing_select_query); + if (const auto parser = getOperator(cur_op); !parser->parse(cur_pos, node, expected)) return false; + + if (auto * select_query = node->as(); !select_query->select()) + setSelectAll(*select_query); } - return true; + return true; } bool ParserKQLSubquery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) @@ -339,4 +605,46 @@ bool ParserKQLSubquery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return true; } +bool ParserSimpleCHSubquery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ASTPtr sub_select_node; + ParserSelectWithUnionQuery select; + + if (pos->type != TokenType::OpeningRoundBracket) + return false; + ++pos; + + if (!select.parse(pos, sub_select_node, expected)) + return false; + + if (pos->type != TokenType::ClosingRoundBracket) + return false; + ++pos; + + if (parent_select_node && parent_select_node->as()->tables()) + { + auto select_query = sub_select_node->as()->list_of_selects->children[0]; + select_query->as()->setExpression( + ASTSelectQuery::Expression::TABLES, parent_select_node->as()->tables()); + } + + ASTPtr node_subquery = std::make_shared(); + 
node_subquery->children.push_back(sub_select_node); + + ASTPtr node_table_expr = std::make_shared(); + node_table_expr->as()->subquery = node_subquery; + + node_table_expr->children.emplace_back(node_subquery); + + ASTPtr node_table_in_select_query_emlement = std::make_shared(); + node_table_in_select_query_emlement->as()->table_expression = node_table_expr; + + ASTPtr res = std::make_shared(); + + res->children.emplace_back(node_table_in_select_query_emlement); + + node = res; + return true; +} + } diff --git a/src/Parsers/Kusto/ParserKQLQuery.h b/src/Parsers/Kusto/ParserKQLQuery.h index 42f5f84f0317..dbef3a916f10 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.h +++ b/src/Parsers/Kusto/ParserKQLQuery.h @@ -5,28 +5,54 @@ namespace DB { +using OperationsPos = std::vector>; + class ParserKQLBase : public IParserBase { public: static String getExprFromToken(Pos & pos); + static String getExprFromToken(const String & text, const uint32_t max_depth); static String getExprFromPipe(Pos & pos); - static String getExprFromToken(const String & text, const uint32_t & max_depth); + static bool setSubQuerySource( + const ASTPtr & select_query, + const ASTPtr & source, + const bool dest_is_subquery, + const bool src_is_subquery, + const String & alias = "", + const int32_t table_index = 0); + static bool parseSQLQueryByString(ParserPtr && parser, String & query, ASTPtr & select_node, int32_t max_depth); + bool parseByString(const String & expr, ASTPtr & node, const uint32_t max_depth); + virtual bool updatePipeLine([[maybe_unused]] Pos pos, [[maybe_unused]] String & query) { return false; } }; class ParserKQLQuery : public IParserBase { +public: + static bool getOperations(Pos & pos, Expected & expected, OperationsPos & operation_pos); protected: - static std::unique_ptr getOperator(String &op_name); + static std::unique_ptr getOperator(std::string_view op_name); + static bool preProcess(String & source, Pos & pos); const char * getName() const override { return "KQL query"; } 
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + static bool executeImpl(Pos & pos, ASTPtr & node, Expected & expected); }; -class ParserKQLSubquery : public IParserBase +class ParserKQLSubquery : public ParserKQLBase { protected: const char * getName() const override { return "KQL subquery"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; +class ParserSimpleCHSubquery : public ParserKQLBase +{ +public: + explicit ParserSimpleCHSubquery(ASTPtr parent_select_node_ = nullptr) : parent_select_node(parent_select_node_) { } + +protected: + const char * getName() const override { return "Simple ClickHouse subquery"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + ASTPtr parent_select_node; +}; } diff --git a/src/Parsers/Kusto/ParserKQLRange.cpp b/src/Parsers/Kusto/ParserKQLRange.cpp new file mode 100644 index 000000000000..19e111d0b7cc --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLRange.cpp @@ -0,0 +1,90 @@ +#include +#include +#include +#include +#include +namespace DB +{ + +namespace ErrorCodes +{ + extern const int SYNTAX_ERROR; +} + +bool ParserKQLRange::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (!ParserKeyword("range").ignore(pos, expected)) + return false; + + ASTPtr select_node; + String column_name, start, stop, step; + auto start_pos = pos; + auto end_pos = pos; + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + if (String(pos->begin, pos->end) == "from") + { + end_pos = pos; + --end_pos; + if (end_pos < start_pos) + throw Exception(ErrorCodes::SYNTAX_ERROR, "Missing column name for range operator"); + + column_name = String(start_pos->begin, end_pos->end); + start_pos = pos; + ++start_pos; + } + if (String(pos->begin, pos->end) == "to") + { + if (column_name.empty()) + throw Exception(ErrorCodes::SYNTAX_ERROR, "Missing `from` for range operator"); + end_pos = pos; + --end_pos; + if (end_pos < start_pos) 
+ throw Exception(ErrorCodes::SYNTAX_ERROR, "Missing start expression for range operator"); + start = String(start_pos->begin, end_pos->end); + start_pos = pos; + ++start_pos; + } + if (String(pos->begin, pos->end) == "step") + { + if (column_name.empty()) + throw Exception(ErrorCodes::SYNTAX_ERROR, "Missing `from` for range operator"); + if (start.empty()) + throw Exception(ErrorCodes::SYNTAX_ERROR, "Missing 'to' for range operator"); + + end_pos = pos; + --end_pos; + if (end_pos < start_pos) + throw Exception(ErrorCodes::SYNTAX_ERROR, "Missing stop expression for range operator"); + + stop = String(start_pos->begin, end_pos->end); + start_pos = pos; + ++start_pos; + } + ++pos; + } + + if (column_name.empty() || start.empty() || stop.empty()) + throw Exception(ErrorCodes::SYNTAX_ERROR, "Missing required expression for range operator"); + + end_pos = pos; + --end_pos; + if (end_pos < start_pos) + throw Exception(ErrorCodes::SYNTAX_ERROR, "Missing step expression for range operator"); + + step = String(start_pos->begin, end_pos->end); + + column_name = getExprFromToken(column_name, pos.max_depth); + start = getExprFromToken(start, pos.max_depth); + stop = getExprFromToken(stop, pos.max_depth); + step = getExprFromToken(step, pos.max_depth); + String query = std::format("SELECT * FROM (SELECT kql_range({0}, {1},{2}) AS {3}) ARRAY JOIN {3}", start, stop, step, column_name); + + if (!parseSQLQueryByString(std::make_unique(), query, select_node, pos.max_depth)) + return false; + node = std::move(select_node); + + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLRange.h b/src/Parsers/Kusto/ParserKQLRange.h new file mode 100644 index 000000000000..124e06a4d329 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLRange.h @@ -0,0 +1,16 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLRange : public ParserKQLBase +{ +protected: + const char * getName() const override { return "KQL range"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected 
& expected) override; +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLSort.cpp b/src/Parsers/Kusto/ParserKQLSort.cpp index f7540d729fdd..cc53b24c65e9 100644 --- a/src/Parsers/Kusto/ParserKQLSort.cpp +++ b/src/Parsers/Kusto/ParserKQLSort.cpp @@ -8,8 +8,11 @@ namespace DB { -bool ParserKQLSort :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +bool ParserKQLSort::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { + if (!ParserKeyword("order by").ignore(pos, expected) && !ParserKeyword("sort by").ignore(pos, expected)) + return false; + bool has_dir = false; std::vector has_directions; ParserOrderByExpressionList order_list; @@ -35,6 +38,7 @@ bool ParserKQLSort :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) has_directions.push_back(has_dir); has_dir = false; } + ++new_pos; } has_directions.push_back(has_dir); @@ -53,7 +57,6 @@ bool ParserKQLSort :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } node->as()->setExpression(ASTSelectQuery::Expression::ORDER_BY, std::move(order_expression_list)); - return true; } diff --git a/src/Parsers/Kusto/ParserKQLStatement.cpp b/src/Parsers/Kusto/ParserKQLStatement.cpp index 21e480234d39..3a54cf97a9af 100644 --- a/src/Parsers/Kusto/ParserKQLStatement.cpp +++ b/src/Parsers/Kusto/ParserKQLStatement.cpp @@ -71,6 +71,7 @@ bool ParserKQLTaleFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (s_lparen.ignore(pos, expected)) { ++paren_count; + auto pos_start = pos; while (!pos->isEnd()) { if (pos->type == TokenType::ClosingRoundBracket) @@ -80,10 +81,10 @@ bool ParserKQLTaleFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (paren_count == 0) break; - - kql_statement = kql_statement + " " + String(pos->begin,pos->end); ++pos; } + kql_statement = String(pos_start->begin, (--pos)->end); + ++pos; Tokens token_kql(kql_statement.c_str(), kql_statement.c_str() + kql_statement.size()); IParser::Pos pos_kql(token_kql, pos.max_depth); diff --git 
a/src/Parsers/Kusto/ParserKQLStatement.h b/src/Parsers/Kusto/ParserKQLStatement.h index ef44b2d6c8ac..864cda5531ad 100644 --- a/src/Parsers/Kusto/ParserKQLStatement.h +++ b/src/Parsers/Kusto/ParserKQLStatement.h @@ -19,7 +19,6 @@ class ParserKQLStatement : public IParserBase {} }; - class ParserKQLWithOutput : public IParserBase { protected: diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp index 75eacb1adbd2..8f48a427277c 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.cpp +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -1,3 +1,5 @@ +#include +#include #include #include #include @@ -15,11 +17,24 @@ #include #include +#include +#include +#include +#include + namespace DB { -bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +namespace ErrorCodes +{ + extern const int SYNTAX_ERROR; +} + +bool ParserKQLSummarize::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { + if (!ParserKeyword("summarize").ignore(pos, expected)) + return false; + ASTPtr select_expression_list; ASTPtr group_expression_list; @@ -27,33 +42,169 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte String expr_groupby; String expr_columns; bool groupby = false; + auto column_begin_pos = pos; + + uint16_t bracket_count = 0; + int32_t new_column_index = 1; + + std::vector expr_aggregations; + std::vector expr_groupbys; + + std::unordered_set aggregate_functions( + {"arg_max", + "arg_min", + "avg", + "avgif", + "binary_all_and", + "binary_all_or", + "binary_all_xor", + "buildschema", + "count", + "countif", + "dcount", + "dcountif", + "make_bag", + "make_bag_if", + "make_list", + "make_list_if", + "make_list_with_nulls", + "make_set", + "make_set_if", + "max", + "maxif", + "min", + "minif", + "percentile", + "percentilew", + "percentiles", + "percentiles_array", + "percentilesw", + "percentilesw_array", + "stdev", + "stdevif", + "sum", + "sumif", + "take_any", + "take_anyif", + 
"variance", + "varianceif", + "variancep"}); + + auto apply_aliais = [&](Pos & begin_pos, Pos & end_pos, bool is_groupby) + { + if (String(begin_pos->begin, begin_pos->end) == "by") + return; + if (end_pos->end <= begin_pos->begin) + throw Exception(ErrorCodes::SYNTAX_ERROR, "Syntax error near keyword \"{}\"", std::string_view(begin_pos->begin, begin_pos->end)); + auto expr = String(begin_pos->begin, end_pos->end); + auto equal_pos = begin_pos; + ++equal_pos; + if (!is_groupby) + { + if (String(equal_pos->begin, equal_pos->end) != "=") + { + String alias; + String aggregate_fun = String(begin_pos->begin, begin_pos->end); + if (aggregate_functions.find(aggregate_fun) == aggregate_functions.end()) + { + alias = std::format("Columns{}", new_column_index); + ++new_column_index; + } + else + { + alias = std::format("{}_", aggregate_fun); + auto agg_colum_pos = begin_pos; + ++agg_colum_pos; + ++agg_colum_pos; + ++agg_colum_pos; + if (agg_colum_pos->type == TokenType::Comma || agg_colum_pos->type == TokenType::ClosingRoundBracket) + { + --agg_colum_pos; + if (agg_colum_pos->type != TokenType::ClosingRoundBracket) + alias = alias + String(agg_colum_pos->begin, agg_colum_pos->end); + } + } + expr = std::format("{} = {}", alias, expr); + } + expr_aggregations.push_back(expr); + } + else + { + if (String(equal_pos->begin, equal_pos->end) != "=") + { + String groupby_fun = String(begin_pos->begin, begin_pos->end); + if (equal_pos->isEnd() || equal_pos->type == TokenType::Comma || equal_pos->type == TokenType::Semicolon + || equal_pos->type == TokenType::PipeMark) + { + expr = groupby_fun; + } + else + { + String alias; + if (groupby_fun == "bin" || groupby_fun == "bin_at") + { + auto bin_colum_pos = begin_pos; + ++bin_colum_pos; + ++bin_colum_pos; + alias = String(bin_colum_pos->begin, bin_colum_pos->end); + ++bin_colum_pos; + if (bin_colum_pos->type != TokenType::Comma) + alias.clear(); + } + if (alias.empty()) + { + alias = std::format("Columns{}", new_column_index); + 
++new_column_index; + } - auto begin = pos; - auto pos_groupby = pos; + expr = std::format("{} = {}", alias, expr); + } + } + expr_groupbys.push_back(expr); + } + }; while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { - if (String(pos->begin, pos->end) == "by") + if (pos->type == TokenType::OpeningRoundBracket) + ++bracket_count; + + if (pos->type == TokenType::ClosingRoundBracket) + --bracket_count; + + if ((bracket_count == 0 and pos->type == TokenType::Comma) || String(pos->begin, pos->end) == "by") { - groupby = true; - auto end = pos; - --end; - expr_aggregation = begin <= end ? String(begin->begin, end->end) : ""; - pos_groupby = pos; - ++pos_groupby; + auto end_pos = pos; + --end_pos; + apply_aliais(column_begin_pos, end_pos, groupby); + if (String(pos->begin, pos->end) == "by") + groupby = true; + column_begin_pos = pos; + ++column_begin_pos; } ++pos; } --pos; - if (groupby) - expr_groupby = String(pos_groupby->begin, pos->end); - else - expr_aggregation = begin <= pos ? String(begin->begin, pos->end) : ""; + apply_aliais(column_begin_pos, pos, groupby); - auto expr_aggregation_str = expr_aggregation.empty() ? "" : expr_aggregation +","; - expr_columns = groupby ? expr_aggregation_str + expr_groupby : expr_aggregation_str; + for (auto const & expr : expr_aggregations) + expr_aggregation = expr_aggregation.empty() ? expr : expr_aggregation + "," + expr; - String converted_columns = getExprFromToken(expr_columns, pos.max_depth); + for (auto const & expr : expr_groupbys) + expr_groupby = expr_groupby.empty() ? 
expr : expr_groupby + "," + expr; + + if (!expr_groupby.empty()) + expr_columns = expr_groupby; + + if (!expr_aggregation.empty()) + { + if (expr_columns.empty()) + expr_columns = expr_aggregation; + else + expr_columns = expr_columns + "," + expr_aggregation; + } + + String converted_columns = getExprFromToken(expr_columns, pos.max_depth); Tokens token_converted_columns(converted_columns.c_str(), converted_columns.c_str() + converted_columns.size()); IParser::Pos pos_converted_columns(token_converted_columns, pos.max_depth); @@ -65,7 +216,7 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte if (groupby) { - String converted_groupby = getExprFromToken(expr_groupby, pos.max_depth); + String converted_groupby = getExprFromToken(expr_groupby, pos.max_depth); Tokens token_converted_groupby(converted_groupby.c_str(), converted_groupby.c_str() + converted_groupby.size()); IParser::Pos postoken_converted_groupby(token_converted_groupby, pos.max_depth); @@ -77,5 +228,4 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte return true; } - } diff --git a/src/Parsers/Kusto/ParserKQLSummarize.h b/src/Parsers/Kusto/ParserKQLSummarize.h index 1aad02705dfd..e95043c15232 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.h +++ b/src/Parsers/Kusto/ParserKQLSummarize.h @@ -8,7 +8,6 @@ namespace DB class ParserKQLSummarize : public ParserKQLBase { - protected: const char * getName() const override { return "KQL summarize"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; diff --git a/src/Parsers/Kusto/ParserKQLTable.cpp b/src/Parsers/Kusto/ParserKQLTable.cpp index 6356ad688b67..3e071f592f8a 100644 --- a/src/Parsers/Kusto/ParserKQLTable.cpp +++ b/src/Parsers/Kusto/ParserKQLTable.cpp @@ -3,7 +3,9 @@ #include #include #include + #include + namespace DB { diff --git a/src/Parsers/Kusto/ParserKQLTable.h b/src/Parsers/Kusto/ParserKQLTable.h index c67dcb151562..8e03b50cb8bb 100644 --- 
a/src/Parsers/Kusto/ParserKQLTable.h
+++ b/src/Parsers/Kusto/ParserKQLTable.h
@@ -8,7 +8,6 @@ namespace DB
 
 class ParserKQLTable : public ParserKQLBase
 {
-
 protected:
     const char * getName() const override { return "KQL Table"; }
     bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
diff --git a/src/Parsers/Kusto/ParserKQLTimespan.cpp b/src/Parsers/Kusto/ParserKQLTimespan.cpp
new file mode 100644
index 000000000000..2a329c9a80d0
--- /dev/null
+++ b/src/Parsers/Kusto/ParserKQLTimespan.cpp
@@ -0,0 +1,257 @@
+#include "ParserKQLTimespan.h"
+#include "Utilities.h"
+
+#include // NOTE(review): header names and template arguments appear to have been stripped from this listing - restore from upstream
+
+#include
+#include
+
+namespace x3 = boost::spirit::x3;
+
+namespace
+{
+enum class KQLTimespanUnit // units accepted after a numeric timespan value; Tick is the unit every value is converted to
+{
+    Day,
+    Hour,
+    Minute,
+    Second,
+    Millisecond,
+    Microsecond,
+    Nanosecond,
+    Tick
+};
+
+template
+concept arithmetic = std::is_arithmetic_v;
+
+Int64 kqlTimespanToTicks(const arithmetic auto value, const KQLTimespanUnit unit) // converts `value` expressed in `unit` to ticks
+{
+    static constexpr Int64 TICKS_PER_MICROSECOND = 10; // i.e. one tick is 100 ns
+    static constexpr auto TICKS_PER_MILLISECOND = TICKS_PER_MICROSECOND * 1000;
+    static constexpr auto TICKS_PER_SECOND = TICKS_PER_MILLISECOND * 1000;
+    static constexpr auto TICKS_PER_MINUTE = TICKS_PER_SECOND * 60;
+    static constexpr auto TICKS_PER_HOUR = TICKS_PER_MINUTE * 60;
+    static constexpr auto TICKS_PER_DAY = TICKS_PER_HOUR * 24;
+
+    switch (unit) // no default branch: every KQLTimespanUnit enumerator has a case below
+    {
+        case KQLTimespanUnit::Day:
+            return static_cast(value * TICKS_PER_DAY);
+        case KQLTimespanUnit::Hour:
+            return static_cast(value * TICKS_PER_HOUR);
+        case KQLTimespanUnit::Minute:
+            return static_cast(value * TICKS_PER_MINUTE);
+        case KQLTimespanUnit::Second:
+            return static_cast(value * TICKS_PER_SECOND);
+        case KQLTimespanUnit::Millisecond:
+            return static_cast(value * TICKS_PER_MILLISECOND);
+        case KQLTimespanUnit::Microsecond:
+            return static_cast(value * TICKS_PER_MICROSECOND);
+        case KQLTimespanUnit::Tick:
+            return static_cast(value);
+        case KQLTimespanUnit::Nanosecond:
+            return static_cast(value / 100); // 100 ns per tick; the Int64 result cannot hold anything below one tick
+    }
+}
+
+struct TimespanUnits : public x3::symbols // symbol table mapping unit suffixes ("d", "ms", "ticks", ...) to KQLTimespanUnit
+{
+    TimespanUnits()
+    {
+        // clang-format off
+        add
+            ("d", KQLTimespanUnit::Day)
+            ("day", KQLTimespanUnit::Day)
+            ("days", KQLTimespanUnit::Day)
+            ("h", KQLTimespanUnit::Hour)
+            ("hr", KQLTimespanUnit::Hour)
+            ("hrs", KQLTimespanUnit::Hour)
+            ("hour", KQLTimespanUnit::Hour)
+            ("hours", KQLTimespanUnit::Hour)
+            ("m", KQLTimespanUnit::Minute)
+            ("min", KQLTimespanUnit::Minute)
+            ("minute", KQLTimespanUnit::Minute)
+            ("minutes", KQLTimespanUnit::Minute)
+            ("s", KQLTimespanUnit::Second)
+            ("sec", KQLTimespanUnit::Second)
+            ("second", KQLTimespanUnit::Second)
+            ("seconds", KQLTimespanUnit::Second)
+            ("ms", KQLTimespanUnit::Millisecond)
+            ("milli", KQLTimespanUnit::Millisecond)
+            ("millis", KQLTimespanUnit::Millisecond)
+            ("millisec", KQLTimespanUnit::Millisecond)
+            ("millisecond", KQLTimespanUnit::Millisecond)
+            ("milliseconds", KQLTimespanUnit::Millisecond)
+            ("micro", KQLTimespanUnit::Microsecond)
+            ("micros", KQLTimespanUnit::Microsecond)
+            ("microsec", KQLTimespanUnit::Microsecond)
+            ("microsecond", KQLTimespanUnit::Microsecond)
+            ("microseconds", KQLTimespanUnit::Microsecond)
+            ("nano", KQLTimespanUnit::Nanosecond)
+            ("nanos", KQLTimespanUnit::Nanosecond)
+            ("nanosec", KQLTimespanUnit::Nanosecond)
+            ("nanosecond", KQLTimespanUnit::Nanosecond)
+            ("nanoseconds", KQLTimespanUnit::Nanosecond)
+            ("tick", KQLTimespanUnit::Tick)
+            ("ticks", KQLTimespanUnit::Tick)
+        ;
+        // clang-format on
+    }
+};
+
+const TimespanUnits timespan_units;
+
+struct KQLTimespanComponents // fields filled by the separated-components grammar: [sign][days.]hours:minutes[:seconds[.fraction]]
+{
+    static constexpr auto MAX_SECONDS_FRACTIONAL = 10'000'000U; // exclusive bound: the fraction holds at most 7 digits, the number of ticks in one second
+
+    bool isValid() const { return hours < 24 && minutes < 60 && seconds < 60 && seconds_fractional < MAX_SECONDS_FRACTIONAL; } // days is not range-checked
+    std::optional toTicks() const // total length in ticks, or an empty optional when isValid() fails
+    {
+        if (!isValid())
+            return {};
+
+        const auto sign = is_negative ? -1 : 1;
+        auto seconds_fractional_in_ticks = seconds_fractional;
+        while (seconds_fractional_in_ticks > 0 && seconds_fractional_in_ticks < (MAX_SECONDS_FRACTIONAL / 10)) // scale a non-zero fraction up to 7 digits
+            seconds_fractional_in_ticks *= 10; // NOTE(review): leading zeros of the fraction are not recorded here, so ".05" would presumably scale like ".5" - confirm
+
+        const auto ticks = kqlTimespanToTicks(days, KQLTimespanUnit::Day) + kqlTimespanToTicks(hours, KQLTimespanUnit::Hour)
+            + kqlTimespanToTicks(minutes, KQLTimespanUnit::Minute) + kqlTimespanToTicks(seconds, KQLTimespanUnit::Second)
+            + kqlTimespanToTicks(seconds_fractional_in_ticks, KQLTimespanUnit::Tick);
+        return sign * ticks;
+    }
+
+    bool is_negative = false;
+    unsigned days = 0;
+    unsigned hours = 0;
+    unsigned minutes = 0;
+    unsigned seconds = 0;
+    unsigned seconds_fractional = 0;
+};
+
+struct KQLTimespanNull // empty tag type, attribute of the "null" alternative
+{
+};
+
+using KQLTimespanValueWithUnit = std::pair;
+
+using x3::_attr;
+using x3::_val;
+using x3::double_;
+using x3::int_;
+using x3::lexeme;
+using x3::lit;
+using x3::omit;
+using x3::uint_;
+
+const auto SET_DAYS = [](auto & ctx) { _val(ctx).days = _attr(ctx); }; // semantic actions: copy a parsed attribute into the rule's value
+const auto SET_HOURS_AND_MINUTES = [](auto & ctx)
+{
+    auto & kql_timespan_components = _val(ctx);
+    const auto & attributes = _attr(ctx);
+    kql_timespan_components.hours = at_c<0>(attributes);
+    kql_timespan_components.minutes = at_c<1>(attributes);
+};
+
+const auto SET_NEGATIVE = [](auto & ctx) { _val(ctx).is_negative = true; };
+const auto SET_SECONDS = [](auto & ctx) { _val(ctx).seconds = _attr(ctx); };
+const auto SET_SECONDS_FRACTIONAL = [](auto & ctx) { _val(ctx).seconds_fractional = _attr(ctx); };
+
+const x3::rule KQL_TIMESPAN_SEPARATED_COMPONENTS = "KQL timespan separated components";
+// clang-format off
+const auto KQL_TIMESPAN_SEPARATED_COMPONENTS_def =
+    lexeme
+    [
+        -(lit('-')[SET_NEGATIVE] | lit('+'))
+        >> -(uint_ >> lit('.'))[SET_DAYS]
+        >> (uint_ >> lit(':') >> uint_)[SET_HOURS_AND_MINUTES]
+        >> -(lit(':') >> uint_[SET_SECONDS] >> -(lit('.') >> uint_[SET_SECONDS_FRACTIONAL]))
+    ];
+// clang-format on
+
+const auto SET_VALUE_AND_UNIT = [](auto & ctx)
+{
+    const auto & value_and_unit = _attr(ctx);
+    _val(ctx) = {at_c<0>(value_and_unit), at_c<1>(value_and_unit)};
+};
+
+const x3::rule KQL_TIMESPAN_VALUE_WITH_UNIT = "KQL timespan value with unit";
+const auto KQL_TIMESPAN_VALUE_WITH_UNIT_def = (double_ >> timespan_units)[SET_VALUE_AND_UNIT]; // a number directly followed by a unit suffix
+
+const x3::rule KQL_TIMESPAN_DAY_VALUE = "KQL timespan day value";
+const auto KQL_TIMESPAN_DAY_VALUE_def = int_; // a bare integer; parse() converts it as a number of days
+
+const x3::rule KQL_TIMESPAN_NULL = "KQL timespan null";
+const auto KQL_TIMESPAN_NULL_def = omit[lit("null")];
+
+const x3::rule>
+    KQL_TIMESPAN = "KQL timespan";
+const auto KQL_TIMESPAN_def = KQL_TIMESPAN_SEPARATED_COMPONENTS | KQL_TIMESPAN_VALUE_WITH_UNIT | KQL_TIMESPAN_DAY_VALUE | KQL_TIMESPAN_NULL; // alternatives are tried in this order
+
+BOOST_SPIRIT_DEFINE(
+    KQL_TIMESPAN_SEPARATED_COMPONENTS, KQL_TIMESPAN_VALUE_WITH_UNIT, KQL_TIMESPAN_DAY_VALUE, KQL_TIMESPAN_NULL, KQL_TIMESPAN);
+}
+
+namespace DB
+{
+namespace ErrorCodes
+{
+    extern const int BAD_ARGUMENTS;
+}
+
+std::string kqlTicksToInterval(const std::optional ticks) // renders ticks as a toIntervalNanosecond(...) call text, or toIntervalNanosecond(null) when empty
+{
+    return std::format("toIntervalNanosecond({})", ticks ? std::to_string(*ticks * 100) : "null"); // one tick is 100 ns
+}
+
+std::optional ParserKQLTimespan::parse(const std::string_view expression) // parses a timespan expression into ticks; throws BAD_ARGUMENTS when it is not a valid timespan
+{
+    const auto throw_exception
+        = [&expression] { throw Exception(ErrorCodes::BAD_ARGUMENTS, "Not a correct timespan expression: {}", expression); };
+
+    const auto * first = expression.cbegin();
+    const auto * last = expression.cend();
+
+    boost::variant kql_timespan_variant;
+    const auto success = x3::parse(first, last, KQL_TIMESPAN, kql_timespan_variant);
+
+    if (!success || first != last) // the whole input has to be consumed, not just a prefix
+        throw_exception();
+
+    return boost::apply_visitor( // one branch per variant alternative; the types tested by is_same_v are missing in this listing
+        [&throw_exception](const auto & kql_timespan) -> std::optional
+        {
+            using Type = std::decay_t;
+            if constexpr (std::is_same_v)
+            {
+                const auto ticks = kql_timespan.toTicks();
+                if (!ticks) // components out of range (see isValid)
+                    throw_exception();
+
+                return *ticks;
+            }
+            else if constexpr (std::is_same_v)
+                return kqlTimespanToTicks(kql_timespan.first, kql_timespan.second);
+            else if constexpr (std::is_same_v)
+                return kqlTimespanToTicks(kql_timespan, KQLTimespanUnit::Day);
+            else if constexpr (std::is_same_v)
+                return std::nullopt; // presumably the "null" alternative - confirm against upstream
+        },
+        kql_timespan_variant);
+}
+
+bool ParserKQLTimespan::tryParse(const std::string_view expression, std::optional & ticks) // non-throwing wrapper around parse()
+{
+    try
+    {
+        ticks = parse(expression); // `ticks` is only assigned when parse() does not throw
+        return true;
+    }
+    catch (...)
+    {
+        return false;
+    }
+}
+}
diff --git a/src/Parsers/Kusto/ParserKQLTimespan.h b/src/Parsers/Kusto/ParserKQLTimespan.h
new file mode 100644
index 000000000000..f98de09fa980
--- /dev/null
+++ b/src/Parsers/Kusto/ParserKQLTimespan.h
@@ -0,0 +1,18 @@
+#pragma once
+
+#include
+
+#include
+#include
+
+namespace DB
+{
+std::string kqlTicksToInterval(std::optional ticks);
+
+class ParserKQLTimespan
+{
+public:
+    static std::optional parse(std::string_view expression); // throws on an invalid expression
+    static bool tryParse(std::string_view expression, std::optional & ticks); // returns false instead of throwing
+};
+}
diff --git a/src/Parsers/Kusto/ParserKQLTop.cpp b/src/Parsers/Kusto/ParserKQLTop.cpp
new file mode 100644
index 000000000000..8e8027216680
--- /dev/null
+++ b/src/Parsers/Kusto/ParserKQLTop.cpp
@@ -0,0 +1,52 @@
+#include
+#include
+#include
+
+#include
+
+namespace DB
+{
+namespace ErrorCodes
+{
+    extern const int SYNTAX_ERROR;
+}
+
+bool ParserKQLTop::parseImpl(Pos & /*pos*/, ASTPtr & /*node*/, Expected & /*expected*/) // no-op: the operator is rewritten by updatePipeLine()
+{
+    return true;
+}
+
+bool ParserKQLTop::updatePipeLine(Pos pos, String & query) // rewrites "top <limit> by <sort>" into "sort by <sort> | take <limit>"; returns false if `pos` is not at "top"
+{
+    if (!ParserKeyword("top").ignore(pos))
+        return false;
+
+    if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon)
+        throw Exception(ErrorCodes::SYNTAX_ERROR, "Syntax error near top operator");
+
+    String limit_expr, sort_expr;
+    auto start_pos = pos;
+    auto end_pos = pos;
+    while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon)
+    {
+        if (String(pos->begin, pos->end) == "by")
+        {
+            auto limt_end_pos = pos;
+            --limt_end_pos;
+            limit_expr = (start_pos <= limt_end_pos) ? String(start_pos->begin, limt_end_pos->end) : ""; // guard: "by" as the first token would otherwise give a reversed character range
+            start_pos = pos;
+            ++start_pos;
+        }
+        end_pos = pos;
+        ++pos;
+    }
+    sort_expr = (start_pos <= end_pos) ? String(start_pos->begin, end_pos->end) : "";
+    if (limit_expr.empty() || sort_expr.empty()) // also reached when the limit before "by" is missing
+        throw Exception(ErrorCodes::SYNTAX_ERROR, "top operator need a by clause");
+
+    query = std::format("sort by {} | take {}", sort_expr, limit_expr);
+
+    return true;
+}
+
+}
diff --git a/src/Parsers/Kusto/ParserKQLTop.h b/src/Parsers/Kusto/ParserKQLTop.h
new file mode 100644
index 000000000000..b5aecc8d7f05
--- /dev/null
+++ b/src/Parsers/Kusto/ParserKQLTop.h
@@ -0,0 +1,17 @@
+#pragma once
+
+#include
+#include
+
+namespace DB
+{
+
+class ParserKQLTop : public ParserKQLBase
+{
+protected:
+    const char * getName() const override { return "KQL top"; }
+    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
+    bool updatePipeLine(Pos pos, String & query) override;
+};
+
+}
diff --git a/src/Parsers/Kusto/ParserKQLTopHitter.cpp b/src/Parsers/Kusto/ParserKQLTopHitter.cpp
new file mode 100644
index 000000000000..426a7dad1f89
--- /dev/null
+++ b/src/Parsers/Kusto/ParserKQLTopHitter.cpp
@@ -0,0 +1,69 @@
+#include
+#include
+#include
+
+#include
+
+namespace DB
+{
+namespace ErrorCodes
+{
+    extern const int SYNTAX_ERROR;
+}
+
+bool ParserKQLTopHitters::parseImpl(Pos & /*pos*/, ASTPtr & /*node*/, Expected & /*expected*/) // no-op: the operator is rewritten by updatePipeLine()
+{
+    return true;
+}
+
+bool ParserKQLTopHitters::updatePipeLine(Pos pos, String & query) // rewrites "top-hitters <n> of <value> [by <sum>]" into a summarize | sort | take pipeline
+{
+    if (!ParserSequence("top-hitters").ignore(pos))
+        return false;
+
+    if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon)
+        throw Exception(ErrorCodes::SYNTAX_ERROR, "Syntax error near top-hitters operator");
+
+    String number_of_values, value_expression, summing_expression;
+    auto start_pos = pos;
+    auto end_pos = pos;
+    while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon)
+    {
+        if (String(pos->begin, pos->end) == "of")
+        {
+            auto number_end_pos = pos;
+            --number_end_pos;
+            number_of_values = (start_pos <= number_end_pos) ? String(start_pos->begin, number_end_pos->end) : ""; // guard: "of" as the first token would otherwise give a reversed character range
+            start_pos = pos;
+            ++start_pos;
+        }
+
+        if (String(pos->begin, pos->end) == "by")
+        {
+            auto expr_end_pos = pos;
+            --expr_end_pos;
+            value_expression = (start_pos <= expr_end_pos) ? String(start_pos->begin, expr_end_pos->end) : ""; // guard: nothing between the previous keyword and "by"
+            start_pos = pos;
+            ++start_pos;
+        }
+        end_pos = pos;
+        ++pos;
+    }
+
+    if (value_expression.empty()) // no usable "by" part: the remaining tokens are the value expression
+        value_expression = (start_pos <= end_pos) ? String(start_pos->begin, end_pos->end) : "";
+    else // tokens after "by" are the expression to sum
+        summing_expression = (start_pos <= end_pos) ? String(start_pos->begin, end_pos->end) : "";
+
+    if (number_of_values.empty() || value_expression.empty())
+        throw Exception(ErrorCodes::SYNTAX_ERROR, "top-hitter operator need a ValueExpression");
+
+    if (summing_expression.empty())
+        query = std::format("summarize approximate_count_{0} = count() by {0} | sort by approximate_count_{0} desc | take {1} ", value_expression, number_of_values);
+    else
+        query = std::format("summarize approximate_sum_{0} = sum({0}) by {1} | sort by approximate_sum_{0} desc | take {2}", summing_expression, value_expression, number_of_values);
+
+    return true;
+}
+
+}
diff --git a/src/Parsers/Kusto/ParserKQLTopHitter.h b/src/Parsers/Kusto/ParserKQLTopHitter.h
new file mode 100644
index 000000000000..e6f692bc7ed9
--- /dev/null
+++ b/src/Parsers/Kusto/ParserKQLTopHitter.h
@@ -0,0 +1,17 @@
+#pragma once
+
+#include
+#include
+
+namespace DB
+{
+
+class ParserKQLTopHitters : public ParserKQLBase
+{
+protected:
+    const char * getName() const override { return "KQL top-hitters"; }
+    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
+    bool updatePipeLine(Pos pos, String & query) override;
+};
+
+}
diff --git a/src/Parsers/Kusto/ParserKQLTopNested.cpp b/src/Parsers/Kusto/ParserKQLTopNested.cpp
new file mode 100644
index 000000000000..14b4127d6507
--- /dev/null
+++ b/src/Parsers/Kusto/ParserKQLTopNested.cpp
@@ -0,0 +1,434 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int UNKNOWN_DIRECTION_OF_SORTING;
+    extern const int 
SYNTAX_ERROR;
+}
+
+String ParserKQLTopNested ::calculateSingleTopNestedWithOthers( // builds the CTE text for one top-nested layer: tb<layer>_normal, plus the *_others tables when has_others
+    const TopNestedClauses & top_nested_clauses, size_t layer, bool has_others, const uint32_t max_depth)
+{
+    const String source_table = "source_table AS (SELECT * FROM StormEvents) "; // NOTE(review): hard-coded table name; parseImpl appears to replace this sub-select via setSubQuerySource - confirm
+    const String & topn = getExprFromToken(top_nested_clauses[layer].topn, max_depth);
+    const String & expr_alias = top_nested_clauses[layer].expr_alias;
+    const String & expr = getExprFromToken(top_nested_clauses[layer].expr, max_depth);
+    const String & agg_alias = top_nested_clauses[layer].agg_alias;
+    const String & agg_expr = getExprFromToken(top_nested_clauses[layer].agg_expr, max_depth);
+    const String & order_expr = top_nested_clauses[layer].order;
+
+    String topn_expr = topn.empty() ? "" : std::format("LIMIT {} ", topn); // no LIMIT when the clause has no row count
+    String column_expr_with_aliais = expr + " AS " + expr_alias;
+    String agg_expr_with_aliais = std::format("{} AS {} ", agg_expr, agg_alias);
+    String agg_expr_value_with_aliais = std::format("{} AS {}_value ", agg_expr, agg_alias);
+    String query;
+    if (layer == 0) // first layer: aggregate directly over source_table
+    {
+        query = std::format(
+            "WITH {0},tb0_normal AS (SELECT {1}, {2} FROM source_table GROUP BY {3} ORDER BY {4} {5} {6})",
+            source_table,
+            column_expr_with_aliais,
+            agg_expr_with_aliais,
+            expr_alias,
+            agg_alias,
+            order_expr,
+            topn_expr);
+        if (has_others) // aggregate over the rows whose key did not make it into tb0_normal
+            query = query
+                + std::format(
+                        ",tb0_others AS (SELECT {0} FROM source_table WHERE {1} NOT IN (SELECT {1} FROM tb0_normal))",
+                        agg_expr_value_with_aliais,
+                        expr_alias);
+    }
+    else // deeper layers: join the previous layer back to source_table, number the rows per parent group, keep the first topn
+    {
+        const String tb0_normal_name = std::format("tb{}_normal", layer - 1);
+        const String row_alias0_name = std::format("row{}", layer - 1);
+
+        const String tb1_prev_name = std::format("tb{}_prev", layer);
+        const String tb1_partition_name = std::format("tb{}_partition", layer);
+        const String tb1_normal_name = std::format("tb{}_normal", layer);
+        const String tb1_others_prev_name = std::format("tb{}_others_prev", layer);
+        const String tb1_others_name = std::format("tb{}_others", layer);
+        const String row_alias1_name = std::format("row{}", layer);
+
+        String column_list, select_list, join_list, group_list, prev_group_list;
+        for (size_t i = 0; i < layer; ++i) // collect the columns of all enclosing layers
+        {
+            const String select_tmp = std::format("{0}, {1}", top_nested_clauses[i].expr_alias, top_nested_clauses[i].agg_alias);
+            select_list = select_list.empty() ? select_tmp : select_list + ", " + select_tmp;
+            join_list = join_list.empty() ? top_nested_clauses[i].expr : join_list + ", " + top_nested_clauses[i].expr;
+            column_list = column_list.empty() ? top_nested_clauses[i].expr_alias : column_list + ", " + top_nested_clauses[i].expr_alias;
+
+            prev_group_list = select_list;
+        }
+        group_list = select_list + ", " + expr_alias;
+        auto tb1_prev_select_list = select_list + ", " + column_expr_with_aliais + ", " + agg_expr_with_aliais;
+        auto tb1_partition_select_list = select_list + ", " + expr_alias + ", " + agg_alias;
+        auto tb1_others_select_list = select_list + ", " + expr_alias + ", " + agg_alias;
+
+
+        const String tb1_prev_query = std::format(
+            "{0} AS (SELECT {1} FROM {2} INNER JOIN source_table AS join1 USING ({3}) GROUP BY {4})",
+            tb1_prev_name,
+            tb1_prev_select_list,
+            tb0_normal_name,
+            join_list,
+            group_list);
+
+        const String tb1_partition_query = std::format(
+            "{0} AS (SELECT {1}, ROW_NUMBER () over (PARTITION by {2} order by {3} {4}) AS {5} FROM {6})",
+            tb1_partition_name,
+            tb1_partition_select_list,
+            column_list,
+            agg_alias,
+            order_expr,
+            row_alias1_name,
+            tb1_prev_name);
+
+        const String where_clause = (topn.empty() || layer < 1) ? "" : std::format("WHERE {} <= {}", row_alias1_name, topn); // layer is never 0 in this branch, so only topn.empty() matters
+        const String tb1_normal_query
+            = std::format("{0} AS (SELECT {1} FROM {2} {3})", tb1_normal_name, tb1_partition_select_list, tb1_partition_name, where_clause);
+
+        query = tb1_prev_query + "," + tb1_partition_query + "," + tb1_normal_query;
+        if (has_others)
+        {
+            auto tb1_others_prev_select_list = column_list + ", " + agg_expr_value_with_aliais;
+            auto tb1_others_prev_join_clause
+                = std::format("LEFT JOIN {0} USING ({1})", tb1_normal_name, column_list + ", " + expr_alias);
+            auto tb1_others_prev_join_where_clasue = std::format(" empty({}.{}) ", tb1_normal_name, expr);
+            for (size_t i = 0; i < layer; ++i)
+                tb1_others_prev_join_where_clasue
+                    += std::format("AND source_table.{0} IN (SELECT {0} FROM {1}) ", top_nested_clauses[i].expr, tb1_normal_name);
+
+            const String tb1_others_prev_query = std::format(
+                "{0} AS (SELECT {1} FROM source_table {2} WHERE {3} GROUP BY {4})",
+                tb1_others_prev_name,
+                tb1_others_prev_select_list,
+                tb1_others_prev_join_clause,
+                tb1_others_prev_join_where_clasue,
+                column_list);
+
+            const String tb1_others_query = std::format(
+                "{0} AS (SELECT DISTINCT {1}, {2}_value FROM {3} RIGHT JOIN {4} USING ({5}))",
+                tb1_others_name,
+                select_list,
+                agg_alias,
+                tb1_others_prev_name,
+                tb1_normal_name,
+                column_list);
+
+            query = query + "," + tb1_others_prev_query + "," + tb1_others_query;
+        }
+    }
+    return query;
+}
+
+String ParserKQLTopNested ::calculateTopNestedWithOthers(const TopNestedClauses & top_nested_clauses, const uint32_t max_depth) // joins the per-layer CTEs and appends the final SELECT
+{
+    String query, last_select_list, last_others_list;
+    auto size = top_nested_clauses.size();
+    bool has_others = false;
+    for (size_t i = 0; i < size; ++i) // one clause with `others` switches every layer to the *_others variant
+    {
+        if (!top_nested_clauses[i].others.empty())
+        {
+            has_others = true;
+            break;
+        }
+    }
+
+    for (size_t i = 0; i < size; ++i)
+    {
+        const String single_query = calculateSingleTopNestedWithOthers(top_nested_clauses, i, has_others, max_depth);
+        const String others_expr
+            = top_nested_clauses[i].others.empty() ? "NULL" : getExprFromToken(top_nested_clauses[i].others, max_depth);
+        const String others_agg = top_nested_clauses[i].others.empty() ? "NULL" : std::format("{}_value", top_nested_clauses[i].agg_alias);
+        if (i == 0)
+        {
+            query = single_query;
+            last_select_list = std::format("{}, {}", top_nested_clauses[i].expr_alias, top_nested_clauses[i].agg_alias);
+            last_others_list = std::format(
+                "{} AS {}, {} AS {}", others_expr, top_nested_clauses[i].expr_alias, others_agg, top_nested_clauses[i].agg_alias);
+        }
+        else
+        {
+            query = query + "," + single_query;
+            last_others_list // built from the previous last_select_list, so it must be assigned before that list is extended below
+                = last_select_list + ", "
+                + std::format(
+                      "{} AS {}, {} AS {}", others_expr, top_nested_clauses[i].expr_alias, others_agg, top_nested_clauses[i].agg_alias);
+            last_select_list
+                = last_select_list + ", " + std::format("{}, {}", top_nested_clauses[i].expr_alias, top_nested_clauses[i].agg_alias);
+        }
+    }
+    if (has_others)
+        for (size_t i = 0; i < size - 1; ++i) // one tb<i>_all_others table per layer except the last
+        {
+            auto other_values = top_nested_clauses[i].agg_alias;
+            String all_others_table = std::format("tb{}_all_others AS (SELECT ", i);
+            String separator;
+            String first_list;
+            for (size_t j = 0; j < i; ++j)
+            {
+                if (first_list.empty())
+                    first_list = std::format("{}, {}", top_nested_clauses[j].expr_alias, top_nested_clauses[j].agg_alias);
+                else
+                    first_list += std::format(", {}, {}", top_nested_clauses[j].expr_alias, top_nested_clauses[j].agg_alias);
+            }
+            all_others_table += first_list;
+            for (size_t j = i; j < size; ++j)
+            {
+                separator = (i == 0) ? "" : ","; // net effect of this and the branches below: empty only when i == 0 and j == 0
+                if (i == 0)
+                {
+                    separator = (j == 0) ? "" : ",";
+                }
+                else
+                    separator = ",";
+                if (top_nested_clauses[j].others.empty())
+                    all_others_table
+                        = all_others_table
+                        + std::format(
+                              "{} NULL AS {} , NULL AS {}", separator, top_nested_clauses[j].expr_alias, top_nested_clauses[j].agg_alias);
+                else
+                    all_others_table = all_others_table
+                        + std::format("{} {} AS {} , {}_value AS {}",
+                                      separator,
+                                      getExprFromToken(top_nested_clauses[j].others, max_depth),
+                                      top_nested_clauses[j].expr_alias,
+                                      other_values,
+                                      top_nested_clauses[j].agg_alias);
+            }
+            all_others_table += std::format(" FROM tb{}_others )", i);
+            query = query + "," + all_others_table;
+        }
+
+    String last_normal_table = std::format("tb{}_normal", size - 1);
+    if (has_others) // final result: last normal table UNION ALL the others tables
+    {
+        String last_others_table = std::format("tb{}_others", size - 1);
+        query = query
+            + std::format(
+                    ", last_query AS (SELECT {0} FROM {1} UNION ALL SELECT {2} FROM {3}",
+                    last_select_list,
+                    last_normal_table,
+                    last_others_list,
+                    last_others_table);
+        if (size > 1)
+        {
+            for (size_t i = 0; i < size - 1; ++i)
+            {
+                String tb_all_others = std::format("tb{}_all_others", i);
+                query = query + std::format(" UNION ALL SELECT {} FROM {}", last_select_list, tb_all_others);
+            }
+        }
+        query += ") Select * from last_query";
+    }
+    else
+        query = query + std::format(" SELECT {0} FROM {1} ", last_select_list, last_normal_table);
+
+    return query;
+}
+
+bool ParserKQLTopNested ::parseSingleTopNestedClause(Pos & begin_pos, Pos & last_pos, TopNestedClause & top_nested_clause, const int layer) // parses the tokens in [begin_pos, last_pos) into one clause
+{
+    TopNestedClause arg;
+    auto pos = begin_pos;
+    for (auto i = 0; i < 3; ++i) // skip three leading tokens - presumably the "top-nested" operator name; confirm
+        ++pos;
+    auto start_pos = pos;
+    auto end_pos = pos;
+
+    auto get_name_value = [&](Pos & begin, Pos & end, String & name, String & value) // splits [begin, end) at the first "=" into name and value; returns where the value starts
+    {
+        Pos tmp = begin;
+        bool has_alias = false;
+        Pos value_pos = begin;
+        while (tmp < end)
+        {
+            if (String(tmp->begin, tmp->end) == "=")
+            {
+                --tmp;
+                name = String(begin->begin, tmp->end);
+                ++tmp;
+                ++tmp;
+                --end;
+                value = String(tmp->begin, end->end);
+                value_pos = 
tmp;
+                ++end; // undo the temporary --end above: `end` is taken by reference
+                has_alias = true;
+                break;
+            }
+            ++tmp;
+        }
+        if (!has_alias) // no "=": the whole range is the value and `name` stays untouched
+        {
+            --end;
+            value = String(begin->begin, end->end);
+            ++end;
+        }
+        return value_pos;
+    };
+
+    bool has_by = false, has_of = false;
+    Pos expr_start_pos = begin_pos;
+    Pos expr_end_pos = begin_pos;
+    while (pos < last_pos) // the keywords "of", "with" and "by" delimit the parts of the clause
+    {
+        if (String(pos->begin, pos->end) == "of")
+        {
+            has_of = true;
+            end_pos = pos;
+            --end_pos;
+            if (start_pos <= end_pos) // the row count before "of" is optional
+                arg.topn = String(start_pos->begin, end_pos->end);
+            start_pos = pos;
+            ++start_pos;
+        }
+
+        if (String(pos->begin, pos->end) == "with")
+        {
+            end_pos = pos;
+            expr_start_pos = get_name_value(start_pos, end_pos, arg.expr_alias, arg.expr);
+            expr_end_pos = end_pos;
+            start_pos = pos;
+            ++start_pos;
+        }
+
+        if (String(pos->begin, pos->end) == "by")
+        {
+            has_by = true;
+            end_pos = pos;
+            if (arg.expr.empty()) // no "with" part was seen: the tokens before "by" are the grouping expression
+            {
+                expr_start_pos = get_name_value(start_pos, end_pos, arg.expr_alias, arg.expr);
+                expr_end_pos = end_pos;
+            }
+            else // the tokens between "with" and "by" describe the others value
+                get_name_value(start_pos, end_pos, arg.others_name, arg.others);
+            start_pos = pos;
+            ++start_pos;
+        }
+        ++pos;
+    }
+
+    if (!has_of)
+        throw Exception(ErrorCodes::SYNTAX_ERROR, "Missing 'of' keyword for top-nested operator");
+
+    if (!has_by)
+        throw Exception(ErrorCodes::SYNTAX_ERROR, "Missing 'by' keyword for top-nested operator");
+
+    get_name_value(start_pos, pos, arg.agg_alias, arg.agg_expr); // everything after "by", still including a trailing asc/desc
+
+    if (arg.agg_expr.empty())
+        throw Exception(ErrorCodes::SYNTAX_ERROR, "Missing aggregation expression for top-nested operator");
+
+    if (arg.expr_alias.empty())
+    { --expr_end_pos;
+        if (expr_start_pos == expr_end_pos) // single-token expression: reuse it as the column name
+            arg.expr_alias = arg.expr;
+        else
+            arg.expr_alias = std::format("Column{}", layer + 1);
+    }
+
+    if (arg.agg_alias.empty())
+        arg.agg_alias = std::format("aggregated_{}", arg.expr_alias);
+
+    --last_pos; // now the last token of the clause; NOTE(review): last_pos is a reference, so the caller's iterator moves too
+
+    if (last_pos->type != TokenType::BareWord) // no sort direction given: default to DESC
+    {
+        if (last_pos->type != TokenType::Number && last_pos->type != TokenType::ClosingRoundBracket)
+            throw Exception(ErrorCodes::SYNTAX_ERROR, "Incorrect aggregation expression: {}", arg.agg_expr); // report the aggregation text, which is what this check is about
+        arg.order = "DESC";
+    }
+    else
+    {
+        const auto sort_direct = String(last_pos->begin, last_pos->end);
+        if (sort_direct != "desc" && sort_direct != "asc")
+            throw Exception(ErrorCodes::UNKNOWN_DIRECTION_OF_SORTING, "Unknown direction of sorting: {}", sort_direct);
+
+        std::size_t found = arg.agg_expr.rfind(sort_direct); // the direction is the last token, so search from the end; a forward find would also hit e.g. "sum(description) desc" inside the identifier
+        arg.agg_expr = arg.agg_expr.substr(0, found);
+        arg.order = sort_direct;
+    }
+
+    top_nested_clause = std::move(arg);
+    return true;
+}
+
+bool ParserKQLTopNested ::parseTopNestedClause(Pos & pos, TopNestedClauses & top_nested_clauses) // splits the operator text at top-level commas and parses every clause
+{
+    TopNestedClause top_nested_clause;
+    auto start_pos = pos;
+    for (auto i = 0; i < 3; ++i) // step back three tokens so the first clause starts at the operator name, like the clauses after a comma - presumably "top", "-", "nested"; confirm
+        --start_pos;
+
+    auto end_pos = start_pos;
+    auto paren_count = 0;
+    int layer = 0;
+    while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon)
+    {
+        if (pos->type == TokenType::ClosingRoundBracket)
+            --paren_count;
+        if (pos->type == TokenType::OpeningRoundBracket)
+            ++paren_count;
+
+        if (String(pos->begin, pos->end) == "," && paren_count == 0) // commas inside parentheses belong to function arguments
+        {
+            end_pos = pos;
+            parseSingleTopNestedClause(start_pos, end_pos, top_nested_clause, layer);
+            ++layer;
+            top_nested_clauses.emplace_back(top_nested_clause);
+            start_pos = pos;
+            ++start_pos;
+        }
+        ++pos;
+    }
+
+    parseSingleTopNestedClause(start_pos, pos, top_nested_clause, layer); // the last (or only) clause
+    top_nested_clauses.emplace_back(top_nested_clause);
+    return true;
+}
+
+bool ParserKQLTopNested ::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) // turns a top-nested operator into a SELECT AST built from generated query text
+{
+    if (!ParserSequence("top-nested").ignore(pos))
+        return false;
+
+    TopNestedClauses top_nested_clauses;
+
+    parseTopNestedClause(pos, top_nested_clauses);
+    String query = calculateTopNestedWithOthers(top_nested_clauses, pos.max_depth);
+
+    ASTPtr select_node;
+    Tokens tokens(query.c_str(), query.c_str() + query.size());
+    IParser::Pos new_pos(tokens, pos.max_depth);
+    if (!ParserSelectQuery().parse(new_pos, select_node, expected))
+        return false;
+
+    auto with_node = select_node->as()->with();
+
+    auto * with_elem = with_node->children[0]->as(); // first WITH element - the generated text starts with the source_table CTE
+
+    auto sub_select = with_elem->children[0]->children[0]->children[0]->children[0]; // NOTE(review): unchecked descent; relies on the exact AST shape of the generated text
+    if (!setSubQuerySource(sub_select, node, false, false, ""))
+        return false;
+
+    node = std::move(select_node);
+    return true;
+}
+
+}
diff --git a/src/Parsers/Kusto/ParserKQLTopNested.h b/src/Parsers/Kusto/ParserKQLTopNested.h
new file mode 100644
index 000000000000..89fd283573aa
--- /dev/null
+++ b/src/Parsers/Kusto/ParserKQLTopNested.h
@@ -0,0 +1,36 @@
+#pragma once
+
+#include
+#include
+
+namespace DB
+{
+
+class ParserKQLTopNested : public ParserKQLBase
+{
+
+protected:
+    struct TopNestedClause // one parsed clause; the fields hold source text, not AST nodes
+    {
+        String topn;
+        String expr_alias;
+        String expr;
+        String others_name;
+        String others;
+        String agg_alias;
+        String agg_function;
+        String agg_expr;
+        String agg_column;
+        String order;
+    };
+    using TopNestedClauses = std::vector;
+    const char * getName() const override { return "KQL top-nested"; }
+    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
+
+    static bool parseSingleTopNestedClause(Pos & begin_pos, Pos & last_pos, TopNestedClause & top_nested_clause, const int layer);
+    static bool parseTopNestedClause(Pos & pos, TopNestedClauses & top_nested_clauses);
+    static String calculateTopNestedWithOthers(const TopNestedClauses & top_nested_clauses, const uint32_t max_depth);
+    static String calculateSingleTopNestedWithOthers(const TopNestedClauses & top_nested_clauses, size_t layer, bool has_others, const uint32_t max_depth);
+};
+
+}
diff --git a/src/Parsers/Kusto/Utilities.cpp b/src/Parsers/Kusto/Utilities.cpp
new file mode 100644
index 000000000000..72c1ee86628d
--- /dev/null
+++ b/src/Parsers/Kusto/Utilities.cpp
@@ -0,0 +1,74 @@
+#include "Utilities.h"
+
+#include "KustoFunctions/IParserKQLFunction.h"
+
+#include
+#include
+#include
+
+namespace DB
+{
+String extractLiteralArgumentWithoutQuotes(const std::string & function_name, IParser::Pos & pos)
+{
+    ++pos; 
+    if (pos->type == TokenType::QuotedIdentifier || pos->type == TokenType::StringLiteral) // quoted argument: return its text without the quotes and step past it
+    {
+        auto result = extractTokenWithoutQuotes(pos);
+        ++pos;
+        return result;
+    }
+
+    --pos; // not a quoted token: undo the advance and let the generic argument extraction handle it
+    return IParserKQLFunction::getArgument(function_name, pos, IParserKQLFunction::ArgumentState::Raw);
+}
+
+String extractTokenWithoutQuotes(IParser::Pos & pos) // text of the current token; one character is dropped at each end for quoted identifiers and string literals
+{
+    const auto offset = static_cast(pos->type == TokenType::QuotedIdentifier || pos->type == TokenType::StringLiteral);
+    return {pos->begin + offset, pos->end - offset};
+}
+
+void setSelectAll(ASTSelectQuery & select_query) // replaces the SELECT expression list with a single newly created child node
+{
+    auto expression_list = std::make_shared();
+    expression_list->children.push_back(std::make_shared());
+    select_query.setExpression(ASTSelectQuery::Expression::SELECT, std::move(expression_list));
+}
+
+String wildcardToRegex(const String & wildcard) // translates a wildcard pattern to a regex: '*' -> ".*", '?' -> ".", regex metacharacters are backslash-escaped
+{
+    String regex;
+    for (char c : wildcard)
+    {
+        if (c == '*')
+        {
+            regex += ".*";
+        }
+        else if (c == '?')
+        {
+            regex += ".";
+        }
+        else if (c == '.' || c == '+' || c == '(' || c == ')' || c == '[' || c == ']' || c == '{' || c == '}' || c == '|' || c == '\\' || c == '^' || c == '$') // '{', '}' and '|' are metacharacters too: unescaped, "a|b" would match as an alternation
+        {
+            regex += "\\";
+            regex += c;
+        }
+        else
+        {
+            regex += c;
+        }
+    }
+    return regex;
+}
+
+ASTPtr wrapInSelectWithUnion(const ASTPtr & select_query) // wraps one select query into a newly created node whose list_of_selects contains only that query
+{
+    auto select_with_union_query = std::make_shared();
+    auto & list_of_selects = select_with_union_query->list_of_selects;
+    list_of_selects = std::make_shared();
+    list_of_selects->children.push_back(select_query);
+    select_with_union_query->children.push_back(list_of_selects);
+
+    return select_with_union_query;
+}
+}
diff --git a/src/Parsers/Kusto/Utilities.h b/src/Parsers/Kusto/Utilities.h
new file mode 100644
index 000000000000..f19fad72287f
--- /dev/null
+++ b/src/Parsers/Kusto/Utilities.h
@@ -0,0 +1,13 @@
+#pragma once
+
+#include
+#include
+
+namespace DB
+{
+String extractLiteralArgumentWithoutQuotes(const std::string & function_name, IParser::Pos & pos);
+String extractTokenWithoutQuotes(IParser::Pos & pos);
+void setSelectAll(ASTSelectQuery & 
select_query); +String wildcardToRegex(const String & wildcard); +ASTPtr wrapInSelectWithUnion(const ASTPtr & select_query); +} diff --git a/src/Parsers/Lexer.cpp b/src/Parsers/Lexer.cpp index be67807ad8f1..449b6972cd12 100644 --- a/src/Parsers/Lexer.cpp +++ b/src/Parsers/Lexer.cpp @@ -213,7 +213,7 @@ Token Lexer::nextTokenImpl() for (const char * iterator = token_begin; iterator < pos; ++iterator) { - if (!isWordCharASCII(*iterator) && *iterator != '$') + if (!isWordCharASCII(*iterator) && *iterator != '$' && *iterator != '.') return Token(TokenType::ErrorWrongNumber, token_begin, pos); } diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index 8cbfac914656..a8c785383a8c 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -20,7 +20,7 @@ #include #include #include - +#include namespace DB { @@ -631,17 +631,25 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe /// ENGINE can not be specified for table functions. if (storage || !table_function_p.parse(pos, as_table_function, expected)) { - /// AS [db.]table - if (!name_p.parse(pos, as_table, expected)) - return false; - - if (s_dot.ignore(pos, expected)) + ParserKeyword s_kql("KQL"); + if (s_kql.ignore(pos, expected)) { - as_database = as_table; - if (!name_p.parse(pos, as_table, expected)) + if (!ParserKQLTaleFunction().parse(pos, select, expected)) return false; } + else + { + /// AS [db.]table + if (!name_p.parse(pos, as_table, expected)) + return false; + if (s_dot.ignore(pos, expected)) + { + as_database = as_table; + if (!name_p.parse(pos, as_table, expected)) + return false; + } + } /// Optional - ENGINE can be specified. 
if (!storage) storage_p.parse(pos, storage, expected); diff --git a/src/Parsers/ParserInsertQuery.cpp b/src/Parsers/ParserInsertQuery.cpp index 8601e12ebcba..8715dade90c3 100644 --- a/src/Parsers/ParserInsertQuery.cpp +++ b/src/Parsers/ParserInsertQuery.cpp @@ -12,7 +12,7 @@ #include #include #include "Parsers/IAST_fwd.h" - +#include namespace DB { @@ -47,6 +47,7 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserFunction table_function_p{false}; ParserStringLiteral infile_name_p; ParserExpressionWithOptionalAlias exp_elem_p(false); + ParserKeyword s_kql("KQL"); /// create ASTPtr variables (result of parsing will be put in them). /// They will be used to initialize ASTInsertQuery's fields. @@ -183,6 +184,11 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserWatchQuery watch_p; watch_p.parse(pos, watch, expected); } + else if (!infile && s_kql.ignore(pos, expected)) + { + if (!ParserKQLTaleFunction().parse(pos, select, expected)) + return false; + } else if (!infile) { /// If all previous conditions were false and it's not FROM INFILE, query is incorrect diff --git a/src/Parsers/TokenIterator.cpp b/src/Parsers/TokenIterator.cpp index 6633ddb95639..1e243f252330 100644 --- a/src/Parsers/TokenIterator.cpp +++ b/src/Parsers/TokenIterator.cpp @@ -1,5 +1,6 @@ +#include #include - +#include namespace DB { @@ -20,13 +21,52 @@ Tokens::Tokens(const char * begin, const char * end, size_t max_query_size) UnmatchedParentheses checkUnmatchedParentheses(TokenIterator begin) { + std::unordered_set valid_kql_negative_suffix( + {"contains", + "contains_cs", + "endswith", + "endswith_cs", + "~", + "=", + "has", + "has_cs", + "hasprefix", + "hasprefix_cs", + "hassuffix", + "hassuffix_cs", + "in", + "startswith", + "startswith_cs"}); /// We have just two kind of parentheses: () and []. 
UnmatchedParentheses stack; /// We have to iterate through all tokens until the end to avoid false positive "Unmatched parentheses" error /// when parser failed in the middle of the query. - for (TokenIterator it = begin; it.isValid(); ++it) + for (TokenIterator it = begin; !it->isEnd(); ++it) { + if (!it.isValid()) // allow kql negative operators + { + if (it->type == TokenType::ErrorSingleExclamationMark) + { + ++it; + if (!valid_kql_negative_suffix.contains(String(it.get().begin, it.get().end))) + break; + --it; + } + else + { + if (String(it.get().begin, it.get().end) == "~") + { + --it; + if (const auto prev = String(it.get().begin, it.get().end); prev != "!" && prev != "=" && prev != "in") + break; + ++it; + } + else + break; + } + } + if (it->type == TokenType::OpeningRoundBracket || it->type == TokenType::OpeningSquareBracket) { stack.push_back(*it); @@ -39,7 +79,8 @@ UnmatchedParentheses checkUnmatchedParentheses(TokenIterator begin) stack.push_back(*it); return stack; } - else if ((stack.back().type == TokenType::OpeningRoundBracket && it->type == TokenType::ClosingRoundBracket) + else if ( + (stack.back().type == TokenType::OpeningRoundBracket && it->type == TokenType::ClosingRoundBracket) || (stack.back().type == TokenType::OpeningSquareBracket && it->type == TokenType::ClosingSquareBracket)) { /// Valid match. 
diff --git a/src/Parsers/tests/KQL/gtest_KQL_AggregateFunctions.cpp b/src/Parsers/tests/KQL/gtest_KQL_AggregateFunctions.cpp
new file mode 100644
index 000000000000..5de19025d7cc
--- /dev/null
+++ b/src/Parsers/tests/KQL/gtest_KQL_AggregateFunctions.cpp
@@ -0,0 +1,125 @@
+#include
+#include
+
+INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_Aggregate, ParserTest, // every entry below is a {KQL text, expected SQL text} pair
+    ::testing::Combine(
+        ::testing::Values(std::make_shared()),
+        ::testing::ValuesIn(std::initializer_list{
+        {
+            "Customers | summarize t = stdev(Age) by FirstName",
+            "SELECT\n FirstName,\n sqrt(varSamp(Age)) AS t\nFROM Customers\nGROUP BY FirstName"
+        },
+        {
+            "Customers | summarize t = stdevif(Age, Age < 10) by FirstName",
+            "SELECT\n FirstName,\n sqrt(varSampIf(Age, Age < 10)) AS t\nFROM Customers\nGROUP BY FirstName"
+        },
+        {
+            "Customers | summarize t = binary_all_and(Age) by FirstName",
+            "SELECT\n FirstName,\n groupBitAnd(Age) AS t\nFROM Customers\nGROUP BY FirstName"
+        },
+        {
+            "Customers | summarize t = binary_all_or(Age) by FirstName",
+            "SELECT\n FirstName,\n groupBitOr(Age) AS t\nFROM Customers\nGROUP BY FirstName"
+
+        },
+        {
+            "Customers | summarize t = binary_all_xor(Age) by FirstName",
+            "SELECT\n FirstName,\n groupBitXor(Age) AS t\nFROM Customers\nGROUP BY FirstName"
+        },
+        {
+            "Customers | summarize percentiles(Age, 30, 40, 50, 60, 70) by FirstName",
+            "SELECT\n FirstName,\n quantiles(30 / 100, 40 / 100, 50 / 100, 60 / 100, 70 / 100)(Age) AS percentiles_Age\nFROM Customers\nGROUP BY FirstName"
+        },
+        {
+            "Customers | summarize t = percentiles_array(Age, 10, 20, 30, 50) by FirstName",
+            "SELECT\n FirstName,\n quantiles(10 / 100, 20 / 100, 30 / 100, 50 / 100)(Age) AS t\nFROM Customers\nGROUP BY FirstName"
+        },
+        {
+            "Customers | summarize t = percentiles_array(Age, dynamic([10, 20, 30, 50])) by FirstName",
+            "SELECT\n FirstName,\n quantiles(10 / 100, 20 / 100, 30 / 100, 50 / 100)(Age) AS t\nFROM Customers\nGROUP BY FirstName"
+        },
+        {
+            "DataTable | summarize t = percentilesw(Bucket, Frequency, 50, 75, 99.9)",
+            "SELECT quantilesExactWeighted(50 / 100, 75 / 100, 99.9 / 100)(Bucket, Frequency) AS t\nFROM DataTable"
+        },
+        {
+            "DataTable| summarize t = percentilesw_array(Bucket, Frequency, dynamic([10, 50, 30]))",
+            "SELECT quantilesExactWeighted(10 / 100, 50 / 100, 30 / 100)(Bucket, Frequency) AS t\nFROM DataTable"
+        },
+        {
+            "Customers | summarize t = percentile(Age, 50) by FirstName",
+            "SELECT\n FirstName,\n quantile(50 / 100)(Age) AS t\nFROM Customers\nGROUP BY FirstName"
+        },
+        {
+            "DataTable | summarize t = percentilew(Bucket, Frequency, 50)",
+            "SELECT quantileExactWeighted(50 / 100)(Bucket, Frequency) AS t\nFROM DataTable"
+        },
+        {
+            "Customers | summarize t = make_list_with_nulls(Age) by FirstName",
+            "SELECT\n FirstName,\n arrayConcat(groupArray(Age), arrayMap(x -> NULL, range(0, toUInt32(count(*) - length(groupArray(Age))), 1))) AS t\nFROM Customers\nGROUP BY FirstName"
+        },
+        {
+            "Customers | summarize count() by bin(Age, 10)",
+            "SELECT\n kql_bin(Age, 10) AS Age,\n count() AS count_\nFROM Customers\nGROUP BY Age"
+        },
+        {
+            "Customers | summarize count(Age+1) by bin(Age+1, 10)",
+            "SELECT\n kql_bin(Age + 1, 10) AS Columns1,\n count(Age + 1) AS count_\nFROM Customers\nGROUP BY Columns1"
+        },
+        {
+            "Customers | summarize count(Age) by bin(Age, 10)",
+            "SELECT\n kql_bin(Age, 10) AS Age,\n count(Age) AS count_Age\nFROM Customers\nGROUP BY Age"
+        },
+        {
+            "Customers | summarize count_distinct(Education)",
+            "SELECT countDistinct(Education) AS Columns1\nFROM Customers"
+        },
+        {
+            "Customers | summarize count_distinctif(Education,Age >30)",
+            "SELECT countIfDistinct(Education, Age > 30) AS Columns1\nFROM Customers"
+        },
+        {
+            "Customers | summarize take_any(FirstName)", // the comma matters: adjacent literals without it merge into a single string
+            "SELECT any(FirstName) AS take_any_FirstName\nFROM Customers"
+        },
+        {
+            "Customers | summarize take_any(FirstName), take_any(LastName)",
+            "SELECT\n any(FirstName) AS take_any_FirstName,\n any(LastName) AS take_any_LastName\nFROM Customers"
+        },
+        {
+            "Customers | summarize take_any(FirstName, LastName) by FirstName, LastName",
+            "SELECT\n FirstName,\n LastName,\n any(FirstName),\n any(LastName) AS take_any_FirstName\nFROM Customers\nGROUP BY\n FirstName,\n LastName"
+        },
+        {
+            "Customers | summarize take_anyif(FirstName, LastName has 'Diaz')",
+            "SELECT anyIf(FirstName, hasTokenCaseInsensitive(LastName, 'Diaz')) AS take_anyif_FirstName\nFROM Customers"
+        },
+        {
+            "Customers | summarize take_anyif(FirstName, LastName has 'Diaz'), dcount(FirstName)",
+            "SELECT\n anyIf(FirstName, hasTokenCaseInsensitive(LastName, 'Diaz')) AS take_anyif_FirstName,\n countDistinct(FirstName) AS dcount_FirstName\nFROM Customers"
+        },
+        {
+            "Customers | summarize dcount(Education, 2)",
+            "SELECT countDistinct(Education, 2) AS dcount_Education\nFROM Customers"
+        },
+        {
+            "Customers | summarize dcountif(Education, Occupation=='Professional', 2)",
+            "SELECT countDistinct(Education, Occupation = 'Professional', 2) AS dcountif_Education\nFROM Customers"
+        },
+        {
+            "Customers | summarize by FirstName, LastName, Age",
+            "SELECT\n FirstName,\n LastName,\n Age\nFROM Customers\nGROUP BY\n FirstName,\n LastName,\n Age"
+        },
+        {
+            "Customers | summarize variance(Age)",
+            "SELECT varSamp(Age) AS variance_Age\nFROM Customers"
+        },
+        {
+            "Customers | summarize variancep(Age)",
+            "SELECT varPop(Age) AS variancep_Age\nFROM Customers"
+        },
+        {
+            "Customers | summarize varianceif(Age, Age < 30)",
+            "SELECT varSampIf(Age, Age < 30) AS varianceif_Age\nFROM Customers"
+        }
+})));
diff --git a/src/Parsers/tests/KQL/gtest_KQL_Binary.cpp b/src/Parsers/tests/KQL/gtest_KQL_Binary.cpp
new file mode 100644
index 000000000000..a1b26ee56147
--- /dev/null
+++ b/src/Parsers/tests/KQL/gtest_KQL_Binary.cpp
@@ -0,0 +1,37 @@
+#include
+
+#include
+
+INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_Binary, ParserTest,
+    ::testing::Combine(
+        ::testing::Values(std::make_shared()),
+        ::testing::ValuesIn(std::initializer_list{
+        {
+            "print binary_and(A, B)",
+            "SELECT bitAnd(CAST(A, 'Int64'), CAST(B, 'Int64'))"
+ }, + { + "print binary_not(A)", + "SELECT bitNot(CAST(A, 'Int64'))" + }, + { + "print binary_or(A, B)", + "SELECT bitOr(CAST(A, 'Int64'), CAST(B, 'Int64'))" + }, + { + "print binary_shift_left(A, B)", + "SELECT if(B < 0, NULL, bitShiftLeft(CAST(A, 'Int64'), B))" + }, + { + "print binary_shift_right(A, B)", + "SELECT if(B < 0, NULL, bitShiftRight(CAST(A, 'Int64'), B))" + }, + { + "print binary_xor(A, B)", + "SELECT bitXor(CAST(A, 'Int64'), CAST(B, 'Int64'))" + }, + { + "print bitset_count_ones(A)", + "SELECT bitCount(A)" + } +}))); diff --git a/src/Parsers/tests/KQL/gtest_KQL_Conversion.cpp b/src/Parsers/tests/KQL/gtest_KQL_Conversion.cpp new file mode 100644 index 000000000000..f44482ff2be1 --- /dev/null +++ b/src/Parsers/tests/KQL/gtest_KQL_Conversion.cpp @@ -0,0 +1,80 @@ +#include + +#include + +INSTANTIATE_TEST_SUITE_P( + ParserKQLQuery_Conversion, + ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "print tobool(A)", + "SELECT multiIf(toString(A) = 'true', true, toString(A) = 'false', false, toInt64OrNull(toString(A)) != 0)" + }, + { + "print toboolean(A)", + "SELECT multiIf(toString(A) = 'true', true, toString(A) = 'false', false, toInt64OrNull(toString(A)) != 0)" + }, + { + "print todouble(A)", + "SELECT toFloat64OrNull(toString(A)) / if(toTypeName(A) = 'IntervalNanosecond', 100, 1)" + }, + { + "print toint(A)", + "SELECT toInt32OrNull(toString(A)) / if(toTypeName(A) = 'IntervalNanosecond', 100, 1)" + }, + { + "print tolong(A)", + "SELECT toInt64OrNull(toString(A)) / if(toTypeName(A) = 'IntervalNanosecond', 100, 1)" + }, + { + "print toreal(A)", + "SELECT toFloat64OrNull(toString(A)) / if(toTypeName(A) = 'IntervalNanosecond', 100, 1)" + }, + { + "print tostring(A)", + "SELECT ifNull(kql_tostring(A), '')" + }, + { + "print decimal(123.345)", + "SELECT toDecimal128(CAST('123.345', 'String'), 32)" + }, + { + "print decimal(NULL)", + "SELECT NULL" + }, + { + "print todecimal('123.45')", 
+ "SELECT if((toTypeName('123.45') = 'String') OR (toTypeName('123.45') = 'FixedString'), toDecimal128OrNull(CAST('123.45', 'String'), CAST(if(position(CAST('123.45', 'String'), 'e') = 0, if(countSubstrings(CAST('123.45', 'String'), '.') = 1, length(substr(CAST('123.45', 'String'), position(CAST('123.45', 'String'), '.') + 1)), 0), toUInt64(multiIf((position(CAST('123.45', 'String'), 'e+') AS x) > 0, substr(CAST('123.45', 'String'), x + 2), (position(CAST('123.45', 'String'), 'e-') AS y) > 0, substr(CAST('123.45', 'String'), y + 2), (position(CAST('123.45', 'String'), 'e-') = 0) AND (position(CAST('123.45', 'String'), 'e+') = 0) AND (position(CAST('123.45', 'String'), 'e') > 0), substr(CAST('123.45', 'String'), position(CAST('123.45', 'String'), 'e') + 1), CAST('0', 'String')))), 'UInt8')), toDecimal128OrNull(CAST('123.45', 'String'), CAST(if(position(CAST('123.45', 'String'), 'e') = 0, if(countSubstrings(CAST('123.45', 'String'), '.') = 1, length(substr(CAST('123.45', 'String'), position(CAST('123.45', 'String'), '.') + 1)), 0), toUInt64(multiIf(x > 0, substr(CAST('123.45', 'String'), x + 2), y > 0, substr(CAST('123.45', 'String'), y + 2), (position(CAST('123.45', 'String'), 'e-') = 0) AND (position(CAST('123.45', 'String'), 'e+') = 0) AND (position(CAST('123.45', 'String'), 'e') > 0), substr(CAST('123.45', 'String'), position(CAST('123.45', 'String'), 'e') + 1), CAST('0', 'String')))), 'UInt8')))" + }, + { + "print todecimal(NULL)", + "SELECT toDecimal128OrNull(CAST(NULL, 'Nullable(String)'), 17) / if(toTypeName(NULL) = 'IntervalNanosecond', 100, 1)" + }, + { + "print todecimal(123456.3456)", + "SELECT if((toTypeName(123456.3456) = 'String') OR (toTypeName(123456.3456) = 'FixedString'), toDecimal128OrNull(CAST('123456.3456', 'String'), CAST(if(position(CAST('123456.3456', 'String'), 'e') = 0, if(countSubstrings(CAST('123456.3456', 'String'), '.') = 1, length(substr(CAST('123456.3456', 'String'), position(CAST('123456.3456', 'String'), '.') + 1)), 0), 
toUInt64(multiIf((position(CAST('123456.3456', 'String'), 'e+') AS x) > 0, substr(CAST('123456.3456', 'String'), x + 2), (position(CAST('123456.3456', 'String'), 'e-') AS y) > 0, substr(CAST('123456.3456', 'String'), y + 2), (position(CAST('123456.3456', 'String'), 'e-') = 0) AND (position(CAST('123456.3456', 'String'), 'e+') = 0) AND (position(CAST('123456.3456', 'String'), 'e') > 0), substr(CAST('123456.3456', 'String'), position(CAST('123456.3456', 'String'), 'e') + 1), CAST('0', 'String')))), 'UInt8')), toDecimal128OrNull(CAST('123456.3456', 'String'), CAST(if(position(CAST('123456.3456', 'String'), 'e') = 0, if(countSubstrings(CAST('123456.3456', 'String'), '.') = 1, length(substr(CAST('123456.3456', 'String'), position(CAST('123456.3456', 'String'), '.') + 1)), 0), toUInt64(multiIf(x > 0, substr(CAST('123456.3456', 'String'), x + 2), y > 0, substr(CAST('123456.3456', 'String'), y + 2), (position(CAST('123456.3456', 'String'), 'e-') = 0) AND (position(CAST('123456.3456', 'String'), 'e+') = 0) AND (position(CAST('123456.3456', 'String'), 'e') > 0), substr(CAST('123456.3456', 'String'), position(CAST('123456.3456', 'String'), 'e') + 1), CAST('0', 'String')))), 'UInt8')))" + }, + { + "print todecimal('abc')", + "SELECT if((toTypeName('abc') = 'String') OR (toTypeName('abc') = 'FixedString'), toDecimal128OrNull(CAST('abc', 'String'), CAST(if(position(CAST('abc', 'String'), 'e') = 0, if(countSubstrings(CAST('abc', 'String'), '.') = 1, length(substr(CAST('abc', 'String'), position(CAST('abc', 'String'), '.') + 1)), 0), toUInt64(multiIf((position(CAST('abc', 'String'), 'e+') AS x) > 0, substr(CAST('abc', 'String'), x + 2), (position(CAST('abc', 'String'), 'e-') AS y) > 0, substr(CAST('abc', 'String'), y + 2), (position(CAST('abc', 'String'), 'e-') = 0) AND (position(CAST('abc', 'String'), 'e+') = 0) AND (position(CAST('abc', 'String'), 'e') > 0), substr(CAST('abc', 'String'), position(CAST('abc', 'String'), 'e') + 1), CAST('0', 'String')))), 'UInt8')), 
toDecimal128OrNull(CAST('abc', 'String'), CAST(if(position(CAST('abc', 'String'), 'e') = 0, if(countSubstrings(CAST('abc', 'String'), '.') = 1, length(substr(CAST('abc', 'String'), position(CAST('abc', 'String'), '.') + 1)), 0), toUInt64(multiIf(x > 0, substr(CAST('abc', 'String'), x + 2), y > 0, substr(CAST('abc', 'String'), y + 2), (position(CAST('abc', 'String'), 'e-') = 0) AND (position(CAST('abc', 'String'), 'e+') = 0) AND (position(CAST('abc', 'String'), 'e') > 0), substr(CAST('abc', 'String'), position(CAST('abc', 'String'), 'e') + 1), CAST('0', 'String')))), 'UInt8')))" + }, + { + "print todecimal('1e5')", + "SELECT if((toTypeName('1e5') = 'String') OR (toTypeName('1e5') = 'FixedString'), toDecimal128OrNull(CAST('1e5', 'String'), CAST(if(position(CAST('1e5', 'String'), 'e') = 0, if(countSubstrings(CAST('1e5', 'String'), '.') = 1, length(substr(CAST('1e5', 'String'), position(CAST('1e5', 'String'), '.') + 1)), 0), toUInt64(multiIf((position(CAST('1e5', 'String'), 'e+') AS x) > 0, substr(CAST('1e5', 'String'), x + 2), (position(CAST('1e5', 'String'), 'e-') AS y) > 0, substr(CAST('1e5', 'String'), y + 2), (position(CAST('1e5', 'String'), 'e-') = 0) AND (position(CAST('1e5', 'String'), 'e+') = 0) AND (position(CAST('1e5', 'String'), 'e') > 0), substr(CAST('1e5', 'String'), position(CAST('1e5', 'String'), 'e') + 1), CAST('0', 'String')))), 'UInt8')), toDecimal128OrNull(CAST('1e5', 'String'), CAST(if(position(CAST('1e5', 'String'), 'e') = 0, if(countSubstrings(CAST('1e5', 'String'), '.') = 1, length(substr(CAST('1e5', 'String'), position(CAST('1e5', 'String'), '.') + 1)), 0), toUInt64(multiIf(x > 0, substr(CAST('1e5', 'String'), x + 2), y > 0, substr(CAST('1e5', 'String'), y + 2), (position(CAST('1e5', 'String'), 'e-') = 0) AND (position(CAST('1e5', 'String'), 'e+') = 0) AND (position(CAST('1e5', 'String'), 'e') > 0), substr(CAST('1e5', 'String'), position(CAST('1e5', 'String'), 'e') + 1), CAST('0', 'String')))), 'UInt8')))" + }, + { + "print decimal(1e-5)", + 
"SELECT toDecimal128(CAST('1e-5', 'String'), 5)" + }, + { + "print time(9nanoseconds)", + "SELECT toIntervalNanosecond(0)" + }, + { + "print time(1tick)", + "SELECT toIntervalNanosecond(100)" + } + +}))); diff --git a/src/Parsers/tests/KQL/gtest_KQL_Count.cpp b/src/Parsers/tests/KQL/gtest_KQL_Count.cpp new file mode 100644 index 000000000000..66bc4cbb145e --- /dev/null +++ b/src/Parsers/tests/KQL/gtest_KQL_Count.cpp @@ -0,0 +1,29 @@ +#include + +#include + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_Count, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "Customers | count", + "SELECT count() AS Count\nFROM Customers" + }, + { + "Customers | where Age< 30 | count", + "SELECT count() AS Count\nFROM\n(\n SELECT *\n FROM Customers\n WHERE Age < 30\n)" + }, + { + "Customers | where Age< 30 | limit 2| count", + "SELECT count() AS Count\nFROM\n(\n SELECT *\n FROM Customers\n WHERE Age < 30\n LIMIT 2\n)" + }, + { + "Customers | where Age< 30 | limit 2 | count | project Count", + "SELECT Count\nFROM\n(\n SELECT count() AS Count\n FROM\n (\n SELECT *\n FROM Customers\n WHERE Age < 30\n LIMIT 2\n )\n)" + }, + { + "Customers|project FirstName|where FirstName != 'Peter'|sort by FirstName asc nulls first|count", + "SELECT count() AS Count\nFROM\n(\n SELECT FirstName\n FROM Customers\n WHERE FirstName != 'Peter'\n ORDER BY FirstName ASC NULLS FIRST\n)" + } +}))); diff --git a/src/Parsers/tests/KQL/gtest_KQL_DataType.cpp b/src/Parsers/tests/KQL/gtest_KQL_DataType.cpp new file mode 100644 index 000000000000..dae854c9a089 --- /dev/null +++ b/src/Parsers/tests/KQL/gtest_KQL_DataType.cpp @@ -0,0 +1,65 @@ +#include + +#include + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_DataType, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "print dynamic(null)", + "SELECT NULL" + }, + { + "print dynamic(1)", + "SELECT 1" + }, + { + "print 
dynamic(datetime(1))", + "SELECT kql_datetime(1)" + }, + { + "print dynamic(timespan(1d))", + "SELECT toIntervalNanosecond(86400000000000)" + }, + { + "print dynamic(parse_ipv4('127.0.0.1'))", + "throws" + }, + { + "print dynamic({ \"a\": 9 })", + "throws" + }, + { + "print dynamic([1, 2, 3])", + "SELECT [1, 2, 3]" + }, + { + "print dynamic([1, dynamic([2]), 3])", + "SELECT [1, [2], 3]" + }, + { + "print dynamic([[1], [2], [3]])", + "SELECT [[1], [2], [3]]" + }, + { + "print dynamic(['a', \"b\", 'c'])", + "SELECT ['a', 'b', 'c']" + }, + { + "print dynamic([1, 'a', true, false])", + "SELECT [1, 'a', true, false]" + }, + { + "print dynamic([date(1), time(1d), 1, 2])", + "SELECT [kql_datetime(1), toIntervalNanosecond(86400000000000), 1, 2]" + }, + { + "print time('13:00:40.00000')", + "SELECT toIntervalNanosecond(46840000000000)" + }, + { + "print timespan('12.23:12:23');", + "SELECT toIntervalNanosecond(1120343000000000)" + } +}))); diff --git a/src/Parsers/tests/KQL/gtest_KQL_Distinct.cpp b/src/Parsers/tests/KQL/gtest_KQL_Distinct.cpp new file mode 100644 index 000000000000..7200230bfc7e --- /dev/null +++ b/src/Parsers/tests/KQL/gtest_KQL_Distinct.cpp @@ -0,0 +1,33 @@ +#include + +#include + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_Distinct, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "Customers | distinct *", + "SELECT DISTINCT *\nFROM Customers" + }, + { + "Customers | distinct Occupation", + "SELECT DISTINCT Occupation\nFROM Customers" + }, + { + "Customers | distinct Occupation, Education", + "SELECT DISTINCT\n Occupation,\n Education\nFROM Customers" + }, + { + "Customers |where Age <30| distinct Occupation, Education", + "SELECT DISTINCT\n Occupation,\n Education\nFROM\n(\n SELECT *\n FROM Customers\n WHERE Age < 30\n)" + }, + { + "Customers |where Age <30 | order by Age| distinct Occupation, Education", + "SELECT DISTINCT\n Occupation,\n Education\nFROM\n(\n SELECT *\n FROM 
Customers\n WHERE Age < 30\n ORDER BY Age DESC\n)" + }, + { + "Customers | project a = (Age % 10) | distinct a;", + "SELECT DISTINCT a\nFROM\n(\n SELECT Age % 10 AS a\n FROM Customers\n)" + } +}))); diff --git a/src/Parsers/tests/KQL/gtest_KQL_Dynamic.cpp b/src/Parsers/tests/KQL/gtest_KQL_Dynamic.cpp new file mode 100644 index 000000000000..a3212103508b --- /dev/null +++ b/src/Parsers/tests/KQL/gtest_KQL_Dynamic.cpp @@ -0,0 +1,136 @@ +#include + +#include + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_DynamicExactMatch, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "print array_concat(A, B)", + "SELECT arrayConcat(A, B)" + }, + { + "print array_concat(A, B, C, D)", + "SELECT arrayConcat(A, B, C, D)" + }, + { + "print array_iff(A, B, C)", + "SELECT kql_ArrayIif(A, B, C)" + }, + { + "print output = array_index_of(dynamic([1, 2, 3]), 2)", + "SELECT indexOf([1, 2, 3], 2) - 1 AS output" + }, + { + "print output = array_index_of(dynamic(['a', 'b', 'c']), 'b')", + "SELECT indexOf(['a', 'b', 'c'], 'b') - 1 AS output" + }, + { + "print output = array_index_of(dynamic(['John', 'Denver', 'Bob', 'Marley']), 'Marley')", + "SELECT indexOf(['John', 'Denver', 'Bob', 'Marley'], 'Marley') - 1 AS output" + }, + + { + "print array_length(dynamic([1, 2, 3]))", + "SELECT arrayLastIndex(x -> true, [1, 2, 3])" + }, + { + "print array_length(dynamic(['John', 'Denver', 'Bob', 'Marley']))", + "SELECT arrayLastIndex(x -> true, ['John', 'Denver', 'Bob', 'Marley'])" + }, + { + "print array_reverse(A)", + "SELECT arrayReverse(A)" + }, + { + "print array_rotate_left(A, B)", + "SELECT arrayMap(x -> (A[moduloOrZero((x + length(A)) + moduloOrZero(B, toInt64(length(A))), length(A)) + 1]), range(0, length(A)))" + }, + { + "print array_rotate_right(A, B)", + "SELECT arrayMap(x -> (A[moduloOrZero((x + length(A)) + moduloOrZero(-1 * B, toInt64(length(A))), length(A)) + 1]), range(0, length(A)))" + }, + { + "print 
array_sum(dynamic([2, 5, 3]))", + "SELECT if(multiSearchAny(extract(toTypeName(arrayMap(x -> assumeNotNull(x), arrayFilter(x -> (x IS NOT NULL), [2, 5, 3]))), 'Array\\\\((.*)\\\\)'), ['Bool', 'Decimal', 'Float', 'Int', 'Nothing', 'UInt']), arraySum(x -> toFloat64OrDefault(x), [2, 5, 3]), NULL)" + }, + { + "print array_sum(dynamic([2.5, 5.5, 3]))", + "SELECT if(multiSearchAny(extract(toTypeName(arrayMap(x -> assumeNotNull(x), arrayFilter(x -> (x IS NOT NULL), [2.5, 5.5, 3]))), 'Array\\\\((.*)\\\\)'), ['Bool', 'Decimal', 'Float', 'Int', 'Nothing', 'UInt']), arraySum(x -> toFloat64OrDefault(x), [2.5, 5.5, 3]), NULL)" + }, + { + "print jaccard_index(A, B)", + "SELECT length(arrayIntersect(A, B)) / length(arrayDistinct(arrayConcat(A, B)))" + }, + { + "print pack_array(A, B, C, D)", + "SELECT [A, B, C, D]" + }, + { + "print set_difference(A, B)", + "SELECT arrayFilter(x -> (NOT has(arrayDistinct(arrayConcat(B)), x)), arrayDistinct(A))" + }, + { + "print set_difference(A, B, C)", + "SELECT arrayFilter(x -> (NOT has(arrayDistinct(arrayConcat(B, C)), x)), arrayDistinct(A))" + }, + { + "print set_has_element(A, B)", + "SELECT has(A, B)" + }, + { + "print set_intersect(A, B)", + "SELECT arrayIntersect(A, B)" + }, + { + "print set_intersect(A, B, C)", + "SELECT arrayIntersect(A, B, C)" + }, + { + "print set_union(A, B)", + "SELECT arrayDistinct(arrayConcat(A, B))" + }, + { + "print set_union(A, B, C)", + "SELECT arrayDistinct(arrayConcat(A, B, C))" + } +}))); + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_DynamicRegex, ParserRegexTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "print array_shift_left(A, B)", + R"(SELECT arrayResize\(if\(B > 0, arraySlice\(A, B \+ 1\), arrayConcat\(arrayWithConstant\(abs\(B\), fill_value_\d+\), A\)\), length\(A\), if\(\(NULL IS NULL\) AND \(\(extract\(toTypeName\(A\), 'Array\\\\\(\(\.\*\)\\\\\)*'\) AS element_type_\d+\) = 'String'\), defaultValueOfTypeName\(if\(element_type_\d+ 
= 'Nothing', 'Nullable\(Nothing\)', element_type_\d+\)\), NULL\) AS fill_value_\d+\))" + }, + { + "print array_shift_left(A, B, C)", + R"(SELECT arrayResize\(if\(B > 0, arraySlice\(A, B \+ 1\), arrayConcat\(arrayWithConstant\(abs\(B\), fill_value_\d+\), A\)\), length\(A\), if\(\(C IS NULL\) AND \(\(extract\(toTypeName\(A\), 'Array\\\\\(\(\.\*\)\\\\\)'\) AS element_type_\d+\) = 'String'\), defaultValueOfTypeName\(if\(element_type_\d+ = 'Nothing', 'Nullable\(Nothing\)', element_type_\d+\)\), C\) AS fill_value_\d+\))" + }, + { + "print array_shift_right(A, B)", + R"(SELECT arrayResize\(if\(\(-1 \* B\) > 0, arraySlice\(A, \(-1 \* B\) \+ 1\), arrayConcat\(arrayWithConstant\(abs\(-1 \* B\), fill_value_\d+\), A\)\), length\(A\), if\(\(NULL IS NULL\) AND \(\(extract\(toTypeName\(A\), 'Array\\\\\(\(\.\*\)\\\\\)'\) AS element_type_\d+\) = 'String'\), defaultValueOfTypeName\(if\(element_type_\d+ = 'Nothing', 'Nullable\(Nothing\)', element_type_\d+\)\), NULL\) AS fill_value_\d+\))" + }, + { + "print array_shift_right(A, B, C)", + R"(SELECT arrayResize\(if\(\(-1 \* B\) > 0, arraySlice\(A, \(-1 \* B\) \+ 1\), arrayConcat\(arrayWithConstant\(abs\(-1 \* B\), fill_value_\d+\), A\)\), length\(A\), if\(\(C IS NULL\) AND \(\(extract\(toTypeName\(A\), 'Array\\\\\(\(\.\*\)\\\\\)'\) AS element_type_\d+\) = 'String'\), defaultValueOfTypeName\(if\(element_type_\d+ = 'Nothing', 'Nullable\(Nothing\)', element_type_\d+\)\), C\) AS fill_value_\d+\))" + }, + { + "print array_slice(A, B, C)", + R"(SELECT arraySlice\(A, 1 \+ if\(B >= 0, B, arrayMax\(\[-length\(A\), B\]\) \+ length\(A\)\) AS offset_\d+, \(\(1 \+ if\(C >= 0, C, arrayMax\(\[-length\(A\), C\]\) \+ length\(A\)\)\) - offset_\d+\) \+ 1\))" + }, + { + "print array_split(A, B)", + R"(SELECT if\(empty\(arrayMap\(x -> if\(x >= 0, x, arrayMax\(\[0, x \+ CAST\(length\(A\), 'Int\d+'\)\]\)\), flatten\(\[B\]\)\) AS indices_\d+\), \[A\], arrayConcat\(\[arraySlice\(A, 1, indices_\d+\[1\]\)\], arrayMap\(i -> arraySlice\(A, \(indices_\d+\[i\]\) \+ 
1, if\(i = length\(indices_\d+\), CAST\(length\(A\), 'Int\d+'\), CAST\(indices_\d+\[i \+ 1\], 'Int\d+'\)\) - \(indices_\d+\[i\]\)\), range\(1, length\(indices_\d+\) \+ 1\)\)\)\))" + }, + { + "print zip(A, B)", + R"(SELECT arrayMap\(t -> \[untuple\(t\)\], arrayZip\(arrayResize\(arg0_\d+, arrayMax\(\[length\(if\(match\(toTypeName\(A\), 'Array\\\\\(Nullable\\\\\(\.\*\\\\\)\\\\\)'\), A, CAST\(A, concat\('Array\(', extract\(toTypeName\(if\(length\(A\) = 0, \[NULL\], A\)\), 'Array\\\\\(\(\.\*\)\\\\\)'\), '\)'\)\)\) AS arg0_\d+\), length\(if\(match\(toTypeName\(B\), 'Array\\\\\(Nullable\\\\\(\.\*\\\\\)\\\\\)'\), B, CAST\(B, concat\('Array\(', extract\(toTypeName\(if\(length\(B\) = 0, \[NULL\], B\)\), 'Array\\\\\(\(\.\*\)\\\\\)'\), '\)'\)\)\) AS arg1_\d+\)\]\) AS max_length_\d+, NULL\), arrayResize\(arg1_\d+, max_length_\d+, NULL\)\)\))" + }, + { + "print zip(A, B, C)", + R"(SELECT arrayMap\(t -> \[untuple\(t\)\], arrayZip\(arrayResize\(arg0_\d+, arrayMax\(\[length\(if\(match\(toTypeName\(A\), 'Array\\\\\(Nullable\\\\\(\.\*\\\\\)\\\\\)'\), A, CAST\(A, concat\('Array\(', extract\(toTypeName\(if\(length\(A\) = 0, \[NULL\], A\)\), 'Array\\\\\(\(\.\*\)\\\\\)'\), '\)'\)\)\) AS arg0_\d+\), length\(if\(match\(toTypeName\(B\), 'Array\\\\\(Nullable\\\\\(\.\*\\\\\)\\\\\)'\), B, CAST\(B, concat\('Array\(', extract\(toTypeName\(if\(length\(B\) = 0, \[NULL\], B\)\), 'Array\\\\\(\(\.\*\)\\\\\)'\), '\)'\)\)\) AS arg1_\d+\), length\(if\(match\(toTypeName\(C\), 'Array\\\\\(Nullable\\\\\(\.\*\\\\\)\\\\\)'\), C, CAST\(C, concat\('Array\(', extract\(toTypeName\(if\(length\(C\) = 0, \[NULL\], C\)\), 'Array\\\\\(\(\.\*\)\\\\\)'\), '\)'\)\)\) AS arg2_\d+\)\]\) AS max_length_\d+, NULL\), arrayResize\(arg1_\d+, max_length_\d+, NULL\), arrayResize\(arg2_\d+, max_length_\d+, NULL\)\)\))" + } +}))); diff --git a/src/Parsers/tests/KQL/gtest_KQL_General.cpp b/src/Parsers/tests/KQL/gtest_KQL_General.cpp new file mode 100644 index 000000000000..39955b5d73e6 --- /dev/null +++ 
b/src/Parsers/tests/KQL/gtest_KQL_General.cpp @@ -0,0 +1,69 @@ +#include + +#include + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_General, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "print t = case(5 <= 10, 'A', 12 <= 20, 'B', 22 <= 30, 'C', 'D')", + "SELECT multiIf(5 <= 10, 'A', 12 <= 20, 'B', 22 <= 30, 'C', 'D') AS t" + }, + { + "Customers | extend t = case(Age <= 10, 'A', Age <= 20, 'B', Age <= 30, 'C', 'D')", + "SELECT\n * EXCEPT t,\n multiIf(Age <= 10, 'A', Age <= 20, 'B', Age <= 30, 'C', 'D') AS t\nFROM Customers" + }, + { + "Customers | extend t = iff(Age < 20, 'little', 'big')", + "SELECT\n * EXCEPT t,\n If(Age < 20, 'little', 'big') AS t\nFROM Customers" + }, + { + "Customers | extend t = iif(Age < 20, 'little', 'big')", + "SELECT\n * EXCEPT t,\n If(Age < 20, 'little', 'big') AS t\nFROM Customers" + }, + { + "print res = bin_at(6.5, 2.5, 7)", + "SELECT kql_bin_at(6.5, 2.5, 7) AS res" + }, + { + "print res = bin_at(1h, 1d, 12h)", + "SELECT kql_bin_at(toIntervalNanosecond(3600000000000), toIntervalNanosecond(86400000000000), toIntervalNanosecond(43200000000000)) AS res" + }, + { + "print res = bin_at(datetime(2017-05-15 10:20:00.0), 1d, datetime(1970-01-01 12:00:00.0))", + "SELECT kql_bin_at(kql_datetime('2017-05-15 10:20:00.0'), toIntervalNanosecond(86400000000000), kql_datetime('1970-01-01 12:00:00.0')) AS res" + }, + { + "print bin(4.5, 1)", + "SELECT kql_bin(4.5, 1)" + }, + { + "print bin(4.5, -1)", + "SELECT kql_bin(4.5, -1)" + }, + { + "print bin(time(16d), 7d)", + "SELECT kql_bin(toIntervalNanosecond(1382400000000000), toIntervalNanosecond(604800000000000))" + }, + { + "print bin(datetime(1970-05-11 13:45:07), 1d)", + "SELECT kql_bin(kql_datetime('1970-05-11 13:45:07'), toIntervalNanosecond(86400000000000))" + }, + { + "print bin(datetime(1970-05-11 13:45:07.456345672), 1ms)", + "SELECT kql_bin(kql_datetime('1970-05-11 13:45:07.456345672'), toIntervalNanosecond(1000000))" 
+ }, + { + "print bin(datetime(1970-05-11 13:45:07.456345672), 1microseconds)", + "SELECT kql_bin(kql_datetime('1970-05-11 13:45:07.456345672'), toIntervalNanosecond(1000))" + }, + { + "print lookup('dictionary_table', 'value', '1')", + "SELECT dictGet('dictionary_table', 'value', '1')" + }, + { + "print lookup('dictionary_table', 'value', '100', 'default')", + "SELECT dictGetOrDefault('dictionary_table', 'value', '100', 'default')" + } +}))); diff --git a/src/Parsers/tests/KQL/gtest_KQL_IP.cpp b/src/Parsers/tests/KQL/gtest_KQL_IP.cpp new file mode 100644 index 000000000000..59e1a9c94266 --- /dev/null +++ b/src/Parsers/tests/KQL/gtest_KQL_IP.cpp @@ -0,0 +1,116 @@ +#include + +#include + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_IP, ParserRegexTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "print format_ipv4(A)", + R"(SELECT ifNull\(if\(\(\(\(toUInt32OrNull\(toString\(A\)\) AS param_as_uint32_\d+\) IS NOT NULL\) AND \(toTypeName\(A\) = 'String'\)\) OR \(32 < 0\) OR \(\(ifNull\(param_as_uint32_\d+, multiIf\(length\(splitByChar\('/', ifNull\(kql_tostring\(A\), ''\)\) AS tokens_\d+\) = 1, IPv4StringToNumOrNull\(tokens_\d+\[1\]\) AS ip_\d+, \(length\(tokens_\d+\) = 2\) AND \(ip_\d+ IS NOT NULL\) AND \(\(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NOT NULL\), CAST\(IPv4CIDRToRange\(assumeNotNull\(ip_\d+\), assumeNotNull\(mask_\d+\)\).1, 'UInt32'\), NULL\)\) AS ip_as_number_\d+\) IS NULL\), NULL, IPv4NumToString\(bitAnd\(ip_as_number_\d+, bitNot\(toUInt32\(intExp2\(32 - 32\) - 1\)\)\)\)\), ''\))" + }, + { + "print format_ipv4(A, B)", + R"(SELECT ifNull\(if\(\(\(\(toUInt32OrNull\(toString\(A\)\) AS param_as_uint32_\d+\) IS NOT NULL\) AND \(toTypeName\(A\) = 'String'\)\) OR \(B < 0\) OR \(\(ifNull\(param_as_uint32_\d+, multiIf\(length\(splitByChar\('/', ifNull\(kql_tostring\(A\), ''\)\) AS tokens_\d+\) = 1, IPv4StringToNumOrNull\(tokens_\d+\[1\]\) AS ip_\d+, \(length\(tokens_\d+\) = 2\) AND 
\(ip_\d+ IS NOT NULL\) AND \(\(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NOT NULL\), CAST\(IPv4CIDRToRange\(assumeNotNull\(ip_\d+\), assumeNotNull\(mask_\d+\)\).1, 'UInt32'\), NULL\)\) AS ip_as_number_\d+\) IS NULL\), NULL, IPv4NumToString\(bitAnd\(ip_as_number_\d+, bitNot\(toUInt32\(intExp2\(32 - B\) - 1\)\)\)\)\), ''\))" + }, + { + "print format_ipv4_mask(A)", + R"(SELECT if\(empty\(ifNull\(if\(\(\(\(toUInt32OrNull\(toString\(A\)\) AS param_as_uint32_\d+\) IS NOT NULL\) AND \(toTypeName\(A\) = 'String'\)\) OR \(32 < 0\) OR \(\(ifNull\(param_as_uint32_\d+, multiIf\(length\(splitByChar\('/', ifNull\(kql_tostring\(A\), ''\)\) AS tokens_\d+\) = 1, IPv4StringToNumOrNull\(tokens_\d+\[1\]\) AS ip_\d+, \(length\(tokens_\d+\) = 2\) AND \(ip_\d+ IS NOT NULL\) AND \(\(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NOT NULL\), CAST\(IPv4CIDRToRange\(assumeNotNull\(ip_\d+\), assumeNotNull\(mask_\d+\)\)\.1, 'UInt32'\), NULL\)\) AS ip_as_number_\d+\) IS NULL\), NULL, IPv4NumToString\(bitAnd\(ip_as_number_\d+, bitNot\(toUInt32\(intExp2\(32 - 32\) - 1\)\)\)\)\), ''\) AS formatted_ip_\d+\) OR \(position\(toTypeName\(32\), 'Int'\) = 0\) OR \(NOT \(\(32 >= 0\) AND \(32 <= 32\)\)\), '', concat\(formatted_ip_\d+, '/', toString\(toInt64\(min2\(32, ifNull\(multiIf\(\(length\(splitByChar\('/', ifNull\(kql_tostring\(A\), ''\)\) AS tokens_\d+\) > 2\) OR \(NOT isIPv4String\(tokens_\d+\[1\]\)\), NULL, length\(tokens_\d+\) = 1, 32, \(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NULL, NULL, toUInt8\(min2\(mask_\d+, 32\)\)\) AS suffix_\d+, 32\)\)\)\)\)\))" + }, + { + "print format_ipv4_mask(A, B)", + R"(SELECT if\(empty\(ifNull\(if\(\(\(\(toUInt32OrNull\(toString\(A\)\) AS param_as_uint32_\d+\) IS NOT NULL\) AND \(toTypeName\(A\) = 'String'\)\) OR \(B < 0\) OR \(\(ifNull\(param_as_uint32_\d+, multiIf\(length\(splitByChar\('/', ifNull\(kql_tostring\(A\), ''\)\) AS tokens_\d+\) = 1, IPv4StringToNumOrNull\(tokens_\d+\[1\]\) AS ip_\d+, \(length\(tokens_\d+\) = 2\) AND \(ip_\d+ IS 
NOT NULL\) AND \(\(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NOT NULL\), CAST\(IPv4CIDRToRange\(assumeNotNull\(ip_\d+\), assumeNotNull\(mask_\d+\)\)\.1, 'UInt32'\), NULL\)\) AS ip_as_number_\d+\) IS NULL\), NULL, IPv4NumToString\(bitAnd\(ip_as_number_\d+, bitNot\(toUInt32\(intExp2\(32 - B\) - 1\)\)\)\)\), ''\) AS formatted_ip_\d+\) OR \(position\(toTypeName\(B\), 'Int'\) = 0\) OR \(NOT \(\(B >= 0\) AND \(B <= 32\)\)\), '', concat\(formatted_ip_\d+, '/', toString\(toInt64\(min2\(B, ifNull\(multiIf\(\(length\(splitByChar\('/', ifNull\(kql_tostring\(A\), ''\)\) AS tokens_\d+\) > 2\) OR \(NOT isIPv4String\(tokens_\d+\[1\]\)\), NULL, length\(tokens_\d+\) = 1, 32, \(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NULL, NULL, toUInt8\(min2\(mask_\d+, 32\)\)\) AS suffix_\d+, 32\)\)\)\)\)\))" + }, + { + "print ipv4_compare(A, B)", + R"(SELECT if\(\(\(multiIf\(length\(splitByChar\('/', A\) AS tokens_\d+\) = 1, IPv4StringToNumOrNull\(tokens_\d+\[1\]\) AS ip_\d+, \(length\(tokens_\d+\) = 2\) AND \(ip_\d+ IS NOT NULL\) AND \(\(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NOT NULL\), CAST\(IPv4CIDRToRange\(assumeNotNull\(ip_\d+\), assumeNotNull\(mask_\d+\)\).1, 'UInt32'\), NULL\) AS lhs_ip_\d+\) IS NULL\) OR \(\(multiIf\(\(length\(splitByChar\('/', A\) AS tokens_\d+\) > 2\) OR \(NOT isIPv4String\(tokens_\d+\[1\]\)\), NULL, length\(tokens_\d+\) = 1, 32, \(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NULL, NULL, toUInt8\(min2\(mask_\d+, 32\)\)\) AS lhs_mask_\d+\) IS NULL\) OR \(\(multiIf\(length\(splitByChar\('/', B\) AS tokens_\d+\) = 1, IPv4StringToNumOrNull\(tokens_\d+\[1\]\) AS ip_\d+, \(length\(tokens_\d+\) = 2\) AND \(ip_\d+ IS NOT NULL\) AND \(\(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NOT NULL\), CAST\(IPv4CIDRToRange\(assumeNotNull\(ip_\d+\), assumeNotNull\(mask_\d+\)\).1, 'UInt32'\), NULL\) AS rhs_ip_\d+\) IS NULL\) OR \(\(multiIf\(\(length\(splitByChar\('/', B\) AS tokens_\d+\) > 2\) OR \(NOT isIPv4String\(tokens_\d+\[1\]\)\), NULL, 
length\(tokens_\d+\) = 1, 32, \(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NULL, NULL, toUInt8\(min2\(mask_\d+, 32\)\)\) AS rhs_mask_\d+\) IS NULL\), NULL, sign\(toInt64\(CAST\(IPv4CIDRToRange\(assumeNotNull\(lhs_ip_\d+\), toUInt8\(min2\(32, min2\(assumeNotNull\(lhs_mask_\d+\), assumeNotNull\(rhs_mask_\d+\)\)\)\) AS mask_\d+\).1, 'UInt32'\)\) - toInt64\(CAST\(IPv4CIDRToRange\(assumeNotNull\(rhs_ip_\d+\), mask_\d+\).1, 'UInt32'\)\)\)\))" + }, + { + "print ipv4_compare(A, B, C)", + R"(SELECT if\(\(\(multiIf\(length\(splitByChar\('/', A\) AS tokens_\d+\) = 1, IPv4StringToNumOrNull\(tokens_\d+\[1\]\) AS ip_\d+, \(length\(tokens_\d+\) = 2\) AND \(ip_\d+ IS NOT NULL\) AND \(\(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NOT NULL\), CAST\(IPv4CIDRToRange\(assumeNotNull\(ip_\d+\), assumeNotNull\(mask_\d+\)\).1, 'UInt32'\), NULL\) AS lhs_ip_\d+\) IS NULL\) OR \(\(multiIf\(\(length\(splitByChar\('/', A\) AS tokens_\d+\) > 2\) OR \(NOT isIPv4String\(tokens_\d+\[1\]\)\), NULL, length\(tokens_\d+\) = 1, 32, \(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NULL, NULL, toUInt8\(min2\(mask_\d+, 32\)\)\) AS lhs_mask_\d+\) IS NULL\) OR \(\(multiIf\(length\(splitByChar\('/', B\) AS tokens_\d+\) = 1, IPv4StringToNumOrNull\(tokens_\d+\[1\]\) AS ip_\d+, \(length\(tokens_\d+\) = 2\) AND \(ip_\d+ IS NOT NULL\) AND \(\(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NOT NULL\), CAST\(IPv4CIDRToRange\(assumeNotNull\(ip_\d+\), assumeNotNull\(mask_\d+\)\).1, 'UInt32'\), NULL\) AS rhs_ip_\d+\) IS NULL\) OR \(\(multiIf\(\(length\(splitByChar\('/', B\) AS tokens_\d+\) > 2\) OR \(NOT isIPv4String\(tokens_\d+\[1\]\)\), NULL, length\(tokens_\d+\) = 1, 32, \(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NULL, NULL, toUInt8\(min2\(mask_\d+, 32\)\)\) AS rhs_mask_\d+\) IS NULL\), NULL, sign\(toInt64\(CAST\(IPv4CIDRToRange\(assumeNotNull\(lhs_ip_\d+\), toUInt8\(min2\(C, min2\(assumeNotNull\(lhs_mask_\d+\), assumeNotNull\(rhs_mask_\d+\)\)\)\) AS mask_\d+\).1, 'UInt32'\)\) - 
toInt64\(CAST\(IPv4CIDRToRange\(assumeNotNull\(rhs_ip_\d+\), mask_\d+\).1, 'UInt32'\)\)\)\))" + }, + { + "print ipv6_compare(A, B)", + R"(SELECT if\(\(length\(splitByChar\('/', A\) AS lhs_tokens_\d+\) > 2\) OR \(length\(splitByChar\('/', B\) AS rhs_tokens_\d+\) > 2\) OR \(\(IPv6StringToNumOrNull\(lhs_tokens_\d+\[1\]\) AS lhs_ipv6_\d+\) IS NULL\) OR \(\(length\(lhs_tokens_\d+\) = 2\) AND \(\(\(if\(isIPv4String\(lhs_tokens_\d+\[1\]\), 96, 0\) \+ toUInt8OrNull\(lhs_tokens_\d+\[-1\]\)\) AS lhs_suffix_\d+\) IS NULL\)\) OR \(\(IPv6StringToNumOrNull\(rhs_tokens_\d+\[1\]\) AS rhs_ipv6_\d+\) IS NULL\) OR \(\(length\(rhs_tokens_\d+\) = 2\) AND \(\(\(if\(isIPv4String\(rhs_tokens_\d+\[1\]\), 96, 0\) \+ toUInt8OrNull\(rhs_tokens_\d+\[-1\]\)\) AS rhs_suffix_\d+\) IS NULL\)\) OR \(\(toUInt8\(min2\(128, min2\(ifNull\(lhs_suffix_\d+, 128\), ifNull\(rhs_suffix_\d+, 128\)\)\)\) AS suffix_\d+\) IS NULL\) OR \(\(bitShiftLeft\(bitShiftRight\(bitNot\(reinterpretAsFixedString\(CAST\('0', 'UInt128'\)\)\), 128 - suffix_\d+ AS zeroes_\d+\), zeroes_\d+\) AS mask_\d+\) IS NULL\) OR \(\(bitAnd\(lhs_ipv6_\d+, mask_\d+\) AS lhs_base_\d+\) IS NULL\) OR \(\(bitAnd\(rhs_ipv6_\d+, mask_\d+\) AS rhs_base_\d+\) IS NULL\), NULL, multiIf\(lhs_base_\d+ < rhs_base_\d+, -1, lhs_base_\d+ > rhs_base_\d+, 1, 0\)\))" + }, + { + "print ipv6_compare(A, B, C)", + R"(SELECT if\(\(length\(splitByChar\('/', A\) AS lhs_tokens_\d+\) > 2\) OR \(length\(splitByChar\('/', B\) AS rhs_tokens_\d+\) > 2\) OR \(\(IPv6StringToNumOrNull\(lhs_tokens_\d+\[1\]\) AS lhs_ipv6_\d+\) IS NULL\) OR \(\(length\(lhs_tokens_\d+\) = 2\) AND \(\(\(if\(isIPv4String\(lhs_tokens_\d+\[1\]\), 96, 0\) \+ toUInt8OrNull\(lhs_tokens_\d+\[-1\]\)\) AS lhs_suffix_\d+\) IS NULL\)\) OR \(\(IPv6StringToNumOrNull\(rhs_tokens_\d+\[1\]\) AS rhs_ipv6_\d+\) IS NULL\) OR \(\(length\(rhs_tokens_\d+\) = 2\) AND \(\(\(if\(isIPv4String\(rhs_tokens_\d+\[1\]\), 96, 0\) \+ toUInt8OrNull\(rhs_tokens_\d+\[-1\]\)\) AS rhs_suffix_\d+\) IS NULL\)\) OR \(\(toUInt8\(min2\(C, 
min2\(ifNull\(lhs_suffix_\d+, 128\), ifNull\(rhs_suffix_\d+, 128\)\)\)\) AS suffix_\d+\) IS NULL\) OR \(\(bitShiftLeft\(bitShiftRight\(bitNot\(reinterpretAsFixedString\(CAST\('0', 'UInt128'\)\)\), 128 - suffix_\d+ AS zeroes_\d+\), zeroes_\d+\) AS mask_\d+\) IS NULL\) OR \(\(bitAnd\(lhs_ipv6_\d+, mask_\d+\) AS lhs_base_\d+\) IS NULL\) OR \(\(bitAnd\(rhs_ipv6_\d+, mask_\d+\) AS rhs_base_\d+\) IS NULL\), NULL, multiIf\(lhs_base_\d+ < rhs_base_\d+, -1, lhs_base_\d+ > rhs_base_\d+, 1, 0\)\))" + }, + { + "print ipv4_is_in_range(A, B)", + R"(SELECT if\(\(\(IPv4StringToNumOrNull\(A\) AS ip_\d+\) IS NULL\) OR \(\(multiIf\(length\(splitByChar\('/', B\) AS tokens_\d+\) = 1, IPv4StringToNumOrNull\(tokens_\d+\[1\]\) AS ip_\d+, \(length\(tokens_\d+\) = 2\) AND \(ip_\d+ IS NOT NULL\) AND \(\(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NOT NULL\), CAST\(IPv4CIDRToRange\(assumeNotNull\(ip_\d+\), assumeNotNull\(mask_\d+\)\).1, 'UInt32'\), NULL\) AS range_start_ip_\d+\) IS NULL\) OR \(\(multiIf\(\(length\(splitByChar\('/', B\) AS tokens_\d+\) > 2\) OR \(NOT isIPv4String\(tokens_\d+\[1\]\)\), NULL, length\(tokens_\d+\) = 1, 32, \(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NULL, NULL, toUInt8\(min2\(mask_\d+, 32\)\)\) AS range_mask_\d+\) IS NULL\), NULL, bitXor\(range_start_ip_\d+, bitAnd\(ip_\d+, bitNot\(toUInt32\(intExp2\(32 - range_mask_\d+\) - 1\)\)\)\) = 0\))" + }, + { + "print ipv4_is_match(A, B)", + R"(SELECT if\(\(\(multiIf\(length\(splitByChar\('/', A\) AS tokens_\d+\) = 1, IPv4StringToNumOrNull\(tokens_\d+\[1\]\) AS ip_\d+, \(length\(tokens_\d+\) = 2\) AND \(ip_\d+ IS NOT NULL\) AND \(\(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NOT NULL\), CAST\(IPv4CIDRToRange\(assumeNotNull\(ip_\d+\), assumeNotNull\(mask_\d+\)\).1, 'UInt32'\), NULL\) AS lhs_ip_\d+\) IS NULL\) OR \(\(multiIf\(\(length\(splitByChar\('/', A\) AS tokens_\d+\) > 2\) OR \(NOT isIPv4String\(tokens_\d+\[1\]\)\), NULL, length\(tokens_\d+\) = 1, 32, \(toUInt8OrNull\(tokens_\d+\[-1\]\) AS 
mask_\d+\) IS NULL, NULL, toUInt8\(min2\(mask_\d+, 32\)\)\) AS lhs_mask_\d+\) IS NULL\) OR \(\(multiIf\(length\(splitByChar\('/', B\) AS tokens_\d+\) = 1, IPv4StringToNumOrNull\(tokens_\d+\[1\]\) AS ip_\d+, \(length\(tokens_\d+\) = 2\) AND \(ip_\d+ IS NOT NULL\) AND \(\(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NOT NULL\), CAST\(IPv4CIDRToRange\(assumeNotNull\(ip_\d+\), assumeNotNull\(mask_\d+\)\).1, 'UInt32'\), NULL\) AS rhs_ip_\d+\) IS NULL\) OR \(\(multiIf\(\(length\(splitByChar\('/', B\) AS tokens_\d+\) > 2\) OR \(NOT isIPv4String\(tokens_\d+\[1\]\)\), NULL, length\(tokens_\d+\) = 1, 32, \(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NULL, NULL, toUInt8\(min2\(mask_\d+, 32\)\)\) AS rhs_mask_\d+\) IS NULL\), NULL, sign\(toInt64\(CAST\(IPv4CIDRToRange\(assumeNotNull\(lhs_ip_\d+\), toUInt8\(min2\(32, min2\(assumeNotNull\(lhs_mask_\d+\), assumeNotNull\(rhs_mask_\d+\)\)\)\) AS mask_\d+\).1, 'UInt32'\)\) - toInt64\(CAST\(IPv4CIDRToRange\(assumeNotNull\(rhs_ip_\d+\), mask_\d+\).1, 'UInt32'\)\)\)\) = 0)" + }, + { + "print ipv4_is_match(A, B, C)", + R"(SELECT if\(\(\(multiIf\(length\(splitByChar\('/', A\) AS tokens_\d+\) = 1, IPv4StringToNumOrNull\(tokens_\d+\[1\]\) AS ip_\d+, \(length\(tokens_\d+\) = 2\) AND \(ip_\d+ IS NOT NULL\) AND \(\(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NOT NULL\), CAST\(IPv4CIDRToRange\(assumeNotNull\(ip_\d+\), assumeNotNull\(mask_\d+\)\).1, 'UInt32'\), NULL\) AS lhs_ip_\d+\) IS NULL\) OR \(\(multiIf\(\(length\(splitByChar\('/', A\) AS tokens_\d+\) > 2\) OR \(NOT isIPv4String\(tokens_\d+\[1\]\)\), NULL, length\(tokens_\d+\) = 1, 32, \(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NULL, NULL, toUInt8\(min2\(mask_\d+, 32\)\)\) AS lhs_mask_\d+\) IS NULL\) OR \(\(multiIf\(length\(splitByChar\('/', B\) AS tokens_\d+\) = 1, IPv4StringToNumOrNull\(tokens_\d+\[1\]\) AS ip_\d+, \(length\(tokens_\d+\) = 2\) AND \(ip_\d+ IS NOT NULL\) AND \(\(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NOT NULL\), 
CAST\(IPv4CIDRToRange\(assumeNotNull\(ip_\d+\), assumeNotNull\(mask_\d+\)\).1, 'UInt32'\), NULL\) AS rhs_ip_\d+\) IS NULL\) OR \(\(multiIf\(\(length\(splitByChar\('/', B\) AS tokens_\d+\) > 2\) OR \(NOT isIPv4String\(tokens_\d+\[1\]\)\), NULL, length\(tokens_\d+\) = 1, 32, \(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NULL, NULL, toUInt8\(min2\(mask_\d+, 32\)\)\) AS rhs_mask_\d+\) IS NULL\), NULL, sign\(toInt64\(CAST\(IPv4CIDRToRange\(assumeNotNull\(lhs_ip_\d+\), toUInt8\(min2\(C, min2\(assumeNotNull\(lhs_mask_\d+\), assumeNotNull\(rhs_mask_\d+\)\)\)\) AS mask_\d+\).1, 'UInt32'\)\) - toInt64\(CAST\(IPv4CIDRToRange\(assumeNotNull\(rhs_ip_\d+\), mask_\d+\).1, 'UInt32'\)\)\)\) = 0)" + }, + { + "print ipv6_is_match(A, B)", + R"(SELECT if\(\(length\(splitByChar\('/', A\) AS lhs_tokens_\d+\) > 2\) OR \(length\(splitByChar\('/', B\) AS rhs_tokens_\d+\) > 2\) OR \(\(IPv6StringToNumOrNull\(lhs_tokens_\d+\[1\]\) AS lhs_ipv6_\d+\) IS NULL\) OR \(\(length\(lhs_tokens_\d+\) = 2\) AND \(\(\(if\(isIPv4String\(lhs_tokens_\d+\[1\]\), 96, 0\) \+ toUInt8OrNull\(lhs_tokens_\d+\[-1\]\)\) AS lhs_suffix_\d+\) IS NULL\)\) OR \(\(IPv6StringToNumOrNull\(rhs_tokens_\d+\[1\]\) AS rhs_ipv6_\d+\) IS NULL\) OR \(\(length\(rhs_tokens_\d+\) = 2\) AND \(\(\(if\(isIPv4String\(rhs_tokens_\d+\[1\]\), 96, 0\) \+ toUInt8OrNull\(rhs_tokens_\d+\[-1\]\)\) AS rhs_suffix_\d+\) IS NULL\)\) OR \(\(toUInt8\(min2\(128, min2\(ifNull\(lhs_suffix_\d+, 128\), ifNull\(rhs_suffix_\d+, 128\)\)\)\) AS suffix_\d+\) IS NULL\) OR \(\(bitShiftLeft\(bitShiftRight\(bitNot\(reinterpretAsFixedString\(CAST\('0', 'UInt128'\)\)\), 128 - suffix_\d+ AS zeroes_\d+\), zeroes_\d+\) AS mask_\d+\) IS NULL\) OR \(\(bitAnd\(lhs_ipv6_\d+, mask_\d+\) AS lhs_base_\d+\) IS NULL\) OR \(\(bitAnd\(rhs_ipv6_\d+, mask_\d+\) AS rhs_base_\d+\) IS NULL\), NULL, multiIf\(lhs_base_\d+ < rhs_base_\d+, -1, lhs_base_\d+ > rhs_base_\d+, 1, 0\)\) = 0)" + }, + { + "print ipv6_is_match(A, B, C)", + R"(SELECT if\(\(length\(splitByChar\('/', A\) AS 
lhs_tokens_\d+\) > 2\) OR \(length\(splitByChar\('/', B\) AS rhs_tokens_\d+\) > 2\) OR \(\(IPv6StringToNumOrNull\(lhs_tokens_\d+\[1\]\) AS lhs_ipv6_\d+\) IS NULL\) OR \(\(length\(lhs_tokens_\d+\) = 2\) AND \(\(\(if\(isIPv4String\(lhs_tokens_\d+\[1\]\), 96, 0\) \+ toUInt8OrNull\(lhs_tokens_\d+\[-1\]\)\) AS lhs_suffix_\d+\) IS NULL\)\) OR \(\(IPv6StringToNumOrNull\(rhs_tokens_\d+\[1\]\) AS rhs_ipv6_\d+\) IS NULL\) OR \(\(length\(rhs_tokens_\d+\) = 2\) AND \(\(\(if\(isIPv4String\(rhs_tokens_\d+\[1\]\), 96, 0\) \+ toUInt8OrNull\(rhs_tokens_\d+\[-1\]\)\) AS rhs_suffix_\d+\) IS NULL\)\) OR \(\(toUInt8\(min2\(C, min2\(ifNull\(lhs_suffix_\d+, 128\), ifNull\(rhs_suffix_\d+, 128\)\)\)\) AS suffix_\d+\) IS NULL\) OR \(\(bitShiftLeft\(bitShiftRight\(bitNot\(reinterpretAsFixedString\(CAST\('0', 'UInt128'\)\)\), 128 - suffix_\d+ AS zeroes_\d+\), zeroes_\d+\) AS mask_\d+\) IS NULL\) OR \(\(bitAnd\(lhs_ipv6_\d+, mask_\d+\) AS lhs_base_\d+\) IS NULL\) OR \(\(bitAnd\(rhs_ipv6_\d+, mask_\d+\) AS rhs_base_\d+\) IS NULL\), NULL, multiIf\(lhs_base_\d+ < rhs_base_\d+, -1, lhs_base_\d+ > rhs_base_\d+, 1, 0\)\) = 0)" + }, + { + "print ipv4_is_private(A)", + R"(SELECT multiIf\(\(length\(splitByChar\('/', A\) AS tokens_\d+\) > 2\) OR \(\(toIPv4OrNull\(tokens_\d+\[1\]\) AS nullable_ip_\d+\) IS NULL\) OR \(\(length\(tokens_\d+\) = 2\) AND \(\(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NULL\)\), NULL, ignore\(assumeNotNull\(nullable_ip_\d+\) AS ip_\d+, IPv4CIDRToRange\(ip_\d+, assumeNotNull\(mask_\d+\)\) AS range_\d+, IPv4NumToString\(range_\d+.1\) AS begin_\d+, IPv4NumToString\(range_\d+.2\) AS end_\d+\), NULL, \(\(length\(tokens_\d+\) = 1\) AND isIPAddressInRange\(IPv4NumToString\(ip_\d+\), '10.0.0.0/8'\)\) OR \(\(length\(tokens_\d+\) = 2\) AND isIPAddressInRange\(begin_\d+, '10.0.0.0/8'\) AND isIPAddressInRange\(end_\d+, '10.0.0.0/8'\)\) OR \(\(length\(tokens_\d+\) = 1\) AND isIPAddressInRange\(IPv4NumToString\(ip_\d+\), '172.16.0.0/12'\)\) OR \(\(length\(tokens_\d+\) = 2\) AND 
isIPAddressInRange\(begin_\d+, '172.16.0.0/12'\) AND isIPAddressInRange\(end_\d+, '172.16.0.0/12'\)\) OR \(\(length\(tokens_\d+\) = 1\) AND isIPAddressInRange\(IPv4NumToString\(ip_\d+\), '192.168.0.0/16'\)\) OR \(\(length\(tokens_\d+\) = 2\) AND isIPAddressInRange\(begin_\d+, '192.168.0.0/16'\) AND isIPAddressInRange\(end_\d+, '192.168.0.0/16'\)\)\))" + }, + { + "print ipv4_netmask_suffix(A)", + R"(SELECT multiIf\(\(length\(splitByChar\('/', A\) AS tokens_\d+\) > 2\) OR \(NOT isIPv4String\(tokens_\d+\[1\]\)\), NULL, length\(tokens_\d+\) = 1, 32, \(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NULL, NULL, toUInt8\(min2\(mask_\d+, 32\)\)\))" + }, + { + "print parse_ipv4(A)", + R"(SELECT multiIf\(length\(splitByChar\('/', A\) AS tokens_\d+\) = 1, IPv4StringToNumOrNull\(tokens_\d+\[1\]\) AS ip_\d+, \(length\(tokens_\d+\) = 2\) AND \(ip_\d+ IS NOT NULL\) AND \(\(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NOT NULL\), CAST\(IPv4CIDRToRange\(assumeNotNull\(ip_\d+\), assumeNotNull\(mask_\d+\)\).1, 'UInt32'\), NULL\))" + }, + { + "print parse_ipv4_mask(A, B)", + R"(SELECT if\(\(\(toIPv4OrNull\(A\) AS ip_\d+\) IS NULL\) OR \(\(toUInt8OrNull\(toString\(B\)\) AS mask_\d+\) IS NULL\), NULL, toUInt32\(IPv4CIDRToRange\(assumeNotNull\(ip_\d+\), arrayMax\(\[0, arrayMin\(\[32, assumeNotNull\(mask_\d+\)\]\)\]\)\).1\)\))" + }, + { + "print parse_ipv6(A)", + R"(SELECT if\(\(length\(splitByChar\('/', A\) AS tokens_\d+\) > 2\) OR \(\(IPv6StringToNumOrNull\(tokens_\d+\[1\]\) AS ip_\d+\) IS NULL\) OR \(\(length\(tokens_\d+\) = 2\) AND \(\(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NULL\)\), NULL, arrayStringConcat\(flatten\(extractAllGroups\(lower\(hex\(IPv6CIDRToRange\(assumeNotNull\(ip_\d+\), toUInt8\(ifNull\(mask_\d+ \+ if\(isIPv4String\(tokens_\d+\[1\]\), 96, 0\), 128\)\)\).1\)\), '\(\[\\\\da-f\]\{4\}\)'\)\), ':'\)\))" + }, + { + "print parse_ipv6_mask(A, B)", + R"(SELECT if\(empty\(ifNull\(if\(\(\(\(toUInt32OrNull\(toString\(replaceRegexpOne\(A, concat\('\^', 
'::'\), ''\)\)\) AS param_as_uint32_\d+\) IS NOT NULL\) AND \(toTypeName\(replaceRegexpOne\(A, concat\('\^', '::'\), ''\)\) = 'String'\)\) OR \(\(B - 96\) < 0\) OR \(\(ifNull\(param_as_uint32_\d+, multiIf\(length\(splitByChar\('/', ifNull\(kql_tostring\(replaceRegexpOne\(A, concat\('\^', '::'\), ''\)\), ''\)\) AS tokens_\d+\) = 1, IPv4StringToNumOrNull\(tokens_\d+\[1\]\) AS ip_\d+, \(length\(tokens_\d+\) = 2\) AND \(ip_\d+ IS NOT NULL\) AND \(\(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NOT NULL\), CAST\(IPv4CIDRToRange\(assumeNotNull\(ip_\d+\), assumeNotNull\(mask_\d+\)\)\.1, 'UInt32'\), NULL\)\) AS ip_as_number_\d+\) IS NULL\), NULL, IPv4NumToString\(bitAnd\(ip_as_number_\d+, bitNot\(toUInt32\(intExp2\(32 - \(B - 96\)\) - 1\)\)\)\)\), ''\) AS ipv4_\d+\), if\(\(length\(splitByChar\('/', concat\(ifNull\(kql_tostring\(ifNull\(kql_tostring\(if\(\(length\(splitByChar\('/', A\) AS tokens_\d+\) > 2\) OR \(\(IPv6StringToNumOrNull\(tokens_\d+\[1\]\) AS ip_\d+\) IS NULL\) OR \(\(length\(tokens_\d+\) = 2\) AND \(\(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NULL\)\), NULL, arrayStringConcat\(flatten\(extractAllGroups\(lower\(hex\(IPv6CIDRToRange\(assumeNotNull\(ip_\d+\), toUInt8\(ifNull\(mask_\d+ \+ if\(isIPv4String\(tokens_\d+\[1\]\), 96, 0\), 128\)\)\)\.1\)\), '\(\[\\\\da-f\]\{4\}\)'\)\), ':'\)\)\), ''\)\), ''\), ifNull\(kql_tostring\('/'\), ''\), ifNull\(kql_tostring\(ifNull\(kql_tostring\(B\), ''\)\), ''\), ''\)\) AS tokens_\d+\) > 2\) OR \(\(IPv6StringToNumOrNull\(tokens_\d+\[1\]\) AS ip_\d+\) IS NULL\) OR \(\(length\(tokens_\d+\) = 2\) AND \(\(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NULL\)\), NULL, arrayStringConcat\(flatten\(extractAllGroups\(lower\(hex\(IPv6CIDRToRange\(assumeNotNull\(ip_\d+\), toUInt8\(ifNull\(mask_\d+ \+ if\(isIPv4String\(tokens_\d+\[1\]\), 96, 0\), 128\)\)\)\.1\)\), '\(\[\\\\da-f\]\{4\}\)'\)\), ':'\)\), if\(\(length\(splitByChar\('/', ipv4_\d+\) AS tokens_\d+\) > 2\) OR \(\(IPv6StringToNumOrNull\(tokens_\d+\[1\]\) AS 
ip_\d+\) IS NULL\) OR \(\(length\(tokens_\d+\) = 2\) AND \(\(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NULL\)\), NULL, arrayStringConcat\(flatten\(extractAllGroups\(lower\(hex\(IPv6CIDRToRange\(assumeNotNull\(ip_\d+\), toUInt8\(ifNull\(mask_\d+ \+ if\(isIPv4String\(tokens_\d+\[1\]\), 96, 0\), 128\)\)\)\.1\)\), '\(\[\\\\da-f\]\{4\}\)'\)\), ':'\)\)\))" + } +}))); + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_IP, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "print has_ipv4(A, B)", + "SELECT kql_has_ipv4(A, B)" + }, + { + "print has_ipv4_prefix(A, B)", + "SELECT kql_has_ipv4_prefix(A, B)" + }, + { + "print has_any_ipv4(A, B, C)", + "SELECT kql_has_any_ipv4(A, B, C)" + }, + { + "print has_any_ipv4_prefix(A, B, C)", + "SELECT kql_has_any_ipv4_prefix(A, B, C)" + }, + { + "print has_any_ipv4(A, dynamic(['1.2.3.4']))", + "SELECT kql_has_any_ipv4(A, ['1.2.3.4'])" + }, + { + "print has_any_ipv4_prefix(A, dynamic(['1.2.3.4']))", + "SELECT kql_has_any_ipv4_prefix(A, ['1.2.3.4'])" + } +}))); + diff --git a/src/Parsers/tests/KQL/gtest_KQL_MVExpand.cpp b/src/Parsers/tests/KQL/gtest_KQL_MVExpand.cpp new file mode 100644 index 000000000000..a58d0799820f --- /dev/null +++ b/src/Parsers/tests/KQL/gtest_KQL_MVExpand.cpp @@ -0,0 +1,45 @@ +#include + +#include + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_MVExpand, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "T | mv-expand c", + "SELECT *\nFROM T\nARRAY JOIN c\nSETTINGS enable_unaligned_array_join = 1" + }, + { + "T | mv-expand c, d", + "SELECT *\nFROM T\nARRAY JOIN\n c,\n d\nSETTINGS enable_unaligned_array_join = 1" + }, + { + "T | mv-expand c to typeof(bool)", + "SELECT\n * EXCEPT c_ali,\n c_ali AS c\nFROM\n(\n SELECT\n * EXCEPT c,\n accurateCastOrNull(toInt64OrNull(toString(c)), 'Boolean') AS c_ali\n FROM\n (\n SELECT *\n FROM T\n ARRAY JOIN c\n )\n)\nSETTINGS 
enable_unaligned_array_join = 1" + }, + { + "T | mv-expand b | mv-expand c", + "SELECT *\nFROM\n(\n SELECT *\n FROM T\n ARRAY JOIN b\n SETTINGS enable_unaligned_array_join = 1\n)\nARRAY JOIN c\nSETTINGS enable_unaligned_array_join = 1" + }, + { + "T | mv-expand with_itemindex=index b, c, d", + "SELECT\n index,\n *\nFROM T\nARRAY JOIN\n b,\n c,\n d,\n range(0, arrayMax([length(b), length(c), length(d)])) AS index\nSETTINGS enable_unaligned_array_join = 1" + }, + { + "T | mv-expand array_concat(c,d)", + "SELECT\n *,\n array_concat_\nFROM T\nARRAY JOIN arrayConcat(c, d) AS array_concat_\nSETTINGS enable_unaligned_array_join = 1" + }, + { + "T | mv-expand x = c, y = d", + "SELECT\n *,\n x,\n y\nFROM T\nARRAY JOIN\n c AS x,\n d AS y\nSETTINGS enable_unaligned_array_join = 1" + }, + { + "T | mv-expand xy = array_concat(c, d)", + "SELECT\n *,\n xy\nFROM T\nARRAY JOIN arrayConcat(c, d) AS xy\nSETTINGS enable_unaligned_array_join = 1" + }, + { + "T | mv-expand with_itemindex=index c,d to typeof(bool)", + "SELECT\n * EXCEPT d_ali,\n d_ali AS d\nFROM\n(\n SELECT\n * EXCEPT d,\n accurateCastOrNull(toInt64OrNull(toString(d)), 'Boolean') AS d_ali\n FROM\n (\n SELECT\n index,\n *\n FROM T\n ARRAY JOIN\n c,\n d,\n range(0, arrayMax([length(c), length(d)])) AS index\n )\n)\nSETTINGS enable_unaligned_array_join = 1" + } +}))); diff --git a/src/Parsers/tests/KQL/gtest_KQL_MakeSeries.cpp b/src/Parsers/tests/KQL/gtest_KQL_MakeSeries.cpp new file mode 100644 index 000000000000..5c94ab4665e5 --- /dev/null +++ b/src/Parsers/tests/KQL/gtest_KQL_MakeSeries.cpp @@ -0,0 +1,25 @@ +#include + +#include + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_MakeSeries, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "T | make-series PriceAvg = avg(Price) default=0 on Purchase from datetime(2016-09-10) to datetime(2016-09-13) step 1d by Supplier, Fruit", + "SELECT\n Supplier,\n Fruit,\n zipped.1 AS Purchase,\n zipped.2 AS 
PriceAvg\nFROM\n(\n SELECT\n toUInt64(min(Purchase_ali)) AS low,\n toUInt64(max(Purchase_ali)) + 86400 AS high,\n arraySort(arrayZip(Purchase, PriceAvg)) AS zipped,\n Supplier,\n Fruit,\n arrayConcat(groupArray(PriceAvg_ali) AS ga, arrayMap(x -> 0, range(0, toUInt32(if((length(range(toUInt64(toDateTime64('2016-09-10', 9, 'UTC')), toUInt64(toDateTime64('2016-09-13', 9, 'UTC')), toUInt64(86400))) - length(ga)) < 0, 0, length(range(toUInt64(toDateTime64('2016-09-10', 9, 'UTC')), toUInt64(toDateTime64('2016-09-13', 9, 'UTC')), toUInt64(86400))) - length(ga))), 1))) AS PriceAvg,\n arrayDistinct(arrayConcat(groupArray(toDateTime64(Purchase_ali - 0, 9, 'UTC')), arrayMap(x -> toDateTime64(x - 0, 9, 'UTC'), range(toUInt64(toDateTime64('2016-09-10', 9, 'UTC')), toUInt64(toDateTime64('2016-09-13', 9, 'UTC')), toUInt64(86400))))) AS Purchase\n FROM\n (\n SELECT\n Supplier,\n Fruit,\n avg(Price) AS PriceAvg_ali,\n toFloat64(toDateTime64('2016-09-10', 9, 'UTC')) + (toInt64((toFloat64(toDateTime64(Purchase, 9, 'UTC')) - toFloat64(toDateTime64('2016-09-10', 9, 'UTC'))) / 86400) * 86400) AS Purchase_ali\n FROM T\n WHERE (toInt64(toFloat64(toDateTime64(Purchase, 9, 'UTC'))) >= toUInt64(toDateTime64('2016-09-10', 9, 'UTC'))) AND (toInt64(toFloat64(toDateTime64(Purchase, 9, 'UTC'))) < toUInt64(toDateTime64('2016-09-13', 9, 'UTC')))\n GROUP BY\n Supplier,\n Fruit,\n Purchase_ali\n ORDER BY Purchase_ali ASC\n )\n GROUP BY\n Supplier,\n Fruit\n)" + }, + { + "T2 | make-series PriceAvg=avg(Price) default=0 on Purchase from 10 to 15 step 1.0 by Supplier, Fruit", + "SELECT\n Supplier,\n Fruit,\n zipped.1 AS Purchase,\n zipped.2 AS PriceAvg\nFROM\n(\n SELECT\n toUInt64(min(Purchase_ali)) AS low,\n toUInt64(max(Purchase_ali)) + 1 AS high,\n arraySort(arrayZip(Purchase, PriceAvg)) AS zipped,\n Supplier,\n Fruit,\n arrayConcat(groupArray(PriceAvg_ali) AS ga, arrayMap(x -> 0, range(0, toUInt32(if((length(range(toUInt64(10), toUInt64(15), toUInt64(1))) - length(ga)) < 0, 0, 
length(range(toUInt64(10), toUInt64(15), toUInt64(1))) - length(ga))), 1))) AS PriceAvg,\n arrayDistinct(arrayConcat(groupArray(Purchase_ali), arrayMap(x -> toFloat64(x), range(toUInt64(10), toUInt64(15), toUInt64(1))))) AS Purchase\n FROM\n (\n SELECT\n Supplier,\n Fruit,\n avg(Price) AS PriceAvg_ali,\n toFloat64(10) + (toInt64((toFloat64(Purchase) - toFloat64(10)) / 1) * 1) AS Purchase_ali\n FROM T2\n WHERE (toInt64(toFloat64(Purchase)) >= toUInt64(10)) AND (toInt64(toFloat64(Purchase)) < toUInt64(15))\n GROUP BY\n Supplier,\n Fruit,\n Purchase_ali\n ORDER BY Purchase_ali ASC\n )\n GROUP BY\n Supplier,\n Fruit\n)" + }, + { + "T | make-series PriceAvg = avg(Price) default=0 on Purchase step 1d by Supplier, Fruit", + "SELECT\n Supplier,\n Fruit,\n zipped.1 AS Purchase,\n zipped.2 AS PriceAvg\nFROM\n(\n SELECT\n toUInt64(min(Purchase_ali)) AS low,\n toUInt64(max(Purchase_ali)) + 86400 AS high,\n arraySort(arrayZip(Purchase, PriceAvg)) AS zipped,\n Supplier,\n Fruit,\n arrayConcat(groupArray(PriceAvg_ali) AS ga, arrayMap(x -> 0, range(0, toUInt32(if((length(range(low, high, toUInt64(86400))) - length(ga)) < 0, 0, length(range(low, high, toUInt64(86400))) - length(ga))), 1))) AS PriceAvg,\n arrayDistinct(arrayConcat(groupArray(toDateTime64(Purchase_ali - 62135596800, 9, 'UTC')), arrayMap(x -> toDateTime64(x - 62135596800, 9, 'UTC'), range(low, high, toUInt64(86400))))) AS Purchase\n FROM\n (\n SELECT\n Supplier,\n Fruit,\n avg(Price) AS PriceAvg_ali,\n toFloat64(toInt64((toFloat64(toDateTime64(Purchase, 9, 'UTC')) + 62135596800) / 86400) * 86400) AS Purchase_ali\n FROM T\n GROUP BY\n Supplier,\n Fruit,\n Purchase_ali\n ORDER BY Purchase_ali ASC\n )\n GROUP BY\n Supplier,\n Fruit\n)" + }, + { + "T2 | make-series PriceAvg=avg(Price) default=0 on Purchase step 1.0 by Supplier, Fruit", + "SELECT\n Supplier,\n Fruit,\n zipped.1 AS Purchase,\n zipped.2 AS PriceAvg\nFROM\n(\n SELECT\n toUInt64(min(Purchase_ali)) AS low,\n toUInt64(max(Purchase_ali)) + 1 AS high,\n 
arraySort(arrayZip(Purchase, PriceAvg)) AS zipped,\n Supplier,\n Fruit,\n arrayConcat(groupArray(PriceAvg_ali) AS ga, arrayMap(x -> 0, range(0, toUInt32(if((length(range(low, high, toUInt64(1))) - length(ga)) < 0, 0, length(range(low, high, toUInt64(1))) - length(ga))), 1))) AS PriceAvg,\n arrayDistinct(arrayConcat(groupArray(Purchase_ali), arrayMap(x -> toFloat64(x), range(low, high, toUInt64(1))))) AS Purchase\n FROM\n (\n SELECT\n Supplier,\n Fruit,\n avg(Price) AS PriceAvg_ali,\n toFloat64(toInt64((toFloat64(Purchase) + 0) / 1) * 1) AS Purchase_ali\n FROM T2\n GROUP BY\n Supplier,\n Fruit,\n Purchase_ali\n ORDER BY Purchase_ali ASC\n )\n GROUP BY\n Supplier,\n Fruit\n)" + } +}))); diff --git a/src/Parsers/tests/KQL/gtest_KQL_Math.cpp b/src/Parsers/tests/KQL/gtest_KQL_Math.cpp new file mode 100644 index 000000000000..dc96ab3754e8 --- /dev/null +++ b/src/Parsers/tests/KQL/gtest_KQL_Math.cpp @@ -0,0 +1,137 @@ +#include + +#include + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_Math, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "print abs(-5)", + "SELECT abs(-5)" + }, + { + "print c1 = ceiling(-1.1), c2 = ceiling(0), c3 = ceiling(0.9)", + "SELECT\n ceil(-1.1) AS c1,\n ceil(0) AS c2,\n ceil(0.9) AS c3" + }, + { + "print exp(2);", + "SELECT exp(2)" + }, + { + "print exp2(2)", + "SELECT exp2(2)" + }, + { + "print exp10(3)", + "SELECT exp10(3)" + }, + { + "print log(5)", + "SELECT log(5)" + }, + { + "print log2(5)", + "SELECT log2(5)" + }, + { + "print log10(5)", + "SELECT log10(5)" + }, + { + "print pow(2, 3)", + "SELECT pow(2, 3)" + }, + { + "print sqrt(256)", + "SELECT sqrt(256)" + }, + { + "print acos(-0.45)", + "SELECT acos(-0.45)" + }, + { + "print asin(0.5)", + "SELECT asin(0.5)" + }, + { + "print atan(0.5);", + "SELECT atan(0.5)" + }, + { + "print atan2(1, -1);", + "SELECT atan2(1, -1)" + }, + { + "print cos(-0.45)", + "SELECT cos(-0.45)" + }, + { + "print cot(-0.45)", + "SELECT 1 / 
tan(-0.45)" + }, + { + "print degrees(pi()/4)", + "SELECT degrees(pi() / 4)" + }, + { + "print gamma(-0.45)", + "SELECT tgamma(-0.45)" + }, + { + "print isfinite(1.0/0.0)", + "SELECT isFinite(1. / 0.)" + }, + { + "print isinf(1.0/0.0)", + "SELECT isInfinite(1. / 0.)" + }, + { + "print loggamma(-0.45)", + "SELECT lgamma(-0.45)" + }, + { + "print max_of(10, 1, -3, 17)", + "SELECT arrayReduce('max', [10, 1, -3, 17])" + }, + { + "print min_of(10, 1, -3, 17)", + "SELECT arrayReduce('min', [10, 1, -3, 17])" + }, + { + "print pi()", + "SELECT pi()" + }, + { + "print radians(180)", + "SELECT radians(180)" + }, + { + "print rand()", + "SELECT if(0 < 2, randCanonical(), moduloOrZero(rand(), 0))" + }, + { + "print rand(1000)", + "SELECT if(1000 < 2, randCanonical(), moduloOrZero(rand(), 1000))" + }, + { + "print rand(0)", + "SELECT if(0 < 2, randCanonical(), moduloOrZero(rand(), 0))" + }, + { + "print round(2.15, 1)", + "SELECT round(2.15, 1)" + }, + { + "print sign(-42)", + "SELECT sign(-42)" + }, + { + "print sin(-0.45)", + "SELECT sin(-0.45)" + }, + { + "print tan(-0.45)", + "SELECT tan(-0.45)" + } +}))); diff --git a/src/Parsers/tests/KQL/gtest_KQL_ProjectAway.cpp b/src/Parsers/tests/KQL/gtest_KQL_ProjectAway.cpp new file mode 100644 index 000000000000..a655bc741b29 --- /dev/null +++ b/src/Parsers/tests/KQL/gtest_KQL_ProjectAway.cpp @@ -0,0 +1,49 @@ +#include + +#include + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_ProjectAway, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "Customers | project-away FirstName", + "SELECT * EXCEPT FirstName\nFROM Customers" + }, + { + "Customers | project-away FirstName, LastName", + "SELECT * EXCEPT (FirstName, LastName)\nFROM Customers" + }, + { + "Customers | project-away *Name", + "SELECT * EXCEPT '.*Name'\nFROM Customers" + }, + { + "Customers | project-away *Name, *tion", + "SELECT * EXCEPT '.*Name'\nFROM\n(\n SELECT * EXCEPT '.*tion'\n FROM Customers\n)" + 
}, + { + "Customers | project-away *Name, Age", + "SELECT * EXCEPT Age\nFROM\n(\n SELECT * EXCEPT '.*Name'\n FROM Customers\n)" + }, + { + "Customers | project-away *Name, Age, Education", + "SELECT * EXCEPT (Age, Education)\nFROM\n(\n SELECT * EXCEPT '.*Name'\n FROM Customers\n)" + }, + { + "Customers | project-away *irstName, Age, *astName, Education", + "SELECT * EXCEPT (Age, Education)\nFROM\n(\n SELECT * EXCEPT '.*astName'\n FROM\n (\n SELECT * EXCEPT '.*irstName'\n FROM Customers\n )\n)" + }, + { + "Customers | where Age< 30 | limit 2 | project-away FirstName", + "SELECT * EXCEPT FirstName\nFROM\n(\n SELECT *\n FROM Customers\n WHERE Age < 30\n LIMIT 2\n)" + }, + { + "Customers|summarize sum(Age), avg(Age) by FirstName | project-away sum_Age", + "SELECT * EXCEPT sum_Age\nFROM\n(\n SELECT\n FirstName,\n sum(Age) AS sum_Age,\n avg(Age) AS avg_Age\n FROM Customers\n GROUP BY FirstName\n)" + }, + { + "Customers|extend FullName = strcat(FirstName,' ',LastName) | project-away FirstName, LastName", + "SELECT * EXCEPT (FirstName, LastName)\nFROM\n(\n SELECT\n * EXCEPT FullName,\n concat(ifNull(kql_tostring(FirstName), ''), ifNull(kql_tostring(' '), ''), ifNull(kql_tostring(LastName), ''), '') AS FullName\n FROM Customers\n)" + } +}))); diff --git a/src/Parsers/tests/KQL/gtest_KQL_StringFunctions.cpp b/src/Parsers/tests/KQL/gtest_KQL_StringFunctions.cpp new file mode 100644 index 000000000000..48c15d3c53f4 --- /dev/null +++ b/src/Parsers/tests/KQL/gtest_KQL_StringFunctions.cpp @@ -0,0 +1,225 @@ +#include + +#include + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_String, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "print base64_encode_fromguid(A)", + "SELECT if(toTypeName(A) NOT IN ['UUID', 'Nullable(UUID)'], toString(throwIf(true, 'Expected guid as argument')), base64Encode(UUIDStringToNum(toString(A), 2)))" + }, + { + "print base64_decode_toguid(A)", + "SELECT 
toUUIDOrNull(UUIDNumToString(toFixedString(base64Decode(A), 16), 2))" + }, + { + "print base64_decode_toarray('S3VzdG8=')", + "SELECT arrayMap(x -> reinterpretAsUInt8(x), splitByRegexp('', base64Decode('S3VzdG8=')))" + }, + { + "print replace_regex('Hello, World!', '.', '\\0\\0')", + "SELECT replaceRegexpAll('Hello, World!', '.', '\\0\\0')" + }, + { + "print idx = has_any_index('this is an example', dynamic(['this', 'example'])) ", + "SELECT if(empty(['this', 'example']), -1, indexOf(arrayMap(x -> (x IN splitByChar(' ', 'this is an example')), if(empty(['this', 'example']), [''], arrayMap(x -> toString(x), ['this', 'example']))), 1) - 1) AS idx" + }, + { + "print idx = has_any_index('this is an example', dynamic([]))", + "SELECT if(empty([]), -1, indexOf(arrayMap(x -> (x IN splitByChar(' ', 'this is an example')), if(empty([]), [''], arrayMap(x -> toString(x), []))), 1) - 1) AS idx" + }, + { + "print translate('krasp', 'otsku', 'spark')", + "SELECT if(length('otsku') = 0, '', translate('spark', 'krasp', multiIf(length('otsku') = 0, 'krasp', (length('krasp') - length('otsku')) > 0, concat('otsku', repeat(substr('otsku', length('otsku'), 1), toUInt16(length('krasp') - length('otsku')))), (length('krasp') - length('otsku')) < 0, substr('otsku', 1, length('krasp')), 'otsku')))" + }, + { + "print trim_start('[^\\w]+', strcat('- ','Te st1','// $'))", + "SELECT replaceRegexpOne(concat(ifNull(kql_tostring('- '), ''), ifNull(kql_tostring('Te st1'), ''), ifNull(kql_tostring('// $'), ''), ''), concat('^', '[^\\\\w]+'), '')" + }, + { + "print trim_end('.com', 'bing.com')", + "SELECT replaceRegexpOne('bing.com', concat('.com', '$'), '')" + }, + { + "print trim('--', '--https://bing.com--')", + "SELECT replaceRegexpOne(replaceRegexpOne('--https://bing.com--', concat('--', '$'), ''), concat('^', '--'), '')" + }, + { + "print bool(1)", + "SELECT if((toTypeName(1) = 'IntervalNanosecond') OR ((accurateCastOrNull(1, 'Bool') IS NULL) != (1 IS NULL)), accurateCastOrNull(throwIf(true, 
'Failed to parse Bool literal'), 'Bool'), accurateCastOrNull(1, 'Bool'))" + }, + { + "print guid(74be27de-1e4e-49d9-b579-fe0b331d3642)", + "SELECT toUUIDOrNull('74be27de-1e4e-49d9-b579-fe0b331d3642')" + }, + { + "print guid('74be27de-1e4e-49d9-b579-fe0b331d3642')", + "SELECT toUUIDOrNull('74be27de-1e4e-49d9-b579-fe0b331d3642')" + }, + { + "print guid('74be27de1e4e49d9b579fe0b331d3642')", + "SELECT toUUIDOrNull('74be27de1e4e49d9b579fe0b331d3642')" + }, + { + "print int(32.5)", + "SELECT if((toTypeName(32.5) = 'IntervalNanosecond') OR ((accurateCastOrNull(32.5, 'Int32') IS NULL) != (32.5 IS NULL)), accurateCastOrNull(throwIf(true, 'Failed to parse Int32 literal'), 'Int32'), accurateCastOrNull(32.5, 'Int32'))" + }, + { + "print long(32.5)", + "SELECT if((toTypeName(32.5) = 'IntervalNanosecond') OR ((accurateCastOrNull(32.5, 'Int64') IS NULL) != (32.5 IS NULL)), accurateCastOrNull(throwIf(true, 'Failed to parse Int64 literal'), 'Int64'), accurateCastOrNull(32.5, 'Int64'))" + }, + { + "print real(32.5)", + "SELECT if((toTypeName(32.5) = 'IntervalNanosecond') OR ((accurateCastOrNull(32.5, 'Float64') IS NULL) != (32.5 IS NULL)), accurateCastOrNull(throwIf(true, 'Failed to parse Float64 literal'), 'Float64'), accurateCastOrNull(32.5, 'Float64'))" + }, + { + "print time('1.22:34:8.128')", + "SELECT toIntervalNanosecond(167648128000000)" + }, + { + "print time('1d')", + "SELECT toIntervalNanosecond(86400000000000)" + }, + { + "print time('1.5d')", + "SELECT toIntervalNanosecond(129600000000000)" + }, + { + "print timespan('1.5d')", + "SELECT toIntervalNanosecond(129600000000000)" + }, + { + "print extract('x=([0-9.]+)', 1, 'hello x=456|wo' , typeof(bool));", + "SELECT accurateCastOrNull(toInt64OrNull(kql_extract('hello x=456|wo', 'x=([0-9.]+)', 1)), 'Boolean')" + }, + { + "print extract('x=([0-9.]+)', 1, 'hello x=456|wo' , typeof(date));", + "SELECT accurateCastOrNull(kql_extract('hello x=456|wo', 'x=([0-9.]+)', 1), 'DateTime')" + }, + { + "print extract('x=([0-9.]+)', 1, 
'hello x=456|wo' , typeof(guid));", + "SELECT accurateCastOrNull(kql_extract('hello x=456|wo', 'x=([0-9.]+)', 1), 'UUID')" + }, + { + "print extract('x=([0-9.]+)', 1, 'hello x=456|wo' , typeof(int));", + "SELECT accurateCastOrNull(kql_extract('hello x=456|wo', 'x=([0-9.]+)', 1), 'Int32')" + }, + { + "print extract('x=([0-9.]+)', 1, 'hello x=456|wo' , typeof(long));", + "SELECT accurateCastOrNull(kql_extract('hello x=456|wo', 'x=([0-9.]+)', 1), 'Int64')" + }, + { + "print extract('x=([0-9.]+)', 1, 'hello x=456|wo' , typeof(real));", + "SELECT accurateCastOrNull(kql_extract('hello x=456|wo', 'x=([0-9.]+)', 1), 'Float64')" + }, + { + "print extract('x=([0-9.]+)', 1, 'hello x=456|wo' , typeof(decimal));", + "SELECT toDecimal128OrNull(if(countSubstrings(kql_extract('hello x=456|wo', 'x=([0-9.]+)', 1), '.') > 1, NULL, kql_extract('hello x=456|wo', 'x=([0-9.]+)', 1)), length(substr(kql_extract('hello x=456|wo', 'x=([0-9.]+)', 1), position(kql_extract('hello x=456|wo', 'x=([0-9.]+)', 1), '.') + 1)))" + }, + { + "print parse_version('1.2.3.40')", + "SELECT if((length(splitByChar('.', '1.2.3.40')) > 4) OR (length(splitByChar('.', '1.2.3.40')) < 1) OR (match('1.2.3.40', '.*[a-zA-Z]+.*') = 1) OR empty('1.2.3.40') OR hasAll(splitByChar('.', '1.2.3.40'), ['']), toDecimal128OrNull('NULL', 0), toDecimal128OrNull(substring(arrayStringConcat(arrayMap(x -> leftPad(x, 8, '0'), arrayMap(x -> if(empty(x), '0', x), arrayResize(splitByChar('.', '1.2.3.40'), 4)))), 8), 0))" + }, + { + "print parse_version('1')", + "SELECT if((length(splitByChar('.', '1')) > 4) OR (length(splitByChar('.', '1')) < 1) OR (match('1', '.*[a-zA-Z]+.*') = 1) OR empty('1') OR hasAll(splitByChar('.', '1'), ['']), toDecimal128OrNull('NULL', 0), toDecimal128OrNull(substring(arrayStringConcat(arrayMap(x -> leftPad(x, 8, '0'), arrayMap(x -> if(empty(x), '0', x), arrayResize(splitByChar('.', '1'), 4)))), 8), 0))" + }, + { + "print parse_version('')", + "SELECT if((length(splitByChar('.', '')) > 4) OR 
(length(splitByChar('.', '')) < 1) OR (match('', '.*[a-zA-Z]+.*') = 1) OR empty('') OR hasAll(splitByChar('.', ''), ['']), toDecimal128OrNull('NULL', 0), toDecimal128OrNull(substring(arrayStringConcat(arrayMap(x -> leftPad(x, 8, '0'), arrayMap(x -> if(empty(x), '0', x), arrayResize(splitByChar('.', ''), 4)))), 8), 0))" + }, + { + "print parse_version('...')", + "SELECT if((length(splitByChar('.', '...')) > 4) OR (length(splitByChar('.', '...')) < 1) OR (match('...', '.*[a-zA-Z]+.*') = 1) OR empty('...') OR hasAll(splitByChar('.', '...'), ['']), toDecimal128OrNull('NULL', 0), toDecimal128OrNull(substring(arrayStringConcat(arrayMap(x -> leftPad(x, 8, '0'), arrayMap(x -> if(empty(x), '0', x), arrayResize(splitByChar('.', '...'), 4)))), 8), 0))" + }, + { + "print parse_json( dynamic([1, 2, 3]))", + "SELECT [1, 2, 3]" + }, + { + "print parse_json('{\"a\":123.5, \"b\":\"{\\\"c\\\":456}\"}')", + "SELECT if(isValidJSON('{\"a\":123.5, \"b\":\"{\"c\":456}\"}'), JSON_QUERY('{\"a\":123.5, \"b\":\"{\"c\":456}\"}', '$'), toJSONString('{\"a\":123.5, \"b\":\"{\"c\":456}\"}'))" + }, + { + "print extract_json( '$.a' , '{\"a\":123, \"b\":\"{\"c\":456}\"}' , typeof(long))", + "SELECT accurateCastOrNull(JSON_VALUE('{\"a\":123, \"b\":\"{\"c\":456}\"}', '$.a'), 'Int64')" + }, + { + "print extract_json( '$.a' , '{\"a\":123, \"b\":\"{\"c\":456}\"}' , typeof(bool))", + "SELECT if(toInt64OrNull(JSON_VALUE('{\"a\":123, \"b\":\"{\"c\":456}\"}', '$.a')) > 0, true, false)" + }, + { + "print parse_command_line('echo \"hello world!\" print$?', 'windows')", + "SELECT if(empty('echo \"hello world!\" print$?') OR hasAll(splitByChar(' ', 'echo \"hello world!\" print$?'), ['']), arrayMap(x -> NULL, splitByChar(' ', '')), splitByChar(' ', 'echo \"hello world!\" print$?'))" + }, + { + "print reverse(123)", + "SELECT reverse(ifNull(kql_tostring(123), ''))" + }, + { + "print reverse(123.34)", + "SELECT reverse(ifNull(kql_tostring(123.34), ''))" + }, + { + "print reverse('clickhouse')", + "SELECT 
reverse(ifNull(kql_tostring('clickhouse'), ''))" + }, + { + "print result=parse_csv('aa,b,cc')", + "SELECT if(CAST(position('aa,b,cc', '\\n'), 'UInt8'), splitByChar(',', substring('aa,b,cc', 1, position('aa,b,cc', '\\n') - 1)), splitByChar(',', substring('aa,b,cc', 1, length('aa,b,cc')))) AS result" + }, + { + "print result_multi_record=parse_csv('record1,a,b,c\nrecord2,x,y,z')", + "SELECT if(CAST(position('record1,a,b,c\\nrecord2,x,y,z', '\\n'), 'UInt8'), splitByChar(',', substring('record1,a,b,c\\nrecord2,x,y,z', 1, position('record1,a,b,c\\nrecord2,x,y,z', '\\n') - 1)), splitByChar(',', substring('record1,a,b,c\\nrecord2,x,y,z', 1, length('record1,a,b,c\\nrecord2,x,y,z')))) AS result_multi_record" + }, + { + "Customers | project name_abbr = strcat(substring(FirstName,0,3), ' ', substring(LastName,2))| order by LastName", + "SELECT concat(ifNull(kql_tostring(if(toInt64(length(FirstName)) <= 0, '', substr(FirstName, (((0 % toInt64(length(FirstName))) + toInt64(length(FirstName))) % toInt64(length(FirstName))) + 1, 3))), ''), ifNull(kql_tostring(' '), ''), ifNull(kql_tostring(if(toInt64(length(LastName)) <= 0, '', substr(LastName, (((2 % toInt64(length(LastName))) + toInt64(length(LastName))) % toInt64(length(LastName))) + 1))), ''), '') AS name_abbr\nFROM Customers\nORDER BY LastName DESC" + }, + { + "print idx1 = indexof('abcdefg','cde')", + "SELECT kql_indexof('abcdefg', 'cde', 0, -1, 1) AS idx1" + }, + { + "print idx2 = indexof('abcdefg','cde',0,3)", + "SELECT kql_indexof('abcdefg', 'cde', 0, 3, 1) AS idx2" + }, + { + "print idx3 = indexof('abcdefg','cde',1,2)", + "SELECT kql_indexof('abcdefg', 'cde', 1, 2, 1) AS idx3" + }, + { + "print idx5 = indexof('abcdefg','cde',-5) ", + "SELECT kql_indexof('abcdefg', 'cde', -5, -1, 1) AS idx5" + }, + { + "print idx6 = indexof(1234567,5,1,4) ", + "SELECT kql_indexof(1234567, 5, 1, 4, 1) AS idx6" + }, + { + "print idx7 = indexof('abcdefg','cde',2,-1)", + "SELECT kql_indexof('abcdefg', 'cde', 2, -1, 1) AS idx7" + }, + { + 
"print idx8 = indexof('abcdefgabcdefg', 'cde', 3)", + "SELECT kql_indexof('abcdefgabcdefg', 'cde', 3, -1, 1) AS idx8" + }, + { + "print idx9 = indexof('abcdefgabcdefg', 'cde', 1, 13, 3) ", + "SELECT kql_indexof('abcdefgabcdefg', 'cde', 1, 13, 3) AS idx9" + }, + { + "print from_time = strrep(3s,2,' ')", + "SELECT substr(repeat(concat(ifNull(kql_tostring(toIntervalNanosecond(3000000000)), ''), ' '), 2), 1, length(repeat(concat(ifNull(kql_tostring(toIntervalNanosecond(3000000000)), ''), ' '), 2)) - length(' ')) AS from_time" + }, + { + "print isempty(1.12345)", + "SELECT empty(ifNull(kql_tostring(1.12345), ''))" + }, + { + "print isnotempty('1.12345')", + "SELECT notEmpty(ifNull(kql_tostring('1.12345'), ''))" + } +}))); diff --git a/src/Parsers/tests/KQL/gtest_KQL_TopHitter.cpp b/src/Parsers/tests/KQL/gtest_KQL_TopHitter.cpp new file mode 100644 index 000000000000..4162461e1dc2 --- /dev/null +++ b/src/Parsers/tests/KQL/gtest_KQL_TopHitter.cpp @@ -0,0 +1,57 @@ +#include + +#include + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_TopHitters, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "Customers | top 5 by Age", + "SELECT *\nFROM Customers\nORDER BY Age DESC\nLIMIT 5" + }, + { + "Customers | top 5 by Age desc", + "SELECT *\nFROM Customers\nORDER BY Age DESC\nLIMIT 5" + }, + { + "Customers | top 5 by Age asc", + "SELECT *\nFROM Customers\nORDER BY Age ASC\nLIMIT 5" + }, + { + "Customers | top 5 by FirstName desc nulls first", + "SELECT *\nFROM Customers\nORDER BY FirstName DESC NULLS FIRST\nLIMIT 5" + }, + { + "Customers | top 5 by FirstName desc nulls last", + "SELECT *\nFROM Customers\nORDER BY FirstName DESC NULLS LAST\nLIMIT 5" + }, + { + "Customers | top 5 by Age | top 2 by FirstName", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n ORDER BY Age DESC\n LIMIT 5\n)\nORDER BY FirstName DESC\nLIMIT 2" + }, + { + "Customers| top-hitters a = 3 of Age by extra", + "SELECT *\nFROM\n(\n SELECT\n 
Age,\n sum(extra) AS approximate_sum_extra\n FROM Customers\n GROUP BY Age\n)\nORDER BY approximate_sum_extra DESC\nLIMIT 3 AS a" + }, + { + "Customers| top-hitters 3 of Age", + "SELECT *\nFROM\n(\n SELECT\n Age,\n count() AS approximate_count_Age\n FROM Customers\n GROUP BY Age\n)\nORDER BY approximate_count_Age DESC\nLIMIT 3" + }, + { + "Customers| top-hitters 3 of Age by extra | top-hitters 2 of Age", + "SELECT *\nFROM\n(\n SELECT\n Age,\n count() AS approximate_count_Age\n FROM\n (\n SELECT *\n FROM\n (\n SELECT\n Age,\n sum(extra) AS approximate_sum_extra\n FROM Customers\n GROUP BY Age\n )\n ORDER BY approximate_sum_extra DESC\n LIMIT 3\n )\n GROUP BY Age\n)\nORDER BY approximate_count_Age DESC\nLIMIT 2" + }, + { + "Customers| top-hitters 3 of Age by extra | where Age > 30", + "SELECT *\nFROM\n(\n SELECT *\n FROM\n (\n SELECT\n Age,\n sum(extra) AS approximate_sum_extra\n FROM Customers\n GROUP BY Age\n )\n ORDER BY approximate_sum_extra DESC\n LIMIT 3\n)\nWHERE Age > 30" + }, + { + "Customers| top-hitters 3 of Age by extra | where approximate_sum_extra < 200", + "SELECT *\nFROM\n(\n SELECT *\n FROM\n (\n SELECT\n Age,\n sum(extra) AS approximate_sum_extra\n FROM Customers\n GROUP BY Age\n )\n ORDER BY approximate_sum_extra DESC\n LIMIT 3\n)\nWHERE approximate_sum_extra < 200" + }, + { + "Customers| top-hitters 3 of Age | where approximate_count_Age > 2", + "SELECT *\nFROM\n(\n SELECT *\n FROM\n (\n SELECT\n Age,\n count() AS approximate_count_Age\n FROM Customers\n GROUP BY Age\n )\n ORDER BY approximate_count_Age DESC\n LIMIT 3\n)\nWHERE approximate_count_Age > 2" + } +}))); diff --git a/src/Parsers/tests/KQL/gtest_KQL_TopNested.cpp b/src/Parsers/tests/KQL/gtest_KQL_TopNested.cpp new file mode 100644 index 000000000000..bbf15fa6fe01 --- /dev/null +++ b/src/Parsers/tests/KQL/gtest_KQL_TopNested.cpp @@ -0,0 +1,61 @@ +#include + +#include + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_TopNested, ParserTest, + ::testing::Combine( + 
::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "sales | top-nested 3 of region by sum(amount)", + "WITH\n source_table AS\n (\n SELECT *\n FROM sales\n ),\n tb0_normal AS\n (\n SELECT\n region AS region,\n sum(amount) AS aggregated_region\n FROM source_table\n GROUP BY region\n ORDER BY aggregated_region DESC\n LIMIT 3\n )\nSELECT\n region,\n aggregated_region\nFROM tb0_normal" + }, + { + "sales | top-nested 3 of region by sum(amount), top-nested 2 of salesperson by sum(amount)", + "WITH\n source_table AS\n (\n SELECT *\n FROM sales\n ),\n tb0_normal AS\n (\n SELECT\n region AS region,\n sum(amount) AS aggregated_region\n FROM source_table\n GROUP BY region\n ORDER BY aggregated_region DESC\n LIMIT 3\n ),\n tb1_prev AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson AS salesperson,\n sum(amount) AS aggregated_salesperson\n FROM tb0_normal\n INNER JOIN source_table AS join1 USING (region)\n GROUP BY\n region,\n aggregated_region,\n salesperson\n ),\n tb1_partition AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson,\n ROW_NUMBER() OVER (PARTITION BY region ORDER BY aggregated_salesperson DESC) AS row1\n FROM tb1_prev\n ),\n tb1_normal AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson\n FROM tb1_partition\n WHERE row1 <= 2\n )\nSELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson\nFROM tb1_normal" + }, + { + "sales | top-nested 3 of region by sum(amount), top-nested 2 of salesperson by sum(amount), top-nested 2 of salesdate by sum(amount)", + "WITH\n source_table AS\n (\n SELECT *\n FROM sales\n ),\n tb0_normal AS\n (\n SELECT\n region AS region,\n sum(amount) AS aggregated_region\n FROM source_table\n GROUP BY region\n ORDER BY aggregated_region DESC\n LIMIT 3\n ),\n tb1_prev AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson AS salesperson,\n sum(amount) AS aggregated_salesperson\n FROM tb0_normal\n INNER 
JOIN source_table AS join1 USING (region)\n GROUP BY\n region,\n aggregated_region,\n salesperson\n ),\n tb1_partition AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson,\n ROW_NUMBER() OVER (PARTITION BY region ORDER BY aggregated_salesperson DESC) AS row1\n FROM tb1_prev\n ),\n tb1_normal AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson\n FROM tb1_partition\n WHERE row1 <= 2\n ),\n tb2_prev AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson,\n salesdate AS salesdate,\n sum(amount) AS aggregated_salesdate\n FROM tb1_normal\n INNER JOIN source_table AS join1 USING (region, salesperson)\n GROUP BY\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson,\n salesdate\n ),\n tb2_partition AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson,\n salesdate,\n aggregated_salesdate,\n ROW_NUMBER() OVER (PARTITION BY region, salesperson ORDER BY aggregated_salesdate DESC) AS row2\n FROM tb2_prev\n ),\n tb2_normal AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson,\n salesdate,\n aggregated_salesdate\n FROM tb2_partition\n WHERE row2 <= 2\n )\nSELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson,\n salesdate,\n aggregated_salesdate\nFROM tb2_normal" + }, + { + "sales | top-nested 3 of region with others = 'all other region' by sum(amount)", + "WITH\n source_table AS\n (\n SELECT *\n FROM sales\n ),\n tb0_normal AS\n (\n SELECT\n region AS region,\n sum(amount) AS aggregated_region\n FROM source_table\n GROUP BY region\n ORDER BY aggregated_region DESC\n LIMIT 3\n ),\n tb0_others AS\n (\n SELECT sum(amount) AS aggregated_region_value\n FROM source_table\n WHERE region NOT IN (\n SELECT region\n FROM tb0_normal\n )\n ),\n last_query AS\n (\n SELECT\n region,\n aggregated_region\n FROM tb0_normal\n UNION ALL\n SELECT\n 'all other region' AS region,\n 
aggregated_region_value AS aggregated_region\n FROM tb0_others\n )\nSELECT *\nFROM last_query" + }, + { + "sales | top-nested 3 of region with others = 'all other region' by sum(amount), top-nested 2 of salesperson with others = 'all other person' by sum(amount)", + "WITH\n source_table AS\n (\n SELECT *\n FROM sales\n ),\n tb0_normal AS\n (\n SELECT\n region AS region,\n sum(amount) AS aggregated_region\n FROM source_table\n GROUP BY region\n ORDER BY aggregated_region DESC\n LIMIT 3\n ),\n tb0_others AS\n (\n SELECT sum(amount) AS aggregated_region_value\n FROM source_table\n WHERE region NOT IN (\n SELECT region\n FROM tb0_normal\n )\n ),\n tb1_prev AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson AS salesperson,\n sum(amount) AS aggregated_salesperson\n FROM tb0_normal\n INNER JOIN source_table AS join1 USING (region)\n GROUP BY\n region,\n aggregated_region,\n salesperson\n ),\n tb1_partition AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson,\n ROW_NUMBER() OVER (PARTITION BY region ORDER BY aggregated_salesperson DESC) AS row1\n FROM tb1_prev\n ),\n tb1_normal AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson\n FROM tb1_partition\n WHERE row1 <= 2\n ),\n tb1_others_prev AS\n (\n SELECT\n region,\n sum(amount) AS aggregated_salesperson_value\n FROM source_table\n LEFT JOIN tb1_normal USING (region, salesperson)\n WHERE empty(tb1_normal.salesperson) AND (source_table.region IN (\n SELECT region\n FROM tb1_normal\n ))\n GROUP BY region\n ),\n tb1_others AS\n (\n SELECT DISTINCT\n region,\n aggregated_region,\n aggregated_salesperson_value\n FROM tb1_others_prev\n RIGHT JOIN tb1_normal USING (region)\n ),\n tb0_all_others AS\n (\n SELECT\n 'all other region' AS region,\n aggregated_region_value AS aggregated_region,\n 'all other person' AS salesperson,\n aggregated_region_value AS aggregated_salesperson\n FROM tb0_others\n ),\n last_query AS\n (\n SELECT\n region,\n 
aggregated_region,\n salesperson,\n aggregated_salesperson\n FROM tb1_normal\n UNION ALL\n SELECT\n region,\n aggregated_region,\n 'all other person' AS salesperson,\n aggregated_salesperson_value AS aggregated_salesperson\n FROM tb1_others\n UNION ALL\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson\n FROM tb0_all_others\n )\nSELECT *\nFROM last_query" + }, + { + "sales | top-nested 3 of region with others = 'all other region' by sum(amount), top-nested 2 of salesperson by sum(amount)", + "WITH\n source_table AS\n (\n SELECT *\n FROM sales\n ),\n tb0_normal AS\n (\n SELECT\n region AS region,\n sum(amount) AS aggregated_region\n FROM source_table\n GROUP BY region\n ORDER BY aggregated_region DESC\n LIMIT 3\n ),\n tb0_others AS\n (\n SELECT sum(amount) AS aggregated_region_value\n FROM source_table\n WHERE region NOT IN (\n SELECT region\n FROM tb0_normal\n )\n ),\n tb1_prev AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson AS salesperson,\n sum(amount) AS aggregated_salesperson\n FROM tb0_normal\n INNER JOIN source_table AS join1 USING (region)\n GROUP BY\n region,\n aggregated_region,\n salesperson\n ),\n tb1_partition AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson,\n ROW_NUMBER() OVER (PARTITION BY region ORDER BY aggregated_salesperson DESC) AS row1\n FROM tb1_prev\n ),\n tb1_normal AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson\n FROM tb1_partition\n WHERE row1 <= 2\n ),\n tb1_others_prev AS\n (\n SELECT\n region,\n sum(amount) AS aggregated_salesperson_value\n FROM source_table\n LEFT JOIN tb1_normal USING (region, salesperson)\n WHERE empty(tb1_normal.salesperson) AND (source_table.region IN (\n SELECT region\n FROM tb1_normal\n ))\n GROUP BY region\n ),\n tb1_others AS\n (\n SELECT DISTINCT\n region,\n aggregated_region,\n aggregated_salesperson_value\n FROM tb1_others_prev\n RIGHT JOIN tb1_normal USING (region)\n ),\n tb0_all_others 
AS\n (\n SELECT\n 'all other region' AS region,\n aggregated_region_value AS aggregated_region,\n NULL AS salesperson,\n NULL AS aggregated_salesperson\n FROM tb0_others\n ),\n last_query AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson\n FROM tb1_normal\n UNION ALL\n SELECT\n region,\n aggregated_region,\n NULL AS salesperson,\n NULL AS aggregated_salesperson\n FROM tb1_others\n UNION ALL\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson\n FROM tb0_all_others\n )\nSELECT *\nFROM last_query" + }, + { + "sales | top-nested 3 of region by sum(amount), top-nested 2 of salesperson with others = 'all other person' by sum(amount)", + "WITH\n source_table AS\n (\n SELECT *\n FROM sales\n ),\n tb0_normal AS\n (\n SELECT\n region AS region,\n sum(amount) AS aggregated_region\n FROM source_table\n GROUP BY region\n ORDER BY aggregated_region DESC\n LIMIT 3\n ),\n tb0_others AS\n (\n SELECT sum(amount) AS aggregated_region_value\n FROM source_table\n WHERE region NOT IN (\n SELECT region\n FROM tb0_normal\n )\n ),\n tb1_prev AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson AS salesperson,\n sum(amount) AS aggregated_salesperson\n FROM tb0_normal\n INNER JOIN source_table AS join1 USING (region)\n GROUP BY\n region,\n aggregated_region,\n salesperson\n ),\n tb1_partition AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson,\n ROW_NUMBER() OVER (PARTITION BY region ORDER BY aggregated_salesperson DESC) AS row1\n FROM tb1_prev\n ),\n tb1_normal AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson\n FROM tb1_partition\n WHERE row1 <= 2\n ),\n tb1_others_prev AS\n (\n SELECT\n region,\n sum(amount) AS aggregated_salesperson_value\n FROM source_table\n LEFT JOIN tb1_normal USING (region, salesperson)\n WHERE empty(tb1_normal.salesperson) AND (source_table.region IN (\n SELECT region\n FROM tb1_normal\n ))\n GROUP BY region\n ),\n 
tb1_others AS\n (\n SELECT DISTINCT\n region,\n aggregated_region,\n aggregated_salesperson_value\n FROM tb1_others_prev\n RIGHT JOIN tb1_normal USING (region)\n ),\n tb0_all_others AS\n (\n SELECT\n NULL AS region,\n NULL AS aggregated_region,\n 'all other person' AS salesperson,\n aggregated_region_value AS aggregated_salesperson\n FROM tb0_others\n ),\n last_query AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson\n FROM tb1_normal\n UNION ALL\n SELECT\n region,\n aggregated_region,\n 'all other person' AS salesperson,\n aggregated_salesperson_value AS aggregated_salesperson\n FROM tb1_others\n UNION ALL\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson\n FROM tb0_all_others\n )\nSELECT *\nFROM last_query" + }, + { + "sales | top-nested 3 of region with others = 'all other region' by sum(amount), top-nested 2 of salesperson with others = 'all other person' by sum(amount), top-nested 2 of salesdate with others = 'all other date' by sum(amount)", + "WITH\n source_table AS\n (\n SELECT *\n FROM sales\n ),\n tb0_normal AS\n (\n SELECT\n region AS region,\n sum(amount) AS aggregated_region\n FROM source_table\n GROUP BY region\n ORDER BY aggregated_region DESC\n LIMIT 3\n ),\n tb0_others AS\n (\n SELECT sum(amount) AS aggregated_region_value\n FROM source_table\n WHERE region NOT IN (\n SELECT region\n FROM tb0_normal\n )\n ),\n tb1_prev AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson AS salesperson,\n sum(amount) AS aggregated_salesperson\n FROM tb0_normal\n INNER JOIN source_table AS join1 USING (region)\n GROUP BY\n region,\n aggregated_region,\n salesperson\n ),\n tb1_partition AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson,\n ROW_NUMBER() OVER (PARTITION BY region ORDER BY aggregated_salesperson DESC) AS row1\n FROM tb1_prev\n ),\n tb1_normal AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson\n FROM 
tb1_partition\n WHERE row1 <= 2\n ),\n tb1_others_prev AS\n (\n SELECT\n region,\n sum(amount) AS aggregated_salesperson_value\n FROM source_table\n LEFT JOIN tb1_normal USING (region, salesperson)\n WHERE empty(tb1_normal.salesperson) AND (source_table.region IN (\n SELECT region\n FROM tb1_normal\n ))\n GROUP BY region\n ),\n tb1_others AS\n (\n SELECT DISTINCT\n region,\n aggregated_region,\n aggregated_salesperson_value\n FROM tb1_others_prev\n RIGHT JOIN tb1_normal USING (region)\n ),\n tb2_prev AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson,\n salesdate AS salesdate,\n sum(amount) AS aggregated_salesdate\n FROM tb1_normal\n INNER JOIN source_table AS join1 USING (region, salesperson)\n GROUP BY\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson,\n salesdate\n ),\n tb2_partition AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson,\n salesdate,\n aggregated_salesdate,\n ROW_NUMBER() OVER (PARTITION BY region, salesperson ORDER BY aggregated_salesdate DESC) AS row2\n FROM tb2_prev\n ),\n tb2_normal AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson,\n salesdate,\n aggregated_salesdate\n FROM tb2_partition\n WHERE row2 <= 2\n ),\n tb2_others_prev AS\n (\n SELECT\n region,\n salesperson,\n sum(amount) AS aggregated_salesdate_value\n FROM source_table\n LEFT JOIN tb2_normal USING (region, salesperson, salesdate)\n WHERE empty(tb2_normal.salesdate) AND (source_table.region IN (\n SELECT region\n FROM tb2_normal\n )) AND (source_table.salesperson IN (\n SELECT salesperson\n FROM tb2_normal\n ))\n GROUP BY\n region,\n salesperson\n ),\n tb2_others AS\n (\n SELECT DISTINCT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson,\n aggregated_salesdate_value\n FROM tb2_others_prev\n RIGHT JOIN tb2_normal USING (region, salesperson)\n ),\n tb0_all_others AS\n (\n SELECT\n 'all other region' AS region,\n aggregated_region_value 
AS aggregated_region,\n 'all other person' AS salesperson,\n aggregated_region_value AS aggregated_salesperson,\n 'all other date' AS salesdate,\n aggregated_region_value AS aggregated_salesdate\n FROM tb0_others\n ),\n tb1_all_others AS\n (\n SELECT\n region,\n aggregated_region,\n 'all other person' AS salesperson,\n aggregated_salesperson_value AS aggregated_salesperson,\n 'all other date' AS salesdate,\n aggregated_salesperson_value AS aggregated_salesdate\n FROM tb1_others\n ),\n last_query AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson,\n salesdate,\n aggregated_salesdate\n FROM tb2_normal\n UNION ALL\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson,\n 'all other date' AS salesdate,\n aggregated_salesdate_value AS aggregated_salesdate\n FROM tb2_others\n UNION ALL\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson,\n salesdate,\n aggregated_salesdate\n FROM tb0_all_others\n UNION ALL\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson,\n salesdate,\n aggregated_salesdate\n FROM tb1_all_others\n )\nSELECT *\nFROM last_query" + }, + { + "sales | top-nested 3 of region by sum(amount)*2 + 5", + "WITH\n source_table AS\n (\n SELECT *\n FROM sales\n ),\n tb0_normal AS\n (\n SELECT\n region AS region,\n (sum(amount) * 2) + 5 AS aggregated_region\n FROM source_table\n GROUP BY region\n ORDER BY aggregated_region DESC\n LIMIT 3\n )\nSELECT\n region,\n aggregated_region\nFROM tb0_normal" + }, + { + "sales | top-nested strlen('abc') of region by sum(amount)", + "WITH\n source_table AS\n (\n SELECT *\n FROM sales\n ),\n tb0_normal AS\n (\n SELECT\n region AS region,\n sum(amount) AS aggregated_region\n FROM source_table\n GROUP BY region\n ORDER BY aggregated_region DESC\n LIMIT lengthUTF8('abc')\n )\nSELECT\n region,\n aggregated_region\nFROM tb0_normal" + }, + { + "sales | top-nested 3 of region with others = strcat('all other',' region') by 
sum(amount)", + "WITH\n source_table AS\n (\n SELECT *\n FROM sales\n ),\n tb0_normal AS\n (\n SELECT\n region AS region,\n sum(amount) AS aggregated_region\n FROM source_table\n GROUP BY region\n ORDER BY aggregated_region DESC\n LIMIT 3\n ),\n tb0_others AS\n (\n SELECT sum(amount) AS aggregated_region_value\n FROM source_table\n WHERE region NOT IN (\n SELECT region\n FROM tb0_normal\n )\n ),\n last_query AS\n (\n SELECT\n region,\n aggregated_region\n FROM tb0_normal\n UNION ALL\n SELECT\n concat(ifNull(kql_tostring('all other'), ''), ifNull(kql_tostring(' region'), ''), '') AS region,\n aggregated_region_value AS aggregated_region\n FROM tb0_others\n )\nSELECT *\nFROM last_query" + }, + { + "sales | top-nested of region by sum(amount)", + "WITH\n source_table AS\n (\n SELECT *\n FROM sales\n ),\n tb0_normal AS\n (\n SELECT\n region AS region,\n sum(amount) AS aggregated_region\n FROM source_table\n GROUP BY region\n ORDER BY aggregated_region DESC\n )\nSELECT\n region,\n aggregated_region\nFROM tb0_normal" + }, + { + "sales | top-nested of substring(region,0,1) by sum(amount)", + "WITH\n source_table AS\n (\n SELECT *\n FROM sales\n ),\n tb0_normal AS\n (\n SELECT\n if(toInt64(length(region)) <= 0, '', substr(region, (((0 % toInt64(length(region))) + toInt64(length(region))) % toInt64(length(region))) + 1, 1)) AS Column1,\n sum(amount) AS aggregated_Column1\n FROM source_table\n GROUP BY Column1\n ORDER BY aggregated_Column1 DESC\n )\nSELECT\n Column1,\n aggregated_Column1\nFROM tb0_normal" + } +}))); diff --git a/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp b/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp new file mode 100644 index 000000000000..aa94ba3940c2 --- /dev/null +++ b/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp @@ -0,0 +1,230 @@ +#include + +#include + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_Datetime, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { 
+ "print week_of_year(datetime(2020-12-31))", + "SELECT toWeek(kql_datetime('2020-12-31'), 3, 'UTC')" + }, + { + "print startofweek(datetime(2017-01-01 10:10:17), -1)", + "SELECT kql_todatetime(addWeeks(toStartOfWeek(kql_datetime('2017-01-01 10:10:17')), -1))" + }, + { + "print startofmonth(datetime(2017-01-01 10:10:17), -1)", + "SELECT kql_todatetime(addMonths(toStartOfMonth(kql_datetime('2017-01-01 10:10:17')), -1))" + }, + { + "print startofday(datetime(2017-01-01 10:10:17), -1)", + "SELECT kql_todatetime(addDays(toStartOfDay(kql_datetime('2017-01-01 10:10:17')), -1))" + }, + { + "print startofyear(datetime(2017-01-01 10:10:17), -1)", + "SELECT kql_todatetime(addYears(toStartOfYear(kql_datetime('2017-01-01 10:10:17')), -1))" + }, + { + "print monthofyear(datetime(2015-12-14))", + "SELECT toMonth(kql_datetime('2015-12-14'))" + }, + { + "print hourofday(datetime(2015-12-14 10:54:00))", + "SELECT toHour(kql_datetime('2015-12-14 10:54:00'))" + }, + { + "print getyear(datetime(2015-10-12))", + "SELECT toYear(kql_datetime('2015-10-12'))" + }, + { + "print getmonth(datetime(2015-10-12))", + "SELECT toMonth(kql_datetime('2015-10-12'))" + }, + { + "print dayofyear(datetime(2015-10-12))", + "SELECT toDayOfYear(kql_datetime('2015-10-12'))" + }, + { + "print dayofmonth(datetime(2015-10-12))", + "SELECT toDayOfMonth(kql_datetime('2015-10-12'))" + }, + { + "print unixtime_seconds_todatetime(1546300899)", + "SELECT if(toTypeName(assumeNotNull(1546300899)) IN ['Int32', 'Int64', 'Float64', 'UInt32', 'UInt64'], kql_todatetime(1546300899), kql_todatetime(throwIf(true, 'unixtime_seconds_todatetime only accepts int, long and double type of arguments')))" + }, + { + "print dayofweek(datetime(2015-12-20))", + "SELECT (toDayOfWeek(kql_datetime('2015-12-20')) % 7) * toIntervalNanosecond(86400000000000)" + }, + { + "print now()", + "SELECT now64(9, 'UTC')" + }, + { + "print now(1d)", + "SELECT now64(9, 'UTC') + toIntervalNanosecond(86400000000000)" + }, + { + "print ago(2d)", + "SELECT 
now64(9, 'UTC') + (-1 * toIntervalNanosecond(172800000000000))" + }, + { + "print endofday(datetime(2017-01-01 10:10:17), -1)", + "SELECT kql_todatetime(addDays(toStartOfDay(kql_datetime('2017-01-01 10:10:17')), -1 + 1)) - toIntervalNanosecond(100)" + }, + { + "print endofday(datetime(2017-01-01 10:10:17), 1)", + "SELECT kql_todatetime(addDays(toStartOfDay(kql_datetime('2017-01-01 10:10:17')), 1 + 1)) - toIntervalNanosecond(100)" + }, + { + "print endofmonth(datetime(2017-01-01 10:10:17), -1)", + "SELECT kql_todatetime(addMonths(toStartOfMonth(kql_datetime('2017-01-01 10:10:17')), -1 + 1)) - toIntervalNanosecond(100)" + }, + { + "print endofmonth(datetime(2017-01-01 10:10:17), 1)", + "SELECT kql_todatetime(addMonths(toStartOfMonth(kql_datetime('2017-01-01 10:10:17')), 1 + 1)) - toIntervalNanosecond(100)" + }, + { + "print endofweek(datetime(2017-01-01 10:10:17), -1)", + "SELECT kql_todatetime(addWeeks(toStartOfWeek(kql_datetime('2017-01-01 10:10:17')), -1 + 1)) - toIntervalNanosecond(100)" + }, + { + "print endofweek(datetime(2017-01-01 10:10:17), 1)", + "SELECT kql_todatetime(addWeeks(toStartOfWeek(kql_datetime('2017-01-01 10:10:17')), 1 + 1)) - toIntervalNanosecond(100)" + }, + { + "print endofyear(datetime(2017-01-01 10:10:17), -1) ", + "SELECT kql_todatetime(addYears(toStartOfYear(kql_datetime('2017-01-01 10:10:17')), -1 + 1)) - toIntervalNanosecond(100)" + }, + { + "print endofyear(datetime(2017-01-01 10:10:17), 1)" , + "SELECT kql_todatetime(addYears(toStartOfYear(kql_datetime('2017-01-01 10:10:17')), 1 + 1)) - toIntervalNanosecond(100)" + }, + { + "print make_datetime(2017,10,01)", + "SELECT if(((2017 >= 1900) AND (2017 <= 2261)) AND ((10 >= 1) AND (10 <= 12)) AND ((0 >= 0) AND (0 <= 59)) AND ((0 >= 0) AND (0 <= 59)) AND (0 >= 0) AND (0 < 60) AND (toModifiedJulianDayOrNull(concat(leftPad(toString(2017), 4, '0'), '-', leftPad(toString(10), 2, '0'), '-', leftPad(toString(1), 2, '0'))) IS NOT NULL), toDateTime64OrNull(toString(makeDateTime64(2017, 10, 1, 0, 0, 
truncate(0), (0 - truncate(0)) * 10000000., 7, 'UTC')), 9), NULL)" + }, + { + "print make_datetime(2017,10,01,12,10)", + "SELECT if(((2017 >= 1900) AND (2017 <= 2261)) AND ((10 >= 1) AND (10 <= 12)) AND ((12 >= 0) AND (12 <= 59)) AND ((10 >= 0) AND (10 <= 59)) AND (0 >= 0) AND (0 < 60) AND (toModifiedJulianDayOrNull(concat(leftPad(toString(2017), 4, '0'), '-', leftPad(toString(10), 2, '0'), '-', leftPad(toString(1), 2, '0'))) IS NOT NULL), toDateTime64OrNull(toString(makeDateTime64(2017, 10, 1, 12, 10, truncate(0), (0 - truncate(0)) * 10000000., 7, 'UTC')), 9), NULL)" + }, + { + "print make_datetime(2017,10,01,12,11,0.1234567)", + "SELECT if(((2017 >= 1900) AND (2017 <= 2261)) AND ((10 >= 1) AND (10 <= 12)) AND ((12 >= 0) AND (12 <= 59)) AND ((11 >= 0) AND (11 <= 59)) AND (0.1234567 >= 0) AND (0.1234567 < 60) AND (toModifiedJulianDayOrNull(concat(leftPad(toString(2017), 4, '0'), '-', leftPad(toString(10), 2, '0'), '-', leftPad(toString(1), 2, '0'))) IS NOT NULL), toDateTime64OrNull(toString(makeDateTime64(2017, 10, 1, 12, 11, truncate(0.1234567), (0.1234567 - truncate(0.1234567)) * 10000000., 7, 'UTC')), 9), NULL)" + }, + { + "print unixtime_microseconds_todatetime(1546300800000000)", + "SELECT kql_todatetime(fromUnixTimestamp64Micro(1546300800000000, 'UTC'))" + }, + { + "print unixtime_milliseconds_todatetime(1546300800000)", + "SELECT kql_todatetime(fromUnixTimestamp64Milli(1546300800000, 'UTC'))" + }, + { + "print unixtime_nanoseconds_todatetime(1546300800000000000)", + "SELECT kql_todatetime(fromUnixTimestamp64Nano(1546300800000000000, 'UTC'))" + }, + { + "print datetime_diff('year',datetime(2017-01-01),datetime(2000-12-31))", + "SELECT dateDiff('year', kql_datetime('2000-12-31'), kql_datetime('2017-01-01'))" + }, + { + "print datetime_diff('minute',datetime(2017-10-30 23:05:01),datetime(2017-10-30 23:00:59))", + "SELECT dateDiff('minute', kql_datetime('2017-10-30 23:00:59'), kql_datetime('2017-10-30 23:05:01'))" + }, + { + "print datetime(null)", + "SELECT 
kql_datetime(NULL)" + }, + { + "print datetime('2014-05-25T08:20:03.123456Z')", + "SELECT kql_datetime('2014-05-25T08:20:03.123456Z')" + }, + { + "print datetime(2015-12-14 18:54)", + "SELECT kql_datetime('2015-12-14 18:54')" + }, + { + "print datetime(2015-12-31 23:59:59.9)", + "SELECT kql_datetime('2015-12-31 23:59:59.9')" + }, + { + "print datetime(\"2015-12-31 23:59:59.9\")", + "SELECT kql_datetime('2015-12-31 23:59:59.9')" + }, + { + "print datetime('2015-12-31 23:59:59.9')", + "SELECT kql_datetime('2015-12-31 23:59:59.9')" + }, + { + "print make_timespan(67,12,30,59.9799)", + "SELECT (((67 * toIntervalNanosecond(86400000000000)) + (12 * toIntervalNanosecond(3600000000000))) + (30 * toIntervalNanosecond(60000000000))) + (59.9799 * toIntervalNanosecond(1000000000))" + }, + { + "print todatetime('2014-05-25T08:20:03.123456Z')", + "SELECT kql_todatetime('2014-05-25T08:20:03.123456Z')" + }, + { + "print format_datetime(todatetime('2009-06-15T13:45:30.6175425'), 'yy-M-dd [H:mm:ss.fff]')", + "SELECT concat(substring(toString(formatDateTime(kql_todatetime('2009-06-15T13:45:30.6175425'), '%y-%m-%d [%H:%M:%S.]')), 1, position(toString(formatDateTime(kql_todatetime('2009-06-15T13:45:30.6175425'), '%y-%m-%d [%H:%M:%S.]')), '.')), substring(substring(toString(kql_todatetime('2009-06-15T13:45:30.6175425')), position(toString(kql_todatetime('2009-06-15T13:45:30.6175425')), '.') + 1), 1, 3), substring(toString(formatDateTime(kql_todatetime('2009-06-15T13:45:30.6175425'), '%y-%m-%d [%H:%M:%S.]')), position(toString(formatDateTime(kql_todatetime('2009-06-15T13:45:30.6175425'), '%y-%m-%d [%H:%M:%S.]')), '.') + 1, length(toString(formatDateTime(kql_todatetime('2009-06-15T13:45:30.6175425'), '%y-%m-%d [%H:%M:%S.]')))))" + }, + { + "print format_datetime(datetime(2015-12-14 02:03:04.12345), 'y-M-d h:m:s tt')", + "SELECT formatDateTime(kql_datetime('2015-12-14 02:03:04.12345'), '%y-%m-%e %I:%M:%S %p')" + }, + { + "print format_timespan(time(1d), 'd-[hh:mm:ss]')", + "SELECT 
concat(if(length(toString(intDiv(toIntervalNanosecond(86400000000000), toIntervalNanosecond(86400000000000)))) < 1, leftPad(toString(intDiv(toIntervalNanosecond(86400000000000), toIntervalNanosecond(86400000000000))), 1, '0'), toString(intDiv(toIntervalNanosecond(86400000000000), toIntervalNanosecond(86400000000000)))), '-', '[', if(length(toString(intDiv(toIntervalNanosecond(86400000000000), toIntervalNanosecond(3600000000000)) % 24)) < 2, leftPad(toString(intDiv(toIntervalNanosecond(86400000000000), toIntervalNanosecond(3600000000000)) % 24), 2, '0'), toString(intDiv(toIntervalNanosecond(86400000000000), toIntervalNanosecond(3600000000000)) % 24)), ':', if(length(toString(intDiv(toIntervalNanosecond(86400000000000), toIntervalNanosecond(60000000000)) % 60)) < 2, leftPad(toString(intDiv(toIntervalNanosecond(86400000000000), toIntervalNanosecond(60000000000)) % 60), 2, '0'), toString(intDiv(toIntervalNanosecond(86400000000000), toIntervalNanosecond(60000000000)) % 60)), ':', if(length(toString(intDiv(toIntervalNanosecond(86400000000000), toIntervalNanosecond(1000000000)) % 60)) < 2, leftPad(toString(intDiv(toIntervalNanosecond(86400000000000), toIntervalNanosecond(1000000000)) % 60), 2, '0'), toString(intDiv(toIntervalNanosecond(86400000000000), toIntervalNanosecond(1000000000)) % 60)), ']', '')" + }, + { + "print format_timespan(time('12:30:55.123'), 'ddddd-[hh:mm:ss.ffff]')", + "SELECT concat(if(length(toString(intDiv(toIntervalNanosecond(45055123000000), toIntervalNanosecond(86400000000000)))) < 5, leftPad(toString(intDiv(toIntervalNanosecond(45055123000000), toIntervalNanosecond(86400000000000))), 5, '0'), toString(intDiv(toIntervalNanosecond(45055123000000), toIntervalNanosecond(86400000000000)))), '-', '[', if(length(toString(intDiv(toIntervalNanosecond(45055123000000), toIntervalNanosecond(3600000000000)) % 24)) < 2, leftPad(toString(intDiv(toIntervalNanosecond(45055123000000), toIntervalNanosecond(3600000000000)) % 24), 2, '0'), 
toString(intDiv(toIntervalNanosecond(45055123000000), toIntervalNanosecond(3600000000000)) % 24)), ':', if(length(toString(intDiv(toIntervalNanosecond(45055123000000), toIntervalNanosecond(60000000000)) % 60)) < 2, leftPad(toString(intDiv(toIntervalNanosecond(45055123000000), toIntervalNanosecond(60000000000)) % 60), 2, '0'), toString(intDiv(toIntervalNanosecond(45055123000000), toIntervalNanosecond(60000000000)) % 60)), ':', if(length(toString(intDiv(toIntervalNanosecond(45055123000000), toIntervalNanosecond(1000000000)) % 60)) < 2, leftPad(toString(intDiv(toIntervalNanosecond(45055123000000), toIntervalNanosecond(1000000000)) % 60), 2, '0'), toString(intDiv(toIntervalNanosecond(45055123000000), toIntervalNanosecond(1000000000)) % 60)), '.', if(length(substring(toString(intDiv(toIntervalNanosecond(45055123000000), toIntervalNanosecond(100)) % 10000000), 1, 4)) < 4, rightPad(substring(toString(intDiv(toIntervalNanosecond(45055123000000), toIntervalNanosecond(100)) % 10000000), 1, 4), 4, '0'), substring(toString(intDiv(toIntervalNanosecond(45055123000000), toIntervalNanosecond(100)) % 10000000), 1, 4)), ']', '')" + }, + { + "print v1=format_timespan(time('29.09:00:05.12345'), 'dd.hh:mm:ss:FF')", + "SELECT concat(if(length(toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(86400000000000)))) < 2, leftPad(toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(86400000000000))), 2, '0'), toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(86400000000000)))), '.', if(length(toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(3600000000000)) % 24)) < 2, leftPad(toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(3600000000000)) % 24), 2, '0'), toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(3600000000000)) % 24)), ':', if(length(toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(60000000000)) % 
60)) < 2, leftPad(toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(60000000000)) % 60), 2, '0'), toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(60000000000)) % 60)), ':', if(length(toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(1000000000)) % 60)) < 2, leftPad(toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(1000000000)) % 60), 2, '0'), toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(1000000000)) % 60)), ':', substring(toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(100)) % 10000000), 1, 2), '') AS v1" + }, + { + "print v2=format_timespan(time('29.09:00:05.12345'), 'ddd.h:mm:ss [fffffff]');", + "SELECT concat(if(length(toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(86400000000000)))) < 3, leftPad(toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(86400000000000))), 3, '0'), toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(86400000000000)))), '.', if(length(toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(3600000000000)) % 24)) < 1, leftPad(toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(3600000000000)) % 24), 1, '0'), toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(3600000000000)) % 24)), ':', if(length(toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(60000000000)) % 60)) < 2, leftPad(toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(60000000000)) % 60), 2, '0'), toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(60000000000)) % 60)), ':', if(length(toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(1000000000)) % 60)) < 2, leftPad(toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(1000000000)) % 
60), 2, '0'), toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(1000000000)) % 60)), ' ', '[', if(length(substring(toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(100)) % 10000000), 1, 7)) < 7, rightPad(substring(toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(100)) % 10000000), 1, 7), 7, '0'), substring(toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(100)) % 10000000), 1, 7)), ']', '') AS v2" + }, + { + "print datetime_part('day', datetime(2017-10-30 01:02:03.7654321))", + "SELECT formatDateTime(kql_datetime('2017-10-30 01:02:03.7654321'), '%e')" + }, + { + "print datetime_add('day',1,datetime(2017-10-30 01:02:03.7654321))", + "SELECT kql_datetime('2017-10-30 01:02:03.7654321') + toIntervalDay(1)" + }, + { + "print totimespan(time(1d))", + "SELECT kql_totimespan(toIntervalNanosecond(86400000000000))" + }, + { + "print totimespan('0.01:34:23')", + "SELECT kql_totimespan('0.01:34:23')" + }, + { + "print totimespan(time('-1:12:34'))", + "SELECT kql_totimespan(toIntervalNanosecond(-4354000000000))" + }, + { + "print totimespan(-1d)", + "SELECT kql_totimespan(-toIntervalNanosecond(86400000000000))" + }, + { + "print totimespan('abc')", + "SELECT kql_totimespan('abc')" + }, + { + "print time(2)", + "SELECT toIntervalNanosecond(172800000000000)" + }, + { + "hits | project bin(todatetime(EventTime), 1m)", + "SELECT kql_bin(kql_todatetime(EventTime), toIntervalNanosecond(60000000000))\nFROM hits" + } + +}))); diff --git a/src/Parsers/tests/KQL/gtest_KQL_dynamicFunctions.cpp b/src/Parsers/tests/KQL/gtest_KQL_dynamicFunctions.cpp new file mode 100644 index 000000000000..38bf08e9900c --- /dev/null +++ b/src/Parsers/tests/KQL/gtest_KQL_dynamicFunctions.cpp @@ -0,0 +1,140 @@ +#include +#include + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_Dynamic, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + 
{ + "print t = array_sort_asc(dynamic([null, 'd', 'a', 'c', 'c']))", + "SELECT kql_array_sort_asc([NULL, 'd', 'a', 'c', 'c']).1 AS t" + }, + { + "print t = array_sort_asc(dynamic([4, 1, 3, 2]))", + "SELECT kql_array_sort_asc([4, 1, 3, 2]).1 AS t" + }, + { + "print t = array_sort_asc(dynamic(['b', 'a', 'c']), dynamic(['q', 'p', 'r']))", + "SELECT kql_array_sort_asc(['b', 'a', 'c'], ['q', 'p', 'r']).1 AS t" + }, + { + "print t = array_sort_asc( dynamic(['d', null, 'a', 'c', 'c']) , false)", + "SELECT kql_array_sort_asc(['d', NULL, 'a', 'c', 'c'], false).1 AS t" + }, + { + "print t = array_sort_asc( dynamic([null, 'd', null, null, 'a', 'c', 'c', null, null, null]) , false)", + "SELECT kql_array_sort_asc([NULL, 'd', NULL, NULL, 'a', 'c', 'c', NULL, NULL, NULL], false).1 AS t" + }, + { + "print t = array_sort_asc( dynamic([null, 'd', null, null, 'a', 'c', 'c', null, null, null]) , true)", + "SELECT kql_array_sort_asc([NULL, 'd', NULL, NULL, 'a', 'c', 'c', NULL, NULL, NULL], true).1 AS t" + }, + { + "print t = array_sort_asc( dynamic([null, 'd', null, null, 'a', 'c', 'c', null, null, null]))", + "SELECT kql_array_sort_asc([NULL, 'd', NULL, NULL, 'a', 'c', 'c', NULL, NULL, NULL]).1 AS t" + }, + { + "print t = array_sort_asc( dynamic(['d', null, 'a', 'c', 'c']), 1 < 2)", + "SELECT kql_array_sort_asc(['d', NULL, 'a', 'c', 'c'], 1 < 2).1 AS t" + }, + { + "print t = array_sort_asc( dynamic(['d', null, 'a', 'c', 'c']) , 1 > 2)", + "SELECT kql_array_sort_asc(['d', NULL, 'a', 'c', 'c'], 1 > 2).1 AS t" + }, + { + "print t = array_sort_asc(dynamic([2, 1, null,3]), dynamic([20, 10, 40, 30]), false)", + "SELECT kql_array_sort_asc([2, 1, NULL, 3], [20, 10, 40, 30], false).1 AS t" + }, + { + "print t = array_sort_asc(dynamic([2, 1, null,3]), dynamic([20, 10, 40, 30]), 1 > 2)", + "SELECT kql_array_sort_asc([2, 1, NULL, 3], [20, 10, 40, 30], 1 > 2).1 AS t" + }, + { + "print t = array_sort_asc(dynamic([2, 1, null,3]), dynamic([20, 10, 40, 30]), true)", + "SELECT kql_array_sort_asc([2, 1, 
NULL, 3], [20, 10, 40, 30], true).1 AS t" + }, + { + "print t = array_sort_asc(dynamic([2, 1, null,3]), dynamic([20, 10, 40, 30]), 1 < 2)", + "SELECT kql_array_sort_asc([2, 1, NULL, 3], [20, 10, 40, 30], 1 < 2).1 AS t" + }, + { + "print t = array_sort_desc(dynamic([null, 'd', 'a', 'c', 'c']))", + "SELECT kql_array_sort_desc([NULL, 'd', 'a', 'c', 'c']).1 AS t" + }, + { + "print t = array_sort_desc(dynamic([4, 1, 3, 2]))", + "SELECT kql_array_sort_desc([4, 1, 3, 2]).1 AS t" + }, + { + "print t = array_sort_desc(dynamic(['b', 'a', 'c']), dynamic(['q', 'p', 'r']))", + "SELECT kql_array_sort_desc(['b', 'a', 'c'], ['q', 'p', 'r']).1 AS t" + }, + { + "print array_sort_desc(dynamic(['b', 'a', 'c']), dynamic(['q', 'p', 'r']))", + "SELECT kql_array_sort_desc(['b', 'a', 'c'], ['q', 'p', 'r'])" + }, + { + "print t = array_sort_desc( dynamic(['d', null, 'a', 'c', 'c']) , false)", + "SELECT kql_array_sort_desc(['d', NULL, 'a', 'c', 'c'], false).1 AS t" + }, + { + "print array_sort_asc(dynamic([2, 1, null,3]), dynamic([20, 10, 40, 30]))[0]", + "SELECT tupleElement(kql_array_sort_asc([2, 1, NULL, 3], [20, 10, 40, 30]), if(0 >= 0, 0 + 1, 0))" + }, + { + "print (t) = array_sort_asc(dynamic([2, 1, null,3]), dynamic([20, 10, 40, 30]))", + "SELECT kql_array_sort_asc([2, 1, NULL, 3], [20, 10, 40, 30]).1 AS t" + }, + { + "print (t,w) = array_sort_asc(dynamic([2, 1, null,3]), dynamic([20, 10, 40, 30]))", + "SELECT\n kql_array_sort_asc([2, 1, NULL, 3], [20, 10, 40, 30]).1 AS t,\n kql_array_sort_asc([2, 1, NULL, 3], [20, 10, 40, 30]).2 AS w" + }, + { + "print t = array_sort_asc(dynamic([2, 1, null,3]), dynamic([20, 10, 40, 30])),y=5", + "SELECT\n kql_array_sort_asc([2, 1, NULL, 3], [20, 10, 40, 30]).1 AS t,\n 5 AS y" + }, + { + "print 5, (t) = array_sort_asc(dynamic([2, 1, null,3]), dynamic([20, 10, 40, 30])),y=5", + "SELECT\n 5,\n kql_array_sort_asc([2, 1, NULL, 3], [20, 10, 40, 30]).1 AS t,\n 5 AS y" + }, + { + "print t = array_sort_asc(dynamic([2, 1, null,3]), dynamic([20, 10, 40, 
30])),w = array_sort_asc(dynamic([2, 1, 3]))", + "SELECT\n kql_array_sort_asc([2, 1, NULL, 3], [20, 10, 40, 30]).1 AS t,\n kql_array_sort_asc([2, 1, 3]).1 AS w" + }, + { + "print A[0]", + "SELECT A[if(0 >= 0, 0 + 1, 0)]" + }, + { + "print A[0][1]", + "SELECT (A[if(0 >= 0, 0 + 1, 0)])[if(1 >= 0, 1 + 1, 1)]" + }, + { + "print dynamic([[1,2,3,4,5],[20,30]])[0]", + "SELECT [[1, 2, 3, 4, 5], [20, 30]][if(0 >= 0, 0 + 1, 0)]" + }, + { + "print dynamic([[1,2,3,4,5],[20,30]])[1][1]", + "SELECT ([[1, 2, 3, 4, 5], [20, 30]][if(1 >= 0, 1 + 1, 1)])[if(1 >= 0, 1 + 1, 1)]" + }, + { + "print A[B[1]]", + "SELECT A[if((B[if(1 >= 0, 1 + 1, 1)]) >= 0, (B[if(1 >= 0, 1 + 1, 1)]) + 1, B[if(1 >= 0, 1 + 1, 1)])]" + }, + { + "print A[strlen('a')-1]", + "SELECT A[if((lengthUTF8('a') - 1) >= 0, (lengthUTF8('a') - 1) + 1, lengthUTF8('a') - 1)]" + }, + { + "print strlen(A[0])", + "SELECT lengthUTF8(A[if(0 >= 0, 0 + 1, 0)])" + }, + { + "print repeat(1, 3)", + "SELECT if(3 < 0, [NULL], arrayWithConstant(abs(3), 1))" + }, + { + "print repeat(1, -3)", + "SELECT if(-3 < 0, [NULL], arrayWithConstant(abs(-3), 1))" + } + }))); diff --git a/src/Parsers/tests/KQL/gtest_KQL_lookup_join.cpp b/src/Parsers/tests/KQL/gtest_KQL_lookup_join.cpp new file mode 100644 index 000000000000..d6b566ac413e --- /dev/null +++ b/src/Parsers/tests/KQL/gtest_KQL_lookup_join.cpp @@ -0,0 +1,106 @@ +#include + +#include + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_lookup_join, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "FactTable | lookup kind=leftouter DimTable on Personal, Family", + "SELECT *\nFROM FactTable AS left_\nLEFT JOIN DimTable AS right_ USING (Personal, Family)" + }, + { + "FactTable | lookup kind=inner DimTable on Personal, Family", + "SELECT *\nFROM FactTable AS left_\nINNER JOIN DimTable AS right_ USING (Personal, Family)" + }, + { + "FactTable | lookup kind=leftouter (DimTable | where Personal == 'Bill') on Personal, Family", + 
"SELECT *\nFROM FactTable AS left_\nLEFT JOIN\n(\n SELECT *\n FROM DimTable\n WHERE Personal = 'Bill'\n) AS right_ USING (Personal, Family)" + }, + { + "FactTable | project Personal , Family| lookup kind=leftouter DimTable on Personal, Family", + "SELECT *\nFROM\n(\n SELECT\n Personal,\n Family\n FROM FactTable\n) AS left_\nLEFT JOIN DimTable AS right_ USING (Personal, Family)" + }, + { + "FactTable | lookup kind=leftouter DimTable on $left.Personal == $right.Personal, $left.Family == $right.Family", + "SELECT *\nFROM FactTable AS left_\nLEFT JOIN DimTable AS right_ USING (Personal, Family)" + }, + { + "FactTable | lookup kind=leftouter DimTable on Personal , $left.Family == $right.Family", + "SELECT *\nFROM FactTable AS left_\nLEFT JOIN DimTable AS right_ USING (Personal, Family)" + }, + { + "FactTable|lookup kind=leftouter DimTable on Personal , ($left.Family == $right.Family)", + "SELECT *\nFROM FactTable AS left_\nLEFT JOIN DimTable AS right_ USING (Personal, Family)" + }, + { + "FactTable | project Row, Personal , Family | lookup kind=leftouter (FactTable | lookup kind=leftouter DimTable on Personal) on Personal, Family", + "SELECT *\nFROM\n(\n SELECT\n Row,\n Personal,\n Family\n FROM FactTable\n) AS left_\nLEFT JOIN\n(\n SELECT *\n FROM FactTable AS left_\n LEFT JOIN DimTable AS right_ USING (Personal)\n) AS right_ USING (Personal, Family)" + }, + { + "FactTable|project Row, Personal , Family| lookup kind=leftouter (DimTable | where Personal == 'Bill') on Personal, Family| lookup kind=inner DimTable on Personal, Family", + "SELECT *\nFROM\n(\n SELECT *\n FROM\n (\n SELECT\n Row,\n Personal,\n Family\n FROM FactTable\n ) AS left_\n LEFT JOIN\n (\n SELECT *\n FROM DimTable\n WHERE Personal = 'Bill'\n ) AS right_ USING (Personal, Family)\n) AS left_\nINNER JOIN DimTable AS right_ USING (Personal, Family)" + }, + { + "FactTable| lookup kind=leftouter DimTable on $left.Personal == $right.FirstName, Family", + "SELECT *\nFROM FactTable AS left_\nLEFT JOIN DimTable 
AS right_ ON (left_.Personal = right_.FirstName) AND (left_.Family = right_.Family)" + }, + { + "FactTable| lookup kind=leftouter DimTable on $left.Personal == $right.FirstName, Family| lookup kind=inner DimTable on $left.Personal == $right.FirstName", + "SELECT *\nFROM\n(\n SELECT *\n FROM FactTable AS left_\n LEFT JOIN DimTable AS right_ ON (left_.Personal = right_.FirstName) AND (left_.Family = right_.Family)\n) AS left_\nINNER JOIN DimTable AS right_ ON left_.Personal = right_.FirstName" + }, + { + "X | join Y on Key", + "SELECT *\nFROM\n(\n SELECT *\n FROM X\n LIMIT 1 BY Key\n) AS left_\nINNER JOIN Y AS right_ USING (Key)" + }, + { + "X | join kind=innerunique Y on Key", + "SELECT *\nFROM\n(\n SELECT *\n FROM X\n LIMIT 1 BY Key\n) AS left_\nINNER JOIN Y AS right_ USING (Key)" + }, + { + "X | join kind=inner Y on Key", + "SELECT *\nFROM X AS left_\nINNER JOIN Y AS right_ USING (Key)" + }, + { + "X | join kind=leftouter Y on Key", + "SELECT *\nFROM X AS left_\nLEFT JOIN Y AS right_ USING (Key)" + }, + { + "X | join kind=rightouter Y on Key", + "SELECT *\nFROM X AS left_\nRIGHT JOIN Y AS right_ USING (Key)" + }, + { + "X | join kind=fullouter Y on Key", + "SELECT *\nFROM X AS left_\nFULL OUTER JOIN Y AS right_ USING (Key)" + }, + { + "X | join kind=leftanti Y on Key", + "SELECT *\nFROM X AS left_\nANTI LEFT JOIN Y AS right_ USING (Key)" + }, + { + "X | join kind=rightanti Y on Key", + "SELECT *\nFROM X AS left_\nANTI RIGHT JOIN Y AS right_ USING (Key)" + }, + { + "X | join kind=leftsemi Y on Key", + "SELECT *\nFROM X AS left_\nSEMI LEFT JOIN Y AS right_ USING (Key)" + }, + { + "X | join kind=rightsemi Y on Key", + "SELECT *\nFROM X AS left_\nSEMI RIGHT JOIN Y AS right_ USING (Key)" + }, + { + "X | join Y on $left.Key == $right.Key", + "SELECT *\nFROM\n(\n SELECT *\n FROM X\n LIMIT 1 BY Key\n) AS left_\nINNER JOIN Y AS right_ USING (Key)" + }, + { + "X | join Y on $left.Key == $right.Key2", + "SELECT *\nFROM\n(\n SELECT *\n FROM X\n LIMIT 1 BY Key\n) AS 
left_\nINNER JOIN Y AS right_ ON left_.Key = right_.Key2" + }, + { + "X | join (Y | project Key, value2) on $left.Key == $right.Key", + "SELECT *\nFROM\n(\n SELECT *\n FROM X\n LIMIT 1 BY Key\n) AS left_\nINNER JOIN\n(\n SELECT\n Key,\n value2\n FROM Y\n) AS right_ USING (Key)" + } + +}))); diff --git a/src/Parsers/tests/KQL/gtest_KQL_operator_in_sql.cpp b/src/Parsers/tests/KQL/gtest_KQL_operator_in_sql.cpp new file mode 100644 index 000000000000..5e88c5037569 --- /dev/null +++ b/src/Parsers/tests/KQL/gtest_KQL_operator_in_sql.cpp @@ -0,0 +1,110 @@ +#include + +#include +#include + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_operator_in_sql, ParserTest, /* Cases are { query, SQL } string pairs; keep the comma between the two literals, otherwise C++ concatenates them into a single string. */ + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "select * from kql(Customers | where FirstName !in ('Peter', 'Latoya'))", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE FirstName NOT IN ('Peter', 'Latoya')\n)" + }, + { + "select * from kql(Customers | where FirstName !contains 'Pet');", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE NOT (FirstName ILIKE '%Pet%')\n)" + }, + { + "select * from kql(Customers | where FirstName !contains_cs 'Pet');", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE NOT (FirstName LIKE '%Pet%')\n)" + }, + { + "select * from kql(Customers | where FirstName !endswith 'ter');", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE NOT (FirstName ILIKE '%ter')\n)" + }, + { + "select * from kql(Customers | where FirstName !endswith_cs 'ter');", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE NOT endsWith(FirstName, 'ter')\n)" + }, + { + "select * from kql(Customers | where FirstName != 'Peter');", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE FirstName != 'Peter'\n)" + }, + { + "select * from kql(Customers | where FirstName !has 'Peter');", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE NOT ifNull(hasTokenCaseInsensitiveOrNull(FirstName, 'Peter'), 
hasTokenCaseInsensitive(FirstName, 'Peter') AND (positionCaseInsensitive(FirstName, 'Peter') > 0))\n)" + }, + { + "select * from kql(Customers | where FirstName !has_cs 'peter');", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE NOT ifNull(hasTokenOrNull(FirstName, 'peter'), hasToken(FirstName, 'peter') AND (position(FirstName, 'peter') > 0))\n)" + }, + { + "select * from kql(Customers | where FirstName !hasprefix 'Peter');", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE (NOT (FirstName ILIKE 'Peter%')) AND (NOT (FirstName ILIKE '% Peter%'))\n)" + }, + { + "select * from kql(Customers | where FirstName !hasprefix_cs 'Peter');", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE (NOT startsWith(FirstName, 'Peter')) AND (NOT (FirstName LIKE '% Peter%'))\n)" + }, + { + "select * from kql(Customers | where FirstName !hassuffix 'Peter');", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE (NOT (FirstName ILIKE '%Peter')) AND (NOT (FirstName ILIKE '%Peter %'))\n)" + }, + { + "select * from kql(Customers | where FirstName !hassuffix_cs 'Peter');", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE (NOT endsWith(FirstName, 'Peter')) AND (NOT (FirstName LIKE '%Peter %'))\n)" + }, + { + "select * from kql(Customers | where FirstName !startswith 'Peter');", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE NOT (FirstName ILIKE 'Peter%')\n)" + }, + { + "select * from kql(Customers | where FirstName !startswith_cs 'Peter');", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE NOT startsWith(FirstName, 'Peter')\n)" + }, + { + "select * from kql(print t = 'a' in~ ('A', 'b', 'c'))", + "SELECT *\nFROM\n(\n SELECT lower('a') IN (lower('A'), lower('b'), lower('c')) AS t\n)" + }, + { + "select * from kql(Customers | where FirstName in~ ('peter', 'apple'))", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE lower(FirstName) IN (lower('peter'), lower('apple'))\n)" + }, + { + "select * from kql(Customers | where FirstName in~ 
((Customers | project FirstName | where FirstName == 'Peter')))", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE lower(FirstName) IN (\n SELECT lower(FirstName)\n FROM Customers\n WHERE FirstName = 'Peter'\n )\n)" + }, + { + "select * from kql(Customers | where FirstName in~ ((Customers | project FirstName | where Age < 30)))", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE lower(FirstName) IN (\n SELECT lower(FirstName)\n FROM Customers\n WHERE Age < 30\n )\n)" + }, + { + "select * from kql(print t = 'a' !in~ ('A', 'b', 'c'))", + "SELECT *\nFROM\n(\n SELECT lower('a') NOT IN (lower('A'), lower('b'), lower('c')) AS t\n)" + }, + { + "select * from kql(print t = 'a' !in~ (dynamic(['A', 'b', 'c'])))", + "SELECT *\nFROM\n(\n SELECT lower('a') NOT IN (lower('A'), lower('b'), lower('c')) AS t\n)" + }, + { + "select * from kql(Customers | where FirstName !in~ ('peter', 'apple'))", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE lower(FirstName) NOT IN (lower('peter'), lower('apple'))\n)" + }, + { + "select * from kql(Customers | where FirstName !in~ ((Customers | project FirstName | where FirstName == 'Peter')))", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE lower(FirstName) NOT IN (\n SELECT lower(FirstName)\n FROM Customers\n WHERE FirstName = 'Peter'\n )\n)" + }, + { + "select * from kql(Customers | where FirstName !in~ ((Customers | project FirstName | where Age < 30)))", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE lower(FirstName) NOT IN (\n SELECT lower(FirstName)\n FROM Customers\n WHERE Age < 30\n )\n)" + }, + { + "select * from kql(Customers | where FirstName =~ 'peter' and LastName =~ 'naRA')", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE (lower(FirstName) = lower('peter')) AND (lower(LastName) = lower('naRA'))\n)" + }, + { + "select * from kql(Customers | where FirstName !~ 'nEyMaR' and LastName =~ 'naRA')", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE (lower(FirstName) != 
lower('nEyMaR')) AND (lower(LastName) = lower('naRA'))\n)" + } +}))); diff --git a/src/Parsers/tests/KQL/gtest_KQL_range.cpp b/src/Parsers/tests/KQL/gtest_KQL_range.cpp new file mode 100644 index 000000000000..decc34c6a600 --- /dev/null +++ b/src/Parsers/tests/KQL/gtest_KQL_range.cpp @@ -0,0 +1,81 @@ +#include + +#include + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_Range, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "print range(1, 10, 2)", + "SELECT kql_range(1, 10, 2)" + }, + { + "print range(1, 10)", + "SELECT kql_range(1, 10)" + }, + { + "print range(1.2, 10.3, 2.2)", + "SELECT kql_range(1.2, 10.3, 2.2)" + }, + { + "print range(1.2, 10.3, 2)", + "SELECT kql_range(1.2, 10.3, 2)" + }, + { + "print range(1.2, 10,2.2)", + "SELECT kql_range(1.2, 10, 2.2)" + }, + { + "print range(1, 10, 2.2)", + "SELECT kql_range(1, 10, 2.2)" + }, + { + "print range(1, 10.5, 2.2)", + "SELECT kql_range(1, 10.5, 2.2)" + }, + { + "print range(1.1, 10 ,2.2)", + "SELECT kql_range(1.1, 10, 2.2)" + }, + { + "print range(1.2, 10, 2)", + "SELECT kql_range(1.2, 10, 2)" + }, + { + "print range(datetime('2001-01-01'), datetime('2001-01-02'), 5h)", + "SELECT kql_range(kql_datetime('2001-01-01'), kql_datetime('2001-01-02'), toIntervalNanosecond(18000000000000))" + }, + { + "print range(datetime('2001-01-01'), datetime('2001-01-02'))", + "SELECT kql_range(kql_datetime('2001-01-01'), kql_datetime('2001-01-02'))" + }, + { + "print range(1h, 5h, 2h)", + "SELECT kql_range(toIntervalNanosecond(3600000000000), toIntervalNanosecond(18000000000000), toIntervalNanosecond(7200000000000))" + }, + { + "print range(1.5h, 5h, 2h)", + "SELECT kql_range(toIntervalNanosecond(5400000000000), toIntervalNanosecond(18000000000000), toIntervalNanosecond(7200000000000))" + }, + { + "print range(ago(1d),now(),1d)", + "SELECT kql_range(now64(9, 'UTC') + (-1 * toIntervalNanosecond(86400000000000)), now64(9, 'UTC'), 
toIntervalNanosecond(86400000000000))" + }, + { + "print range(endofday(datetime(2017-01-01 10:10:17)), endofday(datetime(2017-01-03 10:10:17)), 1d)", + "SELECT kql_range(kql_todatetime(addDays(toStartOfDay(kql_datetime('2017-01-01 10:10:17')), 0 + 1)) - toIntervalNanosecond(100), kql_todatetime(addDays(toStartOfDay(kql_datetime('2017-01-03 10:10:17')), 0 + 1)) - toIntervalNanosecond(100), toIntervalNanosecond(86400000000000))" + }, + { + "range Age from 20 to 25 step 1", + "SELECT *\nFROM\n(\n SELECT kql_range(20, 25, 1) AS Age\n)\nARRAY JOIN Age" + }, + { + "range LastWeek from ago(7d) to now() step 1d", + "SELECT *\nFROM\n(\n SELECT kql_range(now64(9, 'UTC') + (-1 * toIntervalNanosecond(604800000000000)), now64(9, 'UTC'), toIntervalNanosecond(86400000000000)) AS LastWeek\n)\nARRAY JOIN LastWeek" + }, + { + "range FirstWeek from datetime('2023-01-01') to datetime('2023-01-07') step 1d", + "SELECT *\nFROM\n(\n SELECT kql_range(kql_datetime('2023-01-01'), kql_datetime('2023-01-07'), toIntervalNanosecond(86400000000000)) AS FirstWeek\n)\nARRAY JOIN FirstWeek" + } +}))); diff --git a/src/Parsers/tests/KQL/gtest_KQL_test_subquery.cpp b/src/Parsers/tests/KQL/gtest_KQL_test_subquery.cpp new file mode 100644 index 000000000000..f32c170ceee9 --- /dev/null +++ b/src/Parsers/tests/KQL/gtest_KQL_test_subquery.cpp @@ -0,0 +1,109 @@ +#include + +#include + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_Distinct, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "Customers | where FirstName in ((Customers | project FirstName | where FirstName !in ('Peter', 'Latoya')));", + "SELECT *\nFROM Customers\nWHERE FirstName IN (\n SELECT FirstName\n FROM Customers\n WHERE FirstName NOT IN ('Peter', 'Latoya')\n)" + }, + { + "Customers | where FirstName in ((Customers | project FirstName, Age | where Age !in (28, 29)));", + "SELECT *\nFROM Customers\nWHERE FirstName IN (\n SELECT FirstName\n FROM Customers\n WHERE Age 
NOT IN (28, 29)\n)" + }, + { + "Customers | where FirstName in ((Customers | project FirstName | where FirstName !contains 'ste'));", + "SELECT *\nFROM Customers\nWHERE FirstName IN (\n SELECT FirstName\n FROM Customers\n WHERE NOT (FirstName ILIKE '%ste%')\n)" + }, + { + "Customers | where FirstName in ((Customers | project FirstName | where FirstName !contains_cs 'Ste'));", + "SELECT *\nFROM Customers\nWHERE FirstName IN (\n SELECT FirstName\n FROM Customers\n WHERE NOT (FirstName LIKE '%Ste%')\n)" + }, + { + "Customers | where FirstName in ((Customers | project FirstName | where FirstName !contains_cs 'ste'));", + "SELECT *\nFROM Customers\nWHERE FirstName IN (\n SELECT FirstName\n FROM Customers\n WHERE NOT (FirstName LIKE '%ste%')\n)" + }, + { + "Customers | where FirstName in ((Customers | project FirstName | where FirstName !endswith 'ore'));", + "SELECT *\nFROM Customers\nWHERE FirstName IN (\n SELECT FirstName\n FROM Customers\n WHERE NOT (FirstName ILIKE '%ore')\n)" + }, + { + "Customers | where FirstName in ((Customers | project FirstName | where FirstName !endswith_cs 'Ore'));", + "SELECT *\nFROM Customers\nWHERE FirstName IN (\n SELECT FirstName\n FROM Customers\n WHERE NOT endsWith(FirstName, 'Ore')\n)" + }, + { + "Customers | where FirstName in ((Customers | project FirstName | where FirstName != 'Theodore'));", + "SELECT *\nFROM Customers\nWHERE FirstName IN (\n SELECT FirstName\n FROM Customers\n WHERE FirstName != 'Theodore'\n)" + }, + { + "Customers | where FirstName in ((Customers | project FirstName | where FirstName !~ 'theodore'));", + "SELECT *\nFROM Customers\nWHERE FirstName IN (\n SELECT FirstName\n FROM Customers\n WHERE lower(FirstName) != lower('theodore')\n)" + }, + { + "Customers | where FirstName in ((Customers | project FirstName | where FirstName !has 'Peter'));", + "SELECT *\nFROM Customers\nWHERE FirstName IN (\n SELECT FirstName\n FROM Customers\n WHERE NOT ifNull(hasTokenCaseInsensitiveOrNull(FirstName, 'Peter'), 
hasTokenCaseInsensitive(FirstName, 'Peter') AND (positionCaseInsensitive(FirstName, 'Peter') > 0))\n)" + }, + { + "Customers | where FirstName in ((Customers | project FirstName | where FirstName !has_cs 'Peter'));", + "SELECT *\nFROM Customers\nWHERE FirstName IN (\n SELECT FirstName\n FROM Customers\n WHERE NOT ifNull(hasTokenOrNull(FirstName, 'Peter'), hasToken(FirstName, 'Peter') AND (position(FirstName, 'Peter') > 0))\n)" + }, + { + "Customers | where FirstName in ((Customers | project FirstName | where FirstName !hasprefix 'Peter'));", + "SELECT *\nFROM Customers\nWHERE FirstName IN (\n SELECT FirstName\n FROM Customers\n WHERE (NOT (FirstName ILIKE 'Peter%')) AND (NOT (FirstName ILIKE '% Peter%'))\n)" + }, + { + "Customers | where FirstName in ((Customers | project FirstName | where FirstName !hasprefix_cs 'Peter'));", + "SELECT *\nFROM Customers\nWHERE FirstName IN (\n SELECT FirstName\n FROM Customers\n WHERE (NOT startsWith(FirstName, 'Peter')) AND (NOT (FirstName LIKE '% Peter%'))\n)" + }, + { + "Customers | where FirstName in ((Customers | project FirstName | where FirstName !hassuffix 'Peter'));", + "SELECT *\nFROM Customers\nWHERE FirstName IN (\n SELECT FirstName\n FROM Customers\n WHERE (NOT (FirstName ILIKE '%Peter')) AND (NOT (FirstName ILIKE '%Peter %'))\n)" + }, + { + "Customers | where FirstName in ((Customers | project FirstName | where FirstName !hassuffix_cs 'Peter'));", + "SELECT *\nFROM Customers\nWHERE FirstName IN (\n SELECT FirstName\n FROM Customers\n WHERE (NOT endsWith(FirstName, 'Peter')) AND (NOT (FirstName LIKE '%Peter %'))\n)" + }, + { + "Customers | where FirstName in ((Customers | project FirstName | where FirstName !startswith 'Peter'));", + "SELECT *\nFROM Customers\nWHERE FirstName IN (\n SELECT FirstName\n FROM Customers\n WHERE NOT (FirstName ILIKE 'Peter%')\n)" + }, + { + "Customers | where FirstName in ((Customers | project FirstName | where FirstName !startswith_cs 'Peter'));", + "SELECT *\nFROM Customers\nWHERE 
FirstName IN (\n SELECT FirstName\n FROM Customers\n WHERE NOT startsWith(FirstName, 'Peter')\n)" + }, + { + "Customers | where FirstName !in~ ((Customers | project FirstName | where FirstName !in~ ('peter', 'apple')));", + "SELECT *\nFROM Customers\nWHERE lower(FirstName) NOT IN (\n SELECT lower(FirstName)\n FROM Customers\n WHERE lower(FirstName) NOT IN (lower('peter'), lower('apple'))\n)" + }, + { + "Customers | where FirstName in ((Customers | project FirstName | where FirstName in~ ('peter', 'apple')));", + "SELECT *\nFROM Customers\nWHERE FirstName IN (\n SELECT FirstName\n FROM Customers\n WHERE lower(FirstName) IN (lower('peter'), lower('apple'))\n)" + }, + { + "Customers | where substring(FirstName,0,3) in~ ((Customers | project substring(FirstName,0,3) | where FirstName in~ ('peter', 'apple')));", + "SELECT *\nFROM Customers\nWHERE lower(if(toInt64(length(FirstName)) <= 0, '', substr(FirstName, (((0 % toInt64(length(FirstName))) + toInt64(length(FirstName))) % toInt64(length(FirstName))) + 1, 3))) IN (\n SELECT lower(if(toInt64(length(FirstName)) <= 0, '', substr(FirstName, (((0 % toInt64(length(FirstName))) + toInt64(length(FirstName))) % toInt64(length(FirstName))) + 1, 3)))\n FROM Customers\n WHERE lower(FirstName) IN (lower('peter'), lower('apple'))\n)" + }, + { + "Customers | where FirstName in~ ((Customers | where FirstName !in~ ('peter', 'apple')| project FirstName));", + "SELECT *\nFROM Customers\nWHERE lower(FirstName) IN (\n SELECT lower(FirstName)\n FROM Customers\n WHERE lower(FirstName) NOT IN (lower('peter'), lower('apple'))\n)" + }, + { + "Customers | where FirstName in ((Customers | project FirstName, LastName, Age));", + "SELECT *\nFROM Customers\nWHERE FirstName IN (\n SELECT FirstName\n FROM Customers\n)" + }, + { + "Customers | where FirstName in~ ((Customers | project FirstName, LastName, Age|where Age <30));", + "SELECT *\nFROM Customers\nWHERE lower(FirstName) IN (\n SELECT lower(FirstName)\n FROM Customers\n WHERE Age < 30\n)" + }, 
+ { + "Customers | where FirstName !in ((Customers | project FirstName, LastName, Age |where Age <30 ));", + "SELECT *\nFROM Customers\nWHERE FirstName NOT IN (\n SELECT FirstName\n FROM Customers\n WHERE Age < 30\n)" + }, + { + "Customers | where FirstName !in~ ((Customers | project FirstName, LastName, Age |where Age <30));", + "SELECT *\nFROM Customers\nWHERE lower(FirstName) NOT IN (\n SELECT lower(FirstName)\n FROM Customers\n WHERE Age < 30\n)" + } +}))); diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index b452bd276429..8e2ed63af65d 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -1,9 +1,9 @@ +#include #include #include #include #include #include -#include #include #include #include @@ -13,9 +13,6 @@ #include #include #include -#include -#include -#include namespace { @@ -23,13 +20,6 @@ using namespace DB; using namespace std::literals; } - -struct ParserTestCase -{ - const std::string_view input_text; - const char * expected_ast = nullptr; -}; - std::ostream & operator<<(std::ostream & ostr, const std::shared_ptr parser) { return ostr << "Parser: " << parser->getName(); @@ -40,51 +30,6 @@ std::ostream & operator<<(std::ostream & ostr, const ParserTestCase & test_case) return ostr << "ParserTestCase input: " << test_case.input_text; } -class ParserTest : public ::testing::TestWithParam, ParserTestCase>> -{}; - -TEST_P(ParserTest, parseQuery) -{ - const auto & parser = std::get<0>(GetParam()); - const auto & [input_text, expected_ast] = std::get<1>(GetParam()); - - ASSERT_NE(nullptr, parser); - - if (expected_ast) - { - if (std::string(expected_ast).starts_with("throws")) - { - EXPECT_THROW(parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0), DB::Exception); - } - else - { - ASTPtr ast; - ASSERT_NO_THROW(ast = parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0)); - if (std::string("CREATE USER or ALTER USER query") != parser->getName() - && 
std::string("ATTACH access entity query") != parser->getName()) - { - EXPECT_EQ(expected_ast, serializeAST(*ast->clone(), false)); - } - else - { - if (input_text.starts_with("ATTACH")) - { - auto salt = (dynamic_cast(ast.get())->auth_data)->getSalt(); - EXPECT_TRUE(std::regex_match(salt, std::regex(expected_ast))); - } - else - { - EXPECT_TRUE(std::regex_match(serializeAST(*ast->clone(), false), std::regex(expected_ast))); - } - } - } - } - else - { - ASSERT_THROW(parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0), DB::Exception); - } -} - INSTANTIATE_TEST_SUITE_P(ParserOptimizeQuery, ParserTest, ::testing::Combine( ::testing::Values(std::make_shared()), @@ -326,10 +271,6 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, "Customers | project FirstName,LastName,Occupation | take 3 | project FirstName,LastName", "SELECT\n FirstName,\n LastName\nFROM\n(\n SELECT\n FirstName,\n LastName,\n Occupation\n FROM Customers\n LIMIT 3\n)" }, - { - "Customers | project FirstName,LastName,Occupation | take 3 | project FirstName,LastName,Education", - "SELECT\n FirstName,\n LastName,\n Education\nFROM\n(\n SELECT\n FirstName,\n LastName,\n Occupation\n FROM Customers\n LIMIT 3\n)" - }, { "Customers | sort by FirstName desc", "SELECT *\nFROM Customers\nORDER BY FirstName DESC" @@ -412,23 +353,23 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, }, { "Customers |summarize count() by Occupation", - "SELECT\n count(),\n Occupation\nFROM Customers\nGROUP BY Occupation" + "SELECT\n Occupation,\n count() AS count_\nFROM Customers\nGROUP BY Occupation" }, { "Customers|summarize sum(Age) by Occupation", - "SELECT\n sum(Age),\n Occupation\nFROM Customers\nGROUP BY Occupation" + "SELECT\n Occupation,\n sum(Age) AS sum_Age\nFROM Customers\nGROUP BY Occupation" }, { "Customers|summarize avg(Age) by Occupation", - "SELECT\n avg(Age),\n Occupation\nFROM Customers\nGROUP BY Occupation" + "SELECT\n Occupation,\n avg(Age) AS avg_Age\nFROM Customers\nGROUP BY Occupation" 
}, { "Customers|summarize min(Age) by Occupation", - "SELECT\n min(Age),\n Occupation\nFROM Customers\nGROUP BY Occupation" + "SELECT\n Occupation,\n min(Age) AS min_Age\nFROM Customers\nGROUP BY Occupation" }, { "Customers |summarize max(Age) by Occupation", - "SELECT\n max(Age),\n Occupation\nFROM Customers\nGROUP BY Occupation" + "SELECT\n Occupation,\n max(Age) AS max_Age\nFROM Customers\nGROUP BY Occupation" }, { "Customers | where FirstName contains 'pet'", @@ -446,32 +387,163 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, "Customers | where FirstName !endswith 'er'", "SELECT *\nFROM Customers\nWHERE NOT (FirstName ILIKE '%er')" }, + { + "Customers | where FirstName matches regex 'P.*r'", + "SELECT *\nFROM Customers\nWHERE match(FirstName, 'P.*r')" + }, + { + "Customers | where FirstName startswith 'pet'", + "SELECT *\nFROM Customers\nWHERE FirstName ILIKE 'pet%'" + }, + { + "Customers | where FirstName !startswith 'pet'", + "SELECT *\nFROM Customers\nWHERE NOT (FirstName ILIKE 'pet%')" + }, + { + "Customers | where Age in ((Customers|project Age|where Age < 30))", + "SELECT *\nFROM Customers\nWHERE Age IN (\n SELECT Age\n FROM Customers\n WHERE Age < 30\n)" + }, { "Customers | where Education has 'School'", - "SELECT *\nFROM Customers\nWHERE hasTokenCaseInsensitive(Education, 'School')" + "SELECT *\nFROM Customers\nWHERE ifNull(hasTokenCaseInsensitiveOrNull(Education, 'School'), hasTokenCaseInsensitive(Education, 'School') AND (positionCaseInsensitive(Education, 'School') > 0))" }, { "Customers | where Education !has 'School'", - "SELECT *\nFROM Customers\nWHERE NOT hasTokenCaseInsensitive(Education, 'School')" + "SELECT *\nFROM Customers\nWHERE NOT ifNull(hasTokenCaseInsensitiveOrNull(Education, 'School'), hasTokenCaseInsensitive(Education, 'School') AND (positionCaseInsensitive(Education, 'School') > 0))" }, { "Customers | where Education has_cs 'School'", - "SELECT *\nFROM Customers\nWHERE hasToken(Education, 'School')" + "SELECT *\nFROM 
Customers\nWHERE ifNull(hasTokenOrNull(Education, 'School'), hasToken(Education, 'School') AND (position(Education, 'School') > 0))" }, { "Customers | where Education !has_cs 'School'", - "SELECT *\nFROM Customers\nWHERE NOT hasToken(Education, 'School')" + "SELECT *\nFROM Customers\nWHERE NOT ifNull(hasTokenOrNull(Education, 'School'), hasToken(Education, 'School') AND (position(Education, 'School') > 0))" }, { - "Customers | where FirstName matches regex 'P.*r'", - "SELECT *\nFROM Customers\nWHERE match(FirstName, 'P.*r')" + "Customers|where Occupation has_any ('Skilled','abcd')", + "SELECT *\nFROM Customers\nWHERE ifNull(hasTokenCaseInsensitiveOrNull(Occupation, 'Skilled'), hasTokenCaseInsensitive(Occupation, 'Skilled') AND (positionCaseInsensitive(Occupation, 'Skilled') > 0)) OR ifNull(hasTokenCaseInsensitiveOrNull(Occupation, 'abcd'), hasTokenCaseInsensitive(Occupation, 'abcd') AND (positionCaseInsensitive(Occupation, 'abcd') > 0))" }, { - "Customers | where FirstName startswith 'pet'", - "SELECT *\nFROM Customers\nWHERE FirstName ILIKE 'pet%'" + "Customers|where Occupation has_all ('Skilled','abcd')", + "SELECT *\nFROM Customers\nWHERE ifNull(hasTokenCaseInsensitiveOrNull(Occupation, 'Skilled'), hasTokenCaseInsensitive(Occupation, 'Skilled') AND (positionCaseInsensitive(Occupation, 'Skilled') > 0)) AND ifNull(hasTokenCaseInsensitiveOrNull(Occupation, 'abcd'), hasTokenCaseInsensitive(Occupation, 'abcd') AND (positionCaseInsensitive(Occupation, 'abcd') > 0))" }, { - "Customers | where FirstName !startswith 'pet'", - "SELECT *\nFROM Customers\nWHERE NOT (FirstName ILIKE 'pet%')" - } + "Customers|where Occupation has_all (strcat('Skill','ed'),'Manual')", + "SELECT *\nFROM Customers\nWHERE ifNull(hasTokenCaseInsensitiveOrNull(Occupation, concat(ifNull(kql_tostring('Skill'), ''), ifNull(kql_tostring('ed'), ''), '')), hasTokenCaseInsensitive(Occupation, 'concat') AND hasTokenCaseInsensitive(Occupation, 'ifNull') AND hasTokenCaseInsensitive(Occupation, 'kql') AND 
hasTokenCaseInsensitive(Occupation, 'tostring') AND hasTokenCaseInsensitive(Occupation, 'Skill') AND hasTokenCaseInsensitive(Occupation, 'ifNull') AND hasTokenCaseInsensitive(Occupation, 'kql') AND hasTokenCaseInsensitive(Occupation, 'tostring') AND hasTokenCaseInsensitive(Occupation, 'ed') AND (positionCaseInsensitive(Occupation, concat(ifNull(kql_tostring('Skill'), ''), ifNull(kql_tostring('ed'), ''), '')) > 0)) AND ifNull(hasTokenCaseInsensitiveOrNull(Occupation, 'Manual'), hasTokenCaseInsensitive(Occupation, 'Manual') AND (positionCaseInsensitive(Occupation, 'Manual') > 0))" + }, + { + "Customers | where Occupation == strcat('Pro','fessional') | take 1", + "SELECT *\nFROM Customers\nWHERE Occupation = concat(ifNull(kql_tostring('Pro'), ''), ifNull(kql_tostring('fessional'), ''), '')\nLIMIT 1" + }, + { + "Customers | project countof('The cat sat on the mat', 'at')", + "SELECT countSubstrings('The cat sat on the mat', 'at')\nFROM Customers" + }, + { + "Customers | project countof('The cat sat on the mat', 'at', 'normal')", + "SELECT countSubstrings('The cat sat on the mat', 'at')\nFROM Customers" + }, + { + "Customers | project countof('The cat sat on the mat', 'at', 'regex')", + "SELECT countMatches('The cat sat on the mat', 'at')\nFROM Customers" + }, + { + "Customers | project extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 0, 'The price of PINEAPPLE ice cream is 10')", + "SELECT kql_extract('The price of PINEAPPLE ice cream is 10', '(\\b[A-Z]+\\b).+(\\b\\\\d+)', 0)\nFROM Customers" + }, + { + "Customers | project extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 1, 'The price of PINEAPPLE ice cream is 20')", + "SELECT kql_extract('The price of PINEAPPLE ice cream is 20', '(\\b[A-Z]+\\b).+(\\b\\\\d+)', 1)\nFROM Customers" + }, + { + "Customers | project extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 2, 'The price of PINEAPPLE ice cream is 30')", + "SELECT kql_extract('The price of PINEAPPLE ice cream is 30', '(\\b[A-Z]+\\b).+(\\b\\\\d+)', 2)\nFROM Customers" + }, + { + "Customers | project 
extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 2, 'The price of PINEAPPLE ice cream is 40', typeof(int))", + "SELECT accurateCastOrNull(kql_extract('The price of PINEAPPLE ice cream is 40', '(\\b[A-Z]+\\b).+(\\b\\\\d+)', 2), 'Int32')\nFROM Customers" + }, + { + "Customers | project extract_all('(\\w)(\\w+)(\\w)','The price of PINEAPPLE ice cream is 50')", + "SELECT extractAllGroups('The price of PINEAPPLE ice cream is 50', '(\\\\w)(\\\\w+)(\\\\w)')\nFROM Customers" + }, + { + " Customers | project split('aa_bb', '_')", + "SELECT if(empty('_'), splitByString(' ', 'aa_bb'), splitByString('_', 'aa_bb'))\nFROM Customers" + }, + { + "Customers | project split('aaa_bbb_ccc', '_', 1)", + "SELECT multiIf((length(if(empty('_'), splitByString(' ', 'aaa_bbb_ccc'), splitByString('_', 'aaa_bbb_ccc'))) >= 2) AND (2 > 0), arrayPushBack([], if(empty('_'), splitByString(' ', 'aaa_bbb_ccc'), splitByString('_', 'aaa_bbb_ccc'))[2]), 2 = 0, if(empty('_'), splitByString(' ', 'aaa_bbb_ccc'), splitByString('_', 'aaa_bbb_ccc')), arrayPushBack([], NULL[1]))\nFROM Customers" + }, + { + "Customers | project strcat_delim('-', '1', '2', 'A')", + "SELECT concat(ifNull(kql_tostring('1'), ''), '-', ifNull(kql_tostring('2'), ''), '-', ifNull(kql_tostring('A'), ''))\nFROM Customers" + }, + { + "print x=1, s=strcat('Hello', ', ', 'World!')", + "SELECT\n 1 AS x,\n concat(ifNull(kql_tostring('Hello'), ''), ifNull(kql_tostring(', '), ''), ifNull(kql_tostring('World!'), ''), '') AS s" + }, + { + "print parse_urlquery('https://john:123@google.com:1234/this/is/a/path?k1=v1&k2=v2#fragment')", + "SELECT concat('{', concat('\"Query Parameters\":', concat('{\"', replace(replace(if(position('https://john:123@google.com:1234/this/is/a/path?k1=v1&k2=v2#fragment', '?') > 0, queryString('https://john:123@google.com:1234/this/is/a/path?k1=v1&k2=v2#fragment'), 'https://john:123@google.com:1234/this/is/a/path?k1=v1&k2=v2#fragment'), '=', '\":\"'), '&', '\",\"'), '\"}')), '}')" + }, + { + "print strcmp('a','b')", + "SELECT 
multiIf('a' = 'b', 0, 'a' < 'b', -1, 1)" + }, + { + "print parse_url('https://john:123@google.com:1234/this/is/a/path?k1=v1&k2=v2#fragment')", + "SELECT concat('{', concat('\"Scheme\":\"', protocol('https://john:123@google.com:1234/this/is/a/path?k1=v1&k2=v2#fragment'), '\"'), ',', concat('\"Host\":\"', domain('https://john:123@google.com:1234/this/is/a/path?k1=v1&k2=v2#fragment'), '\"'), ',', concat('\"Port\":\"', toString(port('https://john:123@google.com:1234/this/is/a/path?k1=v1&k2=v2#fragment')), '\"'), ',', concat('\"Path\":\"', path('https://john:123@google.com:1234/this/is/a/path?k1=v1&k2=v2#fragment'), '\"'), ',', concat('\"Username\":\"', splitByChar(':', splitByChar('@', netloc('https://john:123@google.com:1234/this/is/a/path?k1=v1&k2=v2#fragment'))[1])[1], '\"'), ',', concat('\"Password\":\"', splitByChar(':', splitByChar('@', netloc('https://john:123@google.com:1234/this/is/a/path?k1=v1&k2=v2#fragment'))[1])[2], '\"'), ',', concat('\"Query Parameters\":', concat('{\"', replace(replace(queryString('https://john:123@google.com:1234/this/is/a/path?k1=v1&k2=v2#fragment'), '=', '\":\"'), '&', '\",\"'), '\"}')), ',', concat('\"Fragment\":\"', fragment('https://john:123@google.com:1234/this/is/a/path?k1=v1&k2=v2#fragment'), '\"'), '}')" + },{ + "Customers | summarize t = make_list(FirstName) by FirstName", + "SELECT\n FirstName,\n groupArrayIf(FirstName, FirstName IS NOT NULL) AS t\nFROM Customers\nGROUP BY FirstName" + }, + { + "Customers | summarize t = make_list(FirstName, 10) by FirstName", + "SELECT\n FirstName,\n groupArrayIf(10)(FirstName, FirstName IS NOT NULL) AS t\nFROM Customers\nGROUP BY FirstName" + }, + { + "Customers | summarize t = make_list_if(FirstName, Age > 10) by FirstName", + "SELECT\n FirstName,\n groupArrayIf(FirstName, Age > 10) AS t\nFROM Customers\nGROUP BY FirstName" + }, + { + "Customers | summarize t = make_list_if(FirstName, Age > 10, 10) by FirstName", + "SELECT\n FirstName,\n groupArrayIf(10)(FirstName, Age > 10) AS t\nFROM 
Customers\nGROUP BY FirstName" + }, + { + "Customers | summarize t = make_set(FirstName) by FirstName", + "SELECT\n FirstName,\n groupUniqArray(FirstName) AS t\nFROM Customers\nGROUP BY FirstName" + }, + { + "Customers | summarize t = make_set(FirstName, 10) by FirstName", + "SELECT\n FirstName,\n groupUniqArray(10)(FirstName) AS t\nFROM Customers\nGROUP BY FirstName" + }, + { + "Customers | summarize t = make_set_if(FirstName, Age > 10) by FirstName", + "SELECT\n FirstName,\n groupUniqArrayIf(FirstName, Age > 10) AS t\nFROM Customers\nGROUP BY FirstName" + }, + { + "Customers | summarize t = make_set_if(FirstName, Age > 10, 10) by FirstName", + "SELECT\n FirstName,\n groupUniqArrayIf(10)(FirstName, Age > 10) AS t\nFROM Customers\nGROUP BY FirstName" + }, + { + "print output = dynamic([1, 2, 3])", + "SELECT [1, 2, 3] AS output" + }, + { + "print output = dynamic(['a', 'b', 'c'])", + "SELECT ['a', 'b', 'c'] AS output" + }, + { + "T | extend duration = endTime - startTime", + "SELECT\n * EXCEPT duration,\n endTime - startTime AS duration\nFROM T" + }, + { + "T |project endTime, startTime | extend duration = endTime - startTime", + "SELECT\n * EXCEPT duration,\n endTime - startTime AS duration\nFROM\n(\n SELECT\n endTime,\n startTime\n FROM T\n)" + }, + { + "T | extend c =c*2, b-a, d = a +b , a*b", + "SELECT\n * EXCEPT c EXCEPT d,\n c * 2 AS c,\n b - a AS Column1,\n a + b AS d,\n a * b AS Column2\nFROM T" + } }))); diff --git a/src/Parsers/tests/gtest_common.cpp b/src/Parsers/tests/gtest_common.cpp new file mode 100644 index 000000000000..c9efdbe105c8 --- /dev/null +++ b/src/Parsers/tests/gtest_common.cpp @@ -0,0 +1,64 @@ +#include "gtest_common.h" + +#include +#include +#include + +#include + +#include + +TEST_P(ParserTest, parseQuery) +{ + const auto & parser = std::get<0>(GetParam()); + const auto & [input_text, expected_ast] = std::get<1>(GetParam()); + + ASSERT_NE(nullptr, parser); + + if (expected_ast) + { + if (std::string(expected_ast).starts_with("throws")) + 
{ + EXPECT_THROW(parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0), DB::Exception); + } + else + { + DB::ASTPtr ast; + ASSERT_NO_THROW(ast = parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0)); + if (std::string("CREATE USER or ALTER USER query") != parser->getName() + && std::string("ATTACH access entity query") != parser->getName()) + { + EXPECT_EQ(expected_ast, serializeAST(*ast->clone(), false)); + } + else + { + if (input_text.starts_with("ATTACH")) + { + auto salt = (dynamic_cast(ast.get())->auth_data)->getSalt(); + EXPECT_TRUE(std::regex_match(salt, std::regex(expected_ast))); + } + else + { + EXPECT_TRUE(std::regex_match(serializeAST(*ast->clone(), false), std::regex(expected_ast))); + } + } + } + } + else + { + ASSERT_THROW(parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0), DB::Exception); + } +} + +TEST_P(ParserRegexTest, parseQuery) +{ + const auto & parser = std::get<0>(GetParam()); + const auto & [input_text, expected_ast] = std::get<1>(GetParam()); + + ASSERT_TRUE(parser); + ASSERT_TRUE(expected_ast); + + DB::ASTPtr ast; + ASSERT_NO_THROW(ast = parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0)); + EXPECT_THAT(serializeAST(*ast->clone(), false), ::testing::MatchesRegex(expected_ast)); +} diff --git a/src/Parsers/tests/gtest_common.h b/src/Parsers/tests/gtest_common.h new file mode 100644 index 000000000000..4eca9390d92c --- /dev/null +++ b/src/Parsers/tests/gtest_common.h @@ -0,0 +1,17 @@ +#include + +#include + +#include + +struct ParserTestCase +{ + const std::string_view input_text; + const char * expected_ast = nullptr; +}; + +class ParserTest : public ::testing::TestWithParam, ParserTestCase>> +{}; + +class ParserRegexTest : public ::testing::TestWithParam, ParserTestCase>> +{}; diff --git a/src/Processors/Transforms/AddingDefaultsTransform.cpp b/src/Processors/Transforms/AddingDefaultsTransform.cpp index e6c2bcec2c85..bf8b380e7268 100644 --- 
a/src/Processors/Transforms/AddingDefaultsTransform.cpp +++ b/src/Processors/Transforms/AddingDefaultsTransform.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/tests/ci/clickhouse_helper.py b/tests/ci/clickhouse_helper.py index d60a9e6afd15..2ab1b8d46f3d 100644 --- a/tests/ci/clickhouse_helper.py +++ b/tests/ci/clickhouse_helper.py @@ -9,6 +9,7 @@ from get_robot_token import get_parameter_from_ssm from pr_info import PRInfo from report import TestResults +from env_helper import GITHUB_REPOSITORY class InsertException(Exception): @@ -142,7 +143,7 @@ def prepare_tests_results_for_clickhouse( check_name: str, ) -> List[dict]: - pull_request_url = "https://github.com/ClickHouse/ClickHouse/commits/master" + pull_request_url = "https://github.com/{}/commits/master".format(GITHUB_REPOSITORY) base_ref = "master" head_ref = "master" base_repo = pr_info.repo_full_name diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index d5d1b1a1085f..d19e21e5006f 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -15,7 +15,13 @@ from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse from commit_status_helper import post_commit_status -from env_helper import GITHUB_WORKSPACE, RUNNER_TEMP, GITHUB_RUN_URL +from env_helper import ( + GITHUB_WORKSPACE, + RUNNER_TEMP, + GITHUB_RUN_URL, + DOCKER_USER, + DOCKER_REPO, +) from get_robot_token import get_best_robot_token, get_parameter_from_ssm from pr_info import PRInfo from report import TestResults, TestResult @@ -94,7 +100,7 @@ def get_images_dict(repo_path: str, image_file_path: str) -> ImagesDict: def get_changed_docker_images( - pr_info: PRInfo, images_dict: ImagesDict + pr_info: PRInfo, images_dict: ImagesDict, docker_repo: str ) -> Set[DockerImage]: if not images_dict: @@ -114,7 +120,7 @@ def get_changed_docker_images( for dockerfile_dir, image_description in images_dict.items(): for f in 
files_changed: if f.startswith(dockerfile_dir): - name = image_description["name"] + name = docker_repo + "/" + image_description["name"] only_amd64 = image_description.get("only_amd64", False) logging.info( "Found changed file '%s' which affects " @@ -138,7 +144,7 @@ def get_changed_docker_images( dependent, image, ) - name = images_dict[dependent]["name"] + name = docker_repo + "/" + images_dict[dependent]["name"] only_amd64 = images_dict[dependent].get("only_amd64", False) changed_images.append(DockerImage(dependent, name, only_amd64, image)) index += 1 @@ -249,6 +255,7 @@ def build_and_push_one_image( "docker buildx build --builder default " f"--label build-url={GITHUB_RUN_URL} " f"{from_tag_arg}" + f"--build-arg DOCKER_REPO={DOCKER_REPO} " # A hack to invalidate cache, grep for it in docker/ dir f"--build-arg CACHE_INVALIDATOR={GITHUB_RUN_URL} " f"--tag {image.repo}:{version_string} " @@ -383,7 +390,6 @@ def parse_args() -> argparse.Namespace: default=argparse.SUPPRESS, help="don't push images to docker hub", ) - return parser.parse_args() @@ -398,10 +404,11 @@ def main(): changed_json = os.path.join(TEMP_PATH, f"changed_images_{args.suffix}.json") else: changed_json = os.path.join(TEMP_PATH, "changed_images.json") - if args.push: subprocess.check_output( # pylint: disable=unexpected-keyword-arg - "docker login --username 'robotclickhouse' --password-stdin", + "docker login {} --username '{}' --password-stdin".format( + DOCKER_REPO, DOCKER_USER + ), input=get_parameter_from_ssm("dockerhub_robot_password"), encoding="utf-8", shell=True, @@ -425,7 +432,7 @@ def main(): # If the event does not contain diff, nothing will be built pass - changed_images = get_changed_docker_images(pr_info, images_dict) + changed_images = get_changed_docker_images(pr_info, images_dict, DOCKER_REPO) if changed_images: logging.info( "Has changed images: %s", ", ".join([im.path for im in changed_images]) diff --git a/tests/ci/docker_manifests_merge.py 
b/tests/ci/docker_manifests_merge.py index 9a77a91647ee..7e0c7e038f3f 100644 --- a/tests/ci/docker_manifests_merge.py +++ b/tests/ci/docker_manifests_merge.py @@ -11,7 +11,7 @@ from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse from commit_status_helper import post_commit_status -from env_helper import RUNNER_TEMP +from env_helper import RUNNER_TEMP, DOCKER_USER, DOCKER_REPO from get_robot_token import get_best_robot_token, get_parameter_from_ssm from pr_info import PRInfo from report import TestResults, TestResult @@ -61,7 +61,6 @@ def parse_args() -> argparse.Namespace: default=argparse.SUPPRESS, help="don't push images to docker hub", ) - args = parser.parse_args() if len(args.suffixes) < 2: parser.error("more than two --suffix should be given") @@ -174,7 +173,9 @@ def main(): args = parse_args() if args.push: subprocess.check_output( # pylint: disable=unexpected-keyword-arg - "docker login --username 'robotclickhouse' --password-stdin", + "docker login {} --username '{}' --password-stdin".format( + DOCKER_REPO, DOCKER_USER + ), input=get_parameter_from_ssm("dockerhub_robot_password"), encoding="utf-8", shell=True, diff --git a/tests/ci/docker_test.py b/tests/ci/docker_test.py index 9d68f4364391..8ccfa471871c 100644 --- a/tests/ci/docker_test.py +++ b/tests/ci/docker_test.py @@ -5,7 +5,7 @@ from unittest.mock import patch, MagicMock from pathlib import Path -from env_helper import GITHUB_RUN_URL +from env_helper import GITHUB_RUN_URL, DOCKER_REPO from pr_info import PRInfo from report import TestResult import docker_images_check as di @@ -32,7 +32,9 @@ def test_get_changed_docker_images(self): images = sorted( list( di.get_changed_docker_images( - pr_info, di.get_images_dict("/", self.docker_images_path) + pr_info, + di.get_images_dict("/", self.docker_images_path), + DOCKER_REPO, ) ) ) @@ -129,6 +131,7 @@ def test_build_and_push_one_image(self, mock_machine, mock_popen): self.assertIn( f"docker buildx build --builder default 
--label build-url={GITHUB_RUN_URL} " "--build-arg FROM_TAG=version " + f"--build-arg DOCKER_REPO={DOCKER_REPO} " f"--build-arg CACHE_INVALIDATOR={GITHUB_RUN_URL} " "--tag name:version --cache-from type=registry,ref=name:version " "--cache-from type=registry,ref=name:latest " @@ -146,6 +149,7 @@ def test_build_and_push_one_image(self, mock_machine, mock_popen): self.assertIn( f"docker buildx build --builder default --label build-url={GITHUB_RUN_URL} " "--build-arg FROM_TAG=version2 " + f"--build-arg DOCKER_REPO={DOCKER_REPO} " f"--build-arg CACHE_INVALIDATOR={GITHUB_RUN_URL} " "--tag name:version2 --cache-from type=registry,ref=name:version2 " "--cache-from type=registry,ref=name:latest " @@ -162,6 +166,7 @@ def test_build_and_push_one_image(self, mock_machine, mock_popen): mock_machine.assert_not_called() self.assertIn( f"docker buildx build --builder default --label build-url={GITHUB_RUN_URL} " + f"--build-arg DOCKER_REPO={DOCKER_REPO} " f"--build-arg CACHE_INVALIDATOR={GITHUB_RUN_URL} " "--tag name:version2 --cache-from type=registry,ref=name:version2 " "--cache-from type=registry,ref=name:latest " @@ -180,6 +185,7 @@ def test_build_and_push_one_image(self, mock_machine, mock_popen): mock_machine.assert_not_called() self.assertIn( f"docker buildx build --builder default --label build-url={GITHUB_RUN_URL} " + f"--build-arg DOCKER_REPO={DOCKER_REPO} " f"--build-arg CACHE_INVALIDATOR={GITHUB_RUN_URL} " "--tag name:version2 --cache-from type=registry,ref=name:version2 " "--cache-from type=registry,ref=name:latest " diff --git a/tests/ci/env_helper.py b/tests/ci/env_helper.py index a5a4913be0b4..1f45f5d337c6 100644 --- a/tests/ci/env_helper.py +++ b/tests/ci/env_helper.py @@ -31,6 +31,15 @@ "{pr_or_release}/{commit}/{build_name}/{artifact}" ) +DOCKER_REPO = os.getenv("DOCKER_REPO", "docker.io") +DOCKER_USER = os.getenv("DOCKER_USER", "robotclickhouse") +S3_REGION = os.getenv("S3_REGION", "us-east-1") +S3_ENDPOINT = os.getenv("S3_ENDPOINT", "https://s3.amazonaws.com") 
+VAULT_PATH = os.getenv("VAULT_PATH") +VAULT_TOKEN = os.getenv("VAULT_TOKEN") +VAULT_URL = os.getenv("VAULT_URL") +VAULT_MOUNT_POINT = os.getenv("VAULT_MOUNT_POINT", "secret") + # These parameters are set only on demand, and only once _GITHUB_JOB_ID = "" _GITHUB_JOB_URL = "" diff --git a/tests/ci/get_robot_token.py b/tests/ci/get_robot_token.py index 6ecaf468ed15..1ea4b6654eb3 100644 --- a/tests/ci/get_robot_token.py +++ b/tests/ci/get_robot_token.py @@ -3,6 +3,8 @@ from dataclasses import dataclass import boto3 # type: ignore +import hvac # type: ignore # pylint: disable=import-error +from env_helper import VAULT_URL, VAULT_TOKEN, VAULT_PATH, VAULT_MOUNT_POINT from github import Github from github.AuthenticatedUser import AuthenticatedUser @@ -15,18 +17,44 @@ class Token: def get_parameter_from_ssm(name, decrypt=True, client=None): - if not client: - client = boto3.client("ssm", region_name="us-east-1") - return client.get_parameter(Name=name, WithDecryption=decrypt)["Parameter"]["Value"] + if VAULT_URL: + if not client: + client = hvac.Client(url=VAULT_URL, token=VAULT_TOKEN) + parameter = client.secrets.kv.v2.read_secret_version( + mount_point=VAULT_MOUNT_POINT, path=VAULT_PATH + )["data"]["data"][name] + else: + if not client: + client = boto3.client("ssm", region_name="us-east-1") + parameter = client.get_parameter(Name=name, WithDecryption=decrypt)[ + "Parameter" + ]["Value"] + return parameter def get_best_robot_token(token_prefix_env_name="github_robot_token_"): - client = boto3.client("ssm", region_name="us-east-1") - parameters = client.describe_parameters( - ParameterFilters=[ - {"Key": "Name", "Option": "BeginsWith", "Values": [token_prefix_env_name]} + client = None + if VAULT_URL: + client = hvac.Client(url=VAULT_URL, token=VAULT_TOKEN) + response = client.secrets.kv.read_secret_version( + path=VAULT_PATH, mount_point=VAULT_MOUNT_POINT + ) + parameters = [ + {"Name": p} + for p in response["data"]["data"] + if p.startswith(token_prefix_env_name) ] - 
)["Parameters"] + else: + client = boto3.client("ssm", region_name="us-east-1") + parameters = client.describe_parameters( + ParameterFilters=[ + { + "Key": "Name", + "Option": "BeginsWith", + "Values": [token_prefix_env_name], + } + ] + )["Parameters"] assert parameters token = None diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index 7119f4437199..994208774c90 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -21,6 +21,7 @@ TRUSTED_ORG_IDS = { 54801242, # clickhouse + 96197510, # ClibMouse } OK_SKIP_LABELS = {"release", "pr-backport", "pr-cherrypick"} diff --git a/tests/ci/s3_helper.py b/tests/ci/s3_helper.py index fbe9f33b49bb..2ddd5c6f6521 100644 --- a/tests/ci/s3_helper.py +++ b/tests/ci/s3_helper.py @@ -16,6 +16,8 @@ CI, S3_URL, S3_DOWNLOAD, + S3_REGION, + S3_ENDPOINT, ) from compress_files import compress_file_fast @@ -40,9 +42,10 @@ def _flatten_list(lst): class S3Helper: - def __init__(self, host=S3_URL, download_host=S3_DOWNLOAD): - self.session = boto3.session.Session(region_name="us-east-1") - self.client = self.session.client("s3", endpoint_url=host) + def __init__(self, host=S3_URL, download_host=S3_DOWNLOAD, endpoint=S3_ENDPOINT): + self.session = boto3.session.Session(region_name=S3_REGION) + self.client = self.session.client("s3", endpoint_url=endpoint) + self.endpoint = endpoint self.host = host self.download_host = download_host @@ -107,8 +110,13 @@ def _upload_file_to_s3(self, bucket_name: str, file_path: str, s3_path: str) -> logging.info("Upload %s to %s.
Meta: %s", file_path, s3_path, metadata) # last two replacements are specifics of AWS urls: # https://jamesd3142.wordpress.com/2018/02/28/amazon-s3-and-the-plus-symbol/ - url = f"{self.download_host}/{bucket_name}/{s3_path}" - return url.replace("+", "%2B").replace(" ", "%20") + return ( + "{host}/{bucket}/{path}".format( + host=self.endpoint, bucket=bucket_name, path=s3_path + ) + .replace("+", "%2B") + .replace(" ", "%20") + ) def upload_test_report_to_s3(self, file_path: str, s3_path: str) -> str: if CI: @@ -179,7 +187,9 @@ def upload_task(file_path): t = time.time() except Exception as ex: logging.critical("Failed to upload file, expcetion %s", ex) - return f"{self.download_host}/{bucket_name}/{s3_path}" + return "{host}/{bucket}/{path}".format( + host=self.endpoint, bucket=bucket_name, path=s3_path + ) p = Pool(256) diff --git a/tests/ci/workflow_approve_rerun_lambda/app.py b/tests/ci/workflow_approve_rerun_lambda/app.py index a4a5a013c36f..712618eae4c8 100644 --- a/tests/ci/workflow_approve_rerun_lambda/app.py +++ b/tests/ci/workflow_approve_rerun_lambda/app.py @@ -123,6 +123,19 @@ "tonickkozlov", # Cloudflare "tylerhannan", # ClickHouse Employee "myrrc", # Mike Kot, DoubleCloud + "ch-devops", + "larryluogit", + "bkuschel", + "SadiHassan", + "kashwy", + "HeenaBansal2009", + "umang8223", + "HarryLeeIBM", + "ltrk2", + "MeenaRenganathan22", + "mcmajam", + "bemitc", + "vibhaKulka", ] } diff --git a/tests/queries/0_stateless/02366_kql_count.reference b/tests/queries/0_stateless/02366_kql_count.reference new file mode 100644 index 000000000000..dde58001af97 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_count.reference @@ -0,0 +1,5 @@ +6 +4 +2 +2 +4 diff --git a/tests/queries/0_stateless/02366_kql_count.sql b/tests/queries/0_stateless/02366_kql_count.sql new file mode 100644 index 000000000000..2d630316d6b0 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_count.sql @@ -0,0 +1,19 @@ +DROP TABLE IF EXISTS Customers; +CREATE TABLE Customers +( + 
FirstName Nullable(String), + LastName String, + Occupation String, + Education String, + Age Nullable(UInt8) +) ENGINE = Memory; + +INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28), ('Stephanie','Cox','Management abcd defg','Bachelors',33),('Peter','Nara','Skilled Manual','Graduate Degree',26),('Latoya','Shen','Professional','Graduate Degree',25),('Apple','','Skilled Manual','Bachelors',28),(NULL,'why','Professional','Partial College',38); + +set dialect = 'kusto'; + +Customers | count; +Customers | where Age< 30 | count; +Customers | where Age< 30 | limit 2 | count; +Customers | where Age< 30 | limit 2 | count | project Count; +Customers |project FirstName|where FirstName != 'Peter'|sort by FirstName asc nulls first|count; \ No newline at end of file diff --git a/tests/queries/0_stateless/02366_kql_create_table.reference b/tests/queries/0_stateless/02366_kql_create_table.reference new file mode 100644 index 000000000000..35136b5ff425 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_create_table.reference @@ -0,0 +1,4 @@ +-- test create table -- +Theodore +Diaz +Theodore Diaz 28 diff --git a/tests/queries/0_stateless/02366_kql_create_table.sql b/tests/queries/0_stateless/02366_kql_create_table.sql new file mode 100644 index 000000000000..b266679b06aa --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_create_table.sql @@ -0,0 +1,29 @@ +DROP TABLE IF EXISTS Customers; +CREATE TABLE Customers +( + FirstName Nullable(String), + LastName String, + Occupation String, + Education String, + Age Nullable(UInt8) +) ENGINE = Memory; + +INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28),('Stephanie','Cox','Management abcd defg','Bachelors',33),('Peter','Nara','Skilled Manual','Graduate Degree',26),('Latoya','Shen','Professional','Graduate Degree',25),('Apple','','Skilled Manual','Bachelors',28),(NULL,'why','Professional','Partial College',38); +Select '-- test create table --' ; +Select * from 
kql(Customers|project FirstName) limit 1;; +DROP TABLE IF EXISTS kql_table1; +CREATE TABLE kql_table1 ENGINE = Memory AS select *, now() as new_column From kql(Customers | project LastName | filter LastName=='Diaz'); +select LastName from kql_table1 limit 1; +DROP TABLE IF EXISTS kql_table2; +CREATE TABLE kql_table2 +( + FirstName Nullable(String), + LastName String, + Age Nullable(UInt8) +) ENGINE = Memory; +INSERT INTO kql_table2 select * from kql(Customers|project FirstName,LastName,Age | filter FirstName=='Theodore'); +select * from kql_table2 limit 1; +-- select * from kql(Customers | where FirstName !in ("test", "test2")); +DROP TABLE IF EXISTS Customers; +DROP TABLE IF EXISTS kql_table1; +DROP TABLE IF EXISTS kql_table2; \ No newline at end of file diff --git a/tests/queries/0_stateless/02366_kql_datatype.reference b/tests/queries/0_stateless/02366_kql_datatype.reference new file mode 100644 index 000000000000..12a9357ad4c9 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_datatype.reference @@ -0,0 +1,143 @@ +-- bool +true +\N +-- int +123 +\N +-2147483648 +2147483647 +-- long +123 +255 +-1 +\N +-9223372036854775808 +9223372036854775807 +456 +-- real +0.01 +\N +nan +inf +-inf +-- datetime +2015-12-31 23:59:59.900000000 +2015-12-31 00:00:00.000000000 +2014-05-25 08:20:03.123456000 +2014-11-08 15:55:55.000000000 +2014-11-08 15:55:00.000000000 +2014-11-08 00:00:00.000000000 +\N +2014-05-25 08:20:03.123456000 +2014-11-08 15:55:55.123456000 +31536000000000000 +1970-05-11 13:45:07.456345700 +-- time +\N +93783000000000 +93783123000000 +-93783123000000 +93783000000000 +93780000000000 +7380000000000 +7384000000000 +7384567890100 +1216984123450000 +45055123000000 +86400000000000 +-86400000000000 +0 +600 +172800000000000 +259200000000000 +-- timespan (time) +\N +172800000000000 +5400000000000 +1800000000000 +10000000000 +100000000 +100000000 +10000 +100 +3 +1120343 +129600000000000 +false +true +864000000000 +864000000000 +1.00:00:00 +2.04:08:16.1234567 
+331.08:12:40 +165.16:06:20 +-1.01:01:01.1234567 +864000000000 +-- guid +\N +-- null +1 +\N \N \N \N \N +-- decimal +\N +123.345 +100000 +-- dynamic +\N +1 +86400000000000 +[1,2,3] +[[1],[2],[3]] +['a','b','c'] +-- cast functions +true +1 +-- tobool("false") +false +1 +-- tobool(1) +true +1 +-- tobool(123) +true +1 +-- tobool("abc") +\N +\N +-- todouble() +123.4 +\N +-- toreal() +123.4 +\N +-- toint() +1 +\N +-- tostring() +123 + +-- todatetime() +1 +1 +1 +-- totimespan() +\N +100 +60000000000 +\N +1120343 +1120343 +59400000000000 +\N +-- tolong() +123 +\N +-- todecimal() +123.345 +\N +\N +100000 +0.00001 +123.561 +653.4 +9999999999999999999999999999999999 diff --git a/tests/queries/0_stateless/02366_kql_datatype.sql b/tests/queries/0_stateless/02366_kql_datatype.sql new file mode 100644 index 000000000000..ad2a8c5e8798 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_datatype.sql @@ -0,0 +1,189 @@ + +DROP TABLE IF EXISTS tb1; +create table tb1 ( +str String +)ENGINE = Memory; +INSERT INTO tb1 VALUES ('123.561') , ('653.4'); + +-- datatable(s:string, i:long) [ +-- '0', 0, +-- '1899', 1899, +-- '1900', 1900, +-- '2261', 2261, +-- '2262', 2262, +-- '10000', 10000 +-- ] + +drop table if exists datetime_test; +create table datetime_test(s String, i Int64) engine = Memory; +insert into datetime_test values ('0', 0), ('1899', 1899), ('1900', 1900), ('2261', 2261), ('2262', 2262), ('10000', 10000); + +set dialect = 'kusto'; +print '-- bool'; +print bool(true); +print bool(null); +print bool('false'); -- { clientError BAD_ARGUMENTS } +print '-- int'; +print int(123); +print int(null); +print int(-2147483648); +print int(2147483647); +print int('4'); -- { clientError BAD_ARGUMENTS } +print int(-2147483649); -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } +print int(2147483648); -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } +print '-- long'; +print long(123); +print long(0xff); +print long(-1); +print long(null); +print long(-9223372036854775808); 
+print long(9223372036854775807); +print 456; +-- print long(-9223372036854775809); -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } +print long(9223372036854775808); -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } +print long('9023'); -- { clientError BAD_ARGUMENTS } +print '-- real'; +print real(0.01); +print real(null); +print real(nan); +print real(+inf); +print real(-inf); +print double('4.2'); -- { clientError BAD_ARGUMENTS } +print '-- datetime'; +print datetime(2015-12-31 23:59:59.9); +print datetime(2015-12-31); +print datetime('2014-05-25T08:20:03.123456'); +print datetime('2014-11-08 15:55:55'); +print datetime('2014-11-08 15:55'); +print datetime('2014-11-08'); +print datetime(null); +print datetime('2014-05-25T08:20:03.123456Z'); +print datetime('2014-11-08 15:55:55.123456Z'); +print datetime('2022') - datetime('2021'); +print datetime('1970-05-11 13:45:07.456345672'); +print '-- time'; +print time(null); +print time(1.2:3:3); +print time(1.2:3:3.123); +print time(-1.2:3:3.123); +print time(001.02:03:03); +print time(001.02:03); +print time(02:03); +print time(02:03:04); +print time(02:03:04.5678901); +print time(24:03:04.5678901); -- { clientError BAD_ARGUMENTS } +print time(02:60:04.5678901); -- { clientError BAD_ARGUMENTS } +print time(02:03:60.5678901); -- { clientError BAD_ARGUMENTS } +print time(02:-03:04.5678901); -- { clientError BAD_ARGUMENTS } +print time(02:03:-04.5678901); -- { clientError BAD_ARGUMENTS } +print time(02:03:04.-5678901); -- { clientError BAD_ARGUMENTS } +print time(1.-02:03:04.5678901); -- { clientError BAD_ARGUMENTS } +print time(1.23); -- { clientError BAD_ARGUMENTS } +print time(02:03:04.56789012); -- { clientError BAD_ARGUMENTS } +print time(03:04.56789012); -- { clientError BAD_ARGUMENTS } +print time('14.02:03:04.12345'); +print time('12:30:55.123'); +print time(1d); +print time(-1d); +print time(6nanoseconds); +print time(6tick); +print time(2); +print time(2) + 1d; +print '-- timespan (time)'; +print 
timespan(null); +print timespan(2d); -- 2 days +print timespan(1.5h); -- 1.5 hour +print timespan(30m); -- 30 minutes +print timespan(10s); -- 10 seconds +print timespan(0.1s); -- 0.1 second +print timespan(100ms); -- 100 millisecond +print timespan(10microsecond); -- 10 microseconds +print timespan(1tick); -- 100 nanoseconds +print timespan(1.5h) / timespan(30m); +print timespan('12.23:12:23') / timespan(1s); +print (timespan(1.5d) / timespan(0.6d)) * timespan(0.6d); +print tobool(timespan(0s)); +print tobool(timespan(1d)); +print todouble(timespan(1d)); +-- print toint(timespan(1d)); -> 711573504 +print tolong(timespan(1d)); +print tostring(timespan(1d)); +print tostring(timespan(2d) + timespan(4h) + timespan(8m) + timespan(16s) + timespan(123millis) + timespan(456micros) + timespan(789nanos)); +print tostring((1h + 90d) * 2 + (6h + 32s + 30d + 2m) * 5); +print tostring(((1h + 90d) * 2 + (6h + 32s + 30d + 2m) * 5) / 2); +print tostring(-timespan(1d) - timespan(1h) - timespan(1m) - timespan(1s) - timespan(123456789nanos)); +print todecimal(timespan(1d)); +print 49h + (1h + 1m) * 999999h + 1s; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print 1h * 1h; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print 2h + 2; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print 2h - 2; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print '-- guid' +print guid(74be27de-1e4e-49d9-b579-fe0b331d3642); +print guid(null); +print '-- null'; +print isnull(null); +print bool(null), int(null), long(null), real(null), double(null); +print '-- decimal'; +print decimal(null); +print decimal(123.345); +print decimal(1e5); +print '-- dynamic'; -- no support for mixed types and bags for now +print dynamic(null); +print dynamic(1); +print dynamic(timespan(1d)); +print dynamic([1,2,3]); +print dynamic([[1], [2], [3]]); +print dynamic(['a', "b", 'c']); +print '-- cast functions' +print '--tobool("true")'; -- == true +print tobool('true'); -- == true +print tobool('true') == toboolean('true'); -- == true 
+print '-- tobool("false")'; -- == false +print tobool('false'); -- == false +print tobool('false') == toboolean('false'); -- == false +print '-- tobool(1)'; -- == true +print tobool(1); -- == true +print tobool(1) == toboolean(1); -- == true +print '-- tobool(123)'; -- == true +print tobool(123); -- == true +print tobool(123) == toboolean(123); -- == true +print '-- tobool("abc")'; -- == null +print tobool('abc'); -- == null +print tobool('abc') == toboolean('abc'); -- == null +print '-- todouble()'; +print todouble('123.4'); +print todouble('abc') == null; +print '-- toreal()'; +print toreal("123.4"); +print toreal('abc') == null; +print '-- toint()'; +print toint("123") == int(123); +print toint('abc'); +print '-- tostring()'; +print tostring(123); +print tostring(null); +print '-- todatetime()'; +print todatetime("2015-12-24") == datetime(2015-12-24); +print isnull(todatetime('abc')); +print todatetime('1970-05-11 13:45:07.456345672') == datetime('1970-05-11 13:45:07.456345672'); +print '-- totimespan()'; +print totimespan(null); +print totimespan(1tick); +print totimespan('0.00:01:00'); +print totimespan('abc'); +print totimespan('12.23:12:23') / totimespan(1s); +print totimespan(strcat('12.', '23', ':12:', '23')) / timespan(1s); +print totimespan(timespan(16:30)); +print totimespan("'asdadsasd"); +print '-- tolong()'; +print tolong('123'); +print tolong('abc'); +print '-- todecimal()'; +print todecimal(123.345); +print todecimal(null); +print todecimal('abc'); +print todecimal(1e5); +print todecimal(1e-5); +tb1 | project todecimal(str); +print todecimal('9999999999999999999999999999999999'); +-- print todecimal(4 * 2 + 3); -> 11 diff --git a/tests/queries/0_stateless/02366_kql_distinct.reference b/tests/queries/0_stateless/02366_kql_distinct.reference new file mode 100644 index 000000000000..74035603adfc --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_distinct.reference @@ -0,0 +1,30 @@ +-- distinct * -- +Theodore Diaz Skilled Manual Bachelors 28 
+Stephanie Cox Management abcd defg Bachelors 33 +Peter Nara Skilled Manual Graduate Degree 26 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +\N why Professional Partial College 38 +-- distinct one column -- +Skilled Manual +Management abcd defg +Professional +-- distinct two column -- +Skilled Manual Bachelors +Management abcd defg Bachelors +Skilled Manual Graduate Degree +Professional Graduate Degree +Professional Partial College +-- distinct with where -- +Skilled Manual Bachelors +Skilled Manual Graduate Degree +Professional Graduate Degree +-- distinct with where, order -- +Skilled Manual Bachelors +Skilled Manual Graduate Degree +Professional Graduate Degree +-- distinct with alias -- +8 +3 +6 +5 diff --git a/tests/queries/0_stateless/02366_kql_distinct.sql b/tests/queries/0_stateless/02366_kql_distinct.sql new file mode 100644 index 000000000000..04ef94b0e416 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_distinct.sql @@ -0,0 +1,31 @@ +DROP TABLE IF EXISTS Customers; +CREATE TABLE Customers +( + FirstName Nullable(String), + LastName String, + Occupation String, + Education String, + Age Nullable(UInt8) +) ENGINE = Memory; + +INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28), ('Stephanie','Cox','Management abcd defg','Bachelors',33),('Peter','Nara','Skilled Manual','Graduate Degree',26),('Latoya','Shen','Professional','Graduate Degree',25),('Apple','','Skilled Manual','Bachelors',28),(NULL,'why','Professional','Partial College',38); + +set dialect = 'kusto'; + +print '-- distinct * --'; +Customers | distinct *; + +print '-- distinct one column --'; +Customers | distinct Occupation; + +print '-- distinct two column --'; +Customers | distinct Occupation, Education; + +print '-- distinct with where --'; +Customers | where Age <30 | distinct Occupation, Education; + +print '-- distinct with where, order --'; +Customers | where Age <30 | order by Age| distinct Occupation, Education; + 
+print '-- distinct with alias --'; +Customers | project a = (Age % 10) | distinct a; \ No newline at end of file diff --git a/tests/queries/0_stateless/02366_kql_extend.reference b/tests/queries/0_stateless/02366_kql_extend.reference new file mode 100644 index 000000000000..ea841b6fb2ec --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_extend.reference @@ -0,0 +1,32 @@ +-- extend #1 -- +Aldi Apple 4 2016-09-10 400 +Costco Apple 2 2016-09-11 200 +-- extend #2 -- +Apple 200 +Apple 400 +-- extend #3 -- +Apple cost 480 on average based on 5 samples. +Snargaluff cost 28080 on average based on 5 samples. +-- extend #4 -- +1 +-- extend #5 -- +Aldi Apple 4 2016-09-10 Apple was purchased from Aldi for $4 on 2016-09-10T00:00:00.0000000Z 400 +Costco Apple 2 2016-09-11 Apple was purchased from Costco for $2 on 2016-09-11T00:00:00.0000000Z 200 +-- extend #6 -- +Aldi Apple 2016-09-10 400 +Costco Apple 2016-09-11 200 +Aldi Apple 2016-09-10 600 +Costco Snargaluff 2016-09-12 10000 +Aldi Apple 2016-09-12 700 +Aldi Snargaluff 2016-09-11 40000 +Costco Snargaluff 2016-09-12 10400 +Aldi Apple 2016-09-12 500 +Aldi Snargaluff 2016-09-11 60000 +Costco Snargaluff 2016-09-10 20000 +-- extend #7 -- +5 +-- extend #8 -- +-- extend #9 -- +-- extend #10 -- +-- extend #11 -- +5 [2,1] diff --git a/tests/queries/0_stateless/02366_kql_extend.sql b/tests/queries/0_stateless/02366_kql_extend.sql new file mode 100644 index 000000000000..9325a7662405 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_extend.sql @@ -0,0 +1,58 @@ +-- datatable(Supplier:string, Fruit:string, Price: real, Purchase:datetime) +-- [ +-- 'Aldi','Apple',4,'2016-09-10', +-- 'Costco','Apple',2,'2016-09-11', +-- 'Aldi','Apple',6,'2016-09-10', +-- 'Costco','Snargaluff',100,'2016-09-12', +-- 'Aldi','Apple',7,'2016-09-12', +-- 'Aldi','Snargaluff',400,'2016-09-11', +-- 'Costco','Snargaluff',104,'2016-09-12', +-- 'Aldi','Apple',5,'2016-09-12', +-- 'Aldi','Snargaluff',600,'2016-09-11', +-- 
'Costco','Snargaluff',200,'2016-09-10', +-- ] + +DROP TABLE IF EXISTS Ledger; +CREATE TABLE Ledger +( + Supplier Nullable(String), + Fruit String , + Price Float64, + Purchase Date +) ENGINE = Memory; +INSERT INTO Ledger VALUES ('Aldi','Apple',4,'2016-09-10'), ('Costco','Apple',2,'2016-09-11'), ('Aldi','Apple',6,'2016-09-10'), ('Costco','Snargaluff',100,'2016-09-12'), ('Aldi','Apple',7,'2016-09-12'), ('Aldi','Snargaluff',400,'2016-09-11'),('Costco','Snargaluff',104,'2016-09-12'),('Aldi','Apple',5,'2016-09-12'),('Aldi','Snargaluff',600,'2016-09-11'),('Costco','Snargaluff',200,'2016-09-10'); + +set dialect = 'kusto'; + +print '-- extend #1 --'; +Ledger | extend PriceInCents = 100 * Price | take 2; + +print '-- extend #2 --'; +Ledger | extend PriceInCents = 100 * Price | sort by PriceInCents asc | project Fruit, PriceInCents | take 2; + +print '-- extend #3 --'; +Ledger | extend PriceInCents = 100 * Price | sort by PriceInCents asc | project Fruit, PriceInCents | summarize AveragePrice = avg(PriceInCents), Purchases = count() by Fruit | extend Sentence = strcat(Fruit, ' cost ', tostring(AveragePrice), ' on average based on ', tostring(Purchases), ' samples.') | project Sentence; + +print '-- extend #4 --'; +Ledger | extend a = Price | extend b = a | extend c = a, d = b + 500 | extend Pass = bool(b == a and c == a and d == b + 500) | summarize binary_all_and(Pass); + +print '-- extend #5 --'; +Ledger | take 2 | extend strcat(Fruit, ' was purchased from ', Supplier, ' for $', tostring(Price), ' on ', tostring(Purchase)) | extend PriceInCents = 100 * Price; + +print '-- extend #6 --'; +Ledger | extend Price = 100 * Price; + +print '-- extend #7 --'; +print a = 4 | extend a = 5; + +print '-- extend #8 --'; +-- print x = 5 | extend array_sort_desc(range(0, x), range(1, x + 1)) + +print '-- extend #9 --'; +print x = 19 | extend = 4 + ; -- { clientError SYNTAX_ERROR } + +print '-- extend #10 --'; +Ledger | extend PriceInCents = * Price | sort by PriceInCents asc | project 
Fruit, PriceInCents | summarize AveragePrice = avg(PriceInCents), Purchases = count() by Fruit | extend Sentence = strcat(Fruit, ' cost ', tostring(AveragePrice), ' on average based on ', tostring(Purchases), ' samples.') | project Sentence; -- { clientError SYNTAX_ERROR } + +print '-- extend #11 --'; +print x = 5 | extend ex = array_sort_desc(dynamic([1, 2]), dynamic([3, 4])); diff --git a/tests/queries/0_stateless/02366_kql_func_binary.reference b/tests/queries/0_stateless/02366_kql_func_binary.reference new file mode 100644 index 000000000000..6276cd6d8675 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_binary.reference @@ -0,0 +1,7 @@ + -- binary functions +4 7 +1 +1 +1 +7 3 +1 diff --git a/tests/queries/0_stateless/02366_kql_func_binary.sql b/tests/queries/0_stateless/02366_kql_func_binary.sql new file mode 100644 index 000000000000..824022b564ce --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_binary.sql @@ -0,0 +1,8 @@ +set dialect='kusto'; +print ' -- binary functions'; +print binary_and(4,7), binary_or(4,7); +print binary_shift_left(1, 1) == binary_shift_left(1, 65); +print binary_shift_right(2, 1) == binary_shift_right(2, 65); +print binary_shift_right(binary_shift_left(1, 65), 65) == 1; +print binary_xor(2, 5), bitset_count_ones(42); +print bitset_count_ones(binary_shift_left(binary_and(4,7), 1)); diff --git a/tests/queries/0_stateless/02366_kql_func_datetime.reference b/tests/queries/0_stateless/02366_kql_func_datetime.reference new file mode 100644 index 000000000000..d0a36d79e74c --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_datetime.reference @@ -0,0 +1,90 @@ +-- dayofmonth() +31 +-- dayofweek() +345600000000000 +172800000000000 +345600000000000 +-- dayofyear() +365 +-- getmonth() +10 +-- getyear() +2015 +-- hoursofday() +23 +-- startofday() +2017-01-01 00:00:00.000000000 +2016-12-31 00:00:00.000000000 +2017-01-02 00:00:00.000000000 +-- endofday() +2017-01-01 23:59:59.999999900 +2016-12-31 23:59:59.999999900 
+2017-01-02 23:59:59.999999900 +-- endofmonth() +2017-01-31 23:59:59.999999900 +2016-12-31 23:59:59.999999900 +2017-02-28 23:59:59.999999900 +2022-09-30 23:59:59.999999900 +-- startofweek() +2017-01-01 00:00:00.000000000 +2016-12-25 00:00:00.000000000 +2017-01-08 00:00:00.000000000 +-- endofweek() +2017-01-07 23:59:59.999999900 +2016-12-31 23:59:59.999999900 +2017-01-14 23:59:59.999999900 +-- startofyear() +2017-01-01 00:00:00.000000000 +2016-01-01 00:00:00.000000000 +2018-01-01 00:00:00.000000000 +-- endofyear() +2017-12-31 23:59:59.999999900 +2016-12-31 23:59:59.999999900 +2018-12-31 23:59:59.999999900 +-- unixtime_seconds_todatetime() +2019-01-01 00:00:00.000000000 +1970-01-02 00:00:00.000000000 +1969-12-31 00:00:00.000000000 +-- unixtime_microseconds_todatetime +2019-01-01 00:00:00.000000000 +-- unixtime_milliseconds_todatetime() +2019-01-01 00:00:00.000000000 +-- unixtime_nanoseconds_todatetime() +2019-01-01 00:00:00.000000000 +-- weekofyear() +52 +-- monthofyear() +12 +-- weekofyear() +52 +-- now() +1 +-- make_datetime() +2017-10-01 12:10:00.000000000 +\N +\N +\N +2017-10-01 12:10:00.000000000 +2017-10-01 12:11:00.123456700 +-- format_datetime +15-12-14 02:03:04.1234500 +17-01-29 [09:00:05] +2017-01-29 [09:00:05] +17-01-29 [09:00:05 AM] +-- format_timespan() +2:3:4.1234500 +29.09:00:05:12 +029.9:00:05 [1234500] +05/5-29:29,029.29_9[12]121234512 09 0 00 +-- make_timespan() +4320000000000 +4350000000000 +131455123000000 +-- ago() +-86400000000000 +-- datetime_diff() +17 2 13 4 29 2 5 10 +-- datetime_part() +2017 4 10 44 30 303 01 02 03 +-- datetime_add() +2018-01-01 00:00:00.000000000 2017-04-01 00:00:00.000000000 2017-02-01 00:00:00.000000000 2017-01-08 00:00:00.000000000 2017-01-02 00:00:00.000000000 2017-01-01 01:00:00.000000000 2017-01-01 00:01:00.000000000 2017-01-01 00:00:01.000000000 diff --git a/tests/queries/0_stateless/02366_kql_func_datetime.sql b/tests/queries/0_stateless/02366_kql_func_datetime.sql new file mode 100644 index 
000000000000..554a3c408a94 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_datetime.sql @@ -0,0 +1,98 @@ +set dialect = 'kusto'; + +print '-- dayofmonth()'; +print dayofmonth(datetime(2015-12-31)); +print '-- dayofweek()'; +print dayofweek(datetime(2015-12-31)); +print dayofweek(datetime(2015-12-14 18:54:00)) + 1d; +print dayofweek(datetime(2015-12-18 18:54:00)) - dayofweek(datetime(2015-12-14 18:54:00)); +print '-- dayofyear()'; +print dayofyear(datetime(2015-12-31)); +print '-- getmonth()'; +print getmonth(datetime(2015-10-12)); +print '-- getyear()'; +print getyear(datetime(2015-10-12)); +print '-- hoursofday()'; +print hourofday(datetime(2015-12-31 23:59:59.9)); +print '-- startofday()'; +print startofday(datetime(2017-01-01 10:10:17)); +print startofday(datetime(2017-01-01 10:10:17), -1); +print startofday(datetime(2017-01-01 10:10:17), 1); +print '-- endofday()'; +print endofday(datetime(2017-01-01 10:10:17)); +print endofday(datetime(2017-01-01 10:10:17), -1); +print endofday(datetime(2017-01-01 10:10:17), 1); +print '-- endofmonth()'; +print endofmonth(datetime(2017-01-01 10:10:17)); +print endofmonth(datetime(2017-01-01 10:10:17), -1); +print endofmonth(datetime(2017-01-01 10:10:17), 1); +print endofmonth(datetime(2022-09-23)); +print '-- startofweek()'; +print startofweek(datetime(2017-01-01 10:10:17)); +print startofweek(datetime(2017-01-01 10:10:17), -1); +print startofweek(datetime(2017-01-01 10:10:17), 1); +print '-- endofweek()'; +print endofweek(datetime(2017-01-01 10:10:17)); +print endofweek(datetime(2017-01-01 10:10:17), -1); +print endofweek(datetime(2017-01-01 10:10:17), 1); +print '-- startofyear()'; +print startofyear(datetime(2017-01-01 10:10:17)); +print startofyear(datetime(2017-01-01 10:10:17), -1); +print startofyear(datetime(2017-01-01 10:10:17), 1); +print '-- endofyear()'; +print endofyear(datetime(2017-01-01 10:10:17)); +print endofyear(datetime(2017-01-01 10:10:17), -1); +print endofyear(datetime(2017-01-01 10:10:17), 
1); +print '-- unixtime_seconds_todatetime()'; +print unixtime_seconds_todatetime(1546300800); +print unixtime_seconds_todatetime(86400); +print unixtime_seconds_todatetime(-86400); +print '-- unixtime_microseconds_todatetime'; +print unixtime_microseconds_todatetime(1546300800000000); +print '-- unixtime_milliseconds_todatetime()'; +print unixtime_milliseconds_todatetime(1546300800000); +print '-- unixtime_nanoseconds_todatetime()'; +print unixtime_nanoseconds_todatetime(1546300800000000000); +print '-- weekofyear()'; +print week_of_year(datetime(2000-01-01)); +print '-- monthofyear()'; +print monthofyear(datetime(2015-12-31)); +print '-- weekofyear()'; +print week_of_year(datetime(2000-01-01)); +print '-- now()'; +print getyear(now(-2d))>1900; +print '-- make_datetime()'; +print make_datetime(2017,10,01,12,10); +print make_datetime(2300,10,01,12,10); +print make_datetime(2020,14,30,12,10); +print make_datetime(2020,10,35,12,10); +print year_month_day_hour_minute = make_datetime(2017,10,01,12,10); +print year_month_day_hour_minute_second = make_datetime(2017,10,01,12,11,0.1234567); +print '-- format_datetime'; +print format_datetime(datetime(2015-12-14 02:03:04.12345), 'y-M-d h:m:s.fffffff'); +print format_datetime(datetime(2017-01-29 09:00:05),'yy-MM-dd [HH:mm:ss]'); +print format_datetime(datetime(2017-01-29 09:00:05), 'yyyy-M-dd [H:mm:ss]'); +print format_datetime(datetime(2017-01-29 09:00:05), 'yy-MM-dd [hh:mm:ss tt]'); +print '-- format_timespan()'; +print format_timespan(time('14.02:03:04.12345'), 'h:m:s.fffffff'); +print format_timespan(time('29.09:00:05.12345'), 'dd.hh:mm:ss:FF'); +print format_timespan(time('29.09:00:05.12345'), 'ddd.h:mm:ss [fffffff]'); +print format_timespan(time('29.09:00:05.12345'), 'ss/s-d:dd,ddd.dd_h[ff]FFfffffFF HH m mm'); +print '-- make_timespan()'; +print make_timespan(1,12); +print make_timespan(1,12,30); +print make_timespan(1,12,30,55.123); +print '-- ago()'; +print ago(1d) - now(); +print '-- datetime_diff()'; +print year = 
datetime_diff('year',datetime(2017-01-01),datetime(2000-12-31)), quarter = datetime_diff('quarter',datetime(2017-07-01),datetime(2017-03-30)), month = datetime_diff('month',datetime(2017-01-01),datetime(2015-12-30)), week = datetime_diff('week',datetime(2017-10-29 00:00),datetime(2017-09-30 23:59)), day = datetime_diff('day',datetime(2017-10-29 00:00),datetime(2017-09-30 23:59)), hour = datetime_diff('hour',datetime(2017-10-31 01:00),datetime(2017-10-30 23:59)), minute = datetime_diff('minute',datetime(2017-10-30 23:05:01),datetime(2017-10-30 23:00:59)), second = datetime_diff('second',datetime(2017-10-30 23:00:10.100),datetime(2017-10-30 23:00:00.900)); +-- millisecond = datetime_diff('millisecond',datetime(2017-10-30 23:00:00.200100),datetime(2017-10-30 23:00:00.100900)), +-- microsecond = datetime_diff('microsecond',datetime(2017-10-30 23:00:00.1009001),datetime(2017-10-30 23:00:00.1008009)), +-- nanosecond = datetime_diff('nanosecond',datetime(2017-10-30 23:00:00.0000000),datetime(2017-10-30 23:00:00.0000007)) +print '-- datetime_part()'; +print year = datetime_part("year", datetime(2017-10-30 01:02:03.7654321)),quarter = datetime_part("quarter", datetime(2017-10-30 01:02:03.7654321)),month = datetime_part("month", datetime(2017-10-30 01:02:03.7654321)),weekOfYear = datetime_part("week_of_year", datetime(2017-10-30 01:02:03.7654321)),day = datetime_part("day", datetime(2017-10-30 01:02:03.7654321)),dayOfYear = datetime_part("dayOfYear", datetime(2017-10-30 01:02:03.7654321)),hour = datetime_part("hour", datetime(2017-10-30 01:02:03.7654321)),minute = datetime_part("minute", datetime(2017-10-30 01:02:03.7654321)),second = datetime_part("second", datetime(2017-10-30 01:02:03.7654321)); +-- millisecond = datetime_part("millisecond", dt), +-- microsecond = datetime_part("microsecond", dt), +-- nanosecond = datetime_part("nanosecond", dt) +print '-- datetime_add()'; +print year = datetime_add('year',1,make_datetime(2017,1,1)),quarter = 
datetime_add('quarter',1,make_datetime(2017,1,1)),month = datetime_add('month',1,make_datetime(2017,1,1)),week = datetime_add('week',1,make_datetime(2017,1,1)),day = datetime_add('day',1,make_datetime(2017,1,1)),hour = datetime_add('hour',1,make_datetime(2017,1,1)),minute = datetime_add('minute',1,make_datetime(2017,1,1)),second = datetime_add('second',1,make_datetime(2017,1,1)); \ No newline at end of file diff --git a/tests/queries/0_stateless/02366_kql_func_dynamic.reference b/tests/queries/0_stateless/02366_kql_func_dynamic.reference new file mode 100644 index 000000000000..2bee07871386 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_dynamic.reference @@ -0,0 +1,189 @@ +-- constant index value +1 c ['A',NULL,'C'] +-- array_length() +4 +3 +-- array_sum() +10 +11 +1 +\N +0 +4 +-- array_index_of() +3 +1 +-- array_iif() +[1,5,3] +[1,5,3] +[1,5,NULL] +[NULL,NULL,NULL] +[1,NULL] +['1','2',NULL,'2',NULL] +['1','2',NULL,'2',NULL] +['1','2',NULL,'2',NULL] +[1.1,999.99,3.3,999.99,5.5] +[90,3,90,NULL,90] +[1,4,5,8,9] +-- array_concat() +[1,2,3,4,5,6] +-- array_reverse() +[] +[1] +[4,3,2,1] +['example','an','is','this'] +-- array_rotate_left() +[] +[] +[] +[3,4,5,1,2] +[1,2,3,4,5] +[3,4,5,1,2] +[4,5,1,2,3] +[1,2,3,4,5] +[4,5,1,2,3] +-- array_rotate_right() +[] +[] +[] +[4,5,1,2,3] +[1,2,3,4,5] +[4,5,1,2,3] +[3,4,5,1,2] +[1,2,3,4,5] +[3,4,5,1,2] +-- array_shift_left() +[] +[] +[] +[3,4,5,NULL,NULL] +[NULL,NULL,1,2,3] +[3,4,5,-1,-1] +['c','',''] +-- array_shift_right() +[] +[] +[] +[3,4,5,NULL,NULL] +[NULL,NULL,1,2,3] +[3,4,5,-1,-1] +['c','',''] +-- array_slice() +[2,3] +[3,4] +-- array_split() +[[1],[2,3],[4,5]] +[[1,2],[3,4,5]] +[[1],[2,3],[4,5]] +[[1,2,3,4],[],[4,5]] +-- array_sort_asc() +(['a','c','c','d',NULL]) +([1,2,3,4]) +['a','b','c'] +(['p','q','r'],['hello','clickhouse','world']) +([NULL,'a','c','c','d']) +([NULL,'a','c','c','d']) +([NULL,NULL,NULL]) +[1,2,3,NULL,NULL] +['a','e','b','c','d'] +(['George','John','Paul','Ringo']) 
+(['blue','green','yellow',NULL,NULL]) +([NULL,NULL,'blue','green','yellow']) +-- array_sort_desc() +(['d','c','c','a',NULL]) +([4,3,2,1]) +['c','b','a'] +(['r','q','p'],['world','clickhouse','hello']) +([NULL,'d','c','c','a']) +([NULL,'d','c','c','a']) +([NULL,NULL,NULL]) +[3,2,1,NULL,NULL] +['d','c','b','e','a'] +(['Ringo','Paul','John','George']) +(['yellow','green','blue',NULL,NULL]) +([NULL,NULL,'yellow','green','blue']) +-- jaccard_index() +0.75 +0 +0 +nan +0 +0.75 +0.25 +-- pack_array() +1 2 4 [1,2,4] +['ab','0.0.0.42','4.2'] +-- repeat() +[] +[1,1,1] +['asd','asd','asd'] +[86400000000000,86400000000000,86400000000000] +[true,true,true] +[NULL] +[NULL] +-- set_difference() +[] +[] +[] +[] +[4,5,6] +[4] +[1,3] +[1,2,3] +['d','s'] +['Chewbacca','Han Solo'] +-- set_has_element() +0 +1 +0 +1 +0 +-- set_intersect() +[] +[1,2,3] +[1,2,3] +[] +[5] +[] +['a'] +['Darth Vader'] +-- set_union() +[] +[1,2,3] +[1,2,3,4,5,6] +[1,2,3,4] +[1,2,3,4,5] +[1,2,3] +['a','d','f','s'] +['Chewbacca','Darth Sidious','Darth Vader','Han Solo'] +-- zip() +[] +[[1,2],[3,4],[5,6]] +[['Darth','Vader','has a suit'],['Master','Yoda','doesn\'t have a suit']] +[[1,10],[2,20],[3,NULL]] +[[NULL,1],[NULL,2],[NULL,3]] +-- array_sort in table() +1 (['CA','Eng','FR','US'],[11,20,12,16],[100,200,300,500]) +2 (['Eng','FR','Gem','Japan'],[10,33,22,31],[210,310,410,510]) +3 (['CA','Eng','Gem','Japan'],[25,11,10,23],[120,0,110,130]) +-- array_sort in table() with condition +1 (['CA','Eng','FR','US'],[11,20,12,16],[100,200,300,500]) +2 (['Eng','FR','Gem','Japan'],[10,33,22,31],[210,310,410,510]) +3 (['CA','Eng','Gem','Japan'],[25,11,10,23],[120,0,110,130]) +-- array_sort as condition +-- array_sort with single alias +1 ['CA','Eng','FR','US'] +2 ['Eng','FR','Gem','Japan'] +3 ['CA','Eng','Gem','Japan'] +1 ['CA','Eng','FR','US'] +2 ['Eng','FR','Gem','Japan'] +3 ['CA','Eng','Gem','Japan'] +-- array_sort with partial alias +1 ['CA','Eng','FR','US'] [11,20,12,16] +2 ['Eng','FR','Gem','Japan'] [10,33,22,31] +3 
['CA','Eng','Gem','Japan'] [25,11,10,23] +-- array_sort with all alias +1 ['CA','Eng','FR','US'] [11,20,12,16] [100,200,300,500] +2 ['Eng','FR','Gem','Japan'] [10,33,22,31] [210,310,410,510] +3 ['CA','Eng','Gem','Japan'] [25,11,10,23] [120,0,110,130] +[[1,2],[1,2],[1,2],[1,2]] diff --git a/tests/queries/0_stateless/02366_kql_func_dynamic.sql b/tests/queries/0_stateless/02366_kql_func_dynamic.sql new file mode 100644 index 000000000000..25c6e16a56ba --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_dynamic.sql @@ -0,0 +1,197 @@ +DROP TABLE IF EXISTS array_test; +CREATE TABLE array_test (floats Array(Float64), + strings Array(String), + nullable_strings Array(Nullable(String)) + ) ENGINE=Memory; +INSERT INTO array_test VALUES([1.0, 2.5], ['a', 'c'], ['A', NULL, 'C']); +DROP TABLE IF EXISTS visit; +CREATE TABLE visit(pageid UInt8, ip_country Array(Nullable(String)), hit Array(Int64),duration Array(Int64)) ENGINE = Memory; +INSERT INTO visit VALUES (1,['CA', 'US','FR','Eng'], [11,16,12,20],[100,500,300,200]); +INSERT INTO visit VALUES (2,['Japan', 'Gem','FR','Eng'], [31,22,33,10],[510,410,310,210]); +INSERT INTO visit VALUES (3,['CA', 'Gem','Japan','Eng'], [25,10,23,11],[120,110,130,000]); +--INSERT INTO visit VALUES (4,['CA', 'Gem',null,'Eng'], [5,10,3,2],[220,320,310,150]); +--INSERT INTO visit VALUES (5,['FR', null,'US','Eng'], [16,12,23,10],[210,250,110,260]); +set dialect = 'kusto'; +print '-- constant index value'; +array_test | project floats[0], strings[1], nullable_strings; +print '-- array_length()'; +print array_length(dynamic(['John', 'Denver', 'Bob', 'Marley'])); +print array_length(dynamic([1, 2, 3])); +print array_length(42); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print array_length('a'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +-- print array_length(dynamic(42)); -> NULL +-- print array_length(dynamic('a')); -> NULL +print '-- array_sum()'; +print array_sum(dynamic([2, 5, 3])); +print array_sum(dynamic([2.5, 5.5, 3])); +print 
array_sum(dynamic([true, false, null])); +print array_sum(dynamic(['Alice', 'Bob'])); +print array_sum(dynamic([null, null, null])); +print array_sum(repeat(1, 4)); +print '-- array_index_of()'; +print array_index_of(dynamic(['John', 'Denver', 'Bob', 'Marley']), 'Marley'); +print array_index_of(dynamic([1, 2, 3]), 2); +print '-- array_iif()'; +print array_iif(dynamic([true,false,true]), dynamic([1,2,3]), dynamic([4,5,6])); +print array_iif(dynamic([1,0,1]), dynamic([1,2,3]), dynamic([4,5,6])); +print array_iif(dynamic([true,false,true]), dynamic([1,2]), dynamic([4,5,6])); +print array_iif(dynamic(['a','b','c']), dynamic([1,2,3]), dynamic([4,5,6])); +print array_iif(dynamic([true,null]), dynamic([1, 2]), repeat(4, 2)); +print t = array_iif(dynamic([true, false, true, false, true]), dynamic(['1', '3']), '2'); +print t = array_iif(dynamic([10, 0, 5, 0, -4]), dynamic(['1', '3']), '2'); +print t = array_iif(dynamic([2.2, 0, 4.4, 0, 66.7]), dynamic(['1', '3']), '2'); +print t = array_iif(dynamic([true, false, true, false, true]), dynamic([1.1, 2.2, 3.3, 4.4, 5.5]), 999.99); +print t = array_iif(dynamic([true, false, true, false, true]), 90, dynamic([1, 3])); +print t = array_iif(dynamic([true, false, true, false, true]), dynamic([1, 3, 5, 7, 9]), dynamic([2, 4, 6, 8, 10])); +print '-- array_concat()'; +print array_concat(dynamic([1,2,3]),dynamic([4,5,6])); +print '-- array_reverse()'; +print array_reverse(dynamic([])); +print array_reverse(dynamic([1])); +print array_reverse(dynamic([1,2,3,4])); +print array_reverse(dynamic(["this", "is", "an", "example"])); +print '-- array_rotate_left()'; +print array_rotate_left(dynamic([]), 0); +print array_rotate_left(dynamic([]), 500); +print array_rotate_left(dynamic([]), -500); +print array_rotate_left(dynamic([1,2,3,4,5]), 2); +print array_rotate_left(dynamic([1,2,3,4,5]), 5); +print array_rotate_left(dynamic([1,2,3,4,5]), 7); +print array_rotate_left(dynamic([1,2,3,4,5]), -2); +print array_rotate_left(dynamic([1,2,3,4,5]), -5); 
+print array_rotate_left(dynamic([1,2,3,4,5]), -7); +print '-- array_rotate_right()'; +print array_rotate_right(dynamic([]), 0); +print array_rotate_right(dynamic([]), 500); +print array_rotate_right(dynamic([]), -500); +print array_rotate_right(dynamic([1,2,3,4,5]), 2); +print array_rotate_right(dynamic([1,2,3,4,5]), 5); +print array_rotate_right(dynamic([1,2,3,4,5]), 7); +print array_rotate_right(dynamic([1,2,3,4,5]), -2); +print array_rotate_right(dynamic([1,2,3,4,5]), -5); +print array_rotate_right(dynamic([1,2,3,4,5]), -7); +print '-- array_shift_left()'; +print array_shift_left(dynamic([]), 0); +print array_shift_left(dynamic([]), 555); +print array_shift_left(dynamic([]), -555); +print array_shift_left(dynamic([1,2,3,4,5]), 2); +print array_shift_left(dynamic([1,2,3,4,5]), -2); +print array_shift_left(dynamic([1,2,3,4,5]), 2, -1); +print array_shift_left(dynamic(['a', 'b', 'c']), 2); +print '-- array_shift_right()'; +print array_shift_right(dynamic([]), 0); -- empty input: the three cases below must exercise array_shift_right itself, expected [] +print array_shift_right(dynamic([]), 555); +print array_shift_right(dynamic([]), -555); +print array_shift_right(dynamic([1,2,3,4,5]), -2); +print array_shift_right(dynamic([1,2,3,4,5]), 2); +print array_shift_right(dynamic([1,2,3,4,5]), -2, -1); +print array_shift_right(dynamic(['a', 'b', 'c']), -2); +print '-- array_slice()'; +print array_slice(dynamic([1,2,3]), 1, 2); +print array_slice(dynamic([1,2,3,4,5]), -3, -2); +print '-- array_split()'; +print array_split(dynamic([1,2,3,4,5]), dynamic([1,-2])); +print array_split(dynamic([1,2,3,4,5]), 2); +print array_split(dynamic([1,2,3,4,5]), dynamic([1,3])); +print array_split(dynamic([1,2,3,4,5]), dynamic([-1,-2])); +print '-- array_sort_asc()'; +print array_sort_asc(dynamic([null, 'd', 'a', 'c', 'c'])); +print array_sort_asc(dynamic([4, 1, 3, 2])); +print array_sort_asc(dynamic(['b', 'a', 'c']), dynamic(['q', 'p', 'r']))[0]; +print array_sort_asc(dynamic(['q', 'p', 'r']), dynamic(['clickhouse','hello', 'world'])); +print array_sort_asc( dynamic(['d', null, 
'a', 'c', 'c']) , false); +print array_sort_asc( dynamic(['d', null, 'a', 'c', 'c']) , 1 > 2); +print array_sort_asc( dynamic([null, null, null]) , false); +print array_sort_asc(dynamic([2, 1, null,3, null]), dynamic([20, 10, 40, 30, 50]), 1 < 2)[0]; +print array_sort_asc(dynamic(['1','3','4','5','2']),dynamic(["a","b","c","d","e"]), dynamic(["a","b","c","d","e"]), dynamic(["a","b","c","d","e"]))[3]; +print array_sort_asc(split("John,Paul,George,Ringo", ",")); +print array_sort_asc(dynamic([null,"blue","yellow","green",null])); +print array_sort_asc(dynamic([null,"blue","yellow","green",null]), false); +print '-- array_sort_desc()'; +print array_sort_desc(dynamic([null, 'd', 'a', 'c', 'c'])); +print array_sort_desc(dynamic([4, 1, 3, 2])); +print array_sort_desc(dynamic(['b', 'a', 'c']), dynamic(['q', 'p', 'r']))[0]; +print array_sort_desc(dynamic(['q', 'p', 'r']), dynamic(['clickhouse','hello', 'world'])); +print array_sort_desc( dynamic(['d', null, 'a', 'c', 'c']) , false); +print array_sort_desc( dynamic(['d', null, 'a', 'c', 'c']) , 1 > 2); +print array_sort_desc( dynamic([null, null, null]) , false); +print array_sort_desc(dynamic([2, 1, null,3, null]), dynamic([20, 10, 40, 30, 50]), 1 < 2)[0]; +print array_sort_desc(dynamic(['1','3','4','5','2']),dynamic(["a","b","c","d","e"]), dynamic(["a","b","c","d","e"]), dynamic(["a","b","c","d","e"]))[3]; +print array_sort_desc(split("John,Paul,George,Ringo", ",")); +print array_sort_desc(dynamic([null,"blue","yellow","green",null])); +print array_sort_desc(dynamic([null,"blue","yellow","green",null]), false); +print '-- jaccard_index()'; +print jaccard_index(dynamic([1, 1, 2, 2, 3, 3]), dynamic([1, 2, 3, 4, 4, 4])); +print jaccard_index(dynamic([1, 2, 3]), dynamic([])); +print jaccard_index(dynamic([]), dynamic([1, 2, 3, 4])); +print jaccard_index(dynamic([]), dynamic([])); +print jaccard_index(dynamic([1, 2, 3]), dynamic([4, 5, 6, 7])); +print jaccard_index(dynamic(['a', 's', 'd']), dynamic(['f', 'd', 's', 'a'])); 
+print jaccard_index(dynamic(['Chewbacca', 'Darth Vader', 'Han Solo']), dynamic(['Darth Sidious', 'Darth Vader'])); +print '-- pack_array()'; +print pack_array(); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +print x = 1 | extend y = x * 2 | extend z = y * 2 | extend pack_array(x,y,z); +print pack_array(strcat('a', 'b'), format_ipv4(42), tostring(4.2)); +print '-- repeat()'; +print repeat(1, 0); +print repeat(1, 3); +print repeat("asd", 3); +print repeat(timespan(1d), 3); +print repeat(true, 3); +print repeat(1, -3); +print repeat(6.7,-4); +print '-- set_difference()'; +print set_difference(dynamic([]), dynamic([])); +print set_difference(dynamic([]), dynamic([9])); +print set_difference(dynamic([]), dynamic(["asd"])); +print set_difference(dynamic([1, 1, 2, 2, 3, 3]), dynamic([1, 2, 3])); +print array_sort_asc(set_difference(dynamic([1, 4, 2, 3, 5, 4, 6]), dynamic([1, 2, 3])))[0]; +print set_difference(dynamic([4]), dynamic([1, 2, 3])); +print array_sort_asc(set_difference(dynamic([1, 2, 3, 4, 5]), dynamic([5]), dynamic([2, 4])))[0]; +print array_sort_asc(set_difference(dynamic([1, 2, 3]), dynamic([])))[0]; +print array_sort_asc(set_difference(dynamic(['a', 's', 'd']), dynamic(['a', 'f'])))[0]; +print array_sort_asc(set_difference(dynamic(['Chewbacca', 'Darth Vader', 'Han Solo']), dynamic(['Darth Sidious', 'Darth Vader'])))[0]; +print '-- set_has_element()'; +print set_has_element(dynamic([]), 9); +print set_has_element(dynamic(["this", "is", "an", "example"]), "example"); +print set_has_element(dynamic(["this", "is", "an", "example"]), "examplee"); +print set_has_element(dynamic([1, 2, 3]), 2); +print set_has_element(dynamic([1, 2, 3, 4.2]), 4); +print '-- set_intersect()'; +print set_intersect(dynamic([]), dynamic([])); +print array_sort_asc(set_intersect(dynamic([1, 1, 2, 2, 3, 3]), dynamic([1, 2, 3])))[0]; +print array_sort_asc(set_intersect(dynamic([1, 4, 2, 3, 5, 4, 6]), dynamic([1, 2, 3])))[0]; +print set_intersect(dynamic([4]), dynamic([1, 2, 3])); 
+print set_intersect(dynamic([1, 2, 3, 4, 5]), dynamic([1, 3, 5]), dynamic([2, 5])); +print set_intersect(dynamic([1, 2, 3]), dynamic([])); +print set_intersect(dynamic(['a', 's', 'd']), dynamic(['a', 'f'])); +print set_intersect(dynamic(['Chewbacca', 'Darth Vader', 'Han Solo']), dynamic(['Darth Sidious', 'Darth Vader'])); +print '-- set_union()'; +print set_union(dynamic([]), dynamic([])); +print array_sort_asc(set_union(dynamic([1, 1, 2, 2, 3, 3]), dynamic([1, 2, 3])))[0]; +print array_sort_asc(set_union(dynamic([1, 4, 2, 3, 5, 4, 6]), dynamic([1, 2, 3])))[0]; +print array_sort_asc(set_union(dynamic([4]), dynamic([1, 2, 3])))[0]; +print array_sort_asc(set_union(dynamic([1, 3, 4]), dynamic([5]), dynamic([2, 4])))[0]; +print array_sort_asc(set_union(dynamic([1, 2, 3]), dynamic([])))[0]; +print array_sort_asc(set_union(dynamic(['a', 's', 'd']), dynamic(['a', 'f'])))[0]; +print array_sort_asc(set_union(dynamic(['Chewbacca', 'Darth Vader', 'Han Solo']), dynamic(['Darth Sidious', 'Darth Vader'])))[0]; +print '-- zip()'; +print zip(dynamic([]), dynamic([])); +print zip(dynamic([1,3,5]), dynamic([2,4,6])); +print zip(dynamic(['Darth','Master']), dynamic(['Vader','Yoda']), dynamic(['has a suit','doesn\'t have a suit'])); +print zip(dynamic([1,2,3]), dynamic([10,20])); +print zip(dynamic([]), dynamic([1,2,3])); +print '-- array_sort in table()'; +visit | project pageid, array_sort_asc(ip_country, hit, duration) | order by pageid asc; +print '-- array_sort in table() with condition'; +visit | project pageid, array_sort_asc(ip_country, hit, duration, pageid > 4) | order by pageid asc; +print '-- array_sort as condition'; +visit | where isnull(array_sort_asc(ip_country, hit, duration)[2][0]); +print '-- array_sort with single alias'; +visit | project pageid, a = array_sort_asc(ip_country, hit, duration) | order by pageid asc; +visit | project pageid, (a) = array_sort_asc(ip_country, hit, duration) | order by pageid asc; +print '-- array_sort with partial alias'; +visit | 
project pageid, (a,b) = array_sort_asc(ip_country, hit, duration) | order by pageid asc; +print '-- array_sort with all alias'; +visit | project pageid, (a,b,c) = array_sort_asc(ip_country, hit, duration) | order by pageid asc; +print zip(repeat(1,4), repeat(2,4)); diff --git a/tests/queries/0_stateless/02366_kql_func_general.reference b/tests/queries/0_stateless/02366_kql_func_general.reference new file mode 100644 index 000000000000..a919281dc711 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_general.reference @@ -0,0 +1,36 @@ +-- case +Theodore Diaz Skilled Manual Bachelors 28 C +Stephanie Cox Management Bachelors 33 D +Peter Nara Skilled Manual Graduate Degree 26 C +Latoya Shen Professional Graduate Degree 25 C +Joshua Lee Professional Partial College 26 C +Edward Hernandez Skilled Manual High School 36 D +Dalton Wood Professional Partial College 42 D +Christine Nara Skilled Manual Partial College 33 D +Cameron Rodriguez Professional Partial College 28 C +Angel Stewart Professional Partial College 46 D +-- iff +Theodore Diaz Skilled Manual Bachelors 28 bigger +Stephanie Cox Management Bachelors 33 bigger +Peter Nara Skilled Manual Graduate Degree 26 bigger +Latoya Shen Professional Graduate Degree 25 bigger +Joshua Lee Professional Partial College 26 bigger +Edward Hernandez Skilled Manual High School 36 bigger +Dalton Wood Professional Partial College 42 bigger +Christine Nara Skilled Manual Partial College 33 bigger +Cameron Rodriguez Professional Partial College 28 bigger +Angel Stewart Professional Partial College 46 bigger +-- iif +Theodore Diaz Skilled Manual Bachelors 28 bigger +Stephanie Cox Management Bachelors 33 bigger +Peter Nara Skilled Manual Graduate Degree 26 bigger +Latoya Shen Professional Graduate Degree 25 bigger +Joshua Lee Professional Partial College 26 bigger +Edward Hernandez Skilled Manual High School 36 bigger +Dalton Wood Professional Partial College 42 bigger +Christine Nara Skilled Manual Partial College 33 bigger 
+Cameron Rodriguez Professional Partial College 28 bigger +Angel Stewart Professional Partial College 46 bigger +-- lookup +First +default diff --git a/tests/queries/0_stateless/02366_kql_func_general.sql b/tests/queries/0_stateless/02366_kql_func_general.sql new file mode 100644 index 000000000000..b3f3d5dfd992 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_general.sql @@ -0,0 +1,48 @@ +DROP TABLE IF EXISTS Customers; +CREATE TABLE Customers +( + FirstName Nullable(String), + LastName String, + Occupation String, + Education String, + Age Nullable(UInt8) +) ENGINE = Memory; + +INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28), ('Stephanie','Cox','Management','Bachelors',33), ('Peter','Nara','Skilled Manual','Graduate Degree',26), ('Latoya','Shen','Professional','Graduate Degree',25), ('Joshua','Lee','Professional','Partial College',26), ('Edward','Hernandez','Skilled Manual','High School',36), ('Dalton','Wood','Professional','Partial College',42), ('Christine','Nara','Skilled Manual','Partial College',33), ('Cameron','Rodriguez','Professional','Partial College',28), ('Angel','Stewart','Professional','Partial College',46); + +DROP TABLE IF EXISTS dictionary_source_table; +CREATE TABLE dictionary_source_table +( + key String, + start_range UInt64, + end_range UInt64, + value String, + value_nullable Nullable(String) +) +ENGINE = Memory; +INSERT INTO dictionary_source_table VALUES('1', 10, 20, 'First', 'First'), ('2', 11, 21, 'Second', NULL), ('3', 12, 22, 'Third', 'Third'); + +CREATE DICTIONARY dictionary_table +( + key String, + start_range UInt64, + end_range UInt64, + value String, + value_nullable Nullable(String) +) +PRIMARY KEY key +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() TABLE 'dictionary_source_table')) +LIFETIME(MIN 1 MAX 1000) +LAYOUT(FLAT()); + +set dialect='kusto'; + +print '-- case'; +Customers | extend t = case(Age <= 10, "A", Age <= 20, "B", Age <= 30, "C", "D"); +print '-- iff'; +Customers | 
extend t = iff(Age <= 10, "smaller", "bigger"); +print '-- iif'; +Customers | extend t = iif(Age <= 10, "smaller", "bigger"); +print '-- lookup'; +print lookup('dictionary_table', 'value', '1'); +print lookup('dictionary_table', 'value', '100', 'default'); diff --git a/tests/queries/0_stateless/02366_kql_func_has_any_ipv4.reference b/tests/queries/0_stateless/02366_kql_func_has_any_ipv4.reference new file mode 100644 index 000000000000..aaaf086a3c67 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_has_any_ipv4.reference @@ -0,0 +1,48 @@ +-- #1 -- +09:46:00 10.0.0.1 GET /favicon.ico 404 10.0.0.1 1 +09:46:00 10.0.0.300 1.2.3.4 GET /favicon.ico 404 10.0.0.300 0 +09:46:0010.0.0.1 GET /favicon.ico 404 10.0.0.1 0 +09:46:00 10.0.0.1GET /favicon.ico 404 10.0.0.1 0 +09:46:00 10.0.0.1 192.168.1.1 GET /favicon.ico 404 192.168.1.1 1 +-- #2 -- +09:46:00 10.0.0.1 GET /favicon.ico 404 10.0.0.1 1 +09:46:00 10.0.0.300 1.2.3.4 GET /favicon.ico 404 10.0.0.300 0 +09:46:0010.0.0.1 GET /favicon.ico 404 10.0.0.1 0 +09:46:00 10.0.0.1GET /favicon.ico 404 10.0.0.1 0 +09:46:00 10.0.0.1 192.168.1.1 GET /favicon.ico 404 192.168.1.1 1 +-- #3 -- +09:46:00 10.0.0.1 GET /favicon.ico 404 10.0.0.1 1 +09:46:00 10.0.0.300 1.2.3.4 GET /favicon.ico 404 10.0.0.300 1 +09:46:0010.0.0.1 GET /favicon.ico 404 10.0.0.1 0 +09:46:00 10.0.0.1GET /favicon.ico 404 10.0.0.1 0 +09:46:00 10.0.0.1 192.168.1.1 GET /favicon.ico 404 192.168.1.1 1 +-- #4 -- +09:46:00 10.0.0.1 GET /favicon.ico 404 10.0.0.1 1 +09:46:00 10.0.0.300 1.2.3.4 GET /favicon.ico 404 10.0.0.300 0 +09:46:0010.0.0.1 GET /favicon.ico 404 10.0.0.1 0 +09:46:00 10.0.0.1GET /favicon.ico 404 10.0.0.1 0 +09:46:00 10.0.0.1 192.168.1.1 GET /favicon.ico 404 192.168.1.1 1 +-- #5 -- +09:46:00 10.0.0.1 GET /favicon.ico 404 10.0.0.1 1 +09:46:00 10.0.0.300 1.2.3.4 GET /favicon.ico 404 10.0.0.300 0 +09:46:0010.0.0.1 GET /favicon.ico 404 10.0.0.1 0 +09:46:00 10.0.0.1GET /favicon.ico 404 10.0.0.1 0 +09:46:00 10.0.0.1 192.168.1.1 GET /favicon.ico 404 
192.168.1.1 1 +-- #6 -- +09:46:00 10.0.0.1 GET /favicon.ico 404 10.0.0.1 1 +09:46:00 10.0.0.300 1.2.3.4 GET /favicon.ico 404 10.0.0.300 0 +09:46:0010.0.0.1 GET /favicon.ico 404 10.0.0.1 0 +09:46:00 10.0.0.1GET /favicon.ico 404 10.0.0.1 0 +09:46:00 10.0.0.1 192.168.1.1 GET /favicon.ico 404 192.168.1.1 1 +-- #7 -- +09:46:00 10.0.0.1 GET /favicon.ico 404 10.0.0.1 0 +09:46:00 10.0.0.300 1.2.3.4 GET /favicon.ico 404 10.0.0.300 0 +09:46:0010.0.0.1 GET /favicon.ico 404 10.0.0.1 0 +09:46:00 10.0.0.1GET /favicon.ico 404 10.0.0.1 0 +09:46:00 10.0.0.1 192.168.1.1 GET /favicon.ico 404 192.168.1.1 0 +-- #8 -- +09:46:00 10.0.0.1 GET /favicon.ico 404 ['1.2.3.4','2.3.4.5','10.0.0.1'] 1 +09:46:00 10.0.0.1 GET /favicon.ico 404 ['1.2.3.','2.3.4.','10.0.0.'] 0 +-- #9 -- +09:46:00 10.0.0.1 GET /favicon.ico 404 ['1.2.3.4','2.3.4.5','10.0.0.1'] 1 +09:46:00 10.0.0.1 GET /favicon.ico 404 ['1.2.3.','2.3.4.','10.0.0.'] 1 diff --git a/tests/queries/0_stateless/02366_kql_func_has_any_ipv4.sql b/tests/queries/0_stateless/02366_kql_func_has_any_ipv4.sql new file mode 100644 index 000000000000..48fcf1a537cf --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_has_any_ipv4.sql @@ -0,0 +1,46 @@ +DROP TABLE IF EXISTS IP_STRING; +DROP TABLE IF EXISTS IP_ARRAY; + +CREATE TABLE IP_STRING (haystack String, needle String) ENGINE = Memory; +CREATE TABLE IP_ARRAY (haystack String, needle Array(String)) ENGINE = Memory; +INSERT INTO IP_STRING (haystack, needle) VALUES ('09:46:00 10.0.0.1 GET /favicon.ico 404', '10.0.0.1'), ('09:46:00 10.0.0.300 1.2.3.4 GET /favicon.ico 404', '10.0.0.300'), ('09:46:0010.0.0.1 GET /favicon.ico 404', '10.0.0.1'), ('09:46:00 10.0.0.1GET /favicon.ico 404', '10.0.0.1'), ('09:46:00 10.0.0.1 192.168.1.1 GET /favicon.ico 404', '192.168.1.1'); +INSERT INTO IP_ARRAY (haystack, needle) VALUES ('09:46:00 10.0.0.1 GET /favicon.ico 404', ['1.2.3.4', '2.3.4.5', '10.0.0.1']), ('09:46:00 10.0.0.1 GET /favicon.ico 404', ['1.2.3.', '2.3.4.', '10.0.0.']); + +set dialect='kusto'; +print 
has_ipv4('X'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +print has_any_ipv4('X'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +print has_ipv4_prefix('X'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +print has_any_ipv4_prefix('X'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } + +print has_ipv4(1, 2); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print has_any_ipv4(1,2); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print has_ipv4_prefix(1,2); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print has_any_ipv4_prefix(1,2); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +print has_ipv4('X', 2); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print has_any_ipv4('X', 2); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print has_ipv4_prefix('X', 2); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print has_any_ipv4_prefix('X', 2); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +print "-- #1 --"; +IP_STRING | extend has_ipv4(haystack, needle); +print "-- #2 --"; +IP_STRING | extend has_any_ipv4(haystack, needle); +print "-- #3 --"; +IP_STRING | extend has_any_ipv4(haystack, needle, '1.2.3.4'); +print "-- #4 --"; +IP_STRING | extend has_any_ipv4(haystack, 'X', needle); +print "-- #5 --"; +IP_STRING | extend has_ipv4_prefix(haystack, needle); +print "-- #6 --"; +IP_STRING | extend has_ipv4_prefix(haystack, substring(needle, 0, strlen(needle)-1)); +print "-- #7 --"; +IP_STRING | extend has_ipv4_prefix(haystack, substring(needle, 0, strlen(needle)-2)); +print "-- #8 --"; +IP_ARRAY | extend has_any_ipv4(haystack, dynamic(needle)); +print "-- #9 --"; +IP_ARRAY | extend has_any_ipv4_prefix(haystack, dynamic(needle)); +set dialect='kusto_auto'; +DROP TABLE IP_STRING; +DROP TABLE IP_ARRAY; + diff --git a/tests/queries/0_stateless/02366_kql_func_ip.reference b/tests/queries/0_stateless/02366_kql_func_ip.reference new file mode 100644 index 000000000000..fdba4622c9a9 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_ip.reference @@ -0,0 +1,122 @@ +-- 
ipv4_is_private(\'127.0.0.1\') +0 +-- ipv4_is_private(\'10.1.2.3\') +1 +-- ipv4_is_private(\'192.168.1.1/24\') +1 +ipv4_is_private(strcat(\'192.\',\'168.\',\'1.\',\'1\',\'/24\')) +1 +-- ipv4_is_private(\'abc\') +\N +-- ipv4_netmask_suffix(\'192.168.1.1/24\') +24 +-- ipv4_netmask_suffix(\'192.168.1.1\') +32 +-- ipv4_netmask_suffix(\'127.0.0.1/16\') +16 +-- ipv4_netmask_suffix(\'abc\') +\N +ipv4_netmask_suffix(strcat(\'127.\', \'0.\', \'0.1/16\')) +16 +-- ipv4_is_in_range(\'127.0.0.1\', \'127.0.0.1\') +1 +-- ipv4_is_in_range(\'192.168.1.6\', \'192.168.1.1/24\') +1 +-- ipv4_is_in_range(\'192.168.1.1\', \'192.168.2.1/24\') +0 +-- ipv4_is_in_range(strcat(\'192.\',\'168.\', \'1.1\'), \'192.168.2.1/24\') +0 +-- ipv4_is_in_range(\'abc\', \'127.0.0.1\') +\N +-- parse_ipv6(127.0.0.1) +0000:0000:0000:0000:0000:ffff:7f00:0001 +-- parse_ipv6(fe80::85d:e82c:9446:7994) +fe80:0000:0000:0000:085d:e82c:9446:7994 +-- parse_ipv4(\'127.0.0.1\') +2130706433 +-- parse_ipv4(\'192.1.168.1\') < parse_ipv4(\'192.1.168.2\') +1 +-- parse_ipv4(arrayStringConcat([\'127\', \'0\', \'0\', \'1\'], \'.\')) +-- parse_ipv4_mask(\'127.0.0.1\', 24) == 2130706432 +1 +-- parse_ipv4_mask(\'abc\', 31) +\N +\N +-- parse_ipv4_mask(\'192.1.168.2\', 31) == parse_ipv4_mask(\'192.1.168.3\', 31) +1 +-- ipv4_is_match(\'127.0.0.1\', \'127.0.0.1\') +1 +-- ipv4_is_match(\'192.168.1.1\', \'192.168.1.255\') +0 +-- ipv4_is_match(\'192.168.1.1/24\', \'192.168.1.255/24\') +1 +-- ipv4_is_match(\'192.168.1.1\', \'192.168.1.255\', 24) +1 +-- ipv4_is_match(\'abc\', \'def\', 24) +\N +-- ipv4_compare() +0 +-1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +-- format_ipv4() +192.168.1.0 +192.168.1.1 +192.168.1.0 +192.168.1.0 +1 +1 +127.0.0.0 +-- format_ipv4_mask() +192.168.1.0/24 +192.168.1.0/24 +192.168.1.0/24 +192.168.1.1/32 +192.168.1.0/24 +1 +1 +127.0.0.0/24 +-- parse_ipv6_mask() +0000:0000:0000:0000:0000:0000:0000:0000 +fe80:0000:0000:0000:085d:e82c:9446:7900 +0000:0000:0000:0000:0000:ffff:c0a8:ff00 +0000:0000:0000:0000:0000:ffff:c0a8:ff00 
+0000:0000:0000:0000:0000:ffff:ffff:ffff +fe80:0000:0000:0000:085d:e82c:9446:7994 +fe80:0000:0000:0000:085d:e82c:9446:7900 +0000:0000:0000:0000:0000:ffff:c0a8:ffff +0000:0000:0000:0000:0000:ffff:c0a8:ff00 +-- ipv6_is_match() +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/02366_kql_func_ip.sql b/tests/queries/0_stateless/02366_kql_func_ip.sql new file mode 100644 index 000000000000..8123bd6a3d11 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_ip.sql @@ -0,0 +1,129 @@ +set dialect='kusto'; +print '-- ipv4_is_private(\'127.0.0.1\')'; +print ipv4_is_private('127.0.0.1'); +print '-- ipv4_is_private(\'10.1.2.3\')'; +print ipv4_is_private('10.1.2.3'); +print '-- ipv4_is_private(\'192.168.1.1/24\')'; +print ipv4_is_private('192.168.1.1/24'); +print 'ipv4_is_private(strcat(\'192.\',\'168.\',\'1.\',\'1\',\'/24\'))'; +print ipv4_is_private(strcat('192.','168.','1.','1','/24')); +print '-- ipv4_is_private(\'abc\')'; +print ipv4_is_private('abc'); -- == null + +print '-- ipv4_netmask_suffix(\'192.168.1.1/24\')'; +print ipv4_netmask_suffix('192.168.1.1/24'); -- == 24 +print '-- ipv4_netmask_suffix(\'192.168.1.1\')'; +print ipv4_netmask_suffix('192.168.1.1'); -- == 32 +print '-- ipv4_netmask_suffix(\'127.0.0.1/16\')'; +print ipv4_netmask_suffix('127.0.0.1/16'); -- == 16 +print '-- ipv4_netmask_suffix(\'abc\')'; +print ipv4_netmask_suffix('abc'); -- == null +print 'ipv4_netmask_suffix(strcat(\'127.\', \'0.\', \'0.1/16\'))'; +print ipv4_netmask_suffix(strcat('127.', '0.', '0.1/16')); -- == 16 + +print '-- ipv4_is_in_range(\'127.0.0.1\', \'127.0.0.1\')'; +print ipv4_is_in_range('127.0.0.1', '127.0.0.1'); -- == true +print '-- ipv4_is_in_range(\'192.168.1.6\', \'192.168.1.1/24\')'; +print ipv4_is_in_range('192.168.1.6', '192.168.1.1/24'); -- == true +print '-- ipv4_is_in_range(\'192.168.1.1\', \'192.168.2.1/24\')'; +print ipv4_is_in_range('192.168.1.1', '192.168.2.1/24'); -- == false +print '-- 
ipv4_is_in_range(strcat(\'192.\',\'168.\', \'1.1\'), \'192.168.2.1/24\')'; +print ipv4_is_in_range(strcat('192.','168.', '1.1'), '192.168.2.1/24'); -- == false +print '-- ipv4_is_in_range(\'abc\', \'127.0.0.1\')'; +print ipv4_is_in_range('abc', '127.0.0.1'); -- == null + +print '-- parse_ipv6(127.0.0.1)'; +print parse_ipv6('127.0.0.1'); +print '-- parse_ipv6(fe80::85d:e82c:9446:7994)'; +print parse_ipv6('fe80::85d:e82c:9446:7994'); +print '-- parse_ipv4(\'127.0.0.1\')'; +print parse_ipv4('127.0.0.1'); +print '-- parse_ipv4(\'192.1.168.1\') < parse_ipv4(\'192.1.168.2\')'; +print parse_ipv4('192.1.168.1') < parse_ipv4('192.1.168.2'); +print '-- parse_ipv4(arrayStringConcat([\'127\', \'0\', \'0\', \'1\'], \'.\'))'; +print parse_ipv4(arrayStringConcat(['127', '0', '0', '1'], '.')); -- { clientError UNKNOWN_FUNCTION } + +print '-- parse_ipv4_mask(\'127.0.0.1\', 24) == 2130706432'; +print parse_ipv4_mask('127.0.0.1', 24) == 2130706432; +print '-- parse_ipv4_mask(\'abc\', 31)'; -- NOTE(review): the statement below has no terminating semicolon and runs into the next label print. The recorded reference has no label line for the 1000 case, so adding the semicolon also requires regenerating the reference. +print parse_ipv4_mask('abc', 31) +print '-- parse_ipv4_mask(\'192.1.168.2\', 1000)'; +print parse_ipv4_mask('192.1.168.2', 1000); +print '-- parse_ipv4_mask(\'192.1.168.2\', 31) == parse_ipv4_mask(\'192.1.168.3\', 31)'; +print parse_ipv4_mask('192.1.168.2', 31) == parse_ipv4_mask('192.1.168.3', 31); +print '-- ipv4_is_match(\'127.0.0.1\', \'127.0.0.1\')'; +print ipv4_is_match('127.0.0.1', '127.0.0.1'); +print '-- ipv4_is_match(\'192.168.1.1\', \'192.168.1.255\')'; +print ipv4_is_match('192.168.1.1', '192.168.1.255'); +print '-- ipv4_is_match(\'192.168.1.1/24\', \'192.168.1.255/24\')'; +print ipv4_is_match('192.168.1.1/24', '192.168.1.255/24'); +print '-- ipv4_is_match(\'192.168.1.1\', \'192.168.1.255\', 24)'; +print ipv4_is_match('192.168.1.1', '192.168.1.255', 24); +print '-- ipv4_is_match(\'abc\', \'def\', 24)'; +print ipv4_is_match('abc', 'def', 24); -- == null +print '-- ipv4_compare()'; +print ipv4_compare('127.0.0.1', '127.0.0.1'); +print ipv4_compare('192.168.1.1', '192.168.1.255'); +print 
ipv4_compare('192.168.1.255', '192.168.1.1'); +print ipv4_compare('192.168.1.1/24', '192.168.1.255/24'); +print ipv4_compare('192.168.1.1', '192.168.1.255', 24); +print ipv4_compare('192.168.1.1/24', '192.168.1.255'); +print ipv4_compare('192.168.1.1', '192.168.1.255/24'); +print ipv4_compare('192.168.1.1/30', '192.168.1.255/24'); +print ipv4_compare('192.168.1.1', '192.168.1.0', 31); +print ipv4_compare('192.168.1.1/24', '192.168.1.255', 31); +print ipv4_compare('192.168.1.1', '192.168.1.255', 24); +print '-- format_ipv4()'; +print format_ipv4('192.168.1.255', 24); +print format_ipv4('192.168.1.1', 32); +print format_ipv4('192.168.1.1/24', 32); +print format_ipv4(3232236031, 24); +print format_ipv4('192.168.1.1/24', -1) == ''; +print format_ipv4('abc', 24) == ''; +print format_ipv4(strcat('127.0', '.0.', '1', '/32'), 12 + 12); +print '-- format_ipv4_mask()'; +print format_ipv4_mask('192.168.1.255', 24); +print format_ipv4_mask(3232236031, 24); +print format_ipv4_mask('192.168.1.1', 24); +print format_ipv4_mask('192.168.1.1', 32); +print format_ipv4_mask('192.168.1.1/24', 32); +print format_ipv4_mask('192.168.1.1/24', -1) == ''; +print format_ipv4_mask('abc', 24) == ''; +print format_ipv4_mask(strcat('127.0', '.0.', '1', '/32'), 12 + 12); +print '-- parse_ipv6_mask()'; +print parse_ipv6_mask("127.0.0.1", 24); +print parse_ipv6_mask("fe80::85d:e82c:9446:7994", 120); +print parse_ipv6_mask("192.168.255.255", 120); +print parse_ipv6_mask("192.168.255.255/24", 124); +print parse_ipv6_mask("255.255.255.255", 128); +print parse_ipv6_mask("fe80::85d:e82c:9446:7994", 128); +print parse_ipv6_mask("fe80::85d:e82c:9446:7994/120", 124); +print parse_ipv6_mask("::192.168.255.255", 128); +print parse_ipv6_mask("::192.168.255.255/24", 128); +print '-- ipv6_is_match()'; +print ipv6_is_match('::ffff:7f00:1', '127.0.0.1') == true; +print ipv6_is_match('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7995') == false; +print ipv6_is_match('192.168.1.1/24', '192.168.1.255/24') == true; 
+print ipv6_is_match('fe80::85d:e82c:9446:7994/127', 'fe80::85d:e82c:9446:7995/127') == true; +print ipv6_is_match('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7995', 127) == true; +print ipv6_is_match('192.168.1.1', '192.168.1.1'); -- // Equal IPs +print ipv6_is_match('192.168.1.1/24', '192.168.1.255'); -- // 24 bit IP4-prefix is used for comparison +print ipv6_is_match('192.168.1.1', '192.168.1.255/24'); -- // 24 bit IP4-prefix is used for comparison +print ipv6_is_match('192.168.1.1/30', '192.168.1.255/24'); -- // 24 bit IP4-prefix is used for comparison +print ipv6_is_match('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7994'); -- // Equal IPs +print ipv6_is_match('fe80::85d:e82c:9446:7994/120', 'fe80::85d:e82c:9446:7998'); -- // 120 bit IP6-prefix is used for comparison +print ipv6_is_match('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7998/120'); -- // 120 bit IP6-prefix is used for comparison +print ipv6_is_match('fe80::85d:e82c:9446:7994/120', 'fe80::85d:e82c:9446:7998/120'); -- // 120 bit IP6-prefix is used for comparison +print ipv6_is_match('192.168.1.1', '::ffff:c0a8:0101'); -- // Equal IPs +print ipv6_is_match('192.168.1.1/24', '::ffff:c0a8:01ff'); -- // 24 bit IP-prefix is used for comparison +print ipv6_is_match('::ffff:c0a8:0101', '192.168.1.255/24'); -- // 24 bit IP-prefix is used for comparison +print ipv6_is_match('::192.168.1.1/30', '192.168.1.255/24'); -- // 24 bit IP-prefix is used for comparison +print ipv6_is_match('192.168.1.1', '192.168.1.0', 31); -- // 31 bit IP4-prefix is used for comparison +print ipv6_is_match('192.168.1.1/24', '192.168.1.255', 31); -- // 24 bit IP4-prefix is used for comparison +print ipv6_is_match('192.168.1.1', '192.168.1.255', 24); -- // 24 bit IP4-prefix is used for comparison +print ipv6_is_match('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7995', 127); -- // 127 bit IP6-prefix is used for comparison +print ipv6_is_match('fe80::85d:e82c:9446:7994/127', 'fe80::85d:e82c:9446:7998', 120); -- // 120 bit 
IP6-prefix is used for comparison +print ipv6_is_match('fe80::85d:e82c:9446:7994/120', 'fe80::85d:e82c:9446:7998', 127); -- // 120 bit IP6-prefix is used for comparison +print ipv6_is_match('192.168.1.1/24', '::ffff:c0a8:01ff', 127); -- // 127 bit IP6-prefix is used for comparison +print ipv6_is_match('::ffff:c0a8:0101', '192.168.1.255', 120); -- // 120 bit IP6-prefix is used for comparison +print ipv6_is_match('::192.168.1.1/30', '192.168.1.255/24', 127); -- // 120 bit IP6-prefix is used for comparison \ No newline at end of file diff --git a/tests/queries/0_stateless/02366_kql_func_math.reference b/tests/queries/0_stateless/02366_kql_func_math.reference new file mode 100644 index 000000000000..abcf6931d42d --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_math.reference @@ -0,0 +1,100 @@ +-- isnan -- +1 +0 +0 +-- abs -- +5 +-- ceiling -- +-1 +0 +1 +-- exp -- +7.389056098924109 +1.6487212707014907 +0.3678794411711252 +-- exp2 -- +4 +1.4142135623730951 +0.5 +-- exp10 -- +1000 +3.162277660168379 +0.001 +-- log -- +1.6094379132876024 +-0.6931471805599453 +nan +-- log2 -- +2.321928094887362 +-1 +nan +-- log10 -- +0.6989700043360189 +-0.3010299956639812 +nan +-- pow -- +8 +0.7071067811865476 +-1 +-- sqrt -- +16 +nan +0.7071067811865476 +-- acos -- +0 +2.037561665842193 +-- asin -- +1.5707963267948966 +0.5235987755982989 +-- atan -- +0.7853981633974483 +0.4636476090008061 +-- atan2 -- +2.356194490192345 +-0.7853981633974483 +-- cos -- +0.5403023058681398 +0.9004471023526769 +-- cot -- +0.6420926159343306 +-2.0701573613012125 +inf +-- degrees -- +45 +-- gamma -- +1 +-3.591387263852389 +-- isfinite -- +0 +-- isinf -- +1 +-- loggamma -- +3.1780538303479458 +1.2785385523288975 +-- max_of -- +17 +test +-- min_of -- +-3 +abc +-- pi -- +3.141592653589793 +-- radians -- +1.5707963267948966 +3.141592653589793 +6.283185307179586 +-- rand -- +1 +1 +-- round -- +2.2 +-- sign -- +-1 +0 +1 +-- sin -- +0.8414709848078965 +-0.43496553411123023 +-- tan -- +1.5574077246549023 
+-0.4830550656165784 diff --git a/tests/queries/0_stateless/02366_kql_func_math.sql b/tests/queries/0_stateless/02366_kql_func_math.sql new file mode 100644 index 000000000000..e5f3585ec8f7 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_math.sql @@ -0,0 +1,135 @@ +set dialect = 'kusto'; +print '-- isnan --'; +print isnan(double(nan)); +print isnan(4.2); +print isnan(4); -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } +print isnan(real(+inf)); +print isnan(dynamic(null)); -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } +print '-- abs --'; +print abs(-5); +print abs('test'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print abs(1d); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print '-- ceiling --'; +print ceiling(-1.1); +print ceiling(0); +print ceiling(0.9); +print ceiling('test'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print '-- exp --'; +print exp(2); +print exp(0.5); +print exp(-1); +print exp('test'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print '-- exp2 --'; +print exp2(2); +print exp2(0.5); +print exp2(-1); +print exp2('test'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print '-- exp10 --'; +print exp10(3); +print exp10(0.5); +print exp10(-3); +print exp10('test'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print '-- log --'; +print log(5); +print log(0.5); +print log(-5); +print log('test'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print '-- log2 --'; +print log2(5); +print log2(0.5); +print log2(-5); +print log2('test'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print '-- log10 --'; +print log10(5); +print log10(0.5); +print log10(-5); +print log10('test'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print '-- pow --'; +print pow(2, 3); +print pow(0.5, 0.5); +print pow(-1, -1); +print pow('test', 'test'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print '-- sqrt --'; +print sqrt(256); +print sqrt(-1); +print sqrt(0.5); +print sqrt('test'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print '-- acos --'; 
+print acos(1); +print acos(-0.45); +print acos('test'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print '-- asin --'; +print asin(1); +print asin(0.5); +print asin('test'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print '-- atan --'; +print atan(1); +print atan(0.5); +print atan('test'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print '-- atan2 --'; +print atan2(1, -1); +print atan2(-0.5, 0.5); +print atan2('test', 'test'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print '-- cos --'; +print cos(1); +print cos(-0.45); +print cos('test'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print '-- cot --'; +print cot(1); +print cot(-0.45); +print cot(0); +print cot('test'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print '-- degrees --'; +print degrees(pi()/4); +print degrees('test'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print '-- gamma --'; +print gamma(1); +print gamma(-0.45); +print gamma('test'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print '-- isfinite --'; +print isfinite(1.0/0.0); +print isfinite('test'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print '-- isinf --'; +print isinf(1.0/0.0); +print isinf('test'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print '-- loggamma --'; +print loggamma(5); +print loggamma(-0.45); +print loggamma('test'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print '-- max_of --'; +print max_of(10, 1, -3, 17); +print max_of('test', 'abc'); +print max_of(1); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +print max_of(1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +print '-- min_of --'; +print min_of(10, 1, -3, 17); +print min_of('test', 'abc'); +print min_of(1); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +print min_of(1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1); -- { 
clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +print '-- pi --'; +print pi(); +print pi('any'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +print '-- radians --'; +print radians(90); +print radians(180); +print radians(360); +print radians('test'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print '-- rand --'; +print x = rand() | project x >= 0 and x <= 1; +print x = rand(1234) | project x >= 0 and x <= 1233; +print '-- round --'; +print round(2.15, 1); +print round('test'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print '-- sign --'; +print sign(-42); +print sign(0); +print sign(11.2); +print sign('test'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print '-- sin --'; +print sin(1); +print sin(-0.45); +print sin('test'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print '-- tan --'; +print tan(1); +print tan(-0.45); +print tan('test'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } diff --git a/tests/queries/0_stateless/02366_kql_func_scalar.reference b/tests/queries/0_stateless/02366_kql_func_scalar.reference new file mode 100644 index 000000000000..b63db6efb5e6 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_scalar.reference @@ -0,0 +1,27 @@ +-- bin() +4 +3 +1970-05-11 00:00:00.000000000 +1209600000000000 +1970-05-11 13:45:07.345000000 +2022-09-26 10:13:23.982000000 +1970-05-11 13:45:07.345623000 +2022-09-26 10:13:23.987232000 +1970-05-11 13:45:07.456336000 +1970-05-11 13:45:07.456345700 +2022-09-26 10:13:23.987234100 +2022-09-26 10:13:23.987234100 +\N +26 1 +28 2 +25 1 +38 1 +33 1 +-- bin_at() +4.5 +-43200000000000 +2017-05-14 12:00:00.000000000 +2017-05-14 00:00:00.000000000 +2018-02-26 15:14:00.000000000 5 +2018-02-24 15:14:00.000000000 3 +2018-02-23 15:14:00.000000000 4 diff --git a/tests/queries/0_stateless/02366_kql_func_scalar.sql b/tests/queries/0_stateless/02366_kql_func_scalar.sql new file mode 100644 index 000000000000..aba7d624f98c --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_scalar.sql @@ -0,0 +1,48 @@ +-- 
datatable (Date:datetime, Num:int) [ +-- '2018-02-24T15:14:00', 3, +-- '2018-02-23T16:14:00', 4, +-- '2018-02-26T15:14:00', 5 +-- ] | summarize sum(Num) by d = todatetime(bin_at(Date, 1d, datetime('2018-02-24 15:14:00'))) | order by d; + +DROP TABLE IF EXISTS Bin_at_test; +CREATE TABLE Bin_at_test +( + `Date` DateTime64(9, 'UTC'), + Num Nullable(UInt8) +) ENGINE = Memory; +INSERT INTO Bin_at_test VALUES ('2018-02-24T15:14:00', 3), ('2018-02-23T16:14:00', 4), ('2018-02-26T15:14:00', 5); + +DROP TABLE IF EXISTS Customers; +CREATE TABLE Customers +( + FirstName Nullable(String), + LastName String, + Occupation String, + Education String, + Age Nullable(UInt8) +) ENGINE = Memory; +INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28), ('Stephanie','Cox','Management abcd defg','Bachelors',33),('Peter','Nara','Skilled Manual','Graduate Degree',26),('Latoya','Shen','Professional','Graduate Degree',25),('Apple','','Skilled Manual','Bachelors',28),(NULL,'why','Professional','Partial College',38); + +set dialect = 'kusto'; +print '-- bin()'; +print bin(4.5, 1); +print floor(4.5, 3); +print bin(datetime(1970-05-11 13:45:07), 1d); +print bin(16d, 7d); +print bin(datetime(1970-05-11 13:45:07.345623), 1ms); +print bin(datetime(2022-09-26 10:13:23.987234), 6ms); +print bin(datetime(1970-05-11 13:45:07.345623), 1microsecond); +print bin(datetime(2022-09-26 10:13:23.987234), 6microseconds); +print bin(datetime(1970-05-11 13:45:07.456345672), 16microseconds); +print bin(datetime('1970-05-11 13:45:07.456345672'), 1tick); +print bin(datetime(2022-09-26 10:13:23.987234128), 100nanosecond); +print bin(datetime(2022-09-26 10:13:23.987234128), 1tick); +print bin(datetime(2022-09-26 10:13:23.987234128), 99nanosecond); +Customers | summarize count() by bin(Age, Age); + +print '-- bin_at()'; +print bin_at(6.5, 2.5, 7); +print bin_at(1h, 1d, 12h); +print bin_at(datetime(2017-05-15 10:20:00.0), 1d, datetime(1970-01-01 12:00:00.0)); +print bin_at(datetime(2017-05-17 
10:20:00.0), 7d, datetime(2017-06-04 00:00:00.0)); +Bin_at_test | summarize sum(Num) by d = todatetime(bin_at(Date, 1d, datetime('2018-02-24 15:14:00'))) | order by d; diff --git a/tests/queries/0_stateless/02366_kql_func_string.reference b/tests/queries/0_stateless/02366_kql_func_string.reference new file mode 100644 index 000000000000..39f91107c143 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_string.reference @@ -0,0 +1,432 @@ +-- test String Functions -- +-- Customers |where Education contains \'degree\' +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 + +-- Customers |where Education !contains \'degree\' +\N why Professional Partial College 38 +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Apple Skilled Manual Bachelors 28 + +-- Customers |where Education contains \'Degree\' +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 + +-- Customers |where Education !contains \'Degree\' +\N why Professional Partial College 38 +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Apple Skilled Manual Bachelors 28 + +-- Customers | where FirstName endswith \'RE\' +Theodore Diaz Skilled Manual Bachelors 28 + +-- Customers | where ! 
FirstName endswith \'RE\' +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 +Stephanie Cox Management abcd defg Bachelors 33 +Apple Skilled Manual Bachelors 28 + +--Customers | where FirstName endswith_cs \'re\' +Theodore Diaz Skilled Manual Bachelors 28 + +-- Customers | where FirstName !endswith_cs \'re\' +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 +Stephanie Cox Management abcd defg Bachelors 33 +Apple Skilled Manual Bachelors 28 + +-- Customers | where Occupation == \'Skilled Manual\' +Peter Nara Skilled Manual Graduate Degree 26 +Theodore Diaz Skilled Manual Bachelors 28 +Apple Skilled Manual Bachelors 28 + +-- Customers | where Occupation != \'Skilled Manual\' +\N why Professional Partial College 38 +Latoya Shen Professional Graduate Degree 25 +Stephanie Cox Management abcd defg Bachelors 33 + +-- Customers | where Occupation has \'skilled\' +Peter Nara Skilled Manual Graduate Degree 26 +Theodore Diaz Skilled Manual Bachelors 28 +Apple Skilled Manual Bachelors 28 + +-- Customers | where Occupation !has \'skilled\' +\N why Professional Partial College 38 +Latoya Shen Professional Graduate Degree 25 +Stephanie Cox Management abcd defg Bachelors 33 + +-- Customers | where Occupation has \'Skilled\' +Peter Nara Skilled Manual Graduate Degree 26 +Theodore Diaz Skilled Manual Bachelors 28 +Apple Skilled Manual Bachelors 28 + +-- Customers | where Occupation !has \'Skilled\' +\N why Professional Partial College 38 +Latoya Shen Professional Graduate Degree 25 +Stephanie Cox Management abcd defg Bachelors 33 + +-- Customers | where Occupation hasprefix_cs \'Ab\' + +-- Customers | where Occupation !hasprefix_cs \'Ab\' +\N why Professional Partial College 38 +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Apple Skilled Manual Bachelors 28 + +-- 
Customers | where Occupation hasprefix_cs \'ab\' +Stephanie Cox Management abcd defg Bachelors 33 + +-- Customers | where Occupation !hasprefix_cs \'ab\' +\N why Professional Partial College 38 +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 +Theodore Diaz Skilled Manual Bachelors 28 +Apple Skilled Manual Bachelors 28 + +-- Customers | where Occupation hassuffix \'Ent\' +Stephanie Cox Management abcd defg Bachelors 33 + +-- Customers | where Occupation !hassuffix \'Ent\' +\N why Professional Partial College 38 +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 +Theodore Diaz Skilled Manual Bachelors 28 +Apple Skilled Manual Bachelors 28 + +-- Customers | where Occupation hassuffix \'ent\' +Stephanie Cox Management abcd defg Bachelors 33 + +-- Customers | where Occupation hassuffix \'ent\' +Stephanie Cox Management abcd defg Bachelors 33 + +-- Customers |where Education in (\'Bachelors\',\'High School\') +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Apple Skilled Manual Bachelors 28 + +-- Customers | where Education !in (\'Bachelors\',\'High School\') +\N why Professional Partial College 38 +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 + +-- Customers | where FirstName matches regex \'P.*r\' +Peter Nara Skilled Manual Graduate Degree 26 + +-- Customers | where FirstName startswith \'pet\' +Peter Nara Skilled Manual Graduate Degree 26 + +-- Customers | where FirstName !startswith \'pet\' +Latoya Shen Professional Graduate Degree 25 +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Apple Skilled Manual Bachelors 28 + +-- Customers | where FirstName startswith_cs \'pet\' + +-- Customers | where FirstName !startswith_cs \'pet\' +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 +Theodore Diaz Skilled Manual Bachelors 
28 +Stephanie Cox Management abcd defg Bachelors 33 +Apple Skilled Manual Bachelors 28 + +-- Customers | where isempty(LastName) +Apple Skilled Manual Bachelors 28 + +-- print isempty(1.2345) +0 + +-- Customers | where isnotempty(LastName) +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Peter Nara Skilled Manual Graduate Degree 26 +Latoya Shen Professional Graduate Degree 25 +\N why Professional Partial College 38 + +-- print isnotempty(1.2345) +1 + +-- Customers | where isnotnull(FirstName) +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Apple Skilled Manual Bachelors 28 + +-- Customers | where isnull(FirstName) +\N why Professional Partial College 38 + +-- Customers | project url_decode(\'https%3A%2F%2Fwww.test.com%2Fhello%20word\') | take 1 +https://www.test.com/hello word + +-- Customers | project url_encode(\'https://www.test.com/hello word\') | take 1 +https%3A%2F%2Fwww.test.com%2Fhello%20word + +-- Customers | project name_abbr = strcat(substring(FirstName,0,3), \' \', substring(LastName,2)) + y +Lat en +Pet ra +The az +Ste x +App + +-- Customers | project name = strcat(FirstName, \' \', LastName) + why +Latoya Shen +Peter Nara +Theodore Diaz +Stephanie Cox +Apple + +-- Customers | project FirstName, strlen(FirstName) +\N \N +Latoya 6 +Peter 5 +Theodore 8 +Stephanie 9 +Apple 5 + +-- Customers | project strrep(FirstName,2,\'_\') +_ +Latoya_Latoya +Peter_Peter +Theodore_Theodore +Stephanie_Stephanie +Apple_Apple + +--print from_str = strrep("ABC", 2) +ABCABC +--print from_int = strrep(123,3,".") +123.123.123 +--print from_time = strrep(3s,2," ") +00:00:03 00:00:03 + +-- Customers | project toupper(FirstName) +\N +LATOYA +PETER +THEODORE +STEPHANIE +APPLE + +-- Customers | project tolower(FirstName) +\N +latoya +peter +theodore +stephanie +apple + + +-- has_all 
(https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/has-all-operator); TODO: subquery not supported yet +Peter Nara Skilled Manual Graduate Degree 26 +Theodore Diaz Skilled Manual Bachelors 28 +Apple Skilled Manual Bachelors 28 + +-- has_any (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/has-anyoperator); TODO: subquery not supported yet +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Peter Nara Skilled Manual Graduate Degree 26 +Apple Skilled Manual Bachelors 28 + +-- countof (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/countoffunction) +3 +3 +1 + +-- extract ( https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/extractfunction) +PINEAPPLE ice cream is 20 +PINEAPPLE +20 + +20 +\N +\N +\N +\N +\N +45.6 +45.6 +alert + +-- extract_all (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/extractallfunction); TODO: captureGroups not supported yet +[['T','h','e'],['p','ric','e'],['P','INEAPPL','E'],['i','c','e'],['c','rea','m']] + +-- extract_json (https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/extractjsonfunction) + + +John +iPhone +\N +26 +26 +26 +true +26 +\N + +-- split (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/splitfunction) +['aa','bb'] +['bbb'] +[''] +['a','','b'] +['aa','cc'] +['aabbcc'] +['aaa','bbb','ccc'] +[NULL] + +-- strcat_delim (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/strcat-delimfunction); TODO: only support string now. 
+1-2-Ab +1-2-A-00:00:01 +1-2-A-55 +1-2-A-7.99 +qqqqq fffffff \'asd bcd\' "moo moo " + +-- base64_encode_fromguid() +8jMxriJurkmwahbmqbIS6w== +-- base64_decode_toarray() +[] +[75,117,115,116,111] +-- base64_decode_toguid() +10e99626-bc2b-4c75-bb3e-fe606de25700 +1 +-- base64_encode_tostring + +S3VzdG8x +-- base64_decode_tostring + +Kusto1 +-- parse_url() +{"Scheme":"scheme","Host":"","Port":"0","Path":"/this/is/a/path","Username":"username","Password":"password","Query Parameters":{"k1":"v1","k2":"v2"},"Fragment":"fragment"} +-- parse_url() +{"Scheme":"","Host":"","Port":"","Path":"","Username":"","Password":"","Query Parameters":{},"Fragment":""} +-- parse_urlquery() +{"Query Parameters":{"k1":"v1","k2":"v2","k3":"v3"}} +-- strcat -- +a1235.00:00:00 +a111.01:00:00 +ab\'c +-- strcmp() +0 1 -1 1 +-- substring() +CD +-- translate() +kusto xxx +-- trim() +https://www.ibm.com +Te st1 + asd +asd +sd +-- trim_start() +www.ibm.com +Te st1// $ +asdw + +asd +-- trim_end() +https +- Te st1 +wasd + +asd +-- trim, trim_start, trim_end all at once +--https://bing.com-- -- https://bing.com-- --https://bing.com https://bing.com +-- replace_regex +Number was: 1 +-- has_any_index() +0 1 -1 -1 +-- parse_version() +1000000020000000300000040 +1000000020000000000000000 +1000000020000000000000000 +\N +\N +\N +\N +\N +\N +1000000020000000300000004 +1000000020000000000000000 +1000000020000000300000000 +1000000000000000000000000 +-- parse_json() +[1,2,3] +[{"a":123.5,"b":"{\\"c\\":456}"}] +-- parse_command_line() +[NULL] +[NULL] +-- reverse() +321 +43.321 + +dsa +][ +]3,2,1[ +]\'redaV\',\'htraD\'[ +Z0000000.00:00:21T51-01-7102 +00:00:30 +Peter Nara Skilled Manual Graduate Degree 26 +Latoya Shen Professional Graduate Degree 25 +-- parse_csv() +[''] +['aaa'] +['aa','b','cc'] +['record1','a','b','c'] +-- indexof (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/indexoffunction) +9 +2 +2 +2 +-1 +2 +4 +2 +9 +-1 +-1 +-1 +9 +2 +-- has -- +1 +0 +1 +0 +0 +1 +1 +0 +1 +1 +0 + +asdf 
+asdf.ghkj +asdf.qwer + +asdf.qwer + +asdf.qwer +qwer +-- !has -- +asdfghkj +qwer +qwerqwer + +asdf +asdf.ghkj +asdfghkj +qwer +qwerqwer +-- has_all -- +asdf.qwer +-- has_any -- +asdf +asdf.ghkj +asdf.qwer +qwer diff --git a/tests/queries/0_stateless/02366_kql_func_string.sql b/tests/queries/0_stateless/02366_kql_func_string.sql new file mode 100644 index 000000000000..caed8fca22f3 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_string.sql @@ -0,0 +1,402 @@ +-- datatable(FirstName:string, LastName:string, Occupation:string, Education:string, Age:int) [ +-- 'Theodore', 'Diaz', 'Skilled Manual', 'Bachelors', 28, +-- 'Stephanie', 'Cox', 'Management abcd defg', 'Bachelors', 33, +-- 'Peter', 'Nara', 'Skilled Manual', 'Graduate Degree', 26, +-- 'Latoya', 'Shen', 'Professional', 'Graduate Degree', 25, +-- 'Apple', '', 'Skilled Manual', 'Bachelors', 28, +-- '', 'why', 'Professional', 'Partial College', 38 +-- ] + +DROP TABLE IF EXISTS Customers; +CREATE TABLE Customers +( + FirstName Nullable(String), + LastName String, + Occupation String, + Education String, + Age Nullable(UInt8) +) ENGINE = Memory; + +INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28), ('Stephanie','Cox','Management abcd defg','Bachelors',33),('Peter','Nara','Skilled Manual','Graduate Degree',26),('Latoya','Shen','Professional','Graduate Degree',25),('Apple','','Skilled Manual','Bachelors',28),(NULL,'why','Professional','Partial College',38); + +-- datatable (Version:string) [ +-- '1.2.3.4', +-- '1.2', +-- '1.2.3', +-- '1' +-- ] + +DROP TABLE IF EXISTS Versions; +CREATE TABLE Versions +( + Version String +) ENGINE = Memory; +INSERT INTO Versions VALUES ('1.2.3.4'),('1.2'),('1.2.3'),('1'); + +-- datatable (Text:string) [ +-- 'asdf', +-- 'asdf.ghkj', +-- 'asdf.qwer', +-- 'asdfghkj', +-- 'qwer', +-- 'qwerqwer' +-- ] + +drop table if exists StringTest; +create table StringTest +( + Text String +) engine = Memory; + +insert into StringTest values ('asdf'), 
('asdf.ghkj'), ('asdf.qwer'), ('asdfghkj'), ('qwer'), ('qwerqwer'); + + +set dialect='kusto'; +print '-- test String Functions --'; + +print '-- Customers |where Education contains \'degree\''; +Customers |where Education contains 'degree' | order by LastName; +print ''; +print '-- Customers |where Education !contains \'degree\''; +Customers |where Education !contains 'degree' | order by LastName; +print ''; +print '-- Customers |where Education contains \'Degree\''; +Customers |where Education contains 'Degree' | order by LastName; +print ''; +print '-- Customers |where Education !contains \'Degree\''; +Customers |where Education !contains 'Degree' | order by LastName; +print ''; +print '-- Customers | where FirstName endswith \'RE\''; +Customers | where FirstName endswith 'RE' | order by LastName; +print ''; +print '-- Customers | where ! FirstName endswith \'RE\''; +Customers | where FirstName ! endswith 'RE' | order by LastName; +print ''; +print '--Customers | where FirstName endswith_cs \'re\''; +Customers | where FirstName endswith_cs 're' | order by LastName; +print ''; +print '-- Customers | where FirstName !endswith_cs \'re\''; +Customers | where FirstName !endswith_cs 're' | order by LastName; +print ''; +print '-- Customers | where Occupation == \'Skilled Manual\''; +Customers | where Occupation == 'Skilled Manual' | order by LastName; +print ''; +print '-- Customers | where Occupation != \'Skilled Manual\''; +Customers | where Occupation != 'Skilled Manual' | order by LastName; +print ''; +print '-- Customers | where Occupation has \'skilled\''; +Customers | where Occupation has 'skilled' | order by LastName; +print ''; +print '-- Customers | where Occupation !has \'skilled\''; +Customers | where Occupation !has 'skilled' | order by LastName; +print ''; +print '-- Customers | where Occupation has \'Skilled\''; +Customers | where Occupation has 'Skilled'| order by LastName; +print ''; +print '-- Customers | where Occupation !has \'Skilled\''; +Customers 
| where Occupation !has 'Skilled'| order by LastName; +print ''; +print '-- Customers | where Occupation hasprefix_cs \'Ab\''; +Customers | where Occupation hasprefix_cs 'Ab'| order by LastName; +print ''; +print '-- Customers | where Occupation !hasprefix_cs \'Ab\''; +Customers | where Occupation !hasprefix_cs 'Ab'| order by LastName; +print ''; +print '-- Customers | where Occupation hasprefix_cs \'ab\''; +Customers | where Occupation hasprefix_cs 'ab'| order by LastName; +print ''; +print '-- Customers | where Occupation !hasprefix_cs \'ab\''; +Customers | where Occupation !hasprefix_cs 'ab'| order by LastName; +print ''; +print '-- Customers | where Occupation hassuffix \'Ent\''; +Customers | where Occupation hassuffix 'Ent'| order by LastName; +print ''; +print '-- Customers | where Occupation !hassuffix \'Ent\''; +Customers | where Occupation !hassuffix 'Ent'| order by LastName; +print ''; +print '-- Customers | where Occupation hassuffix \'ent\''; +Customers | where Occupation hassuffix 'ent'| order by LastName; +print ''; +print '-- Customers | where Occupation hassuffix \'ent\''; +Customers | where Occupation hassuffix 'ent'| order by LastName; +print ''; +print '-- Customers |where Education in (\'Bachelors\',\'High School\')'; +Customers |where Education in ('Bachelors','High School')| order by LastName; +print ''; +print '-- Customers | where Education !in (\'Bachelors\',\'High School\')'; +Customers | where Education !in ('Bachelors','High School')| order by LastName; +print ''; +print '-- Customers | where FirstName matches regex \'P.*r\''; +Customers | where FirstName matches regex 'P.*r'| order by LastName; +print ''; +print '-- Customers | where FirstName startswith \'pet\''; +Customers | where FirstName startswith 'pet'| order by LastName; +print ''; +print '-- Customers | where FirstName !startswith \'pet\''; +Customers | where FirstName !startswith 'pet'| order by LastName; +print ''; +print '-- Customers | where FirstName startswith_cs 
\'pet\''; +Customers | where FirstName startswith_cs 'pet'| order by LastName; +print ''; +print '-- Customers | where FirstName !startswith_cs \'pet\''; +Customers | where FirstName !startswith_cs 'pet'| order by LastName; +print ''; +print '-- Customers | where isempty(LastName)'; +Customers | where isempty(LastName); +print ''; +print '-- print isempty(1.2345)'; +print isempty(1.2345); +print ''; +print '-- Customers | where isnotempty(LastName)'; +Customers | where isnotempty(LastName); +print ''; +print '-- print isnotempty(1.2345)'; +print isnotempty(1.2345); +print ''; +print '-- Customers | where isnotnull(FirstName)'; +Customers | where isnotnull(FirstName)| order by LastName; +print ''; +print '-- Customers | where isnull(FirstName)'; +Customers | where isnull(FirstName)| order by LastName; +print ''; +print '-- Customers | project url_decode(\'https%3A%2F%2Fwww.test.com%2Fhello%20word\') | take 1'; +Customers | project url_decode('https%3A%2F%2Fwww.test.com%2Fhello%20word') | take 1; +print ''; +print '-- Customers | project url_encode(\'https://www.test.com/hello word\') | take 1'; +Customers | project url_encode('https://www.test.com/hello word') | take 1; +print ''; +print '-- Customers | project name_abbr = strcat(substring(FirstName,0,3), \' \', substring(LastName,2))'; +Customers | order by LastName | project name_abbr = strcat(substring(FirstName,0,3), ' ', substring(LastName,2)); +print ''; +print '-- Customers | project name = strcat(FirstName, \' \', LastName)'; +Customers | order by LastName | project name = strcat(FirstName, ' ', LastName); +print ''; +print '-- Customers | project FirstName, strlen(FirstName)'; +Customers | order by LastName | project FirstName, strlen(FirstName); +print ''; +print '-- Customers | project strrep(FirstName,2,\'_\')'; +Customers | order by LastName | project strrep(FirstName,2,'_'); +print ''; +print '--print from_str = strrep("ABC", 2)'; +print from_str = strrep('ABC', 2); +print '--print from_int = 
strrep(123,3,".")'; +print from_int = strrep(123, 3, '.'); +print '--print from_time = strrep(3s,2," ")'; +print from_time = strrep(3s, 2, ' '); +print ''; +print '-- Customers | project toupper(FirstName)'; +Customers | order by LastName | project toupper(FirstName); +print ''; +print '-- Customers | project tolower(FirstName)'; +Customers | order by LastName | project tolower(FirstName); +print ''; +-- Customer | where LastName in~ ("diaz", "cox") +print ''; +print '-- has_all (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/has-all-operator); TODO: subquery not supported yet'; +Customers | where Occupation has_all ('manual', 'skilled') | order by LastName; +print ''; +print '-- has_any (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/has-anyoperator); TODO: subquery not supported yet'; +Customers | where Occupation has_any ('Skilled', 'abcd'); +print ''; +print '-- countof (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/countoffunction)'; +Customers | project countof('The cat sat on the mat', 'at') | take 1; +Customers | project countof('The cat sat on the mat', 'at', 'normal') | take 1; +Customers | project countof('The cat sat on the mat', '\\s.he', 'regex') | take 1; +print ''; +print '-- extract ( https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/extractfunction)'; +print extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 0, 'The price of PINEAPPLE ice cream is 20'); +print extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 1, 'The price of PINEAPPLE ice cream is 20'); +print extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 2, 'The price of PINEAPPLE ice cream is 20'); +print extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 3, 'The price of PINEAPPLE ice cream is 20'); +print extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 2, 'The price of PINEAPPLE ice cream is 20', typeof(real)); +print extract("x=([0-9.]+)", 1, "hello x=45.6|wo" , typeof(bool)); +print extract("x=([0-9.]+)", 1, "hello x=45.6|wo" , typeof(date)); +print extract("x=([0-9.]+)", 1, "hello 
x=45.6|wo" , typeof(guid)); +print extract("x=([0-9.]+)", 1, "hello x=45.6|wo" , typeof(int)); +print extract("x=([0-9.]+)", 1, "hello x=45.6|wo" , typeof(long)); +print extract("x=([0-9.]+)", 1, "hello x=45.6|wo" , typeof(real)); +print extract("x=([0-9.]+)", 1, "hello x=45.6|wo" , typeof(decimal)); +print extract(".*Action=(\\w+)",1, "dstPostNATPort=80 proto=tcp Action=alert"); +print ''; +print '-- extract_all (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/extractallfunction); TODO: captureGroups not supported yet'; +Customers | project extract_all('(\\w)(\\w+)(\\w)','The price of PINEAPPLE ice cream is 20') | take 1; +print ''; +print '-- extract_json (https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/extractjsonfunction)'; +print extract_json('', ''); -- { serverError BAD_ARGUMENTS } +print extract_json('a', ''); -- { serverError BAD_ARGUMENTS } +print extract_json('$.firstName', ''); +print extract_json('$.phoneNumbers[0].type', ''); +print extractjson('$.firstName', '{"firstName":"John","lastName":"doe","age":26,"address":{"streetAddress":"naist street","city":"Nara","postalCode":"630-0192"},"phoneNumbers":[{"type":"iPhone","number":"0123-4567-8888"},{"type":"home","number":"0123-4567-8910"}]}'); +print extract_json('$.phoneNumbers[0].type', '{"firstName":"John","lastName":"doe","age":26,"address":{"streetAddress":"naist street","city":"Nara","postalCode":"630-0192"},"phoneNumbers":[{"type":"iPhone","number":"0123-4567-8888"},{"type":"home","number":"0123-4567-8910"}]}', typeof(string)); +print extract_json('$.phoneNumbers[0].type', '{"firstName":"John","lastName":"doe","age":26,"address":{"streetAddress":"naist street","city":"Nara","postalCode":"630-0192"},"phoneNumbers":[{"type":"iPhone","number":"0123-4567-8888"},{"type":"home","number":"0123-4567-8910"}]}', typeof(int)); +print extract_json('$.age', '{"firstName":"John","lastName":"doe","age":26,"address":{"streetAddress":"naist 
street","city":"Nara","postalCode":"630-0192"},"phoneNumbers":[{"type":"iPhone","number":"0123-4567-8888"},{"type":"home","number":"0123-4567-8910"}]}'); +print extract_json('$.age', '{"firstName":"John","lastName":"doe","age":26,"address":{"streetAddress":"naist street","city":"Nara","postalCode":"630-0192"},"phoneNumbers":[{"type":"iPhone","number":"0123-4567-8888"},{"type":"home","number":"0123-4567-8910"}]}', typeof(int)); +print extract_json('$.age', '{"firstName":"John","lastName":"doe","age":26,"address":{"streetAddress":"naist street","city":"Nara","postalCode":"630-0192"},"phoneNumbers":[{"type":"iPhone","number":"0123-4567-8888"},{"type":"home","number":"0123-4567-8910"}]}', typeof(long)); +print extract_json('$.age', '{"firstName":"John","lastName":"doe","age":26,"address":{"streetAddress":"naist street","city":"Nara","postalCode":"630-0192"},"phoneNumbers":[{"type":"iPhone","number":"0123-4567-8888"},{"type":"home","number":"0123-4567-8910"}]}', typeof(bool)); +print extract_json('$.age', '{"firstName":"John","lastName":"doe","age":26,"address":{"streetAddress":"naist street","city":"Nara","postalCode":"630-0192"},"phoneNumbers":[{"type":"iPhone","number":"0123-4567-8888"},{"type":"home","number":"0123-4567-8910"}]}', typeof(double)); +print extract_json('$.age', '{"firstName":"John","lastName":"doe","age":26,"address":{"streetAddress":"naist street","city":"Nara","postalCode":"630-0192"},"phoneNumbers":[{"type":"iPhone","number":"0123-4567-8888"},{"type":"home","number":"0123-4567-8910"}]}', typeof(guid)); +-- print extract_json('$.phoneNumbers', '{"firstName":"John","lastName":"doe","age":26,"address":{"streetAddress":"naist street","city":"Nara","postalCode":"630-0192"},"phoneNumbers":[{"type":"iPhone","number":"0123-4567-8888"},{"type":"home","number":"0123-4567-8910"}]}', typeof(dynamic)); we won't be able to handle this particular case for a while, because it should return a dictionary +print ''; +print '-- split 
(https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/splitfunction)'; +Customers | project split('aa_bb', '_') | take 1; +Customers | project split('aaa_bbb_ccc', '_', 1) | take 1; +Customers | project split('', '_') | take 1; +Customers | project split('a__b', '_') | take 1; +Customers | project split('aabbcc', 'bb') | take 1; +Customers | project split('aabbcc', '') | take 1; +Customers | project split('aaa_bbb_ccc', '_', -1) | take 1; +Customers | project split('aaa_bbb_ccc', '_', 10) | take 1; +print ''; +print '-- strcat_delim (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/strcat-delimfunction); TODO: only support string now.'; +Customers | project strcat_delim('-', '1', '2', strcat('A','b')) | take 1; +Customers | project strcat_delim('-', '1', '2', 'A' , 1s) | take 1; +Customers | project strcat_delim('-', '1', '2', 'A' , 55) | take 1; +Customers | project strcat_delim('-', '1', '2', 'A' , 7.99) | take 1; +print strcat_delim(' ', "qqqqq", "fffffff", "'asd bcd'", "\"moo moo \""); +print ''; +print '-- base64_encode_fromguid()'; +-- print base64_encode_fromguid(guid(null)); +print base64_encode_fromguid(guid('ae3133f2-6e22-49ae-b06a-16e6a9b212eb')); +print base64_encode_fromguid(dynamic(null)); -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } +print base64_encode_fromguid("abcd1231"); -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } +print '-- base64_decode_toarray()'; +print base64_decode_toarray(''); +print base64_decode_toarray('S3VzdG8='); +print '-- base64_decode_toguid()'; +print base64_decode_toguid("JpbpECu8dUy7Pv5gbeJXAA=="); +print base64_decode_toguid(base64_encode_fromguid(guid('ae3133f2-6e22-49ae-b06a-16e6a9b212eb'))) == guid('ae3133f2-6e22-49ae-b06a-16e6a9b212eb'); +print '-- base64_encode_tostring'; +print base64_encode_tostring(''); +print base64_encode_tostring('Kusto1'); +print '-- base64_decode_tostring'; +print base64_decode_tostring(''); +print base64_decode_tostring('S3VzdG8x'); +print '-- 
parse_url()'; +print parse_url('scheme://username:password@host:1234/this/is/a/path?k1=v1&k2=v2#fragment'); +print '-- parse_url()'; +print parse_url(''); +print '-- parse_urlquery()'; +print parse_urlquery('k1=v1&k2=v2&k3=v3'); +print '-- strcat --'; +print strcat('a', 1, 2, 3, timespan(5d)); +print strcat('a', null, 9 + 2, 1h + 1d); +print strcat('a', "b", "'c"); +print '-- strcmp()'; +print strcmp('ABC','ABC'), strcmp('abc','ABC'), strcmp('ABC','abc'), strcmp('abcde','abc'); +print '-- substring()'; +print substring("ABCD", -2, 2); +print '-- translate()'; +print translate('krasp', 'otsku', 'spark'), translate('abc', '', 'ab'), translate('abc', 'x', 'abc'); +print '-- trim()'; +print trim("--", "--https://www.ibm.com--"); +print trim("[^\w]+", strcat("- ","Te st", "1", "// $")); +print trim("", " asd "); +print trim("a$", "asd"); +print trim("^a", "asd"); +print '-- trim_start()'; +print trim_start("https://", "https://www.ibm.com"); +print trim_start("[^\w]+", strcat("- ","Te st", "1", "// $")); +print trim_start("asd$", "asdw"); +print trim_start("asd$", "asd"); +print trim_start("d$", "asd"); +print '-- trim_end()'; +print trim_end("://www.ibm.com", "https://www.ibm.com"); +print trim_end("[^\w]+", strcat("- ","Te st", "1", "// $")); +print trim_end("^asd", "wasd"); +print trim_end("^asd", "asd"); +print trim_end("^a", "asd"); +print '-- trim, trim_start, trim_end all at once'; +print str = "--https://bing.com--", pattern = '--' | extend start = trim_start(pattern, str), end = trim_end(pattern, str), both = trim(pattern, str); +print '-- replace_regex'; +print replace_regex(strcat('Number is ', '1'), 'is (\d+)', 'was: \1'); +print '-- has_any_index()'; +print has_any_index('this is an example', dynamic(['this', 'example'])), has_any_index("this is an example", dynamic(['not', 'example'])), has_any_index("this is an example", dynamic(['not', 'found'])), has_any_index("this is an example", dynamic([])); +print '-- parse_version()'; +print parse_version(42); -- 
{ serverError ILLEGAL_TYPE_OF_ARGUMENT } +-- print parse_version(''); -> NULL +print parse_version('1.2.3.40'); +print parse_version('1.2'); +print parse_version(strcat('1.', '2')); +print parse_version('1.2.4.5.6'); +print parse_version('moo'); +print parse_version('moo.boo.foo'); +print parse_version(strcat_delim('.', 'moo', 'boo', 'foo')); +print parse_version(''); +print parse_version('....'); +Versions | project parse_version(Version); +print '-- parse_json()'; +print parse_json(dynamic([1, 2, 3])); +print parse_json('{"a":123.5, "b":"{\\"c\\":456}"}'); +print '-- parse_command_line()'; +print parse_command_line(55, 'windows'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +-- print parse_command_line((52 + 3) * 4 % 2, 'windows'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print parse_command_line('', 'windows'); +print parse_command_line(strrep(' ', 6), 'windows'); +-- print parse_command_line('echo \"hello world!\" print$?', 'windows'); -> ["echo","hello world!","print$?"] +-- print parse_command_line("yolo swag 'asd bcd' \"moo moo \"", 'windows'); -> ["yolo","swag","'asd","bcd'","moo moo "] +-- print parse_command_line(strcat_delim(' ', "yolo", "swag", "\'asd bcd\'", "\"moo moo \""), 'windows'); -> ["yolo","swag","'asd","bcd'","moo moo "] +print '-- reverse()'; +print reverse(123); +print reverse(123.34); +print reverse(''); +print reverse("asd"); +print reverse(dynamic([])); +print reverse(dynamic([1, 2, 3])); +print reverse(dynamic(['Darth', "Vader"])); +print reverse(datetime(2017-10-15 12:00)); +print reverse(timespan(3h)); +Customers | where Education contains 'degree' | order by reverse(FirstName); +print '-- parse_csv()'; +print parse_csv(''); +print parse_csv(65); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print parse_csv('aaa'); +print result=parse_csv('aa,b,cc'); +print result_multi_record=parse_csv('record1,a,b,c\nrecord2,x,y,z'); +-- print result=parse_csv('aa,"b,b,b",cc,"Escaping quotes: ""Title""","line1\nline2"'); -> 
["aa","b,b,b","cc","Escaping quotes: \"Title\"","line1\nline2"] +-- print parse_csv(strcat(strcat_delim(',', 'aa', '"b,b,b"', 'cc', '"Escaping quotes: ""Title"""', '"line1\nline2"'), '\r\n', strcat_delim(',', 'asd', 'qcf'))); -> ["aa","b,b,b","cc","Escaping quotes: \"Title\"","line1\nline2"] +print '-- indexof (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/indexoffunction)'; + +Customers | project indexof('abcdefgabcdefg', 'cde', 1, 10, 2) | take 1; +print indexof('abcdefg','cde'); +print idx2 = indexof('abcdefg','cde',0,3); +print idx3 = indexof('abcdefg','cde',1,2); +print idx4 = indexof('abcdefg','cde',3,4); +print idx5 = indexof('abcdefg','cde',-5); +print idx6 = indexof(1234567,5,1,4); +print idx7 = indexof('abcdefg','cde',2,-1); +print idx8 = indexof('abcdefgabcdefg', 'cde', 1, 10, 2); +print idx9 = indexof('abcdefgabcdefg', 'cde', 1, -1, 3); +print indexof('abcdefgabcdefg','cde', -1); +print indexof('abcdefgabcdefg','cde', -4); +print indexof('abcdefgabcdefg','cde', -5); +print indexof('abcdefgabcdefg','cde', -105); + +print '-- has --'; +print 'svchost.exe1' has ''; +print 'svchost.exe1' has 'svchost.exe'; +print 'svchost.exe' has 'svchost.exe'; +print 'svchost.exe' has 'svchost.exe1'; +print '' has 'svchost.exe1'; +print '' has ''; +print '.' has ''; +print '.' has ','; +print '.' 
has '.'; +print '.ex.e' has 'ex'; +print '.ex.e' has 'exe'; +print ''; +StringTest | where Text has 'asdf'; +print ''; +StringTest | where Text has 'asdf.qwer'; +print ''; +StringTest | where Text has 'qwer'; + +print '-- !has --'; +StringTest | where Text !has 'asdf'; +print ''; +StringTest | where Text !has 'asdf.qwer'; + +print '-- has_all --'; +StringTest | where Text has_all ('asdf', 'qwer'); + +print '-- has_any --'; +StringTest | where Text has_any ('asdf', 'qwer'); diff --git a/tests/queries/0_stateless/02366_kql_lookup_join.reference b/tests/queries/0_stateless/02366_kql_lookup_join.reference new file mode 100644 index 000000000000..397be3efd901 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_lookup_join.reference @@ -0,0 +1,74 @@ +-- lookup 1 -- +1 Bill Gates billg +2 Bill Clinton billc +3 Bill Clinton billc +4 Steve Ballmer steveb +5 Tim Cook timc +-- lookup 2 -- +1 Bill Gates billg +2 Bill Clinton billc +3 Bill Clinton billc +4 Steve Ballmer steveb +5 Tim Cook timc +-- lookup 3 -- +1 Bill Gates billg +2 Bill Clinton billc +3 Bill Clinton billc +4 Steve Ballmer +5 Tim Cook +-- lookup 4 -- +1 Bill Gates billg +2 Bill Clinton billc +3 Bill Clinton billc +4 Steve Ballmer steveb +5 Tim Cook timc +-- lookup 5 -- +1 Bill Gates billg billg +2 Bill Clinton billc billc +3 Bill Clinton billc billc +4 Steve Ballmer steveb +5 Tim Cook timc +-- Default join -- +c 4 30 +c 4 20 +b 3 10 +-- Default join 2-- +c 4 30 +c 4 20 +b 3 10 +-- Inner-join -- +c 4 30 +c 4 20 +b 3 10 +b 2 10 +-- Left outer-join -- +c 4 30 +c 4 20 +b 3 10 +b 2 10 +a 1 0 +-- Right outer-join -- +d 0 40 +c 4 30 +c 4 20 +b 3 10 +b 2 10 +-- Full outer-join -- +d 0 40 +c 4 30 +c 4 20 +b 3 10 +b 2 10 +a 1 0 +-- Left anti-join -- +a 1 0 +-- Right anti-join -- +d 0 40 +-- Left semi-join -- +c 4 +b 3 +b 2 +-- Right semi-join -- +c 30 +c 20 +b 10 diff --git a/tests/queries/0_stateless/02366_kql_lookup_join.sql b/tests/queries/0_stateless/02366_kql_lookup_join.sql new file mode 100644 index 
000000000000..11f557265f06 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_lookup_join.sql @@ -0,0 +1,78 @@ +DROP TABLE IF EXISTS FactTable; +CREATE TABLE FactTable (Row String, Personal String, Family String) ENGINE = Memory; +INSERT INTO FactTable VALUES ('1', 'Bill', 'Gates'); +INSERT INTO FactTable VALUES ('2', 'Bill', 'Clinton'); +INSERT INTO FactTable VALUES ('3', 'Bill', 'Clinton'); +INSERT INTO FactTable VALUES ('4', 'Steve', 'Ballmer'); +INSERT INTO FactTable VALUES ('5', 'Tim', 'Cook'); + +DROP TABLE IF EXISTS DimTable; +CREATE TABLE DimTable (Personal String, Family String, Alias String) ENGINE = Memory; +INSERT INTO DimTable VALUES ('Bill', 'Gates', 'billg'); +INSERT INTO DimTable VALUES ('Bill', 'Clinton', 'billc'); +INSERT INTO DimTable VALUES ('Steve', 'Ballmer', 'steveb'); +INSERT INTO DimTable VALUES ('Tim', 'Cook', 'timc'); + +-- datatable (Key:string, Value1:long) [ +-- 'a', 1, +-- 'b', 2, +-- 'b', 3, +-- 'c', 4 +-- ] + +DROP TABLE IF EXISTS X; +CREATE TABLE X (Key String, Value1 Int64) ENGINE = Memory; +INSERT INTO X VALUES ('a',1); +INSERT INTO X VALUES ('b',2); +INSERT INTO X VALUES ('b',3); +INSERT INTO X VALUES ('c',4); + +-- datatable (Key:string, Value2:long) [ +-- 'b', 10, +-- 'c', 20, +-- 'c', 30, +-- 'd', 40 +-- ] + +DROP TABLE IF EXISTS Y; +CREATE TABLE Y (Key String, Value2 Int64) ENGINE = Memory; +INSERT INTO Y VALUES ('b',10); +INSERT INTO Y VALUES ('c',20); +INSERT INTO Y VALUES ('c',30); +INSERT INTO Y VALUES ('d',40); + +set dialect='kusto'; + +print '-- lookup 1 --'; +FactTable | lookup kind=leftouter DimTable on Personal, Family | order by Row asc; +print '-- lookup 2 --'; +FactTable | lookup kind=inner DimTable on Personal, Family | order by Row asc; +print '-- lookup 3 --'; +FactTable | lookup kind=leftouter (DimTable | where Personal == 'Bill') on Personal, Family | order by Row asc; +print '-- lookup 4 --'; +FactTable | project Row, Personal , Family| lookup kind=leftouter DimTable on Personal, Family | order by Row 
asc; +print '-- lookup 5 --'; +FactTable |project Row, Personal , Family| lookup kind=leftouter (DimTable | where Personal == 'Bill') on Personal, Family| lookup kind=inner DimTable on Personal, Family | order by Row asc; + +print '-- Default join --'; +X | order by Key, Value1 | join ( Y | order by Key, Value2 ) on $left.Key == $right.Key | order by Key, Value1, Value2; +print '-- Default join 2--'; +X | order by Key, Value1 | join kind=innerunique ( Y | order by Key, Value2 ) on Key | order by Key, Value1, Value2; +print '-- Inner-join --'; +X | order by Key, Value1 | join kind=inner ( Y | order by Key, Value2 ) on Key | order by Key, Value1, Value2; +print '-- Left outer-join --'; +X | order by Key, Value1 | join kind=leftouter ( Y | order by Key, Value2 ) on Key | order by Key, Value1, Value2; +print '-- Right outer-join --'; +X | order by Key, Value1 | join kind=rightouter ( Y | order by Key, Value2 ) on Key | order by Key, Value1, Value2; +print '-- Full outer-join --'; +X | order by Key, Value1 | join kind=fullouter ( Y | order by Key, Value2 ) on Key | order by Key, Value1, Value2; +print '-- Left anti-join --'; +X | order by Key, Value1 | join kind=leftanti ( Y | order by Key, Value2 ) on Key | order by Key, Value1, Value2; +print '-- Right anti-join --'; +X | order by Key, Value1 | join kind=rightanti ( Y | order by Key, Value2 ) on Key | order by Key, Value1, Value2; +print '-- Left semi-join --'; +-- projecting at the end shouldn't be necessary, since Value2 shouldn't make it into the result set in the first place as per KQL specification +X | order by Key, Value1 | join kind=leftsemi ( Y | order by Key, Value2 ) on Key | order by Key, Value1, Value2 | project Key, Value1; +print '-- Right semi-join --'; +-- projecting at the end shouldn't be necessary, since Value1 shouldn't make it into the result set in the first place as per KQL specification +X | order by Key, Value1 | join kind=rightsemi ( Y | order by Key, Value2 ) on Key | order by Key, Value1, 
Value2 | project Key, Value2; \ No newline at end of file diff --git a/tests/queries/0_stateless/02366_kql_makeseries.reference b/tests/queries/0_stateless/02366_kql_makeseries.reference new file mode 100644 index 000000000000..dc25ee4beeb1 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_makeseries.reference @@ -0,0 +1,61 @@ +-- from to +Costco Snargaluff ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [200,0,102] +Costco Apple ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [0,2,0] +Aldi Snargaluff ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [0,500,0] +Aldi Apple ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [5,0,6] +-- from +Costco Snargaluff ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [200,0,102] +Costco Apple ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000'] [0,2] +Aldi Snargaluff ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000'] [0,500] +Aldi Apple ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [5,0,6] +-- to +Costco Snargaluff ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [200,0,102] +Costco Apple ['2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [2,0] +Aldi Snargaluff ['2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [500,0] +Aldi Apple ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [5,0,6] +-- without from/to +Costco Snargaluff ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [200,0,102] +Costco Apple ['2016-09-11 00:00:00.000000000'] [2] +Aldi Snargaluff ['2016-09-11 00:00:00.000000000'] [500] +Aldi Apple ['2016-09-10 
00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [5,0,6] +-- without by +['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [70,334,54] +-- without aggregation alias +Costco Snargaluff ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [200,0,102] +Aldi Snargaluff ['2016-09-11 00:00:00.000000000'] [500] +Aldi Apple ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [5,0,6] +Costco Apple ['2016-09-11 00:00:00.000000000'] [2] +-- assign group alias +Costco Snargaluff ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [200,0,102] +Aldi Snargaluff ['2016-09-11 00:00:00.000000000'] [500] +Aldi Apple ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [5,0,6] +Costco Apple ['2016-09-11 00:00:00.000000000'] [2] +-- 3d step +Costco Snargaluff ['2016-09-10 00:00:00.000000000'] [134.66666666666666] +Costco Apple ['2016-09-10 00:00:00.000000000'] [2] +Aldi Snargaluff ['2016-09-10 00:00:00.000000000'] [500] +Aldi Apple ['2016-09-10 00:00:00.000000000'] [5.5] +-- numeric column +-- from to +Costco Snargaluff [10,11,12,13,14] [200,0,102,0,0] +Aldi Snargaluff [10,11,12,13,14] [0,500,0,0,0] +Aldi Apple [10,11,12,13,14] [5,0,6,0,0] +Costco Apple [10,11,12,13,14] [0,2,0,0,0] +-- from +Costco Snargaluff [10,11,12] [200,0,102] +Aldi Snargaluff [10,11] [0,500] +Aldi Apple [10,11,12] [5,0,6] +Costco Apple [10,11] [0,2] +-- to +Costco Snargaluff [8,12,16] [200,102,0] +Aldi Snargaluff [8,12,16] [500,0,0] +Aldi Apple [8,12,16] [5,6,0] +Costco Apple [8,12,16] [2,0,0] +-- without from/to +Costco Snargaluff [10,12] [200,102] +Aldi Snargaluff [10] [500] +Aldi Apple [10,12] [5,6] +Costco Apple [10] [2] +-- without by +[10,12] [202,54] +['2017-01-01 00:00:00.000000000','2017-01-02 00:00:00.000000000','2017-01-03 
00:00:00.000000000','2017-01-04 00:00:00.000000000','2017-01-05 00:00:00.000000000','2017-01-06 00:00:00.000000000','2017-01-07 00:00:00.000000000','2017-01-08 00:00:00.000000000','2017-01-09 00:00:00.000000000'] [4,3,5,0,10.5,4,3,8,7] diff --git a/tests/queries/0_stateless/02366_kql_makeseries.sql b/tests/queries/0_stateless/02366_kql_makeseries.sql new file mode 100644 index 000000000000..3926d91e644d --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_makeseries.sql @@ -0,0 +1,71 @@ +-- Azure Data Explorer Test Data +-- let make_series_test_table = datatable (Supplier:string, Fruit:string, Price: real, Purchase:datetime) +-- [ +-- 'Aldi','Apple',4,'2016-09-10', +-- 'Costco','Apple',2,'2016-09-11', +-- 'Aldi','Apple',6,'2016-09-10', +-- 'Costco','Snargaluff',100,'2016-09-12', +-- 'Aldi','Apple',7,'2016-09-12', +-- 'Aldi','Snargaluff',400,'2016-09-11', +-- 'Costco','Snargaluff',104,'2016-09-12', +-- 'Aldi','Apple',5,'2016-09-12', +-- 'Aldi','Snargaluff',600,'2016-09-11', +-- 'Costco','Snargaluff',200,'2016-09-10', +-- ]; +DROP TABLE IF EXISTS make_series_test_table; +CREATE TABLE make_series_test_table +( + Supplier Nullable(String), + Fruit String, + Price Float64, + Purchase Date +) ENGINE = Memory; +INSERT INTO make_series_test_table VALUES ('Aldi','Apple',4,'2016-09-10'), ('Costco','Apple',2,'2016-09-11'), ('Aldi','Apple',6,'2016-09-10'), ('Costco','Snargaluff',100,'2016-09-12'), ('Aldi','Apple',7,'2016-09-12'), ('Aldi','Snargaluff',400,'2016-09-11'),('Costco','Snargaluff',104,'2016-09-12'),('Aldi','Apple',5,'2016-09-12'),('Aldi','Snargaluff',600,'2016-09-11'),('Costco','Snargaluff',200,'2016-09-10'); +DROP TABLE IF EXISTS make_series_test_table2; +CREATE TABLE make_series_test_table2 +( + Supplier Nullable(String), + Fruit String , + Price Int32, + Purchase Int32 +) ENGINE = Memory; +INSERT INTO make_series_test_table2 VALUES 
('Aldi','Apple',4,10),('Costco','Apple',2,11),('Aldi','Apple',6,10),('Costco','Snargaluff',100,12),('Aldi','Apple',7,12),('Aldi','Snargaluff',400,11),('Costco','Snargaluff',104,12),('Aldi','Apple',5,12),('Aldi','Snargaluff',600,11),('Costco','Snargaluff',200,10); +DROP TABLE IF EXISTS make_series_test_table3; +CREATE TABLE make_series_test_table3 +( + timestamp datetime, + metric Float64, +) ENGINE = Memory; +INSERT INTO make_series_test_table3 VALUES (parseDateTimeBestEffort('2016-12-31T06:00'), 50), (parseDateTimeBestEffort('2017-01-01'), 4), (parseDateTimeBestEffort('2017-01-02'), 3), (parseDateTimeBestEffort('2017-01-03'), 4), (parseDateTimeBestEffort('2017-01-03T03:00'), 6), (parseDateTimeBestEffort('2017-01-05'), 8), (parseDateTimeBestEffort('2017-01-05T13:40'), 13), (parseDateTimeBestEffort('2017-01-06'), 4), (parseDateTimeBestEffort('2017-01-07'), 3), (parseDateTimeBestEffort('2017-01-08'), 8), (parseDateTimeBestEffort('2017-01-08T21:00'), 8), (parseDateTimeBestEffort('2017-01-09'), 2), (parseDateTimeBestEffort('2017-01-09T12:00'), 11), (parseDateTimeBestEffort('2017-01-10T05:00'), 5); + +set dialect = 'kusto'; +print '-- from to'; +make_series_test_table | make-series PriceAvg = avg(Price) default=0 on Purchase from datetime(2016-09-10) to datetime(2016-09-13) step 1d by Supplier, Fruit | order by Supplier, Fruit; +print '-- from'; +make_series_test_table | make-series PriceAvg = avg(Price) default=0 on Purchase from datetime(2016-09-10) step 1d by Supplier, Fruit | order by Supplier, Fruit; +print '-- to'; +make_series_test_table | make-series PriceAvg = avg(Price) default=0 on Purchase to datetime(2016-09-13) step 1d by Supplier, Fruit | order by Supplier, Fruit; +print '-- without from/to'; +make_series_test_table | make-series PriceAvg = avg(Price) default=0 on Purchase step 1d by Supplier, Fruit | order by Supplier, Fruit; +print '-- without by'; +make_series_test_table | make-series PriceAvg = avg(Price) default=0 on Purchase step 1d; +print '-- 
without aggregation alias'; +make_series_test_table | make-series avg(Price) default=0 on Purchase step 1d by Supplier, Fruit; +print '-- assign group alias'; +make_series_test_table | make-series avg(Price) default=0 on Purchase step 1d by Supplier_Name = Supplier, Fruit; +print '-- 3d step'; +make_series_test_table | make-series PriceAvg = avg(Price) default=0 on Purchase from datetime(2016-09-10) to datetime(2016-09-13) step 3d by Supplier, Fruit | order by Supplier, Fruit; + +print '-- numeric column'; +print '-- from to'; +make_series_test_table2 | make-series PriceAvg=avg(Price) default=0 on Purchase from 10 to 15 step 1.0 by Supplier, Fruit; +print '-- from'; +make_series_test_table2 | make-series PriceAvg=avg(Price) default=0 on Purchase from 10 step 1.0 by Supplier, Fruit; +print '-- to'; +make_series_test_table2 | make-series PriceAvg=avg(Price) default=0 on Purchase to 18 step 4.0 by Supplier, Fruit; +print '-- without from/to'; +make_series_test_table2 | make-series PriceAvg=avg(Price) default=0 on Purchase step 2.0 by Supplier, Fruit; +print '-- without by'; +make_series_test_table2 | make-series PriceAvg=avg(Price) default=0 on Purchase step 2.0; + +make_series_test_table3 | make-series avg(metric) default=0 on timestamp from datetime(2017-01-01) to datetime(2017-01-10) step 1d; diff --git a/tests/queries/0_stateless/02366_kql_mvexpand.reference b/tests/queries/0_stateless/02366_kql_mvexpand.reference new file mode 100644 index 000000000000..25be070eb0b7 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_mvexpand.reference @@ -0,0 +1,65 @@ +-- mv-expand -- +-- mv_expand_test_table | mv-expand c -- +1 ['Salmon','Steak','Chicken'] 1 [5,6,7,8] +1 ['Salmon','Steak','Chicken'] 2 [5,6,7,8] +1 ['Salmon','Steak','Chicken'] 3 [5,6,7,8] +1 ['Salmon','Steak','Chicken'] 4 [5,6,7,8] +-- mv_expand_test_table | mv-expand c, d -- +1 ['Salmon','Steak','Chicken'] 1 5 +1 ['Salmon','Steak','Chicken'] 2 6 +1 ['Salmon','Steak','Chicken'] 3 7 +1 
['Salmon','Steak','Chicken'] 4 8 +-- mv_expand_test_table | mv-expand b | mv-expand c -- +1 Salmon 1 [5,6,7,8] +1 Salmon 2 [5,6,7,8] +1 Salmon 3 [5,6,7,8] +1 Salmon 4 [5,6,7,8] +1 Steak 1 [5,6,7,8] +1 Steak 2 [5,6,7,8] +1 Steak 3 [5,6,7,8] +1 Steak 4 [5,6,7,8] +1 Chicken 1 [5,6,7,8] +1 Chicken 2 [5,6,7,8] +1 Chicken 3 [5,6,7,8] +1 Chicken 4 [5,6,7,8] +-- mv_expand_test_table | mv-expand with_itemindex=index b, c, d -- +0 1 Salmon 1 5 +1 1 Steak 2 6 +2 1 Chicken 3 7 +3 1 4 8 +-- mv_expand_test_table | mv-expand array_concat(c,d) -- +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 1 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 2 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 3 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 4 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 5 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 6 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 7 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 8 +-- mv_expand_test_table | mv-expand x = c, y = d -- +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 1 5 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 2 6 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 3 7 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 4 8 +-- mv_expand_test_table | mv-expand xy = array_concat(c, d) -- +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 1 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 2 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 3 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 4 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 5 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 6 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 7 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 8 +-- mv_expand_test_table | mv-expand xy = array_concat(c, d) limit 2| summarize count() by xy -- +1 1 +2 1 +-- mv_expand_test_table | mv-expand with_itemindex=index c,d to typeof(bool) -- +0 1 ['Salmon','Steak','Chicken'] 1 true +1 1 
['Salmon','Steak','Chicken'] 2 true +2 1 ['Salmon','Steak','Chicken'] 3 true +3 1 ['Salmon','Steak','Chicken'] 4 true +-- mv_expand_test_table | mv-expand c to typeof(bool) -- +1 ['Salmon','Steak','Chicken'] [5,6,7,8] true +1 ['Salmon','Steak','Chicken'] [5,6,7,8] true +1 ['Salmon','Steak','Chicken'] [5,6,7,8] true +1 ['Salmon','Steak','Chicken'] [5,6,7,8] true diff --git a/tests/queries/0_stateless/02366_kql_mvexpand.sql b/tests/queries/0_stateless/02366_kql_mvexpand.sql new file mode 100644 index 000000000000..e77986096463 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_mvexpand.sql @@ -0,0 +1,35 @@ +-- datatable(a: int, b: dynamic, c: dynamic, d: dynamic) [ +-- 1, dynamic(['Salmon', 'Steak', 'Chicken']), dynamic([1, 2, 3, 4]), dynamic([5, 6, 7, 8]) +-- ] + +DROP TABLE IF EXISTS mv_expand_test_table; +CREATE TABLE mv_expand_test_table +( + a UInt8, + b Array(String), + c Array(Int8), + d Array(Int8) +) ENGINE = Memory; +INSERT INTO mv_expand_test_table VALUES (1, ['Salmon', 'Steak','Chicken'],[1,2,3,4],[5,6,7,8]); +set dialect='kusto'; +print '-- mv-expand --'; +print '-- mv_expand_test_table | mv-expand c --'; +mv_expand_test_table | mv-expand c; +print '-- mv_expand_test_table | mv-expand c, d --'; +mv_expand_test_table | mv-expand c, d; +print '-- mv_expand_test_table | mv-expand b | mv-expand c --'; +mv_expand_test_table | mv-expand b | mv-expand c; +print '-- mv_expand_test_table | mv-expand with_itemindex=index b, c, d --'; +mv_expand_test_table | mv-expand with_itemindex=index b, c, d; +print '-- mv_expand_test_table | mv-expand array_concat(c,d) --'; +mv_expand_test_table | mv-expand array_concat(c,d); +print '-- mv_expand_test_table | mv-expand x = c, y = d --'; +mv_expand_test_table | mv-expand x = c, y = d; +print '-- mv_expand_test_table | mv-expand xy = array_concat(c, d) --'; +mv_expand_test_table | mv-expand xy = array_concat(c, d); +print '-- mv_expand_test_table | mv-expand xy = array_concat(c, d) limit 2| summarize count() by xy --'; 
+mv_expand_test_table | mv-expand xy = array_concat(c, d) limit 2| summarize count() by xy; +print '-- mv_expand_test_table | mv-expand with_itemindex=index c,d to typeof(bool) --'; +mv_expand_test_table | mv-expand with_itemindex=index c,d to typeof(bool); +print '-- mv_expand_test_table | mv-expand c to typeof(bool) --'; +mv_expand_test_table | mv-expand c to typeof(bool); diff --git a/tests/queries/0_stateless/02366_kql_native_bin.reference b/tests/queries/0_stateless/02366_kql_native_bin.reference new file mode 100644 index 000000000000..901fc2c687db --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_native_bin.reference @@ -0,0 +1,33 @@ +-- kql_bin -- +-- Numbers -- +4.5 +4 +3 +4.5 +\N +nan +-- Intervals -- +0 Nullable(IntervalWeek) +2000000000 +2000000000 Nullable(IntervalNanosecond) +\N +-- DateTime64 -- +2022-11-08 12:34:56.789012300 +2022-11-08 12:34:56.789012000 +2022-11-08 12:34:56.789000000 +2022-11-08 12:34:56.000000000 +2022-11-08 12:34:56.000000000 +2022-11-08 12:34:00.000000000 +2022-11-08 12:34:00.000000000 +\N +-- Date -- +2022-11-06 00:00:00.000000000 +-- Date32 -- +2022-11-06 00:00:00.000000000 +-- DateTime -- +2022-11-08 12:34:00.000000000 +-- kql_bin_at -- +4.5 +-43200000000000 +2017-05-14 12:00:00.000000000 +2017-05-14 00:00:00.000000000 diff --git a/tests/queries/0_stateless/02366_kql_native_bin.sql b/tests/queries/0_stateless/02366_kql_native_bin.sql new file mode 100644 index 000000000000..0b82dbcd03e7 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_native_bin.sql @@ -0,0 +1,47 @@ +select '-- kql_bin --'; +select '-- Numbers --'; +select kql_bin(4.5, 1.5); +select kql_bin(4.5, 2); +select kql_bin(4, 3); +select kql_bin(5, 1.5); +select kql_bin(5, 0); +select kql_bin(4.5, 0); + +select kql_bin(5, toIntervalNanosecond(1000)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select kql_bin(5, toDateTime64('2022-11-08 12:34:56.7890123', 7, 'UTC')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +select '-- Intervals --'; +select 
kql_bin(toIntervalWeek(1), toIntervalWeek(2)) as result, toTypeName(result); +select kql_bin(toIntervalNanosecond(2500000000), toIntervalNanosecond(1000000000)); +select kql_bin(toIntervalNanosecond(2500000000), 1) as result, toTypeName(result); +select kql_bin(toIntervalNanosecond(2500000000), toIntervalNanosecond(0)); + +select kql_bin(toIntervalWeek(2), toIntervalHour(3)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select kql_bin(toIntervalWeek(2), toDateTime64('2022-11-08 12:34:56.7890123', 7, 'UTC')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +select '-- DateTime64 --'; +select kql_bin(toDateTime64('2022-11-08 12:34:56.7890123', 7, 'UTC'), toIntervalNanosecond(100)); +select kql_bin(toDateTime64('2022-11-08 12:34:56.7890123', 7, 'UTC'), toIntervalNanosecond(1000)); +select kql_bin(toDateTime64('2022-11-08 12:34:56.7890123', 7, 'UTC'), toIntervalNanosecond(1000000)); +select kql_bin(toDateTime64('2022-11-08 12:34:56.7890123', 7, 'UTC'), toIntervalNanosecond(1000000000)); +select kql_bin(toDateTime64('2022-11-08 12:34:56.7890123', 7, 'UTC'), 1); +select kql_bin(toDateTime64('2022-11-08 12:34:56.7890123', 7, 'UTC'), toIntervalNanosecond(60000000000)); +select kql_bin(toDateTime64('2022-11-08 12:34:56.7890123', 7, 'UTC'), toIntervalMinute(1)); +select kql_bin(toDateTime64('2022-11-08 12:34:56.7890123', 7, 'UTC'), toIntervalMinute(0)); + +select kql_bin(toDateTime64('2022-11-08 12:34:56.7890123', 7, 'UTC'), toDateTime64('2022-11-08 12:34:56.7890123', 7, 'UTC')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +select '-- Date --'; +select kql_bin(toDate('2022-11-07'), toIntervalNanosecond(172800000000000)); + +select '-- Date32 --'; +select kql_bin(toDate32('2022-11-07'), toIntervalNanosecond(172800000000000)); + +select '-- DateTime --'; +select kql_bin(toDateTime('2022-11-08 12:34:56', 'UTC'), toIntervalNanosecond(60000000000)); + +select '-- kql_bin_at --'; +select kql_bin_at(6.5, 2.5, 7); +select kql_bin_at(toIntervalNanosecond(3600000000000), 
toIntervalNanosecond(86400000000000), toIntervalNanosecond(43200000000000)); +select kql_bin_at(toDateTime64('2017-05-15 10:20:00.123', 5, 'UTC'), toIntervalNanosecond(86400000000000), toDateTime('1970-01-01 12:00:00', 'UTC')); +select kql_bin_at(toDate('2017-05-17'), toIntervalNanosecond(604800000000000), toDate32('2017-06-04')); diff --git a/tests/queries/0_stateless/02366_kql_operator_in_sql.reference b/tests/queries/0_stateless/02366_kql_operator_in_sql.reference new file mode 100644 index 000000000000..c61b2ae344aa --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_operator_in_sql.reference @@ -0,0 +1,112 @@ +-- #1 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Apple Skilled Manual Bachelors 28 +-- #2 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Peter Nara Skilled Manual Graduate Degree 26 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #3 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #4 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #5 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #6 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #7 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #8 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 
33 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #9 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Peter Nara Skilled Manual Graduate Degree 26 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #10 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #11 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #12 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #13 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #14 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #15 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #16 -- +1 +-- #17 -- +Peter Nara Skilled Manual Graduate Degree 26 +Apple Skilled Manual Bachelors 28 +-- #18 -- +Peter Nara Skilled Manual Graduate Degree 26 +-- #19 -- +Theodore Diaz Skilled Manual Bachelors 28 +Peter Nara Skilled Manual Graduate Degree 26 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #20 -- +0 +-- #21 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Latoya Shen Professional Graduate Degree 25 +-- #22 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd 
defg Bachelors 33 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #23 -- +Stephanie Cox Management abcd defg Bachelors 33 +-- #24 -- +Peter Nara Skilled Manual Graduate Degree 26 +-- #25 -- +Peter Nara Skilled Manual Graduate Degree 26 +-- operator has, !has, has_cs, !has_cs, has_all, has_any -- +50000 +0 +20000 +20000 +0 +30000 diff --git a/tests/queries/0_stateless/02366_kql_operator_in_sql.sql b/tests/queries/0_stateless/02366_kql_operator_in_sql.sql new file mode 100644 index 000000000000..506942d8c606 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_operator_in_sql.sql @@ -0,0 +1,88 @@ +DROP TABLE IF EXISTS Customers; +CREATE TABLE Customers +( + FirstName Nullable(String), + LastName String, + Occupation String, + Education String, + Age Nullable(UInt8) +) ENGINE = Memory; + +INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28),('Stephanie','Cox','Management abcd defg','Bachelors',33),('Peter','Nara','Skilled Manual','Graduate Degree',26),('Latoya','Shen','Professional','Graduate Degree',25),('Apple','','Skilled Manual','Bachelors',28),(NULL,'why','Professional','Partial College',38); + +drop table if exists StormEventsLite; +create table StormEventsLite +( + Id UUID materialized generateUUIDv4(), + EventType String, + index EventTypeIndex EventType TYPE tokenbf_v1(512, 3, 0) GRANULARITY 1, + primary key(Id) +) engine = MergeTree; + +insert into StormEventsLite select 'iddqd strong wind iddqd' from numbers(10000); +insert into StormEventsLite select 'Strong Wind' from numbers(10000); +insert into StormEventsLite select 'strong wind' from numbers(10000); +insert into StormEventsLite select 'iddqd Strong wind iddqd' from numbers(10000); +insert into StormEventsLite select 'iddqd Strong Wind iddqd' from numbers(10000); + +-- explain indexes = 1 select count(*) from StormEventsLite where hasToken(EventType, 'strong'); + +select '-- #1 --' ; +select * from kql(Customers | where FirstName !in 
('Peter', 'Latoya')); +select '-- #2 --' ; +select * from kql(Customers | where FirstName !in ("test", "test2")); +select '-- #3 --' ; +select * from kql(Customers | where FirstName !contains 'Pet'); +select '-- #4 --' ; +select * from kql(Customers | where FirstName !contains_cs 'Pet'); +select '-- #5 --' ; +select * from kql(Customers | where FirstName !endswith 'ter'); +select '-- #6 --' ; +select * from kql(Customers | where FirstName !endswith_cs 'ter'); +select '-- #7 --' ; +select * from kql(Customers | where FirstName != 'Peter'); +select '-- #8 --' ; +select * from kql(Customers | where FirstName !has 'Peter'); +select '-- #9 --' ; +select * from kql(Customers | where FirstName !has_cs 'peter'); +select '-- #10 --' ; +select * from kql(Customers | where FirstName !hasprefix 'Peter'); +select '-- #11 --' ; +select * from kql(Customers | where FirstName !hasprefix_cs 'Peter'); +select '-- #12 --' ; +select * from kql(Customers | where FirstName !hassuffix 'Peter'); +select '-- #13 --' ; +select * from kql(Customers | where FirstName !hassuffix_cs 'Peter'); +select '-- #14 --' ; +select * from kql(Customers | where FirstName !startswith 'Peter'); +select '-- #15 --' ; +select * from kql(Customers | where FirstName !startswith_cs 'Peter'); +select '-- #16 --' ; +select * from kql(print t = 'a' in~ ('A', 'b', 'c')); +select '-- #17 --' ; +select * from kql(Customers | where FirstName in~ ('peter', 'apple')); +select '-- #18 --' ; +select * from kql(Customers | where FirstName in~ ((Customers | project FirstName | where FirstName == 'Peter'))); +select '-- #19 --' ; +select * from kql(Customers | where FirstName in~ ((Customers | project FirstName | where Age < 30))); +select '-- #20 --' ; +select * from kql(print t = 'a' !in~ ('A', 'b', 'c')); +select '-- #21 --' ; +select * from kql(Customers | where FirstName !in~ ('peter', 'apple')); +select '-- #22 --' ; +select * from kql(Customers | where FirstName !in~ ((Customers | project FirstName | where FirstName == 
'Peter'))); +select '-- #23 --' ; +select * from kql(Customers | where FirstName !in~ ((Customers | project FirstName | where Age < 30))); +select '-- #24 --' ; +select * from kql(Customers | where FirstName =~ 'peter' and LastName =~ 'naRA'); +select '-- #25 --' ; +select * from kql(Customers | where FirstName !~ 'nEyMaR' and LastName =~ 'naRA'); +select '-- operator has, !has, has_cs, !has_cs, has_all, has_any --'; +select * from kql(StormEventsLite | where EventType has 'strong' | count); +select * from kql(StormEventsLite | where EventType !has 'strong wind' | count); +select * from kql(StormEventsLite | where EventType has_cs 'Strong Wind' | count); +select * from kql(StormEventsLite | where EventType !has_cs 'iddqd' | count); +select * from kql(StormEventsLite | where EventType has_all ('iddqd', 'string') | count); +select * from kql(StormEventsLite | where EventType has_any ('iddqd', 'string') | count); +DROP TABLE IF EXISTS Customers; +drop table if exists StormEventsLite; diff --git a/tests/queries/0_stateless/02366_kql_projectaway.reference b/tests/queries/0_stateless/02366_kql_projectaway.reference new file mode 100644 index 000000000000..068c02ef2fba --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_projectaway.reference @@ -0,0 +1,29 @@ +1-- remove one column +Diaz Skilled Manual Bachelors 28 + +2-- remove two columns +Skilled Manual Bachelors 28 + +3-- remove columns by one wildcard +Skilled Manual Bachelors 28 + +4-- remove columns by two wildcards +28 + +5-- remove columns by one wildcard, one regular column +Skilled Manual Bachelors + +6-- remove columns by one wildcard, two regular column +Skilled Manual + +7-- remove columns by two wildcard, two regular column +Skilled Manual + +8-- remove one column from previous piple result +Diaz Skilled Manual Bachelors 28 + +9-- remove one column from summized piple result +Theodore 28 + +10-- remove columns after extend +Skilled Manual Bachelors 28 Theodore Diaz diff --git 
a/tests/queries/0_stateless/02366_kql_projectaway.sql b/tests/queries/0_stateless/02366_kql_projectaway.sql new file mode 100644 index 000000000000..e73079c8724c --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_projectaway.sql @@ -0,0 +1,42 @@ +DROP TABLE IF EXISTS Customers; +CREATE TABLE Customers +( + FirstName Nullable(String), + LastName String, + Occupation String, + Education String, + Age Nullable(UInt8) +) ENGINE = Memory; + +INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28); + +set dialect = 'kusto'; +print '1-- remove one column'; +Customers | project-away FirstName; +print ''; +print '2-- remove two columns'; +Customers | project-away FirstName, LastName; +print ''; +print '3-- remove columns by one wildcard'; +Customers | project-away *Name; +print ''; +print '4-- remove columns by two wildcards'; +Customers | project-away *Name, *tion; +print ''; +print '5-- remove columns by one wildcard, one regular column'; +Customers | project-away *Name, Age; +print ''; +print '6-- remove columns by one wildcard, two regular column'; +Customers | project-away *Name, Age, Education; +print ''; +print '7-- remove columns by two wildcard, two regular column'; +Customers | project-away *irstName, Age, *astName, Education; +print ''; +print '8-- remove one column from previous piple result'; +Customers | where Age< 30 | limit 2 | project-away FirstName; +print ''; +print '9-- remove one column from summized piple result'; +Customers|summarize sum(Age), avg(Age) by FirstName | project-away sum_Age; +print ''; +print '10-- remove columns after extend'; +Customers|extend FullName = strcat(FirstName,' ',LastName) | project-away FirstName, LastName; diff --git a/tests/queries/0_stateless/02366_kql_range.reference b/tests/queries/0_stateless/02366_kql_range.reference new file mode 100644 index 000000000000..e31e9fcc5c71 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_range.reference @@ -0,0 +1,56 @@ +-- range function int, int, 
int -- +[1,3,5,7,9] +-- range function int, int -- +[1,2,3,4,5,6,7,8,9,10] +-- range function float, float, float -- +[1.2,3.4000000000000004,5.6000000000000005,7.800000000000001,10] +-- range function postive float, float, int -- +[1.2,3.2,5.2,7.2,9.2] +-- range function postive float, int, float -- +[1.2,3.4000000000000004,5.6000000000000005,7.800000000000001,10] +-- range function postive integer, int, float -- +[1,3.2,5.4,7.6000000000000005,9.8] +-- range function postive intger, float, float -- +[1,3.2,5.4,7.6000000000000005,9.8] +-- range function postive float, int, int -- +[1.2,3.2,5.2,7.2,9.2] +-- range function postive int, int, negative int -- +[12,10,8,6,4] +-- range function postive float, int, negative float -- +[12.8,10.5,8.2,5.8999999999999995,3.5999999999999996] +-- range function datetime, datetime, timespan -- +['2001-01-01 00:00:00.000000000','2001-01-01 05:00:00.000000000','2001-01-01 10:00:00.000000000','2001-01-01 15:00:00.000000000','2001-01-01 20:00:00.000000000'] +-- range function datetime, datetime, negative timespan -- +['2001-01-03 00:00:00.000000000','2001-01-02 19:00:00.000000000','2001-01-02 14:00:00.000000000','2001-01-02 09:00:00.000000000','2001-01-02 04:00:00.000000000'] +-- range function datetime, datetime -- +['2001-01-01 00:00:00.000000000','2001-01-01 01:00:00.000000000','2001-01-01 02:00:00.000000000','2001-01-01 03:00:00.000000000','2001-01-01 04:00:00.000000000','2001-01-01 05:00:00.000000000','2001-01-01 06:00:00.000000000','2001-01-01 07:00:00.000000000','2001-01-01 08:00:00.000000000','2001-01-01 09:00:00.000000000','2001-01-01 10:00:00.000000000','2001-01-01 11:00:00.000000000','2001-01-01 12:00:00.000000000','2001-01-01 13:00:00.000000000','2001-01-01 14:00:00.000000000','2001-01-01 15:00:00.000000000','2001-01-01 16:00:00.000000000','2001-01-01 17:00:00.000000000','2001-01-01 18:00:00.000000000','2001-01-01 19:00:00.000000000','2001-01-01 20:00:00.000000000','2001-01-01 21:00:00.000000000','2001-01-01 
22:00:00.000000000','2001-01-01 23:00:00.000000000','2001-01-02 00:00:00.000000000'] +-- range function timespan, timespan, timespan -- +[3600000000000,10800000000000,18000000000000] +-- range function timespan, timespan -- +[3600000000000,7200000000000,10800000000000,14400000000000,18000000000000] +-- range function timespan, timespan, negative timespan -- +[39600000000000,32400000000000,25200000000000,18000000000000] +-- range function float timespan, timespan, timespan -- +[5400000000000,12600000000000] +-- range function endofday, endofday, timespan -- +['2017-01-01 23:59:59.999999900','2017-01-02 23:59:59.999999900','2017-01-03 23:59:59.999999900'] +-- range orerator int, int, int -- +20 +21 +22 +23 +24 +25 +-- range orerator float, float, float -- +20.5 +22 +23.5 +25 +-- range orerator datetime, datetime, timespan -- +2023-01-01 00:00:00.000000000 +2023-01-02 00:00:00.000000000 +2023-01-03 00:00:00.000000000 +2023-01-04 00:00:00.000000000 +2023-01-05 00:00:00.000000000 +2023-01-06 00:00:00.000000000 +2023-01-07 00:00:00.000000000 diff --git a/tests/queries/0_stateless/02366_kql_range.sql b/tests/queries/0_stateless/02366_kql_range.sql new file mode 100644 index 000000000000..109db6395020 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_range.sql @@ -0,0 +1,64 @@ +set dialect = 'kusto'; + +print '-- range function int, int, int --'; +print range(1, 10, 2); + +print '-- range function int, int --'; +print range(1, 10); + +print '-- range function float, float, float --'; +print range(1.2, 10.3, 2.2); + +print '-- range function postive float, float, int --'; +print range(1.2, 10.3, 2); + +print '-- range function postive float, int, float --'; +print range(1.2, 10, 2.2); + +print '-- range function postive integer, int, float --'; +print range(1, 10, 2.2); + +print '-- range function postive intger, float, float --'; +print range(1, 10.5, 2.2); + +print '-- range function postive float, int, int --'; +print range(1.2, 10, 2); + +print '-- range function 
postive int, int, negative int --'; +print range(12, 3, -2); + +print '-- range function postive float, int, negative float --'; +print range(12.8, 3, -2.3); + +print '-- range function datetime, datetime, timespan --'; +print range(datetime('2001-01-01'), datetime('2001-01-02'), 5h); + +print '-- range function datetime, datetime, negative timespan --'; +print range(datetime('2001-01-03'), datetime('2001-01-02'), -5h); + +print '-- range function datetime, datetime --'; +print range(datetime('2001-01-01'), datetime('2001-01-02')); + +print '-- range function timespan, timespan, timespan --'; +print range(1h, 5h, 2h); + +print '-- range function timespan, timespan --'; +print range(1h, 5h); + +print '-- range function timespan, timespan, negative timespan --'; +print range(11h, 5h, -2h); + +print '-- range function float timespan, timespan, timespan --'; +print range(1.5h, 5h, 2h); + +print '-- range function endofday, endofday, timespan --'; +print range(endofday(datetime(2017-01-01 10:10:17)), endofday(datetime(2017-01-03 10:10:17)), 1d); + +print '-- range orerator int, int, int --'; +range Age from 20 to 25 step 1; + +print '-- range orerator float, float, float --'; +range temp from 20.5 to 25.5 step 1.5; + +print '-- range orerator datetime, datetime, timespan --'; +range FirstWeek from datetime('2023-01-01') to datetime('2023-01-07') step 1d; \ No newline at end of file diff --git a/tests/queries/0_stateless/02366_kql_subquery.reference b/tests/queries/0_stateless/02366_kql_subquery.reference new file mode 100644 index 000000000000..51fd985276b8 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_subquery.reference @@ -0,0 +1,5 @@ +-- support subquery for in orerator (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/in-cs-operator) (subquery need to be wraped with bracket inside bracket); TODO: case-insensitive not supported yet +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 +Theodore Diaz 
Skilled Manual Bachelors 28 +Apple Skilled Manual Bachelors 28 diff --git a/tests/queries/0_stateless/02366_kql_subquery.sql b/tests/queries/0_stateless/02366_kql_subquery.sql new file mode 100644 index 000000000000..68aa58de26c6 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_subquery.sql @@ -0,0 +1,17 @@ +DROP TABLE IF EXISTS Customers; +CREATE TABLE Customers +( + FirstName Nullable(String), + LastName String, + Occupation String, + Education String, + Age Nullable(UInt8) +) ENGINE = Memory; + +INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28), ('Stephanie','Cox','Management abcd defg','Bachelors',33),('Peter','Nara','Skilled Manual','Graduate Degree',26),('Latoya','Shen','Professional','Graduate Degree',25),('Apple','','Skilled Manual','Bachelors',28),(NULL,'why','Professional','Partial College',38); + +set dialect='kusto'; + +print '-- support subquery for in orerator (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/in-cs-operator) (subquery need to be wraped with bracket inside bracket); TODO: case-insensitive not supported yet'; +Customers | where Age in ((Customers|project Age|where Age < 30)) | order by LastName; + diff --git a/tests/queries/0_stateless/02366_kql_summarize.reference b/tests/queries/0_stateless/02366_kql_summarize.reference new file mode 100644 index 000000000000..0b6ca779381e --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_summarize.reference @@ -0,0 +1,126 @@ +-- test summarize -- +12 25 46 32.416666666666664 389 +Skilled Manual 5 26 36 30.2 151 +Professional 6 25 46 34.166666666666664 205 +Management abcd defg 1 33 33 33 33 +Skilled Manual 0 +Professional 2 +Management abcd defg 0 +Skilled Manual 36 +Professional 38 +Management abcd defg 33 +Skilled Manual 26 +Professional 25 +Management abcd defg 33 +Skilled Manual 30.2 +Professional 29.25 +Management abcd defg 33 +Skilled Manual 151 +Professional 117 +Management abcd defg 33 +4 +4 +2 +6 +40 2 +30 4 +20 6 +Skilled Manual 5 
+Professional 6 +Management abcd defg 1 +Cameron Rodriguez 28 +Christine Nara 33 +Latoya Shen 25 +Joshua Lee 26 +Dalton Wood 42 +Peter Nara 26 +Edward Hernandez 36 +\N why 38 +Apple 28 +Stephanie Cox 33 +Angel Stewart 46 +Theodore Diaz 28 +-- make_list() -- +Skilled Manual ['Bachelors','Graduate Degree','High School','Partial College','Bachelors'] +Professional ['Graduate Degree','Partial College','Partial College','Partial College','Partial College','Partial College'] +Management abcd defg ['Bachelors'] +Skilled Manual ['Bachelors','Graduate Degree'] +Professional ['Graduate Degree','Partial College'] +Management abcd defg ['Bachelors'] +-- make_list_if() -- +Skilled Manual ['Edward','Christine'] +Professional ['Dalton','Angel'] +Management abcd defg ['Stephanie'] +Skilled Manual ['Edward'] +Professional ['Dalton'] +Management abcd defg ['Stephanie'] +-- make_set() -- +Skilled Manual ['Graduate Degree','High School','Partial College','Bachelors'] +Professional ['Graduate Degree','Partial College'] +Management abcd defg ['Bachelors'] +Skilled Manual ['Graduate Degree','Bachelors'] +Professional ['Graduate Degree','Partial College'] +Management abcd defg ['Bachelors'] +-- make_set_if() -- +Skilled Manual ['Partial College','High School'] +Professional ['Partial College'] +Management abcd defg ['Bachelors'] +Skilled Manual ['High School'] +Professional ['Partial College'] +Management abcd defg ['Bachelors'] +-- stdev() -- +6.855102059227432 +-- stdevif() -- +7.557189365836421 +-- binary_all_and -- +42 +-- binary_all_or -- +46 +-- binary_all_xor -- +4 +43.8 +[25.549999999999997,30.5,43.8] +30.5 +35 +[25,35,45] +-- Summarize following sort -- +Skilled Manual 5 +Professional 6 +Management abcd defg 1 +-- summarize with bin -- +0 1 +245000 2 +1970-01-01 00:00:00.000000000 1 +1970-01-01 00:04:05.000000000 2 +0 1 +245000000000 2 +2015-10-12 00:00:00.000000000 +2016-10-12 00:00:00.000000000 +-- make_list_with_nulls -- 
+['Theodore','Stephanie','Peter','Latoya','Joshua','Edward','Dalton','Christine','Cameron','Angel','Apple',NULL] +Skilled Manual ['Theodore','Peter','Edward','Christine','Apple'] +Professional ['Latoya','Joshua','Dalton','Cameron','Angel',NULL] +Management abcd defg ['Stephanie'] +Skilled Manual ['Theodore','Peter','Edward','Christine','Apple'] [28,26,36,33,28] +Professional ['Latoya','Joshua','Dalton','Cameron','Angel',NULL] [25,26,42,28,46,38] +Management abcd defg ['Stephanie'] [33] +-- count_distinct -- +4 +-- count_distinctif -- +3 +-- format_datetime -- +70-01-01 1 +70-01-03 2 +-- take_any -- +Theodore +Theodore Diaz +Cameron Rodriguez Cameron Rodriguez +Christine Nara Christine Nara +TheodoreDiaz +-- take_anyif -- +Theodore +Theodore 11 +-- variance/variancep/varianceif -- +46.992424242424185 +43.076388888888836 +1.766666666666606 diff --git a/tests/queries/0_stateless/02366_kql_summarize.sql b/tests/queries/0_stateless/02366_kql_summarize.sql new file mode 100644 index 000000000000..750968decd2b --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_summarize.sql @@ -0,0 +1,133 @@ +-- datatable(FirstName:string, LastName:string, Occupation:string, Education:string, Age:int) [ +-- 'Theodore', 'Diaz', 'Skilled Manual', 'Bachelors', 28, +-- 'Stephanie', 'Cox', 'Management abcd defg', 'Bachelors', 33, +-- 'Peter', 'Nara', 'Skilled Manual', 'Graduate Degree', 26, +-- 'Latoya', 'Shen', 'Professional', 'Graduate Degree', 25, +-- 'Joshua', 'Lee', 'Professional', 'Partial College', 26, +-- 'Edward', 'Hernandez', 'Skilled Manual', 'High School', 36, +-- 'Dalton', 'Wood', 'Professional', 'Partial College', 42, +-- 'Christine', 'Nara', 'Skilled Manual', 'Partial College', 33, +-- 'Cameron', 'Rodriguez', 'Professional', 'Partial College', 28, +-- 'Angel', 'Stewart', 'Professional', 'Partial College', 46, +-- 'Apple', '', 'Skilled Manual', 'Bachelors', 28, +-- dynamic(null), 'why', 'Professional', 'Partial College', 38 +-- ] + +DROP TABLE IF EXISTS Customers; +CREATE 
TABLE Customers +( + FirstName Nullable(String), + LastName String, + Occupation String, + Education String, + Age Nullable(UInt8) +) ENGINE = Memory; + +INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28),('Stephanie','Cox','Management abcd defg','Bachelors',33),('Peter','Nara','Skilled Manual','Graduate Degree',26),('Latoya','Shen','Professional','Graduate Degree',25),('Joshua','Lee','Professional','Partial College',26),('Edward','Hernandez','Skilled Manual','High School',36),('Dalton','Wood','Professional','Partial College',42),('Christine','Nara','Skilled Manual','Partial College',33),('Cameron','Rodriguez','Professional','Partial College',28),('Angel','Stewart','Professional','Partial College',46),('Apple','','Skilled Manual','Bachelors',28),(NULL,'why','Professional','Partial College',38); + +-- datatable (LogEntry:string, Created:long) [ +-- 'Darth Vader has entered the room.', 546, +-- 'Rambo is suspciously looking at Darth Vader.', 245234, +-- 'Darth Sidious electrocutes both using Force Lightning.', 245554 +-- ] + +drop table if exists EventLog; +create table EventLog +( + LogEntry String, + Created Int64 +) ENGINE = Memory; + +insert into EventLog values ('Darth Vader has entered the room.', 546), ('Rambo is suspciously looking at Darth Vader.', 245234), ('Darth Sidious electrocutes both using Force Lightning.', 245554); + +drop table if exists Dates; +create table Dates +( + EventTime DateTime('UTC'), +) ENGINE = Memory; + +insert into Dates values ('2015-10-12'), ('2016-10-12'); + +select '-- test summarize --'; +set dialect='kusto'; +Customers | summarize count(), min(Age), max(Age), avg(Age), sum(Age); +Customers | summarize count(), min(Age), max(Age), avg(Age), sum(Age) by Occupation; +Customers | summarize countif(Age>40) by Occupation; +Customers | summarize MyMax = maxif(Age, Age<40) by Occupation; +Customers | summarize MyMin = minif(Age, Age<40) by Occupation; +Customers | summarize MyAvg = avgif(Age, Age<40) by 
Occupation; +Customers | summarize MySum = sumif(Age, Age<40) by Occupation; +Customers | summarize dcount(Education); +Customers | summarize dcount(Education, 2); +Customers | summarize dcountif(Education, Occupation=='Professional'); +Customers | summarize dcountif(Education, Occupation=='Professional', 2); +Customers | summarize count_ = count() by bin(Age, 10) | order by count_ asc; +Customers | summarize job_count = count() by Occupation | where job_count > 0; +Customers | summarize 'Edu Count'=count() by Education | sort by 'Edu Count' desc; -- { clientError 62 } +Customers | summarize by FirstName, LastName, Age; + +print '-- make_list() --'; +Customers | summarize f_list = make_list(Education) by Occupation; +Customers | summarize f_list = make_list(Education, 2) by Occupation; +print '-- make_list_if() --'; +Customers | summarize f_list = make_list_if(FirstName, Age>30) by Occupation; +Customers | summarize f_list = make_list_if(FirstName, Age>30, 1) by Occupation; +print '-- make_set() --'; +Customers | summarize f_list = make_set(Education) by Occupation; +Customers | summarize f_list = make_set(Education, 2) by Occupation; +print '-- make_set_if() --'; +Customers | summarize f_list = make_set_if(Education, Age>30) by Occupation; +Customers | summarize f_list = make_set_if(Education, Age>30, 1) by Occupation; +print '-- stdev() --'; +Customers | project Age | summarize stdev(Age); +print '-- stdevif() --'; +Customers | project Age | summarize stdevif(Age, Age%2==0); +print '-- binary_all_and --'; +Customers | project Age | where Age > 40 | summarize binary_all_and(Age); +print '-- binary_all_or --'; +Customers | project Age | where Age > 40 | summarize binary_all_or(Age); +print '-- binary_all_xor --'; +Customers | project Age | where Age > 40 | summarize binary_all_xor(Age); + +Customers | project Age | summarize percentile(Age, 95); +Customers | project Age | summarize percentiles(Age, 5, 50, 95); +Customers | project Age | summarize percentiles(Age, 
5, 50, 95)[1]; +Customers | summarize w=count() by AgeBucket=bin(Age, 5) | summarize percentilew(AgeBucket, w, 75); +Customers | summarize w=count() by AgeBucket=bin(Age, 5) | summarize percentilesw(AgeBucket, w, 50, 75, 99.9); + +print '-- Summarize following sort --'; +Customers | sort by FirstName | summarize count() by Occupation; + +print '-- summarize with bin --'; +EventLog | summarize count=count() by bin(Created, 1000); +EventLog | summarize count=count() by bin(unixtime_seconds_todatetime(Created/1000), 1s); +EventLog | summarize count=count() by time_label=bin(Created / 1000 * 1s, 1s); +Dates | project bin(EventTime, 1m); +print '-- make_list_with_nulls --'; +Customers | summarize t = make_list_with_nulls(FirstName); +Customers | summarize f_list = make_list_with_nulls(FirstName) by Occupation; +Customers | summarize f_list = make_list_with_nulls(FirstName), a_list = make_list_with_nulls(Age) by Occupation; +print '-- count_distinct --'; +Customers | summarize count_distinct(Education); +print '-- count_distinctif --'; +Customers | summarize count_distinctif(Education, Age > 30); + +print '-- format_datetime --'; +EventLog | summarize count() by dt = format_datetime(bin(unixtime_seconds_todatetime(Created), 1d), 'yy-MM-dd') | order by dt asc; + +print '-- take_any --'; +Customers | summarize take_any(FirstName); +Customers | summarize take_any(FirstName), take_any(LastName); +Customers | where FirstName startswith 'C' | summarize take_any(FirstName, LastName) by FirstName, LastName; +Customers | summarize take_any(strcat(FirstName,LastName)); +print '-- take_anyif --'; +Customers | summarize take_anyif(FirstName, LastName has 'Diaz'); +Customers | summarize take_anyif(FirstName, LastName has 'Diaz'), dcount(FirstName); + +print '-- variance/variancep/varianceif --'; +Customers | summarize variance(Age); +Customers | summarize variancep(Age); +Customers | summarize varianceif(Age, Age < 30) +-- TODO: +-- arg_max() +-- arg_min() diff --git 
a/tests/queries/0_stateless/02366_kql_tabular.reference b/tests/queries/0_stateless/02366_kql_tabular.reference new file mode 100644 index 000000000000..e70c02ce34fa --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_tabular.reference @@ -0,0 +1,139 @@ +-- test Query only has table name: -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management Bachelors 33 +Peter Nara Skilled Manual Graduate Degree 26 +Latoya Shen Professional Graduate Degree 25 +Joshua Lee Professional Partial College 26 +Edward Hernandez Skilled Manual High School 36 +Dalton Wood Professional Partial College 42 +Christine Nara Skilled Manual Partial College 33 +Cameron Rodriguez Professional Partial College 28 +Angel Stewart Professional Partial College 46 +-- Query has Column Selection -- +Theodore Diaz Skilled Manual +Stephanie Cox Management +Peter Nara Skilled Manual +Latoya Shen Professional +Joshua Lee Professional +Edward Hernandez Skilled Manual +Dalton Wood Professional +Christine Nara Skilled Manual +Cameron Rodriguez Professional +Angel Stewart Professional +-- Query has limit -- +Theodore Diaz Skilled Manual +Stephanie Cox Management +Peter Nara Skilled Manual +Latoya Shen Professional +Joshua Lee Professional +Theodore Diaz Skilled Manual +Stephanie Cox Management +Peter Nara Skilled Manual +Latoya Shen Professional +Joshua Lee Professional +-- Query has second limit with bigger value -- +Theodore Diaz Skilled Manual +Stephanie Cox Management +Peter Nara Skilled Manual +Latoya Shen Professional +Joshua Lee Professional +-- Query has second limit with smaller value -- +Theodore Diaz Skilled Manual +Stephanie Cox Management +Peter Nara Skilled Manual +-- Query has second Column selection -- +Theodore Diaz +Stephanie Cox +Peter Nara +-- Query has second Column selection with extra column -- +-- Query with desc sort -- +Theodore +Stephanie +Peter +Latoya +Joshua +Skilled Manual +Skilled Manual +Professional +Professional +Management +-- Query with asc sort -- 
+Management +Professional +Professional +Skilled Manual +Skilled Manual +-- Query with sort (without keyword asc desc) -- +Theodore +Stephanie +Peter +Latoya +Joshua +Skilled Manual +Skilled Manual +Professional +Professional +Management +-- Query with sort 2 Columns with different direction -- +Stephanie Cox Management +Latoya Shen Professional +Joshua Lee Professional +Peter Nara Skilled Manual +Theodore Diaz Skilled Manual +-- Query with second sort -- +Stephanie Cox Management +Latoya Shen Professional +Joshua Lee Professional +Peter Nara Skilled Manual +Theodore Diaz Skilled Manual +-- Test String Equals (==) -- +Theodore Diaz Skilled Manual +Peter Nara Skilled Manual +Edward Hernandez Skilled Manual +Christine Nara Skilled Manual +-- Test String Not equals (!=) -- +Stephanie Cox Management +Latoya Shen Professional +Joshua Lee Professional +Dalton Wood Professional +Cameron Rodriguez Professional +Angel Stewart Professional +-- Test Filter using a list (in) -- +Theodore Diaz Skilled Manual Bachelors +Stephanie Cox Management Bachelors +Edward Hernandez Skilled Manual High School +-- Test Filter using a list (!in) -- +Peter Nara Skilled Manual Graduate Degree +Latoya Shen Professional Graduate Degree +Joshua Lee Professional Partial College +Dalton Wood Professional Partial College +Christine Nara Skilled Manual Partial College +Cameron Rodriguez Professional Partial College +Angel Stewart Professional Partial College +-- Test Filter using common string operations (contains_cs) -- +Joshua Lee Professional Partial College +Dalton Wood Professional Partial College +Christine Nara Skilled Manual Partial College +Cameron Rodriguez Professional Partial College +Angel Stewart Professional Partial College +-- Test Filter using common string operations (startswith_cs) -- +Latoya Shen Professional Graduate Degree +Joshua Lee Professional Partial College +Dalton Wood Professional Partial College +Cameron Rodriguez Professional Partial College +Angel Stewart Professional 
Partial College +-- Test Filter using common string operations (endswith_cs) -- +Latoya Shen Professional Graduate Degree +Joshua Lee Professional Partial College +-- Test Filter using numerical equal (==) -- +Peter Nara Skilled Manual Graduate Degree 26 +Joshua Lee Professional Partial College 26 +-- Test Filter using numerical great and less (> , <) -- +Stephanie Cox Management Bachelors 33 +Edward Hernandez Skilled Manual High School 36 +Christine Nara Skilled Manual Partial College 33 +-- Test Filter using multi where -- +Dalton Wood Professional Partial College 42 +Angel Stewart Professional Partial College 46 +-- Complex query with unknown function -- +-- Missing column in front of startsWith -- diff --git a/tests/queries/0_stateless/02366_kql_tabular.sql b/tests/queries/0_stateless/02366_kql_tabular.sql new file mode 100644 index 000000000000..f73c4c09ccaa --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_tabular.sql @@ -0,0 +1,88 @@ +DROP TABLE IF EXISTS Customers; +CREATE TABLE Customers +( + FirstName Nullable(String), + LastName String, + Occupation String, + Education String, + Age Nullable(UInt8) +) ENGINE = Memory; + +INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28), ('Stephanie','Cox','Management','Bachelors',33), ('Peter','Nara','Skilled Manual','Graduate Degree',26), ('Latoya','Shen','Professional','Graduate Degree',25), ('Joshua','Lee','Professional','Partial College',26), ('Edward','Hernandez','Skilled Manual','High School',36), ('Dalton','Wood','Professional','Partial College',42), ('Christine','Nara','Skilled Manual','Partial College',33), ('Cameron','Rodriguez','Professional','Partial College',28), ('Angel','Stewart','Professional','Partial College',46); + +set dialect='kusto'; +print '-- test Query only has table name: --'; +Customers; + +print '-- Query has Column Selection --'; +Customers | project FirstName,LastName,Occupation; + +print '-- Query has limit --'; +Customers | project 
FirstName,LastName,Occupation | take 5; +Customers | project FirstName,LastName,Occupation | limit 5; + +print '-- Query has second limit with bigger value --'; +Customers | project FirstName,LastName,Occupation | take 5 | take 7; + +print '-- Query has second limit with smaller value --'; +Customers | project FirstName,LastName,Occupation | take 5 | take 3; + +print '-- Query has second Column selection --'; +Customers | project FirstName,LastName,Occupation | take 3 | project FirstName,LastName; + +print '-- Query has second Column selection with extra column --'; +Customers| project FirstName,LastName,Occupation | take 3 | project FirstName,LastName,Education;-- { serverError 47 } + +print '-- Query with desc sort --'; +Customers | project FirstName | take 5 | sort by FirstName desc; +Customers | project Occupation | take 5 | order by Occupation desc; + +print '-- Query with asc sort --'; +Customers | project Occupation | take 5 | sort by Occupation asc; + +print '-- Query with sort (without keyword asc desc) --'; +Customers | project FirstName | take 5 | sort by FirstName; +Customers | project Occupation | take 5 | order by Occupation; + +print '-- Query with sort 2 Columns with different direction --'; +Customers | project FirstName,LastName,Occupation | take 5 | sort by Occupation asc, LastName desc; + +print '-- Query with second sort --'; +Customers | project FirstName,LastName,Occupation | take 5 | sort by Occupation desc |sort by Occupation asc, LastName desc; + +print '-- Test String Equals (==) --'; +Customers | project FirstName,LastName,Occupation | where Occupation == 'Skilled Manual'; + +print '-- Test String Not equals (!=) --'; +Customers | project FirstName,LastName,Occupation | where Occupation != 'Skilled Manual'; + +print '-- Test Filter using a list (in) --'; +Customers | project FirstName,LastName,Occupation,Education | where Education in ('Bachelors','High School'); + +print '-- Test Filter using a list (!in) --'; +set dialect='kusto'; 
+Customers | project FirstName,LastName,Occupation,Education | where Education !in ('Bachelors','High School'); + +print '-- Test Filter using common string operations (contains_cs) --'; +Customers | project FirstName,LastName,Occupation,Education | where Education contains_cs 'Coll'; + +print '-- Test Filter using common string operations (startswith_cs) --'; +Customers | project FirstName,LastName,Occupation,Education | where Occupation startswith_cs 'Prof'; + +print '-- Test Filter using common string operations (endswith_cs) --'; +Customers | project FirstName,LastName,Occupation,Education | where FirstName endswith_cs 'a'; + +print '-- Test Filter using numerical equal (==) --'; +Customers | project FirstName,LastName,Occupation,Education,Age | where Age == 26; + +print '-- Test Filter using numerical great and less (> , <) --'; +Customers | project FirstName,LastName,Occupation,Education,Age | where Age > 30 and Age < 40; + +print '-- Test Filter using multi where --'; +Customers | project FirstName,LastName,Occupation,Education,Age | where Age > 30 | where Occupation == 'Professional'; + +print '-- Complex query with unknown function --'; +hits | where CounterID == 62 and EventDate >= '2013-07-14' and EventDate <= '2013-07-15' and IsRefresh == 0 and DontCountHits == 0 | summarize count() by d=bin(poopoo(EventTime), 1m) | order by d | limit 10; -- { clientError UNKNOWN_FUNCTION } + +print '-- Missing column in front of startsWith --'; +StormEvents | where startswith "W" | summarize Count=count() by State; -- { clientError SYNTAX_ERROR } diff --git a/tests/queries/0_stateless/02366_kql_test_subquery.reference b/tests/queries/0_stateless/02366_kql_test_subquery.reference new file mode 100644 index 000000000000..492c5ca3d595 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_test_subquery.reference @@ -0,0 +1,84 @@ +-- test negetivate operator in kql subuquery -- +-- #1 -- +Theodore Diaz Skilled Manual Bachelors 28 +-- #2 -- +Peter Cox Management abcd defg 
Bachelors 33 +-- #3 -- +Theodore Diaz Skilled Manual Bachelors 28 +Peter Cox Management abcd defg Bachelors 33 +-- #4 -- +Theodore Diaz Skilled Manual Bachelors 28 +Peter Cox Management abcd defg Bachelors 33 +-- #5 -- +Theodore Diaz Skilled Manual Bachelors 28 +Peter Cox Management abcd defg Bachelors 33 +-- #6 -- +Peter Cox Management abcd defg Bachelors 33 +-- #7 -- +Theodore Diaz Skilled Manual Bachelors 28 +Peter Cox Management abcd defg Bachelors 33 +Peter Cox Management abcd defg Bachelors 33 +-- #8 -- +Peter Cox Management abcd defg Bachelors 33 +Peter Cox Management abcd defg Bachelors 33 +-- #9 -- +Theodore Diaz Skilled Manual Bachelors 28 +-- #10 -- +Theodore Diaz Skilled Manual Bachelors 28 +-- #11 -- +Theodore Diaz Skilled Manual Bachelors 28 +Peter Cox Management abcd defg Bachelors 33 +-- #12 -- +Theodore Diaz Skilled Manual Bachelors 28 +-- #13 -- +Theodore Diaz Skilled Manual Bachelors 28 +-- #14 -- +Theodore Diaz Skilled Manual Bachelors 28 +Peter Cox Management abcd defg Bachelors 33 +-- #15 -- +Theodore Diaz Skilled Manual Bachelors 28 +-- #16 -- +Theodore Diaz Skilled Manual Bachelors 28 +-- #17 -- +Theodore Diaz Skilled Manual Bachelors 28 +Peter Cox Management abcd defg Bachelors 33 +-- #18 -- +Theodore Diaz Skilled Manual Bachelors 28 +-- #19 -- +Theodore Diaz Skilled Manual Bachelors 28 +-- #20 -- +Theodore Diaz Skilled Manual Bachelors 28 +Peter Cox Management abcd defg Bachelors 33 + +-- test case-insensitive in operator kql subuquery -- +-- #21 -- +Peter Cox Management abcd defg Bachelors 33 +-- #22 -- +Peter Cox Management abcd defg Bachelors 33 +-- #23 -- +Peter Cox Management abcd defg Bachelors 33 +-- #24 -- +Theodore Diaz Skilled Manual Bachelors 28 +-- #25 -- +Peter Cox Management abcd defg Bachelors 33 +-- #26 -- +Theodore Diaz Skilled Manual Bachelors 28 +-- #27 -- +Theodore Diaz Skilled Manual Bachelors 28 +-- #28 -- +Peter Cox Management abcd defg Bachelors 33 +-- #29 -- +Theodore Diaz Skilled Manual Bachelors 28 +-- #30 -- 
+Theodore Diaz Skilled Manual Bachelors 28 + +-- test multi columns in operator kql subuquery -- +-- #32 -- +Theodore Diaz Skilled Manual Bachelors 28 +Peter Cox Management abcd defg Bachelors 33 +-- #33 -- +Theodore Diaz Skilled Manual Bachelors 28 +-- #34 -- +Peter Cox Management abcd defg Bachelors 33 +-- #35 -- +Peter Cox Management abcd defg Bachelors 33 diff --git a/tests/queries/0_stateless/02366_kql_test_subquery.sql b/tests/queries/0_stateless/02366_kql_test_subquery.sql new file mode 100644 index 000000000000..9976d22d4cd7 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_test_subquery.sql @@ -0,0 +1,88 @@ +DROP TABLE IF EXISTS Customers; +CREATE TABLE Customers +( + FirstName Nullable(String), + LastName String, + Occupation String, + Education String, + Age Nullable(UInt8) +) ENGINE = Memory; + +INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28),('Peter','Cox','Management abcd defg','Bachelors',33); + +set dialect = 'kusto'; +print '-- test negetivate operator in kql subuquery --' ; +print '-- #1 --' ; +Customers | where FirstName in ((Customers | project FirstName | where FirstName !in ('Peter', 'Latoya'))); +print '-- #2 --' ; +Customers | where FirstName in ((Customers | project FirstName, Age | where Age !in (28, 29))); +print '-- #3 --' ; +Customers | where FirstName in ((Customers | project FirstName | where FirstName !contains 'ste')); +print '-- #4 --' ; +Customers | where FirstName in ((Customers | project FirstName | where FirstName !contains_cs 'Ste')); +print '-- #5 --' ; +Customers | where FirstName in ((Customers | project FirstName | where FirstName !contains_cs 'ste')); +print '-- #6 --' ; +Customers | where FirstName in ((Customers | project FirstName | where FirstName !endswith 'ore')); +print '-- #7 --' ; +Customers | where FirstName in ((Customers | project FirstName | where FirstName !endswith_cs 'Ore')); +Customers | where FirstName in ((Customers | project FirstName | where FirstName 
!endswith_cs 'ore')); +print '-- #8 --' ; +Customers | where FirstName in ((Customers | project FirstName | where FirstName != 'Theodore')); +Customers | where FirstName in ((Customers | project FirstName | where FirstName !~ 'theodore')); +print '-- #9 --' ; +Customers | where FirstName in ((Customers | project FirstName | where FirstName !has 'Peter')); +print '-- #10 --' ; +Customers | where FirstName in ((Customers | project FirstName | where FirstName !has_cs 'Peter')); +print '-- #11 --' ; +Customers | where FirstName in ((Customers | project FirstName | where FirstName !has_cs 'peter')); +print '-- #12 --' ; +Customers | where FirstName in ((Customers | project FirstName | where FirstName !hasprefix 'Peter')); +print '-- #13 --' ; +Customers | where FirstName in ((Customers | project FirstName | where FirstName !hasprefix_cs 'Peter')); +print '-- #14 --' ; +Customers | where FirstName in ((Customers | project FirstName | where FirstName !hasprefix_cs 'peter')); +print '-- #15 --' ; +Customers | where FirstName in ((Customers | project FirstName | where FirstName !hassuffix 'Peter')); +print '-- #16 --' ; +Customers | where FirstName in ((Customers | project FirstName | where FirstName !hassuffix_cs 'Peter')); +print '-- #17 --' ; +Customers | where FirstName in ((Customers | project FirstName | where FirstName !hassuffix_cs 'peter')); +print '-- #18 --' ; +Customers | where FirstName in ((Customers | project FirstName | where FirstName !startswith 'Peter')); +print '-- #19 --' ; +Customers | where FirstName in ((Customers | project FirstName | where FirstName !startswith_cs 'Peter')); +print '-- #20 --' ; +Customers | where FirstName in ((Customers | project FirstName | where FirstName !startswith_cs 'peter')); +print ''; +print '-- test case-insensitive in operator kql subuquery --' ; +print '-- #21 --' ; +Customers | where FirstName !in~ ((Customers | project FirstName | where FirstName !in~ ('peter', 'apple'))); +print '-- #22 --' ; +Customers | where 
FirstName in ((Customers | project FirstName | where FirstName in~ ('peter', 'apple'))); +print '-- #23 --' ; +Customers | where FirstName in ((Customers | project FirstName | where FirstName in~ ((Customers | project FirstName | where FirstName == 'Peter')))); +print '-- #24 --' ; +Customers | where FirstName in ((Customers | project FirstName | where FirstName in~ ((Customers | project FirstName, Age | where Age < 30)))); +print '-- #25 --' ; +Customers | where substring(FirstName,0,3) in~ ((Customers | project substring(FirstName,0,3) | where FirstName in~ ('peter', 'apple'))); +print '-- #26 --' ; +Customers | where FirstName in ((Customers | project FirstName | where FirstName !in~ ('peter', 'apple'))); +print '-- #27 --' ; +Customers | where FirstName in ((Customers |where Age <30 | project FirstName | where FirstName !in~ ((Customers | project FirstName | where FirstName =~ 'peter')))); +print '-- #28 --' ; +Customers | where FirstName in ((Customers | project FirstName | where FirstName !in~ ((Customers | project FirstName, Age | where Age < 30)))); +print '-- #29 --' ; +Customers | where FirstName in~ ((Customers | project FirstName | where FirstName !in~ ('peter', 'apple'))); +print '-- #30 --' ; +Customers | where FirstName in~ ((Customers | where FirstName !in~ ('peter', 'apple')| project FirstName)); +print ''; +print '-- test multi columns in operator kql subuquery --' ; +print '-- #32 --' ; +Customers | where FirstName in ((Customers | project FirstName, LastName, Age)); +print '-- #33 --' ; +Customers | where FirstName in~ ((Customers | project FirstName, LastName, Age|where Age <30)); +print '-- #34 --' ; +Customers | where FirstName !in ((Customers | project FirstName, LastName, Age |where Age <30 )); +print '-- #35 --' ; +Customers | where FirstName !in~ ((Customers | project FirstName, LastName, Age |where Age <30)); \ No newline at end of file diff --git a/tests/queries/0_stateless/02366_kql_top_hitters.reference 
b/tests/queries/0_stateless/02366_kql_top_hitters.reference new file mode 100644 index 000000000000..72e1e27c88b1 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_top_hitters.reference @@ -0,0 +1,38 @@ +--top 1-- +Angel Stewart Professional Partial College 46 100 +Dalton Wood Professional Partial College 42 70 +\N why Professional Partial College 38 120 +--top 2-- +Angel Stewart Professional Partial College 46 100 +Dalton Wood Professional Partial College 42 70 +\N why Professional Partial College 38 120 +--top 3-- +Peter Nara Skilled Manual Graduate Degree 26 30 +Latoya Shen Professional Graduate Degree 25 40 +Joshua Lee Professional Partial College 26 50 +--top 4-- +\N why Professional Partial College 38 120 +Theodore Diaz Skilled Manual Bachelors 28 10 +Stephanie Cox Management Bachelors 31 20 +--top 5-- +Theodore Diaz Skilled Manual Bachelors 28 10 +Stephanie Cox Management Bachelors 31 20 +Peter Nara Skilled Manual Graduate Degree 26 30 +--top 6-- +Dalton Wood Professional Partial College 42 70 +Angel Stewart Professional Partial College 46 100 +--top hitters 1-- +28 210 +38 120 +--top hitters 2-- +28 3 +26 2 +--top hitters 3-- +38 1 +28 1 +--top hitters 4-- +38 120 +--top hitters 5-- +38 120 +--top hitters 6-- +28 3 diff --git a/tests/queries/0_stateless/02366_kql_top_hitters.sql b/tests/queries/0_stateless/02366_kql_top_hitters.sql new file mode 100644 index 000000000000..60ac6a537f6f --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_top_hitters.sql @@ -0,0 +1,39 @@ +DROP TABLE IF EXISTS Customers; +CREATE TABLE Customers +( + FirstName Nullable(String), + LastName String, + Occupation String, + Education String, + Age Nullable(UInt8), + extra Int16 +) ENGINE = Memory; + +INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28,10),('Stephanie','Cox','Management','Bachelors',31,20),('Peter','Nara','Skilled Manual','Graduate Degree',26,30),('Latoya','Shen','Professional','Graduate 
Degree',25,40),('Joshua','Lee','Professional','Partial College',26,50),('Edward','Hernandez','Skilled Manual','High School',36,60),('Dalton','Wood','Professional','Partial College',42,70),('Christine','Nara','Skilled Manual','Partial College',33,80),('Cameron','Rodriguez','Professional','Partial College',28,90),('Angel','Stewart','Professional','Partial College',46,100),('Apple','B','Skilled Manual','Bachelors',28,110),(NULL,'why','Professional','Partial College',38,120); + +set dialect = 'kusto'; +print '--top 1--'; +Customers | top 3 by Age; +print '--top 2--'; +Customers | top 3 by Age desc; +print '--top 3--'; +Customers | top 3 by Age asc | order by FirstName; +print '--top 4--'; +Customers | top 3 by FirstName desc nulls first; +print '--top 5--'; +Customers | top 3 by FirstName desc nulls last; +print '--top 6--'; +Customers | top 3 by Age | top 2 by FirstName; +print '--top hitters 1--'; +Customers | top-hitters a = 2 of Age by extra; +print '--top hitters 2--'; +Customers | top-hitters 2 of Age; +print '--top hitters 3--'; +Customers | top-hitters 2 of Age by extra | top-hitters 2 of Age | order by Age; +print '--top hitters 4--'; +Customers | top-hitters 2 of Age by extra | where Age > 30; +print '--top hitters 5--'; +Customers | top-hitters 2 of Age by extra | where approximate_sum_extra < 200; +print '--top hitters 6--'; +Customers | top-hitters 2 of Age | where approximate_count_Age > 2; + diff --git a/tests/queries/0_stateless/02366_kql_topnested.reference b/tests/queries/0_stateless/02366_kql_topnested.reference new file mode 100644 index 000000000000..6a3e4f0ae124 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_topnested.reference @@ -0,0 +1,108 @@ +-- top nested 1 layer-- +QC 125 +ON 140 +MA 145 +--top nested 2 layers-- +QC 125 Steven 41 +QC 125 Joseph 33 +ON 140 Steven 64 +ON 140 Joseph 34 +MA 145 Steven 38 +MA 145 Robert 42 +--top nested 3 layers-- +QC 125 Steven 41 03/30/1996 23 +QC 125 Steven 41 03/29/1996 13 +QC 125 Joseph 33 
03/30/1996 20 +QC 125 Joseph 33 03/29/1996 10 +ON 140 Steven 64 03/31/1996 30 +ON 140 Steven 64 03/30/1996 22 +ON 140 Joseph 34 03/30/1996 18 +ON 140 Joseph 34 03/29/1996 14 +MA 145 Steven 38 03/30/1996 24 +MA 145 Steven 38 03/29/1996 14 +MA 145 Robert 42 03/31/1996 25 +MA 145 Robert 42 03/30/1996 17 +--top nested 1 layer with others-- +all other region 55 +QC 125 +ON 140 +MA 145 +--top nested 2 layers with 2 others-- +all other region 55 all other person 55 +QC 125 all other person 51 +QC 125 Steven 41 +QC 125 Joseph 33 +ON 140 all other person 42 +ON 140 Steven 64 +ON 140 Joseph 34 +MA 145 all other person 65 +MA 145 Steven 38 +MA 145 Robert 42 +--top nested 2 layers with 1st others-- +all other region 55 \N \N +QC 125 Steven 41 +QC 125 Joseph 33 +QC 125 \N \N +ON 140 Steven 64 +ON 140 Joseph 34 +ON 140 \N \N +MA 145 Steven 38 +MA 145 Robert 42 +MA 145 \N \N +--top nested 2 layer with 2nd others-- +QC 125 all other person 51 +QC 125 Steven 41 +QC 125 Joseph 33 +ON 140 all other person 42 +ON 140 Steven 64 +ON 140 Joseph 34 +MA 145 all other person 65 +MA 145 Steven 38 +MA 145 Robert 42 +\N \N all other person 55 +--top nested 3 layers with 3 others-- +all other region 55 all other person 55 all other date 55 +QC 125 all other person 51 all other date 51 +QC 125 Steven 41 all other date 5 +QC 125 Steven 41 03/30/1996 23 +QC 125 Steven 41 03/29/1996 13 +QC 125 Joseph 33 all other date 3 +QC 125 Joseph 33 03/30/1996 20 +QC 125 Joseph 33 03/29/1996 10 +ON 140 all other person 42 all other date 42 +ON 140 Steven 64 all other date 12 +ON 140 Steven 64 03/31/1996 30 +ON 140 Steven 64 03/30/1996 22 +ON 140 Joseph 34 all other date 2 +ON 140 Joseph 34 03/30/1996 18 +ON 140 Joseph 34 03/29/1996 14 +MA 145 all other person 65 all other date 65 +MA 145 Steven 38 all other date 0 +MA 145 Steven 38 03/30/1996 24 +MA 145 Steven 38 03/29/1996 14 +MA 145 Robert 42 all other date 0 +MA 145 Robert 42 03/31/1996 25 +MA 145 Robert 42 03/30/1996 17 +--top nested use expression as 
aggregation-- +QC 255 +ON 285 +MA 295 +--top nested use expression as top n-- +QC 125 +ON 140 +MA 145 +--top nested use expression as others-- +all other region 55 +QC 125 +ON 140 +MA 145 +--top nested use expression as column-- +Q 125 +O 140 +M 145 +B 55 +--top nested without top n-- +QC 125 +ON 140 +MA 145 +BC 55 diff --git a/tests/queries/0_stateless/02366_kql_topnested.sql b/tests/queries/0_stateless/02366_kql_topnested.sql new file mode 100644 index 000000000000..11b9d7e37338 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_topnested.sql @@ -0,0 +1,76 @@ +DROP TABLE IF EXISTS sales; +CREATE TABLE sales +(salesdate String,salesperson String,region String,amount UInt32) ENGINE = Memory; + +INSERT INTO sales VALUES ( '12/31/1995','Robert','ON',1); +INSERT INTO sales VALUES ( '12/31/1995','Joseph','ON',2); +INSERT INTO sales VALUES ( '12/31/1995','Joseph','QC',3); +INSERT INTO sales VALUES ( '12/31/1995','Joseph','MA',4); +INSERT INTO sales VALUES ( '12/31/1995','Steven','QC',5); +INSERT INTO sales VALUES ( '03/29/1996','Joseph','ON',6); +INSERT INTO sales VALUES ( '03/29/1996','Robert','QC',7); +INSERT INTO sales VALUES ( '03/29/1996','Joseph','ON',8); +INSERT INTO sales VALUES ( '03/29/1996','Joseph','BC',9); +INSERT INTO sales VALUES ( '03/29/1996','Joseph','QC',10); +INSERT INTO sales VALUES ( '03/29/1996','Joseph','MA',11); +INSERT INTO sales VALUES ( '03/29/1996','Steven','ON',12); +INSERT INTO sales VALUES ( '03/29/1996','Steven','QC',13); +INSERT INTO sales VALUES ( '03/29/1996','Steven','MA',14); +INSERT INTO sales VALUES ( '03/30/1996','Robert','ON',15); +INSERT INTO sales VALUES ( '03/30/1996','Robert','QC',16); +INSERT INTO sales VALUES ( '03/30/1996','Robert','MA',17); +INSERT INTO sales VALUES ( '03/30/1996','Joseph','ON',18); +INSERT INTO sales VALUES ( '03/30/1996','Joseph','BC',19); +INSERT INTO sales VALUES ( '03/30/1996','Joseph','QC',20); +INSERT INTO sales VALUES ( '03/30/1996','Joseph','MA',21); +INSERT INTO sales VALUES ( 
'03/30/1996','Steven','ON',22); +INSERT INTO sales VALUES ( '03/30/1996','Steven','QC',23); +INSERT INTO sales VALUES ( '03/30/1996','Steven','MA',24); +INSERT INTO sales VALUES ( '03/31/1996','Robert','MA',25); +INSERT INTO sales VALUES ( '03/31/1996','Thomas','ON',26); +INSERT INTO sales VALUES ( '03/31/1996','Thomas','BC',27); +INSERT INTO sales VALUES ( '03/31/1996','Thomas','QC',28); +INSERT INTO sales VALUES ( '03/31/1996','Thomas','MA',29); +INSERT INTO sales VALUES ( '03/31/1996','Steven','ON',30); + + +set dialect = 'kusto'; + +print '-- top nested 1 layer--'; +sales | top-nested 3 of region by sum(amount)|order by region; + +print '--top nested 2 layers--'; +sales | top-nested 3 of region by sum(amount), top-nested 2 of salesperson by sum(amount)|order by region, salesperson; + +print '--top nested 3 layers--'; +sales | top-nested 3 of region by sum(amount), top-nested 2 of salesperson by sum(amount), top-nested 2 of salesdate by sum(amount)|order by region, salesperson, salesdate; + +print '--top nested 1 layer with others--'; +sales | top-nested 3 of region with others = 'all other region' by sum(amount)|order by region; + +print '--top nested 2 layers with 2 others--'; +sales | top-nested 3 of region with others = 'all other region' by sum(amount), top-nested 2 of salesperson with others = 'all other person' by sum(amount)|order by region, salesperson; + +print '--top nested 2 layers with 1st others--'; +sales | top-nested 3 of region with others = 'all other region' by sum(amount), top-nested 2 of salesperson by sum(amount)|order by region, salesperson; + +print '--top nested 2 layer with 2nd others--'; +sales | top-nested 3 of region by sum(amount), top-nested 2 of salesperson with others = 'all other person' by sum(amount)|order by region, salesperson; + +print '--top nested 3 layers with 3 others--'; +sales | top-nested 3 of region with others = 'all other region' by sum(amount), top-nested 2 of salesperson with others = 'all other person' by 
sum(amount), top-nested 2 of salesdate with others = 'all other date' by sum(amount)|order by region, salesperson, salesdate; + +print '--top nested use expression as aggregation--'; +sales | top-nested 3 of region by sum(amount)*2 + 5|order by region; + +print '--top nested use expression as top n--'; +sales | top-nested strlen('abc') of region by sum(amount)|order by region; + +print '--top nested use expression as others--'; +sales | top-nested 3 of region with others = strcat("all other"," region") by sum(amount)|order by region; + +print '--top nested use expression as column--'; +sales | top-nested of substring(region,0,1) by sum(amount)|order by Column1; + +print '--top nested without top n--'; +sales | top-nested of region by sum(amount)|order by region; \ No newline at end of file diff --git a/tests/queries/0_stateless/02455_dateTime64Diff.reference b/tests/queries/0_stateless/02455_dateTime64Diff.reference new file mode 100644 index 000000000000..db9adebf1b3a --- /dev/null +++ b/tests/queries/0_stateless/02455_dateTime64Diff.reference @@ -0,0 +1,5 @@ +-- dateTime64Diff -- +28200397123456789 +-28200397123456789 +-- DateTime64 arithmetic -- +28200397123456789 diff --git a/tests/queries/0_stateless/02455_dateTime64Diff.sql b/tests/queries/0_stateless/02455_dateTime64Diff.sql new file mode 100644 index 000000000000..06e13df465f1 --- /dev/null +++ b/tests/queries/0_stateless/02455_dateTime64Diff.sql @@ -0,0 +1,11 @@ +-- dateTime64Diff +select '-- dateTime64Diff --'; +select dateTime64Diff(toDateTime64('2022-11-23 09:26:37.123456789', 9), toDateTime64('2022-01-01', 0)); +select dateTime64Diff(toDateTime64('2022-01-01', 0), toDateTime64('2022-11-23 09:26:37.123456789', 9)); +select dateTime64Diff(toDateTime64('2022-11-23 09:26:37.123456789', 9), toDate('2022-01-01')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select dateTime64Diff(toDateTime64('2022-11-23 09:26:37.123456789', 9), toDate32('2022-01-01')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select 
dateTime64Diff(toDateTime64('2022-11-23 09:26:37.123456789', 9), toDateTime('2022-01-01 01:02:03')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +-- DateTime64 - DateTime64 +select '-- DateTime64 arithmetic --'; +select toDateTime64('2022-11-23 09:26:37.123456789', 9) - toDateTime64('2022-01-01', 0); diff --git a/tests/queries/0_stateless/02455_interval.reference b/tests/queries/0_stateless/02455_interval.reference new file mode 100644 index 000000000000..83c880e8aca9 --- /dev/null +++ b/tests/queries/0_stateless/02455_interval.reference @@ -0,0 +1,21 @@ +-- Interval -- +1000 IntervalNanosecond +-- Unary Operations -- +-1000 IntervalNanosecond +-1 Int8 +-- Binary Operations -- +-1000 Nullable(IntervalNanosecond) +3000 Nullable(IntervalNanosecond) +2.5 Nullable(Float64) +500 Nullable(IntervalNanosecond) +\N +inf +2500 IntervalNanosecond +2 Nullable(Int64) +0 Nullable(Int64) +1000 IntervalNanosecond +0 Nullable(IntervalNanosecond) +-- Conversion -- +1000 +1000 +1000 diff --git a/tests/queries/0_stateless/02455_interval.sql b/tests/queries/0_stateless/02455_interval.sql new file mode 100644 index 000000000000..9cc110106417 --- /dev/null +++ b/tests/queries/0_stateless/02455_interval.sql @@ -0,0 +1,51 @@ +select '-- Interval --'; +select toIntervalNanosecond(1000) as i, toTypeName(i); + +select '-- Unary Operations --'; +select -toIntervalNanosecond(1000) as i, toTypeName(i); + +select sign(toIntervalNanosecond(-1000)) as i, toTypeName(i); + +select abs(toIntervalNanosecond(-1000)) as i, toTypeName(i); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select bitCount(toIntervalNanosecond(-1000)) as i, toTypeName(i); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select bitNot(toIntervalNanosecond(-1000)) as i, toTypeName(i); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select __bitSwapLastTwo(toIntervalNanosecond(-1000)) as i, toTypeName(i); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select __bitWrapperFunc(toIntervalNanosecond(-1000)) as i, toTypeName(i); -- { 
serverError ILLEGAL_TYPE_OF_ARGUMENT } +select intExp2(toIntervalNanosecond(-1000)) as i, toTypeName(i); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select intExp10(toIntervalNanosecond(-1000)) as i, toTypeName(i); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select roundAge(toIntervalNanosecond(-1000)) as i, toTypeName(i); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select roundDuration(toIntervalNanosecond(-1000)) as i, toTypeName(i); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select roundToExp2(toIntervalNanosecond(-1000)) as i, toTypeName(i); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +select '-- Binary Operations --'; +select (toIntervalNanosecond(1000) - toIntervalNanosecond(2000)) as i, toTypeName(i); +select (toIntervalNanosecond(1000) + toIntervalNanosecond(2000)) as i, toTypeName(i); +select (toIntervalNanosecond(2500) / toIntervalNanosecond(1000)) as i, toTypeName(i); +select (toIntervalNanosecond(2500) % toIntervalNanosecond(1000)) as i, toTypeName(i); +select toIntervalNanosecond(1000) / 0; +select toIntervalNanosecond(1000) / toIntervalNanosecond(0); + +select (toIntervalNanosecond(2500) * toIntervalNanosecond(1000)) as i, toTypeName(i); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +select greatest(toIntervalNanosecond(2500), toIntervalNanosecond(1000)) as i, toTypeName(i); +select intDiv(toIntervalNanosecond(2500), toIntervalNanosecond(1000)) as i, toTypeName(i); +select intDivOrZero(toIntervalNanosecond(2500), toIntervalNanosecond(0)) as i, toTypeName(i); +select least(toIntervalNanosecond(2500), toIntervalNanosecond(1000)) as i, toTypeName(i); +select moduloOrZero(toIntervalNanosecond(2500), toIntervalNanosecond(0)) as i, toTypeName(i); + +select bitAnd(toIntervalNanosecond(1000), toIntervalNanosecond(1000)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select __bitBoolMaskAnd(toIntervalNanosecond(1000), toIntervalNanosecond(1000)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select __bitBoolMaskOr(toIntervalNanosecond(1000), 
toIntervalNanosecond(1000)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select bitHammingDistance(toIntervalNanosecond(1000), toIntervalNanosecond(1000)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select bitOr(toIntervalNanosecond(1000), toIntervalNanosecond(1000)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select bitRotateLeft(toIntervalNanosecond(1000), 1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select bitRotateRight(toIntervalNanosecond(1000), 2); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select bitShiftLeft(toIntervalNanosecond(1000), 3); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select bitShiftRight(toIntervalNanosecond(1000), 4); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select bitTest(toIntervalNanosecond(1000), 5); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select bitXor(toIntervalNanosecond(1000), toIntervalNanosecond(1000)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +select '-- Conversion --'; +select toFloat64(toIntervalNanosecond(1000)); +select toInt64(toIntervalNanosecond(1000)); +select toString(toIntervalNanosecond(1000)); diff --git a/utils/check-style/codespell-ignore-words.list b/utils/check-style/codespell-ignore-words.list index 27e08de80eee..48abdbafcb82 100644 --- a/utils/check-style/codespell-ignore-words.list +++ b/utils/check-style/codespell-ignore-words.list @@ -32,3 +32,5 @@ nam ubuntu toolchain vie +Iif +iif