Skip to content

Commit e0b4cd4

Browse files
kagamiori, facebook-github-bot
authored and committed
feat(fuzzer): Add input generator for json_parse in expression fuzzer (#12019)
Summary: Make the expression fuzzer generate input vectors of valid JSON strings for the json_parse function. To test corner cases, a generated JSON string may be randomly truncated or have a space character inserted into it. Differential Revision: D67820571
1 parent 50c525a commit e0b4cd4

18 files changed

+396
-204
lines changed

velox/exec/fuzzer/PrestoQueryRunner.cpp

-1
Original file line numberDiff line numberDiff line change
@@ -456,7 +456,6 @@ bool PrestoQueryRunner::isSupported(const exec::FunctionSignature& signature) {
456456
usesTypeName(signature, "interval year to month") ||
457457
usesTypeName(signature, "hugeint") ||
458458
usesTypeName(signature, "hyperloglog") ||
459-
usesInputTypeName(signature, "json") ||
460459
usesInputTypeName(signature, "ipaddress") ||
461460
usesInputTypeName(signature, "ipprefix") ||
462461
usesInputTypeName(signature, "uuid"));
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
/*
2+
* Copyright (c) Facebook, Inc. and its affiliates.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#include "velox/expression/fuzzer/ArgsOverrideFunctions.h"
18+
19+
#include "velox/common/fuzzer/ConstrainedGenerators.h"
20+
#include "velox/common/fuzzer/Utils.h"
21+
#include "velox/core/Expressions.h"
22+
#include "velox/vector/fuzzer/VectorFuzzer.h"
23+
24+
namespace facebook::velox::fuzzer {
25+
26+
std::vector<core::TypedExprPtr> generateJsonParseArgs::generate(
27+
const CallableSignature& signature,
28+
const VectorFuzzer::Options& options,
29+
FuzzerGenerator& rng,
30+
ExpressionFuzzerState& state) {
31+
VELOX_CHECK_EQ(signature.args.size(), 1);
32+
std::vector<core::TypedExprPtr> inputExpressions;
33+
34+
state.inputRowTypes_.emplace_back(signature.args[0]);
35+
state.inputRowNames_.emplace_back(
36+
fmt::format("c{}", state.inputRowTypes_.size() - 1));
37+
38+
const auto representedType = facebook::velox::randType(rng, 3);
39+
const auto seed = rand<uint32_t>(rng);
40+
const auto nullRatio = options.nullRatio;
41+
state.customInputGenerators_.emplace_back(
42+
std::make_shared<fuzzer::JsonInputGenerator>(
43+
seed,
44+
signature.args[0],
45+
nullRatio,
46+
fuzzer::getRandomInputGenerator(seed, representedType, nullRatio),
47+
true));
48+
49+
inputExpressions.push_back(std::make_shared<core::FieldAccessTypedExpr>(
50+
signature.args[0], state.inputRowNames_.back()));
51+
return inputExpressions;
52+
}
53+
54+
/*std::vector<core::TypedExprPtr> generateJsonParseArg(
55+
const CallableSignature& signature,
56+
const VectorFuzzer::Options& options,
57+
FuzzerGenerator& rng,
58+
ExpressionFuzzer::State& state) {
59+
VELOX_CHECK_EQ(signature.args.size(), 1);
60+
std::vector<core::TypedExprPtr> inputExpressions;
61+
62+
state.inputRowTypes_.emplace_back(signature.args[0]);
63+
state.inputRowNames_.emplace_back(
64+
fmt::format("c{}", state.inputRowTypes_.size() - 1));
65+
66+
const auto representedType = facebook::velox::randType(rng, 3);
67+
const auto seed = rand<uint32_t>(rng);
68+
const auto nullRatio = options.nullRatio;
69+
state.customInputGenerators_.emplace_back(
70+
std::make_shared<fuzzer::JsonInputGenerator>(
71+
seed,
72+
signature.args[0],
73+
nullRatio,
74+
fuzzer::getRandomInputGenerator(seed, representedType, nullRatio),
75+
true));
76+
77+
inputExpressions.push_back(std::make_shared<core::FieldAccessTypedExpr>(
78+
signature.args[0], state.inputRowNames_.back()));
79+
return inputExpressions;
80+
}*/
81+
82+
} // namespace facebook::velox::fuzzer
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
/*
2+
* Copyright (c) Facebook, Inc. and its affiliates.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
#pragma once
17+
18+
#include "velox/expression/fuzzer/FuzzerToolkit.h"
19+
20+
namespace facebook::velox::fuzzer {
21+
22+
class generateJsonParseArgs : public ArgValuesGenerator {
23+
public:
24+
~generateJsonParseArgs() override = default;
25+
26+
std::vector<core::TypedExprPtr> generate(
27+
const CallableSignature& signature,
28+
const VectorFuzzer::Options& options,
29+
FuzzerGenerator& rng,
30+
ExpressionFuzzerState& state) override;
31+
};
32+
33+
} // namespace facebook::velox::fuzzer

velox/expression/fuzzer/CMakeLists.txt

+4-1
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ target_link_libraries(
2525

2626
add_library(
2727
velox_expression_fuzzer
28+
ArgsOverrideFunctions.cpp
2829
ArgumentTypeFuzzer.cpp
2930
DecimalArgGeneratorBase.cpp
3031
ExpressionFuzzer.cpp
@@ -38,11 +39,13 @@ target_link_libraries(
3839
velox_type
3940
velox_vector_fuzzer
4041
velox_vector_test_lib
42+
velox_constrained_input_generators
4143
velox_function_registry
4244
velox_expression_test_utility
4345
velox_file
4446
velox_hive_connector
45-
velox_fuzzer_util)
47+
velox_fuzzer_util
48+
velox_common_fuzzer_util)
4649

4750
add_executable(velox_expression_fuzzer_test ExpressionFuzzerTest.cpp)
4851

velox/expression/fuzzer/ExpressionFuzzer.cpp

+46-75
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include <unordered_set>
2323

2424
#include "velox/common/base/Exceptions.h"
25+
#include "velox/common/fuzzer/ConstrainedGenerators.h"
2526
#include "velox/exec/fuzzer/FuzzerUtil.h"
2627
#include "velox/expression/Expr.h"
2728
#include "velox/expression/FunctionSignature.h"
@@ -272,11 +273,14 @@ ExpressionFuzzer::ExpressionFuzzer(
272273
const std::shared_ptr<VectorFuzzer>& vectorFuzzer,
273274
const std::optional<ExpressionFuzzer::Options>& options,
274275
const std::unordered_map<std::string, std::shared_ptr<ArgGenerator>>&
275-
argGenerators)
276+
argGenerators,
277+
const std::unordered_map<std::string, std::shared_ptr<ArgValuesGenerator>>&
278+
argsOverrideFuncs)
276279
: options_(options.value_or(Options())),
277280
vectorFuzzer_(vectorFuzzer),
278-
state{rng_, std::max(1, options_.maxLevelOfNesting)},
279-
argGenerators_(argGenerators) {
281+
state_{rng_, std::max(1, options_.maxLevelOfNesting)},
282+
argGenerators_(argGenerators),
283+
funcArgOverrides_{argsOverrideFuncs} {
280284
VELOX_CHECK(vectorFuzzer, "Vector fuzzer must be provided");
281285
seed(initialSeed);
282286

@@ -432,10 +436,6 @@ ExpressionFuzzer::ExpressionFuzzer(
432436
addToTypeToExpressionListByTicketTimes("row", "row_constructor");
433437
addToTypeToExpressionListByTicketTimes(kTypeParameterName, "dereference");
434438
}
435-
436-
// Register function override (for cases where we want to restrict the types
437-
// or parameters we pass to functions).
438-
registerFuncOverride(&ExpressionFuzzer::generateSwitchArgs, "switch");
439439
}
440440

441441
bool ExpressionFuzzer::isSupportedSignature(
@@ -519,13 +519,6 @@ void ExpressionFuzzer::addToTypeToExpressionListByTicketTimes(
519519
}
520520
}
521521

522-
template <typename TFunc>
523-
void ExpressionFuzzer::registerFuncOverride(
524-
TFunc func,
525-
const std::string& name) {
526-
funcArgOverrides_[name] = std::bind(func, this, std::placeholders::_1);
527-
}
528-
529522
void ExpressionFuzzer::seed(size_t seed) {
530523
rng_.seed(seed);
531524
vectorFuzzer_->reSeed(seed);
@@ -548,22 +541,23 @@ core::TypedExprPtr ExpressionFuzzer::generateArgConstant(const TypePtr& arg) {
548541
// columns of the same type exist then there is a 30% chance that it will
549542
// re-use one of them.
550543
core::TypedExprPtr ExpressionFuzzer::generateArgColumn(const TypePtr& arg) {
551-
auto& listOfCandidateCols = state.typeToColumnNames_[arg->toString()];
544+
auto& listOfCandidateCols = state_.typeToColumnNames_[arg->toString()];
552545
bool reuseColumn = options_.enableColumnReuse &&
553546
!listOfCandidateCols.empty() && vectorFuzzer_->coinToss(0.3);
554547

555548
if (!reuseColumn && options_.maxInputsThreshold.has_value() &&
556-
state.inputRowTypes_.size() >= options_.maxInputsThreshold.value()) {
549+
state_.inputRowTypes_.size() >= options_.maxInputsThreshold.value()) {
557550
reuseColumn = !listOfCandidateCols.empty();
558551
}
559552

560553
if (!reuseColumn) {
561-
state.inputRowTypes_.emplace_back(arg);
562-
state.inputRowNames_.emplace_back(
563-
fmt::format("c{}", state.inputRowTypes_.size() - 1));
564-
listOfCandidateCols.push_back(state.inputRowNames_.back());
554+
state_.inputRowTypes_.emplace_back(arg);
555+
state_.inputRowNames_.emplace_back(
556+
fmt::format("c{}", state_.inputRowTypes_.size() - 1));
557+
state_.customInputGenerators_.emplace_back(nullptr);
558+
listOfCandidateCols.push_back(state_.inputRowNames_.back());
565559
return std::make_shared<core::FieldAccessTypedExpr>(
566-
arg, state.inputRowNames_.back());
560+
arg, state_.inputRowNames_.back());
567561
}
568562
size_t chosenColIndex = rand32(0, listOfCandidateCols.size() - 1);
569563
return std::make_shared<core::FieldAccessTypedExpr>(
@@ -582,7 +576,7 @@ core::TypedExprPtr ExpressionFuzzer::generateArg(const TypePtr& arg) {
582576
// - Lambdas
583577
// - Try
584578
if (argClass >= kArgExpression) {
585-
if (state.remainingLevelOfNesting_ > 0) {
579+
if (state_.remainingLevelOfNesting_ > 0) {
586580
return generateExpression(arg);
587581
}
588582
argClass = rand32(0, 1);
@@ -732,18 +726,19 @@ std::vector<core::TypedExprPtr> ExpressionFuzzer::generateSwitchArgs(
732726

733727
/// Generates one random expression per child of 'outType'.
///
/// The fuzzer state is reset first. The accumulated input column names and
/// types, the custom input generators and the expression statistics are then
/// moved out of the state into the result.
///
/// @param outType Row type whose children are the requested return types.
/// @return The generated expressions, the ROW type of the input columns they
/// reference, the custom input generators, and the expression statistics.
ExpressionFuzzer::FuzzedExpressionData ExpressionFuzzer::fuzzExpressions(
    const RowTypePtr& outType) {
  state_.reset();
  VELOX_CHECK_EQ(
      state_.remainingLevelOfNesting_, std::max(1, options_.maxLevelOfNesting));

  const auto numOutputs = outType->size();
  std::vector<core::TypedExprPtr> expressions;
  expressions.reserve(numOutputs);
  // Unsigned index: avoids comparing a signed counter with the unsigned size.
  for (uint32_t i = 0; i < numOutputs; ++i) {
    expressions.push_back(generateExpression(outType->childAt(i)));
  }

  // The state members are moved from only after every expression has been
  // generated; state_.reset() above re-initializes the state on the next
  // call.
  return {
      std::move(expressions),
      ROW(std::move(state_.inputRowNames_), std::move(state_.inputRowTypes_)),
      std::move(state_.customInputGenerators_),
      std::move(state_.expressionStats_)};
}
748743

749744
ExpressionFuzzer::FuzzedExpressionData ExpressionFuzzer::fuzzExpressions(
@@ -760,16 +755,16 @@ ExpressionFuzzer::FuzzedExpressionData ExpressionFuzzer::fuzzExpression() {
760755
// chance that it will re-use one of them.
761756
core::TypedExprPtr ExpressionFuzzer::generateExpression(
762757
const TypePtr& returnType) {
763-
VELOX_CHECK_GT(state.remainingLevelOfNesting_, 0);
764-
--state.remainingLevelOfNesting_;
765-
auto guard = folly::makeGuard([&] { ++state.remainingLevelOfNesting_; });
758+
VELOX_CHECK_GT(state_.remainingLevelOfNesting_, 0);
759+
--state_.remainingLevelOfNesting_;
760+
auto guard = folly::makeGuard([&] { ++state_.remainingLevelOfNesting_; });
766761

767762
core::TypedExprPtr expression;
768763
bool reuseExpression =
769764
options_.enableExpressionReuse && vectorFuzzer_->coinToss(0.3);
770765
if (reuseExpression) {
771-
expression = state.expressionBank_.getRandomExpression(
772-
returnType, state.remainingLevelOfNesting_ + 1);
766+
expression = state_.expressionBank_.getRandomExpression(
767+
returnType, state_.remainingLevelOfNesting_ + 1);
773768
if (expression) {
774769
return expression;
775770
}
@@ -796,11 +791,11 @@ core::TypedExprPtr ExpressionFuzzer::generateExpression(
796791

797792
auto exprTransformer = options_.exprTransformers.find(chosenFunctionName);
798793
if (exprTransformer != options_.exprTransformers.end()) {
799-
state.remainingLevelOfNesting_ -=
794+
state_.remainingLevelOfNesting_ -=
800795
exprTransformer->second->extraLevelOfNesting();
801796
}
802797

803-
if (state.remainingLevelOfNesting_ >= 0) {
798+
if (state_.remainingLevelOfNesting_ >= 0) {
804799
if (chosenFunctionName == "cast") {
805800
expression = generateCastExpression(returnType);
806801
} else if (chosenFunctionName == "row_constructor") {
@@ -825,7 +820,7 @@ core::TypedExprPtr ExpressionFuzzer::generateExpression(
825820
if (expression) {
826821
expression = exprTransformer->second->transform(std::move(expression));
827822
}
828-
state.remainingLevelOfNesting_ +=
823+
state_.remainingLevelOfNesting_ +=
829824
exprTransformer->second->extraLevelOfNesting();
830825
}
831826
}
@@ -841,17 +836,32 @@ core::TypedExprPtr ExpressionFuzzer::generateExpression(
841836
return generateArgColumn(returnType);
842837
}
843838
}
844-
state.expressionBank_.insert(expression);
839+
state_.expressionBank_.insert(expression);
845840
return expression;
846841
}
847842

848843
std::vector<core::TypedExprPtr> ExpressionFuzzer::getArgsForCallable(
849844
const CallableSignature& callable) {
845+
// Special case for switch because it has a variable number of arguments not
846+
// specified in the signature. Other functions' argument override should be
847+
// specified through funcArgOverrides_.
848+
if (callable.name == "switch") {
849+
return generateSwitchArgs(callable);
850+
}
851+
850852
auto funcIt = funcArgOverrides_.find(callable.name);
851853
if (funcIt == funcArgOverrides_.end()) {
852854
return generateArgs(callable);
853855
}
854-
return funcIt->second(callable);
856+
auto args = funcIt->second->generate(
857+
callable, vectorFuzzer_->getOptions(), rng_, state_);
858+
for (auto i = 0; i < args.size(); ++i) {
859+
// Generate arguments not specified in the override.
860+
if (args[i] == nullptr) {
861+
args[i] = generateArg(callable.args.at(i), callable.constantArgs.at(i));
862+
}
863+
}
864+
return args;
855865
}
856866

857867
core::TypedExprPtr ExpressionFuzzer::getCallExprFromCallable(
@@ -1124,45 +1134,6 @@ core::TypedExprPtr ExpressionFuzzer::generateDereferenceExpression(
11241134
inputExpressions[0],
11251135
fmt::format("row_field{}", referencedIndex));
11261136
}
1127-
/// Adds 'expression' to the bank, filed under the string form of its type and
/// its nesting level.
///
/// The per-type table is created on first use with maxLevelOfNesting_ + 1
/// level buckets.
///
/// @param expression Expression to store; its nesting level, as reported by
/// getNestedLevel(), must not exceed maxLevelOfNesting_.
void ExpressionFuzzer::ExprBank::insert(const core::TypedExprPtr& expression) {
  auto typeString = expression->type()->toString();

  // Look the type up once and reuse the iterator instead of following
  // find() + insert() with a further operator[] lookup.
  auto it = typeToExprsByLevel_.find(typeString);
  if (it == typeToExprsByLevel_.end()) {
    it = typeToExprsByLevel_
             .insert({typeString, ExprsIndexedByLevel(maxLevelOfNesting_ + 1)})
             .first;
  }
  auto& expressionsByLevel = it->second;

  int nestingLevel = getNestedLevel(expression);
  VELOX_CHECK_LE(nestingLevel, maxLevelOfNesting_);
  expressionsByLevel[nestingLevel].push_back(expression);
}
1138-
1139-
/// Picks, uniformly at random, one stored expression of type 'returnType'
/// whose nesting level is at most 'uptoLevelOfNesting'.
///
/// @param returnType Type the expression must have; matched through its
/// string form.
/// @param uptoLevelOfNesting Highest nesting level to consider; must not
/// exceed maxLevelOfNesting_.
/// @return The chosen expression, or nullptr when no stored expression
/// qualifies. No random number is drawn in that case.
core::TypedExprPtr ExpressionFuzzer::ExprBank::getRandomExpression(
    const facebook::velox::TypePtr& returnType,
    int uptoLevelOfNesting) {
  VELOX_CHECK_LE(uptoLevelOfNesting, maxLevelOfNesting_);
  auto typeString = returnType->toString();

  // Single lookup; the iterator is reused instead of a second operator[]
  // access.
  const auto it = typeToExprsByLevel_.find(typeString);
  if (it == typeToExprsByLevel_.end()) {
    return nullptr;
  }
  const auto& expressionsByLevel = it->second;

  // Count the candidates over levels [0, uptoLevelOfNesting].
  size_t totalToConsider = 0;
  for (int i = 0; i <= uptoLevelOfNesting; i++) {
    totalToConsider += expressionsByLevel[i].size();
  }
  if (totalToConsider == 0) {
    return nullptr;
  }

  // Draw a global index over all candidates, then walk the levels to find
  // the bucket that contains it.
  size_t choice = boost::random::uniform_int_distribution<uint32_t>(
      0, static_cast<uint32_t>(totalToConsider - 1))(rng_);
  for (int i = 0; i <= uptoLevelOfNesting; i++) {
    const auto& candidates = expressionsByLevel[i];
    if (choice >= candidates.size()) {
      choice -= candidates.size();
      continue;
    }
    return candidates[choice];
  }
  VELOX_CHECK(false, "Should have found an expression.");
  return nullptr;
}
11661137

11671138
TypePtr ExpressionFuzzer::fuzzReturnType() {
11681139
auto chooseFromConcreteSignatures = rand32(0, 1);

0 commit comments

Comments
 (0)