Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion include/wabt/binary-reader-logging.h
Original file line number Diff line number Diff line change
Expand Up @@ -414,7 +414,10 @@ class BinaryReaderLogging : public BinaryReaderDelegate {
Result BeginCodeMetadataSection(std::string_view name, Offset size) override;
Result OnCodeMetadataFuncCount(Index count) override;
Result OnCodeMetadataCount(Index function_index, Index count) override;
Result OnCodeMetadata(Offset offset, const void* data, Address size) override;
Result OnCodeMetadataCodeOffset(Offset offset) override;
Result OnCodeMetadata(const void* data, Address size) override;
Result OnCodeMetadataCallTarget(Index target_index,
uint32_t call_frequency) override;
Result EndCodeMetadataSection() override;

private:
Expand Down
9 changes: 6 additions & 3 deletions include/wabt/binary-reader-nop.h
Original file line number Diff line number Diff line change
Expand Up @@ -489,9 +489,12 @@ class BinaryReaderNop : public BinaryReaderDelegate {
// No-op handler: both arguments are ignored and success is reported.
Result OnCodeMetadataCount(Index function_index, Index count) override {
  return Result::Ok;
}
Result OnCodeMetadata(Offset offset,
const void* data,
Address size) override {
// No-op handler: the payload pointer and size are ignored and success is
// reported.
Result OnCodeMetadata(const void* data, Address size) override {
  return Result::Ok;
}
// No-op handler: the code offset is ignored and success is reported.
Result OnCodeMetadataCodeOffset(Offset offset) override {
  return Result::Ok;
}
// No-op handler: the target index and call frequency are ignored and success
// is reported.
Result OnCodeMetadataCallTarget(Index target_index,
                                uint32_t call_frequency) override {
  return Result::Ok;
}
// No-op handler: reports success without doing any work.
Result EndCodeMetadataSection() override {
  return Result::Ok;
}
Expand Down
7 changes: 4 additions & 3 deletions include/wabt/binary-reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -498,9 +498,10 @@ class BinaryReaderDelegate {
Offset size) = 0;
virtual Result OnCodeMetadataFuncCount(Index count) = 0;
virtual Result OnCodeMetadataCount(Index function_index, Index count) = 0;
virtual Result OnCodeMetadata(Offset offset,
const void* data,
Address size) = 0;
virtual Result OnCodeMetadata(const void* data, Address size) = 0;
virtual Result OnCodeMetadataCodeOffset(Offset offset) = 0;
virtual Result OnCodeMetadataCallTarget(Index target_index,
uint32_t call_frequency) = 0;
virtual Result EndCodeMetadataSection() = 0;

const State* state = nullptr;
Expand Down
91 changes: 85 additions & 6 deletions include/wabt/ir.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@
#include "wabt/intrusive-list.h"
#include "wabt/opcode.h"

#include <optional>

namespace wabt {

struct Module;
Expand Down Expand Up @@ -702,15 +704,92 @@ class CallIndirectExpr : public ExprMixin<ExprType::CallIndirect> {

class CodeMetadataExpr : public ExprMixin<ExprType::CodeMetadata> {
public:
explicit CodeMetadataExpr(std::string_view name,
std::vector<uint8_t> data,
const Location& loc = Location())
// One entry of a call-targets hint: a function reference paired with a
// frequency value. Stored in a std::vector<CallTarget> by the call-targets
// constructor of CodeMetadataExpr.
struct CallTarget {
// Function this entry refers to.
Var func;
// NOTE(review): objdump prints the corresponding binary field with a '%'
// suffix, so this is presumably a percentage -- confirm against the spec.
uint32_t frequency;
};
// Payload of a compilation-priority hint (the Type::CompilationHint
// alternative of CodeMetadataExpr).
struct CompilationPriority {
// Always present.
uint32_t compilation_priority;
// May be absent; std::nullopt means no optimization priority was given.
std::optional<uint32_t> optimization_priority;
};
// Payload of an instruction-frequency hint (the Type::InstructionFrequency
// alternative of CodeMetadataExpr).
struct InstructionFrequency {
// NOTE(review): unit/encoding of this value is not visible here -- confirm
// against the serializer.
uint32_t frequency;
};

// Creates a metadata expression that carries an opaque byte payload
// (Type::Binary).
//
// `name` is stored as a std::string_view, `data` is moved into the union
// member hint.data, and `loc` is forwarded to the ExprMixin base.
CodeMetadataExpr(std::string_view name,
                 std::vector<uint8_t> data,
                 const Location& loc = Location())
    : ExprMixin<ExprType::CodeMetadata>(loc),
      name(name),
      type(Type::Binary) {
  // Hint's constructor leaves hint.data unconstructed, so the vector has to
  // be created in place; assigning to it would operate on a non-existent
  // object.
  ::new (static_cast<void*>(&hint.data))
      std::vector<uint8_t>(std::move(data));
}

CodeMetadataExpr(std::string_view name,
CompilationPriority compilation_priority,
const Location& loc = Location())
: ExprMixin<ExprType::CodeMetadata>(loc),
name(std::move(name)),
data(std::move(data)) {}
name(name),
type(Type::CompilationHint) {
new (&hint.compilation_priority) CompilationPriority(compilation_priority);
}

CodeMetadataExpr(std::string_view name,
InstructionFrequency instruction_frequency,
const Location& loc = Location())
: ExprMixin<ExprType::CodeMetadata>(loc),
name(name),
type(Type::InstructionFrequency) {
new (&hint.instruction_frequency)
InstructionFrequency(instruction_frequency);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you just do hint.instruction_frequency = instruction_frequency here instead of the placement new?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since hint.instruction_frequency is not a valid C++ object, we're not allowed to use its copy assignment operator. In practice, using it should work as expected, but it is nevertheless undefined behavior (afaik). That's why I opted for just writing over it, which should definitely be safe.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you explain more about why? Why can't we use the assignment operator here? Surely the LHS doesn't matter here since it's being clobbered by the RHS.

Does the compiler complain if you do hint.instruction_frequency = instruction_frequency?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

While the compiler does not catch the undefined behavior, ASAN does:

  +AddressSanitizer:DEADLYSIGNAL
  +=================================================================
  +==720746==ERROR: AddressSanitizer: SEGV on unknown address (pc 0x55d1e4e29fd8 bp 0x000000000002 sp 0x7fffd147b960 T0)
  +==720746==The signal is caused by a READ memory access.
  +==720746==Hint: this fault was caused by a dereference of a high value address (see register values below).  Disassemble the provided pc to learn which register was used.
  +    #0 0x55d1e4e29fd8 in atomic_compare_exchange_strong<__sanitizer::atomic_uint8_t> /home/abuild/rpmbuild/BUILD/llvm20-20.1.8-build/llvm-20.1.8.src/projects/compiler-rt/lib/asan/../sanitizer_common/sanitizer_atomic_clang.h:84:10
  +    #1 0x55d1e4e29fd8 in AtomicallySetQuarantineFlagIfAllocated /home/abuild/rpmbuild/BUILD/llvm20-20.1.8-build/llvm-20.1.8.src/projects/compiler-rt/lib/asan/asan_allocator.cpp:669:10
  +    #2 0x55d1e4e29fd8 in __asan::Allocator::Deallocate(void*, unsigned long, unsigned long, __sanitizer::BufferedStackTrace*, __asan::AllocType) /home/abuild/rpmbuild/BUILD/llvm20-20.1.8-build/llvm-20.1.8.src/projects/compiler-rt/lib/asan/asan_allocator.cpp:733:10
  +    #3 0x55d1e4f12b9b in operator delete(void*, unsigned long) /home/abuild/rpmbuild/BUILD/llvm20-20.1.8-build/llvm-20.1.8.src/projects/compiler-rt/lib/asan/asan_new_delete.cpp:155:3
  +    #4 0x55d1e4f1c4a4 in std::__new_allocator<unsigned char>::deallocate(unsigned char*, unsigned long) /usr/bin/../lib64/gcc/x86_64-suse-linux/15/../../../../include/c++/15/bits/new_allocator.h:172:2
  +    #5 0x55d1e4f1c43b in std::allocator_traits<std::allocator<unsigned char>>::deallocate(std::allocator<unsigned char>&, unsigned char*, unsigned long) /usr/bin/../lib64/gcc/x86_64-suse-linux/15/../../../../include/c++/15/bits/alloc_traits.h:649:13
  +    #6 0x55d1e4f1c43b in std::_Vector_base<unsigned char, std::allocator<unsigned char>>::_M_deallocate(unsigned char*, unsigned long) /usr/bin/../lib64/gcc/x86_64-suse-linux/15/../../../../include/c++/15/bits/stl_vector.h:396:4
  +    #7 0x55d1e4f1c3ce in std::_Vector_base<unsigned char, std::allocator<unsigned char>>::~_Vector_base() /usr/bin/../lib64/gcc/x86_64-suse-linux/15/../../../../include/c++/15/bits/stl_vector.h:375:2
  +    #8 0x55d1e4f18230 in std::vector<unsigned char, std::allocator<unsigned char>>::~vector() /usr/bin/../lib64/gcc/x86_64-suse-linux/15/../../../../include/c++/15/bits/stl_vector.h:805:7
  +    #9 0x55d1e503d839 in std::vector<unsigned char, std::allocator<unsigned char>>::_M_move_assign(std::vector<unsigned char, std::allocator<unsigned char>>&&, std::integral_constant<bool, true>) /usr/bin/../lib64/gcc/x86_64-suse-linux/15/../../../../include/c++/15/bits/stl_vector.h:2266:7
  +    #10 0x55d1e50280f4 in std::vector<unsigned char, std::allocator<unsigned char>>::operator=(std::vector<unsigned char, std::allocator<unsigned char>>&&) /usr/bin/../lib64/gcc/x86_64-suse-linux/15/../../../../include/c++/15/bits/stl_vector.h:838:2
  +    #11 0x55d1e505984a in wabt::CodeMetadataExpr::CodeMetadataExpr(std::basic_string_view<char, std::char_traits<char>>, std::vector<unsigned char, std::allocator<unsigned char>>, wabt::Location const&) /home/wabt/include/wabt/ir.h:723:15

When using the assignment operator of the LHS, its implementation (in this case of std::vector) assumes that the LHS is in a valid object state, which it is not here, and therefore accesses uninitialized memory. That's why we're only allowed to call the assignment operator on objects in a proper state, which the LHS is not. Therefore, we use placement new to construct a fresh object in the memory associated with the (non-existent) LHS object.

}

// Creates a metadata expression that carries a list of call targets
// (Type::CallTargets).
//
// `name` is stored as a std::string_view, `targets` is moved into the union
// member hint.call_targets, and `loc` is forwarded to the ExprMixin base.
CodeMetadataExpr(std::string_view name,
                 std::vector<CallTarget> targets,
                 const Location& loc = Location())
    : ExprMixin<ExprType::CodeMetadata>(loc),
      name(name),
      type(Type::CallTargets) {
  // Hint's constructor leaves hint.call_targets unconstructed, so the vector
  // has to be created in place rather than assigned.
  ::new (static_cast<void*>(&hint.call_targets))
      std::vector<CallTarget>(std::move(targets));
}

// Destroys whichever union member the constructor created. Hint's own
// destructor is empty, so the vector-backed alternatives must be torn down
// explicitly here.
~CodeMetadataExpr() override {
  if (type == Type::Binary) {
    hint.data.~vector();
  } else if (type == Type::CallTargets) {
    hint.call_targets.~vector();
  }
  // CompilationHint and InstructionFrequency do not allocate memory, so there
  // is nothing to release for them.
}

// Reports whether this expression holds a compilation-priority hint
// (Type::CompilationHint); every other hint type yields false.
bool is_function_annotation() const {
  return Type::CompilationHint == type;
}

// convert non-binary hints to binary
std::vector<uint8_t> serialize(const Module&) const;

std::string_view name;
std::vector<uint8_t> data;

private:
// Storage for the hint payload. Which member is meaningful is recorded in
// `type` by the CodeMetadataExpr constructors.
//
// The constructor and destructor are deliberately empty: the enclosing
// class constructs the selected member with placement new and destroys the
// vector members explicitly in ~CodeMetadataExpr().
union Hint {
// Raw byte payload (Type::Binary).
std::vector<uint8_t> data;
// Type::CompilationHint payload.
CompilationPriority compilation_priority;
// Type::InstructionFrequency payload; the only member that carries a
// default member initializer.
InstructionFrequency instruction_frequency{};
// Type::CallTargets payload.
std::vector<CallTarget> call_targets;
Hint() {}
~Hint() {}
} hint;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can this be an anonymous union?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since we need to provide the union's constructor and destructor ourselves, we need to name the union, afaik. But please correct me if there's a way to define a destructor for an anonymous union in C++.


// Discriminator for `hint`: records which union member the constructor
// initialized, and is consulted by the destructor.
enum class Type {
// hint.data holds a raw byte vector.
Binary,
// hint.compilation_priority is set.
CompilationHint,
// hint.instruction_frequency is set.
InstructionFrequency,
// hint.call_targets holds a vector of CallTarget.
CallTargets
};

Type type;
};

class ReturnCallIndirectExpr : public ExprMixin<ExprType::ReturnCallIndirect> {
Expand Down
7 changes: 7 additions & 0 deletions include/wabt/token.def
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

/* Tokens with no additional data (i.e. bare). */
WABT_TOKEN(Invalid, "Invalid")
WABT_TOKEN(AlwaysOpt, "always_opt")
WABT_TOKEN(After, "after")
WABT_TOKEN(Array, "array")
WABT_TOKEN(AssertException, "assert_exception")
Expand All @@ -31,6 +32,7 @@ WABT_TOKEN(AssertTrap, "assert_trap")
WABT_TOKEN(AssertUnlinkable, "assert_unlinkable")
WABT_TOKEN(Before, "before")
WABT_TOKEN(Bin, "bin")
WABT_TOKEN(Compilation, "compilation")
WABT_TOKEN(Item, "item")
WABT_TOKEN(Data, "data")
WABT_TOKEN(Declare, "declare")
Expand All @@ -40,9 +42,11 @@ WABT_TOKEN(Either, "either")
WABT_TOKEN(Elem, "elem")
WABT_TOKEN(Eof, "EOF")
WABT_TOKEN(Tag, "tag")
WABT_TOKEN(Target, "target")
WABT_TOKEN(Export, "export")
WABT_TOKEN(Field, "field")
WABT_TOKEN(Function, "function")
WABT_TOKEN(Freq, "freq")
WABT_TOKEN(Get, "get")
WABT_TOKEN(Global, "global")
WABT_TOKEN(Import, "import")
Expand All @@ -55,7 +59,9 @@ WABT_TOKEN(Module, "module")
WABT_TOKEN(Mut, "mut")
WABT_TOKEN(NanArithmetic, "nan:arithmetic")
WABT_TOKEN(NanCanonical, "nan:canonical")
WABT_TOKEN(NeverOpt, "never_opt")
WABT_TOKEN(Offset, "offset")
WABT_TOKEN(Optimization, "optimization")
WABT_TOKEN(Output, "output")
WABT_TOKEN(PageSize, "pagesize")
WABT_TOKEN(Param, "param")
Expand All @@ -64,6 +70,7 @@ WABT_TOKEN(Quote, "quote")
WABT_TOKEN(Register, "register")
WABT_TOKEN(Result, "result")
WABT_TOKEN(Rpar, ")")
WABT_TOKEN(RunOnce, "run_once")
WABT_TOKEN(Shared, "shared")
WABT_TOKEN(Start, "start")
WABT_TOKEN(Struct, "struct")
Expand Down
9 changes: 9 additions & 0 deletions include/wabt/wast-parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,15 @@ class WastParser {
Result ParseTerminatingInstrList(ExprList*);
Result ParseInstr(ExprList*);
Result ParseCodeMetadataAnnotation(ExprList*);
Result ParseCodeMetaDataCompilationPriorityAnnotation(ExprList*,
std::string_view,
Location);
Result ParseCodeMetaDataInstrFreqAnnotation(ExprList*,
std::string_view,
Location);
Result ParseCodeMetaDataCallTargetsAnnotation(ExprList*,
std::string_view,
Location);
Result ParsePlainInstr(std::unique_ptr<Expr>*);
Result ParseF32(Const*, ConstType type);
Result ParseF64(Const*, ConstType type);
Expand Down
15 changes: 10 additions & 5 deletions src/binary-reader-ir.cc
Original file line number Diff line number Diff line change
Expand Up @@ -350,7 +350,8 @@ class BinaryReaderIR : public BinaryReaderNop {
Result BeginCodeMetadataSection(std::string_view name, Offset size) override;
Result OnCodeMetadataFuncCount(Index count) override;
Result OnCodeMetadataCount(Index function_index, Index count) override;
Result OnCodeMetadata(Offset offset, const void* data, Address size) override;
Result OnCodeMetadataCodeOffset(Offset offset) override;
Result OnCodeMetadata(const void* data, Address size) override;

Result OnTagSymbol(Index index,
uint32_t flags,
Expand Down Expand Up @@ -398,6 +399,7 @@ class BinaryReaderIR : public BinaryReaderNop {

CodeMetadataExprQueue code_metadata_queue_;
std::string_view current_metadata_name_;
Offset current_metadata_offset_;
};

BinaryReaderIR::BinaryReaderIR(Module* out_module,
Expand Down Expand Up @@ -1710,18 +1712,21 @@ Result BinaryReaderIR::OnCodeMetadataCount(Index function_index, Index count) {
return Result::Error;
}

Result BinaryReaderIR::OnCodeMetadata(Offset offset,
const void* data,
Address size) {
// Copies the `size` bytes at `data` into a new CodeMetadataExpr named after
// the current metadata section, stamps it with the most recently reported
// code offset, and queues it.
Result BinaryReaderIR::OnCodeMetadata(const void* data, const Address size) {
  const auto* first = static_cast<const uint8_t*>(data);
  std::vector<uint8_t> payload(first, first + size);

  auto expr = std::make_unique<CodeMetadataExpr>(current_metadata_name_,
                                                 std::move(payload));
  // The offset arrives separately through OnCodeMetadataCodeOffset.
  expr->loc.offset = current_metadata_offset_;
  code_metadata_queue_.push_metadata(std::move(expr));
  return Result::Ok;
}

// Remembers the code offset so that the following OnCodeMetadata call can
// attach it to the expression it creates.
Result BinaryReaderIR::OnCodeMetadataCodeOffset(const Offset offset) {
  this->current_metadata_offset_ = offset;
  return Result::Ok;
}

Result BinaryReaderIR::OnLocalName(Index func_index,
Index local_index,
std::string_view name) {
Expand Down
24 changes: 17 additions & 7 deletions src/binary-reader-logging.cc
Original file line number Diff line number Diff line change
Expand Up @@ -681,13 +681,23 @@ Result BinaryReaderLogging::BeginCodeMetadataSection(std::string_view name,
Indent();
return reader_->BeginCodeMetadataSection(name, size);
}
Result BinaryReaderLogging::OnCodeMetadata(Offset code_offset,
const void* data,
Address size) {
std::string_view content(static_cast<const char*>(data), size);
LOGF("OnCodeMetadata(offset: %" PRIzd ", data: \"" PRIstringview "\")\n",
code_offset, WABT_PRINTF_STRING_VIEW_ARG(content));
return reader_->OnCodeMetadata(code_offset, data, size);
// Logs the code offset, then forwards the call to the wrapped reader and
// returns its result.
Result BinaryReaderLogging::OnCodeMetadataCodeOffset(const Offset code_offset) {
  LOGF("OnCodeMetadataCodeOffset(offset: %" PRIzd ")\n", code_offset);
  const Result forwarded = reader_->OnCodeMetadataCodeOffset(code_offset);
  return forwarded;
}
// Logs the payload as text, then forwards the untouched pointer and size to
// the wrapped reader and returns its result.
Result BinaryReaderLogging::OnCodeMetadata(const void* data,
                                           const Address size) {
  const char* const chars = static_cast<const char*>(data);
  const std::string_view text(chars, size);
  LOGF("OnCodeMetadata(data: \"" PRIstringview "\")\n",
       WABT_PRINTF_STRING_VIEW_ARG(text));
  return reader_->OnCodeMetadata(data, size);
}
// Logs one call-target entry, then forwards it to the wrapped reader and
// returns its result.
Result BinaryReaderLogging::OnCodeMetadataCallTarget(
    const Index target_index,
    const uint32_t call_frequency) {
  LOGF("OnCodeMetadataCallTarget(target_index: %u, call_frequency: %u)\n",
       target_index, call_frequency);
  const Result forwarded =
      reader_->OnCodeMetadataCallTarget(target_index, call_frequency);
  return forwarded;
}

Result BinaryReaderLogging::OnGenericCustomSection(std::string_view name,
Expand Down
35 changes: 27 additions & 8 deletions src/binary-reader-objdump.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1286,9 +1286,10 @@ class BinaryReaderObjdump : public BinaryReaderObjdumpBase {
Result OnRefNullExpr(Type type) override;
Result OnGlobalGetExpr(Index global_index) override;
Result OnCodeMetadataCount(Index function_index, Index count) override;
Result OnCodeMetadata(Offset code_offset,
const void* data,
Address size) override;
Result OnCodeMetadataCodeOffset(Offset code_offset) override;
Result OnCodeMetadata(const void* data, Address size) override;
Result OnCodeMetadataCallTarget(Index target_index,
uint32_t call_frequency) override;

private:
Result EndInitExpr();
Expand Down Expand Up @@ -2446,25 +2447,43 @@ Result BinaryReaderObjdump::OnCodeMetadataCount(Index function_index,
return Result::Ok;
}
printf(" - func[%" PRIindex "]", function_index);
auto name = GetFunctionName(function_index);
const auto name = GetFunctionName(function_index);
if (!name.empty()) {
printf(" <" PRIstringview ">", WABT_PRINTF_STRING_VIEW_ARG(name));
}
printf(":\n");
return Result::Ok;
}
Result BinaryReaderObjdump::OnCodeMetadata(Offset code_offset,
const void* data,
Address size) {
// Prints the "meta[<offset>]" header line for one metadata instance when
// details are being printed; always reports success.
Result BinaryReaderObjdump::OnCodeMetadataCodeOffset(const Offset code_offset) {
  if (ShouldPrintDetails()) {
    printf(" - meta[%" PRIzx "]:\n", code_offset);
  }
  return Result::Ok;
}
// Writes a memory dump of the payload to the output stream when details are
// being printed; always reports success.
Result BinaryReaderObjdump::OnCodeMetadata(const void* data,
                                           const Address size) {
  if (ShouldPrintDetails()) {
    out_stream_->WriteMemoryDump(data, size, 0, PrintChars::Yes, " - ");
  }
  return Result::Ok;
}
// Prints one call-target entry (index, optional function name, frequency
// followed by '%') when details are being printed; always reports success.
Result BinaryReaderObjdump::OnCodeMetadataCallTarget(
    const Index target_index,
    const uint32_t call_frequency) {
  if (ShouldPrintDetails()) {
    printf(" - target [%" PRIindex "]", target_index);

    // The name is only shown when one is known for this function index.
    const auto func_name = GetFunctionName(target_index);
    if (!func_name.empty()) {
      printf(" <" PRIstringview ">", WABT_PRINTF_STRING_VIEW_ARG(func_name));
    }

    printf(": %" PRIu32 "%%\n", call_frequency);
  }
  return Result::Ok;
}
} // end anonymous namespace

std::string_view ObjdumpNames::Get(Index index) const {
Expand Down
25 changes: 20 additions & 5 deletions src/binary-reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2467,11 +2467,26 @@ Result BinaryReader::ReadCodeMetadataSection(std::string_view name,
last_code_offset == kInvalidOffset || code_offset > last_code_offset,
"code offset out of order: %" PRIzx, code_offset);
last_code_offset = code_offset;

Address data_size;
const void* data;
CHECK_RESULT(ReadBytes(&data, &data_size, "instance data"));
CALLBACK(OnCodeMetadata, code_offset, data, data_size);
CALLBACK(OnCodeMetadataCodeOffset, code_offset);

if (name == "call_targets") {
uint32_t hint_size;
CHECK_RESULT(ReadU32Leb128(&hint_size, "call targets hint size"));
Offset end = state_.offset + hint_size;
while (state_.offset < end) {
Index target_index;
CHECK_RESULT(ReadIndex(&target_index, "call target index"));
uint32_t call_frequency;
CHECK_RESULT(ReadU32Leb128(&call_frequency, "call frequency"));
CALLBACK(OnCodeMetadataCallTarget, target_index, call_frequency);
}
assert(state_.offset == end);
} else {
Address data_size;
const void* data;
CHECK_RESULT(ReadBytes(&data, &data_size, "instance data"));
CALLBACK(OnCodeMetadata, data, data_size);
}
}
}

Expand Down
8 changes: 6 additions & 2 deletions src/binary-writer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1146,8 +1146,12 @@ void BinaryWriter::WriteExpr(const Func* func, const Expr* expr) {
s.entries.emplace_back(cur_func_index_);
}
auto& a = s.entries.back();
Offset code_offset = stream_->offset() - cur_func_start_offset_;
a.entries.emplace_back(code_offset, meta_expr->data);
Offset code_offset;
if (meta_expr->is_function_annotation())
code_offset = 0;
else
code_offset = stream_->offset() - cur_func_start_offset_;
a.entries.emplace_back(code_offset, meta_expr->serialize(*module_));
break;
}
}
Expand Down
Loading
Loading