Skip to content
Open
Show file tree
Hide file tree
Changes from 60 commits
Commits
Show all changes
69 commits
Select commit Hold shift + click to select a range
0967b41
Allow funcref literals to have inexact types
tlively Oct 8, 2025
7cbdfea
marge
kripken Oct 21, 2025
82ef0e0
fix
kripken Oct 21, 2025
d857e7d
fix
kripken Oct 21, 2025
d1d2ed8
fix
kripken Oct 21, 2025
e26f676
fix
kripken Oct 21, 2025
7fda8cf
work
kripken Oct 22, 2025
5ce973b
work
kripken Oct 22, 2025
4b167eb
work
kripken Oct 22, 2025
7927749
work
kripken Oct 22, 2025
160123e
work
kripken Oct 22, 2025
b8bb97a
work
kripken Oct 22, 2025
89103b9
work
kripken Oct 22, 2025
24dea81
work
kripken Oct 22, 2025
d4aabd2
work
kripken Oct 22, 2025
a4fe585
work
kripken Oct 22, 2025
76eac4c
work
kripken Oct 22, 2025
020f119
work
kripken Oct 22, 2025
21588c7
work
kripken Oct 22, 2025
fdd7253
work
kripken Oct 22, 2025
7a58395
work
kripken Oct 22, 2025
4616380
work
kripken Oct 22, 2025
46826b0
work
kripken Oct 22, 2025
73024ea
work
kripken Oct 22, 2025
4de7694
work
kripken Oct 22, 2025
d03376d
work
kripken Oct 22, 2025
82061c8
work
kripken Oct 22, 2025
1859cf0
work
kripken Oct 22, 2025
1791066
work
kripken Oct 22, 2025
7278c6a
format
kripken Oct 22, 2025
31573ac
work
kripken Oct 22, 2025
b099733
fix
kripken Oct 22, 2025
34837c3
fix
kripken Oct 22, 2025
616b23a
fix
kripken Oct 23, 2025
46b9d70
work
kripken Oct 23, 2025
8b5f7dd
fix
kripken Oct 23, 2025
c541380
work
kripken Oct 24, 2025
982af15
work
kripken Oct 24, 2025
e2de806
work
kripken Oct 24, 2025
73bffc8
work
kripken Oct 24, 2025
8931d56
update
kripken Oct 24, 2025
c2f9d15
format
kripken Oct 24, 2025
44faf20
fix
kripken Oct 24, 2025
719bf2c
fix
kripken Oct 24, 2025
c24956c
fix
kripken Oct 24, 2025
aea5996
undo
kripken Oct 24, 2025
e2d9c70
update.tests
kripken Oct 24, 2025
d360f0b
fix
kripken Oct 24, 2025
2d25b8d
fix
kripken Oct 28, 2025
bc122d5
fix
kripken Oct 28, 2025
5de1db0
fix
kripken Oct 28, 2025
61e0b66
Merge remote-tracking branch 'origin/main' into import.func.type
kripken Oct 31, 2025
8341afd
fix signature of branch-hinting function
kripken Oct 31, 2025
7d40a94
Merge remote-tracking branch 'origin/main' into import.func.type
kripken Nov 4, 2025
457b0e5
failing test
kripken Nov 4, 2025
848151a
fix the types of imported ref.funcs in the interpreter
kripken Nov 4, 2025
a6ff39a
Merge remote-tracking branch 'myself/import.func.type' into import.fu…
kripken Nov 4, 2025
e5081e3
fix spec tests
kripken Nov 4, 2025
b99dd75
fix another spec test
kripken Nov 4, 2025
87e1094
fmt
kripken Nov 4, 2025
de14c6a
TODO: ExtractFunction casts
kripken Nov 5, 2025
2802d94
Update test/lit/exec/imported-func.wast
kripken Nov 5, 2025
33189d5
Update test/lit/exec/imported-func.wast
kripken Nov 5, 2025
aab3067
Clean up test
kripken Nov 5, 2025
aa7290d
Simplify gufa test
kripken Nov 5, 2025
89cf0c0
test exact casts too
kripken Nov 5, 2025
0a93101
split no-cd
kripken Nov 5, 2025
03f7970
Revert "split no-cd"
kripken Nov 5, 2025
b77d0f9
todo
kripken Nov 5, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ full changeset diff at the end of each section.
Current Trunk
-------------

- C and JS APIs now assume RefFuncs are created after imported functions (non-
imported functions can still be created later). This is necessary because
imported function types can vary (due to Custom Descriptors), and we need to
look up that type at RefFunc creation time.
- The --mod-asyncify-never-unwind and --mod-asyncify-always-and-only-unwind
passes were deleted. They only existed to support the lazy code loading
support in emscripten that was removed. (#7893)
Expand Down
2 changes: 1 addition & 1 deletion scripts/fuzz_opt.py
Original file line number Diff line number Diff line change
Expand Up @@ -2482,7 +2482,7 @@ def get_random_opts():
# disabled, its dependent features need to be disabled as well.
IMPLIED_FEATURE_OPTS = {
'--disable-reference-types': ['--disable-gc', '--disable-exception-handling', '--disable-strings'],
'--disable-gc': ['--disable-strings', '--disable-stack-switching'],
'--disable-gc': ['--disable-strings', '--disable-stack-switching', '--disable-custom-descriptors'],
}

print('''
Expand Down
19 changes: 16 additions & 3 deletions src/binaryen-c.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ Literal fromBinaryenLiteral(BinaryenLiteral x) {
}
}
if (heapType.isSignature()) {
return Literal::makeFunc(Name(x.func), heapType);
return Literal::makeFunc(Name(x.func), type);
}
assert(heapType.isData());
WASM_UNREACHABLE("TODO: gc data");
Expand Down Expand Up @@ -1609,8 +1609,21 @@ BinaryenExpressionRef BinaryenRefAs(BinaryenModuleRef module,
BinaryenExpressionRef BinaryenRefFunc(BinaryenModuleRef module,
const char* func,
BinaryenHeapType type) {
return static_cast<Expression*>(
Builder(*(Module*)module).makeRefFunc(func, HeapType(type)));
// We can assume imports have been created at this point in time, but not
// other defined functions. See if the function exists already, and assume it
// is non-imported if not. TODO: If we want to allow creating imports later,
// we would need an API addition or change.
auto* wasm = (Module*)module;
if (wasm->getFunctionOrNull(func)) {
// Use the HeapType constructor, which will do a lookup on the module.
return static_cast<Expression*>(
Builder(*(Module*)module).makeRefFunc(func, HeapType(type)));
} else {
// Assume non-imported, and provide the full type for that.
Type full = Type(HeapType(type), NonNullable, Exact);
return static_cast<Expression*>(
Builder(*(Module*)module).makeRefFunc(func, full));
}
}

BinaryenExpressionRef BinaryenRefEq(BinaryenModuleRef module,
Expand Down
4 changes: 1 addition & 3 deletions src/ir/ReFinalize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,9 +116,7 @@ void ReFinalize::visitMemoryGrow(MemoryGrow* curr) { curr->finalize(); }
void ReFinalize::visitRefNull(RefNull* curr) { curr->finalize(); }
void ReFinalize::visitRefIsNull(RefIsNull* curr) { curr->finalize(); }
void ReFinalize::visitRefFunc(RefFunc* curr) {
// TODO: should we look up the function and update the type from there? This
// could handle a change to the function's type, but is also not really what
// this class has been meant to do.
curr->finalize(curr->type.getHeapType(), *getModule());
}
void ReFinalize::visitRefEq(RefEq* curr) { curr->finalize(); }
void ReFinalize::visitTableGet(TableGet* curr) { curr->finalize(); }
Expand Down
45 changes: 37 additions & 8 deletions src/ir/module-splitting.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@
#include "ir/export-utils.h"
#include "ir/module-utils.h"
#include "ir/names.h"
#include "ir/utils.h"
#include "pass.h"
#include "support/insert_ordered.h"
#include "wasm-builder.h"
Expand Down Expand Up @@ -274,7 +275,8 @@ TableSlotManager::Slot TableSlotManager::getSlot(Name func, HeapType type) {
activeBase.index + Index(activeSegment->data.size())};

Builder builder(module);
activeSegment->data.push_back(builder.makeRefFunc(func, type));
auto funcType = Type(type, NonNullable, Inexact);
activeSegment->data.push_back(builder.makeRefFunc(func, funcType));

addSlot(func, newSlot);
if (activeTable->initial <= newSlot.index) {
Expand Down Expand Up @@ -339,6 +341,7 @@ struct ModuleSplitter {
void setupTablePatching();
void shareImportableItems();
void removeUnusedSecondaryElements();
void updateIR();

ModuleSplitter(Module& primary, const Config& config)
: config(config), primary(primary), tableManager(primary),
Expand All @@ -355,6 +358,7 @@ struct ModuleSplitter {
setupTablePatching();
shareImportableItems();
removeUnusedSecondaryElements();
updateIR();
}
};

Expand All @@ -372,7 +376,7 @@ void ModuleSplitter::setupJSPI() {
// Add an imported function to load the secondary module.
auto import = Builder::makeFunction(
ModuleSplitting::LOAD_SECONDARY_MODULE,
Type(Signature(Type::none, Type::none), NonNullable, Exact),
Type(Signature(Type::none, Type::none), NonNullable, Inexact),
{});
import->module = ENV;
import->base = ModuleSplitting::LOAD_SECONDARY_MODULE;
Expand Down Expand Up @@ -516,6 +520,7 @@ void ModuleSplitter::exportImportFunction(Name funcName,
func->hasExplicitName = primaryFunc->hasExplicitName;
func->module = config.importNamespace;
func->base = exportName;
func->type = func->type.with(Inexact);
secondary->addFunction(std::move(func));
}
}
Expand Down Expand Up @@ -790,9 +795,8 @@ void ModuleSplitter::setupTablePatching() {
placeholder->name = Names::getValidFunctionName(
primary, std::string("placeholder_") + placeholder->base.toString());
placeholder->hasExplicitName = true;
placeholder->type = secondaryFunc->type;
elem = Builder(primary).makeRefFunc(placeholder->name,
placeholder->type.getHeapType());
placeholder->type = secondaryFunc->type.with(Inexact);
elem = Builder(primary).makeRefFunc(placeholder->name, placeholder->type);
primary.addFunction(std::move(placeholder));
});

Expand Down Expand Up @@ -833,8 +837,7 @@ void ModuleSplitter::setupTablePatching() {
// primarySeg->data[i] is a placeholder, so use the secondary
// function.
auto* func = replacement->second;
auto* ref = Builder(secondary).makeRefFunc(func->name,
func->type.getHeapType());
auto* ref = Builder(secondary).makeRefFunc(func->name, func->type);
secondaryElems.push_back(ref);
++replacement;
} else if (auto* get = primarySeg->data[i]->dynCast<RefFunc>()) {
Expand Down Expand Up @@ -876,7 +879,7 @@ void ModuleSplitter::setupTablePatching() {
}
auto* func = curr->second;
currData.push_back(
Builder(secondary).makeRefFunc(func->name, func->type.getHeapType()));
Builder(secondary).makeRefFunc(func->name, func->type));
}
if (currData.size()) {
finishSegment();
Expand Down Expand Up @@ -971,11 +974,37 @@ void ModuleSplitter::removeUnusedSecondaryElements() {
// code size in the primary module as well.
for (auto& secondaryPtr : secondaries) {
PassRunner runner(secondaryPtr.get());
// Do not validate here in the middle, as the IR still needs updating later.
runner.options.validate = false;
runner.add("remove-unused-module-elements");
runner.run();
}
}

// Final fix-up step of splitting: walk the primary module and every secondary
// module, and repair each RefFunc whose type no longer equals the type of the
// function it refers to.
void ModuleSplitter::updateIR() {
// Imported functions may need type updates.
struct Fixer : public PostWalker<Fixer> {
// Compare the RefFunc's type with the current type of the referenced
// function, and update the RefFunc if they differ.
void visitRefFunc(RefFunc* curr) {
auto& wasm = *getModule();
auto* func = wasm.getFunction(curr->func);
if (func->type != curr->type) {
// This became an import, and lost exactness.
// The only mismatch expected here is an inexact function type paired with a
// still-exact RefFunc type; anything else trips these asserts.
assert(!func->type.isExact());
assert(curr->type.isExact());
if (wasm.features.hasCustomDescriptors()) {
// Add a cast, as the parent may depend on the exactness to validate.
// The cast is built from curr->type before the assignment below changes it,
// so it targets the previous (exact) type.
replaceCurrent(Builder(wasm).makeRefCast(curr, curr->type));
}
// In either case the RefFunc itself becomes inexact.
curr->type = curr->type.with(Inexact);
}
}
} fixer;
// The same fixer instance is reused for the primary and all secondaries.
fixer.walkModule(&primary);
for (auto& secondaryPtr : secondaries) {
fixer.walkModule(secondaryPtr.get());
}
}

} // anonymous namespace

Results splitFunctions(Module& primary, const Config& config) {
Expand Down
16 changes: 11 additions & 5 deletions src/ir/possible-contents.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include "ir/module-utils.h"
#include "ir/possible-contents.h"
#include "support/insert_ordered.h"
#include "wasm-type.h"
#include "wasm.h"

namespace std {
Expand Down Expand Up @@ -641,9 +642,15 @@ struct InfoCollector
addRoot(curr);
}
void visitRefFunc(RefFunc* curr) {
addRoot(curr,
PossibleContents::literal(
Literal::makeFunc(curr->func, curr->type.getHeapType())));
if (!getModule()->getFunction(curr->func)->imported()) {
// This is not imported, so we know the exact function literal.
addRoot(
curr,
PossibleContents::literal(Literal::makeFunc(curr->func, *getModule())));
} else {
// This is imported, so it might be anything of the proper type.
addRoot(curr);
Comment on lines +651 to +652
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't we still want to track that this is a reference to the imported function? It would just have an inexact type.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But without knowing the identity, I think we can misoptimize? Imagine we have equality for a second, then ref.func a == ref.func a is definitely 1, but ref.func a == ref.func b is not necessarily 0 (can have duplicate imports).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But function references cannot be compared for equality, so there is nothing to misoptimize, unless I'm missing something.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I did say "imagine" 😆

But, while we don't have ref.eq on functions in wasm userspace, we do have optimizations that compare functions in other ways. E.g. folding an if with ref.eq arms, or GUFA inferences. I admit I don't see an actual bug atm in our optimizer, but a future one is conceivable.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can see how optimizations might see that two references are the same and e.g. merge two equivalent if arms or something like that, but I still don't see how we could ever have an optimization that does something unsafe when reasoning that two different function references are different. Can we at least consider changes here in a separate PR?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, agreed. I'm not opposed to being conservative here to be on the safe side. But we should keep the less-conservative status quo in this PR to make sure we're not unexpectedly regressing optimizations due to just the introduction of inexact imported functions.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Keeping the status quo does mean keeping the known cases of invalid optimization we have today, including the new gufa.wast tests here, like

(module
  (type $func (sub (func)))
  (type $sub (sub $func (func)))

  (import "" "" (func $f (type $func)))

  (func $test (export "test") (result i32)
    (ref.test (ref $sub)
      (ref.func $f)
    )
  )
)

We misoptimize that to 0 before the fix, because we think imported function literals are actual concrete functions. Given such an actual function, we don't need exactness to know that it will fail that test.

The fuzzer can find this after this PR - perhaps because of the new testcases? Or perhaps because of the companion fuzzing PR #7963, which should really land as it increases coverage enough to find those recent vulnerabilities. So while I see your point, our options seem to be

  • Land this PR as is, fixing the misoptimization but potentially regressing optimizations on imported function references.
  • Land this without fixing the misoptimization, which will not regress any opts, and work around it in the fuzzer, maybe not landing "Fuzzer: Merge and optimize even with closed world in Two()" (#7963), maybe marking new tests as non-fuzzable, maybe both.
  • Fix the misoptimization some other way, e.g., GUFA/possible-contents could special-case function literals in various places.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What if GUFA has a function literal, but its type isn't exact?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is still a literal. The code assumes that a literal is an actual identifiable thing, like 42 or the function "foo", and unlike a global "bar" (whose value we don't know).

We could special-case the code to make it treat an inexact funcref as "a literal, but not really; more like a global." But that won't work once we have exact function imports - the same problem would happen with exact ones.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't see the problem. Once we have exact imports, then the Literal for the imported function would have an exact type iff the import is exact. GUFA would then look at the literal type to see whether casts would succeed or fail, for example. The changes to support inexact function literals are already in this PR.

}

// The presence of a RefFunc indicates the function may be called
// indirectly, so add the relevant connections for this particular function.
Expand Down Expand Up @@ -1861,8 +1868,7 @@ void TNHOracle::infer() {
// lot of other optimizations become possible anyhow.
auto target = possibleTargets[0]->name;
info.inferences[call->target] =
PossibleContents::literal(Literal::makeFunc(
target, wasm.getFunction(target)->type.getHeapType()));
PossibleContents::literal(Literal::makeFunc(target, wasm));
continue;
}

Expand Down
2 changes: 1 addition & 1 deletion src/ir/properties.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ inline Literal getLiteral(const Expression* curr) {
} else if (auto* n = curr->dynCast<RefNull>()) {
return Literal(n->type);
} else if (auto* r = curr->dynCast<RefFunc>()) {
return Literal::makeFunc(r->func, r->type.getHeapType());
return Literal::makeFunc(r->func, r->type);
} else if (auto* i = curr->dynCast<RefI31>()) {
if (auto* c = i->value->dynCast<Const>()) {
return Literal::makeI31(c->value.geti32(),
Expand Down
9 changes: 7 additions & 2 deletions src/literal.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@

namespace wasm {

class Module;
class Literals;
struct FuncData;
struct GCData;
Expand Down Expand Up @@ -70,6 +71,9 @@ class Literal {

public:
// Type of the literal. Immutable because the literal's payload depends on it.
// For references to defined heap types, this is almost always an exact type.
// The exception is references to imported functions, since the function
// provided at instantiation time may have a subtype of the import type.
const Type type;

Literal() : v128(), type(Type::none) {}
Expand All @@ -90,7 +94,7 @@ class Literal {
explicit Literal(const std::array<Literal, 8>&);
explicit Literal(const std::array<Literal, 4>&);
explicit Literal(const std::array<Literal, 2>&);
explicit Literal(std::shared_ptr<FuncData> funcData, HeapType type);
explicit Literal(std::shared_ptr<FuncData> funcData, Type type);
explicit Literal(std::shared_ptr<GCData> gcData, HeapType type);
explicit Literal(std::shared_ptr<ExnData> exnData);
explicit Literal(std::shared_ptr<ContData> contData);
Expand Down Expand Up @@ -252,7 +256,8 @@ class Literal {
}
// Simple way to create a function from the name and type, without a full
// FuncData.
static Literal makeFunc(Name func, HeapType type);
static Literal makeFunc(Name func, Type type);
static Literal makeFunc(Name func, Module& wasm);
static Literal makeI31(int32_t value, Shareability share) {
auto lit = Literal(Type(HeapTypes::i31.getBasic(share), NonNullable));
lit.i32 = value | 0x80000000;
Expand Down
5 changes: 4 additions & 1 deletion src/parser/contexts.h
Original file line number Diff line number Diff line change
Expand Up @@ -1419,6 +1419,9 @@ struct ParseModuleTypesCtx : TypeParserCtx<ParseModuleTypesCtx>,
return in.err(pos, "expected signature type");
}
f->type = f->type.with(type.type);
if (f->imported()) {
f->type = f->type.with(Inexact);
}
// If we are provided with too many names (more than the function has), we
// will error on that later when we check the signature matches the type.
// For now, avoid asserting in setLocalName.
Expand Down Expand Up @@ -1601,7 +1604,7 @@ struct ParseDefsCtx : TypeParserCtx<ParseDefsCtx>, AnnotationParserCtx {
elems.push_back(expr);
}
void appendFuncElem(std::vector<Expression*>& elems, Name func) {
auto type = wasm.getFunction(func)->type.getHeapType();
auto type = wasm.getFunction(func)->type;
elems.push_back(builder.makeRefFunc(func, type));
}

Expand Down
6 changes: 6 additions & 0 deletions src/passes/ExtractFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

#include <cctype>

#include "ir/utils.h"
#include "pass.h"
#include "wasm-builder.h"
#include "wasm.h"
Expand All @@ -37,6 +38,7 @@ static void extract(PassRunner* runner, Module* module, Name name) {
func->module = "env";
func->base = func->name;
func->vars.clear();
func->type = func->type.with(Inexact);
func->body = nullptr;
} else {
found = true;
Expand All @@ -46,6 +48,10 @@ static void extract(PassRunner* runner, Module* module, Name name) {
Fatal() << "could not find the function to extract\n";
}

// Update function references after making things imports.
ReFinalize().run(runner, module);
ReFinalize().walkModuleCode(module);

// Leave just one export, for the thing we want.
module->exports.clear();
module->updateMaps();
Expand Down
2 changes: 1 addition & 1 deletion src/passes/FuncCastEmulation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ struct FuncCastEmulation : public Pass {
}
auto* thunk = iter->second;
ref->func = thunk->name;
ref->finalize(thunk->type.getHeapType());
ref->finalize(thunk->type.getHeapType(), *module);
}
}

Expand Down
10 changes: 9 additions & 1 deletion src/passes/InstrumentBranchHints.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@
#include "ir/names.h"
#include "ir/parents.h"
#include "ir/properties.h"
#include "ir/utils.h"
#include "pass.h"
#include "support/string.h"
#include "wasm-builder.h"
Expand Down Expand Up @@ -193,19 +194,26 @@ struct InstrumentBranchHints
auto* func = module->getFunction(existing);
func->body = Builder(*module).makeNop();
func->module = func->base = Name();
func->type = func->type.with(Exact);
}

// Add our import.
auto* func = module->addFunction(Builder::makeFunction(
Names::getValidFunctionName(*module, BASE),
Signature({Type::i32, Type::i32, Type::i32}, Type::none),
Type(Signature({Type::i32, Type::i32, Type::i32}, Type::none),
NonNullable,
Exact),
{}));
func->module = MODULE;
func->base = BASE;
logBranch = func->name;

// Walk normally, using logBranch as we go.
Super::doWalkModule(module);

// Update ref.func type changes.
ReFinalize().run(getPassRunner(), module);
ReFinalize().walkModuleCode(module);
}
};

Expand Down
Loading
Loading