Skip to content

Commit 2aba836

Browse files
doggeral and facebook-github-bot
authored and committed
Per-entry seed in XNNPACK weights cache for XNNPACK-upgrade invalidation (#20170)
Summary: XNNPACK exposes `xnn_weights_cache_look_up_key.seed` — a per-ukernel value that XNNPACK guarantees is consistent across runs of the same ukernel and that changes whenever a ukernel implementation changes. Store this seed per cache entry so that a stale cached packing produced by an old XNNPACK ukernel is rejected after an upgrade, instead of being handed back to a newer ukernel that expects a different layout.

Changes:
- `PackedDataMeta` gains `uint32_t seed{0}`.
- `look_up` rejects (returns `SIZE_MAX`) when a name hit has a stored seed that doesn't match `cache_key->seed`. This forces `look_up_or_insert` to re-pack with the current ukernel, rather than relying on the slow `memcmp` path to catch the mismatch later.
- `look_up_or_insert` records `cache_key->seed` on insert.
- The on-disk index entry layout is extended to `[name_len:u32][name][file_offset:u64][data_size:u64][seed:u32]` (was 16 bytes after the name, now 20).
- `load_packed_cache` reads the per-entry seed and bumps the trailing-bytes bound check accordingly.
- `kCacheVersion` is bumped 1 → 2 so that existing v1 files (which carry no seed) are rejected at load instead of being loaded with `seed=0` and mismatching every fresh `look_up`.

Cleanup of orphaned in-memory and on-disk entries left behind by an invalidated look-up is a follow-up — this diff only adds the detection.

Reviewed By: GregoryComer

Differential Revision: D108082431
1 parent e257a71 commit 2aba836

3 files changed

Lines changed: 260 additions & 3 deletions

File tree

backends/xnnpack/runtime/XNNWeightsCache.cpp

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -348,6 +348,19 @@ size_t XNNWeightsCache::look_up(
348348
if (packed_weight_entry == context->name_to_packed_data_metadata_.end()) {
349349
return SIZE_MAX;
350350
}
351+
// XNNPACK upgrade detection: a ukernel whose implementation changed
352+
// produces a different seed. Reject the cached entry so look_up_or_insert
353+
// falls through to re-pack with the current ukernel.
354+
if (packed_weight_entry->second.seed != cache_key->seed) {
355+
ET_LOG(
356+
Info,
357+
"look_up: seed mismatch for '%s' (cached=0x%08x, current=0x%08x); "
358+
"treating as miss for re-pack",
359+
weight_bias_name.c_str(),
360+
packed_weight_entry->second.seed,
361+
cache_key->seed);
362+
return SIZE_MAX;
363+
}
351364
packed_weight_entry->second.in_current_runtime = true;
352365
return packed_weight_entry->second.offset;
353366
}
@@ -474,6 +487,7 @@ size_t XNNWeightsCache::look_up_or_insert(
474487
packed_data_metadata.ref_count =
475488
0; // ref_count is only incremented after finalizing for runtime
476489
packed_data_metadata.in_current_runtime = true;
490+
packed_data_metadata.seed = cache_key->seed;
477491
context->name_to_packed_data_metadata_[weight_bias_name] =
478492
packed_data_metadata;
479493
} else {
@@ -524,7 +538,7 @@ Error XNNWeightsCache::save_packed_index() {
524538
std::vector<uint8_t> buf;
525539
uint32_t entry_count = 0;
526540

527-
// Index entry: [name_len:u32][name][file_offset:u64][data_size:u64]
541+
// Index entry: [name_len:u32][name][file_offset:u64][data_size:u64][seed:u32]
528542
for (const auto& [name, meta] : name_to_packed_data_metadata_) {
529543
void* ptr = packed_data_ptrs_[meta.offset];
530544
auto it = ptr_to_file_offset_.find(ptr);
@@ -536,6 +550,7 @@ Error XNNWeightsCache::save_packed_index() {
536550
buf.insert(buf.end(), name.begin(), name.end());
537551
append_le(buf, static_cast<uint64_t>(it->second));
538552
append_le(buf, static_cast<uint64_t>(meta.data_size));
553+
append_le(buf, meta.seed);
539554
}
540555

541556
// Footer: [index_start:u64][entry_count:u32][magic:u32][version:u32]
@@ -635,7 +650,8 @@ bool XNNWeightsCache::load_packed_cache() {
635650
for (uint32_t i = 0; i < entry_count && cursor + 4 <= end; ++i) {
636651
uint32_t name_len = read_le<uint32_t>(cursor);
637652
cursor += 4;
638-
if (cursor + name_len + 16 > end) {
653+
// [file_offset:u64][data_size:u64][seed:u32] = 20 bytes
654+
if (cursor + name_len + 20 > end) {
639655
// Truncated entry header: trailer doesn't match the entry_count we
640656
// read from the footer, so the cache is corrupt. Apply the same
641657
// full rollback as the invalid-bounds branch below — otherwise the
@@ -660,6 +676,8 @@ bool XNNWeightsCache::load_packed_cache() {
660676
cursor += 8;
661677
uint64_t data_size = read_le<uint64_t>(cursor);
662678
cursor += 8;
679+
uint32_t seed = read_le<uint32_t>(cursor);
680+
cursor += 4;
663681

664682
// Bounds check: the entry's bytes must lie entirely inside the
665683
// packed-data region.
@@ -692,6 +710,7 @@ bool XNNWeightsCache::load_packed_cache() {
692710
meta.ref_count = 0;
693711
meta.in_current_runtime = false;
694712
meta.from_load = true;
713+
meta.seed = seed;
695714
name_to_packed_data_metadata_[name] = meta;
696715
}
697716

backends/xnnpack/runtime/XNNWeightsCache.h

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,13 @@ struct PackedDataMeta {
4444
// cache_loaded_ is auto-invalidated so the next init re-enters
4545
// load_packed_cache and reuses the saved file instead of re-packing.
4646
bool from_load{false};
47+
// Per-ukernel seed from xnn_weights_cache_look_up_key.seed. XNNPACK
48+
// guarantees this is consistent across runs of the same ukernel; when
49+
// XNNPACK upgrades and a ukernel implementation changes, the seed
50+
// changes. look_up rejects entries whose stored seed doesn't match
51+
// the caller's seed so that stale cache entries don't deliver wrongly
52+
// packed weights to a newer ukernel.
53+
uint32_t seed{0};
4754
};
4855

4956
class XNNWeightsCache {
@@ -151,7 +158,11 @@ class XNNWeightsCache {
151158

152159
private:
153160
static constexpr uint32_t kCacheMagic = 0x58505743; // "XPWC"
154-
static constexpr uint32_t kCacheVersion = 1;
161+
// Bump when the on-disk layout (footer or per-entry record) changes.
162+
// v2: per-entry seed added — old v1 files don't carry seeds and would
163+
// load with seed=0, mismatching every fresh look_up with a non-zero
164+
// seed, causing a stampede of re-packs. Reject v1 outright.
165+
static constexpr uint32_t kCacheVersion = 2;
155166
bool load_packed_cache();
156167
void reset_for_fresh_write();
157168
void release_entry(void* packed_data_ptr);

backends/xnnpack/test/runtime/test_xnn_weights_cache.cpp

Lines changed: 227 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -704,6 +704,233 @@ TEST_F(XNNWeightsCacheTest, MultiplePTEsInSameInstance_NoFileGrowth) {
704704
::unlink(cache_path.c_str());
705705
}
706706

707+
namespace {
708+
709+
// Little-endian decode helpers matching XNNWeightsCache's on-disk format.
710+
uint32_t read_le_u32(const uint8_t* p) {
711+
uint32_t v = 0;
712+
for (int i = 0; i < 4; ++i) {
713+
v |= static_cast<uint32_t>(p[i]) << (8 * i);
714+
}
715+
return v;
716+
}
717+
uint64_t read_le_u64(const uint8_t* p) {
718+
uint64_t v = 0;
719+
for (int i = 0; i < 8; ++i) {
720+
v |= static_cast<uint64_t>(p[i]) << (8 * i);
721+
}
722+
return v;
723+
}
724+
void write_le_u32(std::ostream& f, uint32_t v) {
725+
for (int i = 0; i < 4; ++i) {
726+
char b = static_cast<char>((v >> (8 * i)) & 0xff);
727+
f.write(&b, 1);
728+
}
729+
}
730+
void write_le_u64(std::ostream& f, uint64_t v) {
731+
for (int i = 0; i < 8; ++i) {
732+
char b = static_cast<char>((v >> (8 * i)) & 0xff);
733+
f.write(&b, 1);
734+
}
735+
}
736+
737+
} // namespace
738+
739+
// A cache file written by older code (kCacheVersion=1) carries no per-entry
740+
// seed field. Loading such a file with the current schema would yield
741+
// entries with seed=0 and mismatch every fresh look_up. The version bump
742+
// must reject it outright so the next init re-packs from scratch.
743+
TEST_F(XNNWeightsCacheTest, LoadPackedCache_RejectsV1Format) {
744+
std::string cache_path = std::string("/tmp/xnn_weights_cache_v1_") +
745+
std::to_string(::getpid()) + ".packed_cache";
746+
::unlink(cache_path.c_str());
747+
748+
// v1 layout: 64 bytes of dummy data, then 20-byte footer with version=1.
749+
{
750+
std::ofstream f(cache_path, std::ios::binary);
751+
std::vector<char> data(64, 0);
752+
f.write(data.data(), data.size());
753+
write_le_u64(f, 64); // index_start
754+
write_le_u32(f, 0); // entry_count
755+
write_le_u32(f, 0x58505743); // kCacheMagic "XPWC"
756+
write_le_u32(f, 1); // OLD kCacheVersion = 1
757+
}
758+
759+
XNNWeightsCache cache;
760+
cache.set_packed_cache_path(cache_path);
761+
Error err =
762+
cache.initialize_for_runtime(memory_allocator_.get(), data_map_.get());
763+
ASSERT_EQ(err, Error::Ok);
764+
// Version mismatch → load_packed_cache returned false → no entries.
765+
EXPECT_EQ(cache.get_packed_data_names().size(), 0u);
766+
767+
::unlink(cache_path.c_str());
768+
}
769+
770+
// Verify save_packed_index writes the schema version 2 footer and embeds a
771+
// 4-byte seed field in each entry record. Guards against future refactors
772+
// silently dropping the seed write.
773+
TEST_F(XNNWeightsCacheTest, SavePackedIndex_EntryFormatIncludesSeed) {
774+
std::string cache_path = std::string("/tmp/xnn_weights_cache_format_") +
775+
std::to_string(::getpid()) + ".packed_cache";
776+
::unlink(cache_path.c_str());
777+
778+
std::vector<size_t> batches{1, 2, 3};
779+
size_t input_channels = 3;
780+
size_t output_channels = 4;
781+
size_t num_batches = 1 * 2 * 3;
782+
size_t padding = 32;
783+
std::vector<float> input(num_batches * input_channels + padding, 1.0f);
784+
std::vector<float> output(num_batches * output_channels, 0.0f);
785+
786+
{
787+
XNNWeightsCache cache;
788+
cache.set_packed_cache_path(cache_path);
789+
cache.initialize_for_runtime(memory_allocator_.get(), data_map_.get());
790+
BuildAndRunGraphWithWeightsCache(
791+
cache,
792+
batches,
793+
input_channels,
794+
output_channels,
795+
input.data(),
796+
output.data());
797+
ASSERT_EQ(cache.save_packed_index(), Error::Ok);
798+
}
799+
800+
// Parse footer at file_size - 20.
801+
std::ifstream f(cache_path, std::ios::binary);
802+
ASSERT_TRUE(f.is_open());
803+
f.seekg(0, std::ios::end);
804+
size_t file_size = f.tellg();
805+
ASSERT_GE(file_size, 24u);
806+
807+
uint8_t footer[20];
808+
f.seekg(file_size - 20);
809+
f.read(reinterpret_cast<char*>(footer), 20);
810+
uint32_t magic = read_le_u32(footer + 12);
811+
uint32_t version = read_le_u32(footer + 16);
812+
EXPECT_EQ(magic, 0x58505743u);
813+
EXPECT_EQ(version, 2u);
814+
815+
// Walk first entry: [name_len:u32][name][file_offset:u64][data_size:u64][seed:u32]
816+
uint64_t index_start = read_le_u64(footer);
817+
uint32_t entry_count = read_le_u32(footer + 8);
818+
ASSERT_GT(entry_count, 0u);
819+
820+
f.seekg(index_start);
821+
uint8_t name_len_buf[4];
822+
f.read(reinterpret_cast<char*>(name_len_buf), 4);
823+
uint32_t name_len = read_le_u32(name_len_buf);
824+
825+
// The seed field sits at index_start + 4 + name_len + 8 + 8.
826+
f.seekg(index_start + 4 + name_len + 8 + 8);
827+
uint8_t seed_buf[4];
828+
f.read(reinterpret_cast<char*>(seed_buf), 4);
829+
// XNNPACK ukernel seeds are non-zero in practice. The signal here is
830+
// simply that 4 well-formed bytes follow the size field — confirming
831+
// the new entry layout was written, not the legacy 16-byte tail.
832+
uint32_t stored_seed = read_le_u32(seed_buf);
833+
EXPECT_NE(stored_seed, 0u);
834+
835+
::unlink(cache_path.c_str());
836+
}
837+
838+
// After loading a cache file whose entry seed has been tampered with
839+
// (simulating an XNNPACK upgrade where the same ukernel now emits a
840+
// different seed), the next inference must produce correct output. Either
841+
// look_up's seed check or look_up_or_insert's memcmp fallback drives the
842+
// re-pack; this test exercises the end-to-end safety net.
843+
TEST_F(XNNWeightsCacheTest, LoadPackedCache_CorruptedSeed_ProducesCorrectOutput) {
844+
std::string cache_path = std::string("/tmp/xnn_weights_cache_badseed_") +
845+
std::to_string(::getpid()) + ".packed_cache";
846+
::unlink(cache_path.c_str());
847+
848+
std::vector<size_t> batches{1, 2, 3};
849+
size_t input_channels = 3;
850+
size_t output_channels = 4;
851+
size_t num_batches = 1 * 2 * 3;
852+
size_t padding = 32;
853+
std::vector<float> input(num_batches * input_channels + padding, 1.0f);
854+
855+
// Baseline: fresh pack, heap-only, no cache file.
856+
std::vector<float> baseline(num_batches * output_channels, 0.0f);
857+
{
858+
XNNWeightsCache cache;
859+
cache.initialize_for_runtime(memory_allocator_.get(), data_map_.get());
860+
BuildAndRunGraphWithWeightsCache(
861+
cache,
862+
batches,
863+
input_channels,
864+
output_channels,
865+
input.data(),
866+
baseline.data());
867+
}
868+
869+
// Write a valid cache file.
870+
{
871+
XNNWeightsCache cache;
872+
cache.set_packed_cache_path(cache_path);
873+
cache.initialize_for_runtime(memory_allocator_.get(), data_map_.get());
874+
std::vector<float> out(num_batches * output_channels, 0.0f);
875+
BuildAndRunGraphWithWeightsCache(
876+
cache,
877+
batches,
878+
input_channels,
879+
output_channels,
880+
input.data(),
881+
out.data());
882+
ASSERT_EQ(cache.save_packed_index(), Error::Ok);
883+
}
884+
885+
// Corrupt the seed field of the first entry to a value no real ukernel
886+
// would emit (0xDEADBEEF).
887+
{
888+
std::fstream f(cache_path, std::ios::binary | std::ios::in | std::ios::out);
889+
ASSERT_TRUE(f.is_open());
890+
f.seekg(0, std::ios::end);
891+
size_t file_size = f.tellg();
892+
ASSERT_GE(file_size, 24u);
893+
894+
uint8_t footer_buf[20];
895+
f.seekg(file_size - 20);
896+
f.read(reinterpret_cast<char*>(footer_buf), 20);
897+
uint64_t index_start = read_le_u64(footer_buf);
898+
uint32_t entry_count = read_le_u32(footer_buf + 8);
899+
ASSERT_GT(entry_count, 0u);
900+
901+
f.seekg(index_start);
902+
uint8_t name_len_buf[4];
903+
f.read(reinterpret_cast<char*>(name_len_buf), 4);
904+
uint32_t name_len = read_le_u32(name_len_buf);
905+
906+
size_t seed_offset = index_start + 4 + name_len + 8 + 8;
907+
f.seekp(seed_offset);
908+
uint32_t corrupted = 0xDEADBEEFu;
909+
f.write(reinterpret_cast<const char*>(&corrupted), 4);
910+
f.close();
911+
}
912+
913+
// Reload and run. Output must still match baseline.
914+
std::vector<float> after_corruption(num_batches * output_channels, 0.0f);
915+
{
916+
XNNWeightsCache cache;
917+
cache.set_packed_cache_path(cache_path);
918+
cache.initialize_for_runtime(memory_allocator_.get(), data_map_.get());
919+
ASSERT_GT(cache.get_packed_data_names().size(), 0u);
920+
BuildAndRunGraphWithWeightsCache(
921+
cache,
922+
batches,
923+
input_channels,
924+
output_channels,
925+
input.data(),
926+
after_corruption.data());
927+
}
928+
929+
EXPECT_EQ(after_corruption, baseline);
930+
931+
::unlink(cache_path.c_str());
932+
}
933+
707934
// save_packed_index must be a true no-op when no new reserve_space happened
708935
// since the last save — same content but writing would still bump mtime,
709936
// making the cache file look modified on every model load.

0 commit comments

Comments
 (0)