Skip to content

Commit 305e97e

Browse files
committed
Add in-toto format as hash of shard hashes
This converts model serialization manifests that record every model file shard hash into an in-toto payload that can then be passed to Sigstore's `sign_intoto` for signing to generate a Sigstore `Bundle` (if using Sigstore). To identify the models, we compute a hash of all hashes of the file shards and use that as the subject. The individual file hashes are used as the payload and we would have the verifier check them as part of the verification process. Signed-off-by: Mihai Maruseac <[email protected]>
1 parent 8875185 commit 305e97e

File tree

9 files changed

+345
-0
lines changed

9 files changed

+345
-0
lines changed

model_signing/signing/in_toto.py

+98
Original file line numberDiff line numberDiff line change
@@ -254,3 +254,101 @@ def from_manifest(cls, manifest: manifest_module.Manifest) -> Self:
254254
predicate_top_level_name="files",
255255
)
256256
return cls(statement)
257+
258+
259+
class DigestOfShardDigestsIntotoPayload(IntotoPayload):
    """In-toto payload where the subject is a digest of digests of file shards.

    This payload is supposed to be used for manifests where every file shard in
    the model is matched with a digest. Because existing tooling only supports
    established hashing algorithms, we record every such digest in the predicate
    part and compute a hash for the subject by using sha256 on the concatenation
    of the shard hashes. To ensure determinism, the hashes are sorted
    by file shard (alphabetically by name, then ordered by start offset).

    Example:
    ```json
    {
      "_type": "https://in-toto.io/Statement/v1",
      "subject": [
        {
          "digest": {
            "sha256": "18b5a4..."
          }
        }
      ],
      "predicateType": "https://model_signing/DigestOfShardDigests/v0.1",
      "predicate": {
        "shards": [
          {
            "digest": "6efa14...",
            "algorithm": "file-sha256-1000000",
            "name": "d0/d1/d2/d3/d4/f0:0:16"
          },
          {
            "digest": "a9bc14...",
            "algorithm": "file-sha256-1000000",
            "name": "d0/d1/d2/d3/d4/f1:0:16"
          },
          {
            "digest": "5f597e...",
            "algorithm": "file-sha256-1000000",
            "name": "d0/d1/d2/d3/d4/f2:0:16"
          },
          {
            "digest": "eaf677...",
            "algorithm": "file-sha256-1000000",
            "name": "d0/d1/d2/d3/d4/f3:0:16"
          }
        ]
      }
    }
    ```

    A missing predicate, or a predicate for which an entry does not have valid
    name, digest, or algorithm should be considered invalid and fail integrity
    verification.

    See also https://github.com/sigstore/sigstore-python/issues/1018.
    """

    # Identifier recorded in the statement's `predicateType` field.
    predicate_type: Final[str] = (
        "https://model_signing/DigestOfShardDigests/v0.1"
    )

    def __init__(self, statement: statement.Statement):
        """Builds an instance of this in-toto payload.

        Don't call this directly in production. Use `from_manifest()` instead.

        Args:
            statement: The DSSE statement representing this in-toto payload.
        """
        self.statement = statement

    @classmethod
    @override
    def from_manifest(cls, manifest: manifest_module.Manifest) -> Self:
        """Converts a manifest to the signing payload used for signing.

        The manifest must be one where every model shard is paired with its own
        digest. Currently, this is only `ShardLevelManifest`.

        Args:
            manifest: the manifest to convert to signing payload.

        Returns:
            An instance of `DigestOfShardDigestsIntotoPayload`.

        Raises:
            TypeError: If the manifest is not `ShardLevelManifest`.
        """
        # Reject unsupported manifest kinds up front, before any statement is
        # built from them.
        if not isinstance(manifest, manifest_module.ShardLevelManifest):
            raise TypeError("Only ShardLevelManifest is supported")

        # The per-shard entries are stored in the predicate under "shards".
        statement = _convert_descriptors_to_hashed_statement(
            manifest,
            predicate_type=cls.predicate_type,
            predicate_top_level_name="shards",
        )
        return cls(statement)

model_signing/signing/in_toto_test.py

+65
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
from model_signing.hashing import hashing
3333
from model_signing.manifest import manifest as manifest_module
3434
from model_signing.serialization import serialize_by_file
35+
from model_signing.serialization import serialize_by_file_shard
3536
from model_signing.signing import in_toto
3637

3738

@@ -143,3 +144,67 @@ def test_only_runs_on_expected_manifest_types(self):
143144
match="Only FileLevelManifest is supported",
144145
):
145146
in_toto.DigestOfDigestsIntotoPayload.from_manifest(manifest)
147+
148+
149+
class TestDigestOfShardDigestsIntotoPayload:
    """Tests for `in_toto.DigestOfShardDigestsIntotoPayload`."""

    def _hasher_factory(
        self, path: pathlib.Path, start: int, end: int
    ) -> file.ShardedFileHasher:
        """Builds a SHA256-backed shard hasher for `path` with these bounds."""
        content_hasher = memory.SHA256()
        return file.ShardedFileHasher(
            path, content_hasher, start=start, end=end
        )

    def _payload_for(self, model_path):
        """Serializes `model_path` by file shard and converts it to a payload."""
        shard_serializer = serialize_by_file_shard.ManifestSerializer(
            self._hasher_factory, allow_symlinks=True
        )
        shard_manifest = shard_serializer.serialize(model_path)
        return in_toto.DigestOfShardDigestsIntotoPayload.from_manifest(
            shard_manifest
        )

    @pytest.mark.parametrize("model_fixture_name", test_support.all_test_models)
    def test_known_models(self, request, model_fixture_name):
        # Arrange: locate the golden file for this model fixture.
        golden_path = (
            request.path.parent
            / "testdata"
            / "in_toto"
            / "TestDigestOfShardDigestsIntotoPayload"
            / model_fixture_name
        )
        should_update = request.config.getoption("update_goldens")
        model = request.getfixturevalue(model_fixture_name)

        # Act: build the payload from the shard-level manifest.
        payload = self._payload_for(model)

        # Assert: compare against the golden unless asked to regenerate it.
        if not should_update:
            with open(golden_path, "r", encoding="utf-8") as golden_file:
                golden_json = golden_file.read()
            expected_proto = json_format.Parse(
                golden_json, statement_pb2.Statement()
            )
            assert payload.statement.pb == expected_proto
            return

        with open(golden_path, "w", encoding="utf-8") as golden_file:
            golden_json = json_format.MessageToJson(payload.statement.pb)
            golden_file.write(f"{golden_json}\n")

    def test_produces_valid_statements(self, sample_model_folder):
        payload = self._payload_for(sample_model_folder)

        payload.statement.validate()

    def test_only_runs_on_expected_manifest_types(self):
        # A digest-only manifest carries no per-shard entries, so conversion
        # must be rejected.
        unsupported_manifest = manifest_module.DigestManifest(
            hashing.Digest("test", b"test_digest")
        )

        with pytest.raises(
            TypeError,
            match="Only ShardLevelManifest is supported",
        ):
            in_toto.DigestOfShardDigestsIntotoPayload.from_manifest(
                unsupported_manifest
            )
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
{
2+
"_type": "https://in-toto.io/Statement/v1",
3+
"subject": [
4+
{
5+
"digest": {
6+
"sha256": "18b5a45fe7983f7194e8ffd96c80f5f0ec53191bf4a32b6aff293f043e816d7a"
7+
}
8+
}
9+
],
10+
"predicateType": "https://model_signing/DigestOfShardDigests/v0.1",
11+
"predicate": {
12+
"shards": [
13+
{
14+
"digest": "6efa14bb03544fcb76045c55f25b9315b6eb5be2d8a85f703193a76b7874c6ff",
15+
"algorithm": "file-sha256-1000000",
16+
"name": "d0/d1/d2/d3/d4/f0:0:16"
17+
},
18+
{
19+
"digest": "a9bc149b70b9d325cd68d275d582cfdb98c0347d3ce54590aa6533368daed3d2",
20+
"algorithm": "file-sha256-1000000",
21+
"name": "d0/d1/d2/d3/d4/f1:0:16"
22+
},
23+
{
24+
"digest": "5f597e6a92d1324d9adbed43d527926d11d0131487baf315e65ae1ef3b1ca3c0",
25+
"algorithm": "file-sha256-1000000",
26+
"name": "d0/d1/d2/d3/d4/f2:0:16"
27+
},
28+
{
29+
"digest": "eaf677c35fec6b87889d9e4563d8bb65dcb9869ca0225697c9cc44cf49dca008",
30+
"algorithm": "file-sha256-1000000",
31+
"name": "d0/d1/d2/d3/d4/f3:0:16"
32+
}
33+
]
34+
}
35+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
{
2+
"_type": "https://in-toto.io/Statement/v1",
3+
"subject": [
4+
{
5+
"digest": {
6+
"sha256": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
7+
}
8+
}
9+
],
10+
"predicateType": "https://model_signing/DigestOfShardDigests/v0.1",
11+
"predicate": {
12+
"shards": []
13+
}
14+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
{
2+
"_type": "https://in-toto.io/Statement/v1",
3+
"subject": [
4+
{
5+
"digest": {
6+
"sha256": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
7+
}
8+
}
9+
],
10+
"predicateType": "https://model_signing/DigestOfShardDigests/v0.1",
11+
"predicate": {
12+
"shards": []
13+
}
14+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
{
2+
"_type": "https://in-toto.io/Statement/v1",
3+
"subject": [
4+
{
5+
"digest": {
6+
"sha256": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
7+
}
8+
}
9+
],
10+
"predicateType": "https://model_signing/DigestOfShardDigests/v0.1",
11+
"predicate": {
12+
"shards": []
13+
}
14+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
{
2+
"_type": "https://in-toto.io/Statement/v1",
3+
"subject": [
4+
{
5+
"digest": {
6+
"sha256": "002d162f867c5eee944e5080d25829b6625be0e3f081f6fbafc7dd655ca2e178"
7+
}
8+
}
9+
],
10+
"predicateType": "https://model_signing/DigestOfShardDigests/v0.1",
11+
"predicate": {
12+
"shards": [
13+
{
14+
"digest": "3aab065c7181a173b5dd9e9d32a9f79923440b413be1e1ffcdba26a7365f719b",
15+
"algorithm": "file-sha256-1000000",
16+
"name": ".:0:22"
17+
}
18+
]
19+
}
20+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
{
2+
"_type": "https://in-toto.io/Statement/v1",
3+
"subject": [
4+
{
5+
"digest": {
6+
"sha256": "143cc682e555951649f18e2761c3d526d2502996f5e32dc187ef7f8a614f8df7"
7+
}
8+
}
9+
],
10+
"predicateType": "https://model_signing/DigestOfShardDigests/v0.1",
11+
"predicate": {
12+
"shards": [
13+
{
14+
"digest": "fdd8925354242a7fd1515e79534317b800015607a609cd306e0b4dcfe6c92249",
15+
"algorithm": "file-sha256-1000000",
16+
"name": "d0/f00:0:23"
17+
},
18+
{
19+
"digest": "e16940b5e44ce981150bda37c4ba95881a749a521b4a297c5cdf97bdcfe965e6",
20+
"algorithm": "file-sha256-1000000",
21+
"name": "d0/f01:0:23"
22+
},
23+
{
24+
"digest": "407822246ea8f9e26380842c3f4cd10d7b23e78f1fe7c74c293608682886a426",
25+
"algorithm": "file-sha256-1000000",
26+
"name": "d0/f02:0:23"
27+
},
28+
{
29+
"digest": "6a3b08b5df77c4d418ceee1ac136a9ad49fc7c41358b5e82c1176daccb21ff3f",
30+
"algorithm": "file-sha256-1000000",
31+
"name": "d1/f10:0:23"
32+
},
33+
{
34+
"digest": "a484b3d8ea5e99b75f9f123f9a42c882388693edc7d85d82ccba54834712cadf",
35+
"algorithm": "file-sha256-1000000",
36+
"name": "d1/f11:0:23"
37+
},
38+
{
39+
"digest": "8f577930f5f40c2c2133cb299d36f9527fde98c1608569017cae6b5bcd01abb3",
40+
"algorithm": "file-sha256-1000000",
41+
"name": "d1/f12:0:23"
42+
},
43+
{
44+
"digest": "997b37cc51f1ca1c7a270466607e26847429cd7264c30148c1b9352e224083fc",
45+
"algorithm": "file-sha256-1000000",
46+
"name": "f0:0:24"
47+
},
48+
{
49+
"digest": "c88a04d48353133fb065ba2c8ab369abab21395b9526aa20373ad828915fa7ae",
50+
"algorithm": "file-sha256-1000000",
51+
"name": "f1:0:24"
52+
},
53+
{
54+
"digest": "700e3ba5065d8dd47e41fd928ea086670d628f891ba363be0ca3c31d20d7d719",
55+
"algorithm": "file-sha256-1000000",
56+
"name": "f2:0:24"
57+
},
58+
{
59+
"digest": "912bcf5ebdf44dc7b4085b07940e0a81d157fba24b276e73fd911121d4544c4a",
60+
"algorithm": "file-sha256-1000000",
61+
"name": "f3:0:24"
62+
}
63+
]
64+
}
65+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
{
2+
"_type": "https://in-toto.io/Statement/v1",
3+
"subject": [
4+
{
5+
"digest": {
6+
"sha256": "002d162f867c5eee944e5080d25829b6625be0e3f081f6fbafc7dd655ca2e178"
7+
}
8+
}
9+
],
10+
"predicateType": "https://model_signing/DigestOfShardDigests/v0.1",
11+
"predicate": {
12+
"shards": [
13+
{
14+
"digest": "3aab065c7181a173b5dd9e9d32a9f79923440b413be1e1ffcdba26a7365f719b",
15+
"algorithm": "file-sha256-1000000",
16+
"name": "symlink_file:0:22"
17+
}
18+
]
19+
}
20+
}

0 commit comments

Comments
 (0)