Skip to content

Commit b4a4f6a

Browse files
Add in-toto format with hashes of shards as subjects (#267)
This converts model serialization manifests that record every model file shard hash into an in-toto payload that can then be passed to Sigstore's `sign_intoto` for signing to generate a Sigstore `Bundle` (if using Sigstore). This time, we record every hash as part of the subject instead of in the payload. We require verifiers to be aware of this and acknowledge that verifiers that only check subject by subject (that is, they check if the hash of a passed-in argument is in the list of subjects and don't check if all the hashes are present) can fail to fully detect if the model integrity is compromised by renaming one file in the model, interchanging two file names, deleting a file, or reordering two shards. The signing library will have additional checks for this, but verifying the signature with other tools might produce incorrect results. Signed-off-by: Mihai Maruseac <[email protected]>
1 parent c662544 commit b4a4f6a

File tree

9 files changed

+368
-0
lines changed

9 files changed

+368
-0
lines changed

model_signing/signing/in_toto.py

+105
Original file line numberDiff line numberDiff line change
@@ -481,3 +481,108 @@ def from_manifest(cls, manifest: manifest_module.Manifest) -> Self:
481481
manifest, predicate_type=cls.predicate_type
482482
)
483483
return cls(statement)
484+
485+
486+
class ShardDigestsIntotoPayload(IntotoPayload):
487+
"""In-toto payload where the subjects are the model shards themselves.
488+
489+
This payload is supposed to be used for manifests where every file shard in
490+
the model is matched with a digest. Because existing tooling only supports
491+
established hashing algorithms, we annotate every subject with the actual
492+
hash algorithm used to compute the file digest, and use "sha256" as the
493+
algorithm name in the digest itself.
494+
495+
Example:
496+
```json
497+
{
498+
"_type": "https://in-toto.io/Statement/v1",
499+
"subject": [
500+
{
501+
"name": "d0/d1/d2/d3/d4/f0:0:16",
502+
"digest": {
503+
"sha256": "6efa14..."
504+
},
505+
"annotations": {
506+
"actual_hash_algorithm": "file-sha256-1000000"
507+
}
508+
},
509+
{
510+
"name": "d0/d1/d2/d3/d4/f1:0:16",
511+
"digest": {
512+
"sha256": "a9bc14..."
513+
},
514+
"annotations": {
515+
"actual_hash_algorithm": "file-sha256-1000000"
516+
}
517+
},
518+
{
519+
"name": "d0/d1/d2/d3/d4/f2:0:16",
520+
"digest": {
521+
"sha256": "5f597e..."
522+
},
523+
"annotations": {
524+
"actual_hash_algorithm": "file-sha256-1000000"
525+
}
526+
},
527+
{
528+
"name": "d0/d1/d2/d3/d4/f3:0:16",
529+
"digest": {
530+
"sha256": "eaf677..."
531+
},
532+
"annotations": {
533+
"actual_hash_algorithm": "file-sha256-1000000"
534+
}
535+
}
536+
],
537+
"predicateType": "https://model_signing/ShardDigests/v0.1",
538+
"predicate": {
539+
"unused": "Unused, just passed due to API requirements"
540+
}
541+
}
542+
```
543+
544+
If the annotation for a subject is missing, or it does not contain
545+
actual_hash_algorithm, it should be assumed that the digest is computed via
546+
the algorithm listed in the digest dictionary (i.e., sha256).
547+
548+
See also https://github.com/sigstore/sigstore-python/issues/1018.
549+
"""
550+
551+
predicate_type: Final[str] = (
552+
"https://model_signing/ShardDigests/v0.1"
553+
)
554+
555+
def __init__(self, statement: statement.Statement):
556+
"""Builds an instance of this in-toto payload.
557+
558+
Don't call this directly in production. Use `from_manifest()` instead.
559+
560+
Args:
561+
statement: The DSSE statement representing this in-toto payload.
562+
"""
563+
self.statement = statement
564+
565+
@classmethod
566+
@override
567+
def from_manifest(cls, manifest: manifest_module.Manifest) -> Self:
568+
"""Converts a manifest to the signing payload used for signing.
569+
570+
The manifest must be one where every model shard is paired with its own
571+
digest. Currently, this is only `ShardLevelManifest`.
572+
573+
Args:
574+
manifest: the manifest to convert to signing payload.
575+
576+
Returns:
577+
An instance of `ShardDigestsIntotoPayload`.
578+
579+
Raises:
580+
TypeError: If the manifest is not `ShardLevelManifest`.
581+
"""
582+
if not isinstance(manifest, manifest_module.ShardLevelManifest):
583+
raise TypeError("Only ShardLevelManifest is supported")
584+
585+
statement = _convert_descriptors_to_direct_statement(
586+
manifest, predicate_type=cls.predicate_type
587+
)
588+
return cls(statement)

model_signing/signing/in_toto_test.py

+62
Original file line numberDiff line numberDiff line change
@@ -264,3 +264,65 @@ def test_only_runs_on_expected_manifest_types(self):
264264
match="Only FileLevelManifest is supported",
265265
):
266266
in_toto.DigestsIntotoPayload.from_manifest(manifest)
267+
268+
269+
class TestShardDigestsIntotoPayload:
270+
271+
def _hasher_factory(
272+
self, path: pathlib.Path, start: int, end: int
273+
) -> file.ShardedFileHasher:
274+
return file.ShardedFileHasher(
275+
path, memory.SHA256(), start=start, end=end
276+
)
277+
278+
@pytest.mark.parametrize("model_fixture_name", test_support.all_test_models)
279+
def test_known_models(self, request, model_fixture_name):
280+
# Set up variables (arrange)
281+
testdata_path = request.path.parent / "testdata"
282+
test_path = testdata_path / "in_toto"
283+
test_class_path = test_path / "TestShardDigestsIntotoPayload"
284+
golden_path = test_class_path / model_fixture_name
285+
should_update = request.config.getoption("update_goldens")
286+
model = request.getfixturevalue(model_fixture_name)
287+
288+
# Compute payload (act)
289+
serializer = serialize_by_file_shard.ManifestSerializer(
290+
self._hasher_factory, allow_symlinks=True
291+
)
292+
manifest = serializer.serialize(model)
293+
payload = in_toto.ShardDigestsIntotoPayload.from_manifest(manifest)
294+
295+
# Compare with golden, or write to golden (approximately "assert")
296+
if should_update:
297+
with open(golden_path, "w", encoding="utf-8") as f:
298+
f.write(f"{json_format.MessageToJson(payload.statement.pb)}\n")
299+
else:
300+
with open(golden_path, "r", encoding="utf-8") as f:
301+
json_contents = f.read()
302+
proto = json_format.Parse(
303+
json_contents, statement_pb2.Statement()
304+
)
305+
306+
assert payload.statement.pb == proto
307+
308+
def test_produces_valid_statements(self, sample_model_folder):
309+
serializer = serialize_by_file_shard.ManifestSerializer(
310+
self._hasher_factory, allow_symlinks=True
311+
)
312+
manifest = serializer.serialize(sample_model_folder)
313+
314+
payload = in_toto.ShardDigestsIntotoPayload.from_manifest(
315+
manifest
316+
)
317+
318+
payload.statement.validate()
319+
320+
def test_only_runs_on_expected_manifest_types(self):
321+
digest = hashing.Digest("test", b"test_digest")
322+
manifest = manifest_module.DigestManifest(digest)
323+
324+
with pytest.raises(
325+
TypeError,
326+
match="Only ShardLevelManifest is supported",
327+
):
328+
in_toto.ShardDigestsIntotoPayload.from_manifest(manifest)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
{
2+
"_type": "https://in-toto.io/Statement/v1",
3+
"subject": [
4+
{
5+
"name": "d0/d1/d2/d3/d4/f0:0:16",
6+
"digest": {
7+
"sha256": "6efa14bb03544fcb76045c55f25b9315b6eb5be2d8a85f703193a76b7874c6ff"
8+
},
9+
"annotations": {
10+
"actual_hash_algorithm": "file-sha256-1000000"
11+
}
12+
},
13+
{
14+
"name": "d0/d1/d2/d3/d4/f1:0:16",
15+
"digest": {
16+
"sha256": "a9bc149b70b9d325cd68d275d582cfdb98c0347d3ce54590aa6533368daed3d2"
17+
},
18+
"annotations": {
19+
"actual_hash_algorithm": "file-sha256-1000000"
20+
}
21+
},
22+
{
23+
"name": "d0/d1/d2/d3/d4/f2:0:16",
24+
"digest": {
25+
"sha256": "5f597e6a92d1324d9adbed43d527926d11d0131487baf315e65ae1ef3b1ca3c0"
26+
},
27+
"annotations": {
28+
"actual_hash_algorithm": "file-sha256-1000000"
29+
}
30+
},
31+
{
32+
"name": "d0/d1/d2/d3/d4/f3:0:16",
33+
"digest": {
34+
"sha256": "eaf677c35fec6b87889d9e4563d8bb65dcb9869ca0225697c9cc44cf49dca008"
35+
},
36+
"annotations": {
37+
"actual_hash_algorithm": "file-sha256-1000000"
38+
}
39+
}
40+
],
41+
"predicateType": "https://model_signing/ShardDigests/v0.1",
42+
"predicate": {
43+
"unused": "Unused, just passed due to API requirements"
44+
}
45+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
{
2+
"_type": "https://in-toto.io/Statement/v1",
3+
"predicateType": "https://model_signing/ShardDigests/v0.1",
4+
"predicate": {
5+
"unused": "Unused, just passed due to API requirements"
6+
}
7+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
{
2+
"_type": "https://in-toto.io/Statement/v1",
3+
"predicateType": "https://model_signing/ShardDigests/v0.1",
4+
"predicate": {
5+
"unused": "Unused, just passed due to API requirements"
6+
}
7+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
{
2+
"_type": "https://in-toto.io/Statement/v1",
3+
"predicateType": "https://model_signing/ShardDigests/v0.1",
4+
"predicate": {
5+
"unused": "Unused, just passed due to API requirements"
6+
}
7+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
{
2+
"_type": "https://in-toto.io/Statement/v1",
3+
"subject": [
4+
{
5+
"name": ".:0:22",
6+
"digest": {
7+
"sha256": "3aab065c7181a173b5dd9e9d32a9f79923440b413be1e1ffcdba26a7365f719b"
8+
},
9+
"annotations": {
10+
"actual_hash_algorithm": "file-sha256-1000000"
11+
}
12+
}
13+
],
14+
"predicateType": "https://model_signing/ShardDigests/v0.1",
15+
"predicate": {
16+
"unused": "Unused, just passed due to API requirements"
17+
}
18+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
{
2+
"_type": "https://in-toto.io/Statement/v1",
3+
"subject": [
4+
{
5+
"name": "d0/f00:0:23",
6+
"digest": {
7+
"sha256": "fdd8925354242a7fd1515e79534317b800015607a609cd306e0b4dcfe6c92249"
8+
},
9+
"annotations": {
10+
"actual_hash_algorithm": "file-sha256-1000000"
11+
}
12+
},
13+
{
14+
"name": "d0/f01:0:23",
15+
"digest": {
16+
"sha256": "e16940b5e44ce981150bda37c4ba95881a749a521b4a297c5cdf97bdcfe965e6"
17+
},
18+
"annotations": {
19+
"actual_hash_algorithm": "file-sha256-1000000"
20+
}
21+
},
22+
{
23+
"name": "d0/f02:0:23",
24+
"digest": {
25+
"sha256": "407822246ea8f9e26380842c3f4cd10d7b23e78f1fe7c74c293608682886a426"
26+
},
27+
"annotations": {
28+
"actual_hash_algorithm": "file-sha256-1000000"
29+
}
30+
},
31+
{
32+
"name": "d1/f10:0:23",
33+
"digest": {
34+
"sha256": "6a3b08b5df77c4d418ceee1ac136a9ad49fc7c41358b5e82c1176daccb21ff3f"
35+
},
36+
"annotations": {
37+
"actual_hash_algorithm": "file-sha256-1000000"
38+
}
39+
},
40+
{
41+
"name": "d1/f11:0:23",
42+
"digest": {
43+
"sha256": "a484b3d8ea5e99b75f9f123f9a42c882388693edc7d85d82ccba54834712cadf"
44+
},
45+
"annotations": {
46+
"actual_hash_algorithm": "file-sha256-1000000"
47+
}
48+
},
49+
{
50+
"name": "d1/f12:0:23",
51+
"digest": {
52+
"sha256": "8f577930f5f40c2c2133cb299d36f9527fde98c1608569017cae6b5bcd01abb3"
53+
},
54+
"annotations": {
55+
"actual_hash_algorithm": "file-sha256-1000000"
56+
}
57+
},
58+
{
59+
"name": "f0:0:24",
60+
"digest": {
61+
"sha256": "997b37cc51f1ca1c7a270466607e26847429cd7264c30148c1b9352e224083fc"
62+
},
63+
"annotations": {
64+
"actual_hash_algorithm": "file-sha256-1000000"
65+
}
66+
},
67+
{
68+
"name": "f1:0:24",
69+
"digest": {
70+
"sha256": "c88a04d48353133fb065ba2c8ab369abab21395b9526aa20373ad828915fa7ae"
71+
},
72+
"annotations": {
73+
"actual_hash_algorithm": "file-sha256-1000000"
74+
}
75+
},
76+
{
77+
"name": "f2:0:24",
78+
"digest": {
79+
"sha256": "700e3ba5065d8dd47e41fd928ea086670d628f891ba363be0ca3c31d20d7d719"
80+
},
81+
"annotations": {
82+
"actual_hash_algorithm": "file-sha256-1000000"
83+
}
84+
},
85+
{
86+
"name": "f3:0:24",
87+
"digest": {
88+
"sha256": "912bcf5ebdf44dc7b4085b07940e0a81d157fba24b276e73fd911121d4544c4a"
89+
},
90+
"annotations": {
91+
"actual_hash_algorithm": "file-sha256-1000000"
92+
}
93+
}
94+
],
95+
"predicateType": "https://model_signing/ShardDigests/v0.1",
96+
"predicate": {
97+
"unused": "Unused, just passed due to API requirements"
98+
}
99+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
{
2+
"_type": "https://in-toto.io/Statement/v1",
3+
"subject": [
4+
{
5+
"name": "symlink_file:0:22",
6+
"digest": {
7+
"sha256": "3aab065c7181a173b5dd9e9d32a9f79923440b413be1e1ffcdba26a7365f719b"
8+
},
9+
"annotations": {
10+
"actual_hash_algorithm": "file-sha256-1000000"
11+
}
12+
}
13+
],
14+
"predicateType": "https://model_signing/ShardDigests/v0.1",
15+
"predicate": {
16+
"unused": "Unused, just passed due to API requirements"
17+
}
18+
}

0 commit comments

Comments
 (0)