Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit e411db8

Browse files
committed Jul 27, 2024
Add in-toto format for items with hash of hashes.
This converts model serialization manifests that record a hash for every model file into an in-toto payload, which can then be passed to Sigstore's `sign_intoto` to produce a Sigstore `Bundle` (if using Sigstore). To identify the model, we compute a hash over all of the file hashes and use that as the subject. The individual file hashes are recorded in the predicate, and the verifier is expected to check them as part of the verification process.

Signed-off-by: Mihai Maruseac <mihaimaruseac@google.com>
1 parent c1976dc commit e411db8

File tree

10 files changed

+385
-5
lines changed

10 files changed

+385
-5
lines changed
 

‎.github/workflows/lint.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -89,5 +89,5 @@ jobs:
8989
# We should actually migrate to ruff, but that's configured via pyproject.toml which we use when we release the wheel
9090
pylint \
9191
--max-line-length 80 \
92-
--disable C0114,C0115,C0116,R0801,R0903,R0904,R0913,R0914,R1721,R1737,W0107,W0212,W0223,W0231,W0511,W0621 \
92+
--disable C0103,C0114,C0115,C0116,R0801,R0903,R0904,R0913,R0914,R1721,R1737,W0107,W0212,W0223,W0231,W0511,W0621 \
9393
model_signing/{hashing,manifest,serialization,signature,signing}

‎model_signing/signing/in_toto.py

+133-1
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,12 @@
1919
envelope format is DSSE, see https://github.com/secure-systems-lab/dsse.
2020
"""
2121

22-
from in_toto_attestation.v1 import statement
2322
from typing import Final, Self
2423
from typing_extensions import override
2524

25+
from in_toto_attestation.v1 import statement
26+
27+
from model_signing.hashing import memory
2628
from model_signing.manifest import manifest as manifest_module
2729
from model_signing.signing import signing
2830

@@ -122,3 +124,133 @@ def from_manifest(cls, manifest: manifest_module.Manifest) -> Self:
122124
digest_hex=digest.digest_hex,
123125
digest_algorithm=digest.algorithm,
124126
)
127+
128+
129+
def _convert_descriptors_to_hashed_statement(
130+
manifest: manifest_module.Manifest,
131+
*,
132+
predicate_type: str,
133+
predicate_top_level_name: str,
134+
):
135+
"""Converts manifest descriptors to an in-toto statement with payload.
136+
137+
Args:
138+
manifest: The manifest to extract the descriptors from. Assumed valid.
139+
predicate_type: The predicate_type to use in the in-toto statement.
140+
predicate_top_level_name: Name to use in the payload for the array of
141+
the per-descriptor entries (name, digest, algorithm) in the predicate.
142+
"""
143+
hasher = memory.SHA256()
144+
subjects = []
145+
for descriptor in manifest.resource_descriptors():
146+
hasher.update(descriptor.digest.digest_value)
147+
subjects.append({
148+
"name": descriptor.identifier,
149+
"digest": descriptor.digest.digest_hex,
150+
"algorithm": descriptor.digest.algorithm,
151+
})
152+
153+
digest = {"sha256": hasher.compute().digest_hex}
154+
descriptor = statement.ResourceDescriptor(digest=digest).pb
155+
156+
return statement.Statement(
157+
subjects=[descriptor],
158+
predicate_type=predicate_type,
159+
predicate={predicate_top_level_name: subjects},
160+
)
161+
162+
163+
class DigestOfDigestsIntotoPayload(IntotoPayload):
164+
"""In-toto payload where the subject is a digest of digests of model files.
165+
166+
This payload is supposed to be used for manifests where every file in the
167+
model is matched with a digest. Because existing tooling only supports
168+
established hashing algorithms, we record every such digest in the predicate
169+
part and compute a hash for the subject by using sha256 on the concatenation
170+
of the file hashes. To ensure determinism, the hashes are sorted
171+
alphabetically by filename.
172+
173+
Example:
174+
```json
175+
{
176+
"_type": "https://in-toto.io/Statement/v1",
177+
"subject": [
178+
{
179+
"digest": {
180+
"sha256": "18b5a4..."
181+
}
182+
}
183+
],
184+
"predicateType": "https://model_signing/DigestOfDigests/v0.1",
185+
"predicate": {
186+
"files": [
187+
{
188+
"digest": "6efa14...",
189+
"algorithm": "file-sha256",
190+
"name": "d0/d1/d2/d3/d4/f0"
191+
},
192+
{
193+
"digest": "a9bc14...",
194+
"algorithm": "file-sha256",
195+
"name": "d0/d1/d2/d3/d4/f1"
196+
},
197+
{
198+
"digest": "5f597e...",
199+
"algorithm": "file-sha256",
200+
"name": "d0/d1/d2/d3/d4/f2"
201+
},
202+
{
203+
"digest": "eaf677...",
204+
"algorithm": "file-sha256",
205+
"name": "d0/d1/d2/d3/d4/f3"
206+
}
207+
]
208+
}
209+
}
210+
```
211+
212+
A missing predicate, or a predicate for which an entry does not have a valid
213+
name, digest, or algorithm should be considered invalid and fail integrity
214+
verification.
215+
216+
See also https://github.com/sigstore/sigstore-python/issues/1018.
217+
"""
218+
219+
predicate_type: Final[str] = "https://model_signing/DigestOfDigests/v0.1"
220+
221+
def __init__(self, statement: statement.Statement):
222+
"""Builds an instance of this in-toto payload.
223+
224+
Don't call this directly in production. Use `from_manifest()` instead.
225+
226+
Args:
227+
statement: The DSSE statement representing this in-toto payload.
228+
"""
229+
self.statement = statement
230+
231+
@classmethod
232+
@override
233+
def from_manifest(cls, manifest: manifest_module.Manifest) -> Self:
234+
"""Converts a manifest to the signing payload used for signing.
235+
236+
The manifest must be one where every model file is paired with its own
237+
digest. Currently, this is only `FileLevelManifest`.
238+
239+
Args:
240+
manifest: the manifest to convert to signing payload.
241+
242+
Returns:
243+
An instance of `DigestOfDigestsIntotoPayload`.
244+
245+
Raises:
246+
TypeError: If the manifest is not `FileLevelManifest`.
247+
"""
248+
if not isinstance(manifest, manifest_module.FileLevelManifest):
249+
raise TypeError("Only FileLevelManifest is supported")
250+
251+
statement = _convert_descriptors_to_hashed_statement(
252+
manifest,
253+
predicate_type=cls.predicate_type,
254+
predicate_top_level_name="files",
255+
)
256+
return cls(statement)

‎model_signing/signing/in_toto_test.py

+57-3
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222

2323
from google.protobuf import json_format
2424
from in_toto_attestation.v1 import statement_pb2
25+
import pathlib
2526
import pytest
2627

2728
from model_signing import test_support
@@ -77,9 +78,6 @@ def test_produces_valid_statements(self):
7778
payload.statement.validate()
7879

7980
def test_only_runs_on_expected_manifest_types(self, sample_model_folder):
80-
file_hasher = file.SimpleFileHasher(
81-
test_support.UNUSED_PATH, memory.SHA256()
82-
)
8381
serializer = serialize_by_file.ManifestSerializer(
8482
lambda f: file.SimpleFileHasher(f, memory.SHA256()),
8583
allow_symlinks=True,
@@ -88,3 +86,59 @@ def test_only_runs_on_expected_manifest_types(self, sample_model_folder):
8886

8987
with pytest.raises(TypeError, match="Only DigestManifest is supported"):
9088
in_toto.SingleDigestIntotoPayload.from_manifest(manifest)
89+
90+
91+
class TestDigestOfDigestsIntotoPayload:
92+
93+
def _hasher_factory(self, path: pathlib.Path) -> file.FileHasher:
94+
return file.SimpleFileHasher(path, memory.SHA256())
95+
96+
@pytest.mark.parametrize("model_fixture_name", test_support.all_test_models)
97+
def test_known_models(self, request, model_fixture_name):
98+
# Set up variables (arrange)
99+
testdata_path = request.path.parent / "testdata"
100+
test_path = testdata_path / "in_toto"
101+
test_class_path = test_path / "TestDigestOfDigestsIntotoPayload"
102+
golden_path = test_class_path / model_fixture_name
103+
should_update = request.config.getoption("update_goldens")
104+
model = request.getfixturevalue(model_fixture_name)
105+
106+
# Compute payload (act)
107+
serializer = serialize_by_file.ManifestSerializer(
108+
self._hasher_factory, allow_symlinks=True
109+
)
110+
manifest = serializer.serialize(model)
111+
payload = in_toto.DigestOfDigestsIntotoPayload.from_manifest(manifest)
112+
113+
# Compare with golden, or write to golden (approximately "assert")
114+
if should_update:
115+
with open(golden_path, "w", encoding="utf-8") as f:
116+
f.write(f"{json_format.MessageToJson(payload.statement.pb)}\n")
117+
else:
118+
with open(golden_path, "r", encoding="utf-8") as f:
119+
json_contents = f.read()
120+
expected_proto = json_format.Parse(
121+
json_contents, statement_pb2.Statement()
122+
)
123+
124+
assert payload.statement.pb == expected_proto
125+
126+
def test_produces_valid_statements(self, sample_model_folder):
127+
serializer = serialize_by_file.ManifestSerializer(
128+
self._hasher_factory, allow_symlinks=True
129+
)
130+
manifest = serializer.serialize(sample_model_folder)
131+
132+
payload = in_toto.DigestOfDigestsIntotoPayload.from_manifest(manifest)
133+
134+
payload.statement.validate()
135+
136+
def test_only_runs_on_expected_manifest_types(self):
137+
digest = hashing.Digest("test", b"test_digest")
138+
manifest = manifest_module.DigestManifest(digest)
139+
140+
with pytest.raises(
141+
TypeError,
142+
match="Only FileLevelManifest is supported",
143+
):
144+
in_toto.DigestOfDigestsIntotoPayload.from_manifest(manifest)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
{
2+
"_type": "https://in-toto.io/Statement/v1",
3+
"subject": [
4+
{
5+
"digest": {
6+
"sha256": "18b5a45fe7983f7194e8ffd96c80f5f0ec53191bf4a32b6aff293f043e816d7a"
7+
}
8+
}
9+
],
10+
"predicateType": "https://model_signing/DigestOfDigests/v0.1",
11+
"predicate": {
12+
"files": [
13+
{
14+
"digest": "6efa14bb03544fcb76045c55f25b9315b6eb5be2d8a85f703193a76b7874c6ff",
15+
"algorithm": "file-sha256",
16+
"name": "d0/d1/d2/d3/d4/f0"
17+
},
18+
{
19+
"digest": "a9bc149b70b9d325cd68d275d582cfdb98c0347d3ce54590aa6533368daed3d2",
20+
"algorithm": "file-sha256",
21+
"name": "d0/d1/d2/d3/d4/f1"
22+
},
23+
{
24+
"digest": "5f597e6a92d1324d9adbed43d527926d11d0131487baf315e65ae1ef3b1ca3c0",
25+
"algorithm": "file-sha256",
26+
"name": "d0/d1/d2/d3/d4/f2"
27+
},
28+
{
29+
"digest": "eaf677c35fec6b87889d9e4563d8bb65dcb9869ca0225697c9cc44cf49dca008",
30+
"algorithm": "file-sha256",
31+
"name": "d0/d1/d2/d3/d4/f3"
32+
}
33+
]
34+
}
35+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
{
2+
"_type": "https://in-toto.io/Statement/v1",
3+
"subject": [
4+
{
5+
"digest": {
6+
"sha256": "5df6e0e2761359d30a8275058e299fcc0381534545f55cf43e41983f5d4c9456"
7+
}
8+
}
9+
],
10+
"predicateType": "https://model_signing/DigestOfDigests/v0.1",
11+
"predicate": {
12+
"files": [
13+
{
14+
"digest": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
15+
"algorithm": "file-sha256",
16+
"name": "."
17+
}
18+
]
19+
}
20+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
{
2+
"_type": "https://in-toto.io/Statement/v1",
3+
"subject": [
4+
{
5+
"digest": {
6+
"sha256": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
7+
}
8+
}
9+
],
10+
"predicateType": "https://model_signing/DigestOfDigests/v0.1",
11+
"predicate": {
12+
"files": []
13+
}
14+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
{
2+
"_type": "https://in-toto.io/Statement/v1",
3+
"subject": [
4+
{
5+
"digest": {
6+
"sha256": "5df6e0e2761359d30a8275058e299fcc0381534545f55cf43e41983f5d4c9456"
7+
}
8+
}
9+
],
10+
"predicateType": "https://model_signing/DigestOfDigests/v0.1",
11+
"predicate": {
12+
"files": [
13+
{
14+
"digest": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
15+
"algorithm": "file-sha256",
16+
"name": "empty_file"
17+
}
18+
]
19+
}
20+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
{
2+
"_type": "https://in-toto.io/Statement/v1",
3+
"subject": [
4+
{
5+
"digest": {
6+
"sha256": "002d162f867c5eee944e5080d25829b6625be0e3f081f6fbafc7dd655ca2e178"
7+
}
8+
}
9+
],
10+
"predicateType": "https://model_signing/DigestOfDigests/v0.1",
11+
"predicate": {
12+
"files": [
13+
{
14+
"digest": "3aab065c7181a173b5dd9e9d32a9f79923440b413be1e1ffcdba26a7365f719b",
15+
"algorithm": "file-sha256",
16+
"name": "."
17+
}
18+
]
19+
}
20+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
{
2+
"_type": "https://in-toto.io/Statement/v1",
3+
"subject": [
4+
{
5+
"digest": {
6+
"sha256": "143cc682e555951649f18e2761c3d526d2502996f5e32dc187ef7f8a614f8df7"
7+
}
8+
}
9+
],
10+
"predicateType": "https://model_signing/DigestOfDigests/v0.1",
11+
"predicate": {
12+
"files": [
13+
{
14+
"digest": "fdd8925354242a7fd1515e79534317b800015607a609cd306e0b4dcfe6c92249",
15+
"algorithm": "file-sha256",
16+
"name": "d0/f00"
17+
},
18+
{
19+
"digest": "e16940b5e44ce981150bda37c4ba95881a749a521b4a297c5cdf97bdcfe965e6",
20+
"algorithm": "file-sha256",
21+
"name": "d0/f01"
22+
},
23+
{
24+
"digest": "407822246ea8f9e26380842c3f4cd10d7b23e78f1fe7c74c293608682886a426",
25+
"algorithm": "file-sha256",
26+
"name": "d0/f02"
27+
},
28+
{
29+
"digest": "6a3b08b5df77c4d418ceee1ac136a9ad49fc7c41358b5e82c1176daccb21ff3f",
30+
"algorithm": "file-sha256",
31+
"name": "d1/f10"
32+
},
33+
{
34+
"digest": "a484b3d8ea5e99b75f9f123f9a42c882388693edc7d85d82ccba54834712cadf",
35+
"algorithm": "file-sha256",
36+
"name": "d1/f11"
37+
},
38+
{
39+
"digest": "8f577930f5f40c2c2133cb299d36f9527fde98c1608569017cae6b5bcd01abb3",
40+
"algorithm": "file-sha256",
41+
"name": "d1/f12"
42+
},
43+
{
44+
"digest": "997b37cc51f1ca1c7a270466607e26847429cd7264c30148c1b9352e224083fc",
45+
"algorithm": "file-sha256",
46+
"name": "f0"
47+
},
48+
{
49+
"digest": "c88a04d48353133fb065ba2c8ab369abab21395b9526aa20373ad828915fa7ae",
50+
"algorithm": "file-sha256",
51+
"name": "f1"
52+
},
53+
{
54+
"digest": "700e3ba5065d8dd47e41fd928ea086670d628f891ba363be0ca3c31d20d7d719",
55+
"algorithm": "file-sha256",
56+
"name": "f2"
57+
},
58+
{
59+
"digest": "912bcf5ebdf44dc7b4085b07940e0a81d157fba24b276e73fd911121d4544c4a",
60+
"algorithm": "file-sha256",
61+
"name": "f3"
62+
}
63+
]
64+
}
65+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
{
2+
"_type": "https://in-toto.io/Statement/v1",
3+
"subject": [
4+
{
5+
"digest": {
6+
"sha256": "002d162f867c5eee944e5080d25829b6625be0e3f081f6fbafc7dd655ca2e178"
7+
}
8+
}
9+
],
10+
"predicateType": "https://model_signing/DigestOfDigests/v0.1",
11+
"predicate": {
12+
"files": [
13+
{
14+
"digest": "3aab065c7181a173b5dd9e9d32a9f79923440b413be1e1ffcdba26a7365f719b",
15+
"algorithm": "file-sha256",
16+
"name": "symlink_file"
17+
}
18+
]
19+
}
20+
}

0 commit comments

Comments
 (0)
Please sign in to comment.