Skip to content

Commit d716351

Browse files
committed
Create in-toto signing payload for single digests.
This converts `DigestManifest` objects to an in-toto format where the model is identified by its complete digest (the result of serialization). For Sigstore signing, this payload can be signed via `sign_intoto`, producing a Sigstore `Bundle` as the signature. Signed-off-by: Mihai Maruseac <[email protected]>
1 parent 2198c9d commit d716351

File tree

10 files changed

+312
-1
lines changed

10 files changed

+312
-1
lines changed

.github/workflows/lint.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -89,5 +89,5 @@ jobs:
8989
# We should actually migrate to ruff, but that's configured via pyproject.toml which we use when we release the wheel
9090
pylint \
9191
--max-line-length 80 \
92-
--disable C0114,C0115,C0116,R0801,R0903,R0904,R0913,R0914,R1721,R1737,W0107,W0212,W0223,W0231,W0511,W0621 \
92+
--disable C0103,C0114,C0115,C0116,E1101,R0801,R0903,R0904,R0913,R0914,R1721,R1737,W0107,W0212,W0223,W0231,W0511,W0621 \
9393
model_signing/{hashing,manifest,serialization,signature,signing}

model_signing/signing/in_toto.py

+125
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
# Copyright 2024 The Sigstore Authors
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""Signing payloads for models as in-toto statements.
16+
17+
To generate the signing payload we convert model manifests to in-toto formats,
18+
as described by https://github.com/in-toto/attestation/tree/main/spec/v1. The
19+
envelope format is DSSE, see https://github.com/secure-systems-lab/dsse.
20+
"""
21+
22+
from typing import Final, Self
23+
from typing_extensions import override
24+
25+
from in_toto_attestation.v1 import statement
26+
27+
from model_signing.manifest import manifest as manifest_module
28+
from model_signing.signing import signing
29+
30+
31+
class IntotoPayload(signing.SigningPayload):
    """Base class for signing payloads expressed as in-toto statements.

    No payload is built here: the class exists so that the several in-toto
    payload formats defined in this module share a common parent.

    Subclasses are expected to assign a constant value to the
    `predicate_type` class attribute declared below.
    """

    # NOTE(review): static type checkers following PEP 591 may object to a
    # `Final` name declared without a value and then redeclared in a subclass;
    # confirm whether `ClassVar[str]` was intended here.
    predicate_type: Final[str]
42+
43+
44+
class SingleDigestIntotoPayload(IntotoPayload):
    """In-toto statement whose only subject is the digest of the whole model.

    The model is serialized to a single digest, and that digest becomes the
    one subject of the statement. The subject's name field is left unset.

    Because the custom hashing algorithms used for models are not understood
    by existing tools, the subject digest is always recorded under the
    "sha256" key. The algorithm that really produced the digest is stored in
    the predicate instead.

    Example:
    ```json
    {
      "_type": "https://in-toto.io/Statement/v1",
      "subject": [
        {
          "digest": {
            "sha256": "3aab065c...."
          }
        }
      ],
      "predicateType": "https://model_signing/Digest/v0.1",
      "predicate": {
        "actual_hash_algorithm": "file-sha256"
      }
    }
    ```

    When the predicate is absent, or does not set "actual_hash_algorithm",
    the digest should be assumed to have been computed with the algorithm
    named in the resource descriptor (i.e., sha256).

    See also https://github.com/sigstore/sigstore-python/issues/1018.
    """

    predicate_type: Final[str] = "https://model_signing/Digest/v0.1"

    def __init__(self, *, digest_hex: str, digest_algorithm: str):
        """Assembles the in-toto statement for one digest.

        Production code should not call this directly; build instances with
        `from_manifest()` instead.

        Args:
            digest_hex: the digest of the subject, as a hexadecimal string.
            digest_algorithm: name of the algorithm that produced the digest;
              stored in the predicate as "actual_hash_algorithm".
        """
        # The subject digest is keyed as "sha256" regardless of the real
        # algorithm (see the class docstring).
        resource = statement.ResourceDescriptor(
            digest={"sha256": digest_hex}
        )
        actual_algorithm = {"actual_hash_algorithm": digest_algorithm}

        self.statement = statement.Statement(
            subjects=[resource.pb],
            predicate_type=self.predicate_type,
            predicate=actual_algorithm,
        )

    @classmethod
    @override
    def from_manifest(cls, manifest: manifest_module.Manifest) -> Self:
        """Builds the signing payload from a manifest.

        Only `DigestManifest` instances are accepted.

        Args:
            manifest: the manifest to turn into a signing payload.

        Returns:
            An instance of `SingleDigestIntotoPayload`.

        Raises:
            TypeError: If the manifest is not `DigestManifest`.
        """
        if isinstance(manifest, manifest_module.DigestManifest):
            # A digest manifest is guaranteed to hold exactly one descriptor.
            descriptors = list(manifest.resource_descriptors())
            model_digest = descriptors[0].digest
            return cls(
                digest_hex=model_digest.digest_hex,
                digest_algorithm=model_digest.algorithm,
            )

        raise TypeError("Only DigestManifest is supported")

model_signing/signing/in_toto_test.py

+88
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
# Copyright 2024 The Sigstore Authors
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""Tests for in-toto signing payloads.
16+
17+
NOTE: This test uses a goldens setup to compare expected results with data from
18+
files. If the golden tests are failing, regenerate the golden files with
19+
20+
pytest model_signing/ --update_goldens
21+
"""
22+
23+
import pytest
24+
25+
from google.protobuf import json_format
26+
from in_toto_attestation.v1 import statement_pb2
27+
28+
from model_signing import test_support
29+
from model_signing.hashing import file
30+
from model_signing.hashing import memory
31+
from model_signing.hashing import hashing
32+
from model_signing.manifest import manifest as manifest_module
33+
from model_signing.serialization import serialize_by_file
34+
from model_signing.signing import in_toto
35+
36+
37+
class TestSingleDigestIntotoPayload:
    """Tests for `in_toto.SingleDigestIntotoPayload`."""

    @pytest.mark.parametrize("model_fixture_name", test_support.all_test_models)
    def test_known_models(self, request, model_fixture_name):
        """Compares the payload of each known model against its golden file.

        When the `update_goldens` option is set, the golden file is rewritten
        and no comparison takes place.
        """
        # Arrange: locate the golden file and fetch the model fixture.
        golden_path = (
            request.path.parent
            / "testdata"
            / "in_toto"
            / "TestSingleDigestIntotoPayload"
            / model_fixture_name
        )
        should_update = request.config.getoption("update_goldens")
        model = request.getfixturevalue(model_fixture_name)

        # Act: serialize the model to one digest and wrap it in a payload.
        file_hasher = file.SimpleFileHasher(
            test_support.UNUSED_PATH, memory.SHA256()
        )
        serializer = serialize_by_file.DigestSerializer(
            file_hasher, memory.SHA256, allow_symlinks=True
        )
        payload = in_toto.SingleDigestIntotoPayload.from_manifest(
            serializer.serialize(model)
        )

        # Regenerate the golden and stop, if that was requested.
        if should_update:
            with open(golden_path, "w", encoding="utf-8") as golden:
                golden.write(
                    f"{json_format.MessageToJson(payload.statement.pb)}\n"
                )
            return

        # Assert: the statement matches the one stored in the golden file.
        with open(golden_path, "r", encoding="utf-8") as golden:
            golden_json = golden.read()
        expected_proto = json_format.Parse(
            golden_json, statement_pb2.Statement()
        )

        assert payload.statement.pb == expected_proto

    def test_produces_valid_statements(self):
        """Checks that the generated statement passes its own validation."""
        manifest = manifest_module.DigestManifest(
            hashing.Digest("test", b"test_digest")
        )

        payload = in_toto.SingleDigestIntotoPayload.from_manifest(manifest)

        payload.statement.validate()

    def test_only_runs_on_expected_manifest_types(self, sample_model_folder):
        """Checks that a manifest of another type is rejected with TypeError."""

        def hasher_factory(path):
            return file.SimpleFileHasher(path, memory.SHA256())

        serializer = serialize_by_file.ManifestSerializer(
            hasher_factory,
            allow_symlinks=True,
        )
        manifest = serializer.serialize(sample_model_folder)

        with pytest.raises(TypeError, match="Only DigestManifest is supported"):
            in_toto.SingleDigestIntotoPayload.from_manifest(manifest)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
{
2+
"_type": "https://in-toto.io/Statement/v1",
3+
"subject": [
4+
{
5+
"digest": {
6+
"sha256": "36eed9389ebbbe15ac15d33c81dabb60ccb7c945ff641d78f59db9aa9dc47ac9"
7+
}
8+
}
9+
],
10+
"predicateType": "https://model_signing/Digest/v0.1",
11+
"predicate": {
12+
"actual_hash_algorithm": "sha256"
13+
}
14+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
{
2+
"_type": "https://in-toto.io/Statement/v1",
3+
"subject": [
4+
{
5+
"digest": {
6+
"sha256": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
7+
}
8+
}
9+
],
10+
"predicateType": "https://model_signing/Digest/v0.1",
11+
"predicate": {
12+
"actual_hash_algorithm": "file-sha256"
13+
}
14+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
{
2+
"_type": "https://in-toto.io/Statement/v1",
3+
"subject": [
4+
{
5+
"digest": {
6+
"sha256": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
7+
}
8+
}
9+
],
10+
"predicateType": "https://model_signing/Digest/v0.1",
11+
"predicate": {
12+
"actual_hash_algorithm": "sha256"
13+
}
14+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
{
2+
"_type": "https://in-toto.io/Statement/v1",
3+
"subject": [
4+
{
5+
"digest": {
6+
"sha256": "68efd863f20e083173846a5e98ad11387a1979efe20ded426a7930bab8358a9c"
7+
}
8+
}
9+
],
10+
"predicateType": "https://model_signing/Digest/v0.1",
11+
"predicate": {
12+
"actual_hash_algorithm": "sha256"
13+
}
14+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
{
2+
"_type": "https://in-toto.io/Statement/v1",
3+
"subject": [
4+
{
5+
"digest": {
6+
"sha256": "3aab065c7181a173b5dd9e9d32a9f79923440b413be1e1ffcdba26a7365f719b"
7+
}
8+
}
9+
],
10+
"predicateType": "https://model_signing/Digest/v0.1",
11+
"predicate": {
12+
"actual_hash_algorithm": "file-sha256"
13+
}
14+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
{
2+
"_type": "https://in-toto.io/Statement/v1",
3+
"subject": [
4+
{
5+
"digest": {
6+
"sha256": "310af4fc4c52bf63cd1687c67076ed3e56bc5480a1b151539e6c550506ae0301"
7+
}
8+
}
9+
],
10+
"predicateType": "https://model_signing/Digest/v0.1",
11+
"predicate": {
12+
"actual_hash_algorithm": "sha256"
13+
}
14+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
{
2+
"_type": "https://in-toto.io/Statement/v1",
3+
"subject": [
4+
{
5+
"digest": {
6+
"sha256": "8372365be7578241d18db47ec83b735bb450a10a1b4298d9b7b0d8bf543b7271"
7+
}
8+
}
9+
],
10+
"predicateType": "https://model_signing/Digest/v0.1",
11+
"predicate": {
12+
"actual_hash_algorithm": "sha256"
13+
}
14+
}

0 commit comments

Comments
 (0)