Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
131 changes: 131 additions & 0 deletions model_signing/signing/in_toto.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from in_toto_attestation.v1 import statement
from typing_extensions import override

from model_signing.hashing import memory
from model_signing.manifest import manifest as manifest_module
from model_signing.signing import signing

Expand Down Expand Up @@ -123,3 +124,133 @@ def from_manifest(cls, manifest: manifest_module.Manifest) -> Self:
digest_hex=digest.digest_hex,
digest_algorithm=digest.algorithm,
)


def _convert_descriptors_to_hashed_statement(
    manifest: manifest_module.Manifest,
    *,
    predicate_type: str,
    predicate_top_level_name: str,
):
    """Builds an in-toto statement whose subject is a digest of digests.

    Every resource descriptor of the manifest is recorded in the predicate
    as a `name` / `digest` / `algorithm` entry. The raw digest value of each
    descriptor is also fed into a single SHA256 hasher, in the order in which
    `manifest.resource_descriptors()` yields them, and the resulting hex
    digest becomes the only subject of the statement. The subject digest
    therefore depends on that iteration order.

    Args:
        manifest: The manifest to extract the descriptors from. Assumed valid.
        predicate_type: The predicate_type to use in the in-toto statement.
        predicate_top_level_name: Key in the predicate under which the list
          of per-descriptor entries is stored.

    Returns:
        The in-toto statement built from the manifest descriptors.
    """
    combined_hasher = memory.SHA256()
    entries = []
    for resource in manifest.resource_descriptors():
        # Only the raw digest bytes contribute to the subject digest; names
        # and algorithms are recorded in the predicate alone.
        combined_hasher.update(resource.digest.digest_value)
        entry = {
            "name": resource.identifier,
            "digest": resource.digest.digest_hex,
            "algorithm": resource.digest.algorithm,
        }
        entries.append(entry)

    subject = statement.ResourceDescriptor(
        digest={"sha256": combined_hasher.compute().digest_hex}
    ).pb

    return statement.Statement(
        subjects=[subject],
        predicate_type=predicate_type,
        predicate={predicate_top_level_name: entries},
    )
Comment on lines +145 to +160
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't the subject be a list of the file hashes?

It feels a bit like misusing the Statement format when the digests are put into the predicate.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Doing that in #266. But in #111 there was a discussion that the semantics of verification on subjects is not working properly for models, so we put as subject a single digest for the entire model and then the actual hashes in payload to reconstruct as needed.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't really see the verification argument. It applies to every format you roll with: if there is another implementation, there is always the risk of doing it wrong or differently.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just implemented all manifest ideas discussed in #111 so we can experiment with them and see which one is more ergonomic, better suited for our needs, and less prone to failures.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, this specific PR is what's being discussed in #248 (comment)



class DigestOfDigestsIntotoPayload(IntotoPayload):
    """In-toto payload whose subject is a digest over per-file digests.

    Intended for manifests that pair every file in the model with its own
    digest. Existing tooling only supports established hashing algorithms,
    so the individual file digests are recorded in the predicate, while the
    subject holds a single sha256 computed over the concatenation of the file
    hashes. To ensure determinism, the hashes are sorted alphabetically by
    filename.

    Example:
    ```json
    {
      "_type": "https://in-toto.io/Statement/v1",
      "subject": [
        {
          "digest": {
            "sha256": "18b5a4..."
          }
        }
      ],
      "predicateType": "https://model_signing/DigestOfDigests/v0.1",
      "predicate": {
        "files": [
          {
            "digest": "6efa14...",
            "algorithm": "file-sha256",
            "name": "d0/d1/d2/d3/d4/f0"
          },
          {
            "digest": "a9bc14...",
            "algorithm": "file-sha256",
            "name": "d0/d1/d2/d3/d4/f1"
          },
          {
            "digest": "5f597e...",
            "algorithm": "file-sha256",
            "name": "d0/d1/d2/d3/d4/f2"
          },
          {
            "digest": "eaf677...",
            "algorithm": "file-sha256",
            "name": "d0/d1/d2/d3/d4/f3"
          }
        ]
      }
    }
    ```

    A statement with no predicate, or with a predicate entry lacking a valid
    name, digest, or algorithm, should be considered invalid and must fail
    integrity verification.

    See also https://github.com/sigstore/sigstore-python/issues/1018.
    """

    # Type URI written to the `predicateType` field of generated statements.
    predicate_type: Final[str] = "https://model_signing/DigestOfDigests/v0.1"
Comment on lines +218 to +219
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was curious if it mattered that this wasn't resolvable. The answer is apparently no, but should we namespace this any more to avoid collisions?

SHOULD resolve to a human-readable description, but MAY be unresolvable. SHOULD include a version number to allow for revisions.

TypeURIs are not registered. The natural namespacing of URIs is sufficient to prevent collisions.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Likely yes. Right now all these constants are just for testing. When we standardize, we will pick sensible normal values for them.

Thank you for flagging and the review!


def __init__(self, statement: statement.Statement):
    """Wraps an already-built statement as this in-toto payload.

    Production code should build instances through `from_manifest()`
    instead of calling this constructor directly.

    Args:
        statement: The DSSE statement representing this in-toto payload.
    """
    self.statement = statement

@classmethod
@override
def from_manifest(cls, manifest: manifest_module.Manifest) -> Self:
    """Builds the signing payload for a manifest.

    Only manifests that pair every model file with its own digest are
    accepted. Currently, this is only `FileLevelManifest`.

    Args:
        manifest: the manifest to convert to signing payload.

    Returns:
        An instance of `DigestOfDigestsIntotoPayload`.

    Raises:
        TypeError: If the manifest is not `FileLevelManifest`.
    """
    if not isinstance(manifest, manifest_module.FileLevelManifest):
        raise TypeError("Only FileLevelManifest is supported")

    # The per-file entries are stored under the "files" key of the predicate.
    return cls(
        _convert_descriptors_to_hashed_statement(
            manifest,
            predicate_type=cls.predicate_type,
            predicate_top_level_name="files",
        )
    )
58 changes: 58 additions & 0 deletions model_signing/signing/in_toto_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
pytest model_signing/ --update_goldens
"""

import pathlib

from google.protobuf import json_format
from in_toto_attestation.v1 import statement_pb2
import pytest
Expand Down Expand Up @@ -85,3 +87,59 @@ def test_only_runs_on_expected_manifest_types(self, sample_model_folder):

with pytest.raises(TypeError, match="Only DigestManifest is supported"):
in_toto.SingleDigestIntotoPayload.from_manifest(manifest)


class TestDigestOfDigestsIntotoPayload:
    """Tests for building `DigestOfDigestsIntotoPayload` from manifests."""

    def _hasher_factory(self, path: pathlib.Path) -> file.FileHasher:
        """Creates a `SimpleFileHasher` for `path` backed by SHA256."""
        return file.SimpleFileHasher(path, memory.SHA256())

    def _payload_for(self, model):
        """Serializes `model` per file and converts it to the payload."""
        serializer = serialize_by_file.ManifestSerializer(
            self._hasher_factory, allow_symlinks=True
        )
        manifest = serializer.serialize(model)
        return in_toto.DigestOfDigestsIntotoPayload.from_manifest(manifest)

    @pytest.mark.parametrize("model_fixture_name", test_support.all_test_models)
    def test_known_models(self, request, model_fixture_name):
        """Compares the payload of each known model with its golden file.

        When the `update_goldens` option is set, the golden file is rewritten
        with the freshly computed payload instead of being compared.
        """
        # Arrange: locate the golden file and resolve the model fixture.
        golden_path = (
            request.path.parent
            / "testdata"
            / "in_toto"
            / "TestDigestOfDigestsIntotoPayload"
            / model_fixture_name
        )
        should_update = request.config.getoption("update_goldens")
        model = request.getfixturevalue(model_fixture_name)

        # Act: compute the payload for the model.
        payload = self._payload_for(model)

        # Update mode: overwrite the golden file and skip the comparison.
        if should_update:
            golden_json = json_format.MessageToJson(payload.statement.pb)
            golden_path.write_text(f"{golden_json}\n", encoding="utf-8")
            return

        # Assert: the computed proto must match the stored golden proto.
        expected_proto = json_format.Parse(
            golden_path.read_text(encoding="utf-8"),
            statement_pb2.Statement(),
        )
        assert payload.statement.pb == expected_proto

    def test_produces_valid_statements(self, sample_model_folder):
        """Checks that the generated statement passes its own validation."""
        payload = self._payload_for(sample_model_folder)

        payload.statement.validate()

    def test_only_runs_on_expected_manifest_types(self):
        """Checks that a non-`FileLevelManifest` manifest is rejected."""
        manifest = manifest_module.DigestManifest(
            hashing.Digest("test", b"test_digest")
        )

        with pytest.raises(
            TypeError,
            match="Only FileLevelManifest is supported",
        ):
            in_toto.DigestOfDigestsIntotoPayload.from_manifest(manifest)
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
{
"_type": "https://in-toto.io/Statement/v1",
"subject": [
{
"digest": {
"sha256": "18b5a45fe7983f7194e8ffd96c80f5f0ec53191bf4a32b6aff293f043e816d7a"
}
}
],
"predicateType": "https://model_signing/DigestOfDigests/v0.1",
"predicate": {
"files": [
{
"digest": "6efa14bb03544fcb76045c55f25b9315b6eb5be2d8a85f703193a76b7874c6ff",
"algorithm": "file-sha256",
"name": "d0/d1/d2/d3/d4/f0"
},
{
"digest": "a9bc149b70b9d325cd68d275d582cfdb98c0347d3ce54590aa6533368daed3d2",
"algorithm": "file-sha256",
"name": "d0/d1/d2/d3/d4/f1"
},
{
"digest": "5f597e6a92d1324d9adbed43d527926d11d0131487baf315e65ae1ef3b1ca3c0",
"algorithm": "file-sha256",
"name": "d0/d1/d2/d3/d4/f2"
},
{
"digest": "eaf677c35fec6b87889d9e4563d8bb65dcb9869ca0225697c9cc44cf49dca008",
"algorithm": "file-sha256",
"name": "d0/d1/d2/d3/d4/f3"
}
]
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"_type": "https://in-toto.io/Statement/v1",
"subject": [
{
"digest": {
"sha256": "5df6e0e2761359d30a8275058e299fcc0381534545f55cf43e41983f5d4c9456"
}
}
],
"predicateType": "https://model_signing/DigestOfDigests/v0.1",
"predicate": {
"files": [
{
"digest": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
"algorithm": "file-sha256",
"name": "."
}
]
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{
"_type": "https://in-toto.io/Statement/v1",
"subject": [
{
"digest": {
"sha256": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
}
}
],
"predicateType": "https://model_signing/DigestOfDigests/v0.1",
"predicate": {
"files": []
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"_type": "https://in-toto.io/Statement/v1",
"subject": [
{
"digest": {
"sha256": "5df6e0e2761359d30a8275058e299fcc0381534545f55cf43e41983f5d4c9456"
}
}
],
"predicateType": "https://model_signing/DigestOfDigests/v0.1",
"predicate": {
"files": [
{
"digest": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
"algorithm": "file-sha256",
"name": "empty_file"
}
]
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"_type": "https://in-toto.io/Statement/v1",
"subject": [
{
"digest": {
"sha256": "002d162f867c5eee944e5080d25829b6625be0e3f081f6fbafc7dd655ca2e178"
}
}
],
"predicateType": "https://model_signing/DigestOfDigests/v0.1",
"predicate": {
"files": [
{
"digest": "3aab065c7181a173b5dd9e9d32a9f79923440b413be1e1ffcdba26a7365f719b",
"algorithm": "file-sha256",
"name": "."
}
]
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
{
"_type": "https://in-toto.io/Statement/v1",
"subject": [
{
"digest": {
"sha256": "143cc682e555951649f18e2761c3d526d2502996f5e32dc187ef7f8a614f8df7"
}
}
],
"predicateType": "https://model_signing/DigestOfDigests/v0.1",
"predicate": {
"files": [
{
"digest": "fdd8925354242a7fd1515e79534317b800015607a609cd306e0b4dcfe6c92249",
"algorithm": "file-sha256",
"name": "d0/f00"
},
{
"digest": "e16940b5e44ce981150bda37c4ba95881a749a521b4a297c5cdf97bdcfe965e6",
"algorithm": "file-sha256",
"name": "d0/f01"
},
{
"digest": "407822246ea8f9e26380842c3f4cd10d7b23e78f1fe7c74c293608682886a426",
"algorithm": "file-sha256",
"name": "d0/f02"
},
{
"digest": "6a3b08b5df77c4d418ceee1ac136a9ad49fc7c41358b5e82c1176daccb21ff3f",
"algorithm": "file-sha256",
"name": "d1/f10"
},
{
"digest": "a484b3d8ea5e99b75f9f123f9a42c882388693edc7d85d82ccba54834712cadf",
"algorithm": "file-sha256",
"name": "d1/f11"
},
{
"digest": "8f577930f5f40c2c2133cb299d36f9527fde98c1608569017cae6b5bcd01abb3",
"algorithm": "file-sha256",
"name": "d1/f12"
},
{
"digest": "997b37cc51f1ca1c7a270466607e26847429cd7264c30148c1b9352e224083fc",
"algorithm": "file-sha256",
"name": "f0"
},
{
"digest": "c88a04d48353133fb065ba2c8ab369abab21395b9526aa20373ad828915fa7ae",
"algorithm": "file-sha256",
"name": "f1"
},
{
"digest": "700e3ba5065d8dd47e41fd928ea086670d628f891ba363be0ca3c31d20d7d719",
"algorithm": "file-sha256",
"name": "f2"
},
{
"digest": "912bcf5ebdf44dc7b4085b07940e0a81d157fba24b276e73fd911121d4544c4a",
"algorithm": "file-sha256",
"name": "f3"
}
]
}
}
Loading