|
24 | 24 |
|
25 | 25 | from in_toto_attestation.v1 import statement
|
26 | 26 |
|
| 27 | +from model_signing.hashing import memory |
27 | 28 | from model_signing.manifest import manifest as manifest_module
|
28 | 29 | from model_signing.signing import signing
|
29 | 30 |
|
@@ -123,3 +124,133 @@ def from_manifest(cls, manifest: manifest_module.Manifest) -> Self:
|
123 | 124 | digest_hex=digest.digest_hex,
|
124 | 125 | digest_algorithm=digest.algorithm,
|
125 | 126 | )
|
| 127 | + |
| 128 | + |
def _convert_descriptors_to_hashed_statement(
    manifest: manifest_module.Manifest,
    *,
    predicate_type: str,
    predicate_top_level_name: str,
):
    """Builds an in-toto statement whose single subject is a digest of digests.

    Every resource descriptor of the manifest is recorded in the predicate
    (name, hex digest and algorithm). The statement's only subject carries a
    SHA256 computed over the digest values of those descriptors, fed to the
    hasher in the order in which `manifest.resource_descriptors()` yields
    them. The resulting subject digest therefore depends on that order.

    Args:
        manifest: The manifest to extract the descriptors from. Assumed valid.
        predicate_type: The predicate_type to use in the in-toto statement.
        predicate_top_level_name: Key under which the list of per-resource
          entries is stored in the predicate.

    Returns:
        The in-toto statement with one subject (the digest of digests) and a
        predicate listing every resource of the manifest.
    """
    digest_accumulator = memory.SHA256()
    predicate_entries = []

    for resource in manifest.resource_descriptors():
        # Each per-resource digest contributes to the subject hash and is
        # also listed verbatim in the predicate.
        digest_accumulator.update(resource.digest.digest_value)
        entry = {
            "name": resource.identifier,
            "digest": resource.digest.digest_hex,
            "algorithm": resource.digest.algorithm,
        }
        predicate_entries.append(entry)

    combined_hex = digest_accumulator.compute().digest_hex
    subject = statement.ResourceDescriptor(digest={"sha256": combined_hex}).pb

    return statement.Statement(
        subjects=[subject],
        predicate_type=predicate_type,
        predicate={predicate_top_level_name: predicate_entries},
    )
| 161 | + |
| 162 | + |
class DigestOfDigestsIntotoPayload(IntotoPayload):
    """In-toto payload whose subject is a digest over all model file digests.

    Intended for manifests that pair every file of the model with its own
    digest. Existing tooling only understands established hashing algorithms,
    so each per-file digest is recorded in the predicate, while the subject is
    a single sha256 computed over the concatenation of the file digests.

    The subject digest depends on the order in which the manifest yields its
    descriptors; for the result to be deterministic the manifest is expected
    to yield them in a stable order (sorted alphabetically by file name).

    Example:
    ```json
    {
      "_type": "https://in-toto.io/Statement/v1",
      "subject": [
        {
          "digest": {
            "sha256": "18b5a4..."
          }
        }
      ],
      "predicateType": "https://model_signing/DigestOfDigests/v0.1",
      "predicate": {
        "files": [
          {
            "digest": "6efa14...",
            "algorithm": "file-sha256",
            "name": "d0/d1/d2/d3/d4/f0"
          },
          {
            "digest": "a9bc14...",
            "algorithm": "file-sha256",
            "name": "d0/d1/d2/d3/d4/f1"
          },
          {
            "digest": "5f597e...",
            "algorithm": "file-sha256",
            "name": "d0/d1/d2/d3/d4/f2"
          },
          {
            "digest": "eaf677...",
            "algorithm": "file-sha256",
            "name": "d0/d1/d2/d3/d4/f3"
          }
        ]
      }
    }
    ```

    A payload with no predicate, or with a predicate entry lacking a valid
    name, digest, or algorithm, should be treated as invalid and must fail
    integrity verification.

    See also https://github.com/sigstore/sigstore-python/issues/1018.
    """

    predicate_type: Final[str] = "https://model_signing/DigestOfDigests/v0.1"

    def __init__(self, statement: statement.Statement):
        """Wraps an already built statement as this kind of in-toto payload.

        Production code should not call this directly; use `from_manifest()`
        instead.

        Args:
            statement: The DSSE statement representing this in-toto payload.
        """
        self.statement = statement

    @classmethod
    @override
    def from_manifest(cls, manifest: manifest_module.Manifest) -> Self:
        """Builds the signing payload corresponding to a manifest.

        Only manifests that pair every model file with its own digest are
        accepted. Currently, this is only `FileLevelManifest`.

        Args:
            manifest: the manifest to convert to signing payload.

        Returns:
            An instance of `DigestOfDigestsIntotoPayload`.

        Raises:
            TypeError: If the manifest is not `FileLevelManifest`.
        """
        if isinstance(manifest, manifest_module.FileLevelManifest):
            return cls(
                _convert_descriptors_to_hashed_statement(
                    manifest,
                    predicate_type=cls.predicate_type,
                    predicate_top_level_name="files",
                )
            )

        raise TypeError("Only FileLevelManifest is supported")
0 commit comments