|
2 | 2 | // Unreferenced will delete all blobs that are not referenced by any manifest.
|
3 | 3 | // Untagged will delete all blobs that are not referenced by any manifest and are not tagged.
|
4 | 4 |
|
5 |
| -import { ServerError } from "../errors"; |
6 | 5 | import { ManifestSchema } from "../manifest";
|
| 6 | +import { hexToDigest } from "../user"; |
| 7 | +import {symlinkHeader} from "./r2"; |
7 | 8 |
|
8 | 9 | export type GarbageCollectionMode = "unreferenced" | "untagged";
|
9 | 10 | export type GCOptions = {
|
@@ -147,7 +148,7 @@ export class GarbageCollector {
|
147 | 148 | }
|
148 | 149 |
|
149 | 150 | private async list(prefix: string, callback: (object: R2Object) => Promise<boolean>): Promise<boolean> {
|
150 |
| - const listed = await this.registry.list({ prefix }); |
| 151 | + const listed = await this.registry.list({ prefix: prefix, include: ["customMetadata"] }); |
151 | 152 | for (const object of listed.objects) {
|
152 | 153 | if ((await callback(object)) === false) {
|
153 | 154 | return false;
|
@@ -182,61 +183,142 @@ export class GarbageCollector {
|
182 | 183 |
|
183 | 184 | private async collectInner(options: GCOptions): Promise<boolean> {
|
184 | 185 | // We can run out of memory, this should be a bloom filter
|
185 |
| - let referencedBlobs = new Set<string>(); |
| 186 | + const manifestList: { [key: string]: Set<string> } = {}; |
186 | 187 | const mark = await this.getInsertionMark(options.name);
|
187 | 188 |
|
| 189 | + // List manifest from repo to be scanned |
188 | 190 | await this.list(`${options.name}/manifests/`, async (manifestObject) => {
|
189 |
| - const tag = manifestObject.key.split("/").pop(); |
190 |
| - if (!tag || (options.mode === "untagged" && tag.startsWith("sha256:"))) { |
191 |
| - return true; |
| 191 | + const currentHashFile = hexToDigest(manifestObject.checksums.sha256!); |
| 192 | + if (manifestList[currentHashFile] === undefined) { |
| 193 | + manifestList[currentHashFile] = new Set<string>(); |
192 | 194 | }
|
193 |
| - const manifest = await this.registry.get(manifestObject.key); |
194 |
| - if (!manifest) { |
195 |
| - return true; |
| 195 | + manifestList[currentHashFile].add(manifestObject.key); |
| 196 | + return true; |
| 197 | + }); |
| 198 | + |
| 199 | + // In untagged mode, search for manifest to delete |
| 200 | + if (options.mode === "untagged") { |
| 201 | + const manifestToRemove = new Set<string>(); |
| 202 | + const referencedManifests = new Set<string>(); |
| 203 | + // List tagged manifest to find manifest-list |
| 204 | + for (const [_, manifests] of Object.entries(manifestList)) { |
| 205 | + const taggedManifest = [...manifests].filter((item) => !item.split("/").pop()?.startsWith("sha256:")); |
| 206 | + for (const manifestPath of taggedManifest) { |
| 207 | + // Tagged manifest found, load the manifest content |
| 208 | + const manifest = await this.registry.get(manifestPath); |
| 209 | + if (!manifest) { |
| 210 | + continue; |
| 211 | + } |
| 212 | + |
| 213 | + const manifestData = (await manifest.json()) as ManifestSchema; |
| 214 | + // Search for manifest list |
| 215 | + if (manifestData.schemaVersion == 2 && "manifests" in manifestData) { |
| 216 | + // Extract referenced manifests from manifest list |
| 217 | + manifestData.manifests.forEach((manifest) => { |
| 218 | + referencedManifests.add(manifest.digest); |
| 219 | + }); |
| 220 | + } |
| 221 | + } |
196 | 222 | }
|
197 | 223 |
|
198 |
| - const manifestData = (await manifest.json()) as ManifestSchema; |
199 |
| - // TODO: garbage collect manifests. |
200 |
| - if ("manifests" in manifestData) { |
201 |
| - return true; |
| 224 | + for (const [key, manifests] of Object.entries(manifestList)) { |
| 225 | + if (referencedManifests.has(key)) { |
| 226 | + continue; |
| 227 | + } |
| 228 | + if (![...manifests].some((item) => !item.split("/").pop()?.startsWith("sha256:"))) { |
| 229 | + // Add untagged manifest that should be removed |
| 230 | + manifests.forEach((manifest) => { |
| 231 | + manifestToRemove.add(manifest); |
| 232 | + }); |
| 233 | + // Manifest to be removed shouldn't be parsed to search for referenced layers |
| 234 | + delete manifestList[key]; |
| 235 | + } |
| 236 | + } |
| 237 | + |
| 238 | + // Deleting untagged manifest |
| 239 | + if (manifestToRemove.size > 0) { |
| 240 | + if (!(await this.checkIfGCCanContinue(options.name, mark))) { |
| 241 | + throw new Error("there is a manifest insertion going, the garbage collection shall stop"); |
| 242 | + } |
| 243 | + |
| 244 | + // GC will delete the untagged manifests |
| 245 | + await this.registry.delete(manifestToRemove.values().toArray()); |
| 246 | + } |
| 247 | + } |
| 248 | + |
| 249 | + const referencedBlobs = new Set<string>(); |
| 250 | + // From manifest, extract referenced layers |
| 251 | + for (const [_, manifests] of Object.entries(manifestList)) { |
| 252 | + // Select only one manifest per unique manifest |
| 253 | + const manifestPath = manifests.values().next().value; |
| 254 | + if (manifestPath === undefined) { |
| 255 | + continue; |
202 | 256 | }
|
| 257 | + const manifest = await this.registry.get(manifestPath); |
| 258 | + // Skip if manifest not found |
| 259 | + if (!manifest) continue; |
| 260 | + |
| 261 | + const manifestData = (await manifest.json()) as ManifestSchema; |
203 | 262 |
|
204 | 263 | if (manifestData.schemaVersion === 1) {
|
205 | 264 | manifestData.fsLayers.forEach((layer) => {
|
206 | 265 | referencedBlobs.add(layer.blobSum);
|
207 | 266 | });
|
208 | 267 | } else {
|
| 268 | + // Skip manifest-list, they don't contain any layers references |
| 269 | + if ("manifests" in manifestData) continue; |
| 270 | + // Add referenced layers from current manifest |
209 | 271 | manifestData.layers.forEach((layer) => {
|
210 | 272 | referencedBlobs.add(layer.digest);
|
211 | 273 | });
|
| 274 | + // Add referenced config blob from current manifest |
| 275 | + referencedBlobs.add(manifestData.config.digest); |
212 | 276 | }
|
| 277 | + } |
213 | 278 |
|
| 279 | + const unreferencedBlobs = new Set<string>(); |
| 280 | + // List blobs to be removed |
| 281 | + await this.list(`${options.name}/blobs/`, async (object) => { |
| 282 | + const blobHash = object.key.split("/").pop(); |
| 283 | + if (blobHash && !referencedBlobs.has(blobHash)) { |
| 284 | + unreferencedBlobs.add(object.key); |
| 285 | + } |
214 | 286 | return true;
|
215 | 287 | });
|
216 | 288 |
|
217 |
| - let unreferencedKeys: string[] = []; |
218 |
| - const deleteThreshold = 15; |
219 |
| - await this.list(`${options.name}/blobs/`, async (object) => { |
220 |
| - const hash = object.key.split("/").pop(); |
221 |
| - if (hash && !referencedBlobs.has(hash)) { |
222 |
| - unreferencedKeys.push(object.key); |
223 |
| - if (unreferencedKeys.length > deleteThreshold) { |
224 |
| - if (!(await this.checkIfGCCanContinue(options.name, mark))) { |
225 |
| - throw new ServerError("there is a manifest insertion going, the garbage collection shall stop"); |
| 289 | + // Check for symlink before removal |
| 290 | + if (unreferencedBlobs.size >= 0) { |
| 291 | + await this.list("", async (object) => { |
| 292 | + const objectPath = object.key; |
| 293 | + // Skip non-blob objects and objects from the current repository (symlinks only target cross-repository blobs) |
| 294 | + if (objectPath.startsWith(`${options.name}/`) || !objectPath.includes("/blobs/sha256:")) { |
| 295 | + return true; |
| 296 | + } |
| 297 | + if (object.customMetadata && object.customMetadata[symlinkHeader] !== undefined) { |
| 298 | + // Skip symlinks that do not target the repository currently being garbage collected |
| 299 | + if (object.customMetadata[symlinkHeader] !== options.name) return true; |
| 300 | + // Get symlink blob to retrieve its target |
| 301 | + const symlinkBlob = await this.registry.get(object.key); |
| 302 | + // Skip if symlinkBlob not found |
| 303 | + if (!symlinkBlob) return true; |
| 304 | + // Get the path of the target blob from the symlink blob |
| 305 | + const targetBlobPath = await symlinkBlob.text(); |
| 306 | + if (unreferencedBlobs.has(targetBlobPath)) { |
| 307 | + // This symlink targets a blob that was scheduled for removal, so keep that blob |
| 308 | + unreferencedBlobs.delete(targetBlobPath); |
226 | 309 | }
|
227 |
| - |
228 |
| - await this.registry.delete(unreferencedKeys); |
229 |
| - unreferencedKeys = []; |
230 | 310 | }
|
231 |
| - } |
232 |
| - return true; |
233 |
| - }); |
234 |
| - if (unreferencedKeys.length > 0) { |
| 311 | + return unreferencedBlobs.size > 0; |
| 312 | + }); |
| 313 | + } |
| 314 | + |
| 315 | + if (unreferencedBlobs.size > 0) { |
235 | 316 | if (!(await this.checkIfGCCanContinue(options.name, mark))) {
|
236 | 317 | throw new Error("there is a manifest insertion going, the garbage collection shall stop");
|
237 | 318 | }
|
238 | 319 |
|
239 |
| - await this.registry.delete(unreferencedKeys); |
| 320 | + // GC will delete unreferenced blobs |
| 321 | + await this.registry.delete(unreferencedBlobs.values().toArray()); |
240 | 322 | }
|
241 | 323 |
|
242 | 324 | return true;
|
|
0 commit comments