From 641487ca0a856e6dcee2403ad9bcd1333c02850a Mon Sep 17 00:00:00 2001 From: Rob Best Date: Wed, 17 Apr 2024 08:55:37 +0100 Subject: [PATCH 1/2] ghcr.io: List packages based on whether owner is an org or a user We were previously using releases to figure out tags for a Github package. I think this was wrong. Not all release tags will be pushed as package versions, and vice versa. Someone may choose to use GHCR without taking advantage of releases at all. I've modified it so that the client will check if the owner is a user or an org and then make the appropriate PackageGetAllVersions function call to retrieve the tags. I've also fixed a few other things I ran into while testing this: 1. Create the github client in `New` so that rate limiting and other goodness is reused across calls. 2. Fix `RepoImageFromPath`, so it will split the repository path into the 'owner' and 'repo' segments that `Tags` expects. Previously we would have got errors for subrepositories. 3. Update `TestRepoImage` to ensure it doesn't panic on unexpected inputs. 4. We don't need to use regex to match `ghcr.io` to `ghcr.io`. 5. If we're excluding `.att` tags then we should probably exclude `.sig` and `.sbom` too. --- pkg/client/ghcr/ghcr.go | 114 +++++++++++++---------------------- pkg/client/ghcr/path.go | 19 +++--- pkg/client/ghcr/path_test.go | 16 ++++- 3 files changed, 64 insertions(+), 85 deletions(-) diff --git a/pkg/client/ghcr/ghcr.go b/pkg/client/ghcr/ghcr.go index 33595840..125a5d97 100644 --- a/pkg/client/ghcr/ghcr.go +++ b/pkg/client/ghcr/ghcr.go @@ -3,9 +3,8 @@ package ghcr import ( "context" "fmt" - "net/http" + "net/url" "strings" - "time" "github.com/gofri/go-github-ratelimit/github_ratelimit" "github.com/google/go-github/v58/github" @@ -17,27 +16,10 @@ type Options struct { } type Client struct { - *http.Client - Options + client *github.Client } func New(opts Options) *Client { - return &Client{ - Options: opts, - Client: &http.Client{ - Timeout: time.Second * 5, - }, - } -} - -func (c *Client) Name() string { - return "ghcr" -} - -func (c *Client) Tags(ctx context.Context, host, owner, repo string) ([]api.ImageTag, error) { - var err error - var tags []api.ImageTag - rateLimitDetection := func(ctx *github_ratelimit.CallbackContext) { fmt.Printf("Hit Github Rate Limit, sleeping for %v", ctx.TotalSleepTime) } @@ -47,42 +29,47 @@ func (c *Client) Tags(ctx context.Context, host, owner, repo string) ([]api.Imag if err != nil { panic(err) } - client := github.NewClient(ghRateLimiter).WithAuthToken(c.Token) + client := github.NewClient(ghRateLimiter).WithAuthToken(opts.Token) - if repoExist(client, owner, repo) { - tags, err = getTagsFromRelease(client, owner, repo) - if err != nil { - return nil, err - } - } else { - - tags, err = getTagsFromOrgPackage(client, owner, repo) - if err != nil { - return nil, err - } + return &Client{ + client: client, } - return tags, nil } -func repoExist(client *github.Client, owner string, repo string) bool { - _, _, err := client.Repositories.Get(context.TODO(), owner, repo) - return err == nil +func (c *Client) Name() string { + return "ghcr" } -func getTagsFromOrgPackage(client *github.Client, owner string, repo string) ([]api.ImageTag, error) { - var tags []api.ImageTag - packageType := "container" - packageState := "active" +func (c *Client) Tags(ctx context.Context, host, owner, repo string) ([]api.ImageTag, error) { + // Choose the correct list packages function based on whether the owner + // is a user or an organization + getAllVersions := c.client.Organizations.PackageGetAllVersions + user, _, err := c.client.Users.Get(ctx, owner) + if err != nil { + return nil, fmt.Errorf("fetching user: %w", err) + } + if strings.ToLower(user.GetType()) == "user" { + getAllVersions = c.client.Users.PackageGetAllVersions + // The User implementation doesn't path escape this for you: + // - https://github.com/google/go-github/blob/v58.0.0/github/users_packages.go#L136 + // - https://github.com/google/go-github/blob/v58.0.0/github/orgs_packages.go#L105 + repo = url.PathEscape(repo) + } + opts := &github.PackageListOptions{ - PackageType: &packageType, - State: &packageState, - ListOptions: github.ListOptions{PerPage: 100}, + PackageType: github.String("container"), + State: github.String("active"), + ListOptions: github.ListOptions{ + PerPage: 100, + }, } + var tags []api.ImageTag + for { - versions, resp, err := client.Organizations.PackageGetAllVersions(context.TODO(), owner, packageType, repo, opts) + versions, resp, err := getAllVersions(ctx, owner, "container", repo, opts) if err != nil { - return nil, fmt.Errorf("failed to get Org Package Versions: %s", err) + return nil, fmt.Errorf("getting versions: %w", err) } for _, ver := range versions { @@ -96,7 +83,14 @@ func getTagsFromOrgPackage(client *github.Client, owner string, repo string) ([] } for _, tag := range ver.Metadata.Container.Tags { - if strings.HasSuffix(tag, ".att") { // Skip tags that are attestations + // Exclude attestations, signatures and sboms + if strings.HasSuffix(tag, ".att") { + continue + } + if strings.HasSuffix(tag, ".sig") { + continue + } + if strings.HasSuffix(tag, ".sbom") { continue } @@ -110,35 +104,9 @@ func getTagsFromOrgPackage(client *github.Client, owner string, repo string) ([] if resp.NextPage == 0 { break } + opts.ListOptions.Page = resp.NextPage } - return tags, nil -} -func getTagsFromRelease(client *github.Client, owner string, repo string) ([]api.ImageTag, error) { - var tags []api.ImageTag - opt := &github.ListOptions{PerPage: 50} - for { - releases, resp, err := client.Repositories.ListReleases(context.TODO(), owner, repo, opt) - if err != nil { - return nil, fmt.Errorf("failed to get github Releases: %s", err) - } - - for _, rel := range releases { - if rel.TagName == nil { - continue - } - tags = append(tags, api.ImageTag{ - Tag: *rel.TagName, - SHA: "", - Timestamp: rel.PublishedAt.Time, - }) - } - - if resp.NextPage == 0 { - break - } - opt.Page = resp.NextPage - } return tags, nil } diff --git a/pkg/client/ghcr/path.go b/pkg/client/ghcr/path.go index f26586ef..60dbd56d 100644 --- a/pkg/client/ghcr/path.go +++ b/pkg/client/ghcr/path.go @@ -1,20 +1,21 @@ package ghcr import ( - "regexp" "strings" ) -var ( - reg = regexp.MustCompile(`^ghcr.io$`) -) - func (c *Client) IsHost(host string) bool { - return reg.MatchString(host) + return host == "ghcr.io" } func (c *Client) RepoImageFromPath(path string) (string, string) { - lastIndex := strings.LastIndex(path, "/") - - return path[:lastIndex], path[lastIndex+1:] + var owner, pkg string + parts := strings.SplitN(path, "/", 2) + if len(parts) > 0 { + owner = parts[0] + } + if len(parts) > 1 { + pkg = parts[1] + } + return owner, pkg } diff --git a/pkg/client/ghcr/path_test.go b/pkg/client/ghcr/path_test.go index cfec82c2..8be5df7c 100644 --- a/pkg/client/ghcr/path_test.go +++ b/pkg/client/ghcr/path_test.go @@ -53,15 +53,25 @@ func TestRepoImage(t *testing.T) { path string expRepo, expImage string }{ + "empty path should be interpreted as an empty repo and image": { + path: "", + expRepo: "", + expImage: "", + }, + "one segement should be interpreted as 'repo'": { + path: "jetstack-cre", + expRepo: "jetstack-cre", + expImage: "", + }, "two segments to path should return both": { path: "jetstack-cre/version-checker", expRepo: "jetstack-cre", expImage: "version-checker", }, - "multiple segments to path should return all in repo, last segment image": { + "multiple segments to path should return first segment in repo, rest in image": { path: "k8s-artifacts-prod/ingress-nginx/nginx", - expRepo: "k8s-artifacts-prod/ingress-nginx", - expImage: "nginx", + expRepo: "k8s-artifacts-prod", + expImage: "ingress-nginx/nginx", }, } From efb6cc44b9ed2b5ee0fe173f4873e8aa72a2a826 Mon Sep 17 00:00:00 2001 From: Rob Best Date: Wed, 24 Apr 2024 08:35:46 +0100 Subject: [PATCH 2/2] Cache type from Users.Get --- pkg/client/ghcr/ghcr.go | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/pkg/client/ghcr/ghcr.go b/pkg/client/ghcr/ghcr.go index 125a5d97..169a12f0 100644 --- a/pkg/client/ghcr/ghcr.go +++ b/pkg/client/ghcr/ghcr.go @@ -16,7 +16,9 @@ type Options struct { } type Client struct { - client *github.Client + client *github.Client + opts Options + ownerTypes map[string]string } func New(opts Options) *Client { @@ -32,7 +34,9 @@ func New(opts Options) *Client { client := github.NewClient(ghRateLimiter).WithAuthToken(opts.Token) return &Client{ - client: client, + client: client, + opts: opts, + ownerTypes: map[string]string{}, } } @@ -44,11 +48,11 @@ func (c *Client) Tags(ctx context.Context, host, owner, repo string) ([]api.Imag // Choose the correct list packages function based on whether the owner // is a user or an organization getAllVersions := c.client.Organizations.PackageGetAllVersions - user, _, err := c.client.Users.Get(ctx, owner) + ownerType, err := c.ownerType(ctx, owner) if err != nil { - return nil, fmt.Errorf("fetching user: %w", err) + return nil, fmt.Errorf("fetching owner type: %w", err) } - if strings.ToLower(user.GetType()) == "user" { + if ownerType == "user" { getAllVersions = c.client.Users.PackageGetAllVersions // The User implementation doesn't path escape this for you: // - https://github.com/google/go-github/blob/v58.0.0/github/users_packages.go#L136 @@ -110,3 +114,18 @@ func (c *Client) Tags(ctx context.Context, host, owner, repo string) ([]api.Imag return tags, nil } + +func (c *Client) ownerType(ctx context.Context, owner string) (string, error) { + if ownerType, ok := c.ownerTypes[owner]; ok { + return ownerType, nil + } + user, _, err := c.client.Users.Get(ctx, owner) + if err != nil { + return "", fmt.Errorf("fetching user: %w", err) + } + ownerType := strings.ToLower(user.GetType()) + + c.ownerTypes[owner] = ownerType + + return ownerType, nil +}