Skip to content

Commit

Permalink
Fix private repository backup (#28)
Browse files Browse the repository at this point in the history
  • Loading branch information
amitsaha authored Apr 22, 2019
1 parent bc6af8e commit 0005256
Show file tree
Hide file tree
Showing 8 changed files with 117 additions and 17 deletions.
33 changes: 29 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ GitLab (including custom GitLab installations).
pull requests or other data associated with a git repository. This may or may not be in the future
scope of this tool.

If you are following along my Linux Journal article, please obtain the version of the source tagged
with [lj-0.1](https://github.com/amitsaha/gitbackup/releases/tag/lj-0.1).
If you are following along my Linux Journal article (published in 2017), please obtain the version of the
source tagged with [lj-0.1](https://github.com/amitsaha/gitbackup/releases/tag/lj-0.1).

## Installing `gitbackup`

Expand All @@ -23,6 +23,28 @@ and architecture and copy the binary somewhere in your ``$PATH``. It is recommen
backing up GitHub repositories and [GitLab personal access token](https://gitlab.com/profile/personal_access_tokens)
for GitLab. You can supply the token to ``gitbackup`` using ``GITHUB_TOKEN`` and ``GITLAB_TOKEN`` environment variables respectively.

### OAuth Scopes required

#### GitHub

- `repo`: Reading repositories, including private repositories
- `user - read:user`: Reading the authenticated user's details. This is needed to retrieve the username, which in turn is needed for backing up private repositories.

#### GitLab

- `api`: Grants complete read/write access to the API, including all groups and projects.
For some reason, `read_user` and `read_repository` are not sufficient.

### Security and credentials

When you provide the tokens via environment variables, they remain accessible in your shell history
and via the process's environment for the lifetime of the process. By default, SSH authentication
is used to clone your repositories. If `use-https-clone` is specified, private repositories
are cloned via `https` basic auth and the token provided will be stored in the repositories'
`.git/config`.

### Examples

Typing ``-help`` will display the command line options that `gitbackup` recognizes:

```
Expand All @@ -38,8 +60,12 @@ Usage of ./bin/gitbackup:
Project type to clone (all, owner, member) (default "all")
-gitlab.projectVisibility string
Visibility level of Projects to clone (internal, public, private) (default "internal")
-ignore-private
Ignore private repositories/projects
-service string
Git Hosted Service Name (github/gitlab)
Git Hosted Service Name (github/gitlab)
-use-https-clone
Use HTTPS for cloning instead of SSH
```
### Backing up your GitHub repositories

Expand Down Expand Up @@ -123,7 +149,6 @@ Similarly, it will create a ``gitlab.com`` directory, if you are backing up repo
If you have specified a Git Host URL, it will create a directory structure ``data/host-url/``.



## Building

If you have Golang 1.12.x+ installed, you can clone the repository and:
Expand Down
16 changes: 14 additions & 2 deletions backup.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,20 @@ func backUp(backupDir string, repo *Repository, wg *sync.WaitGroup) ([]byte, err
cmd := execCommand(gitCommand, "-C", repoDir, "pull")
stdoutStderr, err = cmd.CombinedOutput()
} else {
log.Printf("Cloning %s \n", repo.Name)
cmd := execCommand(gitCommand, "clone", repo.GitURL, repoDir)
log.Printf("Cloning %s\n", repo.Name)
log.Printf("%#v\n", repo)

if repo.Private && useHTTPSClone != nil && *useHTTPSClone && ignorePrivate != nil && !*ignorePrivate {
// Add username and token to the clone URL
// https://gitlab.com/amitsaha/testproject1 => https://amitsaha:[email protected]/amitsaha/testproject1
u, err := url.Parse(repo.CloneURL)
if err != nil {
log.Fatalf("Invalid clone URL: %v\n", err)
}
repo.CloneURL = u.Scheme + "://" + gitHostUsername + ":" + gitHostToken + "@" + u.Host + u.Path
}

cmd := execCommand(gitCommand, "clone", repo.CloneURL, repoDir)
stdoutStderr, err = cmd.CombinedOutput()
}

Expand Down
5 changes: 3 additions & 2 deletions backup_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,13 @@ package main

import (
"fmt"
"github.com/spf13/afero"
"os"
"os/exec"
"path"
"sync"
"testing"

"github.com/spf13/afero"
)

func fakePullCommand(command string, args ...string) (cmd *exec.Cmd) {
Expand All @@ -28,7 +29,7 @@ func fakeCloneCommand(command string, args ...string) (cmd *exec.Cmd) {

func TestBackup(t *testing.T) {
var wg sync.WaitGroup
repo := Repository{Name: "testrepo", GitURL: "git://foo.com/foo"}
repo := Repository{Name: "testrepo", CloneURL: "git://foo.com/foo"}
backupDir := "/tmp/backupdir"

// Memory FS
Expand Down
2 changes: 2 additions & 0 deletions client.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ func newClient(service string, gitHostURL string) interface{} {
if githubToken == "" {
log.Fatal("GITHUB_TOKEN environment variable not set")
}
gitHostToken = githubToken
ts := oauth2.StaticTokenSource(
&oauth2.Token{AccessToken: githubToken},
)
Expand All @@ -46,6 +47,7 @@ func newClient(service string, gitHostURL string) interface{} {
if gitlabToken == "" {
log.Fatal("GITLAB_TOKEN environment variable not set")
}
gitHostToken = gitlabToken
client := gitlab.NewClient(nil, gitlabToken)
if gitHostURLParsed != nil {
client.SetBaseURL(gitHostURLParsed.String())
Expand Down
35 changes: 35 additions & 0 deletions helpers.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
package main

import (
"context"
"log"

"github.com/google/go-github/github"
gitlab "github.com/xanzy/go-gitlab"
)

// getUsername returns the login (account handle) of the user that the given
// API client is authenticated as.
//
// client is expected to be the value produced by newClient for the same
// service: a *github.Client when service is "github", or a *gitlab.Client when
// service is "gitlab". For any other service, or when the host does not report
// a login, the empty string is returned and the caller decides whether that is
// fatal.
//
// The process is terminated via log.Fatalf when client is nil, when client is
// not of the type matching service, or when the API request fails.
func getUsername(client interface{}, service string) string {
	if client == nil {
		log.Fatalf("Couldn't acquire a client to talk to %s", service)
	}

	switch service {
	case "github":
		gh, ok := client.(*github.Client)
		if !ok {
			log.Fatalf("Unexpected client type %T for service %s", client, service)
		}
		// An empty user name requests the authenticated user's own record.
		user, _, err := gh.Users.Get(context.Background(), "")
		if err != nil {
			log.Fatalf("Error retrieving username: %v", err)
		}
		// Login is the account handle used for authentication. Name is only
		// the optional display name: it may be unset (nil) and is not valid
		// as the user part of an HTTPS clone URL.
		if user == nil || user.Login == nil {
			return ""
		}
		return *user.Login

	case "gitlab":
		gl, ok := client.(*gitlab.Client)
		if !ok {
			log.Fatalf("Unexpected client type %T for service %s", client, service)
		}
		user, _, err := gl.Users.CurrentUser()
		if err != nil {
			log.Fatalf("Error retrieving username: %v", err)
		}
		if user == nil {
			return ""
		}
		return user.Username
	}

	return ""
}
13 changes: 13 additions & 0 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,11 @@ import (
// concurrent git clones
var MaxConcurrentClones = 20

// Process-wide settings shared by main, newClient, getRepositories and backUp.
var (
	// gitHostUsername is the account name on the Git host, filled in by main
	// from getUsername; backUp embeds it in HTTPS clone URLs.
	gitHostUsername string

	// gitHostToken is the access token for the selected service, recorded by
	// newClient; backUp embeds it in HTTPS clone URLs.
	gitHostToken string

	// ignorePrivate holds the value of the -ignore-private flag. It stays nil
	// until main registers the flag, so readers must nil-check it.
	ignorePrivate *bool

	// useHTTPSClone holds the value of the -use-https-clone flag. It stays nil
	// until main registers the flag, so readers must nil-check it.
	useHTTPSClone *bool
)

func main() {

// Used for waiting for all the goroutines to finish before exiting
Expand All @@ -26,6 +31,8 @@ func main() {
service := flag.String("service", "", "Git Hosted Service Name (github/gitlab)")
githostURL := flag.String("githost.url", "", "DNS of the custom Git host")
backupDir := flag.String("backupdir", "", "Backup directory")
ignorePrivate = flag.Bool("ignore-private", false, "Ignore private repositories/projects")
useHTTPSClone = flag.Bool("use-https-clone", false, "Use HTTPS for cloning instead of SSH")

// GitHub specific flags
githubRepoType := flag.String("github.repoType", "all", "Repo types to backup (all, owner, member)")
Expand All @@ -42,6 +49,12 @@ func main() {
*backupDir = setupBackupDir(*backupDir, *service, *githostURL)
tokens := make(chan bool, MaxConcurrentClones)
client := newClient(*service, *githostURL)

gitHostUsername = getUsername(client, *service)

if len(gitHostUsername) == 0 && !*ignorePrivate && *useHTTPSClone {
log.Fatal("Your Git host's username is needed for backing up private repositories via HTTPS")
}
repos, err := getRepositories(client, *service, *githubRepoType, *gitlabRepoVisibility, *gitlabProjectMembership)
if err != nil {
log.Fatal(err)
Expand Down
18 changes: 15 additions & 3 deletions repositories.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,10 @@ type Response struct {
// Repository is a container for the details for a repository
// we will backup
type Repository struct {
GitURL string
CloneURL string
Name string
Namespace string
Private bool
}

func getRepositories(client interface{}, service string, githubRepoType string, gitlabRepoVisibility string, gitlabProjectType string) ([]*Repository, error) {
Expand All @@ -42,6 +43,7 @@ func getRepositories(client interface{}, service string, githubRepoType string,
}

var repositories []*Repository
var cloneURL string

if service == "github" {
ctx := context.Background()
Expand All @@ -51,7 +53,12 @@ func getRepositories(client interface{}, service string, githubRepoType string,
if err == nil {
for _, repo := range repos {
namespace := strings.Split(*repo.FullName, "/")[0]
repositories = append(repositories, &Repository{GitURL: *repo.GitURL, Name: *repo.Name, Namespace: namespace})
if useHTTPSClone != nil && *useHTTPSClone {
cloneURL = *repo.CloneURL
} else {
cloneURL = *repo.SSHURL
}
repositories = append(repositories, &Repository{CloneURL: cloneURL, Name: *repo.Name, Namespace: namespace, Private: *repo.Private})
}
} else {
return nil, err
Expand Down Expand Up @@ -103,7 +110,12 @@ func getRepositories(client interface{}, service string, githubRepoType string,
if err == nil {
for _, repo := range repos {
namespace := strings.Split(repo.PathWithNamespace, "/")[0]
repositories = append(repositories, &Repository{GitURL: repo.SSHURLToRepo, Name: repo.Name, Namespace: namespace})
if useHTTPSClone != nil && *useHTTPSClone {
cloneURL = repo.WebURL
} else {
cloneURL = repo.SSHURLToRepo
}
repositories = append(repositories, &Repository{CloneURL: cloneURL, Name: repo.Name, Namespace: namespace, Private: repo.Public})
}
} else {
return nil, err
Expand Down
12 changes: 6 additions & 6 deletions repositories_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import (
"testing"

"github.com/google/go-github/github"
"github.com/xanzy/go-gitlab"
gitlab "github.com/xanzy/go-gitlab"
)

var (
Expand Down Expand Up @@ -57,15 +57,15 @@ func TestGetGitHubRepositories(t *testing.T) {
defer teardown()

mux.HandleFunc("/user/repos", func(w http.ResponseWriter, r *http.Request) {
fmt.Fprint(w, `[{"full_name": "test/r1", "id":1, "git_url": "git://github.com/u/r1", "name": "r1"}]`)
fmt.Fprint(w, `[{"full_name": "test/r1", "id":1, "ssh_url": "https://github.com/u/r1", "name": "r1", "private": false}]`)
})

repos, err := getRepositories(GitHubClient, "github", "all", "", "")
if err != nil {
t.Fatalf("%v", err)
}
var expected []*Repository
expected = append(expected, &Repository{Namespace: "test", GitURL: "git://github.com/u/r1", Name: "r1"})
expected = append(expected, &Repository{Namespace: "test", CloneURL: "https://github.com/u/r1", Name: "r1", Private: false})
if !reflect.DeepEqual(repos, expected) {
t.Errorf("Expected %+v, Got %+v", expected, repos)
}
Expand All @@ -76,15 +76,15 @@ func TestGetGitLabRepositories(t *testing.T) {
defer teardown()

mux.HandleFunc("/api/v4/projects", func(w http.ResponseWriter, r *http.Request) {
fmt.Fprint(w, `[{"path_with_namespace": "test/r1", "id":1, "ssh_url_to_repo": "git://gitlab.com/u/r1", "name": "r1"}]`)
fmt.Fprint(w, `[{"path_with_namespace": "test/r1", "id":1, "ssh_url_to_repo": "https://gitlab.com/u/r1", "name": "r1"}]`)
})

repos, err := getRepositories(GitLabClient, "gitlab", "internal", "","")
repos, err := getRepositories(GitLabClient, "gitlab", "internal", "", "")
if err != nil {
t.Fatalf("%v", err)
}
var expected []*Repository
expected = append(expected, &Repository{Namespace: "test", GitURL: "git://gitlab.com/u/r1", Name: "r1"})
expected = append(expected, &Repository{Namespace: "test", CloneURL: "https://gitlab.com/u/r1", Name: "r1"})
if !reflect.DeepEqual(repos, expected) {
for i := 0; i < len(repos); i++ {
t.Errorf("Expected %+v, Got %+v", expected[i], repos[i])
Expand Down

0 comments on commit 0005256

Please sign in to comment.