Skip to content
This repository was archived by the owner on Sep 11, 2020. It is now read-only.

Commit 4a62292

Browse files
authored
Merge pull request #1128 from filipnavara/commitgraph-fmt
plumbing: format/commitgraph, add APIs for reading and writing commit-graph files
2 parents 44a20de + ab5b89c commit 4a62292

File tree

5 files changed

+689
-0
lines changed

5 files changed

+689
-0
lines changed
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
package commitgraph
2+
3+
import (
4+
"time"
5+
6+
"gopkg.in/src-d/go-git.v4/plumbing"
7+
)
8+
9+
// Node is a reduced representation of Commit as presented in the commit graph
10+
// file. It is merely useful as an optimization for walking the commit graphs.
11+
type Node struct {
12+
// TreeHash is the hash of the root tree of the commit.
13+
TreeHash plumbing.Hash
14+
// ParentIndexes are the indexes of the parent commits of the commit.
15+
ParentIndexes []int
16+
// ParentHashes are the hashes of the parent commits of the commit.
17+
ParentHashes []plumbing.Hash
18+
// Generation number is the pre-computed generation in the commit graph
19+
// or zero if not available
20+
Generation int
21+
// When is the timestamp of the commit.
22+
When time.Time
23+
}
24+
25+
// Index represents a representation of commit graph that allows indexed
26+
// access to the nodes using commit object hash
27+
type Index interface {
28+
// GetIndexByHash gets the index in the commit graph from commit hash, if available
29+
GetIndexByHash(h plumbing.Hash) (int, error)
30+
// GetNodeByIndex gets the commit node from the commit graph using index
31+
// obtained from child node, if available
32+
GetNodeByIndex(i int) (*Node, error)
33+
// Hashes returns all the hashes that are available in the index
34+
Hashes() []plumbing.Hash
35+
}
Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
package commitgraph_test
2+
3+
import (
4+
"io/ioutil"
5+
"os"
6+
"path"
7+
"testing"
8+
9+
"golang.org/x/exp/mmap"
10+
11+
. "gopkg.in/check.v1"
12+
"gopkg.in/src-d/go-git-fixtures.v3"
13+
"gopkg.in/src-d/go-git.v4/plumbing"
14+
"gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph"
15+
)
16+
17+
// Test passes t to gocheck's TestingT, which runs the suites registered
// through Suite.
func Test(t *testing.T) {
	TestingT(t)
}
18+
19+
type CommitgraphSuite struct {
20+
fixtures.Suite
21+
}
22+
23+
var _ = Suite(&CommitgraphSuite{})
24+
25+
func testDecodeHelper(c *C, path string) {
26+
reader, err := mmap.Open(path)
27+
c.Assert(err, IsNil)
28+
defer reader.Close()
29+
index, err := commitgraph.OpenFileIndex(reader)
30+
c.Assert(err, IsNil)
31+
32+
// Root commit
33+
nodeIndex, err := index.GetIndexByHash(plumbing.NewHash("347c91919944a68e9413581a1bc15519550a3afe"))
34+
c.Assert(err, IsNil)
35+
node, err := index.GetNodeByIndex(nodeIndex)
36+
c.Assert(err, IsNil)
37+
c.Assert(len(node.ParentIndexes), Equals, 0)
38+
c.Assert(len(node.ParentHashes), Equals, 0)
39+
40+
// Regular commit
41+
nodeIndex, err = index.GetIndexByHash(plumbing.NewHash("e713b52d7e13807e87a002e812041f248db3f643"))
42+
c.Assert(err, IsNil)
43+
node, err = index.GetNodeByIndex(nodeIndex)
44+
c.Assert(err, IsNil)
45+
c.Assert(len(node.ParentIndexes), Equals, 1)
46+
c.Assert(len(node.ParentHashes), Equals, 1)
47+
c.Assert(node.ParentHashes[0].String(), Equals, "347c91919944a68e9413581a1bc15519550a3afe")
48+
49+
// Merge commit
50+
nodeIndex, err = index.GetIndexByHash(plumbing.NewHash("b29328491a0682c259bcce28741eac71f3499f7d"))
51+
c.Assert(err, IsNil)
52+
node, err = index.GetNodeByIndex(nodeIndex)
53+
c.Assert(err, IsNil)
54+
c.Assert(len(node.ParentIndexes), Equals, 2)
55+
c.Assert(len(node.ParentHashes), Equals, 2)
56+
c.Assert(node.ParentHashes[0].String(), Equals, "e713b52d7e13807e87a002e812041f248db3f643")
57+
c.Assert(node.ParentHashes[1].String(), Equals, "03d2c021ff68954cf3ef0a36825e194a4b98f981")
58+
59+
// Octopus merge commit
60+
nodeIndex, err = index.GetIndexByHash(plumbing.NewHash("6f6c5d2be7852c782be1dd13e36496dd7ad39560"))
61+
c.Assert(err, IsNil)
62+
node, err = index.GetNodeByIndex(nodeIndex)
63+
c.Assert(err, IsNil)
64+
c.Assert(len(node.ParentIndexes), Equals, 3)
65+
c.Assert(len(node.ParentHashes), Equals, 3)
66+
c.Assert(node.ParentHashes[0].String(), Equals, "ce275064ad67d51e99f026084e20827901a8361c")
67+
c.Assert(node.ParentHashes[1].String(), Equals, "bb13916df33ed23004c3ce9ed3b8487528e655c1")
68+
c.Assert(node.ParentHashes[2].String(), Equals, "a45273fe2d63300e1962a9e26a6b15c276cd7082")
69+
70+
// Check all hashes
71+
hashes := index.Hashes()
72+
c.Assert(len(hashes), Equals, 11)
73+
c.Assert(hashes[0].String(), Equals, "03d2c021ff68954cf3ef0a36825e194a4b98f981")
74+
c.Assert(hashes[10].String(), Equals, "e713b52d7e13807e87a002e812041f248db3f643")
75+
}
76+
77+
func (s *CommitgraphSuite) TestDecode(c *C) {
78+
fixtures.ByTag("commit-graph").Test(c, func(f *fixtures.Fixture) {
79+
dotgit := f.DotGit()
80+
testDecodeHelper(c, path.Join(dotgit.Root(), "objects", "info", "commit-graph"))
81+
})
82+
}
83+
84+
func (s *CommitgraphSuite) TestReencode(c *C) {
85+
fixtures.ByTag("commit-graph").Test(c, func(f *fixtures.Fixture) {
86+
dotgit := f.DotGit()
87+
88+
reader, err := mmap.Open(path.Join(dotgit.Root(), "objects", "info", "commit-graph"))
89+
c.Assert(err, IsNil)
90+
defer reader.Close()
91+
index, err := commitgraph.OpenFileIndex(reader)
92+
c.Assert(err, IsNil)
93+
94+
writer, err := ioutil.TempFile(dotgit.Root(), "commit-graph")
95+
c.Assert(err, IsNil)
96+
tmpName := writer.Name()
97+
defer os.Remove(tmpName)
98+
encoder := commitgraph.NewEncoder(writer)
99+
err = encoder.Encode(index)
100+
c.Assert(err, IsNil)
101+
writer.Close()
102+
103+
testDecodeHelper(c, tmpName)
104+
})
105+
}
106+
107+
func (s *CommitgraphSuite) TestReencodeInMemory(c *C) {
108+
fixtures.ByTag("commit-graph").Test(c, func(f *fixtures.Fixture) {
109+
dotgit := f.DotGit()
110+
111+
reader, err := mmap.Open(path.Join(dotgit.Root(), "objects", "info", "commit-graph"))
112+
c.Assert(err, IsNil)
113+
index, err := commitgraph.OpenFileIndex(reader)
114+
c.Assert(err, IsNil)
115+
memoryIndex := commitgraph.NewMemoryIndex()
116+
for i, hash := range index.Hashes() {
117+
node, err := index.GetNodeByIndex(i)
118+
c.Assert(err, IsNil)
119+
err = memoryIndex.Add(hash, node)
120+
c.Assert(err, IsNil)
121+
}
122+
reader.Close()
123+
124+
writer, err := ioutil.TempFile(dotgit.Root(), "commit-graph")
125+
c.Assert(err, IsNil)
126+
tmpName := writer.Name()
127+
defer os.Remove(tmpName)
128+
encoder := commitgraph.NewEncoder(writer)
129+
err = encoder.Encode(memoryIndex)
130+
c.Assert(err, IsNil)
131+
writer.Close()
132+
133+
testDecodeHelper(c, tmpName)
134+
})
135+
}
Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,189 @@
1+
package commitgraph
2+
3+
import (
4+
"crypto/sha1"
5+
"hash"
6+
"io"
7+
8+
"gopkg.in/src-d/go-git.v4/plumbing"
9+
"gopkg.in/src-d/go-git.v4/utils/binary"
10+
)
11+
12+
// Encoder writes the contents of an Index to an output stream. It embeds the
// io.Writer that receives the encoded bytes and keeps the hash.Hash whose sum
// is appended by encodeChecksum.
type Encoder struct {
	io.Writer
	hash hash.Hash
}
17+
18+
// NewEncoder returns a new stream encoder that writes to w.
19+
func NewEncoder(w io.Writer) *Encoder {
20+
h := sha1.New()
21+
mw := io.MultiWriter(w, h)
22+
return &Encoder{mw, h}
23+
}
24+
25+
func (e *Encoder) Encode(idx Index) error {
26+
var err error
27+
28+
// Get all the hashes in the input index
29+
hashes := idx.Hashes()
30+
31+
// Sort the inout and prepare helper structures we'll need for encoding
32+
hashToIndex, fanout, largeEdgesCount := e.prepare(idx, hashes)
33+
34+
chunkSignatures := [][]byte{oidFanoutSignature, oidLookupSignature, commitDataSignature}
35+
chunkSizes := []uint64{4 * 256, uint64(len(hashes)) * 20, uint64(len(hashes)) * 36}
36+
if largeEdgesCount > 0 {
37+
chunkSignatures = append(chunkSignatures, largeEdgeListSignature)
38+
chunkSizes = append(chunkSizes, uint64(largeEdgesCount)*4)
39+
}
40+
41+
if err = e.encodeFileHeader(len(chunkSignatures)); err != nil {
42+
return err
43+
}
44+
if err = e.encodeChunkHeaders(chunkSignatures, chunkSizes); err != nil {
45+
return err
46+
}
47+
if err = e.encodeFanout(fanout); err != nil {
48+
return err
49+
}
50+
if err = e.encodeOidLookup(hashes); err != nil {
51+
return err
52+
}
53+
if largeEdges, err := e.encodeCommitData(hashes, hashToIndex, idx); err == nil {
54+
if err = e.encodeLargeEdges(largeEdges); err != nil {
55+
return err
56+
}
57+
}
58+
if err != nil {
59+
return err
60+
}
61+
return e.encodeChecksum()
62+
}
63+
64+
func (e *Encoder) prepare(idx Index, hashes []plumbing.Hash) (hashToIndex map[plumbing.Hash]uint32, fanout []uint32, largeEdgesCount uint32) {
65+
// Sort the hashes and build our index
66+
plumbing.HashesSort(hashes)
67+
hashToIndex = make(map[plumbing.Hash]uint32)
68+
fanout = make([]uint32, 256)
69+
for i, hash := range hashes {
70+
hashToIndex[hash] = uint32(i)
71+
fanout[hash[0]]++
72+
}
73+
74+
// Convert the fanout to cumulative values
75+
for i := 1; i <= 0xff; i++ {
76+
fanout[i] += fanout[i-1]
77+
}
78+
79+
// Find out if we will need large edge table
80+
for i := 0; i < len(hashes); i++ {
81+
v, _ := idx.GetNodeByIndex(i)
82+
if len(v.ParentHashes) > 2 {
83+
largeEdgesCount += uint32(len(v.ParentHashes) - 1)
84+
break
85+
}
86+
}
87+
88+
return
89+
}
90+
91+
func (e *Encoder) encodeFileHeader(chunkCount int) (err error) {
92+
if _, err = e.Write(commitFileSignature); err == nil {
93+
_, err = e.Write([]byte{1, 1, byte(chunkCount), 0})
94+
}
95+
return
96+
}
97+
98+
func (e *Encoder) encodeChunkHeaders(chunkSignatures [][]byte, chunkSizes []uint64) (err error) {
99+
// 8 bytes of file header, 12 bytes for each chunk header and 12 byte for terminator
100+
offset := uint64(8 + len(chunkSignatures)*12 + 12)
101+
for i, signature := range chunkSignatures {
102+
if _, err = e.Write(signature); err == nil {
103+
err = binary.WriteUint64(e, offset)
104+
}
105+
if err != nil {
106+
return
107+
}
108+
offset += chunkSizes[i]
109+
}
110+
if _, err = e.Write(lastSignature); err == nil {
111+
err = binary.WriteUint64(e, offset)
112+
}
113+
return
114+
}
115+
116+
func (e *Encoder) encodeFanout(fanout []uint32) (err error) {
117+
for i := 0; i <= 0xff; i++ {
118+
if err = binary.WriteUint32(e, fanout[i]); err != nil {
119+
return
120+
}
121+
}
122+
return
123+
}
124+
125+
func (e *Encoder) encodeOidLookup(hashes []plumbing.Hash) (err error) {
126+
for _, hash := range hashes {
127+
if _, err = e.Write(hash[:]); err != nil {
128+
return err
129+
}
130+
}
131+
return
132+
}
133+
134+
func (e *Encoder) encodeCommitData(hashes []plumbing.Hash, hashToIndex map[plumbing.Hash]uint32, idx Index) (largeEdges []uint32, err error) {
135+
for _, hash := range hashes {
136+
origIndex, _ := idx.GetIndexByHash(hash)
137+
commitData, _ := idx.GetNodeByIndex(origIndex)
138+
if _, err = e.Write(commitData.TreeHash[:]); err != nil {
139+
return
140+
}
141+
142+
var parent1, parent2 uint32
143+
if len(commitData.ParentHashes) == 0 {
144+
parent1 = parentNone
145+
parent2 = parentNone
146+
} else if len(commitData.ParentHashes) == 1 {
147+
parent1 = hashToIndex[commitData.ParentHashes[0]]
148+
parent2 = parentNone
149+
} else if len(commitData.ParentHashes) == 2 {
150+
parent1 = hashToIndex[commitData.ParentHashes[0]]
151+
parent2 = hashToIndex[commitData.ParentHashes[1]]
152+
} else if len(commitData.ParentHashes) > 2 {
153+
parent1 = hashToIndex[commitData.ParentHashes[0]]
154+
parent2 = uint32(len(largeEdges)) | parentOctopusUsed
155+
for _, parentHash := range commitData.ParentHashes[1:] {
156+
largeEdges = append(largeEdges, hashToIndex[parentHash])
157+
}
158+
largeEdges[len(largeEdges)-1] |= parentLast
159+
}
160+
161+
if err = binary.WriteUint32(e, parent1); err == nil {
162+
err = binary.WriteUint32(e, parent2)
163+
}
164+
if err != nil {
165+
return
166+
}
167+
168+
unixTime := uint64(commitData.When.Unix())
169+
unixTime |= uint64(commitData.Generation) << 34
170+
if err = binary.WriteUint64(e, unixTime); err != nil {
171+
return
172+
}
173+
}
174+
return
175+
}
176+
177+
func (e *Encoder) encodeLargeEdges(largeEdges []uint32) (err error) {
178+
for _, parent := range largeEdges {
179+
if err = binary.WriteUint32(e, parent); err != nil {
180+
return
181+
}
182+
}
183+
return
184+
}
185+
186+
func (e *Encoder) encodeChecksum() error {
187+
_, err := e.Write(e.hash.Sum(nil)[:20])
188+
return err
189+
}

0 commit comments

Comments
 (0)