Add Python 3 support to git-fat while keeping Python 2 working.

Output read from git subprocesses is passed through touni() before it is
handled as text, and text is passed through tobytes() before it is written
to a pipe.  Fat objects and the filter streams are opened in binary mode.
run_test.py copies git-fat, test.sh and test-retroactive.sh into a
per-version directory, rewrites the git-fat shebang, and runs both scripts.

diff --git a/.gitignore b/.gitignore
index b25c15b..f25b919 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,3 @@
 *~
+TEST_py*
+fat-test*/
diff --git a/git-fat b/git-fat
index 135f4e2..412146a 100755
--- a/git-fat
+++ b/git-fat
@@ -1,7 +1,6 @@
 #!/usr/bin/env python
 # -*- mode:python -*-
-
-from __future__ import print_function, with_statement
+from __future__ import print_function, with_statement,unicode_literals
 
 import sys
 import hashlib
@@ -15,10 +14,27 @@ import threading
 import time
 import collections
 
-if not type(sys.version_info) is tuple and sys.version_info.major > 2:
-    sys.stderr.write('git-fat does not support Python-3 yet. Please use python2.\n')
-    sys.exit(1)
-
+if sys.version_info[0] > 2:
+    unicode = str
+else:
+    from io import open
+
+def touni(s,encoding='utf8'):
+    """Return s unchanged if it is a str/unicode, otherwise decode it."""
+    if isinstance(s,(str,unicode)):
+        return s
+    if hasattr(s,'decode'):
+        return s.decode(encoding)
+    raise ValueError('Could not decode')
+
+def tobytes(s,encoding='utf8'):
+    """Return s unchanged if it is bytes, otherwise encode it."""
+    if isinstance(s,bytes):
+        return s
+    if hasattr(s,'encode'):
+        return s.encode(encoding)
+    raise ValueError('Could not encode')
+
 try:
     from subprocess import check_output
     del check_output
@@ -90,6 +106,7 @@ def difftreez_reader(input):
         newread = input.read(BLOCK_SIZE)
         if not newread:
             break
+        newread = touni(newread)
         partial += newread
         while True:
             head, sep, partial = partial.partition('\0')
@@ -114,7 +131,7 @@ def gitconfig_get(name, file=None):
     elif p.returncode:
         return gitconfig_get(name)
     else:
-        return output
+        return touni(output)
 def gitconfig_set(name, value, file=None):
     args = ['git', 'config']
     if file is not None:
@@ -128,16 +145,18 @@ class GitFat(object):
         self.verbose = verbose_stderr if os.environ.get('GIT_FAT_VERBOSE') else verbose_ignore
         try:
             self.gitroot = subprocess.check_output('git rev-parse --show-toplevel'.split()).strip()
+            self.gitroot = touni(self.gitroot)
         except subprocess.CalledProcessError:
             sys.exit(1)
         self.gitdir = subprocess.check_output('git rev-parse --git-dir'.split()).strip()
+        self.gitdir = touni(self.gitdir)
         self.objdir = os.path.join(self.gitdir, 'fat', 'objects')
         if os.environ.get('GIT_FAT_VERSION') == '1':
             self.encode = self.encode_v1
         else:
             self.encode = self.encode_v2
         def magiclen(enc):
-            return len(enc(hashlib.sha1('dummy').hexdigest(), 5))
+            return len(enc(hashlib.sha1(b'dummy').hexdigest(), 5))
         self.magiclen = magiclen(self.encode) # Current version
         self.magiclens = [magiclen(enc) for enc in [self.encode_v1, self.encode_v2]] # All prior versions
     def setup(self):
@@ -164,7 +183,6 @@ class GitFat(object):
             self.verbose('Pushing to %s' % (remote))
         else:
             self.verbose('Pulling from %s' % (remote))
-
         cmd = ['rsync', '--progress', '--ignore-existing', '--from0', '--files-from=-']
         rshopts = ''
         if ssh_user:
@@ -181,7 +199,7 @@ class GitFat(object):
             cmd += [remote + '/', self.objdir + '/']
         return cmd
     def revparse(self, revname):
-        return subprocess.check_output(['git', 'rev-parse', revname]).strip()
+        return touni(subprocess.check_output(['git', 'rev-parse', revname]).strip())
     def encode_v1(self, digest, bytes):
         'Produce legacy representation of file to be stored in repository.'
         return '#$# git-fat %s\n' % (digest,)
@@ -190,6 +208,7 @@ class GitFat(object):
         return '#$# git-fat %s %20d\n' % (digest, bytes)
     def decode(self, string, noraise=False):
         cookie = '#$# git-fat '
+        string = touni(string)
         if string.startswith(cookie):
             parts = string[len(cookie):].split()
             digest = parts[0]
@@ -217,7 +236,7 @@ class GitFat(object):
             return False, None
         # read file
         try:
-            digest, bytes = self.decode_stream(open(fname))
+            digest, bytes = self.decode_stream(open(fname,'rb'))
         except IOError:
             return False, None
         if isinstance(digest, str):
@@ -240,7 +259,7 @@ class GitFat(object):
         try:
             ishanging = False
             cached = False                # changes to True when file is cached
-            with os.fdopen(fd, 'w') as cache:
+            with os.fdopen(fd, 'wb') as cache:
                 outstream = cache
                 firstblock = True
                 for block in readblocks(instream):
@@ -265,7 +284,7 @@ class GitFat(object):
                         os.rename(tmpname, objfile)
                         self.verbose('git-fat filter-clean: caching to %s' % objfile)
                     cached = True
-                    outstreamclean.write(self.encode(digest, bytes))
+                    outstreamclean.write(tobytes(self.encode(digest, bytes)))
         finally:
             if not cached:
                 os.remove(tmpname)
@@ -276,22 +295,30 @@ class GitFat(object):
         version of the file on stdin and produces the "clean" (repository) version on stdout.
         '''
         self.setup()
-        self.filter_clean(sys.stdin, sys.stdout)
+        if hasattr(sys.stdin,'buffer'):
+            stdin,stdout = sys.stdin.buffer,sys.stdout.buffer
+        else:
+            stdin,stdout = sys.stdin,sys.stdout
+        self.filter_clean(stdin, stdout)
 
     def cmd_filter_smudge(self):
         self.setup()
-        result, bytes = self.decode_stream(sys.stdin)
+        if hasattr(sys.stdin,'buffer'):
+            stdin,stdout = sys.stdin.buffer,sys.stdout.buffer
+        else:
+            stdin,stdout = sys.stdin,sys.stdout
+        result, bytes = self.decode_stream(stdin)
         if isinstance(result, str):       # We got a digest
             objfile = os.path.join(self.objdir, result)
             try:
-                cat(open(objfile), sys.stdout)
+                cat(open(objfile,'rb'), stdout)
                 self.verbose('git-fat filter-smudge: restoring from %s' % objfile)
             except IOError:                # file not found
                 self.verbose('git-fat filter-smudge: fat object missing %s' % objfile)
-                sys.stdout.write(self.encode(result, bytes))   # could leave a better notice about how to recover this file
+                stdout.write(tobytes(self.encode(result, bytes)))   # could leave a better notice about how to recover this file
         else:                             # We have an iterable over the original input.
             self.verbose('git-fat filter-smudge: not a managed file')
-            cat_iter(result, sys.stdout)
+            cat_iter(result, stdout)
     def catalog_objects(self):
         return set(os.listdir(self.objdir))
     def referenced_objects(self, rev=None, all=False):
@@ -304,15 +331,18 @@ class GitFat(object):
         p1 = subprocess.Popen(['git','rev-list','--objects',rev], stdout=subprocess.PIPE)
         def cut_sha1hash(input, output):
             for line in input:
-                output.write(line.split()[0] + '\n')
+                line = touni(line)
+                line = line.split()[0] + '\n'
+                output.write(tobytes(line))
             output.close()
         # ...`cat-file --batch-check` filters for git-fat object candidates in bulk...
         p2 = subprocess.Popen(['git','cat-file','--batch-check'], stdin=subprocess.PIPE, stdout=subprocess.PIPE)
         def filter_gitfat_candidates(input, output):
             for line in input:
+                line = touni(line)
                 objhash, objtype, size = line.split()
                 if objtype == 'blob' and int(size) in self.magiclens:
-                    output.write(objhash + '\n')
+                    output.write(tobytes(objhash + '\n'))
             output.close()
         # ...`cat-file --batch` provides full contents of git-fat candidates in bulk
         p3 = subprocess.Popen(['git','cat-file','--batch'], stdin=subprocess.PIPE, stdout=subprocess.PIPE)
@@ -326,11 +356,11 @@ class GitFat(object):
             metadata_line = p3.stdout.readline()
             if not metadata_line:
                 break  # EOF
-            objhash, objtype, size_str = metadata_line.split()
+            objhash, objtype, size_str = touni(metadata_line).split()
             size, bytes_read = int(size_str), 0
             # We know from filter that item is a candidate git-fat object and
             # is small enough to read into memory and process
-            content = ''
+            content = b''
             while bytes_read < size:
                 data = p3.stdout.read(size - bytes_read)
                 if not data:
@@ -338,7 +368,7 @@ class GitFat(object):
                 content += data
                 bytes_read += len(data)
             try:
-                fathash = self.decode(content)[0]
+                fathash = touni(self.decode(content)[0])
                 referenced.add(fathash)
             except GitFat.DecodeError:
                 pass
@@ -361,7 +391,8 @@ class GitFat(object):
         'generator for all orphan placeholders in the working tree'
         if not patterns or patterns == ['']:
             patterns = ['.']
-        for fname in subprocess.check_output(['git', 'ls-files', '-z'] + patterns).split('\x00')[:-1]:
+        for fname in subprocess.check_output(['git', 'ls-files', '-z'] + patterns).split(b'\x00')[:-1]:
+            fname = touni(fname)
             digest = self.decode_file(fname)[0]
             if digest:
                 yield (digest, fname)
@@ -398,7 +429,7 @@ class GitFat(object):
         cmd = self.get_rsync_command(push=True)
         self.verbose('Executing: %s' % ' '.join(cmd))
         p = subprocess.Popen(cmd, stdin=subprocess.PIPE)
-        p.communicate(input='\x00'.join(files))
+        p.communicate(input=b'\x00'.join(tobytes(file) for file in files))
         if p.returncode:
             sys.exit(p.returncode)
     def checkout(self, show_orphans=False):
@@ -442,7 +473,7 @@ class GitFat(object):
         cmd = self.get_rsync_command(push=False)
         self.verbose('Executing: %s' % ' '.join(cmd))
         p = subprocess.Popen(cmd, stdin=subprocess.PIPE)
-        p.communicate(input='\x00'.join(files))
+        p.communicate(input=b'\x00'.join(tobytes(file) for file in files))
         if p.returncode:
             sys.exit(p.returncode)
         self.checkout()
@@ -480,7 +511,7 @@ class GitFat(object):
         for obj in self.catalog_objects():
             fname = os.path.join(self.objdir, obj)
             h = hashlib.new('sha1')
-            for block in readblocks(open(fname)):
+            for block in readblocks(open(fname,'rb')):
                 h.update(block)
             data_hash = h.hexdigest()
             if obj != data_hash:
@@ -507,7 +538,7 @@ class GitFat(object):
             This truncates to one hash per line.
             """
             for line in input:
-                output.write(line[:40] + '\n')
+                output.write(line[:40] + b'\n')
             output.close()
         revlist = subprocess.Popen(['git', 'rev-list', '--all', '--objects'], stdout=subprocess.PIPE, bufsize=-1)
         objcheck = subprocess.Popen(['git', 'cat-file', '--batch-check'], stdin=subprocess.PIPE, stdout=subprocess.PIPE, bufsize=-1)
@@ -516,6 +547,7 @@ class GitFat(object):
         numblobs = 0; numlarge = 1
         # Build dict with the sizes of all large blobs
         for line in objcheck.stdout:
+            line = touni(line)
             objhash, blob, size = line.split()
             if blob != 'blob':
                 continue
@@ -555,6 +587,7 @@ class GitFat(object):
         lsfiles = subprocess.Popen(['git', 'ls-files', '-s'], stdout=subprocess.PIPE)
         updateindex = subprocess.Popen(['git', 'update-index', '--index-info'], stdin=subprocess.PIPE)
         for line in lsfiles.stdout:
+            line = touni(line)
             mode, sep, tail = line.partition(' ')
             blobhash, sep, tail = tail.partition(' ')
             stageno, sep, tail = tail.partition('\t')
@@ -576,24 +609,24 @@ class GitFat(object):
                     hashobject.stdin.close()
                 filterclean = threading.Thread(target=dofilter)
                 filterclean.start()
-                cleanedobj = hashobject.stdout.read().rstrip()
+                cleanedobj = touni(hashobject.stdout.read()).rstrip()
                 catfile.wait()
                 hashobject.wait()
                 filterclean.join()
                 mkdir_p(os.path.dirname(hashfile))
                 open(hashfile, 'w').write(cleanedobj + '\n')
-            updateindex.stdin.write('%s %s %s\t%s\n' % (mode, cleanedobj, stageno, filename))
+            updateindex.stdin.write(tobytes('%s %s %s\t%s\n' % (mode, cleanedobj, stageno, filename)))
         if manage_gitattributes:
             try:
-                mode, blobsha1, stageno, filename = subprocess.check_output(['git', 'ls-files', '-s', '.gitattributes']).split()
-                gitattributes_lines = subprocess.check_output(['git', 'cat-file', 'blob', blobsha1]).splitlines()
+                mode, blobsha1, stageno, filename = touni(subprocess.check_output(['git', 'ls-files', '-s', '.gitattributes'])).split()
+                gitattributes_lines = touni(subprocess.check_output(['git', 'cat-file', 'blob', blobsha1])).splitlines()
             except ValueError:  # Nothing to unpack, thus no file
                 mode, stageno = '100644', '0'
                 gitattributes_lines = []
             gitattributes_extra = ['%s filter=fat -text' % line.split()[0] for line in filelist]
             hashobject = subprocess.Popen(['git', 'hash-object', '-w', '--stdin'], stdin=subprocess.PIPE, stdout=subprocess.PIPE)
-            stdout, stderr = hashobject.communicate('\n'.join(gitattributes_lines + gitattributes_extra) + '\n')
-            updateindex.stdin.write('%s %s %s\t%s\n' % (mode, stdout.strip(), stageno, '.gitattributes'))
+            stdout, stderr = hashobject.communicate(b'\n'.join(tobytes(l) for l in gitattributes_lines + gitattributes_extra) + b'\n')
+            updateindex.stdin.write(tobytes('%s %s %s\t%s\n' % (mode, touni(stdout).strip(), stageno, '.gitattributes')))
         updateindex.stdin.close()
         lsfiles.wait()
         updateindex.wait()
diff --git a/run_test.py b/run_test.py
new file mode 100755
index 0000000..d41fc9f
--- /dev/null
+++ b/run_test.py
@@ -0,0 +1,110 @@
+#!/usr/bin/env python
+# -*- mode:python -*-
+"""
+Simple runner for test.sh but it modifies it to explicitly test python2 and 3
+"""
+from __future__ import print_function,unicode_literals
+import sys
+import os
+import shutil
+import subprocess
+
+if sys.version_info[0] <= 2:
+    from io import open
+
+
+# Build a dead-simple CLI. Not worth argparse, etc
+help="""\
+Run tests with specific versions
+
+    $ ./run_test.py     # Both Python 2 and 3
+    $ ./run_test.py 2   # Only python2
+    $ ./run_test.py 3   # Only python3
+
+Any argument specified will be appended to the git-fat shebang. For example
+
+    $ ./run_test.py 2.6
+
+will change the shebang to
+
+    #!/usr/bin/env python2.6
+
+Or specify more than one:
+
+    $ ./run_test.py 2 3 2.6
+
+"""
+vers = sys.argv[1:]
+if len(vers) == 0:
+    vers = ['2','3']
+
+if '-h' in vers or '--help' in vers:
+    print(help)
+    sys.exit()
+
+for ver in vers:
+    print('-='*20)
+    print('Testing %s' % ver)
+    print('-_'*20)
+
+    testdir = 'TEST_py%s' % ver
+    testdir = os.path.abspath(testdir)
+
+    # Delete the prior test dir and make a new one
+    if os.path.isdir(testdir):
+        shutil.rmtree(testdir)
+    os.makedirs(testdir)
+
+    shebang = '#!/usr/bin/env python%s\n' % ver
+    pathline = 'export PATH=%s:$PATH\n' % testdir
+
+    testfile = os.path.join(testdir,'test%s.sh' % ver)
+    testfileR = os.path.join(testdir,'test-retroactive%s.sh' % ver)
+    fatfile = os.path.join(testdir,'git-fat')
+
+    # Write the files. Do not use multiple with's to support 2.6
+    with open('git-fat','rt') as infile:
+        with open(fatfile,'wt') as outfile:
+            infile.readline() # Skip shebang
+            outfile.write(shebang)
+            outfile.write(infile.read())
+
+    with open('test.sh','rt') as infile:
+        with open(testfile,'wt') as outfile:
+            outfile.write(infile.readline()) # copy shebang
+            outfile.write(pathline)
+            outfile.write(infile.read())
+
+    with open('test-retroactive.sh','rt') as infile:
+        with open(testfileR,'wt') as outfile:
+            outfile.write(infile.readline()) # copy shebang
+            outfile.write(pathline)
+            outfile.write(infile.read())
+
+    os.chmod(fatfile, 0o775) # rwxrwxr-x
+    os.chmod(testfile, 0o775)
+    os.chmod(testfileR, 0o775)
+
+    try:
+        subprocess.check_call(['./test%s.sh' % ver],cwd=testdir)
+    except subprocess.CalledProcessError as err:
+        print('F'*60)
+        print(err,file=sys.stderr)
+        print('FAILED python %s'%ver,file=sys.stderr)
+        sys.exit(1)
+
+    print('###################')
+    print('###### RETRO ######')
+    print('###################')
+
+    try:
+        subprocess.check_call(['./test-retroactive%s.sh' % ver],cwd=testdir)
+    except subprocess.CalledProcessError as err:
+        print('F'*60)
+        print(err,file=sys.stderr)
+        print('FAILED RETRO python %s'%ver,file=sys.stderr)
+        sys.exit(1)
+
+
+
+
diff --git a/test.sh b/test.sh
index 0ee63ea..500481a 100755
--- a/test.sh
+++ b/test.sh
@@ -19,16 +19,18 @@ git commit -m'Initial fat repository'
 ln -s /oe/dss-oe/dss-add-ons-testing-build/deploy/licenses/common-licenses/GPL-3 c
 git add c
 git commit -m'add broken symlink'
-echo 'fat content a' > a.fat
+#echo 'fat content a' > a.fat
+# Encode random bytes so it can't be interpreted as a string
+echo "AEi2RiChMJ+N0hR+AJs89QDmu4D3kaKDnmlgy0YJqm8A"|base64 --decode > a.fat
 git add a.fat
 git commit -m'add a.fat'
 echo 'fat content b' > b.fat
 git add b.fat
 git commit -m'add b.fat'
-echo 'revise fat content a' > a.fat
+#echo 'revise fat content a' > a.fat
+echo "AEmBARZ2es0t0mPeXOLAjQCd4QlgPiKFJz9rPqqBiGQA"|base64 --decode > a.fat
 git commit -am'revise a.fat'
 git fat push
-
 cd ..
 git clone fat-test fat-test2
 cd fat-test2
@@ -47,7 +49,11 @@ then
 fi
 git fat init
 git fat pull -- 'a.fa*'
-cat a.fat
+t=$(cat a.fat | base64 )
+if [ "$t" != "AEmBARZ2es0t0mPeXOLAjQCd4QlgPiKFJz9rPqqBiGQA" ]; then
+    echo 'ERROR: "git fat pull" did not return binary file'
+    exit 1
+fi
 echo 'file which is committed and removed afterwards' > d
 git add d
 git commit -m'add d with normal content'