diff --git a/Rakefile b/Rakefile index 3b2c241..f5b6453 100644 --- a/Rakefile +++ b/Rakefile @@ -26,6 +26,13 @@ namespace :test do sandbox_dir = File.join "test", "tmp" rm_rf sandbox_dir end + + + desc "run a DEBUG test run after cleaning" + task :debug => :clean do + ENV["DEBUG"] = "1" + Rake::Task["test"].invoke + end end task :default => :test diff --git a/lib/code_extractor.rb b/lib/code_extractor.rb index 2da318e..2823b72 100644 --- a/lib/code_extractor.rb +++ b/lib/code_extractor.rb @@ -2,77 +2,488 @@ # Class to extract files and folders from a git repository, while maintaining # The git history. -class CodeExtractor - attr_reader :extraction - - def self.run - new.extract +module CodeExtractor + def run + Runner.new.run end + module_function :run - def initialize(extraction = 'extractions.yml') - @extraction = YAML.load_file(extraction) - @extraction[:upstream_branch] ||= "master" + class Config + def initialize(config_file = 'extractions.yml') + @config = YAML.load_file(config_file) - missing = %i[name destination upstream upstream_name extractions].reject { |k| @extraction[k] } - raise ArgumentError, "#{missing.map(&:inspect).join(", ")} key(s) missing" if missing.any? + @config[:destination] = File.expand_path(@config[:destination]) + @config[:upstream_branch] ||= "master" - @extraction[:destination] = File.expand_path(@extraction[:destination]) - end + validate! + end - def extract - puts @extraction - clone - extract_branch - remove_remote - remove_tags - filter_branch - end + def [](key) + @config[key] + end - def clone - return if Dir.exist?(@extraction[:destination]) - puts 'Cloning…' - system "git clone -o upstream #{@extraction[:upstream]} #{@extraction[:destination]}" - end + def inspect + @config.inspect + end + alias to_s inspect - def extract_branch - puts 'Extracting Branch…' - Dir.chdir(@extraction[:destination]) - branch = "extract_#{@extraction[:name]}" - `git checkout #{@extraction[:upstream_branch]}` - `git fetch upstream && git rebase upstream/master` - if system("git branch | grep #{branch}") - `git branch -D #{branch}` - end - `git checkout -b #{branch}` - extractions = @extraction[:extractions].join(' ') - `git rm -r #{extractions}` - `git commit -m "Extract #{@extraction[:name]}"` + def validate! + missing = %i[name destination upstream upstream_name extractions].reject { |k| @config[k] } + raise ArgumentError, "#{missing.map(&:inspect).join(", ")} key(s) missing" if missing.any? + end end - def remove_remote - `git remote rm upstream` - end + class GitProject + attr_reader :name, :url, :git_dir, :new_branch, :source_branch, :target_name, :upstream_name + + def initialize name, url + @name = name + @url = url + end + + def clone_to destination, origin_name = "upstream" + @git_dir ||= destination + + if Dir.exist?(git_dir) + raise "Not a git dir!" 
unless system "git -C #{git_dir} status" + else + puts 'Cloning…' + system "git clone --origin #{origin_name} #{url} #{git_dir}" + end + end + + def extract_branch source_branch, new_branch, extractions + puts 'Extracting Branch…' + @new_branch = new_branch + @source_branch = source_branch + Dir.chdir git_dir do + `git checkout #{source_branch}` + `git fetch upstream && git rebase upstream/#{source_branch}` + if system("git branch | grep #{new_branch}") + `git branch -D #{new_branch}` + end + `git checkout -b #{new_branch}` + `git rm -r #{extractions}` + `git commit -m "Extract #{name}"` + end + end + + def remove_remote + Dir.chdir git_dir do + `git remote rm upstream` + end + end + + def remove_tags + puts 'removing tags' + Dir.chdir git_dir do + tags = `git tag` + tags.split.each do |tag| + puts "Removing tag #{tag}" + `git tag -d #{tag}` + end + end + end + + def extract_commits extractions, upstream_name + Dir.chdir git_dir do + `time git filter-branch --index-filter ' + git read-tree --empty + git reset $GIT_COMMIT -- #{extractions} + ' #{msg_filter upstream_name} -- #{source_branch} -- #{extractions}` + end + end + + # Three step process to filter out the commits we want in three passes: + # + # - Move code we want to keep into a separate tmp dir (using @prune_script) + # - Prune anything that isn't in that subdirectory + # - Move the tmp directory back into the root of the directory + # + # + # Note: We don't use `--subdirectory-filter` here as it will remove merge + # commits, which we don't want. + # + def prune_commits extractions, upstream_name + puts "Pruning commits…" + + @upstream_name ||= upstream_name + + build_prune_script extractions + + Dir.chdir git_dir do + `git checkout -b #{prune_branch} #{@source_branch}` + `git filter-branch -f --prune-empty --tree-filter #{@prune_script} #{msg_filter upstream_name} HEAD` + `git filter-branch -f --prune-empty --index-filter ' + git read-tree --empty + git reset $GIT_COMMIT -- #{@keep_directory} + ' HEAD` + `git filter-branch -f --prune-empty --tree-filter 'mv #{@keep_directory}/* .' HEAD` + end + end + + def add_target_remote target_name, target_remote + puts "Add target repo as a remote…" + @target_name = target_name + + Dir.chdir git_dir do + `git remote add #{target_remote_name} #{target_remote}` + `git fetch #{target_remote_name}` + end + end + + # "Inject" commits one repo's branch back into the target repo's + # + # Assuming the target remote has been added (see add_target_remote), this + # method does so by doing the following to achieve the "injected" history: + # + # 1. Filters commits that already exist in the target repo. Additionally, + # the last commit that is shared between the two is actually used as the + # "root" commit for the injected commits. The rest are assumed to be new + # from the new repository. + # + # The "root" commit has it's commit message modified to reflect this + # change. + # + # The other part of the filter branch also applies the changes so they + # exist within in context the target codebase, and not just in isolation + # for itself (hence the `git reset #{@reference_target_branch} -- .` bit). + # The changes from the upstream repo are then applied on top of the + # existing code base. + # + # 2. A new branch is checked out that is based off the target remote's + # target branch, but does not track that branch. + # + # 3. 
The commits that have been filtered are cherry-picked on to this new + # branch, and the "root" commit assumes the parent of the current HEAD of + # the target remote's (master) branch + # + def inject_commits target_base_branch + puts "Injecting commits…" + + target_base_branch ||= 'master' + @reference_target_branch = "#{target_remote_name}/#{target_base_branch}" + + Dir.chdir git_dir do + `git checkout #{prune_branch}` + # special commit that will get renamed re-worded to: + # + # Re-insert extractions from #{upstream_name} + # + last_extracted_commit = previously_extracted_commits.first + first_injected_msg = `git show -s --format="%s%n%n%b" #{last_extracted_commit}` + first_injected_msg = first_injected_msg.lines.reject { |line| + line.include? commit_msg_filter + }.join + + # Fetch "committer data" from the `last_extracted_commit` in a bar + # delimited format. Fetches the following data from the commit: + # + # - Author Name + # - Author Email + # - Author Date of commit + # - Committer Name + # - Committer Email + # - Committer Date of commit + # + committer_data = `git show -s --format="%an|%ae|%ad|%cn|%ce|%cd" #{last_extracted_commit}`.split("|") + + first_injected_msg.prepend <<-COMMIT_MSG.gsub(/^ {10}/, '') + Re-insert extractions from #{upstream_name} + + *** Original Commit message shown below *** + + Author: #{committer_data[0]} #{committer_data[1]} #{committer_data[2]} + Committer: #{committer_data[3]} #{committer_data[4]} #{committer_data[5]} + COMMIT_MSG + + File.write File.expand_path("../LAST_EXTRACTED_COMMIT_MSG", git_dir), first_injected_msg + + `git checkout --no-track -b #{inject_branch} #{prune_branch}` + `time git filter-branch -f --commit-filter ' + export was_extracted="#{previously_extracted_commits.map {|c| "#{c}|" }.join}" + echo "#{last_extracted_commit}" > #{File.expand_path("../LAST_EXTRACTED_COMMIT", git_dir)} + if [ "$GIT_COMMIT" = "#{last_extracted_commit}" ] || [ -n "${was_extracted##*$GIT_COMMIT|*}" ]; then + git commit-tree "$@"; + else + skip_commit "$@"; + fi + ' --env-filter ' + if [ "$GIT_COMMIT" = "#{last_extracted_commit}" ]; then + GIT_AUTHOR_NAME=$(git config user.name) + GIT_AUTHOR_EMAIL=$(git config user.email) + GIT_COMMITTER_NAME=$(git config user.name) + GIT_COMMITTER_EMAIL=$(git config user.email) + fi + ' --msg-filter ' + if [ "$GIT_COMMIT" = "#{last_extracted_commit}" ]; then + cat #{File.expand_path File.join("..", "LAST_EXTRACTED_COMMIT_MSG"), git_dir} + else + cat - + fi + ' -- #{inject_branch}` + + # Old (bad: doesn't handle merges) + # + # `git checkout --no-track -b #{inject_branch} #{@reference_target_branch}` + # `git cherry-pick ..#{prune_branch}` + # + # + # Attempted #1 (not working, but uses older `git` methods) + # + # `git checkout --orphan #{inject_branch}` + # `git commit -m "Dummy Init commit"` + # orphan_commit = `git log --pretty="%H" -n1`.chomp + # prune_first = `git log --pretty="%H" --reverse -n1 #{prune_branch}`.chomp + # `git rebase --onto #{orphan_commit} #{prune_first} #{inject_branch}` + # `git replace #{orphan_commit} #{@reference_target_branch}` + # + # + # Better + # + # Ref: git rebase --onto code_extractor_inject_the_extracted --root + # + + # Grafting Testing + # + graft_parent_commit = false + if graft_parent_commit + # Attempt #1 + # + # orphan_branch = "code_extractor_orphan_branch_#{Time.now.to_i}" + # `git checkout --orphan #{orphan_branch}` + # `git rm -rf ./*` + # `git commit --allow-empty -m "Empty Commit"` + # `git rebase --rebase-merges=rebase-cousins --root --onto #{orphan_branch} 
#{inject_branch}` + # + # Attempt #2 + # + # `git replace --graft fe8c6c6228 250a0b46fb05ad5891d442745b93543c38ee0914` + + `git checkout -b old_base #{graft_parent_commit}` + `git checkout #{inject_branch}` # TODO: needed? + `git rebase --rebase-merges=rebase-cousins --onto old_base #{inject_branch}` + `git merge --no-ff #{@reference_target_branch}` + else + `git rebase --rebase-merges=rebase-cousins --root --onto #{@reference_target_branch} #{inject_branch}` + end + end + end - def remove_tags - puts 'removing tags' - tags = `git tag` - tags.split.each do |tag| - puts "Removing tag #{tag}" - `git tag -d #{tag}` + def run_extra_cmds cmds + Dir.chdir git_dir do + cmds.each { |cmd| system cmd } if cmds + end + end + + private + + def target_remote_name + @target_remote_name ||= "code_extractor_target_for_#{name}" + end + + def prune_branch + @prune_branch ||= "code_extractor_prune_#{name}" + end + alias prune_commits_remote prune_branch + + def inject_branch + @inject_branch ||= "code_extractor_inject_#{name}" + end + alias inject_remote inject_branch + + # Given a list of extractions, build a script that will move a list of + # files (extractions) from their current location in a given commit to a + # unused directory. + # + # More complicated than it looks, this will be used as part of a three part + # `git filter-branch` to: + # + # 1. move extractable files into a subdirectory with `--tree-filter` + # 2. only keep commits for files moved into that subdirectory + # 3. make the subdirectory the new project root. + # + # For consistency, we want to keep the subdirectories' structure in the + # same line as what was there previously, so this script helps do that, and + # also creates directories/files when they don't exist. + # + # Returns `true` at the end of the script incase the last `mv` fails (the + # source doesn't exist in this commit, for example) + # + def build_prune_script extractions + require 'set' + require 'fileutils' + + @keep_directory = "code_extractor_git_keeps_#{Time.now.to_i}" + git_log_follow = "git log --name-only --format=format: --follow" + prune_mkdirs = Set.new + prune_mvs = [] + + Dir.chdir git_dir do + `git checkout #{source_branch}` + extractions.each do |file_or_dir| + if Dir.exist? file_or_dir + files = Dir["#{file_or_dir}/**/*"] + else + files = [file_or_dir] + end + + files.each do |extraction_file| + raw_file_history = `#{git_log_follow} -- #{extraction_file}` + file_and_ancestors = raw_file_history.split("\n").uniq + + file_and_ancestors.reject! { |file| file.length == 0 } + + file_and_ancestors.each do |file| + file_dir = File.dirname file + prune_mkdirs.add file_dir + prune_mvs << [file, "#{@keep_directory}/#{file_dir}"] + end + end + end + end + + @prune_script = File.join Dir.pwd, "code_extractor_#{name}_prune_script.sh" + + File.open @prune_script, "w" do |script| + prune_mkdirs.each do |dir| + script.puts "mkdir -p #{File.join @keep_directory, dir}" + end + + script.puts + prune_mvs.each do |(file, dir)| + script.puts "mv #{file} #{dir} 2>/dev/null" + end + + script.puts + script.puts "true" + end + FileUtils.chmod "+x", @prune_script + end + + def commit_msg_filter + @commit_msg_filter ||= "(transferred from #{target_name}@" + end + + def previously_extracted_commits + return @previously_extracted_commits if defined? 
@previously_extracted_commits
+
+      @previously_extracted_commits = `git log --pretty="%H" --grep="#{commit_msg_filter}"`.lines
+
+      # Fallback if the extracted code didn't use `code-extractor`
+      #
+      # Effectively, we just look for commits that exist in the upstream
+      # branch with the same first line of the commit message, and if that is
+      # found, double check the file changes are the same.
+      #
+      # Not foolproof, but the SHAs will be different unfortunately, so can't
+      # match on that...
+      #
+      first_commit = @previously_extracted_commits.first
+      if first_commit
+        @previously_extracted_commits.each(&:chomp!)
+      else
+        # Long method...
+        #
+        # Test to see if any commit on the target remote includes the same full
+        # message as each of the commits.
+        #
+        # Check first by first line of commit for speed, and return just the
+        # matching git SHAs and author name.  If some exist, then match by full
+        # commit msg.
+        #
+        # Fetch author name (%an) and commit SHA (%H), using a '|' delimiter
+        #
+        @previously_extracted_commits = `git log --pretty="%an|%H"`.lines.each(&:chomp!)
+        @previously_extracted_commits.map! {|line| line.split "|" }
+        @previously_extracted_commits.tap do |commits|
+          commits.select! do |(author, commit)|
+            # Make sure to escape quotes in commit messages
+            #upstream_msg = `git show -s --format="%s" #{commit}`.gsub /"/, '\"'
+            #target_commits = `git log --pretty="%H" \
+            #                          --author="#{author}" \
+            #                          --grep="#{upstream_msg.chomp}" \
+            #                          #{@reference_target_branch}`.lines.each(&:chomp!)
+            #
+
+            target_commits = `git log --pretty="%H" --fixed-strings \
+                                      --author="#{author}" \
+                                      --grep="$(git show -s --format="%s" #{commit} | sed 's/"/\\\\"/g')" \
+                                      #{@reference_target_branch}`.lines.each(&:chomp!)
+
+            if target_commits.empty?
+              false
+            else
+              upstream_full_msg = `git show -s --format="%s%n%n%b" #{commit}`
+              upstream_full_msg.gsub! /^\(transferred from #{upstream_name}@.*$/, ''
+
+              # TODO: Change this to a `.one?` check, and if this fails, and
+              # there is more than one, then compare changed file base names...
+              # which is harder still...
+              target_commits.any? do |target_commit|
+                target_full_msg = `git show -s --format="%s%n%n%b" #{target_commit}`
+                target_full_msg.gsub!
/^\(transferred from #{upstream_name}@.*$/, '' + upstream_full_msg.strip == target_full_msg.strip + end + end + end + commits.map!(&:last) + end + end + end + + def msg_filter upstream_name + <<-MSG_FILTER.gsub(/^ {8}/, '').chomp + --msg-filter ' + cat - + echo + echo + echo "(transferred from #{upstream_name}@$GIT_COMMIT)" + ' + MSG_FILTER end end - def filter_branch - extractions = @extraction[:extractions].join(' ') - `time git filter-branch --index-filter ' - git read-tree --empty - git reset $GIT_COMMIT -- #{extractions} - ' --msg-filter ' - cat - - echo - echo - echo "(transferred from #{@extraction[:upstream_name]}@$GIT_COMMIT)" - ' -- #{@extraction[:upstream_branch]} -- #{extractions}` + class Runner + def initialize config = nil + @config = config || Config.new + @source_project = GitProject.new @config[:name], @config[:upstream] + end + + # Either run `.reinsert` or `.extract` + # + # The `.reinsert` method will eject with `nil` unless the config setting to + # run in that mode is set + # + def run + puts @config + + @source_project.clone_to @config[:destination] + @source_project.extract_branch @config[:upstream_branch], "extract_#{@config[:name]}", extractions + @source_project.remove_remote + @source_project.remove_tags + + reinsert || extract + end + + def extractions + @extractions ||= @config[:extractions].join(' ') + end + + def extract + @source_project.extract_commits extractions, @config[:upstream_name] + end + + def reinsert + return unless @config[:reinsert] + + @source_project.prune_commits @config[:extractions], @config[:upstream_name] + @source_project.run_extra_cmds @config[:extra_cmds] + @source_project.add_target_remote @config[:target_name], @config[:target_remote] + @source_project.inject_commits @config[:target_base_branch] + + true + end end end diff --git a/test/code_extractor_reinsert_test.rb b/test/code_extractor_reinsert_test.rb new file mode 100644 index 0000000..583c332 --- /dev/null +++ b/test/code_extractor_reinsert_test.rb @@ -0,0 +1,289 @@ +require 'test_helper' + +class CodeExtractorReinsertTest < CodeExtractor::TestCase + def test_unextract_an_extraction + # original extraction to work off of, in which we "un-extract" this later + create_base_repo + set_extractions ["foo"] + run_extraction + + # Perform updates to extracted repo to simulate changes since extraction + perform_merges_of_extracted_code + apply_new_commits_on_extracted_repo + update_extraction_hash + + # Run new extraction, with some extra commits added to the new repo that + # has been extracted previously + # + # This next line will run the actual extraction we are testing + # + # aka: updated commits and puts 'lib/foo' back into the original repo + run_extraction + + in_git_dir do + assert_commits [ + "Move foo/ into lib/", + "add new baz", + "update bar content", + "Re-insert extractions from MyOrg/extracted_repo", + "Merged branch 'extract_my_extractions' into master", + "Extract my_extractions", + "Commit #3", + "add Bar content", + "Initial Commit" + ] + + refute Dir.exist? "foo" + refute File.exist? "README.md" + + assert File.exist? "qux" + assert File.exist? "lib/foo/bar" + assert File.exist? 
"lib/foo/baz" + end + end + + def test_reinsert_when_code_extractor_was_not_used + # original extraction to work off of, in which we "un-extract" this later + create_base_repo + set_extractions ["foo"] + run_extraction + + # Perform updates to extracted repo to simulate changes since extraction + perform_merges_of_extracted_code + + # Remove `(transferred from Org/repo@-----)` lines from extracted commits + # + # This is to simulate a case where code_extractor wasn't used to handle the + # extraction + + ################################################## + + # TOO MANY HOURS WASTED trying to get Rugged to do a rebase... + # + # Gave up and did a `git filter-branch --msg-filter`... + # + + # puts + # current_commit = destination_repo.head.target + # puts current_commit.message.lines.reject {|l| l.include? "transferred" }.join + # puts "rebasing" + # rebase = Rugged::Rebase.new @destination_repo, "master", "HEAD^"#, :inmemory => true + # while rebase_commit = rebase.next do + # puts rebase_commit[:id] + # # destination_repo.index.write_tree + # # commit = destination_repo.head.target + # commit = Rugged::Commit.lookup @destination_repo, rebase_commit[:id] + # # puts commit.message + # # puts commit.committer.inspect + # # puts + # # # puts "inmemory_index:" + # # # puts rebase.inmemory_index + # puts "new message" + # puts commit.message.lines.reject {|l| l.include? "transferred" }.join + # puts + # commit_hash = { + # # :committer => commit.committer, + # :message => commit.message.lines.reject {|l| l.include? "transferred" }.join + # } + # # rebase.commit(commit_hash) + # # commit.amend commit_hash + # rebase.commit commit.to_hash + # end + # puts + # puts "rebase_commit:" + # puts rebase_commit + # puts + + # rebase.finish({:name => "system", :email => "system"}) + + + # start_commit = destination_repo.last_commit + # sorting = Rugged::SORT_TOPO # aka: sort like git-log + # actual_commits = destination_repo.walk(start_commit, sorting).each do |c| + # puts "#{c.oid} #{c.message.lines.first.chomp}" + # end + + ################################################## + + Dir.chdir extracted_dir do + `git filter-branch -f --msg-filter ' + cat - | grep -v "(transferred from " + ' -- master` + end + + destination_repo.checkout "extract_my_extractions" + + apply_new_commits_on_extracted_repo + update_extraction_hash + run_extraction + + in_git_dir do + assert_commits [ + "Move foo/ into lib/", + "add new baz", + "update bar content", + "Re-insert extractions from MyOrg/extracted_repo", + "Merged branch 'extract_my_extractions' into master", + "Extract my_extractions", + "Commit #3", + "add Bar content", + "Initial Commit" + ] + + refute Dir.exist? "foo" + refute File.exist? "README.md" + + assert File.exist? "qux" + assert File.exist? "lib/foo/bar" + assert File.exist? "lib/foo/baz" + end + end + + # Basically, a test to ensure we are using a rebase and not a cherry-pick + # method, since `git cherry-pick` doesn't handle merge commits well. 
+ def test_reinsert_when_there_is_a_merge_commit + # original extraction to work off of, in which we "un-extract" this later + create_base_repo + set_extractions ["foo"] + run_extraction + + # Perform updates to extracted repo to simulate changes since extraction + perform_merges_of_extracted_code + apply_new_commits_on_extracted_repo do + checkout_b 'master', 'origin/master' + + update_file "foo/bar", "Updated Bar Content" + commit "update bar content" + + checkout_b 'add_baz', 'master' + + update_file "foo/baz", "Baz Content" + commit "add new baz" + + checkout 'master' + merge 'add_baz' + + add_file "README.md", "READ ME!" + commit "add README" + end + update_extraction_hash + + # Run new extraction, with some extra commits added to the new repo that + # has been extracted previously + # + # This next line will run the actual extraction we are testing + # + # aka: updated commits and puts 'lib/foo' back into the original repo + run_extraction + + in_git_dir do + assert_commits [ + "Move foo/ into lib/", + "Merged branch 'add_baz' into master", + "add new baz", + "update bar content", + "Re-insert extractions from MyOrg/extracted_repo", + "Merged branch 'extract_my_extractions' into master", + "Extract my_extractions", + "Commit #3", + "add Bar content", + "Initial Commit" + ] + + refute Dir.exist? "foo" + refute File.exist? "README.md" + + assert File.exist? "qux" + assert File.exist? "lib/foo/bar" + assert File.exist? "lib/foo/baz" + end + end + + def perform_merges_of_extracted_code + # Merge our extracted branch (removed code) into the master branch of the + # original repository + # + is_bare = true + @bare_repo_dir = File.join @sandbox_dir, "bare_original.git" + Rugged::Repository.init_at @bare_repo_dir, is_bare + + # Can't push to a local non-bare repo via Rugged currently... hence this + # extra weirdness being done... + destination_repo.remotes.create("original", @bare_repo_dir) + destination_repo.remotes["original"].push [destination_repo.head.name] + + original_repo.remotes.create("origin", @bare_repo_dir) + original_repo.fetch("origin") + original_repo.remotes["origin"].push [original_repo.head.name] + original_repo.create_branch "extract_my_extractions", "origin/extract_my_extractions" + + CodeExtractor::TestRepo.merge original_repo, "extract_my_extractions" + original_repo.remotes['origin'].push [original_repo.head.name] + end + + def apply_new_commits_on_extracted_repo &block + @new_upstream_dir = File.join @sandbox_dir, "new_upstream.git" + @cloned_extractions_dir = File.join @sandbox_dir, "cloned_extractions.git" + + unless block_given? + block = proc do + checkout_b 'master', 'origin/master' + + update_file "foo/bar", "Updated Bar Content" + commit "update bar content" + + update_file "foo/baz", "Baz Content" + commit "add new baz" + + add_file "README.md", "READ ME!" 
+ commit "add README" + end + end + + self.reference_repo_dir = @cloned_extractions_dir + CodeExtractor::TestRepo.clone_at(extracted_dir, @cloned_extractions_dir, &block) + end + + # Update the configuration for the (second) extraction + # + # Boiler plate will do the following: + # + # - Assumes "foo/" is still the only extracted directory + # - A "reinsert" is the new action that will be performed + # - The following methods have been executed prior + # * `.create_base_repo` + # * `.perform_merges_of_extracted_code` + # * `.apply_new_commits_on_extracted_repo` + # - As a result, assumes the following instance variables are set: + # * `@new_upstream_dir` + # * `@bare_repo_dir` + # * `@cloned_extractions_dir` + # - The desired behavior of `.extra_cmds` is to move `foo` into `lib/` + # + # There is a `custom_updates` hash available in the args for adding + # additional changes, so if any of the above are not the case, changes can be + # made at the end. + # + def update_extraction_hash custom_updates = {} + set_extractions ["foo"] + set_destination_dir @new_upstream_dir + + extractions_hash[:name] = "the_extracted" + extractions_hash[:reinsert] = true + extractions_hash[:target_name] = "MyOrg/repo" + extractions_hash[:target_remote] = @bare_repo_dir + extractions_hash[:target_base_branch] = "master" + extractions_hash[:upstream] = @cloned_extractions_dir + extractions_hash[:upstream_name] = "MyOrg/extracted_repo" + extractions_hash[:extra_cmds] = [ + "mkdir lib", + "mv foo lib", + "git add -A", + "git commit -m 'Move foo/ into lib/'" + ] + + custom_updates.each do |hash_key, value| + extractions_hash[hash_key] = value + end + end +end diff --git a/test/code_extractor_test.rb b/test/code_extractor_test.rb index ac88fe1..ee417b8 100644 --- a/test/code_extractor_test.rb +++ b/test/code_extractor_test.rb @@ -2,21 +2,7 @@ class CodeExtractorTest < CodeExtractor::TestCase def test_code_extractor - repo_structure = %w[ - foo/bar - baz - ] - - create_repo repo_structure do - update_file "foo/bar", "Bar Content" - commit "add Bar content" - tag "v1.0" - - add_file "qux", "QUX!!!" - commit - tag "v2.0" - end - + create_base_repo set_extractions ["foo"] output, _ = run_extraction diff --git a/test/test_helper.rb b/test/test_helper.rb index a14a970..a35388b 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -13,7 +13,7 @@ FileUtils.mkdir_p TEST_SANDBOX -class CodeExtractor +module CodeExtractor # Base class for all test classes. # # Will create a sandbox directory in `test/tmp/sandbox`, uniq to each test @@ -21,7 +21,7 @@ class CodeExtractor # test, and clean up when it is done. # class TestCase < Minitest::Test - attr_writer :extractions, :extractions_hash + attr_writer :extractions, :extractions_hash, :reference_repo_dir attr_reader :extracted_dir, :repo_dir, :sandbox_dir # Create a sandbox for the given test, and name it after the test class and @@ -37,6 +37,23 @@ def teardown FileUtils.remove_entry @sandbox_dir unless ENV["DEBUG"] end + def create_base_repo + repo_structure = %w[ + foo/bar + baz + ] + + create_repo repo_structure do + update_file "foo/bar", "Bar Content" + commit "add Bar content" + tag "v1.0" + + add_file "qux", "QUX!!!" 
+ commit + tag "v2.0" + end + end + # Custom Assertions def assert_no_tags @@ -44,12 +61,46 @@ def assert_no_tags assert tags.empty?, "Expected there to be no tags, but got `#{tags.join ', '}'" end + TRANSFERRED_FROM_REGEXP = /\(transferred from (?<UPSTREAM>[^@]*)@(?<SHA>[^\)]*)\)/ + def assert_commits expected_commits + start_commit = destination_repo.last_commit + sorting = Rugged::SORT_TOPO # aka: sort like git-log + actual_commits = destination_repo.walk(start_commit, sorting).map {|c| c } + commit_msgs = actual_commits.map { |commit| commit.message.lines.first.chomp } + + assert_equal expected_commits, commit_msgs + + # Check that the "transferred from ..." reference line is correct + actual_commits.map do |commit| + [ + commit, + commit.message.lines.last.match(TRANSFERRED_FROM_REGEXP) + ] + end.each do |commit, transfered| + next unless transfered + + transferred_commit = reference_repo.lookup(transfered[:SHA]) + assert transferred_commit.is_a?(Rugged::Commit), + "'transfered from' of #{transfered[:SHA]} from #{commit} is not a valid commit" + assert_equal commit.message.lines.first.chomp, + transferred_commit.message.lines.first.chomp + end + end + # Helper methods + def original_repo + @original_repo ||= Rugged::Repository.new @repo_dir + end + def destination_repo @destination_repo ||= Rugged::Repository.new @extracted_dir end + def reference_repo + @reference_repo ||= Rugged::Repository.new @reference_repo_dir + end + def current_commit_message destination_repo.head.target.message end @@ -80,7 +131,8 @@ def run_extraction File.write extractions_yml, extractions_yaml capture_subprocess_io do - CodeExtractor.new(extractions_yml).extract + config = Config.new extractions_yml + Runner.new(config).run end ensure Dir.chdir pwd @@ -91,6 +143,12 @@ def set_extractions new_extractions extractions_hash[:extractions] = @extractions end + def set_destination_dir dir + @destination_repo = nil # reset + @extracted_dir = dir + extractions_hash[:destination] = @extracted_dir + end + def extractions_hash @extractions_hash ||= { :name => "my_extractions", @@ -156,13 +214,28 @@ def extractions_yaml # to be empty, otherwise a defining files in them is enough. # class TestRepo - attr_reader :file_struct, :last_commit, :repo_path, :repo, :index + attr_accessor :repo + attr_reader :file_struct, :last_commit, :repo_path,:index def self.generate repo_path, file_struct, &block repo = new repo_path, file_struct repo.generate(&block) end + def self.clone_at url, dir, &block + repo = new dir, [] + repo.clone(url, &block) + end + + def self.merge repo, branch, base_branch = nil + repo = Rugged::Repository.new repo unless repo.is_a? Rugged::Repository + dir = repo.workdir + + test_repo = new dir, [] + test_repo.repo = repo + test_repo.merge branch, base_branch + end + def initialize repo_path, file_struct @commit_count = 0 @repo_path = Pathname.new repo_path @@ -177,7 +250,21 @@ def generate &block git_init git_commit_initial - instance_eval(&block) if block_given? + execute(&block) if block_given? + end + + def clone url, &block + @repo = Rugged::Repository.clone_at url, @repo_path.to_s + @index = repo.index + @last_commit = repo.last_commit + # puts @repo.inspect + + execute(&block) if block_given? 
+    end
+
+    # Run DSL methods for given TestRepo instance
+    def execute &block
+      instance_eval(&block)
+    end
 
     # Create a new branch (don't checkout)
@@ -192,6 +279,12 @@ def checkout branch
       repo.checkout branch
     end
 
+    def checkout_b branch, source = nil
+      repo.create_branch(*[branch, source].compact)
+      repo.checkout branch
+      @last_commit = repo.last_commit
+    end
+
     # Commit with all changes added to the index
     #
     # $ git add . && git commit -am "${msg}"
@@ -213,6 +306,41 @@ def tag tag_name
       repo.tags.create tag_name, @last_commit
     end
 
+    # Merge a branch into the current branch with `--no-ff`
+    #
+    # (AKA: Merge a PR like on GitHub)
+    #
+    # $ git merge --no-ff --no-edit
+    #
+    # If `base_branch` is passed, use that, otherwise use `HEAD`
+    #
+    def merge branch, base_branch = nil
+      # Code is a combination of the examples found here:
+      #
+      # - https://github.com/libgit2/rugged/blob/3de6a0a7/test/merge_test.rb#L4-L18
+      # - http://violetzijing.is-programmer.com/2015/11/6/some_notes_about_rugged.187772.html
+      # - https://stackoverflow.com/a/27290470
+      #
+      # In other words... not obvious how to do a `git merge --no-ff --no-edit`
+      # with rugged... le-sigh...
+      repo.checkout base_branch if base_branch
+
+      base = (base_branch ? repo.branches[base_branch] : repo.head).target_id
+      topic = repo.branches[branch].target_id
+      merge_index = repo.merge_commits(base, topic)
+
+      Rugged::Commit.create(
+        repo,
+        :message => "Merged branch '#{branch}' into #{base_branch || current_branch_name}",
+        :parents => [base, topic],
+        :tree => merge_index.write_tree(repo),
+        :update_ref => "HEAD"
+      )
+
+      repo.checkout_head :strategy => :force
+      @last_commit = repo.last_commit
+    end
+
     # Add (or update) a file in the repo, and optionally write content to it
     #
     # The content is optional, but it will fully overwrite the content
@@ -235,6 +363,10 @@ def add_to_file entry, content
       File.write path, content, :mode => "a"
     end
 
+    def current_branch_name
+      repo.head.name.sub(/^refs\/heads\//, '')
+    end
+
     private
 
     # Generate repo structure based on file_structure array
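
Usage sketch (supplementary; not part of the diff above). A minimal, hypothetical driver for the new Config/Runner API: the key names come from Config#validate!, Runner#reinsert, and the extractions_hash helper in test/test_helper.rb, while every URL and path below is a placeholder. The hash is written out as extractions.yml, the default file CodeExtractor::Config loads from the current directory.

    # sketch_extraction.rb -- illustrative only, assuming the gem is on the load path
    require "yaml"
    require "code_extractor"

    config = {
      :name            => "my_extractions",        # used to name the extract/prune/inject branches
      :upstream        => "https://example.com/MyOrg/extracted_repo.git",
      :upstream_name   => "MyOrg/extracted_repo",  # appears in "(transferred from ...)" commit lines
      :upstream_branch => "master",                # optional; defaults to "master"
      :destination     => "/tmp/code_extractor_workdir",
      :extractions     => ["foo"],                 # files/directories to extract

      # Reinsert-only keys; when :reinsert is falsey, Runner#run performs a
      # plain extraction instead
      :reinsert           => true,
      :target_name        => "MyOrg/repo",
      :target_remote      => "https://example.com/MyOrg/repo.git",
      :target_base_branch => "master",
      :extra_cmds         => [
        "mkdir lib",
        "mv foo lib",
        "git add -A",
        "git commit -m 'Move foo/ into lib/'"
      ]
    }

    # Symbol keys round-trip through YAML here the same way the test helper's
    # extractions_hash does; a stricter YAML loader may need Symbol permitted.
    File.write "extractions.yml", config.to_yaml

    CodeExtractor.run   # Config.new reads ./extractions.yml, Runner#run does the rest

Along the same lines, a short sketch of the TestRepo DSL the new tests lean on (checkout_b, merge, and clone_at are the additions in this patch); the sandbox path and file contents are made up:

    CodeExtractor::TestRepo.generate "/tmp/code_extractor_dsl_demo", %w[foo/bar baz] do
      update_file "foo/bar", "Bar Content"
      commit "add Bar content"
      tag "v1.0"

      checkout_b "add_baz", "master"    # create and check out, like `git checkout -b`
      update_file "foo/baz", "Baz Content"
      commit "add new baz"

      checkout "master"
      merge "add_baz"                   # --no-ff style merge commit via Rugged
    end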