From c900c5642bf8c932d50499b5f4ebfbbd15a3b91a Mon Sep 17 00:00:00 2001 From: Nick LaMuro Date: Wed, 30 Oct 2019 17:19:58 -0500 Subject: [PATCH 01/32] [CodeExtractor] Convert to module This could have been done as part of the "Gemify" effort, but the intent of that branch was to make as few changes to the core script before starting to make sweeping changes. By converting `CodeExtractor` to a `module`, that file and `lib/code_extractor/version.rb` can be loaded at the same time (yes, they couldn't before...), and it is just a better practice for dealing with namespaces in general. The `Runner` class was a necessary side effect, but I think it is mostly understandable what its role is and should be a straightforward change. Fun Fact -------- The `attr_reader :extraction` is actually never used. Kept it as a part of `CodeExtractor::Runner`, but really, it is never used. --- lib/code_extractor.rb | 121 ++++++++++++++++++++++-------------------- test/test_helper.rb | 4 +- 2 files changed, 64 insertions(+), 61 deletions(-) diff --git a/lib/code_extractor.rb b/lib/code_extractor.rb index 2da318e..7f147bf 100644 --- a/lib/code_extractor.rb +++ b/lib/code_extractor.rb @@ -2,77 +2,80 @@ # Class to extract files and folders from a git repository, while maintaining # The git history. -class CodeExtractor - attr_reader :extraction - - def self.run - new.extract +module CodeExtractor + def run + Runner.new.extract end + module_function :run - def initialize(extraction = 'extractions.yml') - @extraction = YAML.load_file(extraction) - @extraction[:upstream_branch] ||= "master" + class Runner + attr_reader :extraction - missing = %i[name destination upstream upstream_name extractions].reject { |k| @extraction[k] } - raise ArgumentError, "#{missing.map(&:inspect).join(", ")} key(s) missing" if missing.any? 
+ def initialize(extraction = 'extractions.yml') + @extraction = YAML.load_file(extraction) + @extraction[:upstream_branch] ||= "master" - @extraction[:destination] = File.expand_path(@extraction[:destination]) - end + missing = %i[name destination upstream upstream_name extractions].reject { |k| @extraction[k] } + raise ArgumentError, "#{missing.map(&:inspect).join(", ")} key(s) missing" if missing.any? - def extract - puts @extraction - clone - extract_branch - remove_remote - remove_tags - filter_branch - end + @extraction[:destination] = File.expand_path(@extraction[:destination]) + end - def clone - return if Dir.exist?(@extraction[:destination]) - puts 'Cloning…' - system "git clone -o upstream #{@extraction[:upstream]} #{@extraction[:destination]}" - end + def extract + puts @extraction + clone + extract_branch + remove_remote + remove_tags + filter_branch + end - def extract_branch - puts 'Extracting Branch…' - Dir.chdir(@extraction[:destination]) - branch = "extract_#{@extraction[:name]}" - `git checkout #{@extraction[:upstream_branch]}` - `git fetch upstream && git rebase upstream/master` - if system("git branch | grep #{branch}") - `git branch -D #{branch}` + def clone + return if Dir.exist?(@extraction[:destination]) + puts 'Cloning…' + system "git clone -o upstream #{@extraction[:upstream]} #{@extraction[:destination]}" end - `git checkout -b #{branch}` - extractions = @extraction[:extractions].join(' ') - `git rm -r #{extractions}` - `git commit -m "Extract #{@extraction[:name]}"` - end - def remove_remote - `git remote rm upstream` - end + def extract_branch + puts 'Extracting Branch…' + Dir.chdir(@extraction[:destination]) + branch = "extract_#{@extraction[:name]}" + `git checkout #{@extraction[:upstream_branch]}` + `git fetch upstream && git rebase upstream/master` + if system("git branch | grep #{branch}") + `git branch -D #{branch}` + end + `git checkout -b #{branch}` + extractions = @extraction[:extractions].join(' ') + `git rm -r 
#{extractions}` + `git commit -m "Extract #{@extraction[:name]}"` + end - def remove_tags - puts 'removing tags' - tags = `git tag` - tags.split.each do |tag| - puts "Removing tag #{tag}" - `git tag -d #{tag}` + def remove_remote + `git remote rm upstream` end - end - def filter_branch - extractions = @extraction[:extractions].join(' ') - `time git filter-branch --index-filter ' - git read-tree --empty - git reset $GIT_COMMIT -- #{extractions} - ' --msg-filter ' - cat - - echo - echo - echo "(transferred from #{@extraction[:upstream_name]}@$GIT_COMMIT)" - ' -- #{@extraction[:upstream_branch]} -- #{extractions}` + def remove_tags + puts 'removing tags' + tags = `git tag` + tags.split.each do |tag| + puts "Removing tag #{tag}" + `git tag -d #{tag}` + end + end + + def filter_branch + extractions = @extraction[:extractions].join(' ') + `time git filter-branch --index-filter ' + git read-tree --empty + git reset $GIT_COMMIT -- #{extractions} + ' --msg-filter ' + cat - + echo + echo + echo "(transferred from #{@extraction[:upstream_name]}@$GIT_COMMIT)" + ' -- #{@extraction[:upstream_branch]} -- #{extractions}` + end end end diff --git a/test/test_helper.rb b/test/test_helper.rb index a14a970..8716d95 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -13,7 +13,7 @@ FileUtils.mkdir_p TEST_SANDBOX -class CodeExtractor +module CodeExtractor # Base class for all test classes. 
# # Will create a sandbox directory in `test/tmp/sandbox`, uniq to each test @@ -80,7 +80,7 @@ def run_extraction File.write extractions_yml, extractions_yaml capture_subprocess_io do - CodeExtractor.new(extractions_yml).extract + CodeExtractor::Runner.new(extractions_yml).extract end ensure Dir.chdir pwd From 72ba047a8fecf4caa9b6b6b06f3c7e241527bb5a Mon Sep 17 00:00:00 2001 From: Nick LaMuro Date: Wed, 30 Oct 2019 17:49:16 -0500 Subject: [PATCH 02/32] [CodeExtractor] Break out Config to its own class This is just a refactor commit to start the process of breaking out the pieces of the code so that we can more easily distinguish certain pieces of the script. --- lib/code_extractor.rb | 53 ++++++++++++++++++++++++++++--------------- test/test_helper.rb | 3 ++- 2 files changed, 37 insertions(+), 19 deletions(-) diff --git a/lib/code_extractor.rb b/lib/code_extractor.rb index 7f147bf..e0eecf0 100644 --- a/lib/code_extractor.rb +++ b/lib/code_extractor.rb @@ -8,21 +8,38 @@ def run end module_function :run - class Runner - attr_reader :extraction + class Config + def initialize(config_file = 'extractions.yml') + @config = YAML.load_file(config_file) + + @config[:upstream_branch] ||= "master" + @config[:destination] = File.expand_path(@config[:destination]) + + validate! + end + + def [](key) + @config[key] + end - def initialize(extraction = 'extractions.yml') - @extraction = YAML.load_file(extraction) - @extraction[:upstream_branch] ||= "master" + def inspect + @config.inspect + end + alias to_s inspect - missing = %i[name destination upstream upstream_name extractions].reject { |k| @extraction[k] } + def validate! + missing = %i[name destination upstream upstream_name extractions].reject { |k| @config[k] } raise ArgumentError, "#{missing.map(&:inspect).join(", ")} key(s) missing" if missing.any? 
+ end + end - @extraction[:destination] = File.expand_path(@extraction[:destination]) + class Runner + def initialize config = nil + @config = config || Config.new end def extract - puts @extraction + puts @config clone extract_branch remove_remote @@ -31,24 +48,24 @@ def extract end def clone - return if Dir.exist?(@extraction[:destination]) + return if Dir.exist?(@config[:destination]) puts 'Cloning…' - system "git clone -o upstream #{@extraction[:upstream]} #{@extraction[:destination]}" + system "git clone -o upstream #{@config[:upstream]} #{@config[:destination]}" end def extract_branch puts 'Extracting Branch…' - Dir.chdir(@extraction[:destination]) - branch = "extract_#{@extraction[:name]}" - `git checkout #{@extraction[:upstream_branch]}` + Dir.chdir(@config[:destination]) + branch = "extract_#{@config[:name]}" + `git checkout #{@config[:upstream_branch]}` `git fetch upstream && git rebase upstream/master` if system("git branch | grep #{branch}") `git branch -D #{branch}` end `git checkout -b #{branch}` - extractions = @extraction[:extractions].join(' ') + extractions = @config[:extractions].join(' ') `git rm -r #{extractions}` - `git commit -m "Extract #{@extraction[:name]}"` + `git commit -m "Extract #{@config[:name]}"` end def remove_remote @@ -65,7 +82,7 @@ def remove_tags end def filter_branch - extractions = @extraction[:extractions].join(' ') + extractions = @config[:extractions].join(' ') `time git filter-branch --index-filter ' git read-tree --empty git reset $GIT_COMMIT -- #{extractions} @@ -73,8 +90,8 @@ def filter_branch cat - echo echo - echo "(transferred from #{@extraction[:upstream_name]}@$GIT_COMMIT)" - ' -- #{@extraction[:upstream_branch]} -- #{extractions}` + echo "(transferred from #{@config[:upstream_name]}@$GIT_COMMIT)" + ' -- #{@config[:upstream_branch]} -- #{extractions}` end end end diff --git a/test/test_helper.rb b/test/test_helper.rb index 8716d95..7d5c5a7 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -80,7 +80,8 
@@ def run_extraction File.write extractions_yml, extractions_yaml capture_subprocess_io do - CodeExtractor::Runner.new(extractions_yml).extract + config = Config.new extractions_yml + Runner.new(config).extract end ensure Dir.chdir pwd From 4c1dba94437282acfb37daefa43e7056bbbb3eec Mon Sep 17 00:00:00 2001 From: Nick LaMuro Date: Wed, 30 Oct 2019 18:38:59 -0500 Subject: [PATCH 03/32] [CodeExtractor] Break out GitProject This commit separates the concerns of the runner and the GitProject to define methods that make sense for a generic GitProject. In the future, the intent is that this lib should be able to move code between two separate and already existing `GitProject` instances, and having this abstraction layer in place will allow code to be shared more easily. This also still attempts to retain the "script-like" nature of the original project in the `Runner`, but hopefully creating this generic `GitProject` construct will facilitate future improvements. --- lib/code_extractor.rb | 111 ++++++++++++++++++++++++++---------------- 1 file changed, 68 insertions(+), 43 deletions(-) diff --git a/lib/code_extractor.rb b/lib/code_extractor.rb index e0eecf0..cf26f24 100644 --- a/lib/code_extractor.rb +++ b/lib/code_extractor.rb @@ -12,8 +12,8 @@ class Config def initialize(config_file = 'extractions.yml') @config = YAML.load_file(config_file) - @config[:upstream_branch] ||= "master" @config[:destination] = File.expand_path(@config[:destination]) + @config[:upstream_branch] ||= "master" validate! end @@ -33,65 +33,90 @@ def validate! 
end end - class Runner - def initialize config = nil - @config = config || Config.new - end + class GitProject + attr_reader :name, :url, :git_dir, :new_branch, :source_branch - def extract - puts @config - clone - extract_branch - remove_remote - remove_tags - filter_branch + def initialize name, url + @name = name + @url = url end - def clone - return if Dir.exist?(@config[:destination]) - puts 'Cloning…' - system "git clone -o upstream #{@config[:upstream]} #{@config[:destination]}" + def clone_to destination, origin_name = "upstream" + @git_dir ||= destination + + if Dir.exist?(git_dir) + raise "Not a git dir!" unless system "git -C #{git_dir} status" + else + puts 'Cloning…' + system "git clone --origin #{origin_name} #{url} #{git_dir}" + end end - def extract_branch + def extract_branch source_branch, new_branch, extractions puts 'Extracting Branch…' - Dir.chdir(@config[:destination]) - branch = "extract_#{@config[:name]}" - `git checkout #{@config[:upstream_branch]}` - `git fetch upstream && git rebase upstream/master` - if system("git branch | grep #{branch}") - `git branch -D #{branch}` + @new_branch = new_branch + @source_branch = source_branch + Dir.chdir git_dir do + `git checkout #{source_branch}` + `git fetch upstream && git rebase upstream/master` + if system("git branch | grep #{new_branch}") + `git branch -D #{new_branch}` + end + `git checkout -b #{new_branch}` + `git rm -r #{extractions}` + `git commit -m "Extract #{name}"` end - `git checkout -b #{branch}` - extractions = @config[:extractions].join(' ') - `git rm -r #{extractions}` - `git commit -m "Extract #{@config[:name]}"` end def remove_remote - `git remote rm upstream` + Dir.chdir git_dir do + `git remote rm upstream` + end end def remove_tags puts 'removing tags' - tags = `git tag` - tags.split.each do |tag| - puts "Removing tag #{tag}" - `git tag -d #{tag}` + Dir.chdir git_dir do + tags = `git tag` + tags.split.each do |tag| + puts "Removing tag #{tag}" + `git tag -d #{tag}` + end + end 
+ end + + def filter_branch extractions, upstream_name + Dir.chdir git_dir do + `time git filter-branch --index-filter ' + git read-tree --empty + git reset $GIT_COMMIT -- #{extractions} + ' --msg-filter ' + cat - + echo + echo + echo "(transferred from #{upstream_name}@$GIT_COMMIT)" + ' -- #{source_branch} -- #{extractions}` end end + end - def filter_branch - extractions = @config[:extractions].join(' ') - `time git filter-branch --index-filter ' - git read-tree --empty - git reset $GIT_COMMIT -- #{extractions} - ' --msg-filter ' - cat - - echo - echo - echo "(transferred from #{@config[:upstream_name]}@$GIT_COMMIT)" - ' -- #{@config[:upstream_branch]} -- #{extractions}` + class Runner + def initialize config = nil + @config = config || Config.new + @source_project = GitProject.new @config[:name], @config[:upstream] + end + + def extractions + @extractions ||= @config[:extractions].join(' ') + end + + def extract + puts @config + @source_project.clone_to @config[:destination] + @source_project.extract_branch @config[:upstream_branch], "extract_#{@config[:name]}", extractions + @source_project.remove_remote + @source_project.remove_tags + @source_project.filter_branch extractions, @config[:upstream_name] end end end From eecb699667b327f47c18c19a26b929c0c715058c Mon Sep 17 00:00:00 2001 From: Nick LaMuro Date: Tue, 12 Nov 2019 19:58:01 -0600 Subject: [PATCH 04/32] [Rakefile] Add test:debug task A convenience task for running specs and do some diagnosis on the generated repos following the test run. Basically avoids cleaning up the sandbox git directory after each test. 
--- Rakefile | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Rakefile b/Rakefile index 3b2c241..f5b6453 100644 --- a/Rakefile +++ b/Rakefile @@ -26,6 +26,13 @@ namespace :test do sandbox_dir = File.join "test", "tmp" rm_rf sandbox_dir end + + + desc "run a DEBUG test run after cleaning" + task :debug => :clean do + ENV["DEBUG"] = "1" + Rake::Task["test"].invoke + end end task :default => :test From 8754a0e08836b3181990279c2b9d2e375fae3341 Mon Sep 17 00:00:00 2001 From: Nick LaMuro Date: Tue, 12 Nov 2019 20:16:14 -0600 Subject: [PATCH 05/32] [test_helper.rb] Add assert_commits helper Used to test certain commits exist in the destination repo for the current branch. --- test/test_helper.rb | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/test/test_helper.rb b/test/test_helper.rb index 7d5c5a7..62f6786 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -44,6 +44,16 @@ def assert_no_tags assert tags.empty?, "Expected there to be no tags, but got `#{tags.join ', '}'" end + def assert_commits expected_commits + start_commit = destination_repo.last_commit + sorting = Rugged::SORT_TOPO # aka: sort like git-log + actual_commits = destination_repo.walk(start_commit, sorting).map(&:message) + + actual_commits.map! { |msg| msg.lines.first.chomp } + + assert_equal expected_commits, actual_commits + end + # Helper methods def destination_repo From fbfac90732496c030fb6b472eaa721cd5044ee2f Mon Sep 17 00:00:00 2001 From: Nick LaMuro Date: Tue, 12 Nov 2019 20:19:25 -0600 Subject: [PATCH 06/32] [test_helper.rb] Add .set_destination_dir Used to reset the location of the destination_repo directory and clear out any related variables. 
--- test/test_helper.rb | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test/test_helper.rb b/test/test_helper.rb index 62f6786..4bb8bbe 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -102,6 +102,12 @@ def set_extractions new_extractions extractions_hash[:extractions] = @extractions end + def set_destination_dir dir + @destination_repo = nil # reset + @extracted_dir = dir + extractions_hash[:destination] = @extracted_dir + end + def extractions_hash @extractions_hash ||= { :name => "my_extractions", From f909737f83c1d7f721e19bb71184bd4f0cbb9adf Mon Sep 17 00:00:00 2001 From: Nick LaMuro Date: Tue, 12 Nov 2019 20:20:33 -0600 Subject: [PATCH 07/32] [test_helper.rb] Add TestRepo.clone_at helper Used to clone a repo (first arg) to a new directory (second arg). `TestRepo#clone` is added as well to assist with this. --- test/test_helper.rb | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/test/test_helper.rb b/test/test_helper.rb index 4bb8bbe..2d90440 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -180,6 +180,11 @@ def self.generate repo_path, file_struct, &block repo.generate(&block) end + def self.clone_at url, dir, &block + repo = new dir, [] + repo.clone(url, &block) + end + def initialize repo_path, file_struct @commit_count = 0 @repo_path = Pathname.new repo_path @@ -197,6 +202,15 @@ def generate &block instance_eval(&block) if block_given? end + def clone url, &block + @repo = Rugged::Repository.clone_at url, @repo_path.to_s + @index = repo.index + @last_commit = repo.last_commit + # puts @repo.inspect + + instance_eval(&block) if block_given? 
+ end + # Create a new branch (don't checkout) # # $ git branch other_branch From 365eff23b4e8175190530b01d56ae681a6cfc489 Mon Sep 17 00:00:00 2001 From: Nick LaMuro Date: Tue, 12 Nov 2019 20:22:40 -0600 Subject: [PATCH 08/32] [test_helper.rb] Add TestRepo#checkout_b An instance method helper for TestRepo which performs a checkout of a new branch based off another (or the current branch if no second arg is passed). Mirrors the usage of `git checkout -b [new_branch] [base_branch]` --- test/test_helper.rb | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test/test_helper.rb b/test/test_helper.rb index 2d90440..d93dd0c 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -223,6 +223,12 @@ def checkout branch repo.checkout branch end + def checkout_b branch, source = nil + repo.create_branch(*[branch, source].compact) + repo.checkout branch + @last_commit = repo.last_commit + end + # Commit with all changes added to the index # # $ git add . && git commit -am "${msg}" From 3fbac22308b723b087c4f0709bda44f5e12f6e70 Mon Sep 17 00:00:00 2001 From: Nick LaMuro Date: Mon, 18 Nov 2019 18:11:50 -0600 Subject: [PATCH 09/32] [test_helper.rb] Add TestRepo#execute Shared method for running DSL methods (basically just calls `instance_eval`) --- test/test_helper.rb | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/test/test_helper.rb b/test/test_helper.rb index d93dd0c..bd928bc 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -199,7 +199,7 @@ def generate &block git_init git_commit_initial - instance_eval(&block) if block_given? + execute(&block) if block_given? end def clone url, &block @@ -208,7 +208,12 @@ def clone url, &block @last_commit = repo.last_commit # puts @repo.inspect - instance_eval(&block) if block_given? + execute(&block) if block_given? 
+ end + + # Run DSL methods for given TestRepo instance + def execute &block + instance_eval(&block) end # Create a new branch (don't checkout) From ed3a6107409532bdca01b946bb4dc131ed115f79 Mon Sep 17 00:00:00 2001 From: Nick LaMuro Date: Mon, 18 Nov 2019 18:05:13 -0600 Subject: [PATCH 10/32] [test_helper.rb] Add TestRepo.merge Adds a helper function for handling merges via rugged. Of note: A `.checkout_head` call was added in addition to what was there previously. After doing a `commit` in rugged with a merge, the git tree is left in an odd state (probably since the merge functionality isn't "porcelain"), so this makes sure the git tree state is on HEAD. --- test/test_helper.rb | 51 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 50 insertions(+), 1 deletion(-) diff --git a/test/test_helper.rb b/test/test_helper.rb index bd928bc..ea8b0b5 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -173,7 +173,8 @@ def extractions_yaml # to be empty, otherwise a defining files in them is enough. # class TestRepo - attr_reader :file_struct, :last_commit, :repo_path, :repo, :index + attr_accessor :repo + attr_reader :file_struct, :last_commit, :repo_path,:index def self.generate repo_path, file_struct, &block repo = new repo_path, file_struct @@ -185,6 +186,15 @@ def self.clone_at url, dir, &block repo.clone(url, &block) end + def self.merge repo, branch, base_branch = nil + repo = Rugged::Repository.new repo unless repo.is_a? 
Rugged::Repository + dir = repo.workdir + + test_repo = new dir, [] + test_repo.repo = repo + test_repo.merge branch, base_branch + end + def initialize repo_path, file_struct @commit_count = 0 @repo_path = Pathname.new repo_path @@ -255,6 +265,41 @@ def tag tag_name repo.tags.create tag_name, @last_commit end + # Add a merge branch into current branch with `--no-ff` + # + # (AKA: Merge a PR like on github) + # + # $ git merge --no-ff --no-edit + # + # If `base_branch` is passed, use that, otherwise use `HEAD` + # + def merge branch, base_branch = nil + # Code is a combination of the examples found here: + # + # - https://github.com/libgit2/rugged/blob/3de6a0a7/test/merge_test.rb#L4-L18 + # - http://violetzijing.is-programmer.com/2015/11/6/some_notes_about_rugged.187772.html + # - https://stackoverflow.com/a/27290470 + # + # In otherwords... not obvious how to do a `git merge --no-ff --no-edit` + # with rugged... le-sigh... + repo.checkout base_branch if base_branch + + base = (base_branch ? 
repo.branches[base_branch] : repo.head).target_id + topic = repo.branches[branch].target_id + merge_index = repo.merge_commits(base, topic) + + Rugged::Commit.create( + repo, + :message => "Merged branch '#{branch}' into #{base_branch || current_branch_name}", + :parents => [base, topic], + :tree => merge_index.write_tree(repo), + :update_ref => "HEAD" + ) + + repo.checkout_head :strategy => :force + @last_commit = repo.last_commit + end + # Add (or update) a file in the repo, and optionally write content to it # # The content is optional, but it will fully overwrite the content @@ -277,6 +322,10 @@ def add_to_file entry, content File.write path, content, :mode => "a" end + def current_branch_name + repo.head.name.sub(/^refs\/heads\//, '') + end + private # Generate repo structure based on file_structure array From 81920699c2a3f6a73eb3091d3d5a65a63a4e43d2 Mon Sep 17 00:00:00 2001 From: Nick LaMuro Date: Tue, 12 Nov 2019 20:25:19 -0600 Subject: [PATCH 11/32] [CodeExtractor] Add reinsert functionality This is effectively the "undo" feature of the original project. By that, it allows a user to take a portion (or all) of a previously extracted project, and re-insert it back into the original project from which it came from. This becomes tricky, as we need to filter out the existing commits that existed previously in the old ("target") repository. As a result, 3 `filter-branch` passes are done to achieve this change: 1. After determining all file names that existed for the current extractions, filter commits to down to ones that only include the files we wish to commit. In addition, create a script to move any of those files into a temporary directory, maintaining the directory structure as we go. 2. Take the temporary directory that was created and do a second filter that makes the new root of the project the temporary directory we created in the previous filter. 3. 
After adding the target repository as a remote, filter the commits out that already exist in that remote. Also in this step, a commit is re-written that is shared between both repos that will be the "re-inject" commit, which all of the injected commits will be based off of. From there, cherry-pick commits onto a new branch that is based off the existing HEAD of the current target remote's branch (most likely `master`) that will be receiving the "injected" commits. This allows the target repo to have the commits that were created "post extraction" that are based off a commit that re-adds the code in a state where it was originally extracted from. --- lib/code_extractor.rb | 240 ++++++++++++++++++++++++++++++++++-- test/code_extractor_test.rb | 123 ++++++++++++++++++ test/test_helper.rb | 6 +- 3 files changed, 356 insertions(+), 13 deletions(-) diff --git a/lib/code_extractor.rb b/lib/code_extractor.rb index cf26f24..c443500 100644 --- a/lib/code_extractor.rb +++ b/lib/code_extractor.rb @@ -4,7 +4,7 @@ # The git history. module CodeExtractor def run - Runner.new.extract + Runner.new.run end module_function :run @@ -34,7 +34,7 @@ def validate! 
end class GitProject - attr_reader :name, :url, :git_dir, :new_branch, :source_branch + attr_reader :name, :url, :git_dir, :new_branch, :source_branch, :target_name def initialize name, url @name = name @@ -58,7 +58,7 @@ def extract_branch source_branch, new_branch, extractions @source_branch = source_branch Dir.chdir git_dir do `git checkout #{source_branch}` - `git fetch upstream && git rebase upstream/master` + `git fetch upstream && git rebase upstream/#{source_branch}` if system("git branch | grep #{new_branch}") `git branch -D #{new_branch}` end @@ -85,18 +85,212 @@ def remove_tags end end - def filter_branch extractions, upstream_name + def extract_commits extractions, upstream_name Dir.chdir git_dir do `time git filter-branch --index-filter ' git read-tree --empty git reset $GIT_COMMIT -- #{extractions} + ' #{msg_filter upstream_name} -- #{source_branch} -- #{extractions}` + end + end + + def prune_commits extractions + puts "Pruning commits…" + + build_prune_script extractions + + Dir.chdir git_dir do + `git checkout -b #{prune_branch} #{@source_branch}` + `git filter-branch -f --prune-empty --tree-filter #{@prune_script} HEAD` + `git filter-branch -f --prune-empty --subdirectory-filter #{@keep_directory}` + end + end + + def add_target_remote target_name, target_remote + puts "Add target repo as a remote…" + @target_name = target_name + + Dir.chdir git_dir do + puts "git remote add #{target_remote_name} #{target_remote}" + `git remote add #{target_remote_name} #{target_remote}` + `git fetch #{target_remote_name}` + end + end + + # "Inject" commits one repo's branch back into the target repo's + # + # Assuming the target remote has been added (see add_target_remote), this + # method does so by doing the following to achieve the "injected" history: + # + # 1. Filters commits that already exist in the target repo. Additionally, + # the last commit that is shared between the two is actually used as the + # "root" commit for the injected commits. 
The rest are assumed to be new + # from the new repository. + # + # The "root" commit has it's commit message modified to reflect this + # change. + # + # The other part of the filter branch also applies the changes so they + # exist within in context the target codebase, and not just in isolation + # for itself (hence the `git reset #{@reference_target_branch} -- .` bit). + # The changes from the upstream repo are then applied on top of the + # existing code base. + # + # 2. A new branch is checked out that is based off the target remote's + # target branch, but does not track that branch. + # + # 3. The commits that have been filtered are cherry-picked on to this new + # branch, and the "root" commit assumes the parent of the current HEAD of + # the target remote's (master) branch + # + def inject_commits target_base_branch, upstream_name + puts "Injecting commits…" + + target_base_branch ||= 'master' + commit_msg_filter = "(transferred from #{upstream_name}" + + Dir.chdir git_dir do + reference_target_branch = "#{target_remote_name}/#{target_base_branch}" + previously_extracted_commits = `git log --pretty="%H" --grep="#{commit_msg_filter}"` + + # special commit that will get renamed re-worded to: + # + # Re-insert extractions from #{upstream_name} + # + last_extracted_commit = previously_extracted_commits.lines[0].chomp! + first_injected_msg = `git show -s --format="%s%n%n%b" #{last_extracted_commit}` + first_injected_msg = first_injected_msg.lines.reject { |line| + line.include? 
commit_msg_filter + }.join + first_injected_msg.prepend "*** Original Commit message shown below ***\n\n" + first_injected_msg.prepend "Re-insert extractions from #{target_name}\n\n" + File.write File.expand_path("../LAST_EXTRACTED_COMMIT_MSG", git_dir), first_injected_msg + + `time git filter-branch -f --commit-filter ' + export was_extracted=$(git show -s --format="%s%n%n%b" $GIT_COMMIT | grep -s "#{commit_msg_filter}") + if [ "$GIT_COMMIT" = "#{last_extracted_commit}" ] || [ "$was_extracted" == "" ]; then + git commit-tree "$@"; + else + skip_commit "$@"; + fi + ' --index-filter ' + git read-tree --empty + git reset #{reference_target_branch} -- . + git checkout $GIT_COMMIT -- . ' --msg-filter ' + if [ "$GIT_COMMIT" = "#{last_extracted_commit}" ]; then + cat #{File.expand_path File.join("..", "LAST_EXTRACTED_COMMIT_MSG"), git_dir} + else + cat - + fi + echo + echo + echo "(transferred from #{upstream_name}@$GIT_COMMIT)" + ' -- #{prune_branch}` + + `git checkout --no-track -b #{inject_branch} #{reference_target_branch}` + `git cherry-pick ..#{prune_branch}` + end + end + + def run_extra_cmds cmds + Dir.chdir git_dir do + cmds.each { |cmd| system cmd } if cmds + end + end + + private + + def target_remote_name + @target_remote_name ||= "code_extractor_target_for_#{name}" + end + + def prune_branch + @prune_branch ||= "code_extractor_prune_#{name}" + end + alias prune_commits_remote prune_branch + + def inject_branch + @inject_branch ||= "code_extractor_inject_#{name}" + end + alias inject_remote inject_branch + + # Given a list of extractions, build a script that will move a list of + # files (extractions) from their current location in a given commit to a + # unused directory. + # + # More complicated than it looks, this will be used as part of a two-phased + # `git filter-branch` to: + # + # 1. move extractable files into a subdirectory with `--tree-filter` + # 2. 
only keep commits for files moved into that subdirectory, and make + # the subdirectory the new project root. + # + # For consistency, we want to keep the subdirectories' structure in the + # same line as what was there previously, so this script helps do that, and + # also creates directories/files when they don't exist. + # + # Returns `true` at the end of the script incase the last `mv` fails (the + # source doesn't exist in this commit, for example) + # + def build_prune_script extractions + require 'set' + require 'fileutils' + + @keep_directory = "code_extractor_git_keeps_#{Time.now.to_i}" + git_log_follow = "git log --name-only --format=format: --follow" + prune_mkdirs = Set.new + prune_mvs = [] + + Dir.chdir git_dir do + extractions.each do |file_or_dir| + if Dir.exist? file_or_dir + files = Dir.glob["#{file_or_dir}/**/*"] + else + files = [file_or_dir] + end + + files.each do |extraction_file| + file_and_ancestors = `#{git_log_follow} -- #{extraction_file}`.split("\n").uniq + + file_and_ancestors.reject! 
{ |file| file.length == 0 } + + file_and_ancestors.each do |file| + file_dir = File.dirname file + prune_mkdirs.add file_dir + prune_mvs << [file, "#{@keep_directory}/#{file_dir}"] + end + end + end + end + + @prune_script = File.join Dir.pwd, "code_extractor_#{name}_prune_script.sh" + + File.open @prune_script, "w" do |script| + prune_mkdirs.each do |dir| + script.puts "mkdir -p #{File.join @keep_directory, dir}" + end + + script.puts + prune_mvs.each do |(file, dir)| + script.puts "mv #{file} #{dir} 2>/dev/null" + end + + script.puts + script.puts "true" + end + FileUtils.chmod "+x", @prune_script + end + + def msg_filter upstream_name + <<-MSG_FILTER.gsub(/^ {8}/, '').chomp + --msg-filter ' cat - echo echo echo "(transferred from #{upstream_name}@$GIT_COMMIT)" - ' -- #{source_branch} -- #{extractions}` - end + ' + MSG_FILTER end end @@ -106,17 +300,39 @@ def initialize config = nil @source_project = GitProject.new @config[:name], @config[:upstream] end - def extractions - @extractions ||= @config[:extractions].join(' ') - end - - def extract + # Either run `.reinsert` or `.extract` + # + # The `.reinsert` method will eject with `nil` unless the config setting to + # run in that mode is set + # + def run puts @config + @source_project.clone_to @config[:destination] @source_project.extract_branch @config[:upstream_branch], "extract_#{@config[:name]}", extractions @source_project.remove_remote @source_project.remove_tags - @source_project.filter_branch extractions, @config[:upstream_name] + + reinsert || extract + end + + def extractions + @extractions ||= @config[:extractions].join(' ') + end + + def extract + @source_project.extract_commits extractions, @config[:upstream_name] + end + + def reinsert + return unless @config[:reinsert] + + @source_project.prune_commits @config[:extractions] + @source_project.run_extra_cmds @config[:extra_cmds] + @source_project.add_target_remote @config[:target_name], @config[:target_remote] + @source_project.inject_commits 
@config[:target_base_branch], @config[:upstream_name] + + true end end end diff --git a/test/code_extractor_test.rb b/test/code_extractor_test.rb index ac88fe1..b570a58 100644 --- a/test/code_extractor_test.rb +++ b/test/code_extractor_test.rb @@ -111,4 +111,127 @@ def test_code_extractor_skips_cloning_if_directory_exists refute File.exist? "baz" end end + + def test_unextract_an_extraction + repo_structure = %w[ + foo/bar + baz + ] + + create_repo repo_structure do + update_file "foo/bar", "Bar Content" + commit "add Bar content" + tag "v1.0" + + add_file "qux", "QUX!!!" + commit + tag "v2.0" + end + + # original extraction to work off of, in which we "un-extract" this later + set_extractions ["foo"] + run_extraction + + # Merge our extracted branch (removed code) into the master branch of the + # original repository + # + is_bare = true + bare_repo_dir = File.join @sandbox_dir, "bare_original.git" + Rugged::Repository.init_at bare_repo_dir, is_bare + + # Can't push to a local non-bare repo via Rugged currently... hence this + # extra weirdness being done... + destination_repo.remotes.create("original", bare_repo_dir) + destination_repo.remotes["original"].push [destination_repo.head.name] + + original_repo.remotes.create("origin", bare_repo_dir) + original_repo.fetch("origin") + original_repo.remotes["origin"].push [original_repo.head.name] + original_repo.create_branch "extract_my_extractions", "origin/extract_my_extractions" + + # Code is a combination of the examples found here: + # + # - https://github.com/libgit2/rugged/blob/3de6a0a7/test/merge_test.rb#L4-L18 + # - http://violetzijing.is-programmer.com/2015/11/6/some_notes_about_rugged.187772.html + # - https://stackoverflow.com/a/27290470 + # + # In otherwords... not obvious how to do a `git merge --no-ff --no-edit` + # with rugged... le-sigh... 
+ # + # TODO: Move into a helper + base = original_repo.branches["master"].target_id + topic = original_repo.branches["extract_my_extractions"].target_id + merge_index = original_repo.merge_commits(base, topic) + + Rugged::Commit.create( + original_repo, + :message => "Merged branch 'extract_my_extractions' into master", + :parents => [base, topic], + :tree => merge_index.write_tree(original_repo), + :update_ref => "HEAD" + ) + + original_repo.remotes['origin'].push [original_repo.head.name] + + + # Run new extraction, with some extra commits added to the new repo that + # has been extracted previously + + new_upstream_dir = File.join @sandbox_dir, "new_upstream.git" + cloned_extractions_dir = File.join @sandbox_dir, "cloned_extractions.git" + + CodeExtractor::TestRepo.clone_at extracted_dir, cloned_extractions_dir do + checkout_b 'master', 'origin/master' + + update_file "foo/bar", "Updated Bar Content" + commit "update bar content" + + update_file "foo/baz", "Baz Content" + commit "add new baz" + end + + # Update the configuration for the (second) extraction + + set_extractions ["foo"] + + extractions_hash[:name] = "the_extracted" + extractions_hash[:reinsert] = true + extractions_hash[:target_name] = "MyOrg/extracted_repo" + extractions_hash[:target_remote] = bare_repo_dir + extractions_hash[:target_base_branch] = "master" + extractions_hash[:upstream] = cloned_extractions_dir + extractions_hash[:upstream_name] = "MyOrg/repo" + extractions_hash[:extra_cmds] = [ + "mkdir lib", + "mv foo lib", + "git add -A", + "git commit -m 'Move foo/ into lib/'" + ] + + set_destination_dir new_upstream_dir + + # The run the actual extraction we are testing + # + # aka: updated commits and puts 'lib/foo' back into the original repo + run_extraction + + in_git_dir do + assert_commits [ + "Move foo/ into lib/", + "add new baz", + "update bar content", + "Re-insert extractions from MyOrg/extracted_repo", + "Merged branch 'extract_my_extractions' into master", + "Extract 
my_extractions", + "Commit #3", + "add Bar content", + "Initial Commit" + ] + + refute Dir.exist? "foo" + assert File.exist? "qux" + assert File.exist? "lib/foo/bar" + assert File.exist? "lib/foo/baz" + end + end end diff --git a/test/test_helper.rb b/test/test_helper.rb index ea8b0b5..9dd2a3d 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -56,6 +56,10 @@ def assert_commits expected_commits # Helper methods + def original_repo + @original_repo ||= Rugged::Repository.new @repo_dir + end + def destination_repo @destination_repo ||= Rugged::Repository.new @extracted_dir end @@ -91,7 +95,7 @@ def run_extraction capture_subprocess_io do config = Config.new extractions_yml - Runner.new(config).extract + Runner.new(config).run end ensure Dir.chdir pwd From 75bbfb68c4edea10627dce805486557f6430dfce Mon Sep 17 00:00:00 2001 From: Nick LaMuro Date: Thu, 14 Nov 2019 15:57:06 -0600 Subject: [PATCH 12/32] [code_extractor_test.rb] Add .create_base_repo Meant as a seed method for building up the same sample repo, and reducing boiler plate in a few of the tests. Currently isn't made terribly flexible, but if that is required, it can be added in the future. --- test/code_extractor_test.rb | 49 ++++++++++++++----------------------- 1 file changed, 19 insertions(+), 30 deletions(-) diff --git a/test/code_extractor_test.rb b/test/code_extractor_test.rb index b570a58..9d45e71 100644 --- a/test/code_extractor_test.rb +++ b/test/code_extractor_test.rb @@ -2,21 +2,7 @@ class CodeExtractorTest < CodeExtractor::TestCase def test_code_extractor - repo_structure = %w[ - foo/bar - baz - ] - - create_repo repo_structure do - update_file "foo/bar", "Bar Content" - commit "add Bar content" - tag "v1.0" - - add_file "qux", "QUX!!!" 
- commit - tag "v2.0" - end - + create_base_repo set_extractions ["foo"] output, _ = run_extraction @@ -113,22 +99,8 @@ def test_code_extractor_skips_cloning_if_directory_exists end def test_unextract_an_extraction - repo_structure = %w[ - foo/bar - baz - ] - - create_repo repo_structure do - update_file "foo/bar", "Bar Content" - commit "add Bar content" - tag "v1.0" - - add_file "qux", "QUX!!!" - commit - tag "v2.0" - end - # original extraction to work off of, in which we "un-extract" this later + create_base_repo set_extractions ["foo"] run_extraction @@ -234,4 +206,21 @@ def test_unextract_an_extraction assert File.exist? "lib/foo/baz" end end + + def create_base_repo + repo_structure = %w[ + foo/bar + baz + ] + + create_repo repo_structure do + update_file "foo/bar", "Bar Content" + commit "add Bar content" + tag "v1.0" + + add_file "qux", "QUX!!!" + commit + tag "v2.0" + end + end end From 25ec0456d38094cabdd058fa415857fd32421767 Mon Sep 17 00:00:00 2001 From: Nick LaMuro Date: Thu, 14 Nov 2019 17:31:03 -0600 Subject: [PATCH 13/32] [code_extractor_test] Add .perform_merges_of_extracted_code Another helper method for breaking up some of the setup code for the "unextract" test, which handles the portion of the setup that replicates the process of merging the original extraction. Not the best implementation, since `@bare_repo_dir` gets set as part of the new method, and is used later in the test itself as the `target_remote` in the `extraction_hash`, but for now it is a quick solution. 
--- test/code_extractor_test.rb | 88 +++++++++++++++++++------------------ 1 file changed, 45 insertions(+), 43 deletions(-) diff --git a/test/code_extractor_test.rb b/test/code_extractor_test.rb index 9d45e71..ba184f4 100644 --- a/test/code_extractor_test.rb +++ b/test/code_extractor_test.rb @@ -103,48 +103,7 @@ def test_unextract_an_extraction create_base_repo set_extractions ["foo"] run_extraction - - # Merge our extracted branch (removed code) into the master branch of the - # original repository - # - is_bare = true - bare_repo_dir = File.join @sandbox_dir, "bare_original.git" - Rugged::Repository.init_at bare_repo_dir, is_bare - - # Can't push to a local non-bare repo via Rugged currently... hence this - # extra weirdness being done... - destination_repo.remotes.create("original", bare_repo_dir) - destination_repo.remotes["original"].push [destination_repo.head.name] - - original_repo.remotes.create("origin", bare_repo_dir) - original_repo.fetch("origin") - original_repo.remotes["origin"].push [original_repo.head.name] - original_repo.create_branch "extract_my_extractions", "origin/extract_my_extractions" - - # Code is a combination of the examples found here: - # - # - https://github.com/libgit2/rugged/blob/3de6a0a7/test/merge_test.rb#L4-L18 - # - http://violetzijing.is-programmer.com/2015/11/6/some_notes_about_rugged.187772.html - # - https://stackoverflow.com/a/27290470 - # - # In otherwords... not obvious how to do a `git merge --no-ff --no-edit` - # with rugged... le-sigh... 
- # - # TODO: Move into a helper - base = original_repo.branches["master"].target_id - topic = original_repo.branches["extract_my_extractions"].target_id - merge_index = original_repo.merge_commits(base, topic) - - Rugged::Commit.create( - original_repo, - :message => "Merged branch 'extract_my_extractions' into master", - :parents => [base, topic], - :tree => merge_index.write_tree(original_repo), - :update_ref => "HEAD" - ) - - original_repo.remotes['origin'].push [original_repo.head.name] - + perform_merges_of_extracted_code # Run new extraction, with some extra commits added to the new repo that # has been extracted previously @@ -169,7 +128,7 @@ def test_unextract_an_extraction extractions_hash[:name] = "the_extracted" extractions_hash[:reinsert] = true extractions_hash[:target_name] = "MyOrg/extracted_repo" - extractions_hash[:target_remote] = bare_repo_dir + extractions_hash[:target_remote] = @bare_repo_dir extractions_hash[:target_base_branch] = "master" extractions_hash[:upstream] = cloned_extractions_dir extractions_hash[:upstream_name] = "MyOrg/repo" @@ -223,4 +182,47 @@ def create_base_repo tag "v2.0" end end + + def perform_merges_of_extracted_code + # Merge our extracted branch (removed code) into the master branch of the + # original repository + # + is_bare = true + @bare_repo_dir = File.join @sandbox_dir, "bare_original.git" + Rugged::Repository.init_at @bare_repo_dir, is_bare + + # Can't push to a local non-bare repo via Rugged currently... hence this + # extra weirdness being done... 
+ destination_repo.remotes.create("original", @bare_repo_dir) + destination_repo.remotes["original"].push [destination_repo.head.name] + + original_repo.remotes.create("origin", @bare_repo_dir) + original_repo.fetch("origin") + original_repo.remotes["origin"].push [original_repo.head.name] + original_repo.create_branch "extract_my_extractions", "origin/extract_my_extractions" + + # Code is a combination of the examples found here: + # + # - https://github.com/libgit2/rugged/blob/3de6a0a7/test/merge_test.rb#L4-L18 + # - http://violetzijing.is-programmer.com/2015/11/6/some_notes_about_rugged.187772.html + # - https://stackoverflow.com/a/27290470 + # + # In otherwords... not obvious how to do a `git merge --no-ff --no-edit` + # with rugged... le-sigh... + # + # TODO: Move into a helper + base = original_repo.branches["master"].target_id + topic = original_repo.branches["extract_my_extractions"].target_id + merge_index = original_repo.merge_commits(base, topic) + + Rugged::Commit.create( + original_repo, + :message => "Merged branch 'extract_my_extractions' into master", + :parents => [base, topic], + :tree => merge_index.write_tree(original_repo), + :update_ref => "HEAD" + ) + + original_repo.remotes['origin'].push [original_repo.head.name] + end end From cb508a887d15566c71858f8b5bb84d8b7258ab5f Mon Sep 17 00:00:00 2001 From: Nick LaMuro Date: Thu, 14 Nov 2019 17:37:05 -0600 Subject: [PATCH 14/32] [code_extractor_test] Add .apply_new_commits_on_extracted_repo Another helper method that includes boilerplate for updating the newly extracted repo with some new commits for use when "unextracting" Once again, instance variables were used here in a less than graceful fashion, but it allowed for moving a bit more code out of the main testing method that was mostly trivial. 
--- test/code_extractor_test.rb | 41 ++++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/test/code_extractor_test.rb b/test/code_extractor_test.rb index ba184f4..c0e6444 100644 --- a/test/code_extractor_test.rb +++ b/test/code_extractor_test.rb @@ -104,22 +104,7 @@ def test_unextract_an_extraction set_extractions ["foo"] run_extraction perform_merges_of_extracted_code - - # Run new extraction, with some extra commits added to the new repo that - # has been extracted previously - - new_upstream_dir = File.join @sandbox_dir, "new_upstream.git" - cloned_extractions_dir = File.join @sandbox_dir, "cloned_extractions.git" - - CodeExtractor::TestRepo.clone_at extracted_dir, cloned_extractions_dir do - checkout_b 'master', 'origin/master' - - update_file "foo/bar", "Updated Bar Content" - commit "update bar content" - - update_file "foo/baz", "Baz Content" - commit "add new baz" - end + apply_new_commits_on_extracted_repo # Update the configuration for the (second) extraction @@ -130,7 +115,7 @@ def test_unextract_an_extraction extractions_hash[:target_name] = "MyOrg/extracted_repo" extractions_hash[:target_remote] = @bare_repo_dir extractions_hash[:target_base_branch] = "master" - extractions_hash[:upstream] = cloned_extractions_dir + extractions_hash[:upstream] = @cloned_extractions_dir extractions_hash[:upstream_name] = "MyOrg/repo" extractions_hash[:extra_cmds] = [ "mkdir lib", @@ -139,9 +124,12 @@ def test_unextract_an_extraction "git commit -m 'Move foo/ into lib/'" ] - set_destination_dir new_upstream_dir + set_destination_dir @new_upstream_dir - # The run the actual extraction we are testing + # Run new extraction, with some extra commits added to the new repo that + # has been extracted previously + # + # This next line will run the actual extraction we are testing # # aka: updated commits and puts 'lib/foo' back into the original repo run_extraction @@ -225,4 +213,19 @@ def perform_merges_of_extracted_code 
original_repo.remotes['origin'].push [original_repo.head.name] end + + def apply_new_commits_on_extracted_repo + @new_upstream_dir = File.join @sandbox_dir, "new_upstream.git" + @cloned_extractions_dir = File.join @sandbox_dir, "cloned_extractions.git" + + CodeExtractor::TestRepo.clone_at extracted_dir, @cloned_extractions_dir do + checkout_b 'master', 'origin/master' + + update_file "foo/bar", "Updated Bar Content" + commit "update bar content" + + update_file "foo/baz", "Baz Content" + commit "add new baz" + end + end end From c50bfd064000b31eafa70d81aa26858516683322 Mon Sep 17 00:00:00 2001 From: Nick LaMuro Date: Thu, 14 Nov 2019 17:52:13 -0600 Subject: [PATCH 15/32] [code_extractor_test.rb] Add .update_extraction_hash Another helper method for updating common changes to the extraction_hash when doing a re-insert. This, and the previous commits, will make more sense in a follow up commit handling a different edge case with `re-insert`. --- test/code_extractor_test.rb | 66 ++++++++++++++++++++++++++----------- 1 file changed, 46 insertions(+), 20 deletions(-) diff --git a/test/code_extractor_test.rb b/test/code_extractor_test.rb index c0e6444..a299cda 100644 --- a/test/code_extractor_test.rb +++ b/test/code_extractor_test.rb @@ -103,28 +103,11 @@ def test_unextract_an_extraction create_base_repo set_extractions ["foo"] run_extraction + + # Perform updates to extracted repo to simulate changes since extraction perform_merges_of_extracted_code apply_new_commits_on_extracted_repo - - # Update the configuration for the (second) extraction - - set_extractions ["foo"] - - extractions_hash[:name] = "the_extracted" - extractions_hash[:reinsert] = true - extractions_hash[:target_name] = "MyOrg/extracted_repo" - extractions_hash[:target_remote] = @bare_repo_dir - extractions_hash[:target_base_branch] = "master" - extractions_hash[:upstream] = @cloned_extractions_dir - extractions_hash[:upstream_name] = "MyOrg/repo" - extractions_hash[:extra_cmds] = [ - "mkdir lib", - "mv
foo lib", - "git add -A", - "git commit -m 'Move foo/ into lib/'" - ] - - set_destination_dir @new_upstream_dir + update_extraction_hash # Run new extraction, with some extra commits added to the new repo that # has been extracted previously @@ -228,4 +211,47 @@ def apply_new_commits_on_extracted_repo commit "add new baz" end end + + # Update the configuration for the (second) extraction + # + # Boiler plate will do the following: + # + # - Assumes "foo/" is still the only extracted directory + # - A "reinsert" is the new action that will be performed + # - The following methods have been executed prior + # * `.create_base_repo` + # * `.perform_merges_of_extracted_code` + # * `.apply_new_commits_on_extracted_repo` + # - As a result, assumes the following instance variables are set: + # * `@new_upstream_dir` + # * `@bare_repo_dir` + # * `@cloned_extractions_dir` + # - The desired behavior of `.extra_cmds` is to move `foo` into `lib/` + # + # There is a `custom_updates` hash available in the args for adding + # additional changes, so if any of the above are not the case, changes can be + # made at the end. 
+ # + def update_extraction_hash custom_updates = {} + set_extractions ["foo"] + set_destination_dir @new_upstream_dir + + extractions_hash[:name] = "the_extracted" + extractions_hash[:reinsert] = true + extractions_hash[:target_name] = "MyOrg/extracted_repo" + extractions_hash[:target_remote] = @bare_repo_dir + extractions_hash[:target_base_branch] = "master" + extractions_hash[:upstream] = @cloned_extractions_dir + extractions_hash[:upstream_name] = "MyOrg/repo" + extractions_hash[:extra_cmds] = [ + "mkdir lib", + "mv foo lib", + "git add -A", + "git commit -m 'Move foo/ into lib/'" + ] + + custom_updates.each do |hash_key, value| + extractions_hash[hash_key] = value + end + end end From bdad7949e86fdae891fa9f59228247ca8fe5f6a7 Mon Sep 17 00:00:00 2001 From: Nick LaMuro Date: Thu, 14 Nov 2019 18:15:27 -0600 Subject: [PATCH 16/32] Add code_extractor_reinsert_test.rb This extracts the "unextract" (aka: "reinsert") test into its own test, with the intent to later fill out other test cases in future commits. Note: The `create_base_repo` needed to be added to the `test_helper.rb` since it was needed by both test files. Not a huge change, but worth noting.
--- test/code_extractor_reinsert_test.rb | 122 +++++++++++++++++++++ test/code_extractor_test.rb | 157 --------------------------- test/test_helper.rb | 17 +++ 3 files changed, 139 insertions(+), 157 deletions(-) create mode 100644 test/code_extractor_reinsert_test.rb diff --git a/test/code_extractor_reinsert_test.rb b/test/code_extractor_reinsert_test.rb new file mode 100644 index 0000000..a73a7ba --- /dev/null +++ b/test/code_extractor_reinsert_test.rb @@ -0,0 +1,122 @@ +require 'test_helper' + +class CodeExtractorReinsertTest < CodeExtractor::TestCase + def test_unextract_an_extraction + # original extraction to work off of, in which we "un-extract" this later + create_base_repo + set_extractions ["foo"] + run_extraction + + # Perform updates to extracted repo to simulate changes since extraction + perform_merges_of_extracted_code + apply_new_commits_on_extracted_repo + update_extraction_hash + + # Run new extraction, with some extra commits added to the new repo that + # has been extracted previously + # + # This next line will run the actual extraction we are testing + # + # aka: updated commits and puts 'lib/foo' back into the original repo + run_extraction + + in_git_dir do + assert_commits [ + "Move foo/ into lib/", + "add new baz", + "update bar content", + "Re-insert extractions from MyOrg/extracted_repo", + "Merged branch 'extract_my_extractions' into master", + "Extract my_extractions", + "Commit #3", + "add Bar content", + "Initial Commit" + ] + + refute Dir.exist? "foo" + assert File.exist? "qux" + assert File.exist? "lib/foo/bar" + assert File.exist? "lib/foo/baz" + end + end + + def perform_merges_of_extracted_code + # Merge our extracted branch (removed code) into the master branch of the + # original repository + # + is_bare = true + @bare_repo_dir = File.join @sandbox_dir, "bare_original.git" + Rugged::Repository.init_at @bare_repo_dir, is_bare + + # Can't push to a local non-bare repo via Rugged currently... 
hence this + # extra weirdness being done... + destination_repo.remotes.create("original", @bare_repo_dir) + destination_repo.remotes["original"].push [destination_repo.head.name] + + original_repo.remotes.create("origin", @bare_repo_dir) + original_repo.fetch("origin") + original_repo.remotes["origin"].push [original_repo.head.name] + original_repo.create_branch "extract_my_extractions", "origin/extract_my_extractions" + + CodeExtractor::TestRepo.merge original_repo, "extract_my_extractions" + original_repo.remotes['origin'].push [original_repo.head.name] + end + + def apply_new_commits_on_extracted_repo + @new_upstream_dir = File.join @sandbox_dir, "new_upstream.git" + @cloned_extractions_dir = File.join @sandbox_dir, "cloned_extractions.git" + + CodeExtractor::TestRepo.clone_at extracted_dir, @cloned_extractions_dir do + checkout_b 'master', 'origin/master' + + update_file "foo/bar", "Updated Bar Content" + commit "update bar content" + + update_file "foo/baz", "Baz Content" + commit "add new baz" + end + end + + # Update the configuration for the (second) extraction + # + # Boiler plate will do the following: + # + # - Assumes "foo/" is still the only extracted directory + # - A "reinsert" is the new action that will be performed + # - The following methods have been executed prior + # * `.create_base_repo` + # * `.perform_merges_of_extracted_code` + # * `.apply_new_commits_on_extracted_repo` + # - As a result, assumes the following instance variables are set: + # * `@new_upstream_dir` + # * `@bare_repo_dir` + # * `@cloned_extractions_dir` + # - The desired behavior of `.extra_cmds` is to move `foo` into `lib/` + # + # There is a `custom_updates` hash available in the args for adding + # additional changes, so if any of the above are not the case, changes can be + # made at the end. 
+ # + def update_extraction_hash custom_updates = {} + set_extractions ["foo"] + set_destination_dir @new_upstream_dir + + extractions_hash[:name] = "the_extracted" + extractions_hash[:reinsert] = true + extractions_hash[:target_name] = "MyOrg/extracted_repo" + extractions_hash[:target_remote] = @bare_repo_dir + extractions_hash[:target_base_branch] = "master" + extractions_hash[:upstream] = @cloned_extractions_dir + extractions_hash[:upstream_name] = "MyOrg/repo" + extractions_hash[:extra_cmds] = [ + "mkdir lib", + "mv foo lib", + "git add -A", + "git commit -m 'Move foo/ into lib/'" + ] + + custom_updates.each do |hash_key, value| + extractions_hash[hash_key] = value + end + end +end diff --git a/test/code_extractor_test.rb b/test/code_extractor_test.rb index a299cda..ee417b8 100644 --- a/test/code_extractor_test.rb +++ b/test/code_extractor_test.rb @@ -97,161 +97,4 @@ def test_code_extractor_skips_cloning_if_directory_exists refute File.exist? "baz" end end - - def test_unextract_an_extraction - # original extraction to work off of, in which we "un-extract" this later - create_base_repo - set_extractions ["foo"] - run_extraction - - # Perform updates to extracted repo to simulate changes since extraction - perform_merges_of_extracted_code - apply_new_commits_on_extracted_repo - update_extraction_hash - - # Run new extraction, with some extra commits added to the new repo that - # has been extracted previously - # - # This next line will run the actual extraction we are testing - # - # aka: updated commits and puts 'lib/foo' back into the original repo - run_extraction - - in_git_dir do - assert_commits [ - "Move foo/ into lib/", - "add new baz", - "update bar content", - "Re-insert extractions from MyOrg/extracted_repo", - "Merged branch 'extract_my_extractions' into master", - "Extract my_extractions", - "Commit #3", - "add Bar content", - "Initial Commit" - ] - - refute Dir.exist? "foo" - assert File.exist? "qux" - assert File.exist? 
"lib/foo/bar" - assert File.exist? "lib/foo/baz" - end - end - - def create_base_repo - repo_structure = %w[ - foo/bar - baz - ] - - create_repo repo_structure do - update_file "foo/bar", "Bar Content" - commit "add Bar content" - tag "v1.0" - - add_file "qux", "QUX!!!" - commit - tag "v2.0" - end - end - - def perform_merges_of_extracted_code - # Merge our extracted branch (removed code) into the master branch of the - # original repository - # - is_bare = true - @bare_repo_dir = File.join @sandbox_dir, "bare_original.git" - Rugged::Repository.init_at @bare_repo_dir, is_bare - - # Can't push to a local non-bare repo via Rugged currently... hence this - # extra weirdness being done... - destination_repo.remotes.create("original", @bare_repo_dir) - destination_repo.remotes["original"].push [destination_repo.head.name] - - original_repo.remotes.create("origin", @bare_repo_dir) - original_repo.fetch("origin") - original_repo.remotes["origin"].push [original_repo.head.name] - original_repo.create_branch "extract_my_extractions", "origin/extract_my_extractions" - - # Code is a combination of the examples found here: - # - # - https://github.com/libgit2/rugged/blob/3de6a0a7/test/merge_test.rb#L4-L18 - # - http://violetzijing.is-programmer.com/2015/11/6/some_notes_about_rugged.187772.html - # - https://stackoverflow.com/a/27290470 - # - # In otherwords... not obvious how to do a `git merge --no-ff --no-edit` - # with rugged... le-sigh... 
- # - # TODO: Move into a helper - base = original_repo.branches["master"].target_id - topic = original_repo.branches["extract_my_extractions"].target_id - merge_index = original_repo.merge_commits(base, topic) - - Rugged::Commit.create( - original_repo, - :message => "Merged branch 'extract_my_extractions' into master", - :parents => [base, topic], - :tree => merge_index.write_tree(original_repo), - :update_ref => "HEAD" - ) - - original_repo.remotes['origin'].push [original_repo.head.name] - end - - def apply_new_commits_on_extracted_repo - @new_upstream_dir = File.join @sandbox_dir, "new_upstream.git" - @cloned_extractions_dir = File.join @sandbox_dir, "cloned_extractions.git" - - CodeExtractor::TestRepo.clone_at extracted_dir, @cloned_extractions_dir do - checkout_b 'master', 'origin/master' - - update_file "foo/bar", "Updated Bar Content" - commit "update bar content" - - update_file "foo/baz", "Baz Content" - commit "add new baz" - end - end - - # Update the configuration for the (second) extraction - # - # Boiler plate will do the following: - # - # - Assumes "foo/" is still the only extracted directory - # - A "reinsert" is the new action that will be performed - # - The following methods have been executed prior - # * `.create_base_repo` - # * `.perform_merges_of_extracted_code` - # * `.apply_new_commits_on_extracted_repo` - # - As a result, assumes the following instance variables are set: - # * `@new_upstream_dir` - # * `@bare_repo_dir` - # * `@cloned_extractions_dir` - # - The desired behavior of `.extra_cmds` is to move `foo` into `lib/` - # - # There is a `custom_updates` hash available in the args for adding - # additional changes, so if any of the above are not the case, changes can be - # made at the end. 
- # - def update_extraction_hash custom_updates = {} - set_extractions ["foo"] - set_destination_dir @new_upstream_dir - - extractions_hash[:name] = "the_extracted" - extractions_hash[:reinsert] = true - extractions_hash[:target_name] = "MyOrg/extracted_repo" - extractions_hash[:target_remote] = @bare_repo_dir - extractions_hash[:target_base_branch] = "master" - extractions_hash[:upstream] = @cloned_extractions_dir - extractions_hash[:upstream_name] = "MyOrg/repo" - extractions_hash[:extra_cmds] = [ - "mkdir lib", - "mv foo lib", - "git add -A", - "git commit -m 'Move foo/ into lib/'" - ] - - custom_updates.each do |hash_key, value| - extractions_hash[hash_key] = value - end - end end diff --git a/test/test_helper.rb b/test/test_helper.rb index 9dd2a3d..6d76895 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -37,6 +37,23 @@ def teardown FileUtils.remove_entry @sandbox_dir unless ENV["DEBUG"] end + def create_base_repo + repo_structure = %w[ + foo/bar + baz + ] + + create_repo repo_structure do + update_file "foo/bar", "Bar Content" + commit "add Bar content" + tag "v1.0" + + add_file "qux", "QUX!!!" + commit + tag "v2.0" + end + end + # Custom Assertions def assert_no_tags From 566519811c2bd6ad9c39181c38bd202ad131da39 Mon Sep 17 00:00:00 2001 From: Nick LaMuro Date: Mon, 18 Nov 2019 12:25:51 -0600 Subject: [PATCH 17/32] Add Fallback for when code_extractor wasn't used Allows reinsert to work when code_extractor wasn't used to originally extract the commits --- lib/code_extractor.rb | 53 ++++++++++++---- test/code_extractor_reinsert_test.rb | 95 ++++++++++++++++++++++++++++ 2 files changed, 137 insertions(+), 11 deletions(-) diff --git a/lib/code_extractor.rb b/lib/code_extractor.rb index c443500..01794ad 100644 --- a/lib/code_extractor.rb +++ b/lib/code_extractor.rb @@ -34,7 +34,7 @@ def validate! 
end class GitProject - attr_reader :name, :url, :git_dir, :new_branch, :source_branch, :target_name + attr_reader :name, :url, :git_dir, :new_branch, :source_branch, :target_name, :upstream_name def initialize name, url @name = name @@ -146,18 +146,17 @@ def add_target_remote target_name, target_remote def inject_commits target_base_branch, upstream_name puts "Injecting commits…" + @upstream_name ||= upstream_name target_base_branch ||= 'master' - commit_msg_filter = "(transferred from #{upstream_name}" - Dir.chdir git_dir do - reference_target_branch = "#{target_remote_name}/#{target_base_branch}" - previously_extracted_commits = `git log --pretty="%H" --grep="#{commit_msg_filter}"` + @reference_target_branch = "#{target_remote_name}/#{target_base_branch}" + Dir.chdir git_dir do # special commit that will get renamed re-worded to: # # Re-insert extractions from #{upstream_name} # - last_extracted_commit = previously_extracted_commits.lines[0].chomp! + last_extracted_commit = previously_extracted_commits.first first_injected_msg = `git show -s --format="%s%n%n%b" #{last_extracted_commit}` first_injected_msg = first_injected_msg.lines.reject { |line| line.include? 
commit_msg_filter @@ -167,15 +166,16 @@ def inject_commits target_base_branch, upstream_name File.write File.expand_path("../LAST_EXTRACTED_COMMIT_MSG", git_dir), first_injected_msg `time git filter-branch -f --commit-filter ' - export was_extracted=$(git show -s --format="%s%n%n%b" $GIT_COMMIT | grep -s "#{commit_msg_filter}") - if [ "$GIT_COMMIT" = "#{last_extracted_commit}" ] || [ "$was_extracted" == "" ]; then + export was_extracted="#{previously_extracted_commits.map {|c| "#{c}|" }.join}" + echo "#{last_extracted_commit}" > #{File.expand_path("../LAST_EXTRACTED_COMMIT", git_dir)} + if [ "$GIT_COMMIT" = "#{last_extracted_commit}" ] || [ -n "${was_extracted##*$GIT_COMMIT|*}" ]; then git commit-tree "$@"; else skip_commit "$@"; fi ' --index-filter ' git read-tree --empty - git reset #{reference_target_branch} -- . + git reset #{@reference_target_branch} -- . git checkout $GIT_COMMIT -- . ' --msg-filter ' if [ "$GIT_COMMIT" = "#{last_extracted_commit}" ]; then @@ -188,7 +188,7 @@ def inject_commits target_base_branch, upstream_name echo "(transferred from #{upstream_name}@$GIT_COMMIT)" ' -- #{prune_branch}` - `git checkout --no-track -b #{inject_branch} #{reference_target_branch}` + `git checkout --no-track -b #{inject_branch} #{@reference_target_branch}` `git cherry-pick ..#{prune_branch}` end end @@ -245,7 +245,7 @@ def build_prune_script extractions Dir.chdir git_dir do extractions.each do |file_or_dir| if Dir.exist? file_or_dir - files = Dir.glob["#{file_or_dir}/**/*"] + files = Dir["#{file_or_dir}/**/*"] else files = [file_or_dir] end @@ -282,6 +282,37 @@ def build_prune_script extractions FileUtils.chmod "+x", @prune_script end + def commit_msg_filter + @commit_msg_filter ||= "(transferred from #{upstream_name}" + end + + def previously_extracted_commits + return @previously_extracted_commits if defined? 
@previously_extracted_commits + + @previously_extracted_commits = `git log --pretty="%H" --grep="#{commit_msg_filter}"`.lines + + # Fallback if the extracted code didn't use `code-extractor` + # + # Effectivelly, we just look for commits that exist in the upstream + # branch with the same first line of the commit message, and if that is + # found, double check the file changes are the same. + # + # Not fool proof, but the SHAs will be different unfortunately... + # + first_commit = @previously_extracted_commits.first + if !first_commit or !first_commit.chomp.empty? + @previously_extracted_commits = `git log --pretty="%H"`.lines.each(&:chomp!) + @previously_extracted_commits.tap do |commits| + commits.reject! do |commit| + has = `git log --pretty="%H" --grep="$(git show -s --format="%s" #{commit})" #{@reference_target_branch}` + has.chomp.empty? + end + end + else + @previously_extracted_commits.each(&:chomp!) + end + end + def msg_filter upstream_name <<-MSG_FILTER.gsub(/^ {8}/, '').chomp --msg-filter ' diff --git a/test/code_extractor_reinsert_test.rb b/test/code_extractor_reinsert_test.rb index a73a7ba..735fea9 100644 --- a/test/code_extractor_reinsert_test.rb +++ b/test/code_extractor_reinsert_test.rb @@ -40,6 +40,101 @@ def test_unextract_an_extraction end end + def test_reinsert_when_code_extractor_was_not_used + # original extraction to work off of, in which we "un-extract" this later + create_base_repo + set_extractions ["foo"] + run_extraction + + # Perform updates to extracted repo to simulate changes since extraction + perform_merges_of_extracted_code + + # Remove `(transferred from Org/repo@-----)` lines from extracted commits + # + # This is to simulate a case where code_extractor wasn't used to handle the + # extraction + + ################################################## + + # TOO MANY HOURS WASTED trying to get Rugged to do a rebase... + # + # Gave up and did a `git filter-branch --msg-filter`... 
+ # + + # puts + # current_commit = destination_repo.head.target + # puts current_commit.message.lines.reject {|l| l.include? "transferred" }.join + # puts "rebasing" + # rebase = Rugged::Rebase.new @destination_repo, "master", "HEAD^"#, :inmemory => true + # while rebase_commit = rebase.next do + # puts rebase_commit[:id] + # # destination_repo.index.write_tree + # # commit = destination_repo.head.target + # commit = Rugged::Commit.lookup @destination_repo, rebase_commit[:id] + # # puts commit.message + # # puts commit.committer.inspect + # # puts + # # # puts "inmemory_index:" + # # # puts rebase.inmemory_index + # puts "new message" + # puts commit.message.lines.reject {|l| l.include? "transferred" }.join + # puts + # commit_hash = { + # # :committer => commit.committer, + # :message => commit.message.lines.reject {|l| l.include? "transferred" }.join + # } + # # rebase.commit(commit_hash) + # # commit.amend commit_hash + # rebase.commit commit.to_hash + # end + # puts + # puts "rebase_commit:" + # puts rebase_commit + # puts + + # rebase.finish({:name => "system", :email => "system"}) + + + # start_commit = destination_repo.last_commit + # sorting = Rugged::SORT_TOPO # aka: sort like git-log + # actual_commits = destination_repo.walk(start_commit, sorting).each do |c| + # puts "#{c.oid} #{c.message.lines.first.chomp}" + # end + + ################################################## + + Dir.chdir extracted_dir do + `git filter-branch -f --msg-filter ' + cat - | grep -v "(transferred from " + ' -- master` + end + + destination_repo.checkout "extract_my_extractions" + + apply_new_commits_on_extracted_repo + update_extraction_hash + run_extraction + + in_git_dir do + assert_commits [ + "Move foo/ into lib/", + "add new baz", + "update bar content", + "Re-insert extractions from MyOrg/extracted_repo", + "Merged branch 'extract_my_extractions' into master", + "Extract my_extractions", + "Commit #3", + "add Bar content", + "Initial Commit" + ] + + refute Dir.exist? 
"foo" + assert File.exist? "qux" + assert File.exist? "lib/foo/bar" + assert File.exist? "lib/foo/baz" + end + end + def perform_merges_of_extracted_code # Merge our extracted branch (removed code) into the master branch of the # original repository From 235f616ee1f76263c5ab7592803c25cb0004b663 Mon Sep 17 00:00:00 2001 From: Nick LaMuro Date: Mon, 18 Nov 2019 19:04:49 -0600 Subject: [PATCH 18/32] [CodeExtractorReinsertTest] Support custom commits In .apply_new_commits_on_extracted_repo, add the ability to pass a block, which will then be used instead of what was used by default in the method. This allows for taking advantage of the new `.merge` DSL for representing some more real-world extracted repos --- test/code_extractor_reinsert_test.rb | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/test/code_extractor_reinsert_test.rb b/test/code_extractor_reinsert_test.rb index 735fea9..69fcd48 100644 --- a/test/code_extractor_reinsert_test.rb +++ b/test/code_extractor_reinsert_test.rb @@ -157,19 +157,23 @@ def perform_merges_of_extracted_code original_repo.remotes['origin'].push [original_repo.head.name] end - def apply_new_commits_on_extracted_repo + def apply_new_commits_on_extracted_repo &block @new_upstream_dir = File.join @sandbox_dir, "new_upstream.git" @cloned_extractions_dir = File.join @sandbox_dir, "cloned_extractions.git" - CodeExtractor::TestRepo.clone_at extracted_dir, @cloned_extractions_dir do - checkout_b 'master', 'origin/master' + unless block_given?
+ block = proc do + checkout_b 'master', 'origin/master' - update_file "foo/bar", "Updated Bar Content" - commit "update bar content" + update_file "foo/bar", "Updated Bar Content" + commit "update bar content" - update_file "foo/baz", "Baz Content" - commit "add new baz" + update_file "foo/baz", "Baz Content" + commit "add new baz" + end end + + CodeExtractor::TestRepo.clone_at(extracted_dir, @cloned_extractions_dir, &block) end # Update the configuration for the (second) extraction From 96d270230c834b4145de6acf4e57209d6385797f Mon Sep 17 00:00:00 2001 From: Nick LaMuro Date: Mon, 18 Nov 2019 19:07:04 -0600 Subject: [PATCH 19/32] [CodeExtractor] Support merge commits When using cherry-pick, it wouldn't handle merges properly, since it requires a `-m 1` argument for it to work properly. By switching to using `git rebase --root --onto`, we can take the branch that now is a "orphan" branch, and apply the commits onto the target remote's target branch. Left extra code in there to show other possibilities (since this isn't getting merged in away) --- lib/code_extractor.rb | 40 +++++++++++++++-- test/code_extractor_reinsert_test.rb | 67 ++++++++++++++++++++++++++++ 2 files changed, 104 insertions(+), 3 deletions(-) diff --git a/lib/code_extractor.rb b/lib/code_extractor.rb index 01794ad..ce10ed3 100644 --- a/lib/code_extractor.rb +++ b/lib/code_extractor.rb @@ -94,6 +94,16 @@ def extract_commits extractions, upstream_name end end + # Three step process to filter out the commits we want in three passes: + # + # - Move code we want to keep into a separate tmp dir (using @prune_script) + # - Prune anything that isn't in that subdirectory + # - Move the tmp directory back into the root of the directory + # + # + # Note: We don't use `--subdirectory-filter` here as it will remove merge + # commits, which we don't want. 
+ # def prune_commits extractions puts "Pruning commits…" @@ -102,7 +112,11 @@ def prune_commits extractions Dir.chdir git_dir do `git checkout -b #{prune_branch} #{@source_branch}` `git filter-branch -f --prune-empty --tree-filter #{@prune_script} HEAD` - `git filter-branch -f --prune-empty --subdirectory-filter #{@keep_directory}` + `git filter-branch -f --prune-empty --index-filter ' + git read-tree --empty + git reset $GIT_COMMIT -- #{@keep_directory} + 'HEAD` + `git filter-branch -f --prune-empty --tree-filter 'mv #{@keep_directory}/* .' HEAD` end end @@ -188,8 +202,28 @@ def inject_commits target_base_branch, upstream_name echo "(transferred from #{upstream_name}@$GIT_COMMIT)" ' -- #{prune_branch}` - `git checkout --no-track -b #{inject_branch} #{@reference_target_branch}` - `git cherry-pick ..#{prune_branch}` + # Old (bad: doesn't handle merges) + # + # `git checkout --no-track -b #{inject_branch} #{@reference_target_branch}` + # `git cherry-pick ..#{prune_branch}` + # + # + # Attempted #1 (not working, but uses older `git` methods) + # + # `git checkout --orphan #{inject_branch}` + # `git commit -m "Dummy Init commit"` + # orphan_commit = `git log --pretty="%H" -n1`.chomp + # prune_first = `git log --pretty="%H" --reverse -n1 #{prune_branch}`.chomp + # `git rebase --onto #{orphan_commit} #{prune_first} #{inject_branch}` + # `git replace #{orphan_commit} #{@reference_target_branch}` + # + # + # Better + # + # Ref: git rebase --onto code_extractor_inject_the_extracted --root + # + `git checkout --no-track -b #{inject_branch} #{prune_branch}` + `git rebase --preserve-merges --root --onto #{@reference_target_branch} #{inject_branch}` end end diff --git a/test/code_extractor_reinsert_test.rb b/test/code_extractor_reinsert_test.rb index 69fcd48..67c786b 100644 --- a/test/code_extractor_reinsert_test.rb +++ b/test/code_extractor_reinsert_test.rb @@ -34,6 +34,8 @@ def test_unextract_an_extraction ] refute Dir.exist? "foo" + refute File.exist? 
"README.md" + assert File.exist? "qux" assert File.exist? "lib/foo/bar" assert File.exist? "lib/foo/baz" @@ -129,6 +131,68 @@ def test_reinsert_when_code_extractor_was_not_used ] refute Dir.exist? "foo" + refute File.exist? "README.md" + + assert File.exist? "qux" + assert File.exist? "lib/foo/bar" + assert File.exist? "lib/foo/baz" + end + end + + # Basically, a test to ensure we are using a rebase and not a cherry-pick + # method, since `git cherry-pick` doesn't handle merge commits well. + def test_reinsert_when_there_is_a_merge_commit + # original extraction to work off of, in which we "un-extract" this later + create_base_repo + set_extractions ["foo"] + run_extraction + + # Perform updates to extracted repo to simulate changes since extraction + perform_merges_of_extracted_code + apply_new_commits_on_extracted_repo do + checkout_b 'master', 'origin/master' + + update_file "foo/bar", "Updated Bar Content" + commit "update bar content" + + checkout_b 'add_baz', 'master' + + update_file "foo/baz", "Baz Content" + commit "add new baz" + + checkout 'master' + merge 'add_baz' + + add_file "README.md", "READ ME!" + commit "add README" + end + update_extraction_hash + + # Run new extraction, with some extra commits added to the new repo that + # has been extracted previously + # + # This next line will run the actual extraction we are testing + # + # aka: updated commits and puts 'lib/foo' back into the original repo + run_extraction + + in_git_dir do + assert_commits [ + "Move foo/ into lib/", + "Merged branch 'add_baz' into master", + "add new baz", + "update bar content", + "Re-insert extractions from MyOrg/extracted_repo", + "Merged branch 'extract_my_extractions' into master", + "Extract my_extractions", + "Commit #3", + "add Bar content", + "Initial Commit" + ] + + refute Dir.exist? "foo" + refute File.exist? "README.md" + assert File.exist? "qux" assert File.exist? "lib/foo/bar" assert File.exist? 
"lib/foo/baz" @@ -170,6 +234,9 @@ def apply_new_commits_on_extracted_repo &block update_file "foo/baz", "Baz Content" commit "add new baz" + + add_file "README.md", "READ ME!" + commit "add README" end end From 361b72c7096960152356aeb5e080208b29407860 Mon Sep 17 00:00:00 2001 From: Nick LaMuro Date: Thu, 21 Nov 2019 16:39:44 -0600 Subject: [PATCH 20/32] [CodeExtractor] Better .previously_extracted_commits Improves the fallback approach for `.previously_extracted_commits` to check against the entirety of the commit message. For those cases where it is "fixes tests" or "Review Comments". --- lib/code_extractor.rb | 54 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 46 insertions(+), 8 deletions(-) diff --git a/lib/code_extractor.rb b/lib/code_extractor.rb index ce10ed3..8a01b5c 100644 --- a/lib/code_extractor.rb +++ b/lib/code_extractor.rb @@ -331,19 +331,57 @@ def previously_extracted_commits # branch with the same first line of the commit message, and if that is # found, double check the file changes are the same. # - # Not fool proof, but the SHAs will be different unfortunately... + # Not fool proof, but the SHAs will be different unfortunately, so can't + # match on that... # first_commit = @previously_extracted_commits.first - if !first_commit or !first_commit.chomp.empty? - @previously_extracted_commits = `git log --pretty="%H"`.lines.each(&:chomp!) + if first_commit + @previously_extracted_commits.each(&:chomp!) + else + # Long method... + # + # Test to see if any commit on the target remote include the same full + # message as each of the commits. + # + # Check first by first line of commit for speed, and return just the + # matching git SHAs and Author name. If some exist, then match by full + # commit msg. + # + # fetch author name (%an) and commit sha (%H), using a '|' delimeter + # + @previously_extracted_commits = `git log --pretty="%an|%H"`.lines.each(&:chomp!) + @previously_extracted_commits.map! 
{|line| line.split "|" } @previously_extracted_commits.tap do |commits| - commits.reject! do |commit| - has = `git log --pretty="%H" --grep="$(git show -s --format="%s" #{commit})" #{@reference_target_branch}` - has.chomp.empty? + commits.select! do |(author, commit)| + # Make sure to escape quotes in commit messages + #upstream_msg = `git show -s --format="%s" #{commit}`.gsub /"/, '\"' + #target_commits = `git log --pretty="%H" \ + # --author="#{author}" \ + # --grep="#{upstream_msg.chomp}" \ + # #{@reference_target_branch}`.lines.each(&:chomp!) + # + + target_commits = `git log --pretty="%H" --fixed-strings \ + --author="#{author}" \ + --grep="$(git show -s --format="%s" #{commit} | sed 's/"/\\\\"/g')" \ + #{@reference_target_branch}`.lines.each(&:chomp!) + + if target_commits.empty? + false + else + upstream_full_msg = `git show -s --format="%s%n%n%b" #{commit}` + + # TODO: Change this to a `.one?` check, and if this fails, and + # there is more than one, then compare changed file base names... + # which is harder still... + target_commits.any? do |target_commit| + target_full_msg = `git show -s --format="%s%n%n%b" #{target_commit}` + upstream_full_msg == target_full_msg + end + end end + commits.map!(&:last) end - else - @previously_extracted_commits.each(&:chomp!) end end From 7de0f9d4866c9d92b8ee9b685ec170b660c1ae14 Mon Sep 17 00:00:00 2001 From: Nick LaMuro Date: Thu, 21 Nov 2019 16:41:52 -0600 Subject: [PATCH 21/32] [FIXUP][CodeExtractor] spacing for git-filter-branch cmd --- lib/code_extractor.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/code_extractor.rb b/lib/code_extractor.rb index 8a01b5c..1a2d852 100644 --- a/lib/code_extractor.rb +++ b/lib/code_extractor.rb @@ -115,7 +115,7 @@ def prune_commits extractions `git filter-branch -f --prune-empty --index-filter ' git read-tree --empty git reset $GIT_COMMIT -- #{@keep_directory} - 'HEAD` + ' HEAD` `git filter-branch -f --prune-empty --tree-filter 'mv #{@keep_directory}/* .' 
HEAD` end end From af74685390275641c96b7788b8a5bed7d3c323e2 Mon Sep 17 00:00:00 2001 From: Nick LaMuro Date: Thu, 21 Nov 2019 17:00:09 -0600 Subject: [PATCH 22/32] [CodeExtractor] Use git user for base commit When updating the commit that will be the root commit for the injected commits, update the committer and author values so that it is more obvious who re-added the commit. As a result, also include the original committer/author info in the updated commit message. Formatting has been tweaked in this commit as well to make it a little easier to see what is being done. --- lib/code_extractor.rb | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/lib/code_extractor.rb b/lib/code_extractor.rb index 1a2d852..eb80824 100644 --- a/lib/code_extractor.rb +++ b/lib/code_extractor.rb @@ -175,8 +175,18 @@ def inject_commits target_base_branch, upstream_name first_injected_msg = first_injected_msg.lines.reject { |line| line.include? commit_msg_filter }.join - first_injected_msg.prepend "*** Original Commit message shown below ***\n\n" - first_injected_msg.prepend "Re-insert extractions from #{target_name}\n\n" + + committer_data = `git show -s --format="%an|%ae|%ad|%cn|%ce|%cd" #{last_extracted_commit}`.split("|") + + first_injected_msg.prepend <<-COMMIT_MSG.gsub(/^ {10}/, '') + Re-insert extractions from #{target_name} + + *** Original Commit message shown below *** + + Author: #{committer_data[0]} #{committer_data[1]} #{committer_data[2]} + Committer: #{committer_data[3]} #{committer_data[4]} #{committer_data[5]} + COMMIT_MSG + File.write File.expand_path("../LAST_EXTRACTED_COMMIT_MSG", git_dir), first_injected_msg `time git filter-branch -f --commit-filter ' @@ -187,6 +197,13 @@ def inject_commits target_base_branch, upstream_name else skip_commit "$@"; fi + ' --env-filter ' + if [ "$GIT_COMMIT" = "#{last_extracted_commit}" ]; then + GIT_AUTHOR_NAME=$(git config user.name) + GIT_AUTHOR_EMAIL=$(git config user.email) + 
GIT_COMMITTER_NAME=$(git config user.name) + GIT_COMMITTER_EMAIL=$(git config user.email) + fi ' --index-filter ' git read-tree --empty git reset #{@reference_target_branch} -- . From f0eed265a444ba1077fdb7e82243e5b6602f003e Mon Sep 17 00:00:00 2001 From: Nick LaMuro Date: Thu, 21 Nov 2019 17:06:09 -0600 Subject: [PATCH 23/32] [CodeExtractor] Use inject_branch for injecting commits Makes it easier to try test things out from post "pruning" step. --- lib/code_extractor.rb | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/lib/code_extractor.rb b/lib/code_extractor.rb index eb80824..8dcbfe4 100644 --- a/lib/code_extractor.rb +++ b/lib/code_extractor.rb @@ -166,6 +166,7 @@ def inject_commits target_base_branch, upstream_name @reference_target_branch = "#{target_remote_name}/#{target_base_branch}" Dir.chdir git_dir do + `git checkout #{prune_branch}` # special commit that will get renamed re-worded to: # # Re-insert extractions from #{upstream_name} @@ -189,6 +190,7 @@ def inject_commits target_base_branch, upstream_name File.write File.expand_path("../LAST_EXTRACTED_COMMIT_MSG", git_dir), first_injected_msg + `git checkout --no-track -b #{inject_branch} #{prune_branch}` `time git filter-branch -f --commit-filter ' export was_extracted="#{previously_extracted_commits.map {|c| "#{c}|" }.join}" echo "#{last_extracted_commit}" > #{File.expand_path("../LAST_EXTRACTED_COMMIT", git_dir)} @@ -217,7 +219,7 @@ def inject_commits target_base_branch, upstream_name echo echo echo "(transferred from #{upstream_name}@$GIT_COMMIT)" - ' -- #{prune_branch}` + ' -- #{inject_branch}` # Old (bad: doesn't handle merges) # @@ -239,8 +241,7 @@ def inject_commits target_base_branch, upstream_name # # Ref: git rebase --onto code_extractor_inject_the_extracted --root # - `git checkout --no-track -b #{inject_branch} #{prune_branch}` - `git rebase --preserve-merges --root --onto #{@reference_target_branch} #{inject_branch}` + `git rebase --rebase-merges=rebase-cousins --root 
--onto #{@reference_target_branch} #{inject_branch}` end end From 6191b9c4888b3aa0317742c5a2d00e014401d6fe Mon Sep 17 00:00:00 2001 From: Nick LaMuro Date: Thu, 21 Nov 2019 17:30:00 -0600 Subject: [PATCH 24/32] [FIXUP] Remove inject's --index-filter This was a mistake to have this. Since the commits will work just fine as they are, and don't need to have the content of master to do so, this can be removed entirely. This step is much simpler now, as it is just updating the commits we are bringing over and then applying them with a `git rebase --onto ...`. --- lib/code_extractor.rb | 4 ---- 1 file changed, 4 deletions(-) diff --git a/lib/code_extractor.rb b/lib/code_extractor.rb index 8dcbfe4..8960bcd 100644 --- a/lib/code_extractor.rb +++ b/lib/code_extractor.rb @@ -206,10 +206,6 @@ def inject_commits target_base_branch, upstream_name GIT_COMMITTER_NAME=$(git config user.name) GIT_COMMITTER_EMAIL=$(git config user.email) fi - ' --index-filter ' - git read-tree --empty - git reset #{@reference_target_branch} -- . - git checkout $GIT_COMMIT -- . ' --msg-filter ' if [ "$GIT_COMMIT" = "#{last_extracted_commit}" ]; then cat #{File.expand_path File.join("..", "LAST_EXTRACTED_COMMIT_MSG"), git_dir} From f7b6e3fbc26bd6b01511aec15d546fea4d1ea465 Mon Sep 17 00:00:00 2001 From: Nick LaMuro Date: Thu, 21 Nov 2019 19:41:49 -0600 Subject: [PATCH 25/32] [FIXUP] Fix inject "initial commit" The upstream_name should be used here, and the target name should be used for the filter. 
--- lib/code_extractor.rb | 4 ++-- test/code_extractor_reinsert_test.rb | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/code_extractor.rb b/lib/code_extractor.rb index 8960bcd..f95bc8c 100644 --- a/lib/code_extractor.rb +++ b/lib/code_extractor.rb @@ -180,7 +180,7 @@ def inject_commits target_base_branch, upstream_name committer_data = `git show -s --format="%an|%ae|%ad|%cn|%ce|%cd" #{last_extracted_commit}`.split("|") first_injected_msg.prepend <<-COMMIT_MSG.gsub(/^ {10}/, '') - Re-insert extractions from #{target_name} + Re-insert extractions from #{upstream_name} *** Original Commit message shown below *** @@ -331,7 +331,7 @@ def build_prune_script extractions end def commit_msg_filter - @commit_msg_filter ||= "(transferred from #{upstream_name}" + @commit_msg_filter ||= "(transferred from #{target_name}" end def previously_extracted_commits diff --git a/test/code_extractor_reinsert_test.rb b/test/code_extractor_reinsert_test.rb index 67c786b..43d4a76 100644 --- a/test/code_extractor_reinsert_test.rb +++ b/test/code_extractor_reinsert_test.rb @@ -269,11 +269,11 @@ def update_extraction_hash custom_updates = {} extractions_hash[:name] = "the_extracted" extractions_hash[:reinsert] = true - extractions_hash[:target_name] = "MyOrg/extracted_repo" + extractions_hash[:target_name] = "MyOrg/repo" extractions_hash[:target_remote] = @bare_repo_dir extractions_hash[:target_base_branch] = "master" extractions_hash[:upstream] = @cloned_extractions_dir - extractions_hash[:upstream_name] = "MyOrg/repo" + extractions_hash[:upstream_name] = "MyOrg/extracted_repo" extractions_hash[:extra_cmds] = [ "mkdir lib", "mv foo lib", From af8f2f5d228019105a8cbddf5fc517e7b0504d8f Mon Sep 17 00:00:00 2001 From: Nick LaMuro Date: Thu, 21 Nov 2019 20:17:45 -0600 Subject: [PATCH 26/32] [FIXUP] Clean up debugging puts --- lib/code_extractor.rb | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/code_extractor.rb b/lib/code_extractor.rb index f95bc8c..16bcbdc 100644 
--- a/lib/code_extractor.rb +++ b/lib/code_extractor.rb @@ -125,7 +125,6 @@ def add_target_remote target_name, target_remote @target_name = target_name Dir.chdir git_dir do - puts "git remote add #{target_remote_name} #{target_remote}" `git remote add #{target_remote_name} #{target_remote}` `git fetch #{target_remote_name}` end From 06719ea2466eb7f643226622be54d1b1fd1f0785 Mon Sep 17 00:00:00 2001 From: Nick LaMuro Date: Thu, 21 Nov 2019 20:18:44 -0600 Subject: [PATCH 27/32] [CodeExtractor] Fix "transferred from" in reinsert This could be a "[FIXUP]" commit, but I think it is worth pointing out what I did here. Since there are multiple `git-filter-branch` calls, the original commit SHAs that existed are re-written after each. So delaying writing the updated commit message will only produce references to commits that don't exist since they were temporary. So moving `--msg-filter` to the first `git-filter-branch` was the first logical step. However, this caused another issue. Since we allow a fallback for finding the commit messages that existed in the previous repository, that would now be incorrect (in its previous form) since it relied on matching the exact messages created. To handle this in most cases, any lines of `(transferred from ...@` are filtered out from the commit messages before comparing. * * * The remaining changes are just to support this change. - Adjust some method signatures to support this change.
- Update `assert_commits` to confirm the commit messages are pointing to a proper commit - Add a `reference_repo_dir` and `reference_repo` to help with the `assert_commits` changes (set in one of the helper methods) --- lib/code_extractor.rb | 23 ++++++++++---------- test/code_extractor_reinsert_test.rb | 1 + test/test_helper.rb | 32 ++++++++++++++++++++++------ 3 files changed, 38 insertions(+), 18 deletions(-) diff --git a/lib/code_extractor.rb b/lib/code_extractor.rb index 16bcbdc..e236ad1 100644 --- a/lib/code_extractor.rb +++ b/lib/code_extractor.rb @@ -104,14 +104,16 @@ def extract_commits extractions, upstream_name # Note: We don't use `--subdirectory-filter` here as it will remove merge # commits, which we don't want. # - def prune_commits extractions + def prune_commits extractions, upstream_name puts "Pruning commits…" + @upstream_name ||= upstream_name + build_prune_script extractions Dir.chdir git_dir do `git checkout -b #{prune_branch} #{@source_branch}` - `git filter-branch -f --prune-empty --tree-filter #{@prune_script} HEAD` + `git filter-branch -f --prune-empty --tree-filter #{@prune_script} #{msg_filter upstream_name} HEAD` `git filter-branch -f --prune-empty --index-filter ' git read-tree --empty git reset $GIT_COMMIT -- #{@keep_directory} @@ -156,12 +158,10 @@ def add_target_remote target_name, target_remote # branch, and the "root" commit assumes the parent of the current HEAD of # the target remote's (master) branch # - def inject_commits target_base_branch, upstream_name + def inject_commits target_base_branch puts "Injecting commits…" - @upstream_name ||= upstream_name - target_base_branch ||= 'master' - + target_base_branch ||= 'master' @reference_target_branch = "#{target_remote_name}/#{target_base_branch}" Dir.chdir git_dir do @@ -211,9 +211,6 @@ def inject_commits target_base_branch, upstream_name else cat - fi - echo - echo - echo "(transferred from #{upstream_name}@$GIT_COMMIT)" ' -- #{inject_branch}` # Old (bad: doesn't handle merges) 
@@ -383,13 +380,15 @@ def previously_extracted_commits false else upstream_full_msg = `git show -s --format="%s%n%n%b" #{commit}` + upstream_full_msg.gsub! /^\(transferred from #{upstream_name}@.*$/, '' # TODO: Change this to a `.one?` check, and if this fails, and # there is more than one, then compare changed file base names... # which is harder still... target_commits.any? do |target_commit| target_full_msg = `git show -s --format="%s%n%n%b" #{target_commit}` - upstream_full_msg == target_full_msg + target_full_msg.gsub! /^\(transferred from #{upstream_name}@.*$/, '' + upstream_full_msg.strip == target_full_msg.strip end end end @@ -443,10 +442,10 @@ def extract def reinsert return unless @config[:reinsert] - @source_project.prune_commits @config[:extractions] + @source_project.prune_commits @config[:extractions], @config[:upstream_name] @source_project.run_extra_cmds @config[:extra_cmds] @source_project.add_target_remote @config[:target_name], @config[:target_remote] - @source_project.inject_commits @config[:target_base_branch], @config[:upstream_name] + @source_project.inject_commits @config[:target_base_branch] true end diff --git a/test/code_extractor_reinsert_test.rb b/test/code_extractor_reinsert_test.rb index 43d4a76..583c332 100644 --- a/test/code_extractor_reinsert_test.rb +++ b/test/code_extractor_reinsert_test.rb @@ -240,6 +240,7 @@ def apply_new_commits_on_extracted_repo &block end end + self.reference_repo_dir = @cloned_extractions_dir CodeExtractor::TestRepo.clone_at(extracted_dir, @cloned_extractions_dir, &block) end diff --git a/test/test_helper.rb b/test/test_helper.rb index 6d76895..a35388b 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -21,7 +21,7 @@ module CodeExtractor # test, and clean up when it is done. 
# class TestCase < Minitest::Test - attr_writer :extractions, :extractions_hash + attr_writer :extractions, :extractions_hash, :reference_repo_dir attr_reader :extracted_dir, :repo_dir, :sandbox_dir # Create a sandbox for the given test, and name it after the test class and @@ -61,14 +61,30 @@ def assert_no_tags assert tags.empty?, "Expected there to be no tags, but got `#{tags.join ', '}'" end + TRANSFERRED_FROM_REGEXP = /\(transferred from (?[^@]*)@(?[^\)]*)\)/ def assert_commits expected_commits start_commit = destination_repo.last_commit sorting = Rugged::SORT_TOPO # aka: sort like git-log - actual_commits = destination_repo.walk(start_commit, sorting).map(&:message) - - actual_commits.map! { |msg| msg.lines.first.chomp } - - assert_equal expected_commits, actual_commits + actual_commits = destination_repo.walk(start_commit, sorting).map {|c| c } + commit_msgs = actual_commits.map { |commit| commit.message.lines.first.chomp } + + assert_equal expected_commits, commit_msgs + + # Check that the "transferred from ..." 
reference line is correct + actual_commits.map do |commit| + [ + commit, + commit.message.lines.last.match(TRANSFERRED_FROM_REGEXP) + ] + end.each do |commit, transfered| + next unless transfered + + transferred_commit = reference_repo.lookup(transfered[:SHA]) + assert transferred_commit.is_a?(Rugged::Commit), + "'transfered from' of #{transfered[:SHA]} from #{commit} is not a valid commit" + assert_equal commit.message.lines.first.chomp, + transferred_commit.message.lines.first.chomp + end end # Helper methods @@ -81,6 +97,10 @@ def destination_repo @destination_repo ||= Rugged::Repository.new @extracted_dir end + def reference_repo + @reference_repo ||= Rugged::Repository.new @reference_repo_dir + end + def current_commit_message destination_repo.head.target.message end From 73fd210abfb1c1da171a4ccdc5b1b86f4b7cd4ee Mon Sep 17 00:00:00 2001 From: Nick LaMuro Date: Thu, 21 Nov 2019 20:28:59 -0600 Subject: [PATCH 28/32] [FIXUP] Moar better commit_msg_filter --- lib/code_extractor.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/code_extractor.rb b/lib/code_extractor.rb index e236ad1..5f84660 100644 --- a/lib/code_extractor.rb +++ b/lib/code_extractor.rb @@ -327,7 +327,7 @@ def build_prune_script extractions end def commit_msg_filter - @commit_msg_filter ||= "(transferred from #{target_name}" + @commit_msg_filter ||= "(transferred from #{target_name}@" end def previously_extracted_commits From eabcfa0c5a275ca9f5b7a2579665625eaaa5c854 Mon Sep 17 00:00:00 2001 From: Nick LaMuro Date: Fri, 22 Nov 2019 15:53:18 -0600 Subject: [PATCH 29/32] [CodeExtractor] checkout before build_prune_script Prior, when doing `build_prune_script`, the branch was left at what ever state the previous step had left it at. However, this caused and issue when running `build_prune_script` as it was attempting to find the history for files that were just deleted in the `extract_branch` step (the branch that will be pushed to the `upstream_repo` that "deletes code"). 
This step ensures a `git checkout #{source_branch}` prior to building the script to ensure the files and their previous versions still exist. An additional tweak was also added to split up the initialization of the `file_and_ancestors` variable to make it easier to debug. --- lib/code_extractor.rb | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/code_extractor.rb b/lib/code_extractor.rb index 5f84660..4204073 100644 --- a/lib/code_extractor.rb +++ b/lib/code_extractor.rb @@ -287,6 +287,7 @@ def build_prune_script extractions prune_mvs = [] Dir.chdir git_dir do + `git checkout #{source_branch}` extractions.each do |file_or_dir| if Dir.exist? file_or_dir files = Dir["#{file_or_dir}/**/*"] @@ -295,7 +296,8 @@ def build_prune_script extractions end files.each do |extraction_file| - file_and_ancestors = `#{git_log_follow} -- #{extraction_file}`.split("\n").uniq + raw_file_history = `#{git_log_follow} -- #{extraction_file}` + file_and_ancestors = raw_file_history.split("\n").uniq file_and_ancestors.reject! { |file| file.length == 0 } From bd0b361be31fa36e11e67ef2cefca096de3bc6d1 Mon Sep 17 00:00:00 2001 From: Nick LaMuro Date: Mon, 20 Jan 2020 18:06:08 -0600 Subject: [PATCH 30/32] [CodeExtractor] Better/Consistent documentation In a method above, "3 steps" was used to describe how the prune script would be used. This change updates the second explanation to match. --- lib/code_extractor.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/code_extractor.rb b/lib/code_extractor.rb index 4204073..232b338 100644 --- a/lib/code_extractor.rb +++ b/lib/code_extractor.rb @@ -263,12 +263,12 @@ def inject_branch # files (extractions) from their current location in a given commit to a # unused directory. # - # More complicated than it looks, this will be used as part of a two-phased + # More complicated than it looks, this will be used as part of a three part # `git filter-branch` to: # # 1.
move extractable files into a subdirectory with `--tree-filter` - # 2. only keep commits for files moved into that subdirectory, and make - # the subdirectory the new project root. + # 2. only keep commits for files moved into that subdirectory + # 3. make the subdirectory the new project root. # # For consistency, we want to keep the subdirectories' structure in the # same line as what was there previously, so this script helps do that, and From b6720f59818caada2c523efdea5a4189c3e08e06 Mon Sep 17 00:00:00 2001 From: Nick LaMuro Date: Mon, 20 Jan 2020 18:11:41 -0600 Subject: [PATCH 31/32] [CodeExtractor] Document `committer_data` info --- lib/code_extractor.rb | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/lib/code_extractor.rb b/lib/code_extractor.rb index 232b338..8084a6f 100644 --- a/lib/code_extractor.rb +++ b/lib/code_extractor.rb @@ -176,6 +176,16 @@ def inject_commits target_base_branch line.include? commit_msg_filter }.join + # Fetch "committer data" from the `last_extracted_commit` in a bar + # delimited format. Fetches the following data from the commit: + # + # - Author Name + # - Author Email + # - Author Date of commit + # - Committer Name + # - Committer Email + # - Committer Date of commit + # committer_data = `git show -s --format="%an|%ae|%ad|%cn|%ce|%cd" #{last_extracted_commit}`.split("|") first_injected_msg.prepend <<-COMMIT_MSG.gsub(/^ {10}/, '') From 133fdb118391ec035ff5da3e6b42fd1c2cb78b6c Mon Sep 17 00:00:00 2001 From: Nick LaMuro Date: Tue, 21 Jan 2020 15:13:17 -0600 Subject: [PATCH 32/32] [WIP] grafting commits This is a test to also include grafting as a feature (currently disabled). The intent is that a `graft_parent_commit` could be provided in the config yaml, and that would be used in the changes here. The steps, explained in order: 1. Create a new branch with a HEAD at the "graft" point 2. Get back on the "inject_branch" (possibly unnecessary) 3. Rebase ("graft") the inject_branch onto the "old_branch" 4.
Update the now grafted inject_branch to have the latest upstream changes Step two is only "possibly unnecessary" if making use of another argument of `git rebase` which I have yet to test. Besides that though, the changes are pretty minimal to work with, so this ends up being a pretty simple section to wrap your head around (once you wrap your head around getting to this point in the first place...). Of note: Since `inject_branch` is a branch with no parent commit for its root commit, the `--root` option is used here for `git rebase`, which is important since the second arg is then the tip of `inject_branch` (the branch name), and not the base of the branch that you might normally provide when using `git rebase`. This is important since providing the base commit otherwise would "drop" that commit as part of the rebase, and cause some other issues as a result (merge conflicts). --- lib/code_extractor.rb | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/lib/code_extractor.rb b/lib/code_extractor.rb index 8084a6f..2823b72 100644 --- a/lib/code_extractor.rb +++ b/lib/code_extractor.rb @@ -243,7 +243,30 @@ def inject_commits target_base_branch # # Ref: git rebase --onto code_extractor_inject_the_extracted --root # - `git rebase --rebase-merges=rebase-cousins --root --onto #{@reference_target_branch} #{inject_branch}` + + # Grafting Testing + # + graft_parent_commit = false + if graft_parent_commit + # Attempt #1 + # + # orphan_branch = "code_extractor_orphan_branch_#{Time.now.to_i}" + # `git checkout --orphan #{orphan_branch}` + # `git rm -rf ./*` + # `git commit --allow-empty -m "Empty Commit"` + # `git rebase --rebase-merges=rebase-cousins --root --onto #{orphan_branch} #{inject_branch}` + # + # Attempt #2 + # + # `git replace --graft fe8c6c6228 250a0b46fb05ad5891d442745b93543c38ee0914` + + `git checkout -b old_base #{graft_parent_commit}` + `git checkout #{inject_branch}` # TODO: needed?
+ `git rebase --rebase-merges=rebase-cousins --onto old_base #{inject_branch}` + `git merge --no-ff #{@reference_target_branch}` + else + `git rebase --rebase-merges=rebase-cousins --root --onto #{@reference_target_branch} #{inject_branch}` + end end end