From 84eeca301868f4d486ddcff432794615ffa5539d Mon Sep 17 00:00:00 2001 From: Maximilian Meister Date: Tue, 5 Apr 2016 08:55:52 +0200 Subject: [PATCH 01/16] apply_role: log only when needed and at the right place * log message is misleading when node list is empty * like this it is easier to see where (on which node) chef-client failed (cherry picked from commit a5707a817fc4c5a5e9b7ecf5d7e0c17ea328459b) (cherry picked from commit e8e721bbe781a26012d714688260cc6bfadb3e70) --- crowbar_framework/app/models/service_object.rb | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/crowbar_framework/app/models/service_object.rb b/crowbar_framework/app/models/service_object.rb index 99d30c513a..1e013b5106 100644 --- a/crowbar_framework/app/models/service_object.rb +++ b/crowbar_framework/app/models/service_object.rb @@ -1325,9 +1325,6 @@ def apply_role(role, inst, in_queue) non_admin_nodes << node_name end - @logger.debug("AR: Calling chef-client for #{role.name} on non-admin nodes #{non_admin_nodes.join(" ")}") - @logger.debug("AR: Calling chef-client for #{role.name} on admin nodes #{admin_list.join(" ")}") - # # XXX: We used to do this twice - do we really need twice??? # Yes! We do! The system has some transient issues that are hidden @@ -1336,6 +1333,9 @@ def apply_role(role, inst, in_queue) # pids = {} unless non_admin_nodes.empty? + @logger.debug( + "AR: Calling chef-client for #{role.name} on non-admin nodes #{non_admin_nodes.join(" ")}" + ) non_admin_nodes.each do |node| pre_cached_nodes[node] ||= NodeObject.find_node_by_name(node) nobj = pre_cached_nodes[node] @@ -1375,6 +1375,9 @@ def apply_role(role, inst, in_queue) end unless admin_list.empty? + @logger.debug( + "AR: Calling chef-client for #{role.name} on admin nodes #{admin_list.join(" ")}" + ) admin_list.each do |node| filename = "#{ENV['CROWBAR_LOG_DIR']}/chef-client/#{node}.log" pid = run_remote_chef_client(node, Rails.root.join("..", "bin", "single_chef_client.sh").expand_path.to_s, filename) From 29aa4844a1112207a6ba044a2d1e848f04e24da4 Mon Sep 17 00:00:00 2001 From: Vincent Untz Date: Fri, 1 Jul 2016 15:45:54 +0200 Subject: [PATCH 02/16] crowbar: Some cleanup in apply_role Rename variables, remove unneeded logs. (cherry picked from commit d4ed8c58b5d10d166853be9cf6ef6c756a5f20e8) (cherry picked from commit f33f25494f5e69e57980ae0bc456bd4d760685ad) --- .../app/models/service_object.rb | 25 ++++++++----------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/crowbar_framework/app/models/service_object.rb b/crowbar_framework/app/models/service_object.rb index 1e013b5106..86125552ec 100644 --- a/crowbar_framework/app/models/service_object.rb +++ b/crowbar_framework/app/models/service_object.rb @@ -1027,11 +1027,7 @@ def apply_role(role, inst, in_queue) # We'll build an Array where each item represents a batch of work, # and the batches must be performed sequentially in this order. - # This will mirror the ordering specified by element_order below, - # but the sub-arrays of run_order will be Arrays of names of the - # involved nodes, whereas the sub-arrays of element_order are Arrays - # of names of Chef roles. - run_order = [] + batches = [] # get proposal to remember potential removal of a role proposal = Proposal.where(barclamp: @bc_name, name: inst).first @@ -1146,15 +1142,14 @@ def apply_role(role, inst, in_queue) end end # roles.each - @logger.debug "nodes_in_batch #{nodes_in_batch.inspect}" - run_order << nodes_in_batch unless nodes_in_batch.empty? - @logger.debug "run_order #{run_order.inspect}" + batches << nodes_in_batch unless nodes_in_batch.empty? end + @logger.debug "batches: #{batches.inspect}" # save databag with the role removal intention proposal.save if save_proposal - applying_nodes = run_order.flatten.uniq.sort + applying_nodes = batches.flatten.uniq.sort # Mark nodes as applying; beware that all_nodes do not contain nodes that # are actually removed. @@ -1303,21 +1298,21 @@ def apply_role(role, inst, in_queue) return [405, message] end + ran_admin = false + # Invalidate cache as apply_role_pre_chef_call can save nodes pre_cached_nodes = {} # Each batch is a list of nodes that can be done in parallel. - ran_admin = false - run_order.each do |batch| - next if batch.empty? - @logger.debug "batch #{batch.inspect}" + batches.each do |nodes| + next if nodes.empty? + @logger.debug "Applying batch: #{nodes.inspect}" non_admin_nodes = [] admin_list = [] - batch.each do |node_name| + nodes.each do |node_name| # Run admin nodes a different way. if admin_nodes.include?(node_name) - @logger.debug "#{node_name} is in admin_nodes #{admin_nodes.inspect}" admin_list << node_name ran_admin = true next From 0c8737e60815c7dac722ba70f849d0de4da205d9 Mon Sep 17 00:00:00 2001 From: Vincent Untz Date: Wed, 29 Jun 2016 10:33:55 +0200 Subject: [PATCH 03/16] crowbar: Fix apply_role not returning expected structure when queuing (cherry picked from commit a130195dd7a7681e1ae294248a06cb3330f2ed7c) (cherry picked from commit f0b83f3dcc0f84b035f73cecd42efa6968a2ad3a) --- crowbar_framework/app/models/service_object.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crowbar_framework/app/models/service_object.rb b/crowbar_framework/app/models/service_object.rb index 86125552ec..a57fa49d57 100644 --- a/crowbar_framework/app/models/service_object.rb +++ b/crowbar_framework/app/models/service_object.rb @@ -953,7 +953,7 @@ def apply_role(role, inst, in_queue) # deps = proposal_dependencies(role) delay, pre_cached_nodes = queue_proposal(inst, element_order, new_elements, deps) - return [202, delay] unless delay.empty? + return [202, "Queued: waiting for #{delay.join(", ")}"] unless delay.empty? @logger.debug "delay empty - running proposal" From b86960f6773cf2f2c2f2de129b1805383b9d19c3 Mon Sep 17 00:00:00 2001 From: Vincent Untz Date: Sun, 10 Jul 2016 06:42:36 +0200 Subject: [PATCH 04/16] crowbar: Minor cleanup in ServiceObjet.apply_role Don't be so complicated to build a list of nodes... (cherry picked from commit 91741e515c5b67af3dddaba6d9e637e65b416a1b) (cherry picked from commit fe8e1319ac286992c83670be27ae23a08820a62a) --- crowbar_framework/app/models/service_object.rb | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/crowbar_framework/app/models/service_object.rb b/crowbar_framework/app/models/service_object.rb index a57fa49d57..299b08085b 100644 --- a/crowbar_framework/app/models/service_object.rb +++ b/crowbar_framework/app/models/service_object.rb @@ -1005,6 +1005,9 @@ def apply_role(role, inst, in_queue) role_map = new_deployment["element_states"] role_map = {} unless role_map + # List of all *new* nodes which will be changed (sans deleted ones) + all_nodes = new_elements.values.flatten + # deployment["element_order"] tells us which order the various # roles should be applied, and deployment["elements"] tells us # which nodes each role should be applied to. We need to "join @@ -1022,9 +1025,6 @@ def apply_role(role, inst, in_queue) # } pending_node_actions = {} - # List of all *new* nodes which will be changed (sans deleted ones) - all_nodes = [] - # We'll build an Array where each item represents a batch of work, # and the batches must be performed sequentially in this order. batches = [] @@ -1129,8 +1129,6 @@ def apply_role(role, inst, in_queue) pre_cached_nodes[node_name] = node - all_nodes << node_name unless all_nodes.include?(node_name) - # A new node that we did not know before unless old_nodes.include?(node_name) @logger.debug "add node #{node_name}" From 03719c01d0b8c45ff76af006d7428a49a3b6a715 Mon Sep 17 00:00:00 2001 From: Vincent Untz Date: Sun, 10 Jul 2016 06:44:39 +0200 Subject: [PATCH 05/16] crowbar: Improve debug log in ServiceObject.apply_role This includes removing log lines which duplicate data already in the debug logs. (cherry picked from commit 8774bb25943dec10dc579d7dec9b5d87d1166fac) (cherry picked from commit a0d6671840dc223ecd44feed0ed61a351b4d412a) --- crowbar_framework/app/models/service_object.rb | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/crowbar_framework/app/models/service_object.rb b/crowbar_framework/app/models/service_object.rb index 299b08085b..e12704af14 100644 --- a/crowbar_framework/app/models/service_object.rb +++ b/crowbar_framework/app/models/service_object.rb @@ -1038,7 +1038,7 @@ def apply_role(role, inst, in_queue) element_order.each do |roles| # roles is an Array of names of Chef roles which can all be # applied in parallel. - @logger.debug "elems #{roles.inspect}" + @logger.debug "Preparing batch with following roles: #{roles.inspect}" # A list of nodes changed when applying roles from this batch nodes_in_batch = [] @@ -1051,9 +1051,9 @@ def apply_role(role, inst, in_queue) old_nodes = old_elements[role_name] || [] new_nodes = new_elements[role_name] || [] - @logger.debug "role_name #{role_name.inspect}" - @logger.debug "old_nodes #{old_nodes.inspect}" - @logger.debug "new_nodes #{new_nodes.inspect}" + @logger.debug "Preparing role #{role_name} for batch:" + @logger.debug " Nodes in old applied proposal for role: #{old_nodes.inspect}" + @logger.debug " Nodes in new applied proposal for role: #{new_nodes.inspect}" remove_role_name = "#{role_name}_remove" @@ -1214,7 +1214,7 @@ def apply_role(role, inst, in_queue) # Part III: Update run lists of nodes to reflect new deployment. I.e. write # through the deployment schedule in pending node actions into run lists. - @logger.debug "Clean the run_lists for #{pending_node_actions.inspect}" + @logger.debug "Update the run_lists for #{pending_node_actions.inspect}" admin_nodes = [] @@ -1232,9 +1232,6 @@ def apply_role(role, inst, in_queue) rlist = lists[:remove] alist = lists[:add] - @logger.debug "rlist #{rlist.pretty_inspect}" - @logger.debug "alist #{alist.pretty_inspect}" - # Remove the roles being lost rlist.each do |item| next unless node.role? item @@ -1273,7 +1270,6 @@ def apply_role(role, inst, in_queue) end end - @logger.debug("AR: Saving node #{node.name}") if save_it node.save if save_it end From d15c4e536883c2b105ea5fdf764136519feb1923 Mon Sep 17 00:00:00 2001 From: Vincent Untz Date: Thu, 21 Jul 2016 15:56:01 +0200 Subject: [PATCH 06/16] crowbar: Fix queue message when applying a proposal (bsc#989958) There was an attempt to fix the internal API so that ServiceObject.apply_role always returns [int, str], but this actually breaks the UI-only path in the controller. So revert this and document with a FIXME that this should eventually be fixed by moving everything to [int, hash] where we will have more freedom. https://bugzilla.suse.com/show_bug.cgi?id=989958 (cherry picked from commit 1ad066488776c313b1a01b3ddc19b216111e0658) (cherry picked from commit ec172747aee6e9fb9da532dd148b4a88726297cd) --- crowbar_framework/app/controllers/barclamp_controller.rb | 2 ++ crowbar_framework/app/models/service_object.rb | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/crowbar_framework/app/controllers/barclamp_controller.rb b/crowbar_framework/app/controllers/barclamp_controller.rb index 8646d99100..3da16dc346 100644 --- a/crowbar_framework/app/controllers/barclamp_controller.rb +++ b/crowbar_framework/app/controllers/barclamp_controller.rb @@ -569,6 +569,8 @@ def proposal_update "crowbar-deep-merge-template" ) ) + # See FIXME in ServiceObject.apply_role + message = "" if code = 202 respond_to do |format| case code diff --git a/crowbar_framework/app/models/service_object.rb b/crowbar_framework/app/models/service_object.rb index e12704af14..e5a9692102 100644 --- a/crowbar_framework/app/models/service_object.rb +++ b/crowbar_framework/app/models/service_object.rb @@ -953,7 +953,9 @@ def apply_role(role, inst, in_queue) # deps = proposal_dependencies(role) delay, pre_cached_nodes = queue_proposal(inst, element_order, new_elements, deps) - return [202, "Queued: waiting for #{delay.join(", ")}"] unless delay.empty? + # FIXME: this breaks the convention that we return a string; but really, + # we should return a hash everywhere, to avoid this... + return [202, delay] unless delay.empty? @logger.debug "delay empty - running proposal" From e54aa77e517cef3b61e3e9d9f9288cd887e01042 Mon Sep 17 00:00:00 2001 From: Vincent Untz Date: Fri, 1 Apr 2016 09:42:28 +0200 Subject: [PATCH 07/16] crowbar: Refactor apply_role thanks to the "ensure" We move some code that must always be run before returning in the global ensure; this helps make sure that code is actually run in all cases. (cherry picked from commit 7010dc7b3a2cb1298319c4bd005c17df0f904651) (cherry picked from commit 99d978ffeb251e5003b321ec39c20ccb7ae52e46) --- .../app/models/service_object.rb | 31 +++++++++---------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/crowbar_framework/app/models/service_object.rb b/crowbar_framework/app/models/service_object.rb index e5a9692102..28e2dd6b79 100644 --- a/crowbar_framework/app/models/service_object.rb +++ b/crowbar_framework/app/models/service_object.rb @@ -923,6 +923,10 @@ def self.proposal_to_role(proposal, bc_name) def apply_role(role, inst, in_queue) @logger.debug "apply_role(#{role.name}, #{inst}, #{in_queue})" + # Variables used in the global ensure + apply_locks = [] + applying_nodes = [] + # Part I: Looking up data & checks # # we look up the role in the database (if there is one), the new one is @@ -953,9 +957,13 @@ def apply_role(role, inst, in_queue) # deps = proposal_dependencies(role) delay, pre_cached_nodes = queue_proposal(inst, element_order, new_elements, deps) - # FIXME: this breaks the convention that we return a string; but really, - # we should return a hash everywhere, to avoid this... - return [202, delay] unless delay.empty? + unless delay.empty? + # force not processing the queue further + in_queue = true + # FIXME: this breaks the convention that we return a string; but really, + # we should return a hash everywhere, to avoid this... + return [202, delay] + end @logger.debug "delay empty - running proposal" @@ -967,7 +975,6 @@ def apply_role(role, inst, in_queue) @logger.fatal("apply_role: Failed to expand items #{failures.inspect} for role \"#{role_name}\"") message = "Failed to apply the proposal: cannot expand list of nodes for role \"#{role_name}\", following items do not exist: #{failures.join(", ")}" update_proposal_status(inst, "failed", message) - process_queue unless in_queue return [405, message] end end @@ -1186,8 +1193,6 @@ def apply_role(role, inst, in_queue) rescue Crowbar::Error::LockingFailure => e message = "Failed to apply the proposal: #{e.message}" update_proposal_status(inst, "failed", message) - restore_to_ready(applying_nodes) - process_queue unless in_queue return [409, message] # 409 is 'Conflict' end @@ -1289,8 +1294,6 @@ def apply_role(role, inst, in_queue) @logger.fatal("apply_role: Exception #{e.message} #{e.backtrace.join("\n")}") message = "Failed to apply the proposal: exception before calling chef (#{e.message})" update_proposal_status(inst, "failed", message) - restore_to_ready(applying_nodes) - process_queue unless in_queue return [405, message] end @@ -1358,8 +1361,6 @@ def apply_role(role, inst, in_queue) message = message + "#{pids[baddie[0]]} \n"+ get_log_lines("#{pids[baddie[0]]}") end update_proposal_status(inst, "failed", message) - restore_to_ready(applying_nodes) - process_queue unless in_queue return [405, message] end end @@ -1396,8 +1397,6 @@ def apply_role(role, inst, in_queue) message = message + "#{pids[baddie[0]]} \n "+ get_log_lines("#{pids[baddie[0]]}") end update_proposal_status(inst, "failed", message) - restore_to_ready(applying_nodes) - process_queue unless in_queue return [405, message] end end @@ -1414,8 +1413,6 @@ def apply_role(role, inst, in_queue) @logger.fatal("apply_role: Exception #{e.message} #{e.backtrace.join("\n")}") message = "Failed to apply the proposal: exception after calling chef (#{e.message})" update_proposal_status(inst, "failed", message) - restore_to_ready(applying_nodes) - process_queue unless in_queue return [405, message] end @@ -1450,11 +1447,11 @@ def apply_role(role, inst, in_queue) proposal.save unless roles_to_remove.empty? update_proposal_status(inst, "success", "") - restore_to_ready(applying_nodes) - process_queue unless in_queue [200, {}] ensure - apply_locks.each(&:release) if apply_locks + apply_locks.each(&:release) if apply_locks.any? + restore_to_ready(applying_nodes) if applying_nodes.any? + process_queue unless in_queue end def apply_role_pre_chef_call(old_role, role, all_nodes) From e56e57ac412c789e0d659e2a1bec0cbe5cab68e1 Mon Sep 17 00:00:00 2001 From: Vincent Untz Date: Tue, 26 Jul 2016 09:55:44 +0200 Subject: [PATCH 08/16] crowbar: Catch exceptions in apply_role (partially fixes bsc#840255) We catch all exceptions that aren't caught from this method to set the status of the proposal to "failed", to avoid people being blocked by a proposal staying forever in "applying". This helps with https://bugzilla.suse.com/show_bug.cgi?id=840255 (cherry picked from commit 43bc948079ba0fc3aa868fde417218953c82dffd) (cherry picked from commit 50ec9c08fc64e3797e3a0bdfbc7853b68ce9d601) --- crowbar_framework/app/models/service_object.rb | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/crowbar_framework/app/models/service_object.rb b/crowbar_framework/app/models/service_object.rb index 28e2dd6b79..20671da2c4 100644 --- a/crowbar_framework/app/models/service_object.rb +++ b/crowbar_framework/app/models/service_object.rb @@ -1448,6 +1448,11 @@ def apply_role(role, inst, in_queue) update_proposal_status(inst, "success", "") [200, {}] + rescue StandardError => e + @logger.fatal("apply_role: Uncaught exception #{e.message} #{e.backtrace.join("\n")}") + message = "Failed to apply the proposal: uncaught exception (#{e.message})" + update_proposal_status(inst, "failed", message) + [405, message] ensure apply_locks.each(&:release) if apply_locks.any? restore_to_ready(applying_nodes) if applying_nodes.any? From 4fd053b221991184431c0bb33e152c54b660c35d Mon Sep 17 00:00:00 2001 From: Rick Salevsky Date: Thu, 15 Dec 2016 13:36:51 +0100 Subject: [PATCH 09/16] proposal: Add brackets around active_update params This change makes the code a little bit more readable. (cherry picked from commit 627e75ba0744b87de50e46123e2ff5917b92644d) (cherry picked from commit 2717af20a9de1fe9869dc127bedb30aff909dedb) --- crowbar_framework/app/models/service_object.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crowbar_framework/app/models/service_object.rb b/crowbar_framework/app/models/service_object.rb index 20671da2c4..19688c2da4 100644 --- a/crowbar_framework/app/models/service_object.rb +++ b/crowbar_framework/app/models/service_object.rb @@ -563,7 +563,7 @@ def proposal_commit(inst, in_queue = false, validate_after_save = true) # Put mark on the wall prop["deployment"][@bc_name]["crowbar-committing"] = true save_proposal!(prop, validate_after_save: validate_after_save) - response = active_update prop.raw_data, inst, in_queue + response = active_update(prop.raw_data, inst, in_queue) rescue Chef::Exceptions::ValidationFailed => e @logger.error ([e.message] + e.backtrace).join("\n") response = [400, "Failed to validate proposal: #{e.message}"] From e327ca0c3ee8a1e1f0a5fb47efe735f9849f6b02 Mon Sep 17 00:00:00 2001 From: Vincent Untz Date: Fri, 2 Dec 2016 12:19:51 +0100 Subject: [PATCH 10/16] crowbar: Mark the proposal as not applied on saving We mark it as applied when committing, but then we never reset it to not applied when we do a change and just save. That results in the code assuming that the saved proposal is always the latest one. (cherry picked from commit cefacf692a5ed63ea178ee2601383fdfb6ed968e) (cherry picked from commit 09e3303ebc7e60068fed72e6e8f010846c65b02f) --- crowbar_framework/app/models/service_object.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/crowbar_framework/app/models/service_object.rb b/crowbar_framework/app/models/service_object.rb index 19688c2da4..6fe1f3c8ce 100644 --- a/crowbar_framework/app/models/service_object.rb +++ b/crowbar_framework/app/models/service_object.rb @@ -543,6 +543,7 @@ def save_proposal!(prop, options = {}) clean_proposal(prop.raw_data) validate_proposal(prop.raw_data) validate_proposal_elements(prop.elements) + prop.latest_applied = false prop.save validate_proposal_after_save(prop.raw_data) if options[:validate_after_save] end From 32d3a8085d4ddace4f337d2905b55c7fa6b832b9 Mon Sep 17 00:00:00 2001 From: Vincent Untz Date: Fri, 22 Jul 2016 15:24:34 +0200 Subject: [PATCH 11/16] crowbar: Fix regression in 1ad06648, with wrong test condition Was part of https://github.com/crowbar/crowbar-core/pull/560 (cherry picked from commit 7d12ecbddd8305ab7f2111b3b825b8adb88f8adc) (cherry picked from commit 457222b5cfd1323f492a75c2243ed660782f2fab) --- crowbar_framework/app/controllers/barclamp_controller.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crowbar_framework/app/controllers/barclamp_controller.rb b/crowbar_framework/app/controllers/barclamp_controller.rb index 3da16dc346..c43f46e6c8 100644 --- a/crowbar_framework/app/controllers/barclamp_controller.rb +++ b/crowbar_framework/app/controllers/barclamp_controller.rb @@ -570,7 +570,7 @@ def proposal_update ) ) # See FIXME in ServiceObject.apply_role - message = "" if code = 202 + message = "" if code == 202 respond_to do |format| case code From 188228f098eb3a2b18d1c86e4bf983f224d593b1 Mon Sep 17 00:00:00 2001 From: Vincent Untz Date: Wed, 14 Dec 2016 22:01:15 +0100 Subject: [PATCH 12/16] crowbar: Avoid reloading nodes all the time in apply_role In a few cases, we load a node just to look for some attribute that we could have cached earlier. So let's just do that. (cherry picked from commit 3374d9f3364ad81c262025abc5b9a62f632ae425) (cherry picked from commit 890e72089bfa9c4386107742378a85ba97c96d38) --- .../app/models/service_object.rb | 39 ++++++++++--------- 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/crowbar_framework/app/models/service_object.rb b/crowbar_framework/app/models/service_object.rb index 6fe1f3c8ce..ad49f9c7a7 100644 --- a/crowbar_framework/app/models/service_object.rb +++ b/crowbar_framework/app/models/service_object.rb @@ -928,6 +928,9 @@ def apply_role(role, inst, in_queue) apply_locks = [] applying_nodes = [] + # Cache some node attributes to avoid useless node reloads + node_attr_cache = {} + # Part I: Looking up data & checks # # we look up the role in the database (if there is one), the new one is @@ -1078,17 +1081,15 @@ def apply_role(role, inst, in_queue) use_remove_role = !tmprole.nil? old_nodes.each do |node_name| - node = NodeObject.find_node_by_name(node_name) + pre_cached_nodes[node_name] ||= NodeObject.find_node_by_name(node_name) # Don't add deleted nodes to the run order, they clearly won't have # the old role - if node.nil? + if pre_cached_nodes[node_name].nil? @logger.debug "skipping deleted node #{node_name}" next end - pre_cached_nodes[node_name] = node - # An old node that is not in the new deployment, drop it unless new_nodes.include?(node_name) @logger.debug "remove node #{node_name}" @@ -1119,7 +1120,7 @@ def apply_role(role, inst, in_queue) # If new_nodes is empty, we are just removing the proposal. unless new_nodes.empty? new_nodes.each do |node_name| - node = NodeObject.find_node_by_name(node_name) + pre_cached_nodes[node_name] ||= NodeObject.find_node_by_name(node_name) # Don't add deleted nodes to the run order # @@ -1132,13 +1133,11 @@ def apply_role(role, inst, in_queue) # have some alias that be used to assign all existing nodes to a # role (which would be an improvement over the requirement to # explicitly list all nodes). - if node.nil? + if pre_cached_nodes[node_name].nil? @logger.debug "skipping deleted node #{node_name}" next end - pre_cached_nodes[node_name] = node - # A new node that we did not know before unless old_nodes.include?(node_name) @logger.debug "add node #{node_name}" @@ -1154,6 +1153,15 @@ def apply_role(role, inst, in_queue) end @logger.debug "batches: #{batches.inspect}" + # Cache attributes that are useful later on + pre_cached_nodes.each do |node_name, node| + node_attr_cache[node_name] = { + "alias" => node.alias, + "windows" => node[:platform_family] == "windows", + "admin" => node.admin? + } + end + # save databag with the role removal intention proposal.save if save_proposal @@ -1171,9 +1179,7 @@ def apply_role(role, inst, in_queue) # pause-file.lock exists which the daemons will honour due to a # custom patch: nodes_to_lock = applying_nodes.reject do |node_name| - pre_cached_nodes[node_name] ||= NodeObject.find_node_by_name(node_name) - node = pre_cached_nodes[node_name] - node[:platform_family] == "windows" || node.admin? + node_attr_cache[node_name]["windows"] || node_attr_cache[node_name]["admin"] end begin @@ -1332,13 +1338,10 @@ def apply_role(role, inst, in_queue) "AR: Calling chef-client for #{role.name} on non-admin nodes #{non_admin_nodes.join(" ")}" ) non_admin_nodes.each do |node| - pre_cached_nodes[node] ||= NodeObject.find_node_by_name(node) - nobj = pre_cached_nodes[node] - unless nobj[:platform_family] == "windows" - filename = "#{ENV['CROWBAR_LOG_DIR']}/chef-client/#{node}.log" - pid = run_remote_chef_client(node, "chef-client", filename) - pids[pid] = node - end + next if node_attr_cache[node]["windows"] + filename = "#{ENV['CROWBAR_LOG_DIR']}/chef-client/#{node}.log" + pid = run_remote_chef_client(node, "chef-client", filename) + pids[pid] = node end status = Process.waitall badones = status.select { |x| x[1].exitstatus != 0 } From 93525eb88901d7cc37894264f3abacb6c7fd3e1f Mon Sep 17 00:00:00 2001 From: Vincent Untz Date: Wed, 14 Dec 2016 22:13:10 +0100 Subject: [PATCH 13/16] crowbar: Change ServiceObject.proposal_commit to use keyword arguments We want to have more arguments, and it's going to be confusing. (cherry picked from commit 6d69e8abc3e45aec6abf05f51f8585058800d079) (cherry picked from commit b31bec10a9cb340156be97d1b2d2fd87318494e1) --- crowbar_framework/app/models/crowbar_service.rb | 8 ++++---- crowbar_framework/app/models/service_object.rb | 11 ++++++++--- crowbar_framework/lib/crowbar/deployment_queue.rb | 3 +-- 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/crowbar_framework/app/models/crowbar_service.rb b/crowbar_framework/app/models/crowbar_service.rb index 786ebd9b5b..55799f0809 100644 --- a/crowbar_framework/app/models/crowbar_service.rb +++ b/crowbar_framework/app/models/crowbar_service.rb @@ -45,7 +45,7 @@ def role_constraints # Unfortunatelly we need to explicitely look at crowbar-status of the proposal # because apply_role from this model ignores errors from superclass's apply_role. def commit_and_check_proposal - answer = proposal_commit("default", false, false) + answer = proposal_commit("default", in_queue: false, validate_after_save: false) # check if error message is saved in one of the nodes if answer.first != 200 found_errors = [] @@ -390,7 +390,7 @@ def prepare_nodes_for_crowbar_upgrade proposal.raw_data["deployment"]["crowbar"]["elements"]["crowbar-upgrade"] = nodes_to_upgrade proposal.save # commit the proposal so chef recipe get executed - proposal_commit("default", false, false) + proposal_commit("default", in_queue: false, validate_after_save: false) end def disable_non_core_proposals @@ -492,7 +492,7 @@ def revert_nodes_from_crowbar_upgrade # commit current proposal (with the crowbar-upgrade role still assigned to nodes), # so the recipe is executed when nodes have 'ready' state - proposal_commit("default", false, false) + proposal_commit("default", in_queue: false, validate_after_save: false) # now remove the nodes from upgrade role proposal["deployment"]["crowbar"]["elements"]["crowbar-upgrade"] = [] proposal.save @@ -569,7 +569,7 @@ def apply_role (role, inst, in_queue) else unless answer[1].include?(id) @logger.debug("Crowbar apply_role: #{k}.#{id} wasn't active: Activating") - answer = service.proposal_commit(id, false, false) + answer = service.proposal_commit(id, in_queue: false, validate_after_save: false) if answer[0] != 200 answer[1] = "Failed to commit proposal '#{id}' for '#{k}' " + "(The error message was: #{answer[1].strip})" diff --git a/crowbar_framework/app/models/service_object.rb b/crowbar_framework/app/models/service_object.rb index ad49f9c7a7..f554691947 100644 --- a/crowbar_framework/app/models/service_object.rb +++ b/crowbar_framework/app/models/service_object.rb @@ -551,7 +551,12 @@ def save_proposal!(prop, options = {}) # XXX: this is where proposal gets copied into a role, scheduling / ops order # is computed (in apply_role) and chef client gets called on the nodes. # Hopefully, this will get moved into a background job. - def proposal_commit(inst, in_queue = false, validate_after_save = true) + def proposal_commit(inst, options = {}) + options.reverse_merge!( + in_queue: false, + validate_after_save: true + ) + prop = Proposal.where(barclamp: @bc_name, name: inst).first if prop.nil? @@ -563,8 +568,8 @@ def proposal_commit(inst, in_queue = false, validate_after_save = true) begin # Put mark on the wall prop["deployment"][@bc_name]["crowbar-committing"] = true - save_proposal!(prop, validate_after_save: validate_after_save) - response = active_update(prop.raw_data, inst, in_queue) + save_proposal!(prop, validate_after_save: options[:validate_after_save]) + response = active_update(prop.raw_data, inst, options[:in_queue]) rescue Chef::Exceptions::ValidationFailed => e @logger.error ([e.message] + e.backtrace).join("\n") response = [400, "Failed to validate proposal: #{e.message}"] diff --git a/crowbar_framework/lib/crowbar/deployment_queue.rb b/crowbar_framework/lib/crowbar/deployment_queue.rb index 149c5443d4..5b6a9e3db5 100644 --- a/crowbar_framework/lib/crowbar/deployment_queue.rb +++ b/crowbar_framework/lib/crowbar/deployment_queue.rb @@ -202,8 +202,7 @@ def commit_proposal(bc, inst) service = eval("#{bc.camelize}Service.new logger") # This will call apply_role and chef-client. - # Params: (inst, in_queue, validate_after_save) - status, message = service.proposal_commit(inst, true, false) + status, message = service.proposal_commit(inst, in_queue: true, validate_after_save: false) logger.debug("process queue: committed item #{bc}:#{inst}: results = #{message.inspect}") From a061df204a417a6f53646cb83cb20cafc58a76cc Mon Sep 17 00:00:00 2001 From: Vincent Untz Date: Wed, 14 Dec 2016 22:24:23 +0100 Subject: [PATCH 14/16] crowbar: Do not validate proposal twice on apply It's done once when saving, and another time when applying. (cherry picked from commit 50e24e257b20a2bdff765b84fe9e481584f3b5a9) (cherry picked from commit 936f2c3f5873deedd8eb94522dcae19163ee4884) --- crowbar_framework/app/controllers/barclamp_controller.rb | 3 ++- crowbar_framework/app/models/crowbar_service.rb | 4 ++-- crowbar_framework/app/models/service_object.rb | 9 +++++---- crowbar_framework/lib/crowbar/deployment_queue.rb | 2 +- 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/crowbar_framework/app/controllers/barclamp_controller.rb b/crowbar_framework/app/controllers/barclamp_controller.rb index c43f46e6c8..075466cc9e 100644 --- a/crowbar_framework/app/controllers/barclamp_controller.rb +++ b/crowbar_framework/app/controllers/barclamp_controller.rb @@ -625,7 +625,8 @@ def proposal_update @proposal["attributes"][params[:barclamp]] = JSON.parse(params[:proposal_attributes]) @proposal["deployment"][params[:barclamp]] = JSON.parse(params[:proposal_deployment]) @service_object.save_proposal!(@proposal) - answer = @service_object.proposal_commit(params[:name]) + # validation already happened on save + answer = @service_object.proposal_commit(params[:name], validate: false, validate_after_save: false) flash[:alert] = answer[1] if answer[0] >= 400 flash[:notice] = answer[1] if answer[0] >= 300 and answer[0] < 400 flash[:notice] = t("barclamp.proposal_show.commit_proposal_success") if answer[0] == 200 diff --git a/crowbar_framework/app/models/crowbar_service.rb b/crowbar_framework/app/models/crowbar_service.rb index 55799f0809..44d220113f 100644 --- a/crowbar_framework/app/models/crowbar_service.rb +++ b/crowbar_framework/app/models/crowbar_service.rb @@ -45,7 +45,7 @@ def role_constraints # Unfortunatelly we need to explicitely look at crowbar-status of the proposal # because apply_role from this model ignores errors from superclass's apply_role. def commit_and_check_proposal - answer = proposal_commit("default", in_queue: false, validate_after_save: false) + answer = proposal_commit("default", in_queue: false, validate: false, validate_after_save: false) # check if error message is saved in one of the nodes if answer.first != 200 found_errors = [] @@ -492,7 +492,7 @@ def revert_nodes_from_crowbar_upgrade # commit current proposal (with the crowbar-upgrade role still assigned to nodes), # so the recipe is executed when nodes have 'ready' state - proposal_commit("default", in_queue: false, validate_after_save: false) + proposal_commit("default", in_queue: false, validate: false, validate_after_save: false) # now remove the nodes from upgrade role proposal["deployment"]["crowbar"]["elements"]["crowbar-upgrade"] = [] proposal.save diff --git a/crowbar_framework/app/models/service_object.rb b/crowbar_framework/app/models/service_object.rb index f554691947..0cb33051ed 100644 --- a/crowbar_framework/app/models/service_object.rb +++ b/crowbar_framework/app/models/service_object.rb @@ -539,10 +539,10 @@ def proposal_delete(inst) # FIXME: most of these can be validations on the model itself, # preferrably refactored into Validator classes. def save_proposal!(prop, options = {}) - options.reverse_merge!(validate_after_save: true) + options.reverse_merge!(validate: true, validate_after_save: true) clean_proposal(prop.raw_data) - validate_proposal(prop.raw_data) - validate_proposal_elements(prop.elements) + validate_proposal(prop.raw_data) if options[:validate] + validate_proposal_elements(prop.elements) if options[:validate] prop.latest_applied = false prop.save validate_proposal_after_save(prop.raw_data) if options[:validate_after_save] @@ -554,6 +554,7 @@ def save_proposal!(prop, options = {}) def proposal_commit(inst, options = {}) options.reverse_merge!( in_queue: false, + validate: true, validate_after_save: true ) @@ -568,7 +569,7 @@ def proposal_commit(inst, options = {}) begin # Put mark on the wall prop["deployment"][@bc_name]["crowbar-committing"] = true - save_proposal!(prop, validate_after_save: options[:validate_after_save]) + save_proposal!(prop, validate: options[:validate], validate_after_save: options[:validate_after_save]) response = active_update(prop.raw_data, inst, options[:in_queue]) rescue Chef::Exceptions::ValidationFailed => e @logger.error ([e.message] + e.backtrace).join("\n") diff --git a/crowbar_framework/lib/crowbar/deployment_queue.rb b/crowbar_framework/lib/crowbar/deployment_queue.rb index 5b6a9e3db5..d63fbafd26 100644 --- a/crowbar_framework/lib/crowbar/deployment_queue.rb +++ b/crowbar_framework/lib/crowbar/deployment_queue.rb @@ -202,7 +202,7 @@ def commit_proposal(bc, inst) service = eval("#{bc.camelize}Service.new logger") # This will call apply_role and chef-client. - status, message = service.proposal_commit(inst, in_queue: true, validate_after_save: false) + status, message = service.proposal_commit(inst, in_queue: true, validate: false, validate_after_save: false) logger.debug("process queue: committed item #{bc}:#{inst}: results = #{message.inspect}") From e64e4a11eaafb8b21cb8bf33387a49f85f1c756d Mon Sep 17 00:00:00 2001 From: Vincent Untz Date: Wed, 14 Dec 2016 23:28:21 +0100 Subject: [PATCH 15/16] crowbar: Avoid uselessly saving a node on re-allocation of IP If the node already has the info, then no need to save, which is expensive. (cherry picked from commit 2013a47d3f5bd457f68557260496c5fabd7965f1) (cherry picked from commit 85cf95164688a46fbbfec3530badd91ab186b321) --- crowbar_framework/app/models/network_service.rb | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/crowbar_framework/app/models/network_service.rb b/crowbar_framework/app/models/network_service.rb index 94c79a633a..d3a466f65d 100644 --- a/crowbar_framework/app/models/network_service.rb +++ b/crowbar_framework/app/models/network_service.rb @@ -122,8 +122,10 @@ def allocate_ip_by_type(bc_instance, network, range, object, type, suggestion = if type == :node # Save the information. - node.crowbar["crowbar"]["network"][network] = net_info - node.save + if node.crowbar["crowbar"]["network"][network] != net_info + node.crowbar["crowbar"]["network"][network] = net_info + node.save + end end @logger.info("Network allocate ip for #{type}: Assigned: #{name} #{network} #{range} #{net_info["address"]}") From 5d1b187c2da44bccfc4f1f84f0e625ee0dfaa871 Mon Sep 17 00:00:00 2001 From: Vincent Untz Date: Thu, 15 Dec 2016 01:01:57 +0100 Subject: [PATCH 16/16] Make hound happy (cherry picked from commit 1442925a4d2c31098ecd966016825c51e453b87f) (cherry picked from commit b6b44c3f1aed24e237e10a282fdf46823f2e6eda) --- crowbar_framework/app/controllers/barclamp_controller.rb | 4 +++- crowbar_framework/app/models/crowbar_service.rb | 7 ++++++- crowbar_framework/app/models/service_object.rb | 4 +++- crowbar_framework/lib/crowbar/deployment_queue.rb | 7 ++++++- 4 files changed, 18 insertions(+), 4 deletions(-) diff --git a/crowbar_framework/app/controllers/barclamp_controller.rb b/crowbar_framework/app/controllers/barclamp_controller.rb index 075466cc9e..59e75a9678 100644 --- a/crowbar_framework/app/controllers/barclamp_controller.rb +++ b/crowbar_framework/app/controllers/barclamp_controller.rb @@ -626,7 +626,9 @@ def proposal_update @proposal["deployment"][params[:barclamp]] = JSON.parse(params[:proposal_deployment]) @service_object.save_proposal!(@proposal) # validation already happened on save - answer = @service_object.proposal_commit(params[:name], validate: false, validate_after_save: false) + answer = @service_object.proposal_commit(params[:name], + validate: false, + validate_after_save: false) flash[:alert] = answer[1] if answer[0] >= 400 flash[:notice] = answer[1] if answer[0] >= 300 and answer[0] < 400 flash[:notice] = t("barclamp.proposal_show.commit_proposal_success") if answer[0] == 200 diff --git a/crowbar_framework/app/models/crowbar_service.rb b/crowbar_framework/app/models/crowbar_service.rb index 44d220113f..8b23b9efb8 100644 --- a/crowbar_framework/app/models/crowbar_service.rb +++ b/crowbar_framework/app/models/crowbar_service.rb @@ -45,7 +45,12 @@ def role_constraints # Unfortunatelly we need to explicitely look at crowbar-status of the proposal # because apply_role from this model ignores errors from superclass's apply_role. def commit_and_check_proposal - answer = proposal_commit("default", in_queue: false, validate: false, validate_after_save: false) + answer = proposal_commit( + "default", + in_queue: false, + validate: false, + validate_after_save: false + ) # check if error message is saved in one of the nodes if answer.first != 200 found_errors = [] diff --git a/crowbar_framework/app/models/service_object.rb b/crowbar_framework/app/models/service_object.rb index 0cb33051ed..1b973d8196 100644 --- a/crowbar_framework/app/models/service_object.rb +++ b/crowbar_framework/app/models/service_object.rb @@ -569,7 +569,9 @@ def proposal_commit(inst, options = {}) begin # Put mark on the wall prop["deployment"][@bc_name]["crowbar-committing"] = true - save_proposal!(prop, validate: options[:validate], validate_after_save: options[:validate_after_save]) + save_proposal!(prop, + validate: options[:validate], + validate_after_save: options[:validate_after_save]) response = active_update(prop.raw_data, inst, options[:in_queue]) rescue Chef::Exceptions::ValidationFailed => e @logger.error ([e.message] + e.backtrace).join("\n") diff --git a/crowbar_framework/lib/crowbar/deployment_queue.rb b/crowbar_framework/lib/crowbar/deployment_queue.rb index d63fbafd26..029438c60b 100644 --- a/crowbar_framework/lib/crowbar/deployment_queue.rb +++ b/crowbar_framework/lib/crowbar/deployment_queue.rb @@ -202,7 +202,12 @@ def commit_proposal(bc, inst) service = eval("#{bc.camelize}Service.new logger") # This will call apply_role and chef-client. - status, message = service.proposal_commit(inst, in_queue: true, validate: false, validate_after_save: false) + status, message = service.proposal_commit( + inst, + in_queue: true, + validate: false, + validate_after_save: false + ) logger.debug("process queue: committed item #{bc}:#{inst}: results = #{message.inspect}")