diff --git a/app/models/node.rb b/app/models/node.rb index d26d456c1..8d7c09d1a 100644 --- a/app/models/node.rb +++ b/app/models/node.rb @@ -221,9 +221,9 @@ def find_and_assign_last_inspect_report end end - def prune_reports(cutoff) + def prune_reports(cutoff, condition, status) transaction do - old_report_ids = self.reports.where('reports.time < ?', cutoff).pluck(:id) + old_report_ids = self.reports.where("reports.time < ? AND reports.status #{condition} ?", cutoff, status).pluck(:id) deleted_count = Report.bulk_delete(old_report_ids) self.find_and_assign_last_inspect_report self.find_and_assign_last_apply_report diff --git a/lib/tasks/prune_reports.rake b/lib/tasks/prune_reports.rake index b12a0c030..5227b4a8c 100644 --- a/lib/tasks/prune_reports.rake +++ b/lib/tasks/prune_reports.rake @@ -13,13 +13,34 @@ namespace :reports do } known_units = units.keys.join(',') + statuses = [ + 'changed', + 'unchanged', + 'pending', + 'failed' + ] + known_statuses = statuses.join(',') + + conditions = { + 'is' => '=', + 'not' => '!=' + } + known_conditions = conditions.keys.join(',') + usage = %{ EXAMPLE: # Prune records upto 1 month old: rake reports:prune upto=1 unit=mon + # Prune records upto 1 month old and with status 'unchanged' + rake reports:prune upto=1 unit=mon condition=is status=unchanged + UNITS: Valid units of time are: #{known_units} + +STATUS & CONDITION: + Valid status values are: #{known_statuses} + Valid condition values are: #{known_conditions} }.strip unless ENV['upto'] || ENV['unit'] @@ -32,15 +53,35 @@ UNITS: if ENV['upto'] =~ /^\d+$/ upto = ENV['upto'].to_i else - errors << "You must specify how far up you want to prune as an integer, e.g.: upto={some integer}" \ + errors << "You must specify how far up you want to prune as an integer, e.g.: upto={some integer}" end if unit = ENV['unit'] unless units.has_key?(unit) - errors << "I don't know that unit. Valid units are: #{known_units}" \ + errors << "I don't know that unit. Valid units are: #{known_units}" end else - errors << "You must specify the unit of time, .e.g.: unit={#{known_units}}" \ + errors << "You must specify the unit of time, .e.g.: unit={#{known_units}}" + end + + if ( ENV['status'] && ! ENV['condition'] ) || ( ENV['condition'] && ! ENV['status'] ) + errors << "You must specify status AND condition!" + end + + if status = ENV['status'] + unless statuses.include?(status) + errors << "I don't know that status. Valid statuses are: #{known_statuses}" + end + else + status = "" + end + + if condition = ENV['condition'] + unless conditions.has_key?(condition) + errors << "I don't know that condition. Valid conditions are: #{known_conditions}" + end + else + condition = "not" end if errors.present? @@ -50,15 +91,17 @@ UNITS: exit 1 end + condition = conditions[condition] + # Thin query for nodes with reports that may be pruned. # By selecting the 'id' column only, it does not eager load all of the # nodes and definitely not all of the reports, making this much faster. cutoff = Time.now.gmtime - (upto * units[unit].to_i) affected_nodes = Node.select('DISTINCT nodes.id') \ .joins('LEFT OUTER JOIN reports ON reports.node_id = nodes.id') \ - .where(['reports.time < ?', cutoff]) + .where("reports.time < ? AND reports.status #{condition} ?", cutoff, status) deletion_count = affected_nodes.count - puts "#{Time.now.to_s(:db)}: Deleting reports before #{cutoff} for #{deletion_count} nodes" + puts "#{Time.now.to_s(:db)}: Deleting reports before #{cutoff} and with report status #{condition} \"#{status}\" for #{deletion_count} nodes" pbar = ProgressBar.new('Deleting', deletion_count, STDOUT) deleted_count = 0 @@ -66,7 +109,7 @@ UNITS: begin affected_nodes.each do |node| node.reload # Become a complete object (the query above returns shalow objects with 'id' only) - deleted_count += node.prune_reports(cutoff) + deleted_count += node.prune_reports(cutoff, condition, status) pbar.inc end rescue SignalException