Skip to content
This repository was archived by the owner on Mar 26, 2024. It is now read-only.
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions app/models/node.rb
Original file line number Diff line number Diff line change
Expand Up @@ -221,9 +221,9 @@ def find_and_assign_last_inspect_report
end
end

def prune_reports(cutoff)
def prune_reports(cutoff, condition, status)
transaction do
old_report_ids = self.reports.where('reports.time < ?', cutoff).pluck(:id)
old_report_ids = self.reports.where("reports.time < \"#{cutoff}\" and reports.status #{condition} \"#{status}\"").pluck(:id)
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I missed this one, same change as in the other file:
.where("reports.time < ? AND reports.status #{condition} ?", cutoff, status)

deleted_count = Report.bulk_delete(old_report_ids)
self.find_and_assign_last_inspect_report
self.find_and_assign_last_apply_report
Expand Down
51 changes: 48 additions & 3 deletions lib/tasks/prune_reports.rake
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,34 @@ namespace :reports do
}
known_units = units.keys.join(',')

statuses = [
'changed',
'unchanged',
'pending',
'failed'
]
known_statuses = statuses.join(',')

conditions = {
'is' => '=',
'not' => '!='
}
known_conditions = conditions.keys.join(',')

usage = %{
EXAMPLE:
# Prune records upto 1 month old:
rake reports:prune upto=1 unit=mon

# Prune records upto 1 month old and with status 'unchanged'
rake reports:prune upto=1 unit=mon condition=is status=unchanged

UNITS:
Valid units of time are: #{known_units}

STATUS & CONDITION:
Valid status values are: #{known_statuses}
Valid condition values are: #{known_conditions}
}.strip

unless ENV['upto'] || ENV['unit']
Expand All @@ -43,30 +64,54 @@ UNITS:
errors << "You must specify the unit of time, .e.g.: unit={#{known_units}}" \
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This trailing \ appears to be a mistake, please remove in your PR to help clean up the existing code.

end

if ( ENV['status'] && ! ENV['condition'] ) || ( ENV['condition'] && ! ENV['status'] )
errors << "You must specify status AND condition!"\
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The trailing \ isn't needed. (I see it above in the time units line, that looks like a mistake.)

end

if status = ENV['status']
unless statuses.include?(status)
errors << "I don't know that status. Valid statuses are: #{known_statuses}"\
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

trailing \

end
else
status = ""
end

if condition = ENV['condition']
unless conditions.has_key?(condition)
errors << "I don't know that condition. Valid conditionss are: #{known_conditions}"\
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

extra 's' on conditions.

end
else
condition = "!="
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Make this not instead of !=

end

if errors.present?
$stderr.puts errors.map { |error| "ERROR: #{error}" }
$stderr.puts
$stderr.puts usage
exit 1
end

if condition != "!="
condition = conditions[condition]
end

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remove the if, since above you'll set the default to not which means you can unconditionally access the hash.

# Thin query for nodes with reports that may be pruned.
# By selecting the 'id' column only, it does not eager load all of the
# nodes and definitely not all of the reports, making this much faster.
cutoff = Time.now.gmtime - (upto * units[unit].to_i)
affected_nodes = Node.select('DISTINCT nodes.id') \
.joins('LEFT OUTER JOIN reports ON reports.node_id = nodes.id') \
.where(['reports.time < ?', cutoff])
.where("reports.time < \"#{cutoff}\" and reports.status #{condition} \"#{status}\"")
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Even though it's interpolating on condition, I think it's still better to use the ? syntax here:

.where("reports.time < ? AND reports.status #{condition} ?", cutoff, status)

deletion_count = affected_nodes.count
puts "#{Time.now.to_s(:db)}: Deleting reports before #{cutoff} for #{deletion_count} nodes"
puts "#{Time.now.to_s(:db)}: Deleting reports before #{cutoff} and with report status #{condition} \"#{status}\" for #{deletion_count} nodes"

pbar = ProgressBar.new('Deleting', deletion_count, STDOUT)
deleted_count = 0

begin
affected_nodes.each do |node|
node.reload # Become a complete object (the query above returns shalow objects with 'id' only)
deleted_count += node.prune_reports(cutoff)
deleted_count += node.prune_reports(cutoff, condition, status)
pbar.inc
end
rescue SignalException
Expand Down