diff --git a/bin/haproxyctl b/bin/haproxyctl index 8c02286..b95feba 100755 --- a/bin/haproxyctl +++ b/bin/haproxyctl @@ -53,12 +53,20 @@ begin start end when 'reload' - if pidof + if pidof reload(pidof) else puts 'haproxy not running. starting...' start end + when /reload_kill_zombies\s(\d+)/ + if pidof + seconds_to_wait = Regexp.last_match[1].to_i + reload_kill_zombies(pidof, seconds_to_wait) + else + puts 'haproxy not running. starting...' + start + end when 'status' if pidof puts "haproxy is running on pid(s) #{pidof.join(', ')}.\nthese ports are used and guys are connected:" @@ -101,56 +109,62 @@ begin # else # puts 'status err haproxy is not running!' # end - when 'show health' - status = unixsock('show stat') + when /(\d\s)?show health/ + process = Regexp.last_match[1].to_i + status = unixsock('show stat', process) status.each do |line| data = line.split(',') printf "%-30s %-30s %-7s %3s\n", data[0], data[1], data[17], data[18] end - when /show backend(s?)/ - status = unixsock('show stat').grep(/BACKEND/) + when /(\d\s)?show backends?/ + process = Regexp.last_match[1].to_i + status = unixsock('show stat', process).grep(/BACKEND/) status.each do |line| data = line.split(',') printf "%-30s %-30s %-7s %3s\n", data[0], data[1], data[17], data[18] end - when /disable all EXCEPT (.+)/ - servername = Regexp.last_match[ 1] - status = unixsock('show stat') + when /(\d\s)?disable all EXCEPT (.+)/ + process = Regexp.last_match[1].to_i + servername = Regexp.last_match[2] + status = unixsock('show stat', process) backend = status.grep(/#{servername}/) backend.each do |line| backend_group = line.split(',') status.each do |pool| data = pool.split(',') - if (data[0] == backend_group[0]) && ( data[1] !~ /#{servername}|BACKEND|FRONTEND/) && ( data[17] == 'UP') - unixsock("disable server #{data[0]}/#{data[1]}") + if (data[0] == backend_group[0]) && ( data[1] !~ /#{servername}|BACKEND|FRONTEND/) + unixsock("disable server #{data[0]}/#{data[1]}", process) end end end - when /disable all (.+)/ - servername = Regexp.last_match[ 1] - status = unixsock('show stat') + when /(\d\s)?disable all (.+)/ + process = Regexp.last_match[1].to_i + servername = Regexp.last_match[2] + status = unixsock('show stat', process) status.each do |line| data = line.split(',') - if ( data[1] == servername) && ( data[17] == 'UP') - unixsock("disable server #{data[0]}/#{servername}") + if ( data[1] == servername) + unixsock("disable server #{data[0]}/#{servername}", process) end end - when /enable all EXCEPT (.+)/ - servername = Regexp.last_match[ 1] - status = unixsock('show stat') + when /(\d\s)?enable all EXCEPT (.+)/ + process = Regexp.last_match[1].to_i + servername = Regexp.last_match[2] + status = unixsock('show stat', process) backend = status.grep(/#{servername}/) backend.each do |line| backend_group = line.split(',') status.each do |pool| data = pool.split(',') - if (data[0] == backend_group[0]) && ( data[1] !~ /#{servername}|BACKEND|FRONTEND/) && ( data[17] =~ /Down|MAINT/i) - unixsock("enable server #{data[0]}/#{data[1]}") + if (data[0] == backend_group[0]) && ( data[1] !~ /#{servername}|BACKEND|FRONTEND/) + unixsock("enable server #{data[0]}/#{data[1]}", process) end end end - when /show stat (.+)/ - fieldnames = Regexp.last_match[ 1] - status = unixsock('show stat') + when /(\d\s)?show stat (.+)/ + process = Regexp.last_match[1].to_i + fieldnames = Regexp.last_match[2] + status = unixsock('show stat', process) indices = fieldnames.split(' ').map do |name| status.first.split(',').index(name) || begin $stderr.puts("no such field: #{name}") @@ -163,19 +177,50 @@ begin filtered = indices.map { |index| row[index] } puts (row[0...2] + filtered).compact.join(',') end - when /enable all (.+)/ - servername = Regexp.last_match[ 1] - status = unixsock('show stat') + when /(\d\s)?enable all (.+)/ + process = Regexp.last_match[1].to_i + servername = Regexp.last_match[2] + status = unixsock('show stat', process) status.each do |line| data = line.split(',') - if ( data[1] == servername) && ( data[17] =~ /Down|MAINT/i) - unixsock("enable server #{data[0]}/#{servername}") + if ( data[1] == servername) + unixsock("enable server #{data[0]}/#{servername}", process) + end + end + when /(\d\s)?drain all (.+)/ + process = Regexp.last_match[1].to_i + servername = Regexp.last_match[2] + status = unixsock('show stat', process) + status.each do |line| + data = line.split(',') + if ( data[1] == servername) + unixsock("set server #{data[0]}/#{servername} state drain", process) + end + end + when /(\d\s)?drain all EXCEPT (.+)/ + process = Regexp.last_match[1].to_i + servername = Regexp.last_match[2] + status = unixsock('show stat', process) + backend = status.grep(/#{servername}/) + backend.each do |line| + backend_group = line.split(',') + status.each do |pool| + data = pool.split(',') + if (data[0] == backend_group[0]) && ( data[1] !~ /#{servername}|BACKEND|FRONTEND/) + unixsock("set server #{data[0]}/#{data[1]} state drain", process) + end end end when 'version' version + when /(\d\s)?(.*)/ + process = Regexp.last_match[1].to_i + command = Regexp.last_match[2] + puts unixsock(command, process) else - puts unixsock(argument) + # this case shouldn't be reached due to the + # above regex, but leaving it here just in case + puts unixsock(argument, nil) end rescue Errno::ENOENT => e STDERR.puts e diff --git a/lib/haproxyctl.rb b/lib/haproxyctl.rb index 07e88fe..116bdc5 100644 --- a/lib/haproxyctl.rb +++ b/lib/haproxyctl.rb @@ -20,6 +20,7 @@ def start def stop(pids) if pids + write_server_state_file puts "stopping haproxy on pids #{pids.join(', ')}..." pids.each { |pid| system("kill #{pid}") || system("kill -9 #{pid}") } puts '... stopped' @@ -29,34 +30,100 @@ def stop(pids) end def reload(pids) - if pids - puts "gracefully stopping connections on pids #{pids.join(', ')}..." - system("#{exec} -D -f #{config_path} -p #{pidfile} -sf $(cat #{pidfile})") - puts "checking if connections still alive on #{pids.join(', ')}..." - nowpids = check_running - while pids == nowpids - puts "still haven't killed old pids. - waiting 2s for existing connections to die... - (ctrl+c to stop this check)" + unless pids + puts 'haproxy is not running!' + return + end + write_server_state_file + pids_string = pids.join(' ') + puts "gracefully stopping connections on pids #{pids_string}..." + reload_succeeded = system("#{exec} -D -f #{config_path} -p #{pidfile} -sf $(cat #{pidfile})") + puts "checking if connections still alive on #{pids_string}..." + nowpids = check_running + if !reload_succeeded + puts "pids no longer exist, haproxy may no longer be running" unless pids == nowpids + puts "failed to reload haproxy, check errors above" + return + end + while pids == nowpids + puts "still haven't killed old pids. + waiting 2s for existing connections to die... + (ctrl+c to stop this check)" + sleep 2 + nowpids = check_running || 0 + end + puts "reloaded haproxy on pids #{nowpids.join(', ')}" + end + + def reload_kill_zombies(pids, seconds_to_wait) + unless pids + puts 'haproxy is not running!' + return + end + write_server_state_file + pids_string = pids.join(' ') + puts "gracefully stopping connections on pids #{pids_string}..." + reload_succeeded = system("#{exec} -D -f #{config_path} -p #{pidfile} -sf #{pids_string}") + puts "checking if connections still alive on #{pids_string}..." + nowpids = check_running + if !reload_succeeded + puts "pids no longer exist, haproxy may no longer be running" unless pids == nowpids + puts "failed to reload haproxy, check errors above" + return + end + while pids == nowpids + puts "still haven't killed old pids. + waiting 2s for existing connections to die... + (ctrl+c to stop this check)" + sleep 2 + nowpids = check_running || 0 + end + puts "reloaded haproxy on pids #{nowpids.join(', ')}" + puts "ensuring that old pids aren't zombies" + seconds_waited = 0 + termed = false + while any_running pids + if seconds_waited >= seconds_to_wait + puts "waited #{seconds_waited} for old pids to exit. + they did not die gracefully. + terminating #{pids_string}" + if termed + puts "SIGTERM didn't work, killing #{pids_string}" + system("kill -9 #{pids_string} 2> /dev/null") + else + system("kill #{pids_string} 2> /dev/null") + termed = true + end + else + puts "old pids still alive. + waiting 2s and checking again" sleep 2 - nowpids = check_running || 0 + seconds_waited = seconds_waited + 2 + end + end + end + + def write_server_state_file + return unless server_state_file + begin + File.open(server_state_file, 'w') do |f| + (1..nbproc).each do |i| + f.puts(unixsock('show servers state', i)) + end end - puts "reloaded haproxy on pids #{nowpids.join(', ')}" - else - puts 'haproxy is not running!' end end - def unixsock(command) + def unixsock(command, process) output = [] runs = 0 begin - ctl = UNIXSocket.open(socket) + ctl = UNIXSocket.open(socket(process)) if ctl ctl.write "#{command}\r\n" else - puts "cannot talk to #{socket}" + puts "cannot talk to #{socket(process)}" end rescue Errno::EPIPE ctl.close @@ -65,7 +132,7 @@ def unixsock(command) if runs < 4 retry else - puts "the unix socket at #{socket} closed before we could complete this request" + puts "the unix socket at #{socket(process)} closed before we could complete this request" exit end end @@ -88,34 +155,39 @@ def usage <<-USAGE usage: #{$PROGRAM_NAME} where can be: - start : start haproxy unless it is already running - stop : stop an existing haproxy - restart : immediately shutdown and restart - reload : gracefully terminate existing connections, reload #{config_path} - status : is haproxy running? on what ports per lsof? - configcheck : check #{config_path} - nagios : nagios-friendly status for running process and listener - cloudkick : cloudkick.com-friendly status and metric for connected users - show health : show status of all frontends and backend servers - show backends : show status of backend pools of servers - enable all : re-enable a server previously in maint mode on multiple backends - disable all : disable a server from every backend it exists - enable all EXCEPT : like 'enable all', but re-enables every backend except for - disable all EXCEPT : like 'disable all', but disables every backend except for - clear counters : clear max statistics counters (add 'all' for all counters) - help : this message - prompt : toggle interactive mode with prompt - quit : disconnect - show info : report information about the running process - show stat : report counters for each proxy and server - show errors : report last request and response errors for each proxy - show sess [id] : report the list of current sessions or dump this session - get weight : report a server's current weight - set weight : change a server's weight - set timeout : change a timeout setting - disable server : set a server in maintenance mode - enable server : re-enable a server that was previously in maintenance mode - version : version of this script + start : start haproxy unless it is already running + stop : stop an existing haproxy + restart : immediately shutdown and restart + reload : gracefully terminate existing connections, reload #{config_path} + status : is haproxy running? on what ports per lsof? + configcheck : check #{config_path} + nagios : nagios-friendly status for running process and listener + show health : show status of all frontends and backend servers + show backends : show status of backend pools of servers + enable all : re-enable a server previously in maint mode on multiple backends + disable all : disable a server from every backend it exists + drain all : drain a server from every backend it exists + enable all EXCEPT : like 'enable all', but re-enables every backend except for + disable all EXCEPT : like 'disable all', but disables every backend except for + drain all EXCEPT : like 'drain all', but drains every backend except for + clear counters : clear max statistics counters (add 'all' for all counters) + help : this message + prompt : toggle interactive mode with prompt + quit : disconnect + show info : report information about the running process + show stat : report counters for each proxy and server + show errors : report last request and response errors for each proxy + show sess [id] : report the list of current sessions or dump this session + get weight : report a server's current weight + set weight : change a server's weight + set timeout : change a timeout setting + disable server : set a server in maintenance mode + enable server : re-enable a server that was previously in maintenance mode + version : version of this script + + is an optional numerical argument that selects the process number to target + - only applicable when nbproc > 1 + - defaults to 1 USAGE end end diff --git a/lib/haproxyctl/environment.rb b/lib/haproxyctl/environment.rb index aee53fd..2e8f742 100644 --- a/lib/haproxyctl/environment.rb +++ b/lib/haproxyctl/environment.rb @@ -38,19 +38,47 @@ def exec def nbproc @nbproc ||= begin - config.match /nbproc \s*(\d*)\s*/ - Regexp.last_match[1].to_i || 1 + config.match /^\s*nbproc \s*(\d*)\s*/ + (Regexp.last_match && Regexp.last_match[1].to_i) || 1 + rescue + 1 end end - def socket - @socket ||= begin - # If the haproxy config is using nbproc > 1, we assume that all cores - # except for 1 do not need commands sent to their sockets (if they exist). - # This is a poor assumption, so TODO: improve CLI to accept argument for - # processes to target. + def load_server_state_from_file + @load_server_state_from_file ||= begin + config.match /^\s*load-server-state-from-file \s*(.*)/ + Regexp.last_match && Regexp.last_match[1] && Regexp.last_match[1].strip + rescue + nil + end + end + + def server_state_file + @server_state_file ||= begin + # we don't support writing local state files yet, only global + return nil unless load_server_state_from_file == 'global' + config.match /^\s*server-state-file \s*(.*)/ + state_file = Regexp.last_match && Regexp.last_match[1].strip + return nil if !state_file + return state_file if state_file.start_with?('/') + config.match /^\s*server-state-base \s*(.*)/ + state_base = (Regexp.last_match && Regexp.last_match[1].strip) || Dir.pwd + return File.join(state_base, state_file) + rescue + nil + end + end + + def socket(process = 1) + process = 1 if process == 0 + @sockets ||= [] + @sockets[process] ||= begin + # If the haproxy config is using nbproc > 1, we pick which socket to use based + # on the stats socket process assignment. We expect each stats socket to be + # assigned to a single process (we don't support ranges even though haproxy does). if nbproc > 1 - config.match /stats\s+socket \s*([^\s]*) \s*.*process \s*1[\d^]?/ + config.match /stats\s+socket \s*([^\s]*) \s*.*process \s*#{process}[\d^]?/ else config.match /stats\s+socket \s*([^\s]*)/ end @@ -84,5 +112,10 @@ def check_running end end alias_method :pidof, :check_running + + def any_running(pids) + return false if !pids || pids.empty? + pids.any? { |pid| pid =~ /^\d+$/ and `ps -p #{pid} -o cmd=` =~ /#{exec}/ } + end end end