From 0dcea9ecd081eb31cb4d4f03816fa969c1ffaa2f Mon Sep 17 00:00:00 2001 From: Dave Parfitt Date: Mon, 22 Jul 2013 11:17:56 -0400 Subject: [PATCH] cleanup rt_dirty behavior --- src/riak_repl2_fscoordinator.erl | 25 +++++++++++++++++++++++-- src/riak_repl_stats.erl | 6 ++++-- 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/src/riak_repl2_fscoordinator.erl b/src/riak_repl2_fscoordinator.erl index 3ec95977..6128cb89 100644 --- a/src/riak_repl2_fscoordinator.erl +++ b/src/riak_repl2_fscoordinator.erl @@ -293,6 +293,7 @@ handle_cast(start_fullsync, State) -> lager:info("Starting fullsync (source) with max_fssource_node=~p and max_fssource_cluster=~p", [MaxSource, MaxCluster]), {ok, Ring} = riak_core_ring_manager:get_my_ring(), + check_nodes_for_rt_dirty(Ring), N = largest_n(Ring), Partitions = sort_partitions(Ring), State2 = State#state{ @@ -742,8 +743,17 @@ notify_rt_dirty_nodes(State = #state{dirty_nodes = DirtyNodes, NodesToNotify = lists:subtract(AllNodesList, ordsets:to_list(DirtyNodesDuringFS)), lager:debug("Notifying nodes ~p", [ NodesToNotify]), - rpc:multicall(NodesToNotify, riak_repl_stats, clear_rt_dirty, []), - State#state{dirty_nodes=ordsets:new()}; + {_, BadNodes} = rpc:multicall(NodesToNotify, + riak_repl_stats, + clear_rt_dirty, []), + case BadNodes of + [] -> + %% all nodes nodified, clear rt_dirty state + State#state{dirty_nodes=ordsets:new()}; + Nodes -> + lager:warning("Failed to clear rt_dirty on ~p", [Nodes]), + State + end; false -> lager:debug("No dirty nodes before fullsync started"), State @@ -762,3 +772,14 @@ notify_rt_dirty_nodes(State = #state{dirty_nodes = DirtyNodes, nodeset_to_string_list(Set) -> string:join([erlang:atom_to_list(V) || V <- ordsets:to_list(Set)],","). + + +%% check all nodes in the cluster for existing rt_dirty files +%% and reset their rt_dirty flag if it exists +check_nodes_for_rt_dirty(Ring) -> + Owners = riak_core_ring:all_owners(Ring), + [ case rpc:call(Node, riak_repl_stats, is_rt_dirty,[]) of + false -> ok; + _ -> riak_repl2_fscoordinator:node_dirty(Node) + end || Node <- Owners ]. + diff --git a/src/riak_repl_stats.erl b/src/riak_repl_stats.erl index 7b70b614..f35755bc 100644 --- a/src/riak_repl_stats.erl +++ b/src/riak_repl_stats.erl @@ -137,8 +137,10 @@ rt_dirty() -> riak_repl2_fscoordinator:node_dirty(node()) catch _:_ -> - lager:debug("Failed to notify coordinator of rt_dirty status") - end + %% This could be triggered on startup if the + %% fscoordinator isn't running + lager:warning("Failed to notify coordinator of rt_dirty status.") + end end), ok; false -> ok