Open
Description
Heya, for hot-fixing a cluster I had to write a `_conflicts` and `_deleted_conflicts` finder that I could paste into remsh. I thought it might be a good idea to turn this into a couch scanner plugin, but I don’t have time to do that right now. If someone would like to take this on, you’re very welcome to.
This is “make it work for me” levels of good code, but I ran this across ~2TB worth of shard files without issue. The “progress bar” can probably be taken out and the docs/s reporting needs to go into stats, but that’s all.
Here’s my code:
% Load record definitions into the shell; the funs below use the
% #user_ctx{}, #doc_info{}, #rev_info{} and #changes_acc{} records.
rr(couch_db).
rr(couch_changes).
% Threshold used by MinConflictsFilter: documents with fewer conflicting
% revisions than this are dropped from the report.
MinConflicts = 5.
% NOTE(review): presumably tags this shell process's I/O as compaction-class
% so the scan is scheduled like background work -- confirm against ioq.
ioq:set_io_priority({compaction, self()}).
% Ordering fun for lists:sort/2 over {Id, Count} pairs: true when the first
% pair has a strictly larger count, so results come out highest-count first.
Sorter = fun({_IdA, CountA}, {_IdB, CountB}) -> CountB < CountA end.
% Predicate for lists:filter/2: keeps an {Id, Count} pair only when Count
% reaches MinConflicts. Anything that is not a 2-tuple is kept as-is.
MinConflictsFilter = fun
    ({_Id, Count}) -> Count >= MinConflicts;
    (_Other) -> true
end.
% Prepends {Id, Count} to Acc unless Count is 0, in which case Acc is
% returned untouched (documents without conflicts are not recorded).
MaybeAppend = fun
    (_Id, 0, Acc) -> Acc;
    (Id, Count, Acc) -> [{Id, Count} | Acc]
end.
% Returns the total number of documents in Db, live plus deleted. Scanner
% uses it as the denominator of its progress percentage, and its fold is
% started with {deleted, true}, so deleted documents must be included.
GetDocCount = fun(Db) ->
    {ok, DbInfo} = couch_db:get_db_info(Db),
    DocCount = proplists:get_value(doc_count, DbInfo, 0),
    % CouchDB reports the deleted-document count as `doc_del_count`. The
    % previously used `del_doc_count` key is kept only as a fallback; on its
    % own it always yielded 0, which understated the total and let the
    % progress percentage run past 100%.
    LegacyDelDocCount = proplists:get_value(del_doc_count, DbInfo, 0),
    DelDocCount = proplists:get_value(doc_del_count, DbInfo, LegacyDelDocCount),
    DocCount + DelDocCount
end.
% Progress reporter. Arguments: Begin (an os:timestamp() taken when the scan
% started), Total (document count), Processed (documents handled so far) and
% Last (the percentage printed most recently).
% Prints "<percent>% (<rate> docs/s) " once progress has advanced by more
% than 10 percentage points since Last and returns the new percentage;
% otherwise returns Last unchanged. Nothing is printed while Total or
% Processed is 0.
MaybePrintStats = fun
    (_Begin, 0, _Processed, Last) ->
        Last;
    (_Begin, _Total, 0, Last) ->
        Last;
    (Begin, Total, Processed, Last) ->
        Percent = 100 / Total * Processed,
        case (Percent - Last) > 10 of
            true ->
                Now = os:timestamp(),
                % now_diff/2 yields microseconds; convert to seconds.
                Seconds = timer:now_diff(Now, Begin) / 1000 / 1000,
                io:format("~p% (~.2f docs/s) ", [trunc(Percent), Processed / Seconds]),
                Percent;
            false ->
                Last
        end
end.
% Scans one database for documents that carry conflicting revisions.
%
% Opens DbName with an admin user context, folds over its changes from
% sequence 0 and, for every document, counts the revisions other than the
% first one reported by couch_doc:to_doc_info/1, split into deleted and
% non-deleted ones.
%
% Returns {Conflicts, DeletedConflicts}. Each is a list of {DocId, Count}
% pairs ordered by Sorter and restricted by MinConflictsFilter.
% Progress and the total number of processed documents are printed to the
% shell. Crashes (badmatch) if the database cannot be opened or the fold
% does not return {ok, _}.
Scanner = fun(DbName) ->
    OpenOpts = [{user_ctx, #user_ctx{name = <<"admin">>, roles = [<<"_admin">>]}}],
    {ok, Db} = couch_db:open(DbName, OpenOpts),
    % try/after makes sure the handle is closed even when the fold or one of
    % the helper funs crashes halfway through a shard.
    try
        Begin = os:timestamp(),
        io:format("~n ~p: ", [DbName]),
        DocCount = GetDocCount(Db),
        UserFun = fun(FullDocInfo, Acc) ->
            DocInfo = couch_doc:to_doc_info(FullDocInfo),
            % The head of revs is skipped -- presumably it is the winning
            % revision, so the tail holds the conflicts (TODO confirm).
            [_ | Revs] = DocInfo#doc_info.revs,
            {DeletedRevs, LiveRevs} = lists:partition(
                fun(RevInfo) -> RevInfo#rev_info.deleted end,
                Revs
            ),
            {AccDeleted, AccLive, Processed, LastPrinted} = Acc#changes_acc.user_acc,
            NewLastPrinted = MaybePrintStats(Begin, DocCount, Processed, LastPrinted),
            {ok, Acc#changes_acc{
                user_acc = {
                    MaybeAppend(DocInfo#doc_info.id, length(DeletedRevs), AccDeleted),
                    MaybeAppend(DocInfo#doc_info.id, length(LiveRevs), AccLive),
                    Processed + 1,
                    NewLastPrinted
                }
            }}
        end,
        StartSeq = 0,
        % user_acc layout: {DeletedConflicts, Conflicts, DocsProcessed, LastPrintedPercent}
        UserAcc = #changes_acc{user_acc = {[], [], 0, 0}},
        Opts = [{include_docs, true}, {deleted, true}],
        {ok, ChangesAcc} = couch_db:fold_changes(Db, StartSeq, UserFun, UserAcc, Opts),
        {DeletedConflicts, Conflicts, DocsProcessed, _} = ChangesAcc#changes_acc.user_acc,
        io:format("Total Docs Processed: ~p ", [DocsProcessed]),
        ConflictsSorted = lists:filter(MinConflictsFilter, lists:sort(Sorter, Conflicts)),
        DeletedConflictsSorted = lists:filter(MinConflictsFilter, lists:sort(Sorter, DeletedConflicts)),
        {ConflictsSorted, DeletedConflictsSorted}
    after
        couch_db:close(Db)
    end
end.
% Runs Scanner over every database returned by couch_server:all_databases/0
% and prints the conflict and deleted-conflict lists found in each one.
% Returns ok.
Enumerator = fun() ->
    {ok, AllShards} = couch_server:all_databases(),
    ReportShard = fun(Shard) ->
        {Conflicts, DeletedConflicts} = Scanner(Shard),
        io:format("~n Conflicts: ~n ~p ~n DeletedConflicts: ~n ~p", [Conflicts, DeletedConflicts])
    end,
    lists:foreach(ReportShard, AllShards)
end.
% Kick off the scan across all databases on this node.
Enumerator().