Skip to content

CouchDB Scanner Plugin: conflict finder #5393

Open
@janl

Description

Heya, for hot-fixing a cluster I had to write a _conflicts and _deleted_conflicts finder that I could paste into remsh. I thought it might be a good idea to turn this into a couch scanner plugin, but I don’t have time to do that right now. If someone would like to take this on, you’re very welcome to.

This is “make it work for me” levels of good code, but I ran this across ~2TB worth of shard files without issue. The “progress bar” can probably be taken out and the docs/s reporting needs to go into stats, but that’s all.

Here’s my code:

% Load the record definitions from couch_db and couch_changes into the shell
% so that the record syntax used by the funs in this script (#user_ctx{},
% #doc_info{}, #rev_info{}, #changes_acc{}) can be evaluated.
rr(couch_db).
rr(couch_changes).

% Threshold read by MinConflictsFilter: {DocId, Count} entries whose Count is
% below this value are dropped from the final report.
MinConflicts = 5.
% NOTE(review): presumably tags this shell process's disk I/O with the
% compaction priority class so the scan does not compete with regular
% requests - confirm against the ioq documentation.
ioq:set_io_priority({compaction, self()}).
 
% Ordering fun for lists:sort/2 over {DocId, Count} pairs: true when the
% first pair's count is strictly greater than the second's.
Sorter = fun({_, LeftCount}, {_, RightCount}) -> RightCount < LeftCount end.
% Predicate for lists:filter/2: keeps {DocId, Count} pairs whose Count is at
% least MinConflicts. Any term that is not a 2-tuple is kept as well.
MinConflictsFilter = fun({_, Count}) -> Count >= MinConflicts;
						(_) -> true
end.

% Prepends {Id, Count} to Acc unless Count is 0, in which case Acc is
% returned untouched.
MaybeAppend = fun(_Id, 0, Acc) -> Acc;
				 (Id, Count, Acc) -> [{Id, Count} | Acc]
end.

% Returns the total number of documents in Db, deleted ones included: the sum
% of the doc_count and del_doc_count entries of couch_db:get_db_info/1, each
% taken as 0 when the entry is missing.
GetDocCount = fun(Db) ->
	{ok, Info} = couch_db:get_db_info(Db),
	lists:sum([proplists:get_value(Key, Info, 0) || Key <- [doc_count, del_doc_count]])
end.

% Prints a progress marker each time progress has advanced by more than
% 10 percentage points since the last marker.
%
%   Begin         - os:timestamp()-style tuple taken when the scan started
%   DocCount      - expected total number of documents
%   DocsProcessed - number of documents handled so far
%   LastPrinted   - percentage at which the previous marker was printed
%
% Returns the percentage to carry forward as "last printed": the current
% percentage when a marker was written, LastPrinted otherwise. Nothing is
% printed while DocCount or DocsProcessed is 0.
MaybePrintStats = fun(_, 0, _, LastPrinted) -> LastPrinted;
					 (_, _, 0, LastPrinted) -> LastPrinted;
					 (Begin, DocCount, DocsProcessed, LastPrinted) ->
	Perc = 100 / DocCount * DocsProcessed,
	case (Perc - LastPrinted) > 10 of
		true ->
			% Clamp to 1 microsecond: a zero elapsed time would otherwise
			% make the docs/s division below fail with badarith.
			ElapsedMicros = max(1, timer:now_diff(os:timestamp(), Begin)),
			Duration = ElapsedMicros / 1000 / 1000,
			DocsPerSecond = DocsProcessed / Duration,
			io:format("~p% (~.2f docs/s) ", [trunc(Perc), DocsPerSecond]),
			Perc;
		false ->
			LastPrinted
	end
end.

% Scans a single database for documents that carry conflicting revisions.
%
% Opens DbName with an admin user context and folds over its changes from
% sequence 0. For every document the first entry of #doc_info.revs is skipped
% (presumably the winning revision - TODO confirm) and the remaining revisions
% are split into deleted and non-deleted ones; documents with a non-zero
% count are collected as {DocId, Count}. Progress is reported through
% MaybePrintStats while folding.
%
% Returns {Conflicts, DeletedConflicts}: both lists are ordered with Sorter
% and then restricted with MinConflictsFilter.
%
% The database handle is closed in an `after` clause so that it is released
% even when reading the db info or the fold itself crashes.
Scanner = fun(DbName) ->
	OpenOpts = [{user_ctx, #user_ctx{name = <<"admin">>, roles = [<<"_admin">>]}}],
	{ok, Db} = couch_db:open(DbName, OpenOpts),
	{FoundDeleted, FoundLive, TotalProcessed, _} = try
		Begin = os:timestamp(),
		io:format("~n ~p: ", [DbName]),
		DocCount = GetDocCount(Db),
		UserFun = fun(FullDocInfo, Acc) ->
			DocInfo = couch_doc:to_doc_info(FullDocInfo),
			DocId = DocInfo#doc_info.id,
			% Drop the head of the revision list; only the rest is counted.
			[_ | Revs] = DocInfo#doc_info.revs,
			{DeletedRevs, LiveRevs} = lists:partition(
				fun(RevInfo) -> RevInfo#rev_info.deleted end,
				Revs
			),
			% The record is only a carrier here; all state lives in user_acc as
			% {DeletedAcc, LiveAcc, DocsProcessedSoFar, LastPrintedPercentage}.
			{AccDeleted, AccLive, Processed, LastPrinted} = Acc#changes_acc.user_acc,
			NewLastPrinted = MaybePrintStats(Begin, DocCount, Processed, LastPrinted),
			{ok, Acc#changes_acc{
				user_acc = {
					MaybeAppend(DocId, length(DeletedRevs), AccDeleted),
					MaybeAppend(DocId, length(LiveRevs), AccLive),
					Processed + 1,
					NewLastPrinted
				}
			}}
		end,
		StartSeq = 0,
		UserAcc = #changes_acc{user_acc = {[], [], 0, 0}},
		Opts = [{include_docs, true},{deleted, true}],
		{ok, ChangesAcc} = couch_db:fold_changes(Db, StartSeq, UserFun, UserAcc, Opts),
		ChangesAcc#changes_acc.user_acc
	after
		couch_db:close(Db)
	end,
	io:format("Total Docs Processed: ~p ", [TotalProcessed]),
	ConflictsSorted = lists:filter(MinConflictsFilter, lists:sort(Sorter, FoundLive)),
	DeletedConflictsSorted = lists:filter(MinConflictsFilter, lists:sort(Sorter, FoundDeleted)),
	{ConflictsSorted, DeletedConflictsSorted}
end.

% Runs Scanner over every database name returned by
% couch_server:all_databases/0 and prints the two conflict lists found for
% each one. Returns ok (the result of lists:foreach/2).
Enumerator = fun() ->
	{ok, DbNames} = couch_server:all_databases(),
	Report = fun(DbName) ->
		{Live, Deleted} = Scanner(DbName),
		io:format("~n      Conflicts: ~n     ~p ~n      DeletedConflicts: ~n     ~p", [Live, Deleted])
	end,
	lists:foreach(Report, DbNames)
end.

% Start the scan: walks all databases and prints the findings to the shell.
Enumerator().

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions