Skip to content

Backport storage-alerts #544

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions test/luatest_helpers/asserts.lua
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,19 @@ function asserts:wait_fullmesh(servers, wait_time)
end)
end

function asserts:assert_server_no_alerts(server)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This entire file seems dead in vshard. I propose to

  • Drop test/luatest_helpers/asserts.lua.
  • Add your new assertion to test/luatest_helpers/server.lua as a server's method instead. It already takes the server as an argument, so it makes sense to make it a server's method.

-- Executes a check on the given server: vshard.storage.info().alerts must be
-- equal to an empty table.
-- NOTE(review): `ilt` and `ivshard` are not defined in this block; presumably
-- they are globals made available on the instance by the test helpers -
-- confirm.
server:exec(function()
ilt.assert_equals(ivshard.storage.info().alerts, {})
end)
end

function asserts:info_assert_alert(alerts, alert_name)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You are not using anything from this asserts object, so the function doesn't need to be a method of it. I suggest either duplicating this function in the tests that need it, or putting it into vtest, which is where we store all our vshard testing helpers.

-- Scan the alerts; each alert is indexed as an array whose first element is
-- compared with the requested alert name. The first match is returned.
for _, alert in pairs(alerts) do
    if alert[1] == alert_name then
        return alert
    end
end
-- Nothing matched: fail the test with a descriptive message. The method-call
-- form `:format` is required here. The dot form `.format(alert_name)` is
-- string.format(alert_name), which uses the alert name itself as the format
-- string and drops the message template entirely.
t.fail(('There is no %s in alerts'):format(tostring(alert_name)))
end

return asserts
13 changes: 3 additions & 10 deletions test/router-luatest/router_2_2_test.lua
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ local vtest = require('test.luatest_helpers.vtest')
local vutil = require('vshard.util')
local vconsts = require('vshard.consts')
local vserver = require('test.luatest_helpers.server')
local asserts = require('test.luatest_helpers.asserts')

local g = t.group('router')
local cfg_template = {
Expand Down Expand Up @@ -1044,14 +1045,6 @@ g.test_failed_calls_affect_priority = function()
vtest.router_cfg(g.router, global_cfg)
end

--- Find an alert by its name.
-- Every alert is indexed as an array; its first element is compared with
-- `alert_name`. The first matching alert is returned. Nothing is returned
-- when no alert matches.
local function info_find_alert(alerts, alert_name)
    for _, entry in pairs(alerts) do
        local entry_name = entry[1]
        if entry_name == alert_name then
            return entry
        end
    end
end

--
-- gh-474: error during alert construction
--
Expand All @@ -1069,7 +1062,7 @@ g.test_info_with_named_identification = function()
ilt.assert(ok, 'no error')
return result.alerts
end)
t.assert(info_find_alert(alerts, 'MISSING_MASTER'),
t.assert(asserts:info_assert_alert(alerts, 'MISSING_MASTER'),
'MISSING_MASTER alert is constructed')

--
Expand All @@ -1085,7 +1078,7 @@ g.test_info_with_named_identification = function()
ilt.assert(ok, 'no error')
return result.alerts
end)
local alert = info_find_alert(alerts, 'UNREACHABLE_MASTER')
local alert = asserts:info_assert_alert(alerts, 'UNREACHABLE_MASTER')
t.assert(alert, 'UNREACHABLE_MASTER alert is constructed')
t.assert_not_str_contains(alert[2], 'replicaset nil',
'alert contains valid replicaset id')
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
local t = require('luatest')
local vtest = require('test.luatest_helpers.vtest')
local server = require('test.luatest_helpers.server')
local vutil = require('vshard.util')
local asserts = require('test.luatest_helpers.asserts')

local test_group = t.group('storage')

Expand Down Expand Up @@ -103,3 +105,63 @@ test_group.test_switch_to_uuid = function(g)
_G.bucket_gc_wait()
end, {bid, g.replica_1_a:replicaset_name()})
end

--- Reset field 3 of the instance's own _cluster tuple to box.NULL on each
-- master of the cluster.
-- The callback reads box.info.name before the update and returns it; the
-- result of vtest.cluster_exec_each_master() is returned to the caller as is.
local function persistent_names_remove(g)
    local function drop_own_name()
        local old_name = box.info.name
        -- The _cluster update is performed with force_recovery switched on
        -- and switched back off right after it.
        box.cfg{force_recovery = true}
        box.space._cluster:update(box.info.id, {{'=', 3, box.NULL}})
        box.cfg{force_recovery = false}
        return old_name
    end
    return vtest.cluster_exec_each_master(g, drop_own_name)
end

--- Write the given names back into field 3 of the _cluster tuples.
-- `names` maps a key of `g` (the server object) to the name which has to be
-- stored on that server. The update runs on the server itself via exec().
local function persistent_names_restore(g, names)
    local function set_own_name(name)
        -- Same force_recovery toggle as used when the name was removed.
        box.cfg{force_recovery = true}
        box.space._cluster:update(box.info.id, {{'=', 3, name}})
        box.cfg{force_recovery = false}
    end
    for alias, saved_name in pairs(names) do
        local instance = g[alias]
        instance:exec(set_own_name, {saved_name})
    end
end

--
-- vshard threw an irrelevant UNREACHABLE_REPLICA warning when names are
-- not set and the `name_as_key` identification_mode is used.
--
test_group.test_no_unreachable_replica_alert = function(g)
    -- The names are removed first and written back at the very end.
    local removed_names = persistent_names_remove(g)
    -- Both storages must report an empty alerts list while the names are
    -- missing.
    local checked_aliases = {'replica_1_a', 'replica_2_a'}
    for _, alias in ipairs(checked_aliases) do
        asserts:assert_server_no_alerts(g[alias])
    end
    persistent_names_restore(g, removed_names)
end

--
-- gh-493: vshard should not show alerts for replicas, which are not in the
-- vshard's config.
--
test_group.test_alerts_for_named_replica = function(g)
    t.run_only_if(vutil.feature.persistent_names)

    -- The extra instance replicates from replica_1_a and has an instance
    -- name set in its box configuration.
    local upstream = g.replica_1_a
    local extra_box_cfg = {
        replication = upstream.net_box_uri,
        instance_name = 'named_replica'
    }
    local extra = server:new({
        alias = 'named_replica_with_name_identification',
        box_cfg = extra_box_cfg
    })

    -- No alerts are expected while the extra instance is running.
    extra:start()
    extra:wait_for_vclock_of(upstream)
    asserts:assert_server_no_alerts(upstream)
    local extra_id = extra:instance_id()

    -- No alerts are expected after it is stopped either.
    extra:stop()
    asserts:assert_server_no_alerts(upstream)

    -- Cleanup: drop the instance and delete its _cluster record.
    extra:drop()
    upstream:exec(function(id)
        box.space._cluster:delete(id)
    end, {extra_id})
end
47 changes: 47 additions & 0 deletions test/storage-luatest/persistent_names_2_test.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
local t = require('luatest')
local vtest = require('test.luatest_helpers.vtest')
local vutil = require('vshard.util')
local asserts = require('test.luatest_helpers.asserts')

local test_group = t.group('storage')

-- Cluster template: a single replicaset with two replicas, one of which is
-- marked as the master. The `name_as_key` identification mode is used.
local cfg_template = {
    sharding = {
        -- Was misspelled as `repliacset_1`; nothing in this file refers to
        -- the key, so the spelling is corrected.
        replicaset_1 = {
            replicas = {
                replica_1_a = {
                    master = true
                },
                replica_1_b = {},
            },
        },
    },
    bucket_count = 20,
    identification_mode = 'name_as_key'
}

local global_cfg

-- Group setup. The whole group is skipped when the persistent_names feature
-- flag of vshard.util is not set.
test_group.before_all(function(cg)
    t.run_only_if(vutil.feature.persistent_names)

    -- The built config is kept in the file-level `global_cfg` variable.
    global_cfg = vtest.config_new(cfg_template)

    -- Create and bootstrap the cluster, wait for vclocks on all instances,
    -- then disable the rebalancer.
    vtest.cluster_new(cg, global_cfg)
    vtest.cluster_bootstrap(cg, global_cfg)
    vtest.cluster_wait_vclock_all(cg)
    vtest.cluster_rebalancer_disable(cg)
end)

-- Group teardown: drop the cluster stored in the group object.
test_group.after_all(function(cg)
    local cluster = cg.cluster
    cluster:drop()
end)

test_group.test_named_replicaset_alerts_when_replica_disconnects = function(g)
    local observer, stopped = g.replica_1_a, g.replica_1_b

    -- Stop one replica and collect the alerts reported by the other one.
    stopped:stop()
    local alerts = observer:exec(function()
        local info = ivshard.storage.info()
        return info.alerts
    end)

    -- Both alerts must be present in the collected list.
    local expected = {'UNREACHABLE_REPLICA', 'UNREACHABLE_REPLICASET'}
    for _, alert_name in ipairs(expected) do
        asserts:info_assert_alert(alerts, alert_name)
    end

    -- Bring the stopped replica back.
    stopped:start()
end
47 changes: 47 additions & 0 deletions test/storage-luatest/storage_1_test.lua
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
local t = require('luatest')
local vtest = require('test.luatest_helpers.vtest')
local vutil = require('vshard.util')
local server = require('test.luatest_helpers.server')
local asserts = require('test.luatest_helpers.asserts')

local group_config = {{engine = 'memtx'}, {engine = 'vinyl'}}

Expand Down Expand Up @@ -207,6 +209,7 @@ test_group.test_named_hot_reload = function(g)
ilt.assert_equals(ok, true)
_G.vshard.storage = storage
end)
vtest.cluster_cfg(g, global_cfg)
end

--
Expand Down Expand Up @@ -587,3 +590,47 @@ test_group.test_moved_buckets_various_statuses = function(g)
_G.bucket_gc_continue()
end)
end

--
-- gh-493: vshard should not show alerts for replicas, which are not
-- in the vshard's config.
--
--- Shared scenario for replicas which are not part of the vshard config.
-- Starts `replica`, checks that `replicaset.replica_1_a` reports no alerts
-- while the replica is running and after it is stopped, then drops the
-- replica and deletes its record from _cluster on `replicaset.replica_1_a`.
--
-- @param replicaset table with a `replica_1_a` server object (the callers in
--        this file pass the test group object)
-- @param replica    server object of the extra instance
local function test_alerts_for_non_vshard_config_template(replicaset, replica)
    replica:start()
    replica:wait_for_vclock_of(replicaset.replica_1_a)
    asserts:assert_server_no_alerts(replicaset.replica_1_a)
    -- Remember the id before the instance is stopped; it is needed for the
    -- _cluster cleanup below. The name differs from the callback parameter
    -- so that the callback does not shadow the outer local.
    local instance_id = replica:instance_id()

    replica:stop()
    asserts:assert_server_no_alerts(replicaset.replica_1_a)

    replica:drop()
    replicaset.replica_1_a:exec(function(id)
        box.space._cluster:delete(id)
    end, {instance_id})
end

test_group.test_alerts_for_unnamed_replica = function(g)
    -- The extra instance only replicates from replica_1_a; no instance name
    -- is configured for it.
    local replica_box_cfg = {
        replication = g.replica_1_a.net_box_uri,
    }
    local extra = server:new({
        alias = 'non_config_replica',
        box_cfg = replica_box_cfg
    })

    test_alerts_for_non_vshard_config_template(g, extra)
end

test_group.test_alerts_for_named_replica = function(g)
    t.run_only_if(vutil.feature.persistent_names)

    -- Same scenario as for the unnamed replica, but the extra instance gets
    -- an instance name in its box configuration.
    local replica_box_cfg = {
        replication = g.replica_1_a.net_box_uri,
        instance_name = 'named_replica'
    }
    local extra = server:new({
        alias = 'non_config_replica',
        box_cfg = replica_box_cfg
    })

    test_alerts_for_non_vshard_config_template(g, extra)
end
23 changes: 17 additions & 6 deletions vshard/storage/init.lua
Original file line number Diff line number Diff line change
Expand Up @@ -3959,8 +3959,10 @@ local function storage_info(opts)
local is_named = M.this_replica.id == M.this_replica.name
if this_master and this_master ~= M.this_replica then
for _, replica in pairs(box.info.replication) do
if (not is_named and replica.uuid ~= this_master.uuid)
or (is_named and replica.name ~= this_master.name) then
-- If at least one identifier matches, we consider this replica
-- to be our master.
if replica.uuid ~= this_master.uuid and
replica.name ~= this_master.name then
goto cont
end
state.replication.status = replica.upstream.status
Expand Down Expand Up @@ -3998,15 +4000,24 @@ local function storage_info(opts)
state.replication.status = 'master'
local replica_count = 0
local not_available_replicas = 0
local id = box.info.id
for _, replica in pairs(box.info.replication) do
if (not is_named and replica.uuid ~= M.this_replica.uuid)
or (is_named and replica.name ~= M.this_replica.name) then
-- Alerts for other replicas.
if id ~= replica.id and (
M.this_replicaset.replicas[replica.name] ~= nil or
M.this_replicaset.replicas[replica.uuid] ~= nil
) then
replica_count = replica_count + 1
if replica.downstream == nil or
replica.downstream.vclock == nil then
local instance_id = is_named and replica.name or
replica.uuid
-- May be box.NULL, when names have not been set yet.
if instance_id == nil then
instance_id = replica.id
end
table.insert(state.alerts, alert(code.UNREACHABLE_REPLICA,
is_named and replica.name
or replica.uuid))
instance_id))
state.status = math.max(state.status, consts.STATUS.YELLOW)
not_available_replicas = not_available_replicas + 1
end
Expand Down
Loading