diff --git a/chef/cookbooks/corosync/attributes/default.rb b/chef/cookbooks/corosync/attributes/default.rb index 0e749862..aa0defc7 100644 --- a/chef/cookbooks/corosync/attributes/default.rb +++ b/chef/cookbooks/corosync/attributes/default.rb @@ -20,6 +20,13 @@ default[:corosync][:mcast_port] = 5405 default[:corosync][:members] = [] default[:corosync][:transport] = "udp" +default[:corosync][:ring_mode] = "single_ring" + +default[:corosync][:second_ring_used] = false +default[:corosync][:second_ring_bind_addr] = "" +default[:corosync][:second_ring_mcast_addr] = "" +default[:corosync][:second_ring_mcast_port] = "" +default[:corosync][:second_ring_members] = [] case node[:platform_family] when "suse" diff --git a/chef/cookbooks/corosync/recipes/config.rb b/chef/cookbooks/corosync/recipes/config.rb index 8a41c31e..2134dbb3 100644 --- a/chef/cookbooks/corosync/recipes/config.rb +++ b/chef/cookbooks/corosync/recipes/config.rb @@ -38,6 +38,24 @@ raise "Members have to be defined when using \"udpu\" transport!" end +interfaces = [ + { + bind_addr: node[:corosync][:bind_addr], + mcast_addr: node[:corosync][:mcast_addr], + mcast_port: node[:corosync][:mcast_port] + } +] + +if node[:corosync][:second_ring_used] + interfaces += [ + { + bind_addr: node[:corosync][:second_ring_bind_addr], + mcast_addr: node[:corosync][:second_ring_mcast_addr], + mcast_port: node[:corosync][:second_ring_mcast_port] + } + ] +end + template "/etc/corosync/corosync.conf" do if node[:platform] == "suse" && node[:platform_version].to_f < 12.0 source "corosync.conf.erb" @@ -49,11 +67,12 @@ mode 0600 variables( cluster_name: node[:corosync][:cluster_name], - bind_addr: node[:corosync][:bind_addr], - mcast_addr: node[:corosync][:mcast_addr], - mcast_port: node[:corosync][:mcast_port], + interfaces: interfaces, members: node[:corosync][:members], - transport: node[:corosync][:transport] + transport: node[:corosync][:transport], + fqdn: node[:fqdn], + second_ring_used: node[:corosync][:second_ring_used], + second_ring_members: node[:corosync][:second_ring_members] ) # If the config parameters are changed, it's too risky to just diff --git a/chef/cookbooks/corosync/templates/default/corosync.conf.v2.erb b/chef/cookbooks/corosync/templates/default/corosync.conf.v2.erb index 544ae292..f352065e 100644 --- a/chef/cookbooks/corosync/templates/default/corosync.conf.v2.erb +++ b/chef/cookbooks/corosync/templates/default/corosync.conf.v2.erb @@ -1,6 +1,6 @@ # DO NOT EDIT! -# This file is automatically configured via Chef on <%= node["fqdn"] %> +# This file is automatically configured via Chef on <%= @fqdn %> # Please read the corosync.conf.5 manual page totem { @@ -44,15 +44,19 @@ totem { # interface: define at least one interface to communicate # over. If you define more than one interface stanza, you must # also set rrp_mode. +<% if @second_ring_used -%> + rrp_mode: passive +<% end -%> +<% @interfaces.each.with_index do |interface, ring_index| -%> interface { # Rings must be consecutively numbered, starting at 0. - ringnumber: 0 + ringnumber: <%= ring_index %> # This is normally the *network* address of the # interface to bind to. This ensures that you can use # identical instances of this configuration file # across all your cluster nodes, without having to # modify this option. - bindnetaddr: <%= @bind_addr %> + bindnetaddr: <%= interface[:bind_addr] %> # However, if you have multiple physical network # interfaces configured for the same subnet, then the # network address alone is not sufficient to identify @@ -67,19 +71,20 @@ totem { # addresses across multiple Corosync clusters sharing # the same network. <% if @transport == 'udp' -%> - mcastaddr: <%= @mcast_addr %> + mcastaddr: <%= interface[:mcast_addr] %> <% end -%> # Corosync uses the port you specify here for UDP # messaging, and also the immediately preceding # port. Thus if you set this to 5405, Corosync sends # messages over UDP ports 5405 and 5404. - mcastport: <%= @mcast_port %> + mcastport: <%= interface[:mcast_port] %> # Time-to-live for cluster communication packets. The # number of hops (routers) that this ring will allow # itself to pass. Note that multicast routing must be # specifically enabled on most network routers. ttl: 1 } +<% end -%> transport: <%= @transport %> } @@ -112,9 +117,12 @@ logging { <% if @transport == 'udpu' -%> nodelist { - <% @members.sort.each do |memberaddr| -%> + <% @members.sort.each.with_index do |memberaddr, member_idx| -%> node { ring0_addr: <%= memberaddr %> +<% if @second_ring_used -%> + ring1_addr: <%= @second_ring_members[member_idx] %> +<% end -%> } <% end -%> } diff --git a/chef/cookbooks/pacemaker/files/default/reload_corosync_configuration.sh b/chef/cookbooks/pacemaker/files/default/reload_corosync_configuration.sh new file mode 100644 index 00000000..c84f0ccd --- /dev/null +++ b/chef/cookbooks/pacemaker/files/default/reload_corosync_configuration.sh @@ -0,0 +1,48 @@ +#!/bin/bash + +# Script to re-load corosync configuration on a crowbar managed pacemaker +# cluster. +# This script can be used when you switch from single corosync ring to +# dual corosync ring. +# After you applied the modified proposal successfully, run this script on +# crowbar +# Please stop all chef-client runs while running this script. +set -eux + +CLUSTER_PROPOSAL_NAME="$1" + +# Make sure that the proposal exists by asking for it's details +# if this command fails, the script exits +crowbarctl proposal show pacemaker "$CLUSTER_PROPOSAL_NAME" + +NODES=$(crowbarctl \ + proposal show pacemaker "$CLUSTER_PROPOSAL_NAME" \ + --format=plain \ + --filter "deployment.pacemaker.elements.pacemaker-cluster-member" | + cut -d " " -f 2) + +# Get the first node - it will be used to issue crm commands +for node in $NODES; do + FIRST_NODE="$node" + break +done + +# Print out initial configuration +ssh "$FIRST_NODE" corosync-cfgtool -s + +# Put the cluster in maintenance mode +ssh "$FIRST_NODE" crm --wait configure property maintenance-mode=true + +# Restart corosync on all nodes +for node in $NODES; do + ssh "$node" systemctl restart corosync +done + +# Give some time for corosync to stand up +sleep 30 + +# Exit from maintenance mode +ssh "$FIRST_NODE" crm --wait configure property maintenance-mode=false + +# Print out new configuration +ssh "$FIRST_NODE" corosync-cfgtool -s diff --git a/chef/data_bags/crowbar/migrate/pacemaker/033_add_double_ring.rb b/chef/data_bags/crowbar/migrate/pacemaker/033_add_double_ring.rb new file mode 100644 index 00000000..6b8d2efa --- /dev/null +++ b/chef/data_bags/crowbar/migrate/pacemaker/033_add_double_ring.rb @@ -0,0 +1,13 @@ +def upgrade ta, td, a, d + a["corosync"]["ring_mode"] = \ + ta["corosync"]["ring_mode"] + a["corosync"]["second_ring_network"] = \ + ta["corosync"]["second_ring_network"] + return a, d +end + +def downgrade ta, td, a, d + a["corosync"].delete("ring_mode") + a["corosync"].delete("second_ring_network") + return a, d +end diff --git a/chef/data_bags/crowbar/template-pacemaker.json b/chef/data_bags/crowbar/template-pacemaker.json index e289351b..c3395ca1 100644 --- a/chef/data_bags/crowbar/template-pacemaker.json +++ b/chef/data_bags/crowbar/template-pacemaker.json @@ -8,7 +8,9 @@ "mcast_addr": "239.255.0.1", "mcast_port": 5405, "password": "crowbar", - "require_clean_for_autostart_wrapper": "auto" + "require_clean_for_autostart_wrapper": "auto", + "ring_mode": "single_ring", + "second_ring_network": "" }, "crm": { "no_quorum_policy": "stop" @@ -54,7 +56,7 @@ "pacemaker": { "crowbar-revision": 0, "crowbar-applied": false, - "schema-revision": 32, + "schema-revision": 33, "element_states": { "pacemaker-cluster-member" : [ "readying", "ready", "applying" ], "hawk-server" : [ "readying", "ready", "applying" ], diff --git a/chef/data_bags/crowbar/template-pacemaker.schema b/chef/data_bags/crowbar/template-pacemaker.schema index c4d2abcf..73338094 100644 --- a/chef/data_bags/crowbar/template-pacemaker.schema +++ b/chef/data_bags/crowbar/template-pacemaker.schema @@ -20,7 +20,9 @@ "mcast_addr": { "type": "str", "required": true }, "mcast_port": { "type": "int", "required": true }, "password": { "type": "str", "required": true }, - "require_clean_for_autostart_wrapper": { "type": "str", "required": true, "pattern": "/^true$|^false$|^auto$/" } + "require_clean_for_autostart_wrapper": { "type": "str", "required": true, "pattern": "/^true$|^false$|^auto$/" }, + "ring_mode": { "type": "str", "required": true }, + "second_ring_network": { "type": "str", "required": true } } }, "crm": { diff --git a/crowbar_framework/app/helpers/barclamp/pacemaker_helper.rb b/crowbar_framework/app/helpers/barclamp/pacemaker_helper.rb index 2ba97bbd..18436865 100644 --- a/crowbar_framework/app/helpers/barclamp/pacemaker_helper.rb +++ b/crowbar_framework/app/helpers/barclamp/pacemaker_helper.rb @@ -31,6 +31,16 @@ def transport_for_pacemaker(selected) ) end + def ring_modes_for_corosync(selected) + options_for_select( + [ + [t(".corosync.ring_modes.single_ring"), "single_ring"], + [t(".corosync.ring_modes.dual_ring"), "dual_ring"] + ], + selected.to_s + ) + end + def no_quorum_policy_for_pacemaker(selected) # no translation for the strings as we simply show the values that will end # up in the config file diff --git a/crowbar_framework/app/models/pacemaker_service.rb b/crowbar_framework/app/models/pacemaker_service.rb index 5ac63011..4940e6e2 100644 --- a/crowbar_framework/app/models/pacemaker_service.rb +++ b/crowbar_framework/app/models/pacemaker_service.rb @@ -369,6 +369,28 @@ def apply_role_pre_chef_call(old_role, role, all_nodes) role.default_attributes["corosync"]["members"] = member_nodes.map{ |n| n.get_network_by_type("admin")["address"] } role.default_attributes["corosync"]["transport"] = role.default_attributes["pacemaker"]["corosync"]["transport"] + # Set up the second ring if dual_ring mode configured + ring_mode = role.default_attributes["pacemaker"]["corosync"]["ring_mode"] + + if ring_mode == "dual_ring" + second_ring_network = role.default_attributes["pacemaker"]["corosync"]["second_ring_network"] + second_ring_net = Chef::DataBag.load("crowbar/#{second_ring_network}_network") + + net_svc = NetworkService.new @logger + second_ring_members = member_nodes.map do |member_node| + allocated_ip_response = net_svc.allocate_ip "default", second_ring_network, "host", member_node.name + allocated_ip_response[1]["address"] + end + + role.default_attributes["corosync"]["second_ring_used"] = true + role.default_attributes["corosync"]["second_ring_bind_addr"] = second_ring_net["network"]["subnet"] + role.default_attributes["corosync"]["second_ring_mcast_addr"] = role.default_attributes["pacemaker"]["corosync"]["mcast_addr"] + role.default_attributes["corosync"]["second_ring_mcast_port"] = role.default_attributes["pacemaker"]["corosync"]["mcast_port"] + role.default_attributes["corosync"]["second_ring_members"] = second_ring_members + else + role.default_attributes["corosync"]["second_ring_used"] = false + end + role.default_attributes["drbd"] ||= {} role.default_attributes["drbd"]["common"] ||= {} role.default_attributes["drbd"]["common"]["net"] ||= {} @@ -626,6 +648,35 @@ def validate_proposal_after_save proposal ) end + ring_mode = proposal["attributes"][@bc_name]["corosync"]["ring_mode"] + unless %w(single_ring dual_ring).include?(ring_mode) + validation_error I18n.t( + "barclamp.#{bc_name}.validation.transport_value", + ring_mode: ring_mode + ) + end + + if ring_mode == "dual_ring" + unless transport == "udpu" + validation_error I18n.t( + "barclamp.#{bc_name}.validation.second_ring_only_udpu" + ) + end + second_ring_network = proposal["attributes"][@bc_name]["corosync"]["second_ring_network"] + if second_ring_network == "admin" + validation_error I18n.t( + "barclamp.#{bc_name}.validation.second_ring_network_must_differ_from_admin" + ) + end + second_ring_net = Chef::DataBag.load("crowbar/#{second_ring_network}_network") rescue nil + unless second_ring_net + validation_error I18n.t( + "barclamp.#{bc_name}.validation.second_ring_network_value", + second_ring_network: second_ring_network + ) + end + end + no_quorum_policy = proposal["attributes"][@bc_name]["crm"]["no_quorum_policy"] unless %w(ignore freeze stop suicide).include?(no_quorum_policy) validation_error I18n.t( diff --git a/crowbar_framework/app/views/barclamp/pacemaker/_edit_attributes.html.haml b/crowbar_framework/app/views/barclamp/pacemaker/_edit_attributes.html.haml index 174960bb..2b2b676d 100644 --- a/crowbar_framework/app/views/barclamp/pacemaker/_edit_attributes.html.haml +++ b/crowbar_framework/app/views/barclamp/pacemaker/_edit_attributes.html.haml @@ -5,6 +5,11 @@ .panel-body = select_field %w(corosync transport), :collection => :transport_for_pacemaker + = select_field %w(corosync ring_mode), :collection => :ring_modes_for_corosync, "data-showit" => "dual_ring", "data-showit-target" => "#dual_ring_container", "data-showit-direct" => "true" + + %div{ :id => "dual_ring_container" } + = string_field %w(corosync second_ring_network) + = select_field %w(crm no_quorum_policy), :collection => :no_quorum_policy_for_pacemaker %span.help-block = t('.crm.no_quorum_policy_hint_html') diff --git a/crowbar_framework/config/locales/pacemaker/en.yml b/crowbar_framework/config/locales/pacemaker/en.yml index b08b5ce1..74ed42b1 100644 --- a/crowbar_framework/config/locales/pacemaker/en.yml +++ b/crowbar_framework/config/locales/pacemaker/en.yml @@ -60,6 +60,11 @@ en: down or rebooted; you will have to manually start it after having fixed the issue. "Automatic" means that this setting will be set to "true" for two-nodes cluster, and to "false" otherwise.' + ring_mode: 'Ring configuration' + ring_modes: + single_ring: 'Single ring (only admin network)' + dual_ring: 'Dual ring' + second_ring_network: 'Network to use as second ring' crm: no_quorum_policy: 'Policy when cluster does not have quorum' no_quorum_policy_hint_html: 'Refer to the pacemaker documentation for a description of each value.' @@ -123,6 +128,10 @@ en: drbd: 'Setting up DRBD requires a cluster of two nodes.' hae_repo: 'The HAE repositories have not been setup.' transport_value: 'Invalid transport value: %{transport}.' + ring_mode_value: 'Invalid ring mode: %{ring_mode}.' + second_ring_network_value: 'Network not found for setting up second corosync ring: %{second_ring_network}.' + second_ring_only_udpu: 'Dual corosync ring is only supported with Unicast (UDPU).' + second_ring_network_must_differ_from_admin: 'The second network must not be the admin network' quorum_policy: 'Invalid no-quorum-policy value: %{no_quorum_policy}.' platform: 'All nodes in proposal must have the same platform.' pacemaker_proposal: 'Nodes cannot be part of multiple Pacemaker proposals, but %{other_member} is already part of proposal \"%{p_name}\".'