-
Notifications
You must be signed in to change notification settings - Fork 8
feat(linstor): simplify get_controller_uri using ss command
#121
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -15,9 +15,13 @@ | |
| # along with this program. If not, see <https://www.gnu.org/licenses/>. | ||
| # | ||
|
|
||
| from sm_typing import Any, Dict, override | ||
| from sm_typing import ( | ||
| Any, | ||
| Dict, | ||
| List, | ||
| override, | ||
| ) | ||
|
|
||
| import errno | ||
| import json | ||
| import linstor | ||
| import os.path | ||
|
|
@@ -38,6 +42,8 @@ | |
| DATABASE_PATH = '/var/lib/linstor' | ||
| DATABASE_MKFS = 'mkfs.ext4' | ||
|
|
||
| LINSTOR_SATELLITE_PORT = 3366 | ||
|
|
||
| REG_DRBDADM_PRIMARY = re.compile("([^\\s]+)\\s+role:Primary") | ||
| REG_DRBDSETUP_IP = re.compile('[^\\s]+\\s+(.*):.*$') | ||
|
|
||
|
|
@@ -128,73 +134,25 @@ def round_down(value, divisor): | |
|
|
||
| # ============================================================================== | ||
|
|
||
| def get_remote_host_ip(node_name): | ||
| (ret, stdout, stderr) = util.doexec([ | ||
| 'drbdsetup', 'show', DATABASE_VOLUME_NAME, '--json' | ||
| ]) | ||
| if ret != 0: | ||
| return | ||
|
|
||
| def _get_controller_addresses() -> List[str]: | ||
| try: | ||
| conf = json.loads(stdout) | ||
| if not conf: | ||
| return | ||
|
|
||
| for connection in conf[0]['connections']: | ||
| if connection['net']['_name'] == node_name: | ||
| value = connection['path']['_remote_host'] | ||
| res = REG_DRBDSETUP_IP.match(value) | ||
| if res: | ||
| return res.groups()[0] | ||
| break | ||
| except Exception: | ||
| pass | ||
|
|
||
|
|
||
| def _get_controller_uri(): | ||
| PLUGIN_CMD = 'hasControllerRunning' | ||
|
|
||
| # Try to find controller using drbdadm. | ||
| (ret, stdout, stderr) = util.doexec([ | ||
| 'drbdadm', 'status', DATABASE_VOLUME_NAME | ||
| ]) | ||
| if ret == 0: | ||
| # If we are here, the database device exists locally. | ||
|
|
||
| if stdout.startswith('{} role:Primary'.format(DATABASE_VOLUME_NAME)): | ||
| # Nice case, we have the controller running on this local host. | ||
| return 'linstor://localhost' | ||
|
|
||
| # Try to find the host using DRBD connections. | ||
| res = REG_DRBDADM_PRIMARY.search(stdout) | ||
| if res: | ||
| node_name = res.groups()[0] | ||
| ip = get_remote_host_ip(node_name) | ||
| if ip: | ||
| return 'linstor://' + ip | ||
|
|
||
| # Worst case: we use many hosts in the pool (>= 4), so we can't find the | ||
| # primary using drbdadm because we don't have all connections to the | ||
| # replicated volume. `drbdadm status xcp-persistent-database` returns | ||
| # 3 connections by default. | ||
| try: | ||
| session = util.timeout_call(10, util.get_localAPI_session) | ||
| (ret, stdout, stderr) = util.doexec([ | ||
| "/usr/sbin/ss", "-tnpH", "state", "established", f"( sport = :{LINSTOR_SATELLITE_PORT} )" | ||
| ]) | ||
| if ret == 0: | ||
| return [ | ||
| line.split()[3].rsplit(":", 1)[0] | ||
| for line in stdout.splitlines() | ||
| ] | ||
| util.SMlog(f"Unexpected code {ret}: {stderr}") | ||
| except Exception as e: | ||
| util.SMlog(f"Unable to get controller addresses: {e}") | ||
| return [] | ||
|
|
||
| for host_ref, host_record in session.xenapi.host.get_all_records().items(): | ||
| node_name = host_record['hostname'] | ||
| try: | ||
| if util.strtobool( | ||
| session.xenapi.host.call_plugin(host_ref, PLUGIN, PLUGIN_CMD, {}) | ||
| ): | ||
| return 'linstor://' + host_record['address'] | ||
| except Exception as e: | ||
| # Can throw and exception if a host is offline. So catch it. | ||
| util.SMlog('Unable to search controller on `{}`: {}'.format( | ||
| node_name, e | ||
| )) | ||
| except: | ||
| # Not found, maybe we are trying to create the SR... | ||
| pass | ||
| def _get_controller_uri() -> str: | ||
| # TODO: Check that an IP address from the current pool is returned. | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It might not be needed to check that the controller is an existing pool. It's unlikely but a future configuration could place the controller somewhere else.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. My fear is that a host might be moved to another LINSTOR pool. I think there are some situations we just don't want to happen. |
||
| addresses = _get_controller_addresses() | ||
| return "linstor://" + addresses[0] if addresses else "" | ||
|
|
||
| def get_controller_uri(): | ||
| retries = 0 | ||
|
|
@@ -204,7 +162,7 @@ def get_controller_uri(): | |
| return uri | ||
|
|
||
| retries += 1 | ||
| if retries >= 10: | ||
| if retries >= 30: | ||
Millefeuille42 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| break | ||
| time.sleep(1) | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You should check the return code and also that the output is indeed an IP address. It could very much be that the
sscommad is missing (even if very unlikely) or that it doesn't return anything. It could also be that the output changed or many other things.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure it's useful to check if it's an IP address (let's say the output later shows us a hostname; we can theoretically leave it as is, all we want is a destination string). However, I'll modify the script to support IPv6 just in case...
If the program isn't there, an exception is already raised by doexec. No need to verify that.
However, you make a good point about the return check; I didn't backport that fix correctly, as I'm using a different API in my new code. :)