From 54b17038368370f307c3dc44139671326c2e7cdf Mon Sep 17 00:00:00 2001 From: Mark Syms Date: Mon, 9 Dec 2024 14:12:15 +0000 Subject: [PATCH 01/72] CA-403593: don't log the session ref Signed-off-by: Mark Syms --- drivers/SRCommand.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/SRCommand.py b/drivers/SRCommand.py index 6afbdfbde..72694c091 100755 --- a/drivers/SRCommand.py +++ b/drivers/SRCommand.py @@ -182,12 +182,16 @@ def _run(self, sr, target): dconf_type = sr.dconf.get("type") if not dconf_type or not NO_LOGGING.get(dconf_type) or \ self.cmd not in NO_LOGGING[dconf_type]: - if 'device_config' in self.params: - util.SMlog("%s %s" % ( - self.cmd, util.hidePasswdInParams( - self.params, 'device_config'))) - else: - util.SMlog("%s %s" % (self.cmd, repr(self.params))) + params_to_log = self.params + + if 'device_config' in params_to_log: + params_to_log = util.hidePasswdInParams( + self.params, 'device_config') + + if 'session_ref' in params_to_log: + params_to_log['session_ref'] = '******' + + util.SMlog("%s %s" % (self.cmd, repr(params_to_log))) caching_params = dict((k, self.params.get(k)) for k in [blktap2.VDI.CONF_KEY_ALLOW_CACHING, From 8a1490736bbb2b81e898f85d9d3d62e02523b97d Mon Sep 17 00:00:00 2001 From: Lunfan Zhang Date: Sun, 26 Jan 2025 01:49:36 -0500 Subject: [PATCH 02/72] CA-405381 Mpathcount Info Does Not Automatically Refresh in XenCenter After Disabling and Enabling Multipath Signed-off-by: Lunfan Zhang --- drivers/mpathcount.py | 36 ++++++++++++++++++++++++++++-------- tests/test_mpathcount.py | 32 +++++++++++++++++++++----------- 2 files changed, 49 insertions(+), 19 deletions(-) diff --git a/drivers/mpathcount.py b/drivers/mpathcount.py index 3137e9099..f89e37a90 100755 --- a/drivers/mpathcount.py +++ b/drivers/mpathcount.py @@ -22,6 +22,7 @@ import xs_errors import mpath_cli import json +import subprocess supported = ['iscsi', 'lvmoiscsi', 'rawhba', 'lvmohba', 'ocfsohba', 'ocfsoiscsi', 'netapp', 'lvmofcoe', 'gfs2'] @@ -35,6 +36,7 @@ match_bySCSIid = False mpath_enabled = True SCSIid = 'NOTSUPPLIED' +XAPI_HEALTH_CHECK = '/opt/xensource/libexec/xapi-health-check' cached_DM_maj = None @@ -199,13 +201,31 @@ def check_devconfig(devconfig, sm_config, config, remove, add, mpath_status=None else: update_config(key, i, config[key], remove, add, mpath_status) - -def check_xapi_is_enabled(session, hostref): - host = session.xenapi.host.get_record(hostref) - if not host['enabled']: - util.SMlog("Xapi is not enabled, exiting") - mpc_exit(session, 0) - +def check_xapi_is_enabled(): + """Check XAPI health status""" + def _run_command(command, timeout): + try: + process = subprocess.Popen( + command, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + universal_newlines=True + ) + try: + stdout, stderr = process.communicate(timeout=timeout) + return process.returncode, stdout, stderr + except subprocess.TimeoutExpired: + process.kill() + util.SMlog(f"Command execution timeout after {timeout}s: {' '.join(command)}") + return -1, "", "Timeout" + except Exception as e: + util.SMlog(f"Error executing command: {e}") + return -1, "", str(e) + + returncode, _, stderr = _run_command([XAPI_HEALTH_CHECK], timeout=120) + if returncode != 0: + util.SMlog(f"XAPI health check failed: {stderr}") + return returncode == 0 if __name__ == '__main__': try: @@ -215,7 +235,7 @@ def check_xapi_is_enabled(session, hostref): sys.exit(-1) localhost = session.xenapi.host.get_by_uuid(get_localhost_uuid()) - check_xapi_is_enabled(session, localhost) + 
check_xapi_is_enabled() # Check whether multipathing is enabled (either for root dev or SRs) try: if get_root_dev_major() != get_dm_major(): diff --git a/tests/test_mpathcount.py b/tests/test_mpathcount.py index c02478057..922e1f204 100644 --- a/tests/test_mpathcount.py +++ b/tests/test_mpathcount.py @@ -211,27 +211,37 @@ def test_exit_log_out_error(self, mock_exit): session.xenapi.session.logout.assert_called_once() @mock.patch('mpathcount.sys.exit', autospec=True) - def test_check_xapi_enabled_yes(self, mock_exit): + @mock.patch('mpathcount.util.SMlog', autospec=True) + @mock.patch('mpathcount.subprocess.Popen', autospec=True) + def test_check_xapi_enabled_yes(self, mock_popen, mock_smlog, mock_exit): # Arrange - session = mock.MagicMock() - session.xenapi.host.get_record.return_value = {'enabled': True} - hostref = mock.MagicMock() + process_mock = mock.Mock() + attrs = {'communicate.return_value': ('output', ''), 'returncode': 0} + process_mock.configure_mock(**attrs) + mock_popen.return_value = process_mock # Act - mpathcount.check_xapi_is_enabled(session, hostref) + result = mpathcount.check_xapi_is_enabled() # Assert + self.assertTrue(result) mock_exit.assert_not_called() + mock_smlog.assert_not_called() @mock.patch('mpathcount.sys.exit', autospec=True) - def test_check_xapi_enabled_no(self, mock_exit): + @mock.patch('mpathcount.util.SMlog', autospec=True) + @mock.patch('mpathcount.subprocess.Popen', autospec=True) + def test_check_xapi_enabled_no(self, mock_popen, mock_smlog, mock_exit): # Arrange - session = mock.MagicMock() - session.xenapi.host.get_record.return_value = {'enabled': False} - hostref = mock.MagicMock() + process_mock = mock.Mock() + attrs = {'communicate.return_value': ('', 'error'), 'returncode': 1} + process_mock.configure_mock(**attrs) + mock_popen.return_value = process_mock # Act - mpathcount.check_xapi_is_enabled(session, hostref) + result = mpathcount.check_xapi_is_enabled() # Assert - mock_exit.assert_called_once_with(0) + self.assertFalse(result) + mock_exit.assert_not_called() + mock_smlog.assert_called_once_with('XAPI health check failed: error') From 511144d246d9e50f867d24b297279cca407f4ad8 Mon Sep 17 00:00:00 2001 From: Samuel Verschelde Date: Thu, 13 Aug 2020 15:22:17 +0200 Subject: [PATCH 03/72] Update xs-sm.service's description for XCP-ng This was a patch added to the sm RPM git repo before we had this forked git repo for sm in the xcp-ng github organisation. 
--- systemd/xs-sm.service | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/systemd/xs-sm.service b/systemd/xs-sm.service index 99cb313f3..609c6ef5d 100644 --- a/systemd/xs-sm.service +++ b/systemd/xs-sm.service @@ -1,5 +1,5 @@ [Unit] -Description=XenServer Storage Manager (SM) +Description=XCP-ng Storage Manager (SM) Before=xapi.service Conflicts=shutdown.target RefuseManualStop=yes From 45816fb084f4027104b90e7e478eb44dae8e7764 Mon Sep 17 00:00:00 2001 From: Ronan Abhamon Date: Mon, 20 Jul 2020 16:26:42 +0200 Subject: [PATCH 04/72] feat(drivers): add CephFS and GlusterFS drivers --- Makefile | 2 + drivers/CephFSSR.py | 296 +++++++++++++++++++++++++++++++++++++++++ drivers/GlusterFSSR.py | 287 +++++++++++++++++++++++++++++++++++++++ drivers/cleanup.py | 4 +- 4 files changed, 588 insertions(+), 1 deletion(-) create mode 100644 drivers/CephFSSR.py create mode 100644 drivers/GlusterFSSR.py diff --git a/Makefile b/Makefile index e6ff5a7a0..d4f540f48 100755 --- a/Makefile +++ b/Makefile @@ -14,6 +14,8 @@ SM_DRIVERS += LVHDoHBA SM_DRIVERS += SHM SM_DRIVERS += SMB SM_DRIVERS += LVHDoFCoE +SM_DRIVERS += CephFS +SM_DRIVERS += GlusterFS SM_LIBS := SR SM_LIBS += SRCommand diff --git a/drivers/CephFSSR.py b/drivers/CephFSSR.py new file mode 100644 index 000000000..415152f7e --- /dev/null +++ b/drivers/CephFSSR.py @@ -0,0 +1,296 @@ +#!/usr/bin/env python +# +# Original work copyright (C) Citrix systems +# Modified work copyright (C) Vates SAS and XCP-ng community +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published +# by the Free Software Foundation; version 2.1 only. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +# +# CEPHFSSR: Based on FileSR, mounts ceph fs share + +import errno +import os +import syslog as _syslog +import xmlrpclib +from syslog import syslog + +# careful with the import order here +# FileSR has a circular dependency: +# FileSR -> blktap2 -> lvutil -> EXTSR -> FileSR +# importing in this order seems to avoid triggering the issue. 
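+# (Assumed mechanism behind the note above: with a different import order,
+# EXTSR can be imported while FileSR is still only partially initialised and
+# its name lookups fail; importing SR, SRCommand and FileSR first closes that
+# window.)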
+import SR +import SRCommand +import FileSR +# end of careful +import cleanup +import util +import vhdutil +import xs_errors +from lock import Lock + +CAPABILITIES = ["SR_PROBE", "SR_UPDATE", "SR_CACHING", + "VDI_CREATE", "VDI_DELETE", "VDI_ATTACH", "VDI_DETACH", + "VDI_UPDATE", "VDI_CLONE", "VDI_SNAPSHOT", "VDI_RESIZE", "VDI_MIRROR", + "VDI_GENERATE_CONFIG", + "VDI_RESET_ON_BOOT/2", "ATOMIC_PAUSE"] + +CONFIGURATION = [ + ['server', 'Ceph server(s) (required, ex: "192.168.0.12" or "10.10.10.10,10.10.10.26")'], + ['serverpath', 'Ceph FS path (required, ex: "/")'], + ['serverport', 'ex: 6789'], + ['options', 'Ceph FS client name, and secretfile (required, ex: "name=admin,secretfile=/etc/ceph/admin.secret")'] +] + +DRIVER_INFO = { + 'name': 'CephFS VHD', + 'description': 'SR plugin which stores disks as VHD files on a CephFS storage', + 'vendor': 'Vates SAS', + 'copyright': '(C) 2020 Vates SAS', + 'driver_version': '1.0', + 'required_api_version': '1.0', + 'capabilities': CAPABILITIES, + 'configuration': CONFIGURATION +} + +DRIVER_CONFIG = {"ATTACH_FROM_CONFIG_WITH_TAPDISK": True} + +# The mountpoint for the directory when performing an sr_probe. All probes +# are guaranteed to be serialised by xapi, so this single mountpoint is fine. +PROBE_MOUNTPOINT = os.path.join(SR.MOUNT_BASE, "probe") + + +class CephFSException(Exception): + def __init__(self, errstr): + self.errstr = errstr + + +# mountpoint = /var/run/sr-mount/CephFS/uuid +# linkpath = mountpoint/uuid - path to SR directory on share +# path = /var/run/sr-mount/uuid - symlink to SR directory on share +class CephFSSR(FileSR.FileSR): + """Ceph file-based storage repository""" + + DRIVER_TYPE = 'cephfs' + + def handles(sr_type): + # fudge, because the parent class (FileSR) checks for smb to alter its behavior + return sr_type == CephFSSR.DRIVER_TYPE or sr_type == 'smb' + + handles = staticmethod(handles) + + def load(self, sr_uuid): + if not self._is_ceph_available(): + raise xs_errors.XenError( + 'SRUnavailable', + opterr='ceph is not installed' + ) + + self.ops_exclusive = FileSR.OPS_EXCLUSIVE + self.lock = Lock(vhdutil.LOCK_TYPE_SR, self.uuid) + self.sr_vditype = SR.DEFAULT_TAP + self.driver_config = DRIVER_CONFIG + if 'server' not in self.dconf: + raise xs_errors.XenError('ConfigServerMissing') + self.remoteserver = self.dconf['server'] + self.remotepath = self.dconf['serverpath'] + # if serverport is not specified, use default 6789 + if 'serverport' not in self.dconf: + self.remoteport = "6789" + else: + self.remoteport = self.dconf['serverport'] + if self.sr_ref and self.session is not None: + self.sm_config = self.session.xenapi.SR.get_sm_config(self.sr_ref) + else: + self.sm_config = self.srcmd.params.get('sr_sm_config') or {} + self.mountpoint = os.path.join(SR.MOUNT_BASE, 'CephFS', sr_uuid) + self.linkpath = os.path.join(self.mountpoint, sr_uuid or "") + self.path = os.path.join(SR.MOUNT_BASE, sr_uuid) + self._check_o_direct() + + def checkmount(self): + return util.ioretry(lambda: ((util.pathexists(self.mountpoint) and + util.ismount(self.mountpoint)) and + util.pathexists(self.path))) + + def mount(self, mountpoint=None): + """Mount the remote ceph export at 'mountpoint'""" + if mountpoint is None: + mountpoint = self.mountpoint + elif not util.is_string(mountpoint) or mountpoint == "": + raise CephFSException("mountpoint not a string object") + + try: + if not util.ioretry(lambda: util.isdir(mountpoint)): + util.ioretry(lambda: util.makedirs(mountpoint)) + except util.CommandException, inst: + raise CephFSException("Failed to 
make directory: code is %d" % inst.code) + + try: + options = [] + if self.dconf.has_key('options'): + options.append(self.dconf['options']) + if options: + options = ['-o', ','.join(options)] + command = ["mount", '-t', 'ceph', self.remoteserver+":"+self.remoteport+":"+self.remotepath, mountpoint] + options + util.ioretry(lambda: util.pread(command), errlist=[errno.EPIPE, errno.EIO], maxretry=2, nofail=True) + except util.CommandException, inst: + syslog(_syslog.LOG_ERR, 'CephFS mount failed ' + inst.__str__()) + raise CephFSException("mount failed with return code %d" % inst.code) + + # Sanity check to ensure that the user has at least RO access to the + # mounted share. Windows sharing and security settings can be tricky. + try: + util.listdir(mountpoint) + except util.CommandException: + try: + self.unmount(mountpoint, True) + except CephFSException: + util.logException('CephFSSR.unmount()') + raise CephFSException("Permission denied. Please check user privileges.") + + def unmount(self, mountpoint, rmmountpoint): + try: + util.pread(["umount", mountpoint]) + except util.CommandException, inst: + raise CephFSException("umount failed with return code %d" % inst.code) + if rmmountpoint: + try: + os.rmdir(mountpoint) + except OSError, inst: + raise CephFSException("rmdir failed with error '%s'" % inst.strerror) + + def attach(self, sr_uuid): + if not self.checkmount(): + try: + self.mount() + os.symlink(self.linkpath, self.path) + except CephFSException as exc: + raise xs_errors.SROSError(12, exc.errstr) + self.attached = True + + def probe(self): + try: + self.mount(PROBE_MOUNTPOINT) + sr_list = filter(util.match_uuid, util.listdir(PROBE_MOUNTPOINT)) + self.unmount(PROBE_MOUNTPOINT, True) + except (util.CommandException, xs_errors.XenError): + raise + # Create a dictionary from the SR uuids to feed SRtoXML() + sr_dict = {sr_uuid: {} for sr_uuid in sr_list} + return util.SRtoXML(sr_dict) + + def detach(self, sr_uuid): + if not self.checkmount(): + return + util.SMlog("Aborting GC/coalesce") + cleanup.abort(self.uuid) + # Change directory to avoid unmount conflicts + os.chdir(SR.MOUNT_BASE) + self.unmount(self.mountpoint, True) + os.unlink(self.path) + self.attached = False + + def create(self, sr_uuid, size): + if self.checkmount(): + raise xs_errors.SROSError(113, 'CephFS mount point already attached') + + try: + self.mount() + except CephFSException, exc: + # noinspection PyBroadException + try: + os.rmdir(self.mountpoint) + except: + # we have no recovery strategy + pass + raise xs_errors.SROSError(111, "CephFS mount error [opterr=%s]" % exc.errstr) + + if util.ioretry(lambda: util.pathexists(self.linkpath)): + if len(util.ioretry(lambda: util.listdir(self.linkpath))) != 0: + self.detach(sr_uuid) + raise xs_errors.XenError('SRExists') + else: + try: + util.ioretry(lambda: util.makedirs(self.linkpath)) + os.symlink(self.linkpath, self.path) + except util.CommandException, inst: + if inst.code != errno.EEXIST: + try: + self.unmount(self.mountpoint, True) + except CephFSException: + util.logException('CephFSSR.unmount()') + raise xs_errors.SROSError(116, + "Failed to create CephFS SR. 
remote directory creation error: {}".format( + os.strerror(inst.code))) + self.detach(sr_uuid) + + def delete(self, sr_uuid): + # try to remove/delete non VDI contents first + super(CephFSSR, self).delete(sr_uuid) + try: + if self.checkmount(): + self.detach(sr_uuid) + self.mount() + if util.ioretry(lambda: util.pathexists(self.linkpath)): + util.ioretry(lambda: os.rmdir(self.linkpath)) + util.SMlog(str(self.unmount(self.mountpoint, True))) + except util.CommandException, inst: + self.detach(sr_uuid) + if inst.code != errno.ENOENT: + raise xs_errors.SROSError(114, "Failed to remove CephFS mount point") + + def vdi(self, uuid, loadLocked=False): + return CephFSFileVDI(self, uuid) + + @staticmethod + def _is_ceph_available(): + import distutils.spawn + return distutils.spawn.find_executable('ceph') + +class CephFSFileVDI(FileSR.FileVDI): + def attach(self, sr_uuid, vdi_uuid): + if not hasattr(self, 'xenstore_data'): + self.xenstore_data = {} + + self.xenstore_data['storage-type'] = CephFSSR.DRIVER_TYPE + + return super(CephFSFileVDI, self).attach(sr_uuid, vdi_uuid) + + def generate_config(self, sr_uuid, vdi_uuid): + util.SMlog("SMBFileVDI.generate_config") + if not util.pathexists(self.path): + raise xs_errors.XenError('VDIUnavailable') + resp = {'device_config': self.sr.dconf, + 'sr_uuid': sr_uuid, + 'vdi_uuid': vdi_uuid, + 'sr_sm_config': self.sr.sm_config, + 'command': 'vdi_attach_from_config'} + # Return the 'config' encoded within a normal XMLRPC response so that + # we can use the regular response/error parsing code. + config = xmlrpclib.dumps(tuple([resp]), "vdi_attach_from_config") + return xmlrpclib.dumps((config,), "", True) + + def attach_from_config(self, sr_uuid, vdi_uuid): + try: + if not util.pathexists(self.sr.path): + self.sr.attach(sr_uuid) + except: + util.logException("SMBFileVDI.attach_from_config") + raise xs_errors.XenError('SRUnavailable', + opterr='Unable to attach from config') + + +if __name__ == '__main__': + SRCommand.run(CephFSSR, DRIVER_INFO) +else: + SR.registerSR(CephFSSR) diff --git a/drivers/GlusterFSSR.py b/drivers/GlusterFSSR.py new file mode 100644 index 000000000..72c482ae8 --- /dev/null +++ b/drivers/GlusterFSSR.py @@ -0,0 +1,287 @@ +#!/usr/bin/env python +# +# Original work copyright (C) Citrix systems +# Modified work copyright (C) Vates SAS and XCP-ng community +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published +# by the Free Software Foundation; version 2.1 only. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +import errno +import os +import syslog as _syslog +import xmlrpclib +from syslog import syslog + +# careful with the import order here +# FileSR has a circular dependency: FileSR- > blktap2 -> lvutil -> EXTSR -> FileSR +# importing in this order seems to avoid triggering the issue. 
+import SR +import SRCommand +import FileSR +# end of careful +import cleanup +import util +import vhdutil +import xs_errors +from lock import Lock + +CAPABILITIES = ["SR_PROBE", "SR_UPDATE", "SR_CACHING", + "VDI_CREATE", "VDI_DELETE", "VDI_ATTACH", "VDI_DETACH", + "VDI_UPDATE", "VDI_CLONE", "VDI_SNAPSHOT", "VDI_RESIZE", "VDI_MIRROR", + "VDI_GENERATE_CONFIG", + "VDI_RESET_ON_BOOT/2", "ATOMIC_PAUSE"] + +CONFIGURATION = [['server', 'Full path to share on gluster server (required, ex: "192.168.0.12:/gv0")'], + ['backupservers', 'list of servers separated by ":"'], + ['fetchattempts', 'number of attempts to fetch files before switching to the backup server'] + ] + +DRIVER_INFO = { + 'name': 'GlusterFS VHD', + 'description': 'SR plugin which stores disks as VHD files on a GlusterFS storage', + 'vendor': 'Vates SAS', + 'copyright': '(C) 2020 Vates SAS', + 'driver_version': '1.0', + 'required_api_version': '1.0', + 'capabilities': CAPABILITIES, + 'configuration': CONFIGURATION +} + +DRIVER_CONFIG = {"ATTACH_FROM_CONFIG_WITH_TAPDISK": True} + +# The mountpoint for the directory when performing an sr_probe. All probes +# are guaranteed to be serialised by xapi, so this single mountpoint is fine. +PROBE_MOUNTPOINT = os.path.join(SR.MOUNT_BASE, "probe") + + +class GlusterFSException(Exception): + def __init__(self, errstr): + self.errstr = errstr + + +# mountpoint = /var/run/sr-mount/GlusterFS//uuid +# linkpath = mountpoint/uuid - path to SR directory on share +# path = /var/run/sr-mount/uuid - symlink to SR directory on share +class GlusterFSSR(FileSR.FileSR): + """Gluster file-based storage repository""" + + DRIVER_TYPE = 'glusterfs' + + def handles(sr_type): + # fudge, because the parent class (FileSR) checks for smb to alter its behavior + return sr_type == GlusterFSSR.DRIVER_TYPE or sr_type == 'smb' + + handles = staticmethod(handles) + + def load(self, sr_uuid): + if not self._is_glusterfs_available(): + raise xs_errors.XenError( + 'SRUnavailable', + opterr='glusterfs is not installed' + ) + + self.ops_exclusive = FileSR.OPS_EXCLUSIVE + self.lock = Lock(vhdutil.LOCK_TYPE_SR, self.uuid) + self.sr_vditype = SR.DEFAULT_TAP + self.driver_config = DRIVER_CONFIG + if 'server' not in self.dconf: + raise xs_errors.XenError('ConfigServerMissing') + self.remoteserver = self.dconf['server'] + if self.sr_ref and self.session is not None: + self.sm_config = self.session.xenapi.SR.get_sm_config(self.sr_ref) + else: + self.sm_config = self.srcmd.params.get('sr_sm_config') or {} + self.mountpoint = os.path.join(SR.MOUNT_BASE, 'GlusterFS', self.remoteserver.split(':')[0], sr_uuid) + self.linkpath = os.path.join(self.mountpoint, sr_uuid or "") + self.path = os.path.join(SR.MOUNT_BASE, sr_uuid) + self._check_o_direct() + + def checkmount(self): + return util.ioretry(lambda: ((util.pathexists(self.mountpoint) and + util.ismount(self.mountpoint)) and + util.pathexists(self.linkpath))) + + def mount(self, mountpoint=None): + """Mount the remote gluster export at 'mountpoint'""" + if mountpoint is None: + mountpoint = self.mountpoint + elif not util.is_string(mountpoint) or mountpoint == "": + raise GlusterFSException("mountpoint not a string object") + + try: + if not util.ioretry(lambda: util.isdir(mountpoint)): + util.ioretry(lambda: util.makedirs(mountpoint)) + except util.CommandException, inst: + raise GlusterFSException("Failed to make directory: code is %d" % inst.code) + try: + options = [] + if 'backupservers' in self.dconf: + options.append('backup-volfile-servers=' + self.dconf['backupservers']) + if 
'fetchattempts' in self.dconf: + options.append('fetch-attempts=' + self.dconf['fetchattempts']) + if options: + options = ['-o', ','.join(options)] + command = ["mount", '-t', 'glusterfs', self.remoteserver, mountpoint] + options + util.ioretry(lambda: util.pread(command), errlist=[errno.EPIPE, errno.EIO], maxretry=2, nofail=True) + except util.CommandException, inst: + syslog(_syslog.LOG_ERR, 'GlusterFS mount failed ' + inst.__str__()) + raise GlusterFSException("mount failed with return code %d" % inst.code) + + # Sanity check to ensure that the user has at least RO access to the + # mounted share. Windows sharing and security settings can be tricky. + try: + util.listdir(mountpoint) + except util.CommandException: + try: + self.unmount(mountpoint, True) + except GlusterFSException: + util.logException('GlusterFSSR.unmount()') + raise GlusterFSException("Permission denied. Please check user privileges.") + + def unmount(self, mountpoint, rmmountpoint): + try: + util.pread(["umount", mountpoint]) + except util.CommandException, inst: + raise GlusterFSException("umount failed with return code %d" % inst.code) + if rmmountpoint: + try: + os.rmdir(mountpoint) + except OSError, inst: + raise GlusterFSException("rmdir failed with error '%s'" % inst.strerror) + + def attach(self, sr_uuid): + if not self.checkmount(): + try: + self.mount() + os.symlink(self.linkpath, self.path) + except GlusterFSException as exc: + raise xs_errors.SROSError(12, exc.errstr) + self.attached = True + + def probe(self): + try: + self.mount(PROBE_MOUNTPOINT) + sr_list = filter(util.match_uuid, util.listdir(PROBE_MOUNTPOINT)) + self.unmount(PROBE_MOUNTPOINT, True) + except (util.CommandException, xs_errors.XenError): + raise + # Create a dictionary from the SR uuids to feed SRtoXML() + sr_dict = {sr_uuid: {} for sr_uuid in sr_list} + return util.SRtoXML(sr_dict) + + def detach(self, sr_uuid): + if not self.checkmount(): + return + util.SMlog("Aborting GC/coalesce") + cleanup.abort(self.uuid) + # Change directory to avoid unmount conflicts + os.chdir(SR.MOUNT_BASE) + self.unmount(self.mountpoint, True) + os.unlink(self.path) + self.attached = False + + def create(self, sr_uuid, size): + if self.checkmount(): + raise xs_errors.SROSError(113, 'GlusterFS mount point already attached') + + try: + self.mount() + except GlusterFSException, exc: + # noinspection PyBroadException + try: + os.rmdir(self.mountpoint) + except: + # we have no recovery strategy + pass + raise xs_errors.SROSError(111, "GlusterFS mount error [opterr=%s]" % exc.errstr) + + if util.ioretry(lambda: util.pathexists(self.linkpath)): + if len(util.ioretry(lambda: util.listdir(self.linkpath))) != 0: + self.detach(sr_uuid) + raise xs_errors.XenError('SRExists') + else: + try: + util.ioretry(lambda: util.makedirs(self.linkpath)) + os.symlink(self.linkpath, self.path) + except util.CommandException, inst: + if inst.code != errno.EEXIST: + try: + self.unmount(self.mountpoint, True) + except GlusterFSException: + util.logException('GlusterFSSR.unmount()') + raise xs_errors.SROSError(116, + "Failed to create GlusterFS SR. 
remote directory creation error: {}".format( + os.strerror(inst.code))) + self.detach(sr_uuid) + + def delete(self, sr_uuid): + # try to remove/delete non VDI contents first + super(GlusterFSSR, self).delete(sr_uuid) + try: + if self.checkmount(): + self.detach(sr_uuid) + self.mount() + if util.ioretry(lambda: util.pathexists(self.linkpath)): + util.ioretry(lambda: os.rmdir(self.linkpath)) + self.unmount(self.mountpoint, True) + except util.CommandException, inst: + self.detach(sr_uuid) + if inst.code != errno.ENOENT: + raise xs_errors.SROSError(114, "Failed to remove GlusterFS mount point") + + def vdi(self, uuid, loadLocked=False): + return GlusterFSFileVDI(self, uuid) + + @staticmethod + def _is_glusterfs_available(): + import distutils.spawn + return distutils.spawn.find_executable('glusterfs') + + +class GlusterFSFileVDI(FileSR.FileVDI): + def attach(self, sr_uuid, vdi_uuid): + if not hasattr(self, 'xenstore_data'): + self.xenstore_data = {} + + self.xenstore_data['storage-type'] = GlusterFSSR.DRIVER_TYPE + + return super(GlusterFSFileVDI, self).attach(sr_uuid, vdi_uuid) + + def generate_config(self, sr_uuid, vdi_uuid): + util.SMlog("SMBFileVDI.generate_config") + if not util.pathexists(self.path): + raise xs_errors.XenError('VDIUnavailable') + resp = {'device_config': self.sr.dconf, + 'sr_uuid': sr_uuid, + 'vdi_uuid': vdi_uuid, + 'sr_sm_config': self.sr.sm_config, + 'command': 'vdi_attach_from_config'} + # Return the 'config' encoded within a normal XMLRPC response so that + # we can use the regular response/error parsing code. + config = xmlrpclib.dumps(tuple([resp]), "vdi_attach_from_config") + return xmlrpclib.dumps((config,), "", True) + + def attach_from_config(self, sr_uuid, vdi_uuid): + try: + if not util.pathexists(self.sr.path): + self.sr.attach(sr_uuid) + except: + util.logException("SMBFileVDI.attach_from_config") + raise xs_errors.XenError('SRUnavailable', + opterr='Unable to attach from config') + + +if __name__ == '__main__': + SRCommand.run(GlusterFSSR, DRIVER_INFO) +else: + SR.registerSR(GlusterFSSR) diff --git a/drivers/cleanup.py b/drivers/cleanup.py index 76fcb8d1a..24496ac4c 100755 --- a/drivers/cleanup.py +++ b/drivers/cleanup.py @@ -2987,7 +2987,9 @@ def normalizeType(type): if type in ["lvm", "lvmoiscsi", "lvmohba", "lvmofcoe"]: # temporary while LVHD is symlinked as LVM type = SR.TYPE_LVHD - if type in ["ext", "nfs", "ocfsoiscsi", "ocfsohba", "smb"]: + if type in [ + "ext", "nfs", "ocfsoiscsi", "ocfsohba", "smb", "cephfs", "glusterfs", + ]: type = SR.TYPE_FILE if not type in SR.TYPES: raise util.SMException("Unsupported SR type: %s" % type) From 95942ac994b2a1f9259a7c4e3554a6ec88fe7dc6 Mon Sep 17 00:00:00 2001 From: Ronan Abhamon Date: Mon, 20 Jul 2020 16:26:42 +0200 Subject: [PATCH 05/72] feat(drivers): add XFS driver Originally-by: Ronan Abhamon This version obtained through merge in ff1bf65e57f1c7584748628a5731e40e8a041d51: git restore -SW -s ydi/forks/2.30.7/xfs drivers/EXTSR.py mv drivers/EXTSR.py drivers/XFSSR.py git restore -SW drivers/EXTSR.py Signed-off-by: Yann Dirson --- Makefile | 1 + drivers/XFSSR.py | 249 +++++++++++++++++++++++++++++++++++++++++++++ drivers/cleanup.py | 1 + 3 files changed, 251 insertions(+) create mode 100755 drivers/XFSSR.py diff --git a/Makefile b/Makefile index d4f540f48..cdf39f32b 100755 --- a/Makefile +++ b/Makefile @@ -16,6 +16,7 @@ SM_DRIVERS += SMB SM_DRIVERS += LVHDoFCoE SM_DRIVERS += CephFS SM_DRIVERS += GlusterFS +SM_DRIVERS += XFS SM_LIBS := SR SM_LIBS += SRCommand diff --git a/drivers/XFSSR.py b/drivers/XFSSR.py 
new file mode 100755 index 000000000..1dfde0956 --- /dev/null +++ b/drivers/XFSSR.py @@ -0,0 +1,249 @@ +#!/usr/bin/python3 +# +# Original work copyright (C) Citrix Systems Inc. +# Modified work copyright (C) Vates SAS and XCP-ng community +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published +# by the Free Software Foundation; version 2.1 only. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +# +# XFSSR: Based on local-file storage repository, mounts xfs partition + +import SR +from SR import deviceCheck +import SRCommand +import FileSR +import util +import lvutil +import scsiutil + +import os +import xs_errors +import vhdutil +from lock import Lock +from constants import EXT_PREFIX + +CAPABILITIES = ["SR_PROBE", "SR_UPDATE", "SR_SUPPORTS_LOCAL_CACHING", \ + "VDI_CREATE", "VDI_DELETE", "VDI_ATTACH", "VDI_DETACH", \ + "VDI_UPDATE", "VDI_CLONE", "VDI_SNAPSHOT", "VDI_RESIZE", "VDI_MIRROR", \ + "VDI_GENERATE_CONFIG", \ + "VDI_RESET_ON_BOOT/2", "ATOMIC_PAUSE", "VDI_CONFIG_CBT", + "VDI_ACTIVATE", "VDI_DEACTIVATE", "THIN_PROVISIONING", "VDI_READ_CACHING"] + +CONFIGURATION = [['device', 'local device path (required) (e.g. /dev/sda3)']] + +DRIVER_INFO = { + 'name': 'Local XFS VHD', + 'description': 'SR plugin which represents disks as VHD files stored on a local XFS filesystem, created inside an LVM volume', + 'vendor': 'Vates SAS', + 'copyright': '(C) 2019 Vates SAS', + 'driver_version': '1.0', + 'required_api_version': '1.0', + 'capabilities': CAPABILITIES, + 'configuration': CONFIGURATION + } + +DRIVER_CONFIG = {"ATTACH_FROM_CONFIG_WITH_TAPDISK": True} + + +class XFSSR(FileSR.FileSR): + """XFS Local file storage repository""" + + DRIVER_TYPE = 'xfs' + + def handles(srtype): + return srtype == XFSSR.DRIVER_TYPE + handles = staticmethod(handles) + + def load(self, sr_uuid): + if not self._is_xfs_available(): + raise xs_errors.XenError( + 'SRUnavailable', + opterr='xfsprogs is not installed' + ) + + self.ops_exclusive = FileSR.OPS_EXCLUSIVE + self.lock = Lock(vhdutil.LOCK_TYPE_SR, self.uuid) + self.sr_vditype = SR.DEFAULT_TAP + + self.path = os.path.join(SR.MOUNT_BASE, sr_uuid) + self.vgname = EXT_PREFIX + sr_uuid + self.remotepath = os.path.join("/dev", self.vgname, sr_uuid) + self.attached = self._checkmount() + self.driver_config = DRIVER_CONFIG + + def delete(self, sr_uuid): + super(XFSSR, self).delete(sr_uuid) + + # Check PVs match VG + try: + for dev in self.dconf['device'].split(','): + cmd = ["pvs", dev] + txt = util.pread2(cmd) + if txt.find(self.vgname) == -1: + raise xs_errors.XenError('VolNotFound', \ + opterr='volume is %s' % self.vgname) + except util.CommandException as inst: + raise xs_errors.XenError('PVSfailed', \ + opterr='error is %d' % inst.code) + + # Remove LV, VG and pv + try: + cmd = ["lvremove", "-f", self.remotepath] + util.pread2(cmd) + + cmd = ["vgremove", self.vgname] + util.pread2(cmd) + + for dev in self.dconf['device'].split(','): + cmd = ["pvremove", dev] + util.pread2(cmd) + except util.CommandException as inst: + raise 
xs_errors.XenError('LVMDelete', \ + opterr='errno is %d' % inst.code) + + def attach(self, sr_uuid): + if not self._checkmount(): + try: + #Activate LV + cmd = ['lvchange', '-ay', self.remotepath] + util.pread2(cmd) + + # make a mountpoint: + if not os.path.isdir(self.path): + os.makedirs(self.path) + except util.CommandException as inst: + raise xs_errors.XenError('LVMMount', \ + opterr='Unable to activate LV. Errno is %d' % inst.code) + + try: + util.pread(["fsck", "-a", self.remotepath]) + except util.CommandException as inst: + if inst.code == 1: + util.SMlog("FSCK detected and corrected FS errors. Not fatal.") + else: + raise xs_errors.XenError('LVMMount', \ + opterr='FSCK failed on %s. Errno is %d' % (self.remotepath, inst.code)) + + try: + util.pread(["mount", self.remotepath, self.path]) + except util.CommandException as inst: + raise xs_errors.XenError('LVMMount', \ + opterr='Failed to mount FS. Errno is %d' % inst.code) + + self.attached = True + + #Update SCSIid string + scsiutil.add_serial_record(self.session, self.sr_ref, \ + scsiutil.devlist_to_serialstring(self.dconf['device'].split(','))) + + # Set the block scheduler + for dev in self.dconf['device'].split(','): + self.block_setscheduler(dev) + + def detach(self, sr_uuid): + super(XFSSR, self).detach(sr_uuid) + try: + # deactivate SR + cmd = ["lvchange", "-an", self.remotepath] + util.pread2(cmd) + except util.CommandException as inst: + raise xs_errors.XenError('LVMUnMount', \ + opterr='lvm -an failed errno is %d' % inst.code) + + @deviceCheck + def probe(self): + return lvutil.srlist_toxml(lvutil.scan_srlist(EXT_PREFIX, self.dconf['device']), + EXT_PREFIX) + + @deviceCheck + def create(self, sr_uuid, size): + if self._checkmount(): + raise xs_errors.XenError('SRExists') + + # Check none of the devices already in use by other PBDs + if util.test_hostPBD_devs(self.session, sr_uuid, self.dconf['device']): + raise xs_errors.XenError('SRInUse') + + # Check serial number entry in SR records + for dev in self.dconf['device'].split(','): + if util.test_scsiserial(self.session, dev): + raise xs_errors.XenError('SRInUse') + + if not lvutil._checkVG(self.vgname): + lvutil.createVG(self.dconf['device'], self.vgname) + + if lvutil._checkLV(self.remotepath): + raise xs_errors.XenError('SRExists') + + try: + numdevs = len(self.dconf['device'].split(',')) + cmd = ["lvcreate", "-n", sr_uuid] + if numdevs > 1: + lowest = -1 + for dev in self.dconf['device'].split(','): + stats = lvutil._getPVstats(dev) + if lowest < 0 or stats['freespace'] < lowest: + lowest = stats['freespace'] + size_mb = (lowest // (1024 * 1024)) * numdevs + + # Add stripe parameter to command + cmd += ["-i", str(numdevs), "-I", "2048"] + else: + stats = lvutil._getVGstats(self.vgname) + size_mb = stats['freespace'] // (1024 * 1024) + assert(size_mb > 0) + cmd += ["-L", str(size_mb), self.vgname] + text = util.pread(cmd) + + cmd = ["lvchange", "-ay", self.remotepath] + text = util.pread(cmd) + except util.CommandException as inst: + raise xs_errors.XenError('LVMCreate', \ + opterr='lv operation, error %d' % inst.code) + except AssertionError: + raise xs_errors.XenError('SRNoSpace', \ + opterr='Insufficient space in VG %s' % self.vgname) + + try: + util.pread2(["mkfs.xfs", self.remotepath]) + except util.CommandException as inst: + raise xs_errors.XenError('LVMFilesystem', \ + opterr='mkfs failed error %d' % inst.code) + + #Update serial number string + scsiutil.add_serial_record(self.session, self.sr_ref, \ + 
scsiutil.devlist_to_serialstring(self.dconf['device'].split(','))) + + def vdi(self, uuid, loadLocked = False): + return XFSFileVDI(self, uuid) + + @staticmethod + def _is_xfs_available(): + import distutils.spawn + return distutils.spawn.find_executable('mkfs.xfs') + + +class XFSFileVDI(FileSR.FileVDI): + def attach(self, sr_uuid, vdi_uuid): + if not hasattr(self, 'xenstore_data'): + self.xenstore_data = {} + + self.xenstore_data['storage-type'] = XFSSR.DRIVER_TYPE + + return super(XFSFileVDI, self).attach(sr_uuid, vdi_uuid) + + +if __name__ == '__main__': + SRCommand.run(XFSSR, DRIVER_INFO) +else: + SR.registerSR(XFSSR) diff --git a/drivers/cleanup.py b/drivers/cleanup.py index 24496ac4c..7fb8f8fa2 100755 --- a/drivers/cleanup.py +++ b/drivers/cleanup.py @@ -2989,6 +2989,7 @@ def normalizeType(type): type = SR.TYPE_LVHD if type in [ "ext", "nfs", "ocfsoiscsi", "ocfsohba", "smb", "cephfs", "glusterfs", + "xfs" ]: type = SR.TYPE_FILE if not type in SR.TYPES: From 90476c908f8e9f3e190dc92abe14288bfcaa1cfd Mon Sep 17 00:00:00 2001 From: Ronan Abhamon Date: Wed, 12 Aug 2020 11:14:33 +0200 Subject: [PATCH 06/72] feat(drivers): add ZFS driver to avoid losing VDI metadata (xcp-ng/xcp#401) --- Makefile | 1 + drivers/XE_SR_ERRORCODES.xml | 10 +++ drivers/ZFSSR.py | 137 +++++++++++++++++++++++++++++++++++ drivers/cleanup.py | 2 +- 4 files changed, 149 insertions(+), 1 deletion(-) create mode 100644 drivers/ZFSSR.py diff --git a/Makefile b/Makefile index cdf39f32b..95aa5b2b6 100755 --- a/Makefile +++ b/Makefile @@ -17,6 +17,7 @@ SM_DRIVERS += LVHDoFCoE SM_DRIVERS += CephFS SM_DRIVERS += GlusterFS SM_DRIVERS += XFS +SM_DRIVERS += ZFS SM_LIBS := SR SM_LIBS += SRCommand diff --git a/drivers/XE_SR_ERRORCODES.xml b/drivers/XE_SR_ERRORCODES.xml index 47fefd830..120b92fc9 100755 --- a/drivers/XE_SR_ERRORCODES.xml +++ b/drivers/XE_SR_ERRORCODES.xml @@ -915,5 +915,15 @@ 1200 + + ZFSSRCreate + ZFS SR creation error + 5000 + + + ZFSSRDelete + ZFS SR deletion error + 5001 + diff --git a/drivers/ZFSSR.py b/drivers/ZFSSR.py new file mode 100644 index 000000000..1b2f398f6 --- /dev/null +++ b/drivers/ZFSSR.py @@ -0,0 +1,137 @@ +#!/usr/bin/env python +# +# Copyright (C) 2020 Vates SAS +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
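+#
+# ZFSSR: Based on FileSR, stores VHD files on an existing ZFS mountpoint
+# (supplied through the 'location' device-config parameter defined below)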
+ +import SR +import SRCommand + +import FileSR + +import util +import xs_errors + +CAPABILITIES = [ + 'SR_PROBE', + 'SR_UPDATE', + 'VDI_CREATE', + 'VDI_DELETE', + 'VDI_ATTACH', + 'VDI_DETACH', + 'VDI_CLONE', + 'VDI_SNAPSHOT', + 'VDI_RESIZE', + 'VDI_MIRROR', + 'VDI_GENERATE_CONFIG', + 'ATOMIC_PAUSE', + 'VDI_CONFIG_CBT', + 'VDI_ACTIVATE', + 'VDI_DEACTIVATE', + 'THIN_PROVISIONING' +] + +CONFIGURATION = [ + ['location', 'local ZFS directory path (required)'] +] + +DRIVER_INFO = { + 'name': 'Local ZFS VHD', + 'description': + 'SR plugin which represents disks as VHD files stored on a ZFS disk', + 'vendor': 'Vates SAS', + 'copyright': '(C) 2020 Vates SAS', + 'driver_version': '1.0', + 'required_api_version': '1.0', + 'capabilities': CAPABILITIES, + 'configuration': CONFIGURATION +} + + +class ZFSSR(FileSR.FileSR): + DRIVER_TYPE = 'zfs' + + @staticmethod + def handles(type): + return type == ZFSSR.DRIVER_TYPE + + def load(self, sr_uuid): + if not self._is_zfs_available(): + raise xs_errors.XenError( + 'SRUnavailable', + opterr='zfs is not installed or module is not loaded' + ) + return super(ZFSSR, self).load(sr_uuid) + + def create(self, sr_uuid, size): + if not self._is_zfs_path(self.remotepath): + raise xs_errors.XenError( + 'ZFSSRCreate', + opterr='Cannot create SR, path is not a ZFS mountpoint' + ) + return super(ZFSSR, self).create(sr_uuid, size) + + def delete(self, sr_uuid): + if not self._checkmount(): + raise xs_errors.XenError( + 'ZFSSRDelete', + opterr='ZFS SR is not mounted or uses an invalid FS type' + ) + return super(ZFSSR, self).delete(sr_uuid) + + def attach(self, sr_uuid): + if not self._is_zfs_path(self.remotepath): + raise xs_errors.XenError( + 'SRUnavailable', + opterr='Invalid ZFS path' + ) + return super(ZFSSR, self).attach(sr_uuid) + + def detach(self, sr_uuid): + return super(ZFSSR, self).detach(sr_uuid) + + def vdi(self, uuid, loadLocked=False): + return ZFSFileVDI(self, uuid) + + # Ensure _checkmount is overridden to prevent bad behaviors in FileSR. + def _checkmount(self): + return super(ZFSSR, self)._checkmount() and \ + self._is_zfs_path(self.remotepath) + + @staticmethod + def _is_zfs_path(path): + cmd = ['findmnt', '-o', 'FSTYPE', '-n', path] + fs_type = util.pread2(cmd).split('\n')[0] + return fs_type == 'zfs' + + @staticmethod + def _is_zfs_available(): + import distutils.spawn + return distutils.spawn.find_executable('zfs') and \ + util.pathexists('/sys/module/zfs/initstate') + + +class ZFSFileVDI(FileSR.FileVDI): + def attach(self, sr_uuid, vdi_uuid): + if not hasattr(self, 'xenstore_data'): + self.xenstore_data = {} + + self.xenstore_data['storage-type'] = ZFSSR.DRIVER_TYPE + + return super(ZFSFileVDI, self).attach(sr_uuid, vdi_uuid) + + +if __name__ == '__main__': + SRCommand.run(ZFSSR, DRIVER_INFO) +else: + SR.registerSR(ZFSSR) diff --git a/drivers/cleanup.py b/drivers/cleanup.py index 7fb8f8fa2..62e283abc 100755 --- a/drivers/cleanup.py +++ b/drivers/cleanup.py @@ -2989,7 +2989,7 @@ def normalizeType(type): type = SR.TYPE_LVHD if type in [ "ext", "nfs", "ocfsoiscsi", "ocfsohba", "smb", "cephfs", "glusterfs", - "xfs" + "xfs", "zfs" ]: type = SR.TYPE_FILE if not type in SR.TYPES: From 05ec78f199934723320296f27451f4919c6ff7ec Mon Sep 17 00:00:00 2001 From: Ronan Abhamon Date: Mon, 16 Mar 2020 15:39:44 +0100 Subject: [PATCH 07/72] feat(drivers): add LinstorSR driver Some important points: - linstor.KV must use an identifier name that starts with a letter (so it uses a "sr-" prefix). 
- Encrypted VDI are supported with key_hash attribute (not tested, experimental). - When a new LINSTOR volume is created on a host (via snapshot or create), the remaining diskless devices are not necessarily created on other hosts. So if a resource definition exists without local device path, we ask it to LINSTOR. Wait 5s for symlink creation when a new volume is created => 5s is is purely arbitrary, but this guarantees that we do not try to access the volume if the symlink has not yet been created by the udev rule. - Can change the provisioning using the device config 'provisioning' param. - We can only increase volume size (See: https://github.com/LINBIT/linstor-server/issues/66), it would be great if we could shrink volumes to limit the space used by the snapshots. - Inflate/Deflate can only be executed on the master host, a linstor-manager plugin is present to do this from slaves. The same plugin is used to open LINSTOR ports + start controller. - Use a `total_allocated_volume_size` method to have a good idea of the reserved memory Why? Because `physical_free_size` is computed using the LVM used size, in the case of thick provisioning it's ok, but when thin provisioning is choosen LVM returns only the allocated size using the used block count. So this method solves this problem, it takes the fixed virtual volume size of each node to compute the required size to store the volume data. - Call vhd-util on remote hosts using the linstor-manager when necessary, i.e. vhd-util is called to get vhd info, the DRBD device can be in use (and unusable by external processes), so we must use the local LVM device that contains the DRBD data or a remote disk if the DRBD device is diskless. - If a DRBD device is in use when vhdutil.getVHDInfo is called, we must have no errors. So a LinstorVhdUtil wrapper is now used to bypass DRBD layer when VDIs are loaded. - Refresh PhyLink when unpause in called on DRBD devices: We must always recreate the symlink to ensure we have the right info. Why? Because if the volume UUID is changed in LINSTOR the symlink is not directly updated. When live leaf coalesce is executed we have these steps: "A" -> "OLD_A" "B" -> "A" Without symlink update the previous "A" path is reused instead of "B" path. Note: "A", "B" and "OLD_A" are UUIDs. - Since linstor python modules are not present on every XCP-ng host, module imports are protected by try.. except... blocks. 
- Provide a linstor-monitor daemon to check master changes --- Makefile | 12 +- drivers/LinstorSR.py | 2100 +++++++++++++++++++++++++++++++ drivers/XE_SR_ERRORCODES.xml | 36 + drivers/cleanup.py | 366 +++++- drivers/linstor-manager | 272 ++++ drivers/linstorjournaler.py | 155 +++ drivers/linstorvhdutil.py | 186 +++ drivers/linstorvolumemanager.py | 1713 +++++++++++++++++++++++++ drivers/tapdisk-pause | 52 +- drivers/util.py | 43 +- linstor/Makefile | 22 + linstor/linstor-monitord.c | 402 ++++++ systemd/linstor-monitor.service | 13 + tests/mocks/linstor/__init__.py | 0 14 files changed, 5360 insertions(+), 12 deletions(-) create mode 100755 drivers/LinstorSR.py create mode 100755 drivers/linstor-manager create mode 100755 drivers/linstorjournaler.py create mode 100644 drivers/linstorvhdutil.py create mode 100755 drivers/linstorvolumemanager.py create mode 100644 linstor/Makefile create mode 100644 linstor/linstor-monitord.c create mode 100644 systemd/linstor-monitor.service create mode 100644 tests/mocks/linstor/__init__.py diff --git a/Makefile b/Makefile index 95aa5b2b6..3cacbff21 100755 --- a/Makefile +++ b/Makefile @@ -8,6 +8,7 @@ SM_DRIVERS += Dummy SM_DRIVERS += udev SM_DRIVERS += ISO SM_DRIVERS += HBA +SM_DRIVERS += Linstor SM_DRIVERS += LVHD SM_DRIVERS += LVHDoISCSI SM_DRIVERS += LVHDoHBA @@ -31,6 +32,9 @@ SM_LIBS += verifyVHDsOnSR SM_LIBS += scsiutil SM_LIBS += scsi_host_rescan SM_LIBS += vhdutil +SM_LIBS += linstorjournaler +SM_LIBS += linstorvhdutil +SM_LIBS += linstorvolumemanager SM_LIBS += lvhdutil SM_LIBS += cifutils SM_LIBS += xs_errors @@ -96,7 +100,8 @@ SM_PY_FILES = $(foreach LIB, $(SM_LIBS), drivers/$(LIB).py) $(foreach DRIVER, $( .PHONY: build build: - make -C dcopy + make -C dcopy + make -C linstor .PHONY: precommit precommit: build @@ -173,6 +178,8 @@ install: precheck $(SM_STAGING)/$(SYSTEMD_SERVICE_DIR) install -m 644 systemd/SMGC@.service \ $(SM_STAGING)/$(SYSTEMD_SERVICE_DIR) + install -m 644 systemd/linstor-monitor.service \ + $(SM_STAGING)/$(SYSTEMD_SERVICE_DIR) for i in $(UDEV_RULES); do \ install -m 644 udev/$$i.rules \ $(SM_STAGING)$(UDEV_RULES_DIR); done @@ -190,6 +197,7 @@ install: precheck cd $(SM_STAGING)$(SM_DEST) && rm -f LVHDoFCoESR && ln -sf LVHDoFCoESR.py LVMoFCoESR ln -sf $(SM_DEST)mpathutil.py $(SM_STAGING)/sbin/mpathutil install -m 755 drivers/02-vhdcleanup $(SM_STAGING)$(MASTER_SCRIPT_DEST) + install -m 755 drivers/linstor-manager $(SM_STAGING)$(PLUGIN_SCRIPT_DEST) install -m 755 drivers/lvhd-thin $(SM_STAGING)$(PLUGIN_SCRIPT_DEST) install -m 755 drivers/on_slave.py $(SM_STAGING)$(PLUGIN_SCRIPT_DEST)/on-slave install -m 755 drivers/testing-hooks $(SM_STAGING)$(PLUGIN_SCRIPT_DEST) @@ -209,6 +217,7 @@ install: precheck mkdir -p $(SM_STAGING)/etc/xapi.d/xapi-pre-shutdown/ install -m 755 scripts/stop_all_gc $(SM_STAGING)/etc/xapi.d/xapi-pre-shutdown/ $(MAKE) -C dcopy install DESTDIR=$(SM_STAGING) + $(MAKE) -C linstor install DESTDIR=$(SM_STAGING) ln -sf $(SM_DEST)blktap2.py $(SM_STAGING)$(BIN_DEST)/blktap2 ln -sf $(SM_DEST)lcache.py $(SM_STAGING)$(BIN_DEST)tapdisk-cache-stats ln -sf /dev/null $(SM_STAGING)$(UDEV_RULES_DIR)/69-dm-lvm-metad.rules @@ -219,4 +228,3 @@ install: precheck .PHONY: clean clean: rm -rf $(SM_STAGING) - diff --git a/drivers/LinstorSR.py b/drivers/LinstorSR.py new file mode 100755 index 000000000..ffd70abfc --- /dev/null +++ b/drivers/LinstorSR.py @@ -0,0 +1,2100 @@ +#!/usr/bin/env python +# +# Copyright (C) 2020 Vates SAS - ronan.abhamon@vates.fr +# +# This program is free software: you can redistribute it and/or modify +# it 
under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +from constants import CBTLOG_TAG + +try: + from linstorjournaler import LinstorJournaler + from linstorvhdutil import LinstorVhdUtil + from linstorvolumemanager \ + import LinstorVolumeManager, LinstorVolumeManagerError + LINSTOR_AVAILABLE = True +except ImportError: + LINSTOR_AVAILABLE = False + +from lock import Lock, LOCK_TYPE_GC_RUNNING +import blktap2 +import cleanup +import errno +import functools +import scsiutil +import SR +import SRCommand +import time +import traceback +import util +import VDI +import vhdutil +import xmlrpclib +import xs_errors + +from srmetadata import \ + NAME_LABEL_TAG, NAME_DESCRIPTION_TAG, IS_A_SNAPSHOT_TAG, SNAPSHOT_OF_TAG, \ + TYPE_TAG, VDI_TYPE_TAG, READ_ONLY_TAG, SNAPSHOT_TIME_TAG, \ + METADATA_OF_POOL_TAG + +HIDDEN_TAG = 'hidden' + +# ============================================================================== + +# TODO: Supports 'VDI_INTRODUCE', 'VDI_RESET_ON_BOOT/2', 'SR_TRIM', +# 'VDI_CONFIG_CBT', 'SR_PROBE' + +CAPABILITIES = [ + 'ATOMIC_PAUSE', + 'SR_UPDATE', + 'VDI_CREATE', + 'VDI_DELETE', + 'VDI_UPDATE', + 'VDI_ATTACH', + 'VDI_DETACH', + 'VDI_ACTIVATE', + 'VDI_DEACTIVATE', + 'VDI_CLONE', + 'VDI_MIRROR', + 'VDI_RESIZE', + 'VDI_SNAPSHOT', + 'VDI_GENERATE_CONFIG' +] + +CONFIGURATION = [ + ['group-name', 'LVM group name'], + ['hosts', 'host names to use'], + ['redundancy', 'replication count'], + ['provisioning', '"thin" or "thick" are accepted'] +] + +DRIVER_INFO = { + 'name': 'LINSTOR resources on XCP-ng', + 'description': 'SR plugin which uses Linstor to manage VDIs', + 'vendor': 'Vates', + 'copyright': '(C) 2020 Vates', + 'driver_version': '1.0', + 'required_api_version': '1.0', + 'capabilities': CAPABILITIES, + 'configuration': CONFIGURATION +} + +DRIVER_CONFIG = {'ATTACH_FROM_CONFIG_WITH_TAPDISK': False} + +OPS_EXCLUSIVE = [ + 'sr_create', 'sr_delete', 'sr_attach', 'sr_detach', 'sr_scan', + 'sr_update', 'vdi_create', 'vdi_delete', 'vdi_clone', 'vdi_snapshot' +] + +# ============================================================================== +# Misc helpers used by LinstorSR and linstor-thin plugin. +# ============================================================================== + + +def compute_volume_size(virtual_size, image_type): + if image_type == vhdutil.VDI_TYPE_VHD: + # All LINSTOR VDIs have the metadata area preallocated for + # the maximum possible virtual size (for fast online VDI.resize). 
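+        # (Assumption about the vhdutil helpers used next: calcOverheadEmpty()
+        # returns the static VHD header/BAT overhead for a MAX_SIZE image and
+        # calcOverheadBitmap() the per-block bitmap overhead, so later online
+        # resizes never need extra LINSTOR space for VHD metadata.)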
+ meta_overhead = vhdutil.calcOverheadEmpty(LinstorVDI.MAX_SIZE) + bitmap_overhead = vhdutil.calcOverheadBitmap(virtual_size) + virtual_size += meta_overhead + bitmap_overhead + elif image_type != vhdutil.VDI_TYPE_RAW: + raise Exception('Invalid image type: {}'.format(image_type)) + + return LinstorVolumeManager.round_up_volume_size(virtual_size) + + +def try_lock(lock): + for i in range(20): + if lock.acquireNoblock(): + return + time.sleep(1) + raise util.SRBusyException() + + +def attach_thin(session, journaler, linstor, sr_uuid, vdi_uuid): + volume_metadata = linstor.get_volume_metadata(vdi_uuid) + image_type = volume_metadata.get(VDI_TYPE_TAG) + if image_type == vhdutil.VDI_TYPE_RAW: + return + + lock = Lock(vhdutil.LOCK_TYPE_SR, sr_uuid) + try: + try_lock(lock) + + device_path = linstor.get_device_path(vdi_uuid) + + # If the virtual VHD size is lower than the LINSTOR volume size, + # there is nothing to do. + vhd_size = compute_volume_size( + LinstorVhdUtil(session, linstor).get_size_virt(vdi_uuid), + image_type + ) + + volume_info = linstor.get_volume_info(vdi_uuid) + volume_size = volume_info.virtual_size + + if vhd_size > volume_size: + inflate( + journaler, linstor, vdi_uuid, device_path, + vhd_size, volume_size + ) + finally: + lock.release() + + +def detach_thin(session, linstor, sr_uuid, vdi_uuid): + volume_metadata = linstor.get_volume_metadata(vdi_uuid) + image_type = volume_metadata.get(VDI_TYPE_TAG) + if image_type == vhdutil.VDI_TYPE_RAW: + return + + lock = Lock(vhdutil.LOCK_TYPE_SR, sr_uuid) + try: + try_lock(lock) + + vdi_ref = session.xenapi.VDI.get_by_uuid(vdi_uuid) + vbds = session.xenapi.VBD.get_all_records_where( + 'field "VDI" = "{}"'.format(vdi_ref) + ) + + num_plugged = 0 + for vbd_rec in vbds.values(): + if vbd_rec['currently_attached']: + num_plugged += 1 + if num_plugged > 1: + raise xs_errors.XenError( + 'VDIUnavailable', + opterr='Cannot deflate VDI {}, already used by ' + 'at least 2 VBDs'.format(vdi_uuid) + ) + + device_path = linstor.get_device_path(vdi_uuid) + new_volume_size = LinstorVolumeManager.round_up_volume_size( + LinstorVhdUtil(session, linstor).get_size_phys(device_path) + ) + + volume_info = linstor.get_volume_info(vdi_uuid) + old_volume_size = volume_info.virtual_size + deflate(vdi_uuid, device_path, new_volume_size, old_volume_size) + finally: + lock.release() + + +def inflate(journaler, linstor, vdi_uuid, vdi_path, new_size, old_size): + # Only inflate if the LINSTOR volume capacity is not enough. + new_size = LinstorVolumeManager.round_up_volume_size(new_size) + if new_size <= old_size: + return + + util.SMlog( + 'Inflate {} (new VHD size={}, previous={})' + .format(vdi_uuid, new_size, old_size) + ) + + journaler.create( + LinstorJournaler.INFLATE, vdi_uuid, old_size + ) + linstor.resize_volume(vdi_uuid, new_size) + + if not util.zeroOut( + vdi_path, new_size - vhdutil.VHD_FOOTER_SIZE, + vhdutil.VHD_FOOTER_SIZE + ): + raise xs_errors.XenError( + 'EIO', + opterr='Failed to zero out VHD footer {}'.format(vdi_path) + ) + + vhdutil.setSizePhys(vdi_path, new_size, False) + journaler.remove(LinstorJournaler.INFLATE, vdi_uuid) + + +def deflate(vdi_uuid, vdi_path, new_size, old_size): + new_size = LinstorVolumeManager.round_up_volume_size(new_size) + if new_size >= old_size: + return + + util.SMlog( + 'Deflate {} (new size={}, previous={})' + .format(vdi_uuid, new_size, old_size) + ) + + vhdutil.setSizePhys(vdi_path, new_size) + # TODO: Change the LINSTOR volume size using linstor.resize_volume. 
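+
+# Minimal sketch (an illustration, not part of the driver: it assumes the
+# caller already holds the journaler/linstor objects and the VDI's device
+# path) of how the helpers above fit together when growing a thin volume,
+# mirroring what attach_thin() does:
+#
+#   new_size = compute_volume_size(new_virtual_size, vhdutil.VDI_TYPE_VHD)
+#   volume_info = linstor.get_volume_info(vdi_uuid)
+#   inflate(journaler, linstor, vdi_uuid, device_path,
+#           new_size, volume_info.virtual_size)
+#
+# inflate() rounds the requested size and returns early if the volume is
+# already large enough, so calling it unconditionally is safe.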
+ + +# ============================================================================== + +# Usage example: +# xe sr-create type=linstor name-label=linstor-sr +# host-uuid=d2deba7a-c5ad-4de1-9a20-5c8df3343e93 +# device-config:hosts=node-linstor1,node-linstor2,node-linstor3 +# device-config:group-name=vg_loop device-config:redundancy=2 + + +class LinstorSR(SR.SR): + DRIVER_TYPE = 'linstor' + + PROVISIONING_TYPES = ['thin', 'thick'] + PROVISIONING_DEFAULT = 'thin' + + MANAGER_PLUGIN = 'linstor-manager' + + # -------------------------------------------------------------------------- + # SR methods. + # -------------------------------------------------------------------------- + + @staticmethod + def handles(type): + return type == LinstorSR.DRIVER_TYPE + + def load(self, sr_uuid): + if not LINSTOR_AVAILABLE: + raise util.SMException( + 'Can\'t load LinstorSR: LINSTOR libraries are missing' + ) + + # Check parameters. + if 'hosts' not in self.dconf or not self.dconf['hosts']: + raise xs_errors.XenError('LinstorConfigHostsMissing') + if 'group-name' not in self.dconf or not self.dconf['group-name']: + raise xs_errors.XenError('LinstorConfigGroupNameMissing') + if 'redundancy' not in self.dconf or not self.dconf['redundancy']: + raise xs_errors.XenError('LinstorConfigRedundancyMissing') + + self.driver_config = DRIVER_CONFIG + + # Check provisioning config. + provisioning = self.dconf.get('provisioning') + if provisioning: + if provisioning in self.PROVISIONING_TYPES: + self._provisioning = provisioning + else: + raise xs_errors.XenError( + 'InvalidArg', + opterr='Provisioning parameter must be one of {}'.format( + self.PROVISIONING_TYPES + ) + ) + else: + self._provisioning = self.PROVISIONING_DEFAULT + + # Note: We don't have access to the session field if the + # 'vdi_attach_from_config' command is executed. + self._has_session = self.sr_ref and self.session is not None + if self._has_session: + self.sm_config = self.session.xenapi.SR.get_sm_config(self.sr_ref) + else: + self.sm_config = self.srcmd.params.get('sr_sm_config') or {} + + provisioning = self.sm_config.get('provisioning') + if provisioning in self.PROVISIONING_TYPES: + self._provisioning = provisioning + + # Define properties for SR parent class. + self.ops_exclusive = OPS_EXCLUSIVE + self.path = LinstorVolumeManager.DEV_ROOT_PATH + self.lock = Lock(vhdutil.LOCK_TYPE_SR, self.uuid) + self.sr_vditype = SR.DEFAULT_TAP + + self._hosts = self.dconf['hosts'].split(',') + self._redundancy = int(self.dconf['redundancy'] or 1) + self._linstor = None # Ensure that LINSTOR attribute exists. + self._journaler = None + + self._is_master = False + if 'SRmaster' in self.dconf and self.dconf['SRmaster'] == 'true': + self._is_master = True + self._group_name = self.dconf['group-name'] + + self._master_uri = None + self._vdi_shared_locked = False + + self._initialized = False + + def _locked_load(method): + @functools.wraps(method) + def wrap(self, *args, **kwargs): + if self._initialized: + return method(self, *args, **kwargs) + self._initialized = True + + if not self._has_session: + if self.srcmd.cmd == 'vdi_attach_from_config': + # We must have a valid LINSTOR instance here without using + # the XAPI. 
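+                    # (Assumption: util.get_master_address() resolves the pool
+                    # master's address from local host configuration, which is
+                    # why it can be used before any XAPI session exists.)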
+ self._master_uri = 'linstor://{}'.format( + util.get_master_address() + ) + self._journaler = LinstorJournaler( + self._master_uri, self._group_name, logger=util.SMlog + ) + + try: + self._linstor = LinstorVolumeManager( + self._master_uri, + self._group_name, + logger=util.SMlog + ) + return + except Exception as e: + util.SMlog( + 'Ignore exception. Failed to build LINSTOR ' + 'instance without session: {}'.format(e) + ) + return + + self._master_uri = 'linstor://{}'.format( + util.get_master_rec(self.session)['address'] + ) + + if not self._is_master: + if self.cmd in [ + 'sr_create', 'sr_delete', 'sr_update', 'sr_probe', + 'sr_scan', 'vdi_create', 'vdi_delete', 'vdi_resize', + 'vdi_snapshot', 'vdi_clone' + ]: + util.SMlog('{} blocked for non-master'.format(self.cmd)) + raise xs_errors.XenError('LinstorMaster') + + # Because the LINSTOR KV objects cache all values, we must lock + # the VDI before the LinstorJournaler/LinstorVolumeManager + # instantiation and before any action on the master to avoid a + # bad read. The lock is also necessary to avoid strange + # behaviors if the GC is executed during an action on a slave. + if self.cmd.startswith('vdi_'): + self._shared_lock_vdi(self.srcmd.params['vdi_uuid']) + self._vdi_shared_locked = True + + self._journaler = LinstorJournaler( + self._master_uri, self._group_name, logger=util.SMlog + ) + + # Ensure ports are opened and LINSTOR controller/satellite + # are activated. + if self.srcmd.cmd == 'sr_create': + # TODO: Disable if necessary + self._enable_linstor_on_all_hosts(status=True) + + try: + # Try to open SR if exists. + self._linstor = LinstorVolumeManager( + self._master_uri, + self._group_name, + repair=self._is_master, + logger=util.SMlog + ) + self._vhdutil = LinstorVhdUtil(self.session, self._linstor) + except Exception as e: + if self.srcmd.cmd == 'sr_create' or \ + self.srcmd.cmd == 'sr_detach': + # Ignore exception in this specific case: sr_create. + # At this moment the LinstorVolumeManager cannot be + # instantiated. Concerning the sr_detach command, we must + # ignore LINSTOR exceptions (if the volume group doesn't + # exist for example after a bad user action). + pass + else: + raise xs_errors.XenError('SRUnavailable', opterr=str(e)) + + if self._linstor: + try: + hosts = self._linstor.disconnected_hosts + except Exception as e: + raise xs_errors.XenError('SRUnavailable', opterr=str(e)) + + if hosts: + util.SMlog('Failed to join node(s): {}'.format(hosts)) + + try: + # If the command is a SR command on the master, we must + # load all VDIs and clean journal transactions. + # We must load the VDIs in the snapshot case too. 
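+                # (Cleaning journal transactions means rolling back any
+                # interrupted inflate/clone operations recorded by the
+                # journaler, see _undo_all_journal_transactions below.)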
+ if self._is_master and self.cmd not in [ + 'vdi_attach', 'vdi_detach', + 'vdi_activate', 'vdi_deactivate', + 'vdi_epoch_begin', 'vdi_epoch_end', + 'vdi_update', 'vdi_destroy' + ]: + self._load_vdis() + self._undo_all_journal_transactions() + self._linstor.remove_resourceless_volumes() + + self._synchronize_metadata() + except Exception as e: + util.SMlog( + 'Ignoring exception in LinstorSR.load: {}'.format(e) + ) + util.SMlog(traceback.format_exc()) + + return method(self, *args, **kwargs) + + return wrap + + @_locked_load + def cleanup(self): + if self._vdi_shared_locked: + self._shared_lock_vdi(self.srcmd.params['vdi_uuid'], locked=False) + + @_locked_load + def create(self, uuid, size): + util.SMlog('LinstorSR.create for {}'.format(self.uuid)) + + if self._redundancy > len(self._hosts): + raise xs_errors.XenError( + 'LinstorSRCreate', + opterr='Redundancy greater than host count' + ) + + xenapi = self.session.xenapi + srs = xenapi.SR.get_all_records_where( + 'field "type" = "{}"'.format(self.DRIVER_TYPE) + ) + srs = dict(filter(lambda e: e[1]['uuid'] != self.uuid, srs.items())) + + for sr in srs.values(): + for pbd in sr['PBDs']: + device_config = xenapi.PBD.get_device_config(pbd) + group_name = device_config.get('group-name') + if group_name and group_name == self._group_name: + raise xs_errors.XenError( + 'LinstorSRCreate', + opterr='group name must be unique' + ) + + # Create SR. + # Throw if the SR already exists. + try: + self._linstor = LinstorVolumeManager.create_sr( + self._master_uri, + self._group_name, + self._hosts, + self._redundancy, + thin_provisioning=self._provisioning == 'thin', + logger=util.SMlog + ) + self._vhdutil = LinstorVhdUtil(self.session, self._linstor) + except Exception as e: + util.SMlog('Failed to create LINSTOR SR: {}'.format(e)) + raise xs_errors.XenError('LinstorSRCreate', opterr=str(e)) + + @_locked_load + def delete(self, uuid): + util.SMlog('LinstorSR.delete for {}'.format(self.uuid)) + cleanup.gc_force(self.session, self.uuid) + + if self.vdis: + raise xs_errors.XenError('SRNotEmpty') + + try: + # TODO: Use specific exceptions. If the LINSTOR group doesn't + # exist, we can remove it without problem. + + # TODO: Maybe remove all volumes unused by the SMAPI. + # We must ensure it's a safe idea... + + self._linstor.destroy() + Lock.cleanupAll(self.uuid) + except Exception as e: + util.SMlog('Failed to delete LINSTOR SR: {}'.format(e)) + raise xs_errors.XenError( + 'LinstorSRDelete', + opterr=str(e) + ) + + @_locked_load + def update(self, uuid): + util.SMlog('LinstorSR.update for {}'.format(self.uuid)) + + # Well, how can we update a SR if it doesn't exist? :thinking: + if not self._linstor: + raise xs_errors.XenError( + 'SRUnavailable', + opterr='no such volume group: {}'.format(self._group_name) + ) + + self._update_stats(0) + + # Update the SR name and description only in LINSTOR metadata. 
+ xenapi = self.session.xenapi + self._linstor.metadata = { + NAME_LABEL_TAG: util.to_plain_string( + xenapi.SR.get_name_label(self.sr_ref) + ), + NAME_DESCRIPTION_TAG: util.to_plain_string( + xenapi.SR.get_name_description(self.sr_ref) + ) + } + + @_locked_load + def attach(self, uuid): + util.SMlog('LinstorSR.attach for {}'.format(self.uuid)) + + if not self._linstor: + raise xs_errors.XenError( + 'SRUnavailable', + opterr='no such group: {}'.format(self._group_name) + ) + + @_locked_load + def detach(self, uuid): + util.SMlog('LinstorSR.detach for {}'.format(self.uuid)) + cleanup.abort(self.uuid) + + @_locked_load + def probe(self): + util.SMlog('LinstorSR.probe for {}'.format(self.uuid)) + # TODO + + @_locked_load + def scan(self, uuid): + util.SMlog('LinstorSR.scan for {}'.format(self.uuid)) + if not self._linstor: + raise xs_errors.XenError( + 'SRUnavailable', + opterr='no such volume group: {}'.format(self._group_name) + ) + + self._update_physical_size() + + for vdi_uuid in self.vdis.keys(): + if self.vdis[vdi_uuid].deleted: + del self.vdis[vdi_uuid] + + # Update the database before the restart of the GC to avoid + # bad sync in the process if new VDIs have been introduced. + ret = super(LinstorSR, self).scan(self.uuid) + self._kick_gc() + return ret + + @_locked_load + def vdi(self, uuid): + return LinstorVDI(self, uuid) + + _locked_load = staticmethod(_locked_load) + + # -------------------------------------------------------------------------- + # Lock. + # -------------------------------------------------------------------------- + + def _shared_lock_vdi(self, vdi_uuid, locked=True): + pools = self.session.xenapi.pool.get_all() + master = self.session.xenapi.pool.get_master(pools[0]) + + method = 'lockVdi' + args = { + 'groupName': self._group_name, + 'srUuid': self.uuid, + 'vdiUuid': vdi_uuid, + 'locked': str(locked) + } + + ret = self.session.xenapi.host.call_plugin( + master, self.MANAGER_PLUGIN, method, args + ) + util.SMlog( + 'call-plugin ({} with {}) returned: {}' + .format(method, args, ret) + ) + if ret == 'False': + raise xs_errors.XenError( + 'VDIUnavailable', + opterr='Plugin {} failed'.format(self.MANAGER_PLUGIN) + ) + + # -------------------------------------------------------------------------- + # Network. + # -------------------------------------------------------------------------- + + def _enable_linstor(self, host, status): + method = 'enable' + args = {'enabled': str(bool(status))} + + ret = self.session.xenapi.host.call_plugin( + host, self.MANAGER_PLUGIN, method, args + ) + util.SMlog( + 'call-plugin ({} with {}) returned: {}'.format(method, args, ret) + ) + if ret == 'False': + raise xs_errors.XenError( + 'SRUnavailable', + opterr='Plugin {} failed'.format(self.MANAGER_PLUGIN) + ) + + def _enable_linstor_on_master(self, status): + pools = self.session.xenapi.pool.get_all() + master = self.session.xenapi.pool.get_master(pools[0]) + self._enable_linstor(master, status) + + def _enable_linstor_on_all_hosts(self, status): + self._enable_linstor_on_master(status) + for slave in util.get_all_slaves(self.session): + self._enable_linstor(slave, status) + + # -------------------------------------------------------------------------- + # Metadata. + # -------------------------------------------------------------------------- + + def _synchronize_metadata_and_xapi(self): + try: + # First synch SR parameters. + self.update(self.uuid) + + # Now update the VDI information in the metadata if required. 
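+            # ("If required" means the name-label/name-description stored in
+            # the LINSTOR volume metadata no longer match what XAPI reports.)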
+            xenapi = self.session.xenapi
+            volumes_metadata = self._linstor.volumes_with_metadata
+            for vdi_uuid, volume_metadata in volumes_metadata.items():
+                try:
+                    vdi_ref = xenapi.VDI.get_by_uuid(vdi_uuid)
+                except Exception:
+                    # Maybe the VDI is not in XAPI yet, don't bother.
+                    continue
+
+                label = util.to_plain_string(
+                    xenapi.VDI.get_name_label(vdi_ref)
+                )
+                description = util.to_plain_string(
+                    xenapi.VDI.get_name_description(vdi_ref)
+                )
+
+                if (
+                    volume_metadata.get(NAME_LABEL_TAG) != label or
+                    volume_metadata.get(NAME_DESCRIPTION_TAG) != description
+                ):
+                    self._linstor.update_volume_metadata(vdi_uuid, {
+                        NAME_LABEL_TAG: label,
+                        NAME_DESCRIPTION_TAG: description
+                    })
+        except Exception as e:
+            raise xs_errors.XenError(
+                'MetadataError',
+                opterr='Error syncing SR metadata and XAPI: {}'.format(e)
+            )
+
+    def _synchronize_metadata(self):
+        if not self._is_master:
+            return
+
+        util.SMlog('Synchronize metadata...')
+        if self.cmd == 'sr_attach':
+            try:
+                util.SMlog(
+                    'Synchronize SR metadata and the state on the storage.'
+                )
+                self._synchronize_metadata_and_xapi()
+            except Exception as e:
+                util.SMlog('Failed to synchronize metadata: {}'.format(e))
+
+    # --------------------------------------------------------------------------
+    # Stats.
+    # --------------------------------------------------------------------------
+
+    def _update_stats(self, virt_alloc_delta):
+        valloc = int(self.session.xenapi.SR.get_virtual_allocation(
+            self.sr_ref
+        ))
+
+        # Update size attributes of the SR parent class.
+        self.virtual_allocation = valloc + virt_alloc_delta
+
+        # Physical size contains the total physical size.
+        # i.e. the sum of the sizes of all devices on all hosts, not the AVG.
+        self._update_physical_size()
+
+        # Notify SR parent class.
+        self._db_update()
+
+    def _update_physical_size(self):
+        # Physical size contains the total physical size.
+        # i.e. the sum of the sizes of all devices on all hosts, not the AVG.
+        self.physical_size = self._linstor.physical_size
+
+        # `self._linstor.physical_free_size` contains the total physical free
+        # space. We can't use it with thin provisioning; instead we report the
+        # total allocated LINSTOR volume size, which gives users a better idea
+        # of the space actually required.
+        self.physical_utilisation = self._linstor.total_allocated_volume_size
+
+        # If Thick provisioning is used, we can use this line instead:
+        # self.physical_utilisation = \
+        #     self.physical_size - self._linstor.physical_free_size
+
+    # --------------------------------------------------------------------------
+    # VDIs.
+    # --------------------------------------------------------------------------
+
+    def _load_vdis(self):
+        if self.vdis:
+            return
+
+        # 1. Get existing VDIs in XAPI.
+        xenapi = self.session.xenapi
+        xapi_vdi_uuids = set()
+        for vdi in xenapi.SR.get_VDIs(self.sr_ref):
+            xapi_vdi_uuids.add(xenapi.VDI.get_uuid(vdi))
+
+        # 2. Get volumes info.
+        all_volume_info = self._linstor.volumes_with_info
+        volumes_metadata = self._linstor.volumes_with_metadata
+
+        # 3. Get CBT vdis.
+ # See: https://support.citrix.com/article/CTX230619 + cbt_vdis = set() + for volume_metadata in volumes_metadata.values(): + cbt_uuid = volume_metadata.get(CBTLOG_TAG) + if cbt_uuid: + cbt_vdis.add(cbt_uuid) + + introduce = False + + if self.cmd == 'sr_scan': + has_clone_entries = list(self._journaler.get_all( + LinstorJournaler.CLONE + ).items()) + + if has_clone_entries: + util.SMlog( + 'Cannot introduce VDIs during scan because it exists ' + 'CLONE entries in journaler on SR {}'.format(self.uuid) + ) + else: + introduce = True + + # 4. Now check all volume info. + vdi_to_snaps = {} + for vdi_uuid, volume_info in all_volume_info.items(): + if vdi_uuid.startswith(cleanup.SR.TMP_RENAME_PREFIX): + continue + + # 4.a. Check if the VDI in LINSTOR is in XAPI VDIs. + if vdi_uuid not in xapi_vdi_uuids: + if not introduce: + continue + + volume_metadata = volumes_metadata.get(vdi_uuid) + if not volume_metadata: + util.SMlog( + 'Skipping volume {} because no metadata could be found' + .format(vdi_uuid) + ) + continue + + util.SMlog( + 'Trying to introduce VDI {} as it is present in ' + 'LINSTOR and not in XAPI...' + .format(vdi_uuid) + ) + + try: + self._linstor.get_device_path(vdi_uuid) + except Exception as e: + util.SMlog( + 'Cannot introduce {}, unable to get path: {}' + .format(vdi_uuid, e) + ) + continue + + name_label = volume_metadata.get(NAME_LABEL_TAG) or '' + type = volume_metadata.get(TYPE_TAG) or 'user' + vdi_type = volume_metadata.get(VDI_TYPE_TAG) + + if not vdi_type: + util.SMlog( + 'Cannot introduce {} '.format(vdi_uuid) + + 'without vdi_type' + ) + continue + + sm_config = { + 'vdi_type': vdi_type + } + + if vdi_type == vhdutil.VDI_TYPE_RAW: + managed = not volume_metadata.get(HIDDEN_TAG) + elif vdi_type == vhdutil.VDI_TYPE_VHD: + vhd_info = self._vhdutil.get_vhd_info(vdi_uuid) + managed = not vhd_info.hidden + if vhd_info.parentUuid: + sm_config['vhd-parent'] = vhd_info.parentUuid + else: + util.SMlog( + 'Cannot introduce {} with invalid VDI type {}' + .format(vdi_uuid, vdi_type) + ) + continue + + util.SMlog( + 'Introducing VDI {} '.format(vdi_uuid) + + ' (name={}, virtual_size={}, physical_size={})'.format( + name_label, + volume_info.virtual_size, + volume_info.physical_size + ) + ) + + vdi_ref = xenapi.VDI.db_introduce( + vdi_uuid, + name_label, + volume_metadata.get(NAME_DESCRIPTION_TAG) or '', + self.sr_ref, + type, + False, # sharable + bool(volume_metadata.get(READ_ONLY_TAG)), + {}, # other_config + vdi_uuid, # location + {}, # xenstore_data + sm_config, + managed, + str(volume_info.virtual_size), + str(volume_info.physical_size) + ) + + is_a_snapshot = volume_metadata.get(IS_A_SNAPSHOT_TAG) + xenapi.VDI.set_is_a_snapshot(vdi_ref, bool(is_a_snapshot)) + if is_a_snapshot: + xenapi.VDI.set_snapshot_time( + vdi_ref, + xmlrpclib.DateTime( + volume_metadata[SNAPSHOT_TIME_TAG] or + '19700101T00:00:00Z' + ) + ) + + snap_uuid = volume_metadata[SNAPSHOT_OF_TAG] + if snap_uuid in vdi_to_snaps: + vdi_to_snaps[snap_uuid].append(vdi_uuid) + else: + vdi_to_snaps[snap_uuid] = [vdi_uuid] + + # 4.b. Add the VDI in the list. + vdi = self.vdi(vdi_uuid) + self.vdis[vdi_uuid] = vdi + + if vdi.vdi_type == vhdutil.VDI_TYPE_VHD: + vdi.sm_config_override['key_hash'] = \ + self._vhdutil.get_key_hash(vdi_uuid) + + # 4.c. Update CBT status of disks either just added + # or already in XAPI. + cbt_uuid = volume_metadata.get(CBTLOG_TAG) + if cbt_uuid in cbt_vdis: + vdi_ref = xenapi.VDI.get_by_uuid(vdi_uuid) + xenapi.VDI.set_cbt_enabled(vdi_ref, True) + # For existing VDIs, update local state too. 
+ # Scan in base class SR updates existing VDIs + # again based on local states. + self.vdis[vdi_uuid].cbt_enabled = True + cbt_vdis.remove(cbt_uuid) + + # 5. Now set the snapshot statuses correctly in XAPI. + for src_uuid in vdi_to_snaps: + try: + src_ref = xenapi.VDI.get_by_uuid(src_uuid) + except Exception: + # The source VDI no longer exists, continue. + continue + + for snap_uuid in vdi_to_snaps[src_uuid]: + try: + # This might fail in cases where its already set. + snap_ref = xenapi.VDI.get_by_uuid(snap_uuid) + xenapi.VDI.set_snapshot_of(snap_ref, src_ref) + except Exception as e: + util.SMlog('Setting snapshot failed: {}'.format(e)) + + # TODO: Check correctly how to use CBT. + # Update cbt_enabled on the right VDI, check LVM/FileSR code. + + # 6. If we have items remaining in this list, + # they are cbt_metadata VDI that XAPI doesn't know about. + # Add them to self.vdis and they'll get added to the DB. + for cbt_uuid in cbt_vdis: + new_vdi = self.vdi(cbt_uuid) + new_vdi.ty = 'cbt_metadata' + new_vdi.cbt_enabled = True + self.vdis[cbt_uuid] = new_vdi + + # 7. Update virtual allocation, build geneology and remove useless VDIs + self.virtual_allocation = 0 + + # 8. Build geneology. + geneology = {} + + for vdi_uuid, vdi in self.vdis.items(): + if vdi.parent: + if vdi.parent in self.vdis: + self.vdis[vdi.parent].read_only = True + if vdi.parent in geneology: + geneology[vdi.parent].append(vdi_uuid) + else: + geneology[vdi.parent] = [vdi_uuid] + if not vdi.hidden: + self.virtual_allocation += vdi.utilisation + + # 9. Remove all hidden leaf nodes to avoid introducing records that + # will be GC'ed. + for vdi_uuid in self.vdis.keys(): + if vdi_uuid not in geneology and self.vdis[vdi_uuid].hidden: + util.SMlog( + 'Scan found hidden leaf ({}), ignoring'.format(vdi_uuid) + ) + del self.vdis[vdi_uuid] + + # -------------------------------------------------------------------------- + # Journals. + # -------------------------------------------------------------------------- + + def _get_vdi_path_and_parent(self, vdi_uuid, volume_name): + try: + device_path = self._linstor.build_device_path(volume_name) + if not util.pathexists(device_path): + return (None, None) + + # If it's a RAW VDI, there is no parent. + volume_metadata = self._linstor.get_volume_metadata(vdi_uuid) + vdi_type = volume_metadata[VDI_TYPE_TAG] + if vdi_type == vhdutil.VDI_TYPE_RAW: + return (device_path, None) + + # Otherwise it's a VHD and a parent can exist. 
+            if not self._vhdutil.check(vdi_uuid):
+                return (None, None)
+
+            vhd_info = self._vhdutil.get_vhd_info(vdi_uuid)
+            if vhd_info:
+                return (device_path, vhd_info.parentUuid)
+        except Exception as e:
+            util.SMlog(
+                'Failed to get VDI path and parent, ignoring: {}'
+                .format(e)
+            )
+        return (None, None)
+
+    def _undo_all_journal_transactions(self):
+        util.SMlog('Undoing all journal transactions...')
+        self.lock.acquire()
+        try:
+            self._handle_interrupted_inflate_ops()
+            self._handle_interrupted_clone_ops()
+        finally:
+            self.lock.release()
+
+    def _handle_interrupted_inflate_ops(self):
+        transactions = self._journaler.get_all(LinstorJournaler.INFLATE)
+        for vdi_uuid, old_size in transactions.items():
+            self._handle_interrupted_inflate(vdi_uuid, old_size)
+            self._journaler.remove(LinstorJournaler.INFLATE, vdi_uuid)
+
+    def _handle_interrupted_clone_ops(self):
+        transactions = self._journaler.get_all(LinstorJournaler.CLONE)
+        for vdi_uuid, clone_info in transactions.items():
+            self._handle_interrupted_clone(vdi_uuid, clone_info)
+            self._journaler.remove(LinstorJournaler.CLONE, vdi_uuid)
+
+    def _handle_interrupted_inflate(self, vdi_uuid, old_size):
+        util.SMlog(
+            '*** INTERRUPTED INFLATE OP: for {} ({})'
+            .format(vdi_uuid, old_size)
+        )
+
+        vdi = self.vdis.get(vdi_uuid)
+        if not vdi:
+            util.SMlog('Cannot deflate missing VDI {}'.format(vdi_uuid))
+            return
+
+        current_size = self._linstor.get_volume_info(vdi_uuid).virtual_size
+        util.zeroOut(
+            vdi.path,
+            current_size - vhdutil.VHD_FOOTER_SIZE,
+            vhdutil.VHD_FOOTER_SIZE
+        )
+        deflate(vdi_uuid, vdi.path, old_size, current_size)
+
+    def _handle_interrupted_clone(
+        self, vdi_uuid, clone_info, force_undo=False
+    ):
+        util.SMlog(
+            '*** INTERRUPTED CLONE OP: for {} ({})'
+            .format(vdi_uuid, clone_info)
+        )
+
+        base_uuid, snap_uuid = clone_info.split('_')
+
+        # Use LINSTOR data because new VDIs may not be in the XAPI.
+        volume_names = self._linstor.volumes_with_name
+
+        # Check if we don't have a base VDI. (If clone failed at startup.)
+        if base_uuid not in volume_names:
+            if vdi_uuid in volume_names:
+                util.SMlog('*** INTERRUPTED CLONE OP: nothing to do')
+                return
+            raise util.SMException(
+                'Base copy {} not present, but no original {} found'
+                .format(base_uuid, vdi_uuid)
+            )
+
+        if force_undo:
+            util.SMlog('Explicit revert')
+            self._undo_clone(
+                volume_names, vdi_uuid, base_uuid, snap_uuid
+            )
+            return
+
+        # If VDI or snap uuid is missing...
+        if vdi_uuid not in volume_names or \
+                (snap_uuid and snap_uuid not in volume_names):
+            util.SMlog('One or both leaves missing => revert')
+            self._undo_clone(volume_names, vdi_uuid, base_uuid, snap_uuid)
+            return
+
+        vdi_path, vdi_parent_uuid = self._get_vdi_path_and_parent(
+            vdi_uuid, volume_names[vdi_uuid]
+        )
+        snap_path, snap_parent_uuid = self._get_vdi_path_and_parent(
+            snap_uuid, volume_names[snap_uuid]
+        )
+
+        if not vdi_path or (snap_uuid and not snap_path):
+            util.SMlog('One or both leaves invalid (and path(s)) => revert')
+            self._undo_clone(volume_names, vdi_uuid, base_uuid, snap_uuid)
+            return
+
+        util.SMlog('Leaves valid but => revert')
+        self._undo_clone(volume_names, vdi_uuid, base_uuid, snap_uuid)
+
+    def _undo_clone(self, volume_names, vdi_uuid, base_uuid, snap_uuid):
+        base_path = self._linstor.build_device_path(volume_names[base_uuid])
+        base_metadata = self._linstor.get_volume_metadata(base_uuid)
+        base_type = base_metadata[VDI_TYPE_TAG]
+
+        if not util.pathexists(base_path):
+            util.SMlog('Base not found! 
Exit...') + util.SMlog('*** INTERRUPTED CLONE OP: rollback fail') + return + + # Un-hide the parent. + self._linstor.update_volume_metadata(base_uuid, {READ_ONLY_TAG: False}) + if base_type == vhdutil.VDI_TYPE_VHD: + vhd_info = self._vhdutil.get_vhd_info(base_uuid, False) + if vhd_info.hidden: + vhdutil.setHidden(base_path, False) + elif base_type == vhdutil.VDI_TYPE_RAW and \ + base_metadata.get(HIDDEN_TAG): + self._linstor.update_volume_metadata( + base_uuid, {HIDDEN_TAG: False} + ) + + # Remove the child nodes. + if snap_uuid and snap_uuid in volume_names: + util.SMlog('Destroying snap {}...'.format(snap_uuid)) + snap_metadata = self._linstor.get_volume_metadata(snap_uuid) + + if snap_metadata.get(VDI_TYPE_TAG) != vhdutil.VDI_TYPE_VHD: + raise util.SMException('Clone {} not VHD'.format(snap_uuid)) + + try: + self._linstor.destroy_volume(snap_uuid) + except Exception as e: + util.SMlog( + 'Cannot destroy snap {} during undo clone: {}' + .format(snap_uuid, e) + ) + + if vdi_uuid in volume_names: + try: + util.SMlog('Destroying {}...'.format(vdi_uuid)) + self._linstor.destroy_volume(vdi_uuid) + except Exception as e: + util.SMlog( + 'Cannot destroy VDI {} during undo clone: {}' + .format(vdi_uuid, e) + ) + # We can get an exception like this: + # "Shutdown of the DRBD resource 'XXX failed", so the + # volume info remains... The problem is we can't rename + # properly the base VDI below this line, so we must change the + # UUID of this bad VDI before. + self._linstor.update_volume_uuid( + vdi_uuid, 'DELETED_' + vdi_uuid, force=True + ) + + # Rename! + self._linstor.update_volume_uuid(base_uuid, vdi_uuid) + + # Inflate to the right size. + if base_type == vhdutil.VDI_TYPE_VHD: + vdi = self.vdi(vdi_uuid) + volume_size = compute_volume_size(vdi.size, vdi.vdi_type) + inflate( + self._journaler, self._linstor, vdi_uuid, vdi.path, + volume_size, vdi.capacity + ) + self.vdis[vdi_uuid] = vdi + + # At this stage, tapdisk and SM vdi will be in paused state. Remove + # flag to facilitate vm deactivate. + vdi_ref = self.session.xenapi.VDI.get_by_uuid(vdi_uuid) + self.session.xenapi.VDI.remove_from_sm_config(vdi_ref, 'paused') + + util.SMlog('*** INTERRUPTED CLONE OP: rollback success') + + # -------------------------------------------------------------------------- + # Misc. + # -------------------------------------------------------------------------- + + def _ensure_space_available(self, amount_needed): + space_available = self._linstor.max_volume_size_allowed + if (space_available < amount_needed): + util.SMlog( + 'Not enough space! Free space: {}, need: {}'.format( + space_available, amount_needed + ) + ) + raise xs_errors.XenError('SRNoSpace') + + def _kick_gc(self): + # Don't bother if an instance already running. This is just an + # optimization to reduce the overhead of forking a new process if we + # don't have to, but the process will check the lock anyways. 
+ lock = Lock(LOCK_TYPE_GC_RUNNING, self.uuid) + if not lock.acquireNoblock(): + if not cleanup.should_preempt(self.session, self.uuid): + util.SMlog('A GC instance already running, not kicking') + return + + util.SMlog('Aborting currently-running coalesce of garbage VDI') + try: + if not cleanup.abort(self.uuid, soft=True): + util.SMlog('The GC has already been scheduled to re-start') + except util.CommandException as e: + if e.code != errno.ETIMEDOUT: + raise + util.SMlog('Failed to abort the GC') + else: + lock.release() + + util.SMlog('Kicking GC') + cleanup.gc(self.session, self.uuid, True) + +# ============================================================================== +# LinstorSr VDI +# ============================================================================== + + +class LinstorVDI(VDI.VDI): + # Warning: Not the same values than vhdutil.VDI_TYPE_*. + # These values represents the types given on the command line. + TYPE_RAW = 'raw' + TYPE_VHD = 'vhd' + + MAX_SIZE = 2 * 1024 * 1024 * 1024 * 1024 # Max VHD size. + + # Metadata size given to the "S" param of vhd-util create. + # "-S size (MB) for metadata preallocation". + # Increase the performance when resize is called. + MAX_METADATA_VIRT_SIZE = 2 * 1024 * 1024 + + # -------------------------------------------------------------------------- + # VDI methods. + # -------------------------------------------------------------------------- + + def load(self, vdi_uuid): + self._lock = self.sr.lock + self._exists = True + self._linstor = self.sr._linstor + + # Update hidden parent property. + self.hidden = False + + def raise_bad_load(e): + util.SMlog( + 'Got exception in LinstorVDI.load: {}'.format(e) + ) + util.SMlog(traceback.format_exc()) + raise xs_errors.XenError( + 'VDIUnavailable', + opterr='Could not load {} because: {}'.format(self.uuid, e) + ) + + # Try to load VDI. + try: + if ( + self.sr.srcmd.cmd == 'vdi_attach_from_config' or + self.sr.srcmd.cmd == 'vdi_detach_from_config' + ) and self.sr.srcmd.params['vdi_uuid'] == self.uuid: + self.vdi_type = vhdutil.VDI_TYPE_RAW + self.path = self.sr.srcmd.params['vdi_path'] + else: + self._determine_type_and_path() + self._load_this() + + util.SMlog('VDI {} loaded! (path={}, hidden={})'.format( + self.uuid, self.path, self.hidden + )) + except LinstorVolumeManagerError as e: + # 1. It may be a VDI deletion. + if e.code == LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS: + if self.sr.srcmd.cmd == 'vdi_delete': + self.deleted = True + return + + # 2. Or maybe a creation. + if self.sr.srcmd.cmd == 'vdi_create': + # Set type attribute of VDI parent class. + # We use VHD by default. + self.vdi_type = vhdutil.VDI_TYPE_VHD + self._key_hash = None # Only used in create. + + self._exists = False + vdi_sm_config = self.sr.srcmd.params.get('vdi_sm_config') + if vdi_sm_config is not None: + type = vdi_sm_config.get('type') + if type is not None: + if type == self.TYPE_RAW: + self.vdi_type = vhdutil.VDI_TYPE_RAW + elif type == self.TYPE_VHD: + self.vdi_type = vhdutil.VDI_TYPE_VHD + else: + raise xs_errors.XenError( + 'VDICreate', + opterr='Invalid VDI type {}'.format(type) + ) + if self.vdi_type == vhdutil.VDI_TYPE_VHD: + self._key_hash = vdi_sm_config.get('key_hash') + + # For the moment we don't have a path. 
+ self._update_device_name(None) + return + raise_bad_load(e) + except Exception as e: + raise_bad_load(e) + + def create(self, sr_uuid, vdi_uuid, size): + # Usage example: + # xe vdi-create sr-uuid=39a5826b-5a90-73eb-dd09-51e3a116f937 + # name-label="linstor-vdi-1" virtual-size=4096MiB sm-config:type=vhd + + # 1. Check if we are on the master and if the VDI doesn't exist. + util.SMlog('LinstorVDI.create for {}'.format(self.uuid)) + if self._exists: + raise xs_errors.XenError('VDIExists') + + assert self.uuid + assert self.ty + assert self.vdi_type + + # 2. Compute size and check space available. + size = vhdutil.validate_and_round_vhd_size(long(size)) + util.SMlog('LinstorVDI.create: type={}, size={}'.format( + self.vdi_type, size + )) + + volume_size = compute_volume_size(size, self.vdi_type) + self.sr._ensure_space_available(volume_size) + + # 3. Set sm_config attribute of VDI parent class. + self.sm_config = self.sr.srcmd.params['vdi_sm_config'] + + # 4. Create! + failed = False + try: + self._linstor.create_volume( + self.uuid, volume_size, persistent=False + ) + volume_info = self._linstor.get_volume_info(self.uuid) + + self._update_device_name(volume_info.name) + + if self.vdi_type == vhdutil.VDI_TYPE_RAW: + self.size = volume_info.virtual_size + else: + vhdutil.create( + self.path, size, False, self.MAX_METADATA_VIRT_SIZE + ) + self.size = self.sr._vhdutil.get_size_virt(self.uuid) + + if self._key_hash: + vhdutil.setKey(self.path, self._key_hash) + + # Because vhdutil commands modify the volume data, + # we must retrieve a new time the utilisation size. + volume_info = self._linstor.get_volume_info(self.uuid) + + volume_metadata = { + NAME_LABEL_TAG: util.to_plain_string(self.label), + NAME_DESCRIPTION_TAG: util.to_plain_string(self.description), + IS_A_SNAPSHOT_TAG: False, + SNAPSHOT_OF_TAG: '', + SNAPSHOT_TIME_TAG: '', + TYPE_TAG: self.ty, + VDI_TYPE_TAG: self.vdi_type, + READ_ONLY_TAG: bool(self.read_only), + METADATA_OF_POOL_TAG: '' + } + self._linstor.set_volume_metadata(self.uuid, volume_metadata) + self._linstor.mark_volume_as_persistent(self.uuid) + except util.CommandException as e: + failed = True + raise xs_errors.XenError( + 'VDICreate', opterr='error {}'.format(e.code) + ) + except Exception as e: + failed = True + raise xs_errors.XenError('VDICreate', opterr='error {}'.format(e)) + finally: + if failed: + util.SMlog('Unable to create VDI {}'.format(self.uuid)) + try: + self._linstor.destroy_volume(self.uuid) + except Exception as e: + util.SMlog( + 'Ignoring exception after fail in LinstorVDI.create: ' + '{}'.format(e) + ) + + self.utilisation = volume_info.physical_size + self.sm_config['vdi_type'] = self.vdi_type + + self.ref = self._db_introduce() + self.sr._update_stats(volume_info.virtual_size) + + return VDI.VDI.get_params(self) + + def delete(self, sr_uuid, vdi_uuid, data_only=False): + util.SMlog('LinstorVDI.delete for {}'.format(self.uuid)) + if self.attached: + raise xs_errors.XenError('VDIInUse') + + if self.deleted: + return super(LinstorVDI, self).delete( + sr_uuid, vdi_uuid, data_only + ) + + vdi_ref = self.sr.srcmd.params['vdi_ref'] + if not self.session.xenapi.VDI.get_managed(vdi_ref): + raise xs_errors.XenError( + 'VDIDelete', + opterr='Deleting non-leaf node not permitted' + ) + + try: + # Remove from XAPI and delete from LINSTOR. 
+ self._linstor.destroy_volume(self.uuid) + if not data_only: + self._db_forget() + + self.sr.lock.cleanupAll(vdi_uuid) + except Exception as e: + util.SMlog( + 'Failed to remove the volume (maybe is leaf coalescing) ' + 'for {} err: {}'.format(self.uuid, e) + ) + raise xs_errors.XenError('VDIDelete', opterr=str(e)) + + if self.uuid in self.sr.vdis: + del self.sr.vdis[self.uuid] + + # TODO: Check size after delete. + self.sr._update_stats(-self.capacity) + self.sr._kick_gc() + return super(LinstorVDI, self).delete(sr_uuid, vdi_uuid, data_only) + + def attach(self, sr_uuid, vdi_uuid): + util.SMlog('LinstorVDI.attach for {}'.format(self.uuid)) + if ( + self.sr.srcmd.cmd != 'vdi_attach_from_config' or + self.sr.srcmd.params['vdi_uuid'] != self.uuid + ) and self.sr._journaler.has_entries(self.uuid): + raise xs_errors.XenError( + 'VDIUnavailable', + opterr='Interrupted operation detected on this VDI, ' + 'scan SR first to trigger auto-repair' + ) + + writable = 'args' not in self.sr.srcmd.params or \ + self.sr.srcmd.params['args'][0] == 'true' + + # We need to inflate the volume if we don't have enough place + # to mount the VHD image. I.e. the volume capacity must be greater + # than the VHD size + bitmap size. + need_inflate = True + if self.vdi_type == vhdutil.VDI_TYPE_RAW or not writable or \ + self.capacity >= compute_volume_size(self.size, self.vdi_type): + need_inflate = False + + if need_inflate: + try: + self._prepare_thin(True) + except Exception as e: + raise xs_errors.XenError( + 'VDIUnavailable', + opterr='Failed to attach VDI during "prepare thin": {}' + .format(e) + ) + + if not util.pathexists(self.path): + raise xs_errors.XenError( + 'VDIUnavailable', opterr='Could not find: {}'.format(self.path) + ) + + if not hasattr(self, 'xenstore_data'): + self.xenstore_data = {} + + # TODO: Is it useful? + self.xenstore_data.update(scsiutil.update_XS_SCSIdata( + self.uuid, scsiutil.gen_synthetic_page_data(self.uuid) + )) + + self.xenstore_data['storage-type'] = LinstorSR.DRIVER_TYPE + + self.attached = True + + return VDI.VDI.attach(self, self.sr.uuid, self.uuid) + + def detach(self, sr_uuid, vdi_uuid): + util.SMlog('LinstorVDI.detach for {}'.format(self.uuid)) + self.attached = False + + if self.vdi_type == vhdutil.VDI_TYPE_RAW: + return + + # The VDI is already deflated if the VHD image size + metadata is + # equal to the LINSTOR volume size. 
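+        # (When that is the case, or when the SR uses thick provisioning,
+        # deflation is skipped; snapshots are deflated even on thick
+        # provisioning, see below.)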
+ volume_size = compute_volume_size(self.size, self.vdi_type) + already_deflated = self.capacity <= volume_size + + if already_deflated: + util.SMlog( + 'VDI {} already deflated (old volume size={}, volume size={})' + .format(self.uuid, self.capacity, volume_size) + ) + + need_deflate = True + if already_deflated: + need_deflate = False + elif self.sr._provisioning == 'thick': + need_deflate = False + + vdi_ref = self.sr.srcmd.params['vdi_ref'] + if self.session.xenapi.VDI.get_is_a_snapshot(vdi_ref): + need_deflate = True + + if need_deflate: + try: + self._prepare_thin(False) + except Exception as e: + raise xs_errors.XenError( + 'VDIUnavailable', + opterr='Failed to detach VDI during "prepare thin": {}' + .format(e) + ) + + def resize(self, sr_uuid, vdi_uuid, size): + util.SMlog('LinstorVDI.resize for {}'.format(self.uuid)) + if self.hidden: + raise xs_errors.XenError('VDIUnavailable', opterr='hidden VDI') + + if size < self.size: + util.SMlog( + 'vdi_resize: shrinking not supported: ' + '(current size: {}, new size: {})'.format(self.size, size) + ) + raise xs_errors.XenError('VDISize', opterr='shrinking not allowed') + + # Compute the virtual VHD size. + size = vhdutil.validate_and_round_vhd_size(long(size)) + + if size == self.size: + return VDI.VDI.get_params(self) + + # Compute the LINSTOR volume size. + new_volume_size = compute_volume_size(size, self.vdi_type) + if self.vdi_type == vhdutil.VDI_TYPE_RAW: + old_volume_size = self.size + else: + old_volume_size = self.capacity + if self.sr._provisioning == 'thin': + # VDI is currently deflated, so keep it deflated. + new_volume_size = old_volume_size + assert new_volume_size >= old_volume_size + + space_needed = new_volume_size - old_volume_size + self.sr._ensure_space_available(space_needed) + + old_capacity = self.capacity + if self.vdi_type == vhdutil.VDI_TYPE_RAW: + self._linstor.resize(self.uuid, new_volume_size) + else: + if new_volume_size != old_volume_size: + inflate( + self.sr._journaler, self._linstor, self.uuid, self.path, + new_volume_size, old_volume_size + ) + vhdutil.setSizeVirtFast(self.path, size) + + # Reload size attributes. + self._load_this() + + vdi_ref = self.sr.srcmd.params['vdi_ref'] + self.session.xenapi.VDI.set_virtual_size(vdi_ref, str(self.size)) + self.session.xenapi.VDI.set_physical_utilisation( + vdi_ref, str(self.utilisation) + ) + self.sr._update_stats(self.capacity - old_capacity) + return VDI.VDI.get_params(self) + + def clone(self, sr_uuid, vdi_uuid): + return self._do_snapshot(sr_uuid, vdi_uuid, VDI.SNAPSHOT_DOUBLE) + + def compose(self, sr_uuid, vdi1, vdi2): + util.SMlog('VDI.compose for {} -> {}'.format(vdi2, vdi1)) + if self.vdi_type != vhdutil.VDI_TYPE_VHD: + raise xs_errors.XenError('Unimplemented') + + parent_uuid = vdi1 + parent_path = self._linstor.get_device_path(parent_uuid) + + # We must pause tapdisk to correctly change the parent. Otherwise we + # have a readonly error. 
+ # See: https://github.com/xapi-project/xen-api/blob/b3169a16d36dae0654881b336801910811a399d9/ocaml/xapi/storage_migrate.ml#L928-L929 + # and: https://github.com/xapi-project/xen-api/blob/b3169a16d36dae0654881b336801910811a399d9/ocaml/xapi/storage_migrate.ml#L775 + + if not blktap2.VDI.tap_pause(self.session, self.sr.uuid, self.uuid): + raise util.SMException('Failed to pause VDI {}'.format(self.uuid)) + try: + vhdutil.setParent(self.path, parent_path, False) + vhdutil.setHidden(parent_path) + self.sr.session.xenapi.VDI.set_managed( + self.sr.srcmd.params['args'][0], False + ) + finally: + blktap2.VDI.tap_unpause(self.session, self.sr.uuid, self.uuid) + + if not blktap2.VDI.tap_refresh(self.session, self.sr.uuid, self.uuid): + raise util.SMException( + 'Failed to refresh VDI {}'.format(self.uuid) + ) + + util.SMlog('Compose done') + + def generate_config(self, sr_uuid, vdi_uuid): + """ + Generate the XML config required to attach and activate + a VDI for use when XAPI is not running. Attach and + activation is handled by vdi_attach_from_config below. + """ + + util.SMlog('LinstorVDI.generate_config for {}'.format(self.uuid)) + + if not self.path or not util.pathexists(self.path): + available = False + # Try to refresh symlink path... + try: + self.path = self._linstor.get_device_path(vdi_uuid) + available = util.pathexists(self.path) + except Exception: + pass + if not available: + raise xs_errors.XenError('VDIUnavailable') + + resp = {} + resp['device_config'] = self.sr.dconf + resp['sr_uuid'] = sr_uuid + resp['vdi_uuid'] = self.uuid + resp['sr_sm_config'] = self.sr.sm_config + resp['vdi_path'] = self.path + resp['command'] = 'vdi_attach_from_config' + + config = xmlrpclib.dumps(tuple([resp]), 'vdi_attach_from_config') + return xmlrpclib.dumps((config,), "", True) + + def attach_from_config(self, sr_uuid, vdi_uuid): + """ + Attach and activate a VDI using config generated by + vdi_generate_config above. This is used for cases such as + the HA state-file and the redo-log. + """ + + util.SMlog('LinstorVDI.attach_from_config for {}'.format(vdi_uuid)) + + try: + if not util.pathexists(self.sr.path): + self.sr.attach(sr_uuid) + + if not DRIVER_CONFIG['ATTACH_FROM_CONFIG_WITH_TAPDISK']: + return self.attach(sr_uuid, vdi_uuid) + except Exception: + util.logException('LinstorVDI.attach_from_config') + raise xs_errors.XenError( + 'SRUnavailable', + opterr='Unable to attach from config' + ) + + def reset_leaf(self, sr_uuid, vdi_uuid): + if self.vdi_type != vhdutil.VDI_TYPE_VHD: + raise xs_errors.XenError('Unimplemented') + + if not self.sr._vhdutil.has_parent(self.uuid): + raise util.SMException( + 'ERROR: VDI {} has no parent, will not reset contents' + .format(self.uuid) + ) + + vhdutil.killData(self.path) + + def _load_this(self): + volume_metadata = self._linstor.get_volume_metadata(self.uuid) + volume_info = self._linstor.get_volume_info(self.uuid) + + # Contains the physical size used on all disks. + # When LINSTOR LVM driver is used, the size should be similar to + # virtual size (i.e. the LINSTOR max volume size). + # When LINSTOR Thin LVM driver is used, the used physical size should + # be lower than virtual size at creation. + # The physical size increases after each write in a new block. 
+ self.utilisation = volume_info.physical_size + self.capacity = volume_info.virtual_size + + if self.vdi_type == vhdutil.VDI_TYPE_RAW: + self.hidden = int(volume_metadata.get(HIDDEN_TAG) or 0) + self.size = volume_info.virtual_size + self.parent = '' + else: + vhd_info = self.sr._vhdutil.get_vhd_info(self.uuid) + self.hidden = vhd_info.hidden + self.size = vhd_info.sizeVirt + self.parent = vhd_info.parentUuid + + if self.hidden: + self.managed = False + + self.label = volume_metadata.get(NAME_LABEL_TAG) or '' + self.description = volume_metadata.get(NAME_DESCRIPTION_TAG) or '' + + # Update sm_config_override of VDI parent class. + self.sm_config_override = {'vhd-parent': self.parent or None} + + def _mark_hidden(self, hidden=True): + if self.hidden == hidden: + return + + if self.vdi_type == vhdutil.VDI_TYPE_VHD: + vhdutil.setHidden(self.path, hidden) + else: + self._linstor.update_volume_metadata(self.uuid, { + HIDDEN_TAG: hidden + }) + self.hidden = hidden + + def update(self, sr_uuid, vdi_uuid): + xenapi = self.session.xenapi + vdi_ref = xenapi.VDI.get_by_uuid(self.uuid) + + volume_metadata = { + NAME_LABEL_TAG: util.to_plain_string( + xenapi.VDI.get_name_label(vdi_ref) + ), + NAME_DESCRIPTION_TAG: util.to_plain_string( + xenapi.VDI.get_name_description(vdi_ref) + ) + } + + try: + self._linstor.update_volume_metadata(self.uuid, volume_metadata) + except LinstorVolumeManagerError as e: + if e.code == LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS: + raise xs_errors.XenError( + 'VDIUnavailable', + opterr='LINSTOR volume {} not found'.format(self.uuid) + ) + raise xs_errors.XenError('VDIUnavailable', opterr=str(e)) + + # -------------------------------------------------------------------------- + # Thin provisioning. + # -------------------------------------------------------------------------- + + def _prepare_thin(self, attach): + if self.sr._is_master: + if attach: + attach_thin( + self.session, self.sr._journaler, self._linstor, + self.sr.uuid, self.uuid + ) + else: + detach_thin( + self.session, self._linstor, self.sr.uuid, self.uuid + ) + else: + fn = 'attach' if attach else 'detach' + + # We assume the first pool is always the one currently in use. + pools = self.session.xenapi.pool.get_all() + master = self.session.xenapi.pool.get_master(pools[0]) + args = { + 'groupName': self.sr._group_name, + 'srUuid': self.sr.uuid, + 'vdiUuid': self.uuid + } + ret = self.session.xenapi.host.call_plugin( + master, self.sr.MANAGER_PLUGIN, fn, args + ) + util.SMlog( + 'call-plugin ({} with {}) returned: {}'.format(fn, args, ret) + ) + if ret == 'False': + raise xs_errors.XenError( + 'VDIUnavailable', + opterr='Plugin {} failed'.format(self.sr.MANAGER_PLUGIN) + ) + + # Reload size attrs after inflate or deflate! + self._load_this() + self.sr._update_physical_size() + + vdi_ref = self.sr.srcmd.params['vdi_ref'] + self.session.xenapi.VDI.set_physical_utilisation( + vdi_ref, str(self.utilisation) + ) + + self.session.xenapi.SR.set_physical_utilisation( + self.sr.sr_ref, str(self.sr.physical_utilisation) + ) + + # -------------------------------------------------------------------------- + # Generic helpers. + # -------------------------------------------------------------------------- + + def _determine_type_and_path(self): + """ + Determine whether this is a RAW or a VHD VDI. + """ + + # 1. Check vdi_ref and vdi_type in config. 
+ try: + vdi_ref = self.session.xenapi.VDI.get_by_uuid(self.uuid) + if vdi_ref: + sm_config = self.session.xenapi.VDI.get_sm_config(vdi_ref) + vdi_type = sm_config.get('vdi_type') + if vdi_type: + # Update parent fields. + self.vdi_type = vdi_type + self.sm_config_override = sm_config + self._update_device_name( + self._linstor.get_volume_name(self.uuid) + ) + return + except Exception: + pass + + # 2. Otherwise use the LINSTOR volume manager directly. + # It's probably a new VDI created via snapshot. + volume_metadata = self._linstor.get_volume_metadata(self.uuid) + self.vdi_type = volume_metadata.get(VDI_TYPE_TAG) + if not self.vdi_type: + raise xs_errors.XenError( + 'VDIUnavailable', + opterr='failed to get vdi_type in metadata' + ) + self._update_device_name( + self._linstor.get_volume_name(self.uuid) + ) + + def _update_device_name(self, device_name): + self._device_name = device_name + + # Mark path of VDI parent class. + if device_name: + self.path = self._linstor.build_device_path(self._device_name) + else: + self.path = None + + def _create_snapshot(self, snap_uuid, snap_of_uuid=None): + """ + Snapshot self and return the snapshot VDI object. + """ + + # 1. Create a new LINSTOR volume with the same size than self. + snap_path = self._linstor.shallow_clone_volume( + self.uuid, snap_uuid, persistent=False + ) + + # 2. Write the snapshot content. + is_raw = (self.vdi_type == vhdutil.VDI_TYPE_RAW) + vhdutil.snapshot( + snap_path, self.path, is_raw, self.MAX_METADATA_VIRT_SIZE + ) + + # 3. Get snapshot parent. + snap_parent = self.sr._vhdutil.get_parent(snap_uuid) + + # 4. Update metadata. + util.SMlog('Set VDI {} metadata of snapshot'.format(snap_uuid)) + volume_metadata = { + NAME_LABEL_TAG: util.to_plain_string(self.label), + NAME_DESCRIPTION_TAG: util.to_plain_string(self.description), + IS_A_SNAPSHOT_TAG: bool(snap_of_uuid), + SNAPSHOT_OF_TAG: snap_of_uuid, + SNAPSHOT_TIME_TAG: '', + TYPE_TAG: self.ty, + VDI_TYPE_TAG: vhdutil.VDI_TYPE_VHD, + READ_ONLY_TAG: False, + METADATA_OF_POOL_TAG: '' + } + self._linstor.set_volume_metadata(snap_uuid, volume_metadata) + + # 5. Set size. + snap_vdi = LinstorVDI(self.sr, snap_uuid) + if not snap_vdi._exists: + raise xs_errors.XenError('VDISnapshot') + + volume_info = self._linstor.get_volume_info(snap_uuid) + + snap_vdi.size = self.sr._vhdutil.get_size_virt(snap_uuid) + snap_vdi.utilisation = volume_info.physical_size + + # 6. Update sm config. + snap_vdi.sm_config = {} + snap_vdi.sm_config['vdi_type'] = snap_vdi.vdi_type + if snap_parent: + snap_vdi.sm_config['vhd-parent'] = snap_parent + snap_vdi.parent = snap_parent + + snap_vdi.label = self.label + snap_vdi.description = self.description + + self._linstor.mark_volume_as_persistent(snap_uuid) + + return snap_vdi + + # -------------------------------------------------------------------------- + # Implement specific SR methods. + # -------------------------------------------------------------------------- + + def _rename(self, oldpath, newpath): + # TODO: I'm not sure... Used by CBT. + volume_uuid = self._linstor.get_volume_uuid_from_device_path(oldpath) + self._linstor.update_volume_name(volume_uuid, newpath) + + def _do_snapshot( + self, sr_uuid, vdi_uuid, snap_type, secondary=None, cbtlog=None + ): + # If cbt enabled, save file consistency state. 
+ if cbtlog is not None: + if blktap2.VDI.tap_status(self.session, vdi_uuid): + consistency_state = False + else: + consistency_state = True + util.SMlog( + 'Saving log consistency state of {} for vdi: {}' + .format(consistency_state, vdi_uuid) + ) + else: + consistency_state = None + + if self.vdi_type != vhdutil.VDI_TYPE_VHD: + raise xs_errors.XenError('Unimplemented') + + if not blktap2.VDI.tap_pause(self.session, sr_uuid, vdi_uuid): + raise util.SMException('Failed to pause VDI {}'.format(vdi_uuid)) + try: + return self._snapshot(snap_type, cbtlog, consistency_state) + finally: + blktap2.VDI.tap_unpause(self.session, sr_uuid, vdi_uuid, secondary) + + def _snapshot(self, snap_type, cbtlog=None, cbt_consistency=None): + util.SMlog( + 'LinstorVDI._snapshot for {} (type {})' + .format(self.uuid, snap_type) + ) + + # 1. Checks... + if self.hidden: + raise xs_errors.XenError('VDIClone', opterr='hidden VDI') + + depth = self.sr._vhdutil.get_depth(self.uuid) + if depth == -1: + raise xs_errors.XenError( + 'VDIUnavailable', + opterr='failed to get VHD depth' + ) + elif depth >= vhdutil.MAX_CHAIN_SIZE: + raise xs_errors.XenError('SnapshotChainTooLong') + + volume_path = self.path + if not util.pathexists(volume_path): + raise xs_errors.XenError( + 'EIO', + opterr='IO error checking path {}'.format(volume_path) + ) + + # 2. Create base and snap uuid (if required) and a journal entry. + base_uuid = util.gen_uuid() + snap_uuid = None + + if snap_type == VDI.SNAPSHOT_DOUBLE: + snap_uuid = util.gen_uuid() + + clone_info = '{}_{}'.format(base_uuid, snap_uuid) + + active_uuid = self.uuid + self.sr._journaler.create( + LinstorJournaler.CLONE, active_uuid, clone_info + ) + + try: + # 3. Self becomes the new base. + # The device path remains the same. + self._linstor.update_volume_uuid(self.uuid, base_uuid) + self.uuid = base_uuid + self.location = self.uuid + self.read_only = True + self.managed = False + + # 4. Create snapshots (new active and snap). + active_vdi = self._create_snapshot(active_uuid) + + snap_vdi = None + if snap_type == VDI.SNAPSHOT_DOUBLE: + snap_vdi = self._create_snapshot(snap_uuid, active_uuid) + + self.label = 'base copy' + self.description = '' + + # 5. Mark the base VDI as hidden so that it does not show up + # in subsequent scans. + self._mark_hidden() + self._linstor.update_volume_metadata( + self.uuid, {READ_ONLY_TAG: True} + ) + + # 6. We must update the new active VDI with the "paused" and + # "host_" properties. Why? Because the original VDI has been + # paused and we we must unpause it after the snapshot. + # See: `tap_unpause` in `blktap2.py`. + vdi_ref = self.session.xenapi.VDI.get_by_uuid(active_uuid) + sm_config = self.session.xenapi.VDI.get_sm_config(vdi_ref) + for key in filter( + lambda x: x == 'paused' or x.startswith('host_'), + sm_config.keys() + ): + active_vdi.sm_config[key] = sm_config[key] + + # 7. Verify parent locator field of both children and + # delete base if unused. + introduce_parent = True + try: + snap_parent = None + if snap_vdi: + snap_parent = snap_vdi.parent + + if active_vdi.parent != self.uuid and ( + snap_type == VDI.SNAPSHOT_SINGLE or + snap_type == VDI.SNAPSHOT_INTERNAL or + snap_parent != self.uuid + ): + util.SMlog( + 'Destroy unused base volume: {} (path={})' + .format(self.uuid, self.path) + ) + introduce_parent = False + self._linstor.destroy_volume(self.uuid) + except Exception as e: + util.SMlog('Ignoring exception: {}'.format(e)) + pass + + # 8. Introduce the new VDI records. 
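+            # (The new snapshot VDI is always introduced when it exists; the
+            # hidden base copy is only introduced if it is still needed as a
+            # parent, see `introduce_parent` in step 7.)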
+ if snap_vdi: + # If the parent is encrypted set the key_hash for the + # new snapshot disk. + vdi_ref = self.sr.srcmd.params['vdi_ref'] + sm_config = self.session.xenapi.VDI.get_sm_config(vdi_ref) + # TODO: Maybe remove key_hash support. + if 'key_hash' in sm_config: + snap_vdi.sm_config['key_hash'] = sm_config['key_hash'] + # If we have CBT enabled on the VDI, + # set CBT status for the new snapshot disk. + if cbtlog: + snap_vdi.cbt_enabled = True + + if snap_vdi: + snap_vdi_ref = snap_vdi._db_introduce() + util.SMlog( + 'vdi_clone: introduced VDI: {} ({})' + .format(snap_vdi_ref, snap_vdi.uuid) + ) + if introduce_parent: + base_vdi_ref = self._db_introduce() + self.session.xenapi.VDI.set_managed(base_vdi_ref, False) + util.SMlog( + 'vdi_clone: introduced VDI: {} ({})' + .format(base_vdi_ref, self.uuid) + ) + self._linstor.update_volume_metadata(self.uuid, { + NAME_LABEL_TAG: util.to_plain_string(self.label), + NAME_DESCRIPTION_TAG: util.to_plain_string( + self.description + ), + READ_ONLY_TAG: True, + METADATA_OF_POOL_TAG: '' + }) + + # 9. Update cbt files if user created snapshot (SNAPSHOT_DOUBLE) + if snap_type == VDI.SNAPSHOT_DOUBLE and cbtlog: + try: + self._cbt_snapshot(snap_uuid, cbt_consistency) + except Exception: + # CBT operation failed. + # TODO: Implement me. + raise + + if snap_type != VDI.SNAPSHOT_INTERNAL: + self.sr._update_stats(self.capacity) + + # 10. Return info on the new user-visible leaf VDI. + ret_vdi = snap_vdi + if not ret_vdi: + ret_vdi = self + if not ret_vdi: + ret_vdi = active_vdi + + vdi_ref = self.sr.srcmd.params['vdi_ref'] + self.session.xenapi.VDI.set_sm_config( + vdi_ref, active_vdi.sm_config + ) + except Exception as e: + util.logException('Failed to snapshot!') + try: + self.sr._handle_interrupted_clone( + active_uuid, clone_info, force_undo=True + ) + self.sr._journaler.remove(LinstorJournaler.CLONE, active_uuid) + except Exception as e: + util.SMlog( + 'WARNING: Failed to clean up failed snapshot: {}' + .format(e) + ) + raise xs_errors.XenError('VDIClone', opterr=str(e)) + + self.sr._journaler.remove(LinstorJournaler.CLONE, active_uuid) + + return ret_vdi.get_params() + +# ------------------------------------------------------------------------------ + + +if __name__ == '__main__': + SRCommand.run(LinstorSR, DRIVER_INFO) +else: + SR.registerSR(LinstorSR) diff --git a/drivers/XE_SR_ERRORCODES.xml b/drivers/XE_SR_ERRORCODES.xml index 120b92fc9..1986bc551 100755 --- a/drivers/XE_SR_ERRORCODES.xml +++ b/drivers/XE_SR_ERRORCODES.xml @@ -926,4 +926,40 @@ ZFS SR deletion error 5001 + + + LinstorMaster + Linstor request must come from master + 5002 + + + + LinstorConfigHostsMissing + The request is missing the LINSTOR hosts parameter + 5003 + + + + LinstorConfigGroupNameMissing + The request is missing the LINSTOR group name parameter + 5004 + + + + LinstorConfigRedundancyMissing + The request is missing the LINSTOR redundancy parameter + 5005 + + + + LinstorSRCreate + LINSTOR SR creation error + 5006 + + + + LinstorSRDelete + LINSTOR SR delete error + 5007 + diff --git a/drivers/cleanup.py b/drivers/cleanup.py index 62e283abc..74b28674f 100755 --- a/drivers/cleanup.py +++ b/drivers/cleanup.py @@ -46,10 +46,19 @@ from refcounter import RefCounter from ipc import IPCFlag from lvmanager import LVActivator -from srmetadata import LVMMetadataHandler +from srmetadata import LVMMetadataHandler, VDI_TYPE_TAG from functools import reduce from time import monotonic as _time +try: + from linstorjournaler import LinstorJournaler + from linstorvhdutil import 
LinstorVhdUtil + from linstorvolumemanager \ + import LinstorVolumeManager, LinstorVolumeManagerError + LINSTOR_AVAILABLE = True +except ImportError: + LINSTOR_AVAILABLE = False + # Disable automatic leaf-coalescing. Online leaf-coalesce is currently not # possible due to lvhd_stop_using_() not working correctly. However, we leave # this option available through the explicit LEAFCLSC_FORCE flag in the VDI @@ -698,7 +707,19 @@ def getAllPrunable(self): if child not in childList: thisPrunable = False - if not self.scanError and thisPrunable: + # We can destroy the current VDI if all childs are hidden BUT the + # current VDI must be hidden too to do that! + # Example in this case (after a failed live leaf coalesce): + # + # SMGC: [32436] SR 07ed ('linstor-nvme-sr') (2 VDIs in 1 VHD trees): + # SMGC: [32436] b5458d61(1.000G/4.127M) + # SMGC: [32436] *OLD_b545(1.000G/4.129M) + # + # OLD_b545 is hidden and must be removed, but b5458d61 not. + # Normally we are not in this function when the delete action is + # executed but in `_liveLeafCoalesce`. + + if not self.scanError and not self.hidden and thisPrunable: vdiList.append(self) return vdiList @@ -1482,6 +1503,112 @@ def _calcExtraSpaceForSnapshotCoalescing(self): lvhdutil.calcSizeLV(self.getSizeVHD()) +class LinstorVDI(VDI): + """Object representing a VDI in a LINSTOR SR""" + + MAX_SIZE = 2 * 1024 * 1024 * 1024 * 1024 # Max VHD size. + + VOLUME_LOCK_TIMEOUT = 30 + + def load(self, info=None): + self.parentUuid = info.parentUuid + self.scanError = True + self.parent = None + self.children = [] + + self.fileName = self.sr._linstor.get_volume_name(self.uuid) + self.path = self.sr._linstor.build_device_path(self.fileName) + if not util.pathexists(self.path): + raise util.SMException( + '{} of {} not found' + .format(self.fileName, self.uuid) + ) + + if not info: + try: + info = self.sr._vhdutil.get_vhd_info(self.uuid) + except util.SMException: + Util.log( + ' [VDI {}: failed to read VHD metadata]'.format(self.uuid) + ) + return + + self.parentUuid = info.parentUuid + self.sizeVirt = info.sizeVirt + self._sizeVHD = info.sizePhys + self.hidden = info.hidden + self.scanError = False + + def rename(self, uuid): + Util.log('Renaming {} -> {} (path={})'.format( + self.uuid, uuid, self.path + )) + self.sr._linstor.update_volume_uuid(self.uuid, uuid) + VDI.rename(self, uuid) + + def delete(self): + if len(self.children) > 0: + raise util.SMException( + 'VDI {} has children, can\'t delete'.format(self.uuid) + ) + self.sr.lock() + try: + self.sr._linstor.destroy_volume(self.uuid) + self.sr.forgetVDI(self.uuid) + finally: + self.sr.unlock() + VDI.delete(self) + + def pauseVDIs(self, vdiList): + self.sr._linstor.ensure_volume_list_is_not_locked( + vdiList, timeout=self.VOLUME_LOCK_TIMEOUT + ) + return super(VDI).pauseVDIs(vdiList) + + def _liveLeafCoalesce(self, vdi): + self.sr._linstor.ensure_volume_is_not_locked( + vdi.uuid, timeout=self.VOLUME_LOCK_TIMEOUT + ) + return super(VDI)._liveLeafCoalesce(vdi) + + def _relinkSkip(self): + abortFlag = IPCFlag(self.sr.uuid) + for child in self.children: + if abortFlag.test(FLAG_TYPE_ABORT): + raise AbortException('Aborting due to signal') + Util.log( + ' Relinking {} from {} to {}'.format( + child, self, self.parent + ) + ) + + session = child.sr.xapi.session + sr_uuid = child.sr.uuid + vdi_uuid = child.uuid + try: + self.sr._linstor.ensure_volume_is_not_locked( + vdi_uuid, timeout=self.VOLUME_LOCK_TIMEOUT + ) + blktap2.VDI.tap_pause(session, sr_uuid, vdi_uuid) + child._setParent(self.parent) + finally: + 
blktap2.VDI.tap_unpause(session, sr_uuid, vdi_uuid) + self.children = [] + + def _setHidden(self, hidden=True): + HIDDEN_TAG = 'hidden' + + if self.raw: + self.sr._linstor.update_volume_metadata(self.uuid, { + HIDDEN_TAG: hidden + }) + self.hidden = hidden + else: + VDI._setHidden(self, hidden) + + def _queryVHDBlocks(self): + return self.sr._vhdutil.get_block_bitmap(self.uuid) + ################################################################################ # # SR @@ -1536,7 +1663,8 @@ def _getTreeStr(self, vdi, indent=8): TYPE_FILE = "file" TYPE_LVHD = "lvhd" - TYPES = [TYPE_LVHD, TYPE_FILE] + TYPE_LINSTOR = "linstor" + TYPES = [TYPE_LVHD, TYPE_FILE, TYPE_LINSTOR] LOCK_RETRY_INTERVAL = 3 LOCK_RETRY_ATTEMPTS = 20 @@ -1557,6 +1685,8 @@ def getInstance(uuid, xapiSession, createLock=True, force=False): return FileSR(uuid, xapi, createLock, force) elif type == SR.TYPE_LVHD: return LVHDSR(uuid, xapi, createLock, force) + elif type == SR.TYPE_LINSTOR: + return LinstorSR(uuid, xapi, createLock, force) raise util.SMException("SR type %s not recognized" % type) getInstance = staticmethod(getInstance) @@ -2951,6 +3081,232 @@ def _updateSlavesOnResize(self, vdi): vdi.fileName, vdi.uuid, slaves) +class LinstorSR(SR): + TYPE = SR.TYPE_LINSTOR + + def __init__(self, uuid, xapi, createLock, force): + if not LINSTOR_AVAILABLE: + raise util.SMException( + 'Can\'t load cleanup LinstorSR: LINSTOR libraries are missing' + ) + + SR.__init__(self, uuid, xapi, createLock, force) + self._master_uri = 'linstor://localhost' + self.path = LinstorVolumeManager.DEV_ROOT_PATH + self._reloadLinstor() + + def deleteVDI(self, vdi): + self._checkSlaves(vdi) + SR.deleteVDI(self, vdi) + + def getFreeSpace(self): + return self._linstor.max_volume_size_allowed + + def scan(self, force=False): + all_vdi_info = self._scan(force) + for uuid, vdiInfo in all_vdi_info.iteritems(): + # When vdiInfo is None, the VDI is RAW. + vdi = self.getVDI(uuid) + if not vdi: + self.logFilter.logNewVDI(uuid) + vdi = LinstorVDI(self, uuid, not vdiInfo) + self.vdis[uuid] = vdi + if vdiInfo: + vdi.load(vdiInfo) + self._removeStaleVDIs(all_vdi_info.keys()) + self._buildTree(force) + self.logFilter.logState() + self._handleInterruptedCoalesceLeaf() + + def _reloadLinstor(self): + session = self.xapi.session + host_ref = util.get_this_host_ref(session) + sr_ref = session.xenapi.SR.get_by_uuid(self.uuid) + + pbd = util.find_my_pbd(session, host_ref, sr_ref) + if pbd is None: + raise util.SMException('Failed to find PBD') + + dconf = session.xenapi.PBD.get_device_config(pbd) + group_name = dconf['group-name'] + + self.journaler = LinstorJournaler( + self._master_uri, group_name, logger=util.SMlog + ) + + self._linstor = LinstorVolumeManager( + self._master_uri, + group_name, + repair=True, + logger=util.SMlog + ) + self._vhdutil = LinstorVhdUtil(session, self._linstor) + + def _scan(self, force): + for i in range(SR.SCAN_RETRY_ATTEMPTS): + self._reloadLinstor() + error = False + try: + all_vdi_info = self._load_vdi_info() + for uuid, vdiInfo in all_vdi_info.iteritems(): + if vdiInfo and vdiInfo.error: + error = True + break + if not error: + return all_vdi_info + Util.log('Scan error, retrying ({})'.format(i)) + except Exception as e: + Util.log('Scan exception, retrying ({}): {}'.format(i, e)) + Util.log(traceback.format_exc()) + + if force: + return all_vdi_info + raise util.SMException('Scan error') + + def _load_vdi_info(self): + all_vdi_info = {} + + # TODO: Ensure metadata contains the right info. 
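+ # Sketch of the structures consumed below (assumed shapes, shown for
+ # illustration only):
+ #   volumes_with_info     -> {'<vdi-uuid>': VolumeInfo('xcp-volume-<uuid>'), ...}
+ #   volumes_with_metadata -> {'<vdi-uuid>': {VDI_TYPE_TAG: vhdutil.VDI_TYPE_VHD, ...}, ...}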
+ + all_volume_info = self._linstor.volumes_with_info + volumes_metadata = self._linstor.volumes_with_metadata + for vdi_uuid, volume_info in all_volume_info.items(): + try: + if not volume_info.name and \ + not list(volumes_metadata[vdi_uuid].items()): + continue # Ignore it, probably deleted. + + vdi_type = volumes_metadata[vdi_uuid][VDI_TYPE_TAG] + if vdi_type == vhdutil.VDI_TYPE_VHD: + info = self._vhdutil.get_vhd_info(vdi_uuid) + else: + info = None + except Exception as e: + Util.log( + ' [VDI {}: failed to load VDI info]: {}' + .format(self.uuid, e) + ) + info = vhdutil.VHDInfo(vdi_uuid) + info.error = 1 + all_vdi_info[vdi_uuid] = info + return all_vdi_info + + # TODO: Maybe implement _liveLeafCoalesce/_prepareCoalesceLeaf/ + # _finishCoalesceLeaf/_updateSlavesOnResize like LVM plugin. + + def _calcExtraSpaceNeeded(self, child, parent): + meta_overhead = vhdutil.calcOverheadEmpty(LinstorVDI.MAX_SIZE) + bitmap_overhead = vhdutil.calcOverheadBitmap(parent.sizeVirt) + virtual_size = LinstorVolumeManager.round_up_volume_size( + parent.sizeVirt + meta_overhead + bitmap_overhead + ) + # TODO: Check result. + return virtual_size - self._linstor.get_volume_size(parent.uuid) + + def _hasValidDevicePath(self, uuid): + try: + self._linstor.get_device_path(uuid) + except Exception: + # TODO: Maybe log exception. + return False + return True + + def _handleInterruptedCoalesceLeaf(self): + entries = self.journaler.get_all(VDI.JRN_LEAF) + for uuid, parentUuid in entries.iteritems(): + if self._hasValidDevicePath(parentUuid) or \ + self._hasValidDevicePath(self.TMP_RENAME_PREFIX + uuid): + self._undoInterruptedCoalesceLeaf(uuid, parentUuid) + else: + self._finishInterruptedCoalesceLeaf(uuid, parentUuid) + self.journaler.remove(VDI.JRN_LEAF, uuid) + vdi = self.getVDI(uuid) + if vdi: + vdi.ensureUnpaused() + + def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid): + Util.log('*** UNDO LEAF-COALESCE') + parent = self.getVDI(parentUuid) + if not parent: + parent = self.getVDI(childUuid) + if not parent: + raise util.SMException( + 'Neither {} nor {} found'.format(parentUuid, childUuid) + ) + Util.log( + 'Renaming parent back: {} -> {}'.format(childUuid, parentUuid) + ) + parent.rename(parentUuid) + util.fistpoint.activate('LVHDRT_coaleaf_undo_after_rename', self.uuid) + + child = self.getVDI(childUuid) + if not child: + child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid) + if not child: + raise util.SMException( + 'Neither {} nor {} found'.format( + childUuid, self.TMP_RENAME_PREFIX + childUuid + ) + ) + Util.log('Renaming child back to {}'.format(childUuid)) + child.rename(childUuid) + Util.log('Updating the VDI record') + child.setConfig(VDI.DB_VHD_PARENT, parentUuid) + child.setConfig(VDI.DB_VDI_TYPE, vhdutil.VDI_TYPE_VHD) + util.fistpoint.activate( + 'LVHDRT_coaleaf_undo_after_rename2', self.uuid + ) + + # TODO: Maybe deflate here. + + if child.hidden: + child._setHidden(False) + if not parent.hidden: + parent._setHidden(True) + self._updateSlavesOnUndoLeafCoalesce(parent, child) + util.fistpoint.activate('LVHDRT_coaleaf_undo_end', self.uuid) + Util.log('*** leaf-coalesce undo successful') + if util.fistpoint.is_active('LVHDRT_coaleaf_stop_after_recovery'): + child.setConfig(VDI.DB_LEAFCLSC, VDI.LEAFCLSC_DISABLED) + + def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid): + Util.log('*** FINISH LEAF-COALESCE') + vdi = self.getVDI(childUuid) + if not vdi: + raise util.SMException('VDI {} not found'.format(childUuid)) + # TODO: Maybe inflate. 
+ try: + self.forgetVDI(parentUuid) + except XenAPI.Failure: + pass + self._updateSlavesOnResize(vdi) + util.fistpoint.activate('LVHDRT_coaleaf_finish_end', self.uuid) + Util.log('*** finished leaf-coalesce successfully') + + def _checkSlaves(self, vdi): + try: + states = self._linstor.get_usage_states(vdi.uuid) + for node_name, state in states.items(): + self._checkSlave(node_name, vdi, state) + except LinstorVolumeManagerError as e: + if e.code != LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS: + raise + + @staticmethod + def _checkSlave(node_name, vdi, state): + # If state is None, LINSTOR doesn't know the host state + # (bad connection?). + if state is None: + raise util.SMException( + 'Unknown state for VDI {} on {}'.format(vdi.uuid, node_name) + ) + + if state: + raise util.SMException( + 'VDI {} is in use on {}'.format(vdi.uuid, node_name) + ) + + ################################################################################ # # Helpers @@ -2992,7 +3348,9 @@ def normalizeType(type): "xfs", "zfs" ]: type = SR.TYPE_FILE - if not type in SR.TYPES: + if type in ["linstor"]: + type = SR.TYPE_LINSTOR + if type not in SR.TYPES: raise util.SMException("Unsupported SR type: %s" % type) return type diff --git a/drivers/linstor-manager b/drivers/linstor-manager new file mode 100755 index 000000000..f7ce18099 --- /dev/null +++ b/drivers/linstor-manager @@ -0,0 +1,272 @@ +#!/usr/bin/env python +# +# Copyright (C) 2020 Vates SAS - ronan.abhamon@vates.fr +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
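+#
+# Usage note (assumed invocation path, shown for illustration only): this
+# plugin is expected to be reached through the XAPI plugin mechanism, e.g.:
+#     session.xenapi.host.call_plugin(
+#         host_ref, 'linstor-manager', 'getSizeVirt',
+#         {'devicePath': '/dev/drbd/by-res/xcp-volume-<uuid>/0'}
+#     )
+# where `session` and `host_ref` are an existing XAPI session and host ref.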
+ +import base64 +import distutils.util +import subprocess +import sys +import XenAPIPlugin + +sys.path.append('/opt/xensource/sm/') +from linstorjournaler import LinstorJournaler +from linstorvolumemanager import LinstorVolumeManager +from lock import Lock +import json +import LinstorSR +import util +import vhdutil + + +FIREWALL_PORT_SCRIPT = '/etc/xapi.d/plugins/firewall-port' +LINSTOR_PORTS = [3366, 3370, 3376, 3377, '7000:8000'] + + +def get_linstor_uri(session): + return 'linstor://{}'.format(util.get_master_rec(session)['address']) + + +def update_port(port, open): + fn = 'open' if open else 'close' + args = ( + FIREWALL_PORT_SCRIPT, fn, str(port), 'tcp' + ) + + (ret, out, err) = util.doexec(args) + if ret == 0: + return + raise Exception('Failed to {} port: {} {}'.format(fn, out, err)) + + +def update_all_ports(open): + for port in LINSTOR_PORTS: + update_port(port, open) + + +def update_service(start): + fn = 'enable' if start else 'disable' + args = ('systemctl', fn, '--now', 'linstor-satellite') + (ret, out, err) = util.doexec(args) + if ret == 0: + return + raise Exception('Failed to {} satellite: {} {}'.format(fn, out, err)) + + +def enable(session, args): + try: + enabled = distutils.util.strtobool(args['enabled']) + update_all_ports(open=enabled) + update_service(start=enabled) + return str(True) + except Exception as e: + util.SMlog('linstor-manager:disable error: {}'.format(e)) + return str(False) + + +def attach(session, args): + try: + sr_uuid = args['srUuid'] + vdi_uuid = args['vdiUuid'] + group_name = args['groupName'] + + linstor_uri = get_linstor_uri(session) + journaler = LinstorJournaler( + linstor_uri, group_name, logger=util.SMlog + ) + linstor = LinstorVolumeManager( + linstor_uri, + group_name, + logger=util.SMlog + ) + LinstorSR.attach_thin(session, journaler, linstor, sr_uuid, vdi_uuid) + return str(True) + except Exception as e: + util.SMlog('linstor-manager:attach error: {}'.format(e)) + return str(False) + + +def detach(session, args): + try: + sr_uuid = args['srUuid'] + vdi_uuid = args['vdiUuid'] + group_name = args['groupName'] + + linstor = LinstorVolumeManager( + get_linstor_uri(session), + group_name, + logger=util.SMlog + ) + LinstorSR.detach_thin(session, linstor, sr_uuid, vdi_uuid) + return str(True) + except Exception as e: + util.SMlog('linstor-manager:detach error: {}'.format(e)) + return str(False) + + +def check(session, args): + try: + device_path = args['devicePath'] + return str(vhdutil.check(device_path)) + except Exception as e: + util.SMlog('linstor-manager:check error: {}'.format(e)) + raise + + +def get_vhd_info(session, args): + try: + device_path = args['devicePath'] + group_name = args['groupName'] + include_parent = distutils.util.strtobool(args['includeParent']) + + linstor = LinstorVolumeManager( + get_linstor_uri(session), + group_name, + logger=util.SMlog + ) + + def extract_uuid(device_path): + # TODO: Remove new line in the vhdutil module. Not here. 
+ return linstor.get_volume_uuid_from_device_path( + device_path.rstrip('\n') + ) + + vhd_info = vhdutil.getVHDInfo( + device_path, extract_uuid, include_parent + ) + return json.dumps(vhd_info.__dict__) + except Exception as e: + util.SMlog('linstor-manager:get_vhd_info error: {}'.format(e)) + raise + + +def has_parent(session, args): + try: + device_path = args['devicePath'] + return str(vhdutil.hasParent(device_path)) + except Exception as e: + util.SMlog('linstor-manager:has_parent error: {}'.format(e)) + raise + + +def get_parent(session, args): + try: + device_path = args['devicePath'] + group_name = args['groupName'] + + linstor = LinstorVolumeManager( + get_linstor_uri(session), + group_name, + logger=util.SMlog + ) + + def extract_uuid(device_path): + # TODO: Remove new line in the vhdutil module. Not here. + return linstor.get_volume_uuid_from_device_path( + device_path.rstrip('\n') + ) + + return vhdutil.getParent(device_path, extract_uuid) + except Exception as e: + util.SMlog('linstor-manager:get_parent error: {}'.format(e)) + raise + + +def get_size_virt(session, args): + try: + device_path = args['devicePath'] + return str(vhdutil.getSizeVirt(device_path)) + except Exception as e: + util.SMlog('linstor-manager:get_size_virt error: {}'.format(e)) + raise + + +def get_size_phys(session, args): + try: + device_path = args['devicePath'] + return str(vhdutil.getSizePhys(device_path)) + except Exception as e: + util.SMlog('linstor-manager:get_size_phys error: {}'.format(e)) + raise + + +def get_depth(session, args): + try: + device_path = args['devicePath'] + return str(vhdutil.getDepth(device_path)) + except Exception as e: + util.SMlog('linstor-manager:get_depth error: {}'.format(e)) + raise + + +def get_key_hash(session, args): + try: + device_path = args['devicePath'] + return vhdutil.getKeyHash(device_path) or '' + except Exception as e: + util.SMlog('linstor-manager:get_key_hash error: {}'.format(e)) + raise + + +def get_block_bitmap(session, args): + try: + device_path = args['devicePath'] + return base64.b64encode(vhdutil.getBlockBitmap(device_path)) or '' + except Exception as e: + util.SMlog('linstor-manager:get_block_bitmap error: {}'.format(e)) + raise + + +def lock_vdi(session, args): + lock = None + try: + sr_uuid = args['srUuid'] + vdi_uuid = args['vdiUuid'] + group_name = args['groupName'] + locked = distutils.util.strtobool(args['locked']) + + lock = Lock(vhdutil.LOCK_TYPE_SR, sr_uuid) + + linstor = LinstorVolumeManager( + get_linstor_uri(session), + group_name, + logger=util.SMlog + ) + linstor.lock_volume(vdi_uuid, locked) + + return str(True) + except Exception as e: + util.SMlog('linstor-manager:lock_vdi error: {}'.format(e)) + finally: + if lock: + lock.release() + return str(False) + + +if __name__ == '__main__': + XenAPIPlugin.dispatch({ + 'enable': enable, + 'attach': attach, + 'detach': detach, + 'check': check, + 'getVHDInfo': get_vhd_info, + 'hasParent': has_parent, + 'getParent': get_parent, + 'getSizeVirt': get_size_virt, + 'getSizePhys': get_size_phys, + 'getDepth': get_depth, + 'getKeyHash': get_key_hash, + 'getBlockBitmap': get_block_bitmap, + 'lockVdi': lock_vdi + }) diff --git a/drivers/linstorjournaler.py b/drivers/linstorjournaler.py new file mode 100755 index 000000000..749533056 --- /dev/null +++ b/drivers/linstorjournaler.py @@ -0,0 +1,155 @@ +#!/usr/bin/env python +# +# Copyright (C) 2020 Vates SAS - ronan.abhamon@vates.fr +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General 
Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + + +from linstorvolumemanager import LinstorVolumeManager +import linstor +import re +import util + + +class LinstorJournalerError(Exception): + pass + +# ============================================================================== + + +class LinstorJournaler: + """ + Simple journaler that uses LINSTOR properties for persistent "storage". + A journal is a id-value pair, and there can be only one journal for a + given id. An identifier is juste a transaction name. + """ + + REG_TYPE = re.compile('^([^/]+)$') + REG_TRANSACTION = re.compile('^[^/]+/([^/]+)$') + + """ + Types of transaction in the journal. + """ + CLONE = 'clone' + INFLATE = 'inflate' + + @staticmethod + def default_logger(*args): + print(args) + + def __init__(self, uri, group_name, logger=default_logger.__func__): + self._namespace = '{}journal/'.format( + LinstorVolumeManager._build_sr_namespace() + ) + + def connect(): + self._journal = linstor.KV( + LinstorVolumeManager._build_group_name(group_name), + uri=uri, + namespace=self._namespace + ) + + util.retry( + connect, + maxretry=60, + exceptions=[linstor.errors.LinstorNetworkError] + ) + self._logger = logger + + def create(self, type, identifier, value): + # TODO: Maybe rename to 'add' in the future (in Citrix code too). + + key = self._get_key(type, identifier) + + # 1. Ensure transaction doesn't exist. + current_value = self.get(type, identifier) + if current_value is not None: + raise LinstorJournalerError( + 'Journal transaction already exists for \'{}:{}\': {}' + .format(type, identifier, current_value) + ) + + # 2. Write! + try: + self._reset_namespace() + self._logger( + 'Create journal transaction \'{}:{}\''.format(type, identifier) + ) + self._journal[key] = str(value) + except Exception as e: + try: + self._journal.pop(key, 'empty') + except Exception as e2: + self._logger( + 'Failed to clean up failed journal write: {} (Ignored)' + .format(e2) + ) + + raise LinstorJournalerError( + 'Failed to write to journal: {}'.format(e) + ) + + def remove(self, type, identifier): + key = self._get_key(type, identifier) + try: + self._reset_namespace() + self._logger( + 'Destroy journal transaction \'{}:{}\'' + .format(type, identifier) + ) + self._journal.pop(key) + except Exception as e: + raise LinstorJournalerError( + 'Failed to remove transaction \'{}:{}\': {}' + .format(type, identifier, e) + ) + + def get(self, type, identifier): + return self._journal.get(self._get_key(type, identifier)) + + def get_all(self, type): + entries = {} + + self._journal.namespace = self._namespace + '{}/'.format(type) + for (key, value) in self._journal.items(): + res = self.REG_TYPE.match(key) + if res: + identifier = res.groups()[0] + entries[identifier] = value + return entries + + # Added to compatibility with Citrix API. 
+ def getAll(self, type): + return self.get_all(type) + + def has_entries(self, identifier): + self._reset_namespace() + for (key, value) in self._journal.items(): + res = self.REG_TRANSACTION.match(key) + if res: + current_identifier = res.groups()[0] + if current_identifier == identifier: + return True + return False + + # Added to compatibility with Citrix API. + def hasJournals(self, identifier): + return self.has_entries(identifier) + + def _reset_namespace(self): + self._journal.namespace = self._namespace + + @staticmethod + def _get_key(type, identifier): + return '{}/{}'.format(type, identifier) diff --git a/drivers/linstorvhdutil.py b/drivers/linstorvhdutil.py new file mode 100644 index 000000000..f31c75258 --- /dev/null +++ b/drivers/linstorvhdutil.py @@ -0,0 +1,186 @@ +#!/usr/bin/env python +# +# Copyright (C) 2020 Vates SAS - ronan.abhamon@vates.fr +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +import base64 +import distutils.util +import errno +import json +import socket +import util +import vhdutil +import xs_errors + +MANAGER_PLUGIN = 'linstor-manager' + + +def linstorhostcall(local_method, remote_method): + def decorated(func): + def wrapper(*args, **kwargs): + self = args[0] + vdi_uuid = args[1] + + device_path = self._linstor.build_device_path( + self._linstor.get_volume_name(vdi_uuid) + ) + + # A. Try a call using directly the DRBD device to avoid + # remote request. + + # Try to read locally if the device is not in use or if the device + # is up to date and not diskless. + (node_names, in_use) = \ + self._linstor.find_up_to_date_diskfull_nodes(vdi_uuid) + + try: + if not in_use or socket.gethostname() in node_names: + return local_method(device_path, *args[2:], **kwargs) + except util.CommandException as e: + # EMEDIUMTYPE constant (124) is not available in python2. + if e.code != errno.EROFS and e.code != 124: + raise + + # B. Execute the plugin on master or slave. 
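+ # The remote fallback goes through the linstor-manager XAPI plugin,
+ # e.g. (illustrative only): self._session.xenapi.host.call_plugin(
+ # host_ref, 'linstor-manager', remote_method,
+ # {'devicePath': ..., 'groupName': ...}).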
+ def exec_remote_method(): + host_ref = self._get_readonly_host( + vdi_uuid, device_path, node_names + ) + args = { + 'devicePath': device_path, + 'groupName': self._linstor.group_name + } + args.update(**kwargs) + + try: + response = self._session.xenapi.host.call_plugin( + host_ref, MANAGER_PLUGIN, remote_method, args + ) + except Exception as e: + util.SMlog('call-plugin ({} with {}) exception: {}'.format( + remote_method, args, e + )) + raise + + util.SMlog('call-plugin ({} with {}) returned: {}'.format( + remote_method, args, response + )) + if response == 'False': + raise xs_errors.XenError( + 'VDIUnavailable', + opterr='Plugin {} failed'.format(MANAGER_PLUGIN) + ) + kwargs['response'] = response + + util.retry(exec_remote_method, 5, 3) + return func(*args, **kwargs) + return wrapper + return decorated + + +class LinstorVhdUtil: + def __init__(self, session, linstor): + self._session = session + self._linstor = linstor + + @linstorhostcall(vhdutil.check, 'check') + def check(self, vdi_uuid, **kwargs): + return distutils.util.strtobool(kwargs['response']) + + def get_vhd_info(self, vdi_uuid, include_parent=True): + kwargs = {'includeParent': str(include_parent)} + return self._get_vhd_info(vdi_uuid, self._extract_uuid, **kwargs) + + @linstorhostcall(vhdutil.getVHDInfo, 'getVHDInfo') + def _get_vhd_info(self, vdi_uuid, *args, **kwargs): + obj = json.loads(kwargs['response']) + + vhd_info = vhdutil.VHDInfo(vdi_uuid) + vhd_info.sizeVirt = obj['sizeVirt'] + vhd_info.sizePhys = obj['sizePhys'] + if 'parentPath' in obj: + vhd_info.parentPath = obj['parentPath'] + vhd_info.parentUuid = obj['parentUuid'] + vhd_info.hidden = obj['hidden'] + vhd_info.path = obj['path'] + + return vhd_info + + @linstorhostcall(vhdutil.hasParent, 'hasParent') + def has_parent(self, vdi_uuid, **kwargs): + return distutils.util.strtobool(kwargs['response']) + + def get_parent(self, vdi_uuid): + return self._get_parent(vdi_uuid, self._extract_uuid) + + @linstorhostcall(vhdutil.getParent, 'getParent') + def _get_parent(self, vdi_uuid, *args, **kwargs): + return kwargs['response'] + + @linstorhostcall(vhdutil.getSizeVirt, 'getSizeVirt') + def get_size_virt(self, vdi_uuid, **kwargs): + return int(kwargs['response']) + + @linstorhostcall(vhdutil.getSizePhys, 'getSizePhys') + def get_size_phys(self, vdi_uuid, **kwargs): + return int(kwargs['response']) + + @linstorhostcall(vhdutil.getDepth, 'getDepth') + def get_depth(self, vdi_uuid, **kwargs): + return int(kwargs['response']) + + @linstorhostcall(vhdutil.getKeyHash, 'getKeyHash') + def get_key_hash(self, vdi_uuid, **kwargs): + return kwargs['response'] or None + + @linstorhostcall(vhdutil.getBlockBitmap, 'getBlockBitmap') + def get_block_bitmap(self, vdi_uuid, **kwargs): + return base64.b64decode(kwargs['response']) + + # -------------------------------------------------------------------------- + # Helpers. + # -------------------------------------------------------------------------- + + def _extract_uuid(self, device_path): + # TODO: Remove new line in the vhdutil module. Not here. + return self._linstor.get_volume_uuid_from_device_path( + device_path.rstrip('\n') + ) + + def _get_readonly_host(self, vdi_uuid, device_path, node_names): + """ + When vhd-util is called to fetch VDI info we must find a + diskfull DRBD disk to read the data. It's the goal of this function. + Why? Because when a VHD is open in RO mode, the LVM layer is used + directly to bypass DRBD verifications (we can have only one process + that reads/writes to disk with DRBD devices). 
+ """ + + if not node_names: + raise xs_errors.XenError( + 'VDIUnavailable', + opterr='Unable to find diskfull node: {} (path={})' + .format(vdi_uuid, device_path) + ) + + hosts = self._session.xenapi.host.get_all_records() + for host_ref, host_record in hosts.items(): + if host_record['hostname'] in node_names: + return host_ref + + raise xs_errors.XenError( + 'VDIUnavailable', + opterr='Unable to find a valid host from VDI: {} (path={})' + .format(vdi_uuid, device_path) + ) diff --git a/drivers/linstorvolumemanager.py b/drivers/linstorvolumemanager.py new file mode 100755 index 000000000..d40042179 --- /dev/null +++ b/drivers/linstorvolumemanager.py @@ -0,0 +1,1713 @@ +#!/usr/bin/env python +# +# Copyright (C) 2020 Vates SAS - ronan.abhamon@vates.fr +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + + +import json +import linstor +import os.path +import re +import socket +import time +import util + + +def round_up(value, divisor): + assert divisor + divisor = int(divisor) + return int((int(value) + divisor - 1) / divisor) * divisor + + +def round_down(value, divisor): + assert divisor + value = int(value) + return value - (value % int(divisor)) + + +class LinstorVolumeManagerError(Exception): + ERR_GENERIC = 0, + ERR_VOLUME_EXISTS = 1, + ERR_VOLUME_NOT_EXISTS = 2 + + def __init__(self, message, code=ERR_GENERIC): + super(LinstorVolumeManagerError, self).__init__(message) + self._code = code + + @property + def code(self): + return self._code + +# ============================================================================== + +# Note: +# If a storage pool is not accessible after a network change: +# linstor node interface modify default --ip + + +class LinstorVolumeManager(object): + """ + API to manager LINSTOR volumes in XCP-ng. + A volume in this context is a physical part of the storage layer. + """ + + DEV_ROOT_PATH = '/dev/drbd/by-res/' + + # Default LVM extent size. + BLOCK_SIZE = 4 * 1024 * 1024 + + # List of volume properties. + PROP_METADATA = 'metadata' + PROP_NOT_EXISTS = 'not-exists' + PROP_VOLUME_NAME = 'volume-name' + PROP_IS_READONLY_TIMESTAMP = 'readonly-timestamp' + + # A volume can only be locked for a limited duration. + # The goal is to give enough time to slaves to execute some actions on + # a device before an UUID update or a coalesce for example. + # Expiration is expressed in seconds. + LOCKED_EXPIRATION_DELAY = 1 * 60 + + # Used when volume uuid is being updated. + PROP_UPDATING_UUID_SRC = 'updating-uuid-src' + + # States of property PROP_NOT_EXISTS. + STATE_EXISTS = '0' + STATE_NOT_EXISTS = '1' + STATE_CREATING = '2' + + # Property namespaces. + NAMESPACE_SR = 'xcp/sr' + NAMESPACE_VOLUME = 'volume' + + # Regex to match properties. 
+ REG_PROP = '^([^/]+)/{}$' + + REG_METADATA = re.compile(REG_PROP.format(PROP_METADATA)) + REG_NOT_EXISTS = re.compile(REG_PROP.format(PROP_NOT_EXISTS)) + REG_VOLUME_NAME = re.compile(REG_PROP.format(PROP_VOLUME_NAME)) + REG_UPDATING_UUID_SRC = re.compile(REG_PROP.format(PROP_UPDATING_UUID_SRC)) + + # Prefixes of SR/VOLUME in the LINSTOR DB. + # A LINSTOR (resource, group, ...) name cannot start with a number. + # So we add a prefix behind our SR/VOLUME uuids. + PREFIX_SR = 'xcp-sr-' + PREFIX_VOLUME = 'xcp-volume-' + + @staticmethod + def default_logger(*args): + print(args) + + # -------------------------------------------------------------------------- + # API. + # -------------------------------------------------------------------------- + + class VolumeInfo(object): + __slots__ = ( + 'name', + 'physical_size', # Total physical size used by this volume on + # all disks. + 'virtual_size' # Total virtual available size of this volume + # (i.e. the user size at creation). + ) + + def __init__(self, name): + self.name = name + self.physical_size = 0 + self.virtual_size = 0 + + def __repr__(self): + return 'VolumeInfo("{}", {}, {})'.format( + self.name, self.physical_size, self.virtual_size + ) + + # -------------------------------------------------------------------------- + + def __init__( + self, uri, group_name, repair=False, logger=default_logger.__func__ + ): + """ + Create a new LinstorApi object. + :param str uri: URI to communicate with the LINSTOR controller. + :param str group_name: The SR goup name to use. + :param bool repair: If true we try to remove bad volumes due to a crash + or unexpected behavior. + :param function logger: Function to log messages. + """ + + self._uri = uri + self._linstor = self._create_linstor_instance(uri) + self._base_group_name = group_name + + # Ensure group exists. + group_name = self._build_group_name(group_name) + groups = self._linstor.resource_group_list_raise([group_name]) + groups = groups.resource_groups + if not groups: + raise LinstorVolumeManagerError( + 'Unable to find `{}` Linstor SR'.format(group_name) + ) + + # Ok. ;) + self._logger = logger + self._redundancy = groups[0].select_filter.place_count + self._group_name = group_name + self._build_volumes(repair=repair) + + @property + def group_name(self): + """ + Give the used group name. + :return: The group name. + :rtype: str + """ + return self._base_group_name + + @property + def volumes(self): + """ + Give the volumes uuid set. + :return: The volumes uuid set. + :rtype: set(str) + """ + return self._volumes + + @property + def volumes_with_name(self): + """ + Give a volume dictionnary that contains names actually owned. + :return: A volume/name dict. + :rtype: dict(str, str) + """ + return self._get_volumes_by_property(self.REG_VOLUME_NAME) + + @property + def volumes_with_info(self): + """ + Give a volume dictionnary that contains VolumeInfos. + :return: A volume/VolumeInfo dict. + :rtype: dict(str, VolumeInfo) + """ + + volumes = {} + + all_volume_info = self._get_volumes_info() + volume_names = self.volumes_with_name + for volume_uuid, volume_name in volume_names.items(): + if volume_name: + volume_info = all_volume_info.get(volume_name) + if volume_info: + volumes[volume_uuid] = volume_info + continue + + # Well I suppose if this volume is not available, + # LINSTOR has been used directly without using this API. 
+ volumes[volume_uuid] = self.VolumeInfo('') + + return volumes + + @property + def volumes_with_metadata(self): + """ + Give a volume dictionnary that contains metadata. + :return: A volume/metadata dict. + :rtype: dict(str, dict) + """ + + volumes = {} + + metadata = self._get_volumes_by_property(self.REG_METADATA) + for volume_uuid, volume_metadata in metadata.items(): + if volume_metadata: + volume_metadata = json.loads(volume_metadata) + if isinstance(volume_metadata, dict): + volumes[volume_uuid] = volume_metadata + continue + raise LinstorVolumeManagerError( + 'Expected dictionary in volume metadata: {}' + .format(volume_uuid) + ) + + volumes[volume_uuid] = {} + + return volumes + + @property + def max_volume_size_allowed(self): + """ + Give the max volume size currently available in B. + :return: The current size. + :rtype: int + """ + + candidates = self._find_best_size_candidates() + if not candidates: + raise LinstorVolumeManagerError( + 'Failed to get max volume size allowed' + ) + + size = candidates[0].max_volume_size + if size < 0: + raise LinstorVolumeManagerError( + 'Invalid max volume size allowed given: {}'.format(size) + ) + return self.round_down_volume_size(size * 1024) + + @property + def physical_size(self): + """ + Give the total physical size of the SR. + :return: The physical size. + :rtype: int + """ + return self._compute_size('total_capacity') + + @property + def physical_free_size(self): + """ + Give the total free physical size of the SR. + :return: The physical free size. + :rtype: int + """ + return self._compute_size('free_capacity') + + @property + def total_allocated_volume_size(self): + """ + Give the sum of all created volumes. + :return: The physical required size to use the volumes. + :rtype: int + """ + + size = 0 + for resource in self._linstor.resource_list_raise().resources: + for volume in resource.volumes: + # We ignore diskless pools of the form "DfltDisklessStorPool". + if volume.storage_pool_name == self._group_name: + current_size = volume.usable_size + if current_size < 0: + raise LinstorVolumeManagerError( + 'Failed to get usable size of `{}` on `{}`' + .format(resource.name, volume.storage_pool_name) + ) + size += current_size + return size * 1024 + + @property + def metadata(self): + """ + Get the metadata of the SR. + :return: Dictionary that contains metadata. + :rtype: dict(str, dict) + """ + + sr_properties = self._get_sr_properties() + metadata = sr_properties.get(self.PROP_METADATA) + if metadata is not None: + metadata = json.loads(metadata) + if isinstance(metadata, dict): + return metadata + raise LinstorVolumeManagerError( + 'Expected dictionary in SR metadata: {}'.format( + self._group_name + ) + ) + + return {} + + @metadata.setter + def metadata(self, metadata): + """ + Set the metadata of the SR. + :param dict metadata: Dictionary that contains metadata. + """ + + assert isinstance(metadata, dict) + sr_properties = self._get_sr_properties() + sr_properties[self.PROP_METADATA] = json.dumps(metadata) + + @property + def disconnected_hosts(self): + """ + Get the list of disconnected hosts. + :return: Set that contains disconnected hosts. 
+ :rtype: set(str) + """ + + pools = self._linstor.storage_pool_list_raise( + filter_by_stor_pools=[self._group_name] + ).storage_pools + + disconnected_hosts = set() + for pool in pools: + for report in pool.reports: + if report.ret_code & linstor.consts.WARN_NOT_CONNECTED == \ + linstor.consts.WARN_NOT_CONNECTED: + disconnected_hosts.add(pool.node_name) + break + return disconnected_hosts + + def check_volume_exists(self, volume_uuid): + """ + Check if a volume exists in the SR. + :return: True if volume exists. + :rtype: bool + """ + return volume_uuid in self._volumes + + def create_volume(self, volume_uuid, size, persistent=True): + """ + Create a new volume on the SR. + :param str volume_uuid: The volume uuid to use. + :param int size: volume size in B. + :param bool persistent: If false the volume will be unavailable + on the next constructor call LinstorSR(...). + :return: The current device path of the volume. + :rtype: str + """ + + self._logger('Creating LINSTOR volume {}...'.format(volume_uuid)) + volume_name = self.build_volume_name(util.gen_uuid()) + volume_properties = self._create_volume_with_properties( + volume_uuid, volume_name, size, place_resources=True + ) + + try: + self._logger( + 'Find device path of LINSTOR volume {}...'.format(volume_uuid) + ) + device_path = self._find_device_path(volume_uuid, volume_name) + if persistent: + volume_properties[self.PROP_NOT_EXISTS] = self.STATE_EXISTS + self._volumes.add(volume_uuid) + self._logger( + 'LINSTOR volume {} created!'.format(volume_uuid) + ) + return device_path + except Exception: + self._force_destroy_volume(volume_uuid, volume_properties) + raise + + def mark_volume_as_persistent(self, volume_uuid): + """ + Mark volume as persistent if created with persistent=False. + :param str volume_uuid: The volume uuid to mark. + """ + + self._ensure_volume_exists(volume_uuid) + + # Mark volume as persistent. + volume_properties = self._get_volume_properties(volume_uuid) + volume_properties[self.PROP_NOT_EXISTS] = self.STATE_EXISTS + + def destroy_volume(self, volume_uuid): + """ + Destroy a volume. + :param str volume_uuid: The volume uuid to destroy. + """ + + self._ensure_volume_exists(volume_uuid) + self.ensure_volume_is_not_locked(volume_uuid) + + # Mark volume as destroyed. + volume_properties = self._get_volume_properties(volume_uuid) + volume_properties[self.PROP_NOT_EXISTS] = self.STATE_NOT_EXISTS + + self._volumes.remove(volume_uuid) + self._destroy_volume(volume_uuid, volume_properties) + + def lock_volume(self, volume_uuid, locked=True): + """ + Prevent modifications of the volume properties during + "self.LOCKED_EXPIRATION_DELAY" seconds. The SR must be locked + when used. This method is useful to attach/detach correctly a volume on + a slave. Without it the GC can rename a volume, in this case the old + volume path can be used by a slave... + :param str volume_uuid: The volume uuid to protect/unprotect. + :param bool locked: Lock/unlock the volume. + """ + + self._ensure_volume_exists(volume_uuid) + + self._logger( + '{} volume {} as locked'.format( + 'Mark' if locked else 'Unmark', + volume_uuid + ) + ) + + volume_properties = self._get_volume_properties(volume_uuid) + if locked: + volume_properties[ + self.PROP_IS_READONLY_TIMESTAMP + ] = str(time.time()) + elif self.PROP_IS_READONLY_TIMESTAMP in volume_properties: + volume_properties.pop(self.PROP_IS_READONLY_TIMESTAMP) + + def ensure_volume_is_not_locked(self, volume_uuid, timeout=None): + """ + Ensure a volume is not locked. Wait if necessary. 
+ :param str volume_uuid: The volume uuid to check. + :param int timeout: If the volume is always locked after the expiration + of the timeout, an exception is thrown. + """ + return self.ensure_volume_list_is_not_locked([volume_uuid], timeout) + + def ensure_volume_list_is_not_locked(self, volume_uuids, timeout=None): + checked = set() + for volume_uuid in volume_uuids: + if volume_uuid in self._volumes: + checked.add(volume_uuid) + + if not checked: + return + + waiting = False + + start = time.time() + while True: + # Can't delete in for loop, use a copy of the list. + remaining = checked.copy() + for volume_uuid in checked: + volume_properties = self._get_volume_properties(volume_uuid) + timestamp = volume_properties.get( + self.PROP_IS_READONLY_TIMESTAMP + ) + if timestamp is None: + remaining.remove(volume_uuid) + continue + + now = time.time() + if now - float(timestamp) > self.LOCKED_EXPIRATION_DELAY: + self._logger( + 'Remove readonly timestamp on {}'.format(volume_uuid) + ) + volume_properties.pop(self.PROP_IS_READONLY_TIMESTAMP) + remaining.remove(volume_uuid) + continue + + if not waiting: + self._logger( + 'Volume {} is locked, waiting...'.format(volume_uuid) + ) + waiting = True + break + + if not remaining: + break + checked = remaining + + if timeout is not None and now - start > timeout: + raise LinstorVolumeManagerError( + 'volume `{}` is locked and timeout has been reached' + .format(volume_uuid), + LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS + ) + + # We must wait to use the volume. After that we can modify it + # ONLY if the SR is locked to avoid bad reads on the slaves. + time.sleep(1) + + if waiting: + self._logger('No volume locked now!') + + def introduce_volume(self, volume_uuid): + pass # TODO: Implement me. + + def resize_volume(self, volume_uuid, new_size): + """ + Resize a volume. + :param str volume_uuid: The volume uuid to resize. + :param int new_size: New size in B. + """ + + volume_name = self.get_volume_name(volume_uuid) + self.ensure_volume_is_not_locked(volume_uuid) + new_size = self.round_up_volume_size(new_size) + + result = self._linstor.volume_dfn_modify( + rsc_name=volume_name, + volume_nr=0, + size=new_size / 1024 + ) + error_str = self._get_error_str(result) + if error_str: + raise LinstorVolumeManagerError( + 'Could not resize volume `{}` from SR `{}`: {}' + .format(volume_uuid, self._group_name, error_str) + ) + + def get_volume_name(self, volume_uuid): + """ + Get the name of a particular volume. + :param str volume_uuid: The volume uuid of the name to get. + :return: The volume name. + :rtype: str + """ + + self._ensure_volume_exists(volume_uuid) + volume_properties = self._get_volume_properties(volume_uuid) + volume_name = volume_properties.get(self.PROP_VOLUME_NAME) + if volume_name: + return volume_name + raise LinstorVolumeManagerError( + 'Failed to get volume name of {}'.format(volume_uuid) + ) + + def get_volume_size(self, volume_uuid): + """ + Get the size of a particular volume. + :param str volume_uuid: The volume uuid of the size to get. + :return: The volume size. 
+ :rtype: int + """ + + volume_name = self.get_volume_name(volume_uuid) + dfns = self._linstor.resource_dfn_list_raise( + query_volume_definitions=True, + filter_by_resource_definitions=[volume_name] + ).resource_definitions + + size = dfns[0].volume_definitions[0].size + if size < 0: + raise LinstorVolumeManagerError( + 'Failed to get volume size of: {}'.format(volume_uuid) + ) + return size * 1024 + + def get_volume_info(self, volume_uuid): + """ + Get the volume info of a particular volume. + :param str volume_uuid: The volume uuid of the volume info to get. + :return: The volume info. + :rtype: VolumeInfo + """ + + volume_name = self.get_volume_name(volume_uuid) + return self._get_volumes_info(filter=[volume_name])[volume_name] + + def get_device_path(self, volume_uuid): + """ + Get the dev path of a volume. + :param str volume_uuid: The volume uuid to get the dev path. + :return: The current device path of the volume. + :rtype: str + """ + + volume_name = self.get_volume_name(volume_uuid) + return self._find_device_path(volume_uuid, volume_name) + + def get_volume_uuid_from_device_path(self, device_path): + """ + Get the volume uuid of a device_path. + :param str device_path: The dev path to find the volume uuid. + :return: The volume uuid of the local device path. + :rtype: str + """ + + expected_volume_name = \ + self.get_volume_name_from_device_path(device_path) + + volume_names = self.volumes_with_name + for volume_uuid, volume_name in volume_names.items(): + if volume_name == expected_volume_name: + return volume_uuid + + raise LinstorVolumeManagerError( + 'Unable to find volume uuid from dev path `{}`'.format(device_path) + ) + + def get_volume_name_from_device_path(self, device_path): + """ + Get the volume name of a device_path on the current host. + :param str device_path: The dev path to find the volume name. + :return: The volume name of the local device path. + :rtype: str + """ + + node_name = socket.gethostname() + resources = self._linstor.resource_list_raise( + filter_by_nodes=[node_name] + ).resources + + real_device_path = os.path.realpath(device_path) + for resource in resources: + if resource.volumes[0].device_path == real_device_path: + return resource.name + + raise LinstorVolumeManagerError( + 'Unable to find volume name from dev path `{}`' + .format(device_path) + ) + + def update_volume_uuid(self, volume_uuid, new_volume_uuid, force=False): + """ + Change the uuid of a volume. + :param str volume_uuid: The volume to modify. + :param str new_volume_uuid: The new volume uuid to use. + :param bool force: If true we doesn't check if volume_uuid is in the + volume list. I.e. the volume can be marked as deleted but the volume + can still be in the LINSTOR KV store if the deletion has failed. + In specific cases like "undo" after a failed clone we must rename a bad + deleted VDI. + """ + + self._logger( + 'Trying to update volume UUID {} to {}...' 
+ .format(volume_uuid, new_volume_uuid) + ) + if not force: + self._ensure_volume_exists(volume_uuid) + self.ensure_volume_is_not_locked(volume_uuid) + + if new_volume_uuid in self._volumes: + raise LinstorVolumeManagerError( + 'Volume `{}` already exists'.format(new_volume_uuid), + LinstorVolumeManagerError.ERR_VOLUME_EXISTS + ) + + volume_properties = self._get_volume_properties(volume_uuid) + if volume_properties.get(self.PROP_UPDATING_UUID_SRC): + raise LinstorVolumeManagerError( + 'Cannot update volume uuid {}: invalid state' + .format(volume_uuid) + ) + + new_volume_properties = self._get_volume_properties( + new_volume_uuid + ) + if list(new_volume_properties.items()): + raise LinstorVolumeManagerError( + 'Cannot update volume uuid {} to {}: ' + .format(volume_uuid, new_volume_uuid) + + 'this last one is not empty' + ) + + assert volume_properties.namespace != \ + new_volume_properties.namespace + + try: + # 1. Mark new volume properties with PROP_UPDATING_UUID_SRC. + # If we crash after that, the new properties can be removed + # properly. + new_volume_properties[self.PROP_NOT_EXISTS] = self.STATE_NOT_EXISTS + new_volume_properties[self.PROP_UPDATING_UUID_SRC] = volume_uuid + + # 2. Copy the properties. + for property in [self.PROP_METADATA, self.PROP_VOLUME_NAME]: + new_volume_properties[property] = \ + volume_properties.get(property) + + # 3. Ok! + new_volume_properties[self.PROP_NOT_EXISTS] = self.STATE_EXISTS + except Exception as e: + try: + new_volume_properties.clear() + except Exception as e: + self._logger( + 'Failed to clear new volume properties: {} (ignoring...)' + .format(e) + ) + raise LinstorVolumeManagerError( + 'Failed to copy volume properties: {}'.format(e) + ) + + try: + # 4. After this point, it's ok we can remove the + # PROP_UPDATING_UUID_SRC property and clear the src properties + # without problems. + volume_properties.clear() + new_volume_properties.pop(self.PROP_UPDATING_UUID_SRC) + except Exception as e: + raise LinstorVolumeManagerError( + 'Failed to clear volume properties ' + 'after volume uuid update: {}'.format(e) + ) + + self._volumes.remove(volume_uuid) + self._volumes.add(new_volume_uuid) + + self._logger( + 'UUID update succeeded of {} to {}! (properties={})' + .format( + volume_uuid, new_volume_uuid, + self._get_filtered_properties(new_volume_properties) + ) + ) + + def update_volume_name(self, volume_uuid, volume_name): + """ + Change the volume name of a volume. + :param str volume_uuid: The volume to modify. + :param str volume_name: The volume_name to use. + """ + + self._ensure_volume_exists(volume_uuid) + self.ensure_volume_is_not_locked(volume_uuid) + if not volume_name.startswith(self.PREFIX_VOLUME): + raise LinstorVolumeManagerError( + 'Volume name `{}` must be start with `{}`' + .format(volume_name, self.PREFIX_VOLUME) + ) + + if volume_name not in self._fetch_resource_names(): + raise LinstorVolumeManagerError( + 'Volume `{}` doesn\'t exist'.format(volume_name) + ) + + volume_properties = self._get_volume_properties(volume_uuid) + volume_properties[self.PROP_VOLUME_NAME] = volume_name + + def get_usage_states(self, volume_uuid): + """ + Check if a volume is currently used. + :param str volume_uuid: The volume uuid to check. + :return: A dictionnary that contains states. 
+ :rtype: dict(str, bool or None) + """ + + states = {} + + volume_name = self.get_volume_name(volume_uuid) + for resource_state in self._linstor.resource_list_raise( + filter_by_resources=[volume_name] + ).resource_states: + states[resource_state.node_name] = resource_state.in_use + + return states + + def get_volume_metadata(self, volume_uuid): + """ + Get the metadata of a volume. + :return: Dictionary that contains metadata. + :rtype: dict + """ + + self._ensure_volume_exists(volume_uuid) + volume_properties = self._get_volume_properties(volume_uuid) + metadata = volume_properties.get(self.PROP_METADATA) + if metadata: + metadata = json.loads(metadata) + if isinstance(metadata, dict): + return metadata + raise LinstorVolumeManagerError( + 'Expected dictionary in volume metadata: {}' + .format(volume_uuid) + ) + return {} + + def set_volume_metadata(self, volume_uuid, metadata): + """ + Set the metadata of a volume. + :param dict metadata: Dictionary that contains metadata. + """ + + self._ensure_volume_exists(volume_uuid) + self.ensure_volume_is_not_locked(volume_uuid) + + assert isinstance(metadata, dict) + volume_properties = self._get_volume_properties(volume_uuid) + volume_properties[self.PROP_METADATA] = json.dumps(metadata) + + def update_volume_metadata(self, volume_uuid, metadata): + """ + Update the metadata of a volume. It modify only the given keys. + It doesn't remove unreferenced key instead of set_volume_metadata. + :param dict metadata: Dictionary that contains metadata. + """ + + self._ensure_volume_exists(volume_uuid) + self.ensure_volume_is_not_locked(volume_uuid) + + assert isinstance(metadata, dict) + volume_properties = self._get_volume_properties(volume_uuid) + + current_metadata = json.loads( + volume_properties.get(self.PROP_METADATA, '{}') + ) + if not isinstance(metadata, dict): + raise LinstorVolumeManagerError( + 'Expected dictionary in volume metadata: {}' + .format(volume_uuid) + ) + + for key, value in metadata.items(): + current_metadata[key] = value + volume_properties[self.PROP_METADATA] = json.dumps(current_metadata) + + def shallow_clone_volume(self, volume_uuid, clone_uuid, persistent=True): + """ + Clone a volume. Do not copy the data, this method creates a new volume + with the same size. It tries to create the volume on the same host + than volume source. + :param str volume_uuid: The volume to clone. + :param str clone_uuid: The cloned volume. + :param bool persistent: If false the volume will be unavailable + on the next constructor call LinstorSR(...). + :return: The current device path of the cloned volume. + :rtype: str + """ + + volume_name = self.get_volume_name(volume_uuid) + self.ensure_volume_is_not_locked(volume_uuid) + + # 1. Find ideal nodes + size to use. + ideal_node_names, size = self._get_volume_node_names_and_size( + volume_name + ) + if size <= 0: + raise LinstorVolumeManagerError( + 'Invalid size of {} for volume `{}`'.format(size, volume_name) + ) + + # 2. Find the node(s) with the maximum space. + candidates = self._find_best_size_candidates() + if not candidates: + raise LinstorVolumeManagerError( + 'Unable to shallow clone volume `{}`, no free space found.' + ) + + # 3. Compute node names and search if we can try to clone + # on the same nodes than volume. 
+ def find_best_nodes(): + for candidate in candidates: + for node_name in candidate.node_names: + if node_name in ideal_node_names: + return candidate.node_names + + node_names = find_best_nodes() + if not node_names: + node_names = candidates[0].node_names + + if len(node_names) < self._redundancy: + raise LinstorVolumeManagerError( + 'Unable to shallow clone volume `{}`, '.format(volume_uuid) + + '{} are required to clone, found: {}'.format( + self._redundancy, len(node_names) + ) + ) + + # 4. Compute resources to create. + clone_volume_name = self.build_volume_name(util.gen_uuid()) + diskless_node_names = self._get_node_names() + resources = [] + for node_name in node_names: + diskless_node_names.remove(node_name) + resources.append(linstor.ResourceData( + node_name=node_name, + rsc_name=clone_volume_name, + storage_pool=self._group_name + )) + for node_name in diskless_node_names: + resources.append(linstor.ResourceData( + node_name=node_name, + rsc_name=clone_volume_name, + diskless=True + )) + + # 5. Create resources! + def clean(properties): + try: + self._destroy_volume(clone_uuid, properties) + except Exception as e: + self._logger( + 'Unable to destroy volume {} after shallow clone fail: {}' + .format(clone_uuid, e) + ) + + def create(): + try: + volume_properties = self._create_volume_with_properties( + clone_uuid, clone_volume_name, size, + place_resources=False + ) + + result = self._linstor.resource_create(resources) + error_str = self._get_error_str(result) + if error_str: + raise LinstorVolumeManagerError( + 'Could not create cloned volume `{}` of `{}` from ' + 'SR `{}`: {}'.format( + clone_uuid, volume_uuid, self._group_name, + error_str + ) + ) + return volume_properties + except Exception: + clean(volume_properties) + raise + + # Retry because we can get errors like this: + # "Resource disappeared while waiting for it to be ready" or + # "Resource did not became ready on node 'XXX' within reasonable time, check Satellite for errors." + # in the LINSTOR server. + volume_properties = util.retry(create, maxretry=5) + + try: + device_path = self._find_device_path(clone_uuid, clone_volume_name) + if persistent: + volume_properties[self.PROP_NOT_EXISTS] = self.STATE_EXISTS + self._volumes.add(clone_uuid) + return device_path + except Exception as e: + clean(volume_properties) + raise + + def remove_resourceless_volumes(self): + """ + Remove all volumes without valid or non-empty name + (i.e. without LINSTOR resource). It's different than + LinstorVolumeManager constructor that takes a `repair` param that + removes volumes with `PROP_NOT_EXISTS` to 1. + """ + + resource_names = self._fetch_resource_names() + for volume_uuid, volume_name in self.volumes_with_name.items(): + if not volume_name or volume_name not in resource_names: + self.destroy_volume(volume_uuid) + + def destroy(self, force=False): + """ + Destroy this SR. Object should not be used after that. + :param bool force: Try to destroy volumes before if true. + """ + + if (force): + for volume_uuid in self._volumes: + self.destroy_volume(volume_uuid) + + # TODO: Throw exceptions in the helpers below if necessary. + # TODO: What's the required action if it exists remaining volumes? 
+ + self._destroy_resource_group(self._linstor, self._group_name) + + pools = self._linstor.storage_pool_list_raise( + filter_by_stor_pools=[self._group_name] + ).storage_pools + for pool in pools: + self._destroy_storage_pool( + self._linstor, pool.name, pool.node_name + ) + + def find_up_to_date_diskfull_nodes(self, volume_uuid): + """ + Find all nodes that contain a specific volume using diskfull disks. + The disk must be up to data to be used. + :param str volume_uuid: The volume to use. + :return: The available nodes. + :rtype: tuple(set(str), bool) + """ + + volume_name = self.get_volume_name(volume_uuid) + + in_use = False + node_names = set() + resource_list = self._linstor.resource_list_raise( + filter_by_resources=[volume_name] + ) + for resource_state in resource_list.resource_states: + volume_state = resource_state.volume_states[0] + if volume_state.disk_state == 'UpToDate': + node_names.add(resource_state.node_name) + if resource_state.in_use: + in_use = True + + return (node_names, in_use) + + @classmethod + def create_sr( + cls, uri, group_name, node_names, redundancy, + thin_provisioning=False, + logger=default_logger.__func__ + ): + """ + Create a new SR on the given nodes. + :param str uri: URI to communicate with the LINSTOR controller. + :param str group_name: The SR group_name to use. + :param list[str] node_names: String list of nodes. + :param int redundancy: How many copy of volumes should we store? + :param function logger: Function to log messages. + :return: A new LinstorSr instance. + :rtype: LinstorSr + """ + + # 1. Check if SR already exists. + lin = cls._create_linstor_instance(uri) + driver_pool_name = group_name + group_name = cls._build_group_name(group_name) + pools = lin.storage_pool_list_raise(filter_by_stor_pools=[group_name]) + + # TODO: Maybe if the SR already exists and if the nodes are the same, + # we can try to use it directly. + pools = pools.storage_pools + if pools: + existing_node_names = map(lambda pool: pool.node_name, pools) + raise LinstorVolumeManagerError( + 'Unable to create SR `{}`. It already exists on node(s): {}' + .format(group_name, existing_node_names) + ) + + if lin.resource_group_list_raise( + [group_name] + ).resource_groups: + raise LinstorVolumeManagerError( + 'Unable to create SR `{}`: The group name already exists' + .format(group_name) + ) + + if thin_provisioning: + driver_pool_parts = driver_pool_name.split('/') + if not len(driver_pool_parts) == 2: + raise LinstorVolumeManagerError( + 'Invalid group name using thin provisioning. ' + 'Expected format: \'VG/LV`\'' + ) + + # 2. Create storage pool on each node + resource group. + i = 0 + try: + # 2.a. Create storage pools. + while i < len(node_names): + node_name = node_names[i] + + result = lin.storage_pool_create( + node_name=node_name, + storage_pool_name=group_name, + storage_driver='LVM_THIN' if thin_provisioning else 'LVM', + driver_pool_name=driver_pool_name + ) + + error_str = cls._get_error_str(result) + if error_str: + raise LinstorVolumeManagerError( + 'Could not create SP `{}` on node `{}`: {}'.format( + group_name, + node_name, + error_str + ) + ) + i += 1 + + # 2.b. Create resource group. + result = lin.resource_group_create( + name=group_name, + place_count=redundancy, + storage_pool=group_name, + diskless_on_remaining=True + ) + error_str = cls._get_error_str(result) + if error_str: + raise LinstorVolumeManagerError( + 'Could not create RG `{}`: {}'.format( + group_name, error_str + ) + ) + + # 2.c. Create volume group. 
+ result = lin.volume_group_create(group_name) + error_str = cls._get_error_str(result) + if error_str: + raise LinstorVolumeManagerError( + 'Could not create VG `{}`: {}'.format( + group_name, error_str + ) + ) + + # 3. Remove storage pools/resource/volume group in the case of errors. + except Exception as e: + try: + cls._destroy_resource_group(lin, group_name) + except Exception: + pass + j = 0 + i = min(i, len(node_names) - 1) + while j <= i: + try: + cls._destroy_storage_pool(lin, group_name, node_names[j]) + except Exception: + pass + j += 1 + raise e + + # 4. Return new instance. + instance = cls.__new__(cls) + instance._uri = uri + instance._linstor = lin + instance._logger = logger + instance._redundancy = redundancy + instance._group_name = group_name + instance._volumes = set() + return instance + + @classmethod + def build_device_path(cls, volume_name): + """ + Build a device path given a volume name. + :param str volume_name: The volume name to use. + :return: A valid or not device path. + :rtype: str + """ + + return '{}{}/0'.format(cls.DEV_ROOT_PATH, volume_name) + + @classmethod + def build_volume_name(cls, base_name): + """ + Build a volume name given a base name (i.e. a UUID). + :param str volume_name: The volume name to use. + :return: A valid or not device path. + :rtype: str + """ + return '{}{}'.format(cls.PREFIX_VOLUME, base_name) + + @classmethod + def round_up_volume_size(cls, volume_size): + """ + Align volume size on higher multiple of BLOCK_SIZE. + :param int volume_size: The volume size to align. + :return: An aligned volume size. + :rtype: int + """ + return round_up(volume_size, cls.BLOCK_SIZE) + + @classmethod + def round_down_volume_size(cls, volume_size): + """ + Align volume size on lower multiple of BLOCK_SIZE. + :param int volume_size: The volume size to align. + :return: An aligned volume size. + :rtype: int + """ + return round_down(volume_size, cls.BLOCK_SIZE) + + # -------------------------------------------------------------------------- + # Private helpers. + # -------------------------------------------------------------------------- + + def _ensure_volume_exists(self, volume_uuid): + if volume_uuid not in self._volumes: + raise LinstorVolumeManagerError( + 'volume `{}` doesn\'t exist'.format(volume_uuid), + LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS + ) + + def _find_best_size_candidates(self): + result = self._linstor.resource_group_qmvs(self._group_name) + error_str = self._get_error_str(result) + if error_str: + raise LinstorVolumeManagerError( + 'Failed to get max volume size allowed of SR `{}`: {}'.format( + self._group_name, + error_str + ) + ) + return result[0].candidates + + def _fetch_resource_names(self): + resource_names = set() + dfns = self._linstor.resource_dfn_list_raise().resource_definitions + for dfn in dfns: + if dfn.resource_group_name == self._group_name and \ + linstor.consts.FLAG_DELETE not in dfn.flags: + resource_names.add(dfn.name) + return resource_names + + def _get_volumes_info(self, filter=None): + all_volume_info = {} + resources = self._linstor.resource_list_raise( + filter_by_resources=filter + ) + for resource in resources.resources: + if resource.name not in all_volume_info: + current = all_volume_info[resource.name] = self.VolumeInfo( + resource.name + ) + else: + current = all_volume_info[resource.name] + + for volume in resource.volumes: + # We ignore diskless pools of the form "DfltDisklessStorPool". 
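+ # Only volumes backed by this SR's storage pool are counted in the
+ # physical/virtual size totals computed below.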
+ if volume.storage_pool_name == self._group_name: + if volume.allocated_size < 0: + raise LinstorVolumeManagerError( + 'Failed to get allocated size of `{}` on `{}`' + .format(resource.name, volume.storage_pool_name) + ) + current.physical_size += volume.allocated_size + + if volume.usable_size < 0: + raise LinstorVolumeManagerError( + 'Failed to get usable size of `{}` on `{}`' + .format(resource.name, volume.storage_pool_name) + ) + virtual_size = volume.usable_size + + current.virtual_size = current.virtual_size and \ + min(current.virtual_size, virtual_size) or virtual_size + + for current in all_volume_info.values(): + current.physical_size *= 1024 + current.virtual_size *= 1024 + + return all_volume_info + + def _get_volume_node_names_and_size(self, volume_name): + node_names = set() + size = -1 + for resource in self._linstor.resource_list_raise( + filter_by_resources=[volume_name] + ).resources: + for volume in resource.volumes: + # We ignore diskless pools of the form "DfltDisklessStorPool". + if volume.storage_pool_name == self._group_name: + node_names.add(resource.node_name) + + current_size = volume.usable_size + if current_size < 0: + raise LinstorVolumeManagerError( + 'Failed to get usable size of `{}` on `{}`' + .format(resource.name, volume.storage_pool_name) + ) + + if size < 0: + size = current_size + else: + size = min(size, current_size) + + return (node_names, size * 1024) + + def _compute_size(self, attr): + pools = self._linstor.storage_pool_list_raise( + filter_by_stor_pools=[self._group_name] + ).storage_pools + + capacity = 0 + for pool in pools: + space = pool.free_space + if space: + size = getattr(space, attr) + if size < 0: + raise LinstorVolumeManagerError( + 'Failed to get pool {} attr of `{}`' + .format(attr, pool.node_name) + ) + capacity += size + return capacity * 1024 + + def _get_node_names(self): + node_names = set() + pools = self._linstor.storage_pool_list_raise( + filter_by_stor_pools=[self._group_name] + ).storage_pools + for pool in pools: + node_names.add(pool.node_name) + return node_names + + def _check_volume_creation_errors(self, result, volume_uuid): + errors = self._filter_errors(result) + if self._check_errors(errors, [ + linstor.consts.FAIL_EXISTS_RSC, linstor.consts.FAIL_EXISTS_RSC_DFN + ]): + raise LinstorVolumeManagerError( + 'Failed to create volume `{}` from SR `{}`, it already exists' + .format(volume_uuid, self._group_name), + LinstorVolumeManagerError.ERR_VOLUME_EXISTS + ) + + if errors: + raise LinstorVolumeManagerError( + 'Failed to create volume `{}` from SR `{}`: {}'.format( + volume_uuid, + self._group_name, + self._get_error_str(errors) + ) + ) + + def _create_volume(self, volume_uuid, volume_name, size, place_resources): + size = self.round_up_volume_size(size) + + self._check_volume_creation_errors(self._linstor.resource_group_spawn( + rsc_grp_name=self._group_name, + rsc_dfn_name=volume_name, + vlm_sizes=['{}B'.format(size)], + definitions_only=not place_resources + ), volume_uuid) + + def _create_volume_with_properties( + self, volume_uuid, volume_name, size, place_resources + ): + if self.check_volume_exists(volume_uuid): + raise LinstorVolumeManagerError( + 'Could not create volume `{}` from SR `{}`, it already exists' + .format(volume_uuid, self._group_name) + ' in properties', + LinstorVolumeManagerError.ERR_VOLUME_EXISTS + ) + + if volume_name in self._fetch_resource_names(): + raise LinstorVolumeManagerError( + 'Could not create volume `{}` from SR `{}`, '.format( + volume_uuid, self._group_name + ) + 'resource 
of the same name already exists in LINSTOR' + ) + + # I am paranoid. + volume_properties = self._get_volume_properties(volume_uuid) + if (volume_properties.get(self.PROP_NOT_EXISTS) is not None): + raise LinstorVolumeManagerError( + 'Could not create volume `{}`, '.format(volume_uuid) + + 'properties already exist' + ) + + try: + volume_properties[self.PROP_NOT_EXISTS] = self.STATE_CREATING + volume_properties[self.PROP_VOLUME_NAME] = volume_name + + self._create_volume( + volume_uuid, volume_name, size, place_resources + ) + + return volume_properties + except LinstorVolumeManagerError as e: + # Do not destroy existing resource! + # In theory we can't get this error because we check this event + # before the `self._create_volume` case. + # It can only happen if the same volume uuid is used in the same + # call in another host. + if e.code == LinstorVolumeManagerError.ERR_VOLUME_EXISTS: + raise + self._force_destroy_volume(volume_uuid, volume_properties) + raise + except Exception: + self._force_destroy_volume(volume_uuid, volume_properties) + raise + + def _find_device_path(self, volume_uuid, volume_name): + current_device_path = self._request_device_path( + volume_uuid, volume_name, activate=True + ) + + # We use realpath here to get the /dev/drbd path instead of + # /dev/drbd/by-res/. + expected_device_path = self.build_device_path(volume_name) + util.wait_for_path(expected_device_path, 5) + + device_realpath = os.path.realpath(expected_device_path) + if current_device_path != device_realpath: + raise LinstorVolumeManagerError( + 'Invalid path, current={}, expected={} (realpath={})' + .format( + current_device_path, + expected_device_path, + device_realpath + ) + ) + return expected_device_path + + def _request_device_path(self, volume_uuid, volume_name, activate=False): + node_name = socket.gethostname() + resources = self._linstor.resource_list( + filter_by_nodes=[node_name], + filter_by_resources=[volume_name] + ) + + if not resources or not resources[0]: + raise LinstorVolumeManagerError( + 'No response list for dev path of `{}`'.format(volume_uuid) + ) + if isinstance(resources[0], linstor.responses.ResourceResponse): + if not resources[0].resources: + if activate: + self._activate_device_path(node_name, volume_name) + return self._request_device_path(volume_uuid, volume_name) + raise LinstorVolumeManagerError( + 'Empty dev path for `{}`, but definition "seems" to exist' + .format(volume_uuid) + ) + # Contains a path of the /dev/drbd form. 
+ return resources[0].resources[0].volumes[0].device_path + + raise LinstorVolumeManagerError( + 'Unable to get volume dev path `{}`: {}'.format( + volume_uuid, str(resources[0]) + ) + ) + + def _activate_device_path(self, node_name, volume_name): + result = self._linstor.resource_create([ + linstor.ResourceData(node_name, volume_name, diskless=True) + ]) + if linstor.Linstor.all_api_responses_no_error(result): + return + errors = linstor.Linstor.filter_api_call_response_errors(result) + if len(errors) == 1 and errors[0].is_error( + linstor.consts.FAIL_EXISTS_RSC + ): + return + + raise LinstorVolumeManagerError( + 'Unable to activate device path of `{}` on node `{}`: {}' + .format(volume_name, node_name, ', '.join( + [str(x) for x in result])) + ) + + def _destroy_resource(self, resource_name): + result = self._linstor.resource_dfn_delete(resource_name) + error_str = self._get_error_str(result) + if error_str: + raise LinstorVolumeManagerError( + 'Could not destroy resource `{}` from SR `{}`: {}' + .format(resource_name, self._group_name, error_str) + ) + + def _destroy_volume(self, volume_uuid, volume_properties): + assert volume_properties.namespace == \ + self._build_volume_namespace(volume_uuid) + + try: + volume_name = volume_properties.get(self.PROP_VOLUME_NAME) + if volume_name in self._fetch_resource_names(): + self._destroy_resource(volume_name) + + # Assume this call is atomic. + volume_properties.clear() + except Exception as e: + raise LinstorVolumeManagerError( + 'Cannot destroy volume `{}`: {}'.format(volume_uuid, e) + ) + + def _force_destroy_volume(self, volume_uuid, volume_properties): + try: + self._destroy_volume(volume_uuid, volume_properties) + except Exception as e: + self._logger('Ignore fail: {}'.format(e)) + + def _build_volumes(self, repair): + properties = linstor.KV( + self._get_store_name(), + uri=self._uri, + namespace=self._build_volume_namespace() + ) + + resource_names = self._fetch_resource_names() + + self._volumes = set() + + updating_uuid_volumes = self._get_volumes_by_property( + self.REG_UPDATING_UUID_SRC, ignore_inexisting_volumes=False + ) + if updating_uuid_volumes and not repair: + raise LinstorVolumeManagerError( + 'Cannot build LINSTOR volume list: ' + 'It exists invalid "updating uuid volumes", repair is required' + ) + + existing_volumes = self._get_volumes_by_property( + self.REG_NOT_EXISTS, ignore_inexisting_volumes=False + ) + for volume_uuid, not_exists in existing_volumes.items(): + properties.namespace = self._build_volume_namespace( + volume_uuid + ) + + src_uuid = properties.get(self.PROP_UPDATING_UUID_SRC) + if src_uuid: + self._logger( + 'Ignoring volume during manager initialization with prop ' + ' PROP_UPDATING_UUID_SRC: {} (properties={})' + .format( + volume_uuid, + self._get_filtered_properties(properties) + ) + ) + continue + + # Insert volume in list if the volume exists. Or if the volume + # is being created and a slave wants to use it (repair = False). + # + # If we are on the master and if repair is True and state is + # Creating, it's probably a bug or crash: the creation process has + # been stopped. + if not_exists == self.STATE_EXISTS or ( + not repair and not_exists == self.STATE_CREATING + ): + self._volumes.add(volume_uuid) + continue + + if not repair: + self._logger( + 'Ignoring bad volume during manager initialization: {} ' + '(properties={})'.format( + volume_uuid, + self._get_filtered_properties(properties) + ) + ) + continue + + # Remove bad volume. 
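+ # A "bad" volume is one whose creation never completed: drop its LINSTOR
+ # resource (if any) and clear its properties.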
+ try: + self._logger( + 'Removing bad volume during manager initialization: {} ' + '(properties={})'.format( + volume_uuid, + self._get_filtered_properties(properties) + ) + ) + volume_name = properties.get(self.PROP_VOLUME_NAME) + + # Little optimization, don't call `self._destroy_volume`, + # we already have resource name list. + if volume_name in resource_names: + self._destroy_resource(volume_name) + + # Assume this call is atomic. + properties.clear() + except Exception as e: + # Do not raise, we don't want to block user action. + self._logger( + 'Cannot clean volume {}: {}'.format(volume_uuid, e) + ) + + for dest_uuid, src_uuid in updating_uuid_volumes.items(): + dest_properties = self._get_volume_properties(dest_uuid) + if int(dest_properties.get(self.PROP_NOT_EXISTS) or + self.STATE_EXISTS): + dest_properties.clear() + continue + + src_properties = self._get_volume_properties(src_uuid) + src_properties.clear() + + dest_properties.pop(self.PROP_UPDATING_UUID_SRC) + + if src_uuid in self._volumes: + self._volumes.remove(src_uuid) + self._volumes.add(dest_uuid) + + def _get_sr_properties(self): + return linstor.KV( + self._get_store_name(), + uri=self._uri, + namespace=self._build_sr_namespace() + ) + + def _get_volumes_by_property( + self, reg_prop, ignore_inexisting_volumes=True + ): + base_properties = linstor.KV( + self._get_store_name(), + uri=self._uri, + namespace=self._build_volume_namespace() + ) + + volume_properties = {} + for volume_uuid in self._volumes: + volume_properties[volume_uuid] = '' + + for key, value in base_properties.items(): + res = reg_prop.match(key) + if res: + volume_uuid = res.groups()[0] + if not ignore_inexisting_volumes or \ + volume_uuid in self._volumes: + volume_properties[volume_uuid] = value + + return volume_properties + + def _get_volume_properties(self, volume_uuid): + return linstor.KV( + self._get_store_name(), + uri=self._uri, + namespace=self._build_volume_namespace(volume_uuid) + ) + + def _get_store_name(self): + return 'xcp-sr-{}'.format(self._group_name) + + @classmethod + def _build_sr_namespace(cls): + return '/{}/'.format(cls.NAMESPACE_SR) + + @classmethod + def _build_volume_namespace(cls, volume_uuid=None): + # Return a path to all volumes if `volume_uuid` is not given. + if volume_uuid is None: + return '/{}/'.format(cls.NAMESPACE_VOLUME) + return '/{}/{}/'.format(cls.NAMESPACE_VOLUME, volume_uuid) + + @classmethod + def _get_error_str(cls, result): + return ', '.join([ + err.message for err in cls._filter_errors(result) + ]) + + @classmethod + def _create_linstor_instance(cls, uri): + def connect(): + instance = linstor.Linstor(uri, keep_alive=True) + instance.connect() + return instance + + return util.retry( + connect, + maxretry=60, + exceptions=[linstor.errors.LinstorNetworkError] + ) + + @classmethod + def _destroy_storage_pool(cls, lin, group_name, node_name): + result = lin.storage_pool_delete(node_name, group_name) + error_str = cls._get_error_str(result) + if error_str: + raise LinstorVolumeManagerError( + 'Failed to destroy SP `{}` on node `{}`: {}'.format( + group_name, + node_name, + error_str + ) + ) + + @classmethod + def _destroy_resource_group(cls, lin, group_name): + result = lin.resource_group_delete(group_name) + error_str = cls._get_error_str(result) + if error_str: + raise LinstorVolumeManagerError( + 'Failed to destroy RG `{}`: {}'.format(group_name, error_str) + ) + + @classmethod + def _build_group_name(cls, base_name): + # If thin provisioning is used we have a path like this: + # `VG/LV`. 
"/" is not accepted by LINSTOR. + return '{}{}'.format(cls.PREFIX_SR, base_name.replace('/', '_')) + + @staticmethod + def _get_filtered_properties(properties): + return dict(properties.items()) + + @staticmethod + def _filter_errors(result): + return [ + err for err in result + if hasattr(err, 'is_error') and err.is_error() + ] + + @staticmethod + def _check_errors(result, codes): + for err in result: + for code in codes: + if err.is_error(code): + return True + return False diff --git a/drivers/tapdisk-pause b/drivers/tapdisk-pause index 6b7fc09f9..932fc3ca6 100755 --- a/drivers/tapdisk-pause +++ b/drivers/tapdisk-pause @@ -29,6 +29,12 @@ import lvhdutil import vhdutil import lvmcache +try: + from linstorvolumemanager import LinstorVolumeManager + LINSTOR_AVAILABLE = True +except ImportError: + LINSTOR_AVAILABLE = False + TAPDEV_BACKPATH_PFX = "/dev/sm/backend" TAPDEV_PHYPATH_PFX = "/dev/sm/phy" @@ -130,7 +136,51 @@ class Tapdisk: self.realpath = p if ty == "LV": self.vdi_type = "aio" else: self.vdi_type = "vhd" - + elif realpath.startswith('/dev/drbd/by-res/xcp-volume-'): + if not LINSTOR_AVAILABLE: + raise util.SMException( + 'Can\'t refresh tapdisk: LINSTOR libraries are missing' + ) + + # We must always recreate the symlink to ensure we have + # the right info. Why? Because if the volume UUID is changed in + # LINSTOR the symlink is not directly updated. When live leaf + # coalesce is executed we have these steps: + # "A" -> "OLD_A" + # "B" -> "A" + # Without symlink update the previous "A" path is reused instead of + # "B" path. Note: "A", "B" and "OLD_A" are UUIDs. + session = self.session + + linstor_uri = 'linstor://{}'.format( + util.get_master_rec(session)['address'] + ) + + host_ref = util.get_this_host_ref(session) + sr_ref = session.xenapi.SR.get_by_uuid(self.sr_uuid) + + pbd = util.find_my_pbd(session, host_ref, sr_ref) + if pbd is None: + raise util.SMException('Failed to find PBD') + + dconf = session.xenapi.PBD.get_device_config(pbd) + group_name = dconf['group-name'] + + device_path = LinstorVolumeManager( + linstor_uri, + group_name, + logger=util.SMlog + ).get_device_path(self.vdi_uuid) + + if realpath != device_path: + util.SMlog( + 'Update LINSTOR PhyLink (previous={}, current={})' + .format(realpath, device_path) + ) + os.unlink(self.phypath) + _mkphylink(self.sr_uuid, self.vdi_uuid, device_path) + self.realpath = device_path + @locking("VDIUnavailable") def Pause(self): util.SMlog("Pause for %s" % self.vdi_uuid) diff --git a/drivers/util.py b/drivers/util.py index 3a568d599..a913fcf5c 100755 --- a/drivers/util.py +++ b/drivers/util.py @@ -699,10 +699,35 @@ def get_this_host(): return uuid -def is_master(session): +def get_master_ref(session): pools = session.xenapi.pool.get_all() - master = session.xenapi.pool.get_master(pools[0]) - return get_this_host_ref(session) == master + return session.xenapi.pool.get_master(pools[0]) + + +def get_master_rec(session): + return session.xenapi.host.get_record(get_master_ref(session)) + + +def is_master(session): + return get_this_host_ref(session) == get_master_ref(session) + + +def get_master_address(): + address = None + try: + fd = open('/etc/xensource/pool.conf', 'r') + try: + items = fd.readline().split(':') + if items[0].strip() == 'master': + address = 'localhost' + else: + address = items[1].strip() + finally: + fd.close() + except Exception: + pass + return address + def get_localhost_ref(session): @@ -1548,13 +1573,21 @@ def get_connected_sockets(pid): return sockets -def retry(f, maxretry=20, period=3): +def 
retry(f, maxretry=20, period=3, exceptions=[Exception]): retries = 0 while True: try: return f() except Exception as e: - SMlog("Got exception: %s. Retry number: %s" % (str(e), retries)) + for exception in exceptions: + if isinstance(e, exception): + SMlog('Got exception: {}. Retry number: {}'.format( + str(e), retries + )) + break + else: + SMlog('Got bad exception: {}. Raising...'.format(e)) + raise e retries += 1 if retries >= maxretry: diff --git a/linstor/Makefile b/linstor/Makefile new file mode 100644 index 000000000..c329ca302 --- /dev/null +++ b/linstor/Makefile @@ -0,0 +1,22 @@ +PREFIX ?= /opt/xensource/libexec +DESTDIR ?= +DEBUGDIR ?= /opt/xensource/debug + + +OPTS := -Wall -std=gnu99 + +SRC := linstor-monitord.c + +BIN := linstor-monitord + +all: daemon + +daemon: linstor-monitord.c + $(CC) $(OPTS) $(SRC) -o $(BIN) + +install: linstor-monitord + mkdir -p $(DESTDIR)$(PREFIX) + install -m 755 $^ $(DESTDIR)$(PREFIX) + +clean: + rm -f linstor-monitord diff --git a/linstor/linstor-monitord.c b/linstor/linstor-monitord.c new file mode 100644 index 000000000..8161813d7 --- /dev/null +++ b/linstor/linstor-monitord.c @@ -0,0 +1,402 @@ +/* + * Copyright (C) 2020 Vates SAS - ronan.abhamon@vates.fr + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// TODO: Handle new hosts. +// TODO: https://github.com/xcp-ng/xcp/issues/421 + +// ============================================================================= + +#define POOL_CONF_DIR "/etc/xensource" +#define POOL_CONF_FILE "pool.conf" +#define POOL_CONF_ABS_FILE POOL_CONF_DIR "/" POOL_CONF_FILE + +// In milliseconds. 
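+// This timeout also bounds how often updateLinstorNode() runs when no
+// inotify event arrives.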
+#define POLL_TIMEOUT 2000 + +// ----------------------------------------------------------------------------- + +static inline void normalizeTime (struct timespec *spec) { + while (spec->tv_nsec >= 1000000000) { + ++spec->tv_sec; + spec->tv_nsec -= 1000000000; + } + while (spec->tv_nsec < 0) { + --spec->tv_sec; + spec->tv_nsec += 1000000000; + } +} + +static inline struct timespec getCurrentTime () { + struct timespec spec; + clock_gettime(CLOCK_MONOTONIC, &spec); + return (struct timespec){ + .tv_sec = spec.tv_sec, + .tv_nsec = spec.tv_nsec + }; +} + +static inline struct timespec getTimeDiff (const struct timespec *a, const struct timespec *b) { + struct timespec result = *a; + result.tv_sec -= b->tv_sec - 1; + result.tv_nsec -= b->tv_nsec + 1000000000; + normalizeTime(&result); + return result; +} + +static inline int64_t convertToMilliseconds (struct timespec spec) { + spec.tv_nsec += 1000 - spec.tv_nsec % 1000; + normalizeTime(&spec); + return spec.tv_sec * 1000 + spec.tv_nsec / 1000000; +} + +// ----------------------------------------------------------------------------- + +static inline int readPoolConf (char *buffer, size_t bufferSize) { + FILE *f = fopen(POOL_CONF_ABS_FILE, "r"); + if (!f) { + syslog(LOG_ERR, "Failed to open `" POOL_CONF_ABS_FILE "`: `%s`.", strerror(errno)); + return -errno; + } + + int ret = 0; + if (!fgets(buffer, bufferSize, f)) { + syslog(LOG_ERR, "Cannot read `" POOL_CONF_ABS_FILE "`."); + ret = -EIO; + } + + fclose(f); + + return ret; +} + +static inline int isMasterHost (int *error) { + if (error) + *error = 0; + + char buffer[512]; + + int ret = readPoolConf(buffer, sizeof buffer); + if (ret < 0) { + if (error) + *error = ret; + return 0; + } + + static const char masterStr[] = "master"; + static const size_t masterLen = sizeof masterStr - 1; + if (!strncmp(buffer, masterStr, masterLen)) { + const char end = buffer[masterLen]; + ret = end == '\0' || isspace(end); + } + + if (ret < 0) { + if (error) + *error = ret; + return 0; + } + + return ret; +} + +// ----------------------------------------------------------------------------- + +typedef struct { + int inotifyFd; + // TODO: Should be completed with at least a hostname field. +} State; + +// ----------------------------------------------------------------------------- + +static inline int execCommand (char *argv[]) { + const pid_t pid = fork(); + if (pid < 0) + return -errno; + + // Child process. + if (pid == 0) { + if (execvp(*argv, argv) < 0) + syslog(LOG_ERR, "Failed to exec `%s` command.", *argv); + exit(EXIT_FAILURE); + } + + // Main process. 
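+ // Wait for the child and log how it terminated.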
+ int status; + if (waitpid(pid, &status, 0) < 0) { + syslog(LOG_ERR, "Failed to wait command: `%s`.", *argv); + return -errno; + } + + if (WIFEXITED(status)) { + const int code = WEXITSTATUS(status); + if (code == 0) + syslog(LOG_INFO, "`%s` completed normally.", *argv); + else + syslog(LOG_ERR, "`%s` exited with an error: %d.", *argv, code); + } else if (WIFSIGNALED(status)) + syslog(LOG_ERR, "`%s` terminated by signal %d.", *argv, WTERMSIG(status)); + + return 0; +} + +// ----------------------------------------------------------------------------- + +static inline int createInotifyInstance () { + const int fd = inotify_init1(IN_CLOEXEC); + if (fd < 0) { + syslog(LOG_ERR, "Unable to create inotify instance: `%s`.", strerror(errno)); + return -errno; + } + return fd; +} + +static inline int addInotifyWatch (int inotifyFd, const char *filepath, uint32_t mask) { + const int wd = inotify_add_watch(inotifyFd, filepath, mask); + if (wd < 0) { + syslog(LOG_ERR, "Unable to register `%s`: `%s`.", filepath, strerror(errno)); + return -errno; + } + return wd; +} + +// ----------------------------------------------------------------------------- + +static inline int updateLinstorServices () { + int error; + const int isMaster = isMasterHost(&error); + if (error) + return error; + + syslog(LOG_INFO, "%s linstor-controller...", isMaster ? "Enabling" : "Disabling"); + char *argv[] = { + "systemctl", + isMaster ? "enable" : "disable", + "--now", + "linstor-controller", + NULL + }; + return execCommand(argv); +} + +static inline int updateLinstorNode (State *state) { + char buffer[256]; + if (gethostname(buffer, sizeof buffer) == -1) { + syslog(LOG_ERR, "Failed to get hostname: `%s`.", strerror(errno)); + return errno ? -errno : -EINVAL; + } + + // TODO: Finish me, see: https://github.com/xcp-ng/xcp/issues/421 + + return 0; +} + +// ----------------------------------------------------------------------------- + +#define PROCESS_MODE_DEFAULT 0 +#define PROCESS_MODE_WAIT_FILE_CREATION 1 + +static inline int waitForPoolConfCreation (State *state, int *wdFile); + +static inline int processPoolConfEvents (int inotifyFd, int wd, char **buffer, size_t *bufferSize, int mode, int *process) { + size_t size = 0; + if (ioctl(inotifyFd, FIONREAD, (char *)&size) == -1) { + syslog(LOG_ERR, "Failed to get buffer size from inotify descriptor: `%s`.", strerror(errno)); + return -errno; + } + + if (*bufferSize < size) { + void *ptr = realloc(*buffer, size); + if (!ptr) { + syslog(LOG_ERR, "Failed to reallocate buffer with size %zu: `%s`.", size, strerror(errno)); + return -errno; + } + *buffer = ptr; + *bufferSize = size; + } + + if ((size = (size_t)read(inotifyFd, *buffer, size)) == (size_t)-1) { + syslog(LOG_ERR, "Failed to read buffer from inotify descriptor: `%s`.", strerror(errno)); + return -errno; + } + + uint32_t mask = 0; + for (char *p = *buffer, *end = p + size; p < end; ) { + const struct inotify_event *event = (struct inotify_event *)p; + + if (event->mask & IN_Q_OVERFLOW) + syslog(LOG_WARNING, "Event queue overflow."); + + if (event->wd == wd) { + if (event->len) { + // Event in the watched directory. + if (!strncmp(event->name, POOL_CONF_FILE, event->len)) + mask |= event->mask; + } else { + // Directory or watched file event. + if (mode == PROCESS_MODE_DEFAULT) + mask |= event->mask; + else if (event->mask & (IN_DELETE_SELF | IN_MOVE_SELF | IN_UNMOUNT)) { + syslog(LOG_ERR, "Watched `" POOL_CONF_DIR "` dir has been removed!"); + return -EIO; // The process should be exited after that. 
+ } + } + } + + p += sizeof(struct inotify_event) + event->len; + } + + int ret = 0; + if (mode == PROCESS_MODE_DEFAULT) { + if (!mask) + return 0; + + syslog(LOG_INFO, "Updating linstor services... (Inotify mask=%" PRIu32 ")", mask); + if (mask & (IN_DELETE_SELF | IN_MOVE_SELF | IN_UNMOUNT)) { + syslog(LOG_ERR, "Watched `" POOL_CONF_ABS_FILE "` file has been removed!"); + inotify_rm_watch(inotifyFd, wd); // Do not forget to remove watch to avoid leaks. + return -EIO; + } + ret = updateLinstorServices(); + } else { + if (mask & (IN_CREATE | IN_MOVED_TO)) { + syslog(LOG_ERR, "Watched `" POOL_CONF_ABS_FILE "` file has been recreated!"); + *process = 0; + } + } + + return ret; +} + +static inline int waitAndProcessEvents (State *state, int wd, int mode) { + char *buffer = NULL; + size_t bufferSize = 0; + + int ret = 0; + int process = 1; + + struct timespec previousTime = getCurrentTime(); + do { + struct timespec currentTime = getCurrentTime(); + const int64_t elapsedTime = convertToMilliseconds(getTimeDiff(¤tTime, &previousTime)); + + int timeout; + if (elapsedTime >= POLL_TIMEOUT) { + updateLinstorNode(state); + timeout = POLL_TIMEOUT; + previousTime = getCurrentTime(); + } else { + timeout = POLL_TIMEOUT - elapsedTime; + } + + struct pollfd fds = { state->inotifyFd, POLLIN, 0 }; + const int res = poll(&fds, 1, timeout); + if (res < 0) { + if (errno == EAGAIN) + continue; + syslog(LOG_ERR, "Failed to poll from inotify descriptor: `%s`.", strerror(errno)); + ret = -errno; + } else if (res > 0) { + ret = processPoolConfEvents(state->inotifyFd, wd, &buffer, &bufferSize, mode, &process); + } + } while (ret >= 0 && process); + + free(buffer); + return ret; +} + +static inline int waitAndProcessFileEvents (State *state, int wd) { + return waitAndProcessEvents(state, wd, PROCESS_MODE_DEFAULT); +} + +static inline int waitAndProcessDirEvents (State *state, int wd) { + return waitAndProcessEvents(state, wd, PROCESS_MODE_WAIT_FILE_CREATION); +} + +static inline int waitForPoolConfCreation (State *state, int *wdFile) { + const int wdDir = addInotifyWatch( + state->inotifyFd, POOL_CONF_DIR, IN_MOVED_TO | IN_CREATE | IN_MOVE_SELF | IN_DELETE_SELF + ); + if (wdDir < 0) + return wdDir; + + int ret = 0; + do { + do { + // Update LINSTOR services... + ret = updateLinstorServices(); + + // Ok we can't read the pool configuration file. + // Maybe the file doesn't exist. Waiting its creation... + } while ((ret == -ENOENT || ret == -EIO) && !(ret = waitAndProcessDirEvents(state, wdDir))); + + // The services have been updated, now we must add a new watch on the pool config file directly. + if (!ret) { + *wdFile = addInotifyWatch(state->inotifyFd, POOL_CONF_ABS_FILE, IN_MODIFY | IN_MOVE_SELF | IN_DELETE_SELF); + if (*wdFile < 0) + ret = *wdFile; + } + } while (ret == -ENOENT); + + inotify_rm_watch(state->inotifyFd, wdDir); + return ret; +} + +// ----------------------------------------------------------------------------- + +int main (int argc, char *argv[]) { + (void)argc; + (void)argv; + + openlog(argv[0], LOG_PID, LOG_USER | LOG_MAIL); + setlogmask(LOG_UPTO(LOG_INFO)); + + State state = { + .inotifyFd = -1 + }; + + const int inotifyFd = createInotifyInstance(); + if (inotifyFd < 0) + return -inotifyFd; + state.inotifyFd = inotifyFd; + + updateLinstorNode(&state); + + int ret = 0; + while (!ret || ret == -ENOENT || ret == -EIO) { + int wdFile; + if ((ret = waitForPoolConfCreation(&state, &wdFile)) < 0) + break; // If the pool config dir cannot be watched or accessed, we consider it is a fatal error. 
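+ // Otherwise watch pool.conf itself until it disappears or an error occurs.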
+ + ret = waitAndProcessFileEvents(&state, wdFile); + } + + close(inotifyFd); + return -ret; +} diff --git a/systemd/linstor-monitor.service b/systemd/linstor-monitor.service new file mode 100644 index 000000000..5f8f0a760 --- /dev/null +++ b/systemd/linstor-monitor.service @@ -0,0 +1,13 @@ +[Unit] +Description=LINSTOR Monitor +Before=xs-sm.service +ConditionPathExists=/usr/share/linstor-server/bin/Controller + +[Service] +StandardOutput=null +StandardError=journal +ExecStart=/opt/xensource/libexec/linstor-monitord +Restart=always + +[Install] +WantedBy=multi-user.target diff --git a/tests/mocks/linstor/__init__.py b/tests/mocks/linstor/__init__.py new file mode 100644 index 000000000..e69de29bb From f2dccdd49a8ac53fbcc6459a7ebdf0bc7f794bac Mon Sep 17 00:00:00 2001 From: Ronan Abhamon Date: Tue, 27 Oct 2020 15:04:36 +0100 Subject: [PATCH 08/72] feat(tests): add unit tests concerning ZFS (close xcp-ng/xcp#425) - Check if "create" doesn't succeed without zfs packages - Check if "scan" failed if the path is not mounted (not a ZFS mountpoint) --- drivers/ZFSSR.py | 32 ++++++------ tests/test_ZFSSR.py | 121 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 137 insertions(+), 16 deletions(-) create mode 100644 tests/test_ZFSSR.py diff --git a/drivers/ZFSSR.py b/drivers/ZFSSR.py index 1b2f398f6..d37521016 100644 --- a/drivers/ZFSSR.py +++ b/drivers/ZFSSR.py @@ -58,6 +58,18 @@ } +def is_zfs_available(): + import distutils.spawn + return distutils.spawn.find_executable('zfs') and \ + util.pathexists('/sys/module/zfs/initstate') + + +def is_zfs_path(path): + cmd = ['findmnt', '-o', 'FSTYPE', '-n', path] + fs_type = util.pread2(cmd).split('\n')[0] + return fs_type == 'zfs' + + class ZFSSR(FileSR.FileSR): DRIVER_TYPE = 'zfs' @@ -66,7 +78,7 @@ def handles(type): return type == ZFSSR.DRIVER_TYPE def load(self, sr_uuid): - if not self._is_zfs_available(): + if not is_zfs_available(): raise xs_errors.XenError( 'SRUnavailable', opterr='zfs is not installed or module is not loaded' @@ -74,7 +86,7 @@ def load(self, sr_uuid): return super(ZFSSR, self).load(sr_uuid) def create(self, sr_uuid, size): - if not self._is_zfs_path(self.remotepath): + if not is_zfs_path(self.remotepath): raise xs_errors.XenError( 'ZFSSRCreate', opterr='Cannot create SR, path is not a ZFS mountpoint' @@ -90,7 +102,7 @@ def delete(self, sr_uuid): return super(ZFSSR, self).delete(sr_uuid) def attach(self, sr_uuid): - if not self._is_zfs_path(self.remotepath): + if not is_zfs_path(self.remotepath): raise xs_errors.XenError( 'SRUnavailable', opterr='Invalid ZFS path' @@ -106,19 +118,7 @@ def vdi(self, uuid, loadLocked=False): # Ensure _checkmount is overridden to prevent bad behaviors in FileSR. 
def _checkmount(self): return super(ZFSSR, self)._checkmount() and \ - self._is_zfs_path(self.remotepath) - - @staticmethod - def _is_zfs_path(path): - cmd = ['findmnt', '-o', 'FSTYPE', '-n', path] - fs_type = util.pread2(cmd).split('\n')[0] - return fs_type == 'zfs' - - @staticmethod - def _is_zfs_available(): - import distutils.spawn - return distutils.spawn.find_executable('zfs') and \ - util.pathexists('/sys/module/zfs/initstate') + is_zfs_path(self.remotepath) class ZFSFileVDI(FileSR.FileVDI): diff --git a/tests/test_ZFSSR.py b/tests/test_ZFSSR.py new file mode 100644 index 000000000..879ea3729 --- /dev/null +++ b/tests/test_ZFSSR.py @@ -0,0 +1,121 @@ +import uuid + +import FileSR +import mock +import os +import SR +import unittest +import ZFSSR + + +XML_DEFS = os.path.dirname(os.path.abspath(__file__)) + \ + '/../drivers/XE_SR_ERRORCODES.xml' + + +class FakeZFSSR(ZFSSR.ZFSSR): + uuid = None + sr_ref = None + session = None + srcmd = None + other_config = {} + vdis = {} + passthrough = True + + def __init__(self, srcmd, none): + self.dconf = srcmd.dconf + self.srcmd = srcmd + self.uuid = str(uuid.uuid4()) + + +class TestZFSSR(unittest.TestCase): + def create_zfs_sr(self, sr_uuid='asr_uuid', location='fake_path'): + srcmd = mock.Mock() + srcmd.dconf = { + 'location': location + } + srcmd.params = { + 'command': 'some_command', + 'device_config': {} + } + sr = FakeZFSSR(srcmd, None) + sr.load(sr_uuid) + return sr + + @mock.patch('ZFSSR.is_zfs_available', autospec=True) + @mock.patch('FileSR.Lock', autospec=True) + def test_load(self, lock, is_zfs_available): + self.create_zfs_sr() + + @mock.patch('xs_errors.XML_DEFS', new=XML_DEFS) + def test_load_with_zfs_unavailable(self): + failed = False + try: + self.create_zfs_sr() + except SR.SROSError as e: + # Check SRUnavailable error. + if e.errno != 47: + raise + failed = True + self.assertTrue(failed) + + @mock.patch('ZFSSR.is_zfs_available', autospec=True) + @mock.patch('ZFSSR.is_zfs_path', autospec=True) + @mock.patch('FileSR.Lock', autospec=True) + def test_create(self, lock, is_zfs_path, is_zfs_available): + sr = self.create_zfs_sr() + sr.create(sr.uuid, 42) + + @mock.patch('ZFSSR.is_zfs_available', autospec=True) + @mock.patch('ZFSSR.is_zfs_path', autospec=True) + @mock.patch('FileSR.Lock', autospec=True) + @mock.patch('xs_errors.XML_DEFS', new=XML_DEFS) + def test_create_with_invalid_zfs_path( + self, lock, is_zfs_path, is_zfs_available + ): + failed = False + + is_zfs_path.return_value = False + sr = self.create_zfs_sr() + try: + sr.create(sr.uuid, 42) + except SR.SROSError as e: + # Check ZFSSRCreate error. 
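+ # (The test assumes errno 5000 maps to ZFSSRCreate in XE_SR_ERRORCODES.xml.)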
+ if e.errno != 5000: + raise + failed = True + self.assertTrue(failed) + + @mock.patch('ZFSSR.is_zfs_available', autospec=True) + @mock.patch('ZFSSR.is_zfs_path', autospec=True) + @mock.patch('FileSR.Lock', autospec=True) + @mock.patch('FileSR.FileSR._checkmount', autospec=True) + @mock.patch('FileSR.FileSR._loadvdis', autospec=True) + @mock.patch('SR.SR.scan', autospec=True) + @mock.patch('os.path.ismount', autospec=True) + def test_scan( + self, ismount, scan, _loadvdis, _checkmount, lock, + is_zfs_path, is_zfs_available + ): + sr = self.create_zfs_sr() + sr.scan(sr.uuid) + + @mock.patch('ZFSSR.is_zfs_available', autospec=True) + @mock.patch('ZFSSR.is_zfs_path', autospec=True) + @mock.patch('FileSR.Lock', autospec=True) + @mock.patch('FileSR.FileSR._checkmount', autospec=True) + @mock.patch('xs_errors.XML_DEFS', new=XML_DEFS) + def test_scan_with_invalid_zfs_path( + self, _checkmount, lock, is_zfs_path, is_zfs_available + ): + failed = False + + is_zfs_path.return_value = False + sr = self.create_zfs_sr() + try: + sr.scan(sr.uuid) + except SR.SROSError as e: + # Check SRUnavailable error. + if e.errno != 47: + raise + failed = True + self.assertTrue(failed) From d53d40e78471324ce9ec2a3034cc6639e123949f Mon Sep 17 00:00:00 2001 From: Aleksander Wieliczko Date: Fri, 29 Jan 2021 15:21:23 +0100 Subject: [PATCH 09/72] Added SM Driver for MooseFS Co-authored-by: Piotr Robert Konopelko Signed-off-by: Aleksander Wieliczko Signed-off-by: Ronan Abhamon --- Makefile | 1 + drivers/MooseFSSR.py | 271 ++++++++++++++++++++++++++++++++++++++++ drivers/cleanup.py | 2 +- tests/test_MooseFSSR.py | 64 ++++++++++ 4 files changed, 337 insertions(+), 1 deletion(-) create mode 100755 drivers/MooseFSSR.py create mode 100644 tests/test_MooseFSSR.py diff --git a/Makefile b/Makefile index 3cacbff21..5255a583b 100755 --- a/Makefile +++ b/Makefile @@ -19,6 +19,7 @@ SM_DRIVERS += CephFS SM_DRIVERS += GlusterFS SM_DRIVERS += XFS SM_DRIVERS += ZFS +SM_DRIVERS += MooseFS SM_LIBS := SR SM_LIBS += SRCommand diff --git a/drivers/MooseFSSR.py b/drivers/MooseFSSR.py new file mode 100755 index 000000000..53485a0cb --- /dev/null +++ b/drivers/MooseFSSR.py @@ -0,0 +1,271 @@ +#!/usr/bin/env python +# +# Original work copyright (C) Citrix systems +# Modified work copyright (C) Tappest sp. z o.o., Vates SAS and XCP-ng community +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published +# by the Free Software Foundation; version 2.1 only. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +# +# MooseFSSR: Based on CEPHFSSR and FileSR, mounts MooseFS share + +import errno +import os +import syslog as _syslog +import xmlrpclib +from syslog import syslog + +# careful with the import order here +# FileSR has a circular dependency: +# FileSR -> blktap2 -> lvutil -> EXTSR -> FileSR +# importing in this order seems to avoid triggering the issue. 
+import SR +import SRCommand +import FileSR +# end of careful +import cleanup +import util +import vhdutil +import xs_errors +from lock import Lock + +CAPABILITIES = ["SR_PROBE", "SR_UPDATE", "SR_CACHING", + "VDI_CREATE", "VDI_DELETE", "VDI_ATTACH", "VDI_DETACH", + "VDI_UPDATE", "VDI_CLONE", "VDI_SNAPSHOT", "VDI_RESIZE", "VDI_MIRROR", + "VDI_GENERATE_CONFIG", + "VDI_RESET_ON_BOOT/2", "ATOMIC_PAUSE"] + +CONFIGURATION = [ + ['masterhost', 'MooseFS Master Server hostname or IP address (required, e.g.: "mfsmaster.local.lan" or "10.10.10.1")'], + ['masterport', 'MooseFS Master Server port, default: 9421'], + ['rootpath', 'MooseFS path (required, e.g.: "/")'], + ['options', 'MooseFS Client additional options (e.g.: "mfspassword=PASSWORD,mfstimeout=300")'] +] + +DRIVER_INFO = { + 'name': 'MooseFS VHD', + 'description': 'SR plugin which stores disks as VHD files on a MooseFS storage', + 'vendor': 'Tappest sp. z o.o.', + 'copyright': '(C) 2021 Tappest sp. z o.o.', + 'driver_version': '1.0', + 'required_api_version': '1.0', + 'capabilities': CAPABILITIES, + 'configuration': CONFIGURATION +} + +DRIVER_CONFIG = {"ATTACH_FROM_CONFIG_WITH_TAPDISK": True} + +# The mountpoint for the directory when performing an sr_probe. All probes +# are guaranteed to be serialised by xapi, so this single mountpoint is fine. +PROBE_MOUNTPOINT = os.path.join(SR.MOUNT_BASE, "probe") + + +class MooseFSException(Exception): + def __init__(self, errstr): + self.errstr = errstr + + +class MooseFSSR(FileSR.FileSR): + """MooseFS file-based storage""" + + DRIVER_TYPE = 'moosefs' + + def handles(sr_type): + # fudge, because the parent class (FileSR) checks for smb to alter its behavior + return sr_type == MooseFSSR.DRIVER_TYPE or sr_type == 'smb' + + handles = staticmethod(handles) + + def load(self, sr_uuid): + if not self._is_moosefs_available(): + raise xs_errors.XenError( + 'SRUnavailable', + opterr='MooseFS Client is not installed!' 
+ ) + + self.ops_exclusive = FileSR.OPS_EXCLUSIVE + self.lock = Lock(vhdutil.LOCK_TYPE_SR, self.uuid) + self.sr_vditype = SR.DEFAULT_TAP + self.driver_config = DRIVER_CONFIG + if 'masterhost' not in self.dconf: + raise xs_errors.XenError('ConfigServerMissing') + self.remoteserver = self.dconf['masterhost'] + self.remotepath = self.dconf['rootpath'] + # if masterport is not specified, use default: 9421 + if 'masterport' not in self.dconf: + self.remoteport = "9421" + else: + self.remoteport = self.dconf['masterport'] + if self.sr_ref and self.session is not None: + self.sm_config = self.session.xenapi.SR.get_sm_config(self.sr_ref) + else: + self.sm_config = self.srcmd.params.get('sr_sm_config') or {} + self.attached = False + self.path = os.path.join(SR.MOUNT_BASE, sr_uuid) + self.mountpoint = self.path + self.linkpath = self.path + self._check_o_direct() + + def checkmount(self): + return util.ioretry(lambda: ((util.pathexists(self.mountpoint) and + util.ismount(self.mountpoint)))) + + def mount(self, mountpoint=None): + """Mount MooseFS share at 'mountpoint'""" + if mountpoint is None: + mountpoint = self.mountpoint + elif not util.is_string(mountpoint) or mountpoint == "": + raise MooseFSException("Mountpoint is not a string object") + + try: + if not util.ioretry(lambda: util.isdir(mountpoint)): + util.ioretry(lambda: util.makedirs(mountpoint)) + except util.CommandException, inst: + raise MooseFSException("Failed to make directory: code is %d" % inst.code) + + try: + options = [] + if self.dconf.has_key('options'): + options.append(self.dconf['options']) + if options: + options = ['-o', ','.join(options)] + command = ["mount", '-t', 'moosefs', self.remoteserver+":"+self.remoteport+":"+self.remotepath, mountpoint] + options + util.ioretry(lambda: util.pread(command), errlist=[errno.EPIPE, errno.EIO], maxretry=2, nofail=True) + except util.CommandException, inst: + syslog(_syslog.LOG_ERR, 'MooseFS mount failed ' + inst.__str__()) + raise MooseFSException("Mount failed with return code %d" % inst.code) + + # Sanity check to ensure that the user has at least RO access to the + # mounted share. Windows sharing and security settings can be tricky. + try: + util.listdir(mountpoint) + except util.CommandException: + try: + self.unmount(mountpoint, True) + except MooseFSException: + util.logException('MooseFSSR.unmount()') + raise MooseFSException("Permission denied. 
Please check user privileges.") + + def unmount(self, mountpoint, rmmountpoint): + try: + util.pread(["umount", mountpoint]) + except util.CommandException, inst: + raise MooseFSException("Command umount failed with return code %d" % inst.code) + if rmmountpoint: + try: + os.rmdir(mountpoint) + except OSError, inst: + raise MooseFSException("Command rmdir failed with error '%s'" % inst.strerror) + + def attach(self, sr_uuid): + if not self.checkmount(): + try: + self.mount() + except MooseFSException as exc: + raise xs_errors.SROSError(12, exc.errstr) + self.attached = True + + def probe(self): + try: + self.mount(PROBE_MOUNTPOINT) + sr_list = filter(util.match_uuid, util.listdir(PROBE_MOUNTPOINT)) + self.unmount(PROBE_MOUNTPOINT, True) + except (util.CommandException, xs_errors.XenError): + raise + # Create a dictionary from the SR uuids to feed SRtoXML() + sr_dict = {sr_uuid: {} for sr_uuid in sr_list} + return util.SRtoXML(sr_dict) + + def detach(self, sr_uuid): + if not self.checkmount(): + return + util.SMlog("Aborting GC/coalesce") + cleanup.abort(sr_uuid) + # Change directory to avoid unmount conflicts + os.chdir(SR.MOUNT_BASE) + self.unmount(self.mountpoint, True) + self.attached = False + + def create(self, sr_uuid, size): + if self.checkmount(): + raise xs_errors.SROSError(113, 'MooseFS mount point already attached') + + try: + self.mount() + except MooseFSException, exc: + # noinspection PyBroadException + try: + os.rmdir(self.mountpoint) + except: + # we have no recovery strategy + pass + raise xs_errors.SROSError(111, "MooseFS mount error [opterr=%s]" % exc.errstr) + + + def delete(self, sr_uuid): + # try to remove/delete non VDI contents first + super(MooseFSSR, self).delete(sr_uuid) + try: + if self.checkmount(): + self.detach(sr_uuid) + if util.ioretry(lambda: util.pathexists(self.mountpoint)): + util.ioretry(lambda: os.rmdir(self.mountpoint)) + except util.CommandException, inst: + self.detach(sr_uuid) + if inst.code != errno.ENOENT: + raise xs_errors.SROSError(114, "Failed to remove MooseFS mount point") + + def vdi(self, uuid, loadLocked=False): + return MooseFSFileVDI(self, uuid) + + @staticmethod + def _is_moosefs_available(): + import distutils.spawn + return distutils.spawn.find_executable('mfsmount') + +class MooseFSFileVDI(FileSR.FileVDI): + def attach(self, sr_uuid, vdi_uuid): + if not hasattr(self, 'xenstore_data'): + self.xenstore_data = {} + + self.xenstore_data['storage-type'] = MooseFSSR.DRIVER_TYPE + + return super(MooseFSFileVDI, self).attach(sr_uuid, vdi_uuid) + + def generate_config(self, sr_uuid, vdi_uuid): + util.SMlog("MooseFSFileVDI.generate_config") + if not util.pathexists(self.path): + raise xs_errors.XenError('VDIUnavailable') + resp = {'device_config': self.sr.dconf, + 'sr_uuid': sr_uuid, + 'vdi_uuid': vdi_uuid, + 'sr_sm_config': self.sr.sm_config, + 'command': 'vdi_attach_from_config'} + # Return the 'config' encoded within a normal XMLRPC response so that + # we can use the regular response/error parsing code. 
+ config = xmlrpclib.dumps(tuple([resp]), "vdi_attach_from_config") + return xmlrpclib.dumps((config,), "", True) + + def attach_from_config(self, sr_uuid, vdi_uuid): + try: + if not util.pathexists(self.sr.path): + self.sr.attach(sr_uuid) + except: + util.logException("MooseFSFileVDI.attach_from_config") + raise xs_errors.XenError('SRUnavailable', + opterr='Unable to attach from config') + + +if __name__ == '__main__': + SRCommand.run(MooseFSSR, DRIVER_INFO) +else: + SR.registerSR(MooseFSSR) diff --git a/drivers/cleanup.py b/drivers/cleanup.py index 74b28674f..02e1a4ea3 100755 --- a/drivers/cleanup.py +++ b/drivers/cleanup.py @@ -3345,7 +3345,7 @@ def normalizeType(type): type = SR.TYPE_LVHD if type in [ "ext", "nfs", "ocfsoiscsi", "ocfsohba", "smb", "cephfs", "glusterfs", - "xfs", "zfs" + "moosefs", "xfs", "zfs" ]: type = SR.TYPE_FILE if type in ["linstor"]: diff --git a/tests/test_MooseFSSR.py b/tests/test_MooseFSSR.py new file mode 100644 index 000000000..3349a449d --- /dev/null +++ b/tests/test_MooseFSSR.py @@ -0,0 +1,64 @@ +import mock +import MooseFSSR +import unittest + + +class FakeMooseFSSR(MooseFSSR.MooseFSSR): + uuid = None + sr_ref = None + srcmd = None + other_config = {} + + def __init__(self, srcmd, none): + self.dconf = srcmd.dconf + self.srcmd = srcmd + + +class TestMooseFSSR(unittest.TestCase): + + def create_moosefssr(self, masterhost='aServer', rootpath='/aServerpath', + sr_uuid='asr_uuid', useroptions=''): + srcmd = mock.Mock() + srcmd.dconf = { + 'masterhost': masterhost, + 'rootpath': rootpath + } + if useroptions: + srcmd.dconf.update({'options': useroptions}) + srcmd.params = { + 'command': 'some_command', + 'device_config': {} + } + moosefssr = FakeMooseFSSR(srcmd, None) + moosefssr.load(sr_uuid) + return moosefssr + + @mock.patch('MooseFSSR.MooseFSSR._is_moosefs_available', mock.MagicMock(return_value="mfsmount")) + @mock.patch('MooseFSSR.Lock', autospec=True) + def test_load(self, Lock): + self.create_moosefssr() + + @mock.patch('MooseFSSR.MooseFSSR._is_moosefs_available', mock.MagicMock(return_value="mfsmount")) + @mock.patch('MooseFSSR.MooseFSSR.checkmount', autospec=True) + @mock.patch('MooseFSSR.Lock', autospec=True) + def test_attach_if_mounted_then_attached(self, mock_lock, mock_checkmount): + mfssr = self.create_moosefssr() + mock_checkmount.return_value=True + mfssr.attach('asr_uuid') + self.assertTrue(mfssr.attached) + + @mock.patch('MooseFSSR.MooseFSSR._is_moosefs_available', mock.MagicMock(return_value="mfsmount")) + @mock.patch('MooseFSSR.Lock', autospec=True) + def test_mount_mountpoint_empty_string(self, mock_lock): + mfssr = self.create_moosefssr() + self.assertRaises(MooseFSSR.MooseFSException, mfssr.mount) + + @mock.patch('MooseFSSR.MooseFSSR._is_moosefs_available', mock.MagicMock(return_value="mfsmount")) + @mock.patch('MooseFSSR.MooseFSSR.checkmount',return_value=False, autospec=True) + @mock.patch('MooseFSSR.Lock', autospec=True) + def test_detach_not_detached_if_not_mounted(self, mock_lock, mock_checkmount): + mfssr = self.create_moosefssr() + mfssr.attached = True + mock_checkmount.return_value=False + mfssr.detach('asr_uuid') + self.assertTrue(mfssr.attached) From 881bf442205ae6b3f39842009deb6c9f73c8f934 Mon Sep 17 00:00:00 2001 From: Ronan Abhamon Date: Thu, 2 Dec 2021 09:28:37 +0100 Subject: [PATCH 10/72] Avoid usage of `umount` in `ISOSR` when `legacy_mode` is used `umount` should not be called when `legacy_mode` is enabled, otherwise a mounted dir used during SR creation is unmounted at the end of the `create` call (and also when a 
PBD is unplugged) in `detach` block. Signed-off-by: Ronan Abhamon --- drivers/ISOSR.py | 3 +-- tests/test_ISOSR.py | 59 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 2 deletions(-) diff --git a/drivers/ISOSR.py b/drivers/ISOSR.py index f591d2371..9ca44506f 100755 --- a/drivers/ISOSR.py +++ b/drivers/ISOSR.py @@ -569,8 +569,7 @@ def getCacheOptions(self): def detach(self, sr_uuid): """Std. detach""" - # This handles legacy mode too, so no need to check - if not self._checkmount(): + if 'legacy_mode' in self.dconf or not self._checkmount(): return try: diff --git a/tests/test_ISOSR.py b/tests/test_ISOSR.py index 3aea79633..760c566a0 100644 --- a/tests/test_ISOSR.py +++ b/tests/test_ISOSR.py @@ -24,6 +24,65 @@ def __init__(self, srcmd, none): self.dconf = srcmd.dconf self.srcmd = srcmd +class TestISOSR_overLocal(unittest.TestCase): + def create_isosr(self, location='/local_sr', sr_uuid='asr_uuid'): + srcmd = mock.Mock() + srcmd.dconf = { + 'location': location, + 'type': 'iso', + 'legacy_mode': True + } + srcmd.params = { + 'command': 'some_command' + } + isosr = FakeISOSR(srcmd, None) + isosr.load(sr_uuid) + return isosr + + @mock.patch('util.pread') + def test_load(self, pread): + self.create_isosr() + # Check `mount/umount` is never called. + self.assertFalse(pread.called) + + @mock.patch('os.path.exists', autospec=True) + @mock.patch('util.pread') + def test_attach_and_detach_local(self, pread, exists): + isosr = self.create_isosr() + isosr.attach(None) + self.assertFalse(pread.called) + isosr.detach(None) + self.assertFalse(pread.called) + + @mock.patch('os.path.exists', autospec=True) + @mock.patch('util.pread') + @mock.patch('ISOSR.ISOSR._checkmount') + def test_attach_and_detach_local_with_mounted_path( + self, _checkmount, pread, exists + ): + _checkmount.return_value = True + + isosr = self.create_isosr() + isosr.attach(None) + self.assertFalse(pread.called) + isosr.detach(None) + self.assertFalse(pread.called) + + @testlib.with_context + @mock.patch('os.path.exists') + @mock.patch('util.pread') + def test_attach_local_with_bad_path(self, context, pread, exists): + context.setup_error_codes() + + # Local path doesn't exist, but error list yes. + exists.side_effect = [False, True] + + isosr = self.create_isosr() + with self.assertRaises(SR.SROSError) as ose: + isosr.attach(None) + self.assertEquals(ose.exception.errno, 226) + self.assertFalse(pread.called) + class TestISOSR_overNFS(unittest.TestCase): From 1bbeb6a754b8444c120229d7b839436b826f7545 Mon Sep 17 00:00:00 2001 From: Ronan Abhamon Date: Wed, 18 May 2022 17:28:09 +0200 Subject: [PATCH 11/72] MooseFS SR uses now UUID subdirs for each SR A sm-config boolean param `subdir` is available to configure where to store the VHDs: - In a subdir with the SR UUID, the new behavior - In the root directory of the MooseFS SR By default, new SRs are created with `subdir` = True. Existing SRs are not modified and continue to use the folder that was given at SR creation, directly, without looking for a subdirectory. 
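For example (hypothetical values): with `masterhost` "mfsmaster", the default
port 9421, `rootpath` "/srv/moosefs" and an SR UUID of "4d3f9cd1", a newly
created SR mounts mfsmaster:9421:/srv/moosefs/4d3f9cd1, while a pre-existing
SR keeps mounting mfsmaster:9421:/srv/moosefs.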
Signed-off-by: Ronan Abhamon --- drivers/MooseFSSR.py | 56 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 52 insertions(+), 4 deletions(-) diff --git a/drivers/MooseFSSR.py b/drivers/MooseFSSR.py index 53485a0cb..b2e056af7 100755 --- a/drivers/MooseFSSR.py +++ b/drivers/MooseFSSR.py @@ -18,6 +18,7 @@ # # MooseFSSR: Based on CEPHFSSR and FileSR, mounts MooseFS share +import distutils.util import errno import os import syslog as _syslog @@ -99,7 +100,8 @@ def load(self, sr_uuid): if 'masterhost' not in self.dconf: raise xs_errors.XenError('ConfigServerMissing') self.remoteserver = self.dconf['masterhost'] - self.remotepath = self.dconf['rootpath'] + self.rootpath = self.dconf['rootpath'] + self.remotepath = self.rootpath # if masterport is not specified, use default: 9421 if 'masterport' not in self.dconf: self.remoteport = "9421" @@ -109,6 +111,14 @@ def load(self, sr_uuid): self.sm_config = self.session.xenapi.SR.get_sm_config(self.sr_ref) else: self.sm_config = self.srcmd.params.get('sr_sm_config') or {} + + if self.srcmd.cmd != 'sr_create': + self.subdir = distutils.util.strtobool( + self.sm_config.get('subdir') or '0' + ) + if self.subdir: + self.remotepath = os.path.join(self.remotepath, sr_uuid) + self.attached = False self.path = os.path.join(SR.MOUNT_BASE, sr_uuid) self.mountpoint = self.path @@ -138,7 +148,10 @@ def mount(self, mountpoint=None): options.append(self.dconf['options']) if options: options = ['-o', ','.join(options)] - command = ["mount", '-t', 'moosefs', self.remoteserver+":"+self.remoteport+":"+self.remotepath, mountpoint] + options + remote = '{}:{}:{}'.format( + self.remoteserver, self.remoteport, self.remotepath + ) + command = ["mount", '-t', 'moosefs', remote, mountpoint] + options util.ioretry(lambda: util.pread(command), errlist=[errno.EPIPE, errno.EIO], maxretry=2, nofail=True) except util.CommandException, inst: syslog(_syslog.LOG_ERR, 'MooseFS mount failed ' + inst.__str__()) @@ -199,6 +212,7 @@ def create(self, sr_uuid, size): if self.checkmount(): raise xs_errors.SROSError(113, 'MooseFS mount point already attached') + assert self.remotepath == self.rootpath try: self.mount() except MooseFSException, exc: @@ -210,6 +224,33 @@ def create(self, sr_uuid, size): pass raise xs_errors.SROSError(111, "MooseFS mount error [opterr=%s]" % exc.errstr) + try: + self.subdir = self.sm_config.get('subdir') + if self.subdir is None: + self.subdir = True + else: + self.subdir = distutils.util.strtobool(self.subdir) + + self.sm_config['subdir'] = str(self.subdir) + self.session.xenapi.SR.set_sm_config(self.sr_ref, self.sm_config) + + if not self.subdir: + return + + subdir = os.path.join(self.mountpoint, sr_uuid) + if util.ioretry(lambda: util.pathexists(subdir)): + if util.ioretry(lambda: util.isdir(subdir)): + raise xs_errors.XenError('SRExists') + else: + try: + util.ioretry(lambda: util.makedirs(subdir)) + except util.CommandException as e: + if e.code != errno.EEXIST: + raise MooseFSException( + 'Failed to create SR subdir: {}'.format(e) + ) + finally: + self.detach(sr_uuid) def delete(self, sr_uuid): # try to remove/delete non VDI contents first @@ -217,8 +258,15 @@ def delete(self, sr_uuid): try: if self.checkmount(): self.detach(sr_uuid) - if util.ioretry(lambda: util.pathexists(self.mountpoint)): - util.ioretry(lambda: os.rmdir(self.mountpoint)) + + if self.subdir: + # Mount using rootpath () instead of /. 
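+ # This exposes the parent directory so the per-SR subdirectory can be
+ # removed once the usual cleanup is done.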
+ self.remotepath = self.rootpath + self.attach(sr_uuid) + subdir = os.path.join(self.mountpoint, sr_uuid) + if util.ioretry(lambda: util.pathexists(subdir)): + util.ioretry(lambda: os.rmdir(subdir)) + self.detach(sr_uuid) except util.CommandException, inst: self.detach(sr_uuid) if inst.code != errno.ENOENT: From 36830da8767f0fa96b8759b7c60318def42c3d77 Mon Sep 17 00:00:00 2001 From: Ronan Abhamon Date: Thu, 23 Jun 2022 10:36:36 +0200 Subject: [PATCH 12/72] Fix is_open call for many drivers (#25) Ensure all shared drivers are imported in `_is_open` definition to register them in the driver list. Otherwise this function always fails with a SRUnknownType exception. Also, we must add two fake mandatory parameters to make MooseFS happy: `masterhost` and `rootpath`. Same for CephFS with: `serverpath`. (NFS driver is directly patched to ensure there is no usage of the `serverpath` param because its value is equal to None.) `location` param is required to use ZFS, to be more precise, in the parent class: `FileSR`. Signed-off-by: Ronan Abhamon --- drivers/GlusterFSSR.py | 3 ++- drivers/NFSSR.py | 12 ++++++++---- drivers/on_slave.py | 17 +++++++++++++++-- 3 files changed, 25 insertions(+), 7 deletions(-) diff --git a/drivers/GlusterFSSR.py b/drivers/GlusterFSSR.py index 72c482ae8..750eb233c 100644 --- a/drivers/GlusterFSSR.py +++ b/drivers/GlusterFSSR.py @@ -96,7 +96,8 @@ def load(self, sr_uuid): self.driver_config = DRIVER_CONFIG if 'server' not in self.dconf: raise xs_errors.XenError('ConfigServerMissing') - self.remoteserver = self.dconf['server'] + # Can be None => on-slave plugin hack (is_open function). + self.remoteserver = self.dconf['server'] or '' if self.sr_ref and self.session is not None: self.sm_config = self.session.xenapi.SR.get_sm_config(self.sr_ref) else: diff --git a/drivers/NFSSR.py b/drivers/NFSSR.py index b499cc905..ef73e1b4f 100755 --- a/drivers/NFSSR.py +++ b/drivers/NFSSR.py @@ -88,9 +88,12 @@ def load(self, sr_uuid): self.sm_config = self.srcmd.params.get('sr_sm_config') or {} self.other_config = self.srcmd.params.get('sr_other_config') or {} self.nosubdir = self.sm_config.get('nosubdir') == "true" - if 'serverpath' in self.dconf: - self.remotepath = os.path.join(self.dconf['serverpath'], - not self.nosubdir and sr_uuid or "") + serverpath = self.dconf.get('serverpath') + if serverpath is not None: + self.remotepath = os.path.join( + serverpath, + not self.nosubdir and sr_uuid or "" + ) self.path = os.path.join(SR.MOUNT_BASE, sr_uuid) # Handle optional dconf attributes @@ -102,7 +105,8 @@ def load(self, sr_uuid): self.options = '' def validate_remotepath(self, scan): - if 'serverpath' not in self.dconf: + serverpath = self.dconf.get('serverpath') + if serverpath is None: if scan: try: self.scan_exports(self.dconf['server']) diff --git a/drivers/on_slave.py b/drivers/on_slave.py index b4f33de20..bb3f5db65 100755 --- a/drivers/on_slave.py +++ b/drivers/on_slave.py @@ -76,9 +76,14 @@ def _is_open(session, args): """Check if VDI is open by a tapdisk on this host""" import SRCommand import SR - import NFSSR + import CephFSSR import EXTSR + import GlusterFSSR import LVHDSR + import MooseFSSR + import NFSSR + import XFSSR + import ZFSSR import blktap2 util.SMlog("on-slave.is_open: %s" % args) @@ -93,7 +98,15 @@ def _is_open(session, args): srType = "lvhd" cmd = SRCommand.SRCommand(None) cmd.driver_info = {"capabilities": None} - cmd.dconf = {"server": None, "device": "/HACK"} + cmd.dconf = { + "server": None, + "device": "/HACK", + # Hack for custom XCP-ng drivers. 
+ "masterhost": None, # MooseFS + "rootpath": None, # MooseFS + "serverpath": None, # CephFS + "location": "/HACK" # ZFS + } cmd.params = {"command": None} driver = SR.driver(srType) From cf100e50b862b6cb9cc7a4baa9d555f16ab915cf Mon Sep 17 00:00:00 2001 From: Ronan Abhamon Date: Thu, 23 Jun 2022 10:37:07 +0200 Subject: [PATCH 13/72] Remove SR_CACHING capability for many SR types (#24) SR_CACHING offers the capacity to use IntelliCache, but this feature is only available using NFS SR. For more details, the implementation of `_setup_cache` in blktap2.py uses only an instance of NFSFileVDI for the shared target. Signed-off-by: Ronan Abhamon --- drivers/CephFSSR.py | 2 +- drivers/GlusterFSSR.py | 2 +- drivers/MooseFSSR.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/CephFSSR.py b/drivers/CephFSSR.py index 415152f7e..f334bb308 100644 --- a/drivers/CephFSSR.py +++ b/drivers/CephFSSR.py @@ -38,7 +38,7 @@ import xs_errors from lock import Lock -CAPABILITIES = ["SR_PROBE", "SR_UPDATE", "SR_CACHING", +CAPABILITIES = ["SR_PROBE", "SR_UPDATE", "VDI_CREATE", "VDI_DELETE", "VDI_ATTACH", "VDI_DETACH", "VDI_UPDATE", "VDI_CLONE", "VDI_SNAPSHOT", "VDI_RESIZE", "VDI_MIRROR", "VDI_GENERATE_CONFIG", diff --git a/drivers/GlusterFSSR.py b/drivers/GlusterFSSR.py index 750eb233c..290a33129 100644 --- a/drivers/GlusterFSSR.py +++ b/drivers/GlusterFSSR.py @@ -35,7 +35,7 @@ import xs_errors from lock import Lock -CAPABILITIES = ["SR_PROBE", "SR_UPDATE", "SR_CACHING", +CAPABILITIES = ["SR_PROBE", "SR_UPDATE", "VDI_CREATE", "VDI_DELETE", "VDI_ATTACH", "VDI_DETACH", "VDI_UPDATE", "VDI_CLONE", "VDI_SNAPSHOT", "VDI_RESIZE", "VDI_MIRROR", "VDI_GENERATE_CONFIG", diff --git a/drivers/MooseFSSR.py b/drivers/MooseFSSR.py index b2e056af7..504e8503b 100755 --- a/drivers/MooseFSSR.py +++ b/drivers/MooseFSSR.py @@ -39,7 +39,7 @@ import xs_errors from lock import Lock -CAPABILITIES = ["SR_PROBE", "SR_UPDATE", "SR_CACHING", +CAPABILITIES = ["SR_PROBE", "SR_UPDATE", "VDI_CREATE", "VDI_DELETE", "VDI_ATTACH", "VDI_DETACH", "VDI_UPDATE", "VDI_CLONE", "VDI_SNAPSHOT", "VDI_RESIZE", "VDI_MIRROR", "VDI_GENERATE_CONFIG", From 8489ae4c46eb264183c238686328f7444ecead51 Mon Sep 17 00:00:00 2001 From: Ronan Abhamon Date: Mon, 19 Sep 2022 10:31:00 +0200 Subject: [PATCH 14/72] Fix code coverage regarding MooseFSSR and ZFSSR (#29) Signed-off-by: Ronan Abhamon --- tests/test_MooseFSSR.py | 2 -- tests/test_ZFSSR.py | 12 +++--------- 2 files changed, 3 insertions(+), 11 deletions(-) diff --git a/tests/test_MooseFSSR.py b/tests/test_MooseFSSR.py index 3349a449d..feaac6267 100644 --- a/tests/test_MooseFSSR.py +++ b/tests/test_MooseFSSR.py @@ -23,8 +23,6 @@ def create_moosefssr(self, masterhost='aServer', rootpath='/aServerpath', 'masterhost': masterhost, 'rootpath': rootpath } - if useroptions: - srcmd.dconf.update({'options': useroptions}) srcmd.params = { 'command': 'some_command', 'device_config': {} diff --git a/tests/test_ZFSSR.py b/tests/test_ZFSSR.py index 879ea3729..d0cca9359 100644 --- a/tests/test_ZFSSR.py +++ b/tests/test_ZFSSR.py @@ -53,9 +53,7 @@ def test_load_with_zfs_unavailable(self): self.create_zfs_sr() except SR.SROSError as e: # Check SRUnavailable error. - if e.errno != 47: - raise - failed = True + failed = e.errno == 47 self.assertTrue(failed) @mock.patch('ZFSSR.is_zfs_available', autospec=True) @@ -80,9 +78,7 @@ def test_create_with_invalid_zfs_path( sr.create(sr.uuid, 42) except SR.SROSError as e: # Check ZFSSRCreate error. 
- if e.errno != 5000: - raise - failed = True + failed = e.errno == 5000 self.assertTrue(failed) @mock.patch('ZFSSR.is_zfs_available', autospec=True) @@ -115,7 +111,5 @@ def test_scan_with_invalid_zfs_path( sr.scan(sr.uuid) except SR.SROSError as e: # Check SRUnavailable error. - if e.errno != 47: - raise - failed = True + failed = e.errno == 47 self.assertTrue(failed) From d4a82cbef4acf45592ead00f2804d2a90ede1d26 Mon Sep 17 00:00:00 2001 From: Yann Dirson Date: Wed, 8 Mar 2023 10:13:18 +0100 Subject: [PATCH 15/72] py3: simple changes from futurize on XCP-ng drivers * `except` syntax fixes * drop `has_key()` usage * drop `filter()` usage (but drop their silly `list(x.keys())` wrappings) * drop `map()` usage * use `int` not `long` * use `items()` not `iteritems()` Signed-off-by: Yann Dirson --- drivers/CephFSSR.py | 16 ++++++++-------- drivers/GlusterFSSR.py | 14 +++++++------- drivers/LinstorSR.py | 11 ++++------- drivers/MooseFSSR.py | 14 +++++++------- drivers/cleanup.py | 6 +++--- drivers/linstorvolumemanager.py | 2 +- 6 files changed, 30 insertions(+), 33 deletions(-) diff --git a/drivers/CephFSSR.py b/drivers/CephFSSR.py index f334bb308..f83beb3dd 100644 --- a/drivers/CephFSSR.py +++ b/drivers/CephFSSR.py @@ -132,18 +132,18 @@ def mount(self, mountpoint=None): try: if not util.ioretry(lambda: util.isdir(mountpoint)): util.ioretry(lambda: util.makedirs(mountpoint)) - except util.CommandException, inst: + except util.CommandException as inst: raise CephFSException("Failed to make directory: code is %d" % inst.code) try: options = [] - if self.dconf.has_key('options'): + if 'options' in self.dconf: options.append(self.dconf['options']) if options: options = ['-o', ','.join(options)] command = ["mount", '-t', 'ceph', self.remoteserver+":"+self.remoteport+":"+self.remotepath, mountpoint] + options util.ioretry(lambda: util.pread(command), errlist=[errno.EPIPE, errno.EIO], maxretry=2, nofail=True) - except util.CommandException, inst: + except util.CommandException as inst: syslog(_syslog.LOG_ERR, 'CephFS mount failed ' + inst.__str__()) raise CephFSException("mount failed with return code %d" % inst.code) @@ -161,12 +161,12 @@ def mount(self, mountpoint=None): def unmount(self, mountpoint, rmmountpoint): try: util.pread(["umount", mountpoint]) - except util.CommandException, inst: + except util.CommandException as inst: raise CephFSException("umount failed with return code %d" % inst.code) if rmmountpoint: try: os.rmdir(mountpoint) - except OSError, inst: + except OSError as inst: raise CephFSException("rmdir failed with error '%s'" % inst.strerror) def attach(self, sr_uuid): @@ -206,7 +206,7 @@ def create(self, sr_uuid, size): try: self.mount() - except CephFSException, exc: + except CephFSException as exc: # noinspection PyBroadException try: os.rmdir(self.mountpoint) @@ -223,7 +223,7 @@ def create(self, sr_uuid, size): try: util.ioretry(lambda: util.makedirs(self.linkpath)) os.symlink(self.linkpath, self.path) - except util.CommandException, inst: + except util.CommandException as inst: if inst.code != errno.EEXIST: try: self.unmount(self.mountpoint, True) @@ -244,7 +244,7 @@ def delete(self, sr_uuid): if util.ioretry(lambda: util.pathexists(self.linkpath)): util.ioretry(lambda: os.rmdir(self.linkpath)) util.SMlog(str(self.unmount(self.mountpoint, True))) - except util.CommandException, inst: + except util.CommandException as inst: self.detach(sr_uuid) if inst.code != errno.ENOENT: raise xs_errors.SROSError(114, "Failed to remove CephFS mount point") diff --git a/drivers/GlusterFSSR.py 
b/drivers/GlusterFSSR.py index 290a33129..6d5e06146 100644 --- a/drivers/GlusterFSSR.py +++ b/drivers/GlusterFSSR.py @@ -122,7 +122,7 @@ def mount(self, mountpoint=None): try: if not util.ioretry(lambda: util.isdir(mountpoint)): util.ioretry(lambda: util.makedirs(mountpoint)) - except util.CommandException, inst: + except util.CommandException as inst: raise GlusterFSException("Failed to make directory: code is %d" % inst.code) try: options = [] @@ -134,7 +134,7 @@ def mount(self, mountpoint=None): options = ['-o', ','.join(options)] command = ["mount", '-t', 'glusterfs', self.remoteserver, mountpoint] + options util.ioretry(lambda: util.pread(command), errlist=[errno.EPIPE, errno.EIO], maxretry=2, nofail=True) - except util.CommandException, inst: + except util.CommandException as inst: syslog(_syslog.LOG_ERR, 'GlusterFS mount failed ' + inst.__str__()) raise GlusterFSException("mount failed with return code %d" % inst.code) @@ -152,12 +152,12 @@ def mount(self, mountpoint=None): def unmount(self, mountpoint, rmmountpoint): try: util.pread(["umount", mountpoint]) - except util.CommandException, inst: + except util.CommandException as inst: raise GlusterFSException("umount failed with return code %d" % inst.code) if rmmountpoint: try: os.rmdir(mountpoint) - except OSError, inst: + except OSError as inst: raise GlusterFSException("rmdir failed with error '%s'" % inst.strerror) def attach(self, sr_uuid): @@ -197,7 +197,7 @@ def create(self, sr_uuid, size): try: self.mount() - except GlusterFSException, exc: + except GlusterFSException as exc: # noinspection PyBroadException try: os.rmdir(self.mountpoint) @@ -214,7 +214,7 @@ def create(self, sr_uuid, size): try: util.ioretry(lambda: util.makedirs(self.linkpath)) os.symlink(self.linkpath, self.path) - except util.CommandException, inst: + except util.CommandException as inst: if inst.code != errno.EEXIST: try: self.unmount(self.mountpoint, True) @@ -235,7 +235,7 @@ def delete(self, sr_uuid): if util.ioretry(lambda: util.pathexists(self.linkpath)): util.ioretry(lambda: os.rmdir(self.linkpath)) self.unmount(self.mountpoint, True) - except util.CommandException, inst: + except util.CommandException as inst: self.detach(sr_uuid) if inst.code != errno.ENOENT: raise xs_errors.SROSError(114, "Failed to remove GlusterFS mount point") diff --git a/drivers/LinstorSR.py b/drivers/LinstorSR.py index ffd70abfc..4e8888e22 100755 --- a/drivers/LinstorSR.py +++ b/drivers/LinstorSR.py @@ -460,7 +460,7 @@ def create(self, uuid, size): srs = xenapi.SR.get_all_records_where( 'field "type" = "{}"'.format(self.DRIVER_TYPE) ) - srs = dict(filter(lambda e: e[1]['uuid'] != self.uuid, srs.items())) + srs = dict([e for e in srs.items() if e[1]['uuid'] != self.uuid]) for sr in srs.values(): for pbd in sr['PBDs']: @@ -1296,7 +1296,7 @@ def create(self, sr_uuid, vdi_uuid, size): assert self.vdi_type # 2. Compute size and check space available. - size = vhdutil.validate_and_round_vhd_size(long(size)) + size = vhdutil.validate_and_round_vhd_size(int(size)) util.SMlog('LinstorVDI.create: type={}, size={}'.format( self.vdi_type, size )) @@ -1514,7 +1514,7 @@ def resize(self, sr_uuid, vdi_uuid, size): raise xs_errors.XenError('VDISize', opterr='shrinking not allowed') # Compute the virtual VHD size. 
- size = vhdutil.validate_and_round_vhd_size(long(size)) + size = vhdutil.validate_and_round_vhd_size(int(size)) if size == self.size: return VDI.VDI.get_params(self) @@ -1985,10 +1985,7 @@ def _snapshot(self, snap_type, cbtlog=None, cbt_consistency=None): # See: `tap_unpause` in `blktap2.py`. vdi_ref = self.session.xenapi.VDI.get_by_uuid(active_uuid) sm_config = self.session.xenapi.VDI.get_sm_config(vdi_ref) - for key in filter( - lambda x: x == 'paused' or x.startswith('host_'), - sm_config.keys() - ): + for key in [x for x in sm_config.keys() if x == 'paused' or x.startswith('host_')]: active_vdi.sm_config[key] = sm_config[key] # 7. Verify parent locator field of both children and diff --git a/drivers/MooseFSSR.py b/drivers/MooseFSSR.py index 504e8503b..05a42640d 100755 --- a/drivers/MooseFSSR.py +++ b/drivers/MooseFSSR.py @@ -139,12 +139,12 @@ def mount(self, mountpoint=None): try: if not util.ioretry(lambda: util.isdir(mountpoint)): util.ioretry(lambda: util.makedirs(mountpoint)) - except util.CommandException, inst: + except util.CommandException as inst: raise MooseFSException("Failed to make directory: code is %d" % inst.code) try: options = [] - if self.dconf.has_key('options'): + if 'options' in self.dconf: options.append(self.dconf['options']) if options: options = ['-o', ','.join(options)] @@ -153,7 +153,7 @@ def mount(self, mountpoint=None): ) command = ["mount", '-t', 'moosefs', remote, mountpoint] + options util.ioretry(lambda: util.pread(command), errlist=[errno.EPIPE, errno.EIO], maxretry=2, nofail=True) - except util.CommandException, inst: + except util.CommandException as inst: syslog(_syslog.LOG_ERR, 'MooseFS mount failed ' + inst.__str__()) raise MooseFSException("Mount failed with return code %d" % inst.code) @@ -171,12 +171,12 @@ def mount(self, mountpoint=None): def unmount(self, mountpoint, rmmountpoint): try: util.pread(["umount", mountpoint]) - except util.CommandException, inst: + except util.CommandException as inst: raise MooseFSException("Command umount failed with return code %d" % inst.code) if rmmountpoint: try: os.rmdir(mountpoint) - except OSError, inst: + except OSError as inst: raise MooseFSException("Command rmdir failed with error '%s'" % inst.strerror) def attach(self, sr_uuid): @@ -215,7 +215,7 @@ def create(self, sr_uuid, size): assert self.remotepath == self.rootpath try: self.mount() - except MooseFSException, exc: + except MooseFSException as exc: # noinspection PyBroadException try: os.rmdir(self.mountpoint) @@ -267,7 +267,7 @@ def delete(self, sr_uuid): if util.ioretry(lambda: util.pathexists(subdir)): util.ioretry(lambda: os.rmdir(subdir)) self.detach(sr_uuid) - except util.CommandException, inst: + except util.CommandException as inst: self.detach(sr_uuid) if inst.code != errno.ENOENT: raise xs_errors.SROSError(114, "Failed to remove MooseFS mount point") diff --git a/drivers/cleanup.py b/drivers/cleanup.py index 02e1a4ea3..b9855a431 100755 --- a/drivers/cleanup.py +++ b/drivers/cleanup.py @@ -3104,7 +3104,7 @@ def getFreeSpace(self): def scan(self, force=False): all_vdi_info = self._scan(force) - for uuid, vdiInfo in all_vdi_info.iteritems(): + for uuid, vdiInfo in all_vdi_info.items(): # When vdiInfo is None, the VDI is RAW. 
vdi = self.getVDI(uuid) if not vdi: @@ -3148,7 +3148,7 @@ def _scan(self, force): error = False try: all_vdi_info = self._load_vdi_info() - for uuid, vdiInfo in all_vdi_info.iteritems(): + for uuid, vdiInfo in all_vdi_info.items(): if vdiInfo and vdiInfo.error: error = True break @@ -3213,7 +3213,7 @@ def _hasValidDevicePath(self, uuid): def _handleInterruptedCoalesceLeaf(self): entries = self.journaler.get_all(VDI.JRN_LEAF) - for uuid, parentUuid in entries.iteritems(): + for uuid, parentUuid in entries.items(): if self._hasValidDevicePath(parentUuid) or \ self._hasValidDevicePath(self.TMP_RENAME_PREFIX + uuid): self._undoInterruptedCoalesceLeaf(uuid, parentUuid) diff --git a/drivers/linstorvolumemanager.py b/drivers/linstorvolumemanager.py index d40042179..dca96456d 100755 --- a/drivers/linstorvolumemanager.py +++ b/drivers/linstorvolumemanager.py @@ -1053,7 +1053,7 @@ def create_sr( # we can try to use it directly. pools = pools.storage_pools if pools: - existing_node_names = map(lambda pool: pool.node_name, pools) + existing_node_names = [pool.node_name for pool in pools] raise LinstorVolumeManagerError( 'Unable to create SR `{}`. It already exists on node(s): {}' .format(group_name, existing_node_names) From ba08ea35fe57b8a2ba20a1fadc3999158db725c6 Mon Sep 17 00:00:00 2001 From: Yann Dirson Date: Wed, 8 Mar 2023 10:28:10 +0100 Subject: [PATCH 16/72] py3: futurize fix of xmlrpc calls for CephFS, GlusterFS, MooseFS, Linstore Signed-off-by: Yann Dirson --- drivers/CephFSSR.py | 6 +++--- drivers/GlusterFSSR.py | 6 +++--- drivers/LinstorSR.py | 8 ++++---- drivers/MooseFSSR.py | 6 +++--- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/CephFSSR.py b/drivers/CephFSSR.py index f83beb3dd..be2521fd7 100644 --- a/drivers/CephFSSR.py +++ b/drivers/CephFSSR.py @@ -21,7 +21,7 @@ import errno import os import syslog as _syslog -import xmlrpclib +import xmlrpc.client from syslog import syslog # careful with the import order here @@ -277,8 +277,8 @@ def generate_config(self, sr_uuid, vdi_uuid): 'command': 'vdi_attach_from_config'} # Return the 'config' encoded within a normal XMLRPC response so that # we can use the regular response/error parsing code. - config = xmlrpclib.dumps(tuple([resp]), "vdi_attach_from_config") - return xmlrpclib.dumps((config,), "", True) + config = xmlrpc.client.dumps(tuple([resp]), "vdi_attach_from_config") + return xmlrpc.client.dumps((config,), "", True) def attach_from_config(self, sr_uuid, vdi_uuid): try: diff --git a/drivers/GlusterFSSR.py b/drivers/GlusterFSSR.py index 6d5e06146..48471f969 100644 --- a/drivers/GlusterFSSR.py +++ b/drivers/GlusterFSSR.py @@ -19,7 +19,7 @@ import errno import os import syslog as _syslog -import xmlrpclib +import xmlrpc.client from syslog import syslog # careful with the import order here @@ -269,8 +269,8 @@ def generate_config(self, sr_uuid, vdi_uuid): 'command': 'vdi_attach_from_config'} # Return the 'config' encoded within a normal XMLRPC response so that # we can use the regular response/error parsing code. 
- config = xmlrpclib.dumps(tuple([resp]), "vdi_attach_from_config") - return xmlrpclib.dumps((config,), "", True) + config = xmlrpc.client.dumps(tuple([resp]), "vdi_attach_from_config") + return xmlrpc.client.dumps((config,), "", True) def attach_from_config(self, sr_uuid, vdi_uuid): try: diff --git a/drivers/LinstorSR.py b/drivers/LinstorSR.py index 4e8888e22..f6badabbf 100755 --- a/drivers/LinstorSR.py +++ b/drivers/LinstorSR.py @@ -38,7 +38,7 @@ import util import VDI import vhdutil -import xmlrpclib +import xmlrpc.client import xs_errors from srmetadata import \ @@ -865,7 +865,7 @@ def _load_vdis(self): if is_a_snapshot: xenapi.VDI.set_snapshot_time( vdi_ref, - xmlrpclib.DateTime( + xmlrpc.client.DateTime( volume_metadata[SNAPSHOT_TIME_TAG] or '19700101T00:00:00Z' ) @@ -1617,8 +1617,8 @@ def generate_config(self, sr_uuid, vdi_uuid): resp['vdi_path'] = self.path resp['command'] = 'vdi_attach_from_config' - config = xmlrpclib.dumps(tuple([resp]), 'vdi_attach_from_config') - return xmlrpclib.dumps((config,), "", True) + config = xmlrpc.client.dumps(tuple([resp]), 'vdi_attach_from_config') + return xmlrpc.client.dumps((config,), "", True) def attach_from_config(self, sr_uuid, vdi_uuid): """ diff --git a/drivers/MooseFSSR.py b/drivers/MooseFSSR.py index 05a42640d..c29c7583b 100755 --- a/drivers/MooseFSSR.py +++ b/drivers/MooseFSSR.py @@ -22,7 +22,7 @@ import errno import os import syslog as _syslog -import xmlrpclib +import xmlrpc.client from syslog import syslog # careful with the import order here @@ -300,8 +300,8 @@ def generate_config(self, sr_uuid, vdi_uuid): 'command': 'vdi_attach_from_config'} # Return the 'config' encoded within a normal XMLRPC response so that # we can use the regular response/error parsing code. - config = xmlrpclib.dumps(tuple([resp]), "vdi_attach_from_config") - return xmlrpclib.dumps((config,), "", True) + config = xmlrpc.client.dumps(tuple([resp]), "vdi_attach_from_config") + return xmlrpc.client.dumps((config,), "", True) def attach_from_config(self, sr_uuid, vdi_uuid): try: From 2ae1223604a6c727c17e0adf94073f4e1ef43e2f Mon Sep 17 00:00:00 2001 From: Yann Dirson Date: Wed, 8 Mar 2023 10:32:37 +0100 Subject: [PATCH 17/72] py3: use of integer division operator Guided by futurize's "old_div" use Signed-off-by: Yann Dirson --- drivers/linstorvolumemanager.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/linstorvolumemanager.py b/drivers/linstorvolumemanager.py index dca96456d..1b86a439b 100755 --- a/drivers/linstorvolumemanager.py +++ b/drivers/linstorvolumemanager.py @@ -28,7 +28,7 @@ def round_up(value, divisor): assert divisor divisor = int(divisor) - return int((int(value) + divisor - 1) / divisor) * divisor + return ((int(value) + divisor - 1) // divisor) * divisor def round_down(value, divisor): @@ -540,7 +540,7 @@ def resize_volume(self, volume_uuid, new_size): result = self._linstor.volume_dfn_modify( rsc_name=volume_name, volume_nr=0, - size=new_size / 1024 + size=new_size // 1024 ) error_str = self._get_error_str(result) if error_str: From ccbeefe45eb6b9c452bdb0c786f4b68db579db67 Mon Sep 17 00:00:00 2001 From: Yann Dirson Date: Wed, 8 Mar 2023 13:53:21 +0100 Subject: [PATCH 18/72] test_on_slave: allow to work with SR using absolute PROBE_MOUNTPOINT PROBE_MOUNTPOINT in a some drivers is a relative path, which is resolved using MOUNT_BASE at probe time, but CephFS, GlusterFS and MooseFS it is set on driver load to an absolute path, and this requires MOUNT_BASE to be looking like a path component. 
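The failure can be reproduced in isolation as follows (illustration only, assuming Python 3 and unittest.mock); the actual traceback from the test suite is shown below:

```python
# Illustration only: os.path.join() in Python 3 calls os.fspath() on its
# first argument, so a MagicMock attribute raises TypeError, while giving
# the mocked SR module a real string MOUNT_BASE makes the join work.
import os
from unittest import mock

fake_sr_module = mock.MagicMock()
try:
    os.path.join(fake_sr_module.MOUNT_BASE, "probe")
except TypeError as exc:
    print(exc)  # expected str, bytes or os.PathLike object, not MagicMock

fake_sr_module.MOUNT_BASE = "/tmp/sr_mount"  # the fix applied in setUp()
print(os.path.join(fake_sr_module.MOUNT_BASE, "probe"))  # /tmp/sr_mount/probe
```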
``` drivers/CephFSSR.py:69: in PROBE_MOUNTPOINT = os.path.join(SR.MOUNT_BASE, "probe") _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ a = , p = ('probe',) def join(a, *p): """Join two or more pathname components, inserting '/' as needed. If any component is an absolute path, all previous path components will be discarded. An empty last part will result in a path that ends with a separator.""" > a = os.fspath(a) E TypeError: expected str, bytes or os.PathLike object, not MagicMock /usr/lib64/python3.6/posixpath.py:80: TypeError ``` Note this same idiom is also used in upstream SMBFS, although that does not appear to cause any problem with the tests. Signed-off-by: Yann Dirson --- tests/test_on_slave.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_on_slave.py b/tests/test_on_slave.py index 90347477d..679d4421d 100644 --- a/tests/test_on_slave.py +++ b/tests/test_on_slave.py @@ -30,6 +30,7 @@ def setUp(self): self.mock_import.side_effect = self.fake_import self.mock_sr = mock.MagicMock() + self.mock_sr.MOUNT_BASE = "/tmp/sr_mount" self.mocks['SR'] = self.mock_sr self.mock_blktap2 = mock.MagicMock() self.mocks['blktap2'] = self.mock_blktap2 From 8fd96af235962161bcfdca68873b583ed3ff9030 Mon Sep 17 00:00:00 2001 From: Yann Dirson Date: Mon, 27 Mar 2023 15:30:46 +0200 Subject: [PATCH 19/72] py3: switch interpreter to python3 --- drivers/CephFSSR.py | 2 +- drivers/GlusterFSSR.py | 2 +- drivers/LinstorSR.py | 2 +- drivers/MooseFSSR.py | 2 +- drivers/ZFSSR.py | 2 +- drivers/linstorjournaler.py | 2 +- drivers/linstorvhdutil.py | 2 +- drivers/linstorvolumemanager.py | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/CephFSSR.py b/drivers/CephFSSR.py index be2521fd7..bd6a4b149 100644 --- a/drivers/CephFSSR.py +++ b/drivers/CephFSSR.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # # Original work copyright (C) Citrix systems # Modified work copyright (C) Vates SAS and XCP-ng community diff --git a/drivers/GlusterFSSR.py b/drivers/GlusterFSSR.py index 48471f969..42e5ab52d 100644 --- a/drivers/GlusterFSSR.py +++ b/drivers/GlusterFSSR.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # # Original work copyright (C) Citrix systems # Modified work copyright (C) Vates SAS and XCP-ng community diff --git a/drivers/LinstorSR.py b/drivers/LinstorSR.py index f6badabbf..e7022ca9c 100755 --- a/drivers/LinstorSR.py +++ b/drivers/LinstorSR.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # # Copyright (C) 2020 Vates SAS - ronan.abhamon@vates.fr # diff --git a/drivers/MooseFSSR.py b/drivers/MooseFSSR.py index c29c7583b..3911b0965 100755 --- a/drivers/MooseFSSR.py +++ b/drivers/MooseFSSR.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # # Original work copyright (C) Citrix systems # Modified work copyright (C) Tappest sp. 
z o.o., Vates SAS and XCP-ng community diff --git a/drivers/ZFSSR.py b/drivers/ZFSSR.py index d37521016..354ca90e1 100644 --- a/drivers/ZFSSR.py +++ b/drivers/ZFSSR.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # # Copyright (C) 2020 Vates SAS # diff --git a/drivers/linstorjournaler.py b/drivers/linstorjournaler.py index 749533056..bc7cff7c2 100755 --- a/drivers/linstorjournaler.py +++ b/drivers/linstorjournaler.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # # Copyright (C) 2020 Vates SAS - ronan.abhamon@vates.fr # diff --git a/drivers/linstorvhdutil.py b/drivers/linstorvhdutil.py index f31c75258..7a1356627 100644 --- a/drivers/linstorvhdutil.py +++ b/drivers/linstorvhdutil.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # # Copyright (C) 2020 Vates SAS - ronan.abhamon@vates.fr # diff --git a/drivers/linstorvolumemanager.py b/drivers/linstorvolumemanager.py index 1b86a439b..182b88992 100755 --- a/drivers/linstorvolumemanager.py +++ b/drivers/linstorvolumemanager.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # # Copyright (C) 2020 Vates SAS - ronan.abhamon@vates.fr # From 0918908a19367acbd2cd17650ae77f014a72fdf5 Mon Sep 17 00:00:00 2001 From: Ronan Abhamon Date: Thu, 4 May 2023 10:24:22 +0200 Subject: [PATCH 20/72] Support recent version of coverage tool (coverage 7.2.5) Without these changes many warns/errors are emitted: - "assertEquals" is deprecated, "assertEqual" must be used instead - mocked objects in "setUp" method like "cleanup.IPCFlag" cannot be repatched at the level of the test functions, otherwise tests are aborted, this is the behavior of coverage version 7.2.5 Signed-off-by: Ronan Abhamon --- tests/test_ISOSR.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_ISOSR.py b/tests/test_ISOSR.py index 760c566a0..9dec459c7 100644 --- a/tests/test_ISOSR.py +++ b/tests/test_ISOSR.py @@ -80,7 +80,7 @@ def test_attach_local_with_bad_path(self, context, pread, exists): isosr = self.create_isosr() with self.assertRaises(SR.SROSError) as ose: isosr.attach(None) - self.assertEquals(ose.exception.errno, 226) + self.assertEqual(ose.exception.errno, 226) self.assertFalse(pread.called) From bd2d6a0502aca188a4d90ed949b315111666e23a Mon Sep 17 00:00:00 2001 From: Ronan Abhamon Date: Fri, 20 Nov 2020 16:42:52 +0100 Subject: [PATCH 21/72] feat(LinstorSR): import all 8.2 changes Signed-off-by: Ronan Abhamon --- Makefile | 13 + drivers/LinstorSR.py | 1463 ++++++++--- drivers/blktap2.py | 24 +- drivers/cleanup.py | 351 ++- drivers/linstor-manager | 941 ++++++- drivers/linstorjournaler.py | 48 +- drivers/linstorvhdutil.py | 480 +++- drivers/linstorvolumemanager.py | 2336 +++++++++++++---- drivers/on_slave.py | 23 +- drivers/tapdisk-pause | 8 +- drivers/util.py | 125 +- drivers/vhdutil.py | 5 +- .../drbd-reactor.service.d/override.conf | 6 + .../linstor-satellite.service.d/override.conf | 5 + etc/systemd/system/var-lib-linstor.service | 21 + linstor/linstor-monitord.c | 199 +- multipath/multipath.conf | 1 + scripts/fork-log-daemon | 36 + scripts/linstor-kv-tool | 84 + scripts/safe-umount | 39 + tests/test_on_slave.py | 10 +- 21 files changed, 5115 insertions(+), 1103 deletions(-) create mode 100644 etc/systemd/system/drbd-reactor.service.d/override.conf create mode 100644 etc/systemd/system/linstor-satellite.service.d/override.conf create mode 100644 etc/systemd/system/var-lib-linstor.service create mode 100755 scripts/fork-log-daemon create mode 100755 scripts/linstor-kv-tool create mode 100755 
scripts/safe-umount diff --git a/Makefile b/Makefile index 5255a583b..9a8a03147 100755 --- a/Makefile +++ b/Makefile @@ -86,6 +86,7 @@ PLUGIN_SCRIPT_DEST := /etc/xapi.d/plugins/ LIBEXEC := /opt/xensource/libexec/ UDEV_RULES_DIR := /etc/udev/rules.d/ UDEV_SCRIPTS_DIR := /etc/udev/scripts/ +SYSTEMD_CONF_DIR := /etc/systemd/system/ SYSTEMD_SERVICE_DIR := /usr/lib/systemd/system/ INIT_DIR := /etc/rc.d/init.d/ MPATH_CONF_DIR := /etc/multipath.xenserver/ @@ -133,6 +134,9 @@ install: precheck mkdir -p $(SM_STAGING)$(UDEV_RULES_DIR) mkdir -p $(SM_STAGING)$(UDEV_SCRIPTS_DIR) mkdir -p $(SM_STAGING)$(INIT_DIR) + mkdir -p $(SM_STAGING)$(SYSTEMD_CONF_DIR) + mkdir -p $(SM_STAGING)$(SYSTEMD_CONF_DIR)/drbd-reactor.service.d + mkdir -p $(SM_STAGING)$(SYSTEMD_CONF_DIR)/linstor-satellite.service.d mkdir -p $(SM_STAGING)$(SYSTEMD_SERVICE_DIR) mkdir -p $(SM_STAGING)$(MPATH_CONF_DIR) mkdir -p $(SM_STAGING)$(MPATH_CUSTOM_CONF_DIR) @@ -161,6 +165,12 @@ install: precheck $(SM_STAGING)/$(SM_DEST) install -m 644 etc/logrotate.d/$(SMLOG_CONF) \ $(SM_STAGING)/$(LOGROTATE_DIR) + install -m 644 etc/systemd/system/drbd-reactor.service.d/override.conf \ + $(SM_STAGING)/$(SYSTEMD_CONF_DIR)/drbd-reactor.service.d/ + install -m 644 etc/systemd/system/linstor-satellite.service.d/override.conf \ + $(SM_STAGING)/$(SYSTEMD_CONF_DIR)/linstor-satellite.service.d/ + install -m 644 etc/systemd/system/var-lib-linstor.service \ + $(SM_STAGING)/$(SYSTEMD_CONF_DIR) install -m 644 etc/make-dummy-sr.service \ $(SM_STAGING)/$(SYSTEMD_SERVICE_DIR) install -m 644 systemd/xs-sm.service \ @@ -210,6 +220,9 @@ install: precheck install -m 755 drivers/iscsilib.py $(SM_STAGING)$(SM_DEST) install -m 755 drivers/fcoelib.py $(SM_STAGING)$(SM_DEST) mkdir -p $(SM_STAGING)$(LIBEXEC) + install -m 755 scripts/fork-log-daemon $(SM_STAGING)$(LIBEXEC) + install -m 755 scripts/linstor-kv-tool $(SM_STAGING)$(BIN_DEST) + install -m 755 scripts/safe-umount $(SM_STAGING)$(LIBEXEC) install -m 755 scripts/local-device-change $(SM_STAGING)$(LIBEXEC) install -m 755 scripts/check-device-sharing $(SM_STAGING)$(LIBEXEC) install -m 755 scripts/usb_change $(SM_STAGING)$(LIBEXEC) diff --git a/drivers/LinstorSR.py b/drivers/LinstorSR.py index e7022ca9c..52d5c26ea 100755 --- a/drivers/LinstorSR.py +++ b/drivers/LinstorSR.py @@ -19,25 +19,39 @@ try: from linstorjournaler import LinstorJournaler from linstorvhdutil import LinstorVhdUtil - from linstorvolumemanager \ - import LinstorVolumeManager, LinstorVolumeManagerError + from linstorvolumemanager import get_controller_uri + from linstorvolumemanager import get_controller_node_name + from linstorvolumemanager import LinstorVolumeManager + from linstorvolumemanager import LinstorVolumeManagerError + from linstorvolumemanager import PERSISTENT_PREFIX + LINSTOR_AVAILABLE = True except ImportError: + PERSISTENT_PREFIX = 'unknown' + LINSTOR_AVAILABLE = False from lock import Lock, LOCK_TYPE_GC_RUNNING import blktap2 import cleanup +import distutils import errno import functools +import lvutil +import os +import re import scsiutil +import signal +import socket import SR import SRCommand +import subprocess import time import traceback import util import VDI import vhdutil +import xml.etree.ElementTree as xml_parser import xmlrpc.client import xs_errors @@ -48,6 +62,27 @@ HIDDEN_TAG = 'hidden' +XHA_CONFIG_PATH = '/etc/xensource/xhad.conf' + +FORK_LOG_DAEMON = '/opt/xensource/libexec/fork-log-daemon' + +# This flag can be disabled to debug the DRBD layer. 
+# When this config var is False, the HA can only be used under +# specific conditions: +# - Only one heartbeat diskless VDI is present in the pool. +# - The other hearbeat volumes must be diskful and limited to a maximum of 3. +USE_HTTP_NBD_SERVERS = True + +# Useful flag to trace calls using cProfile. +TRACE_PERFS = False + +# Enable/Disable VHD key hash support. +USE_KEY_HASH = False + +# Special volumes. +HA_VOLUME_NAME = PERSISTENT_PREFIX + 'ha-statefile' +REDO_LOG_VOLUME_NAME = PERSISTENT_PREFIX + 'redo-log' + # ============================================================================== # TODO: Supports 'VDI_INTRODUCE', 'VDI_RESET_ON_BOOT/2', 'SR_TRIM', @@ -72,9 +107,9 @@ CONFIGURATION = [ ['group-name', 'LVM group name'], - ['hosts', 'host names to use'], ['redundancy', 'replication count'], - ['provisioning', '"thin" or "thick" are accepted'] + ['provisioning', '"thin" or "thick" are accepted (optional, defaults to thin)'], + ['monitor-db-quorum', 'disable controller when only one host is online (optional, defaults to true)'] ] DRIVER_INFO = { @@ -92,7 +127,8 @@ OPS_EXCLUSIVE = [ 'sr_create', 'sr_delete', 'sr_attach', 'sr_detach', 'sr_scan', - 'sr_update', 'vdi_create', 'vdi_delete', 'vdi_clone', 'vdi_snapshot' + 'sr_update', 'sr_probe', 'vdi_init', 'vdi_create', 'vdi_delete', + 'vdi_attach', 'vdi_detach', 'vdi_clone', 'vdi_snapshot', ] # ============================================================================== @@ -100,68 +136,39 @@ # ============================================================================== -def compute_volume_size(virtual_size, image_type): - if image_type == vhdutil.VDI_TYPE_VHD: - # All LINSTOR VDIs have the metadata area preallocated for - # the maximum possible virtual size (for fast online VDI.resize). - meta_overhead = vhdutil.calcOverheadEmpty(LinstorVDI.MAX_SIZE) - bitmap_overhead = vhdutil.calcOverheadBitmap(virtual_size) - virtual_size += meta_overhead + bitmap_overhead - elif image_type != vhdutil.VDI_TYPE_RAW: - raise Exception('Invalid image type: {}'.format(image_type)) - - return LinstorVolumeManager.round_up_volume_size(virtual_size) - - -def try_lock(lock): - for i in range(20): - if lock.acquireNoblock(): - return - time.sleep(1) - raise util.SRBusyException() - - def attach_thin(session, journaler, linstor, sr_uuid, vdi_uuid): volume_metadata = linstor.get_volume_metadata(vdi_uuid) image_type = volume_metadata.get(VDI_TYPE_TAG) if image_type == vhdutil.VDI_TYPE_RAW: return - lock = Lock(vhdutil.LOCK_TYPE_SR, sr_uuid) - try: - try_lock(lock) - - device_path = linstor.get_device_path(vdi_uuid) + device_path = linstor.get_device_path(vdi_uuid) - # If the virtual VHD size is lower than the LINSTOR volume size, - # there is nothing to do. - vhd_size = compute_volume_size( - LinstorVhdUtil(session, linstor).get_size_virt(vdi_uuid), - image_type - ) + # If the virtual VHD size is lower than the LINSTOR volume size, + # there is nothing to do. 
+ vhd_size = LinstorVhdUtil.compute_volume_size( + # TODO: Replace pylint comment with this feature when possible: + # https://github.com/PyCQA/pylint/pull/2926 + LinstorVhdUtil(session, linstor).get_size_virt(vdi_uuid), # pylint: disable = E1120 + image_type + ) - volume_info = linstor.get_volume_info(vdi_uuid) - volume_size = volume_info.virtual_size + volume_info = linstor.get_volume_info(vdi_uuid) + volume_size = volume_info.virtual_size - if vhd_size > volume_size: - inflate( - journaler, linstor, vdi_uuid, device_path, - vhd_size, volume_size - ) - finally: - lock.release() + if vhd_size > volume_size: + LinstorVhdUtil(session, linstor).inflate( + journaler, vdi_uuid, device_path, vhd_size, volume_size + ) -def detach_thin(session, linstor, sr_uuid, vdi_uuid): +def detach_thin_impl(session, linstor, sr_uuid, vdi_uuid): volume_metadata = linstor.get_volume_metadata(vdi_uuid) image_type = volume_metadata.get(VDI_TYPE_TAG) if image_type == vhdutil.VDI_TYPE_RAW: return - lock = Lock(vhdutil.LOCK_TYPE_SR, sr_uuid) - try: - try_lock(lock) - + def check_vbd_count(): vdi_ref = session.xenapi.VDI.get_by_uuid(vdi_uuid) vbds = session.xenapi.VBD.get_all_records_where( 'field "VDI" = "{}"'.format(vdi_ref) @@ -178,67 +185,103 @@ def detach_thin(session, linstor, sr_uuid, vdi_uuid): 'at least 2 VBDs'.format(vdi_uuid) ) - device_path = linstor.get_device_path(vdi_uuid) - new_volume_size = LinstorVolumeManager.round_up_volume_size( - LinstorVhdUtil(session, linstor).get_size_phys(device_path) - ) + # We can have multiple VBDs attached to a VDI during a VM-template clone. + # So we use a timeout to ensure that we can detach the volume properly. + util.retry(check_vbd_count, maxretry=10, period=1) - volume_info = linstor.get_volume_info(vdi_uuid) - old_volume_size = volume_info.virtual_size - deflate(vdi_uuid, device_path, new_volume_size, old_volume_size) - finally: - lock.release() + device_path = linstor.get_device_path(vdi_uuid) + vhdutil_inst = LinstorVhdUtil(session, linstor) + new_volume_size = LinstorVolumeManager.round_up_volume_size( + # TODO: Replace pylint comment with this feature when possible: + # https://github.com/PyCQA/pylint/pull/2926 + vhdutil_inst.get_size_phys(vdi_uuid) # pylint: disable = E1120 + ) + volume_info = linstor.get_volume_info(vdi_uuid) + old_volume_size = volume_info.virtual_size + vhdutil_inst.deflate(device_path, new_volume_size, old_volume_size) -def inflate(journaler, linstor, vdi_uuid, vdi_path, new_size, old_size): - # Only inflate if the LINSTOR volume capacity is not enough. - new_size = LinstorVolumeManager.round_up_volume_size(new_size) - if new_size <= old_size: - return - util.SMlog( - 'Inflate {} (new VHD size={}, previous={})' - .format(vdi_uuid, new_size, old_size) - ) +def detach_thin(session, linstor, sr_uuid, vdi_uuid): + # This function must always return without errors. + # Otherwise it could cause errors in the XAPI regarding the state of the VDI. + # It's why we use this `try` block. 
+ try: + detach_thin_impl(session, linstor, sr_uuid, vdi_uuid) + except Exception as e: + util.SMlog('Failed to detach properly VDI {}: {}'.format(vdi_uuid, e)) - journaler.create( - LinstorJournaler.INFLATE, vdi_uuid, old_size - ) - linstor.resize_volume(vdi_uuid, new_size) - if not util.zeroOut( - vdi_path, new_size - vhdutil.VHD_FOOTER_SIZE, - vhdutil.VHD_FOOTER_SIZE - ): - raise xs_errors.XenError( - 'EIO', - opterr='Failed to zero out VHD footer {}'.format(vdi_path) +def get_ips_from_xha_config_file(): + ips = dict() + host_id = None + try: + # Ensure there is no dirty read problem. + # For example if the HA is reloaded. + tree = util.retry( + lambda: xml_parser.parse(XHA_CONFIG_PATH), + maxretry=10, + period=1 ) + except: + return (None, ips) + + def parse_host_nodes(ips, node): + current_id = None + current_ip = None + + for sub_node in node: + if sub_node.tag == 'IPaddress': + current_ip = sub_node.text + elif sub_node.tag == 'HostID': + current_id = sub_node.text + else: + continue - vhdutil.setSizePhys(vdi_path, new_size, False) - journaler.remove(LinstorJournaler.INFLATE, vdi_uuid) - + if current_id and current_ip: + ips[current_id] = current_ip + return + util.SMlog('Ill-formed XHA file, missing IPaddress or/and HostID') + + def parse_common_config(ips, node): + for sub_node in node: + if sub_node.tag == 'host': + parse_host_nodes(ips, sub_node) + + def parse_local_config(ips, node): + for sub_node in node: + if sub_node.tag == 'localhost': + for host_node in sub_node: + if host_node.tag == 'HostID': + return host_node.text + + for node in tree.getroot(): + if node.tag == 'common-config': + parse_common_config(ips, node) + elif node.tag == 'local-config': + host_id = parse_local_config(ips, node) + else: + continue -def deflate(vdi_uuid, vdi_path, new_size, old_size): - new_size = LinstorVolumeManager.round_up_volume_size(new_size) - if new_size >= old_size: - return + if ips and host_id: + break - util.SMlog( - 'Deflate {} (new size={}, previous={})' - .format(vdi_uuid, new_size, old_size) - ) + return (host_id and ips.get(host_id), ips) - vhdutil.setSizePhys(vdi_path, new_size) - # TODO: Change the LINSTOR volume size using linstor.resize_volume. +def activate_lvm_group(group_name): + path = group_name.split('/') + assert path and len(path) <= 2 + try: + lvutil.setActiveVG(path[0], True) + except Exception as e: + util.SMlog('Cannot active VG `{}`: {}'.format(path[0], e)) # ============================================================================== # Usage example: # xe sr-create type=linstor name-label=linstor-sr # host-uuid=d2deba7a-c5ad-4de1-9a20-5c8df3343e93 -# device-config:hosts=node-linstor1,node-linstor2,node-linstor3 # device-config:group-name=vg_loop device-config:redundancy=2 @@ -250,6 +293,11 @@ class LinstorSR(SR.SR): MANAGER_PLUGIN = 'linstor-manager' + INIT_STATUS_NOT_SET = 0 + INIT_STATUS_IN_PROGRESS = 1 + INIT_STATUS_OK = 2 + INIT_STATUS_FAIL = 3 + # -------------------------------------------------------------------------- # SR methods. # -------------------------------------------------------------------------- @@ -265,8 +313,6 @@ def load(self, sr_uuid): ) # Check parameters. 
- if 'hosts' not in self.dconf or not self.dconf['hosts']: - raise xs_errors.XenError('LinstorConfigHostsMissing') if 'group-name' not in self.dconf or not self.dconf['group-name']: raise xs_errors.XenError('LinstorConfigGroupNameMissing') if 'redundancy' not in self.dconf or not self.dconf['redundancy']: @@ -289,6 +335,10 @@ def load(self, sr_uuid): else: self._provisioning = self.PROVISIONING_DEFAULT + monitor_db_quorum = self.dconf.get('monitor-db-quorum') + self._monitor_db_quorum = (monitor_db_quorum is None) or \ + distutils.util.strtobool(monitor_db_quorum) + # Note: We don't have access to the session field if the # 'vdi_attach_from_config' command is executed. self._has_session = self.sr_ref and self.session is not None @@ -307,8 +357,8 @@ def load(self, sr_uuid): self.lock = Lock(vhdutil.LOCK_TYPE_SR, self.uuid) self.sr_vditype = SR.DEFAULT_TAP - self._hosts = self.dconf['hosts'].split(',') - self._redundancy = int(self.dconf['redundancy'] or 1) + if self.cmd == 'sr_create': + self._redundancy = int(self.dconf['redundancy']) or 1 self._linstor = None # Ensure that LINSTOR attribute exists. self._journaler = None @@ -317,46 +367,75 @@ def load(self, sr_uuid): self._is_master = True self._group_name = self.dconf['group-name'] - self._master_uri = None - self._vdi_shared_locked = False + self._vdi_shared_time = 0 + + self._init_status = self.INIT_STATUS_NOT_SET - self._initialized = False + self._vdis_loaded = False + self._all_volume_info_cache = None + self._all_volume_metadata_cache = None def _locked_load(method): - @functools.wraps(method) - def wrap(self, *args, **kwargs): - if self._initialized: - return method(self, *args, **kwargs) - self._initialized = True + def wrapped_method(self, *args, **kwargs): + self._init_status = self.INIT_STATUS_OK + return method(self, *args, **kwargs) - if not self._has_session: - if self.srcmd.cmd == 'vdi_attach_from_config': - # We must have a valid LINSTOR instance here without using - # the XAPI. - self._master_uri = 'linstor://{}'.format( - util.get_master_address() - ) - self._journaler = LinstorJournaler( - self._master_uri, self._group_name, logger=util.SMlog - ) + def load(self, *args, **kwargs): + # Activate all LVMs to make drbd-reactor happy. + if self.srcmd.cmd in ('sr_attach', 'vdi_attach_from_config'): + activate_lvm_group(self._group_name) - try: + if not self._has_session: + if self.srcmd.cmd in ( + 'vdi_attach_from_config', + 'vdi_detach_from_config', + # When on-slave (is_open) is executed we have an + # empty command. + None + ): + def create_linstor(uri, attempt_count=30): self._linstor = LinstorVolumeManager( - self._master_uri, + uri, self._group_name, - logger=util.SMlog - ) - return - except Exception as e: - util.SMlog( - 'Ignore exception. Failed to build LINSTOR ' - 'instance without session: {}'.format(e) + logger=util.SMlog, + attempt_count=attempt_count ) - return + # Only required if we are attaching from config using a non-special VDI. + # I.e. not an HA volume. + self._vhdutil = LinstorVhdUtil(self.session, self._linstor) - self._master_uri = 'linstor://{}'.format( - util.get_master_rec(self.session)['address'] - ) + controller_uri = get_controller_uri() + if controller_uri: + create_linstor(controller_uri) + else: + def connect(): + # We must have a valid LINSTOR instance here without using + # the XAPI. Fallback with the HA config file. 
+ for ip in get_ips_from_xha_config_file()[1].values(): + controller_uri = 'linstor://' + ip + try: + util.SMlog('Connecting from config to LINSTOR controller using: {}'.format(ip)) + create_linstor(controller_uri, attempt_count=0) + return controller_uri + except: + pass + + controller_uri = util.retry(connect, maxretry=30, period=1) + if not controller_uri: + raise xs_errors.XenError( + 'SRUnavailable', + opterr='No valid controller URI to attach/detach from config' + ) + + self._journaler = LinstorJournaler( + controller_uri, self._group_name, logger=util.SMlog + ) + + if self.srcmd.cmd is None: + # Only useful on on-slave plugin (is_open). + self._vhdutil = LinstorVhdUtil(self.session, self._linstor) + + return wrapped_method(self, *args, **kwargs) if not self._is_master: if self.cmd in [ @@ -374,37 +453,12 @@ def wrap(self, *args, **kwargs): # behaviors if the GC is executed during an action on a slave. if self.cmd.startswith('vdi_'): self._shared_lock_vdi(self.srcmd.params['vdi_uuid']) - self._vdi_shared_locked = True - - self._journaler = LinstorJournaler( - self._master_uri, self._group_name, logger=util.SMlog - ) + self._vdi_shared_time = time.time() - # Ensure ports are opened and LINSTOR controller/satellite - # are activated. - if self.srcmd.cmd == 'sr_create': - # TODO: Disable if necessary - self._enable_linstor_on_all_hosts(status=True) - - try: - # Try to open SR if exists. - self._linstor = LinstorVolumeManager( - self._master_uri, - self._group_name, - repair=self._is_master, - logger=util.SMlog - ) - self._vhdutil = LinstorVhdUtil(self.session, self._linstor) - except Exception as e: - if self.srcmd.cmd == 'sr_create' or \ - self.srcmd.cmd == 'sr_detach': - # Ignore exception in this specific case: sr_create. - # At this moment the LinstorVolumeManager cannot be - # instantiated. Concerning the sr_detach command, we must - # ignore LINSTOR exceptions (if the volume group doesn't - # exist for example after a bad user action). - pass - else: + if self.srcmd.cmd != 'sr_create' and self.srcmd.cmd != 'sr_detach': + try: + self._reconnect() + except Exception as e: raise xs_errors.XenError('SRUnavailable', opterr=str(e)) if self._linstor: @@ -416,41 +470,87 @@ def wrap(self, *args, **kwargs): if hosts: util.SMlog('Failed to join node(s): {}'.format(hosts)) + # Ensure we use a non-locked volume when vhdutil is called. + if ( + self._is_master and self.cmd.startswith('vdi_') and + self.cmd != 'vdi_create' + ): + self._linstor.ensure_volume_is_not_locked( + self.srcmd.params['vdi_uuid'] + ) + try: - # If the command is a SR command on the master, we must - # load all VDIs and clean journal transactions. - # We must load the VDIs in the snapshot case too. + # If the command is a SR scan command on the master, + # we must load all VDIs and clean journal transactions. + # We must load the VDIs in the snapshot case too only if + # there is at least one entry in the journal. + # + # If the command is a SR command we want at least to remove + # resourceless volumes. 
if self._is_master and self.cmd not in [ 'vdi_attach', 'vdi_detach', 'vdi_activate', 'vdi_deactivate', 'vdi_epoch_begin', 'vdi_epoch_end', 'vdi_update', 'vdi_destroy' ]: - self._load_vdis() - self._undo_all_journal_transactions() + load_vdis = ( + self.cmd == 'sr_scan' or + self.cmd == 'sr_attach' + ) or len( + self._journaler.get_all(LinstorJournaler.INFLATE) + ) or len( + self._journaler.get_all(LinstorJournaler.CLONE) + ) + + if load_vdis: + self._load_vdis() + self._linstor.remove_resourceless_volumes() self._synchronize_metadata() except Exception as e: + if self.cmd == 'sr_scan' or self.cmd == 'sr_attach': + # Always raise, we don't want to remove VDIs + # from the XAPI database otherwise. + raise e util.SMlog( 'Ignoring exception in LinstorSR.load: {}'.format(e) ) util.SMlog(traceback.format_exc()) - return method(self, *args, **kwargs) + return wrapped_method(self, *args, **kwargs) + + @functools.wraps(wrapped_method) + def wrap(self, *args, **kwargs): + if self._init_status in \ + (self.INIT_STATUS_OK, self.INIT_STATUS_IN_PROGRESS): + return wrapped_method(self, *args, **kwargs) + if self._init_status == self.INIT_STATUS_FAIL: + util.SMlog( + 'Can\'t call method {} because initialization failed' + .format(method) + ) + else: + try: + self._init_status = self.INIT_STATUS_IN_PROGRESS + return load(self, *args, **kwargs) + except Exception: + if self._init_status != self.INIT_STATUS_OK: + self._init_status = self.INIT_STATUS_FAIL + raise return wrap - @_locked_load def cleanup(self): - if self._vdi_shared_locked: + if self._vdi_shared_time: self._shared_lock_vdi(self.srcmd.params['vdi_uuid'], locked=False) @_locked_load def create(self, uuid, size): util.SMlog('LinstorSR.create for {}'.format(self.uuid)) - if self._redundancy > len(self._hosts): + host_adresses = util.get_host_addresses(self.session) + if self._redundancy > len(host_adresses): raise xs_errors.XenError( 'LinstorSRCreate', opterr='Redundancy greater than host count' @@ -469,18 +569,50 @@ def create(self, uuid, size): if group_name and group_name == self._group_name: raise xs_errors.XenError( 'LinstorSRCreate', - opterr='group name must be unique' + opterr='group name must be unique, already used by PBD {}'.format( + xenapi.PBD.get_uuid(pbd) + ) ) + if srs: + raise xs_errors.XenError( + 'LinstorSRCreate', + opterr='LINSTOR SR must be unique in a pool' + ) + + online_hosts = util.get_online_hosts(self.session) + if len(online_hosts) < len(host_adresses): + raise xs_errors.XenError( + 'LinstorSRCreate', + opterr='Not enough online hosts' + ) + + ips = {} + for host_ref in online_hosts: + record = self.session.xenapi.host.get_record(host_ref) + hostname = record['hostname'] + ips[hostname] = record['address'] + + if len(ips) != len(online_hosts): + raise xs_errors.XenError( + 'LinstorSRCreate', + opterr='Multiple hosts with same hostname' + ) + + # Ensure ports are opened and LINSTOR satellites + # are activated. In the same time the drbd-reactor instances + # must be stopped. + self._prepare_sr_on_all_hosts(self._group_name, enabled=True) + # Create SR. # Throw if the SR already exists. 
try: self._linstor = LinstorVolumeManager.create_sr( - self._master_uri, self._group_name, - self._hosts, + ips, self._redundancy, thin_provisioning=self._provisioning == 'thin', + auto_quorum=self._monitor_db_quorum, logger=util.SMlog ) self._vhdutil = LinstorVhdUtil(self.session, self._linstor) @@ -488,30 +620,83 @@ def create(self, uuid, size): util.SMlog('Failed to create LINSTOR SR: {}'.format(e)) raise xs_errors.XenError('LinstorSRCreate', opterr=str(e)) + try: + util.SMlog( + "Finishing SR creation, enable drbd-reactor on all hosts..." + ) + self._update_drbd_reactor_on_all_hosts(enabled=True) + except Exception as e: + try: + self._linstor.destroy() + except Exception as e2: + util.SMlog( + 'Failed to destroy LINSTOR SR after creation fail: {}' + .format(e2) + ) + raise e + @_locked_load def delete(self, uuid): util.SMlog('LinstorSR.delete for {}'.format(self.uuid)) cleanup.gc_force(self.session, self.uuid) - if self.vdis: + if self.vdis or self._linstor._volumes: raise xs_errors.XenError('SRNotEmpty') - try: - # TODO: Use specific exceptions. If the LINSTOR group doesn't - # exist, we can remove it without problem. + node_name = get_controller_node_name() + if not node_name: + raise xs_errors.XenError( + 'LinstorSRDelete', + opterr='Cannot get controller node name' + ) - # TODO: Maybe remove all volumes unused by the SMAPI. - # We must ensure it's a safe idea... + host = None + if node_name == 'localhost': + host = util.get_this_host_ref(self.session) + else: + for slave in util.get_all_slaves(self.session): + r_name = self.session.xenapi.host.get_record(slave)['hostname'] + if r_name == node_name: + host = slave + break + + if not host: + raise xs_errors.XenError( + 'LinstorSRDelete', + opterr='Failed to find host with hostname: {}'.format( + node_name + ) + ) - self._linstor.destroy() - Lock.cleanupAll(self.uuid) + try: + self._update_drbd_reactor_on_all_hosts( + controller_node_name=node_name, enabled=False + ) + + args = { + 'groupName': self._group_name, + } + self._exec_manager_command( + host, 'destroy', args, 'LinstorSRDelete' + ) except Exception as e: + try: + self._update_drbd_reactor_on_all_hosts( + controller_node_name=node_name, enabled=True + ) + except Exception as e2: + util.SMlog( + 'Failed to restart drbd-reactor after destroy fail: {}' + .format(e2) + ) util.SMlog('Failed to delete LINSTOR SR: {}'.format(e)) raise xs_errors.XenError( 'LinstorSRDelete', opterr=str(e) ) + Lock.cleanupAll(self.uuid) + @_locked_load def update(self, uuid): util.SMlog('LinstorSR.update for {}'.format(self.uuid)) @@ -558,6 +743,9 @@ def probe(self): @_locked_load def scan(self, uuid): + if self._init_status == self.INIT_STATUS_FAIL: + return + util.SMlog('LinstorSR.scan for {}'.format(self.uuid)) if not self._linstor: raise xs_errors.XenError( @@ -565,12 +753,30 @@ def scan(self, uuid): opterr='no such volume group: {}'.format(self._group_name) ) + # Note: `scan` can be called outside this module, so ensure the VDIs + # are loaded. + self._load_vdis() self._update_physical_size() for vdi_uuid in self.vdis.keys(): if self.vdis[vdi_uuid].deleted: del self.vdis[vdi_uuid] + # Security to prevent VDIs from being forgotten if the controller + # is started without a shared and mounted /var/lib/linstor path. + try: + self._linstor.get_database_path() + except Exception: + # Failed to get database path, ensure we don't have + # VDIs in the XAPI database... 
+ if self.session.xenapi.SR.get_VDIs( + self.session.xenapi.SR.get_by_uuid(self.uuid) + ): + raise xs_errors.XenError( + 'SRUnavailable', + opterr='Database is not mounted' + ) + # Update the database before the restart of the GC to avoid # bad sync in the process if new VDIs have been introduced. ret = super(LinstorSR, self).scan(self.uuid) @@ -588,10 +794,9 @@ def vdi(self, uuid): # -------------------------------------------------------------------------- def _shared_lock_vdi(self, vdi_uuid, locked=True): - pools = self.session.xenapi.pool.get_all() - master = self.session.xenapi.pool.get_master(pools[0]) + master = util.get_master_ref(self.session) - method = 'lockVdi' + command = 'lockVdi' args = { 'groupName': self._group_name, 'srUuid': self.uuid, @@ -599,48 +804,128 @@ def _shared_lock_vdi(self, vdi_uuid, locked=True): 'locked': str(locked) } - ret = self.session.xenapi.host.call_plugin( - master, self.MANAGER_PLUGIN, method, args - ) - util.SMlog( - 'call-plugin ({} with {}) returned: {}' - .format(method, args, ret) - ) - if ret == 'False': - raise xs_errors.XenError( - 'VDIUnavailable', - opterr='Plugin {} failed'.format(self.MANAGER_PLUGIN) - ) + # Note: We must avoid to unlock the volume if the timeout is reached + # because during volume unlock, the SR lock is not used. Otherwise + # we could destroy a valid lock acquired from another host... + # + # This code is not very clean, the ideal solution would be to acquire + # the SR lock during volume unlock (like lock) but it's not easy + # to implement without impacting performance. + if not locked: + elapsed_time = time.time() - self._vdi_shared_time + timeout = LinstorVolumeManager.LOCKED_EXPIRATION_DELAY * 0.7 + if elapsed_time >= timeout: + util.SMlog( + 'Avoid unlock call of {} because timeout has been reached' + .format(vdi_uuid) + ) + return + + self._exec_manager_command(master, command, args, 'VDIUnavailable') # -------------------------------------------------------------------------- # Network. 
# -------------------------------------------------------------------------- - def _enable_linstor(self, host, status): - method = 'enable' - args = {'enabled': str(bool(status))} + def _exec_manager_command(self, host_ref, command, args, error): + host_rec = self.session.xenapi.host.get_record(host_ref) + host_uuid = host_rec['uuid'] + + try: + ret = self.session.xenapi.host.call_plugin( + host_ref, self.MANAGER_PLUGIN, command, args + ) + except Exception as e: + util.SMlog( + 'call-plugin on {} ({}:{} with {}) raised'.format( + host_uuid, self.MANAGER_PLUGIN, command, args + ) + ) + raise e - ret = self.session.xenapi.host.call_plugin( - host, self.MANAGER_PLUGIN, method, args - ) util.SMlog( - 'call-plugin ({} with {}) returned: {}'.format(method, args, ret) + 'call-plugin on {} ({}:{} with {}) returned: {}'.format( + host_uuid, self.MANAGER_PLUGIN, command, args, ret + ) ) if ret == 'False': raise xs_errors.XenError( - 'SRUnavailable', + error, opterr='Plugin {} failed'.format(self.MANAGER_PLUGIN) ) - def _enable_linstor_on_master(self, status): - pools = self.session.xenapi.pool.get_all() - master = self.session.xenapi.pool.get_master(pools[0]) - self._enable_linstor(master, status) + def _prepare_sr(self, host, group_name, enabled): + self._exec_manager_command( + host, + 'prepareSr' if enabled else 'releaseSr', + {'groupName': group_name}, + 'SRUnavailable' + ) + + def _prepare_sr_on_all_hosts(self, group_name, enabled): + master = util.get_master_ref(self.session) + self._prepare_sr(master, group_name, enabled) - def _enable_linstor_on_all_hosts(self, status): - self._enable_linstor_on_master(status) for slave in util.get_all_slaves(self.session): - self._enable_linstor(slave, status) + self._prepare_sr(slave, group_name, enabled) + + def _update_drbd_reactor(self, host, enabled): + self._exec_manager_command( + host, + 'updateDrbdReactor', + {'enabled': str(enabled)}, + 'SRUnavailable' + ) + + def _update_drbd_reactor_on_all_hosts( + self, enabled, controller_node_name=None + ): + if controller_node_name == 'localhost': + controller_node_name = self.session.xenapi.host.get_record( + util.get_this_host_ref(self.session) + )['hostname'] + assert controller_node_name + assert controller_node_name != 'localhost' + + controller_host = None + secondary_hosts = [] + + hosts = self.session.xenapi.host.get_all_records() + for host_ref, host_rec in hosts.iteritems(): + hostname = host_rec['hostname'] + if controller_node_name == hostname: + controller_host = host_ref + else: + secondary_hosts.append((host_ref, hostname)) + + action_name = 'Starting' if enabled else 'Stopping' + if controller_node_name and not controller_host: + util.SMlog('Failed to find controller host: `{}`'.format( + controller_node_name + )) + + if enabled and controller_host: + util.SMlog('{} drbd-reactor on controller host `{}`...'.format( + action_name, controller_node_name + )) + # If enabled is true, we try to start the controller on the desired + # node name first. + self._update_drbd_reactor(controller_host, enabled) + + for host_ref, hostname in secondary_hosts: + util.SMlog('{} drbd-reactor on host {}...'.format( + action_name, hostname + )) + self._update_drbd_reactor(host_ref, enabled) + + if not enabled and controller_host: + util.SMlog('{} drbd-reactor on controller host `{}`...'.format( + action_name, controller_node_name + )) + # If enabled is false, we disable the drbd-reactor service of + # the controller host last. Why? 
Otherwise the linstor-controller + # of other nodes can be started, and we don't want that. + self._update_drbd_reactor(controller_host, enabled) # -------------------------------------------------------------------------- # Metadata. @@ -653,7 +938,7 @@ def _synchronize_metadata_and_xapi(self): # Now update the VDI information in the metadata if required. xenapi = self.session.xenapi - volumes_metadata = self._linstor.volumes_with_metadata + volumes_metadata = self._linstor.get_volumes_with_metadata() for vdi_uuid, volume_metadata in volumes_metadata.items(): try: vdi_ref = xenapi.VDI.get_by_uuid(vdi_uuid) @@ -708,36 +993,43 @@ def _update_stats(self, virt_alloc_delta): # Update size attributes of the SR parent class. self.virtual_allocation = valloc + virt_alloc_delta - # Physical size contains the total physical size. - # i.e. the sum of the sizes of all devices on all hosts, not the AVG. self._update_physical_size() # Notify SR parent class. self._db_update() def _update_physical_size(self): - # Physical size contains the total physical size. - # i.e. the sum of the sizes of all devices on all hosts, not the AVG. - self.physical_size = self._linstor.physical_size - - # `self._linstor.physical_free_size` contains the total physical free - # memory. If Thin provisioning is used we can't use it, we must use - # LINSTOR volume size to gives a good idea of the required - # usable memory to the users. - self.physical_utilisation = self._linstor.total_allocated_volume_size + # We use the size of the smallest disk, this is an approximation that + # ensures the displayed physical size is reachable by the user. + (min_physical_size, pool_count) = self._linstor.get_min_physical_size() + self.physical_size = min_physical_size * pool_count / \ + self._linstor.redundancy - # If Thick provisioning is used, we can use this line instead: - # self.physical_utilisation = \ - # self.physical_size - self._linstor.physical_free_size + self.physical_utilisation = self._linstor.allocated_volume_size # -------------------------------------------------------------------------- # VDIs. # -------------------------------------------------------------------------- def _load_vdis(self): - if self.vdis: + if self._vdis_loaded: return + assert self._is_master + + # We use a cache to avoid repeated JSON parsing. + # The performance gain is not big but we can still + # enjoy it with a few lines. + self._create_linstor_cache() + self._load_vdis_ex() + self._destroy_linstor_cache() + + # We must mark VDIs as loaded only if the load is a success. + self._vdis_loaded = True + + self._undo_all_journal_transactions() + + def _load_vdis_ex(self): # 1. Get existing VDIs in XAPI. xenapi = self.session.xenapi xapi_vdi_uuids = set() @@ -745,8 +1037,8 @@ def _load_vdis(self): xapi_vdi_uuids.add(xenapi.VDI.get_uuid(vdi)) # 2. Get volumes info. - all_volume_info = self._linstor.volumes_with_info - volumes_metadata = self._linstor.volumes_with_metadata + all_volume_info = self._all_volume_info_cache + volumes_metadata = self._all_volume_metadata_cache # 3. Get CBT vdis. # See: https://support.citrix.com/article/CTX230619 @@ -758,7 +1050,8 @@ def _load_vdis(self): introduce = False - if self.cmd == 'sr_scan': + # Try to introduce VDIs only during scan/attach. 
+ if self.cmd == 'sr_scan' or self.cmd == 'sr_attach': has_clone_entries = list(self._journaler.get_all( LinstorJournaler.CLONE ).items()) @@ -782,6 +1075,9 @@ def _load_vdis(self): if not introduce: continue + if vdi_uuid.startswith('DELETED_'): + continue + volume_metadata = volumes_metadata.get(vdi_uuid) if not volume_metadata: util.SMlog( @@ -836,10 +1132,10 @@ def _load_vdis(self): util.SMlog( 'Introducing VDI {} '.format(vdi_uuid) + - ' (name={}, virtual_size={}, physical_size={})'.format( + ' (name={}, virtual_size={}, allocated_size={})'.format( name_label, volume_info.virtual_size, - volume_info.physical_size + volume_info.allocated_size ) ) @@ -857,7 +1153,7 @@ def _load_vdis(self): sm_config, managed, str(volume_info.virtual_size), - str(volume_info.physical_size) + str(volume_info.allocated_size) ) is_a_snapshot = volume_metadata.get(IS_A_SNAPSHOT_TAG) @@ -881,9 +1177,11 @@ def _load_vdis(self): vdi = self.vdi(vdi_uuid) self.vdis[vdi_uuid] = vdi - if vdi.vdi_type == vhdutil.VDI_TYPE_VHD: + if USE_KEY_HASH and vdi.vdi_type == vhdutil.VDI_TYPE_VHD: + # TODO: Replace pylint comment with this feature when possible: + # https://github.com/PyCQA/pylint/pull/2926 vdi.sm_config_override['key_hash'] = \ - self._vhdutil.get_key_hash(vdi_uuid) + self._vhdutil.get_key_hash(vdi_uuid) # pylint: disable = E1120 # 4.c. Update CBT status of disks either just added # or already in XAPI. @@ -940,7 +1238,7 @@ def _load_vdis(self): else: geneology[vdi.parent] = [vdi_uuid] if not vdi.hidden: - self.virtual_allocation += vdi.utilisation + self.virtual_allocation += vdi.size # 9. Remove all hidden leaf nodes to avoid introducing records that # will be GC'ed. @@ -1014,13 +1312,12 @@ def _handle_interrupted_inflate(self, vdi_uuid, old_size): util.SMlog('Cannot deflate missing VDI {}'.format(vdi_uuid)) return - current_size = self._linstor.get_volume_info(self.uuid).virtual_size - util.zeroOut( - vdi.path, - current_size - vhdutil.VHD_FOOTER_SIZE, - vhdutil.VHD_FOOTER_SIZE - ) - deflate(vdi_uuid, vdi.path, old_size, current_size) + assert not self._all_volume_info_cache + volume_info = self._linstor.get_volume_info(vdi_uuid) + + current_size = volume_info.virtual_size + assert current_size > 0 + self._vhdutil.force_deflate(vdi.path, old_size, current_size, zeroize=True) def _handle_interrupted_clone( self, vdi_uuid, clone_info, force_undo=False @@ -1033,7 +1330,7 @@ def _handle_interrupted_clone( base_uuid, snap_uuid = clone_info.split('_') # Use LINSTOR data because new VDIs may not be in the XAPI. - volume_names = self._linstor.volumes_with_name + volume_names = self._linstor.get_volumes_with_name() # Check if we don't have a base VDI. (If clone failed at startup.) if base_uuid not in volume_names: @@ -1089,7 +1386,7 @@ def _undo_clone(self, volume_names, vdi_uuid, base_uuid, snap_uuid): if base_type == vhdutil.VDI_TYPE_VHD: vhd_info = self._vhdutil.get_vhd_info(base_uuid, False) if vhd_info.hidden: - vhdutil.setHidden(base_path, False) + self._vhdutil.set_hidden(base_path, False) elif base_type == vhdutil.VDI_TYPE_RAW and \ base_metadata.get(HIDDEN_TAG): self._linstor.update_volume_metadata( @@ -1099,10 +1396,6 @@ def _undo_clone(self, volume_names, vdi_uuid, base_uuid, snap_uuid): # Remove the child nodes. 
if snap_uuid and snap_uuid in volume_names: util.SMlog('Destroying snap {}...'.format(snap_uuid)) - snap_metadata = self._linstor.get_volume_metadata(snap_uuid) - - if snap_metadata.get(VDI_TYPE_TAG) != vhdutil.VDI_TYPE_VHD: - raise util.SMException('Clone {} not VHD'.format(snap_uuid)) try: self._linstor.destroy_volume(snap_uuid) @@ -1136,9 +1429,9 @@ def _undo_clone(self, volume_names, vdi_uuid, base_uuid, snap_uuid): # Inflate to the right size. if base_type == vhdutil.VDI_TYPE_VHD: vdi = self.vdi(vdi_uuid) - volume_size = compute_volume_size(vdi.size, vdi.vdi_type) - inflate( - self._journaler, self._linstor, vdi_uuid, vdi.path, + volume_size = LinstorVhdUtil.compute_volume_size(vdi.size, vdi.vdi_type) + self._vhdutil.inflate( + self._journaler, vdi_uuid, vdi.path, volume_size, vdi.capacity ) self.vdis[vdi_uuid] = vdi @@ -1150,10 +1443,64 @@ def _undo_clone(self, volume_names, vdi_uuid, base_uuid, snap_uuid): util.SMlog('*** INTERRUPTED CLONE OP: rollback success') + # -------------------------------------------------------------------------- + # Cache. + # -------------------------------------------------------------------------- + + def _create_linstor_cache(self): + # TODO: use a nonlocal with python3. + class context: + reconnect = False + + def create_cache(): + try: + if context.reconnect: + self._reconnect() + return self._linstor.get_volumes_with_info() + except Exception as e: + context.reconnect = True + raise e + + self._all_volume_metadata_cache = \ + self._linstor.get_volumes_with_metadata() + self._all_volume_info_cache = util.retry( + create_cache, + maxretry=10, + period=3 + ) + + def _destroy_linstor_cache(self): + self._all_volume_info_cache = None + self._all_volume_metadata_cache = None + # -------------------------------------------------------------------------- # Misc. # -------------------------------------------------------------------------- + def _reconnect(self): + controller_uri = get_controller_uri() + + self._journaler = LinstorJournaler( + controller_uri, self._group_name, logger=util.SMlog + ) + + # Try to open SR if exists. + # We can repair only if we are on the master AND if + # we are trying to execute an exclusive operation. + # Otherwise we could try to delete a VDI being created or + # during a snapshot. An exclusive op is the guarantee that + # the SR is locked. + self._linstor = LinstorVolumeManager( + controller_uri, + self._group_name, + repair=( + self._is_master and + self.srcmd.cmd in self.ops_exclusive + ), + logger=util.SMlog + ) + self._vhdutil = LinstorVhdUtil(self.session, self._linstor) + def _ensure_space_available(self, amount_needed): space_available = self._linstor.max_volume_size_allowed if (space_available < amount_needed): @@ -1199,8 +1546,6 @@ class LinstorVDI(VDI.VDI): TYPE_RAW = 'raw' TYPE_VHD = 'vhd' - MAX_SIZE = 2 * 1024 * 1024 * 1024 * 1024 # Max VHD size. - # Metadata size given to the "S" param of vhd-util create. # "-S size (MB) for metadata preallocation". # Increase the performance when resize is called. @@ -1233,7 +1578,7 @@ def raise_bad_load(e): if ( self.sr.srcmd.cmd == 'vdi_attach_from_config' or self.sr.srcmd.cmd == 'vdi_detach_from_config' - ) and self.sr.srcmd.params['vdi_uuid'] == self.uuid: + ): self.vdi_type = vhdutil.VDI_TYPE_RAW self.path = self.sr.srcmd.params['vdi_path'] else: @@ -1297,11 +1642,11 @@ def create(self, sr_uuid, vdi_uuid, size): # 2. Compute size and check space available. 
size = vhdutil.validate_and_round_vhd_size(int(size)) - util.SMlog('LinstorVDI.create: type={}, size={}'.format( - self.vdi_type, size - )) - - volume_size = compute_volume_size(size, self.vdi_type) + volume_size = LinstorVhdUtil.compute_volume_size(size, self.vdi_type) + util.SMlog( + 'LinstorVDI.create: type={}, vhd-size={}, volume-size={}' + .format(self.vdi_type, size, volume_size) + ) self.sr._ensure_space_available(volume_size) # 3. Set sm_config attribute of VDI parent class. @@ -1310,8 +1655,15 @@ def create(self, sr_uuid, vdi_uuid, size): # 4. Create! failed = False try: + volume_name = None + if self.ty == 'ha_statefile': + volume_name = HA_VOLUME_NAME + elif self.ty == 'redo_log': + volume_name = REDO_LOG_VOLUME_NAME + self._linstor.create_volume( - self.uuid, volume_size, persistent=False + self.uuid, volume_size, persistent=False, + volume_name=volume_name ) volume_info = self._linstor.get_volume_info(self.uuid) @@ -1320,16 +1672,16 @@ def create(self, sr_uuid, vdi_uuid, size): if self.vdi_type == vhdutil.VDI_TYPE_RAW: self.size = volume_info.virtual_size else: - vhdutil.create( + self.sr._vhdutil.create( self.path, size, False, self.MAX_METADATA_VIRT_SIZE ) self.size = self.sr._vhdutil.get_size_virt(self.uuid) if self._key_hash: - vhdutil.setKey(self.path, self._key_hash) + self.sr._vhdutil.set_key(self.path, self._key_hash) # Because vhdutil commands modify the volume data, - # we must retrieve a new time the utilisation size. + # we must retrieve a new time the utilization size. volume_info = self._linstor.get_volume_info(self.uuid) volume_metadata = { @@ -1344,6 +1696,13 @@ def create(self, sr_uuid, vdi_uuid, size): METADATA_OF_POOL_TAG: '' } self._linstor.set_volume_metadata(self.uuid, volume_metadata) + + # Set the open timeout to 1min to reduce CPU usage + # in http-disk-server when a secondary server tries to open + # an already opened volume. + if self.ty == 'ha_statefile' or self.ty == 'redo_log': + self._linstor.set_auto_promote_timeout(self.uuid, 600) + self._linstor.mark_volume_as_persistent(self.uuid) except util.CommandException as e: failed = True @@ -1364,11 +1723,11 @@ def create(self, sr_uuid, vdi_uuid, size): '{}'.format(e) ) - self.utilisation = volume_info.physical_size + self.utilisation = volume_info.allocated_size self.sm_config['vdi_type'] = self.vdi_type self.ref = self._db_introduce() - self.sr._update_stats(volume_info.virtual_size) + self.sr._update_stats(self.size) return VDI.VDI.get_params(self) @@ -1401,20 +1760,28 @@ def delete(self, sr_uuid, vdi_uuid, data_only=False): 'Failed to remove the volume (maybe is leaf coalescing) ' 'for {} err: {}'.format(self.uuid, e) ) - raise xs_errors.XenError('VDIDelete', opterr=str(e)) + + try: + raise xs_errors.XenError('VDIDelete', opterr=str(e)) + except LinstorVolumeManagerError as e: + if e.code != LinstorVolumeManagerError.ERR_VOLUME_DESTROY: + raise xs_errors.XenError('VDIDelete', opterr=str(e)) + + return if self.uuid in self.sr.vdis: del self.sr.vdis[self.uuid] # TODO: Check size after delete. 
- self.sr._update_stats(-self.capacity) + self.sr._update_stats(-self.size) self.sr._kick_gc() return super(LinstorVDI, self).delete(sr_uuid, vdi_uuid, data_only) def attach(self, sr_uuid, vdi_uuid): util.SMlog('LinstorVDI.attach for {}'.format(self.uuid)) + attach_from_config = self.sr.srcmd.cmd == 'vdi_attach_from_config' if ( - self.sr.srcmd.cmd != 'vdi_attach_from_config' or + not attach_from_config or self.sr.srcmd.params['vdi_uuid'] != self.uuid ) and self.sr._journaler.has_entries(self.uuid): raise xs_errors.XenError( @@ -1423,56 +1790,62 @@ def attach(self, sr_uuid, vdi_uuid): 'scan SR first to trigger auto-repair' ) - writable = 'args' not in self.sr.srcmd.params or \ - self.sr.srcmd.params['args'][0] == 'true' - - # We need to inflate the volume if we don't have enough place - # to mount the VHD image. I.e. the volume capacity must be greater - # than the VHD size + bitmap size. - need_inflate = True - if self.vdi_type == vhdutil.VDI_TYPE_RAW or not writable or \ - self.capacity >= compute_volume_size(self.size, self.vdi_type): - need_inflate = False + if not attach_from_config or self.sr._is_master: + writable = 'args' not in self.sr.srcmd.params or \ + self.sr.srcmd.params['args'][0] == 'true' - if need_inflate: - try: - self._prepare_thin(True) - except Exception as e: - raise xs_errors.XenError( - 'VDIUnavailable', - opterr='Failed to attach VDI during "prepare thin": {}' - .format(e) - ) + # We need to inflate the volume if we don't have enough place + # to mount the VHD image. I.e. the volume capacity must be greater + # than the VHD size + bitmap size. + need_inflate = True + if ( + self.vdi_type == vhdutil.VDI_TYPE_RAW or + not writable or + self.capacity >= LinstorVhdUtil.compute_volume_size(self.size, self.vdi_type) + ): + need_inflate = False - if not util.pathexists(self.path): - raise xs_errors.XenError( - 'VDIUnavailable', opterr='Could not find: {}'.format(self.path) - ) + if need_inflate: + try: + self._prepare_thin(True) + except Exception as e: + raise xs_errors.XenError( + 'VDIUnavailable', + opterr='Failed to attach VDI during "prepare thin": {}' + .format(e) + ) if not hasattr(self, 'xenstore_data'): self.xenstore_data = {} + self.xenstore_data['storage-type'] = LinstorSR.DRIVER_TYPE - # TODO: Is it useful? - self.xenstore_data.update(scsiutil.update_XS_SCSIdata( - self.uuid, scsiutil.gen_synthetic_page_data(self.uuid) - )) + if ( + USE_HTTP_NBD_SERVERS and + attach_from_config and + self.path.startswith('/dev/http-nbd/') + ): + return self._attach_using_http_nbd() - self.xenstore_data['storage-type'] = LinstorSR.DRIVER_TYPE + # Ensure we have a path... + self._create_chain_paths(self.uuid) self.attached = True - return VDI.VDI.attach(self, self.sr.uuid, self.uuid) def detach(self, sr_uuid, vdi_uuid): util.SMlog('LinstorVDI.detach for {}'.format(self.uuid)) + detach_from_config = self.sr.srcmd.cmd == 'vdi_detach_from_config' self.attached = False + if detach_from_config and self.path.startswith('/dev/http-nbd/'): + return self._detach_using_http_nbd() + if self.vdi_type == vhdutil.VDI_TYPE_RAW: return # The VDI is already deflated if the VHD image size + metadata is # equal to the LINSTOR volume size. - volume_size = compute_volume_size(self.size, self.vdi_type) + volume_size = LinstorVhdUtil.compute_volume_size(self.size, self.vdi_type) already_deflated = self.capacity <= volume_size if already_deflated: @@ -1501,11 +1874,45 @@ def detach(self, sr_uuid, vdi_uuid): .format(e) ) + # We remove only on slaves because the volume can be used by the GC. 
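+ # Walk up the VHD chain and drop the local DRBD device of each volume
+ # when it is diskless on this host (see remove_volume_if_diskless below).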
+ if self.sr._is_master: + return + + while vdi_uuid: + try: + path = self._linstor.build_device_path(self._linstor.get_volume_name(vdi_uuid)) + parent_vdi_uuid = self.sr._vhdutil.get_vhd_info(vdi_uuid).parentUuid + except Exception: + break + + if util.pathexists(path): + try: + self._linstor.remove_volume_if_diskless(vdi_uuid) + except Exception as e: + # Ensure we can always detach properly. + # I don't want to corrupt the XAPI info. + util.SMlog('Failed to clean VDI {} during detach: {}'.format(vdi_uuid, e)) + vdi_uuid = parent_vdi_uuid + def resize(self, sr_uuid, vdi_uuid, size): util.SMlog('LinstorVDI.resize for {}'.format(self.uuid)) + if not self.sr._is_master: + raise xs_errors.XenError( + 'VDISize', + opterr='resize on slave not allowed' + ) + if self.hidden: raise xs_errors.XenError('VDIUnavailable', opterr='hidden VDI') + # Compute the virtual VHD and DRBD volume size. + size = vhdutil.validate_and_round_vhd_size(int(size)) + volume_size = LinstorVhdUtil.compute_volume_size(size, self.vdi_type) + util.SMlog( + 'LinstorVDI.resize: type={}, vhd-size={}, volume-size={}' + .format(self.vdi_type, size, volume_size) + ) + if size < self.size: util.SMlog( 'vdi_resize: shrinking not supported: ' @@ -1513,36 +1920,34 @@ def resize(self, sr_uuid, vdi_uuid, size): ) raise xs_errors.XenError('VDISize', opterr='shrinking not allowed') - # Compute the virtual VHD size. - size = vhdutil.validate_and_round_vhd_size(int(size)) - if size == self.size: return VDI.VDI.get_params(self) - # Compute the LINSTOR volume size. - new_volume_size = compute_volume_size(size, self.vdi_type) if self.vdi_type == vhdutil.VDI_TYPE_RAW: old_volume_size = self.size + new_volume_size = LinstorVolumeManager.round_up_volume_size(size) else: - old_volume_size = self.capacity + old_volume_size = self.utilisation if self.sr._provisioning == 'thin': # VDI is currently deflated, so keep it deflated. new_volume_size = old_volume_size + else: + new_volume_size = LinstorVhdUtil.compute_volume_size(size, self.vdi_type) assert new_volume_size >= old_volume_size space_needed = new_volume_size - old_volume_size self.sr._ensure_space_available(space_needed) - old_capacity = self.capacity + old_size = self.size if self.vdi_type == vhdutil.VDI_TYPE_RAW: self._linstor.resize(self.uuid, new_volume_size) else: if new_volume_size != old_volume_size: - inflate( - self.sr._journaler, self._linstor, self.uuid, self.path, + self.sr._vhdutil.inflate( + self.sr._journaler, self.uuid, self.path, new_volume_size, old_volume_size ) - vhdutil.setSizeVirtFast(self.path, size) + self.sr._vhdutil.set_size_virt_fast(self.path, size) # Reload size attributes. 
self._load_this() @@ -1552,7 +1957,7 @@ def resize(self, sr_uuid, vdi_uuid, size): self.session.xenapi.VDI.set_physical_utilisation( vdi_ref, str(self.utilisation) ) - self.sr._update_stats(self.capacity - old_capacity) + self.sr._update_stats(self.size - old_size) return VDI.VDI.get_params(self) def clone(self, sr_uuid, vdi_uuid): @@ -1574,8 +1979,8 @@ def compose(self, sr_uuid, vdi1, vdi2): if not blktap2.VDI.tap_pause(self.session, self.sr.uuid, self.uuid): raise util.SMException('Failed to pause VDI {}'.format(self.uuid)) try: - vhdutil.setParent(self.path, parent_path, False) - vhdutil.setHidden(parent_path) + self.sr._vhdutil.set_parent(self.path, parent_path, False) + self.sr._vhdutil.set_hidden(parent_path) self.sr.session.xenapi.VDI.set_managed( self.sr.srcmd.params['args'][0], False ) @@ -1598,25 +2003,40 @@ def generate_config(self, sr_uuid, vdi_uuid): util.SMlog('LinstorVDI.generate_config for {}'.format(self.uuid)) - if not self.path or not util.pathexists(self.path): - available = False - # Try to refresh symlink path... - try: - self.path = self._linstor.get_device_path(vdi_uuid) - available = util.pathexists(self.path) - except Exception: - pass - if not available: - raise xs_errors.XenError('VDIUnavailable') - resp = {} resp['device_config'] = self.sr.dconf resp['sr_uuid'] = sr_uuid resp['vdi_uuid'] = self.uuid resp['sr_sm_config'] = self.sr.sm_config - resp['vdi_path'] = self.path resp['command'] = 'vdi_attach_from_config' + # By default, we generate a normal config. + # But if the disk is persistent, we must use a HTTP/NBD + # server to ensure we can always write or read data. + # Why? DRBD is unsafe when used with more than 4 hosts: + # We are limited to use 1 diskless and 3 full. + # We can't increase this limitation, so we use a NBD/HTTP device + # instead. + volume_name = self._linstor.get_volume_name(self.uuid) + if not USE_HTTP_NBD_SERVERS or volume_name not in [ + HA_VOLUME_NAME, REDO_LOG_VOLUME_NAME + ]: + if not self.path or not util.pathexists(self.path): + available = False + # Try to refresh symlink path... + try: + self.path = self._linstor.get_device_path(vdi_uuid) + available = util.pathexists(self.path) + except Exception: + pass + if not available: + raise xs_errors.XenError('VDIUnavailable') + + resp['vdi_path'] = self.path + else: + # Axiom: DRBD device is present on at least one host. + resp['vdi_path'] = '/dev/http-nbd/' + volume_name + config = xmlrpc.client.dumps(tuple([resp]), 'vdi_attach_from_config') return xmlrpc.client.dumps((config,), "", True) @@ -1652,19 +2072,28 @@ def reset_leaf(self, sr_uuid, vdi_uuid): .format(self.uuid) ) - vhdutil.killData(self.path) + self.sr._vhdutil.kill_data(self.path) def _load_this(self): - volume_metadata = self._linstor.get_volume_metadata(self.uuid) - volume_info = self._linstor.get_volume_info(self.uuid) + volume_metadata = None + if self.sr._all_volume_metadata_cache: + volume_metadata = self.sr._all_volume_metadata_cache.get(self.uuid) + if volume_metadata is None: + volume_metadata = self._linstor.get_volume_metadata(self.uuid) + + volume_info = None + if self.sr._all_volume_info_cache: + volume_info = self.sr._all_volume_info_cache.get(self.uuid) + if volume_info is None: + volume_info = self._linstor.get_volume_info(self.uuid) - # Contains the physical size used on all disks. + # Contains the max physical size used on a disk. # When LINSTOR LVM driver is used, the size should be similar to # virtual size (i.e. the LINSTOR max volume size). 
# When LINSTOR Thin LVM driver is used, the used physical size should # be lower than virtual size at creation. # The physical size increases after each write in a new block. - self.utilisation = volume_info.physical_size + self.utilisation = volume_info.allocated_size self.capacity = volume_info.virtual_size if self.vdi_type == vhdutil.VDI_TYPE_RAW: @@ -1691,7 +2120,7 @@ def _mark_hidden(self, hidden=True): return if self.vdi_type == vhdutil.VDI_TYPE_VHD: - vhdutil.setHidden(self.path, hidden) + self.sr._vhdutil.set_hidden(self.path, hidden) else: self._linstor.update_volume_metadata(self.uuid, { HIDDEN_TAG: hidden @@ -1739,25 +2168,19 @@ def _prepare_thin(self, attach): else: fn = 'attach' if attach else 'detach' - # We assume the first pool is always the one currently in use. - pools = self.session.xenapi.pool.get_all() - master = self.session.xenapi.pool.get_master(pools[0]) + master = util.get_master_ref(self.session) + args = { 'groupName': self.sr._group_name, 'srUuid': self.sr.uuid, 'vdiUuid': self.uuid } - ret = self.session.xenapi.host.call_plugin( - master, self.sr.MANAGER_PLUGIN, fn, args - ) - util.SMlog( - 'call-plugin ({} with {}) returned: {}'.format(fn, args, ret) - ) - if ret == 'False': - raise xs_errors.XenError( - 'VDIUnavailable', - opterr='Plugin {} failed'.format(self.sr.MANAGER_PLUGIN) - ) + + try: + self.sr._exec_manager_command(master, fn, args, 'VDIUnavailable') + except Exception: + if fn != 'detach': + raise # Reload size attrs after inflate or deflate! self._load_this() @@ -1807,9 +2230,7 @@ def _determine_type_and_path(self): 'VDIUnavailable', opterr='failed to get vdi_type in metadata' ) - self._update_device_name( - self._linstor.get_volume_name(self.uuid) - ) + self._update_device_name(self._linstor.get_volume_name(self.uuid)) def _update_device_name(self, device_name): self._device_name = device_name @@ -1832,7 +2253,7 @@ def _create_snapshot(self, snap_uuid, snap_of_uuid=None): # 2. Write the snapshot content. is_raw = (self.vdi_type == vhdutil.VDI_TYPE_RAW) - vhdutil.snapshot( + self.sr._vhdutil.snapshot( snap_path, self.path, is_raw, self.MAX_METADATA_VIRT_SIZE ) @@ -1862,7 +2283,7 @@ def _create_snapshot(self, snap_uuid, snap_of_uuid=None): volume_info = self._linstor.get_volume_info(snap_uuid) snap_vdi.size = self.sr._vhdutil.get_size_virt(snap_uuid) - snap_vdi.utilisation = volume_info.physical_size + snap_vdi.utilisation = volume_info.allocated_size # 6. Update sm config. snap_vdi.sm_config = {} @@ -1932,6 +2353,9 @@ def _snapshot(self, snap_type, cbtlog=None, cbt_consistency=None): elif depth >= vhdutil.MAX_CHAIN_SIZE: raise xs_errors.XenError('SnapshotChainTooLong') + # Ensure we have a valid path if we don't have a local diskful. + self._create_chain_paths(self.uuid) + volume_path = self.path if not util.pathexists(volume_path): raise xs_errors.XenError( @@ -2057,7 +2481,7 @@ def _snapshot(self, snap_type, cbtlog=None, cbt_consistency=None): raise if snap_type != VDI.SNAPSHOT_INTERNAL: - self.sr._update_stats(self.capacity) + self.sr._update_stats(self.size) # 10. Return info on the new user-visible leaf VDI. 
ret_vdi = snap_vdi @@ -2070,7 +2494,7 @@ def _snapshot(self, snap_type, cbtlog=None, cbt_consistency=None): self.session.xenapi.VDI.set_sm_config( vdi_ref, active_vdi.sm_config ) - except Exception as e: + except Exception: util.logException('Failed to snapshot!') try: self.sr._handle_interrupted_clone( @@ -2088,10 +2512,349 @@ def _snapshot(self, snap_type, cbtlog=None, cbt_consistency=None): return ret_vdi.get_params() + @staticmethod + def _start_persistent_http_server(volume_name): + pid_path = None + http_server = None + + try: + if volume_name == HA_VOLUME_NAME: + port = '8076' + else: + port = '8077' + + try: + # Use a timeout call because XAPI may be unusable on startup + # or if the host has been ejected. So in this case the call can + # block indefinitely. + session = util.timeout_call(5, util.get_localAPI_session) + host_ip = util.get_this_host_address(session) + except: + # Fallback using the XHA file if session not available. + host_ip, _ = get_ips_from_xha_config_file() + if not host_ip: + raise Exception( + 'Cannot start persistent HTTP server: no XAPI session, nor XHA config file' + ) + + arguments = [ + 'http-disk-server', + '--disk', + '/dev/drbd/by-res/{}/0'.format(volume_name), + '--ip', + host_ip, + '--port', + port + ] + + util.SMlog('Starting {} on port {}...'.format(arguments[0], port)) + http_server = subprocess.Popen( + [FORK_LOG_DAEMON] + arguments, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + # Ensure we use another group id to kill this process without + # touch the current one. + preexec_fn=os.setsid + ) + + pid_path = '/run/http-server-{}.pid'.format(volume_name) + with open(pid_path, 'w') as pid_file: + pid_file.write(str(http_server.pid)) + + reg_server_ready = re.compile("Server ready!$") + def is_ready(): + while http_server.poll() is None: + line = http_server.stdout.readline() + if reg_server_ready.search(line): + return True + return False + try: + if not util.timeout_call(10, is_ready): + raise Exception('Failed to wait HTTP server startup, bad output') + except util.TimeoutException: + raise Exception('Failed to wait for HTTP server startup during given delay') + except Exception as e: + if pid_path: + try: + os.remove(pid_path) + except Exception: + pass + + if http_server: + # Kill process and children in this case... + try: + os.killpg(os.getpgid(http_server.pid), signal.SIGTERM) + except: + pass + + raise xs_errors.XenError( + 'VDIUnavailable', + opterr='Failed to start http-server: {}'.format(e) + ) + + def _start_persistent_nbd_server(self, volume_name): + pid_path = None + nbd_path = None + nbd_server = None + + try: + # We use a precomputed device size. + # So if the XAPI is modified, we must update these values! 
+ if volume_name == HA_VOLUME_NAME: + # See: https://github.com/xapi-project/xen-api/blob/703479fa448a8d7141954bb6e8964d8e25c4ac2e/ocaml/xapi/xha_statefile.ml#L32-L37 + port = '8076' + device_size = 4 * 1024 * 1024 + else: + # See: https://github.com/xapi-project/xen-api/blob/703479fa448a8d7141954bb6e8964d8e25c4ac2e/ocaml/database/redo_log.ml#L41-L44 + port = '8077' + device_size = 256 * 1024 * 1024 + + try: + session = util.timeout_call(5, util.get_localAPI_session) + ips = util.get_host_addresses(session) + except Exception as e: + _, ips = get_ips_from_xha_config_file() + if not ips: + raise Exception( + 'Cannot start persistent NBD server: no XAPI session, nor XHA config file ({})'.format(e) + ) + ips = ips.values() + + arguments = [ + 'nbd-http-server', + '--socket-path', + '/run/{}.socket'.format(volume_name), + '--nbd-name', + volume_name, + '--urls', + ','.join(map(lambda ip: 'http://' + ip + ':' + port, ips)), + '--device-size', + str(device_size) + ] + + util.SMlog('Starting {} using port {}...'.format(arguments[0], port)) + nbd_server = subprocess.Popen( + [FORK_LOG_DAEMON] + arguments, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + # Ensure we use another group id to kill this process without + # touch the current one. + preexec_fn=os.setsid + ) + + pid_path = '/run/nbd-server-{}.pid'.format(volume_name) + with open(pid_path, 'w') as pid_file: + pid_file.write(str(nbd_server.pid)) + + reg_nbd_path = re.compile("NBD `(/dev/nbd[0-9]+)` is now attached.$") + def get_nbd_path(): + while nbd_server.poll() is None: + line = nbd_server.stdout.readline() + match = reg_nbd_path.search(line) + if match: + return match.group(1) + # Use a timeout to never block the smapi if there is a problem. + try: + nbd_path = util.timeout_call(10, get_nbd_path) + if nbd_path is None: + raise Exception('Empty NBD path (NBD server is probably dead)') + except util.TimeoutException: + raise Exception('Unable to read NBD path') + + util.SMlog('Create symlink: {} -> {}'.format(self.path, nbd_path)) + os.symlink(nbd_path, self.path) + except Exception as e: + if pid_path: + try: + os.remove(pid_path) + except Exception: + pass + + if nbd_path: + try: + os.remove(nbd_path) + except Exception: + pass + + if nbd_server: + # Kill process and children in this case... 
+ try: + os.killpg(os.getpgid(nbd_server.pid), signal.SIGTERM) + except: + pass + + raise xs_errors.XenError( + 'VDIUnavailable', + opterr='Failed to start nbd-server: {}'.format(e) + ) + + @classmethod + def _kill_persistent_server(self, type, volume_name, sig): + try: + path = '/run/{}-server-{}.pid'.format(type, volume_name) + if not os.path.exists(path): + return + + pid = None + with open(path, 'r') as pid_file: + try: + pid = int(pid_file.read()) + except Exception: + pass + + if pid is not None and util.check_pid_exists(pid): + util.SMlog('Kill {} server {} (pid={})'.format(type, path, pid)) + try: + os.killpg(os.getpgid(pid), sig) + except Exception as e: + util.SMlog('Failed to kill {} server: {}'.format(type, e)) + + os.remove(path) + except: + pass + + @classmethod + def _kill_persistent_http_server(self, volume_name, sig=signal.SIGTERM): + return self._kill_persistent_server('nbd', volume_name, sig) + + @classmethod + def _kill_persistent_nbd_server(self, volume_name, sig=signal.SIGTERM): + return self._kill_persistent_server('http', volume_name, sig) + + def _check_http_nbd_volume_name(self): + volume_name = self.path[14:] + if volume_name not in [ + HA_VOLUME_NAME, REDO_LOG_VOLUME_NAME + ]: + raise xs_errors.XenError( + 'VDIUnavailable', + opterr='Unsupported path: {}'.format(self.path) + ) + return volume_name + + def _attach_using_http_nbd(self): + volume_name = self._check_http_nbd_volume_name() + + # Ensure there is no NBD and HTTP server running. + self._kill_persistent_nbd_server(volume_name) + self._kill_persistent_http_server(volume_name) + + # 0. Fetch drbd path. + must_get_device_path = True + if not self.sr._is_master: + # We are on a slave, we must try to find a diskful locally. + try: + volume_info = self._linstor.get_volume_info(self.uuid) + except Exception as e: + raise xs_errors.XenError( + 'VDIUnavailable', + opterr='Cannot get volume info of {}: {}' + .format(self.uuid, e) + ) + + hostname = socket.gethostname() + must_get_device_path = hostname in volume_info.diskful + + drbd_path = None + if must_get_device_path or self.sr._is_master: + # If we are master, we must ensure we have a diskless + # or diskful available to init HA. + # It also avoid this error in xensource.log + # (/usr/libexec/xapi/cluster-stack/xhad/ha_set_pool_state): + # init exited with code 8 [stdout = ''; stderr = 'SF: failed to write in State-File \x10 (fd 4208696). (sys 28)\x0A'] + # init returned MTC_EXIT_CAN_NOT_ACCESS_STATEFILE (State-File is inaccessible) + available = False + try: + drbd_path = self._linstor.get_device_path(self.uuid) + available = util.pathexists(drbd_path) + except Exception: + pass + + if not available: + raise xs_errors.XenError( + 'VDIUnavailable', + opterr='Cannot get device path of {}'.format(self.uuid) + ) + + # 1. Prepare http-nbd folder. + try: + if not os.path.exists('/dev/http-nbd/'): + os.makedirs('/dev/http-nbd/') + elif os.path.islink(self.path): + os.remove(self.path) + except OSError as e: + if e.errno != errno.EEXIST: + raise xs_errors.XenError( + 'VDIUnavailable', + opterr='Cannot prepare http-nbd: {}'.format(e) + ) + + # 2. Start HTTP service if we have a diskful or if we are master. + http_service = None + if drbd_path: + assert(drbd_path in ( + '/dev/drbd/by-res/{}/0'.format(HA_VOLUME_NAME), + '/dev/drbd/by-res/{}/0'.format(REDO_LOG_VOLUME_NAME) + )) + self._start_persistent_http_server(volume_name) + + # 3. Start NBD server in all cases. 
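+ # If the NBD server cannot be started, stop the HTTP server launched in
+ # step 2 so no stale server process is left behind, then re-raise.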
+ try: + self._start_persistent_nbd_server(volume_name) + except Exception as e: + if drbd_path: + self._kill_persistent_http_server(volume_name) + raise + + self.attached = True + return VDI.VDI.attach(self, self.sr.uuid, self.uuid) + + def _detach_using_http_nbd(self): + volume_name = self._check_http_nbd_volume_name() + self._kill_persistent_nbd_server(volume_name) + self._kill_persistent_http_server(volume_name) + + def _create_chain_paths(self, vdi_uuid): + # OPTIMIZE: Add a limit_to_first_allocated_block param to limit vhdutil calls. + # Useful for the snapshot code algorithm. + + while vdi_uuid: + path = self._linstor.get_device_path(vdi_uuid) + if not util.pathexists(path): + raise xs_errors.XenError( + 'VDIUnavailable', opterr='Could not find: {}'.format(path) + ) + + # Diskless path can be created on the fly, ensure we can open it. + def check_volume_usable(): + while True: + try: + with open(path, 'r+'): + pass + except IOError as e: + if e.errno == errno.ENODATA: + time.sleep(2) + continue + if e.errno == errno.EROFS: + util.SMlog('Volume not attachable because RO. Openers: {}'.format( + self.sr._linstor.get_volume_openers(vdi_uuid) + )) + raise + break + util.retry(check_volume_usable, 15, 2) + + vdi_uuid = self.sr._vhdutil.get_vhd_info(vdi_uuid).parentUuid + # ------------------------------------------------------------------------------ if __name__ == '__main__': - SRCommand.run(LinstorSR, DRIVER_INFO) + def run(): + SRCommand.run(LinstorSR, DRIVER_INFO) + + if not TRACE_PERFS: + run() + else: + util.make_profile('LinstorSR', run) else: SR.registerSR(LinstorSR) diff --git a/drivers/blktap2.py b/drivers/blktap2.py index e9887c8ed..c900f57d8 100755 --- a/drivers/blktap2.py +++ b/drivers/blktap2.py @@ -50,6 +50,12 @@ from xmlrpc.client import ServerProxy, Transport from socket import socket, AF_UNIX, SOCK_STREAM +try: + from linstorvolumemanager import log_drbd_openers + LINSTOR_AVAILABLE = True +except ImportError: + LINSTOR_AVAILABLE = False + PLUGIN_TAP_PAUSE = "tapdisk-pause" SOCKPATH = "/var/xapi/xcp-rrdd" @@ -811,7 +817,23 @@ def launch_on_tap(cls, blktap, path, _type, options): TapCtl.attach(pid, minor) try: - TapCtl.open(pid, minor, _type, path, options) + retry_open = 0 + while True: + try: + TapCtl.open(pid, minor, _type, path, options) + break + except TapCtl.CommandFailure as e: + err = ( + 'status' in e.info and e.info['status'] + ) or None + if err in (errno.EIO, errno.EROFS, errno.EAGAIN): + if retry_open < 5: + retry_open += 1 + time.sleep(1) + continue + if LINSTOR_AVAILABLE and err == errno.EROFS: + log_drbd_openers(path) + raise try: tapdisk = cls.__from_blktap(blktap) node = '/sys/dev/block/%d:%d' % (tapdisk.major(), tapdisk.minor) diff --git a/drivers/cleanup.py b/drivers/cleanup.py index b9855a431..cd354d042 100755 --- a/drivers/cleanup.py +++ b/drivers/cleanup.py @@ -53,8 +53,11 @@ try: from linstorjournaler import LinstorJournaler from linstorvhdutil import LinstorVhdUtil - from linstorvolumemanager \ - import LinstorVolumeManager, LinstorVolumeManagerError + from linstorvolumemanager import get_controller_uri + from linstorvolumemanager import LinstorVolumeManager + from linstorvolumemanager import LinstorVolumeManagerError + from linstorvolumemanager import PERSISTENT_PREFIX as LINSTOR_PERSISTENT_PREFIX + LINSTOR_AVAILABLE = True except ImportError: LINSTOR_AVAILABLE = False @@ -491,7 +494,7 @@ def set_task_status(self, status): # # VDI # -class VDI: +class VDI(object): """Object representing a VDI of a VHD-based SR""" POLL_INTERVAL = 1 @@ -781,6 
+784,12 @@ def delete(self): lock.Lock.cleanupAll(self.uuid) self._clear() + def getParent(self): + return vhdutil.getParent(self.path, lambda x: x.strip()) + + def repair(self, parent): + vhdutil.repair(parent) + def __str__(self): strHidden = "" if self.hidden: @@ -898,12 +907,15 @@ def _reportCoalesceError(vdi, ce): xapi.message.create(msg_name, "3", "SR", vdi.sr.uuid, msg_body) _reportCoalesceError = staticmethod(_reportCoalesceError) + def coalesce(self): + # size is returned in sectors + return vhdutil.coalesce(self.path) * 512 + def _doCoalesceVHD(vdi): try: startTime = time.time() vhdSize = vdi.getAllocatedSize() - # size is returned in sectors - coalesced_size = vhdutil.coalesce(vdi.path) * 512 + coalesced_size = vdi.coalesce() endTime = time.time() vdi.sr.recordStorageSpeed(startTime, endTime, coalesced_size) except util.CommandException as ce: @@ -942,12 +954,12 @@ def _coalesceVHD(self, timeOut): # Try a repair and reraise the exception parent = "" try: - parent = vhdutil.getParent(self.path, lambda x: x.strip()) + parent = self.getParent() if not self._vdi_is_raw(parent): # Repair error is logged and ignored. Error reraised later util.SMlog('Coalesce failed on %s, attempting repair on ' \ 'parent %s' % (self.uuid, parent)) - vhdutil.repair(parent) + self.repair(parent) except Exception as e: util.SMlog('(error ignored) Failed to repair parent %s ' \ 'after failed coalesce on %s, err: %s' % @@ -1049,10 +1061,10 @@ def _setHidden(self, hidden=True): self.hidden = hidden def _increaseSizeVirt(self, size, atomic=True): - """ensure the virtual size of 'self' is at least 'size'. Note that + """ensure the virtual size of 'self' is at least 'size'. Note that resizing a VHD must always be offline and atomically: the file must not be open by anyone and no concurrent operations may take place. - Thus we use the Agent API call for performing paused atomic + Thus we use the Agent API call for performing paused atomic operations. If the caller is already in the atomic context, it must call with atomic = False""" if self.sizeVirt >= size: @@ -1506,8 +1518,6 @@ def _calcExtraSpaceForSnapshotCoalescing(self): class LinstorVDI(VDI): """Object representing a VDI in a LINSTOR SR""" - MAX_SIZE = 2 * 1024 * 1024 * 1024 * 1024 # Max VHD size. 
- VOLUME_LOCK_TIMEOUT = 30 def load(self, info=None): @@ -1518,11 +1528,6 @@ def load(self, info=None): self.fileName = self.sr._linstor.get_volume_name(self.uuid) self.path = self.sr._linstor.build_device_path(self.fileName) - if not util.pathexists(self.path): - raise util.SMException( - '{} of {} not found' - .format(self.fileName, self.uuid) - ) if not info: try: @@ -1535,9 +1540,62 @@ def load(self, info=None): self.parentUuid = info.parentUuid self.sizeVirt = info.sizeVirt - self._sizeVHD = info.sizePhys + self._sizeVHD = -1 + self._sizeAllocated = -1 + self.drbd_size = -1 self.hidden = info.hidden self.scanError = False + self.vdi_type = vhdutil.VDI_TYPE_VHD + + def getSizeVHD(self, fetch=False): + if self._sizeVHD < 0 or fetch: + self._sizeVHD = self.sr._vhdutil.get_size_phys(self.uuid) + return self._sizeVHD + + def getDrbdSize(self, fetch=False): + if self.drbd_size < 0 or fetch: + self.drbd_size = self.sr._vhdutil.get_drbd_size(self.uuid) + return self.drbd_size + + def getAllocatedSize(self): + if self._sizeAllocated == -1: + if not self.raw: + self._sizeAllocated = self.sr._vhdutil.get_allocated_size(self.uuid) + return self._sizeAllocated + + def inflate(self, size): + if self.raw: + return + self.sr.lock() + try: + # Ensure we use the real DRBD size and not the cached one. + # Why? Because this attribute can be changed if volume is resized by user. + self.drbd_size = self.getDrbdSize(fetch=True) + self.sr._vhdutil.inflate(self.sr.journaler, self.uuid, self.path, size, self.drbd_size) + finally: + self.sr.unlock() + self.drbd_size = -1 + self._sizeVHD = -1 + self._sizeAllocated = -1 + + def deflate(self): + if self.raw: + return + self.sr.lock() + try: + # Ensure we use the real sizes and not the cached info. + self.drbd_size = self.getDrbdSize(fetch=True) + self._sizeVHD = self.getSizeVHD(fetch=True) + self.sr._vhdutil.force_deflate(self.path, self._sizeVHD, self.drbd_size, zeroize=False) + finally: + self.sr.unlock() + self.drbd_size = -1 + self._sizeVHD = -1 + self._sizeAllocated = -1 + + def inflateFully(self): + if not self.raw: + self.inflate(LinstorVhdUtil.compute_volume_size(self.sizeVirt, self.vdi_type)) def rename(self, uuid): Util.log('Renaming {} -> {} (path={})'.format( @@ -1559,17 +1617,30 @@ def delete(self): self.sr.unlock() VDI.delete(self) - def pauseVDIs(self, vdiList): - self.sr._linstor.ensure_volume_list_is_not_locked( - vdiList, timeout=self.VOLUME_LOCK_TIMEOUT - ) - return super(VDI).pauseVDIs(vdiList) + def validate(self, fast=False): + if not self.raw and not self.sr._vhdutil.check(self.uuid, fast=fast): + raise util.SMException('VHD {} corrupted'.format(self)) - def _liveLeafCoalesce(self, vdi): + def pause(self, failfast=False): self.sr._linstor.ensure_volume_is_not_locked( - vdi.uuid, timeout=self.VOLUME_LOCK_TIMEOUT + self.uuid, timeout=self.VOLUME_LOCK_TIMEOUT + ) + return super(LinstorVDI, self).pause(failfast) + + def coalesce(self): + # Note: We raise `SMException` here to skip the current coalesce in case of failure. + # Using another exception we can't execute the next coalesce calls. 
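+ # As with vhdutil.coalesce in the base class, the coalesced size is
+ # reported in sectors, hence the multiplication by 512 to get bytes.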
+ return self.sr._vhdutil.force_coalesce(self.path) * 512 + + def getParent(self): + return self.sr._vhdutil.get_parent( + self.sr._linstor.get_volume_uuid_from_device_path(self.path) + ) + + def repair(self, parent_uuid): + self.sr._vhdutil.force_repair( + self.sr._linstor.get_device_path(parent_uuid) ) - return super(VDI)._liveLeafCoalesce(vdi) def _relinkSkip(self): abortFlag = IPCFlag(self.sr.uuid) @@ -1595,6 +1666,40 @@ def _relinkSkip(self): blktap2.VDI.tap_unpause(session, sr_uuid, vdi_uuid) self.children = [] + def _setParent(self, parent): + self.sr._linstor.get_device_path(self.uuid) + self.sr._vhdutil.force_parent(self.path, parent.path) + self.parent = parent + self.parentUuid = parent.uuid + parent.children.append(self) + try: + self.setConfig(self.DB_VHD_PARENT, self.parentUuid) + Util.log("Updated the vhd-parent field for child %s with %s" % \ + (self.uuid, self.parentUuid)) + except: + Util.log("Failed to update %s with vhd-parent field %s" % \ + (self.uuid, self.parentUuid)) + + def _doCoalesce(self): + try: + self._activateChain() + self.parent.validate() + self._inflateParentForCoalesce() + VDI._doCoalesce(self) + finally: + self.parent.deflate() + + def _activateChain(self): + vdi = self + while vdi: + try: + p = self.sr._linstor.get_device_path(vdi.uuid) + except Exception as e: + # Use SMException to skip coalesce. + # Otherwise the GC is stopped... + raise util.SMException(str(e)) + vdi = vdi.parent + def _setHidden(self, hidden=True): HIDDEN_TAG = 'hidden' @@ -1606,14 +1711,57 @@ def _setHidden(self, hidden=True): else: VDI._setHidden(self, hidden) + def _setSizeVirt(self, size): + jfile = self.uuid + '-jvhd' + self.sr._linstor.create_volume( + jfile, vhdutil.MAX_VHD_JOURNAL_SIZE, persistent=False, volume_name=jfile + ) + try: + self.inflate(LinstorVhdUtil.compute_volume_size(size, self.vdi_type)) + self.sr._vhdutil.set_size_virt(size, jfile) + finally: + try: + self.sr._linstor.destroy_volume(jfile) + except Exception: + # We can ignore it, in any case this volume is not persistent. + pass + def _queryVHDBlocks(self): return self.sr._vhdutil.get_block_bitmap(self.uuid) + def _inflateParentForCoalesce(self): + if self.parent.raw: + return + inc = self._calcExtraSpaceForCoalescing() + if inc > 0: + self.parent.inflate(self.parent.getDrbdSize() + inc) + + def _calcExtraSpaceForCoalescing(self): + if self.parent.raw: + return 0 + size_coalesced = LinstorVhdUtil.compute_volume_size( + self._getCoalescedSizeData(), self.vdi_type + ) + Util.log("Coalesced size = %s" % Util.num2str(size_coalesced)) + return size_coalesced - self.parent.getDrbdSize() + + def _calcExtraSpaceForLeafCoalescing(self): + assert self.getDrbdSize() > 0 + assert self.getSizeVHD() > 0 + deflate_diff = self.getDrbdSize() - LinstorVolumeManager.round_up_volume_size(self.getSizeVHD()) + assert deflate_diff >= 0 + return self._calcExtraSpaceForCoalescing() - deflate_diff + + def _calcExtraSpaceForSnapshotCoalescing(self): + assert self.getSizeVHD() > 0 + return self._calcExtraSpaceForCoalescing() + \ + LinstorVolumeManager.round_up_volume_size(self.getSizeVHD()) + ################################################################################ # # SR # -class SR: +class SR(object): class LogFilter: def __init__(self, sr): self.sr = sr @@ -2264,7 +2412,7 @@ def printSummary(self): def _coalesceLeaf(self, vdi): """Leaf-coalesce VDI vdi. 
Return true if we succeed, false if we cannot - complete due to external changes, namely vdi_delete and vdi_snapshot + complete due to external changes, namely vdi_delete and vdi_snapshot that alter leaf-coalescibility of vdi""" tracker = self.CoalesceTracker(self) while not vdi.canLiveCoalesce(self.getStorageSpeed()): @@ -2481,7 +2629,7 @@ def _doCoalesceLeaf(self, vdi): self.forgetVDI(origParentUuid) self._finishCoalesceLeaf(parent) self._updateSlavesOnResize(parent) - + def _calcExtraSpaceNeeded(self, child, parent): assert(not parent.raw) # raw parents not supported extra = child.getSizeVHD() - parent.getSizeVHD() @@ -2515,9 +2663,9 @@ def _removeStaleVDIs(self, uuidsPresent): del self.vdis[uuid] def _handleInterruptedCoalesceLeaf(self): - """An interrupted leaf-coalesce operation may leave the VHD tree in an - inconsistent state. If the old-leaf VDI is still present, we revert the - operation (in case the original error is persistent); otherwise we must + """An interrupted leaf-coalesce operation may leave the VHD tree in an + inconsistent state. If the old-leaf VDI is still present, we revert the + operation (in case the original error is persistent); otherwise we must finish the operation""" # abstract pass @@ -2598,10 +2746,10 @@ def deleteVDIs(self, vdiList): self.xapi.markCacheSRsDirty() def cleanupCache(self, maxAge=-1): - """Clean up IntelliCache cache files. Caches for leaf nodes are - removed when the leaf node no longer exists or its allow-caching - attribute is not set. Caches for parent nodes are removed when the - parent node no longer exists or it hasn't been used in more than + """Clean up IntelliCache cache files. Caches for leaf nodes are + removed when the leaf node no longer exists or its allow-caching + attribute is not set. Caches for parent nodes are removed when the + parent node no longer exists or it hasn't been used in more than hours. Return number of caches removed. """ @@ -3091,7 +3239,6 @@ def __init__(self, uuid, xapi, createLock, force): ) SR.__init__(self, uuid, xapi, createLock, force) - self._master_uri = 'linstor://localhost' self.path = LinstorVolumeManager.DEV_ROOT_PATH self._reloadLinstor() @@ -3118,6 +3265,12 @@ def scan(self, force=False): self.logFilter.logState() self._handleInterruptedCoalesceLeaf() + def pauseVDIs(self, vdiList): + self._linstor.ensure_volume_list_is_not_locked( + vdiList, timeout=LinstorVDI.VOLUME_LOCK_TIMEOUT + ) + return super(LinstorSR, self).pauseVDIs(vdiList) + def _reloadLinstor(self): session = self.xapi.session host_ref = util.get_this_host_ref(session) @@ -3130,12 +3283,13 @@ def _reloadLinstor(self): dconf = session.xenapi.PBD.get_device_config(pbd) group_name = dconf['group-name'] + controller_uri = get_controller_uri() self.journaler = LinstorJournaler( - self._master_uri, group_name, logger=util.SMlog + controller_uri, group_name, logger=util.SMlog ) self._linstor = LinstorVolumeManager( - self._master_uri, + controller_uri, group_name, repair=True, logger=util.SMlog @@ -3168,40 +3322,79 @@ def _load_vdi_info(self): # TODO: Ensure metadata contains the right info. 
- all_volume_info = self._linstor.volumes_with_info - volumes_metadata = self._linstor.volumes_with_metadata + all_volume_info = self._linstor.get_volumes_with_info() + volumes_metadata = self._linstor.get_volumes_with_metadata() for vdi_uuid, volume_info in all_volume_info.items(): try: - if not volume_info.name and \ - not list(volumes_metadata[vdi_uuid].items()): + volume_metadata = volumes_metadata[vdi_uuid] + if not volume_info.name and not list(volume_metadata.items()): continue # Ignore it, probably deleted. - vdi_type = volumes_metadata[vdi_uuid][VDI_TYPE_TAG] - if vdi_type == vhdutil.VDI_TYPE_VHD: + if vdi_uuid.startswith('DELETED_'): + # Assume it's really a RAW volume of a failed snap without VHD header/footer. + # We must remove this VDI now without adding it in the VDI list. + # Otherwise `Relinking` calls and other actions can be launched on it. + # We don't want that... + Util.log('Deleting bad VDI {}'.format(vdi_uuid)) + + self.lock() + try: + self._linstor.destroy_volume(vdi_uuid) + try: + self.forgetVDI(vdi_uuid) + except: + pass + except Exception as e: + Util.log('Cannot delete bad VDI: {}'.format(e)) + finally: + self.unlock() + continue + + vdi_type = volume_metadata.get(VDI_TYPE_TAG) + volume_name = self._linstor.get_volume_name(vdi_uuid) + if volume_name.startswith(LINSTOR_PERSISTENT_PREFIX): + # Always RAW! + info = None + elif vdi_type == vhdutil.VDI_TYPE_VHD: info = self._vhdutil.get_vhd_info(vdi_uuid) else: - info = None + # Ensure it's not a VHD... + try: + info = self._vhdutil.get_vhd_info(vdi_uuid) + except: + try: + self._vhdutil.force_repair( + self._linstor.get_device_path(vdi_uuid) + ) + info = self._vhdutil.get_vhd_info(vdi_uuid) + except: + info = None + except Exception as e: Util.log( ' [VDI {}: failed to load VDI info]: {}' - .format(self.uuid, e) + .format(vdi_uuid, e) ) info = vhdutil.VHDInfo(vdi_uuid) info.error = 1 + all_vdi_info[vdi_uuid] = info + return all_vdi_info - # TODO: Maybe implement _liveLeafCoalesce/_prepareCoalesceLeaf/ - # _finishCoalesceLeaf/_updateSlavesOnResize like LVM plugin. + def _prepareCoalesceLeaf(self, vdi): + vdi._activateChain() + vdi.deflate() + vdi._inflateParentForCoalesce() + + def _finishCoalesceLeaf(self, parent): + if not parent.isSnapshot() or parent.isAttachedRW(): + parent.inflateFully() + else: + parent.deflate() def _calcExtraSpaceNeeded(self, child, parent): - meta_overhead = vhdutil.calcOverheadEmpty(LinstorVDI.MAX_SIZE) - bitmap_overhead = vhdutil.calcOverheadBitmap(parent.sizeVirt) - virtual_size = LinstorVolumeManager.round_up_volume_size( - parent.sizeVirt + meta_overhead + bitmap_overhead - ) - # TODO: Check result. 
- return virtual_size - self._linstor.get_volume_size(parent.uuid) + return LinstorVhdUtil.compute_volume_size(parent.sizeVirt, parent.vdi_type) - parent.getDrbdSize() def _hasValidDevicePath(self, uuid): try: @@ -3211,6 +3404,16 @@ def _hasValidDevicePath(self, uuid): return False return True + def _liveLeafCoalesce(self, vdi): + self.lock() + try: + self._linstor.ensure_volume_is_not_locked( + vdi.uuid, timeout=LinstorVDI.VOLUME_LOCK_TIMEOUT + ) + return super(LinstorSR, self)._liveLeafCoalesce(vdi) + finally: + self.unlock() + def _handleInterruptedCoalesceLeaf(self): entries = self.journaler.get_all(VDI.JRN_LEAF) for uuid, parentUuid in entries.items(): @@ -3237,7 +3440,6 @@ def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid): 'Renaming parent back: {} -> {}'.format(childUuid, parentUuid) ) parent.rename(parentUuid) - util.fistpoint.activate('LVHDRT_coaleaf_undo_after_rename', self.uuid) child = self.getVDI(childUuid) if not child: @@ -3253,9 +3455,6 @@ def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid): Util.log('Updating the VDI record') child.setConfig(VDI.DB_VHD_PARENT, parentUuid) child.setConfig(VDI.DB_VDI_TYPE, vhdutil.VDI_TYPE_VHD) - util.fistpoint.activate( - 'LVHDRT_coaleaf_undo_after_rename2', self.uuid - ) # TODO: Maybe deflate here. @@ -3264,10 +3463,7 @@ def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid): if not parent.hidden: parent._setHidden(True) self._updateSlavesOnUndoLeafCoalesce(parent, child) - util.fistpoint.activate('LVHDRT_coaleaf_undo_end', self.uuid) Util.log('*** leaf-coalesce undo successful') - if util.fistpoint.is_active('LVHDRT_coaleaf_stop_after_recovery'): - child.setConfig(VDI.DB_LEAFCLSC, VDI.LEAFCLSC_DISABLED) def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid): Util.log('*** FINISH LEAF-COALESCE') @@ -3280,32 +3476,21 @@ def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid): except XenAPI.Failure: pass self._updateSlavesOnResize(vdi) - util.fistpoint.activate('LVHDRT_coaleaf_finish_end', self.uuid) Util.log('*** finished leaf-coalesce successfully') def _checkSlaves(self, vdi): try: - states = self._linstor.get_usage_states(vdi.uuid) - for node_name, state in states.items(): - self._checkSlave(node_name, vdi, state) + all_openers = self._linstor.get_volume_openers(vdi.uuid) + for openers in all_openers.itervalues(): + for opener in openers.values(): + if opener['process-name'] != 'tapdisk': + raise util.SMException( + 'VDI {} is in use: {}'.format(vdi.uuid, all_openers) + ) except LinstorVolumeManagerError as e: if e.code != LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS: raise - @staticmethod - def _checkSlave(node_name, vdi, state): - # If state is None, LINSTOR doesn't know the host state - # (bad connection?). - if state is None: - raise util.SMException( - 'Unknown state for VDI {} on {}'.format(vdi.uuid, node_name) - ) - - if state: - raise util.SMException( - 'VDI {} is in use on {}'.format(vdi.uuid, node_name) - ) - ################################################################################ # @@ -3638,9 +3823,9 @@ def abort(srUuid, soft=False): def gc(session, srUuid, inBackground, dryRun=False): - """Garbage collect all deleted VDIs in SR "srUuid". Fork & return - immediately if inBackground=True. - + """Garbage collect all deleted VDIs in SR "srUuid". Fork & return + immediately if inBackground=True. + The following algorithm is used: 1. If we are already GC'ing in this SR, return 2. 
If we are already coalescing a VDI pair: diff --git a/drivers/linstor-manager b/drivers/linstor-manager index f7ce18099..8a3083447 100755 --- a/drivers/linstor-manager +++ b/drivers/linstor-manager @@ -14,32 +14,53 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . +# We must modify default import path, we don't want to import modules +# installed in plugins folder and instead we must import from LINSTOR driver +# folder. +import sys +sys.path[0] = '/opt/xensource/sm/' + import base64 import distutils.util -import subprocess -import sys +import os +import socket +import XenAPI import XenAPIPlugin -sys.path.append('/opt/xensource/sm/') from linstorjournaler import LinstorJournaler -from linstorvolumemanager import LinstorVolumeManager +from linstorvhdutil import LinstorVhdUtil +from linstorvolumemanager import get_controller_uri, get_local_volume_openers, LinstorVolumeManager from lock import Lock import json import LinstorSR +import re import util import vhdutil +BACKING_DISK_RE = re.compile('^/dev/([^/]+)/(?:[^/]+)$') +LVM_PLUGIN = 'lvm.py' +THIN_POOL = 'thin_pool' FIREWALL_PORT_SCRIPT = '/etc/xapi.d/plugins/firewall-port' -LINSTOR_PORTS = [3366, 3370, 3376, 3377, '7000:8000'] +LINSTOR_PORTS = [3366, 3370, 3376, 3377, 8076, 8077] +DRBD_PORTS = '7000:8000' + +DRBD_REACTOR_CONF = '/etc/drbd-reactor.d/sm-linstor.toml' + +DRBD_REACTOR_CONF_CONTENT = """[[promoter]] +[promoter.resources.xcp-persistent-database] +start = [ "var-lib-linstor.service", "linstor-controller.service" ] +""" -def get_linstor_uri(session): - return 'linstor://{}'.format(util.get_master_rec(session)['address']) +DRBD_REACTOR_DEPS = [ + '/run/systemd/system/linstor-controller.service.d/reactor.conf', + '/run/systemd/system/var-lib-linstor.service.d/reactor.conf' +] -def update_port(port, open): - fn = 'open' if open else 'close' +def update_linstor_port(port, open_ports): + fn = 'open' if open_ports else 'close' args = ( FIREWALL_PORT_SCRIPT, fn, str(port), 'tcp' ) @@ -50,28 +71,251 @@ def update_port(port, open): raise Exception('Failed to {} port: {} {}'.format(fn, out, err)) -def update_all_ports(open): - for port in LINSTOR_PORTS: - update_port(port, open) +def has_iptables_rule(rule): + (ret, stdout, stderr) = util.doexec(['iptables', '-C'] + rule) + return not ret -def update_service(start): - fn = 'enable' if start else 'disable' - args = ('systemctl', fn, '--now', 'linstor-satellite') - (ret, out, err) = util.doexec(args) - if ret == 0: +def update_drbd_ports(open_ports): + # We want to use a static rule regarding DRBD volumes, + # so we can't use the XAPI firewall port script, we have to manually + # check for existing rules before updating iptables service. 
+ rule = ['INPUT', '-p', 'tcp', '--dport', DRBD_PORTS, '-j', 'ACCEPT'] + if open_ports == has_iptables_rule(rule): return - raise Exception('Failed to {} satellite: {} {}'.format(fn, out, err)) + if open_ports: + rule.insert(1, '1') + (ret, stdout, stderr) = util.doexec(['iptables', '-I'] + rule) + if ret: + raise Exception('Failed to add DRBD rule: {}'.format(stderr)) + else: + (ret, stdout, stderr) = util.doexec(['iptables', '-D'] + rule) + if ret: + raise Exception('Failed to remove DRBD rule: {}'.format(stderr)) + (ret, stdout, stderr) = util.doexec(['service', 'iptables', 'save']) + if ret: + raise Exception('Failed to save DRBD rule: {}'.format(stderr)) + + +def update_all_ports(open_ports): + for port in LINSTOR_PORTS: + update_linstor_port(port, open_ports) + update_drbd_ports(open_ports) + + +def update_linstor_satellite_service(start): + service = 'linstor-satellite' + + # Stop services in all cases first. + # Ensure we don't have an invalid cache used by a satellite. + # (We found an issue with a new added disk which used a volume group name + # formerly involved by another disk. To avoid this kind of problem, we + # always restart the satellite.) + util.enable_and_start_service(service, False) + if start: + util.enable_and_start_service(service, True) + + +def update_drbd_reactor_service(start): + if start: + util.atomicFileWrite(DRBD_REACTOR_CONF, None, DRBD_REACTOR_CONF_CONTENT) + else: + try: + os.remove(DRBD_REACTOR_CONF) + except Exception: + pass + + util.stop_service('drbd-reactor') + + try: + util.stop_service('drbd-promote@xcp\x2dpersistent\x2ddatabase.service') + except Exception as e: + if str(e).rstrip().endswith(' not loaded.'): + pass + raise e + + util.stop_service('linstor-controller') + util.stop_service('var-lib-linstor.service') + + for dep in DRBD_REACTOR_DEPS: + try: + os.remove(dep) + except Exception: + pass + + util.doexec(['systemctl', 'daemon-reload']) + util.enable_and_start_service('drbd-reactor', start) + + +def exec_create_sr(session, name, description, disks, volume_group, redundancy, provisioning, force): + disk_hostnames = disks.keys() + thin = provisioning == 'thin' + + # Create volumes. 
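The update_drbd_ports helper above keeps the firewall change idempotent: it probes with `iptables -C` first and only inserts or deletes the static rule for the 7000:8000 DRBD range when the current state differs from the requested one. A rough standalone equivalent of that pattern (simplified, using subprocess instead of util.doexec and with error handling trimmed):

import subprocess

DRBD_PORTS = '7000:8000'
RULE = ['INPUT', '-p', 'tcp', '--dport', DRBD_PORTS, '-j', 'ACCEPT']

def rule_present():
    # `iptables -C` exits with 0 when the rule already exists.
    return subprocess.call(['iptables', '-C'] + RULE) == 0

def set_drbd_ports(open_ports):
    if open_ports == rule_present():
        return  # Nothing to do, the firewall already matches the request.
    if open_ports:
        # Insert at position 1 so the ACCEPT takes precedence.
        subprocess.check_call(['iptables', '-I', RULE[0], '1'] + RULE[1:])
    else:
        subprocess.check_call(['iptables', '-D'] + RULE)
    # Persist across reboots, as the plugin does with `service iptables save`.
    subprocess.check_call(['service', 'iptables', 'save'])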
+ hosts = session.xenapi.host.get_all_records() + hostnames = [] + for host_ref, host_record in hosts.items(): + hostname = host_record['hostname'] + hostnames.append(hostname) + + if force: + try: + session.xenapi.host.call_plugin( + host_ref, LVM_PLUGIN, 'destroy_volume_group', { + 'vg_name': volume_group, + 'force': 'True' + } + ) + except Exception as e: + try: + response = session.xenapi.host.call_plugin( + host_ref, LVM_PLUGIN, 'list_volume_groups', { + 'vg_name': volume_group + } + ) + if response != '{}': + raise e + except Exception: + raise e + + if hostname not in disk_hostnames or not disks[hostname]: + if force or session.xenapi.host.call_plugin( + host_ref, LVM_PLUGIN, 'list_volume_groups', { + 'vg_name': volume_group + } + ) == '{}': + continue + raise Exception('Volume group should not exist on `{}`, you must remove it manually'.format(hostname)) + + host_disks = disks[hostname] + if type(host_disks) is list: + host_disks = ','.join(disks[hostname]) + else: + raise Exception('Disk value of `{}` must be a disk list'.format(hostname)) + + session.xenapi.host.call_plugin( + host_ref, LVM_PLUGIN, 'create_physical_volume', { + 'devices': host_disks, + 'force': str(force) + } + ) + + session.xenapi.host.call_plugin( + host_ref, LVM_PLUGIN, 'create_volume_group', { + 'vg_name': volume_group, + 'devices': host_disks + } + ) + + if thin: + session.xenapi.host.call_plugin( + host_ref, LVM_PLUGIN, 'create_thin_pool', { + 'vg_name': volume_group, + 'lv_name': THIN_POOL + } + ) + # Create SR. + master_ref = session.xenapi.pool.get_all_records().values()[0]['master'] -def enable(session, args): + device_config = { + 'redundancy': str(redundancy), + 'provisioning': 'thin' if thin else 'thick', + 'group-name': '{}/{}'.format(volume_group, THIN_POOL) if thin else volume_group, + 'hosts': ','.join(hostnames), + 'monitor-db-quorum': str(len(hostnames) > 2) + } + sr_ref = session.xenapi.SR.create( + master_ref, device_config, '0', name, description, 'linstor', '', True, {} + ) + return session.xenapi.SR.get_uuid(sr_ref) + + +def get_drbd_volumes(volume_group=None): + drbd_volumes = {} + (ret, stdout, stderr) = util.doexec(['drbdsetup', 'show', '--json']) + if ret: + raise Exception('Failed to get JSON object: {}'.format(stderr)) + + config = json.loads(stdout) + for resource in config: + for volume in resource['_this_host']['volumes']: + backing_disk = volume['backing-disk'] + match = BACKING_DISK_RE.match(backing_disk) + if not match: + continue + + cur_volume_group = match.groups()[0] + if volume_group and cur_volume_group != volume_group: + continue + + minor = int(volume['device_minor']) + if cur_volume_group in drbd_volumes: + drbd_volumes[cur_volume_group].append(minor) + else: + drbd_volumes[cur_volume_group] = [minor] + return drbd_volumes + + +def force_destroy_drbd_volume(minor): + (ret, stdout, stderr) = util.doexec(['drbdsetup', 'detach', minor, '--force']) + if ret: + raise Exception('Failed to detach volume: {}'.format(stderr)) + (ret, stdout, stderr) = util.doexec(['drbdsetup', 'del-minor', minor]) + if ret: + raise Exception('Failed to destroy volume: {}'.format(stderr)) + + +def get_ip_addr_of_pif(session, pif_uuid): + pif_ref = session.xenapi.PIF.get_by_uuid(pif_uuid) + pif = session.xenapi.PIF.get_record(pif_ref) + + if not pif['currently_attached']: + raise XenAPIPlugin.Failure('-1', ['PIF is not plugged']) + + ip_addr = pif['IP'] if pif['primary_address_type'].lower() == 'ipv4' else pif['IPv6'].split('/')[0] + if ip_addr == '': + raise XenAPIPlugin.Failure('-1', ['PIF has 
no IP']) + return ip_addr + +# ------------------------------------------------------------------------------ + + +def prepare_sr(session, args): + try: + LinstorSR.activate_lvm_group(args['groupName']) + + update_all_ports(open_ports=True) + # We don't want to enable and start drbd-reactor daemon during + # SR creation. + update_drbd_reactor_service(start=False) + update_linstor_satellite_service(start=True) + return str(True) + except Exception as e: + util.SMlog('linstor-manager:prepare_sr error: {}'.format(e)) + return str(False) + + +def release_sr(session, args): + try: + update_linstor_satellite_service(start=False) + update_drbd_reactor_service(start=False) + update_all_ports(open_ports=False) + return str(True) + except Exception as e: + util.SMlog('linstor-manager:release_sr error: {}'.format(e)) + return str(False) + + +def update_drbd_reactor(session, args): try: enabled = distutils.util.strtobool(args['enabled']) - update_all_ports(open=enabled) - update_service(start=enabled) + update_drbd_reactor_service(start=enabled) return str(True) except Exception as e: - util.SMlog('linstor-manager:disable error: {}'.format(e)) + util.SMlog( + 'linstor-manager:update_drbd_reactor error: {}'.format(e) + ) return str(False) @@ -81,12 +325,12 @@ def attach(session, args): vdi_uuid = args['vdiUuid'] group_name = args['groupName'] - linstor_uri = get_linstor_uri(session) + controller_uri = get_controller_uri() journaler = LinstorJournaler( - linstor_uri, group_name, logger=util.SMlog + controller_uri, group_name, logger=util.SMlog ) linstor = LinstorVolumeManager( - linstor_uri, + controller_uri, group_name, logger=util.SMlog ) @@ -104,7 +348,7 @@ def detach(session, args): group_name = args['groupName'] linstor = LinstorVolumeManager( - get_linstor_uri(session), + get_controller_uri(), group_name, logger=util.SMlog ) @@ -115,10 +359,37 @@ def detach(session, args): return str(False) +def destroy(session, args): + try: + group_name = args['groupName'] + + # When destroy is called, there are no running drbd-reactor daemons. + # So the controllers are stopped too, we must start an instance. 
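A side note on get_drbd_volumes above: it only needs the resource list from `drbdsetup show --json` and groups DRBD minors by the LVM volume group extracted from each `backing-disk` path. The parsing reduces to the following sketch (the JSON shape is assumed to match what the helper reads: resources with `_this_host.volumes` entries carrying `backing-disk` and `device_minor`):

import json
import re
from collections import defaultdict

BACKING_DISK_RE = re.compile(r'^/dev/([^/]+)/[^/]+$')

def group_minors_by_vg(drbdsetup_json, only_vg=None):
    """Return {volume_group: [DRBD minors]} from `drbdsetup show --json` output."""
    result = defaultdict(list)
    for resource in json.loads(drbdsetup_json):
        for volume in resource['_this_host']['volumes']:
            match = BACKING_DISK_RE.match(volume['backing-disk'])
            if not match:
                continue  # Not an LVM-backed volume, or an unexpected path.
            vg = match.group(1)
            if only_vg and vg != only_vg:
                continue
            result[vg].append(int(volume['device_minor']))
    return dict(result)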
+ util.restart_service('var-lib-linstor.service') + util.restart_service('linstor-controller') + + linstor = LinstorVolumeManager( + 'linstor://localhost', + group_name, + logger=util.SMlog + ) + linstor.destroy() + return str(True) + except Exception as e: + util.stop_service('linstor-controller') + util.stop_service('var-lib-linstor.service') + util.SMlog('linstor-manager:destroy error: {}'.format(e)) + return str(False) + + def check(session, args): try: device_path = args['devicePath'] - return str(vhdutil.check(device_path)) + ignore_missing_footer = distutils.util.strtobool( + args['ignoreMissingFooter'] + ) + fast = distutils.util.strtobool(args['fast']) + return str(vhdutil.check(device_path, ignore_missing_footer, fast)) except Exception as e: util.SMlog('linstor-manager:check error: {}'.format(e)) raise @@ -131,7 +402,7 @@ def get_vhd_info(session, args): include_parent = distutils.util.strtobool(args['includeParent']) linstor = LinstorVolumeManager( - get_linstor_uri(session), + get_controller_uri(), group_name, logger=util.SMlog ) @@ -143,7 +414,7 @@ def get_vhd_info(session, args): ) vhd_info = vhdutil.getVHDInfo( - device_path, extract_uuid, include_parent + device_path, extract_uuid, include_parent, False ) return json.dumps(vhd_info.__dict__) except Exception as e: @@ -166,7 +437,7 @@ def get_parent(session, args): group_name = args['groupName'] linstor = LinstorVolumeManager( - get_linstor_uri(session), + get_controller_uri(), group_name, logger=util.SMlog ) @@ -201,6 +472,15 @@ def get_size_phys(session, args): raise +def get_allocated_size(session, args): + try: + device_path = args['devicePath'] + return str(vhdutil.getAllocatedSize(device_path)) + except Exception as e: + util.SMlog('linstor-manager:get_allocated_size error: {}'.format(e)) + raise + + def get_depth(session, args): try: device_path = args['devicePath'] @@ -228,6 +508,68 @@ def get_block_bitmap(session, args): raise +def get_drbd_size(session, args): + try: + device_path = args['devicePath'] + (ret, stdout, stderr) = util.doexec(['blockdev', '--getsize64', device_path]) + if ret == 0: + return stdout.strip() + raise Exception('Failed to get DRBD size: {}'.format(stderr)) + except Exception: + util.SMlog('linstor-manager:get_drbd_size error: {}'.format(stderr)) + raise + + +def set_parent(session, args): + try: + device_path = args['devicePath'] + parent_path = args['parentPath'] + vhdutil.setParent(device_path, parent_path, False) + return '' + except Exception as e: + util.SMlog('linstor-manager:set_parent error: {}'.format(e)) + raise + + +def coalesce(session, args): + try: + device_path = args['devicePath'] + return str(vhdutil.coalesce(device_path)) + except Exception as e: + util.SMlog('linstor-manager:coalesce error: {}'.format(e)) + raise + + +def repair(session, args): + try: + device_path = args['devicePath'] + vhdutil.repair(device_path) + return '' + except Exception as e: + util.SMlog('linstor-manager:repair error: {}'.format(e)) + raise + + +def deflate(session, args): + try: + device_path = args['devicePath'] + new_size = int(args['newSize']) + old_size = int(args['oldSize']) + zeroize = distutils.util.strtobool(args['zeroize']) + group_name = args['groupName'] + + linstor = LinstorVolumeManager( + get_controller_uri(), + group_name, + logger=util.SMlog + ) + LinstorVhdUtil(session, linstor).deflate(device_path, new_size, old_size, zeroize) + return '' + except Exception as e: + util.SMlog('linstor-manager:deflate error: {}'.format(e)) + raise + + def lock_vdi(session, args): lock = None 
try: @@ -236,10 +578,13 @@ def lock_vdi(session, args): group_name = args['groupName'] locked = distutils.util.strtobool(args['locked']) + # We must lock to mark the VDI. lock = Lock(vhdutil.LOCK_TYPE_SR, sr_uuid) + if locked: + lock.acquire() linstor = LinstorVolumeManager( - get_linstor_uri(session), + get_controller_uri(), group_name, logger=util.SMlog ) @@ -249,24 +594,552 @@ def lock_vdi(session, args): except Exception as e: util.SMlog('linstor-manager:lock_vdi error: {}'.format(e)) finally: - if lock: + if locked and lock: lock.release() return str(False) +def has_controller_running(session, args): + (ret, stdout, stderr) = util.doexec([ + 'systemctl', 'is-active', '--quiet', 'linstor-controller' + ]) + return str(ret == 0) + + +def add_host(session, args): + group_name = args['groupName'] + + # 1. Find all LINSTOR SRs and PBDs. + srs = dict() + for sr_ref, sr in session.xenapi.SR.get_all_records().items(): + if sr.get('type') == 'linstor': + srs[sr_ref] = sr + + pbds = dict() + for pbd_ref, pbd in session.xenapi.PBD.get_all_records().items(): + device_config = pbd.get('device_config') + if ( + device_config and + device_config.get('group-name') == group_name + and pbd['SR'] in srs + ): + pbds[pbd_ref] = pbd + + # 2. Ensure there is at least one PBD and all PBDs are used in + # the same SR. + if not pbds: + raise Exception( + 'Failed to find PBDs of group `{}`'.format(group_name) + ) + + sr_ref = None + for pbd in pbds.values(): + if not sr_ref: + sr_ref = pbd['SR'] + elif pbd['SR'] != sr_ref: + raise Exception( + 'Group `{}` is used by many SRs!'.format(group_name) + ) + + # 3. Ensure node doesn't exist. + linstor = LinstorVolumeManager( + get_controller_uri(), + group_name, + logger=util.SMlog + ) + + node_name = socket.gethostname() + has_node = linstor.has_node(node_name) + + new_pbd_ref = None + + try: + # 4. Enable services. + update_all_ports(open_ports=True) + update_drbd_reactor_service(start=True) + update_linstor_satellite_service(start=True) + + # 5. Try to create local node. + if not has_node: + linstor.create_node(node_name, util.get_this_host_address(session)) + + # 6. Try to create PBD. + this_host = util.get_this_host_ref(session) + create_new_pbd = True + + assert pbds + for pbd in pbds.values(): + if pbd['host'] == this_host: + create_new_pbd = False + break + + device_config = pbd['device_config'] + # Should be the same on all hosts. + provisioning = device_config['provisioning'] + + # 7. Create new PBD. + if create_new_pbd: + new_pbd_ref = session.xenapi.PBD.create({ + 'host': this_host, + 'SR': sr_ref, + 'device_config': { + 'group-name': group_name, + 'redundancy': linstor.redundancy, + 'provisioning': provisioning + } + }) + try: + session.xenapi.PBD.plug(new_pbd_ref) + except Exception as e: + util.SMlog('Failed to plug new PBD: {}'.format(e)) + + return str(True) + except Exception as e: + stop_services = not has_node + if stop_services: + try: + linstor.destroy_node(node_name) + except Exception: + pass + + if new_pbd_ref: + try: + session.xenapi.PBD.unplug(new_pbd_ref) + except Exception: + pass + + try: + session.xenapi.PBD.destroy(new_pbd_ref) + except Exception: + pass + + try: + # If we failed to remove the node, we don't stop services. + if stop_services and not linstor.has_node(node_name): + update_linstor_satellite_service(start=False) + update_drbd_reactor_service(start=False) + update_all_ports(open_ports=False) + except Exception: + pass + + raise e + + +def remove_host(session, args): + group_name = args['groupName'] + + # 1. 
Find all LINSTOR SRs and PBDs. + srs = dict() + for sr_ref, sr in session.xenapi.SR.get_all_records().items(): + if sr.get('type') == 'linstor': + srs[sr_ref] = sr + + pbds = dict() + for pbd_ref, pbd in session.xenapi.PBD.get_all_records().items(): + device_config = pbd.get('device_config') + if ( + device_config and + device_config.get('group-name') == group_name + and pbd['SR'] in srs + ): + pbds[pbd_ref] = pbd + + # 2. Remove node. + linstor = LinstorVolumeManager( + get_controller_uri(), + group_name, + logger=util.SMlog + ) + + node_name = socket.gethostname() + if linstor.has_node(node_name): + linstor.destroy_node(node_name) + if linstor.has_node(node_name): + raise Exception('Failed to remove node! Unknown error.') + + this_host = util.get_this_host_ref(session) + + # 3. Remove PBD. + for pbd_ref, pbd in pbds.items(): + host = pbd['host'] + if host == this_host: + if pbd['currently_attached']: + session.xenapi.PBD.unplug(pbd_ref) + session.xenapi.PBD.destroy(pbd_ref) + break + + # 3. Stop services. + try: + update_linstor_satellite_service(start=False) + update_drbd_reactor_service(start=False) + update_all_ports(open_ports=False) + except Exception as e: + util.SMlog('Error while stopping services: {}'.format(e)) + pass + + return str('True') + + +def create_sr(session, args): + try: + # Use a complex parsing contrary to the other functions because + # this helper is a public method and is not easy to use. + name = args.get('name') + if not name: + raise Exception('`name` is empty') + + description = args.get('description') or '' + + disks = args.get('disks') + if not disks: + raise Exception('`disks` is empty') + try: + disks = json.loads(disks) + except Exception as e: + raise Exception('failed to decode `disks`: {}'.format(e)) + if type(disks) is not dict: + raise Exception('`disks` must be a JSON object') + + volume_group = args.get('volume_group') + if not volume_group: + raise Exception('`volume_group` is empty') + + redundancy = args.get('redundancy') + if not redundancy: + raise Exception('`redundancy` is empty') + + try: + redundancy = int(redundancy) + except Exception: + raise Exception('`redundancy` is not a number') + + provisioning = args.get('provisioning') + if not provisioning: + provisioning = 'thin' + elif provisioning != 'thin' and provisioning != 'thick': + raise Exception('unsupported provisioning') + + force = distutils.util.strtobool(args.get('force') or '0') + + return exec_create_sr( + session, name, description, disks, volume_group, redundancy, provisioning, force + ) + except Exception as e: + util.SMlog('linstor-manager:create_sr error: {}'.format(e)) + raise + + +def demote_drbd_resource(session, args): + try: + resource_name = args['resource_name'] + (ret, stdout, stderr) = util.doexec(['drbdsetup', 'secondary', resource_name]) + if ret: + raise Exception('Failed to demote resource: {}'.format(stderr)) + return str(True) + except Exception as e: + util.SMlog('linstor-manager:demote_drbd_resource error: {}'.format(e)) + return str(False) + + +def list_drbd_volumes(session, args): + try: + volume_group = args.get('volume_group') + return json.dumps(get_drbd_volumes(volume_group)) + except Exception as e: + util.SMlog('linstor-manager:list_drbd_volumes error: {}'.format(e)) + raise + + +def destroy_drbd_volume(session, args): + try: + minor = args.get('minor') + if not minor: + raise Exception('Cannot destroy DRBD volume without minor.') + force_destroy_drbd_volume(minor) + return str(True) + except Exception as e: + 
util.SMlog('linstor-manager:destroy_drbd_volume error: {}'.format(e)) + return str(False) + + +def destroy_drbd_volumes(session, args): + try: + volume_group = args.get('volume_group') + if not volume_group: + raise Exception('Cannot destroy DRBD volumes without volume group.') + for minor in get_drbd_volumes(volume_group).get(volume_group, []): + force_destroy_drbd_volume(str(minor)) + return str(True) + except Exception as e: + util.SMlog('linstor-manager:destroy_drbd_volumes error: {}'.format(e)) + return str(False) + + +def get_drbd_openers(session, args): + try: + resource_name = args.get('resourceName') + volume = args.get('volume') + return get_local_volume_openers(resource_name, volume) + except Exception as e: + util.SMlog('linstor-manager:get_drbd_openers error: {}'.format(e)) + raise + + +def health_check(session, args): + group_name = args['groupName'] + + result = { + 'controller-uri': '', + 'nodes': {}, + 'storage-pools': {}, + 'warnings': [], + 'errors': [] + } + + def format_result(): + return json.dumps(result) + + # 1. Get controller. + try: + controller_uri = get_controller_uri() + + result['controller-uri'] = controller_uri + try: + if controller_uri == 'linstor://localhost': + # Replace `localhost` with IP to give a better info for users. + result['controller-uri'] = 'linstor://' + util.get_this_host_address(session) + except Exception: + # Ignore error: can be a XAPI restart or something else. + pass + + linstor = LinstorVolumeManager( + controller_uri, + group_name, + logger=util.SMlog + ) + except Exception as e: + # Probably a network issue, or offline controller. + result['errors'].append('Cannot join SR: `{}`.'.format(e)) + return format_result() + + try: + # 2. Check node statuses. + nodes = linstor.get_nodes_info() + result['nodes'] = nodes + for node_name, status in nodes.items(): + if status != 'ONLINE': + result['warnings'].append('Node `{}` is {}.'.format(node_name, status)) + + # 3. Check storage pool statuses. + storage_pools_per_node = linstor.get_storage_pools_info() + result['storage-pools'] = storage_pools_per_node + for node_name, storage_pools in storage_pools_per_node.items(): + for storage_pool in storage_pools: + free_size = storage_pool['free-size'] + capacity = storage_pool['capacity'] + if free_size < 0 or capacity <= 0: + result['errors'].append( + 'Cannot get free size and/or capacity of storage pool `{}`.' + .format(storage_pool['uuid']) + ) + elif free_size > capacity: + result['errors'].append( + 'Free size of storage pool `{}` is greater than capacity.' + .format(storage_pool['uuid']) + ) + else: + remaining_percent = free_size / float(capacity) * 100.0 + threshold = 10.0 + if remaining_percent < threshold: + result['warnings'].append( + 'Remaining size of storage pool `{}` is below {}% of its capacity.' + .format(storage_pool['uuid'], threshold) + ) + + # 4. Check resource statuses. 
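The storage-pool section of health_check above reduces to a small classifier: unreadable sizes and free space greater than capacity are reported as errors, and anything under roughly 10% remaining space becomes a warning. A compact standalone sketch of that classification (the 10% threshold is kept as the default):

def classify_storage_pool(free_size, capacity, threshold_percent=10.0):
    """Return 'error', 'warning' or 'ok' for one storage pool."""
    if free_size < 0 or capacity <= 0:
        return 'error'    # Sizes could not be read at all.
    if free_size > capacity:
        return 'error'    # Inconsistent report from LINSTOR.
    remaining_percent = free_size / float(capacity) * 100.0
    if remaining_percent < threshold_percent:
        return 'warning'  # Pool is nearly full.
    return 'ok'

For example, classify_storage_pool(5 << 30, 100 << 30) comes back as 'warning' with the default threshold, since only 5% of the pool is left.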
+ all_resources = linstor.get_resources_info() + result['resources'] = all_resources + + for resource_name, resource_by_node in all_resources.items(): + for node_name, resource in resource_by_node.items(): + for volume_index, volume in enumerate(resource['volumes']): + disk_state = volume['disk-state'] + if disk_state in ['UpToDate', 'Created', 'Attached']: + continue + if disk_state == 'DUnknown': + result['warnings'].append( + 'Unknown state for volume `{}` at index {} for resource `{}` on node `{}`' + .format(volume['device-path'], volume_index, resource_name, node_name) + ) + continue + if disk_state in ['Inconsistent', 'Failed', 'To: Creating', 'To: Attachable', 'To: Attaching']: + result['errors'].append( + 'Invalid state `{}` for volume `{}` at index {} for resource `{}` on node `{}`' + .format(disk_state, volume['device-path'], volume_index, resource_name, node_name) + ) + continue + if disk_state == 'Diskless': + if resource['diskful']: + result['errors'].append( + 'Unintentional diskless state detected for volume `{}` at index {} for resource `{}` on node `{}`' + .format(volume['device-path'], volume_index, resource_name, node_name) + ) + elif resource['tie-breaker']: + volume['disk-state'] = 'TieBreaker' + continue + result['warnings'].append( + 'Unhandled state `{}` for volume `{}` at index {} for resource `{}` on node `{}`' + .format(disk_state, volume['device-path'], volume_index, resource_name, node_name) + ) + + except Exception as e: + result['errors'].append('Unexpected error: `{}`'.format(e)) + + return format_result() + + +def create_node_interface(session, args): + group_name = args['groupName'] + hostname = args['hostname'] + name = args['name'] + pif_uuid = args['pifUuid'] + + ip_addr = get_ip_addr_of_pif(session, pif_uuid) + + linstor = LinstorVolumeManager( + get_controller_uri(), + group_name, + logger=util.SMlog + ) + try: + linstor.create_node_interface(hostname, name, ip_addr) + except Exception as e: + raise XenAPIPlugin.Failure('-1', [str(e)]) + return str(True) + + +def destroy_node_interface(session, args): + group_name = args['groupName'] + hostname = args['hostname'] + name = args['name'] + + linstor = LinstorVolumeManager( + get_controller_uri(), + group_name, + logger=util.SMlog + ) + try: + linstor.destroy_node_interface(hostname, name) + except Exception as e: + raise XenAPIPlugin.Failure('-1', [str(e)]) + return str(True) + + +def modify_node_interface(session, args): + group_name = args['groupName'] + hostname = args['hostname'] + name = args['name'] + pif_uuid = args['pifUuid'] + + ip_addr = get_ip_addr_of_pif(session, pif_uuid) + + linstor = LinstorVolumeManager( + get_controller_uri(), + group_name, + logger=util.SMlog + ) + try: + linstor.modify_node_interface(hostname, name, ip_addr) + except Exception as e: + raise XenAPIPlugin.Failure('-1', [str(e)]) + return str(True) + + +def list_node_interfaces(session, args): + group_name = args['groupName'] + hostname = args['hostname'] + + linstor = LinstorVolumeManager( + get_controller_uri(), + group_name, + logger=util.SMlog + ) + try: + return json.dumps(linstor.list_node_interfaces(hostname)) + except Exception as e: + raise XenAPIPlugin.Failure('-1', [str(e)]) + + +def set_node_preferred_interface(session, args): + group_name = args['groupName'] + hostname = args['hostname'] + name = args['name'] + + linstor = LinstorVolumeManager( + get_controller_uri(), + group_name, + logger=util.SMlog + ) + try: + linstor.set_node_preferred_interface(hostname, name) + except Exception as e: + raise 
XenAPIPlugin.Failure('-1', [str(e)]) + return str(True) + + if __name__ == '__main__': XenAPIPlugin.dispatch({ - 'enable': enable, + 'prepareSr': prepare_sr, + 'releaseSr': release_sr, + 'updateDrbdReactor': update_drbd_reactor, 'attach': attach, 'detach': detach, + 'destroy': destroy, + + # vhdutil wrappers called by linstorvhdutil. + # Note: When a VHD is open in RO mode (so for all vhdutil getters), + # the LVM layer is used directly to bypass DRBD verifications. + # In this case there can't be EROFS errors. + # Note 2: We assume linstorvhdutil executes remote calls on diskful + # DRBDs, otherwise we still have EROFS errors... 'check': check, 'getVHDInfo': get_vhd_info, 'hasParent': has_parent, 'getParent': get_parent, 'getSizeVirt': get_size_virt, 'getSizePhys': get_size_phys, + 'getAllocatedSize': get_allocated_size, 'getDepth': get_depth, 'getKeyHash': get_key_hash, 'getBlockBitmap': get_block_bitmap, - 'lockVdi': lock_vdi + + # Small helper to get the DRBD blockdev size. + 'getDrbdSize': get_drbd_size, + + # Called by cleanup.py to coalesce when a primary + # is opened on a non-local host. + 'setParent': set_parent, + 'coalesce': coalesce, + 'repair': repair, + + # Misc writters. + 'deflate': deflate, + + 'lockVdi': lock_vdi, + 'hasControllerRunning': has_controller_running, + 'addHost': add_host, + 'removeHost': remove_host, + 'createSr': create_sr, + 'listDrbdVolumes': list_drbd_volumes, + 'demoteDrbdResource': demote_drbd_resource, + 'destroyDrbdVolume': destroy_drbd_volume, + 'destroyDrbdVolumes': destroy_drbd_volumes, + 'getDrbdOpeners': get_drbd_openers, + 'healthCheck': health_check, + + 'createNodeInterface': create_node_interface, + 'destroyNodeInterface': destroy_node_interface, + 'modifyNodeInterface': modify_node_interface, + 'listNodeInterfaces': list_node_interfaces, + 'setNodePreferredInterface': set_node_preferred_interface }) diff --git a/drivers/linstorjournaler.py b/drivers/linstorjournaler.py index bc7cff7c2..a61d9f11b 100755 --- a/drivers/linstorjournaler.py +++ b/drivers/linstorjournaler.py @@ -16,7 +16,8 @@ # -from linstorvolumemanager import LinstorVolumeManager +from linstorvolumemanager import \ + get_controller_uri, LinstorVolumeManager, LinstorVolumeManagerError import linstor import re import util @@ -52,20 +53,10 @@ def __init__(self, uri, group_name, logger=default_logger.__func__): self._namespace = '{}journal/'.format( LinstorVolumeManager._build_sr_namespace() ) - - def connect(): - self._journal = linstor.KV( - LinstorVolumeManager._build_group_name(group_name), - uri=uri, - namespace=self._namespace - ) - - util.retry( - connect, - maxretry=60, - exceptions=[linstor.errors.LinstorNetworkError] - ) self._logger = logger + self._journal = self._create_journal_instance( + uri, group_name, self._namespace + ) def create(self, type, identifier, value): # TODO: Maybe rename to 'add' in the future (in Citrix code too). @@ -116,6 +107,7 @@ def remove(self, type, identifier): ) def get(self, type, identifier): + self._reset_namespace() return self._journal.get(self._get_key(type, identifier)) def get_all(self, type): @@ -150,6 +142,34 @@ def hasJournals(self, identifier): def _reset_namespace(self): self._journal.namespace = self._namespace + @classmethod + def _create_journal_instance(cls, uri, group_name, namespace): + def connect(uri): + if not uri: + uri = get_controller_uri() + if not uri: + raise LinstorVolumeManagerError( + 'Unable to find controller uri...' 
+ ) + return linstor.KV( + LinstorVolumeManager._build_group_name(group_name), + uri=uri, + namespace=namespace + ) + + try: + return connect(uri) + except (linstor.errors.LinstorNetworkError, LinstorVolumeManagerError): + pass + + return util.retry( + lambda: connect(None), + maxretry=10, + exceptions=[ + linstor.errors.LinstorNetworkError, LinstorVolumeManagerError + ] + ) + @staticmethod def _get_key(type, identifier): return '{}/{}'.format(type, identifier) diff --git a/drivers/linstorvhdutil.py b/drivers/linstorvhdutil.py index 7a1356627..836f4ceb9 100644 --- a/drivers/linstorvhdutil.py +++ b/drivers/linstorvhdutil.py @@ -14,6 +14,8 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . +from linstorjournaler import LinstorJournaler +from linstorvolumemanager import LinstorVolumeManager import base64 import distutils.util import errno @@ -25,9 +27,46 @@ MANAGER_PLUGIN = 'linstor-manager' +# EMEDIUMTYPE constant (124) is not available in python2. +EMEDIUMTYPE = 124 + + +def call_remote_method(session, host_ref, method, device_path, args): + try: + response = session.xenapi.host.call_plugin( + host_ref, MANAGER_PLUGIN, method, args + ) + except Exception as e: + util.SMlog('call-plugin ({} with {}) exception: {}'.format( + method, args, e + )) + raise util.SMException(str(e)) + + util.SMlog('call-plugin ({} with {}) returned: {}'.format( + method, args, response + )) + + return response + + +class LinstorCallException(util.SMException): + def __init__(self, cmd_err): + self.cmd_err = cmd_err + + def __str__(self): + return str(self.cmd_err) + + +class ErofsLinstorCallException(LinstorCallException): + pass + + +class NoPathLinstorCallException(LinstorCallException): + pass + def linstorhostcall(local_method, remote_method): - def decorated(func): + def decorated(response_parser): def wrapper(*args, **kwargs): self = args[0] vdi_uuid = args[1] @@ -41,70 +80,94 @@ def wrapper(*args, **kwargs): # Try to read locally if the device is not in use or if the device # is up to date and not diskless. - (node_names, in_use) = \ - self._linstor.find_up_to_date_diskfull_nodes(vdi_uuid) + (node_names, in_use_by) = \ + self._linstor.find_up_to_date_diskful_nodes(vdi_uuid) + local_e = None try: - if not in_use or socket.gethostname() in node_names: - return local_method(device_path, *args[2:], **kwargs) - except util.CommandException as e: - # EMEDIUMTYPE constant (124) is not available in python2. - if e.code != errno.EROFS and e.code != 124: - raise + if not in_use_by or socket.gethostname() in node_names: + return self._call_local_method(local_method, device_path, *args[2:], **kwargs) + except ErofsLinstorCallException as e: + local_e = e.cmd_err + except Exception as e: + local_e = e + + util.SMlog( + 'unable to execute `{}` locally, retry using a readable host... (cause: {})'.format( + remote_method, local_e if local_e else 'local diskless + in use or not up to date' + ) + ) + + if in_use_by: + node_names = {in_use_by} # B. Execute the plugin on master or slave. 
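The journaler change above follows a connect-with-fallback pattern: the URI passed in is tried once, and every retry after that re-resolves the controller address instead of reusing a possibly stale one. A hedged standalone sketch of that pattern (the connect and resolve_uri callables are stand-ins, not the driver's functions):

import time

def connect_with_fallback(connect, resolve_uri, first_uri=None,
                          retries=10, delay=1, retryable=(OSError,)):
    """Try connect(first_uri) once, then keep re-resolving the controller
    URI and reconnecting until it works or the retries run out."""
    if first_uri:
        try:
            return connect(first_uri)
        except retryable:
            pass  # Fall back to dynamic discovery below.

    last_error = None
    for _ in range(retries):
        try:
            return connect(resolve_uri())
        except retryable as e:
            last_error = e
            time.sleep(delay)
    raise last_error or RuntimeError('no controller URI could be resolved')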
- def exec_remote_method(): - host_ref = self._get_readonly_host( - vdi_uuid, device_path, node_names - ) - args = { - 'devicePath': device_path, - 'groupName': self._linstor.group_name - } - args.update(**kwargs) + remote_args = { + 'devicePath': device_path, + 'groupName': self._linstor.group_name + } + remote_args.update(**kwargs) + remote_args = {str(key): str(value) for key, value in remote_args.iteritems()} - try: - response = self._session.xenapi.host.call_plugin( - host_ref, MANAGER_PLUGIN, remote_method, args - ) - except Exception as e: - util.SMlog('call-plugin ({} with {}) exception: {}'.format( - remote_method, args, e - )) - raise - - util.SMlog('call-plugin ({} with {}) returned: {}'.format( - remote_method, args, response - )) - if response == 'False': - raise xs_errors.XenError( - 'VDIUnavailable', - opterr='Plugin {} failed'.format(MANAGER_PLUGIN) - ) - kwargs['response'] = response - - util.retry(exec_remote_method, 5, 3) - return func(*args, **kwargs) + try: + def remote_call(): + host_ref = self._get_readonly_host(vdi_uuid, device_path, node_names) + return call_remote_method(self._session, host_ref, remote_method, device_path, remote_args) + response = util.retry(remote_call, 5, 2) + except Exception as remote_e: + self._raise_openers_exception(device_path, local_e or remote_e) + + return response_parser(self, vdi_uuid, response) + return wrapper + return decorated + + +def linstormodifier(): + def decorated(func): + def wrapper(*args, **kwargs): + self = args[0] + + ret = func(*args, **kwargs) + self._linstor.invalidate_resource_cache() + return ret return wrapper return decorated class LinstorVhdUtil: + MAX_SIZE = 2 * 1024 * 1024 * 1024 * 1024 # Max VHD size. + def __init__(self, session, linstor): self._session = session self._linstor = linstor + # -------------------------------------------------------------------------- + # Getters: read locally and try on another host in case of failure. 
+ # -------------------------------------------------------------------------- + + def check(self, vdi_uuid, ignore_missing_footer=False, fast=False): + kwargs = { + 'ignoreMissingFooter': ignore_missing_footer, + 'fast': fast + } + return self._check(vdi_uuid, **kwargs) # pylint: disable = E1123 + @linstorhostcall(vhdutil.check, 'check') - def check(self, vdi_uuid, **kwargs): - return distutils.util.strtobool(kwargs['response']) + def _check(self, vdi_uuid, response): + return distutils.util.strtobool(response) def get_vhd_info(self, vdi_uuid, include_parent=True): - kwargs = {'includeParent': str(include_parent)} - return self._get_vhd_info(vdi_uuid, self._extract_uuid, **kwargs) + kwargs = { + 'includeParent': include_parent, + 'resolveParent': False + } + # TODO: Replace pylint comment with this feature when possible: + # https://github.com/PyCQA/pylint/pull/2926 + return self._get_vhd_info(vdi_uuid, self._extract_uuid, **kwargs) # pylint: disable = E1123 @linstorhostcall(vhdutil.getVHDInfo, 'getVHDInfo') - def _get_vhd_info(self, vdi_uuid, *args, **kwargs): - obj = json.loads(kwargs['response']) + def _get_vhd_info(self, vdi_uuid, response): + obj = json.loads(response) vhd_info = vhdutil.VHDInfo(vdi_uuid) vhd_info.sizeVirt = obj['sizeVirt'] @@ -118,35 +181,184 @@ def _get_vhd_info(self, vdi_uuid, *args, **kwargs): return vhd_info @linstorhostcall(vhdutil.hasParent, 'hasParent') - def has_parent(self, vdi_uuid, **kwargs): - return distutils.util.strtobool(kwargs['response']) + def has_parent(self, vdi_uuid, response): + return distutils.util.strtobool(response) def get_parent(self, vdi_uuid): return self._get_parent(vdi_uuid, self._extract_uuid) @linstorhostcall(vhdutil.getParent, 'getParent') - def _get_parent(self, vdi_uuid, *args, **kwargs): - return kwargs['response'] + def _get_parent(self, vdi_uuid, response): + return response @linstorhostcall(vhdutil.getSizeVirt, 'getSizeVirt') - def get_size_virt(self, vdi_uuid, **kwargs): - return int(kwargs['response']) + def get_size_virt(self, vdi_uuid, response): + return int(response) @linstorhostcall(vhdutil.getSizePhys, 'getSizePhys') - def get_size_phys(self, vdi_uuid, **kwargs): - return int(kwargs['response']) + def get_size_phys(self, vdi_uuid, response): + return int(response) + + @linstorhostcall(vhdutil.getAllocatedSize, 'getAllocatedSize') + def get_allocated_size(self, vdi_uuid, response): + return int(response) @linstorhostcall(vhdutil.getDepth, 'getDepth') - def get_depth(self, vdi_uuid, **kwargs): - return int(kwargs['response']) + def get_depth(self, vdi_uuid, response): + return int(response) @linstorhostcall(vhdutil.getKeyHash, 'getKeyHash') - def get_key_hash(self, vdi_uuid, **kwargs): - return kwargs['response'] or None + def get_key_hash(self, vdi_uuid, response): + return response or None @linstorhostcall(vhdutil.getBlockBitmap, 'getBlockBitmap') - def get_block_bitmap(self, vdi_uuid, **kwargs): - return base64.b64decode(kwargs['response']) + def get_block_bitmap(self, vdi_uuid, response): + return base64.b64decode(response) + + @linstorhostcall('_get_drbd_size', 'getDrbdSize') + def get_drbd_size(self, vdi_uuid, response): + return int(response) + + def _get_drbd_size(self, path): + (ret, stdout, stderr) = util.doexec(['blockdev', '--getsize64', path]) + if ret == 0: + return int(stdout.strip()) + raise util.SMException('Failed to get DRBD size: {}'.format(stderr)) + + # -------------------------------------------------------------------------- + # Setters: only used locally. 
+ # -------------------------------------------------------------------------- + + @linstormodifier() + def create(self, path, size, static, msize=0): + return self._call_local_method_or_fail(vhdutil.create, path, size, static, msize) + + @linstormodifier() + def set_size_virt(self, path, size, jfile): + return self._call_local_method_or_fail(vhdutil.setSizeVirt, path, size, jfile) + + @linstormodifier() + def set_size_virt_fast(self, path, size): + return self._call_local_method_or_fail(vhdutil.setSizeVirtFast, path, size) + + @linstormodifier() + def set_size_phys(self, path, size, debug=True): + return self._call_local_method_or_fail(vhdutil.setSizePhys, path, size, debug) + + @linstormodifier() + def set_parent(self, path, parentPath, parentRaw=False): + return self._call_local_method_or_fail(vhdutil.setParent, path, parentPath, parentRaw) + + @linstormodifier() + def set_hidden(self, path, hidden=True): + return self._call_local_method_or_fail(vhdutil.setHidden, path, hidden) + + @linstormodifier() + def set_key(self, path, key_hash): + return self._call_local_method_or_fail(vhdutil.setKey, path, key_hash) + + @linstormodifier() + def kill_data(self, path): + return self._call_local_method_or_fail(vhdutil.killData, path) + + @linstormodifier() + def snapshot(self, path, parent, parentRaw, msize=0, checkEmpty=True): + return self._call_local_method_or_fail(vhdutil.snapshot, path, parent, parentRaw, msize, checkEmpty) + + def inflate(self, journaler, vdi_uuid, vdi_path, new_size, old_size): + # Only inflate if the LINSTOR volume capacity is not enough. + new_size = LinstorVolumeManager.round_up_volume_size(new_size) + if new_size <= old_size: + return + + util.SMlog( + 'Inflate {} (size={}, previous={})' + .format(vdi_path, new_size, old_size) + ) + + journaler.create( + LinstorJournaler.INFLATE, vdi_uuid, old_size + ) + self._linstor.resize_volume(vdi_uuid, new_size) + + # TODO: Replace pylint comment with this feature when possible: + # https://github.com/PyCQA/pylint/pull/2926 + result_size = self.get_drbd_size(vdi_uuid) # pylint: disable = E1120 + if result_size < new_size: + util.SMlog( + 'WARNING: Cannot inflate volume to {}B, result size: {}B' + .format(new_size, result_size) + ) + + self._zeroize(vdi_path, result_size - vhdutil.VHD_FOOTER_SIZE) + self.set_size_phys(vdi_path, result_size, False) + journaler.remove(LinstorJournaler.INFLATE, vdi_uuid) + + def deflate(self, vdi_path, new_size, old_size, zeroize=False): + if zeroize: + assert old_size > vhdutil.VHD_FOOTER_SIZE + self._zeroize(vdi_path, old_size - vhdutil.VHD_FOOTER_SIZE) + + new_size = LinstorVolumeManager.round_up_volume_size(new_size) + if new_size >= old_size: + return + + util.SMlog( + 'Deflate {} (new size={}, previous={})' + .format(vdi_path, new_size, old_size) + ) + + self.set_size_phys(vdi_path, new_size) + # TODO: Change the LINSTOR volume size using linstor.resize_volume. + + # -------------------------------------------------------------------------- + # Remote setters: write locally and try on another host in case of failure. 
+ # -------------------------------------------------------------------------- + + @linstormodifier() + def force_parent(self, path, parentPath, parentRaw=False): + kwargs = { + 'parentPath': str(parentPath), + 'parentRaw': parentRaw + } + return self._call_method(vhdutil.setParent, 'setParent', path, use_parent=False, **kwargs) + + @linstormodifier() + def force_coalesce(self, path): + return int(self._call_method(vhdutil.coalesce, 'coalesce', path, use_parent=True)) + + @linstormodifier() + def force_repair(self, path): + return self._call_method(vhdutil.repair, 'repair', path, use_parent=False) + + @linstormodifier() + def force_deflate(self, path, newSize, oldSize, zeroize): + kwargs = { + 'newSize': newSize, + 'oldSize': oldSize, + 'zeroize': zeroize + } + return self._call_method('_force_deflate', 'deflate', path, use_parent=False, **kwargs) + + def _force_deflate(self, path, newSize, oldSize, zeroize): + self.deflate(path, newSize, oldSize, zeroize) + + # -------------------------------------------------------------------------- + # Static helpers. + # -------------------------------------------------------------------------- + + @classmethod + def compute_volume_size(cls, virtual_size, image_type): + if image_type == vhdutil.VDI_TYPE_VHD: + # All LINSTOR VDIs have the metadata area preallocated for + # the maximum possible virtual size (for fast online VDI.resize). + meta_overhead = vhdutil.calcOverheadEmpty(cls.MAX_SIZE) + bitmap_overhead = vhdutil.calcOverheadBitmap(virtual_size) + virtual_size += meta_overhead + bitmap_overhead + elif image_type != vhdutil.VDI_TYPE_RAW: + raise Exception('Invalid image type: {}'.format(image_type)) + + return LinstorVolumeManager.round_up_volume_size(virtual_size) # -------------------------------------------------------------------------- # Helpers. @@ -161,7 +373,7 @@ def _extract_uuid(self, device_path): def _get_readonly_host(self, vdi_uuid, device_path, node_names): """ When vhd-util is called to fetch VDI info we must find a - diskfull DRBD disk to read the data. It's the goal of this function. + diskful DRBD disk to read the data. It's the goal of this function. Why? Because when a VHD is open in RO mode, the LVM layer is used directly to bypass DRBD verifications (we can have only one process that reads/writes to disk with DRBD devices). 
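compute_volume_size above is why every VHD-type LINSTOR volume is larger than its virtual size: the metadata area is preallocated once for the 2 TiB maximum so online resize never has to grow it, and only the bitmap overhead scales with the requested size. A hedged usage sketch (driver modules assumed importable from /opt/xensource/sm on a host with the LINSTOR bindings; the exact overhead values come from vhdutil and are not reproduced here):

import vhdutil
from linstorvhdutil import LinstorVhdUtil

virtual_size = 100 * 1024 * 1024 * 1024  # 100 GiB requested by the user.

# VHD volumes reserve metadata for the 2 TiB maximum plus the bitmap
# overhead of the requested size, then round up to the LINSTOR granularity.
vhd_volume_size = LinstorVhdUtil.compute_volume_size(
    virtual_size, vhdutil.VDI_TYPE_VHD)

# RAW volumes only get the rounding.
raw_volume_size = LinstorVhdUtil.compute_volume_size(
    virtual_size, vhdutil.VDI_TYPE_RAW)

assert vhd_volume_size >= raw_volume_size >= virtual_size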
@@ -170,7 +382,7 @@ def _get_readonly_host(self, vdi_uuid, device_path, node_names): if not node_names: raise xs_errors.XenError( 'VDIUnavailable', - opterr='Unable to find diskfull node: {} (path={})' + opterr='Unable to find diskful node: {} (path={})' .format(vdi_uuid, device_path) ) @@ -184,3 +396,147 @@ def _get_readonly_host(self, vdi_uuid, device_path, node_names): opterr='Unable to find a valid host from VDI: {} (path={})' .format(vdi_uuid, device_path) ) + + # -------------------------------------------------------------------------- + + def _raise_openers_exception(self, device_path, e): + if isinstance(e, util.CommandException): + e_str = 'cmd: `{}`, code: `{}`, reason: `{}`'.format(e.cmd, e.code, e.reason) + else: + e_str = str(e) + + try: + volume_uuid = self._linstor.get_volume_uuid_from_device_path( + device_path + ) + e_wrapper = Exception( + e_str + ' (openers: {})'.format( + self._linstor.get_volume_openers(volume_uuid) + ) + ) + except Exception as illformed_e: + e_wrapper = Exception( + e_str + ' (unable to get openers: {})'.format(illformed_e) + ) + util.SMlog('raise opener exception: {}'.format(e_wrapper)) + raise e_wrapper # pylint: disable = E0702 + + def _call_local_method(self, local_method, device_path, *args, **kwargs): + if isinstance(local_method, str): + local_method = getattr(self, local_method) + + try: + def local_call(): + try: + return local_method(device_path, *args, **kwargs) + except util.CommandException as e: + if e.code == errno.EROFS or e.code == EMEDIUMTYPE: + raise ErofsLinstorCallException(e) # Break retry calls. + if e.code == errno.ENOENT: + raise NoPathLinstorCallException(e) + raise e + # Retry only locally if it's not an EROFS exception. + return util.retry(local_call, 5, 2, exceptions=[util.CommandException]) + except util.CommandException as e: + util.SMlog('failed to execute locally vhd-util (sys {})'.format(e.code)) + raise e + + def _call_local_method_or_fail(self, local_method, device_path, *args, **kwargs): + try: + return self._call_local_method(local_method, device_path, *args, **kwargs) + except ErofsLinstorCallException as e: + # Volume is locked on a host, find openers. + self._raise_openers_exception(device_path, e.cmd_err) + + def _call_method(self, local_method, remote_method, device_path, use_parent, *args, **kwargs): + # Note: `use_parent` exists to know if the VHD parent is used by the local/remote method. + # Normally in case of failure, if the parent is unused we try to execute the method on + # another host using the DRBD opener list. In the other case, if the parent is required, + # we must check where this last one is open instead of the child. + + if isinstance(local_method, str): + local_method = getattr(self, local_method) + + # A. Try to write locally... + try: + return self._call_local_method(local_method, device_path, *args, **kwargs) + except Exception: + pass + + util.SMlog('unable to execute `{}` locally, retry using a writable host...'.format(remote_method)) + + # B. Execute the command on another host. + # B.1. Get host list. + try: + hosts = self._session.xenapi.host.get_all_records() + except Exception as e: + raise xs_errors.XenError( + 'VDIUnavailable', + opterr='Unable to get host list to run vhd-util command `{}` (path={}): {}' + .format(remote_method, device_path, e) + ) + + # B.2. Prepare remote args. 
+ remote_args = { + 'devicePath': device_path, + 'groupName': self._linstor.group_name + } + remote_args.update(**kwargs) + remote_args = {str(key): str(value) for key, value in remote_args.iteritems()} + + volume_uuid = self._linstor.get_volume_uuid_from_device_path( + device_path + ) + parent_volume_uuid = None + if use_parent: + parent_volume_uuid = self.get_parent(volume_uuid) + + openers_uuid = parent_volume_uuid if use_parent else volume_uuid + + # B.3. Call! + def remote_call(): + try: + all_openers = self._linstor.get_volume_openers(openers_uuid) + except Exception as e: + raise xs_errors.XenError( + 'VDIUnavailable', + opterr='Unable to get DRBD openers to run vhd-util command `{}` (path={}): {}' + .format(remote_method, device_path, e) + ) + + no_host_found = True + for hostname, openers in all_openers.iteritems(): + if not openers: + continue + + try: + host_ref = next(ref for ref, rec in hosts.iteritems() if rec['hostname'] == hostname) + except StopIteration: + continue + + no_host_found = False + try: + return call_remote_method(self._session, host_ref, remote_method, device_path, remote_args) + except Exception: + pass + + if no_host_found: + try: + return local_method(device_path, *args, **kwargs) + except Exception as e: + self._raise_openers_exception(device_path, e) + + raise xs_errors.XenError( + 'VDIUnavailable', + opterr='No valid host found to run vhd-util command `{}` (path=`{}`, openers=`{}`)' + .format(remote_method, device_path, openers) + ) + return util.retry(remote_call, 5, 2) + + @staticmethod + def _zeroize(path, size): + if not util.zeroOut(path, size, vhdutil.VHD_FOOTER_SIZE): + raise xs_errors.XenError( + 'EIO', + opterr='Failed to zero out VHD footer {}'.format(path) + ) diff --git a/drivers/linstorvolumemanager.py b/drivers/linstorvolumemanager.py index 182b88992..5e5bcd518 100755 --- a/drivers/linstorvolumemanager.py +++ b/drivers/linstorvolumemanager.py @@ -16,14 +16,104 @@ # +import distutils.util +import errno +import glob import json import linstor import os.path import re +import shutil import socket +import stat import time import util +import uuid +# Persistent prefix to add to RAW persistent volumes. +PERSISTENT_PREFIX = 'xcp-persistent-' + +# Contains the data of the "/var/lib/linstor" directory. +DATABASE_VOLUME_NAME = PERSISTENT_PREFIX + 'database' +DATABASE_SIZE = 1 << 30 # 1GB. +DATABASE_PATH = '/var/lib/linstor' +DATABASE_MKFS = 'mkfs.ext4' + +REG_DRBDADM_PRIMARY = re.compile("([^\\s]+)\\s+role:Primary") +REG_DRBDSETUP_IP = re.compile('[^\\s]+\\s+(.*):.*$') + +DRBD_BY_RES_PATH = '/dev/drbd/by-res/' + +PLUGIN = 'linstor-manager' + + +# ============================================================================== + +def get_local_volume_openers(resource_name, volume): + if not resource_name or volume is None: + raise Exception('Cannot get DRBD openers without resource name and/or volume.') + + path = '/sys/kernel/debug/drbd/resources/{}/volumes/{}/openers'.format( + resource_name, volume + ) + + with open(path, 'r') as openers: + # Not a big cost, so read all lines directly. 
+ lines = openers.readlines() + + result = {} + + opener_re = re.compile('(.*)\\s+([0-9]+)\\s+([0-9]+)') + for line in lines: + match = opener_re.match(line) + assert match + + groups = match.groups() + process_name = groups[0] + pid = groups[1] + open_duration_ms = groups[2] + result[pid] = { + 'process-name': process_name, + 'open-duration': open_duration_ms + } + + return json.dumps(result) + +def get_all_volume_openers(resource_name, volume): + PLUGIN_CMD = 'getDrbdOpeners' + + volume = str(volume) + openers = {} + + # Make sure this call never stucks because this function can be called + # during HA init and in this case we can wait forever. + session = util.timeout_call(10, util.get_localAPI_session) + + hosts = session.xenapi.host.get_all_records() + for host_ref, host_record in hosts.items(): + node_name = host_record['hostname'] + try: + if not session.xenapi.host_metrics.get_record( + host_record['metrics'] + )['live']: + # Ensure we call plugin on online hosts only. + continue + + openers[node_name] = json.loads( + session.xenapi.host.call_plugin(host_ref, PLUGIN, PLUGIN_CMD, { + 'resourceName': resource_name, + 'volume': volume + }) + ) + except Exception as e: + util.SMlog('Failed to get openers of `{}` on `{}`: {}'.format( + resource_name, node_name, e + )) + + return openers + + +# ============================================================================== def round_up(value, divisor): assert divisor @@ -37,10 +127,153 @@ def round_down(value, divisor): return value - (value % int(divisor)) +# ============================================================================== + +def get_remote_host_ip(node_name): + (ret, stdout, stderr) = util.doexec([ + 'drbdsetup', 'show', DATABASE_VOLUME_NAME, '--json' + ]) + if ret != 0: + return + + try: + conf = json.loads(stdout) + if not conf: + return + + for connection in conf[0]['connections']: + if connection['net']['_name'] == node_name: + value = connection['path']['_remote_host'] + res = REG_DRBDSETUP_IP.match(value) + if res: + return res.groups()[0] + break + except Exception: + pass + + +def _get_controller_uri(): + PLUGIN_CMD = 'hasControllerRunning' + + # Try to find controller using drbdadm. + (ret, stdout, stderr) = util.doexec([ + 'drbdadm', 'status', DATABASE_VOLUME_NAME + ]) + if ret == 0: + # If we are here, the database device exists locally. + + if stdout.startswith('{} role:Primary'.format(DATABASE_VOLUME_NAME)): + # Nice case, we have the controller running on this local host. + return 'linstor://localhost' + + # Try to find the host using DRBD connections. + res = REG_DRBDADM_PRIMARY.search(stdout) + if res: + node_name = res.groups()[0] + ip = get_remote_host_ip(node_name) + if ip: + return 'linstor://' + ip + + # Worst case: we use many hosts in the pool (>= 4), so we can't find the + # primary using drbdadm because we don't have all connections to the + # replicated volume. `drbdadm status xcp-persistent-database` returns + # 3 connections by default. + try: + session = util.timeout_call(10, util.get_localAPI_session) + + for host_ref, host_record in session.xenapi.host.get_all_records().items(): + node_name = host_record['hostname'] + try: + if distutils.util.strtobool( + session.xenapi.host.call_plugin(host_ref, PLUGIN, PLUGIN_CMD, {}) + ): + return 'linstor://' + host_record['address'] + except Exception as e: + # Can throw and exception if a host is offline. So catch it. 
+ util.SMlog('Unable to search controller on `{}`: {}'.format( + node_name, e + )) + except: + # Not found, maybe we are trying to create the SR... + pass + +def get_controller_uri(): + retries = 0 + while True: + uri = _get_controller_uri() + if uri: + return uri + + retries += 1 + if retries >= 10: + break + time.sleep(1) + + +def get_controller_node_name(): + PLUGIN_CMD = 'hasControllerRunning' + + (ret, stdout, stderr) = util.doexec([ + 'drbdadm', 'status', DATABASE_VOLUME_NAME + ]) + + if ret == 0: + if stdout.startswith('{} role:Primary'.format(DATABASE_VOLUME_NAME)): + return 'localhost' + + res = REG_DRBDADM_PRIMARY.search(stdout) + if res: + return res.groups()[0] + + session = util.timeout_call(5, util.get_localAPI_session) + + for host_ref, host_record in session.xenapi.host.get_all_records().items(): + node_name = host_record['hostname'] + try: + if not session.xenapi.host_metrics.get_record( + host_record['metrics'] + )['live']: + continue + + if distutils.util.strtobool(session.xenapi.host.call_plugin( + host_ref, PLUGIN, PLUGIN_CMD, {} + )): + return node_name + except Exception as e: + util.SMlog('Failed to call plugin to get controller on `{}`: {}'.format( + node_name, e + )) + + +def demote_drbd_resource(node_name, resource_name): + PLUGIN_CMD = 'demoteDrbdResource' + + session = util.timeout_call(5, util.get_localAPI_session) + + for host_ref, host_record in session.xenapi.host.get_all_records().items(): + if host_record['hostname'] != node_name: + continue + + try: + session.xenapi.host.call_plugin( + host_ref, PLUGIN, PLUGIN_CMD, {'resource_name': resource_name} + ) + except Exception as e: + util.SMlog('Failed to demote resource `{}` on `{}`: {}'.format( + resource_name, node_name, e + )) + raise Exception( + 'Can\'t demote resource `{}`, unable to find node `{}`' + .format(resource_name, node_name) + ) + +# ============================================================================== + class LinstorVolumeManagerError(Exception): ERR_GENERIC = 0, ERR_VOLUME_EXISTS = 1, - ERR_VOLUME_NOT_EXISTS = 2 + ERR_VOLUME_NOT_EXISTS = 2, + ERR_VOLUME_DESTROY = 3 def __init__(self, message, code=ERR_GENERIC): super(LinstorVolumeManagerError, self).__init__(message) @@ -50,6 +283,7 @@ def __init__(self, message, code=ERR_GENERIC): def code(self): return self._code + # ============================================================================== # Note: @@ -63,10 +297,20 @@ class LinstorVolumeManager(object): A volume in this context is a physical part of the storage layer. """ - DEV_ROOT_PATH = '/dev/drbd/by-res/' + __slots__ = ( + '_linstor', '_logger', + '_uri', '_base_group_name', + '_redundancy', '_group_name', + '_volumes', '_storage_pools', + '_storage_pools_time', + '_kv_cache', '_resource_cache', '_volume_info_cache', + '_kv_cache_dirty', '_resource_cache_dirty', '_volume_info_cache_dirty' + ) - # Default LVM extent size. - BLOCK_SIZE = 4 * 1024 * 1024 + DEV_ROOT_PATH = DRBD_BY_RES_PATH + + # Default sector size. + BLOCK_SIZE = 512 # List of volume properties. PROP_METADATA = 'metadata' @@ -90,7 +334,7 @@ class LinstorVolumeManager(object): # Property namespaces. NAMESPACE_SR = 'xcp/sr' - NAMESPACE_VOLUME = 'volume' + NAMESPACE_VOLUME = 'xcp/volume' # Regex to match properties. REG_PROP = '^([^/]+)/{}$' @@ -106,6 +350,10 @@ class LinstorVolumeManager(object): PREFIX_SR = 'xcp-sr-' PREFIX_VOLUME = 'xcp-volume-' + # Limit request number when storage pool info is asked, we fetch + # the current pool status after N elapsed seconds. 
+ STORAGE_POOLS_FETCH_INTERVAL = 15 + @staticmethod def default_logger(*args): print(args) @@ -117,38 +365,43 @@ def default_logger(*args): class VolumeInfo(object): __slots__ = ( 'name', - 'physical_size', # Total physical size used by this volume on - # all disks. - 'virtual_size' # Total virtual available size of this volume - # (i.e. the user size at creation). + 'allocated_size', # Allocated size, place count is not used. + 'virtual_size', # Total virtual available size of this volume + # (i.e. the user size at creation). + 'diskful' # Array of nodes that have a diskful volume. ) def __init__(self, name): self.name = name - self.physical_size = 0 + self.allocated_size = 0 self.virtual_size = 0 + self.diskful = [] def __repr__(self): - return 'VolumeInfo("{}", {}, {})'.format( - self.name, self.physical_size, self.virtual_size + return 'VolumeInfo("{}", {}, {}, {})'.format( + self.name, self.allocated_size, self.virtual_size, + self.diskful ) # -------------------------------------------------------------------------- def __init__( - self, uri, group_name, repair=False, logger=default_logger.__func__ + self, uri, group_name, repair=False, logger=default_logger.__func__, + attempt_count=30 ): """ - Create a new LinstorApi object. + Create a new LinstorVolumeManager object. :param str uri: URI to communicate with the LINSTOR controller. :param str group_name: The SR goup name to use. :param bool repair: If true we try to remove bad volumes due to a crash or unexpected behavior. :param function logger: Function to log messages. + :param int attempt_count: Number of attempts to join the controller. """ - self._uri = uri - self._linstor = self._create_linstor_instance(uri) + self._linstor = self._create_linstor_instance( + uri, attempt_count=attempt_count + ) self._base_group_name = group_name # Ensure group exists. @@ -164,6 +417,16 @@ def __init__( self._logger = logger self._redundancy = groups[0].select_filter.place_count self._group_name = group_name + self._volumes = set() + self._storage_pools_time = 0 + + # To increate performance and limit request count to LINSTOR services, + # we use caches. + self._kv_cache = self._create_kv_cache() + self._resource_cache = None + self._resource_cache_dirty = True + self._volume_info_cache = None + self._volume_info_cache_dirty = True self._build_volumes(repair=repair) @property @@ -175,6 +438,15 @@ def group_name(self): """ return self._base_group_name + @property + def redundancy(self): + """ + Give the used redundancy. + :return: The redundancy. + :rtype: int + """ + return self._redundancy + @property def volumes(self): """ @@ -184,66 +456,6 @@ def volumes(self): """ return self._volumes - @property - def volumes_with_name(self): - """ - Give a volume dictionnary that contains names actually owned. - :return: A volume/name dict. - :rtype: dict(str, str) - """ - return self._get_volumes_by_property(self.REG_VOLUME_NAME) - - @property - def volumes_with_info(self): - """ - Give a volume dictionnary that contains VolumeInfos. - :return: A volume/VolumeInfo dict. - :rtype: dict(str, VolumeInfo) - """ - - volumes = {} - - all_volume_info = self._get_volumes_info() - volume_names = self.volumes_with_name - for volume_uuid, volume_name in volume_names.items(): - if volume_name: - volume_info = all_volume_info.get(volume_name) - if volume_info: - volumes[volume_uuid] = volume_info - continue - - # Well I suppose if this volume is not available, - # LINSTOR has been used directly without using this API. 
- volumes[volume_uuid] = self.VolumeInfo('') - - return volumes - - @property - def volumes_with_metadata(self): - """ - Give a volume dictionnary that contains metadata. - :return: A volume/metadata dict. - :rtype: dict(str, dict) - """ - - volumes = {} - - metadata = self._get_volumes_by_property(self.REG_METADATA) - for volume_uuid, volume_metadata in metadata.items(): - if volume_metadata: - volume_metadata = json.loads(volume_metadata) - if isinstance(volume_metadata, dict): - volumes[volume_uuid] = volume_metadata - continue - raise LinstorVolumeManagerError( - 'Expected dictionary in volume metadata: {}' - .format(volume_uuid) - ) - - volumes[volume_uuid] = {} - - return volumes - @property def max_volume_size_allowed(self): """ @@ -284,26 +496,67 @@ def physical_free_size(self): return self._compute_size('free_capacity') @property - def total_allocated_volume_size(self): + def allocated_volume_size(self): """ - Give the sum of all created volumes. - :return: The physical required size to use the volumes. + Give the allocated size for all volumes. The place count is not + used here. When thick lvm is used, the size for one volume should + be equal to the virtual volume size. With thin lvm, the size is equal + or lower to the volume size. + :return: The allocated size of all volumes. :rtype: int """ - size = 0 - for resource in self._linstor.resource_list_raise().resources: + # Paths: /res_name/vol_number/size + sizes = {} + + for resource in self._get_resource_cache().resources: + if resource.name not in sizes: + current = sizes[resource.name] = {} + else: + current = sizes[resource.name] + for volume in resource.volumes: # We ignore diskless pools of the form "DfltDisklessStorPool". - if volume.storage_pool_name == self._group_name: - current_size = volume.usable_size - if current_size < 0: - raise LinstorVolumeManagerError( - 'Failed to get usable size of `{}` on `{}`' - .format(resource.name, volume.storage_pool_name) - ) - size += current_size - return size * 1024 + if volume.storage_pool_name != self._group_name: + continue + + current_size = volume.allocated_size + if current_size < 0: + raise LinstorVolumeManagerError( + 'Failed to get allocated size of `{}` on `{}`' + .format(resource.name, volume.storage_pool_name) + ) + current[volume.number] = max(current_size, current.get(volume.number) or 0) + + total_size = 0 + for volumes in sizes.itervalues(): + for size in volumes.itervalues(): + total_size += size + + return total_size * 1024 + + def get_min_physical_size(self): + """ + Give the minimum physical size of the SR. + I.e. the size of the smallest disk + the number of pools. + :return: The physical min size. 
+ :rtype: tuple(int, int) + """ + size = None + pool_count = 0 + for pool in self._get_storage_pools(force=True): + space = pool.free_space + if space: + pool_count += 1 + current_size = space.total_capacity + if current_size < 0: + raise LinstorVolumeManagerError( + 'Failed to get pool total_capacity attr of `{}`' + .format(pool.node_name) + ) + if size is None or current_size < size: + size = current_size + return (pool_count, (size or 0) * 1024) @property def metadata(self): @@ -346,12 +599,8 @@ def disconnected_hosts(self): :rtype: set(str) """ - pools = self._linstor.storage_pool_list_raise( - filter_by_stor_pools=[self._group_name] - ).storage_pools - disconnected_hosts = set() - for pool in pools: + for pool in self._get_storage_pools(): for report in pool.reports: if report.ret_code & linstor.consts.WARN_NOT_CONNECTED == \ linstor.consts.WARN_NOT_CONNECTED: @@ -367,23 +616,29 @@ def check_volume_exists(self, volume_uuid): """ return volume_uuid in self._volumes - def create_volume(self, volume_uuid, size, persistent=True): + def create_volume( + self, volume_uuid, size, persistent=True, volume_name=None + ): """ Create a new volume on the SR. :param str volume_uuid: The volume uuid to use. :param int size: volume size in B. :param bool persistent: If false the volume will be unavailable on the next constructor call LinstorSR(...). + :param str volume_name: If set, this name is used in the LINSTOR + database instead of a generated name. :return: The current device path of the volume. :rtype: str """ self._logger('Creating LINSTOR volume {}...'.format(volume_uuid)) - volume_name = self.build_volume_name(util.gen_uuid()) + if not volume_name: + volume_name = self.build_volume_name(util.gen_uuid()) volume_properties = self._create_volume_with_properties( volume_uuid, volume_name, size, place_resources=True ) + # Volume created! Now try to find the device path. try: self._logger( 'Find device path of LINSTOR volume {}...'.format(volume_uuid) @@ -396,8 +651,10 @@ def create_volume(self, volume_uuid, size, persistent=True): 'LINSTOR volume {} created!'.format(volume_uuid) ) return device_path - except Exception: - self._force_destroy_volume(volume_uuid, volume_properties) + except Exception as e: + # There is an issue to find the path. + # At this point the volume has just been created, so force flag can be used. + self._destroy_volume(volume_uuid, force=True) raise def mark_volume_as_persistent(self, volume_uuid): @@ -425,8 +682,14 @@ def destroy_volume(self, volume_uuid): volume_properties = self._get_volume_properties(volume_uuid) volume_properties[self.PROP_NOT_EXISTS] = self.STATE_NOT_EXISTS - self._volumes.remove(volume_uuid) - self._destroy_volume(volume_uuid, volume_properties) + try: + self._volumes.remove(volume_uuid) + self._destroy_volume(volume_uuid) + except Exception as e: + raise LinstorVolumeManagerError( + str(e), + LinstorVolumeManagerError.ERR_VOLUME_DESTROY + ) def lock_volume(self, volume_uuid, locked=True): """ @@ -476,12 +739,15 @@ def ensure_volume_list_is_not_locked(self, volume_uuids, timeout=None): waiting = False + volume_properties = self._get_kv_cache() + start = time.time() while True: # Can't delete in for loop, use a copy of the list. 
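# Illustrative sketch, not from the patch: ensure_volume_list_is_not_locked()
# waits for the read-only markers written by lock_volume() to disappear, with an
# optional overall timeout measured from the first check. The waiting pattern in
# isolation (is_locked is a hypothetical callback):
import time

def wait_until_unlocked(uuids, is_locked, timeout=None, period=1.0):
    pending = set(uuids)
    start = time.time()
    while pending:
        pending = {uuid for uuid in pending if is_locked(uuid)}
        if not pending:
            return
        if timeout is not None and time.time() - start > timeout:
            raise Exception(
                'Timeout reached, volumes still locked: {}'.format(sorted(pending))
            )
        time.sleep(period)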
remaining = checked.copy() for volume_uuid in checked: - volume_properties = self._get_volume_properties(volume_uuid) + volume_properties.namespace = \ + self._build_volume_namespace(volume_uuid) timestamp = volume_properties.get( self.PROP_IS_READONLY_TIMESTAMP ) @@ -519,10 +785,33 @@ def ensure_volume_list_is_not_locked(self, volume_uuids, timeout=None): # We must wait to use the volume. After that we can modify it # ONLY if the SR is locked to avoid bad reads on the slaves. time.sleep(1) + volume_properties = self._create_kv_cache() if waiting: self._logger('No volume locked now!') + def remove_volume_if_diskless(self, volume_uuid): + """ + Remove disless path from local node. + :param str volume_uuid: The volume uuid to remove. + """ + + self._ensure_volume_exists(volume_uuid) + + volume_properties = self._get_volume_properties(volume_uuid) + volume_name = volume_properties.get(self.PROP_VOLUME_NAME) + + node_name = socket.gethostname() + result = self._linstor.resource_delete_if_diskless( + node_name=node_name, rsc_name=volume_name + ) + if not linstor.Linstor.all_api_responses_no_error(result): + raise LinstorVolumeManagerError( + 'Unable to delete diskless path of `{}` on node `{}`: {}' + .format(volume_name, node_name, ', '.join( + [str(x) for x in result])) + ) + def introduce_volume(self, volume_uuid): pass # TODO: Implement me. @@ -535,15 +824,30 @@ def resize_volume(self, volume_uuid, new_size): volume_name = self.get_volume_name(volume_uuid) self.ensure_volume_is_not_locked(volume_uuid) - new_size = self.round_up_volume_size(new_size) + new_size = self.round_up_volume_size(new_size) // 1024 + + retry_count = 30 + while True: + result = self._linstor.volume_dfn_modify( + rsc_name=volume_name, + volume_nr=0, + size=new_size + ) + + self._mark_resource_cache_as_dirty() + + error_str = self._get_error_str(result) + if not error_str: + break + + # After volume creation, DRBD volume can be unusable during many seconds. + # So we must retry the definition change if the device is not up to date. + # Often the case for thick provisioning. + if retry_count and error_str.find('non-UpToDate DRBD device') >= 0: + time.sleep(2) + retry_count -= 1 + continue - result = self._linstor.volume_dfn_modify( - rsc_name=volume_name, - volume_nr=0, - size=new_size // 1024 - ) - error_str = self._get_error_str(result) - if error_str: raise LinstorVolumeManagerError( 'Could not resize volume `{}` from SR `{}`: {}' .format(volume_uuid, self._group_name, error_str) @@ -587,6 +891,24 @@ def get_volume_size(self, volume_uuid): ) return size * 1024 + def set_auto_promote_timeout(self, volume_uuid, timeout): + """ + Define the blocking time of open calls when a DRBD + is already open on another host. + :param str volume_uuid: The volume uuid to modify. + """ + + volume_name = self.get_volume_name(volume_uuid) + result = self._linstor.resource_dfn_modify(volume_name, { + 'DrbdOptions/Resource/auto-promote-timeout': timeout + }) + error_str = self._get_error_str(result) + if error_str: + raise LinstorVolumeManagerError( + 'Could not change the auto promote timeout of `{}`: {}' + .format(volume_uuid, error_str) + ) + def get_volume_info(self, volume_uuid): """ Get the volume info of a particular volume. @@ -596,11 +918,11 @@ def get_volume_info(self, volume_uuid): """ volume_name = self.get_volume_name(volume_uuid) - return self._get_volumes_info(filter=[volume_name])[volume_name] + return self._get_volumes_info()[volume_name] def get_device_path(self, volume_uuid): """ - Get the dev path of a volume. 
+ Get the dev path of a volume, create a diskless if necessary. :param str volume_uuid: The volume uuid to get the dev path. :return: The current device path of the volume. :rtype: str @@ -620,7 +942,7 @@ def get_volume_uuid_from_device_path(self, device_path): expected_volume_name = \ self.get_volume_name_from_device_path(device_path) - volume_names = self.volumes_with_name + volume_names = self.get_volumes_with_name() for volume_uuid, volume_name in volume_names.items(): if volume_name == expected_volume_name: return volume_uuid @@ -631,26 +953,24 @@ def get_volume_uuid_from_device_path(self, device_path): def get_volume_name_from_device_path(self, device_path): """ - Get the volume name of a device_path on the current host. + Get the volume name of a device_path. :param str device_path: The dev path to find the volume name. - :return: The volume name of the local device path. + :return: The volume name of the device path. :rtype: str """ - node_name = socket.gethostname() - resources = self._linstor.resource_list_raise( - filter_by_nodes=[node_name] - ).resources - - real_device_path = os.path.realpath(device_path) - for resource in resources: - if resource.volumes[0].device_path == real_device_path: - return resource.name + # Assume that we have a path like this: + # - "/dev/drbd/by-res/xcp-volume-/0" + # - "../xcp-volume-/0" + if device_path.startswith(DRBD_BY_RES_PATH): + prefix_len = len(DRBD_BY_RES_PATH) + else: + assert device_path.startswith('../') + prefix_len = 3 - raise LinstorVolumeManagerError( - 'Unable to find volume name from dev path `{}`' - .format(device_path) - ) + res_name_end = device_path.find('/', prefix_len) + assert res_name_end != -1 + return device_path[prefix_len:res_name_end] def update_volume_uuid(self, volume_uuid, new_volume_uuid, force=False): """ @@ -668,6 +988,8 @@ def update_volume_uuid(self, volume_uuid, new_volume_uuid, force=False): 'Trying to update volume UUID {} to {}...' .format(volume_uuid, new_volume_uuid) ) + assert volume_uuid != new_volume_uuid, 'can\'t update volume UUID, same value' + if not force: self._ensure_volume_exists(volume_uuid) self.ensure_volume_is_not_locked(volume_uuid) @@ -685,36 +1007,45 @@ def update_volume_uuid(self, volume_uuid, new_volume_uuid, force=False): .format(volume_uuid) ) - new_volume_properties = self._get_volume_properties( + # 1. Copy in temp variables metadata and volume_name. + metadata = volume_properties.get(self.PROP_METADATA) + volume_name = volume_properties.get(self.PROP_VOLUME_NAME) + + # 2. Switch to new volume namespace. + volume_properties.namespace = self._build_volume_namespace( new_volume_uuid ) - if list(new_volume_properties.items()): + + if list(volume_properties.items()): raise LinstorVolumeManagerError( 'Cannot update volume uuid {} to {}: ' .format(volume_uuid, new_volume_uuid) + 'this last one is not empty' ) - assert volume_properties.namespace != \ - new_volume_properties.namespace - try: - # 1. Mark new volume properties with PROP_UPDATING_UUID_SRC. + # 3. Mark new volume properties with PROP_UPDATING_UUID_SRC. # If we crash after that, the new properties can be removed # properly. - new_volume_properties[self.PROP_NOT_EXISTS] = self.STATE_NOT_EXISTS - new_volume_properties[self.PROP_UPDATING_UUID_SRC] = volume_uuid - - # 2. Copy the properties. 
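# Illustrative sketch, not from the patch: get_volume_name_from_device_path()
# above is pure string slicing: drop the "/dev/drbd/by-res/" (or "../") prefix
# and keep everything up to the next "/". Standalone version; the constant value
# and the sample volume name below are assumptions:
DRBD_BY_RES_PATH = '/dev/drbd/by-res/'  # assumed value of the constant used above

def volume_name_from_path(device_path):
    if device_path.startswith(DRBD_BY_RES_PATH):
        prefix_len = len(DRBD_BY_RES_PATH)
    else:
        assert device_path.startswith('../')
        prefix_len = 3
    end = device_path.find('/', prefix_len)
    assert end != -1
    return device_path[prefix_len:end]

# volume_name_from_path('/dev/drbd/by-res/xcp-volume-1234/0') -> 'xcp-volume-1234'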
- for property in [self.PROP_METADATA, self.PROP_VOLUME_NAME]: - new_volume_properties[property] = \ - volume_properties.get(property) + volume_properties[self.PROP_NOT_EXISTS] = self.STATE_NOT_EXISTS + volume_properties[self.PROP_UPDATING_UUID_SRC] = volume_uuid + + # 4. Copy the properties. + # Note: On new volumes, during clone for example, the metadata + # may be missing. So we must test it to avoid this error: + # "None has to be a str/unicode, but is " + if metadata: + volume_properties[self.PROP_METADATA] = metadata + volume_properties[self.PROP_VOLUME_NAME] = volume_name - # 3. Ok! - new_volume_properties[self.PROP_NOT_EXISTS] = self.STATE_EXISTS + # 5. Ok! + volume_properties[self.PROP_NOT_EXISTS] = self.STATE_EXISTS except Exception as e: try: - new_volume_properties.clear() + # Clear the new volume properties in case of failure. + assert volume_properties.namespace == \ + self._build_volume_namespace(new_volume_uuid) + volume_properties.clear() except Exception as e: self._logger( 'Failed to clear new volume properties: {} (ignoring...)' @@ -725,11 +1056,21 @@ def update_volume_uuid(self, volume_uuid, new_volume_uuid, force=False): ) try: - # 4. After this point, it's ok we can remove the + # 6. After this point, it's ok we can remove the # PROP_UPDATING_UUID_SRC property and clear the src properties # without problems. + + # 7. Switch to old volume namespace. + volume_properties.namespace = self._build_volume_namespace( + volume_uuid + ) volume_properties.clear() - new_volume_properties.pop(self.PROP_UPDATING_UUID_SRC) + + # 8. Switch a last time to new volume namespace. + volume_properties.namespace = self._build_volume_namespace( + new_volume_uuid + ) + volume_properties.pop(self.PROP_UPDATING_UUID_SRC) except Exception as e: raise LinstorVolumeManagerError( 'Failed to clear volume properties ' @@ -743,7 +1084,7 @@ def update_volume_uuid(self, volume_uuid, new_volume_uuid, force=False): 'UUID update succeeded of {} to {}! (properties={})' .format( volume_uuid, new_volume_uuid, - self._get_filtered_properties(new_volume_properties) + self._get_filtered_properties(volume_properties) ) ) @@ -788,55 +1129,121 @@ def get_usage_states(self, volume_uuid): return states - def get_volume_metadata(self, volume_uuid): + def get_volume_openers(self, volume_uuid): """ - Get the metadata of a volume. - :return: Dictionary that contains metadata. - :rtype: dict + Get openers of a volume. + :param str volume_uuid: The volume uuid to monitor. + :return: A dictionnary that contains openers. + :rtype: dict(str, obj) """ + return get_all_volume_openers(self.get_volume_name(volume_uuid), '0') - self._ensure_volume_exists(volume_uuid) - volume_properties = self._get_volume_properties(volume_uuid) - metadata = volume_properties.get(self.PROP_METADATA) - if metadata: - metadata = json.loads(metadata) - if isinstance(metadata, dict): - return metadata - raise LinstorVolumeManagerError( - 'Expected dictionary in volume metadata: {}' - .format(volume_uuid) - ) - return {} - - def set_volume_metadata(self, volume_uuid, metadata): + def get_volumes_with_name(self): """ - Set the metadata of a volume. - :param dict metadata: Dictionary that contains metadata. + Give a volume dictionnary that contains names actually owned. + :return: A volume/name dict. 
+ :rtype: dict(str, str) """ + return self._get_volumes_by_property(self.REG_VOLUME_NAME) - self._ensure_volume_exists(volume_uuid) - self.ensure_volume_is_not_locked(volume_uuid) - - assert isinstance(metadata, dict) - volume_properties = self._get_volume_properties(volume_uuid) - volume_properties[self.PROP_METADATA] = json.dumps(metadata) - - def update_volume_metadata(self, volume_uuid, metadata): + def get_volumes_with_info(self): """ - Update the metadata of a volume. It modify only the given keys. - It doesn't remove unreferenced key instead of set_volume_metadata. - :param dict metadata: Dictionary that contains metadata. + Give a volume dictionnary that contains VolumeInfos. + :return: A volume/VolumeInfo dict. + :rtype: dict(str, VolumeInfo) """ - self._ensure_volume_exists(volume_uuid) - self.ensure_volume_is_not_locked(volume_uuid) + volumes = {} - assert isinstance(metadata, dict) - volume_properties = self._get_volume_properties(volume_uuid) + all_volume_info = self._get_volumes_info() + volume_names = self.get_volumes_with_name() + for volume_uuid, volume_name in volume_names.items(): + if volume_name: + volume_info = all_volume_info.get(volume_name) + if volume_info: + volumes[volume_uuid] = volume_info + continue - current_metadata = json.loads( - volume_properties.get(self.PROP_METADATA, '{}') - ) + # Well I suppose if this volume is not available, + # LINSTOR has been used directly without using this API. + volumes[volume_uuid] = self.VolumeInfo('') + + return volumes + + def get_volumes_with_metadata(self): + """ + Give a volume dictionnary that contains metadata. + :return: A volume/metadata dict. + :rtype: dict(str, dict) + """ + + volumes = {} + + metadata = self._get_volumes_by_property(self.REG_METADATA) + for volume_uuid, volume_metadata in metadata.items(): + if volume_metadata: + volume_metadata = json.loads(volume_metadata) + if isinstance(volume_metadata, dict): + volumes[volume_uuid] = volume_metadata + continue + raise LinstorVolumeManagerError( + 'Expected dictionary in volume metadata: {}' + .format(volume_uuid) + ) + + volumes[volume_uuid] = {} + + return volumes + + def get_volume_metadata(self, volume_uuid): + """ + Get the metadata of a volume. + :return: Dictionary that contains metadata. + :rtype: dict + """ + + self._ensure_volume_exists(volume_uuid) + volume_properties = self._get_volume_properties(volume_uuid) + metadata = volume_properties.get(self.PROP_METADATA) + if metadata: + metadata = json.loads(metadata) + if isinstance(metadata, dict): + return metadata + raise LinstorVolumeManagerError( + 'Expected dictionary in volume metadata: {}' + .format(volume_uuid) + ) + return {} + + def set_volume_metadata(self, volume_uuid, metadata): + """ + Set the metadata of a volume. + :param dict metadata: Dictionary that contains metadata. + """ + + self._ensure_volume_exists(volume_uuid) + self.ensure_volume_is_not_locked(volume_uuid) + + assert isinstance(metadata, dict) + volume_properties = self._get_volume_properties(volume_uuid) + volume_properties[self.PROP_METADATA] = json.dumps(metadata) + + def update_volume_metadata(self, volume_uuid, metadata): + """ + Update the metadata of a volume. It modify only the given keys. + It doesn't remove unreferenced key instead of set_volume_metadata. + :param dict metadata: Dictionary that contains metadata. 
+ """ + + self._ensure_volume_exists(volume_uuid) + self.ensure_volume_is_not_locked(volume_uuid) + + assert isinstance(metadata, dict) + volume_properties = self._get_volume_properties(volume_uuid) + + current_metadata = json.loads( + volume_properties.get(self.PROP_METADATA, '{}') + ) if not isinstance(metadata, dict): raise LinstorVolumeManagerError( 'Expected dictionary in volume metadata: {}' @@ -850,8 +1257,7 @@ def update_volume_metadata(self, volume_uuid, metadata): def shallow_clone_volume(self, volume_uuid, clone_uuid, persistent=True): """ Clone a volume. Do not copy the data, this method creates a new volume - with the same size. It tries to create the volume on the same host - than volume source. + with the same size. :param str volume_uuid: The volume to clone. :param str clone_uuid: The cloned volume. :param bool persistent: If false the volume will be unavailable @@ -872,98 +1278,8 @@ def shallow_clone_volume(self, volume_uuid, clone_uuid, persistent=True): 'Invalid size of {} for volume `{}`'.format(size, volume_name) ) - # 2. Find the node(s) with the maximum space. - candidates = self._find_best_size_candidates() - if not candidates: - raise LinstorVolumeManagerError( - 'Unable to shallow clone volume `{}`, no free space found.' - ) - - # 3. Compute node names and search if we can try to clone - # on the same nodes than volume. - def find_best_nodes(): - for candidate in candidates: - for node_name in candidate.node_names: - if node_name in ideal_node_names: - return candidate.node_names - - node_names = find_best_nodes() - if not node_names: - node_names = candidates[0].node_names - - if len(node_names) < self._redundancy: - raise LinstorVolumeManagerError( - 'Unable to shallow clone volume `{}`, '.format(volume_uuid) + - '{} are required to clone, found: {}'.format( - self._redundancy, len(node_names) - ) - ) - - # 4. Compute resources to create. - clone_volume_name = self.build_volume_name(util.gen_uuid()) - diskless_node_names = self._get_node_names() - resources = [] - for node_name in node_names: - diskless_node_names.remove(node_name) - resources.append(linstor.ResourceData( - node_name=node_name, - rsc_name=clone_volume_name, - storage_pool=self._group_name - )) - for node_name in diskless_node_names: - resources.append(linstor.ResourceData( - node_name=node_name, - rsc_name=clone_volume_name, - diskless=True - )) - - # 5. Create resources! - def clean(properties): - try: - self._destroy_volume(clone_uuid, properties) - except Exception as e: - self._logger( - 'Unable to destroy volume {} after shallow clone fail: {}' - .format(clone_uuid, e) - ) - - def create(): - try: - volume_properties = self._create_volume_with_properties( - clone_uuid, clone_volume_name, size, - place_resources=False - ) - - result = self._linstor.resource_create(resources) - error_str = self._get_error_str(result) - if error_str: - raise LinstorVolumeManagerError( - 'Could not create cloned volume `{}` of `{}` from ' - 'SR `{}`: {}'.format( - clone_uuid, volume_uuid, self._group_name, - error_str - ) - ) - return volume_properties - except Exception: - clean(volume_properties) - raise - - # Retry because we can get errors like this: - # "Resource disappeared while waiting for it to be ready" or - # "Resource did not became ready on node 'XXX' within reasonable time, check Satellite for errors." - # in the LINSTOR server. 
- volume_properties = util.retry(create, maxretry=5) - - try: - device_path = self._find_device_path(clone_uuid, clone_volume_name) - if persistent: - volume_properties[self.PROP_NOT_EXISTS] = self.STATE_EXISTS - self._volumes.add(clone_uuid) - return device_path - except Exception as e: - clean(volume_properties) - raise + # 2. Create clone! + return self.create_volume(clone_uuid, size, persistent) def remove_resourceless_volumes(self): """ @@ -974,83 +1290,461 @@ def remove_resourceless_volumes(self): """ resource_names = self._fetch_resource_names() - for volume_uuid, volume_name in self.volumes_with_name.items(): + for volume_uuid, volume_name in self.get_volumes_with_name().items(): if not volume_name or volume_name not in resource_names: + # Don't force, we can be sure of what's happening. self.destroy_volume(volume_uuid) - def destroy(self, force=False): + def destroy(self): """ Destroy this SR. Object should not be used after that. :param bool force: Try to destroy volumes before if true. """ - if (force): - for volume_uuid in self._volumes: - self.destroy_volume(volume_uuid) + # 1. Ensure volume list is empty. No cost. + if self._volumes: + raise LinstorVolumeManagerError( + 'Cannot destroy LINSTOR volume manager: ' + 'It exists remaining volumes' + ) + + # 2. Fetch ALL resource names. + # This list may therefore contain volumes created outside + # the scope of the driver. + resource_names = self._fetch_resource_names(ignore_deleted=False) + try: + resource_names.remove(DATABASE_VOLUME_NAME) + except KeyError: + # Really strange to reach that point. + # Normally we always have the database volume in the list. + pass + + # 3. Ensure the resource name list is entirely empty... + if resource_names: + raise LinstorVolumeManagerError( + 'Cannot destroy LINSTOR volume manager: ' + 'It exists remaining volumes (created externally or being deleted)' + ) + + # 4. Destroying... + controller_is_running = self._controller_is_running() + uri = 'linstor://localhost' + try: + if controller_is_running: + self._start_controller(start=False) + + # 4.1. Umount LINSTOR database. + self._mount_database_volume( + self.build_device_path(DATABASE_VOLUME_NAME), + mount=False, + force=True + ) - # TODO: Throw exceptions in the helpers below if necessary. - # TODO: What's the required action if it exists remaining volumes? + # 4.2. Refresh instance. + self._start_controller(start=True) + self._linstor = self._create_linstor_instance( + uri, keep_uri_unmodified=True + ) - self._destroy_resource_group(self._linstor, self._group_name) + # 4.3. Destroy database volume. + self._destroy_resource(DATABASE_VOLUME_NAME) + + # 4.4. Refresh linstor connection. + # Without we get this error: + # "Cannot delete resource group 'xcp-sr-linstor_group_thin_device' because it has existing resource definitions.." + # Because the deletion of the databse was not seen by Linstor for some reason. + # It seems a simple refresh of the Linstor connection make it aware of the deletion. + self._linstor.disconnect() + self._linstor.connect() + + # 4.5. Destroy group and storage pools. 
+ self._destroy_resource_group(self._linstor, self._group_name) + for pool in self._get_storage_pools(force=True): + self._destroy_storage_pool( + self._linstor, pool.name, pool.node_name + ) + except Exception as e: + self._start_controller(start=controller_is_running) + raise e - pools = self._linstor.storage_pool_list_raise( - filter_by_stor_pools=[self._group_name] - ).storage_pools - for pool in pools: - self._destroy_storage_pool( - self._linstor, pool.name, pool.node_name + try: + self._start_controller(start=False) + for file in glob.glob(DATABASE_PATH + '/'): + os.remove(file) + except Exception as e: + util.SMlog( + 'Ignoring failure after LINSTOR SR destruction: {}' + .format(e) ) - def find_up_to_date_diskfull_nodes(self, volume_uuid): + def find_up_to_date_diskful_nodes(self, volume_uuid): """ - Find all nodes that contain a specific volume using diskfull disks. + Find all nodes that contain a specific volume using diskful disks. The disk must be up to data to be used. :param str volume_uuid: The volume to use. :return: The available nodes. - :rtype: tuple(set(str), bool) + :rtype: tuple(set(str), str) """ volume_name = self.get_volume_name(volume_uuid) - in_use = False + in_use_by = None node_names = set() - resource_list = self._linstor.resource_list_raise( - filter_by_resources=[volume_name] + + resource_states = filter( + lambda resource_state: resource_state.name == volume_name, + self._get_resource_cache().resource_states ) - for resource_state in resource_list.resource_states: + + for resource_state in resource_states: volume_state = resource_state.volume_states[0] if volume_state.disk_state == 'UpToDate': node_names.add(resource_state.node_name) if resource_state.in_use: - in_use = True + in_use_by = resource_state.node_name + + return (node_names, in_use_by) + + def invalidate_resource_cache(self): + """ + If resources are impacted by external commands like vhdutil, + it's necessary to call this function to invalidate current resource + cache. + """ + self._mark_resource_cache_as_dirty() + + def has_node(self, node_name): + """ + Check if a node exists in the LINSTOR database. + :rtype: bool + """ + result = self._linstor.node_list() + error_str = self._get_error_str(result) + if error_str: + raise LinstorVolumeManagerError( + 'Failed to list nodes using `{}`: {}' + .format(node_name, error_str) + ) + return bool(result[0].node(node_name)) + + def create_node(self, node_name, ip): + """ + Create a new node in the LINSTOR database. + :param str node_name: Node name to use. + :param str ip: Host IP to communicate. + """ + result = self._linstor.node_create( + node_name, + linstor.consts.VAL_NODE_TYPE_CMBD, + ip + ) + errors = self._filter_errors(result) + if errors: + error_str = self._get_error_str(errors) + raise LinstorVolumeManagerError( + 'Failed to create node `{}`: {}'.format(node_name, error_str) + ) + + def destroy_node(self, node_name): + """ + Destroy a node in the LINSTOR database. + :param str node_name: Node name to remove. + """ + result = self._linstor.node_delete(node_name) + errors = self._filter_errors(result) + if errors: + error_str = self._get_error_str(errors) + raise LinstorVolumeManagerError( + 'Failed to destroy node `{}`: {}'.format(node_name, error_str) + ) - return (node_names, in_use) + def create_node_interface(self, node_name, name, ip): + """ + Create a new node interface in the LINSTOR database. + :param str node_name: Node name of the interface to use. + :param str name: Interface to create. + :param str ip: IP of the interface. 
+ """ + result = self._linstor.netinterface_create(node_name, name, ip) + errors = self._filter_errors(result) + if errors: + error_str = self._get_error_str(errors) + raise LinstorVolumeManagerError( + 'Failed to create node interface on `{}`: {}'.format(node_name, error_str) + ) + + def destroy_node_interface(self, node_name, name): + """ + Destroy a node interface in the LINSTOR database. + :param str node_name: Node name of the interface to remove. + :param str name: Interface to remove. + """ + result = self._linstor.netinterface_delete(node_name, name) + errors = self._filter_errors(result) + if errors: + error_str = self._get_error_str(errors) + raise LinstorVolumeManagerError( + 'Failed to destroy node interface on `{}`: {}'.format(node_name, error_str) + ) + + def modify_node_interface(self, node_name, name, ip): + """ + Modify a node interface in the LINSTOR database. Create it if necessary. + :param str node_name: Node name of the interface to use. + :param str name: Interface to modify or create. + :param str ip: IP of the interface. + """ + result = self._linstor.netinterface_create(node_name, name, ip) + errors = self._filter_errors(result) + if not errors: + return + + if self._check_errors(errors, [linstor.consts.FAIL_EXISTS_NET_IF]): + result = self._linstor.netinterface_modify(node_name, name, ip) + errors = self._filter_errors(result) + if not errors: + return + + error_str = self._get_error_str(errors) + raise LinstorVolumeManagerError( + 'Unable to modify interface on `{}`: {}'.format(node_name, error_str) + ) + + def list_node_interfaces(self, node_name): + """ + List all node interfaces. + :param str node_name: Node name to use to list interfaces. + :rtype: list + : + """ + result = self._linstor.net_interface_list(node_name) + if not result: + raise LinstorVolumeManagerError( + 'Unable to list interfaces on `{}`: no list received'.format(node_name) + ) + + interfaces = {} + for interface in result: + interface = interface._rest_data + interfaces[interface['name']] = { + 'address': interface['address'], + 'active': interface['is_active'] + } + return interfaces + + def set_node_preferred_interface(self, node_name, name): + """ + Set the preferred interface to use on a node. + :param str node_name: Node name of the interface. + :param str name: Preferred interface to use. + """ + result = self._linstor.node_modify(node_name, property_dict={'PrefNic': name}) + errors = self._filter_errors(result) + if errors: + error_str = self._get_error_str(errors) + raise LinstorVolumeManagerError( + 'Failed to set preferred node interface on `{}`: {}'.format(node_name, error_str) + ) + + def get_nodes_info(self): + """ + Get all nodes + statuses, used or not by the pool. + :rtype: dict(str, dict) + """ + try: + nodes = {} + for node in self._linstor.node_list_raise().nodes: + nodes[node.name] = node.connection_status + return nodes + except Exception as e: + raise LinstorVolumeManagerError( + 'Failed to get all nodes: `{}`'.format(e) + ) + + def get_storage_pools_info(self): + """ + Give all storage pools of current group name. 
+ :rtype: dict(str, list) + """ + storage_pools = {} + for pool in self._get_storage_pools(force=True): + if pool.node_name not in storage_pools: + storage_pools[pool.node_name] = [] + + size = -1 + capacity = -1 + + space = pool.free_space + if space: + size = space.free_capacity + if size < 0: + size = -1 + else: + size *= 1024 + capacity = space.total_capacity + if capacity <= 0: + capacity = -1 + else: + capacity *= 1024 + + storage_pools[pool.node_name].append({ + 'storage-pool-name': pool.name, + 'uuid': pool.uuid, + 'free-size': size, + 'capacity': capacity + }) + + return storage_pools + + def get_resources_info(self): + """ + Give all resources of current group name. + :rtype: dict(str, list) + """ + resources = {} + resource_list = self._linstor.resource_list_raise() + for resource in resource_list.resources: + if resource.name not in resources: + resources[resource.name] = {} + + resources[resource.name][resource.node_name] = { + 'volumes': [], + 'diskful': linstor.consts.FLAG_DISKLESS not in resource.flags, + 'tie-breaker': linstor.consts.FLAG_TIE_BREAKER in resource.flags + } + + for volume in resource.volumes: + # We ignore diskless pools of the form "DfltDisklessStorPool". + if volume.storage_pool_name != self._group_name: + continue + + usable_size = volume.usable_size + if usable_size < 0: + usable_size = -1 + else: + usable_size *= 1024 + + allocated_size = volume.allocated_size + if allocated_size < 0: + allocated_size = -1 + else: + allocated_size *= 1024 + + resources[resource.name][resource.node_name]['volumes'].append({ + 'storage-pool-name': volume.storage_pool_name, + 'uuid': volume.uuid, + 'number': volume.number, + 'device-path': volume.device_path, + 'usable-size': usable_size, + 'allocated-size': allocated_size + }) + + for resource_state in resource_list.resource_states: + resource = resources[resource_state.rsc_name][resource_state.node_name] + resource['in-use'] = resource_state.in_use + + volumes = resource['volumes'] + for volume_state in resource_state.volume_states: + volume = next((x for x in volumes if x['number'] == volume_state.number), None) + if volume: + volume['disk-state'] = volume_state.disk_state + + return resources + + def get_database_path(self): + """ + Get the database path. + :return: The current database path. + :rtype: str + """ + return self._request_database_path(self._linstor) @classmethod def create_sr( - cls, uri, group_name, node_names, redundancy, - thin_provisioning=False, + cls, group_name, ips, redundancy, + thin_provisioning, auto_quorum, logger=default_logger.__func__ ): """ Create a new SR on the given nodes. - :param str uri: URI to communicate with the LINSTOR controller. :param str group_name: The SR group_name to use. - :param list[str] node_names: String list of nodes. + :param set(str) ips: Node ips. :param int redundancy: How many copy of volumes should we store? + :param bool thin_provisioning: Use thin or thick provisioning. + :param bool auto_quorum: DB quorum is monitored by LINSTOR. :param function logger: Function to log messages. :return: A new LinstorSr instance. :rtype: LinstorSr """ + try: + cls._start_controller(start=True) + sr = cls._create_sr( + group_name, + ips, + redundancy, + thin_provisioning, + auto_quorum, + logger + ) + finally: + # Controller must be stopped and volume unmounted because + # it is the role of the drbd-reactor daemon to do the right + # actions. 
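# Illustrative sketch, not from the patch: create_sr() above does its real work
# inside try/finally so the locally started controller is always stopped and the
# database volume unmounted afterwards, leaving drbd-reactor in charge. The
# control flow with hypothetical callbacks standing in for the real helpers:
def create_sr_outline(start_controller, stop_controller, umount_db, build_sr):
    try:
        start_controller()
        sr = build_sr()
    finally:
        # Runs whether build_sr() succeeded or raised.
        stop_controller()
        umount_db()
    return sr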
+ cls._start_controller(start=False) + cls._mount_volume( + cls.build_device_path(DATABASE_VOLUME_NAME), + DATABASE_PATH, + mount=False + ) + return sr + + @classmethod + def _create_sr( + cls, group_name, ips, redundancy, + thin_provisioning, auto_quorum, + logger=default_logger.__func__ + ): # 1. Check if SR already exists. - lin = cls._create_linstor_instance(uri) + uri = 'linstor://localhost' + + lin = cls._create_linstor_instance(uri, keep_uri_unmodified=True) + + node_names = ips.keys() + for node_name, ip in ips.iteritems(): + while True: + # Try to create node. + result = lin.node_create( + node_name, + linstor.consts.VAL_NODE_TYPE_CMBD, + ip + ) + + errors = cls._filter_errors(result) + if cls._check_errors( + errors, [linstor.consts.FAIL_EXISTS_NODE] + ): + # If it already exists, remove, then recreate. + result = lin.node_delete(node_name) + error_str = cls._get_error_str(result) + if error_str: + raise LinstorVolumeManagerError( + 'Failed to remove old node `{}`: {}' + .format(node_name, error_str) + ) + elif not errors: + break # Created! + else: + raise LinstorVolumeManagerError( + 'Failed to create node `{}` with ip `{}`: {}'.format( + node_name, ip, cls._get_error_str(errors) + ) + ) + driver_pool_name = group_name + base_group_name = group_name group_name = cls._build_group_name(group_name) pools = lin.storage_pool_list_raise(filter_by_stor_pools=[group_name]) - - # TODO: Maybe if the SR already exists and if the nodes are the same, - # we can try to use it directly. pools = pools.storage_pools if pools: existing_node_names = [pool.node_name for pool in pools] @@ -1062,10 +1756,18 @@ def create_sr( if lin.resource_group_list_raise( [group_name] ).resource_groups: - raise LinstorVolumeManagerError( - 'Unable to create SR `{}`: The group name already exists' - .format(group_name) - ) + if not lin.resource_dfn_list_raise().resource_definitions: + backup_path = cls._create_database_backup_path() + logger( + 'Group name already exists `{}` without LVs. ' + 'Ignoring and moving the config files in {}'.format(group_name, backup_path) + ) + cls._move_files(DATABASE_PATH, backup_path) + else: + raise LinstorVolumeManagerError( + 'Unable to create SR `{}`: The group name already exists' + .format(group_name) + ) if thin_provisioning: driver_pool_parts = driver_pool_name.split('/') @@ -1076,9 +1778,14 @@ def create_sr( ) # 2. Create storage pool on each node + resource group. + reg_volume_group_not_found = re.compile( + ".*Volume group '.*' not found$" + ) + i = 0 try: # 2.a. Create storage pools. + storage_pool_count = 0 while i < len(node_names): node_name = node_names[i] @@ -1089,32 +1796,63 @@ def create_sr( driver_pool_name=driver_pool_name ) - error_str = cls._get_error_str(result) - if error_str: - raise LinstorVolumeManagerError( - 'Could not create SP `{}` on node `{}`: {}'.format( - group_name, - node_name, - error_str + errors = linstor.Linstor.filter_api_call_response_errors( + result + ) + if errors: + if len(errors) == 1 and errors[0].is_error( + linstor.consts.FAIL_STOR_POOL_CONFIGURATION_ERROR + ) and reg_volume_group_not_found.match(errors[0].message): + logger( + 'Volume group `{}` not found on `{}`. Ignoring...' + .format(group_name, node_name) ) - ) + cls._destroy_storage_pool(lin, group_name, node_name) + else: + error_str = cls._get_error_str(result) + raise LinstorVolumeManagerError( + 'Could not create SP `{}` on node `{}`: {}' + .format(group_name, node_name, error_str) + ) + else: + storage_pool_count += 1 i += 1 - # 2.b. Create resource group. 
- result = lin.resource_group_create( - name=group_name, - place_count=redundancy, - storage_pool=group_name, - diskless_on_remaining=True - ) - error_str = cls._get_error_str(result) - if error_str: + if not storage_pool_count: raise LinstorVolumeManagerError( - 'Could not create RG `{}`: {}'.format( - group_name, error_str + 'Unable to create SR `{}`: No VG group found'.format( + group_name, ) ) + # 2.b. Create resource group. + rg_creation_attempt = 0 + while True: + result = lin.resource_group_create( + name=group_name, + place_count=redundancy, + storage_pool=group_name, + diskless_on_remaining=False + ) + error_str = cls._get_error_str(result) + if not error_str: + break + + errors = cls._filter_errors(result) + if cls._check_errors(errors, [linstor.consts.FAIL_EXISTS_RSC_GRP]): + rg_creation_attempt += 1 + if rg_creation_attempt < 2: + try: + cls._destroy_resource_group(lin, group_name) + except Exception as e: + error_str = 'Failed to destroy old and empty RG: {}'.format(e) + else: + continue + + raise LinstorVolumeManagerError( + 'Could not create RG `{}`: {}'.format(group_name, error_str) + ) + # 2.c. Create volume group. result = lin.volume_group_create(group_name) error_str = cls._get_error_str(result) @@ -1125,30 +1863,78 @@ def create_sr( ) ) - # 3. Remove storage pools/resource/volume group in the case of errors. + # 3. Create the LINSTOR database volume and mount it. + try: + logger('Creating database volume...') + volume_path = cls._create_database_volume( + lin, group_name, node_names, redundancy, auto_quorum + ) + except LinstorVolumeManagerError as e: + if e.code != LinstorVolumeManagerError.ERR_VOLUME_EXISTS: + logger('Destroying database volume after creation fail...') + cls._force_destroy_database_volume(lin, group_name) + raise + + try: + logger('Mounting database volume...') + + # First we must disable the controller to move safely the + # LINSTOR config. + cls._start_controller(start=False) + + cls._mount_database_volume(volume_path) + except Exception as e: + # Ensure we are connected because controller has been + # restarted during mount call. + logger('Destroying database volume after mount fail...') + + try: + cls._start_controller(start=True) + except Exception: + pass + + lin = cls._create_linstor_instance( + uri, keep_uri_unmodified=True + ) + cls._force_destroy_database_volume(lin, group_name) + raise e + + cls._start_controller(start=True) + lin = cls._create_linstor_instance(uri, keep_uri_unmodified=True) + + # 4. Remove storage pools/resource/volume group in the case of errors. except Exception as e: + logger('Destroying resource group and storage pools after fail...') try: cls._destroy_resource_group(lin, group_name) - except Exception: + except Exception as e2: + logger('Failed to destroy resource group: {}'.format(e2)) pass j = 0 i = min(i, len(node_names) - 1) while j <= i: try: cls._destroy_storage_pool(lin, group_name, node_names[j]) - except Exception: + except Exception as e2: + logger('Failed to destroy resource group: {}'.format(e2)) pass j += 1 raise e - # 4. Return new instance. + # 5. Return new instance. 
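# Illustrative sketch, not from the patch: step 2.b above retries the resource
# group creation exactly once; if the group already exists as a stale leftover,
# it is destroyed and the creation re-attempted. The retry skeleton with
# hypothetical callbacks (create returns an error string or None):
def create_with_one_cleanup(create, is_already_exists, destroy):
    attempts = 0
    while True:
        error = create()
        if not error:
            return
        if is_already_exists(error) and attempts == 0:
            attempts += 1
            try:
                destroy()
            except Exception as e:
                error = 'Failed to destroy old and empty RG: {}'.format(e)
            else:
                continue
        raise Exception('Could not create RG: {}'.format(error))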
instance = cls.__new__(cls) - instance._uri = uri instance._linstor = lin instance._logger = logger instance._redundancy = redundancy + instance._base_group_name = base_group_name instance._group_name = group_name instance._volumes = set() + instance._storage_pools_time = 0 + instance._kv_cache = instance._create_kv_cache() + instance._resource_cache = None + instance._resource_cache_dirty = True + instance._volume_info_cache = None + instance._volume_info_cache_dirty = True return instance @classmethod @@ -1196,6 +1982,32 @@ def round_down_volume_size(cls, volume_size): # Private helpers. # -------------------------------------------------------------------------- + def _create_kv_cache(self): + self._kv_cache = self._create_linstor_kv('/') + self._kv_cache_dirty = False + return self._kv_cache + + def _get_kv_cache(self): + if self._kv_cache_dirty: + self._kv_cache = self._create_kv_cache() + return self._kv_cache + + def _create_resource_cache(self): + self._resource_cache = self._linstor.resource_list_raise() + self._resource_cache_dirty = False + return self._resource_cache + + def _get_resource_cache(self): + if self._resource_cache_dirty: + self._resource_cache = self._create_resource_cache() + return self._resource_cache + + def _mark_resource_cache_as_dirty(self): + self._resource_cache_dirty = True + self._volume_info_cache_dirty = True + + # -------------------------------------------------------------------------- + def _ensure_volume_exists(self, volume_uuid): if volume_uuid not in self._volumes: raise LinstorVolumeManagerError( @@ -1215,21 +2027,24 @@ def _find_best_size_candidates(self): ) return result[0].candidates - def _fetch_resource_names(self): + def _fetch_resource_names(self, ignore_deleted=True): resource_names = set() dfns = self._linstor.resource_dfn_list_raise().resource_definitions for dfn in dfns: - if dfn.resource_group_name == self._group_name and \ - linstor.consts.FLAG_DELETE not in dfn.flags: + if dfn.resource_group_name == self._group_name and ( + ignore_deleted or + linstor.consts.FLAG_DELETE not in dfn.flags + ): resource_names.add(dfn.name) return resource_names - def _get_volumes_info(self, filter=None): + def _get_volumes_info(self, volume_name=None): all_volume_info = {} - resources = self._linstor.resource_list_raise( - filter_by_resources=filter - ) - for resource in resources.resources: + + if not self._volume_info_cache_dirty: + return self._volume_info_cache + + for resource in self._get_resource_cache().resources: if resource.name not in all_volume_info: current = all_volume_info[resource.name] = self.VolumeInfo( resource.name @@ -1237,6 +2052,9 @@ def _get_volumes_info(self, filter=None): else: current = all_volume_info[resource.name] + if linstor.consts.FLAG_DISKLESS not in resource.flags: + current.diskful.append(resource.node_name) + for volume in resource.volumes: # We ignore diskless pools of the form "DfltDisklessStorPool". 
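# Illustrative sketch, not from the patch: the *_cache helpers above implement a
# dirty-flag cache so expensive LINSTOR queries are reused until something marks
# them dirty, then refetched on the next access. The same idea as a tiny generic
# class:
class DirtyCache(object):
    def __init__(self, fetch):
        self._fetch = fetch  # expensive call, e.g. a LINSTOR list request
        self._value = None
        self._dirty = True

    def get(self):
        if self._dirty:
            self._value = self._fetch()
            self._dirty = False
        return self._value

    def invalidate(self):
        self._dirty = True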
if volume.storage_pool_name == self._group_name: @@ -1245,22 +2063,32 @@ def _get_volumes_info(self, filter=None): 'Failed to get allocated size of `{}` on `{}`' .format(resource.name, volume.storage_pool_name) ) - current.physical_size += volume.allocated_size + allocated_size = volume.allocated_size - if volume.usable_size < 0: - raise LinstorVolumeManagerError( - 'Failed to get usable size of `{}` on `{}`' - .format(resource.name, volume.storage_pool_name) - ) - virtual_size = volume.usable_size + current.allocated_size = current.allocated_size and \ + max(current.allocated_size, allocated_size) or \ + allocated_size - current.virtual_size = current.virtual_size and \ - min(current.virtual_size, virtual_size) or virtual_size + usable_size = volume.usable_size + if usable_size > 0 and ( + usable_size < current.virtual_size or + not current.virtual_size + ): + current.virtual_size = usable_size + + if current.virtual_size <= 0: + raise LinstorVolumeManagerError( + 'Failed to get usable size of `{}` on `{}`' + .format(resource.name, volume.storage_pool_name) + ) for current in all_volume_info.values(): - current.physical_size *= 1024 + current.allocated_size *= 1024 current.virtual_size *= 1024 + self._volume_info_cache_dirty = False + self._volume_info_cache = all_volume_info + return all_volume_info def _get_volume_node_names_and_size(self, volume_name): @@ -1289,12 +2117,8 @@ def _get_volume_node_names_and_size(self, volume_name): return (node_names, size * 1024) def _compute_size(self, attr): - pools = self._linstor.storage_pool_list_raise( - filter_by_stor_pools=[self._group_name] - ).storage_pools - capacity = 0 - for pool in pools: + for pool in self._get_storage_pools(force=True): space = pool.free_space if space: size = getattr(space, attr) @@ -1308,42 +2132,73 @@ def _compute_size(self, attr): def _get_node_names(self): node_names = set() - pools = self._linstor.storage_pool_list_raise( - filter_by_stor_pools=[self._group_name] - ).storage_pools - for pool in pools: + for pool in self._get_storage_pools(): node_names.add(pool.node_name) return node_names - def _check_volume_creation_errors(self, result, volume_uuid): - errors = self._filter_errors(result) - if self._check_errors(errors, [ - linstor.consts.FAIL_EXISTS_RSC, linstor.consts.FAIL_EXISTS_RSC_DFN - ]): - raise LinstorVolumeManagerError( - 'Failed to create volume `{}` from SR `{}`, it already exists' - .format(volume_uuid, self._group_name), - LinstorVolumeManagerError.ERR_VOLUME_EXISTS + def _get_storage_pools(self, force=False): + cur_time = time.time() + elsaped_time = cur_time - self._storage_pools_time + + if force or elsaped_time >= self.STORAGE_POOLS_FETCH_INTERVAL: + self._storage_pools = self._linstor.storage_pool_list_raise( + filter_by_stor_pools=[self._group_name] + ).storage_pools + self._storage_pools_time = time.time() + + return self._storage_pools + + def _create_volume( + self, volume_uuid, volume_name, size, place_resources + ): + size = self.round_up_volume_size(size) + self._mark_resource_cache_as_dirty() + + def create_definition(): + self._check_volume_creation_errors( + self._linstor.resource_group_spawn( + rsc_grp_name=self._group_name, + rsc_dfn_name=volume_name, + vlm_sizes=['{}B'.format(size)], + definitions_only=True + ), + volume_uuid, + self._group_name ) + self._configure_volume_peer_slots(self._linstor, volume_name) - if errors: - raise LinstorVolumeManagerError( - 'Failed to create volume `{}` from SR `{}`: {}'.format( - volume_uuid, - self._group_name, - self._get_error_str(errors) + 
def clean(): + try: + self._destroy_volume(volume_uuid, force=True) + except Exception as e: + self._logger( + 'Unable to destroy volume {} after creation fail: {}' + .format(volume_uuid, e) ) - ) - def _create_volume(self, volume_uuid, volume_name, size, place_resources): - size = self.round_up_volume_size(size) + def create(): + try: + create_definition() + if place_resources: + # Basic case when we use the default redundancy of the group. + self._check_volume_creation_errors( + self._linstor.resource_auto_place( + rsc_name=volume_name, + place_count=self._redundancy, + diskless_on_remaining=False + ), + volume_uuid, + self._group_name + ) + except LinstorVolumeManagerError as e: + if e.code != LinstorVolumeManagerError.ERR_VOLUME_EXISTS: + clean() + raise + except Exception: + clean() + raise - self._check_volume_creation_errors(self._linstor.resource_group_spawn( - rsc_grp_name=self._group_name, - rsc_dfn_name=volume_name, - vlm_sizes=['{}B'.format(size)], - definitions_only=not place_resources - ), volume_uuid) + util.retry(create, maxretry=5) def _create_volume_with_properties( self, volume_uuid, volume_name, size, place_resources @@ -1378,6 +2233,8 @@ def _create_volume_with_properties( volume_uuid, volume_name, size, place_resources ) + assert volume_properties.namespace == \ + self._build_volume_namespace(volume_uuid) return volume_properties except LinstorVolumeManagerError as e: # Do not destroy existing resource! @@ -1385,12 +2242,8 @@ def _create_volume_with_properties( # before the `self._create_volume` case. # It can only happen if the same volume uuid is used in the same # call in another host. - if e.code == LinstorVolumeManagerError.ERR_VOLUME_EXISTS: - raise - self._force_destroy_volume(volume_uuid, volume_properties) - raise - except Exception: - self._force_destroy_volume(volume_uuid, volume_properties) + if e.code != LinstorVolumeManagerError.ERR_VOLUME_EXISTS: + self._destroy_volume(volume_uuid, force=True) raise def _find_device_path(self, volume_uuid, volume_name): @@ -1417,68 +2270,73 @@ def _find_device_path(self, volume_uuid, volume_name): def _request_device_path(self, volume_uuid, volume_name, activate=False): node_name = socket.gethostname() - resources = self._linstor.resource_list( - filter_by_nodes=[node_name], - filter_by_resources=[volume_name] + + resources = filter( + lambda resource: resource.node_name == node_name and + resource.name == volume_name, + self._get_resource_cache().resources ) - if not resources or not resources[0]: - raise LinstorVolumeManagerError( - 'No response list for dev path of `{}`'.format(volume_uuid) - ) - if isinstance(resources[0], linstor.responses.ResourceResponse): - if not resources[0].resources: - if activate: - self._activate_device_path(node_name, volume_name) - return self._request_device_path(volume_uuid, volume_name) - raise LinstorVolumeManagerError( - 'Empty dev path for `{}`, but definition "seems" to exist' - .format(volume_uuid) + if not resources: + if activate: + self._mark_resource_cache_as_dirty() + self._activate_device_path( + self._linstor, node_name, volume_name ) - # Contains a path of the /dev/drbd form. 
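# Illustrative sketch, not from the patch: _create_volume() above pairs a
# create() closure with a clean() rollback that force-destroys the half-created
# volume on unexpected errors, and the whole operation is retried through
# util.retry(create, maxretry=5). The rollback shape with hypothetical callbacks:
def create_with_rollback(create, rollback, should_rollback):
    try:
        return create()
    except Exception as e:
        if should_rollback(e):  # e.g. anything but "volume already exists"
            rollback()
        raise  # let an outer retry loop decide whether to try again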
- return resources[0].resources[0].volumes[0].device_path - - raise LinstorVolumeManagerError( - 'Unable to get volume dev path `{}`: {}'.format( - volume_uuid, str(resources[0]) + return self._request_device_path(volume_uuid, volume_name) + raise LinstorVolumeManagerError( + 'Empty dev path for `{}`, but definition "seems" to exist' + .format(volume_uuid) ) - ) - - def _activate_device_path(self, node_name, volume_name): - result = self._linstor.resource_create([ - linstor.ResourceData(node_name, volume_name, diskless=True) - ]) - if linstor.Linstor.all_api_responses_no_error(result): - return - errors = linstor.Linstor.filter_api_call_response_errors(result) - if len(errors) == 1 and errors[0].is_error( - linstor.consts.FAIL_EXISTS_RSC - ): - return - - raise LinstorVolumeManagerError( - 'Unable to activate device path of `{}` on node `{}`: {}' - .format(volume_name, node_name, ', '.join( - [str(x) for x in result])) - ) + # Contains a path of the /dev/drbd form. + return resources[0].volumes[0].device_path - def _destroy_resource(self, resource_name): + def _destroy_resource(self, resource_name, force=False): result = self._linstor.resource_dfn_delete(resource_name) error_str = self._get_error_str(result) - if error_str: + if not error_str: + self._mark_resource_cache_as_dirty() + return + + if not force: + self._mark_resource_cache_as_dirty() raise LinstorVolumeManagerError( - 'Could not destroy resource `{}` from SR `{}`: {}' + 'Could not destroy resource `{}` from SR `{}`: {}' .format(resource_name, self._group_name, error_str) ) - def _destroy_volume(self, volume_uuid, volume_properties): - assert volume_properties.namespace == \ - self._build_volume_namespace(volume_uuid) + # If force is used, ensure there is no opener. + all_openers = get_all_volume_openers(resource_name, '0') + for openers in all_openers.itervalues(): + if openers: + self._mark_resource_cache_as_dirty() + raise LinstorVolumeManagerError( + 'Could not force destroy resource `{}` from SR `{}`: {} (openers=`{}`)' + .format(resource_name, self._group_name, error_str, all_openers) + ) + + # Maybe the resource is blocked in primary mode. DRBD/LINSTOR issue? + resource_states = filter( + lambda resource_state: resource_state.name == resource_name, + self._get_resource_cache().resource_states + ) + + # Mark only after computation of states. + self._mark_resource_cache_as_dirty() + + for resource_state in resource_states: + volume_state = resource_state.volume_states[0] + if resource_state.in_use: + demote_drbd_resource(resource_state.node_name, resource_name) + break + self._destroy_resource(resource_name) + def _destroy_volume(self, volume_uuid, force=False): + volume_properties = self._get_volume_properties(volume_uuid) try: volume_name = volume_properties.get(self.PROP_VOLUME_NAME) if volume_name in self._fetch_resource_names(): - self._destroy_resource(volume_name) + self._destroy_resource(volume_name, force) # Assume this call is atomic. 
volume_properties.clear() @@ -1487,19 +2345,8 @@ def _destroy_volume(self, volume_uuid, volume_properties): 'Cannot destroy volume `{}`: {}'.format(volume_uuid, e) ) - def _force_destroy_volume(self, volume_uuid, volume_properties): - try: - self._destroy_volume(volume_uuid, volume_properties) - except Exception as e: - self._logger('Ignore fail: {}'.format(e)) - def _build_volumes(self, repair): - properties = linstor.KV( - self._get_store_name(), - uri=self._uri, - namespace=self._build_volume_namespace() - ) - + properties = self._kv_cache resource_names = self._fetch_resource_names() self._volumes = set() @@ -1517,9 +2364,7 @@ def _build_volumes(self, repair): self.REG_NOT_EXISTS, ignore_inexisting_volumes=False ) for volume_uuid, not_exists in existing_volumes.items(): - properties.namespace = self._build_volume_namespace( - volume_uuid - ) + properties.namespace = self._build_volume_namespace(volume_uuid) src_uuid = properties.get(self.PROP_UPDATING_UUID_SRC) if src_uuid: @@ -1569,7 +2414,7 @@ def _build_volumes(self, repair): # Little optimization, don't call `self._destroy_volume`, # we already have resource name list. if volume_name in resource_names: - self._destroy_resource(volume_name) + self._destroy_resource(volume_name, force=True) # Assume this call is atomic. properties.clear() @@ -1579,37 +2424,42 @@ def _build_volumes(self, repair): 'Cannot clean volume {}: {}'.format(volume_uuid, e) ) + # The volume can't be removed, maybe it's still in use, + # in this case rename it with the "DELETED_" prefix. + # This prefix is mandatory if it exists a snap transaction to + # rollback because the original VDI UUID can try to be renamed + # with the UUID we are trying to delete... + if not volume_uuid.startswith('DELETED_'): + self.update_volume_uuid( + volume_uuid, 'DELETED_' + volume_uuid, force=True + ) + for dest_uuid, src_uuid in updating_uuid_volumes.items(): - dest_properties = self._get_volume_properties(dest_uuid) - if int(dest_properties.get(self.PROP_NOT_EXISTS) or - self.STATE_EXISTS): - dest_properties.clear() + dest_namespace = self._build_volume_namespace(dest_uuid) + + properties.namespace = dest_namespace + if int(properties.get(self.PROP_NOT_EXISTS)): + properties.clear() continue - src_properties = self._get_volume_properties(src_uuid) - src_properties.clear() + properties.namespace = self._build_volume_namespace(src_uuid) + properties.clear() - dest_properties.pop(self.PROP_UPDATING_UUID_SRC) + properties.namespace = dest_namespace + properties.pop(self.PROP_UPDATING_UUID_SRC) if src_uuid in self._volumes: self._volumes.remove(src_uuid) self._volumes.add(dest_uuid) def _get_sr_properties(self): - return linstor.KV( - self._get_store_name(), - uri=self._uri, - namespace=self._build_sr_namespace() - ) + return self._create_linstor_kv(self._build_sr_namespace()) def _get_volumes_by_property( self, reg_prop, ignore_inexisting_volumes=True ): - base_properties = linstor.KV( - self._get_store_name(), - uri=self._uri, - namespace=self._build_volume_namespace() - ) + base_properties = self._get_kv_cache() + base_properties.namespace = self._build_volume_namespace() volume_properties = {} for volume_uuid in self._volumes: @@ -1625,15 +2475,17 @@ def _get_volumes_by_property( return volume_properties - def _get_volume_properties(self, volume_uuid): + def _create_linstor_kv(self, namespace): return linstor.KV( - self._get_store_name(), - uri=self._uri, - namespace=self._build_volume_namespace(volume_uuid) + self._group_name, + uri=self._linstor.controller_host(), + 
namespace=namespace ) - def _get_store_name(self): - return 'xcp-sr-{}'.format(self._group_name) + def _get_volume_properties(self, volume_uuid): + properties = self._get_kv_cache() + properties.namespace = self._build_volume_namespace(volume_uuid) + return properties @classmethod def _build_sr_namespace(cls): @@ -1653,46 +2505,433 @@ def _get_error_str(cls, result): ]) @classmethod - def _create_linstor_instance(cls, uri): - def connect(): + def _create_linstor_instance( + cls, uri, keep_uri_unmodified=False, attempt_count=30 + ): + retry = False + + def connect(uri): + if not uri: + uri = get_controller_uri() + if not uri: + raise LinstorVolumeManagerError( + 'Unable to find controller uri...' + ) instance = linstor.Linstor(uri, keep_alive=True) instance.connect() return instance + try: + return connect(uri) + except (linstor.errors.LinstorNetworkError, LinstorVolumeManagerError): + pass + + if not keep_uri_unmodified: + uri = None + return util.retry( - connect, - maxretry=60, - exceptions=[linstor.errors.LinstorNetworkError] + lambda: connect(uri), + maxretry=attempt_count, + period=1, + exceptions=[ + linstor.errors.LinstorNetworkError, + LinstorVolumeManagerError + ] ) @classmethod - def _destroy_storage_pool(cls, lin, group_name, node_name): - result = lin.storage_pool_delete(node_name, group_name) + def _configure_volume_peer_slots(cls, lin, volume_name): + result = lin.resource_dfn_modify(volume_name, {}, peer_slots=3) error_str = cls._get_error_str(result) if error_str: raise LinstorVolumeManagerError( - 'Failed to destroy SP `{}` on node `{}`: {}'.format( - group_name, - node_name, - error_str + 'Could not configure volume peer slots of {}: {}' + .format(volume_name, error_str) + ) + + @classmethod + def _activate_device_path(cls, lin, node_name, volume_name): + result = lin.resource_make_available(node_name, volume_name, diskful=False) + if linstor.Linstor.all_api_responses_no_error(result): + return + errors = linstor.Linstor.filter_api_call_response_errors(result) + if len(errors) == 1 and errors[0].is_error( + linstor.consts.FAIL_EXISTS_RSC + ): + return + + raise LinstorVolumeManagerError( + 'Unable to activate device path of `{}` on node `{}`: {}' + .format(volume_name, node_name, ', '.join( + [str(x) for x in result])) + ) + + @classmethod + def _request_database_path(cls, lin, activate=False): + node_name = socket.gethostname() + + try: + resources = filter( + lambda resource: resource.node_name == node_name and + resource.name == DATABASE_VOLUME_NAME, + lin.resource_list_raise().resources + ) + except Exception as e: + raise LinstorVolumeManagerError( + 'Unable to get resources during database creation: {}' + .format(e) + ) + + if not resources: + if activate: + cls._activate_device_path( + lin, node_name, DATABASE_VOLUME_NAME + ) + return cls._request_database_path( + DATABASE_VOLUME_NAME, DATABASE_VOLUME_NAME ) + raise LinstorVolumeManagerError( + 'Empty dev path for `{}`, but definition "seems" to exist' + .format(DATABASE_PATH) ) + # Contains a path of the /dev/drbd form. 
+ return resources[0].volumes[0].device_path @classmethod - def _destroy_resource_group(cls, lin, group_name): - result = lin.resource_group_delete(group_name) + def _create_database_volume( + cls, lin, group_name, node_names, redundancy, auto_quorum + ): + try: + dfns = lin.resource_dfn_list_raise().resource_definitions + except Exception as e: + raise LinstorVolumeManagerError( + 'Unable to get definitions during database creation: {}' + .format(e) + ) + + if dfns: + raise LinstorVolumeManagerError( + 'Could not create volume `{}` from SR `{}`, '.format( + DATABASE_VOLUME_NAME, group_name + ) + 'LINSTOR volume list must be empty.' + ) + + # Workaround to use thin lvm. Without this line an error is returned: + # "Not enough available nodes" + # I don't understand why but this command protect against this bug. + try: + pools = lin.storage_pool_list_raise( + filter_by_stor_pools=[group_name] + ) + except Exception as e: + raise LinstorVolumeManagerError( + 'Failed to get storage pool list before database creation: {}' + .format(e) + ) + + # Ensure we have a correct list of storage pools. + nodes_with_pool = map(lambda pool: pool.node_name, pools.storage_pools) + assert nodes_with_pool # We must have at least one storage pool! + for node_name in nodes_with_pool: + assert node_name in node_names + util.SMlog('Nodes with storage pool: {}'.format(nodes_with_pool)) + + # Create the database definition. + size = cls.round_up_volume_size(DATABASE_SIZE) + cls._check_volume_creation_errors(lin.resource_group_spawn( + rsc_grp_name=group_name, + rsc_dfn_name=DATABASE_VOLUME_NAME, + vlm_sizes=['{}B'.format(size)], + definitions_only=True + ), DATABASE_VOLUME_NAME, group_name) + cls._configure_volume_peer_slots(lin, DATABASE_VOLUME_NAME) + + # Create real resources on the first nodes. + resources = [] + + diskful_nodes = [] + diskless_nodes = [] + for node_name in node_names: + if node_name in nodes_with_pool: + diskful_nodes.append(node_name) + else: + diskless_nodes.append(node_name) + + assert diskful_nodes + for node_name in diskful_nodes[:redundancy]: + util.SMlog('Create database diskful on {}'.format(node_name)) + resources.append(linstor.ResourceData( + node_name=node_name, + rsc_name=DATABASE_VOLUME_NAME, + storage_pool=group_name + )) + # Create diskless resources on the remaining set. + for node_name in diskful_nodes[redundancy:] + diskless_nodes: + util.SMlog('Create database diskless on {}'.format(node_name)) + resources.append(linstor.ResourceData( + node_name=node_name, + rsc_name=DATABASE_VOLUME_NAME, + diskless=True + )) + + result = lin.resource_create(resources) error_str = cls._get_error_str(result) if error_str: raise LinstorVolumeManagerError( - 'Failed to destroy RG `{}`: {}'.format(group_name, error_str) + 'Could not create database volume from SR `{}`: {}'.format( + group_name, error_str + ) + ) + + # We must modify the quorum. Otherwise we can't use correctly the + # drbd-reactor daemon. + if auto_quorum: + result = lin.resource_dfn_modify(DATABASE_VOLUME_NAME, { + 'DrbdOptions/auto-quorum': 'disabled', + 'DrbdOptions/Resource/quorum': 'majority' + }) + error_str = cls._get_error_str(result) + if error_str: + raise LinstorVolumeManagerError( + 'Could not activate quorum on database volume: {}' + .format(error_str) + ) + + # Create database and ensure path exists locally and + # on replicated devices. + current_device_path = cls._request_database_path(lin, activate=True) + + # Ensure diskless paths exist on other hosts. Otherwise PBDs can't be + # plugged. 
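[Editor's note] The placement logic above splits the pool members in two: the first `redundancy` hosts that actually own a storage pool receive diskful replicas of the database volume, and every other host gets a diskless (client) resource so the DRBD device path exists everywhere. A minimal sketch of that partitioning, with hypothetical host names:

```
def plan_database_placement(node_names, nodes_with_pool, redundancy):
    # Hosts that own a storage pool can hold a replica: the first
    # `redundancy` of them get diskful resources, every remaining host
    # gets a diskless (client) resource so the device path exists
    # pool-wide and PBDs can be plugged on all hosts.
    diskful = [n for n in node_names if n in nodes_with_pool][:redundancy]
    diskless = [n for n in node_names if n not in diskful]
    return diskful, diskless

# plan_database_placement(['h1', 'h2', 'h3'], {'h1', 'h2'}, 2)
# -> (['h1', 'h2'], ['h3'])
```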
+ for node_name in node_names: + cls._activate_device_path(lin, node_name, DATABASE_VOLUME_NAME) + + # We use realpath here to get the /dev/drbd path instead of + # /dev/drbd/by-res/. + expected_device_path = cls.build_device_path(DATABASE_VOLUME_NAME) + util.wait_for_path(expected_device_path, 5) + + device_realpath = os.path.realpath(expected_device_path) + if current_device_path != device_realpath: + raise LinstorVolumeManagerError( + 'Invalid path, current={}, expected={} (realpath={})' + .format( + current_device_path, + expected_device_path, + device_realpath + ) + ) + + try: + util.retry( + lambda: util.pread2([DATABASE_MKFS, expected_device_path]), + maxretry=5 + ) + except Exception as e: + raise LinstorVolumeManagerError( + 'Failed to execute {} on database volume: {}' + .format(DATABASE_MKFS, e) + ) + + return expected_device_path + + @classmethod + def _destroy_database_volume(cls, lin, group_name): + error_str = cls._get_error_str( + lin.resource_dfn_delete(DATABASE_VOLUME_NAME) + ) + if error_str: + raise LinstorVolumeManagerError( + 'Could not destroy resource `{}` from SR `{}`: {}' + .format(DATABASE_VOLUME_NAME, group_name, error_str) ) + @classmethod + def _mount_database_volume(cls, volume_path, mount=True, force=False): + try: + # 1. Create a backup config folder. + database_not_empty = bool(os.listdir(DATABASE_PATH)) + backup_path = cls._create_database_backup_path() + + # 2. Move the config in the mounted volume. + if database_not_empty: + cls._move_files(DATABASE_PATH, backup_path) + + cls._mount_volume(volume_path, DATABASE_PATH, mount) + + if database_not_empty: + cls._move_files(backup_path, DATABASE_PATH, force) + + # 3. Remove useless backup directory. + try: + os.rmdir(backup_path) + except Exception as e: + raise LinstorVolumeManagerError( + 'Failed to remove backup path {} of LINSTOR config: {}' + .format(backup_path, e) + ) + except Exception as e: + def force_exec(fn): + try: + fn() + except Exception: + pass + + if mount == cls._is_mounted(DATABASE_PATH): + force_exec(lambda: cls._move_files( + DATABASE_PATH, backup_path + )) + force_exec(lambda: cls._mount_volume( + volume_path, DATABASE_PATH, not mount + )) + + if mount != cls._is_mounted(DATABASE_PATH): + force_exec(lambda: cls._move_files( + backup_path, DATABASE_PATH + )) + + force_exec(lambda: os.rmdir(backup_path)) + raise e + + @classmethod + def _force_destroy_database_volume(cls, lin, group_name): + try: + cls._destroy_database_volume(lin, group_name) + except Exception: + pass + + @classmethod + def _destroy_storage_pool(cls, lin, group_name, node_name): + def destroy(): + result = lin.storage_pool_delete(node_name, group_name) + errors = cls._filter_errors(result) + if cls._check_errors(errors, [ + linstor.consts.FAIL_NOT_FOUND_STOR_POOL, + linstor.consts.FAIL_NOT_FOUND_STOR_POOL_DFN + ]): + return + + if errors: + raise LinstorVolumeManagerError( + 'Failed to destroy SP `{}` on node `{}`: {}'.format( + group_name, + node_name, + cls._get_error_str(errors) + ) + ) + + # We must retry to avoid errors like: + # "can not be deleted as volumes / snapshot-volumes are still using it" + # after LINSTOR database volume destruction. 
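[Editor's note] `_mount_database_volume` above has to mount the LINSTOR database volume over a directory that may already contain files: it stashes the existing content in a temporary backup directory, mounts the volume, then moves the content onto the newly mounted filesystem, and tries to undo those steps if anything fails. A stripped-down sketch of the happy path only (no rollback, requires root, names are illustrative):

```
import os
import shutil
import subprocess
import uuid

def mount_preserving_contents(device, mountpoint):
    backup = mountpoint + '-' + str(uuid.uuid4())
    os.mkdir(backup)

    # Stash whatever already lives under the mountpoint.
    had_content = bool(os.listdir(mountpoint))
    if had_content:
        for name in os.listdir(mountpoint):
            shutil.move(os.path.join(mountpoint, name), os.path.join(backup, name))

    subprocess.check_call(['mount', device, mountpoint])

    # Move the stashed files onto the mounted filesystem and drop the backup.
    if had_content:
        for name in os.listdir(backup):
            shutil.move(os.path.join(backup, name), os.path.join(mountpoint, name))
    os.rmdir(backup)
```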
+ return util.retry(destroy, maxretry=10) + + @classmethod + def _destroy_resource_group(cls, lin, group_name): + def destroy(): + result = lin.resource_group_delete(group_name) + errors = cls._filter_errors(result) + if cls._check_errors(errors, [ + linstor.consts.FAIL_NOT_FOUND_RSC_GRP + ]): + return + + if errors: + raise LinstorVolumeManagerError( + 'Failed to destroy RG `{}`: {}' + .format(group_name, cls._get_error_str(errors)) + ) + + return util.retry(destroy, maxretry=10) + @classmethod def _build_group_name(cls, base_name): # If thin provisioning is used we have a path like this: # `VG/LV`. "/" is not accepted by LINSTOR. return '{}{}'.format(cls.PREFIX_SR, base_name.replace('/', '_')) + @classmethod + def _check_volume_creation_errors(cls, result, volume_uuid, group_name): + errors = cls._filter_errors(result) + if cls._check_errors(errors, [ + linstor.consts.FAIL_EXISTS_RSC, linstor.consts.FAIL_EXISTS_RSC_DFN + ]): + raise LinstorVolumeManagerError( + 'Failed to create volume `{}` from SR `{}`, it already exists' + .format(volume_uuid, group_name), + LinstorVolumeManagerError.ERR_VOLUME_EXISTS + ) + + if errors: + raise LinstorVolumeManagerError( + 'Failed to create volume `{}` from SR `{}`: {}'.format( + volume_uuid, + group_name, + cls._get_error_str(errors) + ) + ) + + @classmethod + def _move_files(cls, src_dir, dest_dir, force=False): + def listdir(dir): + ignored = ['lost+found'] + return filter(lambda file: file not in ignored, os.listdir(dir)) + + try: + if not force: + files = listdir(dest_dir) + if files: + raise LinstorVolumeManagerError( + 'Cannot move files from {} to {} because destination ' + 'contains: {}'.format(src_dir, dest_dir, files) + ) + except LinstorVolumeManagerError: + raise + except Exception as e: + raise LinstorVolumeManagerError( + 'Cannot list dir {}: {}'.format(dest_dir, e) + ) + + try: + for file in listdir(src_dir): + try: + dest_file = os.path.join(dest_dir, file) + if not force and os.path.exists(dest_file): + raise LinstorVolumeManagerError( + 'Cannot move {} because it already exists in the ' + 'destination'.format(file) + ) + shutil.move(os.path.join(src_dir, file), dest_file) + except LinstorVolumeManagerError: + raise + except Exception as e: + raise LinstorVolumeManagerError( + 'Cannot move {}: {}'.format(file, e) + ) + except Exception as e: + if not force: + try: + cls._move_files(dest_dir, src_dir, force=True) + except Exception: + pass + + raise LinstorVolumeManagerError( + 'Failed to move files from {} to {}: {}'.format( + src_dir, dest_dir, e + ) + ) + + @staticmethod + def _create_database_backup_path(): + path = DATABASE_PATH + '-' + str(uuid.uuid4()) + try: + os.mkdir(path) + return path + except Exception as e: + raise LinstorVolumeManagerError( + 'Failed to create backup path {} of LINSTOR config: {}' + .format(path, e) + ) + @staticmethod def _get_filtered_properties(properties): return dict(properties.items()) @@ -1711,3 +2950,110 @@ def _check_errors(result, codes): if err.is_error(code): return True return False + + @classmethod + def _controller_is_running(cls): + return cls._service_is_running('linstor-controller') + + @classmethod + def _start_controller(cls, start=True): + return cls._start_service('linstor-controller', start) + + @staticmethod + def _start_service(name, start=True): + action = 'start' if start else 'stop' + (ret, out, err) = util.doexec([ + 'systemctl', action, name + ]) + if ret != 0: + raise LinstorVolumeManagerError( + 'Failed to {} {}: {} {}' + .format(action, name, out, err) + ) + + 
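[Editor's note] `_start_controller`/`_start_service` above boil down to thin wrappers around `systemctl`. A standalone equivalent using plain `subprocess` instead of the driver's `util.doexec` (error type and messages are illustrative):

```
import subprocess

def service_is_active(name):
    # `systemctl is-active --quiet` exits 0 only when the unit is active.
    return subprocess.call(['systemctl', 'is-active', '--quiet', name]) == 0

def set_service_state(name, start=True):
    action = 'start' if start else 'stop'
    proc = subprocess.Popen(
        ['systemctl', action, name],
        stdout=subprocess.PIPE, stderr=subprocess.PIPE,
        universal_newlines=True
    )
    out, err = proc.communicate()
    if proc.returncode != 0:
        raise RuntimeError('Failed to {} {}: {} {}'.format(action, name, out, err))

# e.g. only run the controller where it is supposed to run:
# if not service_is_active('linstor-controller'):
#     set_service_state('linstor-controller', start=True)
```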
@staticmethod + def _service_is_running(name): + (ret, out, err) = util.doexec([ + 'systemctl', 'is-active', '--quiet', name + ]) + return not ret + + @staticmethod + def _is_mounted(mountpoint): + (ret, out, err) = util.doexec(['mountpoint', '-q', mountpoint]) + return ret == 0 + + @classmethod + def _mount_volume(cls, volume_path, mountpoint, mount=True): + if mount: + try: + util.pread(['mount', volume_path, mountpoint]) + except Exception as e: + raise LinstorVolumeManagerError( + 'Failed to mount volume {} on {}: {}' + .format(volume_path, mountpoint, e) + ) + else: + try: + if cls._is_mounted(mountpoint): + util.pread(['umount', mountpoint]) + except Exception as e: + raise LinstorVolumeManagerError( + 'Failed to umount volume {} on {}: {}' + .format(volume_path, mountpoint, e) + ) + + +# ============================================================================== + +# Check if a path is a DRBD resource and log the process name/pid +# that opened it. +def log_drbd_openers(path): + # Ignore if it's not a symlink to DRBD resource. + if not path.startswith(DRBD_BY_RES_PATH): + return + + # Compute resource name. + res_name_end = path.find('/', len(DRBD_BY_RES_PATH)) + if res_name_end == -1: + return + res_name = path[len(DRBD_BY_RES_PATH):res_name_end] + + volume_end = path.rfind('/') + if volume_end == res_name_end: + return + volume = path[volume_end + 1:] + + try: + # Ensure path is a DRBD. + drbd_path = os.path.realpath(path) + stats = os.stat(drbd_path) + if not stat.S_ISBLK(stats.st_mode) or os.major(stats.st_rdev) != 147: + return + + # Find where the device is open. + (ret, stdout, stderr) = util.doexec(['drbdadm', 'status', res_name]) + if ret != 0: + util.SMlog('Failed to execute `drbdadm status` on `{}`: {}'.format( + res_name, stderr + )) + return + + # Is it a local device? + if stdout.startswith('{} role:Primary'.format(res_name)): + util.SMlog( + 'DRBD resource `{}` is open on local host: {}' + .format(path, get_local_volume_openers(res_name, volume)) + ) + return + + # Is it a remote device? + util.SMlog( + 'DRBD resource `{}` is open on hosts: {}' + .format(path, get_all_volume_openers(res_name, volume)) + ) + except Exception as e: + util.SMlog( + 'Got exception while trying to determine where DRBD resource ' + + '`{}` is open: {}'.format(path, e) + ) diff --git a/drivers/on_slave.py b/drivers/on_slave.py index bb3f5db65..524424f61 100755 --- a/drivers/on_slave.py +++ b/drivers/on_slave.py @@ -79,6 +79,7 @@ def _is_open(session, args): import CephFSSR import EXTSR import GlusterFSSR + import LinstorSR import LVHDSR import MooseFSSR import NFSSR @@ -109,8 +110,28 @@ def _is_open(session, args): } cmd.params = {"command": None} + sr_uuid = srRec["uuid"] + + # Another ugly piece of code to load a real Linstor SR, otherwise + # we can't fetch the VDI path. + if srType == 'linstor': + host_ref = util.get_this_host_ref(session) + sr_ref = session.xenapi.SR.get_by_uuid(sr_uuid) + + pbd = util.find_my_pbd(session, host_ref, sr_ref) + if pbd is None: + raise util.SMException('Failed to find Linstor PBD') + + cmd.dconf = session.xenapi.PBD.get_device_config(pbd) + driver = SR.driver(srType) - sr = driver(cmd, srRec["uuid"]) + sr = driver(cmd, sr_uuid) + + # session_ref param is required to have a valid session when SR object is created. + # It's not the case here, so attach the current session object to make LinstorSR happy. 
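[Editor's note] `log_drbd_openers` above only inspects paths that really are DRBD devices, and the check relies on DRBD's fixed block-device major number (147), as used in the patch itself. A minimal standalone version of that test:

```
import os
import stat

DRBD_MAJOR = 147  # block-device major number reserved for DRBD

def is_drbd_device(path):
    # Resolve /dev/drbd/by-res/... symlinks and confirm the target is a
    # block device with the DRBD major number.
    real = os.path.realpath(path)
    try:
        st = os.stat(real)
    except OSError:
        return False
    return stat.S_ISBLK(st.st_mode) and os.major(st.st_rdev) == DRBD_MAJOR
```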
+ if srType == 'linstor': + sr.session = session + vdi = sr.vdi(vdiUuid) tapdisk = blktap2.Tapdisk.find_by_path(vdi.path) util.SMlog("Tapdisk for %s: %s" % (vdi.path, tapdisk)) diff --git a/drivers/tapdisk-pause b/drivers/tapdisk-pause index 932fc3ca6..75328757b 100755 --- a/drivers/tapdisk-pause +++ b/drivers/tapdisk-pause @@ -30,7 +30,7 @@ import vhdutil import lvmcache try: - from linstorvolumemanager import LinstorVolumeManager + from linstorvolumemanager import get_controller_uri, LinstorVolumeManager LINSTOR_AVAILABLE = True except ImportError: LINSTOR_AVAILABLE = False @@ -152,10 +152,6 @@ class Tapdisk: # "B" path. Note: "A", "B" and "OLD_A" are UUIDs. session = self.session - linstor_uri = 'linstor://{}'.format( - util.get_master_rec(session)['address'] - ) - host_ref = util.get_this_host_ref(session) sr_ref = session.xenapi.SR.get_by_uuid(self.sr_uuid) @@ -167,7 +163,7 @@ class Tapdisk: group_name = dconf['group-name'] device_path = LinstorVolumeManager( - linstor_uri, + get_controller_uri(), group_name, logger=util.SMlog ).get_device_path(self.vdi_uuid) diff --git a/drivers/util.py b/drivers/util.py index a913fcf5c..98b21ae1d 100755 --- a/drivers/util.py +++ b/drivers/util.py @@ -704,32 +704,10 @@ def get_master_ref(session): return session.xenapi.pool.get_master(pools[0]) -def get_master_rec(session): - return session.xenapi.host.get_record(get_master_ref(session)) - - def is_master(session): return get_this_host_ref(session) == get_master_ref(session) -def get_master_address(): - address = None - try: - fd = open('/etc/xensource/pool.conf', 'r') - try: - items = fd.readline().split(':') - if items[0].strip() == 'master': - address = 'localhost' - else: - address = items[1].strip() - finally: - fd.close() - except Exception: - pass - return address - - - def get_localhost_ref(session): filename = '/etc/xensource-inventory' try: @@ -770,6 +748,17 @@ def get_hosts_attached_on(session, vdi_uuids): host_refs[key[len('host_'):]] = True return host_refs.keys() +def get_this_host_address(session): + host_uuid = get_this_host() + host_ref = session.xenapi.host.get_by_uuid(host_uuid) + return session.xenapi.host.get_record(host_ref)['address'] + +def get_host_addresses(session): + addresses = [] + hosts = session.xenapi.host.get_all_records() + for record in hosts.itervalues(): + addresses.append(record['address']) + return addresses def get_this_host_ref(session): host_uuid = get_this_host() @@ -2010,3 +1999,95 @@ def sessions_less_than_targets(other_config, device_config): return (sessions < targets) else: return False + + +def enable_and_start_service(name, start): + attempt = 0 + while True: + attempt += 1 + fn = 'enable' if start else 'disable' + args = ('systemctl', fn, '--now', name) + (ret, out, err) = doexec(args) + if ret == 0: + return + elif attempt >= 3: + raise Exception( + 'Failed to {} {}: {} {}'.format(fn, name, out, err) + ) + time.sleep(1) + + +def stop_service(name): + args = ('systemctl', 'stop', name) + (ret, out, err) = doexec(args) + if ret == 0: + return + raise Exception('Failed to stop {}: {} {}'.format(name, out, err)) + + +def restart_service(name): + attempt = 0 + while True: + attempt += 1 + SMlog('Restarting service {} {}...'.format(name, attempt)) + args = ('systemctl', 'restart', name) + (ret, out, err) = doexec(args) + if ret == 0: + return + elif attempt >= 3: + SMlog('Restart service FAILED {} {}'.format(name, attempt)) + raise Exception( + 'Failed to restart {}: {} {}'.format(name, out, err) + ) + time.sleep(1) + + +def check_pid_exists(pid): + try: 
+ os.kill(pid, 0) + except OSError: + return False + else: + return True + + +def make_profile(name, function): + """ + Helper to execute cProfile using unique log file. + """ + + import cProfile + import itertools + import os.path + import time + + assert name + assert function + + FOLDER = '/tmp/sm-perfs/' + makedirs(FOLDER) + + filename = time.strftime('{}_%Y%m%d_%H%M%S.prof'.format(name)) + + def gen_path(path): + yield path + root, ext = os.path.splitext(path) + for i in itertools.count(start=1, step=1): + yield root + '.{}.'.format(i) + ext + + for profile_path in gen_path(FOLDER + filename): + try: + file = open_atomic(profile_path, 'w') + file.close() + break + except OSError as e: + if e.errno == errno.EEXIST: + pass + else: + raise + + try: + SMlog('* Start profiling of {} ({}) *'.format(name, filename)) + cProfile.runctx('function()', None, locals(), profile_path) + finally: + SMlog('* End profiling of {} ({}) *'.format(name, filename)) diff --git a/drivers/vhdutil.py b/drivers/vhdutil.py index c4be0eef6..6a71804c7 100755 --- a/drivers/vhdutil.py +++ b/drivers/vhdutil.py @@ -99,13 +99,16 @@ def convertAllocatedSizeToBytes(size): return size * 2 * 1024 * 1024 -def getVHDInfo(path, extractUuidFunction, includeParent=True): +def getVHDInfo(path, extractUuidFunction, includeParent=True, resolveParent=True): """Get the VHD info. The parent info may optionally be omitted: vhd-util tries to verify the parent by opening it, which results in error if the VHD resides on an inactive LV""" opts = "-vsaf" if includeParent: opts += "p" + if not resolveParent: + opts += "u" + cmd = [VHD_UTIL, "query", OPT_LOG_ERR, opts, "-n", path] ret = ioretry(cmd) fields = ret.strip().split('\n') diff --git a/etc/systemd/system/drbd-reactor.service.d/override.conf b/etc/systemd/system/drbd-reactor.service.d/override.conf new file mode 100644 index 000000000..2f99a46af --- /dev/null +++ b/etc/systemd/system/drbd-reactor.service.d/override.conf @@ -0,0 +1,6 @@ +[Service] +StartLimitInterval=60 +StartLimitBurst=10 + +Restart=always +RestartSec=2 diff --git a/etc/systemd/system/linstor-satellite.service.d/override.conf b/etc/systemd/system/linstor-satellite.service.d/override.conf new file mode 100644 index 000000000..b1686b4f3 --- /dev/null +++ b/etc/systemd/system/linstor-satellite.service.d/override.conf @@ -0,0 +1,5 @@ +[Service] +Environment=LS_KEEP_RES=^xcp-persistent* + +[Unit] +After=drbd.service diff --git a/etc/systemd/system/var-lib-linstor.service b/etc/systemd/system/var-lib-linstor.service new file mode 100644 index 000000000..e9deb9042 --- /dev/null +++ b/etc/systemd/system/var-lib-linstor.service @@ -0,0 +1,21 @@ +# Regarding the current version of systemd (v.219) used in XCP-ng, we can't use +# the ReadWriteOnly option (to apply the -w flag, it's not the same than -o rw). +# This file is a workaround to avoid RO. It must be replaced with the code below +# in a mount unit. Compatible with version >= 246. 
+# +# [Unit] +# Description=Filesystem for the LINSTOR controller +# +# [Mount] +# What=/dev/drbd/by-res/xcp-persistent-database/0 +# Where=/var/lib/linstor +# ReadWriteOnly=true + +[Unit] +Description=Mount filesystem for the LINSTOR controller + +[Service] +Type=oneshot +ExecStart=/bin/mount -w /dev/drbd/by-res/xcp-persistent-database/0 /var/lib/linstor +ExecStop=/opt/xensource/libexec/safe-umount /var/lib/linstor +RemainAfterExit=true diff --git a/linstor/linstor-monitord.c b/linstor/linstor-monitord.c index 8161813d7..47740598c 100644 --- a/linstor/linstor-monitord.c +++ b/linstor/linstor-monitord.c @@ -14,8 +14,10 @@ * along with this program. If not, see . */ +#include #include #include +#include #include #include #include @@ -39,7 +41,8 @@ #define POOL_CONF_ABS_FILE POOL_CONF_DIR "/" POOL_CONF_FILE // In milliseconds. -#define POLL_TIMEOUT 2000 +#define UPDATE_LINSTOR_NODE_TIMEOUT 2000 +#define SR_SCAN_TIMEOUT 720000 // ----------------------------------------------------------------------------- @@ -130,24 +133,120 @@ static inline int isMasterHost (int *error) { typedef struct { int inotifyFd; + struct timespec lastScanTime; + int isMaster; // TODO: Should be completed with at least a hostname field. } State; // ----------------------------------------------------------------------------- -static inline int execCommand (char *argv[]) { +typedef struct { + char *data; + size_t size; + size_t capacity; +} Buffer; + +#define max(a, b) ({ \ + __typeof__(a) _a = (a); \ + __typeof__(b) _b = (b); \ + _a > _b ? _a : _b; \ +}) + +static inline ssize_t readAll (int fd, Buffer *buffer) { + assert(buffer->capacity >= buffer->size); + + ssize_t ret = 0; + do { + size_t byteCount = buffer->capacity - buffer->size; + if (byteCount < 16) { + const size_t newCapacity = max(buffer->capacity << 1, 64); + char *p = realloc(buffer->data, newCapacity); + if (!p) + return -errno; + + buffer->data = p; + buffer->capacity = newCapacity; + + byteCount = buffer->capacity - buffer->size; + } + + ret = read(fd, buffer->data + buffer->size, byteCount); + if (ret > 0) + buffer->size += ret; + else if (ret < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) + ret = 0; + } while (ret > 0); + + return ret; +} + +// ----------------------------------------------------------------------------- + +static inline int execCommand (char *argv[], Buffer *buffer) { + int pipefd[2]; + if (buffer) { + if (pipe(pipefd) < 0) { + syslog(LOG_ERR, "Failed to exec pipe: `%s`.", strerror(errno)); + return -errno; + } + + if (fcntl(pipefd[0], F_SETFL, O_NONBLOCK) < 0) { + syslog(LOG_ERR, "Failed to exec fcntl on pipe in: `%s`.", strerror(errno)); + close(pipefd[0]); + close(pipefd[1]); + return -errno; + } + } + const pid_t pid = fork(); - if (pid < 0) + if (pid < 0) { + syslog(LOG_ERR, "Failed to fork: `%s`.", strerror(errno)); + if (buffer) { + close(pipefd[0]); + close(pipefd[1]); + } return -errno; + } // Child process. if (pid == 0) { + if (buffer) { + close(STDOUT_FILENO); + dup(pipefd[1]); + + close(pipefd[0]); + close(pipefd[1]); + } + if (execvp(*argv, argv) < 0) syslog(LOG_ERR, "Failed to exec `%s` command.", *argv); exit(EXIT_FAILURE); } // Main process. 
+ int ret = 0; + if (buffer) { + close(pipefd[1]); + + do { + struct pollfd fds = { pipefd[0], POLLIN | POLLHUP, 0 }; + const int res = poll(&fds, 1, 0); + if (res < 0) { + if (errno == EAGAIN) + continue; + syslog(LOG_ERR, "Failed to poll from command: `%s`.", strerror(errno)); + ret = -errno; + } else if (res > 0) { + if (fds.revents & POLLIN) + ret = readAll(pipefd[0], buffer); + if (fds.revents & POLLHUP) + break; // Input has been closed. + } + } while (ret >= 0); + + close(pipefd[0]); + } + int status; if (waitpid(pid, &status, 0) < 0) { syslog(LOG_ERR, "Failed to wait command: `%s`.", *argv); @@ -163,7 +262,7 @@ static inline int execCommand (char *argv[]) { } else if (WIFSIGNALED(status)) syslog(LOG_ERR, "`%s` terminated by signal %d.", *argv, WTERMSIG(status)); - return 0; + return ret; } // ----------------------------------------------------------------------------- @@ -188,23 +287,6 @@ static inline int addInotifyWatch (int inotifyFd, const char *filepath, uint32_t // ----------------------------------------------------------------------------- -static inline int updateLinstorServices () { - int error; - const int isMaster = isMasterHost(&error); - if (error) - return error; - - syslog(LOG_INFO, "%s linstor-controller...", isMaster ? "Enabling" : "Disabling"); - char *argv[] = { - "systemctl", - isMaster ? "enable" : "disable", - "--now", - "linstor-controller", - NULL - }; - return execCommand(argv); -} - static inline int updateLinstorNode (State *state) { char buffer[256]; if (gethostname(buffer, sizeof buffer) == -1) { @@ -219,14 +301,53 @@ static inline int updateLinstorNode (State *state) { // ----------------------------------------------------------------------------- +#define UUID_PARAM "uuid=" +#define UUID_PARAM_LEN (sizeof(UUID_PARAM) - 1) +#define UUID_LENGTH 36 + +static inline void scanLinstorSr (const char *uuid) { + char uuidBuf[UUID_LENGTH + UUID_PARAM_LEN + 1] = UUID_PARAM; + strncpy(uuidBuf + UUID_PARAM_LEN, uuid, UUID_LENGTH); + uuidBuf[UUID_LENGTH + UUID_PARAM_LEN] = '\0'; + execCommand((char *[]){ "xe", "sr-scan", uuidBuf, NULL }, NULL); +} + +// Called to update the physical/virtual size used by LINSTOR SRs in XAPI DB. 
+static inline int scanLinstorSrs () { + Buffer srs = {}; + const int ret = execCommand((char *[]){ "xe", "sr-list", "type=linstor", "--minimal", NULL }, &srs); + if (ret) { + free(srs.data); + return ret; + } + + const char *end = srs.data + srs.size; + char *pos = srs.data; + for (char *off; (off = memchr(pos, ',', end - pos)); pos = off + 1) + if (off - pos == UUID_LENGTH) + scanLinstorSr(pos); + + if (end - pos >= UUID_LENGTH) { + for (--end; end - pos >= UUID_LENGTH && isspace(*end); --end) {} + if (isalnum(*end)) + scanLinstorSr(pos); + } + + free(srs.data); + + return 0; +} + +// ----------------------------------------------------------------------------- + #define PROCESS_MODE_DEFAULT 0 #define PROCESS_MODE_WAIT_FILE_CREATION 1 static inline int waitForPoolConfCreation (State *state, int *wdFile); -static inline int processPoolConfEvents (int inotifyFd, int wd, char **buffer, size_t *bufferSize, int mode, int *process) { +static inline int processPoolConfEvents (State *state, int wd, char **buffer, size_t *bufferSize, int mode, int *process) { size_t size = 0; - if (ioctl(inotifyFd, FIONREAD, (char *)&size) == -1) { + if (ioctl(state->inotifyFd, FIONREAD, (char *)&size) == -1) { syslog(LOG_ERR, "Failed to get buffer size from inotify descriptor: `%s`.", strerror(errno)); return -errno; } @@ -241,7 +362,7 @@ static inline int processPoolConfEvents (int inotifyFd, int wd, char **buffer, s *bufferSize = size; } - if ((size = (size_t)read(inotifyFd, *buffer, size)) == (size_t)-1) { + if ((size = (size_t)read(state->inotifyFd, *buffer, size)) == (size_t)-1) { syslog(LOG_ERR, "Failed to read buffer from inotify descriptor: `%s`.", strerror(errno)); return -errno; } @@ -280,10 +401,9 @@ static inline int processPoolConfEvents (int inotifyFd, int wd, char **buffer, s syslog(LOG_INFO, "Updating linstor services... (Inotify mask=%" PRIu32 ")", mask); if (mask & (IN_DELETE_SELF | IN_MOVE_SELF | IN_UNMOUNT)) { syslog(LOG_ERR, "Watched `" POOL_CONF_ABS_FILE "` file has been removed!"); - inotify_rm_watch(inotifyFd, wd); // Do not forget to remove watch to avoid leaks. + inotify_rm_watch(state->inotifyFd, wd); // Do not forget to remove watch to avoid leaks. 
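[Editor's note] The `scanLinstorSrs` helper above is the C equivalent of a small loop: list the LINSTOR SR UUIDs with `xe sr-list type=linstor --minimal` (which prints one comma-separated line) and run `xe sr-scan` on each so XAPI refreshes the physical/virtual sizes. A Python sketch of the same behaviour, for readers more comfortable with the drivers' language:

```
import subprocess

def scan_linstor_srs():
    # `--minimal` prints a single comma-separated line of UUIDs, or nothing.
    out = subprocess.check_output(
        ['xe', 'sr-list', 'type=linstor', '--minimal'],
        universal_newlines=True
    ).strip()
    if not out:
        return
    for sr_uuid in out.split(','):
        subprocess.call(['xe', 'sr-scan', 'uuid=' + sr_uuid])
```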
return -EIO; } - ret = updateLinstorServices(); } else { if (mask & (IN_CREATE | IN_MOVED_TO)) { syslog(LOG_ERR, "Watched `" POOL_CONF_ABS_FILE "` file has been recreated!"); @@ -303,16 +423,24 @@ static inline int waitAndProcessEvents (State *state, int wd, int mode) { struct timespec previousTime = getCurrentTime(); do { - struct timespec currentTime = getCurrentTime(); + const struct timespec currentTime = getCurrentTime(); const int64_t elapsedTime = convertToMilliseconds(getTimeDiff(¤tTime, &previousTime)); int timeout; - if (elapsedTime >= POLL_TIMEOUT) { + if (elapsedTime >= UPDATE_LINSTOR_NODE_TIMEOUT) { updateLinstorNode(state); - timeout = POLL_TIMEOUT; + timeout = UPDATE_LINSTOR_NODE_TIMEOUT; previousTime = getCurrentTime(); } else { - timeout = POLL_TIMEOUT - elapsedTime; + timeout = UPDATE_LINSTOR_NODE_TIMEOUT - elapsedTime; + } + + const int64_t elapsedScanTime = convertToMilliseconds(getTimeDiff(¤tTime, &state->lastScanTime)); + if (elapsedScanTime >= SR_SCAN_TIMEOUT) { + state->isMaster = isMasterHost(&ret); + if (state->isMaster) + scanLinstorSrs(); + state->lastScanTime = getCurrentTime(); } struct pollfd fds = { state->inotifyFd, POLLIN, 0 }; @@ -323,7 +451,9 @@ static inline int waitAndProcessEvents (State *state, int wd, int mode) { syslog(LOG_ERR, "Failed to poll from inotify descriptor: `%s`.", strerror(errno)); ret = -errno; } else if (res > 0) { - ret = processPoolConfEvents(state->inotifyFd, wd, &buffer, &bufferSize, mode, &process); + state->isMaster = isMasterHost(&ret); + if (!ret) + ret = processPoolConfEvents(state, wd, &buffer, &bufferSize, mode, &process); } } while (ret >= 0 && process); @@ -350,7 +480,8 @@ static inline int waitForPoolConfCreation (State *state, int *wdFile) { do { do { // Update LINSTOR services... - ret = updateLinstorServices(); + int ret; + state->isMaster = isMasterHost(&ret); // Ok we can't read the pool configuration file. // Maybe the file doesn't exist. Waiting its creation... @@ -378,7 +509,9 @@ int main (int argc, char *argv[]) { setlogmask(LOG_UPTO(LOG_INFO)); State state = { - .inotifyFd = -1 + .inotifyFd = -1, + .lastScanTime = getCurrentTime(), + .isMaster = 0 }; const int inotifyFd = createInotifyInstance(); diff --git a/multipath/multipath.conf b/multipath/multipath.conf index 166bd8476..38cc8a8b1 100644 --- a/multipath/multipath.conf +++ b/multipath/multipath.conf @@ -23,6 +23,7 @@ blacklist { devnode "scini*" devnode "^rbd[0-9]*" devnode "^nbd[0-9]*" + devnode "^drbd[0-9]*" } # Leave this section in place even if empty blacklist_exceptions { diff --git a/scripts/fork-log-daemon b/scripts/fork-log-daemon new file mode 100755 index 000000000..665a60baf --- /dev/null +++ b/scripts/fork-log-daemon @@ -0,0 +1,36 @@ +#!/usr/bin/env python + +import select +import signal +import subprocess +import sys +import syslog + +def main(): + process = subprocess.Popen(sys.argv[1:], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + signal.signal(signal.SIGTERM, signal.SIG_IGN) + write_to_stdout = True + + while process.poll() is None: + while True: + output = process.stdout.readline() + if not output: + break + + if write_to_stdout: + try: + print(output) + sys.stdout.flush() + except Exception: + # Probably a broken pipe. So the process reading stdout is dead. 
+ write_to_stdout = False + syslog.syslog(output) + +if __name__ == "__main__": + syslog.openlog(ident=sys.argv[1], facility=syslog.LOG_DAEMON) + try: + main() + except Exception as e: + syslog.syslog(sys.argv[1] + ' terminated with exception: {}'.format(e)) + finally: + syslog.syslog(sys.argv[1] + ' is now terminated!') diff --git a/scripts/linstor-kv-tool b/scripts/linstor-kv-tool new file mode 100755 index 000000000..b845ec2b9 --- /dev/null +++ b/scripts/linstor-kv-tool @@ -0,0 +1,84 @@ +#!/usr/bin/env python +# +# Copyright (C) 2022 Vates SAS +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +import sys +sys.path[0] = '/opt/xensource/sm/' + +from linstorvolumemanager import get_controller_uri + +import argparse +import json +import linstor + + +def dump_kv(controller_uri, group_name, namespace): + kv = linstor.KV( + group_name, + uri=controller_uri, + namespace=namespace + ) + print(json.dumps(kv, sort_keys=True, indent=2)) + + +def remove_volume(controller_uri, group_name, vdi_name): + assert vdi_name + kv = linstor.KV( + group_name, + uri=controller_uri, + namespace='/xcp/volume/{}'.format(vdi_name) + ) + + for key, value in list(kv.items()): + del kv[key] + + +def remove_all_volumes(controller_uri, group_name): + kv = linstor.KV( + group_name, + uri=controller_uri, + namespace='/' + ) + + for key, value in list(kv.items()): + if key.startswith('xcp/volume/') or key.startswith('xcp/sr/journal/'): + size = key.rindex('/') + kv.namespace = key[:size] + del kv[key[size + 1:]] + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('-u', '--uri', required=False) + parser.add_argument('-g', '--group-name', required=True) + parser.add_argument('-n', '--namespace', default='/') + + action = parser.add_mutually_exclusive_group(required=True) + action.add_argument('--dump-volumes', action='store_true') + action.add_argument('--remove-volume', metavar='VDI_UUID') + action.add_argument('--remove-all-volumes', action='store_true') + + args = parser.parse_args() + controller_uri = get_controller_uri() if args.uri is None else args.uri + + if args.dump_volumes: + dump_kv(controller_uri, args.group_name, args.namespace) + elif args.remove_volume: + remove_volume(controller_uri, args.group_name, args.remove_volume) + elif args.remove_all_volumes: + remove_all_volumes(controller_uri, args.group_name) + + +if __name__ == '__main__': + main() diff --git a/scripts/safe-umount b/scripts/safe-umount new file mode 100755 index 000000000..9c1dcc400 --- /dev/null +++ b/scripts/safe-umount @@ -0,0 +1,39 @@ +#!/usr/bin/env python2 + +import argparse +import subprocess +import sys +import time + + +def safe_umount(path): + retry_count = 10 + not_mounted_str = 'umount: {}: not mounted'.format(path) + + last_code = 0 + while retry_count: + proc = subprocess.Popen(['mountpoint', '-q', path]) + proc.wait() + if proc.returncode: + return 0 + + proc = subprocess.Popen(['umount', path], stderr=subprocess.PIPE) + (stdout, stderr) = 
proc.communicate() + if not proc.returncode: + return 0 + + error = stderr.strip() + if error == not_mounted_str: + return 0 + + retry_count -= 1 + last_code = proc.returncode + time.sleep(0.500) + return last_code + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('path') + args = parser.parse_args() + sys.exit(safe_umount(args.path)) diff --git a/tests/test_on_slave.py b/tests/test_on_slave.py index 679d4421d..4b59f6323 100644 --- a/tests/test_on_slave.py +++ b/tests/test_on_slave.py @@ -13,7 +13,15 @@ class Test_on_slave_is_open(unittest.TestCase): - MOCK_IMPORTS = ['SRCommand', 'SR', 'NFSSR', 'EXTSR', 'LVHDSR', 'blktap2'] + MOCK_IMPORTS = [ + 'SRCommand', + 'SR', + 'NFSSR', + 'EXTSR', + 'LVHDSR', + 'LinstorSR', + 'blktap2' + ] def fake_import(self, *args, **kwargs): print('Asked to import {}'.format(args[0])) From c7ae12b5237608cf3b54fcea3163f3f5e4a3d63a Mon Sep 17 00:00:00 2001 From: Ronan Abhamon Date: Fri, 30 Jun 2023 12:41:43 +0200 Subject: [PATCH 22/72] feat(LinstorSR): is now compatible with python 3 Signed-off-by: Ronan Abhamon --- drivers/LinstorSR.py | 19 +++++++++---------- drivers/cleanup.py | 2 +- drivers/linstor-manager | 4 ++-- drivers/linstorvhdutil.py | 13 +++++-------- drivers/linstorvolumemanager.py | 30 +++++++++++++++--------------- drivers/util.py | 2 +- scripts/fork-log-daemon | 2 +- scripts/linstor-kv-tool | 2 +- scripts/safe-umount | 2 +- 9 files changed, 36 insertions(+), 40 deletions(-) diff --git a/drivers/LinstorSR.py b/drivers/LinstorSR.py index 52d5c26ea..1c9bd54a0 100755 --- a/drivers/LinstorSR.py +++ b/drivers/LinstorSR.py @@ -758,7 +758,7 @@ def scan(self, uuid): self._load_vdis() self._update_physical_size() - for vdi_uuid in self.vdis.keys(): + for vdi_uuid in list(self.vdis.keys()): if self.vdis[vdi_uuid].deleted: del self.vdis[vdi_uuid] @@ -891,7 +891,7 @@ def _update_drbd_reactor_on_all_hosts( secondary_hosts = [] hosts = self.session.xenapi.host.get_all_records() - for host_ref, host_rec in hosts.iteritems(): + for host_ref, host_rec in hosts.items(): hostname = host_rec['hostname'] if controller_node_name == hostname: controller_host = host_ref @@ -1002,7 +1002,7 @@ def _update_physical_size(self): # We use the size of the smallest disk, this is an approximation that # ensures the displayed physical size is reachable by the user. (min_physical_size, pool_count) = self._linstor.get_min_physical_size() - self.physical_size = min_physical_size * pool_count / \ + self.physical_size = min_physical_size * pool_count // \ self._linstor.redundancy self.physical_utilisation = self._linstor.allocated_volume_size @@ -1242,7 +1242,7 @@ def _load_vdis_ex(self): # 9. Remove all hidden leaf nodes to avoid introducing records that # will be GC'ed. - for vdi_uuid in self.vdis.keys(): + for vdi_uuid in list(self.vdis.keys()): if vdi_uuid not in geneology and self.vdis[vdi_uuid].hidden: util.SMlog( 'Scan found hidden leaf ({}), ignoring'.format(vdi_uuid) @@ -1448,17 +1448,16 @@ def _undo_clone(self, volume_names, vdi_uuid, base_uuid, snap_uuid): # -------------------------------------------------------------------------- def _create_linstor_cache(self): - # TODO: use a nonlocal with python3. 
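[Editor's note] Several hunks above wrap `.keys()` in `list(...)` and replace `iteritems()` with `items()`. The reason is that Python 3 dict methods return live views instead of list copies, so mutating the dict while iterating one of them fails. A tiny illustration:

```
d = {'a': 1, 'b': 2}

# Python 2: d.keys() is a list copy, so deleting during the loop is safe.
# Python 3: d.keys() is a live view; deleting while iterating raises
# "RuntimeError: dictionary changed size during iteration".
# for k in d.keys():
#     del d[k]

# Portable form used by the patch: snapshot the keys first.
for k in list(d.keys()):
    del d[k]

assert d == {}
```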
- class context: - reconnect = False + reconnect = False def create_cache(): + nonlocal reconnect try: - if context.reconnect: + if reconnect: self._reconnect() return self._linstor.get_volumes_with_info() except Exception as e: - context.reconnect = True + reconnect = True raise e self._all_volume_metadata_cache = \ @@ -2627,7 +2626,7 @@ def _start_persistent_nbd_server(self, volume_name): '--nbd-name', volume_name, '--urls', - ','.join(map(lambda ip: 'http://' + ip + ':' + port, ips)), + ','.join(['http://' + ip + ':' + port for ip in ips]), '--device-size', str(device_size) ] diff --git a/drivers/cleanup.py b/drivers/cleanup.py index cd354d042..e3b78e525 100755 --- a/drivers/cleanup.py +++ b/drivers/cleanup.py @@ -3481,7 +3481,7 @@ def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid): def _checkSlaves(self, vdi): try: all_openers = self._linstor.get_volume_openers(vdi.uuid) - for openers in all_openers.itervalues(): + for openers in all_openers.values(): for opener in openers.values(): if opener['process-name'] != 'tapdisk': raise util.SMException( diff --git a/drivers/linstor-manager b/drivers/linstor-manager index 8a3083447..8d313ec72 100755 --- a/drivers/linstor-manager +++ b/drivers/linstor-manager @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # # Copyright (C) 2020 Vates SAS - ronan.abhamon@vates.fr # @@ -502,7 +502,7 @@ def get_key_hash(session, args): def get_block_bitmap(session, args): try: device_path = args['devicePath'] - return base64.b64encode(vhdutil.getBlockBitmap(device_path)) or '' + return base64.b64encode(vhdutil.getBlockBitmap(device_path)).decode('ascii') except Exception as e: util.SMlog('linstor-manager:get_block_bitmap error: {}'.format(e)) raise diff --git a/drivers/linstorvhdutil.py b/drivers/linstorvhdutil.py index 836f4ceb9..13e1bb08c 100644 --- a/drivers/linstorvhdutil.py +++ b/drivers/linstorvhdutil.py @@ -27,9 +27,6 @@ MANAGER_PLUGIN = 'linstor-manager' -# EMEDIUMTYPE constant (124) is not available in python2. -EMEDIUMTYPE = 124 - def call_remote_method(session, host_ref, method, device_path, args): try: @@ -107,7 +104,7 @@ def wrapper(*args, **kwargs): 'groupName': self._linstor.group_name } remote_args.update(**kwargs) - remote_args = {str(key): str(value) for key, value in remote_args.iteritems()} + remote_args = {str(key): str(value) for key, value in remote_args.items()} try: def remote_call(): @@ -430,7 +427,7 @@ def local_call(): try: return local_method(device_path, *args, **kwargs) except util.CommandException as e: - if e.code == errno.EROFS or e.code == EMEDIUMTYPE: + if e.code == errno.EROFS or e.code == errno.EMEDIUMTYPE: raise ErofsLinstorCallException(e) # Break retry calls. 
if e.code == errno.ENOENT: raise NoPathLinstorCallException(e) @@ -482,7 +479,7 @@ def _call_method(self, local_method, remote_method, device_path, use_parent, *ar 'groupName': self._linstor.group_name } remote_args.update(**kwargs) - remote_args = {str(key): str(value) for key, value in remote_args.iteritems()} + remote_args = {str(key): str(value) for key, value in remote_args.items()} volume_uuid = self._linstor.get_volume_uuid_from_device_path( device_path @@ -505,12 +502,12 @@ def remote_call(): ) no_host_found = True - for hostname, openers in all_openers.iteritems(): + for hostname, openers in all_openers.items(): if not openers: continue try: - host_ref = next(ref for ref, rec in hosts.iteritems() if rec['hostname'] == hostname) + host_ref = next(ref for ref, rec in hosts.items() if rec['hostname'] == hostname) except StopIteration: continue diff --git a/drivers/linstorvolumemanager.py b/drivers/linstorvolumemanager.py index 5e5bcd518..dbca3b412 100755 --- a/drivers/linstorvolumemanager.py +++ b/drivers/linstorvolumemanager.py @@ -529,8 +529,8 @@ def allocated_volume_size(self): current[volume.number] = max(current_size, current.get(volume.number) or 0) total_size = 0 - for volumes in sizes.itervalues(): - for size in volumes.itervalues(): + for volumes in sizes.values(): + for size in volumes.values(): total_size += size return total_size * 1024 @@ -1710,8 +1710,8 @@ def _create_sr( lin = cls._create_linstor_instance(uri, keep_uri_unmodified=True) - node_names = ips.keys() - for node_name, ip in ips.iteritems(): + node_names = list(ips.keys()) + for node_name, ip in ips.items(): while True: # Try to create node. result = lin.node_create( @@ -2271,13 +2271,13 @@ def _find_device_path(self, volume_uuid, volume_name): def _request_device_path(self, volume_uuid, volume_name, activate=False): node_name = socket.gethostname() - resources = filter( + resource = next(filter( lambda resource: resource.node_name == node_name and resource.name == volume_name, self._get_resource_cache().resources - ) + ), None) - if not resources: + if not resource: if activate: self._mark_resource_cache_as_dirty() self._activate_device_path( @@ -2289,7 +2289,7 @@ def _request_device_path(self, volume_uuid, volume_name, activate=False): .format(volume_uuid) ) # Contains a path of the /dev/drbd form. - return resources[0].volumes[0].device_path + return resource.volumes[0].device_path def _destroy_resource(self, resource_name, force=False): result = self._linstor.resource_dfn_delete(resource_name) @@ -2307,7 +2307,7 @@ def _destroy_resource(self, resource_name, force=False): # If force is used, ensure there is no opener. 
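[Editor's note] The `filter(...)` to `next(filter(...), None)` changes above address another Python 3 behaviour change: `filter()` now returns a lazy iterator, which is truthy even when it would yield nothing, so the old `if not resources:` emptiness test can never trigger. A short illustration (Python 3 semantics):

```
resources = []

# The filter object is truthy regardless of whether it yields anything.
filtered = filter(lambda r: r == 'wanted', resources)
assert bool(filtered)  # surprising, but true on Python 3

# The patch therefore takes the first match (or None) explicitly:
first = next(filter(lambda r: r == 'wanted', resources), None)
assert first is None
```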
all_openers = get_all_volume_openers(resource_name, '0') - for openers in all_openers.itervalues(): + for openers in all_openers.values(): if openers: self._mark_resource_cache_as_dirty() raise LinstorVolumeManagerError( @@ -2571,18 +2571,18 @@ def _request_database_path(cls, lin, activate=False): node_name = socket.gethostname() try: - resources = filter( + resource = next(filter( lambda resource: resource.node_name == node_name and resource.name == DATABASE_VOLUME_NAME, lin.resource_list_raise().resources - ) + ), None) except Exception as e: raise LinstorVolumeManagerError( 'Unable to get resources during database creation: {}' .format(e) ) - if not resources: + if not resource: if activate: cls._activate_device_path( lin, node_name, DATABASE_VOLUME_NAME @@ -2595,7 +2595,7 @@ def _request_database_path(cls, lin, activate=False): .format(DATABASE_PATH) ) # Contains a path of the /dev/drbd form. - return resources[0].volumes[0].device_path + return resource.volumes[0].device_path @classmethod def _create_database_volume( @@ -2630,7 +2630,7 @@ def _create_database_volume( ) # Ensure we have a correct list of storage pools. - nodes_with_pool = map(lambda pool: pool.node_name, pools.storage_pools) + nodes_with_pool = [pool.node_name for pool in pools.storage_pools] assert nodes_with_pool # We must have at least one storage pool! for node_name in nodes_with_pool: assert node_name in node_names @@ -2874,7 +2874,7 @@ def _check_volume_creation_errors(cls, result, volume_uuid, group_name): def _move_files(cls, src_dir, dest_dir, force=False): def listdir(dir): ignored = ['lost+found'] - return filter(lambda file: file not in ignored, os.listdir(dir)) + return [file for file in os.listdir(dir) if file not in ignored] try: if not force: diff --git a/drivers/util.py b/drivers/util.py index 98b21ae1d..4590a46c0 100755 --- a/drivers/util.py +++ b/drivers/util.py @@ -756,7 +756,7 @@ def get_this_host_address(session): def get_host_addresses(session): addresses = [] hosts = session.xenapi.host.get_all_records() - for record in hosts.itervalues(): + for record in hosts.values(): addresses.append(record['address']) return addresses diff --git a/scripts/fork-log-daemon b/scripts/fork-log-daemon index 665a60baf..986de63ff 100755 --- a/scripts/fork-log-daemon +++ b/scripts/fork-log-daemon @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 import select import signal diff --git a/scripts/linstor-kv-tool b/scripts/linstor-kv-tool index b845ec2b9..de14e731e 100755 --- a/scripts/linstor-kv-tool +++ b/scripts/linstor-kv-tool @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # # Copyright (C) 2022 Vates SAS # diff --git a/scripts/safe-umount b/scripts/safe-umount index 9c1dcc400..3c64a3f31 100755 --- a/scripts/safe-umount +++ b/scripts/safe-umount @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3 import argparse import subprocess From e22589d051ea11935f6fd269b1a9d65cbd9ba5f3 Mon Sep 17 00:00:00 2001 From: BenjiReis Date: Fri, 4 Aug 2023 12:10:37 +0200 Subject: [PATCH 23/72] Remove `SR_PROBE` from ZFS capabilities (#36) The probe method is not implemented so we shouldn't advertise it. 
Signed-off-by: BenjiReis --- drivers/ZFSSR.py | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/ZFSSR.py b/drivers/ZFSSR.py index 354ca90e1..5301d5ecc 100644 --- a/drivers/ZFSSR.py +++ b/drivers/ZFSSR.py @@ -23,7 +23,6 @@ import xs_errors CAPABILITIES = [ - 'SR_PROBE', 'SR_UPDATE', 'VDI_CREATE', 'VDI_DELETE', From 411c5dd34c9184a0eb9e9ed1fc6c74079cc06830 Mon Sep 17 00:00:00 2001 From: Ronan Abhamon Date: Fri, 22 Sep 2023 11:11:27 +0200 Subject: [PATCH 24/72] Repair coverage to be compatible with 8.3 test env Impacted drivers: LINSTOR, MooseFS and ZFS. - Ignore all linstor.* members during coverage, the module is not installed in github runner. - Use mock from unittest, the old one is not found now. - Remove useless return from LinstorSR scan method. Signed-off-by: Ronan Abhamon --- drivers/LinstorSR.py | 3 +-- tests/pylintrc | 2 +- tests/test_MooseFSSR.py | 4 +++- tests/test_ZFSSR.py | 4 ++-- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/LinstorSR.py b/drivers/LinstorSR.py index 1c9bd54a0..fe6d01d4d 100755 --- a/drivers/LinstorSR.py +++ b/drivers/LinstorSR.py @@ -779,9 +779,8 @@ def scan(self, uuid): # Update the database before the restart of the GC to avoid # bad sync in the process if new VDIs have been introduced. - ret = super(LinstorSR, self).scan(self.uuid) + super(LinstorSR, self).scan(self.uuid) self._kick_gc() - return ret @_locked_load def vdi(self, uuid): diff --git a/tests/pylintrc b/tests/pylintrc index a982913b3..4588675ba 100644 --- a/tests/pylintrc +++ b/tests/pylintrc @@ -84,7 +84,7 @@ ignored-classes=SQLObject # List of members which are set dynamically and missed by pylint inference # system, and so shouldn't trigger E0201 when accessed. -generated-members=REQUEST,acl_users,aq_parent +generated-members=REQUEST,acl_users,aq_parent,linstor.* # List of module names for which member attributes should not be checked # # (useful for modules/projects where namespaces are manipulated during runtime diff --git a/tests/test_MooseFSSR.py b/tests/test_MooseFSSR.py index feaac6267..f4e0a852f 100644 --- a/tests/test_MooseFSSR.py +++ b/tests/test_MooseFSSR.py @@ -1,4 +1,6 @@ -import mock +from unittest import mock +import unittest + import MooseFSSR import unittest diff --git a/tests/test_ZFSSR.py b/tests/test_ZFSSR.py index d0cca9359..544ea39ac 100644 --- a/tests/test_ZFSSR.py +++ b/tests/test_ZFSSR.py @@ -1,10 +1,10 @@ +from unittest import mock +import unittest import uuid import FileSR -import mock import os import SR -import unittest import ZFSSR From ba6032717533513c9f2c35b0a1d87d7bd83c6dba Mon Sep 17 00:00:00 2001 From: BenjiReis Date: Mon, 25 Sep 2023 16:13:13 +0200 Subject: [PATCH 25/72] Support IPv6 in Ceph Driver Signed-off-by: BenjiReis --- drivers/CephFSSR.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/CephFSSR.py b/drivers/CephFSSR.py index bd6a4b149..f7c263364 100644 --- a/drivers/CephFSSR.py +++ b/drivers/CephFSSR.py @@ -20,6 +20,7 @@ import errno import os +import socket import syslog as _syslog import xmlrpc.client from syslog import syslog @@ -141,7 +142,17 @@ def mount(self, mountpoint=None): options.append(self.dconf['options']) if options: options = ['-o', ','.join(options)] - command = ["mount", '-t', 'ceph', self.remoteserver+":"+self.remoteport+":"+self.remotepath, mountpoint] + options + acc = [] + for server in self.remoteserver.split(','): + try: + addr_info = socket.getaddrinfo(server, 0)[0] + except Exception: + continue + + acc.append('[' + server + ']' if addr_info[0] == 
socket.AF_INET6 else server) + + remoteserver = ','.join(acc) + command = ["mount", '-t', 'ceph', remoteserver + ":" + self.remoteport + ":" + self.remotepath, mountpoint] + options util.ioretry(lambda: util.pread(command), errlist=[errno.EPIPE, errno.EIO], maxretry=2, nofail=True) except util.CommandException as inst: syslog(_syslog.LOG_ERR, 'CephFS mount failed ' + inst.__str__()) From ca1958037242d2bd272a25485ee4ade50b8a20b1 Mon Sep 17 00:00:00 2001 From: Yann Dirson Date: Wed, 5 Jul 2023 16:57:26 +0200 Subject: [PATCH 26/72] lvutil: use wipefs not dd to clear existing signatures (xapi-project#624) Signed-off-by: Yann Dirson --- drivers/XE_SR_ERRORCODES.xml | 6 ++++++ drivers/lvutil.py | 20 ++++---------------- drivers/util.py | 5 +++++ 3 files changed, 15 insertions(+), 16 deletions(-) diff --git a/drivers/XE_SR_ERRORCODES.xml b/drivers/XE_SR_ERRORCODES.xml index 1986bc551..3578dd55a 100755 --- a/drivers/XE_SR_ERRORCODES.xml +++ b/drivers/XE_SR_ERRORCODES.xml @@ -909,6 +909,12 @@ 461 + + WipefsFailure + Failed to wipe pre-existing filesystem signature. + 462 + + GenericException SM has thrown a generic python exception diff --git a/drivers/lvutil.py b/drivers/lvutil.py index 48767ead7..2d990a9bd 100755 --- a/drivers/lvutil.py +++ b/drivers/lvutil.py @@ -478,24 +478,12 @@ def createVG(root, vgname): f = _openExclusive(dev, True) os.close(f) + + # Wipe any fs signature try: - # Overwrite the disk header, try direct IO first - cmd = [util.CMD_DD, "if=/dev/zero", "of=%s" % dev, "bs=1M", - "count=10", "oflag=direct"] - util.pread2(cmd) + util.wipefs(dev) except util.CommandException as inst: - if inst.code == errno.EPERM: - try: - # Overwrite the disk header, try normal IO - cmd = [util.CMD_DD, "if=/dev/zero", "of=%s" % dev, - "bs=1M", "count=10"] - util.pread2(cmd) - except util.CommandException as inst: - raise xs_errors.XenError('LVMWrite', \ - opterr='device %s' % dev) - else: - raise xs_errors.XenError('LVMWrite', \ - opterr='device %s' % dev) + raise xs_errors.XenError('WipefsFailure', opterr='device %s' % dev) # from inst if not (dev == rootdev): try: diff --git a/drivers/util.py b/drivers/util.py index 4590a46c0..e5d5da0af 100755 --- a/drivers/util.py +++ b/drivers/util.py @@ -643,6 +643,11 @@ def zeroOut(path, fromByte, bytes): return True +def wipefs(blockdev): + "Wipe filesystem signatures from `blockdev`" + pread2(["/usr/sbin/wipefs", "-a", blockdev]) + + def match_rootdev(s): regex = re.compile("^PRIMARY_DISK") return regex.search(s, 0) From ad42020aacbd31f149a867dff0ad5c86bebd8e68 Mon Sep 17 00:00:00 2001 From: Damien Thenot Date: Fri, 12 Apr 2024 15:08:59 +0200 Subject: [PATCH 27/72] feat(LargeBlock): introduce largeblocksr (#51) A SR inheriting from a EXTSR allowing to use a 4KiB blocksize device as SR. Create a 512 bytes loop device on top of a 4KiB device then give it to EXTSR code. It uses the same device-config as a normal local SR, i.e. `device-config:device=/dev/nvme0n1` After creation, the driver find the device under the VG to identify the correct disk. It means that creating the SR with a non-stable disk identifier is doable and it will work as EXTSR would by ignoring the device-config after creation. Identifying the correct disk by using LVM infos. The VG is created using a different prefix name from EXTSR. It is `XSLocalLargeBlock-`. The SR artificially limits the creation to disk not being 512b. It will throw an error if a disk whose blocksize is 512 is given. We currently don't support multi devices, it fails at the EXTSR creation. 
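[Editor's note] To make the emulation approach described above concrete: the SR refuses devices that already expose 512-byte logical sectors (a regular EXT SR handles those), and otherwise places a 512-byte loop device in front of the 4 KiB drive before handing it to the EXT code. A rough sketch of that idea, assuming a util-linux `losetup` recent enough to support `--sector-size`; the exact mechanism used by the driver may differ:

```
import subprocess

def logical_sector_size(device):
    # `blockdev --getss` reports the logical sector size, as in the driver.
    return int(subprocess.check_output(
        ['blockdev', '--getss', device], universal_newlines=True
    ).strip())

def create_512b_loop(device):
    if logical_sector_size(device) == 512:
        raise RuntimeError(
            '{} already uses 512-byte sectors; use a regular EXT SR'.format(device)
        )
    # Expose the 4 KiB device through a loop device with 512-byte logical
    # sectors (hypothetical invocation, see the assumption above).
    return subprocess.check_output(
        ['losetup', '--find', '--show', '--sector-size', '512', device],
        universal_newlines=True
    ).strip()
```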
We currently don't support multiple devices; creation fails at the EXTSR stage.
We added an error to state explicitly that multi-device SRs are not supported
by this driver. Before that, it would produce a different error:
```
Error code: SR_BACKEND_FAILURE_77
Error parameters: , Logical Volume group creation failed,
```

Sometimes the pvremove from EXTSR using the loop device fails. In this case,
we need to remove the real device from the PV list ourselves in the error
handling.

Signed-off-by: Damien Thenot
---
 Makefile                     |   1 +
 drivers/LargeBlockSR.py      | 249 +++++++++++++++++++++++++++++++++++
 drivers/XE_SR_ERRORCODES.xml |  22 ++++
 drivers/cleanup.py           |   2 +-
 drivers/lvutil.py            |   8 +-
 drivers/on_slave.py          |   1 +
 6 files changed, 280 insertions(+), 3 deletions(-)
 create mode 100644 drivers/LargeBlockSR.py

diff --git a/Makefile b/Makefile
index 9a8a03147..d33c06059 100755
--- a/Makefile
+++ b/Makefile
@@ -20,6 +20,7 @@ SM_DRIVERS += GlusterFS
 SM_DRIVERS += XFS
 SM_DRIVERS += ZFS
 SM_DRIVERS += MooseFS
+SM_DRIVERS += LargeBlock
 
 SM_LIBS := SR
 SM_LIBS += SRCommand
diff --git a/drivers/LargeBlockSR.py b/drivers/LargeBlockSR.py
new file mode 100644
index 000000000..ba0ac1d13
--- /dev/null
+++ b/drivers/LargeBlockSR.py
@@ -0,0 +1,249 @@
+#!/usr/bin/env python3
+#
+# Copyright (C) 2024 Vates SAS - damien.thenot@vates.tech
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see .
+
+import SR
+from SR import deviceCheck
+import SRCommand
+import EXTSR
+import util
+import xs_errors
+import os
+import re
+import lvutil
+
+CAPABILITIES = ["SR_PROBE", "SR_UPDATE", "SR_SUPPORTS_LOCAL_CACHING",
+                "VDI_CREATE", "VDI_DELETE", "VDI_ATTACH", "VDI_DETACH",
+                "VDI_UPDATE", "VDI_CLONE", "VDI_SNAPSHOT", "VDI_RESIZE", "VDI_MIRROR",
+                "VDI_GENERATE_CONFIG",
+                "VDI_RESET_ON_BOOT/2", "ATOMIC_PAUSE", "VDI_CONFIG_CBT",
+                "VDI_ACTIVATE", "VDI_DEACTIVATE", "THIN_PROVISIONING", "VDI_READ_CACHING"]
+
+CONFIGURATION = [['device', 'local device path (required) (e.g.
/dev/sda3)']] + +DRIVER_INFO = { + 'name': 'Large Block SR', + 'description': 'SR plugin which emulates a 512 bytes disk on top of a 4KiB device then create a EXT SR', + 'vendor': 'Vates', + 'copyright': '(C) 2024 Vates', + 'driver_version': '1.0', + 'required_api_version': '1.0', + 'capabilities': CAPABILITIES, + 'configuration': CONFIGURATION +} + +LARGEBLOCK_PREFIX = "XSLocalLargeBlock-" + +class LargeBlockSR(EXTSR.EXTSR): + """Emulating 512b drives for EXT storage repository""" + + DRIVER_TYPE = "largeblock" + LOOP_SECTOR_SIZE = 512 + + @staticmethod + def handles(srtype): + return srtype == LargeBlockSR.DRIVER_TYPE + + def load(self, sr_uuid): + super(LargeBlockSR, self).load(sr_uuid) + self.is_deleting = False + self.vgname = LARGEBLOCK_PREFIX + sr_uuid + self.remotepath = os.path.join("/dev", self.vgname, sr_uuid) + + def attach(self, sr_uuid): + if not self.is_deleting: + vg_device = self._get_device() + self.dconf["device"] = ",".join(vg_device) + self._create_emulated_device() + if not self._is_vg_connection_correct(): # Check if we need to redo the connection by parsing `vgs -o vg_name,devices self.vgname` + self._redo_vg_connection() # Call redo VG connection to connect it correctly to the loop device instead of the real 4KiB block device + super(LargeBlockSR, self).attach(sr_uuid) + + def detach(self, sr_uuid): + if not self.is_deleting: + vg_device = self._get_device() + self.dconf["device"] = ",".join(vg_device) + super(LargeBlockSR, self).detach(sr_uuid) + if not self.is_deleting: + self._destroy_emulated_device() + + @deviceCheck + def create(self, sr_uuid, size): + base_devices = self.dconf["device"].split(",") + if len(base_devices) > 1: + raise xs_errors.XenError("ConfigDeviceInvalid", opterr="Multiple devices configuration is not supported") + + for dev in base_devices: + logical_blocksize = util.pread2(["blockdev", "--getss", dev]).strip() + if logical_blocksize == "512": + raise xs_errors.XenError("LargeBlockIncorrectBlocksize", opterr="The logical blocksize of the device {} is compatible with normal SR types".format(dev)) + + try: + self._create_emulated_device() + super(LargeBlockSR, self).create(sr_uuid, size) + finally: + self._destroy_emulated_device(base_devices) + + def delete(self, sr_uuid): + base_devices = self._get_device() + self.dconf["device"] = ",".join(self._get_loopdev_from_device(base_devices)) + + self.is_deleting = True + try: + super(LargeBlockSR, self).delete(sr_uuid) + except xs_errors.SROSError: + # In case, the lvremove doesn't like the loop device, it will throw an error. + # We need to remove the device ourselves using the real device in this case. 
+ for dev in base_devices: + util.pread2(["pvremove", dev]) + finally: + self._destroy_emulated_device(base_devices) + self.is_deleting = False + + @deviceCheck + def probe(self): + # We override EXTSR.probe because it uses EXT_PREFIX in this call + return lvutil.srlist_toxml( + lvutil.scan_srlist(LARGEBLOCK_PREFIX, self.dconf['device']), + LARGEBLOCK_PREFIX + ) + + def _create_loopdev(self, dev, emulated_path): + cmd = ["losetup", "-f", "-v", "--show", "--sector-size", str(self.LOOP_SECTOR_SIZE), dev] + loopdev = util.pread2(cmd).rstrip() + + if os.path.exists(emulated_path) and os.path.islink(emulated_path): + os.unlink(emulated_path) + + try: + os.symlink(loopdev, emulated_path) + except OSError: + raise xs_errors.XenError("LargeBlockSymlinkExist", opterr="Symlink {} couldn't be created".format(emulated_path)) + + def _delete_loopdev(self, dev, emulated_path): + if os.path.exists(emulated_path) and os.path.islink(emulated_path): + os.unlink(emulated_path) + + # The backing file isn't a symlink if given by ID in device-config but the real device + dev = os.path.realpath(dev) + loopdevs = self._get_loopdev_from_device(dev) + + if loopdevs != None: + try: + for lp in loopdevs: + cmd = ["losetup", "-d", lp] # Remove the loop device + util.pread2(cmd) + except xs_errors.SROSError: + util.SMlog("Couldn't removed losetup devices: {}".format(loopdevs)) + else: + xs_errors.XenError("LargeBlockNoLosetup", opterr="Couldn't find loop device for {}".format(dev)) + + @staticmethod + def _get_loopdev_from_device(device): + lpdevs = [] + output = util.pread2(["losetup", "--list"]).rstrip() + if output: + for line in output.split("\n"): + line = line.split() + loopdev = line[0] + dev = line[5].strip() + if dev in device: + lpdevs.append(loopdev) + return lpdevs + + @staticmethod + def _get_device_from_loopdev(loopdevs): + devices = [] + output = util.pread2(["losetup", "--list"]).rstrip() + if output: + for line in output.split("\n"): + line = line.split() + lpdev = line[0] + dev = line[5] + if lpdev in loopdevs: + devices.append(dev) + return devices + + def _get_device_from_vg(self): + devices = [] + output = util.pread2(["vgs", "--noheadings", "-o", "vg_name,devices", self.vgname]).splitlines() + for line in output: + line = line.split() + dev = line[1].split("(")[0] + if os.path.islink(dev): + dev = os.path.realpath(dev) + devices.append(dev) + return devices + + def _get_device(self): + vg_device = self._get_device_from_vg() + for dev in vg_device: + if re.match(r"(.*\.512)|(/dev/loop[0-9]+)", dev): + lpdev = os.path.realpath(dev) + realdev = self._get_device_from_loopdev(lpdev)[0] + vg_device.remove(dev) + vg_device.append(realdev) + + return vg_device + + def _is_vg_connection_correct(self): + output = util.pread2(["vgs", "--noheadings", "-o", "vg_name,devices", self.vgname]).split() + output[1] = output[1].split("(")[0] + return bool(re.match(r"(.*\.512)|(/dev/loop[0-9]+)", output[1])) + + def _redo_vg_connection(self): + """ + In case of using a LargeBlockSR, the LVM scan at boot will find the LogicalVolume on the real block device. + And when the PBD is connecting, it will mount from the original device instead of the loop device since LVM prefers real devices it has seen first. + The PBD plug will succeed but then the SR will be accessed through the 4KiB device, returning to the erroneous behaviour on 4KiB device. + VM won't be able to run because vhd-util will fail to scan the VDI. 
+ This function force the LogicalVolume to be mounted on top of our emulation layer by disabling the VolumeGroup and re-enabling while applying a filter. + """ + + util.SMlog("Reconnecting VG {} to use emulated device".format(self.vgname)) + try: + lvutil.setActiveVG(self.vgname, False) + lvutil.setActiveVG(self.vgname, True, config="devices{ global_filter = [ \"r|^/dev/nvme.*|\", \"a|/dev/loop.*|\" ] }") + except util.CommandException as e: + xs_errors.XenError("LargeBlockVGReconnectFailed", opterr="Failed to reconnect the VolumeGroup {}, error: {}".format(self.vgname, e)) + + + @classmethod + def _get_emulated_device_path(cls, dev): + return "{dev}.{bs}".format(dev=dev, bs=cls.LOOP_SECTOR_SIZE) + + def _create_emulated_device(self): + base_devices = self.dconf["device"].split(",") + emulated_devices = [] + for dev in base_devices: + emulated_path = self._get_emulated_device_path(dev) + self._create_loopdev(dev, emulated_path) + emulated_devices.append(emulated_path) + + emulated_devices = ",".join(emulated_devices) + self.dconf["device"] = emulated_devices + + def _destroy_emulated_device(self, devices=None): + if devices is None: + devices = self.dconf["device"].split(",") + + for dev in devices: + emulated_path = self._get_emulated_device_path(dev) + self._delete_loopdev(dev, emulated_path) + +if __name__ == '__main__': + SRCommand.run(LargeBlockSR, DRIVER_INFO) +else: + SR.registerSR(LargeBlockSR) diff --git a/drivers/XE_SR_ERRORCODES.xml b/drivers/XE_SR_ERRORCODES.xml index 3578dd55a..b5813153f 100755 --- a/drivers/XE_SR_ERRORCODES.xml +++ b/drivers/XE_SR_ERRORCODES.xml @@ -968,4 +968,26 @@ LINSTOR SR delete error 5007 + + + LargeBlockSymlinkExist + Symlink already exists + 5008 + + + + LargeBlockNoLosetup + Couldn't find loop device + 5009 + + + LargeBlockIncorrectBlocksize + Blocksize isn't compatible with the driver + 5010 + + + LargeBlockVGReconnectFailed + Failed to reconnect the VolumeGroup + 5011 + diff --git a/drivers/cleanup.py b/drivers/cleanup.py index e3b78e525..9713ffe5b 100755 --- a/drivers/cleanup.py +++ b/drivers/cleanup.py @@ -3530,7 +3530,7 @@ def normalizeType(type): type = SR.TYPE_LVHD if type in [ "ext", "nfs", "ocfsoiscsi", "ocfsohba", "smb", "cephfs", "glusterfs", - "moosefs", "xfs", "zfs" + "moosefs", "xfs", "zfs", "largeblock" ]: type = SR.TYPE_FILE if type in ["linstor"]: diff --git a/drivers/lvutil.py b/drivers/lvutil.py index 2d990a9bd..dfd9d5c55 100755 --- a/drivers/lvutil.py +++ b/drivers/lvutil.py @@ -569,12 +569,16 @@ def resizePV(dev): util.SMlog("Failed to grow the PV, non-fatal") -def setActiveVG(path, active): +def setActiveVG(path, active, config=None): "activate or deactivate VG 'path'" val = "n" if active: val = "y" - text = cmd_lvm([CMD_VGCHANGE, "-a" + val, path]) + cmd = [CMD_VGCHANGE, "-a" + val, path] + if config: + cmd.append("--config") + cmd.append(config) + cmd_lvm(cmd) @lvmretry diff --git a/drivers/on_slave.py b/drivers/on_slave.py index 524424f61..2f58281ae 100755 --- a/drivers/on_slave.py +++ b/drivers/on_slave.py @@ -78,6 +78,7 @@ def _is_open(session, args): import SR import CephFSSR import EXTSR + import LargeBlockSR import GlusterFSSR import LinstorSR import LVHDSR From 1dbb1fc42a5440fc62504b8efc12ca1e99f5b441 Mon Sep 17 00:00:00 2001 From: Ronan Abhamon Date: Tue, 30 Jul 2024 15:31:23 +0200 Subject: [PATCH 28/72] feat(LVHDSR): add a way to modify config of LVMs (#60) With this change the driver supports a "lvm-conf" param on "other-config". For now The configuration is only used by "remove" calls from LVMCache. 
Example to issue discards after a lvremove command: > xe sr-param-set uuid= other-config:lvm-conf=issue_discards=1 And to remove the param: > xe sr-param-remove uuid= param-name=other-config param-key=lvm-conf Signed-off-by: Ronan Abhamon --- drivers/LVHDSR.py | 15 ++++++++++++--- drivers/cleanup.py | 7 ++++++- drivers/lvmcache.py | 5 +++-- 3 files changed, 21 insertions(+), 6 deletions(-) diff --git a/drivers/LVHDSR.py b/drivers/LVHDSR.py index 6e0d71eba..23adf3a9c 100755 --- a/drivers/LVHDSR.py +++ b/drivers/LVHDSR.py @@ -161,14 +161,24 @@ def load(self, sr_uuid): self.path = os.path.join(lvhdutil.VG_LOCATION, self.vgname) self.mdpath = os.path.join(self.path, self.MDVOLUME_NAME) self.provision = self.PROVISIONING_DEFAULT + + self.other_conf = None + has_sr_ref = self.srcmd.params.get("sr_ref") + if has_sr_ref: + self.other_conf = self.session.xenapi.SR.get_other_config(self.sr_ref) + + self.lvm_conf = None + if self.other_conf: + self.lvm_conf = self.other_conf.get('lvm-conf') + try: - self.lvmCache = lvmcache.LVMCache(self.vgname) + self.lvmCache = lvmcache.LVMCache(self.vgname, self.lvm_conf) except: raise xs_errors.XenError('SRUnavailable', \ opterr='Failed to initialise the LVMCache') self.lvActivator = LVActivator(self.uuid, self.lvmCache) self.journaler = Journaler(self.lvmCache) - if not self.srcmd.params.get("sr_ref"): + if not has_sr_ref: return # must be a probe call # Test for thick vs thin provisioning conf parameter if 'allocation' in self.dconf: @@ -178,7 +188,6 @@ def load(self, sr_uuid): raise xs_errors.XenError('InvalidArg', \ opterr='Allocation parameter must be one of %s' % self.PROVISIONING_TYPES) - self.other_conf = self.session.xenapi.SR.get_other_config(self.sr_ref) if self.other_conf.get(self.TEST_MODE_KEY): self.testMode = self.other_conf[self.TEST_MODE_KEY] self._prepareTestMode() diff --git a/drivers/cleanup.py b/drivers/cleanup.py index 9713ffe5b..577ff4bb2 100755 --- a/drivers/cleanup.py +++ b/drivers/cleanup.py @@ -2934,7 +2934,12 @@ def __init__(self, uuid, xapi, createLock, force): SR.__init__(self, uuid, xapi, createLock, force) self.vgName = "%s%s" % (lvhdutil.VG_PREFIX, self.uuid) self.path = os.path.join(lvhdutil.VG_LOCATION, self.vgName) - self.lvmCache = lvmcache.LVMCache(self.vgName) + + sr_ref = self.xapi.session.xenapi.SR.get_by_uuid(self.uuid) + other_conf = self.xapi.session.xenapi.SR.get_other_config(sr_ref) + lvm_conf = other_conf.get('lvm-conf') if other_conf else None + self.lvmCache = lvmcache.LVMCache(self.vgName, lvm_conf) + self.lvActivator = LVActivator(self.uuid, self.lvmCache) self.journaler = journaler.Journaler(self.lvmCache) diff --git a/drivers/lvmcache.py b/drivers/lvmcache.py index 8c63d45a3..6e21568ea 100644 --- a/drivers/lvmcache.py +++ b/drivers/lvmcache.py @@ -59,10 +59,11 @@ class LVMCache: """Per-VG object to store LV information. 
Can be queried for cached LVM information and refreshed""" - def __init__(self, vgName): + def __init__(self, vgName, config=None): """Create a cache for VG vgName, but don't scan the VG yet""" self.vgName = vgName self.vgPath = "/dev/%s" % self.vgName + self.config = config self.lvs = dict() self.tags = dict() self.initialized = False @@ -115,7 +116,7 @@ def create(self, lvName, size, tag=None): @lazyInit def remove(self, lvName): path = self._getPath(lvName) - lvutil.remove(path) + lvutil.remove(path, self.config) for tag in self.lvs[lvName].tags: self._removeTag(lvName, tag) del self.lvs[lvName] From 5d7af6f0f2e8a6c338aa1fbd2ca02b97e0ceaae5 Mon Sep 17 00:00:00 2001 From: Benjamin Reis Date: Tue, 13 Aug 2024 11:11:39 +0200 Subject: [PATCH 29/72] reflect upstream changes in our tests Signed-off-by: Benjamin Reis --- tests/test_ISOSR.py | 7 ++----- tests/test_ZFSSR.py | 7 ++++--- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/tests/test_ISOSR.py b/tests/test_ISOSR.py index 9dec459c7..ad5d99dc3 100644 --- a/tests/test_ISOSR.py +++ b/tests/test_ISOSR.py @@ -68,17 +68,14 @@ def test_attach_and_detach_local_with_mounted_path( isosr.detach(None) self.assertFalse(pread.called) - @testlib.with_context @mock.patch('os.path.exists') @mock.patch('util.pread') - def test_attach_local_with_bad_path(self, context, pread, exists): - context.setup_error_codes() - + def test_attach_local_with_bad_path(self, pread, exists): # Local path doesn't exist, but error list yes. exists.side_effect = [False, True] isosr = self.create_isosr() - with self.assertRaises(SR.SROSError) as ose: + with self.assertRaises(xs_errors.SROSError) as ose: isosr.attach(None) self.assertEqual(ose.exception.errno, 226) self.assertFalse(pread.called) diff --git a/tests/test_ZFSSR.py b/tests/test_ZFSSR.py index 544ea39ac..47d72b89b 100644 --- a/tests/test_ZFSSR.py +++ b/tests/test_ZFSSR.py @@ -6,6 +6,7 @@ import os import SR import ZFSSR +import xs_errors XML_DEFS = os.path.dirname(os.path.abspath(__file__)) + \ @@ -51,7 +52,7 @@ def test_load_with_zfs_unavailable(self): failed = False try: self.create_zfs_sr() - except SR.SROSError as e: + except xs_errors.SROSError as e: # Check SRUnavailable error. failed = e.errno == 47 self.assertTrue(failed) @@ -76,7 +77,7 @@ def test_create_with_invalid_zfs_path( sr = self.create_zfs_sr() try: sr.create(sr.uuid, 42) - except SR.SROSError as e: + except xs_errors.SROSError as e: # Check ZFSSRCreate error. failed = e.errno == 5000 self.assertTrue(failed) @@ -109,7 +110,7 @@ def test_scan_with_invalid_zfs_path( sr = self.create_zfs_sr() try: sr.scan(sr.uuid) - except SR.SROSError as e: + except xs_errors.SROSError as e: # Check SRUnavailable error. 
failed = e.errno == 47 self.assertTrue(failed) From 8d513c814232edb334d5e6a10ab5a6d403309811 Mon Sep 17 00:00:00 2001 From: Ronan Abhamon Date: Thu, 26 Sep 2024 18:53:04 +0200 Subject: [PATCH 30/72] Synchronization with 8.2 LINSTOR before a stable release (#68) Last commit: 9207abe1f2e1ff1795cdba1a0aeb76574412a583 "fix(linstor): check if resource is tiebreaker (#62)" Signed-off-by: Ronan Abhamon --- dev_requirements.txt | 1 + drivers/LinstorSR.py | 102 +++--- drivers/linstor-manager | 183 ++++++++--- drivers/linstorvhdutil.py | 60 +++- drivers/linstorvolumemanager.py | 349 +++++++++++++++------ drivers/tapdisk-pause | 6 +- {tests/mocks => mocks}/linstor/__init__.py | 0 7 files changed, 499 insertions(+), 202 deletions(-) rename {tests/mocks => mocks}/linstor/__init__.py (100%) diff --git a/dev_requirements.txt b/dev_requirements.txt index 104316bae..f25f76868 100644 --- a/dev_requirements.txt +++ b/dev_requirements.txt @@ -2,3 +2,4 @@ coverage astroid==2.3.3 pylint==2.4.4 bitarray +python-linstor diff --git a/drivers/LinstorSR.py b/drivers/LinstorSR.py index fe6d01d4d..829c48f8d 100755 --- a/drivers/LinstorSR.py +++ b/drivers/LinstorSR.py @@ -362,9 +362,6 @@ def load(self, sr_uuid): self._linstor = None # Ensure that LINSTOR attribute exists. self._journaler = None - self._is_master = False - if 'SRmaster' in self.dconf and self.dconf['SRmaster'] == 'true': - self._is_master = True self._group_name = self.dconf['group-name'] self._vdi_shared_time = 0 @@ -437,7 +434,7 @@ def connect(): return wrapped_method(self, *args, **kwargs) - if not self._is_master: + if not self.is_master(): if self.cmd in [ 'sr_create', 'sr_delete', 'sr_update', 'sr_probe', 'sr_scan', 'vdi_create', 'vdi_delete', 'vdi_resize', @@ -472,7 +469,7 @@ def connect(): # Ensure we use a non-locked volume when vhdutil is called. if ( - self._is_master and self.cmd.startswith('vdi_') and + self.is_master() and self.cmd.startswith('vdi_') and self.cmd != 'vdi_create' ): self._linstor.ensure_volume_is_not_locked( @@ -487,7 +484,7 @@ def connect(): # # If the command is a SR command we want at least to remove # resourceless volumes. - if self._is_master and self.cmd not in [ + if self.is_master() and self.cmd not in [ 'vdi_attach', 'vdi_detach', 'vdi_activate', 'vdi_deactivate', 'vdi_epoch_begin', 'vdi_epoch_end', @@ -650,17 +647,17 @@ def delete(self, uuid): opterr='Cannot get controller node name' ) - host = None + host_ref = None if node_name == 'localhost': - host = util.get_this_host_ref(self.session) + host_ref = util.get_this_host_ref(self.session) else: for slave in util.get_all_slaves(self.session): r_name = self.session.xenapi.host.get_record(slave)['hostname'] if r_name == node_name: - host = slave + host_ref = slave break - if not host: + if not host_ref: raise xs_errors.XenError( 'LinstorSRDelete', opterr='Failed to find host with hostname: {}'.format( @@ -677,7 +674,7 @@ def delete(self, uuid): 'groupName': self._group_name, } self._exec_manager_command( - host, 'destroy', args, 'LinstorSRDelete' + host_ref, 'destroy', args, 'LinstorSRDelete' ) except Exception as e: try: @@ -766,7 +763,7 @@ def scan(self, uuid): # is started without a shared and mounted /var/lib/linstor path. try: self._linstor.get_database_path() - except Exception: + except Exception as e: # Failed to get database path, ensure we don't have # VDIs in the XAPI database... 
if self.session.xenapi.SR.get_VDIs( @@ -774,7 +771,7 @@ def scan(self, uuid): ): raise xs_errors.XenError( 'SRUnavailable', - opterr='Database is not mounted' + opterr='Database is not mounted or node name is invalid ({})'.format(e) ) # Update the database before the restart of the GC to avoid @@ -782,6 +779,15 @@ def scan(self, uuid): super(LinstorSR, self).scan(self.uuid) self._kick_gc() + def is_master(self): + if not hasattr(self, '_is_master'): + if 'SRmaster' not in self.dconf: + self._is_master = self.session is not None and util.is_master(self.session) + else: + self._is_master = self.dconf['SRmaster'] == 'true' + + return self._is_master + @_locked_load def vdi(self, uuid): return LinstorVDI(self, uuid) @@ -967,7 +973,7 @@ def _synchronize_metadata_and_xapi(self): ) def _synchronize_metadata(self): - if not self._is_master: + if not self.is_master(): return util.SMlog('Synchronize metadata...') @@ -1014,7 +1020,7 @@ def _load_vdis(self): if self._vdis_loaded: return - assert self._is_master + assert self.is_master() # We use a cache to avoid repeated JSON parsing. # The performance gain is not big but we can still @@ -1492,7 +1498,7 @@ def _reconnect(self): controller_uri, self._group_name, repair=( - self._is_master and + self.is_master() and self.srcmd.cmd in self.ops_exclusive ), logger=util.SMlog @@ -1660,8 +1666,11 @@ def create(self, sr_uuid, vdi_uuid, size): volume_name = REDO_LOG_VOLUME_NAME self._linstor.create_volume( - self.uuid, volume_size, persistent=False, - volume_name=volume_name + self.uuid, + volume_size, + persistent=False, + volume_name=volume_name, + high_availability=volume_name is not None ) volume_info = self._linstor.get_volume_info(self.uuid) @@ -1788,10 +1797,10 @@ def attach(self, sr_uuid, vdi_uuid): 'scan SR first to trigger auto-repair' ) - if not attach_from_config or self.sr._is_master: - writable = 'args' not in self.sr.srcmd.params or \ - self.sr.srcmd.params['args'][0] == 'true' + writable = 'args' not in self.sr.srcmd.params or \ + self.sr.srcmd.params['args'][0] == 'true' + if not attach_from_config or self.sr.is_master(): # We need to inflate the volume if we don't have enough place # to mount the VHD image. I.e. the volume capacity must be greater # than the VHD size + bitmap size. @@ -1825,7 +1834,7 @@ def attach(self, sr_uuid, vdi_uuid): return self._attach_using_http_nbd() # Ensure we have a path... - self._create_chain_paths(self.uuid) + self.sr._vhdutil.create_chain_paths(self.uuid, readonly=not writable) self.attached = True return VDI.VDI.attach(self, self.sr.uuid, self.uuid) @@ -1873,7 +1882,7 @@ def detach(self, sr_uuid, vdi_uuid): ) # We remove only on slaves because the volume can be used by the GC. 
- if self.sr._is_master: + if self.sr.is_master(): return while vdi_uuid: @@ -1894,7 +1903,7 @@ def detach(self, sr_uuid, vdi_uuid): def resize(self, sr_uuid, vdi_uuid, size): util.SMlog('LinstorVDI.resize for {}'.format(self.uuid)) - if not self.sr._is_master: + if not self.sr.is_master(): raise xs_errors.XenError( 'VDISize', opterr='resize on slave not allowed' @@ -2153,7 +2162,7 @@ def update(self, sr_uuid, vdi_uuid): # -------------------------------------------------------------------------- def _prepare_thin(self, attach): - if self.sr._is_master: + if self.sr.is_master(): if attach: attach_thin( self.session, self.sr._journaler, self._linstor, @@ -2352,7 +2361,7 @@ def _snapshot(self, snap_type, cbtlog=None, cbt_consistency=None): raise xs_errors.XenError('SnapshotChainTooLong') # Ensure we have a valid path if we don't have a local diskful. - self._create_chain_paths(self.uuid) + self.sr._vhdutil.create_chain_paths(self.uuid, readonly=True) volume_path = self.path if not util.pathexists(volume_path): @@ -2499,10 +2508,10 @@ def _snapshot(self, snap_type, cbtlog=None, cbt_consistency=None): active_uuid, clone_info, force_undo=True ) self.sr._journaler.remove(LinstorJournaler.CLONE, active_uuid) - except Exception as e: + except Exception as clean_error: util.SMlog( 'WARNING: Failed to clean up failed snapshot: {}' - .format(e) + .format(clean_error) ) raise xs_errors.XenError('VDIClone', opterr=str(e)) @@ -2550,6 +2559,7 @@ def _start_persistent_http_server(volume_name): [FORK_LOG_DAEMON] + arguments, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, + universal_newlines=True, # Ensure we use another group id to kill this process without # touch the current one. preexec_fn=os.setsid @@ -2635,6 +2645,7 @@ def _start_persistent_nbd_server(self, volume_name): [FORK_LOG_DAEMON] + arguments, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, + universal_newlines=True, # Ensure we use another group id to kill this process without # touch the current one. preexec_fn=os.setsid @@ -2739,7 +2750,7 @@ def _attach_using_http_nbd(self): # 0. Fetch drbd path. must_get_device_path = True - if not self.sr._is_master: + if not self.sr.is_master(): # We are on a slave, we must try to find a diskful locally. try: volume_info = self._linstor.get_volume_info(self.uuid) @@ -2754,7 +2765,7 @@ def _attach_using_http_nbd(self): must_get_device_path = hostname in volume_info.diskful drbd_path = None - if must_get_device_path or self.sr._is_master: + if must_get_device_path or self.sr.is_master(): # If we are master, we must ensure we have a diskless # or diskful available to init HA. # It also avoid this error in xensource.log @@ -2812,37 +2823,6 @@ def _detach_using_http_nbd(self): self._kill_persistent_nbd_server(volume_name) self._kill_persistent_http_server(volume_name) - def _create_chain_paths(self, vdi_uuid): - # OPTIMIZE: Add a limit_to_first_allocated_block param to limit vhdutil calls. - # Useful for the snapshot code algorithm. - - while vdi_uuid: - path = self._linstor.get_device_path(vdi_uuid) - if not util.pathexists(path): - raise xs_errors.XenError( - 'VDIUnavailable', opterr='Could not find: {}'.format(path) - ) - - # Diskless path can be created on the fly, ensure we can open it. - def check_volume_usable(): - while True: - try: - with open(path, 'r+'): - pass - except IOError as e: - if e.errno == errno.ENODATA: - time.sleep(2) - continue - if e.errno == errno.EROFS: - util.SMlog('Volume not attachable because RO. 
Openers: {}'.format( - self.sr._linstor.get_volume_openers(vdi_uuid) - )) - raise - break - util.retry(check_volume_usable, 15, 2) - - vdi_uuid = self.sr._vhdutil.get_vhd_info(vdi_uuid).parentUuid - # ------------------------------------------------------------------------------ diff --git a/drivers/linstor-manager b/drivers/linstor-manager index 8d313ec72..47c434a3f 100755 --- a/drivers/linstor-manager +++ b/drivers/linstor-manager @@ -27,8 +27,9 @@ import socket import XenAPI import XenAPIPlugin +from json import JSONEncoder from linstorjournaler import LinstorJournaler -from linstorvhdutil import LinstorVhdUtil +from linstorvhdutil import LinstorVhdUtil, check_ex from linstorvolumemanager import get_controller_uri, get_local_volume_openers, LinstorVolumeManager from lock import Lock import json @@ -240,7 +241,10 @@ def get_drbd_volumes(volume_group=None): config = json.loads(stdout) for resource in config: for volume in resource['_this_host']['volumes']: - backing_disk = volume['backing-disk'] + backing_disk = volume.get('backing-disk') + if not backing_disk: + continue + match = BACKING_DISK_RE.match(backing_disk) if not match: continue @@ -389,7 +393,8 @@ def check(session, args): args['ignoreMissingFooter'] ) fast = distutils.util.strtobool(args['fast']) - return str(vhdutil.check(device_path, ignore_missing_footer, fast)) + check_ex(device_path, ignore_missing_footer, fast) + return str(True) except Exception as e: util.SMlog('linstor-manager:check error: {}'.format(e)) raise @@ -534,7 +539,8 @@ def set_parent(session, args): def coalesce(session, args): try: device_path = args['devicePath'] - return str(vhdutil.coalesce(device_path)) + vhdutil.coalesce(device_path) + return '' except Exception as e: util.SMlog('linstor-manager:coalesce error: {}'.format(e)) raise @@ -885,6 +891,64 @@ def get_drbd_openers(session, args): raise +class HealthCheckError(object): + __slots__ = ('data') + + MASK_REPORT_LEVEL = 0x7000000 + MASK_TYPE = 0xFF0000 + MASK_VALUE = 0XFFFF + + # 24-26 bits + REPORT_LEVEL_WARN = 0x1000000 + REPORT_LEVEL_ERR = 0x2000000 + + # 16-23 bits + TYPE_GENERIC = 0x10000 + TYPE_NODE = 0x20000 + TYPE_STORAGE_POOL = 0x30000 + TYPE_VOLUME = 0x40000 + TYPE_RESOURCE = 0x50000 + + # 1-15 bits + GENERIC_UNEXPECTED = REPORT_LEVEL_ERR | TYPE_GENERIC | 0 + GENERIC_LINSTOR_UNREACHABLE = REPORT_LEVEL_ERR | TYPE_GENERIC | 1 + + NODE_NOT_ONLINE = REPORT_LEVEL_WARN | TYPE_NODE | 0 + + STORAGE_POOL_UNKNOWN_FREE_SIZE = REPORT_LEVEL_ERR | TYPE_STORAGE_POOL | 0 + STORAGE_POOL_UNKNOWN_CAPACITY = REPORT_LEVEL_ERR | TYPE_STORAGE_POOL | 1 + STORAGE_POOL_LOW_FREE_SIZE = REPORT_LEVEL_WARN | TYPE_STORAGE_POOL | 2 + + VOLUME_UNKNOWN_STATE = REPORT_LEVEL_WARN | TYPE_VOLUME | 0 + VOLUME_INVALID_STATE = REPORT_LEVEL_ERR | TYPE_VOLUME | 1 + VOLUME_WRONG_DISKLESS_STATE = REPORT_LEVEL_WARN | TYPE_VOLUME | 2 + VOLUME_INTERNAL_UNVERIFIED_STATE = REPORT_LEVEL_WARN | TYPE_VOLUME | 3 + + MAP_CODE_TO_PARAMS = { + GENERIC_UNEXPECTED: { 'message' }, + GENERIC_LINSTOR_UNREACHABLE: { 'message' }, + NODE_NOT_ONLINE: { 'name', 'status' }, + STORAGE_POOL_UNKNOWN_FREE_SIZE: { 'name' }, + STORAGE_POOL_UNKNOWN_CAPACITY: { 'name' }, + STORAGE_POOL_LOW_FREE_SIZE: { 'name', 'threshold' }, + VOLUME_UNKNOWN_STATE: { 'node', 'resource', 'number' }, + VOLUME_INVALID_STATE: { 'node', 'resource', 'number', 'state' }, + VOLUME_WRONG_DISKLESS_STATE: { 'node', 'resource', 'number', 'state' }, + VOLUME_INTERNAL_UNVERIFIED_STATE: { 'node', 'resource', 'number', 'state' } + } + + def __init__(self, code, **kwargs): + attributes = 
self.MAP_CODE_TO_PARAMS[code] + data = { 'code': code } + for attr_name, attr_value in kwargs.items(): + assert attr_name in attributes + data[attr_name] = attr_value + self.data = data + + def to_json(self): + return self.data + + def health_check(session, args): group_name = args['groupName'] @@ -892,11 +956,16 @@ def health_check(session, args): 'controller-uri': '', 'nodes': {}, 'storage-pools': {}, - 'warnings': [], + 'resources': {}, 'errors': [] } def format_result(): + # See: https://stackoverflow.com/questions/18478287/making-object-json-serializable-with-regular-encoder/18561055#18561055 + def _default(self, obj): + return getattr(obj.__class__, 'to_json', _default.default)(obj) + _default.default = JSONEncoder().default + JSONEncoder.default = _default return json.dumps(result) # 1. Get controller. @@ -919,7 +988,10 @@ def health_check(session, args): ) except Exception as e: # Probably a network issue, or offline controller. - result['errors'].append('Cannot join SR: `{}`.'.format(e)) + result['errors'].append(HealthCheckError( + code=HealthCheckError.GENERIC_LINSTOR_UNREACHABLE, + message=str(e) + )) return format_result() try: @@ -928,7 +1000,11 @@ def health_check(session, args): result['nodes'] = nodes for node_name, status in nodes.items(): if status != 'ONLINE': - result['warnings'].append('Node `{}` is {}.'.format(node_name, status)) + result['errors'].append(HealthCheckError( + code=HealthCheckError.NODE_NOT_ONLINE, + name=node_name, + status=status + )) # 3. Check storage pool statuses. storage_pools_per_node = linstor.get_storage_pools_info() @@ -938,23 +1014,25 @@ def health_check(session, args): free_size = storage_pool['free-size'] capacity = storage_pool['capacity'] if free_size < 0 or capacity <= 0: - result['errors'].append( - 'Cannot get free size and/or capacity of storage pool `{}`.' - .format(storage_pool['uuid']) - ) - elif free_size > capacity: - result['errors'].append( - 'Free size of storage pool `{}` is greater than capacity.' - .format(storage_pool['uuid']) - ) + if free_size < 0: + result['errors'].append(HealthCheckError( + code=HealthCheckError.STORAGE_POOL_UNKNOWN_FREE_SIZE, + name=storage_pool['name'] + )) + elif capacity < 0: + result['errors'].append(HealthCheckError( + code=HealthCheckError.STORAGE_POOL_UNKNOWN_CAPACITY, + name=storage_pool['name'] + )) else: remaining_percent = free_size / float(capacity) * 100.0 threshold = 10.0 if remaining_percent < threshold: - result['warnings'].append( - 'Remaining size of storage pool `{}` is below {}% of its capacity.' - .format(storage_pool['uuid'], threshold) - ) + result['errors'].append(HealthCheckError( + code=HealthCheckError.STORAGE_POOL_LOW_FREE_SIZE, + name=storage_pool['name'], + threshold=threshold + )) # 4. Check resource statuses. 
all_resources = linstor.get_resources_info() @@ -967,33 +1045,46 @@ def health_check(session, args): if disk_state in ['UpToDate', 'Created', 'Attached']: continue if disk_state == 'DUnknown': - result['warnings'].append( - 'Unknown state for volume `{}` at index {} for resource `{}` on node `{}`' - .format(volume['device-path'], volume_index, resource_name, node_name) - ) + result['errors'].append(HealthCheckError( + code=HealthCheckError.VOLUME_UNKNOWN_STATE, + node=node_name, + resource=resource_name, + number=volume_index + )) continue if disk_state in ['Inconsistent', 'Failed', 'To: Creating', 'To: Attachable', 'To: Attaching']: - result['errors'].append( - 'Invalid state `{}` for volume `{}` at index {} for resource `{}` on node `{}`' - .format(disk_state, volume['device-path'], volume_index, resource_name, node_name) - ) + result['errors'].append(HealthCheckError( + code=HealthCheckError.VOLUME_INVALID_STATE, + node=node_name, + resource=resource_name, + number=volume_index, + state=disk_state + )) continue if disk_state == 'Diskless': if resource['diskful']: - result['errors'].append( - 'Unintentional diskless state detected for volume `{}` at index {} for resource `{}` on node `{}`' - .format(volume['device-path'], volume_index, resource_name, node_name) - ) + result['errors'].append(HealthCheckError( + code=HealthCheckError.VOLUME_WRONG_DISKLESS_STATE, + node=node_name, + resource=resource_name, + number=volume_index, + state=disk_state + )) elif resource['tie-breaker']: volume['disk-state'] = 'TieBreaker' continue - result['warnings'].append( - 'Unhandled state `{}` for volume `{}` at index {} for resource `{}` on node `{}`' - .format(disk_state, volume['device-path'], volume_index, resource_name, node_name) - ) - + result['errors'].append(HealthCheckError( + code=HealthCheckError.VOLUME_INTERNAL_UNVERIFIED_STATE, + node=node_name, + resource=resource_name, + number=volume_index, + state=disk_state + )) except Exception as e: - result['errors'].append('Unexpected error: `{}`'.format(e)) + result['errors'].append(HealthCheckError( + code=HealthCheckError.GENERIC_UNEXPECTED, + message=str(e) + )) return format_result() @@ -1070,6 +1161,21 @@ def list_node_interfaces(session, args): raise XenAPIPlugin.Failure('-1', [str(e)]) +def get_node_preferred_interface(session, args): + group_name = args['groupName'] + hostname = args['hostname'] + + linstor = LinstorVolumeManager( + get_controller_uri(), + group_name, + logger=util.SMlog + ) + try: + return linstor.get_node_preferred_interface(hostname) + except Exception as e: + raise XenAPIPlugin.Failure('-1', [str(e)]) + + def set_node_preferred_interface(session, args): group_name = args['groupName'] hostname = args['hostname'] @@ -1141,5 +1247,6 @@ if __name__ == '__main__': 'destroyNodeInterface': destroy_node_interface, 'modifyNodeInterface': modify_node_interface, 'listNodeInterfaces': list_node_interfaces, + 'getNodePreferredInterface': get_node_preferred_interface, 'setNodePreferredInterface': set_node_preferred_interface }) diff --git a/drivers/linstorvhdutil.py b/drivers/linstorvhdutil.py index 13e1bb08c..046c96952 100644 --- a/drivers/linstorvhdutil.py +++ b/drivers/linstorvhdutil.py @@ -21,6 +21,7 @@ import errno import json import socket +import time import util import vhdutil import xs_errors @@ -46,6 +47,16 @@ def call_remote_method(session, host_ref, method, device_path, args): return response +def check_ex(path, ignoreMissingFooter = False, fast = False): + cmd = [vhdutil.VHD_UTIL, "check", vhdutil.OPT_LOG_ERR, "-n", path] 
+ if ignoreMissingFooter: + cmd.append("-i") + if fast: + cmd.append("-B") + + vhdutil.ioretry(cmd) + + class LinstorCallException(util.SMException): def __init__(self, cmd_err): self.cmd_err = cmd_err @@ -138,6 +149,44 @@ def __init__(self, session, linstor): self._session = session self._linstor = linstor + def create_chain_paths(self, vdi_uuid, readonly=False): + # OPTIMIZE: Add a limit_to_first_allocated_block param to limit vhdutil calls. + # Useful for the snapshot code algorithm. + + leaf_vdi_path = self._linstor.get_device_path(vdi_uuid) + path = leaf_vdi_path + while True: + if not util.pathexists(path): + raise xs_errors.XenError( + 'VDIUnavailable', opterr='Could not find: {}'.format(path) + ) + + # Diskless path can be created on the fly, ensure we can open it. + def check_volume_usable(): + while True: + try: + with open(path, 'r' if readonly else 'r+'): + pass + except IOError as e: + if e.errno == errno.ENODATA: + time.sleep(2) + continue + if e.errno == errno.EROFS: + util.SMlog('Volume not attachable because RO. Openers: {}'.format( + self._linstor.get_volume_openers(vdi_uuid) + )) + raise + break + util.retry(check_volume_usable, 15, 2) + + vdi_uuid = self.get_vhd_info(vdi_uuid).parentUuid + if not vdi_uuid: + break + path = self._linstor.get_device_path(vdi_uuid) + readonly = True # Non-leaf is always readonly. + + return leaf_vdi_path + # -------------------------------------------------------------------------- # Getters: read locally and try on another host in case of failure. # -------------------------------------------------------------------------- @@ -147,9 +196,14 @@ def check(self, vdi_uuid, ignore_missing_footer=False, fast=False): 'ignoreMissingFooter': ignore_missing_footer, 'fast': fast } - return self._check(vdi_uuid, **kwargs) # pylint: disable = E1123 + try: + self._check(vdi_uuid, **kwargs) # pylint: disable = E1123 + return True + except Exception as e: + util.SMlog('Call to `check` failed: {}'.format(e)) + return False - @linstorhostcall(vhdutil.check, 'check') + @linstorhostcall(check_ex, 'check') def _check(self, vdi_uuid, response): return distutils.util.strtobool(response) @@ -322,7 +376,7 @@ def force_parent(self, path, parentPath, parentRaw=False): @linstormodifier() def force_coalesce(self, path): - return int(self._call_method(vhdutil.coalesce, 'coalesce', path, use_parent=True)) + return self._call_method(vhdutil.coalesce, 'coalesce', path, use_parent=True) @linstormodifier() def force_repair(self, path): diff --git a/drivers/linstorvolumemanager.py b/drivers/linstorvolumemanager.py index dbca3b412..a470dfecc 100755 --- a/drivers/linstorvolumemanager.py +++ b/drivers/linstorvolumemanager.py @@ -18,7 +18,6 @@ import distutils.util import errno -import glob import json import linstor import os.path @@ -273,7 +272,8 @@ class LinstorVolumeManagerError(Exception): ERR_GENERIC = 0, ERR_VOLUME_EXISTS = 1, ERR_VOLUME_NOT_EXISTS = 2, - ERR_VOLUME_DESTROY = 3 + ERR_VOLUME_DESTROY = 3, + ERR_GROUP_NOT_EXISTS = 4 def __init__(self, message, code=ERR_GENERIC): super(LinstorVolumeManagerError, self).__init__(message) @@ -298,11 +298,9 @@ class LinstorVolumeManager(object): """ __slots__ = ( - '_linstor', '_logger', - '_uri', '_base_group_name', - '_redundancy', '_group_name', - '_volumes', '_storage_pools', - '_storage_pools_time', + '_linstor', '_logger', '_redundancy', + '_base_group_name', '_group_name', '_ha_group_name', + '_volumes', '_storage_pools', '_storage_pools_time', '_kv_cache', '_resource_cache', '_volume_info_cache', '_kv_cache_dirty', 
'_resource_cache_dirty', '_volume_info_cache_dirty' ) @@ -348,6 +346,7 @@ class LinstorVolumeManager(object): # A LINSTOR (resource, group, ...) name cannot start with a number. # So we add a prefix behind our SR/VOLUME uuids. PREFIX_SR = 'xcp-sr-' + PREFIX_HA = 'xcp-ha-' PREFIX_VOLUME = 'xcp-volume-' # Limit request number when storage pool info is asked, we fetch @@ -406,8 +405,7 @@ def __init__( # Ensure group exists. group_name = self._build_group_name(group_name) - groups = self._linstor.resource_group_list_raise([group_name]) - groups = groups.resource_groups + groups = self._linstor.resource_group_list_raise([group_name]).resource_groups if not groups: raise LinstorVolumeManagerError( 'Unable to find `{}` Linstor SR'.format(group_name) @@ -417,6 +415,7 @@ def __init__( self._logger = logger self._redundancy = groups[0].select_filter.place_count self._group_name = group_name + self._ha_group_name = self._build_ha_group_name(self._base_group_name) self._volumes = set() self._storage_pools_time = 0 @@ -617,7 +616,12 @@ def check_volume_exists(self, volume_uuid): return volume_uuid in self._volumes def create_volume( - self, volume_uuid, size, persistent=True, volume_name=None + self, + volume_uuid, + size, + persistent=True, + volume_name=None, + high_availability=False ): """ Create a new volume on the SR. @@ -627,6 +631,8 @@ def create_volume( on the next constructor call LinstorSR(...). :param str volume_name: If set, this name is used in the LINSTOR database instead of a generated name. + :param bool high_availability: If set, the volume is created in + the HA group. :return: The current device path of the volume. :rtype: str """ @@ -635,7 +641,11 @@ def create_volume( if not volume_name: volume_name = self.build_volume_name(util.gen_uuid()) volume_properties = self._create_volume_with_properties( - volume_uuid, volume_name, size, place_resources=True + volume_uuid, + volume_name, + size, + True, # place_resources + high_availability ) # Volume created! Now try to find the device path. @@ -651,7 +661,7 @@ def create_volume( 'LINSTOR volume {} created!'.format(volume_uuid) ) return device_path - except Exception as e: + except Exception: # There is an issue to find the path. # At this point the volume has just been created, so force flag can be used. self._destroy_volume(volume_uuid, force=True) @@ -802,6 +812,13 @@ def remove_volume_if_diskless(self, volume_uuid): volume_name = volume_properties.get(self.PROP_VOLUME_NAME) node_name = socket.gethostname() + + for resource in self._get_resource_cache().resources: + if resource.name == volume_name and resource.node_name == node_name: + if linstor.consts.FLAG_TIE_BREAKER in resource.flags: + return + break + result = self._linstor.resource_delete_if_diskless( node_name=node_name, rsc_name=volume_name ) @@ -1351,14 +1368,29 @@ def destroy(self): # 4.4. Refresh linstor connection. # Without we get this error: - # "Cannot delete resource group 'xcp-sr-linstor_group_thin_device' because it has existing resource definitions.." + # "Cannot delete resource group 'xcp-sr-linstor_group_thin_device' because it has existing resource definitions.." # Because the deletion of the databse was not seen by Linstor for some reason. # It seems a simple refresh of the Linstor connection make it aware of the deletion. self._linstor.disconnect() self._linstor.connect() - # 4.5. Destroy group and storage pools. + # 4.5. Destroy remaining drbd nodes on hosts. 
+ # We check if there is a DRBD node on hosts that could mean blocking when destroying resource groups. + # It needs to be done locally by each host so we go through the linstor-manager plugin. + # If we don't do this sometimes, the destroy will fail when trying to destroy the resource groups with: + # "linstor-manager:destroy error: Failed to destroy SP `xcp-sr-linstor_group_thin_device` on node `r620-s2`: The specified storage pool 'xcp-sr-linstor_group_thin_device' on node 'r620-s2' can not be deleted as volumes / snapshot-volumes are still using it." + session = util.timeout_call(5, util.get_localAPI_session) + for host_ref in session.xenapi.host.get_all(): + try: + response = session.xenapi.host.call_plugin( + host_ref, 'linstor-manager', 'destroyDrbdVolumes', {'volume_group': self._group_name} + ) + except Exception as e: + util.SMlog('Calling destroyDrbdVolumes on host {} failed with error {}'.format(host_ref, e)) + + # 4.6. Destroy group and storage pools. self._destroy_resource_group(self._linstor, self._group_name) + self._destroy_resource_group(self._linstor, self._ha_group_name) for pool in self._get_storage_pools(force=True): self._destroy_storage_pool( self._linstor, pool.name, pool.node_name @@ -1369,8 +1401,9 @@ def destroy(self): try: self._start_controller(start=False) - for file in glob.glob(DATABASE_PATH + '/'): - os.remove(file) + for file in os.listdir(DATABASE_PATH): + if file != 'lost+found': + os.remove(DATABASE_PATH + '/' + file) except Exception as e: util.SMlog( 'Ignoring failure after LINSTOR SR destruction: {}' @@ -1479,6 +1512,12 @@ def destroy_node_interface(self, node_name, name): :param str node_name: Node name of the interface to remove. :param str name: Interface to remove. """ + + if name == 'default': + raise LinstorVolumeManagerError( + 'Unable to delete the default interface of a node!' + ) + result = self._linstor.netinterface_delete(node_name, name) errors = self._filter_errors(result) if errors: @@ -1532,6 +1571,23 @@ def list_node_interfaces(self, node_name): } return interfaces + def get_node_preferred_interface(self, node_name): + """ + Get the preferred interface used by a node. + :param str node_name: Node name of the interface to get. + :rtype: str + """ + try: + nodes = self._linstor.node_list_raise([node_name]).nodes + if nodes: + properties = nodes[0].props + return properties.get('PrefNic', 'default') + return nodes + except Exception as e: + raise LinstorVolumeManagerError( + 'Failed to get preferred interface: `{}`'.format(e) + ) + def set_node_preferred_interface(self, node_name, name): """ Set the preferred interface to use on a node. 
@@ -1588,8 +1644,8 @@ def get_storage_pools_info(self): capacity *= 1024 storage_pools[pool.node_name].append({ - 'storage-pool-name': pool.name, - 'uuid': pool.uuid, + 'name': pool.name, + 'linstor-uuid': pool.uuid, 'free-size': size, 'capacity': capacity }) @@ -1602,16 +1658,19 @@ def get_resources_info(self): :rtype: dict(str, list) """ resources = {} - resource_list = self._linstor.resource_list_raise() + resource_list = self._get_resource_cache() + volume_names = self.get_volumes_with_name() for resource in resource_list.resources: if resource.name not in resources: - resources[resource.name] = {} + resources[resource.name] = { 'nodes': {}, 'uuid': '' } + resource_nodes = resources[resource.name]['nodes'] - resources[resource.name][resource.node_name] = { + resource_nodes[resource.node_name] = { 'volumes': [], 'diskful': linstor.consts.FLAG_DISKLESS not in resource.flags, 'tie-breaker': linstor.consts.FLAG_TIE_BREAKER in resource.flags } + resource_volumes = resource_nodes[resource.node_name]['volumes'] for volume in resource.volumes: # We ignore diskless pools of the form "DfltDisklessStorPool". @@ -1630,17 +1689,17 @@ def get_resources_info(self): else: allocated_size *= 1024 - resources[resource.name][resource.node_name]['volumes'].append({ - 'storage-pool-name': volume.storage_pool_name, - 'uuid': volume.uuid, - 'number': volume.number, - 'device-path': volume.device_path, - 'usable-size': usable_size, - 'allocated-size': allocated_size - }) + resource_volumes.append({ + 'storage-pool-name': volume.storage_pool_name, + 'linstor-uuid': volume.uuid, + 'number': volume.number, + 'device-path': volume.device_path, + 'usable-size': usable_size, + 'allocated-size': allocated_size + }) for resource_state in resource_list.resource_states: - resource = resources[resource_state.rsc_name][resource_state.node_name] + resource = resources[resource_state.rsc_name]['nodes'][resource_state.node_name] resource['in-use'] = resource_state.in_use volumes = resource['volumes'] @@ -1649,6 +1708,11 @@ def get_resources_info(self): if volume: volume['disk-state'] = volume_state.disk_state + for volume_uuid, volume_name in volume_names.items(): + resource = resources.get(volume_name) + if resource: + resource['uuid'] = volume_uuid + return resources def get_database_path(self): @@ -1659,6 +1723,16 @@ def get_database_path(self): """ return self._request_database_path(self._linstor) + @classmethod + def get_all_group_names(cls, base_name): + """ + Get all group names. I.e. list of current group + HA. + :param str base_name: The SR group_name to use. + :return: List of group names. 
+ :rtype: list + """ + return [cls._build_group_name(base_name), cls._build_ha_group_name(base_name)] + @classmethod def create_sr( cls, group_name, ips, redundancy, @@ -1744,8 +1818,8 @@ def _create_sr( driver_pool_name = group_name base_group_name = group_name group_name = cls._build_group_name(group_name) - pools = lin.storage_pool_list_raise(filter_by_stor_pools=[group_name]) - pools = pools.storage_pools + storage_pool_name = group_name + pools = lin.storage_pool_list_raise(filter_by_stor_pools=[storage_pool_name]).storage_pools if pools: existing_node_names = [pool.node_name for pool in pools] raise LinstorVolumeManagerError( @@ -1754,7 +1828,7 @@ def _create_sr( ) if lin.resource_group_list_raise( - [group_name] + cls.get_all_group_names(base_group_name) ).resource_groups: if not lin.resource_dfn_list_raise().resource_definitions: backup_path = cls._create_database_backup_path() @@ -1791,7 +1865,7 @@ def _create_sr( result = lin.storage_pool_create( node_name=node_name, - storage_pool_name=group_name, + storage_pool_name=storage_pool_name, storage_driver='LVM_THIN' if thin_provisioning else 'LVM', driver_pool_name=driver_pool_name ) @@ -1807,7 +1881,7 @@ def _create_sr( 'Volume group `{}` not found on `{}`. Ignoring...' .format(group_name, node_name) ) - cls._destroy_storage_pool(lin, group_name, node_name) + cls._destroy_storage_pool(lin, storage_pool_name, node_name) else: error_str = cls._get_error_str(result) raise LinstorVolumeManagerError( @@ -1825,49 +1899,28 @@ def _create_sr( ) ) - # 2.b. Create resource group. - rg_creation_attempt = 0 - while True: - result = lin.resource_group_create( - name=group_name, - place_count=redundancy, - storage_pool=group_name, - diskless_on_remaining=False - ) - error_str = cls._get_error_str(result) - if not error_str: - break - - errors = cls._filter_errors(result) - if cls._check_errors(errors, [linstor.consts.FAIL_EXISTS_RSC_GRP]): - rg_creation_attempt += 1 - if rg_creation_attempt < 2: - try: - cls._destroy_resource_group(lin, group_name) - except Exception as e: - error_str = 'Failed to destroy old and empty RG: {}'.format(e) - else: - continue - - raise LinstorVolumeManagerError( - 'Could not create RG `{}`: {}'.format(group_name, error_str) - ) - - # 2.c. Create volume group. - result = lin.volume_group_create(group_name) - error_str = cls._get_error_str(result) - if error_str: - raise LinstorVolumeManagerError( - 'Could not create VG `{}`: {}'.format( - group_name, error_str - ) - ) + # 2.b. Create resource groups. + ha_group_name = cls._build_ha_group_name(base_group_name) + cls._create_resource_group( + lin, + group_name, + storage_pool_name, + redundancy, + True + ) + cls._create_resource_group( + lin, + ha_group_name, + storage_pool_name, + 3, + True + ) # 3. Create the LINSTOR database volume and mount it. 
try: logger('Creating database volume...') volume_path = cls._create_database_volume( - lin, group_name, node_names, redundancy, auto_quorum + lin, ha_group_name, storage_pool_name, node_names, redundancy, auto_quorum ) except LinstorVolumeManagerError as e: if e.code != LinstorVolumeManagerError.ERR_VOLUME_EXISTS: @@ -1907,6 +1960,7 @@ def _create_sr( logger('Destroying resource group and storage pools after fail...') try: cls._destroy_resource_group(lin, group_name) + cls._destroy_resource_group(lin, ha_group_name) except Exception as e2: logger('Failed to destroy resource group: {}'.format(e2)) pass @@ -1914,7 +1968,7 @@ def _create_sr( i = min(i, len(node_names) - 1) while j <= i: try: - cls._destroy_storage_pool(lin, group_name, node_names[j]) + cls._destroy_storage_pool(lin, storage_pool_name, node_names[j]) except Exception as e2: logger('Failed to destroy resource group: {}'.format(e2)) pass @@ -1952,7 +2006,7 @@ def build_device_path(cls, volume_name): def build_volume_name(cls, base_name): """ Build a volume name given a base name (i.e. a UUID). - :param str volume_name: The volume name to use. + :param str base_name: The volume name to use. :return: A valid or not device path. :rtype: str """ @@ -2031,7 +2085,7 @@ def _fetch_resource_names(self, ignore_deleted=True): resource_names = set() dfns = self._linstor.resource_dfn_list_raise().resource_definitions for dfn in dfns: - if dfn.resource_group_name == self._group_name and ( + if dfn.resource_group_name in self.get_all_group_names(self._base_group_name) and ( ignore_deleted or linstor.consts.FLAG_DELETE not in dfn.flags ): @@ -2149,27 +2203,54 @@ def _get_storage_pools(self, force=False): return self._storage_pools def _create_volume( - self, volume_uuid, volume_name, size, place_resources + self, + volume_uuid, + volume_name, + size, + place_resources, + high_availability ): size = self.round_up_volume_size(size) self._mark_resource_cache_as_dirty() + group_name = self._ha_group_name if high_availability else self._group_name def create_definition(): - self._check_volume_creation_errors( - self._linstor.resource_group_spawn( - rsc_grp_name=self._group_name, - rsc_dfn_name=volume_name, - vlm_sizes=['{}B'.format(size)], - definitions_only=True - ), - volume_uuid, - self._group_name - ) + first_attempt = True + while True: + try: + self._check_volume_creation_errors( + self._linstor.resource_group_spawn( + rsc_grp_name=group_name, + rsc_dfn_name=volume_name, + vlm_sizes=['{}B'.format(size)], + definitions_only=True + ), + volume_uuid, + self._group_name + ) + break + except LinstorVolumeManagerError as e: + if ( + not first_attempt or + not high_availability or + e.code != LinstorVolumeManagerError.ERR_GROUP_NOT_EXISTS + ): + raise + + first_attempt = False + self._create_resource_group( + self._linstor, + group_name, + self._group_name, + 3, + True + ) + self._configure_volume_peer_slots(self._linstor, volume_name) def clean(): try: - self._destroy_volume(volume_uuid, force=True) + self._destroy_volume(volume_uuid, force=True, preserve_properties=True) except Exception as e: self._logger( 'Unable to destroy volume {} after creation fail: {}' @@ -2201,7 +2282,12 @@ def create(): util.retry(create, maxretry=5) def _create_volume_with_properties( - self, volume_uuid, volume_name, size, place_resources + self, + volume_uuid, + volume_name, + size, + place_resources, + high_availability ): if self.check_volume_exists(volume_uuid): raise LinstorVolumeManagerError( @@ -2230,7 +2316,11 @@ def _create_volume_with_properties( 
volume_properties[self.PROP_VOLUME_NAME] = volume_name self._create_volume( - volume_uuid, volume_name, size, place_resources + volume_uuid, + volume_name, + size, + place_resources, + high_availability ) assert volume_properties.namespace == \ @@ -2331,7 +2421,7 @@ def _destroy_resource(self, resource_name, force=False): break self._destroy_resource(resource_name) - def _destroy_volume(self, volume_uuid, force=False): + def _destroy_volume(self, volume_uuid, force=False, preserve_properties=False): volume_properties = self._get_volume_properties(volume_uuid) try: volume_name = volume_properties.get(self.PROP_VOLUME_NAME) @@ -2339,7 +2429,8 @@ def _destroy_volume(self, volume_uuid, force=False): self._destroy_resource(volume_name, force) # Assume this call is atomic. - volume_properties.clear() + if not preserve_properties: + volume_properties.clear() except Exception as e: raise LinstorVolumeManagerError( 'Cannot destroy volume `{}`: {}'.format(volume_uuid, e) @@ -2578,7 +2669,7 @@ def _request_database_path(cls, lin, activate=False): ), None) except Exception as e: raise LinstorVolumeManagerError( - 'Unable to get resources during database creation: {}' + 'Unable to fetch database resource: {}' .format(e) ) @@ -2599,7 +2690,7 @@ def _request_database_path(cls, lin, activate=False): @classmethod def _create_database_volume( - cls, lin, group_name, node_names, redundancy, auto_quorum + cls, lin, group_name, storage_pool_name, node_names, redundancy, auto_quorum ): try: dfns = lin.resource_dfn_list_raise().resource_definitions @@ -2621,7 +2712,7 @@ def _create_database_volume( # I don't understand why but this command protect against this bug. try: pools = lin.storage_pool_list_raise( - filter_by_stor_pools=[group_name] + filter_by_stor_pools=[storage_pool_name] ) except Exception as e: raise LinstorVolumeManagerError( @@ -2630,8 +2721,8 @@ def _create_database_volume( ) # Ensure we have a correct list of storage pools. - nodes_with_pool = [pool.node_name for pool in pools.storage_pools] - assert nodes_with_pool # We must have at least one storage pool! + assert pools.storage_pools # We must have at least one storage pool! + nodes_with_pool = list(map(lambda pool: pool.node_name, pools.storage_pools)) for node_name in nodes_with_pool: assert node_name in node_names util.SMlog('Nodes with storage pool: {}'.format(nodes_with_pool)) @@ -2663,7 +2754,7 @@ def _create_database_volume( resources.append(linstor.ResourceData( node_name=node_name, rsc_name=DATABASE_VOLUME_NAME, - storage_pool=group_name + storage_pool=storage_pool_name )) # Create diskless resources on the remaining set. for node_name in diskful_nodes[redundancy:] + diskless_nodes: @@ -2825,6 +2916,55 @@ def destroy(): # after LINSTOR database volume destruction. 
return util.retry(destroy, maxretry=10) + @classmethod + def _create_resource_group( + cls, + lin, + group_name, + storage_pool_name, + redundancy, + destroy_old_group + ): + rg_creation_attempt = 0 + while True: + result = lin.resource_group_create( + name=group_name, + place_count=redundancy, + storage_pool=storage_pool_name, + diskless_on_remaining=False + ) + error_str = cls._get_error_str(result) + if not error_str: + break + + errors = cls._filter_errors(result) + if destroy_old_group and cls._check_errors(errors, [ + linstor.consts.FAIL_EXISTS_RSC_GRP + ]): + rg_creation_attempt += 1 + if rg_creation_attempt < 2: + try: + cls._destroy_resource_group(lin, group_name) + except Exception as e: + error_str = 'Failed to destroy old and empty RG: {}'.format(e) + else: + continue + + raise LinstorVolumeManagerError( + 'Could not create RG `{}`: {}'.format( + group_name, error_str + ) + ) + + result = lin.volume_group_create(group_name) + error_str = cls._get_error_str(result) + if error_str: + raise LinstorVolumeManagerError( + 'Could not create VG `{}`: {}'.format( + group_name, error_str + ) + ) + @classmethod def _destroy_resource_group(cls, lin, group_name): def destroy(): @@ -2849,6 +2989,12 @@ def _build_group_name(cls, base_name): # `VG/LV`. "/" is not accepted by LINSTOR. return '{}{}'.format(cls.PREFIX_SR, base_name.replace('/', '_')) + # Used to store important data in a HA context, + # i.e. a replication count of 3. + @classmethod + def _build_ha_group_name(cls, base_name): + return '{}{}'.format(cls.PREFIX_HA, base_name.replace('/', '_')) + @classmethod def _check_volume_creation_errors(cls, result, volume_uuid, group_name): errors = cls._filter_errors(result) @@ -2861,6 +3007,13 @@ def _check_volume_creation_errors(cls, result, volume_uuid, group_name): LinstorVolumeManagerError.ERR_VOLUME_EXISTS ) + if cls._check_errors(errors, [linstor.consts.FAIL_NOT_FOUND_RSC_GRP]): + raise LinstorVolumeManagerError( + 'Failed to create volume `{}` from SR `{}`, resource group doesn\'t exist' + .format(volume_uuid, group_name), + LinstorVolumeManagerError.ERR_GROUP_NOT_EXISTS + ) + if errors: raise LinstorVolumeManagerError( 'Failed to create volume `{}` from SR `{}`: {}'.format( diff --git a/drivers/tapdisk-pause b/drivers/tapdisk-pause index 75328757b..f98257a23 100755 --- a/drivers/tapdisk-pause +++ b/drivers/tapdisk-pause @@ -30,6 +30,7 @@ import vhdutil import lvmcache try: + from linstorvhdutil import LinstorVhdUtil from linstorvolumemanager import get_controller_uri, LinstorVolumeManager LINSTOR_AVAILABLE = True except ImportError: @@ -162,11 +163,12 @@ class Tapdisk: dconf = session.xenapi.PBD.get_device_config(pbd) group_name = dconf['group-name'] - device_path = LinstorVolumeManager( + linstor = LinstorVolumeManager( get_controller_uri(), group_name, logger=util.SMlog - ).get_device_path(self.vdi_uuid) + ) + device_path = LinstorVhdUtil(session, linstor).create_chain_paths(self.vdi_uuid) if realpath != device_path: util.SMlog( diff --git a/tests/mocks/linstor/__init__.py b/mocks/linstor/__init__.py similarity index 100% rename from tests/mocks/linstor/__init__.py rename to mocks/linstor/__init__.py From 2cba6dd3ba7b0d776c7d229e0a06be21f5d5e331 Mon Sep 17 00:00:00 2001 From: Ronan Abhamon Date: Thu, 28 Nov 2024 15:12:21 +0100 Subject: [PATCH 31/72] fix(LinstorSR): sync fork-load-daemon with http-nbd-transfer (v1.5.0) (#73) Signed-off-by: Ronan Abhamon --- scripts/fork-log-daemon | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/scripts/fork-log-daemon 
b/scripts/fork-log-daemon index 986de63ff..bdc95fd24 100755 --- a/scripts/fork-log-daemon +++ b/scripts/fork-log-daemon @@ -1,5 +1,6 @@ #!/usr/bin/env python3 +import os import select import signal import subprocess @@ -7,7 +8,13 @@ import sys import syslog def main(): - process = subprocess.Popen(sys.argv[1:], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + process = subprocess.Popen( + sys.argv[1:], + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + universal_newlines=True, + env=dict(os.environ, PYTHONUNBUFFERED='1') + ) signal.signal(signal.SIGTERM, signal.SIG_IGN) write_to_stdout = True From cb10d3d8d7b394fea9b96ecc7bcfc71043c283c8 Mon Sep 17 00:00:00 2001 From: Ronan Abhamon Date: Thu, 10 Apr 2025 10:39:54 +0200 Subject: [PATCH 32/72] fix(LinstorSR): simplify _kick_gc code using systemd service Signed-off-by: Ronan Abhamon --- drivers/LinstorSR.py | 24 ++---------------------- 1 file changed, 2 insertions(+), 22 deletions(-) diff --git a/drivers/LinstorSR.py b/drivers/LinstorSR.py index 829c48f8d..1ff3d963e 100755 --- a/drivers/LinstorSR.py +++ b/drivers/LinstorSR.py @@ -31,7 +31,7 @@ LINSTOR_AVAILABLE = False -from lock import Lock, LOCK_TYPE_GC_RUNNING +from lock import Lock import blktap2 import cleanup import distutils @@ -1516,28 +1516,8 @@ def _ensure_space_available(self, amount_needed): raise xs_errors.XenError('SRNoSpace') def _kick_gc(self): - # Don't bother if an instance already running. This is just an - # optimization to reduce the overhead of forking a new process if we - # don't have to, but the process will check the lock anyways. - lock = Lock(LOCK_TYPE_GC_RUNNING, self.uuid) - if not lock.acquireNoblock(): - if not cleanup.should_preempt(self.session, self.uuid): - util.SMlog('A GC instance already running, not kicking') - return - - util.SMlog('Aborting currently-running coalesce of garbage VDI') - try: - if not cleanup.abort(self.uuid, soft=True): - util.SMlog('The GC has already been scheduled to re-start') - except util.CommandException as e: - if e.code != errno.ETIMEDOUT: - raise - util.SMlog('Failed to abort the GC') - else: - lock.release() - util.SMlog('Kicking GC') - cleanup.gc(self.session, self.uuid, True) + cleanup.start_gc_service(self.uuid) # ============================================================================== # LinstorSr VDI From 3049aee5c4fe506b812468097196d53c77d58837 Mon Sep 17 00:00:00 2001 From: Ronan Abhamon Date: Thu, 10 Apr 2025 10:44:28 +0200 Subject: [PATCH 33/72] fix(LinstorSR): imitate the CA-400106 change Signed-off-by: Ronan Abhamon --- drivers/LinstorSR.py | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/LinstorSR.py b/drivers/LinstorSR.py index 1ff3d963e..d5d807d10 100755 --- a/drivers/LinstorSR.py +++ b/drivers/LinstorSR.py @@ -2319,6 +2319,7 @@ def _do_snapshot( try: return self._snapshot(snap_type, cbtlog, consistency_state) finally: + self.disable_leaf_on_secondary(vdi_uuid, secondary=secondary) blktap2.VDI.tap_unpause(self.session, sr_uuid, vdi_uuid, secondary) def _snapshot(self, snap_type, cbtlog=None, cbt_consistency=None): From ef0f3ca188e2a3e4fae3f7184cdcde5f9b59c02b Mon Sep 17 00:00:00 2001 From: Ronan Abhamon Date: Wed, 11 Dec 2024 11:14:27 +0100 Subject: [PATCH 34/72] fix(linstorvhdutil): coalesce helper returns the sector count now Without this change we have an error in cleanup.py that interrupts the coalesce algorithm. 
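For context, a minimal sketch of the contract the caller relies on (simplified from the cleanup.py usage, not part of this patch): the coalesce helper has to hand back the sector count so the caller can turn it into bytes, instead of discarding it as the old plugin code did.

```python
# Illustrative sketch only, simplified from how cleanup.py consumes the value:
# vhdutil.coalesce() reports how many sectors were coalesced, and the caller
# converts that count to bytes (512 bytes per sector).
import vhdutil

def coalesced_size_in_bytes(path):
    sectors = vhdutil.coalesce(path)
    return sectors * 512
```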
Signed-off-by: Ronan Abhamon --- drivers/linstor-manager | 3 +-- drivers/linstorvhdutil.py | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/linstor-manager b/drivers/linstor-manager index 47c434a3f..8ee6f149a 100755 --- a/drivers/linstor-manager +++ b/drivers/linstor-manager @@ -539,8 +539,7 @@ def set_parent(session, args): def coalesce(session, args): try: device_path = args['devicePath'] - vhdutil.coalesce(device_path) - return '' + return str(vhdutil.coalesce(device_path)) except Exception as e: util.SMlog('linstor-manager:coalesce error: {}'.format(e)) raise diff --git a/drivers/linstorvhdutil.py b/drivers/linstorvhdutil.py index 046c96952..b3df004a1 100644 --- a/drivers/linstorvhdutil.py +++ b/drivers/linstorvhdutil.py @@ -376,7 +376,7 @@ def force_parent(self, path, parentPath, parentRaw=False): @linstormodifier() def force_coalesce(self, path): - return self._call_method(vhdutil.coalesce, 'coalesce', path, use_parent=True) + return int(self._call_method(vhdutil.coalesce, 'coalesce', path, use_parent=True)) @linstormodifier() def force_repair(self, path): From cf63b11d78f6b52a22e4d08f5b0b2a19b9978007 Mon Sep 17 00:00:00 2001 From: Ronan Abhamon Date: Wed, 16 Oct 2024 15:02:30 +0200 Subject: [PATCH 35/72] Prevent wrong mypy error regarding `_linstor` member not set Signed-off-by: Ronan Abhamon --- drivers/LinstorSR.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/LinstorSR.py b/drivers/LinstorSR.py index d5d807d10..c65533f87 100755 --- a/drivers/LinstorSR.py +++ b/drivers/LinstorSR.py @@ -613,16 +613,16 @@ def create(self, uuid, size): logger=util.SMlog ) self._vhdutil = LinstorVhdUtil(self.session, self._linstor) - except Exception as e: - util.SMlog('Failed to create LINSTOR SR: {}'.format(e)) - raise xs_errors.XenError('LinstorSRCreate', opterr=str(e)) - try: util.SMlog( "Finishing SR creation, enable drbd-reactor on all hosts..." ) self._update_drbd_reactor_on_all_hosts(enabled=True) except Exception as e: + if not self._linstor: + util.SMlog('Failed to create LINSTOR SR: {}'.format(e)) + raise xs_errors.XenError('LinstorSRCreate', opterr=str(e)) + try: self._linstor.destroy() except Exception as e2: @@ -637,6 +637,7 @@ def delete(self, uuid): util.SMlog('LinstorSR.delete for {}'.format(self.uuid)) cleanup.gc_force(self.session, self.uuid) + assert self._linstor if self.vdis or self._linstor._volumes: raise xs_errors.XenError('SRNotEmpty') From 94f49df9ea29e9801a44ac8d615c2a9df8d162d3 Mon Sep 17 00:00:00 2001 From: Ronan Abhamon Date: Fri, 11 Oct 2024 18:10:14 +0200 Subject: [PATCH 36/72] Fix many invalid escape sequences Signed-off-by: Ronan Abhamon --- drivers/cifutils.py | 2 +- tests/test_SMBSR.py | 2 +- tests/test_cifutils.py | 16 ++++++++-------- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/cifutils.py b/drivers/cifutils.py index c183fe612..be82a37c1 100755 --- a/drivers/cifutils.py +++ b/drivers/cifutils.py @@ -60,7 +60,7 @@ def splitDomainAndUsername(uname): username = dom_username[1] else: raise CIFSException("A maximum of 2 tokens are expected " - "(\). {} were given." + "(\\). {} were given." 
.format(len(dom_username))) return username, domain diff --git a/tests/test_SMBSR.py b/tests/test_SMBSR.py index d20bcc0c6..0bb6cfb89 100644 --- a/tests/test_SMBSR.py +++ b/tests/test_SMBSR.py @@ -135,7 +135,7 @@ def test_attach_with_cifs_password( def test_attach_with_cifs_password_and_domain( self, symlink, mock_lock, makeMountPoint, mock_checkmount, mock_checklinks, mock_checkwritable): - smbsr = self.create_smbsr(username="citrix\jsmith", dconf_update={"password": "winter2019"}) + smbsr = self.create_smbsr(username="citrix\\jsmith", dconf_update={"password": "winter2019"}) mock_checkmount.return_value = False makeMountPoint.return_value = "/var/mount" smbsr.attach('asr_uuid') diff --git a/tests/test_cifutils.py b/tests/test_cifutils.py index 924aa3c62..0b8bf8356 100644 --- a/tests/test_cifutils.py +++ b/tests/test_cifutils.py @@ -44,7 +44,7 @@ def test_password_and_username_smbsr(self): self.assertEqual(domain, None) def test_password_and_username_domain(self): - junk_dconf = {"cifspassword": "123", "username": "citrix\jsmith"} + junk_dconf = {"cifspassword": "123", "username": "citrix\\jsmith"} junk_session = 123 credentials, domain = cifutils.getCIFCredentials(junk_dconf, junk_session, @@ -54,7 +54,7 @@ def test_password_and_username_domain(self): self.assertEqual(domain, "citrix") def test_password_and_username_domain_smbsr(self): - junk_dconf = {"password": "123", "username": "citrix\jsmith"} + junk_dconf = {"password": "123", "username": "citrix\\jsmith"} junk_session = 123 credentials, domain = cifutils.getCIFCredentials(junk_dconf, junk_session) @@ -90,7 +90,7 @@ def test_password_secret_and_username_smbsr(self, get_secret): @mock.patch('util.get_secret', autospec=True) def test_password_secret_and_username_also_domain(self, get_secret): junk_dconf = {"cifspassword_secret": "123", - "username": "citrix\jsmith"} + "username": "citrix\\jsmith"} junk_session = 123 get_secret.return_value = 'winter2019' credentials, domain = cifutils.getCIFCredentials(junk_dconf, @@ -104,7 +104,7 @@ def test_password_secret_and_username_also_domain(self, get_secret): @mock.patch('util.get_secret', autospec=True) def test_password_secret_and_username_also_domain_smbsr(self, get_secret): junk_dconf = {"password_secret": "123", - "username": "citrix\jsmith"} + "username": "citrix\\jsmith"} junk_session = 123 get_secret.return_value = 'winter2019' credentials, domain = cifutils.getCIFCredentials(junk_dconf, @@ -116,23 +116,23 @@ def test_password_secret_and_username_also_domain_smbsr(self, get_secret): def test_username_bad_domain(self): junk_dconf = {"cifspassword_secret": "123", - "username": "citrix\gjk\jsmith"} + "username": "citrix\\gjk\\jsmith"} junk_session = 123 with self.assertRaises(cifutils.CIFSException) as cm: cifutils.getCIFCredentials(junk_dconf, junk_session, prefix="cifs") expected_message = ("A maximum of 2 tokens are expected " - "(\). 3 were given.") + "(\\). 3 were given.") the_exception = cm.exception self.assertEqual(the_exception.errstr, expected_message) def test_username_bad_domain_smbsr(self): junk_dconf = {"password_secret": "123", - "username": "citrix\gjk\jsmith"} + "username": "citrix\\gjk\\jsmith"} junk_session = 123 with self.assertRaises(cifutils.CIFSException) as cm: cifutils.getCIFCredentials(junk_dconf, junk_session) expected_message = ("A maximum of 2 tokens are expected " - "(\). 3 were given.") + "(\\). 
3 were given.") the_exception = cm.exception self.assertEqual(the_exception.errstr, expected_message) From 128cc78eeb834e0a7d32e078c7f6f1825b7b067e Mon Sep 17 00:00:00 2001 From: Ronan Abhamon Date: Fri, 11 Oct 2024 18:21:07 +0200 Subject: [PATCH 37/72] Fix many invalid escape sequences on regexes Signed-off-by: Ronan Abhamon --- drivers/util.py | 4 ++-- drivers/vhdutil.py | 2 +- tests/test_storage_init.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/util.py b/drivers/util.py index e5d5da0af..65b23f64b 100755 --- a/drivers/util.py +++ b/drivers/util.py @@ -1085,7 +1085,7 @@ def diskFromPartition(partition): return m.group(2) numlen = 0 # number of digit characters - m = re.match("\D+(\d+)", partition) + m = re.match(r"\D+(\d+)", partition) if m is not None: numlen = len(m.group(1)) @@ -1096,7 +1096,7 @@ def diskFromPartition(partition): # is it a mapper path? if partition.startswith("mapper"): if re.search("p[0-9]*$", partition): - numlen = len(re.match("\d+", partition[::-1]).group(0)) + 1 + numlen = len(re.match(r"\d+", partition[::-1]).group(0)) + 1 SMlog("Found mapper part, len %d" % numlen) else: numlen = 0 diff --git a/drivers/vhdutil.py b/drivers/vhdutil.py index 6a71804c7..8ac7b08da 100755 --- a/drivers/vhdutil.py +++ b/drivers/vhdutil.py @@ -206,7 +206,7 @@ def hasParent(path): cmd = [VHD_UTIL, "read", OPT_LOG_ERR, "-p", "-n", path] ret = ioretry(cmd) # pylint: disable=no-member - m = re.match(".*Disk type\s+: (\S+) hard disk.*", ret, flags=re.S) + m = re.match(r".*Disk type\s+: (\S+) hard disk.*", ret, flags=re.S) vhd_type = m.group(1) assert(vhd_type == "Differencing" or vhd_type == "Dynamic") return vhd_type == "Differencing" diff --git a/tests/test_storage_init.py b/tests/test_storage_init.py index 9b7138a4b..d91d90895 100644 --- a/tests/test_storage_init.py +++ b/tests/test_storage_init.py @@ -355,7 +355,7 @@ def _xe_command(self, args): # pragma: no cover combined_args = " ".join(sorted(args[1:])) if subcmd == "sm-list": - m = re.match("--minimal params=uuid type=(\S+)$", combined_args) + m = re.match(r"--minimal params=uuid type=(\S+)$", combined_args) if m: sm_uuid = "uuid-for-sr-type-" + m.group(1) return CmdResult(stdout=f"{sm_uuid}\n") @@ -365,7 +365,7 @@ def _xe_command(self, args): # pragma: no cover if not self.created_srs: return CmdResult() - m = re.match("--minimal params=uuid type=(\S+)$", combined_args) + m = re.match(r"--minimal params=uuid type=(\S+)$", combined_args) if m: sr_type = m.group(1) num_srs = len(self.created_srs[sr_type]) From 662bc5b0d97576b712018596454664057d33ff69 Mon Sep 17 00:00:00 2001 From: Ronan Abhamon Date: Mon, 14 Oct 2024 17:33:49 +0200 Subject: [PATCH 38/72] Fix override of FileSR.attach The current attach method of FileSR doesn't correctly override the method of the SR class. It actually adds a "bind" parameter, which is seen as an error by analyzers like mypy. The "bind" parameter was added by this commit: "CA-371791: Fix world readable permissions on EXTSR" Signed-off-by: Ronan Abhamon --- drivers/EXTSR.py | 2 +- drivers/FileSR.py | 5 ++++- tests/test_FileSR.py | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/EXTSR.py b/drivers/EXTSR.py index 61e88dcdc..164816207 100755 --- a/drivers/EXTSR.py +++ b/drivers/EXTSR.py @@ -123,7 +123,7 @@ def attach(self, sr_uuid): 'LVMMount', opterr='FSCK failed on %s. 
Errno is %d' % (self.remotepath, inst.code)) - super(EXTSR, self).attach(sr_uuid, bind=False) + self.attach_and_bind(sr_uuid, bind=False) self.attached = True diff --git a/drivers/FileSR.py b/drivers/FileSR.py index 6ac841c90..4de359bf0 100755 --- a/drivers/FileSR.py +++ b/drivers/FileSR.py @@ -156,7 +156,10 @@ def delete(self, sr_uuid): raise xs_errors.XenError('FileSRDelete', \ opterr='error %d' % inst.code) - def attach(self, sr_uuid, bind=True): + def attach(self, sr_uuid): + self.attach_and_bind(sr_uuid) + + def attach_and_bind(self, sr_uuid, bind=True): if not self._checkmount(): try: util.ioretry(lambda: util.makedirs(self.path, mode=0o700)) diff --git a/tests/test_FileSR.py b/tests/test_FileSR.py index e755f2d8a..a82d0e5be 100644 --- a/tests/test_FileSR.py +++ b/tests/test_FileSR.py @@ -682,7 +682,7 @@ def test_attach_can_do_non_bind_mount(self, mock_chmod, mock_util_makedirs): sr.path = mount_dst sr.remotepath = mount_src - sr.attach(None, bind=False) + sr.attach_and_bind(None, bind=False) self.assertTrue(sr.attached) From f54d437c6112272324d0b6226b74fddeecfcc723 Mon Sep 17 00:00:00 2001 From: Ronan Abhamon Date: Tue, 15 Oct 2024 14:35:40 +0200 Subject: [PATCH 39/72] Fix override of BaseISCSISR.detach The current detach method of BaseISCSISR doesn't correctly override the method of the SR class. It actually adds a "delete" parameter, which is seen as an error by analyzers like mypy. The "delete" parameter was added by this commit: "iscsi: Delete LUN on detach of RawISCSI" Signed-off-by: Ronan Abhamon --- drivers/BaseISCSI.py | 5 ++++- drivers/RawISCSISR.py | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/BaseISCSI.py b/drivers/BaseISCSI.py index 71e43ddc4..a52442c54 100755 --- a/drivers/BaseISCSI.py +++ b/drivers/BaseISCSI.py @@ -433,7 +433,10 @@ def attach(self, sr_uuid): realdev = os.path.realpath(os.path.join(dev_path, dev)) util.set_scheduler(os.path.basename(realdev)) - def detach(self, sr_uuid, delete=False): + def detach(self, sr_uuid): + self.detach_and_delete(sr_uuid, delete=False) + + def detach_and_delete(self, sr_uuid, delete=True): keys = [] pbdref = None try: diff --git a/drivers/RawISCSISR.py b/drivers/RawISCSISR.py index 0b17cfa9b..1df1c7a24 100644 --- a/drivers/RawISCSISR.py +++ b/drivers/RawISCSISR.py @@ -64,7 +64,7 @@ def load(self, vdi_uuid): self.managed = True def detach(self, sr_uuid): - super(RawISCSISR, self).detach(sr_uuid, True) + super(RawISCSISR, self).detach_and_delete(sr_uuid) def vdi(self, uuid): return ISCSIVDI(self, uuid) From 8e7cde1413c150feb8a539a6fe0ea9f44bb7e84a Mon Sep 17 00:00:00 2001 From: Ronan Abhamon Date: Tue, 15 Oct 2024 17:12:57 +0200 Subject: [PATCH 40/72] Fix override of VDI.delete in many subclasses It triggers warns in analyzers like mypy. 
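For illustration, a self-contained sketch of the kind of override mismatch mypy reports (class names here are only an approximation of the real VDI hierarchy, not a verbatim copy of it):

```python
# Illustrative sketch only: dropping an optional parameter of the base method
# in an override is reported by mypy as an incompatible signature.
class BaseVDI:
    def delete(self, sr_uuid: str, vdi_uuid: str, data_only: bool = False) -> None:
        pass

class DummyVDI(BaseVDI):
    # mypy reports something like:
    #   error: Signature of "delete" incompatible with supertype "BaseVDI"  [override]
    def delete(self, sr_uuid: str, vdi_uuid: str) -> None:
        pass
```

Adding the `data_only=False` parameter to the subclasses keeps the signatures compatible without changing their behaviour, since none of them use it.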
Signed-off-by: Ronan Abhamon --- drivers/DummySR.py | 2 +- drivers/ISOSR.py | 2 +- drivers/LUNperVDI.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/DummySR.py b/drivers/DummySR.py index 44f571e50..c733fdf70 100755 --- a/drivers/DummySR.py +++ b/drivers/DummySR.py @@ -159,7 +159,7 @@ def create(self, sr_uuid, vdi_uuid, size): self.run_corner_cases_tests() return self.get_params() - def delete(self, sr_uuid, vdi_uuid): + def delete(self, sr_uuid, vdi_uuid, data_only=False): self.sr._assertValues(['sr_uuid', 'args', 'host_ref', 'device_config', 'command', 'sr_ref', 'vdi_ref', 'vdi_location', 'vdi_uuid']) assert(len(self.sr.srcmd.params['args']) == 0) diff --git a/drivers/ISOSR.py b/drivers/ISOSR.py index 9ca44506f..cbe0b658c 100755 --- a/drivers/ISOSR.py +++ b/drivers/ISOSR.py @@ -757,7 +757,7 @@ def create(self, sr_uuid, vdi_uuid, size): raise xs_errors.XenError('VDICreate', \ opterr='could not create file: "%s"' % self.path) - def delete(self, sr_uuid, vdi_uuid): + def delete(self, sr_uuid, vdi_uuid, data_only=False): util.SMlog("Deleting...") self.uuid = vdi_uuid diff --git a/drivers/LUNperVDI.py b/drivers/LUNperVDI.py index 306d70414..497ba6b56 100755 --- a/drivers/LUNperVDI.py +++ b/drivers/LUNperVDI.py @@ -98,7 +98,7 @@ def create(self, sr_uuid, vdi_uuid, size): return super(RAWVDI, self.sr.vdis[v['uuid']]).get_params() raise xs_errors.XenError('SRNoSpace') - def delete(self, sr_uuid, vdi_uuid): + def delete(self, sr_uuid, vdi_uuid, data_only=False): try: vdi = util._getVDI(self.sr, vdi_uuid) if not vdi['managed']: From 61f0c16009c5fed44e1b8ab3f74997eed716c95b Mon Sep 17 00:00:00 2001 From: Ronan Abhamon Date: Wed, 16 Oct 2024 14:52:40 +0200 Subject: [PATCH 41/72] Fix override of `VDI._do_snapshot` `cloneOp` must be present. Parameters like `snapType` must must be written in the same way between the parent class and the child class. Otherwise a linter like mypy may return an error. 
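For illustration, a small sketch of why the spelling matters (the base declaration shown here mirrors the signature this patch aligns to, it is not a verbatim copy of VDI.py):

```python
# Illustrative sketch only: renaming a parameter in an override still works for
# positional calls, but breaks keyword-argument callers and is flagged by mypy.
class BaseVDI:
    def _do_snapshot(self, sr_uuid, vdi_uuid, snapType,
                     cloneOp=False, secondary=None, cbtlog=None):
        pass

class FileVDI(BaseVDI):
    # Before this patch `snapType` was spelled `snap_type` and `cloneOp` was
    # replaced by `_`, so a keyword call such as
    #   vdi._do_snapshot(sr_uuid, vdi_uuid, snapType=..., cloneOp=True)
    # raises TypeError, and mypy warns about the incompatible override.
    def _do_snapshot(self, sr_uuid, vdi_uuid, snap_type,
                     _=False, secondary=None, cbtlog=None):
        pass
```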
Signed-off-by: Ronan Abhamon --- drivers/FileSR.py | 6 +++--- drivers/LinstorSR.py | 7 +++---- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/FileSR.py b/drivers/FileSR.py index 4de359bf0..04a6543e8 100755 --- a/drivers/FileSR.py +++ b/drivers/FileSR.py @@ -709,8 +709,8 @@ def reset_leaf(self, sr_uuid, vdi_uuid): vhdutil.killData(self.path) - def _do_snapshot(self, sr_uuid, vdi_uuid, snap_type, - _=False, secondary=None, cbtlog=None): + def _do_snapshot(self, sr_uuid, vdi_uuid, snapType, + cloneOp=False, secondary=None, cbtlog=None): # If cbt enabled, save file consistency state if cbtlog is not None: if blktap2.VDI.tap_status(self.session, vdi_uuid): @@ -728,7 +728,7 @@ def _do_snapshot(self, sr_uuid, vdi_uuid, snap_type, if not blktap2.VDI.tap_pause(self.session, sr_uuid, vdi_uuid): raise util.SMException("failed to pause VDI %s" % vdi_uuid) try: - return self._snapshot(snap_type, cbtlog, consistency_state) + return self._snapshot(snapType, cbtlog, consistency_state) finally: self.disable_leaf_on_secondary(vdi_uuid, secondary=secondary) blktap2.VDI.tap_unpause(self.session, sr_uuid, vdi_uuid, secondary) diff --git a/drivers/LinstorSR.py b/drivers/LinstorSR.py index c65533f87..287c15e86 100755 --- a/drivers/LinstorSR.py +++ b/drivers/LinstorSR.py @@ -2296,9 +2296,8 @@ def _rename(self, oldpath, newpath): volume_uuid = self._linstor.get_volume_uuid_from_device_path(oldpath) self._linstor.update_volume_name(volume_uuid, newpath) - def _do_snapshot( - self, sr_uuid, vdi_uuid, snap_type, secondary=None, cbtlog=None - ): + def _do_snapshot(self, sr_uuid, vdi_uuid, snapType, + cloneOp=False, secondary=None, cbtlog=None): # If cbt enabled, save file consistency state. if cbtlog is not None: if blktap2.VDI.tap_status(self.session, vdi_uuid): @@ -2318,7 +2317,7 @@ def _do_snapshot( if not blktap2.VDI.tap_pause(self.session, sr_uuid, vdi_uuid): raise util.SMException('Failed to pause VDI {}'.format(vdi_uuid)) try: - return self._snapshot(snap_type, cbtlog, consistency_state) + return self._snapshot(snapType, cbtlog, consistency_state) finally: self.disable_leaf_on_secondary(vdi_uuid, secondary=secondary) blktap2.VDI.tap_unpause(self.session, sr_uuid, vdi_uuid, secondary) From 31c58a56cd13c2bbb7ae8530465054988c40392c Mon Sep 17 00:00:00 2001 From: Ronan Abhamon Date: Tue, 22 Oct 2024 14:33:13 +0200 Subject: [PATCH 42/72] Fix override of VDI.load in LVHDVDI cleanup.py It triggers warns in analyzers like mypy. Signed-off-by: Ronan Abhamon --- drivers/cleanup.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/cleanup.py b/drivers/cleanup.py index 577ff4bb2..b9c3f5b42 100755 --- a/drivers/cleanup.py +++ b/drivers/cleanup.py @@ -1222,20 +1222,22 @@ class LVHDVDI(VDI): JRN_ZERO = "zero" # journal entry type for zeroing out end of parent DRIVER_NAME_RAW = "aio" - def load(self, vdiInfo): + def load(self, info=None): + # `info` is always set. `None` default value is only here to match parent method. 
+ assert info, "No info given to LVHDVDI.load" self.parent = None self.children = [] self._sizeVHD = -1 self._sizeAllocated = -1 - self.scanError = vdiInfo.scanError - self.sizeLV = vdiInfo.sizeLV - self.sizeVirt = vdiInfo.sizeVirt - self.fileName = vdiInfo.lvName - self.lvActive = vdiInfo.lvActive - self.lvOpen = vdiInfo.lvOpen - self.lvReadonly = vdiInfo.lvReadonly - self.hidden = vdiInfo.hidden - self.parentUuid = vdiInfo.parentUuid + self.scanError = info.scanError + self.sizeLV = info.sizeLV + self.sizeVirt = info.sizeVirt + self.fileName = info.lvName + self.lvActive = info.lvActive + self.lvOpen = info.lvOpen + self.lvReadonly = info.lvReadonly + self.hidden = info.hidden + self.parentUuid = info.parentUuid self.path = os.path.join(self.sr.path, self.fileName) @staticmethod From cc3e6c5ebe7f804be445ccdbdce19f1ce95c09db Mon Sep 17 00:00:00 2001 From: Ronan Abhamon Date: Wed, 16 Oct 2024 14:48:38 +0200 Subject: [PATCH 43/72] Use a specific var for NFS options in ISOSR.attach Prevent mypy errors when a variable type is changed dynamically from list to string. Signed-off-by: Ronan Abhamon --- drivers/ISOSR.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/ISOSR.py b/drivers/ISOSR.py index cbe0b658c..64f99b480 100755 --- a/drivers/ISOSR.py +++ b/drivers/ISOSR.py @@ -335,14 +335,15 @@ def attach(self, sr_uuid): util.makedirs(self.mountpoint) mountcmd = [] - options = '' + options = [] + nfs_options = '' if 'options' in self.dconf: options = self.dconf['options'].split(' ') if protocol == 'cifs': options = [x for x in options if x != ""] else: - options = self.getNFSOptions(options) + nfs_options = self.getNFSOptions(options) # SMB options are passed differently for create via # XC/xe sr-create and create via xe-mount-iso-sr @@ -392,7 +393,7 @@ def attach(self, sr_uuid): io_timeout = nfs.get_nfs_timeout(self.other_config) io_retrans = nfs.get_nfs_retrans(self.other_config) nfs.soft_mount(self.mountpoint, server, path, - transport, useroptions=options, nfsversion=self.nfsversion, + transport, useroptions=nfs_options, nfsversion=self.nfsversion, timeout=io_timeout, retrans=io_retrans) else: if self.smbversion in SMB_VERSION_3: From 41e7dd418ea035feb91d5eb0eb99e41155d0815b Mon Sep 17 00:00:00 2001 From: Ronan Abhamon Date: Tue, 15 Oct 2024 15:15:02 +0200 Subject: [PATCH 44/72] Modernize Lock class using `staticmethod` decorator Signed-off-by: Ronan Abhamon --- drivers/lock.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/lock.py b/drivers/lock.py index ceb48fe1b..56bf9fcc0 100755 --- a/drivers/lock.py +++ b/drivers/lock.py @@ -64,6 +64,7 @@ def release(self): def held(self): raise NotImplementedError("Lock methods implemented in LockImplementation") + @staticmethod def _mknamespace(ns): if ns is None: @@ -72,7 +73,6 @@ def _mknamespace(ns): assert not ns.startswith(".") assert ns.find(os.path.sep) < 0 return ns - _mknamespace = staticmethod(_mknamespace) @staticmethod def clearAll(): @@ -82,6 +82,7 @@ def clearAll(): Lock.INSTANCES = {} Lock.BASE_INSTANCES = {} + @staticmethod def cleanup(name, ns=None): if ns: if ns in Lock.INSTANCES: @@ -97,8 +98,7 @@ def cleanup(name, ns=None): if os.path.exists(path): Lock._unlink(path) - cleanup = staticmethod(cleanup) - + @staticmethod def cleanupAll(ns=None): ns = Lock._mknamespace(ns) nspath = os.path.join(Lock.BASE_DIR, ns) @@ -112,11 +112,11 @@ def cleanupAll(ns=None): Lock._rmdir(nspath) - cleanupAll = staticmethod(cleanupAll) # # Lock and attribute file management # + 
@staticmethod def _mkdirs(path): """Concurrent makedirs() catching EEXIST.""" if os.path.exists(path): @@ -126,8 +126,8 @@ def _mkdirs(path): except OSError as e: if e.errno != errno.EEXIST: raise LockException("Failed to makedirs(%s)" % path) - _mkdirs = staticmethod(_mkdirs) + @staticmethod def _unlink(path): """Non-raising unlink().""" util.SMlog("lock: unlinking lock file %s" % path) @@ -135,8 +135,8 @@ def _unlink(path): os.unlink(path) except Exception as e: util.SMlog("Failed to unlink(%s): %s" % (path, e)) - _unlink = staticmethod(_unlink) + @staticmethod def _rmdir(path): """Non-raising rmdir().""" util.SMlog("lock: removing lock dir %s" % path) @@ -144,7 +144,6 @@ def _rmdir(path): os.rmdir(path) except Exception as e: util.SMlog("Failed to rmdir(%s): %s" % (path, e)) - _rmdir = staticmethod(_rmdir) class LockImplementation(object): From 7d171ed393c4b8588c2ab980f3ab5d575105381d Mon Sep 17 00:00:00 2001 From: Ronan Abhamon Date: Tue, 15 Oct 2024 15:33:11 +0200 Subject: [PATCH 45/72] Modernize GC using `staticmethod` decorator Signed-off-by: Ronan Abhamon --- drivers/cleanup.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/cleanup.py b/drivers/cleanup.py index b9c3f5b42..7129a2082 100755 --- a/drivers/cleanup.py +++ b/drivers/cleanup.py @@ -123,10 +123,11 @@ class Util: PREFIX = {"G": 1024 * 1024 * 1024, "M": 1024 * 1024, "K": 1024} + @staticmethod def log(text): util.SMlog(text, ident="SMGC") - log = staticmethod(log) + @staticmethod def logException(tag): info = sys.exc_info() if info[0] == SystemExit: @@ -140,8 +141,8 @@ def logException(tag): Util.log("%s: EXCEPTION %s, %s" % (tag, info[0], info[1])) Util.log(tb) Util.log("*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*") - logException = staticmethod(logException) + @staticmethod def doexec(args, expectedRC, inputtext=None, ret=None, log=True): "Execute a subprocess, then return its return code, stdout, stderr" proc = subprocess.Popen(args, @@ -170,8 +171,8 @@ def doexec(args, expectedRC, inputtext=None, ret=None, log=True): if ret == Util.RET_STDERR: return stderr return stdout - doexec = staticmethod(doexec) + @staticmethod def runAbortable(func, ret, ns, abortTest, pollInterval, timeOut): """execute func in a separate thread and kill it if abortTest signals so""" @@ -222,23 +223,23 @@ def runAbortable(func, ret, ns, abortTest, pollInterval, timeOut): resultFlag.set("failure") Util.logException("This exception has occured") os._exit(0) - runAbortable = staticmethod(runAbortable) + @staticmethod def num2str(number): for prefix in ("G", "M", "K"): if number >= Util.PREFIX[prefix]: return "%.3f%s" % (float(number) / Util.PREFIX[prefix], prefix) return "%s" % number - num2str = staticmethod(num2str) + @staticmethod def numBits(val): count = 0 while val: count += val & 1 val = val >> 1 return count - numBits = staticmethod(numBits) + @staticmethod def countBits(bitmap1, bitmap2): """return bit count in the bitmap produced by ORing the two bitmaps""" len1 = len(bitmap1) @@ -260,14 +261,13 @@ def countBits(bitmap1, bitmap2): val = bitmapLong[i] count += Util.numBits(val) return count - countBits = staticmethod(countBits) + @staticmethod def getThisScript(): thisScript = util.get_real_path(__file__) if thisScript.endswith(".pyc"): thisScript = thisScript[:-1] return thisScript - getThisScript = staticmethod(getThisScript) ################################################################################ @@ -293,11 +293,11 @@ class XAPI: class LookupError(util.SMException): pass + 
@staticmethod def getSession(): session = XenAPI.xapi_local() session.xenapi.login_with_password(XAPI.USER, '', '', 'SM') return session - getSession = staticmethod(getSession) def __init__(self, session, srUuid): self.sessionPrivate = False @@ -853,6 +853,7 @@ def _runTapdiskDiff(self): Util.doexec(cmd, 0) return True + @staticmethod def _reportCoalesceError(vdi, ce): """Reports a coalesce error to XenCenter. @@ -905,12 +906,12 @@ def _reportCoalesceError(vdi, ce): str(now.strftime('%s'))) if xcmsg: xapi.message.create(msg_name, "3", "SR", vdi.sr.uuid, msg_body) - _reportCoalesceError = staticmethod(_reportCoalesceError) def coalesce(self): # size is returned in sectors return vhdutil.coalesce(self.path) * 512 + @staticmethod def _doCoalesceVHD(vdi): try: startTime = time.time() @@ -930,7 +931,6 @@ def _doCoalesceVHD(vdi): raise ce except: raise - _doCoalesceVHD = staticmethod(_doCoalesceVHD) def _vdi_is_raw(self, vdi_path): """ @@ -1828,6 +1828,7 @@ def _getTreeStr(self, vdi, indent=8): KEY_OFFLINE_COALESCE_NEEDED = "leaf_coalesce_need_offline" KEY_OFFLINE_COALESCE_OVERRIDE = "leaf_coalesce_offline_override" + @staticmethod def getInstance(uuid, xapiSession, createLock=True, force=False): xapi = XAPI(xapiSession, uuid) type = normalizeType(xapi.srRecord["type"]) @@ -1838,7 +1839,6 @@ def getInstance(uuid, xapiSession, createLock=True, force=False): elif type == SR.TYPE_LINSTOR: return LinstorSR(uuid, xapi, createLock, force) raise util.SMException("SR type %s not recognized" % type) - getInstance = staticmethod(getInstance) def __init__(self, uuid, xapi, createLock, force): self.logFilter = self.LogFilter(self) From 42588bcbd8bcb5b161876822b8767a9f6c31b1e3 Mon Sep 17 00:00:00 2001 From: Ronan Abhamon Date: Tue, 15 Oct 2024 15:38:33 +0200 Subject: [PATCH 46/72] Modernize RefCounter using `staticmethod` decorator Signed-off-by: Ronan Abhamon --- drivers/refcounter.py | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/drivers/refcounter.py b/drivers/refcounter.py index 5418c858a..20585da82 100644 --- a/drivers/refcounter.py +++ b/drivers/refcounter.py @@ -39,6 +39,7 @@ class RefCounter: BASE_DIR = "/var/run/sm/refcount" + @staticmethod def get(obj, binary, ns=None): """Get (inc ref count) 'obj' in namespace 'ns' (optional). Returns new ref count""" @@ -46,8 +47,8 @@ def get(obj, binary, ns=None): return RefCounter._adjust(ns, obj, 0, 1) else: return RefCounter._adjust(ns, obj, 1, 0) - get = staticmethod(get) + @staticmethod def put(obj, binary, ns=None): """Put (dec ref count) 'obj' in namespace 'ns' (optional). If ref count was zero already, this operation is a no-op. @@ -56,8 +57,8 @@ def put(obj, binary, ns=None): return RefCounter._adjust(ns, obj, 0, -1) else: return RefCounter._adjust(ns, obj, -1, 0) - put = staticmethod(put) + @staticmethod def set(obj, count, binaryCount, ns=None): """Set normal & binary counts explicitly to the specified values. 
Returns new ref count""" @@ -66,14 +67,14 @@ def set(obj, count, binaryCount, ns=None): if binaryCount > 1: raise RefCounterException("Binary count = %d > 1" % binaryCount) RefCounter._set(ns, obj, count, binaryCount) - set = staticmethod(set) + @staticmethod def check(obj, ns=None): """Get the ref count values for 'obj' in namespace 'ns' (optional)""" (obj, ns) = RefCounter._getSafeNames(obj, ns) return RefCounter._get(ns, obj) - check = staticmethod(check) + @staticmethod def checkLocked(obj, ns): """Lock-protected access""" lock = Lock(obj, ns) @@ -82,13 +83,13 @@ def checkLocked(obj, ns): return RefCounter.check(obj, ns) finally: lock.release() - checkLocked = staticmethod(checkLocked) + @staticmethod def reset(obj, ns=None): """Reset ref counts for 'obj' in namespace 'ns' (optional) to 0.""" RefCounter.resetAll(ns, obj) - reset = staticmethod(reset) + @staticmethod def resetAll(ns=None, obj=None): """Reset ref counts of 'obj' in namespace 'ns' to 0. If obj is not provided, reset all existing objects in 'ns' to 0. If neither obj nor @@ -106,8 +107,8 @@ def resetAll(ns=None, obj=None): raise RefCounterException("failed to get namespace list") for ns in nsList: RefCounter._reset(ns, obj) - resetAll = staticmethod(resetAll) + @staticmethod def _adjust(ns, obj, delta, binaryDelta): """Add 'delta' to the normal refcount and 'binaryDelta' to the binary refcount of 'obj' in namespace 'ns'. @@ -133,8 +134,8 @@ def _adjust(ns, obj, delta, binaryDelta): newCount, newBinaryCount)) RefCounter._set(ns, obj, newCount, newBinaryCount) return newCount + newBinaryCount - _adjust = staticmethod(_adjust) + @staticmethod def _get(ns, obj): """Get the ref count values for 'obj' in namespace 'ns'""" objFile = os.path.join(RefCounter.BASE_DIR, ns, obj) @@ -142,8 +143,8 @@ def _get(ns, obj): if util.pathexists(objFile): (count, binaryCount) = RefCounter._readCount(objFile) return (count, binaryCount) - _get = staticmethod(_get) + @staticmethod def _set(ns, obj, count, binaryCount): """Set the ref count values for 'obj' in namespace 'ns'""" util.SMlog("Refcount for %s:%s set => (%d, %db)" % \ @@ -156,8 +157,7 @@ def _set(ns, obj, count, binaryCount): while not RefCounter._writeCount(objFile, count, binaryCount): RefCounter._createNamespace(ns) - _set = staticmethod(_set) - + @staticmethod def _getSafeNames(obj, ns): """Get a name that can be used as a file name""" if not ns: @@ -167,8 +167,8 @@ def _getSafeNames(obj, ns): for char in ['/', '*', '?', '\\']: obj = obj.replace(char, "_") return (obj, ns) - _getSafeNames = staticmethod(_getSafeNames) + @staticmethod def _createNamespace(ns): nsDir = os.path.join(RefCounter.BASE_DIR, ns) try: @@ -177,8 +177,8 @@ def _createNamespace(ns): if e.errno != errno.EEXIST: raise RefCounterException("failed to makedirs '%s' (%s)" % \ (nsDir, e)) - _createNamespace = staticmethod(_createNamespace) + @staticmethod def _removeObject(ns, obj): nsDir = os.path.join(RefCounter.BASE_DIR, ns) objFile = os.path.join(nsDir, obj) @@ -199,8 +199,8 @@ def _removeObject(ns, obj): pass else: raise RefCounterException("failed to remove '%s'" % nsDir) - _removeObject = staticmethod(_removeObject) + @staticmethod def _reset(ns, obj=None): nsDir = os.path.join(RefCounter.BASE_DIR, ns) if not util.pathexists(nsDir): @@ -216,8 +216,8 @@ def _reset(ns, obj=None): raise RefCounterException("failed to list '%s'" % ns) for obj in objList: RefCounter._removeObject(ns, obj) - _reset = staticmethod(_reset) + @staticmethod def _readCount(fn): try: f = open(fn, 'r') @@ -229,8 +229,8 @@ def 
_readCount(fn): except IOError: raise RefCounterException("failed to read file '%s'" % fn) return (count, binaryCount) - _readCount = staticmethod(_readCount) + @staticmethod def _writeCount(fn, count, binaryCount): try: f = open(fn, 'w') @@ -243,8 +243,8 @@ def _writeCount(fn, count, binaryCount): return False raise RefCounterException("failed to write '(%d %d)' to '%s': %s" \ % (count, binaryCount, fn, e)) - _writeCount = staticmethod(_writeCount) + @staticmethod def _runTests(): "Unit tests" @@ -535,7 +535,6 @@ def _runTests(): RefCounter.resetAll() return 0 - _runTests = staticmethod(_runTests) if __name__ == '__main__': From 9a6138c38a6482a25810f704490dc03408469fa0 Mon Sep 17 00:00:00 2001 From: Ronan Abhamon Date: Mon, 21 Oct 2024 17:10:39 +0200 Subject: [PATCH 47/72] Simplify FakeSMBSR implementation (remove member vars in class) Signed-off-by: Ronan Abhamon --- tests/test_SMBSR.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/tests/test_SMBSR.py b/tests/test_SMBSR.py index 0bb6cfb89..4cfd2733b 100644 --- a/tests/test_SMBSR.py +++ b/tests/test_SMBSR.py @@ -12,17 +12,10 @@ class FakeSMBSR(SMBSR.SMBSR): - uuid = None - sr_ref = None - mountpoint = None - linkpath = None - path = None - session = None - remoteserver = None - def __init__(self, srcmd, none): self.dconf = srcmd.dconf self.srcmd = srcmd + self.session = None self.uuid = 'auuid' self.sr_ref = 'asr_ref' self.mountpoint = 'aMountpoint' From 87225e03389b6ea04e8e13a8905dc4414cccb662 Mon Sep 17 00:00:00 2001 From: Ronan Abhamon Date: Mon, 21 Oct 2024 17:49:20 +0200 Subject: [PATCH 48/72] Use `for session` instead of `for e` Avoid mypy error: ``` error: Assignment to variable "e" outside except: block [misc] ``` Signed-off-by: Ronan Abhamon --- drivers/iscsilib.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iscsilib.py b/drivers/iscsilib.py index e77d17d26..e54de1bcd 100644 --- a/drivers/iscsilib.py +++ b/drivers/iscsilib.py @@ -535,8 +535,8 @@ def _checkAnyTGT(): except Exception as e: util.SMlog("%s failed with %s" % (cmd, e.args)) stdout = "" - for e in filter(match_session, stdout.split('\n')): - iqn = e.split()[-1] + for session in filter(match_session, stdout.split('\n')): + iqn = session.split()[-1] if not iqn in rootIQNs: return True return False From c4cd2b0a8f8338339dc8012fddf3becc4441a2ce Mon Sep 17 00:00:00 2001 From: Ronan Abhamon Date: Wed, 16 Oct 2024 14:44:19 +0200 Subject: [PATCH 49/72] Fix util.SRtoXML calls in many drivers Without this change, mypy triggers an error `var-annoted`: `Need type annotation for "sr_dict"` Signed-off-by: Ronan Abhamon --- drivers/CephFSSR.py | 3 +-- drivers/GlusterFSSR.py | 3 +-- drivers/MooseFSSR.py | 3 +-- drivers/SMBSR.py | 5 +---- 4 files changed, 4 insertions(+), 10 deletions(-) diff --git a/drivers/CephFSSR.py b/drivers/CephFSSR.py index f7c263364..ca79a6a31 100644 --- a/drivers/CephFSSR.py +++ b/drivers/CephFSSR.py @@ -197,8 +197,7 @@ def probe(self): except (util.CommandException, xs_errors.XenError): raise # Create a dictionary from the SR uuids to feed SRtoXML() - sr_dict = {sr_uuid: {} for sr_uuid in sr_list} - return util.SRtoXML(sr_dict) + return util.SRtoXML({sr_uuid: {} for sr_uuid in sr_list}) def detach(self, sr_uuid): if not self.checkmount(): diff --git a/drivers/GlusterFSSR.py b/drivers/GlusterFSSR.py index 42e5ab52d..8adfe7001 100644 --- a/drivers/GlusterFSSR.py +++ b/drivers/GlusterFSSR.py @@ -177,8 +177,7 @@ def probe(self): except (util.CommandException, xs_errors.XenError): raise # Create a 
dictionary from the SR uuids to feed SRtoXML() - sr_dict = {sr_uuid: {} for sr_uuid in sr_list} - return util.SRtoXML(sr_dict) + return util.SRtoXML({sr_uuid: {} for sr_uuid in sr_list}) def detach(self, sr_uuid): if not self.checkmount(): diff --git a/drivers/MooseFSSR.py b/drivers/MooseFSSR.py index 3911b0965..6f86054fd 100755 --- a/drivers/MooseFSSR.py +++ b/drivers/MooseFSSR.py @@ -195,8 +195,7 @@ def probe(self): except (util.CommandException, xs_errors.XenError): raise # Create a dictionary from the SR uuids to feed SRtoXML() - sr_dict = {sr_uuid: {} for sr_uuid in sr_list} - return util.SRtoXML(sr_dict) + return util.SRtoXML({sr_uuid: {} for sr_uuid in sr_list}) def detach(self, sr_uuid): if not self.checkmount(): diff --git a/drivers/SMBSR.py b/drivers/SMBSR.py index aa9bda381..962060cd9 100755 --- a/drivers/SMBSR.py +++ b/drivers/SMBSR.py @@ -220,11 +220,8 @@ def probe(self): raise xs_errors.XenError(err, opterr=inst.errstr) except (util.CommandException, xs_errors.XenError): raise - # Create a dictionary from the SR uuids to feed SRtoXML() - sr_dict = {sr_uuid: {} for sr_uuid in sr_list} - - return util.SRtoXML(sr_dict) + return util.SRtoXML({sr_uuid: {} for sr_uuid in sr_list}) def detach(self, sr_uuid): """Detach the SR: Unmounts and removes the mountpoint""" From 85fe17b519c79c8efee4b0bbb7046f58ab306d2d Mon Sep 17 00:00:00 2001 From: Ronan Abhamon Date: Wed, 16 Oct 2024 15:10:55 +0200 Subject: [PATCH 50/72] Replace `Dict` variable with `info` in `LVHDSR` Prevent reuse of the `Dict` symbol from the `typing` module. Signed-off-by: Ronan Abhamon --- drivers/LVHDSR.py | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/drivers/LVHDSR.py b/drivers/LVHDSR.py index 23adf3a9c..195c6fd32 100755 --- a/drivers/LVHDSR.py +++ b/drivers/LVHDSR.py @@ -690,21 +690,21 @@ def scan(self, uuid): for vdi in vdis: vdi_uuids.add(self.session.xenapi.VDI.get_uuid(vdi)) - Dict = LVMMetadataHandler(self.mdpath, False).getMetadata()[1] + info = LVMMetadataHandler(self.mdpath, False).getMetadata()[1] - for vdi in list(Dict.keys()): - vdi_uuid = Dict[vdi][UUID_TAG] - if bool(int(Dict[vdi][IS_A_SNAPSHOT_TAG])): - if Dict[vdi][SNAPSHOT_OF_TAG] in vdiToSnaps: - vdiToSnaps[Dict[vdi][SNAPSHOT_OF_TAG]].append(vdi_uuid) + for vdi in list(info.keys()): + vdi_uuid = info[vdi][UUID_TAG] + if bool(int(info[vdi][IS_A_SNAPSHOT_TAG])): + if info[vdi][SNAPSHOT_OF_TAG] in vdiToSnaps: + vdiToSnaps[info[vdi][SNAPSHOT_OF_TAG]].append(vdi_uuid) else: - vdiToSnaps[Dict[vdi][SNAPSHOT_OF_TAG]] = [vdi_uuid] + vdiToSnaps[info[vdi][SNAPSHOT_OF_TAG]] = [vdi_uuid] if vdi_uuid not in vdi_uuids: util.SMlog("Introduce VDI %s as it is present in " \ "metadata and not in XAPI." 
% vdi_uuid) sm_config = {} - sm_config['vdi_type'] = Dict[vdi][VDI_TYPE_TAG] + sm_config['vdi_type'] = info[vdi][VDI_TYPE_TAG] lvname = "%s%s" % \ (lvhdutil.LV_PREFIX[sm_config['vdi_type']], vdi_uuid) self.lvActivator.activate( @@ -712,7 +712,7 @@ def scan(self, uuid): activated_lvs.add(vdi_uuid) lvPath = os.path.join(self.path, lvname) - if Dict[vdi][VDI_TYPE_TAG] == vhdutil.VDI_TYPE_RAW: + if info[vdi][VDI_TYPE_TAG] == vhdutil.VDI_TYPE_RAW: size = self.lvmCache.getSize( \ lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_RAW] + \ vdi_uuid) @@ -736,31 +736,31 @@ def scan(self, uuid): vdi_ref = self.session.xenapi.VDI.db_introduce( vdi_uuid, - Dict[vdi][NAME_LABEL_TAG], - Dict[vdi][NAME_DESCRIPTION_TAG], + info[vdi][NAME_LABEL_TAG], + info[vdi][NAME_DESCRIPTION_TAG], self.sr_ref, - Dict[vdi][TYPE_TAG], + info[vdi][TYPE_TAG], False, - bool(int(Dict[vdi][READ_ONLY_TAG])), + bool(int(info[vdi][READ_ONLY_TAG])), {}, vdi_uuid, {}, sm_config) self.session.xenapi.VDI.set_managed(vdi_ref, - bool(int(Dict[vdi][MANAGED_TAG]))) + bool(int(info[vdi][MANAGED_TAG]))) self.session.xenapi.VDI.set_virtual_size(vdi_ref, str(size)) self.session.xenapi.VDI.set_physical_utilisation( \ vdi_ref, str(utilisation)) self.session.xenapi.VDI.set_is_a_snapshot( \ - vdi_ref, bool(int(Dict[vdi][IS_A_SNAPSHOT_TAG]))) - if bool(int(Dict[vdi][IS_A_SNAPSHOT_TAG])): + vdi_ref, bool(int(info[vdi][IS_A_SNAPSHOT_TAG]))) + if bool(int(info[vdi][IS_A_SNAPSHOT_TAG])): self.session.xenapi.VDI.set_snapshot_time( \ - vdi_ref, DateTime(Dict[vdi][SNAPSHOT_TIME_TAG])) - if Dict[vdi][TYPE_TAG] == 'metadata': + vdi_ref, DateTime(info[vdi][SNAPSHOT_TIME_TAG])) + if info[vdi][TYPE_TAG] == 'metadata': self.session.xenapi.VDI.set_metadata_of_pool( \ - vdi_ref, Dict[vdi][METADATA_OF_POOL_TAG]) + vdi_ref, info[vdi][METADATA_OF_POOL_TAG]) # Update CBT status of disks either just added # or already in XAPI From 1eed547f34a270048eb6eebcd321a0939cc44818 Mon Sep 17 00:00:00 2001 From: Ronan Abhamon Date: Wed, 16 Oct 2024 14:58:18 +0200 Subject: [PATCH 51/72] Prevent mypy errors when a variable type is changed in `BaseISCSISR` Log without this change on `chappasword` and `incoming_chappassword`: ``` error: Incompatible types in assignment (expression has type "bytes", variable has type "str") [assignment] ``` Signed-off-by: Ronan Abhamon --- drivers/BaseISCSI.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/BaseISCSI.py b/drivers/BaseISCSI.py index a52442c54..750b40b02 100755 --- a/drivers/BaseISCSI.py +++ b/drivers/BaseISCSI.py @@ -175,11 +175,11 @@ def load(self, sr_uuid): and ('chappassword' in self.dconf or 'chappassword_secret' in self.dconf): self.chapuser = self.dconf['chapuser'].encode('utf-8') if 'chappassword_secret' in self.dconf: - self.chappassword = util.get_secret(self.session, self.dconf['chappassword_secret']) + chappassword = util.get_secret(self.session, self.dconf['chappassword_secret']) else: - self.chappassword = self.dconf['chappassword'] + chappassword = self.dconf['chappassword'] - self.chappassword = self.chappassword.encode('utf-8') + self.chappassword = chappassword.encode('utf-8') self.incoming_chapuser = "" self.incoming_chappassword = "" @@ -187,11 +187,11 @@ def load(self, sr_uuid): and ('incoming_chappassword' in self.dconf or 'incoming_chappassword_secret' in self.dconf): self.incoming_chapuser = self.dconf['incoming_chapuser'].encode('utf-8') if 'incoming_chappassword_secret' in self.dconf: - self.incoming_chappassword = util.get_secret(self.session, 
self.dconf['incoming_chappassword_secret']) + incoming_chappassword = util.get_secret(self.session, self.dconf['incoming_chappassword_secret']) else: - self.incoming_chappassword = self.dconf['incoming_chappassword'] + incoming_chappassword = self.dconf['incoming_chappassword'] - self.incoming_chappassword = self.incoming_chappassword.encode('utf-8') + self.incoming_chappassword = incoming_chappassword.encode('utf-8') self.port = DEFAULT_PORT if 'port' in self.dconf and self.dconf['port']: From cd9a35877137a17ea5af70c8584550099721bdb4 Mon Sep 17 00:00:00 2001 From: Ronan Abhamon Date: Wed, 16 Oct 2024 15:13:52 +0200 Subject: [PATCH 52/72] Prevent bad mypy error in TestMultiLUNISCSISR using formatted-string Avoid: ``` error: Incompatible types in string interpolation (expression has type "object", placeholder has type "int | float | SupportsInt") [str-format] ``` Signed-off-by: Ronan Abhamon --- tests/test_ISCSISR.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_ISCSISR.py b/tests/test_ISCSISR.py index 39529be13..e71ac2684 100644 --- a/tests/test_ISCSISR.py +++ b/tests/test_ISCSISR.py @@ -129,7 +129,7 @@ def setUp(self): 'tpgt': 'TPGT' } self.node_records = [( - "%s:%d" % (self.node2['ip'], self.node2['port']), + f"{self.node2['ip']}:{self.node2['port']}", self.node2['tpgt'], self.node2['iqn'] )] From 04f45ba38a879738a681db07d9fd65579451743d Mon Sep 17 00:00:00 2001 From: Ronan Abhamon Date: Mon, 21 Oct 2024 18:44:42 +0200 Subject: [PATCH 53/72] Count correctly IQN sessions during ISCSISR attach Before this change, IQNs were concatenated into a single string when `multiSession` was used. Signed-off-by: Ronan Abhamon --- drivers/BaseISCSI.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/BaseISCSI.py b/drivers/BaseISCSI.py index 750b40b02..e60925143 100755 --- a/drivers/BaseISCSI.py +++ b/drivers/BaseISCSI.py @@ -391,10 +391,9 @@ def attach(self, sr_uuid): util._incr_iscsiSR_refcount(self.targetIQN, sr_uuid) IQNs = [] if "multiSession" in self.dconf: - IQNs = "" for iqn in self.dconf['multiSession'].split("|"): if len(iqn): - IQNs += iqn.split(',')[2] + IQNs.append(iqn.split(',')[2]) else: IQNs.append(self.targetIQN) From ea2f0b4686cd039785a753e0040ff1b3be20d5d1 Mon Sep 17 00:00:00 2001 From: Ronan Abhamon Date: Thu, 10 Oct 2024 14:58:02 +0200 Subject: [PATCH 54/72] Use importlib instead of imp which is deprecated in python 3.4 Signed-off-by: Ronan Abhamon --- drivers/SR.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/SR.py b/drivers/SR.py index 9ca0045c5..d1aacdb33 100755 --- a/drivers/SR.py +++ b/drivers/SR.py @@ -153,7 +153,7 @@ def __init__(self, srcmd, sr_uuid): @staticmethod def from_uuid(session, sr_uuid): - import imp + import importlib.util _SR = session.xenapi.SR sr_ref = _SR.get_by_uuid(sr_uuid) @@ -169,7 +169,10 @@ def from_uuid(session, sr_uuid): driver_real = os.path.realpath(driver_path) module_name = os.path.basename(driver_path) - module = imp.load_source(module_name, driver_real) + spec = importlib.util.spec_from_file_location(module_name, driver_real) + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + target = driver(sm_type) # NB. 
get the host pbd's device_config From b3ded469a3a826f58da076f285880489913899ea Mon Sep 17 00:00:00 2001 From: Ronan Abhamon Date: Fri, 11 Oct 2024 15:48:29 +0200 Subject: [PATCH 55/72] Replace deprecated calls to distutils.spawn.find_executable Signed-off-by: Ronan Abhamon --- drivers/CephFSSR.py | 3 +-- drivers/GlusterFSSR.py | 3 +-- drivers/MooseFSSR.py | 3 +-- drivers/XFSSR.py | 3 +-- drivers/ZFSSR.py | 3 +-- drivers/util.py | 4 ++++ 6 files changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/CephFSSR.py b/drivers/CephFSSR.py index ca79a6a31..808847674 100644 --- a/drivers/CephFSSR.py +++ b/drivers/CephFSSR.py @@ -264,8 +264,7 @@ def vdi(self, uuid, loadLocked=False): @staticmethod def _is_ceph_available(): - import distutils.spawn - return distutils.spawn.find_executable('ceph') + return util.find_executable('ceph') class CephFSFileVDI(FileSR.FileVDI): def attach(self, sr_uuid, vdi_uuid): diff --git a/drivers/GlusterFSSR.py b/drivers/GlusterFSSR.py index 8adfe7001..041a91429 100644 --- a/drivers/GlusterFSSR.py +++ b/drivers/GlusterFSSR.py @@ -244,8 +244,7 @@ def vdi(self, uuid, loadLocked=False): @staticmethod def _is_glusterfs_available(): - import distutils.spawn - return distutils.spawn.find_executable('glusterfs') + return util.find_executable('glusterfs') class GlusterFSFileVDI(FileSR.FileVDI): diff --git a/drivers/MooseFSSR.py b/drivers/MooseFSSR.py index 6f86054fd..e4dc4ad68 100755 --- a/drivers/MooseFSSR.py +++ b/drivers/MooseFSSR.py @@ -276,8 +276,7 @@ def vdi(self, uuid, loadLocked=False): @staticmethod def _is_moosefs_available(): - import distutils.spawn - return distutils.spawn.find_executable('mfsmount') + return util.find_executable('mfsmount') class MooseFSFileVDI(FileSR.FileVDI): def attach(self, sr_uuid, vdi_uuid): diff --git a/drivers/XFSSR.py b/drivers/XFSSR.py index 1dfde0956..ad4aca742 100755 --- a/drivers/XFSSR.py +++ b/drivers/XFSSR.py @@ -229,8 +229,7 @@ def vdi(self, uuid, loadLocked = False): @staticmethod def _is_xfs_available(): - import distutils.spawn - return distutils.spawn.find_executable('mkfs.xfs') + return util.find_executable('mkfs.xfs') class XFSFileVDI(FileSR.FileVDI): diff --git a/drivers/ZFSSR.py b/drivers/ZFSSR.py index 5301d5ecc..cf5eb12d6 100644 --- a/drivers/ZFSSR.py +++ b/drivers/ZFSSR.py @@ -58,8 +58,7 @@ def is_zfs_available(): - import distutils.spawn - return distutils.spawn.find_executable('zfs') and \ + return util.find_executable('zfs') and \ util.pathexists('/sys/module/zfs/initstate') diff --git a/drivers/util.py b/drivers/util.py index 65b23f64b..ccea5c52c 100755 --- a/drivers/util.py +++ b/drivers/util.py @@ -2096,3 +2096,7 @@ def gen_path(path): cProfile.runctx('function()', None, locals(), profile_path) finally: SMlog('* End profiling of {} ({}) *'.format(name, filename)) + + +def find_executable(name): + return shutil.which(name) From c9201d1746719cf255a4350bedab1e2b7bcb5045 Mon Sep 17 00:00:00 2001 From: Ronan Abhamon Date: Fri, 11 Oct 2024 15:52:18 +0200 Subject: [PATCH 56/72] Replace deprecated calls to distutils.util.strtobool Signed-off-by: Ronan Abhamon --- drivers/LinstorSR.py | 3 +-- drivers/MooseFSSR.py | 7 ++----- drivers/linstor-manager | 15 +++++++-------- drivers/linstorvhdutil.py | 5 ++--- drivers/linstorvolumemanager.py | 5 ++--- drivers/util.py | 17 +++++++++++++++++ 6 files changed, 31 insertions(+), 21 deletions(-) diff --git a/drivers/LinstorSR.py b/drivers/LinstorSR.py index 287c15e86..acc6a44e1 100755 --- a/drivers/LinstorSR.py +++ b/drivers/LinstorSR.py @@ -34,7 +34,6 @@ from lock import Lock 
import blktap2 import cleanup -import distutils import errno import functools import lvutil @@ -337,7 +336,7 @@ def load(self, sr_uuid): monitor_db_quorum = self.dconf.get('monitor-db-quorum') self._monitor_db_quorum = (monitor_db_quorum is None) or \ - distutils.util.strtobool(monitor_db_quorum) + util.strtobool(monitor_db_quorum) # Note: We don't have access to the session field if the # 'vdi_attach_from_config' command is executed. diff --git a/drivers/MooseFSSR.py b/drivers/MooseFSSR.py index e4dc4ad68..8fc4a4adf 100755 --- a/drivers/MooseFSSR.py +++ b/drivers/MooseFSSR.py @@ -18,7 +18,6 @@ # # MooseFSSR: Based on CEPHFSSR and FileSR, mounts MooseFS share -import distutils.util import errno import os import syslog as _syslog @@ -113,9 +112,7 @@ def load(self, sr_uuid): self.sm_config = self.srcmd.params.get('sr_sm_config') or {} if self.srcmd.cmd != 'sr_create': - self.subdir = distutils.util.strtobool( - self.sm_config.get('subdir') or '0' - ) + self.subdir = util.strtobool(self.sm_config.get('subdir')) if self.subdir: self.remotepath = os.path.join(self.remotepath, sr_uuid) @@ -228,7 +225,7 @@ def create(self, sr_uuid, size): if self.subdir is None: self.subdir = True else: - self.subdir = distutils.util.strtobool(self.subdir) + self.subdir = util.strtobool(self.subdir) self.sm_config['subdir'] = str(self.subdir) self.session.xenapi.SR.set_sm_config(self.sr_ref, self.sm_config) diff --git a/drivers/linstor-manager b/drivers/linstor-manager index 8ee6f149a..fb2b9fe64 100755 --- a/drivers/linstor-manager +++ b/drivers/linstor-manager @@ -21,7 +21,6 @@ import sys sys.path[0] = '/opt/xensource/sm/' import base64 -import distutils.util import os import socket import XenAPI @@ -313,7 +312,7 @@ def release_sr(session, args): def update_drbd_reactor(session, args): try: - enabled = distutils.util.strtobool(args['enabled']) + enabled = util.strtobool(args['enabled']) update_drbd_reactor_service(start=enabled) return str(True) except Exception as e: @@ -389,10 +388,10 @@ def destroy(session, args): def check(session, args): try: device_path = args['devicePath'] - ignore_missing_footer = distutils.util.strtobool( + ignore_missing_footer = util.strtobool( args['ignoreMissingFooter'] ) - fast = distutils.util.strtobool(args['fast']) + fast = util.strtobool(args['fast']) check_ex(device_path, ignore_missing_footer, fast) return str(True) except Exception as e: @@ -404,7 +403,7 @@ def get_vhd_info(session, args): try: device_path = args['devicePath'] group_name = args['groupName'] - include_parent = distutils.util.strtobool(args['includeParent']) + include_parent = util.strtobool(args['includeParent']) linstor = LinstorVolumeManager( get_controller_uri(), @@ -560,7 +559,7 @@ def deflate(session, args): device_path = args['devicePath'] new_size = int(args['newSize']) old_size = int(args['oldSize']) - zeroize = distutils.util.strtobool(args['zeroize']) + zeroize = util.strtobool(args['zeroize']) group_name = args['groupName'] linstor = LinstorVolumeManager( @@ -581,7 +580,7 @@ def lock_vdi(session, args): sr_uuid = args['srUuid'] vdi_uuid = args['vdiUuid'] group_name = args['groupName'] - locked = distutils.util.strtobool(args['locked']) + locked = util.strtobool(args['locked']) # We must lock to mark the VDI. 
lock = Lock(vhdutil.LOCK_TYPE_SR, sr_uuid) @@ -824,7 +823,7 @@ def create_sr(session, args): elif provisioning != 'thin' and provisioning != 'thick': raise Exception('unsupported provisioning') - force = distutils.util.strtobool(args.get('force') or '0') + force = util.strtobool(args.get('force')) return exec_create_sr( session, name, description, disks, volume_group, redundancy, provisioning, force diff --git a/drivers/linstorvhdutil.py b/drivers/linstorvhdutil.py index b3df004a1..6ad4787dc 100644 --- a/drivers/linstorvhdutil.py +++ b/drivers/linstorvhdutil.py @@ -17,7 +17,6 @@ from linstorjournaler import LinstorJournaler from linstorvolumemanager import LinstorVolumeManager import base64 -import distutils.util import errno import json import socket @@ -205,7 +204,7 @@ def check(self, vdi_uuid, ignore_missing_footer=False, fast=False): @linstorhostcall(check_ex, 'check') def _check(self, vdi_uuid, response): - return distutils.util.strtobool(response) + return util.strtobool(response) def get_vhd_info(self, vdi_uuid, include_parent=True): kwargs = { @@ -233,7 +232,7 @@ def _get_vhd_info(self, vdi_uuid, response): @linstorhostcall(vhdutil.hasParent, 'hasParent') def has_parent(self, vdi_uuid, response): - return distutils.util.strtobool(response) + return util.strtobool(response) def get_parent(self, vdi_uuid): return self._get_parent(vdi_uuid, self._extract_uuid) diff --git a/drivers/linstorvolumemanager.py b/drivers/linstorvolumemanager.py index a470dfecc..553272545 100755 --- a/drivers/linstorvolumemanager.py +++ b/drivers/linstorvolumemanager.py @@ -16,7 +16,6 @@ # -import distutils.util import errno import json import linstor @@ -183,7 +182,7 @@ def _get_controller_uri(): for host_ref, host_record in session.xenapi.host.get_all_records().items(): node_name = host_record['hostname'] try: - if distutils.util.strtobool( + if util.strtobool( session.xenapi.host.call_plugin(host_ref, PLUGIN, PLUGIN_CMD, {}) ): return 'linstor://' + host_record['address'] @@ -234,7 +233,7 @@ def get_controller_node_name(): )['live']: continue - if distutils.util.strtobool(session.xenapi.host.call_plugin( + if util.strtobool(session.xenapi.host.call_plugin( host_ref, PLUGIN, PLUGIN_CMD, {} )): return node_name diff --git a/drivers/util.py b/drivers/util.py index ccea5c52c..262c895d5 100755 --- a/drivers/util.py +++ b/drivers/util.py @@ -2098,5 +2098,22 @@ def gen_path(path): SMlog('* End profiling of {} ({}) *'.format(name, filename)) +def strtobool(str): + # Note: `distutils` package is deprecated and slated for removal in Python 3.12. + # There is not alternative for strtobool. 
+ # See: https://peps.python.org/pep-0632/#migration-advice + # So this is a custom implementation with differences: + # - A boolean is returned instead of integer + # - Empty string and None are supported (False is returned in this case) + if not str: + return False + str = str.lower() + if str in ('y', 'yes', 't', 'true', 'on', '1'): + return True + if str in ('n', 'no', 'f', 'false', 'off', '0'): + return False + raise ValueError("invalid truth value '{}'".format(str)) + + def find_executable(name): return shutil.which(name) From eb79e93402537764ae1019752660f042a6666489 Mon Sep 17 00:00:00 2001 From: Ronan Abhamon Date: Fri, 11 Oct 2024 15:55:02 +0200 Subject: [PATCH 57/72] Fix _locked_load calls compatibility with python 3.10 Signed-off-by: Ronan Abhamon --- drivers/LinstorSR.py | 6 ++++++ drivers/util.py | 8 ++++++++ 2 files changed, 14 insertions(+) diff --git a/drivers/LinstorSR.py b/drivers/LinstorSR.py index acc6a44e1..c302ea332 100755 --- a/drivers/LinstorSR.py +++ b/drivers/LinstorSR.py @@ -45,6 +45,7 @@ import SR import SRCommand import subprocess +import sys import time import traceback import util @@ -371,6 +372,9 @@ def load(self, sr_uuid): self._all_volume_info_cache = None self._all_volume_metadata_cache = None + # To remove in python 3.10. + # Use directly @staticmethod instead. + @util.conditional_decorator(staticmethod, sys.version_info >= (3, 10, 0)) def _locked_load(method): def wrapped_method(self, *args, **kwargs): self._init_status = self.INIT_STATUS_OK @@ -792,6 +796,8 @@ def is_master(self): def vdi(self, uuid): return LinstorVDI(self, uuid) + # To remove in python 3.10 + # See: https://stackoverflow.com/questions/12718187/python-version-3-9-calling-class-staticmethod-within-the-class-body _locked_load = staticmethod(_locked_load) # -------------------------------------------------------------------------- diff --git a/drivers/util.py b/drivers/util.py index 262c895d5..4053c9a3e 100755 --- a/drivers/util.py +++ b/drivers/util.py @@ -2117,3 +2117,11 @@ def strtobool(str): def find_executable(name): return shutil.which(name) + + +def conditional_decorator(decorator, condition): + def wrapper(func): + if not condition: + return func + return decorator(func) + return wrapper From 321ff7f25b04e34220e51fdf588d4d9be789189b Mon Sep 17 00:00:00 2001 From: Ronan Abhamon Date: Fri, 11 Oct 2024 17:51:25 +0200 Subject: [PATCH 58/72] Use static analysis tool (mypy) Signed-off-by: Ronan Abhamon --- .github/workflows/main.yml | 19 +++++++++++++++++++ Makefile | 1 + dev_requirements_static_analysis.txt | 3 +++ drivers/DummySR.py | 4 +++- drivers/FileSR.py | 4 +++- drivers/LVHDSR.py | 4 +++- drivers/blktap2.py | 19 +++++++++++-------- drivers/flock.py | 4 +++- drivers/lock.py | 6 ++++-- drivers/mpathcount.py | 6 ++++-- drivers/srmetadata.py | 5 ++++- misc/fairlock/fairlock.py | 6 ++++-- sm_typing/Makefile | 12 ++++++++++++ sm_typing/__init__.py | 2 ++ tests/test_cleanup.py | 4 +++- 15 files changed, 79 insertions(+), 20 deletions(-) create mode 100644 dev_requirements_static_analysis.txt create mode 100644 sm_typing/Makefile create mode 100644 sm_typing/__init__.py diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 6271a21b1..d89b968c4 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -3,6 +3,25 @@ name: Test SM on: [push, pull_request] jobs: + static-analysis: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + - name: Set up Python 3 + uses: actions/setup-python@v4 + with: + python-version: '3.x' + + - name: 
Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r dev_requirements_static_analysis.txt + + - name: mypy + run: | + mypy . + build: runs-on: ubuntu-20.04 diff --git a/Makefile b/Makefile index d33c06059..293e5faa2 100755 --- a/Makefile +++ b/Makefile @@ -233,6 +233,7 @@ install: precheck install -m 755 scripts/stop_all_gc $(SM_STAGING)/etc/xapi.d/xapi-pre-shutdown/ $(MAKE) -C dcopy install DESTDIR=$(SM_STAGING) $(MAKE) -C linstor install DESTDIR=$(SM_STAGING) + $(MAKE) -C sm_typing install DESTDIR=$(SM_STAGING) ln -sf $(SM_DEST)blktap2.py $(SM_STAGING)$(BIN_DEST)/blktap2 ln -sf $(SM_DEST)lcache.py $(SM_STAGING)$(BIN_DEST)tapdisk-cache-stats ln -sf /dev/null $(SM_STAGING)$(UDEV_RULES_DIR)/69-dm-lvm-metad.rules diff --git a/dev_requirements_static_analysis.txt b/dev_requirements_static_analysis.txt new file mode 100644 index 000000000..8a59c8f20 --- /dev/null +++ b/dev_requirements_static_analysis.txt @@ -0,0 +1,3 @@ +bitarray +mypy +python-linstor diff --git a/drivers/DummySR.py b/drivers/DummySR.py index c733fdf70..be0298b9a 100755 --- a/drivers/DummySR.py +++ b/drivers/DummySR.py @@ -17,6 +17,8 @@ # # DummySR: an example dummy SR for the SDK +from sm_typing import List, Tuple + import SR import VDI import SRCommand @@ -28,7 +30,7 @@ "VDI_ACTIVATE", "VDI_DEACTIVATE", "VDI_CLONE", "VDI_SNAPSHOT", "VDI_RESIZE", "VDI_INTRODUCE", "VDI_MIRROR"] -CONFIGURATION = [] +CONFIGURATION: List[Tuple[str, str]] = [] DRIVER_INFO = { 'name': 'dummy', diff --git a/drivers/FileSR.py b/drivers/FileSR.py index 04a6543e8..155983fc8 100755 --- a/drivers/FileSR.py +++ b/drivers/FileSR.py @@ -17,6 +17,8 @@ # # FileSR: local-file storage repository +from sm_typing import Dict, List + import SR import VDI import SRCommand @@ -36,7 +38,7 @@ import XenAPI # pylint: disable=import-error from constants import CBTLOG_TAG -geneology = {} +geneology: Dict[str, List[str]] = {} CAPABILITIES = ["SR_PROBE", "SR_UPDATE", \ "VDI_CREATE", "VDI_DELETE", "VDI_ATTACH", "VDI_DETACH", \ "VDI_CLONE", "VDI_SNAPSHOT", "VDI_RESIZE", "VDI_MIRROR", diff --git a/drivers/LVHDSR.py b/drivers/LVHDSR.py index 195c6fd32..70463b6c4 100755 --- a/drivers/LVHDSR.py +++ b/drivers/LVHDSR.py @@ -18,6 +18,8 @@ # LVHDSR: VHD on LVM storage repository # +from sm_typing import Dict, List + import SR from SR import deviceCheck import VDI @@ -54,7 +56,7 @@ from fairlock import Fairlock DEV_MAPPER_ROOT = os.path.join('/dev/mapper', lvhdutil.VG_PREFIX) -geneology = {} +geneology: Dict[str, List[str]] = {} CAPABILITIES = ["SR_PROBE", "SR_UPDATE", "SR_TRIM", "VDI_CREATE", "VDI_DELETE", "VDI_ATTACH", "VDI_DETACH", "VDI_MIRROR", "VDI_CLONE", "VDI_SNAPSHOT", "VDI_RESIZE", "ATOMIC_PAUSE", diff --git a/drivers/blktap2.py b/drivers/blktap2.py index c900f57d8..0daf3e977 100755 --- a/drivers/blktap2.py +++ b/drivers/blktap2.py @@ -17,6 +17,9 @@ # # blktap2: blktap/tapdisk management layer # + +from sm_typing import Any, Callable, ClassVar, Dict + import grp import os import re @@ -513,7 +516,7 @@ def mkdirs(path, mode=0o777): class KObject(object): - SYSFS_CLASSTYPE = None + SYSFS_CLASSTYPE: ClassVar[str] = "" def sysfs_devname(self): raise NotImplementedError("sysfs_devname is undefined") @@ -521,7 +524,7 @@ def sysfs_devname(self): class Attribute(object): - SYSFS_NODENAME = None + SYSFS_NODENAME: ClassVar[str] = "" def __init__(self, path): self.path = path @@ -1167,7 +1170,7 @@ class Link(object): # before VDI.activate. Therefore those link steps where we # relink existing devices under deterministic path names. 
- BASEDIR = None + BASEDIR: ClassVar[str] = "" def _mklink(self, target): raise NotImplementedError("_mklink is not defined") @@ -2132,7 +2135,7 @@ def __str__(self): return "Uevent '%s' not handled by %s" % \ (self.event, self.handler.__class__.__name__) - ACTIONS = {} + ACTIONS: Dict[str, Callable] = {} def run(self): @@ -2228,7 +2231,7 @@ def get_size(self): class BusDevice(KObject): - SYSFS_BUSTYPE = None + SYSFS_BUSTYPE: ClassVar[str] = "" @classmethod def sysfs_bus_path(cls): @@ -2246,7 +2249,7 @@ class XenbusDevice(BusDevice): XBT_NIL = "" - XENBUS_DEVTYPE = None + XENBUS_DEVTYPE: ClassVar[str] = "" def __init__(self, domid, devid): self.domid = int(domid) @@ -2395,7 +2398,7 @@ def __init__(self, domid, devid): self._q_events = None class XenstoreValueError(Exception): - KEY = None + KEY: ClassVar[str] = "" def __init__(self, vbd, _str): self.vbd = vbd @@ -2832,7 +2835,7 @@ def usage(stream): elif _class == 'tap': - attrs = {} + attrs: Dict[str, Any] = {} for item in sys.argv[2:]: try: key, val = item.split('=') diff --git a/drivers/flock.py b/drivers/flock.py index dceb04283..2d295ec4e 100644 --- a/drivers/flock.py +++ b/drivers/flock.py @@ -23,6 +23,8 @@ got to grow our own. """ +from sm_typing import ClassVar + import os import fcntl import struct @@ -73,7 +75,7 @@ class FcntlLockBase: definition of LOCK_TYPE (fcntl.{F_RDLCK|F_WRLCK}) determines the type.""" - LOCK_TYPE = None + LOCK_TYPE: ClassVar[int] if __debug__: ERROR_ISLOCKED = "Attempt to acquire lock held." diff --git a/drivers/lock.py b/drivers/lock.py index 56bf9fcc0..2e6e2c9c0 100755 --- a/drivers/lock.py +++ b/drivers/lock.py @@ -16,6 +16,8 @@ """Serialization for concurrent operations""" +from sm_typing import Dict + import os import errno import flock @@ -37,8 +39,8 @@ class Lock(object): BASE_DIR = "/var/lock/sm" - INSTANCES = {} - BASE_INSTANCES = {} + INSTANCES: Dict[str, 'LockImplementation'] = {} + BASE_INSTANCES: Dict[str, 'LockImplementation'] = {} def __new__(cls, name, ns=None, *args, **kwargs): if ns: diff --git a/drivers/mpathcount.py b/drivers/mpathcount.py index f89e37a90..43285edc6 100755 --- a/drivers/mpathcount.py +++ b/drivers/mpathcount.py @@ -15,6 +15,8 @@ # along with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +from sm_typing import Dict + import util import os import sys @@ -267,7 +269,7 @@ def _add(key, val): mpc_exit(session, -1) try: - mpath_status = {} + mpath_status: Dict[str, str] = {} for pbd in pbds: def remove(key): session.xenapi.PBD.remove_from_other_config(pbd, key) @@ -287,7 +289,7 @@ def add(key, val): util.atomicFileWrite(MPATH_FILE_NAME, MPATHS_DIR, json.dumps(mpath_status)) os.chmod(MPATH_FILE_NAME, 0o0644) except: - util.SMlog("MPATH: Failure updating db. %s" % sys.exc_info()) + util.SMlog("MPATH: Failure updating db. 
%s" % str(sys.exc_info())) mpc_exit(session, -1) util.SMlog("MPATH: Update done") diff --git a/drivers/srmetadata.py b/drivers/srmetadata.py index f86711e28..c80fb6d7d 100755 --- a/drivers/srmetadata.py +++ b/drivers/srmetadata.py @@ -15,6 +15,9 @@ # # Functions to read and write SR metadata # + +from sm_typing import ClassVar + from io import SEEK_SET import util @@ -254,7 +257,7 @@ def getMetadataLength(fd): # ----------------- # General helper functions - end # ----------------- class MetadataHandler: - VDI_INFO_SIZE_IN_SECTORS = None + VDI_INFO_SIZE_IN_SECTORS: ClassVar[int] # constructor def __init__(self, path=None, write=True): diff --git a/misc/fairlock/fairlock.py b/misc/fairlock/fairlock.py index 91a6a4992..af85a797a 100644 --- a/misc/fairlock/fairlock.py +++ b/misc/fairlock/fairlock.py @@ -1,3 +1,5 @@ +from sm_typing import Any, Callable, Dict, Optional + import os import socket import inspect @@ -7,8 +9,8 @@ START_SERVICE_TIMEOUT_SECS = 2 class SingletonWithArgs(type): - _instances = {} - _init = {} + _instances: Dict[Any, Any] = {} + _init: Dict[type, Optional[Callable[..., None]]] = {} def __init__(cls, name, bases, dct): cls._init[cls] = dct.get('__init__', None) diff --git a/sm_typing/Makefile b/sm_typing/Makefile new file mode 100644 index 000000000..1e2ea815f --- /dev/null +++ b/sm_typing/Makefile @@ -0,0 +1,12 @@ +PYTHONLIBDIR = $(shell python3 -c "import sys; print(sys.path.pop())") +DESTDIR ?= + +.PHONY: install +install: + install -D -m 644 __init__.py $(DESTDIR)$(PYTHONLIBDIR)/sm_typing.py + python3 -m compileall $(DESTDIR)$(PYTHONLIBDIR)/sm_typing.py + +.PHONY: uninstall +uninstall: + rm -rf $(DESTDIR)$(PYTHONLIBDIR)/sm_typing.py + rm -rf $(DESTDIR)$(PYTHONLIBDIR)/__pycache__/sm_typing.* diff --git a/sm_typing/__init__.py b/sm_typing/__init__.py new file mode 100644 index 000000000..c515056cf --- /dev/null +++ b/sm_typing/__init__.py @@ -0,0 +1,2 @@ +import typing +from typing import * diff --git a/tests/test_cleanup.py b/tests/test_cleanup.py index a2b14d602..bbd04b15c 100644 --- a/tests/test_cleanup.py +++ b/tests/test_cleanup.py @@ -1,3 +1,5 @@ +from sm_typing import List + import errno import signal import unittest @@ -26,7 +28,7 @@ class FakeException(Exception): class FakeUtil: - record = [] + record: List[str] = [] def log(input): FakeUtil.record.append(input) From f9a2695cf0d425209a7040bebe64a66cd7eec487 Mon Sep 17 00:00:00 2001 From: Damien Thenot Date: Wed, 9 Oct 2024 15:44:05 +0200 Subject: [PATCH 59/72] Add mypy stubs Co-authored-by: Damien Thenot Co-authored-by: Ronan Abhamon Signed-off-by: Damien Thenot --- stubs/XenAPIPlugin.pyi | 5 +++++ stubs/xen/__init__.pyi | 0 stubs/xen/lowlevel/xs.pyi | 0 3 files changed, 5 insertions(+) create mode 100644 stubs/XenAPIPlugin.pyi create mode 100644 stubs/xen/__init__.pyi create mode 100644 stubs/xen/lowlevel/xs.pyi diff --git a/stubs/XenAPIPlugin.pyi b/stubs/XenAPIPlugin.pyi new file mode 100644 index 000000000..ca5f52caf --- /dev/null +++ b/stubs/XenAPIPlugin.pyi @@ -0,0 +1,5 @@ +class Failure(Exception): + def __init__(self, code, params) -> None: ... + def __str__(self) -> str: ... + +def dispatch(fn_table) -> None: ... 
diff --git a/stubs/xen/__init__.pyi b/stubs/xen/__init__.pyi new file mode 100644 index 000000000..e69de29bb diff --git a/stubs/xen/lowlevel/xs.pyi b/stubs/xen/lowlevel/xs.pyi new file mode 100644 index 000000000..e69de29bb From c65ecfb3d93a730f90eeb4ee16c51c67a1da2e03 Mon Sep 17 00:00:00 2001 From: Ronan Abhamon Date: Fri, 8 Nov 2024 13:44:57 +0100 Subject: [PATCH 60/72] Use `override` everywhere! Also use return type annotations on these methods. Signed-off-by: Ronan Abhamon --- .mypy.ini | 3 + drivers/BaseISCSI.py | 42 +++-- drivers/CephFSSR.py | 43 +++-- drivers/DummySR.py | 72 ++++--- drivers/EXTSR.py | 32 +++- drivers/FileSR.py | 102 ++++++---- drivers/GlusterFSSR.py | 43 +++-- drivers/HBASR.py | 36 ++-- drivers/ISOSR.py | 56 ++++-- drivers/LUNperVDI.py | 20 +- drivers/LVHDSR.py | 95 ++++++---- drivers/LVHDoFCoESR.py | 14 +- drivers/LVHDoHBASR.py | 38 ++-- drivers/LVHDoISCSISR.py | 41 ++-- drivers/LargeBlockSR.py | 23 ++- drivers/LinstorSR.py | 76 +++++--- drivers/MooseFSSR.py | 43 +++-- drivers/NFSSR.py | 40 ++-- drivers/RawISCSISR.py | 20 +- drivers/SHMSR.py | 42 +++-- drivers/SMBSR.py | 41 ++-- drivers/SR.py | 37 ++-- drivers/VDI.py | 54 +++--- drivers/XFSSR.py | 32 +++- drivers/ZFSSR.py | 32 +++- drivers/blktap2.py | 129 ++++++++----- drivers/cleanup.py | 320 ++++++++++++++++++++------------ drivers/flock.py | 5 +- drivers/lcache.py | 14 +- drivers/linstorvhdutil.py | 5 +- drivers/linstorvolumemanager.py | 4 +- drivers/lock.py | 2 +- drivers/mpath_cli.py | 5 +- drivers/nfs-on-slave | 6 +- drivers/srmetadata.py | 22 ++- drivers/udevSR.py | 53 ++++-- misc/fairlock/fairlock.py | 7 +- sm_typing/__init__.py | 12 ++ stubs/XenAPIPlugin.pyi | 2 +- tests/lvmlib.py | 8 +- tests/shared_iscsi_test_base.py | 11 +- tests/test_BaseISCSI.py | 5 +- tests/test_FileSR.py | 28 ++- tests/test_ISCSISR.py | 21 ++- tests/test_LVHDSR.py | 16 +- tests/test_LVHDoHBASR.py | 6 +- tests/test_LVHDoISCSISR.py | 10 +- tests/test_SMBSR.py | 8 +- tests/test_SR.py | 8 +- tests/test_blktap2.py | 12 +- tests/test_cbt.py | 43 +++-- tests/test_cleanup.py | 14 +- tests/test_fairlock.py | 5 +- tests/test_fjournaler.py | 8 +- tests/test_keymanagerutil.py | 9 +- tests/test_lock.py | 9 +- tests/test_lock_queue.py | 5 +- tests/test_lvutil.py | 15 +- tests/test_mpath_dmp.py | 6 +- tests/test_on_slave.py | 11 +- tests/test_sr_health_check.py | 6 +- tests/test_srmetadata.py | 11 +- tests/test_storage_init.py | 14 +- tests/test_util.py | 16 +- tests/testlib.py | 13 +- 65 files changed, 1298 insertions(+), 683 deletions(-) create mode 100644 .mypy.ini diff --git a/.mypy.ini b/.mypy.ini new file mode 100644 index 000000000..651016c0d --- /dev/null +++ b/.mypy.ini @@ -0,0 +1,3 @@ +[mypy] +enable_error_code = explicit-override + diff --git a/drivers/BaseISCSI.py b/drivers/BaseISCSI.py index e60925143..98546957c 100755 --- a/drivers/BaseISCSI.py +++ b/drivers/BaseISCSI.py @@ -18,7 +18,10 @@ # ISCSISR: ISCSI software initiator SR driver # +from sm_typing import override + import SR +import VDI import util import time import LUNperVDI @@ -100,11 +103,12 @@ def address(self): self._initPaths() return self._address - def handles(type): + @override + @staticmethod + def handles(type) -> bool: return False - handles = staticmethod(handles) - def _synchroniseAddrList(self, addrlist): + def _synchroniseAddrList(self, addrlist) -> None: if not self.multihomed: return change = False @@ -133,7 +137,8 @@ def _synchroniseAddrList(self, addrlist): except: pass - def load(self, sr_uuid): + @override + def load(self, sr_uuid) -> None: if 
self.force_tapdisk: self.sr_vditype = 'aio' else: @@ -276,7 +281,7 @@ def _initPaths(self): self._address = self.tgtidx self._synchroniseAddrList(addrlist) - def _init_adapters(self): + def _init_adapters(self) -> None: # Generate a list of active adapters ids = scsiutil._genHostList(ISCSI_PROCNAME) util.SMlog(ids) @@ -293,7 +298,8 @@ def _init_adapters(self): pass self._devs = scsiutil.cacheSCSIidentifiers() - def attach(self, sr_uuid): + @override + def attach(self, sr_uuid) -> None: self._mpathHandle() multiTargets = False @@ -432,10 +438,11 @@ def attach(self, sr_uuid): realdev = os.path.realpath(os.path.join(dev_path, dev)) util.set_scheduler(os.path.basename(realdev)) - def detach(self, sr_uuid): + @override + def detach(self, sr_uuid) -> None: self.detach_and_delete(sr_uuid, delete=False) - def detach_and_delete(self, sr_uuid, delete=True): + def detach_and_delete(self, sr_uuid, delete=True) -> None: keys = [] pbdref = None try: @@ -476,7 +483,8 @@ def detach_and_delete(self, sr_uuid, delete=True): self.attached = False - def create(self, sr_uuid, size): + @override + def create(self, sr_uuid, size) -> None: # Check whether an SR already exists SRs = self.session.xenapi.SR.get_all_records() for sr in SRs: @@ -505,11 +513,13 @@ def create(self, sr_uuid, size): self.session.xenapi.SR.set_sm_config(self.sr_ref, self.sm_config) return - def delete(self, sr_uuid): + @override + def delete(self, sr_uuid) -> None: self.detach(sr_uuid) return - def probe(self): + @override + def probe(self) -> str: SRs = self.session.xenapi.SR.get_all_records() Recs = {} for sr in SRs: @@ -519,8 +529,9 @@ def probe(self): sm_config['targetIQN'] == self.targetIQN: Recs[record["uuid"]] = sm_config return self.srlist_toxml(Recs) - - def scan(self, sr_uuid): + + @override + def scan(self, sr_uuid) -> None: if not self.passthrough: if not self.attached: raise xs_errors.XenError('SRUnavailable') @@ -532,9 +543,10 @@ def scan(self, sr_uuid): if vdi.managed: self.physical_utilisation += vdi.size self.virtual_allocation = self.physical_utilisation - return super(BaseISCSISR, self).scan(sr_uuid) + super(BaseISCSISR, self).scan(sr_uuid) - def vdi(self, uuid): + @override + def vdi(self, uuid) -> VDI.VDI: return LUNperVDI.RAWVDI(self, uuid) def _scan_IQNs(self): diff --git a/drivers/CephFSSR.py b/drivers/CephFSSR.py index 808847674..9e8e46e1e 100644 --- a/drivers/CephFSSR.py +++ b/drivers/CephFSSR.py @@ -18,6 +18,8 @@ # # CEPHFSSR: Based on FileSR, mounts ceph fs share +from sm_typing import override + import errno import os import socket @@ -33,6 +35,7 @@ import SRCommand import FileSR # end of careful +import VDI import cleanup import util import vhdutil @@ -83,13 +86,14 @@ class CephFSSR(FileSR.FileSR): DRIVER_TYPE = 'cephfs' - def handles(sr_type): + @override + @staticmethod + def handles(sr_type) -> bool: # fudge, because the parent class (FileSR) checks for smb to alter its behavior return sr_type == CephFSSR.DRIVER_TYPE or sr_type == 'smb' - handles = staticmethod(handles) - - def load(self, sr_uuid): + @override + def load(self, sr_uuid) -> None: if not self._is_ceph_available(): raise xs_errors.XenError( 'SRUnavailable', @@ -180,7 +184,8 @@ def unmount(self, mountpoint, rmmountpoint): except OSError as inst: raise CephFSException("rmdir failed with error '%s'" % inst.strerror) - def attach(self, sr_uuid): + @override + def attach(self, sr_uuid) -> None: if not self.checkmount(): try: self.mount() @@ -189,7 +194,8 @@ def attach(self, sr_uuid): raise xs_errors.SROSError(12, exc.errstr) self.attached = True - 
def probe(self): + @override + def probe(self) -> str: try: self.mount(PROBE_MOUNTPOINT) sr_list = filter(util.match_uuid, util.listdir(PROBE_MOUNTPOINT)) @@ -199,7 +205,8 @@ def probe(self): # Create a dictionary from the SR uuids to feed SRtoXML() return util.SRtoXML({sr_uuid: {} for sr_uuid in sr_list}) - def detach(self, sr_uuid): + @override + def detach(self, sr_uuid) -> None: if not self.checkmount(): return util.SMlog("Aborting GC/coalesce") @@ -210,7 +217,8 @@ def detach(self, sr_uuid): os.unlink(self.path) self.attached = False - def create(self, sr_uuid, size): + @override + def create(self, sr_uuid, size) -> None: if self.checkmount(): raise xs_errors.SROSError(113, 'CephFS mount point already attached') @@ -244,7 +252,8 @@ def create(self, sr_uuid, size): os.strerror(inst.code))) self.detach(sr_uuid) - def delete(self, sr_uuid): + @override + def delete(self, sr_uuid) -> None: # try to remove/delete non VDI contents first super(CephFSSR, self).delete(sr_uuid) try: @@ -259,7 +268,8 @@ def delete(self, sr_uuid): if inst.code != errno.ENOENT: raise xs_errors.SROSError(114, "Failed to remove CephFS mount point") - def vdi(self, uuid, loadLocked=False): + @override + def vdi(self, uuid, loadLocked=False) -> VDI.VDI: return CephFSFileVDI(self, uuid) @staticmethod @@ -267,7 +277,8 @@ def _is_ceph_available(): return util.find_executable('ceph') class CephFSFileVDI(FileSR.FileVDI): - def attach(self, sr_uuid, vdi_uuid): + @override + def attach(self, sr_uuid, vdi_uuid) -> str: if not hasattr(self, 'xenstore_data'): self.xenstore_data = {} @@ -275,7 +286,8 @@ def attach(self, sr_uuid, vdi_uuid): return super(CephFSFileVDI, self).attach(sr_uuid, vdi_uuid) - def generate_config(self, sr_uuid, vdi_uuid): + @override + def generate_config(self, sr_uuid, vdi_uuid) -> str: util.SMlog("SMBFileVDI.generate_config") if not util.pathexists(self.path): raise xs_errors.XenError('VDIUnavailable') @@ -289,15 +301,16 @@ def generate_config(self, sr_uuid, vdi_uuid): config = xmlrpc.client.dumps(tuple([resp]), "vdi_attach_from_config") return xmlrpc.client.dumps((config,), "", True) - def attach_from_config(self, sr_uuid, vdi_uuid): + @override + def attach_from_config(self, sr_uuid, vdi_uuid) -> str: try: if not util.pathexists(self.sr.path): - self.sr.attach(sr_uuid) + return self.sr.attach(sr_uuid) except: util.logException("SMBFileVDI.attach_from_config") raise xs_errors.XenError('SRUnavailable', opterr='Unable to attach from config') - + return '' if __name__ == '__main__': SRCommand.run(CephFSSR, DRIVER_INFO) diff --git a/drivers/DummySR.py b/drivers/DummySR.py index be0298b9a..f5674db02 100755 --- a/drivers/DummySR.py +++ b/drivers/DummySR.py @@ -17,7 +17,7 @@ # # DummySR: an example dummy SR for the SDK -from sm_typing import List, Tuple +from sm_typing import Dict, Optional, List, Tuple, override import SR import VDI @@ -49,35 +49,43 @@ class DummySR(SR.SR): """dummy storage repository""" - def handles(type): + @override + @staticmethod + def handles(type) -> bool: if type == TYPE: return True return False - handles = staticmethod(handles) - def load(self, sr_uuid): + @override + def load(self, sr_uuid) -> None: self.sr_vditype = 'phy' - def content_type(self, sr_uuid): + @override + def content_type(self, sr_uuid) -> str: return super(DummySR, self).content_type(sr_uuid) - def create(self, sr_uuid, size): + @override + def create(self, sr_uuid, size) -> None: self._assertValues(['sr_uuid', 'args', 'host_ref', 'session_ref', 'device_config', 'command', 'sr_ref']) 
assert(len(self.srcmd.params['args']) == 1) - def delete(self, sr_uuid): + @override + def delete(self, sr_uuid) -> None: self._assertValues(['sr_uuid', 'args', 'host_ref', 'session_ref', 'device_config', 'command', 'sr_ref']) assert(len(self.srcmd.params['args']) == 0) - def attach(self, sr_uuid): + @override + def attach(self, sr_uuid) -> None: self._assertValues(['sr_uuid', 'args', 'host_ref', 'session_ref', 'device_config', 'command', 'sr_ref']) assert(len(self.srcmd.params['args']) == 0) - def detach(self, sr_uuid): + @override + def detach(self, sr_uuid) -> None: self._assertValues(['sr_uuid', 'args', 'host_ref', 'session_ref', 'device_config', 'command', 'sr_ref']) assert(len(self.srcmd.params['args']) == 0) - def probe(self): + @override + def probe(self) -> str: # N.B. There are no SR references self._assertValues(['args', 'host_ref', 'session_ref', 'device_config', 'command']) assert(len(self.srcmd.params['args']) == 0) @@ -91,10 +99,12 @@ def probe(self): # Return the Probe XML return util.SRtoXML(SRlist) - def vdi(self, uuid): + @override + def vdi(self, uuid) -> VDI.VDI: return DummyVDI(self, uuid) - def scan(self, sr_uuid): + @override + def scan(self, sr_uuid) -> None: self._assertValues(['sr_uuid', 'args', 'host_ref', 'session_ref', 'device_config', 'command', 'sr_ref']) assert(len(self.srcmd.params['args']) == 0) @@ -108,7 +118,7 @@ def scan(self, sr_uuid): self.physical_size = 2000000000000 self.physical_utilisation = 0 self.virtual_allocation = 0 - return super(DummySR, self).scan(sr_uuid) + super(DummySR, self).scan(sr_uuid) def _assertValues(self, vals): for attr in vals: @@ -134,15 +144,17 @@ def _getallVDIrecords(self): class DummyVDI(VDI.VDI): - def load(self, vdi_uuid): + @override + def load(self, vdi_uuid) -> None: self.path = "/dev/null" # returned on attach self.uuid = vdi_uuid self.size = 0 self.utilisation = 0 self.location = vdi_uuid - self.sm_config = {} + self.sm_config: Dict[str, str] = {} - def create(self, sr_uuid, vdi_uuid, size): + @override + def create(self, sr_uuid, vdi_uuid, size) -> str: self.sr._assertValues(['sr_uuid', 'args', 'host_ref', 'device_config', 'command', 'sr_ref', 'vdi_sm_config']) assert(len(self.sr.srcmd.params['args']) == 8) @@ -161,7 +173,8 @@ def create(self, sr_uuid, vdi_uuid, size): self.run_corner_cases_tests() return self.get_params() - def delete(self, sr_uuid, vdi_uuid, data_only=False): + @override + def delete(self, sr_uuid, vdi_uuid, data_only=False) -> None: self.sr._assertValues(['sr_uuid', 'args', 'host_ref', 'device_config', 'command', 'sr_ref', 'vdi_ref', 'vdi_location', 'vdi_uuid']) assert(len(self.sr.srcmd.params['args']) == 0) @@ -170,7 +183,8 @@ def delete(self, sr_uuid, vdi_uuid, data_only=False): self.run_corner_cases_tests() self._db_forget() - def introduce(self, sr_uuid, vdi_uuid): + @override + def introduce(self, sr_uuid, vdi_uuid) -> str: self.sr._assertValues(['sr_uuid', 'args', 'host_ref', 'device_config', 'command', 'sr_ref', 'vdi_sm_config', 'new_uuid']) assert(len(self.sr.srcmd.params['args']) == 0) self.vdi_sm_config = self.sr.srcmd.params['vdi_sm_config'] @@ -186,19 +200,22 @@ def introduce(self, sr_uuid, vdi_uuid): self.run_corner_cases_tests() return super(DummyVDI, self).get_params() - def attach(self, sr_uuid, vdi_uuid): + @override + def attach(self, sr_uuid, vdi_uuid) -> str: self.sr._assertValues(['sr_uuid', 'args', 'host_ref', 'device_config', 'command', 'sr_ref', 'vdi_ref', 'vdi_location', 'vdi_uuid']) assert(len(self.sr.srcmd.params['args']) == 1) vdi = super(DummyVDI, 
self).attach(sr_uuid, vdi_uuid) self.run_corner_cases_tests() return vdi - def detach(self, sr_uuid, vdi_uuid): + @override + def detach(self, sr_uuid, vdi_uuid) -> None: self.sr._assertValues(['sr_uuid', 'args', 'host_ref', 'device_config', 'command', 'sr_ref', 'vdi_ref', 'vdi_location', 'vdi_uuid']) self.run_corner_cases_tests() assert(len(self.sr.srcmd.params['args']) == 0) - def activate(self, sr_uuid, vdi_uuid): + @override + def activate(self, sr_uuid, vdi_uuid) -> Optional[Dict[str, str]]: self.sr._assertValues(['sr_uuid', 'args', 'host_ref', 'device_config', 'command', 'sr_ref', 'vdi_ref', 'vdi_location', 'vdi_uuid']) assert(len(self.sr.srcmd.params['args']) == 1) self.vdi_ref = self.sr.srcmd.params['vdi_ref'] @@ -206,13 +223,16 @@ def activate(self, sr_uuid, vdi_uuid): self.run_corner_cases_tests() for key in self.other_config.keys(): util.SMlog("\tvdi_other_config: [%s:%s]" % (key, self.other_config[key])) + return None - def deactivate(self, sr_uuid, vdi_uuid): + @override + def deactivate(self, sr_uuid, vdi_uuid) -> None: self.sr._assertValues(['sr_uuid', 'args', 'host_ref', 'device_config', 'command', 'sr_ref', 'vdi_ref', 'vdi_location', 'vdi_uuid']) self.run_corner_cases_tests() assert(len(self.sr.srcmd.params['args']) == 0) - def resize(self, sr_uuid, vdi_uuid, size): + @override + def resize(self, sr_uuid, vdi_uuid, size) -> str: self.sr._assertValues(['sr_uuid', 'args', 'host_ref', 'device_config', 'command', 'sr_ref', 'vdi_ref', 'vdi_location', 'vdi_uuid']) assert(len(self.sr.srcmd.params['args']) == 1) @@ -222,7 +242,8 @@ def resize(self, sr_uuid, vdi_uuid, size): self.run_corner_cases_tests() return super(DummyVDI, self).get_params() - def snapshot(self, sr_uuid, vdi_uuid): + @override + def snapshot(self, sr_uuid, vdi_uuid) -> str: self.sr._assertValues(['sr_uuid', 'args', 'host_ref', 'device_config', 'command', 'sr_ref']) assert(len(self.sr.srcmd.params['args']) == 0) @@ -236,7 +257,8 @@ def snapshot(self, sr_uuid, vdi_uuid): self.run_corner_cases_tests() return vdi.get_params() - def clone(self, sr_uuid, vdi_uuid): + @override + def clone(self, sr_uuid, vdi_uuid) -> str: self.sr._assertValues(['sr_uuid', 'args', 'host_ref', 'device_config', 'command', 'sr_ref']) assert(len(self.sr.srcmd.params['args']) == 0) diff --git a/drivers/EXTSR.py b/drivers/EXTSR.py index 164816207..30aa3832b 100755 --- a/drivers/EXTSR.py +++ b/drivers/EXTSR.py @@ -17,8 +17,11 @@ # # EXTSR: Based on local-file storage repository, mounts ext3 partition +from sm_typing import override + import SR from SR import deviceCheck +import VDI import SRCommand import FileSR import util @@ -57,11 +60,13 @@ class EXTSR(FileSR.FileSR): """EXT3 Local file storage repository""" - def handles(srtype): + @override + @staticmethod + def handles(srtype) -> bool: return srtype == 'ext' - handles = staticmethod(handles) - def load(self, sr_uuid): + @override + def load(self, sr_uuid) -> None: self.ops_exclusive = FileSR.OPS_EXCLUSIVE self.lock = Lock(vhdutil.LOCK_TYPE_SR, self.uuid) self.sr_vditype = SR.DEFAULT_TAP @@ -72,7 +77,8 @@ def load(self, sr_uuid): self.attached = self._checkmount() self.driver_config = DRIVER_CONFIG - def delete(self, sr_uuid): + @override + def delete(self, sr_uuid) -> None: super(EXTSR, self).delete(sr_uuid) # Check PVs match VG @@ -102,7 +108,8 @@ def delete(self, sr_uuid): raise xs_errors.XenError('LVMDelete', opterr='errno is %d' % inst.code) - def attach(self, sr_uuid): + @override + def attach(self, sr_uuid) -> None: if not self._checkmount(): try: #Activate LV @@ -136,7 +143,8 @@ 
def attach(self, sr_uuid): for dev in self.dconf['device'].split(','): self.block_setscheduler(dev) - def detach(self, sr_uuid): + @override + def detach(self, sr_uuid) -> None: super(EXTSR, self).detach(sr_uuid) try: # deactivate SR @@ -147,13 +155,15 @@ def detach(self, sr_uuid): 'LVMUnMount', opterr='lvm -an failed errno is %d' % inst.code) + @override @deviceCheck - def probe(self): + def probe(self) -> str: return lvutil.srlist_toxml(lvutil.scan_srlist(EXT_PREFIX, self.dconf['device']), EXT_PREFIX) + @override @deviceCheck - def create(self, sr_uuid, size): + def create(self, sr_uuid, size) -> None: if self._checkmount(): raise xs_errors.XenError('SRExists') @@ -214,12 +224,14 @@ def create(self, sr_uuid, size): self.session, self.sr_ref, scsiutil.devlist_to_serialstring(self.dconf['device'].split(','))) - def vdi(self, uuid): + @override + def vdi(self, uuid) -> VDI.VDI: return EXTFileVDI(self, uuid) class EXTFileVDI(FileSR.FileVDI): - def attach(self, sr_uuid, vdi_uuid): + @override + def attach(self, sr_uuid, vdi_uuid) -> str: if not hasattr(self, 'xenstore_data'): self.xenstore_data = {} diff --git a/drivers/FileSR.py b/drivers/FileSR.py index 155983fc8..f1d983a80 100755 --- a/drivers/FileSR.py +++ b/drivers/FileSR.py @@ -17,7 +17,7 @@ # # FileSR: local-file storage repository -from sm_typing import Dict, List +from sm_typing import Dict, Optional, List, override import SR import VDI @@ -73,9 +73,10 @@ class FileSR(SR.SR): SR_TYPE = "file" - def handles(srtype): + @override + @staticmethod + def handles(srtype) -> bool: return srtype == 'file' - handles = staticmethod(handles) def _check_o_direct(self): if self.sr_ref and self.session is not None: @@ -91,7 +92,8 @@ def __init__(self, srcmd, sr_uuid): SR.SR.__init__(self, srcmd, sr_uuid) self._check_o_direct() - def load(self, sr_uuid): + @override + def load(self, sr_uuid) -> None: self.ops_exclusive = OPS_EXCLUSIVE self.lock = Lock(vhdutil.LOCK_TYPE_SR, self.uuid) self.sr_vditype = vhdutil.VDI_TYPE_VHD @@ -104,7 +106,8 @@ def load(self, sr_uuid): self.attached = False self.driver_config = DRIVER_CONFIG - def create(self, sr_uuid, size): + @override + def create(self, sr_uuid, size) -> None: """ Create the SR. The path must not already exist, or if it does, it must be empty. (This accounts for the case where the user has mounted a device onto a directory manually and want to use this as the @@ -126,7 +129,8 @@ def create(self, sr_uuid, size): except: raise xs_errors.XenError('FileSRCreate') - def delete(self, sr_uuid): + @override + def delete(self, sr_uuid) -> None: self.attach(sr_uuid) cleanup.gc_force(self.session, self.uuid) @@ -158,10 +162,11 @@ def delete(self, sr_uuid): raise xs_errors.XenError('FileSRDelete', \ opterr='error %d' % inst.code) - def attach(self, sr_uuid): + @override + def attach(self, sr_uuid) -> None: self.attach_and_bind(sr_uuid) - def attach_and_bind(self, sr_uuid, bind=True): + def attach_and_bind(self, sr_uuid, bind=True) -> None: if not self._checkmount(): try: util.ioretry(lambda: util.makedirs(self.path, mode=0o700)) @@ -180,7 +185,8 @@ def attach_and_bind(self, sr_uuid, bind=True): opterr='fail to mount FileSR. 
Errno is %s' % inst.code) self.attached = True - def detach(self, sr_uuid): + @override + def detach(self, sr_uuid) -> None: if self._checkmount(): try: util.SMlog("Aborting GC/coalesce") @@ -192,7 +198,8 @@ def detach(self, sr_uuid): raise xs_errors.XenError('SRInUse', opterr=str(e)) self.attached = False - def scan(self, sr_uuid): + @override + def scan(self, sr_uuid) -> None: if not self._checkmount(): raise xs_errors.XenError('SRUnavailable', \ opterr='no such directory %s' % self.path) @@ -226,7 +233,8 @@ def scan(self, sr_uuid): # default behaviour from here on super(FileSR, self).scan(sr_uuid) - def update(self, sr_uuid): + @override + def update(self, sr_uuid) -> None: if not self._checkmount(): raise xs_errors.XenError('SRUnavailable', \ opterr='no such directory %s' % self.path) @@ -239,10 +247,12 @@ def _update(self, sr_uuid, virt_alloc_delta): self.physical_utilisation = self._getutilisation() self._db_update() - def content_type(self, sr_uuid): + @override + def content_type(self, sr_uuid) -> str: return super(FileSR, self).content_type(sr_uuid) - def vdi(self, uuid): + @override + def vdi(self, uuid) -> VDI.VDI: return FileVDI(self, uuid) def added_vdi(self, vdi): @@ -252,7 +262,8 @@ def deleted_vdi(self, uuid): if uuid in self.vdis: del self.vdis[uuid] - def replay(self, uuid): + @override + def replay(self, uuid) -> None: try: file = open(self.path + "/filelog.txt", "r") data = file.readlines() @@ -392,7 +403,7 @@ def _isbind(self): st2 = os.stat(self.remotepath) return st1.st_dev == st2.st_dev and st1.st_ino == st2.st_ino - def _checkmount(self): + def _checkmount(self) -> bool: mount_path = self.path if self.handles("smb"): mount_path = self.mountpoint @@ -402,7 +413,7 @@ def _checkmount(self): util.pathexists(self.remotepath) and self._isbind())) # Override in SharedFileSR. 
- def _check_hardlinks(self): + def _check_hardlinks(self) -> bool: return True class FileVDI(VDI.VDI): @@ -445,7 +456,8 @@ def _find_path_with_retries(self, vdi_uuid, maxretry=5, period=2.0): return found - def load(self, vdi_uuid): + @override + def load(self, vdi_uuid) -> None: self.lock = self.sr.lock self.sr.srcmd.params['o_direct'] = self.sr.o_direct @@ -554,13 +566,15 @@ def load(self, vdi_uuid): raise xs_errors.XenError('VDILoad', \ opterr='Failed load VDI information %s' % self.path) - def update(self, sr_uuid, vdi_location): + @override + def update(self, sr_uuid, vdi_location) -> None: self.load(vdi_location) vdi_ref = self.sr.srcmd.params['vdi_ref'] self.sm_config = self.session.xenapi.VDI.get_sm_config(vdi_ref) self._db_update() - def create(self, sr_uuid, vdi_uuid, size): + @override + def create(self, sr_uuid, vdi_uuid, size) -> str: if util.ioretry(lambda: util.pathexists(self.path)): raise xs_errors.XenError('VDIExists') @@ -591,7 +605,8 @@ def create(self, sr_uuid, vdi_uuid, size): self.sr._update(self.sr.uuid, self.size) return super(FileVDI, self).get_params() - def delete(self, sr_uuid, vdi_uuid, data_only=False): + @override + def delete(self, sr_uuid, vdi_uuid, data_only=False) -> None: if not util.ioretry(lambda: util.pathexists(self.path)): return super(FileVDI, self).delete(sr_uuid, vdi_uuid, data_only) @@ -614,7 +629,8 @@ def delete(self, sr_uuid, vdi_uuid, data_only=False): self.sr._kickGC() return super(FileVDI, self).delete(sr_uuid, vdi_uuid, data_only) - def attach(self, sr_uuid, vdi_uuid): + @override + def attach(self, sr_uuid, vdi_uuid) -> str: if self.path is None: self._find_path_with_retries(vdi_uuid) if not self._checkpath(self.path): @@ -638,10 +654,12 @@ def attach(self, sr_uuid, vdi_uuid): except util.CommandException as inst: raise xs_errors.XenError('VDILoad', opterr='error %d' % inst.code) - def detach(self, sr_uuid, vdi_uuid): + @override + def detach(self, sr_uuid, vdi_uuid) -> None: self.attached = False - def resize(self, sr_uuid, vdi_uuid, size): + @override + def resize(self, sr_uuid, vdi_uuid, size) -> str: if not self.exists: raise xs_errors.XenError('VDIUnavailable', \ opterr='VDI %s unavailable %s' % (vdi_uuid, self.path)) @@ -681,10 +699,12 @@ def resize(self, sr_uuid, vdi_uuid, size): super(FileVDI, self).resize_cbt(self.sr.uuid, self.uuid, self.size) return VDI.VDI.get_params(self) - def clone(self, sr_uuid, vdi_uuid): + @override + def clone(self, sr_uuid, vdi_uuid) -> str: return self._do_snapshot(sr_uuid, vdi_uuid, VDI.SNAPSHOT_DOUBLE) - def compose(self, sr_uuid, vdi1, vdi2): + @override + def compose(self, sr_uuid, vdi1, vdi2) -> None: if self.vdi_type != vhdutil.VDI_TYPE_VHD: raise xs_errors.XenError('Unimplemented') parent_fn = vdi1 + vhdutil.FILE_EXTN[vhdutil.VDI_TYPE_VHD] @@ -711,8 +731,9 @@ def reset_leaf(self, sr_uuid, vdi_uuid): vhdutil.killData(self.path) + @override def _do_snapshot(self, sr_uuid, vdi_uuid, snapType, - cloneOp=False, secondary=None, cbtlog=None): + cloneOp=False, secondary=None, cbtlog=None) -> str: # If cbt enabled, save file consistency state if cbtlog is not None: if blktap2.VDI.tap_status(self.session, vdi_uuid): @@ -735,7 +756,8 @@ def _do_snapshot(self, sr_uuid, vdi_uuid, snapType, self.disable_leaf_on_secondary(vdi_uuid, secondary=secondary) blktap2.VDI.tap_unpause(self.session, sr_uuid, vdi_uuid, secondary) - def _rename(self, src, dst): + @override + def _rename(self, src, dst) -> None: util.SMlog("FileVDI._rename %s to %s" % (src, dst)) util.ioretry(lambda: os.rename(src, dst)) @@ -919,7 
+941,8 @@ def _snapshot(self, snap_type, cbtlog=None, cbt_consistency=None): ret_vdi = self return ret_vdi.get_params() - def get_params(self): + @override + def get_params(self) -> str: if not self._checkpath(self.path): raise xs_errors.XenError('VDIUnavailable', \ opterr='VDI %s unavailable %s' % (self.uuid, self.path)) @@ -999,7 +1022,8 @@ def extractUuid(path): return uuid extractUuid = staticmethod(extractUuid) - def generate_config(self, sr_uuid, vdi_uuid): + @override + def generate_config(self, sr_uuid, vdi_uuid) -> str: """ Generate the XML config required to attach and activate a VDI for use when XAPI is not running. Attach and @@ -1018,7 +1042,8 @@ def generate_config(self, sr_uuid, vdi_uuid): config = xmlrpc.client.dumps(tuple([resp]), "vdi_attach_from_config") return xmlrpc.client.dumps((config, ), "", True) - def attach_from_config(self, sr_uuid, vdi_uuid): + @override + def attach_from_config(self, sr_uuid, vdi_uuid) -> str: """ Attach and activate a VDI using config generated by vdi_generate_config above. This is used for cases such as @@ -1027,15 +1052,17 @@ def attach_from_config(self, sr_uuid, vdi_uuid): util.SMlog("FileVDI.attach_from_config") try: if not util.pathexists(self.sr.path): - self.sr.attach(sr_uuid) + return self.sr.attach(sr_uuid) except: util.logException("FileVDI.attach_from_config") raise xs_errors.XenError( 'SRUnavailable', opterr='Unable to attach from config' ) + return '' - def _create_cbt_log(self): + @override + def _create_cbt_log(self) -> str: # Create CBT log file # Name: .cbtlog #Handle if file already exists @@ -1044,7 +1071,8 @@ def _create_cbt_log(self): open_file.close() return super(FileVDI, self)._create_cbt_log() - def _delete_cbt_log(self): + @override + def _delete_cbt_log(self) -> None: logPath = self._get_cbt_logpath(self.uuid) try: os.remove(logPath) @@ -1052,7 +1080,8 @@ def _delete_cbt_log(self): if e.errno != errno.ENOENT: raise - def _cbt_log_exists(self, logpath): + @override + def _cbt_log_exists(self, logpath) -> bool: return util.pathexists(logpath) @@ -1078,7 +1107,8 @@ def _check_writable(self): def _raise_hardlink_error(self): raise OSError(524, "Unknown error 524") - def _check_hardlinks(self): + @override + def _check_hardlinks(self) -> bool: hardlink_conf = self._read_hardlink_conf() if hardlink_conf is not None: return hardlink_conf @@ -1120,7 +1150,7 @@ def _check_hardlinks(self): def _get_hardlink_conf_path(self): return os.path.join(self.path, 'sm-hardlink.conf') - def _read_hardlink_conf(self): + def _read_hardlink_conf(self) -> Optional[bool]: try: with open(self._get_hardlink_conf_path(), 'r') as f: try: diff --git a/drivers/GlusterFSSR.py b/drivers/GlusterFSSR.py index 041a91429..7b515dc81 100644 --- a/drivers/GlusterFSSR.py +++ b/drivers/GlusterFSSR.py @@ -16,6 +16,8 @@ # along with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +from sm_typing import override + import errno import os import syslog as _syslog @@ -29,6 +31,7 @@ import SRCommand import FileSR # end of careful +import VDI import cleanup import util import vhdutil @@ -77,13 +80,14 @@ class GlusterFSSR(FileSR.FileSR): DRIVER_TYPE = 'glusterfs' - def handles(sr_type): + @override + @staticmethod + def handles(sr_type) -> bool: # fudge, because the parent class (FileSR) checks for smb to alter its behavior return sr_type == GlusterFSSR.DRIVER_TYPE or sr_type == 'smb' - handles = staticmethod(handles) - - def load(self, sr_uuid): + @override + def load(self, sr_uuid) -> 
None: if not self._is_glusterfs_available(): raise xs_errors.XenError( 'SRUnavailable', @@ -160,7 +164,8 @@ def unmount(self, mountpoint, rmmountpoint): except OSError as inst: raise GlusterFSException("rmdir failed with error '%s'" % inst.strerror) - def attach(self, sr_uuid): + @override + def attach(self, sr_uuid) -> None: if not self.checkmount(): try: self.mount() @@ -169,7 +174,8 @@ def attach(self, sr_uuid): raise xs_errors.SROSError(12, exc.errstr) self.attached = True - def probe(self): + @override + def probe(self) -> str: try: self.mount(PROBE_MOUNTPOINT) sr_list = filter(util.match_uuid, util.listdir(PROBE_MOUNTPOINT)) @@ -179,7 +185,8 @@ def probe(self): # Create a dictionary from the SR uuids to feed SRtoXML() return util.SRtoXML({sr_uuid: {} for sr_uuid in sr_list}) - def detach(self, sr_uuid): + @override + def detach(self, sr_uuid) -> None: if not self.checkmount(): return util.SMlog("Aborting GC/coalesce") @@ -190,7 +197,8 @@ def detach(self, sr_uuid): os.unlink(self.path) self.attached = False - def create(self, sr_uuid, size): + @override + def create(self, sr_uuid, size) -> None: if self.checkmount(): raise xs_errors.SROSError(113, 'GlusterFS mount point already attached') @@ -224,7 +232,8 @@ def create(self, sr_uuid, size): os.strerror(inst.code))) self.detach(sr_uuid) - def delete(self, sr_uuid): + @override + def delete(self, sr_uuid) -> None: # try to remove/delete non VDI contents first super(GlusterFSSR, self).delete(sr_uuid) try: @@ -239,7 +248,8 @@ def delete(self, sr_uuid): if inst.code != errno.ENOENT: raise xs_errors.SROSError(114, "Failed to remove GlusterFS mount point") - def vdi(self, uuid, loadLocked=False): + @override + def vdi(self, uuid, loadLocked=False) -> VDI.VDI: return GlusterFSFileVDI(self, uuid) @staticmethod @@ -248,7 +258,8 @@ def _is_glusterfs_available(): class GlusterFSFileVDI(FileSR.FileVDI): - def attach(self, sr_uuid, vdi_uuid): + @override + def attach(self, sr_uuid, vdi_uuid) -> str: if not hasattr(self, 'xenstore_data'): self.xenstore_data = {} @@ -256,7 +267,8 @@ def attach(self, sr_uuid, vdi_uuid): return super(GlusterFSFileVDI, self).attach(sr_uuid, vdi_uuid) - def generate_config(self, sr_uuid, vdi_uuid): + @override + def generate_config(self, sr_uuid, vdi_uuid) -> str: util.SMlog("SMBFileVDI.generate_config") if not util.pathexists(self.path): raise xs_errors.XenError('VDIUnavailable') @@ -270,15 +282,16 @@ def generate_config(self, sr_uuid, vdi_uuid): config = xmlrpc.client.dumps(tuple([resp]), "vdi_attach_from_config") return xmlrpc.client.dumps((config,), "", True) - def attach_from_config(self, sr_uuid, vdi_uuid): + @override + def attach_from_config(self, sr_uuid, vdi_uuid) -> str: try: if not util.pathexists(self.sr.path): - self.sr.attach(sr_uuid) + return self.sr.attach(sr_uuid) except: util.logException("SMBFileVDI.attach_from_config") raise xs_errors.XenError('SRUnavailable', opterr='Unable to attach from config') - + return '' if __name__ == '__main__': SRCommand.run(GlusterFSSR, DRIVER_INFO) diff --git a/drivers/HBASR.py b/drivers/HBASR.py index 66ca3ee99..1740f42ef 100755 --- a/drivers/HBASR.py +++ b/drivers/HBASR.py @@ -19,8 +19,11 @@ # hardware based iSCSI # +from sm_typing import Dict, List, override + import SR import SRCommand +import VDI import devscan import scsiutil import util @@ -50,20 +53,22 @@ class HBASR(SR.SR): """HBA storage repository""" - def handles(type): + @override + @staticmethod + def handles(type) -> bool: if type == "hba": return True return False - handles = staticmethod(handles) - def 
load(self, sr_uuid): + @override + def load(self, sr_uuid) -> None: self.sr_vditype = 'phy' self.type = "any" if 'type' in self.dconf and self.dconf['type']: self.type = self.dconf['type'] self.attached = False self.procname = "" - self.devs = {} + self.devs: Dict[str, List[str]] = {} def _init_hbadict(self): if not hasattr(self, "hbas"): @@ -174,15 +179,18 @@ def _probe_hba(self): raise xs_errors.XenError('XMLParse', \ opterr='HBA probe failed') - def attach(self, sr_uuid): + @override + def attach(self, sr_uuid) -> None: self._mpathHandle() - def detach(self, sr_uuid): + @override + def detach(self, sr_uuid) -> None: if util._containsVDIinuse(self): return return - def create(self, sr_uuid, size): + @override + def create(self, sr_uuid, size) -> None: # Check whether an SR already exists SRs = self.session.xenapi.SR.get_all_records() for sr in SRs: @@ -211,11 +219,13 @@ def create(self, sr_uuid, size): self.sm_config['multipathable'] = 'true' self.session.xenapi.SR.set_sm_config(self.sr_ref, self.sm_config) - def delete(self, sr_uuid): + @override + def delete(self, sr_uuid) -> None: self.detach(sr_uuid) return - def probe(self): + @override + def probe(self) -> str: self._init_hbadict() self.attach("") SRs = self.session.xenapi.SR.get_all_records() @@ -228,7 +238,8 @@ def probe(self): Recs[record["uuid"]] = sm_config return self.srlist_toxml(Recs) - def scan(self, sr_uuid): + @override + def scan(self, sr_uuid) -> None: self._init_hbadict() if not self.passthrough: if not self.attached: @@ -242,7 +253,7 @@ def scan(self, sr_uuid): if vdi.managed: self.physical_utilisation += vdi.size self.virtual_allocation = self.physical_utilisation - return super(HBASR, self).scan(sr_uuid) + super(HBASR, self).scan(sr_uuid) def print_devs(self): self.attach("") @@ -273,7 +284,8 @@ def _loadvdis(self): def _getLUNbySMconfig(self, sm_config): raise xs_errors.XenError('VDIUnavailable') - def vdi(self, uuid): + @override + def vdi(self, uuid) -> VDI.VDI: return LUNperVDI.RAWVDI(self, uuid) def srlist_toxml(self, SRs): diff --git a/drivers/ISOSR.py b/drivers/ISOSR.py index 64f99b480..401461c67 100755 --- a/drivers/ISOSR.py +++ b/drivers/ISOSR.py @@ -17,6 +17,8 @@ # # ISOSR: remote iso storage repository +from sm_typing import override + import SR import VDI import SRCommand @@ -152,7 +154,7 @@ class ISOSR(SR.SR): """Local file storage repository""" # Some helper functions: - def _checkmount(self): + def _checkmount(self) -> bool: """Checks that the mountpoint exists and is mounted""" if not util.pathexists(self.mountpoint): return False @@ -221,21 +223,24 @@ def _loadvdis(self): vdi.read_only = False # Now for the main functions: - def handles(type): + @override + @staticmethod + def handles(type) -> bool: """Do we handle this type?""" if type == TYPE: return True return False - handles = staticmethod(handles) - def content_type(self, sr_uuid): + @override + def content_type(self, sr_uuid) -> str: """Returns the content_type XML""" return super(ISOSR, self).content_type(sr_uuid) # pylint: disable=no-member vdi_path_regex = re.compile(r"[a-z0-9.-]+\.(iso|img)", re.I) - def vdi(self, uuid): + @override + def vdi(self, uuid) -> VDI.VDI: """Create a VDI class. 
If the VDI does not exist, we determine here what its filename should be.""" @@ -267,7 +272,8 @@ def vdi(self, uuid): return ISOVDI(self, filename) - def load(self, sr_uuid): + @override + def load(self, sr_uuid) -> None: """Initialises the SR""" # First of all, check we've got the correct keys in dconf if 'location' not in self.dconf: @@ -302,10 +308,12 @@ def load(self, sr_uuid): # Some info we need: self.sr_vditype = 'phy' - def delete(self, sr_uuid): + @override + def delete(self, sr_uuid) -> None: pass - def attach(self, sr_uuid): + @override + def attach(self, sr_uuid) -> None: """Std. attach""" # Very-Legacy mode means the ISOs are in the local fs - so no need to attach. if 'legacy_mode' in self.dconf: @@ -475,8 +483,8 @@ def _check_nfs_server(self, location): except nfs.NfsException as e: raise xs_errors.XenError('NFSTarget', opterr=str(e.errstr)) - - def after_master_attach(self, uuid): + @override + def after_master_attach(self, uuid) -> None: """Perform actions required after attaching on the pool master Return: None @@ -568,7 +576,8 @@ def getCacheOptions(self): """Pass cache options to mount.cifs""" return "cache=none" - def detach(self, sr_uuid): + @override + def detach(self, sr_uuid) -> None: """Std. detach""" if 'legacy_mode' in self.dconf or not self._checkmount(): return @@ -579,7 +588,8 @@ def detach(self, sr_uuid): raise xs_errors.XenError('NFSUnMount', \ opterr='error is %d' % inst.code) - def scan(self, sr_uuid): + @override + def scan(self, sr_uuid) -> None: """Scan: see _loadvdis""" if not util.isdir(self.path): raise xs_errors.XenError('SRUnavailable', \ @@ -660,9 +670,10 @@ def scan(self, sr_uuid): self.session.xenapi.VDI.remove_from_sm_config(vdi, 'xs-tools') else: - return super(ISOSR, self).scan(sr_uuid) + super(ISOSR, self).scan(sr_uuid) - def create(self, sr_uuid, size): + @override + def create(self, sr_uuid, size) -> None: self.attach(sr_uuid) if 'type' in self.dconf: smconfig = self.session.xenapi.SR.get_sm_config(self.sr_ref) @@ -681,9 +692,10 @@ def create(self, sr_uuid, size): self.detach(sr_uuid) - + class ISOVDI(VDI.VDI): - def load(self, vdi_uuid): + @override + def load(self, vdi_uuid) -> None: # Nb, in the vdi_create call, the filename is unset, so the following # will fail. 
self.vdi_type = "iso" @@ -725,17 +737,20 @@ def __init__(self, mysr, filename): self.sm_config['xs-tools-version'] = product_version self.sm_config['xs-tools-build'] = build_number - def detach(self, sr_uuid, vdi_uuid): + @override + def detach(self, sr_uuid, vdi_uuid) -> None: pass - def attach(self, sr_uuid, vdi_uuid): + @override + def attach(self, sr_uuid, vdi_uuid) -> str: try: os.stat(self.path) return super(ISOVDI, self).attach(sr_uuid, vdi_uuid) except: raise xs_errors.XenError('VDIMissing') - def create(self, sr_uuid, vdi_uuid, size): + @override + def create(self, sr_uuid, vdi_uuid, size) -> str: self.uuid = vdi_uuid self.path = os.path.join(self.sr.path, self.filename) self.size = size @@ -758,7 +773,8 @@ def create(self, sr_uuid, vdi_uuid, size): raise xs_errors.XenError('VDICreate', \ opterr='could not create file: "%s"' % self.path) - def delete(self, sr_uuid, vdi_uuid, data_only=False): + @override + def delete(self, sr_uuid, vdi_uuid, data_only=False) -> None: util.SMlog("Deleting...") self.uuid = vdi_uuid diff --git a/drivers/LUNperVDI.py b/drivers/LUNperVDI.py index 497ba6b56..e3583a592 100755 --- a/drivers/LUNperVDI.py +++ b/drivers/LUNperVDI.py @@ -18,6 +18,8 @@ # LUNperVDI: Generic Raw LUN handler, used by HBASR and ISCSISR # +from sm_typing import override + import os import VDI import util @@ -28,7 +30,8 @@ class RAWVDI(VDI.VDI): - def load(self, vdi_uuid): + @override + def load(self, vdi_uuid) -> None: if not self.sr.attached: raise xs_errors.XenError('SRUnavailable') @@ -60,7 +63,8 @@ def _query(self, path, id): sm_config['backend-kind'] = 'vbd' self.sm_config = sm_config - def introduce(self, sr_uuid, vdi_uuid): + @override + def introduce(self, sr_uuid, vdi_uuid) -> str: self.sm_config = self.sr.srcmd.params['vdi_sm_config'] vdi_path = self.sr._getLUNbySMconfig(self.sm_config) self._query(vdi_path, self.sm_config['LUNid']) @@ -78,7 +82,8 @@ def introduce(self, sr_uuid, vdi_uuid): self.sr.vdis[vdi_uuid]._db_introduce() return super(RAWVDI, self).get_params() - def create(self, sr_uuid, vdi_uuid, size): + @override + def create(self, sr_uuid, vdi_uuid, size) -> str: VDIs = util._getVDIs(self.sr) self.sr._loadvdis() smallest = 0 @@ -98,7 +103,8 @@ def create(self, sr_uuid, vdi_uuid, size): return super(RAWVDI, self.sr.vdis[v['uuid']]).get_params() raise xs_errors.XenError('SRNoSpace') - def delete(self, sr_uuid, vdi_uuid, data_only=False): + @override + def delete(self, sr_uuid, vdi_uuid, data_only=False) -> None: try: vdi = util._getVDI(self.sr, vdi_uuid) if not vdi['managed']: @@ -108,7 +114,8 @@ def delete(self, sr_uuid, vdi_uuid, data_only=False): except: pass - def attach(self, sr_uuid, vdi_uuid): + @override + def attach(self, sr_uuid, vdi_uuid) -> str: self.sr._loadvdis() if vdi_uuid not in self.sr.vdis: raise xs_errors.XenError('VDIUnavailable') @@ -126,7 +133,8 @@ def attach(self, sr_uuid, vdi_uuid): raise xs_errors.XenError('VDIUnavailable') return super(RAWVDI, self).attach(sr_uuid, vdi_uuid) - def detach(self, sr_uuid, vdi_uuid): + @override + def detach(self, sr_uuid, vdi_uuid) -> None: self.sr._loadvdis() if 'SCSIid' in self.sm_config: self.sr.mpathmodule.reset(self.sm_config['SCSIid'], True) # explicitly unmap diff --git a/drivers/LVHDSR.py b/drivers/LVHDSR.py index 70463b6c4..01c9d1d7a 100755 --- a/drivers/LVHDSR.py +++ b/drivers/LVHDSR.py @@ -18,7 +18,7 @@ # LVHDSR: VHD on LVM storage repository # -from sm_typing import Dict, List +from sm_typing import Dict, List, override import SR from SR import deviceCheck @@ -135,7 +135,9 @@ class 
LVHDSR(SR.SR): legacyMode = True - def handles(type): + @override + @staticmethod + def handles(type) -> bool: """Returns True if this SR class understands the given dconf string""" # we can pose as LVMSR or EXTSR for compatibility purposes if __name__ == '__main__': @@ -147,9 +149,9 @@ def handles(type): elif name.endswith("EXTSR"): return type == "ext" return type == LVHDSR.DRIVER_TYPE - handles = staticmethod(handles) - def load(self, sr_uuid): + @override + def load(self, sr_uuid) -> None: self.ops_exclusive = OPS_EXCLUSIVE self.isMaster = False @@ -238,7 +240,8 @@ def load(self, sr_uuid): except: pass - def cleanup(self): + @override + def cleanup(self) -> None: # we don't need to hold the lock to dec refcounts of activated LVs if not self.lvActivator.deactivateAll(): raise util.SMException("failed to deactivate LVs") @@ -481,8 +484,9 @@ def _expand_size(self): util.logException("LVHDSR._expand_size for %s failed to resize" " the PV" % self.uuid) + @override @deviceCheck - def create(self, uuid, size): + def create(self, uuid, size) -> None: util.SMlog("LVHDSR.create for %s" % self.uuid) if not self.isMaster: util.SMlog('sr_create blocked for non-master') @@ -510,7 +514,8 @@ def create(self, uuid, size): self.session.xenapi.SR.add_to_sm_config(self.sr_ref, \ self.FLAG_USE_VHD, 'true') - def delete(self, uuid): + @override + def delete(self, uuid) -> None: util.SMlog("LVHDSR.delete for %s" % self.uuid) if not self.isMaster: raise xs_errors.XenError('LVMMaster') @@ -564,7 +569,8 @@ def delete(self, uuid): lvutil.removeVG(self.dconf['device'], self.vgname) self._cleanup() - def attach(self, uuid): + @override + def attach(self, uuid) -> None: util.SMlog("LVHDSR.attach for %s" % self.uuid) self._cleanup(True) # in case of host crashes, if detach wasn't called @@ -602,7 +608,8 @@ def attach(self, uuid): for dev in self.dconf['device'].split(','): self.block_setscheduler(dev) - def detach(self, uuid): + @override + def detach(self, uuid) -> None: util.SMlog("LVHDSR.detach for %s" % self.uuid) cleanup.abort(self.uuid) @@ -660,12 +667,14 @@ def detach(self, uuid): # only place to do so. 
self._cleanup(self.isMaster) - def forget_vdi(self, uuid): + @override + def forget_vdi(self, uuid) -> None: if not self.legacyMode: LVMMetadataHandler(self.mdpath).deleteVdiFromMetadata(uuid) super(LVHDSR, self).forget_vdi(uuid) - def scan(self, uuid): + @override + def scan(self, uuid) -> None: activated_lvs = set() try: util.SMlog("LVHDSR.scan for %s" % self.uuid) @@ -685,7 +694,7 @@ def scan(self, uuid): # Now check if there are any VDIs in the metadata, which are not in # XAPI if self.mdexists: - vdiToSnaps = {} + vdiToSnaps: Dict[str, List[str]] = {} # get VDIs from XAPI vdis = self.session.xenapi.SR.get_VDIs(self.sr_ref) vdi_uuids = set([]) @@ -814,7 +823,8 @@ def scan(self, uuid): self.lvActivator.deactivate( vdi, LVActivator.NORMAL, False) - def update(self, uuid): + @override + def update(self, uuid) -> None: if not lvutil._checkVG(self.vgname): return self._updateStats(uuid, 0) @@ -843,15 +853,17 @@ def _updateStats(self, uuid, virtAllocDelta): self.physical_utilisation = stats['physical_utilisation'] self._db_update() + @override @deviceCheck - def probe(self): + def probe(self) -> str: return lvutil.srlist_toxml( lvutil.scan_srlist(lvhdutil.VG_PREFIX, self.dconf['device']), lvhdutil.VG_PREFIX, ('metadata' in self.srcmd.params['sr_sm_config'] and \ self.srcmd.params['sr_sm_config']['metadata'] == 'true')) - def vdi(self, uuid): + @override + def vdi(self, uuid) -> VDI.VDI: return LVHDVDI(self, uuid) def _loadvdis(self): @@ -1308,12 +1320,13 @@ def ensureCBTSpace(self): # Ensure we have space for at least one LV self._ensureSpaceAvailable(self.journaler.LV_SIZE) - + class LVHDVDI(VDI.VDI): JRN_CLONE = "clone" # journal entry type for the clone operation - def load(self, vdi_uuid): + @override + def load(self, vdi_uuid) -> None: self.lock = self.sr.lock self.lvActivator = self.sr.lvActivator self.loaded = False @@ -1353,7 +1366,8 @@ def load(self, vdi_uuid): self.lvname = "%s%s" % (lvhdutil.LV_PREFIX[self.vdi_type], vdi_uuid) self.path = os.path.join(self.sr.path, self.lvname) - def create(self, sr_uuid, vdi_uuid, size): + @override + def create(self, sr_uuid, vdi_uuid, size) -> str: util.SMlog("LVHDVDI.create for %s" % self.uuid) if not self.sr.isMaster: raise xs_errors.XenError('LVMMaster') @@ -1417,7 +1431,8 @@ def create(self, sr_uuid, vdi_uuid, size): return VDI.VDI.get_params(self) - def delete(self, sr_uuid, vdi_uuid, data_only=False): + @override + def delete(self, sr_uuid, vdi_uuid, data_only=False) -> None: util.SMlog("LVHDVDI.delete for %s" % self.uuid) try: self._loadThis() @@ -1462,7 +1477,8 @@ def delete(self, sr_uuid, vdi_uuid, data_only=False): self.sr._kickGC() return super(LVHDVDI, self).delete(sr_uuid, vdi_uuid, data_only) - def attach(self, sr_uuid, vdi_uuid): + @override + def attach(self, sr_uuid, vdi_uuid) -> str: util.SMlog("LVHDVDI.attach for %s" % self.uuid) if self.sr.journaler.hasJournals(self.uuid): raise xs_errors.XenError('VDIUnavailable', @@ -1492,7 +1508,8 @@ def attach(self, sr_uuid, vdi_uuid): if not self.sr.lvActivator.deactivateAll(): util.SMlog("Failed to deactivate LVs back (%s)" % self.uuid) - def detach(self, sr_uuid, vdi_uuid): + @override + def detach(self, sr_uuid, vdi_uuid) -> None: util.SMlog("LVHDVDI.detach for %s" % self.uuid) self._loadThis() already_deflated = (self.utilisation < \ @@ -1523,7 +1540,8 @@ def detach(self, sr_uuid, vdi_uuid): raise xs_errors.XenError("SMGeneral", opterr="deactivation") # We only support offline resize - def resize(self, sr_uuid, vdi_uuid, size): + @override + def resize(self, sr_uuid, vdi_uuid, size) 
-> str: util.SMlog("LVHDVDI.resize for %s" % self.uuid) if not self.sr.isMaster: raise xs_errors.XenError('LVMMaster') @@ -1576,11 +1594,13 @@ def resize(self, sr_uuid, vdi_uuid, size): super(LVHDVDI, self).resize_cbt(self.sr.uuid, self.uuid, self.size) return VDI.VDI.get_params(self) - def clone(self, sr_uuid, vdi_uuid): + @override + def clone(self, sr_uuid, vdi_uuid) -> str: return self._do_snapshot( sr_uuid, vdi_uuid, VDI.SNAPSHOT_DOUBLE, cloneOp=True) - def compose(self, sr_uuid, vdi1, vdi2): + @override + def compose(self, sr_uuid, vdi1, vdi2) -> None: util.SMlog("LVHDSR.compose for %s -> %s" % (vdi2, vdi1)) if self.vdi_type != vhdutil.VDI_TYPE_VHD: raise xs_errors.XenError('Unimplemented') @@ -1640,8 +1660,9 @@ def _detach(self): self._chainSetActive(False, True) self.attached = False + @override def _do_snapshot(self, sr_uuid, vdi_uuid, snapType, - cloneOp=False, secondary=None, cbtlog=None): + cloneOp=False, secondary=None, cbtlog=None) -> str: # If cbt enabled, save file consistency state if cbtlog is not None: if blktap2.VDI.tap_status(self.session, vdi_uuid): @@ -2171,7 +2192,8 @@ def _prepareThin(self, attach): self.session.xenapi.SR.set_physical_utilisation(self.sr.sr_ref, str(sr_utilisation)) - def update(self, sr_uuid, vdi_uuid): + @override + def update(self, sr_uuid, vdi_uuid) -> None: if self.sr.legacyMode: return @@ -2191,28 +2213,33 @@ def update(self, sr_uuid, vdi_uuid): self.session.xenapi.VDI.get_metadata_of_pool(vdi_ref) LVMMetadataHandler(self.sr.mdpath).updateMetadata(update_map) - def _ensure_cbt_space(self): + @override + def _ensure_cbt_space(self) -> None: self.sr.ensureCBTSpace() - def _create_cbt_log(self): + @override + def _create_cbt_log(self) -> str: logname = self._get_cbt_logname(self.uuid) self.sr.lvmCache.create(logname, self.sr.journaler.LV_SIZE, CBTLOG_TAG) logpath = super(LVHDVDI, self)._create_cbt_log() self.sr.lvmCache.deactivateNoRefcount(logname) return logpath - def _delete_cbt_log(self): + @override + def _delete_cbt_log(self) -> None: logpath = self._get_cbt_logpath(self.uuid) if self._cbt_log_exists(logpath): logname = self._get_cbt_logname(self.uuid) self.sr.lvmCache.remove(logname) - def _rename(self, oldpath, newpath): + @override + def _rename(self, oldpath, newpath) -> None: oldname = os.path.basename(oldpath) newname = os.path.basename(newpath) self.sr.lvmCache.rename(oldname, newname) - def _activate_cbt_log(self, lv_name): + @override + def _activate_cbt_log(self, lv_name) -> bool: self.sr.lvmCache.refresh() if not self.sr.lvmCache.is_active(lv_name): try: @@ -2225,14 +2252,16 @@ def _activate_cbt_log(self, lv_name): else: return False - def _deactivate_cbt_log(self, lv_name): + @override + def _deactivate_cbt_log(self, lv_name) -> None: try: self.sr.lvmCache.deactivateNoRefcount(lv_name) except Exception as e: util.SMlog("Exception in _deactivate_cbt_log, Error: %s." 
% str(e)) raise - def _cbt_log_exists(self, logpath): + @override + def _cbt_log_exists(self, logpath) -> bool: return lvutil.exists(logpath) if __name__ == '__main__': diff --git a/drivers/LVHDoFCoESR.py b/drivers/LVHDoFCoESR.py index 766d83f4d..11bf298ed 100755 --- a/drivers/LVHDoFCoESR.py +++ b/drivers/LVHDoFCoESR.py @@ -18,7 +18,10 @@ # LVHDoFCoESR: LVHD over Fibre Channel over Ethernet driver # +from sm_typing import override + import SR +import VDI import LVHDoHBASR import LVHDSR import SRCommand @@ -54,7 +57,9 @@ class LVHDoFCoESR(LVHDoHBASR.LVHDoHBASR): """LVHD over FCoE storage repository""" - def handles(type): + @override + @staticmethod + def handles(type) -> bool: if __name__ == '__main__': name = sys.argv[0] else: @@ -64,9 +69,9 @@ def handles(type): if type == "lvhdofcoe": return True return False - handles = staticmethod(handles) - def load(self, sr_uuid): + @override + def load(self, sr_uuid) -> None: driver = SR.driver('hba') if 'type' not in self.original_srcmd.params['device_config'] or \ 'type' in self.original_srcmd.params['device_config'] and \ @@ -86,7 +91,8 @@ def load(self, sr_uuid): self.SCSIid = self.dconf['SCSIid'] LVHDSR.LVHDSR.load(self, sr_uuid) - def vdi(self, uuid): + @override + def vdi(self, uuid) -> VDI.VDI: return LVHDoFCoEVDI(self, uuid) diff --git a/drivers/LVHDoHBASR.py b/drivers/LVHDoHBASR.py index 1eb8fb635..784512733 100755 --- a/drivers/LVHDoHBASR.py +++ b/drivers/LVHDoHBASR.py @@ -19,9 +19,12 @@ # hardware based iSCSI # +from sm_typing import override + import SR import LVHDSR import SRCommand +import VDI import lvutil import HBASR import os @@ -58,7 +61,9 @@ class LVHDoHBASR(LVHDSR.LVHDSR): """LVHD over HBA storage repository""" - def handles(type): + @override + @staticmethod + def handles(type) -> bool: if __name__ == '__main__': name = sys.argv[0] else: @@ -68,9 +73,9 @@ def handles(type): if type == "lvhdohba": return True return False - handles = staticmethod(handles) - def load(self, sr_uuid): + @override + def load(self, sr_uuid) -> None: driver = SR.driver('hba') self.hbasr = driver(self.original_srcmd, sr_uuid) @@ -108,7 +113,8 @@ def load(self, sr_uuid): self.SCSIid = self.dconf['SCSIid'] super(LVHDoHBASR, self).load(sr_uuid) - def create(self, sr_uuid, size): + @override + def create(self, sr_uuid, size) -> None: self.hbasr.attach(sr_uuid) if self.mpath == "true": self.mpathmodule.refresh(self.SCSIid, 0) @@ -121,7 +127,8 @@ def create(self, sr_uuid, size): util.remove_mpathcount_field(self.session, self.host_ref, \ self.sr_ref, self.SCSIid) - def attach(self, sr_uuid): + @override + def attach(self, sr_uuid) -> None: self.hbasr.attach(sr_uuid) if self.mpath == "true": self.mpathmodule.refresh(self.SCSIid, 0) @@ -140,7 +147,8 @@ def attach(self, sr_uuid): LVHDSR.LVHDSR.attach(self, sr_uuid) self._setMultipathableFlag(SCSIid=self.SCSIid) - def scan(self, sr_uuid): + @override + def scan(self, sr_uuid) -> None: # During a reboot, scan is called ahead of attach, which causes the MGT # to point of the wrong device instead of dm-x. Running multipathing will # take care of this scenario. 
@@ -154,7 +162,8 @@ def scan(self, sr_uuid): self._pathrefresh(LVHDoHBASR) LVHDSR.LVHDSR.scan(self, sr_uuid) - def probe(self): + @override + def probe(self) -> str: if self.mpath == "true" and 'SCSIid' in self.dconf: # When multipathing is enabled, since we don't refcount the multipath maps, # we should not attempt to do the iscsi.attach/detach when the map is already present, @@ -181,7 +190,8 @@ def probe(self): self.mpathmodule.reset(self.SCSIid, explicit_unmap=True) raise - def detach(self, sr_uuid): + @override + def detach(self, sr_uuid) -> None: LVHDSR.LVHDSR.detach(self, sr_uuid) self.mpathmodule.reset(self.SCSIid, explicit_unmap=True) try: @@ -205,7 +215,8 @@ def _remove_device_nodes(self): (os.path.basename(node)), 'w') as f: f.write('1\n') - def delete(self, sr_uuid): + @override + def delete(self, sr_uuid) -> None: self._pathrefresh(LVHDoHBASR) try: LVHDSR.LVHDSR.delete(self, sr_uuid) @@ -214,12 +225,14 @@ def delete(self, sr_uuid): self.mpathmodule.reset(self.SCSIid, explicit_unmap=True) self._remove_device_nodes() - def vdi(self, uuid): + @override + def vdi(self, uuid) -> VDI.VDI: return LVHDoHBAVDI(self, uuid) class LVHDoHBAVDI(LVHDSR.LVHDVDI): - def generate_config(self, sr_uuid, vdi_uuid): + @override + def generate_config(self, sr_uuid, vdi_uuid) -> str: util.SMlog("LVHDoHBAVDI.generate_config") if not lvutil._checkLV(self.path): raise xs_errors.XenError('VDIUnavailable') @@ -235,7 +248,8 @@ def generate_config(self, sr_uuid, vdi_uuid): config = xmlrpc.client.dumps(tuple([dict]), "vdi_attach_from_config") return xmlrpc.client.dumps((config, ), "", True) - def attach_from_config(self, sr_uuid, vdi_uuid): + @override + def attach_from_config(self, sr_uuid, vdi_uuid) -> str: util.SMlog("LVHDoHBAVDI.attach_from_config") self.sr.hbasr.attach(sr_uuid) if self.sr.mpath == "true": diff --git a/drivers/LVHDoISCSISR.py b/drivers/LVHDoISCSISR.py index 442ec30f7..799e89d65 100755 --- a/drivers/LVHDoISCSISR.py +++ b/drivers/LVHDoISCSISR.py @@ -18,7 +18,10 @@ # LVHDoISCSISR: LVHD over ISCSI software initiator SR driver # +from sm_typing import override + import SR +import VDI import LVHDSR import BaseISCSI import SRCommand @@ -71,7 +74,9 @@ class LVHDoISCSISR(LVHDSR.LVHDSR): """LVHD over ISCSI storage repository""" - def handles(type): + @override + @staticmethod + def handles(type) -> bool: if __name__ == '__main__': name = sys.argv[0] else: @@ -81,9 +86,9 @@ def handles(type): if type == "lvhdoiscsi": return True return False - handles = staticmethod(handles) - def load(self, sr_uuid): + @override + def load(self, sr_uuid) -> None: if not sr_uuid: # This is a probe call, generate a temp sr_uuid sr_uuid = util.gen_uuid() @@ -426,7 +431,8 @@ def _LUNprint(self, sr_uuid): self.iscsi.print_LUNs() self.iscsi.detach(sr_uuid) - def create(self, sr_uuid, size): + @override + def create(self, sr_uuid, size) -> None: # Check SCSIid not already in use by other PBDs if util.test_SCSIid(self.session, sr_uuid, self.SCSIid): raise xs_errors.XenError('SRInUse') @@ -441,13 +447,15 @@ def create(self, sr_uuid, size): raise xs_errors.XenError("SRUnavailable", opterr=inst) self.iscsi.detach(sr_uuid) - def delete(self, sr_uuid): + @override + def delete(self, sr_uuid) -> None: self._pathrefresh(LVHDoISCSISR) LVHDSR.LVHDSR.delete(self, sr_uuid) for i in self.iscsiSRs: i.detach(sr_uuid) - def attach(self, sr_uuid): + @override + def attach(self, sr_uuid) -> None: try: connected = False stored_exception = None @@ -485,12 +493,14 @@ def attach(self, sr_uuid): raise xs_errors.XenError("SRUnavailable", 
opterr=inst) self._setMultipathableFlag(SCSIid=self.SCSIid) - def detach(self, sr_uuid): + @override + def detach(self, sr_uuid) -> None: LVHDSR.LVHDSR.detach(self, sr_uuid) for i in self.iscsiSRs: i.detach(sr_uuid) - def scan(self, sr_uuid): + @override + def scan(self, sr_uuid) -> None: self._pathrefresh(LVHDoISCSISR) if self.mpath == "true": for i in self.iscsiSRs: @@ -500,7 +510,8 @@ def scan(self, sr_uuid): util.SMlog("Connection failed for target %s, continuing.." % i.target) LVHDSR.LVHDSR.scan(self, sr_uuid) - def probe(self): + @override + def probe(self) -> str: self.uuid = util.gen_uuid() # When multipathing is enabled, since we don't refcount the multipath maps, @@ -523,7 +534,8 @@ def probe(self): self.iscsi.detach(self.uuid) return out - def check_sr(self, sr_uuid): + @override + def check_sr(self, sr_uuid) -> None: """Hook to check SR health""" pbdref = util.find_my_pbd(self.session, self.host_ref, self.sr_ref) if pbdref: @@ -536,12 +548,14 @@ def check_sr(self, sr_uuid): except xs_errors.SROSError: util.SMlog("Failed to attach iSCSI target") - def vdi(self, uuid): + @override + def vdi(self, uuid) -> VDI.VDI: return LVHDoISCSIVDI(self, uuid) class LVHDoISCSIVDI(LVHDSR.LVHDVDI): - def generate_config(self, sr_uuid, vdi_uuid): + @override + def generate_config(self, sr_uuid, vdi_uuid) -> str: util.SMlog("LVHDoISCSIVDI.generate_config") if not lvutil._checkLV(self.path): raise xs_errors.XenError('VDIUnavailable') @@ -562,7 +576,8 @@ def generate_config(self, sr_uuid, vdi_uuid): config = xmlrpc.client.dumps(tuple([dict]), "vdi_attach_from_config") return xmlrpc.client.dumps((config, ), "", True) - def attach_from_config(self, sr_uuid, vdi_uuid): + @override + def attach_from_config(self, sr_uuid, vdi_uuid) -> str: util.SMlog("LVHDoISCSIVDI.attach_from_config") try: self.sr.iscsi.attach(sr_uuid) diff --git a/drivers/LargeBlockSR.py b/drivers/LargeBlockSR.py index ba0ac1d13..fa66cfce8 100644 --- a/drivers/LargeBlockSR.py +++ b/drivers/LargeBlockSR.py @@ -14,6 +14,8 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . 
+from sm_typing import override + import SR from SR import deviceCheck import SRCommand @@ -52,17 +54,20 @@ class LargeBlockSR(EXTSR.EXTSR): DRIVER_TYPE = "largeblock" LOOP_SECTOR_SIZE = 512 + @override @staticmethod - def handles(srtype): + def handles(srtype) -> bool: return srtype == LargeBlockSR.DRIVER_TYPE - def load(self, sr_uuid): + @override + def load(self, sr_uuid) -> None: super(LargeBlockSR, self).load(sr_uuid) self.is_deleting = False self.vgname = LARGEBLOCK_PREFIX + sr_uuid self.remotepath = os.path.join("/dev", self.vgname, sr_uuid) - def attach(self, sr_uuid): + @override + def attach(self, sr_uuid) -> None: if not self.is_deleting: vg_device = self._get_device() self.dconf["device"] = ",".join(vg_device) @@ -71,7 +76,8 @@ def attach(self, sr_uuid): self._redo_vg_connection() # Call redo VG connection to connect it correctly to the loop device instead of the real 4KiB block device super(LargeBlockSR, self).attach(sr_uuid) - def detach(self, sr_uuid): + @override + def detach(self, sr_uuid) -> None: if not self.is_deleting: vg_device = self._get_device() self.dconf["device"] = ",".join(vg_device) @@ -79,8 +85,9 @@ def detach(self, sr_uuid): if not self.is_deleting: self._destroy_emulated_device() + @override @deviceCheck - def create(self, sr_uuid, size): + def create(self, sr_uuid, size) -> None: base_devices = self.dconf["device"].split(",") if len(base_devices) > 1: raise xs_errors.XenError("ConfigDeviceInvalid", opterr="Multiple devices configuration is not supported") @@ -96,7 +103,8 @@ def create(self, sr_uuid, size): finally: self._destroy_emulated_device(base_devices) - def delete(self, sr_uuid): + @override + def delete(self, sr_uuid) -> None: base_devices = self._get_device() self.dconf["device"] = ",".join(self._get_loopdev_from_device(base_devices)) @@ -112,8 +120,9 @@ def delete(self, sr_uuid): self._destroy_emulated_device(base_devices) self.is_deleting = False + @override @deviceCheck - def probe(self): + def probe(self) -> str: # We override EXTSR.probe because it uses EXT_PREFIX in this call return lvutil.srlist_toxml( lvutil.scan_srlist(LARGEBLOCK_PREFIX, self.dconf['device']), diff --git a/drivers/LinstorSR.py b/drivers/LinstorSR.py index c302ea332..c2579a539 100755 --- a/drivers/LinstorSR.py +++ b/drivers/LinstorSR.py @@ -14,6 +14,8 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . +from sm_typing import Optional, override + from constants import CBTLOG_TAG try: @@ -302,11 +304,13 @@ class LinstorSR(SR.SR): # SR methods. 
# -------------------------------------------------------------------------- + @override @staticmethod - def handles(type): + def handles(type) -> bool: return type == LinstorSR.DRIVER_TYPE - def load(self, sr_uuid): + @override + def load(self, sr_uuid) -> None: if not LINSTOR_AVAILABLE: raise util.SMException( 'Can\'t load LinstorSR: LINSTOR libraries are missing' @@ -541,12 +545,14 @@ def wrap(self, *args, **kwargs): return wrap - def cleanup(self): + @override + def cleanup(self) -> None: if self._vdi_shared_time: self._shared_lock_vdi(self.srcmd.params['vdi_uuid'], locked=False) + @override @_locked_load - def create(self, uuid, size): + def create(self, uuid, size) -> None: util.SMlog('LinstorSR.create for {}'.format(self.uuid)) host_adresses = util.get_host_addresses(self.session) @@ -635,8 +641,9 @@ def create(self, uuid, size): ) raise e + @override @_locked_load - def delete(self, uuid): + def delete(self, uuid) -> None: util.SMlog('LinstorSR.delete for {}'.format(self.uuid)) cleanup.gc_force(self.session, self.uuid) @@ -698,8 +705,9 @@ def delete(self, uuid): Lock.cleanupAll(self.uuid) + @override @_locked_load - def update(self, uuid): + def update(self, uuid) -> None: util.SMlog('LinstorSR.update for {}'.format(self.uuid)) # Well, how can we update a SR if it doesn't exist? :thinking: @@ -722,8 +730,9 @@ def update(self, uuid): ) } + @override @_locked_load - def attach(self, uuid): + def attach(self, uuid) -> None: util.SMlog('LinstorSR.attach for {}'.format(self.uuid)) if not self._linstor: @@ -732,18 +741,22 @@ def attach(self, uuid): opterr='no such group: {}'.format(self._group_name) ) + @override @_locked_load - def detach(self, uuid): + def detach(self, uuid) -> None: util.SMlog('LinstorSR.detach for {}'.format(self.uuid)) cleanup.abort(self.uuid) + @override @_locked_load - def probe(self): + def probe(self) -> str: util.SMlog('LinstorSR.probe for {}'.format(self.uuid)) # TODO + return '' + @override @_locked_load - def scan(self, uuid): + def scan(self, uuid) -> None: if self._init_status == self.INIT_STATUS_FAIL: return @@ -792,8 +805,9 @@ def is_master(self): return self._is_master + @override @_locked_load - def vdi(self, uuid): + def vdi(self, uuid) -> VDI.VDI: return LinstorVDI(self, uuid) # To remove in python 3.10 @@ -1545,7 +1559,8 @@ class LinstorVDI(VDI.VDI): # VDI methods. 
# -------------------------------------------------------------------------- - def load(self, vdi_uuid): + @override + def load(self, vdi_uuid) -> None: self._lock = self.sr.lock self._exists = True self._linstor = self.sr._linstor @@ -1616,7 +1631,8 @@ def raise_bad_load(e): except Exception as e: raise_bad_load(e) - def create(self, sr_uuid, vdi_uuid, size): + @override + def create(self, sr_uuid, vdi_uuid, size) -> str: # Usage example: # xe vdi-create sr-uuid=39a5826b-5a90-73eb-dd09-51e3a116f937 # name-label="linstor-vdi-1" virtual-size=4096MiB sm-config:type=vhd @@ -1724,7 +1740,8 @@ def create(self, sr_uuid, vdi_uuid, size): return VDI.VDI.get_params(self) - def delete(self, sr_uuid, vdi_uuid, data_only=False): + @override + def delete(self, sr_uuid, vdi_uuid, data_only=False) -> None: util.SMlog('LinstorVDI.delete for {}'.format(self.uuid)) if self.attached: raise xs_errors.XenError('VDIInUse') @@ -1770,7 +1787,8 @@ def delete(self, sr_uuid, vdi_uuid, data_only=False): self.sr._kick_gc() return super(LinstorVDI, self).delete(sr_uuid, vdi_uuid, data_only) - def attach(self, sr_uuid, vdi_uuid): + @override + def attach(self, sr_uuid, vdi_uuid) -> str: util.SMlog('LinstorVDI.attach for {}'.format(self.uuid)) attach_from_config = self.sr.srcmd.cmd == 'vdi_attach_from_config' if ( @@ -1825,7 +1843,8 @@ def attach(self, sr_uuid, vdi_uuid): self.attached = True return VDI.VDI.attach(self, self.sr.uuid, self.uuid) - def detach(self, sr_uuid, vdi_uuid): + @override + def detach(self, sr_uuid, vdi_uuid) -> None: util.SMlog('LinstorVDI.detach for {}'.format(self.uuid)) detach_from_config = self.sr.srcmd.cmd == 'vdi_detach_from_config' self.attached = False @@ -1887,7 +1906,8 @@ def detach(self, sr_uuid, vdi_uuid): util.SMlog('Failed to clean VDI {} during detach: {}'.format(vdi_uuid, e)) vdi_uuid = parent_vdi_uuid - def resize(self, sr_uuid, vdi_uuid, size): + @override + def resize(self, sr_uuid, vdi_uuid, size) -> str: util.SMlog('LinstorVDI.resize for {}'.format(self.uuid)) if not self.sr.is_master(): raise xs_errors.XenError( @@ -1953,10 +1973,12 @@ def resize(self, sr_uuid, vdi_uuid, size): self.sr._update_stats(self.size - old_size) return VDI.VDI.get_params(self) - def clone(self, sr_uuid, vdi_uuid): + @override + def clone(self, sr_uuid, vdi_uuid) -> str: return self._do_snapshot(sr_uuid, vdi_uuid, VDI.SNAPSHOT_DOUBLE) - def compose(self, sr_uuid, vdi1, vdi2): + @override + def compose(self, sr_uuid, vdi1, vdi2) -> None: util.SMlog('VDI.compose for {} -> {}'.format(vdi2, vdi1)) if self.vdi_type != vhdutil.VDI_TYPE_VHD: raise xs_errors.XenError('Unimplemented') @@ -1987,7 +2009,8 @@ def compose(self, sr_uuid, vdi1, vdi2): util.SMlog('Compose done') - def generate_config(self, sr_uuid, vdi_uuid): + @override + def generate_config(self, sr_uuid, vdi_uuid) -> str: """ Generate the XML config required to attach and activate a VDI for use when XAPI is not running. Attach and @@ -2033,7 +2056,8 @@ def generate_config(self, sr_uuid, vdi_uuid): config = xmlrpc.client.dumps(tuple([resp]), 'vdi_attach_from_config') return xmlrpc.client.dumps((config,), "", True) - def attach_from_config(self, sr_uuid, vdi_uuid): + @override + def attach_from_config(self, sr_uuid, vdi_uuid) -> str: """ Attach and activate a VDI using config generated by vdi_generate_config above. 
This is used for cases such as @@ -2054,6 +2078,7 @@ def attach_from_config(self, sr_uuid, vdi_uuid): 'SRUnavailable', opterr='Unable to attach from config' ) + return '' def reset_leaf(self, sr_uuid, vdi_uuid): if self.vdi_type != vhdutil.VDI_TYPE_VHD: @@ -2120,7 +2145,8 @@ def _mark_hidden(self, hidden=True): }) self.hidden = hidden - def update(self, sr_uuid, vdi_uuid): + @override + def update(self, sr_uuid, vdi_uuid) -> None: xenapi = self.session.xenapi vdi_ref = xenapi.VDI.get_by_uuid(self.uuid) @@ -2296,13 +2322,15 @@ def _create_snapshot(self, snap_uuid, snap_of_uuid=None): # Implement specific SR methods. # -------------------------------------------------------------------------- - def _rename(self, oldpath, newpath): + @override + def _rename(self, oldpath, newpath) -> None: # TODO: I'm not sure... Used by CBT. volume_uuid = self._linstor.get_volume_uuid_from_device_path(oldpath) self._linstor.update_volume_name(volume_uuid, newpath) + @override def _do_snapshot(self, sr_uuid, vdi_uuid, snapType, - cloneOp=False, secondary=None, cbtlog=None): + cloneOp=False, secondary=None, cbtlog=None) -> str: # If cbt enabled, save file consistency state. if cbtlog is not None: if blktap2.VDI.tap_status(self.session, vdi_uuid): diff --git a/drivers/MooseFSSR.py b/drivers/MooseFSSR.py index 8fc4a4adf..4ebd7bd8c 100755 --- a/drivers/MooseFSSR.py +++ b/drivers/MooseFSSR.py @@ -18,6 +18,8 @@ # # MooseFSSR: Based on CEPHFSSR and FileSR, mounts MooseFS share +from sm_typing import override + import errno import os import syslog as _syslog @@ -32,6 +34,7 @@ import SRCommand import FileSR # end of careful +import VDI import cleanup import util import vhdutil @@ -79,13 +82,14 @@ class MooseFSSR(FileSR.FileSR): DRIVER_TYPE = 'moosefs' - def handles(sr_type): + @override + @staticmethod + def handles(sr_type) -> bool: # fudge, because the parent class (FileSR) checks for smb to alter its behavior return sr_type == MooseFSSR.DRIVER_TYPE or sr_type == 'smb' - handles = staticmethod(handles) - - def load(self, sr_uuid): + @override + def load(self, sr_uuid) -> None: if not self._is_moosefs_available(): raise xs_errors.XenError( 'SRUnavailable', @@ -176,7 +180,8 @@ def unmount(self, mountpoint, rmmountpoint): except OSError as inst: raise MooseFSException("Command rmdir failed with error '%s'" % inst.strerror) - def attach(self, sr_uuid): + @override + def attach(self, sr_uuid) -> None: if not self.checkmount(): try: self.mount() @@ -184,7 +189,8 @@ def attach(self, sr_uuid): raise xs_errors.SROSError(12, exc.errstr) self.attached = True - def probe(self): + @override + def probe(self) -> str: try: self.mount(PROBE_MOUNTPOINT) sr_list = filter(util.match_uuid, util.listdir(PROBE_MOUNTPOINT)) @@ -194,7 +200,8 @@ def probe(self): # Create a dictionary from the SR uuids to feed SRtoXML() return util.SRtoXML({sr_uuid: {} for sr_uuid in sr_list}) - def detach(self, sr_uuid): + @override + def detach(self, sr_uuid) -> None: if not self.checkmount(): return util.SMlog("Aborting GC/coalesce") @@ -204,7 +211,8 @@ def detach(self, sr_uuid): self.unmount(self.mountpoint, True) self.attached = False - def create(self, sr_uuid, size): + @override + def create(self, sr_uuid, size) -> None: if self.checkmount(): raise xs_errors.SROSError(113, 'MooseFS mount point already attached') @@ -248,7 +256,8 @@ def create(self, sr_uuid, size): finally: self.detach(sr_uuid) - def delete(self, sr_uuid): + @override + def delete(self, sr_uuid) -> None: # try to remove/delete non VDI contents first super(MooseFSSR, 
self).delete(sr_uuid) try: @@ -268,7 +277,8 @@ def delete(self, sr_uuid): if inst.code != errno.ENOENT: raise xs_errors.SROSError(114, "Failed to remove MooseFS mount point") - def vdi(self, uuid, loadLocked=False): + @override + def vdi(self, uuid, loadLocked=False) -> VDI.VDI: return MooseFSFileVDI(self, uuid) @staticmethod @@ -276,7 +286,8 @@ def _is_moosefs_available(): return util.find_executable('mfsmount') class MooseFSFileVDI(FileSR.FileVDI): - def attach(self, sr_uuid, vdi_uuid): + @override + def attach(self, sr_uuid, vdi_uuid) -> str: if not hasattr(self, 'xenstore_data'): self.xenstore_data = {} @@ -284,7 +295,8 @@ def attach(self, sr_uuid, vdi_uuid): return super(MooseFSFileVDI, self).attach(sr_uuid, vdi_uuid) - def generate_config(self, sr_uuid, vdi_uuid): + @override + def generate_config(self, sr_uuid, vdi_uuid) -> str: util.SMlog("MooseFSFileVDI.generate_config") if not util.pathexists(self.path): raise xs_errors.XenError('VDIUnavailable') @@ -298,15 +310,16 @@ def generate_config(self, sr_uuid, vdi_uuid): config = xmlrpc.client.dumps(tuple([resp]), "vdi_attach_from_config") return xmlrpc.client.dumps((config,), "", True) - def attach_from_config(self, sr_uuid, vdi_uuid): + @override + def attach_from_config(self, sr_uuid, vdi_uuid) -> str: try: if not util.pathexists(self.sr.path): - self.sr.attach(sr_uuid) + return self.sr.attach(sr_uuid) except: util.logException("MooseFSFileVDI.attach_from_config") raise xs_errors.XenError('SRUnavailable', opterr='Unable to attach from config') - + return '' if __name__ == '__main__': SRCommand.run(MooseFSSR, DRIVER_INFO) diff --git a/drivers/NFSSR.py b/drivers/NFSSR.py index ef73e1b4f..6eeedeb7b 100755 --- a/drivers/NFSSR.py +++ b/drivers/NFSSR.py @@ -17,9 +17,12 @@ # # FileSR: local-file storage repository +from sm_typing import override + import socket import SR +import VDI import SRCommand import FileSR import util @@ -68,11 +71,13 @@ class NFSSR(FileSR.SharedFileSR): """NFS file-based storage repository""" - def handles(type): + @override + @staticmethod + def handles(type) -> bool: return type == 'nfs' - handles = staticmethod(handles) - def load(self, sr_uuid): + @override + def load(self, sr_uuid) -> None: self.ops_exclusive = FileSR.OPS_EXCLUSIVE self.lock = Lock(vhdutil.LOCK_TYPE_SR, self.uuid) self.sr_vditype = SR.DEFAULT_TAP @@ -137,7 +142,8 @@ def mount(self, mountpoint, remotepath, timeout=None, retrans=None): except nfs.NfsException as exc: raise xs_errors.XenError('NFSMount', opterr=exc.errstr) - def attach(self, sr_uuid): + @override + def attach(self, sr_uuid) -> None: if not self._checkmount(): try: self.validate_remotepath(False) @@ -164,7 +170,8 @@ def mount_remotepath(self, sr_uuid): self.mount(self.path, self.remotepath, timeout=io_timeout, retrans=io_retrans) - def probe(self): + @override + def probe(self) -> str: # Verify NFS target and port util._testHost(self.dconf['server'], NFSPORT, 'NFSTarget') @@ -182,7 +189,8 @@ def probe(self): except: pass - def detach(self, sr_uuid): + @override + def detach(self, sr_uuid) -> None: """Detach the SR: Unmounts and removes the mountpoint""" if not self._checkmount(): return @@ -199,7 +207,8 @@ def detach(self, sr_uuid): self.attached = False - def create(self, sr_uuid, size): + @override + def create(self, sr_uuid, size) -> None: util._testHost(self.dconf['server'], NFSPORT, 'NFSTarget') self.validate_remotepath(True) if self._checkmount(): @@ -239,7 +248,8 @@ def create(self, sr_uuid, size): % inst.code) self.detach(sr_uuid) - def delete(self, sr_uuid): + @override + 
def delete(self, sr_uuid) -> None: # try to remove/delete non VDI contents first super(NFSSR, self).delete(sr_uuid) try: @@ -260,7 +270,8 @@ def delete(self, sr_uuid): if inst.code != errno.ENOENT: raise xs_errors.XenError('NFSDelete') - def vdi(self, uuid): + @override + def vdi(self, uuid) -> VDI.VDI: return NFSFileVDI(self, uuid) def scan_exports(self, target): @@ -287,7 +298,8 @@ def set_transport(self): class NFSFileVDI(FileSR.FileVDI): - def attach(self, sr_uuid, vdi_uuid): + @override + def attach(self, sr_uuid, vdi_uuid) -> str: if not hasattr(self, 'xenstore_data'): self.xenstore_data = {} @@ -295,7 +307,8 @@ def attach(self, sr_uuid, vdi_uuid): return super(NFSFileVDI, self).attach(sr_uuid, vdi_uuid) - def generate_config(self, sr_uuid, vdi_uuid): + @override + def generate_config(self, sr_uuid, vdi_uuid) -> str: util.SMlog("NFSFileVDI.generate_config") if not util.pathexists(self.path): raise xs_errors.XenError('VDIUnavailable') @@ -311,12 +324,13 @@ def generate_config(self, sr_uuid, vdi_uuid): config = xmlrpc.client.dumps(tuple([resp]), "vdi_attach_from_config") return xmlrpc.client.dumps((config, ), "", True) - def attach_from_config(self, sr_uuid, vdi_uuid): + @override + def attach_from_config(self, sr_uuid, vdi_uuid) -> str: """Used for HA State-file only. Will not just attach the VDI but also start a tapdisk on the file""" util.SMlog("NFSFileVDI.attach_from_config") try: - self.sr.attach(sr_uuid) + return self.sr.attach(sr_uuid) except: util.logException("NFSFileVDI.attach_from_config") raise xs_errors.XenError('SRUnavailable', \ diff --git a/drivers/RawISCSISR.py b/drivers/RawISCSISR.py index 1df1c7a24..a4848a88c 100644 --- a/drivers/RawISCSISR.py +++ b/drivers/RawISCSISR.py @@ -18,8 +18,11 @@ # ISCSISR: ISCSI software initiator SR driver # +from sm_typing import override + import SR import SRCommand +import VDI import BaseISCSI import LUNperVDI import util @@ -53,25 +56,30 @@ class RawISCSISR(BaseISCSI.BaseISCSISR): """Raw ISCSI storage repository""" - def handles(type): + @override + @staticmethod + def handles(type) -> bool: if type == "iscsi": return True return False - handles = staticmethod(handles) - def load(self, vdi_uuid): + @override + def load(self, vdi_uuid) -> None: super(RawISCSISR, self).load(vdi_uuid) self.managed = True - def detach(self, sr_uuid): + @override + def detach(self, sr_uuid) -> None: super(RawISCSISR, self).detach_and_delete(sr_uuid) - def vdi(self, uuid): + @override + def vdi(self, uuid) -> VDI.VDI: return ISCSIVDI(self, uuid) class ISCSIVDI(LUNperVDI.RAWVDI): - def load(self, vdi_uuid): + @override + def load(self, vdi_uuid) -> None: super(ISCSIVDI, self).load(vdi_uuid) self.managed = True diff --git a/drivers/SHMSR.py b/drivers/SHMSR.py index 5e3ef7f47..250d58130 100644 --- a/drivers/SHMSR.py +++ b/drivers/SHMSR.py @@ -15,6 +15,8 @@ # along with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +from sm_typing import override + import SR import VDI import SRCommand @@ -54,25 +56,29 @@ def _loadvdis(self): except: pass - def handles(type): + @override + @staticmethod + def handles(type) -> bool: """Do we handle this type?""" if type == TYPE: return True return False - handles = staticmethod(handles) - def content_type(self, sr_uuid): + @override + def content_type(self, sr_uuid) -> str: """Returns the content_type XML""" return super(SHMSR, self).content_type(sr_uuid) - def vdi(self, uuid): + @override + def vdi(self, uuid) -> VDI.VDI: """Create a VDI class""" 
if 'vdi_location' in self.srcmd.params: return SHMVDI(self, uuid, self.srcmd.params['vdi_location']) else: return SHMVDI(self, uuid, self.srcmd.params['device_config']['location']) - def load(self, sr_uuid): + @override + def load(self, sr_uuid) -> None: """Initialises the SR""" if 'location' not in self.dconf: raise xs_errors.XenError('ConfigLocationMissing') @@ -82,26 +88,31 @@ def load(self, sr_uuid): self.physical_utilisation = 0 self.virtual_allocation = 0 - def attach(self, sr_uuid): + @override + def attach(self, sr_uuid) -> None: """Std. attach""" self._loadvdis() - def detach(self, sr_uuid): + @override + def detach(self, sr_uuid) -> None: """Std. detach""" pass - def scan(self, sr_uuid): + @override + def scan(self, sr_uuid) -> None: """Scan""" self._loadvdis() - return super(SHMSR, self).scan(sr_uuid) + super(SHMSR, self).scan(sr_uuid) - def create(self, sr_uuid, size): + @override + def create(self, sr_uuid, size) -> None: self.attach(sr_uuid) self.detach(sr_uuid) class SHMVDI(VDI.VDI): - def load(self, vdi_uuid): + @override + def load(self, vdi_uuid) -> None: try: stat = os.stat(self.path) self.utilisation = int(stat.st_size) @@ -120,13 +131,16 @@ def __init__(self, mysr, uuid, filename): self.shareable = True self.sm_config = {} - def detach(self, sr_uuid, vdi_uuid): + @override + def detach(self, sr_uuid, vdi_uuid) -> None: pass - def clone(self, sr_uuid, vdi_uuid): + @override + def clone(self, sr_uuid, vdi_uuid) -> str: return self.get_params() - def snapshot(self, sr_uuid, vdi_uuid): + @override + def snapshot(self, sr_uuid, vdi_uuid) -> str: return self.get_params() if __name__ == '__main__': diff --git a/drivers/SMBSR.py b/drivers/SMBSR.py index 962060cd9..34ba40d67 100755 --- a/drivers/SMBSR.py +++ b/drivers/SMBSR.py @@ -17,8 +17,11 @@ # # SMBSR: SMB filesystem based storage repository +from sm_typing import override + import SR import SRCommand +import VDI import FileSR import util import errno @@ -71,11 +74,13 @@ def __init__(self, errstr): class SMBSR(FileSR.SharedFileSR): """SMB file-based storage repository""" - def handles(type): + @override + @staticmethod + def handles(type) -> bool: return type == 'smb' - handles = staticmethod(handles) - def load(self, sr_uuid): + @override + def load(self, sr_uuid) -> None: self.ops_exclusive = FileSR.OPS_EXCLUSIVE self.lock = Lock(vhdutil.LOCK_TYPE_SR, self.uuid) self.sr_vditype = SR.DEFAULT_TAP @@ -190,7 +195,8 @@ def __check_license(self): restrictions['restrict_cifs'] == "true": raise xs_errors.XenError('NoSMBLicense') - def attach(self, sr_uuid): + @override + def attach(self, sr_uuid) -> None: if not self.checkmount(): try: self.mount() @@ -208,7 +214,8 @@ def attach(self, sr_uuid): self.attached = True - def probe(self): + @override + def probe(self) -> str: err = "SMBMount" try: self.mount(PROBE_MOUNTPOINT) @@ -223,7 +230,8 @@ def probe(self): # Create a dictionary from the SR uuids to feed SRtoXML() return util.SRtoXML({sr_uuid: {} for sr_uuid in sr_list}) - def detach(self, sr_uuid): + @override + def detach(self, sr_uuid) -> None: """Detach the SR: Unmounts and removes the mountpoint""" if not self.checkmount(): return @@ -241,7 +249,8 @@ def detach(self, sr_uuid): self.attached = False - def create(self, sr_uuid, size): + @override + def create(self, sr_uuid, size) -> None: self.__check_license() if self.checkmount(): @@ -283,7 +292,8 @@ def create(self, sr_uuid, size): .format(os.strerror(inst.code))) from inst self.detach(sr_uuid) - def delete(self, sr_uuid): + @override + def delete(self, sr_uuid) -> None: # 
try to remove/delete non VDI contents first super(SMBSR, self).delete(sr_uuid) try: @@ -299,12 +309,14 @@ def delete(self, sr_uuid): if inst.code != errno.ENOENT: raise xs_errors.XenError('SMBDelete') - def vdi(self, uuid): + @override + def vdi(self, uuid) -> VDI.VDI: return SMBFileVDI(self, uuid) class SMBFileVDI(FileSR.FileVDI): - def attach(self, sr_uuid, vdi_uuid): + @override + def attach(self, sr_uuid, vdi_uuid) -> str: if not hasattr(self, 'xenstore_data'): self.xenstore_data = {} @@ -312,7 +324,8 @@ def attach(self, sr_uuid, vdi_uuid): return super(SMBFileVDI, self).attach(sr_uuid, vdi_uuid) - def generate_config(self, sr_uuid, vdi_uuid): + @override + def generate_config(self, sr_uuid, vdi_uuid) -> str: util.SMlog("SMBFileVDI.generate_config") if not util.pathexists(self.path): raise xs_errors.XenError('VDIUnavailable') @@ -327,17 +340,19 @@ def generate_config(self, sr_uuid, vdi_uuid): config = xmlrpc.client.dumps(tuple([resp]), "vdi_attach_from_config") return xmlrpc.client.dumps((config, ), "", True) - def attach_from_config(self, sr_uuid, vdi_uuid): + @override + def attach_from_config(self, sr_uuid, vdi_uuid) -> str: """Used for HA State-file only. Will not just attach the VDI but also start a tapdisk on the file""" util.SMlog("SMBFileVDI.attach_from_config") try: if not util.pathexists(self.sr.path): - self.sr.attach(sr_uuid) + return self.sr.attach(sr_uuid) except: util.logException("SMBFileVDI.attach_from_config") raise xs_errors.XenError('SRUnavailable', \ opterr='Unable to attach from config') + return '' if __name__ == '__main__': diff --git a/drivers/SR.py b/drivers/SR.py index d1aacdb33..fb2798ea3 100755 --- a/drivers/SR.py +++ b/drivers/SR.py @@ -80,10 +80,10 @@ class SR(object): sr_vditype: string, repository type """ - def handles(type): + @staticmethod + def handles(type) -> bool: """Returns True if this SR class understands the given dconf string""" return False - handles = staticmethod(handles) def __init__(self, srcmd, sr_uuid): """Base class initializer. All subclasses should call SR.__init__ @@ -229,7 +229,7 @@ def _addLUNperVDIkey(self): except: pass - def create(self, uuid, size): + def create(self, uuid, size) -> None: """Create this repository. This operation may delete existing data. @@ -243,7 +243,7 @@ def create(self, uuid, size): """ raise xs_errors.XenError('Unimplemented') - def delete(self, uuid): + def delete(self, uuid) -> None: """Delete this repository and its contents. This operation IS idempotent -- it will succeed if the repository @@ -259,7 +259,7 @@ def delete(self, uuid): """ raise xs_errors.XenError('Unimplemented') - def update(self, uuid): + def update(self, uuid) -> None: """Refresh the fields in the SR object Returns: @@ -270,7 +270,7 @@ def update(self, uuid): # no-op unless individual backends implement it return - def attach(self, uuid): + def attach(self, uuid) -> None: """Initiate local access to the SR. Initialises any device state required to access the substrate. @@ -283,7 +283,7 @@ def attach(self, uuid): """ raise xs_errors.XenError('Unimplemented') - def after_master_attach(self, uuid): + def after_master_attach(self, uuid) -> None: """Perform actions required after attaching on the pool master Return: None @@ -298,7 +298,7 @@ def after_master_attach(self, uuid): self.session.xenapi.message.create( msg_name, 2, "SR", uuid, msg_body) - def detach(self, uuid): + def detach(self, uuid) -> None: """Remove local access to the SR. Destroys any device state initiated by the sr_attach() operation. 
@@ -312,7 +312,7 @@ def detach(self, uuid): """ raise xs_errors.XenError('Unimplemented') - def probe(self): + def probe(self) -> str: """Perform a backend-specific scan, using the current dconf. If the dconf is complete, then this will return a list of the SRs present of this type on the device, if any. If the dconf is partial, then a @@ -332,7 +332,7 @@ def probe(self): """ raise xs_errors.XenError('Unimplemented') - def scan(self, uuid): + def scan(self, uuid) -> None: """ Returns: """ @@ -342,7 +342,7 @@ def scan(self, uuid): scanrecord = ScanRecord(self) scanrecord.synchronise() - def replay(self, uuid): + def replay(self, uuid) -> None: """Replay a multi-stage log entry Returns: @@ -352,30 +352,27 @@ def replay(self, uuid): """ raise xs_errors.XenError('Unimplemented') - def content_type(self, uuid): + def content_type(self, uuid) -> str: """Returns the 'content_type' of an SR as a string""" return xmlrpc.client.dumps((str(self.sr_vditype), ), "", True) - def load(self, sr_uuid): + def load(self, sr_uuid) -> None: """Post-init hook""" pass - def check_sr(self, sr_uuid): + def check_sr(self, sr_uuid) -> None: """Hook to check SR health""" pass - def vdi(self, uuid): + def vdi(self, uuid) -> 'VDI.VDI': """Return VDI object owned by this repository""" - if uuid not in self.vdis: - self.vdis[uuid] = VDI.VDI(self, uuid) raise xs_errors.XenError('Unimplemented') - return self.vdis[uuid] - def forget_vdi(self, uuid): + def forget_vdi(self, uuid) -> None: vdi = self.session.xenapi.VDI.get_by_uuid(uuid) self.session.xenapi.VDI.db_forget(vdi) - def cleanup(self): + def cleanup(self) -> None: # callback after the op is done pass diff --git a/drivers/VDI.py b/drivers/VDI.py index d371bd1b0..fa7c5a449 100755 --- a/drivers/VDI.py +++ b/drivers/VDI.py @@ -16,6 +16,8 @@ # VDI: Base class for virtual disk instances # +from sm_typing import Dict, Optional + import cleanup import SR import xmlrpc.client @@ -134,7 +136,7 @@ def from_uuid(session, vdi_uuid): sr.srcmd.params['vdi_ref'] = vdi_ref return sr.vdi(vdi_uuid) - def create(self, sr_uuid, vdi_uuid, size): + def create(self, sr_uuid, vdi_uuid, size) -> str: """Create a VDI of size MB on the given SR. This operation IS NOT idempotent and will fail if the UUID @@ -147,7 +149,7 @@ def create(self, sr_uuid, vdi_uuid, size): """ raise xs_errors.XenError('Unimplemented') - def update(self, sr_uuid, vdi_uuid): + def update(self, sr_uuid, vdi_uuid) -> None: """Query and update the configuration of a particular VDI. Given an SR and VDI UUID, this operation returns summary statistics @@ -157,7 +159,7 @@ def update(self, sr_uuid, vdi_uuid): # no-op unless individual backends implement it return - def introduce(self, sr_uuid, vdi_uuid): + def introduce(self, sr_uuid, vdi_uuid) -> str: """Explicitly introduce a particular VDI. Given an SR and VDI UUID and a disk location (passed in via the @@ -166,7 +168,7 @@ def introduce(self, sr_uuid, vdi_uuid): """ raise xs_errors.XenError('Unimplemented') - def attach(self, sr_uuid, vdi_uuid): + def attach(self, sr_uuid, vdi_uuid) -> str: """Initiate local access to the VDI. Initialises any device state required to access the VDI. @@ -180,7 +182,7 @@ def attach(self, sr_uuid, vdi_uuid): 'xenstore_data': (self.xenstore_data or {})} return xmlrpc.client.dumps((struct, ), "", True) - def detach(self, sr_uuid, vdi_uuid): + def detach(self, sr_uuid, vdi_uuid) -> None: """Remove local access to the VDI. Destroys any device state initialised via the vdi.attach() command. 
@@ -188,7 +190,7 @@ def detach(self, sr_uuid, vdi_uuid): """ raise xs_errors.XenError('Unimplemented') - def clone(self, sr_uuid, vdi_uuid): + def clone(self, sr_uuid, vdi_uuid) -> str: """Create a mutable instance of the referenced VDI. This operation is not idempotent and will fail if the UUID @@ -209,14 +211,14 @@ def resize_online(self, sr_uuid, vdi_uuid, size): been paused for the duration of this call.""" raise xs_errors.XenError('Unimplemented') - def generate_config(self, sr_uuid, vdi_uuid): + def generate_config(self, sr_uuid, vdi_uuid) -> str: """Generate the XML config required to activate a VDI for use when XAPI is not running. Activation is handled by the vdi_attach_from_config() SMAPI call. """ raise xs_errors.XenError('Unimplemented') - def compose(self, sr_uuid, vdi1, vdi2): + def compose(self, sr_uuid, vdi1, vdi2) -> None: """Layer the updates from [vdi2] onto [vdi1], calling the result [vdi2]. @@ -225,7 +227,7 @@ def compose(self, sr_uuid, vdi1, vdi2): """ raise xs_errors.XenError('Unimplemented') - def attach_from_config(self, sr_uuid, vdi_uuid): + def attach_from_config(self, sr_uuid, vdi_uuid) -> str: """Activate a VDI based on the config passed in on the CLI. For use when XAPI is not running. The config is generated by the Activation is handled by the vdi_generate_config() SMAPI call. @@ -233,23 +235,23 @@ def attach_from_config(self, sr_uuid, vdi_uuid): raise xs_errors.XenError('Unimplemented') def _do_snapshot(self, sr_uuid, vdi_uuid, snapType, - cloneOp=False, secondary=None, cbtlog=None): + cloneOp=False, secondary=None, cbtlog=None) -> str: raise xs_errors.XenError('Unimplemented') - def _delete_cbt_log(self): + def _delete_cbt_log(self) -> None: raise xs_errors.XenError('Unimplemented') - def _rename(self, old, new): + def _rename(self, old, new) -> None: raise xs_errors.XenError('Unimplemented') - def _cbt_log_exists(self, logpath): + def _cbt_log_exists(self, logpath) -> bool: """Check if CBT log file exists Must be implemented by all classes inheriting from base VDI class """ raise xs_errors.XenError('Unimplemented') - def resize(self, sr_uuid, vdi_uuid, size): + def resize(self, sr_uuid, vdi_uuid, size) -> str: """Resize the given VDI to size MB. Size can be any valid disk size greater than [or smaller than] the current value. @@ -295,7 +297,7 @@ def resize_cbt(self, sr_uuid, vdi_uuid, size): % vdi_uuid) self._disable_cbt_on_error(alert_name, alert_str) - def delete(self, sr_uuid, vdi_uuid, data_only=False): + def delete(self, sr_uuid, vdi_uuid, data_only=False) -> None: """Delete this VDI. This operation IS idempotent and should succeed if the VDI @@ -371,7 +373,7 @@ def delete(self, sr_uuid, vdi_uuid, data_only=False): lock.release() lock.cleanup("cbtlog", str(vdi_uuid)) - def snapshot(self, sr_uuid, vdi_uuid): + def snapshot(self, sr_uuid, vdi_uuid) -> str: """Save an immutable copy of the referenced VDI. 
This operation IS NOT idempotent and will fail if the UUID @@ -405,7 +407,7 @@ def snapshot(self, sr_uuid, vdi_uuid): return self._do_snapshot(sr_uuid, vdi_uuid, snapType, secondary=secondary, cbtlog=cbtlog) - def activate(self, sr_uuid, vdi_uuid): + def activate(self, sr_uuid, vdi_uuid) -> Optional[Dict[str, str]]: """Activate VDI - called pre tapdisk open""" if self._get_blocktracking_status(): if 'args' in self.sr.srcmd.params: @@ -443,7 +445,7 @@ def activate(self, sr_uuid, vdi_uuid): return {'cbtlog': logpath} return None - def deactivate(self, sr_uuid, vdi_uuid): + def deactivate(self, sr_uuid, vdi_uuid) -> None: """Deactivate VDI - called post tapdisk close""" if self._get_blocktracking_status(): from lock import Lock @@ -459,7 +461,7 @@ def deactivate(self, sr_uuid, vdi_uuid): finally: lock.release() - def get_params(self): + def get_params(self) -> str: """ Returns: XMLRPC response containing a single struct with fields @@ -469,7 +471,7 @@ def get_params(self): 'uuid': self.uuid} return xmlrpc.client.dumps((struct, ), "", True) - def load(self, vdi_uuid): + def load(self, vdi_uuid) -> None: """Post-init hook""" pass @@ -806,7 +808,7 @@ def _cbt_snapshot(self, snapshot_uuid, consistency_state): % self.uuid) self._disable_cbt_on_error(alert_name, alert_str) - def _get_blocktracking_status(self, uuid=None): + def _get_blocktracking_status(self, uuid=None) -> bool: """ Get blocktracking status """ if not uuid: uuid = self.uuid @@ -828,7 +830,7 @@ def _set_blocktracking_status(self, vdi_ref, enable): self.session.xenapi.VDI.add_to_other_config( vdi_ref, "cbt_enabled", enable) - def _ensure_cbt_space(self): + def _ensure_cbt_space(self) -> None: """ Ensure enough CBT space """ pass @@ -837,12 +839,12 @@ def _get_cbt_logname(self, uuid): logName = "%s.%s" % (uuid, CBTLOG_TAG) return logName - def _get_cbt_logpath(self, uuid): + def _get_cbt_logpath(self, uuid) -> str: """ Get CBT logpath """ logName = self._get_cbt_logname(uuid) return os.path.join(self.sr.path, logName) - def _create_cbt_log(self): + def _create_cbt_log(self) -> str: """ Create CBT log """ try: logpath = self._get_cbt_logpath(self.uuid) @@ -861,7 +863,7 @@ def _create_cbt_log(self): return logpath - def _activate_cbt_log(self, logname): + def _activate_cbt_log(self, logname) -> bool: """Activate CBT log file SR specific Implementation required for VDIs on block-based SRs. @@ -869,7 +871,7 @@ def _activate_cbt_log(self, logname): """ return False - def _deactivate_cbt_log(self, logname): + def _deactivate_cbt_log(self, logname) -> None: """Deactivate CBT log file SR specific Implementation required for VDIs on block-based SRs. 
diff --git a/drivers/XFSSR.py b/drivers/XFSSR.py index ad4aca742..5bd732674 100755 --- a/drivers/XFSSR.py +++ b/drivers/XFSSR.py @@ -18,9 +18,12 @@ # # XFSSR: Based on local-file storage repository, mounts xfs partition +from sm_typing import override + import SR from SR import deviceCheck import SRCommand +import VDI import FileSR import util import lvutil @@ -60,11 +63,13 @@ class XFSSR(FileSR.FileSR): DRIVER_TYPE = 'xfs' - def handles(srtype): + @override + @staticmethod + def handles(srtype) -> bool: return srtype == XFSSR.DRIVER_TYPE - handles = staticmethod(handles) - def load(self, sr_uuid): + @override + def load(self, sr_uuid) -> None: if not self._is_xfs_available(): raise xs_errors.XenError( 'SRUnavailable', @@ -81,7 +86,8 @@ def load(self, sr_uuid): self.attached = self._checkmount() self.driver_config = DRIVER_CONFIG - def delete(self, sr_uuid): + @override + def delete(self, sr_uuid) -> None: super(XFSSR, self).delete(sr_uuid) # Check PVs match VG @@ -111,7 +117,8 @@ def delete(self, sr_uuid): raise xs_errors.XenError('LVMDelete', \ opterr='errno is %d' % inst.code) - def attach(self, sr_uuid): + @override + def attach(self, sr_uuid) -> None: if not self._checkmount(): try: #Activate LV @@ -150,7 +157,8 @@ def attach(self, sr_uuid): for dev in self.dconf['device'].split(','): self.block_setscheduler(dev) - def detach(self, sr_uuid): + @override + def detach(self, sr_uuid) -> None: super(XFSSR, self).detach(sr_uuid) try: # deactivate SR @@ -160,13 +168,15 @@ def detach(self, sr_uuid): raise xs_errors.XenError('LVMUnMount', \ opterr='lvm -an failed errno is %d' % inst.code) + @override @deviceCheck - def probe(self): + def probe(self) -> str: return lvutil.srlist_toxml(lvutil.scan_srlist(EXT_PREFIX, self.dconf['device']), EXT_PREFIX) + @override @deviceCheck - def create(self, sr_uuid, size): + def create(self, sr_uuid, size) -> None: if self._checkmount(): raise xs_errors.XenError('SRExists') @@ -224,7 +234,8 @@ def create(self, sr_uuid, size): scsiutil.add_serial_record(self.session, self.sr_ref, \ scsiutil.devlist_to_serialstring(self.dconf['device'].split(','))) - def vdi(self, uuid, loadLocked = False): + @override + def vdi(self, uuid, loadLocked = False) -> VDI.VDI: return XFSFileVDI(self, uuid) @staticmethod @@ -233,7 +244,8 @@ def _is_xfs_available(): class XFSFileVDI(FileSR.FileVDI): - def attach(self, sr_uuid, vdi_uuid): + @override + def attach(self, sr_uuid, vdi_uuid) -> str: if not hasattr(self, 'xenstore_data'): self.xenstore_data = {} diff --git a/drivers/ZFSSR.py b/drivers/ZFSSR.py index cf5eb12d6..13a895a25 100644 --- a/drivers/ZFSSR.py +++ b/drivers/ZFSSR.py @@ -14,8 +14,11 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . 
+from sm_typing import override + import SR import SRCommand +import VDI import FileSR @@ -71,11 +74,13 @@ def is_zfs_path(path): class ZFSSR(FileSR.FileSR): DRIVER_TYPE = 'zfs' + @override @staticmethod - def handles(type): + def handles(type) -> bool: return type == ZFSSR.DRIVER_TYPE - def load(self, sr_uuid): + @override + def load(self, sr_uuid) -> None: if not is_zfs_available(): raise xs_errors.XenError( 'SRUnavailable', @@ -83,7 +88,8 @@ def load(self, sr_uuid): ) return super(ZFSSR, self).load(sr_uuid) - def create(self, sr_uuid, size): + @override + def create(self, sr_uuid, size) -> None: if not is_zfs_path(self.remotepath): raise xs_errors.XenError( 'ZFSSRCreate', @@ -91,7 +97,8 @@ def create(self, sr_uuid, size): ) return super(ZFSSR, self).create(sr_uuid, size) - def delete(self, sr_uuid): + @override + def delete(self, sr_uuid) -> None: if not self._checkmount(): raise xs_errors.XenError( 'ZFSSRDelete', @@ -99,28 +106,33 @@ def delete(self, sr_uuid): ) return super(ZFSSR, self).delete(sr_uuid) - def attach(self, sr_uuid): + @override + def attach(self, sr_uuid) -> None: if not is_zfs_path(self.remotepath): raise xs_errors.XenError( 'SRUnavailable', opterr='Invalid ZFS path' ) - return super(ZFSSR, self).attach(sr_uuid) + super(ZFSSR, self).attach(sr_uuid) - def detach(self, sr_uuid): + @override + def detach(self, sr_uuid) -> None: return super(ZFSSR, self).detach(sr_uuid) - def vdi(self, uuid, loadLocked=False): + @override + def vdi(self, uuid, loadLocked=False) -> VDI.VDI: return ZFSFileVDI(self, uuid) # Ensure _checkmount is overridden to prevent bad behaviors in FileSR. - def _checkmount(self): + @override + def _checkmount(self) -> bool: return super(ZFSSR, self)._checkmount() and \ is_zfs_path(self.remotepath) class ZFSFileVDI(FileSR.FileVDI): - def attach(self, sr_uuid, vdi_uuid): + @override + def attach(self, sr_uuid, vdi_uuid) -> str: if not hasattr(self, 'xenstore_data'): self.xenstore_data = {} diff --git a/drivers/blktap2.py b/drivers/blktap2.py index 0daf3e977..05d12b7bd 100755 --- a/drivers/blktap2.py +++ b/drivers/blktap2.py @@ -18,7 +18,9 @@ # blktap2: blktap/tapdisk management layer # -from sm_typing import Any, Callable, ClassVar, Dict +from sm_typing import Any, Callable, ClassVar, Dict, override + +from abc import abstractmethod import grp import os @@ -152,7 +154,8 @@ def __init__(self, cmd, **info): self.cmd = cmd self.info = info - def __str__(self): + @override + def __str__(self) -> str: items = self.info.items() info = ", ".join("%s=%s" % item for item in items) @@ -448,7 +451,8 @@ class TapdiskExists(Exception): def __init__(self, tapdisk): self.tapdisk = tapdisk - def __str__(self): + @override + def __str__(self) -> str: return "%s already running" % self.tapdisk @@ -458,7 +462,8 @@ class TapdiskNotRunning(Exception): def __init__(self, **attrs): self.attrs = attrs - def __str__(self): + @override + def __str__(self) -> str: items = iter(self.attrs.items()) attrs = ", ".join("%s=%s" % attr for attr in items) @@ -471,7 +476,8 @@ class TapdiskNotUnique(Exception): def __init__(self, tapdisks): self.tapdisks = tapdisks - def __str__(self): + @override + def __str__(self) -> str: tapdisks = map(str, self.tapdisks) return "Found multiple tapdisks: %s" % tapdisks @@ -483,7 +489,8 @@ def __init__(self, arg, err): self.arg = arg self.err = err - def __str__(self): + @override + def __str__(self) -> str: return "Tapdisk(%s): %s" % (self.arg, self.err) def get_error(self): @@ -496,7 +503,8 @@ class TapdiskInvalidState(Exception): def __init__(self, 
tapdisk): self.tapdisk = tapdisk - def __str__(self): + @override + def __str__(self) -> str: return str(self.tapdisk) @@ -518,8 +526,9 @@ class KObject(object): SYSFS_CLASSTYPE: ClassVar[str] = "" - def sysfs_devname(self): - raise NotImplementedError("sysfs_devname is undefined") + @abstractmethod + def sysfs_devname(self) -> str: + pass class Attribute(object): @@ -538,7 +547,8 @@ class NoSuchAttribute(Exception): def __init__(self, name): self.name = name - def __str__(self): + @override + def __str__(self) -> str: return "No such attribute: %s" % self.name def _open(self, mode='r'): @@ -595,10 +605,12 @@ def allocate(cls): def free(self): TapCtl.free(self.minor) - def __str__(self): + @override + def __str__(self) -> str: return "%s(minor=%d)" % (self.__class__.__name__, self.minor) - def sysfs_devname(self): + @override + def sysfs_devname(self) -> str: return "blktap!blktap%d" % self.minor class Pool(Attribute): @@ -666,7 +678,8 @@ def __init__(self, pid, minor, _type, path, state): self._dirty = False self._blktap = None - def __str__(self): + @override + def __str__(self) -> str: state = self.pause_state() return "Tapdisk(%s, pid=%d, minor=%s, state=%s)" % \ (self.get_arg(), self.pid, self.minor, state) @@ -752,7 +765,8 @@ def __init__(self, _type, path): self.type = _type self.path = path - def __str__(self): + @override + def __str__(self) -> str: return "%s:%s" % (self.type, self.path) @classmethod @@ -772,14 +786,16 @@ class InvalidType(Exception): def __init__(self, _type): self.type = _type - def __str__(self): + @override + def __str__(self) -> str: return "Not a Tapdisk type: %s" % self.type class InvalidArgument(Exception): def __init__(self, arg): self.arg = arg - def __str__(self): + @override + def __str__(self) -> str: return "Not a Tapdisk image: %s" % self.arg def get_arg(self): @@ -921,7 +937,8 @@ def _refresh(self, __get): t = self.from_minor(__get('minor')) self.__init__(t.pid, t.minor, t.type, t.path, t.state) - def __getattribute__(self, name): + @override + def __getattribute__(self, name) -> Any: def __get(name): # NB. 
avoid(rec(ursion) return object.__getattribute__(self, name) @@ -1087,7 +1104,8 @@ def __init__(self, vdi_type, target): self.vdi_type = vdi_type self.target = target - def __str__(self): + @override + def __str__(self) -> str: return \ "Target %s has unexpected VDI type '%s'" % \ (type(self.target), self.vdi_type) @@ -1172,11 +1190,12 @@ class Link(object): BASEDIR: ClassVar[str] = "" - def _mklink(self, target): - raise NotImplementedError("_mklink is not defined") + def _mklink(self, target) -> None: + pass - def _equals(self, target): - raise NotImplementedError("_equals is not defined") + @abstractmethod + def _equals(self, target) -> bool: + pass def __init__(self, path): self._path = path @@ -1197,7 +1216,7 @@ def path(self): def stat(self): return os.stat(self.path()) - def mklink(self, target): + def mklink(self, target) -> None: path = self.path() util.SMlog("%s -> %s" % (self, target)) @@ -1220,7 +1239,8 @@ def unlink(self): if e.errno != errno.ENOENT: raise - def __str__(self): + @override + def __str__(self) -> str: path = self.path() return "%s(%s)" % (self.__class__.__name__, path) @@ -1233,10 +1253,12 @@ def readlink(self): def symlink(self): return self.path() - def _mklink(self, target): + @override + def _mklink(self, target) -> None: os.symlink(target, self.path()) - def _equals(self, target): + @override + def _equals(self, target) -> bool: return self.readlink() == target class DeviceNode(Link): @@ -1253,7 +1275,8 @@ def is_block(cls, target): """Whether @target refers to a block device.""" return S_ISBLK(cls._real_stat(target).st_mode) - def _mklink(self, target): + @override + def _mklink(self, target) -> None: st = self._real_stat(target) if not S_ISBLK(st.st_mode): @@ -1263,7 +1286,8 @@ def _mklink(self, target): os.mknod(self.path(), st.st_mode | stat.S_IRGRP, st.st_rdev) os.chown(self.path(), st.st_uid, grp.getgrnam("disk").gr_gid) - def _equals(self, target): + @override + def _equals(self, target) -> bool: target_rdev = self._real_stat(target).st_rdev return self.stat().st_rdev == target_rdev @@ -1278,7 +1302,8 @@ def __init__(self, path, st): self.path = path self.st = st - def __str__(self): + @override + def __str__(self) -> str: return "%s is not a block device: %s" % (self.path, self.st) class Hybrid(Link): @@ -1294,14 +1319,16 @@ def rdev(self): return self._devnode.rdev() raise self._devnode.NotABlockDevice(self.path(), st) - def mklink(self, target): + @override + def mklink(self, target) -> None: if self._devnode.is_block(target): self._obj = self._devnode else: self._obj = self._symlink self._obj.mklink(target) - def _equals(self, target): + @override + def _equals(self, target) -> bool: return self._obj._equals(target) class PhyLink(SymLink): @@ -2108,7 +2135,8 @@ def __init__(self, args): super().__init__(args) self.key = args[0] - def __str__(self): + @override + def __str__(self) -> str: return \ "Key '%s' missing in environment. " % self.key + \ "Not called in udev context?" 
@@ -2131,7 +2159,8 @@ def __init__(self, event, handler): self.event = event self.handler = handler - def __str__(self): + @override + def __str__(self) -> str: return "Uevent '%s' not handled by %s" % \ (self.event, self.handler.__class__.__name__) @@ -2147,7 +2176,8 @@ def run(self): return fn(self) - def __str__(self): + @override + def __str__(self) -> str: try: action = self.get_action() except: @@ -2162,7 +2192,8 @@ def __init__(self): ClassDevice.__init__(self) self._default_pool = None - def sysfs_devname(self): + @override + def sysfs_devname(self) -> str: return "blktap!control" class DefaultPool(Attribute): @@ -2189,7 +2220,8 @@ class NoSuchPool(Exception): def __init__(self, name): self.name = name - def __str__(self): + @override + def __str__(self) -> str: return "No such pool: {}".format(self.name) def get_pool(self, name): @@ -2209,6 +2241,10 @@ def __init__(self, path): self.path = path self._size = None + @override + def sysfs_devname(self) -> str: + return '' + def sysfs_path(self): return self.path @@ -2349,11 +2385,13 @@ def signal_hotplug(self, online=True): self._xs_rm_path(xapi_path) self._xs_rm_path(upstream_path) - def sysfs_devname(self): + @override + def sysfs_devname(self) -> str: return "%s-%d-%d" % (self.XENBUS_DEVTYPE, self.domid, self.devid) - def __str__(self): + @override + def __str__(self) -> str: return self.sysfs_devname() @classmethod @@ -2404,7 +2442,8 @@ def __init__(self, vbd, _str): self.vbd = vbd self.str = _str - def __str__(self): + @override + def __str__(self) -> str: return "Backend %s " % self.vbd + \ "has %s = %s" % (self.KEY, self.str) @@ -2437,10 +2476,12 @@ def makedev(self): def is_tap(self): return self.major == Tapdisk.major() - def __str__(self): + @override + def __str__(self) -> str: return "%s:%s" % (self.major, self.minor) - def __eq__(self, other): + @override + def __eq__(self, other) -> bool: return \ self.major == other.major and \ self.minor == other.minor @@ -2556,14 +2597,16 @@ def __init__(self, ident=None, action=None): UEventHandler.__init__(self) - def run(self): + @override + def run(self) -> None: self.xs_path = self.getenv('XENBUS_PATH') openlog(str(self), 0, self.LOG_FACILITY) UEventHandler.run(self) - def __str__(self): + @override + def __str__(self) -> str: try: path = self.xs_path diff --git a/drivers/cleanup.py b/drivers/cleanup.py index 7129a2082..6f75cf0f0 100755 --- a/drivers/cleanup.py +++ b/drivers/cleanup.py @@ -18,6 +18,8 @@ # Script to coalesce and garbage collect VHD-based SR's in the background # +from sm_typing import Optional, override + import os import os.path import sys @@ -124,7 +126,7 @@ class Util: PREFIX = {"G": 1024 * 1024 * 1024, "M": 1024 * 1024, "K": 1024} @staticmethod - def log(text): + def log(text) -> None: util.SMlog(text, ident="SMGC") @staticmethod @@ -569,11 +571,11 @@ def __init__(self, sr, uuid, raw): def extractUuid(path): raise NotImplementedError("Implement in sub class") - def load(self, info=None): + def load(self, info=None) -> None: """Load VDI info""" - pass # abstract + pass - def getDriverName(self): + def getDriverName(self) -> str: return self.DRIVER_NAME_VHD def getRef(self): @@ -605,7 +607,7 @@ def ensureUnpaused(self): Util.log("Unpausing VDI %s" % self) self.unpause() - def pause(self, failfast=False): + def pause(self, failfast=False) -> None: if not blktap2.VDI.tap_pause(self.sr.xapi.session, self.sr.uuid, self.uuid, failfast): raise util.SMException("Failed to pause VDI %s" % self) @@ -726,10 +728,10 @@ def getAllPrunable(self): vdiList.append(self) return 
vdiList - def getSizeVHD(self): + def getSizeVHD(self) -> int: return self._sizeVHD - def getAllocatedSize(self): + def getAllocatedSize(self) -> int: return self._sizeAllocated def getTreeRoot(self): @@ -762,12 +764,12 @@ def getAllLeaves(self): leaves.extend(child.getAllLeaves()) return leaves - def updateBlockInfo(self): + def updateBlockInfo(self) -> Optional[str]: val = base64.b64encode(self._queryVHDBlocks()).decode() self.setConfig(VDI.DB_VHD_BLOCKS, val) return val - def rename(self, uuid): + def rename(self, uuid) -> None: "Rename the VDI file" assert(not self.sr.vdis.get(uuid)) self._clearRef() @@ -778,19 +780,20 @@ def rename(self, uuid): del self.sr.vdis[oldUuid] self.sr.vdis[self.uuid] = self - def delete(self): + def delete(self) -> None: "Physically delete the VDI" lock.Lock.cleanup(self.uuid, lvhdutil.NS_PREFIX_LVM + self.sr.uuid) lock.Lock.cleanupAll(self.uuid) self._clear() - def getParent(self): + def getParent(self) -> str: return vhdutil.getParent(self.path, lambda x: x.strip()) - def repair(self, parent): + def repair(self, parent) -> None: vhdutil.repair(parent) - def __str__(self): + @override + def __str__(self) -> str: strHidden = "" if self.hidden: strHidden = "*" @@ -811,7 +814,7 @@ def __str__(self): return "%s%s(%s%s%s)%s" % (strHidden, self.uuid[0:8], strSizeVirt, strSizeVHD, strSizeAllocated, strType) - def validate(self, fast=False): + def validate(self, fast=False) -> None: if not vhdutil.check(self.path, fast=fast): raise util.SMException("VHD %s corrupted" % self) @@ -825,7 +828,7 @@ def _clear(self): def _clearRef(self): self._vdiRef = None - def _doCoalesce(self): + def _doCoalesce(self) -> None: """Coalesce self onto parent. Only perform the actual coalescing of VHD, but not the subsequent relinking. We'll do that as the next step, after reloading the entire SR in case things have changed while we @@ -907,7 +910,7 @@ def _reportCoalesceError(vdi, ce): if xcmsg: xapi.message.create(msg_name, "3", "SR", vdi.sr.uuid, msg_body) - def coalesce(self): + def coalesce(self) -> int: # size is returned in sectors return vhdutil.coalesce(self.path) * 512 @@ -968,7 +971,7 @@ def _coalesceVHD(self, timeOut): util.fistpoint.activate("LVHDRT_coalescing_VHD_data", self.sr.uuid) - def _relinkSkip(self): + def _relinkSkip(self) -> None: """Relink children of this VDI to point to the parent of this VDI""" abortFlag = IPCFlag(self.sr.uuid) for child in self.children: @@ -1039,7 +1042,7 @@ def _loadInfoParent(self): if ret: self.parentUuid = ret - def _setParent(self, parent): + def _setParent(self, parent) -> None: vhdutil.setParent(self.path, parent.path, False) self.parent = parent self.parentUuid = parent.uuid @@ -1052,15 +1055,15 @@ def _setParent(self, parent): Util.log("Failed to update %s with vhd-parent field %s" % \ (self.uuid, self.parentUuid)) - def _loadInfoHidden(self): + def _loadInfoHidden(self) -> None: hidden = vhdutil.getHidden(self.path) self.hidden = (hidden != 0) - def _setHidden(self, hidden=True): + def _setHidden(self, hidden=True) -> None: vhdutil.setHidden(self.path, hidden) self.hidden = hidden - def _increaseSizeVirt(self, size, atomic=True): + def _increaseSizeVirt(self, size, atomic=True) -> None: """ensure the virtual size of 'self' is at least 'size'. Note that resizing a VHD must always be offline and atomically: the file must not be open by anyone and no concurrent operations may take place. 
@@ -1092,14 +1095,14 @@ def _increaseSizeVirt(self, size, atomic=True): self.sizeVirt = vhdutil.getSizeVirt(self.path) - def _setSizeVirt(self, size): + def _setSizeVirt(self, size) -> None: """WARNING: do not call this method directly unless all VDIs in the subtree are guaranteed to be unplugged (and remain so for the duration of the operation): this operation is only safe for offline VHDs""" jFile = os.path.join(self.sr.path, self.uuid) vhdutil.setSizeVirt(self.path, size, jFile) - def _queryVHDBlocks(self): + def _queryVHDBlocks(self) -> bytes: return vhdutil.getBlockBitmap(self.path) def _getCoalescedSizeData(self): @@ -1118,20 +1121,20 @@ def _getCoalescedSizeData(self): assert(sizeData <= self.sizeVirt) return sizeData - def _calcExtraSpaceForCoalescing(self): + def _calcExtraSpaceForCoalescing(self) -> int: sizeData = self._getCoalescedSizeData() sizeCoalesced = sizeData + vhdutil.calcOverheadBitmap(sizeData) + \ vhdutil.calcOverheadEmpty(self.sizeVirt) Util.log("Coalesced size = %s" % Util.num2str(sizeCoalesced)) return sizeCoalesced - self.parent.getSizeVHD() - def _calcExtraSpaceForLeafCoalescing(self): + def _calcExtraSpaceForLeafCoalescing(self) -> int: """How much extra space in the SR will be required to [live-]leaf-coalesce this VDI""" # the space requirements are the same as for inline coalesce return self._calcExtraSpaceForCoalescing() - def _calcExtraSpaceForSnapshotCoalescing(self): + def _calcExtraSpaceForSnapshotCoalescing(self) -> int: """How much extra space in the SR will be required to snapshot-coalesce this VDI""" return self._calcExtraSpaceForCoalescing() + \ @@ -1166,7 +1169,8 @@ def __init__(self, sr, uuid, raw): else: self.fileName = "%s%s" % (self.uuid, vhdutil.FILE_EXTN_VHD) - def load(self, info=None): + @override + def load(self, info=None) -> None: if not info: if not util.pathexists(self.path): raise util.SMException("%s not found" % self.path) @@ -1186,7 +1190,8 @@ def load(self, info=None): self.path = os.path.join(self.sr.path, "%s%s" % \ (self.uuid, vhdutil.FILE_EXTN_VHD)) - def rename(self, uuid): + @override + def rename(self, uuid) -> None: oldPath = self.path VDI.rename(self, uuid) self.fileName = "%s%s" % (self.uuid, vhdutil.FILE_EXTN_VHD) @@ -1195,7 +1200,8 @@ def rename(self, uuid): Util.log("Renaming %s -> %s" % (oldPath, self.path)) os.rename(oldPath, self.path) - def delete(self): + @override + def delete(self) -> None: if len(self.children) > 0: raise util.SMException("VDI %s has children, can't delete" % \ self.uuid) @@ -1210,7 +1216,8 @@ def delete(self): raise util.SMException("os.unlink(%s) failed" % self.path) VDI.delete(self) - def getAllocatedSize(self): + @override + def getAllocatedSize(self) -> int: if self._sizeAllocated == -1: self._sizeAllocated = vhdutil.getAllocatedSize(self.path) return self._sizeAllocated @@ -1222,7 +1229,8 @@ class LVHDVDI(VDI): JRN_ZERO = "zero" # journal entry type for zeroing out end of parent DRIVER_NAME_RAW = "aio" - def load(self, info=None): + @override + def load(self, info=None) -> None: # `info` is always set. `None` default value is only here to match parent method. 
assert info, "No info given to LVHDVDI.load" self.parent = None @@ -1244,7 +1252,8 @@ def load(self, info=None): def extractUuid(path): return lvhdutil.extractUuid(path) - def getDriverName(self): + @override + def getDriverName(self) -> str: if self.raw: return self.DRIVER_NAME_RAW return self.DRIVER_NAME_VHD @@ -1291,11 +1300,14 @@ def inflateParentForCoalesce(self): util.fistpoint.activate("LVHDRT_coalescing_before_inflate_grandparent", self.sr.uuid) self.parent.inflate(self.parent.sizeLV + inc) - def updateBlockInfo(self): + @override + def updateBlockInfo(self) -> Optional[str]: if not self.raw: return VDI.updateBlockInfo(self) + return None - def rename(self, uuid): + @override + def rename(self, uuid) -> None: oldUuid = self.uuid oldLVName = self.fileName VDI.rename(self, uuid) @@ -1314,7 +1326,8 @@ def rename(self, uuid): RefCounter.set(self.uuid, cnt, bcnt, ns) RefCounter.reset(oldUuid, ns) - def delete(self): + @override + def delete(self) -> None: if len(self.children) > 0: raise util.SMException("VDI %s has children, can't delete" % \ self.uuid) @@ -1327,7 +1340,8 @@ def delete(self): RefCounter.reset(self.uuid, lvhdutil.NS_PREFIX_LVM + self.sr.uuid) VDI.delete(self) - def getSizeVHD(self): + @override + def getSizeVHD(self) -> int: if self._sizeVHD == -1: self._loadInfoSizeVHD() return self._sizeVHD @@ -1345,7 +1359,8 @@ def _loadInfoSizeVHD(self): raise util.SMException("phys size of %s = %d" % \ (self, self._sizeVHD)) - def getAllocatedSize(self): + @override + def getAllocatedSize(self) -> int: if self._sizeAllocated == -1: self._loadInfoSizeAllocated() return self._sizeAllocated @@ -1359,20 +1374,23 @@ def _loadInfoSizeAllocated(self): self._activate() self._sizeAllocated = vhdutil.getAllocatedSize(self.path) - def _loadInfoHidden(self): + @override + def _loadInfoHidden(self) -> None: if self.raw: self.hidden = self.sr.lvmCache.getHidden(self.fileName) else: VDI._loadInfoHidden(self) - def _setHidden(self, hidden=True): + @override + def _setHidden(self, hidden=True) -> None: if self.raw: self.sr.lvmCache.setHidden(self.fileName, hidden) self.hidden = hidden else: VDI._setHidden(self, hidden) - def __str__(self): + @override + def __str__(self) -> str: strType = "VHD" if self.raw: strType = "RAW" @@ -1394,11 +1412,13 @@ def __str__(self): Util.num2str(self.sizeVirt), strSizeVHD, strSizeAllocated, Util.num2str(self.sizeLV), strActive) - def validate(self, fast=False): + @override + def validate(self, fast=False) -> None: if not self.raw: VDI.validate(self, fast) - def _doCoalesce(self): + @override + def _doCoalesce(self) -> None: """LVHD parents must first be activated, inflated, and made writable""" try: self._activateChain() @@ -1411,7 +1431,8 @@ def _doCoalesce(self): self.parent.deflate() self.sr.lvmCache.setReadonly(self.parent.fileName, True) - def _setParent(self, parent): + @override + def _setParent(self, parent) -> None: self._activate() if self.lvReadonly: self.sr.lvmCache.setReadonly(self.fileName, False) @@ -1445,7 +1466,8 @@ def _activateChain(self): def _deactivate(self): self.sr.lvActivator.deactivate(self.uuid, False) - def _increaseSizeVirt(self, size, atomic=True): + @override + def _increaseSizeVirt(self, size, atomic=True) -> None: "ensure the virtual size of 'self' is at least 'size'" self._activate() if not self.raw: @@ -1481,7 +1503,8 @@ def _increaseSizeVirt(self, size, atomic=True): VDI.POLL_INTERVAL, 0) self.sr.journaler.remove(self.JRN_ZERO, self.uuid) - def _setSizeVirt(self, size): + @override + def _setSizeVirt(self, size) -> None: 
"""WARNING: do not call this method directly unless all VDIs in the subtree are guaranteed to be unplugged (and remain so for the duration of the operation): this operation is only safe for offline VHDs""" @@ -1494,25 +1517,29 @@ def _setSizeVirt(self, size): finally: lvhdutil.deleteVHDJournalLV(self.sr.lvmCache, self.uuid) - def _queryVHDBlocks(self): + @override + def _queryVHDBlocks(self) -> bytes: self._activate() return VDI._queryVHDBlocks(self) - def _calcExtraSpaceForCoalescing(self): + @override + def _calcExtraSpaceForCoalescing(self) -> int: if self.parent.raw: return 0 # raw parents are never deflated in the first place sizeCoalesced = lvhdutil.calcSizeVHDLV(self._getCoalescedSizeData()) Util.log("Coalesced size = %s" % Util.num2str(sizeCoalesced)) return sizeCoalesced - self.parent.sizeLV - def _calcExtraSpaceForLeafCoalescing(self): + @override + def _calcExtraSpaceForLeafCoalescing(self) -> int: """How much extra space in the SR will be required to [live-]leaf-coalesce this VDI""" # we can deflate the leaf to minimize the space requirements deflateDiff = self.sizeLV - lvhdutil.calcSizeLV(self.getSizeVHD()) return self._calcExtraSpaceForCoalescing() - deflateDiff - def _calcExtraSpaceForSnapshotCoalescing(self): + @override + def _calcExtraSpaceForSnapshotCoalescing(self) -> int: return self._calcExtraSpaceForCoalescing() + \ lvhdutil.calcSizeLV(self.getSizeVHD()) @@ -1522,7 +1549,8 @@ class LinstorVDI(VDI): VOLUME_LOCK_TIMEOUT = 30 - def load(self, info=None): + @override + def load(self, info=None) -> None: self.parentUuid = info.parentUuid self.scanError = True self.parent = None @@ -1549,7 +1577,8 @@ def load(self, info=None): self.scanError = False self.vdi_type = vhdutil.VDI_TYPE_VHD - def getSizeVHD(self, fetch=False): + @override + def getSizeVHD(self, fetch=False) -> int: if self._sizeVHD < 0 or fetch: self._sizeVHD = self.sr._vhdutil.get_size_phys(self.uuid) return self._sizeVHD @@ -1559,7 +1588,8 @@ def getDrbdSize(self, fetch=False): self.drbd_size = self.sr._vhdutil.get_drbd_size(self.uuid) return self.drbd_size - def getAllocatedSize(self): + @override + def getAllocatedSize(self) -> int: if self._sizeAllocated == -1: if not self.raw: self._sizeAllocated = self.sr._vhdutil.get_allocated_size(self.uuid) @@ -1599,14 +1629,16 @@ def inflateFully(self): if not self.raw: self.inflate(LinstorVhdUtil.compute_volume_size(self.sizeVirt, self.vdi_type)) - def rename(self, uuid): + @override + def rename(self, uuid) -> None: Util.log('Renaming {} -> {} (path={})'.format( self.uuid, uuid, self.path )) self.sr._linstor.update_volume_uuid(self.uuid, uuid) VDI.rename(self, uuid) - def delete(self): + @override + def delete(self) -> None: if len(self.children) > 0: raise util.SMException( 'VDI {} has children, can\'t delete'.format(self.uuid) @@ -1619,32 +1651,38 @@ def delete(self): self.sr.unlock() VDI.delete(self) - def validate(self, fast=False): + @override + def validate(self, fast=False) -> None: if not self.raw and not self.sr._vhdutil.check(self.uuid, fast=fast): raise util.SMException('VHD {} corrupted'.format(self)) - def pause(self, failfast=False): + @override + def pause(self, failfast=False) -> None: self.sr._linstor.ensure_volume_is_not_locked( self.uuid, timeout=self.VOLUME_LOCK_TIMEOUT ) return super(LinstorVDI, self).pause(failfast) - def coalesce(self): + @override + def coalesce(self) -> int: # Note: We raise `SMException` here to skip the current coalesce in case of failure. # Using another exception we can't execute the next coalesce calls. 
return self.sr._vhdutil.force_coalesce(self.path) * 512 - def getParent(self): + @override + def getParent(self) -> str: return self.sr._vhdutil.get_parent( self.sr._linstor.get_volume_uuid_from_device_path(self.path) ) - def repair(self, parent_uuid): + @override + def repair(self, parent_uuid) -> None: self.sr._vhdutil.force_repair( self.sr._linstor.get_device_path(parent_uuid) ) - def _relinkSkip(self): + @override + def _relinkSkip(self) -> None: abortFlag = IPCFlag(self.sr.uuid) for child in self.children: if abortFlag.test(FLAG_TYPE_ABORT): @@ -1668,7 +1706,8 @@ def _relinkSkip(self): blktap2.VDI.tap_unpause(session, sr_uuid, vdi_uuid) self.children = [] - def _setParent(self, parent): + @override + def _setParent(self, parent) -> None: self.sr._linstor.get_device_path(self.uuid) self.sr._vhdutil.force_parent(self.path, parent.path) self.parent = parent @@ -1682,7 +1721,8 @@ def _setParent(self, parent): Util.log("Failed to update %s with vhd-parent field %s" % \ (self.uuid, self.parentUuid)) - def _doCoalesce(self): + @override + def _doCoalesce(self) -> None: try: self._activateChain() self.parent.validate() @@ -1702,7 +1742,8 @@ def _activateChain(self): raise util.SMException(str(e)) vdi = vdi.parent - def _setHidden(self, hidden=True): + @override + def _setHidden(self, hidden=True) -> None: HIDDEN_TAG = 'hidden' if self.raw: @@ -1713,7 +1754,8 @@ def _setHidden(self, hidden=True): else: VDI._setHidden(self, hidden) - def _setSizeVirt(self, size): + @override + def _setSizeVirt(self, size) -> None: jfile = self.uuid + '-jvhd' self.sr._linstor.create_volume( jfile, vhdutil.MAX_VHD_JOURNAL_SIZE, persistent=False, volume_name=jfile @@ -1728,7 +1770,8 @@ def _setSizeVirt(self, size): # We can ignore it, in any case this volume is not persistent. pass - def _queryVHDBlocks(self): + @override + def _queryVHDBlocks(self) -> bytes: return self.sr._vhdutil.get_block_bitmap(self.uuid) def _inflateParentForCoalesce(self): @@ -1738,7 +1781,8 @@ def _inflateParentForCoalesce(self): if inc > 0: self.parent.inflate(self.parent.getDrbdSize() + inc) - def _calcExtraSpaceForCoalescing(self): + @override + def _calcExtraSpaceForCoalescing(self) -> int: if self.parent.raw: return 0 size_coalesced = LinstorVhdUtil.compute_volume_size( @@ -1747,14 +1791,16 @@ def _calcExtraSpaceForCoalescing(self): Util.log("Coalesced size = %s" % Util.num2str(size_coalesced)) return size_coalesced - self.parent.getDrbdSize() - def _calcExtraSpaceForLeafCoalescing(self): + @override + def _calcExtraSpaceForLeafCoalescing(self) -> int: assert self.getDrbdSize() > 0 assert self.getSizeVHD() > 0 deflate_diff = self.getDrbdSize() - LinstorVolumeManager.round_up_volume_size(self.getSizeVHD()) assert deflate_diff >= 0 return self._calcExtraSpaceForCoalescing() - deflate_diff - def _calcExtraSpaceForSnapshotCoalescing(self): + @override + def _calcExtraSpaceForSnapshotCoalescing(self) -> int: assert self.getSizeVHD() > 0 return self._calcExtraSpaceForCoalescing() + \ LinstorVolumeManager.round_up_volume_size(self.getSizeVHD()) @@ -1935,10 +1981,10 @@ def gcEnabled(self, refresh=True): return False return True - def scan(self, force=False): + def scan(self, force=False) -> None: """Scan the SR and load VDI info for each VDI. 
If called repeatedly, update VDI objects if they already exist""" - pass # abstract + pass def scanLocked(self, force=False): self.lock() @@ -2148,14 +2194,14 @@ def findGarbage(self): vdiList.extend(vdi.getAllPrunable()) return vdiList - def deleteVDIs(self, vdiList): + def deleteVDIs(self, vdiList) -> None: for vdi in vdiList: if IPCFlag(self.uuid).test(FLAG_TYPE_ABORT): raise AbortException("Aborting due to signal") Util.log("Deleting unlinked VDI %s" % vdi) self.deleteVDI(vdi) - def deleteVDI(self, vdi): + def deleteVDI(self, vdi) -> None: assert(len(vdi.children) == 0) del self.vdis[vdi.uuid] if vdi.parent: @@ -2164,10 +2210,10 @@ def deleteVDI(self, vdi): self.vdiTrees.remove(vdi) vdi.delete() - def forgetVDI(self, vdiUuid): + def forgetVDI(self, vdiUuid) -> None: self.xapi.forgetVDI(self.uuid, vdiUuid) - def pauseVDIs(self, vdiList): + def pauseVDIs(self, vdiList) -> None: paused = [] failed = False for vdi in vdiList: @@ -2194,14 +2240,15 @@ def unpauseVDIs(self, vdiList): if failed: raise util.SMException("Failed to unpause VDIs") - def getFreeSpace(self): + def getFreeSpace(self) -> int: return 0 def cleanup(self): Util.log("In cleanup") return - def __str__(self): + @override + def __str__(self) -> str: if self.name: ret = "%s ('%s')" % (self.uuid[0:4], self.name) else: @@ -2236,7 +2283,7 @@ def unlock(self): if self._locked == 0: self._srLock.release() - def needUpdateBlockInfo(self): + def needUpdateBlockInfo(self) -> bool: for vdi in self.vdis.values(): if vdi.scanError or len(vdi.children) == 0: continue @@ -2244,7 +2291,7 @@ def needUpdateBlockInfo(self): return True return False - def updateBlockInfo(self): + def updateBlockInfo(self) -> None: for vdi in self.vdis.values(): if vdi.scanError or len(vdi.children) == 0: continue @@ -2273,7 +2320,7 @@ def cleanupJournals(self, dryRun=False): if not dryRun: self.journaler.remove(t, uuid) - def cleanupCache(self, maxAge=-1): + def cleanupCache(self, maxAge=-1) -> int: return 0 def _coalesce(self, vdi): @@ -2531,7 +2578,7 @@ def _snapshotCoalesce(self, vdi): return False return True - def _liveLeafCoalesce(self, vdi): + def _liveLeafCoalesce(self, vdi) -> bool: util.fistpoint.activate("LVHDRT_coaleaf_delay_3", self.uuid) self.lock() try: @@ -2632,44 +2679,43 @@ def _doCoalesceLeaf(self, vdi): self._finishCoalesceLeaf(parent) self._updateSlavesOnResize(parent) - def _calcExtraSpaceNeeded(self, child, parent): + def _calcExtraSpaceNeeded(self, child, parent) -> int: assert(not parent.raw) # raw parents not supported extra = child.getSizeVHD() - parent.getSizeVHD() if extra < 0: extra = 0 return extra - def _prepareCoalesceLeaf(self, vdi): + def _prepareCoalesceLeaf(self, vdi) -> None: pass - def _updateNode(self, vdi): + def _updateNode(self, vdi) -> None: pass - def _finishCoalesceLeaf(self, parent): + def _finishCoalesceLeaf(self, parent) -> None: pass - def _updateSlavesOnUndoLeafCoalesce(self, parent, child): + def _updateSlavesOnUndoLeafCoalesce(self, parent, child) -> None: pass - def _updateSlavesOnRename(self, vdi, oldName, origParentUuid): + def _updateSlavesOnRename(self, vdi, oldName, origParentUuid) -> None: pass - def _updateSlavesOnResize(self, vdi): + def _updateSlavesOnResize(self, vdi) -> None: pass - def _removeStaleVDIs(self, uuidsPresent): + def _removeStaleVDIs(self, uuidsPresent) -> None: for uuid in list(self.vdis.keys()): if not uuid in uuidsPresent: Util.log("VDI %s disappeared since last scan" % \ self.vdis[uuid]) del self.vdis[uuid] - def _handleInterruptedCoalesceLeaf(self): + def 
_handleInterruptedCoalesceLeaf(self) -> None: """An interrupted leaf-coalesce operation may leave the VHD tree in an inconsistent state. If the old-leaf VDI is still present, we revert the operation (in case the original error is persistent); otherwise we must finish the operation""" - # abstract pass def _buildTree(self, force): @@ -2708,7 +2754,8 @@ def __init__(self, uuid, xapi, createLock, force): self.path = "/var/run/sr-mount/%s" % self.uuid self.journaler = fjournaler.Journaler(self.path) - def scan(self, force=False): + @override + def scan(self, force=False) -> None: if not util.pathexists(self.path): raise util.SMException("directory %s not found!" % self.uuid) vhds = self._scan(force) @@ -2734,10 +2781,12 @@ def scan(self, force=False): self.logFilter.logState() self._handleInterruptedCoalesceLeaf() - def getFreeSpace(self): + @override + def getFreeSpace(self) -> int: return util.get_fs_size(self.path) - util.get_fs_utilisation(self.path) - def deleteVDIs(self, vdiList): + @override + def deleteVDIs(self, vdiList) -> None: rootDeleted = False for vdi in vdiList: if not vdi.parent: @@ -2747,7 +2796,8 @@ def deleteVDIs(self, vdiList): if self.xapi.srRecord["type"] == "nfs" and rootDeleted: self.xapi.markCacheSRsDirty() - def cleanupCache(self, maxAge=-1): + @override + def cleanupCache(self, maxAge=-1) -> int: """Clean up IntelliCache cache files. Caches for leaf nodes are removed when the leaf node no longer exists or its allow-caching attribute is not set. Caches for parent nodes are removed when the @@ -2836,7 +2886,8 @@ def _scan(self, force): return vhds raise util.SMException("Scan error") - def deleteVDI(self, vdi): + @override + def deleteVDI(self, vdi) -> None: self._checkSlaves(vdi) SR.deleteVDI(self, vdi) @@ -2861,7 +2912,8 @@ def _checkSlave(self, hostRef, vdi): _host = self.xapi.session.xenapi.host text = _host.call_plugin( * call) - def _handleInterruptedCoalesceLeaf(self): + @override + def _handleInterruptedCoalesceLeaf(self) -> None: entries = self.journaler.getAll(VDI.JRN_LEAF) for uuid, parentUuid in entries.items(): fileList = os.listdir(self.path) @@ -2945,26 +2997,31 @@ def __init__(self, uuid, xapi, createLock, force): self.lvActivator = LVActivator(self.uuid, self.lvmCache) self.journaler = journaler.Journaler(self.lvmCache) - def deleteVDI(self, vdi): + @override + def deleteVDI(self, vdi) -> None: if self.lvActivator.get(vdi.uuid, False): self.lvActivator.deactivate(vdi.uuid, False) self._checkSlaves(vdi) SR.deleteVDI(self, vdi) - def forgetVDI(self, vdiUuid): + @override + def forgetVDI(self, vdiUuid) -> None: SR.forgetVDI(self, vdiUuid) mdpath = os.path.join(self.path, lvutil.MDVOLUME_NAME) LVMMetadataHandler(mdpath).deleteVdiFromMetadata(vdiUuid) - def getFreeSpace(self): + @override + def getFreeSpace(self) -> int: stats = lvutil._getVGstats(self.vgName) return stats['physical_size'] - stats['physical_utilisation'] + @override def cleanup(self): if not self.lvActivator.deactivateAll(): Util.log("ERROR deactivating LVs while cleaning up") - def needUpdateBlockInfo(self): + @override + def needUpdateBlockInfo(self) -> bool: for vdi in self.vdis.values(): if vdi.scanError or vdi.raw or len(vdi.children) == 0: continue @@ -2972,7 +3029,8 @@ def needUpdateBlockInfo(self): return True return False - def updateBlockInfo(self): + @override + def updateBlockInfo(self) -> None: numUpdated = 0 for vdi in self.vdis.values(): if vdi.scanError or vdi.raw or len(vdi.children) == 0: @@ -2987,7 +3045,8 @@ def updateBlockInfo(self): # inherit the refcount value and 
preventing the correct decrement self.cleanup() - def scan(self, force=False): + @override + def scan(self, force=False) -> None: vdis = self._scan(force) for uuid, vdiInfo in vdis.items(): vdi = self.getVDI(uuid) @@ -3018,7 +3077,8 @@ def _scan(self, force): return vdis raise util.SMException("Scan error") - def _removeStaleVDIs(self, uuidsPresent): + @override + def _removeStaleVDIs(self, uuidsPresent) -> None: for uuid in list(self.vdis.keys()): if not uuid in uuidsPresent: Util.log("VDI %s disappeared since last scan" % \ @@ -3027,7 +3087,8 @@ def _removeStaleVDIs(self, uuidsPresent): if self.lvActivator.get(uuid, False): self.lvActivator.remove(uuid, False) - def _liveLeafCoalesce(self, vdi): + @override + def _liveLeafCoalesce(self, vdi) -> bool: """If the parent is raw and the child was resized (virt. size), then we'll need to resize the parent, which can take a while due to zeroing out of the extended portion of the LV. Do it before pausing the child @@ -3038,13 +3099,15 @@ def _liveLeafCoalesce(self, vdi): return SR._liveLeafCoalesce(self, vdi) - def _prepareCoalesceLeaf(self, vdi): + @override + def _prepareCoalesceLeaf(self, vdi) -> None: vdi._activateChain() self.lvmCache.setReadonly(vdi.parent.fileName, False) vdi.deflate() vdi.inflateParentForCoalesce() - def _updateNode(self, vdi): + @override + def _updateNode(self, vdi) -> None: # fix the refcounts: the remaining node should inherit the binary # refcount from the leaf (because if it was online, it should remain # refcounted as such), but the normal refcount from the parent (because @@ -3058,16 +3121,19 @@ def _updateNode(self, vdi): assert(pCnt >= 0) RefCounter.set(vdi.parent.uuid, pCnt, cBcnt, ns) - def _finishCoalesceLeaf(self, parent): + @override + def _finishCoalesceLeaf(self, parent) -> None: if not parent.isSnapshot() or parent.isAttachedRW(): parent.inflateFully() else: parent.deflate() - def _calcExtraSpaceNeeded(self, child, parent): + @override + def _calcExtraSpaceNeeded(self, child, parent) -> int: return lvhdutil.calcSizeVHDLV(parent.sizeVirt) - parent.sizeLV - def _handleInterruptedCoalesceLeaf(self): + @override + def _handleInterruptedCoalesceLeaf(self) -> None: entries = self.journaler.getAll(VDI.JRN_LEAF) for uuid, parentUuid in entries.items(): childLV = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + uuid @@ -3178,7 +3244,8 @@ def _checkSlaves(self, vdi): if hostRef in onlineHosts: raise - def _updateSlavesOnUndoLeafCoalesce(self, parent, child): + @override + def _updateSlavesOnUndoLeafCoalesce(self, parent, child) -> None: slaves = util.get_slaves_attached_on(self.xapi.session, [child.uuid]) if not slaves: Util.log("Update-on-leaf-undo: VDI %s not attached on any slave" % \ @@ -3204,7 +3271,8 @@ def _updateSlavesOnUndoLeafCoalesce(self, parent, child): slave, self.xapi.PLUGIN_ON_SLAVE, "multi", args) Util.log("call-plugin returned: '%s'" % text) - def _updateSlavesOnRename(self, vdi, oldNameLV, origParentUuid): + @override + def _updateSlavesOnRename(self, vdi, oldNameLV, origParentUuid) -> None: slaves = util.get_slaves_attached_on(self.xapi.session, [vdi.uuid]) if not slaves: Util.log("Update-on-rename: VDI %s not attached on any slave" % vdi) @@ -3226,7 +3294,8 @@ def _updateSlavesOnRename(self, vdi, oldNameLV, origParentUuid): slave, self.xapi.PLUGIN_ON_SLAVE, "multi", args) Util.log("call-plugin returned: '%s'" % text) - def _updateSlavesOnResize(self, vdi): + @override + def _updateSlavesOnResize(self, vdi) -> None: uuids = [x.uuid for x in vdi.getAllLeaves()] slaves = 
util.get_slaves_attached_on(self.xapi.session, uuids) if not slaves: @@ -3249,14 +3318,17 @@ def __init__(self, uuid, xapi, createLock, force): self.path = LinstorVolumeManager.DEV_ROOT_PATH self._reloadLinstor() - def deleteVDI(self, vdi): + @override + def deleteVDI(self, vdi) -> None: self._checkSlaves(vdi) SR.deleteVDI(self, vdi) - def getFreeSpace(self): + @override + def getFreeSpace(self) -> int: return self._linstor.max_volume_size_allowed - def scan(self, force=False): + @override + def scan(self, force=False) -> None: all_vdi_info = self._scan(force) for uuid, vdiInfo in all_vdi_info.items(): # When vdiInfo is None, the VDI is RAW. @@ -3272,7 +3344,8 @@ def scan(self, force=False): self.logFilter.logState() self._handleInterruptedCoalesceLeaf() - def pauseVDIs(self, vdiList): + @override + def pauseVDIs(self, vdiList) -> None: self._linstor.ensure_volume_list_is_not_locked( vdiList, timeout=LinstorVDI.VOLUME_LOCK_TIMEOUT ) @@ -3389,18 +3462,21 @@ def _load_vdi_info(self): return all_vdi_info - def _prepareCoalesceLeaf(self, vdi): + @override + def _prepareCoalesceLeaf(self, vdi) -> None: vdi._activateChain() vdi.deflate() vdi._inflateParentForCoalesce() - def _finishCoalesceLeaf(self, parent): + @override + def _finishCoalesceLeaf(self, parent) -> None: if not parent.isSnapshot() or parent.isAttachedRW(): parent.inflateFully() else: parent.deflate() - def _calcExtraSpaceNeeded(self, child, parent): + @override + def _calcExtraSpaceNeeded(self, child, parent) -> int: return LinstorVhdUtil.compute_volume_size(parent.sizeVirt, parent.vdi_type) - parent.getDrbdSize() def _hasValidDevicePath(self, uuid): @@ -3411,7 +3487,8 @@ def _hasValidDevicePath(self, uuid): return False return True - def _liveLeafCoalesce(self, vdi): + @override + def _liveLeafCoalesce(self, vdi) -> bool: self.lock() try: self._linstor.ensure_volume_is_not_locked( @@ -3421,7 +3498,8 @@ def _liveLeafCoalesce(self, vdi): finally: self.unlock() - def _handleInterruptedCoalesceLeaf(self): + @override + def _handleInterruptedCoalesceLeaf(self) -> None: entries = self.journaler.get_all(VDI.JRN_LEAF) for uuid, parentUuid in entries.items(): if self._hasValidDevicePath(parentUuid) or \ diff --git a/drivers/flock.py b/drivers/flock.py index 2d295ec4e..a853da27d 100644 --- a/drivers/flock.py +++ b/drivers/flock.py @@ -23,7 +23,7 @@ got to grow our own. """ -from sm_typing import ClassVar +from sm_typing import ClassVar, override import os import fcntl @@ -62,7 +62,8 @@ def __getattr__(self, name): idx = self.FIELDS[name] return self.fields[idx] - def __setattr__(self, name, value): + @override + def __setattr__(self, name, value) -> None: idx = self.FIELDS.get(name) if idx is None: self.__dict__[name] = value diff --git a/drivers/lcache.py b/drivers/lcache.py index f1e25c368..1b7c78f2d 100755 --- a/drivers/lcache.py +++ b/drivers/lcache.py @@ -15,6 +15,8 @@ # along with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +from sm_typing import override + import os import blktap2 import glob @@ -71,7 +73,8 @@ def __init__(self, tapdisk, stats): self.tapdisk = tapdisk self.stats = stats - def __str__(self): + @override + def __str__(self) -> str: return \ "Tapdisk %s in state '%s' not found caching." 
% \ (self.tapdisk, self.stats) @@ -116,7 +119,8 @@ def vdi_stats_total(self): return rd_hits, rd_miss, wr_rdir - def __str__(self): + @override + def __str__(self) -> str: return "%s(%s, minor=%s)" % \ (self.__class__.__name__, self.tapdisk.path, self.tapdisk.minor) @@ -144,7 +148,8 @@ def vdi_stats(self): return rd_hits, rd_miss, wr_rdir - def __str__(self): + @override + def __str__(self) -> str: return "%s(%s, minor=%s)" % \ (self.__class__.__name__, self.tapdisk.path, self.tapdisk.minor) @@ -166,7 +171,8 @@ class NotAMountPoint(Exception): def __init__(self, path): self.path = path - def __str__(self): + @override + def __str__(self) -> str: return "Not a mount point: %s" % self.path @classmethod diff --git a/drivers/linstorvhdutil.py b/drivers/linstorvhdutil.py index 6ad4787dc..c33c24c06 100644 --- a/drivers/linstorvhdutil.py +++ b/drivers/linstorvhdutil.py @@ -14,6 +14,8 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . +from sm_typing import override + from linstorjournaler import LinstorJournaler from linstorvolumemanager import LinstorVolumeManager import base64 @@ -60,7 +62,8 @@ class LinstorCallException(util.SMException): def __init__(self, cmd_err): self.cmd_err = cmd_err - def __str__(self): + @override + def __str__(self) -> str: return str(self.cmd_err) diff --git a/drivers/linstorvolumemanager.py b/drivers/linstorvolumemanager.py index 553272545..5b7da10ac 100755 --- a/drivers/linstorvolumemanager.py +++ b/drivers/linstorvolumemanager.py @@ -15,6 +15,7 @@ # along with this program. If not, see . # +from sm_typing import override import errno import json @@ -375,7 +376,8 @@ def __init__(self, name): self.virtual_size = 0 self.diskful = [] - def __repr__(self): + @override + def __repr__(self) -> str: return 'VolumeInfo("{}", {}, {}, {})'.format( self.name, self.allocated_size, self.virtual_size, self.diskful diff --git a/drivers/lock.py b/drivers/lock.py index 2e6e2c9c0..6792d7b04 100755 --- a/drivers/lock.py +++ b/drivers/lock.py @@ -194,7 +194,7 @@ def _open(self): fd = self.lockfile.fileno() self.lock = flock.WriteLock(fd) - def _open_lockfile(self): + def _open_lockfile(self) -> None: """Provide a seam, so extreme situations could be tested""" util.SMlog("lock: opening lock file %s" % self.lockpath) self.lockfile = open(self.lockpath, "w+") diff --git a/drivers/mpath_cli.py b/drivers/mpath_cli.py index 4c7ce54a1..357e84a0a 100755 --- a/drivers/mpath_cli.py +++ b/drivers/mpath_cli.py @@ -15,6 +15,8 @@ # # Talk to the multipathd cli +from sm_typing import override + import util import re import time @@ -25,7 +27,8 @@ class MPathCLIFail(Exception): def __init__(self): return - def __str__(self): + @override + def __str__(self) -> str: return "MPath CLI failed" mpathcmd = ["/usr/sbin/multipathd", "-k"] diff --git a/drivers/nfs-on-slave b/drivers/nfs-on-slave index 3e2ee8d32..718377247 100644 --- a/drivers/nfs-on-slave +++ b/drivers/nfs-on-slave @@ -18,6 +18,9 @@ # A plugin for synchronizing slaves when something changes on the Master import sys; sys.path.append("/opt/xensource/sm/") + +from sm_typing import override + import util import os, glob, errno @@ -31,7 +34,8 @@ class NfsCheckException(Exception): except: self.exe = None - def __str__(self): + @override + def __str__(self) -> str: return "File %s in use by pid %d (%s), fd %d" % \ (self.path, self.pid, self.exe, self.fd) diff --git a/drivers/srmetadata.py b/drivers/srmetadata.py index c80fb6d7d..34aa5f16b 100755 --- a/drivers/srmetadata.py +++ 
b/drivers/srmetadata.py @@ -16,7 +16,9 @@ # Functions to read and write SR metadata # -from sm_typing import ClassVar +from sm_typing import ClassVar, override + +from abc import abstractmethod from io import SEEK_SET @@ -275,8 +277,9 @@ def __del__(self): def vdi_info_size(self): return self.VDI_INFO_SIZE_IN_SECTORS * SECTOR_SIZE - def spaceAvailableForVdis(self, count): - raise NotImplementedError("spaceAvailableForVdis is undefined") + @abstractmethod + def spaceAvailableForVdis(self, count) -> None: + pass # common utility functions def getMetadata(self, params={}): @@ -660,10 +663,10 @@ def getMetadataToWrite(self, sr_info, vdi_info, lower, upper, update_map, \ raise # specific functions, to be implement by the child classes - def getVdiInfo(self, Dict, generateSector=0): + def getVdiInfo(self, Dict, generateSector=0) -> bytes: return b"" - def getSRInfoForSectors(self, sr_info, range): + def getSRInfoForSectors(self, sr_info, range) -> bytes: return b"" @@ -676,7 +679,8 @@ def __init__(self, path=None, write=True): lvutil.ensurePathExists(path) MetadataHandler.__init__(self, path, write) - def spaceAvailableForVdis(self, count): + @override + def spaceAvailableForVdis(self, count) -> None: created = False try: # The easiest way to do this, is to create a dummy vdi and write it @@ -707,7 +711,8 @@ def spaceAvailableForVdis(self, count): # it also takes in a parameter to determine whether both the sector # or only one sector needs to be generated, and which one # generateSector - can be 1 or 2, defaults to 0 and generates both sectors - def getVdiInfo(self, Dict, generateSector=0): + @override + def getVdiInfo(self, Dict, generateSector=0) -> bytes: util.SMlog("Entering VDI info") try: vdi_info = b"" @@ -763,7 +768,8 @@ def getVdiInfo(self, Dict, generateSector=0): (Dict, str(e))) raise - def getSRInfoForSectors(self, sr_info, range): + @override + def getSRInfoForSectors(self, sr_info, range) -> bytes: srinfo = b"" try: diff --git a/drivers/udevSR.py b/drivers/udevSR.py index 4862ef8b9..a84427645 100755 --- a/drivers/udevSR.py +++ b/drivers/udevSR.py @@ -18,6 +18,8 @@ # udevSR: represents VDIs which are hotplugged into dom0 via udev e.g. 
# USB CDROM/disk devices +from sm_typing import override + import SR import VDI import SRCommand @@ -50,16 +52,19 @@ class udevSR(SR.SR): """udev-driven storage repository""" - def handles(type): + @override + @staticmethod + def handles(type) -> bool: if type == TYPE: return True return False - handles = staticmethod(handles) - def content_type(self, sr_uuid): + @override + def content_type(self, sr_uuid) -> str: return super(udevSR, self).content_type(sr_uuid) - def vdi(self, uuid): + @override + def vdi(self, uuid) -> VDI.VDI: util.SMlog("params = %s" % (self.srcmd.params.keys())) if 'vdi_location' in self.srcmd.params: @@ -74,7 +79,8 @@ def get_vdi_location(self, uuid): vdi_ref = vdi.get_by_uuid(uuid) return vdi.get_location(vdi_ref) - def load(self, sr_uuid): + @override + def load(self, sr_uuid) -> None: # First of all, check we've got the correct keys in dconf if 'location' not in self.dconf: raise xs_errors.XenError('ConfigLocationMissing') @@ -82,7 +88,8 @@ def load(self, sr_uuid): # Cache the sm_config self.sm_config = self.session.xenapi.SR.get_sm_config(self.sr_ref) - def update(self, sr_uuid): + @override + def update(self, sr_uuid) -> None: # Return as much information as we have sr_root = self.dconf['location'] @@ -102,22 +109,27 @@ def update(self, sr_uuid): self._db_update() - def scan(self, sr_uuid): + @override + def scan(self, sr_uuid) -> None: self.update(sr_uuid) # base class scan does all the work: - return super(udevSR, self).scan(sr_uuid) + super(udevSR, self).scan(sr_uuid) - def create(self, sr_uuid, size): + @override + def create(self, sr_uuid, size) -> None: pass - def delete(self, sr_uuid): + @override + def delete(self, sr_uuid) -> None: pass - def attach(self, sr_uuid): + @override + def attach(self, sr_uuid) -> None: pass - def detach(self, sr_uuid): + @override + def detach(self, sr_uuid) -> None: pass @@ -134,7 +146,8 @@ def __init__(self, sr, location): self.location = location VDI.VDI.__init__(self, sr, None) - def load(self, location): + @override + def load(self, location) -> None: self.path = self.location self.size = 0 self.utilisation = 0 @@ -149,7 +162,7 @@ def load(self, location): self.sm_config['hotplugged_at'] = iso8601 self.path = os.path.realpath(self.path) - + dev = os.path.basename(self.path) info = sysdevice.stat(dev) if "size" in info.keys(): @@ -176,7 +189,8 @@ def load(self, location): except OSError as e: self.deleted = True - def introduce(self, sr_uuid, vdi_uuid): + @override + def introduce(self, sr_uuid, vdi_uuid) -> str: self.uuid = vdi_uuid self.location = self.sr.srcmd.params['vdi_location'] self._db_introduce() @@ -184,7 +198,8 @@ def introduce(self, sr_uuid, vdi_uuid): self.sr.update(sr_uuid) return super(udevVDI, self).get_params() - def update(self, sr_uuid, vdi_location): + @override + def update(self, sr_uuid, vdi_location) -> None: self.load(vdi_location) # _db_update requires self.uuid to be set self.uuid = self.sr.srcmd.params['vdi_uuid'] @@ -198,13 +213,15 @@ def update(self, sr_uuid, vdi_location): #self.sr.session.xenapi.VDI.set_name_label(vdi, self.label) #self.sr.session.xenapi.VDI.set_name_description(vdi, self.description) - def attach(self, sr_uuid, vdi_uuid): + @override + def attach(self, sr_uuid, vdi_uuid) -> str: if self.deleted: raise xs_errors.XenError('VDIUnavailable') return super(udevVDI, self).attach(sr_uuid, vdi_uuid) - def detach(self, sr_uuid, vdi_uuid): + @override + def detach(self, sr_uuid, vdi_uuid) -> None: pass if __name__ == '__main__': diff --git a/misc/fairlock/fairlock.py 
b/misc/fairlock/fairlock.py index af85a797a..7e5206ba2 100644 --- a/misc/fairlock/fairlock.py +++ b/misc/fairlock/fairlock.py @@ -1,4 +1,4 @@ -from sm_typing import Any, Callable, Dict, Optional +from sm_typing import Any, Callable, Dict, Optional, override import os import socket @@ -15,10 +15,11 @@ class SingletonWithArgs(type): def __init__(cls, name, bases, dct): cls._init[cls] = dct.get('__init__', None) - def __call__(cls, *args, **kwargs): + @override + def __call__(cls, *args, **kwargs) -> Any: init = cls._init[cls] if init is not None: - key = (cls, frozenset( + key: Any = (cls, frozenset( inspect.getcallargs(init, None, *args, **kwargs).items())) else: key = cls diff --git a/sm_typing/__init__.py b/sm_typing/__init__.py index c515056cf..2042dea65 100644 --- a/sm_typing/__init__.py +++ b/sm_typing/__init__.py @@ -1,2 +1,14 @@ import typing from typing import * + +if not hasattr(typing, 'override'): + def override(method): # type: ignore + try: + # Set internal attr `__override__` like described in PEP 698. + method.__override__ = True + except (AttributeError, TypeError): + pass + return method + +if not hasattr(typing, 'Never'): + Never = None # type: ignore diff --git a/stubs/XenAPIPlugin.pyi b/stubs/XenAPIPlugin.pyi index ca5f52caf..72bad09fb 100644 --- a/stubs/XenAPIPlugin.pyi +++ b/stubs/XenAPIPlugin.pyi @@ -1,5 +1,5 @@ class Failure(Exception): def __init__(self, code, params) -> None: ... - def __str__(self) -> str: ... + def __str__(self) -> str: ... # type: ignore def dispatch(fn_table) -> None: ... diff --git a/tests/lvmlib.py b/tests/lvmlib.py index c57b32e05..6ec8d2c71 100644 --- a/tests/lvmlib.py +++ b/tests/lvmlib.py @@ -1,3 +1,5 @@ +from sm_typing import Never, override + import argparse import sys @@ -7,10 +9,12 @@ class TestArgParse(argparse.ArgumentParser): to stderr during the tests """ - def exit(self, status=0, msg=None): + @override + def exit(self, status=0, msg=None) -> Never: sys.exit(status) - def error(self, msg): + @override + def error(self, msg) -> Never: """error(msg : string)""" self.exit(2, "%s: error: %s\n" % (self.prog, msg)) diff --git a/tests/shared_iscsi_test_base.py b/tests/shared_iscsi_test_base.py index a65555470..b224d30dd 100644 --- a/tests/shared_iscsi_test_base.py +++ b/tests/shared_iscsi_test_base.py @@ -1,3 +1,5 @@ +from sm_typing import Dict, List, Tuple, override + import unittest from unittest import mock @@ -6,8 +8,11 @@ class ISCSITestCase(unittest.TestCase): + # Declared in subclasses. 
+ TEST_CLASS: str - def setUp(self): + @override + def setUp(self) -> None: iscsilib_patcher = mock.patch(f'{self.TEST_CLASS}.iscsilib', autospec=True) self.mock_iscsilib = iscsilib_patcher.start() @@ -15,8 +20,8 @@ def setUp(self): self.mock_iscsilib._checkTGT.side_effect = self._checkTGT self.mock_iscsilib.login.side_effect = self.iscsi_login self.mock_iscsilib.parse_IP_port = iscsilib.parse_IP_port - self.discovery_data = {} - self.sessions = [] + self.discovery_data: Dict[str, Tuple[str, int, str]] = {} + self.sessions: List[str] = [] sleep_patcher = mock.patch(f'{self.TEST_CLASS}.time.sleep', autospec=True) diff --git a/tests/test_BaseISCSI.py b/tests/test_BaseISCSI.py index cc489a46a..7c4ed83fa 100644 --- a/tests/test_BaseISCSI.py +++ b/tests/test_BaseISCSI.py @@ -2,6 +2,8 @@ Unit tests for the Base ISCSI SR """ +from sm_typing import override + from unittest import mock from uuid import uuid4 @@ -15,7 +17,8 @@ class TestBaseISCSI(ISCSITestCase): TEST_CLASS = 'BaseISCSI' - def setUp(self): + @override + def setUp(self) -> None: self.addCleanup(mock.patch.stopall) util_patcher = mock.patch('BaseISCSI.util', autospec=True) diff --git a/tests/test_FileSR.py b/tests/test_FileSR.py index a82d0e5be..109f6d306 100644 --- a/tests/test_FileSR.py +++ b/tests/test_FileSR.py @@ -1,3 +1,5 @@ +from sm_typing import Any, Optional, Set, override + import errno import os import stat @@ -18,7 +20,8 @@ class FakeFileVDI(FileSR.FileVDI): - def load(self, uuid): + @override + def load(self, uuid) -> None: self.vdi_type = vhdutil.VDI_TYPE_VHD self.hidden = False self.path = os.path.join(self.sr.path, '%s.%s' % ( @@ -27,7 +30,8 @@ def load(self, uuid): class TestFileVDI(unittest.TestCase): - def setUp(self): + @override + def setUp(self) -> None: startlog_patcher = mock.patch('FileSR.util.start_log_entry', autospec=True) self.mock_startlog = startlog_patcher.start() @@ -50,7 +54,7 @@ def setUp(self): fist_patcher = mock.patch('FileSR.util.FistPoint.is_active', autospec=True) self.mock_fist = fist_patcher.start() - self.active_fists = set() + self.active_fists: Set[Any] = set() def active_fists(): return self.active_fists @@ -419,15 +423,19 @@ class FakeSharedFileSR(FileSR.SharedFileSR): """ Test SR class for SharedFileSR """ - def load(self, sr_uuid): + + @override + def load(self, sr_uuid) -> None: self.path = os.path.join(SR.MOUNT_BASE, sr_uuid) - self.lock = None + self.lock = None # type: ignore - def attach(self, sr_uuid): + @override + def attach(self, sr_uuid) -> None: self._check_writable() self._check_hardlinks() - def _read_hardlink_conf(self): + @override + def _read_hardlink_conf(self) -> Optional[bool]: return None class TestShareFileSR(unittest.TestCase): @@ -437,7 +445,8 @@ class TestShareFileSR(unittest.TestCase): TEST_SR_REF = "test_sr_ref" ERROR_524 = "Unknown error 524" - def setUp(self): + @override + def setUp(self) -> None: util_patcher = mock.patch('FileSR.util', autospec=True) self.mock_util = util_patcher.start() @@ -561,7 +570,8 @@ def test_scan_load_vdis_scan_list_differ(self): self.assertEqual(1, len(test_sr.vdis)) class TestFileSR(unittest.TestCase): - def setUp(self): + @override + def setUp(self) -> None: pread_patcher = mock.patch('FileSR.util.pread') self.mock_pread = pread_patcher.start() diff --git a/tests/test_ISCSISR.py b/tests/test_ISCSISR.py index e71ac2684..9af3d4bc2 100644 --- a/tests/test_ISCSISR.py +++ b/tests/test_ISCSISR.py @@ -1,3 +1,5 @@ +from sm_typing import override + import unittest import BaseISCSI import unittest.mock as mock @@ -8,17 +10,20 @@ class 
TestBase(unittest.TestCase): """ Provides errorcodes.xml, so exceptions are sensible """ - def setUp(self): + @override + def setUp(self) -> None: self._xmldefs = xs_errors.XML_DEFS xs_errors.XML_DEFS = os.path.join( os.path.dirname(__file__), 'XE_SR_ERRORCODES.xml') - def tearDown(self): + @override + def tearDown(self) -> None: xs_errors.XML_DEFS = self._xmldefs class NonLoadingISCSISR(BaseISCSI.BaseISCSISR): - def load(self, sr_uuid): + @override + def load(self, sr_uuid) -> None: pass @@ -82,10 +87,12 @@ def __init__(self, node1, node2): extra_adapter: None } - def _synchroniseAddrList(self, *args, **kwargs): + @override + def _synchroniseAddrList(self, *args, **kwargs) -> None: pass - def _init_adapters(self): + @override + def _init_adapters(self) -> None: pass @@ -115,8 +122,8 @@ def test_vdi_type_modified_by_force_tapdisk(self): class TestMultiLUNISCSISR(unittest.TestCase): - - def setUp(self): + @override + def setUp(self) -> None: self.node1 = { 'ip': '127.0.0.1', 'port': 3260, diff --git a/tests/test_LVHDSR.py b/tests/test_LVHDSR.py index 600f1379f..3c4572fd2 100644 --- a/tests/test_LVHDSR.py +++ b/tests/test_LVHDSR.py @@ -1,3 +1,5 @@ +from sm_typing import override + import copy import os import unittest @@ -36,11 +38,12 @@ def remove_stubs(self): class TestLVHDSR(unittest.TestCase, Stubs): - - def setUp(self): + @override + def setUp(self) -> None: self.init_stubs() - def tearDown(self): + @override + def tearDown(self) -> None: self.remove_stubs() def create_LVHDSR(self, master=False, command='foo', sr_uuid=None): @@ -294,8 +297,8 @@ def convert_vdi_to_meta(self, vdi_data): class TestLVHDVDI(unittest.TestCase, Stubs): - - def setUp(self): + @override + def setUp(self) -> None: self.init_stubs() lvhdutil_patcher = mock.patch('LVHDSR.lvhdutil', autospec=True) @@ -332,7 +335,8 @@ def setUp(self): self.addCleanup(mock.patch.stopall) - def tearDown(self): + @override + def tearDown(self) -> None: self.remove_stubs() def create_LVHDSR(self): diff --git a/tests/test_LVHDoHBASR.py b/tests/test_LVHDoHBASR.py index 3bc7196b6..da88e4614 100644 --- a/tests/test_LVHDoHBASR.py +++ b/tests/test_LVHDoHBASR.py @@ -1,3 +1,5 @@ +from sm_typing import override + import unittest.mock as mock import LVHDoHBASR import unittest @@ -72,8 +74,8 @@ def test_generate_config_bad_path_assert(self, class TestLVHDoHBASR(unittest.TestCase): - - def setUp(self): + @override + def setUp(self) -> None: self.host_ref = str(uuid4()) self.session_ref = str(uuid4()) self.sr_ref = str(uuid4()) diff --git a/tests/test_LVHDoISCSISR.py b/tests/test_LVHDoISCSISR.py index 3b5e1c420..ec71f20d7 100644 --- a/tests/test_LVHDoISCSISR.py +++ b/tests/test_LVHDoISCSISR.py @@ -1,3 +1,5 @@ +from sm_typing import Set, override + import os import unittest import unittest.mock as mock @@ -62,7 +64,8 @@ class TestLVHDoISCSISR_load(unittest.TestCase): Tests for 'LVHDoISCSISR.load()' """ - def setUp(self): + @override + def setUp(self) -> None: patchers = [ mock.patch( 'BaseISCSI.BaseISCSISR', @@ -124,14 +127,15 @@ class TestLVHDoISCSISR(ISCSITestCase): TEST_CLASS = 'LVHDoISCSISR' - def setUp(self): + @override + def setUp(self) -> None: util_patcher = mock.patch('LVHDoISCSISR.util', autospec=True) self.mock_util = util_patcher.start() # self.mock_util.SMlog.side_effect = print self.mock_util.isVDICommand = util.isVDICommand self.mock_util.sessions_less_than_targets = util.sessions_less_than_targets - self.base_srs = set() + self.base_srs: Set[BaseISCSISR] = set() baseiscsi_patcher = mock.patch('LVHDoISCSISR.BaseISCSI.BaseISCSISR', 
autospec=True) patched_baseiscsi = baseiscsi_patcher.start() diff --git a/tests/test_SMBSR.py b/tests/test_SMBSR.py index 4cfd2733b..42ce035e2 100644 --- a/tests/test_SMBSR.py +++ b/tests/test_SMBSR.py @@ -1,3 +1,5 @@ +from sm_typing import Dict, override + import unittest import unittest.mock as mock import uuid @@ -25,14 +27,14 @@ def __init__(self, srcmd, none): class Test_SMBSR(unittest.TestCase): - - def setUp(self): + @override + def setUp(self) -> None: self.addCleanup(mock.patch.stopall) pread_patcher = mock.patch('SMBSR.util.pread', autospec=True) self.mock_pread = pread_patcher.start() self.mock_pread.side_effect = self.pread - self.pread_results = {} + self.pread_results: Dict[str, str] = {} listdir_patcher = mock.patch('SMBSR.util.listdir', autospec=True) self.mock_list_dir = listdir_patcher.start() diff --git a/tests/test_SR.py b/tests/test_SR.py index b139f4c5f..4e0de361a 100644 --- a/tests/test_SR.py +++ b/tests/test_SR.py @@ -1,3 +1,5 @@ +from sm_typing import override + import unittest import unittest.mock as mock import SR @@ -18,10 +20,12 @@ def __init__(self, device=None): def verify(self): pass - def setUp(self): + @override + def setUp(self) -> None: pass - def tearDown(self): + @override + def tearDown(self) -> None: pass def create_SR(self, cmd, dconf, cmd_params=None): diff --git a/tests/test_blktap2.py b/tests/test_blktap2.py index f1f0c9208..fd33899d5 100644 --- a/tests/test_blktap2.py +++ b/tests/test_blktap2.py @@ -1,3 +1,5 @@ +from sm_typing import override + import errno import json from io import StringIO @@ -24,7 +26,8 @@ class TestTapdisk(unittest.TestCase): # hence no usual decorator mocks and the monkey patching. # https://bugs.python.org/issue23078 # - def setUp(self): + @override + def setUp(self) -> None: subprocess_patcher = mock.patch("blktap2.subprocess") self.mock_subprocess = subprocess_patcher.start() @@ -111,7 +114,8 @@ def no_medium(pid, minor, type, path, options): class TestVDI(unittest.TestCase): - def setUp(self): + @override + def setUp(self) -> None: self.addCleanup(mock.patch.stopall) lock_patcher = mock.patch('blktap2.Lock', autospec=True) @@ -409,8 +413,8 @@ def test_activate_rw_already_activating_fail( class TestTapCtl(unittest.TestCase): - - def setUp(self): + @override + def setUp(self) -> None: subprocess_patcher = mock.patch("blktap2.subprocess") self.mock_subprocess = subprocess_patcher.start() diff --git a/tests/test_cbt.py b/tests/test_cbt.py index 9093f2162..18bdc8a0c 100644 --- a/tests/test_cbt.py +++ b/tests/test_cbt.py @@ -1,3 +1,5 @@ +from sm_typing import override + import unittest.mock as mock import SR import testlib @@ -15,7 +17,8 @@ class TestVDI(VDI.VDI): - def load(self, vdi_uuid): + @override + def load(self, vdi_uuid) -> None: self.vdi_type = vhdutil.VDI_TYPE_VHD self._state_mock = mock.Mock() self.path = "/mock/sr_path/" + str(vdi_uuid) @@ -25,46 +28,56 @@ def load(self, vdi_uuid): def state_mock(self): return self._state_mock - def _get_blocktracking_status(self, uuid=None): + @override + def _get_blocktracking_status(self, uuid=None) -> bool: return self.block_tracking_state - def _ensure_cbt_space(self): + @override + def _ensure_cbt_space(self) -> None: super(TestVDI, self)._ensure_cbt_space() self.state_mock._ensure_cbt_space() - def _get_cbt_logpath(self, uuid): + @override + def _get_cbt_logpath(self, uuid) -> str: super(TestVDI, self)._get_cbt_logpath(uuid) self.state_mock._get_cbt_logpath(uuid) return "/mock/sr_path/{0}.log".format(uuid) - def _create_cbt_log(self): + @override + def 
_create_cbt_log(self) -> str: logpath = super(TestVDI, self)._create_cbt_log() self.state_mock._create_cbt_log() self.block_tracking_state = True return logpath - def _delete_cbt_log(self): + @override + def _delete_cbt_log(self) -> None: self.state_mock._delete_cbt_log() self.block_tracking_state = False - def _rename(self, from_path, to_path): + @override + def _rename(self, from_path, to_path) -> None: self.state_mock._rename(from_path, to_path) + @override def _do_snapshot(self, sr_uuid, vdi_uuid, snapType, - cloneOp=False, secondary=None, cbtlog=None): - self.state_mock._do_snapshot(sr_uuid, vdi_uuid, snapType, cloneOp, - secondary, cbtlog) + cloneOp=False, secondary=None, cbtlog=None) -> str: + return self.state_mock._do_snapshot( + sr_uuid, vdi_uuid, snapType, cloneOp, secondary, cbtlog + ) - def _activate_cbt_log(self, logname): - self.state_mock._activate_cbt_log(logname) + @override + def _activate_cbt_log(self, logname) -> bool: + return self.state_mock._activate_cbt_log(logname) - def _deactivate_cbt_log(self, logname): + @override + def _deactivate_cbt_log(self, logname) -> None: self.state_mock._deactivate_cbt_log(logname) class TestCBT(unittest.TestCase): - - def setUp(self): + @override + def setUp(self) -> None: self.sr = mock.MagicMock() self.vdi_uuid = uuid.uuid4() self.sr_uuid = uuid.uuid4() diff --git a/tests/test_cleanup.py b/tests/test_cleanup.py index bbd04b15c..2cd947139 100644 --- a/tests/test_cleanup.py +++ b/tests/test_cleanup.py @@ -1,4 +1,4 @@ -from sm_typing import List +from sm_typing import Dict, List, override import errno import signal @@ -59,7 +59,8 @@ def create_cleanup_sr(xapi, uuid=None): class TestSR(unittest.TestCase): - def setUp(self): + @override + def setUp(self) -> None: time_sleep_patcher = mock.patch('cleanup.time.sleep') self.mock_time_sleep = time_sleep_patcher.start() @@ -81,7 +82,8 @@ def setUp(self): self.addCleanup(mock.patch.stopall) - def tearDown(self): + @override + def tearDown(self) -> None: cleanup.SIGTERM = False def setup_abort_flag(self, ipc_mock, should_abort=False): @@ -1885,14 +1887,14 @@ def test_not_plugged_retry(self): class TestLockGCActive(unittest.TestCase): - - def setUp(self): + @override + def setUp(self) -> None: self.addCleanup(mock.patch.stopall) self.lock_patcher = mock.patch('cleanup.lock.Lock') patched_lock = self.lock_patcher.start() patched_lock.side_effect = self.create_lock - self.locks = {} + self.locks: Dict[str, TestLockGCActive.DummyLock] = {} self.sr_uuid = str(uuid4()) diff --git a/tests/test_fairlock.py b/tests/test_fairlock.py index 3078c0d73..e5a4e298d 100644 --- a/tests/test_fairlock.py +++ b/tests/test_fairlock.py @@ -1,3 +1,5 @@ +from sm_typing import override + import unittest import unittest.mock as mock @@ -5,7 +7,8 @@ from fairlock import Fairlock, FairlockServiceTimeout, FairlockDeadlock class TestFairlock(unittest.TestCase): - def setUp(self): + @override + def setUp(self) -> None: sock_patcher = mock.patch('fairlock.socket', autospec=True) self.mock_socket = sock_patcher.start() os_patcher = mock.patch('fairlock.os', autospec=True) diff --git a/tests/test_fjournaler.py b/tests/test_fjournaler.py index 115aaee9b..678358752 100644 --- a/tests/test_fjournaler.py +++ b/tests/test_fjournaler.py @@ -1,3 +1,5 @@ +from sm_typing import Any, Dict, override + from io import BytesIO as StringIO import os import unittest @@ -32,9 +34,9 @@ def readline(self): class TestFjournaler(unittest.TestCase): - - def setUp(self): - self.files = {} + @override + def setUp(self) -> None: + self.files: Dict[str, 
Any] = {} self.open_handlers = {TEST_DIR_PATH: self.__fake_open} self.subject = fjournaler.Journaler(TEST_DIR_PATH) diff --git a/tests/test_keymanagerutil.py b/tests/test_keymanagerutil.py index d97bb172b..70edbe5db 100644 --- a/tests/test_keymanagerutil.py +++ b/tests/test_keymanagerutil.py @@ -1,6 +1,9 @@ """ Test the "fake" keymanager for testing VHD encryption """ + +from sm_typing import Dict, override + import base64 import copy import io @@ -15,8 +18,8 @@ class TestKeymanagerutil(unittest.TestCase): - - def setUp(self): + @override + def setUp(self) -> None: self.addCleanup(mock.patch.stopall) log_patcher = mock.patch('plugins.keymanagerutil.util.SMlog', autospec=True) @@ -27,7 +30,7 @@ def setUp(self): self.mock_open = open_patcher.start() self.mock_open.side_effect = self.open - self.files = {} + self.files: Dict[str, io.StringIO] = {} isfile_patcher = mock.patch( 'plugins.keymanagerutil.os.path.isfile', autospec=True) diff --git a/tests/test_lock.py b/tests/test_lock.py index f90d3bb6b..bee447a04 100644 --- a/tests/test_lock.py +++ b/tests/test_lock.py @@ -1,3 +1,5 @@ +from sm_typing import override + import fcntl import unittest import unittest.mock as mock @@ -12,8 +14,8 @@ class TestLock(unittest.TestCase): - - def tearDown(self): + @override + def tearDown(self) -> None: lock.Lock.INSTANCES = {} lock.Lock.BASE_INSTANCES = {} @@ -197,7 +199,8 @@ def create_lock_class_that_fails_to_create_file(number_of_failures): class LockThatFailsToCreateFile(lock.LockImplementation): _failures = number_of_failures - def _open_lockfile(self): + @override + def _open_lockfile(self) -> None: if self._failures > 0: error = IOError('No such file') error.errno = errno.ENOENT diff --git a/tests/test_lock_queue.py b/tests/test_lock_queue.py index edd733dcb..e9622cff2 100644 --- a/tests/test_lock_queue.py +++ b/tests/test_lock_queue.py @@ -1,3 +1,5 @@ +from sm_typing import override + import builtins import copy import os @@ -22,7 +24,8 @@ def mock_pickle_load_fn(*args): class Test_LockQueue(unittest.TestCase): - def setUp(self): + @override + def setUp(self) -> None: # Re-initialize queue to empty for each test global saved_queue saved_queue = [] diff --git a/tests/test_lvutil.py b/tests/test_lvutil.py index 2df8300b8..439c1fbee 100644 --- a/tests/test_lvutil.py +++ b/tests/test_lvutil.py @@ -1,3 +1,5 @@ +from sm_typing import override + import unittest.mock as mock import os import syslog @@ -25,7 +27,8 @@ def decorated(self, context, *args, **kwargs): class TestCreate(unittest.TestCase): - def setUp(self): + @override + def setUp(self) -> None: lock_patcher = mock.patch('lvutil.Fairlock', autospec=True) self.addCleanup(lock_patcher.stop) self.mock_lock = lock_patcher.start() @@ -98,7 +101,8 @@ def test_create_percentage_has_precedence_over_size(self, mock_pread): class TestRemove(unittest.TestCase): - def setUp(self): + @override + def setUp(self) -> None: lock_patcher = mock.patch('lvutil.Fairlock', autospec=True) self.addCleanup(lock_patcher.stop) self.mock_lock = lock_patcher.start() @@ -123,8 +127,8 @@ def test_remove_additional_config_param(self, mock_pread, _bugCleanup): class TestDeactivate(unittest.TestCase): - - def setUp(self): + @override + def setUp(self) -> None: lock_patcher = mock.patch('lvutil.Fairlock', autospec=True) pathexists_patcher = mock.patch('lvutil.util.pathexists', autospec=True) lexists_patcher = mock.patch('lvutil.os.path.lexists', autospec=True) @@ -208,7 +212,8 @@ def test_deactivate_noref_withbugcleanup_retry_fail( class TestActivate(unittest.TestCase): - def 
setUp(self): + @override + def setUp(self) -> None: self.addCleanup(mock.patch.stopall) lock_patcher = mock.patch('lvutil.Fairlock', autospec=True) diff --git a/tests/test_mpath_dmp.py b/tests/test_mpath_dmp.py index 134b40e0c..312b55dad 100644 --- a/tests/test_mpath_dmp.py +++ b/tests/test_mpath_dmp.py @@ -1,6 +1,9 @@ """ Unit tests for mpath dmp """ + +from sm_typing import override + import errno import os import unittest @@ -22,7 +25,8 @@ class TestMpathDmp(unittest.TestCase): Unit tests for mpath dmp """ - def setUp(self): + @override + def setUp(self) -> None: time_patcher = mock.patch('mpath_dmp.time', autospec=True) self.mock_time = time_patcher.start() diff --git a/tests/test_on_slave.py b/tests/test_on_slave.py index 4b59f6323..b55e24aae 100644 --- a/tests/test_on_slave.py +++ b/tests/test_on_slave.py @@ -1,3 +1,5 @@ +from sm_typing import override + import errno import unittest import unittest.mock as mock @@ -27,7 +29,8 @@ def fake_import(self, *args, **kwargs): print('Asked to import {}'.format(args[0])) return self.mocks.get(args[0], self.real_import(*args, **kwargs)) - def setUp(self): + @override + def setUp(self) -> None: self.addCleanup(mock.patch.stopall) self.mocks = {x: mock.MagicMock() for x in self.MOCK_IMPORTS} @@ -132,7 +135,8 @@ class Test_on_slave_refresh_lun(unittest.TestCase): Tests for refresh_lun_size_by_SCSIid """ - def setUp(self): + @override + def setUp(self) -> None: self.mock_session = mock.MagicMock() @mock.patch('on_slave.scsiutil') @@ -164,7 +168,8 @@ class Test_on_slave_multi(unittest.TestCase): TMP_RENAME_PREFIX = "TEST_OLD_" - def setUp(self): + @override + def setUp(self) -> None: self.session = mock.MagicMock() lvmcache_patcher = mock.patch('on_slave.LVMCache', autospec=True) diff --git a/tests/test_sr_health_check.py b/tests/test_sr_health_check.py index 47c592123..bc879f96e 100644 --- a/tests/test_sr_health_check.py +++ b/tests/test_sr_health_check.py @@ -1,3 +1,5 @@ +from sm_typing import override + import unittest import unittest.mock as mock @@ -10,8 +12,8 @@ class TestSrHealthCheck(unittest.TestCase): - - def setUp(self): + @override + def setUp(self) -> None: util_patcher = mock.patch('sr_health_check.util') self.mock_util = util_patcher.start() self.mock_session = mock.MagicMock() diff --git a/tests/test_srmetadata.py b/tests/test_srmetadata.py index 720f12ff4..fbce62444 100644 --- a/tests/test_srmetadata.py +++ b/tests/test_srmetadata.py @@ -1,3 +1,5 @@ +from sm_typing import Generator, override + import io import random import string @@ -453,14 +455,17 @@ def __init__(self): super().__init__() self._metadata_file_content = b'\x00' * 4 * 1024 * 1024 - def start(self): + @override + def start(self) -> None: super().start() self.patch("util.gen_uuid", new=genuuid) - def generate_device_paths(self): + @override + def generate_device_paths(self) -> Generator[str, None, None]: yield self.METADATA_PATH - def fake_open(self, fname, mode='r'): + @override + def fake_open(self, fname, mode='r') -> io.TextIOBase: if fname != self.METADATA_PATH: # pragma: no cover return super().fake_open(fname, mode) else: diff --git a/tests/test_storage_init.py b/tests/test_storage_init.py index d91d90895..38e465708 100644 --- a/tests/test_storage_init.py +++ b/tests/test_storage_init.py @@ -1,3 +1,5 @@ +from sm_typing import DefaultDict, Dict, List, override + import json import os import re @@ -31,7 +33,8 @@ class TestStorageInit(unittest.TestCase): storage after installation. 
""" - def setUp(self): + @override + def setUp(self) -> None: self.test_dir = tempfile.TemporaryDirectory() # There are tweaks we need to make the to storage-init: @@ -103,11 +106,12 @@ def setUp(self): self.copy_command("touch") self.script_exited = False - self.created_srs = defaultdict(list) - self.misc_xe_calls = [] - self.unanticipated_xe_calls = [] + self.created_srs: DefaultDict[str, List[Dict[str, str]]] = defaultdict(list) + self.misc_xe_calls: List[List[str]] = [] + self.unanticipated_xe_calls: List[List[str]] = [] - def tearDown(self): + @override + def tearDown(self) -> None: self.socket.close() self.test_dir.cleanup() diff --git a/tests/test_util.py b/tests/test_util.py index 965eec150..26912f1f6 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -1,3 +1,5 @@ +from sm_typing import Any, Dict, List, Set, override + import copy import errno import io @@ -43,7 +45,8 @@ class TestUtil(unittest.TestCase): Tests for the util module methods """ - def setUp(self): + @override + def setUp(self) -> None: # OS Patchers statvfs_patcher = mock.patch("util.os.statvfs", autospec=True) self.mock_statvfs = statvfs_patcher.start() @@ -53,7 +56,7 @@ def setUp(self): self.mock_mkdir = mkdir_patcher.start() unlink_patcher = mock.patch('util.os.unlink', autospec=True) self.mock_unlink = unlink_patcher.start() - self.dir_contents = {} + self.dir_contents: Dict[str, List[str]] = {} listdir_patcher = mock.patch('util.os.listdir', autospec=True) self.mock_listdir = listdir_patcher.start() self.mock_listdir.side_effect = self.list_dir @@ -77,12 +80,12 @@ def setUp(self): self.mock_session = mock.MagicMock() self.mock_xenapi.xapi_local.return_value = self.mock_session - self.processes = {} + self.processes: Dict[str, Any] = {} popen_patcher = mock.patch('util.subprocess.Popen', autospec=True) self.mock_popen = popen_patcher.start() self.mock_popen.side_effect = self.popen - self.mock_files = {} + self.mock_files: Dict[str, Any] = {} self.addCleanup(mock.patch.stopall) @@ -714,7 +717,8 @@ def test_unictrunc(self): class TestFistPoints(unittest.TestCase): - def setUp(self): + @override + def setUp(self) -> None: self.addCleanup(mock.patch.stopall) sleep_patcher = mock.patch('util.time.sleep', autospec=True) self.mock_sleep = sleep_patcher.start() @@ -725,7 +729,7 @@ def setUp(self): exists_patcher = mock.patch('util.os.path.exists', autospec=True) self.mock_exists = exists_patcher.start() self.mock_exists.side_effect = self.exists - self.existing_files = set() + self.existing_files: Set[str] = set() xenapi_patcher = mock.patch('util.XenAPI', autospec=True) patched_xenapi = xenapi_patcher.start() diff --git a/tests/testlib.py b/tests/testlib.py index 167bb172f..066188f4c 100644 --- a/tests/testlib.py +++ b/tests/testlib.py @@ -1,3 +1,5 @@ +from sm_typing import Any, Generator, override + import re import unittest.mock as mock import os @@ -49,12 +51,13 @@ def add_disk(self): def add_parameter(self, host_class, values): self.parameters.append((host_class, values)) - def adapter_device_paths(self, host_id): + def adapter_device_paths(self, host_id) -> Generator[str, None, None]: yield '/sys/class/scsi_host/host%s' % host_id class AdapterWithNonBlockDevice(SCSIAdapter): - def adapter_device_paths(self, host_id): + @override + def adapter_device_paths(self, host_id) -> Generator[str, None, None]: for adapter_device_path in super(AdapterWithNonBlockDevice, self).adapter_device_paths(host_id): yield adapter_device_path @@ -115,7 +118,7 @@ def patch(self, *args, **kwargs): self.patchers.append(patcher) 
patcher.start() - def start(self): + def start(self) -> None: self.patch('builtins.open', new=self.fake_open) self.patch('fcntl.fcntl', new=self.fake_fcntl) self.patch('os.path.exists', new=self.fake_exists) @@ -190,7 +193,7 @@ def fake_uname(self): 'x86_64' ) - def fake_open(self, fname, mode='r'): + def fake_open(self, fname, mode='r') -> Any: if fname == '/etc/xensource-inventory': return io.StringIO(self.generate_inventory_contents()) @@ -264,7 +267,7 @@ def generate_path_content(self): for path, value in self._path_content.items(): yield (path, value) - def generate_device_paths(self): + def generate_device_paths(self) -> Generator[str, None, None]: actual_disk_letter = 'a' for host_id, adapter in enumerate(self.scsi_adapters): for adapter_device_path in adapter.adapter_device_paths(host_id): From d05657df30d7c0e686eaed9561b1d61587d1b50f Mon Sep 17 00:00:00 2001 From: Ronan Abhamon Date: Thu, 7 Nov 2024 15:50:22 +0100 Subject: [PATCH 61/72] Makefile fix: don't execute precheck during installation Due to mypy modifications, we can't build the sm RPM in Koji without a recent pylint version. So the precheck target is only executed in a github workflow now. Signed-off-by: Ronan Abhamon --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 293e5faa2..3748226cf 100755 --- a/Makefile +++ b/Makefile @@ -127,7 +127,7 @@ precheck: build echo "Precheck succeeded with no outstanding issues found." .PHONY: install -install: precheck +install: build mkdir -p $(SM_STAGING) $(call mkdir_clean,$(SM_STAGING)) mkdir -p $(SM_STAGING)$(SM_DEST) From 9105e02b2aae442da106dffb038c784e6028d9df Mon Sep 17 00:00:00 2001 From: Ronan Abhamon Date: Wed, 8 Jan 2025 11:12:13 +0100 Subject: [PATCH 62/72] Fix LVHDSR.load: set other_conf in cond-branch to prevent mypy error Avoid: ``` drivers/LVHDSR.py:195: error: Item "None" of "Any | None" has no attribute "get" [union-attr] drivers/LVHDSR.py:196: error: Value of type "Any | None" is not indexable [index] ``` Signed-off-by: Ronan Abhamon --- drivers/LVHDSR.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/LVHDSR.py b/drivers/LVHDSR.py index 01c9d1d7a..852cad3e5 100755 --- a/drivers/LVHDSR.py +++ b/drivers/LVHDSR.py @@ -166,10 +166,11 @@ def load(self, sr_uuid) -> None: self.mdpath = os.path.join(self.path, self.MDVOLUME_NAME) self.provision = self.PROVISIONING_DEFAULT - self.other_conf = None has_sr_ref = self.srcmd.params.get("sr_ref") if has_sr_ref: self.other_conf = self.session.xenapi.SR.get_other_config(self.sr_ref) + else: + self.other_conf = None self.lvm_conf = None if self.other_conf: From f0f3e4eb3060c8864f8c53e0ab47b4d159326ac4 Mon Sep 17 00:00:00 2001 From: Ronan Abhamon Date: Thu, 9 Jan 2025 17:41:02 +0100 Subject: [PATCH 63/72] fix(cleanup.py): protect LinstorSR init against race condition (#79) During `LinstorSR` init, only create the journaler to make `should_preempt` happy. The volume manager MUST always be created in a SR lock context. Otherwise, we can trigger major issues. For example, a volume can be deleted from the KV-store by `cleanup.py` during a snapshot rollback. Very rare situation but which allowed this problem to be discovered. 
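For readers unfamiliar with the GC flow, here is a minimal sketch of the locking discipline described above. This is an editor's illustration only: the class and helper names are hypothetical and do not match the actual cleanup.py code, which already implements this via `_reloadLinstor(journaler_only=True)`.

```python
# Editor's sketch of the pattern this commit describes; names are
# hypothetical, only the discipline (journaler built eagerly, volume
# manager built strictly under the SR lock) mirrors the patch.
import threading

class LinstorSRSketch:
    def __init__(self, uuid):
        self.uuid = uuid
        self._sr_lock = threading.Lock()   # stand-in for the real SR lock
        self.journaler = {}                # cheap: enough for should_preempt()
        self._volume_manager = None        # expensive: never built outside the lock

    def gc_pass(self):
        with self._sr_lock:
            # Build (or rebuild) the volume manager only while the lock is
            # held, so a concurrent snapshot rollback cannot remove volumes
            # from the KV-store underneath the running GC.
            if self._volume_manager is None:
                self._volume_manager = self._build_volume_manager()
            # ... coalesce / delete work would happen here ...

    def _build_volume_manager(self):
        return object()                    # placeholder for LinstorVolumeManager(...)
```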
Signed-off-by: Ronan Abhamon --- drivers/cleanup.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/cleanup.py b/drivers/cleanup.py index 6f75cf0f0..e6e2956f5 100755 --- a/drivers/cleanup.py +++ b/drivers/cleanup.py @@ -3316,7 +3316,7 @@ def __init__(self, uuid, xapi, createLock, force): SR.__init__(self, uuid, xapi, createLock, force) self.path = LinstorVolumeManager.DEV_ROOT_PATH - self._reloadLinstor() + self._reloadLinstor(journaler_only=True) @override def deleteVDI(self, vdi) -> None: @@ -3351,7 +3351,7 @@ def pauseVDIs(self, vdiList) -> None: ) return super(LinstorSR, self).pauseVDIs(vdiList) - def _reloadLinstor(self): + def _reloadLinstor(self, journaler_only=False): session = self.xapi.session host_ref = util.get_this_host_ref(session) sr_ref = session.xenapi.SR.get_by_uuid(self.uuid) @@ -3368,6 +3368,9 @@ def _reloadLinstor(self): controller_uri, group_name, logger=util.SMlog ) + if journaler_only: + return + self._linstor = LinstorVolumeManager( controller_uri, group_name, From 1deaa0e8c8dbf6f3898e2383934408d3cb828bfc Mon Sep 17 00:00:00 2001 From: Damien Thenot Date: Thu, 9 Jan 2025 17:41:32 +0100 Subject: [PATCH 64/72] Fix filter to reject other device types (#77) Signed-off-by: Damien Thenot --- drivers/LargeBlockSR.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/LargeBlockSR.py b/drivers/LargeBlockSR.py index fa66cfce8..449b2cf1a 100644 --- a/drivers/LargeBlockSR.py +++ b/drivers/LargeBlockSR.py @@ -224,7 +224,7 @@ def _redo_vg_connection(self): util.SMlog("Reconnecting VG {} to use emulated device".format(self.vgname)) try: lvutil.setActiveVG(self.vgname, False) - lvutil.setActiveVG(self.vgname, True, config="devices{ global_filter = [ \"r|^/dev/nvme.*|\", \"a|/dev/loop.*|\" ] }") + lvutil.setActiveVG(self.vgname, True, config="devices{ global_filter = [ \"a|/dev/loop.*|\", \"r|.*|\" ] }") except util.CommandException as e: xs_errors.XenError("LargeBlockVGReconnectFailed", opterr="Failed to reconnect the VolumeGroup {}, error: {}".format(self.vgname, e)) From c1a9bab023ed5a3b3719c122eeb9df8c869f5878 Mon Sep 17 00:00:00 2001 From: Ronan Abhamon Date: Tue, 18 Feb 2025 00:53:36 +0100 Subject: [PATCH 65/72] fix(cleanup.py): resize on a primary host (#82) Until now the cleanup VHD resize commands were performed on the master. But it doesn't work every time when a VHD of a chain is opened for reading on another host. As a reminder, this portion of code is only executed rarely. A user must have resized a VHD that must later be coalesced. 
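As an editor's illustration of the host fallback this patch enables (the real dispatch lives in linstorvhdutil.py's remote-setter helpers and is more involved), a resize can be attempted locally first and then retried through the linstor-manager XAPI plugin on another host; `setSizeVirt` and its arguments are the ones registered by this patch, while the error handling and host selection below are simplified assumptions.

```python
# Editor's sketch only: simplified error handling and host selection.
import util      # sm driver helpers, assumed importable as in the drivers
import vhdutil

def set_size_virt_on_any_host(session, host_refs, device_path, size, jfile):
    try:
        # Fast path: the DRBD volume is opened (primary) on this host.
        return vhdutil.setSizeVirt(device_path, size, jfile)
    except util.CommandException:
        pass
    args = {'devicePath': device_path, 'size': str(size), 'jfile': jfile}
    for host_ref in host_refs:
        try:
            # 'setSizeVirt' is exposed by the linstor-manager plugin in this patch.
            return session.xenapi.host.call_plugin(
                host_ref, 'linstor-manager', 'setSizeVirt', args)
        except Exception:
            continue
    raise util.SMException('setSizeVirt failed on every host')
```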
Signed-off-by: Ronan Abhamon --- drivers/cleanup.py | 55 +++++++++++++++++++++++++++++++++++++ drivers/linstor-manager | 35 +++++++++++++++++++++++ drivers/linstorjournaler.py | 1 + drivers/linstorvhdutil.py | 27 ++++++++++++------ 4 files changed, 110 insertions(+), 8 deletions(-) diff --git a/drivers/cleanup.py b/drivers/cleanup.py index e6e2956f5..7bba0a059 100755 --- a/drivers/cleanup.py +++ b/drivers/cleanup.py @@ -1754,6 +1754,61 @@ def _setHidden(self, hidden=True) -> None: else: VDI._setHidden(self, hidden) + @override + def _increaseSizeVirt(self, size, atomic=True): + if self.raw: + offset = self.drbd_size + if self.sizeVirt < size: + oldSize = self.drbd_size + self.drbd_size = LinstorVolumeManager.round_up_volume_size(size) + Util.log(" Growing %s: %d->%d" % (self.path, oldSize, self.drbd_size)) + self.sr._linstor.resize_volume(self.uuid, self.drbd_size) + offset = oldSize + unfinishedZero = False + jval = self.sr.journaler.get(LinstorJournaler.ZERO, self.uuid) + if jval: + unfinishedZero = True + offset = int(jval) + length = self.drbd_size - offset + if not length: + return + + if unfinishedZero: + Util.log(" ==> Redoing unfinished zeroing out") + else: + self.sr.journaler.create(LinstorJournaler.ZERO, self.uuid, str(offset)) + Util.log(" Zeroing %s: from %d, %dB" % (self.path, offset, length)) + abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT) + func = lambda: util.zeroOut(self.path, offset, length) + Util.runAbortable(func, True, self.sr.uuid, abortTest, VDI.POLL_INTERVAL, 0) + self.sr.journaler.remove(LinstorJournaler.ZERO, self.uuid) + return + + if self.sizeVirt >= size: + return + Util.log(" Expanding VHD virt size for VDI %s: %s -> %s" % \ + (self, Util.num2str(self.sizeVirt), Util.num2str(size))) + + msize = self.sr._vhdutil.get_max_resize_size(self.uuid) * 1024 * 1024 + if (size <= msize): + self.sr._vhdutil.set_size_virt_fast(self.path, size) + else: + if atomic: + vdiList = self._getAllSubtree() + self.sr.lock() + try: + self.sr.pauseVDIs(vdiList) + try: + self._setSizeVirt(size) + finally: + self.sr.unpauseVDIs(vdiList) + finally: + self.sr.unlock() + else: + self._setSizeVirt(size) + + self.sizeVirt = self.sr._vhdutil.get_size_virt(self.uuid) + @override def _setSizeVirt(self, size) -> None: jfile = self.uuid + '-jvhd' diff --git a/drivers/linstor-manager b/drivers/linstor-manager index fb2b9fe64..9cef5f825 100755 --- a/drivers/linstor-manager +++ b/drivers/linstor-manager @@ -485,6 +485,15 @@ def get_allocated_size(session, args): raise +def get_max_resize_size(session, args): + try: + device_path = args['devicePath'] + return str(vhdutil.getMaxResizeSize(device_path)) + except Exception as e: + util.SMlog('linstor-manager:get_size_phys error: {}'.format(e)) + raise + + def get_depth(session, args): try: device_path = args['devicePath'] @@ -524,6 +533,29 @@ def get_drbd_size(session, args): raise +def set_size_virt(session, args): + try: + device_path = args['devicePath'] + size = int(args['size']) + jfile = args['jfile'] + vhdutil.setSizeVirt(device_path, size, jfile) + return '' + except Exception as e: + util.SMlog('linstor-manager:set_size_virt error: {}'.format(e)) + raise + + +def set_size_virt_fast(session, args): + try: + device_path = args['devicePath'] + size = int(args['size']) + vhdutil.setSizeVirtFast(device_path, size) + return '' + except Exception as e: + util.SMlog('linstor-manager:set_size_virt_fast error: {}'.format(e)) + raise + + def set_parent(session, args): try: device_path = args['devicePath'] @@ -1211,6 +1243,7 @@ if 
__name__ == '__main__': 'hasParent': has_parent, 'getParent': get_parent, 'getSizeVirt': get_size_virt, + 'getMaxResizeSize': get_max_resize_size, 'getSizePhys': get_size_phys, 'getAllocatedSize': get_allocated_size, 'getDepth': get_depth, @@ -1222,6 +1255,8 @@ if __name__ == '__main__': # Called by cleanup.py to coalesce when a primary # is opened on a non-local host. + 'setSizeVirt': set_size_virt, + 'setSizeVirtFast': set_size_virt_fast, 'setParent': set_parent, 'coalesce': coalesce, 'repair': repair, diff --git a/drivers/linstorjournaler.py b/drivers/linstorjournaler.py index a61d9f11b..2475ae1e9 100755 --- a/drivers/linstorjournaler.py +++ b/drivers/linstorjournaler.py @@ -44,6 +44,7 @@ class LinstorJournaler: """ CLONE = 'clone' INFLATE = 'inflate' + ZERO = 'zero' @staticmethod def default_logger(*args): diff --git a/drivers/linstorvhdutil.py b/drivers/linstorvhdutil.py index c33c24c06..76996e38b 100644 --- a/drivers/linstorvhdutil.py +++ b/drivers/linstorvhdutil.py @@ -248,6 +248,10 @@ def _get_parent(self, vdi_uuid, response): def get_size_virt(self, vdi_uuid, response): return int(response) + @linstorhostcall(vhdutil.getMaxResizeSize, 'getMaxResizeSize') + def get_max_resize_size(self, vdi_uuid, response): + return int(response) + @linstorhostcall(vhdutil.getSizePhys, 'getSizePhys') def get_size_phys(self, vdi_uuid, response): return int(response) @@ -286,14 +290,6 @@ def _get_drbd_size(self, path): def create(self, path, size, static, msize=0): return self._call_local_method_or_fail(vhdutil.create, path, size, static, msize) - @linstormodifier() - def set_size_virt(self, path, size, jfile): - return self._call_local_method_or_fail(vhdutil.setSizeVirt, path, size, jfile) - - @linstormodifier() - def set_size_virt_fast(self, path, size): - return self._call_local_method_or_fail(vhdutil.setSizeVirtFast, path, size) - @linstormodifier() def set_size_phys(self, path, size, debug=True): return self._call_local_method_or_fail(vhdutil.setSizePhys, path, size, debug) @@ -368,6 +364,21 @@ def deflate(self, vdi_path, new_size, old_size, zeroize=False): # Remote setters: write locally and try on another host in case of failure. # -------------------------------------------------------------------------- + @linstormodifier() + def set_size_virt(self, path, size, jfile): + kwargs = { + 'size': size, + 'jfile': jfile + } + return self._call_method(vhdutil.setSizeVirt, 'setSizeVirt', path, use_parent=False, **kwargs) + + @linstormodifier() + def set_size_virt_fast(self, path, size): + kwargs = { + 'size': size + } + return self._call_method(vhdutil.setSizeVirtFast, 'setSizeVirtFast', path, use_parent=False, **kwargs) + @linstormodifier() def force_parent(self, path, parentPath, parentRaw=False): kwargs = { From 756509761e163aca0aca14fd442e25e8e98302c8 Mon Sep 17 00:00:00 2001 From: Ronan Abhamon Date: Tue, 22 Apr 2025 11:10:24 +0200 Subject: [PATCH 66/72] chore(.github/workflows): use ubuntu-24.04 Previous version is no longer available. Signed-off-by: Ronan Abhamon --- .github/workflows/main.yml | 4 ++-- dev_requirements.txt | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index d89b968c4..015d64126 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -23,14 +23,14 @@ jobs: mypy . 
build: - runs-on: ubuntu-20.04 + runs-on: ubuntu-24.04 steps: - uses: actions/checkout@v3 - name: Set up Python 3 uses: actions/setup-python@v4 with: - python-version: '3.6' + python-version: '3.x' - name: Install dependencies run: | diff --git a/dev_requirements.txt b/dev_requirements.txt index f25f76868..5fbd5907f 100644 --- a/dev_requirements.txt +++ b/dev_requirements.txt @@ -1,5 +1,5 @@ coverage -astroid==2.3.3 -pylint==2.4.4 +astroid +pylint bitarray python-linstor From 0d47a4186c361bbc619f8a7042024e6fd2a0d51c Mon Sep 17 00:00:00 2001 From: Ronan Abhamon Date: Tue, 22 Apr 2025 11:27:12 +0200 Subject: [PATCH 67/72] Fix warns reported by new github workflow Signed-off-by: Ronan Abhamon --- drivers/LUNperVDI.py | 1 + drivers/LVHDSR.py | 5 +++-- drivers/LinstorSR.py | 3 +-- drivers/cleanup.py | 3 +++ drivers/iscsilib.py | 2 +- drivers/mpathcount.py | 4 ++-- drivers/resetvdis.py | 2 +- 7 files changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/LUNperVDI.py b/drivers/LUNperVDI.py index e3583a592..070c67117 100755 --- a/drivers/LUNperVDI.py +++ b/drivers/LUNperVDI.py @@ -87,6 +87,7 @@ def create(self, sr_uuid, vdi_uuid, size) -> str: VDIs = util._getVDIs(self.sr) self.sr._loadvdis() smallest = 0 + v = {} for vdi in VDIs: if not vdi['managed'] \ and int(vdi['virtual_size']) >= int(size) \ diff --git a/drivers/LVHDSR.py b/drivers/LVHDSR.py index 852cad3e5..52d257dfe 100755 --- a/drivers/LVHDSR.py +++ b/drivers/LVHDSR.py @@ -222,6 +222,7 @@ def load(self, sr_uuid) -> None: for key in self.lvmCache.lvs.keys(): # if the lvname has a uuid in it type = None + vdi = None if contains_uuid_regex.search(key) is not None: if key.startswith(lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD]): type = vhdutil.VDI_TYPE_VHD @@ -997,8 +998,8 @@ def _undoCloneOp(self, lvs, origUuid, baseUuid, clonUuid): self.lvActivator.activate(baseUuid, base.name, False) origRefcountNormal = 1 vhdInfo = vhdutil.getVHDInfo(basePath, lvhdutil.extractUuid, False) - if base.vdiType == vhdutil.VDI_TYPE_VHD and vhdInfo.hidden: - vhdutil.setHidden(basePath, False) + if vhdInfo.hidden: + vhdutil.setHidden(basePath, False) elif base.vdiType == vhdutil.VDI_TYPE_RAW and base.hidden: self.lvmCache.setHidden(base.name, False) diff --git a/drivers/LinstorSR.py b/drivers/LinstorSR.py index c2579a539..2ecb0e384 100755 --- a/drivers/LinstorSR.py +++ b/drivers/LinstorSR.py @@ -1205,8 +1205,7 @@ def _load_vdis_ex(self): if USE_KEY_HASH and vdi.vdi_type == vhdutil.VDI_TYPE_VHD: # TODO: Replace pylint comment with this feature when possible: # https://github.com/PyCQA/pylint/pull/2926 - vdi.sm_config_override['key_hash'] = \ - self._vhdutil.get_key_hash(vdi_uuid) # pylint: disable = E1120 + vdi.sm_config_override['key_hash'] = self._vhdutil.get_key_hash(vdi_uuid) # pylint: disable = E1120 # 4.c. Update CBT status of disks either just added # or already in XAPI. 
diff --git a/drivers/cleanup.py b/drivers/cleanup.py index 7bba0a059..a50403c4b 100755 --- a/drivers/cleanup.py +++ b/drivers/cleanup.py @@ -1151,6 +1151,7 @@ def _getAllSubtree(self): class FileVDI(VDI): """Object representing a VDI in a file-based SR (EXT or NFS)""" + @override @staticmethod def extractUuid(path): path = os.path.basename(path.strip()) @@ -1248,6 +1249,7 @@ def load(self, info=None) -> None: self.parentUuid = info.parentUuid self.path = os.path.join(self.sr.path, self.fileName) + @override @staticmethod def extractUuid(path): return lvhdutil.extractUuid(path) @@ -4174,6 +4176,7 @@ def abort_optional_reenable(uuid): # def main(): action = "" + maxAge = 0 uuid = "" background = False force = False diff --git a/drivers/iscsilib.py b/drivers/iscsilib.py index e54de1bcd..27d34a546 100644 --- a/drivers/iscsilib.py +++ b/drivers/iscsilib.py @@ -248,7 +248,7 @@ def get_node_config (portal, targetIQN): ini_sec = "root" str_fp = io.StringIO("[%s]\n%s" % (ini_sec, stdout)) parser = RawConfigParser() - parser.readfp(str_fp) + parser.read_file(str_fp) str_fp.close() return dict(parser.items(ini_sec)) diff --git a/drivers/mpathcount.py b/drivers/mpathcount.py index 43285edc6..93d566693 100755 --- a/drivers/mpathcount.py +++ b/drivers/mpathcount.py @@ -89,8 +89,8 @@ def match_pathup(s): match = re.match(r'.*\d+:\d+:\d+:\d+\s+\S+\s+\S+\s+\S+\s+(\S+)', s) if match: path_status = match.group(1) - if path_status in ['faulty', 'shaky', 'failed']: - return False + if path_status in ['faulty', 'shaky', 'failed']: + return False return True diff --git a/drivers/resetvdis.py b/drivers/resetvdis.py index 81938d4c8..61454f849 100755 --- a/drivers/resetvdis.py +++ b/drivers/resetvdis.py @@ -21,6 +21,7 @@ import cleanup import util import lock +import sys import XenAPI # pylint: disable=import-error @@ -141,7 +142,6 @@ def usage(): sys.exit(1) if __name__ == '__main__': - import sys import atexit if len(sys.argv) not in [3, 4, 5]: From 97e2083e48f635353e71dcd47cc35e50dcc3a697 Mon Sep 17 00:00:00 2001 From: Rushikesh Jadhav Date: Wed, 25 Jun 2025 15:01:05 +0530 Subject: [PATCH 68/72] Improve LinstorSR.py to handle `thick` SR creation (#85) Check enabled hosts instead of online during SR creation Added `get_enabled_hosts` to get enabled hosts during SR operations. In some drivers e.g. LINSTOR, we need to ensure that hosts are enabled before performing operations, hence this function is needed. In some cases `thick` SR creation may fail due to `get_online_hosts` as the metrics could take time. Signed-off-by: Rushikesh Jadhav --- drivers/LinstorSR.py | 2 +- drivers/util.py | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/LinstorSR.py b/drivers/LinstorSR.py index 2ecb0e384..dbfda35ee 100755 --- a/drivers/LinstorSR.py +++ b/drivers/LinstorSR.py @@ -586,7 +586,7 @@ def create(self, uuid, size) -> None: opterr='LINSTOR SR must be unique in a pool' ) - online_hosts = util.get_online_hosts(self.session) + online_hosts = util.get_enabled_hosts(self.session) if len(online_hosts) < len(host_adresses): raise xs_errors.XenError( 'LinstorSRCreate', diff --git a/drivers/util.py b/drivers/util.py index 4053c9a3e..b663c76ab 100755 --- a/drivers/util.py +++ b/drivers/util.py @@ -777,6 +777,11 @@ def get_slaves_attached_on(session, vdi_uuids): master_ref = get_this_host_ref(session) return [x for x in host_refs if x != master_ref] +def get_enabled_hosts(session): + """ + Returns a list of host refs that are enabled in the pool. 
+ """ + return list(session.xenapi.host.get_all_records_where('field "enabled" = "true"').keys()) def get_online_hosts(session): online_hosts = [] From 021703eb7c18f036a6c987b4e4e1ea27ae8b70d2 Mon Sep 17 00:00:00 2001 From: Mathieu Labourier Date: Mon, 30 Jun 2025 12:56:02 +0200 Subject: [PATCH 69/72] fix: repair diskless resource on sub-zero allocated size Signed-off-by: Mathieu Labourier --- drivers/linstorvolumemanager.py | 51 +++++++++++++++++++++++++++------ 1 file changed, 43 insertions(+), 8 deletions(-) diff --git a/drivers/linstorvolumemanager.py b/drivers/linstorvolumemanager.py index 5b7da10ac..f04b6fbd1 100755 --- a/drivers/linstorvolumemanager.py +++ b/drivers/linstorvolumemanager.py @@ -522,10 +522,20 @@ def allocated_volume_size(self): current_size = volume.allocated_size if current_size < 0: - raise LinstorVolumeManagerError( - 'Failed to get allocated size of `{}` on `{}`' - .format(resource.name, volume.storage_pool_name) - ) + try: + self._repair_diskless_resource(resource) + return util.retry( + lambda: self.allocated_volume_size, + maxretry=1, + period=1 + ) + except LinstorVolumeManagerError as e: + raise e + except Exception as e: + raise LinstorVolumeManagerError( + 'Failed to get allocated size of `{}` on `{}`: {}' + .format(resource.name, volume.storage_pool_name, e) + ) current[volume.number] = max(current_size, current.get(volume.number) or 0) total_size = 0 @@ -2061,6 +2071,21 @@ def _mark_resource_cache_as_dirty(self): self._resource_cache_dirty = True self._volume_info_cache_dirty = True + def _repair_diskless_resource(self, resource): + if linstor.consts.FLAG_DISKLESS not in resource.flags: + return + + self._linstor.resource_delete( + node_name=resource.node_name, + rsc_name=resource.name + ) + self._linstor.resource_auto_place( + rsc_name=resource.name, + place_count=1, + diskless_type=linstor.consts.FLAG_DRBD_DISKLESS + ) + self._mark_resource_cache_as_dirty() + # -------------------------------------------------------------------------- def _ensure_volume_exists(self, volume_uuid): @@ -2114,10 +2139,20 @@ def _get_volumes_info(self, volume_name=None): # We ignore diskless pools of the form "DfltDisklessStorPool". 
if volume.storage_pool_name == self._group_name: if volume.allocated_size < 0: - raise LinstorVolumeManagerError( - 'Failed to get allocated size of `{}` on `{}`' - .format(resource.name, volume.storage_pool_name) - ) + try: + self._repair_diskless_resource(resource) + return util.retry( + lambda: self._get_volumes_info(volume_name), + maxretry=1, + period=1 + ) + except LinstorVolumeManagerError as e: + raise e + except Exception as e: + raise LinstorVolumeManagerError( + 'Failed to get allocated size of `{}` on `{}`: {}' + .format(resource.name, volume.storage_pool_name, e) + ) allocated_size = volume.allocated_size current.allocated_size = current.allocated_size and \ From bae7edb8f51adc6e450f235aa3eed2b77c357684 Mon Sep 17 00:00:00 2001 From: Mathieu Labourier Date: Thu, 3 Jul 2025 11:19:59 +0200 Subject: [PATCH 70/72] fix: remove util.retry calls Signed-off-by: Mathieu Labourier --- drivers/linstorvolumemanager.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/drivers/linstorvolumemanager.py b/drivers/linstorvolumemanager.py index f04b6fbd1..3b049d1ea 100755 --- a/drivers/linstorvolumemanager.py +++ b/drivers/linstorvolumemanager.py @@ -524,11 +524,7 @@ def allocated_volume_size(self): if current_size < 0: try: self._repair_diskless_resource(resource) - return util.retry( - lambda: self.allocated_volume_size, - maxretry=1, - period=1 - ) + return self.allocated_volume_size except LinstorVolumeManagerError as e: raise e except Exception as e: @@ -2141,11 +2137,7 @@ def _get_volumes_info(self, volume_name=None): if volume.allocated_size < 0: try: self._repair_diskless_resource(resource) - return util.retry( - lambda: self._get_volumes_info(volume_name), - maxretry=1, - period=1 - ) + return self._get_volumes_info(volume_name) except LinstorVolumeManagerError as e: raise e except Exception as e: From 6b32030c0215e3b875e45c31467371f748750aca Mon Sep 17 00:00:00 2001 From: Mathieu Labourier Date: Thu, 10 Jul 2025 12:13:13 +0200 Subject: [PATCH 71/72] fix: use resource_create instead of autoplace Signed-off-by: Mathieu Labourier --- drivers/linstorvolumemanager.py | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/drivers/linstorvolumemanager.py b/drivers/linstorvolumemanager.py index 3b049d1ea..695b6dd06 100755 --- a/drivers/linstorvolumemanager.py +++ b/drivers/linstorvolumemanager.py @@ -523,7 +523,7 @@ def allocated_volume_size(self): current_size = volume.allocated_size if current_size < 0: try: - self._repair_diskless_resource(resource) + self._repair_diskless_resource(resource, volume) return self.allocated_volume_size except LinstorVolumeManagerError as e: raise e @@ -2067,7 +2067,7 @@ def _mark_resource_cache_as_dirty(self): self._resource_cache_dirty = True self._volume_info_cache_dirty = True - def _repair_diskless_resource(self, resource): + def _repair_diskless_resource(self, resource, volume): if linstor.consts.FLAG_DISKLESS not in resource.flags: return @@ -2075,10 +2075,21 @@ def _repair_diskless_resource(self, resource): node_name=resource.node_name, rsc_name=resource.name ) - self._linstor.resource_auto_place( - rsc_name=resource.name, - place_count=1, - diskless_type=linstor.consts.FLAG_DRBD_DISKLESS + + self._linstor.resource_create( + rscs=[ + linstor.linstorapi.ResourceData( + #node_id=?, + #layer_list=?, + node_name=resource.node_name, + rsc_name=resource.name, + storage_pool=volume.storage_pool_name, + diskless=linstor.consts.FLAG_DISKLESS in resource.flags, + 
drbd_diskless=linstor.consts.FLAG_DRBD_DISKLESS in resource.flags, + nvme_initiator=linstor.consts.FLAG_NVME_INITIATOR in resource.flags, + ebs_initiator=linstor.consts.FLAG_EBS_INITIATOR in resource.flags + ) + ] ) self._mark_resource_cache_as_dirty() @@ -2136,7 +2147,7 @@ def _get_volumes_info(self, volume_name=None): if volume.storage_pool_name == self._group_name: if volume.allocated_size < 0: try: - self._repair_diskless_resource(resource) + self._repair_diskless_resource(resource, volume) return self._get_volumes_info(volume_name) except LinstorVolumeManagerError as e: raise e From 9a9dec1d078e727988cd351b837cf23aae657ed6 Mon Sep 17 00:00:00 2001 From: Mathieu Labourier Date: Thu, 17 Jul 2025 10:38:03 +0200 Subject: [PATCH 72/72] refactor: scan for broken disk before main loops Signed-off-by: Mathieu Labourier --- drivers/linstorvolumemanager.py | 47 +++++++++++++++++++-------------- 1 file changed, 27 insertions(+), 20 deletions(-) diff --git a/drivers/linstorvolumemanager.py b/drivers/linstorvolumemanager.py index 695b6dd06..8cb4cc4da 100755 --- a/drivers/linstorvolumemanager.py +++ b/drivers/linstorvolumemanager.py @@ -509,6 +509,8 @@ def allocated_volume_size(self): # Paths: /res_name/vol_number/size sizes = {} + self._scan_for_broken_diskless_resources() + for resource in self._get_resource_cache().resources: if resource.name not in sizes: current = sizes[resource.name] = {} @@ -522,16 +524,10 @@ def allocated_volume_size(self): current_size = volume.allocated_size if current_size < 0: - try: - self._repair_diskless_resource(resource, volume) - return self.allocated_volume_size - except LinstorVolumeManagerError as e: - raise e - except Exception as e: - raise LinstorVolumeManagerError( - 'Failed to get allocated size of `{}` on `{}`: {}' - .format(resource.name, volume.storage_pool_name, e) - ) + raise LinstorVolumeManagerError( + 'Failed to get allocated size of `{}` on `{}`' + .format(resource.name, volume.storage_pool_name) + ) current[volume.number] = max(current_size, current.get(volume.number) or 0) total_size = 0 @@ -2091,8 +2087,23 @@ def _repair_diskless_resource(self, resource, volume): ) ] ) + + def _scan_for_broken_diskless_resources(self): + for resource in self._get_resource_cache().resources: + for volume in resource.volumes: + if ( + volume.storage_pool_name != self._group_name + or volume.allocated_size >= 0 + ): + return + try: self._repair_diskless_resource(resource, volume) + except Exception as e: + util.SMlog('Failed to repair diskless resource `{}` on `{}`: {}'.format( + resource.name, volume.storage_pool_name, e + )) self._mark_resource_cache_as_dirty() + # -------------------------------------------------------------------------- def _ensure_volume_exists(self, volume_uuid): @@ -2131,6 +2142,8 @@ def _get_volumes_info(self, volume_name=None): if not self._volume_info_cache_dirty: return self._volume_info_cache + self._scan_for_broken_diskless_resources() + for resource in self._get_resource_cache().resources: if resource.name not in all_volume_info: current = all_volume_info[resource.name] = self.VolumeInfo( @@ -2146,16 +2159,10 @@ def _get_volumes_info(self, volume_name=None): # We ignore diskless pools of the form "DfltDisklessStorPool". 
if volume.storage_pool_name == self._group_name: if volume.allocated_size < 0: - try: - self._repair_diskless_resource(resource, volume) - return self._get_volumes_info(volume_name) - except LinstorVolumeManagerError as e: - raise e - except Exception as e: - raise LinstorVolumeManagerError( - 'Failed to get allocated size of `{}` on `{}`: {}' - .format(resource.name, volume.storage_pool_name, e) - ) + raise LinstorVolumeManagerError( + 'Failed to get allocated size of `{}` on `{}`' + .format(resource.name, volume.storage_pool_name) + ) allocated_size = volume.allocated_size current.allocated_size = current.allocated_size and \