diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 6271a21b1..015d64126 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -3,15 +3,34 @@ name: Test SM on: [push, pull_request] jobs: + static-analysis: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + - name: Set up Python 3 + uses: actions/setup-python@v4 + with: + python-version: '3.x' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r dev_requirements_static_analysis.txt + + - name: mypy + run: | + mypy . + build: - runs-on: ubuntu-20.04 + runs-on: ubuntu-24.04 steps: - uses: actions/checkout@v3 - name: Set up Python 3 uses: actions/setup-python@v4 with: - python-version: '3.6' + python-version: '3.x' - name: Install dependencies run: | diff --git a/.mypy.ini b/.mypy.ini new file mode 100644 index 000000000..651016c0d --- /dev/null +++ b/.mypy.ini @@ -0,0 +1,3 @@ +[mypy] +enable_error_code = explicit-override + diff --git a/Makefile b/Makefile index e6ff5a7a0..3748226cf 100755 --- a/Makefile +++ b/Makefile @@ -8,12 +8,19 @@ SM_DRIVERS += Dummy SM_DRIVERS += udev SM_DRIVERS += ISO SM_DRIVERS += HBA +SM_DRIVERS += Linstor SM_DRIVERS += LVHD SM_DRIVERS += LVHDoISCSI SM_DRIVERS += LVHDoHBA SM_DRIVERS += SHM SM_DRIVERS += SMB SM_DRIVERS += LVHDoFCoE +SM_DRIVERS += CephFS +SM_DRIVERS += GlusterFS +SM_DRIVERS += XFS +SM_DRIVERS += ZFS +SM_DRIVERS += MooseFS +SM_DRIVERS += LargeBlock SM_LIBS := SR SM_LIBS += SRCommand @@ -27,6 +34,9 @@ SM_LIBS += verifyVHDsOnSR SM_LIBS += scsiutil SM_LIBS += scsi_host_rescan SM_LIBS += vhdutil +SM_LIBS += linstorjournaler +SM_LIBS += linstorvhdutil +SM_LIBS += linstorvolumemanager SM_LIBS += lvhdutil SM_LIBS += cifutils SM_LIBS += xs_errors @@ -77,6 +87,7 @@ PLUGIN_SCRIPT_DEST := /etc/xapi.d/plugins/ LIBEXEC := /opt/xensource/libexec/ UDEV_RULES_DIR := /etc/udev/rules.d/ UDEV_SCRIPTS_DIR := /etc/udev/scripts/ +SYSTEMD_CONF_DIR := /etc/systemd/system/ SYSTEMD_SERVICE_DIR := /usr/lib/systemd/system/ INIT_DIR := /etc/rc.d/init.d/ MPATH_CONF_DIR := /etc/multipath.xenserver/ @@ -92,7 +103,8 @@ SM_PY_FILES = $(foreach LIB, $(SM_LIBS), drivers/$(LIB).py) $(foreach DRIVER, $( .PHONY: build build: - make -C dcopy + make -C dcopy + make -C linstor .PHONY: precommit precommit: build @@ -115,7 +127,7 @@ precheck: build echo "Precheck succeeded with no outstanding issues found." 
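Note on the static-analysis setup above: the new .mypy.ini enables mypy's explicit-override error code (PEP 698), which is why the driver changes in this series decorate every overriding method with @override imported from the new sm_typing helper. A minimal sketch of the pattern, assuming sm_typing simply re-exports override from typing (Python 3.12+) or typing_extensions on older interpreters:

    # Minimal sketch of the pattern enforced by "enable_error_code = explicit-override".
    # Assumption: sm_typing re-exports override() from typing / typing_extensions.
    try:
        from typing import override          # Python >= 3.12
    except ImportError:
        from typing_extensions import override

    class BaseSR:
        def attach(self, sr_uuid) -> None:
            raise NotImplementedError

    class ExampleSR(BaseSR):
        @override
        def attach(self, sr_uuid) -> None:   # decorated override: accepted by mypy
            pass

        # Without @override, mypy (with explicit-override enabled) reports that the
        # method overrides a base-class method without the decorator; conversely,
        # @override on a method that no longer overrides anything is flagged, which
        # catches renamed or misspelled base-class methods.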
.PHONY: install -install: precheck +install: build mkdir -p $(SM_STAGING) $(call mkdir_clean,$(SM_STAGING)) mkdir -p $(SM_STAGING)$(SM_DEST) @@ -123,6 +135,9 @@ install: precheck mkdir -p $(SM_STAGING)$(UDEV_RULES_DIR) mkdir -p $(SM_STAGING)$(UDEV_SCRIPTS_DIR) mkdir -p $(SM_STAGING)$(INIT_DIR) + mkdir -p $(SM_STAGING)$(SYSTEMD_CONF_DIR) + mkdir -p $(SM_STAGING)$(SYSTEMD_CONF_DIR)/drbd-reactor.service.d + mkdir -p $(SM_STAGING)$(SYSTEMD_CONF_DIR)/linstor-satellite.service.d mkdir -p $(SM_STAGING)$(SYSTEMD_SERVICE_DIR) mkdir -p $(SM_STAGING)$(MPATH_CONF_DIR) mkdir -p $(SM_STAGING)$(MPATH_CUSTOM_CONF_DIR) @@ -151,6 +166,12 @@ install: precheck $(SM_STAGING)/$(SM_DEST) install -m 644 etc/logrotate.d/$(SMLOG_CONF) \ $(SM_STAGING)/$(LOGROTATE_DIR) + install -m 644 etc/systemd/system/drbd-reactor.service.d/override.conf \ + $(SM_STAGING)/$(SYSTEMD_CONF_DIR)/drbd-reactor.service.d/ + install -m 644 etc/systemd/system/linstor-satellite.service.d/override.conf \ + $(SM_STAGING)/$(SYSTEMD_CONF_DIR)/linstor-satellite.service.d/ + install -m 644 etc/systemd/system/var-lib-linstor.service \ + $(SM_STAGING)/$(SYSTEMD_CONF_DIR) install -m 644 etc/make-dummy-sr.service \ $(SM_STAGING)/$(SYSTEMD_SERVICE_DIR) install -m 644 systemd/xs-sm.service \ @@ -169,6 +190,8 @@ install: precheck $(SM_STAGING)/$(SYSTEMD_SERVICE_DIR) install -m 644 systemd/SMGC@.service \ $(SM_STAGING)/$(SYSTEMD_SERVICE_DIR) + install -m 644 systemd/linstor-monitor.service \ + $(SM_STAGING)/$(SYSTEMD_SERVICE_DIR) for i in $(UDEV_RULES); do \ install -m 644 udev/$$i.rules \ $(SM_STAGING)$(UDEV_RULES_DIR); done @@ -186,6 +209,7 @@ install: precheck cd $(SM_STAGING)$(SM_DEST) && rm -f LVHDoFCoESR && ln -sf LVHDoFCoESR.py LVMoFCoESR ln -sf $(SM_DEST)mpathutil.py $(SM_STAGING)/sbin/mpathutil install -m 755 drivers/02-vhdcleanup $(SM_STAGING)$(MASTER_SCRIPT_DEST) + install -m 755 drivers/linstor-manager $(SM_STAGING)$(PLUGIN_SCRIPT_DEST) install -m 755 drivers/lvhd-thin $(SM_STAGING)$(PLUGIN_SCRIPT_DEST) install -m 755 drivers/on_slave.py $(SM_STAGING)$(PLUGIN_SCRIPT_DEST)/on-slave install -m 755 drivers/testing-hooks $(SM_STAGING)$(PLUGIN_SCRIPT_DEST) @@ -197,6 +221,9 @@ install: precheck install -m 755 drivers/iscsilib.py $(SM_STAGING)$(SM_DEST) install -m 755 drivers/fcoelib.py $(SM_STAGING)$(SM_DEST) mkdir -p $(SM_STAGING)$(LIBEXEC) + install -m 755 scripts/fork-log-daemon $(SM_STAGING)$(LIBEXEC) + install -m 755 scripts/linstor-kv-tool $(SM_STAGING)$(BIN_DEST) + install -m 755 scripts/safe-umount $(SM_STAGING)$(LIBEXEC) install -m 755 scripts/local-device-change $(SM_STAGING)$(LIBEXEC) install -m 755 scripts/check-device-sharing $(SM_STAGING)$(LIBEXEC) install -m 755 scripts/usb_change $(SM_STAGING)$(LIBEXEC) @@ -205,6 +232,8 @@ install: precheck mkdir -p $(SM_STAGING)/etc/xapi.d/xapi-pre-shutdown/ install -m 755 scripts/stop_all_gc $(SM_STAGING)/etc/xapi.d/xapi-pre-shutdown/ $(MAKE) -C dcopy install DESTDIR=$(SM_STAGING) + $(MAKE) -C linstor install DESTDIR=$(SM_STAGING) + $(MAKE) -C sm_typing install DESTDIR=$(SM_STAGING) ln -sf $(SM_DEST)blktap2.py $(SM_STAGING)$(BIN_DEST)/blktap2 ln -sf $(SM_DEST)lcache.py $(SM_STAGING)$(BIN_DEST)tapdisk-cache-stats ln -sf /dev/null $(SM_STAGING)$(UDEV_RULES_DIR)/69-dm-lvm-metad.rules @@ -215,4 +244,3 @@ install: precheck .PHONY: clean clean: rm -rf $(SM_STAGING) - diff --git a/dev_requirements.txt b/dev_requirements.txt index 104316bae..5fbd5907f 100644 --- a/dev_requirements.txt +++ b/dev_requirements.txt @@ -1,4 +1,5 @@ coverage -astroid==2.3.3 -pylint==2.4.4 +astroid +pylint bitarray 
+python-linstor diff --git a/dev_requirements_static_analysis.txt b/dev_requirements_static_analysis.txt new file mode 100644 index 000000000..8a59c8f20 --- /dev/null +++ b/dev_requirements_static_analysis.txt @@ -0,0 +1,3 @@ +bitarray +mypy +python-linstor diff --git a/drivers/BaseISCSI.py b/drivers/BaseISCSI.py index 71e43ddc4..98546957c 100755 --- a/drivers/BaseISCSI.py +++ b/drivers/BaseISCSI.py @@ -18,7 +18,10 @@ # ISCSISR: ISCSI software initiator SR driver # +from sm_typing import override + import SR +import VDI import util import time import LUNperVDI @@ -100,11 +103,12 @@ def address(self): self._initPaths() return self._address - def handles(type): + @override + @staticmethod + def handles(type) -> bool: return False - handles = staticmethod(handles) - def _synchroniseAddrList(self, addrlist): + def _synchroniseAddrList(self, addrlist) -> None: if not self.multihomed: return change = False @@ -133,7 +137,8 @@ def _synchroniseAddrList(self, addrlist): except: pass - def load(self, sr_uuid): + @override + def load(self, sr_uuid) -> None: if self.force_tapdisk: self.sr_vditype = 'aio' else: @@ -175,11 +180,11 @@ def load(self, sr_uuid): and ('chappassword' in self.dconf or 'chappassword_secret' in self.dconf): self.chapuser = self.dconf['chapuser'].encode('utf-8') if 'chappassword_secret' in self.dconf: - self.chappassword = util.get_secret(self.session, self.dconf['chappassword_secret']) + chappassword = util.get_secret(self.session, self.dconf['chappassword_secret']) else: - self.chappassword = self.dconf['chappassword'] + chappassword = self.dconf['chappassword'] - self.chappassword = self.chappassword.encode('utf-8') + self.chappassword = chappassword.encode('utf-8') self.incoming_chapuser = "" self.incoming_chappassword = "" @@ -187,11 +192,11 @@ def load(self, sr_uuid): and ('incoming_chappassword' in self.dconf or 'incoming_chappassword_secret' in self.dconf): self.incoming_chapuser = self.dconf['incoming_chapuser'].encode('utf-8') if 'incoming_chappassword_secret' in self.dconf: - self.incoming_chappassword = util.get_secret(self.session, self.dconf['incoming_chappassword_secret']) + incoming_chappassword = util.get_secret(self.session, self.dconf['incoming_chappassword_secret']) else: - self.incoming_chappassword = self.dconf['incoming_chappassword'] + incoming_chappassword = self.dconf['incoming_chappassword'] - self.incoming_chappassword = self.incoming_chappassword.encode('utf-8') + self.incoming_chappassword = incoming_chappassword.encode('utf-8') self.port = DEFAULT_PORT if 'port' in self.dconf and self.dconf['port']: @@ -276,7 +281,7 @@ def _initPaths(self): self._address = self.tgtidx self._synchroniseAddrList(addrlist) - def _init_adapters(self): + def _init_adapters(self) -> None: # Generate a list of active adapters ids = scsiutil._genHostList(ISCSI_PROCNAME) util.SMlog(ids) @@ -293,7 +298,8 @@ def _init_adapters(self): pass self._devs = scsiutil.cacheSCSIidentifiers() - def attach(self, sr_uuid): + @override + def attach(self, sr_uuid) -> None: self._mpathHandle() multiTargets = False @@ -391,10 +397,9 @@ def attach(self, sr_uuid): util._incr_iscsiSR_refcount(self.targetIQN, sr_uuid) IQNs = [] if "multiSession" in self.dconf: - IQNs = "" for iqn in self.dconf['multiSession'].split("|"): if len(iqn): - IQNs += iqn.split(',')[2] + IQNs.append(iqn.split(',')[2]) else: IQNs.append(self.targetIQN) @@ -433,7 +438,11 @@ def attach(self, sr_uuid): realdev = os.path.realpath(os.path.join(dev_path, dev)) util.set_scheduler(os.path.basename(realdev)) - def detach(self, 
sr_uuid, delete=False): + @override + def detach(self, sr_uuid) -> None: + self.detach_and_delete(sr_uuid, delete=False) + + def detach_and_delete(self, sr_uuid, delete=True) -> None: keys = [] pbdref = None try: @@ -474,7 +483,8 @@ def detach(self, sr_uuid, delete=False): self.attached = False - def create(self, sr_uuid, size): + @override + def create(self, sr_uuid, size) -> None: # Check whether an SR already exists SRs = self.session.xenapi.SR.get_all_records() for sr in SRs: @@ -503,11 +513,13 @@ def create(self, sr_uuid, size): self.session.xenapi.SR.set_sm_config(self.sr_ref, self.sm_config) return - def delete(self, sr_uuid): + @override + def delete(self, sr_uuid) -> None: self.detach(sr_uuid) return - def probe(self): + @override + def probe(self) -> str: SRs = self.session.xenapi.SR.get_all_records() Recs = {} for sr in SRs: @@ -517,8 +529,9 @@ def probe(self): sm_config['targetIQN'] == self.targetIQN: Recs[record["uuid"]] = sm_config return self.srlist_toxml(Recs) - - def scan(self, sr_uuid): + + @override + def scan(self, sr_uuid) -> None: if not self.passthrough: if not self.attached: raise xs_errors.XenError('SRUnavailable') @@ -530,9 +543,10 @@ def scan(self, sr_uuid): if vdi.managed: self.physical_utilisation += vdi.size self.virtual_allocation = self.physical_utilisation - return super(BaseISCSISR, self).scan(sr_uuid) + super(BaseISCSISR, self).scan(sr_uuid) - def vdi(self, uuid): + @override + def vdi(self, uuid) -> VDI.VDI: return LUNperVDI.RAWVDI(self, uuid) def _scan_IQNs(self): diff --git a/drivers/CephFSSR.py b/drivers/CephFSSR.py new file mode 100644 index 000000000..9e8e46e1e --- /dev/null +++ b/drivers/CephFSSR.py @@ -0,0 +1,318 @@ +#!/usr/bin/env python3 +# +# Original work copyright (C) Citrix systems +# Modified work copyright (C) Vates SAS and XCP-ng community +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published +# by the Free Software Foundation; version 2.1 only. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +# +# CEPHFSSR: Based on FileSR, mounts ceph fs share + +from sm_typing import override + +import errno +import os +import socket +import syslog as _syslog +import xmlrpc.client +from syslog import syslog + +# careful with the import order here +# FileSR has a circular dependency: +# FileSR -> blktap2 -> lvutil -> EXTSR -> FileSR +# importing in this order seems to avoid triggering the issue. 
+import SR +import SRCommand +import FileSR +# end of careful +import VDI +import cleanup +import util +import vhdutil +import xs_errors +from lock import Lock + +CAPABILITIES = ["SR_PROBE", "SR_UPDATE", + "VDI_CREATE", "VDI_DELETE", "VDI_ATTACH", "VDI_DETACH", + "VDI_UPDATE", "VDI_CLONE", "VDI_SNAPSHOT", "VDI_RESIZE", "VDI_MIRROR", + "VDI_GENERATE_CONFIG", + "VDI_RESET_ON_BOOT/2", "ATOMIC_PAUSE"] + +CONFIGURATION = [ + ['server', 'Ceph server(s) (required, ex: "192.168.0.12" or "10.10.10.10,10.10.10.26")'], + ['serverpath', 'Ceph FS path (required, ex: "/")'], + ['serverport', 'ex: 6789'], + ['options', 'Ceph FS client name, and secretfile (required, ex: "name=admin,secretfile=/etc/ceph/admin.secret")'] +] + +DRIVER_INFO = { + 'name': 'CephFS VHD', + 'description': 'SR plugin which stores disks as VHD files on a CephFS storage', + 'vendor': 'Vates SAS', + 'copyright': '(C) 2020 Vates SAS', + 'driver_version': '1.0', + 'required_api_version': '1.0', + 'capabilities': CAPABILITIES, + 'configuration': CONFIGURATION +} + +DRIVER_CONFIG = {"ATTACH_FROM_CONFIG_WITH_TAPDISK": True} + +# The mountpoint for the directory when performing an sr_probe. All probes +# are guaranteed to be serialised by xapi, so this single mountpoint is fine. +PROBE_MOUNTPOINT = os.path.join(SR.MOUNT_BASE, "probe") + + +class CephFSException(Exception): + def __init__(self, errstr): + self.errstr = errstr + + +# mountpoint = /var/run/sr-mount/CephFS/uuid +# linkpath = mountpoint/uuid - path to SR directory on share +# path = /var/run/sr-mount/uuid - symlink to SR directory on share +class CephFSSR(FileSR.FileSR): + """Ceph file-based storage repository""" + + DRIVER_TYPE = 'cephfs' + + @override + @staticmethod + def handles(sr_type) -> bool: + # fudge, because the parent class (FileSR) checks for smb to alter its behavior + return sr_type == CephFSSR.DRIVER_TYPE or sr_type == 'smb' + + @override + def load(self, sr_uuid) -> None: + if not self._is_ceph_available(): + raise xs_errors.XenError( + 'SRUnavailable', + opterr='ceph is not installed' + ) + + self.ops_exclusive = FileSR.OPS_EXCLUSIVE + self.lock = Lock(vhdutil.LOCK_TYPE_SR, self.uuid) + self.sr_vditype = SR.DEFAULT_TAP + self.driver_config = DRIVER_CONFIG + if 'server' not in self.dconf: + raise xs_errors.XenError('ConfigServerMissing') + self.remoteserver = self.dconf['server'] + self.remotepath = self.dconf['serverpath'] + # if serverport is not specified, use default 6789 + if 'serverport' not in self.dconf: + self.remoteport = "6789" + else: + self.remoteport = self.dconf['serverport'] + if self.sr_ref and self.session is not None: + self.sm_config = self.session.xenapi.SR.get_sm_config(self.sr_ref) + else: + self.sm_config = self.srcmd.params.get('sr_sm_config') or {} + self.mountpoint = os.path.join(SR.MOUNT_BASE, 'CephFS', sr_uuid) + self.linkpath = os.path.join(self.mountpoint, sr_uuid or "") + self.path = os.path.join(SR.MOUNT_BASE, sr_uuid) + self._check_o_direct() + + def checkmount(self): + return util.ioretry(lambda: ((util.pathexists(self.mountpoint) and + util.ismount(self.mountpoint)) and + util.pathexists(self.path))) + + def mount(self, mountpoint=None): + """Mount the remote ceph export at 'mountpoint'""" + if mountpoint is None: + mountpoint = self.mountpoint + elif not util.is_string(mountpoint) or mountpoint == "": + raise CephFSException("mountpoint not a string object") + + try: + if not util.ioretry(lambda: util.isdir(mountpoint)): + util.ioretry(lambda: util.makedirs(mountpoint)) + except util.CommandException as inst: + raise 
CephFSException("Failed to make directory: code is %d" % inst.code) + + try: + options = [] + if 'options' in self.dconf: + options.append(self.dconf['options']) + if options: + options = ['-o', ','.join(options)] + acc = [] + for server in self.remoteserver.split(','): + try: + addr_info = socket.getaddrinfo(server, 0)[0] + except Exception: + continue + + acc.append('[' + server + ']' if addr_info[0] == socket.AF_INET6 else server) + + remoteserver = ','.join(acc) + command = ["mount", '-t', 'ceph', remoteserver + ":" + self.remoteport + ":" + self.remotepath, mountpoint] + options + util.ioretry(lambda: util.pread(command), errlist=[errno.EPIPE, errno.EIO], maxretry=2, nofail=True) + except util.CommandException as inst: + syslog(_syslog.LOG_ERR, 'CephFS mount failed ' + inst.__str__()) + raise CephFSException("mount failed with return code %d" % inst.code) + + # Sanity check to ensure that the user has at least RO access to the + # mounted share. Windows sharing and security settings can be tricky. + try: + util.listdir(mountpoint) + except util.CommandException: + try: + self.unmount(mountpoint, True) + except CephFSException: + util.logException('CephFSSR.unmount()') + raise CephFSException("Permission denied. Please check user privileges.") + + def unmount(self, mountpoint, rmmountpoint): + try: + util.pread(["umount", mountpoint]) + except util.CommandException as inst: + raise CephFSException("umount failed with return code %d" % inst.code) + if rmmountpoint: + try: + os.rmdir(mountpoint) + except OSError as inst: + raise CephFSException("rmdir failed with error '%s'" % inst.strerror) + + @override + def attach(self, sr_uuid) -> None: + if not self.checkmount(): + try: + self.mount() + os.symlink(self.linkpath, self.path) + except CephFSException as exc: + raise xs_errors.SROSError(12, exc.errstr) + self.attached = True + + @override + def probe(self) -> str: + try: + self.mount(PROBE_MOUNTPOINT) + sr_list = filter(util.match_uuid, util.listdir(PROBE_MOUNTPOINT)) + self.unmount(PROBE_MOUNTPOINT, True) + except (util.CommandException, xs_errors.XenError): + raise + # Create a dictionary from the SR uuids to feed SRtoXML() + return util.SRtoXML({sr_uuid: {} for sr_uuid in sr_list}) + + @override + def detach(self, sr_uuid) -> None: + if not self.checkmount(): + return + util.SMlog("Aborting GC/coalesce") + cleanup.abort(self.uuid) + # Change directory to avoid unmount conflicts + os.chdir(SR.MOUNT_BASE) + self.unmount(self.mountpoint, True) + os.unlink(self.path) + self.attached = False + + @override + def create(self, sr_uuid, size) -> None: + if self.checkmount(): + raise xs_errors.SROSError(113, 'CephFS mount point already attached') + + try: + self.mount() + except CephFSException as exc: + # noinspection PyBroadException + try: + os.rmdir(self.mountpoint) + except: + # we have no recovery strategy + pass + raise xs_errors.SROSError(111, "CephFS mount error [opterr=%s]" % exc.errstr) + + if util.ioretry(lambda: util.pathexists(self.linkpath)): + if len(util.ioretry(lambda: util.listdir(self.linkpath))) != 0: + self.detach(sr_uuid) + raise xs_errors.XenError('SRExists') + else: + try: + util.ioretry(lambda: util.makedirs(self.linkpath)) + os.symlink(self.linkpath, self.path) + except util.CommandException as inst: + if inst.code != errno.EEXIST: + try: + self.unmount(self.mountpoint, True) + except CephFSException: + util.logException('CephFSSR.unmount()') + raise xs_errors.SROSError(116, + "Failed to create CephFS SR. 
remote directory creation error: {}".format( + os.strerror(inst.code))) + self.detach(sr_uuid) + + @override + def delete(self, sr_uuid) -> None: + # try to remove/delete non VDI contents first + super(CephFSSR, self).delete(sr_uuid) + try: + if self.checkmount(): + self.detach(sr_uuid) + self.mount() + if util.ioretry(lambda: util.pathexists(self.linkpath)): + util.ioretry(lambda: os.rmdir(self.linkpath)) + util.SMlog(str(self.unmount(self.mountpoint, True))) + except util.CommandException as inst: + self.detach(sr_uuid) + if inst.code != errno.ENOENT: + raise xs_errors.SROSError(114, "Failed to remove CephFS mount point") + + @override + def vdi(self, uuid, loadLocked=False) -> VDI.VDI: + return CephFSFileVDI(self, uuid) + + @staticmethod + def _is_ceph_available(): + return util.find_executable('ceph') + +class CephFSFileVDI(FileSR.FileVDI): + @override + def attach(self, sr_uuid, vdi_uuid) -> str: + if not hasattr(self, 'xenstore_data'): + self.xenstore_data = {} + + self.xenstore_data['storage-type'] = CephFSSR.DRIVER_TYPE + + return super(CephFSFileVDI, self).attach(sr_uuid, vdi_uuid) + + @override + def generate_config(self, sr_uuid, vdi_uuid) -> str: + util.SMlog("SMBFileVDI.generate_config") + if not util.pathexists(self.path): + raise xs_errors.XenError('VDIUnavailable') + resp = {'device_config': self.sr.dconf, + 'sr_uuid': sr_uuid, + 'vdi_uuid': vdi_uuid, + 'sr_sm_config': self.sr.sm_config, + 'command': 'vdi_attach_from_config'} + # Return the 'config' encoded within a normal XMLRPC response so that + # we can use the regular response/error parsing code. + config = xmlrpc.client.dumps(tuple([resp]), "vdi_attach_from_config") + return xmlrpc.client.dumps((config,), "", True) + + @override + def attach_from_config(self, sr_uuid, vdi_uuid) -> str: + try: + if not util.pathexists(self.sr.path): + return self.sr.attach(sr_uuid) + except: + util.logException("SMBFileVDI.attach_from_config") + raise xs_errors.XenError('SRUnavailable', + opterr='Unable to attach from config') + return '' + +if __name__ == '__main__': + SRCommand.run(CephFSSR, DRIVER_INFO) +else: + SR.registerSR(CephFSSR) diff --git a/drivers/DummySR.py b/drivers/DummySR.py index 44f571e50..f5674db02 100755 --- a/drivers/DummySR.py +++ b/drivers/DummySR.py @@ -17,6 +17,8 @@ # # DummySR: an example dummy SR for the SDK +from sm_typing import Dict, Optional, List, Tuple, override + import SR import VDI import SRCommand @@ -28,7 +30,7 @@ "VDI_ACTIVATE", "VDI_DEACTIVATE", "VDI_CLONE", "VDI_SNAPSHOT", "VDI_RESIZE", "VDI_INTRODUCE", "VDI_MIRROR"] -CONFIGURATION = [] +CONFIGURATION: List[Tuple[str, str]] = [] DRIVER_INFO = { 'name': 'dummy', @@ -47,35 +49,43 @@ class DummySR(SR.SR): """dummy storage repository""" - def handles(type): + @override + @staticmethod + def handles(type) -> bool: if type == TYPE: return True return False - handles = staticmethod(handles) - def load(self, sr_uuid): + @override + def load(self, sr_uuid) -> None: self.sr_vditype = 'phy' - def content_type(self, sr_uuid): + @override + def content_type(self, sr_uuid) -> str: return super(DummySR, self).content_type(sr_uuid) - def create(self, sr_uuid, size): + @override + def create(self, sr_uuid, size) -> None: self._assertValues(['sr_uuid', 'args', 'host_ref', 'session_ref', 'device_config', 'command', 'sr_ref']) assert(len(self.srcmd.params['args']) == 1) - def delete(self, sr_uuid): + @override + def delete(self, sr_uuid) -> None: self._assertValues(['sr_uuid', 'args', 'host_ref', 'session_ref', 'device_config', 'command', 'sr_ref']) 
assert(len(self.srcmd.params['args']) == 0) - def attach(self, sr_uuid): + @override + def attach(self, sr_uuid) -> None: self._assertValues(['sr_uuid', 'args', 'host_ref', 'session_ref', 'device_config', 'command', 'sr_ref']) assert(len(self.srcmd.params['args']) == 0) - def detach(self, sr_uuid): + @override + def detach(self, sr_uuid) -> None: self._assertValues(['sr_uuid', 'args', 'host_ref', 'session_ref', 'device_config', 'command', 'sr_ref']) assert(len(self.srcmd.params['args']) == 0) - def probe(self): + @override + def probe(self) -> str: # N.B. There are no SR references self._assertValues(['args', 'host_ref', 'session_ref', 'device_config', 'command']) assert(len(self.srcmd.params['args']) == 0) @@ -89,10 +99,12 @@ def probe(self): # Return the Probe XML return util.SRtoXML(SRlist) - def vdi(self, uuid): + @override + def vdi(self, uuid) -> VDI.VDI: return DummyVDI(self, uuid) - def scan(self, sr_uuid): + @override + def scan(self, sr_uuid) -> None: self._assertValues(['sr_uuid', 'args', 'host_ref', 'session_ref', 'device_config', 'command', 'sr_ref']) assert(len(self.srcmd.params['args']) == 0) @@ -106,7 +118,7 @@ def scan(self, sr_uuid): self.physical_size = 2000000000000 self.physical_utilisation = 0 self.virtual_allocation = 0 - return super(DummySR, self).scan(sr_uuid) + super(DummySR, self).scan(sr_uuid) def _assertValues(self, vals): for attr in vals: @@ -132,15 +144,17 @@ def _getallVDIrecords(self): class DummyVDI(VDI.VDI): - def load(self, vdi_uuid): + @override + def load(self, vdi_uuid) -> None: self.path = "/dev/null" # returned on attach self.uuid = vdi_uuid self.size = 0 self.utilisation = 0 self.location = vdi_uuid - self.sm_config = {} + self.sm_config: Dict[str, str] = {} - def create(self, sr_uuid, vdi_uuid, size): + @override + def create(self, sr_uuid, vdi_uuid, size) -> str: self.sr._assertValues(['sr_uuid', 'args', 'host_ref', 'device_config', 'command', 'sr_ref', 'vdi_sm_config']) assert(len(self.sr.srcmd.params['args']) == 8) @@ -159,7 +173,8 @@ def create(self, sr_uuid, vdi_uuid, size): self.run_corner_cases_tests() return self.get_params() - def delete(self, sr_uuid, vdi_uuid): + @override + def delete(self, sr_uuid, vdi_uuid, data_only=False) -> None: self.sr._assertValues(['sr_uuid', 'args', 'host_ref', 'device_config', 'command', 'sr_ref', 'vdi_ref', 'vdi_location', 'vdi_uuid']) assert(len(self.sr.srcmd.params['args']) == 0) @@ -168,7 +183,8 @@ def delete(self, sr_uuid, vdi_uuid): self.run_corner_cases_tests() self._db_forget() - def introduce(self, sr_uuid, vdi_uuid): + @override + def introduce(self, sr_uuid, vdi_uuid) -> str: self.sr._assertValues(['sr_uuid', 'args', 'host_ref', 'device_config', 'command', 'sr_ref', 'vdi_sm_config', 'new_uuid']) assert(len(self.sr.srcmd.params['args']) == 0) self.vdi_sm_config = self.sr.srcmd.params['vdi_sm_config'] @@ -184,19 +200,22 @@ def introduce(self, sr_uuid, vdi_uuid): self.run_corner_cases_tests() return super(DummyVDI, self).get_params() - def attach(self, sr_uuid, vdi_uuid): + @override + def attach(self, sr_uuid, vdi_uuid) -> str: self.sr._assertValues(['sr_uuid', 'args', 'host_ref', 'device_config', 'command', 'sr_ref', 'vdi_ref', 'vdi_location', 'vdi_uuid']) assert(len(self.sr.srcmd.params['args']) == 1) vdi = super(DummyVDI, self).attach(sr_uuid, vdi_uuid) self.run_corner_cases_tests() return vdi - def detach(self, sr_uuid, vdi_uuid): + @override + def detach(self, sr_uuid, vdi_uuid) -> None: self.sr._assertValues(['sr_uuid', 'args', 'host_ref', 'device_config', 'command', 'sr_ref', 'vdi_ref', 
'vdi_location', 'vdi_uuid']) self.run_corner_cases_tests() assert(len(self.sr.srcmd.params['args']) == 0) - def activate(self, sr_uuid, vdi_uuid): + @override + def activate(self, sr_uuid, vdi_uuid) -> Optional[Dict[str, str]]: self.sr._assertValues(['sr_uuid', 'args', 'host_ref', 'device_config', 'command', 'sr_ref', 'vdi_ref', 'vdi_location', 'vdi_uuid']) assert(len(self.sr.srcmd.params['args']) == 1) self.vdi_ref = self.sr.srcmd.params['vdi_ref'] @@ -204,13 +223,16 @@ def activate(self, sr_uuid, vdi_uuid): self.run_corner_cases_tests() for key in self.other_config.keys(): util.SMlog("\tvdi_other_config: [%s:%s]" % (key, self.other_config[key])) + return None - def deactivate(self, sr_uuid, vdi_uuid): + @override + def deactivate(self, sr_uuid, vdi_uuid) -> None: self.sr._assertValues(['sr_uuid', 'args', 'host_ref', 'device_config', 'command', 'sr_ref', 'vdi_ref', 'vdi_location', 'vdi_uuid']) self.run_corner_cases_tests() assert(len(self.sr.srcmd.params['args']) == 0) - def resize(self, sr_uuid, vdi_uuid, size): + @override + def resize(self, sr_uuid, vdi_uuid, size) -> str: self.sr._assertValues(['sr_uuid', 'args', 'host_ref', 'device_config', 'command', 'sr_ref', 'vdi_ref', 'vdi_location', 'vdi_uuid']) assert(len(self.sr.srcmd.params['args']) == 1) @@ -220,7 +242,8 @@ def resize(self, sr_uuid, vdi_uuid, size): self.run_corner_cases_tests() return super(DummyVDI, self).get_params() - def snapshot(self, sr_uuid, vdi_uuid): + @override + def snapshot(self, sr_uuid, vdi_uuid) -> str: self.sr._assertValues(['sr_uuid', 'args', 'host_ref', 'device_config', 'command', 'sr_ref']) assert(len(self.sr.srcmd.params['args']) == 0) @@ -234,7 +257,8 @@ def snapshot(self, sr_uuid, vdi_uuid): self.run_corner_cases_tests() return vdi.get_params() - def clone(self, sr_uuid, vdi_uuid): + @override + def clone(self, sr_uuid, vdi_uuid) -> str: self.sr._assertValues(['sr_uuid', 'args', 'host_ref', 'device_config', 'command', 'sr_ref']) assert(len(self.sr.srcmd.params['args']) == 0) diff --git a/drivers/EXTSR.py b/drivers/EXTSR.py index 61e88dcdc..30aa3832b 100755 --- a/drivers/EXTSR.py +++ b/drivers/EXTSR.py @@ -17,8 +17,11 @@ # # EXTSR: Based on local-file storage repository, mounts ext3 partition +from sm_typing import override + import SR from SR import deviceCheck +import VDI import SRCommand import FileSR import util @@ -57,11 +60,13 @@ class EXTSR(FileSR.FileSR): """EXT3 Local file storage repository""" - def handles(srtype): + @override + @staticmethod + def handles(srtype) -> bool: return srtype == 'ext' - handles = staticmethod(handles) - def load(self, sr_uuid): + @override + def load(self, sr_uuid) -> None: self.ops_exclusive = FileSR.OPS_EXCLUSIVE self.lock = Lock(vhdutil.LOCK_TYPE_SR, self.uuid) self.sr_vditype = SR.DEFAULT_TAP @@ -72,7 +77,8 @@ def load(self, sr_uuid): self.attached = self._checkmount() self.driver_config = DRIVER_CONFIG - def delete(self, sr_uuid): + @override + def delete(self, sr_uuid) -> None: super(EXTSR, self).delete(sr_uuid) # Check PVs match VG @@ -102,7 +108,8 @@ def delete(self, sr_uuid): raise xs_errors.XenError('LVMDelete', opterr='errno is %d' % inst.code) - def attach(self, sr_uuid): + @override + def attach(self, sr_uuid) -> None: if not self._checkmount(): try: #Activate LV @@ -123,7 +130,7 @@ def attach(self, sr_uuid): 'LVMMount', opterr='FSCK failed on %s. 
Errno is %d' % (self.remotepath, inst.code)) - super(EXTSR, self).attach(sr_uuid, bind=False) + self.attach_and_bind(sr_uuid, bind=False) self.attached = True @@ -136,7 +143,8 @@ def attach(self, sr_uuid): for dev in self.dconf['device'].split(','): self.block_setscheduler(dev) - def detach(self, sr_uuid): + @override + def detach(self, sr_uuid) -> None: super(EXTSR, self).detach(sr_uuid) try: # deactivate SR @@ -147,13 +155,15 @@ def detach(self, sr_uuid): 'LVMUnMount', opterr='lvm -an failed errno is %d' % inst.code) + @override @deviceCheck - def probe(self): + def probe(self) -> str: return lvutil.srlist_toxml(lvutil.scan_srlist(EXT_PREFIX, self.dconf['device']), EXT_PREFIX) + @override @deviceCheck - def create(self, sr_uuid, size): + def create(self, sr_uuid, size) -> None: if self._checkmount(): raise xs_errors.XenError('SRExists') @@ -214,12 +224,14 @@ def create(self, sr_uuid, size): self.session, self.sr_ref, scsiutil.devlist_to_serialstring(self.dconf['device'].split(','))) - def vdi(self, uuid): + @override + def vdi(self, uuid) -> VDI.VDI: return EXTFileVDI(self, uuid) class EXTFileVDI(FileSR.FileVDI): - def attach(self, sr_uuid, vdi_uuid): + @override + def attach(self, sr_uuid, vdi_uuid) -> str: if not hasattr(self, 'xenstore_data'): self.xenstore_data = {} diff --git a/drivers/FileSR.py b/drivers/FileSR.py index 6ac841c90..f1d983a80 100755 --- a/drivers/FileSR.py +++ b/drivers/FileSR.py @@ -17,6 +17,8 @@ # # FileSR: local-file storage repository +from sm_typing import Dict, Optional, List, override + import SR import VDI import SRCommand @@ -36,7 +38,7 @@ import XenAPI # pylint: disable=import-error from constants import CBTLOG_TAG -geneology = {} +geneology: Dict[str, List[str]] = {} CAPABILITIES = ["SR_PROBE", "SR_UPDATE", \ "VDI_CREATE", "VDI_DELETE", "VDI_ATTACH", "VDI_DETACH", \ "VDI_CLONE", "VDI_SNAPSHOT", "VDI_RESIZE", "VDI_MIRROR", @@ -71,9 +73,10 @@ class FileSR(SR.SR): SR_TYPE = "file" - def handles(srtype): + @override + @staticmethod + def handles(srtype) -> bool: return srtype == 'file' - handles = staticmethod(handles) def _check_o_direct(self): if self.sr_ref and self.session is not None: @@ -89,7 +92,8 @@ def __init__(self, srcmd, sr_uuid): SR.SR.__init__(self, srcmd, sr_uuid) self._check_o_direct() - def load(self, sr_uuid): + @override + def load(self, sr_uuid) -> None: self.ops_exclusive = OPS_EXCLUSIVE self.lock = Lock(vhdutil.LOCK_TYPE_SR, self.uuid) self.sr_vditype = vhdutil.VDI_TYPE_VHD @@ -102,7 +106,8 @@ def load(self, sr_uuid): self.attached = False self.driver_config = DRIVER_CONFIG - def create(self, sr_uuid, size): + @override + def create(self, sr_uuid, size) -> None: """ Create the SR. The path must not already exist, or if it does, it must be empty. 
(This accounts for the case where the user has mounted a device onto a directory manually and want to use this as the @@ -124,7 +129,8 @@ def create(self, sr_uuid, size): except: raise xs_errors.XenError('FileSRCreate') - def delete(self, sr_uuid): + @override + def delete(self, sr_uuid) -> None: self.attach(sr_uuid) cleanup.gc_force(self.session, self.uuid) @@ -156,7 +162,11 @@ def delete(self, sr_uuid): raise xs_errors.XenError('FileSRDelete', \ opterr='error %d' % inst.code) - def attach(self, sr_uuid, bind=True): + @override + def attach(self, sr_uuid) -> None: + self.attach_and_bind(sr_uuid) + + def attach_and_bind(self, sr_uuid, bind=True) -> None: if not self._checkmount(): try: util.ioretry(lambda: util.makedirs(self.path, mode=0o700)) @@ -175,7 +185,8 @@ def attach(self, sr_uuid, bind=True): opterr='fail to mount FileSR. Errno is %s' % inst.code) self.attached = True - def detach(self, sr_uuid): + @override + def detach(self, sr_uuid) -> None: if self._checkmount(): try: util.SMlog("Aborting GC/coalesce") @@ -187,7 +198,8 @@ def detach(self, sr_uuid): raise xs_errors.XenError('SRInUse', opterr=str(e)) self.attached = False - def scan(self, sr_uuid): + @override + def scan(self, sr_uuid) -> None: if not self._checkmount(): raise xs_errors.XenError('SRUnavailable', \ opterr='no such directory %s' % self.path) @@ -221,7 +233,8 @@ def scan(self, sr_uuid): # default behaviour from here on super(FileSR, self).scan(sr_uuid) - def update(self, sr_uuid): + @override + def update(self, sr_uuid) -> None: if not self._checkmount(): raise xs_errors.XenError('SRUnavailable', \ opterr='no such directory %s' % self.path) @@ -234,10 +247,12 @@ def _update(self, sr_uuid, virt_alloc_delta): self.physical_utilisation = self._getutilisation() self._db_update() - def content_type(self, sr_uuid): + @override + def content_type(self, sr_uuid) -> str: return super(FileSR, self).content_type(sr_uuid) - def vdi(self, uuid): + @override + def vdi(self, uuid) -> VDI.VDI: return FileVDI(self, uuid) def added_vdi(self, vdi): @@ -247,7 +262,8 @@ def deleted_vdi(self, uuid): if uuid in self.vdis: del self.vdis[uuid] - def replay(self, uuid): + @override + def replay(self, uuid) -> None: try: file = open(self.path + "/filelog.txt", "r") data = file.readlines() @@ -387,7 +403,7 @@ def _isbind(self): st2 = os.stat(self.remotepath) return st1.st_dev == st2.st_dev and st1.st_ino == st2.st_ino - def _checkmount(self): + def _checkmount(self) -> bool: mount_path = self.path if self.handles("smb"): mount_path = self.mountpoint @@ -397,7 +413,7 @@ def _checkmount(self): util.pathexists(self.remotepath) and self._isbind())) # Override in SharedFileSR. 
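The attach() changes above show the signature-alignment pattern this series applies wherever an override previously took extra parameters: the overriding method keeps the exact base-class signature (so @override and Liskov checks pass), and the extra behaviour moves to a separately named helper, here FileSR.attach_and_bind() called by EXTSR with bind=False, and likewise BaseISCSISR.detach_and_delete(). A minimal sketch of the idea, with hypothetical class names:

    # Sketch of the signature-alignment pattern (hypothetical names; the real
    # classes are SR/FileSR/EXTSR and BaseISCSISR in this repository).
    try:
        from typing import override
    except ImportError:
        from typing_extensions import override

    class Base:
        def attach(self, sr_uuid) -> None:
            raise NotImplementedError

    class FileLike(Base):
        @override
        def attach(self, sr_uuid) -> None:
            # the override keeps Base's signature and simply delegates
            self.attach_and_bind(sr_uuid)

        def attach_and_bind(self, sr_uuid, bind: bool = True) -> None:
            # the optional behaviour lives outside the overridden name
            print("mounting", sr_uuid, "bind =", bind)

    class ExtLike(FileLike):
        @override
        def attach(self, sr_uuid) -> None:
            # callers that previously passed bind=False now use the helper directly
            self.attach_and_bind(sr_uuid, bind=False)

    ExtLike().attach("sr-uuid-example")   # prints: mounting sr-uuid-example bind = False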
- def _check_hardlinks(self): + def _check_hardlinks(self) -> bool: return True class FileVDI(VDI.VDI): @@ -440,7 +456,8 @@ def _find_path_with_retries(self, vdi_uuid, maxretry=5, period=2.0): return found - def load(self, vdi_uuid): + @override + def load(self, vdi_uuid) -> None: self.lock = self.sr.lock self.sr.srcmd.params['o_direct'] = self.sr.o_direct @@ -549,13 +566,15 @@ def load(self, vdi_uuid): raise xs_errors.XenError('VDILoad', \ opterr='Failed load VDI information %s' % self.path) - def update(self, sr_uuid, vdi_location): + @override + def update(self, sr_uuid, vdi_location) -> None: self.load(vdi_location) vdi_ref = self.sr.srcmd.params['vdi_ref'] self.sm_config = self.session.xenapi.VDI.get_sm_config(vdi_ref) self._db_update() - def create(self, sr_uuid, vdi_uuid, size): + @override + def create(self, sr_uuid, vdi_uuid, size) -> str: if util.ioretry(lambda: util.pathexists(self.path)): raise xs_errors.XenError('VDIExists') @@ -586,7 +605,8 @@ def create(self, sr_uuid, vdi_uuid, size): self.sr._update(self.sr.uuid, self.size) return super(FileVDI, self).get_params() - def delete(self, sr_uuid, vdi_uuid, data_only=False): + @override + def delete(self, sr_uuid, vdi_uuid, data_only=False) -> None: if not util.ioretry(lambda: util.pathexists(self.path)): return super(FileVDI, self).delete(sr_uuid, vdi_uuid, data_only) @@ -609,7 +629,8 @@ def delete(self, sr_uuid, vdi_uuid, data_only=False): self.sr._kickGC() return super(FileVDI, self).delete(sr_uuid, vdi_uuid, data_only) - def attach(self, sr_uuid, vdi_uuid): + @override + def attach(self, sr_uuid, vdi_uuid) -> str: if self.path is None: self._find_path_with_retries(vdi_uuid) if not self._checkpath(self.path): @@ -633,10 +654,12 @@ def attach(self, sr_uuid, vdi_uuid): except util.CommandException as inst: raise xs_errors.XenError('VDILoad', opterr='error %d' % inst.code) - def detach(self, sr_uuid, vdi_uuid): + @override + def detach(self, sr_uuid, vdi_uuid) -> None: self.attached = False - def resize(self, sr_uuid, vdi_uuid, size): + @override + def resize(self, sr_uuid, vdi_uuid, size) -> str: if not self.exists: raise xs_errors.XenError('VDIUnavailable', \ opterr='VDI %s unavailable %s' % (vdi_uuid, self.path)) @@ -676,10 +699,12 @@ def resize(self, sr_uuid, vdi_uuid, size): super(FileVDI, self).resize_cbt(self.sr.uuid, self.uuid, self.size) return VDI.VDI.get_params(self) - def clone(self, sr_uuid, vdi_uuid): + @override + def clone(self, sr_uuid, vdi_uuid) -> str: return self._do_snapshot(sr_uuid, vdi_uuid, VDI.SNAPSHOT_DOUBLE) - def compose(self, sr_uuid, vdi1, vdi2): + @override + def compose(self, sr_uuid, vdi1, vdi2) -> None: if self.vdi_type != vhdutil.VDI_TYPE_VHD: raise xs_errors.XenError('Unimplemented') parent_fn = vdi1 + vhdutil.FILE_EXTN[vhdutil.VDI_TYPE_VHD] @@ -706,8 +731,9 @@ def reset_leaf(self, sr_uuid, vdi_uuid): vhdutil.killData(self.path) - def _do_snapshot(self, sr_uuid, vdi_uuid, snap_type, - _=False, secondary=None, cbtlog=None): + @override + def _do_snapshot(self, sr_uuid, vdi_uuid, snapType, + cloneOp=False, secondary=None, cbtlog=None) -> str: # If cbt enabled, save file consistency state if cbtlog is not None: if blktap2.VDI.tap_status(self.session, vdi_uuid): @@ -725,12 +751,13 @@ def _do_snapshot(self, sr_uuid, vdi_uuid, snap_type, if not blktap2.VDI.tap_pause(self.session, sr_uuid, vdi_uuid): raise util.SMException("failed to pause VDI %s" % vdi_uuid) try: - return self._snapshot(snap_type, cbtlog, consistency_state) + return self._snapshot(snapType, cbtlog, consistency_state) finally: 
self.disable_leaf_on_secondary(vdi_uuid, secondary=secondary) blktap2.VDI.tap_unpause(self.session, sr_uuid, vdi_uuid, secondary) - def _rename(self, src, dst): + @override + def _rename(self, src, dst) -> None: util.SMlog("FileVDI._rename %s to %s" % (src, dst)) util.ioretry(lambda: os.rename(src, dst)) @@ -914,7 +941,8 @@ def _snapshot(self, snap_type, cbtlog=None, cbt_consistency=None): ret_vdi = self return ret_vdi.get_params() - def get_params(self): + @override + def get_params(self) -> str: if not self._checkpath(self.path): raise xs_errors.XenError('VDIUnavailable', \ opterr='VDI %s unavailable %s' % (self.uuid, self.path)) @@ -994,7 +1022,8 @@ def extractUuid(path): return uuid extractUuid = staticmethod(extractUuid) - def generate_config(self, sr_uuid, vdi_uuid): + @override + def generate_config(self, sr_uuid, vdi_uuid) -> str: """ Generate the XML config required to attach and activate a VDI for use when XAPI is not running. Attach and @@ -1013,7 +1042,8 @@ def generate_config(self, sr_uuid, vdi_uuid): config = xmlrpc.client.dumps(tuple([resp]), "vdi_attach_from_config") return xmlrpc.client.dumps((config, ), "", True) - def attach_from_config(self, sr_uuid, vdi_uuid): + @override + def attach_from_config(self, sr_uuid, vdi_uuid) -> str: """ Attach and activate a VDI using config generated by vdi_generate_config above. This is used for cases such as @@ -1022,15 +1052,17 @@ def attach_from_config(self, sr_uuid, vdi_uuid): util.SMlog("FileVDI.attach_from_config") try: if not util.pathexists(self.sr.path): - self.sr.attach(sr_uuid) + return self.sr.attach(sr_uuid) except: util.logException("FileVDI.attach_from_config") raise xs_errors.XenError( 'SRUnavailable', opterr='Unable to attach from config' ) + return '' - def _create_cbt_log(self): + @override + def _create_cbt_log(self) -> str: # Create CBT log file # Name: .cbtlog #Handle if file already exists @@ -1039,7 +1071,8 @@ def _create_cbt_log(self): open_file.close() return super(FileVDI, self)._create_cbt_log() - def _delete_cbt_log(self): + @override + def _delete_cbt_log(self) -> None: logPath = self._get_cbt_logpath(self.uuid) try: os.remove(logPath) @@ -1047,7 +1080,8 @@ def _delete_cbt_log(self): if e.errno != errno.ENOENT: raise - def _cbt_log_exists(self, logpath): + @override + def _cbt_log_exists(self, logpath) -> bool: return util.pathexists(logpath) @@ -1073,7 +1107,8 @@ def _check_writable(self): def _raise_hardlink_error(self): raise OSError(524, "Unknown error 524") - def _check_hardlinks(self): + @override + def _check_hardlinks(self) -> bool: hardlink_conf = self._read_hardlink_conf() if hardlink_conf is not None: return hardlink_conf @@ -1115,7 +1150,7 @@ def _check_hardlinks(self): def _get_hardlink_conf_path(self): return os.path.join(self.path, 'sm-hardlink.conf') - def _read_hardlink_conf(self): + def _read_hardlink_conf(self) -> Optional[bool]: try: with open(self._get_hardlink_conf_path(), 'r') as f: try: diff --git a/drivers/GlusterFSSR.py b/drivers/GlusterFSSR.py new file mode 100644 index 000000000..7b515dc81 --- /dev/null +++ b/drivers/GlusterFSSR.py @@ -0,0 +1,299 @@ +#!/usr/bin/env python3 +# +# Original work copyright (C) Citrix systems +# Modified work copyright (C) Vates SAS and XCP-ng community +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published +# by the Free Software Foundation; version 2.1 only. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +from sm_typing import override + +import errno +import os +import syslog as _syslog +import xmlrpc.client +from syslog import syslog + +# careful with the import order here +# FileSR has a circular dependency: FileSR- > blktap2 -> lvutil -> EXTSR -> FileSR +# importing in this order seems to avoid triggering the issue. +import SR +import SRCommand +import FileSR +# end of careful +import VDI +import cleanup +import util +import vhdutil +import xs_errors +from lock import Lock + +CAPABILITIES = ["SR_PROBE", "SR_UPDATE", + "VDI_CREATE", "VDI_DELETE", "VDI_ATTACH", "VDI_DETACH", + "VDI_UPDATE", "VDI_CLONE", "VDI_SNAPSHOT", "VDI_RESIZE", "VDI_MIRROR", + "VDI_GENERATE_CONFIG", + "VDI_RESET_ON_BOOT/2", "ATOMIC_PAUSE"] + +CONFIGURATION = [['server', 'Full path to share on gluster server (required, ex: "192.168.0.12:/gv0")'], + ['backupservers', 'list of servers separated by ":"'], + ['fetchattempts', 'number of attempts to fetch files before switching to the backup server'] + ] + +DRIVER_INFO = { + 'name': 'GlusterFS VHD', + 'description': 'SR plugin which stores disks as VHD files on a GlusterFS storage', + 'vendor': 'Vates SAS', + 'copyright': '(C) 2020 Vates SAS', + 'driver_version': '1.0', + 'required_api_version': '1.0', + 'capabilities': CAPABILITIES, + 'configuration': CONFIGURATION +} + +DRIVER_CONFIG = {"ATTACH_FROM_CONFIG_WITH_TAPDISK": True} + +# The mountpoint for the directory when performing an sr_probe. All probes +# are guaranteed to be serialised by xapi, so this single mountpoint is fine. +PROBE_MOUNTPOINT = os.path.join(SR.MOUNT_BASE, "probe") + + +class GlusterFSException(Exception): + def __init__(self, errstr): + self.errstr = errstr + + +# mountpoint = /var/run/sr-mount/GlusterFS//uuid +# linkpath = mountpoint/uuid - path to SR directory on share +# path = /var/run/sr-mount/uuid - symlink to SR directory on share +class GlusterFSSR(FileSR.FileSR): + """Gluster file-based storage repository""" + + DRIVER_TYPE = 'glusterfs' + + @override + @staticmethod + def handles(sr_type) -> bool: + # fudge, because the parent class (FileSR) checks for smb to alter its behavior + return sr_type == GlusterFSSR.DRIVER_TYPE or sr_type == 'smb' + + @override + def load(self, sr_uuid) -> None: + if not self._is_glusterfs_available(): + raise xs_errors.XenError( + 'SRUnavailable', + opterr='glusterfs is not installed' + ) + + self.ops_exclusive = FileSR.OPS_EXCLUSIVE + self.lock = Lock(vhdutil.LOCK_TYPE_SR, self.uuid) + self.sr_vditype = SR.DEFAULT_TAP + self.driver_config = DRIVER_CONFIG + if 'server' not in self.dconf: + raise xs_errors.XenError('ConfigServerMissing') + # Can be None => on-slave plugin hack (is_open function). 
+ self.remoteserver = self.dconf['server'] or '' + if self.sr_ref and self.session is not None: + self.sm_config = self.session.xenapi.SR.get_sm_config(self.sr_ref) + else: + self.sm_config = self.srcmd.params.get('sr_sm_config') or {} + self.mountpoint = os.path.join(SR.MOUNT_BASE, 'GlusterFS', self.remoteserver.split(':')[0], sr_uuid) + self.linkpath = os.path.join(self.mountpoint, sr_uuid or "") + self.path = os.path.join(SR.MOUNT_BASE, sr_uuid) + self._check_o_direct() + + def checkmount(self): + return util.ioretry(lambda: ((util.pathexists(self.mountpoint) and + util.ismount(self.mountpoint)) and + util.pathexists(self.linkpath))) + + def mount(self, mountpoint=None): + """Mount the remote gluster export at 'mountpoint'""" + if mountpoint is None: + mountpoint = self.mountpoint + elif not util.is_string(mountpoint) or mountpoint == "": + raise GlusterFSException("mountpoint not a string object") + + try: + if not util.ioretry(lambda: util.isdir(mountpoint)): + util.ioretry(lambda: util.makedirs(mountpoint)) + except util.CommandException as inst: + raise GlusterFSException("Failed to make directory: code is %d" % inst.code) + try: + options = [] + if 'backupservers' in self.dconf: + options.append('backup-volfile-servers=' + self.dconf['backupservers']) + if 'fetchattempts' in self.dconf: + options.append('fetch-attempts=' + self.dconf['fetchattempts']) + if options: + options = ['-o', ','.join(options)] + command = ["mount", '-t', 'glusterfs', self.remoteserver, mountpoint] + options + util.ioretry(lambda: util.pread(command), errlist=[errno.EPIPE, errno.EIO], maxretry=2, nofail=True) + except util.CommandException as inst: + syslog(_syslog.LOG_ERR, 'GlusterFS mount failed ' + inst.__str__()) + raise GlusterFSException("mount failed with return code %d" % inst.code) + + # Sanity check to ensure that the user has at least RO access to the + # mounted share. Windows sharing and security settings can be tricky. + try: + util.listdir(mountpoint) + except util.CommandException: + try: + self.unmount(mountpoint, True) + except GlusterFSException: + util.logException('GlusterFSSR.unmount()') + raise GlusterFSException("Permission denied. 
Please check user privileges.") + + def unmount(self, mountpoint, rmmountpoint): + try: + util.pread(["umount", mountpoint]) + except util.CommandException as inst: + raise GlusterFSException("umount failed with return code %d" % inst.code) + if rmmountpoint: + try: + os.rmdir(mountpoint) + except OSError as inst: + raise GlusterFSException("rmdir failed with error '%s'" % inst.strerror) + + @override + def attach(self, sr_uuid) -> None: + if not self.checkmount(): + try: + self.mount() + os.symlink(self.linkpath, self.path) + except GlusterFSException as exc: + raise xs_errors.SROSError(12, exc.errstr) + self.attached = True + + @override + def probe(self) -> str: + try: + self.mount(PROBE_MOUNTPOINT) + sr_list = filter(util.match_uuid, util.listdir(PROBE_MOUNTPOINT)) + self.unmount(PROBE_MOUNTPOINT, True) + except (util.CommandException, xs_errors.XenError): + raise + # Create a dictionary from the SR uuids to feed SRtoXML() + return util.SRtoXML({sr_uuid: {} for sr_uuid in sr_list}) + + @override + def detach(self, sr_uuid) -> None: + if not self.checkmount(): + return + util.SMlog("Aborting GC/coalesce") + cleanup.abort(self.uuid) + # Change directory to avoid unmount conflicts + os.chdir(SR.MOUNT_BASE) + self.unmount(self.mountpoint, True) + os.unlink(self.path) + self.attached = False + + @override + def create(self, sr_uuid, size) -> None: + if self.checkmount(): + raise xs_errors.SROSError(113, 'GlusterFS mount point already attached') + + try: + self.mount() + except GlusterFSException as exc: + # noinspection PyBroadException + try: + os.rmdir(self.mountpoint) + except: + # we have no recovery strategy + pass + raise xs_errors.SROSError(111, "GlusterFS mount error [opterr=%s]" % exc.errstr) + + if util.ioretry(lambda: util.pathexists(self.linkpath)): + if len(util.ioretry(lambda: util.listdir(self.linkpath))) != 0: + self.detach(sr_uuid) + raise xs_errors.XenError('SRExists') + else: + try: + util.ioretry(lambda: util.makedirs(self.linkpath)) + os.symlink(self.linkpath, self.path) + except util.CommandException as inst: + if inst.code != errno.EEXIST: + try: + self.unmount(self.mountpoint, True) + except GlusterFSException: + util.logException('GlusterFSSR.unmount()') + raise xs_errors.SROSError(116, + "Failed to create GlusterFS SR. 
remote directory creation error: {}".format( + os.strerror(inst.code))) + self.detach(sr_uuid) + + @override + def delete(self, sr_uuid) -> None: + # try to remove/delete non VDI contents first + super(GlusterFSSR, self).delete(sr_uuid) + try: + if self.checkmount(): + self.detach(sr_uuid) + self.mount() + if util.ioretry(lambda: util.pathexists(self.linkpath)): + util.ioretry(lambda: os.rmdir(self.linkpath)) + self.unmount(self.mountpoint, True) + except util.CommandException as inst: + self.detach(sr_uuid) + if inst.code != errno.ENOENT: + raise xs_errors.SROSError(114, "Failed to remove GlusterFS mount point") + + @override + def vdi(self, uuid, loadLocked=False) -> VDI.VDI: + return GlusterFSFileVDI(self, uuid) + + @staticmethod + def _is_glusterfs_available(): + return util.find_executable('glusterfs') + + +class GlusterFSFileVDI(FileSR.FileVDI): + @override + def attach(self, sr_uuid, vdi_uuid) -> str: + if not hasattr(self, 'xenstore_data'): + self.xenstore_data = {} + + self.xenstore_data['storage-type'] = GlusterFSSR.DRIVER_TYPE + + return super(GlusterFSFileVDI, self).attach(sr_uuid, vdi_uuid) + + @override + def generate_config(self, sr_uuid, vdi_uuid) -> str: + util.SMlog("SMBFileVDI.generate_config") + if not util.pathexists(self.path): + raise xs_errors.XenError('VDIUnavailable') + resp = {'device_config': self.sr.dconf, + 'sr_uuid': sr_uuid, + 'vdi_uuid': vdi_uuid, + 'sr_sm_config': self.sr.sm_config, + 'command': 'vdi_attach_from_config'} + # Return the 'config' encoded within a normal XMLRPC response so that + # we can use the regular response/error parsing code. + config = xmlrpc.client.dumps(tuple([resp]), "vdi_attach_from_config") + return xmlrpc.client.dumps((config,), "", True) + + @override + def attach_from_config(self, sr_uuid, vdi_uuid) -> str: + try: + if not util.pathexists(self.sr.path): + return self.sr.attach(sr_uuid) + except: + util.logException("SMBFileVDI.attach_from_config") + raise xs_errors.XenError('SRUnavailable', + opterr='Unable to attach from config') + return '' + +if __name__ == '__main__': + SRCommand.run(GlusterFSSR, DRIVER_INFO) +else: + SR.registerSR(GlusterFSSR) diff --git a/drivers/HBASR.py b/drivers/HBASR.py index 66ca3ee99..1740f42ef 100755 --- a/drivers/HBASR.py +++ b/drivers/HBASR.py @@ -19,8 +19,11 @@ # hardware based iSCSI # +from sm_typing import Dict, List, override + import SR import SRCommand +import VDI import devscan import scsiutil import util @@ -50,20 +53,22 @@ class HBASR(SR.SR): """HBA storage repository""" - def handles(type): + @override + @staticmethod + def handles(type) -> bool: if type == "hba": return True return False - handles = staticmethod(handles) - def load(self, sr_uuid): + @override + def load(self, sr_uuid) -> None: self.sr_vditype = 'phy' self.type = "any" if 'type' in self.dconf and self.dconf['type']: self.type = self.dconf['type'] self.attached = False self.procname = "" - self.devs = {} + self.devs: Dict[str, List[str]] = {} def _init_hbadict(self): if not hasattr(self, "hbas"): @@ -174,15 +179,18 @@ def _probe_hba(self): raise xs_errors.XenError('XMLParse', \ opterr='HBA probe failed') - def attach(self, sr_uuid): + @override + def attach(self, sr_uuid) -> None: self._mpathHandle() - def detach(self, sr_uuid): + @override + def detach(self, sr_uuid) -> None: if util._containsVDIinuse(self): return return - def create(self, sr_uuid, size): + @override + def create(self, sr_uuid, size) -> None: # Check whether an SR already exists SRs = self.session.xenapi.SR.get_all_records() for sr in SRs: @@ -211,11 
+219,13 @@ def create(self, sr_uuid, size): self.sm_config['multipathable'] = 'true' self.session.xenapi.SR.set_sm_config(self.sr_ref, self.sm_config) - def delete(self, sr_uuid): + @override + def delete(self, sr_uuid) -> None: self.detach(sr_uuid) return - def probe(self): + @override + def probe(self) -> str: self._init_hbadict() self.attach("") SRs = self.session.xenapi.SR.get_all_records() @@ -228,7 +238,8 @@ def probe(self): Recs[record["uuid"]] = sm_config return self.srlist_toxml(Recs) - def scan(self, sr_uuid): + @override + def scan(self, sr_uuid) -> None: self._init_hbadict() if not self.passthrough: if not self.attached: @@ -242,7 +253,7 @@ def scan(self, sr_uuid): if vdi.managed: self.physical_utilisation += vdi.size self.virtual_allocation = self.physical_utilisation - return super(HBASR, self).scan(sr_uuid) + super(HBASR, self).scan(sr_uuid) def print_devs(self): self.attach("") @@ -273,7 +284,8 @@ def _loadvdis(self): def _getLUNbySMconfig(self, sm_config): raise xs_errors.XenError('VDIUnavailable') - def vdi(self, uuid): + @override + def vdi(self, uuid) -> VDI.VDI: return LUNperVDI.RAWVDI(self, uuid) def srlist_toxml(self, SRs): diff --git a/drivers/ISOSR.py b/drivers/ISOSR.py index f591d2371..401461c67 100755 --- a/drivers/ISOSR.py +++ b/drivers/ISOSR.py @@ -17,6 +17,8 @@ # # ISOSR: remote iso storage repository +from sm_typing import override + import SR import VDI import SRCommand @@ -152,7 +154,7 @@ class ISOSR(SR.SR): """Local file storage repository""" # Some helper functions: - def _checkmount(self): + def _checkmount(self) -> bool: """Checks that the mountpoint exists and is mounted""" if not util.pathexists(self.mountpoint): return False @@ -221,21 +223,24 @@ def _loadvdis(self): vdi.read_only = False # Now for the main functions: - def handles(type): + @override + @staticmethod + def handles(type) -> bool: """Do we handle this type?""" if type == TYPE: return True return False - handles = staticmethod(handles) - def content_type(self, sr_uuid): + @override + def content_type(self, sr_uuid) -> str: """Returns the content_type XML""" return super(ISOSR, self).content_type(sr_uuid) # pylint: disable=no-member vdi_path_regex = re.compile(r"[a-z0-9.-]+\.(iso|img)", re.I) - def vdi(self, uuid): + @override + def vdi(self, uuid) -> VDI.VDI: """Create a VDI class. If the VDI does not exist, we determine here what its filename should be.""" @@ -267,7 +272,8 @@ def vdi(self, uuid): return ISOVDI(self, filename) - def load(self, sr_uuid): + @override + def load(self, sr_uuid) -> None: """Initialises the SR""" # First of all, check we've got the correct keys in dconf if 'location' not in self.dconf: @@ -302,10 +308,12 @@ def load(self, sr_uuid): # Some info we need: self.sr_vditype = 'phy' - def delete(self, sr_uuid): + @override + def delete(self, sr_uuid) -> None: pass - def attach(self, sr_uuid): + @override + def attach(self, sr_uuid) -> None: """Std. attach""" # Very-Legacy mode means the ISOs are in the local fs - so no need to attach. 
if 'legacy_mode' in self.dconf: @@ -335,14 +343,15 @@ def attach(self, sr_uuid): util.makedirs(self.mountpoint) mountcmd = [] - options = '' + options = [] + nfs_options = '' if 'options' in self.dconf: options = self.dconf['options'].split(' ') if protocol == 'cifs': options = [x for x in options if x != ""] else: - options = self.getNFSOptions(options) + nfs_options = self.getNFSOptions(options) # SMB options are passed differently for create via # XC/xe sr-create and create via xe-mount-iso-sr @@ -392,7 +401,7 @@ def attach(self, sr_uuid): io_timeout = nfs.get_nfs_timeout(self.other_config) io_retrans = nfs.get_nfs_retrans(self.other_config) nfs.soft_mount(self.mountpoint, server, path, - transport, useroptions=options, nfsversion=self.nfsversion, + transport, useroptions=nfs_options, nfsversion=self.nfsversion, timeout=io_timeout, retrans=io_retrans) else: if self.smbversion in SMB_VERSION_3: @@ -474,8 +483,8 @@ def _check_nfs_server(self, location): except nfs.NfsException as e: raise xs_errors.XenError('NFSTarget', opterr=str(e.errstr)) - - def after_master_attach(self, uuid): + @override + def after_master_attach(self, uuid) -> None: """Perform actions required after attaching on the pool master Return: None @@ -567,10 +576,10 @@ def getCacheOptions(self): """Pass cache options to mount.cifs""" return "cache=none" - def detach(self, sr_uuid): + @override + def detach(self, sr_uuid) -> None: """Std. detach""" - # This handles legacy mode too, so no need to check - if not self._checkmount(): + if 'legacy_mode' in self.dconf or not self._checkmount(): return try: @@ -579,7 +588,8 @@ def detach(self, sr_uuid): raise xs_errors.XenError('NFSUnMount', \ opterr='error is %d' % inst.code) - def scan(self, sr_uuid): + @override + def scan(self, sr_uuid) -> None: """Scan: see _loadvdis""" if not util.isdir(self.path): raise xs_errors.XenError('SRUnavailable', \ @@ -660,9 +670,10 @@ def scan(self, sr_uuid): self.session.xenapi.VDI.remove_from_sm_config(vdi, 'xs-tools') else: - return super(ISOSR, self).scan(sr_uuid) + super(ISOSR, self).scan(sr_uuid) - def create(self, sr_uuid, size): + @override + def create(self, sr_uuid, size) -> None: self.attach(sr_uuid) if 'type' in self.dconf: smconfig = self.session.xenapi.SR.get_sm_config(self.sr_ref) @@ -681,9 +692,10 @@ def create(self, sr_uuid, size): self.detach(sr_uuid) - + class ISOVDI(VDI.VDI): - def load(self, vdi_uuid): + @override + def load(self, vdi_uuid) -> None: # Nb, in the vdi_create call, the filename is unset, so the following # will fail. 
self.vdi_type = "iso" @@ -725,17 +737,20 @@ def __init__(self, mysr, filename): self.sm_config['xs-tools-version'] = product_version self.sm_config['xs-tools-build'] = build_number - def detach(self, sr_uuid, vdi_uuid): + @override + def detach(self, sr_uuid, vdi_uuid) -> None: pass - def attach(self, sr_uuid, vdi_uuid): + @override + def attach(self, sr_uuid, vdi_uuid) -> str: try: os.stat(self.path) return super(ISOVDI, self).attach(sr_uuid, vdi_uuid) except: raise xs_errors.XenError('VDIMissing') - def create(self, sr_uuid, vdi_uuid, size): + @override + def create(self, sr_uuid, vdi_uuid, size) -> str: self.uuid = vdi_uuid self.path = os.path.join(self.sr.path, self.filename) self.size = size @@ -758,7 +773,8 @@ def create(self, sr_uuid, vdi_uuid, size): raise xs_errors.XenError('VDICreate', \ opterr='could not create file: "%s"' % self.path) - def delete(self, sr_uuid, vdi_uuid): + @override + def delete(self, sr_uuid, vdi_uuid, data_only=False) -> None: util.SMlog("Deleting...") self.uuid = vdi_uuid diff --git a/drivers/LUNperVDI.py b/drivers/LUNperVDI.py index 306d70414..070c67117 100755 --- a/drivers/LUNperVDI.py +++ b/drivers/LUNperVDI.py @@ -18,6 +18,8 @@ # LUNperVDI: Generic Raw LUN handler, used by HBASR and ISCSISR # +from sm_typing import override + import os import VDI import util @@ -28,7 +30,8 @@ class RAWVDI(VDI.VDI): - def load(self, vdi_uuid): + @override + def load(self, vdi_uuid) -> None: if not self.sr.attached: raise xs_errors.XenError('SRUnavailable') @@ -60,7 +63,8 @@ def _query(self, path, id): sm_config['backend-kind'] = 'vbd' self.sm_config = sm_config - def introduce(self, sr_uuid, vdi_uuid): + @override + def introduce(self, sr_uuid, vdi_uuid) -> str: self.sm_config = self.sr.srcmd.params['vdi_sm_config'] vdi_path = self.sr._getLUNbySMconfig(self.sm_config) self._query(vdi_path, self.sm_config['LUNid']) @@ -78,10 +82,12 @@ def introduce(self, sr_uuid, vdi_uuid): self.sr.vdis[vdi_uuid]._db_introduce() return super(RAWVDI, self).get_params() - def create(self, sr_uuid, vdi_uuid, size): + @override + def create(self, sr_uuid, vdi_uuid, size) -> str: VDIs = util._getVDIs(self.sr) self.sr._loadvdis() smallest = 0 + v = {} for vdi in VDIs: if not vdi['managed'] \ and int(vdi['virtual_size']) >= int(size) \ @@ -98,7 +104,8 @@ def create(self, sr_uuid, vdi_uuid, size): return super(RAWVDI, self.sr.vdis[v['uuid']]).get_params() raise xs_errors.XenError('SRNoSpace') - def delete(self, sr_uuid, vdi_uuid): + @override + def delete(self, sr_uuid, vdi_uuid, data_only=False) -> None: try: vdi = util._getVDI(self.sr, vdi_uuid) if not vdi['managed']: @@ -108,7 +115,8 @@ def delete(self, sr_uuid, vdi_uuid): except: pass - def attach(self, sr_uuid, vdi_uuid): + @override + def attach(self, sr_uuid, vdi_uuid) -> str: self.sr._loadvdis() if vdi_uuid not in self.sr.vdis: raise xs_errors.XenError('VDIUnavailable') @@ -126,7 +134,8 @@ def attach(self, sr_uuid, vdi_uuid): raise xs_errors.XenError('VDIUnavailable') return super(RAWVDI, self).attach(sr_uuid, vdi_uuid) - def detach(self, sr_uuid, vdi_uuid): + @override + def detach(self, sr_uuid, vdi_uuid) -> None: self.sr._loadvdis() if 'SCSIid' in self.sm_config: self.sr.mpathmodule.reset(self.sm_config['SCSIid'], True) # explicitly unmap diff --git a/drivers/LVHDSR.py b/drivers/LVHDSR.py index 6e0d71eba..52d257dfe 100755 --- a/drivers/LVHDSR.py +++ b/drivers/LVHDSR.py @@ -18,6 +18,8 @@ # LVHDSR: VHD on LVM storage repository # +from sm_typing import Dict, List, override + import SR from SR import deviceCheck import VDI @@ -54,7 +56,7 
@@ from fairlock import Fairlock DEV_MAPPER_ROOT = os.path.join('/dev/mapper', lvhdutil.VG_PREFIX) -geneology = {} +geneology: Dict[str, List[str]] = {} CAPABILITIES = ["SR_PROBE", "SR_UPDATE", "SR_TRIM", "VDI_CREATE", "VDI_DELETE", "VDI_ATTACH", "VDI_DETACH", "VDI_MIRROR", "VDI_CLONE", "VDI_SNAPSHOT", "VDI_RESIZE", "ATOMIC_PAUSE", @@ -133,7 +135,9 @@ class LVHDSR(SR.SR): legacyMode = True - def handles(type): + @override + @staticmethod + def handles(type) -> bool: """Returns True if this SR class understands the given dconf string""" # we can pose as LVMSR or EXTSR for compatibility purposes if __name__ == '__main__': @@ -145,9 +149,9 @@ def handles(type): elif name.endswith("EXTSR"): return type == "ext" return type == LVHDSR.DRIVER_TYPE - handles = staticmethod(handles) - def load(self, sr_uuid): + @override + def load(self, sr_uuid) -> None: self.ops_exclusive = OPS_EXCLUSIVE self.isMaster = False @@ -161,14 +165,25 @@ def load(self, sr_uuid): self.path = os.path.join(lvhdutil.VG_LOCATION, self.vgname) self.mdpath = os.path.join(self.path, self.MDVOLUME_NAME) self.provision = self.PROVISIONING_DEFAULT + + has_sr_ref = self.srcmd.params.get("sr_ref") + if has_sr_ref: + self.other_conf = self.session.xenapi.SR.get_other_config(self.sr_ref) + else: + self.other_conf = None + + self.lvm_conf = None + if self.other_conf: + self.lvm_conf = self.other_conf.get('lvm-conf') + try: - self.lvmCache = lvmcache.LVMCache(self.vgname) + self.lvmCache = lvmcache.LVMCache(self.vgname, self.lvm_conf) except: raise xs_errors.XenError('SRUnavailable', \ opterr='Failed to initialise the LVMCache') self.lvActivator = LVActivator(self.uuid, self.lvmCache) self.journaler = Journaler(self.lvmCache) - if not self.srcmd.params.get("sr_ref"): + if not has_sr_ref: return # must be a probe call # Test for thick vs thin provisioning conf parameter if 'allocation' in self.dconf: @@ -178,7 +193,6 @@ def load(self, sr_uuid): raise xs_errors.XenError('InvalidArg', \ opterr='Allocation parameter must be one of %s' % self.PROVISIONING_TYPES) - self.other_conf = self.session.xenapi.SR.get_other_config(self.sr_ref) if self.other_conf.get(self.TEST_MODE_KEY): self.testMode = self.other_conf[self.TEST_MODE_KEY] self._prepareTestMode() @@ -208,6 +222,7 @@ def load(self, sr_uuid): for key in self.lvmCache.lvs.keys(): # if the lvname has a uuid in it type = None + vdi = None if contains_uuid_regex.search(key) is not None: if key.startswith(lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD]): type = vhdutil.VDI_TYPE_VHD @@ -227,7 +242,8 @@ def load(self, sr_uuid): except: pass - def cleanup(self): + @override + def cleanup(self) -> None: # we don't need to hold the lock to dec refcounts of activated LVs if not self.lvActivator.deactivateAll(): raise util.SMException("failed to deactivate LVs") @@ -470,8 +486,9 @@ def _expand_size(self): util.logException("LVHDSR._expand_size for %s failed to resize" " the PV" % self.uuid) + @override @deviceCheck - def create(self, uuid, size): + def create(self, uuid, size) -> None: util.SMlog("LVHDSR.create for %s" % self.uuid) if not self.isMaster: util.SMlog('sr_create blocked for non-master') @@ -499,7 +516,8 @@ def create(self, uuid, size): self.session.xenapi.SR.add_to_sm_config(self.sr_ref, \ self.FLAG_USE_VHD, 'true') - def delete(self, uuid): + @override + def delete(self, uuid) -> None: util.SMlog("LVHDSR.delete for %s" % self.uuid) if not self.isMaster: raise xs_errors.XenError('LVMMaster') @@ -553,7 +571,8 @@ def delete(self, uuid): lvutil.removeVG(self.dconf['device'], self.vgname) 
self._cleanup() - def attach(self, uuid): + @override + def attach(self, uuid) -> None: util.SMlog("LVHDSR.attach for %s" % self.uuid) self._cleanup(True) # in case of host crashes, if detach wasn't called @@ -591,7 +610,8 @@ def attach(self, uuid): for dev in self.dconf['device'].split(','): self.block_setscheduler(dev) - def detach(self, uuid): + @override + def detach(self, uuid) -> None: util.SMlog("LVHDSR.detach for %s" % self.uuid) cleanup.abort(self.uuid) @@ -649,12 +669,14 @@ def detach(self, uuid): # only place to do so. self._cleanup(self.isMaster) - def forget_vdi(self, uuid): + @override + def forget_vdi(self, uuid) -> None: if not self.legacyMode: LVMMetadataHandler(self.mdpath).deleteVdiFromMetadata(uuid) super(LVHDSR, self).forget_vdi(uuid) - def scan(self, uuid): + @override + def scan(self, uuid) -> None: activated_lvs = set() try: util.SMlog("LVHDSR.scan for %s" % self.uuid) @@ -674,28 +696,28 @@ def scan(self, uuid): # Now check if there are any VDIs in the metadata, which are not in # XAPI if self.mdexists: - vdiToSnaps = {} + vdiToSnaps: Dict[str, List[str]] = {} # get VDIs from XAPI vdis = self.session.xenapi.SR.get_VDIs(self.sr_ref) vdi_uuids = set([]) for vdi in vdis: vdi_uuids.add(self.session.xenapi.VDI.get_uuid(vdi)) - Dict = LVMMetadataHandler(self.mdpath, False).getMetadata()[1] + info = LVMMetadataHandler(self.mdpath, False).getMetadata()[1] - for vdi in list(Dict.keys()): - vdi_uuid = Dict[vdi][UUID_TAG] - if bool(int(Dict[vdi][IS_A_SNAPSHOT_TAG])): - if Dict[vdi][SNAPSHOT_OF_TAG] in vdiToSnaps: - vdiToSnaps[Dict[vdi][SNAPSHOT_OF_TAG]].append(vdi_uuid) + for vdi in list(info.keys()): + vdi_uuid = info[vdi][UUID_TAG] + if bool(int(info[vdi][IS_A_SNAPSHOT_TAG])): + if info[vdi][SNAPSHOT_OF_TAG] in vdiToSnaps: + vdiToSnaps[info[vdi][SNAPSHOT_OF_TAG]].append(vdi_uuid) else: - vdiToSnaps[Dict[vdi][SNAPSHOT_OF_TAG]] = [vdi_uuid] + vdiToSnaps[info[vdi][SNAPSHOT_OF_TAG]] = [vdi_uuid] if vdi_uuid not in vdi_uuids: util.SMlog("Introduce VDI %s as it is present in " \ "metadata and not in XAPI." 
% vdi_uuid) sm_config = {} - sm_config['vdi_type'] = Dict[vdi][VDI_TYPE_TAG] + sm_config['vdi_type'] = info[vdi][VDI_TYPE_TAG] lvname = "%s%s" % \ (lvhdutil.LV_PREFIX[sm_config['vdi_type']], vdi_uuid) self.lvActivator.activate( @@ -703,7 +725,7 @@ def scan(self, uuid): activated_lvs.add(vdi_uuid) lvPath = os.path.join(self.path, lvname) - if Dict[vdi][VDI_TYPE_TAG] == vhdutil.VDI_TYPE_RAW: + if info[vdi][VDI_TYPE_TAG] == vhdutil.VDI_TYPE_RAW: size = self.lvmCache.getSize( \ lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_RAW] + \ vdi_uuid) @@ -727,31 +749,31 @@ def scan(self, uuid): vdi_ref = self.session.xenapi.VDI.db_introduce( vdi_uuid, - Dict[vdi][NAME_LABEL_TAG], - Dict[vdi][NAME_DESCRIPTION_TAG], + info[vdi][NAME_LABEL_TAG], + info[vdi][NAME_DESCRIPTION_TAG], self.sr_ref, - Dict[vdi][TYPE_TAG], + info[vdi][TYPE_TAG], False, - bool(int(Dict[vdi][READ_ONLY_TAG])), + bool(int(info[vdi][READ_ONLY_TAG])), {}, vdi_uuid, {}, sm_config) self.session.xenapi.VDI.set_managed(vdi_ref, - bool(int(Dict[vdi][MANAGED_TAG]))) + bool(int(info[vdi][MANAGED_TAG]))) self.session.xenapi.VDI.set_virtual_size(vdi_ref, str(size)) self.session.xenapi.VDI.set_physical_utilisation( \ vdi_ref, str(utilisation)) self.session.xenapi.VDI.set_is_a_snapshot( \ - vdi_ref, bool(int(Dict[vdi][IS_A_SNAPSHOT_TAG]))) - if bool(int(Dict[vdi][IS_A_SNAPSHOT_TAG])): + vdi_ref, bool(int(info[vdi][IS_A_SNAPSHOT_TAG]))) + if bool(int(info[vdi][IS_A_SNAPSHOT_TAG])): self.session.xenapi.VDI.set_snapshot_time( \ - vdi_ref, DateTime(Dict[vdi][SNAPSHOT_TIME_TAG])) - if Dict[vdi][TYPE_TAG] == 'metadata': + vdi_ref, DateTime(info[vdi][SNAPSHOT_TIME_TAG])) + if info[vdi][TYPE_TAG] == 'metadata': self.session.xenapi.VDI.set_metadata_of_pool( \ - vdi_ref, Dict[vdi][METADATA_OF_POOL_TAG]) + vdi_ref, info[vdi][METADATA_OF_POOL_TAG]) # Update CBT status of disks either just added # or already in XAPI @@ -803,7 +825,8 @@ def scan(self, uuid): self.lvActivator.deactivate( vdi, LVActivator.NORMAL, False) - def update(self, uuid): + @override + def update(self, uuid) -> None: if not lvutil._checkVG(self.vgname): return self._updateStats(uuid, 0) @@ -832,15 +855,17 @@ def _updateStats(self, uuid, virtAllocDelta): self.physical_utilisation = stats['physical_utilisation'] self._db_update() + @override @deviceCheck - def probe(self): + def probe(self) -> str: return lvutil.srlist_toxml( lvutil.scan_srlist(lvhdutil.VG_PREFIX, self.dconf['device']), lvhdutil.VG_PREFIX, ('metadata' in self.srcmd.params['sr_sm_config'] and \ self.srcmd.params['sr_sm_config']['metadata'] == 'true')) - def vdi(self, uuid): + @override + def vdi(self, uuid) -> VDI.VDI: return LVHDVDI(self, uuid) def _loadvdis(self): @@ -973,8 +998,8 @@ def _undoCloneOp(self, lvs, origUuid, baseUuid, clonUuid): self.lvActivator.activate(baseUuid, base.name, False) origRefcountNormal = 1 vhdInfo = vhdutil.getVHDInfo(basePath, lvhdutil.extractUuid, False) - if base.vdiType == vhdutil.VDI_TYPE_VHD and vhdInfo.hidden: - vhdutil.setHidden(basePath, False) + if vhdInfo.hidden: + vhdutil.setHidden(basePath, False) elif base.vdiType == vhdutil.VDI_TYPE_RAW and base.hidden: self.lvmCache.setHidden(base.name, False) @@ -1297,12 +1322,13 @@ def ensureCBTSpace(self): # Ensure we have space for at least one LV self._ensureSpaceAvailable(self.journaler.LV_SIZE) - + class LVHDVDI(VDI.VDI): JRN_CLONE = "clone" # journal entry type for the clone operation - def load(self, vdi_uuid): + @override + def load(self, vdi_uuid) -> None: self.lock = self.sr.lock self.lvActivator = self.sr.lvActivator self.loaded = False @@ 
-1342,7 +1368,8 @@ def load(self, vdi_uuid): self.lvname = "%s%s" % (lvhdutil.LV_PREFIX[self.vdi_type], vdi_uuid) self.path = os.path.join(self.sr.path, self.lvname) - def create(self, sr_uuid, vdi_uuid, size): + @override + def create(self, sr_uuid, vdi_uuid, size) -> str: util.SMlog("LVHDVDI.create for %s" % self.uuid) if not self.sr.isMaster: raise xs_errors.XenError('LVMMaster') @@ -1406,7 +1433,8 @@ def create(self, sr_uuid, vdi_uuid, size): return VDI.VDI.get_params(self) - def delete(self, sr_uuid, vdi_uuid, data_only=False): + @override + def delete(self, sr_uuid, vdi_uuid, data_only=False) -> None: util.SMlog("LVHDVDI.delete for %s" % self.uuid) try: self._loadThis() @@ -1451,7 +1479,8 @@ def delete(self, sr_uuid, vdi_uuid, data_only=False): self.sr._kickGC() return super(LVHDVDI, self).delete(sr_uuid, vdi_uuid, data_only) - def attach(self, sr_uuid, vdi_uuid): + @override + def attach(self, sr_uuid, vdi_uuid) -> str: util.SMlog("LVHDVDI.attach for %s" % self.uuid) if self.sr.journaler.hasJournals(self.uuid): raise xs_errors.XenError('VDIUnavailable', @@ -1481,7 +1510,8 @@ def attach(self, sr_uuid, vdi_uuid): if not self.sr.lvActivator.deactivateAll(): util.SMlog("Failed to deactivate LVs back (%s)" % self.uuid) - def detach(self, sr_uuid, vdi_uuid): + @override + def detach(self, sr_uuid, vdi_uuid) -> None: util.SMlog("LVHDVDI.detach for %s" % self.uuid) self._loadThis() already_deflated = (self.utilisation < \ @@ -1512,7 +1542,8 @@ def detach(self, sr_uuid, vdi_uuid): raise xs_errors.XenError("SMGeneral", opterr="deactivation") # We only support offline resize - def resize(self, sr_uuid, vdi_uuid, size): + @override + def resize(self, sr_uuid, vdi_uuid, size) -> str: util.SMlog("LVHDVDI.resize for %s" % self.uuid) if not self.sr.isMaster: raise xs_errors.XenError('LVMMaster') @@ -1565,11 +1596,13 @@ def resize(self, sr_uuid, vdi_uuid, size): super(LVHDVDI, self).resize_cbt(self.sr.uuid, self.uuid, self.size) return VDI.VDI.get_params(self) - def clone(self, sr_uuid, vdi_uuid): + @override + def clone(self, sr_uuid, vdi_uuid) -> str: return self._do_snapshot( sr_uuid, vdi_uuid, VDI.SNAPSHOT_DOUBLE, cloneOp=True) - def compose(self, sr_uuid, vdi1, vdi2): + @override + def compose(self, sr_uuid, vdi1, vdi2) -> None: util.SMlog("LVHDSR.compose for %s -> %s" % (vdi2, vdi1)) if self.vdi_type != vhdutil.VDI_TYPE_VHD: raise xs_errors.XenError('Unimplemented') @@ -1629,8 +1662,9 @@ def _detach(self): self._chainSetActive(False, True) self.attached = False + @override def _do_snapshot(self, sr_uuid, vdi_uuid, snapType, - cloneOp=False, secondary=None, cbtlog=None): + cloneOp=False, secondary=None, cbtlog=None) -> str: # If cbt enabled, save file consistency state if cbtlog is not None: if blktap2.VDI.tap_status(self.session, vdi_uuid): @@ -2160,7 +2194,8 @@ def _prepareThin(self, attach): self.session.xenapi.SR.set_physical_utilisation(self.sr.sr_ref, str(sr_utilisation)) - def update(self, sr_uuid, vdi_uuid): + @override + def update(self, sr_uuid, vdi_uuid) -> None: if self.sr.legacyMode: return @@ -2180,28 +2215,33 @@ def update(self, sr_uuid, vdi_uuid): self.session.xenapi.VDI.get_metadata_of_pool(vdi_ref) LVMMetadataHandler(self.sr.mdpath).updateMetadata(update_map) - def _ensure_cbt_space(self): + @override + def _ensure_cbt_space(self) -> None: self.sr.ensureCBTSpace() - def _create_cbt_log(self): + @override + def _create_cbt_log(self) -> str: logname = self._get_cbt_logname(self.uuid) self.sr.lvmCache.create(logname, self.sr.journaler.LV_SIZE, CBTLOG_TAG) logpath = super(LVHDVDI, 
self)._create_cbt_log() self.sr.lvmCache.deactivateNoRefcount(logname) return logpath - def _delete_cbt_log(self): + @override + def _delete_cbt_log(self) -> None: logpath = self._get_cbt_logpath(self.uuid) if self._cbt_log_exists(logpath): logname = self._get_cbt_logname(self.uuid) self.sr.lvmCache.remove(logname) - def _rename(self, oldpath, newpath): + @override + def _rename(self, oldpath, newpath) -> None: oldname = os.path.basename(oldpath) newname = os.path.basename(newpath) self.sr.lvmCache.rename(oldname, newname) - def _activate_cbt_log(self, lv_name): + @override + def _activate_cbt_log(self, lv_name) -> bool: self.sr.lvmCache.refresh() if not self.sr.lvmCache.is_active(lv_name): try: @@ -2214,14 +2254,16 @@ def _activate_cbt_log(self, lv_name): else: return False - def _deactivate_cbt_log(self, lv_name): + @override + def _deactivate_cbt_log(self, lv_name) -> None: try: self.sr.lvmCache.deactivateNoRefcount(lv_name) except Exception as e: util.SMlog("Exception in _deactivate_cbt_log, Error: %s." % str(e)) raise - def _cbt_log_exists(self, logpath): + @override + def _cbt_log_exists(self, logpath) -> bool: return lvutil.exists(logpath) if __name__ == '__main__': diff --git a/drivers/LVHDoFCoESR.py b/drivers/LVHDoFCoESR.py index 766d83f4d..11bf298ed 100755 --- a/drivers/LVHDoFCoESR.py +++ b/drivers/LVHDoFCoESR.py @@ -18,7 +18,10 @@ # LVHDoFCoESR: LVHD over Fibre Channel over Ethernet driver # +from sm_typing import override + import SR +import VDI import LVHDoHBASR import LVHDSR import SRCommand @@ -54,7 +57,9 @@ class LVHDoFCoESR(LVHDoHBASR.LVHDoHBASR): """LVHD over FCoE storage repository""" - def handles(type): + @override + @staticmethod + def handles(type) -> bool: if __name__ == '__main__': name = sys.argv[0] else: @@ -64,9 +69,9 @@ def handles(type): if type == "lvhdofcoe": return True return False - handles = staticmethod(handles) - def load(self, sr_uuid): + @override + def load(self, sr_uuid) -> None: driver = SR.driver('hba') if 'type' not in self.original_srcmd.params['device_config'] or \ 'type' in self.original_srcmd.params['device_config'] and \ @@ -86,7 +91,8 @@ def load(self, sr_uuid): self.SCSIid = self.dconf['SCSIid'] LVHDSR.LVHDSR.load(self, sr_uuid) - def vdi(self, uuid): + @override + def vdi(self, uuid) -> VDI.VDI: return LVHDoFCoEVDI(self, uuid) diff --git a/drivers/LVHDoHBASR.py b/drivers/LVHDoHBASR.py index 1eb8fb635..784512733 100755 --- a/drivers/LVHDoHBASR.py +++ b/drivers/LVHDoHBASR.py @@ -19,9 +19,12 @@ # hardware based iSCSI # +from sm_typing import override + import SR import LVHDSR import SRCommand +import VDI import lvutil import HBASR import os @@ -58,7 +61,9 @@ class LVHDoHBASR(LVHDSR.LVHDSR): """LVHD over HBA storage repository""" - def handles(type): + @override + @staticmethod + def handles(type) -> bool: if __name__ == '__main__': name = sys.argv[0] else: @@ -68,9 +73,9 @@ def handles(type): if type == "lvhdohba": return True return False - handles = staticmethod(handles) - def load(self, sr_uuid): + @override + def load(self, sr_uuid) -> None: driver = SR.driver('hba') self.hbasr = driver(self.original_srcmd, sr_uuid) @@ -108,7 +113,8 @@ def load(self, sr_uuid): self.SCSIid = self.dconf['SCSIid'] super(LVHDoHBASR, self).load(sr_uuid) - def create(self, sr_uuid, size): + @override + def create(self, sr_uuid, size) -> None: self.hbasr.attach(sr_uuid) if self.mpath == "true": self.mpathmodule.refresh(self.SCSIid, 0) @@ -121,7 +127,8 @@ def create(self, sr_uuid, size): util.remove_mpathcount_field(self.session, self.host_ref, \ self.sr_ref, 
self.SCSIid) - def attach(self, sr_uuid): + @override + def attach(self, sr_uuid) -> None: self.hbasr.attach(sr_uuid) if self.mpath == "true": self.mpathmodule.refresh(self.SCSIid, 0) @@ -140,7 +147,8 @@ def attach(self, sr_uuid): LVHDSR.LVHDSR.attach(self, sr_uuid) self._setMultipathableFlag(SCSIid=self.SCSIid) - def scan(self, sr_uuid): + @override + def scan(self, sr_uuid) -> None: # During a reboot, scan is called ahead of attach, which causes the MGT # to point of the wrong device instead of dm-x. Running multipathing will # take care of this scenario. @@ -154,7 +162,8 @@ def scan(self, sr_uuid): self._pathrefresh(LVHDoHBASR) LVHDSR.LVHDSR.scan(self, sr_uuid) - def probe(self): + @override + def probe(self) -> str: if self.mpath == "true" and 'SCSIid' in self.dconf: # When multipathing is enabled, since we don't refcount the multipath maps, # we should not attempt to do the iscsi.attach/detach when the map is already present, @@ -181,7 +190,8 @@ def probe(self): self.mpathmodule.reset(self.SCSIid, explicit_unmap=True) raise - def detach(self, sr_uuid): + @override + def detach(self, sr_uuid) -> None: LVHDSR.LVHDSR.detach(self, sr_uuid) self.mpathmodule.reset(self.SCSIid, explicit_unmap=True) try: @@ -205,7 +215,8 @@ def _remove_device_nodes(self): (os.path.basename(node)), 'w') as f: f.write('1\n') - def delete(self, sr_uuid): + @override + def delete(self, sr_uuid) -> None: self._pathrefresh(LVHDoHBASR) try: LVHDSR.LVHDSR.delete(self, sr_uuid) @@ -214,12 +225,14 @@ def delete(self, sr_uuid): self.mpathmodule.reset(self.SCSIid, explicit_unmap=True) self._remove_device_nodes() - def vdi(self, uuid): + @override + def vdi(self, uuid) -> VDI.VDI: return LVHDoHBAVDI(self, uuid) class LVHDoHBAVDI(LVHDSR.LVHDVDI): - def generate_config(self, sr_uuid, vdi_uuid): + @override + def generate_config(self, sr_uuid, vdi_uuid) -> str: util.SMlog("LVHDoHBAVDI.generate_config") if not lvutil._checkLV(self.path): raise xs_errors.XenError('VDIUnavailable') @@ -235,7 +248,8 @@ def generate_config(self, sr_uuid, vdi_uuid): config = xmlrpc.client.dumps(tuple([dict]), "vdi_attach_from_config") return xmlrpc.client.dumps((config, ), "", True) - def attach_from_config(self, sr_uuid, vdi_uuid): + @override + def attach_from_config(self, sr_uuid, vdi_uuid) -> str: util.SMlog("LVHDoHBAVDI.attach_from_config") self.sr.hbasr.attach(sr_uuid) if self.sr.mpath == "true": diff --git a/drivers/LVHDoISCSISR.py b/drivers/LVHDoISCSISR.py index 442ec30f7..799e89d65 100755 --- a/drivers/LVHDoISCSISR.py +++ b/drivers/LVHDoISCSISR.py @@ -18,7 +18,10 @@ # LVHDoISCSISR: LVHD over ISCSI software initiator SR driver # +from sm_typing import override + import SR +import VDI import LVHDSR import BaseISCSI import SRCommand @@ -71,7 +74,9 @@ class LVHDoISCSISR(LVHDSR.LVHDSR): """LVHD over ISCSI storage repository""" - def handles(type): + @override + @staticmethod + def handles(type) -> bool: if __name__ == '__main__': name = sys.argv[0] else: @@ -81,9 +86,9 @@ def handles(type): if type == "lvhdoiscsi": return True return False - handles = staticmethod(handles) - def load(self, sr_uuid): + @override + def load(self, sr_uuid) -> None: if not sr_uuid: # This is a probe call, generate a temp sr_uuid sr_uuid = util.gen_uuid() @@ -426,7 +431,8 @@ def _LUNprint(self, sr_uuid): self.iscsi.print_LUNs() self.iscsi.detach(sr_uuid) - def create(self, sr_uuid, size): + @override + def create(self, sr_uuid, size) -> None: # Check SCSIid not already in use by other PBDs if util.test_SCSIid(self.session, sr_uuid, self.SCSIid): raise 
xs_errors.XenError('SRInUse') @@ -441,13 +447,15 @@ def create(self, sr_uuid, size): raise xs_errors.XenError("SRUnavailable", opterr=inst) self.iscsi.detach(sr_uuid) - def delete(self, sr_uuid): + @override + def delete(self, sr_uuid) -> None: self._pathrefresh(LVHDoISCSISR) LVHDSR.LVHDSR.delete(self, sr_uuid) for i in self.iscsiSRs: i.detach(sr_uuid) - def attach(self, sr_uuid): + @override + def attach(self, sr_uuid) -> None: try: connected = False stored_exception = None @@ -485,12 +493,14 @@ def attach(self, sr_uuid): raise xs_errors.XenError("SRUnavailable", opterr=inst) self._setMultipathableFlag(SCSIid=self.SCSIid) - def detach(self, sr_uuid): + @override + def detach(self, sr_uuid) -> None: LVHDSR.LVHDSR.detach(self, sr_uuid) for i in self.iscsiSRs: i.detach(sr_uuid) - def scan(self, sr_uuid): + @override + def scan(self, sr_uuid) -> None: self._pathrefresh(LVHDoISCSISR) if self.mpath == "true": for i in self.iscsiSRs: @@ -500,7 +510,8 @@ def scan(self, sr_uuid): util.SMlog("Connection failed for target %s, continuing.." % i.target) LVHDSR.LVHDSR.scan(self, sr_uuid) - def probe(self): + @override + def probe(self) -> str: self.uuid = util.gen_uuid() # When multipathing is enabled, since we don't refcount the multipath maps, @@ -523,7 +534,8 @@ def probe(self): self.iscsi.detach(self.uuid) return out - def check_sr(self, sr_uuid): + @override + def check_sr(self, sr_uuid) -> None: """Hook to check SR health""" pbdref = util.find_my_pbd(self.session, self.host_ref, self.sr_ref) if pbdref: @@ -536,12 +548,14 @@ def check_sr(self, sr_uuid): except xs_errors.SROSError: util.SMlog("Failed to attach iSCSI target") - def vdi(self, uuid): + @override + def vdi(self, uuid) -> VDI.VDI: return LVHDoISCSIVDI(self, uuid) class LVHDoISCSIVDI(LVHDSR.LVHDVDI): - def generate_config(self, sr_uuid, vdi_uuid): + @override + def generate_config(self, sr_uuid, vdi_uuid) -> str: util.SMlog("LVHDoISCSIVDI.generate_config") if not lvutil._checkLV(self.path): raise xs_errors.XenError('VDIUnavailable') @@ -562,7 +576,8 @@ def generate_config(self, sr_uuid, vdi_uuid): config = xmlrpc.client.dumps(tuple([dict]), "vdi_attach_from_config") return xmlrpc.client.dumps((config, ), "", True) - def attach_from_config(self, sr_uuid, vdi_uuid): + @override + def attach_from_config(self, sr_uuid, vdi_uuid) -> str: util.SMlog("LVHDoISCSIVDI.attach_from_config") try: self.sr.iscsi.attach(sr_uuid) diff --git a/drivers/LargeBlockSR.py b/drivers/LargeBlockSR.py new file mode 100644 index 000000000..449b2cf1a --- /dev/null +++ b/drivers/LargeBlockSR.py @@ -0,0 +1,258 @@ +#!/usr/bin/env python3 +# +# Copyright (C) 2024 Vates SAS - damien.thenot@vates.tech +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
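The driver below builds an EXT SR on a loop device that re-exports a 4KiB-native disk with 512-byte logical sectors. A minimal standalone sketch of that emulation, assuming a 4Kn disk at /dev/nvme0n1 and root privileges (an illustration only; the driver itself runs the same tools through util.pread2):

import subprocess

def logical_sector_size(dev):
    # Logical sector size reported by the block layer, e.g. 4096 for a 4Kn disk.
    return int(subprocess.check_output(["blockdev", "--getss", dev]).decode().strip())

def emulate_512b(dev):
    # Attach a loop device advertising 512-byte sectors on top of the disk.
    return subprocess.check_output(
        ["losetup", "-f", "-v", "--show", "--sector-size", "512", dev]
    ).decode().strip()

disk = "/dev/nvme0n1"                       # example 4KiB-sector device
assert logical_sector_size(disk) == 4096    # would already be 512 on a legacy disk
loopdev = emulate_512b(disk)                # e.g. "/dev/loop0"
assert logical_sector_size(loopdev) == 512  # the EXT SR is then created here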
+ +from sm_typing import override + +import SR +from SR import deviceCheck +import SRCommand +import EXTSR +import util +import xs_errors +import os +import re +import lvutil + +CAPABILITIES = ["SR_PROBE", "SR_UPDATE", "SR_SUPPORTS_LOCAL_CACHING", + "VDI_CREATE", "VDI_DELETE", "VDI_ATTACH", "VDI_DETACH", + "VDI_UPDATE", "VDI_CLONE", "VDI_SNAPSHOT", "VDI_RESIZE", "VDI_MIRROR", + "VDI_GENERATE_CONFIG", + "VDI_RESET_ON_BOOT/2", "ATOMIC_PAUSE", "VDI_CONFIG_CBT", + "VDI_ACTIVATE", "VDI_DEACTIVATE", "THIN_PROVISIONING", "VDI_READ_CACHING"] + +CONFIGURATION = [['device', 'local device path (required) (e.g. /dev/sda3)']] + +DRIVER_INFO = { + 'name': 'Large Block SR', + 'description': 'SR plugin which emulates a 512 bytes disk on top of a 4KiB device then create a EXT SR', + 'vendor': 'Vates', + 'copyright': '(C) 2024 Vates', + 'driver_version': '1.0', + 'required_api_version': '1.0', + 'capabilities': CAPABILITIES, + 'configuration': CONFIGURATION +} + +LARGEBLOCK_PREFIX = "XSLocalLargeBlock-" + +class LargeBlockSR(EXTSR.EXTSR): + """Emulating 512b drives for EXT storage repository""" + + DRIVER_TYPE = "largeblock" + LOOP_SECTOR_SIZE = 512 + + @override + @staticmethod + def handles(srtype) -> bool: + return srtype == LargeBlockSR.DRIVER_TYPE + + @override + def load(self, sr_uuid) -> None: + super(LargeBlockSR, self).load(sr_uuid) + self.is_deleting = False + self.vgname = LARGEBLOCK_PREFIX + sr_uuid + self.remotepath = os.path.join("/dev", self.vgname, sr_uuid) + + @override + def attach(self, sr_uuid) -> None: + if not self.is_deleting: + vg_device = self._get_device() + self.dconf["device"] = ",".join(vg_device) + self._create_emulated_device() + if not self._is_vg_connection_correct(): # Check if we need to redo the connection by parsing `vgs -o vg_name,devices self.vgname` + self._redo_vg_connection() # Call redo VG connection to connect it correctly to the loop device instead of the real 4KiB block device + super(LargeBlockSR, self).attach(sr_uuid) + + @override + def detach(self, sr_uuid) -> None: + if not self.is_deleting: + vg_device = self._get_device() + self.dconf["device"] = ",".join(vg_device) + super(LargeBlockSR, self).detach(sr_uuid) + if not self.is_deleting: + self._destroy_emulated_device() + + @override + @deviceCheck + def create(self, sr_uuid, size) -> None: + base_devices = self.dconf["device"].split(",") + if len(base_devices) > 1: + raise xs_errors.XenError("ConfigDeviceInvalid", opterr="Multiple devices configuration is not supported") + + for dev in base_devices: + logical_blocksize = util.pread2(["blockdev", "--getss", dev]).strip() + if logical_blocksize == "512": + raise xs_errors.XenError("LargeBlockIncorrectBlocksize", opterr="The logical blocksize of the device {} is compatible with normal SR types".format(dev)) + + try: + self._create_emulated_device() + super(LargeBlockSR, self).create(sr_uuid, size) + finally: + self._destroy_emulated_device(base_devices) + + @override + def delete(self, sr_uuid) -> None: + base_devices = self._get_device() + self.dconf["device"] = ",".join(self._get_loopdev_from_device(base_devices)) + + self.is_deleting = True + try: + super(LargeBlockSR, self).delete(sr_uuid) + except xs_errors.SROSError: + # In case, the lvremove doesn't like the loop device, it will throw an error. + # We need to remove the device ourselves using the real device in this case. 
+ for dev in base_devices: + util.pread2(["pvremove", dev]) + finally: + self._destroy_emulated_device(base_devices) + self.is_deleting = False + + @override + @deviceCheck + def probe(self) -> str: + # We override EXTSR.probe because it uses EXT_PREFIX in this call + return lvutil.srlist_toxml( + lvutil.scan_srlist(LARGEBLOCK_PREFIX, self.dconf['device']), + LARGEBLOCK_PREFIX + ) + + def _create_loopdev(self, dev, emulated_path): + cmd = ["losetup", "-f", "-v", "--show", "--sector-size", str(self.LOOP_SECTOR_SIZE), dev] + loopdev = util.pread2(cmd).rstrip() + + if os.path.exists(emulated_path) and os.path.islink(emulated_path): + os.unlink(emulated_path) + + try: + os.symlink(loopdev, emulated_path) + except OSError: + raise xs_errors.XenError("LargeBlockSymlinkExist", opterr="Symlink {} couldn't be created".format(emulated_path)) + + def _delete_loopdev(self, dev, emulated_path): + if os.path.exists(emulated_path) and os.path.islink(emulated_path): + os.unlink(emulated_path) + + # The backing file isn't a symlink if given by ID in device-config but the real device + dev = os.path.realpath(dev) + loopdevs = self._get_loopdev_from_device(dev) + + if loopdevs != None: + try: + for lp in loopdevs: + cmd = ["losetup", "-d", lp] # Remove the loop device + util.pread2(cmd) + except xs_errors.SROSError: + util.SMlog("Couldn't removed losetup devices: {}".format(loopdevs)) + else: + xs_errors.XenError("LargeBlockNoLosetup", opterr="Couldn't find loop device for {}".format(dev)) + + @staticmethod + def _get_loopdev_from_device(device): + lpdevs = [] + output = util.pread2(["losetup", "--list"]).rstrip() + if output: + for line in output.split("\n"): + line = line.split() + loopdev = line[0] + dev = line[5].strip() + if dev in device: + lpdevs.append(loopdev) + return lpdevs + + @staticmethod + def _get_device_from_loopdev(loopdevs): + devices = [] + output = util.pread2(["losetup", "--list"]).rstrip() + if output: + for line in output.split("\n"): + line = line.split() + lpdev = line[0] + dev = line[5] + if lpdev in loopdevs: + devices.append(dev) + return devices + + def _get_device_from_vg(self): + devices = [] + output = util.pread2(["vgs", "--noheadings", "-o", "vg_name,devices", self.vgname]).splitlines() + for line in output: + line = line.split() + dev = line[1].split("(")[0] + if os.path.islink(dev): + dev = os.path.realpath(dev) + devices.append(dev) + return devices + + def _get_device(self): + vg_device = self._get_device_from_vg() + for dev in vg_device: + if re.match(r"(.*\.512)|(/dev/loop[0-9]+)", dev): + lpdev = os.path.realpath(dev) + realdev = self._get_device_from_loopdev(lpdev)[0] + vg_device.remove(dev) + vg_device.append(realdev) + + return vg_device + + def _is_vg_connection_correct(self): + output = util.pread2(["vgs", "--noheadings", "-o", "vg_name,devices", self.vgname]).split() + output[1] = output[1].split("(")[0] + return bool(re.match(r"(.*\.512)|(/dev/loop[0-9]+)", output[1])) + + def _redo_vg_connection(self): + """ + In case of using a LargeBlockSR, the LVM scan at boot will find the LogicalVolume on the real block device. + And when the PBD is connecting, it will mount from the original device instead of the loop device since LVM prefers real devices it has seen first. + The PBD plug will succeed but then the SR will be accessed through the 4KiB device, returning to the erroneous behaviour on 4KiB device. + VM won't be able to run because vhd-util will fail to scan the VDI. 
+ This function force the LogicalVolume to be mounted on top of our emulation layer by disabling the VolumeGroup and re-enabling while applying a filter. + """ + + util.SMlog("Reconnecting VG {} to use emulated device".format(self.vgname)) + try: + lvutil.setActiveVG(self.vgname, False) + lvutil.setActiveVG(self.vgname, True, config="devices{ global_filter = [ \"a|/dev/loop.*|\", \"r|.*|\" ] }") + except util.CommandException as e: + xs_errors.XenError("LargeBlockVGReconnectFailed", opterr="Failed to reconnect the VolumeGroup {}, error: {}".format(self.vgname, e)) + + + @classmethod + def _get_emulated_device_path(cls, dev): + return "{dev}.{bs}".format(dev=dev, bs=cls.LOOP_SECTOR_SIZE) + + def _create_emulated_device(self): + base_devices = self.dconf["device"].split(",") + emulated_devices = [] + for dev in base_devices: + emulated_path = self._get_emulated_device_path(dev) + self._create_loopdev(dev, emulated_path) + emulated_devices.append(emulated_path) + + emulated_devices = ",".join(emulated_devices) + self.dconf["device"] = emulated_devices + + def _destroy_emulated_device(self, devices=None): + if devices is None: + devices = self.dconf["device"].split(",") + + for dev in devices: + emulated_path = self._get_emulated_device_path(dev) + self._delete_loopdev(dev, emulated_path) + +if __name__ == '__main__': + SRCommand.run(LargeBlockSR, DRIVER_INFO) +else: + SR.registerSR(LargeBlockSR) diff --git a/drivers/LinstorSR.py b/drivers/LinstorSR.py new file mode 100755 index 000000000..dbfda35ee --- /dev/null +++ b/drivers/LinstorSR.py @@ -0,0 +1,2851 @@ +#!/usr/bin/env python3 +# +# Copyright (C) 2020 Vates SAS - ronan.abhamon@vates.fr +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
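The sm_typing module imported below is not shown here; a plausible minimal shape for it, assuming it merely re-exports typing names (Optional, Dict, List, ...) and provides an override decorator that degrades to a no-op where typing.override is unavailable:

try:
    from typing import override          # Python >= 3.12
except ImportError:
    try:
        from typing_extensions import override
    except ImportError:
        def override(method):
            # No-op fallback so the decorated drivers stay importable.
            return method

# Generic aliases re-exported for the drivers.
from typing import Any, Callable, Dict, List, Optional, Tuple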
+ +from sm_typing import Optional, override + +from constants import CBTLOG_TAG + +try: + from linstorjournaler import LinstorJournaler + from linstorvhdutil import LinstorVhdUtil + from linstorvolumemanager import get_controller_uri + from linstorvolumemanager import get_controller_node_name + from linstorvolumemanager import LinstorVolumeManager + from linstorvolumemanager import LinstorVolumeManagerError + from linstorvolumemanager import PERSISTENT_PREFIX + + LINSTOR_AVAILABLE = True +except ImportError: + PERSISTENT_PREFIX = 'unknown' + + LINSTOR_AVAILABLE = False + +from lock import Lock +import blktap2 +import cleanup +import errno +import functools +import lvutil +import os +import re +import scsiutil +import signal +import socket +import SR +import SRCommand +import subprocess +import sys +import time +import traceback +import util +import VDI +import vhdutil +import xml.etree.ElementTree as xml_parser +import xmlrpc.client +import xs_errors + +from srmetadata import \ + NAME_LABEL_TAG, NAME_DESCRIPTION_TAG, IS_A_SNAPSHOT_TAG, SNAPSHOT_OF_TAG, \ + TYPE_TAG, VDI_TYPE_TAG, READ_ONLY_TAG, SNAPSHOT_TIME_TAG, \ + METADATA_OF_POOL_TAG + +HIDDEN_TAG = 'hidden' + +XHA_CONFIG_PATH = '/etc/xensource/xhad.conf' + +FORK_LOG_DAEMON = '/opt/xensource/libexec/fork-log-daemon' + +# This flag can be disabled to debug the DRBD layer. +# When this config var is False, the HA can only be used under +# specific conditions: +# - Only one heartbeat diskless VDI is present in the pool. +# - The other hearbeat volumes must be diskful and limited to a maximum of 3. +USE_HTTP_NBD_SERVERS = True + +# Useful flag to trace calls using cProfile. +TRACE_PERFS = False + +# Enable/Disable VHD key hash support. +USE_KEY_HASH = False + +# Special volumes. +HA_VOLUME_NAME = PERSISTENT_PREFIX + 'ha-statefile' +REDO_LOG_VOLUME_NAME = PERSISTENT_PREFIX + 'redo-log' + +# ============================================================================== + +# TODO: Supports 'VDI_INTRODUCE', 'VDI_RESET_ON_BOOT/2', 'SR_TRIM', +# 'VDI_CONFIG_CBT', 'SR_PROBE' + +CAPABILITIES = [ + 'ATOMIC_PAUSE', + 'SR_UPDATE', + 'VDI_CREATE', + 'VDI_DELETE', + 'VDI_UPDATE', + 'VDI_ATTACH', + 'VDI_DETACH', + 'VDI_ACTIVATE', + 'VDI_DEACTIVATE', + 'VDI_CLONE', + 'VDI_MIRROR', + 'VDI_RESIZE', + 'VDI_SNAPSHOT', + 'VDI_GENERATE_CONFIG' +] + +CONFIGURATION = [ + ['group-name', 'LVM group name'], + ['redundancy', 'replication count'], + ['provisioning', '"thin" or "thick" are accepted (optional, defaults to thin)'], + ['monitor-db-quorum', 'disable controller when only one host is online (optional, defaults to true)'] +] + +DRIVER_INFO = { + 'name': 'LINSTOR resources on XCP-ng', + 'description': 'SR plugin which uses Linstor to manage VDIs', + 'vendor': 'Vates', + 'copyright': '(C) 2020 Vates', + 'driver_version': '1.0', + 'required_api_version': '1.0', + 'capabilities': CAPABILITIES, + 'configuration': CONFIGURATION +} + +DRIVER_CONFIG = {'ATTACH_FROM_CONFIG_WITH_TAPDISK': False} + +OPS_EXCLUSIVE = [ + 'sr_create', 'sr_delete', 'sr_attach', 'sr_detach', 'sr_scan', + 'sr_update', 'sr_probe', 'vdi_init', 'vdi_create', 'vdi_delete', + 'vdi_attach', 'vdi_detach', 'vdi_clone', 'vdi_snapshot', +] + +# ============================================================================== +# Misc helpers used by LinstorSR and linstor-thin plugin. 
+# ============================================================================== + + +def attach_thin(session, journaler, linstor, sr_uuid, vdi_uuid): + volume_metadata = linstor.get_volume_metadata(vdi_uuid) + image_type = volume_metadata.get(VDI_TYPE_TAG) + if image_type == vhdutil.VDI_TYPE_RAW: + return + + device_path = linstor.get_device_path(vdi_uuid) + + # If the virtual VHD size is lower than the LINSTOR volume size, + # there is nothing to do. + vhd_size = LinstorVhdUtil.compute_volume_size( + # TODO: Replace pylint comment with this feature when possible: + # https://github.com/PyCQA/pylint/pull/2926 + LinstorVhdUtil(session, linstor).get_size_virt(vdi_uuid), # pylint: disable = E1120 + image_type + ) + + volume_info = linstor.get_volume_info(vdi_uuid) + volume_size = volume_info.virtual_size + + if vhd_size > volume_size: + LinstorVhdUtil(session, linstor).inflate( + journaler, vdi_uuid, device_path, vhd_size, volume_size + ) + + +def detach_thin_impl(session, linstor, sr_uuid, vdi_uuid): + volume_metadata = linstor.get_volume_metadata(vdi_uuid) + image_type = volume_metadata.get(VDI_TYPE_TAG) + if image_type == vhdutil.VDI_TYPE_RAW: + return + + def check_vbd_count(): + vdi_ref = session.xenapi.VDI.get_by_uuid(vdi_uuid) + vbds = session.xenapi.VBD.get_all_records_where( + 'field "VDI" = "{}"'.format(vdi_ref) + ) + + num_plugged = 0 + for vbd_rec in vbds.values(): + if vbd_rec['currently_attached']: + num_plugged += 1 + if num_plugged > 1: + raise xs_errors.XenError( + 'VDIUnavailable', + opterr='Cannot deflate VDI {}, already used by ' + 'at least 2 VBDs'.format(vdi_uuid) + ) + + # We can have multiple VBDs attached to a VDI during a VM-template clone. + # So we use a timeout to ensure that we can detach the volume properly. + util.retry(check_vbd_count, maxretry=10, period=1) + + device_path = linstor.get_device_path(vdi_uuid) + vhdutil_inst = LinstorVhdUtil(session, linstor) + new_volume_size = LinstorVolumeManager.round_up_volume_size( + # TODO: Replace pylint comment with this feature when possible: + # https://github.com/PyCQA/pylint/pull/2926 + vhdutil_inst.get_size_phys(vdi_uuid) # pylint: disable = E1120 + ) + + volume_info = linstor.get_volume_info(vdi_uuid) + old_volume_size = volume_info.virtual_size + vhdutil_inst.deflate(device_path, new_volume_size, old_volume_size) + + +def detach_thin(session, linstor, sr_uuid, vdi_uuid): + # This function must always return without errors. + # Otherwise it could cause errors in the XAPI regarding the state of the VDI. + # It's why we use this `try` block. + try: + detach_thin_impl(session, linstor, sr_uuid, vdi_uuid) + except Exception as e: + util.SMlog('Failed to detach properly VDI {}: {}'.format(vdi_uuid, e)) + + +def get_ips_from_xha_config_file(): + ips = dict() + host_id = None + try: + # Ensure there is no dirty read problem. + # For example if the HA is reloaded. 
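    # The parsing below relies only on element names; the expected layout is
    # roughly the following (illustrative values - the real
    # /etc/xensource/xhad.conf carries more fields, and the root element name
    # is never inspected here):
    #
    #   <xhad-config>
    #     <common-config>
    #       <host><HostID>0</HostID><IPaddress>10.0.0.1</IPaddress></host>
    #       <host><HostID>1</HostID><IPaddress>10.0.0.2</IPaddress></host>
    #     </common-config>
    #     <local-config>
    #       <localhost><HostID>1</HostID></localhost>
    #     </local-config>
    #   </xhad-config>
    #
    # With that content the function returns
    # ('10.0.0.2', {'0': '10.0.0.1', '1': '10.0.0.2'}).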
+ tree = util.retry( + lambda: xml_parser.parse(XHA_CONFIG_PATH), + maxretry=10, + period=1 + ) + except: + return (None, ips) + + def parse_host_nodes(ips, node): + current_id = None + current_ip = None + + for sub_node in node: + if sub_node.tag == 'IPaddress': + current_ip = sub_node.text + elif sub_node.tag == 'HostID': + current_id = sub_node.text + else: + continue + + if current_id and current_ip: + ips[current_id] = current_ip + return + util.SMlog('Ill-formed XHA file, missing IPaddress or/and HostID') + + def parse_common_config(ips, node): + for sub_node in node: + if sub_node.tag == 'host': + parse_host_nodes(ips, sub_node) + + def parse_local_config(ips, node): + for sub_node in node: + if sub_node.tag == 'localhost': + for host_node in sub_node: + if host_node.tag == 'HostID': + return host_node.text + + for node in tree.getroot(): + if node.tag == 'common-config': + parse_common_config(ips, node) + elif node.tag == 'local-config': + host_id = parse_local_config(ips, node) + else: + continue + + if ips and host_id: + break + + return (host_id and ips.get(host_id), ips) + + +def activate_lvm_group(group_name): + path = group_name.split('/') + assert path and len(path) <= 2 + try: + lvutil.setActiveVG(path[0], True) + except Exception as e: + util.SMlog('Cannot active VG `{}`: {}'.format(path[0], e)) + +# ============================================================================== + +# Usage example: +# xe sr-create type=linstor name-label=linstor-sr +# host-uuid=d2deba7a-c5ad-4de1-9a20-5c8df3343e93 +# device-config:group-name=vg_loop device-config:redundancy=2 + + +class LinstorSR(SR.SR): + DRIVER_TYPE = 'linstor' + + PROVISIONING_TYPES = ['thin', 'thick'] + PROVISIONING_DEFAULT = 'thin' + + MANAGER_PLUGIN = 'linstor-manager' + + INIT_STATUS_NOT_SET = 0 + INIT_STATUS_IN_PROGRESS = 1 + INIT_STATUS_OK = 2 + INIT_STATUS_FAIL = 3 + + # -------------------------------------------------------------------------- + # SR methods. + # -------------------------------------------------------------------------- + + @override + @staticmethod + def handles(type) -> bool: + return type == LinstorSR.DRIVER_TYPE + + @override + def load(self, sr_uuid) -> None: + if not LINSTOR_AVAILABLE: + raise util.SMException( + 'Can\'t load LinstorSR: LINSTOR libraries are missing' + ) + + # Check parameters. + if 'group-name' not in self.dconf or not self.dconf['group-name']: + raise xs_errors.XenError('LinstorConfigGroupNameMissing') + if 'redundancy' not in self.dconf or not self.dconf['redundancy']: + raise xs_errors.XenError('LinstorConfigRedundancyMissing') + + self.driver_config = DRIVER_CONFIG + + # Check provisioning config. + provisioning = self.dconf.get('provisioning') + if provisioning: + if provisioning in self.PROVISIONING_TYPES: + self._provisioning = provisioning + else: + raise xs_errors.XenError( + 'InvalidArg', + opterr='Provisioning parameter must be one of {}'.format( + self.PROVISIONING_TYPES + ) + ) + else: + self._provisioning = self.PROVISIONING_DEFAULT + + monitor_db_quorum = self.dconf.get('monitor-db-quorum') + self._monitor_db_quorum = (monitor_db_quorum is None) or \ + util.strtobool(monitor_db_quorum) + + # Note: We don't have access to the session field if the + # 'vdi_attach_from_config' command is executed. 
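        # In practice (an assumption drawn from the attach/detach-from-config
        # handling further down): this is the case when special volumes such
        # as the HA statefile or the redo log are attached at boot, before
        # XAPI is available, so everything needed has to come from the
        # command parameters ('sr_sm_config', device-config) rather than
        # from a session.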
+ self._has_session = self.sr_ref and self.session is not None + if self._has_session: + self.sm_config = self.session.xenapi.SR.get_sm_config(self.sr_ref) + else: + self.sm_config = self.srcmd.params.get('sr_sm_config') or {} + + provisioning = self.sm_config.get('provisioning') + if provisioning in self.PROVISIONING_TYPES: + self._provisioning = provisioning + + # Define properties for SR parent class. + self.ops_exclusive = OPS_EXCLUSIVE + self.path = LinstorVolumeManager.DEV_ROOT_PATH + self.lock = Lock(vhdutil.LOCK_TYPE_SR, self.uuid) + self.sr_vditype = SR.DEFAULT_TAP + + if self.cmd == 'sr_create': + self._redundancy = int(self.dconf['redundancy']) or 1 + self._linstor = None # Ensure that LINSTOR attribute exists. + self._journaler = None + + self._group_name = self.dconf['group-name'] + + self._vdi_shared_time = 0 + + self._init_status = self.INIT_STATUS_NOT_SET + + self._vdis_loaded = False + self._all_volume_info_cache = None + self._all_volume_metadata_cache = None + + # To remove in python 3.10. + # Use directly @staticmethod instead. + @util.conditional_decorator(staticmethod, sys.version_info >= (3, 10, 0)) + def _locked_load(method): + def wrapped_method(self, *args, **kwargs): + self._init_status = self.INIT_STATUS_OK + return method(self, *args, **kwargs) + + def load(self, *args, **kwargs): + # Activate all LVMs to make drbd-reactor happy. + if self.srcmd.cmd in ('sr_attach', 'vdi_attach_from_config'): + activate_lvm_group(self._group_name) + + if not self._has_session: + if self.srcmd.cmd in ( + 'vdi_attach_from_config', + 'vdi_detach_from_config', + # When on-slave (is_open) is executed we have an + # empty command. + None + ): + def create_linstor(uri, attempt_count=30): + self._linstor = LinstorVolumeManager( + uri, + self._group_name, + logger=util.SMlog, + attempt_count=attempt_count + ) + # Only required if we are attaching from config using a non-special VDI. + # I.e. not an HA volume. + self._vhdutil = LinstorVhdUtil(self.session, self._linstor) + + controller_uri = get_controller_uri() + if controller_uri: + create_linstor(controller_uri) + else: + def connect(): + # We must have a valid LINSTOR instance here without using + # the XAPI. Fallback with the HA config file. + for ip in get_ips_from_xha_config_file()[1].values(): + controller_uri = 'linstor://' + ip + try: + util.SMlog('Connecting from config to LINSTOR controller using: {}'.format(ip)) + create_linstor(controller_uri, attempt_count=0) + return controller_uri + except: + pass + + controller_uri = util.retry(connect, maxretry=30, period=1) + if not controller_uri: + raise xs_errors.XenError( + 'SRUnavailable', + opterr='No valid controller URI to attach/detach from config' + ) + + self._journaler = LinstorJournaler( + controller_uri, self._group_name, logger=util.SMlog + ) + + if self.srcmd.cmd is None: + # Only useful on on-slave plugin (is_open). + self._vhdutil = LinstorVhdUtil(self.session, self._linstor) + + return wrapped_method(self, *args, **kwargs) + + if not self.is_master(): + if self.cmd in [ + 'sr_create', 'sr_delete', 'sr_update', 'sr_probe', + 'sr_scan', 'vdi_create', 'vdi_delete', 'vdi_resize', + 'vdi_snapshot', 'vdi_clone' + ]: + util.SMlog('{} blocked for non-master'.format(self.cmd)) + raise xs_errors.XenError('LinstorMaster') + + # Because the LINSTOR KV objects cache all values, we must lock + # the VDI before the LinstorJournaler/LinstorVolumeManager + # instantiation and before any action on the master to avoid a + # bad read. 
The lock is also necessary to avoid strange + # behaviors if the GC is executed during an action on a slave. + if self.cmd.startswith('vdi_'): + self._shared_lock_vdi(self.srcmd.params['vdi_uuid']) + self._vdi_shared_time = time.time() + + if self.srcmd.cmd != 'sr_create' and self.srcmd.cmd != 'sr_detach': + try: + self._reconnect() + except Exception as e: + raise xs_errors.XenError('SRUnavailable', opterr=str(e)) + + if self._linstor: + try: + hosts = self._linstor.disconnected_hosts + except Exception as e: + raise xs_errors.XenError('SRUnavailable', opterr=str(e)) + + if hosts: + util.SMlog('Failed to join node(s): {}'.format(hosts)) + + # Ensure we use a non-locked volume when vhdutil is called. + if ( + self.is_master() and self.cmd.startswith('vdi_') and + self.cmd != 'vdi_create' + ): + self._linstor.ensure_volume_is_not_locked( + self.srcmd.params['vdi_uuid'] + ) + + try: + # If the command is a SR scan command on the master, + # we must load all VDIs and clean journal transactions. + # We must load the VDIs in the snapshot case too only if + # there is at least one entry in the journal. + # + # If the command is a SR command we want at least to remove + # resourceless volumes. + if self.is_master() and self.cmd not in [ + 'vdi_attach', 'vdi_detach', + 'vdi_activate', 'vdi_deactivate', + 'vdi_epoch_begin', 'vdi_epoch_end', + 'vdi_update', 'vdi_destroy' + ]: + load_vdis = ( + self.cmd == 'sr_scan' or + self.cmd == 'sr_attach' + ) or len( + self._journaler.get_all(LinstorJournaler.INFLATE) + ) or len( + self._journaler.get_all(LinstorJournaler.CLONE) + ) + + if load_vdis: + self._load_vdis() + + self._linstor.remove_resourceless_volumes() + + self._synchronize_metadata() + except Exception as e: + if self.cmd == 'sr_scan' or self.cmd == 'sr_attach': + # Always raise, we don't want to remove VDIs + # from the XAPI database otherwise. 
+ raise e + util.SMlog( + 'Ignoring exception in LinstorSR.load: {}'.format(e) + ) + util.SMlog(traceback.format_exc()) + + return wrapped_method(self, *args, **kwargs) + + @functools.wraps(wrapped_method) + def wrap(self, *args, **kwargs): + if self._init_status in \ + (self.INIT_STATUS_OK, self.INIT_STATUS_IN_PROGRESS): + return wrapped_method(self, *args, **kwargs) + if self._init_status == self.INIT_STATUS_FAIL: + util.SMlog( + 'Can\'t call method {} because initialization failed' + .format(method) + ) + else: + try: + self._init_status = self.INIT_STATUS_IN_PROGRESS + return load(self, *args, **kwargs) + except Exception: + if self._init_status != self.INIT_STATUS_OK: + self._init_status = self.INIT_STATUS_FAIL + raise + + return wrap + + @override + def cleanup(self) -> None: + if self._vdi_shared_time: + self._shared_lock_vdi(self.srcmd.params['vdi_uuid'], locked=False) + + @override + @_locked_load + def create(self, uuid, size) -> None: + util.SMlog('LinstorSR.create for {}'.format(self.uuid)) + + host_adresses = util.get_host_addresses(self.session) + if self._redundancy > len(host_adresses): + raise xs_errors.XenError( + 'LinstorSRCreate', + opterr='Redundancy greater than host count' + ) + + xenapi = self.session.xenapi + srs = xenapi.SR.get_all_records_where( + 'field "type" = "{}"'.format(self.DRIVER_TYPE) + ) + srs = dict([e for e in srs.items() if e[1]['uuid'] != self.uuid]) + + for sr in srs.values(): + for pbd in sr['PBDs']: + device_config = xenapi.PBD.get_device_config(pbd) + group_name = device_config.get('group-name') + if group_name and group_name == self._group_name: + raise xs_errors.XenError( + 'LinstorSRCreate', + opterr='group name must be unique, already used by PBD {}'.format( + xenapi.PBD.get_uuid(pbd) + ) + ) + + if srs: + raise xs_errors.XenError( + 'LinstorSRCreate', + opterr='LINSTOR SR must be unique in a pool' + ) + + online_hosts = util.get_enabled_hosts(self.session) + if len(online_hosts) < len(host_adresses): + raise xs_errors.XenError( + 'LinstorSRCreate', + opterr='Not enough online hosts' + ) + + ips = {} + for host_ref in online_hosts: + record = self.session.xenapi.host.get_record(host_ref) + hostname = record['hostname'] + ips[hostname] = record['address'] + + if len(ips) != len(online_hosts): + raise xs_errors.XenError( + 'LinstorSRCreate', + opterr='Multiple hosts with same hostname' + ) + + # Ensure ports are opened and LINSTOR satellites + # are activated. In the same time the drbd-reactor instances + # must be stopped. + self._prepare_sr_on_all_hosts(self._group_name, enabled=True) + + # Create SR. + # Throw if the SR already exists. + try: + self._linstor = LinstorVolumeManager.create_sr( + self._group_name, + ips, + self._redundancy, + thin_provisioning=self._provisioning == 'thin', + auto_quorum=self._monitor_db_quorum, + logger=util.SMlog + ) + self._vhdutil = LinstorVhdUtil(self.session, self._linstor) + + util.SMlog( + "Finishing SR creation, enable drbd-reactor on all hosts..." 
+ ) + self._update_drbd_reactor_on_all_hosts(enabled=True) + except Exception as e: + if not self._linstor: + util.SMlog('Failed to create LINSTOR SR: {}'.format(e)) + raise xs_errors.XenError('LinstorSRCreate', opterr=str(e)) + + try: + self._linstor.destroy() + except Exception as e2: + util.SMlog( + 'Failed to destroy LINSTOR SR after creation fail: {}' + .format(e2) + ) + raise e + + @override + @_locked_load + def delete(self, uuid) -> None: + util.SMlog('LinstorSR.delete for {}'.format(self.uuid)) + cleanup.gc_force(self.session, self.uuid) + + assert self._linstor + if self.vdis or self._linstor._volumes: + raise xs_errors.XenError('SRNotEmpty') + + node_name = get_controller_node_name() + if not node_name: + raise xs_errors.XenError( + 'LinstorSRDelete', + opterr='Cannot get controller node name' + ) + + host_ref = None + if node_name == 'localhost': + host_ref = util.get_this_host_ref(self.session) + else: + for slave in util.get_all_slaves(self.session): + r_name = self.session.xenapi.host.get_record(slave)['hostname'] + if r_name == node_name: + host_ref = slave + break + + if not host_ref: + raise xs_errors.XenError( + 'LinstorSRDelete', + opterr='Failed to find host with hostname: {}'.format( + node_name + ) + ) + + try: + self._update_drbd_reactor_on_all_hosts( + controller_node_name=node_name, enabled=False + ) + + args = { + 'groupName': self._group_name, + } + self._exec_manager_command( + host_ref, 'destroy', args, 'LinstorSRDelete' + ) + except Exception as e: + try: + self._update_drbd_reactor_on_all_hosts( + controller_node_name=node_name, enabled=True + ) + except Exception as e2: + util.SMlog( + 'Failed to restart drbd-reactor after destroy fail: {}' + .format(e2) + ) + util.SMlog('Failed to delete LINSTOR SR: {}'.format(e)) + raise xs_errors.XenError( + 'LinstorSRDelete', + opterr=str(e) + ) + + Lock.cleanupAll(self.uuid) + + @override + @_locked_load + def update(self, uuid) -> None: + util.SMlog('LinstorSR.update for {}'.format(self.uuid)) + + # Well, how can we update a SR if it doesn't exist? :thinking: + if not self._linstor: + raise xs_errors.XenError( + 'SRUnavailable', + opterr='no such volume group: {}'.format(self._group_name) + ) + + self._update_stats(0) + + # Update the SR name and description only in LINSTOR metadata. + xenapi = self.session.xenapi + self._linstor.metadata = { + NAME_LABEL_TAG: util.to_plain_string( + xenapi.SR.get_name_label(self.sr_ref) + ), + NAME_DESCRIPTION_TAG: util.to_plain_string( + xenapi.SR.get_name_description(self.sr_ref) + ) + } + + @override + @_locked_load + def attach(self, uuid) -> None: + util.SMlog('LinstorSR.attach for {}'.format(self.uuid)) + + if not self._linstor: + raise xs_errors.XenError( + 'SRUnavailable', + opterr='no such group: {}'.format(self._group_name) + ) + + @override + @_locked_load + def detach(self, uuid) -> None: + util.SMlog('LinstorSR.detach for {}'.format(self.uuid)) + cleanup.abort(self.uuid) + + @override + @_locked_load + def probe(self) -> str: + util.SMlog('LinstorSR.probe for {}'.format(self.uuid)) + # TODO + return '' + + @override + @_locked_load + def scan(self, uuid) -> None: + if self._init_status == self.INIT_STATUS_FAIL: + return + + util.SMlog('LinstorSR.scan for {}'.format(self.uuid)) + if not self._linstor: + raise xs_errors.XenError( + 'SRUnavailable', + opterr='no such volume group: {}'.format(self._group_name) + ) + + # Note: `scan` can be called outside this module, so ensure the VDIs + # are loaded. 
+ self._load_vdis() + self._update_physical_size() + + for vdi_uuid in list(self.vdis.keys()): + if self.vdis[vdi_uuid].deleted: + del self.vdis[vdi_uuid] + + # Security to prevent VDIs from being forgotten if the controller + # is started without a shared and mounted /var/lib/linstor path. + try: + self._linstor.get_database_path() + except Exception as e: + # Failed to get database path, ensure we don't have + # VDIs in the XAPI database... + if self.session.xenapi.SR.get_VDIs( + self.session.xenapi.SR.get_by_uuid(self.uuid) + ): + raise xs_errors.XenError( + 'SRUnavailable', + opterr='Database is not mounted or node name is invalid ({})'.format(e) + ) + + # Update the database before the restart of the GC to avoid + # bad sync in the process if new VDIs have been introduced. + super(LinstorSR, self).scan(self.uuid) + self._kick_gc() + + def is_master(self): + if not hasattr(self, '_is_master'): + if 'SRmaster' not in self.dconf: + self._is_master = self.session is not None and util.is_master(self.session) + else: + self._is_master = self.dconf['SRmaster'] == 'true' + + return self._is_master + + @override + @_locked_load + def vdi(self, uuid) -> VDI.VDI: + return LinstorVDI(self, uuid) + + # To remove in python 3.10 + # See: https://stackoverflow.com/questions/12718187/python-version-3-9-calling-class-staticmethod-within-the-class-body + _locked_load = staticmethod(_locked_load) + + # -------------------------------------------------------------------------- + # Lock. + # -------------------------------------------------------------------------- + + def _shared_lock_vdi(self, vdi_uuid, locked=True): + master = util.get_master_ref(self.session) + + command = 'lockVdi' + args = { + 'groupName': self._group_name, + 'srUuid': self.uuid, + 'vdiUuid': vdi_uuid, + 'locked': str(locked) + } + + # Note: We must avoid to unlock the volume if the timeout is reached + # because during volume unlock, the SR lock is not used. Otherwise + # we could destroy a valid lock acquired from another host... + # + # This code is not very clean, the ideal solution would be to acquire + # the SR lock during volume unlock (like lock) but it's not easy + # to implement without impacting performance. + if not locked: + elapsed_time = time.time() - self._vdi_shared_time + timeout = LinstorVolumeManager.LOCKED_EXPIRATION_DELAY * 0.7 + if elapsed_time >= timeout: + util.SMlog( + 'Avoid unlock call of {} because timeout has been reached' + .format(vdi_uuid) + ) + return + + self._exec_manager_command(master, command, args, 'VDIUnavailable') + + # -------------------------------------------------------------------------- + # Network. 
+ # -------------------------------------------------------------------------- + + def _exec_manager_command(self, host_ref, command, args, error): + host_rec = self.session.xenapi.host.get_record(host_ref) + host_uuid = host_rec['uuid'] + + try: + ret = self.session.xenapi.host.call_plugin( + host_ref, self.MANAGER_PLUGIN, command, args + ) + except Exception as e: + util.SMlog( + 'call-plugin on {} ({}:{} with {}) raised'.format( + host_uuid, self.MANAGER_PLUGIN, command, args + ) + ) + raise e + + util.SMlog( + 'call-plugin on {} ({}:{} with {}) returned: {}'.format( + host_uuid, self.MANAGER_PLUGIN, command, args, ret + ) + ) + if ret == 'False': + raise xs_errors.XenError( + error, + opterr='Plugin {} failed'.format(self.MANAGER_PLUGIN) + ) + + def _prepare_sr(self, host, group_name, enabled): + self._exec_manager_command( + host, + 'prepareSr' if enabled else 'releaseSr', + {'groupName': group_name}, + 'SRUnavailable' + ) + + def _prepare_sr_on_all_hosts(self, group_name, enabled): + master = util.get_master_ref(self.session) + self._prepare_sr(master, group_name, enabled) + + for slave in util.get_all_slaves(self.session): + self._prepare_sr(slave, group_name, enabled) + + def _update_drbd_reactor(self, host, enabled): + self._exec_manager_command( + host, + 'updateDrbdReactor', + {'enabled': str(enabled)}, + 'SRUnavailable' + ) + + def _update_drbd_reactor_on_all_hosts( + self, enabled, controller_node_name=None + ): + if controller_node_name == 'localhost': + controller_node_name = self.session.xenapi.host.get_record( + util.get_this_host_ref(self.session) + )['hostname'] + assert controller_node_name + assert controller_node_name != 'localhost' + + controller_host = None + secondary_hosts = [] + + hosts = self.session.xenapi.host.get_all_records() + for host_ref, host_rec in hosts.items(): + hostname = host_rec['hostname'] + if controller_node_name == hostname: + controller_host = host_ref + else: + secondary_hosts.append((host_ref, hostname)) + + action_name = 'Starting' if enabled else 'Stopping' + if controller_node_name and not controller_host: + util.SMlog('Failed to find controller host: `{}`'.format( + controller_node_name + )) + + if enabled and controller_host: + util.SMlog('{} drbd-reactor on controller host `{}`...'.format( + action_name, controller_node_name + )) + # If enabled is true, we try to start the controller on the desired + # node name first. + self._update_drbd_reactor(controller_host, enabled) + + for host_ref, hostname in secondary_hosts: + util.SMlog('{} drbd-reactor on host {}...'.format( + action_name, hostname + )) + self._update_drbd_reactor(host_ref, enabled) + + if not enabled and controller_host: + util.SMlog('{} drbd-reactor on controller host `{}`...'.format( + action_name, controller_node_name + )) + # If enabled is false, we disable the drbd-reactor service of + # the controller host last. Why? Otherwise the linstor-controller + # of other nodes can be started, and we don't want that. + self._update_drbd_reactor(controller_host, enabled) + + # -------------------------------------------------------------------------- + # Metadata. + # -------------------------------------------------------------------------- + + def _synchronize_metadata_and_xapi(self): + try: + # First synch SR parameters. + self.update(self.uuid) + + # Now update the VDI information in the metadata if required. 
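+ # Only labels/descriptions that differ from the current XAPI
+ # values are pushed to the LINSTOR volume metadata.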
+ xenapi = self.session.xenapi + volumes_metadata = self._linstor.get_volumes_with_metadata() + for vdi_uuid, volume_metadata in volumes_metadata.items(): + try: + vdi_ref = xenapi.VDI.get_by_uuid(vdi_uuid) + except Exception: + # May be the VDI is not in XAPI yet dont bother. + continue + + label = util.to_plain_string( + xenapi.VDI.get_name_label(vdi_ref) + ) + description = util.to_plain_string( + xenapi.VDI.get_name_description(vdi_ref) + ) + + if ( + volume_metadata.get(NAME_LABEL_TAG) != label or + volume_metadata.get(NAME_DESCRIPTION_TAG) != description + ): + self._linstor.update_volume_metadata(vdi_uuid, { + NAME_LABEL_TAG: label, + NAME_DESCRIPTION_TAG: description + }) + except Exception as e: + raise xs_errors.XenError( + 'MetadataError', + opterr='Error synching SR Metadata and XAPI: {}'.format(e) + ) + + def _synchronize_metadata(self): + if not self.is_master(): + return + + util.SMlog('Synchronize metadata...') + if self.cmd == 'sr_attach': + try: + util.SMlog( + 'Synchronize SR metadata and the state on the storage.' + ) + self._synchronize_metadata_and_xapi() + except Exception as e: + util.SMlog('Failed to synchronize metadata: {}'.format(e)) + + # -------------------------------------------------------------------------- + # Stats. + # -------------------------------------------------------------------------- + + def _update_stats(self, virt_alloc_delta): + valloc = int(self.session.xenapi.SR.get_virtual_allocation( + self.sr_ref + )) + + # Update size attributes of the SR parent class. + self.virtual_allocation = valloc + virt_alloc_delta + + self._update_physical_size() + + # Notify SR parent class. + self._db_update() + + def _update_physical_size(self): + # We use the size of the smallest disk, this is an approximation that + # ensures the displayed physical size is reachable by the user. + (min_physical_size, pool_count) = self._linstor.get_min_physical_size() + self.physical_size = min_physical_size * pool_count // \ + self._linstor.redundancy + + self.physical_utilisation = self._linstor.allocated_volume_size + + # -------------------------------------------------------------------------- + # VDIs. + # -------------------------------------------------------------------------- + + def _load_vdis(self): + if self._vdis_loaded: + return + + assert self.is_master() + + # We use a cache to avoid repeated JSON parsing. + # The performance gain is not big but we can still + # enjoy it with a few lines. + self._create_linstor_cache() + self._load_vdis_ex() + self._destroy_linstor_cache() + + # We must mark VDIs as loaded only if the load is a success. + self._vdis_loaded = True + + self._undo_all_journal_transactions() + + def _load_vdis_ex(self): + # 1. Get existing VDIs in XAPI. + xenapi = self.session.xenapi + xapi_vdi_uuids = set() + for vdi in xenapi.SR.get_VDIs(self.sr_ref): + xapi_vdi_uuids.add(xenapi.VDI.get_uuid(vdi)) + + # 2. Get volumes info. + all_volume_info = self._all_volume_info_cache + volumes_metadata = self._all_volume_metadata_cache + + # 3. Get CBT vdis. + # See: https://support.citrix.com/article/CTX230619 + cbt_vdis = set() + for volume_metadata in volumes_metadata.values(): + cbt_uuid = volume_metadata.get(CBTLOG_TAG) + if cbt_uuid: + cbt_vdis.add(cbt_uuid) + + introduce = False + + # Try to introduce VDIs only during scan/attach. 
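+ # Introduction is skipped when CLONE entries remain in the
+ # journaler: a clone may still be pending or need a rollback.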
+ if self.cmd == 'sr_scan' or self.cmd == 'sr_attach': + has_clone_entries = list(self._journaler.get_all( + LinstorJournaler.CLONE + ).items()) + + if has_clone_entries: + util.SMlog( + 'Cannot introduce VDIs during scan because it exists ' + 'CLONE entries in journaler on SR {}'.format(self.uuid) + ) + else: + introduce = True + + # 4. Now check all volume info. + vdi_to_snaps = {} + for vdi_uuid, volume_info in all_volume_info.items(): + if vdi_uuid.startswith(cleanup.SR.TMP_RENAME_PREFIX): + continue + + # 4.a. Check if the VDI in LINSTOR is in XAPI VDIs. + if vdi_uuid not in xapi_vdi_uuids: + if not introduce: + continue + + if vdi_uuid.startswith('DELETED_'): + continue + + volume_metadata = volumes_metadata.get(vdi_uuid) + if not volume_metadata: + util.SMlog( + 'Skipping volume {} because no metadata could be found' + .format(vdi_uuid) + ) + continue + + util.SMlog( + 'Trying to introduce VDI {} as it is present in ' + 'LINSTOR and not in XAPI...' + .format(vdi_uuid) + ) + + try: + self._linstor.get_device_path(vdi_uuid) + except Exception as e: + util.SMlog( + 'Cannot introduce {}, unable to get path: {}' + .format(vdi_uuid, e) + ) + continue + + name_label = volume_metadata.get(NAME_LABEL_TAG) or '' + type = volume_metadata.get(TYPE_TAG) or 'user' + vdi_type = volume_metadata.get(VDI_TYPE_TAG) + + if not vdi_type: + util.SMlog( + 'Cannot introduce {} '.format(vdi_uuid) + + 'without vdi_type' + ) + continue + + sm_config = { + 'vdi_type': vdi_type + } + + if vdi_type == vhdutil.VDI_TYPE_RAW: + managed = not volume_metadata.get(HIDDEN_TAG) + elif vdi_type == vhdutil.VDI_TYPE_VHD: + vhd_info = self._vhdutil.get_vhd_info(vdi_uuid) + managed = not vhd_info.hidden + if vhd_info.parentUuid: + sm_config['vhd-parent'] = vhd_info.parentUuid + else: + util.SMlog( + 'Cannot introduce {} with invalid VDI type {}' + .format(vdi_uuid, vdi_type) + ) + continue + + util.SMlog( + 'Introducing VDI {} '.format(vdi_uuid) + + ' (name={}, virtual_size={}, allocated_size={})'.format( + name_label, + volume_info.virtual_size, + volume_info.allocated_size + ) + ) + + vdi_ref = xenapi.VDI.db_introduce( + vdi_uuid, + name_label, + volume_metadata.get(NAME_DESCRIPTION_TAG) or '', + self.sr_ref, + type, + False, # sharable + bool(volume_metadata.get(READ_ONLY_TAG)), + {}, # other_config + vdi_uuid, # location + {}, # xenstore_data + sm_config, + managed, + str(volume_info.virtual_size), + str(volume_info.allocated_size) + ) + + is_a_snapshot = volume_metadata.get(IS_A_SNAPSHOT_TAG) + xenapi.VDI.set_is_a_snapshot(vdi_ref, bool(is_a_snapshot)) + if is_a_snapshot: + xenapi.VDI.set_snapshot_time( + vdi_ref, + xmlrpc.client.DateTime( + volume_metadata[SNAPSHOT_TIME_TAG] or + '19700101T00:00:00Z' + ) + ) + + snap_uuid = volume_metadata[SNAPSHOT_OF_TAG] + if snap_uuid in vdi_to_snaps: + vdi_to_snaps[snap_uuid].append(vdi_uuid) + else: + vdi_to_snaps[snap_uuid] = [vdi_uuid] + + # 4.b. Add the VDI in the list. + vdi = self.vdi(vdi_uuid) + self.vdis[vdi_uuid] = vdi + + if USE_KEY_HASH and vdi.vdi_type == vhdutil.VDI_TYPE_VHD: + # TODO: Replace pylint comment with this feature when possible: + # https://github.com/PyCQA/pylint/pull/2926 + vdi.sm_config_override['key_hash'] = self._vhdutil.get_key_hash(vdi_uuid) # pylint: disable = E1120 + + # 4.c. Update CBT status of disks either just added + # or already in XAPI. + cbt_uuid = volume_metadata.get(CBTLOG_TAG) + if cbt_uuid in cbt_vdis: + vdi_ref = xenapi.VDI.get_by_uuid(vdi_uuid) + xenapi.VDI.set_cbt_enabled(vdi_ref, True) + # For existing VDIs, update local state too. 
+ # Scan in base class SR updates existing VDIs + # again based on local states. + self.vdis[vdi_uuid].cbt_enabled = True + cbt_vdis.remove(cbt_uuid) + + # 5. Now set the snapshot statuses correctly in XAPI. + for src_uuid in vdi_to_snaps: + try: + src_ref = xenapi.VDI.get_by_uuid(src_uuid) + except Exception: + # The source VDI no longer exists, continue. + continue + + for snap_uuid in vdi_to_snaps[src_uuid]: + try: + # This might fail in cases where its already set. + snap_ref = xenapi.VDI.get_by_uuid(snap_uuid) + xenapi.VDI.set_snapshot_of(snap_ref, src_ref) + except Exception as e: + util.SMlog('Setting snapshot failed: {}'.format(e)) + + # TODO: Check correctly how to use CBT. + # Update cbt_enabled on the right VDI, check LVM/FileSR code. + + # 6. If we have items remaining in this list, + # they are cbt_metadata VDI that XAPI doesn't know about. + # Add them to self.vdis and they'll get added to the DB. + for cbt_uuid in cbt_vdis: + new_vdi = self.vdi(cbt_uuid) + new_vdi.ty = 'cbt_metadata' + new_vdi.cbt_enabled = True + self.vdis[cbt_uuid] = new_vdi + + # 7. Update virtual allocation, build geneology and remove useless VDIs + self.virtual_allocation = 0 + + # 8. Build geneology. + geneology = {} + + for vdi_uuid, vdi in self.vdis.items(): + if vdi.parent: + if vdi.parent in self.vdis: + self.vdis[vdi.parent].read_only = True + if vdi.parent in geneology: + geneology[vdi.parent].append(vdi_uuid) + else: + geneology[vdi.parent] = [vdi_uuid] + if not vdi.hidden: + self.virtual_allocation += vdi.size + + # 9. Remove all hidden leaf nodes to avoid introducing records that + # will be GC'ed. + for vdi_uuid in list(self.vdis.keys()): + if vdi_uuid not in geneology and self.vdis[vdi_uuid].hidden: + util.SMlog( + 'Scan found hidden leaf ({}), ignoring'.format(vdi_uuid) + ) + del self.vdis[vdi_uuid] + + # -------------------------------------------------------------------------- + # Journals. + # -------------------------------------------------------------------------- + + def _get_vdi_path_and_parent(self, vdi_uuid, volume_name): + try: + device_path = self._linstor.build_device_path(volume_name) + if not util.pathexists(device_path): + return (None, None) + + # If it's a RAW VDI, there is no parent. + volume_metadata = self._linstor.get_volume_metadata(vdi_uuid) + vdi_type = volume_metadata[VDI_TYPE_TAG] + if vdi_type == vhdutil.VDI_TYPE_RAW: + return (device_path, None) + + # Otherwise it's a VHD and a parent can exist. 
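+ # Validate the VHD first: an unreadable image is reported as
+ # (None, None) so the caller treats the leaf as invalid.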
+ if not self._vhdutil.check(vdi_uuid): + return (None, None) + + vhd_info = self._vhdutil.get_vhd_info(vdi_uuid) + if vhd_info: + return (device_path, vhd_info.parentUuid) + except Exception as e: + util.SMlog( + 'Failed to get VDI path and parent, ignoring: {}' + .format(e) + ) + return (None, None) + + def _undo_all_journal_transactions(self): + util.SMlog('Undoing all journal transactions...') + self.lock.acquire() + try: + self._handle_interrupted_inflate_ops() + self._handle_interrupted_clone_ops() + pass + finally: + self.lock.release() + + def _handle_interrupted_inflate_ops(self): + transactions = self._journaler.get_all(LinstorJournaler.INFLATE) + for vdi_uuid, old_size in transactions.items(): + self._handle_interrupted_inflate(vdi_uuid, old_size) + self._journaler.remove(LinstorJournaler.INFLATE, vdi_uuid) + + def _handle_interrupted_clone_ops(self): + transactions = self._journaler.get_all(LinstorJournaler.CLONE) + for vdi_uuid, old_size in transactions.items(): + self._handle_interrupted_clone(vdi_uuid, old_size) + self._journaler.remove(LinstorJournaler.CLONE, vdi_uuid) + + def _handle_interrupted_inflate(self, vdi_uuid, old_size): + util.SMlog( + '*** INTERRUPTED INFLATE OP: for {} ({})' + .format(vdi_uuid, old_size) + ) + + vdi = self.vdis.get(vdi_uuid) + if not vdi: + util.SMlog('Cannot deflate missing VDI {}'.format(vdi_uuid)) + return + + assert not self._all_volume_info_cache + volume_info = self._linstor.get_volume_info(vdi_uuid) + + current_size = volume_info.virtual_size + assert current_size > 0 + self._vhdutil.force_deflate(vdi.path, old_size, current_size, zeroize=True) + + def _handle_interrupted_clone( + self, vdi_uuid, clone_info, force_undo=False + ): + util.SMlog( + '*** INTERRUPTED CLONE OP: for {} ({})' + .format(vdi_uuid, clone_info) + ) + + base_uuid, snap_uuid = clone_info.split('_') + + # Use LINSTOR data because new VDIs may not be in the XAPI. + volume_names = self._linstor.get_volumes_with_name() + + # Check if we don't have a base VDI. (If clone failed at startup.) + if base_uuid not in volume_names: + if vdi_uuid in volume_names: + util.SMlog('*** INTERRUPTED CLONE OP: nothing to do') + return + raise util.SMException( + 'Base copy {} not present, but no original {} found' + .format(base_uuid, vdi_uuid) + ) + + if force_undo: + util.SMlog('Explicit revert') + self._undo_clone( + volume_names, vdi_uuid, base_uuid, snap_uuid + ) + return + + # If VDI or snap uuid is missing... + if vdi_uuid not in volume_names or \ + (snap_uuid and snap_uuid not in volume_names): + util.SMlog('One or both leaves missing => revert') + self._undo_clone(volume_names, vdi_uuid, base_uuid, snap_uuid) + return + + vdi_path, vdi_parent_uuid = self._get_vdi_path_and_parent( + vdi_uuid, volume_names[vdi_uuid] + ) + snap_path, snap_parent_uuid = self._get_vdi_path_and_parent( + snap_uuid, volume_names[snap_uuid] + ) + + if not vdi_path or (snap_uuid and not snap_path): + util.SMlog('One or both leaves invalid (and path(s)) => revert') + self._undo_clone(volume_names, vdi_uuid, base_uuid, snap_uuid) + return + + util.SMlog('Leaves valid but => revert') + self._undo_clone(volume_names, vdi_uuid, base_uuid, snap_uuid) + + def _undo_clone(self, volume_names, vdi_uuid, base_uuid, snap_uuid): + base_path = self._linstor.build_device_path(volume_names[base_uuid]) + base_metadata = self._linstor.get_volume_metadata(base_uuid) + base_type = base_metadata[VDI_TYPE_TAG] + + if not util.pathexists(base_path): + util.SMlog('Base not found! 
Exit...') + util.SMlog('*** INTERRUPTED CLONE OP: rollback fail') + return + + # Un-hide the parent. + self._linstor.update_volume_metadata(base_uuid, {READ_ONLY_TAG: False}) + if base_type == vhdutil.VDI_TYPE_VHD: + vhd_info = self._vhdutil.get_vhd_info(base_uuid, False) + if vhd_info.hidden: + self._vhdutil.set_hidden(base_path, False) + elif base_type == vhdutil.VDI_TYPE_RAW and \ + base_metadata.get(HIDDEN_TAG): + self._linstor.update_volume_metadata( + base_uuid, {HIDDEN_TAG: False} + ) + + # Remove the child nodes. + if snap_uuid and snap_uuid in volume_names: + util.SMlog('Destroying snap {}...'.format(snap_uuid)) + + try: + self._linstor.destroy_volume(snap_uuid) + except Exception as e: + util.SMlog( + 'Cannot destroy snap {} during undo clone: {}' + .format(snap_uuid, e) + ) + + if vdi_uuid in volume_names: + try: + util.SMlog('Destroying {}...'.format(vdi_uuid)) + self._linstor.destroy_volume(vdi_uuid) + except Exception as e: + util.SMlog( + 'Cannot destroy VDI {} during undo clone: {}' + .format(vdi_uuid, e) + ) + # We can get an exception like this: + # "Shutdown of the DRBD resource 'XXX failed", so the + # volume info remains... The problem is we can't rename + # properly the base VDI below this line, so we must change the + # UUID of this bad VDI before. + self._linstor.update_volume_uuid( + vdi_uuid, 'DELETED_' + vdi_uuid, force=True + ) + + # Rename! + self._linstor.update_volume_uuid(base_uuid, vdi_uuid) + + # Inflate to the right size. + if base_type == vhdutil.VDI_TYPE_VHD: + vdi = self.vdi(vdi_uuid) + volume_size = LinstorVhdUtil.compute_volume_size(vdi.size, vdi.vdi_type) + self._vhdutil.inflate( + self._journaler, vdi_uuid, vdi.path, + volume_size, vdi.capacity + ) + self.vdis[vdi_uuid] = vdi + + # At this stage, tapdisk and SM vdi will be in paused state. Remove + # flag to facilitate vm deactivate. + vdi_ref = self.session.xenapi.VDI.get_by_uuid(vdi_uuid) + self.session.xenapi.VDI.remove_from_sm_config(vdi_ref, 'paused') + + util.SMlog('*** INTERRUPTED CLONE OP: rollback success') + + # -------------------------------------------------------------------------- + # Cache. + # -------------------------------------------------------------------------- + + def _create_linstor_cache(self): + reconnect = False + + def create_cache(): + nonlocal reconnect + try: + if reconnect: + self._reconnect() + return self._linstor.get_volumes_with_info() + except Exception as e: + reconnect = True + raise e + + self._all_volume_metadata_cache = \ + self._linstor.get_volumes_with_metadata() + self._all_volume_info_cache = util.retry( + create_cache, + maxretry=10, + period=3 + ) + + def _destroy_linstor_cache(self): + self._all_volume_info_cache = None + self._all_volume_metadata_cache = None + + # -------------------------------------------------------------------------- + # Misc. + # -------------------------------------------------------------------------- + + def _reconnect(self): + controller_uri = get_controller_uri() + + self._journaler = LinstorJournaler( + controller_uri, self._group_name, logger=util.SMlog + ) + + # Try to open SR if exists. + # We can repair only if we are on the master AND if + # we are trying to execute an exclusive operation. + # Otherwise we could try to delete a VDI being created or + # during a snapshot. An exclusive op is the guarantee that + # the SR is locked. 
+ self._linstor = LinstorVolumeManager( + controller_uri, + self._group_name, + repair=( + self.is_master() and + self.srcmd.cmd in self.ops_exclusive + ), + logger=util.SMlog + ) + self._vhdutil = LinstorVhdUtil(self.session, self._linstor) + + def _ensure_space_available(self, amount_needed): + space_available = self._linstor.max_volume_size_allowed + if (space_available < amount_needed): + util.SMlog( + 'Not enough space! Free space: {}, need: {}'.format( + space_available, amount_needed + ) + ) + raise xs_errors.XenError('SRNoSpace') + + def _kick_gc(self): + util.SMlog('Kicking GC') + cleanup.start_gc_service(self.uuid) + +# ============================================================================== +# LinstorSr VDI +# ============================================================================== + + +class LinstorVDI(VDI.VDI): + # Warning: Not the same values than vhdutil.VDI_TYPE_*. + # These values represents the types given on the command line. + TYPE_RAW = 'raw' + TYPE_VHD = 'vhd' + + # Metadata size given to the "S" param of vhd-util create. + # "-S size (MB) for metadata preallocation". + # Increase the performance when resize is called. + MAX_METADATA_VIRT_SIZE = 2 * 1024 * 1024 + + # -------------------------------------------------------------------------- + # VDI methods. + # -------------------------------------------------------------------------- + + @override + def load(self, vdi_uuid) -> None: + self._lock = self.sr.lock + self._exists = True + self._linstor = self.sr._linstor + + # Update hidden parent property. + self.hidden = False + + def raise_bad_load(e): + util.SMlog( + 'Got exception in LinstorVDI.load: {}'.format(e) + ) + util.SMlog(traceback.format_exc()) + raise xs_errors.XenError( + 'VDIUnavailable', + opterr='Could not load {} because: {}'.format(self.uuid, e) + ) + + # Try to load VDI. + try: + if ( + self.sr.srcmd.cmd == 'vdi_attach_from_config' or + self.sr.srcmd.cmd == 'vdi_detach_from_config' + ): + self.vdi_type = vhdutil.VDI_TYPE_RAW + self.path = self.sr.srcmd.params['vdi_path'] + else: + self._determine_type_and_path() + self._load_this() + + util.SMlog('VDI {} loaded! (path={}, hidden={})'.format( + self.uuid, self.path, self.hidden + )) + except LinstorVolumeManagerError as e: + # 1. It may be a VDI deletion. + if e.code == LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS: + if self.sr.srcmd.cmd == 'vdi_delete': + self.deleted = True + return + + # 2. Or maybe a creation. + if self.sr.srcmd.cmd == 'vdi_create': + # Set type attribute of VDI parent class. + # We use VHD by default. + self.vdi_type = vhdutil.VDI_TYPE_VHD + self._key_hash = None # Only used in create. + + self._exists = False + vdi_sm_config = self.sr.srcmd.params.get('vdi_sm_config') + if vdi_sm_config is not None: + type = vdi_sm_config.get('type') + if type is not None: + if type == self.TYPE_RAW: + self.vdi_type = vhdutil.VDI_TYPE_RAW + elif type == self.TYPE_VHD: + self.vdi_type = vhdutil.VDI_TYPE_VHD + else: + raise xs_errors.XenError( + 'VDICreate', + opterr='Invalid VDI type {}'.format(type) + ) + if self.vdi_type == vhdutil.VDI_TYPE_VHD: + self._key_hash = vdi_sm_config.get('key_hash') + + # For the moment we don't have a path. + self._update_device_name(None) + return + raise_bad_load(e) + except Exception as e: + raise_bad_load(e) + + @override + def create(self, sr_uuid, vdi_uuid, size) -> str: + # Usage example: + # xe vdi-create sr-uuid=39a5826b-5a90-73eb-dd09-51e3a116f937 + # name-label="linstor-vdi-1" virtual-size=4096MiB sm-config:type=vhd + + # 1. 
Check if we are on the master and if the VDI doesn't exist. + util.SMlog('LinstorVDI.create for {}'.format(self.uuid)) + if self._exists: + raise xs_errors.XenError('VDIExists') + + assert self.uuid + assert self.ty + assert self.vdi_type + + # 2. Compute size and check space available. + size = vhdutil.validate_and_round_vhd_size(int(size)) + volume_size = LinstorVhdUtil.compute_volume_size(size, self.vdi_type) + util.SMlog( + 'LinstorVDI.create: type={}, vhd-size={}, volume-size={}' + .format(self.vdi_type, size, volume_size) + ) + self.sr._ensure_space_available(volume_size) + + # 3. Set sm_config attribute of VDI parent class. + self.sm_config = self.sr.srcmd.params['vdi_sm_config'] + + # 4. Create! + failed = False + try: + volume_name = None + if self.ty == 'ha_statefile': + volume_name = HA_VOLUME_NAME + elif self.ty == 'redo_log': + volume_name = REDO_LOG_VOLUME_NAME + + self._linstor.create_volume( + self.uuid, + volume_size, + persistent=False, + volume_name=volume_name, + high_availability=volume_name is not None + ) + volume_info = self._linstor.get_volume_info(self.uuid) + + self._update_device_name(volume_info.name) + + if self.vdi_type == vhdutil.VDI_TYPE_RAW: + self.size = volume_info.virtual_size + else: + self.sr._vhdutil.create( + self.path, size, False, self.MAX_METADATA_VIRT_SIZE + ) + self.size = self.sr._vhdutil.get_size_virt(self.uuid) + + if self._key_hash: + self.sr._vhdutil.set_key(self.path, self._key_hash) + + # Because vhdutil commands modify the volume data, + # we must retrieve a new time the utilization size. + volume_info = self._linstor.get_volume_info(self.uuid) + + volume_metadata = { + NAME_LABEL_TAG: util.to_plain_string(self.label), + NAME_DESCRIPTION_TAG: util.to_plain_string(self.description), + IS_A_SNAPSHOT_TAG: False, + SNAPSHOT_OF_TAG: '', + SNAPSHOT_TIME_TAG: '', + TYPE_TAG: self.ty, + VDI_TYPE_TAG: self.vdi_type, + READ_ONLY_TAG: bool(self.read_only), + METADATA_OF_POOL_TAG: '' + } + self._linstor.set_volume_metadata(self.uuid, volume_metadata) + + # Set the open timeout to 1min to reduce CPU usage + # in http-disk-server when a secondary server tries to open + # an already opened volume. 
+ if self.ty == 'ha_statefile' or self.ty == 'redo_log': + self._linstor.set_auto_promote_timeout(self.uuid, 600) + + self._linstor.mark_volume_as_persistent(self.uuid) + except util.CommandException as e: + failed = True + raise xs_errors.XenError( + 'VDICreate', opterr='error {}'.format(e.code) + ) + except Exception as e: + failed = True + raise xs_errors.XenError('VDICreate', opterr='error {}'.format(e)) + finally: + if failed: + util.SMlog('Unable to create VDI {}'.format(self.uuid)) + try: + self._linstor.destroy_volume(self.uuid) + except Exception as e: + util.SMlog( + 'Ignoring exception after fail in LinstorVDI.create: ' + '{}'.format(e) + ) + + self.utilisation = volume_info.allocated_size + self.sm_config['vdi_type'] = self.vdi_type + + self.ref = self._db_introduce() + self.sr._update_stats(self.size) + + return VDI.VDI.get_params(self) + + @override + def delete(self, sr_uuid, vdi_uuid, data_only=False) -> None: + util.SMlog('LinstorVDI.delete for {}'.format(self.uuid)) + if self.attached: + raise xs_errors.XenError('VDIInUse') + + if self.deleted: + return super(LinstorVDI, self).delete( + sr_uuid, vdi_uuid, data_only + ) + + vdi_ref = self.sr.srcmd.params['vdi_ref'] + if not self.session.xenapi.VDI.get_managed(vdi_ref): + raise xs_errors.XenError( + 'VDIDelete', + opterr='Deleting non-leaf node not permitted' + ) + + try: + # Remove from XAPI and delete from LINSTOR. + self._linstor.destroy_volume(self.uuid) + if not data_only: + self._db_forget() + + self.sr.lock.cleanupAll(vdi_uuid) + except Exception as e: + util.SMlog( + 'Failed to remove the volume (maybe is leaf coalescing) ' + 'for {} err: {}'.format(self.uuid, e) + ) + + try: + raise xs_errors.XenError('VDIDelete', opterr=str(e)) + except LinstorVolumeManagerError as e: + if e.code != LinstorVolumeManagerError.ERR_VOLUME_DESTROY: + raise xs_errors.XenError('VDIDelete', opterr=str(e)) + + return + + if self.uuid in self.sr.vdis: + del self.sr.vdis[self.uuid] + + # TODO: Check size after delete. + self.sr._update_stats(-self.size) + self.sr._kick_gc() + return super(LinstorVDI, self).delete(sr_uuid, vdi_uuid, data_only) + + @override + def attach(self, sr_uuid, vdi_uuid) -> str: + util.SMlog('LinstorVDI.attach for {}'.format(self.uuid)) + attach_from_config = self.sr.srcmd.cmd == 'vdi_attach_from_config' + if ( + not attach_from_config or + self.sr.srcmd.params['vdi_uuid'] != self.uuid + ) and self.sr._journaler.has_entries(self.uuid): + raise xs_errors.XenError( + 'VDIUnavailable', + opterr='Interrupted operation detected on this VDI, ' + 'scan SR first to trigger auto-repair' + ) + + writable = 'args' not in self.sr.srcmd.params or \ + self.sr.srcmd.params['args'][0] == 'true' + + if not attach_from_config or self.sr.is_master(): + # We need to inflate the volume if we don't have enough place + # to mount the VHD image. I.e. the volume capacity must be greater + # than the VHD size + bitmap size. 
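+ # No inflation is needed for RAW volumes, read-only attaches or
+ # when the capacity already covers the computed VHD footprint.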
+ need_inflate = True + if ( + self.vdi_type == vhdutil.VDI_TYPE_RAW or + not writable or + self.capacity >= LinstorVhdUtil.compute_volume_size(self.size, self.vdi_type) + ): + need_inflate = False + + if need_inflate: + try: + self._prepare_thin(True) + except Exception as e: + raise xs_errors.XenError( + 'VDIUnavailable', + opterr='Failed to attach VDI during "prepare thin": {}' + .format(e) + ) + + if not hasattr(self, 'xenstore_data'): + self.xenstore_data = {} + self.xenstore_data['storage-type'] = LinstorSR.DRIVER_TYPE + + if ( + USE_HTTP_NBD_SERVERS and + attach_from_config and + self.path.startswith('/dev/http-nbd/') + ): + return self._attach_using_http_nbd() + + # Ensure we have a path... + self.sr._vhdutil.create_chain_paths(self.uuid, readonly=not writable) + + self.attached = True + return VDI.VDI.attach(self, self.sr.uuid, self.uuid) + + @override + def detach(self, sr_uuid, vdi_uuid) -> None: + util.SMlog('LinstorVDI.detach for {}'.format(self.uuid)) + detach_from_config = self.sr.srcmd.cmd == 'vdi_detach_from_config' + self.attached = False + + if detach_from_config and self.path.startswith('/dev/http-nbd/'): + return self._detach_using_http_nbd() + + if self.vdi_type == vhdutil.VDI_TYPE_RAW: + return + + # The VDI is already deflated if the VHD image size + metadata is + # equal to the LINSTOR volume size. + volume_size = LinstorVhdUtil.compute_volume_size(self.size, self.vdi_type) + already_deflated = self.capacity <= volume_size + + if already_deflated: + util.SMlog( + 'VDI {} already deflated (old volume size={}, volume size={})' + .format(self.uuid, self.capacity, volume_size) + ) + + need_deflate = True + if already_deflated: + need_deflate = False + elif self.sr._provisioning == 'thick': + need_deflate = False + + vdi_ref = self.sr.srcmd.params['vdi_ref'] + if self.session.xenapi.VDI.get_is_a_snapshot(vdi_ref): + need_deflate = True + + if need_deflate: + try: + self._prepare_thin(False) + except Exception as e: + raise xs_errors.XenError( + 'VDIUnavailable', + opterr='Failed to detach VDI during "prepare thin": {}' + .format(e) + ) + + # We remove only on slaves because the volume can be used by the GC. + if self.sr.is_master(): + return + + while vdi_uuid: + try: + path = self._linstor.build_device_path(self._linstor.get_volume_name(vdi_uuid)) + parent_vdi_uuid = self.sr._vhdutil.get_vhd_info(vdi_uuid).parentUuid + except Exception: + break + + if util.pathexists(path): + try: + self._linstor.remove_volume_if_diskless(vdi_uuid) + except Exception as e: + # Ensure we can always detach properly. + # I don't want to corrupt the XAPI info. + util.SMlog('Failed to clean VDI {} during detach: {}'.format(vdi_uuid, e)) + vdi_uuid = parent_vdi_uuid + + @override + def resize(self, sr_uuid, vdi_uuid, size) -> str: + util.SMlog('LinstorVDI.resize for {}'.format(self.uuid)) + if not self.sr.is_master(): + raise xs_errors.XenError( + 'VDISize', + opterr='resize on slave not allowed' + ) + + if self.hidden: + raise xs_errors.XenError('VDIUnavailable', opterr='hidden VDI') + + # Compute the virtual VHD and DRBD volume size. 
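+ # The requested size is rounded to the VHD granularity first, then
+ # translated into the matching LINSTOR/DRBD volume size.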
+ size = vhdutil.validate_and_round_vhd_size(int(size)) + volume_size = LinstorVhdUtil.compute_volume_size(size, self.vdi_type) + util.SMlog( + 'LinstorVDI.resize: type={}, vhd-size={}, volume-size={}' + .format(self.vdi_type, size, volume_size) + ) + + if size < self.size: + util.SMlog( + 'vdi_resize: shrinking not supported: ' + '(current size: {}, new size: {})'.format(self.size, size) + ) + raise xs_errors.XenError('VDISize', opterr='shrinking not allowed') + + if size == self.size: + return VDI.VDI.get_params(self) + + if self.vdi_type == vhdutil.VDI_TYPE_RAW: + old_volume_size = self.size + new_volume_size = LinstorVolumeManager.round_up_volume_size(size) + else: + old_volume_size = self.utilisation + if self.sr._provisioning == 'thin': + # VDI is currently deflated, so keep it deflated. + new_volume_size = old_volume_size + else: + new_volume_size = LinstorVhdUtil.compute_volume_size(size, self.vdi_type) + assert new_volume_size >= old_volume_size + + space_needed = new_volume_size - old_volume_size + self.sr._ensure_space_available(space_needed) + + old_size = self.size + if self.vdi_type == vhdutil.VDI_TYPE_RAW: + self._linstor.resize(self.uuid, new_volume_size) + else: + if new_volume_size != old_volume_size: + self.sr._vhdutil.inflate( + self.sr._journaler, self.uuid, self.path, + new_volume_size, old_volume_size + ) + self.sr._vhdutil.set_size_virt_fast(self.path, size) + + # Reload size attributes. + self._load_this() + + vdi_ref = self.sr.srcmd.params['vdi_ref'] + self.session.xenapi.VDI.set_virtual_size(vdi_ref, str(self.size)) + self.session.xenapi.VDI.set_physical_utilisation( + vdi_ref, str(self.utilisation) + ) + self.sr._update_stats(self.size - old_size) + return VDI.VDI.get_params(self) + + @override + def clone(self, sr_uuid, vdi_uuid) -> str: + return self._do_snapshot(sr_uuid, vdi_uuid, VDI.SNAPSHOT_DOUBLE) + + @override + def compose(self, sr_uuid, vdi1, vdi2) -> None: + util.SMlog('VDI.compose for {} -> {}'.format(vdi2, vdi1)) + if self.vdi_type != vhdutil.VDI_TYPE_VHD: + raise xs_errors.XenError('Unimplemented') + + parent_uuid = vdi1 + parent_path = self._linstor.get_device_path(parent_uuid) + + # We must pause tapdisk to correctly change the parent. Otherwise we + # have a readonly error. + # See: https://github.com/xapi-project/xen-api/blob/b3169a16d36dae0654881b336801910811a399d9/ocaml/xapi/storage_migrate.ml#L928-L929 + # and: https://github.com/xapi-project/xen-api/blob/b3169a16d36dae0654881b336801910811a399d9/ocaml/xapi/storage_migrate.ml#L775 + + if not blktap2.VDI.tap_pause(self.session, self.sr.uuid, self.uuid): + raise util.SMException('Failed to pause VDI {}'.format(self.uuid)) + try: + self.sr._vhdutil.set_parent(self.path, parent_path, False) + self.sr._vhdutil.set_hidden(parent_path) + self.sr.session.xenapi.VDI.set_managed( + self.sr.srcmd.params['args'][0], False + ) + finally: + blktap2.VDI.tap_unpause(self.session, self.sr.uuid, self.uuid) + + if not blktap2.VDI.tap_refresh(self.session, self.sr.uuid, self.uuid): + raise util.SMException( + 'Failed to refresh VDI {}'.format(self.uuid) + ) + + util.SMlog('Compose done') + + @override + def generate_config(self, sr_uuid, vdi_uuid) -> str: + """ + Generate the XML config required to attach and activate + a VDI for use when XAPI is not running. Attach and + activation is handled by vdi_attach_from_config below. 
+ """ + + util.SMlog('LinstorVDI.generate_config for {}'.format(self.uuid)) + + resp = {} + resp['device_config'] = self.sr.dconf + resp['sr_uuid'] = sr_uuid + resp['vdi_uuid'] = self.uuid + resp['sr_sm_config'] = self.sr.sm_config + resp['command'] = 'vdi_attach_from_config' + + # By default, we generate a normal config. + # But if the disk is persistent, we must use a HTTP/NBD + # server to ensure we can always write or read data. + # Why? DRBD is unsafe when used with more than 4 hosts: + # We are limited to use 1 diskless and 3 full. + # We can't increase this limitation, so we use a NBD/HTTP device + # instead. + volume_name = self._linstor.get_volume_name(self.uuid) + if not USE_HTTP_NBD_SERVERS or volume_name not in [ + HA_VOLUME_NAME, REDO_LOG_VOLUME_NAME + ]: + if not self.path or not util.pathexists(self.path): + available = False + # Try to refresh symlink path... + try: + self.path = self._linstor.get_device_path(vdi_uuid) + available = util.pathexists(self.path) + except Exception: + pass + if not available: + raise xs_errors.XenError('VDIUnavailable') + + resp['vdi_path'] = self.path + else: + # Axiom: DRBD device is present on at least one host. + resp['vdi_path'] = '/dev/http-nbd/' + volume_name + + config = xmlrpc.client.dumps(tuple([resp]), 'vdi_attach_from_config') + return xmlrpc.client.dumps((config,), "", True) + + @override + def attach_from_config(self, sr_uuid, vdi_uuid) -> str: + """ + Attach and activate a VDI using config generated by + vdi_generate_config above. This is used for cases such as + the HA state-file and the redo-log. + """ + + util.SMlog('LinstorVDI.attach_from_config for {}'.format(vdi_uuid)) + + try: + if not util.pathexists(self.sr.path): + self.sr.attach(sr_uuid) + + if not DRIVER_CONFIG['ATTACH_FROM_CONFIG_WITH_TAPDISK']: + return self.attach(sr_uuid, vdi_uuid) + except Exception: + util.logException('LinstorVDI.attach_from_config') + raise xs_errors.XenError( + 'SRUnavailable', + opterr='Unable to attach from config' + ) + return '' + + def reset_leaf(self, sr_uuid, vdi_uuid): + if self.vdi_type != vhdutil.VDI_TYPE_VHD: + raise xs_errors.XenError('Unimplemented') + + if not self.sr._vhdutil.has_parent(self.uuid): + raise util.SMException( + 'ERROR: VDI {} has no parent, will not reset contents' + .format(self.uuid) + ) + + self.sr._vhdutil.kill_data(self.path) + + def _load_this(self): + volume_metadata = None + if self.sr._all_volume_metadata_cache: + volume_metadata = self.sr._all_volume_metadata_cache.get(self.uuid) + if volume_metadata is None: + volume_metadata = self._linstor.get_volume_metadata(self.uuid) + + volume_info = None + if self.sr._all_volume_info_cache: + volume_info = self.sr._all_volume_info_cache.get(self.uuid) + if volume_info is None: + volume_info = self._linstor.get_volume_info(self.uuid) + + # Contains the max physical size used on a disk. + # When LINSTOR LVM driver is used, the size should be similar to + # virtual size (i.e. the LINSTOR max volume size). + # When LINSTOR Thin LVM driver is used, the used physical size should + # be lower than virtual size at creation. + # The physical size increases after each write in a new block. 
+ self.utilisation = volume_info.allocated_size + self.capacity = volume_info.virtual_size + + if self.vdi_type == vhdutil.VDI_TYPE_RAW: + self.hidden = int(volume_metadata.get(HIDDEN_TAG) or 0) + self.size = volume_info.virtual_size + self.parent = '' + else: + vhd_info = self.sr._vhdutil.get_vhd_info(self.uuid) + self.hidden = vhd_info.hidden + self.size = vhd_info.sizeVirt + self.parent = vhd_info.parentUuid + + if self.hidden: + self.managed = False + + self.label = volume_metadata.get(NAME_LABEL_TAG) or '' + self.description = volume_metadata.get(NAME_DESCRIPTION_TAG) or '' + + # Update sm_config_override of VDI parent class. + self.sm_config_override = {'vhd-parent': self.parent or None} + + def _mark_hidden(self, hidden=True): + if self.hidden == hidden: + return + + if self.vdi_type == vhdutil.VDI_TYPE_VHD: + self.sr._vhdutil.set_hidden(self.path, hidden) + else: + self._linstor.update_volume_metadata(self.uuid, { + HIDDEN_TAG: hidden + }) + self.hidden = hidden + + @override + def update(self, sr_uuid, vdi_uuid) -> None: + xenapi = self.session.xenapi + vdi_ref = xenapi.VDI.get_by_uuid(self.uuid) + + volume_metadata = { + NAME_LABEL_TAG: util.to_plain_string( + xenapi.VDI.get_name_label(vdi_ref) + ), + NAME_DESCRIPTION_TAG: util.to_plain_string( + xenapi.VDI.get_name_description(vdi_ref) + ) + } + + try: + self._linstor.update_volume_metadata(self.uuid, volume_metadata) + except LinstorVolumeManagerError as e: + if e.code == LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS: + raise xs_errors.XenError( + 'VDIUnavailable', + opterr='LINSTOR volume {} not found'.format(self.uuid) + ) + raise xs_errors.XenError('VDIUnavailable', opterr=str(e)) + + # -------------------------------------------------------------------------- + # Thin provisioning. + # -------------------------------------------------------------------------- + + def _prepare_thin(self, attach): + if self.sr.is_master(): + if attach: + attach_thin( + self.session, self.sr._journaler, self._linstor, + self.sr.uuid, self.uuid + ) + else: + detach_thin( + self.session, self._linstor, self.sr.uuid, self.uuid + ) + else: + fn = 'attach' if attach else 'detach' + + master = util.get_master_ref(self.session) + + args = { + 'groupName': self.sr._group_name, + 'srUuid': self.sr.uuid, + 'vdiUuid': self.uuid + } + + try: + self.sr._exec_manager_command(master, fn, args, 'VDIUnavailable') + except Exception: + if fn != 'detach': + raise + + # Reload size attrs after inflate or deflate! + self._load_this() + self.sr._update_physical_size() + + vdi_ref = self.sr.srcmd.params['vdi_ref'] + self.session.xenapi.VDI.set_physical_utilisation( + vdi_ref, str(self.utilisation) + ) + + self.session.xenapi.SR.set_physical_utilisation( + self.sr.sr_ref, str(self.sr.physical_utilisation) + ) + + # -------------------------------------------------------------------------- + # Generic helpers. + # -------------------------------------------------------------------------- + + def _determine_type_and_path(self): + """ + Determine whether this is a RAW or a VHD VDI. + """ + + # 1. Check vdi_ref and vdi_type in config. + try: + vdi_ref = self.session.xenapi.VDI.get_by_uuid(self.uuid) + if vdi_ref: + sm_config = self.session.xenapi.VDI.get_sm_config(vdi_ref) + vdi_type = sm_config.get('vdi_type') + if vdi_type: + # Update parent fields. + self.vdi_type = vdi_type + self.sm_config_override = sm_config + self._update_device_name( + self._linstor.get_volume_name(self.uuid) + ) + return + except Exception: + pass + + # 2. 
Otherwise use the LINSTOR volume manager directly. + # It's probably a new VDI created via snapshot. + volume_metadata = self._linstor.get_volume_metadata(self.uuid) + self.vdi_type = volume_metadata.get(VDI_TYPE_TAG) + if not self.vdi_type: + raise xs_errors.XenError( + 'VDIUnavailable', + opterr='failed to get vdi_type in metadata' + ) + self._update_device_name(self._linstor.get_volume_name(self.uuid)) + + def _update_device_name(self, device_name): + self._device_name = device_name + + # Mark path of VDI parent class. + if device_name: + self.path = self._linstor.build_device_path(self._device_name) + else: + self.path = None + + def _create_snapshot(self, snap_uuid, snap_of_uuid=None): + """ + Snapshot self and return the snapshot VDI object. + """ + + # 1. Create a new LINSTOR volume with the same size than self. + snap_path = self._linstor.shallow_clone_volume( + self.uuid, snap_uuid, persistent=False + ) + + # 2. Write the snapshot content. + is_raw = (self.vdi_type == vhdutil.VDI_TYPE_RAW) + self.sr._vhdutil.snapshot( + snap_path, self.path, is_raw, self.MAX_METADATA_VIRT_SIZE + ) + + # 3. Get snapshot parent. + snap_parent = self.sr._vhdutil.get_parent(snap_uuid) + + # 4. Update metadata. + util.SMlog('Set VDI {} metadata of snapshot'.format(snap_uuid)) + volume_metadata = { + NAME_LABEL_TAG: util.to_plain_string(self.label), + NAME_DESCRIPTION_TAG: util.to_plain_string(self.description), + IS_A_SNAPSHOT_TAG: bool(snap_of_uuid), + SNAPSHOT_OF_TAG: snap_of_uuid, + SNAPSHOT_TIME_TAG: '', + TYPE_TAG: self.ty, + VDI_TYPE_TAG: vhdutil.VDI_TYPE_VHD, + READ_ONLY_TAG: False, + METADATA_OF_POOL_TAG: '' + } + self._linstor.set_volume_metadata(snap_uuid, volume_metadata) + + # 5. Set size. + snap_vdi = LinstorVDI(self.sr, snap_uuid) + if not snap_vdi._exists: + raise xs_errors.XenError('VDISnapshot') + + volume_info = self._linstor.get_volume_info(snap_uuid) + + snap_vdi.size = self.sr._vhdutil.get_size_virt(snap_uuid) + snap_vdi.utilisation = volume_info.allocated_size + + # 6. Update sm config. + snap_vdi.sm_config = {} + snap_vdi.sm_config['vdi_type'] = snap_vdi.vdi_type + if snap_parent: + snap_vdi.sm_config['vhd-parent'] = snap_parent + snap_vdi.parent = snap_parent + + snap_vdi.label = self.label + snap_vdi.description = self.description + + self._linstor.mark_volume_as_persistent(snap_uuid) + + return snap_vdi + + # -------------------------------------------------------------------------- + # Implement specific SR methods. + # -------------------------------------------------------------------------- + + @override + def _rename(self, oldpath, newpath) -> None: + # TODO: I'm not sure... Used by CBT. + volume_uuid = self._linstor.get_volume_uuid_from_device_path(oldpath) + self._linstor.update_volume_name(volume_uuid, newpath) + + @override + def _do_snapshot(self, sr_uuid, vdi_uuid, snapType, + cloneOp=False, secondary=None, cbtlog=None) -> str: + # If cbt enabled, save file consistency state. 
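+ # The state is recorded as consistent only when no active tapdisk
+ # is attached to the VDI.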
+ if cbtlog is not None: + if blktap2.VDI.tap_status(self.session, vdi_uuid): + consistency_state = False + else: + consistency_state = True + util.SMlog( + 'Saving log consistency state of {} for vdi: {}' + .format(consistency_state, vdi_uuid) + ) + else: + consistency_state = None + + if self.vdi_type != vhdutil.VDI_TYPE_VHD: + raise xs_errors.XenError('Unimplemented') + + if not blktap2.VDI.tap_pause(self.session, sr_uuid, vdi_uuid): + raise util.SMException('Failed to pause VDI {}'.format(vdi_uuid)) + try: + return self._snapshot(snapType, cbtlog, consistency_state) + finally: + self.disable_leaf_on_secondary(vdi_uuid, secondary=secondary) + blktap2.VDI.tap_unpause(self.session, sr_uuid, vdi_uuid, secondary) + + def _snapshot(self, snap_type, cbtlog=None, cbt_consistency=None): + util.SMlog( + 'LinstorVDI._snapshot for {} (type {})' + .format(self.uuid, snap_type) + ) + + # 1. Checks... + if self.hidden: + raise xs_errors.XenError('VDIClone', opterr='hidden VDI') + + depth = self.sr._vhdutil.get_depth(self.uuid) + if depth == -1: + raise xs_errors.XenError( + 'VDIUnavailable', + opterr='failed to get VHD depth' + ) + elif depth >= vhdutil.MAX_CHAIN_SIZE: + raise xs_errors.XenError('SnapshotChainTooLong') + + # Ensure we have a valid path if we don't have a local diskful. + self.sr._vhdutil.create_chain_paths(self.uuid, readonly=True) + + volume_path = self.path + if not util.pathexists(volume_path): + raise xs_errors.XenError( + 'EIO', + opterr='IO error checking path {}'.format(volume_path) + ) + + # 2. Create base and snap uuid (if required) and a journal entry. + base_uuid = util.gen_uuid() + snap_uuid = None + + if snap_type == VDI.SNAPSHOT_DOUBLE: + snap_uuid = util.gen_uuid() + + clone_info = '{}_{}'.format(base_uuid, snap_uuid) + + active_uuid = self.uuid + self.sr._journaler.create( + LinstorJournaler.CLONE, active_uuid, clone_info + ) + + try: + # 3. Self becomes the new base. + # The device path remains the same. + self._linstor.update_volume_uuid(self.uuid, base_uuid) + self.uuid = base_uuid + self.location = self.uuid + self.read_only = True + self.managed = False + + # 4. Create snapshots (new active and snap). + active_vdi = self._create_snapshot(active_uuid) + + snap_vdi = None + if snap_type == VDI.SNAPSHOT_DOUBLE: + snap_vdi = self._create_snapshot(snap_uuid, active_uuid) + + self.label = 'base copy' + self.description = '' + + # 5. Mark the base VDI as hidden so that it does not show up + # in subsequent scans. + self._mark_hidden() + self._linstor.update_volume_metadata( + self.uuid, {READ_ONLY_TAG: True} + ) + + # 6. We must update the new active VDI with the "paused" and + # "host_" properties. Why? Because the original VDI has been + # paused and we we must unpause it after the snapshot. + # See: `tap_unpause` in `blktap2.py`. + vdi_ref = self.session.xenapi.VDI.get_by_uuid(active_uuid) + sm_config = self.session.xenapi.VDI.get_sm_config(vdi_ref) + for key in [x for x in sm_config.keys() if x == 'paused' or x.startswith('host_')]: + active_vdi.sm_config[key] = sm_config[key] + + # 7. Verify parent locator field of both children and + # delete base if unused. 
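+ # The base is destroyed only when the new active leaf does not
+ # reference it and, for double snapshots, the snap does not
+ # reference it either.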
+ introduce_parent = True
+ try:
+ snap_parent = None
+ if snap_vdi:
+ snap_parent = snap_vdi.parent
+
+ if active_vdi.parent != self.uuid and (
+ snap_type == VDI.SNAPSHOT_SINGLE or
+ snap_type == VDI.SNAPSHOT_INTERNAL or
+ snap_parent != self.uuid
+ ):
+ util.SMlog(
+ 'Destroy unused base volume: {} (path={})'
+ .format(self.uuid, self.path)
+ )
+ introduce_parent = False
+ self._linstor.destroy_volume(self.uuid)
+ except Exception as e:
+ util.SMlog('Ignoring exception: {}'.format(e))
+ pass
+
+ # 8. Introduce the new VDI records.
+ if snap_vdi:
+ # If the parent is encrypted, set the key_hash for the
+ # new snapshot disk.
+ vdi_ref = self.sr.srcmd.params['vdi_ref']
+ sm_config = self.session.xenapi.VDI.get_sm_config(vdi_ref)
+ # TODO: Maybe remove key_hash support.
+ if 'key_hash' in sm_config:
+ snap_vdi.sm_config['key_hash'] = sm_config['key_hash']
+ # If we have CBT enabled on the VDI,
+ # set CBT status for the new snapshot disk.
+ if cbtlog:
+ snap_vdi.cbt_enabled = True
+
+ if snap_vdi:
+ snap_vdi_ref = snap_vdi._db_introduce()
+ util.SMlog(
+ 'vdi_clone: introduced VDI: {} ({})'
+ .format(snap_vdi_ref, snap_vdi.uuid)
+ )
+ if introduce_parent:
+ base_vdi_ref = self._db_introduce()
+ self.session.xenapi.VDI.set_managed(base_vdi_ref, False)
+ util.SMlog(
+ 'vdi_clone: introduced VDI: {} ({})'
+ .format(base_vdi_ref, self.uuid)
+ )
+ self._linstor.update_volume_metadata(self.uuid, {
+ NAME_LABEL_TAG: util.to_plain_string(self.label),
+ NAME_DESCRIPTION_TAG: util.to_plain_string(
+ self.description
+ ),
+ READ_ONLY_TAG: True,
+ METADATA_OF_POOL_TAG: ''
+ })
+
+ # 9. Update cbt files if user created snapshot (SNAPSHOT_DOUBLE)
+ if snap_type == VDI.SNAPSHOT_DOUBLE and cbtlog:
+ try:
+ self._cbt_snapshot(snap_uuid, cbt_consistency)
+ except Exception:
+ # CBT operation failed.
+ # TODO: Implement me.
+ raise
+
+ if snap_type != VDI.SNAPSHOT_INTERNAL:
+ self.sr._update_stats(self.size)
+
+ # 10. Return info on the new user-visible leaf VDI.
+ ret_vdi = snap_vdi
+ if not ret_vdi:
+ ret_vdi = self
+ if not ret_vdi:
+ ret_vdi = active_vdi
+
+ vdi_ref = self.sr.srcmd.params['vdi_ref']
+ self.session.xenapi.VDI.set_sm_config(
+ vdi_ref, active_vdi.sm_config
+ )
+ except Exception as e:
+ util.logException('Failed to snapshot!')
+ try:
+ self.sr._handle_interrupted_clone(
+ active_uuid, clone_info, force_undo=True
+ )
+ self.sr._journaler.remove(LinstorJournaler.CLONE, active_uuid)
+ except Exception as clean_error:
+ util.SMlog(
+ 'WARNING: Failed to clean up failed snapshot: {}'
+ .format(clean_error)
+ )
+ raise xs_errors.XenError('VDIClone', opterr=str(e))
+
+ self.sr._journaler.remove(LinstorJournaler.CLONE, active_uuid)
+
+ return ret_vdi.get_params()
+
+ @staticmethod
+ def _start_persistent_http_server(volume_name):
+ pid_path = None
+ http_server = None
+
+ try:
+ if volume_name == HA_VOLUME_NAME:
+ port = '8076'
+ else:
+ port = '8077'
+
+ try:
+ # Use a timeout call because XAPI may be unusable on startup
+ # or if the host has been ejected. So in this case the call can
+ # block indefinitely.
+ session = util.timeout_call(5, util.get_localAPI_session)
+ host_ip = util.get_this_host_address(session)
+ except:
+ # Fallback using the XHA file if session not available.
+ host_ip, _ = get_ips_from_xha_config_file() + if not host_ip: + raise Exception( + 'Cannot start persistent HTTP server: no XAPI session, nor XHA config file' + ) + + arguments = [ + 'http-disk-server', + '--disk', + '/dev/drbd/by-res/{}/0'.format(volume_name), + '--ip', + host_ip, + '--port', + port + ] + + util.SMlog('Starting {} on port {}...'.format(arguments[0], port)) + http_server = subprocess.Popen( + [FORK_LOG_DAEMON] + arguments, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + universal_newlines=True, + # Ensure we use another group id to kill this process without + # touch the current one. + preexec_fn=os.setsid + ) + + pid_path = '/run/http-server-{}.pid'.format(volume_name) + with open(pid_path, 'w') as pid_file: + pid_file.write(str(http_server.pid)) + + reg_server_ready = re.compile("Server ready!$") + def is_ready(): + while http_server.poll() is None: + line = http_server.stdout.readline() + if reg_server_ready.search(line): + return True + return False + try: + if not util.timeout_call(10, is_ready): + raise Exception('Failed to wait HTTP server startup, bad output') + except util.TimeoutException: + raise Exception('Failed to wait for HTTP server startup during given delay') + except Exception as e: + if pid_path: + try: + os.remove(pid_path) + except Exception: + pass + + if http_server: + # Kill process and children in this case... + try: + os.killpg(os.getpgid(http_server.pid), signal.SIGTERM) + except: + pass + + raise xs_errors.XenError( + 'VDIUnavailable', + opterr='Failed to start http-server: {}'.format(e) + ) + + def _start_persistent_nbd_server(self, volume_name): + pid_path = None + nbd_path = None + nbd_server = None + + try: + # We use a precomputed device size. + # So if the XAPI is modified, we must update these values! + if volume_name == HA_VOLUME_NAME: + # See: https://github.com/xapi-project/xen-api/blob/703479fa448a8d7141954bb6e8964d8e25c4ac2e/ocaml/xapi/xha_statefile.ml#L32-L37 + port = '8076' + device_size = 4 * 1024 * 1024 + else: + # See: https://github.com/xapi-project/xen-api/blob/703479fa448a8d7141954bb6e8964d8e25c4ac2e/ocaml/database/redo_log.ml#L41-L44 + port = '8077' + device_size = 256 * 1024 * 1024 + + try: + session = util.timeout_call(5, util.get_localAPI_session) + ips = util.get_host_addresses(session) + except Exception as e: + _, ips = get_ips_from_xha_config_file() + if not ips: + raise Exception( + 'Cannot start persistent NBD server: no XAPI session, nor XHA config file ({})'.format(e) + ) + ips = ips.values() + + arguments = [ + 'nbd-http-server', + '--socket-path', + '/run/{}.socket'.format(volume_name), + '--nbd-name', + volume_name, + '--urls', + ','.join(['http://' + ip + ':' + port for ip in ips]), + '--device-size', + str(device_size) + ] + + util.SMlog('Starting {} using port {}...'.format(arguments[0], port)) + nbd_server = subprocess.Popen( + [FORK_LOG_DAEMON] + arguments, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + universal_newlines=True, + # Ensure we use another group id to kill this process without + # touch the current one. + preexec_fn=os.setsid + ) + + pid_path = '/run/nbd-server-{}.pid'.format(volume_name) + with open(pid_path, 'w') as pid_file: + pid_file.write(str(nbd_server.pid)) + + reg_nbd_path = re.compile("NBD `(/dev/nbd[0-9]+)` is now attached.$") + def get_nbd_path(): + while nbd_server.poll() is None: + line = nbd_server.stdout.readline() + match = reg_nbd_path.search(line) + if match: + return match.group(1) + # Use a timeout to never block the smapi if there is a problem. 
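+ # Give the NBD server up to 10 seconds to report the attached
+ # /dev/nbdX device on its stdout.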
+            try:
+                nbd_path = util.timeout_call(10, get_nbd_path)
+                if nbd_path is None:
+                    raise Exception('Empty NBD path (NBD server is probably dead)')
+            except util.TimeoutException:
+                raise Exception('Unable to read NBD path')
+
+            util.SMlog('Create symlink: {} -> {}'.format(self.path, nbd_path))
+            os.symlink(nbd_path, self.path)
+        except Exception as e:
+            if pid_path:
+                try:
+                    os.remove(pid_path)
+                except Exception:
+                    pass
+
+            if nbd_path:
+                try:
+                    os.remove(nbd_path)
+                except Exception:
+                    pass
+
+            if nbd_server:
+                # Kill process and children in this case...
+                try:
+                    os.killpg(os.getpgid(nbd_server.pid), signal.SIGTERM)
+                except:
+                    pass
+
+            raise xs_errors.XenError(
+                'VDIUnavailable',
+                opterr='Failed to start nbd-server: {}'.format(e)
+            )
+
+    @classmethod
+    def _kill_persistent_server(self, type, volume_name, sig):
+        try:
+            path = '/run/{}-server-{}.pid'.format(type, volume_name)
+            if not os.path.exists(path):
+                return
+
+            pid = None
+            with open(path, 'r') as pid_file:
+                try:
+                    pid = int(pid_file.read())
+                except Exception:
+                    pass
+
+            if pid is not None and util.check_pid_exists(pid):
+                util.SMlog('Kill {} server {} (pid={})'.format(type, path, pid))
+                try:
+                    os.killpg(os.getpgid(pid), sig)
+                except Exception as e:
+                    util.SMlog('Failed to kill {} server: {}'.format(type, e))
+
+            os.remove(path)
+        except:
+            pass
+
+    @classmethod
+    def _kill_persistent_http_server(self, volume_name, sig=signal.SIGTERM):
+        return self._kill_persistent_server('http', volume_name, sig)
+
+    @classmethod
+    def _kill_persistent_nbd_server(self, volume_name, sig=signal.SIGTERM):
+        return self._kill_persistent_server('nbd', volume_name, sig)
+
+    def _check_http_nbd_volume_name(self):
+        volume_name = self.path[14:]
+        if volume_name not in [
+            HA_VOLUME_NAME, REDO_LOG_VOLUME_NAME
+        ]:
+            raise xs_errors.XenError(
+                'VDIUnavailable',
+                opterr='Unsupported path: {}'.format(self.path)
+            )
+        return volume_name
+
+    def _attach_using_http_nbd(self):
+        volume_name = self._check_http_nbd_volume_name()
+
+        # Ensure there is no NBD and HTTP server running.
+        self._kill_persistent_nbd_server(volume_name)
+        self._kill_persistent_http_server(volume_name)
+
+        # 0. Fetch drbd path.
+        must_get_device_path = True
+        if not self.sr.is_master():
+            # We are on a slave, we must try to find a diskful locally.
+            try:
+                volume_info = self._linstor.get_volume_info(self.uuid)
+            except Exception as e:
+                raise xs_errors.XenError(
+                    'VDIUnavailable',
+                    opterr='Cannot get volume info of {}: {}'
+                    .format(self.uuid, e)
+                )
+
+            hostname = socket.gethostname()
+            must_get_device_path = hostname in volume_info.diskful
+
+        drbd_path = None
+        if must_get_device_path or self.sr.is_master():
+            # If we are master, we must ensure we have a diskless
+            # or diskful available to init HA.
+            # It also avoids this error in xensource.log
+            # (/usr/libexec/xapi/cluster-stack/xhad/ha_set_pool_state):
+            # init exited with code 8 [stdout = ''; stderr = 'SF: failed to write in State-File \x10 (fd 4208696). (sys 28)\x0A']
+            # init returned MTC_EXIT_CAN_NOT_ACCESS_STATEFILE (State-File is inaccessible)
+            available = False
+            try:
+                drbd_path = self._linstor.get_device_path(self.uuid)
+                available = util.pathexists(drbd_path)
+            except Exception:
+                pass
+
+            if not available:
+                raise xs_errors.XenError(
+                    'VDIUnavailable',
+                    opterr='Cannot get device path of {}'.format(self.uuid)
+                )
+
+        # 1. Prepare http-nbd folder.
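+        # self.path lives under /dev/http-nbd/ (see _check_http_nbd_volume_name);
+        # the NBD server started below symlinks it to the attached /dev/nbdX device.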
+ try: + if not os.path.exists('/dev/http-nbd/'): + os.makedirs('/dev/http-nbd/') + elif os.path.islink(self.path): + os.remove(self.path) + except OSError as e: + if e.errno != errno.EEXIST: + raise xs_errors.XenError( + 'VDIUnavailable', + opterr='Cannot prepare http-nbd: {}'.format(e) + ) + + # 2. Start HTTP service if we have a diskful or if we are master. + http_service = None + if drbd_path: + assert(drbd_path in ( + '/dev/drbd/by-res/{}/0'.format(HA_VOLUME_NAME), + '/dev/drbd/by-res/{}/0'.format(REDO_LOG_VOLUME_NAME) + )) + self._start_persistent_http_server(volume_name) + + # 3. Start NBD server in all cases. + try: + self._start_persistent_nbd_server(volume_name) + except Exception as e: + if drbd_path: + self._kill_persistent_http_server(volume_name) + raise + + self.attached = True + return VDI.VDI.attach(self, self.sr.uuid, self.uuid) + + def _detach_using_http_nbd(self): + volume_name = self._check_http_nbd_volume_name() + self._kill_persistent_nbd_server(volume_name) + self._kill_persistent_http_server(volume_name) + +# ------------------------------------------------------------------------------ + + +if __name__ == '__main__': + def run(): + SRCommand.run(LinstorSR, DRIVER_INFO) + + if not TRACE_PERFS: + run() + else: + util.make_profile('LinstorSR', run) +else: + SR.registerSR(LinstorSR) diff --git a/drivers/MooseFSSR.py b/drivers/MooseFSSR.py new file mode 100755 index 000000000..4ebd7bd8c --- /dev/null +++ b/drivers/MooseFSSR.py @@ -0,0 +1,327 @@ +#!/usr/bin/env python3 +# +# Original work copyright (C) Citrix systems +# Modified work copyright (C) Tappest sp. z o.o., Vates SAS and XCP-ng community +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published +# by the Free Software Foundation; version 2.1 only. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +# +# MooseFSSR: Based on CEPHFSSR and FileSR, mounts MooseFS share + +from sm_typing import override + +import errno +import os +import syslog as _syslog +import xmlrpc.client +from syslog import syslog + +# careful with the import order here +# FileSR has a circular dependency: +# FileSR -> blktap2 -> lvutil -> EXTSR -> FileSR +# importing in this order seems to avoid triggering the issue. 
+import SR +import SRCommand +import FileSR +# end of careful +import VDI +import cleanup +import util +import vhdutil +import xs_errors +from lock import Lock + +CAPABILITIES = ["SR_PROBE", "SR_UPDATE", + "VDI_CREATE", "VDI_DELETE", "VDI_ATTACH", "VDI_DETACH", + "VDI_UPDATE", "VDI_CLONE", "VDI_SNAPSHOT", "VDI_RESIZE", "VDI_MIRROR", + "VDI_GENERATE_CONFIG", + "VDI_RESET_ON_BOOT/2", "ATOMIC_PAUSE"] + +CONFIGURATION = [ + ['masterhost', 'MooseFS Master Server hostname or IP address (required, e.g.: "mfsmaster.local.lan" or "10.10.10.1")'], + ['masterport', 'MooseFS Master Server port, default: 9421'], + ['rootpath', 'MooseFS path (required, e.g.: "/")'], + ['options', 'MooseFS Client additional options (e.g.: "mfspassword=PASSWORD,mfstimeout=300")'] +] + +DRIVER_INFO = { + 'name': 'MooseFS VHD', + 'description': 'SR plugin which stores disks as VHD files on a MooseFS storage', + 'vendor': 'Tappest sp. z o.o.', + 'copyright': '(C) 2021 Tappest sp. z o.o.', + 'driver_version': '1.0', + 'required_api_version': '1.0', + 'capabilities': CAPABILITIES, + 'configuration': CONFIGURATION +} + +DRIVER_CONFIG = {"ATTACH_FROM_CONFIG_WITH_TAPDISK": True} + +# The mountpoint for the directory when performing an sr_probe. All probes +# are guaranteed to be serialised by xapi, so this single mountpoint is fine. +PROBE_MOUNTPOINT = os.path.join(SR.MOUNT_BASE, "probe") + + +class MooseFSException(Exception): + def __init__(self, errstr): + self.errstr = errstr + + +class MooseFSSR(FileSR.FileSR): + """MooseFS file-based storage""" + + DRIVER_TYPE = 'moosefs' + + @override + @staticmethod + def handles(sr_type) -> bool: + # fudge, because the parent class (FileSR) checks for smb to alter its behavior + return sr_type == MooseFSSR.DRIVER_TYPE or sr_type == 'smb' + + @override + def load(self, sr_uuid) -> None: + if not self._is_moosefs_available(): + raise xs_errors.XenError( + 'SRUnavailable', + opterr='MooseFS Client is not installed!' 
+ ) + + self.ops_exclusive = FileSR.OPS_EXCLUSIVE + self.lock = Lock(vhdutil.LOCK_TYPE_SR, self.uuid) + self.sr_vditype = SR.DEFAULT_TAP + self.driver_config = DRIVER_CONFIG + if 'masterhost' not in self.dconf: + raise xs_errors.XenError('ConfigServerMissing') + self.remoteserver = self.dconf['masterhost'] + self.rootpath = self.dconf['rootpath'] + self.remotepath = self.rootpath + # if masterport is not specified, use default: 9421 + if 'masterport' not in self.dconf: + self.remoteport = "9421" + else: + self.remoteport = self.dconf['masterport'] + if self.sr_ref and self.session is not None: + self.sm_config = self.session.xenapi.SR.get_sm_config(self.sr_ref) + else: + self.sm_config = self.srcmd.params.get('sr_sm_config') or {} + + if self.srcmd.cmd != 'sr_create': + self.subdir = util.strtobool(self.sm_config.get('subdir')) + if self.subdir: + self.remotepath = os.path.join(self.remotepath, sr_uuid) + + self.attached = False + self.path = os.path.join(SR.MOUNT_BASE, sr_uuid) + self.mountpoint = self.path + self.linkpath = self.path + self._check_o_direct() + + def checkmount(self): + return util.ioretry(lambda: ((util.pathexists(self.mountpoint) and + util.ismount(self.mountpoint)))) + + def mount(self, mountpoint=None): + """Mount MooseFS share at 'mountpoint'""" + if mountpoint is None: + mountpoint = self.mountpoint + elif not util.is_string(mountpoint) or mountpoint == "": + raise MooseFSException("Mountpoint is not a string object") + + try: + if not util.ioretry(lambda: util.isdir(mountpoint)): + util.ioretry(lambda: util.makedirs(mountpoint)) + except util.CommandException as inst: + raise MooseFSException("Failed to make directory: code is %d" % inst.code) + + try: + options = [] + if 'options' in self.dconf: + options.append(self.dconf['options']) + if options: + options = ['-o', ','.join(options)] + remote = '{}:{}:{}'.format( + self.remoteserver, self.remoteport, self.remotepath + ) + command = ["mount", '-t', 'moosefs', remote, mountpoint] + options + util.ioretry(lambda: util.pread(command), errlist=[errno.EPIPE, errno.EIO], maxretry=2, nofail=True) + except util.CommandException as inst: + syslog(_syslog.LOG_ERR, 'MooseFS mount failed ' + inst.__str__()) + raise MooseFSException("Mount failed with return code %d" % inst.code) + + # Sanity check to ensure that the user has at least RO access to the + # mounted share. Windows sharing and security settings can be tricky. + try: + util.listdir(mountpoint) + except util.CommandException: + try: + self.unmount(mountpoint, True) + except MooseFSException: + util.logException('MooseFSSR.unmount()') + raise MooseFSException("Permission denied. 
Please check user privileges.") + + def unmount(self, mountpoint, rmmountpoint): + try: + util.pread(["umount", mountpoint]) + except util.CommandException as inst: + raise MooseFSException("Command umount failed with return code %d" % inst.code) + if rmmountpoint: + try: + os.rmdir(mountpoint) + except OSError as inst: + raise MooseFSException("Command rmdir failed with error '%s'" % inst.strerror) + + @override + def attach(self, sr_uuid) -> None: + if not self.checkmount(): + try: + self.mount() + except MooseFSException as exc: + raise xs_errors.SROSError(12, exc.errstr) + self.attached = True + + @override + def probe(self) -> str: + try: + self.mount(PROBE_MOUNTPOINT) + sr_list = filter(util.match_uuid, util.listdir(PROBE_MOUNTPOINT)) + self.unmount(PROBE_MOUNTPOINT, True) + except (util.CommandException, xs_errors.XenError): + raise + # Create a dictionary from the SR uuids to feed SRtoXML() + return util.SRtoXML({sr_uuid: {} for sr_uuid in sr_list}) + + @override + def detach(self, sr_uuid) -> None: + if not self.checkmount(): + return + util.SMlog("Aborting GC/coalesce") + cleanup.abort(sr_uuid) + # Change directory to avoid unmount conflicts + os.chdir(SR.MOUNT_BASE) + self.unmount(self.mountpoint, True) + self.attached = False + + @override + def create(self, sr_uuid, size) -> None: + if self.checkmount(): + raise xs_errors.SROSError(113, 'MooseFS mount point already attached') + + assert self.remotepath == self.rootpath + try: + self.mount() + except MooseFSException as exc: + # noinspection PyBroadException + try: + os.rmdir(self.mountpoint) + except: + # we have no recovery strategy + pass + raise xs_errors.SROSError(111, "MooseFS mount error [opterr=%s]" % exc.errstr) + + try: + self.subdir = self.sm_config.get('subdir') + if self.subdir is None: + self.subdir = True + else: + self.subdir = util.strtobool(self.subdir) + + self.sm_config['subdir'] = str(self.subdir) + self.session.xenapi.SR.set_sm_config(self.sr_ref, self.sm_config) + + if not self.subdir: + return + + subdir = os.path.join(self.mountpoint, sr_uuid) + if util.ioretry(lambda: util.pathexists(subdir)): + if util.ioretry(lambda: util.isdir(subdir)): + raise xs_errors.XenError('SRExists') + else: + try: + util.ioretry(lambda: util.makedirs(subdir)) + except util.CommandException as e: + if e.code != errno.EEXIST: + raise MooseFSException( + 'Failed to create SR subdir: {}'.format(e) + ) + finally: + self.detach(sr_uuid) + + @override + def delete(self, sr_uuid) -> None: + # try to remove/delete non VDI contents first + super(MooseFSSR, self).delete(sr_uuid) + try: + if self.checkmount(): + self.detach(sr_uuid) + + if self.subdir: + # Mount using rootpath () instead of /. 
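+                # (remount at the root so the now-empty SR subdirectory can be removed)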
+ self.remotepath = self.rootpath + self.attach(sr_uuid) + subdir = os.path.join(self.mountpoint, sr_uuid) + if util.ioretry(lambda: util.pathexists(subdir)): + util.ioretry(lambda: os.rmdir(subdir)) + self.detach(sr_uuid) + except util.CommandException as inst: + self.detach(sr_uuid) + if inst.code != errno.ENOENT: + raise xs_errors.SROSError(114, "Failed to remove MooseFS mount point") + + @override + def vdi(self, uuid, loadLocked=False) -> VDI.VDI: + return MooseFSFileVDI(self, uuid) + + @staticmethod + def _is_moosefs_available(): + return util.find_executable('mfsmount') + +class MooseFSFileVDI(FileSR.FileVDI): + @override + def attach(self, sr_uuid, vdi_uuid) -> str: + if not hasattr(self, 'xenstore_data'): + self.xenstore_data = {} + + self.xenstore_data['storage-type'] = MooseFSSR.DRIVER_TYPE + + return super(MooseFSFileVDI, self).attach(sr_uuid, vdi_uuid) + + @override + def generate_config(self, sr_uuid, vdi_uuid) -> str: + util.SMlog("MooseFSFileVDI.generate_config") + if not util.pathexists(self.path): + raise xs_errors.XenError('VDIUnavailable') + resp = {'device_config': self.sr.dconf, + 'sr_uuid': sr_uuid, + 'vdi_uuid': vdi_uuid, + 'sr_sm_config': self.sr.sm_config, + 'command': 'vdi_attach_from_config'} + # Return the 'config' encoded within a normal XMLRPC response so that + # we can use the regular response/error parsing code. + config = xmlrpc.client.dumps(tuple([resp]), "vdi_attach_from_config") + return xmlrpc.client.dumps((config,), "", True) + + @override + def attach_from_config(self, sr_uuid, vdi_uuid) -> str: + try: + if not util.pathexists(self.sr.path): + return self.sr.attach(sr_uuid) + except: + util.logException("MooseFSFileVDI.attach_from_config") + raise xs_errors.XenError('SRUnavailable', + opterr='Unable to attach from config') + return '' + +if __name__ == '__main__': + SRCommand.run(MooseFSSR, DRIVER_INFO) +else: + SR.registerSR(MooseFSSR) diff --git a/drivers/NFSSR.py b/drivers/NFSSR.py index b499cc905..6eeedeb7b 100755 --- a/drivers/NFSSR.py +++ b/drivers/NFSSR.py @@ -17,9 +17,12 @@ # # FileSR: local-file storage repository +from sm_typing import override + import socket import SR +import VDI import SRCommand import FileSR import util @@ -68,11 +71,13 @@ class NFSSR(FileSR.SharedFileSR): """NFS file-based storage repository""" - def handles(type): + @override + @staticmethod + def handles(type) -> bool: return type == 'nfs' - handles = staticmethod(handles) - def load(self, sr_uuid): + @override + def load(self, sr_uuid) -> None: self.ops_exclusive = FileSR.OPS_EXCLUSIVE self.lock = Lock(vhdutil.LOCK_TYPE_SR, self.uuid) self.sr_vditype = SR.DEFAULT_TAP @@ -88,9 +93,12 @@ def load(self, sr_uuid): self.sm_config = self.srcmd.params.get('sr_sm_config') or {} self.other_config = self.srcmd.params.get('sr_other_config') or {} self.nosubdir = self.sm_config.get('nosubdir') == "true" - if 'serverpath' in self.dconf: - self.remotepath = os.path.join(self.dconf['serverpath'], - not self.nosubdir and sr_uuid or "") + serverpath = self.dconf.get('serverpath') + if serverpath is not None: + self.remotepath = os.path.join( + serverpath, + not self.nosubdir and sr_uuid or "" + ) self.path = os.path.join(SR.MOUNT_BASE, sr_uuid) # Handle optional dconf attributes @@ -102,7 +110,8 @@ def load(self, sr_uuid): self.options = '' def validate_remotepath(self, scan): - if 'serverpath' not in self.dconf: + serverpath = self.dconf.get('serverpath') + if serverpath is None: if scan: try: self.scan_exports(self.dconf['server']) @@ -133,7 +142,8 @@ def mount(self, mountpoint, 
remotepath, timeout=None, retrans=None): except nfs.NfsException as exc: raise xs_errors.XenError('NFSMount', opterr=exc.errstr) - def attach(self, sr_uuid): + @override + def attach(self, sr_uuid) -> None: if not self._checkmount(): try: self.validate_remotepath(False) @@ -160,7 +170,8 @@ def mount_remotepath(self, sr_uuid): self.mount(self.path, self.remotepath, timeout=io_timeout, retrans=io_retrans) - def probe(self): + @override + def probe(self) -> str: # Verify NFS target and port util._testHost(self.dconf['server'], NFSPORT, 'NFSTarget') @@ -178,7 +189,8 @@ def probe(self): except: pass - def detach(self, sr_uuid): + @override + def detach(self, sr_uuid) -> None: """Detach the SR: Unmounts and removes the mountpoint""" if not self._checkmount(): return @@ -195,7 +207,8 @@ def detach(self, sr_uuid): self.attached = False - def create(self, sr_uuid, size): + @override + def create(self, sr_uuid, size) -> None: util._testHost(self.dconf['server'], NFSPORT, 'NFSTarget') self.validate_remotepath(True) if self._checkmount(): @@ -235,7 +248,8 @@ def create(self, sr_uuid, size): % inst.code) self.detach(sr_uuid) - def delete(self, sr_uuid): + @override + def delete(self, sr_uuid) -> None: # try to remove/delete non VDI contents first super(NFSSR, self).delete(sr_uuid) try: @@ -256,7 +270,8 @@ def delete(self, sr_uuid): if inst.code != errno.ENOENT: raise xs_errors.XenError('NFSDelete') - def vdi(self, uuid): + @override + def vdi(self, uuid) -> VDI.VDI: return NFSFileVDI(self, uuid) def scan_exports(self, target): @@ -283,7 +298,8 @@ def set_transport(self): class NFSFileVDI(FileSR.FileVDI): - def attach(self, sr_uuid, vdi_uuid): + @override + def attach(self, sr_uuid, vdi_uuid) -> str: if not hasattr(self, 'xenstore_data'): self.xenstore_data = {} @@ -291,7 +307,8 @@ def attach(self, sr_uuid, vdi_uuid): return super(NFSFileVDI, self).attach(sr_uuid, vdi_uuid) - def generate_config(self, sr_uuid, vdi_uuid): + @override + def generate_config(self, sr_uuid, vdi_uuid) -> str: util.SMlog("NFSFileVDI.generate_config") if not util.pathexists(self.path): raise xs_errors.XenError('VDIUnavailable') @@ -307,12 +324,13 @@ def generate_config(self, sr_uuid, vdi_uuid): config = xmlrpc.client.dumps(tuple([resp]), "vdi_attach_from_config") return xmlrpc.client.dumps((config, ), "", True) - def attach_from_config(self, sr_uuid, vdi_uuid): + @override + def attach_from_config(self, sr_uuid, vdi_uuid) -> str: """Used for HA State-file only. 
Will not just attach the VDI but also start a tapdisk on the file""" util.SMlog("NFSFileVDI.attach_from_config") try: - self.sr.attach(sr_uuid) + return self.sr.attach(sr_uuid) except: util.logException("NFSFileVDI.attach_from_config") raise xs_errors.XenError('SRUnavailable', \ diff --git a/drivers/RawISCSISR.py b/drivers/RawISCSISR.py index 0b17cfa9b..a4848a88c 100644 --- a/drivers/RawISCSISR.py +++ b/drivers/RawISCSISR.py @@ -18,8 +18,11 @@ # ISCSISR: ISCSI software initiator SR driver # +from sm_typing import override + import SR import SRCommand +import VDI import BaseISCSI import LUNperVDI import util @@ -53,25 +56,30 @@ class RawISCSISR(BaseISCSI.BaseISCSISR): """Raw ISCSI storage repository""" - def handles(type): + @override + @staticmethod + def handles(type) -> bool: if type == "iscsi": return True return False - handles = staticmethod(handles) - def load(self, vdi_uuid): + @override + def load(self, vdi_uuid) -> None: super(RawISCSISR, self).load(vdi_uuid) self.managed = True - def detach(self, sr_uuid): - super(RawISCSISR, self).detach(sr_uuid, True) + @override + def detach(self, sr_uuid) -> None: + super(RawISCSISR, self).detach_and_delete(sr_uuid) - def vdi(self, uuid): + @override + def vdi(self, uuid) -> VDI.VDI: return ISCSIVDI(self, uuid) class ISCSIVDI(LUNperVDI.RAWVDI): - def load(self, vdi_uuid): + @override + def load(self, vdi_uuid) -> None: super(ISCSIVDI, self).load(vdi_uuid) self.managed = True diff --git a/drivers/SHMSR.py b/drivers/SHMSR.py index 5e3ef7f47..250d58130 100644 --- a/drivers/SHMSR.py +++ b/drivers/SHMSR.py @@ -15,6 +15,8 @@ # along with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +from sm_typing import override + import SR import VDI import SRCommand @@ -54,25 +56,29 @@ def _loadvdis(self): except: pass - def handles(type): + @override + @staticmethod + def handles(type) -> bool: """Do we handle this type?""" if type == TYPE: return True return False - handles = staticmethod(handles) - def content_type(self, sr_uuid): + @override + def content_type(self, sr_uuid) -> str: """Returns the content_type XML""" return super(SHMSR, self).content_type(sr_uuid) - def vdi(self, uuid): + @override + def vdi(self, uuid) -> VDI.VDI: """Create a VDI class""" if 'vdi_location' in self.srcmd.params: return SHMVDI(self, uuid, self.srcmd.params['vdi_location']) else: return SHMVDI(self, uuid, self.srcmd.params['device_config']['location']) - def load(self, sr_uuid): + @override + def load(self, sr_uuid) -> None: """Initialises the SR""" if 'location' not in self.dconf: raise xs_errors.XenError('ConfigLocationMissing') @@ -82,26 +88,31 @@ def load(self, sr_uuid): self.physical_utilisation = 0 self.virtual_allocation = 0 - def attach(self, sr_uuid): + @override + def attach(self, sr_uuid) -> None: """Std. attach""" self._loadvdis() - def detach(self, sr_uuid): + @override + def detach(self, sr_uuid) -> None: """Std. 
detach""" pass - def scan(self, sr_uuid): + @override + def scan(self, sr_uuid) -> None: """Scan""" self._loadvdis() - return super(SHMSR, self).scan(sr_uuid) + super(SHMSR, self).scan(sr_uuid) - def create(self, sr_uuid, size): + @override + def create(self, sr_uuid, size) -> None: self.attach(sr_uuid) self.detach(sr_uuid) class SHMVDI(VDI.VDI): - def load(self, vdi_uuid): + @override + def load(self, vdi_uuid) -> None: try: stat = os.stat(self.path) self.utilisation = int(stat.st_size) @@ -120,13 +131,16 @@ def __init__(self, mysr, uuid, filename): self.shareable = True self.sm_config = {} - def detach(self, sr_uuid, vdi_uuid): + @override + def detach(self, sr_uuid, vdi_uuid) -> None: pass - def clone(self, sr_uuid, vdi_uuid): + @override + def clone(self, sr_uuid, vdi_uuid) -> str: return self.get_params() - def snapshot(self, sr_uuid, vdi_uuid): + @override + def snapshot(self, sr_uuid, vdi_uuid) -> str: return self.get_params() if __name__ == '__main__': diff --git a/drivers/SMBSR.py b/drivers/SMBSR.py index aa9bda381..34ba40d67 100755 --- a/drivers/SMBSR.py +++ b/drivers/SMBSR.py @@ -17,8 +17,11 @@ # # SMBSR: SMB filesystem based storage repository +from sm_typing import override + import SR import SRCommand +import VDI import FileSR import util import errno @@ -71,11 +74,13 @@ def __init__(self, errstr): class SMBSR(FileSR.SharedFileSR): """SMB file-based storage repository""" - def handles(type): + @override + @staticmethod + def handles(type) -> bool: return type == 'smb' - handles = staticmethod(handles) - def load(self, sr_uuid): + @override + def load(self, sr_uuid) -> None: self.ops_exclusive = FileSR.OPS_EXCLUSIVE self.lock = Lock(vhdutil.LOCK_TYPE_SR, self.uuid) self.sr_vditype = SR.DEFAULT_TAP @@ -190,7 +195,8 @@ def __check_license(self): restrictions['restrict_cifs'] == "true": raise xs_errors.XenError('NoSMBLicense') - def attach(self, sr_uuid): + @override + def attach(self, sr_uuid) -> None: if not self.checkmount(): try: self.mount() @@ -208,7 +214,8 @@ def attach(self, sr_uuid): self.attached = True - def probe(self): + @override + def probe(self) -> str: err = "SMBMount" try: self.mount(PROBE_MOUNTPOINT) @@ -220,13 +227,11 @@ def probe(self): raise xs_errors.XenError(err, opterr=inst.errstr) except (util.CommandException, xs_errors.XenError): raise - # Create a dictionary from the SR uuids to feed SRtoXML() - sr_dict = {sr_uuid: {} for sr_uuid in sr_list} - - return util.SRtoXML(sr_dict) + return util.SRtoXML({sr_uuid: {} for sr_uuid in sr_list}) - def detach(self, sr_uuid): + @override + def detach(self, sr_uuid) -> None: """Detach the SR: Unmounts and removes the mountpoint""" if not self.checkmount(): return @@ -244,7 +249,8 @@ def detach(self, sr_uuid): self.attached = False - def create(self, sr_uuid, size): + @override + def create(self, sr_uuid, size) -> None: self.__check_license() if self.checkmount(): @@ -286,7 +292,8 @@ def create(self, sr_uuid, size): .format(os.strerror(inst.code))) from inst self.detach(sr_uuid) - def delete(self, sr_uuid): + @override + def delete(self, sr_uuid) -> None: # try to remove/delete non VDI contents first super(SMBSR, self).delete(sr_uuid) try: @@ -302,12 +309,14 @@ def delete(self, sr_uuid): if inst.code != errno.ENOENT: raise xs_errors.XenError('SMBDelete') - def vdi(self, uuid): + @override + def vdi(self, uuid) -> VDI.VDI: return SMBFileVDI(self, uuid) class SMBFileVDI(FileSR.FileVDI): - def attach(self, sr_uuid, vdi_uuid): + @override + def attach(self, sr_uuid, vdi_uuid) -> str: if not hasattr(self, 'xenstore_data'): 
self.xenstore_data = {} @@ -315,7 +324,8 @@ def attach(self, sr_uuid, vdi_uuid): return super(SMBFileVDI, self).attach(sr_uuid, vdi_uuid) - def generate_config(self, sr_uuid, vdi_uuid): + @override + def generate_config(self, sr_uuid, vdi_uuid) -> str: util.SMlog("SMBFileVDI.generate_config") if not util.pathexists(self.path): raise xs_errors.XenError('VDIUnavailable') @@ -330,17 +340,19 @@ def generate_config(self, sr_uuid, vdi_uuid): config = xmlrpc.client.dumps(tuple([resp]), "vdi_attach_from_config") return xmlrpc.client.dumps((config, ), "", True) - def attach_from_config(self, sr_uuid, vdi_uuid): + @override + def attach_from_config(self, sr_uuid, vdi_uuid) -> str: """Used for HA State-file only. Will not just attach the VDI but also start a tapdisk on the file""" util.SMlog("SMBFileVDI.attach_from_config") try: if not util.pathexists(self.sr.path): - self.sr.attach(sr_uuid) + return self.sr.attach(sr_uuid) except: util.logException("SMBFileVDI.attach_from_config") raise xs_errors.XenError('SRUnavailable', \ opterr='Unable to attach from config') + return '' if __name__ == '__main__': diff --git a/drivers/SR.py b/drivers/SR.py index 9ca0045c5..fb2798ea3 100755 --- a/drivers/SR.py +++ b/drivers/SR.py @@ -80,10 +80,10 @@ class SR(object): sr_vditype: string, repository type """ - def handles(type): + @staticmethod + def handles(type) -> bool: """Returns True if this SR class understands the given dconf string""" return False - handles = staticmethod(handles) def __init__(self, srcmd, sr_uuid): """Base class initializer. All subclasses should call SR.__init__ @@ -153,7 +153,7 @@ def __init__(self, srcmd, sr_uuid): @staticmethod def from_uuid(session, sr_uuid): - import imp + import importlib.util _SR = session.xenapi.SR sr_ref = _SR.get_by_uuid(sr_uuid) @@ -169,7 +169,10 @@ def from_uuid(session, sr_uuid): driver_real = os.path.realpath(driver_path) module_name = os.path.basename(driver_path) - module = imp.load_source(module_name, driver_real) + spec = importlib.util.spec_from_file_location(module_name, driver_real) + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + target = driver(sm_type) # NB. get the host pbd's device_config @@ -226,7 +229,7 @@ def _addLUNperVDIkey(self): except: pass - def create(self, uuid, size): + def create(self, uuid, size) -> None: """Create this repository. This operation may delete existing data. @@ -240,7 +243,7 @@ def create(self, uuid, size): """ raise xs_errors.XenError('Unimplemented') - def delete(self, uuid): + def delete(self, uuid) -> None: """Delete this repository and its contents. This operation IS idempotent -- it will succeed if the repository @@ -256,7 +259,7 @@ def delete(self, uuid): """ raise xs_errors.XenError('Unimplemented') - def update(self, uuid): + def update(self, uuid) -> None: """Refresh the fields in the SR object Returns: @@ -267,7 +270,7 @@ def update(self, uuid): # no-op unless individual backends implement it return - def attach(self, uuid): + def attach(self, uuid) -> None: """Initiate local access to the SR. Initialises any device state required to access the substrate. 
@@ -280,7 +283,7 @@ def attach(self, uuid): """ raise xs_errors.XenError('Unimplemented') - def after_master_attach(self, uuid): + def after_master_attach(self, uuid) -> None: """Perform actions required after attaching on the pool master Return: None @@ -295,7 +298,7 @@ def after_master_attach(self, uuid): self.session.xenapi.message.create( msg_name, 2, "SR", uuid, msg_body) - def detach(self, uuid): + def detach(self, uuid) -> None: """Remove local access to the SR. Destroys any device state initiated by the sr_attach() operation. @@ -309,7 +312,7 @@ def detach(self, uuid): """ raise xs_errors.XenError('Unimplemented') - def probe(self): + def probe(self) -> str: """Perform a backend-specific scan, using the current dconf. If the dconf is complete, then this will return a list of the SRs present of this type on the device, if any. If the dconf is partial, then a @@ -329,7 +332,7 @@ def probe(self): """ raise xs_errors.XenError('Unimplemented') - def scan(self, uuid): + def scan(self, uuid) -> None: """ Returns: """ @@ -339,7 +342,7 @@ def scan(self, uuid): scanrecord = ScanRecord(self) scanrecord.synchronise() - def replay(self, uuid): + def replay(self, uuid) -> None: """Replay a multi-stage log entry Returns: @@ -349,30 +352,27 @@ def replay(self, uuid): """ raise xs_errors.XenError('Unimplemented') - def content_type(self, uuid): + def content_type(self, uuid) -> str: """Returns the 'content_type' of an SR as a string""" return xmlrpc.client.dumps((str(self.sr_vditype), ), "", True) - def load(self, sr_uuid): + def load(self, sr_uuid) -> None: """Post-init hook""" pass - def check_sr(self, sr_uuid): + def check_sr(self, sr_uuid) -> None: """Hook to check SR health""" pass - def vdi(self, uuid): + def vdi(self, uuid) -> 'VDI.VDI': """Return VDI object owned by this repository""" - if uuid not in self.vdis: - self.vdis[uuid] = VDI.VDI(self, uuid) raise xs_errors.XenError('Unimplemented') - return self.vdis[uuid] - def forget_vdi(self, uuid): + def forget_vdi(self, uuid) -> None: vdi = self.session.xenapi.VDI.get_by_uuid(uuid) self.session.xenapi.VDI.db_forget(vdi) - def cleanup(self): + def cleanup(self) -> None: # callback after the op is done pass diff --git a/drivers/SRCommand.py b/drivers/SRCommand.py index 6afbdfbde..72694c091 100755 --- a/drivers/SRCommand.py +++ b/drivers/SRCommand.py @@ -182,12 +182,16 @@ def _run(self, sr, target): dconf_type = sr.dconf.get("type") if not dconf_type or not NO_LOGGING.get(dconf_type) or \ self.cmd not in NO_LOGGING[dconf_type]: - if 'device_config' in self.params: - util.SMlog("%s %s" % ( - self.cmd, util.hidePasswdInParams( - self.params, 'device_config'))) - else: - util.SMlog("%s %s" % (self.cmd, repr(self.params))) + params_to_log = self.params + + if 'device_config' in params_to_log: + params_to_log = util.hidePasswdInParams( + self.params, 'device_config') + + if 'session_ref' in params_to_log: + params_to_log['session_ref'] = '******' + + util.SMlog("%s %s" % (self.cmd, repr(params_to_log))) caching_params = dict((k, self.params.get(k)) for k in [blktap2.VDI.CONF_KEY_ALLOW_CACHING, diff --git a/drivers/VDI.py b/drivers/VDI.py index d371bd1b0..fa7c5a449 100755 --- a/drivers/VDI.py +++ b/drivers/VDI.py @@ -16,6 +16,8 @@ # VDI: Base class for virtual disk instances # +from sm_typing import Dict, Optional + import cleanup import SR import xmlrpc.client @@ -134,7 +136,7 @@ def from_uuid(session, vdi_uuid): sr.srcmd.params['vdi_ref'] = vdi_ref return sr.vdi(vdi_uuid) - def create(self, sr_uuid, vdi_uuid, size): + def create(self, sr_uuid, 
vdi_uuid, size) -> str: """Create a VDI of size MB on the given SR. This operation IS NOT idempotent and will fail if the UUID @@ -147,7 +149,7 @@ def create(self, sr_uuid, vdi_uuid, size): """ raise xs_errors.XenError('Unimplemented') - def update(self, sr_uuid, vdi_uuid): + def update(self, sr_uuid, vdi_uuid) -> None: """Query and update the configuration of a particular VDI. Given an SR and VDI UUID, this operation returns summary statistics @@ -157,7 +159,7 @@ def update(self, sr_uuid, vdi_uuid): # no-op unless individual backends implement it return - def introduce(self, sr_uuid, vdi_uuid): + def introduce(self, sr_uuid, vdi_uuid) -> str: """Explicitly introduce a particular VDI. Given an SR and VDI UUID and a disk location (passed in via the @@ -166,7 +168,7 @@ def introduce(self, sr_uuid, vdi_uuid): """ raise xs_errors.XenError('Unimplemented') - def attach(self, sr_uuid, vdi_uuid): + def attach(self, sr_uuid, vdi_uuid) -> str: """Initiate local access to the VDI. Initialises any device state required to access the VDI. @@ -180,7 +182,7 @@ def attach(self, sr_uuid, vdi_uuid): 'xenstore_data': (self.xenstore_data or {})} return xmlrpc.client.dumps((struct, ), "", True) - def detach(self, sr_uuid, vdi_uuid): + def detach(self, sr_uuid, vdi_uuid) -> None: """Remove local access to the VDI. Destroys any device state initialised via the vdi.attach() command. @@ -188,7 +190,7 @@ def detach(self, sr_uuid, vdi_uuid): """ raise xs_errors.XenError('Unimplemented') - def clone(self, sr_uuid, vdi_uuid): + def clone(self, sr_uuid, vdi_uuid) -> str: """Create a mutable instance of the referenced VDI. This operation is not idempotent and will fail if the UUID @@ -209,14 +211,14 @@ def resize_online(self, sr_uuid, vdi_uuid, size): been paused for the duration of this call.""" raise xs_errors.XenError('Unimplemented') - def generate_config(self, sr_uuid, vdi_uuid): + def generate_config(self, sr_uuid, vdi_uuid) -> str: """Generate the XML config required to activate a VDI for use when XAPI is not running. Activation is handled by the vdi_attach_from_config() SMAPI call. """ raise xs_errors.XenError('Unimplemented') - def compose(self, sr_uuid, vdi1, vdi2): + def compose(self, sr_uuid, vdi1, vdi2) -> None: """Layer the updates from [vdi2] onto [vdi1], calling the result [vdi2]. @@ -225,7 +227,7 @@ def compose(self, sr_uuid, vdi1, vdi2): """ raise xs_errors.XenError('Unimplemented') - def attach_from_config(self, sr_uuid, vdi_uuid): + def attach_from_config(self, sr_uuid, vdi_uuid) -> str: """Activate a VDI based on the config passed in on the CLI. For use when XAPI is not running. The config is generated by the Activation is handled by the vdi_generate_config() SMAPI call. 
@@ -233,23 +235,23 @@ def attach_from_config(self, sr_uuid, vdi_uuid): raise xs_errors.XenError('Unimplemented') def _do_snapshot(self, sr_uuid, vdi_uuid, snapType, - cloneOp=False, secondary=None, cbtlog=None): + cloneOp=False, secondary=None, cbtlog=None) -> str: raise xs_errors.XenError('Unimplemented') - def _delete_cbt_log(self): + def _delete_cbt_log(self) -> None: raise xs_errors.XenError('Unimplemented') - def _rename(self, old, new): + def _rename(self, old, new) -> None: raise xs_errors.XenError('Unimplemented') - def _cbt_log_exists(self, logpath): + def _cbt_log_exists(self, logpath) -> bool: """Check if CBT log file exists Must be implemented by all classes inheriting from base VDI class """ raise xs_errors.XenError('Unimplemented') - def resize(self, sr_uuid, vdi_uuid, size): + def resize(self, sr_uuid, vdi_uuid, size) -> str: """Resize the given VDI to size MB. Size can be any valid disk size greater than [or smaller than] the current value. @@ -295,7 +297,7 @@ def resize_cbt(self, sr_uuid, vdi_uuid, size): % vdi_uuid) self._disable_cbt_on_error(alert_name, alert_str) - def delete(self, sr_uuid, vdi_uuid, data_only=False): + def delete(self, sr_uuid, vdi_uuid, data_only=False) -> None: """Delete this VDI. This operation IS idempotent and should succeed if the VDI @@ -371,7 +373,7 @@ def delete(self, sr_uuid, vdi_uuid, data_only=False): lock.release() lock.cleanup("cbtlog", str(vdi_uuid)) - def snapshot(self, sr_uuid, vdi_uuid): + def snapshot(self, sr_uuid, vdi_uuid) -> str: """Save an immutable copy of the referenced VDI. This operation IS NOT idempotent and will fail if the UUID @@ -405,7 +407,7 @@ def snapshot(self, sr_uuid, vdi_uuid): return self._do_snapshot(sr_uuid, vdi_uuid, snapType, secondary=secondary, cbtlog=cbtlog) - def activate(self, sr_uuid, vdi_uuid): + def activate(self, sr_uuid, vdi_uuid) -> Optional[Dict[str, str]]: """Activate VDI - called pre tapdisk open""" if self._get_blocktracking_status(): if 'args' in self.sr.srcmd.params: @@ -443,7 +445,7 @@ def activate(self, sr_uuid, vdi_uuid): return {'cbtlog': logpath} return None - def deactivate(self, sr_uuid, vdi_uuid): + def deactivate(self, sr_uuid, vdi_uuid) -> None: """Deactivate VDI - called post tapdisk close""" if self._get_blocktracking_status(): from lock import Lock @@ -459,7 +461,7 @@ def deactivate(self, sr_uuid, vdi_uuid): finally: lock.release() - def get_params(self): + def get_params(self) -> str: """ Returns: XMLRPC response containing a single struct with fields @@ -469,7 +471,7 @@ def get_params(self): 'uuid': self.uuid} return xmlrpc.client.dumps((struct, ), "", True) - def load(self, vdi_uuid): + def load(self, vdi_uuid) -> None: """Post-init hook""" pass @@ -806,7 +808,7 @@ def _cbt_snapshot(self, snapshot_uuid, consistency_state): % self.uuid) self._disable_cbt_on_error(alert_name, alert_str) - def _get_blocktracking_status(self, uuid=None): + def _get_blocktracking_status(self, uuid=None) -> bool: """ Get blocktracking status """ if not uuid: uuid = self.uuid @@ -828,7 +830,7 @@ def _set_blocktracking_status(self, vdi_ref, enable): self.session.xenapi.VDI.add_to_other_config( vdi_ref, "cbt_enabled", enable) - def _ensure_cbt_space(self): + def _ensure_cbt_space(self) -> None: """ Ensure enough CBT space """ pass @@ -837,12 +839,12 @@ def _get_cbt_logname(self, uuid): logName = "%s.%s" % (uuid, CBTLOG_TAG) return logName - def _get_cbt_logpath(self, uuid): + def _get_cbt_logpath(self, uuid) -> str: """ Get CBT logpath """ logName = self._get_cbt_logname(uuid) return 
os.path.join(self.sr.path, logName)
 
-    def _create_cbt_log(self):
+    def _create_cbt_log(self) -> str:
         """ Create CBT log """
         try:
             logpath = self._get_cbt_logpath(self.uuid)
@@ -861,7 +863,7 @@ def _create_cbt_log(self):
 
         return logpath
 
-    def _activate_cbt_log(self, logname):
+    def _activate_cbt_log(self, logname) -> bool:
         """Activate CBT log file
 
         SR specific Implementation required for VDIs on block-based SRs.
@@ -869,7 +871,7 @@ def _activate_cbt_log(self, logname):
         """
         return False
 
-    def _deactivate_cbt_log(self, logname):
+    def _deactivate_cbt_log(self, logname) -> None:
         """Deactivate CBT log file
 
         SR specific Implementation required for VDIs on block-based SRs.
diff --git a/drivers/XE_SR_ERRORCODES.xml b/drivers/XE_SR_ERRORCODES.xml
index 47fefd830..b5813153f 100755
--- a/drivers/XE_SR_ERRORCODES.xml
+++ b/drivers/XE_SR_ERRORCODES.xml
@@ -909,11 +909,85 @@
 		<value>461</value>
 	</code>
 
+	<code>
+		<name>WipefsFailure</name>
+		<description>Failed to wipe pre-existing filesystem signature.</description>
+		<value>462</value>
+	</code>
+
 	<code>
 		<name>GenericException</name>
 		<description>SM has thrown a generic python exception</description>
 		<value>1200</value>
 	</code>
+
+	<code>
+		<name>ZFSSRCreate</name>
+		<description>ZFS SR creation error</description>
+		<value>5000</value>
+	</code>
+
+	<code>
+		<name>ZFSSRDelete</name>
+		<description>ZFS SR deletion error</description>
+		<value>5001</value>
+	</code>
+
+	<code>
+		<name>LinstorMaster</name>
+		<description>Linstor request must come from master</description>
+		<value>5002</value>
+	</code>
+
+	<code>
+		<name>LinstorConfigHostsMissing</name>
+		<description>The request is missing the LINSTOR hosts parameter</description>
+		<value>5003</value>
+	</code>
+
+	<code>
+		<name>LinstorConfigGroupNameMissing</name>
+		<description>The request is missing the LINSTOR group name parameter</description>
+		<value>5004</value>
+	</code>
+
+	<code>
+		<name>LinstorConfigRedundancyMissing</name>
+		<description>The request is missing the LINSTOR redundancy parameter</description>
+		<value>5005</value>
+	</code>
+
+	<code>
+		<name>LinstorSRCreate</name>
+		<description>LINSTOR SR creation error</description>
+		<value>5006</value>
+	</code>
+
+	<code>
+		<name>LinstorSRDelete</name>
+		<description>LINSTOR SR delete error</description>
+		<value>5007</value>
+	</code>
+
+	<code>
+		<name>LargeBlockSymlinkExist</name>
+		<description>Symlink already exists</description>
+		<value>5008</value>
+	</code>
+
+	<code>
+		<name>LargeBlockNoLosetup</name>
+		<description>Couldn't find loop device</description>
+		<value>5009</value>
+	</code>
+
+	<code>
+		<name>LargeBlockIncorrectBlocksize</name>
+		<description>Blocksize isn't compatible with the driver</description>
+		<value>5010</value>
+	</code>
+
+	<code>
+		<name>LargeBlockVGReconnectFailed</name>
+		<description>Failed to reconnect the VolumeGroup</description>
+		<value>5011</value>
+	</code>
+
diff --git a/drivers/XFSSR.py b/drivers/XFSSR.py
new file mode 100755
index 000000000..5bd732674
--- /dev/null
+++ b/drivers/XFSSR.py
@@ -0,0 +1,260 @@
+#!/usr/bin/python3
+#
+# Original work copyright (C) Citrix Systems Inc.
+# Modified work copyright (C) Vates SAS and XCP-ng community
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published
+# by the Free Software Foundation; version 2.1 only.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Lesser General Public License for more details.
+# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +# +# XFSSR: Based on local-file storage repository, mounts xfs partition + +from sm_typing import override + +import SR +from SR import deviceCheck +import SRCommand +import VDI +import FileSR +import util +import lvutil +import scsiutil + +import os +import xs_errors +import vhdutil +from lock import Lock +from constants import EXT_PREFIX + +CAPABILITIES = ["SR_PROBE", "SR_UPDATE", "SR_SUPPORTS_LOCAL_CACHING", \ + "VDI_CREATE", "VDI_DELETE", "VDI_ATTACH", "VDI_DETACH", \ + "VDI_UPDATE", "VDI_CLONE", "VDI_SNAPSHOT", "VDI_RESIZE", "VDI_MIRROR", \ + "VDI_GENERATE_CONFIG", \ + "VDI_RESET_ON_BOOT/2", "ATOMIC_PAUSE", "VDI_CONFIG_CBT", + "VDI_ACTIVATE", "VDI_DEACTIVATE", "THIN_PROVISIONING", "VDI_READ_CACHING"] + +CONFIGURATION = [['device', 'local device path (required) (e.g. /dev/sda3)']] + +DRIVER_INFO = { + 'name': 'Local XFS VHD', + 'description': 'SR plugin which represents disks as VHD files stored on a local XFS filesystem, created inside an LVM volume', + 'vendor': 'Vates SAS', + 'copyright': '(C) 2019 Vates SAS', + 'driver_version': '1.0', + 'required_api_version': '1.0', + 'capabilities': CAPABILITIES, + 'configuration': CONFIGURATION + } + +DRIVER_CONFIG = {"ATTACH_FROM_CONFIG_WITH_TAPDISK": True} + + +class XFSSR(FileSR.FileSR): + """XFS Local file storage repository""" + + DRIVER_TYPE = 'xfs' + + @override + @staticmethod + def handles(srtype) -> bool: + return srtype == XFSSR.DRIVER_TYPE + + @override + def load(self, sr_uuid) -> None: + if not self._is_xfs_available(): + raise xs_errors.XenError( + 'SRUnavailable', + opterr='xfsprogs is not installed' + ) + + self.ops_exclusive = FileSR.OPS_EXCLUSIVE + self.lock = Lock(vhdutil.LOCK_TYPE_SR, self.uuid) + self.sr_vditype = SR.DEFAULT_TAP + + self.path = os.path.join(SR.MOUNT_BASE, sr_uuid) + self.vgname = EXT_PREFIX + sr_uuid + self.remotepath = os.path.join("/dev", self.vgname, sr_uuid) + self.attached = self._checkmount() + self.driver_config = DRIVER_CONFIG + + @override + def delete(self, sr_uuid) -> None: + super(XFSSR, self).delete(sr_uuid) + + # Check PVs match VG + try: + for dev in self.dconf['device'].split(','): + cmd = ["pvs", dev] + txt = util.pread2(cmd) + if txt.find(self.vgname) == -1: + raise xs_errors.XenError('VolNotFound', \ + opterr='volume is %s' % self.vgname) + except util.CommandException as inst: + raise xs_errors.XenError('PVSfailed', \ + opterr='error is %d' % inst.code) + + # Remove LV, VG and pv + try: + cmd = ["lvremove", "-f", self.remotepath] + util.pread2(cmd) + + cmd = ["vgremove", self.vgname] + util.pread2(cmd) + + for dev in self.dconf['device'].split(','): + cmd = ["pvremove", dev] + util.pread2(cmd) + except util.CommandException as inst: + raise xs_errors.XenError('LVMDelete', \ + opterr='errno is %d' % inst.code) + + @override + def attach(self, sr_uuid) -> None: + if not self._checkmount(): + try: + #Activate LV + cmd = ['lvchange', '-ay', self.remotepath] + util.pread2(cmd) + + # make a mountpoint: + if not os.path.isdir(self.path): + os.makedirs(self.path) + except util.CommandException as inst: + raise xs_errors.XenError('LVMMount', \ + opterr='Unable to activate LV. Errno is %d' % inst.code) + + try: + util.pread(["fsck", "-a", self.remotepath]) + except util.CommandException as inst: + if inst.code == 1: + util.SMlog("FSCK detected and corrected FS errors. 
Not fatal.") + else: + raise xs_errors.XenError('LVMMount', \ + opterr='FSCK failed on %s. Errno is %d' % (self.remotepath, inst.code)) + + try: + util.pread(["mount", self.remotepath, self.path]) + except util.CommandException as inst: + raise xs_errors.XenError('LVMMount', \ + opterr='Failed to mount FS. Errno is %d' % inst.code) + + self.attached = True + + #Update SCSIid string + scsiutil.add_serial_record(self.session, self.sr_ref, \ + scsiutil.devlist_to_serialstring(self.dconf['device'].split(','))) + + # Set the block scheduler + for dev in self.dconf['device'].split(','): + self.block_setscheduler(dev) + + @override + def detach(self, sr_uuid) -> None: + super(XFSSR, self).detach(sr_uuid) + try: + # deactivate SR + cmd = ["lvchange", "-an", self.remotepath] + util.pread2(cmd) + except util.CommandException as inst: + raise xs_errors.XenError('LVMUnMount', \ + opterr='lvm -an failed errno is %d' % inst.code) + + @override + @deviceCheck + def probe(self) -> str: + return lvutil.srlist_toxml(lvutil.scan_srlist(EXT_PREFIX, self.dconf['device']), + EXT_PREFIX) + + @override + @deviceCheck + def create(self, sr_uuid, size) -> None: + if self._checkmount(): + raise xs_errors.XenError('SRExists') + + # Check none of the devices already in use by other PBDs + if util.test_hostPBD_devs(self.session, sr_uuid, self.dconf['device']): + raise xs_errors.XenError('SRInUse') + + # Check serial number entry in SR records + for dev in self.dconf['device'].split(','): + if util.test_scsiserial(self.session, dev): + raise xs_errors.XenError('SRInUse') + + if not lvutil._checkVG(self.vgname): + lvutil.createVG(self.dconf['device'], self.vgname) + + if lvutil._checkLV(self.remotepath): + raise xs_errors.XenError('SRExists') + + try: + numdevs = len(self.dconf['device'].split(',')) + cmd = ["lvcreate", "-n", sr_uuid] + if numdevs > 1: + lowest = -1 + for dev in self.dconf['device'].split(','): + stats = lvutil._getPVstats(dev) + if lowest < 0 or stats['freespace'] < lowest: + lowest = stats['freespace'] + size_mb = (lowest // (1024 * 1024)) * numdevs + + # Add stripe parameter to command + cmd += ["-i", str(numdevs), "-I", "2048"] + else: + stats = lvutil._getVGstats(self.vgname) + size_mb = stats['freespace'] // (1024 * 1024) + assert(size_mb > 0) + cmd += ["-L", str(size_mb), self.vgname] + text = util.pread(cmd) + + cmd = ["lvchange", "-ay", self.remotepath] + text = util.pread(cmd) + except util.CommandException as inst: + raise xs_errors.XenError('LVMCreate', \ + opterr='lv operation, error %d' % inst.code) + except AssertionError: + raise xs_errors.XenError('SRNoSpace', \ + opterr='Insufficient space in VG %s' % self.vgname) + + try: + util.pread2(["mkfs.xfs", self.remotepath]) + except util.CommandException as inst: + raise xs_errors.XenError('LVMFilesystem', \ + opterr='mkfs failed error %d' % inst.code) + + #Update serial number string + scsiutil.add_serial_record(self.session, self.sr_ref, \ + scsiutil.devlist_to_serialstring(self.dconf['device'].split(','))) + + @override + def vdi(self, uuid, loadLocked = False) -> VDI.VDI: + return XFSFileVDI(self, uuid) + + @staticmethod + def _is_xfs_available(): + return util.find_executable('mkfs.xfs') + + +class XFSFileVDI(FileSR.FileVDI): + @override + def attach(self, sr_uuid, vdi_uuid) -> str: + if not hasattr(self, 'xenstore_data'): + self.xenstore_data = {} + + self.xenstore_data['storage-type'] = XFSSR.DRIVER_TYPE + + return super(XFSFileVDI, self).attach(sr_uuid, vdi_uuid) + + +if __name__ == '__main__': + SRCommand.run(XFSSR, DRIVER_INFO) 
+else: + SR.registerSR(XFSSR) diff --git a/drivers/ZFSSR.py b/drivers/ZFSSR.py new file mode 100644 index 000000000..13a895a25 --- /dev/null +++ b/drivers/ZFSSR.py @@ -0,0 +1,147 @@ +#!/usr/bin/env python3 +# +# Copyright (C) 2020 Vates SAS +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +from sm_typing import override + +import SR +import SRCommand +import VDI + +import FileSR + +import util +import xs_errors + +CAPABILITIES = [ + 'SR_UPDATE', + 'VDI_CREATE', + 'VDI_DELETE', + 'VDI_ATTACH', + 'VDI_DETACH', + 'VDI_CLONE', + 'VDI_SNAPSHOT', + 'VDI_RESIZE', + 'VDI_MIRROR', + 'VDI_GENERATE_CONFIG', + 'ATOMIC_PAUSE', + 'VDI_CONFIG_CBT', + 'VDI_ACTIVATE', + 'VDI_DEACTIVATE', + 'THIN_PROVISIONING' +] + +CONFIGURATION = [ + ['location', 'local ZFS directory path (required)'] +] + +DRIVER_INFO = { + 'name': 'Local ZFS VHD', + 'description': + 'SR plugin which represents disks as VHD files stored on a ZFS disk', + 'vendor': 'Vates SAS', + 'copyright': '(C) 2020 Vates SAS', + 'driver_version': '1.0', + 'required_api_version': '1.0', + 'capabilities': CAPABILITIES, + 'configuration': CONFIGURATION +} + + +def is_zfs_available(): + return util.find_executable('zfs') and \ + util.pathexists('/sys/module/zfs/initstate') + + +def is_zfs_path(path): + cmd = ['findmnt', '-o', 'FSTYPE', '-n', path] + fs_type = util.pread2(cmd).split('\n')[0] + return fs_type == 'zfs' + + +class ZFSSR(FileSR.FileSR): + DRIVER_TYPE = 'zfs' + + @override + @staticmethod + def handles(type) -> bool: + return type == ZFSSR.DRIVER_TYPE + + @override + def load(self, sr_uuid) -> None: + if not is_zfs_available(): + raise xs_errors.XenError( + 'SRUnavailable', + opterr='zfs is not installed or module is not loaded' + ) + return super(ZFSSR, self).load(sr_uuid) + + @override + def create(self, sr_uuid, size) -> None: + if not is_zfs_path(self.remotepath): + raise xs_errors.XenError( + 'ZFSSRCreate', + opterr='Cannot create SR, path is not a ZFS mountpoint' + ) + return super(ZFSSR, self).create(sr_uuid, size) + + @override + def delete(self, sr_uuid) -> None: + if not self._checkmount(): + raise xs_errors.XenError( + 'ZFSSRDelete', + opterr='ZFS SR is not mounted or uses an invalid FS type' + ) + return super(ZFSSR, self).delete(sr_uuid) + + @override + def attach(self, sr_uuid) -> None: + if not is_zfs_path(self.remotepath): + raise xs_errors.XenError( + 'SRUnavailable', + opterr='Invalid ZFS path' + ) + super(ZFSSR, self).attach(sr_uuid) + + @override + def detach(self, sr_uuid) -> None: + return super(ZFSSR, self).detach(sr_uuid) + + @override + def vdi(self, uuid, loadLocked=False) -> VDI.VDI: + return ZFSFileVDI(self, uuid) + + # Ensure _checkmount is overridden to prevent bad behaviors in FileSR. 
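+    # A non-ZFS filesystem (or a plain directory) left at the SR location must
+    # not be reported as a mounted SR.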
+ @override + def _checkmount(self) -> bool: + return super(ZFSSR, self)._checkmount() and \ + is_zfs_path(self.remotepath) + + +class ZFSFileVDI(FileSR.FileVDI): + @override + def attach(self, sr_uuid, vdi_uuid) -> str: + if not hasattr(self, 'xenstore_data'): + self.xenstore_data = {} + + self.xenstore_data['storage-type'] = ZFSSR.DRIVER_TYPE + + return super(ZFSFileVDI, self).attach(sr_uuid, vdi_uuid) + + +if __name__ == '__main__': + SRCommand.run(ZFSSR, DRIVER_INFO) +else: + SR.registerSR(ZFSSR) diff --git a/drivers/blktap2.py b/drivers/blktap2.py index e9887c8ed..05d12b7bd 100755 --- a/drivers/blktap2.py +++ b/drivers/blktap2.py @@ -17,6 +17,11 @@ # # blktap2: blktap/tapdisk management layer # + +from sm_typing import Any, Callable, ClassVar, Dict, override + +from abc import abstractmethod + import grp import os import re @@ -50,6 +55,12 @@ from xmlrpc.client import ServerProxy, Transport from socket import socket, AF_UNIX, SOCK_STREAM +try: + from linstorvolumemanager import log_drbd_openers + LINSTOR_AVAILABLE = True +except ImportError: + LINSTOR_AVAILABLE = False + PLUGIN_TAP_PAUSE = "tapdisk-pause" SOCKPATH = "/var/xapi/xcp-rrdd" @@ -143,7 +154,8 @@ def __init__(self, cmd, **info): self.cmd = cmd self.info = info - def __str__(self): + @override + def __str__(self) -> str: items = self.info.items() info = ", ".join("%s=%s" % item for item in items) @@ -439,7 +451,8 @@ class TapdiskExists(Exception): def __init__(self, tapdisk): self.tapdisk = tapdisk - def __str__(self): + @override + def __str__(self) -> str: return "%s already running" % self.tapdisk @@ -449,7 +462,8 @@ class TapdiskNotRunning(Exception): def __init__(self, **attrs): self.attrs = attrs - def __str__(self): + @override + def __str__(self) -> str: items = iter(self.attrs.items()) attrs = ", ".join("%s=%s" % attr for attr in items) @@ -462,7 +476,8 @@ class TapdiskNotUnique(Exception): def __init__(self, tapdisks): self.tapdisks = tapdisks - def __str__(self): + @override + def __str__(self) -> str: tapdisks = map(str, self.tapdisks) return "Found multiple tapdisks: %s" % tapdisks @@ -474,7 +489,8 @@ def __init__(self, arg, err): self.arg = arg self.err = err - def __str__(self): + @override + def __str__(self) -> str: return "Tapdisk(%s): %s" % (self.arg, self.err) def get_error(self): @@ -487,7 +503,8 @@ class TapdiskInvalidState(Exception): def __init__(self, tapdisk): self.tapdisk = tapdisk - def __str__(self): + @override + def __str__(self) -> str: return str(self.tapdisk) @@ -507,15 +524,16 @@ def mkdirs(path, mode=0o777): class KObject(object): - SYSFS_CLASSTYPE = None + SYSFS_CLASSTYPE: ClassVar[str] = "" - def sysfs_devname(self): - raise NotImplementedError("sysfs_devname is undefined") + @abstractmethod + def sysfs_devname(self) -> str: + pass class Attribute(object): - SYSFS_NODENAME = None + SYSFS_NODENAME: ClassVar[str] = "" def __init__(self, path): self.path = path @@ -529,7 +547,8 @@ class NoSuchAttribute(Exception): def __init__(self, name): self.name = name - def __str__(self): + @override + def __str__(self) -> str: return "No such attribute: %s" % self.name def _open(self, mode='r'): @@ -586,10 +605,12 @@ def allocate(cls): def free(self): TapCtl.free(self.minor) - def __str__(self): + @override + def __str__(self) -> str: return "%s(minor=%d)" % (self.__class__.__name__, self.minor) - def sysfs_devname(self): + @override + def sysfs_devname(self) -> str: return "blktap!blktap%d" % self.minor class Pool(Attribute): @@ -657,7 +678,8 @@ def __init__(self, pid, minor, _type, path, state): 
self._dirty = False self._blktap = None - def __str__(self): + @override + def __str__(self) -> str: state = self.pause_state() return "Tapdisk(%s, pid=%d, minor=%s, state=%s)" % \ (self.get_arg(), self.pid, self.minor, state) @@ -743,7 +765,8 @@ def __init__(self, _type, path): self.type = _type self.path = path - def __str__(self): + @override + def __str__(self) -> str: return "%s:%s" % (self.type, self.path) @classmethod @@ -763,14 +786,16 @@ class InvalidType(Exception): def __init__(self, _type): self.type = _type - def __str__(self): + @override + def __str__(self) -> str: return "Not a Tapdisk type: %s" % self.type class InvalidArgument(Exception): def __init__(self, arg): self.arg = arg - def __str__(self): + @override + def __str__(self) -> str: return "Not a Tapdisk image: %s" % self.arg def get_arg(self): @@ -811,7 +836,23 @@ def launch_on_tap(cls, blktap, path, _type, options): TapCtl.attach(pid, minor) try: - TapCtl.open(pid, minor, _type, path, options) + retry_open = 0 + while True: + try: + TapCtl.open(pid, minor, _type, path, options) + break + except TapCtl.CommandFailure as e: + err = ( + 'status' in e.info and e.info['status'] + ) or None + if err in (errno.EIO, errno.EROFS, errno.EAGAIN): + if retry_open < 5: + retry_open += 1 + time.sleep(1) + continue + if LINSTOR_AVAILABLE and err == errno.EROFS: + log_drbd_openers(path) + raise try: tapdisk = cls.__from_blktap(blktap) node = '/sys/dev/block/%d:%d' % (tapdisk.major(), tapdisk.minor) @@ -896,7 +937,8 @@ def _refresh(self, __get): t = self.from_minor(__get('minor')) self.__init__(t.pid, t.minor, t.type, t.path, t.state) - def __getattribute__(self, name): + @override + def __getattribute__(self, name) -> Any: def __get(name): # NB. avoid(rec(ursion) return object.__getattribute__(self, name) @@ -1062,7 +1104,8 @@ def __init__(self, vdi_type, target): self.vdi_type = vdi_type self.target = target - def __str__(self): + @override + def __str__(self) -> str: return \ "Target %s has unexpected VDI type '%s'" % \ (type(self.target), self.vdi_type) @@ -1145,13 +1188,14 @@ class Link(object): # before VDI.activate. Therefore those link steps where we # relink existing devices under deterministic path names. 
- BASEDIR = None + BASEDIR: ClassVar[str] = "" - def _mklink(self, target): - raise NotImplementedError("_mklink is not defined") + def _mklink(self, target) -> None: + pass - def _equals(self, target): - raise NotImplementedError("_equals is not defined") + @abstractmethod + def _equals(self, target) -> bool: + pass def __init__(self, path): self._path = path @@ -1172,7 +1216,7 @@ def path(self): def stat(self): return os.stat(self.path()) - def mklink(self, target): + def mklink(self, target) -> None: path = self.path() util.SMlog("%s -> %s" % (self, target)) @@ -1195,7 +1239,8 @@ def unlink(self): if e.errno != errno.ENOENT: raise - def __str__(self): + @override + def __str__(self) -> str: path = self.path() return "%s(%s)" % (self.__class__.__name__, path) @@ -1208,10 +1253,12 @@ def readlink(self): def symlink(self): return self.path() - def _mklink(self, target): + @override + def _mklink(self, target) -> None: os.symlink(target, self.path()) - def _equals(self, target): + @override + def _equals(self, target) -> bool: return self.readlink() == target class DeviceNode(Link): @@ -1228,7 +1275,8 @@ def is_block(cls, target): """Whether @target refers to a block device.""" return S_ISBLK(cls._real_stat(target).st_mode) - def _mklink(self, target): + @override + def _mklink(self, target) -> None: st = self._real_stat(target) if not S_ISBLK(st.st_mode): @@ -1238,7 +1286,8 @@ def _mklink(self, target): os.mknod(self.path(), st.st_mode | stat.S_IRGRP, st.st_rdev) os.chown(self.path(), st.st_uid, grp.getgrnam("disk").gr_gid) - def _equals(self, target): + @override + def _equals(self, target) -> bool: target_rdev = self._real_stat(target).st_rdev return self.stat().st_rdev == target_rdev @@ -1253,7 +1302,8 @@ def __init__(self, path, st): self.path = path self.st = st - def __str__(self): + @override + def __str__(self) -> str: return "%s is not a block device: %s" % (self.path, self.st) class Hybrid(Link): @@ -1269,14 +1319,16 @@ def rdev(self): return self._devnode.rdev() raise self._devnode.NotABlockDevice(self.path(), st) - def mklink(self, target): + @override + def mklink(self, target) -> None: if self._devnode.is_block(target): self._obj = self._devnode else: self._obj = self._symlink self._obj.mklink(target) - def _equals(self, target): + @override + def _equals(self, target) -> bool: return self._obj._equals(target) class PhyLink(SymLink): @@ -2083,7 +2135,8 @@ def __init__(self, args): super().__init__(args) self.key = args[0] - def __str__(self): + @override + def __str__(self) -> str: return \ "Key '%s' missing in environment. " % self.key + \ "Not called in udev context?" 
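
Note on the launch_on_tap() change above: TapCtl.open is now retried a bounded number of times when it fails with a transient errno (EIO, EROFS or EAGAIN), and on a persistent EROFS the DRBD openers are logged before the failure is re-raised (only when the LINSTOR libraries are importable). A minimal sketch of the same bounded-retry idea, using a hypothetical run_open() callable and a generic CommandError that carries an errno in .status -- both are stand-ins, not the real TapCtl API:

    import errno
    import time

    class CommandError(Exception):
        """Stand-in for a command failure carrying an errno in .status."""
        def __init__(self, status):
            super().__init__("command failed with errno %d" % status)
            self.status = status

    TRANSIENT = (errno.EIO, errno.EROFS, errno.EAGAIN)

    def open_with_retries(run_open, attempts=5, delay=1.0):
        """Call run_open(); retry up to 'attempts' times on transient
        errors, then re-raise the last failure."""
        retries = 0
        while True:
            try:
                return run_open()
            except CommandError as e:
                if e.status in TRANSIENT and retries < attempts:
                    retries += 1
                    time.sleep(delay)
                    continue
                # A persistent read-only failure is the point at which the
                # driver logs who still holds the device open, then gives up.
                raise

In the driver the loop stays inline around TapCtl.open(pid, minor, _type, path, options) so it can catch TapCtl.CommandFailure directly; the sketch only shows the shape of the retry policy.
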
@@ -2106,11 +2159,12 @@ def __init__(self, event, handler): self.event = event self.handler = handler - def __str__(self): + @override + def __str__(self) -> str: return "Uevent '%s' not handled by %s" % \ (self.event, self.handler.__class__.__name__) - ACTIONS = {} + ACTIONS: Dict[str, Callable] = {} def run(self): @@ -2122,7 +2176,8 @@ def run(self): return fn(self) - def __str__(self): + @override + def __str__(self) -> str: try: action = self.get_action() except: @@ -2137,7 +2192,8 @@ def __init__(self): ClassDevice.__init__(self) self._default_pool = None - def sysfs_devname(self): + @override + def sysfs_devname(self) -> str: return "blktap!control" class DefaultPool(Attribute): @@ -2164,7 +2220,8 @@ class NoSuchPool(Exception): def __init__(self, name): self.name = name - def __str__(self): + @override + def __str__(self) -> str: return "No such pool: {}".format(self.name) def get_pool(self, name): @@ -2184,6 +2241,10 @@ def __init__(self, path): self.path = path self._size = None + @override + def sysfs_devname(self) -> str: + return '' + def sysfs_path(self): return self.path @@ -2206,7 +2267,7 @@ def get_size(self): class BusDevice(KObject): - SYSFS_BUSTYPE = None + SYSFS_BUSTYPE: ClassVar[str] = "" @classmethod def sysfs_bus_path(cls): @@ -2224,7 +2285,7 @@ class XenbusDevice(BusDevice): XBT_NIL = "" - XENBUS_DEVTYPE = None + XENBUS_DEVTYPE: ClassVar[str] = "" def __init__(self, domid, devid): self.domid = int(domid) @@ -2324,11 +2385,13 @@ def signal_hotplug(self, online=True): self._xs_rm_path(xapi_path) self._xs_rm_path(upstream_path) - def sysfs_devname(self): + @override + def sysfs_devname(self) -> str: return "%s-%d-%d" % (self.XENBUS_DEVTYPE, self.domid, self.devid) - def __str__(self): + @override + def __str__(self) -> str: return self.sysfs_devname() @classmethod @@ -2373,13 +2436,14 @@ def __init__(self, domid, devid): self._q_events = None class XenstoreValueError(Exception): - KEY = None + KEY: ClassVar[str] = "" def __init__(self, vbd, _str): self.vbd = vbd self.str = _str - def __str__(self): + @override + def __str__(self) -> str: return "Backend %s " % self.vbd + \ "has %s = %s" % (self.KEY, self.str) @@ -2412,10 +2476,12 @@ def makedev(self): def is_tap(self): return self.major == Tapdisk.major() - def __str__(self): + @override + def __str__(self) -> str: return "%s:%s" % (self.major, self.minor) - def __eq__(self, other): + @override + def __eq__(self, other) -> bool: return \ self.major == other.major and \ self.minor == other.minor @@ -2531,14 +2597,16 @@ def __init__(self, ident=None, action=None): UEventHandler.__init__(self) - def run(self): + @override + def run(self) -> None: self.xs_path = self.getenv('XENBUS_PATH') openlog(str(self), 0, self.LOG_FACILITY) UEventHandler.run(self) - def __str__(self): + @override + def __str__(self) -> str: try: path = self.xs_path @@ -2810,7 +2878,7 @@ def usage(stream): elif _class == 'tap': - attrs = {} + attrs: Dict[str, Any] = {} for item in sys.argv[2:]: try: key, val = item.split('=') diff --git a/drivers/cifutils.py b/drivers/cifutils.py index c183fe612..be82a37c1 100755 --- a/drivers/cifutils.py +++ b/drivers/cifutils.py @@ -60,7 +60,7 @@ def splitDomainAndUsername(uname): username = dom_username[1] else: raise CIFSException("A maximum of 2 tokens are expected " - "(\). {} were given." + "(\\). {} were given." 
.format(len(dom_username))) return username, domain diff --git a/drivers/cleanup.py b/drivers/cleanup.py index 76fcb8d1a..a50403c4b 100755 --- a/drivers/cleanup.py +++ b/drivers/cleanup.py @@ -18,6 +18,8 @@ # Script to coalesce and garbage collect VHD-based SR's in the background # +from sm_typing import Optional, override + import os import os.path import sys @@ -46,10 +48,22 @@ from refcounter import RefCounter from ipc import IPCFlag from lvmanager import LVActivator -from srmetadata import LVMMetadataHandler +from srmetadata import LVMMetadataHandler, VDI_TYPE_TAG from functools import reduce from time import monotonic as _time +try: + from linstorjournaler import LinstorJournaler + from linstorvhdutil import LinstorVhdUtil + from linstorvolumemanager import get_controller_uri + from linstorvolumemanager import LinstorVolumeManager + from linstorvolumemanager import LinstorVolumeManagerError + from linstorvolumemanager import PERSISTENT_PREFIX as LINSTOR_PERSISTENT_PREFIX + + LINSTOR_AVAILABLE = True +except ImportError: + LINSTOR_AVAILABLE = False + # Disable automatic leaf-coalescing. Online leaf-coalesce is currently not # possible due to lvhd_stop_using_() not working correctly. However, we leave # this option available through the explicit LEAFCLSC_FORCE flag in the VDI @@ -111,10 +125,11 @@ class Util: PREFIX = {"G": 1024 * 1024 * 1024, "M": 1024 * 1024, "K": 1024} - def log(text): + @staticmethod + def log(text) -> None: util.SMlog(text, ident="SMGC") - log = staticmethod(log) + @staticmethod def logException(tag): info = sys.exc_info() if info[0] == SystemExit: @@ -128,8 +143,8 @@ def logException(tag): Util.log("%s: EXCEPTION %s, %s" % (tag, info[0], info[1])) Util.log(tb) Util.log("*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*") - logException = staticmethod(logException) + @staticmethod def doexec(args, expectedRC, inputtext=None, ret=None, log=True): "Execute a subprocess, then return its return code, stdout, stderr" proc = subprocess.Popen(args, @@ -158,8 +173,8 @@ def doexec(args, expectedRC, inputtext=None, ret=None, log=True): if ret == Util.RET_STDERR: return stderr return stdout - doexec = staticmethod(doexec) + @staticmethod def runAbortable(func, ret, ns, abortTest, pollInterval, timeOut): """execute func in a separate thread and kill it if abortTest signals so""" @@ -210,23 +225,23 @@ def runAbortable(func, ret, ns, abortTest, pollInterval, timeOut): resultFlag.set("failure") Util.logException("This exception has occured") os._exit(0) - runAbortable = staticmethod(runAbortable) + @staticmethod def num2str(number): for prefix in ("G", "M", "K"): if number >= Util.PREFIX[prefix]: return "%.3f%s" % (float(number) / Util.PREFIX[prefix], prefix) return "%s" % number - num2str = staticmethod(num2str) + @staticmethod def numBits(val): count = 0 while val: count += val & 1 val = val >> 1 return count - numBits = staticmethod(numBits) + @staticmethod def countBits(bitmap1, bitmap2): """return bit count in the bitmap produced by ORing the two bitmaps""" len1 = len(bitmap1) @@ -248,14 +263,13 @@ def countBits(bitmap1, bitmap2): val = bitmapLong[i] count += Util.numBits(val) return count - countBits = staticmethod(countBits) + @staticmethod def getThisScript(): thisScript = util.get_real_path(__file__) if thisScript.endswith(".pyc"): thisScript = thisScript[:-1] return thisScript - getThisScript = staticmethod(getThisScript) ################################################################################ @@ -281,11 +295,11 @@ class XAPI: class LookupError(util.SMException): pass + 
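
The Util changes above swap the legacy post-assignment form (define the function, then rebind it with staticmethod()) for the @staticmethod decorator. The two spellings are equivalent at runtime; the decorator form is simply clearer and friendlier to static analysis. A small self-contained illustration, with names that are illustrative only:

    class Before:
        def log(text):
            # plain function, wrapped into a staticmethod just below
            print("SMGC: %s" % text)
        log = staticmethod(log)     # legacy spelling

    class After:
        @staticmethod
        def log(text):
            # same behaviour, decorator spelling
            print("SMGC: %s" % text)

    Before.log("hello")             # both calls behave identically
    After.log("hello")
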
@staticmethod def getSession(): session = XenAPI.xapi_local() session.xenapi.login_with_password(XAPI.USER, '', '', 'SM') return session - getSession = staticmethod(getSession) def __init__(self, session, srUuid): self.sessionPrivate = False @@ -482,7 +496,7 @@ def set_task_status(self, status): # # VDI # -class VDI: +class VDI(object): """Object representing a VDI of a VHD-based SR""" POLL_INTERVAL = 1 @@ -557,11 +571,11 @@ def __init__(self, sr, uuid, raw): def extractUuid(path): raise NotImplementedError("Implement in sub class") - def load(self, info=None): + def load(self, info=None) -> None: """Load VDI info""" - pass # abstract + pass - def getDriverName(self): + def getDriverName(self) -> str: return self.DRIVER_NAME_VHD def getRef(self): @@ -593,7 +607,7 @@ def ensureUnpaused(self): Util.log("Unpausing VDI %s" % self) self.unpause() - def pause(self, failfast=False): + def pause(self, failfast=False) -> None: if not blktap2.VDI.tap_pause(self.sr.xapi.session, self.sr.uuid, self.uuid, failfast): raise util.SMException("Failed to pause VDI %s" % self) @@ -698,14 +712,26 @@ def getAllPrunable(self): if child not in childList: thisPrunable = False - if not self.scanError and thisPrunable: + # We can destroy the current VDI if all childs are hidden BUT the + # current VDI must be hidden too to do that! + # Example in this case (after a failed live leaf coalesce): + # + # SMGC: [32436] SR 07ed ('linstor-nvme-sr') (2 VDIs in 1 VHD trees): + # SMGC: [32436] b5458d61(1.000G/4.127M) + # SMGC: [32436] *OLD_b545(1.000G/4.129M) + # + # OLD_b545 is hidden and must be removed, but b5458d61 not. + # Normally we are not in this function when the delete action is + # executed but in `_liveLeafCoalesce`. + + if not self.scanError and not self.hidden and thisPrunable: vdiList.append(self) return vdiList - def getSizeVHD(self): + def getSizeVHD(self) -> int: return self._sizeVHD - def getAllocatedSize(self): + def getAllocatedSize(self) -> int: return self._sizeAllocated def getTreeRoot(self): @@ -738,12 +764,12 @@ def getAllLeaves(self): leaves.extend(child.getAllLeaves()) return leaves - def updateBlockInfo(self): + def updateBlockInfo(self) -> Optional[str]: val = base64.b64encode(self._queryVHDBlocks()).decode() self.setConfig(VDI.DB_VHD_BLOCKS, val) return val - def rename(self, uuid): + def rename(self, uuid) -> None: "Rename the VDI file" assert(not self.sr.vdis.get(uuid)) self._clearRef() @@ -754,13 +780,20 @@ def rename(self, uuid): del self.sr.vdis[oldUuid] self.sr.vdis[self.uuid] = self - def delete(self): + def delete(self) -> None: "Physically delete the VDI" lock.Lock.cleanup(self.uuid, lvhdutil.NS_PREFIX_LVM + self.sr.uuid) lock.Lock.cleanupAll(self.uuid) self._clear() - def __str__(self): + def getParent(self) -> str: + return vhdutil.getParent(self.path, lambda x: x.strip()) + + def repair(self, parent) -> None: + vhdutil.repair(parent) + + @override + def __str__(self) -> str: strHidden = "" if self.hidden: strHidden = "*" @@ -781,7 +814,7 @@ def __str__(self): return "%s%s(%s%s%s)%s" % (strHidden, self.uuid[0:8], strSizeVirt, strSizeVHD, strSizeAllocated, strType) - def validate(self, fast=False): + def validate(self, fast=False) -> None: if not vhdutil.check(self.path, fast=fast): raise util.SMException("VHD %s corrupted" % self) @@ -795,7 +828,7 @@ def _clear(self): def _clearRef(self): self._vdiRef = None - def _doCoalesce(self): + def _doCoalesce(self) -> None: """Coalesce self onto parent. Only perform the actual coalescing of VHD, but not the subsequent relinking. 
We'll do that as the next step, after reloading the entire SR in case things have changed while we @@ -823,6 +856,7 @@ def _runTapdiskDiff(self): Util.doexec(cmd, 0) return True + @staticmethod def _reportCoalesceError(vdi, ce): """Reports a coalesce error to XenCenter. @@ -875,14 +909,17 @@ def _reportCoalesceError(vdi, ce): str(now.strftime('%s'))) if xcmsg: xapi.message.create(msg_name, "3", "SR", vdi.sr.uuid, msg_body) - _reportCoalesceError = staticmethod(_reportCoalesceError) + def coalesce(self) -> int: + # size is returned in sectors + return vhdutil.coalesce(self.path) * 512 + + @staticmethod def _doCoalesceVHD(vdi): try: startTime = time.time() vhdSize = vdi.getAllocatedSize() - # size is returned in sectors - coalesced_size = vhdutil.coalesce(vdi.path) * 512 + coalesced_size = vdi.coalesce() endTime = time.time() vdi.sr.recordStorageSpeed(startTime, endTime, coalesced_size) except util.CommandException as ce: @@ -897,7 +934,6 @@ def _doCoalesceVHD(vdi): raise ce except: raise - _doCoalesceVHD = staticmethod(_doCoalesceVHD) def _vdi_is_raw(self, vdi_path): """ @@ -921,12 +957,12 @@ def _coalesceVHD(self, timeOut): # Try a repair and reraise the exception parent = "" try: - parent = vhdutil.getParent(self.path, lambda x: x.strip()) + parent = self.getParent() if not self._vdi_is_raw(parent): # Repair error is logged and ignored. Error reraised later util.SMlog('Coalesce failed on %s, attempting repair on ' \ 'parent %s' % (self.uuid, parent)) - vhdutil.repair(parent) + self.repair(parent) except Exception as e: util.SMlog('(error ignored) Failed to repair parent %s ' \ 'after failed coalesce on %s, err: %s' % @@ -935,7 +971,7 @@ def _coalesceVHD(self, timeOut): util.fistpoint.activate("LVHDRT_coalescing_VHD_data", self.sr.uuid) - def _relinkSkip(self): + def _relinkSkip(self) -> None: """Relink children of this VDI to point to the parent of this VDI""" abortFlag = IPCFlag(self.sr.uuid) for child in self.children: @@ -1006,7 +1042,7 @@ def _loadInfoParent(self): if ret: self.parentUuid = ret - def _setParent(self, parent): + def _setParent(self, parent) -> None: vhdutil.setParent(self.path, parent.path, False) self.parent = parent self.parentUuid = parent.uuid @@ -1019,19 +1055,19 @@ def _setParent(self, parent): Util.log("Failed to update %s with vhd-parent field %s" % \ (self.uuid, self.parentUuid)) - def _loadInfoHidden(self): + def _loadInfoHidden(self) -> None: hidden = vhdutil.getHidden(self.path) self.hidden = (hidden != 0) - def _setHidden(self, hidden=True): + def _setHidden(self, hidden=True) -> None: vhdutil.setHidden(self.path, hidden) self.hidden = hidden - def _increaseSizeVirt(self, size, atomic=True): - """ensure the virtual size of 'self' is at least 'size'. Note that + def _increaseSizeVirt(self, size, atomic=True) -> None: + """ensure the virtual size of 'self' is at least 'size'. Note that resizing a VHD must always be offline and atomically: the file must not be open by anyone and no concurrent operations may take place. - Thus we use the Agent API call for performing paused atomic + Thus we use the Agent API call for performing paused atomic operations. 
If the caller is already in the atomic context, it must call with atomic = False""" if self.sizeVirt >= size: @@ -1059,14 +1095,14 @@ def _increaseSizeVirt(self, size, atomic=True): self.sizeVirt = vhdutil.getSizeVirt(self.path) - def _setSizeVirt(self, size): + def _setSizeVirt(self, size) -> None: """WARNING: do not call this method directly unless all VDIs in the subtree are guaranteed to be unplugged (and remain so for the duration of the operation): this operation is only safe for offline VHDs""" jFile = os.path.join(self.sr.path, self.uuid) vhdutil.setSizeVirt(self.path, size, jFile) - def _queryVHDBlocks(self): + def _queryVHDBlocks(self) -> bytes: return vhdutil.getBlockBitmap(self.path) def _getCoalescedSizeData(self): @@ -1085,20 +1121,20 @@ def _getCoalescedSizeData(self): assert(sizeData <= self.sizeVirt) return sizeData - def _calcExtraSpaceForCoalescing(self): + def _calcExtraSpaceForCoalescing(self) -> int: sizeData = self._getCoalescedSizeData() sizeCoalesced = sizeData + vhdutil.calcOverheadBitmap(sizeData) + \ vhdutil.calcOverheadEmpty(self.sizeVirt) Util.log("Coalesced size = %s" % Util.num2str(sizeCoalesced)) return sizeCoalesced - self.parent.getSizeVHD() - def _calcExtraSpaceForLeafCoalescing(self): + def _calcExtraSpaceForLeafCoalescing(self) -> int: """How much extra space in the SR will be required to [live-]leaf-coalesce this VDI""" # the space requirements are the same as for inline coalesce return self._calcExtraSpaceForCoalescing() - def _calcExtraSpaceForSnapshotCoalescing(self): + def _calcExtraSpaceForSnapshotCoalescing(self) -> int: """How much extra space in the SR will be required to snapshot-coalesce this VDI""" return self._calcExtraSpaceForCoalescing() + \ @@ -1115,6 +1151,7 @@ def _getAllSubtree(self): class FileVDI(VDI): """Object representing a VDI in a file-based SR (EXT or NFS)""" + @override @staticmethod def extractUuid(path): path = os.path.basename(path.strip()) @@ -1133,7 +1170,8 @@ def __init__(self, sr, uuid, raw): else: self.fileName = "%s%s" % (self.uuid, vhdutil.FILE_EXTN_VHD) - def load(self, info=None): + @override + def load(self, info=None) -> None: if not info: if not util.pathexists(self.path): raise util.SMException("%s not found" % self.path) @@ -1153,7 +1191,8 @@ def load(self, info=None): self.path = os.path.join(self.sr.path, "%s%s" % \ (self.uuid, vhdutil.FILE_EXTN_VHD)) - def rename(self, uuid): + @override + def rename(self, uuid) -> None: oldPath = self.path VDI.rename(self, uuid) self.fileName = "%s%s" % (self.uuid, vhdutil.FILE_EXTN_VHD) @@ -1162,7 +1201,8 @@ def rename(self, uuid): Util.log("Renaming %s -> %s" % (oldPath, self.path)) os.rename(oldPath, self.path) - def delete(self): + @override + def delete(self) -> None: if len(self.children) > 0: raise util.SMException("VDI %s has children, can't delete" % \ self.uuid) @@ -1177,7 +1217,8 @@ def delete(self): raise util.SMException("os.unlink(%s) failed" % self.path) VDI.delete(self) - def getAllocatedSize(self): + @override + def getAllocatedSize(self) -> int: if self._sizeAllocated == -1: self._sizeAllocated = vhdutil.getAllocatedSize(self.path) return self._sizeAllocated @@ -1189,27 +1230,32 @@ class LVHDVDI(VDI): JRN_ZERO = "zero" # journal entry type for zeroing out end of parent DRIVER_NAME_RAW = "aio" - def load(self, vdiInfo): + @override + def load(self, info=None) -> None: + # `info` is always set. `None` default value is only here to match parent method. 
+ assert info, "No info given to LVHDVDI.load" self.parent = None self.children = [] self._sizeVHD = -1 self._sizeAllocated = -1 - self.scanError = vdiInfo.scanError - self.sizeLV = vdiInfo.sizeLV - self.sizeVirt = vdiInfo.sizeVirt - self.fileName = vdiInfo.lvName - self.lvActive = vdiInfo.lvActive - self.lvOpen = vdiInfo.lvOpen - self.lvReadonly = vdiInfo.lvReadonly - self.hidden = vdiInfo.hidden - self.parentUuid = vdiInfo.parentUuid + self.scanError = info.scanError + self.sizeLV = info.sizeLV + self.sizeVirt = info.sizeVirt + self.fileName = info.lvName + self.lvActive = info.lvActive + self.lvOpen = info.lvOpen + self.lvReadonly = info.lvReadonly + self.hidden = info.hidden + self.parentUuid = info.parentUuid self.path = os.path.join(self.sr.path, self.fileName) + @override @staticmethod def extractUuid(path): return lvhdutil.extractUuid(path) - def getDriverName(self): + @override + def getDriverName(self) -> str: if self.raw: return self.DRIVER_NAME_RAW return self.DRIVER_NAME_VHD @@ -1256,11 +1302,14 @@ def inflateParentForCoalesce(self): util.fistpoint.activate("LVHDRT_coalescing_before_inflate_grandparent", self.sr.uuid) self.parent.inflate(self.parent.sizeLV + inc) - def updateBlockInfo(self): + @override + def updateBlockInfo(self) -> Optional[str]: if not self.raw: return VDI.updateBlockInfo(self) + return None - def rename(self, uuid): + @override + def rename(self, uuid) -> None: oldUuid = self.uuid oldLVName = self.fileName VDI.rename(self, uuid) @@ -1279,7 +1328,8 @@ def rename(self, uuid): RefCounter.set(self.uuid, cnt, bcnt, ns) RefCounter.reset(oldUuid, ns) - def delete(self): + @override + def delete(self) -> None: if len(self.children) > 0: raise util.SMException("VDI %s has children, can't delete" % \ self.uuid) @@ -1292,7 +1342,8 @@ def delete(self): RefCounter.reset(self.uuid, lvhdutil.NS_PREFIX_LVM + self.sr.uuid) VDI.delete(self) - def getSizeVHD(self): + @override + def getSizeVHD(self) -> int: if self._sizeVHD == -1: self._loadInfoSizeVHD() return self._sizeVHD @@ -1310,7 +1361,8 @@ def _loadInfoSizeVHD(self): raise util.SMException("phys size of %s = %d" % \ (self, self._sizeVHD)) - def getAllocatedSize(self): + @override + def getAllocatedSize(self) -> int: if self._sizeAllocated == -1: self._loadInfoSizeAllocated() return self._sizeAllocated @@ -1324,20 +1376,23 @@ def _loadInfoSizeAllocated(self): self._activate() self._sizeAllocated = vhdutil.getAllocatedSize(self.path) - def _loadInfoHidden(self): + @override + def _loadInfoHidden(self) -> None: if self.raw: self.hidden = self.sr.lvmCache.getHidden(self.fileName) else: VDI._loadInfoHidden(self) - def _setHidden(self, hidden=True): + @override + def _setHidden(self, hidden=True) -> None: if self.raw: self.sr.lvmCache.setHidden(self.fileName, hidden) self.hidden = hidden else: VDI._setHidden(self, hidden) - def __str__(self): + @override + def __str__(self) -> str: strType = "VHD" if self.raw: strType = "RAW" @@ -1359,11 +1414,13 @@ def __str__(self): Util.num2str(self.sizeVirt), strSizeVHD, strSizeAllocated, Util.num2str(self.sizeLV), strActive) - def validate(self, fast=False): + @override + def validate(self, fast=False) -> None: if not self.raw: VDI.validate(self, fast) - def _doCoalesce(self): + @override + def _doCoalesce(self) -> None: """LVHD parents must first be activated, inflated, and made writable""" try: self._activateChain() @@ -1376,7 +1433,8 @@ def _doCoalesce(self): self.parent.deflate() self.sr.lvmCache.setReadonly(self.parent.fileName, True) - def _setParent(self, parent): + @override + 
def _setParent(self, parent) -> None: self._activate() if self.lvReadonly: self.sr.lvmCache.setReadonly(self.fileName, False) @@ -1410,7 +1468,8 @@ def _activateChain(self): def _deactivate(self): self.sr.lvActivator.deactivate(self.uuid, False) - def _increaseSizeVirt(self, size, atomic=True): + @override + def _increaseSizeVirt(self, size, atomic=True) -> None: "ensure the virtual size of 'self' is at least 'size'" self._activate() if not self.raw: @@ -1446,7 +1505,8 @@ def _increaseSizeVirt(self, size, atomic=True): VDI.POLL_INTERVAL, 0) self.sr.journaler.remove(self.JRN_ZERO, self.uuid) - def _setSizeVirt(self, size): + @override + def _setSizeVirt(self, size) -> None: """WARNING: do not call this method directly unless all VDIs in the subtree are guaranteed to be unplugged (and remain so for the duration of the operation): this operation is only safe for offline VHDs""" @@ -1459,34 +1519,354 @@ def _setSizeVirt(self, size): finally: lvhdutil.deleteVHDJournalLV(self.sr.lvmCache, self.uuid) - def _queryVHDBlocks(self): + @override + def _queryVHDBlocks(self) -> bytes: self._activate() return VDI._queryVHDBlocks(self) - def _calcExtraSpaceForCoalescing(self): + @override + def _calcExtraSpaceForCoalescing(self) -> int: if self.parent.raw: return 0 # raw parents are never deflated in the first place sizeCoalesced = lvhdutil.calcSizeVHDLV(self._getCoalescedSizeData()) Util.log("Coalesced size = %s" % Util.num2str(sizeCoalesced)) return sizeCoalesced - self.parent.sizeLV - def _calcExtraSpaceForLeafCoalescing(self): + @override + def _calcExtraSpaceForLeafCoalescing(self) -> int: """How much extra space in the SR will be required to [live-]leaf-coalesce this VDI""" # we can deflate the leaf to minimize the space requirements deflateDiff = self.sizeLV - lvhdutil.calcSizeLV(self.getSizeVHD()) return self._calcExtraSpaceForCoalescing() - deflateDiff - def _calcExtraSpaceForSnapshotCoalescing(self): + @override + def _calcExtraSpaceForSnapshotCoalescing(self) -> int: return self._calcExtraSpaceForCoalescing() + \ lvhdutil.calcSizeLV(self.getSizeVHD()) +class LinstorVDI(VDI): + """Object representing a VDI in a LINSTOR SR""" + + VOLUME_LOCK_TIMEOUT = 30 + + @override + def load(self, info=None) -> None: + self.parentUuid = info.parentUuid + self.scanError = True + self.parent = None + self.children = [] + + self.fileName = self.sr._linstor.get_volume_name(self.uuid) + self.path = self.sr._linstor.build_device_path(self.fileName) + + if not info: + try: + info = self.sr._vhdutil.get_vhd_info(self.uuid) + except util.SMException: + Util.log( + ' [VDI {}: failed to read VHD metadata]'.format(self.uuid) + ) + return + + self.parentUuid = info.parentUuid + self.sizeVirt = info.sizeVirt + self._sizeVHD = -1 + self._sizeAllocated = -1 + self.drbd_size = -1 + self.hidden = info.hidden + self.scanError = False + self.vdi_type = vhdutil.VDI_TYPE_VHD + + @override + def getSizeVHD(self, fetch=False) -> int: + if self._sizeVHD < 0 or fetch: + self._sizeVHD = self.sr._vhdutil.get_size_phys(self.uuid) + return self._sizeVHD + + def getDrbdSize(self, fetch=False): + if self.drbd_size < 0 or fetch: + self.drbd_size = self.sr._vhdutil.get_drbd_size(self.uuid) + return self.drbd_size + + @override + def getAllocatedSize(self) -> int: + if self._sizeAllocated == -1: + if not self.raw: + self._sizeAllocated = self.sr._vhdutil.get_allocated_size(self.uuid) + return self._sizeAllocated + + def inflate(self, size): + if self.raw: + return + self.sr.lock() + try: + # Ensure we use the real DRBD size and not the cached 
one. + # Why? Because this attribute can be changed if volume is resized by user. + self.drbd_size = self.getDrbdSize(fetch=True) + self.sr._vhdutil.inflate(self.sr.journaler, self.uuid, self.path, size, self.drbd_size) + finally: + self.sr.unlock() + self.drbd_size = -1 + self._sizeVHD = -1 + self._sizeAllocated = -1 + + def deflate(self): + if self.raw: + return + self.sr.lock() + try: + # Ensure we use the real sizes and not the cached info. + self.drbd_size = self.getDrbdSize(fetch=True) + self._sizeVHD = self.getSizeVHD(fetch=True) + self.sr._vhdutil.force_deflate(self.path, self._sizeVHD, self.drbd_size, zeroize=False) + finally: + self.sr.unlock() + self.drbd_size = -1 + self._sizeVHD = -1 + self._sizeAllocated = -1 + + def inflateFully(self): + if not self.raw: + self.inflate(LinstorVhdUtil.compute_volume_size(self.sizeVirt, self.vdi_type)) + + @override + def rename(self, uuid) -> None: + Util.log('Renaming {} -> {} (path={})'.format( + self.uuid, uuid, self.path + )) + self.sr._linstor.update_volume_uuid(self.uuid, uuid) + VDI.rename(self, uuid) + + @override + def delete(self) -> None: + if len(self.children) > 0: + raise util.SMException( + 'VDI {} has children, can\'t delete'.format(self.uuid) + ) + self.sr.lock() + try: + self.sr._linstor.destroy_volume(self.uuid) + self.sr.forgetVDI(self.uuid) + finally: + self.sr.unlock() + VDI.delete(self) + + @override + def validate(self, fast=False) -> None: + if not self.raw and not self.sr._vhdutil.check(self.uuid, fast=fast): + raise util.SMException('VHD {} corrupted'.format(self)) + + @override + def pause(self, failfast=False) -> None: + self.sr._linstor.ensure_volume_is_not_locked( + self.uuid, timeout=self.VOLUME_LOCK_TIMEOUT + ) + return super(LinstorVDI, self).pause(failfast) + + @override + def coalesce(self) -> int: + # Note: We raise `SMException` here to skip the current coalesce in case of failure. + # Using another exception we can't execute the next coalesce calls. 
+ return self.sr._vhdutil.force_coalesce(self.path) * 512 + + @override + def getParent(self) -> str: + return self.sr._vhdutil.get_parent( + self.sr._linstor.get_volume_uuid_from_device_path(self.path) + ) + + @override + def repair(self, parent_uuid) -> None: + self.sr._vhdutil.force_repair( + self.sr._linstor.get_device_path(parent_uuid) + ) + + @override + def _relinkSkip(self) -> None: + abortFlag = IPCFlag(self.sr.uuid) + for child in self.children: + if abortFlag.test(FLAG_TYPE_ABORT): + raise AbortException('Aborting due to signal') + Util.log( + ' Relinking {} from {} to {}'.format( + child, self, self.parent + ) + ) + + session = child.sr.xapi.session + sr_uuid = child.sr.uuid + vdi_uuid = child.uuid + try: + self.sr._linstor.ensure_volume_is_not_locked( + vdi_uuid, timeout=self.VOLUME_LOCK_TIMEOUT + ) + blktap2.VDI.tap_pause(session, sr_uuid, vdi_uuid) + child._setParent(self.parent) + finally: + blktap2.VDI.tap_unpause(session, sr_uuid, vdi_uuid) + self.children = [] + + @override + def _setParent(self, parent) -> None: + self.sr._linstor.get_device_path(self.uuid) + self.sr._vhdutil.force_parent(self.path, parent.path) + self.parent = parent + self.parentUuid = parent.uuid + parent.children.append(self) + try: + self.setConfig(self.DB_VHD_PARENT, self.parentUuid) + Util.log("Updated the vhd-parent field for child %s with %s" % \ + (self.uuid, self.parentUuid)) + except: + Util.log("Failed to update %s with vhd-parent field %s" % \ + (self.uuid, self.parentUuid)) + + @override + def _doCoalesce(self) -> None: + try: + self._activateChain() + self.parent.validate() + self._inflateParentForCoalesce() + VDI._doCoalesce(self) + finally: + self.parent.deflate() + + def _activateChain(self): + vdi = self + while vdi: + try: + p = self.sr._linstor.get_device_path(vdi.uuid) + except Exception as e: + # Use SMException to skip coalesce. + # Otherwise the GC is stopped... 
+ raise util.SMException(str(e)) + vdi = vdi.parent + + @override + def _setHidden(self, hidden=True) -> None: + HIDDEN_TAG = 'hidden' + + if self.raw: + self.sr._linstor.update_volume_metadata(self.uuid, { + HIDDEN_TAG: hidden + }) + self.hidden = hidden + else: + VDI._setHidden(self, hidden) + + @override + def _increaseSizeVirt(self, size, atomic=True): + if self.raw: + offset = self.drbd_size + if self.sizeVirt < size: + oldSize = self.drbd_size + self.drbd_size = LinstorVolumeManager.round_up_volume_size(size) + Util.log(" Growing %s: %d->%d" % (self.path, oldSize, self.drbd_size)) + self.sr._linstor.resize_volume(self.uuid, self.drbd_size) + offset = oldSize + unfinishedZero = False + jval = self.sr.journaler.get(LinstorJournaler.ZERO, self.uuid) + if jval: + unfinishedZero = True + offset = int(jval) + length = self.drbd_size - offset + if not length: + return + + if unfinishedZero: + Util.log(" ==> Redoing unfinished zeroing out") + else: + self.sr.journaler.create(LinstorJournaler.ZERO, self.uuid, str(offset)) + Util.log(" Zeroing %s: from %d, %dB" % (self.path, offset, length)) + abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT) + func = lambda: util.zeroOut(self.path, offset, length) + Util.runAbortable(func, True, self.sr.uuid, abortTest, VDI.POLL_INTERVAL, 0) + self.sr.journaler.remove(LinstorJournaler.ZERO, self.uuid) + return + + if self.sizeVirt >= size: + return + Util.log(" Expanding VHD virt size for VDI %s: %s -> %s" % \ + (self, Util.num2str(self.sizeVirt), Util.num2str(size))) + + msize = self.sr._vhdutil.get_max_resize_size(self.uuid) * 1024 * 1024 + if (size <= msize): + self.sr._vhdutil.set_size_virt_fast(self.path, size) + else: + if atomic: + vdiList = self._getAllSubtree() + self.sr.lock() + try: + self.sr.pauseVDIs(vdiList) + try: + self._setSizeVirt(size) + finally: + self.sr.unpauseVDIs(vdiList) + finally: + self.sr.unlock() + else: + self._setSizeVirt(size) + + self.sizeVirt = self.sr._vhdutil.get_size_virt(self.uuid) + + @override + def _setSizeVirt(self, size) -> None: + jfile = self.uuid + '-jvhd' + self.sr._linstor.create_volume( + jfile, vhdutil.MAX_VHD_JOURNAL_SIZE, persistent=False, volume_name=jfile + ) + try: + self.inflate(LinstorVhdUtil.compute_volume_size(size, self.vdi_type)) + self.sr._vhdutil.set_size_virt(size, jfile) + finally: + try: + self.sr._linstor.destroy_volume(jfile) + except Exception: + # We can ignore it, in any case this volume is not persistent. 
+ pass + + @override + def _queryVHDBlocks(self) -> bytes: + return self.sr._vhdutil.get_block_bitmap(self.uuid) + + def _inflateParentForCoalesce(self): + if self.parent.raw: + return + inc = self._calcExtraSpaceForCoalescing() + if inc > 0: + self.parent.inflate(self.parent.getDrbdSize() + inc) + + @override + def _calcExtraSpaceForCoalescing(self) -> int: + if self.parent.raw: + return 0 + size_coalesced = LinstorVhdUtil.compute_volume_size( + self._getCoalescedSizeData(), self.vdi_type + ) + Util.log("Coalesced size = %s" % Util.num2str(size_coalesced)) + return size_coalesced - self.parent.getDrbdSize() + + @override + def _calcExtraSpaceForLeafCoalescing(self) -> int: + assert self.getDrbdSize() > 0 + assert self.getSizeVHD() > 0 + deflate_diff = self.getDrbdSize() - LinstorVolumeManager.round_up_volume_size(self.getSizeVHD()) + assert deflate_diff >= 0 + return self._calcExtraSpaceForCoalescing() - deflate_diff + + @override + def _calcExtraSpaceForSnapshotCoalescing(self) -> int: + assert self.getSizeVHD() > 0 + return self._calcExtraSpaceForCoalescing() + \ + LinstorVolumeManager.round_up_volume_size(self.getSizeVHD()) + ################################################################################ # # SR # -class SR: +class SR(object): class LogFilter: def __init__(self, sr): self.sr = sr @@ -1536,7 +1916,8 @@ def _getTreeStr(self, vdi, indent=8): TYPE_FILE = "file" TYPE_LVHD = "lvhd" - TYPES = [TYPE_LVHD, TYPE_FILE] + TYPE_LINSTOR = "linstor" + TYPES = [TYPE_LVHD, TYPE_FILE, TYPE_LINSTOR] LOCK_RETRY_INTERVAL = 3 LOCK_RETRY_ATTEMPTS = 20 @@ -1550,6 +1931,7 @@ def _getTreeStr(self, vdi, indent=8): KEY_OFFLINE_COALESCE_NEEDED = "leaf_coalesce_need_offline" KEY_OFFLINE_COALESCE_OVERRIDE = "leaf_coalesce_offline_override" + @staticmethod def getInstance(uuid, xapiSession, createLock=True, force=False): xapi = XAPI(xapiSession, uuid) type = normalizeType(xapi.srRecord["type"]) @@ -1557,8 +1939,9 @@ def getInstance(uuid, xapiSession, createLock=True, force=False): return FileSR(uuid, xapi, createLock, force) elif type == SR.TYPE_LVHD: return LVHDSR(uuid, xapi, createLock, force) + elif type == SR.TYPE_LINSTOR: + return LinstorSR(uuid, xapi, createLock, force) raise util.SMException("SR type %s not recognized" % type) - getInstance = staticmethod(getInstance) def __init__(self, uuid, xapi, createLock, force): self.logFilter = self.LogFilter(self) @@ -1655,10 +2038,10 @@ def gcEnabled(self, refresh=True): return False return True - def scan(self, force=False): + def scan(self, force=False) -> None: """Scan the SR and load VDI info for each VDI. 
If called repeatedly, update VDI objects if they already exist""" - pass # abstract + pass def scanLocked(self, force=False): self.lock() @@ -1868,14 +2251,14 @@ def findGarbage(self): vdiList.extend(vdi.getAllPrunable()) return vdiList - def deleteVDIs(self, vdiList): + def deleteVDIs(self, vdiList) -> None: for vdi in vdiList: if IPCFlag(self.uuid).test(FLAG_TYPE_ABORT): raise AbortException("Aborting due to signal") Util.log("Deleting unlinked VDI %s" % vdi) self.deleteVDI(vdi) - def deleteVDI(self, vdi): + def deleteVDI(self, vdi) -> None: assert(len(vdi.children) == 0) del self.vdis[vdi.uuid] if vdi.parent: @@ -1884,10 +2267,10 @@ def deleteVDI(self, vdi): self.vdiTrees.remove(vdi) vdi.delete() - def forgetVDI(self, vdiUuid): + def forgetVDI(self, vdiUuid) -> None: self.xapi.forgetVDI(self.uuid, vdiUuid) - def pauseVDIs(self, vdiList): + def pauseVDIs(self, vdiList) -> None: paused = [] failed = False for vdi in vdiList: @@ -1914,14 +2297,15 @@ def unpauseVDIs(self, vdiList): if failed: raise util.SMException("Failed to unpause VDIs") - def getFreeSpace(self): + def getFreeSpace(self) -> int: return 0 def cleanup(self): Util.log("In cleanup") return - def __str__(self): + @override + def __str__(self) -> str: if self.name: ret = "%s ('%s')" % (self.uuid[0:4], self.name) else: @@ -1956,7 +2340,7 @@ def unlock(self): if self._locked == 0: self._srLock.release() - def needUpdateBlockInfo(self): + def needUpdateBlockInfo(self) -> bool: for vdi in self.vdis.values(): if vdi.scanError or len(vdi.children) == 0: continue @@ -1964,7 +2348,7 @@ def needUpdateBlockInfo(self): return True return False - def updateBlockInfo(self): + def updateBlockInfo(self) -> None: for vdi in self.vdis.values(): if vdi.scanError or len(vdi.children) == 0: continue @@ -1993,7 +2377,7 @@ def cleanupJournals(self, dryRun=False): if not dryRun: self.journaler.remove(t, uuid) - def cleanupCache(self, maxAge=-1): + def cleanupCache(self, maxAge=-1) -> int: return 0 def _coalesce(self, vdi): @@ -2134,7 +2518,7 @@ def printSummary(self): def _coalesceLeaf(self, vdi): """Leaf-coalesce VDI vdi. 
Return true if we succeed, false if we cannot - complete due to external changes, namely vdi_delete and vdi_snapshot + complete due to external changes, namely vdi_delete and vdi_snapshot that alter leaf-coalescibility of vdi""" tracker = self.CoalesceTracker(self) while not vdi.canLiveCoalesce(self.getStorageSpeed()): @@ -2251,7 +2635,7 @@ def _snapshotCoalesce(self, vdi): return False return True - def _liveLeafCoalesce(self, vdi): + def _liveLeafCoalesce(self, vdi) -> bool: util.fistpoint.activate("LVHDRT_coaleaf_delay_3", self.uuid) self.lock() try: @@ -2351,45 +2735,44 @@ def _doCoalesceLeaf(self, vdi): self.forgetVDI(origParentUuid) self._finishCoalesceLeaf(parent) self._updateSlavesOnResize(parent) - - def _calcExtraSpaceNeeded(self, child, parent): + + def _calcExtraSpaceNeeded(self, child, parent) -> int: assert(not parent.raw) # raw parents not supported extra = child.getSizeVHD() - parent.getSizeVHD() if extra < 0: extra = 0 return extra - def _prepareCoalesceLeaf(self, vdi): + def _prepareCoalesceLeaf(self, vdi) -> None: pass - def _updateNode(self, vdi): + def _updateNode(self, vdi) -> None: pass - def _finishCoalesceLeaf(self, parent): + def _finishCoalesceLeaf(self, parent) -> None: pass - def _updateSlavesOnUndoLeafCoalesce(self, parent, child): + def _updateSlavesOnUndoLeafCoalesce(self, parent, child) -> None: pass - def _updateSlavesOnRename(self, vdi, oldName, origParentUuid): + def _updateSlavesOnRename(self, vdi, oldName, origParentUuid) -> None: pass - def _updateSlavesOnResize(self, vdi): + def _updateSlavesOnResize(self, vdi) -> None: pass - def _removeStaleVDIs(self, uuidsPresent): + def _removeStaleVDIs(self, uuidsPresent) -> None: for uuid in list(self.vdis.keys()): if not uuid in uuidsPresent: Util.log("VDI %s disappeared since last scan" % \ self.vdis[uuid]) del self.vdis[uuid] - def _handleInterruptedCoalesceLeaf(self): - """An interrupted leaf-coalesce operation may leave the VHD tree in an - inconsistent state. If the old-leaf VDI is still present, we revert the - operation (in case the original error is persistent); otherwise we must + def _handleInterruptedCoalesceLeaf(self) -> None: + """An interrupted leaf-coalesce operation may leave the VHD tree in an + inconsistent state. If the old-leaf VDI is still present, we revert the + operation (in case the original error is persistent); otherwise we must finish the operation""" - # abstract pass def _buildTree(self, force): @@ -2428,7 +2811,8 @@ def __init__(self, uuid, xapi, createLock, force): self.path = "/var/run/sr-mount/%s" % self.uuid self.journaler = fjournaler.Journaler(self.path) - def scan(self, force=False): + @override + def scan(self, force=False) -> None: if not util.pathexists(self.path): raise util.SMException("directory %s not found!" % self.uuid) vhds = self._scan(force) @@ -2454,10 +2838,12 @@ def scan(self, force=False): self.logFilter.logState() self._handleInterruptedCoalesceLeaf() - def getFreeSpace(self): + @override + def getFreeSpace(self) -> int: return util.get_fs_size(self.path) - util.get_fs_utilisation(self.path) - def deleteVDIs(self, vdiList): + @override + def deleteVDIs(self, vdiList) -> None: rootDeleted = False for vdi in vdiList: if not vdi.parent: @@ -2467,11 +2853,12 @@ def deleteVDIs(self, vdiList): if self.xapi.srRecord["type"] == "nfs" and rootDeleted: self.xapi.markCacheSRsDirty() - def cleanupCache(self, maxAge=-1): - """Clean up IntelliCache cache files. 
Caches for leaf nodes are - removed when the leaf node no longer exists or its allow-caching - attribute is not set. Caches for parent nodes are removed when the - parent node no longer exists or it hasn't been used in more than + @override + def cleanupCache(self, maxAge=-1) -> int: + """Clean up IntelliCache cache files. Caches for leaf nodes are + removed when the leaf node no longer exists or its allow-caching + attribute is not set. Caches for parent nodes are removed when the + parent node no longer exists or it hasn't been used in more than hours. Return number of caches removed. """ @@ -2556,7 +2943,8 @@ def _scan(self, force): return vhds raise util.SMException("Scan error") - def deleteVDI(self, vdi): + @override + def deleteVDI(self, vdi) -> None: self._checkSlaves(vdi) SR.deleteVDI(self, vdi) @@ -2581,7 +2969,8 @@ def _checkSlave(self, hostRef, vdi): _host = self.xapi.session.xenapi.host text = _host.call_plugin( * call) - def _handleInterruptedCoalesceLeaf(self): + @override + def _handleInterruptedCoalesceLeaf(self) -> None: entries = self.journaler.getAll(VDI.JRN_LEAF) for uuid, parentUuid in entries.items(): fileList = os.listdir(self.path) @@ -2656,30 +3045,40 @@ def __init__(self, uuid, xapi, createLock, force): SR.__init__(self, uuid, xapi, createLock, force) self.vgName = "%s%s" % (lvhdutil.VG_PREFIX, self.uuid) self.path = os.path.join(lvhdutil.VG_LOCATION, self.vgName) - self.lvmCache = lvmcache.LVMCache(self.vgName) + + sr_ref = self.xapi.session.xenapi.SR.get_by_uuid(self.uuid) + other_conf = self.xapi.session.xenapi.SR.get_other_config(sr_ref) + lvm_conf = other_conf.get('lvm-conf') if other_conf else None + self.lvmCache = lvmcache.LVMCache(self.vgName, lvm_conf) + self.lvActivator = LVActivator(self.uuid, self.lvmCache) self.journaler = journaler.Journaler(self.lvmCache) - def deleteVDI(self, vdi): + @override + def deleteVDI(self, vdi) -> None: if self.lvActivator.get(vdi.uuid, False): self.lvActivator.deactivate(vdi.uuid, False) self._checkSlaves(vdi) SR.deleteVDI(self, vdi) - def forgetVDI(self, vdiUuid): + @override + def forgetVDI(self, vdiUuid) -> None: SR.forgetVDI(self, vdiUuid) mdpath = os.path.join(self.path, lvutil.MDVOLUME_NAME) LVMMetadataHandler(mdpath).deleteVdiFromMetadata(vdiUuid) - def getFreeSpace(self): + @override + def getFreeSpace(self) -> int: stats = lvutil._getVGstats(self.vgName) return stats['physical_size'] - stats['physical_utilisation'] + @override def cleanup(self): if not self.lvActivator.deactivateAll(): Util.log("ERROR deactivating LVs while cleaning up") - def needUpdateBlockInfo(self): + @override + def needUpdateBlockInfo(self) -> bool: for vdi in self.vdis.values(): if vdi.scanError or vdi.raw or len(vdi.children) == 0: continue @@ -2687,7 +3086,8 @@ def needUpdateBlockInfo(self): return True return False - def updateBlockInfo(self): + @override + def updateBlockInfo(self) -> None: numUpdated = 0 for vdi in self.vdis.values(): if vdi.scanError or vdi.raw or len(vdi.children) == 0: @@ -2702,7 +3102,8 @@ def updateBlockInfo(self): # inherit the refcount value and preventing the correct decrement self.cleanup() - def scan(self, force=False): + @override + def scan(self, force=False) -> None: vdis = self._scan(force) for uuid, vdiInfo in vdis.items(): vdi = self.getVDI(uuid) @@ -2733,7 +3134,8 @@ def _scan(self, force): return vdis raise util.SMException("Scan error") - def _removeStaleVDIs(self, uuidsPresent): + @override + def _removeStaleVDIs(self, uuidsPresent) -> None: for uuid in list(self.vdis.keys()): if not uuid in 
uuidsPresent: Util.log("VDI %s disappeared since last scan" % \ @@ -2742,7 +3144,8 @@ def _removeStaleVDIs(self, uuidsPresent): if self.lvActivator.get(uuid, False): self.lvActivator.remove(uuid, False) - def _liveLeafCoalesce(self, vdi): + @override + def _liveLeafCoalesce(self, vdi) -> bool: """If the parent is raw and the child was resized (virt. size), then we'll need to resize the parent, which can take a while due to zeroing out of the extended portion of the LV. Do it before pausing the child @@ -2753,13 +3156,15 @@ def _liveLeafCoalesce(self, vdi): return SR._liveLeafCoalesce(self, vdi) - def _prepareCoalesceLeaf(self, vdi): + @override + def _prepareCoalesceLeaf(self, vdi) -> None: vdi._activateChain() self.lvmCache.setReadonly(vdi.parent.fileName, False) vdi.deflate() vdi.inflateParentForCoalesce() - def _updateNode(self, vdi): + @override + def _updateNode(self, vdi) -> None: # fix the refcounts: the remaining node should inherit the binary # refcount from the leaf (because if it was online, it should remain # refcounted as such), but the normal refcount from the parent (because @@ -2773,16 +3178,19 @@ def _updateNode(self, vdi): assert(pCnt >= 0) RefCounter.set(vdi.parent.uuid, pCnt, cBcnt, ns) - def _finishCoalesceLeaf(self, parent): + @override + def _finishCoalesceLeaf(self, parent) -> None: if not parent.isSnapshot() or parent.isAttachedRW(): parent.inflateFully() else: parent.deflate() - def _calcExtraSpaceNeeded(self, child, parent): + @override + def _calcExtraSpaceNeeded(self, child, parent) -> int: return lvhdutil.calcSizeVHDLV(parent.sizeVirt) - parent.sizeLV - def _handleInterruptedCoalesceLeaf(self): + @override + def _handleInterruptedCoalesceLeaf(self) -> None: entries = self.journaler.getAll(VDI.JRN_LEAF) for uuid, parentUuid in entries.items(): childLV = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + uuid @@ -2893,7 +3301,8 @@ def _checkSlaves(self, vdi): if hostRef in onlineHosts: raise - def _updateSlavesOnUndoLeafCoalesce(self, parent, child): + @override + def _updateSlavesOnUndoLeafCoalesce(self, parent, child) -> None: slaves = util.get_slaves_attached_on(self.xapi.session, [child.uuid]) if not slaves: Util.log("Update-on-leaf-undo: VDI %s not attached on any slave" % \ @@ -2919,7 +3328,8 @@ def _updateSlavesOnUndoLeafCoalesce(self, parent, child): slave, self.xapi.PLUGIN_ON_SLAVE, "multi", args) Util.log("call-plugin returned: '%s'" % text) - def _updateSlavesOnRename(self, vdi, oldNameLV, origParentUuid): + @override + def _updateSlavesOnRename(self, vdi, oldNameLV, origParentUuid) -> None: slaves = util.get_slaves_attached_on(self.xapi.session, [vdi.uuid]) if not slaves: Util.log("Update-on-rename: VDI %s not attached on any slave" % vdi) @@ -2941,7 +3351,8 @@ def _updateSlavesOnRename(self, vdi, oldNameLV, origParentUuid): slave, self.xapi.PLUGIN_ON_SLAVE, "multi", args) Util.log("call-plugin returned: '%s'" % text) - def _updateSlavesOnResize(self, vdi): + @override + def _updateSlavesOnResize(self, vdi) -> None: uuids = [x.uuid for x in vdi.getAllLeaves()] slaves = util.get_slaves_attached_on(self.xapi.session, uuids) if not slaves: @@ -2951,6 +3362,281 @@ def _updateSlavesOnResize(self, vdi): vdi.fileName, vdi.uuid, slaves) +class LinstorSR(SR): + TYPE = SR.TYPE_LINSTOR + + def __init__(self, uuid, xapi, createLock, force): + if not LINSTOR_AVAILABLE: + raise util.SMException( + 'Can\'t load cleanup LinstorSR: LINSTOR libraries are missing' + ) + + SR.__init__(self, uuid, xapi, createLock, force) + self.path = LinstorVolumeManager.DEV_ROOT_PATH + 
self._reloadLinstor(journaler_only=True) + + @override + def deleteVDI(self, vdi) -> None: + self._checkSlaves(vdi) + SR.deleteVDI(self, vdi) + + @override + def getFreeSpace(self) -> int: + return self._linstor.max_volume_size_allowed + + @override + def scan(self, force=False) -> None: + all_vdi_info = self._scan(force) + for uuid, vdiInfo in all_vdi_info.items(): + # When vdiInfo is None, the VDI is RAW. + vdi = self.getVDI(uuid) + if not vdi: + self.logFilter.logNewVDI(uuid) + vdi = LinstorVDI(self, uuid, not vdiInfo) + self.vdis[uuid] = vdi + if vdiInfo: + vdi.load(vdiInfo) + self._removeStaleVDIs(all_vdi_info.keys()) + self._buildTree(force) + self.logFilter.logState() + self._handleInterruptedCoalesceLeaf() + + @override + def pauseVDIs(self, vdiList) -> None: + self._linstor.ensure_volume_list_is_not_locked( + vdiList, timeout=LinstorVDI.VOLUME_LOCK_TIMEOUT + ) + return super(LinstorSR, self).pauseVDIs(vdiList) + + def _reloadLinstor(self, journaler_only=False): + session = self.xapi.session + host_ref = util.get_this_host_ref(session) + sr_ref = session.xenapi.SR.get_by_uuid(self.uuid) + + pbd = util.find_my_pbd(session, host_ref, sr_ref) + if pbd is None: + raise util.SMException('Failed to find PBD') + + dconf = session.xenapi.PBD.get_device_config(pbd) + group_name = dconf['group-name'] + + controller_uri = get_controller_uri() + self.journaler = LinstorJournaler( + controller_uri, group_name, logger=util.SMlog + ) + + if journaler_only: + return + + self._linstor = LinstorVolumeManager( + controller_uri, + group_name, + repair=True, + logger=util.SMlog + ) + self._vhdutil = LinstorVhdUtil(session, self._linstor) + + def _scan(self, force): + for i in range(SR.SCAN_RETRY_ATTEMPTS): + self._reloadLinstor() + error = False + try: + all_vdi_info = self._load_vdi_info() + for uuid, vdiInfo in all_vdi_info.items(): + if vdiInfo and vdiInfo.error: + error = True + break + if not error: + return all_vdi_info + Util.log('Scan error, retrying ({})'.format(i)) + except Exception as e: + Util.log('Scan exception, retrying ({}): {}'.format(i, e)) + Util.log(traceback.format_exc()) + + if force: + return all_vdi_info + raise util.SMException('Scan error') + + def _load_vdi_info(self): + all_vdi_info = {} + + # TODO: Ensure metadata contains the right info. + + all_volume_info = self._linstor.get_volumes_with_info() + volumes_metadata = self._linstor.get_volumes_with_metadata() + for vdi_uuid, volume_info in all_volume_info.items(): + try: + volume_metadata = volumes_metadata[vdi_uuid] + if not volume_info.name and not list(volume_metadata.items()): + continue # Ignore it, probably deleted. + + if vdi_uuid.startswith('DELETED_'): + # Assume it's really a RAW volume of a failed snap without VHD header/footer. + # We must remove this VDI now without adding it in the VDI list. + # Otherwise `Relinking` calls and other actions can be launched on it. + # We don't want that... + Util.log('Deleting bad VDI {}'.format(vdi_uuid)) + + self.lock() + try: + self._linstor.destroy_volume(vdi_uuid) + try: + self.forgetVDI(vdi_uuid) + except: + pass + except Exception as e: + Util.log('Cannot delete bad VDI: {}'.format(e)) + finally: + self.unlock() + continue + + vdi_type = volume_metadata.get(VDI_TYPE_TAG) + volume_name = self._linstor.get_volume_name(vdi_uuid) + if volume_name.startswith(LINSTOR_PERSISTENT_PREFIX): + # Always RAW! + info = None + elif vdi_type == vhdutil.VDI_TYPE_VHD: + info = self._vhdutil.get_vhd_info(vdi_uuid) + else: + # Ensure it's not a VHD... 
+ try: + info = self._vhdutil.get_vhd_info(vdi_uuid) + except: + try: + self._vhdutil.force_repair( + self._linstor.get_device_path(vdi_uuid) + ) + info = self._vhdutil.get_vhd_info(vdi_uuid) + except: + info = None + + except Exception as e: + Util.log( + ' [VDI {}: failed to load VDI info]: {}' + .format(vdi_uuid, e) + ) + info = vhdutil.VHDInfo(vdi_uuid) + info.error = 1 + + all_vdi_info[vdi_uuid] = info + + return all_vdi_info + + @override + def _prepareCoalesceLeaf(self, vdi) -> None: + vdi._activateChain() + vdi.deflate() + vdi._inflateParentForCoalesce() + + @override + def _finishCoalesceLeaf(self, parent) -> None: + if not parent.isSnapshot() or parent.isAttachedRW(): + parent.inflateFully() + else: + parent.deflate() + + @override + def _calcExtraSpaceNeeded(self, child, parent) -> int: + return LinstorVhdUtil.compute_volume_size(parent.sizeVirt, parent.vdi_type) - parent.getDrbdSize() + + def _hasValidDevicePath(self, uuid): + try: + self._linstor.get_device_path(uuid) + except Exception: + # TODO: Maybe log exception. + return False + return True + + @override + def _liveLeafCoalesce(self, vdi) -> bool: + self.lock() + try: + self._linstor.ensure_volume_is_not_locked( + vdi.uuid, timeout=LinstorVDI.VOLUME_LOCK_TIMEOUT + ) + return super(LinstorSR, self)._liveLeafCoalesce(vdi) + finally: + self.unlock() + + @override + def _handleInterruptedCoalesceLeaf(self) -> None: + entries = self.journaler.get_all(VDI.JRN_LEAF) + for uuid, parentUuid in entries.items(): + if self._hasValidDevicePath(parentUuid) or \ + self._hasValidDevicePath(self.TMP_RENAME_PREFIX + uuid): + self._undoInterruptedCoalesceLeaf(uuid, parentUuid) + else: + self._finishInterruptedCoalesceLeaf(uuid, parentUuid) + self.journaler.remove(VDI.JRN_LEAF, uuid) + vdi = self.getVDI(uuid) + if vdi: + vdi.ensureUnpaused() + + def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid): + Util.log('*** UNDO LEAF-COALESCE') + parent = self.getVDI(parentUuid) + if not parent: + parent = self.getVDI(childUuid) + if not parent: + raise util.SMException( + 'Neither {} nor {} found'.format(parentUuid, childUuid) + ) + Util.log( + 'Renaming parent back: {} -> {}'.format(childUuid, parentUuid) + ) + parent.rename(parentUuid) + + child = self.getVDI(childUuid) + if not child: + child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid) + if not child: + raise util.SMException( + 'Neither {} nor {} found'.format( + childUuid, self.TMP_RENAME_PREFIX + childUuid + ) + ) + Util.log('Renaming child back to {}'.format(childUuid)) + child.rename(childUuid) + Util.log('Updating the VDI record') + child.setConfig(VDI.DB_VHD_PARENT, parentUuid) + child.setConfig(VDI.DB_VDI_TYPE, vhdutil.VDI_TYPE_VHD) + + # TODO: Maybe deflate here. + + if child.hidden: + child._setHidden(False) + if not parent.hidden: + parent._setHidden(True) + self._updateSlavesOnUndoLeafCoalesce(parent, child) + Util.log('*** leaf-coalesce undo successful') + + def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid): + Util.log('*** FINISH LEAF-COALESCE') + vdi = self.getVDI(childUuid) + if not vdi: + raise util.SMException('VDI {} not found'.format(childUuid)) + # TODO: Maybe inflate. 
+ try: + self.forgetVDI(parentUuid) + except XenAPI.Failure: + pass + self._updateSlavesOnResize(vdi) + Util.log('*** finished leaf-coalesce successfully') + + def _checkSlaves(self, vdi): + try: + all_openers = self._linstor.get_volume_openers(vdi.uuid) + for openers in all_openers.values(): + for opener in openers.values(): + if opener['process-name'] != 'tapdisk': + raise util.SMException( + 'VDI {} is in use: {}'.format(vdi.uuid, all_openers) + ) + except LinstorVolumeManagerError as e: + if e.code != LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS: + raise + + ################################################################################ # # Helpers @@ -2987,9 +3673,14 @@ def normalizeType(type): if type in ["lvm", "lvmoiscsi", "lvmohba", "lvmofcoe"]: # temporary while LVHD is symlinked as LVM type = SR.TYPE_LVHD - if type in ["ext", "nfs", "ocfsoiscsi", "ocfsohba", "smb"]: + if type in [ + "ext", "nfs", "ocfsoiscsi", "ocfsohba", "smb", "cephfs", "glusterfs", + "moosefs", "xfs", "zfs", "largeblock" + ]: type = SR.TYPE_FILE - if not type in SR.TYPES: + if type in ["linstor"]: + type = SR.TYPE_LINSTOR + if type not in SR.TYPES: raise util.SMException("Unsupported SR type: %s" % type) return type @@ -3277,9 +3968,9 @@ def abort(srUuid, soft=False): def gc(session, srUuid, inBackground, dryRun=False): - """Garbage collect all deleted VDIs in SR "srUuid". Fork & return - immediately if inBackground=True. - + """Garbage collect all deleted VDIs in SR "srUuid". Fork & return + immediately if inBackground=True. + The following algorithm is used: 1. If we are already GC'ing in this SR, return 2. If we are already coalescing a VDI pair: @@ -3485,6 +4176,7 @@ def abort_optional_reenable(uuid): # def main(): action = "" + maxAge = 0 uuid = "" background = False force = False diff --git a/drivers/flock.py b/drivers/flock.py index dceb04283..a853da27d 100644 --- a/drivers/flock.py +++ b/drivers/flock.py @@ -23,6 +23,8 @@ got to grow our own. """ +from sm_typing import ClassVar, override + import os import fcntl import struct @@ -60,7 +62,8 @@ def __getattr__(self, name): idx = self.FIELDS[name] return self.fields[idx] - def __setattr__(self, name, value): + @override + def __setattr__(self, name, value) -> None: idx = self.FIELDS.get(name) if idx is None: self.__dict__[name] = value @@ -73,7 +76,7 @@ class FcntlLockBase: definition of LOCK_TYPE (fcntl.{F_RDLCK|F_WRLCK}) determines the type.""" - LOCK_TYPE = None + LOCK_TYPE: ClassVar[int] if __debug__: ERROR_ISLOCKED = "Attempt to acquire lock held." 
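
The flock.py hunk above types LOCK_TYPE as an unassigned ClassVar[int] instead of LOCK_TYPE = None: the base class only declares the attribute, and each concrete lock class is expected to supply fcntl.F_RDLCK or fcntl.F_WRLCK, as its docstring already states. A short sketch of that pattern, assuming illustrative subclass names (not the real classes in flock.py):

    import fcntl
    from typing import ClassVar

    class LockBase:
        # Declared but not assigned: subclasses must provide the value,
        # and type checkers treat it as an int wherever it is used.
        LOCK_TYPE: ClassVar[int]

        def flock_type(self) -> int:
            return self.LOCK_TYPE

    class ReadLock(LockBase):
        LOCK_TYPE = fcntl.F_RDLCK

    class WriteLock(LockBase):
        LOCK_TYPE = fcntl.F_WRLCK

    assert ReadLock().flock_type() == fcntl.F_RDLCK

Compared with a None default, the unassigned ClassVar keeps mypy from inferring an Optional type and makes a missing override show up as a type error rather than a runtime comparison against None.
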
diff --git a/drivers/iscsilib.py b/drivers/iscsilib.py index e77d17d26..27d34a546 100644 --- a/drivers/iscsilib.py +++ b/drivers/iscsilib.py @@ -248,7 +248,7 @@ def get_node_config (portal, targetIQN): ini_sec = "root" str_fp = io.StringIO("[%s]\n%s" % (ini_sec, stdout)) parser = RawConfigParser() - parser.readfp(str_fp) + parser.read_file(str_fp) str_fp.close() return dict(parser.items(ini_sec)) @@ -535,8 +535,8 @@ def _checkAnyTGT(): except Exception as e: util.SMlog("%s failed with %s" % (cmd, e.args)) stdout = "" - for e in filter(match_session, stdout.split('\n')): - iqn = e.split()[-1] + for session in filter(match_session, stdout.split('\n')): + iqn = session.split()[-1] if not iqn in rootIQNs: return True return False diff --git a/drivers/lcache.py b/drivers/lcache.py index f1e25c368..1b7c78f2d 100755 --- a/drivers/lcache.py +++ b/drivers/lcache.py @@ -15,6 +15,8 @@ # along with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +from sm_typing import override + import os import blktap2 import glob @@ -71,7 +73,8 @@ def __init__(self, tapdisk, stats): self.tapdisk = tapdisk self.stats = stats - def __str__(self): + @override + def __str__(self) -> str: return \ "Tapdisk %s in state '%s' not found caching." % \ (self.tapdisk, self.stats) @@ -116,7 +119,8 @@ def vdi_stats_total(self): return rd_hits, rd_miss, wr_rdir - def __str__(self): + @override + def __str__(self) -> str: return "%s(%s, minor=%s)" % \ (self.__class__.__name__, self.tapdisk.path, self.tapdisk.minor) @@ -144,7 +148,8 @@ def vdi_stats(self): return rd_hits, rd_miss, wr_rdir - def __str__(self): + @override + def __str__(self) -> str: return "%s(%s, minor=%s)" % \ (self.__class__.__name__, self.tapdisk.path, self.tapdisk.minor) @@ -166,7 +171,8 @@ class NotAMountPoint(Exception): def __init__(self, path): self.path = path - def __str__(self): + @override + def __str__(self) -> str: return "Not a mount point: %s" % self.path @classmethod diff --git a/drivers/linstor-manager b/drivers/linstor-manager new file mode 100755 index 000000000..9cef5f825 --- /dev/null +++ b/drivers/linstor-manager @@ -0,0 +1,1285 @@ +#!/usr/bin/env python3 +# +# Copyright (C) 2020 Vates SAS - ronan.abhamon@vates.fr +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +# We must modify default import path, we don't want to import modules +# installed in plugins folder and instead we must import from LINSTOR driver +# folder. 
+import sys +sys.path[0] = '/opt/xensource/sm/' + +import base64 +import os +import socket +import XenAPI +import XenAPIPlugin + +from json import JSONEncoder +from linstorjournaler import LinstorJournaler +from linstorvhdutil import LinstorVhdUtil, check_ex +from linstorvolumemanager import get_controller_uri, get_local_volume_openers, LinstorVolumeManager +from lock import Lock +import json +import LinstorSR +import re +import util +import vhdutil + +BACKING_DISK_RE = re.compile('^/dev/([^/]+)/(?:[^/]+)$') +LVM_PLUGIN = 'lvm.py' +THIN_POOL = 'thin_pool' + +FIREWALL_PORT_SCRIPT = '/etc/xapi.d/plugins/firewall-port' +LINSTOR_PORTS = [3366, 3370, 3376, 3377, 8076, 8077] +DRBD_PORTS = '7000:8000' + +DRBD_REACTOR_CONF = '/etc/drbd-reactor.d/sm-linstor.toml' + +DRBD_REACTOR_CONF_CONTENT = """[[promoter]] + +[promoter.resources.xcp-persistent-database] +start = [ "var-lib-linstor.service", "linstor-controller.service" ] +""" + +DRBD_REACTOR_DEPS = [ + '/run/systemd/system/linstor-controller.service.d/reactor.conf', + '/run/systemd/system/var-lib-linstor.service.d/reactor.conf' +] + + +def update_linstor_port(port, open_ports): + fn = 'open' if open_ports else 'close' + args = ( + FIREWALL_PORT_SCRIPT, fn, str(port), 'tcp' + ) + + (ret, out, err) = util.doexec(args) + if ret == 0: + return + raise Exception('Failed to {} port: {} {}'.format(fn, out, err)) + + +def has_iptables_rule(rule): + (ret, stdout, stderr) = util.doexec(['iptables', '-C'] + rule) + return not ret + + +def update_drbd_ports(open_ports): + # We want to use a static rule regarding DRBD volumes, + # so we can't use the XAPI firewall port script, we have to manually + # check for existing rules before updating iptables service. + rule = ['INPUT', '-p', 'tcp', '--dport', DRBD_PORTS, '-j', 'ACCEPT'] + if open_ports == has_iptables_rule(rule): + return + if open_ports: + rule.insert(1, '1') + (ret, stdout, stderr) = util.doexec(['iptables', '-I'] + rule) + if ret: + raise Exception('Failed to add DRBD rule: {}'.format(stderr)) + else: + (ret, stdout, stderr) = util.doexec(['iptables', '-D'] + rule) + if ret: + raise Exception('Failed to remove DRBD rule: {}'.format(stderr)) + (ret, stdout, stderr) = util.doexec(['service', 'iptables', 'save']) + if ret: + raise Exception('Failed to save DRBD rule: {}'.format(stderr)) + + +def update_all_ports(open_ports): + for port in LINSTOR_PORTS: + update_linstor_port(port, open_ports) + update_drbd_ports(open_ports) + + +def update_linstor_satellite_service(start): + service = 'linstor-satellite' + + # Stop services in all cases first. + # Ensure we don't have an invalid cache used by a satellite. + # (We found an issue with a new added disk which used a volume group name + # formerly involved by another disk. To avoid this kind of problem, we + # always restart the satellite.) 
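+    # Roughly: stop the satellite unconditionally, then start it again when
+    # start=True. Illustrative systemctl equivalent, assuming
+    # util.enable_and_start_service(name, False/True) maps to
+    # `systemctl disable --now` / `systemctl enable --now`:
+    #   systemctl disable --now linstor-satellite
+    #   systemctl enable --now linstor-satellite   # only when start=True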
+ util.enable_and_start_service(service, False) + if start: + util.enable_and_start_service(service, True) + + +def update_drbd_reactor_service(start): + if start: + util.atomicFileWrite(DRBD_REACTOR_CONF, None, DRBD_REACTOR_CONF_CONTENT) + else: + try: + os.remove(DRBD_REACTOR_CONF) + except Exception: + pass + + util.stop_service('drbd-reactor') + + try: + util.stop_service('drbd-promote@xcp\x2dpersistent\x2ddatabase.service') + except Exception as e: + if str(e).rstrip().endswith(' not loaded.'): + pass + raise e + + util.stop_service('linstor-controller') + util.stop_service('var-lib-linstor.service') + + for dep in DRBD_REACTOR_DEPS: + try: + os.remove(dep) + except Exception: + pass + + util.doexec(['systemctl', 'daemon-reload']) + util.enable_and_start_service('drbd-reactor', start) + + +def exec_create_sr(session, name, description, disks, volume_group, redundancy, provisioning, force): + disk_hostnames = disks.keys() + thin = provisioning == 'thin' + + # Create volumes. + hosts = session.xenapi.host.get_all_records() + hostnames = [] + for host_ref, host_record in hosts.items(): + hostname = host_record['hostname'] + hostnames.append(hostname) + + if force: + try: + session.xenapi.host.call_plugin( + host_ref, LVM_PLUGIN, 'destroy_volume_group', { + 'vg_name': volume_group, + 'force': 'True' + } + ) + except Exception as e: + try: + response = session.xenapi.host.call_plugin( + host_ref, LVM_PLUGIN, 'list_volume_groups', { + 'vg_name': volume_group + } + ) + if response != '{}': + raise e + except Exception: + raise e + + if hostname not in disk_hostnames or not disks[hostname]: + if force or session.xenapi.host.call_plugin( + host_ref, LVM_PLUGIN, 'list_volume_groups', { + 'vg_name': volume_group + } + ) == '{}': + continue + raise Exception('Volume group should not exist on `{}`, you must remove it manually'.format(hostname)) + + host_disks = disks[hostname] + if type(host_disks) is list: + host_disks = ','.join(disks[hostname]) + else: + raise Exception('Disk value of `{}` must be a disk list'.format(hostname)) + + session.xenapi.host.call_plugin( + host_ref, LVM_PLUGIN, 'create_physical_volume', { + 'devices': host_disks, + 'force': str(force) + } + ) + + session.xenapi.host.call_plugin( + host_ref, LVM_PLUGIN, 'create_volume_group', { + 'vg_name': volume_group, + 'devices': host_disks + } + ) + + if thin: + session.xenapi.host.call_plugin( + host_ref, LVM_PLUGIN, 'create_thin_pool', { + 'vg_name': volume_group, + 'lv_name': THIN_POOL + } + ) + + # Create SR. 
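+    # For reference, the SR.create call below is roughly what the following
+    # xe command would produce (illustrative values, thin provisioning assumed):
+    #   xe sr-create type=linstor name-label=<name> shared=true \
+    #     device-config:group-name=linstor_group/thin_pool \
+    #     device-config:redundancy=2 device-config:provisioning=thin \
+    #     device-config:hosts=host1,host2,host3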
+    master_ref = next(iter(
+        session.xenapi.pool.get_all_records().values()
+    ))['master']
+
+    device_config = {
+        'redundancy': str(redundancy),
+        'provisioning': 'thin' if thin else 'thick',
+        'group-name': '{}/{}'.format(volume_group, THIN_POOL) if thin else volume_group,
+        'hosts': ','.join(hostnames),
+        'monitor-db-quorum': str(len(hostnames) > 2)
+    }
+    sr_ref = session.xenapi.SR.create(
+        master_ref, device_config, '0', name, description, 'linstor', '', True, {}
+    )
+    return session.xenapi.SR.get_uuid(sr_ref)
+
+
+def get_drbd_volumes(volume_group=None):
+    drbd_volumes = {}
+    (ret, stdout, stderr) = util.doexec(['drbdsetup', 'show', '--json'])
+    if ret:
+        raise Exception('Failed to get JSON object: {}'.format(stderr))
+
+    config = json.loads(stdout)
+    for resource in config:
+        for volume in resource['_this_host']['volumes']:
+            backing_disk = volume.get('backing-disk')
+            if not backing_disk:
+                continue
+
+            match = BACKING_DISK_RE.match(backing_disk)
+            if not match:
+                continue
+
+            cur_volume_group = match.groups()[0]
+            if volume_group and cur_volume_group != volume_group:
+                continue
+
+            minor = int(volume['device_minor'])
+            if cur_volume_group in drbd_volumes:
+                drbd_volumes[cur_volume_group].append(minor)
+            else:
+                drbd_volumes[cur_volume_group] = [minor]
+    return drbd_volumes
+
+
+def force_destroy_drbd_volume(minor):
+    (ret, stdout, stderr) = util.doexec(['drbdsetup', 'detach', minor, '--force'])
+    if ret:
+        raise Exception('Failed to detach volume: {}'.format(stderr))
+    (ret, stdout, stderr) = util.doexec(['drbdsetup', 'del-minor', minor])
+    if ret:
+        raise Exception('Failed to destroy volume: {}'.format(stderr))
+
+
+def get_ip_addr_of_pif(session, pif_uuid):
+    pif_ref = session.xenapi.PIF.get_by_uuid(pif_uuid)
+    pif = session.xenapi.PIF.get_record(pif_ref)
+
+    if not pif['currently_attached']:
+        raise XenAPIPlugin.Failure('-1', ['PIF is not plugged'])
+
+    ip_addr = pif['IP'] if pif['primary_address_type'].lower() == 'ipv4' else pif['IPv6'].split('/')[0]
+    if ip_addr == '':
+        raise XenAPIPlugin.Failure('-1', ['PIF has no IP'])
+    return ip_addr
+
+# ------------------------------------------------------------------------------
+
+
+def prepare_sr(session, args):
+    try:
+        LinstorSR.activate_lvm_group(args['groupName'])
+
+        update_all_ports(open_ports=True)
+        # We don't want to enable and start the drbd-reactor daemon during
+        # SR creation.
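+        # This function is normally reached through the XAPI plugin dispatch,
+        # e.g. (illustrative):
+        #   xe host-call-plugin host-uuid=<host> plugin=linstor-manager \
+        #     fn=prepareSr args:groupName=linstor_group/thin_pool
+        # drbd-reactor is left disabled here; it can be enabled later via the
+        # updateDrbdReactor entry point.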
+ update_drbd_reactor_service(start=False) + update_linstor_satellite_service(start=True) + return str(True) + except Exception as e: + util.SMlog('linstor-manager:prepare_sr error: {}'.format(e)) + return str(False) + + +def release_sr(session, args): + try: + update_linstor_satellite_service(start=False) + update_drbd_reactor_service(start=False) + update_all_ports(open_ports=False) + return str(True) + except Exception as e: + util.SMlog('linstor-manager:release_sr error: {}'.format(e)) + return str(False) + + +def update_drbd_reactor(session, args): + try: + enabled = util.strtobool(args['enabled']) + update_drbd_reactor_service(start=enabled) + return str(True) + except Exception as e: + util.SMlog( + 'linstor-manager:update_drbd_reactor error: {}'.format(e) + ) + return str(False) + + +def attach(session, args): + try: + sr_uuid = args['srUuid'] + vdi_uuid = args['vdiUuid'] + group_name = args['groupName'] + + controller_uri = get_controller_uri() + journaler = LinstorJournaler( + controller_uri, group_name, logger=util.SMlog + ) + linstor = LinstorVolumeManager( + controller_uri, + group_name, + logger=util.SMlog + ) + LinstorSR.attach_thin(session, journaler, linstor, sr_uuid, vdi_uuid) + return str(True) + except Exception as e: + util.SMlog('linstor-manager:attach error: {}'.format(e)) + return str(False) + + +def detach(session, args): + try: + sr_uuid = args['srUuid'] + vdi_uuid = args['vdiUuid'] + group_name = args['groupName'] + + linstor = LinstorVolumeManager( + get_controller_uri(), + group_name, + logger=util.SMlog + ) + LinstorSR.detach_thin(session, linstor, sr_uuid, vdi_uuid) + return str(True) + except Exception as e: + util.SMlog('linstor-manager:detach error: {}'.format(e)) + return str(False) + + +def destroy(session, args): + try: + group_name = args['groupName'] + + # When destroy is called, there are no running drbd-reactor daemons. + # So the controllers are stopped too, we must start an instance. + util.restart_service('var-lib-linstor.service') + util.restart_service('linstor-controller') + + linstor = LinstorVolumeManager( + 'linstor://localhost', + group_name, + logger=util.SMlog + ) + linstor.destroy() + return str(True) + except Exception as e: + util.stop_service('linstor-controller') + util.stop_service('var-lib-linstor.service') + util.SMlog('linstor-manager:destroy error: {}'.format(e)) + return str(False) + + +def check(session, args): + try: + device_path = args['devicePath'] + ignore_missing_footer = util.strtobool( + args['ignoreMissingFooter'] + ) + fast = util.strtobool(args['fast']) + check_ex(device_path, ignore_missing_footer, fast) + return str(True) + except Exception as e: + util.SMlog('linstor-manager:check error: {}'.format(e)) + raise + + +def get_vhd_info(session, args): + try: + device_path = args['devicePath'] + group_name = args['groupName'] + include_parent = util.strtobool(args['includeParent']) + + linstor = LinstorVolumeManager( + get_controller_uri(), + group_name, + logger=util.SMlog + ) + + def extract_uuid(device_path): + # TODO: Remove new line in the vhdutil module. Not here. 
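+            # e.g. a DRBD path such as '/dev/drbd/by-res/xcp-volume-<uuid>/0\n'
+            # (illustrative naming) is mapped back to the VDI uuid '<uuid>'.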
+ return linstor.get_volume_uuid_from_device_path( + device_path.rstrip('\n') + ) + + vhd_info = vhdutil.getVHDInfo( + device_path, extract_uuid, include_parent, False + ) + return json.dumps(vhd_info.__dict__) + except Exception as e: + util.SMlog('linstor-manager:get_vhd_info error: {}'.format(e)) + raise + + +def has_parent(session, args): + try: + device_path = args['devicePath'] + return str(vhdutil.hasParent(device_path)) + except Exception as e: + util.SMlog('linstor-manager:has_parent error: {}'.format(e)) + raise + + +def get_parent(session, args): + try: + device_path = args['devicePath'] + group_name = args['groupName'] + + linstor = LinstorVolumeManager( + get_controller_uri(), + group_name, + logger=util.SMlog + ) + + def extract_uuid(device_path): + # TODO: Remove new line in the vhdutil module. Not here. + return linstor.get_volume_uuid_from_device_path( + device_path.rstrip('\n') + ) + + return vhdutil.getParent(device_path, extract_uuid) + except Exception as e: + util.SMlog('linstor-manager:get_parent error: {}'.format(e)) + raise + + +def get_size_virt(session, args): + try: + device_path = args['devicePath'] + return str(vhdutil.getSizeVirt(device_path)) + except Exception as e: + util.SMlog('linstor-manager:get_size_virt error: {}'.format(e)) + raise + + +def get_size_phys(session, args): + try: + device_path = args['devicePath'] + return str(vhdutil.getSizePhys(device_path)) + except Exception as e: + util.SMlog('linstor-manager:get_size_phys error: {}'.format(e)) + raise + + +def get_allocated_size(session, args): + try: + device_path = args['devicePath'] + return str(vhdutil.getAllocatedSize(device_path)) + except Exception as e: + util.SMlog('linstor-manager:get_allocated_size error: {}'.format(e)) + raise + + +def get_max_resize_size(session, args): + try: + device_path = args['devicePath'] + return str(vhdutil.getMaxResizeSize(device_path)) + except Exception as e: + util.SMlog('linstor-manager:get_size_phys error: {}'.format(e)) + raise + + +def get_depth(session, args): + try: + device_path = args['devicePath'] + return str(vhdutil.getDepth(device_path)) + except Exception as e: + util.SMlog('linstor-manager:get_depth error: {}'.format(e)) + raise + + +def get_key_hash(session, args): + try: + device_path = args['devicePath'] + return vhdutil.getKeyHash(device_path) or '' + except Exception as e: + util.SMlog('linstor-manager:get_key_hash error: {}'.format(e)) + raise + + +def get_block_bitmap(session, args): + try: + device_path = args['devicePath'] + return base64.b64encode(vhdutil.getBlockBitmap(device_path)).decode('ascii') + except Exception as e: + util.SMlog('linstor-manager:get_block_bitmap error: {}'.format(e)) + raise + + +def get_drbd_size(session, args): + try: + device_path = args['devicePath'] + (ret, stdout, stderr) = util.doexec(['blockdev', '--getsize64', device_path]) + if ret == 0: + return stdout.strip() + raise Exception('Failed to get DRBD size: {}'.format(stderr)) + except Exception: + util.SMlog('linstor-manager:get_drbd_size error: {}'.format(stderr)) + raise + + +def set_size_virt(session, args): + try: + device_path = args['devicePath'] + size = int(args['size']) + jfile = args['jfile'] + vhdutil.setSizeVirt(device_path, size, jfile) + return '' + except Exception as e: + util.SMlog('linstor-manager:set_size_virt error: {}'.format(e)) + raise + + +def set_size_virt_fast(session, args): + try: + device_path = args['devicePath'] + size = int(args['size']) + vhdutil.setSizeVirtFast(device_path, size) + return '' + except Exception as e: 
+ util.SMlog('linstor-manager:set_size_virt_fast error: {}'.format(e)) + raise + + +def set_parent(session, args): + try: + device_path = args['devicePath'] + parent_path = args['parentPath'] + vhdutil.setParent(device_path, parent_path, False) + return '' + except Exception as e: + util.SMlog('linstor-manager:set_parent error: {}'.format(e)) + raise + + +def coalesce(session, args): + try: + device_path = args['devicePath'] + return str(vhdutil.coalesce(device_path)) + except Exception as e: + util.SMlog('linstor-manager:coalesce error: {}'.format(e)) + raise + + +def repair(session, args): + try: + device_path = args['devicePath'] + vhdutil.repair(device_path) + return '' + except Exception as e: + util.SMlog('linstor-manager:repair error: {}'.format(e)) + raise + + +def deflate(session, args): + try: + device_path = args['devicePath'] + new_size = int(args['newSize']) + old_size = int(args['oldSize']) + zeroize = util.strtobool(args['zeroize']) + group_name = args['groupName'] + + linstor = LinstorVolumeManager( + get_controller_uri(), + group_name, + logger=util.SMlog + ) + LinstorVhdUtil(session, linstor).deflate(device_path, new_size, old_size, zeroize) + return '' + except Exception as e: + util.SMlog('linstor-manager:deflate error: {}'.format(e)) + raise + + +def lock_vdi(session, args): + lock = None + try: + sr_uuid = args['srUuid'] + vdi_uuid = args['vdiUuid'] + group_name = args['groupName'] + locked = util.strtobool(args['locked']) + + # We must lock to mark the VDI. + lock = Lock(vhdutil.LOCK_TYPE_SR, sr_uuid) + if locked: + lock.acquire() + + linstor = LinstorVolumeManager( + get_controller_uri(), + group_name, + logger=util.SMlog + ) + linstor.lock_volume(vdi_uuid, locked) + + return str(True) + except Exception as e: + util.SMlog('linstor-manager:lock_vdi error: {}'.format(e)) + finally: + if locked and lock: + lock.release() + return str(False) + + +def has_controller_running(session, args): + (ret, stdout, stderr) = util.doexec([ + 'systemctl', 'is-active', '--quiet', 'linstor-controller' + ]) + return str(ret == 0) + + +def add_host(session, args): + group_name = args['groupName'] + + # 1. Find all LINSTOR SRs and PBDs. + srs = dict() + for sr_ref, sr in session.xenapi.SR.get_all_records().items(): + if sr.get('type') == 'linstor': + srs[sr_ref] = sr + + pbds = dict() + for pbd_ref, pbd in session.xenapi.PBD.get_all_records().items(): + device_config = pbd.get('device_config') + if ( + device_config and + device_config.get('group-name') == group_name + and pbd['SR'] in srs + ): + pbds[pbd_ref] = pbd + + # 2. Ensure there is at least one PBD and all PBDs are used in + # the same SR. + if not pbds: + raise Exception( + 'Failed to find PBDs of group `{}`'.format(group_name) + ) + + sr_ref = None + for pbd in pbds.values(): + if not sr_ref: + sr_ref = pbd['SR'] + elif pbd['SR'] != sr_ref: + raise Exception( + 'Group `{}` is used by many SRs!'.format(group_name) + ) + + # 3. Ensure node doesn't exist. + linstor = LinstorVolumeManager( + get_controller_uri(), + group_name, + logger=util.SMlog + ) + + node_name = socket.gethostname() + has_node = linstor.has_node(node_name) + + new_pbd_ref = None + + try: + # 4. Enable services. + update_all_ports(open_ports=True) + update_drbd_reactor_service(start=True) + update_linstor_satellite_service(start=True) + + # 5. Try to create local node. + if not has_node: + linstor.create_node(node_name, util.get_this_host_address(session)) + + # 6. Try to create PBD. 
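+        # The PBD created in step 7 reuses the pool-wide device-config keys;
+        # e.g. (illustrative values):
+        #   {'group-name': 'linstor_group/thin_pool', 'redundancy': 2,
+        #    'provisioning': 'thin'}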
+ this_host = util.get_this_host_ref(session) + create_new_pbd = True + + assert pbds + for pbd in pbds.values(): + if pbd['host'] == this_host: + create_new_pbd = False + break + + device_config = pbd['device_config'] + # Should be the same on all hosts. + provisioning = device_config['provisioning'] + + # 7. Create new PBD. + if create_new_pbd: + new_pbd_ref = session.xenapi.PBD.create({ + 'host': this_host, + 'SR': sr_ref, + 'device_config': { + 'group-name': group_name, + 'redundancy': linstor.redundancy, + 'provisioning': provisioning + } + }) + try: + session.xenapi.PBD.plug(new_pbd_ref) + except Exception as e: + util.SMlog('Failed to plug new PBD: {}'.format(e)) + + return str(True) + except Exception as e: + stop_services = not has_node + if stop_services: + try: + linstor.destroy_node(node_name) + except Exception: + pass + + if new_pbd_ref: + try: + session.xenapi.PBD.unplug(new_pbd_ref) + except Exception: + pass + + try: + session.xenapi.PBD.destroy(new_pbd_ref) + except Exception: + pass + + try: + # If we failed to remove the node, we don't stop services. + if stop_services and not linstor.has_node(node_name): + update_linstor_satellite_service(start=False) + update_drbd_reactor_service(start=False) + update_all_ports(open_ports=False) + except Exception: + pass + + raise e + + +def remove_host(session, args): + group_name = args['groupName'] + + # 1. Find all LINSTOR SRs and PBDs. + srs = dict() + for sr_ref, sr in session.xenapi.SR.get_all_records().items(): + if sr.get('type') == 'linstor': + srs[sr_ref] = sr + + pbds = dict() + for pbd_ref, pbd in session.xenapi.PBD.get_all_records().items(): + device_config = pbd.get('device_config') + if ( + device_config and + device_config.get('group-name') == group_name + and pbd['SR'] in srs + ): + pbds[pbd_ref] = pbd + + # 2. Remove node. + linstor = LinstorVolumeManager( + get_controller_uri(), + group_name, + logger=util.SMlog + ) + + node_name = socket.gethostname() + if linstor.has_node(node_name): + linstor.destroy_node(node_name) + if linstor.has_node(node_name): + raise Exception('Failed to remove node! Unknown error.') + + this_host = util.get_this_host_ref(session) + + # 3. Remove PBD. + for pbd_ref, pbd in pbds.items(): + host = pbd['host'] + if host == this_host: + if pbd['currently_attached']: + session.xenapi.PBD.unplug(pbd_ref) + session.xenapi.PBD.destroy(pbd_ref) + break + + # 3. Stop services. + try: + update_linstor_satellite_service(start=False) + update_drbd_reactor_service(start=False) + update_all_ports(open_ports=False) + except Exception as e: + util.SMlog('Error while stopping services: {}'.format(e)) + pass + + return str('True') + + +def create_sr(session, args): + try: + # Use a complex parsing contrary to the other functions because + # this helper is a public method and is not easy to use. 
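+        # Example invocation (illustrative values):
+        #   xe host-call-plugin host-uuid=<master> plugin=linstor-manager \
+        #     fn=createSr args:name=linstor-sr args:provisioning=thin \
+        #     args:redundancy=2 args:volume_group=linstor_group \
+        #     args:disks='{"host-1": ["/dev/sdb"], "host-2": ["/dev/sdb"]}'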
+ name = args.get('name') + if not name: + raise Exception('`name` is empty') + + description = args.get('description') or '' + + disks = args.get('disks') + if not disks: + raise Exception('`disks` is empty') + try: + disks = json.loads(disks) + except Exception as e: + raise Exception('failed to decode `disks`: {}'.format(e)) + if type(disks) is not dict: + raise Exception('`disks` must be a JSON object') + + volume_group = args.get('volume_group') + if not volume_group: + raise Exception('`volume_group` is empty') + + redundancy = args.get('redundancy') + if not redundancy: + raise Exception('`redundancy` is empty') + + try: + redundancy = int(redundancy) + except Exception: + raise Exception('`redundancy` is not a number') + + provisioning = args.get('provisioning') + if not provisioning: + provisioning = 'thin' + elif provisioning != 'thin' and provisioning != 'thick': + raise Exception('unsupported provisioning') + + force = util.strtobool(args.get('force')) + + return exec_create_sr( + session, name, description, disks, volume_group, redundancy, provisioning, force + ) + except Exception as e: + util.SMlog('linstor-manager:create_sr error: {}'.format(e)) + raise + + +def demote_drbd_resource(session, args): + try: + resource_name = args['resource_name'] + (ret, stdout, stderr) = util.doexec(['drbdsetup', 'secondary', resource_name]) + if ret: + raise Exception('Failed to demote resource: {}'.format(stderr)) + return str(True) + except Exception as e: + util.SMlog('linstor-manager:demote_drbd_resource error: {}'.format(e)) + return str(False) + + +def list_drbd_volumes(session, args): + try: + volume_group = args.get('volume_group') + return json.dumps(get_drbd_volumes(volume_group)) + except Exception as e: + util.SMlog('linstor-manager:list_drbd_volumes error: {}'.format(e)) + raise + + +def destroy_drbd_volume(session, args): + try: + minor = args.get('minor') + if not minor: + raise Exception('Cannot destroy DRBD volume without minor.') + force_destroy_drbd_volume(minor) + return str(True) + except Exception as e: + util.SMlog('linstor-manager:destroy_drbd_volume error: {}'.format(e)) + return str(False) + + +def destroy_drbd_volumes(session, args): + try: + volume_group = args.get('volume_group') + if not volume_group: + raise Exception('Cannot destroy DRBD volumes without volume group.') + for minor in get_drbd_volumes(volume_group).get(volume_group, []): + force_destroy_drbd_volume(str(minor)) + return str(True) + except Exception as e: + util.SMlog('linstor-manager:destroy_drbd_volumes error: {}'.format(e)) + return str(False) + + +def get_drbd_openers(session, args): + try: + resource_name = args.get('resourceName') + volume = args.get('volume') + return get_local_volume_openers(resource_name, volume) + except Exception as e: + util.SMlog('linstor-manager:get_drbd_openers error: {}'.format(e)) + raise + + +class HealthCheckError(object): + __slots__ = ('data') + + MASK_REPORT_LEVEL = 0x7000000 + MASK_TYPE = 0xFF0000 + MASK_VALUE = 0XFFFF + + # 24-26 bits + REPORT_LEVEL_WARN = 0x1000000 + REPORT_LEVEL_ERR = 0x2000000 + + # 16-23 bits + TYPE_GENERIC = 0x10000 + TYPE_NODE = 0x20000 + TYPE_STORAGE_POOL = 0x30000 + TYPE_VOLUME = 0x40000 + TYPE_RESOURCE = 0x50000 + + # 1-15 bits + GENERIC_UNEXPECTED = REPORT_LEVEL_ERR | TYPE_GENERIC | 0 + GENERIC_LINSTOR_UNREACHABLE = REPORT_LEVEL_ERR | TYPE_GENERIC | 1 + + NODE_NOT_ONLINE = REPORT_LEVEL_WARN | TYPE_NODE | 0 + + STORAGE_POOL_UNKNOWN_FREE_SIZE = REPORT_LEVEL_ERR | TYPE_STORAGE_POOL | 0 + STORAGE_POOL_UNKNOWN_CAPACITY = 
REPORT_LEVEL_ERR | TYPE_STORAGE_POOL | 1 + STORAGE_POOL_LOW_FREE_SIZE = REPORT_LEVEL_WARN | TYPE_STORAGE_POOL | 2 + + VOLUME_UNKNOWN_STATE = REPORT_LEVEL_WARN | TYPE_VOLUME | 0 + VOLUME_INVALID_STATE = REPORT_LEVEL_ERR | TYPE_VOLUME | 1 + VOLUME_WRONG_DISKLESS_STATE = REPORT_LEVEL_WARN | TYPE_VOLUME | 2 + VOLUME_INTERNAL_UNVERIFIED_STATE = REPORT_LEVEL_WARN | TYPE_VOLUME | 3 + + MAP_CODE_TO_PARAMS = { + GENERIC_UNEXPECTED: { 'message' }, + GENERIC_LINSTOR_UNREACHABLE: { 'message' }, + NODE_NOT_ONLINE: { 'name', 'status' }, + STORAGE_POOL_UNKNOWN_FREE_SIZE: { 'name' }, + STORAGE_POOL_UNKNOWN_CAPACITY: { 'name' }, + STORAGE_POOL_LOW_FREE_SIZE: { 'name', 'threshold' }, + VOLUME_UNKNOWN_STATE: { 'node', 'resource', 'number' }, + VOLUME_INVALID_STATE: { 'node', 'resource', 'number', 'state' }, + VOLUME_WRONG_DISKLESS_STATE: { 'node', 'resource', 'number', 'state' }, + VOLUME_INTERNAL_UNVERIFIED_STATE: { 'node', 'resource', 'number', 'state' } + } + + def __init__(self, code, **kwargs): + attributes = self.MAP_CODE_TO_PARAMS[code] + data = { 'code': code } + for attr_name, attr_value in kwargs.items(): + assert attr_name in attributes + data[attr_name] = attr_value + self.data = data + + def to_json(self): + return self.data + + +def health_check(session, args): + group_name = args['groupName'] + + result = { + 'controller-uri': '', + 'nodes': {}, + 'storage-pools': {}, + 'resources': {}, + 'errors': [] + } + + def format_result(): + # See: https://stackoverflow.com/questions/18478287/making-object-json-serializable-with-regular-encoder/18561055#18561055 + def _default(self, obj): + return getattr(obj.__class__, 'to_json', _default.default)(obj) + _default.default = JSONEncoder().default + JSONEncoder.default = _default + return json.dumps(result) + + # 1. Get controller. + try: + controller_uri = get_controller_uri() + + result['controller-uri'] = controller_uri + try: + if controller_uri == 'linstor://localhost': + # Replace `localhost` with IP to give a better info for users. + result['controller-uri'] = 'linstor://' + util.get_this_host_address(session) + except Exception: + # Ignore error: can be a XAPI restart or something else. + pass + + linstor = LinstorVolumeManager( + controller_uri, + group_name, + logger=util.SMlog + ) + except Exception as e: + # Probably a network issue, or offline controller. + result['errors'].append(HealthCheckError( + code=HealthCheckError.GENERIC_LINSTOR_UNREACHABLE, + message=str(e) + )) + return format_result() + + try: + # 2. Check node statuses. + nodes = linstor.get_nodes_info() + result['nodes'] = nodes + for node_name, status in nodes.items(): + if status != 'ONLINE': + result['errors'].append(HealthCheckError( + code=HealthCheckError.NODE_NOT_ONLINE, + name=node_name, + status=status + )) + + # 3. Check storage pool statuses. 
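+        # The HealthCheckError codes used below pack (report level | type | value);
+        # e.g. STORAGE_POOL_LOW_FREE_SIZE == 0x1000000 | 0x30000 | 2 == 0x1030002,
+        # and a consumer can recover the severity with
+        # `code & HealthCheckError.MASK_REPORT_LEVEL`.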
+ storage_pools_per_node = linstor.get_storage_pools_info() + result['storage-pools'] = storage_pools_per_node + for node_name, storage_pools in storage_pools_per_node.items(): + for storage_pool in storage_pools: + free_size = storage_pool['free-size'] + capacity = storage_pool['capacity'] + if free_size < 0 or capacity <= 0: + if free_size < 0: + result['errors'].append(HealthCheckError( + code=HealthCheckError.STORAGE_POOL_UNKNOWN_FREE_SIZE, + name=storage_pool['name'] + )) + elif capacity < 0: + result['errors'].append(HealthCheckError( + code=HealthCheckError.STORAGE_POOL_UNKNOWN_CAPACITY, + name=storage_pool['name'] + )) + else: + remaining_percent = free_size / float(capacity) * 100.0 + threshold = 10.0 + if remaining_percent < threshold: + result['errors'].append(HealthCheckError( + code=HealthCheckError.STORAGE_POOL_LOW_FREE_SIZE, + name=storage_pool['name'], + threshold=threshold + )) + + # 4. Check resource statuses. + all_resources = linstor.get_resources_info() + result['resources'] = all_resources + + for resource_name, resource_by_node in all_resources.items(): + for node_name, resource in resource_by_node.items(): + for volume_index, volume in enumerate(resource['volumes']): + disk_state = volume['disk-state'] + if disk_state in ['UpToDate', 'Created', 'Attached']: + continue + if disk_state == 'DUnknown': + result['errors'].append(HealthCheckError( + code=HealthCheckError.VOLUME_UNKNOWN_STATE, + node=node_name, + resource=resource_name, + number=volume_index + )) + continue + if disk_state in ['Inconsistent', 'Failed', 'To: Creating', 'To: Attachable', 'To: Attaching']: + result['errors'].append(HealthCheckError( + code=HealthCheckError.VOLUME_INVALID_STATE, + node=node_name, + resource=resource_name, + number=volume_index, + state=disk_state + )) + continue + if disk_state == 'Diskless': + if resource['diskful']: + result['errors'].append(HealthCheckError( + code=HealthCheckError.VOLUME_WRONG_DISKLESS_STATE, + node=node_name, + resource=resource_name, + number=volume_index, + state=disk_state + )) + elif resource['tie-breaker']: + volume['disk-state'] = 'TieBreaker' + continue + result['errors'].append(HealthCheckError( + code=HealthCheckError.VOLUME_INTERNAL_UNVERIFIED_STATE, + node=node_name, + resource=resource_name, + number=volume_index, + state=disk_state + )) + except Exception as e: + result['errors'].append(HealthCheckError( + code=HealthCheckError.GENERIC_UNEXPECTED, + message=str(e) + )) + + return format_result() + + +def create_node_interface(session, args): + group_name = args['groupName'] + hostname = args['hostname'] + name = args['name'] + pif_uuid = args['pifUuid'] + + ip_addr = get_ip_addr_of_pif(session, pif_uuid) + + linstor = LinstorVolumeManager( + get_controller_uri(), + group_name, + logger=util.SMlog + ) + try: + linstor.create_node_interface(hostname, name, ip_addr) + except Exception as e: + raise XenAPIPlugin.Failure('-1', [str(e)]) + return str(True) + + +def destroy_node_interface(session, args): + group_name = args['groupName'] + hostname = args['hostname'] + name = args['name'] + + linstor = LinstorVolumeManager( + get_controller_uri(), + group_name, + logger=util.SMlog + ) + try: + linstor.destroy_node_interface(hostname, name) + except Exception as e: + raise XenAPIPlugin.Failure('-1', [str(e)]) + return str(True) + + +def modify_node_interface(session, args): + group_name = args['groupName'] + hostname = args['hostname'] + name = args['name'] + pif_uuid = args['pifUuid'] + + ip_addr = get_ip_addr_of_pif(session, pif_uuid) + + linstor = 
LinstorVolumeManager( + get_controller_uri(), + group_name, + logger=util.SMlog + ) + try: + linstor.modify_node_interface(hostname, name, ip_addr) + except Exception as e: + raise XenAPIPlugin.Failure('-1', [str(e)]) + return str(True) + + +def list_node_interfaces(session, args): + group_name = args['groupName'] + hostname = args['hostname'] + + linstor = LinstorVolumeManager( + get_controller_uri(), + group_name, + logger=util.SMlog + ) + try: + return json.dumps(linstor.list_node_interfaces(hostname)) + except Exception as e: + raise XenAPIPlugin.Failure('-1', [str(e)]) + + +def get_node_preferred_interface(session, args): + group_name = args['groupName'] + hostname = args['hostname'] + + linstor = LinstorVolumeManager( + get_controller_uri(), + group_name, + logger=util.SMlog + ) + try: + return linstor.get_node_preferred_interface(hostname) + except Exception as e: + raise XenAPIPlugin.Failure('-1', [str(e)]) + + +def set_node_preferred_interface(session, args): + group_name = args['groupName'] + hostname = args['hostname'] + name = args['name'] + + linstor = LinstorVolumeManager( + get_controller_uri(), + group_name, + logger=util.SMlog + ) + try: + linstor.set_node_preferred_interface(hostname, name) + except Exception as e: + raise XenAPIPlugin.Failure('-1', [str(e)]) + return str(True) + + +if __name__ == '__main__': + XenAPIPlugin.dispatch({ + 'prepareSr': prepare_sr, + 'releaseSr': release_sr, + 'updateDrbdReactor': update_drbd_reactor, + 'attach': attach, + 'detach': detach, + 'destroy': destroy, + + # vhdutil wrappers called by linstorvhdutil. + # Note: When a VHD is open in RO mode (so for all vhdutil getters), + # the LVM layer is used directly to bypass DRBD verifications. + # In this case there can't be EROFS errors. + # Note 2: We assume linstorvhdutil executes remote calls on diskful + # DRBDs, otherwise we still have EROFS errors... + 'check': check, + 'getVHDInfo': get_vhd_info, + 'hasParent': has_parent, + 'getParent': get_parent, + 'getSizeVirt': get_size_virt, + 'getMaxResizeSize': get_max_resize_size, + 'getSizePhys': get_size_phys, + 'getAllocatedSize': get_allocated_size, + 'getDepth': get_depth, + 'getKeyHash': get_key_hash, + 'getBlockBitmap': get_block_bitmap, + + # Small helper to get the DRBD blockdev size. + 'getDrbdSize': get_drbd_size, + + # Called by cleanup.py to coalesce when a primary + # is opened on a non-local host. + 'setSizeVirt': set_size_virt, + 'setSizeVirtFast': set_size_virt_fast, + 'setParent': set_parent, + 'coalesce': coalesce, + 'repair': repair, + + # Misc writters. 
+ 'deflate': deflate, + + 'lockVdi': lock_vdi, + 'hasControllerRunning': has_controller_running, + 'addHost': add_host, + 'removeHost': remove_host, + 'createSr': create_sr, + 'listDrbdVolumes': list_drbd_volumes, + 'demoteDrbdResource': demote_drbd_resource, + 'destroyDrbdVolume': destroy_drbd_volume, + 'destroyDrbdVolumes': destroy_drbd_volumes, + 'getDrbdOpeners': get_drbd_openers, + 'healthCheck': health_check, + + 'createNodeInterface': create_node_interface, + 'destroyNodeInterface': destroy_node_interface, + 'modifyNodeInterface': modify_node_interface, + 'listNodeInterfaces': list_node_interfaces, + 'getNodePreferredInterface': get_node_preferred_interface, + 'setNodePreferredInterface': set_node_preferred_interface + }) diff --git a/drivers/linstorjournaler.py b/drivers/linstorjournaler.py new file mode 100755 index 000000000..2475ae1e9 --- /dev/null +++ b/drivers/linstorjournaler.py @@ -0,0 +1,176 @@ +#!/usr/bin/env python3 +# +# Copyright (C) 2020 Vates SAS - ronan.abhamon@vates.fr +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + + +from linstorvolumemanager import \ + get_controller_uri, LinstorVolumeManager, LinstorVolumeManagerError +import linstor +import re +import util + + +class LinstorJournalerError(Exception): + pass + +# ============================================================================== + + +class LinstorJournaler: + """ + Simple journaler that uses LINSTOR properties for persistent "storage". + A journal is a id-value pair, and there can be only one journal for a + given id. An identifier is juste a transaction name. + """ + + REG_TYPE = re.compile('^([^/]+)$') + REG_TRANSACTION = re.compile('^[^/]+/([^/]+)$') + + """ + Types of transaction in the journal. + """ + CLONE = 'clone' + INFLATE = 'inflate' + ZERO = 'zero' + + @staticmethod + def default_logger(*args): + print(args) + + def __init__(self, uri, group_name, logger=default_logger.__func__): + self._namespace = '{}journal/'.format( + LinstorVolumeManager._build_sr_namespace() + ) + self._logger = logger + self._journal = self._create_journal_instance( + uri, group_name, self._namespace + ) + + def create(self, type, identifier, value): + # TODO: Maybe rename to 'add' in the future (in Citrix code too). + + key = self._get_key(type, identifier) + + # 1. Ensure transaction doesn't exist. + current_value = self.get(type, identifier) + if current_value is not None: + raise LinstorJournalerError( + 'Journal transaction already exists for \'{}:{}\': {}' + .format(type, identifier, current_value) + ) + + # 2. Write! 
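+        # Example key layout (derived from _get_key below): an INFLATE entry
+        # for VDI 'aaaa-bbbb' is written under the '<namespace>journal/' KV
+        # namespace as key 'inflate/aaaa-bbbb', with str(value) as payload.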
+ try: + self._reset_namespace() + self._logger( + 'Create journal transaction \'{}:{}\''.format(type, identifier) + ) + self._journal[key] = str(value) + except Exception as e: + try: + self._journal.pop(key, 'empty') + except Exception as e2: + self._logger( + 'Failed to clean up failed journal write: {} (Ignored)' + .format(e2) + ) + + raise LinstorJournalerError( + 'Failed to write to journal: {}'.format(e) + ) + + def remove(self, type, identifier): + key = self._get_key(type, identifier) + try: + self._reset_namespace() + self._logger( + 'Destroy journal transaction \'{}:{}\'' + .format(type, identifier) + ) + self._journal.pop(key) + except Exception as e: + raise LinstorJournalerError( + 'Failed to remove transaction \'{}:{}\': {}' + .format(type, identifier, e) + ) + + def get(self, type, identifier): + self._reset_namespace() + return self._journal.get(self._get_key(type, identifier)) + + def get_all(self, type): + entries = {} + + self._journal.namespace = self._namespace + '{}/'.format(type) + for (key, value) in self._journal.items(): + res = self.REG_TYPE.match(key) + if res: + identifier = res.groups()[0] + entries[identifier] = value + return entries + + # Added to compatibility with Citrix API. + def getAll(self, type): + return self.get_all(type) + + def has_entries(self, identifier): + self._reset_namespace() + for (key, value) in self._journal.items(): + res = self.REG_TRANSACTION.match(key) + if res: + current_identifier = res.groups()[0] + if current_identifier == identifier: + return True + return False + + # Added to compatibility with Citrix API. + def hasJournals(self, identifier): + return self.has_entries(identifier) + + def _reset_namespace(self): + self._journal.namespace = self._namespace + + @classmethod + def _create_journal_instance(cls, uri, group_name, namespace): + def connect(uri): + if not uri: + uri = get_controller_uri() + if not uri: + raise LinstorVolumeManagerError( + 'Unable to find controller uri...' + ) + return linstor.KV( + LinstorVolumeManager._build_group_name(group_name), + uri=uri, + namespace=namespace + ) + + try: + return connect(uri) + except (linstor.errors.LinstorNetworkError, LinstorVolumeManagerError): + pass + + return util.retry( + lambda: connect(None), + maxretry=10, + exceptions=[ + linstor.errors.LinstorNetworkError, LinstorVolumeManagerError + ] + ) + + @staticmethod + def _get_key(type, identifier): + return '{}/{}'.format(type, identifier) diff --git a/drivers/linstorvhdutil.py b/drivers/linstorvhdutil.py new file mode 100644 index 000000000..76996e38b --- /dev/null +++ b/drivers/linstorvhdutil.py @@ -0,0 +1,606 @@ +#!/usr/bin/env python3 +# +# Copyright (C) 2020 Vates SAS - ronan.abhamon@vates.fr +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
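+
+# Example usage (illustrative sketch, assuming an existing XAPI session and a
+# LinstorVolumeManager instance; not an API contract):
+#
+#   vhd_util = LinstorVhdUtil(session, linstor)
+#   info = vhd_util.get_vhd_info(vdi_uuid)  # read locally, fall back to a
+#                                           # diskful host on failure
+#   vhd_util.inflate(journaler, vdi_uuid, path, new_size, old_size)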
+ +from sm_typing import override + +from linstorjournaler import LinstorJournaler +from linstorvolumemanager import LinstorVolumeManager +import base64 +import errno +import json +import socket +import time +import util +import vhdutil +import xs_errors + +MANAGER_PLUGIN = 'linstor-manager' + + +def call_remote_method(session, host_ref, method, device_path, args): + try: + response = session.xenapi.host.call_plugin( + host_ref, MANAGER_PLUGIN, method, args + ) + except Exception as e: + util.SMlog('call-plugin ({} with {}) exception: {}'.format( + method, args, e + )) + raise util.SMException(str(e)) + + util.SMlog('call-plugin ({} with {}) returned: {}'.format( + method, args, response + )) + + return response + + +def check_ex(path, ignoreMissingFooter = False, fast = False): + cmd = [vhdutil.VHD_UTIL, "check", vhdutil.OPT_LOG_ERR, "-n", path] + if ignoreMissingFooter: + cmd.append("-i") + if fast: + cmd.append("-B") + + vhdutil.ioretry(cmd) + + +class LinstorCallException(util.SMException): + def __init__(self, cmd_err): + self.cmd_err = cmd_err + + @override + def __str__(self) -> str: + return str(self.cmd_err) + + +class ErofsLinstorCallException(LinstorCallException): + pass + + +class NoPathLinstorCallException(LinstorCallException): + pass + + +def linstorhostcall(local_method, remote_method): + def decorated(response_parser): + def wrapper(*args, **kwargs): + self = args[0] + vdi_uuid = args[1] + + device_path = self._linstor.build_device_path( + self._linstor.get_volume_name(vdi_uuid) + ) + + # A. Try a call using directly the DRBD device to avoid + # remote request. + + # Try to read locally if the device is not in use or if the device + # is up to date and not diskless. + (node_names, in_use_by) = \ + self._linstor.find_up_to_date_diskful_nodes(vdi_uuid) + + local_e = None + try: + if not in_use_by or socket.gethostname() in node_names: + return self._call_local_method(local_method, device_path, *args[2:], **kwargs) + except ErofsLinstorCallException as e: + local_e = e.cmd_err + except Exception as e: + local_e = e + + util.SMlog( + 'unable to execute `{}` locally, retry using a readable host... (cause: {})'.format( + remote_method, local_e if local_e else 'local diskless + in use or not up to date' + ) + ) + + if in_use_by: + node_names = {in_use_by} + + # B. Execute the plugin on master or slave. + remote_args = { + 'devicePath': device_path, + 'groupName': self._linstor.group_name + } + remote_args.update(**kwargs) + remote_args = {str(key): str(value) for key, value in remote_args.items()} + + try: + def remote_call(): + host_ref = self._get_readonly_host(vdi_uuid, device_path, node_names) + return call_remote_method(self._session, host_ref, remote_method, device_path, remote_args) + response = util.retry(remote_call, 5, 2) + except Exception as remote_e: + self._raise_openers_exception(device_path, local_e or remote_e) + + return response_parser(self, vdi_uuid, response) + return wrapper + return decorated + + +def linstormodifier(): + def decorated(func): + def wrapper(*args, **kwargs): + self = args[0] + + ret = func(*args, **kwargs) + self._linstor.invalidate_resource_cache() + return ret + return wrapper + return decorated + + +class LinstorVhdUtil: + MAX_SIZE = 2 * 1024 * 1024 * 1024 * 1024 # Max VHD size. + + def __init__(self, session, linstor): + self._session = session + self._linstor = linstor + + def create_chain_paths(self, vdi_uuid, readonly=False): + # OPTIMIZE: Add a limit_to_first_allocated_block param to limit vhdutil calls. 
+ # Useful for the snapshot code algorithm. + + leaf_vdi_path = self._linstor.get_device_path(vdi_uuid) + path = leaf_vdi_path + while True: + if not util.pathexists(path): + raise xs_errors.XenError( + 'VDIUnavailable', opterr='Could not find: {}'.format(path) + ) + + # Diskless path can be created on the fly, ensure we can open it. + def check_volume_usable(): + while True: + try: + with open(path, 'r' if readonly else 'r+'): + pass + except IOError as e: + if e.errno == errno.ENODATA: + time.sleep(2) + continue + if e.errno == errno.EROFS: + util.SMlog('Volume not attachable because RO. Openers: {}'.format( + self._linstor.get_volume_openers(vdi_uuid) + )) + raise + break + util.retry(check_volume_usable, 15, 2) + + vdi_uuid = self.get_vhd_info(vdi_uuid).parentUuid + if not vdi_uuid: + break + path = self._linstor.get_device_path(vdi_uuid) + readonly = True # Non-leaf is always readonly. + + return leaf_vdi_path + + # -------------------------------------------------------------------------- + # Getters: read locally and try on another host in case of failure. + # -------------------------------------------------------------------------- + + def check(self, vdi_uuid, ignore_missing_footer=False, fast=False): + kwargs = { + 'ignoreMissingFooter': ignore_missing_footer, + 'fast': fast + } + try: + self._check(vdi_uuid, **kwargs) # pylint: disable = E1123 + return True + except Exception as e: + util.SMlog('Call to `check` failed: {}'.format(e)) + return False + + @linstorhostcall(check_ex, 'check') + def _check(self, vdi_uuid, response): + return util.strtobool(response) + + def get_vhd_info(self, vdi_uuid, include_parent=True): + kwargs = { + 'includeParent': include_parent, + 'resolveParent': False + } + # TODO: Replace pylint comment with this feature when possible: + # https://github.com/PyCQA/pylint/pull/2926 + return self._get_vhd_info(vdi_uuid, self._extract_uuid, **kwargs) # pylint: disable = E1123 + + @linstorhostcall(vhdutil.getVHDInfo, 'getVHDInfo') + def _get_vhd_info(self, vdi_uuid, response): + obj = json.loads(response) + + vhd_info = vhdutil.VHDInfo(vdi_uuid) + vhd_info.sizeVirt = obj['sizeVirt'] + vhd_info.sizePhys = obj['sizePhys'] + if 'parentPath' in obj: + vhd_info.parentPath = obj['parentPath'] + vhd_info.parentUuid = obj['parentUuid'] + vhd_info.hidden = obj['hidden'] + vhd_info.path = obj['path'] + + return vhd_info + + @linstorhostcall(vhdutil.hasParent, 'hasParent') + def has_parent(self, vdi_uuid, response): + return util.strtobool(response) + + def get_parent(self, vdi_uuid): + return self._get_parent(vdi_uuid, self._extract_uuid) + + @linstorhostcall(vhdutil.getParent, 'getParent') + def _get_parent(self, vdi_uuid, response): + return response + + @linstorhostcall(vhdutil.getSizeVirt, 'getSizeVirt') + def get_size_virt(self, vdi_uuid, response): + return int(response) + + @linstorhostcall(vhdutil.getMaxResizeSize, 'getMaxResizeSize') + def get_max_resize_size(self, vdi_uuid, response): + return int(response) + + @linstorhostcall(vhdutil.getSizePhys, 'getSizePhys') + def get_size_phys(self, vdi_uuid, response): + return int(response) + + @linstorhostcall(vhdutil.getAllocatedSize, 'getAllocatedSize') + def get_allocated_size(self, vdi_uuid, response): + return int(response) + + @linstorhostcall(vhdutil.getDepth, 'getDepth') + def get_depth(self, vdi_uuid, response): + return int(response) + + @linstorhostcall(vhdutil.getKeyHash, 'getKeyHash') + def get_key_hash(self, vdi_uuid, response): + return response or None + + @linstorhostcall(vhdutil.getBlockBitmap, 
'getBlockBitmap') + def get_block_bitmap(self, vdi_uuid, response): + return base64.b64decode(response) + + @linstorhostcall('_get_drbd_size', 'getDrbdSize') + def get_drbd_size(self, vdi_uuid, response): + return int(response) + + def _get_drbd_size(self, path): + (ret, stdout, stderr) = util.doexec(['blockdev', '--getsize64', path]) + if ret == 0: + return int(stdout.strip()) + raise util.SMException('Failed to get DRBD size: {}'.format(stderr)) + + # -------------------------------------------------------------------------- + # Setters: only used locally. + # -------------------------------------------------------------------------- + + @linstormodifier() + def create(self, path, size, static, msize=0): + return self._call_local_method_or_fail(vhdutil.create, path, size, static, msize) + + @linstormodifier() + def set_size_phys(self, path, size, debug=True): + return self._call_local_method_or_fail(vhdutil.setSizePhys, path, size, debug) + + @linstormodifier() + def set_parent(self, path, parentPath, parentRaw=False): + return self._call_local_method_or_fail(vhdutil.setParent, path, parentPath, parentRaw) + + @linstormodifier() + def set_hidden(self, path, hidden=True): + return self._call_local_method_or_fail(vhdutil.setHidden, path, hidden) + + @linstormodifier() + def set_key(self, path, key_hash): + return self._call_local_method_or_fail(vhdutil.setKey, path, key_hash) + + @linstormodifier() + def kill_data(self, path): + return self._call_local_method_or_fail(vhdutil.killData, path) + + @linstormodifier() + def snapshot(self, path, parent, parentRaw, msize=0, checkEmpty=True): + return self._call_local_method_or_fail(vhdutil.snapshot, path, parent, parentRaw, msize, checkEmpty) + + def inflate(self, journaler, vdi_uuid, vdi_path, new_size, old_size): + # Only inflate if the LINSTOR volume capacity is not enough. + new_size = LinstorVolumeManager.round_up_volume_size(new_size) + if new_size <= old_size: + return + + util.SMlog( + 'Inflate {} (size={}, previous={})' + .format(vdi_path, new_size, old_size) + ) + + journaler.create( + LinstorJournaler.INFLATE, vdi_uuid, old_size + ) + self._linstor.resize_volume(vdi_uuid, new_size) + + # TODO: Replace pylint comment with this feature when possible: + # https://github.com/PyCQA/pylint/pull/2926 + result_size = self.get_drbd_size(vdi_uuid) # pylint: disable = E1120 + if result_size < new_size: + util.SMlog( + 'WARNING: Cannot inflate volume to {}B, result size: {}B' + .format(new_size, result_size) + ) + + self._zeroize(vdi_path, result_size - vhdutil.VHD_FOOTER_SIZE) + self.set_size_phys(vdi_path, result_size, False) + journaler.remove(LinstorJournaler.INFLATE, vdi_uuid) + + def deflate(self, vdi_path, new_size, old_size, zeroize=False): + if zeroize: + assert old_size > vhdutil.VHD_FOOTER_SIZE + self._zeroize(vdi_path, old_size - vhdutil.VHD_FOOTER_SIZE) + + new_size = LinstorVolumeManager.round_up_volume_size(new_size) + if new_size >= old_size: + return + + util.SMlog( + 'Deflate {} (new size={}, previous={})' + .format(vdi_path, new_size, old_size) + ) + + self.set_size_phys(vdi_path, new_size) + # TODO: Change the LINSTOR volume size using linstor.resize_volume. + + # -------------------------------------------------------------------------- + # Remote setters: write locally and try on another host in case of failure. 
+ # -------------------------------------------------------------------------- + + @linstormodifier() + def set_size_virt(self, path, size, jfile): + kwargs = { + 'size': size, + 'jfile': jfile + } + return self._call_method(vhdutil.setSizeVirt, 'setSizeVirt', path, use_parent=False, **kwargs) + + @linstormodifier() + def set_size_virt_fast(self, path, size): + kwargs = { + 'size': size + } + return self._call_method(vhdutil.setSizeVirtFast, 'setSizeVirtFast', path, use_parent=False, **kwargs) + + @linstormodifier() + def force_parent(self, path, parentPath, parentRaw=False): + kwargs = { + 'parentPath': str(parentPath), + 'parentRaw': parentRaw + } + return self._call_method(vhdutil.setParent, 'setParent', path, use_parent=False, **kwargs) + + @linstormodifier() + def force_coalesce(self, path): + return int(self._call_method(vhdutil.coalesce, 'coalesce', path, use_parent=True)) + + @linstormodifier() + def force_repair(self, path): + return self._call_method(vhdutil.repair, 'repair', path, use_parent=False) + + @linstormodifier() + def force_deflate(self, path, newSize, oldSize, zeroize): + kwargs = { + 'newSize': newSize, + 'oldSize': oldSize, + 'zeroize': zeroize + } + return self._call_method('_force_deflate', 'deflate', path, use_parent=False, **kwargs) + + def _force_deflate(self, path, newSize, oldSize, zeroize): + self.deflate(path, newSize, oldSize, zeroize) + + # -------------------------------------------------------------------------- + # Static helpers. + # -------------------------------------------------------------------------- + + @classmethod + def compute_volume_size(cls, virtual_size, image_type): + if image_type == vhdutil.VDI_TYPE_VHD: + # All LINSTOR VDIs have the metadata area preallocated for + # the maximum possible virtual size (for fast online VDI.resize). + meta_overhead = vhdutil.calcOverheadEmpty(cls.MAX_SIZE) + bitmap_overhead = vhdutil.calcOverheadBitmap(virtual_size) + virtual_size += meta_overhead + bitmap_overhead + elif image_type != vhdutil.VDI_TYPE_RAW: + raise Exception('Invalid image type: {}'.format(image_type)) + + return LinstorVolumeManager.round_up_volume_size(virtual_size) + + # -------------------------------------------------------------------------- + # Helpers. + # -------------------------------------------------------------------------- + + def _extract_uuid(self, device_path): + # TODO: Remove new line in the vhdutil module. Not here. + return self._linstor.get_volume_uuid_from_device_path( + device_path.rstrip('\n') + ) + + def _get_readonly_host(self, vdi_uuid, device_path, node_names): + """ + When vhd-util is called to fetch VDI info we must find a + diskful DRBD disk to read the data. It's the goal of this function. + Why? Because when a VHD is open in RO mode, the LVM layer is used + directly to bypass DRBD verifications (we can have only one process + that reads/writes to disk with DRBD devices). 
+ """ + + if not node_names: + raise xs_errors.XenError( + 'VDIUnavailable', + opterr='Unable to find diskful node: {} (path={})' + .format(vdi_uuid, device_path) + ) + + hosts = self._session.xenapi.host.get_all_records() + for host_ref, host_record in hosts.items(): + if host_record['hostname'] in node_names: + return host_ref + + raise xs_errors.XenError( + 'VDIUnavailable', + opterr='Unable to find a valid host from VDI: {} (path={})' + .format(vdi_uuid, device_path) + ) + + # -------------------------------------------------------------------------- + + def _raise_openers_exception(self, device_path, e): + if isinstance(e, util.CommandException): + e_str = 'cmd: `{}`, code: `{}`, reason: `{}`'.format(e.cmd, e.code, e.reason) + else: + e_str = str(e) + + try: + volume_uuid = self._linstor.get_volume_uuid_from_device_path( + device_path + ) + e_wrapper = Exception( + e_str + ' (openers: {})'.format( + self._linstor.get_volume_openers(volume_uuid) + ) + ) + except Exception as illformed_e: + e_wrapper = Exception( + e_str + ' (unable to get openers: {})'.format(illformed_e) + ) + util.SMlog('raise opener exception: {}'.format(e_wrapper)) + raise e_wrapper # pylint: disable = E0702 + + def _call_local_method(self, local_method, device_path, *args, **kwargs): + if isinstance(local_method, str): + local_method = getattr(self, local_method) + + try: + def local_call(): + try: + return local_method(device_path, *args, **kwargs) + except util.CommandException as e: + if e.code == errno.EROFS or e.code == errno.EMEDIUMTYPE: + raise ErofsLinstorCallException(e) # Break retry calls. + if e.code == errno.ENOENT: + raise NoPathLinstorCallException(e) + raise e + # Retry only locally if it's not an EROFS exception. + return util.retry(local_call, 5, 2, exceptions=[util.CommandException]) + except util.CommandException as e: + util.SMlog('failed to execute locally vhd-util (sys {})'.format(e.code)) + raise e + + def _call_local_method_or_fail(self, local_method, device_path, *args, **kwargs): + try: + return self._call_local_method(local_method, device_path, *args, **kwargs) + except ErofsLinstorCallException as e: + # Volume is locked on a host, find openers. + self._raise_openers_exception(device_path, e.cmd_err) + + def _call_method(self, local_method, remote_method, device_path, use_parent, *args, **kwargs): + # Note: `use_parent` exists to know if the VHD parent is used by the local/remote method. + # Normally in case of failure, if the parent is unused we try to execute the method on + # another host using the DRBD opener list. In the other case, if the parent is required, + # we must check where this last one is open instead of the child. + + if isinstance(local_method, str): + local_method = getattr(self, local_method) + + # A. Try to write locally... + try: + return self._call_local_method(local_method, device_path, *args, **kwargs) + except Exception: + pass + + util.SMlog('unable to execute `{}` locally, retry using a writable host...'.format(remote_method)) + + # B. Execute the command on another host. + # B.1. Get host list. + try: + hosts = self._session.xenapi.host.get_all_records() + except Exception as e: + raise xs_errors.XenError( + 'VDIUnavailable', + opterr='Unable to get host list to run vhd-util command `{}` (path={}): {}' + .format(remote_method, device_path, e) + ) + + # B.2. Prepare remote args. 
+ remote_args = { + 'devicePath': device_path, + 'groupName': self._linstor.group_name + } + remote_args.update(**kwargs) + remote_args = {str(key): str(value) for key, value in remote_args.items()} + + volume_uuid = self._linstor.get_volume_uuid_from_device_path( + device_path + ) + parent_volume_uuid = None + if use_parent: + parent_volume_uuid = self.get_parent(volume_uuid) + + openers_uuid = parent_volume_uuid if use_parent else volume_uuid + + # B.3. Call! + def remote_call(): + try: + all_openers = self._linstor.get_volume_openers(openers_uuid) + except Exception as e: + raise xs_errors.XenError( + 'VDIUnavailable', + opterr='Unable to get DRBD openers to run vhd-util command `{}` (path={}): {}' + .format(remote_method, device_path, e) + ) + + no_host_found = True + for hostname, openers in all_openers.items(): + if not openers: + continue + + try: + host_ref = next(ref for ref, rec in hosts.items() if rec['hostname'] == hostname) + except StopIteration: + continue + + no_host_found = False + try: + return call_remote_method(self._session, host_ref, remote_method, device_path, remote_args) + except Exception: + pass + + if no_host_found: + try: + return local_method(device_path, *args, **kwargs) + except Exception as e: + self._raise_openers_exception(device_path, e) + + raise xs_errors.XenError( + 'VDIUnavailable', + opterr='No valid host found to run vhd-util command `{}` (path=`{}`, openers=`{}`)' + .format(remote_method, device_path, openers) + ) + return util.retry(remote_call, 5, 2) + + @staticmethod + def _zeroize(path, size): + if not util.zeroOut(path, size, vhdutil.VHD_FOOTER_SIZE): + raise xs_errors.XenError( + 'EIO', + opterr='Failed to zero out VHD footer {}'.format(path) + ) diff --git a/drivers/linstorvolumemanager.py b/drivers/linstorvolumemanager.py new file mode 100755 index 000000000..8cb4cc4da --- /dev/null +++ b/drivers/linstorvolumemanager.py @@ -0,0 +1,3258 @@ +#!/usr/bin/env python3 +# +# Copyright (C) 2020 Vates SAS - ronan.abhamon@vates.fr +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +from sm_typing import override + +import errno +import json +import linstor +import os.path +import re +import shutil +import socket +import stat +import time +import util +import uuid + +# Persistent prefix to add to RAW persistent volumes. +PERSISTENT_PREFIX = 'xcp-persistent-' + +# Contains the data of the "/var/lib/linstor" directory. +DATABASE_VOLUME_NAME = PERSISTENT_PREFIX + 'database' +DATABASE_SIZE = 1 << 30 # 1GB. 
+DATABASE_PATH = '/var/lib/linstor' +DATABASE_MKFS = 'mkfs.ext4' + +REG_DRBDADM_PRIMARY = re.compile("([^\\s]+)\\s+role:Primary") +REG_DRBDSETUP_IP = re.compile('[^\\s]+\\s+(.*):.*$') + +DRBD_BY_RES_PATH = '/dev/drbd/by-res/' + +PLUGIN = 'linstor-manager' + + +# ============================================================================== + +def get_local_volume_openers(resource_name, volume): + if not resource_name or volume is None: + raise Exception('Cannot get DRBD openers without resource name and/or volume.') + + path = '/sys/kernel/debug/drbd/resources/{}/volumes/{}/openers'.format( + resource_name, volume + ) + + with open(path, 'r') as openers: + # Not a big cost, so read all lines directly. + lines = openers.readlines() + + result = {} + + opener_re = re.compile('(.*)\\s+([0-9]+)\\s+([0-9]+)') + for line in lines: + match = opener_re.match(line) + assert match + + groups = match.groups() + process_name = groups[0] + pid = groups[1] + open_duration_ms = groups[2] + result[pid] = { + 'process-name': process_name, + 'open-duration': open_duration_ms + } + + return json.dumps(result) + +def get_all_volume_openers(resource_name, volume): + PLUGIN_CMD = 'getDrbdOpeners' + + volume = str(volume) + openers = {} + + # Make sure this call never stucks because this function can be called + # during HA init and in this case we can wait forever. + session = util.timeout_call(10, util.get_localAPI_session) + + hosts = session.xenapi.host.get_all_records() + for host_ref, host_record in hosts.items(): + node_name = host_record['hostname'] + try: + if not session.xenapi.host_metrics.get_record( + host_record['metrics'] + )['live']: + # Ensure we call plugin on online hosts only. + continue + + openers[node_name] = json.loads( + session.xenapi.host.call_plugin(host_ref, PLUGIN, PLUGIN_CMD, { + 'resourceName': resource_name, + 'volume': volume + }) + ) + except Exception as e: + util.SMlog('Failed to get openers of `{}` on `{}`: {}'.format( + resource_name, node_name, e + )) + + return openers + + +# ============================================================================== + +def round_up(value, divisor): + assert divisor + divisor = int(divisor) + return ((int(value) + divisor - 1) // divisor) * divisor + + +def round_down(value, divisor): + assert divisor + value = int(value) + return value - (value % int(divisor)) + + +# ============================================================================== + +def get_remote_host_ip(node_name): + (ret, stdout, stderr) = util.doexec([ + 'drbdsetup', 'show', DATABASE_VOLUME_NAME, '--json' + ]) + if ret != 0: + return + + try: + conf = json.loads(stdout) + if not conf: + return + + for connection in conf[0]['connections']: + if connection['net']['_name'] == node_name: + value = connection['path']['_remote_host'] + res = REG_DRBDSETUP_IP.match(value) + if res: + return res.groups()[0] + break + except Exception: + pass + + +def _get_controller_uri(): + PLUGIN_CMD = 'hasControllerRunning' + + # Try to find controller using drbdadm. + (ret, stdout, stderr) = util.doexec([ + 'drbdadm', 'status', DATABASE_VOLUME_NAME + ]) + if ret == 0: + # If we are here, the database device exists locally. + + if stdout.startswith('{} role:Primary'.format(DATABASE_VOLUME_NAME)): + # Nice case, we have the controller running on this local host. + return 'linstor://localhost' + + # Try to find the host using DRBD connections. 
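+    # Illustrative (shortened) `drbdadm status` output we try to match here,
+    # where `<peer-node>` is a placeholder for a real host name:
+    #   xcp-persistent-database role:Secondary
+    #     <peer-node> role:Primary
+    #       peer-disk:UpToDate
+    # REG_DRBDADM_PRIMARY extracts `<peer-node>` from the "role:Primary" line.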
+ res = REG_DRBDADM_PRIMARY.search(stdout) + if res: + node_name = res.groups()[0] + ip = get_remote_host_ip(node_name) + if ip: + return 'linstor://' + ip + + # Worst case: we use many hosts in the pool (>= 4), so we can't find the + # primary using drbdadm because we don't have all connections to the + # replicated volume. `drbdadm status xcp-persistent-database` returns + # 3 connections by default. + try: + session = util.timeout_call(10, util.get_localAPI_session) + + for host_ref, host_record in session.xenapi.host.get_all_records().items(): + node_name = host_record['hostname'] + try: + if util.strtobool( + session.xenapi.host.call_plugin(host_ref, PLUGIN, PLUGIN_CMD, {}) + ): + return 'linstor://' + host_record['address'] + except Exception as e: + # Can throw and exception if a host is offline. So catch it. + util.SMlog('Unable to search controller on `{}`: {}'.format( + node_name, e + )) + except: + # Not found, maybe we are trying to create the SR... + pass + +def get_controller_uri(): + retries = 0 + while True: + uri = _get_controller_uri() + if uri: + return uri + + retries += 1 + if retries >= 10: + break + time.sleep(1) + + +def get_controller_node_name(): + PLUGIN_CMD = 'hasControllerRunning' + + (ret, stdout, stderr) = util.doexec([ + 'drbdadm', 'status', DATABASE_VOLUME_NAME + ]) + + if ret == 0: + if stdout.startswith('{} role:Primary'.format(DATABASE_VOLUME_NAME)): + return 'localhost' + + res = REG_DRBDADM_PRIMARY.search(stdout) + if res: + return res.groups()[0] + + session = util.timeout_call(5, util.get_localAPI_session) + + for host_ref, host_record in session.xenapi.host.get_all_records().items(): + node_name = host_record['hostname'] + try: + if not session.xenapi.host_metrics.get_record( + host_record['metrics'] + )['live']: + continue + + if util.strtobool(session.xenapi.host.call_plugin( + host_ref, PLUGIN, PLUGIN_CMD, {} + )): + return node_name + except Exception as e: + util.SMlog('Failed to call plugin to get controller on `{}`: {}'.format( + node_name, e + )) + + +def demote_drbd_resource(node_name, resource_name): + PLUGIN_CMD = 'demoteDrbdResource' + + session = util.timeout_call(5, util.get_localAPI_session) + + for host_ref, host_record in session.xenapi.host.get_all_records().items(): + if host_record['hostname'] != node_name: + continue + + try: + session.xenapi.host.call_plugin( + host_ref, PLUGIN, PLUGIN_CMD, {'resource_name': resource_name} + ) + except Exception as e: + util.SMlog('Failed to demote resource `{}` on `{}`: {}'.format( + resource_name, node_name, e + )) + raise Exception( + 'Can\'t demote resource `{}`, unable to find node `{}`' + .format(resource_name, node_name) + ) + +# ============================================================================== + +class LinstorVolumeManagerError(Exception): + ERR_GENERIC = 0, + ERR_VOLUME_EXISTS = 1, + ERR_VOLUME_NOT_EXISTS = 2, + ERR_VOLUME_DESTROY = 3, + ERR_GROUP_NOT_EXISTS = 4 + + def __init__(self, message, code=ERR_GENERIC): + super(LinstorVolumeManagerError, self).__init__(message) + self._code = code + + @property + def code(self): + return self._code + + +# ============================================================================== + +# Note: +# If a storage pool is not accessible after a network change: +# linstor node interface modify default --ip + + +class LinstorVolumeManager(object): + """ + API to manager LINSTOR volumes in XCP-ng. + A volume in this context is a physical part of the storage layer. 
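+    Each volume is backed by a DRBD resource managed through the LINSTOR API;
+    its device path has the form `/dev/drbd/by-res/<resource-name>/0`.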
+ """ + + __slots__ = ( + '_linstor', '_logger', '_redundancy', + '_base_group_name', '_group_name', '_ha_group_name', + '_volumes', '_storage_pools', '_storage_pools_time', + '_kv_cache', '_resource_cache', '_volume_info_cache', + '_kv_cache_dirty', '_resource_cache_dirty', '_volume_info_cache_dirty' + ) + + DEV_ROOT_PATH = DRBD_BY_RES_PATH + + # Default sector size. + BLOCK_SIZE = 512 + + # List of volume properties. + PROP_METADATA = 'metadata' + PROP_NOT_EXISTS = 'not-exists' + PROP_VOLUME_NAME = 'volume-name' + PROP_IS_READONLY_TIMESTAMP = 'readonly-timestamp' + + # A volume can only be locked for a limited duration. + # The goal is to give enough time to slaves to execute some actions on + # a device before an UUID update or a coalesce for example. + # Expiration is expressed in seconds. + LOCKED_EXPIRATION_DELAY = 1 * 60 + + # Used when volume uuid is being updated. + PROP_UPDATING_UUID_SRC = 'updating-uuid-src' + + # States of property PROP_NOT_EXISTS. + STATE_EXISTS = '0' + STATE_NOT_EXISTS = '1' + STATE_CREATING = '2' + + # Property namespaces. + NAMESPACE_SR = 'xcp/sr' + NAMESPACE_VOLUME = 'xcp/volume' + + # Regex to match properties. + REG_PROP = '^([^/]+)/{}$' + + REG_METADATA = re.compile(REG_PROP.format(PROP_METADATA)) + REG_NOT_EXISTS = re.compile(REG_PROP.format(PROP_NOT_EXISTS)) + REG_VOLUME_NAME = re.compile(REG_PROP.format(PROP_VOLUME_NAME)) + REG_UPDATING_UUID_SRC = re.compile(REG_PROP.format(PROP_UPDATING_UUID_SRC)) + + # Prefixes of SR/VOLUME in the LINSTOR DB. + # A LINSTOR (resource, group, ...) name cannot start with a number. + # So we add a prefix behind our SR/VOLUME uuids. + PREFIX_SR = 'xcp-sr-' + PREFIX_HA = 'xcp-ha-' + PREFIX_VOLUME = 'xcp-volume-' + + # Limit request number when storage pool info is asked, we fetch + # the current pool status after N elapsed seconds. + STORAGE_POOLS_FETCH_INTERVAL = 15 + + @staticmethod + def default_logger(*args): + print(args) + + # -------------------------------------------------------------------------- + # API. + # -------------------------------------------------------------------------- + + class VolumeInfo(object): + __slots__ = ( + 'name', + 'allocated_size', # Allocated size, place count is not used. + 'virtual_size', # Total virtual available size of this volume + # (i.e. the user size at creation). + 'diskful' # Array of nodes that have a diskful volume. + ) + + def __init__(self, name): + self.name = name + self.allocated_size = 0 + self.virtual_size = 0 + self.diskful = [] + + @override + def __repr__(self) -> str: + return 'VolumeInfo("{}", {}, {}, {})'.format( + self.name, self.allocated_size, self.virtual_size, + self.diskful + ) + + # -------------------------------------------------------------------------- + + def __init__( + self, uri, group_name, repair=False, logger=default_logger.__func__, + attempt_count=30 + ): + """ + Create a new LinstorVolumeManager object. + :param str uri: URI to communicate with the LINSTOR controller. + :param str group_name: The SR goup name to use. + :param bool repair: If true we try to remove bad volumes due to a crash + or unexpected behavior. + :param function logger: Function to log messages. + :param int attempt_count: Number of attempts to join the controller. + """ + + self._linstor = self._create_linstor_instance( + uri, attempt_count=attempt_count + ) + self._base_group_name = group_name + + # Ensure group exists. 
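+        # The base name is prefixed (see PREFIX_SR) because a LINSTOR
+        # identifier cannot start with a digit.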
+ group_name = self._build_group_name(group_name) + groups = self._linstor.resource_group_list_raise([group_name]).resource_groups + if not groups: + raise LinstorVolumeManagerError( + 'Unable to find `{}` Linstor SR'.format(group_name) + ) + + # Ok. ;) + self._logger = logger + self._redundancy = groups[0].select_filter.place_count + self._group_name = group_name + self._ha_group_name = self._build_ha_group_name(self._base_group_name) + self._volumes = set() + self._storage_pools_time = 0 + + # To increate performance and limit request count to LINSTOR services, + # we use caches. + self._kv_cache = self._create_kv_cache() + self._resource_cache = None + self._resource_cache_dirty = True + self._volume_info_cache = None + self._volume_info_cache_dirty = True + self._build_volumes(repair=repair) + + @property + def group_name(self): + """ + Give the used group name. + :return: The group name. + :rtype: str + """ + return self._base_group_name + + @property + def redundancy(self): + """ + Give the used redundancy. + :return: The redundancy. + :rtype: int + """ + return self._redundancy + + @property + def volumes(self): + """ + Give the volumes uuid set. + :return: The volumes uuid set. + :rtype: set(str) + """ + return self._volumes + + @property + def max_volume_size_allowed(self): + """ + Give the max volume size currently available in B. + :return: The current size. + :rtype: int + """ + + candidates = self._find_best_size_candidates() + if not candidates: + raise LinstorVolumeManagerError( + 'Failed to get max volume size allowed' + ) + + size = candidates[0].max_volume_size + if size < 0: + raise LinstorVolumeManagerError( + 'Invalid max volume size allowed given: {}'.format(size) + ) + return self.round_down_volume_size(size * 1024) + + @property + def physical_size(self): + """ + Give the total physical size of the SR. + :return: The physical size. + :rtype: int + """ + return self._compute_size('total_capacity') + + @property + def physical_free_size(self): + """ + Give the total free physical size of the SR. + :return: The physical free size. + :rtype: int + """ + return self._compute_size('free_capacity') + + @property + def allocated_volume_size(self): + """ + Give the allocated size for all volumes. The place count is not + used here. When thick lvm is used, the size for one volume should + be equal to the virtual volume size. With thin lvm, the size is equal + or lower to the volume size. + :return: The allocated size of all volumes. + :rtype: int + """ + + # Paths: /res_name/vol_number/size + sizes = {} + + self._scan_for_broken_diskless_resources() + + for resource in self._get_resource_cache().resources: + if resource.name not in sizes: + current = sizes[resource.name] = {} + else: + current = sizes[resource.name] + + for volume in resource.volumes: + # We ignore diskless pools of the form "DfltDisklessStorPool". + if volume.storage_pool_name != self._group_name: + continue + + current_size = volume.allocated_size + if current_size < 0: + raise LinstorVolumeManagerError( + 'Failed to get allocated size of `{}` on `{}`' + .format(resource.name, volume.storage_pool_name) + ) + current[volume.number] = max(current_size, current.get(volume.number) or 0) + + total_size = 0 + for volumes in sizes.values(): + for size in volumes.values(): + total_size += size + + return total_size * 1024 + + def get_min_physical_size(self): + """ + Give the minimum physical size of the SR. + I.e. the size of the smallest disk + the number of pools. + :return: The physical min size. 
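+        Returned as (pool count, total capacity in B of the smallest pool).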
+ :rtype: tuple(int, int) + """ + size = None + pool_count = 0 + for pool in self._get_storage_pools(force=True): + space = pool.free_space + if space: + pool_count += 1 + current_size = space.total_capacity + if current_size < 0: + raise LinstorVolumeManagerError( + 'Failed to get pool total_capacity attr of `{}`' + .format(pool.node_name) + ) + if size is None or current_size < size: + size = current_size + return (pool_count, (size or 0) * 1024) + + @property + def metadata(self): + """ + Get the metadata of the SR. + :return: Dictionary that contains metadata. + :rtype: dict(str, dict) + """ + + sr_properties = self._get_sr_properties() + metadata = sr_properties.get(self.PROP_METADATA) + if metadata is not None: + metadata = json.loads(metadata) + if isinstance(metadata, dict): + return metadata + raise LinstorVolumeManagerError( + 'Expected dictionary in SR metadata: {}'.format( + self._group_name + ) + ) + + return {} + + @metadata.setter + def metadata(self, metadata): + """ + Set the metadata of the SR. + :param dict metadata: Dictionary that contains metadata. + """ + + assert isinstance(metadata, dict) + sr_properties = self._get_sr_properties() + sr_properties[self.PROP_METADATA] = json.dumps(metadata) + + @property + def disconnected_hosts(self): + """ + Get the list of disconnected hosts. + :return: Set that contains disconnected hosts. + :rtype: set(str) + """ + + disconnected_hosts = set() + for pool in self._get_storage_pools(): + for report in pool.reports: + if report.ret_code & linstor.consts.WARN_NOT_CONNECTED == \ + linstor.consts.WARN_NOT_CONNECTED: + disconnected_hosts.add(pool.node_name) + break + return disconnected_hosts + + def check_volume_exists(self, volume_uuid): + """ + Check if a volume exists in the SR. + :return: True if volume exists. + :rtype: bool + """ + return volume_uuid in self._volumes + + def create_volume( + self, + volume_uuid, + size, + persistent=True, + volume_name=None, + high_availability=False + ): + """ + Create a new volume on the SR. + :param str volume_uuid: The volume uuid to use. + :param int size: volume size in B. + :param bool persistent: If false the volume will be unavailable + on the next constructor call LinstorSR(...). + :param str volume_name: If set, this name is used in the LINSTOR + database instead of a generated name. + :param bool high_availability: If set, the volume is created in + the HA group. + :return: The current device path of the volume. + :rtype: str + """ + + self._logger('Creating LINSTOR volume {}...'.format(volume_uuid)) + if not volume_name: + volume_name = self.build_volume_name(util.gen_uuid()) + volume_properties = self._create_volume_with_properties( + volume_uuid, + volume_name, + size, + True, # place_resources + high_availability + ) + + # Volume created! Now try to find the device path. + try: + self._logger( + 'Find device path of LINSTOR volume {}...'.format(volume_uuid) + ) + device_path = self._find_device_path(volume_uuid, volume_name) + if persistent: + volume_properties[self.PROP_NOT_EXISTS] = self.STATE_EXISTS + self._volumes.add(volume_uuid) + self._logger( + 'LINSTOR volume {} created!'.format(volume_uuid) + ) + return device_path + except Exception: + # There is an issue to find the path. + # At this point the volume has just been created, so force flag can be used. + self._destroy_volume(volume_uuid, force=True) + raise + + def mark_volume_as_persistent(self, volume_uuid): + """ + Mark volume as persistent if created with persistent=False. 
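+        The volume then stays available on the next LinstorSR(...) constructor
+        call (see the `persistent` parameter of `create_volume`).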
+ :param str volume_uuid: The volume uuid to mark. + """ + + self._ensure_volume_exists(volume_uuid) + + # Mark volume as persistent. + volume_properties = self._get_volume_properties(volume_uuid) + volume_properties[self.PROP_NOT_EXISTS] = self.STATE_EXISTS + + def destroy_volume(self, volume_uuid): + """ + Destroy a volume. + :param str volume_uuid: The volume uuid to destroy. + """ + + self._ensure_volume_exists(volume_uuid) + self.ensure_volume_is_not_locked(volume_uuid) + + # Mark volume as destroyed. + volume_properties = self._get_volume_properties(volume_uuid) + volume_properties[self.PROP_NOT_EXISTS] = self.STATE_NOT_EXISTS + + try: + self._volumes.remove(volume_uuid) + self._destroy_volume(volume_uuid) + except Exception as e: + raise LinstorVolumeManagerError( + str(e), + LinstorVolumeManagerError.ERR_VOLUME_DESTROY + ) + + def lock_volume(self, volume_uuid, locked=True): + """ + Prevent modifications of the volume properties during + "self.LOCKED_EXPIRATION_DELAY" seconds. The SR must be locked + when used. This method is useful to attach/detach correctly a volume on + a slave. Without it the GC can rename a volume, in this case the old + volume path can be used by a slave... + :param str volume_uuid: The volume uuid to protect/unprotect. + :param bool locked: Lock/unlock the volume. + """ + + self._ensure_volume_exists(volume_uuid) + + self._logger( + '{} volume {} as locked'.format( + 'Mark' if locked else 'Unmark', + volume_uuid + ) + ) + + volume_properties = self._get_volume_properties(volume_uuid) + if locked: + volume_properties[ + self.PROP_IS_READONLY_TIMESTAMP + ] = str(time.time()) + elif self.PROP_IS_READONLY_TIMESTAMP in volume_properties: + volume_properties.pop(self.PROP_IS_READONLY_TIMESTAMP) + + def ensure_volume_is_not_locked(self, volume_uuid, timeout=None): + """ + Ensure a volume is not locked. Wait if necessary. + :param str volume_uuid: The volume uuid to check. + :param int timeout: If the volume is always locked after the expiration + of the timeout, an exception is thrown. + """ + return self.ensure_volume_list_is_not_locked([volume_uuid], timeout) + + def ensure_volume_list_is_not_locked(self, volume_uuids, timeout=None): + checked = set() + for volume_uuid in volume_uuids: + if volume_uuid in self._volumes: + checked.add(volume_uuid) + + if not checked: + return + + waiting = False + + volume_properties = self._get_kv_cache() + + start = time.time() + while True: + # Can't delete in for loop, use a copy of the list. + remaining = checked.copy() + for volume_uuid in checked: + volume_properties.namespace = \ + self._build_volume_namespace(volume_uuid) + timestamp = volume_properties.get( + self.PROP_IS_READONLY_TIMESTAMP + ) + if timestamp is None: + remaining.remove(volume_uuid) + continue + + now = time.time() + if now - float(timestamp) > self.LOCKED_EXPIRATION_DELAY: + self._logger( + 'Remove readonly timestamp on {}'.format(volume_uuid) + ) + volume_properties.pop(self.PROP_IS_READONLY_TIMESTAMP) + remaining.remove(volume_uuid) + continue + + if not waiting: + self._logger( + 'Volume {} is locked, waiting...'.format(volume_uuid) + ) + waiting = True + break + + if not remaining: + break + checked = remaining + + if timeout is not None and now - start > timeout: + raise LinstorVolumeManagerError( + 'volume `{}` is locked and timeout has been reached' + .format(volume_uuid), + LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS + ) + + # We must wait to use the volume. 
After that we can modify it + # ONLY if the SR is locked to avoid bad reads on the slaves. + time.sleep(1) + volume_properties = self._create_kv_cache() + + if waiting: + self._logger('No volume locked now!') + + def remove_volume_if_diskless(self, volume_uuid): + """ + Remove disless path from local node. + :param str volume_uuid: The volume uuid to remove. + """ + + self._ensure_volume_exists(volume_uuid) + + volume_properties = self._get_volume_properties(volume_uuid) + volume_name = volume_properties.get(self.PROP_VOLUME_NAME) + + node_name = socket.gethostname() + + for resource in self._get_resource_cache().resources: + if resource.name == volume_name and resource.node_name == node_name: + if linstor.consts.FLAG_TIE_BREAKER in resource.flags: + return + break + + result = self._linstor.resource_delete_if_diskless( + node_name=node_name, rsc_name=volume_name + ) + if not linstor.Linstor.all_api_responses_no_error(result): + raise LinstorVolumeManagerError( + 'Unable to delete diskless path of `{}` on node `{}`: {}' + .format(volume_name, node_name, ', '.join( + [str(x) for x in result])) + ) + + def introduce_volume(self, volume_uuid): + pass # TODO: Implement me. + + def resize_volume(self, volume_uuid, new_size): + """ + Resize a volume. + :param str volume_uuid: The volume uuid to resize. + :param int new_size: New size in B. + """ + + volume_name = self.get_volume_name(volume_uuid) + self.ensure_volume_is_not_locked(volume_uuid) + new_size = self.round_up_volume_size(new_size) // 1024 + + retry_count = 30 + while True: + result = self._linstor.volume_dfn_modify( + rsc_name=volume_name, + volume_nr=0, + size=new_size + ) + + self._mark_resource_cache_as_dirty() + + error_str = self._get_error_str(result) + if not error_str: + break + + # After volume creation, DRBD volume can be unusable during many seconds. + # So we must retry the definition change if the device is not up to date. + # Often the case for thick provisioning. + if retry_count and error_str.find('non-UpToDate DRBD device') >= 0: + time.sleep(2) + retry_count -= 1 + continue + + raise LinstorVolumeManagerError( + 'Could not resize volume `{}` from SR `{}`: {}' + .format(volume_uuid, self._group_name, error_str) + ) + + def get_volume_name(self, volume_uuid): + """ + Get the name of a particular volume. + :param str volume_uuid: The volume uuid of the name to get. + :return: The volume name. + :rtype: str + """ + + self._ensure_volume_exists(volume_uuid) + volume_properties = self._get_volume_properties(volume_uuid) + volume_name = volume_properties.get(self.PROP_VOLUME_NAME) + if volume_name: + return volume_name + raise LinstorVolumeManagerError( + 'Failed to get volume name of {}'.format(volume_uuid) + ) + + def get_volume_size(self, volume_uuid): + """ + Get the size of a particular volume. + :param str volume_uuid: The volume uuid of the size to get. + :return: The volume size. + :rtype: int + """ + + volume_name = self.get_volume_name(volume_uuid) + dfns = self._linstor.resource_dfn_list_raise( + query_volume_definitions=True, + filter_by_resource_definitions=[volume_name] + ).resource_definitions + + size = dfns[0].volume_definitions[0].size + if size < 0: + raise LinstorVolumeManagerError( + 'Failed to get volume size of: {}'.format(volume_uuid) + ) + return size * 1024 + + def set_auto_promote_timeout(self, volume_uuid, timeout): + """ + Define the blocking time of open calls when a DRBD + is already open on another host. + :param str volume_uuid: The volume uuid to modify. 
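+        :param int timeout: Value applied to the DRBD `auto-promote-timeout` resource option.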
+ """ + + volume_name = self.get_volume_name(volume_uuid) + result = self._linstor.resource_dfn_modify(volume_name, { + 'DrbdOptions/Resource/auto-promote-timeout': timeout + }) + error_str = self._get_error_str(result) + if error_str: + raise LinstorVolumeManagerError( + 'Could not change the auto promote timeout of `{}`: {}' + .format(volume_uuid, error_str) + ) + + def get_volume_info(self, volume_uuid): + """ + Get the volume info of a particular volume. + :param str volume_uuid: The volume uuid of the volume info to get. + :return: The volume info. + :rtype: VolumeInfo + """ + + volume_name = self.get_volume_name(volume_uuid) + return self._get_volumes_info()[volume_name] + + def get_device_path(self, volume_uuid): + """ + Get the dev path of a volume, create a diskless if necessary. + :param str volume_uuid: The volume uuid to get the dev path. + :return: The current device path of the volume. + :rtype: str + """ + + volume_name = self.get_volume_name(volume_uuid) + return self._find_device_path(volume_uuid, volume_name) + + def get_volume_uuid_from_device_path(self, device_path): + """ + Get the volume uuid of a device_path. + :param str device_path: The dev path to find the volume uuid. + :return: The volume uuid of the local device path. + :rtype: str + """ + + expected_volume_name = \ + self.get_volume_name_from_device_path(device_path) + + volume_names = self.get_volumes_with_name() + for volume_uuid, volume_name in volume_names.items(): + if volume_name == expected_volume_name: + return volume_uuid + + raise LinstorVolumeManagerError( + 'Unable to find volume uuid from dev path `{}`'.format(device_path) + ) + + def get_volume_name_from_device_path(self, device_path): + """ + Get the volume name of a device_path. + :param str device_path: The dev path to find the volume name. + :return: The volume name of the device path. + :rtype: str + """ + + # Assume that we have a path like this: + # - "/dev/drbd/by-res/xcp-volume-/0" + # - "../xcp-volume-/0" + if device_path.startswith(DRBD_BY_RES_PATH): + prefix_len = len(DRBD_BY_RES_PATH) + else: + assert device_path.startswith('../') + prefix_len = 3 + + res_name_end = device_path.find('/', prefix_len) + assert res_name_end != -1 + return device_path[prefix_len:res_name_end] + + def update_volume_uuid(self, volume_uuid, new_volume_uuid, force=False): + """ + Change the uuid of a volume. + :param str volume_uuid: The volume to modify. + :param str new_volume_uuid: The new volume uuid to use. + :param bool force: If true we doesn't check if volume_uuid is in the + volume list. I.e. the volume can be marked as deleted but the volume + can still be in the LINSTOR KV store if the deletion has failed. + In specific cases like "undo" after a failed clone we must rename a bad + deleted VDI. + """ + + self._logger( + 'Trying to update volume UUID {} to {}...' + .format(volume_uuid, new_volume_uuid) + ) + assert volume_uuid != new_volume_uuid, 'can\'t update volume UUID, same value' + + if not force: + self._ensure_volume_exists(volume_uuid) + self.ensure_volume_is_not_locked(volume_uuid) + + if new_volume_uuid in self._volumes: + raise LinstorVolumeManagerError( + 'Volume `{}` already exists'.format(new_volume_uuid), + LinstorVolumeManagerError.ERR_VOLUME_EXISTS + ) + + volume_properties = self._get_volume_properties(volume_uuid) + if volume_properties.get(self.PROP_UPDATING_UUID_SRC): + raise LinstorVolumeManagerError( + 'Cannot update volume uuid {}: invalid state' + .format(volume_uuid) + ) + + # 1. 
Copy in temp variables metadata and volume_name. + metadata = volume_properties.get(self.PROP_METADATA) + volume_name = volume_properties.get(self.PROP_VOLUME_NAME) + + # 2. Switch to new volume namespace. + volume_properties.namespace = self._build_volume_namespace( + new_volume_uuid + ) + + if list(volume_properties.items()): + raise LinstorVolumeManagerError( + 'Cannot update volume uuid {} to {}: ' + .format(volume_uuid, new_volume_uuid) + + 'this last one is not empty' + ) + + try: + # 3. Mark new volume properties with PROP_UPDATING_UUID_SRC. + # If we crash after that, the new properties can be removed + # properly. + volume_properties[self.PROP_NOT_EXISTS] = self.STATE_NOT_EXISTS + volume_properties[self.PROP_UPDATING_UUID_SRC] = volume_uuid + + # 4. Copy the properties. + # Note: On new volumes, during clone for example, the metadata + # may be missing. So we must test it to avoid this error: + # "None has to be a str/unicode, but is " + if metadata: + volume_properties[self.PROP_METADATA] = metadata + volume_properties[self.PROP_VOLUME_NAME] = volume_name + + # 5. Ok! + volume_properties[self.PROP_NOT_EXISTS] = self.STATE_EXISTS + except Exception as e: + try: + # Clear the new volume properties in case of failure. + assert volume_properties.namespace == \ + self._build_volume_namespace(new_volume_uuid) + volume_properties.clear() + except Exception as e: + self._logger( + 'Failed to clear new volume properties: {} (ignoring...)' + .format(e) + ) + raise LinstorVolumeManagerError( + 'Failed to copy volume properties: {}'.format(e) + ) + + try: + # 6. After this point, it's ok we can remove the + # PROP_UPDATING_UUID_SRC property and clear the src properties + # without problems. + + # 7. Switch to old volume namespace. + volume_properties.namespace = self._build_volume_namespace( + volume_uuid + ) + volume_properties.clear() + + # 8. Switch a last time to new volume namespace. + volume_properties.namespace = self._build_volume_namespace( + new_volume_uuid + ) + volume_properties.pop(self.PROP_UPDATING_UUID_SRC) + except Exception as e: + raise LinstorVolumeManagerError( + 'Failed to clear volume properties ' + 'after volume uuid update: {}'.format(e) + ) + + self._volumes.remove(volume_uuid) + self._volumes.add(new_volume_uuid) + + self._logger( + 'UUID update succeeded of {} to {}! (properties={})' + .format( + volume_uuid, new_volume_uuid, + self._get_filtered_properties(volume_properties) + ) + ) + + def update_volume_name(self, volume_uuid, volume_name): + """ + Change the volume name of a volume. + :param str volume_uuid: The volume to modify. + :param str volume_name: The volume_name to use. + """ + + self._ensure_volume_exists(volume_uuid) + self.ensure_volume_is_not_locked(volume_uuid) + if not volume_name.startswith(self.PREFIX_VOLUME): + raise LinstorVolumeManagerError( + 'Volume name `{}` must be start with `{}`' + .format(volume_name, self.PREFIX_VOLUME) + ) + + if volume_name not in self._fetch_resource_names(): + raise LinstorVolumeManagerError( + 'Volume `{}` doesn\'t exist'.format(volume_name) + ) + + volume_properties = self._get_volume_properties(volume_uuid) + volume_properties[self.PROP_VOLUME_NAME] = volume_name + + def get_usage_states(self, volume_uuid): + """ + Check if a volume is currently used. + :param str volume_uuid: The volume uuid to check. + :return: A dictionnary that contains states. 
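+        Keys are node names, values are the `in_use` flag reported by LINSTOR
+        for this volume (None if unknown).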
+ :rtype: dict(str, bool or None) + """ + + states = {} + + volume_name = self.get_volume_name(volume_uuid) + for resource_state in self._linstor.resource_list_raise( + filter_by_resources=[volume_name] + ).resource_states: + states[resource_state.node_name] = resource_state.in_use + + return states + + def get_volume_openers(self, volume_uuid): + """ + Get openers of a volume. + :param str volume_uuid: The volume uuid to monitor. + :return: A dictionnary that contains openers. + :rtype: dict(str, obj) + """ + return get_all_volume_openers(self.get_volume_name(volume_uuid), '0') + + def get_volumes_with_name(self): + """ + Give a volume dictionnary that contains names actually owned. + :return: A volume/name dict. + :rtype: dict(str, str) + """ + return self._get_volumes_by_property(self.REG_VOLUME_NAME) + + def get_volumes_with_info(self): + """ + Give a volume dictionnary that contains VolumeInfos. + :return: A volume/VolumeInfo dict. + :rtype: dict(str, VolumeInfo) + """ + + volumes = {} + + all_volume_info = self._get_volumes_info() + volume_names = self.get_volumes_with_name() + for volume_uuid, volume_name in volume_names.items(): + if volume_name: + volume_info = all_volume_info.get(volume_name) + if volume_info: + volumes[volume_uuid] = volume_info + continue + + # Well I suppose if this volume is not available, + # LINSTOR has been used directly without using this API. + volumes[volume_uuid] = self.VolumeInfo('') + + return volumes + + def get_volumes_with_metadata(self): + """ + Give a volume dictionnary that contains metadata. + :return: A volume/metadata dict. + :rtype: dict(str, dict) + """ + + volumes = {} + + metadata = self._get_volumes_by_property(self.REG_METADATA) + for volume_uuid, volume_metadata in metadata.items(): + if volume_metadata: + volume_metadata = json.loads(volume_metadata) + if isinstance(volume_metadata, dict): + volumes[volume_uuid] = volume_metadata + continue + raise LinstorVolumeManagerError( + 'Expected dictionary in volume metadata: {}' + .format(volume_uuid) + ) + + volumes[volume_uuid] = {} + + return volumes + + def get_volume_metadata(self, volume_uuid): + """ + Get the metadata of a volume. + :return: Dictionary that contains metadata. + :rtype: dict + """ + + self._ensure_volume_exists(volume_uuid) + volume_properties = self._get_volume_properties(volume_uuid) + metadata = volume_properties.get(self.PROP_METADATA) + if metadata: + metadata = json.loads(metadata) + if isinstance(metadata, dict): + return metadata + raise LinstorVolumeManagerError( + 'Expected dictionary in volume metadata: {}' + .format(volume_uuid) + ) + return {} + + def set_volume_metadata(self, volume_uuid, metadata): + """ + Set the metadata of a volume. + :param dict metadata: Dictionary that contains metadata. + """ + + self._ensure_volume_exists(volume_uuid) + self.ensure_volume_is_not_locked(volume_uuid) + + assert isinstance(metadata, dict) + volume_properties = self._get_volume_properties(volume_uuid) + volume_properties[self.PROP_METADATA] = json.dumps(metadata) + + def update_volume_metadata(self, volume_uuid, metadata): + """ + Update the metadata of a volume. It modify only the given keys. + It doesn't remove unreferenced key instead of set_volume_metadata. + :param dict metadata: Dictionary that contains metadata. 
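+        :param str volume_uuid: The volume to modify.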
+ """ + + self._ensure_volume_exists(volume_uuid) + self.ensure_volume_is_not_locked(volume_uuid) + + assert isinstance(metadata, dict) + volume_properties = self._get_volume_properties(volume_uuid) + + current_metadata = json.loads( + volume_properties.get(self.PROP_METADATA, '{}') + ) + if not isinstance(metadata, dict): + raise LinstorVolumeManagerError( + 'Expected dictionary in volume metadata: {}' + .format(volume_uuid) + ) + + for key, value in metadata.items(): + current_metadata[key] = value + volume_properties[self.PROP_METADATA] = json.dumps(current_metadata) + + def shallow_clone_volume(self, volume_uuid, clone_uuid, persistent=True): + """ + Clone a volume. Do not copy the data, this method creates a new volume + with the same size. + :param str volume_uuid: The volume to clone. + :param str clone_uuid: The cloned volume. + :param bool persistent: If false the volume will be unavailable + on the next constructor call LinstorSR(...). + :return: The current device path of the cloned volume. + :rtype: str + """ + + volume_name = self.get_volume_name(volume_uuid) + self.ensure_volume_is_not_locked(volume_uuid) + + # 1. Find ideal nodes + size to use. + ideal_node_names, size = self._get_volume_node_names_and_size( + volume_name + ) + if size <= 0: + raise LinstorVolumeManagerError( + 'Invalid size of {} for volume `{}`'.format(size, volume_name) + ) + + # 2. Create clone! + return self.create_volume(clone_uuid, size, persistent) + + def remove_resourceless_volumes(self): + """ + Remove all volumes without valid or non-empty name + (i.e. without LINSTOR resource). It's different than + LinstorVolumeManager constructor that takes a `repair` param that + removes volumes with `PROP_NOT_EXISTS` to 1. + """ + + resource_names = self._fetch_resource_names() + for volume_uuid, volume_name in self.get_volumes_with_name().items(): + if not volume_name or volume_name not in resource_names: + # Don't force, we can be sure of what's happening. + self.destroy_volume(volume_uuid) + + def destroy(self): + """ + Destroy this SR. Object should not be used after that. + :param bool force: Try to destroy volumes before if true. + """ + + # 1. Ensure volume list is empty. No cost. + if self._volumes: + raise LinstorVolumeManagerError( + 'Cannot destroy LINSTOR volume manager: ' + 'It exists remaining volumes' + ) + + # 2. Fetch ALL resource names. + # This list may therefore contain volumes created outside + # the scope of the driver. + resource_names = self._fetch_resource_names(ignore_deleted=False) + try: + resource_names.remove(DATABASE_VOLUME_NAME) + except KeyError: + # Really strange to reach that point. + # Normally we always have the database volume in the list. + pass + + # 3. Ensure the resource name list is entirely empty... + if resource_names: + raise LinstorVolumeManagerError( + 'Cannot destroy LINSTOR volume manager: ' + 'It exists remaining volumes (created externally or being deleted)' + ) + + # 4. Destroying... + controller_is_running = self._controller_is_running() + uri = 'linstor://localhost' + try: + if controller_is_running: + self._start_controller(start=False) + + # 4.1. Umount LINSTOR database. + self._mount_database_volume( + self.build_device_path(DATABASE_VOLUME_NAME), + mount=False, + force=True + ) + + # 4.2. Refresh instance. + self._start_controller(start=True) + self._linstor = self._create_linstor_instance( + uri, keep_uri_unmodified=True + ) + + # 4.3. Destroy database volume. + self._destroy_resource(DATABASE_VOLUME_NAME) + + # 4.4. Refresh linstor connection. 
+ # Without we get this error: + # "Cannot delete resource group 'xcp-sr-linstor_group_thin_device' because it has existing resource definitions.." + # Because the deletion of the databse was not seen by Linstor for some reason. + # It seems a simple refresh of the Linstor connection make it aware of the deletion. + self._linstor.disconnect() + self._linstor.connect() + + # 4.5. Destroy remaining drbd nodes on hosts. + # We check if there is a DRBD node on hosts that could mean blocking when destroying resource groups. + # It needs to be done locally by each host so we go through the linstor-manager plugin. + # If we don't do this sometimes, the destroy will fail when trying to destroy the resource groups with: + # "linstor-manager:destroy error: Failed to destroy SP `xcp-sr-linstor_group_thin_device` on node `r620-s2`: The specified storage pool 'xcp-sr-linstor_group_thin_device' on node 'r620-s2' can not be deleted as volumes / snapshot-volumes are still using it." + session = util.timeout_call(5, util.get_localAPI_session) + for host_ref in session.xenapi.host.get_all(): + try: + response = session.xenapi.host.call_plugin( + host_ref, 'linstor-manager', 'destroyDrbdVolumes', {'volume_group': self._group_name} + ) + except Exception as e: + util.SMlog('Calling destroyDrbdVolumes on host {} failed with error {}'.format(host_ref, e)) + + # 4.6. Destroy group and storage pools. + self._destroy_resource_group(self._linstor, self._group_name) + self._destroy_resource_group(self._linstor, self._ha_group_name) + for pool in self._get_storage_pools(force=True): + self._destroy_storage_pool( + self._linstor, pool.name, pool.node_name + ) + except Exception as e: + self._start_controller(start=controller_is_running) + raise e + + try: + self._start_controller(start=False) + for file in os.listdir(DATABASE_PATH): + if file != 'lost+found': + os.remove(DATABASE_PATH + '/' + file) + except Exception as e: + util.SMlog( + 'Ignoring failure after LINSTOR SR destruction: {}' + .format(e) + ) + + def find_up_to_date_diskful_nodes(self, volume_uuid): + """ + Find all nodes that contain a specific volume using diskful disks. + The disk must be up to data to be used. + :param str volume_uuid: The volume to use. + :return: The available nodes. + :rtype: tuple(set(str), str) + """ + + volume_name = self.get_volume_name(volume_uuid) + + in_use_by = None + node_names = set() + + resource_states = filter( + lambda resource_state: resource_state.name == volume_name, + self._get_resource_cache().resource_states + ) + + for resource_state in resource_states: + volume_state = resource_state.volume_states[0] + if volume_state.disk_state == 'UpToDate': + node_names.add(resource_state.node_name) + if resource_state.in_use: + in_use_by = resource_state.node_name + + return (node_names, in_use_by) + + def invalidate_resource_cache(self): + """ + If resources are impacted by external commands like vhdutil, + it's necessary to call this function to invalidate current resource + cache. + """ + self._mark_resource_cache_as_dirty() + + def has_node(self, node_name): + """ + Check if a node exists in the LINSTOR database. + :rtype: bool + """ + result = self._linstor.node_list() + error_str = self._get_error_str(result) + if error_str: + raise LinstorVolumeManagerError( + 'Failed to list nodes using `{}`: {}' + .format(node_name, error_str) + ) + return bool(result[0].node(node_name)) + + def create_node(self, node_name, ip): + """ + Create a new node in the LINSTOR database. + :param str node_name: Node name to use. 
+ :param str ip: Host IP to communicate. + """ + result = self._linstor.node_create( + node_name, + linstor.consts.VAL_NODE_TYPE_CMBD, + ip + ) + errors = self._filter_errors(result) + if errors: + error_str = self._get_error_str(errors) + raise LinstorVolumeManagerError( + 'Failed to create node `{}`: {}'.format(node_name, error_str) + ) + + def destroy_node(self, node_name): + """ + Destroy a node in the LINSTOR database. + :param str node_name: Node name to remove. + """ + result = self._linstor.node_delete(node_name) + errors = self._filter_errors(result) + if errors: + error_str = self._get_error_str(errors) + raise LinstorVolumeManagerError( + 'Failed to destroy node `{}`: {}'.format(node_name, error_str) + ) + + def create_node_interface(self, node_name, name, ip): + """ + Create a new node interface in the LINSTOR database. + :param str node_name: Node name of the interface to use. + :param str name: Interface to create. + :param str ip: IP of the interface. + """ + result = self._linstor.netinterface_create(node_name, name, ip) + errors = self._filter_errors(result) + if errors: + error_str = self._get_error_str(errors) + raise LinstorVolumeManagerError( + 'Failed to create node interface on `{}`: {}'.format(node_name, error_str) + ) + + def destroy_node_interface(self, node_name, name): + """ + Destroy a node interface in the LINSTOR database. + :param str node_name: Node name of the interface to remove. + :param str name: Interface to remove. + """ + + if name == 'default': + raise LinstorVolumeManagerError( + 'Unable to delete the default interface of a node!' + ) + + result = self._linstor.netinterface_delete(node_name, name) + errors = self._filter_errors(result) + if errors: + error_str = self._get_error_str(errors) + raise LinstorVolumeManagerError( + 'Failed to destroy node interface on `{}`: {}'.format(node_name, error_str) + ) + + def modify_node_interface(self, node_name, name, ip): + """ + Modify a node interface in the LINSTOR database. Create it if necessary. + :param str node_name: Node name of the interface to use. + :param str name: Interface to modify or create. + :param str ip: IP of the interface. + """ + result = self._linstor.netinterface_create(node_name, name, ip) + errors = self._filter_errors(result) + if not errors: + return + + if self._check_errors(errors, [linstor.consts.FAIL_EXISTS_NET_IF]): + result = self._linstor.netinterface_modify(node_name, name, ip) + errors = self._filter_errors(result) + if not errors: + return + + error_str = self._get_error_str(errors) + raise LinstorVolumeManagerError( + 'Unable to modify interface on `{}`: {}'.format(node_name, error_str) + ) + + def list_node_interfaces(self, node_name): + """ + List all node interfaces. + :param str node_name: Node name to use to list interfaces. + :rtype: list + : + """ + result = self._linstor.net_interface_list(node_name) + if not result: + raise LinstorVolumeManagerError( + 'Unable to list interfaces on `{}`: no list received'.format(node_name) + ) + + interfaces = {} + for interface in result: + interface = interface._rest_data + interfaces[interface['name']] = { + 'address': interface['address'], + 'active': interface['is_active'] + } + return interfaces + + def get_node_preferred_interface(self, node_name): + """ + Get the preferred interface used by a node. + :param str node_name: Node name of the interface to get. 
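+        :return: The preferred interface name ('default' if none is set).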
+ :rtype: str + """ + try: + nodes = self._linstor.node_list_raise([node_name]).nodes + if nodes: + properties = nodes[0].props + return properties.get('PrefNic', 'default') + return nodes + except Exception as e: + raise LinstorVolumeManagerError( + 'Failed to get preferred interface: `{}`'.format(e) + ) + + def set_node_preferred_interface(self, node_name, name): + """ + Set the preferred interface to use on a node. + :param str node_name: Node name of the interface. + :param str name: Preferred interface to use. + """ + result = self._linstor.node_modify(node_name, property_dict={'PrefNic': name}) + errors = self._filter_errors(result) + if errors: + error_str = self._get_error_str(errors) + raise LinstorVolumeManagerError( + 'Failed to set preferred node interface on `{}`: {}'.format(node_name, error_str) + ) + + def get_nodes_info(self): + """ + Get all nodes + statuses, used or not by the pool. + :rtype: dict(str, dict) + """ + try: + nodes = {} + for node in self._linstor.node_list_raise().nodes: + nodes[node.name] = node.connection_status + return nodes + except Exception as e: + raise LinstorVolumeManagerError( + 'Failed to get all nodes: `{}`'.format(e) + ) + + def get_storage_pools_info(self): + """ + Give all storage pools of current group name. + :rtype: dict(str, list) + """ + storage_pools = {} + for pool in self._get_storage_pools(force=True): + if pool.node_name not in storage_pools: + storage_pools[pool.node_name] = [] + + size = -1 + capacity = -1 + + space = pool.free_space + if space: + size = space.free_capacity + if size < 0: + size = -1 + else: + size *= 1024 + capacity = space.total_capacity + if capacity <= 0: + capacity = -1 + else: + capacity *= 1024 + + storage_pools[pool.node_name].append({ + 'name': pool.name, + 'linstor-uuid': pool.uuid, + 'free-size': size, + 'capacity': capacity + }) + + return storage_pools + + def get_resources_info(self): + """ + Give all resources of current group name. + :rtype: dict(str, list) + """ + resources = {} + resource_list = self._get_resource_cache() + volume_names = self.get_volumes_with_name() + for resource in resource_list.resources: + if resource.name not in resources: + resources[resource.name] = { 'nodes': {}, 'uuid': '' } + resource_nodes = resources[resource.name]['nodes'] + + resource_nodes[resource.node_name] = { + 'volumes': [], + 'diskful': linstor.consts.FLAG_DISKLESS not in resource.flags, + 'tie-breaker': linstor.consts.FLAG_TIE_BREAKER in resource.flags + } + resource_volumes = resource_nodes[resource.node_name]['volumes'] + + for volume in resource.volumes: + # We ignore diskless pools of the form "DfltDisklessStorPool". 
+ if volume.storage_pool_name != self._group_name: + continue + + usable_size = volume.usable_size + if usable_size < 0: + usable_size = -1 + else: + usable_size *= 1024 + + allocated_size = volume.allocated_size + if allocated_size < 0: + allocated_size = -1 + else: + allocated_size *= 1024 + + resource_volumes.append({ + 'storage-pool-name': volume.storage_pool_name, + 'linstor-uuid': volume.uuid, + 'number': volume.number, + 'device-path': volume.device_path, + 'usable-size': usable_size, + 'allocated-size': allocated_size + }) + + for resource_state in resource_list.resource_states: + resource = resources[resource_state.rsc_name]['nodes'][resource_state.node_name] + resource['in-use'] = resource_state.in_use + + volumes = resource['volumes'] + for volume_state in resource_state.volume_states: + volume = next((x for x in volumes if x['number'] == volume_state.number), None) + if volume: + volume['disk-state'] = volume_state.disk_state + + for volume_uuid, volume_name in volume_names.items(): + resource = resources.get(volume_name) + if resource: + resource['uuid'] = volume_uuid + + return resources + + def get_database_path(self): + """ + Get the database path. + :return: The current database path. + :rtype: str + """ + return self._request_database_path(self._linstor) + + @classmethod + def get_all_group_names(cls, base_name): + """ + Get all group names. I.e. list of current group + HA. + :param str base_name: The SR group_name to use. + :return: List of group names. + :rtype: list + """ + return [cls._build_group_name(base_name), cls._build_ha_group_name(base_name)] + + @classmethod + def create_sr( + cls, group_name, ips, redundancy, + thin_provisioning, auto_quorum, + logger=default_logger.__func__ + ): + """ + Create a new SR on the given nodes. + :param str group_name: The SR group_name to use. + :param set(str) ips: Node ips. + :param int redundancy: How many copy of volumes should we store? + :param bool thin_provisioning: Use thin or thick provisioning. + :param bool auto_quorum: DB quorum is monitored by LINSTOR. + :param function logger: Function to log messages. + :return: A new LinstorSr instance. + :rtype: LinstorSr + """ + + try: + cls._start_controller(start=True) + sr = cls._create_sr( + group_name, + ips, + redundancy, + thin_provisioning, + auto_quorum, + logger + ) + finally: + # Controller must be stopped and volume unmounted because + # it is the role of the drbd-reactor daemon to do the right + # actions. + cls._start_controller(start=False) + cls._mount_volume( + cls.build_device_path(DATABASE_VOLUME_NAME), + DATABASE_PATH, + mount=False + ) + return sr + + @classmethod + def _create_sr( + cls, group_name, ips, redundancy, + thin_provisioning, auto_quorum, + logger=default_logger.__func__ + ): + # 1. Check if SR already exists. + uri = 'linstor://localhost' + + lin = cls._create_linstor_instance(uri, keep_uri_unmodified=True) + + node_names = list(ips.keys()) + for node_name, ip in ips.items(): + while True: + # Try to create node. + result = lin.node_create( + node_name, + linstor.consts.VAL_NODE_TYPE_CMBD, + ip + ) + + errors = cls._filter_errors(result) + if cls._check_errors( + errors, [linstor.consts.FAIL_EXISTS_NODE] + ): + # If it already exists, remove, then recreate. + result = lin.node_delete(node_name) + error_str = cls._get_error_str(result) + if error_str: + raise LinstorVolumeManagerError( + 'Failed to remove old node `{}`: {}' + .format(node_name, error_str) + ) + elif not errors: + break # Created! 
+ else: + raise LinstorVolumeManagerError( + 'Failed to create node `{}` with ip `{}`: {}'.format( + node_name, ip, cls._get_error_str(errors) + ) + ) + + driver_pool_name = group_name + base_group_name = group_name + group_name = cls._build_group_name(group_name) + storage_pool_name = group_name + pools = lin.storage_pool_list_raise(filter_by_stor_pools=[storage_pool_name]).storage_pools + if pools: + existing_node_names = [pool.node_name for pool in pools] + raise LinstorVolumeManagerError( + 'Unable to create SR `{}`. It already exists on node(s): {}' + .format(group_name, existing_node_names) + ) + + if lin.resource_group_list_raise( + cls.get_all_group_names(base_group_name) + ).resource_groups: + if not lin.resource_dfn_list_raise().resource_definitions: + backup_path = cls._create_database_backup_path() + logger( + 'Group name already exists `{}` without LVs. ' + 'Ignoring and moving the config files in {}'.format(group_name, backup_path) + ) + cls._move_files(DATABASE_PATH, backup_path) + else: + raise LinstorVolumeManagerError( + 'Unable to create SR `{}`: The group name already exists' + .format(group_name) + ) + + if thin_provisioning: + driver_pool_parts = driver_pool_name.split('/') + if not len(driver_pool_parts) == 2: + raise LinstorVolumeManagerError( + 'Invalid group name using thin provisioning. ' + 'Expected format: \'VG/LV`\'' + ) + + # 2. Create storage pool on each node + resource group. + reg_volume_group_not_found = re.compile( + ".*Volume group '.*' not found$" + ) + + i = 0 + try: + # 2.a. Create storage pools. + storage_pool_count = 0 + while i < len(node_names): + node_name = node_names[i] + + result = lin.storage_pool_create( + node_name=node_name, + storage_pool_name=storage_pool_name, + storage_driver='LVM_THIN' if thin_provisioning else 'LVM', + driver_pool_name=driver_pool_name + ) + + errors = linstor.Linstor.filter_api_call_response_errors( + result + ) + if errors: + if len(errors) == 1 and errors[0].is_error( + linstor.consts.FAIL_STOR_POOL_CONFIGURATION_ERROR + ) and reg_volume_group_not_found.match(errors[0].message): + logger( + 'Volume group `{}` not found on `{}`. Ignoring...' + .format(group_name, node_name) + ) + cls._destroy_storage_pool(lin, storage_pool_name, node_name) + else: + error_str = cls._get_error_str(result) + raise LinstorVolumeManagerError( + 'Could not create SP `{}` on node `{}`: {}' + .format(group_name, node_name, error_str) + ) + else: + storage_pool_count += 1 + i += 1 + + if not storage_pool_count: + raise LinstorVolumeManagerError( + 'Unable to create SR `{}`: No VG group found'.format( + group_name, + ) + ) + + # 2.b. Create resource groups. + ha_group_name = cls._build_ha_group_name(base_group_name) + cls._create_resource_group( + lin, + group_name, + storage_pool_name, + redundancy, + True + ) + cls._create_resource_group( + lin, + ha_group_name, + storage_pool_name, + 3, + True + ) + + # 3. Create the LINSTOR database volume and mount it. + try: + logger('Creating database volume...') + volume_path = cls._create_database_volume( + lin, ha_group_name, storage_pool_name, node_names, redundancy, auto_quorum + ) + except LinstorVolumeManagerError as e: + if e.code != LinstorVolumeManagerError.ERR_VOLUME_EXISTS: + logger('Destroying database volume after creation fail...') + cls._force_destroy_database_volume(lin, group_name) + raise + + try: + logger('Mounting database volume...') + + # First we must disable the controller to move safely the + # LINSTOR config. 
+ cls._start_controller(start=False) + + cls._mount_database_volume(volume_path) + except Exception as e: + # Ensure we are connected because controller has been + # restarted during mount call. + logger('Destroying database volume after mount fail...') + + try: + cls._start_controller(start=True) + except Exception: + pass + + lin = cls._create_linstor_instance( + uri, keep_uri_unmodified=True + ) + cls._force_destroy_database_volume(lin, group_name) + raise e + + cls._start_controller(start=True) + lin = cls._create_linstor_instance(uri, keep_uri_unmodified=True) + + # 4. Remove storage pools/resource/volume group in the case of errors. + except Exception as e: + logger('Destroying resource group and storage pools after fail...') + try: + cls._destroy_resource_group(lin, group_name) + cls._destroy_resource_group(lin, ha_group_name) + except Exception as e2: + logger('Failed to destroy resource group: {}'.format(e2)) + pass + j = 0 + i = min(i, len(node_names) - 1) + while j <= i: + try: + cls._destroy_storage_pool(lin, storage_pool_name, node_names[j]) + except Exception as e2: + logger('Failed to destroy resource group: {}'.format(e2)) + pass + j += 1 + raise e + + # 5. Return new instance. + instance = cls.__new__(cls) + instance._linstor = lin + instance._logger = logger + instance._redundancy = redundancy + instance._base_group_name = base_group_name + instance._group_name = group_name + instance._volumes = set() + instance._storage_pools_time = 0 + instance._kv_cache = instance._create_kv_cache() + instance._resource_cache = None + instance._resource_cache_dirty = True + instance._volume_info_cache = None + instance._volume_info_cache_dirty = True + return instance + + @classmethod + def build_device_path(cls, volume_name): + """ + Build a device path given a volume name. + :param str volume_name: The volume name to use. + :return: A valid or not device path. + :rtype: str + """ + + return '{}{}/0'.format(cls.DEV_ROOT_PATH, volume_name) + + @classmethod + def build_volume_name(cls, base_name): + """ + Build a volume name given a base name (i.e. a UUID). + :param str base_name: The volume name to use. + :return: A valid or not device path. + :rtype: str + """ + return '{}{}'.format(cls.PREFIX_VOLUME, base_name) + + @classmethod + def round_up_volume_size(cls, volume_size): + """ + Align volume size on higher multiple of BLOCK_SIZE. + :param int volume_size: The volume size to align. + :return: An aligned volume size. + :rtype: int + """ + return round_up(volume_size, cls.BLOCK_SIZE) + + @classmethod + def round_down_volume_size(cls, volume_size): + """ + Align volume size on lower multiple of BLOCK_SIZE. + :param int volume_size: The volume size to align. + :return: An aligned volume size. + :rtype: int + """ + return round_down(volume_size, cls.BLOCK_SIZE) + + # -------------------------------------------------------------------------- + # Private helpers. 
+ # --------------------------------------------------------------------------
+
+ def _create_kv_cache(self):
+ self._kv_cache = self._create_linstor_kv('/')
+ self._kv_cache_dirty = False
+ return self._kv_cache
+
+ def _get_kv_cache(self):
+ if self._kv_cache_dirty:
+ self._kv_cache = self._create_kv_cache()
+ return self._kv_cache
+
+ def _create_resource_cache(self):
+ self._resource_cache = self._linstor.resource_list_raise()
+ self._resource_cache_dirty = False
+ return self._resource_cache
+
+ def _get_resource_cache(self):
+ if self._resource_cache_dirty:
+ self._resource_cache = self._create_resource_cache()
+ return self._resource_cache
+
+ def _mark_resource_cache_as_dirty(self):
+ self._resource_cache_dirty = True
+ self._volume_info_cache_dirty = True
+
+ def _repair_diskless_resource(self, resource, volume):
+ if linstor.consts.FLAG_DISKLESS not in resource.flags:
+ return
+
+ self._linstor.resource_delete(
+ node_name=resource.node_name,
+ rsc_name=resource.name
+ )
+
+ self._linstor.resource_create(
+ rscs=[
+ linstor.linstorapi.ResourceData(
+ #node_id=?,
+ #layer_list=?,
+ node_name=resource.node_name,
+ rsc_name=resource.name,
+ storage_pool=volume.storage_pool_name,
+ diskless=linstor.consts.FLAG_DISKLESS in resource.flags,
+ drbd_diskless=linstor.consts.FLAG_DRBD_DISKLESS in resource.flags,
+ nvme_initiator=linstor.consts.FLAG_NVME_INITIATOR in resource.flags,
+ ebs_initiator=linstor.consts.FLAG_EBS_INITIATOR in resource.flags
+ )
+ ]
+ )
+
+ def _scan_for_broken_diskless_resources(self):
+ for resource in self._get_resource_cache().resources:
+ for volume in resource.volumes:
+ # Skip volumes outside our pool or with a valid allocated size.
+ if (
+ volume.storage_pool_name != self._group_name
+ or volume.allocated_size >= 0
+ ):
+ continue
+ try:
+ self._repair_diskless_resource(resource, volume)
+ except Exception as e:
+ util.SMlog('Failed to repair diskless resource `{}` on `{}`: {}'.format(
+ resource.name, volume.storage_pool_name, e
+ ))
+ self._mark_resource_cache_as_dirty()
+
+
+ # --------------------------------------------------------------------------
+
+ def _ensure_volume_exists(self, volume_uuid):
+ if volume_uuid not in self._volumes:
+ raise LinstorVolumeManagerError(
+ 'volume `{}` doesn\'t exist'.format(volume_uuid),
+ LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS
+ )
+
+ def _find_best_size_candidates(self):
+ result = self._linstor.resource_group_qmvs(self._group_name)
+ error_str = self._get_error_str(result)
+ if error_str:
+ raise LinstorVolumeManagerError(
+ 'Failed to get max volume size allowed of SR `{}`: {}'.format(
+ self._group_name,
+ error_str
+ )
+ )
+ return result[0].candidates
+
+ def _fetch_resource_names(self, ignore_deleted=True):
+ resource_names = set()
+ dfns = self._linstor.resource_dfn_list_raise().resource_definitions
+ for dfn in dfns:
+ if dfn.resource_group_name in self.get_all_group_names(self._base_group_name) and (
+ ignore_deleted or
+ linstor.consts.FLAG_DELETE not in dfn.flags
+ ):
+ resource_names.add(dfn.name)
+ return resource_names
+
+ def _get_volumes_info(self, volume_name=None):
+ all_volume_info = {}
+
+ if not self._volume_info_cache_dirty:
+ return self._volume_info_cache
+
+ self._scan_for_broken_diskless_resources()
+
+ for resource in self._get_resource_cache().resources:
+ if resource.name not in all_volume_info:
+ current = all_volume_info[resource.name] = self.VolumeInfo(
+ resource.name
+ )
+ else:
+ current = all_volume_info[resource.name]
+
+ if linstor.consts.FLAG_DISKLESS not in resource.flags:
+ current.diskful.append(resource.node_name)
+
+ for volume in 
resource.volumes: + # We ignore diskless pools of the form "DfltDisklessStorPool". + if volume.storage_pool_name == self._group_name: + if volume.allocated_size < 0: + raise LinstorVolumeManagerError( + 'Failed to get allocated size of `{}` on `{}`' + .format(resource.name, volume.storage_pool_name) + ) + allocated_size = volume.allocated_size + + current.allocated_size = current.allocated_size and \ + max(current.allocated_size, allocated_size) or \ + allocated_size + + usable_size = volume.usable_size + if usable_size > 0 and ( + usable_size < current.virtual_size or + not current.virtual_size + ): + current.virtual_size = usable_size + + if current.virtual_size <= 0: + raise LinstorVolumeManagerError( + 'Failed to get usable size of `{}` on `{}`' + .format(resource.name, volume.storage_pool_name) + ) + + for current in all_volume_info.values(): + current.allocated_size *= 1024 + current.virtual_size *= 1024 + + self._volume_info_cache_dirty = False + self._volume_info_cache = all_volume_info + + return all_volume_info + + def _get_volume_node_names_and_size(self, volume_name): + node_names = set() + size = -1 + for resource in self._linstor.resource_list_raise( + filter_by_resources=[volume_name] + ).resources: + for volume in resource.volumes: + # We ignore diskless pools of the form "DfltDisklessStorPool". + if volume.storage_pool_name == self._group_name: + node_names.add(resource.node_name) + + current_size = volume.usable_size + if current_size < 0: + raise LinstorVolumeManagerError( + 'Failed to get usable size of `{}` on `{}`' + .format(resource.name, volume.storage_pool_name) + ) + + if size < 0: + size = current_size + else: + size = min(size, current_size) + + return (node_names, size * 1024) + + def _compute_size(self, attr): + capacity = 0 + for pool in self._get_storage_pools(force=True): + space = pool.free_space + if space: + size = getattr(space, attr) + if size < 0: + raise LinstorVolumeManagerError( + 'Failed to get pool {} attr of `{}`' + .format(attr, pool.node_name) + ) + capacity += size + return capacity * 1024 + + def _get_node_names(self): + node_names = set() + for pool in self._get_storage_pools(): + node_names.add(pool.node_name) + return node_names + + def _get_storage_pools(self, force=False): + cur_time = time.time() + elsaped_time = cur_time - self._storage_pools_time + + if force or elsaped_time >= self.STORAGE_POOLS_FETCH_INTERVAL: + self._storage_pools = self._linstor.storage_pool_list_raise( + filter_by_stor_pools=[self._group_name] + ).storage_pools + self._storage_pools_time = time.time() + + return self._storage_pools + + def _create_volume( + self, + volume_uuid, + volume_name, + size, + place_resources, + high_availability + ): + size = self.round_up_volume_size(size) + self._mark_resource_cache_as_dirty() + + group_name = self._ha_group_name if high_availability else self._group_name + def create_definition(): + first_attempt = True + while True: + try: + self._check_volume_creation_errors( + self._linstor.resource_group_spawn( + rsc_grp_name=group_name, + rsc_dfn_name=volume_name, + vlm_sizes=['{}B'.format(size)], + definitions_only=True + ), + volume_uuid, + self._group_name + ) + break + except LinstorVolumeManagerError as e: + if ( + not first_attempt or + not high_availability or + e.code != LinstorVolumeManagerError.ERR_GROUP_NOT_EXISTS + ): + raise + + first_attempt = False + self._create_resource_group( + self._linstor, + group_name, + self._group_name, + 3, + True + ) + + self._configure_volume_peer_slots(self._linstor, volume_name) + + 
def clean(): + try: + self._destroy_volume(volume_uuid, force=True, preserve_properties=True) + except Exception as e: + self._logger( + 'Unable to destroy volume {} after creation fail: {}' + .format(volume_uuid, e) + ) + + def create(): + try: + create_definition() + if place_resources: + # Basic case when we use the default redundancy of the group. + self._check_volume_creation_errors( + self._linstor.resource_auto_place( + rsc_name=volume_name, + place_count=self._redundancy, + diskless_on_remaining=False + ), + volume_uuid, + self._group_name + ) + except LinstorVolumeManagerError as e: + if e.code != LinstorVolumeManagerError.ERR_VOLUME_EXISTS: + clean() + raise + except Exception: + clean() + raise + + util.retry(create, maxretry=5) + + def _create_volume_with_properties( + self, + volume_uuid, + volume_name, + size, + place_resources, + high_availability + ): + if self.check_volume_exists(volume_uuid): + raise LinstorVolumeManagerError( + 'Could not create volume `{}` from SR `{}`, it already exists' + .format(volume_uuid, self._group_name) + ' in properties', + LinstorVolumeManagerError.ERR_VOLUME_EXISTS + ) + + if volume_name in self._fetch_resource_names(): + raise LinstorVolumeManagerError( + 'Could not create volume `{}` from SR `{}`, '.format( + volume_uuid, self._group_name + ) + 'resource of the same name already exists in LINSTOR' + ) + + # I am paranoid. + volume_properties = self._get_volume_properties(volume_uuid) + if (volume_properties.get(self.PROP_NOT_EXISTS) is not None): + raise LinstorVolumeManagerError( + 'Could not create volume `{}`, '.format(volume_uuid) + + 'properties already exist' + ) + + try: + volume_properties[self.PROP_NOT_EXISTS] = self.STATE_CREATING + volume_properties[self.PROP_VOLUME_NAME] = volume_name + + self._create_volume( + volume_uuid, + volume_name, + size, + place_resources, + high_availability + ) + + assert volume_properties.namespace == \ + self._build_volume_namespace(volume_uuid) + return volume_properties + except LinstorVolumeManagerError as e: + # Do not destroy existing resource! + # In theory we can't get this error because we check this event + # before the `self._create_volume` case. + # It can only happen if the same volume uuid is used in the same + # call in another host. + if e.code != LinstorVolumeManagerError.ERR_VOLUME_EXISTS: + self._destroy_volume(volume_uuid, force=True) + raise + + def _find_device_path(self, volume_uuid, volume_name): + current_device_path = self._request_device_path( + volume_uuid, volume_name, activate=True + ) + + # We use realpath here to get the /dev/drbd path instead of + # /dev/drbd/by-res/. 
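+ # `current_device_path` is the /dev/drbd<minor> device reported by LINSTOR;
+ # it must match the realpath of the by-res symlink built below.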
+ expected_device_path = self.build_device_path(volume_name) + util.wait_for_path(expected_device_path, 5) + + device_realpath = os.path.realpath(expected_device_path) + if current_device_path != device_realpath: + raise LinstorVolumeManagerError( + 'Invalid path, current={}, expected={} (realpath={})' + .format( + current_device_path, + expected_device_path, + device_realpath + ) + ) + return expected_device_path + + def _request_device_path(self, volume_uuid, volume_name, activate=False): + node_name = socket.gethostname() + + resource = next(filter( + lambda resource: resource.node_name == node_name and + resource.name == volume_name, + self._get_resource_cache().resources + ), None) + + if not resource: + if activate: + self._mark_resource_cache_as_dirty() + self._activate_device_path( + self._linstor, node_name, volume_name + ) + return self._request_device_path(volume_uuid, volume_name) + raise LinstorVolumeManagerError( + 'Empty dev path for `{}`, but definition "seems" to exist' + .format(volume_uuid) + ) + # Contains a path of the /dev/drbd form. + return resource.volumes[0].device_path + + def _destroy_resource(self, resource_name, force=False): + result = self._linstor.resource_dfn_delete(resource_name) + error_str = self._get_error_str(result) + if not error_str: + self._mark_resource_cache_as_dirty() + return + + if not force: + self._mark_resource_cache_as_dirty() + raise LinstorVolumeManagerError( + 'Could not destroy resource `{}` from SR `{}`: {}' + .format(resource_name, self._group_name, error_str) + ) + + # If force is used, ensure there is no opener. + all_openers = get_all_volume_openers(resource_name, '0') + for openers in all_openers.values(): + if openers: + self._mark_resource_cache_as_dirty() + raise LinstorVolumeManagerError( + 'Could not force destroy resource `{}` from SR `{}`: {} (openers=`{}`)' + .format(resource_name, self._group_name, error_str, all_openers) + ) + + # Maybe the resource is blocked in primary mode. DRBD/LINSTOR issue? + resource_states = filter( + lambda resource_state: resource_state.name == resource_name, + self._get_resource_cache().resource_states + ) + + # Mark only after computation of states. + self._mark_resource_cache_as_dirty() + + for resource_state in resource_states: + volume_state = resource_state.volume_states[0] + if resource_state.in_use: + demote_drbd_resource(resource_state.node_name, resource_name) + break + self._destroy_resource(resource_name) + + def _destroy_volume(self, volume_uuid, force=False, preserve_properties=False): + volume_properties = self._get_volume_properties(volume_uuid) + try: + volume_name = volume_properties.get(self.PROP_VOLUME_NAME) + if volume_name in self._fetch_resource_names(): + self._destroy_resource(volume_name, force) + + # Assume this call is atomic. 
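+ # clear() is expected to wipe the whole per-volume KV namespace, not just one key.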
+ if not preserve_properties:
+ volume_properties.clear()
+ except Exception as e:
+ raise LinstorVolumeManagerError(
+ 'Cannot destroy volume `{}`: {}'.format(volume_uuid, e)
+ )
+
+ def _build_volumes(self, repair):
+ properties = self._kv_cache
+ resource_names = self._fetch_resource_names()
+
+ self._volumes = set()
+
+ updating_uuid_volumes = self._get_volumes_by_property(
+ self.REG_UPDATING_UUID_SRC, ignore_inexisting_volumes=False
+ )
+ if updating_uuid_volumes and not repair:
+ raise LinstorVolumeManagerError(
+ 'Cannot build LINSTOR volume list: '
+ 'invalid "updating uuid volumes" exist, repair is required'
+ )
+
+ existing_volumes = self._get_volumes_by_property(
+ self.REG_NOT_EXISTS, ignore_inexisting_volumes=False
+ )
+ for volume_uuid, not_exists in existing_volumes.items():
+ properties.namespace = self._build_volume_namespace(volume_uuid)
+
+ src_uuid = properties.get(self.PROP_UPDATING_UUID_SRC)
+ if src_uuid:
+ self._logger(
+ 'Ignoring volume during manager initialization with prop '
+ 'PROP_UPDATING_UUID_SRC: {} (properties={})'
+ .format(
+ volume_uuid,
+ self._get_filtered_properties(properties)
+ )
+ )
+ continue
+
+ # Insert volume in list if the volume exists. Or if the volume
+ # is being created and a slave wants to use it (repair = False).
+ #
+ # If we are on the master and if repair is True and state is
+ # Creating, it's probably a bug or crash: the creation process has
+ # been stopped.
+ if not_exists == self.STATE_EXISTS or (
+ not repair and not_exists == self.STATE_CREATING
+ ):
+ self._volumes.add(volume_uuid)
+ continue
+
+ if not repair:
+ self._logger(
+ 'Ignoring bad volume during manager initialization: {} '
+ '(properties={})'.format(
+ volume_uuid,
+ self._get_filtered_properties(properties)
+ )
+ )
+ continue
+
+ # Remove bad volume.
+ try:
+ self._logger(
+ 'Removing bad volume during manager initialization: {} '
+ '(properties={})'.format(
+ volume_uuid,
+ self._get_filtered_properties(properties)
+ )
+ )
+ volume_name = properties.get(self.PROP_VOLUME_NAME)
+
+ # Little optimization: don't call `self._destroy_volume`,
+ # we already have the resource name list.
+ if volume_name in resource_names:
+ self._destroy_resource(volume_name, force=True)
+
+ # Assume this call is atomic.
+ properties.clear()
+ except Exception as e:
+ # Do not raise, we don't want to block user action.
+ self._logger(
+ 'Cannot clean volume {}: {}'.format(volume_uuid, e)
+ )
+
+ # The volume can't be removed, maybe it's still in use,
+ # in this case rename it with the "DELETED_" prefix.
+ # This prefix is mandatory if a snap transaction exists to
+ # roll back, because the original VDI UUID may otherwise be
+ # renamed to the UUID we are trying to delete...
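+ # Volumes already carrying the "DELETED_" prefix are left untouched.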
+ if not volume_uuid.startswith('DELETED_'): + self.update_volume_uuid( + volume_uuid, 'DELETED_' + volume_uuid, force=True + ) + + for dest_uuid, src_uuid in updating_uuid_volumes.items(): + dest_namespace = self._build_volume_namespace(dest_uuid) + + properties.namespace = dest_namespace + if int(properties.get(self.PROP_NOT_EXISTS)): + properties.clear() + continue + + properties.namespace = self._build_volume_namespace(src_uuid) + properties.clear() + + properties.namespace = dest_namespace + properties.pop(self.PROP_UPDATING_UUID_SRC) + + if src_uuid in self._volumes: + self._volumes.remove(src_uuid) + self._volumes.add(dest_uuid) + + def _get_sr_properties(self): + return self._create_linstor_kv(self._build_sr_namespace()) + + def _get_volumes_by_property( + self, reg_prop, ignore_inexisting_volumes=True + ): + base_properties = self._get_kv_cache() + base_properties.namespace = self._build_volume_namespace() + + volume_properties = {} + for volume_uuid in self._volumes: + volume_properties[volume_uuid] = '' + + for key, value in base_properties.items(): + res = reg_prop.match(key) + if res: + volume_uuid = res.groups()[0] + if not ignore_inexisting_volumes or \ + volume_uuid in self._volumes: + volume_properties[volume_uuid] = value + + return volume_properties + + def _create_linstor_kv(self, namespace): + return linstor.KV( + self._group_name, + uri=self._linstor.controller_host(), + namespace=namespace + ) + + def _get_volume_properties(self, volume_uuid): + properties = self._get_kv_cache() + properties.namespace = self._build_volume_namespace(volume_uuid) + return properties + + @classmethod + def _build_sr_namespace(cls): + return '/{}/'.format(cls.NAMESPACE_SR) + + @classmethod + def _build_volume_namespace(cls, volume_uuid=None): + # Return a path to all volumes if `volume_uuid` is not given. + if volume_uuid is None: + return '/{}/'.format(cls.NAMESPACE_VOLUME) + return '/{}/{}/'.format(cls.NAMESPACE_VOLUME, volume_uuid) + + @classmethod + def _get_error_str(cls, result): + return ', '.join([ + err.message for err in cls._filter_errors(result) + ]) + + @classmethod + def _create_linstor_instance( + cls, uri, keep_uri_unmodified=False, attempt_count=30 + ): + retry = False + + def connect(uri): + if not uri: + uri = get_controller_uri() + if not uri: + raise LinstorVolumeManagerError( + 'Unable to find controller uri...' 
+ )
+ instance = linstor.Linstor(uri, keep_alive=True)
+ instance.connect()
+ return instance
+
+ try:
+ return connect(uri)
+ except (linstor.errors.LinstorNetworkError, LinstorVolumeManagerError):
+ pass
+
+ if not keep_uri_unmodified:
+ uri = None
+
+ return util.retry(
+ lambda: connect(uri),
+ maxretry=attempt_count,
+ period=1,
+ exceptions=[
+ linstor.errors.LinstorNetworkError,
+ LinstorVolumeManagerError
+ ]
+ )
+
+ @classmethod
+ def _configure_volume_peer_slots(cls, lin, volume_name):
+ result = lin.resource_dfn_modify(volume_name, {}, peer_slots=3)
+ error_str = cls._get_error_str(result)
+ if error_str:
+ raise LinstorVolumeManagerError(
+ 'Could not configure volume peer slots of {}: {}'
+ .format(volume_name, error_str)
+ )
+
+ @classmethod
+ def _activate_device_path(cls, lin, node_name, volume_name):
+ result = lin.resource_make_available(node_name, volume_name, diskful=False)
+ if linstor.Linstor.all_api_responses_no_error(result):
+ return
+ errors = linstor.Linstor.filter_api_call_response_errors(result)
+ if len(errors) == 1 and errors[0].is_error(
+ linstor.consts.FAIL_EXISTS_RSC
+ ):
+ return
+
+ raise LinstorVolumeManagerError(
+ 'Unable to activate device path of `{}` on node `{}`: {}'
+ .format(volume_name, node_name, ', '.join(
+ [str(x) for x in result]))
+ )
+
+ @classmethod
+ def _request_database_path(cls, lin, activate=False):
+ node_name = socket.gethostname()
+
+ try:
+ resource = next(filter(
+ lambda resource: resource.node_name == node_name and
+ resource.name == DATABASE_VOLUME_NAME,
+ lin.resource_list_raise().resources
+ ), None)
+ except Exception as e:
+ raise LinstorVolumeManagerError(
+ 'Unable to fetch database resource: {}'
+ .format(e)
+ )
+
+ if not resource:
+ if activate:
+ cls._activate_device_path(
+ lin, node_name, DATABASE_VOLUME_NAME
+ )
+ return cls._request_database_path(lin)
+ raise LinstorVolumeManagerError(
+ 'Empty dev path for `{}`, but definition "seems" to exist'
+ .format(DATABASE_PATH)
+ )
+ # Contains a path of the /dev/drbd form.
+ return resource.volumes[0].device_path
+
+ @classmethod
+ def _create_database_volume(
+ cls, lin, group_name, storage_pool_name, node_names, redundancy, auto_quorum
+ ):
+ try:
+ dfns = lin.resource_dfn_list_raise().resource_definitions
+ except Exception as e:
+ raise LinstorVolumeManagerError(
+ 'Unable to get definitions during database creation: {}'
+ .format(e)
+ )
+
+ if dfns:
+ raise LinstorVolumeManagerError(
+ 'Could not create volume `{}` from SR `{}`, '.format(
+ DATABASE_VOLUME_NAME, group_name
+ ) + 'LINSTOR volume list must be empty.'
+ )
+
+ # Workaround to use thin lvm. Without this call an error is returned:
+ # "Not enough available nodes"
+ # It is not clear why, but listing the pools protects against this bug.
+ try:
+ pools = lin.storage_pool_list_raise(
+ filter_by_stor_pools=[storage_pool_name]
+ )
+ except Exception as e:
+ raise LinstorVolumeManagerError(
+ 'Failed to get storage pool list before database creation: {}'
+ .format(e)
+ )
+
+ # Ensure we have a correct list of storage pools.
+ assert pools.storage_pools # We must have at least one storage pool!
+ nodes_with_pool = list(map(lambda pool: pool.node_name, pools.storage_pools))
+ for node_name in nodes_with_pool:
+ assert node_name in node_names
+ util.SMlog('Nodes with storage pool: {}'.format(nodes_with_pool))
+
+ # Create the database definition.
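+ # The definition is spawned with definitions_only=True; diskful and
+ # diskless resources are then created explicitly below.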
+ size = cls.round_up_volume_size(DATABASE_SIZE) + cls._check_volume_creation_errors(lin.resource_group_spawn( + rsc_grp_name=group_name, + rsc_dfn_name=DATABASE_VOLUME_NAME, + vlm_sizes=['{}B'.format(size)], + definitions_only=True + ), DATABASE_VOLUME_NAME, group_name) + cls._configure_volume_peer_slots(lin, DATABASE_VOLUME_NAME) + + # Create real resources on the first nodes. + resources = [] + + diskful_nodes = [] + diskless_nodes = [] + for node_name in node_names: + if node_name in nodes_with_pool: + diskful_nodes.append(node_name) + else: + diskless_nodes.append(node_name) + + assert diskful_nodes + for node_name in diskful_nodes[:redundancy]: + util.SMlog('Create database diskful on {}'.format(node_name)) + resources.append(linstor.ResourceData( + node_name=node_name, + rsc_name=DATABASE_VOLUME_NAME, + storage_pool=storage_pool_name + )) + # Create diskless resources on the remaining set. + for node_name in diskful_nodes[redundancy:] + diskless_nodes: + util.SMlog('Create database diskless on {}'.format(node_name)) + resources.append(linstor.ResourceData( + node_name=node_name, + rsc_name=DATABASE_VOLUME_NAME, + diskless=True + )) + + result = lin.resource_create(resources) + error_str = cls._get_error_str(result) + if error_str: + raise LinstorVolumeManagerError( + 'Could not create database volume from SR `{}`: {}'.format( + group_name, error_str + ) + ) + + # We must modify the quorum. Otherwise we can't use correctly the + # drbd-reactor daemon. + if auto_quorum: + result = lin.resource_dfn_modify(DATABASE_VOLUME_NAME, { + 'DrbdOptions/auto-quorum': 'disabled', + 'DrbdOptions/Resource/quorum': 'majority' + }) + error_str = cls._get_error_str(result) + if error_str: + raise LinstorVolumeManagerError( + 'Could not activate quorum on database volume: {}' + .format(error_str) + ) + + # Create database and ensure path exists locally and + # on replicated devices. + current_device_path = cls._request_database_path(lin, activate=True) + + # Ensure diskless paths exist on other hosts. Otherwise PBDs can't be + # plugged. + for node_name in node_names: + cls._activate_device_path(lin, node_name, DATABASE_VOLUME_NAME) + + # We use realpath here to get the /dev/drbd path instead of + # /dev/drbd/by-res/. + expected_device_path = cls.build_device_path(DATABASE_VOLUME_NAME) + util.wait_for_path(expected_device_path, 5) + + device_realpath = os.path.realpath(expected_device_path) + if current_device_path != device_realpath: + raise LinstorVolumeManagerError( + 'Invalid path, current={}, expected={} (realpath={})' + .format( + current_device_path, + expected_device_path, + device_realpath + ) + ) + + try: + util.retry( + lambda: util.pread2([DATABASE_MKFS, expected_device_path]), + maxretry=5 + ) + except Exception as e: + raise LinstorVolumeManagerError( + 'Failed to execute {} on database volume: {}' + .format(DATABASE_MKFS, e) + ) + + return expected_device_path + + @classmethod + def _destroy_database_volume(cls, lin, group_name): + error_str = cls._get_error_str( + lin.resource_dfn_delete(DATABASE_VOLUME_NAME) + ) + if error_str: + raise LinstorVolumeManagerError( + 'Could not destroy resource `{}` from SR `{}`: {}' + .format(DATABASE_VOLUME_NAME, group_name, error_str) + ) + + @classmethod + def _mount_database_volume(cls, volume_path, mount=True, force=False): + try: + # 1. Create a backup config folder. + database_not_empty = bool(os.listdir(DATABASE_PATH)) + backup_path = cls._create_database_backup_path() + + # 2. Move the config in the mounted volume. 
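+ # The existing files go to the backup dir first, the volume is mounted
+ # over DATABASE_PATH, then they are moved back onto it.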
+ if database_not_empty: + cls._move_files(DATABASE_PATH, backup_path) + + cls._mount_volume(volume_path, DATABASE_PATH, mount) + + if database_not_empty: + cls._move_files(backup_path, DATABASE_PATH, force) + + # 3. Remove useless backup directory. + try: + os.rmdir(backup_path) + except Exception as e: + raise LinstorVolumeManagerError( + 'Failed to remove backup path {} of LINSTOR config: {}' + .format(backup_path, e) + ) + except Exception as e: + def force_exec(fn): + try: + fn() + except Exception: + pass + + if mount == cls._is_mounted(DATABASE_PATH): + force_exec(lambda: cls._move_files( + DATABASE_PATH, backup_path + )) + force_exec(lambda: cls._mount_volume( + volume_path, DATABASE_PATH, not mount + )) + + if mount != cls._is_mounted(DATABASE_PATH): + force_exec(lambda: cls._move_files( + backup_path, DATABASE_PATH + )) + + force_exec(lambda: os.rmdir(backup_path)) + raise e + + @classmethod + def _force_destroy_database_volume(cls, lin, group_name): + try: + cls._destroy_database_volume(lin, group_name) + except Exception: + pass + + @classmethod + def _destroy_storage_pool(cls, lin, group_name, node_name): + def destroy(): + result = lin.storage_pool_delete(node_name, group_name) + errors = cls._filter_errors(result) + if cls._check_errors(errors, [ + linstor.consts.FAIL_NOT_FOUND_STOR_POOL, + linstor.consts.FAIL_NOT_FOUND_STOR_POOL_DFN + ]): + return + + if errors: + raise LinstorVolumeManagerError( + 'Failed to destroy SP `{}` on node `{}`: {}'.format( + group_name, + node_name, + cls._get_error_str(errors) + ) + ) + + # We must retry to avoid errors like: + # "can not be deleted as volumes / snapshot-volumes are still using it" + # after LINSTOR database volume destruction. + return util.retry(destroy, maxretry=10) + + @classmethod + def _create_resource_group( + cls, + lin, + group_name, + storage_pool_name, + redundancy, + destroy_old_group + ): + rg_creation_attempt = 0 + while True: + result = lin.resource_group_create( + name=group_name, + place_count=redundancy, + storage_pool=storage_pool_name, + diskless_on_remaining=False + ) + error_str = cls._get_error_str(result) + if not error_str: + break + + errors = cls._filter_errors(result) + if destroy_old_group and cls._check_errors(errors, [ + linstor.consts.FAIL_EXISTS_RSC_GRP + ]): + rg_creation_attempt += 1 + if rg_creation_attempt < 2: + try: + cls._destroy_resource_group(lin, group_name) + except Exception as e: + error_str = 'Failed to destroy old and empty RG: {}'.format(e) + else: + continue + + raise LinstorVolumeManagerError( + 'Could not create RG `{}`: {}'.format( + group_name, error_str + ) + ) + + result = lin.volume_group_create(group_name) + error_str = cls._get_error_str(result) + if error_str: + raise LinstorVolumeManagerError( + 'Could not create VG `{}`: {}'.format( + group_name, error_str + ) + ) + + @classmethod + def _destroy_resource_group(cls, lin, group_name): + def destroy(): + result = lin.resource_group_delete(group_name) + errors = cls._filter_errors(result) + if cls._check_errors(errors, [ + linstor.consts.FAIL_NOT_FOUND_RSC_GRP + ]): + return + + if errors: + raise LinstorVolumeManagerError( + 'Failed to destroy RG `{}`: {}' + .format(group_name, cls._get_error_str(errors)) + ) + + return util.retry(destroy, maxretry=10) + + @classmethod + def _build_group_name(cls, base_name): + # If thin provisioning is used we have a path like this: + # `VG/LV`. "/" is not accepted by LINSTOR. 
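+ # e.g. a base name of 'vg/lv' becomes '<PREFIX_SR>vg_lv'.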
+ return '{}{}'.format(cls.PREFIX_SR, base_name.replace('/', '_')) + + # Used to store important data in a HA context, + # i.e. a replication count of 3. + @classmethod + def _build_ha_group_name(cls, base_name): + return '{}{}'.format(cls.PREFIX_HA, base_name.replace('/', '_')) + + @classmethod + def _check_volume_creation_errors(cls, result, volume_uuid, group_name): + errors = cls._filter_errors(result) + if cls._check_errors(errors, [ + linstor.consts.FAIL_EXISTS_RSC, linstor.consts.FAIL_EXISTS_RSC_DFN + ]): + raise LinstorVolumeManagerError( + 'Failed to create volume `{}` from SR `{}`, it already exists' + .format(volume_uuid, group_name), + LinstorVolumeManagerError.ERR_VOLUME_EXISTS + ) + + if cls._check_errors(errors, [linstor.consts.FAIL_NOT_FOUND_RSC_GRP]): + raise LinstorVolumeManagerError( + 'Failed to create volume `{}` from SR `{}`, resource group doesn\'t exist' + .format(volume_uuid, group_name), + LinstorVolumeManagerError.ERR_GROUP_NOT_EXISTS + ) + + if errors: + raise LinstorVolumeManagerError( + 'Failed to create volume `{}` from SR `{}`: {}'.format( + volume_uuid, + group_name, + cls._get_error_str(errors) + ) + ) + + @classmethod + def _move_files(cls, src_dir, dest_dir, force=False): + def listdir(dir): + ignored = ['lost+found'] + return [file for file in os.listdir(dir) if file not in ignored] + + try: + if not force: + files = listdir(dest_dir) + if files: + raise LinstorVolumeManagerError( + 'Cannot move files from {} to {} because destination ' + 'contains: {}'.format(src_dir, dest_dir, files) + ) + except LinstorVolumeManagerError: + raise + except Exception as e: + raise LinstorVolumeManagerError( + 'Cannot list dir {}: {}'.format(dest_dir, e) + ) + + try: + for file in listdir(src_dir): + try: + dest_file = os.path.join(dest_dir, file) + if not force and os.path.exists(dest_file): + raise LinstorVolumeManagerError( + 'Cannot move {} because it already exists in the ' + 'destination'.format(file) + ) + shutil.move(os.path.join(src_dir, file), dest_file) + except LinstorVolumeManagerError: + raise + except Exception as e: + raise LinstorVolumeManagerError( + 'Cannot move {}: {}'.format(file, e) + ) + except Exception as e: + if not force: + try: + cls._move_files(dest_dir, src_dir, force=True) + except Exception: + pass + + raise LinstorVolumeManagerError( + 'Failed to move files from {} to {}: {}'.format( + src_dir, dest_dir, e + ) + ) + + @staticmethod + def _create_database_backup_path(): + path = DATABASE_PATH + '-' + str(uuid.uuid4()) + try: + os.mkdir(path) + return path + except Exception as e: + raise LinstorVolumeManagerError( + 'Failed to create backup path {} of LINSTOR config: {}' + .format(path, e) + ) + + @staticmethod + def _get_filtered_properties(properties): + return dict(properties.items()) + + @staticmethod + def _filter_errors(result): + return [ + err for err in result + if hasattr(err, 'is_error') and err.is_error() + ] + + @staticmethod + def _check_errors(result, codes): + for err in result: + for code in codes: + if err.is_error(code): + return True + return False + + @classmethod + def _controller_is_running(cls): + return cls._service_is_running('linstor-controller') + + @classmethod + def _start_controller(cls, start=True): + return cls._start_service('linstor-controller', start) + + @staticmethod + def _start_service(name, start=True): + action = 'start' if start else 'stop' + (ret, out, err) = util.doexec([ + 'systemctl', action, name + ]) + if ret != 0: + raise LinstorVolumeManagerError( + 'Failed to {} {}: {} {}' + 
.format(action, name, out, err) + ) + + @staticmethod + def _service_is_running(name): + (ret, out, err) = util.doexec([ + 'systemctl', 'is-active', '--quiet', name + ]) + return not ret + + @staticmethod + def _is_mounted(mountpoint): + (ret, out, err) = util.doexec(['mountpoint', '-q', mountpoint]) + return ret == 0 + + @classmethod + def _mount_volume(cls, volume_path, mountpoint, mount=True): + if mount: + try: + util.pread(['mount', volume_path, mountpoint]) + except Exception as e: + raise LinstorVolumeManagerError( + 'Failed to mount volume {} on {}: {}' + .format(volume_path, mountpoint, e) + ) + else: + try: + if cls._is_mounted(mountpoint): + util.pread(['umount', mountpoint]) + except Exception as e: + raise LinstorVolumeManagerError( + 'Failed to umount volume {} on {}: {}' + .format(volume_path, mountpoint, e) + ) + + +# ============================================================================== + +# Check if a path is a DRBD resource and log the process name/pid +# that opened it. +def log_drbd_openers(path): + # Ignore if it's not a symlink to DRBD resource. + if not path.startswith(DRBD_BY_RES_PATH): + return + + # Compute resource name. + res_name_end = path.find('/', len(DRBD_BY_RES_PATH)) + if res_name_end == -1: + return + res_name = path[len(DRBD_BY_RES_PATH):res_name_end] + + volume_end = path.rfind('/') + if volume_end == res_name_end: + return + volume = path[volume_end + 1:] + + try: + # Ensure path is a DRBD. + drbd_path = os.path.realpath(path) + stats = os.stat(drbd_path) + if not stat.S_ISBLK(stats.st_mode) or os.major(stats.st_rdev) != 147: + return + + # Find where the device is open. + (ret, stdout, stderr) = util.doexec(['drbdadm', 'status', res_name]) + if ret != 0: + util.SMlog('Failed to execute `drbdadm status` on `{}`: {}'.format( + res_name, stderr + )) + return + + # Is it a local device? + if stdout.startswith('{} role:Primary'.format(res_name)): + util.SMlog( + 'DRBD resource `{}` is open on local host: {}' + .format(path, get_local_volume_openers(res_name, volume)) + ) + return + + # Is it a remote device? 
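+ # The local role is not Primary, so ask every host for its openers.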
+ util.SMlog( + 'DRBD resource `{}` is open on hosts: {}' + .format(path, get_all_volume_openers(res_name, volume)) + ) + except Exception as e: + util.SMlog( + 'Got exception while trying to determine where DRBD resource ' + + '`{}` is open: {}'.format(path, e) + ) diff --git a/drivers/lock.py b/drivers/lock.py index ceb48fe1b..6792d7b04 100755 --- a/drivers/lock.py +++ b/drivers/lock.py @@ -16,6 +16,8 @@ """Serialization for concurrent operations""" +from sm_typing import Dict + import os import errno import flock @@ -37,8 +39,8 @@ class Lock(object): BASE_DIR = "/var/lock/sm" - INSTANCES = {} - BASE_INSTANCES = {} + INSTANCES: Dict[str, 'LockImplementation'] = {} + BASE_INSTANCES: Dict[str, 'LockImplementation'] = {} def __new__(cls, name, ns=None, *args, **kwargs): if ns: @@ -64,6 +66,7 @@ def release(self): def held(self): raise NotImplementedError("Lock methods implemented in LockImplementation") + @staticmethod def _mknamespace(ns): if ns is None: @@ -72,7 +75,6 @@ def _mknamespace(ns): assert not ns.startswith(".") assert ns.find(os.path.sep) < 0 return ns - _mknamespace = staticmethod(_mknamespace) @staticmethod def clearAll(): @@ -82,6 +84,7 @@ def clearAll(): Lock.INSTANCES = {} Lock.BASE_INSTANCES = {} + @staticmethod def cleanup(name, ns=None): if ns: if ns in Lock.INSTANCES: @@ -97,8 +100,7 @@ def cleanup(name, ns=None): if os.path.exists(path): Lock._unlink(path) - cleanup = staticmethod(cleanup) - + @staticmethod def cleanupAll(ns=None): ns = Lock._mknamespace(ns) nspath = os.path.join(Lock.BASE_DIR, ns) @@ -112,11 +114,11 @@ def cleanupAll(ns=None): Lock._rmdir(nspath) - cleanupAll = staticmethod(cleanupAll) # # Lock and attribute file management # + @staticmethod def _mkdirs(path): """Concurrent makedirs() catching EEXIST.""" if os.path.exists(path): @@ -126,8 +128,8 @@ def _mkdirs(path): except OSError as e: if e.errno != errno.EEXIST: raise LockException("Failed to makedirs(%s)" % path) - _mkdirs = staticmethod(_mkdirs) + @staticmethod def _unlink(path): """Non-raising unlink().""" util.SMlog("lock: unlinking lock file %s" % path) @@ -135,8 +137,8 @@ def _unlink(path): os.unlink(path) except Exception as e: util.SMlog("Failed to unlink(%s): %s" % (path, e)) - _unlink = staticmethod(_unlink) + @staticmethod def _rmdir(path): """Non-raising rmdir().""" util.SMlog("lock: removing lock dir %s" % path) @@ -144,7 +146,6 @@ def _rmdir(path): os.rmdir(path) except Exception as e: util.SMlog("Failed to rmdir(%s): %s" % (path, e)) - _rmdir = staticmethod(_rmdir) class LockImplementation(object): @@ -193,7 +194,7 @@ def _open(self): fd = self.lockfile.fileno() self.lock = flock.WriteLock(fd) - def _open_lockfile(self): + def _open_lockfile(self) -> None: """Provide a seam, so extreme situations could be tested""" util.SMlog("lock: opening lock file %s" % self.lockpath) self.lockfile = open(self.lockpath, "w+") diff --git a/drivers/lvmcache.py b/drivers/lvmcache.py index 8c63d45a3..6e21568ea 100644 --- a/drivers/lvmcache.py +++ b/drivers/lvmcache.py @@ -59,10 +59,11 @@ class LVMCache: """Per-VG object to store LV information. 
Can be queried for cached LVM information and refreshed""" - def __init__(self, vgName): + def __init__(self, vgName, config=None): """Create a cache for VG vgName, but don't scan the VG yet""" self.vgName = vgName self.vgPath = "/dev/%s" % self.vgName + self.config = config self.lvs = dict() self.tags = dict() self.initialized = False @@ -115,7 +116,7 @@ def create(self, lvName, size, tag=None): @lazyInit def remove(self, lvName): path = self._getPath(lvName) - lvutil.remove(path) + lvutil.remove(path, self.config) for tag in self.lvs[lvName].tags: self._removeTag(lvName, tag) del self.lvs[lvName] diff --git a/drivers/lvutil.py b/drivers/lvutil.py index 48767ead7..dfd9d5c55 100755 --- a/drivers/lvutil.py +++ b/drivers/lvutil.py @@ -478,24 +478,12 @@ def createVG(root, vgname): f = _openExclusive(dev, True) os.close(f) + + # Wipe any fs signature try: - # Overwrite the disk header, try direct IO first - cmd = [util.CMD_DD, "if=/dev/zero", "of=%s" % dev, "bs=1M", - "count=10", "oflag=direct"] - util.pread2(cmd) + util.wipefs(dev) except util.CommandException as inst: - if inst.code == errno.EPERM: - try: - # Overwrite the disk header, try normal IO - cmd = [util.CMD_DD, "if=/dev/zero", "of=%s" % dev, - "bs=1M", "count=10"] - util.pread2(cmd) - except util.CommandException as inst: - raise xs_errors.XenError('LVMWrite', \ - opterr='device %s' % dev) - else: - raise xs_errors.XenError('LVMWrite', \ - opterr='device %s' % dev) + raise xs_errors.XenError('WipefsFailure', opterr='device %s' % dev) # from inst if not (dev == rootdev): try: @@ -581,12 +569,16 @@ def resizePV(dev): util.SMlog("Failed to grow the PV, non-fatal") -def setActiveVG(path, active): +def setActiveVG(path, active, config=None): "activate or deactivate VG 'path'" val = "n" if active: val = "y" - text = cmd_lvm([CMD_VGCHANGE, "-a" + val, path]) + cmd = [CMD_VGCHANGE, "-a" + val, path] + if config: + cmd.append("--config") + cmd.append(config) + cmd_lvm(cmd) @lvmretry diff --git a/drivers/mpath_cli.py b/drivers/mpath_cli.py index 4c7ce54a1..357e84a0a 100755 --- a/drivers/mpath_cli.py +++ b/drivers/mpath_cli.py @@ -15,6 +15,8 @@ # # Talk to the multipathd cli +from sm_typing import override + import util import re import time @@ -25,7 +27,8 @@ class MPathCLIFail(Exception): def __init__(self): return - def __str__(self): + @override + def __str__(self) -> str: return "MPath CLI failed" mpathcmd = ["/usr/sbin/multipathd", "-k"] diff --git a/drivers/mpathcount.py b/drivers/mpathcount.py index 3137e9099..93d566693 100755 --- a/drivers/mpathcount.py +++ b/drivers/mpathcount.py @@ -15,6 +15,8 @@ # along with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +from sm_typing import Dict + import util import os import sys @@ -22,6 +24,7 @@ import xs_errors import mpath_cli import json +import subprocess supported = ['iscsi', 'lvmoiscsi', 'rawhba', 'lvmohba', 'ocfsohba', 'ocfsoiscsi', 'netapp', 'lvmofcoe', 'gfs2'] @@ -35,6 +38,7 @@ match_bySCSIid = False mpath_enabled = True SCSIid = 'NOTSUPPLIED' +XAPI_HEALTH_CHECK = '/opt/xensource/libexec/xapi-health-check' cached_DM_maj = None @@ -85,8 +89,8 @@ def match_pathup(s): match = re.match(r'.*\d+:\d+:\d+:\d+\s+\S+\s+\S+\s+\S+\s+(\S+)', s) if match: path_status = match.group(1) - if path_status in ['faulty', 'shaky', 'failed']: - return False + if path_status in ['faulty', 'shaky', 'failed']: + return False return True @@ -199,13 +203,31 @@ def check_devconfig(devconfig, sm_config, config, remove, add, 
mpath_status=None else: update_config(key, i, config[key], remove, add, mpath_status) - -def check_xapi_is_enabled(session, hostref): - host = session.xenapi.host.get_record(hostref) - if not host['enabled']: - util.SMlog("Xapi is not enabled, exiting") - mpc_exit(session, 0) - +def check_xapi_is_enabled(): + """Check XAPI health status""" + def _run_command(command, timeout): + try: + process = subprocess.Popen( + command, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + universal_newlines=True + ) + try: + stdout, stderr = process.communicate(timeout=timeout) + return process.returncode, stdout, stderr + except subprocess.TimeoutExpired: + process.kill() + util.SMlog(f"Command execution timeout after {timeout}s: {' '.join(command)}") + return -1, "", "Timeout" + except Exception as e: + util.SMlog(f"Error executing command: {e}") + return -1, "", str(e) + + returncode, _, stderr = _run_command([XAPI_HEALTH_CHECK], timeout=120) + if returncode != 0: + util.SMlog(f"XAPI health check failed: {stderr}") + return returncode == 0 if __name__ == '__main__': try: @@ -215,7 +237,7 @@ def check_xapi_is_enabled(session, hostref): sys.exit(-1) localhost = session.xenapi.host.get_by_uuid(get_localhost_uuid()) - check_xapi_is_enabled(session, localhost) + check_xapi_is_enabled() # Check whether multipathing is enabled (either for root dev or SRs) try: if get_root_dev_major() != get_dm_major(): @@ -247,7 +269,7 @@ def _add(key, val): mpc_exit(session, -1) try: - mpath_status = {} + mpath_status: Dict[str, str] = {} for pbd in pbds: def remove(key): session.xenapi.PBD.remove_from_other_config(pbd, key) @@ -267,7 +289,7 @@ def add(key, val): util.atomicFileWrite(MPATH_FILE_NAME, MPATHS_DIR, json.dumps(mpath_status)) os.chmod(MPATH_FILE_NAME, 0o0644) except: - util.SMlog("MPATH: Failure updating db. %s" % sys.exc_info()) + util.SMlog("MPATH: Failure updating db. %s" % str(sys.exc_info())) mpc_exit(session, -1) util.SMlog("MPATH: Update done") diff --git a/drivers/nfs-on-slave b/drivers/nfs-on-slave index 3e2ee8d32..718377247 100644 --- a/drivers/nfs-on-slave +++ b/drivers/nfs-on-slave @@ -18,6 +18,9 @@ # A plugin for synchronizing slaves when something changes on the Master import sys; sys.path.append("/opt/xensource/sm/") + +from sm_typing import override + import util import os, glob, errno @@ -31,7 +34,8 @@ class NfsCheckException(Exception): except: self.exe = None - def __str__(self): + @override + def __str__(self) -> str: return "File %s in use by pid %d (%s), fd %d" % \ (self.path, self.pid, self.exe, self.fd) diff --git a/drivers/on_slave.py b/drivers/on_slave.py index b4f33de20..2f58281ae 100755 --- a/drivers/on_slave.py +++ b/drivers/on_slave.py @@ -76,9 +76,16 @@ def _is_open(session, args): """Check if VDI is open by a tapdisk on this host""" import SRCommand import SR - import NFSSR + import CephFSSR import EXTSR + import LargeBlockSR + import GlusterFSSR + import LinstorSR import LVHDSR + import MooseFSSR + import NFSSR + import XFSSR + import ZFSSR import blktap2 util.SMlog("on-slave.is_open: %s" % args) @@ -93,11 +100,39 @@ def _is_open(session, args): srType = "lvhd" cmd = SRCommand.SRCommand(None) cmd.driver_info = {"capabilities": None} - cmd.dconf = {"server": None, "device": "/HACK"} + cmd.dconf = { + "server": None, + "device": "/HACK", + # Hack for custom XCP-ng drivers. 
+ "masterhost": None, # MooseFS + "rootpath": None, # MooseFS + "serverpath": None, # CephFS + "location": "/HACK" # ZFS + } cmd.params = {"command": None} + sr_uuid = srRec["uuid"] + + # Another ugly piece of code to load a real Linstor SR, otherwise + # we can't fetch the VDI path. + if srType == 'linstor': + host_ref = util.get_this_host_ref(session) + sr_ref = session.xenapi.SR.get_by_uuid(sr_uuid) + + pbd = util.find_my_pbd(session, host_ref, sr_ref) + if pbd is None: + raise util.SMException('Failed to find Linstor PBD') + + cmd.dconf = session.xenapi.PBD.get_device_config(pbd) + driver = SR.driver(srType) - sr = driver(cmd, srRec["uuid"]) + sr = driver(cmd, sr_uuid) + + # session_ref param is required to have a valid session when SR object is created. + # It's not the case here, so attach the current session object to make LinstorSR happy. + if srType == 'linstor': + sr.session = session + vdi = sr.vdi(vdiUuid) tapdisk = blktap2.Tapdisk.find_by_path(vdi.path) util.SMlog("Tapdisk for %s: %s" % (vdi.path, tapdisk)) diff --git a/drivers/refcounter.py b/drivers/refcounter.py index 5418c858a..20585da82 100644 --- a/drivers/refcounter.py +++ b/drivers/refcounter.py @@ -39,6 +39,7 @@ class RefCounter: BASE_DIR = "/var/run/sm/refcount" + @staticmethod def get(obj, binary, ns=None): """Get (inc ref count) 'obj' in namespace 'ns' (optional). Returns new ref count""" @@ -46,8 +47,8 @@ def get(obj, binary, ns=None): return RefCounter._adjust(ns, obj, 0, 1) else: return RefCounter._adjust(ns, obj, 1, 0) - get = staticmethod(get) + @staticmethod def put(obj, binary, ns=None): """Put (dec ref count) 'obj' in namespace 'ns' (optional). If ref count was zero already, this operation is a no-op. @@ -56,8 +57,8 @@ def put(obj, binary, ns=None): return RefCounter._adjust(ns, obj, 0, -1) else: return RefCounter._adjust(ns, obj, -1, 0) - put = staticmethod(put) + @staticmethod def set(obj, count, binaryCount, ns=None): """Set normal & binary counts explicitly to the specified values. Returns new ref count""" @@ -66,14 +67,14 @@ def set(obj, count, binaryCount, ns=None): if binaryCount > 1: raise RefCounterException("Binary count = %d > 1" % binaryCount) RefCounter._set(ns, obj, count, binaryCount) - set = staticmethod(set) + @staticmethod def check(obj, ns=None): """Get the ref count values for 'obj' in namespace 'ns' (optional)""" (obj, ns) = RefCounter._getSafeNames(obj, ns) return RefCounter._get(ns, obj) - check = staticmethod(check) + @staticmethod def checkLocked(obj, ns): """Lock-protected access""" lock = Lock(obj, ns) @@ -82,13 +83,13 @@ def checkLocked(obj, ns): return RefCounter.check(obj, ns) finally: lock.release() - checkLocked = staticmethod(checkLocked) + @staticmethod def reset(obj, ns=None): """Reset ref counts for 'obj' in namespace 'ns' (optional) to 0.""" RefCounter.resetAll(ns, obj) - reset = staticmethod(reset) + @staticmethod def resetAll(ns=None, obj=None): """Reset ref counts of 'obj' in namespace 'ns' to 0. If obj is not provided, reset all existing objects in 'ns' to 0. If neither obj nor @@ -106,8 +107,8 @@ def resetAll(ns=None, obj=None): raise RefCounterException("failed to get namespace list") for ns in nsList: RefCounter._reset(ns, obj) - resetAll = staticmethod(resetAll) + @staticmethod def _adjust(ns, obj, delta, binaryDelta): """Add 'delta' to the normal refcount and 'binaryDelta' to the binary refcount of 'obj' in namespace 'ns'. 
@@ -133,8 +134,8 @@ def _adjust(ns, obj, delta, binaryDelta): newCount, newBinaryCount)) RefCounter._set(ns, obj, newCount, newBinaryCount) return newCount + newBinaryCount - _adjust = staticmethod(_adjust) + @staticmethod def _get(ns, obj): """Get the ref count values for 'obj' in namespace 'ns'""" objFile = os.path.join(RefCounter.BASE_DIR, ns, obj) @@ -142,8 +143,8 @@ def _get(ns, obj): if util.pathexists(objFile): (count, binaryCount) = RefCounter._readCount(objFile) return (count, binaryCount) - _get = staticmethod(_get) + @staticmethod def _set(ns, obj, count, binaryCount): """Set the ref count values for 'obj' in namespace 'ns'""" util.SMlog("Refcount for %s:%s set => (%d, %db)" % \ @@ -156,8 +157,7 @@ def _set(ns, obj, count, binaryCount): while not RefCounter._writeCount(objFile, count, binaryCount): RefCounter._createNamespace(ns) - _set = staticmethod(_set) - + @staticmethod def _getSafeNames(obj, ns): """Get a name that can be used as a file name""" if not ns: @@ -167,8 +167,8 @@ def _getSafeNames(obj, ns): for char in ['/', '*', '?', '\\']: obj = obj.replace(char, "_") return (obj, ns) - _getSafeNames = staticmethod(_getSafeNames) + @staticmethod def _createNamespace(ns): nsDir = os.path.join(RefCounter.BASE_DIR, ns) try: @@ -177,8 +177,8 @@ def _createNamespace(ns): if e.errno != errno.EEXIST: raise RefCounterException("failed to makedirs '%s' (%s)" % \ (nsDir, e)) - _createNamespace = staticmethod(_createNamespace) + @staticmethod def _removeObject(ns, obj): nsDir = os.path.join(RefCounter.BASE_DIR, ns) objFile = os.path.join(nsDir, obj) @@ -199,8 +199,8 @@ def _removeObject(ns, obj): pass else: raise RefCounterException("failed to remove '%s'" % nsDir) - _removeObject = staticmethod(_removeObject) + @staticmethod def _reset(ns, obj=None): nsDir = os.path.join(RefCounter.BASE_DIR, ns) if not util.pathexists(nsDir): @@ -216,8 +216,8 @@ def _reset(ns, obj=None): raise RefCounterException("failed to list '%s'" % ns) for obj in objList: RefCounter._removeObject(ns, obj) - _reset = staticmethod(_reset) + @staticmethod def _readCount(fn): try: f = open(fn, 'r') @@ -229,8 +229,8 @@ def _readCount(fn): except IOError: raise RefCounterException("failed to read file '%s'" % fn) return (count, binaryCount) - _readCount = staticmethod(_readCount) + @staticmethod def _writeCount(fn, count, binaryCount): try: f = open(fn, 'w') @@ -243,8 +243,8 @@ def _writeCount(fn, count, binaryCount): return False raise RefCounterException("failed to write '(%d %d)' to '%s': %s" \ % (count, binaryCount, fn, e)) - _writeCount = staticmethod(_writeCount) + @staticmethod def _runTests(): "Unit tests" @@ -535,7 +535,6 @@ def _runTests(): RefCounter.resetAll() return 0 - _runTests = staticmethod(_runTests) if __name__ == '__main__': diff --git a/drivers/resetvdis.py b/drivers/resetvdis.py index 81938d4c8..61454f849 100755 --- a/drivers/resetvdis.py +++ b/drivers/resetvdis.py @@ -21,6 +21,7 @@ import cleanup import util import lock +import sys import XenAPI # pylint: disable=import-error @@ -141,7 +142,6 @@ def usage(): sys.exit(1) if __name__ == '__main__': - import sys import atexit if len(sys.argv) not in [3, 4, 5]: diff --git a/drivers/srmetadata.py b/drivers/srmetadata.py index f86711e28..34aa5f16b 100755 --- a/drivers/srmetadata.py +++ b/drivers/srmetadata.py @@ -15,6 +15,11 @@ # # Functions to read and write SR metadata # + +from sm_typing import ClassVar, override + +from abc import abstractmethod + from io import SEEK_SET import util @@ -254,7 +259,7 @@ def getMetadataLength(fd): # ----------------- 
# General helper functions - end # ----------------- class MetadataHandler: - VDI_INFO_SIZE_IN_SECTORS = None + VDI_INFO_SIZE_IN_SECTORS: ClassVar[int] # constructor def __init__(self, path=None, write=True): @@ -272,8 +277,9 @@ def __del__(self): def vdi_info_size(self): return self.VDI_INFO_SIZE_IN_SECTORS * SECTOR_SIZE - def spaceAvailableForVdis(self, count): - raise NotImplementedError("spaceAvailableForVdis is undefined") + @abstractmethod + def spaceAvailableForVdis(self, count) -> None: + pass # common utility functions def getMetadata(self, params={}): @@ -657,10 +663,10 @@ def getMetadataToWrite(self, sr_info, vdi_info, lower, upper, update_map, \ raise # specific functions, to be implement by the child classes - def getVdiInfo(self, Dict, generateSector=0): + def getVdiInfo(self, Dict, generateSector=0) -> bytes: return b"" - def getSRInfoForSectors(self, sr_info, range): + def getSRInfoForSectors(self, sr_info, range) -> bytes: return b"" @@ -673,7 +679,8 @@ def __init__(self, path=None, write=True): lvutil.ensurePathExists(path) MetadataHandler.__init__(self, path, write) - def spaceAvailableForVdis(self, count): + @override + def spaceAvailableForVdis(self, count) -> None: created = False try: # The easiest way to do this, is to create a dummy vdi and write it @@ -704,7 +711,8 @@ def spaceAvailableForVdis(self, count): # it also takes in a parameter to determine whether both the sector # or only one sector needs to be generated, and which one # generateSector - can be 1 or 2, defaults to 0 and generates both sectors - def getVdiInfo(self, Dict, generateSector=0): + @override + def getVdiInfo(self, Dict, generateSector=0) -> bytes: util.SMlog("Entering VDI info") try: vdi_info = b"" @@ -760,7 +768,8 @@ def getVdiInfo(self, Dict, generateSector=0): (Dict, str(e))) raise - def getSRInfoForSectors(self, sr_info, range): + @override + def getSRInfoForSectors(self, sr_info, range) -> bytes: srinfo = b"" try: diff --git a/drivers/tapdisk-pause b/drivers/tapdisk-pause index 6b7fc09f9..f98257a23 100755 --- a/drivers/tapdisk-pause +++ b/drivers/tapdisk-pause @@ -29,6 +29,13 @@ import lvhdutil import vhdutil import lvmcache +try: + from linstorvhdutil import LinstorVhdUtil + from linstorvolumemanager import get_controller_uri, LinstorVolumeManager + LINSTOR_AVAILABLE = True +except ImportError: + LINSTOR_AVAILABLE = False + TAPDEV_BACKPATH_PFX = "/dev/sm/backend" TAPDEV_PHYPATH_PFX = "/dev/sm/phy" @@ -130,7 +137,48 @@ class Tapdisk: self.realpath = p if ty == "LV": self.vdi_type = "aio" else: self.vdi_type = "vhd" - + elif realpath.startswith('/dev/drbd/by-res/xcp-volume-'): + if not LINSTOR_AVAILABLE: + raise util.SMException( + 'Can\'t refresh tapdisk: LINSTOR libraries are missing' + ) + + # We must always recreate the symlink to ensure we have + # the right info. Why? Because if the volume UUID is changed in + # LINSTOR the symlink is not directly updated. When live leaf + # coalesce is executed we have these steps: + # "A" -> "OLD_A" + # "B" -> "A" + # Without symlink update the previous "A" path is reused instead of + # "B" path. Note: "A", "B" and "OLD_A" are UUIDs. 
+ session = self.session + + host_ref = util.get_this_host_ref(session) + sr_ref = session.xenapi.SR.get_by_uuid(self.sr_uuid) + + pbd = util.find_my_pbd(session, host_ref, sr_ref) + if pbd is None: + raise util.SMException('Failed to find PBD') + + dconf = session.xenapi.PBD.get_device_config(pbd) + group_name = dconf['group-name'] + + linstor = LinstorVolumeManager( + get_controller_uri(), + group_name, + logger=util.SMlog + ) + device_path = LinstorVhdUtil(session, linstor).create_chain_paths(self.vdi_uuid) + + if realpath != device_path: + util.SMlog( + 'Update LINSTOR PhyLink (previous={}, current={})' + .format(realpath, device_path) + ) + os.unlink(self.phypath) + _mkphylink(self.sr_uuid, self.vdi_uuid, device_path) + self.realpath = device_path + @locking("VDIUnavailable") def Pause(self): util.SMlog("Pause for %s" % self.vdi_uuid) diff --git a/drivers/udevSR.py b/drivers/udevSR.py index 4862ef8b9..a84427645 100755 --- a/drivers/udevSR.py +++ b/drivers/udevSR.py @@ -18,6 +18,8 @@ # udevSR: represents VDIs which are hotplugged into dom0 via udev e.g. # USB CDROM/disk devices +from sm_typing import override + import SR import VDI import SRCommand @@ -50,16 +52,19 @@ class udevSR(SR.SR): """udev-driven storage repository""" - def handles(type): + @override + @staticmethod + def handles(type) -> bool: if type == TYPE: return True return False - handles = staticmethod(handles) - def content_type(self, sr_uuid): + @override + def content_type(self, sr_uuid) -> str: return super(udevSR, self).content_type(sr_uuid) - def vdi(self, uuid): + @override + def vdi(self, uuid) -> VDI.VDI: util.SMlog("params = %s" % (self.srcmd.params.keys())) if 'vdi_location' in self.srcmd.params: @@ -74,7 +79,8 @@ def get_vdi_location(self, uuid): vdi_ref = vdi.get_by_uuid(uuid) return vdi.get_location(vdi_ref) - def load(self, sr_uuid): + @override + def load(self, sr_uuid) -> None: # First of all, check we've got the correct keys in dconf if 'location' not in self.dconf: raise xs_errors.XenError('ConfigLocationMissing') @@ -82,7 +88,8 @@ def load(self, sr_uuid): # Cache the sm_config self.sm_config = self.session.xenapi.SR.get_sm_config(self.sr_ref) - def update(self, sr_uuid): + @override + def update(self, sr_uuid) -> None: # Return as much information as we have sr_root = self.dconf['location'] @@ -102,22 +109,27 @@ def update(self, sr_uuid): self._db_update() - def scan(self, sr_uuid): + @override + def scan(self, sr_uuid) -> None: self.update(sr_uuid) # base class scan does all the work: - return super(udevSR, self).scan(sr_uuid) + super(udevSR, self).scan(sr_uuid) - def create(self, sr_uuid, size): + @override + def create(self, sr_uuid, size) -> None: pass - def delete(self, sr_uuid): + @override + def delete(self, sr_uuid) -> None: pass - def attach(self, sr_uuid): + @override + def attach(self, sr_uuid) -> None: pass - def detach(self, sr_uuid): + @override + def detach(self, sr_uuid) -> None: pass @@ -134,7 +146,8 @@ def __init__(self, sr, location): self.location = location VDI.VDI.__init__(self, sr, None) - def load(self, location): + @override + def load(self, location) -> None: self.path = self.location self.size = 0 self.utilisation = 0 @@ -149,7 +162,7 @@ def load(self, location): self.sm_config['hotplugged_at'] = iso8601 self.path = os.path.realpath(self.path) - + dev = os.path.basename(self.path) info = sysdevice.stat(dev) if "size" in info.keys(): @@ -176,7 +189,8 @@ def load(self, location): except OSError as e: self.deleted = True - def introduce(self, sr_uuid, vdi_uuid): + @override + 
def introduce(self, sr_uuid, vdi_uuid) -> str: self.uuid = vdi_uuid self.location = self.sr.srcmd.params['vdi_location'] self._db_introduce() @@ -184,7 +198,8 @@ def introduce(self, sr_uuid, vdi_uuid): self.sr.update(sr_uuid) return super(udevVDI, self).get_params() - def update(self, sr_uuid, vdi_location): + @override + def update(self, sr_uuid, vdi_location) -> None: self.load(vdi_location) # _db_update requires self.uuid to be set self.uuid = self.sr.srcmd.params['vdi_uuid'] @@ -198,13 +213,15 @@ def update(self, sr_uuid, vdi_location): #self.sr.session.xenapi.VDI.set_name_label(vdi, self.label) #self.sr.session.xenapi.VDI.set_name_description(vdi, self.description) - def attach(self, sr_uuid, vdi_uuid): + @override + def attach(self, sr_uuid, vdi_uuid) -> str: if self.deleted: raise xs_errors.XenError('VDIUnavailable') return super(udevVDI, self).attach(sr_uuid, vdi_uuid) - def detach(self, sr_uuid, vdi_uuid): + @override + def detach(self, sr_uuid, vdi_uuid) -> None: pass if __name__ == '__main__': diff --git a/drivers/util.py b/drivers/util.py index 3a568d599..b663c76ab 100755 --- a/drivers/util.py +++ b/drivers/util.py @@ -643,6 +643,11 @@ def zeroOut(path, fromByte, bytes): return True +def wipefs(blockdev): + "Wipe filesystem signatures from `blockdev`" + pread2(["/usr/sbin/wipefs", "-a", blockdev]) + + def match_rootdev(s): regex = re.compile("^PRIMARY_DISK") return regex.search(s, 0) @@ -699,10 +704,13 @@ def get_this_host(): return uuid -def is_master(session): +def get_master_ref(session): pools = session.xenapi.pool.get_all() - master = session.xenapi.pool.get_master(pools[0]) - return get_this_host_ref(session) == master + return session.xenapi.pool.get_master(pools[0]) + + +def is_master(session): + return get_this_host_ref(session) == get_master_ref(session) def get_localhost_ref(session): @@ -745,6 +753,17 @@ def get_hosts_attached_on(session, vdi_uuids): host_refs[key[len('host_'):]] = True return host_refs.keys() +def get_this_host_address(session): + host_uuid = get_this_host() + host_ref = session.xenapi.host.get_by_uuid(host_uuid) + return session.xenapi.host.get_record(host_ref)['address'] + +def get_host_addresses(session): + addresses = [] + hosts = session.xenapi.host.get_all_records() + for record in hosts.values(): + addresses.append(record['address']) + return addresses def get_this_host_ref(session): host_uuid = get_this_host() @@ -758,6 +777,11 @@ def get_slaves_attached_on(session, vdi_uuids): master_ref = get_this_host_ref(session) return [x for x in host_refs if x != master_ref] +def get_enabled_hosts(session): + """ + Returns a list of host refs that are enabled in the pool. + """ + return list(session.xenapi.host.get_all_records_where('field "enabled" = "true"').keys()) def get_online_hosts(session): online_hosts = [] @@ -1066,7 +1090,7 @@ def diskFromPartition(partition): return m.group(2) numlen = 0 # number of digit characters - m = re.match("\D+(\d+)", partition) + m = re.match(r"\D+(\d+)", partition) if m is not None: numlen = len(m.group(1)) @@ -1077,7 +1101,7 @@ def diskFromPartition(partition): # is it a mapper path? 
if partition.startswith("mapper"): if re.search("p[0-9]*$", partition): - numlen = len(re.match("\d+", partition[::-1]).group(0)) + 1 + numlen = len(re.match(r"\d+", partition[::-1]).group(0)) + 1 SMlog("Found mapper part, len %d" % numlen) else: numlen = 0 @@ -1548,13 +1572,21 @@ def get_connected_sockets(pid): return sockets -def retry(f, maxretry=20, period=3): +def retry(f, maxretry=20, period=3, exceptions=[Exception]): retries = 0 while True: try: return f() except Exception as e: - SMlog("Got exception: %s. Retry number: %s" % (str(e), retries)) + for exception in exceptions: + if isinstance(e, exception): + SMlog('Got exception: {}. Retry number: {}'.format( + str(e), retries + )) + break + else: + SMlog('Got bad exception: {}. Raising...'.format(e)) + raise e retries += 1 if retries >= maxretry: @@ -1977,3 +2009,124 @@ def sessions_less_than_targets(other_config, device_config): return (sessions < targets) else: return False + + +def enable_and_start_service(name, start): + attempt = 0 + while True: + attempt += 1 + fn = 'enable' if start else 'disable' + args = ('systemctl', fn, '--now', name) + (ret, out, err) = doexec(args) + if ret == 0: + return + elif attempt >= 3: + raise Exception( + 'Failed to {} {}: {} {}'.format(fn, name, out, err) + ) + time.sleep(1) + + +def stop_service(name): + args = ('systemctl', 'stop', name) + (ret, out, err) = doexec(args) + if ret == 0: + return + raise Exception('Failed to stop {}: {} {}'.format(name, out, err)) + + +def restart_service(name): + attempt = 0 + while True: + attempt += 1 + SMlog('Restarting service {} {}...'.format(name, attempt)) + args = ('systemctl', 'restart', name) + (ret, out, err) = doexec(args) + if ret == 0: + return + elif attempt >= 3: + SMlog('Restart service FAILED {} {}'.format(name, attempt)) + raise Exception( + 'Failed to restart {}: {} {}'.format(name, out, err) + ) + time.sleep(1) + + +def check_pid_exists(pid): + try: + os.kill(pid, 0) + except OSError: + return False + else: + return True + + +def make_profile(name, function): + """ + Helper to execute cProfile using unique log file. + """ + + import cProfile + import itertools + import os.path + import time + + assert name + assert function + + FOLDER = '/tmp/sm-perfs/' + makedirs(FOLDER) + + filename = time.strftime('{}_%Y%m%d_%H%M%S.prof'.format(name)) + + def gen_path(path): + yield path + root, ext = os.path.splitext(path) + for i in itertools.count(start=1, step=1): + yield root + '.{}.'.format(i) + ext + + for profile_path in gen_path(FOLDER + filename): + try: + file = open_atomic(profile_path, 'w') + file.close() + break + except OSError as e: + if e.errno == errno.EEXIST: + pass + else: + raise + + try: + SMlog('* Start profiling of {} ({}) *'.format(name, filename)) + cProfile.runctx('function()', None, locals(), profile_path) + finally: + SMlog('* End profiling of {} ({}) *'.format(name, filename)) + + +def strtobool(str): + # Note: `distutils` package is deprecated and slated for removal in Python 3.12. + # There is not alternative for strtobool. 
+ # See: https://peps.python.org/pep-0632/#migration-advice + # So this is a custom implementation with differences: + # - A boolean is returned instead of integer + # - Empty string and None are supported (False is returned in this case) + if not str: + return False + str = str.lower() + if str in ('y', 'yes', 't', 'true', 'on', '1'): + return True + if str in ('n', 'no', 'f', 'false', 'off', '0'): + return False + raise ValueError("invalid truth value '{}'".format(str)) + + +def find_executable(name): + return shutil.which(name) + + +def conditional_decorator(decorator, condition): + def wrapper(func): + if not condition: + return func + return decorator(func) + return wrapper diff --git a/drivers/vhdutil.py b/drivers/vhdutil.py index c4be0eef6..8ac7b08da 100755 --- a/drivers/vhdutil.py +++ b/drivers/vhdutil.py @@ -99,13 +99,16 @@ def convertAllocatedSizeToBytes(size): return size * 2 * 1024 * 1024 -def getVHDInfo(path, extractUuidFunction, includeParent=True): +def getVHDInfo(path, extractUuidFunction, includeParent=True, resolveParent=True): """Get the VHD info. The parent info may optionally be omitted: vhd-util tries to verify the parent by opening it, which results in error if the VHD resides on an inactive LV""" opts = "-vsaf" if includeParent: opts += "p" + if not resolveParent: + opts += "u" + cmd = [VHD_UTIL, "query", OPT_LOG_ERR, opts, "-n", path] ret = ioretry(cmd) fields = ret.strip().split('\n') @@ -203,7 +206,7 @@ def hasParent(path): cmd = [VHD_UTIL, "read", OPT_LOG_ERR, "-p", "-n", path] ret = ioretry(cmd) # pylint: disable=no-member - m = re.match(".*Disk type\s+: (\S+) hard disk.*", ret, flags=re.S) + m = re.match(r".*Disk type\s+: (\S+) hard disk.*", ret, flags=re.S) vhd_type = m.group(1) assert(vhd_type == "Differencing" or vhd_type == "Dynamic") return vhd_type == "Differencing" diff --git a/etc/systemd/system/drbd-reactor.service.d/override.conf b/etc/systemd/system/drbd-reactor.service.d/override.conf new file mode 100644 index 000000000..2f99a46af --- /dev/null +++ b/etc/systemd/system/drbd-reactor.service.d/override.conf @@ -0,0 +1,6 @@ +[Service] +StartLimitInterval=60 +StartLimitBurst=10 + +Restart=always +RestartSec=2 diff --git a/etc/systemd/system/linstor-satellite.service.d/override.conf b/etc/systemd/system/linstor-satellite.service.d/override.conf new file mode 100644 index 000000000..b1686b4f3 --- /dev/null +++ b/etc/systemd/system/linstor-satellite.service.d/override.conf @@ -0,0 +1,5 @@ +[Service] +Environment=LS_KEEP_RES=^xcp-persistent* + +[Unit] +After=drbd.service diff --git a/etc/systemd/system/var-lib-linstor.service b/etc/systemd/system/var-lib-linstor.service new file mode 100644 index 000000000..e9deb9042 --- /dev/null +++ b/etc/systemd/system/var-lib-linstor.service @@ -0,0 +1,21 @@ +# Regarding the current version of systemd (v.219) used in XCP-ng, we can't use +# the ReadWriteOnly option (to apply the -w flag, it's not the same than -o rw). +# This file is a workaround to avoid RO. It must be replaced with the code below +# in a mount unit. Compatible with version >= 246. 
+#
+# [Unit]
+# Description=Filesystem for the LINSTOR controller
+#
+# [Mount]
+# What=/dev/drbd/by-res/xcp-persistent-database/0
+# Where=/var/lib/linstor
+# ReadWriteOnly=true
+
+[Unit]
+Description=Mount filesystem for the LINSTOR controller
+
+[Service]
+Type=oneshot
+ExecStart=/bin/mount -w /dev/drbd/by-res/xcp-persistent-database/0 /var/lib/linstor
+ExecStop=/opt/xensource/libexec/safe-umount /var/lib/linstor
+RemainAfterExit=true
diff --git a/linstor/Makefile b/linstor/Makefile
new file mode 100644
index 000000000..c329ca302
--- /dev/null
+++ b/linstor/Makefile
@@ -0,0 +1,22 @@
+PREFIX ?= /opt/xensource/libexec
+DESTDIR ?=
+DEBUGDIR ?= /opt/xensource/debug
+
+
+OPTS := -Wall -std=gnu99
+
+SRC := linstor-monitord.c
+
+BIN := linstor-monitord
+
+all: daemon
+
+daemon: linstor-monitord.c
+	$(CC) $(OPTS) $(SRC) -o $(BIN)
+
+install: linstor-monitord
+	mkdir -p $(DESTDIR)$(PREFIX)
+	install -m 755 $^ $(DESTDIR)$(PREFIX)
+
+clean:
+	rm -f linstor-monitord
diff --git a/linstor/linstor-monitord.c b/linstor/linstor-monitord.c
new file mode 100644
index 000000000..47740598c
--- /dev/null
+++ b/linstor/linstor-monitord.c
@@ -0,0 +1,535 @@
+/*
+ * Copyright (C) 2020 Vates SAS - ronan.abhamon@vates.fr
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <assert.h>
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <poll.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syslog.h>
+#include <time.h>
+#include <unistd.h>
+#include <sys/inotify.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+// TODO: Handle new hosts.
+// TODO: https://github.com/xcp-ng/xcp/issues/421
+
+// =============================================================================
+
+#define POOL_CONF_DIR "/etc/xensource"
+#define POOL_CONF_FILE "pool.conf"
+#define POOL_CONF_ABS_FILE POOL_CONF_DIR "/" POOL_CONF_FILE
+
+// In milliseconds.
+#define UPDATE_LINSTOR_NODE_TIMEOUT 2000 +#define SR_SCAN_TIMEOUT 720000 + +// ----------------------------------------------------------------------------- + +static inline void normalizeTime (struct timespec *spec) { + while (spec->tv_nsec >= 1000000000) { + ++spec->tv_sec; + spec->tv_nsec -= 1000000000; + } + while (spec->tv_nsec < 0) { + --spec->tv_sec; + spec->tv_nsec += 1000000000; + } +} + +static inline struct timespec getCurrentTime () { + struct timespec spec; + clock_gettime(CLOCK_MONOTONIC, &spec); + return (struct timespec){ + .tv_sec = spec.tv_sec, + .tv_nsec = spec.tv_nsec + }; +} + +static inline struct timespec getTimeDiff (const struct timespec *a, const struct timespec *b) { + struct timespec result = *a; + result.tv_sec -= b->tv_sec - 1; + result.tv_nsec -= b->tv_nsec + 1000000000; + normalizeTime(&result); + return result; +} + +static inline int64_t convertToMilliseconds (struct timespec spec) { + spec.tv_nsec += 1000 - spec.tv_nsec % 1000; + normalizeTime(&spec); + return spec.tv_sec * 1000 + spec.tv_nsec / 1000000; +} + +// ----------------------------------------------------------------------------- + +static inline int readPoolConf (char *buffer, size_t bufferSize) { + FILE *f = fopen(POOL_CONF_ABS_FILE, "r"); + if (!f) { + syslog(LOG_ERR, "Failed to open `" POOL_CONF_ABS_FILE "`: `%s`.", strerror(errno)); + return -errno; + } + + int ret = 0; + if (!fgets(buffer, bufferSize, f)) { + syslog(LOG_ERR, "Cannot read `" POOL_CONF_ABS_FILE "`."); + ret = -EIO; + } + + fclose(f); + + return ret; +} + +static inline int isMasterHost (int *error) { + if (error) + *error = 0; + + char buffer[512]; + + int ret = readPoolConf(buffer, sizeof buffer); + if (ret < 0) { + if (error) + *error = ret; + return 0; + } + + static const char masterStr[] = "master"; + static const size_t masterLen = sizeof masterStr - 1; + if (!strncmp(buffer, masterStr, masterLen)) { + const char end = buffer[masterLen]; + ret = end == '\0' || isspace(end); + } + + if (ret < 0) { + if (error) + *error = ret; + return 0; + } + + return ret; +} + +// ----------------------------------------------------------------------------- + +typedef struct { + int inotifyFd; + struct timespec lastScanTime; + int isMaster; + // TODO: Should be completed with at least a hostname field. +} State; + +// ----------------------------------------------------------------------------- + +typedef struct { + char *data; + size_t size; + size_t capacity; +} Buffer; + +#define max(a, b) ({ \ + __typeof__(a) _a = (a); \ + __typeof__(b) _b = (b); \ + _a > _b ? 
_a : _b; \ +}) + +static inline ssize_t readAll (int fd, Buffer *buffer) { + assert(buffer->capacity >= buffer->size); + + ssize_t ret = 0; + do { + size_t byteCount = buffer->capacity - buffer->size; + if (byteCount < 16) { + const size_t newCapacity = max(buffer->capacity << 1, 64); + char *p = realloc(buffer->data, newCapacity); + if (!p) + return -errno; + + buffer->data = p; + buffer->capacity = newCapacity; + + byteCount = buffer->capacity - buffer->size; + } + + ret = read(fd, buffer->data + buffer->size, byteCount); + if (ret > 0) + buffer->size += ret; + else if (ret < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) + ret = 0; + } while (ret > 0); + + return ret; +} + +// ----------------------------------------------------------------------------- + +static inline int execCommand (char *argv[], Buffer *buffer) { + int pipefd[2]; + if (buffer) { + if (pipe(pipefd) < 0) { + syslog(LOG_ERR, "Failed to exec pipe: `%s`.", strerror(errno)); + return -errno; + } + + if (fcntl(pipefd[0], F_SETFL, O_NONBLOCK) < 0) { + syslog(LOG_ERR, "Failed to exec fcntl on pipe in: `%s`.", strerror(errno)); + close(pipefd[0]); + close(pipefd[1]); + return -errno; + } + } + + const pid_t pid = fork(); + if (pid < 0) { + syslog(LOG_ERR, "Failed to fork: `%s`.", strerror(errno)); + if (buffer) { + close(pipefd[0]); + close(pipefd[1]); + } + return -errno; + } + + // Child process. + if (pid == 0) { + if (buffer) { + close(STDOUT_FILENO); + dup(pipefd[1]); + + close(pipefd[0]); + close(pipefd[1]); + } + + if (execvp(*argv, argv) < 0) + syslog(LOG_ERR, "Failed to exec `%s` command.", *argv); + exit(EXIT_FAILURE); + } + + // Main process. + int ret = 0; + if (buffer) { + close(pipefd[1]); + + do { + struct pollfd fds = { pipefd[0], POLLIN | POLLHUP, 0 }; + const int res = poll(&fds, 1, 0); + if (res < 0) { + if (errno == EAGAIN) + continue; + syslog(LOG_ERR, "Failed to poll from command: `%s`.", strerror(errno)); + ret = -errno; + } else if (res > 0) { + if (fds.revents & POLLIN) + ret = readAll(pipefd[0], buffer); + if (fds.revents & POLLHUP) + break; // Input has been closed. + } + } while (ret >= 0); + + close(pipefd[0]); + } + + int status; + if (waitpid(pid, &status, 0) < 0) { + syslog(LOG_ERR, "Failed to wait command: `%s`.", *argv); + return -errno; + } + + if (WIFEXITED(status)) { + const int code = WEXITSTATUS(status); + if (code == 0) + syslog(LOG_INFO, "`%s` completed normally.", *argv); + else + syslog(LOG_ERR, "`%s` exited with an error: %d.", *argv, code); + } else if (WIFSIGNALED(status)) + syslog(LOG_ERR, "`%s` terminated by signal %d.", *argv, WTERMSIG(status)); + + return ret; +} + +// ----------------------------------------------------------------------------- + +static inline int createInotifyInstance () { + const int fd = inotify_init1(IN_CLOEXEC); + if (fd < 0) { + syslog(LOG_ERR, "Unable to create inotify instance: `%s`.", strerror(errno)); + return -errno; + } + return fd; +} + +static inline int addInotifyWatch (int inotifyFd, const char *filepath, uint32_t mask) { + const int wd = inotify_add_watch(inotifyFd, filepath, mask); + if (wd < 0) { + syslog(LOG_ERR, "Unable to register `%s`: `%s`.", filepath, strerror(errno)); + return -errno; + } + return wd; +} + +// ----------------------------------------------------------------------------- + +static inline int updateLinstorNode (State *state) { + char buffer[256]; + if (gethostname(buffer, sizeof buffer) == -1) { + syslog(LOG_ERR, "Failed to get hostname: `%s`.", strerror(errno)); + return errno ? 
-errno : -EINVAL; + } + + // TODO: Finish me, see: https://github.com/xcp-ng/xcp/issues/421 + + return 0; +} + +// ----------------------------------------------------------------------------- + +#define UUID_PARAM "uuid=" +#define UUID_PARAM_LEN (sizeof(UUID_PARAM) - 1) +#define UUID_LENGTH 36 + +static inline void scanLinstorSr (const char *uuid) { + char uuidBuf[UUID_LENGTH + UUID_PARAM_LEN + 1] = UUID_PARAM; + strncpy(uuidBuf + UUID_PARAM_LEN, uuid, UUID_LENGTH); + uuidBuf[UUID_LENGTH + UUID_PARAM_LEN] = '\0'; + execCommand((char *[]){ "xe", "sr-scan", uuidBuf, NULL }, NULL); +} + +// Called to update the physical/virtual size used by LINSTOR SRs in XAPI DB. +static inline int scanLinstorSrs () { + Buffer srs = {}; + const int ret = execCommand((char *[]){ "xe", "sr-list", "type=linstor", "--minimal", NULL }, &srs); + if (ret) { + free(srs.data); + return ret; + } + + const char *end = srs.data + srs.size; + char *pos = srs.data; + for (char *off; (off = memchr(pos, ',', end - pos)); pos = off + 1) + if (off - pos == UUID_LENGTH) + scanLinstorSr(pos); + + if (end - pos >= UUID_LENGTH) { + for (--end; end - pos >= UUID_LENGTH && isspace(*end); --end) {} + if (isalnum(*end)) + scanLinstorSr(pos); + } + + free(srs.data); + + return 0; +} + +// ----------------------------------------------------------------------------- + +#define PROCESS_MODE_DEFAULT 0 +#define PROCESS_MODE_WAIT_FILE_CREATION 1 + +static inline int waitForPoolConfCreation (State *state, int *wdFile); + +static inline int processPoolConfEvents (State *state, int wd, char **buffer, size_t *bufferSize, int mode, int *process) { + size_t size = 0; + if (ioctl(state->inotifyFd, FIONREAD, (char *)&size) == -1) { + syslog(LOG_ERR, "Failed to get buffer size from inotify descriptor: `%s`.", strerror(errno)); + return -errno; + } + + if (*bufferSize < size) { + void *ptr = realloc(*buffer, size); + if (!ptr) { + syslog(LOG_ERR, "Failed to reallocate buffer with size %zu: `%s`.", size, strerror(errno)); + return -errno; + } + *buffer = ptr; + *bufferSize = size; + } + + if ((size = (size_t)read(state->inotifyFd, *buffer, size)) == (size_t)-1) { + syslog(LOG_ERR, "Failed to read buffer from inotify descriptor: `%s`.", strerror(errno)); + return -errno; + } + + uint32_t mask = 0; + for (char *p = *buffer, *end = p + size; p < end; ) { + const struct inotify_event *event = (struct inotify_event *)p; + + if (event->mask & IN_Q_OVERFLOW) + syslog(LOG_WARNING, "Event queue overflow."); + + if (event->wd == wd) { + if (event->len) { + // Event in the watched directory. + if (!strncmp(event->name, POOL_CONF_FILE, event->len)) + mask |= event->mask; + } else { + // Directory or watched file event. + if (mode == PROCESS_MODE_DEFAULT) + mask |= event->mask; + else if (event->mask & (IN_DELETE_SELF | IN_MOVE_SELF | IN_UNMOUNT)) { + syslog(LOG_ERR, "Watched `" POOL_CONF_DIR "` dir has been removed!"); + return -EIO; // The process should be exited after that. + } + } + } + + p += sizeof(struct inotify_event) + event->len; + } + + int ret = 0; + if (mode == PROCESS_MODE_DEFAULT) { + if (!mask) + return 0; + + syslog(LOG_INFO, "Updating linstor services... (Inotify mask=%" PRIu32 ")", mask); + if (mask & (IN_DELETE_SELF | IN_MOVE_SELF | IN_UNMOUNT)) { + syslog(LOG_ERR, "Watched `" POOL_CONF_ABS_FILE "` file has been removed!"); + inotify_rm_watch(state->inotifyFd, wd); // Do not forget to remove watch to avoid leaks. 
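(Note: "xe sr-list type=linstor --minimal" prints the SR UUIDs as one comma-separated line, and the pointer arithmetic in scanLinstorSrs above walks that list, issuing "xe sr-scan uuid=..." for every 36-character entry. A rough Python equivalent of that parsing, for illustration only:)

    def scan_linstor_srs(minimal_output, scan_sr):
        # minimal_output is e.g. "uuid-1,uuid-2,uuid-3\n"; scan_sr stands in for
        # running "xe sr-scan uuid=<sr uuid>" on one SR.
        for sr_uuid in minimal_output.strip().split(','):
            if len(sr_uuid) == 36:  # same check as UUID_LENGTH in the C code
                scan_sr(sr_uuid)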
+ return -EIO; + } + } else { + if (mask & (IN_CREATE | IN_MOVED_TO)) { + syslog(LOG_ERR, "Watched `" POOL_CONF_ABS_FILE "` file has been recreated!"); + *process = 0; + } + } + + return ret; +} + +static inline int waitAndProcessEvents (State *state, int wd, int mode) { + char *buffer = NULL; + size_t bufferSize = 0; + + int ret = 0; + int process = 1; + + struct timespec previousTime = getCurrentTime(); + do { + const struct timespec currentTime = getCurrentTime(); + const int64_t elapsedTime = convertToMilliseconds(getTimeDiff(¤tTime, &previousTime)); + + int timeout; + if (elapsedTime >= UPDATE_LINSTOR_NODE_TIMEOUT) { + updateLinstorNode(state); + timeout = UPDATE_LINSTOR_NODE_TIMEOUT; + previousTime = getCurrentTime(); + } else { + timeout = UPDATE_LINSTOR_NODE_TIMEOUT - elapsedTime; + } + + const int64_t elapsedScanTime = convertToMilliseconds(getTimeDiff(¤tTime, &state->lastScanTime)); + if (elapsedScanTime >= SR_SCAN_TIMEOUT) { + state->isMaster = isMasterHost(&ret); + if (state->isMaster) + scanLinstorSrs(); + state->lastScanTime = getCurrentTime(); + } + + struct pollfd fds = { state->inotifyFd, POLLIN, 0 }; + const int res = poll(&fds, 1, timeout); + if (res < 0) { + if (errno == EAGAIN) + continue; + syslog(LOG_ERR, "Failed to poll from inotify descriptor: `%s`.", strerror(errno)); + ret = -errno; + } else if (res > 0) { + state->isMaster = isMasterHost(&ret); + if (!ret) + ret = processPoolConfEvents(state, wd, &buffer, &bufferSize, mode, &process); + } + } while (ret >= 0 && process); + + free(buffer); + return ret; +} + +static inline int waitAndProcessFileEvents (State *state, int wd) { + return waitAndProcessEvents(state, wd, PROCESS_MODE_DEFAULT); +} + +static inline int waitAndProcessDirEvents (State *state, int wd) { + return waitAndProcessEvents(state, wd, PROCESS_MODE_WAIT_FILE_CREATION); +} + +static inline int waitForPoolConfCreation (State *state, int *wdFile) { + const int wdDir = addInotifyWatch( + state->inotifyFd, POOL_CONF_DIR, IN_MOVED_TO | IN_CREATE | IN_MOVE_SELF | IN_DELETE_SELF + ); + if (wdDir < 0) + return wdDir; + + int ret = 0; + do { + do { + // Update LINSTOR services... + int ret; + state->isMaster = isMasterHost(&ret); + + // Ok we can't read the pool configuration file. + // Maybe the file doesn't exist. Waiting its creation... + } while ((ret == -ENOENT || ret == -EIO) && !(ret = waitAndProcessDirEvents(state, wdDir))); + + // The services have been updated, now we must add a new watch on the pool config file directly. + if (!ret) { + *wdFile = addInotifyWatch(state->inotifyFd, POOL_CONF_ABS_FILE, IN_MODIFY | IN_MOVE_SELF | IN_DELETE_SELF); + if (*wdFile < 0) + ret = *wdFile; + } + } while (ret == -ENOENT); + + inotify_rm_watch(state->inotifyFd, wdDir); + return ret; +} + +// ----------------------------------------------------------------------------- + +int main (int argc, char *argv[]) { + (void)argc; + (void)argv; + + openlog(argv[0], LOG_PID, LOG_USER | LOG_MAIL); + setlogmask(LOG_UPTO(LOG_INFO)); + + State state = { + .inotifyFd = -1, + .lastScanTime = getCurrentTime(), + .isMaster = 0 + }; + + const int inotifyFd = createInotifyInstance(); + if (inotifyFd < 0) + return -inotifyFd; + state.inotifyFd = inotifyFd; + + updateLinstorNode(&state); + + int ret = 0; + while (!ret || ret == -ENOENT || ret == -EIO) { + int wdFile; + if ((ret = waitForPoolConfCreation(&state, &wdFile)) < 0) + break; // If the pool config dir cannot be watched or accessed, we consider it is a fatal error. 
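(Note: the loop above drives two timers: the LINSTOR node refresh every UPDATE_LINSTOR_NODE_TIMEOUT (2000 ms) and, only when the host is the pool master, a scan of the LINSTOR SRs every SR_SCAN_TIMEOUT (720000 ms, i.e. 12 minutes). A simplified sketch of that scheduling, with the daemon's actions passed in as callables, for illustration only:)

    UPDATE_NODE_PERIOD = 2.0   # seconds, mirrors UPDATE_LINSTOR_NODE_TIMEOUT
    SR_SCAN_PERIOD = 720.0     # seconds, mirrors SR_SCAN_TIMEOUT

    def tick(state, now, update_node, scan_srs):
        # Refresh the LINSTOR node when its timer expires.
        if now - state['last_node_update'] >= UPDATE_NODE_PERIOD:
            update_node()
            state['last_node_update'] = now
        # Rescan the LINSTOR SRs, but only on the pool master.
        if now - state['last_scan'] >= SR_SCAN_PERIOD:
            if state['is_master']:
                scan_srs()
            state['last_scan'] = now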
+ + ret = waitAndProcessFileEvents(&state, wdFile); + } + + close(inotifyFd); + return -ret; +} diff --git a/misc/fairlock/fairlock.py b/misc/fairlock/fairlock.py index 91a6a4992..7e5206ba2 100644 --- a/misc/fairlock/fairlock.py +++ b/misc/fairlock/fairlock.py @@ -1,3 +1,5 @@ +from sm_typing import Any, Callable, Dict, Optional, override + import os import socket import inspect @@ -7,16 +9,17 @@ START_SERVICE_TIMEOUT_SECS = 2 class SingletonWithArgs(type): - _instances = {} - _init = {} + _instances: Dict[Any, Any] = {} + _init: Dict[type, Optional[Callable[..., None]]] = {} def __init__(cls, name, bases, dct): cls._init[cls] = dct.get('__init__', None) - def __call__(cls, *args, **kwargs): + @override + def __call__(cls, *args, **kwargs) -> Any: init = cls._init[cls] if init is not None: - key = (cls, frozenset( + key: Any = (cls, frozenset( inspect.getcallargs(init, None, *args, **kwargs).items())) else: key = cls diff --git a/mocks/linstor/__init__.py b/mocks/linstor/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/multipath/multipath.conf b/multipath/multipath.conf index 166bd8476..38cc8a8b1 100644 --- a/multipath/multipath.conf +++ b/multipath/multipath.conf @@ -23,6 +23,7 @@ blacklist { devnode "scini*" devnode "^rbd[0-9]*" devnode "^nbd[0-9]*" + devnode "^drbd[0-9]*" } # Leave this section in place even if empty blacklist_exceptions { diff --git a/scripts/fork-log-daemon b/scripts/fork-log-daemon new file mode 100755 index 000000000..bdc95fd24 --- /dev/null +++ b/scripts/fork-log-daemon @@ -0,0 +1,43 @@ +#!/usr/bin/env python3 + +import os +import select +import signal +import subprocess +import sys +import syslog + +def main(): + process = subprocess.Popen( + sys.argv[1:], + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + universal_newlines=True, + env=dict(os.environ, PYTHONUNBUFFERED='1') + ) + signal.signal(signal.SIGTERM, signal.SIG_IGN) + write_to_stdout = True + + while process.poll() is None: + while True: + output = process.stdout.readline() + if not output: + break + + if write_to_stdout: + try: + print(output) + sys.stdout.flush() + except Exception: + # Probably a broken pipe. So the process reading stdout is dead. + write_to_stdout = False + syslog.syslog(output) + +if __name__ == "__main__": + syslog.openlog(ident=sys.argv[1], facility=syslog.LOG_DAEMON) + try: + main() + except Exception as e: + syslog.syslog(sys.argv[1] + ' terminated with exception: {}'.format(e)) + finally: + syslog.syslog(sys.argv[1] + ' is now terminated!') diff --git a/scripts/linstor-kv-tool b/scripts/linstor-kv-tool new file mode 100755 index 000000000..de14e731e --- /dev/null +++ b/scripts/linstor-kv-tool @@ -0,0 +1,84 @@ +#!/usr/bin/env python3 +# +# Copyright (C) 2022 Vates SAS +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+import sys +sys.path[0] = '/opt/xensource/sm/' + +from linstorvolumemanager import get_controller_uri + +import argparse +import json +import linstor + + +def dump_kv(controller_uri, group_name, namespace): + kv = linstor.KV( + group_name, + uri=controller_uri, + namespace=namespace + ) + print(json.dumps(kv, sort_keys=True, indent=2)) + + +def remove_volume(controller_uri, group_name, vdi_name): + assert vdi_name + kv = linstor.KV( + group_name, + uri=controller_uri, + namespace='/xcp/volume/{}'.format(vdi_name) + ) + + for key, value in list(kv.items()): + del kv[key] + + +def remove_all_volumes(controller_uri, group_name): + kv = linstor.KV( + group_name, + uri=controller_uri, + namespace='/' + ) + + for key, value in list(kv.items()): + if key.startswith('xcp/volume/') or key.startswith('xcp/sr/journal/'): + size = key.rindex('/') + kv.namespace = key[:size] + del kv[key[size + 1:]] + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('-u', '--uri', required=False) + parser.add_argument('-g', '--group-name', required=True) + parser.add_argument('-n', '--namespace', default='/') + + action = parser.add_mutually_exclusive_group(required=True) + action.add_argument('--dump-volumes', action='store_true') + action.add_argument('--remove-volume', metavar='VDI_UUID') + action.add_argument('--remove-all-volumes', action='store_true') + + args = parser.parse_args() + controller_uri = get_controller_uri() if args.uri is None else args.uri + + if args.dump_volumes: + dump_kv(controller_uri, args.group_name, args.namespace) + elif args.remove_volume: + remove_volume(controller_uri, args.group_name, args.remove_volume) + elif args.remove_all_volumes: + remove_all_volumes(controller_uri, args.group_name) + + +if __name__ == '__main__': + main() diff --git a/scripts/safe-umount b/scripts/safe-umount new file mode 100755 index 000000000..3c64a3f31 --- /dev/null +++ b/scripts/safe-umount @@ -0,0 +1,39 @@ +#!/usr/bin/env python3 + +import argparse +import subprocess +import sys +import time + + +def safe_umount(path): + retry_count = 10 + not_mounted_str = 'umount: {}: not mounted'.format(path) + + last_code = 0 + while retry_count: + proc = subprocess.Popen(['mountpoint', '-q', path]) + proc.wait() + if proc.returncode: + return 0 + + proc = subprocess.Popen(['umount', path], stderr=subprocess.PIPE) + (stdout, stderr) = proc.communicate() + if not proc.returncode: + return 0 + + error = stderr.strip() + if error == not_mounted_str: + return 0 + + retry_count -= 1 + last_code = proc.returncode + time.sleep(0.500) + return last_code + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('path') + args = parser.parse_args() + sys.exit(safe_umount(args.path)) diff --git a/sm_typing/Makefile b/sm_typing/Makefile new file mode 100644 index 000000000..1e2ea815f --- /dev/null +++ b/sm_typing/Makefile @@ -0,0 +1,12 @@ +PYTHONLIBDIR = $(shell python3 -c "import sys; print(sys.path.pop())") +DESTDIR ?= + +.PHONY: install +install: + install -D -m 644 __init__.py $(DESTDIR)$(PYTHONLIBDIR)/sm_typing.py + python3 -m compileall $(DESTDIR)$(PYTHONLIBDIR)/sm_typing.py + +.PHONY: uninstall +uninstall: + rm -rf $(DESTDIR)$(PYTHONLIBDIR)/sm_typing.py + rm -rf $(DESTDIR)$(PYTHONLIBDIR)/__pycache__/sm_typing.* diff --git a/sm_typing/__init__.py b/sm_typing/__init__.py new file mode 100644 index 000000000..2042dea65 --- /dev/null +++ b/sm_typing/__init__.py @@ -0,0 +1,14 @@ +import typing +from typing import * + +if not hasattr(typing, 'override'): + def 
override(method): # type: ignore + try: + # Set internal attr `__override__` like described in PEP 698. + method.__override__ = True + except (AttributeError, TypeError): + pass + return method + +if not hasattr(typing, 'Never'): + Never = None # type: ignore diff --git a/stubs/XenAPIPlugin.pyi b/stubs/XenAPIPlugin.pyi new file mode 100644 index 000000000..72bad09fb --- /dev/null +++ b/stubs/XenAPIPlugin.pyi @@ -0,0 +1,5 @@ +class Failure(Exception): + def __init__(self, code, params) -> None: ... + def __str__(self) -> str: ... # type: ignore + +def dispatch(fn_table) -> None: ... diff --git a/stubs/xen/__init__.pyi b/stubs/xen/__init__.pyi new file mode 100644 index 000000000..e69de29bb diff --git a/stubs/xen/lowlevel/xs.pyi b/stubs/xen/lowlevel/xs.pyi new file mode 100644 index 000000000..e69de29bb diff --git a/systemd/linstor-monitor.service b/systemd/linstor-monitor.service new file mode 100644 index 000000000..5f8f0a760 --- /dev/null +++ b/systemd/linstor-monitor.service @@ -0,0 +1,13 @@ +[Unit] +Description=LINSTOR Monitor +Before=xs-sm.service +ConditionPathExists=/usr/share/linstor-server/bin/Controller + +[Service] +StandardOutput=null +StandardError=journal +ExecStart=/opt/xensource/libexec/linstor-monitord +Restart=always + +[Install] +WantedBy=multi-user.target diff --git a/systemd/xs-sm.service b/systemd/xs-sm.service index 99cb313f3..609c6ef5d 100644 --- a/systemd/xs-sm.service +++ b/systemd/xs-sm.service @@ -1,5 +1,5 @@ [Unit] -Description=XenServer Storage Manager (SM) +Description=XCP-ng Storage Manager (SM) Before=xapi.service Conflicts=shutdown.target RefuseManualStop=yes diff --git a/tests/lvmlib.py b/tests/lvmlib.py index c57b32e05..6ec8d2c71 100644 --- a/tests/lvmlib.py +++ b/tests/lvmlib.py @@ -1,3 +1,5 @@ +from sm_typing import Never, override + import argparse import sys @@ -7,10 +9,12 @@ class TestArgParse(argparse.ArgumentParser): to stderr during the tests """ - def exit(self, status=0, msg=None): + @override + def exit(self, status=0, msg=None) -> Never: sys.exit(status) - def error(self, msg): + @override + def error(self, msg) -> Never: """error(msg : string)""" self.exit(2, "%s: error: %s\n" % (self.prog, msg)) diff --git a/tests/pylintrc b/tests/pylintrc index a982913b3..4588675ba 100644 --- a/tests/pylintrc +++ b/tests/pylintrc @@ -84,7 +84,7 @@ ignored-classes=SQLObject # List of members which are set dynamically and missed by pylint inference # system, and so shouldn't trigger E0201 when accessed. -generated-members=REQUEST,acl_users,aq_parent +generated-members=REQUEST,acl_users,aq_parent,linstor.* # List of module names for which member attributes should not be checked # # (useful for modules/projects where namespaces are manipulated during runtime diff --git a/tests/shared_iscsi_test_base.py b/tests/shared_iscsi_test_base.py index a65555470..b224d30dd 100644 --- a/tests/shared_iscsi_test_base.py +++ b/tests/shared_iscsi_test_base.py @@ -1,3 +1,5 @@ +from sm_typing import Dict, List, Tuple, override + import unittest from unittest import mock @@ -6,8 +8,11 @@ class ISCSITestCase(unittest.TestCase): + # Declared in subclasses. 
+ TEST_CLASS: str - def setUp(self): + @override + def setUp(self) -> None: iscsilib_patcher = mock.patch(f'{self.TEST_CLASS}.iscsilib', autospec=True) self.mock_iscsilib = iscsilib_patcher.start() @@ -15,8 +20,8 @@ def setUp(self): self.mock_iscsilib._checkTGT.side_effect = self._checkTGT self.mock_iscsilib.login.side_effect = self.iscsi_login self.mock_iscsilib.parse_IP_port = iscsilib.parse_IP_port - self.discovery_data = {} - self.sessions = [] + self.discovery_data: Dict[str, Tuple[str, int, str]] = {} + self.sessions: List[str] = [] sleep_patcher = mock.patch(f'{self.TEST_CLASS}.time.sleep', autospec=True) diff --git a/tests/test_BaseISCSI.py b/tests/test_BaseISCSI.py index cc489a46a..7c4ed83fa 100644 --- a/tests/test_BaseISCSI.py +++ b/tests/test_BaseISCSI.py @@ -2,6 +2,8 @@ Unit tests for the Base ISCSI SR """ +from sm_typing import override + from unittest import mock from uuid import uuid4 @@ -15,7 +17,8 @@ class TestBaseISCSI(ISCSITestCase): TEST_CLASS = 'BaseISCSI' - def setUp(self): + @override + def setUp(self) -> None: self.addCleanup(mock.patch.stopall) util_patcher = mock.patch('BaseISCSI.util', autospec=True) diff --git a/tests/test_FileSR.py b/tests/test_FileSR.py index e755f2d8a..109f6d306 100644 --- a/tests/test_FileSR.py +++ b/tests/test_FileSR.py @@ -1,3 +1,5 @@ +from sm_typing import Any, Optional, Set, override + import errno import os import stat @@ -18,7 +20,8 @@ class FakeFileVDI(FileSR.FileVDI): - def load(self, uuid): + @override + def load(self, uuid) -> None: self.vdi_type = vhdutil.VDI_TYPE_VHD self.hidden = False self.path = os.path.join(self.sr.path, '%s.%s' % ( @@ -27,7 +30,8 @@ def load(self, uuid): class TestFileVDI(unittest.TestCase): - def setUp(self): + @override + def setUp(self) -> None: startlog_patcher = mock.patch('FileSR.util.start_log_entry', autospec=True) self.mock_startlog = startlog_patcher.start() @@ -50,7 +54,7 @@ def setUp(self): fist_patcher = mock.patch('FileSR.util.FistPoint.is_active', autospec=True) self.mock_fist = fist_patcher.start() - self.active_fists = set() + self.active_fists: Set[Any] = set() def active_fists(): return self.active_fists @@ -419,15 +423,19 @@ class FakeSharedFileSR(FileSR.SharedFileSR): """ Test SR class for SharedFileSR """ - def load(self, sr_uuid): + + @override + def load(self, sr_uuid) -> None: self.path = os.path.join(SR.MOUNT_BASE, sr_uuid) - self.lock = None + self.lock = None # type: ignore - def attach(self, sr_uuid): + @override + def attach(self, sr_uuid) -> None: self._check_writable() self._check_hardlinks() - def _read_hardlink_conf(self): + @override + def _read_hardlink_conf(self) -> Optional[bool]: return None class TestShareFileSR(unittest.TestCase): @@ -437,7 +445,8 @@ class TestShareFileSR(unittest.TestCase): TEST_SR_REF = "test_sr_ref" ERROR_524 = "Unknown error 524" - def setUp(self): + @override + def setUp(self) -> None: util_patcher = mock.patch('FileSR.util', autospec=True) self.mock_util = util_patcher.start() @@ -561,7 +570,8 @@ def test_scan_load_vdis_scan_list_differ(self): self.assertEqual(1, len(test_sr.vdis)) class TestFileSR(unittest.TestCase): - def setUp(self): + @override + def setUp(self) -> None: pread_patcher = mock.patch('FileSR.util.pread') self.mock_pread = pread_patcher.start() @@ -682,7 +692,7 @@ def test_attach_can_do_non_bind_mount(self, mock_chmod, mock_util_makedirs): sr.path = mount_dst sr.remotepath = mount_src - sr.attach(None, bind=False) + sr.attach_and_bind(None, bind=False) self.assertTrue(sr.attached) diff --git a/tests/test_ISCSISR.py 
b/tests/test_ISCSISR.py index 39529be13..9af3d4bc2 100644 --- a/tests/test_ISCSISR.py +++ b/tests/test_ISCSISR.py @@ -1,3 +1,5 @@ +from sm_typing import override + import unittest import BaseISCSI import unittest.mock as mock @@ -8,17 +10,20 @@ class TestBase(unittest.TestCase): """ Provides errorcodes.xml, so exceptions are sensible """ - def setUp(self): + @override + def setUp(self) -> None: self._xmldefs = xs_errors.XML_DEFS xs_errors.XML_DEFS = os.path.join( os.path.dirname(__file__), 'XE_SR_ERRORCODES.xml') - def tearDown(self): + @override + def tearDown(self) -> None: xs_errors.XML_DEFS = self._xmldefs class NonLoadingISCSISR(BaseISCSI.BaseISCSISR): - def load(self, sr_uuid): + @override + def load(self, sr_uuid) -> None: pass @@ -82,10 +87,12 @@ def __init__(self, node1, node2): extra_adapter: None } - def _synchroniseAddrList(self, *args, **kwargs): + @override + def _synchroniseAddrList(self, *args, **kwargs) -> None: pass - def _init_adapters(self): + @override + def _init_adapters(self) -> None: pass @@ -115,8 +122,8 @@ def test_vdi_type_modified_by_force_tapdisk(self): class TestMultiLUNISCSISR(unittest.TestCase): - - def setUp(self): + @override + def setUp(self) -> None: self.node1 = { 'ip': '127.0.0.1', 'port': 3260, @@ -129,7 +136,7 @@ def setUp(self): 'tpgt': 'TPGT' } self.node_records = [( - "%s:%d" % (self.node2['ip'], self.node2['port']), + f"{self.node2['ip']}:{self.node2['port']}", self.node2['tpgt'], self.node2['iqn'] )] diff --git a/tests/test_ISOSR.py b/tests/test_ISOSR.py index 3aea79633..ad5d99dc3 100644 --- a/tests/test_ISOSR.py +++ b/tests/test_ISOSR.py @@ -24,6 +24,62 @@ def __init__(self, srcmd, none): self.dconf = srcmd.dconf self.srcmd = srcmd +class TestISOSR_overLocal(unittest.TestCase): + def create_isosr(self, location='/local_sr', sr_uuid='asr_uuid'): + srcmd = mock.Mock() + srcmd.dconf = { + 'location': location, + 'type': 'iso', + 'legacy_mode': True + } + srcmd.params = { + 'command': 'some_command' + } + isosr = FakeISOSR(srcmd, None) + isosr.load(sr_uuid) + return isosr + + @mock.patch('util.pread') + def test_load(self, pread): + self.create_isosr() + # Check `mount/umount` is never called. + self.assertFalse(pread.called) + + @mock.patch('os.path.exists', autospec=True) + @mock.patch('util.pread') + def test_attach_and_detach_local(self, pread, exists): + isosr = self.create_isosr() + isosr.attach(None) + self.assertFalse(pread.called) + isosr.detach(None) + self.assertFalse(pread.called) + + @mock.patch('os.path.exists', autospec=True) + @mock.patch('util.pread') + @mock.patch('ISOSR.ISOSR._checkmount') + def test_attach_and_detach_local_with_mounted_path( + self, _checkmount, pread, exists + ): + _checkmount.return_value = True + + isosr = self.create_isosr() + isosr.attach(None) + self.assertFalse(pread.called) + isosr.detach(None) + self.assertFalse(pread.called) + + @mock.patch('os.path.exists') + @mock.patch('util.pread') + def test_attach_local_with_bad_path(self, pread, exists): + # Local path doesn't exist, but error list yes. 
+ exists.side_effect = [False, True] + + isosr = self.create_isosr() + with self.assertRaises(xs_errors.SROSError) as ose: + isosr.attach(None) + self.assertEqual(ose.exception.errno, 226) + self.assertFalse(pread.called) + class TestISOSR_overNFS(unittest.TestCase): diff --git a/tests/test_LVHDSR.py b/tests/test_LVHDSR.py index 600f1379f..3c4572fd2 100644 --- a/tests/test_LVHDSR.py +++ b/tests/test_LVHDSR.py @@ -1,3 +1,5 @@ +from sm_typing import override + import copy import os import unittest @@ -36,11 +38,12 @@ def remove_stubs(self): class TestLVHDSR(unittest.TestCase, Stubs): - - def setUp(self): + @override + def setUp(self) -> None: self.init_stubs() - def tearDown(self): + @override + def tearDown(self) -> None: self.remove_stubs() def create_LVHDSR(self, master=False, command='foo', sr_uuid=None): @@ -294,8 +297,8 @@ def convert_vdi_to_meta(self, vdi_data): class TestLVHDVDI(unittest.TestCase, Stubs): - - def setUp(self): + @override + def setUp(self) -> None: self.init_stubs() lvhdutil_patcher = mock.patch('LVHDSR.lvhdutil', autospec=True) @@ -332,7 +335,8 @@ def setUp(self): self.addCleanup(mock.patch.stopall) - def tearDown(self): + @override + def tearDown(self) -> None: self.remove_stubs() def create_LVHDSR(self): diff --git a/tests/test_LVHDoHBASR.py b/tests/test_LVHDoHBASR.py index 3bc7196b6..da88e4614 100644 --- a/tests/test_LVHDoHBASR.py +++ b/tests/test_LVHDoHBASR.py @@ -1,3 +1,5 @@ +from sm_typing import override + import unittest.mock as mock import LVHDoHBASR import unittest @@ -72,8 +74,8 @@ def test_generate_config_bad_path_assert(self, class TestLVHDoHBASR(unittest.TestCase): - - def setUp(self): + @override + def setUp(self) -> None: self.host_ref = str(uuid4()) self.session_ref = str(uuid4()) self.sr_ref = str(uuid4()) diff --git a/tests/test_LVHDoISCSISR.py b/tests/test_LVHDoISCSISR.py index 3b5e1c420..ec71f20d7 100644 --- a/tests/test_LVHDoISCSISR.py +++ b/tests/test_LVHDoISCSISR.py @@ -1,3 +1,5 @@ +from sm_typing import Set, override + import os import unittest import unittest.mock as mock @@ -62,7 +64,8 @@ class TestLVHDoISCSISR_load(unittest.TestCase): Tests for 'LVHDoISCSISR.load()' """ - def setUp(self): + @override + def setUp(self) -> None: patchers = [ mock.patch( 'BaseISCSI.BaseISCSISR', @@ -124,14 +127,15 @@ class TestLVHDoISCSISR(ISCSITestCase): TEST_CLASS = 'LVHDoISCSISR' - def setUp(self): + @override + def setUp(self) -> None: util_patcher = mock.patch('LVHDoISCSISR.util', autospec=True) self.mock_util = util_patcher.start() # self.mock_util.SMlog.side_effect = print self.mock_util.isVDICommand = util.isVDICommand self.mock_util.sessions_less_than_targets = util.sessions_less_than_targets - self.base_srs = set() + self.base_srs: Set[BaseISCSISR] = set() baseiscsi_patcher = mock.patch('LVHDoISCSISR.BaseISCSI.BaseISCSISR', autospec=True) patched_baseiscsi = baseiscsi_patcher.start() diff --git a/tests/test_MooseFSSR.py b/tests/test_MooseFSSR.py new file mode 100644 index 000000000..f4e0a852f --- /dev/null +++ b/tests/test_MooseFSSR.py @@ -0,0 +1,64 @@ +from unittest import mock +import unittest + +import MooseFSSR +import unittest + + +class FakeMooseFSSR(MooseFSSR.MooseFSSR): + uuid = None + sr_ref = None + srcmd = None + other_config = {} + + def __init__(self, srcmd, none): + self.dconf = srcmd.dconf + self.srcmd = srcmd + + +class TestMooseFSSR(unittest.TestCase): + + def create_moosefssr(self, masterhost='aServer', rootpath='/aServerpath', + sr_uuid='asr_uuid', useroptions=''): + srcmd = mock.Mock() + srcmd.dconf = { + 'masterhost': 
masterhost, + 'rootpath': rootpath + } + srcmd.params = { + 'command': 'some_command', + 'device_config': {} + } + moosefssr = FakeMooseFSSR(srcmd, None) + moosefssr.load(sr_uuid) + return moosefssr + + @mock.patch('MooseFSSR.MooseFSSR._is_moosefs_available', mock.MagicMock(return_value="mfsmount")) + @mock.patch('MooseFSSR.Lock', autospec=True) + def test_load(self, Lock): + self.create_moosefssr() + + @mock.patch('MooseFSSR.MooseFSSR._is_moosefs_available', mock.MagicMock(return_value="mfsmount")) + @mock.patch('MooseFSSR.MooseFSSR.checkmount', autospec=True) + @mock.patch('MooseFSSR.Lock', autospec=True) + def test_attach_if_mounted_then_attached(self, mock_lock, mock_checkmount): + mfssr = self.create_moosefssr() + mock_checkmount.return_value=True + mfssr.attach('asr_uuid') + self.assertTrue(mfssr.attached) + + @mock.patch('MooseFSSR.MooseFSSR._is_moosefs_available', mock.MagicMock(return_value="mfsmount")) + @mock.patch('MooseFSSR.Lock', autospec=True) + def test_mount_mountpoint_empty_string(self, mock_lock): + mfssr = self.create_moosefssr() + self.assertRaises(MooseFSSR.MooseFSException, mfssr.mount) + + @mock.patch('MooseFSSR.MooseFSSR._is_moosefs_available', mock.MagicMock(return_value="mfsmount")) + @mock.patch('MooseFSSR.MooseFSSR.checkmount',return_value=False, autospec=True) + @mock.patch('MooseFSSR.Lock', autospec=True) + def test_detach_not_detached_if_not_mounted(self, mock_lock, mock_checkmount): + mfssr = self.create_moosefssr() + mfssr.attached = True + mock_checkmount.return_value=False + mfssr.detach('asr_uuid') + self.assertTrue(mfssr.attached) diff --git a/tests/test_SMBSR.py b/tests/test_SMBSR.py index d20bcc0c6..42ce035e2 100644 --- a/tests/test_SMBSR.py +++ b/tests/test_SMBSR.py @@ -1,3 +1,5 @@ +from sm_typing import Dict, override + import unittest import unittest.mock as mock import uuid @@ -12,17 +14,10 @@ class FakeSMBSR(SMBSR.SMBSR): - uuid = None - sr_ref = None - mountpoint = None - linkpath = None - path = None - session = None - remoteserver = None - def __init__(self, srcmd, none): self.dconf = srcmd.dconf self.srcmd = srcmd + self.session = None self.uuid = 'auuid' self.sr_ref = 'asr_ref' self.mountpoint = 'aMountpoint' @@ -32,14 +27,14 @@ def __init__(self, srcmd, none): class Test_SMBSR(unittest.TestCase): - - def setUp(self): + @override + def setUp(self) -> None: self.addCleanup(mock.patch.stopall) pread_patcher = mock.patch('SMBSR.util.pread', autospec=True) self.mock_pread = pread_patcher.start() self.mock_pread.side_effect = self.pread - self.pread_results = {} + self.pread_results: Dict[str, str] = {} listdir_patcher = mock.patch('SMBSR.util.listdir', autospec=True) self.mock_list_dir = listdir_patcher.start() @@ -135,7 +130,7 @@ def test_attach_with_cifs_password( def test_attach_with_cifs_password_and_domain( self, symlink, mock_lock, makeMountPoint, mock_checkmount, mock_checklinks, mock_checkwritable): - smbsr = self.create_smbsr(username="citrix\jsmith", dconf_update={"password": "winter2019"}) + smbsr = self.create_smbsr(username="citrix\\jsmith", dconf_update={"password": "winter2019"}) mock_checkmount.return_value = False makeMountPoint.return_value = "/var/mount" smbsr.attach('asr_uuid') diff --git a/tests/test_SR.py b/tests/test_SR.py index b139f4c5f..4e0de361a 100644 --- a/tests/test_SR.py +++ b/tests/test_SR.py @@ -1,3 +1,5 @@ +from sm_typing import override + import unittest import unittest.mock as mock import SR @@ -18,10 +20,12 @@ def __init__(self, device=None): def verify(self): pass - def setUp(self): + @override + def 
setUp(self) -> None: pass - def tearDown(self): + @override + def tearDown(self) -> None: pass def create_SR(self, cmd, dconf, cmd_params=None): diff --git a/tests/test_ZFSSR.py b/tests/test_ZFSSR.py new file mode 100644 index 000000000..47d72b89b --- /dev/null +++ b/tests/test_ZFSSR.py @@ -0,0 +1,116 @@ +from unittest import mock +import unittest +import uuid + +import FileSR +import os +import SR +import ZFSSR +import xs_errors + + +XML_DEFS = os.path.dirname(os.path.abspath(__file__)) + \ + '/../drivers/XE_SR_ERRORCODES.xml' + + +class FakeZFSSR(ZFSSR.ZFSSR): + uuid = None + sr_ref = None + session = None + srcmd = None + other_config = {} + vdis = {} + passthrough = True + + def __init__(self, srcmd, none): + self.dconf = srcmd.dconf + self.srcmd = srcmd + self.uuid = str(uuid.uuid4()) + + +class TestZFSSR(unittest.TestCase): + def create_zfs_sr(self, sr_uuid='asr_uuid', location='fake_path'): + srcmd = mock.Mock() + srcmd.dconf = { + 'location': location + } + srcmd.params = { + 'command': 'some_command', + 'device_config': {} + } + sr = FakeZFSSR(srcmd, None) + sr.load(sr_uuid) + return sr + + @mock.patch('ZFSSR.is_zfs_available', autospec=True) + @mock.patch('FileSR.Lock', autospec=True) + def test_load(self, lock, is_zfs_available): + self.create_zfs_sr() + + @mock.patch('xs_errors.XML_DEFS', new=XML_DEFS) + def test_load_with_zfs_unavailable(self): + failed = False + try: + self.create_zfs_sr() + except xs_errors.SROSError as e: + # Check SRUnavailable error. + failed = e.errno == 47 + self.assertTrue(failed) + + @mock.patch('ZFSSR.is_zfs_available', autospec=True) + @mock.patch('ZFSSR.is_zfs_path', autospec=True) + @mock.patch('FileSR.Lock', autospec=True) + def test_create(self, lock, is_zfs_path, is_zfs_available): + sr = self.create_zfs_sr() + sr.create(sr.uuid, 42) + + @mock.patch('ZFSSR.is_zfs_available', autospec=True) + @mock.patch('ZFSSR.is_zfs_path', autospec=True) + @mock.patch('FileSR.Lock', autospec=True) + @mock.patch('xs_errors.XML_DEFS', new=XML_DEFS) + def test_create_with_invalid_zfs_path( + self, lock, is_zfs_path, is_zfs_available + ): + failed = False + + is_zfs_path.return_value = False + sr = self.create_zfs_sr() + try: + sr.create(sr.uuid, 42) + except xs_errors.SROSError as e: + # Check ZFSSRCreate error. + failed = e.errno == 5000 + self.assertTrue(failed) + + @mock.patch('ZFSSR.is_zfs_available', autospec=True) + @mock.patch('ZFSSR.is_zfs_path', autospec=True) + @mock.patch('FileSR.Lock', autospec=True) + @mock.patch('FileSR.FileSR._checkmount', autospec=True) + @mock.patch('FileSR.FileSR._loadvdis', autospec=True) + @mock.patch('SR.SR.scan', autospec=True) + @mock.patch('os.path.ismount', autospec=True) + def test_scan( + self, ismount, scan, _loadvdis, _checkmount, lock, + is_zfs_path, is_zfs_available + ): + sr = self.create_zfs_sr() + sr.scan(sr.uuid) + + @mock.patch('ZFSSR.is_zfs_available', autospec=True) + @mock.patch('ZFSSR.is_zfs_path', autospec=True) + @mock.patch('FileSR.Lock', autospec=True) + @mock.patch('FileSR.FileSR._checkmount', autospec=True) + @mock.patch('xs_errors.XML_DEFS', new=XML_DEFS) + def test_scan_with_invalid_zfs_path( + self, _checkmount, lock, is_zfs_path, is_zfs_available + ): + failed = False + + is_zfs_path.return_value = False + sr = self.create_zfs_sr() + try: + sr.scan(sr.uuid) + except xs_errors.SROSError as e: + # Check SRUnavailable error. 
+ failed = e.errno == 47 + self.assertTrue(failed) diff --git a/tests/test_blktap2.py b/tests/test_blktap2.py index f1f0c9208..fd33899d5 100644 --- a/tests/test_blktap2.py +++ b/tests/test_blktap2.py @@ -1,3 +1,5 @@ +from sm_typing import override + import errno import json from io import StringIO @@ -24,7 +26,8 @@ class TestTapdisk(unittest.TestCase): # hence no usual decorator mocks and the monkey patching. # https://bugs.python.org/issue23078 # - def setUp(self): + @override + def setUp(self) -> None: subprocess_patcher = mock.patch("blktap2.subprocess") self.mock_subprocess = subprocess_patcher.start() @@ -111,7 +114,8 @@ def no_medium(pid, minor, type, path, options): class TestVDI(unittest.TestCase): - def setUp(self): + @override + def setUp(self) -> None: self.addCleanup(mock.patch.stopall) lock_patcher = mock.patch('blktap2.Lock', autospec=True) @@ -409,8 +413,8 @@ def test_activate_rw_already_activating_fail( class TestTapCtl(unittest.TestCase): - - def setUp(self): + @override + def setUp(self) -> None: subprocess_patcher = mock.patch("blktap2.subprocess") self.mock_subprocess = subprocess_patcher.start() diff --git a/tests/test_cbt.py b/tests/test_cbt.py index 9093f2162..18bdc8a0c 100644 --- a/tests/test_cbt.py +++ b/tests/test_cbt.py @@ -1,3 +1,5 @@ +from sm_typing import override + import unittest.mock as mock import SR import testlib @@ -15,7 +17,8 @@ class TestVDI(VDI.VDI): - def load(self, vdi_uuid): + @override + def load(self, vdi_uuid) -> None: self.vdi_type = vhdutil.VDI_TYPE_VHD self._state_mock = mock.Mock() self.path = "/mock/sr_path/" + str(vdi_uuid) @@ -25,46 +28,56 @@ def load(self, vdi_uuid): def state_mock(self): return self._state_mock - def _get_blocktracking_status(self, uuid=None): + @override + def _get_blocktracking_status(self, uuid=None) -> bool: return self.block_tracking_state - def _ensure_cbt_space(self): + @override + def _ensure_cbt_space(self) -> None: super(TestVDI, self)._ensure_cbt_space() self.state_mock._ensure_cbt_space() - def _get_cbt_logpath(self, uuid): + @override + def _get_cbt_logpath(self, uuid) -> str: super(TestVDI, self)._get_cbt_logpath(uuid) self.state_mock._get_cbt_logpath(uuid) return "/mock/sr_path/{0}.log".format(uuid) - def _create_cbt_log(self): + @override + def _create_cbt_log(self) -> str: logpath = super(TestVDI, self)._create_cbt_log() self.state_mock._create_cbt_log() self.block_tracking_state = True return logpath - def _delete_cbt_log(self): + @override + def _delete_cbt_log(self) -> None: self.state_mock._delete_cbt_log() self.block_tracking_state = False - def _rename(self, from_path, to_path): + @override + def _rename(self, from_path, to_path) -> None: self.state_mock._rename(from_path, to_path) + @override def _do_snapshot(self, sr_uuid, vdi_uuid, snapType, - cloneOp=False, secondary=None, cbtlog=None): - self.state_mock._do_snapshot(sr_uuid, vdi_uuid, snapType, cloneOp, - secondary, cbtlog) + cloneOp=False, secondary=None, cbtlog=None) -> str: + return self.state_mock._do_snapshot( + sr_uuid, vdi_uuid, snapType, cloneOp, secondary, cbtlog + ) - def _activate_cbt_log(self, logname): - self.state_mock._activate_cbt_log(logname) + @override + def _activate_cbt_log(self, logname) -> bool: + return self.state_mock._activate_cbt_log(logname) - def _deactivate_cbt_log(self, logname): + @override + def _deactivate_cbt_log(self, logname) -> None: self.state_mock._deactivate_cbt_log(logname) class TestCBT(unittest.TestCase): - - def setUp(self): + @override + def setUp(self) -> None: self.sr = mock.MagicMock() 
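(Note: most of the remaining test changes only add type annotations and the @override marker from the sm_typing shim added earlier in this diff; runtime behaviour is unchanged. A minimal sketch of what the marker enables, for illustration only: a type checker can then verify that each decorated method really overrides something in a base class.)

    from sm_typing import override

    class Base:
        def load(self, vdi_uuid) -> None:
            raise NotImplementedError

    class Derived(Base):
        @override
        def load(self, vdi_uuid) -> None:
            # If this method stopped matching a base-class method (e.g. after a
            # rename), the @override marker would let the type checker flag it.
            pass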
         self.vdi_uuid = uuid.uuid4()
         self.sr_uuid = uuid.uuid4()
diff --git a/tests/test_cifutils.py b/tests/test_cifutils.py
index 924aa3c62..0b8bf8356 100644
--- a/tests/test_cifutils.py
+++ b/tests/test_cifutils.py
@@ -44,7 +44,7 @@ def test_password_and_username_smbsr(self):
         self.assertEqual(domain, None)

     def test_password_and_username_domain(self):
-        junk_dconf = {"cifspassword": "123", "username": "citrix\jsmith"}
+        junk_dconf = {"cifspassword": "123", "username": "citrix\\jsmith"}
         junk_session = 123

         credentials, domain = cifutils.getCIFCredentials(junk_dconf, junk_session,
@@ -54,7 +54,7 @@ def test_password_and_username_domain(self):
         self.assertEqual(domain, "citrix")

     def test_password_and_username_domain_smbsr(self):
-        junk_dconf = {"password": "123", "username": "citrix\jsmith"}
+        junk_dconf = {"password": "123", "username": "citrix\\jsmith"}
         junk_session = 123

         credentials, domain = cifutils.getCIFCredentials(junk_dconf, junk_session)
@@ -90,7 +90,7 @@ def test_password_secret_and_username_smbsr(self, get_secret):
     @mock.patch('util.get_secret', autospec=True)
     def test_password_secret_and_username_also_domain(self, get_secret):
         junk_dconf = {"cifspassword_secret": "123",
-                      "username": "citrix\jsmith"}
+                      "username": "citrix\\jsmith"}
         junk_session = 123
         get_secret.return_value = 'winter2019'
         credentials, domain = cifutils.getCIFCredentials(junk_dconf,
@@ -104,7 +104,7 @@ def test_password_secret_and_username_also_domain(self, get_secret):
     @mock.patch('util.get_secret', autospec=True)
     def test_password_secret_and_username_also_domain_smbsr(self, get_secret):
         junk_dconf = {"password_secret": "123",
-                      "username": "citrix\jsmith"}
+                      "username": "citrix\\jsmith"}
         junk_session = 123
         get_secret.return_value = 'winter2019'
         credentials, domain = cifutils.getCIFCredentials(junk_dconf,
@@ -116,23 +116,23 @@ def test_password_secret_and_username_also_domain_smbsr(self, get_secret):
     def test_username_bad_domain(self):
         junk_dconf = {"cifspassword_secret": "123",
-                      "username": "citrix\gjk\jsmith"}
+                      "username": "citrix\\gjk\\jsmith"}
         junk_session = 123

         with self.assertRaises(cifutils.CIFSException) as cm:
             cifutils.getCIFCredentials(junk_dconf, junk_session, prefix="cifs")
         expected_message = ("A maximum of 2 tokens are expected "
-                            "(\). 3 were given.")
+                            "(\\). 3 were given.")
         the_exception = cm.exception
         self.assertEqual(the_exception.errstr, expected_message)

     def test_username_bad_domain_smbsr(self):
         junk_dconf = {"password_secret": "123",
-                      "username": "citrix\gjk\jsmith"}
+                      "username": "citrix\\gjk\\jsmith"}
         junk_session = 123

         with self.assertRaises(cifutils.CIFSException) as cm:
             cifutils.getCIFCredentials(junk_dconf, junk_session)
         expected_message = ("A maximum of 2 tokens are expected "
-                            "(\). 3 were given.")
+                            "(\\). 3 were given.")
         the_exception = cm.exception
         self.assertEqual(the_exception.errstr, expected_message)
diff --git a/tests/test_cleanup.py b/tests/test_cleanup.py
index a2b14d602..2cd947139 100644
--- a/tests/test_cleanup.py
+++ b/tests/test_cleanup.py
@@ -1,3 +1,5 @@
+from sm_typing import Dict, List, override
+
 import errno
 import signal
 import unittest
@@ -26,7 +28,7 @@ class FakeException(Exception):


 class FakeUtil:
-    record = []
+    record: List[str] = []

     def log(input):
         FakeUtil.record.append(input)
@@ -57,7 +59,8 @@ def create_cleanup_sr(xapi, uuid=None):


 class TestSR(unittest.TestCase):
-    def setUp(self):
+    @override
+    def setUp(self) -> None:
         time_sleep_patcher = mock.patch('cleanup.time.sleep')
         self.mock_time_sleep = time_sleep_patcher.start()

@@ -79,7 +82,8 @@ def setUp(self):

         self.addCleanup(mock.patch.stopall)

-    def tearDown(self):
+    @override
+    def tearDown(self) -> None:
         cleanup.SIGTERM = False

     def setup_abort_flag(self, ipc_mock, should_abort=False):
@@ -1883,14 +1887,14 @@ def test_not_plugged_retry(self):


 class TestLockGCActive(unittest.TestCase):
-
-    def setUp(self):
+    @override
+    def setUp(self) -> None:
         self.addCleanup(mock.patch.stopall)

         self.lock_patcher = mock.patch('cleanup.lock.Lock')
         patched_lock = self.lock_patcher.start()
         patched_lock.side_effect = self.create_lock
-        self.locks = {}
+        self.locks: Dict[str, TestLockGCActive.DummyLock] = {}

         self.sr_uuid = str(uuid4())

diff --git a/tests/test_fairlock.py b/tests/test_fairlock.py
index 3078c0d73..e5a4e298d 100644
--- a/tests/test_fairlock.py
+++ b/tests/test_fairlock.py
@@ -1,3 +1,5 @@
+from sm_typing import override
+
 import unittest
 import unittest.mock as mock

@@ -5,7 +7,8 @@
 from fairlock import Fairlock, FairlockServiceTimeout, FairlockDeadlock

 class TestFairlock(unittest.TestCase):
-    def setUp(self):
+    @override
+    def setUp(self) -> None:
         sock_patcher = mock.patch('fairlock.socket', autospec=True)
         self.mock_socket = sock_patcher.start()
         os_patcher = mock.patch('fairlock.os', autospec=True)
diff --git a/tests/test_fjournaler.py b/tests/test_fjournaler.py
index 115aaee9b..678358752 100644
--- a/tests/test_fjournaler.py
+++ b/tests/test_fjournaler.py
@@ -1,3 +1,5 @@
+from sm_typing import Any, Dict, override
+
 from io import BytesIO as StringIO
 import os
 import unittest
@@ -32,9 +34,9 @@ def readline(self):


 class TestFjournaler(unittest.TestCase):
-
-    def setUp(self):
-        self.files = {}
+    @override
+    def setUp(self) -> None:
+        self.files: Dict[str, Any] = {}
         self.open_handlers = {TEST_DIR_PATH: self.__fake_open}
         self.subject = fjournaler.Journaler(TEST_DIR_PATH)

diff --git a/tests/test_keymanagerutil.py b/tests/test_keymanagerutil.py
index d97bb172b..70edbe5db 100644
--- a/tests/test_keymanagerutil.py
+++ b/tests/test_keymanagerutil.py
@@ -1,6 +1,9 @@
 """
 Test the "fake" keymanager for testing VHD encryption
 """
+
+from sm_typing import Dict, override
+
 import base64
 import copy
 import io
@@ -15,8 +18,8 @@


 class TestKeymanagerutil(unittest.TestCase):
-
-    def setUp(self):
+    @override
+    def setUp(self) -> None:
         self.addCleanup(mock.patch.stopall)
         log_patcher = mock.patch('plugins.keymanagerutil.util.SMlog',
                                  autospec=True)
@@ -27,7 +30,7 @@ def setUp(self):
         self.mock_open = open_patcher.start()
         self.mock_open.side_effect = self.open

-        self.files = {}
+        self.files: Dict[str, io.StringIO] = {}

         isfile_patcher = mock.patch(
             'plugins.keymanagerutil.os.path.isfile', autospec=True)
diff --git a/tests/test_lock.py b/tests/test_lock.py
index f90d3bb6b..bee447a04 100644
--- a/tests/test_lock.py
+++ b/tests/test_lock.py
@@ -1,3 +1,5 @@
+from sm_typing import override
+
 import fcntl
 import unittest
 import unittest.mock as mock
@@ -12,8 +14,8 @@


 class TestLock(unittest.TestCase):
-
-    def tearDown(self):
+    @override
+    def tearDown(self) -> None:
         lock.Lock.INSTANCES = {}
         lock.Lock.BASE_INSTANCES = {}

@@ -197,7 +199,8 @@ def create_lock_class_that_fails_to_create_file(number_of_failures):
     class LockThatFailsToCreateFile(lock.LockImplementation):
         _failures = number_of_failures

-        def _open_lockfile(self):
+        @override
+        def _open_lockfile(self) -> None:
             if self._failures > 0:
                 error = IOError('No such file')
                 error.errno = errno.ENOENT
diff --git a/tests/test_lock_queue.py b/tests/test_lock_queue.py
index edd733dcb..e9622cff2 100644
--- a/tests/test_lock_queue.py
+++ b/tests/test_lock_queue.py
@@ -1,3 +1,5 @@
+from sm_typing import override
+
 import builtins
 import copy
 import os
@@ -22,7 +24,8 @@ def mock_pickle_load_fn(*args):


 class Test_LockQueue(unittest.TestCase):
-    def setUp(self):
+    @override
+    def setUp(self) -> None:
         # Re-initialize queue to empty for each test
         global saved_queue
         saved_queue = []
diff --git a/tests/test_lvutil.py b/tests/test_lvutil.py
index 2df8300b8..439c1fbee 100644
--- a/tests/test_lvutil.py
+++ b/tests/test_lvutil.py
@@ -1,3 +1,5 @@
+from sm_typing import override
+
 import unittest.mock as mock
 import os
 import syslog
@@ -25,7 +27,8 @@ def decorated(self, context, *args, **kwargs):


 class TestCreate(unittest.TestCase):
-    def setUp(self):
+    @override
+    def setUp(self) -> None:
         lock_patcher = mock.patch('lvutil.Fairlock', autospec=True)
         self.addCleanup(lock_patcher.stop)
         self.mock_lock = lock_patcher.start()
@@ -98,7 +101,8 @@ def test_create_percentage_has_precedence_over_size(self, mock_pread):


 class TestRemove(unittest.TestCase):
-    def setUp(self):
+    @override
+    def setUp(self) -> None:
         lock_patcher = mock.patch('lvutil.Fairlock', autospec=True)
         self.addCleanup(lock_patcher.stop)
         self.mock_lock = lock_patcher.start()
@@ -123,8 +127,8 @@ def test_remove_additional_config_param(self, mock_pread, _bugCleanup):


 class TestDeactivate(unittest.TestCase):
-
-    def setUp(self):
+    @override
+    def setUp(self) -> None:
         lock_patcher = mock.patch('lvutil.Fairlock', autospec=True)
         pathexists_patcher = mock.patch('lvutil.util.pathexists', autospec=True)
         lexists_patcher = mock.patch('lvutil.os.path.lexists', autospec=True)
@@ -208,7 +212,8 @@ def test_deactivate_noref_withbugcleanup_retry_fail(


 class TestActivate(unittest.TestCase):
-    def setUp(self):
+    @override
+    def setUp(self) -> None:
         self.addCleanup(mock.patch.stopall)

         lock_patcher = mock.patch('lvutil.Fairlock', autospec=True)
diff --git a/tests/test_mpath_dmp.py b/tests/test_mpath_dmp.py
index 134b40e0c..312b55dad 100644
--- a/tests/test_mpath_dmp.py
+++ b/tests/test_mpath_dmp.py
@@ -1,6 +1,9 @@
 """
 Unit tests for mpath dmp
 """
+
+from sm_typing import override
+
 import errno
 import os
 import unittest
@@ -22,7 +25,8 @@ class TestMpathDmp(unittest.TestCase):
     Unit tests for mpath dmp
     """

-    def setUp(self):
+    @override
+    def setUp(self) -> None:
         time_patcher = mock.patch('mpath_dmp.time', autospec=True)
         self.mock_time = time_patcher.start()

diff --git a/tests/test_mpathcount.py b/tests/test_mpathcount.py
index c02478057..922e1f204 100644
--- a/tests/test_mpathcount.py
+++ b/tests/test_mpathcount.py
@@ -211,27 +211,37 @@ def test_exit_log_out_error(self, mock_exit):
         session.xenapi.session.logout.assert_called_once()

     @mock.patch('mpathcount.sys.exit', autospec=True)
-    def test_check_xapi_enabled_yes(self, mock_exit):
+    @mock.patch('mpathcount.util.SMlog', autospec=True)
+    @mock.patch('mpathcount.subprocess.Popen', autospec=True)
+    def test_check_xapi_enabled_yes(self, mock_popen, mock_smlog, mock_exit):
         # Arrange
-        session = mock.MagicMock()
-        session.xenapi.host.get_record.return_value = {'enabled': True}
-        hostref = mock.MagicMock()
+        process_mock = mock.Mock()
+        attrs = {'communicate.return_value': ('output', ''), 'returncode': 0}
+        process_mock.configure_mock(**attrs)
+        mock_popen.return_value = process_mock

         # Act
-        mpathcount.check_xapi_is_enabled(session, hostref)
+        result = mpathcount.check_xapi_is_enabled()

         # Assert
+        self.assertTrue(result)
         mock_exit.assert_not_called()
+        mock_smlog.assert_not_called()

     @mock.patch('mpathcount.sys.exit', autospec=True)
-    def test_check_xapi_enabled_no(self, mock_exit):
+    @mock.patch('mpathcount.util.SMlog', autospec=True)
+    @mock.patch('mpathcount.subprocess.Popen', autospec=True)
+    def test_check_xapi_enabled_no(self, mock_popen, mock_smlog, mock_exit):
         # Arrange
-        session = mock.MagicMock()
-        session.xenapi.host.get_record.return_value = {'enabled': False}
-        hostref = mock.MagicMock()
+        process_mock = mock.Mock()
+        attrs = {'communicate.return_value': ('', 'error'), 'returncode': 1}
+        process_mock.configure_mock(**attrs)
+        mock_popen.return_value = process_mock

         # Act
-        mpathcount.check_xapi_is_enabled(session, hostref)
+        result = mpathcount.check_xapi_is_enabled()

         # Assert
-        mock_exit.assert_called_once_with(0)
+        self.assertFalse(result)
+        mock_exit.assert_not_called()
+        mock_smlog.assert_called_once_with('XAPI health check failed: error')
diff --git a/tests/test_on_slave.py b/tests/test_on_slave.py
index 90347477d..b55e24aae 100644
--- a/tests/test_on_slave.py
+++ b/tests/test_on_slave.py
@@ -1,3 +1,5 @@
+from sm_typing import override
+
 import errno
 import unittest
 import unittest.mock as mock
@@ -13,13 +15,22 @@

 class Test_on_slave_is_open(unittest.TestCase):

-    MOCK_IMPORTS = ['SRCommand', 'SR', 'NFSSR', 'EXTSR', 'LVHDSR', 'blktap2']
+    MOCK_IMPORTS = [
+        'SRCommand',
+        'SR',
+        'NFSSR',
+        'EXTSR',
+        'LVHDSR',
+        'LinstorSR',
+        'blktap2'
+    ]

     def fake_import(self, *args, **kwargs):
         print('Asked to import {}'.format(args[0]))
         return self.mocks.get(args[0], self.real_import(*args, **kwargs))

-    def setUp(self):
+    @override
+    def setUp(self) -> None:
         self.addCleanup(mock.patch.stopall)

         self.mocks = {x: mock.MagicMock() for x in self.MOCK_IMPORTS}
@@ -30,6 +41,7 @@ def setUp(self):
         self.mock_import.side_effect = self.fake_import

         self.mock_sr = mock.MagicMock()
+        self.mock_sr.MOUNT_BASE = "/tmp/sr_mount"
         self.mocks['SR'] = self.mock_sr
         self.mock_blktap2 = mock.MagicMock()
         self.mocks['blktap2'] = self.mock_blktap2
@@ -123,7 +135,8 @@ class Test_on_slave_refresh_lun(unittest.TestCase):
     Tests for refresh_lun_size_by_SCSIid
     """

-    def setUp(self):
+    @override
+    def setUp(self) -> None:
         self.mock_session = mock.MagicMock()

     @mock.patch('on_slave.scsiutil')
@@ -155,7 +168,8 @@ class Test_on_slave_multi(unittest.TestCase):

     TMP_RENAME_PREFIX = "TEST_OLD_"

-    def setUp(self):
+    @override
+    def setUp(self) -> None:
         self.session = mock.MagicMock()

         lvmcache_patcher = mock.patch('on_slave.LVMCache', autospec=True)
diff --git a/tests/test_sr_health_check.py b/tests/test_sr_health_check.py
index 47c592123..bc879f96e 100644
--- a/tests/test_sr_health_check.py
+++ b/tests/test_sr_health_check.py
@@ -1,3 +1,5 @@
+from sm_typing import override
+
 import unittest
 import unittest.mock as mock

@@ -10,8 +12,8 @@


 class TestSrHealthCheck(unittest.TestCase):
-
-    def setUp(self):
+    @override
+    def setUp(self) -> None:
         util_patcher = mock.patch('sr_health_check.util')
         self.mock_util = util_patcher.start()
         self.mock_session = mock.MagicMock()
diff --git a/tests/test_srmetadata.py b/tests/test_srmetadata.py
index 720f12ff4..fbce62444 100644
--- a/tests/test_srmetadata.py
+++ b/tests/test_srmetadata.py
@@ -1,3 +1,5 @@
+from sm_typing import Generator, override
+
 import io
 import random
 import string
@@ -453,14 +455,17 @@ def __init__(self):
         super().__init__()
         self._metadata_file_content = b'\x00' * 4 * 1024 * 1024

-    def start(self):
+    @override
+    def start(self) -> None:
         super().start()
         self.patch("util.gen_uuid", new=genuuid)

-    def generate_device_paths(self):
+    @override
+    def generate_device_paths(self) -> Generator[str, None, None]:
         yield self.METADATA_PATH

-    def fake_open(self, fname, mode='r'):
+    @override
+    def fake_open(self, fname, mode='r') -> io.TextIOBase:
         if fname != self.METADATA_PATH:  # pragma: no cover
             return super().fake_open(fname, mode)
         else:
diff --git a/tests/test_storage_init.py b/tests/test_storage_init.py
index 9b7138a4b..38e465708 100644
--- a/tests/test_storage_init.py
+++ b/tests/test_storage_init.py
@@ -1,3 +1,5 @@
+from sm_typing import DefaultDict, Dict, List, override
+
 import json
 import os
 import re
@@ -31,7 +33,8 @@ class TestStorageInit(unittest.TestCase):
     storage after installation.
     """

-    def setUp(self):
+    @override
+    def setUp(self) -> None:
         self.test_dir = tempfile.TemporaryDirectory()

         # There are tweaks we need to make the to storage-init:
@@ -103,11 +106,12 @@ def setUp(self):
         self.copy_command("touch")

         self.script_exited = False
-        self.created_srs = defaultdict(list)
-        self.misc_xe_calls = []
-        self.unanticipated_xe_calls = []
+        self.created_srs: DefaultDict[str, List[Dict[str, str]]] = defaultdict(list)
+        self.misc_xe_calls: List[List[str]] = []
+        self.unanticipated_xe_calls: List[List[str]] = []

-    def tearDown(self):
+    @override
+    def tearDown(self) -> None:
         self.socket.close()
         self.test_dir.cleanup()

@@ -355,7 +359,7 @@ def _xe_command(self, args):  # pragma: no cover
         combined_args = " ".join(sorted(args[1:]))

         if subcmd == "sm-list":
-            m = re.match("--minimal params=uuid type=(\S+)$", combined_args)
+            m = re.match(r"--minimal params=uuid type=(\S+)$", combined_args)
             if m:
                 sm_uuid = "uuid-for-sr-type-" + m.group(1)
                 return CmdResult(stdout=f"{sm_uuid}\n")
@@ -365,7 +369,7 @@ def _xe_command(self, args):  # pragma: no cover
             if not self.created_srs:
                 return CmdResult()

-            m = re.match("--minimal params=uuid type=(\S+)$", combined_args)
+            m = re.match(r"--minimal params=uuid type=(\S+)$", combined_args)
             if m:
                 sr_type = m.group(1)
                 num_srs = len(self.created_srs[sr_type])
diff --git a/tests/test_util.py b/tests/test_util.py
index 965eec150..26912f1f6 100644
--- a/tests/test_util.py
+++ b/tests/test_util.py
@@ -1,3 +1,5 @@
+from sm_typing import Any, Dict, List, Set, override
+
 import copy
 import errno
 import io
@@ -43,7 +45,8 @@ class TestUtil(unittest.TestCase):
     Tests for the util module methods
     """

-    def setUp(self):
+    @override
+    def setUp(self) -> None:
         # OS Patchers
         statvfs_patcher = mock.patch("util.os.statvfs", autospec=True)
         self.mock_statvfs = statvfs_patcher.start()
@@ -53,7 +56,7 @@ def setUp(self):
         self.mock_mkdir = mkdir_patcher.start()
         unlink_patcher = mock.patch('util.os.unlink', autospec=True)
         self.mock_unlink = unlink_patcher.start()
-        self.dir_contents = {}
+        self.dir_contents: Dict[str, List[str]] = {}
         listdir_patcher = mock.patch('util.os.listdir', autospec=True)
         self.mock_listdir = listdir_patcher.start()
         self.mock_listdir.side_effect = self.list_dir
@@ -77,12 +80,12 @@ def setUp(self):
         self.mock_session = mock.MagicMock()
         self.mock_xenapi.xapi_local.return_value = self.mock_session

-        self.processes = {}
+        self.processes: Dict[str, Any] = {}
         popen_patcher = mock.patch('util.subprocess.Popen', autospec=True)
         self.mock_popen = popen_patcher.start()
         self.mock_popen.side_effect = self.popen

-        self.mock_files = {}
+        self.mock_files: Dict[str, Any] = {}

         self.addCleanup(mock.patch.stopall)

@@ -714,7 +717,8 @@ def test_unictrunc(self):


 class TestFistPoints(unittest.TestCase):
-    def setUp(self):
+    @override
+    def setUp(self) -> None:
         self.addCleanup(mock.patch.stopall)
         sleep_patcher = mock.patch('util.time.sleep', autospec=True)
         self.mock_sleep = sleep_patcher.start()
@@ -725,7 +729,7 @@ def setUp(self):
         exists_patcher = mock.patch('util.os.path.exists', autospec=True)
         self.mock_exists = exists_patcher.start()
         self.mock_exists.side_effect = self.exists
-        self.existing_files = set()
+        self.existing_files: Set[str] = set()

         xenapi_patcher = mock.patch('util.XenAPI', autospec=True)
         patched_xenapi = xenapi_patcher.start()
diff --git a/tests/testlib.py b/tests/testlib.py
index 167bb172f..066188f4c 100644
--- a/tests/testlib.py
+++ b/tests/testlib.py
@@ -1,3 +1,5 @@
+from sm_typing import Any, Generator, override
+
 import re
 import unittest.mock as mock
 import os
@@ -49,12 +51,13 @@ def add_disk(self):
     def add_parameter(self, host_class, values):
         self.parameters.append((host_class, values))

-    def adapter_device_paths(self, host_id):
+    def adapter_device_paths(self, host_id) -> Generator[str, None, None]:
         yield '/sys/class/scsi_host/host%s' % host_id


 class AdapterWithNonBlockDevice(SCSIAdapter):
-    def adapter_device_paths(self, host_id):
+    @override
+    def adapter_device_paths(self, host_id) -> Generator[str, None, None]:
         for adapter_device_path in super(AdapterWithNonBlockDevice,
                                          self).adapter_device_paths(host_id):
             yield adapter_device_path
@@ -115,7 +118,7 @@ def patch(self, *args, **kwargs):
         self.patchers.append(patcher)
         patcher.start()

-    def start(self):
+    def start(self) -> None:
         self.patch('builtins.open', new=self.fake_open)
         self.patch('fcntl.fcntl', new=self.fake_fcntl)
         self.patch('os.path.exists', new=self.fake_exists)
@@ -190,7 +193,7 @@ def fake_uname(self):
             'x86_64'
         )

-    def fake_open(self, fname, mode='r'):
+    def fake_open(self, fname, mode='r') -> Any:
         if fname == '/etc/xensource-inventory':
             return io.StringIO(self.generate_inventory_contents())

@@ -264,7 +267,7 @@ def generate_path_content(self):
         for path, value in self._path_content.items():
             yield (path, value)

-    def generate_device_paths(self):
+    def generate_device_paths(self) -> Generator[str, None, None]:
         actual_disk_letter = 'a'
         for host_id, adapter in enumerate(self.scsi_adapters):
             for adapter_device_path in adapter.adapter_device_paths(host_id):