Skip to content

Commit e61f2a5

Browse files
committed
in progress
Signed-off-by: Ronan Abhamon <ronan.abhamon@vates.tech>
1 parent d7cd0b1 commit e61f2a5

File tree

9 files changed

+272
-88
lines changed

9 files changed

+272
-88
lines changed

drivers/LinstorSR.py

Lines changed: 16 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -559,23 +559,19 @@ def create(self, uuid, size) -> None:
559559
opterr='Redundancy greater than host count'
560560
)
561561

562-
xenapi = self.session.xenapi
563-
srs = xenapi.SR.get_all_records_where(
564-
'field "type" = "{}"'.format(self.DRIVER_TYPE)
565-
)
566-
srs = dict([e for e in srs.items() if e[1]['uuid'] != self.uuid])
562+
srs = util.get_linstor_srs(self.session)
563+
try:
564+
srs.pop(self.uuid)
565+
except KeyError:
566+
# We cannot guarantee that the new SR key will be there, even if it should be the case.
567+
pass
567568

568-
for sr in srs.values():
569-
for pbd in sr['PBDs']:
570-
device_config = xenapi.PBD.get_device_config(pbd)
571-
group_name = device_config.get('group-name')
572-
if group_name and group_name == self._group_name:
573-
raise xs_errors.XenError(
574-
'LinstorSRCreate',
575-
opterr='group name must be unique, already used by PBD {}'.format(
576-
xenapi.PBD.get_uuid(pbd)
577-
)
578-
)
569+
pbd_uuid = util.find_pbd_uuid_from_dconf_value(self.session, srs, "group-name", self._group_name)
570+
if pbd_uuid:
571+
raise xs_errors.XenError(
572+
'LinstorSRCreate',
573+
opterr=f"group name must be unique, already used by PBD {pbd_uuid}"
574+
)
579575

580576
if srs:
581577
raise xs_errors.XenError(
@@ -1838,7 +1834,8 @@ def attach(self, sr_uuid, vdi_uuid) -> str:
18381834
return self._attach_using_http_nbd()
18391835

18401836
# Ensure we have a path...
1841-
self.sr._vhdutil.create_chain_paths(self.uuid, readonly=not writable)
1837+
chain = self.sr._vhdutil.create_chain_paths(self.uuid, not writable, cleanup.LinstorSR.abort_gc_from_openers_vdi)
1838+
chain.close()
18421839

18431840
self.attached = True
18441841
return VDI.VDI.attach(self, self.sr.uuid, self.uuid)
@@ -2375,7 +2372,8 @@ def _snapshot(self, snap_type, cbtlog=None, cbt_consistency=None):
23752372
raise xs_errors.XenError('SnapshotChainTooLong')
23762373

23772374
# Ensure we have a valid path if we don't have a local diskful.
2378-
self.sr._vhdutil.create_chain_paths(self.uuid, readonly=True)
2375+
chain = self.sr._vhdutil.create_chain_paths(self.uuid, True, cleanup.LinstorSR.abort_gc_from_openers_vdi)
2376+
chain.close()
23792377

23802378
volume_path = self.path
23812379
if not util.pathexists(volume_path):

drivers/VDI.py

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -123,14 +123,8 @@ def __init__(self, sr, uuid):
123123

124124
@staticmethod
125125
def from_uuid(session, vdi_uuid):
126-
127-
_VDI = session.xenapi.VDI
128-
vdi_ref = _VDI.get_by_uuid(vdi_uuid)
129-
sr_ref = _VDI.get_SR(vdi_ref)
130-
131-
_SR = session.xenapi.SR
132-
sr_uuid = _SR.get_uuid(sr_ref)
133-
126+
vdi_ref = session.xenapi.VDI.get_by_uuid(vdi_uuid)
127+
sr_uuid = util.get_sr_from_vdi_ref(session, vdi_ref)
134128
sr = SR.SR.from_uuid(session, sr_uuid)
135129

136130
sr.srcmd.params['vdi_ref'] = vdi_ref

drivers/blktap2.py

Lines changed: 42 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@
5555
from socket import socket, AF_UNIX, SOCK_STREAM
5656

5757
try:
58-
from linstorvolumemanager import log_drbd_openers
58+
from linstorvolumemanager import get_controller_uri, get_all_volume_openers, LinstorVolumeManager
5959
LINSTOR_AVAILABLE = True
6060
except ImportError:
6161
LINSTOR_AVAILABLE = False
@@ -424,6 +424,8 @@ def unpause(cls, pid, minor, _type=None, _file=None, mirror=None,
424424
args += ["-a", str(params)]
425425
if cbtlog:
426426
args.extend(["-c", cbtlog])
427+
428+
# TODO: Handle issue.
427429
cls._pread(args)
428430

429431
@classmethod
@@ -820,6 +822,38 @@ def cgclassify(pid):
820822
except util.CommandException as e:
821823
util.logException(e)
822824

825+
@staticmethod
def abort_linstor_gc(drbd_path: str) -> bool:
    """Try to abort a LINSTOR GC that prevents `drbd_path` from being opened.

    The SR owning the DRBD volume is looked up through the PBD whose
    device config `group-name` matches the LINSTOR group of the volume, then
    `cleanup.LinstorSR.abort_gc_from_openers_sr` is called with the current
    openers of the volume.

    :param drbd_path: Path of the DRBD device. Only paths starting with
        `/dev/drbd/by-res/xcp-volume-` are handled.
    :return: True if `abort_gc_from_openers_sr` reported success, False
        otherwise (LINSTOR unavailable, foreign path, no matching PBD, or
        nothing aborted).
    """
    if not LINSTOR_AVAILABLE or not drbd_path.startswith("/dev/drbd/by-res/xcp-volume-"):
        return False

    # NOTE(review): assumes the path ends with `<volume name>/<volume number>`,
    # so the volume name is the second-to-last component -- confirm.
    _, volume_name, _ = drbd_path.rsplit("/", 2)
    group_name = LinstorVolumeManager.get_volume_group_name(volume_name)

    openers = get_all_volume_openers(volume_name, "0")

    session = XenAPI.xapi_local()
    session.xenapi.login_with_password("root", "", "", "SM")
    try:
        srs = util.get_linstor_srs(session)
        pbd_uuid = util.find_pbd_uuid_from_dconf_value(session, srs, "group-name", group_name)
        if not pbd_uuid:
            # Without this guard, PBD.get_by_uuid would raise here and hide the
            # tapdisk error that triggered this call.
            util.SMlog(
                f"Unable to find a PBD with group name `{group_name}` for DRBD resource `{drbd_path}`"
            )
            return False

        pbd_ref = session.xenapi.PBD.get_by_uuid(pbd_uuid)
        pbd_rec = session.xenapi.PBD.get_record(pbd_ref)

        sr_ref = pbd_rec["SR"]
        sr_uuid = session.xenapi.SR.get_uuid(sr_ref)

        # Imported locally, as in the original code.
        import cleanup  # pylint: disable=C0415
        if cleanup.LinstorSR.abort_gc_from_openers_sr(sr_uuid, openers):
            return True

        util.SMlog(f"Unable to run tapdisk, openers of DRBD resource `{drbd_path}`: {openers}")
    finally:
        # Always release the XAPI session opened above.
        session.xenapi.session.logout()

    return False
856+
823857
@classmethod
824858
def launch_on_tap(cls, blktap, path, _type, options):
825859

@@ -844,13 +878,13 @@ def launch_on_tap(cls, blktap, path, _type, options):
844878
err = (
845879
'status' in e.info and e.info['status']
846880
) or None
847-
if err in (errno.EIO, errno.EROFS, errno.EAGAIN):
848-
if retry_open < 5:
849-
retry_open += 1
850-
time.sleep(1)
851-
continue
852-
if LINSTOR_AVAILABLE and err == errno.EROFS:
853-
log_drbd_openers(path)
881+
if err in (errno.EROFS, errno.EMEDIUMTYPE) and cls.abort_linstor_gc(path):
882+
continue
883+
884+
if err in (errno.EIO, errno.EAGAIN, errno.EROFS, errno.EMEDIUMTYPE) and retry_open < 5:
885+
retry_open += 1
886+
time.sleep(1)
887+
continue
854888
raise
855889
try:
856890
tapdisk = cls.__from_blktap(blktap)

drivers/cleanup.py

Lines changed: 44 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
# Script to coalesce and garbage collect VHD-based SR's in the background
1919
#
2020

21-
from sm_typing import Optional, override
21+
from sm_typing import Dict, Optional, override
2222

2323
import os
2424
import os.path
@@ -56,8 +56,7 @@
5656
from linstorjournaler import LinstorJournaler
5757
from linstorvhdutil import LinstorVhdUtil
5858
from linstorvolumemanager import get_controller_uri
59-
from linstorvolumemanager import LinstorVolumeManager
60-
from linstorvolumemanager import LinstorVolumeManagerError
59+
from linstorvolumemanager import LinstorVolumeManager, LinstorVolumeManagerError, LinstorVolumeOpeners
6160
from linstorvolumemanager import PERSISTENT_PREFIX as LINSTOR_PERSISTENT_PREFIX
6261

6362
LINSTOR_AVAILABLE = True
@@ -3656,17 +3655,55 @@ def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid):
36563655

36573656
def _checkSlaves(self, vdi):
36583657
try:
3659-
all_openers = self._linstor.get_volume_openers(vdi.uuid)
3660-
for openers in all_openers.values():
3661-
for opener in openers.values():
3658+
openers = self._linstor.get_volume_openers(vdi.uuid)
3659+
for host_openers in openers.values():
3660+
for opener in host_openers.values():
36623661
if opener['process-name'] != 'tapdisk':
36633662
raise util.SMException(
3664-
'VDI {} is in use: {}'.format(vdi.uuid, all_openers)
3663+
'VDI {} is in use: {}'.format(vdi.uuid, openers)
36653664
)
36663665
except LinstorVolumeManagerError as e:
36673666
if e.code != LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS:
36683667
raise
36693668

3669+
@classmethod
def abort_gc_from_openers_vdi(cls, vdi_uuid: str, openers: 'LinstorVolumeOpeners') -> bool:
    """Forward to `_abort_gc_from_openers`, flagging `vdi_uuid` as a VDI UUID.

    :param vdi_uuid: UUID of a VDI.
    :param openers: Openers of the volume, as given by the caller.
    :return: The result of `_abort_gc_from_openers`.
    """
    return cls._abort_gc_from_openers(uuid=vdi_uuid, is_vdi_uuid=True, openers=openers)
3672+
3673+
@classmethod
def abort_gc_from_openers_sr(cls, sr_uuid: str, openers: 'LinstorVolumeOpeners') -> bool:
    """Forward to `_abort_gc_from_openers`, flagging `sr_uuid` as an SR UUID.

    :param sr_uuid: UUID of an SR.
    :param openers: Openers of the volume, as given by the caller.
    :return: The result of `_abort_gc_from_openers`.
    """
    return cls._abort_gc_from_openers(uuid=sr_uuid, is_vdi_uuid=False, openers=openers)
3676+
3677+
@staticmethod
def _abort_gc_from_openers(uuid: str, is_vdi_uuid: bool, openers: 'LinstorVolumeOpeners') -> bool:
    """Ask the master to abort the GC if a coalesce runs on another host.

    `openers` is scanned for a `vhd-util` process whose command line contains
    `coalesce`. Openers of the local host are ignored. On the first match,
    the `abortGc` function of the manager plugin is called on the host
    returned by `util.get_master_ref`.

    :param uuid: UUID of a VDI or of an SR, depending on `is_vdi_uuid`.
    :param is_vdi_uuid: If True, `uuid` is a VDI UUID and the SR UUID is
        resolved with `util.get_sr_from_vdi_uuid`; otherwise `uuid` is used
        as the SR UUID.
    :param openers: Openers of the volume.
    :return: True if the plugin answered `'True'`, False if it answered
        anything else or if no matching opener was found.
    """
    import socket

    node_name = socket.gethostname()

    # NOTE(review): the outer keys of `openers` are taken to be hostnames, as
    # other code in this change iterates `for hostname, host_openers in
    # openers.items()`. The previous code compared the inner keys to the local
    # hostname instead -- confirm against LinstorVolumeOpeners.
    for hostname, host_openers in openers.items():
        if hostname == node_name:
            continue

        for opener in host_openers.values():
            if opener['process-name'] != 'vhd-util' or 'coalesce' not in opener['cmdline']:
                continue

            # Imported only when needed.
            from linstorvhdutil import MANAGER_PLUGIN

            session = XAPI.getSession()
            try:
                sr_uuid = util.get_sr_from_vdi_uuid(session, uuid) if is_vdi_uuid else uuid
                util.SMlog(f"LINSTOR volume is coalescing on `{sr_uuid}`. We're going to interrupt the GC...")
                result = session.xenapi.host.call_plugin(
                    util.get_master_ref(session), MANAGER_PLUGIN, "abortGc", {"srUuid": sr_uuid}
                )
                # The plugin answers with str(True) or str(False). bool() of any
                # non-empty string is True, so compare the text explicitly.
                return result == str(True)
            finally:
                session.xenapi.session.logout()

    return False
3705+
3706+
36703707

36713708
################################################################################
36723709
#

drivers/linstor-manager

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -385,6 +385,19 @@ def destroy(session, args):
385385
return str(False)
386386

387387

388+
def abort_gc(session, args):
    """Plugin function: call `cleanup.abort` for the SR given in `args`.

    :param session: XAPI session given to the plugin (not used here).
    :param args: Plugin arguments; `args['srUuid']` is the UUID of the SR.
    :return: `'True'` if `cleanup.abort` returned without raising, `'False'`
        if anything raised (the error is logged).
    """
    try:
        target_sr = args['srUuid']

        # Imported locally, as in the original code; an import failure is
        # reported through the generic handler below.
        import cleanup
        cleanup.abort(target_sr)
    except Exception as e:
        util.SMlog('linstor-manager:abort_gc error: {}'.format(e))
        return str(False)

    return str(True)
399+
400+
388401
def check(session, args):
389402
try:
390403
device_path = args['devicePath']
@@ -1231,6 +1244,7 @@ if __name__ == '__main__':
12311244
'attach': attach,
12321245
'detach': detach,
12331246
'destroy': destroy,
1247+
'abortGc': abort_gc,
12341248

12351249
# vhdutil wrappers called by linstorvhdutil.
12361250
# Note: When a VHD is open in RO mode (so for all vhdutil getters),

drivers/linstorvhdutil.py

Lines changed: 63 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,10 @@
1414
# You should have received a copy of the GNU General Public License
1515
# along with this program. If not, see <https://www.gnu.org/licenses/>.
1616

17-
from sm_typing import override
17+
from sm_typing import Any, Callable, Dict, IO, List, Optional, override
1818

1919
from linstorjournaler import LinstorJournaler
20-
from linstorvolumemanager import LinstorVolumeManager
20+
from linstorvolumemanager import LinstorVolumeManager, LinstorVolumeOpeners
2121
import base64
2222
import errno
2323
import json
@@ -165,47 +165,74 @@ def wrapper(*args, **kwargs):
165165
class LinstorVhdUtil:
166166
MAX_SIZE = 2 * 1024 * 1024 * 1024 * 1024 # Max VHD size.
167167

168+
class Chain:
    """Keep the files of a VHD chain open until `close` is called.

    Instances are built by `create_chain_paths` from the handles it opened.
    The object can also be used as a context manager: the handles are closed
    when the `with` block exits.
    """

    def __init__(self, files: List[IO], leaf_path: str):
        """
        :param files: Open file objects to keep and later close.
        :param leaf_path: Device path of the leaf of the chain.
        """
        self._files = files
        self._leaf_path = leaf_path

    def __enter__(self) -> 'Chain':
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        # Never swallow the exception raised in the `with` body.
        self.close()

    @property
    def leaf_path(self) -> str:
        """Device path of the leaf of the chain."""
        return self._leaf_path

    def close(self) -> None:
        """Close every held file, ignoring errors raised by individual closes."""
        for file in self._files:
            try:
                file.close()
            except Exception:  # pylint: disable = W0718
                # Best effort: a failing handle must not prevent closing the others.
                pass
183+
168184
def __init__(self, session, linstor):
169185
self._session = session
170186
self._linstor = linstor
171187

172-
def create_chain_paths(self, vdi_uuid, readonly=False):
188+
def create_chain_paths(
189+
self,
190+
vdi_uuid: str,
191+
readonly=False,
192+
cb_openers: Optional[Callable[[str, LinstorVolumeOpeners], Any]] = None
193+
) -> Chain:
173194
# OPTIMIZE: Add a limit_to_first_allocated_block param to limit vhdutil calls.
174195
# Useful for the snapshot code algorithm.
175196

176-
leaf_vdi_path = self._linstor.get_device_path(vdi_uuid)
177-
path = leaf_vdi_path
178-
while True:
179-
if not util.pathexists(path):
180-
raise xs_errors.XenError(
181-
'VDIUnavailable', opterr='Could not find: {}'.format(path)
182-
)
197+
files = []
183198

184-
# Diskless path can be created on the fly, ensure we can open it.
185-
def check_volume_usable():
186-
while True:
187-
try:
188-
with open(path, 'r' if readonly else 'r+'):
189-
pass
190-
except IOError as e:
191-
if e.errno == errno.ENODATA:
192-
time.sleep(2)
193-
continue
194-
if e.errno == errno.EROFS or e.errno == errno.EMEDIUMTYPE:
195-
util.SMlog('Volume not attachable because used. Openers: {}'.format(
196-
self._linstor.get_volume_openers(vdi_uuid)
197-
))
198-
raise
199+
leaf_path = self._linstor.get_device_path(vdi_uuid)
200+
path = leaf_path
201+
try:
202+
while True:
203+
if not util.pathexists(path):
204+
raise xs_errors.XenError(
205+
'VDIUnavailable', opterr='Could not find: {}'.format(path)
206+
)
207+
208+
# Diskless path can be created on the fly, ensure we can open it.
209+
def check_volume_usable():
210+
while True:
211+
try:
212+
files.append(open(path, 'r' if readonly else 'r+'))
213+
except OSError as e:
214+
if e.errno == errno.ENODATA:
215+
time.sleep(2)
216+
continue
217+
if e.errno in (errno.EROFS, errno.EMEDIUMTYPE):
218+
openers = self._linstor.get_volume_openers(vdi_uuid)
219+
util.SMlog(f'Volume not attachable because used. Openers: {openers}')
220+
if cb_openers:
221+
cb_openers(vdi_uuid, openers)
222+
raise
223+
break
224+
util.retry(check_volume_usable, 15, 2)
225+
226+
vdi_uuid = self.get_vhd_info(vdi_uuid).parentUuid
227+
if not vdi_uuid:
199228
break
200-
util.retry(check_volume_usable, 15, 2)
201-
202-
vdi_uuid = self.get_vhd_info(vdi_uuid).parentUuid
203-
if not vdi_uuid:
204-
break
205-
path = self._linstor.get_device_path(vdi_uuid)
206-
readonly = True # Non-leaf is always readonly.
229+
path = self._linstor.get_device_path(vdi_uuid)
230+
readonly = True # Non-leaf is always readonly.
231+
except Exception as e:
232+
self.Chain(files, leaf_path).close()
233+
raise e
207234

208-
return leaf_vdi_path
235+
return self.Chain(files, leaf_path)
209236

210237
# --------------------------------------------------------------------------
211238
# Getters: read locally and try on another host in case of failure.
@@ -554,7 +581,7 @@ def _call_method(self, local_method, remote_method, device_path, use_parent, *ar
554581
# B.3. Call!
555582
def remote_call():
556583
try:
557-
all_openers = self._linstor.get_volume_openers(openers_uuid)
584+
openers = self._linstor.get_volume_openers(openers_uuid)
558585
except Exception as e:
559586
raise xs_errors.XenError(
560587
'VDIUnavailable',
@@ -563,8 +590,8 @@ def remote_call():
563590
)
564591

565592
no_host_found = True
566-
for hostname, openers in all_openers.items():
567-
if not openers:
593+
for hostname, host_openers in openers.items():
594+
if not host_openers:
568595
continue
569596

570597
host_ref = self._find_host_ref_from_hostname(hosts, hostname)

0 commit comments

Comments
 (0)