Skip to content

Commit 1169b99

Browse files
committed
feat(linstor): add LinstorManager class
Signed-off-by: Ronan Abhamon <ronan.abhamon@vates.tech>
1 parent d7cd0b1 commit 1169b99

File tree

1 file changed

+268
-0
lines changed

1 file changed

+268
-0
lines changed

scripts/linstor-repair

Lines changed: 268 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,268 @@
1+
#!/usr/bin/env python3
2+
#
3+
# Copyright (C) 2026 Vates SAS
4+
#
5+
# This program is free software: you can redistribute it and/or modify
6+
# it under the terms of the GNU General Public License as published by
7+
# the Free Software Foundation, either version 3 of the License, or
8+
# (at your option) any later version.
9+
# This program is distributed in the hope that it will be useful,
10+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
11+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12+
# GNU General Public License for more details.
13+
#
14+
# You should have received a copy of the GNU General Public License
15+
# along with this program. If not, see <https://www.gnu.org/licenses/>.
16+
17+
from typing import Any, Callable, Dict, List, Optional, TypeVar
18+
19+
# TODO: Remove
20+
import sys
21+
sys.path[0] = '/opt/xensource/sm/'
22+
23+
import argparse
24+
import linstor
25+
import time
26+
27+
from collections import defaultdict
28+
29+
# TODO: Remove
30+
from linstorvolumemanager import _get_controller_uri
31+
32+
T = TypeVar('T')
33+
34+
# ==============================================================================
35+
36+
def wait_for_condition(function: Callable[[], T], timeout: int, interval: int) -> T:
37+
if timeout <= 0:
38+
return function()
39+
40+
start_time = time.time()
41+
while True:
42+
result = function()
43+
if result:
44+
return result
45+
if time.time() - start_time >= timeout:
46+
return None
47+
time.sleep(interval)
48+
49+
def logger(message: str) -> None:
50+
print(message)
51+
52+
# ==============================================================================
53+
54+
class LinstorManagerError(Exception):
55+
ERR_GENERIC = 0
56+
ERR_NETWORK = 1
57+
ERR_CONTROLLER_NOT_FOUND = 2
58+
ERR_RESOURCE_DESTROY = 3
59+
ERR_RESOURCE_TOGGLE = 4
60+
ERR_RESOURCE_PROP_UPDATE = 5
61+
62+
def __init__(self, message: str, code=ERR_GENERIC) -> None:
63+
super().__init__(message)
64+
self._code = code
65+
66+
@property
67+
def code(self) -> int:
68+
return self._code
69+
70+
# ------------------------------------------------------------------------------
71+
72+
class LinstorManagerBase(object):
73+
def __init__(self, uri = None) -> None:
74+
self._linstor = None
75+
self._uri = uri
76+
77+
def connect(self) -> None:
78+
if self._linstor:
79+
return
80+
81+
uri = None
82+
network_issue = False
83+
def connect_impl():
84+
nonlocal uri
85+
nonlocal network_issue
86+
87+
if self._uri:
88+
uri = self._uri
89+
else:
90+
uri = self.find_controller_uri(timeout=0)
91+
if not uri:
92+
return False
93+
94+
network_issue = False
95+
try:
96+
instance = linstor.Linstor(uri, timeout=5, keep_alive=True)
97+
instance.connect()
98+
self._linstor = instance
99+
self._uri = uri
100+
return True
101+
except (linstor.errors.LinstorNetworkError, linstor.errors.LinstorTimeoutError):
102+
network_issue = True
103+
except Exception as e:
104+
logger(f"Unable to connect to LINSTOR: `{e}`.")
105+
return False
106+
107+
if wait_for_condition(connect_impl, timeout=120, interval=1):
108+
return
109+
110+
self._uri = None
111+
if not uri:
112+
raise LinstorManagerError(
113+
"Unable to find controller uri...",
114+
LinstorManagerError.ERR_CONTROLLER_NOT_FOUND
115+
)
116+
117+
raise LinstorManagerError(
118+
f"Unable to connect to LINSTOR with URI: `{uri}`.",
119+
LinstorManagerError.ERR_NETWORK if network_issue else LinstorManagerError.ERR_GENERIC
120+
)
121+
122+
@staticmethod
123+
def find_controller_uri(timeout: int = 30) -> Optional[str]:
124+
return wait_for_condition(_get_controller_uri, timeout, interval=1)
125+
126+
def _exec_query(self, query: Callable[[linstor.Linstor, T], T], *args, **kwargs) -> T:
127+
while True:
128+
self.connect()
129+
try:
130+
return query(self._linstor, *args, **kwargs)
131+
except (linstor.errors.LinstorNetworkError, linstor.errors.LinstorTimeoutError):
132+
self._linstor = None
133+
except Exception as e:
134+
raise LinstorManagerError(f"LINSTOR query exception: `{e}`.") from e
135+
136+
@staticmethod
137+
def _filter_errors(result: List[Any]) -> List[linstor.responses.ApiCallResponse]:
138+
return [
139+
err for err in result
140+
if hasattr(err, 'is_error') and err.is_error()
141+
]
142+
143+
@classmethod
144+
def _get_error_str(cls, result: List[Any]) -> str:
145+
return ', '.join([
146+
err.message for err in cls._filter_errors(result)
147+
])
148+
149+
# ------------------------------------------------------------------------------
150+
151+
class LinstorManager(LinstorManagerBase):
152+
def resource_delete(self, resource_name: str, node_name: str) -> None:
153+
result = self._exec_query(
154+
linstor.Linstor.resource_delete,
155+
node_name,
156+
resource_name
157+
)
158+
errors = self._filter_errors(result)
159+
if errors:
160+
error_str = self._get_error_str(errors)
161+
raise LinstorManagerError(
162+
f"Failed to destroy resource `{resource_name}` on node `{node_name}`: `{error_str}`.",
163+
LinstorManagerError.ERR_RESOURCE_DESTROY
164+
)
165+
166+
def resource_toggle(self, resource_name: str, node_name: str, diskless: bool) -> None:
167+
result = self._exec_query(
168+
linstor.Linstor.resource_toggle_disk,
169+
node_name,
170+
resource_name,
171+
diskless=diskless
172+
)
173+
errors = self._filter_errors(result)
174+
if errors:
175+
state = "Diskless" if diskless else "Diskful"
176+
error_str = self._get_error_str(errors)
177+
raise LinstorManagerError(
178+
f"Could not toggle resource `{resource_name}` on node `{node_name}` to {state}: `{error_str}`.",
179+
LinstorManagerError.ERR_RESOURCE_TOGGLE
180+
)
181+
182+
def resource_remove_skip_disk_flag(self, resource_name: str, node_name: str) -> None:
183+
return self._resource_delete_properties(resource_name, node_name, ["DrbdOptions/SkipDisk"])
184+
185+
def resources_create_missing_replicas(self) -> None:
186+
pass # TODO
187+
188+
def resources_remove_skip_disks(self) -> None:
189+
name_to_resources = self._fetch_resources()
190+
191+
for resources in name_to_resources.values():
192+
skip_disk_resources = {
193+
resource.node_name: resource
194+
for resource in resources
195+
if self._is_skip_disk_resource(resource)
196+
}
197+
# Skip disk deletion is only permitted if all remaining volumes are in a stable state.
198+
if not skip_disk_resources or not all(
199+
self._is_online_disk_state(volume.state.disk_state)
200+
for resource in resources
201+
if resource.node_name not in skip_disk_resources
202+
for volume in resource.volumes
203+
):
204+
continue
205+
206+
for resource in skip_disk_resources.values():
207+
try:
208+
if linstor.consts.FLAG_DISKLESS not in resource.flags:
209+
self.resource_toggle(resource.name, resource.node_name, True)
210+
self.resource_remove_skip_disk_flag(resource.name, resource.node_name)
211+
except LinstorManagerError as e:
212+
if e.code == LinstorManagerError.ERR_NETWORK:
213+
raise
214+
logger(f"Failed to delete skip disk on resource `{resource.name}` on node `{resource.node_name}`: {e}")
215+
216+
def _fetch_resources(self) -> Dict[str, List[linstor.responses.Resource]]:
217+
resources = defaultdict(list)
218+
for resource in self._exec_query(linstor.Linstor.resource_list_raise).resources:
219+
resources[resource.name].append(resource)
220+
return resources
221+
222+
def _resource_delete_properties(self, resource_name: str, node_name: str, properties: List[str]) -> None:
223+
result = self._exec_query(
224+
linstor.Linstor.resource_modify,
225+
node_name,
226+
resource_name,
227+
property_dict={},
228+
delete_props=properties
229+
)
230+
errors = self._filter_errors(result)
231+
if errors:
232+
error_str = self._get_error_str(errors)
233+
raise LinstorManagerError(
234+
f"Failed to destroy resource properties of `{resource_name}` on node `{node_name}`: `{error_str}`.",
235+
LinstorManagerError.ERR_RESOURCE_PROP_UPDATE
236+
)
237+
238+
@staticmethod
239+
def _is_online_disk_state(disk_state: str) -> bool:
240+
return disk_state in ("UpToDate", "Diskless") or disk_state.startswith("SyncTarget")
241+
242+
@classmethod
243+
def _is_online_volume(cls, volume: linstor.responses.Volume) -> bool:
244+
return cls._is_online_disk_state(volume.state.disk_state)
245+
246+
@staticmethod
247+
def _is_skip_disk_resource(resource: linstor.responses.Resource) -> bool:
248+
return resource.properties.get("DrbdOptions/SkipDisk") == "True"
249+
250+
# ==============================================================================
251+
252+
def main():
253+
parser = argparse.ArgumentParser()
254+
parser.add_argument('-u', '--uri', required=False)
255+
256+
parser.add_argument('--create-missing-replicas', action='store_true')
257+
parser.add_argument('--remove-skip-disks', action='store_true')
258+
259+
args = parser.parse_args()
260+
261+
linstor_manager = LinstorManager(args.uri)
262+
if args.remove_skip_disks:
263+
linstor_manager.resources_remove_skip_disks()
264+
if args.create_missing_replicas:
265+
linstor_manager.resources_create_missing_replicas()
266+
267+
if __name__ == '__main__':
268+
main()

0 commit comments

Comments
 (0)