Skip to content

Commit ae34b7b

Browse files
committed
Adding cbcontbk and cbbackupmgr log collection if test fails
Change-Id: Id5814b0ea414176a0b44f708f3fee5fb155c3693
Reviewed-on: https://review.couchbase.org/c/TAF/+/244657
Tested-by: Build Bot <build@couchbase.com>
Reviewed-by: Ashwin <ashwin.govindarajulu@couchbase.com>
1 parent 957a20f commit ae34b7b

3 files changed

Lines changed: 126 additions & 55 deletions

File tree

couchbase_utils/cb_tools/cbbackupmgr.py

Lines changed: 51 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -81,12 +81,12 @@ def backup(self, archive_dir, repo_name, cluster_host=None,
8181
cmd += self.cli_flags
8282
self.log.debug(f"Executing command: {cmd}")
8383
output, error = self._execute_cmd(cmd)
84-
84+
8585
self.log.debug(f"Command output: {output}")
86-
86+
8787
if not output or error:
8888
self.log.error(f"Command failed with error: {error}")
89-
89+
9090
return output, error
9191

9292
"""
@@ -102,15 +102,15 @@ def create_repo(self, archive_dir, repo_name):
102102
"""
103103
cmd = "%s config --archive %s --repo %s" % (
104104
self.cbstatCmd, archive_dir, repo_name)
105-
105+
106106
self.log.debug(f"Executing command: {cmd}")
107107
output, error = self._execute_cmd(cmd)
108-
108+
109109
self.log.debug(f"Command output: {output}")
110-
110+
111111
if not output or error:
112112
self.log.error(f"Command failed with error: {error}")
113-
113+
114114
return output, error
115115

116116
"""
@@ -127,15 +127,15 @@ def list_backups(self, archive_dir, repo_name):
127127
cmd = "%s list --archive %s --repo %s" % (
128128
self.cbstatCmd, archive_dir, repo_name)
129129
cmd += self.cli_flags
130-
130+
131131
self.log.debug(f"Executing command: {cmd}")
132132
output, error = self._execute_cmd(cmd)
133-
133+
134134
self.log.debug(f"Command output: {output}")
135-
135+
136136
if not output or error:
137137
self.log.error(f"Command failed with error: {error}")
138-
138+
139139
return output, error
140140

141141
"""
@@ -213,15 +213,15 @@ def restore(self, archive_dir, repo_name, cluster_host=None,
213213
cmd += " --filter-values %s" % filter_values
214214

215215
cmd += self.cli_flags
216-
216+
217217
self.log.debug(f"Executing command: {cmd}")
218218
output, error = self._execute_cmd(cmd)
219-
219+
220220
self.log.debug(f"Command output: {output}")
221-
221+
222222
if not output or error:
223223
self.log.error(f"Command failed with error: {error}")
224-
224+
225225
return output, error
226226

227227
"""
@@ -238,18 +238,18 @@ def remove(self, archive_dir, repo_name, backup_range=None):
238238
"""
239239
cmd = "%s remove --archive %s --repo %s" % (
240240
self.cbstatCmd, archive_dir, repo_name)
241-
241+
242242
if backup_range:
243243
cmd += " --backups %s" % backup_range
244-
244+
245245
self.log.debug(f"Executing command: {cmd}")
246246
output, error = self._execute_cmd(cmd)
247-
247+
248248
self.log.debug(f"Command output: {output}")
249-
249+
250250
if not output or error:
251251
self.log.error(f"Command failed with error: {error}")
252-
252+
253253
return output, error
254254

255255
def generate_docs(self, num_docs, bucket_name, size, cluster_host=None):
@@ -272,15 +272,15 @@ def generate_docs(self, num_docs, bucket_name, size, cluster_host=None):
272272
self.username, self.password, size)
273273

274274
cmd += self.cli_flags
275-
275+
276276
self.log.debug(f"Executing command: {cmd}")
277277
output, error = self._execute_cmd(cmd)
278-
278+
279279
self.log.debug(f"Command output: {output}")
280-
280+
281281
if not output or error:
282282
self.log.error(f"Command failed with error: {error}")
283-
283+
284284
return output, error
285285

286286
def merge(self, archive_dir, repo_name, start, end):
@@ -297,9 +297,34 @@ def merge(self, archive_dir, repo_name, start, end):
297297

298298
self.log.debug(f"Executing command: {cmd}")
299299
output, error = self._execute_cmd(cmd)
300-
300+
301301
self.log.debug(f"Command output: {output}")
302-
302+
303+
if not output or error:
304+
self.log.error(f"Command failed with error: {error}")
305+
306+
return output, error
307+
308+
def collect_logs(self, archive_dir=None, output_dir=None):
309+
"""
310+
Execute cbbackupmgr collect-logs command to collect logs.
311+
312+
:param archive_dir str: The location of the backup archive directory.
313+
:param output_dir str: The directory to output the collected logs to.
314+
"""
315+
cmd = "%s collect-logs" % self.cbstatCmd
316+
317+
if archive_dir:
318+
cmd += " --archive %s" % archive_dir
319+
320+
if output_dir:
321+
cmd += " --output-dir %s" % output_dir
322+
323+
self.log.debug(f"Executing command: {cmd}")
324+
output, error = self._execute_cmd(cmd)
325+
326+
self.log.debug(f"Command output: {output}")
327+
303328
if not output or error:
304329
self.log.error(f"Command failed with error: {error}")
305330

couchbase_utils/cb_tools/cbcontbk.py

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -19,17 +19,17 @@ def get_cluster_timestamp(self):
1919
Gets the current UTC timestamp from the cluster host.
2020
"""
2121
cmd = "date -u +'%Y-%m-%dT%H:%M:%SZ'"
22-
22+
2323
self.log.debug(f"Executing command: {cmd}")
24-
24+
2525
output, error = self._execute_cmd(cmd)
26-
26+
2727
if error:
2828
self.log.error(f"Failed to get cluster timestamp: {error}")
2929
return None
30-
30+
3131
self.log.debug(f"Command output: {output}")
32-
32+
3333
return output[0].strip()
3434

3535
def restore(self, archive_path, repo_name,
@@ -66,16 +66,16 @@ def restore(self, archive_path, repo_name,
6666
cmd += f" --map-data {map_data}"
6767

6868
cmd += self.cli_flags
69-
69+
7070
self.log.debug(f"Executing command: {cmd}")
71-
71+
7272
output, error = self._execute_cmd(cmd)
73-
73+
7474
self.log.debug(f"Command output: {output}")
75-
75+
7676
if not output or error:
7777
self.log.error(f"Continuous backup restore failed with: {error}")
78-
78+
7979
return output, error
8080

8181
def collect_logs(self, location, temp_dir):
@@ -88,13 +88,13 @@ def collect_logs(self, location, temp_dir):
8888
f"-d {temp_dir}")
8989

9090
cmd += self.cli_flags
91-
91+
9292
self.log.debug(f"Executing command: {cmd}")
93-
93+
9494
output, error = self._execute_cmd(cmd)
95-
95+
9696
self.log.debug(f"Command output: {output}")
97-
97+
9898
if not output or error:
9999
self.log.error(f"Command failed with error: {error}")
100100

pytests/bucket_collections/collections_base.py

Lines changed: 61 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
from sdk_exceptions import SDKException
2525
from shell_util.remote_connection import RemoteMachineShellConnection
2626
from storage.fusion.fusion_base import FusionBase
27+
from TestInput import TestInputSingleton
2728
from pytests.bucket_collections.collection_scope_number_manager import CollectionScopeNumberManager
2829

2930
from couchbase.exceptions import InvalidIndexException, \
@@ -230,25 +231,70 @@ def tearDown(self):
230231

231232
# Clean up continuous backup folder
232233
if self.cont_bkp_test == "NFS":
233-
shell = RemoteMachineShellConnection(self.cluster.master)
234-
try:
235-
self.log.info("Removing continuous backup folder: %s" % self.continuous_backup_location)
236-
output, error = shell.execute_command(f"rm -rf {self.continuous_backup_location}")
237-
if error:
238-
self.log.warning("Error removing continuous backup folder: %s" % error)
239-
except Exception as e:
240-
self.log.warning("Exception during cleanup: %s" % str(e))
241234

242-
# Clean up backup folders
243-
try:
244-
if hasattr(self, 'repo_name') and self.repo_name:
245-
self.log.info("Removing backup repository")
246-
shell.execute_command(f"rm -rf {self.backup_archive_dir}/{self.backup_repo_name}")
247-
except Exception as e:
248-
self.log.warning(f"Exception during cleanup: {e}")
235+
if self.is_test_failed():
236+
self.log.warning("Test failed, skipping cleanup of continuous backup folder to preserve data for investigation: %s" % self.continuous_backup_location)
237+
if self.get_cbcollect_info:
238+
self._collect_backup_logs_on_failure()
239+
else:
240+
shell = RemoteMachineShellConnection(self.cluster.master)
241+
try:
242+
self.log.info("Removing continuous backup folder: %s" % self.continuous_backup_location)
243+
output, error = shell.execute_command(f"rm -rf {self.continuous_backup_location}")
244+
if error:
245+
self.log.warning("Error removing continuous backup folder: %s" % error)
246+
except Exception as e:
247+
self.log.warning("Exception during cleanup: %s" % str(e))
248+
249+
# Clean up backup folders
250+
try:
251+
if hasattr(self, 'repo_name') and self.repo_name:
252+
self.log.info("Removing backup repository")
253+
shell.execute_command(f"rm -rf {self.backup_archive_dir}/{self.backup_repo_name}")
254+
except Exception as e:
255+
self.log.warning(f"Exception during cleanup: {e}")
249256

250257
super(CollectionBase, self).tearDown()
251258

259+
def _collect_backup_logs_on_failure(self):
    """
    Collect cbbackupmgr and cbcontbk logs after a test failure.

    For each backup tool wrapper that is available on this test instance,
    run its ``collect_logs`` command with /data/tmp as the output
    directory on the remote node, then fetch the ``*.zip`` files that the
    command produced into the local log folder (the ``logs_folder`` test
    param, default ``/tmp``). Only runs on Linux nodes.

    This is best-effort: a failure in one collector is logged and does
    not stop the other collector, and nothing is raised to the caller
    (this runs from tearDown).
    """
    log_path = TestInputSingleton.input.param("logs_folder", "/tmp")
    remote_tmp_dir = "/data/tmp"

    def list_remote_zips(shell):
        # stderr is discarded so an unmatched glob yields no lines
        # instead of an "ls: cannot access" message.
        output, _ = shell.execute_command(
            f"ls {remote_tmp_dir}/*.zip 2>/dev/null")
        return [line.strip() for line in output or [] if line.strip()]

    # getattr() instead of direct attribute access: a test that failed
    # before the managers were created must not raise AttributeError here
    # and abort the rest of tearDown.
    collectors = [
        ("cbbackupmgr", getattr(self, "backup_mgr", None),
         lambda mgr, tmp: mgr.collect_logs(
             archive_dir=self.backup_archive_dir,
             output_dir=tmp)),
        ("cbcontbk", getattr(self, "cont_bk_mgr", None),
         lambda mgr, tmp: mgr.collect_logs(
             location=self.continuous_backup_location,
             temp_dir=tmp)),
    ]

    for name, mgr, collect_fn in collectors:
        if mgr is None:
            self.log.info(
                f"Skipping {name} log collection: manager not available")
            continue

        try:
            shell = mgr.shellConn
            os_info = shell.extract_remote_info()
            if os_info.type.lower() != "linux":
                self.log.info(f"Skipping {name} log collection: OS is not Linux")
                continue

            self.log.info(f"Collecting {name} logs for investigation")
            shell.execute_command(f"mkdir -p {remote_tmp_dir}")

            # Snapshot the archives already present so that only the ones
            # created by this collector are copied. Without this, archives
            # from the previous collector (or an earlier run) would be
            # fetched again on every pass.
            already_present = set(list_remote_zips(shell))
            collect_fn(mgr, remote_tmp_dir)

            for log_file in list_remote_zips(shell):
                if log_file in already_present:
                    continue
                self.log.info(f"Copying {log_file} to {log_path}")
                # NOTE(review): get_file is assumed to take
                # (remote_dir, file_name, local_dir) - confirm against
                # RemoteMachineShellConnection.
                shell.get_file(remote_tmp_dir,
                               log_file.rsplit("/", 1)[-1],
                               log_path)
        except Exception as e:
            self.log.error(f"Exception during {name} log collection: {e}")
297+
252298
def collection_setup(self):
253299
ttl_buckets = [
254300
"multi_bucket.buckets_for_rebalance_tests_with_ttl",

0 commit comments

Comments
 (0)