Skip to content

Commit 544d85a

Browse files
authored
[rqd] Fix process lineage logic (#1689)
1 parent fdc16f8 commit 544d85a

File tree

3 files changed

+96
-103
lines changed

3 files changed

+96
-103
lines changed

rqd/rqd/rqcore.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1066,21 +1066,27 @@ def runDocker(self):
10661066
if runFrame.attributes['CPU_LIST']:
10671067
tasksetCmd = "taskset -c %s" % runFrame.attributes['CPU_LIST']
10681068

1069+
# Set process to use nice if running on a desktop
1070+
nice = ""
1071+
if self.rqCore.machine.isDesktop():
1072+
nice = "/bin/nice"
1073+
10691074
# A temporary password for the user created inside of the frame container.
10701075
# This user is only valid inside the container, meaning a leakage would only
10711076
# be harmful if the perpetrator gains access to run docker commands.
10721077
tempPassword = str(uuid.uuid4())
10731078
# Command wrapper
10741079
command = r"""#!/bin/sh
10751080
useradd -u %s -g %s -p %s %s >& /dev/null || true;
1076-
exec su -s %s %s -c "echo \$$; /bin/nice /usr/bin/time -p -o %s %s %s"
1081+
exec su -s %s %s -c "echo \$$; %s /usr/bin/time -p -o %s %s %s"
10771082
""" % (
10781083
uid,
10791084
gid,
10801085
tempPassword,
10811086
runFrame.user_name,
10821087
self.docker_agent.docker_shell_path,
10831088
runFrame.user_name,
1089+
nice,
10841090
tempStatFile,
10851091
tasksetCmd,
10861092
runFrame.command.replace('"', r"""\"""")

rqd/rqd/rqmachine.py

Lines changed: 89 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -132,14 +132,6 @@ def isDesktop(self):
132132
by checking /etc/inittab. False if not."""
133133
if rqd.rqconstants.OVERRIDE_IS_DESKTOP:
134134
return True
135-
if platform.system() == "Linux" and os.path.exists(rqd.rqconstants.PATH_INITTAB):
136-
with open(rqd.rqconstants.PATH_INITTAB, "r", encoding='utf-8') as inittabFile:
137-
for line in inittabFile:
138-
if line.startswith("id:5:initdefault:"):
139-
return True
140-
if os.path.islink(rqd.rqconstants.PATH_INIT_TARGET):
141-
if os.path.realpath(rqd.rqconstants.PATH_INIT_TARGET).endswith('graphical.target'):
142-
return True
143135
return False
144136

145137
def isUserLoggedIn(self):
@@ -256,8 +248,10 @@ def rssUpdate(self, frames):
256248
statFields = self._getStatFields(rqd.rqconstants.PATH_PROC_PID_STAT
257249
.format(pid))
258250
pids[pid] = {
251+
"pid": str(pid),
259252
"name": statFields[1],
260253
"state": statFields[2],
254+
"parentid": statFields[3],
261255
"pgrp": statFields[4],
262256
"session": statFields[5],
263257
# virtual memory size is in bytes convert to kb
@@ -307,84 +301,86 @@ def rssUpdate(self, frames):
307301

308302
values = list(frames.values())
309303
for frame in values:
310-
if frame.pid is not None and frame.pid > 0:
311-
session = str(frame.pid)
304+
pid = str(frame.pid)
305+
if pid is not None and frame.pid > 0:
306+
visited = [pid]
307+
children = [pids[pid]]
308+
self._collectChildren(pid, pids, visited, children)
312309
rss = 0
313310
vsize = 0
314311
swap = 0
315312
pcpu = 0
316-
# children pids share the same session id
317-
for pid, data in pids.items():
318-
if data["session"] == session:
319-
try:
320-
rss += int(data["rss"])
321-
vsize += int(data["vsize"])
322-
swap += int(data["swap"])
323-
324-
# jiffies used by this process, last two means that dead
325-
# children are counted
326-
totalTime = int(data["utime"]) + \
327-
int(data["stime"]) + \
328-
int(data["cutime"]) + \
329-
int(data["cstime"])
330-
331-
# Seconds of process life, boot time is already in seconds
332-
seconds = now - bootTime - \
333-
float(data["start_time"]) / rqd.rqconstants.SYS_HERTZ
334-
if seconds:
335-
if pid in self.__pidHistory:
336-
# Percent cpu using decaying average, 50% from 10 seconds
337-
# ago, 50% from last 10 seconds:
338-
oldTotalTime, oldSeconds, oldPidPcpu = \
339-
self.__pidHistory[pid]
340-
# checking if already updated data
341-
if seconds != oldSeconds:
342-
pidPcpu = ((totalTime - oldTotalTime) /
343-
float(seconds - oldSeconds))
344-
pcpu += (oldPidPcpu + pidPcpu) / 2 # %cpu
345-
pidData[pid] = totalTime, seconds, pidPcpu
346-
else:
347-
pidPcpu = totalTime / seconds
348-
pcpu += pidPcpu
349-
pidData[pid] = totalTime, seconds, pidPcpu
350-
# If children was already accounted for, only keep the highest
351-
# recorded rss value
352-
if pid in frame.childrenProcs:
353-
childRss = (int(data["rss"]) * resource.getpagesize()) // 1024
354-
if childRss > frame.childrenProcs[pid]['rss']:
355-
frame.childrenProcs[pid]['rss_page'] = int(data["rss"])
356-
frame.childrenProcs[pid]['rss'] = childRss
357-
frame.childrenProcs[pid]['vsize'] = \
358-
int(data["vsize"]) // 1024
359-
frame.childrenProcs[pid]['swap'] = swap // 1024
360-
frame.childrenProcs[pid]['statm_rss'] = \
361-
(int(data["statm_rss"]) \
362-
* resource.getpagesize()) // 1024
363-
frame.childrenProcs[pid]['statm_size'] = \
364-
(int(data["statm_size"]) * \
365-
resource.getpagesize()) // 1024
313+
for data in children:
314+
child_pid = data["pid"]
315+
try:
316+
rss += int(data["rss"])
317+
vsize += int(data["vsize"])
318+
swap += int(data["swap"])
319+
320+
# jiffies used by this process, last two means that dead
321+
# children are counted
322+
totalTime = int(data["utime"]) + \
323+
int(data["stime"]) + \
324+
int(data["cutime"]) + \
325+
int(data["cstime"])
326+
327+
# Seconds of process life, boot time is already in seconds
328+
seconds = now - bootTime - \
329+
float(data["start_time"]) / rqd.rqconstants.SYS_HERTZ
330+
if seconds:
331+
if child_pid in self.__pidHistory:
332+
# Percent cpu using decaying average, 50% from 10 seconds
333+
# ago, 50% from last 10 seconds:
334+
oldTotalTime, oldSeconds, oldPidPcpu = \
335+
self.__pidHistory[child_pid]
336+
# checking if already updated data
337+
if seconds != oldSeconds:
338+
pidPcpu = ((totalTime - oldTotalTime) /
339+
float(seconds - oldSeconds))
340+
pcpu += (oldPidPcpu + pidPcpu) / 2 # %cpu
341+
pidData[child_pid] = totalTime, seconds, pidPcpu
366342
else:
367-
frame.childrenProcs[pid] = \
368-
{'name': data['name'],
369-
'rss_page': int(data["rss"]),
370-
'rss': (int(data["rss"]) * resource.getpagesize()) // 1024,
371-
'vsize': int(data["vsize"]) // 1024,
372-
'swap': swap // 1024,
373-
'state': data['state'],
374-
# statm reports in pages (~ 4kB)
375-
# same as VmRss in /proc/[pid]/status (in KB)
376-
'statm_rss': (int(data["statm_rss"]) * \
377-
resource.getpagesize()) // 1024,
378-
'statm_size': (int(data["statm_size"]) * \
379-
resource.getpagesize()) // 1024,
380-
'cmd_line': data["cmd_line"],
381-
'start_time': seconds}
382-
383-
# pylint: disable=broad-except
384-
except Exception as e:
385-
log.warning(
386-
'Failure with pid rss update due to: %s at %s',
387-
e, traceback.extract_tb(sys.exc_info()[2]))
343+
pidPcpu = totalTime / seconds
344+
pcpu += pidPcpu
345+
pidData[child_pid] = totalTime, seconds, pidPcpu
346+
# If the child was already accounted for, only keep the highest
347+
# recorded rss value
348+
if child_pid in frame.childrenProcs:
349+
childRss = (int(data["rss"]) * resource.getpagesize()) // 1024
350+
if childRss > frame.childrenProcs[child_pid]['rss']:
351+
frame.childrenProcs[child_pid]['rss_page'] = int(data["rss"])
352+
frame.childrenProcs[child_pid]['rss'] = childRss
353+
frame.childrenProcs[child_pid]['vsize'] = \
354+
int(data["vsize"]) // 1024
355+
frame.childrenProcs[child_pid]['swap'] = swap // 1024
356+
frame.childrenProcs[child_pid]['statm_rss'] = \
357+
(int(data["statm_rss"]) \
358+
* resource.getpagesize()) // 1024
359+
frame.childrenProcs[child_pid]['statm_size'] = \
360+
(int(data["statm_size"]) * \
361+
resource.getpagesize()) // 1024
362+
else:
363+
frame.childrenProcs[child_pid] = \
364+
{'name': data['name'],
365+
'rss_page': int(data["rss"]),
366+
'rss': (int(data["rss"]) * resource.getpagesize()) // 1024,
367+
'vsize': int(data["vsize"]) // 1024,
368+
'swap': swap // 1024,
369+
'state': data['state'],
370+
# statm reports in pages (~ 4kB)
371+
# same as VmRss in /proc/[child_pid]/status (in KB)
372+
'statm_rss': (int(data["statm_rss"]) * \
373+
resource.getpagesize()) // 1024,
374+
'statm_size': (int(data["statm_size"]) * \
375+
resource.getpagesize()) // 1024,
376+
'cmd_line': data["cmd_line"],
377+
'start_time': seconds}
378+
379+
# pylint: disable=broad-except
380+
except Exception as e:
381+
log.warning(
382+
'Failure with pid rss update due to: %s at %s',
383+
e, traceback.extract_tb(sys.exc_info()[2]))
388384
# convert bytes to KB
389385
rss = (rss * resource.getpagesize()) // 1024
390386
vsize = int(vsize/1024)
@@ -412,6 +408,18 @@ def rssUpdate(self, frames):
412408
except Exception as e:
413409
log.exception('Failure with rss update due to: %s', e)
414410

411+
def _collectChildren(self, current_pid: str,
412+
all_pids: dict[str, dict[str, str]], visited: list[str],
413+
children: list[dict[str, str]]):
414+
"""Recursive method to collect all children of first_parent_pid.
415+
416+
The list of children is returned on the arg children"""
417+
for child_pid, data in all_pids.items():
418+
if data["parentid"] == current_pid and child_pid not in visited:
419+
children.append(data)
420+
visited.append(child_pid)
421+
self._collectChildren(child_pid, all_pids, visited, children)
422+
415423
def _getProcSwap(self, pid):
416424
"""Helper function to get swap memory used by a process"""
417425
swap_used = 0

rqd/tests/rqmachine_test.py

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -203,27 +203,6 @@ def test_isNimbySafeToRunJobs_noFreeSwap(self):
203203

204204
self.assertFalse(self.machine.isNimbySafeToRunJobs())
205205

206-
def test_isDesktop_inittabDesktop(self):
207-
rqd.rqconstants.OVERRIDE_IS_DESKTOP = False
208-
self.fs.create_file(rqd.rqconstants.PATH_INITTAB, contents=INITTAB_DESKTOP)
209-
210-
self.assertTrue(self.machine.isDesktop())
211-
212-
def test_isDesktop_inittabServer(self):
213-
rqd.rqconstants.OVERRIDE_IS_DESKTOP = False
214-
self.fs.create_file(rqd.rqconstants.PATH_INITTAB, contents=INITTAB_SERVER)
215-
216-
self.assertFalse(self.machine.isDesktop())
217-
218-
def test_isDesktop_initTarget(self):
219-
rqd.rqconstants.OVERRIDE_IS_DESKTOP = False
220-
self.fs.create_file(rqd.rqconstants.PATH_INITTAB)
221-
symlink_target = '/lib/systemd/system/graphical.target'
222-
self.fs.create_file(symlink_target)
223-
self.fs.create_symlink(rqd.rqconstants.PATH_INIT_TARGET, symlink_target)
224-
225-
self.assertTrue(self.machine.isDesktop())
226-
227206
def test_isDesktop_override(self):
228207
rqd.rqconstants.OVERRIDE_IS_DESKTOP = True
229208

0 commit comments

Comments
 (0)