From 68478db6cebf7576628cf14e09fe5fe356841693 Mon Sep 17 00:00:00 2001 From: Paul Wayper Date: Tue, 4 Feb 2025 09:27:23 +1100 Subject: [PATCH 01/15] Several speed improvements for parsing ls directory output - Avoid re-splitting the line if it's already been split. - Handle a few failure modes in parsing links and size on '?' and other bad input. - Each directory is all one 'mode' - normal, or SELinux on RHEL 6/7 or 8+. Instead of trying to detect the mode of each line, detect the mode once and then use that for all lines in this directory. Difficult to extrapolate that to all listings in a parse - let's see how this goes for now. - Simplify how we pick up directory names and store previously processed directories. - Add a few comments to make it easier to see what's being parsed where. Signed-off-by: Paul Wayper --- insights/core/ls_parser.py | 117 +++++++++++++----------- insights/tests/parsers/test_lsinitrd.py | 5 +- 2 files changed, 67 insertions(+), 55 deletions(-) diff --git a/insights/core/ls_parser.py b/insights/core/ls_parser.py index bce81ee708..6a2a2e75a1 100644 --- a/insights/core/ls_parser.py +++ b/insights/core/ls_parser.py @@ -19,14 +19,12 @@ def parse_path(path): return path, link -def parse_non_selinux(parts): +def parse_non_selinux(links, owner, group, last): """ Parse part of an ls output line that isn't selinux. Args: - parts (list): A four element list of strings representing the initial - parts of an ls line after the permission bits. The parts are link - count, owner, group, and everything else. + link count, owner, group, and everything else. Returns: A dict containing links, owner, group, date, and name. If the line @@ -34,9 +32,11 @@ def parse_non_selinux(parts): size is included. If the raw name was a symbolic link, link is included. """ - links, owner, group, last = parts + # prw-------. 1 0 0 0 Jun 28 09:44 5.ref + # l?????????? ? ? ? ? ? invocation:auditd.service + result = { - "links": int(links), + "links": int(links) if links.isdigit() else links, "owner": owner, "group": group, } @@ -50,7 +50,7 @@ def parse_non_selinux(parts): result["minor"] = int(minor) else: size, rest = last.split(None, 1) - result["size"] = int(size) + result["size"] = int(size) if size.isdigit() else size # The date part is always 12 characters regardless of content. result["date"] = rest[:12] @@ -64,14 +64,12 @@ def parse_non_selinux(parts): return result -def parse_selinux(parts): +def parse_selinux(owner, group, selinux_str, name_part): """ Parse part of an ls output line that is selinux. Args: - parts (list): A four element list of strings representing the initial - parts of an ls line after the permission bits. The parts are owner - group, selinux info, and the path. + owner, group, selinux info, and the path. Returns: A dict containing owner, group, se_user, se_role, se_type, se_mls, and @@ -79,10 +77,9 @@ def parse_selinux(parts): """ - owner, group = parts[:2] - selinux = parts[2].split(":") + selinux = selinux_str.split(":") lsel = len(selinux) - path, link = parse_path(parts[-1]) + name, link = parse_path(name_part) result = { "owner": owner, "group": group, @@ -90,21 +87,19 @@ def parse_selinux(parts): "se_role": selinux[1] if lsel > 1 else None, "se_type": selinux[2] if lsel > 2 else None, "se_mls": selinux[3] if lsel > 3 else None, - "name": path, + "name": name } if link: result["link"] = link return result -def parse_rhel8_selinux(parts): +def parse_rhel8_selinux(links, owner, group, last): """ Parse part of an ls output line that is selinux on RHEL8. Args: - parts (list): A four element list of strings representing the initial - parts of an ls line after the permission bits. The parts are link - count, owner, group, and everything else + link count, owner, group, and everything else. Returns: A dict containing links, owner, group, se_user, se_role, se_type, @@ -112,14 +107,12 @@ def parse_rhel8_selinux(parts): link is also included. """ - - links, owner, group, last = parts result = { - "links": int(links), + "links": int(links) if links.isdigit() else links, "owner": owner, "group": group, } - selinux, last = parts[-1].split(None, 1) + selinux, last = last.split(None, 1) selinux = selinux.split(":") lsel = len(selinux) if "," in last: @@ -128,7 +121,7 @@ def parse_rhel8_selinux(parts): result['minor'] = int(minor) else: size, last = last.split(None, 1) - result['size'] = int(size) + result['size'] = int(size) if size.isdigit() else size date = last[:12] path, link = parse_path(last[13:]) result.update( @@ -146,33 +139,51 @@ def parse_rhel8_selinux(parts): return result +parse_mode = { + 'normal': parse_non_selinux, + 'selinux': parse_selinux, + 'rhel8_selinux': parse_rhel8_selinux +} + + class Directory(dict): def __init__(self, name, total, body): dirs = [] ents = {} files = [] specials = [] + mode = None for line in body: # we can't split(None, 5) here b/c rhel 6/7 selinux lines only have # 4 parts before the path, and the path itself could contain # spaces. Unfortunately, this means we have to split the line again # below - parts = line.split(None, 4) - perms = parts[0] + perms, links, owner, group, rest = line.split(None, 4) typ = perms[0] - entry = {"type": typ, "perms": perms[1:]} - if parts[1][0].isdigit(): - # We have to split the line again to see if this is a RHEL8 - # selinux stanza. This assumes that the context section will - # always have at least two pieces separated by ':'. - # '?' as the whole RHEL8 security context is also acceptable. - rhel8_selinux_ctx = line.split()[4].strip() - if ":" in rhel8_selinux_ctx or '?' == rhel8_selinux_ctx: - rest = parse_rhel8_selinux(parts[1:]) + entry = { + "type": typ, + "perms": perms[1:] + } + # determine mode once per directory + if mode is None: + if links[0].isdigit(): + # We have to split the line again to see if this is a RHEL8 + # selinux stanza. This assumes that the context section will + # always have at least two pieces separated by ':'. + # '?' as the whole RHEL8 security context is also acceptable. + rhel8_selinux_ctx = line.split()[4] + if ":" in rhel8_selinux_ctx or '?' == rhel8_selinux_ctx.strip(): + # drwxr-xr-x. 2 root root unconfined_u:object_r:var_lib_t:s0 54 Apr 8 16:41 abcd-efgh-ijkl-mnop + mode = 'rhel8_selinux' + else: + # crw-------. 1 0 0 10, 236 Jul 25 10:00 control + # lrwxrwxrwx. 1 0 0 11 Aug 4 2014 menu.lst -> ./grub.conf + mode = 'normal' else: - rest = parse_non_selinux(parts[1:]) - else: - rest = parse_selinux(parts[1:]) + # -rw-r--r--. root root system_u:object_r:boot_t:s0 config-3.10.0-267 + mode = 'selinux' + # Now parse based on mode + rest = parse_mode[mode](links, owner, group, rest) # Update our entry and put it into the correct buckets # based on its type. @@ -226,23 +237,23 @@ def parse(lines, root=None): # Skip empty line and non-exist dir line if not line or ': No such file or directory' in line: continue - if line and line[0] == "/" and line[-1] == ":": - if name is None: - name = line[:-1] - if entries: - d = Directory(name, total or len(entries), entries) - doc[root] = d - total = None - entries = [] - else: - d = Directory(name, total or len(entries), entries) - doc[name or root] = d - total = None - entries = [] - name = line[:-1] + if line[0] == "/" and line[-1] == ":": + # Directory name - like '/tmp:' + if total is None: + total = len(entries) + # Some old directory listings don't have an initial name line, + # so we put any entries we collected before a named directory in + # our 'root' directory - if we got a 'root' directory at all... + if not(name is None and root is None): + old_name = root if name is None else name + doc[old_name] = Directory(old_name, total, entries) + name = line[:-1] + total = None + entries = [] continue if line.startswith("total"): - total = int(line.split(None, 1)[1]) + # Should be first line after directory name + total = int(line[6:]) continue entries.append(line) name = name or root diff --git a/insights/tests/parsers/test_lsinitrd.py b/insights/tests/parsers/test_lsinitrd.py index 758543be65..9076314f82 100644 --- a/insights/tests/parsers/test_lsinitrd.py +++ b/insights/tests/parsers/test_lsinitrd.py @@ -65,7 +65,7 @@ LSINITRD_EMPTY = "" -# This test case +# This test case should not parse - but why would we be parsing this? LSINITRD_BROKEN = """ drwxr-xr-x 3 root root 0 Apr 20 15:58 kernel/x86 Version: dracut-033-535.el7 @@ -220,7 +220,8 @@ def test_lsinitrd_broken(): """ with pytest.raises(Exception) as err: lsinitrd.Lsinitrd(context_wrap(LSINITRD_BROKEN)) - assert "list index out of range" in str(err) + # Why do we need to be so specific though? + assert "not enough values to unpack" in str(err) def test_lsinitrd_kdump_image_valid(): From 7bdb5e24e351d7fdce107a3b08f2a4f5bb71882f Mon Sep 17 00:00:00 2001 From: Paul Wayper Date: Tue, 4 Feb 2025 10:12:31 +1100 Subject: [PATCH 02/15] Minor flake8 fix Signed-off-by: Paul Wayper --- insights/core/ls_parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/insights/core/ls_parser.py b/insights/core/ls_parser.py index 6a2a2e75a1..292bd797b2 100644 --- a/insights/core/ls_parser.py +++ b/insights/core/ls_parser.py @@ -244,8 +244,8 @@ def parse(lines, root=None): # Some old directory listings don't have an initial name line, # so we put any entries we collected before a named directory in # our 'root' directory - if we got a 'root' directory at all... - if not(name is None and root is None): - old_name = root if name is None else name + old_name = root if name is None else name + if old_name is not None: doc[old_name] = Directory(old_name, total, entries) name = line[:-1] total = None From 4feb195aa4a47a4fd4b1b5951be8dfa067cfb772 Mon Sep 17 00:00:00 2001 From: Paul Wayper Date: Tue, 4 Feb 2025 11:05:17 +1100 Subject: [PATCH 03/15] Ignore any line that doesn't have at least five parts (perms, links, owner, group, rest) Signed-off-by: Paul Wayper --- insights/core/ls_parser.py | 6 +++++- insights/tests/parsers/test_lsinitrd.py | 12 +++++------- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/insights/core/ls_parser.py b/insights/core/ls_parser.py index 292bd797b2..65feb84fa0 100644 --- a/insights/core/ls_parser.py +++ b/insights/core/ls_parser.py @@ -158,7 +158,11 @@ def __init__(self, name, total, body): # 4 parts before the path, and the path itself could contain # spaces. Unfortunately, this means we have to split the line again # below - perms, links, owner, group, rest = line.split(None, 4) + try: + perms, links, owner, group, rest = line.split(None, 4) + except ValueError: + # Ignore malformed lines completely + continue typ = perms[0] entry = { "type": typ, diff --git a/insights/tests/parsers/test_lsinitrd.py b/insights/tests/parsers/test_lsinitrd.py index 9076314f82..8c1ed5c9e3 100644 --- a/insights/tests/parsers/test_lsinitrd.py +++ b/insights/tests/parsers/test_lsinitrd.py @@ -1,5 +1,4 @@ import doctest -import pytest from insights.parsers import lsinitrd from insights.parsers.lsinitrd import LsinitrdKdumpImage from insights.tests import context_wrap @@ -215,13 +214,12 @@ def test_lsinitrd_all(): def test_lsinitrd_broken(): """ - For this testcase, ls_parser.parse() will throw an IndexError. - Assert with this specific error here. + In the case that someone's been tampering with the ls output, we want to + just ignore lines that don't have enough information to be a valid dirent. """ - with pytest.raises(Exception) as err: - lsinitrd.Lsinitrd(context_wrap(LSINITRD_BROKEN)) - # Why do we need to be so specific though? - assert "not enough values to unpack" in str(err) + result = lsinitrd.Lsinitrd(context_wrap(LSINITRD_BROKEN)) + assert result is not None + print("result:", result, dir(result)) def test_lsinitrd_kdump_image_valid(): From 29a6e621690dab29704a72ecd53efd7715613a07 Mon Sep 17 00:00:00 2001 From: Paul Wayper Date: Wed, 5 Feb 2025 08:50:52 +1100 Subject: [PATCH 04/15] Abstract size/major,minor parsing out, other parsing consolidation Signed-off-by: Paul Wayper --- insights/core/ls_parser.py | 130 ++++++++++++++++++------------------- 1 file changed, 63 insertions(+), 67 deletions(-) diff --git a/insights/core/ls_parser.py b/insights/core/ls_parser.py index 65feb84fa0..d7ab00a7fa 100644 --- a/insights/core/ls_parser.py +++ b/insights/core/ls_parser.py @@ -19,28 +19,17 @@ def parse_path(path): return path, link -def parse_non_selinux(links, owner, group, last): +def parse_major_minor(last, result): """ - Parse part of an ls output line that isn't selinux. + Parse the size / major, minor section of the line. Args: - link count, owner, group, and everything else. + last (str): the rest of the line after the owner and group. + result (dict): the dirent dict to put details in. Returns: - A dict containing links, owner, group, date, and name. If the line - represented a device, major and minor numbers are included. Otherwise, - size is included. If the raw name was a symbolic link, link is - included. + The rest of the line after the size / major, minor. """ - # prw-------. 1 0 0 0 Jun 28 09:44 5.ref - # l?????????? ? ? ? ? ? invocation:auditd.service - - result = { - "links": int(links) if links.isdigit() else links, - "owner": owner, - "group": group, - } - # device numbers only go to 256. # If a comma is in the first four characters, the next two elements are # major and minor device numbers. Otherwise, the next element is the size. @@ -51,20 +40,42 @@ def parse_non_selinux(links, owner, group, last): else: size, rest = last.split(None, 1) result["size"] = int(size) if size.isdigit() else size + return rest + + +def parse_non_selinux(entry, links, owner, group, last): + """ + Parse part of an ls output line that isn't selinux. + + Args: + link count, owner, group, and everything else. + + Returns: + A dict containing links, owner, group, date, and name. If the line + represented a device, major and minor numbers are included. Otherwise, + size is included. If the raw name was a symbolic link, link is + included. + """ + # prw-------. 1 0 0 0 Jun 28 09:44 5.ref + # l?????????? ? ? ? ? ? invocation:auditd.service + + entry["links"] = links if links == '?' else int(links) + entry["owner"] = owner + entry["group"] = group + + rest = parse_major_minor(last, entry) # The date part is always 12 characters regardless of content. - result["date"] = rest[:12] + entry["date"] = rest[:12] # Jump over the date and the following space to get the path part. path, link = parse_path(rest[13:]) - result["name"] = path + entry["name"] = path if link: - result["link"] = link + entry["link"] = link - return result - -def parse_selinux(owner, group, selinux_str, name_part): +def parse_selinux(entry, owner, group, selinux_str, name_part): """ Parse part of an ls output line that is selinux. @@ -80,21 +91,18 @@ def parse_selinux(owner, group, selinux_str, name_part): selinux = selinux_str.split(":") lsel = len(selinux) name, link = parse_path(name_part) - result = { - "owner": owner, - "group": group, - "se_user": selinux[0], - "se_role": selinux[1] if lsel > 1 else None, - "se_type": selinux[2] if lsel > 2 else None, - "se_mls": selinux[3] if lsel > 3 else None, - "name": name - } + entry["owner"] = owner + entry["group"] = group + entry["se_user"] = selinux[0] + entry["se_role"] = selinux[1] if lsel > 1 else None + entry["se_type"] = selinux[2] if lsel > 2 else None + entry["se_mls"] = selinux[3] if lsel > 3 else None + entry["name"] = name if link: - result["link"] = link - return result + entry["link"] = link -def parse_rhel8_selinux(links, owner, group, last): +def parse_rhel8_selinux(entry, links, owner, group, last): """ Parse part of an ls output line that is selinux on RHEL8. @@ -107,36 +115,23 @@ def parse_rhel8_selinux(links, owner, group, last): link is also included. """ - result = { - "links": int(links) if links.isdigit() else links, - "owner": owner, - "group": group, - } + entry["links"] = int(links) if links.isdigit() else links + entry["owner"] = owner + entry["group"] = group selinux, last = last.split(None, 1) selinux = selinux.split(":") lsel = len(selinux) - if "," in last: - major, minor, last = last.split(None, 2) - result['major'] = int(major.rstrip(",")) - result['minor'] = int(minor) - else: - size, last = last.split(None, 1) - result['size'] = int(size) if size.isdigit() else size - date = last[:12] - path, link = parse_path(last[13:]) - result.update( - { - "se_user": selinux[0], - "se_role": selinux[1] if lsel > 1 else None, - "se_type": selinux[2] if lsel > 2 else None, - "se_mls": selinux[3] if lsel > 3 else None, - "name": path, - "date": date, - } - ) + rest = parse_major_minor(last, entry) + date = rest[:12] + path, link = parse_path(rest[13:]) + entry["se_user"] = selinux[0] + entry["se_role"] = selinux[1] if lsel > 1 else None + entry["se_type"] = selinux[2] if lsel > 2 else None + entry["se_mls"] = selinux[3] if lsel > 3 else None + entry["name"] = path + entry["date"] = date if link: - result["link"] = link - return result + entry["link"] = link parse_mode = { @@ -147,7 +142,7 @@ def parse_rhel8_selinux(links, owner, group, last): class Directory(dict): - def __init__(self, name, total, body): + def __init__(self, dirname, total, body): dirs = [] ents = {} files = [] @@ -166,7 +161,8 @@ def __init__(self, name, total, body): typ = perms[0] entry = { "type": typ, - "perms": perms[1:] + "perms": perms[1:], + "dir": dirname, } # determine mode once per directory if mode is None: @@ -175,9 +171,9 @@ def __init__(self, name, total, body): # selinux stanza. This assumes that the context section will # always have at least two pieces separated by ':'. # '?' as the whole RHEL8 security context is also acceptable. - rhel8_selinux_ctx = line.split()[4] + rhel8_selinux_ctx = rest.split()[0] if ":" in rhel8_selinux_ctx or '?' == rhel8_selinux_ctx.strip(): - # drwxr-xr-x. 2 root root unconfined_u:object_r:var_lib_t:s0 54 Apr 8 16:41 abcd-efgh-ijkl-mnop + # unconfined_u:object_r:var_lib_t:s0 54 Apr 8 16:41 abcd-efgh-ijkl-mnop mode = 'rhel8_selinux' else: # crw-------. 1 0 0 10, 236 Jul 25 10:00 control @@ -187,7 +183,7 @@ def __init__(self, name, total, body): # -rw-r--r--. root root system_u:object_r:boot_t:s0 config-3.10.0-267 mode = 'selinux' # Now parse based on mode - rest = parse_mode[mode](links, owner, group, rest) + rest = parse_mode[mode](entry, links, owner, group, rest) # Update our entry and put it into the correct buckets # based on its type. @@ -196,7 +192,7 @@ def __init__(self, name, total, body): # - The `raw_entry` key is deprecated and will be removed from 3.6.0. # Please use the `insights.parsers.ls.FileListingParser.raw_entry_of` instead. entry["raw_entry"] = line - entry["dir"] = name + nm = entry["name"] ents[nm] = entry if typ not in "bcd": @@ -239,7 +235,7 @@ def parse(lines, root=None): for line in lines: line = line.strip() # Skip empty line and non-exist dir line - if not line or ': No such file or directory' in line: + if not line or ': No such file or directory' in line or 'cannot open directory' in line: continue if line[0] == "/" and line[-1] == ":": # Directory name - like '/tmp:' From 39ec3b61fb69f9efa8b1b33a729351a1b7f819e7 Mon Sep 17 00:00:00 2001 From: Paul Wayper Date: Wed, 19 Feb 2025 11:29:11 +1100 Subject: [PATCH 05/15] Minor improvement - speed up finding mode if we have a links count Signed-off-by: Paul Wayper --- insights/core/ls_parser.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/insights/core/ls_parser.py b/insights/core/ls_parser.py index d7ab00a7fa..627b0e123d 100644 --- a/insights/core/ls_parser.py +++ b/insights/core/ls_parser.py @@ -171,8 +171,9 @@ def __init__(self, dirname, total, body): # selinux stanza. This assumes that the context section will # always have at least two pieces separated by ':'. # '?' as the whole RHEL8 security context is also acceptable. - rhel8_selinux_ctx = rest.split()[0] - if ":" in rhel8_selinux_ctx or '?' == rhel8_selinux_ctx.strip(): + # substring to index of space is faster than strip + rhel8_selinux_ctx = rest[:rest.index(" ")] + if ":" in rhel8_selinux_ctx or '?' == rhel8_selinux_ctx: # unconfined_u:object_r:var_lib_t:s0 54 Apr 8 16:41 abcd-efgh-ijkl-mnop mode = 'rhel8_selinux' else: From cc5d2cb68c320ba004ff7700a96143323415fe67 Mon Sep 17 00:00:00 2001 From: Paul Wayper Date: Wed, 19 Feb 2025 11:39:38 +1100 Subject: [PATCH 06/15] Store parser function directly rather than looking it up Signed-off-by: Paul Wayper --- insights/core/ls_parser.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/insights/core/ls_parser.py b/insights/core/ls_parser.py index 627b0e123d..387868875a 100644 --- a/insights/core/ls_parser.py +++ b/insights/core/ls_parser.py @@ -147,7 +147,7 @@ def __init__(self, dirname, total, body): ents = {} files = [] specials = [] - mode = None + parser = None for line in body: # we can't split(None, 5) here b/c rhel 6/7 selinux lines only have # 4 parts before the path, and the path itself could contain @@ -165,7 +165,7 @@ def __init__(self, dirname, total, body): "dir": dirname, } # determine mode once per directory - if mode is None: + if parser is None: if links[0].isdigit(): # We have to split the line again to see if this is a RHEL8 # selinux stanza. This assumes that the context section will @@ -175,16 +175,16 @@ def __init__(self, dirname, total, body): rhel8_selinux_ctx = rest[:rest.index(" ")] if ":" in rhel8_selinux_ctx or '?' == rhel8_selinux_ctx: # unconfined_u:object_r:var_lib_t:s0 54 Apr 8 16:41 abcd-efgh-ijkl-mnop - mode = 'rhel8_selinux' + parser = parse_mode['rhel8_selinux'] else: # crw-------. 1 0 0 10, 236 Jul 25 10:00 control # lrwxrwxrwx. 1 0 0 11 Aug 4 2014 menu.lst -> ./grub.conf - mode = 'normal' + parser = parse_mode['normal'] else: # -rw-r--r--. root root system_u:object_r:boot_t:s0 config-3.10.0-267 - mode = 'selinux' + parser = parse_mode['selinux'] # Now parse based on mode - rest = parse_mode[mode](entry, links, owner, group, rest) + rest = parser(entry, links, owner, group, rest) # Update our entry and put it into the correct buckets # based on its type. From af8600eeab9e860f42d1f799098cc96f857bf25b Mon Sep 17 00:00:00 2001 From: Paul Wayper Date: Wed, 19 Feb 2025 12:03:40 +1100 Subject: [PATCH 07/15] Don't need return from parser function; slightly quicker if/elif/else on dirent type Signed-off-by: Paul Wayper --- insights/core/ls_parser.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/insights/core/ls_parser.py b/insights/core/ls_parser.py index 387868875a..a9f0198cca 100644 --- a/insights/core/ls_parser.py +++ b/insights/core/ls_parser.py @@ -184,7 +184,7 @@ def __init__(self, dirname, total, body): # -rw-r--r--. root root system_u:object_r:boot_t:s0 config-3.10.0-267 parser = parse_mode['selinux'] # Now parse based on mode - rest = parser(entry, links, owner, group, rest) + parser(entry, links, owner, group, rest) # Update our entry and put it into the correct buckets # based on its type. @@ -196,12 +196,12 @@ def __init__(self, dirname, total, body): nm = entry["name"] ents[nm] = entry - if typ not in "bcd": - files.append(nm) - elif typ == "d": + if typ == "d": dirs.append(nm) - elif typ in "bc": + elif typ in "bc": # faster than typ == "b" or typ == "c" specials.append(nm) + else: + files.append(nm) super(Directory, self).__init__( { From f0681fe68464e793a5f23a6198f6eb6f43719e15 Mon Sep 17 00:00:00 2001 From: Paul Wayper Date: Wed, 26 Feb 2025 15:22:03 +1100 Subject: [PATCH 08/15] Simplify to set_link_name and set_selinux, use partition for rhel8_selinux check Signed-off-by: Paul Wayper --- insights/core/ls_parser.py | 80 ++++++++++++++----------------- insights/tests/parsers/test_ls.py | 6 +-- 2 files changed, 39 insertions(+), 47 deletions(-) diff --git a/insights/core/ls_parser.py b/insights/core/ls_parser.py index a9f0198cca..807d751b65 100644 --- a/insights/core/ls_parser.py +++ b/insights/core/ls_parser.py @@ -4,19 +4,22 @@ """ -def parse_path(path): +def set_name_link(entry, is_softlink, path): """ - Convert possible symbolic link into a source -> target pair. + Get the name, and possibly the link, from the rest of the path. Args: - path (str): The path portion of an ls output line. + entry (dict): the dict to put this data into + is_softlink (bool): is this actually a softlink? + path (str): the rest of the line, optionally including the link Returns: - A (path, link) tuple where path is always populated and link is a non - empty string if the original path is a symoblic link. + Does not return, entry keys for name and link added """ - path, _, link = path.partition(" -> ") - return path, link + if is_softlink: + entry['name'], _, entry['link'] = path.partition(" -> ") + else: + entry['name'] = path def parse_major_minor(last, result): @@ -43,7 +46,7 @@ def parse_major_minor(last, result): return rest -def parse_non_selinux(entry, links, owner, group, last): +def parse_non_selinux(entry, is_softlink, links, owner, group, last): """ Parse part of an ls output line that isn't selinux. @@ -68,14 +71,22 @@ def parse_non_selinux(entry, links, owner, group, last): # The date part is always 12 characters regardless of content. entry["date"] = rest[:12] - # Jump over the date and the following space to get the path part. - path, link = parse_path(rest[13:]) - entry["name"] = path - if link: - entry["link"] = link + set_name_link(entry, is_softlink, rest[13:]) + + +def set_selinux(entry, selinux_str): + """ + Set the SELinux part of this entry + """ + selinux = selinux_str.split(":") + lsel = len(selinux) + entry["se_user"] = selinux[0] + entry["se_role"] = selinux[1] if lsel > 1 else None + entry["se_type"] = selinux[2] if lsel > 2 else None + entry["se_mls"] = selinux[3] if lsel > 3 else None -def parse_selinux(entry, owner, group, selinux_str, name_part): +def parse_old_selinux(entry, is_softlink, owner, group, selinux_str, name_part): """ Parse part of an ls output line that is selinux. @@ -88,21 +99,13 @@ def parse_selinux(entry, owner, group, selinux_str, name_part): """ - selinux = selinux_str.split(":") - lsel = len(selinux) - name, link = parse_path(name_part) entry["owner"] = owner entry["group"] = group - entry["se_user"] = selinux[0] - entry["se_role"] = selinux[1] if lsel > 1 else None - entry["se_type"] = selinux[2] if lsel > 2 else None - entry["se_mls"] = selinux[3] if lsel > 3 else None - entry["name"] = name - if link: - entry["link"] = link + set_selinux(entry, selinux_str) + set_name_link(entry, is_softlink, name_part) -def parse_rhel8_selinux(entry, links, owner, group, last): +def parse_rhel8_selinux(entry, is_softlink, links, owner, group, last): """ Parse part of an ls output line that is selinux on RHEL8. @@ -118,25 +121,16 @@ def parse_rhel8_selinux(entry, links, owner, group, last): entry["links"] = int(links) if links.isdigit() else links entry["owner"] = owner entry["group"] = group - selinux, last = last.split(None, 1) - selinux = selinux.split(":") - lsel = len(selinux) + selinux_str, last = last.split(None, 1) + set_selinux(entry, selinux_str) rest = parse_major_minor(last, entry) - date = rest[:12] - path, link = parse_path(rest[13:]) - entry["se_user"] = selinux[0] - entry["se_role"] = selinux[1] if lsel > 1 else None - entry["se_type"] = selinux[2] if lsel > 2 else None - entry["se_mls"] = selinux[3] if lsel > 3 else None - entry["name"] = path - entry["date"] = date - if link: - entry["link"] = link + entry["date"] = rest[:12] + set_name_link(entry, is_softlink, rest[13:]) parse_mode = { 'normal': parse_non_selinux, - 'selinux': parse_selinux, + 'selinux': parse_old_selinux, 'rhel8_selinux': parse_rhel8_selinux } @@ -171,10 +165,10 @@ def __init__(self, dirname, total, body): # selinux stanza. This assumes that the context section will # always have at least two pieces separated by ':'. # '?' as the whole RHEL8 security context is also acceptable. - # substring to index of space is faster than strip - rhel8_selinux_ctx = rest[:rest.index(" ")] + # partition faster than substring to index of space faster than split + rhel8_selinux_ctx, _, _ = rest.partition(" ") if ":" in rhel8_selinux_ctx or '?' == rhel8_selinux_ctx: - # unconfined_u:object_r:var_lib_t:s0 54 Apr 8 16:41 abcd-efgh-ijkl-mnop + # -rwxrwxr-x. 1 user group unconfined_u:object_r:var_lib_t:s0 54 Apr 8 16:41 abcd-efgh-ijkl-mnop parser = parse_mode['rhel8_selinux'] else: # crw-------. 1 0 0 10, 236 Jul 25 10:00 control @@ -184,7 +178,7 @@ def __init__(self, dirname, total, body): # -rw-r--r--. root root system_u:object_r:boot_t:s0 config-3.10.0-267 parser = parse_mode['selinux'] # Now parse based on mode - parser(entry, links, owner, group, rest) + parser(entry, typ == 'l', links, owner, group, rest) # Update our entry and put it into the correct buckets # based on its type. diff --git a/insights/tests/parsers/test_ls.py b/insights/tests/parsers/test_ls.py index 7a40f7e78f..a490f1da17 100644 --- a/insights/tests/parsers/test_ls.py +++ b/insights/tests/parsers/test_ls.py @@ -472,8 +472,7 @@ def test_ls_laRZ(): dev_listings = ls.listing_of('/var/log') assert 'boot.log' in dev_listings assert dev_listings["boot.log"]['se_type'] == 'plymouthd_var_log_t' - assert 'kdump.log' in dev_listings - assert dev_listings["kdump.log"]['link'] == 'false-link.log' + assert 'kdump.log -> false-link.log' in dev_listings ls = LSlaRZ(context_wrap(LS_LARZ_CONTENT2)) assert '/var/log/rhsm' in ls @@ -535,5 +534,4 @@ def test_ls_laZ_on_dev(): assert dev_listings['block']['size'] == 140 dev_listings = ls.listing_of('/dev/vfio') - assert 'vfio' in dev_listings - assert dev_listings["vfio"]['link'] == 'false_link_2' + assert 'vfio -> false_link_2' in dev_listings From 8d6197fcff7652d108d21f8abcdffba7d46db7e7 Mon Sep 17 00:00:00 2001 From: Paul Wayper Date: Thu, 6 Mar 2025 15:51:34 +1100 Subject: [PATCH 09/15] Parse date as well as major,minor/size, don't store raw_entry, do type switch on files first Signed-off-by: Paul Wayper --- insights/core/ls_parser.py | 48 +++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/insights/core/ls_parser.py b/insights/core/ls_parser.py index 807d751b65..ccefd12b84 100644 --- a/insights/core/ls_parser.py +++ b/insights/core/ls_parser.py @@ -22,16 +22,16 @@ def set_name_link(entry, is_softlink, path): entry['name'] = path -def parse_major_minor(last, result): +def parse_major_minor_date(last, result): """ - Parse the size / major, minor section of the line. + Parse the size / major, minor and date section of the line. Args: last (str): the rest of the line after the owner and group. result (dict): the dirent dict to put details in. Returns: - The rest of the line after the size / major, minor. + The file name portion of the line. """ # device numbers only go to 256. # If a comma is in the first four characters, the next two elements are @@ -42,8 +42,10 @@ def parse_major_minor(last, result): result["minor"] = int(minor) else: size, rest = last.split(None, 1) - result["size"] = int(size) if size.isdigit() else size - return rest + result["size"] = size if size == '?'else int(size) + # The date part is always 12 characters regardless of content. + result['date'] = rest[:12] + return rest[13:] def parse_non_selinux(entry, is_softlink, links, owner, group, last): @@ -66,12 +68,9 @@ def parse_non_selinux(entry, is_softlink, links, owner, group, last): entry["owner"] = owner entry["group"] = group - rest = parse_major_minor(last, entry) - - # The date part is always 12 characters regardless of content. - entry["date"] = rest[:12] + rest = parse_major_minor_date(last, entry) - set_name_link(entry, is_softlink, rest[13:]) + set_name_link(entry, is_softlink, rest) def set_selinux(entry, selinux_str): @@ -123,9 +122,8 @@ def parse_rhel8_selinux(entry, is_softlink, links, owner, group, last): entry["group"] = group selinux_str, last = last.split(None, 1) set_selinux(entry, selinux_str) - rest = parse_major_minor(last, entry) - entry["date"] = rest[:12] - set_name_link(entry, is_softlink, rest[13:]) + rest = parse_major_minor_date(last, entry) + set_name_link(entry, is_softlink, rest) parse_mode = { @@ -180,22 +178,24 @@ def __init__(self, dirname, total, body): # Now parse based on mode parser(entry, typ == 'l', links, owner, group, rest) - # Update our entry and put it into the correct buckets - # based on its type. - entry.update(rest) - # TODO - # - The `raw_entry` key is deprecated and will be removed from 3.6.0. - # Please use the `insights.parsers.ls.FileListingParser.raw_entry_of` instead. - entry["raw_entry"] = line + # final details + # entry["raw_entry"] = line nm = entry["name"] ents[nm] = entry - if typ == "d": + # Files are most common, dirs next, so we handle this in frequency order + if typ == "-": + files.append(nm) + elif typ == "d": dirs.append(nm) - elif typ in "bc": # faster than typ == "b" or typ == "c" - specials.append(nm) else: - files.append(nm) + specials.append(nm) + # if typ == "d": + # dirs.append(nm) + # elif typ in "bc": + # specials.append(nm) + # else: + # files.append(nm) super(Directory, self).__init__( { From 3c641b3085a61ff79091b9a99eac8064c8a1cb83 Mon Sep 17 00:00:00 2001 From: Paul Wayper Date: Mon, 31 Mar 2025 14:23:46 +1100 Subject: [PATCH 10/15] Links are 'files' too Signed-off-by: Paul Wayper --- insights/core/ls_parser.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/insights/core/ls_parser.py b/insights/core/ls_parser.py index ccefd12b84..141b3d5b4b 100644 --- a/insights/core/ls_parser.py +++ b/insights/core/ls_parser.py @@ -184,18 +184,12 @@ def __init__(self, dirname, total, body): nm = entry["name"] ents[nm] = entry # Files are most common, dirs next, so we handle this in frequency order - if typ == "-": + if typ in "-l": files.append(nm) elif typ == "d": dirs.append(nm) else: specials.append(nm) - # if typ == "d": - # dirs.append(nm) - # elif typ in "bc": - # specials.append(nm) - # else: - # files.append(nm) super(Directory, self).__init__( { From 084d40edb77ac9ff3f023ff1bc29243faedc2a3e Mon Sep 17 00:00:00 2001 From: Paul Wayper Date: Mon, 31 Mar 2025 14:48:59 +1100 Subject: [PATCH 11/15] Test for correct handling of file name with -> in it Signed-off-by: Paul Wayper --- insights/tests/test_ls_parser.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/insights/tests/test_ls_parser.py b/insights/tests/test_ls_parser.py index 32ffa26182..828a553258 100644 --- a/insights/tests/test_ls_parser.py +++ b/insights/tests/test_ls_parser.py @@ -98,6 +98,7 @@ brw-rw----. 1 0 6 1048576 Aug 4 16:56 block dev with no comma also valid -rwxr-xr-x. 2 0 0 1024 Jul 6 23:32 file_name_ending_with_colon: lrwxrwxrwx. 1 0 0 11 Aug 4 2014 link with spaces -> ../file with spaces +-rw-r--r--. 1 0 0 0 Sep 3 2013 'file name with -> in it' """ COMPLICATED_FILES_BAD_LINE = """ @@ -279,9 +280,10 @@ def test_parse_multiple_directories_with_break(): def test_complicated_files(): results = parse(COMPLICATED_FILES.splitlines(), "/tmp") assert len(results) == 1 - assert results["/tmp"]["total"] == 16, results["/tmp"]["total"] - assert results["/tmp"]["name"] == "/tmp", results["/tmp"]["name"] - res = results["/tmp"]["entries"]["dm-10"] + assert "/tmp" in results + tmpdir = results["/tmp"] + assert tmpdir["name"] == "/tmp", results["/tmp"]["name"] + res = tmpdir["entries"]["dm-10"] assert res["type"] == "b" assert res["links"] == 1 assert res["owner"] == "0" @@ -291,6 +293,9 @@ def test_complicated_files(): assert res["date"] == "Aug 4 16:56" assert res["name"] == "dm-10" assert res["dir"] == "/tmp" + # Files that are not links but have ' -> ' in them should be found as such + assert "file name with -> in it" in tmpdir["files"] + assert tmpdir["files"]["file name with -> in it"]["type"] == "-" def test_files_with_selinux_disabled(): From 1f0d14e36ea31e1e100336ac3b6d0bf359f57f90 Mon Sep 17 00:00:00 2001 From: Paul Wayper Date: Mon, 31 Mar 2025 15:49:50 +1100 Subject: [PATCH 12/15] Handle file names (and links) that are quoted by ls Signed-off-by: Paul Wayper --- insights/core/ls_parser.py | 51 +++++++++++++++++++++++++++++--------- 1 file changed, 39 insertions(+), 12 deletions(-) diff --git a/insights/core/ls_parser.py b/insights/core/ls_parser.py index 141b3d5b4b..a14b4901e5 100644 --- a/insights/core/ls_parser.py +++ b/insights/core/ls_parser.py @@ -4,6 +4,16 @@ """ +def trim_fname(fname): + """ + Remove enclosing quotes or double quotes in file name. + """ + # Fastest way seems to be compare to set first, then compare first and last + if fname[0] in {'"', "'"} and fname[0] == fname[-1]: + return fname[1:-1] + return fname + + def set_name_link(entry, is_softlink, path): """ Get the name, and possibly the link, from the rest of the path. @@ -17,9 +27,11 @@ def set_name_link(entry, is_softlink, path): Does not return, entry keys for name and link added """ if is_softlink: - entry['name'], _, entry['link'] = path.partition(" -> ") + name, _, link = path.partition(" -> ") + entry['name'] = trim_fname(name) + entry['link'] = trim_fname(link) else: - entry['name'] = path + entry['name'] = trim_fname(path) def parse_major_minor_date(last, result): @@ -42,7 +54,7 @@ def parse_major_minor_date(last, result): result["minor"] = int(minor) else: size, rest = last.split(None, 1) - result["size"] = size if size == '?'else int(size) + result["size"] = size if size == '?' else int(size) # The date part is always 12 characters regardless of content. result['date'] = rest[:12] return rest[13:] @@ -53,7 +65,12 @@ def parse_non_selinux(entry, is_softlink, links, owner, group, last): Parse part of an ls output line that isn't selinux. Args: - link count, owner, group, and everything else. + entry (dict): the dict to put this data into + is_softlink (bool): is this actually a softlink? + links (str): the number of links on this dirent + owner (str): the owner (name or id) of this dirent + group (str): the group (name or id) of this dirent + last (str): the rest of the line Returns: A dict containing links, owner, group, date, and name. If the line @@ -90,11 +107,17 @@ def parse_old_selinux(entry, is_softlink, owner, group, selinux_str, name_part): Parse part of an ls output line that is selinux. Args: - owner, group, selinux info, and the path. + entry (dict): the dict to put this data into + is_softlink (bool): is this actually a softlink? + links (str): the number of links on this dirent + owner (str): the owner (name or id) of this dirent + group (str): the group (name or id) of this dirent + selinux_str (str): the SELinux context of this dirent + name_part (str): the name (and possibly link)) Returns: - A dict containing owner, group, se_user, se_role, se_type, se_mls, and - name. If the raw name was a symbolic link, link is also included. + No return; the ownership, SELinux context information and name part + are put directly into the entry dict. """ @@ -109,12 +132,16 @@ def parse_rhel8_selinux(entry, is_softlink, links, owner, group, last): Parse part of an ls output line that is selinux on RHEL8. Args: - link count, owner, group, and everything else. + entry (dict): the dict to put this data into + is_softlink (bool): is this actually a softlink? + links (str): the number of links on this dirent + owner (str): the owner (name or id) of this dirent + group (str): the group (name or id) of this dirent + last (str): the rest of the line Returns: - A dict containing links, owner, group, se_user, se_role, se_type, - se_mls, size, date, and name. If the raw name was a symbolic link, - link is also included. + No return; the ownership, SELinux context information and name part + are put directly into the entry dict. """ entry["links"] = int(links) if links.isdigit() else links @@ -196,7 +223,7 @@ def __init__(self, dirname, total, body): "dirs": dirs, "entries": ents, "files": files, - "name": name, + "name": dirname, "specials": specials, "total": total, } From 5020ea4c689aaa1df208d90dc52b47a34d9e8b46 Mon Sep 17 00:00:00 2001 From: Paul Wayper Date: Mon, 31 Mar 2025 15:50:15 +1100 Subject: [PATCH 13/15] Improving documentation (no large dict compares) Signed-off-by: Paul Wayper --- insights/parsers/ls.py | 46 +++++++++++++++++++++++++++++++++++++----- 1 file changed, 41 insertions(+), 5 deletions(-) diff --git a/insights/parsers/ls.py b/insights/parsers/ls.py index 41f3c3ef73..5530c43093 100644 --- a/insights/parsers/ls.py +++ b/insights/parsers/ls.py @@ -122,11 +122,30 @@ class FileListing(Parser, dict): [] >>> ls_lan.total_of("/etc/sysconfig") 96 - >>> ls_lan.dir_entry('/etc/sysconfig', 'grub') == {'group': '0', 'name': 'grub', 'links': 1, 'perms': 'rwxrwxrwx.', 'raw_entry': 'lrwxrwxrwx. 1 0 0 17 Jul 6 23:32 grub -> /etc/default/grub', 'owner': '0', 'link': '/etc/default/grub', 'date': 'Jul 6 23:32', 'type': 'l', 'dir': '/etc/sysconfig', 'size': 17} - True + >>> grub_entry = ls_lan.dir_entry('/etc/sysconfig', 'grub') + >>> grub_entry['group'] + '0' + >>> grub_entry['name'] + 'grub' + >>> grub_entry['links'] + 1 + >>> grub_entry['perms'] + 'rwxrwxrwx.' + >>> grub_entry['owner'] + '0' + >>> grub_entry['link'] + '/etc/default/grub' + >>> grub_entry['date'] + 'Jul 6 23:32' + >>> grub_entry['type'] + 'l' + >>> grub_entry['dir'] + '/etc/sysconfig' + >>> grub_entry['size'] + 17 >>> sorted(ls_lan.listing_of("/etc/sysconfig").keys()) == sorted(['console', 'grub', '..', 'firewalld', '.', 'cbq', 'ebtables-config']) True - >>> sorted(ls_lan.listing_of("/etc/sysconfig")['console'].keys()) == sorted(['group', 'name', 'links', 'perms', 'raw_entry', 'owner', 'date', 'type', 'dir', 'size']) + >>> sorted(ls_lan.listing_of("/etc/sysconfig")['console'].keys()) == sorted(['group', 'name', 'links', 'perms', 'owner', 'date', 'type', 'dir', 'size']) True >>> ls_lan.listing_of("/etc/sysconfig")['console']['type'] 'd' @@ -134,8 +153,25 @@ class FileListing(Parser, dict): 'rwxr-xr-x.' >>> ls_lan.dir_contains("/etc/sysconfig", "console") True - >>> ls_lan.dir_entry("/etc/sysconfig", "console") == {'group': '0', 'name': 'console', 'links': 2, 'perms': 'rwxr-xr-x.', 'raw_entry': 'drwxr-xr-x. 2 0 0 6 Sep 16 2015 console', 'owner': '0', 'date': 'Sep 16 2015', 'type': 'd', 'dir': '/etc/sysconfig', 'size': 6} - True + >>> console_entry = ls_lan.dir_entry("/etc/sysconfig", "console") + >>> console_entry['group'] + '0' + >>> console_entry['name'] + 'console' + >>> console_entry['links'] + 2 + >>> console_entry['perms'] + 'rwxr-xr-x.' + >>> console_entry['owner'] + '0' + >>> console_entry['date'] + 'Sep 16 2015' + >>> console_entry['type'] + 'd' + >>> console_entry['dir'] + '/etc/sysconfig' + >>> console_entry['size'] + 6 >>> ls_lan.dir_entry("/etc/sysconfig", "grub")['type'] 'l' >>> ls_lan.dir_entry("/etc/sysconfig", "grub")['link'] From 027b101714ec81eebf720e2da544ff87c0c252cc Mon Sep 17 00:00:00 2001 From: Paul Wayper Date: Mon, 31 Mar 2025 15:51:14 +1100 Subject: [PATCH 14/15] Removing direct raw_entry key comparisons Signed-off-by: Paul Wayper --- insights/tests/parsers/test_ls_boot.py | 2 -- insights/tests/parsers/test_ls_dev.py | 6 ------ insights/tests/parsers/test_ls_file_listing.py | 11 ----------- insights/tests/parsers/test_ls_sys_firmware.py | 1 - insights/tests/parsers/test_lsinitrd.py | 10 +++++++--- insights/tests/test_ls_parser.py | 8 ++++---- 6 files changed, 11 insertions(+), 27 deletions(-) diff --git a/insights/tests/parsers/test_ls_boot.py b/insights/tests/parsers/test_ls_boot.py index e97f448101..8793ba8b9b 100644 --- a/insights/tests/parsers/test_ls_boot.py +++ b/insights/tests/parsers/test_ls_boot.py @@ -93,7 +93,6 @@ def test_ls_boot(): "se_mls": "s0", "name": "grub.cfg", "date": "Apr 21 2023", - "raw_entry": "-rwx------. 1 root root system_u:object_r:boot_t:s0 6535 Apr 21 2023 grub.cfg", "dir": "/boot/grub2", } assert ( @@ -127,7 +126,6 @@ def test_boot_links(): "name": "dtb", "date": "Apr 21 2023", "link": "dtb-5.14.0-162.6.1.el9_1.aarch64", - "raw_entry": "lrwxrwxrwx. 1 root root system_u:object_r:boot_t:s0 32 Apr 21 2023 dtb -> dtb-5.14.0-162.6.1.el9_1.aarch64", "dir": "/boot", } assert ( diff --git a/insights/tests/parsers/test_ls_dev.py b/insights/tests/parsers/test_ls_dev.py index 6ac118a898..cac8309566 100644 --- a/insights/tests/parsers/test_ls_dev.py +++ b/insights/tests/parsers/test_ls_dev.py @@ -70,7 +70,6 @@ def test_ls_dev(): "name": "home", "links": 1, "perms": "rwxrwxrwx.", - "raw_entry": "lrwxrwxrwx. 1 0 0 7 Jul 25 10:00 home -> ../dm-2", "owner": "0", "link": "../dm-2", "date": "Jul 25 10:00", @@ -83,7 +82,6 @@ def test_ls_dev(): "name": "root", "links": 1, "perms": "rwxrwxrwx.", - "raw_entry": "lrwxrwxrwx. 1 0 0 7 Jul 25 10:00 root -> ../dm-0", "owner": "0", "link": "../dm-0", "date": "Jul 25 10:00", @@ -96,7 +94,6 @@ def test_ls_dev(): "name": "swap", "links": 1, "perms": "rwxrwxrwx.", - "raw_entry": "lrwxrwxrwx. 1 0 0 7 Jul 25 10:00 swap -> ../dm-1", "owner": "0", "link": "../dm-1", "date": "Jul 25 10:00", @@ -109,7 +106,6 @@ def test_ls_dev(): "name": "..", "links": 23, "perms": "rwxr-xr-x.", - "raw_entry": "drwxr-xr-x. 23 0 0 3720 Jul 25 12:43 ..", "owner": "0", "date": "Jul 25 12:43", "type": "d", @@ -121,7 +117,6 @@ def test_ls_dev(): "name": ".", "links": 2, "perms": "rwxr-xr-x.", - "raw_entry": "drwxr-xr-x. 2 0 0 100 Jul 25 10:00 .", "owner": "0", "date": "Jul 25 10:00", "type": "d", @@ -153,7 +148,6 @@ def test_ls_dev(): "name": "root", "date": "Sep 30 16:58", "link": "../dm-0", - "raw_entry": "lrwxrwxrwx. 1 root root system_u:object_r:device_t:s0 7 Sep 30 16:58 root -> ../dm-0", "dir": "/dev/rhel", } assert ( diff --git a/insights/tests/parsers/test_ls_file_listing.py b/insights/tests/parsers/test_ls_file_listing.py index 3c9b6d9727..8e2afe5bea 100644 --- a/insights/tests/parsers/test_ls_file_listing.py +++ b/insights/tests/parsers/test_ls_file_listing.py @@ -188,7 +188,6 @@ def test_multiple_directories(): 'size': 8192, 'date': 'Jul 13 03:55', 'name': '..', - 'raw_entry': 'drwxr-xr-x. 77 0 0 8192 Jul 13 03:55 ..', 'dir': '/etc/sysconfig', } assert listing['cbq'] == { @@ -200,7 +199,6 @@ def test_multiple_directories(): 'size': 41, 'date': 'Jul 6 23:32', 'name': 'cbq', - 'raw_entry': 'drwxr-xr-x. 2 0 0 41 Jul 6 23:32 cbq', 'dir': '/etc/sysconfig', } assert listing['firewalld'] == { @@ -212,7 +210,6 @@ def test_multiple_directories(): 'size': 72, 'date': 'Sep 15 2015', 'name': 'firewalld', - 'raw_entry': '-rw-r--r--. 1 0 0 72 Sep 15 2015 firewalld', 'dir': '/etc/sysconfig', } assert listing['grub'] == { @@ -225,7 +222,6 @@ def test_multiple_directories(): 'date': 'Jul 6 23:32', 'name': 'grub', 'link': '/etc/default/grub', - 'raw_entry': 'lrwxrwxrwx. 1 0 0 17 Jul 6 23:32 grub -> /etc/default/grub', 'dir': '/etc/sysconfig', } @@ -239,7 +235,6 @@ def test_multiple_directories(): 'size': 4096, 'date': 'Sep 16 2015', 'name': '..', - 'raw_entry': 'drwxr-xr-x. 10 0 0 4096 Sep 16 2015 ..', 'dir': '/etc/rc.d/rc3.d', } assert listing['K50netconsole'] == { @@ -252,7 +247,6 @@ def test_multiple_directories(): 'date': 'Jul 6 23:32', 'name': 'K50netconsole', 'link': '../init.d/netconsole', - 'raw_entry': 'lrwxrwxrwx. 1 0 0 20 Jul 6 23:32 K50netconsole -> ../init.d/netconsole', 'dir': '/etc/rc.d/rc3.d', } @@ -270,7 +264,6 @@ def test_multiple_directories(): 'date': 'Jul 6 23:32', 'name': 'grub', 'link': '/etc/default/grub', - 'raw_entry': 'lrwxrwxrwx. 1 0 0 17 Jul 6 23:32 grub -> /etc/default/grub', 'dir': '/etc/sysconfig', } @@ -283,7 +276,6 @@ def test_multiple_directories(): 'size': 41, 'date': 'Jul 6 23:32', 'name': 'cbq', - 'raw_entry': 'drwxr-xr-x. 2 0 0 41 Jul 6 23:32 cbq', 'dir': '/etc/sysconfig', } assert dirs.path_entry('no_slash') is None @@ -357,7 +349,6 @@ def test_complicated_directory(): 'date': 'Aug 4 16:56', 'name': 'dm-10', 'dir': '/tmp', - 'raw_entry': 'brw-rw----. 1 0 6 253, 10 Aug 4 16:56 dm-10', } assert listing['dm-10']['type'] == 'b' assert listing['dm-10']['major'] == 253 @@ -407,7 +398,6 @@ def test_selinux_directory(): 'se_type': 'boot_t', 'se_mls': 's0', 'name': 'grub2', - 'raw_entry': 'drwxr-xr-x. root root system_u:object_r:boot_t:s0 grub2', 'dir': '/boot', } actual = dirs.dir_entry('/boot', 'grub2') @@ -423,7 +413,6 @@ def test_files_created_with_selinux_disabled(): 'name': 'lv_cpwtk001_data01', 'links': 1, 'perms': 'rwxrwxrwx', - 'raw_entry': 'lrwxrwxrwx 1 0 0 7 Apr 27 05:34 lv_cpwtk001_data01 -> ../dm-7', 'owner': '0', 'link': '../dm-7', 'date': 'Apr 27 05:34', diff --git a/insights/tests/parsers/test_ls_sys_firmware.py b/insights/tests/parsers/test_ls_sys_firmware.py index 8ca106d75d..9a1f4a2ece 100644 --- a/insights/tests/parsers/test_ls_sys_firmware.py +++ b/insights/tests/parsers/test_ls_sys_firmware.py @@ -69,7 +69,6 @@ def test_ls_sys_firmware(): "se_mls": "s0", "name": "efi", "date": "Sep 30 16:58", - "raw_entry": "drwxr-xr-x. 3 root root system_u:object_r:sysfs_t:s0 0 Sep 30 16:58 efi", "dir": "/sys/firmware", } assert ( diff --git a/insights/tests/parsers/test_lsinitrd.py b/insights/tests/parsers/test_lsinitrd.py index 8c1ed5c9e3..4c48bb438e 100644 --- a/insights/tests/parsers/test_lsinitrd.py +++ b/insights/tests/parsers/test_lsinitrd.py @@ -191,7 +191,13 @@ def test_lsinitrd_empty(): def test_lsinitrd_filtered(): d = lsinitrd.Lsinitrd(context_wrap(LSINITRD_FILTERED)) assert len(d.data) == 5 - assert d.search(name__contains='kernel') == [{'type': 'd', 'perms': 'rwxr-xr-x', 'links': 3, 'owner': 'root', 'group': 'root', 'size': 0, 'date': 'Apr 20 15:58', 'name': 'kernel/x86', 'raw_entry': 'drwxr-xr-x 3 root root 0 Apr 20 15:58 kernel/x86', 'dir': ''}] + assert d.search(name__contains='kernel') == [ + { + 'type': 'd', 'perms': 'rwxr-xr-x', 'links': 3, 'owner': 'root', + 'group': 'root', 'size': 0, 'date': 'Apr 20 15:58', + 'name': 'kernel/x86', 'dir': '' + } + ] assert d.unparsed_lines == ['Version: dracut-033-535.el7', 'dracut modules:', 'kernel-modules', 'udev-rules'] @@ -202,7 +208,6 @@ def test_lsinitrd_all(): dev_console = { 'type': 'c', 'perms': 'rw-r--r--', 'links': 1, 'owner': 'root', 'group': 'root', 'major': 5, 'minor': 1, 'date': 'Apr 20 15:57', 'name': 'dev/console', 'dir': '', - 'raw_entry': 'crw-r--r-- 1 root root 5, 1 Apr 20 15:57 dev/console' } assert dev_console in lsdev assert 'dev/kmsg' in [l['name'] for l in lsdev] @@ -227,7 +232,6 @@ def test_lsinitrd_kdump_image_valid(): assert parser_result is not None result_list = parser_result.search(name__contains='devname') assert len(result_list) == 1 - assert result_list[0].get('raw_entry') == '-rw-r--r-- 1 root root 126 Aug 4 2020 usr/lib/modules/4.18.0-240.el8.x86_64/modules.devname' def test_lsinitrd_lvm_conf(): diff --git a/insights/tests/test_ls_parser.py b/insights/tests/test_ls_parser.py index 828a553258..b824ec1a67 100644 --- a/insights/tests/test_ls_parser.py +++ b/insights/tests/test_ls_parser.py @@ -97,7 +97,7 @@ drwxr-xr-x+ 2 0 0 41 Jul 6 23:32 additional_ACLs brw-rw----. 1 0 6 1048576 Aug 4 16:56 block dev with no comma also valid -rwxr-xr-x. 2 0 0 1024 Jul 6 23:32 file_name_ending_with_colon: -lrwxrwxrwx. 1 0 0 11 Aug 4 2014 link with spaces -> ../file with spaces +lrwxrwxrwx. 1 0 0 11 Aug 4 2014 'link with spaces' -> '../file with spaces' -rw-r--r--. 1 0 0 0 Sep 3 2013 'file name with -> in it' """ @@ -117,7 +117,7 @@ drwxr-xr-x+ 2 0 0 41 Jul 6 23:32 additional_ACLs brw-rw----. 1 0 6 1048576 Aug 4 16:56 block dev with no comma also valid -rwxr-xr-x. 2 0 0 1024 Jul 6 23:32 file_name_ending_with_colon: -lrwxrwxrwx. 1 0 0 11 Aug 4 2014 link with spaces -> ../file with spaces +lrwxrwxrwx. 1 0 0 11 Aug 4 2014 'link with spaces' -> '../file with spaces' """ SELINUX_DIRECTORY = """ @@ -294,8 +294,8 @@ def test_complicated_files(): assert res["name"] == "dm-10" assert res["dir"] == "/tmp" # Files that are not links but have ' -> ' in them should be found as such - assert "file name with -> in it" in tmpdir["files"] - assert tmpdir["files"]["file name with -> in it"]["type"] == "-" + assert "file name with -> in it" in tmpdir["entries"] + assert tmpdir["entries"]["file name with -> in it"]["type"] == "-" def test_files_with_selinux_disabled(): From 37dea651e0067e7363115c34886c2ea19c7e0fb1 Mon Sep 17 00:00:00 2001 From: Paul Wayper Date: Tue, 1 Apr 2025 17:24:39 +1100 Subject: [PATCH 15/15] Faster to handle normal line first - no partition, just isdigit Signed-off-by: Paul Wayper --- insights/core/ls_parser.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/insights/core/ls_parser.py b/insights/core/ls_parser.py index a14b4901e5..14c6f40a53 100644 --- a/insights/core/ls_parser.py +++ b/insights/core/ls_parser.py @@ -190,15 +190,14 @@ def __init__(self, dirname, total, body): # selinux stanza. This assumes that the context section will # always have at least two pieces separated by ':'. # '?' as the whole RHEL8 security context is also acceptable. - # partition faster than substring to index of space faster than split - rhel8_selinux_ctx, _, _ = rest.partition(" ") - if ":" in rhel8_selinux_ctx or '?' == rhel8_selinux_ctx: - # -rwxrwxr-x. 1 user group unconfined_u:object_r:var_lib_t:s0 54 Apr 8 16:41 abcd-efgh-ijkl-mnop - parser = parse_mode['rhel8_selinux'] - else: + # Handle normal case first... + if rest[0].isdigit(): # crw-------. 1 0 0 10, 236 Jul 25 10:00 control # lrwxrwxrwx. 1 0 0 11 Aug 4 2014 menu.lst -> ./grub.conf parser = parse_mode['normal'] + else: + # -rwxrwxr-x. 1 user group unconfined_u:object_r:var_lib_t:s0 54 Apr 8 16:41 abcd-efgh-ijkl-mnop + parser = parse_mode['rhel8_selinux'] else: # -rw-r--r--. root root system_u:object_r:boot_t:s0 config-3.10.0-267 parser = parse_mode['selinux']