project-chip
diff --git a/‎scripts/memory/memdf/README.md‎
Lines changed: 29 additions & 0 deletions b/‎scripts/memory/memdf/README.md‎
Lines changed: 29 additions & 0 deletions
diff --git a/‎scripts/memory/memdf/__init__.py‎
Lines changed: 24 additions & 0 deletions b/‎scripts/memory/memdf/__init__.py‎
Lines changed: 24 additions & 0 deletions
diff --git a/‎scripts/memory/memdf/collect.py‎
Lines changed: 334 additions & 0 deletions b/‎scripts/memory/memdf/collect.py‎
Lines changed: 334 additions & 0 deletions
diff --git a/‎scripts/memory/memdf/collector/__init__.py‎ b/‎scripts/memory/memdf/collector/__init__.py‎
@@ -0,0 +1,29 @@
+---
+orphan: true
+---
+
+This package contains routines to collect, aggregate, and report memory
+usage, using Pandas `DataFrame` as the primary representation.
+
+-   memdf.collect — Helpers to read memory information from various sources
+    (e.g. executables) according to command line options.
+-   memdf.df — `DataFrame` utilities, in particular definitions of columns and
+    types for the main uses of data frames.
+-   memdf.name — Names for synthetic symbols, etc. Individual readers are
+    located under memdf.collector.
+-   memdf.report — Helpers to write data frames in various formats according to
+    command line or configured options.
+-   memdf.select — Helpers to select relevant subsets of data frames according
+    to command line or configured options.
+-   memdf.sizedb — Helpers for a database of size information.
+
+Modules under memdf.util are not specifically tied to memory usage.
+
+-   memdf.util.config — `Config` utility class for managing command line or
+    other options according to a declarative description.
+-   memdf.util.github — Utilities for communicating with GitHub.
+-   memdf.util.markdown — Utilities for manipulating Markdown text.
+-   memdf.util.nd — Nested dictionary utilities, used by `Config`.
+-   memdf.util.pretty — Pretty-printed logging utility functions.
+-   memdf.util.sqlite — Utilities for connecting to a sqlite3 database.
+-   memdf.util.subprocess — Utilities for executing external commands.
@@ -0,0 +1,24 @@
+#
+# Copyright (c) 2021 Project CHIP Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+"""Package for working with memory usage information using Pandas DataFrame."""
+
+from memdf.df import DF, DFs, ExtentDF, SectionDF, SegmentDF, SymbolDF
+from memdf.util.config import Config, ConfigDescription
+
+__all__ = [
+    'DF', 'SymbolDF', 'SectionDF', 'SegmentDF', 'ExtentDF',
+    'DFs', 'Config', 'ConfigDescription',
+]
@@ -0,0 +1,334 @@
+#
+# Copyright (c) 2021 Project CHIP Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+"""Collect information from various sources into Memory Map DataFrames."""
+
+import bisect
+from typing import Callable, Dict, List, Mapping, Optional, Sequence, Tuple
+
+import memdf.collector.bloaty
+import memdf.collector.csv
+import memdf.collector.elftools
+import memdf.collector.readelf
+import memdf.collector.su
+import memdf.name
+import memdf.select
+import memdf.util.config
+import pandas as pd  # type: ignore
+from elftools.elf.constants import SH_FLAGS  # type: ignore
+from memdf import DF, Config, ConfigDescription, DFs, ExtentDF, SectionDF, SymbolDF
+from memdf.collector.util import simplify_source
+
+# Option description for stripping leading path prefixes from source file
+# names. It is defined on its own and merged into CONFIG below.
+PREFIX_CONFIG: ConfigDescription = {
+    'collect.prefix': {
+        'help': 'Strip PATH from the beginning of source file names',
+        'metavar': 'PATH',
+        'default': [],
+        'argparse': {
+            # Two command line spellings for the same option. With the
+            # 'append' action the option can presumably be repeated to
+            # accumulate several prefixes (standard argparse behaviour;
+            # confirm how memdf.util.config forwards these settings).
+            'alias': ['--prefix', '--strip-prefix'],
+            'action': 'append',
+        }
+    },
+}
+
+# Option descriptions for collecting input. parse_args() merges this with
+# memdf.util.config.CONFIG and a caller-supplied description.
+#
+# Entries are merged in the order written, so for duplicate keys a later
+# entry replaces an earlier one (ordinary dict-literal semantics).
+CONFIG: ConfigDescription = {
+    # Presumably these declare the 'input' and 'tool' option groups and place
+    # the 'collect.*' options in the 'input' group; confirm against Config.
+    Config.group_def('input'): {
+        'title': 'input options',
+    },
+    Config.group_def('tool'): {
+        'title': 'external tool options',
+    },
+    Config.group_map('collect'): {
+        'group': 'input'
+    },
+    # Options contributed by the individual collectors.
+    # NOTE(review): 'su' is an accepted method below, but no options from
+    # memdf.collector.su are merged here; verify whether it defines any.
+    **memdf.collector.bloaty.CONFIG,
+    **memdf.collector.csv.CONFIG,
+    **memdf.collector.elftools.CONFIG,
+    **memdf.collector.readelf.CONFIG,
+    # Selects which entry of FILE_READERS is used to read each input.
+    'collect.method': {
+        'help':
+            'Method of input processing: one of'
+            ' elftools, readelf, bloaty, csv, tsv, su.',
+        'metavar': 'METHOD',
+        'choices': ['elftools', 'readelf', 'bloaty', 'csv', 'tsv', 'su'],
+        'default': 'elftools',
+        'argparse': {
+            'alias': ['-f'],
+        },
+    },
+    **PREFIX_CONFIG,
+}
+
+ARM_SPECIAL_SYMBOLS = frozenset(["$a", "$t", "$t.x", "$d", "$d.realdata"])
+
+
+def postprocess_symbols(config: Config, symbols: SymbolDF) -> SymbolDF:
+    """Postprocess a symbol table after collecting from one source.
+
+    If the symbol table contains FILE symbols, they will be removed and
+    replaced by a 'file' column on other symbols.
+
+    If the symbol table contains ARM mode symbols, they will be removed
+    and replaced by an 'arm' column on other symbols.
+    """
+    files = []
+    arms = []
+    arm_symbols = {}
+    current_file = ''
+    current_arm = ''
+    has_file = False
+    if config['collect.prefix-file']:
+        prefixes = config.get_re('collect.prefix')
+    else:
+        prefixes = None
+    if 'type' in symbols.columns:
+        for symbol in symbols.itertuples():
+            if symbol.type == 'FILE':
+                has_file = True
+                current_file = symbol.symbol
+                if prefixes:
+                    current_file = simplify_source(current_file, prefixes)
+
+            elif symbol.type == 'NOTYPE':
+                if symbol.symbol.startswith('$'):
+                    if current_arm or symbol.symbol in ARM_SPECIAL_SYMBOLS:
+                        current_arm = symbol.symbol
+                        arm_symbols[current_arm] = True
+            files.append(current_file)
+            arms.append(current_arm)
+
+    if has_file:
+        symbols['file'] = files
+    if current_arm:
+        symbols['arm'] = arms
+
+    if has_file:
+        symbols = symbols[symbols['type'] != 'FILE']
+    if current_arm:
+        syms = arm_symbols.keys()
+        symbols = symbols[~symbols.symbol.isin(syms)]
+    return symbols
+
+
+def postprocess_file(config: Config, dfs: DFs) -> None:
+    """Postprocess tables after collecting from one source."""
+    if SymbolDF.name in dfs:
+        dfs[SymbolDF.name] = postprocess_symbols(config, dfs[SymbolDF.name])
+
+
+def fill_holes(config: Config, symbols: SymbolDF, sections: SectionDF) -> DFs:
+    """Account for space not used by any symbol, or by multiple symbols.
+
+    Walks the symbols of each configured or allocated section in address
+    order and synthesizes rows for address ranges that no symbol covers
+    ('gap' or 'unused') and for ranges where consecutive symbols overlap
+    ('overlap').
+
+    Returns a dictionary holding an `ExtentDF` for each of 'gap', 'unused'
+    and 'overlap', plus the symbol table (under `SymbolDF.name`) with the
+    synthetic rows added and sorted by address.
+    """
+
+    # These symbols mark the start or end of unused space.
+    start_unused = frozenset(config.get('symbol.free.start', []))
+    end_unused = frozenset(config.get('symbol.free.end', []))
+
+    # Columns of the synthetic rows. 'cu' and 'input' are carried along only
+    # when the incoming symbol table has them.
+    extent_columns = ['address', 'size', 'section', 'file']
+    need_cu = 'cu' in symbols.columns
+    if need_cu:
+        extent_columns.append('cu')
+    need_input = 'input' in symbols.columns
+    if need_input:
+        extent_columns.append('input')
+    columns = ['symbol', *extent_columns, 'type', 'bind']
+
+    def filler(name, address, size, previous, current) -> List:
+        """Build one synthetic row, with values in the order of `columns`.
+
+        Section, file, cu and input are taken from `previous` when there is
+        one, otherwise from `current`, otherwise a placeholder is used.
+        """
+        row = [
+            name,  # symbol
+            address,  # address
+            size,  # size
+            (previous.section if previous else
+             current.section if current else memdf.name.UNDEF),  # section
+            (previous.file
+             if previous else current.file if current else ''),  # file
+        ]
+        if need_cu:
+            row.append(
+                previous.cu if previous else current.cu if current else '')
+        if need_input:
+            row.append(previous.input if previous else current.
+                       input if current else '')
+        row.append('NOTYPE')  # type
+        row.append('LOCAL')  # bind
+        return row
+
+    def fill_gap(previous, current, from_address,
+                 to_address) -> Tuple[str, List]:
+        """Add a row for an unaccounted gap or unused space.
+
+        The space counts as 'unused' when there is no previous symbol, when
+        the previous symbol is configured as starting unused space, or when
+        the current symbol is configured as ending it; otherwise it is a
+        'gap'. Returns the kind and the new row.
+        """
+        size = to_address - from_address
+        if (previous is None or previous.symbol in start_unused
+                or current.symbol in end_unused):
+            use = 'unused'
+            name = memdf.name.unused(from_address, size)
+        else:
+            use = 'gap'
+            name = memdf.name.gap(from_address, size)
+        return (use, filler(name, from_address, size, previous, current))
+
+    def fill_overlap(previous, current, from_address,
+                     to_address) -> Tuple[str, List]:
+        """Add a row for overlap.
+
+        The caller passes from_address > to_address, so `size` is negative
+        here; the row keeps the negative size while the synthetic name is
+        built from its magnitude.
+        """
+        size = to_address - from_address
+        return ('overlap',
+                filler(memdf.name.overlap(from_address, -size), from_address,
+                       size, previous, current))
+
+    # Find the address range for sections that are configured or allocated.
+    config_sections = set()
+    for _, s in config.get('region.sections', {}).items():
+        config_sections |= set(s)
+    section_to_range = {}
+    start_to_section = {}
+    # The leading 0 guarantees that the bisect lookup below always finds an
+    # entry at or before any non-negative address.
+    section_starts = [0]
+    for s in sections.itertuples():
+        if ((s.section in config_sections) or (s.flags & SH_FLAGS.SHF_ALLOC)):
+            section_to_range[s.section] = range(s.address, s.address + s.size)
+            start_to_section[s.address] = s.section
+            section_starts.append(s.address)
+    section_starts.sort()
+
+    # Synthetic rows, collected by kind.
+    new_symbols: Dict[str, List[list]] = {
+        'gap': [],
+        'unused': [],
+        'overlap': []
+    }
+    section_range = None
+    previous_symbol = None
+    current_address = 0  # First address not yet accounted for.
+    # Only ordinary symbols in the sections of interest take part.
+    iterable_symbols = symbols.loc[(symbols.type != 'SECTION')
+                                   & (symbols.type != 'FILE')
+                                   & symbols.section.isin(section_to_range)]
+    iterable_symbols = iterable_symbols.sort_values(by='address')
+
+    for symbol in iterable_symbols.itertuples():
+        if not previous_symbol or symbol.section != previous_symbol.section:
+            # We sometimes see symbols that have the value of their section end
+            # address (so they are not actually within the section) and have
+            # the same address as a symbol in the next section.
+            # Such symbols are skipped: look up the section whose start is
+            # the closest one at or below the symbol's address and compare.
+            symbol_address_section = start_to_section.get(section_starts[
+                bisect.bisect_right(section_starts, symbol.address) - 1])
+            if symbol_address_section != symbol.section:
+                continue
+            # Starting or switching sections.
+            if previous_symbol and section_range:
+                # previous_symbol is the last in its section.
+                # section_range[-1] + 1 is the first address past the section.
+                if current_address < section_range[-1] + 1:
+                    use, row = fill_gap(previous_symbol, previous_symbol,
+                                        current_address, section_range[-1] + 1)
+                    new_symbols[use].append(row)
+            # Start of section.
+            previous_symbol = None
+            section_range = section_to_range.get(symbol.section)
+            if section_range:
+                current_address = section_range[0]
+        if section_range:
+            if current_address < symbol.address:
+                # Nothing covers [current_address, symbol.address).
+                use, row = fill_gap(previous_symbol, symbol, current_address,
+                                    symbol.address)
+                new_symbols[use].append(row)
+            elif current_address > symbol.address:
+                # This symbol starts before the previous one ended.
+                use, row = fill_overlap(previous_symbol, symbol,
+                                        current_address, symbol.address)
+                new_symbols[use].append(row)
+        current_address = symbol.address + symbol.size
+        previous_symbol = symbol
+
+    # NOTE(review): end-of-section filler is only emitted above when a later
+    # symbol switches sections, so space after the last symbol of the final
+    # section is not examined; confirm whether that is intended.
+
+    dfs = {k: SymbolDF(new_symbols[k], columns=columns) for k in new_symbols}
+    # Add the synthetic rows to the symbol table itself.
+    symbols = pd.concat([symbols, *dfs.values()]).fillna('')
+    symbols.sort_values(by='address', inplace=True)
+    # The per-kind tables are returned as extents, without symbol columns.
+    for k in dfs:
+        dfs[k] = ExtentDF(dfs[k][extent_columns])
+        dfs[k].attrs['name'] = k
+    dfs[SymbolDF.name] = SymbolDF(symbols)
+    return dfs
+
+
+def postprocess_collected(config: Config, dfs: DFs) -> None:
+    """Postprocess tables after reading all sources."""
+
+    # Prune tables according to configuration options. This happens before
+    # fill_holes() so that space of any pruned symbols will be accounted for,
+    # and to avoid unnecessary work for pruned sections.
+    for c in [SymbolDF, SectionDF]:
+        if c.name in dfs:
+            dfs[c.name] = memdf.select.select_configured(
+                config, dfs[c.name], memdf.select.COLLECTED_CHOICES)
+
+    # Account for space not used by any symbol, or by multiple symbols.
+    if (SymbolDF.name in dfs and SectionDF.name in dfs
+            and config.get('args.fill_holes', True)):
+        dfs.update(fill_holes(config, dfs[SymbolDF.name], dfs[SectionDF.name]))
+
+    # Create synthetic columns (e.g. 'region') and prune tables
+    # according to their configuration. This happens after fill_holes()
+    # so that synthetic column values will be created for the gap symbols.
+    for c in [SymbolDF, SectionDF]:
+        if c.name in dfs:
+            for column in memdf.select.SYNTHETIC_CHOICES:
+                dfs[c.name] = memdf.select.synthesize_column(
+                    config, dfs[c.name], column)
+                dfs[c.name] = memdf.select.select_configured_column(
+                    config, dfs[c.name], column)
+
+    for df in dfs.values():
+        if demangle := set((c for c in df.columns if c.endswith('symbol'))):
+            df.attrs['demangle'] = demangle
+        if hexify := set((c for c in df.columns if c.endswith('address'))):
+            df.attrs['hexify'] = hexify
+
+
+# Signature shared by all readers. collect_files() calls a reader with the
+# configuration, one input name, and the method name, and expects a
+# dictionary of data frames back.
+FileReader = Callable[[Config, str, str], DFs]
+
+# Reader for each value accepted by the 'collect.method' option.
+FILE_READERS: Dict[str, FileReader] = {
+    'bloaty': memdf.collector.bloaty.read_file,
+    'elftools': memdf.collector.elftools.read_file,
+    'readelf': memdf.collector.readelf.read_file,
+    # 'csv' and 'tsv' share one reader; presumably it uses the method name
+    # it is given to tell them apart (confirm in memdf.collector.csv).
+    'csv': memdf.collector.csv.read_file,
+    'tsv': memdf.collector.csv.read_file,
+    # NOTE(review): the name suggests the input is a directory; confirm.
+    'su': memdf.collector.su.read_dir,
+}
+
+
+def collect_files(config: Config,
+                  files: Optional[List[str]] = None,
+                  method: Optional[str] = None) -> DFs:
+    """Read a filtered memory map from a set of files."""
+    filenames = files if files else config.get('args.inputs', [])
+    if method is None:
+        method = config.get('collect.method', 'csv')
+    frames: Dict[str, List[DF]] = {}
+    for filename in filenames:
+        dfs: DFs = FILE_READERS[method](config, filename, method)
+        postprocess_file(config, dfs)
+        for k, frame in dfs.items():
+            if k not in frames:
+                frames[k] = []
+            frames[k].append(frame)
+    dfs = {}
+    for k, v in frames.items():
+        dfs[k] = pd.concat(v, ignore_index=True)
+    postprocess_collected(config, dfs)
+    return dfs
+
+
+def parse_args(config_desc: Mapping, argv: Sequence[str]) -> Config:
+    """Common argument parsing for collection tools."""
+    config = Config().init({
+        **memdf.util.config.CONFIG,
+        **CONFIG,
+        **config_desc
+    })
+    config.argparse.add_argument('inputs', metavar='FILE', nargs='+')
+    return config.parse(argv)