Skip to content

Commit 144222f

Browse files
committed
Add memdf from Matter C++
1 parent 99745c7 commit 144222f

23 files changed

Lines changed: 2986 additions & 0 deletions

scripts/memory/memdf/README.md

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
---
2+
orphan: true
3+
---
4+
5+
This package contains routines to collect, aggregate, and report memory
6+
usage, using Pandas `DataFrame` as the primary representation.
7+
8+
- memdf.collect — Helpers to read memory information from various sources
9+
(e.g. executables) according to command line options.
10+
- memdf.df — `DataFrame` utilities, in particular definitions of columns and
11+
types for the main uses of data frames.
12+
- memdf.name — Names for synthetic symbols, etc. Individual readers are
13+
located under memdf.collector.
14+
- memdf.report — Helpers to write data frames in various formats according to
15+
command line or configured options.
16+
- memdf.select — Helpers to select relevant subsets of data frames according
17+
to command line or configured options.
18+
- memdf.sizedb — Helpers for a database of size information.
19+
20+
Modules under memdf.util are not specifically tied to memory usage.
21+
22+
- memdf.util.config — `Config` utility class for managing command line or
23+
other options according to a declarative description.
24+
- memdf.util.github — Utilities for communicating with GitHub.
25+
- memdf.util.markdown — Utilities for manipulating Markdown text.
26+
- memdf.util.nd — Nested dictionary utilities, used by `Config`.
27+
- memdf.util.pretty — Pretty-printed logging utility functions.
28+
- memdf.util.sqlite — Utilities for connecting to a sqlite3 database.
29+
- memdf.util.subprocess — Utilities for executing external commands.

scripts/memory/memdf/__init__.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
#
2+
# Copyright (c) 2021 Project CHIP Authors
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
#
16+
"""Package for working with memory usage information using Pandas DataFrame."""
17+
18+
from memdf.df import DF, DFs, ExtentDF, SectionDF, SegmentDF, SymbolDF
19+
from memdf.util.config import Config, ConfigDescription
20+
21+
# Names exported by `from memdf import *`; all of them are imported above
# from memdf.df and memdf.util.config.
__all__ = [
    'DF', 'SymbolDF', 'SectionDF', 'SegmentDF', 'ExtentDF',
    'DFs', 'Config', 'ConfigDescription',
]

scripts/memory/memdf/collect.py

Lines changed: 334 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,334 @@
1+
#
2+
# Copyright (c) 2021 Project CHIP Authors
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
#
16+
"""Collect information from various sources into Memory Map DataFrames."""
17+
18+
import bisect
19+
from typing import Callable, Dict, List, Mapping, Optional, Sequence, Tuple
20+
21+
import memdf.collector.bloaty
22+
import memdf.collector.csv
23+
import memdf.collector.elftools
24+
import memdf.collector.readelf
25+
import memdf.collector.su
26+
import memdf.name
27+
import memdf.select
28+
import memdf.util.config
29+
import pandas as pd # type: ignore
30+
from elftools.elf.constants import SH_FLAGS # type: ignore
31+
from memdf import DF, Config, ConfigDescription, DFs, ExtentDF, SectionDF, SymbolDF
32+
from memdf.collector.util import simplify_source
33+
34+
# Description of the option for stripping leading paths from source file
# names. Kept as a separate mapping; CONFIG below merges it in with
# `**PREFIX_CONFIG`.
PREFIX_CONFIG: ConfigDescription = {
    'collect.prefix': {
        'help': 'Strip PATH from the beginning of source file names',
        'metavar': 'PATH',
        'default': [],
        'argparse': {
            'alias': ['--prefix', '--strip-prefix'],
            # Presumably forwarded to argparse, where 'append' collects every
            # occurrence of the flag into a list -- confirm against Config.
            'action': 'append',
        }
    },
}
45+
46+
# Option descriptions for collection tools: two option group definitions, a
# group mapping for 'collect', the CONFIG mappings of the bloaty, csv,
# elftools and readelf collectors, the input method selector, and the
# prefix-stripping option from PREFIX_CONFIG.
# Later entries override earlier ones with the same key, as in any dict
# literal using `**` unpacking.
# NOTE(review): memdf.collector.su is imported and used as a reader, but no
# `memdf.collector.su.CONFIG` is merged here -- confirm it defines none.
CONFIG: ConfigDescription = {
    Config.group_def('input'): {
        'title': 'input options',
    },
    Config.group_def('tool'): {
        'title': 'external tool options',
    },
    Config.group_map('collect'): {
        'group': 'input'
    },
    **memdf.collector.bloaty.CONFIG,
    **memdf.collector.csv.CONFIG,
    **memdf.collector.elftools.CONFIG,
    **memdf.collector.readelf.CONFIG,
    'collect.method': {
        'help':
            'Method of input processing: one of'
            ' elftools, readelf, bloaty, csv, tsv, su.',
        'metavar': 'METHOD',
        # These values are the keys of FILE_READERS; keep the two in sync.
        'choices': ['elftools', 'readelf', 'bloaty', 'csv', 'tsv', 'su'],
        'default': 'elftools',
        'argparse': {
            'alias': ['-f'],
        },
    },
    **PREFIX_CONFIG,
}
73+
74+
# Symbol names that switch on ARM mode tracking in postprocess_symbols().
ARM_SPECIAL_SYMBOLS = frozenset(["$a", "$t", "$t.x", "$d", "$d.realdata"])


def postprocess_symbols(config: Config, symbols: SymbolDF) -> SymbolDF:
    """Postprocess a symbol table after collecting from one source.

    If the symbol table contains FILE symbols, they will be removed and
    replaced by a 'file' column on other symbols.

    If the symbol table contains ARM mode symbols, they will be removed
    and replaced by an 'arm' column on other symbols.

    Rows are processed in table order: each row is tagged with the most
    recently seen FILE symbol and the most recently seen ARM mode symbol.

    Args:
        config: Configuration; reads 'collect.prefix-file' and, when that is
            true, the 'collect.prefix' patterns.
        symbols: Symbol table. The 'file' and 'arm' columns, when needed, are
            added to this frame in place before rows are filtered out.

    Returns:
        The symbol table without FILE rows and ARM mode rows. The input is
        returned unchanged if it has no 'type' column.
    """
    if 'type' not in symbols.columns:
        return symbols

    # 'collect.prefix-file' is not among the options written out in this
    # module's own CONFIG entries, so look it up with a default (as the rest
    # of this module does) instead of indexing; a configuration that lacks
    # the option then behaves as "off" rather than raising.
    if config.get('collect.prefix-file', False):
        prefixes = config.get_re('collect.prefix')
    else:
        prefixes = None

    files = []
    arms = []
    arm_symbols = set()
    current_file = ''
    current_arm = ''
    has_file = False
    for symbol in symbols.itertuples():
        if symbol.type == 'FILE':
            has_file = True
            current_file = symbol.symbol
            if prefixes:
                current_file = simplify_source(current_file, prefixes)
        elif (symbol.type == 'NOTYPE' and symbol.symbol.startswith('$')
              and (current_arm or symbol.symbol in ARM_SPECIAL_SYMBOLS)):
            # Tracking starts at the first recognized ARM mode symbol; after
            # that, every '$'-prefixed NOTYPE symbol updates the mode.
            current_arm = symbol.symbol
            arm_symbols.add(current_arm)
        files.append(current_file)
        arms.append(current_arm)

    # Add the new columns while the lists still line up with every row,
    # i.e. before any rows are dropped. `current_arm` is non-empty exactly
    # when at least one ARM mode symbol was seen.
    if has_file:
        symbols['file'] = files
    if current_arm:
        symbols['arm'] = arms

    if has_file:
        symbols = symbols[symbols['type'] != 'FILE']
    if current_arm:
        symbols = symbols[~symbols.symbol.isin(arm_symbols)]
    return symbols
123+
124+
125+
def postprocess_file(config: Config, dfs: DFs) -> None:
    """Postprocess the tables collected from a single source, in place.

    Only the symbol table is touched: if `dfs` has one, it is replaced by
    the result of postprocess_symbols().
    """
    key = SymbolDF.name
    if key not in dfs:
        return
    dfs[key] = postprocess_symbols(config, dfs[key])
129+
130+
131+
def fill_holes(config: Config, symbols: SymbolDF, sections: SectionDF) -> DFs:
    """Account for space not used by any symbol, or by multiple symbols.

    Only sections that are named in the 'region.sections' configuration or
    have the SHF_ALLOC flag are considered. Their symbols (other than
    SECTION and FILE symbols) are visited in address order and synthetic
    rows are created for:

    - 'unused': space before the first symbol of a section, after a symbol
      listed in 'symbol.free.start', or before a symbol listed in
      'symbol.free.end';
    - 'gap': any other space between symbols, or between the last symbol of
      a section and the end of that section;
    - 'overlap': a symbol starting before the end of the preceding one; the
      row's size is negative.

    Args:
        config: Configuration; reads 'symbol.free.start', 'symbol.free.end'
            and 'region.sections'.
        symbols: Symbol table.
        sections: Section table; 'section', 'flags', 'address' and 'size'
            are read.

    Returns:
        A dict with an ExtentDF under each of 'gap', 'unused' and 'overlap',
        and under SymbolDF.name the symbol table with the synthetic rows
        added, sorted by address.
    """

    # These symbols mark the start or end of unused space.
    start_unused = frozenset(config.get('symbol.free.start', []))
    end_unused = frozenset(config.get('symbol.free.end', []))

    # Optional columns are carried over only if the input has them.
    extent_columns = ['address', 'size', 'section', 'file']
    need_cu = 'cu' in symbols.columns
    if need_cu:
        extent_columns.append('cu')
    need_input = 'input' in symbols.columns
    if need_input:
        extent_columns.append('input')
    columns = ['symbol', *extent_columns, 'type', 'bind']

    def filler(name, address, size, previous, current) -> List:
        """Build one synthetic row, in `columns` order.

        Location information is copied from `previous` if there is one,
        otherwise from `current`.
        """
        row = [
            name,  # symbol
            address,  # address
            size,  # size
            (previous.section if previous else
             current.section if current else memdf.name.UNDEF),  # section
            (previous.file
             if previous else current.file if current else ''),  # file
        ]
        if need_cu:
            row.append(
                previous.cu if previous else current.cu if current else '')
        if need_input:
            row.append(
                previous.input if previous else
                current.input if current else '')
        row.append('NOTYPE')  # type
        row.append('LOCAL')  # bind
        return row

    def fill_gap(previous, current, from_address,
                 to_address) -> Tuple[str, List]:
        """Add a row for an unaccounted gap or unused space."""
        size = to_address - from_address
        if (previous is None or previous.symbol in start_unused
                or current.symbol in end_unused):
            use = 'unused'
            name = memdf.name.unused(from_address, size)
        else:
            use = 'gap'
            name = memdf.name.gap(from_address, size)
        return (use, filler(name, from_address, size, previous, current))

    def fill_overlap(previous, current, from_address,
                     to_address) -> Tuple[str, List]:
        """Add a row for overlap."""
        # Callers pass to_address < from_address, so `size` is negative; the
        # synthetic name is built from the positive amount.
        size = to_address - from_address
        return ('overlap',
                filler(memdf.name.overlap(from_address, -size), from_address,
                       size, previous, current))

    # Find the address range for sections that are configured or allocated.
    config_sections = set()
    for names in config.get('region.sections', {}).values():
        config_sections.update(names)
    section_to_range = {}
    start_to_section = {}
    # The leading 0 guarantees the bisect lookup below always finds an entry
    # at or below any non-negative address.
    section_starts = [0]
    for s in sections.itertuples():
        if ((s.section in config_sections) or (s.flags & SH_FLAGS.SHF_ALLOC)):
            section_to_range[s.section] = range(s.address, s.address + s.size)
            start_to_section[s.address] = s.section
            section_starts.append(s.address)
    section_starts.sort()

    new_symbols: Dict[str, List[list]] = {
        'gap': [],
        'unused': [],
        'overlap': []
    }
    section_range = None
    previous_symbol = None
    current_address = 0
    iterable_symbols = symbols.loc[(symbols.type != 'SECTION')
                                   & (symbols.type != 'FILE')
                                   & symbols.section.isin(section_to_range)]
    iterable_symbols = iterable_symbols.sort_values(by='address')

    for symbol in iterable_symbols.itertuples():
        if not previous_symbol or symbol.section != previous_symbol.section:
            # We sometimes see symbols that have the value of their section end
            # address (so they are not actually within the section) and have
            # the same address as a symbol in the next section.
            symbol_address_section = start_to_section.get(section_starts[
                bisect.bisect_right(section_starts, symbol.address) - 1])
            if symbol_address_section != symbol.section:
                continue
            # Starting or switching sections.
            if previous_symbol and section_range:
                # previous_symbol is the last in its section.
                section_end = section_range.stop
                if current_address < section_end:
                    use, row = fill_gap(previous_symbol, previous_symbol,
                                        current_address, section_end)
                    new_symbols[use].append(row)
            # Start of section.
            previous_symbol = None
            section_range = section_to_range.get(symbol.section)
            if section_range:
                current_address = section_range[0]
        if section_range:
            if current_address < symbol.address:
                use, row = fill_gap(previous_symbol, symbol, current_address,
                                    symbol.address)
                new_symbols[use].append(row)
            elif current_address > symbol.address:
                use, row = fill_overlap(previous_symbol, symbol,
                                        current_address, symbol.address)
                new_symbols[use].append(row)
        current_address = symbol.address + symbol.size
        previous_symbol = symbol

    # NOTE(review): the space between the last symbol and the end of its
    # section is only filled when a later section follows (see "Starting or
    # switching sections" above); nothing is emitted for the tail of the
    # final section visited. Confirm whether that is intended.

    dfs = {k: SymbolDF(new_symbols[k], columns=columns) for k in new_symbols}
    symbols = pd.concat([symbols, *dfs.values()]).fillna('')
    symbols.sort_values(by='address', inplace=True)
    # The per-kind tables keep only the extent columns; the full rows live on
    # in the combined symbol table.
    for k in dfs:
        dfs[k] = ExtentDF(dfs[k][extent_columns])
        dfs[k].attrs['name'] = k
    dfs[SymbolDF.name] = SymbolDF(symbols)
    return dfs
256+
257+
258+
def postprocess_collected(config: Config, dfs: DFs) -> None:
    """Postprocess tables after reading all sources.

    Updates `dfs` in place in four stages: prune by the collected-column
    options, fill holes, add and prune by synthetic columns, then tag every
    frame with the columns to demangle and to print in hexadecimal.
    """
    tables = (SymbolDF, SectionDF)

    # Prune tables according to configuration options. This happens before
    # fill_holes() so that space of any pruned symbols will be accounted for,
    # and to avoid unnecessary work for pruned sections.
    for table in tables:
        key = table.name
        if key in dfs:
            dfs[key] = memdf.select.select_configured(
                config, dfs[key], memdf.select.COLLECTED_CHOICES)

    # Account for space not used by any symbol, or by multiple symbols.
    # The option is consulted only when both tables are present.
    have_both = SymbolDF.name in dfs and SectionDF.name in dfs
    if have_both and config.get('args.fill_holes', True):
        holes = fill_holes(config, dfs[SymbolDF.name], dfs[SectionDF.name])
        dfs.update(holes)

    # Create synthetic columns (e.g. 'region') and prune tables
    # according to their configuration. This happens after fill_holes()
    # so that synthetic column values will be created for the gap symbols.
    for table in tables:
        key = table.name
        if key not in dfs:
            continue
        for column in memdf.select.SYNTHETIC_CHOICES:
            dfs[key] = memdf.select.synthesize_column(
                config, dfs[key], column)
            dfs[key] = memdf.select.select_configured_column(
                config, dfs[key], column)

    # Record, per frame, which columns hold symbol names and addresses; the
    # attribute is set only when at least one such column exists.
    for frame in dfs.values():
        symbol_columns = {
            name for name in frame.columns if name.endswith('symbol')
        }
        if symbol_columns:
            frame.attrs['demangle'] = symbol_columns
        address_columns = {
            name for name in frame.columns if name.endswith('address')
        }
        if address_columns:
            frame.attrs['hexify'] = address_columns
290+
291+
292+
# Signature shared by all readers. collect_files() calls a reader as
# reader(config, filename, method).
FileReader = Callable[[Config, str, str], DFs]

# Maps each 'collect.method' value to the reader that handles it.
FILE_READERS: Dict[str, FileReader] = {
    'bloaty': memdf.collector.bloaty.read_file,
    'elftools': memdf.collector.elftools.read_file,
    'readelf': memdf.collector.readelf.read_file,
    # 'csv' and 'tsv' share one reader; it receives the method name as its
    # third argument.
    'csv': memdf.collector.csv.read_file,
    'tsv': memdf.collector.csv.read_file,
    # The only entry bound to a `read_dir` rather than a `read_file`.
    'su': memdf.collector.su.read_dir,
}
302+
303+
304+
def collect_files(config: Config,
                  files: Optional[List[str]] = None,
                  method: Optional[str] = None) -> DFs:
    """Read a filtered memory map from a set of files.

    Args:
        config: Configuration, passed to the reader and to postprocessing.
        files: Inputs to read. If empty or None, the 'args.inputs'
            configuration value is used instead.
        method: Key into FILE_READERS. If None, the 'collect.method'
            configuration value is used, falling back to 'csv' when the
            configuration has none.

    Returns:
        One frame per table name, each the concatenation (with a fresh
        index) of that table from every input, after postprocess_collected().
    """
    filenames = files or config.get('args.inputs', [])
    if method is None:
        method = config.get('collect.method', 'csv')

    # Gather the per-file tables by name, preserving input order.
    frames: Dict[str, List[DF]] = {}
    for filename in filenames:
        file_dfs: DFs = FILE_READERS[method](config, filename, method)
        postprocess_file(config, file_dfs)
        for key, frame in file_dfs.items():
            frames.setdefault(key, []).append(frame)

    dfs = {
        key: pd.concat(parts, ignore_index=True)
        for key, parts in frames.items()
    }
    postprocess_collected(config, dfs)
    return dfs
324+
325+
326+
def parse_args(config_desc: Mapping, argv: Sequence[str]) -> Config:
    """Common argument parsing for collection tools.

    Builds a Config from the general options in memdf.util.config.CONFIG,
    this module's CONFIG, and the caller's `config_desc` (later mappings
    override earlier ones on equal keys), adds the required positional
    'inputs' argument, and parses `argv`.
    """
    description = {
        **memdf.util.config.CONFIG,
        **CONFIG,
        **config_desc,
    }
    config = Config().init(description)
    config.argparse.add_argument('inputs', metavar='FILE', nargs='+')
    return config.parse(argv)

scripts/memory/memdf/collector/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)