|
| 1 | +# |
| 2 | +# Copyright (c) 2021 Project CHIP Authors |
| 3 | +# |
| 4 | +# Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | +# you may not use this file except in compliance with the License. |
| 6 | +# You may obtain a copy of the License at |
| 7 | +# |
| 8 | +# http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | +# |
| 10 | +# Unless required by applicable law or agreed to in writing, software |
| 11 | +# distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | +# See the License for the specific language governing permissions and |
| 14 | +# limitations under the License. |
| 15 | +# |
| 16 | +"""Collect information from various sources into Memory Map DataFrames.""" |
| 17 | + |
| 18 | +import bisect |
| 19 | +from typing import Callable, Dict, List, Mapping, Optional, Sequence, Tuple |
| 20 | + |
| 21 | +import memdf.collector.bloaty |
| 22 | +import memdf.collector.csv |
| 23 | +import memdf.collector.elftools |
| 24 | +import memdf.collector.readelf |
| 25 | +import memdf.collector.su |
| 26 | +import memdf.name |
| 27 | +import memdf.select |
| 28 | +import memdf.util.config |
| 29 | +import pandas as pd # type: ignore |
| 30 | +from elftools.elf.constants import SH_FLAGS # type: ignore |
| 31 | +from memdf import DF, Config, ConfigDescription, DFs, ExtentDF, SectionDF, SymbolDF |
| 32 | +from memdf.collector.util import simplify_source |
| 33 | + |
# Description of the repeatable `collect.prefix` option (aliases `--prefix`
# and `--strip-prefix`). It is kept as its own mapping and merged into CONFIG.
PREFIX_CONFIG: ConfigDescription = {
    'collect.prefix': dict(
        help='Strip PATH from the beginning of source file names',
        metavar='PATH',
        default=[],
        # 'append' lets the option be given several times, one PATH each.
        argparse=dict(
            alias=['--prefix', '--strip-prefix'],
            action='append',
        ),
    ),
}
| 45 | + |
# Option descriptions for collection tools: the option groups, the options
# contributed by the individual collector modules, the input method selector,
# and the prefix option. Entries are merged in order, so a later entry with the
# same key replaces an earlier one.
CONFIG: ConfigDescription = {
    Config.group_def('input'): dict(title='input options'),
    Config.group_def('tool'): dict(title='external tool options'),
    Config.group_map('collect'): dict(group='input'),
    **memdf.collector.bloaty.CONFIG,
    **memdf.collector.csv.CONFIG,
    **memdf.collector.elftools.CONFIG,
    **memdf.collector.readelf.CONFIG,
    'collect.method': dict(
        help=('Method of input processing: one of'
              ' elftools, readelf, bloaty, csv, tsv, su.'),
        metavar='METHOD',
        choices=['elftools', 'readelf', 'bloaty', 'csv', 'tsv', 'su'],
        default='elftools',
        argparse=dict(alias=['-f']),
    ),
    **PREFIX_CONFIG,
}
| 73 | + |
# Symbol names that postprocess_symbols() recognizes as the first ARM mode
# marker; once one has been seen, every '$'-prefixed NOTYPE symbol is treated
# as a marker too.
ARM_SPECIAL_SYMBOLS = frozenset(("$a", "$t", "$t.x", "$d", "$d.realdata"))
| 75 | + |
| 76 | + |
def postprocess_symbols(config: Config, symbols: SymbolDF) -> SymbolDF:
    """Fold FILE and ARM mode marker symbols into columns of a symbol table.

    The table is walked in row order (only if it has a 'type' column):

    - Each FILE symbol sets the current file name, which is recorded for
      every following row. If any FILE symbol was seen, a 'file' column is
      added and the FILE rows themselves are dropped.
    - A NOTYPE symbol whose name starts with '$' becomes the current ARM
      mode marker if it is in ARM_SPECIAL_SYMBOLS or a marker is already
      active. If any marker was seen, an 'arm' column is added and every
      row whose symbol name was used as a marker is dropped.

    The new columns are assigned on the frame passed in; the returned frame
    is the filtered result.
    """
    # NOTE(review): 'collect.prefix-file' is not described by this module's
    # CONFIG (only 'collect.prefix' is) -- confirm where it is defined.
    strip_prefixes = (config.get_re('collect.prefix')
                      if config['collect.prefix-file'] else None)

    file_column = []
    arm_column = []
    arm_markers = set()
    active_file = ''
    active_arm = ''
    saw_file = False

    if 'type' in symbols.columns:
        for row in symbols.itertuples():
            kind = row.type
            if kind == 'FILE':
                saw_file = True
                active_file = row.symbol
                if strip_prefixes:
                    active_file = simplify_source(active_file,
                                                  strip_prefixes)
            elif (kind == 'NOTYPE' and row.symbol.startswith('$')
                  and (active_arm or row.symbol in ARM_SPECIAL_SYMBOLS)):
                active_arm = row.symbol
                arm_markers.add(active_arm)
            # Every row, including the marker rows, gets the current values so
            # that the lists line up with the unfiltered table.
            file_column.append(active_file)
            arm_column.append(active_arm)

    # Columns must be attached before any rows are removed.
    if saw_file:
        symbols['file'] = file_column
    if active_arm:
        symbols['arm'] = arm_column

    if saw_file:
        symbols = symbols[symbols['type'] != 'FILE']
    if active_arm:
        symbols = symbols[~symbols.symbol.isin(arm_markers)]
    return symbols
| 123 | + |
| 124 | + |
def postprocess_file(config: Config, dfs: DFs) -> None:
    """Postprocess, in place, the tables collected from a single source.

    Only the symbol table is touched: if present, it is replaced by the
    result of postprocess_symbols().
    """
    key = SymbolDF.name
    if key not in dfs:
        return
    dfs[key] = postprocess_symbols(config, dfs[key])
| 129 | + |
| 130 | + |
def fill_holes(config: Config, symbols: SymbolDF, sections: SectionDF) -> DFs:
    """Account for space not used by any symbol, or by multiple symbols.

    Symbols (other than SECTION and FILE symbols) that belong to a section
    which is either listed in the 'region.sections' configuration or has the
    SHF_ALLOC flag are walked in address order. Synthetic NOTYPE/LOCAL rows
    are created for:

    - 'unused': uncovered space at the start of a section, after a symbol
      named in 'symbol.free.start', or before a symbol named in
      'symbol.free.end';
    - 'gap': any other uncovered space between two symbols or between the
      last symbol of a section and the end of that section;
    - 'overlap': space claimed by a symbol that starts before the previous
      symbol ends. These rows carry a negative size.

    Sections that contain no eligible symbol are not visited.

    Returns a dict holding one ExtentDF for each of 'gap', 'unused' and
    'overlap', plus (under SymbolDF.name) the input symbols with all
    synthetic rows appended, sorted by address.
    """

    # These symbols mark the start or end of unused space.
    start_unused = frozenset(config.get('symbol.free.start', []))
    end_unused = frozenset(config.get('symbol.free.end', []))

    extent_columns = ['address', 'size', 'section', 'file']
    need_cu = 'cu' in symbols.columns
    if need_cu:
        extent_columns.append('cu')
    need_input = 'input' in symbols.columns
    if need_input:
        extent_columns.append('input')
    columns = ['symbol', *extent_columns, 'type', 'bind']

    new_symbols: Dict[str, List[list]] = {
        'gap': [],
        'unused': [],
        'overlap': []
    }

    def filler(name, address, size, previous, current) -> List:
        """Build one synthetic row, with values in `columns` order."""
        # Attribute the row to the preceding symbol when there is one,
        # otherwise to the following symbol.
        neighbor = previous if previous else current
        row = [
            name,  # symbol
            address,  # address
            size,  # size
            neighbor.section if neighbor else memdf.name.UNDEF,  # section
            neighbor.file if neighbor else '',  # file
        ]
        if need_cu:
            row.append(neighbor.cu if neighbor else '')
        if need_input:
            row.append(neighbor.input if neighbor else '')
        row.append('NOTYPE')  # type
        row.append('LOCAL')  # bind
        return row

    def fill_gap(previous, current, from_address,
                 to_address) -> Tuple[str, List]:
        """Add a row for a unaccounted gap or unused space."""
        size = to_address - from_address
        if (previous is None or previous.symbol in start_unused
                or current.symbol in end_unused):
            use = 'unused'
            name = memdf.name.unused(from_address, size)
        else:
            use = 'gap'
            name = memdf.name.gap(from_address, size)
        return (use, filler(name, from_address, size, previous, current))

    def fill_overlap(previous, current, from_address,
                     to_address) -> Tuple[str, List]:
        """Add a row for overlap."""
        # Called with from_address > to_address, so `size` is negative; the
        # name is built from the positive magnitude.
        size = to_address - from_address
        return ('overlap',
                filler(memdf.name.overlap(from_address, -size), from_address,
                       size, previous, current))

    def fill_section_tail(last_symbol, tail_range, address) -> None:
        """Add a row for space between the last symbol and the section end."""
        if last_symbol and tail_range and address < tail_range.stop:
            use, row = fill_gap(last_symbol, last_symbol, address,
                                tail_range.stop)
            new_symbols[use].append(row)

    # Find the address range for sections that are configured or allocated.
    config_sections = set()
    for _, s in config.get('region.sections', {}).items():
        config_sections |= set(s)
    section_to_range = {}
    start_to_section = {}
    section_starts = [0]
    for s in sections.itertuples():
        if ((s.section in config_sections) or (s.flags & SH_FLAGS.SHF_ALLOC)):
            section_to_range[s.section] = range(s.address, s.address + s.size)
            start_to_section[s.address] = s.section
            section_starts.append(s.address)
    section_starts.sort()

    section_range = None
    previous_symbol = None
    current_address = 0
    iterable_symbols = symbols.loc[(symbols.type != 'SECTION')
                                   & (symbols.type != 'FILE')
                                   & symbols.section.isin(section_to_range)]
    iterable_symbols = iterable_symbols.sort_values(by='address')

    for symbol in iterable_symbols.itertuples():
        if not previous_symbol or symbol.section != previous_symbol.section:
            # We sometimes see symbols that have the value of their section end
            # address (so they are not actually within the section) and have
            # the same address as a symbol in the next section.
            symbol_address_section = start_to_section.get(section_starts[
                bisect.bisect_right(section_starts, symbol.address) - 1])
            if symbol_address_section != symbol.section:
                continue
            # Starting or switching sections; previous_symbol (if any) is the
            # last in its section.
            fill_section_tail(previous_symbol, section_range, current_address)
            # Start of section.
            previous_symbol = None
            section_range = section_to_range.get(symbol.section)
            if section_range:
                current_address = section_range[0]
        if section_range:
            if current_address < symbol.address:
                use, row = fill_gap(previous_symbol, symbol, current_address,
                                    symbol.address)
                new_symbols[use].append(row)
            elif current_address > symbol.address:
                use, row = fill_overlap(previous_symbol, symbol,
                                        current_address, symbol.address)
                new_symbols[use].append(row)
        current_address = symbol.address + symbol.size
        previous_symbol = symbol

    # The loop only closes a section when a later section begins, so the last
    # section visited still needs its trailing space accounted for.
    fill_section_tail(previous_symbol, section_range, current_address)

    dfs = {k: SymbolDF(new_symbols[k], columns=columns) for k in new_symbols}
    symbols = pd.concat([symbols, *dfs.values()]).fillna('')
    symbols.sort_values(by='address', inplace=True)
    for k in dfs:
        dfs[k] = ExtentDF(dfs[k][extent_columns])
        dfs[k].attrs['name'] = k
    dfs[SymbolDF.name] = SymbolDF(symbols)
    return dfs
| 256 | + |
| 257 | + |
def postprocess_collected(config: Config, dfs: DFs) -> None:
    """Postprocess, in place, the tables gathered from all sources.

    Steps, in order: prune the symbol and section tables according to the
    configuration, optionally add gap/unused/overlap accounting, create and
    prune on synthetic columns, and finally tag every table with the columns
    to demangle and to hexify.
    """
    tables = [SymbolDF, SectionDF]

    # Prune tables according to configuration options. This happens before
    # fill_holes() so that space of any pruned symbols will be accounted for,
    # and to avoid unnecessary work for pruned sections.
    for table in tables:
        key = table.name
        if key in dfs:
            dfs[key] = memdf.select.select_configured(
                config, dfs[key], memdf.select.COLLECTED_CHOICES)

    # Account for space not used by any symbol, or by multiple symbols.
    # Requires both tables; enabled unless 'args.fill_holes' is falsy.
    have_both = SymbolDF.name in dfs and SectionDF.name in dfs
    if have_both and config.get('args.fill_holes', True):
        filled = fill_holes(config, dfs[SymbolDF.name], dfs[SectionDF.name])
        dfs.update(filled)

    # Create synthetic columns (e.g. 'region') and prune tables
    # according to their configuration. This happens after fill_holes()
    # so that synthetic column values will be created for the gap symbols.
    for table in tables:
        key = table.name
        if key not in dfs:
            continue
        for column in memdf.select.SYNTHETIC_CHOICES:
            dfs[key] = memdf.select.synthesize_column(config, dfs[key], column)
            dfs[key] = memdf.select.select_configured_column(
                config, dfs[key], column)

    # Record, per table, which columns hold symbol names and addresses; the
    # attribute is only set when at least one such column exists.
    for df in dfs.values():
        symbol_columns = {c for c in df.columns if c.endswith('symbol')}
        if symbol_columns:
            df.attrs['demangle'] = symbol_columns
        address_columns = {c for c in df.columns if c.endswith('address')}
        if address_columns:
            df.attrs['hexify'] = address_columns
| 290 | + |
| 291 | + |
# A reader is called as reader(config, filename, method) and returns the
# tables read from that input.
FileReader = Callable[[Config, str, str], DFs]

# Reader for each supported 'collect.method' value. 'csv' and 'tsv' share one
# reader, which receives the method name as its third argument.
FILE_READERS: Dict[str, FileReader] = dict(
    bloaty=memdf.collector.bloaty.read_file,
    elftools=memdf.collector.elftools.read_file,
    readelf=memdf.collector.readelf.read_file,
    csv=memdf.collector.csv.read_file,
    tsv=memdf.collector.csv.read_file,
    su=memdf.collector.su.read_dir,
)
| 302 | + |
| 303 | + |
def collect_files(config: Config,
                  files: Optional[List[str]] = None,
                  method: Optional[str] = None) -> DFs:
    """Read a filtered memory map from a set of files.

    Args:
        config: Configuration; supplies 'args.inputs' when `files` is empty
            or None, and 'collect.method' when `method` is None.
        files: Inputs to read.
        method: Key into FILE_READERS selecting the reader.

    Returns:
        One table per table name, each the concatenation (with a fresh
        index) of the per-file tables, after postprocess_collected().

    Raises:
        KeyError: if there is at least one input and `method` is not a key
            of FILE_READERS.
    """
    filenames = files or config.get('args.inputs', [])
    if method is None:
        method = config.get('collect.method', 'csv')

    # Group the per-file tables by table name.
    frames: Dict[str, List[DF]] = {}
    for filename in filenames:
        file_dfs: DFs = FILE_READERS[method](config, filename, method)
        postprocess_file(config, file_dfs)
        for name, frame in file_dfs.items():
            frames.setdefault(name, []).append(frame)

    collected: DFs = {
        name: pd.concat(parts, ignore_index=True)
        for name, parts in frames.items()
    }
    postprocess_collected(config, collected)
    return collected
| 324 | + |
| 325 | + |
def parse_args(config_desc: Mapping, argv: Sequence[str]) -> Config:
    """Common argument parsing for collection tools.

    Args:
        config_desc: Tool-specific option descriptions. They are merged after
            memdf.util.config.CONFIG and this module's CONFIG, so they
            replace entries with the same key.
        argv: Arguments handed to the configuration's parse().

    Returns:
        The result of parsing `argv`. One or more positional FILE arguments
        are required, collected under 'inputs'.
    """
    base = Config()
    description = {
        **memdf.util.config.CONFIG,
        **CONFIG,
        **config_desc,
    }
    config = base.init(description)
    config.argparse.add_argument('inputs', metavar='FILE', nargs='+')
    return config.parse(argv)
0 commit comments