diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml new file mode 100644 index 0000000..ab81abc --- /dev/null +++ b/.github/workflows/codeql.yml @@ -0,0 +1,39 @@ +name: "CodeQL" + +on: + push: + branches: [ "master" ] + pull_request: + branches: [ "master" ] + +jobs: + analyze: + name: CodeQL analysis + runs-on: ubuntu-latest + permissions: + actions: read + contents: read + security-events: write + + strategy: + fail-fast: false + matrix: + language: [ python ] + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Initialize CodeQL + uses: github/codeql-action/init@v3 + with: + languages: ${{ matrix.language }} + queries: +security-and-quality + + - name: Autobuild + uses: github/codeql-action/autobuild@v3 + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v3 + with: + category: "/language:${{ matrix.language }}" diff --git a/.github/workflows/python-versions.yml b/.github/workflows/python-versions.yml new file mode 100644 index 0000000..cadf1ae --- /dev/null +++ b/.github/workflows/python-versions.yml @@ -0,0 +1,58 @@ +# This workflow will install various versions of Python and run non-regression tests. 
+ +name: Python versions + +on: + push: + branches: [ "master" ] + pull_request: + branches: [ "master" ] + +jobs: + build: + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: ["ubuntu-latest", "macos-latest"] + python-version: ["3.10", "3.12", "pypy2.7", "pypy3.9"] + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install coverage codecov + - name: Non-regression tests + run: | + export PYTHONPATH=$PYTHONPATH:$(pwd) + coverage run ./tests/test_all.py + - name: Update codecov + run: | + codecov + + python23: + name: python2.3 + runs-on: "ubuntu-latest" + strategy: + fail-fast: false + steps: + - uses: actions/checkout@v4 + - name: Set up Python 2.3 + run: | + cd .. + curl -O https://www.python.org/ftp/python/2.3.7/Python-2.3.7.tgz + tar xzf Python-2.3.7.tgz + cd Python-2.3.7 + # We need to disable FORTIFY_SOURCE to compile python 2.3 + # cf. 
https://bugs.launchpad.net/ubuntu/+source/gcc-defaults/+bug/286334 + ./configure BASECFLAGS=-U_FORTIFY_SOURCE + make + sudo ln -fs $(pwd)/python /usr/local/bin/python + - name: Non-regression tests + run: | + python -c 'import sys;print(sys.version)' + export PYTHONPATH=$PYTHONPATH:$(pwd) + python ./tests/test_all.py diff --git a/.github/workflows/tools.yml b/.github/workflows/tools.yml new file mode 100644 index 0000000..4f17664 --- /dev/null +++ b/.github/workflows/tools.yml @@ -0,0 +1,46 @@ +# This workflow compares the outputs of elfesteem with native tools on the OS + +name: Native tools + +on: + push: + branches: [ "master" ] + pull_request: + branches: [ "master" ] + +jobs: + macos: + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: ["macos-12", "macos-13"] + python-version: ["3.10"] + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Comparison with otool + run: | + export PYTHONPATH=$PYTHONPATH:$(pwd) + zsh ./tests/examples_macos.sh + linux: + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: ["ubuntu-latest", "ubuntu-22.04", "ubuntu-20.04"] + python-version: ["3.10"] + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Comparison with readelf + run: | + readelf --version + export PYTHONPATH=$PYTHONPATH:$(pwd) + bash ./tests/examples_linux.sh diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..2a4cdc2 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,40 @@ +language: python +jobs: + include: + - python: '3.7' + - python: '2.7' + - name: 'Python: 2.3' + # python 2.3 not available in travis + install: + - cd .. 
+ - curl -O https://www.python.org/ftp/python/2.3.7/Python-2.3.7.tgz + - tar xzf Python-2.3.7.tgz + - cd Python-2.3.7 + # We need to disable FORTIFY_SOURCE to compile python 2.3 + # cf. https://bugs.launchpad.net/ubuntu/+source/gcc-defaults/+bug/286334 + - ./configure BASECFLAGS=-U_FORTIFY_SOURCE + - make + - export PATH=$(pwd):$PATH + - cd ../elfesteem + - python -c 'import sys;print(sys.version)' + script: + - python ./tests/test_all.py + after_success: + - true # coverage needs python >= 2.6 + - python: 'pypy3' + - python: 'pypy' + - python: '3.8' + - python: '3.4' +install: + - pip install coverage codecov +before_script: + export PYTHONPATH=$PYTHONPATH:$(pwd) +script: + - coverage run ./tests/test_all.py + # We don't use e.g. tox for non-regression tests, because we want to have + # a script that works with old python too, and tox needs python2.5 + # python2.4 ./tests/test_all.py will work fine :-) + # Note that coverage is incompatible with python 3.2, cf. + # https://github.com/menegazzo/travispy/issues/20 +after_success: + - codecov diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..e9ab0b3 --- /dev/null +++ b/LICENSE @@ -0,0 +1,458 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 2.1, February 1999 + + Copyright (C) 1991, 1999 Free Software Foundation, Inc. + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + +[This is the first released version of the Lesser GPL. It also counts + as the successor of the GNU Library Public License, version 2, hence + the version number 2.1.] + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +Licenses are intended to guarantee your freedom to share and change +free software--to make sure the software is free for all its users. 
+ + This license, the Lesser General Public License, applies to some +specially designated software packages--typically libraries--of the +Free Software Foundation and other authors who decide to use it. You +can use it too, but we suggest you first think carefully about whether +this license or the ordinary General Public License is the better +strategy to use in any particular case, based on the explanations below. + + When we speak of free software, we are referring to freedom of use, +not price. Our General Public Licenses are designed to make sure that +you have the freedom to distribute copies of free software (and charge +for this service if you wish); that you receive source code or can get +it if you want it; that you can change the software and use pieces of +it in new free programs; and that you are informed that you can do +these things. + + To protect your rights, we need to make restrictions that forbid +distributors to deny you these rights or to ask you to surrender these +rights. These restrictions translate to certain responsibilities for +you if you distribute copies of the library or if you modify it. + + For example, if you distribute copies of the library, whether gratis +or for a fee, you must give the recipients all the rights that we gave +you. You must make sure that they, too, receive or can get the source +code. If you link other code with the library, you must provide +complete object files to the recipients, so that they can relink them +with the library after making changes to the library and recompiling +it. And you must show them these terms so they know their rights. + + We protect your rights with a two-step method: (1) we copyright the +library, and (2) we offer you this license, which gives you legal +permission to copy, distribute and/or modify the library. + + To protect each distributor, we want to make it very clear that +there is no warranty for the free library. 
Also, if the library is +modified by someone else and passed on, the recipients should know +that what they have is not the original version, so that the original +author's reputation will not be affected by problems that might be +introduced by others. + + Finally, software patents pose a constant threat to the existence of +any free program. We wish to make sure that a company cannot +effectively restrict the users of a free program by obtaining a +restrictive license from a patent holder. Therefore, we insist that +any patent license obtained for a version of the library must be +consistent with the full freedom of use specified in this license. + + Most GNU software, including some libraries, is covered by the +ordinary GNU General Public License. This license, the GNU Lesser +General Public License, applies to certain designated libraries, and +is quite different from the ordinary General Public License. We use +this license for certain libraries in order to permit linking those +libraries into non-free programs. + + When a program is linked with a library, whether statically or using +a shared library, the combination of the two is legally speaking a +combined work, a derivative of the original library. The ordinary +General Public License therefore permits such linking only if the +entire combination fits its criteria of freedom. The Lesser General +Public License permits more lax criteria for linking other code with +the library. + + We call this license the "Lesser" General Public License because it +does Less to protect the user's freedom than the ordinary General +Public License. It also provides other free software developers Less +of an advantage over competing non-free programs. These disadvantages +are the reason we use the ordinary General Public License for many +libraries. However, the Lesser license provides advantages in certain +special circumstances. 
+ + For example, on rare occasions, there may be a special need to +encourage the widest possible use of a certain library, so that it becomes +a de-facto standard. To achieve this, non-free programs must be +allowed to use the library. A more frequent case is that a free +library does the same job as widely used non-free libraries. In this +case, there is little to gain by limiting the free library to free +software only, so we use the Lesser General Public License. + + In other cases, permission to use a particular library in non-free +programs enables a greater number of people to use a large body of +free software. For example, permission to use the GNU C Library in +non-free programs enables many more people to use the whole GNU +operating system, as well as its variant, the GNU/Linux operating +system. + + Although the Lesser General Public License is Less protective of the +users' freedom, it does ensure that the user of a program that is +linked with the Library has the freedom and the wherewithal to run +that program using a modified version of the Library. + + The precise terms and conditions for copying, distribution and +modification follow. Pay close attention to the difference between a +"work based on the library" and a "work that uses the library". The +former contains code derived from the library, whereas the latter must +be combined with the library in order to run. + + GNU LESSER GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License Agreement applies to any software library or other +program which contains a notice placed by the copyright holder or +other authorized party saying it may be distributed under the terms of +this Lesser General Public License (also called "this License"). +Each licensee is addressed as "you". 
+ + A "library" means a collection of software functions and/or data +prepared so as to be conveniently linked with application programs +(which use some of those functions and data) to form executables. + + The "Library", below, refers to any such software library or work +which has been distributed under these terms. A "work based on the +Library" means either the Library or any derivative work under +copyright law: that is to say, a work containing the Library or a +portion of it, either verbatim or with modifications and/or translated +straightforwardly into another language. (Hereinafter, translation is +included without limitation in the term "modification".) + + "Source code" for a work means the preferred form of the work for +making modifications to it. For a library, complete source code means +all the source code for all modules it contains, plus any associated +interface definition files, plus the scripts used to control compilation +and installation of the library. + + Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running a program using the Library is not restricted, and output from +such a program is covered only if its contents constitute a work based +on the Library (independent of the use of the Library in a tool for +writing it). Whether that is true depends on what the Library does +and what the program that uses the Library does. + + 1. You may copy and distribute verbatim copies of the Library's +complete source code as you receive it, in any medium, provided that +you conspicuously and appropriately publish on each copy an +appropriate copyright notice and disclaimer of warranty; keep intact +all the notices that refer to this License and to the absence of any +warranty; and distribute a copy of this License along with the +Library. 
+ + You may charge a fee for the physical act of transferring a copy, +and you may at your option offer warranty protection in exchange for a +fee. + + 2. You may modify your copy or copies of the Library or any portion +of it, thus forming a work based on the Library, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) The modified work must itself be a software library. + + b) You must cause the files modified to carry prominent notices + stating that you changed the files and the date of any change. + + c) You must cause the whole of the work to be licensed at no + charge to all third parties under the terms of this License. + + d) If a facility in the modified Library refers to a function or a + table of data to be supplied by an application program that uses + the facility, other than as an argument passed when the facility + is invoked, then you must make a good faith effort to ensure that, + in the event an application does not supply such function or + table, the facility still operates, and performs whatever part of + its purpose remains meaningful. + + (For example, a function in a library to compute square roots has + a purpose that is entirely well-defined independent of the + application. Therefore, Subsection 2d requires that any + application-supplied function or table used by this function must + be optional: if the application does not supply it, the square + root function must still compute square roots.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Library, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. 
But when you +distribute the same sections as part of a whole which is a work based +on the Library, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote +it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Library. + +In addition, mere aggregation of another work not based on the Library +with the Library (or with a work based on the Library) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may opt to apply the terms of the ordinary GNU General Public +License instead of this License to a given copy of the Library. To do +this, you must alter all the notices that refer to this License, so +that they refer to the ordinary GNU General Public License, version 2, +instead of to this License. (If a newer version than version 2 of the +ordinary GNU General Public License has appeared, then you can specify +that version instead if you wish.) Do not make any other change in +these notices. + + Once this change is made in a given copy, it is irreversible for +that copy, so the ordinary GNU General Public License applies to all +subsequent copies and derivative works made from that copy. + + This option is useful when you wish to copy part of the code of +the Library into a program that is not a library. + + 4. 
You may copy and distribute the Library (or a portion or +derivative of it, under Section 2) in object code or executable form +under the terms of Sections 1 and 2 above provided that you accompany +it with the complete corresponding machine-readable source code, which +must be distributed under the terms of Sections 1 and 2 above on a +medium customarily used for software interchange. + + If distribution of object code is made by offering access to copy +from a designated place, then offering equivalent access to copy the +source code from the same place satisfies the requirement to +distribute the source code, even though third parties are not +compelled to copy the source along with the object code. + + 5. A program that contains no derivative of any portion of the +Library, but is designed to work with the Library by being compiled or +linked with it, is called a "work that uses the Library". Such a +work, in isolation, is not a derivative work of the Library, and +therefore falls outside the scope of this License. + + However, linking a "work that uses the Library" with the Library +creates an executable that is a derivative of the Library (because it +contains portions of the Library), rather than a "work that uses the +library". The executable is therefore covered by this License. +Section 6 states terms for distribution of such executables. + + When a "work that uses the Library" uses material from a header file +that is part of the Library, the object code for the work may be a +derivative work of the Library even though the source code is not. +Whether this is true is especially significant if the work can be +linked without the Library, or if the work is itself a library. The +threshold for this to be true is not precisely defined by law. 
+ + If such an object file uses only numerical parameters, data +structure layouts and accessors, and small macros and small inline +functions (ten lines or less in length), then the use of the object +file is unrestricted, regardless of whether it is legally a derivative +work. (Executables containing this object code plus portions of the +Library will still fall under Section 6.) + + Otherwise, if the work is a derivative of the Library, you may +distribute the object code for the work under the terms of Section 6. +Any executables containing that work also fall under Section 6, +whether or not they are linked directly with the Library itself. + + 6. As an exception to the Sections above, you may also combine or +link a "work that uses the Library" with the Library to produce a +work containing portions of the Library, and distribute that work +under terms of your choice, provided that the terms permit +modification of the work for the customer's own use and reverse +engineering for debugging such modifications. + + You must give prominent notice with each copy of the work that the +Library is used in it and that the Library and its use are covered by +this License. You must supply a copy of this License. If the work +during execution displays copyright notices, you must include the +copyright notice for the Library among them, as well as a reference +directing the user to the copy of this License. Also, you must do one +of these things: + + a) Accompany the work with the complete corresponding + machine-readable source code for the Library including whatever + changes were used in the work (which must be distributed under + Sections 1 and 2 above); and, if the work is an executable linked + with the Library, with the complete machine-readable "work that + uses the Library", as object code and/or source code, so that the + user can modify the Library and then relink to produce a modified + executable containing the modified Library. 
(It is understood + that the user who changes the contents of definitions files in the + Library will not necessarily be able to recompile the application + to use the modified definitions.) + + b) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (1) uses at run time a + copy of the library already present on the user's computer system, + rather than copying library functions into the executable, and (2) + will operate properly with a modified version of the library, if + the user installs one, as long as the modified version is + interface-compatible with the version that the work was made with. + + c) Accompany the work with a written offer, valid for at + least three years, to give the same user the materials + specified in Subsection 6a, above, for a charge no more + than the cost of performing this distribution. + + d) If distribution of the work is made by offering access to copy + from a designated place, offer equivalent access to copy the above + specified materials from the same place. + + e) Verify that the user has already received a copy of these + materials or that you have already sent this user a copy. + + For an executable, the required form of the "work that uses the +Library" must include any data and utility programs needed for +reproducing the executable from it. However, as a special exception, +the materials to be distributed need not include anything that is +normally distributed (in either source or binary form) with the major +components (compiler, kernel, and so on) of the operating system on +which the executable runs, unless that component itself accompanies +the executable. + + It may happen that this requirement contradicts the license +restrictions of other proprietary libraries that do not normally +accompany the operating system. Such a contradiction means you cannot +use both them and the Library together in an executable that you +distribute. + + 7. 
You may place library facilities that are a work based on the +Library side-by-side in a single library together with other library +facilities not covered by this License, and distribute such a combined +library, provided that the separate distribution of the work based on +the Library and of the other library facilities is otherwise +permitted, and provided that you do these two things: + + a) Accompany the combined library with a copy of the same work + based on the Library, uncombined with any other library + facilities. This must be distributed under the terms of the + Sections above. + + b) Give prominent notice with the combined library of the fact + that part of it is a work based on the Library, and explaining + where to find the accompanying uncombined form of the same work. + + 8. You may not copy, modify, sublicense, link with, or distribute +the Library except as expressly provided under this License. Any +attempt otherwise to copy, modify, sublicense, link with, or +distribute the Library is void, and will automatically terminate your +rights under this License. However, parties who have received copies, +or rights, from you under this License will not have their licenses +terminated so long as such parties remain in full compliance. + + 9. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Library or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Library (or any work based on the +Library), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Library or works based on it. + + 10. 
Each time you redistribute the Library (or any work based on the +Library), the recipient automatically receives a license from the +original licensor to copy, distribute, link with or modify the Library +subject to these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties with +this License. + + 11. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Library at all. For example, if a patent +license would not permit royalty-free redistribution of the Library by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Library. + +If any portion of this section is held invalid or unenforceable under any +particular circumstance, the balance of the section is intended to apply, +and the section as a whole is intended to apply in other circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system which is +implemented by public license practices. 
Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 12. If the distribution and/or use of the Library is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Library under this License may add +an explicit geographical distribution limitation excluding those countries, +so that distribution is permitted only in or among countries not thus +excluded. In such case, this License incorporates the limitation as if +written in the body of this License. + + 13. The Free Software Foundation may publish revised and/or new +versions of the Lesser General Public License from time to time. +Such new versions will be similar in spirit to the present version, +but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Library +specifies a version number of this License which applies to it and +"any later version", you have the option of following the terms and +conditions either of that version or of any later version published by +the Free Software Foundation. If the Library does not specify a +license version number, you may choose any version ever published by +the Free Software Foundation. + + 14. If you wish to incorporate parts of the Library into other free +programs whose distribution conditions are incompatible with these, +write to the author to ask for permission. For software which is +copyrighted by the Free Software Foundation, write to the Free +Software Foundation; we sometimes make exceptions for this. 
Our +decision will be guided by the two goals of preserving the free status +of all derivatives of our free software and of promoting the sharing +and reuse of software generally. + + NO WARRANTY + + 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO +WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. +EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR +OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY +KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE +LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN +WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY +AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU +FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE +LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING +RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A +FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF +SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. + + END OF TERMS AND CONDITIONS diff --git a/README.md b/README.md new file mode 100644 index 0000000..00ffb09 --- /dev/null +++ b/README.md @@ -0,0 +1,39 @@ +# ELF Esteem # + +## Overview + +The goal of this library is to manipulate various containers of executable code. +ELF, PE, COFF and Mach-O files are fully supported. +It includes a partial support of Minidump and RPRC files, and a non-working implementation of Java classes. + +It aims at being self-contained and portable: it is pure python, compatible from python 2.3 upwards (including python 3.x). 
+ +## Parsing with ELF Esteem + +[binary.py](elfesteem/binary.py) +can be used to read a binary of any known format and display its main characteristics. + +[readelf.py](examples/readelf.py) +outputs the same as binutils' readelf, using ELF Esteem. + +[otool.py](examples/otool.py) +outputs the same as MacOSX otool and dyldinfo, using ELF Esteem. + +[readpe.py](examples/readpe.py) +analyses the content of a PE or COFF file, including a hierarchical display of the layout of the file. + +## File manipulation with ELF Esteem + +Most of the internal representation of the file parsed by ELF Esteem is based on [cstruct.py](elfesteem/cstruct.py) which is a generic framework to manipulate binary data structures. + +The file is fully loaded using one of the classes `ELF`, `PE`, `COFF`, `MACHO`, `RPRC`, or `Minidump`. This class is the root of a tree of subclasses (e.g. file header, list of sections, ...) and each subtree can be modified. The method `pack()` reconstructs a binary. + +The philosophy behind ELF Esteem is that if the input file is valid, and no modification is made to the internal representation, then `pack()` will recover the input. +When modifications are made, then (depending on the details of the file format) some values are automatically recomputed (e.g. fields containing lengths, checksums). + +**More doc soon.** + +## Development status + +[![codecov](https://codecov.io/gh/LRGH/elfesteem/branch/master/graph/badge.svg)](https://codecov.io/gh/LRGH/elfesteem) +[![Unit tests](https://github.com/LRGH/elfesteem/actions/workflows/python-versions.yml/badge.svg)](https://github.com/LRGH/elfesteem/actions/workflows/python-versions.yml) diff --git a/elfcli b/elfcli index d43f4d1..13ff678 100755 --- a/elfcli +++ b/elfcli @@ -1,27 +1,33 @@ #! 
/usr/bin/env python import sys -import getopt -from elfesteem import ELF +import code +#import elfesteem.elf +import readline +import argparse + def usage(): print >>sys.stderr,"Usage: elfcli [-i inputfile]" raise SystemExit -def main(*args): - INPUT=sys.stdin - try: - opts = getopt.getopt(args,"hi:") - for opt,optarg in opts[0]: - if opt == "-h": - usage() - if opt == "-i": - INPUT = open(optarg) - except getopt.GetoptError,e: - print >>sys.stderr,"ERROR: %s" % e - raise SystemExit +def main(): + + parser = argparse.ArgumentParser() + parser.add_argument("infile") + + options = parser.parse_args() + + # prepare locals and binding for interactive session + readline.parse_and_bind("tab: complete") + del(parser) + from elfesteem import * + + elf = elf_init.ELF(open(options.infile).read()) + + code.interact(local=locals()) if __name__ == "__main__": - main(*sys.argv[1:]) + main() diff --git a/elfesteem/binary.py b/elfesteem/binary.py new file mode 100755 index 0000000..b78365c --- /dev/null +++ b/elfesteem/binary.py @@ -0,0 +1,67 @@ +#! /usr/bin/env python +# Generic container for all binary types known by elfesteem, +# with auto-recognition of the binary type. 
+ +import sys, os +sys.path.insert(1, os.path.abspath(sys.path[0]+'/..')) + +from elfesteem.elf_init import ELF +from elfesteem.pe_init import PE, COFF +from elfesteem.minidump_init import Minidump +from elfesteem.macho import MACHO +from elfesteem.rprc import RPRC + +class UnknownFormat(object): + def __init__(self, raw): + self.raw = raw + architecture = 'UNKNOWN' + entrypoint = -1 + sections = () + symbols = () + dynsyms = () + class virt_stub(object): + max_addr = lambda _:-1 + virt = virt_stub() + +class BINARY(object): + def __init__(self, raw): + for container in ELF, PE, Minidump, MACHO, RPRC, COFF: + try: + self.e = container(raw) + break + except ValueError: + pass + except AssertionError: + pass + else: + self.e = UnknownFormat(raw) + container = property(lambda _:_.e.__class__.__name__) + architecture = property(lambda _:_.e.architecture) + entrypoint = property(lambda _:_.e.entrypoint) + max_addr = property(lambda _:_.e.virt.max_addr()) + sections = property(lambda _:_.e.sections) + symbols = property(lambda _:_.e.symbols) + dynsyms = property(lambda _:_.e.dynsyms) + +if __name__ == "__main__": + for file in sys.argv[1:]: + print("File: %s"%file) + fd = open(file, 'rb') + try: + raw = fd.read() + finally: + fd.close() + e = BINARY(raw) + print(" container %s" % e.container) + print(" architecture %s" % e.architecture) + print(" entrypoint %#x" % e.entrypoint) + print(" max address %#x" % e.max_addr) + print(" %d sections:" % len(e.sections)) + for sect in e.sections: + print(" %s" % sect) + print(" %d symbols:" % len(e.symbols)) + for symbol in e.symbols: + print(" %s" % symbol) + print(" %d dynamic symbols:" % len(e.dynsyms)) + for symbol in e.dynsyms: + print(" %s" % symbol) diff --git a/elfesteem/compatibility_python23.py b/elfesteem/compatibility_python23.py new file mode 100644 index 0000000..69bb945 --- /dev/null +++ b/elfesteem/compatibility_python23.py @@ -0,0 +1,17 @@ +import sys +if sys.version_info[0] == 2 and sys.version_info[1] <= 3: + # 
Python 2.3 does not know 'sorted' nor 'reversed' + def sorted(l, key=None, reverse=False): + l = [_ for _ in l] + if key is None: + if reverse: l.sort(lambda x,y: cmp(y,x)) + else: l.sort() + else: + if reverse: l.sort(lambda x,y: cmp(key(y),key(x))) + else: l.sort(lambda x,y: cmp(key(x),key(y))) + return l + def reversed(l): + length = len(l) + return [ l[length-idx] for idx in range(1,length+1) ] + import warnings + warnings.simplefilter("ignore", FutureWarning) diff --git a/elfesteem/cstruct.py b/elfesteem/cstruct.py index 0027d20..10cd8c7 100644 --- a/elfesteem/cstruct.py +++ b/elfesteem/cstruct.py @@ -1,131 +1,419 @@ #! /usr/bin/env python -import struct +import struct, re + +# To be compatible with python 2 and python 3 +data_empty = struct.pack("") +data_null = struct.pack("B",0) + +import sys +if sys.version_info[0] < 3: + bytes_to_name = lambda s: s + name_to_bytes = lambda s: s +else: + bytes_to_name = lambda s: s.decode(encoding="latin1") + name_to_bytes = lambda s: s.encode(encoding="latin1") + +class CBase(object): + """ + This is the base class, used to define CString, CStruct, CArray + + Functions to manipulate a CBase object + unpack(): two args (c, o) the bytestring and the starting offset + pack(): creates a byte string from the object content + bytelen: length of this byte string + pprint(): representation of the object, that can be used by pprint + update(): named args, that change the object content + + Parameters used to create a CBase object from a bytestring: + parent: parent object (mandatory) + content: binary stream to initialize the object + start: offset where to start parsing the content + sex and wsize: endianess and wordsize + """ + def __init__(self, *args, **kargs): + if not 'parent' in kargs: + # Old API of elfesteem + # e.g. 
used by miasm2's example/jitter/unpack_upx.py + kargs['parent'] = args[0] + self._parent_parse(kargs) + self._initialize() + if 'content' in kargs: + if not 'start' in kargs: kargs['start'] = 0 + if 'count' in kargs: + self.count = lambda c=kargs['count']: c + del kargs['count'] + self.unpack(kargs['content'], kargs['start']) + del kargs['content'] + del kargs['start'] + self.update(**kargs) + def _parent_parse(self, kargs): + self.parent = kargs['parent'] + if not 'sex' in kargs: kargs['sex'] = self.parent.sex + if not 'wsize' in kargs: kargs['wsize'] = self.parent.wsize + self.sex = kargs['sex'] + self.wsize = kargs['wsize'] + del kargs['parent'] + del kargs['sex'] + del kargs['wsize'] + def _initialize(self): + # For default values + pass + def update(self, **kargs): + pass + + def __len__(self): + # We don't use __len__ for the length in bytes, because we want to be able + # to use it for the number of elements of a CArray + raise AttributeError("__len__ not defined for '%s'"%self.__class__.__name__) + def bytelen(self): + return self._size + bytelen = property(bytelen) + + def _size_align(self, o): + s = o._size + if hasattr(self, '_align'): + s += ((self._align - s % self._align) % self._align) + return s + def _pack_align(self, o): + s = o.pack() + if hasattr(self, '_align'): + s += '\0' * ((self._align - o._size % self._align) % self._align) + return s + +class CString(CBase): + def set_value(self, s): + self.X = s + self._size = len(self.X) + 1 + def unpack(self, c, o): + self.set_value(c[o:c.find(data_null,o)]) + self._off = o + def update(self, **kargs): + # If 's' is an argument, then the string value is set to s + if 's' in kargs: + self.set_value(kargs['s']) + def _initialize(self): + self.set_value(data_empty) + def pack(self): + return self.X + data_null + def __str__(self): + return bytes_to_name(self.X) + def __repr__(self): + return '' % self.X + def pprint(self): + return self.X + +from elfesteem.strpatchwork import StrPatchwork +class 
CData(object): + # Generic class to be used at the end of a CStruct, to implement common + # cases implemented in C as struct s { ...; char data[]; } + # We use StrPatchwork because the data may be very long, and we want to + # be able to modify it very efficiently. + def __new__(self, f): + class CDataInstance(CBase): + def _initialize(self, f=f): + self._size = f(self.parent) + self._data = StrPatchwork() + def unpack(self, c, o): + self._data[0] = c[o:o+self._size] + def pack(self): + return self._data.pack() + def __str__(self): + return self.pack().decode('latin1') + def __getitem__(self, item): + return self._data[item] + def __setitem__(self, item, value): + self._data[item] = value + return CDataInstance type_size = {} size2type = {} +size2type_s = {} + for t in 'B', 'H', 'I', 'Q': s = struct.calcsize(t) type_size[t] = s*8 size2type[s*8] = t +for t in 'b', 'h', 'i', 'q': + s = struct.calcsize(t) + type_size[t] = s*8 + size2type_s[s*8] = t + type_size['u08'] = size2type[8] type_size['u16'] = size2type[16] type_size['u32'] = size2type[32] type_size['u64'] = size2type[64] -def fix_size(fields, wsize): - out = [] - for name, v in fields: - if v.endswith("s"): - pass - elif v == "ptr": - v = size2type[wsize] - elif not v in type_size: - raise ValueError("unkown Cstruct type", v) - else: - v = type_size[v] - out.append((name, v)) - fields = out - return fields - - -class Cstruct_Metaclass(type): +type_size['s08'] = size2type_s[8] +type_size['s16'] = size2type_s[16] +type_size['s32'] = size2type_s[32] +type_size['s64'] = size2type_s[64] + +def convert_size2type(ftype, wsize): + if not isinstance(ftype, str): + return '' + elif re.match(r'\d+s', ftype): + return ftype + elif ftype == "ptr": + return size2type[wsize] + elif ftype in type_size: + return type_size[ftype] + else: + raise ValueError("unkown CStruct type", ftype) + +class CStruct_metaclass(type): + """ + metaclass, with a syntax compatible with python2 and python3 + """ + _prefix = "_field_" # To avoid 
namespace collisions def __new__(cls, name, bases, dct): - o = super(Cstruct_Metaclass, cls).__new__(cls, name, bases, dct) - o._packstring = o._packformat+"".join(map(lambda x:x[1],o._fields)) - o._size = struct.calcsize(o._packstring) - return o -class CStruct(object): - #__metaclass__ = Cstruct_Metaclass + if '_fields' in dct: + for fname, _ in dct['_fields']: + dct[fname] = property( + lambda self,fname=fname: self.getf(fname), + lambda self,v,fname=fname: self.setf(fname,v), + None) + return type.__new__(cls, name, bases, dct) + +CStruct_base = CStruct_metaclass('CStruct_base', (CBase,), {}) +class CStruct(CStruct_base): + """ + The class CStruct is inherited by classes that simply + represent a concatenation of typed fields + + How to create a CStruct class: + _fields list the pairs (field_name, field_type) + if the last fields are (field_name, class), they are optional + _align: an optional integer value for alignment of optional fields + + How to create a CStruct object: + the keywords not used by CBase initialise the object fields + + How to use a CStruct object: + in addition to the CBase interface, the fields can be modified + + Field types: + basic types with fixed size (u08, ..., 16s) + wsize-dependent type (ptr) + """ + + def getf(self, fname): + return getattr(self,'_0'+fname) + def setf(self, fname, v): + return setattr(self,'_0'+fname,v) + _packformat = "" - _fields = [] - - @classmethod - def _from_file(cls, f): - return cls(f.read(cls._size)) - - def __init__(self, sex, wsize, *args, **kargs): - if sex==1: - sex = '<' - else: - sex = '>' - #packformat enforce sex + + def _parent_parse(self, kargs): + CBase._parent_parse(self, kargs) if self._packformat: - sex = "" - pstr = fix_size(self._fields, wsize) - self._packstring = sex + self._packformat+"".join(map(lambda x:x[1],pstr)) - self._size = struct.calcsize(self._packstring) + self.sex = "" + self._format = {} + pstr = [] + for fname, ftype in self._fields: + ftype = convert_size2type(ftype, 
self.wsize) + self._format[fname] = ftype + pstr.append(ftype) + self._packstring = self.sex + self._packformat+"".join(pstr) + self._names = [x[0] for x in self._fields if isinstance(x[1],str)] + self._opt = [x for x in self._fields if not isinstance(x[1],str)] - self._names = map(lambda x:x[0], self._fields) - if kargs: - self.__dict__.update(kargs) - else: - s="" - if args: - s = args[0] - s += "\x00"*self._size - s = s[:self._size] - self._unpack(s) - - def _unpack(self,s): + def unpack(self, c, o): + self._size = struct.calcsize(self._packstring) + s = c[o:o+self._size] + s += data_null*(self._size-len(s)) disas = struct.unpack(self._packstring, s) for n,v in zip(self._names,disas): setattr(self, n, v) + # If the last fields are optional data, their types are a class + for fname, fclass in self._opt: + v = fclass(parent=self, content=c, start=o+self._size) + self._size += self._size_align(v) + self.setf(fname, v) - def _pack(self): - return struct.pack(self._packstring, - *map(lambda x: getattr(self, x), self._names)) - - def _spack(self, superstruct, shift=0): - attr0 = map(lambda x: getattr(self, x), self._names) - attr = [] - for s in attr0: - if isinstance(s,CStruct): - if s in superstruct: - s = reduce(lambda x,y: x+len(y), - superstruct[:superstruct.index(s)], - 0) - s += shift - else: - raise Exception("%s not un superstructure" % repr(s)) - attr.append(s) - return struct.pack(self._packstring, *attr) - - def _copy(self): - return self.__class__(**self.__dict__) + def _initialize(self): + self._size = struct.calcsize(self._packstring) + for f in self._names: + # Default values + if self._format[f].endswith('s'): self.setf(f,data_empty) + else: self.setf(f,0) + for fname, fclass in self._opt: + v = fclass(parent=self) + self._size += self._size_align(v) + self.setf(fname, v) - def __len__(self): - return self._size + def update(self, **kargs): + for f in [f for f in kargs if f in self._names]: + self.setf(f,kargs[f]) + for fname, fclass in self._opt: + v 
= self.getf(fname) + self._size -= self._size_align(v) + v.update(**kargs) + self._size += self._size_align(v) + + def pack(self): + fields = [getattr(self, x) for x in self._names] + s = struct.pack(self._packstring, *fields) + for fname, fclass in self._opt: + s += self._pack_align(self.getf(fname)) + if self.bytelen != len(s): + raise ValueError("Inconsistent size %d != %d for %r" + % (self.bytelen,len(s), self.__class__.__name__)) + return s def __str__(self): - return self._pack() + raise AttributeError("Use pack() instead of str()") + + def pprint(self): + rep = { } + for fname, _ in self._fields: + rep[fname] = getattr(self, fname) + if hasattr(rep[fname], 'pprint'): + rep[fname] = rep[fname].pprint() + return ( "<%s>" % self.__class__.__name__, rep ) def __repr__(self): - return "<%s=%s>" % (self.__class__.__name__, "/".join(map(lambda x:repr(getattr(self,x[0])),self._fields))) + return "<%s=%s>" % (self.__class__.__name__, + "/".join(map(lambda x:repr(getattr(self,x[0])),self._fields))) def __getitem__(self, item): # to work with format strings return getattr(self, item) - def _show(self): - print "##%s:" % self.__class__.__name__ - fmt = "%%-%is = %%r" % max(map(lambda x:len(x[0]), self._fields)) - for fn,ft in self._fields: - print fmt % (fn,getattr(self,fn)) - -class CStructStruct: - def __init__(self, lst, shift=0): - self._lst = lst - self._shift = shift - def __getattr__(self, attr): - return getattr(self._lst, attr) - def __str__(self): - s = [] - for a in self._lst: - if type(a) is str: - s.append(a) +class CStructWithStrTable(CStruct): + # The attribute 'name' is computed from an integer index 'name_idx' + # and a link to the string table 'strtab' + def get_name(self): + return self.strtab.get_name(self.name_idx) + def set_name(self, name): + if self.name_idx == 0: + self.name_idx = self.strtab.add_name(name) + else: + self.strtab.mod_name(self.name_idx, name) + name = property(get_name, set_name) + def update(self, **kargs): + 
CStruct.update(self, **kargs) + if 'name' in kargs and 'name_idx' in self._names: + self.name = kargs['name'] + +class CArray_metaclass(type): + """ + metaclass, with a syntax compatible with python2 and python3 + """ + def __new__(cls, name, bases, dct): + class_defined = '_cls' in dct + for c in bases: + class_defined = class_defined or '_cls' in c.__dict__ + if not name.startswith('CArray') and not class_defined: + raise ValueError("Class %r should define '_cls'"%name) + return type.__new__(cls, name, bases, dct) + +CArray_base = CArray_metaclass('CArray_base', (CBase,), {}) +class CArray(CArray_base): + """ + The class CArray is inherited by classes that represent + a variable length array of objects of variable length. + + How to create a CArray subclass: + _cls: the class of the array elements + count (optional): method that returns the number of elements + + How to use a CArray object: + in addition to the CBase interface, + [item] gives access to an element of the array + len gives the number of elements + append adds an element to the array + _array is the whole array + _last is the terminating element, if count is not defined + """ + def _initialize(self): + self._array = [] # Elements of the array + self._size = 0 + if not hasattr(self, 'count'): + # Array end is decided by a terminating element + # which is detected by 'stop', of by default by + # comparing with the default value of an object + # of class _cls + self._last = self._cls(parent=self) + self._size += self._size_align(self._last) + + def pack(self): + s = data_empty.join([self._pack_align(o) for o in self._array]) + if hasattr(self, '_last'): s += self._pack_align(self._last) + if self._size != len(s): + raise ValueError("Inconsistent size %d != %d for %r" + % (self._size,len(s), self.__class__.__name__)) + return s + + def stop(self, elt): + return elt.pack() == self._last.pack() + + def unpack(self, c, o): + if o is None: return + self._off = o + if hasattr(self, 'count'): + # self.count() 
is recomputed each time + # This enables complicated conditions for array termination + idx = 0 + while idx < self.count(): + if o+self._size >= len(c): + break + elt = self._cls(parent=self, content=c, start=o+self._size) + self._array.append(elt) + self._size += self._size_align(elt) + idx += 1 + else: + pos = 0 + while True: + if o+pos >= len(c): + break + elt = self._cls(parent=self, content=c, start=o+pos) + if self.stop(elt): + break + self._array.append(elt) + pos += self._size_align(elt) + self._size += pos + + def __getitem__(self, item): + return self._array[item] + + def __len__(self): + return len(self._array) + + def append(self, obj): + self._array.append(obj) + self._size += self._size_align(self._array[-1]) + return obj + + def pprint(self): + return ("<%s>"%self.__class__.__name__, + [x.pprint() for x in self._array], + ) + + def __repr__(self): + return "<%s of length %d>" % (self.__class__.__name__, len(self)) + +# Method that defines constants (as in .h headers) and tables that +# can recover the constant's name from its value. +def Constants(globs = None, table = None, + name = None, prefix = None, + no_name = (), **kargs): + if prefix is None: + # Use the prefix common to all value names + for k in kargs: + if prefix is None: + prefix = k else: - s.append(a._spack(self._lst, self._shift)) - return "".join(s) - - + while not k.startswith(prefix): + prefix = prefix[:-1] + if name is None: + if prefix.endswith('_'): name = prefix[:-1] + else: name = prefix + if name != '' and not name in table: table[name] = {} + for k in kargs: + globs[k] = kargs[k] + if name != '': + if k.startswith(prefix) and not k in no_name: + if kargs[k] in table[name]: + print("Duplicate at %s[%s]=%s; %s"%(name,kargs[k],table[name][kargs[k]],k)) + table[name][kargs[k]] = k[len(prefix):] diff --git a/elfesteem/elf.py b/elfesteem/elf.py index 14fbdee..0cb4ac0 100755 --- a/elfesteem/elf.py +++ b/elfesteem/elf.py @@ -1,6 +1,16 @@ #! 
/usr/bin/env python -from cstruct import CStruct +from elfesteem.cstruct import Constants, CStruct, CStructWithStrTable + +constants = {} +def SetConstants(**kargs): + Constants(globs = globals(), table = constants, **kargs) + +import sys +if sys.version_info[0:2] == (2, 3): + mask32 = (eval("1L")<<32)-1 # 'eval' avoids SyntaxError with python3.x +else: + mask32 = eval("0xffffffff") # 'eval' avoids warnings with python2.3 class Ehdr(CStruct): _fields = [ ("ident","16s"), @@ -18,9 +28,8 @@ class Ehdr(CStruct): ("shnum","u16"), ("shstrndx","u16") ] - -class Shdr(CStruct): - _fields = [ ("name","u32"), +class Shdr(CStructWithStrTable): + _fields = [ ("name_idx","u32"), ("type","u32"), ("flags","ptr"), ("addr","ptr"), @@ -30,8 +39,65 @@ class Shdr(CStruct): ("info","u32"), ("addralign","ptr"), ("entsize","ptr") ] - -class Phdr(CStruct): + strtab = property(lambda _: _.parent.shstrtab) + header32 = [" [Nr] Name Type Addr Off Size ES Flg Lk Inf Al"] + format32 = " [%(idx)2d] %(name17)-17s %(type_txt)-15s %(addr)08x %(offset)06x %(size)06x %(entsize)02x %(flags_txt)3s %(link)2d %(info)3d %(addralign)2d" + header64 = [" [Nr] Name Type Address Offset Size EntSize Flags Link Info Align"] + format64 = " [%(idx)2d] %(name17)-17s %(type_txt)-15s %(addr)016x %(offset)08x %(size)016x %(entsize)016x %(flags_txt)3s %(link)2d %(info)2d %(addralign)2d" + footer32 = [ + "Key to Flags:", + " W (write), A (alloc), X (execute), M (merge), S (strings)", + " I (info), L (link order), G (group), T (TLS), E (exclude), x (unknown)", + " O (extra OS processing required) o (OS specific), p (processor specific)", + ] + footer64 = [ + "Key to Flags:", + " W (write), A (alloc), X (execute), M (merge), S (strings), l (large)", + " I (info), L (link order), G (group), T (TLS), E (exclude), x (unknown)", + " O (extra OS processing required) o (OS specific), p (processor specific)", + ] + format = property(lambda _: { 32: Shdr.format32, 64: Shdr.format64 }[_.wsize]) + footer = property(lambda _: { 32: 
Shdr.footer32, 64: Shdr.footer64 }[_.wsize]) + name17 = property(lambda _: _.name[:17]) + idx = property(lambda _: _.parent.parent.shlist.index(_.parent)) + def flags_txt(self): + ret = "" + if self.flags & SHF_WRITE: ret += "W" + if self.flags & SHF_ALLOC: ret += "A" + if self.flags & SHF_EXECINSTR: ret += "X" + if self.flags & SHF_MERGE: ret += "M" + if self.flags & SHF_STRINGS: ret += "S" + if self.flags & SHF_INFO_LINK: ret += "I" + if self.flags & SHF_LINK_ORDER: ret += "L" + if self.flags & SHF_OS_NONCONFORMING: ret += "O" + if self.flags & SHF_GROUP: ret += "G" + if self.flags & SHF_TLS: ret += "T" + if self.flags & SHF_EXCLUDE: ret += "E" + return ret + flags_txt = property(flags_txt) + def type_txt(self): + m = constants['EM'][self.parent.parent.parent.Ehdr.machine] + if 'SHT_'+m in constants: + ret = m+'_'+constants['SHT_'+m][self.type] + elif self.type in constants['SHT']: + ret = constants['SHT'][self.type] + elif SHT_LOOS <= self.type <= SHT_HIOS: + ret = "LOOS+%x"%(self.type - SHT_LOOS) + elif SHT_LOPROC <= self.type <= SHT_HIPROC: + ret = "LOPROC+%x"%(self.type - SHT_LOPROC) + elif SHT_LOUSER <= self.type <= SHT_HIUSER: + ret = "LOUSER+%x"%(self.type - SHT_LOUSER) + else: + ret = "Unknown%#x"%self.type + if ret == 'GNU_verdef': ret = 'VERDEF' + if ret == 'GNU_verneed': ret = 'VERNEED' + if ret == 'GNU_versym': ret = 'VERSYM' + return ret + type_txt = property(type_txt) + def readelf_display(self): + return self.format % self + +class Phdr32(CStruct): _fields = [ ("type","u32"), ("offset","u32"), ("vaddr","u32"), @@ -51,1503 +117,2124 @@ class Phdr64(CStruct): ("memsz","ptr"), ("align","ptr") ] - -class Sym32(CStruct): - _fields = [ ("name","u32"), +class Sym32(CStructWithStrTable): + _fields = [ ("name_idx","u32"), ("value","u32"), ("size","u32"), ("info","u08"), ("other","u08"), ("shndx","u16") ] - -class Sym64(CStruct): - _fields = [ ("name","u32"), + format = '%(idx)6d: %(value)08x %(size)5d %(type)-7s %(bind)-6s %(visibility)-7s %(ndx)-3s 
%(name)s' + idx = property(lambda _: _.parent.symtab.index(_)) + strtab = property(lambda _: _.parent.linksection) + type = property(lambda _: constants['STT'].get(_.info&0xf, + ': %d'%(_.info&0xf))) + bind = property(lambda _: constants['STB'].get(_.info>>4, + ': %d'%(_.info>>4))) + visibility = property(lambda _: constants['STV'].get(_.other, + 'DEFAULT [: %x] '%_.other)) + def ndx(self): + if self.shndx==SHN_UNDEF: return "UND" + elif self.shndx==SHN_ABS: return "ABS" + elif self.shndx==SHN_COMMON: return "COM" + else: return "%3d"%self.shndx + ndx = property(ndx) + def readelf_display(self): + return self.format % self + def __str__(self): + return self.readelf_display() + +class Sym64(Sym32): + _fields = [ ("name_idx","u32"), ("info","u08"), ("other","u08"), ("shndx","u16"), - ("value","u32"), + ("value","u64"), ("size","u64") ] + format = '%(idx)6d: %(value)016x %(size)5d %(type)-7s %(bind)-6s %(visibility)-7s %(ndx)-3s %(name)s' class Dym(CStruct): _fields = [ ("tag","u32"), ("val","u32") ] -class Rel32(CStruct): +class RelBase(CStruct): + def symbol(self): + if not hasattr(self.parent.linksection, 'symtab') \ + or self.sym_idx >= len(self.parent.linksection.symtab): + # In some (invalid?) 
binaries, most sections are of + # type NOBITS, including the symbols section, which + # therefore has no symtab member + # We could force this section type to SYMTAB, but it + # has side effects + class VoidName(object): + name = None + return VoidName() + return self.parent.linksection.symtab[self.sym_idx] + symbol = property(symbol) + shndx = property(lambda _: _.symbol.shndx) + value = property(lambda _: _.symbol.value) + sym = property(lambda _: _.symbol.name) + def name(self): + if self.sym == '': + return self.parent.parent.parent.sh[self.shndx].sh.name + else: + return self.sym[:22] + name = property(name) + def type17(self): + machine = constants['EM'][self.parent.parent.parent.Ehdr.machine] + if machine == 'SPARC32PLUS': machine = 'SPARC' + if machine == 'SPARCV9': machine = 'SPARC' + if not machine in constants['R']: + ret = '%d aka. %#x' % (self.type, self.type) + elif hasattr(self, 'type1'): # MIPS64 + ret = 'R_%s_%s' % (machine, constants['R'][machine][self.type1]) + else: + ret = 'R_%s_%s' % (machine, constants['R'][machine][self.type]) + ret = ret[:17] # truncated by readelf! 
+ if ret == 'R_386_JMP_SLOT': ret = 'R_386_JUMP_SLOT' + return ret + type17 = property(type17) + def readelf_display(self): + res = self.format % self + if self.__class__.__name__ == 'Rel32': + res += ' %(value)08x ' % self + else: + if self.value == 0 and self.type == R_X86_64_RELATIVE: + res += ' ' + else: + res += ' %(value)016x' % self + res += ' %(name)s' % self + if self.parent.sht == SHT_RELA: + if self.addend < 0: res += " - %x" % -self.addend + elif self.name == '': res += " %x" % self.addend + else: res += " + %x" % self.addend + if hasattr(self, 'type1'): + machine = constants['EM'][self.parent.parent.parent.Ehdr.machine] + type = 'R_%s_%s' % (machine, constants['R'][machine][self.type2]) + res += "\n Type2: %-16s" % type + type = 'R_%s_%s' % (machine, constants['R'][machine][self.type3]) + res += "\n Type3: %-16s" % type + return res + +class Rel32(RelBase): + # sym_idx is 24-bit long, cannot be defined as a field type + # we get it by parsing 'info' _fields = [ ("offset","ptr"), ("info","u32") ] + format = '%(offset)08x %(info)08x %(type17)-17s %(value)08x %(name)s' + format = '%(offset)08x %(info)08x %(type17)-17s' + type = property(lambda _: _.info & 0xff) + sym_idx = property(lambda _:_.info>>8) -class Rel64(CStruct): +class Rel64(RelBase): _fields = [ ("offset","ptr"), ("info","u64") ] + format = '%(offset)012x %(info)012x %(type17)-17s %(value)016x %(name)s' + format = '%(offset)012x %(info)012x %(type17)-17s' + type = property(lambda _: _.info & mask32) + sym_idx = property(lambda _:_.info>>32) -class Rela32(CStruct): +class Rel64MIPS(RelBase): + # e.g. 
http://www.openwall.com/lists/musl/2016/01/22/2 + _fields = [ ("offset","ptr"), + ("sym_idx","u32"), + ("ssym","u08"), + ("type3","u08"), + ("type2","u08"), + ("type1","u08") ] + def type(self): + raise ValueError("MIPS64 relocation type is a combination of 3 relocation types each of size 1 byte") + type = property(type) + info = property(lambda _:_.type1 + (_.type2<<8) + (_.type3<<16) + (_.ssym<<24) + (_.sym_idx<<32)) + +class Rela32(Rel32): _fields = [ ("offset","ptr"), ("info","u32"), - ("addend","ptr") ] + ("addend","s32") ] -class Rela64(CStruct): +class Rela64(Rel64): _fields = [ ("offset","ptr"), ("info","u64"), - ("addend","ptr") ] + ("addend","s64") ] -class Dynamic(CStruct): +class Dyn32(CStruct): _fields = [ ("type","u32"), - ("name","u32") ] - - + ("name_idx","u32") ] + def name(self): + if self.type == DT_NEEDED: + return self.parent.linksection.get_name(self.name_idx) + return self.name_idx + name = property(name) + +class Dyn64(Dyn32): + _fields = [ ("type","u64"), + ("name_idx","u64") ] + + +class Verdef32(CStruct): + _fields = [ ("version","u16"), + ("flags","u16"), + ("ndx","u16"), + ("cnt","u16"), + ("hash","u32"), + ("aux","u32") ] + +class Verdef64(Verdef32): + _fields = [ ("version","u16"), + ("flags","u16"), + ("ndx","u16"), + ("cnt","u16"), + ("hash","u32"), + ("aux","u32"), + ("next","u32") ] + +class Verneed32(CStruct): + _fields = [ ("vn_version", "u16"), + ("vn_cnt", "u16"), + ("vn_file", "u32"), + ("vn_aux", "u32"), + ("vn_next", "u32")] + offset = None + element_size = 0x10 + + def next(self): + if not self.vn_next: + return None + + return self.parent.elements[(self.offset + self.vn_next) // self.element_size] + next = property(next) + + + def aux(self): + if not self.vn_aux: + return None + + return self.parent.elements[(self.offset + self.vn_aux) // self.element_size] + aux = property(aux) + +class Verneed64(Verneed32): + _fields = [ ("vn_version", "u16"), + ("vn_cnt", "u16"), + ("vn_file", "u32"), + ("vn_aux", "u32"), + ("vn_next", 
"u32")] + element_size = 0x10 + + +class Vernaux32(CStruct): + _fields = [ ("vna_hash", "u32"), + ("vna_flags", "u16"), + ("vna_other", "u16"), + ("vna_name", "u32"), + ("vna_next", "u32")] + element_size = 0x10 + offset = None + + def next(self): + if not self.vna_next: + return None + + return self.parent.elements[(self.offset + self.vna_next) // self.element_size] + next = property(next) + + def name(self): + return self.parent.parent.parent.getsectionbyname(".dynstr").get_name(self.vna_name) + name = property(name) + + +class Vernaux64(Vernaux32): + _fields = [ ("vna_hash", "u32"), + ("vna_flags", "u16"), + ("vna_other", "u16"), + ("vna_name", "u32"), + ("vna_next", "u32")] + element_size = 0x10 + + +SetConstants( # Legal values for e_ident (identification indexes) - -EI_MAG0 = 0 # File identification -EI_MAG1 = 1 # File identification -EI_MAG2 = 2 # File identification -EI_MAG3 = 3 # File identification -EI_CLASS = 4 # File class -EI_DATA = 5 # Data encoding -EI_VERSION = 6 # File version -EI_OSABI = 7 # Operating system/ABI identification -EI_ABIVERSION = 8 # ABI version -EI_PAD = 9 # Start of padding bytes -EI_NIDENT = 16 # Size of e_ident[] - +EI_MAG0 = 0, # File identification +EI_MAG1 = 1, # File identification +EI_MAG2 = 2, # File identification +EI_MAG3 = 3, # File identification +EI_CLASS = 4, # File class +EI_DATA = 5, # Data encoding +EI_VERSION = 6, # File version +EI_OSABI = 7, # Operating system/ABI identification +EI_ABIVERSION = 8, # ABI version +EI_PAD = 9, # Start of padding bytes +EI_NIDENT = 16, # Size of e_ident[] +) + +SetConstants( # Legal values for e_ident[EI_CLASS] +ELFCLASSNONE = 0, # Invalid class +ELFCLASS32 = 1, # 32-bit objects +ELFCLASS64 = 2, # 64-bit objects +) -ELFCLASSNONE = 0 # Invalid class -ELFCLASS32 = 1 # 32-bit objects -ELFCLASS64 = 2 # 64-bit objects - +SetConstants( # Legal values for e_ident[EI_DATA] - -ELFDATANONE = 0 # Invalid data encoding -ELFDATA2LSB = 1 # Least significant byte at lowest address -ELFDATA2MSB = 
2 # Most significant byte at lowest address - +ELFDATANONE = 0, # Invalid data encoding +ELFDATA2LSB = 1, # Least significant byte at lowest address +ELFDATA2MSB = 2, # Most significant byte at lowest address +) + +SetConstants( +# Legal values for e_ident[EI_OSABI] +ELFOSABI_NONE = 0, # UNIX System V ABI +ELFOSABI_SYSV = 0, # Alias +ELFOSABI_HPUX = 1, # HP-UX +ELFOSABI_NETBSD = 2, # NetBSD +ELFOSABI_GNU = 3, # Object uses GNU ELF extensions +ELFOSABI_LINUX = 3, # Compatibility alias +ELFOSABI_SOLARIS = 6, # Sun Solaris +ELFOSABI_AIX = 7, # IBM AIX +ELFOSABI_IRIX = 8, # SGI Irix +ELFOSABI_FREEBSD = 9, # FreeBSD +ELFOSABI_TRU64 = 10, # Compaq TRU64 UNIX +ELFOSABI_MODESTO = 11, # Novell Modesto +ELFOSABI_OPENBSD = 12, # OpenBSD +ELFOSABI_ARM_AEABI = 64, # ARM EABI +ELFOSABI_ARM = 97, # ARM +ELFOSABI_STANDALONE = 255, # Standalone (embedded) application +no_name=('ELFOSABI_SYSV','ELFOSABI_LINUX'), +) + +SetConstants( # Legal values for e_type (object file type). - -ET_NONE = 0 # No file type -ET_REL = 1 # Relocatable file -ET_EXEC = 2 # Executable file -ET_DYN = 3 # Shared object file -ET_CORE = 4 # Core file -ET_NUM = 5 # Number of defined types -ET_LOOS = 0xfe00L # OS-specific range start -ET_HIOS = 0xfeffL # OS-specific range end -ET_LOPROC = 0xff00L # Processor-specific range start -ET_HIPROC = 0xffffL # Processor-specific range end - +ET_NONE = 0, # No file type +ET_REL = 1, # Relocatable file +ET_EXEC = 2, # Executable file +ET_DYN = 3, # Shared object file +ET_CORE = 4, # Core file +ET_NUM = 5, # Number of defined types +ET_LOOS = 0xfe00, # OS-specific range start +ET_HIOS = 0xfeff, # OS-specific range end +ET_LOPROC = 0xff00, # Processor-specific range start +ET_HIPROC = 0xffff, # Processor-specific range end +) + +SetConstants( # Legal values for e_machine (architecture). 
- -EM_NONE = 0 # No machine -EM_M32 = 1 # AT&T WE 32100 -EM_SPARC = 2 # SUN SPARC -EM_386 = 3 # Intel 80386 -EM_68K = 4 # Motorola m68k family -EM_88K = 5 # Motorola m88k family -EM_486 = 6 # Intel 80486 -EM_860 = 7 # Intel 80860 -EM_MIPS = 8 # MIPS R3000 big-endian -EM_S370 = 9 # IBM System/370 -EM_MIPS_RS3_LE = 10 # MIPS R3000 little-endian - -EM_PARISC = 15 # HPPA -EM_VPP500 = 17 # Fujitsu VPP500 -EM_SPARC32PLUS = 18 # Sun's "v8plus" -EM_960 = 19 # Intel 80960 -EM_PPC = 20 # PowerPC -EM_PPC64 = 21 # PowerPC 64-bit -EM_S390 = 22 # IBM S390 - -EM_V800 = 36 # NEC V800 series -EM_FR20 = 37 # Fujitsu FR20 -EM_RH32 = 38 # TRW RH-32 -EM_RCE = 39 # Motorola RCE -EM_ARM = 40 # ARM -EM_FAKE_ALPHA = 41 # Digital Alpha -EM_SH = 42 # Hitachi SH -EM_SPARCV9 = 43 # SPARC v9 64-bit -EM_TRICORE = 44 # Siemens Tricore -EM_ARC = 45 # Argonaut RISC Core -EM_H8_300 = 46 # Hitachi H8/300 -EM_H8_300H = 47 # Hitachi H8/300H -EM_H8S = 48 # Hitachi H8S -EM_H8_500 = 49 # Hitachi H8/500 -EM_IA_64 = 50 # Intel Merced -EM_MIPS_X = 51 # Stanford MIPS-X -EM_COLDFIRE = 52 # Motorola Coldfire -EM_68HC12 = 53 # Motorola M68HC12 -EM_MMA = 54 # Fujitsu MMA Multimedia Accelerator*/ -EM_PCP = 55 # Siemens PCP -EM_NCPU = 56 # Sony nCPU embeeded RISC -EM_NDR1 = 57 # Denso NDR1 microprocessor -EM_STARCORE = 58 # Motorola Start*Core processor -EM_ME16 = 59 # Toyota ME16 processor -EM_ST100 = 60 # STMicroelectronic ST100 processor -EM_TINYJ = 61 # Advanced Logic Corp. 
Tinyj emb.fam*/ -EM_X86_64 = 62 # AMD x86-64 architecture -EM_PDSP = 63 # Sony DSP Processor - -EM_FX66 = 66 # Siemens FX66 microcontroller -EM_ST9PLUS = 67 # STMicroelectronics ST9+ 8/16 mc -EM_ST7 = 68 # STmicroelectronics ST7 8 bit mc -EM_68HC16 = 69 # Motorola MC68HC16 microcontroller -EM_68HC11 = 70 # Motorola MC68HC11 microcontroller -EM_68HC08 = 71 # Motorola MC68HC08 microcontroller -EM_68HC05 = 72 # Motorola MC68HC05 microcontroller -EM_SVX = 73 # Silicon Graphics SVx -EM_ST19 = 74 # STMicroelectronics ST19 8 bit mc -EM_VAX = 75 # Digital VAX -EM_CRIS = 76 # Axis Communications 32-bit embedded processor -EM_JAVELIN = 77 # Infineon Technologies 32-bit embedded processor -EM_FIREPATH = 78 # Element 14 64-bit DSP Processor -EM_ZSP = 79 # LSI Logic 16-bit DSP Processor -EM_MMIX = 80 # Donald Knuth's educational 64-bit processor -EM_HUANY = 81 # Harvard University machine-independent object files -EM_PRISM = 82 # SiTera Prism -EM_AVR = 83 # Atmel AVR 8-bit microcontroller -EM_FR30 = 84 # Fujitsu FR30 -EM_D10V = 85 # Mitsubishi D10V -EM_D30V = 86 # Mitsubishi D30V -EM_V850 = 87 # NEC v850 -EM_M32R = 88 # Mitsubishi M32R -EM_MN10300 = 89 # Matsushita MN10300 -EM_MN10200 = 90 # Matsushita MN10200 -EM_PJ = 91 # picoJava -EM_OPENRISC = 92 # OpenRISC 32-bit embedded processor -EM_ARC_A5 = 93 # ARC Cores Tangent-A5 -EM_XTENSA = 94 # Tensilica Xtensa Architecture - -EM_ALPHA = 0x9026 - +EM_NONE = 0, # No machine +EM_M32 = 1, # AT&T WE 32100 +EM_SPARC = 2, # SUN SPARC +EM_386 = 3, # Intel 80386 +EM_68K = 4, # Motorola m68k family +EM_88K = 5, # Motorola m88k family +EM_486 = 6, # Intel 80486 +EM_860 = 7, # Intel 80860 +EM_MIPS = 8, # MIPS R3000 big-endian +EM_S370 = 9, # IBM System/370 +EM_MIPS_RS3_LE = 10, # MIPS R3000 little-endian + +EM_PARISC = 15, # HPPA +EM_VPP500 = 17, # Fujitsu VPP500 +EM_SPARC32PLUS = 18, # Sun's "v8plus" +EM_960 = 19, # Intel 80960 +EM_PPC = 20, # PowerPC +EM_PPC64 = 21, # PowerPC 64-bit +EM_S390 = 22, # IBM S390 +EM_SPU = 23, # Cell Broadband 
Engine SPU + +EM_V800 = 36, # NEC V800 series +EM_FR20 = 37, # Fujitsu FR20 +EM_RH32 = 38, # TRW RH-32 +EM_RCE = 39, # Motorola RCE +EM_ARM = 40, # ARM +EM_FAKE_ALPHA = 41, # Digital Alpha +EM_SH = 42, # Hitachi SH +EM_SPARCV9 = 43, # SPARC v9 64-bit +EM_TRICORE = 44, # Siemens Tricore +EM_ARC = 45, # Argonaut RISC Core +EM_H8_300 = 46, # Hitachi H8/300 +EM_H8_300H = 47, # Hitachi H8/300H +EM_H8S = 48, # Hitachi H8S +EM_H8_500 = 49, # Hitachi H8/500 +EM_IA_64 = 50, # Intel Merced +EM_MIPS_X = 51, # Stanford MIPS-X +EM_COLDFIRE = 52, # Motorola Coldfire +EM_68HC12 = 53, # Motorola M68HC12 +EM_MMA = 54, # Fujitsu MMA Multimedia Accelerator*/ +EM_PCP = 55, # Siemens PCP +EM_NCPU = 56, # Sony nCPU embeeded RISC +EM_NDR1 = 57, # Denso NDR1 microprocessor +EM_STARCORE = 58, # Motorola Start*Core processor +EM_ME16 = 59, # Toyota ME16 processor +EM_ST100 = 60, # STMicroelectronic ST100 processor +EM_TINYJ = 61, # Advanced Logic Corp. Tinyj emb.fam*/ +EM_X86_64 = 62, # AMD x86-64 architecture +EM_PDSP = 63, # Sony DSP Processor +EM_PDP10 = 64, # Digital Equipment Corp. PDP-10 +EM_PDP11 = 65, # Digital Equipment Corp. 
PDP-11 +EM_FX66 = 66, # Siemens FX66 microcontroller +EM_ST9PLUS = 67, # STMicroelectronics ST9+ 8/16 mc +EM_ST7 = 68, # STmicroelectronics ST7 8 bit mc +EM_68HC16 = 69, # Motorola MC68HC16 microcontroller +EM_68HC11 = 70, # Motorola MC68HC11 microcontroller +EM_68HC08 = 71, # Motorola MC68HC08 microcontroller +EM_68HC05 = 72, # Motorola MC68HC05 microcontroller +EM_SVX = 73, # Silicon Graphics SVx +EM_ST19 = 74, # STMicroelectronics ST19 8 bit mc +EM_VAX = 75, # Digital VAX +EM_CRIS = 76, # Axis Communications 32-bit embedded processor +EM_JAVELIN = 77, # Infineon Technologies 32-bit embedded processor +EM_FIREPATH = 78, # Element 14 64-bit DSP Processor +EM_ZSP = 79, # LSI Logic 16-bit DSP Processor +EM_MMIX = 80, # Donald Knuth's educational 64-bit processor +EM_HUANY = 81, # Harvard University machine-independent object files +EM_PRISM = 82, # SiTera Prism +EM_AVR = 83, # Atmel AVR 8-bit microcontroller +EM_FR30 = 84, # Fujitsu FR30 +EM_D10V = 85, # Mitsubishi D10V +EM_D30V = 86, # Mitsubishi D30V +EM_V850 = 87, # NEC v850 +EM_M32R = 88, # Mitsubishi M32R +EM_MN10300 = 89, # Matsushita MN10300 +EM_MN10200 = 90, # Matsushita MN10200 +EM_PJ = 91, # picoJava +EM_OPENRISC = 92, # OpenRISC 32-bit embedded processor +EM_ARC_A5 = 93, # ARC Cores Tangent-A5 +EM_XTENSA = 94, # Tensilica Xtensa Architecture +EM_VIDEOCORE = 95, # Alphamosaic VideoCore processor +EM_TMM_GPP = 96, # Thompson Multimedia General Purpose Processor +EM_NS32K = 97, # National Semiconductor 32000 series +EM_TPC = 98, # Tenor Network TPC processor +EM_SNP1K = 99, # Trebia SNP 1000 processor +EM_ST200 = 100, # STMicroelectronics (www.st.com) ST200 +EM_IP2K = 101, # Ubicom IP2xxx microcontroller family +EM_MAX = 102, # MAX Processor +EM_CR = 103, # National Semiconductor CompactRISC microprocessor +EM_F2MC16 = 104, # Fujitsu F2MC16 +EM_MSP430 = 105, # Texas Instruments embedded microcontroller msp430 +EM_BLACKFIN = 106, # Analog Devices Blackfin (DSP) processor +EM_SE_C33 = 107, # S1C33 Family of 
Seiko Epson processors +EM_SEP = 108, # Sharp embedded microprocessor +EM_ARCA = 109, # Arca RISC Microprocessor +EM_UNICORE = 110, # Microprocessor series from PKU-Unity Ltd. and MPRC of Peking University +EM_EXCESS = 111, # eXcess: 16/32/64-bit configurable embedded CPU +EM_DXP = 112, # Icera Semiconductor Inc. Deep Execution Processor +EM_ALTERA_NIOS2 = 113, # Altera Nios II soft-core processor +EM_CRX = 114, # National Semiconductor CompactRISC CRX +EM_XGATE = 115, # Motorola XGATE embedded processor +EM_C166 = 116, # Infineon C16x/XC16x processor +EM_M16C = 117, # Renesas M16C series microprocessors +EM_DSPIC30F = 118, # Microchip Technology dsPIC30F Digital Signal Controller +EM_CE = 119, # Freescale Communication Engine RISC core +EM_M32C = 120, # Renesas M32C series microprocessors +EM_TSK3000 = 131, # Altium TSK3000 core +EM_RS08 = 132, # Freescale RS08 embedded processor +EM_SHARC = 133, # Analog Devices SHARC family of 32-bit DSP processors +EM_ECOG2 = 134, # Cyan Technology eCOG2 microprocessor +EM_SCORE7 = 135, # Sunplus S+core7 RISC processor +EM_DSP24 = 136, # New Japan Radio (NJR) 24-bit DSP Processor +EM_VIDEOCORE3 = 137, # Broadcom VideoCore III processor +EM_LATTICEMICO32 = 138, # RISC processor for Lattice FPGA architecture +EM_SE_C17 = 139, # Seiko Epson C17 family +EM_TI_C6000 = 140, # The Texas Instruments TMS320C6000 DSP family +EM_TI_C2000 = 141, # The Texas Instruments TMS320C2000 DSP family +EM_TI_C5500 = 142, # The Texas Instruments TMS320C55x DSP family +EM_MMDSP_PLUS = 160, # STMicroelectronics 64bit VLIW Data Signal Processor +EM_CYPRESS_M8C = 161, # Cypress M8C microprocessor +EM_R32C = 162, # Renesas R32C series microprocessors +EM_TRIMEDIA = 163, # NXP Semiconductors TriMedia architecture family +EM_HEXAGON = 164, # Qualcomm Hexagon processor +EM_8051 = 165, # Intel 8051 and variants +EM_STXP7X = 166, # STMicroelectronics STxP7x family of configurable and extensible RISC processors +EM_NDS32 = 167, # Andes Technology compact code 
size embedded RISC processor family +EM_ECOG1 = 168, # Cyan Technology eCOG1X family +EM_ECOG1X = 168, # Cyan Technology eCOG1X family +EM_MAXQ30 = 169, # Dallas Semiconductor MAXQ30 Core Micro-controllers +EM_XIMO16 = 170, # New Japan Radio (NJR) 16-bit DSP Processor +EM_MANIK = 171, # M2000 Reconfigurable RISC Microprocessor +EM_CRAYNV2 = 172, # Cray Inc. NV2 vector architecture +EM_RX = 173, # Renesas RX family +EM_METAG = 174, # Imagination Technologies META processor architecture +EM_MCST_ELBRUS = 175, # MCST Elbrus general purpose hardware architecture +EM_ECOG16 = 176, # Cyan Technology eCOG16 family +EM_CR16 = 177, # National Semiconductor CompactRISC CR16 16-bit microprocessor +EM_ETPU = 178, # Freescale Extended Time Processing Unit +EM_SLE9X = 179, # Infineon Technologies SLE9X core +EM_L10M = 180, # Intel L10M +EM_K10M = 181, # Intel K10M +EM_AARCH64 = 183, # ARM AArch64 +EM_AVR32 = 185, # Atmel Corporation 32-bit microprocessor family +EM_STM8 = 186, # STMicroeletronics STM8 8-bit microcontroller +EM_TILE64 = 187, # Tilera TILE64 multicore architecture family +EM_TILEPRO = 188, # Tilera TILEPro multicore architecture family +EM_MICROBLAZE = 189, # Xilinx MicroBlaze +EM_CUDA = 190, # NVIDIA CUDA architecture +EM_TILEGX = 191, # Tilera TILE-Gx multicore architecture family +EM_CLOUDSHIELD = 192, # CloudShield architecture family +EM_COREA_1ST = 193, # KIPO-KAIST Core-A 1st generation processor family +EM_COREA_2ND = 194, # KIPO-KAIST Core-A 2nd generation processor family +EM_ARC_COMPACT2 = 195, # Synopsys ARCompact V2 +EM_OPEN8 = 196, # Open8 8-bit RISC soft processor core +EM_RL78 = 197, # Renesas RL78 family +EM_VIDEOCORE5 = 198, # Broadcom VideoCore V processor +EM_78KOR = 199, # Renesas 78KOR family +EM_56800EX = 200, # Freescale 56800EX Digital Signal Controller (DSC) +EM_BA1 = 201, # Beyond BA1 CPU architecture +EM_BA2 = 202, # Beyond BA2 CPU architecture +EM_XCORE = 203, # XMOS xCORE processor family +EM_MCHP_PIC = 204, # Microchip 8-bit PIC(r) 
family +EM_INTEL205 = 205, # Reserved by Intel +EM_INTEL206 = 206, # Reserved by Intel +EM_INTEL207 = 207, # Reserved by Intel +EM_INTEL208 = 208, # Reserved by Intel +EM_INTEL209 = 209, # Reserved by Intel +EM_KM32 = 210, # KM211 KM32 32-bit processor +EM_KMX32 = 211, # KM211 KMX32 32-bit processor +EM_KMX16 = 212, # KM211 KMX16 16-bit processor +EM_KMX8 = 213, # KM211 KMX8 8-bit processor +EM_KVARC = 214, # KM211 KVARC processor +EM_CDP = 215, # Paneve CDP architecture family +EM_COGE = 216, # Cognitive Smart Memory Processor +EM_COOL = 217, # iCelero CoolEngine +EM_NORC = 218, # Nanoradio Optimized RISC +EM_CSR_KALIMBA = 219, # CSR Kalimba architecture family +EM_VISIUM = 221, # +EM_FT32 = 222, # FTDI FT32 +EM_MOXIE = 223, # Moxie +EM_AMDGPU = 224, # AMD GPU architecture +EM_RISCV = 243, # RISC-V +EM_LANAI = 244, # Lanai 32-bit processor +EM_BPF = 247, # Linux kernel bpf virtual machine +EM_EPIPHANY = 0x1223, # Adapteva's Epiphany +EM_FRV = 0x5441, +EM_STORMY16 = 0xad45, +EM_IQ2000 = 0xfeba, # Vitesse IQ2000 +EM_ALPHA = 0x9026, +no_name=('EM_ECOG1',), +) + +SetConstants( +# Special section indices. +SHN_UNDEF = 0, # Undefined section +SHN_LORESERVE = 0xff00, # Start of reserved indices +SHN_LOPROC = 0xff00, # Start of processor-specific +SHN_BEFORE = 0xff00, # Order section before all others (Solaris). +SHN_AFTER = 0xff01, # Order section after all others (Solaris). +SHN_HIPROC = 0xff1f, # End of processor-specific +SHN_LOOS = 0xff20, # Start of OS-specific +SHN_HIOS = 0xff3f, # End of OS-specific +SHN_ABS = 0xfff1, # Associated symbol is absolute +SHN_COMMON = 0xfff2, # Associated symbol is common +SHN_XINDEX = 0xffff, # Index is in extra table. +SHN_HIRESERVE = 0xffff, # End of reserved indices +no_name=('SHN_LORESERVE','SHN_HIRESERVE', + 'SHN_LOPROC','SHN_HIPROC','SHN_LOOS','SHN_HIOS'), +) + +SetConstants( # Legal values for sh_type (section type). 
- -SHT_NULL = 0 # Section header table entry unused -SHT_PROGBITS = 1 # Program data -SHT_SYMTAB = 2 # Symbol table -SHT_STRTAB = 3 # String table -SHT_RELA = 4 # Relocation entries with addends -SHT_HASH = 5 # Symbol hash table -SHT_DYNAMIC = 6 # Dynamic linking information -SHT_NOTE = 7 # Notes -SHT_NOBITS = 8 # Program space with no data (bss) -SHT_REL = 9 # Relocation entries, no addends -SHT_SHLIB = 10 # Reserved -SHT_DYNSYM = 11 # Dynamic linker symbol table -SHT_INIT_ARRAY = 14 # Array of constructors -SHT_FINI_ARRAY = 15 # Array of destructors -SHT_PREINIT_ARRAY = 16 # Array of pre-constructors -SHT_GROUP = 17 # Section group -SHT_SYMTAB_SHNDX = 18 # Extended section indeces -SHT_NUM = 19 # Number of defined types. -SHT_LOOS = 0x60000000L # Start OS-specific -SHT_GNU_LIBLIST = 0x6ffffff7L # Prelink library list -SHT_CHECKSUM = 0x6ffffff8L # Checksum for DSO content. -SHT_LOSUNW = 0x6ffffffaL # Sun-specific low bound. -SHT_SUNW_move = 0x6ffffffaL -SHT_SUNW_COMDAT = 0x6ffffffbL -SHT_SUNW_syminfo = 0x6ffffffcL -SHT_GNU_verdef = 0x6ffffffdL # Version definition section. -SHT_GNU_verneed = 0x6ffffffeL # Version needs section. -SHT_GNU_versym = 0x6fffffffL # Version symbol table. -SHT_HISUNW = 0x6fffffffL # Sun-specific high bound. 
-SHT_HIOS = 0x6fffffffL # End OS-specific type -SHT_LOPROC = 0x70000000L # Start of processor-specific -SHT_HIPROC = 0x7fffffffL # End of processor-specific -SHT_LOUSER = 0x80000000L # Start of application-specific -SHT_HIUSER = 0x8fffffffL # End of application-specific - +SHT_NULL = 0, # Section header table entry unused +SHT_PROGBITS = 1, # Program data +SHT_SYMTAB = 2, # Symbol table +SHT_STRTAB = 3, # String table +SHT_RELA = 4, # Relocation entries with addends +SHT_HASH = 5, # Symbol hash table +SHT_DYNAMIC = 6, # Dynamic linking information +SHT_NOTE = 7, # Notes +SHT_NOBITS = 8, # Program space with no data (bss) +SHT_REL = 9, # Relocation entries, no addends +SHT_SHLIB = 10, # Reserved +SHT_DYNSYM = 11, # Dynamic linker symbol table +SHT_INIT_ARRAY = 14, # Array of constructors +SHT_FINI_ARRAY = 15, # Array of destructors +SHT_PREINIT_ARRAY = 16, # Array of pre-constructors +SHT_GROUP = 17, # Section group +SHT_SYMTAB_SHNDX = 18, # Extended section indeces +SHT_NUM = 19, # Number of defined types. +SHT_LOOS = 0x60000000, # Start OS-specific +SHT_GNU_HASH = 0x6ffffff6, +SHT_GNU_LIBLIST = 0x6ffffff7, # Prelink library list +SHT_CHECKSUM = 0x6ffffff8, # Checksum for DSO content. +SHT_LOSUNW = 0x6ffffffa, # Sun-specific low bound. +SHT_SUNW_move = 0x6ffffffa, +SHT_SUNW_COMDAT = 0x6ffffffb, +SHT_SUNW_syminfo = 0x6ffffffc, +SHT_GNU_verdef = 0x6ffffffd, # Version definition section. +SHT_GNU_verneed = 0x6ffffffe, # Version needs section. +SHT_GNU_versym = 0x6fffffff, # Version symbol table. +SHT_HISUNW = 0x6fffffff, # Sun-specific high bound. 
+SHT_HIOS = 0x6fffffff, # End OS-specific type +SHT_LOPROC = 0x70000000, # Start of processor-specific +SHT_HIPROC = 0x7fffffff, # End of processor-specific +SHT_LOUSER = 0x80000000, # Start of application-specific +SHT_HIUSER = 0x8fffffff, # End of application-specific +no_name = ('SHT_LOSUNW', 'SHT_HISUNW', 'SHT_HIOS'), +) + +SetConstants( +# http://infocenter.arm.com/help/topic/com.arm.doc.ihi0044c/IHI0044C_aaelf.pdf +SHT_ARM_EXIDX = 0x70000001, # Exception Index table +SHT_ARM_PREEMPTMAP = 0x70000002, # DLL dynamic linking pre-emption map +SHT_ARM_ATTRIBUTES = 0x70000003, # Object file compatibility attributes +SHT_ARM_DEBUGOVERLAY = 0x70000004, +SHT_ARM_OVERLAYSECTION = 0x70000005, +) + +SetConstants( +# https://refspecs.linuxfoundation.org/elf/elf-pa.pdf +# https://sourceware.org/ml/binutils/2005-08/msg00141.html +SHT_PARISC_EXT = 0x70000000, # Section contains product-specific extension bits +SHT_PARISC_UNWIND = 0x70000001, # Section contains unwind table entries +SHT_PARISC_DOC = 0x70000002, # Section contains debug information for optimized code +SHT_PARISC_ANNOT = 0x70000003, # Section contains code annotations +SHT_PARISC_DLKM = 0x70000004, # DLKM special section +) + +SetConstants( +# https://dmz-portal.mips.com/wiki/MIPS_section_types +SHT_MIPS_LIBLIST = 0x70000000, # DSO library information used to link +SHT_MIPS_MSYM = 0x70000001, # MIPS symbol table extension +SHT_MIPS_CONFLICT = 0x70000002, # Symbol conflicting with DSO defined symbols +SHT_MIPS_GPTAB = 0x70000003, # Global pointer table +SHT_MIPS_UCODE = 0x70000004, # Reserved +SHT_MIPS_DEBUG = 0x70000005, # Reserved (obsolete debug information) +SHT_MIPS_REGINFO = 0x70000006, # Register usage information +SHT_MIPS_PACKAGE = 0x70000007, # OSF reserved +SHT_MIPS_PACKSYM = 0x70000008, # OSF reserved +SHT_MIPS_RELD = 0x70000009, # Dynamic relocations (obsolete) +# 0x7000000a, +SHT_MIPS_IFACE = 0x7000000b, # Subprogram interface information +SHT_MIPS_CONTENT = 0x7000000c, # Section content information 
+SHT_MIPS_OPTIONS = 0x7000000d, # General options +# 0x7000000e, +# 0x7000000f, +SHT_MIPS_SHDR = 0x70000010, +SHT_MIPS_FDESC = 0x70000011, +SHT_MIPS_EXTSYM = 0x70000012, +SHT_MIPS_DENSE = 0x70000013, +SHT_MIPS_PDESC = 0x70000014, +SHT_MIPS_LOCSYM = 0x70000015, +SHT_MIPS_AUXSYM = 0x70000016, +SHT_MIPS_OPTSYM = 0x70000017, +SHT_MIPS_LOCSTR = 0x70000018, +SHT_MIPS_LINE = 0x70000019, +SHT_MIPS_RFDESC = 0x7000001a, +SHT_MIPS_DELTASYM = 0x7000001b, # Delta C++ symbol table (obsolete) +SHT_MIPS_DELTAINST = 0x7000001c, # Delta C++ instance table (obsolete) +SHT_MIPS_DELTACLASS = 0x7000001d, # Delta C++ class table (obsolete) +SHT_MIPS_DWARF = 0x7000001e, # Dwarf debug information +SHT_MIPS_DELTADECL = 0x7000001f, # Delta C++ declarations (obsolete) +SHT_MIPS_SYMBOL_LIB = 0x70000020, # Symbol to library mapping +SHT_MIPS_EVENTS = 0x70000021, # Section event mapping +SHT_MIPS_TRANSLATE = 0x70000022, # Old pixie translation table (obsolete) +SHT_MIPS_PIXIE = 0x70000023, # Pixie specific sections (SGI) +SHT_MIPS_XLATE = 0x70000024, # Address translation table +SHT_MIPS_XLATE_DEBUG = 0x70000025, # SGI internal address translation table +SHT_MIPS_WHIRL = 0x70000026, # Intermediate code (MipsPro compiler) +SHT_MIPS_EH_REGION = 0x70000027, # C++ exception handling region information +SHT_MIPS_XLATE_OLD = 0x70000028, +SHT_MIPS_PDR_EXCEPTION = 0x70000029, # Runtime procedure descriptor table exception information (ucode) +SHT_MIPS_ABIFLAGS = 0x7000002a, +) + +SetConstants( # Legal values for sh_flags (section flags). 
- -SHF_WRITE = (1 << 0) # Writable -SHF_ALLOC = (1 << 1) # Occupies memory during execution -SHF_EXECINSTR = (1 << 2) # Executable -SHF_MERGE = (1 << 4) # Might be merged -SHF_STRINGS = (1 << 5) # Contains nul-terminated strings -SHF_INFO_LINK = (1 << 6) # `sh_info' contains SHT index -SHF_LINK_ORDER = (1 << 7) # Preserve order after combining -SHF_OS_NONCONFORMING = (1 << 8) # Non-standard OS specific handling required -SHF_GROUP = (1 << 9) # Section is member of a group. -SHF_TLS = (1 << 10) # Section hold thread-local data. -SHF_MASKOS = 0x0ff00000L # OS-specific. -SHF_MASKPROC = 0xf0000000L # Processor-specific - +SHF_WRITE = (1 << 0), # Writable +SHF_ALLOC = (1 << 1), # Occupies memory during execution +SHF_EXECINSTR = (1 << 2), # Executable +SHF_MERGE = (1 << 4), # Might be merged +SHF_STRINGS = (1 << 5), # Contains nul-terminated strings +SHF_INFO_LINK = (1 << 6), # `sh_info' contains SHT index +SHF_LINK_ORDER = (1 << 7), # Preserve order after combining +SHF_OS_NONCONFORMING = (1 << 8), # Non-standard OS specific handling required +SHF_GROUP = (1 << 9), # Section is member of a group. +SHF_TLS = (1 << 10), # Section hold thread-local data. +SHF_MASKOS = 0x0ff00000, # OS-specific. +SHF_MASKPROC = 0xf0000000, # Processor-specific +SHF_ORDERED = (1 << 30), # Special ordering requirement (Solaris) +SHF_EXCLUDE = (1 << 31), # Section is excluded unless references or allocated (Solaris) +) + +SetConstants( # Section group handling. +prefix = 'GRP_', +GRP_COMDAT = 0x1, # Mark group as COMDAT. +) -GRP_COMDAT = 0x1 # Mark group as COMDAT. - +SetConstants( # Legal values for p_type (segment type). 
- -PT_NULL = 0 # Program header table entry unused -PT_LOAD = 1 # Loadable program segment -PT_DYNAMIC = 2 # Dynamic linking information -PT_INTERP = 3 # Program interpreter -PT_NOTE = 4 # Auxiliary information -PT_SHLIB = 5 # Reserved -PT_PHDR = 6 # Entry for header table itself -PT_TLS = 7 # Thread-local storage segment -PT_NUM = 8 # Number of defined types -PT_LOOS = 0x60000000L # Start of OS-specific -PT_GNU_EH_FRAME = 0x6474e550L # GCC .eh_frame_hdr segment -PT_GNU_STACK = 0x6474e551L # Indicates stack executability -PT_LOSUNW = 0x6ffffffaL -PT_SUNWBSS = 0x6ffffffaL # Sun Specific segment -PT_SUNWSTACK = 0x6ffffffbL # Stack segment -PT_HISUNW = 0x6fffffffL -PT_HIOS = 0x6fffffffL # End of OS-specific -PT_LOPROC = 0x70000000L # Start of processor-specific -PT_HIPROC = 0x7fffffffL # End of processor-specific - +PT_NULL = 0, # Program header table entry unused +PT_LOAD = 1, # Loadable program segment +PT_DYNAMIC = 2, # Dynamic linking information +PT_INTERP = 3, # Program interpreter +PT_NOTE = 4, # Auxiliary information +PT_SHLIB = 5, # Reserved +PT_PHDR = 6, # Entry for header table itself +PT_TLS = 7, # Thread-local storage segment +PT_NUM = 8, # Number of defined types +PT_LOOS = 0x60000000, # Start of OS-specific +PT_GNU_EH_FRAME = 0x6474e550, # GCC .eh_frame_hdr segment +PT_GNU_STACK = 0x6474e551, # Indicates stack executability +PT_GNU_RELRO = 0x6474e552, +PT_LOSUNW = 0x6ffffffa, +PT_SUNWBSS = 0x6ffffffa, # Sun Specific segment +PT_SUNWSTACK = 0x6ffffffb, # Stack segment +PT_HISUNW = 0x6fffffff, +PT_HIOS = 0x6fffffff, # End of OS-specific +PT_LOPROC = 0x70000000, # Start of processor-specific +PT_HIPROC = 0x7fffffff, # End of processor-specific +no_name = ('PT_LOOS', 'PT_LOSUNW', 'PT_HISUNW', 'PT_HIOS', 'PT_LOPROC', 'PT_HIPROC') +) + +SetConstants( # Legal values for p_flags (segment flags). 
+PF_X = (1 << 0), # Segment is executable +PF_W = (1 << 1), # Segment is writable +PF_R = (1 << 2), # Segment is readable +PF_MASKOS = 0x0ff00000, # OS-specific +PF_MASKPROC = 0xf0000000, # Processor-specific +) -PF_X = (1 << 0) # Segment is executable -PF_W = (1 << 1) # Segment is writable -PF_R = (1 << 2) # Segment is readable -PF_MASKOS = 0x0ff00000L # OS-specific -PF_MASKPROC = 0xf0000000L # Processor-specific - -# Legal values for note segment descriptor types for core files. - -NT_PRSTATUS = 1 # Contains copy of prstatus struct -NT_FPREGSET = 2 # Contains copy of fpregset struct -NT_PRPSINFO = 3 # Contains copy of prpsinfo struct -NT_PRXREG = 4 # Contains copy of prxregset struct -NT_TASKSTRUCT = 4 # Contains copy of task structure -NT_PLATFORM = 5 # String from sysinfo(SI_PLATFORM) -NT_AUXV = 6 # Contains copy of auxv array -NT_GWINDOWS = 7 # Contains copy of gwindows struct -NT_ASRS = 8 # Contains copy of asrset struct -NT_PSTATUS = 10 # Contains copy of pstatus struct -NT_PSINFO = 13 # Contains copy of psinfo struct -NT_PRCRED = 14 # Contains copy of prcred struct -NT_UTSNAME = 15 # Contains copy of utsname struct -NT_LWPSTATUS = 16 # Contains copy of lwpstatus struct -NT_LWPSINFO = 17 # Contains copy of lwpinfo struct -NT_PRFPXREG = 20 # Contains copy of fprxregset struct +SetConstants( +name = 'NT_OBJECT', +prefix = 'NT_', # Legal values for the note segment descriptor types for object files. +NT_VERSION = 1, # Contains a version string. +) -NT_VERSION = 1 # Contains a version string. - +SetConstants( +name = 'NT_CORE', +prefix = 'NT_', +# Legal values for note segment descriptor types for core files. 
+NT_PRSTATUS = 1, # Contains copy of prstatus struct +NT_FPREGSET = 2, # Contains copy of fpregset struct +NT_PRPSINFO = 3, # Contains copy of prpsinfo struct +NT_PRXREG = 4, # Contains copy of prxregset struct +NT_TASKSTRUCT = 4, # Contains copy of task structure +NT_PLATFORM = 5, # String from sysinfo(SI_PLATFORM) +NT_AUXV = 6, # Contains copy of auxv array +NT_GWINDOWS = 7, # Contains copy of gwindows struct +NT_ASRS = 8, # Contains copy of asrset struct +NT_PSTATUS = 10, # Contains copy of pstatus struct +NT_PSINFO = 13, # Contains copy of psinfo struct +NT_PRCRED = 14, # Contains copy of prcred struct +NT_UTSNAME = 15, # Contains copy of utsname struct +NT_LWPSTATUS = 16, # Contains copy of lwpstatus struct +NT_LWPSINFO = 17, # Contains copy of lwpinfo struct +NT_PRFPXREG = 20, # Contains copy of fprxregset struct +no_name=('NT_PRXREG',), +# Cf. https://github.com/torvalds/linux/blob/master/include/uapi/linux/elf.h +NT_PPC_VMX = 0x100, # PowerPC Altivec/VMX registers +NT_PPC_SPE = 0x101, # PowerPC SPE/EVR registers +NT_PPC_VSX = 0x102, # PowerPC VSX registers +NT_PPC_TAR = 0x103, # Target Address Register +NT_PPC_PPR = 0x104, # Program Priority Register +NT_PPC_DSCR = 0x105, # Data Stream Control Register +NT_PPC_EBB = 0x106, # Event Based Branch Registers +NT_PPC_PMU = 0x107, # Performance Monitor Registers +NT_PPC_TM_CGPR = 0x108, # TM checkpointed GPR Registers +NT_PPC_TM_CFPR = 0x109, # TM checkpointed FPR Registers +NT_PPC_TM_CVMX = 0x10a, # TM checkpointed VMX Registers +NT_PPC_TM_CVSX = 0x10b, # TM checkpointed VSX Registers +NT_PPC_TM_SPR = 0x10c, # TM Special Purpose Registers +NT_PPC_TM_CTAR = 0x10d, # TM checkpointed Target Address Register +NT_PPC_TM_CPPR = 0x10e, # TM checkpointed Program Priority Register +NT_PPC_TM_CDSCR = 0x10f, # TM checkpointed Data Stream Control Register +NT_386_TLS = 0x200, # i386 TLS slots (struct user_desc) +NT_386_IOPERM = 0x201, # x86 io permission bitmap (1=deny) +NT_X86_XSTATE = 0x202, # x86 extended state using 
xsave +NT_S390_HIGH_GPRS = 0x300, # s390 upper register halves +NT_S390_TIMER = 0x301, # s390 timer register +NT_S390_TODCMP = 0x302, # s390 time-of-day (TOD) clock comparator register +NT_S390_TODPREG = 0x303, # s390 time-of-day (TOD) programmable register +NT_S390_CTRS = 0x304, # s390 control registers +NT_S390_PREFIX = 0x305, # s390 prefix register +NT_S390_LAST_BREAK = 0x306, # s390 breaking event address +NT_S390_SYSTEM_CALL = 0x307, # s390 system call restart data +NT_S390_TDB = 0x308, # s390 transaction diagnostic block +NT_S390_VXRS_LOW = 0x309, # s390 vector registers 0-15 upper half +NT_S390_VXRS_HIGH = 0x30a, # s390 vector registers 16-31 +NT_ARM_VFP = 0x400, # ARM VFP/NEON registers +NT_ARM_TLS = 0x401, # ARM TLS register +NT_ARM_HW_BREAK = 0x402, # ARM hardware breakpoint registers +NT_ARM_HW_WATCH = 0x403, # ARM hardware watchpoint registers +NT_ARM_SYSTEM_CALL = 0x404, # ARM system call number +NT_METAG_CBUF = 0x500, # Metag catch buffer registers +NT_METAG_RPIPE = 0x501, # Metag read pipeline state +NT_METAG_TLS = 0x502, # Metag TLS pointer +) + +SetConstants( +# SHT_NOTE section types +# Cf. https://sourceware.org/ml/gdb-patches/2016-01/msg00277.html +NT_FREEBSD_THRMISC = 7, # Thread miscellaneous info. +NT_FREEBSD_PROCSTAT_PROC = 8, # Procstat proc data. +NT_FREEBSD_PROCSTAT_FILES = 9, # Procstat files data. +NT_FREEBSD_PROCSTAT_VMMAP = 10, # Procstat vmmap data. +NT_FREEBSD_PROCSTAT_GROUPS = 11, # Procstat groups data. +NT_FREEBSD_PROCSTAT_UMASK = 12, # Procstat umask data. +NT_FREEBSD_PROCSTAT_RLIMIT = 13, # Procstat rlimit data. +NT_FREEBSD_PROCSTAT_OSREL = 14, # Procstat osreldate data. +NT_FREEBSD_PROCSTAT_PSSTRINGS = 15, # Procstat ps_strings data. +NT_FREEBSD_PROCSTAT_AUXV = 16, # Procstat auxv data. +) + +SetConstants( +# Cf. http://man7.org/linux/man-pages/man5/elf.5.html +NT_GNU_ABI_TAG = 1, # ABI version tag +NT_GNU_HWCAP = 2, # Synthetic hwcap information. 
+NT_GNU_BUILD_ID = 3, # Unique build ID as generated by the GNU ld --build-id option. +NT_GNU_GOLD_VERSION = 4, # GNU Gold linker version used. +) + +SetConstants( # Legal values for ST_BIND subfield of st_info (symbol binding). # bind = Sym.info >> 4 # val = Sym.info 0xf - -STB_LOCAL = 0 # Local symbol -STB_GLOBAL = 1 # Global symbol -STB_WEAK = 2 # Weak symbol -STB_NUM = 3 # Number of defined types. -STB_LOOS = 10 # Start of OS-specific -STB_HIOS = 12 # End of OS-specific -STB_LOPROC = 13 # Start of processor-specific -STB_HIPROC = 15 # End of processor-specific - -#Legal values for ST_TYPE subfield of st_info (symbol type). - -STT_NOTYPE = 0 # Symbol type is unspecified -STT_OBJECT = 1 # Symbol is a data object -STT_FUNC = 2 # Symbol is a code object -STT_SECTION = 3 # Symbol associated with a section -STT_FILE = 4 # Symbol's name is file name -STT_COMMON = 5 # Symbol is a common data object -STT_TLS = 6 # Symbol is thread-local data object*/ -STT_NUM = 7 # Number of defined types. -STT_LOOS = 10 # Start of OS-specific -STT_HIOS = 12 # End of OS-specific -STT_LOPROC = 13 # Start of processor-specific -STT_HIPROC = 15 # End of processor-specific - +STB_LOCAL = 0, # Local symbol +STB_GLOBAL = 1, # Global symbol +STB_WEAK = 2, # Weak symbol +STB_NUM = 3, # Number of defined types. +STB_GNU_UNIQUE = 10, # +STB_LOOS = 10, # Start of OS-specific +STB_HIOS = 12, # End of OS-specific +STB_LOPROC = 13, # Start of processor-specific +STB_HIPROC = 15, # End of processor-specific +no_name = ('STB_LOOS', 'STB_HIOS', 'STB_LOPROC', 'STB_HIPROC'), +) + +SetConstants( +# Legal values for ST_TYPE subfield of st_info (symbol type). 
+STT_NOTYPE = 0, # Symbol type is unspecified +STT_OBJECT = 1, # Symbol is a data object +STT_FUNC = 2, # Symbol is a code object +STT_SECTION = 3, # Symbol associated with a section +STT_FILE = 4, # Symbol's name is file name +STT_COMMON = 5, # Symbol is a common data object +STT_TLS = 6, # Symbol is thread-local data object*/ +STT_NUM = 7, # Number of defined types. +STT_GNU_IFUNC = 10, # GNU indirect function +STT_LOOS = 10, # Start of OS-specific +STT_HIOS = 12, # End of OS-specific +STT_LOPROC = 13, # Start of processor-specific +STT_HIPROC = 15, # End of processor-specific +no_name = ('STT_LOOS', 'STT_HIOS', 'STT_LOPROC', 'STT_HIPROC'), +) + +SetConstants( +# AMDGPU symbol types +STT_AMDGPU_HSA_KERNEL = 10, +STT_AMDGPU_HSA_INDIRECT_FUNCTION = 11, +STT_AMDGPU_HSA_METADATA = 12, +) + +SetConstants( +# Symbol visibility specification encoded in the st_other field. +STV_DEFAULT = 0, # Default symbol visibility rules +STV_INTERNAL = 1, # Processor specific hidden class +STV_HIDDEN = 2, # Sym unavailable to other modules +STV_PROTECTED = 3, # Not preemptible, not exported +) + +SetConstants( # Legal values for d_tag (dynamic entry type). 
- -DT_NULL = 0 # Marks end of dynamic section -DT_NEEDED = 1 # Name of needed library -DT_PLTRELSZ = 2 # Size in bytes of PLT relocs -DT_PLTGOT = 3 # Processor defined value -DT_HASH = 4 # Address of symbol hash table -DT_STRTAB = 5 # Address of string table -DT_SYMTAB = 6 # Address of symbol table -DT_RELA = 7 # Address of Rela relocs -DT_RELASZ = 8 # Total size of Rela relocs -DT_RELAENT = 9 # Size of one Rela reloc -DT_STRSZ = 10 # Size of string table -DT_SYMENT = 11 # Size of one symbol table entry -DT_INIT = 12 # Address of init function -DT_FINI = 13 # Address of termination function -DT_SONAME = 14 # Name of shared object -DT_RPATH = 15 # Library search path (deprecated) -DT_SYMBOLIC = 16 # Start symbol search here -DT_REL = 17 # Address of Rel relocs -DT_RELSZ = 18 # Total size of Rel relocs -DT_RELENT = 19 # Size of one Rel reloc -DT_PLTREL = 20 # Type of reloc in PLT -DT_DEBUG = 21 # For debugging; unspecified -DT_TEXTREL = 22 # Reloc might modify .text -DT_JMPREL = 23 # Address of PLT relocs -DT_BIND_NOW = 24 # Process relocations of object -DT_INIT_ARRAY = 25 # Array with addresses of init fct -DT_FINI_ARRAY = 26 # Array with addresses of fini fct -DT_INIT_ARRAYSZ = 27 # Size in bytes of DT_INIT_ARRAY -DT_FINI_ARRAYSZ = 28 # Size in bytes of DT_FINI_ARRAY -DT_RUNPATH = 29 # Library search path -DT_FLAGS = 30 # Flags for the object being loaded -DT_ENCODING = 32 # Start of encoded range -DT_PREINIT_ARRAY = 32 # Array with addresses of preinit fct -DT_PREINIT_ARRAYSZ = 33 # size in bytes of DT_PREINIT_ARRAY -DT_NUM = 34 # Number used -DT_LOOS = 0x6000000d # Start of OS-specific -DT_HIOS = 0x6ffff000 # End of OS-specific -DT_LOPROC = 0x70000000 # Start of processor-specific -DT_HIPROC = 0x7fffffff # End of processor-specific -#DT_PROCNUM = DT_MIPS_NUM # Most used by any processor +DT_NULL = 0, # Marks end of dynamic section +DT_NEEDED = 1, # Name of needed library +DT_PLTRELSZ = 2, # Size in bytes of PLT relocs +DT_PLTGOT = 3, # Processor defined value 
+DT_HASH = 4, # Address of symbol hash table +DT_STRTAB = 5, # Address of string table +DT_SYMTAB = 6, # Address of symbol table +DT_RELA = 7, # Address of Rela relocs +DT_RELASZ = 8, # Total size of Rela relocs +DT_RELAENT = 9, # Size of one Rela reloc +DT_STRSZ = 10, # Size of string table +DT_SYMENT = 11, # Size of one symbol table entry +DT_INIT = 12, # Address of init function +DT_FINI = 13, # Address of termination function +DT_SONAME = 14, # Name of shared object +DT_RPATH = 15, # Library search path (deprecated) +DT_SYMBOLIC = 16, # Start symbol search here +DT_REL = 17, # Address of Rel relocs +DT_RELSZ = 18, # Total size of Rel relocs +DT_RELENT = 19, # Size of one Rel reloc +DT_PLTREL = 20, # Type of reloc in PLT +DT_DEBUG = 21, # For debugging; unspecified +DT_TEXTREL = 22, # Reloc might modify .text +DT_JMPREL = 23, # Address of PLT relocs +DT_BIND_NOW = 24, # Process relocations of object +DT_INIT_ARRAY = 25, # Array with addresses of init fct +DT_FINI_ARRAY = 26, # Array with addresses of fini fct +DT_INIT_ARRAYSZ = 27, # Size in bytes of DT_INIT_ARRAY +DT_FINI_ARRAYSZ = 28, # Size in bytes of DT_FINI_ARRAY +DT_RUNPATH = 29, # Library search path +DT_FLAGS = 30, # Flags for the object being loaded +DT_ENCODING = 32, # Start of encoded range +DT_PREINIT_ARRAY = 32, # Array with addresses of preinit fct +DT_PREINIT_ARRAYSZ = 33, # size in bytes of DT_PREINIT_ARRAY +DT_NUM = 34, # Number used +DT_LOOS = 0x6000000d, # Start of OS-specific +DT_HIOS = 0x6ffff000, # End of OS-specific # DT_* entries which fall between DT_VALRNGHI & DT_VALRNGLO use the # Dyn.d_un.d_val field of the Elf*_Dyn structure. This follows Sun's # approach. 
-DT_VALRNGLO = 0x6ffffd00 -DT_GNU_PRELINKED = 0x6ffffdf5 # Prelinking timestamp -DT_GNU_CONFLICTSZ = 0x6ffffdf6 # Size of conflict section -DT_GNU_LIBLISTSZ = 0x6ffffdf7 # Size of library list -DT_CHECKSUM = 0x6ffffdf8 -DT_PLTPADSZ = 0x6ffffdf9 -DT_MOVEENT = 0x6ffffdfa -DT_MOVESZ = 0x6ffffdfb -DT_FEATURE_1 = 0x6ffffdfc # Feature selection (DTF_*). -DT_POSFLAG_1 = 0x6ffffdfd # Flags for DT_* entries, effecting the following DT_* entry. -DT_SYMINSZ = 0x6ffffdfe # Size of syminfo table (in bytes) -DT_SYMINENT = 0x6ffffdff # Entry size of syminfo -DT_VALRNGHI = 0x6ffffdff -DT_VALNUM = 12 +DT_VALRNGLO = 0x6ffffd00, +DT_GNU_PRELINKED = 0x6ffffdf5, # Prelinking timestamp +DT_GNU_CONFLICTSZ = 0x6ffffdf6, # Size of conflict section +DT_GNU_LIBLISTSZ = 0x6ffffdf7, # Size of library list +DT_CHECKSUM = 0x6ffffdf8, +DT_PLTPADSZ = 0x6ffffdf9, +DT_MOVEENT = 0x6ffffdfa, +DT_MOVESZ = 0x6ffffdfb, +DT_FEATURE_1 = 0x6ffffdfc, # Feature selection (DTF_*). +DT_POSFLAG_1 = 0x6ffffdfd, # Flags for DT_* entries, affecting the following DT_* entry. +DT_SYMINSZ = 0x6ffffdfe, # Size of syminfo table (in bytes) +DT_SYMINENT = 0x6ffffdff, # Entry size of syminfo +DT_VALRNGHI = 0x6ffffdff, +DT_VALNUM = 12, # DT_* entries which fall between DT_ADDRRNGHI & DT_ADDRRNGLO use the # Dyn.d_un.d_ptr field of the Elf*_Dyn structure. # # If any adjustment is made to the ELF object after it has been # built these entries will need to be adjusted. -DT_ADDRRNGLO = 0x6ffffe00 -DT_GNU_CONFLICT = 0x6ffffef8 # Start of conflict section -DT_GNU_LIBLIST = 0x6ffffef9 # Library list -DT_CONFIG = 0x6ffffefa # Configuration information. -DT_DEPAUDIT = 0x6ffffefb # Dependency auditing. -DT_AUDIT = 0x6ffffefc # Object auditing. -DT_PLTPAD = 0x6ffffefd # PLT padding. -DT_MOVETAB = 0x6ffffefe # Move table. -DT_SYMINFO = 0x6ffffeff # Syminfo table.
-DT_ADDRRNGHI = 0x6ffffeff -DT_ADDRNUM = 10 +DT_ADDRRNGLO = 0x6ffffe00, +DT_GNU_HASH = 0x6ffffef5, +DT_TLSDESC_PLT = 0x6ffffef6, +DT_TLSDESC_GOT = 0x6ffffef7, +DT_GNU_CONFLICT = 0x6ffffef8, # Start of conflict section +DT_GNU_LIBLIST = 0x6ffffef9, # Library list +DT_CONFIG = 0x6ffffefa, # Configuration information. +DT_DEPAUDIT = 0x6ffffefb, # Dependency auditing. +DT_AUDIT = 0x6ffffefc, # Object auditing. +DT_PLTPAD = 0x6ffffefd, # PLT padding. +DT_MOVETAB = 0x6ffffefe, # Move table. +DT_SYMINFO = 0x6ffffeff, # Syminfo table. +DT_ADDRRNGHI = 0x6ffffeff, +DT_ADDRNUM = 10, # The versioning entry types. The next are defined as part of the # GNU extension. -DT_VERSYM = 0x6ffffff0 - -DT_RELACOUNT = 0x6ffffff9 -DT_RELCOUNT = 0x6ffffffa +DT_VERSYM = 0x6ffffff0, +DT_RELACOUNT = 0x6ffffff9, +DT_RELCOUNT = 0x6ffffffa, # These were chosen by Sun. -DT_FLAGS_1 = 0x6ffffffb # State flags, see DF_1_* below. -DT_VERDEF = 0x6ffffffc # Address of version definition table -DT_VERDEFNUM = 0x6ffffffd # Number of version definitions -DT_VERNEED = 0x6ffffffe # Address of table with needed versions -DT_VERNEEDNUM = 0x6fffffff # Number of needed versions -DT_VERSIONTAGNUM = 16 +DT_FLAGS_1 = 0x6ffffffb, # State flags, see DF_1_* below. +DT_VERDEF = 0x6ffffffc, # Address of version definition table +DT_VERDEFNUM = 0x6ffffffd, # Number of version definitions +DT_VERNEED = 0x6ffffffe, # Address of table with needed versions +DT_VERNEEDNUM = 0x6fffffff, # Number of needed versions +DT_VERSIONTAGNUM = 16, + +DT_LOPROC = 0x70000000, # Start of processor-specific # Sun added these machine-independent extensions in the "processor-specific" # range. Be compatible. 
-DT_AUXILIARY = 0x7ffffffd # Shared object to load before self -DT_FILTER = 0x7fffffff # Shared object to get values from -DT_EXTRANUM = 3 +DT_AUXILIARY = 0x7ffffffd, # Shared object to load before self +DT_USED = 0x7ffffffe, +DT_FILTER = 0x7fffffff, # Shared object to get values from +DT_EXTRANUM = 3, + +DT_HIPROC = 0x7fffffff, # End of processor-specific +#DT_PROCNUM = DT_MIPS_NUM # Most used by any processor +no_name = ('DT_ENCODING', 'DT_LOPROC', 'DT_HIPROC', + 'DT_VALRNGLO', 'DT_VALRNGHI', 'DT_VALNUM', + 'DT_ADDRRNGLO', 'DT_ADDRRNGHI', 'DT_ADDRNUM', + 'DT_VERSIONTAGNUM', 'DT_EXTRANUM') +) -# Values of `d_un.d_val' in the DT_FLAGS entry. -DF_ORIGIN = 0x00000001 # Object may use DF_ORIGIN -DF_SYMBOLIC = 0x00000002 # Symbol resolutions starts here -DF_TEXTREL = 0x00000004 # Object contains text relocations -DF_BIND_NOW = 0x00000008 # No lazy binding for this object -DF_STATIC_TLS = 0x00000010 # Module uses the static TLS model +SetConstants( +# Values of `d_un.d_val' in the DT_FLAGS entry. +DF_ORIGIN = 0x00000001, # Object may use DF_ORIGIN +DF_SYMBOLIC = 0x00000002, # Symbol resolutions starts here +DF_TEXTREL = 0x00000004, # Object contains text relocations +DF_BIND_NOW = 0x00000008, # No lazy binding for this object +DF_STATIC_TLS = 0x00000010, # Module uses the static TLS model +) + +SetConstants( # State flags selectable in the `d_un.d_val' element of the DT_FLAGS_1 # entry in the dynamic section. -DF_1_NOW = 0x00000001 # Set RTLD_NOW for this object. -DF_1_GLOBAL = 0x00000002 # Set RTLD_GLOBAL for this object. -DF_1_GROUP = 0x00000004 # Set RTLD_GROUP for this object. -DF_1_NODELETE = 0x00000008 # Set RTLD_NODELETE for this object. -DF_1_LOADFLTR = 0x00000010 # Trigger filtee loading at runtime. -DF_1_INITFIRST = 0x00000020 # Set RTLD_INITFIRST for this object -DF_1_NOOPEN = 0x00000040 # Set RTLD_NOOPEN for this object. -DF_1_ORIGIN = 0x00000080 # $ORIGIN must be handled. -DF_1_DIRECT = 0x00000100 # Direct binding enabled. 
-DF_1_TRANS = 0x00000200 -DF_1_INTERPOSE = 0x00000400 # Object is used to interpose. -DF_1_NODEFLIB = 0x00000800 # Ignore default lib search path. -DF_1_NODUMP = 0x00001000 # Object can't be dldump'ed. -DF_1_CONFALT = 0x00002000 # Configuration alternative created. -DF_1_ENDFILTEE = 0x00004000 # Filtee terminates filters search. -DF_1_DISPRELDNE = 0x00008000 # Disp reloc applied at build time. -DF_1_DISPRELPND = 0x00010000 # Disp reloc applied at run-time. - +DF_1_NOW = 0x00000001, # Set RTLD_NOW for this object. +DF_1_GLOBAL = 0x00000002, # Set RTLD_GLOBAL for this object. +DF_1_GROUP = 0x00000004, # Set RTLD_GROUP for this object. +DF_1_NODELETE = 0x00000008, # Set RTLD_NODELETE for this object. +DF_1_LOADFLTR = 0x00000010, # Trigger filtee loading at runtime. +DF_1_INITFIRST = 0x00000020, # Set RTLD_INITFIRST for this object +DF_1_NOOPEN = 0x00000040, # Set RTLD_NOOPEN for this object. +DF_1_ORIGIN = 0x00000080, # $ORIGIN must be handled. +DF_1_DIRECT = 0x00000100, # Direct binding enabled. +DF_1_TRANS = 0x00000200, +DF_1_INTERPOSE = 0x00000400, # Object is used to interpose. +DF_1_NODEFLIB = 0x00000800, # Ignore default lib search path. +DF_1_NODUMP = 0x00001000, # Object can't be dldump'ed. +DF_1_CONFALT = 0x00002000, # Configuration alternative created. +DF_1_ENDFILTEE = 0x00004000, # Filtee terminates filters search. +DF_1_DISPRELDNE = 0x00008000, # Disp reloc applied at build time. +DF_1_DISPRELPND = 0x00010000, # Disp reloc applied at run-time. +DF_1_NODIRECT = 0x00020000, +DF_1_IGNMULDEF = 0x00040000, +DF_1_NOKSYMS = 0x00080000, +DF_1_NOHDR = 0x00100000, +DF_1_EDITED = 0x00200000, +DF_1_NORELOC = 0x00400000, +DF_1_SYMINTPOSE = 0x00800000, +DF_1_GLOBAUDIT = 0x01000000, +DF_1_SINGLETON = 0x02000000, +DF_1_STUB = 0x04000000, +DF_1_PIE = 0x08000000, +) + +SetConstants( # Flags for the feature selection in DT_FEATURE_1. 
-DTF_1_PARINIT = 0x00000001 -DTF_1_CONFEXP = 0x00000002 +DTF_1_PARINIT = 0x00000001, +DTF_1_CONFEXP = 0x00000002, +) +SetConstants( # Flags in the DT_POSFLAG_1 entry effecting only the next DT_* entry. -DF_P1_LAZYLOAD = 0x00000001 # Lazyload following object. -DF_P1_GROUPPERM = 0x00000002 # Symbols from next object are not generally available. +DF_P1_LAZYLOAD = 0x00000001, # Lazyload following object. +DF_P1_GROUPPERM = 0x00000002, # Symbols from next object are not generally available. +) + +SetConstants( +# MIPS specific dynamic array tags. +DT_MIPS_RLD_VERSION = 0x70000001, # 32 bit version number for runtime linker interface. +DT_MIPS_TIME_STAMP = 0x70000002, # Time stamp. +DT_MIPS_ICHECKSUM = 0x70000003, # Checksum of external strings and common sizes. +DT_MIPS_IVERSION = 0x70000004, # Index of version string in string table. +DT_MIPS_FLAGS = 0x70000005, # 32 bits of flags. +DT_MIPS_BASE_ADDRESS = 0x70000006, # Base address of the segment. +DT_MIPS_MSYM = 0x70000007, +DT_MIPS_CONFLICT = 0x70000008, # Address of .conflict section. +DT_MIPS_LIBLIST = 0x70000009, # Address of .liblist section. +DT_MIPS_LOCAL_GOTNO = 0x7000000a, # Number of local global offset table entries. +DT_MIPS_CONFLICTNO = 0x7000000b, # Number of entries in the .conflict section. +DT_MIPS_LIBLISTNO = 0x70000010, # Number of entries in the .liblist section. +DT_MIPS_SYMTABNO = 0x70000011, # Number of entries in the .dynsym section. +DT_MIPS_UNREFEXTNO = 0x70000012, # Index of first external dynamic symbol not referenced locally. +DT_MIPS_GOTSYM = 0x70000013, # Index of first dynamic symbol in global offset table. +DT_MIPS_HIPAGENO = 0x70000014, # Number of page table entries in global offset table. +DT_MIPS_RLD_MAP = 0x70000016, # Address of run time loader map, used for debugging. +DT_MIPS_DELTA_CLASS = 0x70000017, # Delta C++ class definition. +DT_MIPS_DELTA_CLASS_NO= 0x70000018, # Number of entries in DT_MIPS_DELTA_CLASS. +DT_MIPS_DELTA_INSTANCE= 0x70000019, # Delta C++ class instances. 
+DT_MIPS_DELTA_INSTANCE_NO = 0x7000001a, # Number of entries in DT_MIPS_DELTA_INSTANCE. +DT_MIPS_DELTA_RELOC = 0x7000001b, # Delta relocations. +DT_MIPS_DELTA_RELOC_NO= 0x7000001c, # Number of entries in DT_MIPS_DELTA_RELOC. +DT_MIPS_DELTA_SYM = 0x7000001d, # Delta symbols that Delta relocations refer to. +DT_MIPS_DELTA_SYM_NO = 0x7000001e, # Number of entries in DT_MIPS_DELTA_SYM. +DT_MIPS_DELTA_CLASSSYM= 0x70000020, # Delta symbols that hold class declarations. +DT_MIPS_DELTA_CLASSSYM_NO = 0x70000021, # Number of entries in DT_MIPS_DELTA_CLASSSYM. +DT_MIPS_CXX_FLAGS = 0x70000022, # Flags indicating information about C++ flavor. +DT_MIPS_PIXIE_INIT = 0x70000023, # Pixie information (???). +DT_MIPS_SYMBOL_LIB = 0x70000024, # Address of .MIPS.symlib +DT_MIPS_LOCALPAGE_GOTIDX = 0x70000025, # The GOT index of the first PTE for a segment +DT_MIPS_LOCAL_GOTIDX = 0x70000026, # The GOT index of the first PTE for a local symbol +DT_MIPS_HIDDEN_GOTIDX = 0x70000027, # The GOT index of the first PTE for a hidden symbol +DT_MIPS_PROTECTED_GOTIDX = 0x70000028, # The GOT index of the first PTE for a protected symbol +DT_MIPS_OPTIONS = 0x70000029, # Address of `.MIPS.options'. +DT_MIPS_INTERFACE = 0x7000002a, # Address of `.interface'. +DT_MIPS_DYNSTR_ALIGN = 0x7000002b, # ??? +DT_MIPS_INTERFACE_SIZE= 0x7000002c, # Size of the .interface section. +DT_MIPS_RLD_TEXT_RESOLVE_ADDR = 0x7000002d, # Size of rld_text_resolve function stored in the GOT. +DT_MIPS_PERF_SUFFIX = 0x7000002e, # Default suffix of DSO to be added by rld on dlopen() calls. +DT_MIPS_COMPACT_SIZE = 0x7000002f, # Size of compact relocation section (O32). +DT_MIPS_GP_VALUE = 0x70000030, # GP value for auxiliary GOTs. +DT_MIPS_AUX_DYNAMIC = 0x70000031, # Address of auxiliary .dynamic. +DT_MIPS_PLTGOT = 0x70000032, # Address of the base of the PLTGOT. +DT_MIPS_RWPLT = 0x70000034, # Points to the base of a writable PLT. +) + +SetConstants( +# PowerPC64 specific values for the Dyn d_tag field. 
+DT_PPC64_GLINK = (DT_LOPROC + 0), +DT_PPC64_OPD = (DT_LOPROC + 1), +DT_PPC64_OPDSZ = (DT_LOPROC + 2), +DT_PPC64_NUM = 3, +no_name = ('DT_PPC64_NUM',) +) # Relocs +SetConstants( # Motorola 68k relocations - -R_68K_NONE = 0 # No reloc -R_68K_32 = 1 # Direct 32 bit -R_68K_16 = 2 # Direct 16 bit -R_68K_8 = 3 # Direct 8 bit -R_68K_PC32 = 4 # PC relative 32 bit -R_68K_PC16 = 5 # PC relative 16 bit -R_68K_PC8 = 6 # PC relative 8 bit -R_68K_GOT32 = 7 # 32 bit PC relative GOT entry -R_68K_GOT16 = 8 # 16 bit PC relative GOT entry -R_68K_GOT8 = 9 # 8 bit PC relative GOT entry -R_68K_GOT32O = 10 # 32 bit GOT offset -R_68K_GOT16O = 11 # 16 bit GOT offset -R_68K_GOT8O = 12 # 8 bit GOT offset -R_68K_PLT32 = 13 # 32 bit PC relative PLT address -R_68K_PLT16 = 14 # 16 bit PC relative PLT address -R_68K_PLT8 = 15 # 8 bit PC relative PLT address -R_68K_PLT32O = 16 # 32 bit PLT offset -R_68K_PLT16O = 17 # 16 bit PLT offset -R_68K_PLT8O = 18 # 8 bit PLT offset -R_68K_COPY = 19 # Copy symbol at runtime -R_68K_GLOB_DAT = 20 # Create GOT entry -R_68K_JMP_SLOT = 21 # Create PLT entry -R_68K_RELATIVE = 22 # Adjust by program base -R_68K_TLS_GD32 = 25 # 32 bit GOT offset for GD -R_68K_TLS_GD16 = 26 # 16 bit GOT offset for GD -R_68K_TLS_GD8 = 27 # 8 bit GOT offset for GD -R_68K_TLS_LDM32 = 28 # 32 bit GOT offset for LDM -R_68K_TLS_LDM16 = 29 # 16 bit GOT offset for LDM -R_68K_TLS_LDM8 = 30 # 8 bit GOT offset for LDM -R_68K_TLS_LDO32 = 31 # 32 bit module-relative offset -R_68K_TLS_LDO16 = 32 # 16 bit module-relative offset -R_68K_TLS_LDO8 = 33 # 8 bit module-relative offset -R_68K_TLS_IE32 = 34 # 32 bit GOT offset for IE -R_68K_TLS_IE16 = 35 # 16 bit GOT offset for IE -R_68K_TLS_IE8 = 36 # 8 bit GOT offset for IE -R_68K_TLS_LE32 = 37 # 32 bit offset relative to static TLS block -R_68K_TLS_LE16 = 38 # 16 bit offset relative to static TLS block -R_68K_TLS_LE8 = 39 # 8 bit offset relative to static TLS block -R_68K_TLS_DTPMOD32 = 40 # 32 bit module number -R_68K_TLS_DTPREL32 = 41 # 32 bit 
module-relative offset -R_68K_TLS_TPREL32 = 42 # 32 bit TP-relative offset +R_68K_NONE = 0, # No reloc +R_68K_32 = 1, # Direct 32 bit +R_68K_16 = 2, # Direct 16 bit +R_68K_8 = 3, # Direct 8 bit +R_68K_PC32 = 4, # PC relative 32 bit +R_68K_PC16 = 5, # PC relative 16 bit +R_68K_PC8 = 6, # PC relative 8 bit +R_68K_GOT32 = 7, # 32 bit PC relative GOT entry +R_68K_GOT16 = 8, # 16 bit PC relative GOT entry +R_68K_GOT8 = 9, # 8 bit PC relative GOT entry +R_68K_GOT32O = 10, # 32 bit GOT offset +R_68K_GOT16O = 11, # 16 bit GOT offset +R_68K_GOT8O = 12, # 8 bit GOT offset +R_68K_PLT32 = 13, # 32 bit PC relative PLT address +R_68K_PLT16 = 14, # 16 bit PC relative PLT address +R_68K_PLT8 = 15, # 8 bit PC relative PLT address +R_68K_PLT32O = 16, # 32 bit PLT offset +R_68K_PLT16O = 17, # 16 bit PLT offset +R_68K_PLT8O = 18, # 8 bit PLT offset +R_68K_COPY = 19, # Copy symbol at runtime +R_68K_GLOB_DAT = 20, # Create GOT entry +R_68K_JMP_SLOT = 21, # Create PLT entry +R_68K_RELATIVE = 22, # Adjust by program base +R_68K_TLS_GD32 = 25, # 32 bit GOT offset for GD +R_68K_TLS_GD16 = 26, # 16 bit GOT offset for GD +R_68K_TLS_GD8 = 27, # 8 bit GOT offset for GD +R_68K_TLS_LDM32 = 28, # 32 bit GOT offset for LDM +R_68K_TLS_LDM16 = 29, # 16 bit GOT offset for LDM +R_68K_TLS_LDM8 = 30, # 8 bit GOT offset for LDM +R_68K_TLS_LDO32 = 31, # 32 bit module-relative offset +R_68K_TLS_LDO16 = 32, # 16 bit module-relative offset +R_68K_TLS_LDO8 = 33, # 8 bit module-relative offset +R_68K_TLS_IE32 = 34, # 32 bit GOT offset for IE +R_68K_TLS_IE16 = 35, # 16 bit GOT offset for IE +R_68K_TLS_IE8 = 36, # 8 bit GOT offset for IE +R_68K_TLS_LE32 = 37, # 32 bit offset relative to static TLS block +R_68K_TLS_LE16 = 38, # 16 bit offset relative to static TLS block +R_68K_TLS_LE8 = 39, # 8 bit offset relative to static TLS block +R_68K_TLS_DTPMOD32 = 40, # 32 bit module number +R_68K_TLS_DTPREL32 = 41, # 32 bit module-relative offset +R_68K_TLS_TPREL32 = 42, # 32 bit TP-relative offset # Keep this the last 
entry. -R_68K_NUM = 43 +R_68K_NUM = 43, +) +SetConstants( # Intel 80386 relocations - -R_386_NONE = 0 # No reloc -R_386_32 = 1 # Direct 32 bit -R_386_PC32 = 2 # PC relative 32 bit -R_386_GOT32 = 3 # 32 bit GOT entry -R_386_PLT32 = 4 # 32 bit PLT address -R_386_COPY = 5 # Copy symbol at runtime -R_386_GLOB_DAT = 6 # Create GOT entry -R_386_JMP_SLOT = 7 # Create PLT entry -R_386_RELATIVE = 8 # Adjust by program base -R_386_GOTOFF = 9 # 32 bit offset to GOT -R_386_GOTPC = 10 # 32 bit PC relative offset to GOT -R_386_32PLT = 11 -R_386_TLS_TPOFF = 14 # Offset in static TLS block -R_386_TLS_IE = 15 # Address of GOT entry for static TLS block offset -R_386_TLS_GOTIE = 16 # GOT entry for static TLS block offset -R_386_TLS_LE = 17 # Offset relative to static TLS block -R_386_TLS_GD = 18 # Direct 32 bit for GNU version of general dynamic thread local data -R_386_TLS_LDM = 19 # Direct 32 bit for GNU version of local dynamic thread local data in LE code -R_386_16 = 20 -R_386_PC16 = 21 -R_386_8 = 22 -R_386_PC8 = 23 -R_386_TLS_GD_32 = 24 # Direct 32 bit for general dynamic thread local data -R_386_TLS_GD_PUSH = 25 # Tag for pushl in GD TLS code -R_386_TLS_GD_CALL = 26 # Relocation for call to __tls_get_addr() -R_386_TLS_GD_POP = 27 # Tag for popl in GD TLS code -R_386_TLS_LDM_32 = 28 # Direct 32 bit for local dynamic thread local data in LE code -R_386_TLS_LDM_PUSH = 29 # Tag for pushl in LDM TLS code -R_386_TLS_LDM_CALL = 30 # Relocation for call to __tls_get_addr() in LDM code -R_386_TLS_LDM_POP = 31 # Tag for popl in LDM TLS code -R_386_TLS_LDO_32 = 32 # Offset relative to TLS block -R_386_TLS_IE_32 = 33 # GOT entry for negated static TLS block offset -R_386_TLS_LE_32 = 34 # Negated offset relative to static TLS block -R_386_TLS_DTPMOD32 = 35 # ID of module containing symbol -R_386_TLS_DTPOFF32 = 36 # Offset in TLS block -R_386_TLS_TPOFF32 = 37 # Negated offset in static TLS block +R_386_NONE = 0, # No reloc +R_386_32 = 1, # Direct 32 bit +R_386_PC32 = 2, # PC relative 32 bit 
+R_386_GOT32 = 3, # 32 bit GOT entry +R_386_PLT32 = 4, # 32 bit PLT address +R_386_COPY = 5, # Copy symbol at runtime +R_386_GLOB_DAT = 6, # Create GOT entry +R_386_JMP_SLOT = 7, # Create PLT entry +R_386_RELATIVE = 8, # Adjust by program base +R_386_GOTOFF = 9, # 32 bit offset to GOT +R_386_GOTPC = 10, # 32 bit PC relative offset to GOT +R_386_32PLT = 11, +R_386_TLS_GD_PLT = 12, # This relocation is handled as if it were a R_386_PLT32 relocation referencing the ___tls_get_addr() function +R_386_TLS_LDM_PLT = 13, # ? +R_386_TLS_TPOFF = 14, # Offset in static TLS block +R_386_TLS_IE = 15, # Address of GOT entry for static TLS block offset +R_386_TLS_GOTIE = 16, # GOT entry for static TLS block offset +R_386_TLS_LE = 17, # Offset relative to static TLS block +R_386_TLS_GD = 18, # Direct 32 bit for GNU version of general dynamic thread local data +R_386_TLS_LDM = 19, # Direct 32 bit for GNU version of local dynamic thread local data in LE code +R_386_16 = 20, +R_386_PC16 = 21, +R_386_8 = 22, +R_386_PC8 = 23, +R_386_TLS_GD_32 = 24, # Direct 32 bit for general dynamic thread local data +R_386_TLS_GD_PUSH = 25, # Tag for pushl in GD TLS code +R_386_TLS_GD_CALL = 26, # Relocation for call to __tls_get_addr() +R_386_TLS_GD_POP = 27, # Tag for popl in GD TLS code +R_386_TLS_LDM_32 = 28, # Direct 32 bit for local dynamic thread local data in LE code +R_386_TLS_LDM_PUSH = 29, # Tag for pushl in LDM TLS code +R_386_TLS_LDM_CALL = 30, # Relocation for call to __tls_get_addr() in LDM code +R_386_TLS_LDM_POP = 31, # Tag for popl in LDM TLS code +R_386_TLS_LDO_32 = 32, # Offset relative to TLS block +R_386_TLS_IE_32 = 33, # GOT entry for negated static TLS block offset +R_386_TLS_LE_32 = 34, # Negated offset relative to static TLS block +R_386_TLS_DTPMOD32 = 35, # ID of module containing symbol +R_386_TLS_DTPOFF32 = 36, # Offset in TLS block +R_386_TLS_TPOFF32 = 37, # Negated offset in static TLS block # 38? -R_386_TLS_GOTDESC = 39 # GOT offset for TLS descriptor. 
-R_386_TLS_DESC_CALL = 40 # Marker of call through TLS descriptor for relaxation. -R_386_TLS_DESC = 41 # TLS descriptor containing pointer to code and to argument, returning the TLS offset for the symbol. -R_386_IRELATIVE = 42 # Adjust indirectly by program base +R_386_TLS_GOTDESC = 39, # GOT offset for TLS descriptor. +R_386_TLS_DESC_CALL = 40, # Marker of call through TLS descriptor for relaxation. +R_386_TLS_DESC = 41, # TLS descriptor containing pointer to code and to argument, returning the TLS offset for the symbol. +R_386_IRELATIVE = 42, # Adjust indirectly by program base +R_386_GOT32X = 43, # Load from 32 bit GOT entry, relaxable. # Keep this the last entry. -R_386_NUM = 43 +R_386_NUM = 44, +) +SetConstants( # SUN SPARC relocations - -R_SPARC_NONE = 0 # No reloc -R_SPARC_8 = 1 # Direct 8 bit -R_SPARC_16 = 2 # Direct 16 bit -R_SPARC_32 = 3 # Direct 32 bit -R_SPARC_DISP8 = 4 # PC relative 8 bit -R_SPARC_DISP16 = 5 # PC relative 16 bit -R_SPARC_DISP32 = 6 # PC relative 32 bit -R_SPARC_WDISP30 = 7 # PC relative 30 bit shifted -R_SPARC_WDISP22 = 8 # PC relative 22 bit shifted -R_SPARC_HI22 = 9 # High 22 bit -R_SPARC_22 = 10 # Direct 22 bit -R_SPARC_13 = 11 # Direct 13 bit -R_SPARC_LO10 = 12 # Truncated 10 bit -R_SPARC_GOT10 = 13 # Truncated 10 bit GOT entry -R_SPARC_GOT13 = 14 # 13 bit GOT entry -R_SPARC_GOT22 = 15 # 22 bit GOT entry shifted -R_SPARC_PC10 = 16 # PC relative 10 bit truncated -R_SPARC_PC22 = 17 # PC relative 22 bit shifted -R_SPARC_WPLT30 = 18 # 30 bit PC relative PLT address -R_SPARC_COPY = 19 # Copy symbol at runtime -R_SPARC_GLOB_DAT = 20 # Create GOT entry -R_SPARC_JMP_SLOT = 21 # Create PLT entry -R_SPARC_RELATIVE = 22 # Adjust by program base -R_SPARC_UA32 = 23 # Direct 32 bit unaligned +R_SPARC_NONE = 0, # No reloc +R_SPARC_8 = 1, # Direct 8 bit +R_SPARC_16 = 2, # Direct 16 bit +R_SPARC_32 = 3, # Direct 32 bit +R_SPARC_DISP8 = 4, # PC relative 8 bit +R_SPARC_DISP16 = 5, # PC relative 16 bit +R_SPARC_DISP32 = 6, # PC relative 32 bit 
+R_SPARC_WDISP30 = 7, # PC relative 30 bit shifted +R_SPARC_WDISP22 = 8, # PC relative 22 bit shifted +R_SPARC_HI22 = 9, # High 22 bit +R_SPARC_22 = 10, # Direct 22 bit +R_SPARC_13 = 11, # Direct 13 bit +R_SPARC_LO10 = 12, # Truncated 10 bit +R_SPARC_GOT10 = 13, # Truncated 10 bit GOT entry +R_SPARC_GOT13 = 14, # 13 bit GOT entry +R_SPARC_GOT22 = 15, # 22 bit GOT entry shifted +R_SPARC_PC10 = 16, # PC relative 10 bit truncated +R_SPARC_PC22 = 17, # PC relative 22 bit shifted +R_SPARC_WPLT30 = 18, # 30 bit PC relative PLT address +R_SPARC_COPY = 19, # Copy symbol at runtime +R_SPARC_GLOB_DAT = 20, # Create GOT entry +R_SPARC_JMP_SLOT = 21, # Create PLT entry +R_SPARC_RELATIVE = 22, # Adjust by program base +R_SPARC_UA32 = 23, # Direct 32 bit unaligned # Additional Sparc64 relocs. -R_SPARC_PLT32 = 24 # Direct 32 bit ref to PLT entry -R_SPARC_HIPLT22 = 25 # High 22 bit PLT entry -R_SPARC_LOPLT10 = 26 # Truncated 10 bit PLT entry -R_SPARC_PCPLT32 = 27 # PC rel 32 bit ref to PLT entry -R_SPARC_PCPLT22 = 28 # PC rel high 22 bit PLT entry -R_SPARC_PCPLT10 = 29 # PC rel trunc 10 bit PLT entry -R_SPARC_10 = 30 # Direct 10 bit -R_SPARC_11 = 31 # Direct 11 bit -R_SPARC_64 = 32 # Direct 64 bit -R_SPARC_OLO10 = 33 # 10bit with secondary 13bit addend -R_SPARC_HH22 = 34 # Top 22 bits of direct 64 bit -R_SPARC_HM10 = 35 # High middle 10 bits of ... -R_SPARC_LM22 = 36 # Low middle 22 bits of ... -R_SPARC_PC_HH22 = 37 # Top 22 bits of pc rel 64 bit -R_SPARC_PC_HM10 = 38 # High middle 10 bit of ... -R_SPARC_PC_LM22 = 39 # Low miggle 22 bits of ... 
-R_SPARC_WDISP16 = 40 # PC relative 16 bit shifted -R_SPARC_WDISP19 = 41 # PC relative 19 bit shifted -R_SPARC_GLOB_JMP = 42 # was part of v9 ABI but was removed -R_SPARC_7 = 43 # Direct 7 bit -R_SPARC_5 = 44 # Direct 5 bit -R_SPARC_6 = 45 # Direct 6 bit -R_SPARC_DISP64 = 46 # PC relative 64 bit -R_SPARC_PLT64 = 47 # Direct 64 bit ref to PLT entry -R_SPARC_HIX22 = 48 # High 22 bit complemented -R_SPARC_LOX10 = 49 # Truncated 11 bit complemented -R_SPARC_H44 = 50 # Direct high 12 of 44 bit -R_SPARC_M44 = 51 # Direct mid 22 of 44 bit -R_SPARC_L44 = 52 # Direct low 10 of 44 bit -R_SPARC_REGISTER = 53 # Global register usage -R_SPARC_UA64 = 54 # Direct 64 bit unaligned -R_SPARC_UA16 = 55 # Direct 16 bit unaligned -R_SPARC_TLS_GD_HI22 = 56 -R_SPARC_TLS_GD_LO10 = 57 -R_SPARC_TLS_GD_ADD = 58 -R_SPARC_TLS_GD_CALL = 59 -R_SPARC_TLS_LDM_HI22 = 60 -R_SPARC_TLS_LDM_LO10 = 61 -R_SPARC_TLS_LDM_ADD = 62 -R_SPARC_TLS_LDM_CALL = 63 -R_SPARC_TLS_LDO_HIX22 = 64 -R_SPARC_TLS_LDO_LOX10 = 65 -R_SPARC_TLS_LDO_ADD = 66 -R_SPARC_TLS_IE_HI22 = 67 -R_SPARC_TLS_IE_LO10 = 68 -R_SPARC_TLS_IE_LD = 69 -R_SPARC_TLS_IE_LDX = 70 -R_SPARC_TLS_IE_ADD = 71 -R_SPARC_TLS_LE_HIX22 = 72 -R_SPARC_TLS_LE_LOX10 = 73 -R_SPARC_TLS_DTPMOD32 = 74 -R_SPARC_TLS_DTPMOD64 = 75 -R_SPARC_TLS_DTPOFF32 = 76 -R_SPARC_TLS_DTPOFF64 = 77 -R_SPARC_TLS_TPOFF32 = 78 -R_SPARC_TLS_TPOFF64 = 79 -R_SPARC_GOTDATA_HIX22 = 80 -R_SPARC_GOTDATA_LOX10 = 81 -R_SPARC_GOTDATA_OP_HIX22 = 82 -R_SPARC_GOTDATA_OP_LOX10 = 83 -R_SPARC_GOTDATA_OP = 84 -R_SPARC_H34 = 85 -R_SPARC_SIZE32 = 86 -R_SPARC_SIZE64 = 87 -R_SPARC_JMP_IREL = 248 -R_SPARC_IRELATIVE = 249 -R_SPARC_GNU_VTINHERIT = 250 -R_SPARC_GNU_VTENTRY = 251 -R_SPARC_REV32 = 252 +R_SPARC_PLT32 = 24, # Direct 32 bit ref to PLT entry +R_SPARC_HIPLT22 = 25, # High 22 bit PLT entry +R_SPARC_LOPLT10 = 26, # Truncated 10 bit PLT entry +R_SPARC_PCPLT32 = 27, # PC rel 32 bit ref to PLT entry +R_SPARC_PCPLT22 = 28, # PC rel high 22 bit PLT entry +R_SPARC_PCPLT10 = 29, # PC rel trunc 10 bit PLT entry 
+R_SPARC_10 = 30, # Direct 10 bit +R_SPARC_11 = 31, # Direct 11 bit +R_SPARC_64 = 32, # Direct 64 bit +R_SPARC_OLO10 = 33, # 10bit with secondary 13bit addend +R_SPARC_HH22 = 34, # Top 22 bits of direct 64 bit +R_SPARC_HM10 = 35, # High middle 10 bits of ... +R_SPARC_LM22 = 36, # Low middle 22 bits of ... +R_SPARC_PC_HH22 = 37, # Top 22 bits of pc rel 64 bit +R_SPARC_PC_HM10 = 38, # High middle 10 bit of ... +R_SPARC_PC_LM22 = 39, # Low middle 22 bits of ... +R_SPARC_WDISP16 = 40, # PC relative 16 bit shifted +R_SPARC_WDISP19 = 41, # PC relative 19 bit shifted +R_SPARC_GLOB_JMP = 42, # was part of v9 ABI but was removed +R_SPARC_7 = 43, # Direct 7 bit +R_SPARC_5 = 44, # Direct 5 bit +R_SPARC_6 = 45, # Direct 6 bit +R_SPARC_DISP64 = 46, # PC relative 64 bit +R_SPARC_PLT64 = 47, # Direct 64 bit ref to PLT entry +R_SPARC_HIX22 = 48, # High 22 bit complemented +R_SPARC_LOX10 = 49, # Truncated 11 bit complemented +R_SPARC_H44 = 50, # Direct high 12 of 44 bit +R_SPARC_M44 = 51, # Direct mid 22 of 44 bit +R_SPARC_L44 = 52, # Direct low 10 of 44 bit +R_SPARC_REGISTER = 53, # Global register usage +R_SPARC_UA64 = 54, # Direct 64 bit unaligned +R_SPARC_UA16 = 55, # Direct 16 bit unaligned +R_SPARC_TLS_GD_HI22 = 56, +R_SPARC_TLS_GD_LO10 = 57, +R_SPARC_TLS_GD_ADD = 58, +R_SPARC_TLS_GD_CALL = 59, +R_SPARC_TLS_LDM_HI22 = 60, +R_SPARC_TLS_LDM_LO10 = 61, +R_SPARC_TLS_LDM_ADD = 62, +R_SPARC_TLS_LDM_CALL = 63, +R_SPARC_TLS_LDO_HIX22 = 64, +R_SPARC_TLS_LDO_LOX10 = 65, +R_SPARC_TLS_LDO_ADD = 66, +R_SPARC_TLS_IE_HI22 = 67, +R_SPARC_TLS_IE_LO10 = 68, +R_SPARC_TLS_IE_LD = 69, +R_SPARC_TLS_IE_LDX = 70, +R_SPARC_TLS_IE_ADD = 71, +R_SPARC_TLS_LE_HIX22 = 72, +R_SPARC_TLS_LE_LOX10 = 73, +R_SPARC_TLS_DTPMOD32 = 74, +R_SPARC_TLS_DTPMOD64 = 75, +R_SPARC_TLS_DTPOFF32 = 76, +R_SPARC_TLS_DTPOFF64 = 77, +R_SPARC_TLS_TPOFF32 = 78, +R_SPARC_TLS_TPOFF64 = 79, +R_SPARC_GOTDATA_HIX22 = 80, +R_SPARC_GOTDATA_LOX10 = 81, +R_SPARC_GOTDATA_OP_HIX22 = 82, +R_SPARC_GOTDATA_OP_LOX10 = 83, +R_SPARC_GOTDATA_OP =
84, +R_SPARC_H34 = 85, +R_SPARC_SIZE32 = 86, +R_SPARC_SIZE64 = 87, +R_SPARC_JMP_IREL = 248, +R_SPARC_IRELATIVE = 249, +R_SPARC_GNU_VTINHERIT = 250, +R_SPARC_GNU_VTENTRY = 251, +R_SPARC_REV32 = 252, # Keep this the last entry. -R_SPARC_NUM = 253 +R_SPARC_NUM = 253, +) +SetConstants( # MIPS R3000 relocations - -R_MIPS_NONE = 0 # No reloc -R_MIPS_16 = 1 # Direct 16 bit -R_MIPS_32 = 2 # Direct 32 bit -R_MIPS_REL32 = 3 # PC relative 32 bit -R_MIPS_26 = 4 # Direct 26 bit shifted -R_MIPS_HI16 = 5 # High 16 bit -R_MIPS_LO16 = 6 # Low 16 bit -R_MIPS_GPREL16 = 7 # GP relative 16 bit -R_MIPS_LITERAL = 8 # 16 bit literal entry -R_MIPS_GOT16 = 9 # 16 bit GOT entry -R_MIPS_PC16 = 10 # PC relative 16 bit -R_MIPS_CALL16 = 11 # 16 bit GOT entry for function -R_MIPS_GPREL32 = 12 # GP relative 32 bit - -R_MIPS_SHIFT5 = 16 -R_MIPS_SHIFT6 = 17 -R_MIPS_64 = 18 -R_MIPS_GOT_DISP = 19 -R_MIPS_GOT_PAGE = 20 -R_MIPS_GOT_OFST = 21 -R_MIPS_GOT_HI16 = 22 -R_MIPS_GOT_LO16 = 23 -R_MIPS_SUB = 24 -R_MIPS_INSERT_A = 25 -R_MIPS_INSERT_B = 26 -R_MIPS_DELETE = 27 -R_MIPS_HIGHER = 28 -R_MIPS_HIGHEST = 29 -R_MIPS_CALL_HI16 = 30 -R_MIPS_CALL_LO16 = 31 -R_MIPS_SCN_DISP = 32 -R_MIPS_REL16 = 33 -R_MIPS_ADD_IMMEDIATE = 34 -R_MIPS_PJUMP = 35 -R_MIPS_RELGOT = 36 -R_MIPS_JALR = 37 -R_MIPS_TLS_DTPMOD32 = 38 # Module number 32 bit -R_MIPS_TLS_DTPREL32 = 39 # Module-relative offset 32 bit -R_MIPS_TLS_DTPMOD64 = 40 # Module number 64 bit -R_MIPS_TLS_DTPREL64 = 41 # Module-relative offset 64 bit -R_MIPS_TLS_GD = 42 # 16 bit GOT offset for GD -R_MIPS_TLS_LDM = 43 # 16 bit GOT offset for LDM -R_MIPS_TLS_DTPREL_HI16 = 44 # Module-relative offset, high 16 bits -R_MIPS_TLS_DTPREL_LO16 = 45 # Module-relative offset, low 16 bits -R_MIPS_TLS_GOTTPREL = 46 # 16 bit GOT offset for IE -R_MIPS_TLS_TPREL32 = 47 # TP-relative offset, 32 bit -R_MIPS_TLS_TPREL64 = 48 # TP-relative offset, 64 bit -R_MIPS_TLS_TPREL_HI16 = 49 # TP-relative offset, high 16 bits -R_MIPS_TLS_TPREL_LO16 = 50 # TP-relative offset, low 16 bits 
-R_MIPS_GLOB_DAT = 51 -R_MIPS_COPY = 126 -R_MIPS_JUMP_SLOT = 127 +R_MIPS_NONE = 0, # No reloc +R_MIPS_16 = 1, # Direct 16 bit +R_MIPS_32 = 2, # Direct 32 bit +R_MIPS_REL32 = 3, # PC relative 32 bit +R_MIPS_26 = 4, # Direct 26 bit shifted +R_MIPS_HI16 = 5, # High 16 bit +R_MIPS_LO16 = 6, # Low 16 bit +R_MIPS_GPREL16 = 7, # GP relative 16 bit +R_MIPS_LITERAL = 8, # 16 bit literal entry +R_MIPS_GOT16 = 9, # 16 bit GOT entry +R_MIPS_PC16 = 10, # PC relative 16 bit +R_MIPS_CALL16 = 11, # 16 bit GOT entry for function +R_MIPS_GPREL32 = 12, # GP relative 32 bit + +R_MIPS_SHIFT5 = 16, +R_MIPS_SHIFT6 = 17, +R_MIPS_64 = 18, +R_MIPS_GOT_DISP = 19, +R_MIPS_GOT_PAGE = 20, +R_MIPS_GOT_OFST = 21, +R_MIPS_GOT_HI16 = 22, +R_MIPS_GOT_LO16 = 23, +R_MIPS_SUB = 24, +R_MIPS_INSERT_A = 25, +R_MIPS_INSERT_B = 26, +R_MIPS_DELETE = 27, +R_MIPS_HIGHER = 28, +R_MIPS_HIGHEST = 29, +R_MIPS_CALL_HI16 = 30, +R_MIPS_CALL_LO16 = 31, +R_MIPS_SCN_DISP = 32, +R_MIPS_REL16 = 33, +R_MIPS_ADD_IMMEDIATE = 34, +R_MIPS_PJUMP = 35, +R_MIPS_RELGOT = 36, +R_MIPS_JALR = 37, +R_MIPS_TLS_DTPMOD32 = 38, # Module number 32 bit +R_MIPS_TLS_DTPREL32 = 39, # Module-relative offset 32 bit +R_MIPS_TLS_DTPMOD64 = 40, # Module number 64 bit +R_MIPS_TLS_DTPREL64 = 41, # Module-relative offset 64 bit +R_MIPS_TLS_GD = 42, # 16 bit GOT offset for GD +R_MIPS_TLS_LDM = 43, # 16 bit GOT offset for LDM +R_MIPS_TLS_DTPREL_HI16 = 44, # Module-relative offset, high 16 bits +R_MIPS_TLS_DTPREL_LO16 = 45, # Module-relative offset, low 16 bits +R_MIPS_TLS_GOTTPREL = 46, # 16 bit GOT offset for IE +R_MIPS_TLS_TPREL32 = 47, # TP-relative offset, 32 bit +R_MIPS_TLS_TPREL64 = 48, # TP-relative offset, 64 bit +R_MIPS_TLS_TPREL_HI16 = 49, # TP-relative offset, high 16 bits +R_MIPS_TLS_TPREL_LO16 = 50, # TP-relative offset, low 16 bits +R_MIPS_GLOB_DAT = 51, +R_MIPS_COPY = 126, +R_MIPS_JUMP_SLOT = 127, # Keep this the last entry. -R_MIPS_NUM = 128 +R_MIPS_NUM = 128, +) +SetConstants( # HPPA relocations - -R_PARISC_NONE = 0 # No reloc. 
-R_PARISC_DIR32 = 1 # Direct 32-bit reference. -R_PARISC_DIR21L = 2 # Left 21 bits of eff. address. -R_PARISC_DIR17R = 3 # Right 17 bits of eff. address. -R_PARISC_DIR17F = 4 # 17 bits of eff. address. -R_PARISC_DIR14R = 6 # Right 14 bits of eff. address. -R_PARISC_PCREL32 = 9 # 32-bit rel. address. -R_PARISC_PCREL21L = 10 # Left 21 bits of rel. address. -R_PARISC_PCREL17R = 11 # Right 17 bits of rel. address. -R_PARISC_PCREL17F = 12 # 17 bits of rel. address. -R_PARISC_PCREL14R = 14 # Right 14 bits of rel. address. -R_PARISC_DPREL21L = 18 # Left 21 bits of rel. address. -R_PARISC_DPREL14R = 22 # Right 14 bits of rel. address. -R_PARISC_GPREL21L = 26 # GP-relative, left 21 bits. -R_PARISC_GPREL14R = 30 # GP-relative, right 14 bits. -R_PARISC_LTOFF21L = 34 # LT-relative, left 21 bits. -R_PARISC_LTOFF14R = 38 # LT-relative, right 14 bits. -R_PARISC_SECREL32 = 41 # 32 bits section rel. address. -R_PARISC_SEGBASE = 48 # No relocation, set segment base. -R_PARISC_SEGREL32 = 49 # 32 bits segment rel. address. -R_PARISC_PLTOFF21L = 50 # PLT rel. address, left 21 bits. -R_PARISC_PLTOFF14R = 54 # PLT rel. address, right 14 bits. -R_PARISC_LTOFF_FPTR32 = 57 # 32 bits LT-rel. function pointer. -R_PARISC_LTOFF_FPTR21L = 58 # LT-rel. fct ptr, left 21 bits. -R_PARISC_LTOFF_FPTR14R = 62 # LT-rel. fct ptr, right 14 bits. -R_PARISC_FPTR64 = 64 # 64 bits function address. -R_PARISC_PLABEL32 = 65 # 32 bits function address. -R_PARISC_PLABEL21L = 66 # Left 21 bits of fdesc address. -R_PARISC_PLABEL14R = 70 # Right 14 bits of fdesc address. -R_PARISC_PCREL64 = 72 # 64 bits PC-rel. address. -R_PARISC_PCREL22F = 74 # 22 bits PC-rel. address. -R_PARISC_PCREL14WR = 75 # PC-rel. address, right 14 bits. -R_PARISC_PCREL14DR = 76 # PC rel. address, right 14 bits. -R_PARISC_PCREL16F = 77 # 16 bits PC-rel. address. -R_PARISC_PCREL16WF = 78 # 16 bits PC-rel. address. -R_PARISC_PCREL16DF = 79 # 16 bits PC-rel. address. -R_PARISC_DIR64 = 80 # 64 bits of eff. address. 
-R_PARISC_DIR14WR = 83 # 14 bits of eff. address. -R_PARISC_DIR14DR = 84 # 14 bits of eff. address. -R_PARISC_DIR16F = 85 # 16 bits of eff. address. -R_PARISC_DIR16WF = 86 # 16 bits of eff. address. -R_PARISC_DIR16DF = 87 # 16 bits of eff. address. -R_PARISC_GPREL64 = 88 # 64 bits of GP-rel. address. -R_PARISC_GPREL14WR = 91 # GP-rel. address, right 14 bits. -R_PARISC_GPREL14DR = 92 # GP-rel. address, right 14 bits. -R_PARISC_GPREL16F = 93 # 16 bits GP-rel. address. -R_PARISC_GPREL16WF = 94 # 16 bits GP-rel. address. -R_PARISC_GPREL16DF = 95 # 16 bits GP-rel. address. -R_PARISC_LTOFF64 = 96 # 64 bits LT-rel. address. -R_PARISC_LTOFF14WR = 99 # LT-rel. address, right 14 bits. -R_PARISC_LTOFF14DR = 100 # LT-rel. address, right 14 bits. -R_PARISC_LTOFF16F = 101 # 16 bits LT-rel. address. -R_PARISC_LTOFF16WF = 102 # 16 bits LT-rel. address. -R_PARISC_LTOFF16DF = 103 # 16 bits LT-rel. address. -R_PARISC_SECREL64 = 104 # 64 bits section rel. address. -R_PARISC_SEGREL64 = 112 # 64 bits segment rel. address. -R_PARISC_PLTOFF14WR = 115 # PLT-rel. address, right 14 bits. -R_PARISC_PLTOFF14DR = 116 # PLT-rel. address, right 14 bits. -R_PARISC_PLTOFF16F = 117 # 16 bits LT-rel. address. -R_PARISC_PLTOFF16WF = 118 # 16 bits PLT-rel. address. -R_PARISC_PLTOFF16DF = 119 # 16 bits PLT-rel. address. -R_PARISC_LTOFF_FPTR64 = 120 # 64 bits LT-rel. function ptr. -R_PARISC_LTOFF_FPTR14WR = 123 # LT-rel. fct. ptr., right 14 bits. -R_PARISC_LTOFF_FPTR14DR = 124 # LT-rel. fct. ptr., right 14 bits. -R_PARISC_LTOFF_FPTR16F = 125 # 16 bits LT-rel. function ptr. -R_PARISC_LTOFF_FPTR16WF = 126 # 16 bits LT-rel. function ptr. -R_PARISC_LTOFF_FPTR16DF = 127 # 16 bits LT-rel. function ptr. -R_PARISC_LORESERVE = 128 -R_PARISC_COPY = 128 # Copy relocation. -R_PARISC_IPLT = 129 # Dynamic reloc, imported PLT -R_PARISC_EPLT = 130 # Dynamic reloc, exported PLT -R_PARISC_TPREL32 = 153 # 32 bits TP-rel. address. -R_PARISC_TPREL21L = 154 # TP-rel. address, left 21 bits. -R_PARISC_TPREL14R = 158 # TP-rel. 
address, right 14 bits. -R_PARISC_LTOFF_TP21L = 162 # LT-TP-rel. address, left 21 bits. -R_PARISC_LTOFF_TP14R = 166 # LT-TP-rel. address, right 14 bits.*/ -R_PARISC_LTOFF_TP14F = 167 # 14 bits LT-TP-rel. address. -R_PARISC_TPREL64 = 216 # 64 bits TP-rel. address. -R_PARISC_TPREL14WR = 219 # TP-rel. address, right 14 bits. -R_PARISC_TPREL14DR = 220 # TP-rel. address, right 14 bits. -R_PARISC_TPREL16F = 221 # 16 bits TP-rel. address. -R_PARISC_TPREL16WF = 222 # 16 bits TP-rel. address. -R_PARISC_TPREL16DF = 223 # 16 bits TP-rel. address. -R_PARISC_LTOFF_TP64 = 224 # 64 bits LT-TP-rel. address. -R_PARISC_LTOFF_TP14WR = 227 # LT-TP-rel. address, right 14 bits.*/ -R_PARISC_LTOFF_TP14DR = 228 # LT-TP-rel. address, right 14 bits.*/ -R_PARISC_LTOFF_TP16F = 229 # 16 bits LT-TP-rel. address. -R_PARISC_LTOFF_TP16WF = 230 # 16 bits LT-TP-rel. address. -R_PARISC_LTOFF_TP16DF = 231 # 16 bits LT-TP-rel. address. -R_PARISC_GNU_VTENTRY = 232 -R_PARISC_GNU_VTINHERIT = 233 -R_PARISC_TLS_GD21L = 234 # GD 21-bit left. -R_PARISC_TLS_GD14R = 235 # GD 14-bit right. -R_PARISC_TLS_GDCALL = 236 # GD call to __t_g_a. -R_PARISC_TLS_LDM21L = 237 # LD module 21-bit left. -R_PARISC_TLS_LDM14R = 238 # LD module 14-bit right. -R_PARISC_TLS_LDMCALL = 239 # LD module call to __t_g_a. -R_PARISC_TLS_LDO21L = 240 # LD offset 21-bit left. -R_PARISC_TLS_LDO14R = 241 # LD offset 14-bit right. -R_PARISC_TLS_DTPMOD32 = 242 # DTP module 32-bit. -R_PARISC_TLS_DTPMOD64 = 243 # DTP module 64-bit. -R_PARISC_TLS_DTPOFF32 = 244 # DTP offset 32-bit. -R_PARISC_TLS_DTPOFF64 = 245 # DTP offset 32-bit. +R_PARISC_NONE = 0, # No reloc. +R_PARISC_DIR32 = 1, # Direct 32-bit reference. +R_PARISC_DIR21L = 2, # Left 21 bits of eff. address. +R_PARISC_DIR17R = 3, # Right 17 bits of eff. address. +R_PARISC_DIR17F = 4, # 17 bits of eff. address. +R_PARISC_DIR14R = 6, # Right 14 bits of eff. address. +R_PARISC_PCREL32 = 9, # 32-bit rel. address. +R_PARISC_PCREL21L = 10, # Left 21 bits of rel. address. 
+R_PARISC_PCREL17R = 11, # Right 17 bits of rel. address. +R_PARISC_PCREL17F = 12, # 17 bits of rel. address. +R_PARISC_PCREL14R = 14, # Right 14 bits of rel. address. +R_PARISC_DPREL21L = 18, # Left 21 bits of rel. address. +R_PARISC_DPREL14R = 22, # Right 14 bits of rel. address. +R_PARISC_GPREL21L = 26, # GP-relative, left 21 bits. +R_PARISC_GPREL14R = 30, # GP-relative, right 14 bits. +R_PARISC_LTOFF21L = 34, # LT-relative, left 21 bits. +R_PARISC_LTOFF14R = 38, # LT-relative, right 14 bits. +R_PARISC_SECREL32 = 41, # 32 bits section rel. address. +R_PARISC_SEGBASE = 48, # No relocation, set segment base. +R_PARISC_SEGREL32 = 49, # 32 bits segment rel. address. +R_PARISC_PLTOFF21L = 50, # PLT rel. address, left 21 bits. +R_PARISC_PLTOFF14R = 54, # PLT rel. address, right 14 bits. +R_PARISC_LTOFF_FPTR32 = 57, # 32 bits LT-rel. function pointer. +R_PARISC_LTOFF_FPTR21L = 58, # LT-rel. fct ptr, left 21 bits. +R_PARISC_LTOFF_FPTR14R = 62, # LT-rel. fct ptr, right 14 bits. +R_PARISC_FPTR64 = 64, # 64 bits function address. +R_PARISC_PLABEL32 = 65, # 32 bits function address. +R_PARISC_PLABEL21L = 66, # Left 21 bits of fdesc address. +R_PARISC_PLABEL14R = 70, # Right 14 bits of fdesc address. +R_PARISC_PCREL64 = 72, # 64 bits PC-rel. address. +R_PARISC_PCREL22F = 74, # 22 bits PC-rel. address. +R_PARISC_PCREL14WR = 75, # PC-rel. address, right 14 bits. +R_PARISC_PCREL14DR = 76, # PC rel. address, right 14 bits. +R_PARISC_PCREL16F = 77, # 16 bits PC-rel. address. +R_PARISC_PCREL16WF = 78, # 16 bits PC-rel. address. +R_PARISC_PCREL16DF = 79, # 16 bits PC-rel. address. +R_PARISC_DIR64 = 80, # 64 bits of eff. address. +R_PARISC_DIR14WR = 83, # 14 bits of eff. address. +R_PARISC_DIR14DR = 84, # 14 bits of eff. address. +R_PARISC_DIR16F = 85, # 16 bits of eff. address. +R_PARISC_DIR16WF = 86, # 16 bits of eff. address. +R_PARISC_DIR16DF = 87, # 16 bits of eff. address. +R_PARISC_GPREL64 = 88, # 64 bits of GP-rel. address. +R_PARISC_GPREL14WR = 91, # GP-rel. 
address, right 14 bits. +R_PARISC_GPREL14DR = 92, # GP-rel. address, right 14 bits. +R_PARISC_GPREL16F = 93, # 16 bits GP-rel. address. +R_PARISC_GPREL16WF = 94, # 16 bits GP-rel. address. +R_PARISC_GPREL16DF = 95, # 16 bits GP-rel. address. +R_PARISC_LTOFF64 = 96, # 64 bits LT-rel. address. +R_PARISC_LTOFF14WR = 99, # LT-rel. address, right 14 bits. +R_PARISC_LTOFF14DR = 100, # LT-rel. address, right 14 bits. +R_PARISC_LTOFF16F = 101, # 16 bits LT-rel. address. +R_PARISC_LTOFF16WF = 102, # 16 bits LT-rel. address. +R_PARISC_LTOFF16DF = 103, # 16 bits LT-rel. address. +R_PARISC_SECREL64 = 104, # 64 bits section rel. address. +R_PARISC_SEGREL64 = 112, # 64 bits segment rel. address. +R_PARISC_PLTOFF14WR = 115, # PLT-rel. address, right 14 bits. +R_PARISC_PLTOFF14DR = 116, # PLT-rel. address, right 14 bits. +R_PARISC_PLTOFF16F = 117, # 16 bits LT-rel. address. +R_PARISC_PLTOFF16WF = 118, # 16 bits PLT-rel. address. +R_PARISC_PLTOFF16DF = 119, # 16 bits PLT-rel. address. +R_PARISC_LTOFF_FPTR64 = 120, # 64 bits LT-rel. function ptr. +R_PARISC_LTOFF_FPTR14WR = 123, # LT-rel. fct. ptr., right 14 bits. +R_PARISC_LTOFF_FPTR14DR = 124, # LT-rel. fct. ptr., right 14 bits. +R_PARISC_LTOFF_FPTR16F = 125, # 16 bits LT-rel. function ptr. +R_PARISC_LTOFF_FPTR16WF = 126, # 16 bits LT-rel. function ptr. +R_PARISC_LTOFF_FPTR16DF = 127, # 16 bits LT-rel. function ptr. +R_PARISC_LORESERVE = 128, +R_PARISC_COPY = 128, # Copy relocation. +R_PARISC_IPLT = 129, # Dynamic reloc, imported PLT +R_PARISC_EPLT = 130, # Dynamic reloc, exported PLT +R_PARISC_TPREL32 = 153, # 32 bits TP-rel. address. +R_PARISC_TPREL21L = 154, # TP-rel. address, left 21 bits. +R_PARISC_TPREL14R = 158, # TP-rel. address, right 14 bits. +R_PARISC_LTOFF_TP21L = 162, # LT-TP-rel. address, left 21 bits. +R_PARISC_LTOFF_TP14R = 166, # LT-TP-rel. address, right 14 bits.*/ +R_PARISC_LTOFF_TP14F = 167, # 14 bits LT-TP-rel. address. +R_PARISC_TPREL64 = 216, # 64 bits TP-rel. address. +R_PARISC_TPREL14WR = 219, # TP-rel. 
address, right 14 bits. +R_PARISC_TPREL14DR = 220, # TP-rel. address, right 14 bits. +R_PARISC_TPREL16F = 221, # 16 bits TP-rel. address. +R_PARISC_TPREL16WF = 222, # 16 bits TP-rel. address. +R_PARISC_TPREL16DF = 223, # 16 bits TP-rel. address. +R_PARISC_LTOFF_TP64 = 224, # 64 bits LT-TP-rel. address. +R_PARISC_LTOFF_TP14WR = 227, # LT-TP-rel. address, right 14 bits.*/ +R_PARISC_LTOFF_TP14DR = 228, # LT-TP-rel. address, right 14 bits.*/ +R_PARISC_LTOFF_TP16F = 229, # 16 bits LT-TP-rel. address. +R_PARISC_LTOFF_TP16WF = 230, # 16 bits LT-TP-rel. address. +R_PARISC_LTOFF_TP16DF = 231, # 16 bits LT-TP-rel. address. +R_PARISC_GNU_VTENTRY = 232, +R_PARISC_GNU_VTINHERIT = 233, +R_PARISC_TLS_GD21L = 234, # GD 21-bit left. +R_PARISC_TLS_GD14R = 235, # GD 14-bit right. +R_PARISC_TLS_GDCALL = 236, # GD call to __t_g_a. +R_PARISC_TLS_LDM21L = 237, # LD module 21-bit left. +R_PARISC_TLS_LDM14R = 238, # LD module 14-bit right. +R_PARISC_TLS_LDMCALL = 239, # LD module call to __t_g_a. +R_PARISC_TLS_LDO21L = 240, # LD offset 21-bit left. +R_PARISC_TLS_LDO14R = 241, # LD offset 14-bit right. +R_PARISC_TLS_DTPMOD32 = 242, # DTP module 32-bit. +R_PARISC_TLS_DTPMOD64 = 243, # DTP module 64-bit. +R_PARISC_TLS_DTPOFF32 = 244, # DTP offset 32-bit. +R_PARISC_TLS_DTPOFF64 = 245, # DTP offset 32-bit. 
+R_PARISC_HIRESERVE = 255, +no_name = ('R_PARISC_LORESERVE','R_PARISC_HIRESERVE') +) R_PARISC_TLS_LE21L = R_PARISC_TPREL21L R_PARISC_TLS_LE14R = R_PARISC_TPREL14R R_PARISC_TLS_IE21L = R_PARISC_LTOFF_TP21L R_PARISC_TLS_IE14R = R_PARISC_LTOFF_TP14R R_PARISC_TLS_TPREL32 = R_PARISC_TPREL32 R_PARISC_TLS_TPREL64 = R_PARISC_TPREL64 -R_PARISC_HIRESERVE = 255 +SetConstants( # Alpha relocations - -R_ALPHA_NONE = 0 # No reloc -R_ALPHA_REFLONG = 1 # Direct 32 bit -R_ALPHA_REFQUAD = 2 # Direct 64 bit -R_ALPHA_GPREL32 = 3 # GP relative 32 bit -R_ALPHA_LITERAL = 4 # GP relative 16 bit w/optimization -R_ALPHA_LITUSE = 5 # Optimization hint for LITERAL -R_ALPHA_GPDISP = 6 # Add displacement to GP -R_ALPHA_BRADDR = 7 # PC+4 relative 23 bit shifted -R_ALPHA_HINT = 8 # PC+4 relative 16 bit shifted -R_ALPHA_SREL16 = 9 # PC relative 16 bit -R_ALPHA_SREL32 = 10 # PC relative 32 bit -R_ALPHA_SREL64 = 11 # PC relative 64 bit -R_ALPHA_GPRELHIGH = 17 # GP relative 32 bit, high 16 bits -R_ALPHA_GPRELLOW = 18 # GP relative 32 bit, low 16 bits -R_ALPHA_GPREL16 = 19 # GP relative 16 bit -R_ALPHA_COPY = 24 # Copy symbol at runtime -R_ALPHA_GLOB_DAT = 25 # Create GOT entry -R_ALPHA_JMP_SLOT = 26 # Create PLT entry -R_ALPHA_RELATIVE = 27 # Adjust by program base -R_ALPHA_TLS_GD_HI = 28 -R_ALPHA_TLSGD = 29 -R_ALPHA_TLS_LDM = 30 -R_ALPHA_DTPMOD64 = 31 -R_ALPHA_GOTDTPREL = 32 -R_ALPHA_DTPREL64 = 33 -R_ALPHA_DTPRELHI = 34 -R_ALPHA_DTPRELLO = 35 -R_ALPHA_DTPREL16 = 36 -R_ALPHA_GOTTPREL = 37 -R_ALPHA_TPREL64 = 38 -R_ALPHA_TPRELHI = 39 -R_ALPHA_TPRELLO = 40 -R_ALPHA_TPREL16 = 41 +R_ALPHA_NONE = 0, # No reloc +R_ALPHA_REFLONG = 1, # Direct 32 bit +R_ALPHA_REFQUAD = 2, # Direct 64 bit +R_ALPHA_GPREL32 = 3, # GP relative 32 bit +R_ALPHA_LITERAL = 4, # GP relative 16 bit w/optimization +R_ALPHA_LITUSE = 5, # Optimization hint for LITERAL +R_ALPHA_GPDISP = 6, # Add displacement to GP +R_ALPHA_BRADDR = 7, # PC+4 relative 23 bit shifted +R_ALPHA_HINT = 8, # PC+4 relative 16 bit shifted +R_ALPHA_SREL16 = 9, # PC 
relative 16 bit +R_ALPHA_SREL32 = 10, # PC relative 32 bit +R_ALPHA_SREL64 = 11, # PC relative 64 bit +R_ALPHA_GPRELHIGH = 17, # GP relative 32 bit, high 16 bits +R_ALPHA_GPRELLOW = 18, # GP relative 32 bit, low 16 bits +R_ALPHA_GPREL16 = 19, # GP relative 16 bit +R_ALPHA_COPY = 24, # Copy symbol at runtime +R_ALPHA_GLOB_DAT = 25, # Create GOT entry +R_ALPHA_JMP_SLOT = 26, # Create PLT entry +R_ALPHA_RELATIVE = 27, # Adjust by program base +R_ALPHA_TLS_GD_HI = 28, +R_ALPHA_TLSGD = 29, +R_ALPHA_TLS_LDM = 30, +R_ALPHA_DTPMOD64 = 31, +R_ALPHA_GOTDTPREL = 32, +R_ALPHA_DTPREL64 = 33, +R_ALPHA_DTPRELHI = 34, +R_ALPHA_DTPRELLO = 35, +R_ALPHA_DTPREL16 = 36, +R_ALPHA_GOTTPREL = 37, +R_ALPHA_TPREL64 = 38, +R_ALPHA_TPRELHI = 39, +R_ALPHA_TPRELLO = 40, +R_ALPHA_TPREL16 = 41, # Keep this the last entry. -R_ALPHA_NUM = 46 +R_ALPHA_NUM = 46, +) +SetConstants( # PowerPC relocations - -R_PPC_NONE = 0 -R_PPC_ADDR32 = 1 # 32bit absolute address -R_PPC_ADDR24 = 2 # 26bit address, 2 bits ignored. -R_PPC_ADDR16 = 3 # 16bit absolute address -R_PPC_ADDR16_LO = 4 # lower 16bit of absolute address -R_PPC_ADDR16_HI = 5 # high 16bit of absolute address -R_PPC_ADDR16_HA = 6 # adjusted high 16bit -R_PPC_ADDR14 = 7 # 16bit address, 2 bits ignored -R_PPC_ADDR14_BRTAKEN = 8 -R_PPC_ADDR14_BRNTAKEN = 9 -R_PPC_REL24 = 10 # PC relative 26 bit -R_PPC_REL14 = 11 # PC relative 16 bit -R_PPC_REL14_BRTAKEN = 12 -R_PPC_REL14_BRNTAKEN = 13 -R_PPC_GOT16 = 14 -R_PPC_GOT16_LO = 15 -R_PPC_GOT16_HI = 16 -R_PPC_GOT16_HA = 17 -R_PPC_PLTREL24 = 18 -R_PPC_COPY = 19 -R_PPC_GLOB_DAT = 20 -R_PPC_JMP_SLOT = 21 -R_PPC_RELATIVE = 22 -R_PPC_LOCAL24PC = 23 -R_PPC_UADDR32 = 24 -R_PPC_UADDR16 = 25 -R_PPC_REL32 = 26 -R_PPC_PLT32 = 27 -R_PPC_PLTREL32 = 28 -R_PPC_PLT16_LO = 29 -R_PPC_PLT16_HI = 30 -R_PPC_PLT16_HA = 31 -R_PPC_SDAREL16 = 32 -R_PPC_SECTOFF = 33 -R_PPC_SECTOFF_LO = 34 -R_PPC_SECTOFF_HI = 35 -R_PPC_SECTOFF_HA = 36 +R_PPC_NONE = 0, +R_PPC_ADDR32 = 1, # 32bit absolute address +R_PPC_ADDR24 = 2, # 26bit address, 2 bits 
ignored. +R_PPC_ADDR16 = 3, # 16bit absolute address +R_PPC_ADDR16_LO = 4, # lower 16bit of absolute address +R_PPC_ADDR16_HI = 5, # high 16bit of absolute address +R_PPC_ADDR16_HA = 6, # adjusted high 16bit +R_PPC_ADDR14 = 7, # 16bit address, 2 bits ignored +R_PPC_ADDR14_BRTAKEN = 8, +R_PPC_ADDR14_BRNTAKEN = 9, +R_PPC_REL24 = 10, # PC relative 26 bit +R_PPC_REL14 = 11, # PC relative 16 bit +R_PPC_REL14_BRTAKEN = 12, +R_PPC_REL14_BRNTAKEN = 13, +R_PPC_GOT16 = 14, +R_PPC_GOT16_LO = 15, +R_PPC_GOT16_HI = 16, +R_PPC_GOT16_HA = 17, +R_PPC_PLTREL24 = 18, +R_PPC_COPY = 19, +R_PPC_GLOB_DAT = 20, +R_PPC_JMP_SLOT = 21, +R_PPC_RELATIVE = 22, +R_PPC_LOCAL24PC = 23, +R_PPC_UADDR32 = 24, +R_PPC_UADDR16 = 25, +R_PPC_REL32 = 26, +R_PPC_PLT32 = 27, +R_PPC_PLTREL32 = 28, +R_PPC_PLT16_LO = 29, +R_PPC_PLT16_HI = 30, +R_PPC_PLT16_HA = 31, +R_PPC_SDAREL16 = 32, +R_PPC_SECTOFF = 33, +R_PPC_SECTOFF_LO = 34, +R_PPC_SECTOFF_HI = 35, +R_PPC_SECTOFF_HA = 36, # PowerPC relocations defined for the TLS access ABI. -R_PPC_TLS = 67 # none (sym+add)@tls -R_PPC_DTPMOD32 = 68 # word32 (sym+add)@dtpmod -R_PPC_TPREL16 = 69 # half16* (sym+add)@tprel -R_PPC_TPREL16_LO = 70 # half16 (sym+add)@tprel@l -R_PPC_TPREL16_HI = 71 # half16 (sym+add)@tprel@h -R_PPC_TPREL16_HA = 72 # half16 (sym+add)@tprel@ha -R_PPC_TPREL32 = 73 # word32 (sym+add)@tprel -R_PPC_DTPREL16 = 74 # half16* (sym+add)@dtprel -R_PPC_DTPREL16_LO = 75 # half16 (sym+add)@dtprel@l -R_PPC_DTPREL16_HI = 76 # half16 (sym+add)@dtprel@h -R_PPC_DTPREL16_HA = 77 # half16 (sym+add)@dtprel@ha -R_PPC_DTPREL32 = 78 # word32 (sym+add)@dtprel -R_PPC_GOT_TLSGD16 = 79 # half16* (sym+add)@got@tlsgd -R_PPC_GOT_TLSGD16_LO = 80 # half16 (sym+add)@got@tlsgd@l -R_PPC_GOT_TLSGD16_HI = 81 # half16 (sym+add)@got@tlsgd@h -R_PPC_GOT_TLSGD16_HA = 82 # half16 (sym+add)@got@tlsgd@ha -R_PPC_GOT_TLSLD16 = 83 # half16* (sym+add)@got@tlsld -R_PPC_GOT_TLSLD16_LO = 84 # half16 (sym+add)@got@tlsld@l -R_PPC_GOT_TLSLD16_HI = 85 # half16 (sym+add)@got@tlsld@h -R_PPC_GOT_TLSLD16_HA 
= 86 # half16 (sym+add)@got@tlsld@ha -R_PPC_GOT_TPREL16 = 87 # half16* (sym+add)@got@tprel -R_PPC_GOT_TPREL16_LO = 88 # half16 (sym+add)@got@tprel@l -R_PPC_GOT_TPREL16_HI = 89 # half16 (sym+add)@got@tprel@h -R_PPC_GOT_TPREL16_HA = 90 # half16 (sym+add)@got@tprel@ha -R_PPC_GOT_DTPREL16 = 91 # half16* (sym+add)@got@dtprel -R_PPC_GOT_DTPREL16_LO = 92 # half16* (sym+add)@got@dtprel@l -R_PPC_GOT_DTPREL16_HI = 93 # half16* (sym+add)@got@dtprel@h -R_PPC_GOT_DTPREL16_HA = 94 # half16* (sym+add)@got@dtprel@ha +R_PPC_TLS = 67, # none (sym+add)@tls +R_PPC_DTPMOD32 = 68, # word32 (sym+add)@dtpmod +R_PPC_TPREL16 = 69, # half16* (sym+add)@tprel +R_PPC_TPREL16_LO = 70, # half16 (sym+add)@tprel@l +R_PPC_TPREL16_HI = 71, # half16 (sym+add)@tprel@h +R_PPC_TPREL16_HA = 72, # half16 (sym+add)@tprel@ha +R_PPC_TPREL32 = 73, # word32 (sym+add)@tprel +R_PPC_DTPREL16 = 74, # half16* (sym+add)@dtprel +R_PPC_DTPREL16_LO = 75, # half16 (sym+add)@dtprel@l +R_PPC_DTPREL16_HI = 76, # half16 (sym+add)@dtprel@h +R_PPC_DTPREL16_HA = 77, # half16 (sym+add)@dtprel@ha +R_PPC_DTPREL32 = 78, # word32 (sym+add)@dtprel +R_PPC_GOT_TLSGD16 = 79, # half16* (sym+add)@got@tlsgd +R_PPC_GOT_TLSGD16_LO = 80, # half16 (sym+add)@got@tlsgd@l +R_PPC_GOT_TLSGD16_HI = 81, # half16 (sym+add)@got@tlsgd@h +R_PPC_GOT_TLSGD16_HA = 82, # half16 (sym+add)@got@tlsgd@ha +R_PPC_GOT_TLSLD16 = 83, # half16* (sym+add)@got@tlsld +R_PPC_GOT_TLSLD16_LO = 84, # half16 (sym+add)@got@tlsld@l +R_PPC_GOT_TLSLD16_HI = 85, # half16 (sym+add)@got@tlsld@h +R_PPC_GOT_TLSLD16_HA = 86, # half16 (sym+add)@got@tlsld@ha +R_PPC_GOT_TPREL16 = 87, # half16* (sym+add)@got@tprel +R_PPC_GOT_TPREL16_LO = 88, # half16 (sym+add)@got@tprel@l +R_PPC_GOT_TPREL16_HI = 89, # half16 (sym+add)@got@tprel@h +R_PPC_GOT_TPREL16_HA = 90, # half16 (sym+add)@got@tprel@ha +R_PPC_GOT_DTPREL16 = 91, # half16* (sym+add)@got@dtprel +R_PPC_GOT_DTPREL16_LO = 92, # half16* (sym+add)@got@dtprel@l +R_PPC_GOT_DTPREL16_HI = 93, # half16* (sym+add)@got@dtprel@h +R_PPC_GOT_DTPREL16_HA 
= 94, # half16* (sym+add)@got@dtprel@ha # The remaining relocs are from the Embedded ELF ABI, and are not in the SVR4 ELF ABI. -R_PPC_EMB_NADDR32 = 101 -R_PPC_EMB_NADDR16 = 102 -R_PPC_EMB_NADDR16_LO = 103 -R_PPC_EMB_NADDR16_HI = 104 -R_PPC_EMB_NADDR16_HA = 105 -R_PPC_EMB_SDAI16 = 106 -R_PPC_EMB_SDA2I16 = 107 -R_PPC_EMB_SDA2REL = 108 -R_PPC_EMB_SDA21 = 109 # 16 bit offset in SDA -R_PPC_EMB_MRKREF = 110 -R_PPC_EMB_RELSEC16 = 111 -R_PPC_EMB_RELST_LO = 112 -R_PPC_EMB_RELST_HI = 113 -R_PPC_EMB_RELST_HA = 114 -R_PPC_EMB_BIT_FLD = 115 -R_PPC_EMB_RELSDA = 116 # 16 bit relative offset in SDA +R_PPC_EMB_NADDR32 = 101, +R_PPC_EMB_NADDR16 = 102, +R_PPC_EMB_NADDR16_LO = 103, +R_PPC_EMB_NADDR16_HI = 104, +R_PPC_EMB_NADDR16_HA = 105, +R_PPC_EMB_SDAI16 = 106, +R_PPC_EMB_SDA2I16 = 107, +R_PPC_EMB_SDA2REL = 108, +R_PPC_EMB_SDA21 = 109, # 16 bit offset in SDA +R_PPC_EMB_MRKREF = 110, +R_PPC_EMB_RELSEC16 = 111, +R_PPC_EMB_RELST_LO = 112, +R_PPC_EMB_RELST_HI = 113, +R_PPC_EMB_RELST_HA = 114, +R_PPC_EMB_BIT_FLD = 115, +R_PPC_EMB_RELSDA = 116, # 16 bit relative offset in SDA # Diab tool relocations. -R_PPC_DIAB_SDA21_LO = 180 # like EMB_SDA21, but lower 16 bit -R_PPC_DIAB_SDA21_HI = 181 # like EMB_SDA21, but high 16 bit -R_PPC_DIAB_SDA21_HA = 182 # like EMB_SDA21, adjusted high 16 -R_PPC_DIAB_RELSDA_LO = 183 # like EMB_RELSDA, but lower 16 bit -R_PPC_DIAB_RELSDA_HI = 184 # like EMB_RELSDA, but high 16 bit -R_PPC_DIAB_RELSDA_HA = 185 # like EMB_RELSDA, adjusted high 16 +R_PPC_DIAB_SDA21_LO = 180, # like EMB_SDA21, but lower 16 bit +R_PPC_DIAB_SDA21_HI = 181, # like EMB_SDA21, but high 16 bit +R_PPC_DIAB_SDA21_HA = 182, # like EMB_SDA21, adjusted high 16 +R_PPC_DIAB_RELSDA_LO = 183, # like EMB_RELSDA, but lower 16 bit +R_PPC_DIAB_RELSDA_HI = 184, # like EMB_RELSDA, but high 16 bit +R_PPC_DIAB_RELSDA_HA = 185, # like EMB_RELSDA, adjusted high 16 # GNU extension to support local ifunc. -R_PPC_IRELATIVE = 248 +R_PPC_IRELATIVE = 248, # GNU relocs used in PIC code sequences. 
-R_PPC_REL16 = 249 # half16 (sym+add-.) -R_PPC_REL16_LO = 250 # half16 (sym+add-.)@l -R_PPC_REL16_HI = 251 # half16 (sym+add-.)@h -R_PPC_REL16_HA = 252 # half16 (sym+add-.)@ha +R_PPC_REL16 = 249, # half16 (sym+add-.) +R_PPC_REL16_LO = 250, # half16 (sym+add-.)@l +R_PPC_REL16_HI = 251, # half16 (sym+add-.)@h +R_PPC_REL16_HA = 252, # half16 (sym+add-.)@ha # This is a phony reloc to handle any old fashioned TOC16 references that may still be in object files. -R_PPC_TOC16 = 255 +R_PPC_TOC16 = 255, +) +SetConstants( # PowerPC64 relocations defined by the ABIs -R_PPC64_NONE = R_PPC_NONE -R_PPC64_ADDR32 = R_PPC_ADDR32 # 32bit absolute address -R_PPC64_ADDR24 = R_PPC_ADDR24 # 26bit address, word aligned -R_PPC64_ADDR16 = R_PPC_ADDR16 # 16bit absolute address -R_PPC64_ADDR16_LO = R_PPC_ADDR16_LO # lower 16bits of address -R_PPC64_ADDR16_HI = R_PPC_ADDR16_HI # high 16bits of address. -R_PPC64_ADDR16_HA = R_PPC_ADDR16_HA # adjusted high 16bits. -R_PPC64_ADDR14 = R_PPC_ADDR14 # 16bit address, word aligned -R_PPC64_ADDR14_BRTAKEN = R_PPC_ADDR14_BRTAKEN -R_PPC64_ADDR14_BRNTAKEN = R_PPC_ADDR14_BRNTAKEN -R_PPC64_REL24 = R_PPC_REL24 # PC-rel. 
26 bit, word aligned -R_PPC64_REL14 = R_PPC_REL14 # PC relative 16 bit -R_PPC64_REL14_BRTAKEN = R_PPC_REL14_BRTAKEN -R_PPC64_REL14_BRNTAKEN = R_PPC_REL14_BRNTAKEN -R_PPC64_GOT16 = R_PPC_GOT16 -R_PPC64_GOT16_LO = R_PPC_GOT16_LO -R_PPC64_GOT16_HI = R_PPC_GOT16_HI -R_PPC64_GOT16_HA = R_PPC_GOT16_HA - -R_PPC64_COPY = R_PPC_COPY -R_PPC64_GLOB_DAT = R_PPC_GLOB_DAT -R_PPC64_JMP_SLOT = R_PPC_JMP_SLOT -R_PPC64_RELATIVE = R_PPC_RELATIVE - -R_PPC64_UADDR32 = R_PPC_UADDR32 -R_PPC64_UADDR16 = R_PPC_UADDR16 -R_PPC64_REL32 = R_PPC_REL32 -R_PPC64_PLT32 = R_PPC_PLT32 -R_PPC64_PLTREL32 = R_PPC_PLTREL32 -R_PPC64_PLT16_LO = R_PPC_PLT16_LO -R_PPC64_PLT16_HI = R_PPC_PLT16_HI -R_PPC64_PLT16_HA = R_PPC_PLT16_HA - -R_PPC64_SECTOFF = R_PPC_SECTOFF -R_PPC64_SECTOFF_LO = R_PPC_SECTOFF_LO -R_PPC64_SECTOFF_HI = R_PPC_SECTOFF_HI -R_PPC64_SECTOFF_HA = R_PPC_SECTOFF_HA -R_PPC64_ADDR30 = 37 # word30 (S + A - P) >> 2 -R_PPC64_ADDR64 = 38 # doubleword64 S + A -R_PPC64_ADDR16_HIGHER = 39 # half16 #higher(S + A) -R_PPC64_ADDR16_HIGHERA = 40 # half16 #highera(S + A) -R_PPC64_ADDR16_HIGHEST = 41 # half16 #highest(S + A) -R_PPC64_ADDR16_HIGHESTA = 42 # half16 #highesta(S + A) -R_PPC64_UADDR64 = 43 # doubleword64 S + A -R_PPC64_REL64 = 44 # doubleword64 S + A - P -R_PPC64_PLT64 = 45 # doubleword64 L + A -R_PPC64_PLTREL64 = 46 # doubleword64 L + A - P -R_PPC64_TOC16 = 47 # half16* S + A - .TOC -R_PPC64_TOC16_LO = 48 # half16 #lo(S + A - .TOC.) -R_PPC64_TOC16_HI = 49 # half16 #hi(S + A - .TOC.) -R_PPC64_TOC16_HA = 50 # half16 #ha(S + A - .TOC.) 
-R_PPC64_TOC = 51 # doubleword64 .TOC -R_PPC64_PLTGOT16 = 52 # half16* M + A -R_PPC64_PLTGOT16_LO = 53 # half16 #lo(M + A) -R_PPC64_PLTGOT16_HI = 54 # half16 #hi(M + A) -R_PPC64_PLTGOT16_HA = 55 # half16 #ha(M + A) - -R_PPC64_ADDR16_DS = 56 # half16ds* (S + A) >> 2 -R_PPC64_ADDR16_LO_DS = 57 # half16ds #lo(S + A) >> 2 -R_PPC64_GOT16_DS = 58 # half16ds* (G + A) >> 2 -R_PPC64_GOT16_LO_DS = 59 # half16ds #lo(G + A) >> 2 -R_PPC64_PLT16_LO_DS = 60 # half16ds #lo(L + A) >> 2 -R_PPC64_SECTOFF_DS = 61 # half16ds* (R + A) >> 2 -R_PPC64_SECTOFF_LO_DS = 62 # half16ds #lo(R + A) >> 2 -R_PPC64_TOC16_DS = 63 # half16ds* (S + A - .TOC.) >> 2 -R_PPC64_TOC16_LO_DS = 64 # half16ds #lo(S + A - .TOC.) >> 2 -R_PPC64_PLTGOT16_DS = 65 # half16ds* (M + A) >> 2 -R_PPC64_PLTGOT16_LO_DS = 66 # half16ds #lo(M + A) >> 2 +R_PPC64_NONE = R_PPC_NONE, +R_PPC64_ADDR32 = R_PPC_ADDR32, # 32bit absolute address +R_PPC64_ADDR24 = R_PPC_ADDR24, # 26bit address, word aligned +R_PPC64_ADDR16 = R_PPC_ADDR16, # 16bit absolute address +R_PPC64_ADDR16_LO = R_PPC_ADDR16_LO, # lower 16bits of address +R_PPC64_ADDR16_HI = R_PPC_ADDR16_HI, # high 16bits of address. +R_PPC64_ADDR16_HA = R_PPC_ADDR16_HA, # adjusted high 16bits. +R_PPC64_ADDR14 = R_PPC_ADDR14, # 16bit address, word aligned +R_PPC64_ADDR14_BRTAKEN = R_PPC_ADDR14_BRTAKEN, +R_PPC64_ADDR14_BRNTAKEN = R_PPC_ADDR14_BRNTAKEN, +R_PPC64_REL24 = R_PPC_REL24, # PC-rel. 
26 bit, word aligned +R_PPC64_REL14 = R_PPC_REL14, # PC relative 16 bit +R_PPC64_REL14_BRTAKEN = R_PPC_REL14_BRTAKEN, +R_PPC64_REL14_BRNTAKEN = R_PPC_REL14_BRNTAKEN, +R_PPC64_GOT16 = R_PPC_GOT16, +R_PPC64_GOT16_LO = R_PPC_GOT16_LO, +R_PPC64_GOT16_HI = R_PPC_GOT16_HI, +R_PPC64_GOT16_HA = R_PPC_GOT16_HA, + +R_PPC64_COPY = R_PPC_COPY, +R_PPC64_GLOB_DAT = R_PPC_GLOB_DAT, +R_PPC64_JMP_SLOT = R_PPC_JMP_SLOT, +R_PPC64_RELATIVE = R_PPC_RELATIVE, + +R_PPC64_UADDR32 = R_PPC_UADDR32, +R_PPC64_UADDR16 = R_PPC_UADDR16, +R_PPC64_REL32 = R_PPC_REL32, +R_PPC64_PLT32 = R_PPC_PLT32, +R_PPC64_PLTREL32 = R_PPC_PLTREL32, +R_PPC64_PLT16_LO = R_PPC_PLT16_LO, +R_PPC64_PLT16_HI = R_PPC_PLT16_HI, +R_PPC64_PLT16_HA = R_PPC_PLT16_HA, + +R_PPC64_SECTOFF = R_PPC_SECTOFF, +R_PPC64_SECTOFF_LO = R_PPC_SECTOFF_LO, +R_PPC64_SECTOFF_HI = R_PPC_SECTOFF_HI, +R_PPC64_SECTOFF_HA = R_PPC_SECTOFF_HA, +R_PPC64_ADDR30 = 37, # word30 (S + A - P) >> 2 +R_PPC64_ADDR64 = 38, # doubleword64 S + A +R_PPC64_ADDR16_HIGHER = 39, # half16 #higher(S + A) +R_PPC64_ADDR16_HIGHERA = 40, # half16 #highera(S + A) +R_PPC64_ADDR16_HIGHEST = 41, # half16 #highest(S + A) +R_PPC64_ADDR16_HIGHESTA = 42, # half16 #highesta(S + A) +R_PPC64_UADDR64 = 43, # doubleword64 S + A +R_PPC64_REL64 = 44, # doubleword64 S + A - P +R_PPC64_PLT64 = 45, # doubleword64 L + A +R_PPC64_PLTREL64 = 46, # doubleword64 L + A - P +R_PPC64_TOC16 = 47, # half16* S + A - .TOC +R_PPC64_TOC16_LO = 48, # half16 #lo(S + A - .TOC.) +R_PPC64_TOC16_HI = 49, # half16 #hi(S + A - .TOC.) +R_PPC64_TOC16_HA = 50, # half16 #ha(S + A - .TOC.) 
+R_PPC64_TOC = 51, # doubleword64 .TOC +R_PPC64_PLTGOT16 = 52, # half16* M + A +R_PPC64_PLTGOT16_LO = 53, # half16 #lo(M + A) +R_PPC64_PLTGOT16_HI = 54, # half16 #hi(M + A) +R_PPC64_PLTGOT16_HA = 55, # half16 #ha(M + A) + +R_PPC64_ADDR16_DS = 56, # half16ds* (S + A) >> 2 +R_PPC64_ADDR16_LO_DS = 57, # half16ds #lo(S + A) >> 2 +R_PPC64_GOT16_DS = 58, # half16ds* (G + A) >> 2 +R_PPC64_GOT16_LO_DS = 59, # half16ds #lo(G + A) >> 2 +R_PPC64_PLT16_LO_DS = 60, # half16ds #lo(L + A) >> 2 +R_PPC64_SECTOFF_DS = 61, # half16ds* (R + A) >> 2 +R_PPC64_SECTOFF_LO_DS = 62, # half16ds #lo(R + A) >> 2 +R_PPC64_TOC16_DS = 63, # half16ds* (S + A - .TOC.) >> 2 +R_PPC64_TOC16_LO_DS = 64, # half16ds #lo(S + A - .TOC.) >> 2 +R_PPC64_PLTGOT16_DS = 65, # half16ds* (M + A) >> 2 +R_PPC64_PLTGOT16_LO_DS = 66, # half16ds #lo(M + A) >> 2 # PowerPC64 relocations defined for the TLS access ABI. -R_PPC64_TLS = 67 # none (sym+add)@tls -R_PPC64_DTPMOD64 = 68 # doubleword64 (sym+add)@dtpmod -R_PPC64_TPREL16 = 69 # half16* (sym+add)@tprel -R_PPC64_TPREL16_LO = 70 # half16 (sym+add)@tprel@l -R_PPC64_TPREL16_HI = 71 # half16 (sym+add)@tprel@h -R_PPC64_TPREL16_HA = 72 # half16 (sym+add)@tprel@ha -R_PPC64_TPREL64 = 73 # doubleword64 (sym+add)@tprel -R_PPC64_DTPREL16 = 74 # half16* (sym+add)@dtprel -R_PPC64_DTPREL16_LO = 75 # half16 (sym+add)@dtprel@l -R_PPC64_DTPREL16_HI = 76 # half16 (sym+add)@dtprel@h -R_PPC64_DTPREL16_HA = 77 # half16 (sym+add)@dtprel@ha -R_PPC64_DTPREL64 = 78 # doubleword64 (sym+add)@dtprel -R_PPC64_GOT_TLSGD16 = 79 # half16* (sym+add)@got@tlsgd -R_PPC64_GOT_TLSGD16_LO = 80 # half16 (sym+add)@got@tlsgd@l -R_PPC64_GOT_TLSGD16_HI = 81 # half16 (sym+add)@got@tlsgd@h -R_PPC64_GOT_TLSGD16_HA = 82 # half16 (sym+add)@got@tlsgd@ha -R_PPC64_GOT_TLSLD16 = 83 # half16* (sym+add)@got@tlsld -R_PPC64_GOT_TLSLD16_LO = 84 # half16 (sym+add)@got@tlsld@l -R_PPC64_GOT_TLSLD16_HI = 85 # half16 (sym+add)@got@tlsld@h -R_PPC64_GOT_TLSLD16_HA = 86 # half16 (sym+add)@got@tlsld@ha -R_PPC64_GOT_TPREL16_DS = 87 # 
half16ds* (sym+add)@got@tprel -R_PPC64_GOT_TPREL16_LO_DS = 88 # half16ds (sym+add)@got@tprel@l -R_PPC64_GOT_TPREL16_HI = 89 # half16 (sym+add)@got@tprel@h -R_PPC64_GOT_TPREL16_HA = 90 # half16 (sym+add)@got@tprel@ha -R_PPC64_GOT_DTPREL16_DS = 91 # half16ds* (sym+add)@got@dtprel -R_PPC64_GOT_DTPREL16_LO_DS = 92 # half16ds (sym+add)@got@dtprel@l -R_PPC64_GOT_DTPREL16_HI = 93 # half16 (sym+add)@got@dtprel@h -R_PPC64_GOT_DTPREL16_HA = 94 # half16 (sym+add)@got@dtprel@ha -R_PPC64_TPREL16_DS = 95 # half16ds* (sym+add)@tprel -R_PPC64_TPREL16_LO_DS = 96 # half16ds (sym+add)@tprel@l -R_PPC64_TPREL16_HIGHER = 97 # half16 (sym+add)@tprel@higher -R_PPC64_TPREL16_HIGHERA = 98 # half16 (sym+add)@tprel@highera -R_PPC64_TPREL16_HIGHEST = 99 # half16 (sym+add)@tprel@highest -R_PPC64_TPREL16_HIGHESTA = 100 # half16 (sym+add)@tprel@highesta -R_PPC64_DTPREL16_DS = 101 # half16ds* (sym+add)@dtprel -R_PPC64_DTPREL16_LO_DS = 102 # half16ds (sym+add)@dtprel@l -R_PPC64_DTPREL16_HIGHER = 103 # half16 (sym+add)@dtprel@higher -R_PPC64_DTPREL16_HIGHERA = 104 # half16 (sym+add)@dtprel@highera -R_PPC64_DTPREL16_HIGHEST = 105 # half16 (sym+add)@dtprel@highest -R_PPC64_DTPREL16_HIGHESTA = 106 # half16 (sym+add)@dtprel@highesta +R_PPC64_TLS = 67, # none (sym+add)@tls +R_PPC64_DTPMOD64 = 68, # doubleword64 (sym+add)@dtpmod +R_PPC64_TPREL16 = 69, # half16* (sym+add)@tprel +R_PPC64_TPREL16_LO = 70, # half16 (sym+add)@tprel@l +R_PPC64_TPREL16_HI = 71, # half16 (sym+add)@tprel@h +R_PPC64_TPREL16_HA = 72, # half16 (sym+add)@tprel@ha +R_PPC64_TPREL64 = 73, # doubleword64 (sym+add)@tprel +R_PPC64_DTPREL16 = 74, # half16* (sym+add)@dtprel +R_PPC64_DTPREL16_LO = 75, # half16 (sym+add)@dtprel@l +R_PPC64_DTPREL16_HI = 76, # half16 (sym+add)@dtprel@h +R_PPC64_DTPREL16_HA = 77, # half16 (sym+add)@dtprel@ha +R_PPC64_DTPREL64 = 78, # doubleword64 (sym+add)@dtprel +R_PPC64_GOT_TLSGD16 = 79, # half16* (sym+add)@got@tlsgd +R_PPC64_GOT_TLSGD16_LO = 80, # half16 (sym+add)@got@tlsgd@l +R_PPC64_GOT_TLSGD16_HI = 81, # 
half16 (sym+add)@got@tlsgd@h +R_PPC64_GOT_TLSGD16_HA = 82, # half16 (sym+add)@got@tlsgd@ha +R_PPC64_GOT_TLSLD16 = 83, # half16* (sym+add)@got@tlsld +R_PPC64_GOT_TLSLD16_LO = 84, # half16 (sym+add)@got@tlsld@l +R_PPC64_GOT_TLSLD16_HI = 85, # half16 (sym+add)@got@tlsld@h +R_PPC64_GOT_TLSLD16_HA = 86, # half16 (sym+add)@got@tlsld@ha +R_PPC64_GOT_TPREL16_DS = 87, # half16ds* (sym+add)@got@tprel +R_PPC64_GOT_TPREL16_LO_DS = 88, # half16ds (sym+add)@got@tprel@l +R_PPC64_GOT_TPREL16_HI = 89, # half16 (sym+add)@got@tprel@h +R_PPC64_GOT_TPREL16_HA = 90, # half16 (sym+add)@got@tprel@ha +R_PPC64_GOT_DTPREL16_DS = 91, # half16ds* (sym+add)@got@dtprel +R_PPC64_GOT_DTPREL16_LO_DS = 92, # half16ds (sym+add)@got@dtprel@l +R_PPC64_GOT_DTPREL16_HI = 93, # half16 (sym+add)@got@dtprel@h +R_PPC64_GOT_DTPREL16_HA = 94, # half16 (sym+add)@got@dtprel@ha +R_PPC64_TPREL16_DS = 95, # half16ds* (sym+add)@tprel +R_PPC64_TPREL16_LO_DS = 96, # half16ds (sym+add)@tprel@l +R_PPC64_TPREL16_HIGHER = 97, # half16 (sym+add)@tprel@higher +R_PPC64_TPREL16_HIGHERA = 98, # half16 (sym+add)@tprel@highera +R_PPC64_TPREL16_HIGHEST = 99, # half16 (sym+add)@tprel@highest +R_PPC64_TPREL16_HIGHESTA = 100, # half16 (sym+add)@tprel@highesta +R_PPC64_DTPREL16_DS = 101, # half16ds* (sym+add)@dtprel +R_PPC64_DTPREL16_LO_DS = 102, # half16ds (sym+add)@dtprel@l +R_PPC64_DTPREL16_HIGHER = 103, # half16 (sym+add)@dtprel@higher +R_PPC64_DTPREL16_HIGHERA = 104, # half16 (sym+add)@dtprel@highera +R_PPC64_DTPREL16_HIGHEST = 105, # half16 (sym+add)@dtprel@highest +R_PPC64_DTPREL16_HIGHESTA = 106, # half16 (sym+add)@dtprel@highesta # GNU extension to support local ifunc. -R_PPC64_JMP_IREL = 247 -R_PPC64_IRELATIVE = 248 -R_PPC64_REL16 = 249 # half16 (sym+add-.) -R_PPC64_REL16_LO = 250 # half16 (sym+add-.)@l -R_PPC64_REL16_HI = 251 # half16 (sym+add-.)@h -R_PPC64_REL16_HA = 252 # half16 (sym+add-.)@ha - -# PowerPC64 specific values for the Dyn d_tag field. 
-DT_PPC64_GLINK = (DT_LOPROC + 0) -DT_PPC64_OPD = (DT_LOPROC + 1) -DT_PPC64_OPDSZ = (DT_LOPROC + 2) -DT_PPC64_NUM = 3 - +R_PPC64_JMP_IREL = 247, +R_PPC64_IRELATIVE = 248, +R_PPC64_REL16 = 249, # half16 (sym+add-.) +R_PPC64_REL16_LO = 250, # half16 (sym+add-.)@l +R_PPC64_REL16_HI = 251, # half16 (sym+add-.)@h +R_PPC64_REL16_HA = 252, # half16 (sym+add-.)@ha +) + +SetConstants( # ARM relocations - -R_ARM_NONE = 0 # No reloc -R_ARM_PC24 = 1 # PC relative 26 bit branch -R_ARM_ABS32 = 2 # Direct 32 bit -R_ARM_REL32 = 3 # PC relative 32 bit -R_ARM_PC13 = 4 -R_ARM_ABS16 = 5 # Direct 16 bit -R_ARM_ABS12 = 6 # Direct 12 bit -R_ARM_THM_ABS5 = 7 -R_ARM_ABS8 = 8 # Direct 8 bit -R_ARM_SBREL32 = 9 -R_ARM_THM_PC22 = 10 -R_ARM_THM_PC8 = 11 -R_ARM_AMP_VCALL9 = 12 -R_ARM_SWI24 = 13 # Obsolete static relocation. -R_ARM_TLS_DESC = 13 # Dynamic relocation. -R_ARM_THM_SWI8 = 14 -R_ARM_XPC25 = 15 -R_ARM_THM_XPC22 = 16 -R_ARM_TLS_DTPMOD32 = 17 # ID of module containing symbol -R_ARM_TLS_DTPOFF32 = 18 # Offset in TLS block -R_ARM_TLS_TPOFF32 = 19 # Offset in static TLS block -R_ARM_COPY = 20 # Copy symbol at runtime -R_ARM_GLOB_DAT = 21 # Create GOT entry -R_ARM_JUMP_SLOT = 22 # Create PLT entry -R_ARM_RELATIVE = 23 # Adjust by program base -R_ARM_GOTOFF = 24 # 32 bit offset to GOT -R_ARM_GOTPC = 25 # 32 bit PC relative offset to GOT -R_ARM_GOT32 = 26 # 32 bit GOT entry -R_ARM_PLT32 = 27 # 32 bit PLT address -R_ARM_ALU_PCREL_7_0 = 32 -R_ARM_ALU_PCREL_15_8 = 33 -R_ARM_ALU_PCREL_23_15 = 34 -R_ARM_LDR_SBREL_11_0 = 35 -R_ARM_ALU_SBREL_19_12 = 36 -R_ARM_ALU_SBREL_27_20 = 37 -R_ARM_TLS_GOTDESC = 90 -R_ARM_TLS_CALL = 91 -R_ARM_TLS_DESCSEQ = 92 -R_ARM_THM_TLS_CALL = 93 -R_ARM_GNU_VTENTRY = 100 -R_ARM_GNU_VTINHERIT = 101 -R_ARM_THM_PC11 = 102 # thumb unconditional branch -R_ARM_THM_PC9 = 103 # thumb conditional branch -R_ARM_TLS_GD32 = 104 # PC-rel 32 bit for global dynamic thread local data -R_ARM_TLS_LDM32 = 105 # PC-rel 32 bit for local dynamic thread local data -R_ARM_TLS_LDO32 = 106 # 32 bit 
offset relative to TLS block -R_ARM_TLS_IE32 = 107 # PC-rel 32 bit for GOT entry of static TLS block offset -R_ARM_TLS_LE32 = 108 # 32 bit offset relative to static TLS block -R_ARM_THM_TLS_DESCSEQ = 129 -R_ARM_IRELATIVE = 160 -R_ARM_RXPC25 = 249 -R_ARM_RSBREL32 = 250 -R_ARM_THM_RPC22 = 251 -R_ARM_RREL32 = 252 -R_ARM_RABS22 = 253 -R_ARM_RPC24 = 254 -R_ARM_RBASE = 255 +R_ARM_NONE = 0, # No reloc +R_ARM_PC24 = 1, # PC relative 26 bit branch +R_ARM_ABS32 = 2, # Direct 32 bit +R_ARM_REL32 = 3, # PC relative 32 bit +R_ARM_PC13 = 4, +R_ARM_ABS16 = 5, # Direct 16 bit +R_ARM_ABS12 = 6, # Direct 12 bit +R_ARM_THM_ABS5 = 7, +R_ARM_ABS8 = 8, # Direct 8 bit +R_ARM_SBREL32 = 9, +R_ARM_THM_PC22 = 10, +R_ARM_THM_PC8 = 11, +R_ARM_AMP_VCALL9 = 12, +R_ARM_SWI24 = 13, # Obsolete static relocation. +R_ARM_TLS_DESC = 13, # Dynamic relocation. +R_ARM_THM_SWI8 = 14, +R_ARM_XPC25 = 15, +R_ARM_THM_XPC22 = 16, +R_ARM_TLS_DTPMOD32 = 17, # ID of module containing symbol +R_ARM_TLS_DTPOFF32 = 18, # Offset in TLS block +R_ARM_TLS_TPOFF32 = 19, # Offset in static TLS block +R_ARM_COPY = 20, # Copy symbol at runtime +R_ARM_GLOB_DAT = 21, # Create GOT entry +R_ARM_JUMP_SLOT = 22, # Create PLT entry +R_ARM_RELATIVE = 23, # Adjust by program base +R_ARM_GOTOFF = 24, # 32 bit offset to GOT +R_ARM_GOTPC = 25, # 32 bit PC relative offset to GOT +R_ARM_GOT32 = 26, # 32 bit GOT entry +R_ARM_PLT32 = 27, # 32 bit PLT address +R_ARM_ALU_PCREL_7_0 = 32, +R_ARM_ALU_PCREL_15_8 = 33, +R_ARM_ALU_PCREL_23_15 = 34, +R_ARM_LDR_SBREL_11_0 = 35, +R_ARM_ALU_SBREL_19_12 = 36, +R_ARM_ALU_SBREL_27_20 = 37, +R_ARM_TLS_GOTDESC = 90, +R_ARM_TLS_CALL = 91, +R_ARM_TLS_DESCSEQ = 92, +R_ARM_THM_TLS_CALL = 93, +R_ARM_GNU_VTENTRY = 100, +R_ARM_GNU_VTINHERIT = 101, +R_ARM_THM_PC11 = 102, # thumb unconditional branch +R_ARM_THM_PC9 = 103, # thumb conditional branch +R_ARM_TLS_GD32 = 104, # PC-rel 32 bit for global dynamic thread local data +R_ARM_TLS_LDM32 = 105, # PC-rel 32 bit for local dynamic thread local data +R_ARM_TLS_LDO32 
= 106, # 32 bit offset relative to TLS block +R_ARM_TLS_IE32 = 107, # PC-rel 32 bit for GOT entry of static TLS block offset +R_ARM_TLS_LE32 = 108, # 32 bit offset relative to static TLS block +R_ARM_THM_TLS_DESCSEQ = 129, +R_ARM_IRELATIVE = 160, +R_ARM_RXPC25 = 249, +R_ARM_RSBREL32 = 250, +R_ARM_THM_RPC22 = 251, +R_ARM_RREL32 = 252, +R_ARM_RABS22 = 253, +R_ARM_RPC24 = 254, +R_ARM_RBASE = 255, # Keep this the last entry. -R_ARM_NUM = 256 +R_ARM_NUM = 256, +no_name = ('R_ARM_SWI24',) +) +SetConstants( # IA-64 relocations - -R_IA64_NONE = 0x00 # none -R_IA64_IMM14 = 0x21 # symbol + addend, add imm14 -R_IA64_IMM22 = 0x22 # symbol + addend, add imm22 -R_IA64_IMM64 = 0x23 # symbol + addend, mov imm64 -R_IA64_DIR32MSB = 0x24 # symbol + addend, data4 MSB -R_IA64_DIR32LSB = 0x25 # symbol + addend, data4 LSB -R_IA64_DIR64MSB = 0x26 # symbol + addend, data8 MSB -R_IA64_DIR64LSB = 0x27 # symbol + addend, data8 LSB -R_IA64_GPREL22 = 0x2a # @gprel(sym + add), add imm22 -R_IA64_GPREL64I = 0x2b # @gprel(sym + add), mov imm64 -R_IA64_GPREL32MSB = 0x2c # @gprel(sym + add), data4 MSB -R_IA64_GPREL32LSB = 0x2d # @gprel(sym + add), data4 LSB -R_IA64_GPREL64MSB = 0x2e # @gprel(sym + add), data8 MSB -R_IA64_GPREL64LSB = 0x2f # @gprel(sym + add), data8 LSB -R_IA64_LTOFF22 = 0x32 # @ltoff(sym + add), add imm22 -R_IA64_LTOFF64I = 0x33 # @ltoff(sym + add), mov imm64 -R_IA64_PLTOFF22 = 0x3a # @pltoff(sym + add), add imm22 -R_IA64_PLTOFF64I = 0x3b # @pltoff(sym + add), mov imm64 -R_IA64_PLTOFF64MSB = 0x3e # @pltoff(sym + add), data8 MSB -R_IA64_PLTOFF64LSB = 0x3f # @pltoff(sym + add), data8 LSB -R_IA64_FPTR64I = 0x43 # @fptr(sym + add), mov imm64 -R_IA64_FPTR32MSB = 0x44 # @fptr(sym + add), data4 MSB -R_IA64_FPTR32LSB = 0x45 # @fptr(sym + add), data4 LSB -R_IA64_FPTR64MSB = 0x46 # @fptr(sym + add), data8 MSB -R_IA64_FPTR64LSB = 0x47 # @fptr(sym + add), data8 LSB -R_IA64_PCREL60B = 0x48 # @pcrel(sym + add), brl -R_IA64_PCREL21B = 0x49 # @pcrel(sym + add), ptb, call -R_IA64_PCREL21M = 0x4a # 
@pcrel(sym + add), chk.s -R_IA64_PCREL21F = 0x4b # @pcrel(sym + add), fchkf -R_IA64_PCREL32MSB = 0x4c # @pcrel(sym + add), data4 MSB -R_IA64_PCREL32LSB = 0x4d # @pcrel(sym + add), data4 LSB -R_IA64_PCREL64MSB = 0x4e # @pcrel(sym + add), data8 MSB -R_IA64_PCREL64LSB = 0x4f # @pcrel(sym + add), data8 LSB -R_IA64_LTOFF_FPTR22 = 0x52 # @ltoff(@fptr(s+a)), imm22 -R_IA64_LTOFF_FPTR64I = 0x53 # @ltoff(@fptr(s+a)), imm64 -R_IA64_LTOFF_FPTR32MSB = 0x54 # @ltoff(@fptr(s+a)), data4 MSB -R_IA64_LTOFF_FPTR32LSB = 0x55 # @ltoff(@fptr(s+a)), data4 LSB -R_IA64_LTOFF_FPTR64MSB = 0x56 # @ltoff(@fptr(s+a)), data8 MSB -R_IA64_LTOFF_FPTR64LSB = 0x57 # @ltoff(@fptr(s+a)), data8 LSB -R_IA64_SEGREL32MSB = 0x5c # @segrel(sym + add), data4 MSB -R_IA64_SEGREL32LSB = 0x5d # @segrel(sym + add), data4 LSB -R_IA64_SEGREL64MSB = 0x5e # @segrel(sym + add), data8 MSB -R_IA64_SEGREL64LSB = 0x5f # @segrel(sym + add), data8 LSB -R_IA64_SECREL32MSB = 0x64 # @secrel(sym + add), data4 MSB -R_IA64_SECREL32LSB = 0x65 # @secrel(sym + add), data4 LSB -R_IA64_SECREL64MSB = 0x66 # @secrel(sym + add), data8 MSB -R_IA64_SECREL64LSB = 0x67 # @secrel(sym + add), data8 LSB -R_IA64_REL32MSB = 0x6c # data 4 + REL -R_IA64_REL32LSB = 0x6d # data 4 + REL -R_IA64_REL64MSB = 0x6e # data 8 + REL -R_IA64_REL64LSB = 0x6f # data 8 + REL -R_IA64_LTV32MSB = 0x74 # symbol + addend, data4 MSB -R_IA64_LTV32LSB = 0x75 # symbol + addend, data4 LSB -R_IA64_LTV64MSB = 0x76 # symbol + addend, data8 MSB -R_IA64_LTV64LSB = 0x77 # symbol + addend, data8 LSB -R_IA64_PCREL21BI = 0x79 # @pcrel(sym + add), 21bit inst -R_IA64_PCREL22 = 0x7a # @pcrel(sym + add), 22bit inst -R_IA64_PCREL64I = 0x7b # @pcrel(sym + add), 64bit inst -R_IA64_IPLTMSB = 0x80 # dynamic reloc, imported PLT, MSB -R_IA64_IPLTLSB = 0x81 # dynamic reloc, imported PLT, LSB -R_IA64_COPY = 0x84 # copy relocation -R_IA64_SUB = 0x85 # Addend and symbol difference -R_IA64_LTOFF22X = 0x86 # LTOFF22, relaxable. -R_IA64_LDXMOV = 0x87 # Use of LTOFF22X. 
-R_IA64_TPREL14 = 0x91 # @tprel(sym + add), imm14 -R_IA64_TPREL22 = 0x92 # @tprel(sym + add), imm22 -R_IA64_TPREL64I = 0x93 # @tprel(sym + add), imm64 -R_IA64_TPREL64MSB = 0x96 # @tprel(sym + add), data8 MSB -R_IA64_TPREL64LSB = 0x97 # @tprel(sym + add), data8 LSB -R_IA64_LTOFF_TPREL22 = 0x9a # @ltoff(@tprel(s+a)), imm2 -R_IA64_DTPMOD64MSB = 0xa6 # @dtpmod(sym + add), data8 MSB -R_IA64_DTPMOD64LSB = 0xa7 # @dtpmod(sym + add), data8 LSB -R_IA64_LTOFF_DTPMOD22 = 0xaa # @ltoff(@dtpmod(sym + add)), imm22 -R_IA64_DTPREL14 = 0xb1 # @dtprel(sym + add), imm14 -R_IA64_DTPREL22 = 0xb2 # @dtprel(sym + add), imm22 -R_IA64_DTPREL64I = 0xb3 # @dtprel(sym + add), imm64 -R_IA64_DTPREL32MSB = 0xb4 # @dtprel(sym + add), data4 MSB -R_IA64_DTPREL32LSB = 0xb5 # @dtprel(sym + add), data4 LSB -R_IA64_DTPREL64MSB = 0xb6 # @dtprel(sym + add), data8 MSB -R_IA64_DTPREL64LSB = 0xb7 # @dtprel(sym + add), data8 LSB -R_IA64_LTOFF_DTPREL22 = 0xba # @ltoff(@dtprel(s+a)), imm22 - +R_IA64_NONE = 0x00, # none +R_IA64_IMM14 = 0x21, # symbol + addend, add imm14 +R_IA64_IMM22 = 0x22, # symbol + addend, add imm22 +R_IA64_IMM64 = 0x23, # symbol + addend, mov imm64 +R_IA64_DIR32MSB = 0x24, # symbol + addend, data4 MSB +R_IA64_DIR32LSB = 0x25, # symbol + addend, data4 LSB +R_IA64_DIR64MSB = 0x26, # symbol + addend, data8 MSB +R_IA64_DIR64LSB = 0x27, # symbol + addend, data8 LSB +R_IA64_GPREL22 = 0x2a, # @gprel(sym + add), add imm22 +R_IA64_GPREL64I = 0x2b, # @gprel(sym + add), mov imm64 +R_IA64_GPREL32MSB = 0x2c, # @gprel(sym + add), data4 MSB +R_IA64_GPREL32LSB = 0x2d, # @gprel(sym + add), data4 LSB +R_IA64_GPREL64MSB = 0x2e, # @gprel(sym + add), data8 MSB +R_IA64_GPREL64LSB = 0x2f, # @gprel(sym + add), data8 LSB +R_IA64_LTOFF22 = 0x32, # @ltoff(sym + add), add imm22 +R_IA64_LTOFF64I = 0x33, # @ltoff(sym + add), mov imm64 +R_IA64_PLTOFF22 = 0x3a, # @pltoff(sym + add), add imm22 +R_IA64_PLTOFF64I = 0x3b, # @pltoff(sym + add), mov imm64 +R_IA64_PLTOFF64MSB = 0x3e, # @pltoff(sym + add), data8 MSB 
+R_IA64_PLTOFF64LSB = 0x3f, # @pltoff(sym + add), data8 LSB +R_IA64_FPTR64I = 0x43, # @fptr(sym + add), mov imm64 +R_IA64_FPTR32MSB = 0x44, # @fptr(sym + add), data4 MSB +R_IA64_FPTR32LSB = 0x45, # @fptr(sym + add), data4 LSB +R_IA64_FPTR64MSB = 0x46, # @fptr(sym + add), data8 MSB +R_IA64_FPTR64LSB = 0x47, # @fptr(sym + add), data8 LSB +R_IA64_PCREL60B = 0x48, # @pcrel(sym + add), brl +R_IA64_PCREL21B = 0x49, # @pcrel(sym + add), ptb, call +R_IA64_PCREL21M = 0x4a, # @pcrel(sym + add), chk.s +R_IA64_PCREL21F = 0x4b, # @pcrel(sym + add), fchkf +R_IA64_PCREL32MSB = 0x4c, # @pcrel(sym + add), data4 MSB +R_IA64_PCREL32LSB = 0x4d, # @pcrel(sym + add), data4 LSB +R_IA64_PCREL64MSB = 0x4e, # @pcrel(sym + add), data8 MSB +R_IA64_PCREL64LSB = 0x4f, # @pcrel(sym + add), data8 LSB +R_IA64_LTOFF_FPTR22 = 0x52, # @ltoff(@fptr(s+a)), imm22 +R_IA64_LTOFF_FPTR64I = 0x53, # @ltoff(@fptr(s+a)), imm64 +R_IA64_LTOFF_FPTR32MSB = 0x54, # @ltoff(@fptr(s+a)), data4 MSB +R_IA64_LTOFF_FPTR32LSB = 0x55, # @ltoff(@fptr(s+a)), data4 LSB +R_IA64_LTOFF_FPTR64MSB = 0x56, # @ltoff(@fptr(s+a)), data8 MSB +R_IA64_LTOFF_FPTR64LSB = 0x57, # @ltoff(@fptr(s+a)), data8 LSB +R_IA64_SEGREL32MSB = 0x5c, # @segrel(sym + add), data4 MSB +R_IA64_SEGREL32LSB = 0x5d, # @segrel(sym + add), data4 LSB +R_IA64_SEGREL64MSB = 0x5e, # @segrel(sym + add), data8 MSB +R_IA64_SEGREL64LSB = 0x5f, # @segrel(sym + add), data8 LSB +R_IA64_SECREL32MSB = 0x64, # @secrel(sym + add), data4 MSB +R_IA64_SECREL32LSB = 0x65, # @secrel(sym + add), data4 LSB +R_IA64_SECREL64MSB = 0x66, # @secrel(sym + add), data8 MSB +R_IA64_SECREL64LSB = 0x67, # @secrel(sym + add), data8 LSB +R_IA64_REL32MSB = 0x6c, # data 4 + REL +R_IA64_REL32LSB = 0x6d, # data 4 + REL +R_IA64_REL64MSB = 0x6e, # data 8 + REL +R_IA64_REL64LSB = 0x6f, # data 8 + REL +R_IA64_LTV32MSB = 0x74, # symbol + addend, data4 MSB +R_IA64_LTV32LSB = 0x75, # symbol + addend, data4 LSB +R_IA64_LTV64MSB = 0x76, # symbol + addend, data8 MSB +R_IA64_LTV64LSB = 0x77, # symbol + addend, 
data8 LSB +R_IA64_PCREL21BI = 0x79, # @pcrel(sym + add), 21bit inst +R_IA64_PCREL22 = 0x7a, # @pcrel(sym + add), 22bit inst +R_IA64_PCREL64I = 0x7b, # @pcrel(sym + add), 64bit inst +R_IA64_IPLTMSB = 0x80, # dynamic reloc, imported PLT, MSB +R_IA64_IPLTLSB = 0x81, # dynamic reloc, imported PLT, LSB +R_IA64_COPY = 0x84, # copy relocation +R_IA64_SUB = 0x85, # Addend and symbol difference +R_IA64_LTOFF22X = 0x86, # LTOFF22, relaxable. +R_IA64_LDXMOV = 0x87, # Use of LTOFF22X. +R_IA64_TPREL14 = 0x91, # @tprel(sym + add), imm14 +R_IA64_TPREL22 = 0x92, # @tprel(sym + add), imm22 +R_IA64_TPREL64I = 0x93, # @tprel(sym + add), imm64 +R_IA64_TPREL64MSB = 0x96, # @tprel(sym + add), data8 MSB +R_IA64_TPREL64LSB = 0x97, # @tprel(sym + add), data8 LSB +R_IA64_LTOFF_TPREL22 = 0x9a, # @ltoff(@tprel(s+a)), imm22 +R_IA64_DTPMOD64MSB = 0xa6, # @dtpmod(sym + add), data8 MSB +R_IA64_DTPMOD64LSB = 0xa7, # @dtpmod(sym + add), data8 LSB +R_IA64_LTOFF_DTPMOD22 = 0xaa, # @ltoff(@dtpmod(sym + add)), imm22 +R_IA64_DTPREL14 = 0xb1, # @dtprel(sym + add), imm14 +R_IA64_DTPREL22 = 0xb2, # @dtprel(sym + add), imm22 +R_IA64_DTPREL64I = 0xb3, # @dtprel(sym + add), imm64 +R_IA64_DTPREL32MSB = 0xb4, # @dtprel(sym + add), data4 MSB +R_IA64_DTPREL32LSB = 0xb5, # @dtprel(sym + add), data4 LSB +R_IA64_DTPREL64MSB = 0xb6, # @dtprel(sym + add), data8 MSB +R_IA64_DTPREL64LSB = 0xb7, # @dtprel(sym + add), data8 LSB +R_IA64_LTOFF_DTPREL22 = 0xba, # @ltoff(@dtprel(s+a)), imm22 +) + +SetConstants( # SH relocations - -R_SH_NONE = 0 -R_SH_DIR32 = 1 -R_SH_REL32 = 2 -R_SH_DIR8WPN = 3 -R_SH_IND12W = 4 -R_SH_DIR8WPL = 5 -R_SH_DIR8WPZ = 6 -R_SH_DIR8BP = 7 -R_SH_DIR8W = 8 -R_SH_DIR8L = 9 -R_SH_SWITCH16 = 25 -R_SH_SWITCH32 = 26 -R_SH_USES = 27 -R_SH_COUNT = 28 -R_SH_ALIGN = 29 -R_SH_CODE = 30 -R_SH_DATA = 31 -R_SH_LABEL = 32 -R_SH_SWITCH8 = 33 -R_SH_GNU_VTINHERIT = 34 -R_SH_GNU_VTENTRY = 35 -R_SH_TLS_GD_32 = 144 -R_SH_TLS_LD_32 = 145 -R_SH_TLS_LDO_32 = 146 -R_SH_TLS_IE_32 = 147 -R_SH_TLS_LE_32 = 148 -R_SH_TLS_DTPMOD32 =
149 -R_SH_TLS_DTPOFF32 = 150 -R_SH_TLS_TPOFF32 = 151 -R_SH_GOT32 = 160 -R_SH_PLT32 = 161 -R_SH_COPY = 162 -R_SH_GLOB_DAT = 163 -R_SH_JMP_SLOT = 164 -R_SH_RELATIVE = 165 -R_SH_GOTOFF = 166 -R_SH_GOTPC = 167 +R_SH_NONE = 0, +R_SH_DIR32 = 1, +R_SH_REL32 = 2, +R_SH_DIR8WPN = 3, +R_SH_IND12W = 4, +R_SH_DIR8WPL = 5, +R_SH_DIR8WPZ = 6, +R_SH_DIR8BP = 7, +R_SH_DIR8W = 8, +R_SH_DIR8L = 9, +R_SH_SWITCH16 = 25, +R_SH_SWITCH32 = 26, +R_SH_USES = 27, +R_SH_COUNT = 28, +R_SH_ALIGN = 29, +R_SH_CODE = 30, +R_SH_DATA = 31, +R_SH_LABEL = 32, +R_SH_SWITCH8 = 33, +R_SH_GNU_VTINHERIT = 34, +R_SH_GNU_VTENTRY = 35, +R_SH_TLS_GD_32 = 144, +R_SH_TLS_LD_32 = 145, +R_SH_TLS_LDO_32 = 146, +R_SH_TLS_IE_32 = 147, +R_SH_TLS_LE_32 = 148, +R_SH_TLS_DTPMOD32 = 149, +R_SH_TLS_DTPOFF32 = 150, +R_SH_TLS_TPOFF32 = 151, +R_SH_GOT32 = 160, +R_SH_PLT32 = 161, +R_SH_COPY = 162, +R_SH_GLOB_DAT = 163, +R_SH_JMP_SLOT = 164, +R_SH_RELATIVE = 165, +R_SH_GOTOFF = 166, +R_SH_GOTPC = 167, # Keep this the last entry. -R_SH_NUM = 256 +R_SH_NUM = 256, +) +SetConstants( # S/390 relocations - -R_390_NONE = 0 # No reloc. -R_390_8 = 1 # Direct 8 bit. -R_390_12 = 2 # Direct 12 bit. -R_390_16 = 3 # Direct 16 bit. -R_390_32 = 4 # Direct 32 bit. -R_390_PC32 = 5 # PC relative 32 bit. -R_390_GOT12 = 6 # 12 bit GOT offset. -R_390_GOT32 = 7 # 32 bit GOT offset. -R_390_PLT32 = 8 # 32 bit PC relative PLT address. -R_390_COPY = 9 # Copy symbol at runtime. -R_390_GLOB_DAT = 10 # Create GOT entry. -R_390_JMP_SLOT = 11 # Create PLT entry. -R_390_RELATIVE = 12 # Adjust by program base. -R_390_GOTOFF32 = 13 # 32 bit offset to GOT. -R_390_GOTPC = 14 # 32 bit PC relative offset to GOT. -R_390_GOT16 = 15 # 16 bit GOT offset. -R_390_PC16 = 16 # PC relative 16 bit. -R_390_PC16DBL = 17 # PC relative 16 bit shifted by 1. -R_390_PLT16DBL = 18 # 16 bit PC rel. PLT shifted by 1. -R_390_PC32DBL = 19 # PC relative 32 bit shifted by 1. -R_390_PLT32DBL = 20 # 32 bit PC rel. PLT shifted by 1. -R_390_GOTPCDBL = 21 # 32 bit PC rel. GOT shifted by 1. 
-R_390_64 = 22 # Direct 64 bit. -R_390_PC64 = 23 # PC relative 64 bit. -R_390_GOT64 = 24 # 64 bit GOT offset. -R_390_PLT64 = 25 # 64 bit PC relative PLT address. -R_390_GOTENT = 26 # 32 bit PC rel. to GOT entry >> 1. -R_390_GOTOFF16 = 27 # 16 bit offset to GOT. -R_390_GOTOFF64 = 28 # 64 bit offset to GOT. -R_390_GOTPLT12 = 29 # 12 bit offset to jump slot. -R_390_GOTPLT16 = 30 # 16 bit offset to jump slot. -R_390_GOTPLT32 = 31 # 32 bit offset to jump slot. -R_390_GOTPLT64 = 32 # 64 bit offset to jump slot. -R_390_GOTPLTENT = 33 # 32 bit rel. offset to jump slot. -R_390_PLTOFF16 = 34 # 16 bit offset from GOT to PLT. -R_390_PLTOFF32 = 35 # 32 bit offset from GOT to PLT. -R_390_PLTOFF64 = 36 # 16 bit offset from GOT to PLT. -R_390_TLS_LOAD = 37 # Tag for load insn in TLS code. -R_390_TLS_GDCALL = 38 # Tag for function call in general dynamic TLS code. -R_390_TLS_LDCALL = 39 # Tag for function call in local dynamic TLS code. -R_390_TLS_GD32 = 40 # Direct 32 bit for general dynamic thread local data. -R_390_TLS_GD64 = 41 # Direct 64 bit for general dynamic thread local data. -R_390_TLS_GOTIE12 = 42 # 12 bit GOT offset for static TLS block offset. -R_390_TLS_GOTIE32 = 43 # 32 bit GOT offset for static TLS block offset. -R_390_TLS_GOTIE64 = 44 # 64 bit GOT offset for static TLS block offset. -R_390_TLS_LDM32 = 45 # Direct 32 bit for local dynamic thread local data in LE code. -R_390_TLS_LDM64 = 46 # Direct 64 bit for local dynamic thread local data in LE code. -R_390_TLS_IE32 = 47 # 32 bit address of GOT entry for negated static TLS block offset. -R_390_TLS_IE64 = 48 # 64 bit address of GOT entry for negated static TLS block offset. -R_390_TLS_IEENT = 49 # 32 bit rel. offset to GOT entry for negated static TLS block offset. -R_390_TLS_LE32 = 50 # 32 bit negated offset relative to static TLS block. -R_390_TLS_LE64 = 51 # 64 bit negated offset relative to static TLS block. -R_390_TLS_LDO32 = 52 # 32 bit offset relative to TLS block. 
-R_390_TLS_LDO64 = 53 # 64 bit offset relative to TLS block. -R_390_TLS_DTPMOD = 54 # ID of module containing symbol. -R_390_TLS_DTPOFF = 55 # Offset in TLS block. -R_390_TLS_TPOFF = 56 # Negated offset in static TLS block. -R_390_20 = 57 # Direct 20 bit. -R_390_GOT20 = 58 # 20 bit GOT offset. -R_390_GOTPLT20 = 59 # 20 bit offset to jump slot. -R_390_TLS_GOTIE20 = 60 # 20 bit GOT offset for static TLS block offset. +R_390_NONE = 0, # No reloc. +R_390_8 = 1, # Direct 8 bit. +R_390_12 = 2, # Direct 12 bit. +R_390_16 = 3, # Direct 16 bit. +R_390_32 = 4, # Direct 32 bit. +R_390_PC32 = 5, # PC relative 32 bit. +R_390_GOT12 = 6, # 12 bit GOT offset. +R_390_GOT32 = 7, # 32 bit GOT offset. +R_390_PLT32 = 8, # 32 bit PC relative PLT address. +R_390_COPY = 9, # Copy symbol at runtime. +R_390_GLOB_DAT = 10, # Create GOT entry. +R_390_JMP_SLOT = 11, # Create PLT entry. +R_390_RELATIVE = 12, # Adjust by program base. +R_390_GOTOFF32 = 13, # 32 bit offset to GOT. +R_390_GOTPC = 14, # 32 bit PC relative offset to GOT. +R_390_GOT16 = 15, # 16 bit GOT offset. +R_390_PC16 = 16, # PC relative 16 bit. +R_390_PC16DBL = 17, # PC relative 16 bit shifted by 1. +R_390_PLT16DBL = 18, # 16 bit PC rel. PLT shifted by 1. +R_390_PC32DBL = 19, # PC relative 32 bit shifted by 1. +R_390_PLT32DBL = 20, # 32 bit PC rel. PLT shifted by 1. +R_390_GOTPCDBL = 21, # 32 bit PC rel. GOT shifted by 1. +R_390_64 = 22, # Direct 64 bit. +R_390_PC64 = 23, # PC relative 64 bit. +R_390_GOT64 = 24, # 64 bit GOT offset. +R_390_PLT64 = 25, # 64 bit PC relative PLT address. +R_390_GOTENT = 26, # 32 bit PC rel. to GOT entry >> 1. +R_390_GOTOFF16 = 27, # 16 bit offset to GOT. +R_390_GOTOFF64 = 28, # 64 bit offset to GOT. +R_390_GOTPLT12 = 29, # 12 bit offset to jump slot. +R_390_GOTPLT16 = 30, # 16 bit offset to jump slot. +R_390_GOTPLT32 = 31, # 32 bit offset to jump slot. +R_390_GOTPLT64 = 32, # 64 bit offset to jump slot. +R_390_GOTPLTENT = 33, # 32 bit rel. offset to jump slot. 
+R_390_PLTOFF16 = 34, # 16 bit offset from GOT to PLT. +R_390_PLTOFF32 = 35, # 32 bit offset from GOT to PLT. +R_390_PLTOFF64 = 36, # 64 bit offset from GOT to PLT. +R_390_TLS_LOAD = 37, # Tag for load insn in TLS code. +R_390_TLS_GDCALL = 38, # Tag for function call in general dynamic TLS code. +R_390_TLS_LDCALL = 39, # Tag for function call in local dynamic TLS code. +R_390_TLS_GD32 = 40, # Direct 32 bit for general dynamic thread local data. +R_390_TLS_GD64 = 41, # Direct 64 bit for general dynamic thread local data. +R_390_TLS_GOTIE12 = 42, # 12 bit GOT offset for static TLS block offset. +R_390_TLS_GOTIE32 = 43, # 32 bit GOT offset for static TLS block offset. +R_390_TLS_GOTIE64 = 44, # 64 bit GOT offset for static TLS block offset. +R_390_TLS_LDM32 = 45, # Direct 32 bit for local dynamic thread local data in LE code. +R_390_TLS_LDM64 = 46, # Direct 64 bit for local dynamic thread local data in LE code. +R_390_TLS_IE32 = 47, # 32 bit address of GOT entry for negated static TLS block offset. +R_390_TLS_IE64 = 48, # 64 bit address of GOT entry for negated static TLS block offset. +R_390_TLS_IEENT = 49, # 32 bit rel. offset to GOT entry for negated static TLS block offset. +R_390_TLS_LE32 = 50, # 32 bit negated offset relative to static TLS block. +R_390_TLS_LE64 = 51, # 64 bit negated offset relative to static TLS block. +R_390_TLS_LDO32 = 52, # 32 bit offset relative to TLS block. +R_390_TLS_LDO64 = 53, # 64 bit offset relative to TLS block. +R_390_TLS_DTPMOD = 54, # ID of module containing symbol. +R_390_TLS_DTPOFF = 55, # Offset in TLS block. +R_390_TLS_TPOFF = 56, # Negated offset in static TLS block. +R_390_20 = 57, # Direct 20 bit. +R_390_GOT20 = 58, # 20 bit GOT offset. +R_390_GOTPLT20 = 59, # 20 bit offset to jump slot. +R_390_TLS_GOTIE20 = 60, # 20 bit GOT offset for static TLS block offset. # Keep this the last entry. -R_390_NUM = 61 +R_390_NUM = 61, +) +SetConstants( # CRIS relocations.
-R_CRIS_NONE = 0 -R_CRIS_8 = 1 -R_CRIS_16 = 2 -R_CRIS_32 = 3 -R_CRIS_8_PCREL = 4 -R_CRIS_16_PCREL = 5 -R_CRIS_32_PCREL = 6 -R_CRIS_GNU_VTINHERIT = 7 -R_CRIS_GNU_VTENTRY = 8 -R_CRIS_COPY = 9 -R_CRIS_GLOB_DAT = 10 -R_CRIS_JUMP_SLOT = 11 -R_CRIS_RELATIVE = 12 -R_CRIS_16_GOT = 13 -R_CRIS_32_GOT = 14 -R_CRIS_16_GOTPLT = 15 -R_CRIS_32_GOTPLT = 16 -R_CRIS_32_GOTREL = 17 -R_CRIS_32_PLT_GOTREL = 18 -R_CRIS_32_PLT_PCREL = 19 - -R_CRIS_NUM = 20 - - +R_CRIS_NONE = 0, +R_CRIS_8 = 1, +R_CRIS_16 = 2, +R_CRIS_32 = 3, +R_CRIS_8_PCREL = 4, +R_CRIS_16_PCREL = 5, +R_CRIS_32_PCREL = 6, +R_CRIS_GNU_VTINHERIT = 7, +R_CRIS_GNU_VTENTRY = 8, +R_CRIS_COPY = 9, +R_CRIS_GLOB_DAT = 10, +R_CRIS_JUMP_SLOT = 11, +R_CRIS_RELATIVE = 12, +R_CRIS_16_GOT = 13, +R_CRIS_32_GOT = 14, +R_CRIS_16_GOTPLT = 15, +R_CRIS_32_GOTPLT = 16, +R_CRIS_32_GOTREL = 17, +R_CRIS_32_PLT_GOTREL = 18, +R_CRIS_32_PLT_PCREL = 19, +R_CRIS_NUM = 20, +) + +SetConstants( # AMD x86-64 relocations. -R_X86_64_NONE = 0 # No reloc -R_X86_64_64 = 1 # Direct 64 bit -R_X86_64_PC32 = 2 # PC relative 32 bit signed -R_X86_64_GOT32 = 3 # 32 bit GOT entry -R_X86_64_PLT32 = 4 # 32 bit PLT address -R_X86_64_COPY = 5 # Copy symbol at runtime -R_X86_64_GLOB_DAT = 6 # Create GOT entry -R_X86_64_JUMP_SLOT = 7 # Create PLT entry -R_X86_64_RELATIVE = 8 # Adjust by program base -R_X86_64_GOTPCREL = 9 # 32 bit signed PC relative offset to GOT -R_X86_64_32 = 10 # Direct 32 bit zero extended -R_X86_64_32S = 11 # Direct 32 bit sign extended -R_X86_64_16 = 12 # Direct 16 bit zero extended -R_X86_64_PC16 = 13 # 16 bit sign extended pc relative -R_X86_64_8 = 14 # Direct 8 bit sign extended -R_X86_64_PC8 = 15 # 8 bit sign extended pc relative -R_X86_64_DTPMOD64 = 16 # ID of module containing symbol -R_X86_64_DTPOFF64 = 17 # Offset in module's TLS block -R_X86_64_TPOFF64 = 18 # Offset in initial TLS block -R_X86_64_TLSGD = 19 # 32 bit signed PC relative offset to two GOT entries for GD symbol -R_X86_64_TLSLD = 20 # 32 bit signed PC relative offset to two GOT 
entries for LD symbol -R_X86_64_DTPOFF32 = 21 # Offset in TLS block -R_X86_64_GOTTPOFF = 22 # 32 bit signed PC relative offset to GOT entry for IE symbol -R_X86_64_TPOFF32 = 23 # Offset in initial TLS block -R_X86_64_PC64 = 24 # PC relative 64 bit -R_X86_64_GOTOFF64 = 25 # 64 bit offset to GOT -R_X86_64_GOTPC32 = 26 # 32 bit signed pc relative offset to GOT -R_X86_64_GOT64 = 27 # 64-bit GOT entry offset -R_X86_64_GOTPCREL64 = 28 # 64-bit PC relative offset to GOT entry -R_X86_64_GOTPC64 = 29 # 64-bit PC relative offset to GOT -R_X86_64_GOTPLT64 = 30 # like GOT64, says PLT entry needed -R_X86_64_PLTOFF64 = 31 # 64-bit GOT relative offset to PLT entry -R_X86_64_SIZE32 = 32 # Size of symbol plus 32-bit addend -R_X86_64_SIZE64 = 33 # Size of symbol plus 64-bit addend -R_X86_64_GOTPC32_TLSDESC = 34 # GOT offset for TLS descriptor. -R_X86_64_TLSDESC_CALL = 35 # Marker for call through TLS descriptor. -R_X86_64_TLSDESC = 36 # TLS descriptor. -R_X86_64_IRELATIVE = 37 # Adjust indirectly by program base - -R_X86_64_NUM = 38 - - +R_X86_64_NONE = 0, # No reloc +R_X86_64_64 = 1, # Direct 64 bit +R_X86_64_PC32 = 2, # PC relative 32 bit signed +R_X86_64_GOT32 = 3, # 32 bit GOT entry +R_X86_64_PLT32 = 4, # 32 bit PLT address +R_X86_64_COPY = 5, # Copy symbol at runtime +R_X86_64_GLOB_DAT = 6, # Create GOT entry +R_X86_64_JUMP_SLOT = 7, # Create PLT entry +R_X86_64_RELATIVE = 8, # Adjust by program base +R_X86_64_GOTPCREL = 9, # 32 bit signed PC relative offset to GOT +R_X86_64_32 = 10, # Direct 32 bit zero extended +R_X86_64_32S = 11, # Direct 32 bit sign extended +R_X86_64_16 = 12, # Direct 16 bit zero extended +R_X86_64_PC16 = 13, # 16 bit sign extended pc relative +R_X86_64_8 = 14, # Direct 8 bit sign extended +R_X86_64_PC8 = 15, # 8 bit sign extended pc relative +R_X86_64_DTPMOD64 = 16, # ID of module containing symbol +R_X86_64_DTPOFF64 = 17, # Offset in module's TLS block +R_X86_64_TPOFF64 = 18, # Offset in initial TLS block +R_X86_64_TLSGD = 19, # 32 bit signed PC relative 
offset to two GOT entries for GD symbol +R_X86_64_TLSLD = 20, # 32 bit signed PC relative offset to two GOT entries for LD symbol +R_X86_64_DTPOFF32 = 21, # Offset in TLS block +R_X86_64_GOTTPOFF = 22, # 32 bit signed PC relative offset to GOT entry for IE symbol +R_X86_64_TPOFF32 = 23, # Offset in initial TLS block +R_X86_64_PC64 = 24, # PC relative 64 bit +R_X86_64_GOTOFF64 = 25, # 64 bit offset to GOT +R_X86_64_GOTPC32 = 26, # 32 bit signed pc relative offset to GOT +R_X86_64_GOT64 = 27, # 64-bit GOT entry offset +R_X86_64_GOTPCREL64 = 28, # 64-bit PC relative offset to GOT entry +R_X86_64_GOTPC64 = 29, # 64-bit PC relative offset to GOT +R_X86_64_GOTPLT64 = 30, # like GOT64, says PLT entry needed +R_X86_64_PLTOFF64 = 31, # 64-bit GOT relative offset to PLT entry +R_X86_64_SIZE32 = 32, # Size of symbol plus 32-bit addend +R_X86_64_SIZE64 = 33, # Size of symbol plus 64-bit addend +R_X86_64_GOTPC32_TLSDESC = 34, # GOT offset for TLS descriptor. +R_X86_64_TLSDESC_CALL = 35, # Marker for call through TLS descriptor. +R_X86_64_TLSDESC = 36, # TLS descriptor. +R_X86_64_IRELATIVE = 37, # Adjust indirectly by program base +R_X86_64_RELATIVE64 = 38, # 64-bit adjust by program base +R_X86_64_PC32_BND = 39, # Deprecated, reserved +R_X86_64_PLT32_BND = 40, # Deprecated, reserved +R_X86_64_GOTPCRELX = 41, # Load from 32 bit signed pc relative offset to GOT entry without REX prefix, relaxable. +R_X86_64_REX_GOTPCRELX = 42, # Load from 32 bit signed pc relative offset to GOT entry with REX prefix, relaxable. + +R_X86_64_NUM = 43, +) + +SetConstants( # AM33 relocations. -R_MN10300_NONE = 0 # No reloc. -R_MN10300_32 = 1 # Direct 32 bit. -R_MN10300_16 = 2 # Direct 16 bit. -R_MN10300_8 = 3 # Direct 8 bit. -R_MN10300_PCREL32 = 4 # PC-relative 32-bit. -R_MN10300_PCREL16 = 5 # PC-relative 16-bit signed. -R_MN10300_PCREL8 = 6 # PC-relative 8-bit signed. -R_MN10300_GNU_VTINHERIT = 7 # Ancient C++ vtable garbage... -R_MN10300_GNU_VTENTRY = 8 # ... collection annotation.
-R_MN10300_24 = 9 # Direct 24 bit. -R_MN10300_GOTPC32 = 10 # 32-bit PCrel offset to GOT. -R_MN10300_GOTPC16 = 11 # 16-bit PCrel offset to GOT. -R_MN10300_GOTOFF32 = 12 # 32-bit offset from GOT. -R_MN10300_GOTOFF24 = 13 # 24-bit offset from GOT. -R_MN10300_GOTOFF16 = 14 # 16-bit offset from GOT. -R_MN10300_PLT32 = 15 # 32-bit PCrel to PLT entry. -R_MN10300_PLT16 = 16 # 16-bit PCrel to PLT entry. -R_MN10300_GOT32 = 17 # 32-bit offset to GOT entry. -R_MN10300_GOT24 = 18 # 24-bit offset to GOT entry. -R_MN10300_GOT16 = 19 # 16-bit offset to GOT entry. -R_MN10300_COPY = 20 # Copy symbol at runtime. -R_MN10300_GLOB_DAT = 21 # Create GOT entry. -R_MN10300_JMP_SLOT = 22 # Create PLT entry. -R_MN10300_RELATIVE = 23 # Adjust by program base. - -R_MN10300_NUM = 24 - - +R_MN10300_NONE = 0, # No reloc. +R_MN10300_32 = 1, # Direct 32 bit. +R_MN10300_16 = 2, # Direct 16 bit. +R_MN10300_8 = 3, # Direct 8 bit. +R_MN10300_PCREL32 = 4, # PC-relative 32-bit. +R_MN10300_PCREL16 = 5, # PC-relative 16-bit signed. +R_MN10300_PCREL8 = 6, # PC-relative 8-bit signed. +R_MN10300_GNU_VTINHERIT = 7, # Ancient C++ vtable garbage... +R_MN10300_GNU_VTENTRY = 8, # ... collection annotation. +R_MN10300_24 = 9, # Direct 24 bit. +R_MN10300_GOTPC32 = 10, # 32-bit PCrel offset to GOT. +R_MN10300_GOTPC16 = 11, # 16-bit PCrel offset to GOT. +R_MN10300_GOTOFF32 = 12, # 32-bit offset from GOT. +R_MN10300_GOTOFF24 = 13, # 24-bit offset from GOT. +R_MN10300_GOTOFF16 = 14, # 16-bit offset from GOT. +R_MN10300_PLT32 = 15, # 32-bit PCrel to PLT entry. +R_MN10300_PLT16 = 16, # 16-bit PCrel to PLT entry. +R_MN10300_GOT32 = 17, # 32-bit offset to GOT entry. +R_MN10300_GOT24 = 18, # 24-bit offset to GOT entry. +R_MN10300_GOT16 = 19, # 16-bit offset to GOT entry. +R_MN10300_COPY = 20, # Copy symbol at runtime. +R_MN10300_GLOB_DAT = 21, # Create GOT entry. +R_MN10300_JMP_SLOT = 22, # Create PLT entry. +R_MN10300_RELATIVE = 23, # Adjust by program base. +R_MN10300_NUM = 24, +) + +SetConstants( # M32R relocs. 
-R_M32R_NONE = 0 # No reloc. -R_M32R_16 = 1 # Direct 16 bit. -R_M32R_32 = 2 # Direct 32 bit. -R_M32R_24 = 3 # Direct 24 bit. -R_M32R_10_PCREL = 4 # PC relative 10 bit shifted. -R_M32R_18_PCREL = 5 # PC relative 18 bit shifted. -R_M32R_26_PCREL = 6 # PC relative 26 bit shifted. -R_M32R_HI16_ULO = 7 # High 16 bit with unsigned low. -R_M32R_HI16_SLO = 8 # High 16 bit with signed low. -R_M32R_LO16 = 9 # Low 16 bit. -R_M32R_SDA16 = 10 # 16 bit offset in SDA. -R_M32R_GNU_VTINHERIT = 11 -R_M32R_GNU_VTENTRY = 12 +R_M32R_NONE = 0, # No reloc. +R_M32R_16 = 1, # Direct 16 bit. +R_M32R_32 = 2, # Direct 32 bit. +R_M32R_24 = 3, # Direct 24 bit. +R_M32R_10_PCREL = 4, # PC relative 10 bit shifted. +R_M32R_18_PCREL = 5, # PC relative 18 bit shifted. +R_M32R_26_PCREL = 6, # PC relative 26 bit shifted. +R_M32R_HI16_ULO = 7, # High 16 bit with unsigned low. +R_M32R_HI16_SLO = 8, # High 16 bit with signed low. +R_M32R_LO16 = 9, # Low 16 bit. +R_M32R_SDA16 = 10, # 16 bit offset in SDA. +R_M32R_GNU_VTINHERIT = 11, +R_M32R_GNU_VTENTRY = 12, # M32R relocs use SHT_RELA. -R_M32R_16_RELA = 33 # Direct 16 bit. -R_M32R_32_RELA = 34 # Direct 32 bit. -R_M32R_24_RELA = 35 # Direct 24 bit. -R_M32R_10_PCREL_RELA = 36 # PC relative 10 bit shifted. -R_M32R_18_PCREL_RELA = 37 # PC relative 18 bit shifted. -R_M32R_26_PCREL_RELA = 38 # PC relative 26 bit shifted. -R_M32R_HI16_ULO_RELA = 39 # High 16 bit with unsigned low -R_M32R_HI16_SLO_RELA = 40 # High 16 bit with signed low -R_M32R_LO16_RELA = 41 # Low 16 bit -R_M32R_SDA16_RELA = 42 # 16 bit offset in SDA -R_M32R_RELA_GNU_VTINHERIT = 43 -R_M32R_RELA_GNU_VTENTRY = 44 -R_M32R_REL32 = 45 # PC relative 32 bit. 
- -R_M32R_GOT24 = 48 # 24 bit GOT entry -R_M32R_26_PLTREL = 49 # 26 bit PC relative to PLT shifted -R_M32R_COPY = 50 # Copy symbol at runtime -R_M32R_GLOB_DAT = 51 # Create GOT entry -R_M32R_JMP_SLOT = 52 # Create PLT entry -R_M32R_RELATIVE = 53 # Adjust by program base -R_M32R_GOTOFF = 54 # 24 bit offset to GOT -R_M32R_GOTPC24 = 55 # 24 bit PC relative offset to GOT -R_M32R_GOT16_HI_ULO = 56 # High 16 bit GOT entry with unsigned low -R_M32R_GOT16_HI_SLO = 57 # High 16 bit GOT entry with signed low -R_M32R_GOT16_LO = 58 # Low 16 bit GOT entry -R_M32R_GOTPC_HI_ULO = 59 # High 16 bit PC relative offset to GOT with unsigned low -R_M32R_GOTPC_HI_SLO = 60 # High 16 bit PC relative offset to GOT with signed low -R_M32R_GOTPC_LO = 61 # Low 16 bit PC relative offset to GOT -R_M32R_GOTOFF_HI_ULO = 62 # High 16 bit offset to GOT with unsigned low -R_M32R_GOTOFF_HI_SLO = 63 # High 16 bit offset to GOT with signed low -R_M32R_GOTOFF_LO = 64 # Low 16 bit offset to GOT -R_M32R_NUM = 256 # Keep this the last entry. -# - -if __name__ == "__main__": - import sys - ELFFILE = sys.stdin - if len(sys.argv) > 1: - ELFFILE = open(sys.argv[1]) - ehdr = Ehdr._from_file(ELFFILE) - - ELFFILE.seek(ehdr.phoff) - phdr = Phdr._from_file(ELFFILE) - - ELFFILE.seek(ehdr.shoff) - shdr = Shdr._from_file(ELFFILE) - - for i in range(ehdr.shnum): - ELFFILE.seek(ehdr.shoff+i*ehdr.shentsize) - shdr = Shdr._from_file(ELFFILE) - print "%(name)08x %(flags)x %(addr)08x %(offset)08x" % shdr - - - - - - - +R_M32R_16_RELA = 33, # Direct 16 bit. +R_M32R_32_RELA = 34, # Direct 32 bit. +R_M32R_24_RELA = 35, # Direct 24 bit. +R_M32R_10_PCREL_RELA = 36, # PC relative 10 bit shifted. +R_M32R_18_PCREL_RELA = 37, # PC relative 18 bit shifted. +R_M32R_26_PCREL_RELA = 38, # PC relative 26 bit shifted. 
+R_M32R_HI16_ULO_RELA = 39, # High 16 bit with unsigned low +R_M32R_HI16_SLO_RELA = 40, # High 16 bit with signed low +R_M32R_LO16_RELA = 41, # Low 16 bit +R_M32R_SDA16_RELA = 42, # 16 bit offset in SDA +R_M32R_RELA_GNU_VTINHERIT = 43, +R_M32R_RELA_GNU_VTENTRY = 44, +R_M32R_REL32 = 45, # PC relative 32 bit. + +R_M32R_GOT24 = 48, # 24 bit GOT entry +R_M32R_26_PLTREL = 49, # 26 bit PC relative to PLT shifted +R_M32R_COPY = 50, # Copy symbol at runtime +R_M32R_GLOB_DAT = 51, # Create GOT entry +R_M32R_JMP_SLOT = 52, # Create PLT entry +R_M32R_RELATIVE = 53, # Adjust by program base +R_M32R_GOTOFF = 54, # 24 bit offset to GOT +R_M32R_GOTPC24 = 55, # 24 bit PC relative offset to GOT +R_M32R_GOT16_HI_ULO = 56, # High 16 bit GOT entry with unsigned low +R_M32R_GOT16_HI_SLO = 57, # High 16 bit GOT entry with signed low +R_M32R_GOT16_LO = 58, # Low 16 bit GOT entry +R_M32R_GOTPC_HI_ULO = 59, # High 16 bit PC relative offset to GOT with unsigned low +R_M32R_GOTPC_HI_SLO = 60, # High 16 bit PC relative offset to GOT with signed low +R_M32R_GOTPC_LO = 61, # Low 16 bit PC relative offset to GOT +R_M32R_GOTOFF_HI_ULO = 62, # High 16 bit offset to GOT with unsigned low +R_M32R_GOTOFF_HI_SLO = 63, # High 16 bit offset to GOT with signed low +R_M32R_GOTOFF_LO = 64, # Low 16 bit offset to GOT +R_M32R_NUM = 256, # Keep this the last entry. 
+) + +SetConstants( +# NEC/Renesas V8xx series +R_V800_NONE = 0x30, # V810 +R_V800_BYTE = 0x31, # V810 +R_V800_HWORD = 0x32, # V810 +R_V800_WORD = 0x33, # V810 +R_V800_WLO = 0x34, # V810 +R_V800_WHI = 0x35, # V810 +R_V800_WHI1 = 0x36, # V810 +R_V800_GPBYTE = 0x37, # V810 +R_V800_GPHWORD = 0x38, # V810 +R_V800_GPWORD = 0x39, # V810 +R_V800_GPWLO = 0x3a, # V810 +R_V800_GPWHI = 0x3b, # V810 +R_V800_GPWHI1 = 0x3c, # V810 +R_V800_HWLO = 0x3d, # V850 +) + +constants['R'] = {} +for k in constants: + if k.startswith('R_'): + if k == 'R_390': m = 'S390' + elif k == 'R_IA64': m = 'IA_64' + else: m = k[2:] + constants['R'][m] = constants[k] diff --git a/elfesteem/elf_init.py b/elfesteem/elf_init.py index f0d755d..49da5a6 100644 --- a/elfesteem/elf_init.py +++ b/elfesteem/elf_init.py @@ -1,163 +1,223 @@ #! /usr/bin/env python import struct - -import cstruct -import elf -from strpatchwork import StrPatchwork import logging +from elfesteem import elf +from elfesteem.strpatchwork import StrPatchwork + log = logging.getLogger("elfparse") console_handler = logging.StreamHandler() console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) log.addHandler(console_handler) log.setLevel(logging.WARN) -class test(type): - pass -class StructWrapper(object): - class __metaclass__(type): - def __new__(cls, name, bases, dct): - wrapped = dct["wrapped"] - if wrapped is not None: # XXX: make dct lookup look into base classes - for fname,v in wrapped._fields: - dct[fname] = property(dct.pop("get_"+fname, - lambda self,fname=fname: getattr(self.cstr,fname)), - dct.pop("set_"+fname, - lambda self,v,fname=fname: setattr(self.cstr,fname,v)), - dct.pop("del_"+fname, None)) - return type.__new__(cls, name, bases, dct) - wrapped = None - def __init__(self, parent, sex, size, *args, **kargs): - self.cstr = self.wrapped(sex, size, *args, **kargs) - self.parent = parent - def __getitem__(self, item): - return getattr(self,item) - def __repr__(self): - return ">8].name - def 
get_type(self): - return self.cstr.info & 0xff - -class WRel64(StructWrapper): - wrapped = elf.Rel64 - wrapped._fields.append(("sym","u32")) - wrapped._fields.append(("type","u32")) - def get_sym(self): - return self.parent.linksection.symtab[self.cstr.info>>32].name - def get_type(self): - return self.cstr.info & 0xffffffff - -class WRela32(WRel32): - wrapped = elf.Rela32 - wrapped._fields.append(("sym","u32")) - wrapped._fields.append(("type","u08")) - def get_sym(self): - return self.parent.linksection.symtab[self.cstr.info>>8].name - def get_type(self): - return self.cstr.info & 0xff - -class WRela64(WRel64): - wrapped = elf.Rela64 - wrapped._fields.append(("sym","u32")) - wrapped._fields.append(("type","u32")) - def get_sym(self): - return self.parent.linksection.symtab[self.cstr.info>>32].name - def get_type(self): - return self.cstr.info & 0xffffffff - -class WShdr(StructWrapper): - wrapped = elf.Shdr - def get_name(self): - return self.parent.parent._shstr.get_name(self.cstr.name) - -class WDynamic(StructWrapper): - wrapped = elf.Dynamic - def get_name(self): - if self.type == elf.DT_NEEDED: - return self.parent.linksection.get_name(self.cstr.name) - return self.cstr.name - -class WPhdr(StructWrapper): - wrapped = elf.Phdr - -class WPhdr64(StructWrapper): - wrapped = elf.Phdr64 - - -class ContentManager(object): - def __get__(self, owner, x): - if hasattr(owner, '_content'): - return owner._content - def __set__(self, owner, new_content): - owner.resize(len(owner._content), len(new_content)) - owner._content=StrPatchwork(new_content) - owner.parse_content(owner.sex, owner.size) - def __delete__(self, owner): - self.__set__(owner, None) +def align_to(value, alignment): + trimed = value - (value & (alignment-1)) + extra = 0 + if (value & (alignment-1)): + extra = alignment + + return trimed + extra + ### Sections -class Section(object): +def inheritsexwsize(self, parent, kargs): + for f in ['sex', 'wsize']: + if f in kargs: + setattr(self, f, kargs[f]) + del 
kargs[f] + elif parent != None: + setattr(self, f, getattr(parent, f)) + +class SectionMetaclass(type): sectypes = {} - class __metaclass__(type): - def __new__(cls, name, bases, dct): - o = type.__new__(cls, name, bases, dct) - if name != "Section": - Section.register(o) - return o - def register(cls, o): - if o.sht is not None: - cls.sectypes[o.sht] = o - def __call__(cls, parent, sex, size, shstr=None): + def __new__(cls, name, bases, dct): + o = type.__new__(cls, name, bases, dct) + if name != "SectionBase" and o.sht is not None: + SectionMetaclass.sectypes[o.sht] = o + return o + +SectionBase = SectionMetaclass('SectionBase', (object,), {}) + +class Section(SectionBase): + ''' + sht: (elf.SHT_*) Section header type + sh: (elf.Shdr) actual header + parent: (SHList) list of sections + phparents: (list[ProgramHeader]) all ProgramHeader's that fully contain this section + phparent: (ProgramHeader) _main_ ProgramHeader in witch this section resides + first encountered, prefering elf.PT_LOAD sections + content: (StrPatchwork) actual bytes of the section header + ''' + + sht = None + def create(cls, parent, shstr=None): + if shstr is None: sh = None - if shstr is not None: - sh = WShdr(None, sex, size, shstr) - if sh.type in Section.sectypes: - cls = Section.sectypes[sh.type] - i = cls.__new__(cls, cls.__name__, cls.__bases__, cls.__dict__) - if sh is not None: - sh.parent=i - i.__init__(parent,sh) - return i - - content = ContentManager() + else: + sh = elf.Shdr(parent = None, content = shstr, sex = parent.sex, +wsize = parent.wsize) + if sh.type in SectionMetaclass.sectypes: + cls = SectionMetaclass.sectypes[sh.type] + i = cls.__new__(cls, cls.__name__, cls.__bases__, cls.__dict__) + if sh is not None: + sh.parent=i + i.__init__(parent, sh) + return i + create = classmethod(create) + + def append_section_content(self, appended_section): + # type: (Section) -> None + old_size = self.size + self.resize(0, appended_section.size) + + self.content[ + old_size: + 
self.size + ] = appended_section.content.pack() + + def next_section(self): + # type: () -> Union[Section, list[Section]]: + stacket = False + stack = [] + latest = None + for section in self.parent.shlist: + if section.sh.offset <= self.sh.offset or section is self: + continue + + if latest is None or section.sh.offset < latest.sh.offset: + latest = section + stacket = False + stack = [section] + continue + + if section.sh.offset == latest.sh.offset: + stacket = True + stack.append(section) + + if stacket: + return stack + + return latest + + def fix_allignment_requierments(self): + local_logger = logging.getLogger("expand_sections") + # local_logger.setLevel(logging.DEBUG) + + if self.sh.addralign == 0 or (self.addr % self.sh.addralign) == 0: + return + + req = self.sh.addralign - (self.addr % self.sh.addralign) + + local_logger.debug("offseting (%x)[%r]$%x with {%x}", self.addr, self, self.sh.addralign, req) + + self.sh.offset += req + if self.addr: + self.sh.addr += req + + next_section = self.next_section() + if type(next_section) is list: + next_section = next_section[0] + + if next_section is not None: + local_logger.debug("\tfound next section [%r]", next_section) + old_req = req + local_logger.debug("\treq= %x, next.offset= %x, self.offset= %x, self.size= %x", req, next_section.sh.offset, self.sh.offset, self.size) + unused = next_section.sh.offset - (self.sh.offset + self.size) + local_logger.debug("\tunused= %x", unused) + + if unused >= 0: + reuse = req + req = 0 + else: + # unused < 0 aka h + reuse = abs(unused) + req += abs(unused) + + local_logger.debug("\trecovered %x: [%x -> %x]", reuse, old_req, req) + + if req == 0: + local_logger.debug("\tpremature solve") + return + + if self.phparent: + local_logger.debug("\t%x %x %x", self.phparent.addr, self.phparent.ph.filesz, self.phparent.ph.memsz) + self.phparent.resize(self, req) + for ph in self.phparents: + if ph is self.phparent: + continue + if ph.ph.type == elf.PT_LOAD: + continue + + 
ph.resize(self, req) + else: + self.parent.move_after(self, req) + def resize(self, old, new): + # type: (int, int) -> None + local_logger = logging.getLogger("expand_sections") + # local_logger.setLevel(logging.DEBUG) + + og_size = self.sh.size self.sh.size += new-old - self.parent.resize(self, new-old) + + diff = new - old + next_section = self.next_section() + if type(next_section) is list: + next_section = next_section[0] + if next_section is not None: + # take in to account existing space between this and the next section + local_logger.debug("\t %x %x", diff , next_section.sh.offset - self.sh.offset - self.size) + local_logger.debug("\t %x %x %x", next_section.sh.offset, self.sh.offset, og_size) + diff = max(0, diff - (next_section.sh.offset - self.sh.offset - og_size)) + + local_logger.debug("\tresize %r", self) + local_logger.debug("\tnext: %r", next_section) + local_logger.debug("\t %x", new-old) + local_logger.debug("\t %x", diff) + if diff == 0: + for ph in self.phparents: + if self.sh.offset + self.size == ph.ph.filesz + ph.size: + # only extend segments + # ignore posible segment overlaps since it is guaranteed that no section will overlap + ph.size += new-old + return + if self.phparent: - self.phparent.resize(self, new-old) - def parse_content(self, sex,size): - self.sex, self.size = sex, size + local_logger.debug("%x %x %x", self.phparent.addr, self.phparent.ph.filesz, self.phparent.ph.memsz) + self.phparent.resize(self, diff) + for ph in self.phparents: + if ph is self.phparent: + continue + if ph.ph.type == elf.PT_LOAD: + continue + ph.resize(self, diff) + else: + self.parent.move_after(self, diff) + + def move(self, diff): + self.sh.offset += diff + + if self.addr: + # don't change for unmaped sections + self.sh.addr += diff + + def parse_content(self): pass + def pack(self): + data = self.content + if type(data) != str: data = data.pack() + return data def get_linksection(self): - return self.parent[self.sh.link] + try: + linksection = 
self.parent[self.sh.link] + except IndexError: + linksection = NoLinkSection + return linksection def set_linksection(self, val): if isinstance(val, Section): val = self.parent.shlist.find(val) @@ -175,14 +235,23 @@ def set_infosection(self, val): if type(val) is int: self.sh.info = val infosection = property(get_infosection, set_infosection) - def __init__(self, parent, sh=None): + shstrtab = property(lambda _: _.parent._shstrtab) + def __init__(self, parent, sh=None, **kargs): self.parent=parent self.phparent=None + self.phparents=[] + inheritsexwsize(self, parent, {}) + if sh is None: + sh = elf.Shdr(parent=self, type=self.sht, name_idx=0, **kargs) self.sh=sh - self._content="" + self.content=StrPatchwork() def __repr__(self): - r = "{%(name)s ofs=%(offset)#x sz=%(size)#x addr=%(addr)#010x}" % self.sh - return r + return "%(name)-15s %(offset)08x %(size)06x %(addr)08x %(flags)x" % self.sh + def recalc(self): + pass + size = property(lambda _: _.sh.size) + addr = property(lambda _: _.sh.addr) + name = property(lambda _: _.sh.name) class NullSection(Section): sht = elf.SHT_NULL @@ -210,15 +279,122 @@ class FiniArray(Section): class GroupSection(Section): sht = elf.SHT_GROUP + def get_flags(self): + flags, = struct.unpack("I", self.content[:4]) + return flags + def get_sections(self): + l = len(self.content)//4 - 1 + sections = struct.unpack("I"*l, self.content[4:]) + return sections + def set_flags(self, value): + self.content[0] = struct.pack("I", value) + def set_sections(self, value): + for idx in self.sections: + self.parent.shlist[idx].sh.flags &= ~elf.SHF_GROUP + for idx in value: + self.parent.shlist[idx].sh.flags |= elf.SHF_GROUP + self.parent.shlist[idx].sh.addralign = 1 + self.content[4] = struct.pack("I"*len(value), *value) + flags = property(get_flags, set_flags) + sections = property(get_sections, set_sections) + def readelf_display(self): + if self.flags == elf.GRP_COMDAT: flags = 'COMDAT' + else: flags = '' + symbol = 
self.parent.parent.sh[self.sh.link] + if not symbol.sh.type == elf.SHT_SYMTAB: + return "readelf: Error: Bad sh_link in group section `%s'"%self.sh.name + symbol = symbol[self.sh.info].name + rep = [ "%s group section [%4d] `%s' [%s] contains %d sections:" % ( + flags, + self.parent.parent.sh.shlist.index(self), + self.sh.name, + symbol, + len(self.sections)) ] + format = " [%5s] %s" + rep.append(format % ('Index',' Name')) + for s_idx in self.sections: + s = self.parent.parent.sh[s_idx].sh + rep.append(format % (s_idx,s.name)) + if not (s.flags & elf.SHF_GROUP): + rep.append("No SHF_GROUP in %s" % s.name) + return "\n".join(rep) + class SymTabSHIndeces(Section): sht = elf.SHT_SYMTAB_SHNDX class GNUVerSym(Section): sht = elf.SHT_GNU_versym + entry_size = 2 + def parse_content(self): + c = self.content + self.indexes = [] + while len(c) >= self.entry_size: + self.indexes.append(struct.unpack("H", c[:self.entry_size])[0]) + c = c[self.entry_size:] + + def __getitem__(self, i): + return self.indexes[i] + def __setitem__(self, i, val): + self.indexes[i] = val + self.content[i * self.entry_size: i * self.entry_size + self.entry_size] = struct.pack("H", val) + def __len__(self): + return len(self.indexes) + class GNUVerNeed(Section): + ''' + elements: list[elf.Verneed64|elf.Vernaux64] + needs: list[elf.Verneed64] + auxs: list[elf.Vernaux64] + ''' sht = elf.SHT_GNU_verneed + entry_size = -1 + Verneed = None + Vernaux = None + + + def parse_content(self): + self.Verneed = {64: elf.Verneed64, 32: elf.Verneed32}[self.wsize] + self.Vernaux = {64: elf.Vernaux64, 32: elf.Vernaux32}[self.wsize] + self.entry_size = {64: 0x10, 32: 0x10}[self.wsize] + + self.elements = [None] * (len(self.content) // self.entry_size) + self.needs = [] + self.auxs = [] + + + c = self.content + while len(c) >= self.entry_size: + elem = self.Verneed(parent=self, content=c[:self.entry_size]) + self.needs.append(elem) + elem.offset = len(self.content) - len(c) + self.elements[elem.offset // 
self.entry_size] = elem + c = c[elem.vn_next:] + if not elem.vn_next: + break + + # TODO: validate for multiple needs + for need in self.needs: + c = self.content[need.offset+need.vn_aux:] + while len(c) >= self.entry_size: + elem = self.Vernaux(parent=self, content=c[:self.entry_size]) + self.auxs.append(elem) + elem.offset = len(self.content) - len(c) + self.elements[elem.offset // self.entry_size] = elem + c = c[elem.vna_next:] + if not elem.vna_next: + break + + def __getitem__(self, i): + return self.elements[i] + def __setitem__(self, i, val): + self.elements[i] = val + self.content[i * self.entry_size: i * self.entry_size + self.entry_size] = val.pack() + raise Exception("TODO") + # TODO: update in needs/auxs + def __len__(self): + return len(self.elements) class GNUVerDef(Section): sht = elf.SHT_GNU_verdef @@ -231,8 +407,7 @@ class CheckSumSection(Section): class NoteSection(Section): sht = elf.SHT_NOTE - def parse_content(self, sex, size): - self.sex, self.size = sex, size + def parse_content(self): c = self.content self.notes = [] # XXX: c may not be aligned? 
@@ -247,15 +422,19 @@ def parse_content(self, sex, size): class Dynamic(Section): sht = elf.SHT_DYNAMIC - def parse_content(self, sex, size): - self.sex, self.size = sex, size + def parse_content(self): + Dyn = { 32: elf.Dyn32, 64: elf.Dyn64 }[self.wsize] c = self.content self.dyntab = [] self.dynamic = {} sz = self.sh.entsize - while c: - s,c = c[:sz],c[sz:] - dyn = WDynamic(self,sex, size, s) + if sz == 0: + sz = self.wsize // 4 + idx = 0 + while len(c) > sz*idx: + s = c[sz*idx:sz*(idx+1)] + idx += 1 + dyn = Dyn(parent=self, content=s) self.dyntab.append(dyn) if type(dyn.name) is str: self.dynamic[dyn.name] = dyn @@ -263,66 +442,165 @@ def __getitem__(self,item): if type(item) is str: return self.dynamic[item] return self.dyntab[item] + def __setitem__(self, item, val): + if not isinstance(val, elf.Dyn32): + raise ValueError("Cannot set Dynamic item to %r" % val) + if item >= len(self.dyntab): + self.dyntab.extend([None for i in range(item + 1 - len(self.dyntab))]) + # TODO: completly remove old entry + self.dyntab[item] = val + if type(val.name) is str: + self.dynamic[val.name] = val + + self.content[item * self.sh.entsize] = val.pack() + + def get_with_type(self, target_type): + for dyn_entry in (self.dyntab): + if dyn_entry.type == target_type: + return dyn_entry + + return None + def update_wi(self, idx, new_val): + dyn_entry = self[idx] + dyn_entry.name_idx = self.parent.parent.getsectionbyname(".fini").addr + self[idx] = dyn_entry + def update_wt(self, target_type, new_val): + for i, dyn_entry in enumerate(self.dyntab): + if dyn_entry.type != target_type: + continue + dyn_entry.name_idx = new_val + self[i] = dyn_entry + break + else: + raise Exception("not found") + def recalc(self): + self.update_wt(elf.DT_FINI, self.parent.parent.getsectionbyname(".fini").addr) + self.update_wt(elf.DT_FINI_ARRAY, self.parent.parent.getsectionbyname(".fini_array").addr) + self.update_wt(elf.DT_FINI_ARRAYSZ, self.parent.parent.getsectionbyname(".fini_array").size) + 
self.update_wt(elf.DT_INIT_ARRAY, self.parent.parent.getsectionbyname(".init_array").addr) + self.update_wt(elf.DT_INIT_ARRAYSZ, self.parent.parent.getsectionbyname(".init_array").size) + + # check for full-RELRO + # !!! this might not be up to spec + if self.get_with_type(elf.DT_PLTGOT) is not None: + if (self.get_with_type(elf.DT_FLAGS) is not None and self.get_with_type(elf.DT_FLAGS).name_idx & elf.DF_BIND_NOW): + self.update_wt(elf.DT_PLTGOT, self.parent.parent.getsectionbyname(".got").addr) + else: + self.update_wt(elf.DT_PLTGOT, self.parent.parent.getsectionbyname(".got.plt").addr) + + if self.parent.parent.getsectionbyname(".rela.plt"): + self.update_wt(elf.DT_JMPREL, self.parent.parent.getsectionbyname(".rela.plt").addr) + + self.update_wt(elf.DT_SYMTAB, self.parent.parent.getsectionbyname(".dynsym").addr) + self.update_wt(elf.DT_STRTAB, self.parent.parent.getsectionbyname(".dynstr").addr) + self.update_wt(elf.DT_STRSZ, self.parent.parent.getsectionbyname(".dynstr").size) + self.update_wt(elf.DT_RELA, self.parent.parent.getsectionbyname(".rela.dyn").addr) + self.update_wt(elf.DT_RELASZ, self.parent.parent.getsectionbyname(".rela.dyn").size) + + if self.parent.parent.getsectionbyname(".plt"): + self.update_wt(elf.DT_PLTRELSZ, self.parent.parent.getsectionbyname(".plt").size) + + self.update_wt(elf.DT_VERSYM, self.parent.parent.getsectionbyname(".gnu.version").addr) + self.update_wt(elf.DT_VERNEED, self.parent.parent.getsectionbyname(".gnu.version_r").addr) + + for ph in self.parent.parent.ph: + if ph.ph.type == elf.PT_DYNAMIC: + ph.ph.offset = self.sh.offset + ph.ph.paddr = ph.ph.vaddr = self.sh.addr + +from elfesteem.cstruct import data_null, bytes_to_name, name_to_bytes class StrTable(Section): sht = elf.SHT_STRTAB - def parse_content(self, sex, size): - self.sex, self.size = sex, size - self.res = {} - c = self.content - q = 0 - while c: - p = c.find("\0") - if p < 0: - log.warning("Missing trailing 0 for string [%s]" % c) # XXX - p = len(c) - self.res[q] 
= c[:p] - q += p+1 - c = c[p+1:] + def get_name(self, idx): + n = self.content[idx:self.content.find(data_null, idx)] + return bytes_to_name(n) - def get_name(self, ofs): - n = self.content[ofs:] - n = n[:n.find("\0")] - return n + def find_name(self, name): + name = name_to_bytes(name) + if name + data_null in self.content: + return self.content.find(name+data_null) + + return None def add_name(self, name): - if name in self.content: + name = name_to_bytes(name) + if name + data_null in self.content: return self.content.find(name) - n = len(self.content) - self.content = str(self.content)+name+"\0" - return n - - def mod_name(self, name, new_name): - s = str(self.content) - if not name in s: - raise ValueError('unknown name', name) - s = s.replace('\x00'+name+'\x00', '\x00'+new_name+'\x00') - self.content = s - return len(self.content) + + # TODO: check for unused space and reuse, aka 2 or more NULL bytes + idx = len(self.content) + + self.resize(0, len(name)) + self.content[idx] = name+data_null + + return idx + + def mod_name(self, idx, name): + name = name_to_bytes(name) + n = self.content[idx:self.content.find(data_null, idx)] + dif = len(name) - len(n) + if dif != 0: + raise ValueError("Didn't fit in str section") + return idx + + def last_char(self): + pos = 0 + for i, c in enumerate(self.content): + if c != data_null: + pos = i + + return pos + last_char = property(last_char) class SymTable(Section): sht = elf.SHT_SYMTAB - def parse_content(self, sex, size): - self.sex, self.size = sex, size - c = self.content + def __init__(self, *args, **kargs): + Section.__init__(self, *args, **kargs) self.symtab=[] self.symbols={} - sz = self.sh.entsize - while c: - s,c = c[:sz],c[sz:] - if size == 32: - sym = WSym32(self,sex, size, s) - elif size == 64: - sym = WSym64(self,sex, size, s) - else: - ValueError('unknown size') + def parse_content(self): + Sym = { 32: elf.Sym32, 64: elf.Sym64 }[self.wsize] + c = self.content + sz = Sym(self).bytelen + if sz != 
self.sh.entsize: + log.error("SymTable has invalid entsize %d instead of %d", + self.sh.entsize, sz) + idx = 0 + while len(c) > sz*idx: + s = c[sz*idx:sz*(idx+1)] + idx += 1 + sym = Sym(parent=self, content=s) self.symtab.append(sym) self.symbols[sym.name] = sym + def __len__(self): + return len(self.symtab) def __getitem__(self,item): if type(item) is str: return self.symbols[item] return self.symtab[item] + def __setitem__(self,item,val): + if not isinstance(val, elf.Sym32): + raise ValueError("Cannot set SymTable item to %r"%val) + if item >= len(self.symtab): + self.symtab.extend([None for i in range(item+1-len(self.symtab))]) + self.symtab[item] = val + self.symbols[val.name] = val + self.content[item*self.sh.entsize] = val.pack() + if val.info>>4 == elf.STB_LOCAL and item >= self.sh.info: + # One greater than the symbol table index of the last local symbol + self.sh.info = item+1 + def readelf_display(self): + rep = [ "Symbol table '%s' contains %d entries:" + % (self.sh.name, len(self.symtab)) ] + if self.wsize == 32: + rep.append(" Num: Value Size Type Bind Vis Ndx Name") + elif self.wsize == 64: + rep.append(" Num: Value Size Type Bind Vis Ndx Name") + rep.extend([ _.readelf_display() for _ in self.symtab ]) + return "\n".join(rep) + class DynSymTable(SymTable): sht = elf.SHT_DYNSYM @@ -330,161 +608,325 @@ class DynSymTable(SymTable): class RelTable(Section): sht = elf.SHT_REL - def parse_content(self, sex, size): - self.sex, self.size = sex, size - if size == 32: - WRel = WRel32 - elif size == 64: - WRel = WRel64 + def rel_type(self): + if self.__class__.sht == elf.SHT_REL: + return { 32: elf.Rel32, 64: elf.Rel64 }[self.wsize] + elif self.__class__.sht == elf.SHT_RELA: + return { 32: elf.Rela32, 64: elf.Rela64 }[self.wsize] + elif self.parent.parent.Ehdr.machine == elf.EM_MIPS and self.wsize == 64: + return elf.Rel64MIPS else: - ValueError('unknown size') + raise Exception("unknown Rel") + + def parse_content(self): + Rel = self.rel_type() c = 
self.content self.reltab=[] self.rel = {} sz = self.sh.entsize - while c: - s,c = c[:sz],c[sz:] - rel = WRel(self,sex, size, s) + idx = 0 + while len(c) > sz*idx: + s = c[sz*idx:sz*(idx+1)] + idx += 1 + rel = Rel(parent=self, content=s) self.reltab.append(rel) self.rel[rel.sym] = rel -class RelATable(Section): + def __setitem__(self,item,val): + if not isinstance(val, elf.RelBase): + raise ValueError("Cannot set RelTable item to %r"%val) + if item >= len(self.reltab): + self.reltab.extend([None for i in range(item+1-len(self.reltab))]) + self.reltab[item] = val + self.rel[val.name] = val + self.content[item * self.sh.entsize] = val.pack() + + def readelf_display(self): + ret = "Relocation section %r at offset 0x%x contains %d entries:" % ( + self.sh.name, + self.sh.offset, + len(self.reltab)) + if self.wsize == 32: + ret += "\n Offset Info Type Sym.Value Sym. Name" + elif self.wsize == 64: + ret += "\n Offset Info Type Sym. Value Sym. Name" + if self.sht == elf.SHT_RELA: + ret += " + Addend" + for r in self.reltab: + ret += "\n" + r.readelf_display() + return ret + +class RelATable(RelTable): sht = elf.SHT_RELA - def parse_content(self, sex, size): - self.sex, self.size = sex, size - if size == 32: - WRela = WRela32 - elif size == 64: - WRela = WRela64 - else: - ValueError('unknown size') - c = self.content - self.reltab=[] - self.rel = {} - sz = self.sh.entsize - while c: - s,c = c[:sz],c[sz:] - rel = WRela(self,sex, size, s) - self.reltab.append(rel) - self.rel[rel.sym] = rel + ### Section List -class SHList: - def __init__(self, parent, sex, size): +class SHList(object): + def __init__(self, parent, **kargs): self.parent = parent + inheritsexwsize(self, parent, kargs) self.shlist = [] ehdr = self.parent.Ehdr of1 = ehdr.shoff if not of1: # No SH table return + filesize = len(parent.content) + if of1 > filesize: + log.error("Offset to section headers after end of file") + return + if of1+ehdr.shnum*ehdr.shentsize > filesize: + log.error("Offset to end of section 
headers after end of file") + return for i in range(ehdr.shnum): of2 = of1+ehdr.shentsize shstr = parent[of1:of2] - self.shlist.append( Section(self, sex, size, shstr=shstr) ) + self.shlist.append( Section.create(self, shstr=shstr) ) of1=of2 - self._shstr = self.shlist[ehdr.shstrndx] + assert len(self.shlist) == ehdr.shnum + # The shstrtab section is not always valid :-( + if 0 <= ehdr.shstrndx < ehdr.shnum: + self._shstrtab = self.shlist[ehdr.shstrndx] + else: + self._shstrtab = None + if not isinstance(self._shstrtab, StrTable): + class NoStrTab(object): + def get_name(self, idx): + return "" + self._shstrtab = NoStrTab() + + if ehdr.shnum == 0: return for s in self.shlist: if not isinstance(s, NoBitsSection): - s._content = StrPatchwork(parent[s.sh.offset: s.sh.offset+s.sh.size]) + if s.sh.offset > filesize: + log.error("Offset to section %d after end of file", + self.shlist.index(s)) + continue + if s.sh.offset+s.sh.size > filesize: + log.error("Offset to end of section %d after end of file", + self.shlist.index(s)) + continue + s.content = StrPatchwork(parent[s.sh.offset: s.sh.offset+s.sh.size]) # Follow dependencies when initializing sections zero = self.shlist[0] todo = self.shlist[1:] done = [] while todo: s = todo.pop(0) - if ( (s.linksection == zero or s.linksection in done) - and (s.infosection in [zero, None] or s.infosection in done)): + if ( (s.linksection in done + [zero, NoLinkSection]) and + (s.infosection in done + [zero, None]) ): done.append(s) - s.parse_content(sex, size) + s.parse_content() else: todo.append(s) - for s in self.shlist: - self.do_add_section(s) - - def do_add_section(self, section): - n = section.sh.name - if n.startswith("."): - n = n[1:] - n = n.replace(".","_").replace("-","_") - setattr(self, n, section) #xxx def append(self, item): - self.do_add_section(item) self.shlist.append(item) + def __len__(self): + return len(self.shlist) def __getitem__(self, item): return self.shlist[item] def __repr__(self): rep = ["# section 
offset size addr flags"] for i,s in enumerate(self.shlist): - l = "%(name)-15s %(offset)08x %(size)06x %(addr)08x %(flags)x " % s.sh - l = ("%2i " % i)+ l + s.__class__.__name__ - rep.append(l) + rep.append("%2i %r %s" % (i, s, s.__class__.__name__)) + return "\n".join(rep) + def readelf_display(self): + rep = [ "There are %d section headers, starting at offset %#x:" + % (len(self.shlist), self.parent.Ehdr.shoff), + "", + "Section Headers:" ] + rep.extend({32: elf.Shdr.header32, 64: elf.Shdr.header64}[self.wsize]) + rep.extend([ _.sh.readelf_display() for _ in self ]) + rep.extend(self[0].sh.footer) return "\n".join(rep) def __str__(self): - c = [] + raise AttributeError("Use pack() instead of str()") + def pack(self): + c = struct.pack("") for s in self.shlist: - c.append(str(s.sh)) - return "".join(c) - def resize(self, sec, diff): - for s in self.shlist: - if s.sh.offset > sec.sh.offset: - s.sh.offset += diff - if self.parent.Ehdr.shoff > sec.sh.offset: + c += s.sh.pack() + return c + + def move_after(self, sec, diff): + '''Only used when a resized section doesn't bellong to a segment + + !!! Untested ? + ''' + + old_section_file_end = sec.sh.offset + sec.sh.size - diff + old_section_memory_end = sec.sh.addr + sec.sh.size - diff + + reason_is_mapped = sec.sh.addr != 0 + + for section in self.shlist: + # check if a section needs to be moved relative to only one addres? 
+ checks = [ + section.sh.offset > old_section_file_end, + section.addr > old_section_memory_end, + ] + if reason_is_mapped: + assert all(checks) or not any(checks) + + # skip previous sections + if section.sh.offset < old_section_file_end: + continue + + if section is sec: + continue + + section.move(diff) + + + if old_section_file_end < self.parent.Ehdr.shoff: self.parent.Ehdr.shoff += diff - if self.parent.Ehdr.phoff > sec.sh.offset: - self.parent.Ehdr.phoff += diff + + + symbol_table = self.parent.getsectionbyname(".symtab") + if symbol_table and sec.addr: + for i, symbol in enumerate(symbol_table.symtab): + if symbol.value >= old_section_memory_end: + symbol.value += diff + symbol_table[i] = symbol + +class NoLinkSection(object): + get_name = lambda s,i:None + add_name = lambda s,n:None + mod_name = lambda s,i,n:None +NoLinkSection = NoLinkSection() ### Program Header List -class ProgramHeader: - def __init__(self, parent, sex, size, phstr): +class ProgramHeader(object): + def __init__(self, parent, PHtype, phstr, **kargs): self.parent = parent - self.ph = WPhdr(self,sex, size, phstr) - self.shlist = [] + inheritsexwsize(self, parent, kargs) + self.ph = PHtype(parent=self, content=phstr) + self.shlist = [] # based on readelf's "Section to Segment mapping" + self.shlist_partial = [] # These are other sections of interest + ph_file_end = self.ph.offset+self.ph.filesz + ph_mem_end = self.ph.vaddr+self.ph.memsz for s in self.parent.parent.sh: if isinstance(s, NullSection): continue - if ( (isinstance(s,NoBitsSection) and s.sh.offset == self.ph.offset+self.ph.filesz) - or self.ph.offset <= s.sh.offset < self.ph.offset+self.ph.filesz ): - s.phparent = self - self.shlist.append(s) + if self.ph.type != elf.PT_TLS and ( + (s.sh.flags & elf.SHF_TLS) and s.sh.type == elf.SHT_NOBITS): + # .tbss is special. It doesn't contribute memory space + # to normal segments. 
+ continue + if s.sh.flags & elf.SHF_ALLOC: + if (self.ph.vaddr <= s.sh.addr) and \ + (s.sh.addr+s.sh.size <= ph_mem_end): + if not s.phparent: + s.phparent = self + elif s.phparent.ph.type != elf.PT_LOAD and self.ph.type == elf.PT_LOAD: + s.phparent = self + s.phparents.append(self) + self.shlist.append(s) + else: + if (self.ph.offset <= s.sh.offset) and \ + (s.sh.offset+s.sh.size <= ph_file_end): + if not s.phparent: + s.phparent = self + elif s.phparent.ph.type != elf.PT_LOAD and self.ph.type == elf.PT_LOAD: + s.phparent = self + s.phparents.append(self) + self.shlist.append(s) + if s in self.shlist: + continue + if self.ph.offset <= s.sh.offset < ph_file_end: + # Section start in Segment + self.shlist_partial.append(s) + elif self.ph.offset < s.sh.offset+s.sh.size <= ph_file_end: + # Section end in Segment + self.shlist_partial.append(s) def resize(self, sec, diff): + local_logger = logging.getLogger("expand_sections") + # local_logger.setLevel(logging.DEBUG) + # the ELF standard demand that p_vaddr % p_align == p_offset % p_align, + # This requirements is designed such that it is possible to map the segments + # from the file into memory while still keeping the file size minimal + # (there is no need to insert padding into the file). 
+ + old_size = max(self.ph.filesz, self.ph.memsz) + new_size = old_size + diff + self.ph.filesz += diff self.ph.memsz += diff - self.parent.resize(sec, diff) -class ProgramHeader64: - def __init__(self, parent, sex, size, phstr): - self.parent = parent - self.ph = WPhdr64(self,sex, size, phstr) - self.shlist = [] - for s in self.parent.parent.sh: - if isinstance(s, NullSection): + # update trailing sections address to avoid overlap + local_logger.debug("LOCAL:") + for section in self.shlist: + local_logger.debug("%r", section) + + if section.sh.addr and section.addr > sec.addr: + local_logger.debug("Offseting section %r", section) + section.sh.addr += diff + + if section.sh.offset > sec.sh.offset: + local_logger.debug("\tadd %x %x", section.sh.offset, diff) + section.sh.offset += diff + local_logger.debug("\t%x", section.sh.offset) + + performed_segment_expansion = False + + # TODO: remove hacky fix: self.ph.align > 0x30 + if align_to(old_size, self.ph.align) != align_to(new_size, self.ph.align) and self.ph.align > 0x30: + local_logger.debug("old_size=%x|%x", old_size, align_to(old_size, self.ph.align)) + local_logger.debug("new_size=%x|%x", new_size, align_to(new_size, self.ph.align)) + local_logger.debug("Offseting subsequent segments after %r", self.shlist) + segment_diff = align_to(new_size, self.ph.align) - align_to(old_size, self.ph.align) + self.parent.move_after(sec, segment_diff, sec.sh.size - diff) + performed_segment_expansion = True + + # handled in move_after?? 
+ # yes but not properly, as a result of segment alignment + for section in self.shlist: + assert not sec.addr < section.addr < sec.addr + sec.size - diff + assert not sec.sh.offset < section.sh.offset < sec.sh.offset + sec.size - diff + + if performed_segment_expansion: + local_logger.debug("Segment resize: DONE") + return + + self.parent.parent.Ehdr.shoff += diff + + + local_logger.debug("GLOBAL:") + for section in self.parent.parent.sh: + local_logger.debug("%r", section) + if section.phparent: + local_logger.debug("\tskiping") continue - if ( (isinstance(s,NoBitsSection) and s.sh.offset == self.ph.offset+self.ph.filesz) - or self.ph.offset <= s.sh.offset < self.ph.offset+self.ph.filesz ): - s.phparent = self - self.shlist.append(s) - def resize(self, sec, diff): - self.ph.filesz += diff - self.ph.memsz += diff - self.parent.resize(sec, diff) -class PHList: - def __init__(self, parent, sex, size): + if self.ph.offset < section.sh.offset: + local_logger.debug("\toffseting %x", diff) + section.move(diff) + + # get_rvaitem needs addr and size (same names as in the Shdr class) + # Note that we should always have memsz >= filesz unless memsz == 0 + # Note that paddr is irrelevant for most OS + def get_size(self): + return self.ph.memsz + size = property(get_size) + def get_addr(self): + return self.ph.vaddr + addr = property(get_addr) + +class PHList(object): + def __init__(self, parent, **kargs): self.parent = parent + inheritsexwsize(self, parent, kargs) self.phlist = [] ehdr = self.parent.Ehdr of1 = ehdr.phoff for i in range(ehdr.phnum): of2 = of1+ehdr.phentsize phstr = parent[of1:of2] - if size == 32: - self.phlist.append(ProgramHeader(self, sex, size, phstr)) - else: - self.phlist.append(ProgramHeader64(self, sex, size, phstr)) + self.phlist.append(ProgramHeader(self, + { 32: elf.Phdr32, 64: elf.Phdr64 }[self.wsize], + phstr)) of1 = of2 def __getitem__(self, item): @@ -493,104 +935,136 @@ def __getitem__(self, item): def __repr__(self): r = [" offset filesz 
vaddr memsz"] for i,p in enumerate(self.phlist): - l = "%(offset)07x %(filesz)06x %(vaddr)08x %(memsz)07x %(type)02x"%p.ph + l = "%(offset)07x %(filesz)06x %(vaddr)08x %(memsz)07x %(type)02x %(flags)01x"%p.ph l = ("%2i " % i)+l r.append(l) r.append(" "+" ".join([s.sh.name for s in p.shlist])) return "\n".join(r) def __str__(self): - c = [] + raise AttributeError("Use pack() instead of str()") + def pack(self): + c = struct.pack("") for p in self.phlist: - c.append(str(p.ph)) - return "".join(c) - def resize(self, sec, diff): + c += p.ph.pack() + return c + def move_after(self, sec, diff, old_section_size): + local_logger = logging.getLogger("expand_sections") + # local_logger.setLevel(logging.DEBUG) + + # this is called by a ProgramHeader after a Section has beed resized + old_section_size = sec.sh.size - diff + local_logger.debug("%x = %x - %x", old_section_size, sec.sh.size, diff) + + old_section_file_end = sec.sh.offset + old_section_size + old_section_memory_end = sec.sh.addr + old_section_size + local_logger.debug("old_section_memory_end = sec.sh.addr + old_section_size") + local_logger.debug("%x = %x - %x", old_section_memory_end, sec.sh.addr, old_section_size) + for p in self.phlist: - if p.ph.offset > sec.sh.offset: - p.ph.offset += diff - if p.ph.vaddr > sec.phparent.ph.vaddr+sec.sh.offset: - p.ph.vaddr += diff - if p.ph.paddr > sec.phparent.ph.paddr+sec.sh.offset: - p.ph.paddr += diff + # address changes are requiered ONLY if the previous segment overflows in to it + # is there an instant when a segment needs to be moved relative to only one addres? 
+ checks = [ + p.ph.offset > old_section_file_end, + p.ph.vaddr > old_section_memory_end, + p.ph.vaddr > old_section_memory_end + ] + assert all(checks) or not any(checks) + + if p.ph.offset < old_section_file_end: + continue + + p.ph.offset += diff + p.ph.vaddr += diff + p.ph.paddr += diff -class virt: + for section in self.parent.sh: + if section.phparent is sec.phparent: + # skip sections in segment + continue + + # check if a section needs to be moved relative to only one addres? + checks = [ + p.ph.offset > old_section_file_end, + p.ph.vaddr > old_section_memory_end, + p.ph.vaddr > old_section_memory_end + ] + assert all(checks) or not any(checks) + # skip previous sections + if section.sh.offset < old_section_file_end: + continue + + section.move(diff) + + # the section header is at the end; so it's offset needs to be updated + self.parent.Ehdr.shoff += diff + + +class virt(object): def __init__(self, x): self.parent = x - def get_rvaitem(self, start, stop = None, step = None): - if stop == None: - s = self.parent.getsectionbyvad(start) - if s: - start = start-s.sh.addr - else: - s = self.parent.getphbyvad(start) - if s: - start = start-s.ph.vaddr - if not s: - return [(None, start)] - return [(s, start)] - total_len = stop - start + def get_rvaitem(self, item, section = None): + if item.stop is None: + s = self.parent.getsectionbyvad(item.start, section) + return [(s, item.start-s.addr)] + total_len = item.stop - item.start + start = item.start virt_item = [] - while total_len: - s = self.parent.getsectionbyvad(start) - if not s: - s = self.parent.getphbyvad(start) + while total_len > 0: + s = self.parent.getsectionbyvad(start, section) if not s: raise ValueError('unknown rva address! 
%x'%start) - if isinstance(s, ProgramHeader) or isinstance(s, ProgramHeader64): - s_max = s.ph.filesz - s_start = start - s.ph.vaddr - s_stop = stop - s.ph.vaddr - else: - s_max = s.sh.size - s_start = start - s.sh.addr - s_stop = stop - s.sh.addr - if s_stop >s_max: - s_stop = s_max - + s_start = start - s.addr + s_stop = item.stop - s.addr + if s_stop > s.size: + s_stop = s.size s_len = s_stop - s_start if s_len == 0: raise ValueError('empty section! %x'%start) total_len -= s_len start += s_len - n_item = slice(s_start, s_stop, step) + n_item = slice(s_start, s_stop) virt_item.append((s, n_item)) return virt_item - def item2virtitem(self, item): - if not type(item) is slice:#integer - return self.get_rvaitem(item) - start = item.start - stop = item.stop - step = item.step - return self.get_rvaitem(start, stop, step) + def __call__(self, ad_start, ad_stop = None, section = None): + rva_items = self.get_rvaitem(slice(ad_start, ad_stop), section) + return self.rvaitems2binary(rva_items) def __getitem__(self, item): - """ - XXX - __getitem__ in python is limited to [0-0x7fffffff] - So if a binary has some data mapped in hight memory, getitem is unusable - """ - raise ValueError('\n\n**DEPRECATED API**\n\nuse virt(start, [stop, step]) instead of virt[start, [stop, step]]') - - def __setitem__(self, item, data): - s, n_item = self.item2virtitem(item) - if n_item == None: - return - return s.content.__setitem__(n_item, data) + rva_items = self.get_rvaitem(item) + return self.rvaitems2binary(rva_items) + def get(self, start, end): + # Deprecated API + return self[start:end] + + def rvaitems2binary(self, rva_items): + data_out = struct.pack("") + for s, n_item in rva_items: + if not isinstance(s, ProgramHeader): + data_out += s.content[n_item] + continue + if not type(n_item) is slice: + n_item = slice(n_item, n_item+1) + start = n_item.start + s.ph.offset + stop = n_item.stop + s.ph.offset + n_item = slice(start, stop) + data_out += self.parent.content[n_item] + return 
data_out def __setitem__(self, item, data): if not type(item) is slice: - item = slice(item, item+len(data), None) - virt_item = self.item2virtitem(item) - if not virt_item: + item = slice(item, item+len(data)) + rva_items = self.get_rvaitem(item) + if not rva_items: return off = 0 - for s, n_item in virt_item: + for s, n_item in rva_items: if isinstance(s, ProgBits): - i = slice(off, n_item.stop+off-n_item.start, n_item.step) + i = slice(off, n_item.stop+off-n_item.start) data_slice = data.__getitem__(i) s.content.__setitem__(n_item, data_slice) @@ -601,6 +1075,13 @@ def __setitem__(self, item, data): return def __len__(self): + # __len__ should not be used: Python returns an int object, which + # will cap values to 0x7FFFFFFF on 32 bit systems. A binary can have + # a base address higher than this, resulting in the impossibility to + # handle such programs. + log.warning("__len__ deprecated") + return self.max_addr() + def max_addr(self): # the maximum virtual address is found by retrieving the maximum # possible virtual address, either from the program entries, and # section entries. if there is no such object, raise an error. 
@@ -611,34 +1092,11 @@ def __len__(self): if self.parent.sh.shlist: for shdr in self.parent.sh.shlist: l = max(l, shdr.sh.addr + shdr.sh.size) - if not l: - raise ValueError('maximum virtual address not found !') return l def is_addr_in(self, ad): return self.parent.is_in_virt_address(ad) - def __call__(self, ad_start, ad_stop = None, ad_step = None): - rva_items = self.get_rvaitem(ad_start, ad_stop, ad_step) - data_out = "" - for s, n_item in rva_items: - if not (isinstance(s, ProgramHeader) or isinstance(s, ProgramHeader64)): - data_out += s.content.__getitem__(n_item) - continue - if not type(n_item) is slice: - n_item = slice(n_item, n_item+1, 1) - start = n_item.start + s.ph.offset - stop = n_item.stop + s.ph.offset - if n_item.step != None: - step = n_item.step + s.ph.offset - else: - step = None - n_item = slice(start, stop, step) - #data_out += self.parent.content.__s.content.__getitem__(n_item) - data_out += self.parent.content.__getitem__(n_item) - - return data_out - def find(self, pattern, offset = 0): sections = [] for s in self.parent.ph: @@ -655,63 +1113,317 @@ def find(self, pattern, offset = 0): data = self.parent.content[s.ph.offset:s.ph.offset+s.ph.filesz] ret = data.find(pattern, offset) if ret != -1: - return ret + s.ph.vaddr#self.parent.rva2virt(s.addr + ret) + return ret + s.ph.vaddr offset = 0 return -1 +def elf_default_content(self, **kargs): + if self.Ehdr.type == elf.ET_REL: + elf_default_content_reloc(self, **kargs) + +def elf_default_content_reloc(self, **kargs): + # Create the Section header string table, which contains the names + # of the sections + self.sh._shstrtab = StrTable(self.sh, addralign = 1) + self.sh._shstrtab.content[0] = '\0' + symtab = SymTable(self.sh, addralign = 4, entsize = 16) + strtab = StrTable(self.sh, addralign = 1) + symtab.sh.name = ".symtab" + strtab.sh.name = ".strtab" + self.sh._shstrtab.sh.name = ".shstrtab" + # Create the Section Header List + sections = kargs.get('sections',[".text"]) + relocs = 
kargs.get('relocs',[]) + self.sh.shlist.append(NullSection(self.sh)) + for name in sections: + flags = {} + if name.startswith(".text"): + SectionType = ProgBits + flags['addralign'] = 4 + flags['flags'] = elf.SHF_ALLOC|elf.SHF_EXECINSTR + if name.startswith(".text.startup"): + flags['addralign'] = 16 + if name.startswith(".data"): + SectionType = ProgBits + flags['addralign'] = 4 + flags['flags'] = elf.SHF_ALLOC|elf.SHF_WRITE + if name.startswith(".bss"): + SectionType = NoBitsSection + flags['addralign'] = 4 + flags['flags'] = elf.SHF_ALLOC|elf.SHF_WRITE + if name.startswith(".rodata"): + SectionType = ProgBits + flags['addralign'] = 1 + flags['flags'] = elf.SHF_ALLOC + if name.startswith(".rodata."): + flags['flags'] |= elf.SHF_MERGE + if name.startswith(".rodata.str"): + flags['flags'] |= elf.SHF_STRINGS + flags['entsize'] = 1 + if name.startswith(".rodata.str1.4"): + flags['addralign'] = 4 + if name.startswith(".rodata.cst4"): + flags['entsize'] = 4 + flags['addralign'] = 4 + if name == ".eh_frame": + SectionType = ProgBits + flags['addralign'] = 4 + flags['flags'] = elf.SHF_ALLOC + if name == ".comment": + SectionType = ProgBits + flags['addralign'] = 1 + flags['entsize'] = 1 + flags['flags'] = elf.SHF_MERGE|elf.SHF_STRINGS + if name == ".note.GNU-stack": + SectionType = ProgBits + flags['addralign'] = 1 + if name == ".group": + SectionType = GroupSection + flags['addralign'] = 4 + flags['entsize'] = 4 + if not name in relocs: + flags['name'] = name + self.sh.shlist.append(SectionType(self.sh, **flags)) + if name in relocs: + flags = { 'name': ".rel"+name, 'addralign': 4, 'entsize': 8 } + flags['info'] = len(self.sh.shlist)-1 + self.sh.shlist.append(RelTable(self.sh, **flags)) + self.sh.shlist[-2].sh.name_idx = self.sh.shlist[-1].sh.name_idx+4 + self.sh.shlist.append(self.sh._shstrtab) + self.sh.shlist.append(symtab) + self.sh.shlist.append(strtab) + # Automatically generate some values + self.Ehdr.shstrndx = self.sh.shlist.index(self.sh._shstrtab) + 
self.Ehdr.shnum = len(self.sh.shlist) + symtab.sh.link = self.sh.shlist.index(strtab) + for s in self.sh.shlist: + if isinstance(s, RelTable) or isinstance(s, GroupSection): + s.sh.link = self.sh.shlist.index(symtab) + # Note that all sections are empty, and therefore the section offsets + # and sizes are invalid + # elf_set_offsets() should take care of that + +def elf_set_offsets(self): + if self.Ehdr.type != elf.ET_REL: + # TODO + return + # Set offsets; the standard section layout is not the order of the shlist + s = self.getsectionbyname("") + s.sh.offset = 0 + pos = self.Ehdr.ehsize + section_layout = [".group", ".text", ".data", ".bss"] + section_layout += [ s.sh.name for s in self.sh.shlist if s.sh.name.startswith(".rodata") ] + section_layout += [ s.sh.name for s in self.sh.shlist if s.sh.name.startswith(".data.") ] + section_layout += [ s.sh.name for s in self.sh.shlist if s.sh.name.startswith(".text.") ] + section_layout += [ ".comment", ".note.GNU-stack", ".eh_frame" ] + section_layout = section_layout \ + + [ ".shstrtab", None, ".symtab", ".strtab"] \ + + [ ".rel"+name for name in section_layout ] + for name in section_layout: + if name is None: + pos = ((pos + 3)//4)*4 + self.Ehdr.shoff = pos + self.Ehdr.shentsize = self.sh._shstrtab.sh.bytelen + pos += self.Ehdr.shnum * self.Ehdr.shentsize + continue + for s in self.getsectionsbyname(name): + align = s.sh.addralign + s.sh.offset = ((pos + align-1)//align)*align + s.sh.size = len(s.content) + pos = s.sh.offset + if name != ".bss": pos += s.sh.size + for s in self.sh.shlist[1:]: + if s.sh.offset == 0: + align = s.sh.addralign + s.sh.offset = ((pos + align-1)//align)*align + s.sh.size = len(s.content) + pos = s.sh.offset + + # ELF object class ELF(object): - def __init__(self, elfstr): - self._content = elfstr - self.parse_content() - + # API shared by all/most binary containers + architecture = property(lambda _:elf.constants['EM'].get(_.Ehdr.machine,'UNKNOWN(%d)'%_.Ehdr.machine)) + entrypoint = 
property(lambda _:_.Ehdr.entry) + sections = property(lambda _:_.sh) + symbols = property(lambda _:_.getsectionbytype(elf.SHT_SYMTAB)) + dynsyms = property(lambda _:_.getsectionbytype(elf.SHT_DYNSYM)) + + def __init__(self, elfstr = None, **kargs): self._virt = virt(self) + if elfstr is None: + # Create an ELF file, with default header values + # kargs can supersede these default values + self.wsize = kargs.get('wsize', 32) + self.sex = kargs.get('sex', '<') + self.Ehdr = elf.Ehdr(parent=self) + self.Ehdr.ident = struct.pack("16B", + 0x7f,0x45,0x4c,0x46, # magic number, \x7fELF + {32:1, 64:2}[self.wsize], # EI_CLASS + {'<':1,'>':2}[self.sex], # EI_DATA + 1, # EI_VERSION + 0, # EI_OSABI + 0, # EI_ABIVERSION + 0,0,0,0,0,0,0) + self.Ehdr.version = 1 + self.Ehdr.type = kargs.get('e_type', elf.ET_REL) + self.Ehdr.machine = kargs.get('e_machine', elf.EM_386) + self.Ehdr.ehsize = self.Ehdr.bytelen + self.sh = SHList(self) + self.ph = PHList(self) + elf_default_content(self, **kargs) + return + self.content = StrPatchwork(elfstr) + self.parse_content() + try: + self.check_coherency() + except ValueError: + # Report the exception message in a way compatible with most + # versions of python. 
+ import sys + log.error(str(sys.exc_info()[1])) def get_virt(self): return self._virt virt = property(get_virt) - content = ContentManager() def parse_content(self): - h = self.content[:8] - self.size = ord(h[4])*32 - self.sex = ord(h[5]) - self.Ehdr = WEhdr(self, self.sex, self.size, self.content) - self.sh = SHList(self, self.sex, self.size) - self.ph = PHList(self, self.sex, self.size) + h = struct.unpack("B"*8, self.content[:8]) + if h[:4] != ( 0x7f,0x45,0x4c,0x46 ): # magic number, \x7fELF + raise ValueError("Not an ELF") + self.wsize = h[4]*32 + self.sex = {1:'<', 2:'>'} .get(h[5], '') + if self.sex == '': + log.error("Invalid ELF, endianess defined to %d", h[5]) + if not self.wsize in (32, 64): + log.error("Invalid ELF, wordsize defined to %d", self.wsize) + self.wsize = 32 + self.Ehdr = elf.Ehdr(parent=self, content=self.content) + self.sh = SHList(self) + self.ph = PHList(self) def resize(self, old, new): pass def __getitem__(self, item): return self.content[item] def build_content(self): + if self.Ehdr.shoff == 0: + elf_set_offsets(self) c = StrPatchwork() - c[0] = str(self.Ehdr) - c[self.Ehdr.phoff] = str(self.ph) + c[0] = self.Ehdr.pack() + c[self.Ehdr.phoff] = self.ph.pack() for s in self.sh: - c[s.sh.offset] = str(s.content) - c[self.Ehdr.shoff] = str(self.sh) - return str(c) + c[s.sh.offset] = s.pack() + sh = self.sh.pack() + if len(sh): + # When 'shoff' is invalid, 'sh' is empty, but the line below + # is very slow because strpatchwork extends the file. 
+ c[self.Ehdr.shoff] = sh + return c.pack() + + def check_coherency(self): + if self.Ehdr.version != 1: + raise ValueError("Ehdr version is %d instead of 1"%self.Ehdr.version) + symtab_count, dynsym_count, hash_count = 0, 0, 0 + for sh in self.sh: + if sh.sh.type == elf.SHT_SYMTAB: + symtab_count += 1 + if sh.sh.type == elf.SHT_DYNSYM: + dynsym_count += 1 + if sh.sh.type == elf.SHT_HASH: + hash_count += 1 + if symtab_count > 1: + raise ValueError("Has more than one (%d) sections SYMTAB"% symtab_count) + if dynsym_count > 1: + raise ValueError("Has more than one (%d) sections DYNSYM"% dynsym_count) + if hash_count > 1: + raise ValueError("Has more than one (%d) sections HASH"% hash_count) + if self.Ehdr.shstrndx == elf.SHN_UNDEF: + log.warning("No section (e.g. core file)") + else: + if self.Ehdr.shstrndx >= len(self.sh): + raise ValueError("No section of index shstrndx=%d"%self.Ehdr.shstrndx) + elif self.sh[self.Ehdr.shstrndx].sh.type != elf.SHT_STRTAB: + raise ValueError("Section of index shstrndx is of type %d instead of %d"%(self.sh[self.Ehdr.shstrndx].sh.type, elf.SHT_STRTAB)) + elif self.sh[self.Ehdr.shstrndx].sh.name != '.shstrtab': + raise ValueError("Section of index shstrndx[%d] is of name '%s' instead of '%s'"%(self.Ehdr.shstrndx, self.sh[self.Ehdr.shstrndx].sh.name, '.shstrtab')) - def __str__(self): - return self.build_content() + skipable_section_types = [ + ] - def getphbyvad(self, ad): - for s in self.ph: - if s.ph.vaddr <= ad < s.ph.vaddr+s.ph.memsz: - return s + for sh1 in self.sh: + # the section after BSS can overlap - def getsectionbyvad(self, ad): - for s in self.sh: - if s.sh.addr <= ad < s.sh.addr+s.sh.size: - return s + for sh2 in self.sh: + if sh2.sh.type in skipable_section_types: + continue + + if sh1.sh.type != elf.SHT_NOBITS and sh2.sh.type != elf.SHT_NOBITS and \ + (sh1.sh.offset < sh2.sh.offset < sh1.sh.offset + sh1.size or \ + sh2.sh.offset < sh1.sh.offset < sh2.sh.offset + sh2.size): + raise ValueError("Section offset overlap for 
[%r] [%r]" % (sh1, sh2)) + + if not sh1.addr or not sh2.addr: + continue + + if sh1.sh.flags & sh2.sh.flags & elf.SHF_ALLOC and \ + (sh1.addr < sh2.addr < sh1.addr + sh1.size or \ + sh2.addr < sh1.addr < sh2.addr + sh2.size): + raise ValueError("Section address overlap for [%r] [%r]" % (sh1, sh2)) + + def __str__(self): + raise AttributeError("Use pack() instead of str()") + def pack(self): + return self.build_content() + def getsectionsbytype(self, sectiontype): + return [s for s in self.sh if s.sh.type == sectiontype] + def getsectionbytype(self, sectiontype): + s = self.getsectionsbytype(sectiontype) + if len(s) == 0: return () + return s[0] + def getsectionsbyname(self, name): + if ',' in name: name = name[:name.index(',')] + return [s for s in self.sh if s.sh.name.strip('\x00') == name] def getsectionbyname(self, name): - for s in self.sh: - if s.sh.name.strip('\x00') == name: + s = self.getsectionsbyname(name) + if len(s) == 0: return None + return s[0] + + def getsectionbyvad(self, ad, section = None): + if section: + s = self.getsectionbyname(section) + if s.sh.addr <= ad < s.sh.addr + s.sh.size: return s + sh = [ s for s in self.sh if s.addr <= ad < s.addr+s.size ] + ph = [ s for s in self.ph if s.addr <= ad < s.addr+s.size ] + + if len(sh) == 1 and len(ph) == 1: + # Executable returns a section and a PH + if not sh[0] in ph[0].shlist: + raise ValueError("Mismatch: section not in segment") + return sh[0] + if len(sh) == 1 and len(ph) > 1: + # Executable may also return a section and many PH + # e.g. 
the start of the .got section + return sh[0] + if len(sh) == 0 and len(ph) == 1: + # Core returns a PH + return ph[0] + if len(ph) == 0 and len(sh) > 1: + # Relocatable returns many sections, all at address 0 + # The priority given to .text is heuristic + for s in sh: + if s.sh.name == '.text': + return s + for s in sh: + if s.sh.name.startswith('.text'): + return s + return sh[0] return None + def has_relocatable_sections(self): + return self.Ehdr.type == elf.ET_REL def is_in_virt_address(self, ad): for s in self.sh: @@ -720,10 +1432,14 @@ def is_in_virt_address(self, ad): return False if __name__ == "__main__": - import rlcompleter,readline,pdb - from pprint import pprint as pp + import readline readline.parse_and_bind("tab: complete") - e = ELF(open("/bin/ls").read()) - print repr(e) + fd = open("/bin/ls") + try: + raw = fd.read() + finally: + fd.close() + e = ELF(raw) + print (repr(e)) #o = ELF(open("/tmp/svg-main.o").read()) diff --git a/elfesteem/intervals.py b/elfesteem/intervals.py new file mode 100644 index 0000000..6f24abf --- /dev/null +++ b/elfesteem/intervals.py @@ -0,0 +1,83 @@ +import sys +if sys.version_info[0] >= 3: + from functools import reduce +if sys.version_info[0:2] == (2, 3): + from elfesteem.compatibility_python23 import sorted + +class Intervals(object): + ''' + Represent a subset of the integers, to be used to detect which parts + of the file have been parsed + ''' + def __init__(self): + self.ranges = [ ] + def __str__(self): + if len(self.ranges) == 0: return "[]" + return reduce(lambda x, y: x+" "+y, + map(lambda x: "[%s:%s]"%(x.start,x.stop), self.ranges)) + # Internal methods to make object manipulation easier + def _split(self, *poslist): + def _split_slice(l, s): + for pos in sorted(poslist): + if s.start < pos < s.stop: + l.append(slice(s.start, pos)) + s = slice(pos, s.stop) + l.append(s) + return l + self.ranges = reduce(_split_slice, self.ranges, []) + def _merge(self): + def _merge_two_slices(l, s): + if len(l) and (l[-1].stop 
== s.start): + l[-1] = slice(l[-1].start, s.stop) + else: + l.append(s) + return l + self.ranges = reduce(_merge_two_slices, self.ranges, []) + # Interface of the class + def __iter__(self): + for s in self.ranges: + for t in range(s.start, s.stop): + yield t + def contains(self, start, stop): + for s in self.ranges: + if s.start <= start and stop <= s.stop: + return True + return False + def excludes(self, start, stop): + if len(self.ranges) == 0: + return True + if stop <= self.ranges[0].start: + return True + if self.ranges[-1].stop <= start: + return True + for i in range(len(self.ranges)-1): + if self.ranges[i].stop <= start and stop <= self.ranges[i+1].start: + return True + return False + def delete(self, start, stop): + def _remove_slices(l, s): + if start > s.start or stop < s.stop: + l.append(s) + return l + self._split(start, stop) + self.ranges = reduce(_remove_slices, self.ranges, []) + return self + def add(self, start, stop): + if len(self.ranges) == 0: + self.ranges.append(slice(start, stop)) + return self + new_ranges = [] + prev_stop = None + for l in self.ranges: + if start <= l.start: + if prev_stop is None: + new_ranges.append(slice(start, min(stop,l.start))) + elif prev_stop < stop: + new_ranges.append(slice(max(start,prev_stop), min(stop,l.start))) + new_ranges.append(l) + prev_stop = l.stop + if new_ranges[-1].stop < stop: + new_ranges.append(slice(max(start,new_ranges[-1].stop), stop)) + self.ranges = new_ranges + self._merge() + return self diff --git a/elfesteem/jclass_init.py b/elfesteem/jclass_init.py index 2769e4b..b22ac42 100644 --- a/elfesteem/jclass_init.py +++ b/elfesteem/jclass_init.py @@ -1,11 +1,8 @@ #! 
/usr/bin/env python -import struct, array -from strpatchwork import StrPatchwork +import struct from new_cstruct import CStruct import logging -from collections import defaultdict -from pprint import pprint as pp log = logging.getLogger("classparse") console_handler = logging.StreamHandler() console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) @@ -18,18 +15,8 @@ def gensapce(lvl): return ' '*lvl -class ContentManager(object): - def __get__(self, owner, x): - if hasattr(owner, '_content'): - return owner._content - def __set__(self, owner, new_content): - owner.resize(len(owner._content), len(new_content)) - owner._content=new_content - def __delete__(self, owner): - self.__set__(owner, None) - def out_attrs(o, lvl = None): - if lvl == None: + if lvl is None: lvl = 0 out = "" if not isinstance(o, list): @@ -507,8 +494,6 @@ def get_interfaces(self): class JCLASS(object): - content = ContentManager() - def __getitem__(self, item): return self.content[item] def __setitem__(self, item, data): @@ -518,7 +503,7 @@ def __setitem__(self, item, data): def __init__(self, pestr = None): self._sex = 0 self._wsize = 32 - self._content = pestr + self.content = pestr self.parse_content() def get_constant_pool_by_index(self, index): @@ -605,6 +590,9 @@ def add_fieldref(self, name, typetype, typename): if __name__ == "__main__": import sys - from pprint import pprint as pp - data = open(sys.argv[1]).read() + fd = open(sys.argv[1]) + try: + data = fd.read() + finally: + fd.close() e = JCLASS(data) diff --git a/elfesteem/macho/__init__.py b/elfesteem/macho/__init__.py new file mode 100644 index 0000000..f59ef7d --- /dev/null +++ b/elfesteem/macho/__init__.py @@ -0,0 +1 @@ +from elfesteem.macho.init import * diff --git a/elfesteem/macho/common.py b/elfesteem/macho/common.py new file mode 100644 index 0000000..96e26dd --- /dev/null +++ b/elfesteem/macho/common.py @@ -0,0 +1,332 @@ +#! 
/usr/bin/env python + +from elfesteem.cstruct import Constants, CStruct +from elfesteem.cstruct import data_empty, data_null +from elfesteem.cstruct import bytes_to_name, name_to_bytes + +import logging +log = logging.getLogger("mach-o") +console_handler = logging.StreamHandler() +console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) +log.addHandler(console_handler) +log.setLevel(logging.WARN) + +__all__ = [ 'data_empty', 'data_null', 'bytes_to_name', 'name_to_bytes', + 'log', 'relocation_info' ] + +# Variables defined below and that need to be visible when import *. +def ImportAll(**kargs): + __all__.extend(kargs.keys()) + globals().update(kargs) + +# In addition for needing to be visible when import *, these values +# are added to constants, built in a way allowing to recover the +# constant's name from its value. +constants = {} +def SetConstants(**kargs): + __all__.extend([_ for _ in kargs.keys() if _ != 'no_name']) + Constants(globs = globals(), table = constants, **kargs) + +#### Main source: /usr/include/mach/machine.h +# VEO is found on http://www.opensource.apple.com/source/cctools/cctools-809/include/mach/machine.h +ImportAll( +CPU_ARCH_ABI64 = 0x01000000 +) +SetConstants( +CPU_TYPE_VAX = 1, +CPU_TYPE_ROMP = 2, # Deprecated +CPU_TYPE_NS32032 = 4, # Deprecated +CPU_TYPE_NS32332 = 5, # Deprecated +CPU_TYPE_MC680x0 = 6, +CPU_TYPE_X86 = 7, +CPU_TYPE_I386 = 7, +CPU_TYPE_X86_64 = 7 | CPU_ARCH_ABI64, +CPU_TYPE_MIPS = 8, +CPU_TYPE_NS32532 = 9, # Deprecated +CPU_TYPE_MC98000 = 10, +CPU_TYPE_HPPA = 11, +CPU_TYPE_ARM = 12, +CPU_TYPE_ARM64 = 12 | CPU_ARCH_ABI64, +CPU_TYPE_MC88000 = 13, +CPU_TYPE_SPARC = 14, +CPU_TYPE_I860 = 15, +CPU_TYPE_I860_LITTLE = 16, # Deprecated +CPU_TYPE_ALPHA = 16, +CPU_TYPE_RS6000 = 17, # Deprecated +CPU_TYPE_POWERPC = 18, +CPU_TYPE_POWERPC64 = 18 | CPU_ARCH_ABI64, +CPU_TYPE_VEO = 255, +no_name = ('CPU_TYPE_I386', 'CPU_TYPE_I860_LITTLE',) +) + +ImportAll( +CPU_SUBTYPE_MASK = 0xff000000, # mask for feature flags 
+CPU_SUBTYPE_LIB64 = 0x80000000, # 64 bit libraries +) + +# VAX subtypes. +ImportAll( +CPU_SUBTYPE_VAX_ALL = 0, +CPU_SUBTYPE_VAX780 = 1, +CPU_SUBTYPE_VAX785 = 2, +CPU_SUBTYPE_VAX750 = 3, +CPU_SUBTYPE_VAX730 = 4, +CPU_SUBTYPE_UVAXI = 5, +CPU_SUBTYPE_UVAXII = 6, +CPU_SUBTYPE_VAX8200 = 7, +CPU_SUBTYPE_VAX8500 = 8, +CPU_SUBTYPE_VAX8600 = 9, +CPU_SUBTYPE_VAX8650 = 10, +CPU_SUBTYPE_VAX8800 = 11, +CPU_SUBTYPE_UVAXIII = 12, +) + +# ROMP subtypes. +ImportAll( +CPU_SUBTYPE_RT_ALL = 0, +CPU_SUBTYPE_RT_PC = 1, +CPU_SUBTYPE_RT_APC = 2, +CPU_SUBTYPE_RT_135 = 3, +) + +# 2032/32332/32532 subtypes. +ImportAll( +CPU_SUBTYPE_MMAX_ALL = 0, +CPU_SUBTYPE_MMAX_DPC = 1, # 032 CPU +CPU_SUBTYPE_SQT = 2, +CPU_SUBTYPE_MMAX_APC_FPU = 3, # 32081 FPU +CPU_SUBTYPE_MMAX_APC_FPA = 4, # Weitek FPA +CPU_SUBTYPE_MMAX_XPC = 5, # 532 CPU +) + +# 680x0 subtypes +# NeXT used to consider 68030 code as generic 68000 code. +# For backwards compatability: +# * CPU_SUBTYPE_MC68030 symbol has been preserved for source code +# compatability. +# * CPU_SUBTYPE_MC680x0_ALL has been defined to be the same +# subtype as CPU_SUBTYPE_MC68030 for binary comatability. +# * CPU_SUBTYPE_MC68030_ONLY has been added to allow new object +# files to be tagged as containing 68030-specific instructions. +ImportAll( +CPU_SUBTYPE_MC680x0_ALL = 1, +CPU_SUBTYPE_MC68030 = 1, +CPU_SUBTYPE_MC68040 = 2, +CPU_SUBTYPE_MC68030_ONLY = 3, +) + +# I386 subtypes. 
+def CPU_SUBTYPE_INTEL(f, m): return f + (m << 4) +ImportAll( +CPU_SUBTYPE_I386_ALL = CPU_SUBTYPE_INTEL(3, 0), +CPU_SUBTYPE_386 = CPU_SUBTYPE_INTEL(3, 0), +CPU_SUBTYPE_486 = CPU_SUBTYPE_INTEL(4, 0), +CPU_SUBTYPE_486SX = CPU_SUBTYPE_INTEL(4, 8), +CPU_SUBTYPE_586 = CPU_SUBTYPE_INTEL(5, 0), +CPU_SUBTYPE_PENT = CPU_SUBTYPE_INTEL(5, 0), +CPU_SUBTYPE_PENTPRO = CPU_SUBTYPE_INTEL(6, 1), +CPU_SUBTYPE_PENTII_M3 = CPU_SUBTYPE_INTEL(6, 3), +CPU_SUBTYPE_PENTII_M5 = CPU_SUBTYPE_INTEL(6, 5), +CPU_SUBTYPE_CELERON = CPU_SUBTYPE_INTEL(7, 6), +CPU_SUBTYPE_CELERON_MOBILE = CPU_SUBTYPE_INTEL(7, 7), +CPU_SUBTYPE_PENTIUM_3 = CPU_SUBTYPE_INTEL(8, 0), +CPU_SUBTYPE_PENTIUM_3_M = CPU_SUBTYPE_INTEL(8, 1), +CPU_SUBTYPE_PENTIUM_3_XEON = CPU_SUBTYPE_INTEL(8, 2), +CPU_SUBTYPE_PENTIUM_M = CPU_SUBTYPE_INTEL(9, 0), +CPU_SUBTYPE_PENTIUM_4 = CPU_SUBTYPE_INTEL(10, 0), +CPU_SUBTYPE_PENTIUM_4_M = CPU_SUBTYPE_INTEL(10, 1), +CPU_SUBTYPE_ITANIUM = CPU_SUBTYPE_INTEL(11, 0), +CPU_SUBTYPE_ITANIUM_2 = CPU_SUBTYPE_INTEL(11, 1), +CPU_SUBTYPE_XEON = CPU_SUBTYPE_INTEL(12, 0), +CPU_SUBTYPE_XEON_MP = CPU_SUBTYPE_INTEL(12, 1), +) + +ImportAll( +CPU_SUBTYPE_X86_ALL = 3, +CPU_SUBTYPE_X86_64_ALL = 3, +CPU_SUBTYPE_X86_ARCH1 = 4, +CPU_SUBTYPE_X86_64_H = 8, # Haswell feature subset +) + +# Mips subtypes. +ImportAll( +CPU_SUBTYPE_MIPS_ALL = 0, +CPU_SUBTYPE_MIPS_R2300 = 1, +CPU_SUBTYPE_MIPS_R2600 = 2, +CPU_SUBTYPE_MIPS_R2800 = 3, +CPU_SUBTYPE_MIPS_R2000a = 4, # pmax +CPU_SUBTYPE_MIPS_R2000 = 5, +CPU_SUBTYPE_MIPS_R3000a = 6, # 3max +CPU_SUBTYPE_MIPS_R3000 = 7, +) + +# HPPA subtypes for Hewlett-Packard HP-PA family of risc processors. +# Port by NeXT to 700 series. 
ImportAll(
    CPU_SUBTYPE_HPPA_ALL    = 0,
    CPU_SUBTYPE_HPPA_7100   = 0,
    CPU_SUBTYPE_HPPA_7100LC = 1,
)

# MC88000 subtypes
ImportAll(
    CPU_SUBTYPE_MC88000_ALL = 0,
    CPU_SUBTYPE_MMAX_JPC    = 1,
    CPU_SUBTYPE_MC88100     = 1,
    CPU_SUBTYPE_MC88110     = 2,
)

# MC98000 (PowerPC) subtypes
ImportAll(
    # Name as spelled in Apple's mach/machine.h.
    CPU_SUBTYPE_MC98000_ALL = 0,
    # Historical misspelling of the name above, kept as an alias so that
    # existing code referring to it keeps working.
    CPU_SUBTYPE_MC98000_AL  = 0,
    CPU_SUBTYPE_MC98601     = 1,
)


# I860 subtypes
ImportAll(
    CPU_SUBTYPE_I860_ALL = 0,
    CPU_SUBTYPE_I860_860 = 1,

    CPU_SUBTYPE_I860_LITTLE_ALL = 0,
    CPU_SUBTYPE_I860_LITTLE     = 1,
)

# RS6000 subtypes
ImportAll(
    CPU_SUBTYPE_RS6000_ALL = 0,
    CPU_SUBTYPE_RS6000     = 1,
)

# Sun4 subtypes - port done at CMU
ImportAll(
    CPU_SUBTYPE_SUN4_ALL  = 0,
    CPU_SUBTYPE_SUN4_260  = 1,
    CPU_SUBTYPE_SUN4_110  = 2,
    CPU_SUBTYPE_SPARC_ALL = 0,
)

# PowerPC subtypes
ImportAll(
    CPU_SUBTYPE_POWERPC_ALL   = 0,
    CPU_SUBTYPE_POWERPC_601   = 1,
    CPU_SUBTYPE_POWERPC_602   = 2,
    CPU_SUBTYPE_POWERPC_603   = 3,
    CPU_SUBTYPE_POWERPC_603e  = 4,
    CPU_SUBTYPE_POWERPC_603ev = 5,
    CPU_SUBTYPE_POWERPC_604   = 6,
    CPU_SUBTYPE_POWERPC_604e  = 7,
    CPU_SUBTYPE_POWERPC_620   = 8,
    CPU_SUBTYPE_POWERPC_750   = 9,
    CPU_SUBTYPE_POWERPC_7400  = 10,
    CPU_SUBTYPE_POWERPC_7450  = 11,
    CPU_SUBTYPE_POWERPC_970   = 100,

    CPU_SUBTYPE_POWERPC64_ALL = 0,
)

# VEO subtypes
# Note: the CPU_SUBTYPE_VEO_ALL will likely change over time to be defined as
# one of the specific subtypes.
ImportAll(
    CPU_SUBTYPE_VEO_1   = 1,
    CPU_SUBTYPE_VEO_2   = 2,
    CPU_SUBTYPE_VEO_3   = 3,
    CPU_SUBTYPE_VEO_4   = 4,
    CPU_SUBTYPE_VEO_ALL = 2,  # CPU_SUBTYPE_VEO_2
)

# Acorn subtypes
ImportAll(
    CPU_SUBTYPE_ARM_ALL    = 0,
    CPU_SUBTYPE_ARM_V4T    = 5,
    CPU_SUBTYPE_ARM_V6     = 6,
    CPU_SUBTYPE_ARM_V5TEJ  = 7,
    CPU_SUBTYPE_ARM_XSCALE = 8,
    CPU_SUBTYPE_ARM_V7     = 9,
    CPU_SUBTYPE_ARM_V7F    = 10,  # Cortex A9
    CPU_SUBTYPE_ARM_V7S    = 11,  # Swift
    CPU_SUBTYPE_ARM_V7K    = 12,
    CPU_SUBTYPE_ARM_V8     = 13,
    CPU_SUBTYPE_ARM_V6M    = 14,  # Not meant to be run under xnu
    CPU_SUBTYPE_ARM_V7M    = 15,  # Not meant to be run under xnu
    CPU_SUBTYPE_ARM_V7EM   = 16,  # Not meant to be run under xnu

    CPU_SUBTYPE_ARM64_ALL  = 0,
    CPU_SUBTYPE_ARM64_V8   = 1,
)


#### Source: /usr/include/mach-o/reloc.h

# * In reloc.h, there are two data structures: relocation_info and
#   scattered_relocation_info, which are merged in one structure below.
ImportAll(
    R_SCATTERED = 0x80000000
)

# A relocation entry read as two 32-bit words.  Every bit field is exposed
# as a read-only property.  Properties suffixed '_1' decode the scattered
# layout (packed in 'relocaddr'), those suffixed '_0' decode the
# non-scattered layout (packed in 'relocsym'); the unsuffixed 'type',
# 'extern', 'length' and 'pcrel' dispatch on the value of 'scattered'.
class relocation_info(CStruct):
    _fields = [
        ("relocaddr", "u32"),
        ("relocsym", "u32"),
    ]
    # Top bit of the first word selects the layout.
    scattered = property(lambda rel: (rel.relocaddr & 0x80000000) >> 31)
    # Low 24 bits of the first word.
    address   = property(lambda rel: (rel.relocaddr & 0x00ffffff))
    # Scattered
    pcrel_1   = property(lambda rel: (rel.relocaddr & 0x40000000) >> 30)
    length_1  = property(lambda rel: (rel.relocaddr & 0x30000000) >> 28)
    type_1    = property(lambda rel: (rel.relocaddr & 0x0f000000) >> 24)
    # Not scattered
    type_0    = property(lambda rel: (rel.relocsym & 0xf0000000) >> 28)
    extern_0  = property(lambda rel: (rel.relocsym & 0x08000000) >> 27)
    length_0  = property(lambda rel: (rel.relocsym & 0x06000000) >> 25)
    pcrel_0   = property(lambda rel: (rel.relocsym & 0x01000000) >> 24)
    value     = property(lambda rel: (rel.relocsym & 0x00ffffff))
    # Generic: look up the '_0' or '_1' variant according to 'scattered'.
    # There is no 'extern_1', so 'extern' is only usable when not scattered.
    type   = property(lambda rel: getattr(rel, "type_%s" % rel.scattered))
    extern = property(lambda rel: getattr(rel, "extern_%s" % rel.scattered))
    length = property(lambda rel: getattr(rel, "length_%s" % rel.scattered))
    pcrel  = property(lambda rel: getattr(rel, "pcrel_%s" % rel.scattered))

    # Whole second word when scattered, its low 24 bits otherwise.
    def symbolNumOrValue(self):
        if not self.scattered:
            return self.value
        return self.relocsym
    symbolNumOrValue = property(symbolNumOrValue)

    # '<relocation_info name hexvalue -- name hexvalue -- ...>'; 'extern' is
    # listed only for non-scattered entries.
    def __repr__(self):
        names = ["pcrel", "length"]
        if not self.scattered:
            names.append("extern")
        names.extend(["type", "scattered", "symbolNumOrValue"])
        shown = ["%s %s" % (name, hex(getattr(self, name))) for name in names]
        return "<%s %s>" % (self.__class__.__name__, " -- ".join(shown))

# Relocation types used in a generic implementation.  Relocation entries for
# normal things use the generic relocation as described above and their r_type
# is GENERIC_RELOC_VANILLA (a value of zero).
# (...)
# The implementation is quite messy given the compatibility with the existing
# relocation entry format. (...)
ImportAll(
    GENERIC_RELOC_VANILLA        = 0,  # generic relocation as described above
    GENERIC_RELOC_PAIR           = 1,  # Only follows a GENERIC_RELOC_SECTDIFF
    GENERIC_RELOC_SECTDIFF       = 2,
    GENERIC_RELOC_PB_LA_PTR      = 3,  # prebound lazy pointer
    GENERIC_RELOC_LOCAL_SECTDIFF = 4,
    GENERIC_RELOC_TLV            = 5,  # thread local variables
)

#### Source: /usr/include/mach-o/x86_64/reloc.h
# Relocations for x86_64 are a bit different than for other architectures in
# Mach-O: Scattered relocations are not used.  Almost all relocations produced
# by the compiler are external relocations.  An external relocation has the
# r_extern bit set to 1 and the r_symbolnum field contains the symbol table
# index of the target label.
# (...)
+ImportAll( +X86_64_RELOC_UNSIGNED = 0, # for absolute addresses +X86_64_RELOC_SIGNED = 1, # for signed 32-bit displacement +X86_64_RELOC_BRANCH = 2, # a CALL/JMP instruction with 32-bit displacement +X86_64_RELOC_GOT_LOAD = 3, # a MOVQ load of a GOT entry +X86_64_RELOC_GOT = 4, # other GOT references +X86_64_RELOC_SUBTRACTOR = 5, # must be followed by a X86_64_RELOC_UNSIGNED +X86_64_RELOC_SIGNED_1 = 6, # for signed 32-bit displacement with a -1 addend +X86_64_RELOC_SIGNED_2 = 7, # for signed 32-bit displacement with a -2 addend +X86_64_RELOC_SIGNED_4 = 8, # for signed 32-bit displacement with a -4 addend +X86_64_RELOC_TLV = 9, # for thread local variables +) diff --git a/elfesteem/macho/init.py b/elfesteem/macho/init.py new file mode 100755 index 0000000..d9616d3 --- /dev/null +++ b/elfesteem/macho/init.py @@ -0,0 +1,604 @@ +from elfesteem.macho.sections import * +from elfesteem.macho.loaders import * +from elfesteem import intervals +import struct + +constants = {} +def SetConstants(**kargs): + Constants(globs = globals(), table = constants, **kargs) + +#### Source: /usr/include/mach/vm_prot.h + +# Protection values, defined as bits within the vm_prot_t type + +SetConstants( +VM_PROT_NONE = 0x00, +VM_PROT_READ = 0x01, # read permission +VM_PROT_WRITE = 0x02, # write permission +VM_PROT_EXECUTE = 0x04, # execute permission +) +# The default protection for newly-created virtual memory +VM_PROT_DEFAULT = (VM_PROT_READ|VM_PROT_WRITE) +# The maximum privileges possible, for parameter checking. +VM_PROT_ALL = (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE) + +SetConstants( +# An invalid protection value. +# Used only by memory_object_lock_request to indicate no change +# to page locks. Using -1 here is a bad idea because it +# looks like VM_PROT_ALL and then some. +VM_PROT_NO_CHANGE = 0x08, + +# When a caller finds that he cannot obtain write permission on a +# mapped entry, the following flag can be used. 
The entry will +# be made "needs copy" effectively copying the object (using COW), +# and write permission will be added to the maximum protections +# for the associated entry. +VM_PROT_COPY = 0x10, + +# Another invalid protection value. +# Used only by memory_object_data_request upon an object +# which has specified a copy_call copy strategy. It is used +# when the kernel wants a page belonging to a copy of the +# object, and is only asking the object as a result of +# following a shadow chain. This solves the race between pages +# being pushed up by the memory manager and the kernel +# walking down the shadow chain. +VM_PROT_WANTS_COPY = 0x10, # (yes, vm_prot.h puts the same value as above) +no_name = ('VM_PROT_WANTS_COPY',), + +# Another invalid protection value. +# Indicates that the other protection bits are to be applied as a mask +# against the actual protection bits of the map entry. +VM_PROT_IS_MASK = 0x40, + +# Another invalid protection value to support execute-only protection. +# VM_PROT_STRIP_READ is a special marker that tells mprotect to not +# set VM_PROT_READ. We have to do it this way because existing code +# expects the system to set VM_PROT_READ if VM_PROT_EXECUTE is set. +# VM_PROT_EXECUTE_ONLY is just a convenience value to indicate that +# the memory should be executable and explicitly not readable. It will +# be ignored on platforms that do not support this type of protection. 
+VM_PROT_STRIP_READ = 0x80, +) +VM_PROT_EXECUTE_ONLY = (VM_PROT_EXECUTE|VM_PROT_STRIP_READ) + +#### Source: /usr/include/mach-o/fat.h + +import sys +if sys.version_info[0:2] == (2, 3): + SetConstants( + FAT_MAGIC = eval("0xcafebabeL"), + FAT_CIGAM = eval("0xbebafecaL"), + FAT_MAGIC_64 = eval("0xcafebabfL"), + FAT_CIGAM_64 = eval("0xbfbafecaL"), + ) +else: + SetConstants( + FAT_MAGIC = 0xcafebabe, + FAT_CIGAM = 0xbebafeca, # NXSwapLong(FAT_MAGIC) + # The support for the 64-bit fat file format described here is a work in + # progress and not yet fully supported in all the Apple Developer Tools. + FAT_MAGIC_64 = 0xcafebabf, + FAT_CIGAM_64 = 0xbfbafeca, # NXSwapLong(FAT_MAGIC_64) + ) + +class fat_header(CStruct): + _fields = [ + ("magic","u32"), # FAT_MAGIC or FAT_MAGIC_64 + ("nfat_arch","u32"), # number of structs that follow + ] + def __init__(self, *args, **kargs): + CStruct.__init__(self, *args, **kargs) + if self.parent.interval is not None : + self.parent.interval.delete(0,8) + +class fat_arch(CStruct): + _fields = [ + ("cputype","u32"), # cpu specifier (int) + ("cpusubtype","u32"), # machine specifier (int) + ("offset","u32"), # file offset to this object file + ("size","u32"), # size of this object file + ("align","u32"), # alignment as a power of 2 + ] + +class fat_arch_64(CStruct): + _fields = [ + ("cputype","u32"), # cpu specifier (int) + ("cpusubtype","u32"), # machine specifier (int) + ("offset","u64"), # file offset to this object file + ("size","u64"), # size of this object file + ("align","u32"), # alignment as a power of 2 + ("reserved","u32"), + ] + +class FarchList(CArray): + _cls = fat_arch + count = lambda _: _.parent.Fhdr.nfat_arch + # TODO: update self.parent.interval + # self.parent.interval.delete(of+20*i,of+20*(i+1)) + +class MachoList(CBase): + def unpack(self, c, o): + self.macholist = [] + for farch in self.parent.fh: + e = MACHO(c[farch.offset:farch.offset+farch.size], + interval=intervals.Intervals().add(0,farch.size), + 
parseSymbols=self.parent.fh.parseSymbols) + e.offset = farch.offset + self.macholist.append(e) + inverse = intervals.Intervals().add(0,farch.size) + for j in e.interval.ranges: + inverse.delete(j.start,j.stop) + if not self.parent.interval is None: + for j in inverse.ranges: + if not self.parent.interval.contains(farch.offset+j.start,farch.offset+j.stop): + raise ValueError("This part of file has already been parsed") + self.parent.interval.delete(farch.offset+j.start,farch.offset+j.stop) + def __getitem__(self, item): + return self.macholist[item] + + +#### Generic elfesteem data structures + +class virt(object): + def __init__(self, x): + self.parent = x + + def __call__(self, ad_start, ad_stop = None, section = None): + rva_items = self.get_rvaitem(slice(ad_start, ad_stop), section = section) + data_out = data_empty + for s, n_item in rva_items: + data_out += s.content[n_item] + return data_out + + def __getitem__(self, item): + rva_items = self.get_rvaitem(item) + data_out = data_empty + for s, n_item in rva_items: + data_out += s.content[n_item] + return data_out + + def __setitem__(self, item, data): + if not type(item) is slice: + item = slice(item, item+len(data)) + rva_items = self.get_rvaitem(item) + off = 0 + for s, n_item in rva_items: + i = slice(off,n_item.stop + off - n_item.start) + data_slice = data[i] + s.content[n_item] = data_slice + off = i.stop + + def get_rvaitem(self, item, section = None): + if item.step != None: + raise ValueError("pas de step") + if item.stop is None: + s = self.parent.getsectionbyvad(item.start, section = section) + if not s: + raise ValueError('unknown rva address! 0x%x'%item.start) + s_start = item.start - s.addr + n_item = slice(s_start, s.size) + return [ (s, n_item) ] + total_len = item.stop - item.start + virt_item = [] + start = item.start + while total_len: + s = self.parent.getsectionbyvad(start, section = section) + if s is None: + raise ValueError('unknown rva address! 
0x%x'%start) + s_start = start - s.addr + s_stop = item.stop - s.addr + if s_stop > s.size: + s_stop = s.size + s_len = s_stop - s_start + if s_len == 0: + print("GETRVAITEM %r %s %s" % (s, hex(s.addr), s.size)) + raise ValueError('empty section at address 0x%x'%start) + total_len -= s_len + start += s_len + n_item = slice(s_start, s_stop) + virt_item.append((s, n_item)) + return virt_item + + def __len__(self): + # __len__ should not be used: Python returns an int object, which + # will cap values to 0x7FFFFFFF on 32 bit systems. A binary can have + # a base address higher than this, resulting in the impossibility to + # handle such programs. + log.warning("__len__ deprecated") + return self.max_addr() + def max_addr(self): + if not hasattr(self.parent, 'load'): + log.error("Not a unique memory mapping in Mach-O fat") + return -1 + l=0 + for lc in self.parent.load: + if hasattr(lc, 'vmaddr'): + l = max(l, lc.vmaddr+lc.vmsize) + return l + + +# MACHO object +class MACHO(object): + # Either a FAT file, or a normal Mach-O file (TODO: ar archives) + # Normal Mach-O file + # Mhdr Header + # load Load commands + # sect Sections (true sections and also chunks in __LINKEDIT) + # rawdata Unanalyzed data + # FAT file + # Fhdr Header + # fh list of architectures + # arch list of normal Mach-O files + # rawdata Unanalyzed data + def __init__(self, data, interval=True, parseSymbols=True): + if interval is True: + interval = intervals.Intervals().add(0,len(data)) + self.interval = interval + self.datasize = len(data) + self.content = StrPatchwork(data) + self.parse_content(parseSymbols=parseSymbols) + self._virt = virt(self) + def get_virt(self): + return self._virt + virt = property(get_virt) + + sections = property(lambda _:_.sect) + def parse_content(self, parseSymbols=True): + magic, = struct.unpack("': + # a Mach-O FAT file may contain ar archives, called "Static + # archive libraries", + # cf. 
https://developer.apple.com/library/mac/documentation/DeveloperTools/Conceptual/MachOTopics/1-Articles/building_files.html + # elfesteem does not know how to parse ar archives + raise ValueError("ar archive") + elif magic in (MH_MAGIC, MH_MAGIC_64, + MH_CIGAM, MH_CIGAM_64): + if magic == MH_MAGIC: self.sex, self.wsize = '<', 32 + elif magic == MH_CIGAM: self.sex, self.wsize = '>', 32 + elif magic == MH_MAGIC_64: self.sex, self.wsize = '<', 64 + elif magic == MH_CIGAM_64: self.sex, self.wsize = '>', 64 + self.Mhdr = mach_header(parent=self, content=self.content) + of = len(self.Mhdr.pack()) + self.load = LoadCommands(parent=self, content=self.content, start=of) + self.sect = Sections(self) + for sect in self.sect: + if type(sect) == SymbolTable: + self.symbols = sect + break + else: + self.symbols = () + if parseSymbols: + self.parse_symbols() + # 'rawdata' is a list of pairs (position, byte) that is used by + # pack() to reconstruct what was not parsed by analysing the + # headers. Null padding is not memorized. 
+ self.rawdata = [] + if self.interval is not None: + for i in self.interval: + data = self.content[i:i+1] + if data != data_null: + self.rawdata.append( (i, data) ) + if len(self.rawdata): + log.warning("Part of the file was not parsed: %d bytes", len(self.rawdata)) + else: + raise ValueError("Not a Mach-O file") + + def parse_symbols(self): + lctext = self.load.findlctext() + if self.Mhdr.cputype in (CPU_TYPE_I386, CPU_TYPE_X86_64): + if lctext != None and lctext.flags == SG_PROTECTED_VERSION_1: + log.warning("cannot parse dynamic symbols because of encryption") + else: + self.parse_dynamic_symbols() + else: + log.warning("parse_dynamic_symbols() can only be used with x86 architectures, not %s", self.Mhdr.cputype) + + def pack(self): + if hasattr(self,'Mhdr'): + c = StrPatchwork() + mhdr = self.Mhdr.pack() + c[0] = mhdr + offset = len(mhdr) + c[offset] = self.load.pack() + for s in self.sect: + c[s.offset] = s.pack() + for offset, data in self.rawdata: + c[offset] = data + return c.pack() + elif hasattr(self,'Fhdr'): + c = StrPatchwork() + fhdr = self.Fhdr.pack() + c[0] = fhdr + offset = len(fhdr) + c[offset] = self.fh.pack() + for e in self.arch.macholist: + c[e.offset] = e.pack() + for offset, data in self.rawdata: + c[offset] = data + return c.pack() + def __str__(self): + raise AttributeError("Use pack() instead of str()") + + cpuname = property(lambda _:constants['CPU_TYPE'].get(_.Mhdr.cputype, + 'UNKNOWN(%d)'%_.Mhdr.cputype)) + def architecture(self): + if hasattr(self, 'Mhdr'): return self.cpuname + else: return [ _.cpuname for _ in self.arch ] + architecture = property(architecture) + + def entrypoint(self): + if not hasattr(self, 'load'): + log.error("Not a unique entrypoint in Mach-O fat") + return -1 + ep = [ _ for _ in self.load if _.cmd in (LC_MAIN, LC_UNIXTHREAD) ] + if len(ep) != 1: + log.error("Not a unique loader with entrypoint: %s" % ep) + return -1 + if ep[0].cmd == LC_MAIN: return self.off2ad(ep[0].entryoff) + if ep[0].cmd == LC_UNIXTHREAD: 
return ep[0].entrypoint + return -1 + def set_entrypoint(self, val): + if not hasattr(self, 'load'): + log.error("Not a unique entrypoint in Mach-O fat") + return + ep = [ _ for _ in self.load if _.cmd in (LC_MAIN, LC_UNIXTHREAD) ] + if len(ep) != 1: + log.error("Not a unique loader with entrypoint: %s" % ep) + return + if ep[0].cmd == LC_MAIN: + val = self.ad2off(val) + if val is not None: ep[0].entryoff = val + if ep[0].cmd == LC_UNIXTHREAD: ep[0].entrypoint = val + entrypoint = property(entrypoint, set_entrypoint) + + def getsectionbyname(self, name): + for s in self.sect: + if hasattr(s, 'sh') and name == "%s,%s"%(s.sh.segname,s.sh.sectname): + return s + return None + + def getsectionbyvad(self, ad, section = None): + if section: + s = self.getsectionbyname(section) + if s.addr <= ad < s.addr+s.size: + return s + f = [] + for lc in self.load: + if not lc.cmd in (LC_SEGMENT, LC_SEGMENT_64): + continue + for s in lc.sh: + if s.addr <= ad < s.addr+s.size: + f.append(s.sect) + if len(f) == 0: return None + return f[0] + + def getsegment_byoffset(self, of): + f = [] + for lc in self.load: + if hasattr(lc,'fileoff'): + if lc.fileoff <= of < lc.fileoff + lc.filesize: + f.append(lc) + if len(f) == 0: return None + return f[0] + + def ad2off(self, ad): + s = self.getsectionbyvad(ad) + if s is None: + log.error("Address %#x not mapped in memory", ad) + return + return ad - s.addr + s.offset + + def off2ad(self, of): + lc = self.getsegment_byoffset(of) + return of - lc.fileoff + lc.vmaddr + + def mem2file(self, ad): + f = [] + for s in self.sect: + if s.addr <= ad < s.addr+s.size: + f.append(ad-s.addr+s.offset) + return f + + def has_relocatable_sections(self): + return self.Mhdr.filetype == MH_OBJECT + + def add(self, *args, **kargs): + if args: + s= args[0] + if hasattr(self,'fh'): + for f in self.fh: + if f.content.wsize == s.wsize: + f.content.add(s) + return + if isinstance(s, Section): + if not self.load.addSH(s): + print("s.content %s" % s.content.pack()) + 
print("s.sex %s" % s.sex) + print("s.wsize %s" % s.wsize) + print("s.sh %r" % s.sh) + print("s.sh.segname %r" % s.sh.segname) + raise ValueError('addSH failed') + if not s.parent.size == len(s.pack()) : raise ValueError("s.parent.size and len(s.pack()) differ") + self.sect.add(s) + self.Mhdr.sizeofcmds += len(s.parent.pack()) + if hasattr(s, 'cmd'): # Load Command + if hasattr(s, 'segname'): + fileoff = 0 + vmaddr = 0x1000 + diff = 0 + for lc in self.load: + if hasattr(lc, 'segname'): + if not lc.fileoff == fileoff: + diff = lc.fileoff-fileoff + fileoff = lc.fileoff + vmaddr = lc.vmaddr + s.fileoff = fileoff + diff + s.vmaddr = vmaddr + diff + self.load.append(s) + elif kargs: + if 'parent' in kargs: + parent = kargs['parent'] + else: + parent = None + if 'sex' in kargs: + sex = kargs['sex'] + else: + sex = self.sex + if 'wsize' in kargs: + wsize = kargs['wsize'] + else: + wsize= self.wsize + type = kargs['type'] + nwlc = LoadCommand(parent=parent, sex=sex, wsize=wsize, cmd=type) + if 'segname' in kargs : + nwlc.segname = kargs['segname'] + else: + nwlc.segname = None + if 'initprot' in kargs : + nwlc.initprot = kargs['initprot'] + if 'maxprot' in kargs : + nwlc.maxprot = kargs['maxprot'] + else : + nwlc.maxprot = VM_PROT_ALL + if 'content' in kargs : + nwsh = Section(parent=sectionHeader(parent=self.load), + content=kargs['content']) + if not nwlc.segname is None: + nwsh.parent.segname = nwlc.segname + self.add(nwlc) + self.add(nwsh) + + def changeUUID(self, uuid): + for lc in self.load: + if hasattr(lc, 'changeUUID'): + lc.changeUUID(uuid) + + def changeStart(self): + self.sect.sect[0].content[0]='\0' + + def incompletedPosVal(self): + result = [] + if hasattr(self,'Fhdr'): + for arch in self.arch.macholist: + result.extend([(pos+arch.offset, val) for (pos, val) in arch.incompletedPosVal()]) + return result + if hasattr(self,'Mhdr'): + for lc in self.load: + if lc.cmd == LC_SEGMENT_64 and lc.is_text_segment(): + for s in lc.sh: + if s.is_text_section(): + if 
s.size%2 == 1 : + pos, val = s.offset+s.size, struct.pack("B",0x90) + if self[pos]==val: + result.append((pos,val)) + return result + + def checkParsedCompleted(self, **kargs): + if self.interval is None : + raise ValueError("No interval argument in macho_init call") + result = [] + for i in self.interval : + data = self.content[i:i+1] + if data != data_null : + result.append((i, data)) + if 'detect_nop' in kargs and kargs['detect_nop']: + for pos, val in self.incompletedPosVal(): + if (pos,val) in result: + self.rawdata.append((pos,val)) + result.remove((pos,val)) + return result + + dynsyms = property(lambda _:()) # TODO, cf. print_dysym from otool.py + def parse_dynamic_symbols(self): + if not len(self.sect): + return + for s in self.sect: + if hasattr(s, 'sh'): + if s.sh.type == S_NON_LAZY_SYMBOL_POINTERS: + nl_symbol_ptr = s + break + else: + nl_symbol_ptr = None + + for s in self.sect: + if hasattr(s, 'sh'): + if s.sh.type == S_LAZY_SYMBOL_POINTERS: + la_symbol_ptr = s + break + else: + la_symbol_ptr = None + + for s in self.sect: + if hasattr(s, 'sh') : + if s.sh.type == S_SYMBOL_STUBS: + symbol_stub = s + break + else: + symbol_stub = None + + hasDyldLazy = 0 + for s in self.sect: + if hasattr(s, 'SymbolOpcodeList'): + #print s.SymbolOpcodeList + dynamic_loader_info_lazy = s + hasDyldLazy = 1 + break + for s in self.sect: + if hasattr(s, 'BindSymbolOpcodeList'): + dynamic_loader_info_bind = s + break + + for s in self.sect: + if hasattr(s, 'symbols'): + symbol_table = s + break + # modif de symbol_stub pour les decalages dependant de la position de la_symbol_ptr + hasimport = 0 + for lc in self.load: + if hasattr(lc, 'segname'): + if lc.segname == "__IMPORT": + hasimport = 1 + break + if hasDyldLazy : + for symbol in dynamic_loader_info_lazy.SymbolOpcodeList: + symbol.pointer = la_symbol_ptr[symbol.realoffset] + la_symbol_ptr[symbol.realoffset].binding = symbol + symbol.stub = symbol_stub[symbol.addr] + symbol_stub[symbol.addr].binding = symbol + 
symbol_table[symbol.name].stub = symbol_stub[symbol.addr] + else : + indstubIndex = 0 + if nl_symbol_ptr is not None : + for indstub in nl_symbol_ptr: + symbol_table[indstubIndex].stub = indstub + indstubIndex += 1 + if symbol_stub is not None : + for indstub in symbol_stub: + if indstubIndex >= len(symbol_table): + break + symbol_table[indstubIndex].stub = indstub + indstubIndex += 1 + + def get_sym_value(self, name): + for s in self.sect: + if hasattr(s, 'symbols'): + symbol_table = s + break + if hasattr(symbol_table[name], 'stub'): + return symbol_table[name].stub.address + else: + return 0 diff --git a/elfesteem/macho/loaders.py b/elfesteem/macho/loaders.py new file mode 100755 index 0000000..c4adc97 --- /dev/null +++ b/elfesteem/macho/loaders.py @@ -0,0 +1,1653 @@ +from elfesteem.macho.common import * +from elfesteem.cstruct import convert_size2type, Constants, CBase, CArray, CStruct +import struct + +constants = {} +def SetConstants(**kargs): + Constants(globs = globals(), table = constants, **kargs) + +#### Source: /usr/include/mach-o/loader.h + +# * In loader.h, there are two data structures: mach_header and mach_header_64, which are merged in one structure below. 
# Mach-O file header.  The last field is declared with the 'ptr' type, so a
# single class is used for both word sizes.
class mach_header(CStruct):
    _fields = [
        ("magic", "u32"),       # mach magic number identifier
        ("cputype", "u32"),     # cpu specifier
        ("cpusubtype", "u32"),  # machine specifier
        ("filetype", "u32"),    # type of file
        ("ncmds", "u32"),       # number of load commands
        ("sizeofcmds", "u32"),  # the size of all the load commands
        ("flags", "ptr"),       # flags
    ]
    def __init__(self, *args, **kargs):
        CStruct.__init__(self, *args, **kargs)
        magic = self.magic
        if not (magic == MH_MAGIC or magic == MH_MAGIC_64):
            raise ValueError('Not a little-endian Mach-O')
        interval = self.parent.interval
        if interval is None:
            return
        # Mark the header bytes as parsed: six u32 fields (24 bytes) plus
        # wsize//8 bytes, presumably the size of the 'ptr' field.
        interval.delete(0, 24 + self.wsize//8)

import sys
# On Python 2.3 the magic numbers are built with eval() from literals that
# carry an explicit 'L' suffix (presumably to obtain positive long values
# on that interpreter); other versions use plain literals.
if sys.version_info[:2] == (2, 3):
    SetConstants(
        name = 'MH_MAGIC',
        MH_MAGIC    = eval("0xfeedfaceL"),
        MH_CIGAM    = eval("0xcefaedfeL"),
        MH_MAGIC_64 = eval("0xfeedfacfL"),
        MH_CIGAM_64 = eval("0xcffaedfeL"),
    )
else:
    SetConstants(
        name = 'MH_MAGIC',
        MH_MAGIC    = 0xfeedface,  # the mach magic number
        MH_CIGAM    = 0xcefaedfe,  # NXSwapInt(MH_MAGIC)
        MH_MAGIC_64 = 0xfeedfacf,  # the 64-bit mach magic number
        MH_CIGAM_64 = 0xcffaedfe,  # NXSwapInt(MH_MAGIC_64)
    )

SetConstants(
    # Constants for the "filetype" field
    name = 'MH_FILETYPE',
    MH_OBJECT      = 0x1,  # relocatable object file
    MH_EXECUTE     = 0x2,  # demand paged executable file
    MH_FVMLIB      = 0x3,  # fixed VM shared library file
    MH_CORE        = 0x4,  # core file
    MH_PRELOAD     = 0x5,  # preloaded executable file
    MH_DYLIB       = 0x6,  # dynamically bound shared library
    MH_DYLINKER    = 0x7,  # dynamic link editor
    MH_BUNDLE      = 0x8,  # dynamically bound bundle file
    MH_DYLIB_STUB  = 0x9,  # shared library stub for static linking only, no section contents
    MH_DSYM        = 0xa,  # companion file with only debug sections
    MH_KEXT_BUNDLE = 0xb,  # x86_64 kexts
)

SetConstants(
    # Constant bits for the "flags" field
    name = 'MH_FLAGS',
    MH_NOUNDEFS                = 0x00000001,
    MH_INCRLINK                = 0x00000002,
    MH_DYLDLINK                = 0x00000004,
    MH_BINDATLOAD              = 0x00000008,
    MH_PREBOUND                = 0x00000010,
    MH_SPLIT_SEGS              = 0x00000020,
    MH_LAZY_INIT               = 0x00000040,
    MH_TWOLEVEL                = 0x00000080,
    MH_FORCE_FLAT              = 0x00000100,
    MH_NOMULTIDEFS             = 0x00000200,
    MH_NOFIXPREBINDING         = 0x00000400,
    MH_PREBINDABLE             = 0x00000800,
    MH_ALLMODSBOUND            = 0x00001000,
    MH_SUBSECTIONS_VIA_SYMBOLS = 0x00002000,
    MH_CANONICAL               = 0x00004000,
    MH_WEAK_DEFINES            = 0x00008000,
    MH_BINDS_TO_WEAK           = 0x00010000,
    MH_ALLOW_STACK_EXECUTION   = 0x00020000,
    MH_ROOT_SAFE               = 0x00040000,
    MH_SETUID_SAFE             = 0x00080000,
    MH_NO_REEXPORTED_DYLIBS    = 0x00100000,
    MH_PIE                     = 0x00200000,
    MH_DEAD_STRIPPABLE_DYLIB   = 0x00400000,
    MH_HAS_TLV_DESCRIPTORS     = 0x00800000,
    MH_NO_HEAP_EXECUTION       = 0x01000000,
    MH_APP_EXTENSION_SAFE      = 0x02000000,
)

# The load commands directly follow the mach_header.  The total size of all
# of the commands is given by the sizeofcmds field in the mach_header.  All
# load commands must have as their first two fields cmd and cmdsize.  The cmd
# field is filled in with a constant for that command type.  Each command type
# has a structure specifically for it.  The cmdsize field is the size in bytes
# of the particular load command structure plus anything that follows it that
# is a part of the load command (i.e. section structures, strings, etc.).  To
# advance to the next load command the cmdsize can be added to the offset or
# pointer of the current load command.  The cmdsize for 32-bit architectures
# MUST be a multiple of 4 bytes and for 64-bit architectures MUST be a multiple
# of 8 bytes (these are forever the maximum alignment of any load commands).
# The padded bytes must be zero.  All tables in the object file must also
# follow these rules so the file can be memory mapped.  Otherwise the pointers
# to these tables will not work well or at all on some machines.  With all
# padding zeroed like objects will compare byte for byte.
+class load_command(CStruct): + _fields = [ + ("cmd","u32"), # type of load command + ("cmdsize","u32"), # total size of command in bytes + ] + +# Hereafter, elfesteem creates 'LoadCommand' which registers all known +# load commands. +from elfesteem.cstruct import CStruct_metaclass +class LoadMetaclass(CStruct_metaclass): + registered = {} + def __new__(cls, name, bases, dct): + if '_fields' in dct: + # Those fields are common to all commands, we insert them here. + dct['_fields'][:0] = load_command._fields + if '_offsets_in_data' in dct: + # There is some additional data in a variable-length load command + fmt = ''.join([convert_size2type(t,None) for _, t in dct['_fields']]) + s = struct.calcsize(fmt) + dct['_fields'].append( ("data",CData(lambda _,s=s:_.cmdsize-s)) ) + def get_in_data(self, f=None, s=0): + value = getattr(self, f) + if value < s: + return None + data = self.data.pack() + if f == "linked_modules": + data, = struct.unpack("B", data[value-s:value-s+1]) + data = [str((data&(1<>i) for i in range(min(8,self.nmodules))] + return ''.join(data) + '...' + else: + data = data[(value-s):data.index(data_null,value-s)] + return str(data.decode('latin1')) + for f in dct['_offsets_in_data']: + dct['str_'+f] = property(lambda _,f=f,s=s: + get_in_data(_,f=f,s=s)) + o = CStruct_metaclass.__new__(cls, name, bases, dct) + # Parse the list of load commands for this data structure. + for cmd in dct.get('lc_types',()): + assert not cmd in LoadCommand.registered + LoadCommand.registered[cmd] = o + return o + # These two lines give the same result. 
+ # LoadCommand(parent=p, content=c, start=o) + # LoadCommand.registered[cmd](parent=p, content=c, start=o) + # We can also create a load command with default content + # LoadCommand(sex='<', wsize=32, cmd=LC_SEGMENT) + def __call__(cls, *args, **kargs): + if 'cmd' in kargs: + if not 'parent' in kargs: kargs['parent'] = None + cmd = struct.pack("I",kargs['cmd']) + else: + c = kargs['content'] + o = kargs.get('start',0) + cmd = c[o:o+4] + p = kargs['parent'] + sex = kargs.get('sex',getattr(p,'sex','')) + if len(cmd) >= 4: cmd, = struct.unpack(sex+"I",cmd) + else: cmd = 0 + if not 'cmd' in kargs: + # Early test that 'cmdsize' has a valid value + cmdsize = c[o+4:o+8] + if len(cmdsize) < 4: + raise ValueError("cmdsize after end of file") + cmdsize, = struct.unpack(sex+"I",cmdsize) + if cmdsize < 8: + log.error("load command %d with size less than 8 bytes", len(p)) + if hasattr(p, 'parent'): + if o+cmdsize > p.offset+p.parent.Mhdr.sizeofcmds: + log.error("load command %d bigger than sizeofcmds", len(p)) + if p.parent.interval is not None and not p.parent.interval.contains(o,o+cmdsize): + raise ValueError("Parsing cmd %d of size %d reads a part of the file that has already been parsed" % (cmd, cmdsize)) + if cmd in cls.lc_types: + # A subclass of LoadCommand has been used + lh = super(LoadMetaclass,cls).__call__(*args, **kargs) + elif len(cls.lc_types): + # A subclass of LoadCommand has been used, with an incoherent cmd + # We don't use the class name, because one class may correspond + # to many values for cmd. 
+ log.warning("Incoherent input cmd=%#x for %s", cmd, cls.__name__) + lh = super(LoadMetaclass,cls).__call__(*args, **kargs) + elif cmd in LoadCommand.registered: + # LoadCommand has been used with a known cmd + lh = LoadCommand.registered[cmd](*args, **kargs) + else: + # LoadCommand has been used with an unknown cmd + lh = super(LoadMetaclass,cls).__call__(*args, **kargs) + if not 'content' in kargs: + lh.cmdsize = lh.bytelen + else: + assert c[o:o+lh.bytelen] == lh.pack() + return lh +LoadBase = LoadMetaclass('LoadBase', (CStruct,), {}) + +from elfesteem.cstruct import CData +class LoadCommand(LoadBase): + # A generic load command may have arbitrary data following + # the first two values 'cmd' and 'cmdsize'. + # Note that this is not sufficient when the load command + # should trigger the analysis of sections, referred by their + # offset in the file. + lc_types = () + _fields = [ ("data",CData(lambda _:max(0,_.cmdsize-8))) ] + def changeOffsets(self, decalage, min_offset=None): + pass + def otool(self, llvm=False): + # Output similar to llvm-otool (depending on llvm version) + # Cf. 
https://opensource.apple.com/source/cctools/cctools-895/otool/ofile_print.c + # and others + import time + lc_value = [] + shift = 1 + for name, f_type in self._fields: + value = getattr(self, name) + if name == "cmd": + value = "LC_"+constants['LC'].get(self.cmd, hex(self.cmd)) + elif name == "cmdsize": + pass + elif name in getattr(self, '_offsets_in_data', []): + data = getattr(self, 'str_'+name) + if data is None: value = "?(bad offset %u)" % value + else: value = "%s (offset %u)" % (data, value) + name = "%12s" % name + elif name in ["vmaddr", "vmsize"]: + if self.cmd == LC_SEGMENT_64: value = "%#018x" % value + else: value = "%#010x" % value + elif name in ["maxprot", "initprot", "cksum", "header addr"]: + value = "%#010x" % value + elif name == "flags": + value = "%#x" % value + elif name in ("sdk", "minos"): + if value == 0: + value = "n/a" + else: + value = split_integer(value, 8, 3, truncate=1) + elif name == "tools": + for tool in value: + value = tool.tool + lc_value.append(('tool', value)) + value = split_integer(tool.version, 8, 3, truncate=2) + lc_value.append(('version', value)) + elif name == "timestamp": + name = "time stamp" + value = "%u %s" %(value, time.ctime(value)) + elif name in ["current_version", "compatibility_version"]: + shift = 0 + name = name[:-8] + value = "version " + split_integer(value, 8, 3) + elif name == "pad_segname": + name = "segname" + value = str(value.rstrip(data_null).decode('latin1')) + elif name == "raw_uuid": + name = "uuid" + value = "%.8X-%.4X-%.4X-%.4X-%.4X%.8X" % self.uuid + elif self.cmd in version_min_command.lc_types: + shift = 2 + value = split_integer(value, 8, 3, truncate=2) + elif self.cmd == LC_SOURCE_VERSION: + shift = 2 + value = split_integer(value, 10, 5, truncate=2) + elif self.cmd == LC_ENCRYPTION_INFO: + shift = 4 + elif self.cmd in (LC_THREAD, LC_UNIXTHREAD): + shift = 4 + # Display text values if they are the expected ones. 
+ if name == "flavor" and not 'unknown' in self.flavorname: + value = self.flavorname + if name == "count" and not 'unknown' in self.flavorname: + value = self.flavorcount + if isinstance(f_type, str): + lc_value.append((name, value)) + # otool displays lc_value with a nice alignment + name_max_len = 0 + for name, _ in lc_value: + if name_max_len < len(name): + name_max_len = len(name) + format = "%%%ds %%s" % (name_max_len+shift) + return [format % _ for _ in lc_value] + # NB: for some load commands, additional information will be displayed + +def split_integer(v, nbits, ndigits, truncate=None): + mask = (1< 0: + res.insert(0, v & mask) + v = v >> nbits + ndigits -= 1 + res[0] += v << nbits + if truncate is not None: + while len(res) > truncate and res[-1] == 0: + res = res[:-1] + return ".".join(["%u"%_ for _ in res]) + +# After MacOS X 10.1 when a new load command is added that is required to be +# understood by the dynamic linker for the image to execute properly the +# LC_REQ_DYLD bit will be or'ed into the load command constant. If the dynamic +# linker sees such a load command it it does not understand will issue a +# "unknown load command required for execution" error and refuse to use the +# image. Other load commands without this bit that are not understood will +# simply be ignored. 
# The literal is written through eval() for python 2.3 only, where it needs
# the 'L' suffix that other versions of python would reject or don't need.
if sys.version_info[0:2] != (2, 3):
    LC_REQ_DYLD = 0x80000000
else:
    LC_REQ_DYLD = eval("0x80000000L")

# Values of the 'cmd' field of a load command.
SetConstants(
    LC_SEGMENT                  = 0x1,  # segment of this file to be mapped
    LC_SYMTAB                   = 0x2,  # link-edit stab symbol table info
    LC_SYMSEG                   = 0x3,  # link-edit gdb symbol table info (obsolete)
    LC_THREAD                   = 0x4,  # thread
    LC_UNIXTHREAD               = 0x5,  # unix thread (includes a stack)
    LC_LOADFVMLIB               = 0x6,  # load a specified fixed VM shared library
    LC_IDFVMLIB                 = 0x7,  # fixed VM shared library identification
    LC_IDENT                    = 0x8,  # object identification info (obsolete)
    LC_FVMFILE                  = 0x9,  # fixed VM file inclusion (internal use)
    LC_PREPAGE                  = 0xa,  # prepage command (internal use)
    LC_DYSYMTAB                 = 0xb,  # dynamic link-edit symbol table info
    LC_LOAD_DYLIB               = 0xc,  # load a dynamically linked shared library
    LC_ID_DYLIB                 = 0xd,  # dynamically linked shared lib ident
    LC_LOAD_DYLINKER            = 0xe,  # load a dynamic linker
    LC_ID_DYLINKER              = 0xf,  # dynamic linker identification
    LC_PREBOUND_DYLIB           = 0x10, # modules prebound for a dynamically linked shared library
    LC_ROUTINES                 = 0x11, # image routines
    LC_SUB_FRAMEWORK            = 0x12, # sub framework
    LC_SUB_UMBRELLA             = 0x13, # sub umbrella
    LC_SUB_CLIENT               = 0x14, # sub client
    LC_SUB_LIBRARY              = 0x15, # sub library
    LC_TWOLEVEL_HINTS           = 0x16, # two-level namespace lookup hints
    LC_PREBIND_CKSUM            = 0x17, # prebind checksum
    LC_LOAD_WEAK_DYLIB          = 0x18|LC_REQ_DYLD, # load a dynamically linked shared library that is allowed to be missing (all symbols are weak imported)
    LC_SEGMENT_64               = 0x19, # 64-bit segment of this file to be mapped
    LC_ROUTINES_64              = 0x1a, # 64-bit image routines
    LC_UUID                     = 0x1b, # the uuid
    LC_RPATH                    = 0x1c|LC_REQ_DYLD, # runpath additions
    LC_CODE_SIGNATURE           = 0x1d, # location of code signature
    LC_SEGMENT_SPLIT_INFO       = 0x1e, # location of info to split segments
    LC_REEXPORT_DYLIB           = 0x1f|LC_REQ_DYLD, # load and re-export dylib
    LC_LAZY_LOAD_DYLIB          = 0x20, # delay load of dylib until first use
    LC_ENCRYPTION_INFO          = 0x21, # encrypted segment information
    LC_DYLD_INFO                = 0x22, # compressed dyld information
    LC_DYLD_INFO_ONLY           = 0x22|LC_REQ_DYLD, # compressed dyld information only
    LC_LOAD_UPWARD_DYLIB        = 0x23|LC_REQ_DYLD, # load upward dylib
    LC_VERSION_MIN_MACOSX       = 0x24, # build for MacOSX min OS version
    LC_VERSION_MIN_IPHONEOS     = 0x25, # build for iPhoneOS min OS version
    LC_FUNCTION_STARTS          = 0x26, # compressed table of function start addresses
    LC_DYLD_ENVIRONMENT         = 0x27, # string for dyld to treat like environment variable
    LC_MAIN                     = 0x28|LC_REQ_DYLD, # replacement for LC_UNIXTHREAD
    LC_DATA_IN_CODE             = 0x29, # table of non-instructions in __text
    LC_SOURCE_VERSION           = 0x2A, # source version used to build binary
    LC_DYLIB_CODE_SIGN_DRS      = 0x2B, # Code signing DRs copied from linked dylibs
    LC_ENCRYPTION_INFO_64       = 0x2C, # 64-bit encrypted segment information
    LC_LINKER_OPTION            = 0x2D, # linker options in MH_OBJECT files
    LC_LINKER_OPTIMIZATION_HINT = 0x2E, # optimization hints in MH_OBJECT files
    LC_VERSION_MIN_TVOS         = 0x2F,
    LC_VERSION_MIN_WATCHOS      = 0x30,
    LC_NOTE                     = 0x31, # arbitrary data included within a Mach-O file
    LC_BUILD_VERSION            = 0x32, # build for platform min OS version
    LC_DYLD_EXPORTS_TRIE        = 0x33|LC_REQ_DYLD, # used with linkedit_data_command, payload is trie
    LC_DYLD_CHAINED_FIXUPS      = 0x34|LC_REQ_DYLD, # used with linkedit_data_command
    )

# * In loader.h, there are two data structures: section and section_64,
#   which are merged into the single structure below.
class sectionHeader(CStruct):
    # Header of one section, as found after a segment load command.
    # The names are stored in fixed-size fields padded with NUL bytes;
    # 'segname' and 'sectname' give access to the unpadded names.
    _namelen = 16
    _fields = [
        ("pad_sectname","%ds"%_namelen), # name of this section
        ("pad_segname","%ds"%_namelen),  # segment this section goes in
        ("addr","ptr"),                  # memory address of this section
        ("size","ptr"),                  # size in bytes of this section
        ("offset","u32"),                # file offset of this section
        ("align","u32"),                 # section alignment (power of 2)
        ("reloff","u32"),                # file offset of relocation entries
        ("nreloc","u32"),                # number of relocation entries
        ("flags","u32"),                 # flags (section type and attributes)
        ("reserved1","u32"),             # reserved (for offset or index)
        ("reserved2","ptr"),             # reserved (for count or sizeof)
        ]
    def __str__(self):
        return "%-30s %#010x %#010x %#010x" % (self.name, self.addr, self.offset, self.size)
    # 'flags' contains the section type (bits of SECTION_TYPE) and the
    # section attributes (bits of SECTION_ATTRIBUTES); each setter
    # replaces its own part and keeps the other part unchanged.
    def get_type(self):
        return self.flags & SECTION_TYPE
    def set_type(self, val):
        # Keep only the attribute bits of the old flags; or-ing with the
        # whole of self.flags would never clear the bits of the old type.
        self.flags = (val & SECTION_TYPE) | (self.flags & SECTION_ATTRIBUTES)
    type = property(get_type, set_type)
    def get_attributes(self):
        return self.flags & SECTION_ATTRIBUTES
    def set_attributes(self, val):
        self.flags = (val & SECTION_ATTRIBUTES) | self.type
    attributes = property(get_attributes, set_attributes)
    def changeOffsets(self, decalage, min_offset=None):
        # Shift the file offsets of the content and of the relocations
        if isOffsetChangeable(self.offset, min_offset):
            self.offset += decalage
        if isOffsetChangeable(self.reloff, min_offset):
            self.reloff += decalage
    def __init__(self, *args, **kargs):
        if kargs.get('content', None) is None:
            kargs['content'] = data_empty
        CStruct.__init__(self, *args, **kargs)
        if kargs['content'] != data_empty:
            # Parsed from existing content: nothing more to do
            return
        # Default values for a section header created from scratch
        self.align = 1
        if not 'segment' in kargs:
            self.segname = "__LINKEDIT"
        if not 'sectname' in kargs:
            self.sectname = "__added_data"
        if self.is_text_section():
            self.type = S_REGULAR
            self.flags = S_ATTR_SOME_INSTRUCTIONS | S_ATTR_PURE_INSTRUCTIONS
    def __call__(self, parent=None, addr=None, size=None, segment=None):
        # NB: 'size' and 'segment' are not used, the size is the length
        # of the content of 'parent'.
        self.addr = addr
        self.size = len(parent.content)
    def get_segname(self):
        return bytes_to_name(self.pad_segname).strip('\0')
    def set_segname(self, val):
        padding = self._namelen - len(val)
        if (padding < 0) : raise ValueError("segname is too long for the structure")
        self.pad_segname = name_to_bytes(val)+data_null*padding
    segname = property(get_segname, set_segname)
    def get_sectname(self):
        return bytes_to_name(self.pad_sectname).strip('\0')
    def set_sectname(self, val):
        padding = self._namelen - len(val)
        if (padding < 0) : raise ValueError("sectname is too long for the structure")
        self.pad_sectname = name_to_bytes(val)+data_null*padding
    sectname = property(get_sectname, set_sectname)
    # Full name, in the form "segname,sectname"
    name = property(lambda _:"%s,%s"%(_.segname,_.sectname))
    def is_text_section(self):
        return self.sectname == "__text"
    all_flags = property(lambda _:_.flags) # Backwards compatibility

SetConstants(
prefix = 'S_',
# Constants for the type of a section
SECTION_TYPE               = 0x000000ff, # Up to 256 section types
S_REGULAR                  = 0x00, # regular section
S_ZEROFILL                 = 0x01, # zero fill on demand section
S_CSTRING_LITERALS         = 0x02, # section with only literal C strings
S_4BYTE_LITERALS           = 0x03, # section with only 4 byte literals
S_8BYTE_LITERALS           = 0x04, # section with only 8 byte literals
S_LITERAL_POINTERS         = 0x05, # section with only pointers to literals
S_NON_LAZY_SYMBOL_POINTERS = 0x06, # section with only non-lazy symbol pointers
S_LAZY_SYMBOL_POINTERS     = 0x07, # section with only lazy symbol pointers
S_SYMBOL_STUBS             = 0x08, # section with only symbol stubs, byte size of stub in the reserved2 field
S_MOD_INIT_FUNC_POINTERS   = 0x09, # section with only function pointers for initialization
S_MOD_TERM_FUNC_POINTERS   = 0x0a, # section with only function pointers for termination
S_COALESCED                = 0x0b, # section contains symbols that are to be coalesced
S_GB_ZEROFILL              = 0x0c, # zero fill on demand section (that can be larger than 4 gigabytes)
S_INTERPOSING              = 0x0d, # section with only pairs of function pointers for interposing
S_16BYTE_LITERALS          = 0x0e, # section with only 16 byte literals
S_DTRACE_DOF               = 0x0f, # section contains DTrace Object Format
S_LAZY_DYLIB_SYMBOL_POINTERS = 0x10, # section with only lazy symbol pointers to lazy loaded dylibs
S_THREAD_LOCAL_REGULAR     = 0x11, # template of initial values for TLVs
S_THREAD_LOCAL_ZEROFILL    = 0x12, # template of initial values for TLVs
S_THREAD_LOCAL_VARIABLES   = 0x13, # TLV descriptors
S_THREAD_LOCAL_VARIABLE_POINTERS = 0x14, # pointers to TLV descriptors
S_THREAD_LOCAL_INIT_FUNCTION_POINTERS = 0x15, # functions to call to initialize TLV values
)

SetConstants(
prefix = 'S_ATTR',
# Constants for the section attributes part of the flags field of a section structure.
SECTION_ATTRIBUTES         = 0xffffff00, # Up to 24 section attributes
SECTION_ATTRIBUTES_USR     = 0xff000000, # User settable attributes
S_ATTR_PURE_INSTRUCTIONS   = 0x80000000, # section contains only true machine instructions
S_ATTR_NO_TOC              = 0x40000000, # section contains coalesced symbols that are not to be in a ranlib table of contents
S_ATTR_STRIP_STATIC_SYMS   = 0x20000000, # ok to strip static symbols in this section in files with the MH_DYLDLINK flag
S_ATTR_NO_DEAD_STRIP       = 0x10000000, # no dead stripping
S_ATTR_LIVE_SUPPORT        = 0x08000000, # blocks are live if they reference live blocks
S_ATTR_SELF_MODIFYING_CODE = 0x04000000, # Used with i386 code stubs written on by dyld
S_ATTR_DEBUG               = 0x02000000, # A debug section
SECTION_ATTRIBUTES_SYS     = 0x00ffff00, # system settable attributes
S_ATTR_SOME_INSTRUCTIONS   = 0x00000400, # Section contains some machine instructions
S_ATTR_EXT_RELOC           = 0x00000200, # Section has external relocation entries
S_ATTR_LOC_RELOC           = 0x00000100, # Section has local relocation entries
)

# The currently known segment names and the section names in those segments
SEG_PAGEZERO      = "__PAGEZERO"      # the pagezero segment which has no protections and catches NULL references for MH_EXECUTE files
SEG_TEXT          = "__TEXT"          # the traditional UNIX text segment
SECT_TEXT         = "__text"          # - the real text part of the text section no headers, and no padding
SECT_FVMLIB_INIT0 = "__fvmlib_init0"  # - the fvmlib initialization section
SECT_FVMLIB_INIT1 = "__fvmlib_init1"  # - the section following the fvmlib initialization section
SEG_DATA          = "__DATA"          # the traditional UNIX data segment
SECT_DATA         = "__data"          # - the real initialized data section no padding, no bss overlap
SECT_BSS          = "__bss"           # - the real uninitialized data section no padding
SECT_COMMON       = "__common"        # - the section common symbols are allocated in by the link editor
SEG_OBJC          = "__OBJC"          # objective-C runtime segment
SECT_OBJC_SYMBOLS = "__symbol_table"  # - symbol table
SECT_OBJC_MODULES = "__module_info"   # - module information
SECT_OBJC_STRINGS = "__selector_strs" # - string table
SECT_OBJC_REFS    = "__selector_refs" # - string table
SEG_ICON          = "__ICON"          # the icon segment
SECT_ICON_HEADER  = "__header"        # - the icon headers
SECT_ICON_TIFF    = "__tiff"          # - the icons in tiff format
SEG_LINKEDIT      = "__LINKEDIT"      # the segment containing all structs created and maintained by the link editor.
                                      # Created with -seglinkedit option to ld(1) for MH_EXECUTE and FVMLIB file types only
SEG_UNIXSTACK     = "__UNIXSTACK"     # the unix stack segment
SEG_IMPORT        = "__IMPORT"        # the segment for the self (dyld) modifying code stubs that has read, write and execute permissions

# The segment load command indicates that a part of this file is to be
# mapped into the task's address space.  The size of this segment in memory,
# vmsize, maybe equal to or larger than the amount to map from this file,
# filesize.  The file is mapped starting at fileoff to the beginning of
# the segment in memory, vmaddr.  The rest of the memory of the segment,
# if any, is allocated zero fill on demand.  The segment's maximum virtual
# memory protection and initial virtual memory protection are specified
# by the maxprot and initprot fields.  If the segment has sections then the
# section structures directly follow the segment command and their size is
# reflected in cmdsize.
# * In loader.h, there are two data structures: segment_command and segment_command_64, which are merged in one structure below.
class sectionHeaderArray(CArray):
    # Array of section headers; its length is the 'nsects' of its parent
    _cls = sectionHeader
    count = lambda _:_.parent.nsects
class segment_command(LoadCommand):
    lc_types = (LC_SEGMENT, LC_SEGMENT_64)
    _namelen = 16
    _fields = [
        ("pad_segname","%ds"%_namelen), # segment name
        ("vmaddr","ptr"),               # memory address of this segment
        ("vmsize","ptr"),               # memory size of this segment
        ("fileoff","ptr"),              # file offset of this segment
        ("filesize","ptr"),             # amount to map from the file
        ("maxprot","u32"),              # maximum VM protection
        ("initprot","u32"),             # initial VM protection
        ("nsects","u32"),               # number of sections in segment
        ("flags","u32"),                # flags
        ("sh",sectionHeaderArray),
        ]
    def get_segname(self):
        return bytes_to_name(self.pad_segname).strip('\0')
    def set_segname(self, val):
        padding = self._namelen - len(val)
        if (padding < 0) : raise ValueError("segname is too long for the structure")
        self.pad_segname = name_to_bytes(val)+data_null*padding
    segname = property(get_segname, set_segname)
    def otool(self, llvm=False):
        # The output for the load command itself, followed by the
        # description of each section of the segment.
        res = LoadCommand.otool(self, llvm=llvm)
        e = self.parent.parent
        # NB: this rebuilds self.sect from the content of the file
        self.sectionsToAdd(e.content)
        for s in self.sect:
            # Relocation tables are in self.sect too, they are not displayed
            if hasattr(s, 'reloclist') :
                continue
            res.append("Section")
            res.append(" sectname %.16s" %s.parent.sectname)
            res.append(" segname %.16s" %s.parent.segname)
            if self.cmd == LC_SEGMENT_64: fmt = "%#018x"
            else:                         fmt = "%#010x"
            res.append((" addr "+fmt) %s.parent.addr)
            # Only some versions of otool display that a section is too big
            if (not llvm or llvm in (8, 9, 10, 11)) and s.parent.offset + s.parent.size > len(e.content):
                fmt += " (past end of file)"
            res.append((" size "+fmt) %s.parent.size)
            res.append(" offset %u" %s.parent.offset)
            res.append(" align 2^%u (%d)" %(s.parent.align, 1 << s.parent.align))
            res.append(" reloff %u" %s.parent.reloff)
            res.append(" nreloc %u" %s.parent.nreloc)
            res.append(" flags %#010x" %s.parent.flags)
            comment1 = ""
            if s.parent.type in (
                    S_SYMBOL_STUBS,
                    S_LAZY_SYMBOL_POINTERS,
                    S_NON_LAZY_SYMBOL_POINTERS,
                    S_LAZY_DYLIB_SYMBOL_POINTERS):
                comment1 = " (index into indirect symbol table)"
            res.append(" reserved1 %u%s" %(s.parent.reserved1,comment1))
            comment2 = ""
            if s.parent.type == S_SYMBOL_STUBS:
                comment2 = " (size of stubs)"
            res.append(" reserved2 %u%s" %(s.parent.reserved2,comment2))
        return res
    def sectionsToAdd(self, raw):
        # Create, from the bytes of the file 'raw', the section object of
        # each section header, then the relocation table of each header
        # having one; they are stored in self.sect, which is returned.
        from elfesteem.macho.sections import Section, Reloc, SymbolStubList, SymbolPtrList
        self.sect = []
        for sh in self.sh:
            if sh.type == S_ZEROFILL or sh.type == S_THREAD_LOCAL_ZEROFILL or sh.type == S_GB_ZEROFILL:
                # Zero-filled sections have no content in the file
                sh.sect = Section(parent=sh, content=data_empty)
            elif sh.type == S_SYMBOL_STUBS:
                sh.sect = SymbolStubList(parent=sh, content=raw, start=sh.offset)
            elif sh.type in (S_NON_LAZY_SYMBOL_POINTERS,
                             S_LAZY_SYMBOL_POINTERS):
                sh.sect = SymbolPtrList(parent=sh, content=raw, start=sh.offset)
            else:
                # One byte of padding may be present. For data sections,
                # it is usually \x00, and can be ignored, but for text
                # sections it is usually a nop (e.g. \x90 for x86) and
                # keeping it is necessary if we want pack() to reconstruct
                # the file as it has been input.
                size = sh.size
                if (sh.offset+sh.size) % 2 == 1: size += 1
                sh.sect = Section(parent=sh, content=raw, start=sh.offset, size=size)
            self.sect.append(sh.sect)
        for sh in self.sh:
            if sh.reloff != 0:
                sh.reloc = Reloc(parent=sh, content=raw, start=sh.reloff)
                self.sect.append(sh.reloc)
        return self.sect
    def changeOffsets(self, decalage, min_offset=None):
        # Shift the file offsets of the section headers and of the segment
        for sh in self.sh:
            sh.changeOffsets(decalage, min_offset)
        if isOffsetChangeable(self.fileoff, min_offset):
            self.fileoff += decalage
    def is_text_segment(self):
        return self.segname == "__TEXT"
    def addSH(self, s):
        # Add the section 's' (and its header s.parent) after the section
        # of this segment having the highest file offset.
        maxoff = self.fileoff
        if not hasattr(self, 'sect'):
            self.sect = []
        offset = 0
        size = 0
        if len(self.sect)>0:
            offset = 0
            size = 0
            for se in self.sect:
                if offset < se.offset :
                    offset = se.offset
                    size = se.size
            maxoff = offset + size
        self.nsects += 1
        self.cmdsize += len(s.parent.pack())
        self._size = self.cmdsize
        s.parent.parent = self
        s.parent.offset = maxoff
        s.parent.addr = self.vmaddr - self.fileoff + s.parent.offset
        s.parent.align = 4
        # Values and positions by default
        self.sh.append(s.parent)
        self.sect.append(s)
        s.parent.size = len(s.pack())
        s.parent.offset = maxoff
        # NOTE(review): the test below uses the position of the last
        # section present before the addition, and is made after the
        # segment has been modified - to be confirmed.
        if offset + size > self.fileoff + self.filesize:
            raise ValueError("not enough space in segment")
            #self.parent.extendSegment(self, 0x1000*(s.parent.size/0x1000 +1))
        else:
            self.filesize += len(s.pack())
            self.vmsize += len(s.pack())

SetConstants(
# Constants for the flags field of the segment_command
SG_HIGHVM  = 0x1, # the file contents for this segment is for the high part of the VM space, the low part is zero filled (for stacks in core files)
SG_FVMLIB  = 0x2, # this segment is the VM that is allocated by a fixed VM library, for overlap checking in the link editor
SG_NORELOC = 0x4, # this segment has nothing that was relocated in it and nothing relocated to it, that is it maybe safely replaced without relocation
SG_PROTECTED_VERSION_1 = 0x8, # This segment is protected.  If the segment starts at file offset 0, the first page of the segment is not protected.  All other pages of the segment are protected.
)


# Fixed virtual memory shared libraries are identified by two things.  The
# target pathname (the name of the library as found for execution), and the
# minor version number.  The address of where the headers are loaded is in
# header_addr. (THIS IS OBSOLETE and no longer supported).
class fvmlib_command(LoadCommand):
    lc_types = (LC_IDFVMLIB, LC_LOADFVMLIB, LC_FVMFILE)
    _offsets_in_data = ("name",)
    _fields = [
        ("name","u32"),          # library's target pathname
        ("minor_version","u32"), # library's minor version number
        ("header_addr","u32"),   # library's header address
        ]

# A dynamically linked shared library (filetype == MH_DYLIB in the mach header)
# contains a dylib_command (cmd == LC_ID_DYLIB) to identify the library.
# An object that uses a dynamically linked shared library also contains a
# dylib_command (cmd == LC_LOAD_DYLIB, LC_LOAD_WEAK_DYLIB, or
# LC_REEXPORT_DYLIB) for each library it uses.
class dylib_command(LoadCommand):
    lc_types = (LC_LOAD_DYLIB, LC_LAZY_LOAD_DYLIB, LC_ID_DYLIB, LC_REEXPORT_DYLIB, LC_LOAD_WEAK_DYLIB, LC_LOAD_UPWARD_DYLIB)
    _offsets_in_data = ("name",)
    _fields = [
        ("name","u32"),                  # library's path name
        ("timestamp","u32"),             # library's build time stamp
        ("current_version","u32"),       # library's current version
        ("compatibility_version","u32"), # library's compatibility vers number
        ]

# A dynamically linked shared library may be a subframework of an umbrella
# framework.  If so it will be linked with "-umbrella umbrella_name" where
# Where "umbrella_name" is the name of the umbrella framework. A subframework
# can only be linked against by its umbrella framework or other subframeworks
# that are part of the same umbrella framework.  Otherwise the static link
# editor produces an error and states to link against the umbrella framework.
# The name of the umbrella framework for subframeworks is recorded in the
# following structure.
class sub_framework_command(LoadCommand):
    lc_types = (LC_SUB_FRAMEWORK,)
    _offsets_in_data = ("umbrella",)
    _fields = [
        ("umbrella","u32"), # the umbrella framework name
        ]

# For dynamically linked shared libraries that are subframework of an umbrella
# framework they can allow clients other than the umbrella framework or other
# subframeworks in the same umbrella framework.  To do this the subframework
# is built with "-allowable_client client_name" and an LC_SUB_CLIENT load
# command is created for each -allowable_client flag.  The client_name is
# usually a framework name.  It can also be a name used for bundles clients
# where the bundle is built with "-client_name client_name".
class sub_client_command(LoadCommand):
    lc_types = (LC_SUB_CLIENT,)
    _offsets_in_data = ("client",)
    _fields = [
        ("client","u32"), # the client name
        ]

# A dynamically linked shared library may be a sub_umbrella of an umbrella
# framework.  If so it will be linked with "-sub_umbrella umbrella_name" where
# Where "umbrella_name" is the name of the sub_umbrella framework.  When
# statically linking when -twolevel_namespace is in effect a twolevel namespace
# umbrella framework will only cause its subframeworks and those frameworks
# listed as sub_umbrella frameworks to be implicitly linked in.  Any other
# dependent dynamic libraries will not be linked in when -twolevel_namespace
# is in effect.  The primary library recorded by the static linker when
# resolving a symbol in these libraries will be the umbrella framework.
# Zero or more sub_umbrella frameworks may be used by an umbrella framework.
# The name of a sub_umbrella framework is recorded in the following structure.
class sub_umbrella_command(LoadCommand):
    lc_types = (LC_SUB_UMBRELLA,)
    _offsets_in_data = ("sub_umbrella",)
    _fields = [
        ("sub_umbrella","u32"), # the sub_umbrella framework name
        ]

# A dynamically linked shared library may be a sub_library of another shared
# library.  If so it will be linked with "-sub_library library_name" where
# Where "library_name" is the name of the sub_library shared library.  When
# statically linking when -twolevel_namespace is in effect a twolevel namespace
# shared library will only cause its subframeworks and those frameworks
# listed as sub_umbrella frameworks and libraries listed as sub_libraries to
# be implicitly linked in.  Any other dependent dynamic libraries will not be
# linked in when -twolevel_namespace is in effect.  The primary library
# recorded by the static linker when resolving a symbol in these libraries
# will be the umbrella framework (or dynamic library). Zero or more sub_library
# shared libraries may be used by an umbrella framework (or dynamic library).
# The name of a sub_library framework is recorded in the following structure.
# For example /usr/lib/libobjc_profile.A.dylib would be recorded as "libobjc".
class sub_library_command(LoadCommand):
    lc_types = (LC_SUB_LIBRARY,)
    _offsets_in_data = ("sub_library",)
    _fields = [
        ("sub_library","u32"), # the sub_library name
        ]

# A program (filetype == MH_EXECUTE) that is
# prebound to its dynamic libraries has one of these for each library that
# the static linker used in prebinding.  It contains a bit vector for the
# modules in the library.  The bits indicate which modules are bound (1) and
# which are not (0) from the library.  The bit for module 0 is the low bit
# of the first byte.  So the bit for the Nth module is:
# (linked_modules[N/8] >> N%8) & 1
class prebound_dylib_command(LoadCommand):
    lc_types = (LC_PREBOUND_DYLIB,)
    _offsets_in_data = ("name","linked_modules")
    _fields = [
        ("name","u32"),           # library's path name
        ("nmodules","u32"),       # number of modules in library
        ("linked_modules","u32"), # bit vector of linked modules
        ]

# A program that uses a dynamic linker contains a dylinker_command to identify
# the name of the dynamic linker (LC_LOAD_DYLINKER).  And a dynamic linker
# contains a dylinker_command to identify the dynamic linker (LC_ID_DYLINKER).
# A file can have at most one of these.
# This struct is also used for the LC_DYLD_ENVIRONMENT load command and
# contains string for dyld to treat like environment variable.
class dylinker_command(LoadCommand):
    lc_types = (LC_DYLD_ENVIRONMENT, LC_LOAD_DYLINKER, LC_ID_DYLINKER)
    _offsets_in_data = ("name",)
    _fields = [
        ("name","u32"), # dynamic linker's path name
        ]

#### Source: /usr/include/mach-o/loader.h

# Thread commands contain machine-specific data structures suitable for
# use in the thread state primitives.  The machine specific data structures
# follow the struct thread_command as follows.
# Each flavor of machine specific data structure is preceded by an unsigned
# long constant for the flavor of that data structure, an uint32_t
# that is the count of longs of the size of the state data structure and then
# the state data structure follows.  This triple may be repeated for many
# flavors.  The constants for the flavors, counts and state data structure
# definitions are expected to be in the header file <machine/thread_status.h>.

class ThreadStateMetaclass(type):
    # Metaclass of the thread states.
    # Each class defining both 'cputype' and 'flavor' is recorded in
    # 'registered'; calling a class that has no 'cputype' (e.g. the base
    # class) with a load command 'lc' creates an instance of the class
    # recorded for (lc.cputype, lc.flavor), if there is one.
    registered = {}
    def __new__(cls, name, bases, dct):
        o = type.__new__(cls, name, bases, dct)
        if 'cputype' in dct and 'flavor' in dct:
            ThreadStateBase.registered[(dct['cputype'],dct['flavor'])] = o
        return o
    def __call__(cls, lc):
        key = (lc.cputype, lc.flavor)
        if not hasattr(cls, 'cputype') and key in ThreadStateBase.registered:
            return ThreadStateBase.registered[key](lc)
        else:
            return super(ThreadStateMetaclass,cls).__call__(lc)

class ThreadStateBase(ThreadStateMetaclass('ThreadStateBase', (object,), {})):
    # View of the registers stored in the 'state' bytes of the load
    # command 'lc'. Registers can be read or written by name (if the name
    # is in 'registers') or by position; a slice reads many positions.
    registers = []
    def __init__(self, lc):
        self.c = lc
        # When all registers have the same size, we can precompute the
        # values used in reg_slice.
        # If they don't all have the same size, we need to redefine
        # flavorcount and reg_slice.
        self.t = convert_size2type("ptr",self.c.wsize)
        self.s = self.c.wsize//8
    # Size of the state, counted in 32-bit words
    flavorcount = property(lambda _:_.s//4*len(_.registers))
    def reg_slice(self, pos):
        # Returns the struct format of a register, and the slice of
        # 'state' containing it
        if pos in self.registers:
            pos = self.registers.index(pos)
        return self.t, slice(self.s*pos, self.s*(pos+1))
    def __getitem__(self, pos):
        if isinstance(pos, slice):
            assert pos.step is None
            # Open-ended slices: a missing start is the first register,
            # a missing stop is the end of the known registers.
            start, stop = pos.start, pos.stop
            if start is None: start = 0
            if stop is None:  stop = len(self.registers)
            return tuple([self[_] for _ in range(start, stop)])
        else:
            fmt, pos = self.reg_slice(pos)
            return struct.unpack(self.c.sex + fmt, self.c.state[pos])[0]
    def __setitem__(self, pos, val):
        fmt, pos = self.reg_slice(pos)
        self.c.state[pos] = struct.pack(self.c.sex + fmt, val)
    def otool(self):
        # By default, no register is displayed
        return []

#### Source: /usr/include/mach/*/{_structs.h,thread_status.h}
# The data for all known architectures can be found at
# https://github.com/opensource-apple/cctools/blob/master/include/...

# Each class below describes one (cputype, flavor) pair; defining it is
# sufficient to have it recorded by the metaclass, which is why the name
# 'ThreadState' can be reused for many of them.
# A class with no 'registers' only gives the name of the flavor.

class ThreadStatePPC(ThreadStateBase):
    cputype = CPU_TYPE_POWERPC
    flavor = 1
    flavorname = 'PPC_THREAD_STATE'
    entrypoint = 'srr0'
    registers = ['srr0', 'srr1'] + ['r%d'%_ for _ in range(32)] + \
                ['cr', 'xer', 'lr', 'ctr', 'mq', 'vrsave']
    def otool(self):
        # r0 is at position 2, after srr0 and srr1
        return [
            " r0 %#010x r1 %#010x r2 %#010x r3 %#010x r4 %#010x"%self[2:7],
            " r5 %#010x r6 %#010x r7 %#010x r8 %#010x r9 %#010x"%self[7:12],
            " r10 %#010x r11 %#010x r12 %#010x r13 %#010x r14 %#010x"%self[12:17],
            " r15 %#010x r16 %#010x r17 %#010x r18 %#010x r19 %#010x"%self[17:22],
            " r20 %#010x r21 %#010x r22 %#010x r23 %#010x r24 %#010x"%self[22:27],
            " r25 %#010x r26 %#010x r27 %#010x r28 %#010x r29 %#010x"%self[27:32],
            " r30 %#010x r31 %#010x cr %#010x xer %#010x lr %#010x"%self[32:37],
            " ctr %#010x mq %#010x vrsave %#010x srr0 %#010x srr1 %#010x"
                % (self[37], self[38], self[39], self[0], self[1]),
            ]

class ThreadState(ThreadStateBase):
    cputype = CPU_TYPE_POWERPC
    flavor = 2
    flavorname = 'PPC_FLOAT_STATE'
    registers = ['f%d'%_ for _ in range(32)] + [ 'fpscr' ]
    # 33 values of 64 bits
    flavorcount = 66
    def reg_slice(self, pos):
        if pos in self.registers:
            pos = self.registers.index(pos)
        if pos == 32: # fpscr is 64 bits, 32 bits of rubbish
            return 'Q', slice(8*pos, 8*pos+8)
        return 'd', slice(8*pos, 8*pos+8)
    def otool(self):
        return [
            " f0 %f f1 %f\n f2 %f f3 %f"%self[0:4],
            " f4 %f f5 %f\n f6 %f f7 %f"%self[4:8],
            " f8 %f f9 %f\n f10 %f f11 %f"%self[8:12],
            " f12 %f f13 %f\n f14 %f f15 %f"%self[12:16],
            " f16 %f f17 %f\n f18 %f f19 %f"%self[16:20],
            " f20 %f f21 %f\n f22 %f f23 %f"%self[20:24],
            " f24 %f f25 %f\n f26 %f f27 %f"%self[24:28],
            " f28 %f f29 %f\n f30 %f f31 %f"%self[28:32],
            " fpscr_pad %#x fpscr %#x"%(self[32]>>32,self[32]&0xffffffff),
            ]

class ThreadState(ThreadStateBase):
    cputype = CPU_TYPE_POWERPC
    flavor = 3
    flavorname = 'PPC_EXCEPTION_STATE'
    registers = ['dar', 'dsisr', 'exception', 'pad0'] + ['pad1[%d]'%_ for _ in range(4)]
    def otool(self):
        return [
            " dar 0x%x dsisr 0x%x exception 0x%x pad0 0x%x"%self[0:4],
            " pad1[0] 0x%x pad1[1] 0x%x pad1[2] 0x%x pad1[3] 0x%x"%self[4:8],
            ]

class ThreadState(ThreadStateBase):
    cputype = CPU_TYPE_POWERPC
    flavor = 4
    flavorname = 'PPC_VECTOR_STATE'

class ThreadStatePPC64(ThreadStateBase):
    cputype = CPU_TYPE_POWERPC64
    flavor = 5
    flavorname = 'PPC_THREAD_STATE64'
    entrypoint = 'srr0'
    registers = ['srr0', 'srr1'] + ['r%d'%_ for _ in range(32)] + \
                ['cr', 'xer', 'lr', 'ctr', 'vrsave']
    # NB: cr and vrsave are 32-bit, while all other registers are 64-bit.
    # The state is 34*8 + 4 + 3*8 + 4 = 304 bytes, i.e. 76 words of 32 bits.
    flavorcount = 76
    def reg_slice(self, pos):
        if pos in self.registers:
            pos = self.registers.index(pos)
        if pos == 34:      return 'I', slice(8*pos,   8*pos+4) # 'cr' is 32-bit
        # 'vrsave' is the last 4 bytes of the state, just after 'ctr'
        # (which ends at 8*37+4 == 8*38-4)
        if pos == 38:      return 'I', slice(8*pos-4, 8*pos)   # 'vrsave' is 32-bit
        if 34 < pos < 38:  return 'Q', slice(8*pos-4, 8*pos+4) # Shifted by 32 bits
        return 'Q', slice(8*pos, 8*(pos+1))
    def otool(self):
        # r0 is at position 2, after srr0 and srr1
        return [
            " r0 %#018x r1 %#018x r2 %#018x"%self[2:5],
            " r3 %#018x r4 %#018x r5 %#018x"%self[5:8],
            " r6 %#018x r7 %#018x r8 %#018x"%self[8:11],
            " r9 %#018x r10 %#018x r11 %#018x"%self[11:14],
            " r12 %#018x r13 %#018x r14 %#018x"%self[14:17],
            " r15 %#018x r16 %#018x r17 %#018x"%self[17:20],
            " r18 %#018x r19 %#018x r20 %#018x"%self[20:23],
            " r21 %#018x r22 %#018x r23 %#018x"%self[23:26],
            " r24 %#018x r25 %#018x r26 %#018x"%self[26:29],
            " r27 %#018x r28 %#018x r29 %#018x"%self[29:32],
            " r30 %#018x r31 %#018x cr %#010x"%self[32:35],
            " xer %#018x lr %#018x ctr %#018x"%self[35:38],
            "vrsave %#010x srr0 %#018x srr1 %#018x"%(self[38], self[0], self[1]),
            ]

class ThreadState(ThreadStateBase):
    cputype = CPU_TYPE_POWERPC64
    flavor = 6
    flavorname = 'PPC_EXCEPTION_STATE64'

class ThreadStateX86(ThreadStateBase):
    cputype = CPU_TYPE_I386
    flavor = 1
    # NB: the second assignment is the one that is kept
    flavorname = 'x86_THREAD_STATE32' # New name
    flavorname = 'i386_THREAD_STATE' # Legacy name
    entrypoint = 'eip'
    registers = ['eax', 'ebx', 'ecx', 'edx', 'edi', 'esi', 'ebp', 'esp',
                 'ss', 'eflags', 'eip', 'cs', 'ds', 'es', 'fs', 'gs']
    def otool(self):
        return [
            "\t eax %#010x ebx %#010x ecx %#010x edx %#010x"%self[0:4],
            "\t edi %#010x esi %#010x ebp %#010x esp %#010x"%self[4:8],
            "\t ss %#010x eflags %#010x eip %#010x cs %#010x"%self[8:12],
            "\t ds %#010x es %#010x fs %#010x gs %#010x"%self[12:16],
            ]

class ThreadState(ThreadStateBase):
    cputype = CPU_TYPE_I386
    flavor = 2
    flavorname = 'x86_FLOAT_STATE32' # New name
    flavorname = 'i386_FLOAT_STATE' # Legacy name

class ThreadStateX86(ThreadStateBase):
    cputype = CPU_TYPE_I386
    flavor = 3
    flavorname = 'x86_EXCEPTION_STATE32' # New name
    flavorname = 'i386_EXCEPTION_STATE' # Legacy name

class ThreadStateX64(ThreadStateBase):
    cputype = CPU_TYPE_X86_64
    flavor = 4
    flavorname = 'x86_THREAD_STATE64'
    entrypoint = 'rip'
    registers = ['rax', 'rbx', 'rcx', 'rdx', 'rdi', 'rsi', 'rbp', 'rsp',
                 'r8', 'r9', 'r10', 'r11', 'r12', 'r13', 'r14', 'r15', 'rip',
                 'rflags', 'cs', 'fs', 'gs']
    def otool(self):
        return [
            " rax %#018x rbx %#018x rcx %#018x"%self[0:3],
            " rdx %#018x rdi %#018x rsi %#018x"%self[3:6],
            " rbp %#018x rsp %#018x r8 %#018x"%self[6:9],
            " r9 %#018x r10 %#018x r11 %#018x"%self[9:12],
            " r12 %#018x r13 %#018x r14 %#018x"%self[12:15],
            " r15 %#018x rip %#018x" %self[15:17],
            "rflags %#018x cs %#018x fs %#018x"%self[17:20],
            " gs %#018x" %self[20],
            ]

class ThreadState(ThreadStateBase):
    cputype = CPU_TYPE_X86_64
    flavor = 5
    flavorname = 'x86_FLOAT_STATE64'

class ThreadState(ThreadStateBase):
    cputype = CPU_TYPE_X86_64
    flavor = 6
    flavorname = 'x86_EXCEPTION_STATE64'

class ThreadState(ThreadStateBase):
    cputype = CPU_TYPE_I386
    flavor = 7
    flavorname = 'x86_THREAD_STATE'

class ThreadState(ThreadStateBase):
    cputype = CPU_TYPE_X86_64
    flavor = 7
    flavorname = 'x86_THREAD_STATE'

class ThreadState(ThreadStateBase):
    cputype = CPU_TYPE_I386
    flavor = 8
    flavorname = 'x86_FLOAT_STATE'

class ThreadState(ThreadStateBase):
    cputype = CPU_TYPE_X86_64
    flavor = 8
    flavorname = 'x86_FLOAT_STATE'

class ThreadState(ThreadStateBase):
    cputype = CPU_TYPE_I386
    flavor = 9
    flavorname = 'x86_EXCEPTION_STATE'

class ThreadState(ThreadStateBase):
    cputype = CPU_TYPE_X86_64
    flavor = 9
    flavorname = 'x86_EXCEPTION_STATE'

class ThreadState(ThreadStateBase):
    cputype = CPU_TYPE_I386
    flavor = 10
    flavorname = 'x86_DEBUG_STATE32'

class ThreadState(ThreadStateBase):
    cputype = CPU_TYPE_X86_64
    flavor = 11
    flavorname = 'x86_DEBUG_STATE64'

class ThreadState(ThreadStateBase):
    cputype = CPU_TYPE_I386
    flavor = 12
    flavorname = 'x86_DEBUG_STATE'

class ThreadState(ThreadStateBase):
    cputype = CPU_TYPE_X86_64
    flavor = 12
    flavorname = 'x86_DEBUG_STATE'

class ThreadState(ThreadStateBase):
    cputype = CPU_TYPE_I386
    flavor = 16
    flavorname = 'x86_AVX_STATE32'

class ThreadState(ThreadStateBase):
    cputype = CPU_TYPE_X86_64
    flavor = 17
    flavorname = 'x86_AVX_STATE64'

class ThreadState(ThreadStateBase):
    cputype = CPU_TYPE_I386
    flavor = 18
    flavorname = 'x86_AVX_STATE'

class ThreadState(ThreadStateBase):
    cputype = CPU_TYPE_X86_64
    flavor = 18
    flavorname = 'x86_AVX_STATE'

class ThreadStateARM(ThreadStateBase):
    cputype = CPU_TYPE_ARM
    flavor = 1
    flavorname = 'ARM_THREAD_STATE'
    entrypoint = 'pc'
    registers = ['r0', 'r1', 'r2', 'r3', 'r4', 'r5', 'r6', 'r7',
                 'r8', 'r9', 'r10', 'r11', 'r12', 'sp', 'lr', 'pc', 'cpsr']
    def otool(self):
        return [
            "\t r0 %#010x r1 %#010x r2 %#010x r3 %#010x"%self[0:4],
            "\t r4 %#010x r5 %#010x r6 %#010x r7 %#010x"%self[4:8],
            "\t r8 %#010x r9 %#010x r10 %#010x r11 %#010x"%self[8:12],
            "\t r12 %#010x sp %#010x lr %#010x pc %#010x"%self[12:16],
            "\t cpsr %#010x"%self[16],
            ]

class ThreadState(ThreadStateBase):
    cputype = CPU_TYPE_ARM
    flavor = 2
    flavorname = 'ARM_VFP_STATE'

class ThreadState(ThreadStateBase):
    cputype = CPU_TYPE_ARM
flavor = 3 + flavorname = 'ARM_EXCEPTION_STATE' + def otool(self): + return [ "\t exception %#010x fsr %#010x far %#010x"%self[0:3] ] + +class ThreadState(ThreadStateBase): + cputype = CPU_TYPE_ARM + # pre-armv8 + flavor = 4 + flavorname = 'ARM_DEBUG_STATE' + +class ThreadState(ThreadStateBase): + cputype = CPU_TYPE_ARM64 + flavor = 6 + flavorname = 'ARM_THREAD_STATE64' + entrypoint = 'pc' + registers = ['x%d'%_ for _ in range(29)] + ['fp', 'sp', 'lr', 'pc', 'cpsr'] + flavorcount = 68 + # NB: cpsr is 32-bit, while all other registers are 64-bit. + # Therefore flavorcount should be 67, but the C compiler adds 32-bits + # of padding at the end of the __darwin_arm_thread_state64 structure. + def reg_slice(self, pos): + if pos in self.registers: + pos = self.registers.index(pos) + if pos == 33: return 'I', slice(8*pos, 8*pos+4) # 'cpsr' is 32-bit + return 'Q', slice(8*pos, 8*(pos+1)) + def otool(self): + return [ + "\t x0 %#018x x1 %#018x x2 %#018x"%self[0:3], + "\t x3 %#018x x4 %#018x x5 %#018x"%self[3:6], + "\t x6 %#018x x7 %#018x x8 %#018x"%self[6:9], + "\t x9 %#018x x10 %#018x x11 %#018x"%self[9:12], + "\t x12 %#018x x13 %#018x x14 %#018x"%self[12:15], + "\t x15 %#018x x16 %#018x x17 %#018x"%self[15:18], + "\t x18 %#018x x19 %#018x x20 %#018x"%self[18:21], + "\t x21 %#018x x22 %#018x x23 %#018x"%self[21:24], + "\t x24 %#018x x25 %#018x x26 %#018x"%self[24:27], + "\t x27 %#018x x28 %#018x fp %#018x"%self[27:30], + "\t lr %#018x sp %#018x pc %#018x"%self[30:33], + "\t cpsr %#010x"%self[33], + ] + +class ThreadState(ThreadStateBase): + cputype = CPU_TYPE_ARM64 + flavor = 7 + flavorname = 'ARM_EXCEPTION_STATE64' + def otool(self): + return [ "\t far %#018x esr %#010x exception %#010x" + % (self[0], self[1]>>32, self[1]&0xFFFFFFFF) ] + +class ThreadState(ThreadStateBase): + # Default output + flavorname = property(lambda _:'%d (unknown)'%_.c.flavor) + def otool(self): + return [ " state (Unknown cputype/cpusubtype)" ] + +#### Source: /usr/include/mach-o/loader.h + 

class thread_command(LoadCommand):
    # Load command carrying a raw thread state (LC_THREAD / LC_UNIXTHREAD).
    # The state bytes are exposed as registers through `self.reg`.
    lc_types = (LC_THREAD, LC_UNIXTHREAD)
    _fields = [
        ("flavor", "u32"),  # flavor of thread state
        ("count", "u32"),   # count of longs in thread state
        # thread state for this flavor; its length is cmdsize minus 16
        ("state", CData(lambda cmd: cmd.cmdsize - 16)),
        ]

    def __init__(self, *args, **kargs):
        LoadCommand.__init__(self, *args, **kargs)
        # Register-level view of this command, built by ThreadState.
        self.reg = ThreadState(self)

    def get_entrypoint(self):
        # Value of the register that the thread state names as entry point.
        entry_register = self.reg.entrypoint
        return self.reg[entry_register]

    def set_entrypoint(self, val):
        entry_register = self.reg.entrypoint
        self.reg[entry_register] = val

    entrypoint = property(get_entrypoint, set_entrypoint)

    def cputype(self):
        # The parent is either a plain dict carrying 'cputype', or an object
        # whose own parent holds the Mach-O header (Mhdr).
        owner = self.parent
        if type(owner) == dict:
            return owner['cputype']
        return owner.parent.Mhdr.cputype
    cputype = property(cputype)

    def flavorname(self):
        return self.reg.flavorname
    flavorname = property(flavorname)

    def flavorcount(self):
        # Symbolic "<flavorname>_COUNT" when the stored count matches the
        # count expected for this flavor, otherwise the raw count together
        # with the symbolic name it should have had.
        expected_name = self.reg.flavorname + '_COUNT'
        if self.count == self.reg.flavorcount:
            return expected_name
        return '%d (not %s)' % (self.count, expected_name)
    flavorcount = property(flavorcount)

    def otool(self, llvm=False):
        # Generic load command lines followed by the register dump.
        header_lines = LoadCommand.otool(self, llvm=llvm)
        return header_lines + self.reg.otool()



# The routines command contains the address of the dynamic shared library
# initialization routine and an index into the module table for the module
# that defines the routine. Before any modules are used from the library the
# dynamic linker fully binds the module that defines the initialization routine
# and then calls it. This gets called before any module initialization
# routines (used for C++ static constructors) in the library.
# * In loader.h, there are two data structures: routines_command and routines_command_64, which are merged in one structure below.
class routines_command(LoadCommand):
    # Pointer-sized fields ("ptr") let one class serve both LC_ROUTINES and
    # LC_ROUTINES_64.
    lc_types = (LC_ROUTINES, LC_ROUTINES_64)
    _fields = [
        ("init_address", "ptr"),  # address of initialization routine
        ("init_module", "ptr"),   # index into the module table that the init routine is defined in
        ("reserved1", "ptr"),
        ("reserved2", "ptr"),
        ("reserved3", "ptr"),
        ("reserved4", "ptr"),
        ("reserved5", "ptr"),
        ("reserved6", "ptr"),
        ]

# The symtab_command contains the offsets and sizes of the link-edit 4.3BSD
# "stab" style symbol table information as described in the header files
# <nlist.h> and <stab.h>.
class symtab_command(LoadCommand):
    lc_types = (LC_SYMTAB,)
    _fields = [
        ("symoff", "u32"),   # symbol table offset
        ("nsyms", "u32"),    # number of symbol table entries
        ("stroff", "u32"),   # string table offset
        ("strsize", "u32"),  # string table size in bytes
        ]

    def sectionsToAdd(self, raw):
        # Builds self.sect as [StringTable, SymbolTable] from `raw`.
        from elfesteem.macho.sections import StringTable, SymbolTable
        # self.sect is bound before anything is parsed, and the string table
        # goes in first: the `strtab` property below reads self.sect[0], so
        # symbol names can be resolved while the symbol table is parsed.
        self.sect = []
        assert self.stroff != 0
        string_table = StringTable(parent=self, content=raw, start=self.stroff)
        self.sect.append(string_table)
        assert self.symoff != 0
        symbol_table = SymbolTable(parent=self, content=raw, start=self.symoff)
        self.sect.append(symbol_table)
        return self.sect

    def strtab(self):
        return self.sect[0]
    strtab = property(strtab)

    def sectionsMappedInMemory(self):
        return [self]

    def changeOffsets(self, decalage, min_offset=None):
        # Shift both file offsets by `decalage`, string table first; each
        # one moves only if isOffsetChangeable() accepts it.
        for field in ('stroff', 'symoff'):
            current = getattr(self, field)
            if isOffsetChangeable(current, min_offset):
                setattr(self, field, current + decalage)

# This is the second set of the symbolic information which is used to support
# the data structures for the dynamically link editor.
# (...)
# The symbols indicated by symoff and nsyms of the LC_SYMTAB load command
# are grouped into the following three groups:
#    local symbols (further grouped by the module they are from)
#    defined external symbols (further grouped by the module they are from)
#    undefined symbols
# The local symbols are used only for debugging. The dynamic binding
# process may have to use them to indicate to the debugger the local
# symbols for a module that is being bound.
# The last two groups are used by the dynamic binding process to do the
# binding (indirectly through the module table and the reference symbol
# table when this is a dynamically linked shared library file).
from elfesteem.macho.sections import DySymArray
class dysymtab_command(LoadCommand):
    lc_types = (LC_DYSYMTAB,)
    # Maps a field-name prefix (the field name without its trailing "off")
    # to the section class that parses the table found at that offset.
    _sym = DySymArray
    _fields = [
        ("ilocalsym", "u32"),       # index to local symbols
        ("nlocalsym", "u32"),       # number of local symbols
        ("iextdefsym", "u32"),      # index to externally defined symbols
        ("nextdefsym", "u32"),      # number of externally defined symbols
        ("iundefsym", "u32"),       # index to undefined symbols
        ("nundefsym", "u32"),       # number of undefined symbols
        ("tocoff", "u32"),          # file offset to table of contents
        ("ntoc", "u32"),            # number of entries in table of contents
        ("modtaboff", "u32"),       # file offset to module table
        ("nmodtab", "u32"),         # number of module table entries
        ("extrefsymoff", "u32"),    # offset to referenced symbol table
        ("nextrefsyms", "u32"),     # number of referenced symbol table entries
        ("indirectsymoff", "u32"),  # file offset to the indirect symbol table
        ("nindirectsyms", "u32"),   # number of indirect symbol table entries
        ("extreloff", "u32"),       # offset to external relocation entries
        ("nextrel", "u32"),         # number of external relocation entries
        ("locreloff", "u32"),       # offset to local relocation entries
        ("nlocrel", "u32"),         # number of local relocation entries
        ]

    def sectionsToAdd(self, raw):
        # One section per field whose name ends in "off" and whose value is
        # non-zero, in field order. Raises NotImplementedError when no
        # section class is registered in _sym for such a field.
        self.sect = []
        for field_name, field_type in self._fields:
            if field_name.endswith('off'):
                start = getattr(self, field_name)
                if start != 0:
                    kind = field_name[:-3]
                    if kind not in self._sym:
                        raise NotImplementedError
                    table = self._sym[kind](parent=self, content=raw, start=start)
                    self.sect.append(table)
        return self.sect

    def changeOffsets(self, decalage, min_offset=None):
        # Shift every "...off" field that isOffsetChangeable() accepts.
        for field_name, field_type in self._fields:
            if field_name.endswith('off'):
                current = getattr(self, field_name)
                if isOffsetChangeable(current, min_offset):
                    setattr(self, field_name, current + decalage)

# The twolevel_hints_command contains the offset and number of hints in the
# two-level namespace lookup hints table.
class twolevel_hints_command(LoadCommand):
    lc_types = (LC_TWOLEVEL_HINTS,)
    _fields = [
        ("offset", "u32"),  # offset to the hint table
        ("nhints", "u32"),  # number of hints in the hint table
        ]

    def sectionsToAdd(self, raw):
        # A single Hint section, or none when the offset is zero.
        from elfesteem.macho.sections import Hint
        self.sect = []
        if self.offset == 0:
            return self.sect
        self.sect.append(Hint(parent=self, content=raw, start=self.offset))
        return self.sect

    def changeOffsets(self, decalage, min_offset=None):
        if not isOffsetChangeable(self.offset, min_offset):
            return
        self.offset += decalage

# The prebind_cksum_command contains the value of the original check sum for
# prebound files or zero. When a prebound file is first created or modified
# for other than updating its prebinding information the value of the check sum
# is set to zero. When the file has it prebinding re-done and if the value of
# the check sum is zero the original check sum is calculated and stored in
# cksum field of this load command in the output file. If when the prebinding
# is re-done and the cksum field is non-zero it is left unchanged from the
# input file.
class prebind_cksum_command(LoadCommand):
    lc_types = (LC_PREBIND_CKSUM,)
    _fields = [ ("cksum","u32") ] # the check sum or zero

# The uuid load command contains a single 128-bit unique random number that
# identifies an object produced by the static link editor.
class uuid_command(LoadCommand):
    lc_types = (LC_UUID,)
    _fields = [ ("raw_uuid","16s") ] # the 128-bit uuid
    def get_uuid_tuple(self):
        # The 16 raw bytes, read big-endian as six integers of
        # 4, 2, 2, 2, 2 and 4 bytes.
        return struct.unpack(">IHHHHI", self.raw_uuid)
    def set_uuid_tuple(self, value):
        # `value` must hold six integers, as returned by get_uuid_tuple().
        self.raw_uuid = struct.pack(">IHHHHI", *value)
    uuid = property(get_uuid_tuple, set_uuid_tuple)
    def __repr__(self):
        # Fixed: the format string was empty, so formatting the 6-tuple
        # raised TypeError. The six integers are rendered in the usual
        # 8-4-4-4-12 hexadecimal UUID grouping (the last two integers make
        # up the final 12 digits).
        # NOTE(review): the '<LC_UUID ...>' wrapper is a reconstruction;
        # confirm against the upstream source.
        return '<LC_UUID %.8X-%.4X-%.4X-%.4X-%.4X%.8X>' % self.uuid
    def changeUUID(self, uuid):
        # `uuid` is a string of hexadecimal digits; each pair of digits
        # becomes one byte. struct.pack raises struct.error unless exactly
        # 16 bytes result (32 digits).
        self.raw_uuid = struct.pack("B"*16, *[int(uuid[2*i:2*i+2],16) for i in range(len(uuid)//2)])

# The rpath_command contains a path which at runtime should be added to
# the current run path used to find @rpath prefixed dylibs.
class rpath_command(LoadCommand):
    lc_types = (LC_RPATH,)
    _offsets_in_data = ("path",)
    _fields = [ ("path","u32") ] # path to add to run path

# The linkedit_data_command contains the offsets and sizes of a blob
# of data in the __LINKEDIT segment.
class linkedit_data_command(LoadCommand):
    lc_types = (LC_FUNCTION_STARTS,LC_DATA_IN_CODE,LC_DYLIB_CODE_SIGN_DRS,LC_CODE_SIGNATURE,LC_LINKER_OPTIMIZATION_HINT,LC_SEGMENT_SPLIT_INFO)
    _fields = [
        ("dataoff","u32"), # file offset of data in __LINKEDIT segment
        ("datasize","u32"), # file size of data in __LINKEDIT segment
        ]
    def sectionsToAdd(self, raw):
        from elfesteem.macho.sections import FunctionStarts, DataInCode, DylibCodeSign, CodeSignature, OptimizationHint, SegmentSplitInfo
        # The Load Commands below have some additional data in the LINKEDIT segment,
        # this data is considered as being a section inside this segment.
        self.sect = []
        if self.datasize != 0:
            # The section class is selected by the command type.
            c = {
                LC_FUNCTION_STARTS: FunctionStarts,
                LC_DATA_IN_CODE: DataInCode,
                LC_DYLIB_CODE_SIGN_DRS: DylibCodeSign,
                LC_CODE_SIGNATURE: CodeSignature,
                LC_LINKER_OPTIMIZATION_HINT: OptimizationHint,
                LC_SEGMENT_SPLIT_INFO: SegmentSplitInfo,
                }[self.cmd]
            self.sect.append(c(parent=self, content=raw, start=self.dataoff))
        return self.sect
    def changeOffsets(self, decalage, min_offset=None):
        if isOffsetChangeable(self.dataoff, min_offset):
            self.dataoff += decalage

# The encryption_info_command contains the file offset and size of
# an encrypted segment.
class encryption_info_command(LoadCommand):
    lc_types = (LC_ENCRYPTION_INFO,)
    _fields = [
        ("cryptoff","u32"), # file offset of encrypted range
        ("cryptsize","u32"),# file size of encrypted range
        ("cryptid","u32"), # which encryption system, 0 means not-encrypted yet
        ]
    def sectionsToAdd(self, raw):
        from elfesteem.macho.sections import Encryption
        self.sect = []
        if self.cryptoff != 0:
            self.sect.append(Encryption(self,content=raw, start=self.cryptoff, type='crypt'))
        return self.sect
    def changeOffsets(self, decalage, min_offset=None):
        # Fixed: only cryptoff is a file offset. cryptsize (a length) and
        # cryptid (an encryption system identifier) used to be shifted by
        # `decalage` as well, which corrupted them whenever they were
        # non-zero.
        if isOffsetChangeable(self.cryptoff, min_offset):
            self.cryptoff += decalage

class encryption_info_command_64(encryption_info_command):
    lc_types = (LC_ENCRYPTION_INFO_64,)
    _fields = [
        ("cryptoff","u32"), # file offset of encrypted range
        ("cryptsize","u32"),# file size of encrypted range
        ("cryptid","u32"), # which encryption system, 0 means not-encrypted yet
        ("pad","u32"), # padding to make this struct's size a multiple of 8 bytes
        ]

# The version_min_command contains the min OS version on which this
# binary was built to run.
class version_min_command(LoadCommand):
    lc_types = (LC_VERSION_MIN_MACOSX, LC_VERSION_MIN_IPHONEOS,
                LC_VERSION_MIN_WATCHOS, LC_VERSION_MIN_TVOS)
    _fields = [
        ("version", "u32"),  # X.Y.Z is encoded in nibbles xxxx.yy.zz
        ("sdk", "u32"),      # X.Y.Z is encoded in nibbles xxxx.yy.zz
        ]

# The build_version_command contains the min OS version on which this
# binary was built to run for its platform. The list of known platforms and
# tool values following it.
class build_tool_version(CStruct):
    _fields = [
        ("tool", "u32"),     # enum for the tool
        ("version", "u32"),  # version number of the tool
        ]

class toolsArray(CArray):
    # Array of build_tool_version; its length is the parent's ntools field.
    _cls = build_tool_version
    count = lambda arr: arr.parent.ntools

class build_version_command(LoadCommand):
    lc_types = (LC_BUILD_VERSION, )
    _fields = [
        ("platform", "u32"),  # platform
        ("minos", "u32"),     # X.Y.Z is encoded in nibbles xxxx.yy.zz
        ("sdk", "u32"),       # X.Y.Z is encoded in nibbles xxxx.yy.zz
        ("ntools", "u32"),    # number of tool entries following this
        ("tools", toolsArray),
        ]

    def otool(self, llvm=False):
        # Only llvm == 9 and llvm == 11 get a dedicated layout; any other
        # value falls back to LoadCommand.otool(). The two layouts differ
        # solely in naming: llvm 9 shows platform 1 as "macos" and tool 3
        # as "ld", llvm 11 always prints the raw numbers.
        if llvm == 9:
            platform_names = {1: "macos"}
            tool_names = {3: "ld"}
        elif llvm == 11:
            platform_names = {}
            tool_names = {}
        else:
            return LoadCommand.otool(self, llvm=llvm)
        command_name = constants['LC'].get(self.cmd, hex(self.cmd))
        lines = [
            # '%' binds tighter than '+': the prefix is formatted first and
            # the command name is appended afterwards.
            (" cmd %s" % "LC_") + command_name,
            " cmdsize %s" % self.cmdsize,
            " platform %s" % platform_names.get(self.platform, self.platform),
            " sdk %s" % split_integer(self.sdk, 8, 3, truncate=1),
            " minos %s" % split_integer(self.minos, 8, 3, truncate=1),
            " ntools %s" % self.ntools,
            ]
        for entry in self.tools:
            lines.append(" tool %s" % tool_names.get(entry.tool, entry.tool))
            lines.append(" version %s" % split_integer(entry.version, 8, 3, truncate=2))
        return lines

# The dyld_info_command contains the file offsets and sizes of
# the new compressed form of the information dyld needs to
# load the image. This information is used by dyld on Mac OS X
# 10.6 and later. All information pointed to by this command
# is encoded using byte streams, so no endian swapping is needed
# to interpret it.
from elfesteem.macho.sections import DyldArray
class dyld_info_command(dysymtab_command):
    # Subclasses dysymtab_command and swaps in its own _sym table and
    # field list; it defines no methods of its own.
    lc_types = (LC_DYLD_INFO, LC_DYLD_INFO_ONLY)
    _sym = DyldArray
    _fields = [
        ("rebase_off", "u32"),      # file offset to rebase info
        ("rebase_size", "u32"),     # size of rebase info
        ("bind_off", "u32"),        # file offset to binding info
        ("bind_size", "u32"),       # size of binding info
        ("weak_bind_off", "u32"),   # file offset to weak binding info
        ("weak_bind_size", "u32"),  # size of weak binding info
        ("lazy_bind_off", "u32"),   # file offset to lazy binding info
        ("lazy_bind_size", "u32"),  # size of lazy binding info
        ("export_off", "u32"),      # file offset to export info
        ("export_size", "u32"),     # size of export info
        ]

# The linker_option_command contains linker options embedded in object files.
class linkeroption_command(LoadCommand):
    lc_types = (LC_LINKER_OPTION,)
    _fields = [
        ("count", "u32"),  # number of strings
        ("linker_options", CData(lambda cmd: cmd.cmdsize - 12)),
        ]

    def otool(self, llvm=False):
        lines = LoadCommand.otool(self, llvm=llvm)
        # linker_options is a concatenation of zero terminated UTF8 strings,
        # zero filled at end to align: decode exactly `count` of them, each
        # one starting right after the terminator of the previous one.
        blob = self.linker_options.pack()
        start = 0
        for number in range(1, self.count + 1):
            end = blob.index(data_null, start)
            text = blob[start:end].decode('utf-8')
            lines.append(" string #%d %s" % (number, text))
            start = end + 1
        return lines


# The symseg_command contains the offset and size of the GNU style
# symbol table information as described in the header file <symseg.h>.
# The symbol roots of the symbol segments must also be aligned properly
# in the file. So the requirement of keeping the offsets aligned to a
# multiple of a 4 bytes translates to the length field of the symbol
# roots also being a multiple of a long. Also the padding must again be
# zeroed. (THIS IS OBSOLETE and no longer supported).
class symseg_command(LoadCommand):
    lc_types = (LC_SYMSEG,)
    _fields = [
        ("offset", "u32"),  # symbol segment offset
        ("size", "u32"),    # symbol segment size in bytes
        ]

# The ident_command contains a free format string table following the
# ident_command structure. The strings are null terminated and the size of
# the command is padded out with zero bytes to a multiple of 4 bytes.
# (THIS IS OBSOLETE and no longer supported).
class ident_command(LoadCommand):
    # NOTE(review): every other load command in this file declares
    # `lc_types`; `lh_types` looks like a typo. Left unchanged because the
    # effect of the attribute depends on LoadCommand, which is not visible
    # here -- confirm before renaming.
    lh_types = (LC_IDENT,)
    _fields = [ ]

# The fvmfile_command contains a reference to a file to be loaded at the
# specified virtual address. (Presently, this command is reserved for
# internal use. The kernel ignores this command when loading a program into
# memory).
class fvmfile_command(CStruct):
    _fields = [
        ("stroffset", "u32"),    # files pathname
        ("header_addr", "u32"),  # files virtual address
        ]

# The entry_point_command is a replacement for thread_command.
# It is used for main executables to specify the location (file offset)
# of main(). If -stack_size was used at link time, the stacksize
# field will contain the stack size needed for the main thread.
class entry_point_command(LoadCommand):
    lc_types = (LC_MAIN,)
    _fields = [
        ("entryoff","u64"), # file (__TEXT) offset of main()
        ("stacksize","u64"), # if not zero, initial stack size
        ]
    def changeOffsets(self, decalage, min_offset=None):
        if isOffsetChangeable(self.entryoff, min_offset):
            self.entryoff += decalage

# The source_version_command is an optional load command containing
# the version of the sources used to build the binary.
class source_version_command(LoadCommand):
    lc_types = (LC_SOURCE_VERSION,)
    _fields = [ ("version","u64") ] # A.B.C.D.E packed as a24.b10.c10.d10.e10

# The LC_DATA_IN_CODE load commands uses a linkedit_data_command
# to point to an array of data_in_code_entry entries. Each entry
# describes a range of data in a code section.
class data_in_code_command(CStruct):
    _fields = [
        ("offset","u32"), # from mach_header to start of data range
        ("length","u16"), # number of bytes in data range
        ("kind","u16"), # a DICE_KIND_* value
        ]
    # Read-only aliases of `offset` and `length`.
    data_incode_off = property(lambda _:_.offset)
    data_incode_size = property(lambda _:_.length)

SetConstants(
DICE_KIND_DATA = 0x0001,
DICE_KIND_JUMP_TABLE8 = 0x0002,
DICE_KIND_JUMP_TABLE16 = 0x0003,
DICE_KIND_JUMP_TABLE32 = 0x0004,
DICE_KIND_ABS_JUMP_TABLE32 = 0x0005,
)


######################################################################

def isOffsetChangeable(offset, min_offset):
    # An offset may be shifted when it is non-zero and, if a threshold is
    # given, not below that threshold.
    return (min_offset is None or offset >= min_offset) and offset != 0

class LoadCommands(CBase):
    # Ordered list of the load commands of a Mach-O file. The header counters
    # (parent.Mhdr.ncmds / sizeofcmds) are updated by append() and removepos().
    def unpack(self, c, o):
        # Parse Mhdr.ncmds load commands starting at offset `o` of the
        # parent's content. On an inconsistent header an error is logged and
        # the list is left empty.
        self.offset = o
        self.lhlist = []
        if self.parent.Mhdr.sizeofcmds > self.parent.datasize:
            log.error("LoadCommands longer than file length")
            return
        # Bound ncmds by assuming at least 8 bytes per load command.
        if self.parent.Mhdr.ncmds*8 > self.parent.Mhdr.sizeofcmds:
            log.error("Too many load command: %d commands cannot fit in %d bytes", self.parent.Mhdr.ncmds, self.parent.Mhdr.sizeofcmds)
            return
        for i in range(self.parent.Mhdr.ncmds):
            lh = LoadCommand(parent=self, content=self.parent.content, start=o)
            if lh.cmdsize > lh.bytelen:
                log.warning("%s has %d bytes of additional padding", lh.__class__.__name__, lh.cmdsize-lh.bytelen)
            elif 8 <= lh.cmdsize < lh.bytelen:
                log.warning("%s is %d bytes too short", lh.__class__.__name__, lh.bytelen-lh.cmdsize)
            self.lhlist.append(lh)
            if self.parent.interval is not None :
                self.parent.interval.delete(o,o+lh.bytelen)
            # The next command starts where the declared size says, not where
            # parsing stopped.
            o += lh.cmdsize
        if self.parent.Mhdr.sizeofcmds > o-self.offset:
            log.warning("LoadCommands have %d bytes of additional padding", self.parent.Mhdr.sizeofcmds-o+self.offset)
    def pack(self):
        data = data_empty
        for lc in self.lhlist:
            data += lc.pack()
        return data
    def append(self, lh):
        self.lhlist.append(lh)
        self.parent.Mhdr.ncmds += 1
        self.parent.Mhdr.sizeofcmds += lh.bytelen
    def getpos(self, lht):
        # Positions, in list order, of the load commands whose cmd is `lht`.
        # Fixed: positions come from enumerate() instead of list.index(),
        # which returns the position of the first element comparing equal
        # (wrong if two commands compare equal) and made the scan quadratic.
        poslist = []
        for pos, lc in enumerate(self.lhlist):
            if lht == lc.cmd:
                poslist.append(pos)
        return poslist
    def removepos(self, pos):
        # Remove the load command at index `pos` and update the header
        # counters.
        self.parent.Mhdr.sizeofcmds -= len(self.lhlist[pos].pack())
        self.parent.Mhdr.ncmds-=1
        # Fixed: delete by position. list.remove() deletes the first element
        # comparing equal to its argument, which is not necessarily the one
        # at `pos`.
        del self.lhlist[pos]
    def changeOffsets(self, decalage, min_offset=None):
        for lc in self.lhlist:
            lc.changeOffsets(decalage, min_offset)

    def addSH(self, s):
        # Hand section header `s` to the first command that has an addSH
        # method and the same segname as s.parent; returns whether one was
        # found.
        for lc in self.lhlist:
            if hasattr(lc, 'addSH') and lc.segname == s.parent.segname:
                lc.addSH(s)
                return True
        return False

    def __iter__(self):
        return self.lhlist.__iter__()
    def __len__(self):
        return self.lhlist.__len__()
    def __getitem__(self, item):
        return self.lhlist[item]
    def __str__(self):
        raise AttributeError("Use pack() instead of str()")
    def extendSegment(self,lc,size):
        # Grow segment `lc` by `size` bytes and shift what lies after it.
        # NOTE(review): indentation was lost in this copy of the file; the
        # nesting below (section updates inside the fileoff test, `else`
        # paired with the segname test) is a reconstruction -- confirm
        # against the upstream source.
        if lc.maxprot == 0:
            raise ValueError('Maximum Protection is 0')
        lc.filesize += size
        lc.vmsize += size
        for lco in self.lhlist:
            if hasattr(lco,'segname'):
                if lco.fileoff > lc.fileoff:
                    lco.fileoff += size
                    lco.vmaddr += size
                    if hasattr(lco,'sect'):
                        for s in lco.sect:
                            if not s.offset == 0 :
                                s.offset += size
                            if not s.addr == 0 :
                                s.addr += size
            else :
                # LC_MAIN is deliberately excluded from the shift.
                if not lco.cmd == LC_MAIN:
                    lco.changeOffsets(size)

    def findlctext(self):
        # First segment command that reports itself as the text segment;
        # None when there is none.
        for lc in self.lhlist:
            if lc.cmd == LC_SEGMENT or lc.cmd == LC_SEGMENT_64:
                if lc.is_text_segment():
                    return lc

# ----------------------------------------------------------------------
# Patch boundary -- everything below belongs to a different file:
# diff --git a/elfesteem/macho/sections.py b/elfesteem/macho/sections.py
# new file mode 100755
# index 0000000..68fe488
# --- /dev/null
# +++ b/elfesteem/macho/sections.py
# @@ -0,0 +1,1161 @@
# ----------------------------------------------------------------------
import struct
from elfesteem.macho.common import *
from elfesteem.cstruct import Constants, CBase, CString, CArray, CStruct, CStructWithStrTable
from elfesteem.strpatchwork import StrPatchwork

import sys
if sys.version_info[0:2] == (2, 3):
    mask32 = (eval("1L")<<32)-1 # 'eval' avoids SyntaxError with python3.x
    mask64 = (eval("1L")<<64)-1
else:
    mask32 = eval("0xffffffff") # 'eval' avoids warnings with python2.3
    mask64 = eval("0xffffffffffffffff")

dyld_constants = {}
def SetConstants(**kargs):
    # Forwards the named constants to Constants(), together with this
    # module's globals and the dyld_constants table.
    Constants(globs = globals(), table = dyld_constants, **kargs)

############################################################
# Sections, containing data, at a given offset in the file
#
# NB: the LINKEDIT segment contains data from numerous Load Commands,
# which are not identified as true sections (nsects == 0 for this
# segment).
# We manage them almost as if they were true sections.

class BaseSection(CBase):
    # Give direct access to the offset in the file, which is mentioned
    # in the parent structure. The name of the field depend on the type
    # of section.
    def get_offset(self):
        # type None -> parent.offset; otherwise the parent field named
        # "<type>off".
        if self.type is None: return self.parent.offset
        else: return getattr(self.parent, self.type + 'off')
    def set_offset(self, val):
        if self.type is None: self.parent.offset = val
        else: setattr(self.parent, self.type + 'off', val)
    offset = property(get_offset, set_offset)
    def __str__(self):
        return "%-30s %-10s %#010x %#010x" % (self.__class__.__name__, '', self.offset, len(self))

class TrueSection(BaseSection):
    # name, size and addr are all taken from the parent object.
    name = property(lambda _:_.parent.name)
    def set_size(self, val):
        self.parent.size = val
    size = property(lambda _:_.parent.size, set_size)
    addr = property(lambda _:_.parent.addr)
    # 'sh' member should be obsolete, but is used to detect a true section.
    sh = property(lambda _:_.parent)
    def __str__(self):
        return str(self.parent)

class Section(TrueSection):
    type = None
    def unpack(self, c, o):
        self.content = c
        if self.parent is not None: assert o == self.offset
        self._off = o
    def pack(self):
        return self.content.pack()
    def get_content(self):
        return self.__content
    def set_content(self,val):
        # Every assignment to `content` wraps the value in a StrPatchwork.
        self.__content = StrPatchwork(val)
    content = property(get_content, set_content)
    def update(self, **kargs):
        # With a 'size' keyword: remember it and cut `content` down to the
        # bytes [_off, _off+size).
        if 'size' in kargs: self._size = kargs['size']
        # NOTE(review): this test can never succeed. The attribute written
        # by set_content is name-mangled to '_Section__content', while the
        # string literal '__content' is not mangled, so a call without
        # 'size' always returns below. Left as is: making the branch live
        # would slice the content again using the original file offset.
        elif hasattr(self, '__content'): self._size = len(self.content)
        else: return
        self.content = self.content[self._off:self._off+self._size]

class symbolPointer(CStruct):
    _fields = [ ("address","ptr") ]

class SymbolPtrList(TrueSection,CArray):
    type = None
    _cls = symbolPointer
    # Number of pointers: section size divided by the pointer size in bytes.
    count = lambda _:_.parent.size//(_.wsize//8)
    # TODO: update self.parent.size when the array size changes

class symbolStub(CBase):
    def unpack(self, c, o):
        # The stub size comes from the reserved2 field of the grandparent
        # (the same field SymbolStubList.count divides by).
        self._size = self.parent.parent.reserved2
        self.content = c[o:o+self._size]
    def pack(self):
        return self.content

class SymbolStubList(TrueSection,CArray):
    type = None
    _cls = symbolStub
    # Number of stubs: section size divided by the per-stub size (reserved2).
    count = lambda _:_.parent.size//_.parent.reserved2
    # TODO: update self.parent.size when the array
size changes + +class Reloc(TrueSection,CArray): + type = 'rel' # Offset is parent.reloff + _cls = relocation_info + count = lambda _:_.parent.nreloc + size = property(lambda _:_.parent.nreloc//8) + addr = property(lambda _:_.parent.reloff) + reloclist = property(lambda _:_._array) + def __str__(self): + p = self.parent + return "%-30s %-10s %#010x %#010x" % (p.name, 'relocs', p.reloff, p.nreloc) + # TODO: update self.parent.nreloc when the array size changes + +#### Source: /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.9.sdk/usr/include/mach-o/nlist.h +# The 'n_type' aka. 'type' field +N_STAB = 0xe0 # if any of these bits set, a symbolic debugging entry +N_PEXT = 0x10 # private external symbol bit +N_TYPE = 0x0e # mask for the type bits +N_EXT = 0x01 # external symbol bit, set for external symbols +# Values for N_TYPE bits of the n_type field. +N_UNDF = 0x0 # undefined, n_sect == NO_SECT +N_ABS = 0x2 # absolute, n_sect == NO_SECT +# 0x4 # (found in 'Decibels' for iOS, meaning unknown) +N_SECT = 0xe # defined in section number n_sect +N_PBUD = 0xc # prebound undefined (defined in a dylib) +N_INDR = 0xa # indirect + +class symbol(CStructWithStrTable): + _fields = [ ("name_idx","u32"), + ("type","u08"), + ("sectionindex","u08"), + ("description","u16"), + ("value","ptr")] + def strtab(self): + return self.parent.parent.strtab + strtab = property(strtab) + def __str__(self): + return self.otool() + def otool(self): + n_type = { + N_UNDF: 'U', + N_ABS : 'A', + N_SECT: 'S', + N_PBUD: 'P', + N_INDR: 'I', + }.get(self.type & N_TYPE, hex(self.type & N_TYPE)) + n_type += [ ' ', 'X' ] [self.type & N_EXT] + n_type += [ ' ', 'X' ] [(self.type & N_PEXT)>>4] + if self.type & N_STAB: + n_type += 'D' + desc = self.description + e = self.parent.parent.parent.parent + if self.sectionindex == 0: + section = "NO_SECT" + elif 0 <= self.sectionindex-1 < len(e.sect): + section = e.sect[self.sectionindex-1].parent + if hasattr(section, 
'name'): + section = section.name + else: + section = "INVALID(%d)" % self.sectionindex + else: + section = "INVALID(%d)" % self.sectionindex + return "%-35s %-15s %-4s 0x%08x %04x"%(self.name,section,n_type,self.value,desc) + +class SymbolNotFound(object): + pass +SymbolNotFound = SymbolNotFound() +class SymbolTable(BaseSection,CArray): + type = 'sym' + _cls = symbol + count = lambda _:_.parent.nsyms + def update(self, **kargs): + self.symbols_from_name = {} + # This cannot be done if the string table was not parsed + for symbol in self.symbols: + self.symbols_from_name[symbol.name] = symbol + symbols = property(lambda _:_._array) + def __iter__(self): + return self.symbols.__iter__() + def __getitem__(self, idx): + try: + if type(idx) == int: + return self.symbols[idx] + else: + return self.symbols_from_name[idx.strip('\0')] + except IndexError: + log.error("Cannot find symbol with index %r", idx) + return SymbolNotFound + +#### Source: /usr/include/mach-o/loader.h + +# The entries in the two-level namespace lookup hints table are twolevel_hint +# structs. These provide hints to the dynamic link editor where to start +# looking for an undefined symbol in a two-level namespace image. The +# isub_image field is an index into the sub-images (sub-frameworks and +# sub-umbrellas list) that made up the two-level image that the undefined +# symbol was found in when it was built by the static link editor. If +# isub-image is 0 the the symbol is expected to be defined in library and not +# in the sub-images. If isub-image is non-zero it is an index into the array +# of sub-images for the umbrella with the first index in the sub-images being +# 1. The array of sub-images is the ordered list of sub-images of the umbrella +# that would be searched for a symbol that has the umbrella recorded as its +# primary library. The table of contents index is an index into the +# library's table of contents. 
This is used as the starting point of the +# binary search or a directed linear search. + +class twolevel_hint(CStruct): + _fields = [ ("hint","u32") ] + isub_image = property(lambda _:_.hint>>24) + itoc = property(lambda _:_.hint&0x00ffffff) + +class Hint(BaseSection,CArray): + type = None + _cls = twolevel_hint + count = lambda _:_.parent.nhints + +# NB: the following sections are used by LC_DYSYMTAB; dysymarray_register +# registers these sections. +DySymArray = {} +def dysymarray_register(cls): + DySymArray[cls.type] = cls + +# An indirect symbol table entry is simply a 32bit index into the symbol table +# to the symbol that the pointer or stub is refering to. Unless it is for a +# non-lazy symbol pointer section for a defined symbol which strip(1) as +# removed. In which case it has the value INDIRECT_SYMBOL_LOCAL. If the +# symbol was also absolute INDIRECT_SYMBOL_ABS is or'ed with that. +INDIRECT_SYMBOL_LOCAL = 0x80000000 +INDIRECT_SYMBOL_ABS = 0x40000000 +class dylib_indirect_entry(CStruct): + _fields = [ ("index","u32") ] + +class DySymIndirect(BaseSection,CArray): + type = 'indirectsym' + _cls = dylib_indirect_entry + count = lambda _:_.parent.nindirectsyms + entries = property(lambda _:_) +dysymarray_register(DySymIndirect) + +# A table of contents entry +class dylib_table_of_contents(CStruct): + _fields = [ + ("symbol_index","u32"), # the defined external symbol (index into the symbol table) + ("module_index","u32"), # index into the module table this symbol is defined in + ] + +class DySymToc(BaseSection,CArray): + type = 'toc' + _cls = dylib_table_of_contents + count = lambda _:_.parent.ntoc +dysymarray_register(DySymToc) + +# A module table entry +# * In loader.h, there are two data structures: dylib_module and dylib_module_64, which are merged in one structure below. 
+class dylib_module(CStruct): + _fields = [ + ("module_name","u32"), # the module name (index into string table) + ("iextdefsym","u32"), # index into externally defined symbols + ("nextdefsym","u32"), # number of externally defined symbols + ("irefsym","u32"), # index into reference symbol table + ("nrefsym","u32"), # number of reference symbol table entries + ("ilocalsym","u32"), # index into symbols for local symbols + ("nlocalsym","u32"), # number of local symbols + ("iextrel","u32"), # index into external relocation entries + ("nextrel","u32"), # number of external relocation entries + ("iinit_iterm","u32"), # low 16 bits are the index into the init section, high 16 bits are the index into the term section + ("ninit_nterm","u32"), # low 16 bits are the number of init section entries, high 16 bits are the number of term section entries + # for this module, address & size of the start of the (__OBJC,__module_info) section + ("objc_module_info_1","u32"), + ("objc_module_info_2","ptr"), + ] + def get_addr(self): + if self.wsize == 32: return self.objc_module_info_1 + if self.wsize == 64: return self.objc_module_info_2 + def set_addr(self, value): + if self.wsize == 32: self.objc_module_info_1 = value + if self.wsize == 64: self.objc_module_info_2 = value + def get_size(self): + if self.wsize == 32: return self.objc_module_info_2 + if self.wsize == 64: return self.objc_module_info_1 + def set_size(self, value): + if self.wsize == 32: self.objc_module_info_2 = value + if self.wsize == 64: self.objc_module_info_1 = value + objc_module_info_addr = property(get_addr, set_addr) + objc_module_info_size = property(get_size, set_size) + +class DySymModTab(BaseSection,CArray): + type = 'modtab' + _cls = dylib_module + count = lambda _:_.parent.nmodtab +dysymarray_register(DySymModTab) + +# The entries in the reference symbol table are used when loading the module +# (both by the static and dynamic link editors) and if the module is unloaded +# or replaced. 
Therefore all external symbols (defined and undefined) are +# listed in the module's reference table. The flags describe the type of +# reference that is being made. The constants for the flags are defined in +# as they are also used for symbol table entries. +class dylib_reference(CStruct): + _fields = [ ("index","u32") ] + isym = property(lambda _:_.index>>8) + flags = property(lambda _:_.index&0x000000ff) + +class DySymExtref(BaseSection,CArray): + type = 'extrefsym' + _cls = dylib_reference + count = lambda _:_.parent.nextrefsyms +dysymarray_register(DySymExtref) + +class DySymLocRel(BaseSection,CArray): + type = 'locrel' + _cls = relocation_info + count = lambda _:_.parent.nlocrel +dysymarray_register(DySymLocRel) + +class DySymExtRel(BaseSection,CArray): + type = 'extrel' + _cls = relocation_info + count = lambda _:_.parent.nextrel +dysymarray_register(DySymExtRel) + +# NB: the following sections are used by LC_DYLD_INFO, LC_DYLD_INFO_ONLY; +# dyldarray_register registers these sections. 
+# NB: some example code decoding these load commands is at: +# https://github.com/espes/Slave-in-the-Magic-Mirror/blob/master/dyld_info.py +# https://opensource.apple.com/source/ld64/ld64-264.3.102/src/other/dyldinfo.cpp.auto.html +DyldArray = {} +def dyldarray_register(cls): + DyldArray[cls.type] = cls + +class Uleb128(CBase): + def _parent_parse(self, kargs): + pass # Independent of endianess and wordsize + def _initialize(self): + self.value = 0 + self._size = 0 + def unpack(self, c, o): + pos = 0 + while True: + val, = struct.unpack("B",c[o:o+1]) + if sys.version_info[0:2] == (2, 3): + val += eval("0L") + self.value += (val&0x7f) << pos + self._size += 1; o += 1; pos += 7 + if not val & 0x80: break + return val, pos + def pack(self): + if self.value == 0: + return struct.pack("B", 0) + v = self.value + c = struct.pack("") + while v: + if v > 0x7f: c += struct.pack("B", (v&0x7f)|0x80) + else: c += struct.pack("B", v) + v >>= 7 + return c + def __int__(self): + return self.value + +class Sleb128(Uleb128): + def unpack(self, c, o): + val, pos = Uleb128.unpack(self, c, o) + if val & 0x40: + self.value |= (-1) << pos + def pack(self): + if self.value == 0: + return struct.pack("B", 0) + v = self.value + c = struct.pack("") + while v: + w = v & 0x7f + if v > 0x7f: c += struct.pack("B", w|0x80) + elif v < -0x7f: c += struct.pack("B", w|0x80) + else: c += struct.pack("B", w) + v >>= 7 + if v == -1: break + return c + +class DyldArrayGeneric(BaseSection,CArray): + _cls = None + def count(self): + if self.bytelen < self.size: return len(self)+1 + else: return -1 + def get_size(self): + return getattr(self.parent, self.type + 'size') + def set_size(self, val): + setattr(self.parent, self.type + 'size', val) + size = property(get_size, set_size) + def _initialize(self): + CArray._initialize(self) + # "uncompressed" data is stored in self._info, while the "compressed" + # data is in self._array; modifying this information is tricky, and + # the API for doing this in a safe 
way will be implemented later... + self._info = [] + self.addend = 0 # default value for bind + self.index = 0 # default value for lazy_bind + if self.type == 'bind_': self.cls = Bind + elif self.type == 'weak_bind_': self.cls = WeakBind + elif self.type == 'lazy_bind_': self.cls = LazyBind + elif self.type == 'rebase_': self.cls = Rebase + def update(self, **kargs): + try: + for op in self: + op.apply() + except ValueError: + log.error("Invalid opcode %s", op) + info = property(lambda _:_._info) + +#### Source: /usr/include/mach-o/loader.h + +# The bind information is a stream of byte sized +# opcodes whose symbolic names start with BIND_OPCODE_. +# Conceptually the bind information is a table of tuples: +# +# The opcodes are a compressed way to encode the table by only +# encoding when a column changes. In addition simple patterns +# like for runs of pointers initialized to the same value can be +# encoded in a few bytes. + +# The following are used to encode binding information +SetConstants( +BIND_TYPE_POINTER = 1, +BIND_TYPE_TEXT_ABSOLUTE32 = 2, +BIND_TYPE_TEXT_PCREL32 = 3, +) +SetConstants( +BIND_SPECIAL_DYLIB_SELF = 0, +BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE = -1, +BIND_SPECIAL_DYLIB_FLAT_LOOKUP = -2, +) + +BIND_IMMEDIATE_MASK = 0x0F +SetConstants( +BIND_SYMBOL_FLAGS_WEAK_IMPORT = 0x01, +BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION = 0x08, +) +BIND_OPCODE_MASK = 0xF0 +SetConstants( +BIND_OPCODE_DONE = 0x00, +BIND_OPCODE_SET_DYLIB_ORDINAL_IMM = 0x10, +BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB = 0x20, +BIND_OPCODE_SET_DYLIB_SPECIAL_IMM = 0x30, +BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM = 0x40, +BIND_OPCODE_SET_TYPE_IMM = 0x50, +BIND_OPCODE_SET_ADDEND_SLEB = 0x60, +BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB = 0x70, +BIND_OPCODE_ADD_ADDR_ULEB = 0x80, +BIND_OPCODE_DO_BIND = 0x90, +BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB = 0xA0, +BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED = 0xB0, +BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB = 0xC0, +) + +def get_lib_name(e, idx): + from 
elfesteem.macho.loaders import LC_LOAD_DYLIB + lc = [ _ for _ in e.load if _.cmd == LC_LOAD_DYLIB ] + if idx == 0: + return 'this-image' + elif idx > 0 and len(lc) > idx-1: + lib = lc[idx-1].str_name + if '/' in lib: lib = lib[lib.rindex('/')+1:] + if '.' in lib: lib = lib[:lib.index('.')] + return lib + else: + return None + +class Bind(object): + _to_copy = ('sym', 'weak_import', 'seg', 'addr', 'libord', 'info_type', 'addend') + def __init__(self, entry): + for f in self._to_copy: + if not hasattr(entry.parent, f): raise ValueError + setattr(self, f, getattr(entry.parent, f)) + e = entry.parent.parent.parent.parent + self.sec = e.getsectionbyvad(self.addr) + if self.sec is None: raise ValueError + self.sec = self.sec.parent.sectname + if 'libord' in self._to_copy: + self.libord = get_lib_name(e, self.libord) + def __str__(self): + return "%-7s %-16s 0x%08X %-7s %6d %-16s %s%s" % ( + self.seg, self.sec, self.addr, + self.info_type, self.addend, self.libord, + self.sym, self.weak_import) + +class WeakBind(Bind): + _to_copy = ('sym', 'weak_import', 'seg', 'addr', 'info_type', 'addend') + def __str__(self): + return "%-7s %-16s 0x%08X %-7s %6d %s%s" % ( + self.seg, self.sec, self.addr, + self.info_type, self.addend, + self.sym, self.weak_import) + +class LazyBind(Bind): + _to_copy = ('sym', 'weak_import', 'seg', 'addr', 'libord', 'index') + def __str__(self): + return "%-7s %-16s 0x%08X 0x%04X %-16s %s%s" % ( + self.seg, self.sec, self.addr, + self.index, self.libord, + self.sym, self.weak_import) + +from elfesteem.cstruct import CStruct_metaclass +class bind_metaclass(CStruct_metaclass): + registered = {} + def __new__(cls, name, bases, dct): + o = CStruct_metaclass.__new__(cls, name, bases, dct) + if 'opcode' in dct: + cls.registered[dct['opcode']] = o + return o + def __call__(cls, *args, **kargs): + c = kargs['content'] + o = kargs.get('start',0) + val, = struct.unpack("B",c[o:o+1]) + opcode = val & BIND_OPCODE_MASK + if hasattr(cls, 'opcode'): + op = 
super(bind_metaclass,cls).__call__(*args, **kargs) + elif opcode in bind_metaclass.registered: + op = bind_metaclass.registered[opcode](*args, **kargs) + else: + op = super(bind_metaclass,cls).__call__(*args, **kargs) + op.opcode = opcode + return op +bind_base = bind_metaclass('bind_base', (CStruct,), {}) + +class bind_entry(bind_base): + _fields = [ ("val", "u08") ] + imm = property(lambda _: _.val & BIND_IMMEDIATE_MASK) + def unpack(self, c, o): + CStruct.unpack(self, c, o) + self.index = o - self.parent.offset + def __str__(self): + return "0x%04X BIND_OPCODE_%s" % (self.index, + dyld_constants['BIND_OPCODE'].get(self.opcode, hex(self.opcode))) + def apply(self): + pass + +class bind_opcode(bind_entry): + opcode = BIND_OPCODE_DONE + _fields = [ ("val", "u08") ] + def apply(self): + self.parent.index = self.index + self.bytelen +del bind_opcode + +class bind_opcode_libord(bind_entry): + opcode = BIND_OPCODE_SET_DYLIB_ORDINAL_IMM + _fields = [ ("val", "u08") ] + libord = property(lambda _: _.imm) + def __str__(self): + return bind_entry.__str__(self) + '(%d)' % self.libord + def apply(self): + self.parent.libord = int(self.libord) + +class bind_opcode(bind_opcode_libord): + opcode = BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB + _fields = [ ("val", "u08"), ("libord", Uleb128) ] +del bind_opcode + +class bind_opcode(bind_opcode_libord): + opcode = BIND_OPCODE_SET_DYLIB_SPECIAL_IMM + _fields = [ ("val", "u08") ] + def libord(self): + if self.imm: return self.imm | BIND_OPCODE_MASK + else: return 0 + libord = property(libord) +del bind_opcode +del bind_opcode_libord + +class bind_opcode(bind_entry): + opcode = BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM + _fields = [ ("val", "u08"), ("sym", CString) ] + def __str__(self): + return bind_entry.__str__(self) + '(0x%02X, %s)' % (self.imm, self.sym) + def apply(self): + self.parent.sym = self.sym + if self.imm & BIND_SYMBOL_FLAGS_WEAK_IMPORT: + self.parent.weak_import = " (weak import)" + else: + self.parent.weak_import = "" +del 
bind_opcode + +class bind_opcode(bind_entry): + opcode = BIND_OPCODE_SET_TYPE_IMM + _fields = [ ("val", "u08") ] + info_type = property(lambda _: _.imm) + def __str__(self): + return bind_entry.__str__(self) + '(%d)' % self.imm + def apply(self): + self.parent.info_type = { + BIND_TYPE_POINTER: "pointer", + BIND_TYPE_TEXT_ABSOLUTE32: "text abs32", + BIND_TYPE_TEXT_PCREL32: "text rel32", + }.get(self.imm,"!!unknown!!") +del bind_opcode + +class bind_opcode(bind_entry): + opcode = BIND_OPCODE_SET_ADDEND_SLEB + _fields = [ ("val", "u08"), ("addend", Sleb128) ] + def __str__(self): + return bind_entry.__str__(self) + '(%d)' % int(self.addend) + def apply(self): + self.parent.addend = self.addend +del bind_opcode + +class bind_opcode(bind_entry): + opcode = BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB + _fields = [ ("val", "u08"), ("addr", Uleb128) ] + def __str__(self): + return bind_entry.__str__(self) + '(0x%02X, 0x%08X)' % (self.imm, int(self.addr)) + def apply(self): + from elfesteem.macho.loaders import LC_SEGMENT, LC_SEGMENT_64 + e = self.parent.parent.parent.parent + lc = [ _ for _ in e.load if _.cmd in (LC_SEGMENT, LC_SEGMENT_64) ] + if len(lc) > self.imm: + self.parent.seg = lc[self.imm].segname + self.parent.addr = lc[self.imm].vmaddr + int(self.addr) + else: + self.parent.seg = None + self.parent.addr = None +del bind_opcode + +class bind_opcode(bind_entry): + opcode = BIND_OPCODE_ADD_ADDR_ULEB + _fields = [ ("val", "u08"), ("addr", Uleb128) ] + def __str__(self): + return bind_entry.__str__(self) + '(0x%08X)' % (int(self.addr) & mask32) + def apply(self): + if not hasattr(self.parent, 'addr'): raise ValueError + self.parent.addr += int(self.addr) + self.parent.addr &= mask64 +del bind_opcode + +class bind_opcode(bind_entry): + opcode = BIND_OPCODE_DO_BIND + _fields = [ ("val", "u08") ] + def __str__(self): + return bind_entry.__str__(self) + '()' + def apply(self): + if not hasattr(self.parent, 'addr'): raise ValueError + 
self.parent._info.append(self.parent.cls(self)) + self.parent.addr += self.wsize//8 +del bind_opcode + +class bind_opcode(bind_entry): + opcode = BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB + _fields = [ ("val", "u08"), ("addr", Uleb128) ] + def __str__(self): + return bind_entry.__str__(self) + '(0x%08X)' % (int(self.addr) & mask32) + def apply(self): + if not hasattr(self.parent, 'addr'): raise ValueError + self.parent._info.append(self.parent.cls(self)) + self.parent.addr += self.wsize//8 + int(self.addr) + self.parent.addr &= mask64 +del bind_opcode + +class bind_opcode(bind_entry): + opcode = BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED + _fields = [ ("val", "u08") ] + add_addr = property(lambda _: (_.imm+1)*(_.wsize//8)) + def __str__(self): + return bind_entry.__str__(self) + '(0x%08X)' % self.add_addr + def apply(self): + self.parent._info.append(self.parent.cls(self)) + self.parent.addr += self.add_addr +del bind_opcode + +class bind_opcode(bind_entry): + opcode = BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB + _fields = [ ("val", "u08"), ("count", Uleb128), ("skip", Uleb128) ] + def __str__(self): + return bind_entry.__str__(self) + '(%d, 0x%08X)' % (int(self.count), int(self.skip)) + def apply(self): + if not hasattr(self.parent, 'addr'): raise ValueError + if int(self.count) > mask64: raise ValueError + for i in range(int(self.count)): + self.parent._info.append(self.parent.cls(self)) + self.parent.addr += int(self.skip) + self.wsize//8 +del bind_opcode + +class DyldArrayBind(DyldArrayGeneric): + type = 'bind_' + _cls = bind_entry +dyldarray_register(DyldArrayBind) + +class DyldArrayWeakBind(DyldArrayGeneric): + type = 'weak_bind_' + _cls = bind_entry +dyldarray_register(DyldArrayWeakBind) + +class DyldArrayLazyBind(DyldArrayGeneric): + type = 'lazy_bind_' + _cls = bind_entry +dyldarray_register(DyldArrayLazyBind) + +#### Source: /usr/include/mach-o/loader.h + +# Dyld rebases an image whenever dyld loads it at an address different +# from its preferred address. 
The rebase information is a stream +# of byte sized opcodes whose symbolic names start with REBASE_OPCODE_. +# Conceptually the rebase information is a table of tuples: +# +# The opcodes are a compressed way to encode the table by only +# encoding when a column changes. In addition simple patterns +# like "every n'th offset for m times" can be encoded in a few +# bytes. + +# The following are used to encode rebasing information +REBASE_IMMEDIATE_MASK = 0x0F +SetConstants( +REBASE_TYPE_POINTER = 1, +REBASE_TYPE_TEXT_ABSOLUTE32 = 2, +REBASE_TYPE_TEXT_PCREL32 = 3, +) +REBASE_OPCODE_MASK = 0xF0 +SetConstants( +REBASE_OPCODE_DONE = 0x00, +REBASE_OPCODE_SET_TYPE_IMM = 0x10, +REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB = 0x20, +REBASE_OPCODE_ADD_ADDR_ULEB = 0x30, +REBASE_OPCODE_ADD_ADDR_IMM_SCALED = 0x40, +REBASE_OPCODE_DO_REBASE_IMM_TIMES = 0x50, +REBASE_OPCODE_DO_REBASE_ULEB_TIMES = 0x60, +REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB = 0x70, +REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB = 0x80, +) + +class Rebase(Bind): + _to_copy = ('seg', 'addr', 'info_type') + def __str__(self): + return "%-7s %-16s 0x%08X %s" % ( + self.seg, self.sec, self.addr, + self.info_type) + +from elfesteem.cstruct import CStruct_metaclass +class rebase_metaclass(CStruct_metaclass): + registered = {} + def __new__(cls, name, bases, dct): + o = CStruct_metaclass.__new__(cls, name, bases, dct) + if 'opcode' in dct: + cls.registered[dct['opcode']] = o + return o + def __call__(cls, *args, **kargs): + c = kargs['content'] + o = kargs.get('start',0) + val, = struct.unpack("B",c[o:o+1]) + opcode = val & REBASE_OPCODE_MASK + if hasattr(cls, 'opcode'): + op = super(rebase_metaclass,cls).__call__(*args, **kargs) + elif opcode in rebase_metaclass.registered: + op = rebase_metaclass.registered[opcode](*args, **kargs) + else: + op = super(rebase_metaclass,cls).__call__(*args, **kargs) + op.opcode = opcode + return op +rebase_base = rebase_metaclass('rebase_base', (CStruct,), {}) + +class 
rebase_entry(rebase_base): + _fields = [ ("val", "u08") ] + imm = property(lambda _: _.val & REBASE_IMMEDIATE_MASK) + def unpack(self, c, o): + CStruct.unpack(self, c, o) + self.index = o - self.parent.offset + def __str__(self): + return "0x%04X REBASE_OPCODE_%s" % (self.index, + dyld_constants['REBASE_OPCODE'].get(self.opcode, hex(self.opcode))) + def apply(self): + pass + +class rebase_opcode(rebase_entry): + opcode = REBASE_OPCODE_DONE + _fields = [ ("val", "u08") ] + def __str__(self): + return rebase_entry.__str__(self) + '()' +del rebase_opcode + +class rebase_opcode(rebase_entry): + opcode = REBASE_OPCODE_SET_TYPE_IMM + _fields = [ ("val", "u08") ] + info_type = property(lambda _: _.imm) + def __str__(self): + return rebase_entry.__str__(self) + '(%d)' % self.imm + def apply(self): + self.parent.info_type = { + REBASE_TYPE_POINTER: "pointer", + REBASE_TYPE_TEXT_ABSOLUTE32: "text abs32", + REBASE_TYPE_TEXT_PCREL32: "text rel32", + }.get(self.imm,"!!unknown!!") +del rebase_opcode + +class rebase_opcode(rebase_entry): + opcode = REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB + _fields = [ ("val", "u08"), ("addr", Uleb128) ] + def __str__(self): + return rebase_entry.__str__(self) + '(%d, 0x%08X)' % (self.imm, int(self.addr)) + def apply(self): + from elfesteem.macho.loaders import LC_SEGMENT, LC_SEGMENT_64 + e = self.parent.parent.parent.parent + lc = [ _ for _ in e.load if _.cmd in (LC_SEGMENT, LC_SEGMENT_64) ] + if len(lc) > self.imm: + self.parent.seg = lc[self.imm].segname + self.parent.addr = lc[self.imm].vmaddr + int(self.addr) + else: + self.parent.seg = None + self.parent.addr = None +del rebase_opcode + +class rebase_opcode(rebase_entry): + opcode = REBASE_OPCODE_ADD_ADDR_ULEB + _fields = [ ("val", "u08"), ("addr", Uleb128) ] + def __str__(self): + return rebase_entry.__str__(self) + '(0x%X)' % (int(self.addr) & mask32) + def apply(self): + if not hasattr(self.parent, 'addr'): raise ValueError + self.parent.addr += int(self.addr) +del rebase_opcode + 
+class rebase_opcode(rebase_entry): + opcode = REBASE_OPCODE_ADD_ADDR_IMM_SCALED + _fields = [ ("val", "u08") ] + add_addr = property(lambda _: _.imm*(_.wsize//8)) + def __str__(self): + return rebase_entry.__str__(self) + '(0x%X)' % self.add_addr + def apply(self): + if not hasattr(self.parent, 'addr'): raise ValueError + self.parent.addr += self.add_addr +del rebase_opcode + +class rebase_opcode(rebase_entry): + opcode = REBASE_OPCODE_DO_REBASE_IMM_TIMES + _fields = [ ("val", "u08") ] + def __str__(self): + return rebase_entry.__str__(self) + '(%d)' % self.imm + def apply(self): + if not hasattr(self.parent, 'addr'): raise ValueError + for i in range(self.imm): + self.parent._info.append(self.parent.cls(self)) + self.parent.addr += self.wsize//8 +del rebase_opcode + +class rebase_opcode(rebase_entry): + opcode = REBASE_OPCODE_DO_REBASE_ULEB_TIMES + _fields = [ ("val", "u08"), ("count", Uleb128) ] + def __str__(self): + return rebase_entry.__str__(self) + '(%d)' % int(self.count) + def apply(self): + if not hasattr(self.parent, 'addr'): raise ValueError + for i in range(int(self.count)): + self.parent._info.append(self.parent.cls(self)) + self.parent.addr += self.wsize//8 +del rebase_opcode + +class rebase_opcode(rebase_entry): + opcode = REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB + _fields = [ ("val", "u08"), ("value", Uleb128) ] + add_addr = property(lambda _: _.wsize//8 + int(_.value)) + def __str__(self): + return rebase_entry.__str__(self) + '(%d)' % (self.add_addr & mask32) + def apply(self): + if not hasattr(self.parent, 'addr'): raise ValueError + self.parent._info.append(self.parent.cls(self)) + self.parent.addr += self.add_addr +del rebase_opcode + +class rebase_opcode(rebase_entry): + opcode = REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB + _fields = [ ("val", "u08"), ("count", Uleb128), ("skip", Uleb128) ] + def __str__(self): + return rebase_entry.__str__(self) + '(%d, %d)' % (int(self.count), int(self.skip)) + def apply(self): + if not 
hasattr(self.parent, 'addr'): raise ValueError + for i in range(int(self.count)): + self.parent._info.append(self.parent.cls(self)) + self.parent.addr += int(self.skip) + self.wsize//8 +del rebase_opcode + +class DyldArrayRebase(DyldArrayGeneric): + _cls = rebase_entry + type = 'rebase_' +dyldarray_register(DyldArrayRebase) + +#### Source: /usr/include/mach-o/loader.h + +# The symbols exported by a dylib are encoded in a trie. This +# is a compact representation that factors out common prefixes. +# It also reduces LINKEDIT pages in RAM because it encodes all +# information (name, address, flags) in one small, contiguous range. +# The export area is a stream of nodes. The first node sequentially +# is the start node for the trie. +# +# Nodes for a symbol start with a uleb128 that is the length of +# the exported symbol information for the string so far. +# If there is no exported symbol, the node starts with a zero byte. +# If there is exported info, it follows the length. +# +# First is a uleb128 containing flags. Normally, it is followed by +# a uleb128 encoded offset which is location of the content named +# by the symbol from the mach_header for the image. If the flags +# is EXPORT_SYMBOL_FLAGS_REEXPORT, then following the flags is +# a uleb128 encoded library ordinal, then a zero terminated +# UTF8 string. If the string is zero length, then the symbol +# is re-export from the specified dylib with the same name. +# +# If the flags is EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER, then following +# the flags is two uleb128s: the stub offset and the resolver offset. +# The stub is used by non-lazy pointers. The resolver is used +# by lazy pointers and must be called to get the actual address to use. +# +# After the optional exported symbol information is a byte of +# how many edges (0-255) that this node has leaving it, +# followed by each edge. 
+# Each edge is a zero terminated UTF8 of the addition chars +# in the symbol, followed by a uleb128 offset for the node that +# edge points to. + +# The following are used on the flags byte of a terminal node +# in the export information. +EXPORT_SYMBOL_FLAGS_KIND_MASK = 0x03 +EXPORT_SYMBOL_FLAGS_KIND_REGULAR = 0x00 +EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL = 0x01 +EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE = 0x02 +EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION = 0x04 +EXPORT_SYMBOL_FLAGS_REEXPORT = 0x08 +EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER = 0x10 + +class export_entry(object): + def __init__(self, entry, sym, flags, addr, other, name): + self.sym = sym + self.flags = int(flags) + self.addr = int(addr) + self.other = int(other) + self.name = name + self.macho = entry.parent.parent.parent + def __str__(self): + if self.flags & EXPORT_SYMBOL_FLAGS_REEXPORT: addr = '[re-export]' + else: addr = '0x%08X ' % self.addr + flags = [] + if self.flags & EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION: + flags.append('weak_def') + if (self.flags & EXPORT_SYMBOL_FLAGS_KIND_MASK) == EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL: + flags.append('per-thread') + if (self.flags & EXPORT_SYMBOL_FLAGS_KIND_MASK) == EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE: + flags.append('absolute') + if self.flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER: + flags.append('resolver=0x%08X'%self.other) + flags = ','.join(flags) + if flags: flags = '[flags]' + if self.flags & EXPORT_SYMBOL_FLAGS_REEXPORT: + lib = get_lib_name(self.macho, self.addr) + name = str(self.name) + if len(name): name += ' ' + lib = ' (%sfrom %s)' % (name, lib) + else: + lib = '' + return "%s %s%s%s" % (addr, flags, self.sym, lib) + +class dyld_trie(CBase): + def unpack(self, c, o): + # 'p' will always be the DyldTrieExport object, independently of + # the trie depth. 
+ p = self.parent + while not hasattr(p, 'info'): + p = p.parent + if o >= p.offset + p.size: + raise ValueError + self.prefix = self.parent.prefix + if hasattr(self.parent, 'suffix'): + self.prefix += str(self.parent.suffix) + self._size = 0 + termSize, = struct.unpack("B",c[o:o+1]) + p.interval_add(o, o+1) + self._size += 1 + if termSize: + flags = Uleb128(parent=self, content=c, start=o+self._size) + p.interval_add(o+self._size, o+self._size+flags.bytelen) + self._size += flags.bytelen + addr = Uleb128(parent=self, content=c, start=o+self._size) + p.interval_add(o+self._size, o+self._size+addr.bytelen) + self._size += addr.bytelen + if int(flags) & EXPORT_SYMBOL_FLAGS_REEXPORT: + name = CString(parent=self, content=c, start=o+self._size) + p.interval_add(o+self._size, o+self._size+name.bytelen) + other = 0 + elif int(flags) & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER: + name = None + other = Uleb128(parent=self, content=c, start=o+self._size) + p.interval_add(o+self._size, o+self._size+other.bytelen) + else: + name = None + other = 0 + p.info.append(export_entry(p, self.prefix, flags, addr, other, name)) + childCount, = struct.unpack("B",c[o+termSize+1:o+termSize+2]) + p.interval_add(o+termSize+1, o+termSize+2) + self._size = termSize+2 + for i in range(childCount): + self.suffix = CString(parent=self, content=c, start=o+self._size) + p.interval_add(o+self._size, o+self._size+self.suffix.bytelen) + self._size += self.suffix.bytelen + offset = Uleb128(parent=self, content=c, start=o+self._size) + p.interval_add(o+self._size, o+self._size+offset.bytelen) + self._size += offset.bytelen + dyld_trie(parent=self, content=c, start=p.offset+int(offset)) + +class DyldTrieExport(BaseSection): + type = 'export_' + def get_size(self): + return getattr(self.parent, self.type + 'size') + def set_size(self, val): + setattr(self.parent, self.type + 'size', val) + size = property(get_size, set_size) + def unpack(self, c, o): + # The trie is a recursive structure with information 
stored at + # explicit offsets: malformed files can cause infinite loops. + # We use 'intervals' to detect such loops. + from elfesteem.intervals import Intervals + self.interval = Intervals() + self.info = [] + self.prefix = '' + try: + self.trie = dyld_trie(parent=self, content=c, start=o) + except ValueError: + pass + self.c = c[self.offset:self.offset+self.size] + # NB: even in well-formed files, not everything is parsed + #print("TARGET [%d:%d]"%(self.offset,self.offset+self.size)) + #print("INTERVAL %s"%self.interval) + def pack(self): + return self.c + def interval_add(self, start, stop): + if self.interval.contains(start, stop): + log.error('The export trie is malformed, there is a risk of infinite loop') + raise ValueError + self.interval.add(start, stop) + def __str__(self): + return "%-30s %-10s %#010x %#010x" % (self.__class__.__name__, '', self.offset, len(self.info)) +dyldarray_register(DyldTrieExport) + +#### Many other sections inside the __LINKEDIT segment + +class LinkEditSection(BaseSection): + type = 'data' + def unpack(self, c, o): + if self.parent is not None: assert o == self.offset + self.content = StrPatchwork(c[o:o+self.size]) + def get_size(self): + return getattr(self.parent, self.type + 'size') + def set_size(self, val): + setattr(self.parent, self.type + 'size', val) + size = property(get_size, set_size) + addr = property(lambda _:0) + def pack(self): + return self.content.pack() + def __str__(self): + return "%-30s %-10s %#010x %#010x" % (self.__class__.__name__, '', self.offset, self.size) + +class FunctionStarts(LinkEditSection): + pass + +class DataInCode(LinkEditSection): + pass + +class CodeSignature(LinkEditSection): + pass + +class OptimizationHint(LinkEditSection): + pass + +class Encryption(LinkEditSection): + type = 'crypt' + +class SegmentSplitInfo(LinkEditSection): + pass + +class DylibCodeSign(LinkEditSection): + pass + """ + def unpack(self, c, o): + self.content = StrPatchwork(c) + self.blobs = [] + of = 0 + while 
class Sections(object):
    """Ordered list of all the sections created by the load commands of
    a Mach-O file.

    The list is built once, from every load command of ``parent.load``
    that has a ``sectionsToAdd`` method.  The object then behaves as a
    read-only sequence (indexing, iteration, ``len``) over ``self.sect``.
    """
    def __init__(self, parent):
        """Create the sections of every load command of `parent`.

        parent: object providing ``load`` (the load commands),
            ``content`` (given to ``sectionsToAdd``), ``interval``
            (may be None) and ``getsegment_byoffset``.
        """
        self.parent = parent
        self.sect = []
        lc_list = [ _ for _ in parent.load if hasattr(_, 'sectionsToAdd') ]
        # First, create all sections depending on each load command
        for lc in lc_list:
            lc.sectionsToAdd(self.parent.content)
            self.sect.extend(lc.sect)
            if parent.interval is None:
                continue
            for s in lc.sect:
                if s.__class__.__name__ == 'Encryption':
                    log.warning("Some encrypted text is not parsed with the section headers of LC_SEGMENT(__TEXT)")
                    continue
                # Compute the byte range once; it is used by both calls.
                start = s.offset
                end = start + len(s.pack())
                if not parent.interval.contains(start, end):
                    # The corresponding warning has been silenced on purpose.
                    #log.warning("This part of file has already been parsed")
                    pass
                parent.interval.delete(start, end)
        # Then if the load command is not a segment, add the section to the
        # list of sections in the relevant segment.
        for lc in lc_list:
            if hasattr(lc, 'segname'):
                continue
            for s in lc.sect:
                segm = parent.getsegment_byoffset(s.offset)
                if segm is not None:
                    segm.sect.append(s)

    def add(self, s):
        """Insert section `s` in the flat list, at the position implied
        by the load command whose ``sect`` list already contains it.

        Nothing is inserted if no load command contains `s`.
        """
        # looking in s.lc to know where to insert
        pos = 0
        for lc in self.parent.load:
            if not hasattr(lc, 'sect'):
                continue
            if s in lc.sect:
                pos += lc.sect.index(s)
                self.sect[pos:pos] = [s]
                break
            pos += len(lc.sect)

    def getpos(self, section):
        """Return the list of every index whose element equals `section`."""
        return [ i for i, s in enumerate(self.sect) if s == section ]

    def removepos(self, pos):
        """Remove the section stored at index `pos`.

        ``del`` is used rather than ``list.remove``: the latter deletes
        the first element comparing equal, which is the wrong one when
        an equal section is stored at a lower index.
        """
        del self.sect[pos]

    def __getitem__(self, pos):
        return self.sect[pos]
    def __iter__(self):
        return iter(self.sect)
    def __len__(self):
        return len(self.sect)
    def __repr__(self):
        return repr(self.sect)
    def __str__(self):
        # This container is not serialisable by itself.
        raise ValueError('class Section cannot be output as a bytestream')
def time_str(value, zero=False):
    """Format a UNIX timestamp as '<hex value> <YYYY-mm-dd HH:MM:SS>' (UTC).

    value: number of seconds since the epoch.
    zero: when true, a null timestamp is rendered as the plain string '0'.

    Always returns a string.  A stray trailing comma used to make the
    non-zero branch return a 1-tuple; '%s' % result is unaffected by the
    fix since formatting a 1-tuple or its only element gives the same text.
    """
    if zero and value == 0:
        return '0'
    import time
    stamp = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(value))
    return '%#x %s' % (value, stamp)
class LocationDescriptor(CStruct):
    """MINIDUMP_LOCATION_DESCRIPTOR
    https://msdn.microsoft.com/en-us/library/ms680383(v=vs.85).aspx

    Two members: a 32-bit size (DataSize) followed by an Rva structure
    (Rva) locating the data.
    """
    _fields = [
        ("DataSize", "u32"),
        ("Rva", "Rva"),
    ]
"MD_EXCEPTION_STREAM" : 6, # MDRawExceptionStream + "MD_SYSTEM_INFO_STREAM" : 7, # MDRawSystemInfo + "MD_THREAD_EX_LIST_STREAM" : 8, + "MD_MEMORY_64_LIST_STREAM" : 9, + "MD_COMMENT_STREAM_A" : 10, + "MD_COMMENT_STREAM_W" : 11, + "MD_HANDLE_DATA_STREAM" : 12, + "MD_FUNCTION_TABLE_STREAM" : 13, + "MD_UNLOADED_MODULE_LIST_STREAM" : 14, + "MD_MISC_INFO_STREAM" : 15, # MDRawMiscInfo + "MD_MEMORY_INFO_LIST_STREAM" : 16, # MDRawMemoryInfoList + "MD_THREAD_INFO_LIST_STREAM" : 17, + "MD_HANDLE_OPERATION_LIST_STREAM" : 18, + "MD_TOKEN_STREAM" : 19, + "MD_JAVASCRIPT_DATA_STREAM" : 20, + "MD_SYSTEM_MEMORY_INFO_STREAM" : 21, + "MD_PROCESS_VM_COUNTERS_STREAM" : 22, + "MD_LAST_RESERVED_STREAM" : 0x0000ffff, + # Breakpad extension types. 0x4767 = "Gg" + "MD_BREAKPAD_INFO_STREAM" : 0x47670001, # MDRawBreakpadInfo + "MD_ASSERTION_INFO_STREAM" : 0x47670002, # MDRawAssertionInfo + # These are additional minidump stream values which are specific to + # the linux breakpad implementation. + "MD_LINUX_CPU_INFO" : 0x47670003, # /proc/cpuinfo + "MD_LINUX_PROC_STATUS" : 0x47670004, # /proc/$x/status + "MD_LINUX_LSB_RELEASE" : 0x47670005, # /etc/lsb-release + "MD_LINUX_CMD_LINE" : 0x47670006, # /proc/$x/cmdline + "MD_LINUX_ENVIRON" : 0x47670007, # /proc/$x/environ + "MD_LINUX_AUXV" : 0x47670008, # /proc/$x/auxv + "MD_LINUX_MAPS" : 0x47670009, # /proc/$x/maps + "MD_LINUX_DSO_DEBUG" : 0x4767000A, # MDRawDebug{32,64} +}) + + +class StreamDirectory(CStruct): + """MINIDUMP_DIRECTORY + https://msdn.microsoft.com/en-us/library/ms680365(VS.85).aspx + """ + _fields = [("StreamType", "u32"), + ("Location", "LocationDescriptor"), + ] + + def pretty_name(self): + return streamType[self.StreamType] + pretty_name = property(pretty_name) + + def type_with_name(self): + return "%#x (%s)" % (self.StreamType, + MDminidumpType.from_value(self.StreamType) ) + type_with_name = property(type_with_name) + + def dump(self): + return '\n'.join([ + 'MDRawDirectory', + ' stream_type = %s' % self.type_with_name, + ' 
class MinidumpString(CStruct):
    """MINIDUMP_STRING
    https://msdn.microsoft.com/en-us/library/ms680395(v=vs.85).aspx

    Length is the number of bytes stored in Buffer; Buffer is read as
    that many unsigned bytes.
    """
    _fields = [("Length", "u32"),
               ("Buffer", "u08", lambda string:string.Length),
               ]
    def __str__(self):
        """Return the buffer decoded as text."""
        import struct
        raw = struct.pack("%dB"%len(self.Buffer), *self.Buffer)
        # The byte order is given explicitly: the plain "utf-16" codec
        # falls back to the byte order of the host when the data has no
        # BOM, which gives garbage on a big-endian machine.
        # NOTE(review): assumes the dump was written little-endian, as
        # Windows does - confirm if byte-swapped dumps must be supported.
        return raw.decode("utf-16-le")
class Module(CStruct):
    """MINIDUMP_MODULE
    https://msdn.microsoft.com/en-us/library/ms680392(v=vs.85).aspx
    """
    _fields = [("BaseOfImage", "u64"),
               ("SizeOfImage", "u32"),
               ("CheckSum", "u32"),
               ("TimeDateStamp", "u32"),
               ("ModuleNameRva", "Rva"),
               ("VersionInfo", "FixedFileInfo"),
               ("CvRecord", "LocationDescriptor"),
               ("MiscRecord", "LocationDescriptor"),
               ("Reserved0", "u64"),
               ("Reserved1", "u64"),
               ]

    def parse_data(self):
        """Decode the records referenced by this module.

        Sets self.cv (the CodeView record, with an added `filename`
        attribute) and self.misc_record (always set, so that dump_other
        never fails on a missing attribute).
        """
        content = self.parent_head._content
        cv_start = self.CvRecord.Rva.rva
        cv_end = cv_start + self.CvRecord.DataSize
        self.cv = CvRecord.unpack(content,
                                  off = cv_start,
                                  parent_head = self.parent_head)
        # The file name follows the 24 bytes of fixed CvRecord fields
        # (u32 + u32 + u16 + u16 + 8*u08 + u32); last character is NULL
        self.cv.filename = content[cv_start+24:cv_end-1].decode('latin1')
        rva = self.MiscRecord.Rva.rva
        if rva == 0:
            self.misc_record = '(null)'
        else:
            # The content of the record is not decoded here; a placeholder
            # is stored instead of leaving the attribute undefined.
            self.misc_record = '(not decoded, %d bytes at %#x)' % (
                self.MiscRecord.DataSize, rva)

    def ModuleName(self):
        """The MinidumpString found at ModuleNameRva."""
        return MinidumpString.unpack(self.parent_head._content,
                                     off = self.ModuleNameRva.rva,
                                     parent_head = self.parent_head)
    ModuleName = property(ModuleName)

    def dump(self):
        """Return a multi-line text description of the raw fields."""
        version = self.VersionInfo
        return '\n'.join([
            'MDRawModule',
            ' base_of_image = %#x' % self.BaseOfImage,
            ' size_of_image = %#x' % self.SizeOfImage,
            ' checksum = %#x' % self.CheckSum,
            ' time_date_stamp = %s' % time_str(self.TimeDateStamp),
            ' module_name_rva = %#x' % self.ModuleNameRva.rva,
            ' version_info.signature = %#x' % version.dwSignature,
            ' version_info.struct_version = %#x' % version.dwStrucVersion,
            ' version_info.file_version = %#x:%#x' % (version.dwFileVersionMS, version.dwFileVersionLS),
            ' version_info.product_version = %#x:%#x' % (version.dwProductVersionMS, version.dwProductVersionLS),
            ' version_info.file_flags_mask = %#x' % version.dwFileFlagsMask,
            ' version_info.file_flags = %#x' % version.dwFileFlags,
            ' version_info.file_os = %#x' % version.dwFileOS,
            ' version_info.file_type = %#x' % version.dwFileType,
            ' version_info.file_subtype = %#x' % version.dwFileSubtype,
            ' version_info.file_date = %#x:%#x' % (version.dwFileDateMS, version.dwFileDateLS),
            ' cv_record.data_size = %d' % self.CvRecord.DataSize,
            ' cv_record.rva = %#x' % self.CvRecord.Rva.rva,
            ' misc_record.data_size = %d' % self.MiscRecord.DataSize,
            ' misc_record.rva = %#x' % self.MiscRecord.Rva.rva,
        ])

    def dump_other(self):
        """Return a multi-line text description of the values computed
        from the records referenced by the module (calls parse_data).
        """
        self.parse_data()
        platform = self.parent_head.systeminfo.PlatformId
        # Default for a platform listed in neither group below; the name
        # used to be left unbound in that case (UnboundLocalError).
        code_identifier = ''
        if platform in (
            MD_OS_WIN32_NT,
            MD_OS_WIN32_WINDOWS,
            ):
            code_identifier = "%X%x" % (self.TimeDateStamp, self.SizeOfImage)
        elif platform in (
            MD_OS_ANDROID,
            MD_OS_LINUX,
            MD_OS_MAC_OS_X,
            MD_OS_IOS,
            MD_OS_SOLARIS,
            MD_OS_NACL,
            MD_OS_PS3,
            ):
            code_identifier = "id"
        debug_identifier = self.cv.signature_id + '%d'%self.cv.Age
        return '\n'.join([
            ' (code_file) = "%s"' % self.ModuleName,
            ' (code_identifier) = "%s"' % code_identifier,
            ' (cv_record).cv_signature = %#x' % self.cv.CvSignature,
            ' (cv_record).signature = %s' % self.cv.signature_str,
            ' (cv_record).age = %d' % self.cv.Age,
            ' (cv_record).pdb_file_name = "%s"' % self.cv.filename,
            ' (misc_record) = %s' % self.misc_record,
            ' (debug_file) = "%s"' % self.cv.filename,
            ' (debug_identifier) = "%s"' % debug_identifier,
            ' (version) = "%s"' % self.VersionInfo.version,
        ])
class MemoryDescriptor(CStruct):
    """MINIDUMP_MEMORY_DESCRIPTOR
    https://msdn.microsoft.com/en-us/library/ms680384(v=vs.85).aspx

    A 64-bit start address followed by the LocationDescriptor of the
    corresponding data.
    """
    _fields = [
        ("StartOfMemoryRange", "u64"),
        ("Memory", "LocationDescriptor"),
    ]

    def dump(self):
        """Return a multi-line text description of the descriptor."""
        lines = ['MDMemoryDescriptor']
        lines.append(' start_of_memory_range = %#x' % self.StartOfMemoryRange)
        lines.append(' memory.data_size = %#x' % self.Memory.DataSize)
        lines.append(' memory.rva = %#x' % self.Memory.Rva.rva)
        return '\n'.join(lines)
"PAGE_WRITECOMBINE" : 0x0400, +}) + +class MemoryInfo(CStruct): + """MINIDUMP_MEMORY_INFO + https://msdn.microsoft.com/en-us/library/ms680386(v=vs.85).aspx + """ + _fields = [("BaseAddress", "u64"), + ("AllocationBase", "u64"), + ("AllocationProtect", "u32"), + ("__alignment1", "u32"), + ("RegionSize", "u64"), + ("State", "u32"), + ("Protect", "u32"), + ("Type", "u32"), + ("__alignment2", "u32"), + ] + +class MemoryInfoList(CStruct): + """MINIDUMP_MEMORY_INFO_LIST + https://msdn.microsoft.com/en-us/library/ms680385(v=vs.85).aspx + """ + _fields = [("SizeOfHeader", "u32"), + ("SizeOfEntry", "u32"), + ("NumberOfEntries", "u64"), + # Fake field, for easy access to MemoryInfo elements + ("MemoryInfos", "MemoryInfo", + lambda mlist: mlist.NumberOfEntries), + ] + + +contextFlags_x86 = Enumeration({ + "CONTEXT_i386" : 0x00010000, + "CONTEXT_CONTROL" : 0x00010001, + "CONTEXT_INTEGER" : 0x00010002, + "CONTEXT_SEGMENTS" : 0x00010004, + "CONTEXT_FLOATING_POINT" : 0x00010008, + "CONTEXT_DEBUG_REGISTERS" : 0x00010010, + "CONTEXT_EXTENDED_REGISTERS" : 0x00010020, +}) + +class FloatingSaveArea(CStruct): + """FLOATING_SAVE_AREA + http://terminus.rewolf.pl/terminus/structures/ntdll/_FLOATING_SAVE_AREA_x86.html + """ + _fields = [("ControlWord", "u32"), + ("StatusWord", "u32"), + ("TagWord", "u32"), + ("ErrorOffset", "u32"), + ("ErrorSelector", "u32"), + ("DataOffset", "u32"), + ("DataSelector", "u32"), + ("RegisterArea", "80s"), + ("Cr0NpxState", "u32"), + ] + +class Context_x86(CStruct): + """CONTEXT x86 + https://msdn.microsoft.com/en-us/en-en/library/ms679284(v=vs.85).aspx + http://terminus.rewolf.pl/terminus/structures/ntdll/_CONTEXT_x86.html + """ + + MAXIMUM_SUPPORTED_EXTENSION = 512 + + def is_activated(flag): + mask = contextFlags_x86[flag] + def check_context(ctx): + return 1 + if (ctx.ContextFlags & mask == mask): + return 1 + return 0 + return check_context + + _fields = [("ContextFlags", "u32"), + # DebugRegisters + ("Dr0", "u32", 
is_activated("CONTEXT_DEBUG_REGISTERS")), + ("Dr1", "u32", is_activated("CONTEXT_DEBUG_REGISTERS")), + ("Dr2", "u32", is_activated("CONTEXT_DEBUG_REGISTERS")), + ("Dr3", "u32", is_activated("CONTEXT_DEBUG_REGISTERS")), + ("Dr6", "u32", is_activated("CONTEXT_DEBUG_REGISTERS")), + ("Dr7", "u32", is_activated("CONTEXT_DEBUG_REGISTERS")), + + ("FloatSave", "FloatingSaveArea", + is_activated("CONTEXT_FLOATING_POINT")), + + # SegmentRegisters + ("SegGs", "u32", is_activated("CONTEXT_SEGMENTS")), + ("SegFs", "u32", is_activated("CONTEXT_SEGMENTS")), + ("SegEs", "u32", is_activated("CONTEXT_SEGMENTS")), + ("SegDs", "u32", is_activated("CONTEXT_SEGMENTS")), + # IntegerRegisters + ("Edi", "u32", is_activated("CONTEXT_INTEGER")), + ("Esi", "u32", is_activated("CONTEXT_INTEGER")), + ("Ebx", "u32", is_activated("CONTEXT_INTEGER")), + ("Edx", "u32", is_activated("CONTEXT_INTEGER")), + ("Ecx", "u32", is_activated("CONTEXT_INTEGER")), + ("Eax", "u32", is_activated("CONTEXT_INTEGER")), + # ControlRegisters + ("Ebp", "u32", is_activated("CONTEXT_CONTROL")), + ("Eip", "u32", is_activated("CONTEXT_CONTROL")), + ("SegCs", "u32", is_activated("CONTEXT_CONTROL")), + ("EFlags", "u32", is_activated("CONTEXT_CONTROL")), + ("Esp", "u32", is_activated("CONTEXT_CONTROL")), + ("SegSs", "u32", is_activated("CONTEXT_CONTROL")), + + ("ExtendedRegisters", "%ds" % MAXIMUM_SUPPORTED_EXTENSION, + is_activated("CONTEXT_EXTENDED_REGISTERS")), + ] + def dump(self): + return '\n'.join([ + 'MDRawContextX86', + ' context_flags = %#x' % self.ContextFlags, + ' dr0 = %#x' % self.Dr0[0], + ' dr1 = %#x' % self.Dr1[0], + ' dr2 = %#x' % self.Dr2[0], + ' dr3 = %#x' % self.Dr3[0], + ' dr6 = %#x' % self.Dr6[0], + ' dr7 = %#x' % self.Dr7[0], + ' float_save.control_word = %#x' % self.FloatSave[0].ControlWord, + ' float_save.status_word = %#x' % self.FloatSave[0].StatusWord, + ' float_save.tag_word = %#x' % self.FloatSave[0].TagWord, + ' float_save.error_offset = %#x' % self.FloatSave[0].ErrorOffset, + ' 
class Context_AMD64(CStruct):
    """CONTEXT AMD64
    https://github.com/duarten/Threadjack/blob/master/WinNT.h

    Fields declared with is_activated(...) are read with a count of one,
    which is why dump() accesses them with an index ([0]).
    """

    def is_activated(flag):
        """Return the count function of a field tied to context flag `flag`.

        The returned function always answers 1: the field is always read,
        whatever the value of ContextFlags.  The test of ContextFlags
        against the flag mask was placed after an unconditional
        'return 1' and therefore never executed; it has been removed.
        The lookup below is kept because it rejects (KeyError) a flag
        name unknown to contextFlags_AMD64 when the class is created.
        """
        contextFlags_AMD64[flag]
        def check_context(ctx):
            return 1
        return check_context

    _fields = [

        # Only used for Convenience
        ("P1Home", "u64"),
        ("P2Home", "u64"),
        ("P3Home", "u64"),
        ("P4Home", "u64"),
        ("P5Home", "u64"),
        ("P6Home", "u64"),

        # Control
        ("ContextFlags", "u32"),
        ("MxCsr", "u32"),

        # Segment & processor
        # /!\ activation depends on multiple flags
        ("SegCs", "u16", is_activated("CONTEXT_CONTROL")),
        ("SegDs", "u16", is_activated("CONTEXT_SEGMENTS")),
        ("SegEs", "u16", is_activated("CONTEXT_SEGMENTS")),
        ("SegFs", "u16", is_activated("CONTEXT_SEGMENTS")),
        ("SegGs", "u16", is_activated("CONTEXT_SEGMENTS")),
        ("SegSs", "u16", is_activated("CONTEXT_CONTROL")),
        ("EFlags", "u32", is_activated("CONTEXT_CONTROL")),

        # Debug registers
        ("Dr0", "u64", is_activated("CONTEXT_DEBUG_REGISTERS")),
        ("Dr1", "u64", is_activated("CONTEXT_DEBUG_REGISTERS")),
        ("Dr2", "u64", is_activated("CONTEXT_DEBUG_REGISTERS")),
        ("Dr3", "u64", is_activated("CONTEXT_DEBUG_REGISTERS")),
        ("Dr6", "u64", is_activated("CONTEXT_DEBUG_REGISTERS")),
        ("Dr7", "u64", is_activated("CONTEXT_DEBUG_REGISTERS")),

        # Integer registers
        # /!\ activation depends on multiple flags
        ("Rax", "u64", is_activated("CONTEXT_INTEGER")),
        ("Rcx", "u64", is_activated("CONTEXT_INTEGER")),
        ("Rdx", "u64", is_activated("CONTEXT_INTEGER")),
        ("Rbx", "u64", is_activated("CONTEXT_INTEGER")),
        ("Rsp", "u64", is_activated("CONTEXT_CONTROL")),
        ("Rbp", "u64", is_activated("CONTEXT_INTEGER")),
        ("Rsi", "u64", is_activated("CONTEXT_INTEGER")),
        ("Rdi", "u64", is_activated("CONTEXT_INTEGER")),
        ("R8", "u64", is_activated("CONTEXT_INTEGER")),
        ("R9", "u64", is_activated("CONTEXT_INTEGER")),
        ("R10", "u64", is_activated("CONTEXT_INTEGER")),
        ("R11", "u64", is_activated("CONTEXT_INTEGER")),
        ("R12", "u64", is_activated("CONTEXT_INTEGER")),
        ("R13", "u64", is_activated("CONTEXT_INTEGER")),
        ("R14", "u64", is_activated("CONTEXT_INTEGER")),
        ("R15", "u64", is_activated("CONTEXT_INTEGER")),
        ("Rip", "u64", is_activated("CONTEXT_CONTROL")),

        # Floating point
        ("Header", "M128A", lambda ctx: 2),
        ("Legacy", "M128A", lambda ctx: 8),
        ("Xmm0", "M128A"),
        ("Xmm1", "M128A"),
        ("Xmm2", "M128A"),
        ("Xmm3", "M128A"),
        ("Xmm4", "M128A"),
        ("Xmm5", "M128A"),
        ("Xmm6", "M128A"),
        ("Xmm7", "M128A"),
        ("Xmm8", "M128A"),
        ("Xmm9", "M128A"),
        ("Xmm10", "M128A"),
        ("Xmm11", "M128A"),
        ("Xmm12", "M128A"),
        ("Xmm13", "M128A"),
        ("Xmm14", "M128A"),
        ("Xmm15", "M128A"),


        # Vector registers
        ("VectorRegister", "M128A", lambda ctx: 16),
        ("VectorControl", "u64"),

        # Special debug control regs
        ("DebugControl", "u64"),
        ("LastBranchToRip", "u64"),
        ("LastBranchFromRip", "u64"),
        ("LastExceptionToRip", "u64"),
        ("LastExceptionFromRip", "u64"),
    ]
    def dump(self):
        """Return a multi-line text description of the home slots, the
        flags, and the segment, debug and integer registers.  The
        floating point and vector fields are not output.
        """
        return '\n'.join([
            'MDRawContextAMD64',
            ' p1_home = %#x' % self.P1Home,
            ' p2_home = %#x' % self.P2Home,
            ' p3_home = %#x' % self.P3Home,
            ' p4_home = %#x' % self.P4Home,
            ' p5_home = %#x' % self.P5Home,
            ' p6_home = %#x' % self.P6Home,
            ' context_flags = %#x' % self.ContextFlags,
            ' mx_csr = %#x' % self.MxCsr,
            ' cs = %#x' % self.SegCs[0],
            ' ds = %#x' % self.SegDs[0],
            ' es = %#x' % self.SegEs[0],
            ' fs = %#x' % self.SegFs[0],
            ' gs = %#x' % self.SegGs[0],
            ' ss = %#x' % self.SegSs[0],
            ' eflags = %#x' % self.EFlags[0],
            ' dr0 = %#x' % self.Dr0[0],
            ' dr1 = %#x' % self.Dr1[0],
            ' dr2 = %#x' % self.Dr2[0],
            ' dr3 = %#x' % self.Dr3[0],
            ' dr6 = %#x' % self.Dr6[0],
            ' dr7 = %#x' % self.Dr7[0],
            ' rax = %#x' % self.Rax[0],
            ' rcx = %#x' % self.Rcx[0],
            ' rdx = %#x' % self.Rdx[0],
            ' rbx = %#x' % self.Rbx[0],
            ' rsp = %#x' % self.Rsp[0],
            ' rbp = %#x' % self.Rbp[0],
            ' rsi = %#x' % self.Rsi[0],
            ' rdi = %#x' % self.Rdi[0],
            ' r8 = %#x' % self.R8[0],
            ' r9 = %#x' % self.R9[0],
            ' r10 = %#x' % self.R10[0],
            ' r11 = %#x' % self.R11[0],
            ' r12 = %#x' % self.R12[0],
            ' r13 = %#x' % self.R13[0],
            ' r14 = %#x' % self.R14[0],
            ' r15 = %#x' % self.R15[0],
            ' rip = %#x' % self.Rip[0],
        ])
2, + "PROCESSOR_ARCHITECTURE_PPC" : 3, + "PROCESSOR_ARCHITECTURE_SHX" : 4, + "PROCESSOR_ARCHITECTURE_ARM" : 5, + "PROCESSOR_ARCHITECTURE_IA64" : 6, + "PROCESSOR_ARCHITECTURE_ALPHA64" : 7, + "PROCESSOR_ARCHITECTURE_MSIL" : 8, + "PROCESSOR_ARCHITECTURE_AMD64" : 9, + "PROCESSOR_ARCHITECTURE_X86_WIN64" : 10, + "PROCESSOR_ARCHITECTURE_UNKNOWN" : 0xffff, +}) + +class Thread(CStruct): + """MINIDUMP_THREAD + https://msdn.microsoft.com/en-us/library/ms680517(v=vs.85).aspx + """ + + arch2context_cls = { + processorArchitecture.PROCESSOR_ARCHITECTURE_X86: Context_x86, + processorArchitecture.PROCESSOR_ARCHITECTURE_AMD64: Context_AMD64, + } + + def parse_context(self, content, offset): + self.loc_desc = LocationDescriptor.unpack(content, offset, self.parent_head) + + # Use the correct context depending on architecture + systeminfo = self.parent_head.systeminfo + context_cls = self.arch2context_cls.get(systeminfo.ProcessorArchitecture, + None) + if context_cls is None: + raise ValueError("Unsupported architecture: %s" % systeminfo.pretty_processor_architecture) + + ctxt = context_cls.unpack(content, self.loc_desc.Rva.rva, self.parent_head) + fake_loc_descriptor = LocationDescriptor(DataSize=0, Rva=Rva(rva=0)) + return ctxt, offset + len(fake_loc_descriptor) + + _fields = [("ThreadId", "u32"), + ("SuspendCount", "u32"), + ("PriorityClass", "u32"), + ("Priority", "u32"), + ("Teb", "u64"), + ("Stack", "MemoryDescriptor"), + ("ThreadContext", (parse_context, + lambda thread, value: NotImplemented)), + ] + def dump(self): + return '\n'.join([ + 'MDRawThread', + ' thread_id = %#x' % self.ThreadId, + ' suspend_count = %d' % self.SuspendCount, + ' priority_class = %#x' % self.PriorityClass, + ' priority = %#x' % self.Priority, + ' teb = %#x' % self.Teb, + ' stack.start_of_memory_range = %#x' % self.Stack.StartOfMemoryRange, + ' stack.memory.data_size = %#x' % self.Stack.Memory.DataSize, + ' stack.memory.rva = %#x' % self.Stack.Memory.Rva.rva, + ' thread_context.data_size = %#x' % 
class Exception(Thread):
    """Exception stream: the identifier of a thread, an exception record
    and the location of the context of the thread.

    The context is read by Thread.parse_context, which also sets
    self.loc_desc.  Note that, inside this module, the name of the class
    hides the built-in Exception.
    """
    _fields = [("ThreadId", "u32"),
               ("A", "u32"),
               ("ExceptionCode", "u32"),
               ("ExceptionFlags", "u32"),
               ("ExceptionRecord", "u64"),
               ("ExceptionAddress", "u64"),
               ("NumberParameters", "u32"),
               ("Align", "u32"),
               ("ExceptionInformation", "u64", lambda _:15),
               ("ThreadContext", (Thread.parse_context,
                                  lambda thread, value: NotImplemented)),
               ]
    def dump(self):
        """Return a multi-line text description of the exception."""
        res = [
            'MDException',
            ' thread_id = %#x' % self.ThreadId,
            ' exception_record.exception_code = %#x' % self.ExceptionCode,
            ' exception_record.exception_flags = %#x' % self.ExceptionFlags,
            ' exception_record.exception_record = %#x' % self.ExceptionRecord,
            ' exception_record.exception_address = %#x' % self.ExceptionAddress,
            ' exception_record.number_parameters = %d' % self.NumberParameters,
        ]
        # NumberParameters comes from the file: it is bounded by the
        # number of stored values, so that a corrupted dump cannot cause
        # an IndexError.
        nb_param = min(self.NumberParameters, len(self.ExceptionInformation))
        for i in range(nb_param):
            res.append(' exception_record.exception_information[%2d] = %#x' % (i, self.ExceptionInformation[i]))
        res.extend([
            ' thread_context.data_size = %d' % self.loc_desc.DataSize,
            ' thread_context.rva = %#x' % self.loc_desc.Rva.rva,
        ])
        return '\n'.join(res)
class MiscInfo(CStruct):
    """Miscellaneous information on the process.

    The structure exists in five versions of increasing size; every
    field is declared here, and dump() uses SizeOfInfo to select the
    fields that are output.
    """
    _fields = [("SizeOfInfo","u32"),
               # Version 1 fields
               ("Flags1","u32"),
               ("ProcessId","u32"),
               ("ProcessCreateTime","u32"),
               ("ProcessUserTime","u32"),
               ("ProcessKernelTime","u32"),
               # Version 2 fields
               ("ProcessorMaxMhz","u32"),
               ("ProcessorCurrentMhz","u32"),
               ("ProcessorMhzLimit","u32"),
               ("ProcessorMaxIdleState","u32"),
               ("ProcessorCurrentIdleState","u32"),
               # Version 3 fields
               ("ProcessIntegrityLevel","u32"),
               ("ProcessExecuteFlags","u32"),
               ("ProtectedProcess","u32"),
               ("TimeZoneId","u32"),
               ("TimeZone","MDTimeZoneInformation"),
               # Version 4 fields
               ("BuildString","520s"),
               ("DbgBldStr","80s"),
               # Version 5 fields
               ("XstateData","MDXStateConfigFeatureMscInfo"),
               ("ProcessCookie","u32"),
               ]
    def process_execute_flags(self):
        """ProcessExecuteFlags in hexadecimal, or '(invalid)' when Flags1
        does not have MD_MISCINFO_FLAGS1_PROCESS_EXECUTE_FLAGS set.
        """
        if self.Flags1 & MD_MISCINFO_FLAGS1_PROCESS_EXECUTE_FLAGS:
            return '%#x' % self.ProcessExecuteFlags
        else:
            return '(invalid)'
    process_execute_flags = property(process_execute_flags)
    def dump(self):
        """Return a multi-line text description, limited to the fields
        of the versions covered by SizeOfInfo.
        """
        def text(raw):
            # The byte order is given explicitly: the plain 'utf-16'
            # codec falls back to the byte order of the host when the
            # data has no BOM.
            # NOTE(review): assumes little-endian strings - confirm if
            # byte-swapped dumps must be supported.
            return raw.decode('utf-16-le').strip('\0')
        res = [
            'MDRawMiscInfo',
            ' size_of_info = %d' % self.SizeOfInfo,
            ' flags1 = %#x' % self.Flags1,
            ' process_id = %d' % self.ProcessId,
            ' process_create_time = %s' % time_str(self.ProcessCreateTime),
            ' process_user_time = %s' % time_str(self.ProcessUserTime,zero=True),
            ' process_kernel_time = %s' % time_str(self.ProcessKernelTime,zero=True),
        ]
        if self.SizeOfInfo > MD_MISCINFO_SIZE:
            # Print version 2 fields
            res.extend([
                ' processor_max_mhz = %d' % self.ProcessorMaxMhz,
                ' processor_current_mhz = %d' % self.ProcessorCurrentMhz,
                ' processor_mhz_limit = %d' % self.ProcessorMhzLimit,
                ' processor_max_idle_state = %d' % self.ProcessorMaxIdleState,
                ' processor_current_idle_state = %d' % self.ProcessorCurrentIdleState,
            ])
        if self.SizeOfInfo > MD_MISCINFO2_SIZE:
            # Print version 3 fields
            tz = self.TimeZone
            res.extend([
                ' process_integrity_level = %#x' % self.ProcessIntegrityLevel,
                ' process_execute_flags = %s' % self.process_execute_flags,
                ' protected_process = %d' % self.ProtectedProcess,
                ' time_zone_id = %d' % self.TimeZoneId,
                ' time_zone.bias = %d' % tz.Bias,
                ' time_zone.standard_name = %s' % text(tz.StandardName),
                ' time_zone.standard_date = %s' % tz.StandardDate.dump(),
                ' time_zone.standard_bias = %d' % tz.StandardBias,
                ' time_zone.daylight_name = %s' % text(tz.DaylightTime),
                ' time_zone.daylight_date = %s' % tz.DaylightDate.dump(),
                ' time_zone.daylight_bias = %d' % tz.DaylightBias,
            ])
        if self.SizeOfInfo > MD_MISCINFO3_SIZE:
            # Print version 4 fields
            res.extend([
                ' build_string = %s' % text(self.BuildString),
                ' dbg_bld_str = %s' % text(self.DbgBldStr),
            ])
        if self.SizeOfInfo > MD_MISCINFO4_SIZE:
            # Print version 5 fields
            res.extend([
                ' xstate_data.size_of_info = %d' % self.XstateData.SizeOfInfo,
                ' xstate_data.context_size = %d' % self.XstateData.ContextSize,
                ' xstate_data.enabled_features = %#x' % self.XstateData.EnabledFeatures,
            ])
            if self.XstateData.EnabledFeatures == 0:
                res.append(' xstate_data.features[] = (empty)')
            res.append(' process_cookie = %d' % self.ProcessCookie)
        return '\n'.join(res)
+MD_OS_WIN32S = 0 # VER_PLATFORM_WIN32s (Windows 3.1) +MD_OS_WIN32_WINDOWS = 1 # VER_PLATFORM_WIN32_WINDOWS (Windows 95-98-Me) +MD_OS_WIN32_NT = 2 # VER_PLATFORM_WIN32_NT (Windows NT, 2000+) +MD_OS_WIN32_CE = 3 # VER_PLATFORM_WIN32_CE, VER_PLATFORM_WIN32_HH (Windows CE, Windows Mobile, "Handheld") +# The following values are Breakpad-defined. +MD_OS_UNIX = 0x8000 # Generic Unix-ish +MD_OS_MAC_OS_X = 0x8101 # Mac OS X/Darwin +MD_OS_IOS = 0x8102 # iOS +MD_OS_LINUX = 0x8201 # Linux +MD_OS_SOLARIS = 0x8202 # Solaris +MD_OS_ANDROID = 0x8203 # Android +MD_OS_PS3 = 0x8204 # PS3 +MD_OS_NACL = 0x8205 # Native Client (NaCl) + +MD_CPU_ARCHITECTURE_X86 = 0 # PROCESSOR_ARCHITECTURE_INTEL +MD_CPU_ARCHITECTURE_MIPS = 1 # PROCESSOR_ARCHITECTURE_MIPS +MD_CPU_ARCHITECTURE_ALPHA = 2 # PROCESSOR_ARCHITECTURE_ALPHA +MD_CPU_ARCHITECTURE_PPC = 3 # PROCESSOR_ARCHITECTURE_PPC +MD_CPU_ARCHITECTURE_SHX = 4 # PROCESSOR_ARCHITECTURE_SHX (Super-H) +MD_CPU_ARCHITECTURE_ARM = 5 # PROCESSOR_ARCHITECTURE_ARM +MD_CPU_ARCHITECTURE_IA64 = 6 # PROCESSOR_ARCHITECTURE_IA64 +MD_CPU_ARCHITECTURE_ALPHA64 = 7 # PROCESSOR_ARCHITECTURE_ALPHA64 +MD_CPU_ARCHITECTURE_MSIL = 8 # PROCESSOR_ARCHITECTURE_MSIL (Microsoft Intermediate Language) +MD_CPU_ARCHITECTURE_AMD64 = 9 # PROCESSOR_ARCHITECTURE_AMD64 +MD_CPU_ARCHITECTURE_X86_WIN64 = 10 # PROCESSOR_ARCHITECTURE_IA32_ON_WIN64 (WoW64) +MD_CPU_ARCHITECTURE_SPARC = 0x8001 # Breakpad-defined value for SPARC +MD_CPU_ARCHITECTURE_PPC64 = 0x8002 # Breakpad-defined value for PPC64 +MD_CPU_ARCHITECTURE_ARM64 = 0x8003 # Breakpad-defined value for ARM64 +MD_CPU_ARCHITECTURE_MIPS64 = 0x8004 # Breakpad-defined value for MIPS64 +MD_CPU_ARCHITECTURE_UNKNOWN = 0xffff # PROCESSOR_ARCHITECTURE_UNKNOWN + + +class SystemInfo(CStruct): + """MINIDUMP_SYSTEM_INFO + https://msdn.microsoft.com/en-us/library/ms680396(v=vs.85).aspx + """ + _fields = [("ProcessorArchitecture", "u16"), + ("ProcessorLevel", "u16"), + ("ProcessorRevision", "u16"), + ("NumberOfProcessors", "u08"), + 
("ProductType", "u08"), + ("MajorVersion", "u32"), + ("MinorVersion", "u32"), + ("BuildNumber", "u32"), + ("PlatformId", "u32"), + ("CSDVersionRva", "Rva"), + ("SuiteMask", "u16"), + ("Reserved2", "u16"), + ("ProcessorFeatures", "u64", lambda _: 3), + ] + # The following fields are x86-only + VendorId = property(lambda _:[ + _.ProcessorFeatures[0]&mask32, + _.ProcessorFeatures[0]>>32, + _.ProcessorFeatures[1]&mask32]) + VersionInformation = property(lambda _:_.ProcessorFeatures[1]>>32) + FeatureInformation = property(lambda _:_.ProcessorFeatures[2]&mask32) + AMDExtendedCpuFeatures = property(lambda _:_.ProcessorFeatures[2]>>32) + # The following fields are arm-only + Cpuid = property(lambda _:_.ProcessorFeatures[0]&mask32) + ElfHwcaps = property(lambda _:_.ProcessorFeatures[0]>>32) # Linux-specific + + def pretty_processor_architecture(self): + return processorArchitecture[self.ProcessorArchitecture] + pretty_processor_architecture = property(pretty_processor_architecture) + + def csd_version(self): + return MinidumpString.unpack(self.parent_head._content, + off = self.CSDVersionRva.rva, + parent_head = self.parent_head) + csd_version = property(csd_version) + + def cpu_vendor(self): + if self.ProcessorArchitecture in (MD_CPU_ARCHITECTURE_X86, + MD_CPU_ARCHITECTURE_X86_WIN64): + import struct + return '"'+struct.pack(" Memory information + self.build_memory() + self.virt = ContentVirtual(self) + + def parse_content(self): + """Build structures corresponding to current content""" + + # Header + offset = 0 + self.minidumpHDR = mp.MinidumpHDR.unpack(self._content, offset, self) + assert self.minidumpHDR.Magic == 0x504d444d + + # Streams + base_offset = self.minidumpHDR.StreamDirectoryRva.rva + empty_stream = mp.StreamDirectory(StreamType=0, + Location=mp.LocationDescriptor(DataSize=0, + Rva=mp.Rva(rva=0) + ) + ) + streamdir_size = len(empty_stream) + for i in range(self.minidumpHDR.NumberOfStreams): + stream_offset = base_offset + i * streamdir_size + stream = 
mp.StreamDirectory.unpack(self._content, stream_offset, self) + self.streams.append(stream) + + # Launch specific action depending on the stream + datasize = stream.Location.DataSize + offset = stream.Location.Rva.rva + if stream.StreamType == mp.streamType.ModuleListStream: + self.modulelist = mp.ModuleList.unpack(self._content, offset, self) + if datasize == 8+self.modulelist.NumberOfModules*108: + self.modulelist = mp.ModuleListWithPadding.unpack(self._content, offset, self) + elif stream.StreamType == mp.streamType.MemoryListStream: + self.memorylist = mp.MemoryList.unpack(self._content, offset, self) + if datasize == 8+self.memorylist.NumberOfMemoryRanges*16: + self.memorylist = mp.MemoryListWithPadding.unpack(self._content, offset, self) + elif stream.StreamType == mp.streamType.Memory64ListStream: + self.memory64list = mp.Memory64List.unpack(self._content, offset, self) + elif stream.StreamType == mp.streamType.MemoryInfoListStream: + self.memoryinfolist = mp.MemoryInfoList.unpack(self._content, offset, self) + elif stream.StreamType == mp.streamType.SystemInfoStream: + self.systeminfo = mp.SystemInfo.unpack(self._content, offset, self) + elif stream.StreamType == mp.streamType.MiscInfoStream: + self.miscinfo = mp.MiscInfo.unpack(self._content, offset, self) + # Breakpad extension types + elif stream.StreamType == mp.MDminidumpType.MD_ASSERTION_INFO_STREAM: + self.breakpad_assertion = mp.BreakpadAssertion.unpack(self._content, offset, self) + elif stream.StreamType == mp.MDminidumpType.MD_BREAKPAD_INFO_STREAM: + self.breakpad_info = mp.BreakpadRawInfo.unpack(self._content, offset, self) + + # Some streams need the SystemInfo stream to work + if self.systeminfo is None: + return + for stream in self.streams: + datasize = stream.Location.DataSize + offset = stream.Location.Rva.rva + if stream.StreamType == mp.streamType.ThreadListStream: + self.threads = mp.ThreadList.unpack(self._content, offset, self) + if datasize == 8+self.threads.NumberOfThreads*48: + 
self.threads = mp.ThreadListWithPadding.unpack(self._content, offset, self) + elif stream.StreamType == mp.streamType.ExceptionStream: + self.exception = mp.Exception.unpack(self._content, offset, self) + + + def build_memory(self): + """Build an easier to use memory view based on ModuleList and + Memory64List streams""" + + addr2module = {} + if self.modulelist: + for module in self.modulelist.Modules: + addr2module[module.BaseOfImage] = module + addr2meminfo = {} + if self.memoryinfolist: + for memory in self.memoryinfolist.MemoryInfos: + addr2meminfo[memory.BaseAddress] = memory + + mode64 = self.minidumpHDR.Flags & mp.minidumpType.MiniDumpWithFullMemory + + if mode64: + offset = self.memory64list.BaseRva + memranges = self.memory64list.MemoryRanges + else: + memranges = self.memorylist.MemoryRanges + + for memory in memranges: + if not mode64: + offset = memory.Memory.Rva.rva + + # Create a MemorySegment with augmented information + base_address = memory.StartOfMemoryRange + module = addr2module.get(base_address, None) + meminfo = addr2meminfo.get(base_address, None) + self.memory[base_address] = MemorySegment(offset, memory, + module, meminfo) + + if mode64: + offset += memory.DataSize + + # Sanity check + if mode64: + assert not False in [addr in self.memory for addr in addr2module] + + def get(self, virt_start, virt_stop): + """Return the content at the (virtual addresses) + [virt_start:virt_stop]""" + + # Find the corresponding memory segment + for addr in self.memory: + if virt_start <= addr <= virt_stop: + break + else: + return "" + + memory = self.memory[addr] + shift = addr - virt_start + last = virt_stop - addr + if last > memory.size: + raise RuntimeError("Multi-page not implemented") + + return self._content[memory.offset + shift:memory.offset + last] + + def dump(self): + """ + Same output as minidump_dump from + https://chromium.googlesource.com/breakpad/breakpad + """ + res = [ self.minidumpHDR.dump() ] + streams_by_type = {} # Duplicates will 
not be shown + for i, s in enumerate(self.streams): + streams_by_type[s.StreamType] = (i, s) + res.extend(["", "mDirectory[%d]"%i, s.dump()]) + res.append("\nStreams:") + for t in sorted(streams_by_type.keys()): + i, s = streams_by_type[t] + res.append(" stream type %s at index %d" % (s.type_with_name, i)) + res.extend(["", + "MinidumpThreadList", + " thread_count = %d" % self.threads.NumberOfThreads]) + for i, t in enumerate(self.threads.Threads): + res.extend(["", + "thread[%d]"%i, + t.dump(), + "", + t.ThreadContext.dump(), + "", + "Stack", + self.memory[t.Stack.StartOfMemoryRange].dump(), + ]) + res.extend(["", + "MinidumpModuleList", + " module_count = %d" % self.modulelist.NumberOfModules]) + for i, m in enumerate(self.modulelist.Modules): + res.extend(["", + "module[%d]"%i, + m.dump(), + m.dump_other(), + ]) + res.extend(["", + "MinidumpMemoryList", + " region_count = %d" % self.memorylist.NumberOfMemoryRanges]) + for i, m in enumerate(self.memorylist.MemoryRanges): + res.extend(["", + "region[%d]"%i, + m.dump(), + "Memory", + self.memory[m.StartOfMemoryRange].dump(), + ]) + if hasattr(self, 'exception'): + res.extend(["", + self.exception.dump(), + "", + self.exception.ThreadContext.dump(), + ]) + if hasattr(self, 'breakpad_assertion'): + res.extend(["",self.breakpad_assertion.dump()]) + res.extend(["",self.systeminfo.dump(),""]) + if hasattr(self, 'miscinfo'): + res.extend([self.miscinfo.dump(),""]) + if hasattr(self, 'breakpad_info'): + res.extend([self.breakpad_info.dump(),""]) + return '\n'.join(res) + +class ContentVirtual(object): + """ Stub for binary.py """ + def __init__(self, minidump): + self.parent = minidump + def max_addr(self): + ad = -1 + for memory in self.parent.memory.values(): + ad = max(ad, memory.address+memory.size) + return ad + +if __name__ == "__main__": + for file in sys.argv[1:]: + if len(sys.argv) > 2: print("File: %s"%file) + fd = open(file, 'rb') + try: + raw = fd.read() + finally: + fd.close() + e = Minidump(raw) + 
print(e.dump()) diff --git a/elfesteem/new_cstruct.py b/elfesteem/new_cstruct.py index 1287b02..9baaa56 100644 --- a/elfesteem/new_cstruct.py +++ b/elfesteem/new_cstruct.py @@ -3,34 +3,38 @@ import struct import re -type2realtype = {} +# To be compatible with python 2 and python 3 +data_empty = struct.pack("") +data_null = struct.pack("B",0) + +type_size = {} size2type = {} size2type_s = {} for t in 'B', 'H', 'I', 'Q': s = struct.calcsize(t) - type2realtype[t] = s*8 + type_size[t] = s*8 size2type[s*8] = t for t in 'b', 'h', 'i', 'q': s = struct.calcsize(t) - type2realtype[t] = s*8 + type_size[t] = s*8 size2type_s[s*8] = t -type2realtype['u08'] = size2type[8] -type2realtype['u16'] = size2type[16] -type2realtype['u32'] = size2type[32] -type2realtype['u64'] = size2type[64] +type_size['u08'] = size2type[8] +type_size['u16'] = size2type[16] +type_size['u32'] = size2type[32] +type_size['u64'] = size2type[64] -type2realtype['s08'] = size2type_s[8] -type2realtype['s16'] = size2type_s[16] -type2realtype['s32'] = size2type_s[32] -type2realtype['s64'] = size2type_s[64] +type_size['s08'] = size2type_s[8] +type_size['s16'] = size2type_s[16] +type_size['s32'] = size2type_s[32] +type_size['s64'] = size2type_s[64] -type2realtype['d'] = 'd' -type2realtype['f'] = 'f' -type2realtype['q'] = 'q' -type2realtype['ptr'] = 'ptr' +type_size['d'] = 'd' +type_size['f'] = 'f' +type_size['q'] = 'q' +type_size['ptr'] = 'ptr' sex_types = {0:'<', 1:'>'} @@ -41,10 +45,10 @@ def fix_size(fields, wsize): pass elif v == "ptr": v = size2type[wsize] - elif not v in type2realtype: + elif not v in type_size: raise ValueError("unkown Cstruct type", v) else: - v = type2realtype[v] + v = type_size[v] out.append((name, v)) fields = out return fields @@ -52,25 +56,28 @@ def fix_size(fields, wsize): def real_fmt(fmt, wsize): if fmt == "ptr": v = size2type[wsize] - elif fmt in type2realtype: - v = type2realtype[fmt] + elif fmt in type_size: + v = type_size[fmt] else: v = fmt return v all_cstructs = {} class 
Cstruct_Metaclass(type): - field_suffix = "_value" + _prefix = "_field_" def __new__(cls, name, bases, dct): + if name == 'CStructBase': + o = type.__new__(cls, name, bases, dct) + return o for fields in dct['_fields']: fname = fields[0] if fname in ['parent', 'parent_head']: raise ValueError('field name will confuse internal structs', repr(fname)) dct[fname] = property(dct.pop("get_"+fname, - lambda self,fname=fname: getattr(self,fname+self.__class__.field_suffix)), + lambda self,fname=fname: getattr(self,cls._prefix+fname)), dct.pop("set_"+fname, - lambda self,v,fname=fname: setattr(self,fname+self.__class__.field_suffix,v)), + lambda self,v,fname=fname: setattr(self,cls._prefix+fname,v)), dct.pop("del_"+fname, None)) @@ -81,7 +88,7 @@ def __new__(cls, name, bases, dct): return o def unpack_l(cls, s, off = 0, parent_head = None, _sex=None, _wsize=None): - if _sex == None and _wsize == None: + if _sex is None and _wsize is None: # get sex and size from parent if parent_head: _sex = parent_head._sex @@ -90,18 +97,19 @@ def unpack_l(cls, s, off = 0, parent_head = None, _sex=None, _wsize=None): _sex = 0 _wsize = 32 c = cls(_sex = _sex, _wsize = _wsize) - if parent_head == None: + if parent_head is None: parent_head = c c.parent_head = parent_head of1 = off for field in c._fields: cpt = None + of2 = of1 if len(field) == 2: fname, ffmt = field elif len(field) == 3: fname, ffmt, cpt = field - if ffmt in type2realtype or (isinstance(ffmt, str) and re.match(r'\d+s', ffmt)): + if ffmt in type_size or (isinstance(ffmt, str) and re.match(r'\d+s', ffmt)): # basic types if cpt: value = [] @@ -117,7 +125,7 @@ def unpack_l(cls, s, off = 0, parent_head = None, _sex=None, _wsize=None): of2 = of1+struct.calcsize(fmt) value = struct.unpack(c.sex+fmt, s[of1:of2])[0] elif ffmt == "sz": # null terminated special case - of2 = s.find('\x00', of1) + of2 = s.find(data_null, of1) if of2 == -1: raise ValueError('no null char in string!') of2 += 1 @@ -144,7 +152,7 @@ def unpack_l(cls, s, off 
= 0, parent_head = None, _sex=None, _wsize=None): else: raise ValueError('unknown class', ffmt) of1 = of2 - setattr(c, fname+c.__class__.field_suffix, value) + setattr(c, CStruct._prefix+fname, value) return c, of2-off @@ -153,24 +161,23 @@ def unpack(cls, s, off = 0, parent_head = None, _sex=None, _wsize=None): parent_head = parent_head, _sex=_sex, _wsize=_wsize) return c -class CStruct(object): - __metaclass__ = Cstruct_Metaclass +CStructBase = Cstruct_Metaclass('CStructBase', (object,), {}) +class CStruct(CStructBase): _packformat = "" _fields = [] def __init__(self, parent_head = None, _sex = None, _wsize = None, **kargs): self.parent_head = parent_head - self._size = None kargs = dict(kargs) #if not sex or size: get the one of the parent - if _sex == None and _wsize == None: + if _sex is None and _wsize is None: if parent_head: _sex = parent_head._sex _wsize = parent_head._wsize else: # else default sex & size _sex = 0 - _size = 32 + _wsize = 32 self.sex = _sex self.wsize = _wsize if self._packformat: @@ -178,13 +185,13 @@ def __init__(self, parent_head = None, _sex = None, _wsize = None, **kargs): else: self.sex = sex_types[_sex] for f in self._fields: - setattr(self, f[0]+self.__class__.field_suffix, None) + setattr(self, CStruct._prefix+f[0], None) if kargs: for k, v in kargs.items(): - self.__dict__[k+self.__class__.field_suffix] = v + self.__dict__[CStruct._prefix+k] = v def pack(self): - out = '' + out = data_empty for field in self._fields: cpt = None if len(field) == 2: @@ -192,33 +199,33 @@ def pack(self): elif len(field) == 3: fname, ffmt, cpt = field - value = getattr(self, fname+self.__class__.field_suffix) - if ffmt in type2realtype or (isinstance(ffmt, str) and re.match(r'\d+s', ffmt)): + value = getattr(self, CStruct._prefix+fname) + if ffmt in type_size or (isinstance(ffmt, str) and re.match(r'\d+s', ffmt)): # basic types fmt = real_fmt(ffmt, self.wsize) - if cpt == None: - if value == None: - o = struct.calcsize(fmt)*"\x00" + if cpt is None: + 
if value is None: + o = struct.calcsize(fmt)*data_null else: o = struct.pack(self.sex+fmt, value) else: - o = "" + o = data_empty for v in value: - if value == None: - o += struct.calcsize(fmt)*"\x00" + if value is None: + o += struct.calcsize(fmt)*data_null else: o += struct.pack(self.sex+fmt, v) elif ffmt == "sz": # null terminated special case - o = value+'\x00' + o = value+data_null elif ffmt in all_cstructs: # sub structures - if cpt == None: - o = str(value) + if cpt is None: + o = value.pack() else: - o = "" + o = data_empty for v in value: - o += str(v) + o += v.pack() elif isinstance(ffmt, tuple): f_get, f_set = ffmt o = f_set(self, value) @@ -229,12 +236,13 @@ def pack(self): return out - def __str__(self): - return self.pack() - def __len__(self): return len(self.pack()) + def __str__(self): + raise AttributeError("Use pack() instead of str()") + return self.pack() + def __repr__(self): return "<%s=%s>" % (self.__class__.__name__, "/".join(map(lambda x:repr(getattr(self,x[0])),self._fields))) @@ -282,12 +290,12 @@ class c4(CStruct): lambda c, value:c.sets(value))), ("f", "u16"), ] - def gets(cls, s, of): + def gets(self, s, of): i = 0 while s[of+i] != "\x00": i+=1 return s[of:of+i], of+i+1 - def sets(cls, value): + def sets(self, value): return str(value)+'\x00' """ @@ -307,24 +315,24 @@ class c6(CStruct): ("k", "u16"), ] - print all_cstructs + print(all_cstructs) s1 = struct.pack('HHI', 1111, 2222, 333333333) c = c1.unpack(s1) - print repr(c) + print(repr(c)) assert len(c) == 8 - s2 = str(c) + s2 = c.pack() assert s1 == s2 - print repr(s2) - print repr(c1.unpack(s2)) + print(repr(s2)) + print(repr(c1.unpack(s2))) s3 = struct.pack('HHI', 4444, 5555, 666666666)+s2 - print repr(s3) + print(repr(s3)) assert len(s3) == 16 c = c2.unpack(s3) - print repr(c) - s4 = str(c) - print repr(s3), repr(s4) + print(repr(c)) + s4 = c.pack() + print("%r %r"%(s3,s4)) assert s3 == s4 assert c.c2_c.parent_head == c @@ -332,40 +340,40 @@ class c6(CStruct): s5 = 
struct.pack('HHH', 2, 5555, 6666)+s1*2+struct.pack('H', 9999) c = c3.unpack(s5) assert len(c) == 24 - print repr(c) - print c.b - print c.c - print c.c[0].c1_field1 + print(repr(c)) + print(c.b) + print(c.c) + print(c.c[0].c1_field1) - s6 = str(c) - print repr(s5), repr(s6) + s6 = c.pack() + print("%r %r"%(s5,s6)) assert s5 == s6 c = c1() c.c1_field1 = 1111 c.c1_field2 = 2222 c.c1_field3 = 333333333 - assert str(c) == s1 + assert c.pack() == s1 s7 = struct.pack('H', 8888)+"fffff\x00"+struct.pack('H', 9999) c = c4.unpack(s7) - print repr(c) - print repr(c.e) - print repr(c.f) + print(repr(c)) + print(repr(c.e)) + print(repr(c.f)) - print repr(s7) - print repr(str(c)) - assert s7 == str(c) + print(repr(s7)) + print(repr(c.pack())) + assert s7 == c.pack() s8 = struct.pack('H4s', 8888, "abcd") c = c5.unpack(s8) - print repr(c) - assert s8 == str(c) + print(repr(c)) + assert s8 == c.pack() s9 = struct.pack('H', 9999)+ "toto\x00" + struct.pack('H', 1010) - print repr(s9) + print(repr(s9)) c = c6.unpack(s9) - print repr(c), repr(str(c)) - assert s9 == str(c) + print("%r %r"%(c,c.pack())) + assert s9 == c.pack() diff --git a/elfesteem/pe.py b/elfesteem/pe.py index 95b95fa..3cf1fe5 100755 --- a/elfesteem/pe.py +++ b/elfesteem/pe.py @@ -1,17 +1,506 @@ #! 
/usr/bin/env python -from new_cstruct import CStruct -from strpatchwork import StrPatchwork +from elfesteem.cstruct import Constants, CBase, CString, CStruct, CArray +from elfesteem.cstruct import data_null, data_empty +from elfesteem.cstruct import bytes_to_name +from elfesteem.strpatchwork import StrPatchwork import struct import logging -from collections import defaultdict -log = logging.getLogger("pepy") +log = logging.getLogger("pe") console_handler = logging.StreamHandler() console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) log.addHandler(console_handler) -log.setLevel(logging.WARN) - -class Doshdr(CStruct): +log.setLevel(logging.INFO) + +import sys +if sys.version_info[0:2] == (2, 3): + from elfesteem.compatibility_python23 import sorted + +constants = {} +def SetConstants(**kargs): + Constants(globs = globals(), table = constants, **kargs) + +SetConstants( +DIRECTORY_ENTRY_EXPORT = 0, +DIRECTORY_ENTRY_IMPORT = 1, +DIRECTORY_ENTRY_RESOURCE = 2, +DIRECTORY_ENTRY_EXCEPTION = 3, +DIRECTORY_ENTRY_SECURITY = 4, +DIRECTORY_ENTRY_BASERELOC = 5, +DIRECTORY_ENTRY_DEBUG = 6, +DIRECTORY_ENTRY_COPYRIGHT = 7, +DIRECTORY_ENTRY_GLOBALPTR = 8, +DIRECTORY_ENTRY_TLS = 9, +DIRECTORY_ENTRY_LOAD_CONFIG = 10, +DIRECTORY_ENTRY_BOUND_IMPORT = 11, +DIRECTORY_ENTRY_IAT = 12, +DIRECTORY_ENTRY_DELAY_IMPORT = 13, +DIRECTORY_ENTRY_COM_DESCRIPTOR = 14, +DIRECTORY_ENTRY_RESERVED = 15, +) + +SetConstants( +RT_CURSOR = 1, +RT_BITMAP = 2, +RT_ICON = 3, +RT_MENU = 4, +RT_DIALOG = 5, +RT_STRING = 6, +RT_FONTDIR = 7, +RT_FONT = 8, +RT_ACCELERATOR = 9, +RT_RCDATA = 10, +RT_MESSAGETABLE = 11, +RT_GROUP_CURSOR = 12, +RT_GROUP_ICON = 14, +RT_VERSION = 16, +RT_DLGINCLUDE = 17, +RT_PLUGPLAY = 19, +RT_VXD = 20, +RT_ANICURSOR = 21, +RT_ANIICON = 22, +RT_HTML = 23, +RT_MANIFEST = 24, +) + +SetConstants( +# Constants, e.g. 
from http://llvm.org/docs/doxygen/html/namespacellvm_1_1COFF.html +# plus the ones known by pefile.py, and some other +IMAGE_FILE_MACHINE_UNKNOWN = 0x0, +IMAGE_FILE_MACHINE_AM33_LLVM = 0x13, +IMAGE_FILE_MACHINE_TI = 0xC2, +IMAGE_FILE_MACHINE_MIPSIII = 0x142, +IMAGE_FILE_MACHINE_iAPX286SMALL = 0x14A, +IMAGE_FILE_MACHINE_I386 = 0x14C, +IMAGE_FILE_MACHINE_I860 = 0x14D, +IMAGE_FILE_MACHINE_mc68k = 0x150, +IMAGE_FILE_MACHINE_iAPX286LARGE = 0x152, +IMAGE_FILE_MACHINE_MIPSEB = 0x160, +IMAGE_FILE_MACHINE_R3000 = 0x162, +IMAGE_FILE_MACHINE_R4000 = 0x166, +IMAGE_FILE_MACHINE_R10000 = 0x168, +IMAGE_FILE_MACHINE_WCEMIPSV2 = 0x169 , +IMAGE_FILE_MACHINE_WE32000 = 0x170, +IMAGE_FILE_MACHINE_I386_BIS = 0x175, +IMAGE_FILE_MACHINE_CLIPPER = 0x17F, +IMAGE_FILE_MACHINE_ALPHA_O = 0x183, # OSF1/Tru64 Object file +IMAGE_FILE_MACHINE_ALPHA_PE = 0x184, # Windows NT PE for Alpha +IMAGE_FILE_MACHINE_ALPHA_Z = 0x188, # OSF1/Tru64 Compressed object file +IMAGE_FILE_MACHINE_ALPHA_U = 0x18F, # OSF1/Tru64 Ucode object file. 
Obsolete +IMAGE_FILE_MACHINE_APOLLOA88K= 0x194, +IMAGE_FILE_MACHINE_APOLLOM68K= 0x197, +IMAGE_FILE_MACHINE_SH3 = 0x1A2, +IMAGE_FILE_MACHINE_SH3DSP = 0x1A3, +IMAGE_FILE_MACHINE_SH3E = 0x1A4, +IMAGE_FILE_MACHINE_SH4 = 0x1A6, +IMAGE_FILE_MACHINE_SH5 = 0x1A8, +IMAGE_FILE_MACHINE_ARM = 0x1C0, +IMAGE_FILE_MACHINE_THUMB = 0x1C2, +IMAGE_FILE_MACHINE_ARMV7 = 0x1C4, +IMAGE_FILE_MACHINE_ARMNT = 0x1C4, # same +IMAGE_FILE_MACHINE_AM33 = 0x1D3, +IMAGE_FILE_MACHINE_XCOFF32 = 0x1DF, +IMAGE_FILE_MACHINE_POWERPC = 0x1F0, +IMAGE_FILE_MACHINE_POWERPCFP = 0x1F1, +IMAGE_FILE_MACHINE_XCOFF64 = 0x1F7, +IMAGE_FILE_MACHINE_IA64 = 0x200, +IMAGE_FILE_MACHINE_MIPS16 = 0x266, +IMAGE_FILE_MACHINE_ALPHA64 = 0x284, +IMAGE_FILE_MACHINE_AXP64 = 0x284, # same +IMAGE_FILE_MACHINE_MIPSFPU = 0x366, +IMAGE_FILE_MACHINE_MIPSFPU16 = 0x466, +IMAGE_FILE_MACHINE_TRICORE = 0x520, +IMAGE_FILE_MACHINE_CEF = 0xCEF, +IMAGE_FILE_MACHINE_EBC = 0xEBC, +IMAGE_FILE_MACHINE_AMD64 = 0x8664, +IMAGE_FILE_MACHINE_M32R = 0x9041, +IMAGE_FILE_MACHINE_ARM64 = 0xAA64, +IMAGE_FILE_MACHINE_CEE = 0xC0EE, +no_name = ('IMAGE_FILE_MACHINE_ARMNT', 'IMAGE_FILE_MACHINE_AXP64') +) + +SetConstants( +IMAGE_FILE_FLAG_RELOCS_STRIPPED = 0x0001, +IMAGE_FILE_FLAG_EXECUTABLE_IMAGE = 0x0002, +IMAGE_FILE_FLAG_LINE_NUMS_STRIPPED = 0x0004, +IMAGE_FILE_FLAG_LOCAL_SYMS_STRIPPED = 0x0008, +IMAGE_FILE_FLAG_AGGRESSIVE_WS_TRIM = 0x0010, +IMAGE_FILE_FLAG_LARGE_ADDRESS_AWARE = 0x0020, +IMAGE_FILE_FLAG_BYTES_REVERSED_LO = 0x0080, +IMAGE_FILE_FLAG_32BIT_MACHINE = 0x0100, +IMAGE_FILE_FLAG_DEBUG_STRIPPED = 0x0200, +IMAGE_FILE_FLAG_REMOVABLE_RUN_FROM_SWAP = 0x0400, +IMAGE_FILE_FLAG_NET_RUN_FROM_SWAP = 0x0800, +IMAGE_FILE_FLAG_SYSTEM = 0x1000, +IMAGE_FILE_FLAG_DLL = 0x2000, +IMAGE_FILE_FLAG_UP_SYSTEM_ONLY = 0x4000, +IMAGE_FILE_FLAG_BYTES_REVERSED_HI = 0x8000, +) + +SetConstants( +IMAGE_SYM_CLASS_END_OF_FUNCTION = -1, +IMAGE_SYM_CLASS_NULL = 0, +IMAGE_SYM_CLASS_AUTOMATIC = 1, +IMAGE_SYM_CLASS_EXTERNAL = 2, +IMAGE_SYM_CLASS_STATIC = 3, +IMAGE_SYM_CLASS_REGISTER = 4, 
+IMAGE_SYM_CLASS_EXTERNAL_DEF = 5, +IMAGE_SYM_CLASS_LABEL = 6, +IMAGE_SYM_CLASS_UNDEFINED_LABEL = 7, +IMAGE_SYM_CLASS_MEMBER_OF_STRUCT = 8, +IMAGE_SYM_CLASS_ARGUMENT = 9, +IMAGE_SYM_CLASS_STRUCT_TAG = 10, +IMAGE_SYM_CLASS_MEMBER_OF_UNION = 11, +IMAGE_SYM_CLASS_UNION_TAG = 12, +IMAGE_SYM_CLASS_TYPE_DEFINITION = 13, +IMAGE_SYM_CLASS_UNDEFINED_STATIC = 14, +IMAGE_SYM_CLASS_ENUM_TAG = 15, +IMAGE_SYM_CLASS_MEMBER_OF_ENUM = 16, +IMAGE_SYM_CLASS_REGISTER_PARAM = 17, +IMAGE_SYM_CLASS_BIT_FIELD = 18, +IMAGE_SYM_CLASS_BLOCK = 100, +IMAGE_SYM_CLASS_FUNCTION = 101, +IMAGE_SYM_CLASS_END_OF_STRUCT = 102, +IMAGE_SYM_CLASS_FILE = 103, +IMAGE_SYM_CLASS_SECTION = 104, +IMAGE_SYM_CLASS_WEAK_EXTERNAL = 105, +IMAGE_SYM_CLASS_CLR_TOKEN = 107, +) + +SetConstants( +IMAGE_SYM_TYPE_NULL = 0, +IMAGE_SYM_TYPE_VOID = 1, +IMAGE_SYM_TYPE_CHAR = 2, +IMAGE_SYM_TYPE_SHORT = 3, +IMAGE_SYM_TYPE_INT = 4, +IMAGE_SYM_TYPE_LONG = 5, +IMAGE_SYM_TYPE_FLOAT = 6, +IMAGE_SYM_TYPE_DOUBLE = 7, +IMAGE_SYM_TYPE_STRUCT = 8, +IMAGE_SYM_TYPE_UNION = 9, +IMAGE_SYM_TYPE_ENUM = 10, +IMAGE_SYM_TYPE_MOE = 11, +IMAGE_SYM_TYPE_BYTE = 12, +IMAGE_SYM_TYPE_WORD = 13, +IMAGE_SYM_TYPE_UINT = 14, +IMAGE_SYM_TYPE_DWORD = 15, +) + +SetConstants( +IMAGE_SYM_DTYPE_NULL = 0, +IMAGE_SYM_DTYPE_POINTER = 1, +IMAGE_SYM_DTYPE_FUNCTION = 2, +IMAGE_SYM_DTYPE_ARRAY = 3, +IMAGE_SYM_DTYPE_SCT_COMPLEX_TYPE_SHIFT = 4, +) + +# Official names of these constants in Windows +IMAGE_NT_OPTIONAL_HDR32_MAGIC = 0x10b +IMAGE_NT_OPTIONAL_HDR64_MAGIC = 0x20b +IMAGE_NT_OPTIONAL_HDR_ROM_MAGIC = 0x107 + +SetConstants( +# Better names, for consistency for COFF that are not in PE files +IMAGE_OPTIONAL_HDR_MAGIC_EXE32 = IMAGE_NT_OPTIONAL_HDR32_MAGIC, +IMAGE_OPTIONAL_HDR_MAGIC_EXE64 = IMAGE_NT_OPTIONAL_HDR64_MAGIC, +IMAGE_OPTIONAL_HDR_MAGIC_ROM = IMAGE_NT_OPTIONAL_HDR_ROM_MAGIC, +IMAGE_OPTIONAL_HDR_MAGIC_EXE_TI = 0x108, # TI COFF executables +) + +SetConstants( +prefix = 'STYP_', +# COFF section flags +STYP_DSECT = 0x00000001, # Dummy section +STYP_TEXT = 
0x00000020, # Text only +STYP_DATA = 0x00000040, # Data only +STYP_BSS = 0x00000080, # Bss only +STYP_RDATA = 0x00000100, # Read-only data only +STYP_SDATA = 0x00000200, # Small data only +STYP_SBSS = 0x00000400, # Small bss only +STYP_UCODE = 0x00000800, # Obsolete +STYP_GOT = 0x00001000, # Global offset table +STYP_DYNAMIC = 0x00002000, # Dynamic linking information +STYP_DYNSYM = 0x00004000, # Dynamic linking symbol table +STYP_REL_DYN = 0x00008000, # Dynamic relocation information +STYP_DYNSTR = 0x00010000, # Dynamic linking symbol table +STYP_HASH = 0x00020000, # Dynamic symbol hash table +STYP_DSOLIST = 0x00040000, # Shared library dependency list +STYP_MSYM = 0x00080000, # Additional dynamic linking symbol table +STYP_CONFLICT = 0x00100000, # Additional dynamic linking information +STYP_FINI = 0x01000000, # Termination text only +STYP_COMMENT = 0x02000000, # Comment section +STYP_RCONST = 0x02200000, # Read-only constants +STYP_XDATA = 0x02400000, # Exception scope table +STYP_TLSDATA = 0x02500000, # Initialized TLS data +STYP_TLSBSS = 0x02600000, # Uninitialized TLS data +STYP_TLSINIT = 0x02700000, # Initialization for TLS data +STYP_PDATA = 0x02800000, # Exception procedure table +STYP_LITA = 0x04000000, # Address literals only +STYP_LIT8 = 0x08000000, # 8-byte literals only +STYP_EXTMASK = 0x0ff00000, # Identifies bits used for multiple bit flag values +STYP_LIT4 = 0x10000000, # 4-byte literals only +S_NRELOC_OVFL2 = 0x20000000, # Section header field s_nreloc has overflowed +STYP_INIT = 0x80000000, # Initialization text only +) + +SetConstants( +# PE section flags (somewhat compatible, with different names), +IMAGE_SCN_TYPE_NO_PAD = 0x00000008, +IMAGE_SCN_CNT_CODE = 0x00000020, +IMAGE_SCN_CNT_INITIALIZED_DATA = 0x00000040, +IMAGE_SCN_CNT_UNINITIALIZED_DATA = 0x00000080, +IMAGE_SCN_LNK_INFO = 0x00000200, +IMAGE_SCN_LNK_REMOVE = 0x00000800, +IMAGE_SCN_LNK_COMDAT = 0x00001000, +IMAGE_SCN_GPREL = 0x00008000, +IMAGE_SCN_ALIGN_1BYTES = 0x00010000, 
+IMAGE_SCN_ALIGN_2BYTES = 0x00020000, +IMAGE_SCN_ALIGN_4BYTES = 0x00030000, +IMAGE_SCN_ALIGN_8BYTES = 0x00040000, +IMAGE_SCN_ALIGN_16BYTES = 0x00050000, +IMAGE_SCN_ALIGN_32BYTES = 0x00060000, +IMAGE_SCN_ALIGN_64BYTES = 0x00070000, +IMAGE_SCN_ALIGN_128BYTES = 0x00080000, +IMAGE_SCN_ALIGN_256BYTES = 0x00090000, +IMAGE_SCN_ALIGN_512BYTES = 0x000A0000, +IMAGE_SCN_ALIGN_1024BYTES = 0x000B0000, +IMAGE_SCN_ALIGN_2048BYTES = 0x000C0000, +IMAGE_SCN_ALIGN_4096BYTES = 0x000D0000, +IMAGE_SCN_ALIGN_8192BYTES = 0x000E0000, +IMAGE_SCN_LNK_NRELOC_OVFL = 0x01000000, +IMAGE_SCN_MEM_DISCARDABLE = 0x02000000, +IMAGE_SCN_MEM_NOT_CACHED = 0x04000000, +IMAGE_SCN_MEM_NOT_PAGED = 0x08000000, +IMAGE_SCN_MEM_SHARED = 0x10000000, +IMAGE_SCN_MEM_EXECUTE = 0x20000000, +IMAGE_SCN_MEM_READ = 0x40000000, +IMAGE_SCN_MEM_WRITE = 0x80000000, +) + +SetConstants( +# subsytem, in NT headers +IMAGE_SUBSYSTEM_UNKNOWN = 0, +IMAGE_SUBSYSTEM_NATIVE = 1, # Doesn't require a subsystem (such as a device driver) +IMAGE_SUBSYSTEM_WINDOWS_GUI = 2, # Runs in the Windows GUI subsystem +IMAGE_SUBSYSTEM_WINDOWS_CUI = 3, # Runs in the Windows character subsystem (a console app) +IMAGE_SUBSYSTEM_OS2_CUI = 5, # Runs in the OS/2 character subsystem (OS/2 1.x apps only) +IMAGE_SUBSYSTEM_POSIX_CUI = 7, # Runs in the Posix character subsystem +IMAGE_SUBSYSTEM_NATIVE_WINDOWS = 8, # Native Win9x driver +IMAGE_SUBSYSTEM_WINDOWS_CE_GUI = 9, # Windows CE +IMAGE_SUBSYSTEM_EFI_APPLICATION = 10, +IMAGE_SUBSYSTEM_EFI_BOOT_SERVICE_DRIVER = 11, +IMAGE_SUBSYSTEM_EFI_RUNTIME_DRIVER = 12, +IMAGE_SUBSYSTEM_EFI_ROM = 13, +IMAGE_SUBSYSTEM_XBOX = 14, +IMAGE_SUBSYSTEM_WINDOWS_BOOT_APPLICATION = 16, +) + +# Relocations + +SetConstants( +# The following relocation type indicators are defined for x64 and compatible processors +IMAGE_REL_AMD64_ABSOLUTE = 0x0000, # The relocation is ignored. +IMAGE_REL_AMD64_ADDR64 = 0x0001, # The 64-bit VA of the relocation target. +IMAGE_REL_AMD64_ADDR32 = 0x0002, # The 32-bit VA of the relocation target. 
+IMAGE_REL_AMD64_ADDR32NB = 0x0003, # The 32-bit address without an image base (RVA). +IMAGE_REL_AMD64_REL32 = 0x0004, # The 32-bit relative address from the byte following the relocation. +IMAGE_REL_AMD64_REL32_1 = 0x0005, # The 32-bit address relative to byte distance 1 from the relocation. +IMAGE_REL_AMD64_REL32_2 = 0x0006, # The 32-bit address relative to byte distance 2 from the relocation. +IMAGE_REL_AMD64_REL32_3 = 0x0007, # The 32-bit address relative to byte distance 3 from the relocation. +IMAGE_REL_AMD64_REL32_4 = 0x0008, # The 32-bit address relative to byte distance 4 from the relocation. +IMAGE_REL_AMD64_REL32_5 = 0x0009, # The 32-bit address relative to byte distance 5 from the relocation. +IMAGE_REL_AMD64_SECTION = 0x000A, # The 16-bit section index of the section that contains the target. This is used to support debugging information. +IMAGE_REL_AMD64_SECREL = 0x000B, # The 32-bit offset of the target from the beginning of its section. This is used to support debugging information and static thread local storage. +IMAGE_REL_AMD64_SECREL7 = 0x000C, # A 7-bit unsigned offset from the base of the section that contains the target. +IMAGE_REL_AMD64_TOKEN = 0x000D, # CLR tokens. +IMAGE_REL_AMD64_SREL32 = 0x000E, # A 32-bit signed span-dependent value emitted into the object. +IMAGE_REL_AMD64_PAIR = 0x000F, # A pair that must immediately follow every span-dependent value. +IMAGE_REL_AMD64_SSPAN32 = 0x0010, # A 32-bit signed span-dependent value that is applied at link time. +) + +SetConstants( +# The following relocation type indicators are defined for ARM processors. +IMAGE_REL_ARM_ABSOLUTE = 0x0000, # The relocation is ignored. +IMAGE_REL_ARM_ADDR32 = 0x0001, # The 32-bit VA of the target. +IMAGE_REL_ARM_ADDR32NB = 0x0002, # The 32-bit RVA of the target. +IMAGE_REL_ARM_BRANCH24 = 0x0003, # The 24-bit relative displacement to the target. +IMAGE_REL_ARM_BRANCH11 = 0x0004, # The reference to a subroutine call. 
The reference consists of two 16-bit instructions with 11-bit offsets. +IMAGE_REL_ARM_SECTION = 0x000E, # The 16-bit section index of the section that contains the target. This is used to support debugging information. +IMAGE_REL_ARM_SECREL = 0x000F, # The 32-bit offset of the target from the beginning of its section. This is used to support debugging information and static thread local storage. +IMAGE_REL_ARM_MOV32 = 0x0010, # The 32-bit VA of the target. This relocation is applied using a MOVW instruction for the low 16 bits followed by a MOVT for the high 16 bits. +IMAGE_REL_THUMB_MOV32 = 0x0011, # The 32-bit VA of the target. This relocation is applied using a MOVW instruction for the low 16 bits followed by a MOVT for the high 16 bits. +IMAGE_REL_THUMB_BRANCH20 = 0x0012, # The instruction is fixed up with the 21-bit relative displacement to the 2-byte aligned target. The least significant bit of the displacement is always zero and is not stored. This relocation corresponds to a Thumb-2 32-bit conditional B instruction. +IMAGE_REL_THUMB_BRANCH24 = 0x0014, # The instruction is fixed up with the 25-bit relative displacement to the 2-byte aligned target. The least significant bit of the displacement is zero and is not stored. This relocation corresponds to a Thumb-2 B instruction. +IMAGE_REL_THUMB_BLX23 = 0x0015, # The instruction is fixed up with the 25-bit relative displacement to the 4-byte aligned target. The low 2 bits of the displacement are zero and are not stored. This relocation corresponds to a Thumb-2 BLX instruction. +IMAGE_REL_ARM_PAIR = 0x0016, # The relocation is valid only when it immediately follows a ARM_REFHI or THUMB_REFHI. Its SymbolTableIndex contains a displacement and not an index into the symbol table. +) + +SetConstants( +# The following relocation type indicators are defined for ARM64 processors. +IMAGE_REL_ARM64_ABSOLUTE = 0x0000, # The relocation is ignored. +IMAGE_REL_ARM64_ADDR32 = 0x0001, # The 32-bit VA of the target. 
+IMAGE_REL_ARM64_ADDR32NB = 0x0002, # The 32-bit RVA of the target. +IMAGE_REL_ARM64_BRANCH26 = 0x0003, # The 26-bit relative displacement to the target, for B and BL instructions. +IMAGE_REL_ARM64_PAGEBASE_REL21 = 0x0004, # The page base of the target, for ADRP instruction. +IMAGE_REL_ARM64_REL21 = 0x0005, # The 21-bit relative displacement to the target, for instruction ADR +IMAGE_REL_ARM64_PAGEOFFSET_12A = 0x0006, # The 12-bit page offset of the target, for instructions ADD/ADDS (immediate) with zero shift. +IMAGE_REL_ARM64_PAGEOFFSET_12L = 0x0007, # The 12-bit page offset of the target, for instruction LDR (indexed, unsigned immediate). +IMAGE_REL_ARM64_SECREL = 0x0008, # The 32-bit offset of the target from the beginning of its section. This is used to support debugging information and static thread local storage. +IMAGE_REL_ARM64_SECREL_LOW12A = 0x0009, # Bit 0:11 of section offset of the target, for instructions ADD/ADDS (immediate) with zero shift. +IMAGE_REL_ARM64_SECREL_HIGH12A = 0x000A, # Bit 12:23 of section offset of the target, for instructions ADD/ADDS (immediate) with zero shift. +IMAGE_REL_ARM64_SECREL_LOW12L = 0x000B, # Bit 0:11 of section offset of the target, for instruction LDR (indexed, unsigned immediate). +IMAGE_REL_ARM64_TOKEN = 0x000C, # CLR token. +IMAGE_REL_ARM64_SECTION = 0x000D, # The 16-bit section index of the section that contains the target. This is used to support debugging information. +IMAGE_REL_ARM64_ADDR64 = 0x000E, # The 64-bit VA of the relocation target. +IMAGE_REL_ARM64_BRANCH19 = 0x000F, # The 19-bit offset to the relocation target, for conditional B instruction. +IMAGE_REL_ARM64_BRANCH14 = 0x0010, # The 14-bit offset to the relocation target, for instructions TBZ and TBNZ. +) + +SetConstants( +# The following relocation type indicators are defined for Hitachi SH3 and SH4 processors. SH5-specific relocations are noted as SHM (SH Media). +IMAGE_REL_SH3_ABSOLUTE = 0x0000, # The relocation is ignored.
+IMAGE_REL_SH3_DIRECT16 = 0x0001, # A reference to the 16-bit location that contains the VA of the target symbol. +IMAGE_REL_SH3_DIRECT32 = 0x0002, # The 32-bit VA of the target symbol. +IMAGE_REL_SH3_DIRECT8 = 0x0003, # A reference to the 8-bit location that contains the VA of the target symbol. +IMAGE_REL_SH3_DIRECT8_WORD = 0x0004, # A reference to the 8-bit instruction that contains the effective 16-bit VA of the target symbol. +IMAGE_REL_SH3_DIRECT8_LONG = 0x0005, # A reference to the 8-bit instruction that contains the effective 32-bit VA of the target symbol. +IMAGE_REL_SH3_DIRECT4 = 0x0006, # A reference to the 8-bit location whose low 4 bits contain the VA of the target symbol. +IMAGE_REL_SH3_DIRECT4_WORD = 0x0007, # A reference to the 8-bit instruction whose low 4 bits contain the effective 16-bit VA of the target symbol. +IMAGE_REL_SH3_DIRECT4_LONG = 0x0008, # A reference to the 8-bit instruction whose low 4 bits contain the effective 32-bit VA of the target symbol. +IMAGE_REL_SH3_PCREL8_WORD = 0x0009, # A reference to the 8-bit instruction that contains the effective 16-bit relative offset of the target symbol. +IMAGE_REL_SH3_PCREL8_LONG = 0x000A, # A reference to the 8-bit instruction that contains the effective 32-bit relative offset of the target symbol. +IMAGE_REL_SH3_PCREL12_WORD = 0x000B, # A reference to the 16-bit instruction whose low 12 bits contain the effective 16-bit relative offset of the target symbol. +IMAGE_REL_SH3_STARTOF_SECTION = 0x000C, # A reference to a 32-bit location that is the VA of the section that contains the target symbol. +IMAGE_REL_SH3_SIZEOF_SECTION = 0x000D, # A reference to the 32-bit location that is the size of the section that contains the target symbol. +IMAGE_REL_SH3_SECTION = 0x000E, # The 16-bit section index of the section that contains the target. This is used to support debugging information. +IMAGE_REL_SH3_SECREL = 0x000F, # The 32-bit offset of the target from the beginning of its section. 
This is used to support debugging information and static thread local storage. +IMAGE_REL_SH3_DIRECT32_NB = 0x0010, # The 32-bit RVA of the target symbol. +IMAGE_REL_SH3_GPREL4_LONG = 0x0011, # GP relative. +IMAGE_REL_SH3_TOKEN = 0x0012, # CLR token. +IMAGE_REL_SHM_PCRELPT = 0x0013, # The offset from the current instruction in longwords. If the NOMODE bit is not set, insert the inverse of the low bit at bit 32 to select PTA or PTB. +IMAGE_REL_SHM_REFLO = 0x0014, # The low 16 bits of the 32-bit address. +IMAGE_REL_SHM_REFHALF = 0x0015, # The high 16 bits of the 32-bit address. +IMAGE_REL_SHM_RELLO = 0x0016, # The low 16 bits of the relative address. +IMAGE_REL_SHM_RELHALF = 0x0017, # The high 16 bits of the relative address. +IMAGE_REL_SHM_PAIR = 0x0018, # The relocation is valid only when it immediately follows a REFHALF, RELHALF, or RELLO relocation. The SymbolTableIndex field of the relocation contains a displacement and not an index into the symbol table. +IMAGE_REL_SHM_NOMODE = 0x8000, # The relocation ignores section mode. +) + +SetConstants( +# The following relocation type indicators are defined for PowerPC processors. +IMAGE_REL_PPC_ABSOLUTE = 0x0000, # The relocation is ignored. +IMAGE_REL_PPC_ADDR64 = 0x0001, # The 64-bit VA of the target. +IMAGE_REL_PPC_ADDR32 = 0x0002, # The 32-bit VA of the target. +IMAGE_REL_PPC_ADDR24 = 0x0003, # The low 24 bits of the VA of the target. This is valid only when the target symbol is absolute and can be sign-extended to its original value. +IMAGE_REL_PPC_ADDR16 = 0x0004, # The low 16 bits of the target's VA. +IMAGE_REL_PPC_ADDR14 = 0x0005, # The low 14 bits of the target's VA. This is valid only when the target symbol is absolute and can be sign-extended to its original value. +IMAGE_REL_PPC_REL24 = 0x0006, # A 24-bit PC-relative offset to the symbol's location. +IMAGE_REL_PPC_REL14 = 0x0007, # A 14-bit PC-relative offset to the symbol's location. +IMAGE_REL_PPC_ADDR32NB = 0x000A, # The 32-bit RVA of the target. 
+IMAGE_REL_PPC_SECREL = 0x000B, # The 32-bit offset of the target from the beginning of its section. This is used to support debugging information and static thread local storage. +IMAGE_REL_PPC_SECTION = 0x000C, # The 16-bit section index of the section that contains the target. This is used to support debugging information. +IMAGE_REL_PPC_SECREL16 = 0x000F, # The 16-bit offset of the target from the beginning of its section. This is used to support debugging information and static thread local storage. +IMAGE_REL_PPC_REFHI = 0x0010, # The high 16 bits of the target's 32-bit VA. This is used for the first instruction in a two-instruction sequence that loads a full address. This relocation must be immediately followed by a PAIR relocation whose SymbolTableIndex contains a signed 16-bit displacement that is added to the upper 16 bits that was taken from the location that is being relocated. +IMAGE_REL_PPC_REFLO = 0x0011, # The low 16 bits of the target's VA. +IMAGE_REL_PPC_PAIR = 0x0012, # A relocation that is valid only when it immediately follows a REFHI or SECRELHI relocation. Its SymbolTableIndex contains a displacement and not an index into the symbol table. +IMAGE_REL_PPC_SECRELLO = 0x0013, # The low 16 bits of the 32-bit offset of the target from the beginning of its section. +IMAGE_REL_PPC_GPREL = 0x0015, # The 16-bit signed displacement of the target relative to the GP register. +IMAGE_REL_PPC_TOKEN = 0x0016, # The CLR token. +) + +SetConstants( +# The following relocation type indicators are defined for Intel 386 and compatible processors. +IMAGE_REL_I386_ABSOLUTE = 0x0000, # The relocation is ignored. +IMAGE_REL_I386_DIR16 = 0x0001, # Not supported. +IMAGE_REL_I386_REL16 = 0x0002, # Not supported. +IMAGE_REL_I386_DIR32 = 0x0006, # The target's 32-bit VA. +IMAGE_REL_I386_DIR32NB = 0x0007, # The target's 32-bit RVA. +IMAGE_REL_I386_SEG12 = 0x0009, # Not supported. 
+IMAGE_REL_I386_SECTION = 0x000A, # The 16-bit section index of the section that contains the target. This is used to support debugging information. +IMAGE_REL_I386_SECREL = 0x000B, # The 32-bit offset of the target from the beginning of its section. This is used to support debugging information and static thread local storage. +IMAGE_REL_I386_TOKEN = 0x000C, # The CLR token. +IMAGE_REL_I386_SECREL7 = 0x000D, # A 7-bit offset from the base of the section that contains the target. +IMAGE_REL_I386_REL32 = 0x0014, # The 32-bit relative displacement to the target. This supports the x86 relative branch and call instructions. +) + +SetConstants( +# The following relocation type indicators are defined for the Intel Itanium processor family and compatible processors. Note that relocations on instructions use the bundle's offset and slot number for the relocation offset. +IMAGE_REL_IA64_ABSOLUTE = 0x0000, # The relocation is ignored. +IMAGE_REL_IA64_IMM14 = 0x0001, # The instruction relocation can be followed by an ADDEND relocation whose value is added to the target address before it is inserted into the specified slot in the IMM14 bundle. The relocation target must be absolute or the image must be fixed. +IMAGE_REL_IA64_IMM22 = 0x0002, # The instruction relocation can be followed by an ADDEND relocation whose value is added to the target address before it is inserted into the specified slot in the IMM22 bundle. The relocation target must be absolute or the image must be fixed. +IMAGE_REL_IA64_IMM64 = 0x0003, # The slot number of this relocation must be one (1). The relocation can be followed by an ADDEND relocation whose value is added to the target address before it is stored in all three slots of the IMM64 bundle. +IMAGE_REL_IA64_DIR32 = 0x0004, # The target's 32-bit VA. This is supported only for /LARGEADDRESSAWARE:NO images. +IMAGE_REL_IA64_DIR64 = 0x0005, # The target's 64-bit VA. 
+IMAGE_REL_IA64_PCREL21B = 0x0006, # The instruction is fixed up with the 25-bit relative displacement to the 16-bit aligned target. The low 4 bits of the displacement are zero and are not stored. +IMAGE_REL_IA64_PCREL21M = 0x0007, # The instruction is fixed up with the 25-bit relative displacement to the 16-bit aligned target. The low 4 bits of the displacement, which are zero, are not stored. +IMAGE_REL_IA64_PCREL21F = 0x0008, # The LSBs of this relocation's offset must contain the slot number whereas the rest is the bundle address. The bundle is fixed up with the 25-bit relative displacement to the 16-bit aligned target. The low 4 bits of the displacement are zero and are not stored. +IMAGE_REL_IA64_GPREL22 = 0x0009, # The instruction relocation can be followed by an ADDEND relocation whose value is added to the target address and then a 22-bit GP-relative offset that is calculated and applied to the GPREL22 bundle. +IMAGE_REL_IA64_LTOFF22 = 0x000A, # The instruction is fixed up with the 22-bit GP-relative offset to the target symbol's literal table entry. The linker creates this literal table entry based on this relocation and the ADDEND relocation that might follow. +IMAGE_REL_IA64_SECTION = 0x000B, # The 16-bit section index of the section contains the target. This is used to support debugging information. +IMAGE_REL_IA64_SECREL22 = 0x000C, # The instruction is fixed up with the 22-bit offset of the target from the beginning of its section. This relocation can be followed immediately by an ADDEND relocation, whose Value field contains the 32-bit unsigned offset of the target from the beginning of the section. +IMAGE_REL_IA64_SECREL64I = 0x000D, # The slot number for this relocation must be one (1). The instruction is fixed up with the 64-bit offset of the target from the beginning of its section. 
This relocation can be followed immediately by an ADDEND relocation whose Value field contains the 32-bit unsigned offset of the target from the beginning of the section. +IMAGE_REL_IA64_SECREL32 = 0x000E, # The address of data to be fixed up with the 32-bit offset of the target from the beginning of its section. +IMAGE_REL_IA64_DIR32NB = 0x0010, # The target's 32-bit RVA. +IMAGE_REL_IA64_SREL14 = 0x0011, # This is applied to a signed 14-bit immediate that contains the difference between two relocatable targets. This is a declarative field for the linker that indicates that the compiler has already emitted this value. +IMAGE_REL_IA64_SREL22 = 0x0012, # This is applied to a signed 22-bit immediate that contains the difference between two relocatable targets. This is a declarative field for the linker that indicates that the compiler has already emitted this value. +IMAGE_REL_IA64_SREL32 = 0x0013, # This is applied to a signed 32-bit immediate that contains the difference between two relocatable values. This is a declarative field for the linker that indicates that the compiler has already emitted this value. +IMAGE_REL_IA64_UREL32 = 0x0014, # This is applied to an unsigned 32-bit immediate that contains the difference between two relocatable values. This is a declarative field for the linker that indicates that the compiler has already emitted this value. +IMAGE_REL_IA64_PCREL60X = 0x0015, # A 60-bit PC-relative fixup that always stays as a BRL instruction of an MLX bundle. +IMAGE_REL_IA64_PCREL60B = 0x0016, # A 60-bit PC-relative fixup. If the target displacement fits in a signed 25-bit field, convert the entire bundle to an MBB bundle with NOP.B in slot 1 and a 25-bit BR instruction (with the 4 lowest bits all zero and dropped) in slot 2. +IMAGE_REL_IA64_PCREL60F = 0x0017, # A 60-bit PC-relative fixup. 
If the target displacement fits in a signed 25-bit field, convert the entire bundle to an MFB bundle with NOP.F in slot 1 and a 25-bit (4 lowest bits all zero and dropped) BR instruction in slot 2. +IMAGE_REL_IA64_PCREL60I = 0x0018, # A 60-bit PC-relative fixup. If the target displacement fits in a signed 25-bit field, convert the entire bundle to an MIB bundle with NOP.I in slot 1 and a 25-bit (4 lowest bits all zero and dropped) BR instruction in slot 2. +IMAGE_REL_IA64_PCREL60M = 0x0019, # A 60-bit PC-relative fixup. If the target displacement fits in a signed 25-bit field, convert the entire bundle to an MMB bundle with NOP.M in slot 1 and a 25-bit (4 lowest bits all zero and dropped) BR instruction in slot 2. +IMAGE_REL_IA64_IMMGPREL64= 0x001a, # A 64-bit GP-relative fixup. +IMAGE_REL_IA64_TOKEN = 0x001b, # A CLR token. +IMAGE_REL_IA64_GPREL32 = 0x001c, # A 32-bit GP-relative fixup. +IMAGE_REL_IA64_ADDEND = 0x001F, # The relocation is valid only when it immediately follows one of the following relocations: IMM14, IMM22, IMM64, GPREL22, LTOFF22, LTOFF64, SECREL22, SECREL64I, or SECREL32. Its value contains the addend to apply to instructions within a bundle, not for data. +) + +SetConstants( +# The following relocation type indicators are defined for MIPS processors. +IMAGE_REL_MIPS_ABSOLUTE = 0x0000, # The relocation is ignored. +IMAGE_REL_MIPS_REFHALF = 0x0001, # The high 16 bits of the target's 32-bit VA. +IMAGE_REL_MIPS_REFWORD = 0x0002, # The target's 32-bit VA. +IMAGE_REL_MIPS_JMPADDR = 0x0003, # The low 26 bits of the target's VA. This supports the MIPS J and JAL instructions. +IMAGE_REL_MIPS_REFHI = 0x0004, # The high 16 bits of the target's 32-bit VA. This is used for the first instruction in a two-instruction sequence that loads a full address. 
This relocation must be immediately followed by a PAIR relocation whose SymbolTableIndex contains a signed 16-bit displacement that is added to the upper 16 bits that are taken from the location that is being relocated. +IMAGE_REL_MIPS_REFLO = 0x0005, # The low 16 bits of the target's VA. +IMAGE_REL_MIPS_GPREL = 0x0006, # A 16-bit signed displacement of the target relative to the GP register. +IMAGE_REL_MIPS_LITERAL = 0x0007, # The same as IMAGE_REL_MIPS_GPREL. +IMAGE_REL_MIPS_SECTION = 0x000A, # The 16-bit section index of the section contains the target. This is used to support debugging information. +IMAGE_REL_MIPS_SECREL = 0x000B, # The 32-bit offset of the target from the beginning of its section. This is used to support debugging information and static thread local storage. +IMAGE_REL_MIPS_SECRELLO = 0x000C, # The low 16 bits of the 32-bit offset of the target from the beginning of its section. +IMAGE_REL_MIPS_SECRELHI = 0x000D, # The high 16 bits of the 32-bit offset of the target from the beginning of its section. An IMAGE_REL_MIPS_PAIR relocation must immediately follow this one. The SymbolTableIndex of the PAIR relocation contains a signed 16-bit displacement that is added to the upper 16 bits that are taken from the location that is being relocated. +IMAGE_REL_MIPS_JMPADDR16 = 0x0010, # The low 26 bits of the target's VA. This supports the MIPS16 JAL instruction. +IMAGE_REL_MIPS_REFWORDNB = 0x0022, # The target's 32-bit RVA. +IMAGE_REL_MIPS_PAIR = 0x0025, # The relocation is valid only when it immediately follows a REFHI or SECRELHI relocation. Its SymbolTableIndex contains a displacement and not an index into the symbol table. +) + +SetConstants( +# The following relocation type indicators are defined for the Mitsubishi M32R processors. +IMAGE_REL_M32R_ABSOLUTE = 0x0000, # The relocation is ignored. +IMAGE_REL_M32R_ADDR32 = 0x0001, # The target's 32-bit VA. +IMAGE_REL_M32R_ADDR32NB = 0x0002, # The target's 32-bit RVA. 
+IMAGE_REL_M32R_ADDR24 = 0x0003, # The target's 24-bit VA. +IMAGE_REL_M32R_GPREL16 = 0x0004, # The target's 16-bit offset from the GP register. +IMAGE_REL_M32R_PCREL24 = 0x0005, # The target's 24-bit offset from the program counter (PC), shifted left by 2 bits and sign-extended +IMAGE_REL_M32R_PCREL16 = 0x0006, # The target's 16-bit offset from the PC, shifted left by 2 bits and sign-extended +IMAGE_REL_M32R_PCREL8 = 0x0007, # The target's 8-bit offset from the PC, shifted left by 2 bits and sign-extended +IMAGE_REL_M32R_REFHALF = 0x0008, # The 16 MSBs of the target VA. +IMAGE_REL_M32R_REFHI = 0x0009, # The 16 MSBs of the target VA, adjusted for LSB sign extension. This is used for the first instruction in a two-instruction sequence that loads a full 32-bit address. This relocation must be immediately followed by a PAIR relocation whose SymbolTableIndex contains a signed 16-bit displacement that is added to the upper 16 bits that are taken from the location that is being relocated. +IMAGE_REL_M32R_REFLO = 0x000A, # The 16 LSBs of the target VA. +IMAGE_REL_M32R_PAIR = 0x000B, # The relocation must follow the REFHI relocation. Its SymbolTableIndex contains a displacement and not an index into the symbol table. +IMAGE_REL_M32R_SECTION = 0x000C, # The 16-bit section index of the section that contains the target. This is used to support debugging information. +IMAGE_REL_M32R_SECREL = 0x000D, # The 32-bit offset of the target from the beginning of its section. This is used to support debugging information and static thread local storage. +IMAGE_REL_M32R_TOKEN = 0x000E, # The CLR token. 
+) + +class InvalidOffset(Exception): + pass + +#################################################################### +# Headers + +class DOShdr(CStruct): _fields = [ ("magic", "u16"), ("cblp","u16"), ("cp","u16"), @@ -30,34 +519,31 @@ class Doshdr(CStruct): ("oemid","u16"), ("oeminfo","u16"), ("res2","20s"), - ("lfanew","u32") ] + ("lfanew","u32") ] # must be 4-bytes aligned class NTsig(CStruct): - _fields = [ ("signature","u32"), - ] + _fields = [ ("signature","u32") ] -class Coffhdr(CStruct): +class COFFhdr(CStruct): _fields = [ ("machine","u16"), ("numberofsections","u16"), ("timedatestamp","u32"), - ("pointertosymboltable","u32"), + ("pointertosymboltable","ptr"), ("numberofsymbols","u32"), ("sizeofoptionalheader","u16"), ("characteristics","u16") ] -class Optehdr(CStruct): - _fields = [ ("rva","u32"), - ("size","u32") ] - -def get_optehdr_num(o): - numberofrva = o.numberofrvaandsizes - size_e = 8 - if o.parent_head.Coffhdr.sizeofoptionalheader < numberofrva * size_e+ len(o.parent_head.Opthdr): - numberofrva = (o.parent_head.Coffhdr.sizeofoptionalheader-len(o.parent_head.Opthdr))/size_e - log.warn('bad number of rva.. 
using default %d'%numberofrva) - numberofrva = 0x10 - return numberofrva +class XCOFFhdr64(CStruct): + _fields = [ ("machine","u16"), + ("numberofsections","u16"), + ("timedatestamp","u32"), + ("pointertosymboltable","ptr"), + ("sizeofoptionalheader","u16"), + ("characteristics","u16"), + ("numberofsymbols","u32"), + ] +# COFF Optional headers can have many variants class Opthdr32(CStruct): _fields = [ ("magic","u16"), ("majorlinkerversion","u08"), @@ -69,8 +555,15 @@ class Opthdr32(CStruct): ("BaseOfCode","u32"), ("BaseOfData","u32"), ] - -class Opthdr64(CStruct): + vstamp = property(lambda _:(_.majorlinkerversion<<8)+_.minorlinkerversion) + tsize = property(lambda _:_.SizeOfCode) + dsize = property(lambda _:_.sizeofinitializeddata) + bsize = property(lambda _:_.sizeofuninitializeddata) + entry = property(lambda _:_.AddressOfEntryPoint) + text_start = property(lambda _:_.BaseOfCode) + data_start = property(lambda _:_.BaseOfData) + +class Opthdr64(Opthdr32): _fields = [ ("magic","u16"), ("majorlinkerversion","u08"), ("minorlinkerversion","u08"), @@ -81,6 +574,159 @@ class Opthdr64(CStruct): ("BaseOfCode","u32"), ] +# Specs of COFF for Apollo found at +# https://opensource.apple.com/source/gdb/gdb-908/src/include/coff/apollo.h +class OpthdrApollo(CStruct): + _fields = [ ("magic","u16"), # type of file + ("vstamp","u16"), # version stamp + ("tsize","u32"), # text size in bytes + ("dsize","u32"), # initialized data + ("bsize","u32"), # uninitialized data + ("entry","u32"), # entry point + ("text_start","u32"), # base of text used for this file + ("data_start","u32"), # base of data used for this file + ("o_sri","u32"), # Apollo specific - .sri data pointer + ("o_inlib","u32"), # Apollo specific - .inlib data pointer + ("vid","u64"), # Apollo specific - 64 bit version ID + ] + +# No spec for COFF for Intergraph Clipper of CLIX found +# We make the assumption that it is standard COFF plus additional fields +class OpthdrClipper(CStruct): + _fields = [ ("magic","u16"), +
("vstamp","u16"), + ("tsize","u32"), + ("dsize","u32"), + ("bsize","u32"), + ("entry","u32"), + ("text_start","u32"), + ("data_start","u32"), + ("c0","u32"), # Clipper specific? + ("c1","u32"), # Clipper specific? + ] + +# 32-bit eCOFF (for MIPS) +# The only source of information found is binutils' include/coff/mips.h +class OpthdrECOFF32(CStruct): + _fields = [ ("magic","u16"), + ("vstamp","u16"), + ("tsize","u32"), + ("dsize","u32"), + ("bsize","u32"), + ("entry","u32"), + ("text_start","u32"), + ("data_start","u32"), + ("bss_start","u32"), + ("gprmask","u32"), + ("cprmask0","u32"), + ("cprmask1","u32"), + ("cprmask2","u32"), + ("cprmask3","u32"), + ("gp_value","u32"), + ] + majorlinkerversion = property(lambda _:_.vstamp>>8) + minorlinkerversion = property(lambda _:_.vstamp&0xff) + +# Specs of eCOFF for Tru64 aka. OSF1 found at +# http://h41361.www4.hpe.com/docs/base_doc/DOCUMENTATION/V50A_ACRO_SUP/OBJSPEC.PDF +# Not fully consistent with binutils' include/coff/alpha.h +# Looking at sample files, it seems that binutils is right +class OpthdrECOFF64(CStruct): + _fields = [ ("magic","u16"), + ("vstamp","u16"), + ("bldrev","u16"), + ("padcell","u16"), + ("tsize","u64"), + ("dsize","u64"), + ("bsize","u64"), + ("entry","u64"), + ("text_start","u64"), + ("data_start","u64"), + ("bss_start","u64"), + ("gprmask","u32"), + ("fprmask","u32"), # As with binutils + ("gp_value","u64"), # As with binutils + #("fprmask","u64"), # As with OBJSPEC.PDF + #("gp_value","u32"), # As with OBJSPEC.PDF + ] + majorlinkerversion = property(lambda _:_.vstamp>>8) + minorlinkerversion = property(lambda _:_.vstamp&0xff) + +# Specs of XCOFF found at +# http://www.ibm.com/support/knowledgecenter/ssw_aix_72/com.ibm.aix.files/XCOFF.htm +class OpthdrXCOFF32(CStruct): + _fields = [ ("magic","u16"), + ("vstamp","u16"), + ("tsize","u32"), + ("dsize","u32"), + ("size","u32"), + ("entry","u32"), + ("text_start","u32"), + ("data_start","u32"), + ("toc","u32"), + ("snentry","u16"), + ("sntext","u16"), 
+ ("sndata","u16"), + ("sntoc","u16"), + ("snloader","u16"), + ("snbss","u16"), + ("algntext","u16"), + ("algndata","u16"), + ("modtype","u16"), + ("cpuflag","u08"), + ("cputype","u08"), + ("maxstack","u32"), + ("maxdata","u32"), + ("debugger","u32"), + ("textpsize","u08"), + ("datapsize","u08"), + ("stackpsize","u08"), + ("flags","u08"), + ("sntdata","u16"), + ("sntbss","u16"), + ] + +class OpthdrXCOFF64(CStruct): + _fields = [ ("magic","u16"), + ("vstamp","u16"), + ("debugger","u32"), + ("text_start","u64"), + ("data_start","u64"), + ("toc","u64"), + ("snentry","u16"), + ("sntext","u16"), + ("sndata","u16"), + ("sntoc","u16"), + ("snloader","u16"), + ("snbss","u16"), + ("algntext","u16"), + ("algndata","u16"), + ("modtype","u16"), + ("cpuflag","u08"), + ("cputype","u08"), + ("textpsize","u08"), + ("datapsize","u08"), + ("stackpsize","u08"), + ("flags","u08"), + ("tsize","u64"), + ("dsize","u64"), + ("size","u64"), + ("entry","u64"), + ("maxstack","u64"), + ("maxdata","u64"), + ("sntdata","u16"), + ("sntbss","u16"), + ("x64flags","u16"), + ] + +class OptNThdr(CStruct): + _fields = [ ("rva","u32"), + ("size","u32") ] + +class OptNThdrs(CArray): + _cls = OptNThdr + count = lambda _: _.parent.numberofrvaandsizes + class NThdr(CStruct): _fields = [ ("ImageBase","ptr"), ("sectionalignment","u32"), @@ -103,446 +749,756 @@ class NThdr(CStruct): ("sizeofheapcommit","ptr"), ("loaderflags","u32"), ("numberofrvaandsizes","u32"), - ("optentries", "Optehdr", lambda c:get_optehdr_num(c)) - ] + ("optentries",OptNThdrs) ] + def get_optentries(self): + return self.getf('optentries')._array + def unpack(self, c, o): + CStruct.unpack(self, c, o) + sz_opt = self.parent.COFFhdr.sizeofoptionalheader + if sz_opt != self.parent.Opthdr.bytelen + self.bytelen: + log.warning('Number of rva %d does not match sizeofoptionalheader %d', + self.numberofrvaandsizes, sz_opt) + +#################################################################### +# Sections + +class SectionData(CBase): + # This 
class include the section data (of size rsize) but also + # the COFF relocations + def pack(self): + # section data is not in Shdr, therefore the answer is of size 0, + # to avoid that Shdr packing includes the data. + return data_empty + def _initialize(self): + # section data is not in Shdr, therefore it is made of size 0, + # to avoid that Shdr packing includes the data + self._size = 0 + def unpack(self, c, o): + pefile = self.parent.parent.parent + if hasattr(pefile, 'NThdr'): + filealignment = pefile.NThdr.filealignment + else: + filealignment = 0 + self.data = StrPatchwork() + if filealignment != 0: + if self.parent.scnptr % filealignment: + log.warning('Section %d offset %#x not aligned to %#x', + len(self.parent.parent), self.parent.scnptr, filealignment) + if self.parent.rsize % filealignment: + log.warning('Section %d size %#x not aligned to %#x', + len(self.parent.parent), self.parent.rsize, filealignment) + raw_sz = self.parent.rsize + raw_sz += self.parent.scnptr - self.parent.scn_baseoff + if self.parent.scn_baseoff+raw_sz > len(c): + raw_sz = len(c) - self.parent.scn_baseoff + self.data[0] = c[self.parent.scn_baseoff:self.parent.scn_baseoff+raw_sz] + if self.parent.relptr >= len(c): + raise ValueError("COFF invalid relptr") + self.relocs = COFFRelocations(parent=self.parent, + content=c, + start=self.parent.relptr) + def update(self, **kargs): + if 'data' in kargs: + self.data = StrPatchwork() + self.data[0] = kargs['data'] + def __getitem__(self, item): + return self.data.__getitem__(item) + def __setitem__(self, item, value): + return self.data.__setitem__(item, value) + def find(self, pattern, *args): + return self.data.find(pattern, *args) + def rfind(self, pattern, *args): + return self.data.rfind(pattern, *args) + +class COFFRelocation(CStruct): + _fields = [ ("VirtualAddress","u32"), + ("SymbolTableAddress","u32"), + ("Type","u16") ] + symbol = property(lambda _: + _.parent.parent.parent.parent.Symbols.getbyindex(_.SymbolTableAddress)) + name 
= property(lambda _:_.symbol.name) + def __repr__(self): + return '' % ( + self.VirtualAddress, self.name, self.Type) +class COFFRelocations(CArray): + _cls = COFFRelocation + count = lambda _:_.parent.nreloc class Shdr(CStruct): - _fields = [ ("name","8s"), - ("size","u32"), - ("addr","u32"), - ("rawsize","u32"), - ("offset","u32"), - ("pointertorelocations","u32"), - ("pointertolinenumbers","u32"), - ("numberofrelocations","u16"), - ("numberoflinenumbers","u16"), - ("flags","u32") ] - - -class SHList(CStruct): - _fields = [ ("shlist", "Shdr", lambda c:c.parent_head.Coffhdr.numberofsections)] - - def add_section(self, name="default", data = "", **args): - s_align = self.parent_head.NThdr.sectionalignment - s_align = max(0x1000, s_align) - - f_align = self.parent_head.NThdr.filealignment - f_align = max(0x200, f_align) - size = len(data) - rawsize = len(data) + # 40-bytes long for 32-bit COFF ; 64-bytes long for 64-bit COFF + # We use the field names mainly from http://wiki.osdev.org/COFF + # They are not the same names as for PE files, but the usual names + # for PE files don't always describe what is in the file! + # The main problems are the fields that contain size information: + # - The fourth field (rsize) always contains the size of the section + # in the PE/COFF file. + # - The second field (paddr) usually contains the same value as + # vaddr in COFF files (e.g. this is always the case as per OSF1 + # documentation, which also says that paddr is ignored) but some + # COFF files differ, e.g. Window .OBJ files where vaddr is always 0, + # but paddr not always, depending on the compiler. + # For PE files, the official documentation says that for executable + # images paddr is the virtual size, i.e. the size of the section in + # memory, and that if paddr is greater than rsize it is padded with + # zeroes, and that for object files paddr is zero + # ... but this is not true for all PE files. + # Recent OS (e.g. 
Windows 7) checks that the virtual mapping of sections + # in memory is contiguous, by computing the section size using the + # max of 'rsize' and 'paddr' rounded to the section alignment. + _fields = [ ("name_data","8s"), + ("paddr","ptr"), # was named 'size' + ("vaddr","ptr"), # was named 'addr' + ("rsize","ptr"), # was named 'rawsize' + ("scnptr","ptr"), # was named 'offset' + ("relptr","ptr"), # was named 'pointertorelocations' + ("lnnoptr","ptr"), # was named 'pointertolinenumbers' + ("nreloc","u16"), # was named 'numberofrelocations' + ("nlnno","u16"), # was named 'numberoflinenumbers' + ("flags","u32"), + ("section_data",SectionData) ] + def name(self): + # Offset in the string table, if more than 8 bytes long + n = self.name_data + if n[:4] == data_null*4 and n != data_null*8: + n, = struct.unpack("I", n[4:]) + n = self.parent.parent.SymbolStrings.getby_offset(n) + else: + n = n.rstrip(data_null) + return bytes_to_name(n) + name = property(name) + def scn_baseoff(self): + if not self.parent.parent.isPE(): + return self.scnptr + # The conversion from RVA to file offset is dependent on + # the file alignment. Instead of 'scnptr', PE.rva2off + # will use this 'scn_baseoff' value. + filealignment = self.parent.parent.NThdr.filealignment + if not filealignment: + return self.scnptr + # The following hack is what is needed to parse Ange + # Albertini's weirdsord.exe, which defines FILEALIGN + # to 0x4000 and then DELTA with an offset of 0x200, while + # the section starts at 0x201. It suggests that Windows + # always use an alignment of 0x200 independently of what + # is in the NT header... + filealignment = 0x200 + return (self.scnptr//filealignment)*filealignment + scn_baseoff = property(scn_baseoff) + def is_in_file(self): + if self.rsize == 0: + # Empty section, not in the file! + return False + if self.flags & (STYP_BSS|STYP_SBSS|STYP_DSECT): + # bss/dummy section, not in the file! 
+ return False + return True + # For API compatibility with previous versions of elfesteem, + # especially miasm2/jitter/loader/pe.py + def set_rawsize(self, v): + self.rsize = v + rawsize = property(lambda _: _.rsize, set_rawsize) + def set_offset(self, v): + self.scnptr = v + offset = property(lambda _: _.scnptr, set_offset) + addr = property(lambda _: _.vaddr) + def size(self): + # Return the virtual size (for PE) or the RAW size (for COFF) + if self.parent.parent.isPE(): return self.paddr + else: return self.rawsize + def set_size(self, value): + if self.parent.parent.isPE(): self.paddr = value + else: self.rawsize = value + size = property(size, set_size) + def set_data(self, value): + self.section_data.data = value + data = property(lambda _: _.section_data.data, set_data) + def __str__(self): + return "%18s %#10x %#10x %#10x %#10x %#10x" %( + self.name.strip('\0'), + self.scnptr, self.rsize, + self.paddr, self.vaddr, + self.flags) + +class ShdrTI(Shdr): + # 48 bytes long, when the standard COFF is 40 bytes long + # Documented in http://www.ti.com/lit/an/spraao8/spraao8.pdf + _fields = [ ("name_data","8s"), + ("paddr","u32"), + ("vaddr","u32"), + ("rsize","u32"), + ("scnptr","u32"), + ("relptr","u32"), + ("lnnoptr","u32"), + ("nreloc","u32"), + ("nlnno","u32"), + ("flags","u32"), + ("reserved","u16"), + ("mem_page","u16"), + ("data",SectionData) ] + def rawsize(self): + # NB: rawsize is the size in bytes + # Based on the documentation by TI, for some CPU the "size" is + # in word, therefore we need to multiply by 2 + # But in our sample file, this is not the case for .debug_* sections + # (probably because of a compiler bug) + # This sample file is https://github.com/slavaprokopiy/Mini-TMS320C28346/blob/master/For_user/C28346_Load_Program_to_Flash/Debug/C28346_Load_Program_to_Flash.out + if self.parent.parent.CPU in ('TMS320C2800', 'TMS320C5400') \ + and not self.name.startswith('.debug_'): + return self.rsize*2 + return self.rsize + rawsize = 
property(rawsize) + +class SHList(CArray): + def _cls(self): + if self.parent.COFFhdr.machine == IMAGE_FILE_MACHINE_TI: + return ShdrTI + return Shdr + _cls = property(_cls) + count = lambda self: self.parent.COFFhdr.numberofsections + def shlist(self): + return self._array + shlist = property(shlist) + def display(self): + rep = ["# section offset size addr flags rawsize "] + for i, s in enumerate(self): + l = "%-15s"%s.name.strip('\x00') + l+="%(offset)08x %(size)06x %(vaddr)08x %(flags)08x %(rawsize)08x" % s + l = ("%2i " % i)+ l + rep.append(l) + return "\n".join(rep) + def __repr__(self): + # Not respecting python's recommendation of what __repr__ should return + return self.display() + + def add_section(self, name="default", data = data_empty, **args): if len(self): - addr = self[-1].addr+self[-1].size + # Check that there is enough free space in the headers + # to add a new section + min_size = (self.parent.DOShdr.lfanew + + self.parent.NTsig.bytelen + + self.parent.COFFhdr.bytelen + + self.parent.COFFhdr.sizeofoptionalheader + + (1+len(self))*Shdr(parent=self).bytelen) + first_section_offset = min_size + for s in self.parent.SHList: + if s.is_in_file() and first_section_offset > s.scnptr: + first_section_offset = s.scnptr + # Should be equal to self.parent.NThdr.sizeofheaders + if first_section_offset < min_size: + log.error("Cannot add section %s: not enough space for section list", name) + # Could be solved by changing the section offsets, but some + # sections may contain data that depends on the offset. + # Could be solved by changing lfanew, but it will be an unusual + # PE file that may break some PE readers. + return None + # Cf. 
https://code.google.com/archive/p/corkami/wikis/PE.wiki + # Section vaddr have to be in increasing order + # This web page also says that "sections don't have to be + # virtually contiguous", but it is not always true; for + # example Windows 7 reject PE files with non-contiguous + # sections, but Wine accepts them + vaddr = self[-1].vaddr+self[-1].rawsize s_last = self[0] for s in self: - if s_last.offset+s_last.rawsize self.parent.NThdr.sizeofheaders: + log.error('xxx') + scnptr = max(scnptr, self.parent.NThdr.sizeofheaders) + # alignment + s_align = self.parent.NThdr.sectionalignment + s_align = max(0x1000, s_align) + f_align = self.parent.NThdr.filealignment + vaddr = (vaddr+(s_align-1))&~(s_align-1) + scnptr = (scnptr+(f_align-1))&~(f_align-1) + + # 'name' is a string, 'name_data' is a sequence of bytes + name_data = name.encode('latin1') + (8-len(name))*data_null + rsize = (len(data)+(f_align-1))&~(f_align-1) + f = {"name_data":name_data, + "paddr":len(data), # was named 'size' + "vaddr":vaddr, # was named 'addr' + "rsize":rsize, # was named 'rawsize' + "scnptr":scnptr, # was named 'offset' + "relptr":0, # was named 'pointertorelocations' + "lnnoptr":0, # was named 'pointertolinenumbers' + "nreloc":0, # was named 'numberofrelocations' + "nlnno":0, # was named 'numberoflinenumbers' "flags":0xE0000020, - "data":data + "data":None } f.update(args) - s = Shdr(_sex = self.parent_head._sex, _wsize = self.parent_head._wsize, **f) - s.data = data - + s = Shdr(parent=self, **f) + if s.rawsize > len(data): - s.data = s.data+'\x00'*(s.rawsize-len(data)) - s.size = s.rawsize - c = StrPatchwork() - c[0] = s.data - s.data = c - s.size = max(s_align, s.size) - + # In PE file, paddr usually contains the size of the non-padded data + s.paddr = len(data) + data = data+data_null*(s.rawsize-len(data)) + if 'rawsize' in args: + # When created with the old elfesteem API + s.rsize = args['rawsize'] + s.paddr = args['rawsize'] + data = data+data_null*(s.rawsize-len(data)) + if 
'size' in args: + # When created with the old elfesteem API + s.paddr = args['size'] + s.paddr = max(s.paddr, s_align) + s.section_data = SectionData(parent=s, data=data) + self.append(s) - self.parent_head.Coffhdr.numberofsections = len(self) - - l = (s.addr+s.size+(s_align-1))&~(s_align-1) - self.parent_head.NThdr.sizeofimage = l + self.parent.COFFhdr.numberofsections = len(self) + + l = (s.vaddr+s.rawsize+(s_align-1))&~(s_align-1) + self.parent.NThdr.sizeofimage = l return s - - - - def align_sections(self, f_align = None, s_align = None): - if f_align == None: - f_align = self.parent_head.NThdr.filealignment + + def align_sections(self, f_align=None, s_align=None): + if f_align is None: + f_align = self.parent.NThdr.filealignment f_align = max(0x200, f_align) - if s_align == None: - s_align = self.parent_head.NThdr.sectionalignment + if s_align is None: + s_align = self.parent.NThdr.sectionalignment s_align = max(0x1000, s_align) - - if not self: - return - addr = self[0].offset for s in self: - raw_off = f_align*((addr+f_align-1)/f_align) + if not s.is_in_file(): + continue + raw_off = f_align * ((addr + f_align - 1) // f_align) s.offset = raw_off s.rawsize = len(s.data) - addr = raw_off+s.rawsize - - def __repr__(self): - rep = ["# section offset size addr flags rawsize "] - for i,s in enumerate(self): - l = "%-15s"%s.name.strip('\x00') - l+="%(offset)08x %(size)06x %(addr)08x %(flags)08x %(rawsize)08x" % s - l = ("%2i " % i)+ l - rep.append(l) - return "\n".join(rep) - - def __getitem__(self, item): - return self.shlist[item] - def __len__(self): - return len(self.shlist) - - def append(self, s): - self.shlist.append(s) - -class Rva(CStruct): - _fields = [ ("rva","ptr"), - ] - -class DescName(CStruct): - _fields = [ ("name", (lambda c, s, of:c.gets(s, of), - lambda c, value:c.sets(value))) - ] - def gets(self, s, of): - if of < 0x1000: - log.warn("desname in pe hdr, used as offset") - ofname = of + addr = raw_off + s.rawsize + + 
+#################################################################### +# Directories + +# Parsing a Directory is not complicated, it is a tree-like structure +# where RVA are pointers to be converted into offsets in the file. +# Modifying a Directory is more complicated. +# - It is not always entirely in one section; e.g. for some PE files +# everything from the DelayImport directory is in .rdata, with the +# exception of the current thunks, in .data +# Therefore if we want to add an imported function, we may need to +# modify two sections. +# - References within a directory are RVA, which change when the +# addresses and sizes of sections change. Therefore if we change +# something, we need to recompute all RVA, and therefore to know +# where everything will be located. +# - References to directories from e.g. the executable section are +# also RVA, they would need to be modified if the load address of +# the directory changes. +# Therefore, if we change a Directory, we currently only allow to +# rebuild the file if a dedicated section is created to store the +# modifications. + +# Depending on how the PE file has been generated, the place +# where the directories are found varies a lot. Option '-Sl' +# of readpe.py can show in which section the directories are and +# the layout of the file. 
Here are a few examples: +# +# MinGW +# DirEnt IMPORT in .idata (as recommended by the reference doc of PE) +# DirEnt EXPORT in .edata (as recommended by the reference doc of PE) +# +# Some old Microsoft files +# DirEnt BOUND_IMPORT in headers (after PE header) +# DirEnt IMPORT in .text +# DirEnt DELAY_IMPORT in .text +# DirEnt EXPORT in .text +# DirEnt LOAD_CONFIG in .text +# DirEnt IAT in .text (contains IMPORT current Thunks) +# DirEnt DEBUG in .text +# DirEnt RESOURCE in .rsrc +# DirEnt BASERELOC in .reloc +# DirEnt SECURITY in no section +# Thunks DELAY_IMPORT original in .text, current in .data +# +# Some more recent Microsoft files +# DirEnt BOUND_IMPORT in headers (after PE header) +# DirEnt DEBUG in .text +# DirEnt IAT in .rdata (contains IMPORT current Thunks) +# DirEnt IMPORT in .rdata +# DirEnt DELAY_IMPORT in .rdata +# DirEnt EXPORT in .rdata +# DirEnt LOAD_CONFIG in .rdata +# DirEnt EXCEPTION in .pdata +# DirEnt RESOURCE in .rsrc +# DirEnt BASERELOC in .reloc +# DirEnt SECURITY in no section +# +# Some other executables +# DirEnt DEBUG in .text +# DirEnt IAT in .idata (contains IMPORT current Thunks) +# DirEnt IMPORT in .idata +# DirEnt DELAY_IMPORT in .text +# DirEnt EXPORT in .text +# DirEnt LOAD_CONFIG in .text +# DirEnt EXCEPTION in .pdata +# DirEnt RESOURCE in .rsrc +# DirEnt BASERELOC in .reloc +# DirEnt SECURITY in no section +# DirEnt TLS in .rdata + +from elfesteem.visual_studio_mangling import symbol_demangle + +class CArrayDirectory(CArray): + def unpack(self, c, o): + if o is None: + # Use the entry in the NT headers + # .rva contains the RVA of the descriptor array + # .size may contain the size of the descriptor array or of + # the whole directory entry, including thunks and names; + # it depends on the PE file. 
+ if self._idx >= len(self.parent.NThdr.optentries): return # No entry + o = self.parent.NThdr.optentries[self._idx] + if o.rva == 0: return # No directory + o = self.parent.rva2off(o.rva) + if o is None: return # Directory in no section + CArray.unpack(self, c, o) + +class ImportName(CStruct): + _fields = [ ("hint", "u16"), + ("name", CString) ] + +class ImportNamePtr(CStruct): + _fields = [ ("rva","ptr") ] + def unpack(self, c, o): + CStruct.unpack(self, c, o) + # The function can be imported by name, or by ordinal + mask = {32: 0x80000000, 64: 0x8000000000000000}[self.wsize] + if self.rva == 0: + self.name = None + elif self.rva & mask: + self.obj = self.rva & (mask-1) + self.name = self.obj else: - ofname = self.parent_head.rva2off(of) - name = self.parent_head[ofname:self.parent_head._content.find('\x00', ofname)] - return name, of+len(name)+1 - def sets(self, value): - return str(value)+"\x00" + off = self.parent.parent.rva2off(self.rva) + # When parsing 'firstthunk', either "off' is None + # or it is identical to 'originalfirstthunk'. + # But that's just what is usually the case, a valid PE + # file may be different. 
+ if off is None: + # Should never happen for originalfirstthunk + self.obj = None + self.name = None + else: + self.obj = ImportName(parent=self, content=c, start=off) + self.name = str(self.obj.name) -class ImportByName(CStruct): - _fields = [ ("hint", "u16"), - ("name", "sz") - ] +class ImportThunks(CArray): + _cls = ImportNamePtr -class ImpDesc_e(CStruct): - _fields = [ ("originalfirstthunk","u32"), +class ImportDescriptor(CStruct): + _fields = [ ("originalfirstthunk","u32"), # Import Lookup Table ("timestamp","u32"), ("forwarderchain","u32"), - ("name","u32"), - ("firstthunk","u32") + ("name_rva","u32"), # Imported DLL name + ("firstthunk","u32"), # Import Address Table + # overwritten by the PE loader ] - - -class struct_array(object): - def __init__(self, c, s, of, cstr, num = None): - self.l = [] - self.cls = c - self.end = None - i = 0 - if not s: + def rva2off(self, rva): + return self.parent.parent.rva2off(rva) + def unpack(self, c, o): + CStruct.unpack(self, c, o) + if self.parent.stop(self): + # Don't continue to parse the terminator return - - while (num == None) or (num and i keep original func addr - #if d.firstthunk: - # d.firstthunk = rva - # rva+=(len(d.firstthunks)+1)*self.parent_head._wsize/8 # Rva size - if d.originalfirstthunk and d.firstthunk: - if isinstance(d.originalfirstthunk, struct_array): - tmp_thunk = d.originalfirstthunks - elif isinstance(d.firstthunks, struct_array): - tmp_thunk = d.firstthunks - else: - raise "no thunk!!" - elif d.originalfirstthunk:# and self.parent_head.rva2off(d.originalfirstthunk): - tmp_thunk = d.originalfirstthunks - elif d.firstthunk: - tmp_thunk = d.firstthunks - else: - raise "no thunk!!" 
- - if tmp_thunk == d.originalfirstthunks: - d.firstthunks = tmp_thunk - else: - d.originalfirstthunks = tmp_thunk - for i, imp in enumerate(d.impbynames): - if isinstance(imp, ImportByName): - tmp_thunk[i].rva = rva - rva+=len(imp) - - def build_content(self, c): - dirimp = self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_IMPORT] - of1 = dirimp.rva - if not of1: # No Import - return - x = str(self.impdesc) - c[self.parent_head.rva2off(of1)] = str(self) - for i, d in enumerate(self.impdesc): - c[self.parent_head.rva2off(d.name)] = str(d.dlldescname) - if d.originalfirstthunk and self.parent_head.rva2off(d.originalfirstthunk): - c[self.parent_head.rva2off(d.originalfirstthunk)] = str(d.originalfirstthunks) - if d.firstthunk: - c[self.parent_head.rva2off(d.firstthunk)] = str(d.firstthunks) - if d.originalfirstthunk and self.parent_head.rva2off(d.originalfirstthunk): - tmp_thunk = d.originalfirstthunks - elif d.firstthunk: - tmp_thunk = d.firstthunks - else: - raise "no thunk!!" - for j, imp in enumerate(d.impbynames): - if isinstance(imp, ImportByName): - c[self.parent_head.rva2off(tmp_thunk[j].rva)] = str(imp) - - def get_dlldesc(self): - out = [] - for impdesc in self.impdesc: - dllname = impdesc.dlldescname.name - funcs = [] - for f in impdesc.impbynames: - if isinstance(f, ImportByName): - funcs.append(f.name) - else: - funcs.append(f) - d = ({"name":dllname, "firstthunk":impdesc.firstthunk}, funcs) - out.append(d) - return out - - def __repr__(self): - rep = ["<%s>"%self.__class__.__name__] - for i,s in enumerate(self.impdesc): - l = "%2d %-25s %s"%(i, repr(s.dlldescname) ,repr(s)) - rep.append(l) - for ii, f in enumerate(s.impbynames): - l = " %2d %-16s"%(ii, repr(f)) - rep.append(l) - return "\n".join(rep) - + self.name = CString(parent=self, content=c, start=of) + # NB: it is possible for a PE to have many Import descriptors + # pointing to the same IAT and ILT. elfesteem will take a + # long time because the IAT and ILT will be parsed each time. 
+ # An example of such malformed file is + # https://github.com/radare/radare2-regressions/blob/master/bins/fuzzed/file-rs-bf838568 + of = self.rva2off(self.firstthunk) + if of is None: + log.error('IAT') + else: + self.IAT = ImportThunks(parent=self, content=c, start=of) + # NB: http://win32assembly.programminghorizon.com/pe-tut6.html + # says "Some linkers generate PE files with 0 in + # OriginalFirstThunk. This is considered a bug." + # An example is the IDA installer! + of = self.rva2off(self.originalfirstthunk) + if not of in (0, None): + self.ILT = ImportThunks(parent=self, content=c, start=of) + +class DirImport(CArrayDirectory): + _cls = ImportDescriptor + _idx = DIRECTORY_ENTRY_IMPORT + def display(self): + res = '<%s>' % self.__class__.__name__ + def repr_obj(obj): + if hasattr(obj, 'name'): + name, _ = symbol_demangle(str(obj.name)) + return '%04X %r' % (obj.hint, name) + else: return repr(obj) + for idx, d in enumerate(self): + res += '\n%2d %r' % (idx, str(d.name)) + for jdx, t in enumerate(d.IAT): + t_virt = self.parent.rva2virt(d.firstthunk+jdx*t.bytelen) + t_obj = repr_obj(t.obj) + # Only display original thunks that are incoherent with current + if hasattr(d, 'ILT') and jdx < len(d.ILT): + u = d.ILT[jdx] + if u.rva != t.rva: + t_obj += ' ' + repr_obj(u.obj) + res += '\n %2d %#10x %s' % (jdx, t_virt, t_obj) + return res + def pack(self): + raise AttributeError("Cannot pack '%s': the Directory Entry data is not always contiguous"%self.__class__.__name__) + def stop(self, elt): + # Ange Albertini's imports_badterm.exe and imports_tinyXP.exe shows + # that the ImportDescriptor does not need to be all zeroes to be a + # terminator. + return elt.name_rva == 0 or elt.firstthunk == 0 + # According to Ange Albertini's manyimportsW7.exe the AddressOfIndex + # field of the TLS directory is a terminator too; but at this point + # the TLS directory has not been parsed by elfesteem. 
This will be + # handled if we handle relocations, and parse the file in multiple + # passes. + def _initialize(self): + CArrayDirectory._initialize(self) + # Imports are added in three steps: dll_to_add is computed, a + # new section is created, this section is constructed. + self.dll_to_add = [] def add_dlldesc(self, new_dll): - if self.parent_head._wsize == 32: - mask_ptr = 0x80000000 - elif self.parent_head._wsize == 64: - mask_ptr = 0x8000000000000000L - new_impdesc = [] - of1 = None - for nd, fcts in new_dll: - for x in ["timestamp", "forwarderchain", "originalfirstthunk"]: - if not x in nd: - nd[x] = 0 - d = ImpDesc_e(self.parent_head, **nd) - if d.firstthunk!=None: - of1 = d.firstthunk - elif of1 == None: - raise "set fthunk" - else: - d.firstthunk = of1 - d.dlldescname = DescName(self.parent_head, name = d.name) - d.originalfirstthunk = True - d.originalfirstthunks = struct_array(self, None, - None, - Rva) - d.firstthunks = struct_array(self, None, - None, - Rva) - - impbynames = [] - for nf in fcts: - f = Rva(self.parent_head) - if type(nf) in [int, long]: - f.rva = mask_ptr+nf - ibn = nf - elif type(nf) in [str]: - f.rva = True - ibn = ImportByName(self.parent_head) - ibn.name = nf - ibn.hint = 0 - else: - raise 'unknown func type %s'%str(nf) - impbynames.append(ibn) - d.originalfirstthunks.append(f) - ff = Rva(self.parent_head) - if isinstance(ibn, ImportByName): - ff.rva = 0xDEADBEEF #default func addr - else: - #ord ?XXX? 
- ff.rva = f.rva - d.firstthunks.append(ff) - of1+=self.parent_head._wsize/8 - #for null thunk - of1+=self.parent_head._wsize/8 - d.impbynames = impbynames - new_impdesc.append(d) - if not self.impdesc: - self.impdesc = struct_array(self, None, - None, - ImpDesc_e) - self.impdesc.l = new_impdesc + # Expand self.dll_to_add with new DLL and functions + # new_dll is a list, where each member is a pair + # - dll_name: dict with 'name' giving the DLL name + # The 'firstthunk' value is currently ignored: + # elfesteem used this value to indicate another + # section where the IAT would be located, and + # did not create an ILT. + # TODO: memorize this value to be used in + # 'write_directory' + # - dll_func: list of function names + for dll_name, dll_func in new_dll: + # First, an empty descriptor + d = ImportDescriptor(parent=self) + self.dll_to_add.append(d) + # Add the DLL name + d.name = CString(parent=d, s=dll_name['name'].encode('latin1')) + # Add the Import names; they will be located after the two thunks + thunk_len = (1+len(dll_func))*(self.wsize/8) + thunk_len *= 2 + # Add the IAT & ILT + d.ILT = ImportThunks(parent=d) + for n in dll_func: + t = ImportNamePtr(parent=d.ILT) + t.obj = ImportName(parent=t, s=n.encode('latin1')) + t.name = n + thunk_len += t.obj.bytelen + if thunk_len%2: thunk_len += 1 + d.ILT.append(t) + d.IAT = ImportThunks(parent=d) + for n in dll_func: + t = ImportNamePtr(parent=d.ILT) + t.name = n + d.IAT.append(t) + def write_directory(self, base_rva): + # Creates in the section starting at 'base_rva' a new Import Directory + # with the content of self.dll_to_add + + # Note that we need to avoid changing RVA of the current IAT, because + # they can be used e.g. in the executable section .text + # But there might not be enough space after the current list of + # descriptors to add new descriptors... 
+ # The trick we use is to move the list of descriptors in a new + # section (s_dir), where we will also store the new ILT, IAT and + # names, leaving the original section unchanged. + # + # TODO: The IAT can be stored in another section than the rest of + # the directory (descriptors, names, ILT) ; provide this possibility. + # TODO: The ILT is not necessary. Provide the possibility of not + # creating it. + # TODO: If base_rva is not the vaddr of an existing section, but + # is inside na existing section, do we overwrite everything after + # base_rva? + + e = self.parent + for s_dir in e.SHList.shlist: + # This section may have been created by + # e.SHList.add_section(name="myimp", rawsize=len(e.DirImport)) + # which is the original syntax with elfesteem but does not + # use the appropriate value for rsize, because len(e.DirImport) + # now is the number of DLLs and not the bytelen of the directory. + # This does not matter, because we recompute s_dir.rsize at + # the end of this function. + if s_dir.vaddr == base_rva: + break else: - for d in new_impdesc: - self.impdesc.append(d) - - def get_funcrva(self, f): - for i, d in enumerate(self.impdesc): - if d.originalfirstthunk and self.parent_head.rva2off(d.originalfirstthunk): - tmp_thunk = d.originalfirstthunks - elif d.firstthunk: - tmp_thunk = d.firstthunks - else: - raise "no thunk!!" - if type(f) is str: - for j, imp in enumerate(d.impbynames): - if isinstance(imp, ImportByName): - if f == imp.name: - return d.firstthunk+j*4 - elif type(f) in (int, long): - for j, imp in enumerate(d.impbynames): - if not isinstance(imp, ImportByName): - if tmp_thunk[j].rva&0x7FFFFFFF == f: - return d.firstthunk+j*4 - else: - raise ValueError('unknown func tpye %s'%str(f)) - def get_funcvirt(self, f): - rva = self.get_funcrva(f) - if rva==None: + # Create the new section s_dir, with appropriate flags; write + # is needed if we store the IAT. 
+ s_dir = e.SHList.add_section( + name='.idata2', + flags=IMAGE_SCN_MEM_WRITE|IMAGE_SCN_MEM_READ|IMAGE_SCN_CNT_INITIALIZED_DATA, + rsize=0x1000, # should be enough + ) + base_rva = s_dir.vaddr + s_dir.section_data.data = StrPatchwork() + self._size += self._cls(parent=self).bytelen * len(self.dll_to_add) + of = self.bytelen + for d in self.dll_to_add: + self._array.append(d) + d.name_rva = base_rva+of + s_dir.section_data.data[of] = d.name.pack() + of += d.name.bytelen + if of%2: of += 1 + thunk_len = (1+len(d.ILT))*(self.wsize//8) + thunk_len *= 2 + for t in d.ILT: + t.rva = base_rva+of+thunk_len + s_dir.section_data.data[of+thunk_len] = t.obj.pack() + thunk_len += t.obj.bytelen + if thunk_len%2: thunk_len += 1 + d.originalfirstthunk = base_rva+of + s_dir.section_data.data[of] = d.ILT.pack() + of += d.ILT.bytelen + d.firstthunk = base_rva+of + for idx, t in enumerate(d.IAT): + t.obj = d.ILT[idx].obj + t.rva = d.ILT[idx].rva + s_dir.section_data.data[of] = d.IAT.pack() + of += thunk_len - d.ILT.bytelen + self.dll_to_add = [] + # Write the descriptor list (now that all RVA have been computed) + s_dir.section_data.data[0] = CArray.pack(self) + # Update the section sizes + s_dir.paddr = len(s_dir.section_data.data) + if s_dir.rsize < s_dir.paddr: + s_dir.rsize = s_dir.paddr + s_dir.section_data.data[s_dir.paddr] = data_null*(s_dir.rsize-s_dir.paddr) + e.NThdr.optentries[self._idx].rva = base_rva + e.NThdr.optentries[self._idx].size = s_dir.paddr # Unused by PE loaders + def get_funcrva(self, dllname, funcname): + # Position of the function in the Import Address Table + for d in self: + if dllname is not None and str(d.name) != dllname: + continue + for idx, t in enumerate(d.IAT): + if t.name == funcname: + return d.firstthunk+idx*t.bytelen + return None + def get_funcvirt(self, dllname, funcname): + return self.parent.rva2virt(self.get_funcrva(dllname, funcname)) + # For API compatibility with previous versions of elfesteem + def get_dlldesc(self): + return [ 
({'name': d.name}, [t.name for t in d.IAT]) for d in self ] + def set_rva(self, addr): + self.write_directory(addr) + def impdesc(self): + class ImpDesc_e(object): + def __init__(self, d): + self.firstthunk = d.firstthunk + self.dlldescname = APICompatibilityName(str(d.name)) + self.impbynames = [APICompatibilityName(str(_.name)) for _ in d.IAT] + return [ImpDesc_e(_) for _ in self] + def set_impdesc(self, value): + if value in (None, []): + CArrayDirectory._initialize(self) return - return self.parent_head.rva2virt(rva) - - -class ExpDesc_e(CStruct): - _fields = [ ("characteristics","u32"), + TODO + impdesc = property(impdesc, set_impdesc) +class APICompatibilityName(object): + def __init__(self, s): + self.name = s +ImportByName = APICompatibilityName + + +# Delay Import Directory is similar to Import Directory +# The implementation below is incomplete, but useable because +# boundiat and unloadiat are optional and usually absent. +class DelayDescriptor(ImportDescriptor): + _fields = [ ("attrs","u32"), + ("name_rva","u32"), + ("hmod","u32"), # Module Handle + ("firstthunk","u32"), # Delay Import Address Table + ("originalfirstthunk","u32"), # Delay Import Name Table + ("boundiat","u32"), # Bound Delay Import Table + ("unloadiat","u32"), # Unload Delay Import Table ("timestamp","u32"), - ("majorv","u16"), - ("minorv","u16"), - ("name","u32"), + ] + def rva2off(self, rva): + # Microsoft's pecoff.docx says that no attributes are defined + # and that it is set to 0, but all our example files have 0x1. + # Serpi implemented in elfesteem that if the 0x1 bit is not set + # then the RVA has been incremented with ImageBase. We don't have + # any supporting documentation. 
+ if not (self.attrs & 1): + rva = self.parent.parent.virt2rva(rva) + return self.parent.parent.rva2off(rva) + +class DirDelay(DirImport): + _cls = DelayDescriptor + _idx = DIRECTORY_ENTRY_DELAY_IMPORT + + + + +class ExportAddressRVA(CStruct): + _fields = [ ("rva","u32") ] + def unpack(self, c, o): + CStruct.unpack(self, c, o) + # Follow the RVA if it is a "Forwarder RVA" + # which is the case if the RVA points into the export section. + # NB: IDA's export tab does not know about this, and just shows the RVA + direxport = self.parent.parent.parent + base = direxport.parent.NThdr.optentries[direxport._idx] + if base.rva <= self.rva < base.rva+base.size: + self.name = CString(parent=self, content=c, + start=self.parent.parent.rva2off(self.rva)) + +class ExportAddressTable(CArray): + _cls = ExportAddressRVA + count = lambda _: _.parent.numberoffunctions + +class ExportNamePointerRVA(CStruct): + _fields = [ ("rva","u32") ] + def unpack(self, c, o): + CStruct.unpack(self, c, o) + # Follow the RVA + self.name = CString(parent=self, content=c, + start=self.parent.parent.rva2off(self.rva)) + # For API compatibility with previous versions of elfesteem + self.name.name = str(self.name) + +class ExportNamePointersTable(CArray): + _cls = ExportNamePointerRVA + count = lambda _: _.parent.numberofnames + +class ExportOrdinal(CStruct): + _fields = [ ("ordinal","u16") ] + +class ExportOrdinalTable(CArray): + _cls = ExportOrdinal + count = lambda _: _.parent.numberofnames + +class ExportDescriptor(CStruct): + _fields = [ ("characteristics","u32"), # Unused and always 0 + ("timestamp","u32"), + ("majorv","u16"), # Unused and always 0 + ("minorv","u16"), # Unused and always 0 + ("name_rva","u32"), ("base","u32"), ("numberoffunctions","u32"), ("numberofnames","u32"), @@ -550,1001 +1506,533 @@ class ExpDesc_e(CStruct): ("addressofnames","u32"), ("addressofordinals","u32"), ] - -class DirExport(CStruct): - _fields = [ ("expdesc", (lambda c, s, of:c.gete(s, of), - lambda c, 
value:c.sete(value)))] - def gete(self, s, of): - of_o = of - if not of: - return None, of - of = self.parent_head.rva2off(of) - of_sav = of - expdesc = ExpDesc_e.unpack(s, - of, - self.parent_head) - if self.parent_head.rva2off(expdesc.addressoffunctions) == None or \ - self.parent_head.rva2off(expdesc.addressofnames) == None or \ - self.parent_head.rva2off(expdesc.addressofordinals) == None: - log.warn("export dir malformed!") - return None, of_o - self.dlldescname = DescName.unpack(s, expdesc.name, self.parent_head) - self.f_address = struct_array(self, s, - self.parent_head.rva2off(expdesc.addressoffunctions), - Rva, expdesc.numberoffunctions) - self.f_names = struct_array(self, s, - self.parent_head.rva2off(expdesc.addressofnames), - Rva, expdesc.numberofnames) - self.f_nameordinals = struct_array(self, s, - self.parent_head.rva2off(expdesc.addressofordinals), - Ordinal, expdesc.numberofnames) - for n in self.f_names: - n.name = DescName.unpack(s, n.rva, self.parent_head) - return expdesc, of_sav - - def sete(self, v): - c = str(self.expdesc) - return c - - def build_content(self, c): - direxp = self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_EXPORT] - of1 = direxp.rva - if not self.expdesc: # No Export - return - c[self.parent_head.rva2off(of1)] = str(self.expdesc) - c[self.parent_head.rva2off(self.expdesc.name)] = str(self.dlldescname) - c[self.parent_head.rva2off(self.expdesc.addressoffunctions)] = str(self.f_address) - if self.expdesc.addressofnames!=0: - c[self.parent_head.rva2off(self.expdesc.addressofnames)] = str(self.f_names) - if self.expdesc.addressofordinals!=0: - c[self.parent_head.rva2off(self.expdesc.addressofordinals)] = str(self.f_nameordinals) - for n in self.f_names: - c[self.parent_head.rva2off(n.rva)] = str(n.name) - - # XXX BUG names must be alphanumeric ordered - names = [n.name for n in self.f_names] - names_ = names[:] - if names != names_: - log.warn("unsorted export names, may bug") - - def set_rva(self, rva, size = None): - if not 
self.expdesc: - return - self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_EXPORT].rva = rva - if not size: - self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_EXPORT].size= len(self) - else: - self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_EXPORT].size= size - rva+=len(self.expdesc) - self.expdesc.name = rva - rva+=len(self.dlldescname) - self.expdesc.addressoffunctions = rva - rva+=len(self.f_address)*self.parent_head._wsize/8# Rva size - self.expdesc.addressofnames = rva - rva+=len(self.f_names)*self.parent_head._wsize/8# Rva size - self.expdesc.addressofordinals = rva - rva+=len(self.f_nameordinals)*2# Ordinal size - for n in self.f_names: - n.rva = rva - rva+=len(n.name) - - def __len__(self): - l = 0 - if not self.expdesc: - return l - l+=len(self.expdesc) - l+=len(self.dlldescname) - l+=len(self.f_address)*self.parent_head._wsize/8# Rva size - l+=len(self.f_names)*self.parent_head._wsize/8# Rva size - l+=len(self.f_nameordinals)*2# Ordinal size - for n in self.f_names: - l+=len(n.name) - return l - - def __repr__(self): - rep = ["<%s>"%self.__class__.__name__] - if not self.expdesc: - return "\n".join(rep) - - rep = ["<%s %d (%s) %s>"%(self.__class__.__name__, - self.expdesc.numberoffunctions, self.dlldescname, repr(self.expdesc))] - tmp_names = [[] for x in xrange(self.expdesc.numberoffunctions)] - for i, n in enumerate(self.f_names): - tmp_names[self.f_nameordinals[i].ordinal].append(n.name) - for i,s in enumerate(self.f_address): - tmpn = [] - if not s.rva: + def rva2off(self, rva): + return self.parent.parent.rva2off(rva) + def unpack(self, c, o): + CStruct.unpack(self, c, o) + # Follow the RVAs + self.name = CString(parent=self, content=c, + start=self.rva2off(self.name_rva)) + self.EAT = ExportAddressTable(parent=self, content=c, + start=self.rva2off(self.addressoffunctions)) + self.ENPT = ExportNamePointersTable(parent=self, content=c, + start=self.rva2off(self.addressofnames)) + self.EOT = ExportOrdinalTable(parent=self, content=c, + 
start=self.rva2off(self.addressofordinals)) + self.compute_exports() + def compute_exports(self): + # 'exports' contains the same information as displayed by IDA's export + # tab; it has issues, especially when the number of functions is not + # the number of names + self.exports = {} + for i in range(len(self.ENPT)): + # len(self.ENPT) is self.numberofnames, unless it is invalid. + # If self.numberofnames is invalid we prefer the smaller value! + j = self.EOT[i].ordinal + if j >= self.numberoffunctions: + print("Invalid ordinal[%d]: %d"%(i,j)) continue - l = "%2d %.8X %s"%(i+self.expdesc.base, s.rva ,repr(tmp_names[i])) - rep.append(l) - return "\n".join(rep) - - def create(self, name = 'default.dll'): - self.expdesc = ExpDesc_e(self.parent_head) - for x in [ "characteristics", - "timestamp", - "majorv", - "minorv", - "name", - "base", - "numberoffunctions", - "numberofnames", - "addressoffunctions", - "addressofnames", - "addressofordinals", - ]: - setattr(self.expdesc, x, 0) - - self.dlldescname = DescName(self.parent_head) - self.dlldescname.name = name - self.f_address = struct_array(self, None, - None, - Rva) - self.f_names = struct_array(self, None, - None, - Rva) - self.f_nameordinals = struct_array(self, None, - None, - Ordinal) - self.expdesc.base = 1 - - - def add_name(self, name, rva = 0xdeadc0fe): - if not self.expdesc: - return - l = len(self.f_names) - names = [n.name.name for n in self.f_names] - names_s = names[:] - names_s.sort() - if names_s != names: - log.warn('tab names was not sorted may bug') - names.append(name) - names.sort() - index = names.index(name) - descname = DescName(self.parent_head) - - descname.name = name - wname = Rva(self.parent_head) - - wname.name = descname - woffset = Rva(self.parent_head) - woffset.rva = rva - wordinal = Ordinal(self.parent_head) - #func is append to list - wordinal.ordinal = len(self.f_address) - self.f_address.append(woffset) - #self.f_names.insert(index, wname) - #self.f_nameordinals.insert(index, 
wordinal) - self.f_names.insert(index, wname) - self.f_nameordinals.insert(index, wordinal) - self.expdesc.numberofnames+=1 - self.expdesc.numberoffunctions+=1 - - def get_funcrva(self, f_str): - if not self.expdesc: - return None - for i, f in enumerate(self.f_names): - if f_str != f.name.name: + if self.base+j in self.exports: + print("Duplicate ordinal at %d"%(self.base+j)) continue - o = self.f_nameordinals[i].ordinal - rva = self.f_address[o].rva - return rva - return None - - def get_funcvirt(self, f): - rva = self.get_funcrva(f) - if rva==None: - return - return self.parent_head.rva2virt(rva) - - -class Delaydesc_e(CStruct): - _fields = [ ("attrs","u32"), - ("name","u32"), - ("hmod","u32"), - ("firstthunk","u32"), - ("originalfirstthunk","u32"), - ("boundiat","u32"), - ("unloadiat","u32"), - ("timestamp","u32"), - ] - -class DirDelay(CStruct): - _fields = [ ("delaydesc", (lambda c, s, of:c.gete(s, of), - lambda c, value:c.sete(value)))] - - def gete(self, s, of): - if not of: - return [], of - of = self.parent_head.rva2off(of) - out = struct_array(self, s, of, Delaydesc_e) - if self.parent_head._wsize == 32: - mask_ptr = 0x80000000 - elif self.parent_head._wsize == 64: - mask_ptr = 0x8000000000000000L - - parent = self.parent_head - for i, d in enumerate(out): - isfromva = (d.attrs & 1) == 0 - if isfromva: - isfromva = lambda x:parent.virt2rva(x) - else: - isfromva = lambda x:x - d.dlldescname = DescName.unpack(s, isfromva(d.name), - self.parent_head) - if d.originalfirstthunk: - d.originalfirstthunks = struct_array(self, s, - self.parent_head.rva2off(isfromva(d.originalfirstthunk)), - Rva) + addr = self.EAT[j] + name = self.ENPT[i].name + self.exports[self.base+j] = (addr, name) + # When ..numberoffunctions != ..numberofnames + for i in range(len(self.EAT)): + # len(self.EAT) is self.numberoffunctions, unless it is invalid. 
+ if not self.base+i in self.exports: + addr = self.EAT[i] + self.exports[self.base+i] = (addr, CString(parent=self)) + +class DirExport(CArrayDirectory): + _cls = ExportDescriptor + _idx = DIRECTORY_ENTRY_EXPORT + count = lambda _: 1 + def display(self): + res = '<%s>' % self.__class__.__name__ + if len(self) == 0: return + d = self[0] + res += '\n %r' % str(d.name) + for i in sorted(d.exports.keys()): + addr, name = d.exports[i] + if hasattr(addr, 'name'): + addr = str(addr.name) else: - d.originalfirstthunks - - if d.firstthunk: - d.firstthunks = struct_array(self, s, - self.parent_head.rva2off(isfromva(d.firstthunk)), - Rva) + addr = addr.rva + if self.parent.COFFhdr.machine == IMAGE_FILE_MACHINE_ARMNT: + # To have the same display as IDA on PE for ARM + addr -= 1 + addr = '%08X' % self.parent.rva2virt(addr) + name, _ = symbol_demangle(str(name)) + res += '\n %2d %s %r' % (i, addr, name) + return res + def create(self, funcs, name = 'default.dll'): + # Don't separate 'create()' and 'add_name()' because adding new + # exports to an existing export table is very tricky: we need to + # resize the EAT, ENPT and EOT. 
+ if len(self) != 0: return + e = self.parent + s = e.SHList.add_section( + name='.edata2', + flags=IMAGE_SCN_MEM_READ|IMAGE_SCN_CNT_INITIALIZED_DATA, + rsize=0x1000, # should be enough + ) + base_rva = e.off2rva(s.scnptr) + e.NThdr.optentries[self._idx].rva = base_rva + s.section_data.data = StrPatchwork() + # First, an empty descriptor + d = ExportDescriptor(parent=self, base=1) + self.append(d) + of = self.bytelen + # Add the DLL name + d.name = CString(parent=d, s=name.encode('latin1')) + d.name_rva = base_rva+of + s.section_data.data[of] = d.name.pack() + of += d.name.bytelen + # Add the EAT, ENPT & EOT + d.numberoffunctions += len(funcs) + d.numberofnames += len(funcs) + d.EAT = ExportAddressTable(parent=d) + for f in funcs: + if isinstance(f, tuple): + rva = f[1] else: - d.firstthunk = None + # TODO: we should look for the RVA of a symbol of name 'f' + rva = 0xdeadc0fe + t = ExportAddressRVA(parent=d.EAT, rva=rva) + d.EAT.append(t) + d.addressoffunctions = base_rva+of + s.section_data.data[of] = d.EAT.pack() + of += d.EAT.bytelen + d.EOT = ExportOrdinalTable(parent=d) + for idx in range(len(funcs)): + t = ExportOrdinal(parent=d.EOT, ordinal=idx) + d.EOT.append(t) + d.addressofordinals = base_rva+of + s.section_data.data[of] = d.EOT.pack() + of += d.EOT.bytelen + pos = len(funcs)*4 # size of ENPT + d.ENPT = ExportNamePointersTable(parent=d) + for f in funcs: + if isinstance(f, tuple): f = f[0] # The name of the function + t = ExportNamePointerRVA(parent=d.ENPT) + t.name = CString(parent=t, s=f.encode('latin1')) + t.name.name = f # For API compatibility with previous versions + t.rva = base_rva+of+pos + s.section_data.data[of+pos] = t.name.pack() + pos += t.name.bytelen + d.ENPT.append(t) + d.addressofnames = base_rva+of + s.section_data.data[of] = d.ENPT.pack() + # Write the descriptor list (now that everyting has been computed) + s.section_data.data[0] = CArray.pack(self) + # Update the section sizes + s.paddr = len(s.section_data.data) + 
e.NThdr.optentries[self._idx].size = s.paddr # Unused by PE loaders + if s.rsize < s.paddr: + s.rsize = s.paddr + s.section_data.data[s.paddr] = data_null*(s.rsize-s.paddr) + # Finalize + d.compute_exports() + def get_funcrva(self, name): + for d in self: + for t in d.ENPT: + if str(t.name) == name: return t.rva + return None + def get_funcvirt(self, name): + return self.parent.rva2virt(self.get_funcrva(name)) + # For API compatibility with previous versions of elfesteem + def expdesc(self): + if len(self): return self[0] + else: return None + expdesc = property(expdesc) + f_address = property(lambda _:getattr(_.expdesc,'EAT',[])) + f_nameordinals = property(lambda _:getattr(_.expdesc,'EOT',[])) + f_names = property(lambda _:getattr(_.expdesc,'ENPT',[])) + def add_name(self, name, rva = 0xdeadc0fe): + DEPRECATED - d.impbynames = [] - if d.originalfirstthunk and self.parent_head.rva2off(isfromva(d.originalfirstthunk)): - tmp_thunk = d.originalfirstthunks - elif d.firstthunk: - tmp_thunk = d.firstthunks - else: - print ValueError("no thunk in delay dir!! 
") - return - for i in xrange(len(tmp_thunk)): - if tmp_thunk[i].rva&mask_ptr == 0: - n = ImportByName.unpack(s, - self.parent_head.rva2off(isfromva(tmp_thunk[i].rva)), - self.parent_head) - d.impbynames.append(n) - else: - d.impbynames.append(isfromva(tmp_thunk[i].rva&(mask_ptr-1))) - #print repr(d[-1]) - #raise ValueError('XXX to check') - return out, of - - def sete(self, v): - c = "".join([str(x) for x in v])+"\x00"*(4*8) #DelayDesc_e - return c - - - def __len__(self): - l = (len(self.delaydesc)+1)*(4*8) #DelayDesc_e - for i, d in enumerate(self.delaydesc): - l+=len(d.dlldescname) - if d.originalfirstthunk and self.parent_head.rva2off(d.originalfirstthunk): - l+=(len(d.originalfirstthunks)+1)*self.parent_head._wsize/8 #Rva size - if d.firstthunk: - l+=(len(d.firstthunks)+1)*self.parent_head._wsize/8 #Rva size - if d.originalfirstthunk and self.parent_head.rva2off(d.originalfirstthunk): - tmp_thunk = d.originalfirstthunks - """ - elif d.firstthunk: - tmp_thunk = d.firstthunks - else: - raise "no thunk!!" 
- """ - for i, imp in enumerate(d.impbynames): - if isinstance(imp, ImportByName): - l+=len(imp) - return l - - def set_rva(self, rva, size = None): - self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_DELAY_IMPORT].rva = rva - if not size: - self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_DELAY_IMPORT].size= len(self) - else: - self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_DELAY_IMPORT].size= size - rva+=(len(self.delaydesc)+1)*(4*8) #DelayDesc_e - parent = self.parent_head - for i, d in enumerate(self.delaydesc): - isfromva = (d.attrs & 1) == 0 - if isfromva: - isfromva = lambda x:self.parent_head.rva2virt(x) - else: - isfromva = lambda x:x - - d.name = isfromva(rva) - rva+=len(d.dlldescname) - if d.originalfirstthunk:# and self.parent_head.rva2off(d.originalfirstthunk): - d.originalfirstthunk = isfromva(rva) - rva+=(len(d.originalfirstthunks)+1)*self.parent_head._wsize/8 # rva size - #XXX rva fthunk not patched => fun addr - #if d.firstthunk: - # d.firstthunk = rva - # rva+=(len(d.firstthunks)+1)*pe.Rva._size - if d.originalfirstthunk and self.parent_head.rva2off(d.originalfirstthunk): - tmp_thunk = d.originalfirstthunks - elif d.firstthunk: - tmp_thunk = d.firstthunks - else: - raise "no thunk!!" 
- for i, imp in enumerate(d.impbynames): - if isinstance(imp, ImportByName): - tmp_thunk[i].rva = isfromva(rva) - rva+=len(imp) - - def build_content(self, c): - dirdelay = self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_DELAY_IMPORT] - of1 = dirdelay.rva - if not of1: # No Delay Import - return - c[self.parent_head.rva2off(of1)] = str(self) - for i, d in enumerate(self.delaydesc): - c[self.parent_head.rva2off(d.name)] = str(d.dlldescname) - if d.originalfirstthunk and self.parent_head.rva2off(d.originalfirstthunk): - c[self.parent_head.rva2off(d.originalfirstthunk)] = str(d.originalfirstthunks) - if d.firstthunk: - c[self.parent_head.rva2off(d.firstthunk)] = str(d.firstthunks) - if d.originalfirstthunk and self.parent_head.rva2off(d.originalfirstthunk): - tmp_thunk = d.originalfirstthunks - elif d.firstthunk: - tmp_thunk = d.firstthunks - else: - raise "no thunk!!" - for j, imp in enumerate(d.impbynames): - if isinstance(imp, ImportByName): - c[self.parent_head.rva2off(tmp_thunk[j].rva)] = str(imp) - def __repr__(self): - rep = ["<%s>"%self.__class__.__name__] - for i,s in enumerate(self.delaydesc): - l = "%2d %-25s %s"%(i, repr(s.dlldescname) ,repr(s)) - rep.append(l) - for ii, f in enumerate(s.impbynames): - l = " %2d %-16s"%(ii, repr(f)) - rep.append(l) - return "\n".join(rep) - def add_dlldesc(self, new_dll): - if self.parent_head._wsize == 32: - mask_ptr = 0x80000000 - elif self.parent_head._wsize == 64: - mask_ptr = 0x8000000000000000L - new_impdesc = [] - of1 = None - for nd, fcts in new_dll: - for x in ["attrs","name","hmod","firstthunk","originalfirstthunk","boundiat","unloadiat","timestamp"]: - if not x in nd: - nd[x] = 0 - d = DelayDesc_e(self.parent_head,**nd) - #d.cstr.__dict__.update(nd) - if d.firstthunk!=None: - of1 = d.firstthunk - elif of1 == None: - raise "set fthunk" - else: - d.firstthunk = of1 - d.dlldescname = DescName(self.parent_head, name = d.name) - d.originalfirstthunk = True - d.originalfirstthunks = struct_array(self, None, - None, 
- Rva) - d.firstthunks = struct_array(self, None, - None, - Rva) - - impbynames = [] - for nf in fcts: - f = Rva(self.parent_head) - if type(nf) in [int, long]: - f.rva = mask_ptr+nf - ibn = None - elif type(nf) in [str]: - f.rva = True - ibn = ImportByName(self.parent_head) - ibn.name = nf - ibn.hint = 0 - else: - raise 'unknown func type %s'%str(nf) - impbynames.append(ibn) - d.originalfirstthunks.append(f) - ff = Rva(self.parent_head) - if ibn != None: - ff.rva = 0xDEADBEEF #default func addr - else: - #ord ?XXX? - ff.rva = f.rva - d.firstthunks.append(ff) - of1+=4 - #for null thunk - of1+=4 - d.impbynames = impbynames - new_delaydesc.append(d) - if not self.delaydesc: - self.delaydesc = struct_array(self, None, - None, - Delaydesc) - self.delaydesc.l = new_delaydesc - else: - for d in new_delaydesc: - self.delaydesc.append(d) - - def get_funcrva(self, f): - for i, d in enumerate(self.delaydesc): - isfromva = (d.attrs & 1) == 0 - if isfromva: - isfromva = lambda x:self.parent_head.virt2rva(x) - else: - isfromva = lambda x:x - if d.originalfirstthunk and self.parent_head.rva2off(isfromva(d.originalfirstthunk)): - tmp_thunk = d.originalfirstthunks - elif d.firstthunk: - tmp_thunk = d.firstthunks - else: - raise "no thunk!!" 
- if type(f) is str: - for j, imp in enumerate(d.impbynames): - if isinstance(imp, ImportByName): - if f == imp.name: - return isfromva(d.firstthunk)+j*4 - elif type(f) in (int, long): - for j, imp in enumerate(d.impbynames): - if not isinstance(imp, ImportByName): - if isfromva(tmp_thunk[j].rva&0x7FFFFFFF) == f: - return isfromva(d.firstthunk)+j*4 - else: - raise ValueError('unknown func tpye %s'%str(f)) - def get_funcvirt(self, f): - rva = self.get_funcrva(f) - if rva==None: - return - return self.parent_head.rva2virt(rva) +class Relocation(CStruct): + _fields = [ ("word","u16") ] + type = property(lambda _:_.word>>12) + offset = property(lambda _:_.word&0xfff) + rel = property(lambda _:(_.type,_.offset)) + def __repr__(self): + return "<%s=%s/%s>" % (self.__class__.__name__, + self.type, self.offset) +class RelocationTable(CArray): + _cls = Relocation + count = lambda _: (_.parent.size-8)//2 -class Rel(CStruct): +class RelocationBlock(CStruct): _fields = [ ("rva","u32"), - ("size","u32") - ] - -class Reloc(CStruct): - _fields = [ ("rel",(lambda c, s, of:c.gete(s, of), - lambda c, value:c.sete(value))) ] - def gete(self, s, of): - rel = struct.unpack('H', s[of:of+2])[0] - return (rel>>12, rel&0xfff), of+2 - def sete(self, value): - return struct.pack('H', (value[0]<<12) | value[1]) + ("size","u32"), # Should be at least 8 + ("rels", RelocationTable) ] + # TODO: don't parse 'rels' if it goes beyond the end of the directory def __repr__(self): - return '<%d %d>'%(self.rel[0], self.rel[1]) - -class DirReloc(CStruct): - _fields = [ ("reldesc", (lambda c, s, of:c.gete(s, of), - lambda c, value:c.sete(value)))] - - def gete(self, s, of): - if not of: - return [], of - - of1 = self.parent_head.rva2off(of) - ofend = of1+self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_BASERELOC].size - out = [] - while of1 < ofend: - reldesc, l = Rel.unpack_l(s, - of1, - self.parent_head) - if reldesc.size == 0: - log.warn('warning null reldesc') - reldesc.size = l - break - of2 = of1 + 
l - reldesc.rels = struct_array(self, s, - of2, - Reloc, - (reldesc.size-l)/2) # / Reloc size - reldesc.patchrel = False - out.append(reldesc) - of1+=reldesc.size - return out, of - - def sete(self, v): - rep = [] - for n in v: - rep.append(str(n)) - rep.append(str(n.rels)) - return "".join(rep) - - def set_rva(self, rva, size = None): - if not self.reldesc: - return - self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_BASERELOC].rva = rva - if not size: - self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_BASERELOC].size= len(self) - else: - self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_BASERELOC].size= size + return '<%s RVA=%#x size=%d [table of length %d]>' % ( + self.__class__.__name__, + self.rva, self.size, len(self.rels)) + +class DirReloc(CArrayDirectory): + _cls = RelocationBlock + _idx = DIRECTORY_ENTRY_BASERELOC + def count(self): + if self._idx >= len(self.parent.NThdr.optentries): + return -1 + # We don't know how many relocation block will be parsed, we stop + # when reaching the end of the directory + if self.bytelen < self.parent.NThdr.optentries[self._idx].size: + return len(self)+1 + return -1 + def display(self): + res = '<%s>' % self.__class__.__name__ + for b in self: + res += '\n %r' % b + # Don't display the relocation table... 
too long + return res + def add_reloc(self, rels, rtype = 3, patchrel = True): + TODO + def del_reloc(self, taboffset): + TODO + # For API compatibility with previous versions of elfesteem + reldesc = property(lambda _:_) - def build_content(self, c): - dirrel = self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_BASERELOC] - dirrel.size = len(self) - of1 = dirrel.rva - if not self.reldesc: # No Reloc - return - c[self.parent_head.rva2off(of1)] = str(self) - def __len__(self): - if not self.reldesc: - return 0 - l = 0 - for n in self.reldesc: - l+=n.size - return l - def __str__(self): - rep = [] - for n in self.reldesc: - rep.append(str(n)) - rep.append(str(n.rels)) - return "".join(rep) +class UStringData(CBase): + def _initialize(self): + self._size = 2*self.parent.length + def unpack(self, c, o): + self.value = c[o:o+self.bytelen] +class UString(CStruct): + _fields = [ ("length", "u16"), + ("value",UStringData) ] + def __str__(self): + return self.value.value.decode('utf16') +class ResourceDataDescription(CStruct): + _fields = [ ("rva", "u32"), + ("size","u32"), + ("codepage","u32"), + ("zero","u32") ] + def unpack(self, c, o): + CStruct.unpack(self, c, o) + # Follow the RVA + of=self.parent.rva2off(self.rva) + if of is None: + log.error("Invalid ResourceDataDescription with RVA %#x", self.rva) + raise ValueError + else: + self.data = c[of:of+self.size] def __repr__(self): - rep = ["<%s>"%self.__class__.__name__] - if not self.reldesc: - return "\n".join(rep) - for i, n in enumerate(self.reldesc): - l = "%2d %s"%(i, repr(n) ) - rep.append(l) - """ - #display too many lines... 
- for ii, m in enumerate(n.rels): - l = "\t%2d %s"%(ii, repr(m) ) - rep.append(l) - """ - l = "\t%2d rels..."%(len(n.rels)) - rep.append(l) - return "\n".join(rep) - - def add_reloc(self, rels, rtype = 3, patchrel = True): - dirrel = self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_BASERELOC] - if not rels: - return - rels.sort() - all_base_ad = set([x & 0xFFFFF000 for x in rels]) - all_base_ad = list(all_base_ad) - all_base_ad.sort() - rels_by_base = defaultdict(list) - while rels: - r = rels.pop() - if r >= all_base_ad[-1]: - rels_by_base[all_base_ad[-1]].append(r) - else: - all_base_ad.pop() - rels_by_base[all_base_ad[-1]].append(r) - rels_by_base = [x for x in rels_by_base.items()] - rels_by_base.sort() - for o_init, rels in rels_by_base: - #o_init = rels[0]&0xFFFFF000 - offsets = struct_array(self, None, None, Reloc, 0) - for o in rels: - if (o&0xFFFFF000) !=o_init: - raise "relocs must be in same range" - r = Reloc(self.parent_head) - r.rel = (rtype, o-o_init) - offsets.append(r) - while len(offsets) &3: - r = Reloc(self.parent_head) - r.rel = (0, 0) - offsets.append(r) - reldesc = Rel(self.parent_head)#Reloc(self.parent_head) - reldesc.rva = o_init - reldesc.size = (len(offsets)*2+8) - reldesc.rels = offsets - reldesc.patchrel = patchrel - if not self.reldesc: - self.reldesc = [] - self.reldesc.append(reldesc) - dirrel.size+=reldesc.size - - def del_reloc(self, taboffset): - if not self.reldesc: - return - for rel in self.reldesc: - of1 = rel.rva - i = 0 - while i < len(rel.rels): - r = rel.rels[i] - if r.rel[0] != 0 and r.rel[1]+of1 in taboffset: - print 'del reloc', hex(r.rel[1]+of1) - del rel.rels[i] - rel.size-=Reloc._size - else: - i+=1 - - -class DirRes(CStruct): - _fields = [ ("resdesc", (lambda c, s, of:c.gete(s, of), - lambda c, value:c.sete(value)))] - - def gete(self, s, of): - if not of: - return [], of - of1 = self.parent_head.rva2off(of) - if of1 == None: - log.warning('cannot parse resources') - return [], of - - resdesc, l = 
ResDesc_e.unpack_l(s, - of1, - self.parent_head) - - nbr = resdesc.numberofnamedentries + resdesc.numberofidentries - if 1:#try: - resdesc.resentries = struct_array(self, s, - of1 + l, - ResEntry, - nbr) - if 0:#except: - log.warning('cannot parse resources') - resdesc.resentries = struct_array(self, s, - of1 + l, - ResEntry, - 0) - dir_todo = {of1:resdesc} - dir_done = {} - xx = 0 - cpt = 0 - while dir_todo: - of1, my_dir = dir_todo.popitem() - dir_done[of1] = my_dir - for e in my_dir.resentries: - of1 = e.offsettosubdir - if not of1: - #data dir - of1 = e.offsettodata - data = ResDataEntry.unpack(s, - self.parent_head.rva2off(of1), - self.parent_head) - of1 = data.offsettodata - offile = self.parent_head.rva2off(of1) - data.s = StrPatchwork(s[offile:offile + data.size]) - e.data = data - continue - #subdir - if of1 in dir_done: - log.warn('warning recusif subdir') - fdds - continue - subdir, l = ResDesc_e.unpack_l(s, - self.parent_head.rva2off(of1), - self.parent_head) - nbr = subdir.numberofnamedentries + subdir.numberofidentries - subdir.resentries = struct_array(self, s, - self.parent_head.rva2off(of1 + l), - ResEntry, - nbr) - - e.subdir = subdir - dir_todo[of1] = e.subdir - return resdesc, of - - def build_content(self, c): - if not self.resdesc: - return - of1 = self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_RESOURCE].rva - c[self.parent_head.rva2off(of1)] = str(self.resdesc) - dir_todo = {self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_RESOURCE].rva:self.resdesc} - dir_done = {} - while dir_todo: - of1, my_dir = dir_todo.popitem() - dir_done[of1] = my_dir - c[self.parent_head.rva2off(of1)] = str(my_dir) - c[self.parent_head.rva2off(of1+len(my_dir))] = str(my_dir.resentries) - for e in my_dir.resentries: - if e.name_s: - c[self.parent_head.rva2off(e.name)] = str(e.name_s) - of1 = e.offsettosubdir - if not of1: - c[self.parent_head.rva2off(e.offsettodata)] = str(e.data) - c[self.parent_head.rva2off(e.data.offsettodata)] = str(e.data.s) - continue - 
dir_todo[of1] = e.subdir - - - def __len__(self): - l = 0 - if not self.resdesc: - return l - dir_todo = [self.resdesc] - dir_done = [] - while dir_todo: - my_dir = dir_todo.pop() - if not my_dir in dir_done: - dir_done.append(my_dir) - else: - raise 'recursif dir' - l+=len(my_dir) - l+=len(my_dir.resentries)*8 # ResEntry size - for e in my_dir.resentries: - if not e.offsettosubdir: - continue - if not e.subdir in dir_todo: - dir_todo.append(e.subdir) - else: - raise "recursive dir" - fds - continue - - dir_todo = dir_done - while dir_todo: - my_dir = dir_todo.pop() - for e in my_dir.resentries: - if e.name_s: - l+=len(e.name_s) - of1 = e.offsettosubdir - if not of1: - l+=4*4 # WResDataEntry size - #XXX because rva may be even rounded - l+=1 - l+=e.data.size - continue - return l - - def set_rva(self, rva, size = None): - if not self.resdesc: - return - self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_RESOURCE].rva = rva - if not size: - self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_RESOURCE].size = len(self) + return '<%s RVA=%#x size=%d codepage=%d zero=%d>' % ( + self.__class__.__name__, + self.rva, self.size, self.codepage, self.zero) + +class ResourceDirectoryEntry(CStruct): + _fields = [ ("id","u32"), + ("offset","u32") ] + base = property(lambda _:_.parent.base) + def rva2off(self, rva): + return self.parent.parent.parent.rva2off(rva) + def unpack(self, c, o): + CStruct.unpack(self, c, o) + # Two types of entries: Named & Id + # The self.parent.parent.numberofnamedentries first ones are Named + # and the MSB of their name is 1 + pos = len(self.parent._array) + if (pos < self.parent.parent.numberofnamedentries) \ + and (self.id & 0x80000000 == 0): + log.error("Named resource entries should be the first ones") + if (pos >= self.parent.parent.numberofnamedentries) \ + and (self.id & 0x80000000 != 0): + log.error("Id resource entries should be the last ones") + if self.id & 0x80000000: + self.name = UString(parent=self, content=c, + start=self.base + 
(self.id & 0x7FFFFFFF)) + if self.depth >= 10: + # In Windows PE, should never be more than 2. + # An example of file with an infinite depth is Ange Albertini's + # resourceloop.exe + log.warning('Resource tree too deep') + elif self.offset & 0x80000000: + self.dir = ResourceDescriptor(parent=self, content=c, + start=self.base + (self.offset & 0x7FFFFFFF)) else: - self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_RESOURCE].size = size - dir_todo = [self.resdesc] - dir_done = {} - while dir_todo: - my_dir = dir_todo.pop() - dir_done[rva] = my_dir - rva+=len(my_dir) - rva+=len(my_dir.resentries)*8 # ResEntry size - for e in my_dir.resentries: - if not e.offsettosubdir: + self.data = ResourceDataDescription(parent=self, content=c, + start=self.base + (self.offset & 0x7FFFFFFF)) + def depth(self): + p = self.parent.parent.parent + if isinstance(p, DirRes): return 0 + else: return p.depth+1 + depth = property(depth) + def show_tree(self): + if self.depth >= 10: + return [ (0, None) ] + def choose(val, true, false): + if val & 0x80000000: return true + else: return false + s = ( + self.parent._array.index(self), + choose(self.id, getattr(self, 'name', None), self.id), + choose(self.offset, None, getattr(self, 'data', None)), + ) + tree = [ (0, s) ] + if self.offset & 0x80000000: + tree += [ (d+1,s) for d, s in self.dir.show_tree() ] + return tree + +class ResourceDirectoryEntries(CArray): + _cls = ResourceDirectoryEntry + def count(self): + return self.parent.numberofnamedentries + self.parent.numberofidentries + base = property(lambda _:_.parent.base) + +class ResourceDescriptor(CStruct): + _fields = [ ("characteristics","u32"), # Unused and always 0 + ("timestamp","u32"), + ("majorv","u16"), # Unused and always 0 + ("minorv","u16"), # Unused and always 0 + ("numberofnamedentries","u16"), + ("numberofidentries","u16"), + ("entries",ResourceDirectoryEntries) ] + base = property(lambda _:_.parent.base) + def show_tree(self): + tree = [] + for e in self.entries: + 
tree.extend(e.show_tree()) + return tree + +class DirRes(CArrayDirectory): + _cls = ResourceDescriptor + _idx = DIRECTORY_ENTRY_RESOURCE + count = lambda _: 1 + base = property(lambda _: + _.parent.rva2off(_.parent.NThdr.optentries[_._idx].rva)) + def rva2off(self, rva): + return self.parent.rva2off(rva) + def is_depth_3_tree(self): + if len(self) == 0: return False + for d, (x, y, z) in self[0].show_tree(): + if d < 2 and z is not None: return False + if d == 2 and z is None: return False + if d > 2: return False + return True + def display(self): + res = '<%s>' % self.__class__.__name__ + if len(self) == 0: + return res + if self.is_depth_3_tree(): + # Windows-specific display, tree with all branches of depth 3 + assert self[0].characteristics == 0 + # MajorV is 0 for NTDLL-MIPS.DLL NTDLL-ALPHA.DLL notepad.exe + # regedit-2.exe + # 4 for A3DUtils.dll AdobeXMP.dll regedit-1.exe + # https://msdn.microsoft.com/en-us/library/ms809762.aspx + # says it is always 0 + assert self[0].majorv in (0, 4) + assert self[0].minorv == 0 + res += '\n Index Type Name Lang' + pos = [None, None, None] + val = [None, None, None] + for d, (x, y, z) in self[0].show_tree(): + pos[d] = x + val[d] = y + if d < 2: + assert z is None continue - if not e.subdir in dir_todo: - dir_todo.append(e.subdir) + assert d == 2 + res += '\n %2d %2d %2d %8s %8s %4s %r' % tuple(pos+val+[z]) + else: + # Generic display + for d, s in self[0].show_tree(): + if s is None: + res += '\n' + (1+d)*' ' + str(s) else: - raise "recursive dir" - fds - continue - dir_todo = dir_done - dir_inv = dict(map(lambda x:(x[1], x[0]), dir_todo.items())) - while dir_todo: - rva_tmp, my_dir = dir_todo.popitem() - for e in my_dir.resentries: - if e.name_s: - e.name = rva - rva+=len(e.name_s) - of1 = e.offsettosubdir - if not of1: - e.offsettodata = rva - rva+=4*4 # ResDataEntry size - #XXX menu rsrc must be even aligned? 
- if rva%2:rva+=1 - e.data.offsettodata = rva - rva+=e.data.size - continue - e.offsettosubdir = dir_inv[e.subdir] - + res += '\n' + (1+d)*' ' + '%d %s %r' % s + return res + + + + + +class AuxSymbolFunc(CStruct): + _fields = [ ("tagIndex","u32"), + ("totalSize","u32"), + ("pointerToLineNum","u32"), + ("pointerToNextFunc","u32"), + ("padding","u16")] + +class AuxSymbolSect(CStruct): + _fields = [ ("length","u32"), + ("numberOfRelocations","u16"), + ("numberOfLinenumbers","u16"), + ("checksum","u32"), + ("number","u16"), + ("selection","u08"), + ("padding1","u08"), + ("padding2","u08"), + ("padding3","u08")] + +class AuxSymbolFile(CStruct): + _fields = [ ("name_data","18s") ] + def name(self): + # Offset in the string table, if more than 18 bytes long + n = self.name_data + if n[:4] == data_null*4 and n != data_null*18: + n, = struct.unpack("I", n[4:8]) + n = self.parent.parent.parent.parent.SymbolStrings.getby_offset(n) + else: + n = n.rstrip(data_null) + return bytes_to_name(n) + name = property(name) def __repr__(self): - rep = ["<%s>"%(self.__class__.__name__ )] - if not self.resdesc: - return "\n".join(rep) - dir_todo = [self.resdesc] - out = [] - index = -1 - while dir_todo: - a = dir_todo.pop(0) - if isinstance(a, int): - index+=a - elif isinstance(a, ResDesc_e): - #out.append((index, repr(a))) - dir_todo=[1]+a.resentries.l+[-1]+dir_todo - elif isinstance(a, ResEntry): - if a.offsettosubdir: - out.append((index, repr(a))) - dir_todo = [a.subdir]+dir_todo - else: - out.append((index, repr(a))) - else: - raise "zarb" - for i, c in out: - rep.append(' '*4*i+c) - return "\n".join(rep) - -class Ordinal(CStruct): - _fields = [ ("ordinal","u16"), - ] - - - -class ResDesc_e(CStruct): - _fields = [ ("characteristics","u32"), - ("timestamp","u32"), - ("majorv","u16"), - ("minorv","u16"), - ("numberofnamedentries","u16"), - ("numberofidentries","u16") - ] - -class SUnicode(CStruct): - _fields = [ ("length", "u16"), - ("value", (lambda c, s, of:c.gets(s, of), - lambda c, 
value:c.sets(value))) - ] - def gets(self, s, of): - v = s[of:of+self.length*2] - return v, of+self.length - def sets(self, value): - return self.value - -class ResEntry(CStruct): - _fields = [ ("name",(lambda c, s, of:c.getn(s, of), - lambda c, value:c.setn(value))), - ("offsettodata",(lambda c, s, of:c.geto(s, of), - lambda c, value:c.seto(value))) - ] - - def getn(self, s, of): - self.data = None - #of = self.parent_head.rva2off(of) - name = struct.unpack('I', s[of:of+4])[0] - self.name_s = None - if name & 0x80000000: - name = (name & 0x7FFFFFFF) + self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_RESOURCE].rva # XXX res rva?? - name &= 0x7FFFFFFF - self.name_s = SUnicode.unpack(s, - self.parent_head.rva2off(name), - self.parent_head) - return name, of+4 - - def setn(self, v): - name = v - if self.name_s: - name=(self.name-self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_RESOURCE].rva)+0x80000000L - return struct.pack('I', name) - - def geto(self, s, of): - self.offsettosubdir = None - offsettodata_o = struct.unpack('I', s[of:of+4])[0] - offsettodata = (offsettodata_o & 0x7FFFFFFF) + self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_RESOURCE].rva #XXX res rva?? 
- if offsettodata_o & 0x80000000: - self.offsettosubdir = offsettodata - return offsettodata, of+4 - def seto(self, v): - offsettodata = v - self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_RESOURCE].rva - if self.offsettosubdir: - offsettodata=(self.offsettosubdir-self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_RESOURCE].rva)+0x80000000 - return struct.pack('I', offsettodata) - + return "<%s=%r>" % (self.__class__.__name__, self.name) + +class AuxSymbolDummy(CStruct): + _fields = [ ("data","18s") ] + +class AuxSymbols(CArray): + def _cls(self): + if self.parent.storageclass == IMAGE_SYM_CLASS_EXTERNAL: + return AuxSymbolFunc + elif self.parent.storageclass == IMAGE_SYM_CLASS_STATIC: + return AuxSymbolSect + elif self.parent.storageclass == IMAGE_SYM_CLASS_FILE: + return AuxSymbolFile + else: + return AuxSymbolDummy + _cls = property(_cls) + count = lambda _: _.parent.numberofauxsymbols def __repr__(self): - if self.name_s: - nameid = "%s"%repr(self.name_s) + return str([_ for _ in self]) + +class CoffSymbol(CStruct): + _fields = [ ("name_data","8s"), + ("value","u32"), + ("sectionnumber","u16"), + ("type","u16"), + ("storageclass","u08"), + ("numberofauxsymbols","u08"), + ("aux",AuxSymbols) ] + def name(self): + # Offset in the string table, if more than 8 bytes long + n = self.name_data + if n[:4] == data_null*4 and n != data_null*8: + n, = struct.unpack("I", n[4:]) + n = self.parent.parent.SymbolStrings.getby_offset(n) else: - if self.name in RT:# and not self.offsettosubdir: - nameid = "ID %s"%RT[self.name] - else: - nameid = "ID %d"%self.name - if self.offsettosubdir: - offsettodata = "subdir: %x"%self.offsettosubdir + n = n.rstrip(data_null) + n = bytes_to_name(n) + n, _ = symbol_demangle(n) + return n + name = property(name) + def section(self): + SHList = self.parent.parent.SHList + if 0 < self.sectionnumber < 1+len(SHList): + return SHList[self.sectionnumber-1].name else: - offsettodata = "data: %x"%self.offsettodata - return "<%s %s>"%(nameid, 
offsettodata) - - - -class ResDataEntry(CStruct): - _fields = [ ("offsettodata","u32"), - ("size","u32"), - ("codepage","u32"), - ("reserved","u32"), + return '%#x' % self.sectionnumber + section = property(section) + def type_str(self): + base_type = self.type & 0xf + cplx_type = self.type >> 4 + if base_type != 0: + return constants['IMAGE_SYM_TYPE'][base_type] + elif cplx_type in constants['IMAGE_SYM_DTYPE']: + return constants['IMAGE_SYM_DTYPE'][cplx_type] + else: + return '%#x' % cplx_type + type_str = property(type_str) + def storage(self): + if self.storageclass in constants['IMAGE_SYM_CLASS']: + return constants['IMAGE_SYM_CLASS'][self.storageclass] + else: + return '%#x' % self.storageclass + storage = property(storage) + def __repr__(self): + return "" % (self.name, self.value, self.section, self.type_str, self.storage, self.aux) + def __str__(self): + return '%-36r %-8s %-9s %#010x %s' % (self.name, self.type_str, self.storage, self.value, self.section) + +class CoffSymbols(CArray): + _cls = CoffSymbol + def count(self): + # Note that numberofsymbols also count AuxSymbols, while the + # length of this array does not. 
We need to keep track of the + # number of AuxSymbols up to now + if not hasattr(self, 'numberofaux'): self.numberofaux = 0 + if len(self._array): self.numberofaux += len(self[-1].aux) + return self.parent.COFFhdr.numberofsymbols - self.numberofaux + def unpack(self, c, o): + if o is None: + o = self.parent.COFFhdr.pointertosymboltable + CArray.unpack(self, c, o) + def getbyindex(self, n): + # An aux symbol counts, too + for s in self._array: + if n == 0: return s + n -= 1 + len(s.aux) + return None + def display(self): + res = '<%s>' % self.__class__.__name__ + for s in self.symbols: + res += '\n name=%r' % s.name + res += '\n type=%-8s storage=%-9s value=%#010x section=%s' % (s.type_str, s.storage, s.value, s.section) + return res + + # For API compatibility with previous versions of elfesteem + symbols = property(lambda _: _._array) + +class CoffOSF1Symbols(CStruct): + _fields = [ ("magic", "u16"), # 0x1992 + ("vstamp", "u16"), # 0x030b for version 3.13 + ("ilineMax", "u32"), + ("idnMax", "u32"), + ("ipdMax", "u32"), + ("isymMax", "u32"), + ("ioptMax", "u32"), + ("iauxMax", "u32"), + ("issMax", "u32"), + ("issExtMax", "u32"), + ("ifdMax", "u32"), + ("crfd", "u32"), + ("iextMax", "u32"), + ("cbLine", "u64"), + ("cbLineOffset", "u64"), + ("cbDnOffset", "u64"), + ("cbPdOffset", "u64"), + ("cbSymOffset", "u64"), + ("cbOptOffset", "u64"), + ("cbAuxOffset", "u64"), + ("cbSsOffset", "u64"), + ("cbSsExtOffset", "u64"), + ("cbFdOffset", "u64"), + ("cbRfdOffset", "u64"), + ("cbExtOffset", "u64"), ] - - -class Symb(CStruct): - _fields = [ ("name","8s"), - ("res1","u32"), - ("res2","u32"), - ("res3","u16")] - - -DIRECTORY_ENTRY_EXPORT = 0 -DIRECTORY_ENTRY_IMPORT = 1 -DIRECTORY_ENTRY_RESOURCE = 2 -DIRECTORY_ENTRY_EXCEPTION = 3 -DIRECTORY_ENTRY_SECURITY = 4 -DIRECTORY_ENTRY_BASERELOC = 5 -DIRECTORY_ENTRY_DEBUG = 6 -DIRECTORY_ENTRY_COPYRIGHT = 7 -DIRECTORY_ENTRY_GLOBALPTR = 8 -DIRECTORY_ENTRY_TLS = 9 -DIRECTORY_ENTRY_LOAD_CONFIG = 10 -DIRECTORY_ENTRY_BOUND_IMPORT = 11 
-DIRECTORY_ENTRY_IAT = 12 -DIRECTORY_ENTRY_DELAY_IMPORT = 13 -DIRECTORY_ENTRY_COM_DESCRIPTOR = 14 -DIRECTORY_ENTRY_RESERVED = 15 - - -RT_CURSOR = 1 -RT_BITMAP = 2 -RT_ICON = 3 -RT_MENU = 4 -RT_DIALOG = 5 -RT_STRING = 6 -RT_FONTDIR = 7 -RT_FONT = 8 -RT_ACCELERATOR = 9 -RT_RCDATA = 10 -RT_MESSAGETABLE = 11 -RT_GROUP_CURSOR = 12 -RT_GROUP_ICON = 14 -RT_VERSION = 16 -RT_DLGINCLUDE = 17 -RT_PLUGPLAY = 19 -RT_VXD = 20 -RT_ANICURSOR = 21 -RT_ANIICON = 22 -RT_HTML = 23 -RT_MANIFEST = 24 - - -RT = { - RT_CURSOR :"RT_CURSOR", - RT_BITMAP :"RT_BITMAP", - RT_ICON :"RT_ICON", - RT_MENU :"RT_MENU", - RT_DIALOG :"RT_DIALOG", - RT_STRING :"RT_STRING", - RT_FONTDIR :"RT_FONTDIR", - RT_FONT :"RT_FONT", - RT_ACCELERATOR :"RT_ACCELERATOR", - RT_RCDATA :"RT_RCDATA", - RT_MESSAGETABLE :"RT_MESSAGETABLE", - RT_GROUP_CURSOR :"RT_GROUP_CURSOR", - RT_GROUP_ICON :"RT_GROUP_ICON", - RT_VERSION :"RT_VERSION", - RT_DLGINCLUDE :"RT_DLGINCLUDE", - RT_PLUGPLAY :"RT_PLUGPLAY", - RT_VXD :"RT_VXD", - RT_ANICURSOR :"RT_ANICURSOR", - RT_ANIICON :"RT_ANIICON", - RT_HTML :"RT_HTML", - RT_MANIFEST :"RT_MANIFEST", - } - - - -if __name__ == "__main__": - import sys - PEFILE = sys.stdin - if len(sys.argv) > 1: - PEFILE = open(sys.argv[1]) - dhdr = Doshdr._from_file(PEFILE) - print repr(dhdr) - print "sigMZ:", hex(dhdr.magic),hex(len(dhdr)) - - PEFILE.seek(dhdr.lfanew) - nthdr = NThdr._from_file(PEFILE) - print repr(nthdr) - print "sigPE:", hex(nthdr.signature),hex(len(nthdr)) - - PEFILE.seek(dhdr.lfanew+len(nthdr)) - opthdr = Opthdr._from_file(PEFILE) - print repr(opthdr) - print "sigHDR:",hex(opthdr.magic),hex(len(opthdr)) - - PEFILE.seek(dhdr.lfanew+len(nthdr)+len(opthdr)) - for i in xrange(opthdr.numberofrvaandsizes): - optehdr = Optehdr._from_file(PEFILE) - print repr(optehdr) - - - - print hex(dhdr.lfanew+len(nthdr)+nthdr.sizeofoptionalheader) - PEFILE.seek(dhdr.lfanew+len(nthdr)+nthdr.sizeofoptionalheader) - for i in xrange(nthdr.numberofsections): - #PEFILE.seek(dhdr.lfanew+len(nthdr)) - shdr = 
Shdr._from_file(PEFILE) - print repr(shdr) - print "name:",shdr.name,hex(len(shdr)) + # TODO: parse the various Symbol Tables + def __repr__(self): + s = '<%s\n' % self.__class__.__name__ + s += ' magic=%#x' % self.magic + s += ' version %d.%d\n' % (self.vstamp>>8, self.vstamp&0xff) + s += ' line %#010x %d lines %d bytes\n' % (self.cbLineOffset, self.ilineMax, self.cbLine) + s += ' dn %#010x %d (obsolete)\n'% (self.cbDnOffset, self.idnMax) + s += ' pd %#010x %d entries\n' % (self.cbPdOffset, self.ipdMax) + s += ' sym %#010x %d entries\n' % (self.cbSymOffset, self.isymMax) + s += ' opt %#010x %d bytes\n' % (self.cbOptOffset, self.ioptMax) + s += ' aux %#010x %d entries\n' % (self.cbAuxOffset, self.iauxMax) + s += ' ss %#010x %d bytes\n' % (self.cbSsOffset, self.issMax) + s += ' ssExt %#010x %d bytes\n' % (self.cbSsExtOffset, self.issExtMax) + s += ' fd %#010x %d entries\n' % (self.cbFdOffset, self.ifdMax) + s += ' rfd %#010x %d entries\n' % (self.cbRfdOffset, self.crfd) + s += ' ext %#010x %d entries\n' % (self.cbExtOffset, self.iextMax) + s += '>' + return s diff --git a/elfesteem/pe_init.py b/elfesteem/pe_init.py index 81442a2..fcf32a4 100644 --- a/elfesteem/pe_init.py +++ b/elfesteem/pe_init.py @@ -1,86 +1,75 @@ #! 
/usr/bin/env python import struct, array -import pe -from strpatchwork import StrPatchwork -import logging -from collections import defaultdict -log = logging.getLogger("peparse") -console_handler = logging.StreamHandler() -console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) -log.addHandler(console_handler) -log.setLevel(logging.WARN) - - - -class ContentManager(object): - def __get__(self, owner, x): - if hasattr(owner, '_content'): - return owner._content - def __set__(self, owner, new_content): - owner.resize(len(owner._content), len(new_content)) - owner._content=new_content - #owner.parse_content() - def __delete__(self, owner): - self.__set__(owner, None) - - -class drva: +from elfesteem import pe +from elfesteem.strpatchwork import StrPatchwork +log = pe.log + +import sys +if sys.version_info[0:2] == (2, 3): + from elfesteem.compatibility_python23 import sorted, reversed + mask32 = (eval("1L")<<32)-1 # 'eval' avoids SyntaxError with python3.x +else: + mask32 = eval("0xffffffff") # 'eval' avoids warnings with python2.3 + + +class ContentRVA(object): def __init__(self, x): self.parent = x def get_slice_raw(self, item): if not type(item) is slice: return None rva_items = self.get_rvaitem(item.start, item.stop, item.step) - if not rva_items: - return - data_out = "" + if rva_items is None: + return pe.data_null + data_out = pe.data_empty for s, n_item in rva_items: - if s: - data_out += s.data.__getitem__(n_item) + if s is not None: + data_out += s.section_data.__getitem__(n_item) else: data_out += self.parent.__getitem__(n_item) return data_out - def get_rvaitem(self, start, stop = None, step = None): - if not self.parent.SHList: - return [(None, start)] - if stop == None: - s = self.parent.getsectionbyrva(start) - if not s: + def get_rvaitem(self, start, stop = None, section = None): + if stop is None: + s = self.parent.getsectionbyrva(start, section) + if s is None: return [(None, start)] - start = start-s.addr + start = 
start-s.vaddr return [(s, start)] total_len = stop - start + s_min = self.parent.SHList[0].vaddr + if hasattr(self.parent, 'NThdr'): + s_min = min(s_min, self.parent.NThdr.sizeofheaders) rva_items = [] while total_len: # special case if look at pe hdr address - if 0 <= start < min(self.parent.SHList[0].addr, - self.parent.NThdr.sizeofheaders): + if 0 <= start < s_min: s_start = start s_stop = stop - s_max = min(self.parent.SHList[0].addr, - self.parent.NThdr.sizeofheaders) + s_max = s_min s = None else: - s = self.parent.getsectionbyrva(start) - if not s: - log.warn('unknown rva address! %x'%start) + s = self.parent.getsectionbyrva(start, section) + if s is None: + log.warning('unknown rva address! %x'%start) return [] - s_max = max(s.size, s.rawsize) - s_start = start - s.addr - s_stop = stop - s.addr - #print hex(s_stop), hex(s_start) + s_max = s.rawsize + if hasattr(self.parent, 'NThdr'): + # PE, not COFF + # paddr contains the virtual size + s_max = max(s.paddr, s_max) + s_start = start - s.vaddr + s_stop = stop - s.vaddr if s_stop >s_max: - #print 'yy' - #raise ValueError('lack data %d, %d'%(stop, s_max)) s_stop = s_max - #print hex(s_start), hex(s_stop) s_len = s_stop - s_start total_len -= s_len start += s_len - n_item = slice(s_start, s_stop, step) + n_item = slice(s_start, s_stop) rva_items.append((s, n_item)) + if s_len <= 0: + break return rva_items def __getitem__(self, item): @@ -89,22 +78,24 @@ def __setitem__(self, item, data): if not type(item) is slice: item = slice(item, item+len(data), None) rva_items = self.get_rvaitem(item.start, item.stop, item.step) - if not rva_items: + if rva_items is None: return off = 0 for s, n_item in rva_items: + if s is None: + log.warning('Cannot write at RVA %s', n_item) + continue i = slice(off, n_item.stop+off-n_item.start, n_item.step) data_slice = data.__getitem__(i) - s.data.__setitem__(n_item, data_slice) + s.section_data.__setitem__(n_item, data_slice) off = i.stop #XXX test patch content - file_off = 
self.parent.rva2off(s.addr+n_item.start) + file_off = self.parent.rva2off(s.vaddr+n_item.start) if self.parent.content: self.parent.content = self.parent.content[:file_off]+ data_slice + self.parent.content[file_off+len(data_slice):] - return #s.data.__setitem__(n_item, data) -class virt: +class ContentVirtual(object): def __init__(self, x): self.parent = x @@ -120,217 +111,307 @@ def item_virt2rva(self, item): def __getitem__(self, item): rva_item = self.item_virt2rva(item) return self.parent.drva.__getitem__(rva_item) + def get(self, start, end): + # Deprecated API + return self[start:end] def __setitem__(self, item, data): if not type(item) is slice: item = slice(item, item+len(data), None) rva_item = self.item_virt2rva(item) self.parent.drva.__setitem__(rva_item, data) + def set(self, addr, data): + # Deprecated API + self[addr] = data def __len__(self): - s = self.parent.SHList[-1] - l = s.addr+s.size+self.parent.NThdr.ImageBase - return int(l) + # __len__ should not be used: Python returns an int object, which + # will cap values to 0x7FFFFFFF on 32 bit systems. A binary can have + # a base address higher than this, resulting in the impossibility to + # handle such programs. 
+ log.warning("__len__ deprecated") + return self.max_addr() + def max_addr(self): + l = 0 + for s in self.parent.SHList: + l = max(l, s.vaddr+s.size) + if hasattr(self.parent, 'NThdr'): + l += self.parent.NThdr.ImageBase + return int(l) - def find(self, pattern, offset = 0): - if offset != 0: - offset = self.parent.virt2rva(offset) + def find(self, pattern, start = 0, end = None): + if start != 0: + start = self.parent.virt2rva(start) + if end != None: + end = self.parent.virt2rva(end) sections = [] for s in self.parent.SHList: s_max = max(s.size, s.rawsize) - if offset < s.addr + s_max: + if s.vaddr+s_max <= start: + continue + if end is None or s.vaddr < end: sections.append(s) if not sections: return -1 - offset -= sections[0].addr - if offset < 0: - offset = 0 for s in sections: - ret = s.data.find(pattern, offset) - if ret != -1: - return self.parent.rva2virt(s.addr + ret) - offset = 0 + if s.vaddr < start: + off = start - s.vaddr + else: + off = 0 + ret = s.section_data.find(pattern, off) + if ret == -1: + continue + if end != None and s.vaddr + ret >= end: + return -1 + return self.parent.rva2virt(s.vaddr + ret) + return -1 + + def rfind(self, pattern, start = 0, end = None): + if start != 0: + start = self.parent.virt2rva(start) + if end != None: + end = self.parent.virt2rva(end) + + sections = [] + for s in self.parent.SHList: + s_max = max(s.size, s.rawsize) + if s.vaddr+s_max <= start: + continue + if end is None or s.vaddr < end: + sections.append(s) + if not sections: + return -1 + + for s in reversed(sections): + if s.vaddr < start: + off = start - s.vaddr + else: + off = 0 + if end is None: + ret = s.section_data.rfind(pattern, off) + else: + ret = s.data.rfind(pattern, off, end-s.vaddr) + if ret == -1: + continue + return self.parent.rva2virt(s.vaddr + ret) return -1 def is_addr_in(self, ad): return self.parent.is_in_virt_address(ad) - def __call__(self, ad_start, ad_stop = None, ad_step = None): + def __call__(self, ad_start, ad_stop = None, 
section = None): ad_start = self.parent.virt2rva(ad_start) if ad_stop != None: ad_stop = self.parent.virt2rva(ad_stop) - - rva_items = self.parent.drva.get_rvaitem(ad_start, ad_stop, ad_step) - data_out = "" + rva_items = self.parent.drva.get_rvaitem(ad_start, ad_stop, section) + data_out = pe.data_empty for s, n_item in rva_items: - if s: - data_out += s.data.__getitem__(n_item) - else: + if s is None: data_out += self.parent.__getitem__(n_item) - + else: + data_out += s.section_data.data.__getitem__(n_item) return data_out +class StrTable(object): + def __init__(self, c): + self.res = {} + self.names = {} + self.trail = pe.data_empty + self.len = 0 + while c: + p = c.find(pe.data_null) + if p < 0: + self.trail = c + break + self.res[self.len] = c[:p] + self.names[c[:p]] = self.len + self.len += p+1 + c = c[p+1:] + def __str__(self): + raise AttributeError("Use pack() instead of str()") + def pack(self): + res = pe.data_empty + k = sorted(self.res.keys()) + for s in k: + if len(res) != s: + raise ValueError("StrTable is incoherent : %r != %r"%(len(res),s)) + res += self.res[s] + pe.data_null + return res + self.trail + def add(self, name): + if name in self.names: + return self.names[name] + self.res[self.len] = name + self.names[name] = self.len + self.len += len(name)+1 + def rem(self, name): + TODO + def getby_name(self, name): + return self.names[name] + def getby_offset(self, of): + return self.res.get(of, "") + # PE object class PE(object): - content = ContentManager() - def __init__(self, pestr = None, loadfrommem=False): - self._drva = drva(self) - self._virt = virt(self) - if pestr == None: - self._content = StrPatchwork() - self._sex = 0 - self._wsize = 32 - self.Doshdr = pe.Doshdr(self) - self.NTsig = pe.NTsig(self) - self.Coffhdr = pe.Coffhdr(self) - - if self._wsize == 32: - Opthdr = pe.Opthdr32 - else: - Opthdr = pe.Opthdr64 - - self.Opthdr = Opthdr(self) - self.NThdr = pe.NThdr(self) - self.NThdr.optentries = [pe.Optehdr(self) for x in xrange(0x10)] 
- self.NThdr.CheckSum = 0 - self.SHList = pe.SHList(self) - self.SHList.shlist = [] - - self.DirImport = pe.DirImport(self) - self.DirExport = pe.DirExport(self) - self.DirDelay = pe.DirDelay(self) - self.DirReloc = pe.DirReloc(self) - self.DirRes = pe.DirRes(self) - - self.Doshdr.magic = 0x5a4d - self.Doshdr.lfanew = 0x200 - - - self.Opthdr.magic = 0x10b + # API shared by all/most binary containers + architecture = property(lambda _:pe.constants['IMAGE_FILE_MACHINE'].get(_.COFFhdr.machine,'UNKNOWN(%d)'%_.COFFhdr.machine)) + entrypoint = property(lambda _:_.rva2virt(_.Opthdr.AddressOfEntryPoint)) + sections = property(lambda _:_.SHList.shlist) + symbols = property(lambda _:getattr(_, 'Symbols', ())) + dynsyms = property(lambda _:()) + + Coffhdr = property(lambda self: self.COFFhdr) # Older API + Doshdr = property(lambda self: self.DOShdr) # Older API + def __init__(self, pestr = None, + parse_resources = True, + parse_delay = True, + parse_reloc = True, + wsize = 32): + self._rva = ContentRVA(self) + self._virt = ContentVirtual(self) + if pestr is None: + self.sex = '<' + self.wsize = wsize + self.DOShdr = pe.DOShdr(parent=self, wsize=32) + self.NTsig = pe.NTsig(parent=self, wsize=32) + self.COFFhdr = pe.COFFhdr(parent=self, wsize=32) + self.Opthdr = {32: pe.Opthdr32, 64: pe.Opthdr64}[wsize](parent=self) + self.NThdr = pe.NThdr(parent=self) + self.SHList = pe.SHList(parent=self, wsize=32) + + self.DirImport = pe.DirImport(parent=self) + self.DirExport = pe.DirExport(parent=self) + self.DirDelay = pe.DirDelay(parent=self) + self.DirReloc = pe.DirReloc(parent=self) + self.DirRes = pe.DirRes(parent=self) + + self.DOShdr.magic = 0x5a4d + self.DOShdr.lfanew = 0xe0 + + if wsize == 32: + self.COFFhdr.machine = pe.IMAGE_FILE_MACHINE_I386 + self.COFFhdr.characteristics = 0x10f + self.COFFhdr.sizeofoptionalheader = 0xe0 + self.Opthdr.magic = pe.IMAGE_NT_OPTIONAL_HDR32_MAGIC + elif wsize == 64: + self.COFFhdr.machine = pe.IMAGE_FILE_MACHINE_AMD64 + 
self.COFFhdr.characteristics = 0x22 + self.COFFhdr.sizeofoptionalheader = 0xf0 + self.Opthdr.magic = pe.IMAGE_NT_OPTIONAL_HDR64_MAGIC self.Opthdr.majorlinkerversion = 0x7 self.Opthdr.minorlinkerversion = 0x0 - self.NThdr.filealignment = 0x1000 + + self.NThdr.ImageBase = 0x400000 self.NThdr.sectionalignment = 0x1000 + self.NThdr.filealignment = 0x200 + self.NThdr.filealignment = 0x1000 # previous versions of elfesteem self.NThdr.majoroperatingsystemversion = 0x5 self.NThdr.minoroperatingsystemversion = 0x1 self.NThdr.MajorImageVersion = 0x5 self.NThdr.MinorImageVersion = 0x1 self.NThdr.majorsubsystemversion = 0x4 self.NThdr.minorsubsystemversion = 0x0 - self.NThdr.subsystem = 0x2 + self.NThdr.subsystem = 0x3 self.NThdr.dllcharacteristics = 0x8000 - - #for createthread self.NThdr.sizeofstackreserve = 0x200000 self.NThdr.sizeofstackcommit = 0x1000 self.NThdr.sizeofheapreserve = 0x100000 self.NThdr.sizeofheapcommit = 0x1000 - - self.NThdr.ImageBase = 0x400000 self.NThdr.sizeofheaders = 0x1000 self.NThdr.numberofrvaandsizes = 0x10 - + self.NThdr.optentries = pe.OptNThdrs(parent=self.NThdr) + for _ in range(self.NThdr.numberofrvaandsizes): + self.NThdr.optentries.append(pe.OptNThdr(parent=self.NThdr.optentries)) + self.NThdr._size += self.NThdr.optentries.bytelen + self.NThdr.CheckSum = 0 self.NTsig.signature = 0x4550 - self.Coffhdr.machine = 0x14c - self.Coffhdr.sizeofoptionalheader = 0xe0 - self.Coffhdr.characteristics = 0x10f - + self.content = StrPatchwork(self.pack()) else: - self._content = StrPatchwork(pestr) - self.loadfrommem = loadfrommem - self.parse_content() + self.content = StrPatchwork(pestr) + self.parse_content(parse_resources = parse_resources, + parse_delay = parse_delay, + parse_reloc = parse_reloc) + # For API compatibility with previous versions of elfesteem + self._sex = '<>'.index(self.sex) + self._wsize = self.wsize def isPE(self): + if not hasattr(self, 'NTsig') or self.NTsig is None: + return False return self.NTsig.signature == 0x4550 - def 
parse_content(self): + def has_relocatable_sections(self): + # Typically .obj COFF object files for Windows. + # All sections start at vaddr==0 because they are relocated by + # the linker. + return self.COFFhdr.characteristics & pe.IMAGE_FILE_FLAG_EXECUTABLE_IMAGE == 0 + + def parse_content(self, + parse_resources = True, + parse_delay = True, + parse_reloc = True): + h = struct.unpack("BB", self.content[:2]) + if h != ( 0x4d,0x5a ): # magic number, MZ + raise ValueError("Not a PE, no MZ magic number") of = 0 - self._sex = 0 - self._wsize = 32 - self.Doshdr = pe.Doshdr.unpack(self.content, of, self) - #print repr(self.Doshdr) - of = self.Doshdr.lfanew - self.NTsig = pe.NTsig.unpack(self.content, - of, self) - self.DirImport = None - self.DirExport = None - self.DirDelay = None - self.DirReloc = None - self.DirRes = None - - - if self.NTsig.signature != 0x4550: - return - of += len(self.NTsig) - self.Coffhdr, l = pe.Coffhdr.unpack_l(self.content, - of, - self) - - of += l - m = struct.unpack('H', self.content[of:of+2])[0] - m = (m>>8)*32 - self._wsize = m - - if self._wsize == 32: - Opthdr = pe.Opthdr32 - else: - Opthdr = pe.Opthdr64 - - self.Opthdr, l = Opthdr.unpack_l(self.content, of, self) - #print hex(of+len(self.Opthdr)) - self.NThdr = pe.NThdr.unpack(self.content, of+l, self) - #print repr(self.NThdr.optentries) - of += self.Coffhdr.sizeofoptionalheader - self.SHList = pe.SHList.unpack(self.content, of, self) - #print repr(self.SHList) - - # load section data - filealignment = self.NThdr.filealignment - for s in self.SHList.shlist: - if self.loadfrommem: - s.offset = s.addr - if filealignment ==0: - raw_off = s.offset - else: - raw_off = filealignment*(s.offset/filealignment) - if raw_off != s.offset: - log.warn('unaligned raw section!') - s.data = StrPatchwork() - s.data[0] = self.content[raw_off:raw_off+s.rawsize] - self.DirImport = pe.DirImport.unpack(self.content, - self.NThdr.optentries[pe.DIRECTORY_ENTRY_IMPORT].rva, - self) - self.DirExport = 
pe.DirExport.unpack(self.content, - self.NThdr.optentries[pe.DIRECTORY_ENTRY_EXPORT].rva, - self) - if len(self.NThdr.optentries) > pe.DIRECTORY_ENTRY_DELAY_IMPORT: - self.DirDelay = pe.DirDelay.unpack(self.content, - self.NThdr.optentries[pe.DIRECTORY_ENTRY_DELAY_IMPORT].rva, - self) - if len(self.NThdr.optentries) > pe.DIRECTORY_ENTRY_BASERELOC: - self.DirReloc = pe.DirReloc.unpack(self.content, - self.NThdr.optentries[pe.DIRECTORY_ENTRY_BASERELOC].rva, - self) - if len(self.NThdr.optentries) > pe.DIRECTORY_ENTRY_RESOURCE: - self.DirRes = pe.DirRes.unpack(self.content, - self.NThdr.optentries[pe.DIRECTORY_ENTRY_RESOURCE].rva, - self) - #self.Symbols = ClassArray(self, WSymb, self.Coffhdr.Coffhdr.pointertosymboltable, self.Coffhdr.Coffhdr.numberofsymbols) - - #print repr(self.Doshdr) - #print repr(self.Coffhdr) - #print repr(self.Opthdr) - #print repr(self.SHList) - - #print repr(self.DirImport) - #print repr(self.DirExport) - #print repr(self.DirReloc) - #print repr(self.DirRes) + self.sex = '<' + self.wsize = 32 + self.DOShdr = pe.DOShdr(parent=self, content=self.content, start=of) + of = self.DOShdr.lfanew + if of > len(self.content): + raise ValueError('Not a PE, NTsig offset after eof %#x', of) + self.NTsig = pe.NTsig(parent=self, content=self.content, start=of) + if self.NTsig.signature != 0x4550: # PE\0\0 + raise ValueError('Not a PE, NTsig is %#x', self.NTsig.signature) + + of += self.NTsig.bytelen + self.COFFhdr = pe.COFFhdr(parent=self, content=self.content, start=of) + of += self.COFFhdr.bytelen + magic, = struct.unpack('H', self.content[of:of+2]) + self.wsize = (magic>>8)*32 + if not magic in (0x10b, 0x20b): + # e.g. 
Ange Albertini's d_nonnull.dll d_tiny.dll + log.warning('Opthdr magic %#x', magic) + self.wsize = 32 + self.Opthdr = {32: pe.Opthdr32, 64: pe.Opthdr64}[self.wsize](parent=self, content=self.content, start=of) + l = self.Opthdr.bytelen + self.NThdr = pe.NThdr(parent=self, content=self.content, start=of+l) + of += self.COFFhdr.sizeofoptionalheader + if self.NThdr.numberofrvaandsizes < 13: + log.warning('Windows 8 needs at least 13 directories, %d found', + self.NThdr.numberofrvaandsizes) + # Even if the NT header has 64-bit pointers, in 64-bit PE files + # the Section headers have 32-bit pointers (it is a 32-bit COFF + # in a 64-bit PE). + self.SHList = pe.SHList(parent=self, content=self.content, start=of, + wsize=32) + + # Directory parsing. + # 'start' is None, because the offset is computed from the RVA + # in the NT header + kargs = { 'parent':self, 'content':self.content, 'start':None } + self.DirImport = pe.DirImport(**kargs) + self.DirExport = pe.DirExport(**kargs) + if parse_delay: self.DirDelay = pe.DirDelay(**kargs) + if parse_reloc: self.DirReloc = pe.DirReloc(**kargs) + if parse_resources: self.DirRes = pe.DirRes (**kargs) + + if self.COFFhdr.pointertosymboltable != 0: + if self.COFFhdr.pointertosymboltable + 18 * self.COFFhdr.numberofsymbols > len(self.content): + log.warning('Too many symbols: %d', self.COFFhdr.numberofsymbols) + self.Symbols = pe.CoffSymbols(**kargs) + if hasattr(self, 'Symbols'): + of = self.COFFhdr.pointertosymboltable + self.Symbols.bytelen + sz, = struct.unpack(self.sex+'I',self.content[of:of+4]) + if len(self.content) < of+sz: + log.warning('File too short for StrTable %#x != %#x' % ( + len(self.content)-of, sz)) + sz = len(self.content) - of + self.SymbolStrings = StrTable(self.content[of:of+sz]) def resize(self, old, new): pass @@ -338,55 +419,62 @@ def __getitem__(self, item): return self.content[item] def __setitem__(self, item, data): self.content.__setitem__(item, data) - return - def getsectionbyrva(self, rva): - if not 
self.SHList: - return None + def getsectionbyrva(self, rva, section = None): + if section: + return self.getsectionbyname(section) for s in self.SHList.shlist: - if s.addr <= rva < s.addr+s.size: + if s.vaddr <= rva < s.vaddr+s.size: return s return None - def getsectionbyvad(self, vad): - return self.getsectionbyrva(self.virt2rva(vad)) + def getsectionbyvad(self, vad, section = None): + return self.getsectionbyrva(self.virt2rva(vad), section) def getsectionbyoff(self, off): - if not self.SHList: - return None for s in self.SHList.shlist: - if s.offset <= off < s.offset+s.rawsize: + if s.scnptr <= off < s.scnptr+s.rsize: return s return None def getsectionbyname(self, name): - if not self.SHList: - return None for s in self.SHList: if s.name.strip('\x00') == name: return s return None - def rva2off(self, rva): - s = self.getsectionbyrva(rva) - if not s: - return - return rva-s.addr+s.offset + def rva2off(self, rva, section = None): + if section is None and self.has_relocatable_sections(): + # TODO: .obj cannot convert rva2off without knowing the section + return None + # Special case rva in header + if not self.has_relocatable_sections() and rva < self.NThdr.sizeofheaders: + return rva + s = self.getsectionbyrva(rva, section) + if s is None: + # e.g. Ange Albertini's tinyW7_3264.exe where sizeofheaders is 0 + # therefore the import table is in no section but not detected as + # in the headers. 
+ # We use 0x400 because it is the normal size for headers + if rva < 0x400: + return rva + return None + return rva-s.vaddr+s.scn_baseoff def off2rva(self, off): s = self.getsectionbyoff(off) - if not s: - return - return off-s.offset+s.addr + if s is None: + return None + return off-s.scn_baseoff+s.vaddr def virt2rva(self, virt): - if virt == None: - return + if virt is None or not hasattr(self, 'NThdr'): + return virt return virt - self.NThdr.ImageBase def rva2virt(self, rva): - if rva == None: - return + if rva is None or not hasattr(self, 'NThdr'): + return rva return rva + self.NThdr.ImageBase def virt2off(self, virt): @@ -396,39 +484,34 @@ def off2virt(self, off): return self.rva2virt(self.off2rva(off)) def is_in_virt_address(self, ad): - if ad < self.NThdr.ImageBase: + if hasattr(self, 'NThdr') and ad < self.NThdr.ImageBase: return False ad = self.virt2rva(ad) for s in self.SHList.shlist: - if s.addr <= ad < s.addr + s.size: + if s.vaddr <= ad < s.vaddr + s.size: return True return False - def get_drva(self): - return self._drva - - drva = property(get_drva) - - def get_virt(self): - return self._virt - - virt = property(get_virt) + drva = property(lambda _: _._rva) # Deprecated + rva = property(lambda _: _._rva) + virt = property(lambda _: _._virt) def patch_crc(self, c, olds): - s = 0L + s = 0 data = c[:] l = len(data) if len(c)%2: - end = struct.unpack('B', data[-1])[0] + end = struct.unpack('B', data[-1:])[0] data = data[:-1] if (len(c)&~0x1)%4: s+=struct.unpack('H', data[:2])[0] data = data[2:] data = array.array('I', data) - s = reduce(lambda x,y:x+y, data, s) + for y in data: + s += y s-=olds - while s>0xFFFFFFFF: - s = (s>>32)+(s&0xFFFFFFFF) + while s>mask32: + s = (s>>32)+(s&mask32) while s>0xFFFF: s = (s&0xFFFF)+((s>>16)&0xFFFF) if len(c)%2: @@ -436,57 +519,86 @@ def patch_crc(self, c, olds): s+=l return s - def build_content(self): + def build_headers(self, c): + off = self.DOShdr.lfanew + c[off] = self.NTsig.pack() + off += self.NTsig.bytelen + 
c[off] = self.COFFhdr.pack() + off += self.COFFhdr.bytelen + c[off] = self.Opthdr.pack() + off += self.Opthdr.bytelen + c[off] = self.NThdr.pack() + off += self.NThdr.bytelen + def build_content(self): c = StrPatchwork() - c[0] = str(self.Doshdr) - - for s in self.SHList.shlist: - c[s.offset:s.offset+s.rawsize] = str(s.data) + c[self.NThdr.sizeofheaders-1] = pe.data_null + c[0] = self.DOShdr.pack() # fix image size - s_last = self.SHList.shlist[-1] - size = s_last.addr + s_last.size + (self.NThdr.sectionalignment-1) - size &= ~(self.NThdr.sectionalignment-1) - self.NThdr.sizeofimage = size - - off = self.Doshdr.lfanew - c[off] = str(self.NTsig) - off += len(self.NTsig) - c[off] = str(self.Coffhdr) - off += len(self.Coffhdr) - c[off] = str(self.Opthdr) - off += len(self.Opthdr) - c[off] = str(self.NThdr) - off += len(self.NThdr) - #c[off] = str(self.Optehdr) - - off = self.Doshdr.lfanew+len(self.NTsig)+len(self.Coffhdr)+self.Coffhdr.sizeofoptionalheader - c[off] = str(self.SHList) - - for s in self.SHList: - if off + len(str(self.SHList)) > s.offset: - log.warn("secion offset overlap pe hdr 0x%x 0x%x"%(off+len(str(self.SHList)), s.offset)) - self.DirImport.build_content(c) - self.DirExport.build_content(c) - self.DirDelay.build_content(c) - self.DirReloc.build_content(c) - self.DirRes.build_content(c) - s = str(c) - if (self.Doshdr.lfanew+len(self.NTsig)+len(self.Coffhdr))%4: - log.warn("non aligned coffhdr, bad crc calculation") - crcs = self.patch_crc(s, self.NThdr.CheckSum) - c[self.Doshdr.lfanew+len(self.NTsig)+len(self.Coffhdr)+64] = struct.pack('I', crcs) - return str(c) + if len(self.SHList): + s_last = self.SHList.shlist[-1] + size = s_last.vaddr + s_last.rsize + (self.NThdr.sectionalignment-1) + size &= ~(self.NThdr.sectionalignment-1) + self.NThdr.sizeofimage = size + + # headers + self.build_headers(c) + + # section headers + off = self.DOShdr.lfanew \ + + self.NTsig.bytelen \ + + self.COFFhdr.bytelen \ + + self.COFFhdr.sizeofoptionalheader + c[off] = 
self.SHList.pack() + off += self.SHList.bytelen + end_of_headers = off + + # section data + # note that the content of directories should have been already + # included section data, which is possible because position and + # size of sections are known at this point + for s in sorted(self.SHList, key=lambda _:_.scnptr): + if s.rawsize == 0: + continue + if end_of_headers > s.scnptr: + log.warning("section %s offset %#x overlap pe hdr %#x", + s.name, s.scnptr, off) + elif off > s.scnptr: + log.warning("section %s offset %#x overlap previous section", + s.name, s.scnptr) + off = s.scnptr+s.rawsize + c[s.scnptr:off] = s.section_data.data.pack() + + # symbols and strings + if self.COFFhdr.numberofsymbols: + self.COFFhdr.pointertosymboltable = off + c[off] = self.Symbols.pack() + assert self.Symbols.bytelen == 18 * self.COFFhdr.numberofsymbols + off += self.Symbols.bytelen + c[off] = self.SymbolStrings.pack() + + # some headers may have been updated when building sections or symbols + self.build_headers(c) + + # final verifications + l = self.DOShdr.lfanew + self.NTsig.bytelen + self.COFFhdr.bytelen + if l%4: + log.warning("non aligned coffhdr, bad crc calculation") + crcs = self.patch_crc(c.pack(), self.NThdr.CheckSum) + c[l+64] = struct.pack('I', crcs) + return c.pack() def __str__(self): + # For compatibility with previous versions of elftesteem + # But it will not work with python3, because __str__ must + # return a string, not bytes + return self.pack() + + def pack(self): return self.build_content() def export_funcs(self): - if not self.DirExport: - print 'no export dir found' - return None, None - all_func = {} for i, n in enumerate(self.DirExport.f_names): all_func[n.name.name] = self.rva2virt(self.DirExport.f_address[self.DirExport.f_nameordinals[i].ordinal].rva) @@ -495,9 +607,8 @@ def export_funcs(self): return all_func def reloc_to(self, imgbase): + DEPRECATED offset = imgbase - self.NThdr.ImageBase - if not self.DirReloc: - log.warn('no relocation found!') 
for rel in self.DirReloc.reldesc: rva = rel.rva for reloc in rel.rels: @@ -509,97 +620,147 @@ def reloc_to(self, imgbase): off += rva v = struct.unpack('I', self.drva[off:off+4])[0] v += offset - self.drva[off:off+4] = struct.pack('I', v & 0xFFFFFFFF) + self.drva[off:off+4] = struct.pack('I', v & mask32) self.NThdr.ImageBase = imgbase -class Coff(PE): - def parse_content(self): - self.Coffhdr = Coffhdr(self, 0) - self.Opthdr = Opthdr(self, pe.Coffhdr._size) - self.SHList = SHList(self, pe.Coffhdr._size+self.Coffhdr.Coffhdr.sizeofoptionalheader) - - self.Symbols = ClassArray(self, WSymb, self.Coffhdr.Coffhdr.pointertosymboltable, self.Coffhdr.Coffhdr.numberofsymbols) - - - -if __name__ == "__main__": - import rlcompleter,readline,pdb, sys - from pprint import pprint as pp - readline.parse_and_bind("tab: complete") - - e = PE(open(sys.argv[1]).read()) - print repr(e.DirImport) - print repr(e.DirExport) - print repr(e.DirDelay) - print repr(e.DirReloc) - print repr(e.DirRes) - - # XXX patch boundimport /!\ - e.NThdr.optentries[pe.DIRECTORY_ENTRY_BOUND_IMPORT].rva = 0 - e.NThdr.optentries[pe.DIRECTORY_ENTRY_BOUND_IMPORT].size = 0 - - s_redir = e.SHList.add_section(name = "redir", rawsize = 0x1000) - s_test = e.SHList.add_section(name = "test", rawsize = 0x1000) - s_rel = e.SHList.add_section(name = "rel", rawsize = 0x5000) - - - - new_dll = [({"name":"kernel32.dll", - "firstthunk":s_test.addr}, - ["CreateFileA", - "SetFilePointer", - "WriteFile", - "CloseHandle", - ] - ), - ({"name":"USER32.dll", - "firstthunk":None}, - ["SetDlgItemInt", - "GetMenu", - "HideCaret", - ] - ) - ] - e.DirImport.add_dlldesc(new_dll) - - if not e.DirExport.expdesc: - e.DirExport.create() - e.DirExport.add_name("coco") - - s_myimp = e.SHList.add_section(name = "myimp", rawsize = len(e.DirImport)) - s_myexp = e.SHList.add_section(name = "myexp", rawsize = len(e.DirExport)) - s_mydel = e.SHList.add_section(name = "mydel", rawsize = len(e.DirDelay)) - s_myrel = e.SHList.add_section(name = 
"myrel", rawsize = len(e.DirReloc)) - s_myres = e.SHList.add_section(name = "myres", rawsize = len(e.DirRes)) - - """ - for s in e.SHList.shlist: - s.offset+=0xC00 - """ - - e.SHList.align_sections(0x1000, 0x1000) - - e.DirImport.set_rva(s_myimp.addr) - e.DirExport.set_rva(s_myexp.addr) - if e.DirDelay.delaydesc: - e.DirDelay.set_rva(s_mydel.addr) - if e.DirReloc.reldesc: - e.DirReloc.set_rva(s_myrel.addr) - if e.DirRes.resdesc: - e.DirRes.set_rva(s_myres.addr) - - e_str = str(e) - print "f1", e.DirImport.get_funcvirt('LoadStringW') - print "f2", e.DirExport.get_funcvirt('SetUserGeoID') - open('out.bin', 'wb').write(e_str) - #o = Coff(open('main.obj').read()) - #print repr(o.Coffhdr) - #print repr(o.Opthdr) - #print repr(o.SHList) - #print 'numsymb', hex(o.Coffhdr.Coffhdr.numberofsymbols) - #print 'offset', hex(o.Coffhdr.Coffhdr.pointertosymboltable) - # - #print repr(o.Symbols) - - f = PE() - open('uu.bin', 'w').write(str(f)) +# The COFF file format happens to have many variants, +# quite different from the COFF embedded in PE files... +class COFF(PE): + # API shared by all/most binary containers + entrypoint = property(lambda _:getattr(_.Opthdr, 'entry', -1)) + + def parse_content(self, + parse_resources = True, + parse_delay = True, + parse_reloc = True): + # Note that there is no "magic number" to recognize COFF files. + # Therefore, the usual way to know if a file is COFF is to parse + # its content with this method. 
If it is not a COFF, then an + # exception is raised, of type ValueError + of = 0 + # Detect specific cases of COFF Header format, without knowing + # the endianess + COFFmachineLE, = struct.unpack("H", self.content[0:2]) + if pe.IMAGE_FILE_MACHINE_ALPHA_O in (COFFmachineLE, COFFmachineBE): + self.wsize = 64 + COFFhdr = pe.COFFhdr + sizeofoptionalheader = self.content[18:20] + elif pe.IMAGE_FILE_MACHINE_XCOFF64 in (COFFmachineLE, COFFmachineBE): + self.wsize = 64 + COFFhdr = pe.XCOFFhdr64 + sizeofoptionalheader = self.content[16:18] + else: + self.wsize = 32 + COFFhdr = pe.COFFhdr + sizeofoptionalheader = self.content[16:18] + # COFF endianess is tricky to determine, we use the fact + # that sizeofoptionalheader should be less than 256 + sizeofoptionalheader = struct.unpack("BB", sizeofoptionalheader) + if not 0 in sizeofoptionalheader: + raise ValueError("Not COFF: OptHdr size too big") + if sizeofoptionalheader[1] == 0: self.sex = '<' + else: self.sex = '>' + self.COFFhdr = COFFhdr(parent=self, content=self.content, start=of) + of += self.COFFhdr.bytelen + if self.COFFhdr.machine == pe.IMAGE_FILE_MACHINE_TI: + m = struct.unpack('H', self.content[of:of+2])[0] + self.CPU = { + # COFF for Texas Instruments + # Cf. 
http://www.ti.com/lit/an/spraao8/spraao8.pdf + # and https://gist.github.com/eliotb/1073231 + 0x97: 'TMS470', + 0x98: 'TMS320C5400', + 0x99: 'TMS320C6000', + 0x9C: 'TMS320C5500', + 0x9D: 'TMS320C2800', + 0xA0: 'MSP430', + 0xA1: 'TMS320C5500+', + }.get(m, 'unknown') + of += 2 + kargs = { 'parent': self, 'content': self.content, 'start': of } + if self.COFFhdr.sizeofoptionalheader == 28: + self.Opthdr = pe.Opthdr32(**kargs) + elif self.COFFhdr.sizeofoptionalheader == 36: + assert self.COFFhdr.machine == pe.IMAGE_FILE_MACHINE_CLIPPER + self.Opthdr = pe.OpthdrClipper(**kargs) + elif self.COFFhdr.sizeofoptionalheader == 44: + assert self.COFFhdr.machine == pe.IMAGE_FILE_MACHINE_APOLLOM68K + self.Opthdr = pe.OpthdrApollo(**kargs) + elif self.COFFhdr.sizeofoptionalheader == 56: + assert self.COFFhdr.machine in ( + pe.IMAGE_FILE_MACHINE_MIPSIII, + pe.IMAGE_FILE_MACHINE_MIPSEB, + pe.IMAGE_FILE_MACHINE_R3000, + pe.IMAGE_FILE_MACHINE_R4000, + pe.IMAGE_FILE_MACHINE_R10000) + self.Opthdr = pe.OpthdrECOFF32(**kargs) + elif self.COFFhdr.sizeofoptionalheader == 80: + assert self.COFFhdr.machine == pe.IMAGE_FILE_MACHINE_ALPHA_O + self.Opthdr = pe.OpthdrECOFF64(**kargs) + elif self.COFFhdr.sizeofoptionalheader == 72: + self.Opthdr = pe.OpthdrXCOFF32(**kargs) + elif self.COFFhdr.sizeofoptionalheader == 110: + self.Opthdr = pe.OpthdrXCOFF64(**kargs) + elif self.COFFhdr.sizeofoptionalheader == 0: + from elfesteem.pe import CStruct + class NullHdr(CStruct): + _fields = [ ] + self.Opthdr = NullHdr(**kargs) + elif (self.COFFhdr.sizeofoptionalheader % 4) == 0: + # All known OptHdr start with a 2-byte magic and 2-byte vstamp + from elfesteem.pe import CStruct + class OpthdrUnknown(CStruct): + _fields = [ ("magic", "u16"), ("vstamp", "u16") ] \ + + [ ("f%d"%_, "u32") + for _ in range(1, self.COFFhdr.sizeofoptionalheader // 4) ] + self.Opthdr = OpthdrUnknown(**kargs) + else: + # Size of COFF optional header should probably be a multiple of 4 + raise ValueError("COFF SZOPT 
%d"%self.COFFhdr.sizeofoptionalheader) + + of += self.COFFhdr.sizeofoptionalheader + filesz = len(self.content) + if self.COFFhdr.numberofsections == 0: + raise ValueError("COFF cannot have no sections") + if self.COFFhdr.numberofsections > 0x1000: + raise ValueError("COFF too many sections %d"%self.COFFhdr.numberofsections) + if of + self.COFFhdr.numberofsections * 40 > filesz: + raise ValueError("COFF too many sections %d, past end of file"%self.COFFhdr.numberofsections) + if self.COFFhdr.pointertosymboltable > filesz: + raise ValueError("COFF invalid ptr to symbol table") + self.SHList = pe.SHList(parent=self, content=self.content, start=of) + + of = self.COFFhdr.pointertosymboltable + if self.COFFhdr.machine == pe.IMAGE_FILE_MACHINE_ALPHA_O \ + and of != 0 \ + and struct.unpack('= item.start: break + else: + i = len(self.layout) + # Find the position in the layout where the data loading ends + for j, (o, _) in enumerate(self.layout): + if o > item.stop: break + else: + j = len(self.layout) + # Find what is the value after the end + _, prv_data = self.layout[j-1] + self.layout[i:j] = [(item.start, data),(item.stop, prv_data)] + def __getitem__(self, item): + ''' Return a list of (slice, data) which indicates what is in + memory at interval 'item'; the slices that are returned + are contiguous and add up to the whole 'item' slice. ''' + res = [] + for i, (stop, _) in enumerate(self.layout): + if item.start >= stop: + continue + start, data = self.layout[i-1] + if item.stop <= start: + continue + res.append((slice(max(item.start,start),min(item.stop,stop)),data)) + if stop < item.stop: + _, data = self.layout[-1] + res.append((slice(stop,item.stop),data)) + return res + def max_addr(self): + return self.layout[-1][0] + +class Virtual(object): + # This class manages 'virtual addresses', i.e. the addresses when + # the RPRC file is loaded in memory. + # These addresses are the ones used by absolute addressing in the + # executable code. 
+ def __init__(self, e): + self.parent = e + self.layout = Layout(overlap='silent') + for s in self.parent.sections: + self.layout[s.da:s.da+s.len] = s + def __getitem__(self, item): + # If 'item' is an integer, we return the byte at this address, + # else 'item' is a slice and we return the corresponding bytes, + # padded with zeroes. + if type(item) is slice: + assert item.step is None + start, stop = item.start, item.stop + else: + start, stop = item, item+1 + res = data_empty + for i, s in self.layout[start:stop]: + if s is None: res += data_null * (i.stop-i.start) # non-mapped + else: res += s.data[i.start-s.da:i.stop-s.da] + return res + def __setitem__(self, item, data): + # If 'item' is an integer, we write starting from this address + if type(item) is slice: + assert item.step is None + start, stop = item.start, item.stop + assert len(data) == stop-start + else: + start, stop = item, item+len(data) + l = self.layout[start:stop] + if None in [ s for _, s in l]: + raise ValueError('Addresses %#x:%#x not entirely mapped in memory'%(start,stop)) + for i, s in l: + of = i.start-start + s.data[i.start-s.da:i.stop-s.da] = data[i.start-s.da+of:i.stop-s.da+of] + def max_addr(self): + return self.layout.max_addr() + +class RPRC(object): + # API shared by all/most binary containers + architecture = property(lambda _:'ARM') + entrypoint = property(lambda _:-1) + #sections = property(lambda _:_.SHList.shlist) + symbols = property(lambda _:()) + dynsyms = property(lambda _:()) + + sex = '<' + wsize = 32 + virt = property(lambda _:_._virt) + def __init__(self, data = None, **kargs): + self.sections = [] + if data is not None: + self.content = StrPatchwork(data) + self.parse_content() + else: + # Create a RPRC file with no section + self.hdr = Header(parent=self) + self._virt = Virtual(self) + def parse_content(self): + h = struct.unpack("B"*4, self.content[:4]) + if h != ( 0x52,0x50,0x52,0x43 ): # magic number, RPRC + raise ValueError("Not an RPRC") + self.hdr = 
Header(parent=self, content=self.content) + of = self.hdr.bytelen + while of < len(self.content): + s = Section(parent=self, content=self.content, start=of) + self.sections.append(s) + of += s.bytelen + def pack(self): + c = StrPatchwork() + c[0] = self.hdr.pack() + of = self.hdr.bytelen + for s in self.sections: + c[of] = s.pack() + of += s.bytelen + return c.pack() + def display(self): + # Same output as 'readrprc' + rep = [self.hdr.display()] + [s.display() for s in self.sections] + return '\n'.join(rep) + def getsectionbyvad(self, ad): + # Same API as ELF or PE, but different implementation for accessing + # data by virtual addresses: a mechanism entirely inside 'virt' + # rather than split between two classes; future versions of + # elfesteem should probably do the same for all binary containers. + return self.virt.layout[ad:ad+1][0][1] + +if __name__ == "__main__": + import sys, code + if len(sys.argv) > 2: + for f in sys.argv[1:]: + print('File: %s'%f) + fd = open(f, 'rb') + try: + raw = fd.read() + finally: + fd.close() + e = RPRC(raw) + print (e.display()) + sys.exit(0) + if len(sys.argv) == 2: + fd = open(sys.argv[1], 'rb') + try: + raw = fd.read() + finally: + fd.close() + e = RPRC(raw) + code.interact('Interactive Python Console', None, locals()) diff --git a/elfesteem/strpatchwork.py b/elfesteem/strpatchwork.py index efe90f6..390fea4 100644 --- a/elfesteem/strpatchwork.py +++ b/elfesteem/strpatchwork.py @@ -1,34 +1,56 @@ from array import array -from sys import maxint -class StrPatchwork: - def __init__(self, s="", paddingbyte="\x00"): - self.s = array("B",str(s)) +# To be compatible with python 2 and python 3 +import sys +import struct +data_null = struct.pack("B",0) +data_empty = struct.pack("") + +class StrPatchwork(object): + def __init__(self, s=data_empty, paddingbyte=data_null): + if s is None: s = data_empty + if isinstance(s, StrPatchwork): s = s.pack() + self.s = array("B",s) # cache s to avoid rebuilding str after each find self.s_cache = s 
self.paddingbyte=paddingbyte def __str__(self): - return self.s.tostring() + raise AttributeError("Use pack() instead of str()") + def pack(self): + if sys.version_info[0] >= 3: + return self.s.tobytes() + else: + return self.s.tostring() def __getitem__(self, item): s = self.s if type(item) is slice: - end = item.stop - l = len(s) - if l < end and end != maxint: #XXX hack [x:] give 2GB limit - # This is inefficient but avoids complicated maths if step is not 1 - s = s[:] - s.extend(array("B",self.paddingbyte*(end-l))) r = s[item] - return r.tostring() - + end = item.stop + if end != None and len(s) < end: + if item.step is not None: + TODO + elif len(r) > 0: + # We go beyond the end of 's' + r.extend(array("B",self.paddingbyte*(end-len(s)))) + else: + # We are entirely after the end of 's' + start = item.start + if start is None: start = 0 + r = array("B",self.paddingbyte*(end-start)) else: if item > len(s): return self.paddingbyte else: - return chr(s[item]) + r = array("B",[s[item]]) + if sys.version_info[0] >= 3: + return r.tobytes() + else: + return r.tostring() def __setitem__(self, item, val): - if val == None: + if val is None: return + if sys.version_info[0] >= 3 and type(val) == str: + val = val.encode(encoding="latin1") val = array("B",val) if type(item) is not slice: item = slice(item, item+len(val)) @@ -41,17 +63,22 @@ def __setitem__(self, item, val): def __repr__(self): - return "" % self.s.tostring() + return "" % self.pack() def __len__(self): return len(self.s) def __contains__(self, val): - return val in str(self) + return val in self.pack() def __iadd__(self, other): self.s.extend(array("B", other)) return self - def find(self, pattern, offset = 0): + def find(self, pattern, *args): + if not self.s_cache: + self.s_cache = self.pack() + return self.s_cache.find(pattern, *args) + + def rfind(self, pattern, *args): if not self.s_cache: - self.s_cache = self.s.tostring() - return self.s_cache.find(pattern, offset) + self.s_cache = self.pack() + 
return self.s_cache.rfind(pattern, *args) diff --git a/elfesteem/visual_studio_mangling.py b/elfesteem/visual_studio_mangling.py new file mode 100755 index 0000000..4d0194c --- /dev/null +++ b/elfesteem/visual_studio_mangling.py @@ -0,0 +1,919 @@ +#! /usr/bin/env python + +import sys +if sys.version_info[0:2] == (2, 3): + from elfesteem.compatibility_python23 import reversed + +def symbol_demangle(symbol, verbose=False): + # Some documentation on Visual C++ name mangling is at + # https://github.com/wine-mirror/wine/blob/master/dlls/msvcrt/undname.c + # https://github.com/nico/demumble (includes wine's undname) + # https://en.wikiversity.org/wiki/Visual_C%2B%2B_name_mangling + # http://sourceforge.net/projects/php-ms-demangle/ + # A web interface to a demangler is available at https://demangler.com/ + # I also have made some tests with undname.exe of Visual Studio 14.0. + data = DemangleData(symbol, verbose=verbose) + # 'data' will contain the rest, not parsed + try: + return symbol_demangle_reentrant(data), data + except AssertionError: + return symbol, '' + except TypeError: + return symbol, '' + except KeyError: + return symbol, '' + +# We define quote and backquote to be reconfigurable +quote_b = "`" +quote_e = "'" + +class DemangleData(object): + def __init__(self, value, verbose=False): + # The main data is the input string. + self.value = value + # But we also store, for backreferences, the list of name fragments + # and the list of arguments (non-primitive types only). + self.fragments = [] + self.arguments = [] + # In templates, the backreference lists are pushed on a history stack. + self.history = [] + self.verbose = verbose + def advance(self, count): + # This is linear in the size of self.value, therefore the complexity + # of symbol_demangle_reentrant is quadratic, which makes it vulnerable + # to DoS attacks. 
That's why DemangleData's interface can support an + # implementation such that self.value is constant and the current + # position in self.value is stored in e.g. self.pos; I chose to + # modify self.value, it seems more understandable. + self.value = self.value[count:] + def __getitem__(self, pos): + return self.value.__getitem__(pos) + def index(self, pos): + return self.value.index(pos) + def __repr__(self): + return repr(self.value) + def __len__(self): + return len(self.value) + def add_fragment(self, fragment): + self.fragments.append(fragment) + def add_argument(self, argument): + self.arguments.append(argument) + def enter_template(self): + self.history.append( (self.fragments, self.arguments) ) + self.fragments, self.arguments = [], [] + def exit_template(self): + self.fragments, self.arguments = self.history.pop() + def is_in_template(self): + # '?' data type depends on whether we are in a template + return len(self.history) > 0 + def log(self, msg, *args): + if self.verbose: print(' %-25s REST=%r ARG=%r FRAG=%r' + %(msg%args,self,self.arguments,self.fragments)) + +def symbol_demangle_reentrant(data): + # Reentrant: can be called for nested symbols. + if data[:6] == '__mep@': + # undname.exe does not expand symbols beginning with __mep@, + # but they are generated when using C++/CLI and correspond + # to prefixed mangled symbols. 
+ data.advance(6) + return '[MEP] ' + symbol_demangle_reentrant(data) + elif data[:1] != '?': + data.log('Not Mangled') + name = str(data.value) + data.advance(len(data)) + return name + data.advance(1) + if data[:1] == '$': + # Neither a variable nor a function: just a name with a template + # Example: '?$a@PAUb@@' which means 'a' + data.advance(1) + name = extract_template(data) + assert len(data) == 0 + return name + elif data[:1] == '@': + # Found by reversing vcruntime140.dll + # Don't know when such names are generated + data.advance(1) + data.log('CV: prefix') + return 'CV: ' + symbol_demangle_reentrant(data) + elif data[:3] == '?_C': + # Neither a variable nor a function: just `string' + # The rest is ignored + name = quote_b + 'string' + quote_e + data.advance(len(data)) + return name + # Variable or function: starts with a list of name fragments, + # continues with type information. + if data[:1] == '?': + data.advance(1) + name = name_extract_special(data) + else: + name = [] + name += name_extract_list(data) + data.log('PARAM') + if name[0] == quote_b + 'local static guard' + quote_e: + return symbol_demangle_local_static_guard(name, data) + if '0' <= data[:1] <= '9' or data[:2] == '$B': + return symbol_demangle_variable(name, data) + if 'A' <= data[:1] <= 'Z' or data[:1] == '$': + return symbol_demangle_function(name, data) + assert False + +def symbol_demangle_local_static_guard(name, data): + # We don't know if other value than 5 can appear, and what they mean. 
+ assert data[:1] == '5' + data.advance(1) + assert '0' <= data[:1] <= '9' + param = 1+ord(data[0])-ord('0') + data.advance(1) + name = '::'.join(reversed(name)) + return name + "{%d}'" % param + +def symbol_demangle_variable(name, data): + # Access level and storage class + thunk, access = parse_value(data, thunk_access, logmsg='TYPE=%s ACCESS=%s') + add_name = '' + if thunk == 'VAR': + # NB: ret is of type DataType, because it may be a function pointer + ret = data_type(data) + data.log('TYPE=%s', ret) + cv = ' '.join(cv_class_modifiers(data)) + if cv: cv += ' ' + ret += ' ' + cv + elif thunk == 'OPT': + ret = ' '.join(cv_class_modifiers(data)) + if ret: ret += ' ' + if data[:1] != '@': + add_name = name_extract_list(data) + add_name = '::'.join(reversed(add_name)) + add_name = "{for %s%s%s}" % (quote_b, add_name, quote_e) + data.log('OPT_NAME=%r', add_name) + assert data[:1] == '@' + data.advance(1) + elif thunk == 'vcall': + n1 = decode_number(data) + data.log('VCALL{%d}', n1) + add_name = '{%d,{flat}}'%n1 + quote_e + ' }' + quote_e + assert data[:1] == 'A' + data.advance(1) + ret = parse_value(data, calling_convention, logmsg='CALL=%r') + else: + raise ValueError("Unknown thunk value %r"%thunk) + name = '::'.join(reversed(name)) + ret += name + if access: access += ' ' + return access + str(ret) + add_name + +def symbol_demangle_function(name, data): + if data[:3] in ('$$F', '$$H'): + # C++/CLI + # https://en.wikiversity.org/wiki/Visual_C%2B%2B_name_mangling + # identifies $$F as a 'function modifier' for C++/CLI meaning + # the the function is managed. This web page mentions $$F in the + # data_types section, but it does not appear with data types, + # it appears before the thunk_access letters. + # The symbol with $$F is the 'managed entry point', the other + # being the 'native entry point'. + # + # Visual Studio also generates $$H for 'main', with unknown + # meaning... 
+ # + # Apparently this could be ignored, because undname.exe outputs + # the same decoding when $$F or $$H is present. We add a prefix, + # it seems more informative. + prefix = { + '$$F': '[managed] ', + '$$H': '[MANAGED] ', + }[data[:3]] + data.advance(3) + return prefix + symbol_demangle_function(name, data) + if data[:3] == '$$J': + data.advance(3) + assert len(data) + if data[0] in '0123456789': + # To be analyzed later... does not change the output of undname.exe + # Visual Studio generates various values ('0', '18', ...) + data.advance(1+ord(data[0])-ord('0')) + prefix = 'extern "C" ' + else: + prefix = '' + thunk, access = parse_value(data, thunk_access, logmsg='TYPE=%s ACCESS=%s') + if thunk == 'vtordisp': + vtor = [str(decode_number(data)) for _ in range(2)] + elif thunk == 'vtordispex': + assert data[:1] == '4' + data.advance(1) + vtor = [str(decode_number(data)) for _ in range(4)] + cv = '' + if access and not 'static' in access: + cli_retval = { + '$A': '', + '$C': '%', + } + cli = parse_value(data, cli_retval) + if cli: + data.log('C++/CLI Return Value') + cv = cli + cv = ' '.join(cv_class_modifiers(data)) + cv + ret, func_call, args = symbol_demangle_function_prototype(data) + name, ret = name_finalize(name, ret) + name = '::'.join(reversed(name)) + if thunk is not None and thunk.startswith('vtordisp'): + name += quote_b + thunk + '{' + ','.join(vtor) + '}' + quote_e + ' ' + if ret and access: access += ' ' + ret += ' ' + func_call + name + args + cv + return prefix + access + str(ret) + +def symbol_demangle_function_prototype(data): + # Used when demangling a function, but also for function pointers + # and member function pointers. 
+ func_call = parse_value(data, calling_convention, logmsg='CALL=%r') + ret = data_type(data) + data.log('RET=%s', ret) + args = arg_list(data, stop='XZ@') + args = '(' + ','.join(args) + ')' + if data[:1] == 'Z': + # No throw + data.advance(1) + else: + # Same output as undname.exe, but Visual Studio 14.0 seems to + # ignore throw() in function prototypes. + throw_args = arg_list(data, stop='@') + args += ' throw(' + ','.join(throw_args) + ')' + return ret, func_call, args + +def name_extract_special(data): + # The symbol's name optionally starts with a special fragment + name = [] + fragment = parse_value(data, special_fragment) + if fragment is not None: + data.log('SPEC=%r', fragment) + name.append(fragment) + elif data[:2] == '_P': + data.advance(2) + fragment = quote_b + 'udt returning' + quote_e + fragment += name_extract_special(data)[0] + name.append(fragment) + elif data[:3] == '_R0': + data.advance(3) + fragment = data_type(data) + fragment += ' ' + quote_b + 'RTTI Type Descriptor' + quote_e + name.append(str(fragment)) + elif data[:3] == '_R1': + data.advance(3) + fragment = quote_b + 'RTTI Base Class Descriptor at (%d,%d,%d,%d)' + quote_e + fragment = fragment % tuple([decode_number(data) for _ in range(4)]) + name.append(fragment) + elif data[:2] == '$?': + # operator template + data.advance(2) + fragment = parse_value(data, special_fragment) + fragment += '<%s>' % data_type(data) + name.append(fragment) + assert data[:1] == '@' + data.advance(1) + elif data[:1] == '$': + # normal template + data.advance(1) + fragment = extract_template(data) + name.append(fragment) + return name + +def name_extract_list(data): + # Other fragments cannot be in 'special_fragment' nor operator template. + # If they begin with '?$' they are normal templates, with '??' they are + # nested names, and other fragments beginning with '?' are quoted numeric. 
+ name = [] + while data[:1] != '@': + fragment = extract_name_fragment(data) + name.append(fragment) + assert data[:1] == '@' + data.advance(1) + data.log('NAME=%r', name) + return name + +def extract_name_string(data): + assert len(data) + assert data[0] != '?' + idx = data.index('@') + fragment = data[:idx] + data.advance(idx+1) + data.add_fragment(fragment) + data.log('NAME=%r', fragment) + return fragment + +def extract_name_fragment(data): + if len(data) and data[0] in '0123456789': + # fragment backreference + data.log('BACKREF_FRG=%s', data[0]) + fragment = data.fragments[int(data[0])] + data.advance(1) + elif data[:2] == '??': + # nested name + data.advance(1) + fragment = quote_b + symbol_demangle_reentrant(data) + quote_e + elif data[:2] == '?$': + # template + data.advance(2) + fragment = extract_template(data) + data.add_fragment(fragment) + elif data[:2] == '?A': + # anonymous namespace + idx = data.index('@') + data.advance(idx+1) + fragment = quote_b + 'anonymous namespace' + quote_e + elif data[:1] == '?': + # numbered namespace + data.advance(1) + i = decode_number(data) + fragment = quote_b + str(i) + quote_e + else: + # name (text) + fragment = extract_name_string(data) + data.log('FRAGMENT=%r', fragment) + return fragment + +def extract_template(data): + data.log('TEMPLATE start') + data.enter_template() + name = extract_name_string(data) + args = arg_list(data, stop='Z@') + data.exit_template() + fragment = '%s<%s>'%(name, ','.join(args)) + data.log('TEMPLATE=%r', fragment) + return fragment + +def cv_class_modifiers(data): + mod = [] + while len(data) and data[0] in 'EFI': + mod.append({ + 'E': '__ptr64', + 'F': '__unaligned', + 'I': '__restrict', + }[data[0]]) + data.advance(1) + cv_table = { + 'A': '', + 'B': 'const', + 'C': 'volatile', + 'D': 'const volatile', + 'M2': '__based(%s)', + } + cv = parse_value(data, cv_table) + if cv == '__based(%s)': + # Note that undname.exe forgets some const qualifiers. 
Example: + # int __based(b) * const __cdecl a(short) + # becomes ?a@@YAQM2b@@HF@Z but undname.exe decodes it as + # int __based(b) * __cdecl a(short) + name = name_extract_list(data) + name = '::'.join(reversed(name)) + cv = cv % name + data.log('CVC_MOD=%r %r', cv, mod) + return [cv] + mod + +def decode_number(data): + if data[:1] == '?': sign = -1; data.advance(1) + else: sign = 1 + if data[:1] == '@': + data.advance(1) + return 0 + elif data[:1] in '0123456789': + val = 1+int(data[0]) + data.advance(1) + return sign*val + elif len(data) and data[0] in 'ABCDEFGHIJKLMNOP': + i = 0 + while len(data) and data[0] != '@': + i *= 16 + i += ord(data[0])-ord('A') + data.advance(1) + data.advance(1) + return sign*i + assert False + +class DataType(object): + # Usually a data type is a string, but if it is a function type, + # then it is a triplet of strings + # ( return type, calling convention & qualifiers, arguments ) + # We create a dedicated class, because we want to use += (aka. __iadd__) + def __init__(self, value): + if isinstance(value, tuple): + self.value = (str(value[0]), value[1], value[2]) + else: + self.value = value + def __repr__(self): + return '<%s %r>'%(self.__class__.__name__, self.value) + def __str__(self): + if isinstance(self.value, tuple): + r, c, a = self.value + if c: c = '(' + c + ')' + return r + ' ' + c + a + else: + return self.value + def __iadd__(self, other): + if isinstance(self.value, tuple): + r, c, a = self.value + self.value = (r, c + other, a) + else: + self.value += other + return self + def append(self, other): + if isinstance(self.value, tuple): + r, c, a = self.value + self.value = (r, c, a + other) + else: + self.value += other + return self + def prepend(self, other): + if isinstance(self.value, tuple): + r, c, a = self.value + self.value = (other + r, c, a) + else: + self.value = other + self.value + return self + def __nonzero__(self): + # For python2 + return self.__bool__() + def __bool__(self): + # For python3 + return 
bool(len(self.value)) + +def data_type(data, depth = 0): + data.log('TYPE depth %d', depth) + if len(data) and data[0] in '0123456789': + # argument backreference + pos = int(data[0]) + data.log('BACKREF_ARG=%d', pos) + data.advance(1) + assert pos < len(data.arguments) + result = data.arguments[pos] + elif data[:2] in ('P6', 'Q6'): + # Function pointer + # The result of 'data_type' is not a string, because if it is + # an argument of a function it needs to be converted to + # '%s(%s)%s'%result but if it is a return type it needs + # to be converted to '%s(%s f(args))%s' + # 'Q6' is probably 'const', but undname.exe does not show it. + _, p_mod = parse_value(data, data_types) + data.advance(1) + result = DataType(symbol_demangle_function_prototype(data)) + result += ' '.join(p_mod) + elif data[:2] == 'P8': + # Member function pointer + data.advance(2) + assert len(data) + name = name_extract_list(data) + name = '::'.join(reversed(name)) + cv = ' '.join(cv_class_modifiers(data)) + result = DataType(symbol_demangle_function_prototype(data)) + result += name+'::*' + result.append(cv) + elif data[:3] == '__Z': + # HACK. do nothing + data.advance(3) + result = data_type(data) + elif data[:2] in ('A$', 'P$'): + cli0 = data[0] + data.advance(2) + assert len(data) + # Managed C++ properties + # https://en.wikipedia.org/wiki/Managed_Extensions_for_C%2B%2B + # Now deprecated, was designed for .Net and CLR. + # There were __gc, __value, __interface, __abstract, __sealed + # and _pin modifiers. 
+ # C++/CLI + # Replaces Managed C++, included in Visual Studio 2005 + # The both Managed C++ and C++/CLI are well described at + # https://msdn.microsoft.com/en-us/library/ms379603(VS.80).aspx + data.log('C++/CLI Arguments') + cli1 = data[0] + if cli1 in 'ABC': + data.advance(1) + cli_arguments = { + 'PA': (' ^', ''), + 'AA': (' %', ''), + # Not sure whether these next two are generated by the compiler + 'PC': (' %', ''), + 'AC': (' %', ''), + # pin_ptr decoding seems invalid, the < is not closed, + # but that's what undname.exe outputs. + 'PB': (' *', 'cli::pin_ptr<'), + 'AB': (' &', 'cli::pin_ptr<'), + } + postfix, prefix = cli_arguments[cli0+cli1] + c = cv_class_modifiers(data) + result = data_type(data, depth=depth+1) + if c[0] != '': result += ' ' + c[0] + result += postfix + result.prepend(prefix) + if len(c) > 1: result += ' ' + ' '.join(c[1:]) + elif cli1 in '01': + dim = int(data[:2], 16) + data.advance(2) + data.advance(1) # ignored, apparently + result = data_type(data, depth=depth+1) + result.prepend('cli::array<') + if dim == 1: result += ' >^' + else: result += ' ,%d>^' % dim + else: + # VS 14.0's undname.exe does some additional decoding, + # e.g. ?a@@$$FYMHP$DFCH@Z + # becomes 'int __clrcall a(cli::array^)' + # but this is clearly a bug of undname.exe + assert False + elif data[:1] == '?' and data.is_in_template(): + # Template parameters + data.advance(1) + i = decode_number(data) + result = DataType(quote_b + 'template-parameter-%d'%i + quote_e) + elif data[:1] == '$' and data[:2] != '$$' and data.is_in_template(): + # Various types of template parameters + template_type = data[1] + data.advance(2) + if template_type == '0': + # Template instanciated with a numeric value. Example: + # template struct S1 { int a[N]; }; + # S1<10> s1; + i = decode_number(data) + result = DataType(str(i)) + elif template_type == '1': + # Template instanciated with a static object. 
Example: + # template struct S2 {}; + # int N = 1; + # S2 s2; + result = DataType('&%s'% symbol_demangle_reentrant(data)) + elif template_type in '2FG': + # Decoding obtained by trial and error with undname.exe, + # but the result seems meaningless. + h = str(ord(data[0])-ord('/')) + data.advance(1) + i = decode_number(data) + if template_type == '2': + result = DataType('%s.%se%d'%(h[0],h[1:],i)) + elif template_type == 'F': + result = DataType('{%s,%d}'%(h,i)) + elif template_type == 'G': + j = decode_number(data) + result = DataType('{%s,%d,%d}'%(h,i,j)) + elif template_type == 'D': + # This is compatible with wine's undname, but is not known to + # the undname.exe of Visual Studio 14.0. + i = decode_number(data) + result = DataType(quote_b + 'template-parameter%d'%i + quote_e) + else: + raise KeyError('TemplateParameter<%s>'%template_type) + elif data[:3] == '$$B': + # $$B seems useless because it calls data_type with no changes, + # but it is needed by undname.exe in some cases. + data.advance(3) + result = data_type(data) + elif data[:1] == 'Y' and (depth > 0 or data.is_in_template()): + # Pointer to multidimensional array + data.advance(1) + dim = decode_number(data) + val = [ '[%d]'%decode_number(data) for _ in range(dim) ] + result = str(data_type(data)) + result = DataType((result, '', ''.join(val))) + elif data[:2] == '_$': + # __w64 type + data.advance(2) + result = data_type(data, depth=depth+1).prepend('__w64 ') + elif data[:2] == '_O': + # Array + dimension = 1 + data.advance(2) + cv = ' '.join(cv_class_modifiers(data)) + if cv: cv = ' ' + cv + while data[:2] == '_O': + dimension += 1 + data.advance(2) + cv_class_modifiers(data) + result = data_type(data, depth=depth+1) + result.append(cv + ' ' + '[]' * dimension) + else: + category, result = parse_value(data, data_types) + if category == 'COMPLEX': + data.log('COMPLEX_TYPE') + if result == 'enum': + assert 'int' == parse_value(data, enum_types) + result = DataType(result) + name = 
name_extract_list(data) + name = '::'.join(reversed(name)) + result += ' ' + name + elif category == 'MODIFIER': + # The type modifier is output in two parts, because the qualifier + # is not present when there are nested pointer/references, + # detected by looking at the variable 'depth'. + # The mixing of 'm' and 'c' outputs the same order as undname.exe + m = result + c = cv_class_modifiers(data) + data.log('CVM(%d) %s %s', depth, c, m) + if depth > 0: m = [m[0]] + cm = [] + if c[0] != '': cm.append(c[0]) + if m[0] != '': cm.append(m[0]) + cm = ' '.join(cm + c[1:] + m[1:]) + if cm != '': cm = ' ' + cm + result = data_type(data, depth=depth+1) + result += cm + else: + assert category == 'SIMPLE' + result = DataType(result) + data.log('TYPE=%r', result) + return result + +def arg_list(data, stop=None): + # For function arguments, 'X' is terminating => stop = 'XZ@' + # For template arguments, 'X' is not terminating => stop = 'Z@' + data.log('ARGS start') + args = [] + while len(data): + if data[0] in stop: break + primitive_type = data[0] in 'CDEFGHIJKMNO' + a = data_type(data) + if a is None: break + args.append(str(a)) + if not primitive_type: data.add_argument(a) + data.log('ARGS=%r', args) + if not len(data): + # Neither a variable nor a function: just a type with template + return args + if data[:1] == 'X': + # void as the only argument + args.append('void') + elif data[:2] == 'ZZ': + # ellipsis only when at the end of the argument list + args.append('...') + else: + assert data[:1] == '@' + data.advance(1) + return args + +def name_finalize(name, ret): + # Some special fragments need to be replaced after everything has + # been computed. 
+ if name[0] == '?0': + # constructor + assert len(name) >= 2 + name[0] = name[1] + ret = '' + elif name[0] == '?1': + # destructor + assert len(name) >= 2 + name[0] = '~' + name[1] + ret = '' + elif name[0] == '?B': + # operator returntype + name[0] = 'operator ' + str(ret) + ret = '' + elif name[0] in ('?__E', '?__F', '?__K'): + assert len(name) >= 2 + name[1] = { + '?__E': quote_b + "dynamic initializer for '%s'" + quote_e, + '?__F': quote_b + "dynamic atexit destructor for '%s'" + quote_e, + '?__K': 'operator "" %s', + }[name[0]] % name[1] + name[:1] = [] + return name, ret + +def parse_value(data, table, logmsg=None): + # Function for accessing the tables below + for k in table: + if data[:len(k)] == k: + data.advance(len(k)) + if logmsg is not None: + data.log(logmsg % table[k]) + return table[k] + +special_fragment = { + '0': '?0', # to be done by name_finalize() + '1': '?1', # to be done by name_finalize() + '2': 'operator new', + '3': 'operator delete', + '4': 'operator=', + '5': 'operator>>', + '6': 'operator<<', + '7': 'operator!', + '8': 'operator==', + '9': 'operator!=', + 'A': 'operator[]', + 'B': '?B', # to be done by name_finalize() + 'C': 'operator->', + 'D': 'operator*', + 'E': 'operator++', + 'F': 'operator--', + 'G': 'operator-', + 'H': 'operator+', + 'I': 'operator&', + 'J': 'operator->*', + 'K': 'operator/', + 'L': 'operator%', + 'M': 'operator<', + 'N': 'operator<=', + 'O': 'operator>', + 'P': 'operator>=', + 'Q': 'operator,', + 'R': 'operator()', + 'S': 'operator~', + 'T': 'operator^', + 'U': 'operator|', + 'V': 'operator&&', + 'W': 'operator||', + 'X': 'operator*=', + 'Y': 'operator+=', + 'Z': 'operator-=', + '_0': 'operator/=', + '_1': 'operator%=', + '_2': 'operator>>=', + '_3': 'operator<<=', + '_4': 'operator&=', + '_5': 'operator|=', + '_6': 'operator^=', + '_7': quote_b + 'vftable' + quote_e, + '_8': quote_b + 'vbtable' + quote_e, + '_9': quote_b + 'vcall' + quote_e, + '_A': quote_b + 'typeof' + quote_e, + '_B': quote_b + 'local 
static guard' + quote_e, + #_C just returns 'string' and forgets the rest of the input + '_D': quote_b + 'vbase destructor' + quote_e, + '_E': quote_b + 'vector deleting destructor' + quote_e, + '_F': quote_b + 'default constructor closure' + quote_e, + '_G': quote_b + 'scalar deleting destructor' + quote_e, + '_H': quote_b + 'vector constructor iterator' + quote_e, + '_I': quote_b + 'vector destructor iterator' + quote_e, + '_J': quote_b + 'vector vbase constructor iterator' + quote_e, + '_K': quote_b + 'virtual displacement map' + quote_e, + '_L': quote_b + 'eh vector constructor iterator' + quote_e, + '_M': quote_b + 'eh vector destructor iterator' + quote_e, + '_N': quote_b + 'eh vector vbase constructor iterator' + quote_e, + '_O': quote_b + 'copy constructor closure' + quote_e, + #_P 'udt returning' followed by a special fragment + #_R0 'RTTI Type Descriptor' followed by a data type + #_R1 'RTTI Base Class Descriptor' followed by four numbers + '_R2': quote_b + 'RTTI Base Class Array' + quote_e, + '_R3': quote_b + 'RTTI Class Hierarchy Descriptor' + quote_e, + '_R4': quote_b + 'RTTI Complete Object Locator' + quote_e, + '_S': quote_b + 'local vftable' + quote_e, + '_T': quote_b + 'local vftable constructor closure' + quote_e, + '_U': 'operator new[]', + '_V': 'operator delete[]', + '_X': quote_b + 'placement delete closure' + quote_e, + '_Y': quote_b + 'placement delete[] closure' + quote_e, + '__A': quote_b + 'managed vector constructor iterator' + quote_e, + '__B': quote_b + 'managed vector destructor iterator' + quote_e, + '__C': quote_b + 'eh vector copy constructor iterator' + quote_e, + '__D': quote_b + 'eh vector vbase copy constructor iterator' + quote_e, + '__E': '?__E', # to be done by name_finalize() + '__F': '?__F', # to be done by name_finalize() + '__G': quote_b + 'vector copy constructor iterator' + quote_e, + '__H': quote_b + 'vector vbase copy constructor iterator' + quote_e, + '__I': quote_b + 'managed vector copy constructor iterator' + 
quote_e, + '__J': quote_b + 'local static thread guard' + quote_e, + '__K': '?__K', # to be done by name_finalize() + } + +data_types = { + # We should set '@' to 'void' if we want the same output as wine's undname + '@': ('SIMPLE', '',), + '?': ('MODIFIER', ['', ]), + 'A': ('MODIFIER', ['&', ]), + 'B': ('MODIFIER', ['& volatile', ]), + 'C': ('SIMPLE', 'signed char',), + 'D': ('SIMPLE', 'char',), + 'E': ('SIMPLE', 'unsigned char',), + 'F': ('SIMPLE', 'short',), + 'G': ('SIMPLE', 'unsigned short',), + 'H': ('SIMPLE', 'int',), + 'I': ('SIMPLE', 'unsigned int',), + 'J': ('SIMPLE', 'long',), + 'K': ('SIMPLE', 'unsigned long',), + 'M': ('SIMPLE', 'float',), + 'N': ('SIMPLE', 'double',), + 'O': ('SIMPLE', 'long double',), + 'P': ('MODIFIER', ['*', ]), + 'Q': ('MODIFIER', ['*', 'const']), + 'R': ('MODIFIER', ['*', 'volatile']), + 'S': ('MODIFIER', ['*', 'const volatile']), + 'T': ('COMPLEX', 'union'), + 'U': ('COMPLEX', 'struct'), + 'V': ('COMPLEX', 'class'), + 'W': ('COMPLEX', 'enum'), + 'X': ('SIMPLE', 'void',), + 'Y': ('COMPLEX', 'cointerface'), + '_D': ('SIMPLE', '__int8',), + '_E': ('SIMPLE', 'unsigned __int8',), + '_F': ('SIMPLE', '__int16',), + '_G': ('SIMPLE', 'unsigned __int16',), + '_H': ('SIMPLE', '__int32',), + '_I': ('SIMPLE', 'unsigned __int32',), + '_J': ('SIMPLE', '__int64',), + '_K': ('SIMPLE', 'unsigned __int64',), + '_L': ('SIMPLE', '__int128',), + '_M': ('SIMPLE', 'unsigned __int128',), + '_N': ('SIMPLE', 'bool',), + #_O =SPECIAL CASE= Array + '_S': ('SIMPLE', 'char16_t',), + '_U': ('SIMPLE', 'char32_t',), + '_W': ('SIMPLE', 'wchar_t',), + '_X': ('COMPLEX', 'coclass'), + '_Y': ('COMPLEX', 'cointerface'), + #_$' =SPECIAL CASE= __w64 type + #$$A =TODO= (found by reversing vcruntime140.dll, more reverse is needed) + #$$B =SPECIAL CASE= Apparently no effect + '$$C': ('MODIFIER', ['', ]), + '$$Q': ('MODIFIER', ['&&', ]), + '$$R': ('MODIFIER', ['&&', 'volatile']), + #$$S =TODO= (found by reversing vcruntime140.dll, more reverse is needed) + '$$T': ('SIMPLE', 
'std::nullptr_t'), + #$$Y =TODO= (found by reversing vcruntime140.dll, more reverse is needed) + } + +enum_types = { + # Here are the enum types mentioned at + # https://en.wikiversity.org/wiki/Visual_C%2B%2B_name_mangling + # Note that only type 4 aka 'int' is used by "modern versions" + # of Visual Studio. + '0': 'char', + '1': 'unsigned char', + '2': 'short', + '3': 'unsigned short', + '4': 'int', + '5': 'unsigned int', + '6': 'long', + '7': 'unsigned long', + } + +thunk_access = { + 'A': (None, 'private:'), + 'B': (None, 'private:'), + 'C': (None, 'private: static'), + 'D': (None, 'private: static'), + 'E': (None, 'private: virtual'), + 'F': (None, 'private: virtual'), + 'G': (None, 'private: thunk'), + 'H': (None, 'private: thunk'), + 'I': (None, 'protected:'), + 'J': (None, 'protected:'), + 'K': (None, 'protected: static'), + 'L': (None, 'protected: static'), + 'M': (None, 'protected: virtual'), + 'N': (None, 'protected: virtual'), + 'O': (None, 'protected: thunk'), + 'P': (None, 'protected: thunk'), + 'Q': (None, 'public:'), + 'R': (None, 'public:'), + 'S': (None, 'public: static'), + 'T': (None, 'public: static'), + 'U': (None, 'public: virtual'), + 'V': (None, 'public: virtual'), + 'W': (None, 'public: thunk'), + 'X': (None, 'public: thunk'), + 'Y': (None, ''), + 'Z': (None, ''), + '0': ('VAR', 'private: static'), + '1': ('VAR', 'protected: static'), + '2': ('VAR', 'public: static'), + '3': ('VAR', ''), # private non-static + '4': ('VAR', ''), # protected non-static + '5': ('VAR', ''), # public non-static + '6': ('OPT', ''), + '7': ('OPT', ''), + '$0': ('vtordisp', '[thunk]:private: virtual'), + '$1': ('vtordisp', '[thunk]:private: virtual'), + '$2': ('vtordisp', '[thunk]:protected: virtual'), + '$3': ('vtordisp', '[thunk]:protected: virtual'), + '$4': ('vtordisp', '[thunk]:public: virtual'), + '$5': ('vtordisp', '[thunk]:public: virtual'), + '$B': ('vcall', '[thunk]:'), + '$R': ('vtordispex', '[thunk]:public: virtual'), + } + +calling_convention = { + 
'A': '__cdecl ', + 'B': '__cdecl __dll_export ', + 'C': '__pascal ', + 'D': '__pascal __dll_export ', + 'E': '__thiscall ', + 'F': '__thiscall __dll_export ', + 'G': '__stdcall ', + 'H': '__stdcall __dll_export ', + 'I': '__fastcall ', + 'J': '__fastcall __dll_export ', + 'K': '', + 'L': '__dll_export ', + 'M': '__clrcall ', + 'N': '__clrcall __dll_export ', + 'O': '__eabi ', + 'P': '__eabi __dll_export ', + 'Q': '__vectorcall ', + } + +if __name__ == "__main__": + import sys + verbose = False + for s in sys.argv[1:]: + if s == '-v': verbose = True; continue + n, r = symbol_demangle(s, verbose=verbose) + if r: n += ' Rest(%s)'%r + print(n) diff --git a/examples/minidump_to_pe.py b/examples/minidump_to_pe.py new file mode 100644 index 0000000..a884034 --- /dev/null +++ b/examples/minidump_to_pe.py @@ -0,0 +1,49 @@ +#! /usr/bin/env python +"""Minidump to PE example""" +import sys +from elfesteem.minidump_init import Minidump +from elfesteem.pe_init import PE + +fd = open(sys.argv[1]) +try: + raw = fd.read() +finally: + fd.close() +minidump = Minidump(raw) + +pe = PE() +for i, memory in enumerate(sorted(minidump.memory.itervalues(), + key=lambda x:x.address)): + # Get section name + name = str(memory.name) + if not name: + name = "s_%02d" % i + else: + name = name.split('\\')[-1] + + # Get section protection + protect = memory.pretty_protect + protect_mask = 0x20 + if protect == "UNKNOWN": + protect_mask |= 0xe0000000 + else: + if "EXECUTE" in protect: + protect_mask |= 1 << 29 + if "READ" in protect: + protect_mask |= 1 << 30 + if "WRITE" in protect: + protect_mask |= 1 << 31 + + # Add the section + pe.SHList.add_section(name=name, addr=memory.address, rawsize=memory.size, + data=memory.content, flags=protect_mask) + +# Find entry point +entry_point = minidump.threads.Threads[0].ThreadContext.Eip[0] +pe.Opthdr.AddressOfEntryPoint = entry_point + +fd = open("out_pe.bin", "w") +try: + fd.write(str(pe)) +finally: + fd.close() diff --git a/examples/otool.py 
b/examples/otool.py new file mode 100755 index 0000000..1e9b1e5 --- /dev/null +++ b/examples/otool.py @@ -0,0 +1,440 @@ +#! /usr/bin/env python + +import sys, os +import platform + +sys.path.insert(1, os.path.abspath(sys.path[0]+'/..')) +from elfesteem import macho_init, macho + +def print_header(e, **fargs): + print("Mach header") + print(" magic cputype cpusubtype caps filetype ncmds sizeofcmds flags") + print(" 0x%08x %7d %10d 0x%02x %10u %5u %10u 0x%08x" %(e.Mhdr.magic,e.Mhdr.cputype ,e.Mhdr.cpusubtype & (0xffffffff ^ macho.CPU_SUBTYPE_MASK),(e.Mhdr.cpusubtype & macho.CPU_SUBTYPE_MASK) >> 24,e.Mhdr.filetype,e.Mhdr.ncmds,e.Mhdr.sizeofcmds,e.Mhdr.flags)) + +import subprocess +def popen_read_out_err(cmd): + p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + p.wait() + p.stdin.close() + return p.stdout.read() + p.stderr.read() + +import re +def get_otool_version(): + otool_v = popen_read_out_err(["otool", "--version"]) + if type(otool_v) != str: otool_v = str(otool_v, encoding='latin1') + r = re.search(r' LLVM version (\d+)', otool_v) + if r: + return int(r.groups()[0]) + else: + sys.stderr.write("Could not detect otool version\n") + sys.stderr.write(otool_v) + return None + +def split_integer(v, nbits, ndigits, truncate=None): + mask = (1< 0: + res.insert(0, v & mask) + v = v >> nbits + ndigits -= 1 + res[0] += v << nbits + if truncate is not None: + while len(res) > truncate and res[-1] == 0: + res = res[:-1] + return ".".join(["%u"%_ for _ in res]) + +def print_lc(e, llvm=False, **fargs): + for i, lc in enumerate(e.load): + print("Load command %u" %i) + print("\n".join(lc.otool(llvm=llvm))) + + + +def print_symbols(e, **fargs): + for sect in e.sect: + if type(sect) != macho_init.SymbolTable: + continue + print("%-35s %-15s %-4s %-10s %s"%("Symbol","Section","Type","Value","Description")) + for symbol in sect.symbols: + print(symbol.otool()) + +def print_dysym(e, **fargs): + # Display indirect symbol tables + for 
sect in e.sect: + if getattr(sect, 'type', None) is None: + continue + elif sect.type == 'indirectsym': + print("Indirect symbols [%d entries]"%len(sect)) + print("%5s %s"%("index","name")) + for entry in sect: + entry = entry.index + if entry == macho.INDIRECT_SYMBOL_LOCAL: + print("%5s" % "LOCAL") + elif entry == macho.INDIRECT_SYMBOL_ABS: + print("%5s" % "ABSOLUTE") + elif 0 <= entry < len(e.symbols.symbols): + print("%5s %s" % (entry,e.symbols.symbols[entry].name)) + else: + print("INVALID(%d)" % entry) + elif sect.type == 'locrel': + print("Local relocations [%d entries]"%len(sect)) + for entry in sect: + print(repr(entry)) + elif sect.type == 'extrel': + print("External relocations [%d entries]"%len(sect)) + for entry in sect: + print(repr(entry)) + +def print_indirect(e, **fargs): + # Find section with indirect symbols and indirect symbols table + indirectsym_table = None + indirectsym_section = [] + for s in e.sect: + if getattr(s, 'type', None) == 'indirectsym': + if indirectsym_table is not None: + raise ValueError("Only one IndirectSymbolTable per Mach-O file") + indirectsym_table = s + if not hasattr(s, 'sh'): continue + if s.sh.type in [ + macho.S_SYMBOL_STUBS, + macho.S_LAZY_SYMBOL_POINTERS, + macho.S_NON_LAZY_SYMBOL_POINTERS, + macho.S_LAZY_DYLIB_SYMBOL_POINTERS, + ]: + indirectsym_section.append(s) + # Display + verbose = False # Exactly the same output as 'otool -Iv' + import struct + idx = 0 + for s in indirectsym_section: + print("Indirect symbols for (%s,%s) %u entries" + % (s.sh.segname, s.sh.sectname, len(s))) + if e.wsize == 64: + header = "%-18s %5s" + format = "0x%016x %5s" + valfmt = e.sex+"Q" + if e.wsize == 32: + header = "%-10s %5s" + format = "0x%08x %5s" + valfmt = e.sex+"I" + if s.sh.type == macho.S_SYMBOL_STUBS: + # First two bytes are 0xff 0x25 + valfmt = e.sex+"HI" + address = s.addr + data = [ "address", "index", " name" ] + if verbose: + # The value read in the table is not output by otool + # it may be useless ??? 
+ header += "%-20s " + format += "%-20s " + data += "value" + header += "%s" + format += "%s" + print(header % tuple(data)) + for entry in s: + if verbose: content = struct.unpack(valfmt,entry.content)[-1] + index = indirectsym_table.entries[idx].index + name = '' + if index == macho.INDIRECT_SYMBOL_LOCAL: index = "LOCAL" + elif index == macho.INDIRECT_SYMBOL_ABS: index = "ABSOLUTE" + else: name = ' '+e.symbols.symbols[index].name + data = [ address, index, name ] + if verbose: data.append(content) + print(format % tuple(data)) + idx += 1 + address += entry.bytelen + +def print_relocs(e, **fargs): + for s in e.sect: + if not hasattr(s, 'reloclist'): continue + print("Relocation information (%s,%s) %u entries" + % (s.sh.segname, s.sh.sectname, s.sh.nreloc)) + print("address pcrel length extern type scattered symbolnum/value") + for x in s.reloclist: + if x.scattered: xt, xn = 'n/a', '0x%08x' % x.symbolNumOrValue + else: xt, xn = x.extern, '%u' % x.symbolNumOrValue + print("%08x %-5u %-6u %-6s %-7d %-9d %s" % + (x.address, x.pcrel, x.length, xt, x.type, x.scattered, xn)) + +def print_opcodes(e, **fargs): + messages_and_values = ( + ('rebase_', macho.REBASE_OPCODE_DONE, + 'rebase opcodes:', 'no compressed rebase info'), + ('bind_', macho.BIND_OPCODE_DONE, + 'binding opcodes:', 'no compressed binding info'), + ('weak_bind_', macho.BIND_OPCODE_DONE, + 'weak binding opcodes:', 'no compressed weak binding info'), + ('lazy_bind_', -1, + 'lazy binding opcodes:', 'no compressed lazy binding info'), + ) + for t, v, ok, ko in messages_and_values: + s_list = [ _ for _ in e.sect if getattr(_, 'type', None) == t ] + if len(s_list) == 0: + print(ko) + continue + if len(s_list) > 1: + print("ERROR: many sections with %s"%t[:-1]) + for s in s_list: + print(ok) + for x in s._array: + print(x) + if x.opcode == v: + break + +def print_rebase(e, **fargs): + for s in e.sect: + if getattr(s, 'type', None) != 'rebase_': continue + print("rebase information (from compressed dyld info):") + 
print("segment section address type") + for x in s.info: print(x) + +def print_bind(e, **fargs): + for s in e.sect: + if getattr(s, 'type', None) != 'bind_': continue + print("bind information:") + print("segment section address type addend dylib symbol") + for x in s.info: print(x) + break + else: + print("no compressed binding info") + +def print_weak_bind(e, **fargs): + for s in e.sect: + if getattr(s, 'type', None) != 'weak_bind_': continue + print("weak binding information:") + print("segment section address type addend symbol") + for x in s.info: print(x) + break + else: + print("no weak binding") + +def print_lazy_bind(e, **fargs): + for s in e.sect: + if getattr(s, 'type', None) != 'lazy_bind_': continue + print("lazy binding information (from lazy_bind part of dyld info):") + print("segment section address index dylib symbol") + for x in s.info: print(x) + break + else: + print("no compressed lazy binding info") + +def print_export(e, **fargs): + for s in e.sect: + if getattr(s, 'type', None) != 'export_': continue + print("export information (from trie):") + for x in sorted(s.info, key=lambda _:_.addr): print(x) + break + else: + print("no compressed export info") + +archi = { + (macho.CPU_TYPE_MC680x0, macho.CPU_SUBTYPE_MC680x0_ALL): 'm68k', + (macho.CPU_TYPE_MC680x0, macho.CPU_SUBTYPE_MC68030_ONLY): 'm68030', + (macho.CPU_TYPE_MC680x0, macho.CPU_SUBTYPE_MC68040): 'm68040', + (macho.CPU_TYPE_MC88000, macho.CPU_SUBTYPE_MC88000_ALL): 'm88k', + (macho.CPU_TYPE_I386, macho.CPU_SUBTYPE_I386_ALL): 'i386', + (macho.CPU_TYPE_I386, macho.CPU_SUBTYPE_486): 'i486', + (macho.CPU_TYPE_I386, macho.CPU_SUBTYPE_486SX): 'i486SX', + (macho.CPU_TYPE_I386, macho.CPU_SUBTYPE_PENT): 'pentium', + (macho.CPU_TYPE_I386, macho.CPU_SUBTYPE_PENTPRO): 'pentpro', + #macho.CPU_TYPE_I386, macho.CPU_SUBTYPE_PENTIUM_4): 'pentium4', + (macho.CPU_TYPE_I386, macho.CPU_SUBTYPE_PENTII_M3): 'pentIIm3', + (macho.CPU_TYPE_I386, macho.CPU_SUBTYPE_PENTII_M5): 'pentIIm5', + (macho.CPU_TYPE_X86_64, 
macho.CPU_SUBTYPE_X86_64_ALL): 'x86_64', + (macho.CPU_TYPE_X86_64, macho.CPU_SUBTYPE_X86_64_H): 'x86_64h', + (macho.CPU_TYPE_I860, macho.CPU_SUBTYPE_I860_ALL): 'i860', + (macho.CPU_TYPE_POWERPC, macho.CPU_SUBTYPE_POWERPC_ALL): 'ppc', + (macho.CPU_TYPE_POWERPC, macho.CPU_SUBTYPE_POWERPC_601): 'ppc601', + (macho.CPU_TYPE_POWERPC, macho.CPU_SUBTYPE_POWERPC_603): 'ppc602', + (macho.CPU_TYPE_POWERPC, macho.CPU_SUBTYPE_POWERPC_603): 'ppc603', + (macho.CPU_TYPE_POWERPC, macho.CPU_SUBTYPE_POWERPC_603e): 'ppc603e', + (macho.CPU_TYPE_POWERPC, macho.CPU_SUBTYPE_POWERPC_603ev):'ppc603ev', + (macho.CPU_TYPE_POWERPC, macho.CPU_SUBTYPE_POWERPC_604): 'ppc604', + (macho.CPU_TYPE_POWERPC, macho.CPU_SUBTYPE_POWERPC_604e): 'ppc604e', + (macho.CPU_TYPE_POWERPC, macho.CPU_SUBTYPE_POWERPC_620): 'ppc620', + (macho.CPU_TYPE_POWERPC, macho.CPU_SUBTYPE_POWERPC_750): 'ppc750', + (macho.CPU_TYPE_POWERPC, macho.CPU_SUBTYPE_POWERPC_7400): 'ppc7400', + (macho.CPU_TYPE_POWERPC, macho.CPU_SUBTYPE_POWERPC_7450): 'ppc7450', + (macho.CPU_TYPE_POWERPC, macho.CPU_SUBTYPE_POWERPC_970): 'ppc970', + (macho.CPU_TYPE_POWERPC64, macho.CPU_SUBTYPE_POWERPC64_ALL):'ppc64', + (macho.CPU_TYPE_POWERPC64, macho.CPU_SUBTYPE_POWERPC_970): 'ppc970-64', + (macho.CPU_TYPE_VEO, macho.CPU_SUBTYPE_VEO_ALL): 'veo', + (macho.CPU_TYPE_VEO, macho.CPU_SUBTYPE_VEO_1): 'veo1', + (macho.CPU_TYPE_VEO, macho.CPU_SUBTYPE_VEO_2): 'veo2', + (macho.CPU_TYPE_VEO, macho.CPU_SUBTYPE_VEO_3): 'veo3', + (macho.CPU_TYPE_VEO, macho.CPU_SUBTYPE_VEO_4): 'veo4', + (macho.CPU_TYPE_HPPA, macho.CPU_SUBTYPE_HPPA_ALL): 'hppa', + (macho.CPU_TYPE_HPPA, macho.CPU_SUBTYPE_HPPA_7100LC): 'hppa7100LC', + (macho.CPU_TYPE_SPARC, macho.CPU_SUBTYPE_SPARC_ALL): 'sparc', + (macho.CPU_TYPE_ARM, macho.CPU_SUBTYPE_ARM_ALL): 'arm', + (macho.CPU_TYPE_ARM, macho.CPU_SUBTYPE_ARM_V4T): 'armv4t', + (macho.CPU_TYPE_ARM, macho.CPU_SUBTYPE_ARM_V5TEJ): 'armv5', + (macho.CPU_TYPE_ARM, macho.CPU_SUBTYPE_ARM_XSCALE): 'xscale', + (macho.CPU_TYPE_ARM, macho.CPU_SUBTYPE_ARM_V6): 
'armv6', + (macho.CPU_TYPE_ARM, macho.CPU_SUBTYPE_ARM_V6M): 'armv6m', + (macho.CPU_TYPE_ARM, macho.CPU_SUBTYPE_ARM_V7): 'armv7', + (macho.CPU_TYPE_ARM, macho.CPU_SUBTYPE_ARM_V7F): 'armv7f', + (macho.CPU_TYPE_ARM, macho.CPU_SUBTYPE_ARM_V7S): 'armv7s', + (macho.CPU_TYPE_ARM, macho.CPU_SUBTYPE_ARM_V7K): 'armv7k', + (macho.CPU_TYPE_ARM, macho.CPU_SUBTYPE_ARM_V7M): 'armv7m', + (macho.CPU_TYPE_ARM, macho.CPU_SUBTYPE_ARM_V7EM): 'armv7em', + (macho.CPU_TYPE_ARM64, macho.CPU_SUBTYPE_ARM64_ALL): 'arm64', + (macho.CPU_TYPE_ARM64, macho.CPU_SUBTYPE_ARM64_V8): 'arm64v8', + } + +def arch_name(e): + return archi[(e.Mhdr.cputype, + e.Mhdr.cpusubtype & (0xffffffff ^ macho.CPU_SUBTYPE_MASK))] + +if __name__ == '__main__': + import argparse + parser = argparse.ArgumentParser(add_help=False) + # Simulates 'otool' + parser.add_argument('-arch', dest='arch_type', action='append', help='select architecture') + parser.add_argument('-h', dest='options', action='append_const', const='header', help='print the mach header') + parser.add_argument('-l', dest='options', action='append_const', const='load', help='print the load commands') + parser.add_argument('--symbols', dest='options', action='append_const', const='symbols', help='print the symbols') + parser.add_argument('--dysym', dest='options', action='append_const', const='dysym', help='print dynamic symbols') + parser.add_argument('-r', dest='options', action='append_const', const='reloc', help='Display the relocation entries') + parser.add_argument('-I', dest='options', action='append_const', const='indirect', help='Display the indirect symbol table') + parser.add_argument('--llvm', dest='llvm_version', action='append', help='Simulate the output of a given version of llvm-otool') + # Simulates 'dyldinfo' + parser.add_argument('-opcodes', dest='options', action='append_const', const='opcodes', help='opcodes used to generate the rebase and binding information') + parser.add_argument('-rebase', dest='options', action='append_const', 
const='rebase', help='addresses dyld will adjust if file not loaded at preferred address') + parser.add_argument('-bind', dest='options', action='append_const', const='bind', help='addresses dyld will set based on symbolic lookups') + parser.add_argument('-weak_bind', dest='options', action='append_const', const='weak_bind', help='symbols which dyld must coalesce') + parser.add_argument('-lazy_bind', dest='options', action='append_const', const='lazy_bind', help='addresses dyld will lazily set on first use') + parser.add_argument('-export', dest='options', action='append_const', const='export', help='addresses of all symbols this file exports') + parser.add_argument('file', nargs='*', help='object file') + args = parser.parse_args() + if args.options is None: + args.options = [] + if len(args.file) == 0: + parser.print_help() + functions = [] + fargs = {} + dyldinfo_simulation = False + if args.llvm_version: + # Hypothesis: the major number of the version of Xcode is sufficient + # to determine what the output format of llvm-otool is. 
+ for llvm in args.llvm_version: + if 'native' in llvm: + fargs['llvm'] = get_otool_version() + else: + fargs['llvm'] = int(llvm) + if 'header' in args.options: + functions.append(print_header) + if 'load' in args.options: + if fargs.get('llvm',8) in (8, 9, 10, 11) and not 'header' in args.options: + functions.append(print_header) + functions.append(print_lc) + if 'symbols' in args.options: + functions.append(print_symbols) + if 'dysym' in args.options: + functions.append(print_dysym) + if 'reloc' in args.options: + functions.append(print_relocs) + if 'indirect' in args.options: + functions.append(print_indirect) + if 'rebase' in args.options: + functions.append(print_rebase) + dyldinfo_simulation = True + if 'bind' in args.options: + functions.append(print_bind) + dyldinfo_simulation = True + if 'weak_bind' in args.options: + functions.append(print_weak_bind) + dyldinfo_simulation = True + if 'lazy_bind' in args.options: + functions.append(print_lazy_bind) + dyldinfo_simulation = True + if 'export' in args.options: + functions.append(print_export) + dyldinfo_simulation = True + if 'opcodes' in args.options: + functions.append(print_opcodes) + dyldinfo_simulation = True + + for file in args.file: + fd = open(file, 'rb') + try: + raw = fd.read() + finally: + fd.close() + filesize = os.path.getsize(file) + try: + e = macho_init.MACHO(raw, + parseSymbols = False) + except ValueError as err: + print("%s:" %file) + print(" %s" % err) + continue + if args.arch_type is None: + if hasattr(e, 'Fhdr'): + # Select the current architecture, if present + current = platform.machine() + for _ in e.arch: + if current == arch_name(_): + e = _ + break + else: + # Display all architectures + e = [ _ for _ in e.arch ] + elif 'all' in args.arch_type: + if hasattr(e, 'Fhdr'): + # Display all architectures + e = [ _ for _ in e.arch ] + elif len(args.arch_type) == 1: + if hasattr(e, 'Fhdr'): + # Display one architecture + current = args.arch_type[0] + for _ in e.arch: + if current == 
arch_name(_): + e = _ + break + else: + sys.stderr.write("error: otool: file: %s does not contain architecture: %s\n" % (file, current)) + e = [] + else: + # Display if it is the architecture + current = args.arch_type[0] + if current != arch_name(e): + e = [] + else: + if hasattr(e, 'Fhdr'): + # Display some architectures, in the order appearing in the args + f = [] + for current in args.arch_type: + for _ in e.arch: + if current == arch_name(_): + f.append(_) + break + else: + sys.stderr.write("error: otool: file: %s does not contain architecture: %s\n" % (file, current)) + e = f + else: + # Display if one is the architecture + for current in args.arch_type: + if current == arch_name(e): + break + else: + e = [] + + if dyldinfo_simulation and len(args.file) > 1: + print("\n%s:" %file) + if hasattr(e, 'Mhdr'): + if not dyldinfo_simulation and functions != [ print_header ]: + print("%s:" %file) + for f in functions: + f(e, **fargs) + else: + for _ in e: + t0 = _.Mhdr.cputype + t1 = _.Mhdr.cpusubtype & (0xffffffff ^ macho.CPU_SUBTYPE_MASK) + if dyldinfo_simulation: + print("for arch %s:" % arch_name(_)) + else: + if functions != [ print_header ]: + print("%s (architecture %s):" %(file, arch_name(_))) + for f in functions: + f(_, **fargs) diff --git a/examples/readelf.py b/examples/readelf.py new file mode 100755 index 0000000..0ac8487 --- /dev/null +++ b/examples/readelf.py @@ -0,0 +1,346 @@ +#! 
/usr/bin/env python +import sys, os + +if sys.version_info[0] == 2 and sys.version_info[1] < 5: + sys.stderr.write("python version older than 2.5 is not supported\n") + exit(1) + +sys.path.insert(1, os.path.abspath(sys.path[0]+'/..')) +from elfesteem import elf_init, elf + +import subprocess +def popen_read_out_err(cmd): + p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + p.wait() + p.stdin.close() + return p.stdout.read() + p.stderr.read() + +import re +def get_readelf_version(): + readelf_v = popen_read_out_err(["readelf", "--version"]) + if type(readelf_v) != str: readelf_v = str(readelf_v, encoding='latin1') + r = re.search(r'GNU readelf .* (\d+\.\d+)', readelf_v) + if r: + sys.stderr.write("readelf version %s\n" % float(r.groups()[0])) + return float(r.groups()[0]) + else: + sys.stderr.write("Could not detect readelf version\n") + sys.stderr.write(readelf_v) + return None + +et_strings = { + elf.ET_REL: 'REL (Relocatable file)', + elf.ET_EXEC: 'EXEC (Executable file)', + elf.ET_DYN: 'DYN (Shared object file)', + elf.ET_CORE: 'CORE (Core file)', + } +def expand_code(table, val): + if val in table: return table[val] + return ': %#x' % val + +def is_pie(e): + # binutils 2.37 + # 2021-06-15 https://github.com/bminor/binutils-gdb/commit/93df3340fd5ad32f784214fc125de71811da72ff + for i, sh in enumerate(e.sh): + if sh.sh.type != elf.SHT_DYNAMIC: + continue + if e.wsize == 32: + dyntab = sh.dyntab[:-2] + elif e.wsize == 64: + dyntab = sh.dyntab[:-1] + for d in dyntab: + if d.type == elf.DT_FLAGS_1 and d.name & elf.DF_1_PIE: + return True + return False + +def display_headers(e): + print("ELF Header:") + import struct + ident = struct.unpack('16B', e.Ehdr.ident) + print(" Magic: %s "%' '.join(['%02x'%_ for _ in ident])) + print(" Class: %s"%expand_code({ + elf.ELFCLASS32: 'ELF32', + elf.ELFCLASS64: 'ELF64', + }, ident[elf.EI_CLASS])) + print(" Data: %s"%expand_code({ + elf.ELFDATA2LSB: "2's complement, little endian", + 
elf.ELFDATA2MSB: "2's complement, big endian", + }, ident[elf.EI_DATA])) + print(" Version: %s"%expand_code({ + 1: '1 (current)', + }, ident[elf.EI_VERSION])) + print(" OS/ABI: %s"%expand_code({ + 0: 'UNIX - System V', + }, ident[elf.EI_OSABI])) + print(" ABI Version: %d"%ident[elf.EI_ABIVERSION]) + elf_file_type = expand_code(et_strings, e.Ehdr.type) + if e.Ehdr.type == elf.ET_DYN and elf.is_pie(e): + elf_file_type = 'DYN (Position-Independent Executable file)' + print(" Type: %s"%elf_file_type) + machine_code = dict(elf.constants['EM']) + # Same textual output as readelf, from readelf.c + machine_code[elf.EM_M32] = 'ME32100' + machine_code[elf.EM_SPARC] = 'Sparc' + machine_code[elf.EM_386] = 'Intel 80386' + machine_code[elf.EM_68K] = 'MC68000' + machine_code[elf.EM_88K] = 'MC88000' + machine_code[elf.EM_486] = 'Intel 80486' + machine_code[elf.EM_860] = 'Intel 80860' + machine_code[elf.EM_MIPS] = 'MIPS R3000' + machine_code[elf.EM_S370] = 'IBM System/370' + machine_code[elf.EM_MIPS_RS3_LE] = 'MIPS R4000 big-endian' + machine_code[elf.EM_PARISC] = 'HPPA' + machine_code[elf.EM_SPARC32PLUS] = 'Sparc v8+' + machine_code[elf.EM_960] = 'Intel 80960' + machine_code[elf.EM_PPC] = 'PowerPC' + machine_code[elf.EM_PPC64] = 'PowerPC64' + machine_code[elf.EM_V800] = 'NEC V800' + machine_code[elf.EM_FR20] = 'Fujitsu FR20' + machine_code[elf.EM_RH32] = 'TRW RH32' + machine_code[elf.EM_ARM] = 'ARM' + machine_code[elf.EM_FAKE_ALPHA] = 'Digital Alpha (old)' + machine_code[elf.EM_SH] = 'Renesas / SuperH SH' + machine_code[elf.EM_SPARCV9] = 'Sparc v9' + machine_code[elf.EM_TRICORE] = 'Siemens Tricore' + machine_code[elf.EM_ARC] = 'ARC' + machine_code[elf.EM_H8_300] = 'Renesas H8/300' + machine_code[elf.EM_H8_300H] = 'Renesas H8/300H' + machine_code[elf.EM_H8S] = 'Renesas H8S' + machine_code[elf.EM_H8_500] = 'Renesas H8/500' + machine_code[elf.EM_IA_64] = 'Intel IA-64' + machine_code[elf.EM_MIPS_X] = 'Stanford MIPS-X' + machine_code[elf.EM_COLDFIRE] = 'Motorola Coldfire' + 
machine_code[elf.EM_X86_64] = 'Advanced Micro Devices X86-64' + print(" Machine: %s"%expand_code(machine_code, e.Ehdr.machine)) + print(" Version: %#x"%e.Ehdr.version) + print(" Entry point address: %#x"%e.Ehdr.entry) + print(" Start of program headers: %d (bytes into file)"%e.Ehdr.phoff) + print(" Start of section headers: %d (bytes into file)"%e.Ehdr.shoff) + print(" Flags: %#x"%e.Ehdr.flags) + print(" Size of this header: %d (bytes)"%e.Ehdr.ehsize) + print(" Size of program headers: %d (bytes)"%e.Ehdr.phentsize) + print(" Number of program headers: %d"%e.Ehdr.phnum) + print(" Size of section headers: %d (bytes)"%e.Ehdr.shentsize) + print(" Number of section headers: %d"%e.Ehdr.shnum) + print(" Section header string table index: %d"%e.Ehdr.shstrndx) + +def display_program_headers(e): + # Output format similar to readelf -l + if len(e.ph.phlist) == 0: + print("\nThere are no program headers in this file.") + return + print("\nElf file type is %s" % expand_code(et_strings, e.Ehdr.type)) + print("Entry point 0x%x" % e.Ehdr.entry) + print("There are %d program headers, starting at offset %d" % (e.Ehdr.phnum, e.Ehdr.phoff)) + print("\nProgram Headers:") + if e.wsize == 32: + header = " Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align" + format = " %-14s 0x%06x 0x%08x 0x%08x 0x%05x 0x%05x %-3s 0x%x" + elif e.wsize == 64: + header = " Type Offset VirtAddr PhysAddr\n FileSiz MemSiz Flags Align" + format = " %-14s 0x%016x 0x%016x 0x%016x\n 0x%016x 0x%016x %-3s %x" + print(header) + for p in e.ph: + flags = [' ', ' ', ' '] + if p.ph.flags & 4: flags[0] = 'R' + if p.ph.flags & 2: flags[1] = 'W' + if p.ph.flags & 1: flags[2] = 'E' + print(format%(elf.constants['PT'][p.ph.type], + p.ph.offset, p.ph.vaddr, p.ph.paddr, + p.ph.filesz, p.ph.memsz, ''.join(flags), + p.ph.align)) + if p.ph.type == elf.PT_INTERP: + s = p.shlist[0] + print(' [Requesting program interpreter: %s]' % e[s.sh.offset:s.sh.offset+s.sh.size].strip('\0')) + if len(e.sh.shlist) == 0: + return + print("\n 
Section to Segment mapping:") + print(" Segment Sections...") + for i, p in enumerate(e.ph): + res = " %02d " % i + for s in p.shlist: + res += s.sh.name + " " + print(res) + +def display_dynamic(e): + machine = elf.constants['EM'][e.Ehdr.machine] + for i, sh in enumerate(e.sh): + if sh.sh.type != elf.SHT_DYNAMIC: + continue + if e.wsize == 32: + header = " Tag Type Name/Value" + format = "%#010x %-28s %s" + dyntab = sh.dyntab[:-2] + elif e.wsize == 64: + header = " Tag Type Name/Value" + format = "%#018x %-20s %s" + dyntab = sh.dyntab[:-1] + print("\nDynamic section at offset %#x contains %d entries:" % (sh.sh.offset, len(dyntab))) + print(header) + for d in dyntab: + type = elf.constants['DT'].get(machine, {}).get(d.type, None) + if type is None: type = elf.constants['DT'].get(d.type, None) + else: type = machine + '_' + type + if type in ('NEEDED',): + name = 'Shared library: [%s]' % d.name + elif type in ('STRSZ','SYMENT','RELSZ','RELENT','PLTRELSZ','RELASZ'): + name = '%d (bytes)' % d.name + elif type in ('PLTGOT','HASH','STRTAB','SYMTAB','INIT','FINI','REL', + 'JMPREL','DEBUG','RELA', + 'CHECKSUM','VERNEED', + 'GNU_HASH', + 'MIPS_BASE_ADDRESS','MIPS_LIBLIST','MIPS_GOTSYM', + 'MIPS_HIDDEN_GOTIDX','MIPS_PROTECTED_GOTIDX', + 'MIPS_LOCAL_GOTIDX','MIPS_LOCALPAGE_GOTIDX', + 'MIPS_SYMBOL_LIB','MIPS_MSYM','MIPS_CONFLICT', + 'MIPS_RLD_MAP','MIPS_OPTIONS', + 'MIPS_INTERFACE','MIPS_INTERFACE_SIZE'): + name = '%#x' % d.name + elif type == 'PLTREL': + name = elf.constants['DT'].get(d.name, d.name) + elif type == 'MIPS_FLAGS': + if d.name == 0: + name = 'NONE' + else: + flags = ('QUICKSTART', 'NOTPOT', 'NO_LIBRARY_REPLACEMENT', + 'NO_MOVE', 'SGI_ONLY', 'GUARANTEE_INIT', + 'DELTA_C_PLUS_PLUS', 'GUARANTEE_START_INIT', + 'PIXIE', 'DEFAULT_DELAY_LOAD', 'REQUICKSTART', + 'REQUICKSTARTED', 'CORD', 'NO_UNRES_UNDEF', + 'RLD_ORDER_SAFE') + name = ' '.join([ f for (f,b) + in zip(flags,reversed(bin(d.name)[2:])) + if b == '1' ]) + else: + name = d.name + output = format%(d.type, 
'(%s)'%type, name) + print(output) + + +def display_symbols(sections): + for s in sections: + print("\n"+s.readelf_display()) + + + +if __name__ == '__main__': + import argparse + parser = argparse.ArgumentParser(add_help=False) + parser.add_argument('-H', '--help', action='help', default=argparse.SUPPRESS, help='Display this information') + parser.add_argument('-h', '--file-header', dest='options', action='append_const', const='headers', help='Display the ELF file header') + parser.add_argument('-S', '--section-headers', '--sections', dest='options', action='append_const', const='sections', help="Display the sections' header") + parser.add_argument('-r', '--relocs', dest='options', action='append_const', const='reltab', help='Display the relocations (if present)') + parser.add_argument('-s', '--syms', '--symbols', dest='options', action='append_const', const='symtab', help='Display the symbol table') + parser.add_argument('--dyn-syms', dest='options', action='append_const', const='dynsym', help='Display the dynamic symbol table') + parser.add_argument('-d', '--dynamic', dest='options', action='append_const', const='dynamic', help='Display the dynamic section (if present)') + parser.add_argument('-l', '--program-headers', '--segments', dest='options', action='append_const', const='program', help='Display the program headers') + parser.add_argument('-g', '--section-groups', dest='options', action='append_const', const='groups', help='Display the section groups') + parser.add_argument('--readelf', dest='readelf_version', action='append', help='Simulate the output of a given version of readelf') + parser.add_argument('file', nargs='+', help='ELF file(s)') + args = parser.parse_args() + if args.options is None: + args.options = [] + + elf.is_pie = lambda _: False + if args.readelf_version: + for readelf in args.readelf_version: + if 'native' in readelf: + readelf_version = get_readelf_version() + else: + readelf_version = float(readelf) + if True: + # TODO: readelf has 
a different output if "do_section_details" or "do_wide" + elf.Shdr.header64 = [" [Nr] Name Type Address Offset", + " Size EntSize Flags Link Info Align"] + elf.Shdr.format64 = (" [%(idx)2d] %(name17)-17s %(type_txt)-15s %(addr)016x %(offset)08x\n" + " %(size)016x %(entsize)016x %(flags_txt)3s %(link)2d %(info)2d %(addralign)d") + if readelf_version >= 2.26: + # 2016-01-20 https://github.com/bminor/binutils-gdb/commit/9fb71ee49fc37163697e4f34e16097928eb83d66 + elf.Shdr.footer = property(lambda _: [ + "Key to Flags:", + " W (write), A (alloc), X (execute), M (merge), S (strings), I (info),", + " L (link order), O (extra OS processing required), G (group), T (TLS),", + " C (compressed), x (unknown), o (OS specific), E (exclude),", + " %sp (processor specific)" % ( + "l (large), " if e.Ehdr.machine in (elf.EM_X86_64, elf.EM_L10M, elf.EM_K10M) else + "y (noread), " if e.Ehdr.machine == elf.EM_ARM else + "" ), + ]) + if readelf_version >= 2.27: + # 2016-07-05 https://github.com/bminor/binutils-gdb/commit/f0728ee368f217f2473798ad7ccfe9feae4412ce + elf.Shdr.footer = property(lambda _: [ + "Key to Flags:", + " W (write), A (alloc), X (execute), M (merge), S (strings), I (info),", + " L (link order), O (extra OS processing required), G (group), T (TLS),", + " C (compressed), x (unknown), o (OS specific), E (exclude),", + " %sp (processor specific)" % ( + "l (large), " if e.Ehdr.machine in (elf.EM_X86_64, elf.EM_L10M, elf.EM_K10M) else + "y (purecode), " if e.Ehdr.machine == elf.EM_ARM else + "" ), + ]) + if readelf_version >= 2.29: # more precisely 2.29.1 + # 2017-09-05 https://github.com/bminor/binutils-gdb/commit/83eef883581525d04df3a8e53a82c01d0d12b56a + elf.Shdr.footer = property(lambda _: [ + "Key to Flags:", + " W (write), A (alloc), X (execute), M (merge), S (strings), I (info),", + " L (link order), O (extra OS processing required), G (group), T (TLS),", + " C (compressed), x (unknown), o (OS specific), E (exclude),", + " %sp (processor specific)" % ( + "l (large), " 
if e.Ehdr.machine in (elf.EM_X86_64, elf.EM_L10M, elf.EM_K10M) else + "y (purecode), " if e.Ehdr.machine == elf.EM_ARM else + "v (VLE), " if e.Ehdr.machine == elf.EM_PPC else + "" ), + ]) + if readelf_version >= 2.36: # more precisely 2.36.1 + # 2021-02-02 https://github.com/bminor/binutils-gdb/commit/5424d7ed94cf5a7ca24636ab9f4e6d5c353fc0d3 + elf.Shdr.footer = property(lambda _: [ + "Key to Flags:", + " W (write), A (alloc), X (execute), M (merge), S (strings), I (info),", + " L (link order), O (extra OS processing required), G (group), T (TLS),", + " C (compressed), x (unknown), o (OS specific), E (exclude),", + " %s%sp (processor specific)" % ( + "R (retain), D (mbind), " if e.Ehdr.ident[elf.EI_OSABI] in (elf.ELFOSABI_GNU, elf.ELFOSABI_FREEBSD) else + "D (mbind), " if e.Ehdr.ident[elf.EI_OSABI] == elf.ELFOSABI_NONE else + "" + , + "l (large), " if e.Ehdr.machine in (elf.EM_X86_64, elf.EM_L10M, elf.EM_K10M) else + "y (noread), " if e.Ehdr.machine == elf.EM_ARM else + "" ), + ]) + if readelf_version >= 2.35: + # 2020-07-02 https://github.com/bminor/binutils-gdb/commit/0942c7ab94e554657c3e11ab85ae7f15373ee80d + elf.Shdr.name17 = property(lambda _: _.name[:12]+"[...]" if len(_.name) > 17 else _.name) + if readelf_version >= 2.37: + # 2021-06-15 https://github.com/bminor/binutils-gdb/commit/93df3340fd5ad32f784214fc125de71811da72ff + elf.is_pie = is_pie + + + for file in args.file: + if len(args.file) > 1: + print("\nFile: %s" % file) + fd = open(file, 'rb') + try: + raw = fd.read() + finally: + fd.close() + e = elf_init.ELF(raw) + if 'headers' in args.options: + display_headers(e) + if 'sections' in args.options: + print(e.sh.readelf_display()) + if 'reltab' in args.options: + for sh in e.sh: + if not 'rel' in dir(sh): continue + print("\n" + sh.readelf_display()) + if 'symtab' in args.options or 'dynsym' in args.options: + display_symbols(e.getsectionsbytype(elf.SHT_DYNSYM)) + if 'symtab' in args.options: + display_symbols(e.getsectionsbytype(elf.SHT_SYMTAB)) + if 
'dynamic' in args.options: + display_dynamic(e) + if 'program' in args.options: + display_program_headers(e) + if 'groups' in args.options: + for sh in e.sh: + if not sh.sh.type == elf.SHT_GROUP: continue + print(sh.readelf_display()) diff --git a/examples/readpe.py b/examples/readpe.py new file mode 100755 index 0000000..98e5edf --- /dev/null +++ b/examples/readpe.py @@ -0,0 +1,491 @@ +#! /usr/bin/env python +import sys, os + +if sys.version_info[0] == 2 and sys.version_info[1] < 4: + sys.stderr.write("python version older than 2.4 is not supported\n") + sys.exit(1) + +sys.path.insert(1, os.path.abspath(sys.path[0]+'/..')) +from elfesteem import pe_init, pe +import struct + +def test_rebuild(e): + bin = str(e) + if bin != raw: + print("ERROR: PE file is not reconstructed identical") + f = pe_init.PE(bin) + bin2 = str(f) + if bin != bin2: + print("ERROR: PE does not even have a fixpoint") + +def print_petype(e): + if hasattr(e, 'COFFhdr'): COFFhdr = e.COFFhdr + else: COFFhdr = e.Coffhdr + machine = pe.constants['IMAGE_FILE_MACHINE'].get(COFFhdr.machine, + "UNKNOWN(%#x)" % COFFhdr.machine) + if hasattr(e, 'NThdr'): + print("PE for %s (%s header)"%(machine,struct.pack("':'big'}[e.sex],machine)) + print("COFF: %d sections, %d symbols; flags %#x; szopthdr %#x" % ( + COFFhdr.numberofsections, + COFFhdr.numberofsymbols, + COFFhdr.characteristics, + COFFhdr.sizeofoptionalheader, + )) + for flag in pe.constants['IMAGE_FILE_FLAG']: + if COFFhdr.characteristics & flag: + print(" %s"%pe.constants['IMAGE_FILE_FLAG'][flag]) + if COFFhdr.sizeofoptionalheader: + if hasattr(e.Opthdr, 'majorlinkerversion'): + vstamp = '%d.%d' % ( e.Opthdr.majorlinkerversion, + e.Opthdr.minorlinkerversion ) + else: + # Sometimes, the doc does not say how vstamp is splitted in + # major / minor + vstamp = '%#x' % e.Opthdr.vstamp + magic = pe.constants['IMAGE_OPTIONAL_HDR_MAGIC'].get(e.Opthdr.magic, + "UNKNOWN(%#x)" % e.Opthdr.magic) + print("OPThdr magic: %s; version %s; Entry: %#10x" % ( + magic, 
vstamp, e.Opthdr.entry)) + if COFFhdr.pointertosymboltable: + strtab_off = COFFhdr.pointertosymboltable + 18 * COFFhdr.numberofsymbols + print("SymbolTable: %#x; %d symbols; strtab of len %d bytes" % ( + COFFhdr.pointertosymboltable, + COFFhdr.numberofsymbols, + len(e.content) - strtab_off, + )) + print("MaxAddr %#x" % e.virt.max_addr()) + if hasattr(e, 'NThdr'): + print("NThdr: Sig %s OSver %d.%d IMGver %d.%d subsystem %s v%d.%d" % ( + struct.pack("') + if s.size == 0: name = '' + else: name = n.name.strip('\0') + print("%2d %15s %#10x %#10x %12s"%(i, dirname, s.rva, s.size, name)) + +def print_symtab(e): + if hasattr(e, 'Symbols'): + print(e.Symbols.display()) + if hasattr(e, 'OSF1Symbols'): + print("\nOSF1/Tru64 SYMBOLS") + print("%r"%e.OSF1Symbols) + +from operator import itemgetter +def print_layout(e, filesz): + if filesz == 0: + print("\nEMPTY FILE") + return + layout = [] + of = 0 + if hasattr(e, 'COFFhdr'): COFFhdr = e.COFFhdr + else: COFFhdr = e.Coffhdr + if hasattr(e, 'NThdr'): # PE + if hasattr(e, 'DOShdr'): DOShdr = e.DOShdr + else: DOShdr = e.Doshdr + layout.append((0, e.NThdr.sizeofheaders, 'Headers')) + layout.append((0, len(DOShdr.pack()), 'DOS header')) + of += DOShdr.lfanew + layout.append((of, len(e.NTsig.pack()), 'NT sig')) + of += len(e.NTsig.pack()) + layout.append((of, len(COFFhdr.pack()), 'COFF header')) + of += len(COFFhdr.pack()) + if COFFhdr.sizeofoptionalheader > 0: + layout.append((of, len(e.Opthdr.pack()), 'Optional headers')) + if hasattr(e, 'NThdr'): # PE + layout.append((of + len(e.Opthdr.pack()), len(e.NThdr.pack()), 'NT header')) + layout.append((of, COFFhdr.sizeofoptionalheader, 'NT + Optional headers')) + of += COFFhdr.sizeofoptionalheader + layout.append((of, len(e.SHList.pack()), 'List of Sections')) + of += len(e.SHList.pack()) + if hasattr(e, 'NThdr'): + layout.append((DOShdr.lfanew, of-DOShdr.lfanew, 'PE header')) + for i, s in enumerate(e.SHList): + if not s.is_in_file(): + continue + if i == 0 and s.name.startswith('$'): 
+ # '$build.attributes' dummy section is seen in TI COFF sample file + # PECOFF reference documentation 4.2 explains the special + # interpretation of $ in section names, which is compatible + # with ignoring sections starting with $ + continue + # We use rawsize instead of rsize, because we want the size in bytes + layout.append((s.scnptr, s.rawsize, + 'Section '+s.name.strip('\0'))) + if s.nreloc: + layout.append((s.relptr, s.nreloc*10, + 'Relocs '+s.name.strip('\0'))) + if s.nlnno: + nlnno = s.nlnno + if s.lnnoptr+s.nlnno*6 > filesz: + nlnno = (filesz-s.lnnoptr)//6 + print("LINENO for section %s is %d and should probably be %s" % (s.name.strip('\0'), s.nlnno, nlnno)) + layout.append((s.lnnoptr, nlnno*6, + 'LineNo '+s.name.strip('\0'))) + + if hasattr(e, 'OSF1Symbols'): + layout.append((COFFhdr.pointertosymboltable, + e.OSF1Symbols.bytelen, + 'COFF/OSF1 Symbols Header')) + stab_end = COFFhdr.pointertosymboltable + e.OSF1Symbols.bytelen + for start, count, size, name in ( + ('cbLineOffset', 'cbLine', 1, 'Packed Line Number Entries'), + ('cbDnOffset', 'idnMax', 0, 'Obsolete'), + ('cbPdOffset', 'ipdMax', 64, 'Procedure Descriptors'), + ('cbSymOffset', 'isymMax', 16, 'Local Symbols'), + ('cbOptOffset', 'ioptMax', 1, 'Optimization Entries'), + ('cbAuxOffset', 'iauxMax', 4, 'Auxiliary Symbols'), + ('cbSsOffset', 'issMax', 1, 'Local Strings'), + ('cbSsExtOffset', 'issExtMax', 1, 'External Strings'), + ('cbFdOffset', 'ifdMax', 96, 'File Descriptors'), + ('cbRfdOffset', 'crfd', 4, 'Relative File Descriptors'), + ('cbExtOffset', 'iextMax', 24, 'External Symbols'), + ): + if getattr(e.OSF1Symbols, start) != 0: + layout.append((getattr(e.OSF1Symbols, start), + getattr(e.OSF1Symbols, count) * size, + 'COFF/OSF1 %s'%name)) + stab_end_s = getattr(e.OSF1Symbols, start) + \ + getattr(e.OSF1Symbols, count) * size + if stab_end < stab_end_s: stab_end = stab_end_s + layout.append((COFFhdr.pointertosymboltable, + stab_end - COFFhdr.pointertosymboltable, + 'COFF/OSF1 Symbols')) + if 
hasattr(e, 'Symbols'): + layout.append((COFFhdr.pointertosymboltable, + e.Symbols.bytelen, + 'COFF Symbols')) + if hasattr(e, 'SymbolStrings'): + layout.append((COFFhdr.pointertosymboltable + + e.Symbols.bytelen, + len(e.SymbolStrings.pack()), + 'COFF SymbolStrings')) + + if hasattr(e, 'NThdr'): + for i, s in enumerate(e.NThdr.optentries): + if s.rva != 0: + if i == pe.DIRECTORY_ENTRY_SECURITY: + # SECURITY vaddr is an offset, not a RVA! + of = s.rva + if of >= filesz: of = None + else: + of = e.rva2off(s.rva) + if of is None: + # e.g. Ange Albertini's foldedhdr.exe + continue + layout.append((of, s.size, + 'DirEnt '+pe.constants['DIRECTORY_ENTRY'][i])) + if i in (pe.DIRECTORY_ENTRY_IMPORT, + pe.DIRECTORY_ENTRY_DELAY_IMPORT): + directory, name = { + pe.DIRECTORY_ENTRY_IMPORT: ('DirImport','IMPORT'), + pe.DIRECTORY_ENTRY_DELAY_IMPORT: ('DirDelay', 'DELAY '), + }[i] + directory = getattr(e, directory) + layout.append(( + directory._off, + directory._size, + '%s Descriptors'%name)) + for idx, d in enumerate(directory): + # for a .exe created by mingw, + # there is a RVA before each thunk + if hasattr(d, 'ILT'): + layout.append(( + e.rva2off(d.originalfirstthunk), + d.ILT.bytelen, + '%s Thunks:original [%d]' % (name, idx))) + if hasattr(d, 'IAT'): + layout.append(( + e.rva2off(d.firstthunk), + d.IAT.bytelen, + '%s Thunks:current [%d]' % (name, idx))) + if hasattr(d, 'name'): + # Sometimes aligned to 2 bytes + size = d.name.bytelen + if idx+1 == len(directory) or \ + d.name_rva+size= context[-1][1]: + b, f = context[-1][1], min(l[0], context[-2][1]) + if f > b: + unknown(b, f, len(context)-2) + context.pop() + context.append((l[0],l[0]+l[1])) + print(format % (l[0], l[0]+l[1], ". 
" * (len(context)-2), + ' '.join(l[2:]))) + # If we did not reach the end of the file + l = (filesz,) + while len(context) > 1 and l[0] >= context[-1][1]: + b, f = context[-1][1], min(l[0], context[-2][1]) + if f > b: + unknown(b, f, len(context)-2) + context.pop() + if context[-1][1] > filesz: + print(format % (context[-1][1], filesz, "", "(went after EOF!)")) + for l in not_in_section: + print("Not in a section: %s" % (' '.join(l[2:]))) + +def pe_dir_display(e): + if hasattr(e, 'DirImport'): print(e.DirImport.display()) + if hasattr(e, 'DirExport'): print(e.DirExport.display()) + if hasattr(e, 'DirDelay'): print(e.DirDelay.display()) + if hasattr(e, 'DirRes'): print(e.DirRes.display()) + if hasattr(e, 'DirReloc'): print(e.DirReloc.display()) + +if __name__ == '__main__': + arg_keys = { + 'H': ('headers', 'Headers'), + 'S': ('sections', 'Sections'), + 'D': ('directories', 'Directories'), + 'r': ('reltab', 'Relocation sections'), + 's': ('symtab', 'Symbol table'), + 'l': ('layout', 'File content layout'), + #'d': ('dynsym', 'Dynamic symbols'), + } + try: + import argparse + parser = argparse.ArgumentParser() + for key in arg_keys: + const, help = arg_keys[key] + parser.add_argument('-'+key, + dest='options', + action='append_const', + const=const, + help=help) + parser.add_argument('file', nargs='+', help='ELF file(s)') + args = parser.parse_args() + if args.options is None: + args.options = [] + except ImportError: + # Emulate argparse for python < 2.7 + # We miss e.g. 
the help + class Args(object): + file = [] + options = [] + args = Args() + for arg in sys.argv[1:]: + if arg.startswith('-'): + for key in arg_keys: + if key in arg: args.options.append(arg_keys[key][0]) + else: + args.file.append(arg) + + for file in args.file: + if len(args.file) > 1: + print("\nFile: %s" % file) + fd = open(file, 'rb') + try: + raw = fd.read() + finally: + fd.close() + if raw[:2] == struct.pack("2B", 0x48,0x52): + # IDA's bochsys.dll is a normal PE with its magic number replaced + # by 'HR', probably meaning HexRays. + raw = struct.pack("2B", 0x4d,0x5a) + raw[2:] + try: + e = pe_init.PE(raw) + except ValueError: + try: + e = pe_init.Coff(raw) + except ValueError: + print('Not a valid COFF') + continue + #test_rebuild(e) + if 'headers' in args.options: + print_petype(e) + if 'sections' in args.options: + print_sections(e) + if 'symtab' in args.options: + print_symtab(e) + if 'reltab' in args.options: + for s in e.SHList: + if s.nreloc: + print('Relocs '+s.name.strip('\0')) + for r in s.data.relocs: + print(' %r'%r) + if 'layout' in args.options: + print_layout(e,len(raw)) + if 'directories' in args.options: + pe_dir_display(e) + +# http://media.blackhat.com/bh-us-11/Vuksan/BH_US_11_VuksanPericin_PECOFF_Slides.pdf diff --git a/elfesteem/test_pe.py b/examples/test_pe.py similarity index 91% rename from elfesteem/test_pe.py rename to examples/test_pe.py index a12a735..da865de 100644 --- a/elfesteem/test_pe.py +++ b/examples/test_pe.py @@ -25,4 +25,8 @@ s_myimp = e_.SHList.add_section(name = "myimp", rawsize = 0x1000) e_.DirImport.set_rva(s_myimp.addr) -open('uu.bin', 'wb').write(str(e_)) +fd = open('uu.bin', 'wb') +try: + fd.write(str(e_)) +finally: + fd.close() diff --git a/setup.py b/setup.py index 64eedf7..50cbe57 100755 --- a/setup.py +++ b/setup.py @@ -5,13 +5,14 @@ setup( name = 'ELF-Esteem', version = '0.1', - packages=['elfesteem'], - scripts = ['elfcli'], + packages = ['elfesteem', 'elfesteem.macho'], + requires = ['python (>= 2.3)'], + 
scripts = ['examples/readelf.py','examples/otool.py','examples/readpe.py'], # Metadata author = 'Philippe BIONDI', author_email = 'phil(at)secdev.org', description = 'ELF-Esteem: ELF file manipulation library', - license = 'GPLv2', + license = 'LGPLv2.1', + url = 'https://github.com/airbus-seclab/elfesteem', # keywords = '', - # url = '', ) diff --git a/tests/binary_input/Ange/bottomsecttbl.exe b/tests/binary_input/Ange/bottomsecttbl.exe new file mode 100644 index 0000000..1d25c1e Binary files /dev/null and b/tests/binary_input/Ange/bottomsecttbl.exe differ diff --git a/tests/binary_input/Ange/d_tiny.dll b/tests/binary_input/Ange/d_tiny.dll new file mode 100644 index 0000000..06af4b5 Binary files /dev/null and b/tests/binary_input/Ange/d_tiny.dll differ diff --git a/tests/binary_input/Ange/delayfake.exe b/tests/binary_input/Ange/delayfake.exe new file mode 100644 index 0000000..d9690b2 Binary files /dev/null and b/tests/binary_input/Ange/delayfake.exe differ diff --git a/tests/binary_input/Ange/dllbound-ld.exe b/tests/binary_input/Ange/dllbound-ld.exe new file mode 100644 index 0000000..421b9b4 Binary files /dev/null and b/tests/binary_input/Ange/dllbound-ld.exe differ diff --git a/tests/binary_input/Ange/dllfw.dll b/tests/binary_input/Ange/dllfw.dll new file mode 100644 index 0000000..9ee58d6 Binary files /dev/null and b/tests/binary_input/Ange/dllfw.dll differ diff --git a/tests/binary_input/Ange/exportobf.exe b/tests/binary_input/Ange/exportobf.exe new file mode 100644 index 0000000..b91188f Binary files /dev/null and b/tests/binary_input/Ange/exportobf.exe differ diff --git a/tests/binary_input/Ange/imports_relocW7.exe b/tests/binary_input/Ange/imports_relocW7.exe new file mode 100644 index 0000000..6c9a1fe Binary files /dev/null and b/tests/binary_input/Ange/imports_relocW7.exe differ diff --git a/tests/binary_input/Ange/imports_tinyXP.exe b/tests/binary_input/Ange/imports_tinyXP.exe new file mode 100644 index 0000000..3c68610 Binary files /dev/null and 
b/tests/binary_input/Ange/imports_tinyXP.exe differ diff --git a/tests/binary_input/Ange/namedresource.exe b/tests/binary_input/Ange/namedresource.exe new file mode 100644 index 0000000..072fdc7 Binary files /dev/null and b/tests/binary_input/Ange/namedresource.exe differ diff --git a/tests/binary_input/Ange/nosectionW7.exe b/tests/binary_input/Ange/nosectionW7.exe new file mode 100644 index 0000000..8d527b3 Binary files /dev/null and b/tests/binary_input/Ange/nosectionW7.exe differ diff --git a/tests/binary_input/Ange/resourceloop.exe b/tests/binary_input/Ange/resourceloop.exe new file mode 100644 index 0000000..71dc058 Binary files /dev/null and b/tests/binary_input/Ange/resourceloop.exe differ diff --git a/tests/binary_input/Ange/tinydllXP.dll b/tests/binary_input/Ange/tinydllXP.dll new file mode 100644 index 0000000..38ca86e Binary files /dev/null and b/tests/binary_input/Ange/tinydllXP.dll differ diff --git a/tests/binary_input/Ange/weirdsord.exe b/tests/binary_input/Ange/weirdsord.exe new file mode 100644 index 0000000..09750cf Binary files /dev/null and b/tests/binary_input/Ange/weirdsord.exe differ diff --git a/tests/binary_input/C28346_Load_Program_to_Flash.out b/tests/binary_input/C28346_Load_Program_to_Flash.out new file mode 100644 index 0000000..a18e203 Binary files /dev/null and b/tests/binary_input/C28346_Load_Program_to_Flash.out differ diff --git a/tests/binary_input/README.txt b/tests/binary_input/README.txt new file mode 100644 index 0000000..c2f303c --- /dev/null +++ b/tests/binary_input/README.txt @@ -0,0 +1,94 @@ +Ange + Some files from https://github.com/corkami/pocs/tree/master/PE/bin + +tiny*.asm +tiny*.bin + Cf. 
http://www.muppetlabs.com/%7Ebreadbox/software/tiny/teensy.html + +C28346_Load_Program_to_Flash.out + Source https://github.com/slavaprokopiy/Mini-TMS320C28346/blob/master/For_user/C28346_Load_Program_to_Flash/Debug/C28346_Load_Program_to_Flash.out + +cku190.rs6aix32c-3.2.4 +cku192.irix40 +cku192.ultrix43c-mips3 +cku193a05.apollo-sr10-s5r3 +cku196.clix-3.1 +cku200.dec-osf-1.3a + Source ftp://kermit.columbia.edu/kermit/bin/ + +notle-tesla-dsp.xe64T +ducati-m3_p768.bin + Source https://drive.google.com/drive/folders/0B2AlG69ZVaWldU1vUnRFUklCek0 + Linked from https://github.com/radare/radare2/issues/1602 + +coff_mingw.obj +elf64_small.o +elf64_small.out +elf_cpp.o +elf_small.o +elf_small.out +pe_mingw.exe +pe_vstudio.dll +macho/macho_32.o +macho/macho_32.out +macho/macho_64.o +macho/macho_64.out +macho/macho_fat.out + Built by Louis Granboulan for elfesteem non-regression tests + +macho/sh + An example of Mach-O with more symbol stubs than symbols (/bin/sh) + +macho/libPrintServiceQuota.1.dylib + An example of big-endian Mach-O (from an old MacOSX for PowerPC) + +macho/Decibels + An example of an iPhone app, with two ARM architectures and Encryption + +macho/LyonMetro + Another example of an iPhone app, with an LC_VERSION_MIN_IPHONEOS + +macho/TelephonyUtil.o + An example of an object file with an LC_LINKER_OPTION + Extracted from /usr/lib/libATCommandStudio.a from a recent MacOSX + +macho/libdns_services.dylib + An example of a file with an LC_SOURCE_VERSION + Copied from /usr/lib/libdns_services.dylib from a recent MacOSX + +macho/libecpg.6.5.dylib + An example of a file with a section size "past end of file" + Copied from /usr/lib/libecpg.6.5.dylib from a recent MacOSX + +macho/libATCommandStudioDynamic.dylib + An example of a file with weak binding + Copied from /usr/lib/libATCommandStudioDynamic.dylib from a recent MacOSX + +macho/libcoretls.dylib + An example of a file with no binding, no weak binding, no lazy binding + Copied from /usr/lib/libcoretls.dylib from a recent MacOSX + 
+macho/libSystem.B.dylib + An example of a file with BIND_OPCODE_SET_DYLIB_SPECIAL_IMM + Copied from /usr/lib/libSystem.B.dylib from a recent MacOSX + +macho/OSXII + An example of an old universal binary, ppc & i386, with LC_UNIXTHREAD + The OSXII software has been discontinued, cf. + https://www.macupdate.com/app/mac/10578/osxii + +macho/SweetHome3D + An example of a universal binary, ppc, i386 & x86_64, with LC_UNIXTHREAD + SweetHome3D is open source and available at http://www.sweethome3d.com/ + +macho/MacTheRipper + Another old Mach-O binary, with LC_PREBOUND_DYLIB + This is version 2.6.6, downloadable from a link available on its + Wikipedia page + +minidump-i386.dmp +minidump-x86_64.dmp + Source https://github.com/OutOfOrder/BreakpadTest/tree/master/Samples + +windows.dmp + Source https://github.com/electron/node-minidump/tree/master/test/fixtures diff --git a/tests/binary_input/cku190.rs6aix32c-3.2.4 b/tests/binary_input/cku190.rs6aix32c-3.2.4 new file mode 100644 index 0000000..2591bee Binary files /dev/null and b/tests/binary_input/cku190.rs6aix32c-3.2.4 differ diff --git a/tests/binary_input/cku192.irix40 b/tests/binary_input/cku192.irix40 new file mode 100644 index 0000000..4ef166d Binary files /dev/null and b/tests/binary_input/cku192.irix40 differ diff --git a/tests/binary_input/cku192.ultrix43c-mips3 b/tests/binary_input/cku192.ultrix43c-mips3 new file mode 100644 index 0000000..6c06320 Binary files /dev/null and b/tests/binary_input/cku192.ultrix43c-mips3 differ diff --git a/tests/binary_input/cku193a05.apollo-sr10-s5r3 b/tests/binary_input/cku193a05.apollo-sr10-s5r3 new file mode 100644 index 0000000..42dd943 Binary files /dev/null and b/tests/binary_input/cku193a05.apollo-sr10-s5r3 differ diff --git a/tests/binary_input/cku196.clix-3.1 b/tests/binary_input/cku196.clix-3.1 new file mode 100644 index 0000000..d28ae76 Binary files /dev/null and b/tests/binary_input/cku196.clix-3.1 differ diff --git a/tests/binary_input/cku200.dec-osf-1.3a 
b/tests/binary_input/cku200.dec-osf-1.3a new file mode 100644 index 0000000..c340806 Binary files /dev/null and b/tests/binary_input/cku200.dec-osf-1.3a differ diff --git a/tests/binary_input/coff_mingw.obj b/tests/binary_input/coff_mingw.obj new file mode 100644 index 0000000..ac9bf11 Binary files /dev/null and b/tests/binary_input/coff_mingw.obj differ diff --git a/tests/binary_input/ducati-m3_p768.bin b/tests/binary_input/ducati-m3_p768.bin new file mode 100644 index 0000000..cd70675 Binary files /dev/null and b/tests/binary_input/ducati-m3_p768.bin differ diff --git a/tests/binary_input/elf64_small.out b/tests/binary_input/elf64_small.out new file mode 100755 index 0000000..ce7a3b5 Binary files /dev/null and b/tests/binary_input/elf64_small.out differ diff --git a/tests/binary_input/elf_cpp.o b/tests/binary_input/elf_cpp.o new file mode 100644 index 0000000..a4bd2d5 Binary files /dev/null and b/tests/binary_input/elf_cpp.o differ diff --git a/tests/binary_input/elf_small.out b/tests/binary_input/elf_small.out new file mode 100755 index 0000000..6df2406 Binary files /dev/null and b/tests/binary_input/elf_small.out differ diff --git a/tests/binary_input/macho/Decibels b/tests/binary_input/macho/Decibels new file mode 100755 index 0000000..6f05baa Binary files /dev/null and b/tests/binary_input/macho/Decibels differ diff --git a/tests/binary_input/macho/LyonMetro b/tests/binary_input/macho/LyonMetro new file mode 100755 index 0000000..e84548d Binary files /dev/null and b/tests/binary_input/macho/LyonMetro differ diff --git a/tests/binary_input/macho/MacTheRipper b/tests/binary_input/macho/MacTheRipper new file mode 100755 index 0000000..561c535 Binary files /dev/null and b/tests/binary_input/macho/MacTheRipper differ diff --git a/tests/binary_input/macho/OSXII b/tests/binary_input/macho/OSXII new file mode 100755 index 0000000..6c97dda Binary files /dev/null and b/tests/binary_input/macho/OSXII differ diff --git a/tests/binary_input/macho/SweetHome3D 
b/tests/binary_input/macho/SweetHome3D new file mode 100755 index 0000000..3fbb89b Binary files /dev/null and b/tests/binary_input/macho/SweetHome3D differ diff --git a/tests/binary_input/macho/TelephonyUtil.o b/tests/binary_input/macho/TelephonyUtil.o new file mode 100644 index 0000000..eda70f6 Binary files /dev/null and b/tests/binary_input/macho/TelephonyUtil.o differ diff --git a/tests/binary_input/macho/libATCommandStudioDynamic.dylib b/tests/binary_input/macho/libATCommandStudioDynamic.dylib new file mode 100755 index 0000000..49c577a Binary files /dev/null and b/tests/binary_input/macho/libATCommandStudioDynamic.dylib differ diff --git a/tests/binary_input/macho/libPrintServiceQuota.1.dylib b/tests/binary_input/macho/libPrintServiceQuota.1.dylib new file mode 100755 index 0000000..4a032a7 Binary files /dev/null and b/tests/binary_input/macho/libPrintServiceQuota.1.dylib differ diff --git a/tests/binary_input/macho/libSystem.B.dylib b/tests/binary_input/macho/libSystem.B.dylib new file mode 100755 index 0000000..23213c4 Binary files /dev/null and b/tests/binary_input/macho/libSystem.B.dylib differ diff --git a/tests/binary_input/macho/libcoretls.dylib b/tests/binary_input/macho/libcoretls.dylib new file mode 100755 index 0000000..22490e9 Binary files /dev/null and b/tests/binary_input/macho/libcoretls.dylib differ diff --git a/tests/binary_input/macho/libdns_services.dylib b/tests/binary_input/macho/libdns_services.dylib new file mode 100755 index 0000000..ac81e19 Binary files /dev/null and b/tests/binary_input/macho/libdns_services.dylib differ diff --git a/tests/binary_input/macho/libecpg.6.5.dylib b/tests/binary_input/macho/libecpg.6.5.dylib new file mode 100755 index 0000000..9900bf9 Binary files /dev/null and b/tests/binary_input/macho/libecpg.6.5.dylib differ diff --git a/tests/binary_input/macho/macho_32.o b/tests/binary_input/macho/macho_32.o new file mode 100755 index 0000000..09e6843 Binary files /dev/null and b/tests/binary_input/macho/macho_32.o 
differ diff --git a/tests/binary_input/macho/macho_32.out b/tests/binary_input/macho/macho_32.out new file mode 100755 index 0000000..ef4417e Binary files /dev/null and b/tests/binary_input/macho/macho_32.out differ diff --git a/tests/binary_input/macho/macho_64.o b/tests/binary_input/macho/macho_64.o new file mode 100755 index 0000000..475514b Binary files /dev/null and b/tests/binary_input/macho/macho_64.o differ diff --git a/tests/binary_input/macho/macho_64.out b/tests/binary_input/macho/macho_64.out new file mode 100755 index 0000000..296e019 Binary files /dev/null and b/tests/binary_input/macho/macho_64.out differ diff --git a/tests/binary_input/macho/macho_fat.out b/tests/binary_input/macho/macho_fat.out new file mode 100755 index 0000000..ddec115 Binary files /dev/null and b/tests/binary_input/macho/macho_fat.out differ diff --git a/tests/binary_input/macho/macho_lcbuild.out b/tests/binary_input/macho/macho_lcbuild.out new file mode 100755 index 0000000..9fcca95 Binary files /dev/null and b/tests/binary_input/macho/macho_lcbuild.out differ diff --git a/tests/binary_input/macho/sh b/tests/binary_input/macho/sh new file mode 100755 index 0000000..7cc10d7 Binary files /dev/null and b/tests/binary_input/macho/sh differ diff --git a/tests/binary_input/minidump-i386.dmp b/tests/binary_input/minidump-i386.dmp new file mode 100644 index 0000000..2ea4581 Binary files /dev/null and b/tests/binary_input/minidump-i386.dmp differ diff --git a/tests/binary_input/minidump-x86_64.dmp b/tests/binary_input/minidump-x86_64.dmp new file mode 100644 index 0000000..9bac225 Binary files /dev/null and b/tests/binary_input/minidump-x86_64.dmp differ diff --git a/tests/binary_input/notle-tesla-dsp.xe64T b/tests/binary_input/notle-tesla-dsp.xe64T new file mode 100644 index 0000000..d01370f Binary files /dev/null and b/tests/binary_input/notle-tesla-dsp.xe64T differ diff --git a/tests/binary_input/pe_mingw.exe b/tests/binary_input/pe_mingw.exe new file mode 100755 index 
0000000..8fc4fff Binary files /dev/null and b/tests/binary_input/pe_mingw.exe differ diff --git a/tests/binary_input/pe_vstudio.dll b/tests/binary_input/pe_vstudio.dll new file mode 100644 index 0000000..c4b84fd Binary files /dev/null and b/tests/binary_input/pe_vstudio.dll differ diff --git a/tests/binary_input/tiny45.asm b/tests/binary_input/tiny45.asm new file mode 100644 index 0000000..a5efe86 --- /dev/null +++ b/tests/binary_input/tiny45.asm @@ -0,0 +1,29 @@ + ; tiny.asm + + BITS 32 + + org 0x00010000 + + db 0x7F, "ELF" ; e_ident + dd 1 ; p_type + dd 0 ; p_offset + dd $$ ; p_vaddr + dw 2 ; e_type ; p_paddr + dw 3 ; e_machine + dd _start ; e_version ; p_filesz + dd _start ; e_entry ; p_memsz + dd 4 ; e_phoff ; p_flags + _start: + mov bl, 42 ; e_shoff ; p_align + xor eax, eax + inc eax ; e_flags + int 0x80 + db 0 + dw 0x34 ; e_ehsize + dw 0x20 ; e_phentsize + db 1 ; e_phnum + ; e_shentsize + ; e_shnum + ; e_shstrndx + + filesize equ $ - $$ diff --git a/tests/binary_input/tiny45.bin b/tests/binary_input/tiny45.bin new file mode 100644 index 0000000..9c91203 Binary files /dev/null and b/tests/binary_input/tiny45.bin differ diff --git a/tests/binary_input/tiny52.asm b/tests/binary_input/tiny52.asm new file mode 100644 index 0000000..b085498 --- /dev/null +++ b/tests/binary_input/tiny52.asm @@ -0,0 +1,29 @@ + ; tiny.asm + + BITS 32 + + org 0x00010000 + + db 0x7F, "ELF" ; e_ident + dd 1 ; p_type + dd 0 ; p_offset + dd $$ ; p_vaddr + dw 2 ; e_type ; p_paddr + dw 3 ; e_machine + dd _start ; e_version ; p_filesz + dd _start ; e_entry ; p_memsz + dd 4 ; e_phoff ; p_flags + _start: + mov bl, 42 ; e_shoff ; p_align + xor eax, eax + inc eax ; e_flags + int 0x80 + db 0 + dw 0x34 ; e_ehsize + dw 0x20 ; e_phentsize + dw 1 ; e_phnum + dw 0 ; e_shentsize + dw 0 ; e_shnum + dw 0 ; e_shstrndx + + filesize equ $ - $$ diff --git a/tests/binary_input/tiny52.bin b/tests/binary_input/tiny52.bin new file mode 100644 index 0000000..5cc387f Binary files /dev/null and 
b/tests/binary_input/tiny52.bin differ diff --git a/tests/binary_input/tiny64.asm b/tests/binary_input/tiny64.asm new file mode 100644 index 0000000..a38a81b --- /dev/null +++ b/tests/binary_input/tiny64.asm @@ -0,0 +1,31 @@ + ; tiny.asm + + BITS 32 + + org 0x00200000 + + db 0x7F, "ELF" ; e_ident + db 1, 1, 1, 0, 0 + _start: + mov bl, 42 + xor eax, eax + inc eax + int 0x80 + dw 2 ; e_type + dw 3 ; e_machine + dd 1 ; e_version + dd _start ; e_entry + dd phdr - $$ ; e_phoff + phdr: dd 1 ; e_shoff ; p_type + dd 0 ; e_flags ; p_offset + dd $$ ; e_ehsize ; p_vaddr + ; e_phentsize + dw 1 ; e_phnum ; p_paddr + dw 0 ; e_shentsize + dd filesize ; e_shnum ; p_filesz + ; e_shstrndx + dd filesize ; p_memsz + dd 5 ; p_flags + dd 0x1000 ; p_align + + filesize equ $ - $$ diff --git a/tests/binary_input/tiny64.bin b/tests/binary_input/tiny64.bin new file mode 100644 index 0000000..da1df77 Binary files /dev/null and b/tests/binary_input/tiny64.bin differ diff --git a/tests/binary_input/tiny76.asm b/tests/binary_input/tiny76.asm new file mode 100644 index 0000000..6e53a4d --- /dev/null +++ b/tests/binary_input/tiny76.asm @@ -0,0 +1,36 @@ + ; tiny.asm + + BITS 32 + + org 0x08048000 + + ehdr: + db 0x7F, "ELF" ; e_ident + db 1, 1, 1, 0, 0 + _start: mov bl, 42 + xor eax, eax + inc eax + int 0x80 + dw 2 ; e_type + dw 3 ; e_machine + dd 1 ; e_version + dd _start ; e_entry + dd phdr - $$ ; e_phoff + dd 0 ; e_shoff + dd 0 ; e_flags + dw ehdrsize ; e_ehsize + dw phdrsize ; e_phentsize + phdr: dd 1 ; e_phnum ; p_type + ; e_shentsize + dd 0 ; e_shnum ; p_offset + ; e_shstrndx + ehdrsize equ $ - ehdr + dd $$ ; p_vaddr + dd $$ ; p_paddr + dd filesize ; p_filesz + dd filesize ; p_memsz + dd 5 ; p_flags + dd 0x1000 ; p_align + phdrsize equ $ - phdr + + filesize equ $ - $$ diff --git a/tests/binary_input/tiny76.bin b/tests/binary_input/tiny76.bin new file mode 100644 index 0000000..c1c4ea9 Binary files /dev/null and b/tests/binary_input/tiny76.bin differ diff --git a/tests/binary_input/tiny84.asm 
b/tests/binary_input/tiny84.asm new file mode 100644 index 0000000..5266185 --- /dev/null +++ b/tests/binary_input/tiny84.asm @@ -0,0 +1,42 @@ + ; tiny.asm + + BITS 32 + + org 0x08048000 + + ehdr: ; Elf32_Ehdr + db 0x7F, "ELF" ; e_ident + db 1, 1, 1, 0, 0 + _start: mov bl, 42 + xor eax, eax + inc eax + int 0x80 + dw 2 ; e_type + dw 3 ; e_machine + dd 1 ; e_version + dd _start ; e_entry + dd phdr - $$ ; e_phoff + dd 0 ; e_shoff + dd 0 ; e_flags + dw ehdrsize ; e_ehsize + dw phdrsize ; e_phentsize + dw 1 ; e_phnum + dw 0 ; e_shentsize + dw 0 ; e_shnum + dw 0 ; e_shstrndx + + ehdrsize equ $ - ehdr + + phdr: ; Elf32_Phdr + dd 1 ; p_type + dd 0 ; p_offset + dd $$ ; p_vaddr + dd $$ ; p_paddr + dd filesize ; p_filesz + dd filesize ; p_memsz + dd 5 ; p_flags + dd 0x1000 ; p_align + + phdrsize equ $ - phdr + + filesize equ $ - $$ diff --git a/tests/binary_input/tiny84.bin b/tests/binary_input/tiny84.bin new file mode 100644 index 0000000..3a93ae5 Binary files /dev/null and b/tests/binary_input/tiny84.bin differ diff --git a/tests/binary_input/windows.dmp b/tests/binary_input/windows.dmp new file mode 100644 index 0000000..71d5b19 Binary files /dev/null and b/tests/binary_input/windows.dmp differ diff --git a/tests/examples_linux.sh b/tests/examples_linux.sh new file mode 100644 index 0000000..9c687de --- /dev/null +++ b/tests/examples_linux.sh @@ -0,0 +1,10 @@ +#! /bin/bash + +options="-h -S -r -s --dyn-syms -d -l -g" +options="-h -S" +for option in $options; do +for file in /bin/sh tests/binary_input/elf_small.out; do +echo "=== readelf $option $file ===" +diff -c <(readelf $option $file) <(python ./examples/readelf.py $option --readelf=native $file 2>/dev/null) +done +done diff --git a/tests/examples_macos.sh b/tests/examples_macos.sh new file mode 100644 index 0000000..363cc87 --- /dev/null +++ b/tests/examples_macos.sh @@ -0,0 +1,9 @@ +#! /bin/zsh + +# Note that we don't test all files, because some are not well parsed by the +# system's otool. 
+ +for file in tests/binary_input/macho/{[DLST],lib[AScde],macho_}*; do +echo "=== $file ===" +diff -c =(otool -l $file) =(python ./examples/otool.py --llvm=native -l $file 2>/dev/null) +done diff --git a/tests/test_all.py b/tests/test_all.py new file mode 100755 index 0000000..2e7399d --- /dev/null +++ b/tests/test_all.py @@ -0,0 +1,104 @@ +#! /usr/bin/env python + +# These non-regression tests should be OK from python2.3 to python3.x + +# How to import by name, compatible with python2 and python3 +import sys, os +__dir__ = os.path.dirname(__file__) +try: + # The following is working starting with python2.7 + import importlib + import_by_name = importlib.import_module +except ImportError: + # The following is working for python2.3 to python3.11 + import imp + def import_by_name(name): + fp, pathname, description = imp.find_module(name, [__dir__]) + try: + module = imp.load_module(name, fp, pathname, description) + finally: + if fp is not None: fp.close() + return module + +try: + import hashlib +except ImportError: + # Python 2.4 does not have hashlib + # but 'md5' is deprecated since python2.5 + import md5 as oldpy_md5 + class hashlib(object): + def md5(self, data): + return oldpy_md5.new(data) + md5 = classmethod(md5) + +try: + # This way, we can use our code with pytest, but we can also + # use it directly, e.g. when testing for python2.3. + # No decorator, the syntax is forbidden in python2.3. 
+ import pytest + def assertion(): + def inner_assertion(target, value, message): + assert target == value, message # report which check failed under pytest + return inner_assertion + assertion = pytest.fixture(assertion) +except Exception: + assertion = None + +class print_colored(object): # Namespace + end = '\033[0m' + def bold(self, txt): + print('\033[1m'+txt+self.end) + bold = classmethod(bold) + def boldred(self, txt): + print('\033[91;1m'+txt+self.end) + boldred = classmethod(boldred) + def boldgreen(self, txt): + print('\033[92;1m'+txt+self.end) + boldgreen = classmethod(boldgreen) + +def assertion_status(target, value, message, status_ptr): + if target != value: + print_colored.boldred('Non-regression failure for %r' % message) + status_ptr[0] = False + +def run_tests(run_test): + status_ptr = [True] + run_test(lambda target, value, msg, status_ptr=status_ptr: + assertion_status(target, value, msg, status_ptr)) + if status_ptr[0]: + print_colored.boldgreen('OK') + return status_ptr[0] + +def test_MD5(assertion): + import struct + assertion('f71dbe52628a3f83a77ab494817525c6', + hashlib.md5(struct.pack('BBBB',116,111,116,111)).hexdigest(), + 'MD5') + +def open_read(f): + fd = open(f, 'rb') + try: + data = fd.read() + finally: + fd.close() + return data + +if __name__ == "__main__": + exit_value = 0 + print_colored.bold('test_MD5') + if not run_tests(test_MD5): + exit_value = 1 + for name in ( + 'visual_studio_mangling', + 'pe_manipulation', + 'elf_manipulation', + 'macho_manipulation', + 'rprc_manipulation', + 'minidump_manipulation', + 'intervals', + ): + module = import_by_name('test_' + name) + print_colored.bold(name) + if not run_tests(module.run_test): + exit_value = 1 + sys.exit(exit_value) diff --git a/tests/test_elf_manipulation.py b/tests/test_elf_manipulation.py new file mode 100644 index 0000000..04f845b --- /dev/null +++ b/tests/test_elf_manipulation.py @@ -0,0 +1,378 @@ +#! 
/usr/bin/env python + +import os +__dir__ = os.path.dirname(__file__) + +from test_all import run_tests, assertion, hashlib, open_read +from elfesteem.strpatchwork import StrPatchwork +from elfesteem.elf_init import ELF, log +from elfesteem import elf + +import struct + +# We want to be able to verify warnings in non-regression test +log_history = [] +log.warning = lambda *args, **kargs: log_history.append(('warn',args,kargs)) +log.error = lambda *args, **kargs: log_history.append(('error',args,kargs)) + +def test_ELF_empty(assertion): + e = ELF() + d = e.pack() + assertion('0ddf18391c150850c72257b3f3caa67b', + hashlib.md5(d).hexdigest(), + 'Creation of a standard empty ELF') + assertion(0, + len(e.symbols), + 'Empty ELF has no symbols') + d = ELF(d).pack() + assertion('0ddf18391c150850c72257b3f3caa67b', + hashlib.md5(d).hexdigest(), + 'Creation of a standard empty ELF; fix point') + assertion(True, + e.has_relocatable_sections(), + 'Standard empty ELF is relocatable') + +def test_ELF_invalid(assertion): + try: + e = ELF(open_read(__dir__+'/binary_input/README.txt')) + assertion(0,1, 'Not an ELF') + except ValueError: + pass + +def test_ELF_creation(assertion): + e = ELF( + e_type = elf.ET_REL, # Default value + e_machine = elf.EM_386, # Default value + sections = ['.text', '.text.startup', '.group', + '.data', '.rodata.str1.4', '.rodata.cst4', + '.bss', '.eh_frame', '.comment', '.note.GNU-stack', + ], + relocs = ['.text'], # These sections will have relocs + ) + d = e.pack() + assertion('dc3f17080d002ba0bfb3aec9f3bec8b2', + hashlib.md5(d).hexdigest(), + 'Creation of an ELF with a given list of sections') + +def test_ELF_small32(assertion): + global log_history + elf_small = open_read(__dir__+'/binary_input/elf_small.out') + assertion('d5284d5f438e25ef5502a0c1de97d84f', + hashlib.md5(elf_small).hexdigest(), + 'Reading elf_small.out') + e = ELF(elf_small) + d = e.pack() + assertion('d5284d5f438e25ef5502a0c1de97d84f', + hashlib.md5(d).hexdigest(), + 'Packing after 
reading elf_small.out') + # Packed file is identical :-) + d = repr(e.ph).encode('latin1') + assertion('ab4b1e52e7532789592878872910a2a1', + hashlib.md5(d).hexdigest(), + 'Display Program Headers') + d = repr(e.sh).encode('latin1') + assertion('ddf01165114eb70bd27910e4c5b03c09', + hashlib.md5(d).hexdigest(), + 'Display Section Headers (repr)') + d = e.sh.readelf_display().encode('latin1') + assertion('08da11fa164d7013561db398c068ac71', + hashlib.md5(d).hexdigest(), + 'Display Section Headers (readelf)') + d = e.getsectionbyname('.symtab').readelf_display().encode('latin1') + assertion('943434f4cde658b1659b7d8db39d9e60', + hashlib.md5(d).hexdigest(), + 'Display Symbol Table') + assertion(' 49: 0804a01c 0 NOTYPE GLOBAL DEFAULT ABS _edata', + e.getsectionbyname('.symtab')['_edata'].readelf_display(), + 'Get symbol by name, found') + assertion(' 2: 00000000 0 FUNC GLOBAL DEFAULT UND __stack_chk_fail', + e.getsectionbyname('.dynsym')[2].readelf_display(), + 'Get symbol by index, found') + d = e.getsectionbytype(elf.SHT_SYMTAB).pack() + assertion('4ed5a808faff1ca7c6a766ae45ebf377', + hashlib.md5(d).hexdigest(), + 'Get existing section by type') + d = e.getsectionbyname('.text').pack() + assertion('7149c6e4b8baaab8beebfeb818585638', + hashlib.md5(d).hexdigest(), + 'Get existing section by name') + d = e.getsectionbyvad(0x080483d0+0x100).pack() + assertion('7149c6e4b8baaab8beebfeb818585638', + hashlib.md5(d).hexdigest(), + 'Get existing section by address') + d = e.getsectionbyname('no_sect') + assertion(None, d, 'Get non-existing section by name') + d = e.getsectionbyvad(0x1000) + assertion(None, d, 'Get non-existing section by address') + d = e[0x100:0x120] + assertion('5e94f899265a799826a46ec86a293e16', + hashlib.md5(d).hexdigest(), + 'Extract chunk from raw data') + assertion(True, + e.virt.is_addr_in(0x080483d0), + 'Address in mapped virtual memory') + assertion(False, + e.virt.is_addr_in(0x08048000), + 'Address not in mapped virtual memory') + d = 
e.virt[0x080483d0:0x080483e0] + assertion('9d225ebfd0f9562b74b17c5a4653dc6f', + hashlib.md5(d).hexdigest(), + 'Extract chunk from mapped memory, in a section') + try: + e.virt[0x08040000:0x08040020] + assertion(0,1, 'Extract chunk from non-mapped memory') + except ValueError: + pass + assertion(e.virt[0x080483d0:0x080483e0], + e.virt(0x080483d0,0x080483e0), + 'Extract chunk from mapped memory, old API') + e.virt[0x080483d0:0x080483e0] = e.virt[0x080483d0:0x080483e0] + d = e.pack() + assertion('d5284d5f438e25ef5502a0c1de97d84f', + hashlib.md5(d).hexdigest(), + 'Writing in memory (interval)') + e.virt[0x080483d0] = e.virt[0x080483d0:0x080483e0] + d = e.pack() + assertion('d5284d5f438e25ef5502a0c1de97d84f', + hashlib.md5(d).hexdigest(), + 'Writing in memory (address)') + assertion(0x804a028, len(e.virt), 'Max virtual address') + assertion([('warn', ('__len__ deprecated',), {})], + log_history, + '__len__ deprecated (logs)') + log_history = [] + # Find leave; ret + assertion(0x8048481, + e.virt.find(struct.pack('BB', 0xc9, 0xc3)), + 'Find pattern (existing)') + assertion(-1, + e.virt.find(struct.pack('BBBB', 1,2,3,4)), + 'Find pattern (not existing)') + +def test_ELF_small64(assertion): + elf64_small = open_read(__dir__+'/binary_input/elf64_small.out') + assertion('dc21d928bb6a3a0fa59b17fafe803d50', + hashlib.md5(elf64_small).hexdigest(), + 'Reading elf64_small.out') + e = ELF(elf64_small) + d = e.pack() + assertion('dc21d928bb6a3a0fa59b17fafe803d50', + hashlib.md5(d).hexdigest(), + 'Packing after reading elf64_small.out') + # Packed file is identical :-) + d = e.sh.readelf_display().encode('latin1') + assertion('0454c8b5354b3eda58fce252d5d48621', + hashlib.md5(d).hexdigest(), + 'Display Section Headers (readelf, 64bit)') + d = e.getsectionbyname('.symtab').readelf_display().encode('latin1') + assertion('452e64fb0f2dad5c0e44d83e57b9d82b', + hashlib.md5(d).hexdigest(), + 'Display Symbol Table (elf64)') + d = 
e.getsectionbyname('.rela.dyn').readelf_display().encode('latin1') + assertion('650cf3f99117d39d63fae73232e09acf', + hashlib.md5(d).hexdigest(), + 'Display Reloc Table (elf64)') + +def test_ELF_group(assertion): + elf_group = open_read(__dir__+'/binary_input/elf_cpp.o') + assertion('57fed5de9474bc0600173a1db5ee6327', + hashlib.md5(elf_group).hexdigest(), + 'Reading elf_cpp.o') + e = ELF(elf_group) + d = e.pack() + assertion('57fed5de9474bc0600173a1db5ee6327', + hashlib.md5(d).hexdigest(), + 'Packing after reading elf_cpp.o') + # Packed file is identical :-) + d = e.getsectionbyname('.group').readelf_display().encode('latin1') + assertion('5c80b11a64a32e7aaee8ef378da4ccef', + hashlib.md5(d).hexdigest(), + 'Display Group Section') + +def test_ELF_TMP320C6x(assertion): + elf_tmp320c6x = open_read(__dir__+'/binary_input/notle-tesla-dsp.xe64T') + assertion('fb83ed8d809f394e70f5d84d0c8e593f', + hashlib.md5(elf_tmp320c6x).hexdigest(), + 'Reading notle-tesla-dsp.xe64T') + e = ELF(elf_tmp320c6x) + d = e.pack() + assertion('fb83ed8d809f394e70f5d84d0c8e593f', + hashlib.md5(d).hexdigest(), + 'Packing after reading notle-tesla-dsp.xe64T') + # Packed file is identical :-) + d = e.sh.readelf_display().encode('latin1') + assertion('ecf169c765d29175177528e24601f1be', + hashlib.md5(d).hexdigest(), + 'Display Section Headers (TMP320C6x)') + +def test_ELF_invalid_entsize(assertion): + global log_history + # Some various ways for an ELF to be detected as invalid + e = ELF() + e.symbols.sh.entsize = 24 + e = ELF(e.pack()) + assertion([('error', ('SymTable has invalid entsize %d instead of %d', 24, 16), {})], + log_history, + 'Invalid entsize for symbols (logs)') + log_history = [] + +def test_ELF_invalid_shstrndx(assertion): + global log_history + e = ELF() + e.Ehdr.shstrndx = 20 + e = ELF(e.pack()) + assertion([('error', ('No section of index shstrndx=20',), {})], + log_history, + 'Invalid shstrndx (logs)') + assertion(88, + e.Ehdr.shoff, + 'Normal e.Ehdr.shoff') + log_history = [] + 
+def test_ELF_offset_to_sections(assertion): + global log_history + data = StrPatchwork(ELF().pack()) + data[88+20] = struct.pack("', + repr(lh), + 'UUID change (repr)') + d = e.pack() + assertion('f86802506fb24de2ac2bebd9101326e9', + hashlib.md5(d).hexdigest(), + 'UUID change (pack)') + lh.uuid = (0,0xAAAA,0,0,0,0x11111111) + assertion((0,0xAAAA,0,0,0,0x11111111), + lh.uuid, + 'set UUID') + d = e.pack() + assertion('c8457df239deb4c51c316bd6670a445e', + hashlib.md5(d).hexdigest(), + 'set UUID (pack)') + +def test_MACHO_extend_segment(assertion): + macho_64 = open_read(__dir__+'macho_64.out') + e = MACHO(macho_64) + for l in e.load: + if getattr(l,'segname',None) == "__LINKEDIT": break + e.load.extendSegment(l, 0x1000) + d = e.pack() + assertion('405962fd8a4fe751c0ea4fe1a9d02c1e', + hashlib.md5(d).hexdigest(), + 'Extend segment') + assertion([], + log_history, + 'No non-regression test created unwanted log messages') + + + +def changeMainToUnixThread(e, **kargs): + main_pos, = e.load.getpos(macho.LC_MAIN) + sign_pos, = e.load.getpos(macho.LC_DYLIB_CODE_SIGN_DRS) + sectsign_pos, = e.sect.getpos(e.load[sign_pos].sect[0]) + delta_from_start_to_main = 0x40 + lc_main = e.load[main_pos] + mainasmpos = lc_main.entryoff - delta_from_start_to_main + # At some point, we would like to create a load command with: + #lh = macho.LoadCommand(sex='<', wsize=32, cmd=macho.LC_UNIXTHREAD, + # cputype=e.Mhdr.cputype) + largs = { 'parent':{'cputype':e.Mhdr.cputype}, 'sex':'<', 'wsize': e.wsize} + if e.wsize == 32: + c = (macho.LC_UNIXTHREAD, 80, 1, 16, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, macho.FAT_MAGIC, 0, 0, 0, 0, 0) + elif e.wsize == 64: + FAT_MAGIC_SWAPPED = macho.FAT_MAGIC>>16 + (macho.FAT_MAGIC&0xffff)<<16 + c = (macho.LC_UNIXTHREAD, 184, 4, 42, + 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, + 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, + FAT_MAGIC_SWAPPED,1, 0,0, 0,0, 0,0, 0,0) + largs['content'] = struct.pack("<%dI"%len(c), *c) + lh = macho.LoadCommand(**largs) + lh.entrypoint = 
e.off2ad(mainasmpos) + e.load.append(lh) + e.load.removepos(sign_pos) + e.load.removepos(main_pos) + e.sect.removepos(sectsign_pos) + +def insert_start_function(e): + unix_pos, = e.load.getpos(macho.LC_UNIXTHREAD) + lh = e.load[unix_pos] + if e.wsize == 32: + segtype = macho.LC_SEGMENT + # binary code for the _start function, taken from crt0.o by gcc + content = ( + 106, 0, # pushl $0 + 137, 229, # movl %esp, %ebp + 131, 228, 240, # andl $-16, %esp + 131, 236, 16, # subl $16, %esp + 139, 93, 4, # movl 4(%ebp), %ebx + 137, 28, 36, # movl %ebx, (%esp) + 141, 77, 8, # leal 8(%ebp), %ecx + 137, 76, 36, 4, # movl %ecx, 4(%esp) + 131, 195, 1, # addl $1, %ebx + 193, 227, 2, # shll $2, %ebx + 1, 203, # addl %ecx, %ebx + 137, 92, 36, 8, # movl %ebx, 8(%esp) + 139, 3, # movl (%ebx), %eax + 131, 195, 4, # addl $4, %ebx + 133, 192, # testl %eax, %eax + 117, 247, # jne .-7 + 137, 92, 36, 12, # movl %ebx, 12(%esp) + 232, 0, 0, 0, 0, ## call main + 137, 4, 36, # movl %eax, (%esp) + 232, 0, 0, 0, 0, ## call exit + ) + call_offset = 0x0b + exit_offset = 0x33 + offset_of_call_main = 0x30 + offset_of_call_exit = 0x38 + elif e.wsize == 64: + segtype = macho.LC_SEGMENT_64 + # binary code for the _start function + content = ( + 106, 0, # pushq $0 + 72, 137, 229, # movq %rsp, %rbp + 72, 131, 228, 240, # andq $-16, %rsp + 72, 139, 125, 8, # movq 8(%rbp), %rdi + 72, 141, 117, 16, # movq 16(%rbp), %rsi + 137, 250, # movl %edi, %edx + 131, 194, 1, # addl $1, %edx + 193, 226, 3, # shll $3, %edx + 72, 1, 242, # addq %rsi, %rdx + 72, 137, 209, # movq %rdx, %rcx + 235, 4, # jmp .+4 + 72, 131, 193, 8, # addq $8, %rcx + 72, 131, 57, 0, # cmpq $0, (%rcx) + 117, 246, # jne .-8 + 72, 131, 193, 8, # addq $0, %rcx + 232, 0, 0, 0, 0, ## call main + 137, 199, # movl %eax, %edi + 232, 0, 0, 0, 0, ## call exit + ) + call_offset = 0x0c + exit_offset = 0x35 + offset_of_call_main = 0x2f + offset_of_call_exit = 0x36 + else: + raise ValueError("Wordsize %s is not possible", e.wsize) + content = 
struct.pack('%dB'%len(content), *content) + e.add(type=segtype, segname='__NEWTEXT', + initprot=macho.VM_PROT_READ|macho.VM_PROT_EXECUTE, content=content) + off = e.sect.sect[-1].offset + mainasmpos = e.ad2off(lh.entrypoint) + lh.entrypoint = e.off2ad(off) + call = mainasmpos + call_offset - off + exit = mainasmpos + exit_offset - off + e.sect.sect[-1].content = content[:offset_of_call_main+1] + struct.pack(" OK + for _ in range(2): + del e.SHList._array[-1] + e.SHList._size -= 40 + e.COFFhdr.numberofsections -= 1 + # Add two Descriptors in the Import Directory + e.DirImport.add_dlldesc( + [({"name":"kernel32.dll", + "firstthunk":s_test.addr}, + ["CreateFileA", + "SetFilePointer", + "WriteFile", + "CloseHandle", + ] + ), + ({"name":"USER32.dll", + "firstthunk":None}, + ["SetDlgItemInt", + "GetMenu", + "HideCaret", + ] + ) + ] + ) + s_myimp = e.SHList.add_section(name="myimp", rawsize=len(e.DirImport)) + e.DirImport.set_rva(s_myimp.addr) + assertion(0x4050a8, + e.DirImport.get_funcvirt('KERNEL32.dll','ExitProcess'), + 'Import ExitProcess') + assertion(None, + e.DirImport.get_funcvirt(None,'LoadStringW'), + 'Import LoadStringW') + assertion(None, + e.DirExport.get_funcvirt('SetUserGeoID'), + 'Export SetUserGeoID') + d = e.pack() + assertion('8a3a1c8c9aa2db211e1d34c7efbb8473', + hashlib.md5(d).hexdigest(), + 'Adding new imports') + d = PE(d).pack() + assertion([('warn', ('Section %d size %#x not aligned to %#x', 5, 294, 512), {})], + log_history, + 'Adding new imports (logs)') + log_history = [] + assertion('8a3a1c8c9aa2db211e1d34c7efbb8473', + hashlib.md5(d).hexdigest(), + 'Adding new imports; fix point') + # Add an export + if e.DirExport.expdesc is None: + e.DirExport.create(['coco']) + assertion(0x40703e, + e.DirExport.get_funcvirt('coco'), + 'Export: get_funcvirt') + # 'eval' avoids warnings with python2.3 + assertion({1: eval("0xdeedc0fe"), 'coco': eval("0xdeedc0fe")}, + e.export_funcs(), + 'Export: export_funcs') + d = e.pack() + 
assertion('47a864481296d88f908126fb822ded59', + hashlib.md5(d).hexdigest(), + 'Adding new exports') + d = PE(d).pack() + assertion([('warn', ('Section %d size %#x not aligned to %#x', 5, 294, 512), {})], + log_history, + 'Adding new exports (logs)') + log_history = [] + assertion('47a864481296d88f908126fb822ded59', + hashlib.md5(d).hexdigest(), + 'Adding new exports; fix point') + # Add a new Descriptor in the Import Directory + e.DirImport.add_dlldesc([ ({"name":"MyDLL.dll"}, ["MyFunc"]) ]) + e.DirImport.set_rva(None) + assertion('47a864481296d88f908126fb822ded59', + hashlib.md5(d).hexdigest(), + 'Adding imports, no specified section') + +def test_PE_dll(assertion): + global log_history + # Small DLL created with Visual Studio + dll_vstudio = open_read(__dir__+'/binary_input/pe_vstudio.dll') + e = PE(dll_vstudio) + d = e.pack() + assertion('19028e1a1bde785fb4a58aeacf56007b', + hashlib.md5(d).hexdigest(), + 'Packing after reading pe_vstudio.dll') + # Test the display() functions + d = e.DirImport.display().encode('latin1') + assertion('e9f925c32ed91f889a2b57e73360d444', + hashlib.md5(d).hexdigest(), + 'Display Directory IMPORT') + d = e.DirExport.display().encode('latin1') + assertion('2d262c4d834e58b17d4c7f2359d1f6f1', + hashlib.md5(d).hexdigest(), + 'Display Directory EXPORT') + d = e.DirRes.display().encode('latin1') + assertion('a794e58acca2f6b2d9628e64008ad6d8', + hashlib.md5(d).hexdigest(), + 'Display Directory RESOURCE') + d = e.DirReloc.display().encode('latin1') + assertion('33af05a3215689dec4cdae3656c63af0', + hashlib.md5(d).hexdigest(), + 'Display Directory BASERELOC') + d = '\n'.join([repr(_) for reldir in e.DirReloc for _ in reldir.rels]) + d = d.encode('latin1') + assertion('87951bfbb3c09dec8c54d41f72cc4263', + hashlib.md5(d).hexdigest(), + 'Display all relocations') + +def test_PE_ange(assertion): + global log_history + # Parse some ill-formed PE made by Ange Albertini + PE(open_read(__dir__+'/binary_input/Ange/resourceloop.exe')) + 
assertion([('warn', ('Resource tree too deep',), {})]*212, + log_history, + 'Ange/resourceloop.exe (logs)') + log_history = [] + PE(open_read(__dir__+'/binary_input/Ange/namedresource.exe')) + assertion([], + log_history, + 'Ange/namedresource.exe (logs)') + PE(open_read(__dir__+'/binary_input/Ange/weirdsord.exe')) + assertion([('warn', ('Section %d offset %#x not aligned to %#x', 0, 513, 16384), {}), ('warn', ('Section %d size %#x not aligned to %#x', 0, 270, 16384), {})], + log_history, + 'Ange/weirdsord.exe (logs)') + log_history = [] + PE(open_read(__dir__+'/binary_input/Ange/nosectionW7.exe')) + assertion([('warn', ('Number of rva %d does not match sizeofoptionalheader %d', 16, 0), {})], + log_history, + 'Ange/nosectionW7.exe (logs)') + log_history = [] + PE(open_read(__dir__+'/binary_input/Ange/imports_relocW7.exe')) + assertion([], + log_history, + 'Ange/imports_relocW7.exe (logs)') + PE(open_read(__dir__+'/binary_input/Ange/imports_tinyXP.exe')) + assertion([], + log_history, + 'Ange/imports_tinyXP.exe (logs)') + PE(open_read(__dir__+'/binary_input/Ange/bottomsecttbl.exe')) + assertion([('warn', ('Number of rva %d does not match sizeofoptionalheader %d', 16, 696), {})], + log_history, + 'Ange/bottomsecttbl.exe (logs)') + log_history = [] + PE(open_read(__dir__+'/binary_input/Ange/delayfake.exe')) + assertion([], + log_history, + 'Ange/delayfake.exe (logs)') + PE(open_read(__dir__+'/binary_input/Ange/exportobf.exe')) + assertion([], + log_history, + 'Ange/exportobf.exe (logs)') + PE(open_read(__dir__+'/binary_input/Ange/dllbound-ld.exe')) + assertion([], + log_history, + 'Ange/dllbound-ld.exe (logs)') + PE(open_read(__dir__+'/binary_input/Ange/d_tiny.dll')) + assertion([('warn', ('Opthdr magic %#x', 31074), {}), + ('warn', ('Number of rva %d does not match sizeofoptionalheader %d', 0, 13864), {}), + ('warn', ('Windows 8 needs at least 13 directories, %d found', 0), {}), + ('warn', ('Too many symbols: %d', 541413408), {}), + ('warn', ('File too short for 
StrTable -0x61746127 != 0x0',), {})], + log_history, + 'Ange/d_tiny.dll (logs)') + log_history = [] + PE(open_read(__dir__+'/binary_input/Ange/dllfw.dll')) + assertion([], + log_history, + 'Ange/dllfw.dll (logs)') + PE(open_read(__dir__+'/binary_input/Ange/tinydllXP.dll')) + assertion([('warn', ('Number of rva %d does not match sizeofoptionalheader %d', 0, 0), {}), + ('warn', ('Windows 8 needs at least 13 directories, %d found', 0), {}), + ('warn', ('File too short for StrTable 0x55 != 0xc258016a',), {})], + log_history, + 'Ange/tinydllXP.dll (logs)') + log_history = [] + e = PE(open_read(__dir__+'/binary_input/Ange/resourceloop.exe')) + log_history = [] + d = e.DirRes.display().encode('latin1') + assertion('98701be30b09759a64340e5245e48195', + hashlib.md5(d).hexdigest(), + 'Display Directory RESOURCE that is too deep') + +def test_PE_invalids(assertion): + # Some various ways for a PE to be detected as invalid + e = PE() + data = StrPatchwork(e.pack()) + try: + e.NTsig.signature = 0x2000 + e = PE(e.pack()) + assertion(0,1, 'Not a PE, invalid NTsig') + except ValueError: + pass + try: + e.DOShdr.lfanew = 0x200000 + data[60] = struct.pack(">::basic_filebuf>(class std::basic_filebuf> const &)"), +("??0?$basic_filebuf@DU?$char_traits@D@std@@@std@@QAE@PAU_iobuf@@@Z", "public: __thiscall std::basic_filebuf>::basic_filebuf>(struct _iobuf *)"), +("??0?$basic_filebuf@DU?$char_traits@D@std@@@std@@QAE@W4_Uninitialized@1@@Z", "public: __thiscall std::basic_filebuf>::basic_filebuf>(enum std::_Uninitialized)"), +("??0?$basic_filebuf@GU?$char_traits@G@std@@@std@@QAE@ABV01@@Z", "public: __thiscall std::basic_filebuf>::basic_filebuf>(class std::basic_filebuf> const &)"), +("??0?$basic_filebuf@GU?$char_traits@G@std@@@std@@QAE@PAU_iobuf@@@Z", "public: __thiscall std::basic_filebuf>::basic_filebuf>(struct _iobuf *)"), +("??0?$basic_filebuf@GU?$char_traits@G@std@@@std@@QAE@W4_Uninitialized@1@@Z", "public: __thiscall std::basic_filebuf>::basic_filebuf>(enum std::_Uninitialized)"), 
+("??0?$basic_stringstream@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QAE@ABV01@@Z", "public: __thiscall std::basic_stringstream,class std::allocator>::basic_stringstream,class std::allocator>(class std::basic_stringstream,class std::allocator> const &)"), +("??0?$basic_stringstream@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QAE@ABV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@H@Z", "public: __thiscall std::basic_stringstream,class std::allocator>::basic_stringstream,class std::allocator>(class std::basic_string,class std::allocator> const &,int)"), +("??0?$basic_stringstream@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QAE@H@Z", "public: __thiscall std::basic_stringstream,class std::allocator>::basic_stringstream,class std::allocator>(int)"), +("??0?$basic_stringstream@GU?$char_traits@G@std@@V?$allocator@G@2@@std@@QAE@ABV01@@Z", "public: __thiscall std::basic_stringstream,class std::allocator>::basic_stringstream,class std::allocator>(class std::basic_stringstream,class std::allocator> const &)"), +("??0?$basic_stringstream@GU?$char_traits@G@std@@V?$allocator@G@2@@std@@QAE@ABV?$basic_string@GU?$char_traits@G@std@@V?$allocator@G@2@@1@H@Z", "public: __thiscall std::basic_stringstream,class std::allocator>::basic_stringstream,class std::allocator>(class std::basic_string,class std::allocator> const &,int)"), +("??0?$basic_stringstream@GU?$char_traits@G@std@@V?$allocator@G@2@@std@@QAE@H@Z", "public: __thiscall std::basic_stringstream,class std::allocator>::basic_stringstream,class std::allocator>(int)"), +("??0?$num_get@DV?$istreambuf_iterator@DU?$char_traits@D@std@@@std@@@std@@QAE@ABV_Locinfo@1@I@Z", "public: __thiscall std::num_get>>::num_get>>(class std::_Locinfo const &,unsigned int)"), +("??0?$num_get@DV?$istreambuf_iterator@DU?$char_traits@D@std@@@std@@@std@@QAE@I@Z", "public: __thiscall std::num_get>>::num_get>>(unsigned int)"), +("??0?$num_get@GV?$istreambuf_iterator@GU?$char_traits@G@std@@@std@@@std@@QAE@ABV_Locinfo@1@I@Z", "public: 
__thiscall std::num_get>>::num_get>>(class std::_Locinfo const &,unsigned int)"), +("??0?$num_get@GV?$istreambuf_iterator@GU?$char_traits@G@std@@@std@@@std@@QAE@I@Z", "public: __thiscall std::num_get>>::num_get>>(unsigned int)"), +("??0streambuf@@QAE@ABV0@@Z", "public: __thiscall streambuf::streambuf(class streambuf const &)"), +("??0strstreambuf@@QAE@ABV0@@Z", "public: __thiscall strstreambuf::strstreambuf(class strstreambuf const &)"), +("??0strstreambuf@@QAE@H@Z", "public: __thiscall strstreambuf::strstreambuf(int)"), +("??0strstreambuf@@QAE@P6APAXJ@ZP6AXPAX@Z@Z", "public: __thiscall strstreambuf::strstreambuf(void * (__cdecl *)(long),void (__cdecl *)(void *))"), +("??0strstreambuf@@QAE@PADH0@Z", "public: __thiscall strstreambuf::strstreambuf(char *,int,char *)"), +("??0strstreambuf@@QAE@PAEH0@Z", "public: __thiscall strstreambuf::strstreambuf(unsigned char *,int,unsigned char *)"), +("??0strstreambuf@@QAE@XZ", "public: __thiscall strstreambuf::strstreambuf(void)"), +("??1__non_rtti_object@std@@UAE@XZ", "public: virtual __thiscall std::__non_rtti_object::~__non_rtti_object(void)"), +("??1__non_rtti_object@@UAE@XZ", "public: virtual __thiscall __non_rtti_object::~__non_rtti_object(void)"), +("??1?$num_get@DV?$istreambuf_iterator@DU?$char_traits@D@std@@@std@@@std@@UAE@XZ", "public: virtual __thiscall std::num_get>>::~num_get>>(void)"), +("??1?$num_get@GV?$istreambuf_iterator@GU?$char_traits@G@std@@@std@@@std@@UAE@XZ", "public: virtual __thiscall std::num_get>>::~num_get>>(void)"), +("??4istream_withassign@@QAEAAV0@ABV0@@Z", "public: class istream_withassign & __thiscall istream_withassign::operator=(class istream_withassign const &)"), +("??4istream_withassign@@QAEAAVistream@@ABV1@@Z", "public: class istream & __thiscall istream_withassign::operator=(class istream const &)"), +("??4istream_withassign@@QAEAAVistream@@PAVstreambuf@@@Z", "public: class istream & __thiscall istream_withassign::operator=(class streambuf *)"), 
+("??5std@@YAAAV?$basic_istream@DU?$char_traits@D@std@@@0@AAV10@AAC@Z", "class std::basic_istream> & __cdecl std::operator>>(class std::basic_istream> &,signed char &)"), +("??5std@@YAAAV?$basic_istream@DU?$char_traits@D@std@@@0@AAV10@AAD@Z", "class std::basic_istream> & __cdecl std::operator>>(class std::basic_istream> &,char &)"), +("??5std@@YAAAV?$basic_istream@DU?$char_traits@D@std@@@0@AAV10@AAE@Z", "class std::basic_istream> & __cdecl std::operator>>(class std::basic_istream> &,unsigned char &)"), +("??6?$basic_ostream@GU?$char_traits@G@std@@@std@@QAEAAV01@P6AAAVios_base@1@AAV21@@Z@Z", "public: class std::basic_ostream> & __thiscall std::basic_ostream>::operator<<(class std::ios_base & (__cdecl *)(class std::ios_base &))"), +("??6?$basic_ostream@GU?$char_traits@G@std@@@std@@QAEAAV01@PAV?$basic_streambuf@GU?$char_traits@G@std@@@1@@Z", "public: class std::basic_ostream> & __thiscall std::basic_ostream>::operator<<(class std::basic_streambuf> *)"), +("??6?$basic_ostream@GU?$char_traits@G@std@@@std@@QAEAAV01@PBX@Z", "public: class std::basic_ostream> & __thiscall std::basic_ostream>::operator<<(void const *)"), +("??_8?$basic_fstream@DU?$char_traits@D@std@@@std@@7B?$basic_ostream@DU?$char_traits@D@std@@@1@@", "const std::basic_fstream>::`vbtable'{for `std::basic_ostream>'}"), +("??_8?$basic_fstream@GU?$char_traits@G@std@@@std@@7B?$basic_istream@GU?$char_traits@G@std@@@1@@", "const std::basic_fstream>::`vbtable'{for `std::basic_istream>'}"), +("??_8?$basic_fstream@GU?$char_traits@G@std@@@std@@7B?$basic_ostream@GU?$char_traits@G@std@@@1@@", "const std::basic_fstream>::`vbtable'{for `std::basic_ostream>'}"), +("??9std@@YA_NPBDABV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@0@@Z", "bool __cdecl std::operator!=(char const *,class std::basic_string,class std::allocator> const &)"), +("??9std@@YA_NPBGABV?$basic_string@GU?$char_traits@G@std@@V?$allocator@G@2@@0@@Z", "bool __cdecl std::operator!=(unsigned short const *,class std::basic_string,class 
std::allocator> const &)"), +("??A?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QAEAADI@Z", "public: char & __thiscall std::basic_string,class std::allocator>::operator[](unsigned int)"), +("??A?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QBEABDI@Z", "public: char const & __thiscall std::basic_string,class std::allocator>::operator[](unsigned int)const"), +("??A?$basic_string@GU?$char_traits@G@std@@V?$allocator@G@2@@std@@QAEAAGI@Z", "public: unsigned short & __thiscall std::basic_string,class std::allocator>::operator[](unsigned int)"), +("??A?$basic_string@GU?$char_traits@G@std@@V?$allocator@G@2@@std@@QBEABGI@Z", "public: unsigned short const & __thiscall std::basic_string,class std::allocator>::operator[](unsigned int)const"), +("?abs@std@@YAMABV?$complex@M@1@@Z", "float __cdecl std::abs(class std::complex const &)"), +("?abs@std@@YANABV?$complex@N@1@@Z", "double __cdecl std::abs(class std::complex const &)"), +("?abs@std@@YAOABV?$complex@O@1@@Z", "long double __cdecl std::abs(class std::complex const &)"), +("?cin@std@@3V?$basic_istream@DU?$char_traits@D@std@@@1@A", "class std::basic_istream> std::cin"), +("?do_get@?$num_get@DV?$istreambuf_iterator@DU?$char_traits@D@std@@@std@@@std@@MBE?AV?$istreambuf_iterator@DU?$char_traits@D@std@@@2@V32@0AAVios_base@2@AAHAAG@Z", "protected: virtual class std::istreambuf_iterator> __thiscall std::num_get>>::do_get(class std::istreambuf_iterator>,class std::istreambuf_iterator>,class std::ios_base &,int &,unsigned short &)const"), +("?do_get@?$num_get@DV?$istreambuf_iterator@DU?$char_traits@D@std@@@std@@@std@@MBE?AV?$istreambuf_iterator@DU?$char_traits@D@std@@@2@V32@0AAVios_base@2@AAHAAI@Z", "protected: virtual class std::istreambuf_iterator> __thiscall std::num_get>>::do_get(class std::istreambuf_iterator>,class std::istreambuf_iterator>,class std::ios_base &,int &,unsigned int &)const"), 
+("?do_get@?$num_get@DV?$istreambuf_iterator@DU?$char_traits@D@std@@@std@@@std@@MBE?AV?$istreambuf_iterator@DU?$char_traits@D@std@@@2@V32@0AAVios_base@2@AAHAAJ@Z", "protected: virtual class std::istreambuf_iterator> __thiscall std::num_get>>::do_get(class std::istreambuf_iterator>,class std::istreambuf_iterator>,class std::ios_base &,int &,long &)const"), +("?do_get@?$num_get@DV?$istreambuf_iterator@DU?$char_traits@D@std@@@std@@@std@@MBE?AV?$istreambuf_iterator@DU?$char_traits@D@std@@@2@V32@0AAVios_base@2@AAHAAK@Z", "protected: virtual class std::istreambuf_iterator> __thiscall std::num_get>>::do_get(class std::istreambuf_iterator>,class std::istreambuf_iterator>,class std::ios_base &,int &,unsigned long &)const"), +("?do_get@?$num_get@DV?$istreambuf_iterator@DU?$char_traits@D@std@@@std@@@std@@MBE?AV?$istreambuf_iterator@DU?$char_traits@D@std@@@2@V32@0AAVios_base@2@AAHAAM@Z", "protected: virtual class std::istreambuf_iterator> __thiscall std::num_get>>::do_get(class std::istreambuf_iterator>,class std::istreambuf_iterator>,class std::ios_base &,int &,float &)const"), +("?_query_new_handler@@YAP6AHI@ZXZ", "int (__cdecl * __cdecl _query_new_handler(void))(unsigned int)"), +("?register_callback@ios_base@std@@QAEXP6AXW4event@12@AAV12@H@ZH@Z", "public: void __thiscall std::ios_base::register_callback(void (__cdecl *)(enum std::ios_base::event,class std::ios_base &,int),int)"), +("?seekg@?$basic_istream@DU?$char_traits@D@std@@@std@@QAEAAV12@JW4seekdir@ios_base@2@@Z", "public: class std::basic_istream> & __thiscall std::basic_istream>::seekg(long,enum std::ios_base::seekdir)"), +("?seekg@?$basic_istream@DU?$char_traits@D@std@@@std@@QAEAAV12@V?$fpos@H@2@@Z", "public: class std::basic_istream> & __thiscall std::basic_istream>::seekg(class std::fpos)"), +("?seekg@?$basic_istream@GU?$char_traits@G@std@@@std@@QAEAAV12@JW4seekdir@ios_base@2@@Z", "public: class std::basic_istream> & __thiscall std::basic_istream>::seekg(long,enum std::ios_base::seekdir)"), 
+("?seekg@?$basic_istream@GU?$char_traits@G@std@@@std@@QAEAAV12@V?$fpos@H@2@@Z", "public: class std::basic_istream> & __thiscall std::basic_istream>::seekg(class std::fpos)"), +("?seekoff@?$basic_filebuf@DU?$char_traits@D@std@@@std@@MAE?AV?$fpos@H@2@JW4seekdir@ios_base@2@H@Z", "protected: virtual class std::fpos __thiscall std::basic_filebuf>::seekoff(long,enum std::ios_base::seekdir,int)"), +("?seekoff@?$basic_filebuf@GU?$char_traits@G@std@@@std@@MAE?AV?$fpos@H@2@JW4seekdir@ios_base@2@H@Z", "protected: virtual class std::fpos __thiscall std::basic_filebuf>::seekoff(long,enum std::ios_base::seekdir,int)"), +("?set_new_handler@@YAP6AXXZP6AXXZ@Z", "void (__cdecl * __cdecl set_new_handler(void (__cdecl *)(void)))(void)"), +("?str@?$basic_istringstream@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QAEXABV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@2@@Z", "public: void __thiscall std::basic_istringstream,class std::allocator>::str(class std::basic_string,class std::allocator> const &)"), +("?str@?$basic_istringstream@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QBE?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@2@XZ", "public: class std::basic_string,class std::allocator> __thiscall std::basic_istringstream,class std::allocator>::str(void)const"), +("?str@?$basic_istringstream@GU?$char_traits@G@std@@V?$allocator@G@2@@std@@QAEXABV?$basic_string@GU?$char_traits@G@std@@V?$allocator@G@2@@2@@Z", "public: void __thiscall std::basic_istringstream,class std::allocator>::str(class std::basic_string,class std::allocator> const &)"), +("?str@?$basic_istringstream@GU?$char_traits@G@std@@V?$allocator@G@2@@std@@QBE?AV?$basic_string@GU?$char_traits@G@std@@V?$allocator@G@2@@2@XZ", "public: class std::basic_string,class std::allocator> __thiscall std::basic_istringstream,class std::allocator>::str(void)const"), +("?str@?$basic_ostringstream@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QAEXABV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@2@@Z", "public: 
void __thiscall std::basic_ostringstream,class std::allocator>::str(class std::basic_string,class std::allocator> const &)"), +("?str@?$basic_ostringstream@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QBE?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@2@XZ", "public: class std::basic_string,class std::allocator> __thiscall std::basic_ostringstream,class std::allocator>::str(void)const"), +("?str@?$basic_ostringstream@GU?$char_traits@G@std@@V?$allocator@G@2@@std@@QAEXABV?$basic_string@GU?$char_traits@G@std@@V?$allocator@G@2@@2@@Z", "public: void __thiscall std::basic_ostringstream,class std::allocator>::str(class std::basic_string,class std::allocator> const &)"), +("?str@?$basic_ostringstream@GU?$char_traits@G@std@@V?$allocator@G@2@@std@@QBE?AV?$basic_string@GU?$char_traits@G@std@@V?$allocator@G@2@@2@XZ", "public: class std::basic_string,class std::allocator> __thiscall std::basic_ostringstream,class std::allocator>::str(void)const"), +("?str@?$basic_stringbuf@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QAEXABV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@2@@Z", "public: void __thiscall std::basic_stringbuf,class std::allocator>::str(class std::basic_string,class std::allocator> const &)"), +("?str@?$basic_stringbuf@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QBE?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@2@XZ", "public: class std::basic_string,class std::allocator> __thiscall std::basic_stringbuf,class std::allocator>::str(void)const"), +("?str@?$basic_stringbuf@GU?$char_traits@G@std@@V?$allocator@G@2@@std@@QAEXABV?$basic_string@GU?$char_traits@G@std@@V?$allocator@G@2@@2@@Z", "public: void __thiscall std::basic_stringbuf,class std::allocator>::str(class std::basic_string,class std::allocator> const &)"), +("?str@?$basic_stringbuf@GU?$char_traits@G@std@@V?$allocator@G@2@@std@@QBE?AV?$basic_string@GU?$char_traits@G@std@@V?$allocator@G@2@@2@XZ", "public: class std::basic_string,class std::allocator> __thiscall 
std::basic_stringbuf,class std::allocator>::str(void)const"), +("?str@?$basic_stringstream@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QAEXABV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@2@@Z", "public: void __thiscall std::basic_stringstream,class std::allocator>::str(class std::basic_string,class std::allocator> const &)"), +("?str@?$basic_stringstream@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QBE?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@2@XZ", "public: class std::basic_string,class std::allocator> __thiscall std::basic_stringstream,class std::allocator>::str(void)const"), +("?str@?$basic_stringstream@GU?$char_traits@G@std@@V?$allocator@G@2@@std@@QAEXABV?$basic_string@GU?$char_traits@G@std@@V?$allocator@G@2@@2@@Z", "public: void __thiscall std::basic_stringstream,class std::allocator>::str(class std::basic_string,class std::allocator> const &)"), +("?str@?$basic_stringstream@GU?$char_traits@G@std@@V?$allocator@G@2@@std@@QBE?AV?$basic_string@GU?$char_traits@G@std@@V?$allocator@G@2@@2@XZ", "public: class std::basic_string,class std::allocator> __thiscall std::basic_stringstream,class std::allocator>::str(void)const"), +("?_Sync@ios_base@std@@0_NA", "private: static bool std::ios_base::_Sync"), +("??_U@YAPAXI@Z", "void * __cdecl operator new[](unsigned int)"), +("??_V@YAXPAX@Z", "void __cdecl operator delete[](void *)"), +("??X?$_Complex_base@M@std@@QAEAAV01@ABM@Z", "public: class std::_Complex_base & __thiscall std::_Complex_base::operator*=(float const &)"), +("??Xstd@@YAAAV?$complex@M@0@AAV10@ABV10@@Z", "class std::complex & __cdecl std::operator*=(class std::complex &,class std::complex const &)"), +("?aaa@@YAHAAUbbb@@@Z", "int __cdecl aaa(struct bbb &)"), +("?aaa@@YAHBAUbbb@@@Z", "int __cdecl aaa(struct bbb & volatile)"), +("?aaa@@YAHPAUbbb@@@Z", "int __cdecl aaa(struct bbb *)"), +("?aaa@@YAHQAUbbb@@@Z", "int __cdecl aaa(struct bbb * const)"), +("?aaa@@YAHRAUbbb@@@Z", "int __cdecl aaa(struct bbb * volatile)"), 
+("?aaa@@YAHSAUbbb@@@Z", "int __cdecl aaa(struct bbb * const volatile)"), +("??0aa.a@@QAE@XZ", "public: __thiscall aa.a::aa.a(void)"), # INVALID, '.' should not be allowed in a name fragment +("??0aa$_3a@@QAE@XZ", "public: __thiscall aa$_3a::aa$_3a(void)"), +("??2?$aaa@AAUbbb@@AAUccc@@AAU2@@ddd@1eee@2@QAEHXZ", "public: int __thiscall eee::eee::ddd::ddd::aaa::operator new(void)"), +("?pSW@@3P6GHKPAX0PAU_tagSTACKFRAME@@0P6GH0K0KPAK@ZP6GPAX0K@ZP6GK0K@ZP6GK00PAU_tagADDRESS@@@Z@ZA", "int (__stdcall * pSW)(unsigned long,void *,void *,struct _tagSTACKFRAME *,void *,int (__stdcall *)(void *,unsigned long,void *,unsigned long,unsigned long *),void * (__stdcall *)(void *,unsigned long),unsigned long (__stdcall *)(void *,unsigned long),unsigned long (__stdcall *)(void *,void *,struct _tagADDRESS *))"), +("?$_aaa@Vbbb@@", "_aaa"), +("?$aaa@Vbbb@ccc@@Vddd@2@", "aaa"), +("??0?$Foo@P6GHPAX0@Z@@QAE@PAD@Z", "public: __thiscall Foo::Foo(char *)"), +("??0?$Foo@P6GHPAX0@Z@@QAE@PAD@Z", "public: __thiscall Foo::Foo(char *)"), +("?Qux@Bar@@0PAP6AHPAV1@AAH1PAH@ZA", "private: static int (__cdecl * * Bar::Qux)(class Bar *,int &,int &,int *)"), +("?Qux@Bar@@0PAP6AHPAV1@AAH1PAH@ZA", "private: static int (__cdecl * * Bar::Qux)(class Bar *,int &,int &,int *)"), +("?$AAA@$DBAB@", "AAA<`template-parameter257'>"), +("?$AAA@?C@", "AAA<`template-parameter-2'>"), +("?$AAA@PAUBBB@@", "AAA"), +("??$ccccc@PAVaaa@@@bar@bb@foo@@DGPAV0@PAV0@PAVee@@IPAPAVaaa@@1@Z", "private: static class bar * __stdcall foo::bb::bar::ccccc(class bar *,class ee *,unsigned int,class aaa * *,class ee *)"), +("?f@T@@QAEHQCY1BE@BO@D@Z", "public: int __thiscall T::f(char ( volatile * const)[20][30])"), +("?f@T@@QAEHQAY2BE@BO@CI@D@Z", "public: int __thiscall T::f(char ( * const)[20][30][40])"), +("?f@T@@QAEHQAY1BE@BO@$$CBD@Z", "public: int __thiscall T::f(char const ( * const)[20][30])"), 
+("??0?$Foo@U?$vector_c@H$00$01$0?1$0A@$0A@$0HPPPPPPP@$0HPPPPPPP@$0HPPPPPPP@$0HPPPPPPP@$0HPPPPPPP@$0HPPPPPPP@$0HPPPPPPP@$0HPPPPPPP@$0HPPPPPPP@$0HPPPPPPP@$0HPPPPPPP@$0HPPPPPPP@$0HPPPPPPP@$0HPPPPPPP@$0HPPPPPPP@@mpl@boost@@@@QAE@XZ", "public: __thiscall Foo>::Foo>(void)"), +("?swprintf@@YAHPAGIPBGZZ", "int __cdecl swprintf(unsigned short *,unsigned int,unsigned short const *,...)"), +("?vswprintf@@YAHPAGIPBGPAD@Z", "int __cdecl vswprintf(unsigned short *,unsigned int,unsigned short const *,char *)"), +("?vswprintf@@YAHPA_WIPB_WPAD@Z", "int __cdecl vswprintf(wchar_t *,unsigned int,wchar_t const *,char *)"), +("?swprintf@@YAHPA_WIPB_WZZ", "int __cdecl swprintf(wchar_t *,unsigned int,wchar_t const *,...)"), +("??Xstd@@YAAEAV?$complex@M@0@AEAV10@AEBV10@@Z", "class std::complex & __ptr64 __cdecl std::operator*=(class std::complex & __ptr64,class std::complex const & __ptr64)"), +("?_Doraise@bad_cast@std@@MEBAXXZ", "protected: virtual void __cdecl std::bad_cast::_Doraise(void)const __ptr64"), +("??$?DM@std@@YA?AV?$complex@M@0@ABMABV10@@Z", "class std::complex __cdecl std::operator*(float const &,class std::complex const &)"), +("?_R2@?BN@???$_Fabs@N@std@@YANAEBV?$complex@N@1@PEAH@Z@4NB", "double const `double __cdecl std::_Fabs(class std::complex const & __ptr64,int * __ptr64)'::`29'::_R2"), +("?vtordisp_thunk@std@@$4PPPPPPPM@3EAA_NXZ", "[thunk]:public: virtual bool __cdecl std::vtordisp_thunk`vtordisp{4294967292,4}' (void) __ptr64"), +("??_9CView@@$BBII@AE", "[thunk]: __thiscall CView::`vcall'{392,{flat}}' }'"), +("?_dispatch@_impl_Engine@SalomeApp@@$R4CE@BA@PPPPPPPM@7AE_NAAVomniCallHandle@@@Z", "[thunk]:public: virtual bool __thiscall SalomeApp::_impl_Engine::_dispatch`vtordispex{36,16,4294967292,8}' (class omniCallHandle &)"), +("?_Doraise@bad_cast@std@@MEBAXXZ", "protected: virtual void __cdecl std::bad_cast::_Doraise(void)const __ptr64"), +("??Xstd@@YAAEAV?$complex@M@0@AEAV10@AEBV10@@Z", "class std::complex & __ptr64 __cdecl std::operator*=(class std::complex & 
__ptr64,class std::complex const & __ptr64)"), +("??Xstd@@YAAEAV?$complex@M@0@AEAV10@AEBV10@@Z", "class std::complex & __ptr64 __cdecl std::operator*=(class std::complex & __ptr64,class std::complex const & __ptr64)"), +("??$run@XVTask_Render_Preview@@@QtConcurrent@@YA?AV?$QFuture@X@@PEAVTask_Render_Preview@@P82@EAAXXZ@Z", "class QFuture __cdecl QtConcurrent::run(class Task_Render_Preview * __ptr64,void (__cdecl Task_Render_Preview::*)(void) __ptr64)"), +("??_E?$TStrArray@$$BY0BAA@D$0BA@@@UAEPAXI@Z", "public: virtual void * __thiscall TStrArray::`vector deleting destructor'(unsigned int)"), + +# Some other tests +("NotMangled", "NotMangled"), +("?variable@@4HA", "int variable"), +("?variable@@3HA", "int variable"), +("?variable@@2HA", "public: static int variable"), +("?variable@@1HA", "protected: static int variable"), +("?variable@@0HA", "private: static int variable"), +("?variable@@4JA", "long variable"), +("?variable@@4JB", "long const variable"), +("?a@@YE@V0@@Z", " __thiscall a(class a)"), +("?a@@YE@Vx@@@Z", " __thiscall a(class x)"), +("?a@@YE@Vx@0@@Z", " __thiscall a(class a::x)"), +("?a@@YE@V0@H@Z", " __thiscall a(class a,int)"), +("?a@@YE@W40@@Z", " __thiscall a(enum a)"), +("?a@@YE@W4x@@@Z", " __thiscall a(enum x)"), +("?a@@YE@W4x@0@@Z", " __thiscall a(enum a::x)"), +("?a@@YE@W40@H@Z", " __thiscall a(enum a,int)"), +("?d@@YAHPAH@Z", "int __cdecl d(int *)"), +("?d@@YAHQAH@Z", "int __cdecl d(int * const)"), +("?d@@YAHAAPAH@Z", "int __cdecl d(int * &)"), +("?a@@UAAXXZ", "public: virtual void __cdecl a(void)"), +("?a@@UAA@XZ", "public: virtual __cdecl a(void)"), +("?a@@UAAXH@Z", "public: virtual void __cdecl a(int)"), +("??0a@@QEAA@XZ", "public: __cdecl a::a(void) __ptr64"), +("??0a@@IEAA@XZ", "protected: __cdecl a::a(void) __ptr64"), +("??1a@@UEAA@XZ", "public: virtual __cdecl a::~a(void) __ptr64"), +("?a@@AEBE@QEAH@Z", "private: __thiscall a(int * __ptr64 const)const __ptr64"), +("?a@@AEBE@QEBH@Z", "private: __thiscall a(int const * __ptr64 const)const 
__ptr64"), +("?a@b@@QAAHXZ", "public: int __cdecl b::a(void)"), +("?a@b@@SAPAHXZ", "public: static int * __cdecl b::a(void)"), +("?a@b@@SAPAVc@@XZ", "public: static class c * __cdecl b::a(void)"), +("?a@b@@SAV1@XZ", "public: static class b __cdecl b::a(void)"), +("?a@b@@SAPAV1@XZ", "public: static class b * __cdecl b::a(void)"), +("?a@b@@SAV1c@@XZ", "public: static class c::b __cdecl b::a(void)"), +("?a@b@@YAXPBD@Z", "void __cdecl b::a(char const *)"), +("?a@b@@AAEXH@Z", "private: void __thiscall b::a(int)"), +("?a@b@@UEAAEK@Z", "public: virtual unsigned char __cdecl b::a(unsigned long) __ptr64"), +("?a@b@@QEAAEKK@Z", "public: unsigned char __cdecl b::a(unsigned long,unsigned long) __ptr64"), +("?a@b@@UEAAEPEBGZZ", "public: virtual unsigned char __cdecl b::a(unsigned short const * __ptr64,...) __ptr64"), +("?UnlockBuffer@?$def@H@@QAAXXZ", "public: void __cdecl def::UnlockBuffer(void)"), +("?SPrintfAppend@DSTRING@@UAAEPBGZZ", "public: virtual unsigned char __cdecl DSTRING::SPrintfAppend(unsigned short const *,...)"), +("?SPrintf@DSTRING@@UAAEPBGZZ", "public: virtual unsigned char __cdecl DSTRING::SPrintf(unsigned short const *,...)"), +("?_Mutex@std@@YCVtoto@@XZ", "class toto __pascal std::_Mutex(void)"), +("??0?$CStringT@GV?$StrTraitATL@GV?$ChTraitsCRT@G@ATL@@@ATL@@@ATL@@QAA@XZ", "public: __cdecl ATL::CStringT>>::CStringT>>(void)"), +("?a@b@c@d@@SAV1toto@@XZ", "public: static class toto::b __cdecl d::c::b::a(void)"), +("?a@b@@UEBAJPEBV1@@Z", "public: virtual long __cdecl b::a(class b const * __ptr64)const __ptr64"), +("?a@b@c@d@@QAE@W4e@1@@Z", "public: __thiscall d::c::b::a(enum b::e)"), +("??0a@b@c@d@@QAE@W4e@1@@Z", "public: __thiscall d::c::b::a::a(enum b::e)"), +("??2a@b@c@d@@QAE@W4e@1@@Z", "public: __thiscall d::c::b::a::operator new(enum b::e)"), +("?a@b@@QEAAEPEBGK@Z", "public: unsigned char __cdecl b::a(unsigned short const * __ptr64,unsigned long) __ptr64"), +("?a@b@@QEAAEPEBV1@KK@Z", "public: unsigned char __cdecl b::a(class b const * __ptr64,unsigned 
long,unsigned long) __ptr64"), +("?a@b@@QEAAEPEBV1@@Z", "public: unsigned char __cdecl b::a(class b const * __ptr64) __ptr64"), +("??0?$a@G$0A@@b@@QAA@ABV01@@Z", "public: __cdecl b::a::a(class b::a const &)"), +("?desc@@QAA@P6APAVroot@@PAVpage@@@Z@Z", "public: __cdecl desc(class root * (__cdecl *)(class page *))"), +("?d@@YAHAAQAH@Z", "int __cdecl d(int * &)"), +("?c@@YA?AV?$b@X@@H@Z", "class b __cdecl c(int)"), +("?a@b@@QIAEHXZ", "public: int __thiscall b::a(void) __restrict"), +("?a@@QAEH$$QAVb@@@Z", "public: int __thiscall a(class b &&)"), +("?a@@QAEH$$RAVb@@@Z", "public: int __thiscall a(class b && volatile)"), +("?a@@QAEH$$CAVb@@@Z", "public: int __thiscall a(class b)"), +("?a@b@@AAEH_OB_OCFHH@Z", "private: int __thiscall b::a(short const [][],int,int)"), +("?a@@3V?$b@HVc@@@@A", "class b a"), +("??0a@b@@QEAA@PEAVc@1@P821@EAAPEAVd@1@PEAV31@1PEAVe@f@@@ZPEAVg@5@@Z", "public: __cdecl b::a::a(class b::c * __ptr64,class b::d * __ptr64 (__cdecl b::c::*)(class b::d * __ptr64,class b::d * __ptr64,class f::e * __ptr64) __ptr64,class f::g * __ptr64) __ptr64"), +("??_Da@@QEAAXXZ", "public: void __cdecl a::`vbase destructor'(void) __ptr64"), +("??_Fa@@QEAAXXZ", "public: void __cdecl a::`default constructor closure'(void) __ptr64"), + +# For code coverage +("?Invalid@@x", "?Invalid@@x"), +("?$Invalid@$0x@", "?$Invalid@$0x@"), +("?Invalid@@$$FYMHP$DFCH@Z", "?Invalid@@$$FYMHP$DFCH@Z"), + +# Calling conventions +("?a@@YAHF@Z", "int __cdecl a(short)"), +("?a@@YBHF@Z", "int __cdecl __dll_export a(short)"), +("?a@@YCHF@Z", "int __pascal a(short)"), +("?a@@YDHF@Z", "int __pascal __dll_export a(short)"), +("?a@@YEHF@Z", "int __thiscall a(short)"), +("?a@@YFHF@Z", "int __thiscall __dll_export a(short)"), +("?a@@YGHF@Z", "int __stdcall a(short)"), +("?a@@YHHF@Z", "int __stdcall __dll_export a(short)"), +("?a@@YIHF@Z", "int __fastcall a(short)"), +("?a@@YJHF@Z", "int __fastcall __dll_export a(short)"), +("?a@@YKHF@Z", "int a(short)"), +("?a@@YLHF@Z", "int __dll_export a(short)"), 
+("?a@@YMHF@Z", "int __clrcall a(short)"), +("?a@@YNHF@Z", "int __clrcall __dll_export a(short)"), +("?a@@YOHF@Z", "int __eabi a(short)"), +("?a@@YPHF@Z", "int __eabi __dll_export a(short)"), +("?a@@YQHF@Z", "int __vectorcall a(short)"), + +# Data types +("?a@@YK_$MXZ", "__w64 float a(void)"), +("?a@@YKX_$M@Z", "void a(__w64 float)"), +("?a@@YK_$PAHXZ", "__w64 int * a(void)"), +("?a@@YKP6AXJ@ZXZ", "void (__cdecl * a(void))(long)"), +("?a@@YKQ6AXJ@ZXZ", "void (__cdecl * const a(void))(long)"), +("?a@@YKXP6AXJ@Z@Z", "void a(void (__cdecl *)(long))"), +("?a@@YK_$P6AXJ@ZXZ", "__w64 void (__cdecl * a(void))(long)"), +("?a@@YK?AHP80@AAXXZ@Z", "int a(void (__cdecl a::*)(void))"), +("?a@@YK?AHP8b@c@@AAXXZ@Z", "int a(void (__cdecl c::b::*)(void))"), +("?a@@YK?AHP8b@@BAXXZ@Z", "int a(void (__cdecl b::*)(void)const)"), +("?a@@YKX_OA_OAF@Z", "void a(short [][])"), +("?a@@YKX_OA_OAPAH@Z", "void a(int * [][])"), +("?a@@YKX_OA_OAP6AXJ@Z@Z", "void a(void (__cdecl *)(long) [][])"), + +# Nested, with backreferences +("?a@??f@@YAXXZ@4HA", "int `void __cdecl f(void)'::a"), +("?a@b@??f@0@YAXXZ@4HA", "int `void __cdecl a::f(void)'::b::a"), + +# Numbered/anonymous namespaces +("?a@?@@YEHF@Z", "int __thiscall `0'::a(short)"), +("?a@?BN@@YEHF@Z", "int __thiscall `29'::a(short)"), +("?a@?A@@YEHF@Z", "int __thiscall `anonymous namespace'::a(short)"), +("?a@?AN@@YEHF@Z", "int __thiscall `anonymous namespace'::a(short)"), + +# Local static guard +("??_B@58", "`local static guard'{9}'"), +("??_B??a@@SAHXZ@51", "`public: static int __cdecl a(void)'::`local static guard'{2}'"), +("??_B?1??a@@SAHXZ@51", "`public: static int __cdecl a(void)'::`2'::`local static guard'{2}'"), +("??_B?1??a@b@c@@SAAEAVd@2@XZ@51", "`public: static class c::d & __ptr64 __cdecl c::b::a(void)'::`2'::`local static guard'{2}'"), + +# RTTI +("??_R0PADa@@AAE@H@Z", "private: __thiscall a::char * `RTTI Type Descriptor'(int)"), +("??_R13433a@@AAE@H@Z", "private: __thiscall a::`RTTI Base Class Descriptor at (4,5,4,4)'(int)"), 
+("??_R2a@@AAE@H@Z", "private: __thiscall a::`RTTI Base Class Array'(int)"), +("??_R4a@@AAE@H@Z", "private: __thiscall a::`RTTI Complete Object Locator'(int)"), +("??_R3a@@AAE@H@Z", "private: __thiscall a::`RTTI Class Hierarchy Descriptor'(int)"), + +# Some other special fragments +("??Ba@@QAAAAVb@@XZ", "public: __cdecl a::operator class b &(void)"), +("??_Car@@AAE@H@Z", "`string'"), +("??__Ea@@AAEXH@Z", "private: void __thiscall `dynamic initializer for 'a''(int)"), +("??__Fa@@AAEXH@Z", "private: void __thiscall `dynamic atexit destructor for 'a''(int)"), +("??__Ka@@AAEXH@Z", 'private: void __thiscall operator "" a(int)'), +("??_Va@@YAXPAXV0@@Z", "void __cdecl a::operator delete[](void *,class a)"), +("??_7a@@6B@", "const a::`vftable'"), +("??_8a@@6Bx@@@", "const a::`vbtable'{for `x'}"), +("??_PAa@@AAE@H@Z", "private: __thiscall a::`udt returning'operator[](int)"), +("??_P_R4a@0@AAE@H@Z", "private: __thiscall a::a::`udt returning'`RTTI Complete Object Locator'(int)"), + +# CV: +("?@?a@@YAHF@Z", "CV: int __cdecl a(short)"), +("?@?a@@4HA", "CV: int a"), + +# template parameters +("?$a@?C@", "a<`template-parameter-2'>"), +("?$a@$DBAB@", "a<`template-parameter257'>"), # not known by undname.exe +("?$a@$0A@", "a<0>"), +("?a@@3U?$b@$09@@A", "struct b<10> a"), +("?a@@3U?$b@$1?c@@3HA@@A", "struct b<&int c> a"), +("?a@@3U?$b@$1?c@@3PADA@@A", "struct b<&char * c> a"), +# meaningless results +("?$a@$22C@", "a<3.e2>"), +("?$a@$29C@", "a<1.0e2>"), +("?$a@$F1CD@", "a<{2,35}>"), +("?$a@$G0CD@B@", "a<{1,35,1}>"), + +# vtordisp +("?a@@$023AAHXZ", "[thunk]:private: virtual int __cdecl a`vtordisp{3,4}' (void)"), +("?a@@$123AAHXZ", "[thunk]:private: virtual int __cdecl a`vtordisp{3,4}' (void)"), +("?a@@$223AAHXZ", "[thunk]:protected: virtual int __cdecl a`vtordisp{3,4}' (void)"), +("?a@@$323AAHXZ", "[thunk]:protected: virtual int __cdecl a`vtordisp{3,4}' (void)"), +("?a@@$423AAHXZ", "[thunk]:public: virtual int __cdecl a`vtordisp{3,4}' (void)"), +("?a@@$523AAHXZ", "[thunk]:public: 
virtual int __cdecl a`vtordisp{3,4}' (void)"), +("?a@@$R42345AAHXZ", "[thunk]:public: virtual int __cdecl a`vtordispex{3,4,5,6}' (void)"), + +# C++/CLI +("?a@@$$FQ$AAMXXZ", "[managed] public: void __clrcall a(void)"), +("?a@@$$FQ$CAMXXZ", "[managed] public: void __clrcall a(void)%"), +("?a@@$$FYMHP$AA__ZVb@@@Z", "[managed] int __clrcall a(class b ^)"), +("?a@@YMHP$03AH@Z", "int __clrcall a(cli::array^)"), +("?a@@$$FYMHP$03AH@Z", "[managed] int __clrcall a(cli::array^)"), +("__mep@?a@@$$FQ$AAMXXZ", "[MEP] [managed] public: void __clrcall a(void)"), +("?a@@$$HYMHP$01AP$AAVb@@@Z", "[MANAGED] int __clrcall a(cli::array^)"), +("?a@@$$FYMP$AAHXZ", "[managed] int ^ __clrcall a(void)"), +("?a@@$$FYMA$AAHXZ", "[managed] int % __clrcall a(void)"), +# these inputs may be invalid, they were not generated by VC++ +("?a@@$$FYMHP$AEBH@Z", "[managed] int __clrcall a(int const ^ __ptr64)"), +("?a@@$$FYMHP$BECH@Z", "[managed] int __clrcall a(cli::pin_ptr^)"), # 'W' seems ignored +("?a@@$$FYMHP$AEBP6AXJ@Z@Z", "[managed] int __clrcall a(void (__cdecl * const ^ __ptr64)(long))"), +("?a@@$$FYMHP$BECP6AXJ@Z@Z", "[managed] int __clrcall a(cli::pin_ptr