From d260c2bbfa52ec0235e9e6dc314642faeabb567f Mon Sep 17 00:00:00 2001 From: Angie Date: Sun, 25 Jun 2023 19:24:41 -0400 Subject: [PATCH 01/14] Multiple levels for the rodata string guesser --- pyproject.toml | 2 +- spimdisasm/__init__.py | 4 +- spimdisasm/common/ContextSymbols.py | 17 ++++-- spimdisasm/common/GlobalConfig.py | 60 +++++++++++++++++-- spimdisasm/common/Utils.py | 22 +++++++ spimdisasm/mips/sections/MipsSectionRodata.py | 12 ++-- 6 files changed, 102 insertions(+), 15 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index a20b77ef..4c165af8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ [project] name = "spimdisasm" # Version should be synced with spimdisasm/__init__.py -version = "1.14.3" +version = "1.14.3.dev0" description = "MIPS disassembler" # license = "MIT" readme = "README.md" diff --git a/spimdisasm/__init__.py b/spimdisasm/__init__.py index 9a43f59b..d31298d6 100644 --- a/spimdisasm/__init__.py +++ b/spimdisasm/__init__.py @@ -5,8 +5,8 @@ from __future__ import annotations -__version_info__ = (1, 14, 3) -__version__ = ".".join(map(str, __version_info__)) +__version_info__ = (1, 14, 4) +__version__ = ".".join(map(str, __version_info__)) + ".dev0" __author__ = "Decompollaborate" from . import common diff --git a/spimdisasm/common/ContextSymbols.py b/spimdisasm/common/ContextSymbols.py index bd98d7bd..dd57561b 100644 --- a/spimdisasm/common/ContextSymbols.py +++ b/spimdisasm/common/ContextSymbols.py @@ -205,6 +205,11 @@ def hasNoType(self) -> bool: currentType = self.getTypeSpecial() return (currentType is None or currentType == "") and self.accessType is None + def hasOnlyAutodetectedType(self) -> bool: + if self.userDeclaredType is not None and self.userDeclaredType != "": + return False + return (self.autodetectedType is not None and self.autodetectedType != "") and self.accessType is not None + def isTrustableFunction(self, rsp: bool=False) -> bool: """Checks if the function symbol should be trusted based on the current disassembler settings""" @@ -267,12 +272,16 @@ def isString(self) -> bool: return True if not self.isMaybeString: return False - if not GlobalConfig.STRING_GUESSER: + if GlobalConfig.RODATA_STRING_GUESSER_LEVEL < 1: return False - if self.hasNoType(): # no type information, let's try to guess - return True - if GlobalConfig.AGGRESSIVE_STRING_GUESSER: + if self.hasNoType(): + # no type information, let's try to guess return True + + if self.hasOnlyAutodetectedType(): + if GlobalConfig.RODATA_STRING_GUESSER_LEVEL >= 4: + # There's autodetected type information, but we are going to ignore it and try to guess + return True return False def isFloat(self) -> bool: diff --git a/spimdisasm/common/GlobalConfig.py b/spimdisasm/common/GlobalConfig.py index 228dab56..9860281c 100644 --- a/spimdisasm/common/GlobalConfig.py +++ b/spimdisasm/common/GlobalConfig.py @@ -109,9 +109,35 @@ class GlobalConfig: TRUST_USER_FUNCTIONS: bool = True TRUST_JAL_FUNCTIONS: bool = True - STRING_GUESSER: bool = True + RODATA_STRING_GUESSER_LEVEL: int = 1 """Rodata string guesser""" - AGGRESSIVE_STRING_GUESSER: bool = False + + #! @deprecated + @property + def STRING_GUESSER(self) -> bool: + return GlobalConfig.RODATA_STRING_GUESSER_LEVEL > 0 + #! @deprecated + @STRING_GUESSER.setter + def STRING_GUESSER(self, value: bool) -> None: + if value: + if GlobalConfig.RODATA_STRING_GUESSER_LEVEL <= 0: + GlobalConfig.RODATA_STRING_GUESSER_LEVEL = 1 + else: + GlobalConfig.RODATA_STRING_GUESSER_LEVEL = 0 + + #! @deprecated + @property + def AGGRESSIVE_STRING_GUESSER(self) -> bool: + return GlobalConfig.RODATA_STRING_GUESSER_LEVEL > 1 + #! @deprecated + @AGGRESSIVE_STRING_GUESSER.setter + def AGGRESSIVE_STRING_GUESSER(self, value: bool) -> None: + if value: + if GlobalConfig.RODATA_STRING_GUESSER_LEVEL <= 2: + GlobalConfig.RODATA_STRING_GUESSER_LEVEL = 9 + else: + if GlobalConfig.RODATA_STRING_GUESSER_LEVEL > 1: + GlobalConfig.RODATA_STRING_GUESSER_LEVEL = 1 AUTOGENERATED_NAMES_BASED_ON_SECTION_TYPE: bool = True """Name autogenerated symbols after the section those are come from @@ -230,8 +256,34 @@ def addParametersToArgParse(parser: argparse.ArgumentParser): backendConfig.add_argument("--disasm-unknown", help=f"Force disassembling functions with unknown instructions. Defaults to {GlobalConfig.DISASSEMBLE_UNKNOWN_INSTRUCTIONS}", action=Utils.BooleanOptionalAction) - backendConfig.add_argument("--string-guesser", help=f"Toggles the string guesser feature. Defaults to {GlobalConfig.STRING_GUESSER}", action=Utils.BooleanOptionalAction) - backendConfig.add_argument("--aggressive-string-guesser", help=f"Makes the string guesser feature to be more aggressive when trying to detect strings. Requires `--string-guesser` to be enabled. Defaults to {GlobalConfig.AGGRESSIVE_STRING_GUESSER}", action=Utils.BooleanOptionalAction) + rodataStringGuesserHelp = f""" +Sets the level for the rodata C string guesser. Smaller values mean more +conservative methods to guess a string, while higher values are more agressive. +0 (and negative) completely disables the guessing feature. Defaults to {GlobalConfig.RODATA_STRING_GUESSER_LEVEL}. + +A C string must start at a 0x4-aligned region, which is '\\0' terminated and +padded with '\\0's until a 0x4 boundary. + +level 0: Completely disable the guessing feature. +level 1: The most conservative guessing level. Imposes the following + restrictions: + - Do not try to guess if the user provided a type for the symbol. + - Do no try to guess if type information for the symbol can be + inferred by other means. + - A string symbol must be referenced only once. + - Strings must not be empty. + +level 2: A string no longer needs to be referenced only once to be considered a + possible strings. This can happen because of a deduplication + optimization. +level 3: Empty strings are allowed. +level 4: Symbols with autodetected type information but no user type + information can still be guessed as strings. +""" + backendConfig.add_argument("--rodata-string-guesser", help=rodataStringGuesserHelp, type=int, metavar="level") + + backendConfig.add_argument("--string-guesser", help=f"DEPRECATED, prefer `--rodata-string-guesser`. Toggles the string guesser feature. Defaults to {GlobalConfig.STRING_GUESSER}", action=Utils.BooleanOptionalAction) + backendConfig.add_argument("--aggressive-string-guesser", help=f"DEPRECATED, prefer `--rodata-string-guesser`. Makes the string guesser feature to be more aggressive when trying to detect strings. Requires `--string-guesser` to be enabled. Defaults to {GlobalConfig.AGGRESSIVE_STRING_GUESSER}", action=Utils.BooleanOptionalAction) backendConfig.add_argument("--name-vars-by-section", help=f"Toggles the naming-after-section feature for autogenerated names. This means autogenerated symbols get a RO_ or B_ prefix if the symbol is from a rodata or bss section. Defaults to {GlobalConfig.AUTOGENERATED_NAMES_BASED_ON_SECTION_TYPE}", action=Utils.BooleanOptionalAction) diff --git a/spimdisasm/common/Utils.py b/spimdisasm/common/Utils.py index 2440c4c1..a06b05d2 100644 --- a/spimdisasm/common/Utils.py +++ b/spimdisasm/common/Utils.py @@ -299,3 +299,25 @@ def __call__(self, parser, namespace, values, option_string: str|None=None): def format_usage(self): return ' | '.join(self.option_strings) + +# https://stackoverflow.com/a/35925919/6292472 +class PreserveWhiteSpaceWrapRawTextHelpFormatter(argparse.RawDescriptionHelpFormatter): + def __add_whitespace(self, idx, iWSpace, text): + if idx is 0: + return text + return (" " * iWSpace) + text + + def _split_lines(self, text, width): + import textwrap + import re + textRows = text.splitlines() + for idx, line in enumerate(textRows): + search = re.search('\s*[0-9\-]{0,}\.?\s*', line) + if line.strip() is "": + textRows[idx] = " " + elif search: + lWSpace = search.end() + lines = [self.__add_whitespace(i,lWSpace,x) for i,x in enumerate(textwrap.wrap(line, width))] + textRows[idx] = lines + + return [item for sublist in textRows for item in sublist] diff --git a/spimdisasm/mips/sections/MipsSectionRodata.py b/spimdisasm/mips/sections/MipsSectionRodata.py index b8dfc514..01caf5d4 100644 --- a/spimdisasm/mips/sections/MipsSectionRodata.py +++ b/spimdisasm/mips/sections/MipsSectionRodata.py @@ -29,16 +29,20 @@ def _stringGuesser(self, contextSym: common.ContextSymbol, localOffset: int) -> if contextSym.isMaybeString or contextSym.isString(): return True - if not common.GlobalConfig.STRING_GUESSER: + if common.GlobalConfig.RODATA_STRING_GUESSER_LEVEL < 1: return False - if not contextSym.hasNoType() or contextSym.referenceCounter > 1: - if not common.GlobalConfig.AGGRESSIVE_STRING_GUESSER: + if contextSym.referenceCounter > 1: + if common.GlobalConfig.RODATA_STRING_GUESSER_LEVEL < 2: return False # This would mean the string is an empty string, which is not very likely if self.words[localOffset//4] == 0: - if not common.GlobalConfig.AGGRESSIVE_STRING_GUESSER: + if common.GlobalConfig.RODATA_STRING_GUESSER_LEVEL < 3: + return False + + if contextSym.hasOnlyAutodetectedType(): + if common.GlobalConfig.RODATA_STRING_GUESSER_LEVEL < 4: return False try: From c22a280f6382b093f8c49edf92f6788ed1fc2c37 Mon Sep 17 00:00:00 2001 From: Angie Date: Sun, 25 Jun 2023 19:43:19 -0400 Subject: [PATCH 02/14] Fix deprecated STRING_GUESSER workaround and borken description --- spimdisasm/common/GlobalConfig.py | 242 +++++++++--------- spimdisasm/disasmdis/DisasmdisInternals.py | 4 +- .../elfObjDisasm/ElfObjDisasmInternals.py | 4 +- spimdisasm/rspDisasm/RspDisasmInternals.py | 4 +- .../SingleFileDisasmInternals.py | 4 +- 5 files changed, 125 insertions(+), 133 deletions(-) diff --git a/spimdisasm/common/GlobalConfig.py b/spimdisasm/common/GlobalConfig.py index 9860281c..cb0bbff5 100644 --- a/spimdisasm/common/GlobalConfig.py +++ b/spimdisasm/common/GlobalConfig.py @@ -6,6 +6,7 @@ from __future__ import annotations import argparse +import dataclasses import enum import os @@ -101,7 +102,8 @@ class InputFileType(enum.Enum): ELF = "elf" -class GlobalConfig: +@dataclasses.dataclass +class GlobalConfigType: DISASSEMBLE_UNKNOWN_INSTRUCTIONS: bool = False """Try to disassemble non implemented instructions and functions""" @@ -115,29 +117,29 @@ class GlobalConfig: #! @deprecated @property def STRING_GUESSER(self) -> bool: - return GlobalConfig.RODATA_STRING_GUESSER_LEVEL > 0 + return self.RODATA_STRING_GUESSER_LEVEL > 0 #! @deprecated @STRING_GUESSER.setter def STRING_GUESSER(self, value: bool) -> None: if value: - if GlobalConfig.RODATA_STRING_GUESSER_LEVEL <= 0: - GlobalConfig.RODATA_STRING_GUESSER_LEVEL = 1 + if self.RODATA_STRING_GUESSER_LEVEL <= 0: + self.RODATA_STRING_GUESSER_LEVEL = 1 else: - GlobalConfig.RODATA_STRING_GUESSER_LEVEL = 0 + self.RODATA_STRING_GUESSER_LEVEL = 0 #! @deprecated @property def AGGRESSIVE_STRING_GUESSER(self) -> bool: - return GlobalConfig.RODATA_STRING_GUESSER_LEVEL > 1 + return self.RODATA_STRING_GUESSER_LEVEL > 1 #! @deprecated @AGGRESSIVE_STRING_GUESSER.setter def AGGRESSIVE_STRING_GUESSER(self, value: bool) -> None: if value: - if GlobalConfig.RODATA_STRING_GUESSER_LEVEL <= 2: - GlobalConfig.RODATA_STRING_GUESSER_LEVEL = 9 + if self.RODATA_STRING_GUESSER_LEVEL <= 2: + self.RODATA_STRING_GUESSER_LEVEL = 9 else: - if GlobalConfig.RODATA_STRING_GUESSER_LEVEL > 1: - GlobalConfig.RODATA_STRING_GUESSER_LEVEL = 1 + if self.RODATA_STRING_GUESSER_LEVEL > 1: + self.RODATA_STRING_GUESSER_LEVEL = 1 AUTOGENERATED_NAMES_BASED_ON_SECTION_TYPE: bool = True """Name autogenerated symbols after the section those are come from @@ -244,103 +246,93 @@ def AGGRESSIVE_STRING_GUESSER(self, value: bool) -> None: REMOVE_POINTERS: bool = False IGNORE_BRANCHES: bool = False """Ignores the address of every branch, jump and jal""" - IGNORE_WORD_LIST: set[int] = set() + IGNORE_WORD_LIST: set[int] = dataclasses.field(default_factory=set) """Ignores words that starts in 0xXX""" WRITE_BINARY: bool = False """write to files splitted binaries""" - @staticmethod - def addParametersToArgParse(parser: argparse.ArgumentParser): + def addParametersToArgParse(self, parser: argparse.ArgumentParser): backendConfig = parser.add_argument_group("Disassembler backend configuration") - backendConfig.add_argument("--disasm-unknown", help=f"Force disassembling functions with unknown instructions. Defaults to {GlobalConfig.DISASSEMBLE_UNKNOWN_INSTRUCTIONS}", action=Utils.BooleanOptionalAction) - - rodataStringGuesserHelp = f""" -Sets the level for the rodata C string guesser. Smaller values mean more -conservative methods to guess a string, while higher values are more agressive. -0 (and negative) completely disables the guessing feature. Defaults to {GlobalConfig.RODATA_STRING_GUESSER_LEVEL}. - -A C string must start at a 0x4-aligned region, which is '\\0' terminated and -padded with '\\0's until a 0x4 boundary. - -level 0: Completely disable the guessing feature. -level 1: The most conservative guessing level. Imposes the following - restrictions: - - Do not try to guess if the user provided a type for the symbol. - - Do no try to guess if type information for the symbol can be - inferred by other means. - - A string symbol must be referenced only once. - - Strings must not be empty. - -level 2: A string no longer needs to be referenced only once to be considered a - possible strings. This can happen because of a deduplication - optimization. -level 3: Empty strings are allowed. -level 4: Symbols with autodetected type information but no user type - information can still be guessed as strings. + backendConfig.add_argument("--disasm-unknown", help=f"Force disassembling functions with unknown instructions. Defaults to {self.DISASSEMBLE_UNKNOWN_INSTRUCTIONS}", action=Utils.BooleanOptionalAction) + + rodataStringGuesserHelp = f"""\ +Sets the level for the rodata C string guesser. Smaller values mean more conservative methods to guess a string, while higher values are more agressive. Level 0 (and negative) completely disables the guessing feature. Defaults to {self.RODATA_STRING_GUESSER_LEVEL}. + +A C string must start at a 0x4-aligned region, which is '\\0' terminated and padded with '\\0's until a 0x4 boundary. + +- level 0: Completely disable the guessing feature. +- level 1: The most conservative guessing level. Imposes the following restrictions: + - Do not try to guess if the user provided a type for the symbol. + - Do no try to guess if type information for the symbol can be inferred by other means. + - A string symbol must be referenced only once. + - Strings must not be empty. +- level 2: A string no longer needs to be referenced only once to be considered a possible strings. This can happen because of a deduplication optimization. +- level 3: Empty strings are allowed. +- level 4: Symbols with autodetected type information but no user type information can still be guessed as strings. """ backendConfig.add_argument("--rodata-string-guesser", help=rodataStringGuesserHelp, type=int, metavar="level") - backendConfig.add_argument("--string-guesser", help=f"DEPRECATED, prefer `--rodata-string-guesser`. Toggles the string guesser feature. Defaults to {GlobalConfig.STRING_GUESSER}", action=Utils.BooleanOptionalAction) - backendConfig.add_argument("--aggressive-string-guesser", help=f"DEPRECATED, prefer `--rodata-string-guesser`. Makes the string guesser feature to be more aggressive when trying to detect strings. Requires `--string-guesser` to be enabled. Defaults to {GlobalConfig.AGGRESSIVE_STRING_GUESSER}", action=Utils.BooleanOptionalAction) + backendConfig.add_argument("--string-guesser", help=f"DEPRECATED, prefer `--rodata-string-guesser`. Toggles the string guesser feature. Defaults to {self.STRING_GUESSER}", action=Utils.BooleanOptionalAction) + backendConfig.add_argument("--aggressive-string-guesser", help=f"DEPRECATED, prefer `--rodata-string-guesser`. Makes the string guesser feature to be more aggressive when trying to detect strings. Requires `--string-guesser` to be enabled. Defaults to {self.AGGRESSIVE_STRING_GUESSER}", action=Utils.BooleanOptionalAction) - backendConfig.add_argument("--name-vars-by-section", help=f"Toggles the naming-after-section feature for autogenerated names. This means autogenerated symbols get a RO_ or B_ prefix if the symbol is from a rodata or bss section. Defaults to {GlobalConfig.AUTOGENERATED_NAMES_BASED_ON_SECTION_TYPE}", action=Utils.BooleanOptionalAction) - backendConfig.add_argument("--name-vars-by-type", help=f"Toggles the naming-after-type feature for autogenerated names. This means autogenerated symbols can get a STR_, FLT_ or DBL_ prefix if the symbol is a string, float or double. Defaults to {GlobalConfig.AUTOGENERATED_NAMES_BASED_ON_DATA_TYPE}", action=Utils.BooleanOptionalAction) + backendConfig.add_argument("--name-vars-by-section", help=f"Toggles the naming-after-section feature for autogenerated names. This means autogenerated symbols get a RO_ or B_ prefix if the symbol is from a rodata or bss section. Defaults to {self.AUTOGENERATED_NAMES_BASED_ON_SECTION_TYPE}", action=Utils.BooleanOptionalAction) + backendConfig.add_argument("--name-vars-by-type", help=f"Toggles the naming-after-type feature for autogenerated names. This means autogenerated symbols can get a STR_, FLT_ or DBL_ prefix if the symbol is a string, float or double. Defaults to {self.AUTOGENERATED_NAMES_BASED_ON_DATA_TYPE}", action=Utils.BooleanOptionalAction) backendConfig.add_argument("--custom-suffix", help="Set a custom suffix for automatically generated symbols") - backendConfig.add_argument("--compiler", help=f"Enables some tweaks for the selected compiler. Defaults to {GlobalConfig.COMPILER.name}", choices=compilerOptions) + backendConfig.add_argument("--compiler", help=f"Enables some tweaks for the selected compiler. Defaults to {self.COMPILER.name}", choices=compilerOptions) - backendConfig.add_argument("--endian", help=f"Set the endianness of input files. Defaults to {GlobalConfig.ENDIAN.name.lower()}", choices=["big", "little", "middle"], default=GlobalConfig.ENDIAN.name.lower()) + backendConfig.add_argument("--endian", help=f"Set the endianness of input files. Defaults to {self.ENDIAN.name.lower()}", choices=["big", "little", "middle"], default=self.ENDIAN.name.lower()) - backendConfig.add_argument("--abi", help=f"Changes the ABI of the disassembly, applying corresponding tweaks. Defaults to {GlobalConfig.ABI.name}", choices=["O32", "N32", "N64"], default=GlobalConfig.ABI.name) - backendConfig.add_argument("--arch-level", help=f"Changes the arch level of the disassembly, applying corresponding tweaks. Defaults to {GlobalConfig.ARCHLEVEL.name}", choices=archLevelOptions, default=GlobalConfig.ARCHLEVEL.name) + backendConfig.add_argument("--abi", help=f"Changes the ABI of the disassembly, applying corresponding tweaks. Defaults to {self.ABI.name}", choices=["O32", "N32", "N64"], default=self.ABI.name) + backendConfig.add_argument("--arch-level", help=f"Changes the arch level of the disassembly, applying corresponding tweaks. Defaults to {self.ARCHLEVEL.name}", choices=archLevelOptions, default=self.ARCHLEVEL.name) backendConfig.add_argument("--gp", help="Set the value used for loads and stores related to the $gp register. A hex value is expected") - backendConfig.add_argument("--pic", help=f"Enables PIC analysis and the usage of some rel types, like %%got. Defaults to {GlobalConfig.PIC}", action=Utils.BooleanOptionalAction) - backendConfig.add_argument("--emit-cpload", help=f"Emits a .cpload directive instead of the corresponding instructions if it were detected on PIC binaries. Defaults to {GlobalConfig.EMIT_CPLOAD}", action=Utils.BooleanOptionalAction) + backendConfig.add_argument("--pic", help=f"Enables PIC analysis and the usage of some rel types, like %%got. Defaults to {self.PIC}", action=Utils.BooleanOptionalAction) + backendConfig.add_argument("--emit-cpload", help=f"Emits a .cpload directive instead of the corresponding instructions if it were detected on PIC binaries. Defaults to {self.EMIT_CPLOAD}", action=Utils.BooleanOptionalAction) - backendConfig.add_argument("--emit-inline-reloc", help=f"Emit a comment indicating the relocation in each instruction/word. Defaults to {GlobalConfig.EMIT_INLINE_RELOC}", action=Utils.BooleanOptionalAction) + backendConfig.add_argument("--emit-inline-reloc", help=f"Emit a comment indicating the relocation in each instruction/word. Defaults to {self.EMIT_INLINE_RELOC}", action=Utils.BooleanOptionalAction) - backendConfig.add_argument("--filter-low-addresses", help=f"Filter out low addresses (lower than 0x40000000) when searching for pointers. Defaults to {GlobalConfig.SYMBOL_FINDER_FILTER_LOW_ADDRESSES}", action=Utils.BooleanOptionalAction) - backendConfig.add_argument("--filter-high-addresses", help=f"Filter out high addresses (higher than 0xC0000000) when searching for pointers. Defaults to {GlobalConfig.SYMBOL_FINDER_FILTER_HIGH_ADDRESSES}", action=Utils.BooleanOptionalAction) - backendConfig.add_argument("--filtered-addresses-as-constants", help=f"Treat filtered out addressed as constants. Defaults to {GlobalConfig.SYMBOL_FINDER_FILTERED_ADDRESSES_AS_CONSTANTS}", action=Utils.BooleanOptionalAction) - backendConfig.add_argument("--filtered-addresses-as-hilo", help=f"Use %%hi/%%lo syntax for filtered out addresses. Defaults to {GlobalConfig.SYMBOL_FINDER_FILTERED_ADDRESSES_AS_HILO}", action=Utils.BooleanOptionalAction) + backendConfig.add_argument("--filter-low-addresses", help=f"Filter out low addresses (lower than 0x40000000) when searching for pointers. Defaults to {self.SYMBOL_FINDER_FILTER_LOW_ADDRESSES}", action=Utils.BooleanOptionalAction) + backendConfig.add_argument("--filter-high-addresses", help=f"Filter out high addresses (higher than 0xC0000000) when searching for pointers. Defaults to {self.SYMBOL_FINDER_FILTER_HIGH_ADDRESSES}", action=Utils.BooleanOptionalAction) + backendConfig.add_argument("--filtered-addresses-as-constants", help=f"Treat filtered out addressed as constants. Defaults to {self.SYMBOL_FINDER_FILTERED_ADDRESSES_AS_CONSTANTS}", action=Utils.BooleanOptionalAction) + backendConfig.add_argument("--filtered-addresses-as-hilo", help=f"Use %%hi/%%lo syntax for filtered out addresses. Defaults to {self.SYMBOL_FINDER_FILTERED_ADDRESSES_AS_HILO}", action=Utils.BooleanOptionalAction) - backendConfig.add_argument("--allow-unksegment", help=f"Allow using symbols from the unknown segment. Defaults to {GlobalConfig.ALLOW_UNKSEGMENT}", action=Utils.BooleanOptionalAction) + backendConfig.add_argument("--allow-unksegment", help=f"Allow using symbols from the unknown segment. Defaults to {self.ALLOW_UNKSEGMENT}", action=Utils.BooleanOptionalAction) - backendConfig.add_argument("--allow-all-addends-on-data", help=f"Enable using addends on symbols referenced by data. Defaults to {GlobalConfig.ALLOW_ALL_ADDENDS_ON_DATA}", action=Utils.BooleanOptionalAction) - backendConfig.add_argument("--allow-all-constants-on-data", help=f"Enable referencing constants by data. Defaults to {GlobalConfig.ALLOW_ALL_CONSTANTS_ON_DATA}", action=Utils.BooleanOptionalAction) + backendConfig.add_argument("--allow-all-addends-on-data", help=f"Enable using addends on symbols referenced by data. Defaults to {self.ALLOW_ALL_ADDENDS_ON_DATA}", action=Utils.BooleanOptionalAction) + backendConfig.add_argument("--allow-all-constants-on-data", help=f"Enable referencing constants by data. Defaults to {self.ALLOW_ALL_CONSTANTS_ON_DATA}", action=Utils.BooleanOptionalAction) miscConfig = parser.add_argument_group("Disassembler misc options") - miscConfig.add_argument("--asm-comments", help=f"Toggle the comments in generated assembly code. Defaults to {GlobalConfig.ASM_COMMENT}", action=Utils.BooleanOptionalAction) - miscConfig.add_argument("--comment-offset-width", help=f"Sets the zeroes width padding for the file offset comment. Defaults to {GlobalConfig.ASM_COMMENT_OFFSET_WIDTH}", action=Utils.BooleanOptionalAction) - miscConfig.add_argument("--glabel-count", help=f"Toggle glabel count comment. Defaults to {GlobalConfig.GLABEL_ASM_COUNT}", action=Utils.BooleanOptionalAction) - miscConfig.add_argument("--asm-referencee-symbols", help=f"Toggle glabel count comment. Defaults to {GlobalConfig.ASM_REFERENCEE_SYMBOLS}", action=Utils.BooleanOptionalAction) + miscConfig.add_argument("--asm-comments", help=f"Toggle the comments in generated assembly code. Defaults to {self.ASM_COMMENT}", action=Utils.BooleanOptionalAction) + miscConfig.add_argument("--comment-offset-width", help=f"Sets the zeroes width padding for the file offset comment. Defaults to {self.ASM_COMMENT_OFFSET_WIDTH}", action=Utils.BooleanOptionalAction) + miscConfig.add_argument("--glabel-count", help=f"Toggle glabel count comment. Defaults to {self.GLABEL_ASM_COUNT}", action=Utils.BooleanOptionalAction) + miscConfig.add_argument("--asm-referencee-symbols", help=f"Toggle glabel count comment. Defaults to {self.ASM_REFERENCEE_SYMBOLS}", action=Utils.BooleanOptionalAction) - miscConfig.add_argument("--asm-text-label", help=f"Changes the label used to declare functions. Defaults to {GlobalConfig.ASM_TEXT_LABEL}") - miscConfig.add_argument("--asm-jtbl-label", help=f"Changes the label used to declare jumptable labels. Defaults to {GlobalConfig.ASM_JTBL_LABEL}") - miscConfig.add_argument("--asm-data-label", help=f"Changes the label used to declare data symbols. Defaults to {GlobalConfig.ASM_DATA_LABEL}") - miscConfig.add_argument("--asm-use-symbol-label", help=f"Toggles the use of labels for symbols. Defaults to {GlobalConfig.ASM_USE_SYMBOL_LABEL}", action=Utils.BooleanOptionalAction) + miscConfig.add_argument("--asm-text-label", help=f"Changes the label used to declare functions. Defaults to {self.ASM_TEXT_LABEL}") + miscConfig.add_argument("--asm-jtbl-label", help=f"Changes the label used to declare jumptable labels. Defaults to {self.ASM_JTBL_LABEL}") + miscConfig.add_argument("--asm-data-label", help=f"Changes the label used to declare data symbols. Defaults to {self.ASM_DATA_LABEL}") + miscConfig.add_argument("--asm-use-symbol-label", help=f"Toggles the use of labels for symbols. Defaults to {self.ASM_USE_SYMBOL_LABEL}", action=Utils.BooleanOptionalAction) miscConfig.add_argument("--asm-ent-label", help=f"Tells the disassembler to start using an ent label for functions") miscConfig.add_argument("--asm-end-label", help=f"Tells the disassembler to start using an end label for functions") - miscConfig.add_argument("--asm-func-as-label", help=f"Toggle adding the function name as an additional label. Defaults to {GlobalConfig.ASM_TEXT_FUNC_AS_LABEL}", action=Utils.BooleanOptionalAction) - miscConfig.add_argument("--asm-data-as-label", help=f"Toggle adding the data symbol name as an additional label. Defaults to {GlobalConfig.ASM_DATA_SYM_AS_LABEL}", action=Utils.BooleanOptionalAction) - miscConfig.add_argument("--asm-emit-size-directive", help=f"Toggles emitting a size directive to generated symbols. Defaults to {GlobalConfig.ASM_EMIT_SIZE_DIRECTIVE}", action=Utils.BooleanOptionalAction) - miscConfig.add_argument("--asm-use-prelude", help=f"Toggle use of the default prelude for asm files. Defaults to {GlobalConfig.ASM_USE_PRELUDE}", action=Utils.BooleanOptionalAction) - miscConfig.add_argument("--asm-generated-by", help=f"Toggle comment indicating the tool and version used to generate the disassembly. Defaults to {GlobalConfig.ASM_GENERATED_BY}", action=Utils.BooleanOptionalAction) + miscConfig.add_argument("--asm-func-as-label", help=f"Toggle adding the function name as an additional label. Defaults to {self.ASM_TEXT_FUNC_AS_LABEL}", action=Utils.BooleanOptionalAction) + miscConfig.add_argument("--asm-data-as-label", help=f"Toggle adding the data symbol name as an additional label. Defaults to {self.ASM_DATA_SYM_AS_LABEL}", action=Utils.BooleanOptionalAction) + miscConfig.add_argument("--asm-emit-size-directive", help=f"Toggles emitting a size directive to generated symbols. Defaults to {self.ASM_EMIT_SIZE_DIRECTIVE}", action=Utils.BooleanOptionalAction) + miscConfig.add_argument("--asm-use-prelude", help=f"Toggle use of the default prelude for asm files. Defaults to {self.ASM_USE_PRELUDE}", action=Utils.BooleanOptionalAction) + miscConfig.add_argument("--asm-generated-by", help=f"Toggle comment indicating the tool and version used to generate the disassembly. Defaults to {self.ASM_GENERATED_BY}", action=Utils.BooleanOptionalAction) - miscConfig.add_argument("--print-new-file-boundaries", help=f"Print to stdout any new file boundary found. Defaults to {GlobalConfig.PRINT_NEW_FILE_BOUNDARIES}", action=Utils.BooleanOptionalAction) + miscConfig.add_argument("--print-new-file-boundaries", help=f"Print to stdout any new file boundary found. Defaults to {self.PRINT_NEW_FILE_BOUNDARIES}", action=Utils.BooleanOptionalAction) - miscConfig.add_argument("--use-dot-byte", help=f"Disassemble symbols marked as bytes with .byte instead of .word. Defaults to {GlobalConfig.USE_DOT_BYTE}", action=Utils.BooleanOptionalAction) - miscConfig.add_argument("--use-dot-short", help=f"Disassemble symbols marked as shorts with .short instead of .word. Defaults to {GlobalConfig.USE_DOT_SHORT}", action=Utils.BooleanOptionalAction) + miscConfig.add_argument("--use-dot-byte", help=f"Disassemble symbols marked as bytes with .byte instead of .word. Defaults to {self.USE_DOT_BYTE}", action=Utils.BooleanOptionalAction) + miscConfig.add_argument("--use-dot-short", help=f"Disassemble symbols marked as shorts with .short instead of .word. Defaults to {self.USE_DOT_SHORT}", action=Utils.BooleanOptionalAction) - miscConfig.add_argument("--panic-range-check", help=f"Produce a fatal error if a range check fails instead of just printing a warning. Defaults to {GlobalConfig.PANIC_RANGE_CHECK}", action=Utils.BooleanOptionalAction) + miscConfig.add_argument("--panic-range-check", help=f"Produce a fatal error if a range check fails instead of just printing a warning. Defaults to {self.PANIC_RANGE_CHECK}", action=Utils.BooleanOptionalAction) verbosityConfig = parser.add_argument_group("Verbosity options") @@ -356,16 +348,15 @@ def addParametersToArgParse(parser: argparse.ArgumentParser): debugging.add_argument("--debug-unpaired-luis", help="Enables some debug info printing related to the unpaired LUI instructions)", action=Utils.BooleanOptionalAction) - @staticmethod - def processEnvironmentVariables(): + def processEnvironmentVariables(self): # Allows changing the global configuration by setting a SPIMDISASM_SETTINGNAME environment variable # For example: SPIMDISASM_EMIT_CPLOAD=False - for attr in dir(GlobalConfig): + for attr in dir(self): if attr.startswith("__"): continue - currentValue = getattr(GlobalConfig, attr) + currentValue = getattr(self, attr) environmentValue = os.getenv(f"SPIMDISASM_{attr}", currentValue) if environmentValue == currentValue: @@ -395,122 +386,123 @@ def processEnvironmentVariables(): elif isinstance(currentValue, int): environmentValue = int(environmentValue, 16) - setattr(GlobalConfig, attr, environmentValue) + setattr(self, attr, environmentValue) - @classmethod - def parseArgs(cls, args: argparse.Namespace): + def parseArgs(self, args: argparse.Namespace): if args.disasm_unknown is not None: - GlobalConfig.DISASSEMBLE_UNKNOWN_INSTRUCTIONS = args.disasm_unknown + self.DISASSEMBLE_UNKNOWN_INSTRUCTIONS = args.disasm_unknown if args.string_guesser is not None: - GlobalConfig.STRING_GUESSER = args.string_guesser + self.STRING_GUESSER = args.string_guesser if args.aggressive_string_guesser is not None: - GlobalConfig.AGGRESSIVE_STRING_GUESSER = args.aggressive_string_guesser + self.AGGRESSIVE_STRING_GUESSER = args.aggressive_string_guesser if args.name_vars_by_section is not None: - GlobalConfig.AUTOGENERATED_NAMES_BASED_ON_SECTION_TYPE = args.name_vars_by_section + self.AUTOGENERATED_NAMES_BASED_ON_SECTION_TYPE = args.name_vars_by_section if args.name_vars_by_type is not None: - GlobalConfig.AUTOGENERATED_NAMES_BASED_ON_DATA_TYPE = args.name_vars_by_type + self.AUTOGENERATED_NAMES_BASED_ON_DATA_TYPE = args.name_vars_by_type if args.custom_suffix: - GlobalConfig.CUSTOM_SUFFIX = args.custom_suffix + self.CUSTOM_SUFFIX = args.custom_suffix if args.compiler is not None: - GlobalConfig.COMPILER = Compiler.fromStr(args.compiler) + self.COMPILER = Compiler.fromStr(args.compiler) if args.endian is not None: - GlobalConfig.ENDIAN = InputEndian.fromStr(args.endian) + self.ENDIAN = InputEndian.fromStr(args.endian) if args.abi is not None: - GlobalConfig.ABI = Abi.fromStr(args.abi) + self.ABI = Abi.fromStr(args.abi) arch_level = ArchLevel.fromValue(args.arch_level) if arch_level is not None: - GlobalConfig.ARCHLEVEL = arch_level + self.ARCHLEVEL = arch_level if args.gp is not None: - GlobalConfig.GP_VALUE = int(args.gp, 16) + self.GP_VALUE = int(args.gp, 16) if args.pic is not None: - GlobalConfig.PIC = args.pic + self.PIC = args.pic if args.emit_cpload is not None: - GlobalConfig.EMIT_CPLOAD = args.emit_cpload + self.EMIT_CPLOAD = args.emit_cpload if args.emit_inline_reloc is not None: - GlobalConfig.EMIT_INLINE_RELOC = args.emit_inline_reloc + self.EMIT_INLINE_RELOC = args.emit_inline_reloc if args.filter_low_addresses is not None: - GlobalConfig.SYMBOL_FINDER_FILTER_LOW_ADDRESSES = args.filter_low_addresses + self.SYMBOL_FINDER_FILTER_LOW_ADDRESSES = args.filter_low_addresses if args.filter_high_addresses is not None: - GlobalConfig.SYMBOL_FINDER_FILTER_HIGH_ADDRESSES = args.filter_high_addresses + self.SYMBOL_FINDER_FILTER_HIGH_ADDRESSES = args.filter_high_addresses if args.filtered_addresses_as_constants is not None: - GlobalConfig.SYMBOL_FINDER_FILTERED_ADDRESSES_AS_CONSTANTS = args.filtered_addresses_as_constants + self.SYMBOL_FINDER_FILTERED_ADDRESSES_AS_CONSTANTS = args.filtered_addresses_as_constants if args.filtered_addresses_as_hilo is not None: - GlobalConfig.SYMBOL_FINDER_FILTERED_ADDRESSES_AS_HILO = args.filtered_addresses_as_hilo + self.SYMBOL_FINDER_FILTERED_ADDRESSES_AS_HILO = args.filtered_addresses_as_hilo if args.allow_unksegment is not None: - GlobalConfig.ALLOW_UNKSEGMENT = args.allow_unksegment + self.ALLOW_UNKSEGMENT = args.allow_unksegment if args.allow_all_addends_on_data is not None: - GlobalConfig.ALLOW_ALL_ADDENDS_ON_DATA = args.allow_all_addends_on_data + self.ALLOW_ALL_ADDENDS_ON_DATA = args.allow_all_addends_on_data if args.allow_all_constants_on_data is not None: - GlobalConfig.ALLOW_ALL_CONSTANTS_ON_DATA = args.allow_all_constants_on_data + self.ALLOW_ALL_CONSTANTS_ON_DATA = args.allow_all_constants_on_data if args.asm_comments is not None: - GlobalConfig.ASM_COMMENT = args.asm_comments + self.ASM_COMMENT = args.asm_comments if args.comment_offset_width is not None: - GlobalConfig.ASM_COMMENT_OFFSET_WIDTH = args.comment_offset_width + self.ASM_COMMENT_OFFSET_WIDTH = args.comment_offset_width if args.glabel_count is not None: - GlobalConfig.GLABEL_ASM_COUNT = args.glabel_count + self.GLABEL_ASM_COUNT = args.glabel_count if args.asm_referencee_symbols is not None: - GlobalConfig.ASM_REFERENCEE_SYMBOLS = args.asm_referencee_symbols + self.ASM_REFERENCEE_SYMBOLS = args.asm_referencee_symbols if args.asm_text_label: - GlobalConfig.ASM_TEXT_LABEL = args.asm_text_label + self.ASM_TEXT_LABEL = args.asm_text_label if args.asm_jtbl_label: - GlobalConfig.ASM_JTBL_LABEL = args.asm_jtbl_label + self.ASM_JTBL_LABEL = args.asm_jtbl_label if args.asm_data_label: - GlobalConfig.ASM_DATA_LABEL = args.asm_data_label + self.ASM_DATA_LABEL = args.asm_data_label if args.asm_use_symbol_label is not None: - GlobalConfig.ASM_USE_SYMBOL_LABEL = args.asm_use_symbol_label + self.ASM_USE_SYMBOL_LABEL = args.asm_use_symbol_label if args.asm_ent_label: - GlobalConfig.ASM_TEXT_ENT_LABEL = args.asm_ent_label + self.ASM_TEXT_ENT_LABEL = args.asm_ent_label if args.asm_end_label: - GlobalConfig.ASM_TEXT_END_LABEL = args.asm_end_label + self.ASM_TEXT_END_LABEL = args.asm_end_label if args.asm_func_as_label is not None: - GlobalConfig.ASM_TEXT_FUNC_AS_LABEL = args.asm_func_as_label + self.ASM_TEXT_FUNC_AS_LABEL = args.asm_func_as_label if args.asm_data_as_label is not None: - GlobalConfig.ASM_DATA_SYM_AS_LABEL = args.asm_data_as_label + self.ASM_DATA_SYM_AS_LABEL = args.asm_data_as_label if args.asm_emit_size_directive is not None: - GlobalConfig.ASM_EMIT_SIZE_DIRECTIVE = args.asm_emit_size_directive + self.ASM_EMIT_SIZE_DIRECTIVE = args.asm_emit_size_directive if args.asm_use_prelude is not None: - GlobalConfig.ASM_USE_PRELUDE = args.asm_use_prelude + self.ASM_USE_PRELUDE = args.asm_use_prelude if args.asm_generated_by is not None: - GlobalConfig.ASM_GENERATED_BY = args.asm_generated_by + self.ASM_GENERATED_BY = args.asm_generated_by if args.print_new_file_boundaries is not None: - GlobalConfig.PRINT_NEW_FILE_BOUNDARIES = args.print_new_file_boundaries + self.PRINT_NEW_FILE_BOUNDARIES = args.print_new_file_boundaries if args.use_dot_byte is not None: - GlobalConfig.USE_DOT_BYTE = args.use_dot_byte + self.USE_DOT_BYTE = args.use_dot_byte if args.use_dot_short is not None: - GlobalConfig.USE_DOT_SHORT = args.use_dot_short + self.USE_DOT_SHORT = args.use_dot_short if args.panic_range_check is not None: - GlobalConfig.PANIC_RANGE_CHECK = args.panic_range_check + self.PANIC_RANGE_CHECK = args.panic_range_check if args.verbose is not None: - GlobalConfig.VERBOSE = args.verbose + self.VERBOSE = args.verbose if args.quiet is not None: - GlobalConfig.QUIET = args.quiet + self.QUIET = args.quiet if args.debug_func_analysis is not None: - GlobalConfig.PRINT_FUNCTION_ANALYSIS_DEBUG_INFO = args.debug_func_analysis + self.PRINT_FUNCTION_ANALYSIS_DEBUG_INFO = args.debug_func_analysis if args.debug_symbol_finder is not None: - GlobalConfig.PRINT_SYMBOL_FINDER_DEBUG_INFO = args.debug_symbol_finder + self.PRINT_SYMBOL_FINDER_DEBUG_INFO = args.debug_symbol_finder if args.debug_unpaired_luis is not None: - GlobalConfig.PRINT_UNPAIRED_LUIS_DEBUG_INFO = args.debug_unpaired_luis + self.PRINT_UNPAIRED_LUIS_DEBUG_INFO = args.debug_unpaired_luis + +GlobalConfig = GlobalConfigType() GlobalConfig.processEnvironmentVariables() diff --git a/spimdisasm/disasmdis/DisasmdisInternals.py b/spimdisasm/disasmdis/DisasmdisInternals.py index 231129b3..4793f7cc 100644 --- a/spimdisasm/disasmdis/DisasmdisInternals.py +++ b/spimdisasm/disasmdis/DisasmdisInternals.py @@ -33,7 +33,7 @@ def addOptionsToParser(parser: argparse.ArgumentParser) -> argparse.ArgumentPars return parser def getArgsParser() -> argparse.ArgumentParser: - parser = argparse.ArgumentParser(description=getToolDescription(), prog=PROGNAME) + parser = argparse.ArgumentParser(description=getToolDescription(), prog=PROGNAME, formatter_class=common.Utils.PreserveWhiteSpaceWrapRawTextHelpFormatter) return addOptionsToParser(parser) @@ -104,7 +104,7 @@ def processArguments(args: argparse.Namespace) -> int: return 0 def addSubparser(subparser: argparse._SubParsersAction[argparse.ArgumentParser]): - parser = subparser.add_parser("disasmdis", help=getToolDescription()) + parser = subparser.add_parser("disasmdis", help=getToolDescription(), formatter_class=common.Utils.PreserveWhiteSpaceWrapRawTextHelpFormatter) addOptionsToParser(parser) diff --git a/spimdisasm/elfObjDisasm/ElfObjDisasmInternals.py b/spimdisasm/elfObjDisasm/ElfObjDisasmInternals.py index 9a39713a..28d5c89c 100644 --- a/spimdisasm/elfObjDisasm/ElfObjDisasmInternals.py +++ b/spimdisasm/elfObjDisasm/ElfObjDisasmInternals.py @@ -60,7 +60,7 @@ def addOptionsToParser(parser: argparse.ArgumentParser) -> argparse.ArgumentPars return parser def getArgsParser() -> argparse.ArgumentParser: - parser = argparse.ArgumentParser(description=getToolDescription(), prog=PROGNAME) + parser = argparse.ArgumentParser(description=getToolDescription(), prog=PROGNAME, formatter_class=common.Utils.PreserveWhiteSpaceWrapRawTextHelpFormatter) return addOptionsToParser(parser) def applyArgs(args: argparse.Namespace) -> None: @@ -451,7 +451,7 @@ def processArguments(args: argparse.Namespace) -> int: return 0 def addSubparser(subparser: argparse._SubParsersAction[argparse.ArgumentParser]): - parser = subparser.add_parser(PROGNAME, help=getToolDescription()) + parser = subparser.add_parser(PROGNAME, help=getToolDescription(), formatter_class=common.Utils.PreserveWhiteSpaceWrapRawTextHelpFormatter) addOptionsToParser(parser) diff --git a/spimdisasm/rspDisasm/RspDisasmInternals.py b/spimdisasm/rspDisasm/RspDisasmInternals.py index 5534244c..d4469157 100644 --- a/spimdisasm/rspDisasm/RspDisasmInternals.py +++ b/spimdisasm/rspDisasm/RspDisasmInternals.py @@ -39,7 +39,7 @@ def addOptionsToParser(parser: argparse.ArgumentParser) -> argparse.ArgumentPars return parser def getArgsParser() -> argparse.ArgumentParser: - parser = argparse.ArgumentParser(description=getToolDescription(), prog=PROGNAME) + parser = argparse.ArgumentParser(description=getToolDescription(), prog=PROGNAME, formatter_class=common.Utils.PreserveWhiteSpaceWrapRawTextHelpFormatter) return addOptionsToParser(parser) @@ -102,7 +102,7 @@ def processArguments(args: argparse.Namespace) -> int: return 0 def addSubparser(subparser: argparse._SubParsersAction[argparse.ArgumentParser]): - parser = subparser.add_parser("rspDisasm", help=getToolDescription()) + parser = subparser.add_parser("rspDisasm", help=getToolDescription(), formatter_class=common.Utils.PreserveWhiteSpaceWrapRawTextHelpFormatter) addOptionsToParser(parser) diff --git a/spimdisasm/singleFileDisasm/SingleFileDisasmInternals.py b/spimdisasm/singleFileDisasm/SingleFileDisasmInternals.py index 7b9b8afa..b24114c4 100644 --- a/spimdisasm/singleFileDisasm/SingleFileDisasmInternals.py +++ b/spimdisasm/singleFileDisasm/SingleFileDisasmInternals.py @@ -61,7 +61,7 @@ def addOptionsToParser(parser: argparse.ArgumentParser) -> argparse.ArgumentPars return parser def getArgsParser() -> argparse.ArgumentParser: - parser = argparse.ArgumentParser(description=getToolDescription(), prog=PROGNAME) + parser = argparse.ArgumentParser(description=getToolDescription(), prog=PROGNAME, formatter_class=common.Utils.PreserveWhiteSpaceWrapRawTextHelpFormatter) return addOptionsToParser(parser) def applyArgs(args: argparse.Namespace) -> None: @@ -208,7 +208,7 @@ def processArguments(args: argparse.Namespace) -> int: return 0 def addSubparser(subparser: argparse._SubParsersAction[argparse.ArgumentParser]): - parser = subparser.add_parser("singleFileDisasm", help=getToolDescription()) + parser = subparser.add_parser("singleFileDisasm", help=getToolDescription(), formatter_class=common.Utils.PreserveWhiteSpaceWrapRawTextHelpFormatter) addOptionsToParser(parser) From 44de8affddf649e4d77afcd79ae09dcfca77574e Mon Sep 17 00:00:00 2001 From: Angie Date: Sun, 25 Jun 2023 20:52:34 -0400 Subject: [PATCH 03/14] Implement string guesser for data section --- spimdisasm/common/ContextSymbols.py | 11 ++++- spimdisasm/common/GlobalConfig.py | 9 ++++ spimdisasm/common/Utils.py | 44 ++++++++++++++++++ spimdisasm/mips/MipsFileBase.py | 2 + spimdisasm/mips/sections/MipsSectionBase.py | 41 +++++++++++++++++ spimdisasm/mips/sections/MipsSectionData.py | 3 ++ spimdisasm/mips/sections/MipsSectionRodata.py | 46 ------------------- 7 files changed, 108 insertions(+), 48 deletions(-) diff --git a/spimdisasm/common/ContextSymbols.py b/spimdisasm/common/ContextSymbols.py index dd57561b..d5dbb374 100644 --- a/spimdisasm/common/ContextSymbols.py +++ b/spimdisasm/common/ContextSymbols.py @@ -268,18 +268,25 @@ def isShort(self) -> bool: def isString(self) -> bool: currentType = self.getTypeSpecial() + if self.sectionType == FileSectionType.Rodata: + stringGuesserLevel = GlobalConfig.RODATA_STRING_GUESSER_LEVEL + else: + stringGuesserLevel = GlobalConfig.DATA_STRING_GUESSER_LEVEL + if currentType in {"char", "char*", "asciz"}: return True if not self.isMaybeString: return False - if GlobalConfig.RODATA_STRING_GUESSER_LEVEL < 1: + + if stringGuesserLevel < 1: return False + if self.hasNoType(): # no type information, let's try to guess return True if self.hasOnlyAutodetectedType(): - if GlobalConfig.RODATA_STRING_GUESSER_LEVEL >= 4: + if stringGuesserLevel >= 4: # There's autodetected type information, but we are going to ignore it and try to guess return True return False diff --git a/spimdisasm/common/GlobalConfig.py b/spimdisasm/common/GlobalConfig.py index cb0bbff5..8e5148f4 100644 --- a/spimdisasm/common/GlobalConfig.py +++ b/spimdisasm/common/GlobalConfig.py @@ -114,6 +114,9 @@ class GlobalConfigType: RODATA_STRING_GUESSER_LEVEL: int = 1 """Rodata string guesser""" + DATA_STRING_GUESSER_LEVEL: int = 2 + """Data string guesser""" + #! @deprecated @property def STRING_GUESSER(self) -> bool: @@ -273,6 +276,7 @@ def addParametersToArgParse(self, parser: argparse.ArgumentParser): - level 4: Symbols with autodetected type information but no user type information can still be guessed as strings. """ backendConfig.add_argument("--rodata-string-guesser", help=rodataStringGuesserHelp, type=int, metavar="level") + backendConfig.add_argument("--data-string-guesser", help=f"Sets the level for the data C string guesser. See the explanation of `--rodata-string-guesser`. Defaults to {self.DATA_STRING_GUESSER_LEVEL}.", type=int, metavar="level") backendConfig.add_argument("--string-guesser", help=f"DEPRECATED, prefer `--rodata-string-guesser`. Toggles the string guesser feature. Defaults to {self.STRING_GUESSER}", action=Utils.BooleanOptionalAction) backendConfig.add_argument("--aggressive-string-guesser", help=f"DEPRECATED, prefer `--rodata-string-guesser`. Makes the string guesser feature to be more aggressive when trying to detect strings. Requires `--string-guesser` to be enabled. Defaults to {self.AGGRESSIVE_STRING_GUESSER}", action=Utils.BooleanOptionalAction) @@ -392,6 +396,11 @@ def parseArgs(self, args: argparse.Namespace): if args.disasm_unknown is not None: self.DISASSEMBLE_UNKNOWN_INSTRUCTIONS = args.disasm_unknown + if args.rodata_string_guesser is not None: + self.RODATA_STRING_GUESSER_LEVEL = args.rodata_string_guesser + if args.data_string_guesser is not None: + self.DATA_STRING_GUESSER_LEVEL = args.data_string_guesser + if args.string_guesser is not None: self.STRING_GUESSER = args.string_guesser if args.aggressive_string_guesser is not None: diff --git a/spimdisasm/common/Utils.py b/spimdisasm/common/Utils.py index a06b05d2..38ba07f2 100644 --- a/spimdisasm/common/Utils.py +++ b/spimdisasm/common/Utils.py @@ -231,6 +231,50 @@ def getMaybeBooleyFromMaybeStr(booley: str|None) -> bool|None: escapeCharactersSpecialCases = {0x1B, 0x8C, 0x8D} +def decodeWordsToStrings(buf: bytes, offset: int, stringEncoding: str, terminator: int=0) -> tuple[list[str], int]: + result = [] + + dst = bytearray() + i = 0 + while offset + i < len(buf) and buf[offset + i] != terminator: + char = buf[offset + i] + if char in bannedEscapeCharacters: + return [], 0 + elif char in escapeCharactersSpecialCases: + if dst: + try: + decoded = rabbitizer.Utils.escapeString(dst.decode(stringEncoding)) + except UnicodeDecodeError: + return [], 0 + result.append(decoded) + dst.clear() + result.append(f"\\x{char:02X}") + else: + dst.append(char) + i += 1 + + if offset + i > len(buf): + # Reached the end of the buffer without finding an 0 + return [], 0 + + if dst: + try: + decoded = rabbitizer.Utils.escapeString(dst.decode(stringEncoding)) + except UnicodeDecodeError: + return [], 0 + result.append(decoded) + + # To be a valid aligned string, the next word-aligned bytes needs to be zero + checkStartOffset = offset + i + checkEndOffset = min((checkStartOffset & ~3) + 4, len(buf)) + while checkStartOffset < checkEndOffset: + if buf[checkStartOffset] != terminator: + return [], 0 + checkStartOffset += 1 + + return result, i + +#! @deprecated def decodeString(buf: bytes, offset: int, stringEncoding: str) -> tuple[list[str], int]: result = [] diff --git a/spimdisasm/mips/MipsFileBase.py b/spimdisasm/mips/MipsFileBase.py index a30e4cf8..7e0ebfc8 100644 --- a/spimdisasm/mips/MipsFileBase.py +++ b/spimdisasm/mips/MipsFileBase.py @@ -31,6 +31,8 @@ def __init__(self, context: common.Context, vromStart: int, vromEnd: int, vram: self.stringEncoding: str = "EUC-JP" + self.bytes: bytes = common.Utils.wordsToBytes(self.words) + def setCommentOffset(self, commentOffset: int): self.commentOffset = commentOffset diff --git a/spimdisasm/mips/sections/MipsSectionBase.py b/spimdisasm/mips/sections/MipsSectionBase.py index 4558a7da..9a1e9022 100644 --- a/spimdisasm/mips/sections/MipsSectionBase.py +++ b/spimdisasm/mips/sections/MipsSectionBase.py @@ -38,6 +38,47 @@ def processStaticRelocs(self) -> None: contextSym = self.addSymbol(relocVram, sectionType=sectionType, isAutogenerated=True) contextSym._isStatic = True + def _stringGuesser(self, contextSym: common.ContextSymbol, localOffset: int) -> bool: + if contextSym.isMaybeString or contextSym.isString(): + return True + + if self.sectionType == common.FileSectionType.Rodata: + stringGuesserLevel = common.GlobalConfig.RODATA_STRING_GUESSER_LEVEL + else: + stringGuesserLevel = common.GlobalConfig.DATA_STRING_GUESSER_LEVEL + + if stringGuesserLevel < 1: + return False + + if contextSym.referenceCounter > 1: + if stringGuesserLevel < 2: + return False + + # This would mean the string is an empty string, which is not very likely + if self.words[localOffset//4] == 0: + if stringGuesserLevel < 3: + return False + + if contextSym.hasOnlyAutodetectedType(): + if stringGuesserLevel < 4: + return False + + currentVram = self.getVramOffset(localOffset) + currentVrom = self.getVromOffset(localOffset) + _, rawStringSize = common.Utils.decodeWordsToStrings(self.bytes, localOffset, self.stringEncoding) + if rawStringSize == 0: + # String can't be decoded + return False + + # Check if there is already another symbol after the current one and before the end of the string, + # in which case we say this symbol should not be a string + otherSym = self.getSymbol(currentVram + rawStringSize, vromAddress=currentVrom + rawStringSize, checkUpperLimit=False, checkGlobalSegment=False) + if otherSym != contextSym: + return False + + return True + + def blankOutDifferences(self, other: FileBase) -> bool: if not common.GlobalConfig.REMOVE_POINTERS: return False diff --git a/spimdisasm/mips/sections/MipsSectionData.py b/spimdisasm/mips/sections/MipsSectionData.py index 633dc949..9ede538e 100644 --- a/spimdisasm/mips/sections/MipsSectionData.py +++ b/spimdisasm/mips/sections/MipsSectionData.py @@ -36,8 +36,10 @@ def analyze(self): contextSym = self.getSymbol(currentVram, vromAddress=currentVrom, tryPlusOffset=False) if contextSym is not None: symbolList.append((localOffset, contextSym)) + contextSym.isMaybeString = self._stringGuesser(contextSym, localOffset) elif self.popPointerInDataReference(currentVram) is not None: contextSym = self.addSymbol(currentVram, sectionType=self.sectionType, isAutogenerated=True) + contextSym.isMaybeString = self._stringGuesser(contextSym, localOffset) symbolList.append((localOffset, contextSym)) if self.checkWordIsASymbolReference(w): @@ -56,6 +58,7 @@ def analyze(self): contextSym = self.getSymbol(currentVram, vromAddress=currentVrom, tryPlusOffset=True, checkUpperLimit=True) if contextSym is None and self.popPointerInDataReference(currentVram) is not None: contextSym = self.addSymbol(currentVram, sectionType=self.sectionType, isAutogenerated=True) + contextSym.isMaybeString = self._stringGuesser(contextSym, localOffset) symbolList.append((localOffset, contextSym)) localOffset += 4 diff --git a/spimdisasm/mips/sections/MipsSectionRodata.py b/spimdisasm/mips/sections/MipsSectionRodata.py index 01caf5d4..eef6a073 100644 --- a/spimdisasm/mips/sections/MipsSectionRodata.py +++ b/spimdisasm/mips/sections/MipsSectionRodata.py @@ -22,52 +22,6 @@ def __init__(self, context: common.Context, vromStart: int, vromEnd: int, vram: words = common.Utils.bytesToWords(array_of_bytes, vromStart, vromEnd) super().__init__(context, vromStart, vromEnd, vram, filename, words, common.FileSectionType.Rodata, segmentVromStart, overlayCategory) - self.bytes: bytes = common.Utils.wordsToBytes(self.words) - - - def _stringGuesser(self, contextSym: common.ContextSymbol, localOffset: int) -> bool: - if contextSym.isMaybeString or contextSym.isString(): - return True - - if common.GlobalConfig.RODATA_STRING_GUESSER_LEVEL < 1: - return False - - if contextSym.referenceCounter > 1: - if common.GlobalConfig.RODATA_STRING_GUESSER_LEVEL < 2: - return False - - # This would mean the string is an empty string, which is not very likely - if self.words[localOffset//4] == 0: - if common.GlobalConfig.RODATA_STRING_GUESSER_LEVEL < 3: - return False - - if contextSym.hasOnlyAutodetectedType(): - if common.GlobalConfig.RODATA_STRING_GUESSER_LEVEL < 4: - return False - - try: - currentVram = self.getVramOffset(localOffset) - currentVrom = self.getVromOffset(localOffset) - _, rawStringSize = common.Utils.decodeString(self.bytes, localOffset, self.stringEncoding) - - # Check if there is already another symbol after the current one and before the end of the string, - # in which case we say this symbol should not be a string - otherSym = self.getSymbol(currentVram + rawStringSize, vromAddress=currentVrom + rawStringSize, checkUpperLimit=False, checkGlobalSegment=False) - if otherSym != contextSym: - return False - - # To be a valid aligned string, the next word-aligned bytes needs to be zero - checkStartOffset = localOffset + rawStringSize - checkEndOffset = min((checkStartOffset & ~3) + 4, len(self.bytes)) - while checkStartOffset < checkEndOffset: - if self.bytes[checkStartOffset] != 0: - return False - checkStartOffset += 1 - except (UnicodeDecodeError, RuntimeError): - # String can't be decoded - return False - return True - def analyze(self): self.checkAndCreateFirstSymbol() From d4a6243d0df62eda0cda56fc3a7e8e59935be4bb Mon Sep 17 00:00:00 2001 From: angie Date: Sun, 25 Jun 2023 23:02:17 -0400 Subject: [PATCH 04/14] Fix string decoding borken by the small refactor --- spimdisasm/common/ContextSymbols.py | 2 +- spimdisasm/common/GlobalConfig.py | 5 ++--- spimdisasm/common/Utils.py | 18 +++++++++--------- spimdisasm/mips/sections/MipsSectionBase.py | 4 ++-- spimdisasm/mips/symbols/MipsSymbolBase.py | 17 +++++------------ 5 files changed, 19 insertions(+), 27 deletions(-) diff --git a/spimdisasm/common/ContextSymbols.py b/spimdisasm/common/ContextSymbols.py index d5dbb374..bf74323b 100644 --- a/spimdisasm/common/ContextSymbols.py +++ b/spimdisasm/common/ContextSymbols.py @@ -208,7 +208,7 @@ def hasNoType(self) -> bool: def hasOnlyAutodetectedType(self) -> bool: if self.userDeclaredType is not None and self.userDeclaredType != "": return False - return (self.autodetectedType is not None and self.autodetectedType != "") and self.accessType is not None + return (self.autodetectedType is not None and self.autodetectedType != "") or self.accessType is not None def isTrustableFunction(self, rsp: bool=False) -> bool: diff --git a/spimdisasm/common/GlobalConfig.py b/spimdisasm/common/GlobalConfig.py index 8e5148f4..d0fc3471 100644 --- a/spimdisasm/common/GlobalConfig.py +++ b/spimdisasm/common/GlobalConfig.py @@ -138,10 +138,9 @@ def AGGRESSIVE_STRING_GUESSER(self) -> bool: @AGGRESSIVE_STRING_GUESSER.setter def AGGRESSIVE_STRING_GUESSER(self, value: bool) -> None: if value: - if self.RODATA_STRING_GUESSER_LEVEL <= 2: - self.RODATA_STRING_GUESSER_LEVEL = 9 + self.RODATA_STRING_GUESSER_LEVEL = 9 else: - if self.RODATA_STRING_GUESSER_LEVEL > 1: + if self.RODATA_STRING_GUESSER_LEVEL >= 0: self.RODATA_STRING_GUESSER_LEVEL = 1 AUTOGENERATED_NAMES_BASED_ON_SECTION_TYPE: bool = True diff --git a/spimdisasm/common/Utils.py b/spimdisasm/common/Utils.py index 38ba07f2..7e2fd418 100644 --- a/spimdisasm/common/Utils.py +++ b/spimdisasm/common/Utils.py @@ -231,7 +231,7 @@ def getMaybeBooleyFromMaybeStr(booley: str|None) -> bool|None: escapeCharactersSpecialCases = {0x1B, 0x8C, 0x8D} -def decodeWordsToStrings(buf: bytes, offset: int, stringEncoding: str, terminator: int=0) -> tuple[list[str], int]: +def decodeBytesToStrings(buf: bytes, offset: int, stringEncoding: str, terminator: int=0) -> tuple[list[str], int]: result = [] dst = bytearray() @@ -239,13 +239,13 @@ def decodeWordsToStrings(buf: bytes, offset: int, stringEncoding: str, terminato while offset + i < len(buf) and buf[offset + i] != terminator: char = buf[offset + i] if char in bannedEscapeCharacters: - return [], 0 + return [], -1 elif char in escapeCharactersSpecialCases: if dst: try: decoded = rabbitizer.Utils.escapeString(dst.decode(stringEncoding)) except UnicodeDecodeError: - return [], 0 + return [], -1 result.append(decoded) dst.clear() result.append(f"\\x{char:02X}") @@ -253,15 +253,15 @@ def decodeWordsToStrings(buf: bytes, offset: int, stringEncoding: str, terminato dst.append(char) i += 1 - if offset + i > len(buf): + if offset + i >= len(buf): # Reached the end of the buffer without finding an 0 - return [], 0 + return [], -1 if dst: try: decoded = rabbitizer.Utils.escapeString(dst.decode(stringEncoding)) except UnicodeDecodeError: - return [], 0 + return [], -1 result.append(decoded) # To be a valid aligned string, the next word-aligned bytes needs to be zero @@ -269,7 +269,7 @@ def decodeWordsToStrings(buf: bytes, offset: int, stringEncoding: str, terminato checkEndOffset = min((checkStartOffset & ~3) + 4, len(buf)) while checkStartOffset < checkEndOffset: if buf[checkStartOffset] != terminator: - return [], 0 + return [], -1 checkStartOffset += 1 return result, i @@ -347,7 +347,7 @@ def format_usage(self): # https://stackoverflow.com/a/35925919/6292472 class PreserveWhiteSpaceWrapRawTextHelpFormatter(argparse.RawDescriptionHelpFormatter): def __add_whitespace(self, idx, iWSpace, text): - if idx is 0: + if idx == 0: return text return (" " * iWSpace) + text @@ -357,7 +357,7 @@ def _split_lines(self, text, width): textRows = text.splitlines() for idx, line in enumerate(textRows): search = re.search('\s*[0-9\-]{0,}\.?\s*', line) - if line.strip() is "": + if line.strip() == "": textRows[idx] = " " elif search: lWSpace = search.end() diff --git a/spimdisasm/mips/sections/MipsSectionBase.py b/spimdisasm/mips/sections/MipsSectionBase.py index 9a1e9022..ea121be8 100644 --- a/spimdisasm/mips/sections/MipsSectionBase.py +++ b/spimdisasm/mips/sections/MipsSectionBase.py @@ -65,8 +65,8 @@ def _stringGuesser(self, contextSym: common.ContextSymbol, localOffset: int) -> currentVram = self.getVramOffset(localOffset) currentVrom = self.getVromOffset(localOffset) - _, rawStringSize = common.Utils.decodeWordsToStrings(self.bytes, localOffset, self.stringEncoding) - if rawStringSize == 0: + _, rawStringSize = common.Utils.decodeBytesToStrings(self.bytes, localOffset, self.stringEncoding) + if rawStringSize < 0: # String can't be decoded return False diff --git a/spimdisasm/mips/symbols/MipsSymbolBase.py b/spimdisasm/mips/symbols/MipsSymbolBase.py index 7ae80a1b..956bd62c 100644 --- a/spimdisasm/mips/symbols/MipsSymbolBase.py +++ b/spimdisasm/mips/symbols/MipsSymbolBase.py @@ -399,15 +399,9 @@ def getNthWordAsString(self, i: int) -> tuple[str, int]: localOffset = 4*i buffer = common.Utils.wordsToBytes(self.words) - decodedStrings, rawStringSize = common.Utils.decodeString(buffer, localOffset, self.stringEncoding) - - # To be a valid aligned string, the next word-aligned bytes needs to be zero - checkStartOffset = localOffset + rawStringSize - checkEndOffset = min((checkStartOffset & ~3) + 4, len(buffer)) - while checkStartOffset < checkEndOffset: - if buffer[checkStartOffset] != 0: - raise RuntimeError() - checkStartOffset += 1 + decodedStrings, rawStringSize = common.Utils.decodeBytesToStrings(buffer, localOffset, self.stringEncoding) + if rawStringSize < 0: + return "", -1 skip = rawStringSize // 4 comment = self.generateAsmLineComment(localOffset) @@ -487,9 +481,8 @@ def disassembleAsData(self, useGlobalLabel: bool=True) -> str: elif self.isDouble(i): data, skip = self.getNthWordAsDouble(i) elif self.isString(): - try: - data, skip = self.getNthWordAsString(i) - except (UnicodeDecodeError, RuntimeError): + data, skip = self.getNthWordAsString(i) + if skip < 0: # Not a string self._failedStringDecoding = True data, skip = self.getNthWord(i, canReferenceSymbolsWithAddends, canReferenceConstants) From 331f17ccdf22fbeb7c4d91d463b5320878c8eeb9 Mon Sep 17 00:00:00 2001 From: Angie Date: Mon, 26 Jun 2023 13:47:58 -0400 Subject: [PATCH 05/14] Experimental pascal string guesser --- spimdisasm/common/ContextSymbols.py | 32 ++++++++++- spimdisasm/common/GlobalConfig.py | 12 ++++ spimdisasm/common/Utils.py | 1 + spimdisasm/mips/sections/MipsSectionBase.py | 40 ++++++++++++++ spimdisasm/mips/sections/MipsSectionData.py | 3 + spimdisasm/mips/sections/MipsSectionRodata.py | 3 + spimdisasm/mips/symbols/MipsSymbolBase.py | 55 +++++++++++++++++-- spimdisasm/mips/symbols/MipsSymbolRodata.py | 9 +++ 8 files changed, 149 insertions(+), 6 deletions(-) diff --git a/spimdisasm/common/ContextSymbols.py b/spimdisasm/common/ContextSymbols.py index bf74323b..0a763b88 100644 --- a/spimdisasm/common/ContextSymbols.py +++ b/spimdisasm/common/ContextSymbols.py @@ -122,6 +122,8 @@ class ContextSymbol: isMaybeString: bool = False + isMaybePascalString: bool = False + referenceCounter: int = 0 "How much this symbol is referenced by something else" @@ -291,6 +293,32 @@ def isString(self) -> bool: return True return False + def isPascalString(self) -> bool: + currentType = self.getTypeSpecial() + + if self.sectionType == FileSectionType.Rodata: + stringGuesserLevel = GlobalConfig.PASCAL_RODATA_STRING_GUESSER_LEVEL + else: + stringGuesserLevel = GlobalConfig.PASCAL_DATA_STRING_GUESSER_LEVEL + + if currentType in {"String", "Char", "ascii"}: + return True + if not self.isMaybePascalString: + return False + + if stringGuesserLevel < 1: + return False + + if self.hasNoType(): + # no type information, let's try to guess + return True + + if self.hasOnlyAutodetectedType(): + if stringGuesserLevel >= 4: + # There's autodetected type information, but we are going to ignore it and try to guess + return True + return False + def isFloat(self) -> bool: currentType = self.getTypeSpecial() @@ -535,7 +563,7 @@ def getCsvHeader() -> str: output += "autodetectedSize," output += "getSize,getVrom,sectionType," - output += "isDefined,isUserDeclared,isAutogenerated,isMaybeString," + output += "isDefined,isUserDeclared,isAutogenerated,isMaybeString,isMaybePascalString," output += "referenceCounter,overlayCategory,unknownSegment," output += "isGot,isGotGlobal,isGotLocal,gotIndex," output += "firstLoAccess,isAutogeneratedPad,isElfNotype" @@ -560,7 +588,7 @@ def toCsv(self) -> str: else: output += f"0x{self.autodetectedSize:X}," output += f"0x{self.getSize():X},0x{self.getVrom():X},{self.sectionType.toStr()}," - output += f"{self.isDefined},{self.isUserDeclared},{self.isAutogenerated},{self.isMaybeString}," + output += f"{self.isDefined},{self.isUserDeclared},{self.isAutogenerated},{self.isMaybeString},{self.isMaybePascalString}," output += f"{self.referenceCounter},{self.overlayCategory},{self.unknownSegment}," output += f"{self.isGot},{self.isGotGlobal},{self.isGotLocal},{self.gotIndex}," output += f"{self.firstLoAccess},{self.isAutogeneratedPad()},{self.isElfNotype}" diff --git a/spimdisasm/common/GlobalConfig.py b/spimdisasm/common/GlobalConfig.py index d0fc3471..14ffd579 100644 --- a/spimdisasm/common/GlobalConfig.py +++ b/spimdisasm/common/GlobalConfig.py @@ -117,6 +117,10 @@ class GlobalConfigType: DATA_STRING_GUESSER_LEVEL: int = 2 """Data string guesser""" + PASCAL_RODATA_STRING_GUESSER_LEVEL: int = 0 + + PASCAL_DATA_STRING_GUESSER_LEVEL: int = 0 + #! @deprecated @property def STRING_GUESSER(self) -> bool: @@ -277,6 +281,9 @@ def addParametersToArgParse(self, parser: argparse.ArgumentParser): backendConfig.add_argument("--rodata-string-guesser", help=rodataStringGuesserHelp, type=int, metavar="level") backendConfig.add_argument("--data-string-guesser", help=f"Sets the level for the data C string guesser. See the explanation of `--rodata-string-guesser`. Defaults to {self.DATA_STRING_GUESSER_LEVEL}.", type=int, metavar="level") + backendConfig.add_argument("--pascal-rodata-string-guesser", help=f"EXPERIMENTAL, this feature may change or be removed in the future. Sets the level for the data Pascal string guesser. See the explanation of `--rodata-string-guesser`. Defaults to {self.PASCAL_RODATA_STRING_GUESSER_LEVEL}.", type=int, metavar="level") + backendConfig.add_argument("--pascal-data-string-guesser", help=f"EXPERIMENTAL, this feature may change or be removed in the future. Sets the level for the data Pascal string guesser. See the explanation of `--rodata-string-guesser`. Defaults to {self.PASCAL_DATA_STRING_GUESSER_LEVEL}.", type=int, metavar="level") + backendConfig.add_argument("--string-guesser", help=f"DEPRECATED, prefer `--rodata-string-guesser`. Toggles the string guesser feature. Defaults to {self.STRING_GUESSER}", action=Utils.BooleanOptionalAction) backendConfig.add_argument("--aggressive-string-guesser", help=f"DEPRECATED, prefer `--rodata-string-guesser`. Makes the string guesser feature to be more aggressive when trying to detect strings. Requires `--string-guesser` to be enabled. Defaults to {self.AGGRESSIVE_STRING_GUESSER}", action=Utils.BooleanOptionalAction) @@ -400,6 +407,11 @@ def parseArgs(self, args: argparse.Namespace): if args.data_string_guesser is not None: self.DATA_STRING_GUESSER_LEVEL = args.data_string_guesser + if args.pascal_rodata_string_guesser is not None: + self.PASCAL_RODATA_STRING_GUESSER_LEVEL = args.pascal_rodata_string_guesser + if args.pascal_data_string_guesser is not None: + self.PASCAL_DATA_STRING_GUESSER_LEVEL = args.pascal_data_string_guesser + if args.string_guesser is not None: self.STRING_GUESSER = args.string_guesser if args.aggressive_string_guesser is not None: diff --git a/spimdisasm/common/Utils.py b/spimdisasm/common/Utils.py index 7e2fd418..7a4f5af3 100644 --- a/spimdisasm/common/Utils.py +++ b/spimdisasm/common/Utils.py @@ -196,6 +196,7 @@ def getMaybeBooleyFromMaybeStr(booley: str|None) -> bool|None: # Escape characters that are unlikely to be used bannedEscapeCharacters = { + 0x00, # '\0' 0x01, 0x02, 0x03, diff --git a/spimdisasm/mips/sections/MipsSectionBase.py b/spimdisasm/mips/sections/MipsSectionBase.py index ea121be8..9b1bd4cc 100644 --- a/spimdisasm/mips/sections/MipsSectionBase.py +++ b/spimdisasm/mips/sections/MipsSectionBase.py @@ -78,6 +78,46 @@ def _stringGuesser(self, contextSym: common.ContextSymbol, localOffset: int) -> return True + def _pascalStringGuesser(self, contextSym: common.ContextSymbol, localOffset: int) -> bool: + if contextSym.isMaybePascalString or contextSym.isPascalString(): + return True + + if self.sectionType == common.FileSectionType.Rodata: + stringGuesserLevel = common.GlobalConfig.PASCAL_RODATA_STRING_GUESSER_LEVEL + else: + stringGuesserLevel = common.GlobalConfig.PASCAL_DATA_STRING_GUESSER_LEVEL + + if stringGuesserLevel < 1: + return False + + if contextSym.referenceCounter > 1: + if stringGuesserLevel < 2: + return False + + # This would mean the string is an empty string, which is not very likely + if self.words[localOffset//4] == 0: + if stringGuesserLevel < 3: + return False + + if contextSym.hasOnlyAutodetectedType(): + if stringGuesserLevel < 4: + return False + + currentVram = self.getVramOffset(localOffset) + currentVrom = self.getVromOffset(localOffset) + _, rawStringSize = common.Utils.decodeBytesToStrings(self.bytes, localOffset, self.stringEncoding, terminator=0x20) + if rawStringSize < 0: + # String can't be decoded + return False + + # Check if there is already another symbol after the current one and before the end of the string, + # in which case we say this symbol should not be a string + otherSym = self.getSymbol(currentVram + rawStringSize, vromAddress=currentVrom + rawStringSize, checkUpperLimit=False, checkGlobalSegment=False) + if otherSym != contextSym: + return False + + return True + def blankOutDifferences(self, other: FileBase) -> bool: if not common.GlobalConfig.REMOVE_POINTERS: diff --git a/spimdisasm/mips/sections/MipsSectionData.py b/spimdisasm/mips/sections/MipsSectionData.py index 9ede538e..ae81dee6 100644 --- a/spimdisasm/mips/sections/MipsSectionData.py +++ b/spimdisasm/mips/sections/MipsSectionData.py @@ -37,9 +37,11 @@ def analyze(self): if contextSym is not None: symbolList.append((localOffset, contextSym)) contextSym.isMaybeString = self._stringGuesser(contextSym, localOffset) + contextSym.isMaybePascalString = self._pascalStringGuesser(contextSym, localOffset) elif self.popPointerInDataReference(currentVram) is not None: contextSym = self.addSymbol(currentVram, sectionType=self.sectionType, isAutogenerated=True) contextSym.isMaybeString = self._stringGuesser(contextSym, localOffset) + contextSym.isMaybePascalString = self._pascalStringGuesser(contextSym, localOffset) symbolList.append((localOffset, contextSym)) if self.checkWordIsASymbolReference(w): @@ -59,6 +61,7 @@ def analyze(self): if contextSym is None and self.popPointerInDataReference(currentVram) is not None: contextSym = self.addSymbol(currentVram, sectionType=self.sectionType, isAutogenerated=True) contextSym.isMaybeString = self._stringGuesser(contextSym, localOffset) + contextSym.isMaybePascalString = self._pascalStringGuesser(contextSym, localOffset) symbolList.append((localOffset, contextSym)) localOffset += 4 diff --git a/spimdisasm/mips/sections/MipsSectionRodata.py b/spimdisasm/mips/sections/MipsSectionRodata.py index eef6a073..3ab64c4d 100644 --- a/spimdisasm/mips/sections/MipsSectionRodata.py +++ b/spimdisasm/mips/sections/MipsSectionRodata.py @@ -80,14 +80,17 @@ def analyze(self): if self.popPointerInDataReference(currentVram) is not None: contextSym = self.addSymbol(currentVram, sectionType=self.sectionType, isAutogenerated=True) contextSym.isMaybeString = self._stringGuesser(contextSym, localOffset) + contextSym.isMaybePascalString = self._pascalStringGuesser(contextSym, localOffset) lastVramSymbol = contextSym elif contextSym is not None: contextSym.isMaybeString = self._stringGuesser(contextSym, localOffset) + contextSym.isMaybePascalString = self._pascalStringGuesser(contextSym, localOffset) elif lastVramSymbol is not None and lastVramSymbol.isJumpTable() and w != 0: contextSym = self.addSymbol(currentVram, sectionType=self.sectionType, isAutogenerated=True) contextSym.isMaybeString = self._stringGuesser(contextSym, localOffset) + contextSym.isMaybePascalString = self._pascalStringGuesser(contextSym, localOffset) lastVramSymbol = contextSym self.checkWordIsASymbolReference(w) diff --git a/spimdisasm/mips/symbols/MipsSymbolBase.py b/spimdisasm/mips/symbols/MipsSymbolBase.py index 956bd62c..95f2a7a8 100644 --- a/spimdisasm/mips/symbols/MipsSymbolBase.py +++ b/spimdisasm/mips/symbols/MipsSymbolBase.py @@ -25,6 +25,7 @@ def __init__(self, context: common.Context, vromStart: int, vromEnd: int, inFile self.stringEncoding: str = "EUC-JP" self._failedStringDecoding: bool = False + self._failedPascalStringDecoding: bool = False self.relocs: dict[int, common.RelocationInfo] = dict() "key: word offset" @@ -123,13 +124,22 @@ def relocToInlineStr(self, relocInfo: common.RelocationInfo | None) -> str: return relocInfo.getInlineStr() def isByte(self, index: int) -> bool: - return self.contextSym.isByte() and not self.isString() + if self.isString() or self.isPascalString(): + return False + return self.contextSym.isByte() def isShort(self, index: int) -> bool: return self.contextSym.isShort() def isString(self) -> bool: - return self.contextSym.isString() and not self._failedStringDecoding + if self._failedStringDecoding: + return False + return self.contextSym.isString() + + def isPascalString(self) -> bool: + if self._failedStringDecoding: + return False + return self.contextSym.isPascalString() def isFloat(self, index: int) -> bool: if self.contextSym.isFloat(): @@ -184,6 +194,8 @@ def renameBasedOnType(self): self.contextSym.name = f"DBL_{self.vram:08X}" elif self.isString(): self.contextSym.name = f"STR_{self.vram:08X}" + elif self.isPascalString(): + self.contextSym.name = f"PSTR_{self.vram:08X}" def analyze(self): @@ -420,6 +432,31 @@ def getNthWordAsString(self, i: int) -> tuple[str, int]: return result, skip + def getNthWordAsPascalString(self, i: int) -> tuple[str, int]: + localOffset = 4*i + + buffer = common.Utils.wordsToBytes(self.words) + decodedStrings, rawStringSize = common.Utils.decodeBytesToStrings(buffer, localOffset, self.stringEncoding, terminator=0x20) + if rawStringSize < 0: + return "", -1 + + skip = rawStringSize // 4 + comment = self.generateAsmLineComment(localOffset) + result = f"{comment} " + + commentPaddingNum = 22 + if not common.GlobalConfig.ASM_COMMENT: + commentPaddingNum = 1 + + if rawStringSize == 0: + decodedStrings.append("") + for decodedValue in decodedStrings[:-1]: + result += f'.ascii "{decodedValue}"' + result += common.GlobalConfig.LINE_ENDS + (commentPaddingNum * " ") + result += f'.ascii "{decodedStrings[-1]}"{common.GlobalConfig.LINE_ENDS}' + + return result, skip + def getNthWord(self, i: int, canReferenceSymbolsWithAddends: bool=False, canReferenceConstants: bool=False) -> tuple[str, int]: return self.getNthWordAsWords(i, canReferenceSymbolsWithAddends=canReferenceSymbolsWithAddends, canReferenceConstants=canReferenceConstants) @@ -442,14 +479,18 @@ def getPrevAlignDirective(self, i: int=0) -> str: if i == 0 and common.GlobalConfig.COMPILER not in {common.Compiler.IDO, common.Compiler.PSYQ}: if self.vram % 0x8 == 0: return f".align 3{common.GlobalConfig.LINE_ENDS}" - elif self.isString(): + elif self.isString() or self.isPascalString(): if self.vram % 0x4 == 0: return f".align 2{common.GlobalConfig.LINE_ENDS}" return "" def getPostAlignDirective(self, i: int=0) -> str: - if self.isString(): + if self.parent is not None and self.parent.vram % 0x8 != 0: + # Can't emit alignment directives if the parent file isn't properly aligned + return "" + + if self.isString() or self.isPascalString(): return f".align 2{common.GlobalConfig.LINE_ENDS}" return "" @@ -486,6 +527,12 @@ def disassembleAsData(self, useGlobalLabel: bool=True) -> str: # Not a string self._failedStringDecoding = True data, skip = self.getNthWord(i, canReferenceSymbolsWithAddends, canReferenceConstants) + elif self.isPascalString(): + data, skip = self.getNthWordAsPascalString(i) + if skip < 0: + # Not a string + self._failedPascalStringDecoding = True + data, skip = self.getNthWord(i, canReferenceSymbolsWithAddends, canReferenceConstants) else: data, skip = self.getNthWord(i, canReferenceSymbolsWithAddends, canReferenceConstants) diff --git a/spimdisasm/mips/symbols/MipsSymbolRodata.py b/spimdisasm/mips/symbols/MipsSymbolRodata.py index 618b4239..97854f65 100644 --- a/spimdisasm/mips/symbols/MipsSymbolRodata.py +++ b/spimdisasm/mips/symbols/MipsSymbolRodata.py @@ -42,6 +42,8 @@ def isMaybeConstVariable(self) -> bool: return False elif self.isString(): return False + elif self.isPascalString(): + return False return True def isRdata(self) -> bool: @@ -98,6 +100,13 @@ def countExtraPadding(self) -> int: if (self.words[i-1] & 0x000000FF) != 0: break count += 1 + if self.isPascalString(): + for i in range(len(self.words)-1, 0, -1): + if self.words[i] != 0x20: + break + if (self.words[i-1] & 0x000000FF) != 0x20: + break + count += 1 elif self.isDouble(0): for i in range(len(self.words)-1, 0, -2): if self.words[i] != 0 or self.words[i-1] != 0: From 4a81fd0f78979483b64001ef6a6e77c14eefab44 Mon Sep 17 00:00:00 2001 From: Angie Date: Mon, 26 Jun 2023 18:29:13 -0400 Subject: [PATCH 06/14] Preserve spaces in pascal strings --- spimdisasm/common/Utils.py | 66 +++++++++++++++++++++ spimdisasm/mips/sections/MipsSectionBase.py | 2 +- spimdisasm/mips/symbols/MipsSymbolBase.py | 6 +- 3 files changed, 72 insertions(+), 2 deletions(-) diff --git a/spimdisasm/common/Utils.py b/spimdisasm/common/Utils.py index 7a4f5af3..3a3fd10e 100644 --- a/spimdisasm/common/Utils.py +++ b/spimdisasm/common/Utils.py @@ -275,6 +275,72 @@ def decodeBytesToStrings(buf: bytes, offset: int, stringEncoding: str, terminato return result, i +def decodeBytesToPascalStrings(buf: bytes, offset: int, stringEncoding: str, terminator: int=0x20) -> tuple[list[str], int]: + result = [] + + dst = bytearray() + i = 0 + while offset + i < len(buf): + if buf[offset + i] != terminator: + if offset + i + 1 < len(buf) and buf[offset + i + 1] != terminator: + # Require at least 2 terminators next to each other to actually consider the string has ended + break + char = buf[offset + i] + if char in bannedEscapeCharacters: + return [], -1 + elif char in escapeCharactersSpecialCases: + if dst: + try: + decoded = rabbitizer.Utils.escapeString(dst.decode(stringEncoding)) + except UnicodeDecodeError: + return [], -1 + result.append(decoded) + dst.clear() + result.append(f"\\x{char:02X}") + else: + dst.append(char) + i += 1 + + if offset + i >= len(buf): + # Reached the end of the buffer without finding an 0 + return [], -1 + + # To be a valid aligned string, the next word-aligned bytes needs to be the terminator value + checkStartOffset = offset + i + checkEndOffset = min((checkStartOffset & ~3) + 4, len(buf)) + while checkStartOffset < checkEndOffset: + if buf[checkStartOffset] != terminator: + return [], -1 + dst.append(buf[checkStartOffset]) + checkStartOffset += 1 + i += 1 + + while offset + i < len(buf): + # Check in chunks of 4 bytes for the terminator value + j = 0 + onlyTerminator = True + while j < 4 and offset + i + j < len(buf): + char = buf[offset + i + j] + if char != terminator: + onlyTerminator = False + break + j += 1 + + if not onlyTerminator: + break + dst.extend([terminator] * 4) + i += 4 + + if dst: + try: + decoded = rabbitizer.Utils.escapeString(dst.decode(stringEncoding)) + except UnicodeDecodeError: + return [], -1 + result.append(decoded) + + return result, i + + #! @deprecated def decodeString(buf: bytes, offset: int, stringEncoding: str) -> tuple[list[str], int]: result = [] diff --git a/spimdisasm/mips/sections/MipsSectionBase.py b/spimdisasm/mips/sections/MipsSectionBase.py index 9b1bd4cc..4d5eea56 100644 --- a/spimdisasm/mips/sections/MipsSectionBase.py +++ b/spimdisasm/mips/sections/MipsSectionBase.py @@ -105,7 +105,7 @@ def _pascalStringGuesser(self, contextSym: common.ContextSymbol, localOffset: in currentVram = self.getVramOffset(localOffset) currentVrom = self.getVromOffset(localOffset) - _, rawStringSize = common.Utils.decodeBytesToStrings(self.bytes, localOffset, self.stringEncoding, terminator=0x20) + _, rawStringSize = common.Utils.decodeBytesToPascalStrings(self.bytes, localOffset, self.stringEncoding, terminator=0x20) if rawStringSize < 0: # String can't be decoded return False diff --git a/spimdisasm/mips/symbols/MipsSymbolBase.py b/spimdisasm/mips/symbols/MipsSymbolBase.py index 95f2a7a8..dd9a7fa6 100644 --- a/spimdisasm/mips/symbols/MipsSymbolBase.py +++ b/spimdisasm/mips/symbols/MipsSymbolBase.py @@ -134,11 +134,15 @@ def isShort(self, index: int) -> bool: def isString(self) -> bool: if self._failedStringDecoding: return False + if self.contextSym.isPascalString(): + return False return self.contextSym.isString() def isPascalString(self) -> bool: if self._failedStringDecoding: return False + if self.contextSym.isString(): + return False return self.contextSym.isPascalString() def isFloat(self, index: int) -> bool: @@ -436,7 +440,7 @@ def getNthWordAsPascalString(self, i: int) -> tuple[str, int]: localOffset = 4*i buffer = common.Utils.wordsToBytes(self.words) - decodedStrings, rawStringSize = common.Utils.decodeBytesToStrings(buffer, localOffset, self.stringEncoding, terminator=0x20) + decodedStrings, rawStringSize = common.Utils.decodeBytesToPascalStrings(buffer, localOffset, self.stringEncoding, terminator=0x20) if rawStringSize < 0: return "", -1 From 143a52b77589b65b65cf5970287ee5d2e3fda240 Mon Sep 17 00:00:00 2001 From: angie Date: Wed, 28 Jun 2023 20:21:20 -0400 Subject: [PATCH 07/14] Decode data strings as ASCII by default --- spimdisasm/mips/MipsFileBase.py | 2 +- spimdisasm/mips/sections/MipsSectionRodata.py | 2 ++ spimdisasm/mips/symbols/MipsSymbolBase.py | 2 +- spimdisasm/mips/symbols/MipsSymbolRodata.py | 1 + 4 files changed, 5 insertions(+), 2 deletions(-) diff --git a/spimdisasm/mips/MipsFileBase.py b/spimdisasm/mips/MipsFileBase.py index 7e0ebfc8..2ea83c86 100644 --- a/spimdisasm/mips/MipsFileBase.py +++ b/spimdisasm/mips/MipsFileBase.py @@ -29,7 +29,7 @@ def __init__(self, context: common.Context, vromStart: int, vromEnd: int, vram: self.symbolsVRams: set[int] = set() "addresses of symbols in this section" - self.stringEncoding: str = "EUC-JP" + self.stringEncoding: str = "ASCII" self.bytes: bytes = common.Utils.wordsToBytes(self.words) diff --git a/spimdisasm/mips/sections/MipsSectionRodata.py b/spimdisasm/mips/sections/MipsSectionRodata.py index 3ab64c4d..89f4887f 100644 --- a/spimdisasm/mips/sections/MipsSectionRodata.py +++ b/spimdisasm/mips/sections/MipsSectionRodata.py @@ -22,6 +22,8 @@ def __init__(self, context: common.Context, vromStart: int, vromEnd: int, vram: words = common.Utils.bytesToWords(array_of_bytes, vromStart, vromEnd) super().__init__(context, vromStart, vromEnd, vram, filename, words, common.FileSectionType.Rodata, segmentVromStart, overlayCategory) + self.stringEncoding = "EUC-JP" + def analyze(self): self.checkAndCreateFirstSymbol() diff --git a/spimdisasm/mips/symbols/MipsSymbolBase.py b/spimdisasm/mips/symbols/MipsSymbolBase.py index dd9a7fa6..7a29c6ae 100644 --- a/spimdisasm/mips/symbols/MipsSymbolBase.py +++ b/spimdisasm/mips/symbols/MipsSymbolBase.py @@ -23,7 +23,7 @@ def __init__(self, context: common.Context, vromStart: int, vromEnd: int, inFile self.contextSym.isDefined = True self.contextSym.sectionType = self.sectionType - self.stringEncoding: str = "EUC-JP" + self.stringEncoding: str = "ASCII" self._failedStringDecoding: bool = False self._failedPascalStringDecoding: bool = False diff --git a/spimdisasm/mips/symbols/MipsSymbolRodata.py b/spimdisasm/mips/symbols/MipsSymbolRodata.py index 97854f65..1374286e 100644 --- a/spimdisasm/mips/symbols/MipsSymbolRodata.py +++ b/spimdisasm/mips/symbols/MipsSymbolRodata.py @@ -17,6 +17,7 @@ class SymbolRodata(SymbolBase): def __init__(self, context: common.Context, vromStart: int, vromEnd: int, inFileOffset: int, vram: int, words: list[int], segmentVromStart: int, overlayCategory: str|None): super().__init__(context, vromStart, vromEnd, inFileOffset, vram, words, common.FileSectionType.Rodata, segmentVromStart, overlayCategory) + self.stringEncoding = "EUC-JP" def isJumpTable(self) -> bool: # jumptables must have at least 3 labels From faf9838aac5683c7163eecd3f0b99e9852f74651 Mon Sep 17 00:00:00 2001 From: angie Date: Mon, 3 Jul 2023 11:25:53 -0400 Subject: [PATCH 08/14] Actually check for terminators in decodeBytesToPascalStrings --- spimdisasm/common/Utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spimdisasm/common/Utils.py b/spimdisasm/common/Utils.py index 3a3fd10e..639ed8b3 100644 --- a/spimdisasm/common/Utils.py +++ b/spimdisasm/common/Utils.py @@ -281,8 +281,8 @@ def decodeBytesToPascalStrings(buf: bytes, offset: int, stringEncoding: str, ter dst = bytearray() i = 0 while offset + i < len(buf): - if buf[offset + i] != terminator: - if offset + i + 1 < len(buf) and buf[offset + i + 1] != terminator: + if buf[offset + i] == terminator: + if offset + i + 1 < len(buf) and buf[offset + i + 1] == terminator: # Require at least 2 terminators next to each other to actually consider the string has ended break char = buf[offset + i] From cb191c8022f3c8ca48eec70a12f58d992f130cd4 Mon Sep 17 00:00:00 2001 From: angie Date: Mon, 3 Jul 2023 11:39:56 -0400 Subject: [PATCH 09/14] off by one error on pascal string detection --- spimdisasm/mips/sections/MipsSectionBase.py | 2 +- spimdisasm/mips/symbols/MipsSymbolBase.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/spimdisasm/mips/sections/MipsSectionBase.py b/spimdisasm/mips/sections/MipsSectionBase.py index 4d5eea56..d7940182 100644 --- a/spimdisasm/mips/sections/MipsSectionBase.py +++ b/spimdisasm/mips/sections/MipsSectionBase.py @@ -112,7 +112,7 @@ def _pascalStringGuesser(self, contextSym: common.ContextSymbol, localOffset: in # Check if there is already another symbol after the current one and before the end of the string, # in which case we say this symbol should not be a string - otherSym = self.getSymbol(currentVram + rawStringSize, vromAddress=currentVrom + rawStringSize, checkUpperLimit=False, checkGlobalSegment=False) + otherSym = self.getSymbol(currentVram + rawStringSize - 1, vromAddress=currentVrom + rawStringSize, checkUpperLimit=False, checkGlobalSegment=False) if otherSym != contextSym: return False diff --git a/spimdisasm/mips/symbols/MipsSymbolBase.py b/spimdisasm/mips/symbols/MipsSymbolBase.py index 7a29c6ae..a00449ec 100644 --- a/spimdisasm/mips/symbols/MipsSymbolBase.py +++ b/spimdisasm/mips/symbols/MipsSymbolBase.py @@ -444,7 +444,7 @@ def getNthWordAsPascalString(self, i: int) -> tuple[str, int]: if rawStringSize < 0: return "", -1 - skip = rawStringSize // 4 + skip = (rawStringSize - 1) // 4 comment = self.generateAsmLineComment(localOffset) result = f"{comment} " From 010465dae04a1b68b1efa9c0df65d21ea26439cc Mon Sep 17 00:00:00 2001 From: angie Date: Mon, 3 Jul 2023 12:36:07 -0400 Subject: [PATCH 10/14] Add `as` to every init.py file --- spimdisasm/__init__.py | 16 ++++----- spimdisasm/common/__init__.py | 34 +++++++++++++------- spimdisasm/disasmdis/__init__.py | 9 +++++- spimdisasm/elf32/__init__.py | 34 +++++++++++++------- spimdisasm/elfObjDisasm/__init__.py | 16 ++++++++- spimdisasm/frontendCommon/__init__.py | 2 +- spimdisasm/mips/__init__.py | 15 +++++---- spimdisasm/mips/sections/__init__.py | 15 +++++---- spimdisasm/mips/symbols/__init__.py | 12 +++---- spimdisasm/mips/symbols/analysis/__init__.py | 2 +- spimdisasm/rspDisasm/__init__.py | 9 +++++- spimdisasm/singleFileDisasm/__init__.py | 11 ++++++- 12 files changed, 118 insertions(+), 57 deletions(-) diff --git a/spimdisasm/__init__.py b/spimdisasm/__init__.py index d31298d6..83fe1bed 100644 --- a/spimdisasm/__init__.py +++ b/spimdisasm/__init__.py @@ -9,13 +9,13 @@ __version__ = ".".join(map(str, __version_info__)) + ".dev0" __author__ = "Decompollaborate" -from . import common -from . import elf32 -from . import mips +from . import common as common +from . import elf32 as elf32 +from . import mips as mips # Front-end scripts -from . import frontendCommon -from . import disasmdis -from . import rspDisasm -from . import elfObjDisasm -from . import singleFileDisasm +from . import frontendCommon as frontendCommon +from . import disasmdis as disasmdis +from . import rspDisasm as rspDisasm +from . import elfObjDisasm as elfObjDisasm +from . import singleFileDisasm as singleFileDisasm diff --git a/spimdisasm/common/__init__.py b/spimdisasm/common/__init__.py index af5e8c76..882d89e4 100644 --- a/spimdisasm/common/__init__.py +++ b/spimdisasm/common/__init__.py @@ -3,14 +3,26 @@ from . import Utils -from .SortedDict import SortedDict -from .GlobalConfig import GlobalConfig, InputEndian, Compiler, Abi, ArchLevel, InputFileType -from .FileSectionType import FileSectionType, FileSections_ListBasic, FileSections_ListAll -from .ContextSymbols import SymbolSpecialType, ContextSymbol, gKnownTypes -from .SymbolsSegment import SymbolsSegment -from .Context import Context -from .FileSplitFormat import FileSplitFormat, FileSplitEntry -from .ElementBase import ElementBase -from .GpAccesses import GlobalOffsetTable -from .OrderedEnum import OrderedEnum -from .Relocation import RelocType, RelocationInfo, RelocationStaticReference +from .SortedDict import SortedDict as SortedDict +from .GlobalConfig import GlobalConfig as GlobalConfig +from .GlobalConfig import InputEndian as InputEndian +from .GlobalConfig import Compiler as Compiler +from .GlobalConfig import Abi as Abi +from .GlobalConfig import ArchLevel as ArchLevel +from .GlobalConfig import InputFileType as InputFileType +from .FileSectionType import FileSectionType as FileSectionType +from .FileSectionType import FileSections_ListBasic as FileSections_ListBasic +from .FileSectionType import FileSections_ListAll as FileSections_ListAll +from .ContextSymbols import SymbolSpecialType as SymbolSpecialType +from .ContextSymbols import ContextSymbol as ContextSymbol +from .ContextSymbols import gKnownTypes as gKnownTypes +from .SymbolsSegment import SymbolsSegment as SymbolsSegment +from .Context import Context as Context +from .FileSplitFormat import FileSplitFormat as FileSplitFormat +from .FileSplitFormat import FileSplitEntry as FileSplitEntry +from .ElementBase import ElementBase as ElementBase +from .GpAccesses import GlobalOffsetTable as GlobalOffsetTable +from .OrderedEnum import OrderedEnum as OrderedEnum +from .Relocation import RelocType as RelocType +from .Relocation import RelocationInfo as RelocationInfo +from .Relocation import RelocationStaticReference as RelocationStaticReference diff --git a/spimdisasm/disasmdis/__init__.py b/spimdisasm/disasmdis/__init__.py index 69808910..1c35bcfa 100644 --- a/spimdisasm/disasmdis/__init__.py +++ b/spimdisasm/disasmdis/__init__.py @@ -6,4 +6,11 @@ from __future__ import annotations -from .DisasmdisInternals import getToolDescription, addOptionsToParser, getArgsParser, applyArgs, wordGeneratorFromStrList, processArguments, addSubparser, disasmdisMain +from .DisasmdisInternals import getToolDescription as getToolDescription +from .DisasmdisInternals import addOptionsToParser as addOptionsToParser +from .DisasmdisInternals import getArgsParser as getArgsParser +from .DisasmdisInternals import applyArgs as applyArgs +from .DisasmdisInternals import wordGeneratorFromStrList as wordGeneratorFromStrList +from .DisasmdisInternals import processArguments as processArguments +from .DisasmdisInternals import addSubparser as addSubparser +from .DisasmdisInternals import disasmdisMain as disasmdisMain diff --git a/spimdisasm/elf32/__init__.py b/spimdisasm/elf32/__init__.py index be9e357f..c3f826f8 100644 --- a/spimdisasm/elf32/__init__.py +++ b/spimdisasm/elf32/__init__.py @@ -5,18 +5,28 @@ from __future__ import annotations -from .Elf32Constants import Elf32HeaderIdentifier, Elf32ObjectFileType -from .Elf32Constants import Elf32HeaderFlag, Elf32SectionHeaderType, Elf32SectionHeaderFlag -from .Elf32Constants import Elf32SymbolTableType, Elf32SymbolTableBinding, Elf32SymbolVisibility, Elf32SectionHeaderNumber -from .Elf32Constants import Elf32DynamicTable -from .Elf32Dyns import Elf32Dyns, Elf32DynEntry -from .Elf32GlobalOffsetTable import Elf32GlobalOffsetTable -from .Elf32Header import Elf32Header -from .Elf32RegInfo import Elf32RegInfo -from .Elf32SectionHeaders import Elf32SectionHeaders, Elf32SectionHeaderEntry -from .Elf32StringTable import Elf32StringTable -from .Elf32Syms import Elf32Syms, Elf32SymEntry -from .Elf32Rels import Elf32Rels, Elf32RelEntry +from .Elf32Constants import Elf32HeaderIdentifier as Elf32HeaderIdentifier +from .Elf32Constants import Elf32ObjectFileType as Elf32ObjectFileType +from .Elf32Constants import Elf32HeaderFlag as Elf32HeaderFlag +from .Elf32Constants import Elf32SectionHeaderType as Elf32SectionHeaderType +from .Elf32Constants import Elf32SectionHeaderFlag as Elf32SectionHeaderFlag +from .Elf32Constants import Elf32SymbolTableType as Elf32SymbolTableType +from .Elf32Constants import Elf32SymbolTableBinding as Elf32SymbolTableBinding +from .Elf32Constants import Elf32SymbolVisibility as Elf32SymbolVisibility +from .Elf32Constants import Elf32SectionHeaderNumber as Elf32SectionHeaderNumber +from .Elf32Constants import Elf32DynamicTable as Elf32DynamicTable +from .Elf32Dyns import Elf32Dyns as Elf32Dyns +from .Elf32Dyns import Elf32DynEntry as Elf32DynEntry +from .Elf32GlobalOffsetTable import Elf32GlobalOffsetTable as Elf32GlobalOffsetTable +from .Elf32Header import Elf32Header as Elf32Header +from .Elf32RegInfo import Elf32RegInfo as Elf32RegInfo +from .Elf32SectionHeaders import Elf32SectionHeaders as Elf32SectionHeaders +from .Elf32SectionHeaders import Elf32SectionHeaderEntry as Elf32SectionHeaderEntry +from .Elf32StringTable import Elf32StringTable as Elf32StringTable +from .Elf32Syms import Elf32Syms as Elf32Syms +from .Elf32Syms import Elf32SymEntry as Elf32SymEntry +from .Elf32Rels import Elf32Rels as Elf32Rels +from .Elf32Rels import Elf32RelEntry as Elf32RelEntry from .Elf32File import Elf32File diff --git a/spimdisasm/elfObjDisasm/__init__.py b/spimdisasm/elfObjDisasm/__init__.py index 707bfbdd..711b5b8f 100644 --- a/spimdisasm/elfObjDisasm/__init__.py +++ b/spimdisasm/elfObjDisasm/__init__.py @@ -6,4 +6,18 @@ from __future__ import annotations -from .ElfObjDisasmInternals import getToolDescription, addOptionsToParser, getArgsParser, applyArgs, applyGlobalConfigurations, getOutputPath, getProcessedSections, changeGlobalSegmentRanges, insertSymtabIntoContext, insertDynsymIntoContext, injectAllElfSymbols, processGlobalOffsetTable, processArguments, addSubparser, elfObjDisasmMain +from .ElfObjDisasmInternals import getToolDescription as getToolDescription +from .ElfObjDisasmInternals import addOptionsToParser as addOptionsToParser +from .ElfObjDisasmInternals import getArgsParser as getArgsParser +from .ElfObjDisasmInternals import applyArgs as applyArgs +from .ElfObjDisasmInternals import applyGlobalConfigurations as applyGlobalConfigurations +from .ElfObjDisasmInternals import getOutputPath as getOutputPath +from .ElfObjDisasmInternals import getProcessedSections as getProcessedSections +from .ElfObjDisasmInternals import changeGlobalSegmentRanges as changeGlobalSegmentRanges +from .ElfObjDisasmInternals import insertSymtabIntoContext as insertSymtabIntoContext +from .ElfObjDisasmInternals import insertDynsymIntoContext as insertDynsymIntoContext +from .ElfObjDisasmInternals import injectAllElfSymbols as injectAllElfSymbols +from .ElfObjDisasmInternals import processGlobalOffsetTable as processGlobalOffsetTable +from .ElfObjDisasmInternals import processArguments as processArguments +from .ElfObjDisasmInternals import addSubparser as addSubparser +from .ElfObjDisasmInternals import elfObjDisasmMain as elfObjDisasmMain diff --git a/spimdisasm/frontendCommon/__init__.py b/spimdisasm/frontendCommon/__init__.py index f6428986..d5f1d73a 100644 --- a/spimdisasm/frontendCommon/__init__.py +++ b/spimdisasm/frontendCommon/__init__.py @@ -6,4 +6,4 @@ from __future__ import annotations -from . import FrontendUtilities +from . import FrontendUtilities as FrontendUtilities diff --git a/spimdisasm/mips/__init__.py b/spimdisasm/mips/__init__.py index 7c375ceb..02e44b61 100644 --- a/spimdisasm/mips/__init__.py +++ b/spimdisasm/mips/__init__.py @@ -5,13 +5,14 @@ from __future__ import annotations -from . import sections -from . import symbols +from . import sections as sections +from . import symbols as symbols -from .FuncRodataEntry import FunctionRodataEntry +from .FuncRodataEntry import FunctionRodataEntry as FunctionRodataEntry -from . import FilesHandlers +from . import FilesHandlers as FilesHandlers -from .InstructionConfig import InstructionConfig -from .MipsFileBase import FileBase, createEmptyFile -from .MipsFileSplits import FileSplits +from .InstructionConfig import InstructionConfig as InstructionConfig +from .MipsFileBase import FileBase as FileBase +from .MipsFileBase import createEmptyFile as createEmptyFile +from .MipsFileSplits import FileSplits as FileSplits diff --git a/spimdisasm/mips/sections/__init__.py b/spimdisasm/mips/sections/__init__.py index 539606b9..ad1d5c2d 100644 --- a/spimdisasm/mips/sections/__init__.py +++ b/spimdisasm/mips/sections/__init__.py @@ -5,10 +5,11 @@ from __future__ import annotations -from .MipsSectionBase import SectionBase - -from .MipsSectionText import SectionText -from .MipsSectionData import SectionData -from .MipsSectionRodata import SectionRodata -from .MipsSectionBss import SectionBss -from .MipsSectionRelocZ64 import SectionRelocZ64, RelocEntry +from .MipsSectionBase import SectionBase as SectionBase + +from .MipsSectionText import SectionText as SectionText +from .MipsSectionData import SectionData as SectionData +from .MipsSectionRodata import SectionRodata as SectionRodata +from .MipsSectionBss import SectionBss as SectionBss +from .MipsSectionRelocZ64 import SectionRelocZ64 as SectionRelocZ64 +from .MipsSectionRelocZ64 import RelocEntry as RelocEntry diff --git a/spimdisasm/mips/symbols/__init__.py b/spimdisasm/mips/symbols/__init__.py index f24dc538..17cb2faa 100644 --- a/spimdisasm/mips/symbols/__init__.py +++ b/spimdisasm/mips/symbols/__init__.py @@ -7,11 +7,11 @@ from . import analysis -from .MipsSymbolBase import SymbolBase +from .MipsSymbolBase import SymbolBase as SymbolBase -from .MipsSymbolText import SymbolText -from .MipsSymbolData import SymbolData -from .MipsSymbolRodata import SymbolRodata -from .MipsSymbolBss import SymbolBss +from .MipsSymbolText import SymbolText as SymbolText +from .MipsSymbolData import SymbolData as SymbolData +from .MipsSymbolRodata import SymbolRodata as SymbolRodata +from .MipsSymbolBss import SymbolBss as SymbolBss -from .MipsSymbolFunction import SymbolFunction +from .MipsSymbolFunction import SymbolFunction as SymbolFunction diff --git a/spimdisasm/mips/symbols/analysis/__init__.py b/spimdisasm/mips/symbols/analysis/__init__.py index 9189a51b..cd89e207 100644 --- a/spimdisasm/mips/symbols/analysis/__init__.py +++ b/spimdisasm/mips/symbols/analysis/__init__.py @@ -5,4 +5,4 @@ from __future__ import annotations -from .InstrAnalyzer import InstrAnalyzer +from .InstrAnalyzer import InstrAnalyzer as InstrAnalyzer diff --git a/spimdisasm/rspDisasm/__init__.py b/spimdisasm/rspDisasm/__init__.py index 3d6a57d4..0b245db0 100644 --- a/spimdisasm/rspDisasm/__init__.py +++ b/spimdisasm/rspDisasm/__init__.py @@ -6,4 +6,11 @@ from __future__ import annotations -from .RspDisasmInternals import getToolDescription, addOptionsToParser, getArgsParser, applyArgs, initializeContext, processArguments, addSubparser, rspDisasmMain +from .RspDisasmInternals import getToolDescription +from .RspDisasmInternals import addOptionsToParser +from .RspDisasmInternals import getArgsParser +from .RspDisasmInternals import applyArgs +from .RspDisasmInternals import initializeContext +from .RspDisasmInternals import processArguments +from .RspDisasmInternals import addSubparser +from .RspDisasmInternals import rspDisasmMain diff --git a/spimdisasm/singleFileDisasm/__init__.py b/spimdisasm/singleFileDisasm/__init__.py index 8d084d82..cc7ad868 100644 --- a/spimdisasm/singleFileDisasm/__init__.py +++ b/spimdisasm/singleFileDisasm/__init__.py @@ -6,4 +6,13 @@ from __future__ import annotations -from .SingleFileDisasmInternals import getToolDescription, addOptionsToParser, getArgsParser, applyArgs, applyGlobalConfigurations, getSplits, changeGlobalSegmentRanges, processArguments, addSubparser, disassemblerMain +from .SingleFileDisasmInternals import getToolDescription as getToolDescription +from .SingleFileDisasmInternals import addOptionsToParser as addOptionsToParser +from .SingleFileDisasmInternals import getArgsParser as getArgsParser +from .SingleFileDisasmInternals import applyArgs as applyArgs +from .SingleFileDisasmInternals import applyGlobalConfigurations as applyGlobalConfigurations +from .SingleFileDisasmInternals import getSplits as getSplits +from .SingleFileDisasmInternals import changeGlobalSegmentRanges as changeGlobalSegmentRanges +from .SingleFileDisasmInternals import processArguments as processArguments +from .SingleFileDisasmInternals import addSubparser as addSubparser +from .SingleFileDisasmInternals import disassemblerMain as disassemblerMain From 6327ce506dd46f2580c0f3dd12030907d27caec6 Mon Sep 17 00:00:00 2001 From: angie Date: Mon, 3 Jul 2023 13:47:35 -0400 Subject: [PATCH 11/14] Add pascal strings types to list of known types --- spimdisasm/common/ContextSymbols.py | 4 ++-- spimdisasm/common/GlobalConfig.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/spimdisasm/common/ContextSymbols.py b/spimdisasm/common/ContextSymbols.py index 0a763b88..f9377dd5 100644 --- a/spimdisasm/common/ContextSymbols.py +++ b/spimdisasm/common/ContextSymbols.py @@ -89,7 +89,7 @@ def getAllTypes(self) -> set[str]: gKnownTypes: set[str] = { - "asciz", "char", "char*" + "asciz", "char", "char*", "String", "Char" } for kind in gAccessKinds.values(): @@ -301,7 +301,7 @@ def isPascalString(self) -> bool: else: stringGuesserLevel = GlobalConfig.PASCAL_DATA_STRING_GUESSER_LEVEL - if currentType in {"String", "Char", "ascii"}: + if currentType in {"String", "Char"}: return True if not self.isMaybePascalString: return False diff --git a/spimdisasm/common/GlobalConfig.py b/spimdisasm/common/GlobalConfig.py index 14ffd579..88881dce 100644 --- a/spimdisasm/common/GlobalConfig.py +++ b/spimdisasm/common/GlobalConfig.py @@ -274,7 +274,7 @@ def addParametersToArgParse(self, parser: argparse.ArgumentParser): - Do no try to guess if type information for the symbol can be inferred by other means. - A string symbol must be referenced only once. - Strings must not be empty. -- level 2: A string no longer needs to be referenced only once to be considered a possible strings. This can happen because of a deduplication optimization. +- level 2: A string no longer needs to be referenced only once to be considered a possible string. This can happen because of a deduplication optimization. - level 3: Empty strings are allowed. - level 4: Symbols with autodetected type information but no user type information can still be guessed as strings. """ From 1c0f413ebc6ca3655e36c33642c472992f3bf2e3 Mon Sep 17 00:00:00 2001 From: angie Date: Mon, 3 Jul 2023 14:00:13 -0400 Subject: [PATCH 12/14] Change default for ASM_EMIT_SIZE_DIRECTIVE to True --- spimdisasm/common/GlobalConfig.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spimdisasm/common/GlobalConfig.py b/spimdisasm/common/GlobalConfig.py index 88881dce..4a7b8fab 100644 --- a/spimdisasm/common/GlobalConfig.py +++ b/spimdisasm/common/GlobalConfig.py @@ -223,7 +223,7 @@ def AGGRESSIVE_STRING_GUESSER(self, value: bool) -> None: ASM_TEXT_END_LABEL: str = "" ASM_TEXT_FUNC_AS_LABEL: bool = False ASM_DATA_SYM_AS_LABEL: bool = False - ASM_EMIT_SIZE_DIRECTIVE: bool = False + ASM_EMIT_SIZE_DIRECTIVE: bool = True ASM_USE_PRELUDE: bool = True ASM_GENERATED_BY: bool = True From 9361406fa29d436fd38d85d369dd88f772ece642 Mon Sep 17 00:00:00 2001 From: angie Date: Mon, 3 Jul 2023 14:02:06 -0400 Subject: [PATCH 13/14] Change jtbl and data symbols to use jlabel and dlabel by default --- spimdisasm/common/GlobalConfig.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spimdisasm/common/GlobalConfig.py b/spimdisasm/common/GlobalConfig.py index 4a7b8fab..0a7762f2 100644 --- a/spimdisasm/common/GlobalConfig.py +++ b/spimdisasm/common/GlobalConfig.py @@ -216,8 +216,8 @@ def AGGRESSIVE_STRING_GUESSER(self, value: bool) -> None: ASM_REFERENCEE_SYMBOLS: bool = False ASM_TEXT_LABEL: str = "glabel" - ASM_JTBL_LABEL: str = "glabel" - ASM_DATA_LABEL: str = "glabel" + ASM_JTBL_LABEL: str = "jlabel" + ASM_DATA_LABEL: str = "dlabel" ASM_USE_SYMBOL_LABEL: bool = True ASM_TEXT_ENT_LABEL: str = "" ASM_TEXT_END_LABEL: str = "" From 4f36653faf63e0927c3745b303386f7431088fcb Mon Sep 17 00:00:00 2001 From: angie Date: Mon, 3 Jul 2023 14:03:13 -0400 Subject: [PATCH 14/14] version bump --- pyproject.toml | 2 +- spimdisasm/__init__.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 4c165af8..00fc5ab3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ [project] name = "spimdisasm" # Version should be synced with spimdisasm/__init__.py -version = "1.14.3.dev0" +version = "1.15.0" description = "MIPS disassembler" # license = "MIT" readme = "README.md" diff --git a/spimdisasm/__init__.py b/spimdisasm/__init__.py index 83fe1bed..1a17cdcd 100644 --- a/spimdisasm/__init__.py +++ b/spimdisasm/__init__.py @@ -5,8 +5,8 @@ from __future__ import annotations -__version_info__ = (1, 14, 4) -__version__ = ".".join(map(str, __version_info__)) + ".dev0" +__version_info__ = (1, 15, 0) +__version__ = ".".join(map(str, __version_info__)) __author__ = "Decompollaborate" from . import common as common