En_Syateki_Niw (#270)

* All matching, apart from the ones that aren't. * Fix conflict and 1 func * Bugz * Conflict 2 * Header * Remove asm * PR * PR * undefined syms * PR
2025-08-28 01:41:20 +00:00 · 2020-08-20 23:34:18 +01:00 · 2020-08-20 23:34:18 +01:00 · 4bd417aeba
commit 4bd417aeba
parent 296b20e925
22 changed files with 981 additions and 2158 deletions
--- a/diff.py
+++ b/diff.py
@@ -21,7 +21,7 @@ def fail(msg):

 MISSING_PREREQUISITES = (
    "Missing prerequisite python module {}. "
-    "Run `python3 -m pip install --user colorama ansiwrap attrs watchdog python-Levenshtein cxxfilt` to install prerequisites (python-Levenshtein only needed for --algorithm=levenshtein, cxxfilt only needed with --source)."
+    "Run `python3 -m pip install --user colorama ansiwrap attrs watchdog python-Levenshtein cxxfilt` to install prerequisites (cxxfilt only needed with --source)."
 )

 try:
@@ -60,6 +60,11 @@ parser.add_argument(
    action="store_true",
    help="Show source code (if possible). Only works with -o and -e.",
 )
+parser.add_argument(
+    "--inlines",
+    action="store_true",
+    help="Show inline function calls (if possible). Only works with -o and -e.",
+)
 parser.add_argument(
    "--base-asm",
    dest="base_asm",
@@ -137,7 +142,7 @@ parser.add_argument(
 parser.add_argument(
    "--algorithm",
    dest="algorithm",
-    default="difflib",
+    default="levenshtein",
    choices=["levenshtein", "difflib"],
    help="Diff algorithm to use.",
 )
@@ -185,7 +190,7 @@ COLOR_ROTATION = [
 ]

 BUFFER_CMD = ["tail", "-c", str(10 ** 9)]
-LESS_CMD = ["less", "-Ric"]
+LESS_CMD = ["less", "-SRic", "-#6"]

 DEBOUNCE_DELAY = 0.1
 FS_WATCH_EXTENSIONS = [".c", ".h"]
@@ -270,17 +275,22 @@ def maybe_get_objdump_source_flags():
    if not args.source:
        return []

-    return [
+    flags = [
        "--source",
        "--source-comment=| ",
        "-l",
    ]

+    if args.inlines:
+        flags.append("--inlines")
+
+    return flags
+

 def run_objdump(cmd):
    flags, target, restrict = cmd
    out = subprocess.check_output(
-        [objdump_executable] + flags + [target], universal_newlines=True
+        [objdump_executable] + arch_flags + flags + [target], universal_newlines=True
    )
    if restrict is not None:
        return restrict_to_function(out, restrict)
@@ -431,12 +441,13 @@ def ansi_ljust(s, width):

 if arch == "mips":
    re_int = re.compile(r"[0-9]+")
-    re_comments = re.compile(r"<.*?>")
-    re_regs = re.compile(r"\$?\b(a[0-3]|t[0-9]|s[0-8]|at|v[01]|f[12]?[0-9]|f3[01]|fp)\b")
+    re_comment = re.compile(r"<.*?>")
+    re_reg = re.compile(r"\$?\b(a[0-3]|t[0-9]|s[0-8]|at|v[01]|f[12]?[0-9]|f3[01]|k[01]|fp|ra)\b")
    re_sprel = re.compile(r"(?<=,)([0-9]+|0x[0-9a-f]+)\(sp\)")
    re_large_imm = re.compile(r"-?[1-9][0-9]{2,}|-?0x[0-9a-f]{3,}")
    re_imm = re.compile(r"(\b|-)([0-9]+|0x[0-9a-fA-F]+)\b(?!\(sp)|%(lo|hi)\([^)]*\)")
    forbidden = set(string.ascii_letters + "_")
+    arch_flags = ["-m", "mips:4300"]
    branch_likely_instructions = {
        "beql",
        "bnel",
@@ -455,13 +466,14 @@ if arch == "mips":
    instructions_with_address_immediates = branch_instructions.union({"jal", "j"})
 elif arch == "aarch64":
    re_int = re.compile(r"[0-9]+")
-    re_comments = re.compile(r"(<.*?>|//.*$)")
+    re_comment = re.compile(r"(<.*?>|//.*$)")
    # GPRs and FP registers: X0-X30, W0-W30, [DSHQ]0..31
    # The zero registers and SP should not be in this list.
-    re_regs = re.compile(r"\$?\b([dshq][12]?[0-9]|[dshq]3[01]|[xw][12]?[0-9]|[xw]30)\b")
+    re_reg = re.compile(r"\$?\b([dshq][12]?[0-9]|[dshq]3[01]|[xw][12]?[0-9]|[xw]30)\b")
    re_sprel = re.compile(r"sp, #-?(0x[0-9a-fA-F]+|[0-9]+)\b")
    re_large_imm = re.compile(r"-?[1-9][0-9]{2,}|-?0x[0-9a-f]{3,}")
    re_imm = re.compile(r"(?<!sp, )#-?(0x[0-9a-fA-F]+|[0-9]+)\b")
+    arch_flags = []
    forbidden = set(string.ascii_letters + "_")
    branch_likely_instructions = set()
    branch_instructions = {"bl", "b", "b.eq", "b.ne", "b.cs", "b.hs", "b.cc", "b.lo", "b.mi", "b.pl", "b.vs", "b.vc", "b.hi", "b.ls", "b.ge", "b.lt", "b.gt", "b.le", "cbz", "cbnz", "tbz", "tbnz"}
@@ -500,11 +512,18 @@ def parse_relocated_line(line):
    return before, imm, after


-def process_reloc(row, prev):
+def process_mips_reloc(row, prev):
    before, imm, after = parse_relocated_line(prev)
    repl = row.split()[-1]
    if imm != "0":
-        if before.strip() == "jal" and not imm.startswith("0x"):
+        # MIPS uses relocations with addends embedded in the code as immediates.
+        # If there is an immediate, show it as part of the relocation. Ideally
+        # we'd show this addend in both %lo/%hi, but annoyingly objdump's output
+        # doesn't include enough information to pair up %lo's and %hi's...
+        # TODO: handle unambiguous cases where all addends for a symbol are the
+        # same, or show "+???".
+        mnemonic = prev.split()[0]
+        if mnemonic in instructions_with_address_immediates and not imm.startswith("0x"):
            imm = "0x" + imm
        repl += "+" + imm if int(imm, 0) > 0 else imm
    if "R_MIPS_LO16" in row:
@@ -519,27 +538,39 @@ def process_reloc(row, prev):
    return before + repl + after


+def cleanup_whitespace(line):
+    return "".join(f"{o:<8s}" for o in line.strip().split("\t"))
+
+
+Line = collections.namedtuple(
+    "Line",
+    [
+        "mnemonic",
+        "diff_row",
+        "original",
+        "line_num",
+        "branch_target",
+        "source_lines",
+        "comment",
+    ],
+)
+
+
 def process(lines):
-    mnemonics = []
-    diff_rows = []
-    rows_with_imms = []
    skip_next = False
-    originals = []
-    line_nums = []
-    branch_targets = []
-    source_lines = collections.defaultdict(list)
-    comments = []
+    source_lines = []
    if not args.diff_obj:
        lines = lines[7:]
        if lines and not lines[-1]:
            lines.pop()

+    output = []
    for row in lines:
        if args.diff_obj and (">:" in row or not row):
            continue

        if args.source and (row and row[0] != " "):
-            source_lines[len(mnemonics)].append(row)
+            source_lines.append(row)
            continue

        if "R_AARCH64_" in row:
@@ -548,13 +579,15 @@ def process(lines):

        if "R_MIPS_" in row:
            # N.B. Don't transform the diff rows, they already ignore immediates
-            # if diff_rows[-1] != '<delay-slot>':
-            # diff_rows[-1] = process_reloc(row, rows_with_imms[-1])
-            originals[-1] = process_reloc(row, originals[-1])
+            # if output[-1].diff_row != "<delay-slot>":
+            # output[-1] = output[-1].replace(diff_row=process_mips_reloc(row, output[-1].row_with_imm))
+            new_original = process_mips_reloc(row, output[-1].original)
+            output[-1] = output[-1]._replace(original=new_original)
            continue

-        comments.append(re.search(re_comments, row))
-        row = re.sub(re_comments, "", row)
+        m_comment = re.search(re_comment, row)
+        comment = m_comment[0] if m_comment else None
+        row = re.sub(re_comment, "", row)
        row = row.rstrip()
        tabs = row.split("\t")
        row = "\t".join(tabs[2:])
@@ -570,7 +603,7 @@ def process(lines):
            mnemonic = "<delay-slot>"
        if mnemonic in branch_likely_instructions:
            skip_next = True
-        row = re.sub(re_regs, "<reg>", row)
+        row = re.sub(re_reg, "<reg>", row)
        row = re.sub(re_sprel, "addr(sp)", row)
        row_with_imm = row
        if mnemonic in instructions_with_address_immediates:
@@ -580,28 +613,35 @@ def process(lines):
        else:
            row = normalize_imms(row)

-        mnemonics.append(mnemonic)
-        rows_with_imms.append(row_with_imm)
-        diff_rows.append(row)
-        originals.append(original)
-        line_nums.append(line_num)
+        branch_target = None
        if mnemonic in branch_instructions:
            target = row_parts[1].strip().split(",")[-1]
            if mnemonic in branch_likely_instructions:
                target = hex(int(target, 16) - 4)[2:]
-            branch_targets.append(target.strip())
-        else:
-            branch_targets.append(None)
+            branch_target = target.strip()
+
+        output.append(
+            Line(
+                mnemonic=mnemonic,
+                diff_row=row,
+                original=original,
+                line_num=line_num,
+                branch_target=branch_target,
+                source_lines=source_lines,
+                comment=comment,
+            )
+        )
+        source_lines = []
+
        if args.stop_jrra and mnemonic == "jr" and row_parts[1].strip() == "ra":
            break

-    # Cleanup whitespace
-    originals = [original.strip() for original in originals]
-    originals = [
-        "".join(f"{o:<8s}" for o in original.split("\t")) for original in originals
+    # Cleanup whitespace, after relocation fixups have happened
+    output = [
+        line._replace(original=cleanup_whitespace(line.original)) for line in output
    ]
-    # return diff_rows, diff_rows, line_nums
-    return mnemonics, diff_rows, originals, line_nums, branch_targets, source_lines, comments
+
+    return output


 def format_single_line_diff(line1, line2, column_width):
@@ -704,20 +744,13 @@ def diff_sequences(seq1, seq2):


 def do_diff(basedump, mydump):
-    asm_lines1 = basedump.split("\n")
-    asm_lines2 = mydump.split("\n")
-
    output = []

    # TODO: status line?
    # output.append(sha1sum(mydump))

-    mnemonics1, asm_lines1, originals1, line_nums1, branch_targets1, _, _ = process(
-        asm_lines1
-    )
-    mnemonics2, asm_lines2, originals2, line_nums2, branch_targets2, source_lines2, comments2 = process(
-        asm_lines2
-    )
+    lines1 = process(basedump.split("\n"))
+    lines2 = process(mydump.split("\n"))

    sc1 = SymbolColorer(0)
    sc2 = SymbolColorer(0)
@@ -729,68 +762,56 @@ def do_diff(basedump, mydump):
    bts2 = set()

    if args.show_branches:
-        for (bts, btset, sc) in [
-            (branch_targets1, bts1, sc5),
-            (branch_targets2, bts2, sc6),
+        for (lines, btset, sc) in [
+            (lines1, bts1, sc5),
+            (lines2, bts2, sc6),
        ]:
-            for bt in bts:
+            for line in lines:
+                bt = line.branch_target
                if bt is not None:
                    btset.add(bt + ":")
                    sc.color_symbol(bt + ":")

-    for (tag, i1, i2, j1, j2) in diff_sequences(mnemonics1, mnemonics2):
-        lines1 = asm_lines1[i1:i2]
-        lines2 = asm_lines2[j1:j2]
-
-        for k, (line1, line2) in enumerate(itertools.zip_longest(lines1, lines2)):
+    for (tag, i1, i2, j1, j2) in diff_sequences(
+        [line.mnemonic for line in lines1], [line.mnemonic for line in lines2]
+    ):
+        for line1, line2 in itertools.zip_longest(lines1[i1:i2], lines2[j1:j2]):
            if tag == "replace":
                if line1 is None:
                    tag = "insert"
                elif line2 is None:
                    tag = "delete"
+            elif tag == "insert":
+                assert line1 is None
+            elif tag == "delete":
+                assert line2 is None

-            try:
-                original1 = originals1[i1 + k]
-                line_num1 = line_nums1[i1 + k]
-            except:
-                original1 = ""
-                line_num1 = ""
-            try:
-                original2 = originals2[j1 + k]
-                line_num2 = line_nums2[j1 + k]
-            except:
-                original2 = ""
-                line_num2 = ""
-
-            has1 = has2 = True
            line_color1 = line_color2 = sym_color = Fore.RESET
            line_prefix = " "
-            if line1 == line2:
-                if not line1:
-                    has1 = has2 = False
-                if maybe_normalize_large_imms(original1) == maybe_normalize_large_imms(
-                    original2
-                ):
-                    out1 = original1
-                    out2 = original2
-                elif line1 == "<delay-slot>":
-                    out1 = f"{Style.DIM}{original1}"
-                    out2 = f"{Style.DIM}{original2}"
+            if line1 and line2 and line1.diff_row == line2.diff_row:
+                if maybe_normalize_large_imms(
+                    line1.original
+                ) == maybe_normalize_large_imms(line2.original):
+                    out1 = line1.original
+                    out2 = line2.original
+                elif line1.diff_row == "<delay-slot>":
+                    out1 = f"{Style.BRIGHT}{Fore.LIGHTBLACK_EX}{line1.original}"
+                    out2 = f"{Style.BRIGHT}{Fore.LIGHTBLACK_EX}{line2.original}"
                else:
-                    mnemonic = original1.split()[0]
-                    out1, out2 = original1, original2
+                    mnemonic = line1.original.split()[0]
+                    out1, out2 = line1.original, line2.original
                    branch1 = branch2 = ""
                    if mnemonic in instructions_with_address_immediates:
-                        out1, branch1 = split_off_branch(original1)
-                        out2, branch2 = split_off_branch(original2)
+                        out1, branch1 = split_off_branch(line1.original)
+                        out2, branch2 = split_off_branch(line2.original)
                    branchless1 = out1
                    branchless2 = out2
                    out1, out2 = color_imms(out1, out2)

                    same_relative_target = False
-                    if branch_targets1[i1 + k] is not None and branch_targets2[j1 + k] is not None:
-                        relative_target1 = eval_line_num(branch_targets1[i1 + k]) - eval_line_num(line_num1)
-                        relative_target2 = eval_line_num(branch_targets2[j1 + k]) - eval_line_num(line_num2)
+                    if line1.branch_target is not None and line2.branch_target is not None:
+                        relative_target1 = eval_line_num(line1.branch_target) - eval_line_num(line1.line_num)
+                        relative_target2 = eval_line_num(line2.branch_target) - eval_line_num(line2.line_num)
                        same_relative_target = relative_target1 == relative_target2

                    if not same_relative_target:
@@ -805,14 +826,10 @@ def do_diff(basedump, mydump):
                            line_prefix = "i"
                    else:
                        out1 = re.sub(
-                            re_sprel,
-                            lambda s: sc3.color_symbol(s.group()),
-                            out1,
+                            re_sprel, lambda s: sc3.color_symbol(s.group()), out1,
                        )
                        out2 = re.sub(
-                            re_sprel,
-                            lambda s: sc4.color_symbol(s.group()),
-                            out2,
+                            re_sprel, lambda s: sc4.color_symbol(s.group()), out2,
                        )
                        if normalize_stack(branchless1) == normalize_stack(branchless2):
                            # only stack differences (luckily stack and imm
@@ -823,76 +840,80 @@ def do_diff(basedump, mydump):
                        else:
                            # regs differences and maybe imms as well
                            out1 = re.sub(
-                                re_regs, lambda s: sc1.color_symbol(s.group()), out1
+                                re_reg, lambda s: sc1.color_symbol(s.group()), out1
                            )
                            out2 = re.sub(
-                                re_regs, lambda s: sc2.color_symbol(s.group()), out2
+                                re_reg, lambda s: sc2.color_symbol(s.group()), out2
                            )
                            line_color1 = line_color2 = sym_color = Fore.YELLOW
                            line_prefix = "r"
-            elif tag in ["replace", "equal"]:
+            elif line1 and line2:
                line_prefix = "|"
                line_color1 = Fore.LIGHTBLUE_EX
                line_color2 = Fore.LIGHTBLUE_EX
                sym_color = Fore.LIGHTBLUE_EX
-                out1 = original1
-                out2 = original2
-            elif tag == "delete":
+                out1 = line1.original
+                out2 = line2.original
+            elif line1:
                line_prefix = "<"
                line_color1 = line_color2 = sym_color = Fore.RED
-                has2 = False
-                out1 = original1
+                out1 = line1.original
                out2 = ""
-            elif tag == "insert":
+            elif line2:
                line_prefix = ">"
                line_color1 = line_color2 = sym_color = Fore.GREEN
-                has1 = False
                out1 = ""
-                out2 = original2
+                out2 = line2.original

            in_arrow1 = "  "
            in_arrow2 = "  "
            out_arrow1 = ""
            out_arrow2 = ""
-            line_num1 = line_num1 if has1 else ""
-            line_num2 = line_num2 if has2 else ""

-            if args.show_branches and has1:
-                if line_num1 in bts1:
-                    in_arrow1 = sc5.color_symbol(line_num1, "~>") + line_color1
-                if branch_targets1[i1 + k] is not None:
-                    out_arrow1 = " " + sc5.color_symbol(
-                        branch_targets1[i1 + k] + ":", "~>"
-                    )
-            if args.show_branches and has2:
-                if line_num2 in bts2:
-                    in_arrow2 = sc6.color_symbol(line_num2, "~>") + line_color2
-                if branch_targets2[j1 + k] is not None:
-                    out_arrow2 = " " + sc6.color_symbol(
-                        branch_targets2[j1 + k] + ":", "~>"
-                    )
+            if args.show_branches and line1:
+                if line1.line_num in bts1:
+                    in_arrow1 = sc5.color_symbol(line1.line_num, "~>") + line_color1
+                if line1.branch_target is not None:
+                    out_arrow1 = " " + sc5.color_symbol(line1.branch_target + ":", "~>")
+            if args.show_branches and line2:
+                if line2.line_num in bts2:
+                    in_arrow2 = sc6.color_symbol(line2.line_num, "~>") + line_color2
+                if line2.branch_target is not None:
+                    out_arrow2 = " " + sc6.color_symbol(line2.branch_target + ":", "~>")

-            if args.source and has2 and comments2[j1 + k] is not None:
-                out2 += f" {comments2[j1 + k][0]}"
+            if args.source and line2 and line2.comment:
+                out2 += f" {line2.comment}"
+
+            line_num1 = line1.line_num if line1 else ""
+            line_num2 = line2.line_num if line2 else ""

            out1 = f"{line_color1}{line_num1} {in_arrow1} {out1}{Style.RESET_ALL}{out_arrow1}"
            out2 = f"{line_color2}{line_num2} {in_arrow2} {out2}{Style.RESET_ALL}{out_arrow2}"
            mid = f"{sym_color}{line_prefix} "

-            for source_line in source_lines2[j1 + k]:
-                color = Style.DIM
-                # File names and function names
-                if source_line and source_line[0] != "|":
-                    color += Style.BRIGHT
-                    # Function names
-                    if source_line.endswith("():"):
-                        # Underline. Colorama does not provide this feature, unfortunately.
-                        color += "\u001b[4m"
-                        try:
-                            source_line = cxxfilt.demangle(source_line[:-3], external_only=False)
-                        except:
-                            pass
-                output.append(format_single_line_diff("", f"  {color}{source_line}{Style.RESET_ALL}", args.column_width))
+            if line2:
+                for source_line in line2.source_lines:
+                    color = Style.DIM
+                    # File names and function names
+                    if source_line and source_line[0] != "|":
+                        color += Style.BRIGHT
+                        # Function names
+                        if source_line.endswith("():"):
+                            # Underline. Colorama does not provide this feature, unfortunately.
+                            color += "\u001b[4m"
+                            try:
+                                source_line = cxxfilt.demangle(
+                                    source_line[:-3], external_only=False
+                                )
+                            except:
+                                pass
+                    output.append(
+                        format_single_line_diff(
+                            "",
+                            f"  {color}{source_line}{Style.RESET_ALL}",
+                            args.column_width,
+                        )
+                    )

            output.append(format_single_line_diff(out1, mid + out2, args.column_width))