diff --git a/README.md b/README.md index 99026c4..31914d5 100644 --- a/README.md +++ b/README.md @@ -28,6 +28,9 @@ Allows live-record to record both parent and child after `fork()` has been calle [**Load Debug Symbols**](load_debug_symbols/README.md) Loads debug symbols by parsing the relevant section addresses. +[**Malloc Free Check**](malloc_free_check/README.md) +Checks that no memory was leaked in your program by tracking calls to `malloc()` and `free()`. + [**Reconstruct file**](reconstruct_file/README.md) Reconstructs the content of a file by analysing reads on the execution history of a debugged program or LiveRecorder recording. diff --git a/_linters/mypy-stubs/gdb/__init__.pyi b/_linters/mypy-stubs/gdb/__init__.pyi index 6e6960a..7f30374 100644 --- a/_linters/mypy-stubs/gdb/__init__.pyi +++ b/_linters/mypy-stubs/gdb/__init__.pyi @@ -217,7 +217,7 @@ class Breakpoint(object): commands: Optional[str] def __init__( self, - spec: str, + spec: str = ..., type: gdbtypes.BreakpointType = ..., wp_class: gdbtypes.WatchPointType = ..., internal: bool = ..., @@ -232,7 +232,8 @@ class Breakpoint(object): def is_valid(self) -> bool: ... def delete(self) -> None: ... -class FinishBreakpoint(Breakpoint): ... 
+class FinishBreakpoint(Breakpoint): + return_value: Optional[Value] class Symbol(object): type: Type diff --git a/_linters/mypy.ini b/_linters/mypy.ini index ad9816e..e531162 100644 --- a/_linters/mypy.ini +++ b/_linters/mypy.ini @@ -1,9 +1,15 @@ [mypy] python_version = 3.6 +[mypy-undo.udb_launcher] +ignore_missing_imports = True + [mypy-undodb.debugger_extensions] ignore_missing_imports = True +[mypy-undodb.debugger_extensions.debugger_io] +ignore_missing_imports = True + [mypy-undodb.udb_launcher] ignore_missing_imports = True diff --git a/malloc_free_check/Makefile b/malloc_free_check/Makefile new file mode 100644 index 0000000..0a05e59 --- /dev/null +++ b/malloc_free_check/Makefile @@ -0,0 +1,13 @@ +memchecker: memchecker.c + gcc -g -o $@ $< + +memchecker.undo: memchecker + live-record -o $@ ./$< + +run: memchecker.undo + ./malloc_free_check.py $< + +all: memchecker.undo + +clean: + rm *.undo *.pyc memchecker diff --git a/malloc_free_check/README.md b/malloc_free_check/README.md new file mode 100644 index 0000000..5f03b20 --- /dev/null +++ b/malloc_free_check/README.md @@ -0,0 +1,29 @@ +# Memory leak detection example +This example implements a simple memory leak detector with the Undo Automation API. + +The `memchecker.c` example application has a single unmatched `malloc()` call (excluding some from before the application has actually started, including the 1KB buffer for printfs, which are detected). + +Using the Undo Automation API, the Python scripts process the recording to find all `malloc()` and `free()` calls. The script ignores all `malloc()` calls with a matching `free()` call, and after parsing the entire recording, jumps back in time to each of the unmatched `malloc()` calls. For each call, the scripts: +* Output the backtrace at the time of the call. +* Continue execution until `malloc()` returns. +* Output the source code for the calling function (if available) and locals.
+ +In the case of the example program, this is sufficient to clearly show the root cause for the deliberate leak. Generally it should give a good hint for other recordings, and the output does clearly provide the timestamps for the `malloc()` calls to enable opening the recording and jumping directly to the leaking memory allocation to start debugging from there. + +These scripts can be used as a starting point to implement other kinds of analysis related to the standard allocation functions, such as producing a profile of how much memory is being used during execution. + +## How to run the demo +Simply enter the directory and run: + +`make run` + +## How to use the scripts on other recordings +Simply run the `malloc_free_check.py` script, passing the recording as the parameter: + +`./malloc_free_check.py <recording>` + +## Enhancement ideas +* Provide some way to filter out library code. +* Add verbosity controls. +* Support recordings without symbols (provide address for `malloc()` & `free()` at command line). +* Automatically trace the use of leaking memory to identify the last read or write access to the memory. diff --git a/malloc_free_check/install.py b/malloc_free_check/install.py new file mode 100644 index 0000000..f0bb942 --- /dev/null +++ b/malloc_free_check/install.py @@ -0,0 +1,32 @@ +import pathlib + + +def maybe_install_script(script_path: pathlib.Path, script_name: str) -> None: + """ + Ask for permission and then install a script into ~/.local/bin. + """ + local_bin = pathlib.Path.home() / ".local" / "bin" + install_path = local_bin / script_name + + choice = input(f"Do you want to install {script_name} to {local_bin}? 
[y/N] ") + if choice.lower() not in ("y", "yes"): + return + + try: + install_path.symlink_to(script_path) + install_path.chmod(0o755) + except OSError as e: + print(f"Failed to install the script: {e}") + + +script = pathlib.Path(__file__).resolve().parent / "malloc_free_check.py" + +print( + f"""\ +The {script.name!r} script can be run outside of UDB: + + $ {script} +""" +) + +maybe_install_script(script, "malloc-free-check") diff --git a/malloc_free_check/malloc_free_check.py b/malloc_free_check/malloc_free_check.py new file mode 100755 index 0000000..d846012 --- /dev/null +++ b/malloc_free_check/malloc_free_check.py @@ -0,0 +1,62 @@ +#! /usr/bin/env udb-automate +""" +Undo Automation command-line script for tracking calls to malloc() and free() and checking for +leaked memory. + +This script only supports the x86-64 architecture. + +Contributors: Chris Croft-White, Magne Hov +""" + +import sys +import textwrap + +from undo.udb_launcher import REDIRECTION_COLLECT, UdbLauncher + + +def main(argv: list[str]) -> None: + # Get the arguments from the command line. + try: + recording = argv[1] + except IndexError: + # Wrong number of arguments (a missing list element raises IndexError). + print(f"{sys.argv[0]} RECORDING_FILE", file=sys.stderr) + raise SystemExit(1) + + # Prepare for launching UDB. + launcher = UdbLauncher() + # Make UDB run with our recording. + launcher.recording_file = recording + # Make UDB load the malloc_free_check_extension.py file from the current directory. + launcher.add_extension("malloc_free_check_extension") + # Finally, launch UDB! + # We collect the output as, in normal conditions, we don't want to show it + # to the user but, in case of errors, we want to display it. + res = launcher.run_debugger(redirect_debugger_output=REDIRECTION_COLLECT) + + if not res.exit_code: + # All good as UDB exited with exit code 0 (i.e. no errors). + # The result_data attribute is used to pass information from the extension to this script.
+ unmatched = res.result_data["unmatched"] + print(f"The recording failed to free allocated memory {unmatched} time(s).") + else: + # Something went wrong! Print a useful message. + print( + textwrap.dedent( + f"""\ + Error! + UDB exited with code {res.exit_code}. + + The output was: + + {res.output} + """ + ), + file=sys.stderr, + ) + # Exit this script with the same error code as UDB. + raise SystemExit(res.exit_code) + + +if __name__ == "__main__": + main(sys.argv) diff --git a/malloc_free_check/malloc_free_check_extension.py b/malloc_free_check/malloc_free_check_extension.py new file mode 100644 index 0000000..8a50121 --- /dev/null +++ b/malloc_free_check/malloc_free_check_extension.py @@ -0,0 +1,135 @@ +""" +Undo Automation extension module for tracking calls to malloc() and free() and checking for +leaked memory. + +This script only supports the x86-64 architecture. + +Contributors: Chris Croft-White, Magne Hov +""" + +import collections +import re + +import gdb + +from undodb.debugger_extensions import udb +from undodb.debugger_extensions.debugger_io import redirect_to_launcher_output + + +def leak_check() -> int: + """ + Implements breakpoints and stops on all calls to malloc() and free(), capturing the + timestamp, size and returned pointer for malloc(), then confirms the address pointer is later + seen in a free() call. + + If a subsequent free() is not seen, then at the end of execution, output the timestamp and + details of the memory which was never freed. + + Returns the number of unmatched allocations found. + """ + # Set breakpoints on both malloc() and free(). + gdb.Breakpoint("malloc") + gdb.Breakpoint("free") + + # Maps hex(address) -> (time, size) for allocations not yet freed, in allocation order. + allocations = collections.OrderedDict() + + # Do "continue" until we have gone through the whole recording, potentially + # hitting the breakpoints several times.
+ end_of_time = udb.get_event_log_extent().end + while True: + gdb.execute("continue") + + # Rather than having the check directly in the while condition we have + # it here as we don't want to print the backtrace when we hit the end of + # the recording but only when we stop at a breakpoint. + if udb.time.get().bbcount >= end_of_time: + break + + # Use the $PC output to get the symbol and identify whether execution has stopped + # at a malloc() or free() call. + mypc = format(gdb.parse_and_eval("$pc")) + if re.search("malloc", mypc): + # In malloc(), set a FinishBreakpoint to capture the pointer returned later. + mfbp = gdb.FinishBreakpoint() + + # For now, capture the timestamp and size of memory requested. + time = udb.time.get() + size = int(gdb.parse_and_eval("$rdi")) + + gdb.execute("continue") + + # Should stop at the finish breakpoint, so capture the pointer. + assert mfbp.return_value is not None, "Expected to see a return value." + addr = int(mfbp.return_value) + + if addr: + # Store details in the dictionary. + allocations[hex(addr)] = time, size + else: + print(f"-- INFO: Malloc called for {size} byte(s) but null returned.") + + print(f"{time}: malloc() called: {size} byte(s) allocated at {addr}.") + + elif re.search("free", mypc): + # In free(), get the pointer address. + addr = int(gdb.parse_and_eval("$rdi")) + + time = udb.time.get() + + # Delete entry from the dictionary as this memory was released. + if addr > 0: + if hex(addr) in allocations: + del allocations[hex(addr)] + else: + print("--- INFO: Free called with unknown address") + else: + print("--- INFO: Free called with null address") + + # with redirect_to_launcher_output(): + print(f"{time}: free() called for {addr:#x}") + + # If allocations has any entries remaining, they were not released.
+ with redirect_to_launcher_output(): + print() + print(f"{len(allocations)} unmatched memory allocation(s):") + print() + + total = 0 + + # Increase the amount of source from default (10) to 16 lines for more context. + gdb.execute("set listsize 16") + for location, (time, size) in allocations.items(): + total += size + print("===============================================================================") + print(f"{time}: {size} bytes was allocated at {location}, but never freed.") + print("===============================================================================") + udb.time.goto(time) + print("Backtrace:") + gdb.execute("backtrace") + print() + print("Source (if available):") + gdb.execute("finish") + gdb.execute("list") + print() + print("Locals (after malloc returns):") + gdb.execute("info locals") + print() + print() + print("===============================================================================") + print(f" In total, {total} byte(s) were allocated and not released") + print() + + return len(allocations) + + +# UDB will automatically load the modules passed to UdbLauncher.add_extension and, if present, +# automatically execute any function (with no arguments) called "run". +def run() -> None: + # Needed to allow GDB to fixup breakpoints properly after glibc has been loaded. + gdb.Breakpoint("main") + + unmatched = leak_check() + + # Pass the number of unmatched allocations back to the outer script. 
+ udb.result_data["unmatched"] = unmatched diff --git a/malloc_free_check/memchecker.c b/malloc_free_check/memchecker.c new file mode 100644 index 0000000..c0e3b70 --- /dev/null +++ b/malloc_free_check/memchecker.c @@ -0,0 +1,20 @@ +#include <stdio.h> +#include <stdlib.h> + +int +main(void) +{ + int i; + + for (i = 1; i < 20; ++i) + { + int *addr = (int *)malloc(10 * sizeof(int)); + printf("Address allocated: %p\n", addr); + + if (!(i % 10 == 0)) + { + printf("Address freed: %p\n", addr); + free(addr); + } + } +} diff --git a/private/manifest.json b/private/manifest.json index d37ee15..d4cde55 100644 --- a/private/manifest.json +++ b/private/manifest.json @@ -99,6 +99,13 @@ ], "python_package_dir": "systemc_trace_packages", "version_min": "8.2.0" + }, + "malloc-free-check": { + "description": "Tool for detecting memory leaks based on malloc and free calls.", + "repo": "addons", + "script": "malloc_free_check/install.py", + "version_min": "8.3.0", + "help": "malloc-free.py -- Check for memory leaks based on malloc and free calls." } } }