From a109ea776d99aa63772d3775c0cc0478898be877 Mon Sep 17 00:00:00 2001 From: Dusty Mabe Date: Mon, 11 Aug 2025 20:02:35 -0400 Subject: [PATCH 1/7] cmd-diff: refactor diff_cmd_output into a loop It had some common elements so let's use a loop. --- src/cmd-diff | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/src/cmd-diff b/src/cmd-diff index fc5799dc83..82080380c5 100755 --- a/src/cmd-diff +++ b/src/cmd-diff @@ -404,15 +404,13 @@ def diff_metal(diff_from, diff_to): def diff_cmd_outputs(cmd, file_from, file_to): with tempfile.NamedTemporaryFile(prefix=cmd[0] + '-') as f_from, \ tempfile.NamedTemporaryFile(prefix=cmd[0] + '-') as f_to: - if '{}' not in cmd: - cmd += ['{}'] - idx = cmd.index('{}') - cmd_from = list(cmd) - cmd_from[idx] = file_from - subprocess.run(cmd_from, check=True, stdout=f_from).stdout - cmd_to = list(cmd) - cmd_to[idx] = file_to - subprocess.run(cmd_to, check=True, stdout=f_to).stdout + for file, output in (file_from, f_from), (file_to, f_to): + c = list(cmd) + if '{}' not in c: + c += ['{}'] + idx = c.index('{}') + c[idx] = file + subprocess.run(c, check=True, stdout=file).stdout git_diff(f_from.name, f_to.name) From a74696c8271327068674caa92696d17a6c027867 Mon Sep 17 00:00:00 2001 From: Dusty Mabe Date: Mon, 11 Aug 2025 20:18:11 -0400 Subject: [PATCH 2/7] cmd-diff: rename vars in diff_cmd_outputs Since we could be operating on directories or files change file_from -> path_from and file_to -> path_to. Also change the temporary output filenames to more properly indicate they are outputs. --- src/cmd-diff | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/cmd-diff b/src/cmd-diff index 82080380c5..8ca8b8e15e 100755 --- a/src/cmd-diff +++ b/src/cmd-diff @@ -401,17 +401,17 @@ def diff_metal(diff_from, diff_to): shutdown_process(p_to) -def diff_cmd_outputs(cmd, file_from, file_to): - with tempfile.NamedTemporaryFile(prefix=cmd[0] + '-') as f_from, \ - tempfile.NamedTemporaryFile(prefix=cmd[0] + '-') as f_to: - for file, output in (file_from, f_from), (file_to, f_to): +def diff_cmd_outputs(cmd, path_from, path_to): + with tempfile.NamedTemporaryFile(prefix=cmd[0] + '-') as from_output, \ + tempfile.NamedTemporaryFile(prefix=cmd[0] + '-') as to_output: + for path, output in (path_from, from_output), (path_to, to_output): c = list(cmd) if '{}' not in c: c += ['{}'] idx = c.index('{}') - c[idx] = file - subprocess.run(c, check=True, stdout=file).stdout - git_diff(f_from.name, f_to.name) + c[idx] = path + subprocess.run(c, check=True, stdout=output).stdout + git_diff(from_output.name, to_output.name) def git_diff(arg_from, arg_to): From 4e0cf9e4dd46acbd20d1ae6926916c6beff86f11 Mon Sep 17 00:00:00 2001 From: Dusty Mabe Date: Mon, 11 Aug 2025 20:24:32 -0400 Subject: [PATCH 3/7] cmd-diff: support `cd` strategy for diff_cmd_outputs This strategy simply changes directory into the given path before running the provided command rather than replacing a templated `{}` with the path. Useful for commands that operate more cleanly when operated on in the directory where you want the operation to occur. --- src/cmd-diff | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/cmd-diff b/src/cmd-diff index 8ca8b8e15e..1cd0b5676e 100755 --- a/src/cmd-diff +++ b/src/cmd-diff @@ -401,16 +401,21 @@ def diff_metal(diff_from, diff_to): shutdown_process(p_to) -def diff_cmd_outputs(cmd, path_from, path_to): +def diff_cmd_outputs(cmd, path_from, path_to, strategy='template'): + workingdir = os.getcwd() with tempfile.NamedTemporaryFile(prefix=cmd[0] + '-') as from_output, \ tempfile.NamedTemporaryFile(prefix=cmd[0] + '-') as to_output: for path, output in (path_from, from_output), (path_to, to_output): c = list(cmd) - if '{}' not in c: - c += ['{}'] - idx = c.index('{}') - c[idx] = path - subprocess.run(c, check=True, stdout=output).stdout + if strategy == 'template': + if '{}' not in c: + c += ['{}'] + idx = c.index('{}') + c[idx] = path + else: + assert strategy == 'cd' + workingdir = path + subprocess.run(c, cwd=workingdir, check=True, stdout=output).stdout git_diff(from_output.name, to_output.name) From ca158e222155d8b1396957cd1e489e08d4f53024 Mon Sep 17 00:00:00 2001 From: Dusty Mabe Date: Mon, 11 Aug 2025 23:00:17 -0400 Subject: [PATCH 4/7] cmd-diff: support `git difftool` so we can utilize vimdiff Having vimdiff is a lot more powerful to me because you can manually do a few things in the terminal to massage the files on each side of the diff to give you more information (i.e. narrow in on exactly what diff you are interested in). Let's add a --difftool boolean flag to trigger `git difftool`, which will allow diffs to be displayed using vimdiff. Additionally include vim-enhanced so we have `vimdiff` installed and at our disposal. --- src/cmd-diff | 12 +++++++++++- src/deps.txt | 3 +++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/src/cmd-diff b/src/cmd-diff index 1cd0b5676e..1b4fee43f5 100755 --- a/src/cmd-diff +++ b/src/cmd-diff @@ -49,11 +49,17 @@ TMP_REPO = 'tmp/repo' DIFF_CACHE = 'tmp/diff-cache' +USE_DIFFTOOL = False + def main(): args = parse_args() builds = Builds() + # Modify the USE_DIFFTOOL global based on the --difftool argument + global USE_DIFFTOOL + USE_DIFFTOOL = args.difftool + latest_build = builds.get_latest() os.makedirs(DIFF_CACHE, exist_ok=True) @@ -109,6 +115,7 @@ def parse_args(): parser.add_argument("--to", dest='diff_to', help="Second build ID") parser.add_argument("--gc", action='store_true', help="Delete cached diff content") parser.add_argument("--arch", dest='arch', help="Architecture of builds") + parser.add_argument("--difftool", action='store_true', help="Use git difftool") for differ in DIFFERS: parser.add_argument("--" + differ.name, action='store_true', default=False, @@ -420,7 +427,10 @@ def diff_cmd_outputs(cmd, path_from, path_to, strategy='template'): def git_diff(arg_from, arg_to): - runcmd(['git', 'diff', '--no-index', arg_from, arg_to], check=False) + subcmd = 'diff' + if USE_DIFFTOOL: + subcmd = 'difftool' + runcmd(['git', subcmd, '--no-index', arg_from, arg_to], check=False) def cache_dir(dir): diff --git a/src/deps.txt b/src/deps.txt index f0b2f780de..54e5eb62a5 100644 --- a/src/deps.txt +++ b/src/deps.txt @@ -110,3 +110,6 @@ python3-libguestfs # For generating kubernetes YAML files (e.g Konflux resources) kustomize + +# For vimdiff +vim-enhanced From 5590be459b67f9646a9d2f508fd6089b45f12080 Mon Sep 17 00:00:00 2001 From: Dusty Mabe Date: Mon, 11 Aug 2025 23:27:09 -0400 Subject: [PATCH 5/7] cmd-diff: refactor metal mounting code into helper This way we can have more commands that can leverae this code for different "diffs" on the resulting mounted filesystems. Prep for a future commit. --- src/cmd-diff | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/cmd-diff b/src/cmd-diff index 1b4fee43f5..1aeb00ecf2 100755 --- a/src/cmd-diff +++ b/src/cmd-diff @@ -356,7 +356,10 @@ def run_guestfs_mount(image_path, mount_target): g.close() -def diff_metal(diff_from, diff_to): +# Generator that will mount up metal image filesystems and yield +# the paths to be used for analysis and then clean up once given back +# control. +def diff_metal_helper(diff_from, diff_to): metal_from = get_metal_path(diff_from) metal_to = get_metal_path(diff_to) @@ -389,8 +392,8 @@ def diff_metal(diff_from, diff_to): if not p.is_alive(): raise Exception(f"A guestfs process for {os.path.basename(d)} died unexpectedly.") - # Now that the mounts are live, we can diff them - git_diff(mount_dir_from, mount_dir_to) + # Allow the caller to operate on these values + yield mount_dir_from, mount_dir_to finally: # Unmount the FUSE binds, this will make the guestfs mount calls return @@ -408,6 +411,11 @@ def diff_metal(diff_from, diff_to): shutdown_process(p_to) +def diff_metal(diff_from, diff_to): + for mount_dir_from, mount_dir_to in diff_metal_helper(diff_from, diff_to): + git_diff(mount_dir_from, mount_dir_to) + + def diff_cmd_outputs(cmd, path_from, path_to, strategy='template'): workingdir = os.getcwd() with tempfile.NamedTemporaryFile(prefix=cmd[0] + '-') as from_output, \ From 8ef5b0e2d07b321ccd63c4a112eac9ace700107d Mon Sep 17 00:00:00 2001 From: Dusty Mabe Date: Tue, 12 Aug 2025 00:07:24 -0400 Subject: [PATCH 6/7] cmd-diff: support --metal-du and --metal-ls A few more views of differences between two metal disk image mounted filesystems. --- src/cmd-diff | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/cmd-diff b/src/cmd-diff index 1aeb00ecf2..fba5a3ed09 100755 --- a/src/cmd-diff +++ b/src/cmd-diff @@ -416,6 +416,18 @@ def diff_metal(diff_from, diff_to): git_diff(mount_dir_from, mount_dir_to) +def diff_metal_du(diff_from, diff_to): + for mount_dir_from, mount_dir_to in diff_metal_helper(diff_from, diff_to): + cmd = ['find', '.', '-type', 'd', '-exec', 'du', '-sh', '{}', ';'] + diff_cmd_outputs(cmd, mount_dir_from, mount_dir_to, strategy='cd') + + +def diff_metal_ls(diff_from, diff_to): + for mount_dir_from, mount_dir_to in diff_metal_helper(diff_from, diff_to): + cmd = ['find', '.'] + diff_cmd_outputs(cmd, mount_dir_from, mount_dir_to, strategy='cd') + + def diff_cmd_outputs(cmd, path_from, path_to, strategy='template'): workingdir = os.getcwd() with tempfile.NamedTemporaryFile(prefix=cmd[0] + '-') as from_output, \ @@ -478,6 +490,10 @@ DIFFERS = [ needs_ostree=OSTreeImport.NO, function=diff_metal_partitions), Differ("metal", "Diff metal disk image content", needs_ostree=OSTreeImport.NO, function=diff_metal), + Differ("metal-du", "Compare directory usage of metal disk image content", + needs_ostree=OSTreeImport.NO, function=diff_metal_du), + Differ("metal-ls", "Compare directory listing of metal disk image content", + needs_ostree=OSTreeImport.NO, function=diff_metal_ls), ] if __name__ == '__main__': From f034c3114568bf7f4493b609038847c168029676 Mon Sep 17 00:00:00 2001 From: Dusty Mabe Date: Tue, 12 Aug 2025 10:33:50 -0400 Subject: [PATCH 7/7] cmd-diff: fix superfluous .stdout call The .stdout at the end of this line is unnecessary and has no effect. When stdout is redirected to a file object, the stdout attribute of the returned CompletedProcess object is None. Assisted-By https://github.com/coreos/coreos-assembler/pull/4253#discussion_r2268528878 --- src/cmd-diff | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cmd-diff b/src/cmd-diff index fba5a3ed09..ec680088a9 100755 --- a/src/cmd-diff +++ b/src/cmd-diff @@ -442,7 +442,7 @@ def diff_cmd_outputs(cmd, path_from, path_to, strategy='template'): else: assert strategy == 'cd' workingdir = path - subprocess.run(c, cwd=workingdir, check=True, stdout=output).stdout + subprocess.run(c, cwd=workingdir, check=True, stdout=output) git_diff(from_output.name, to_output.name)