Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Print manifest comment in single-file packages #36

Merged
merged 7 commits into from
Dec 13, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .bazelrc
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
# Use platforms for compiling C/C++ code.
build --incompatible_enable_cc_toolchain_resolution

# Make git version information available to bazel rules.
build --workspace_status_command=./tools/bin/workspace_status.sh

# Allow the user to switch to various Clang version for compiling everything.
build:clang11 --platform_suffix=clang11
build:clang11 --//build:requested_compiler_flag=clang11
Expand Down
8 changes: 7 additions & 1 deletion BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,9 @@ BASE_UNITS = [

BASE_UNIT_STRING = " ".join(BASE_UNITS)

CMD_ROOT = "$(location tools/bin/make-single-file) {extra_opts} --units {units} > $(OUTS)"
GIT_ID_CMD = "cat bazel-out/stable-status.txt | grep STABLE_GIT_ID | sed 's/STABLE_GIT_ID \\(.*\\)/\\1/' | tr -d '\\n'"

CMD_ROOT = "$(location tools/bin/make-single-file) {extra_opts} --units {units} --version-id $$({id_cmd}) > $(OUTS)"

################################################################################
# Release single-file package `au.hh`
Expand All @@ -49,7 +51,9 @@ genrule(
cmd = CMD_ROOT.format(
extra_opts = "",
units = BASE_UNIT_STRING,
id_cmd = GIT_ID_CMD,
),
stamp = True,
tools = ["tools/bin/make-single-file"],
)

Expand All @@ -69,7 +73,9 @@ genrule(
cmd = CMD_ROOT.format(
extra_opts = "--noio",
units = BASE_UNIT_STRING,
id_cmd = GIT_ID_CMD,
),
stamp = True,
tools = ["tools/bin/make-single-file"],
visibility = ["//release:__pkg__"],
)
Expand Down
92 changes: 83 additions & 9 deletions tools/bin/make-single-file
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import argparse
import datetime
import re
import subprocess
import sys


Expand All @@ -20,25 +21,26 @@ def main(argv=None):
`#include` directives (such as standard library headers) untouched.
"""
args = parse_command_line_args(argv)
files = parse_files(filenames=filenames(args=args))
print_unified_file(files)
files = parse_files(
filenames=filenames(
main_files=args.main_files, units=args.units, include_io=args.include_io
)
)
print_unified_file(files, args=args)

return 0


def filenames(args):
def filenames(main_files, units, include_io):
"""Construct the list of project filenames to include.

The script will be sure to include all of these, and will also include any
transitive dependencies from within the project.

:param args: A Namespace object whose fields include the lists `main_files`
and `units`, and the boolean `include_io`.
"""
names = (
["au/au.hh"] + [f"au/units/{unit}.hh" for unit in args.units] + args.main_files
["au/au.hh"] + [f"au/units/{unit}.hh" for unit in units] + main_files
)
if args.include_io:
if include_io:
names.append("au/io.hh")
return names

Expand All @@ -51,6 +53,12 @@ def parse_command_line_args(argv):

parser.add_argument("--units", nargs="*", default=[], help="The units to include")

parser.add_argument(
"--version-id",
default=git_id_description(),
help="An identifier for the version of code used to build this file",
)

parser.add_argument(
"--noio",
action="store_false",
Expand Down Expand Up @@ -161,7 +169,7 @@ def include_lines(files):
return set(line for f in files for line in files[f].global_includes)


def print_unified_file(files):
def print_unified_file(files, args):
"""Print the single-file output to stdout."""
print(f"// {AURORA_COPYRIGHT.format(year=datetime.datetime.now().year)}")
print()
Expand All @@ -171,10 +179,76 @@ def print_unified_file(files):
for i in sorted(include_lines(files)):
print(i)

print()
for line in manifest(args=args):
print(f"// {line}")

for f in sort_topologically(files):
for line in files[f].lines:
print(line)


def manifest(args):
"""A sequence of lines describing the options that generated this file."""
args = CheckArgs(args)

lines = [
f"Version identifier: {args.version_id}",
f'<iostream> support: {"INCLUDED" if args.include_io else "EXCLUDED"}',
"List of included units:",
] + [f" {u}" for u in sorted(args.units)]

if args.main_files:
lines.append("Extra files included:")
lines.extend(f" {f}" for f in sorted(args.main_files))

assert args.are_all_used()
return lines


class CheckArgs(dict):
def __init__(self, args):
self.vars = vars(args)
self.used = set()

def are_all_used(self):
return not set(self.vars.keys()).difference(self.used)

def __getattr__(self, k):
self.used.add(k)
return self.vars[k]
Comment on lines +209 to +219
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When we deal with #21, this would be a great candidate for a unit test. In fact, I used one to write it:

class TestUseAllArgs(unittest.TestCase):
    def parse_args_a_b_c(self, argv):
        parser = argparse.ArgumentParser()
        parser.add_argument("--a", type=int)
        parser.add_argument("--b", type=int)
        parser.add_argument("--c", type=int)
        return parser.parse_args(argv)

    def test_detects_unused_arg(self):
        args = CheckArgs(self.parse_args_a_b_c(argv=["--a", "1", "--c", "2"]))

        self.assertFalse(args.are_all_used())

        self.assertEqual(args.a, 1)
        self.assertEqual(args.c, 2)
        self.assertFalse(args.are_all_used())

        self.assertIsNone(args.b)
        self.assertTrue(args.are_all_used())

The reason I didn't land it was because I hadn't yet broken this out into (tested) python libraries, a python binary, and a thin wrapper. But I hope the existence of this test, even if only locally, adds some confidence that this thing does what it's supposed to. (I really do need to go ahead and just do #21...)



def git_id_description():
"""A description of the ID (commit or tag) in git, and whether it's clean."""
# TODO(#21): The logic for the identifier currently lives in two places.
# After `make-single-file` becomes a thin wrapper on the underlying logic in
# python, we can fix this by making the `--version-id` argument _required_,
# and deleting this function. (We can have the underlying python rule
# depend on the existence of the stable version stamp, and simply use that
# always.)
try:
return (
subprocess.check_output(
["git", "describe", "--always", "--dirty"], stderr=subprocess.DEVNULL
)
.decode("ascii")
.strip()
)
except subprocess.CalledProcessError:
# We do not ever expect this line to affect the generated file in normal
# operations. If users are running the script manually, then the above
# command will succeed, because currently the command is not sandboxed.
# And if users are running a genrule for a pre-built single-file
# package, then that command is constructed to supply the version
# information, which means that the below value will be _returned_ but
# not _used_.
#
# Really, the point of the following line is to generate a valid default
# value (so that the script does not crash) in the cases where we know
# the default value won't be used.
return "(Unknown version)"


if __name__ == "__main__":
sys.exit(main())
3 changes: 3 additions & 0 deletions tools/bin/workspace_status.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/bin/bash

echo STABLE_GIT_ID $(git describe --always --dirty)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not obvious to me that making this information as "stable" is beneficial. Perhaps in a small repo like this it doesn't matter. But when you get a larger repo, it's generally less useful to have this be stable.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My goal in doing this was to force the stamped targets (i.e., //:au_hh and //:au_noio_hh) to rebuild every time the git description changes. I had thought that if I let the git description be "volatile", then these would not get rebuilt. Am I understanding correctly?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If your goal is exactly "rebuild any time anything at all changes", then what you have does work. It is, however, not super useful. E.g. every time you create a commit, everything gets "relinked/restamped". In a repo of non-trivial size, that time spent restamping/relinking adds up pretty quickly. In my experience, the more useful notion is "any time this target needs to get rebuilt, grab the latest git information". That means targets only get rebuilt when they actually need to be. You can then still manually force everything to get restamped with --embed_label.

Even if you make this volatile, the information will never be wrong, but it might not mean exactly what you want it to. Basically, if you make it volatile, then the stamped git hash means "if you check out this hash, then you will be able to reproduce the corresponding artifact exactly." It does not mean "this artifact was built at exactly this hash".

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Either way, not important for a project of this size.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks! I would like to fix this up to get it right, but it's not a top priority since this doesn't block release, and we're very far from having a repo big enough to notice this (if we even ever do).

At lower priority, therefore, I had a clarification question. When you say "rebuild any time anything at all changes", were you referring to stamped targets (and their reverse deps), unstamped targets, or both?

My expectation/mental model was that the stamped targets (and anything that depends on them) would get rebuilt when the stable version changes, but anything else would not. In other words, I expected only the two stamp=True targets to be directly affected.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When you say "rebuild any time anything at all changes", were you referring to stamped targets (and their reverse deps), unstamped targets, or both?

stamped targets (and their reverse deps)

My expectation/mental model was that the stamped targets (and anything that depends on them) would get rebuilt when the stable version changes, but anything else would not. In other words, I expected only the two stamp=True targets to be directly affected.

That is correct.