From 7add6a3f6020286eb756e1815be22e9bd41e6961 Mon Sep 17 00:00:00 2001 From: Josh Heinrichs Date: Tue, 23 Sep 2025 13:25:59 +0900 Subject: [PATCH] Support blobs and trees in fetchGit This makes it possible to fetch the source for specific folders / files, provided it's supported by the remote and you know the hash of the object. This is especially useful for monorepos if you just want to grab the source for a specific directory. When using this via fetchGit, shallow is required as there's no history to fetch. Since .gitattributes isn't available, submodules and lfs have no effect. When fetching blobs and trees, revCount and lastModified are omitted from the resulting attribute set as those attributes only really make sense for commits (or things that point at commits like annotated tags). --- maintainers/flake-module.nix | 1 + src/libexpr/primops/fetchTree.cc | 8 +-- src/libfetchers/git-utils.cc | 22 +++++-- src/libfetchers/git.cc | 47 +++++++------- .../include/nix/fetchers/git-utils.hh | 4 +- tests/functional/fetchGit.sh | 5 ++ tests/functional/fetchGitObjects.sh | 61 +++++++++++++++++++ tests/functional/meson.build | 1 + 8 files changed, 115 insertions(+), 34 deletions(-) create mode 100755 tests/functional/fetchGitObjects.sh diff --git a/maintainers/flake-module.nix b/maintainers/flake-module.nix index a5360675f02..2e27ef674e4 100644 --- a/maintainers/flake-module.nix +++ b/maintainers/flake-module.nix @@ -128,6 +128,7 @@ ''^tests/functional/extra-sandbox-profile\.sh$'' ''^tests/functional/fetchClosure\.sh$'' ''^tests/functional/fetchGit\.sh$'' + ''^tests/functional/fetchGitObjects\.sh$'' ''^tests/functional/fetchGitRefs\.sh$'' ''^tests/functional/fetchGitSubmodules\.sh$'' ''^tests/functional/fetchGitVerification\.sh$'' diff --git a/src/libexpr/primops/fetchTree.cc b/src/libexpr/primops/fetchTree.cc index e673e55a012..f981460bd98 100644 --- a/src/libexpr/primops/fetchTree.cc +++ b/src/libexpr/primops/fetchTree.cc @@ -44,17 +44,15 @@ void emitTreeAttrs( if (auto rev = input.getRev()) { attrs.alloc("rev").mkString(rev->gitRev()); attrs.alloc("shortRev").mkString(rev->gitShortRev()); + if (auto revCount = input.getRevCount()) + attrs.alloc("revCount").mkInt(*revCount); } else if (emptyRevFallback) { // Backwards compat for `builtins.fetchGit`: dirty repos return an empty sha1 as rev auto emptyHash = Hash(HashAlgorithm::SHA1); attrs.alloc("rev").mkString(emptyHash.gitRev()); attrs.alloc("shortRev").mkString(emptyHash.gitShortRev()); - } - - if (auto revCount = input.getRevCount()) - attrs.alloc("revCount").mkInt(*revCount); - else if (emptyRevFallback) attrs.alloc("revCount").mkInt(0); + } } if (auto dirtyRev = fetchers::maybeGetStrAttr(input.attrs, "dirtyRev")) { diff --git a/src/libfetchers/git-utils.cc b/src/libfetchers/git-utils.cc index a3652e5222e..6feced48d7a 100644 --- a/src/libfetchers/git-utils.cc +++ b/src/libfetchers/git-utils.cc @@ -314,8 +314,13 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this checkInterrupt(); } - uint64_t getRevCount(const Hash & rev) override + std::optional getRevCount(const Hash & rev) override { + auto obj = lookupObject(*this, hashToOID(rev)); + auto type = git_object_type(obj.get()); + if (type == GIT_OBJECT_BLOB || type == GIT_OBJECT_TREE) + return std::nullopt; + boost::unordered_flat_set> done; std::queue todo; @@ -344,10 +349,15 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this return done.size(); } - uint64_t getLastModified(const Hash & rev) override + std::optional getLastModified(const Hash & rev) override { - auto commit = peelObject(lookupObject(*this, hashToOID(rev)).get(), GIT_OBJECT_COMMIT); - + auto obj = lookupObject(*this, hashToOID(rev)); + auto type = git_object_type(obj.get()); + // blobs and trees don't have commit times + if (type == GIT_OBJECT_BLOB || type == GIT_OBJECT_TREE) + return std::nullopt; + // peel annotated tags + Commit commit = peelObject(obj.get(), GIT_OBJECT_COMMIT); return git_commit_time(commit.get()); } @@ -367,10 +377,10 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this Object object; // Using the rev-parse notation which libgit2 supports, make sure we peel - // the ref ultimately down to the underlying commit. + // the ref ultimately down to the underlying object. // This is to handle the case where it may be an annotated tag which itself has // an object_id. - std::string peeledRef = ref + "^{commit}"; + std::string peeledRef = ref + "^{}"; if (git_revparse_single(Setter(object), *this, peeledRef.c_str())) throw Error("resolving Git reference '%s': %s", ref, git_error_last()->message); auto oid = git_object_id(object.get()); diff --git a/src/libfetchers/git.cc b/src/libfetchers/git.cc index f750d907d36..87a76f5bbc7 100644 --- a/src/libfetchers/git.cc +++ b/src/libfetchers/git.cc @@ -506,7 +506,7 @@ struct GitInputScheme : InputScheme return repoInfo; } - uint64_t getLastModified( + std::optional getLastModified( const Settings & settings, const RepoInfo & repoInfo, const std::filesystem::path & repoDir, @@ -517,16 +517,17 @@ struct GitInputScheme : InputScheme auto cache = settings.getCache(); if (auto res = cache->lookup(key)) - return getIntAttr(*res, "lastModified"); + return maybeGetIntAttr(*res, "lastModified"); auto lastModified = GitRepo::openRepo(repoDir)->getLastModified(rev); - cache->upsert(key, {{"lastModified", lastModified}}); + auto attrs = lastModified ? Attrs{{"lastModified", *lastModified}} : Attrs{}; + cache->upsert(key, attrs); return lastModified; } - uint64_t getRevCount( + std::optional getRevCount( const Settings & settings, const RepoInfo & repoInfo, const std::filesystem::path & repoDir, @@ -537,14 +538,15 @@ struct GitInputScheme : InputScheme auto cache = settings.getCache(); if (auto revCountAttrs = cache->lookup(key)) - return getIntAttr(*revCountAttrs, "revCount"); + return maybeGetIntAttr(*revCountAttrs, "revCount"); Activity act( *logger, lvlChatty, actUnknown, fmt("getting Git revision count of '%s'", repoInfo.locationToArg())); auto revCount = GitRepo::openRepo(repoDir)->getRevCount(rev); - cache->upsert(key, Attrs{{"revCount", revCount}}); + auto attrs = revCount ? Attrs{{"revCount", *revCount}} : Attrs{}; + cache->upsert(key, attrs); return revCount; } @@ -705,13 +707,12 @@ struct GitInputScheme : InputScheme auto rev = *input.getRev(); - Attrs infoAttrs({ - {"rev", rev.gitRev()}, - {"lastModified", getLastModified(*input.settings, repoInfo, repoDir, rev)}, - }); - + Attrs infoAttrs{{"rev", rev.gitRev()}}; + if (auto lastModified = getLastModified(*input.settings, repoInfo, repoDir, rev)) + infoAttrs.insert_or_assign("lastModified", *lastModified); if (!getShallowAttr(input)) - infoAttrs.insert_or_assign("revCount", getRevCount(*input.settings, repoInfo, repoDir, rev)); + if (auto revCount = getRevCount(*input.settings, repoInfo, repoDir, rev)) + infoAttrs.insert_or_assign("revCount", *revCount); printTalkative("using revision %s of repo '%s'", rev.gitRev(), repoInfo.locationToArg()); @@ -768,8 +769,10 @@ struct GitInputScheme : InputScheme assert(!origRev || origRev == rev); if (!getShallowAttr(input)) - input.attrs.insert_or_assign("revCount", getIntAttr(infoAttrs, "revCount")); - input.attrs.insert_or_assign("lastModified", getIntAttr(infoAttrs, "lastModified")); + if (auto revCount = getRevCount(*input.settings, repoInfo, repoDir, rev)) + input.attrs.insert_or_assign("revCount", *revCount); + if (auto lastModified = maybeGetIntAttr(infoAttrs, "lastModified")) + input.attrs.insert_or_assign("lastModified", *lastModified); return {accessor, std::move(input)}; } @@ -834,8 +837,10 @@ struct GitInputScheme : InputScheme input.attrs.insert_or_assign("rev", rev.gitRev()); if (!getShallowAttr(input)) { - input.attrs.insert_or_assign( - "revCount", rev == nullRev ? 0 : getRevCount(*input.settings, repoInfo, repoPath, rev)); + auto revCount = (rev == nullRev) ? std::optional(0) + : getRevCount(*input.settings, repoInfo, repoPath, rev); + if (revCount) + input.attrs.insert_or_assign("revCount", *revCount); } verifyCommit(input, repo); @@ -850,11 +855,11 @@ struct GitInputScheme : InputScheme verifyCommit(input, nullptr); } - input.attrs.insert_or_assign( - "lastModified", - repoInfo.workdirInfo.headRev - ? getLastModified(*input.settings, repoInfo, repoPath, *repoInfo.workdirInfo.headRev) - : 0); + auto lastModified = repoInfo.workdirInfo.headRev + ? getLastModified(*input.settings, repoInfo, repoPath, *repoInfo.workdirInfo.headRev) + : std::optional{0}; + if (lastModified) + input.attrs.insert_or_assign("lastModified", *lastModified); return {accessor, std::move(input)}; } diff --git a/src/libfetchers/include/nix/fetchers/git-utils.hh b/src/libfetchers/include/nix/fetchers/git-utils.hh index 07b9855417f..a10650591de 100644 --- a/src/libfetchers/include/nix/fetchers/git-utils.hh +++ b/src/libfetchers/include/nix/fetchers/git-utils.hh @@ -28,9 +28,9 @@ struct GitRepo static ref openRepo(const std::filesystem::path & path, bool create = false, bool bare = false); - virtual uint64_t getRevCount(const Hash & rev) = 0; + virtual std::optional getRevCount(const Hash & rev) = 0; - virtual uint64_t getLastModified(const Hash & rev) = 0; + virtual std::optional getLastModified(const Hash & rev) = 0; virtual bool isShallow() = 0; diff --git a/tests/functional/fetchGit.sh b/tests/functional/fetchGit.sh index e7c9c77a5a1..761824200a7 100755 --- a/tests/functional/fetchGit.sh +++ b/tests/functional/fetchGit.sh @@ -298,6 +298,11 @@ expected_attrs="{ lastModified = 0; lastModifiedDate = \"19700101000000\"; narHa result=$(nix eval --impure --expr "builtins.removeAttrs (builtins.fetchGit $empty) [\"outPath\"]") [[ "$result" = "$expected_attrs" ]] +# fetchTree shouldn't have rev, revCount or shortRev +expected_attrs="{ lastModified = 0; lastModifiedDate = \"19700101000000\"; narHash = \"sha256-wzlAGjxKxpaWdqVhlq55q5Gxo4Bf860+kLeEa/v02As=\"; submodules = false; }" +result=$(nix eval --impure --expr "builtins.removeAttrs (builtins.fetchTree { type = \"git\"; url = \"file://$empty\"; }) [\"outPath\"]") +[[ "$result" = "$expected_attrs" ]] + # Test a repo with an empty commit. git -C "$empty" rm -f x diff --git a/tests/functional/fetchGitObjects.sh b/tests/functional/fetchGitObjects.sh new file mode 100755 index 00000000000..f61e266f275 --- /dev/null +++ b/tests/functional/fetchGitObjects.sh @@ -0,0 +1,61 @@ +#!/usr/bin/env bash + +source common.sh + +requireGit + +clearStoreIfPossible + +repo="$TEST_ROOT/git" + +rm -rf "$repo" "${repo}-tmp" "$TEST_HOME/.cache/nix" + +git init "$repo" +git -C "$repo" config user.email "foobar@example.com" +git -C "$repo" config user.name "Foobar" + +echo foo > "$repo/blob" +mkdir "$repo/tree" +echo bar > "$repo/tree/blob" +git -C "$repo" add blob tree +git -C "$repo" commit -m 'Bla1' + +rev=$(git -C $repo rev-parse HEAD) +blobrev=$(git -C $repo rev-parse HEAD:blob) +treerev=$(git -C $repo rev-parse HEAD:tree) + +git -C $repo update-ref refs/blob $blobrev +git -C $repo update-ref refs/tree $treerev + +git -C $repo tag -a blobtag -m "annotated tag" $blobrev +git -C $repo tag -a treetag -m "annotated tag" $treerev + +# Fetch by hash +nix-instantiate --eval -E "builtins.readFile (builtins.fetchGit { url = file://$repo; rev = \"$blobrev\"; shallow = true; }) == \"foo\n\"" +nix-instantiate --eval -E "builtins.readFile (builtins.fetchGit { url = file://$repo; rev = \"$treerev\"; shallow = true; } + \"/blob\") == \"bar\n\"" + +# Fetch by ref +nix-instantiate --eval -E "builtins.readFile (builtins.fetchGit { url = file://$repo; ref = \"refs/blob\"; shallow = true; }) == \"foo\n\"" +nix-instantiate --eval -E "builtins.readFile (builtins.fetchGit { url = file://$repo; ref = \"refs/tree\"; shallow = true; } + \"/blob\") == \"bar\n\"" + +# Fetch by annotated tag +nix-instantiate --eval -E "builtins.readFile (builtins.fetchGit { url = file://$repo; ref = \"refs/tags/blobtag\"; shallow = true; }) == \"foo\n\"" +nix-instantiate --eval -E "builtins.readFile (builtins.fetchGit { url = file://$repo; ref = \"refs/tags/treetag\"; shallow = true; } + \"/blob\") == \"bar\n\"" + +# fetchGit attributes +expectedAttrs="{ narHash = \"sha256-QvtAMbUl/uvi+LCObmqOhvNOapHdA2raiI4xG5zI5pA=\"; rev = \"$blobrev\"; shortRev = \"${blobrev:0:7}\"; submodules = false; }" +result=$(nix eval --impure --expr "builtins.removeAttrs (builtins.fetchGit { url = file://$repo; rev = \"$blobrev\"; shallow = true; }) [\"outPath\"]") +[[ "$result" = "$expectedAttrs" ]] + +expectedAttrs="{ narHash = \"sha256-R/LfkvSLnUzdPeKhbQ6lGFpSfLdKvDw3LLicN46rUR4=\"; rev = \"$treerev\"; shortRev = \"${treerev:0:7}\"; submodules = false; }" +result=$(nix eval --impure --expr "builtins.removeAttrs (builtins.fetchGit { url = file://$repo; rev = \"$treerev\"; shallow = true; }) [\"outPath\"]") +[[ "$result" = "$expectedAttrs" ]] + +# fetchTree attributes +expectedAttrs="{ narHash = \"sha256-QvtAMbUl/uvi+LCObmqOhvNOapHdA2raiI4xG5zI5pA=\"; rev = \"$blobrev\"; shortRev = \"${blobrev:0:7}\"; submodules = false; }" +result=$(nix eval --impure --expr "builtins.removeAttrs (builtins.fetchTree { type = \"git\"; url = file://$repo; rev = \"$blobrev\"; shallow = true; }) [\"outPath\"]") +[[ "$result" = "$expectedAttrs" ]] + +expectedAttrs="{ narHash = \"sha256-R/LfkvSLnUzdPeKhbQ6lGFpSfLdKvDw3LLicN46rUR4=\"; rev = \"$treerev\"; shortRev = \"${treerev:0:7}\"; submodules = false; }" +result=$(nix eval --impure --expr "builtins.removeAttrs (builtins.fetchTree { type = \"git\"; url = file://$repo; rev = \"$treerev\"; shallow = true; }) [\"outPath\"]") +[[ "$result" = "$expectedAttrs" ]] diff --git a/tests/functional/meson.build b/tests/functional/meson.build index 368f60452d7..a1aa95d98a5 100644 --- a/tests/functional/meson.build +++ b/tests/functional/meson.build @@ -76,6 +76,7 @@ suites = [ 'gc-runtime.sh', 'tarball.sh', 'fetchGit.sh', + 'fetchGitObjects.sh', 'fetchGitShallow.sh', 'fetchurl.sh', 'fetchPath.sh',