Skip to content

Commit 426e2af

Browse files
authored
Merge pull request #11077 from hercules-ci/support-hardlinks-in-tarballs
Support hardlinks in tarballs
2 parents 142e566 + 4fd8f19 commit 426e2af

File tree

11 files changed

+280
-8
lines changed

11 files changed

+280
-8
lines changed

src/libfetchers/git-utils.cc

Lines changed: 57 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -115,10 +115,10 @@ git_oid hashToOID(const Hash & hash)
115115
return oid;
116116
}
117117

118-
Object lookupObject(git_repository * repo, const git_oid & oid)
118+
Object lookupObject(git_repository * repo, const git_oid & oid, git_object_t type = GIT_OBJECT_ANY)
119119
{
120120
Object obj;
121-
if (git_object_lookup(Setter(obj), repo, &oid, GIT_OBJECT_ANY)) {
121+
if (git_object_lookup(Setter(obj), repo, &oid, type)) {
122122
auto err = git_error_last();
123123
throw Error("getting Git object '%s': %s", oid, err->message);
124124
}
@@ -909,6 +909,61 @@ struct GitFileSystemObjectSinkImpl : GitFileSystemObjectSink
909909
addToTree(*pathComponents.rbegin(), oid, GIT_FILEMODE_LINK);
910910
}
911911

912+
void createHardlink(const CanonPath & path, const CanonPath & target) override
913+
{
914+
std::vector<std::string> pathComponents;
915+
for (auto & c : path)
916+
pathComponents.emplace_back(c);
917+
918+
if (!prepareDirs(pathComponents, false)) return;
919+
920+
// We can't just look up the path from the start of the root, since
921+
// some parent directories may not have finished yet, so we compute
922+
// a relative path that helps us find the right git_treebuilder or object.
923+
auto relTarget = CanonPath(path).parent()->makeRelative(target);
924+
925+
auto dir = pendingDirs.rbegin();
926+
927+
// For each ../ component at the start, go up one directory.
928+
// CanonPath::makeRelative() always puts all .. elements at the start,
929+
// so they're all handled by this loop:
930+
std::string_view relTargetLeft(relTarget);
931+
while (hasPrefix(relTargetLeft, "../")) {
932+
if (dir == pendingDirs.rend())
933+
throw Error("invalid hard link target '%s' for path '%s'", target, path);
934+
++dir;
935+
relTargetLeft = relTargetLeft.substr(3);
936+
}
937+
if (dir == pendingDirs.rend())
938+
throw Error("invalid hard link target '%s' for path '%s'", target, path);
939+
940+
// Look up the remainder of the target, starting at the
941+
// top-most `git_treebuilder`.
942+
std::variant<git_treebuilder *, git_oid> curDir{dir->builder.get()};
943+
Object tree; // needed to keep `entry` alive
944+
const git_tree_entry * entry = nullptr;
945+
946+
for (auto & c : CanonPath(relTargetLeft)) {
947+
if (auto builder = std::get_if<git_treebuilder *>(&curDir)) {
948+
assert(*builder);
949+
if (!(entry = git_treebuilder_get(*builder, std::string(c).c_str())))
950+
throw Error("cannot find hard link target '%s' for path '%s'", target, path);
951+
curDir = *git_tree_entry_id(entry);
952+
} else if (auto oid = std::get_if<git_oid>(&curDir)) {
953+
tree = lookupObject(*repo, *oid, GIT_OBJECT_TREE);
954+
if (!(entry = git_tree_entry_byname((const git_tree *) &*tree, std::string(c).c_str())))
955+
throw Error("cannot find hard link target '%s' for path '%s'", target, path);
956+
curDir = *git_tree_entry_id(entry);
957+
}
958+
}
959+
960+
assert(entry);
961+
962+
addToTree(*pathComponents.rbegin(),
963+
*git_tree_entry_id(entry),
964+
git_tree_entry_filemode(entry));
965+
}
966+
912967
Hash sync() override {
913968
updateBuilders({});
914969

src/libfetchers/git-utils.hh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ namespace nix {
77

88
namespace fetchers { struct PublicKey; }
99

10-
struct GitFileSystemObjectSink : FileSystemObjectSink
10+
struct GitFileSystemObjectSink : ExtendedFileSystemObjectSink
1111
{
1212
/**
1313
* Flush builder and return a final Git hash.

src/libutil/fs-sink.hh

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,19 @@ struct FileSystemObjectSink
4141
virtual void createSymlink(const CanonPath & path, const std::string & target) = 0;
4242
};
4343

44+
/**
45+
* An extension of `FileSystemObjectSink` that supports file types
46+
* that are not supported by Nix's FSO model.
47+
*/
48+
struct ExtendedFileSystemObjectSink : virtual FileSystemObjectSink
49+
{
50+
/**
51+
* Create a hard link. The target must be the path of a previously
52+
* encountered file relative to the root of the FSO.
53+
*/
54+
virtual void createHardlink(const CanonPath & path, const CanonPath & target) = 0;
55+
};
56+
4457
/**
4558
* Recursively copy file system objects from the source into the sink.
4659
*/

src/libutil/tarfile.cc

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,7 @@ void unpackTarfile(const Path & tarFile, const Path & destDir)
174174
extract_archive(archive, destDir);
175175
}
176176

177-
time_t unpackTarfileToSink(TarArchive & archive, FileSystemObjectSink & parseSink)
177+
time_t unpackTarfileToSink(TarArchive & archive, ExtendedFileSystemObjectSink & parseSink)
178178
{
179179
time_t lastModified = 0;
180180

@@ -195,7 +195,12 @@ time_t unpackTarfileToSink(TarArchive & archive, FileSystemObjectSink & parseSin
195195

196196
lastModified = std::max(lastModified, archive_entry_mtime(entry));
197197

198-
switch (archive_entry_filetype(entry)) {
198+
if (auto target = archive_entry_hardlink(entry)) {
199+
parseSink.createHardlink(cpath, CanonPath(target));
200+
continue;
201+
}
202+
203+
switch (auto type = archive_entry_filetype(entry)) {
199204

200205
case AE_IFDIR:
201206
parseSink.createDirectory(cpath);
@@ -232,7 +237,7 @@ time_t unpackTarfileToSink(TarArchive & archive, FileSystemObjectSink & parseSin
232237
}
233238

234239
default:
235-
throw Error("file '%s' in tarball has unsupported file type", path);
240+
throw Error("file '%s' in tarball has unsupported file type %d", path, type);
236241
}
237242
}
238243

src/libutil/tarfile.hh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,6 @@ void unpackTarfile(Source & source, const Path & destDir);
4141

4242
void unpackTarfile(const Path & tarFile, const Path & destDir);
4343

44-
time_t unpackTarfileToSink(TarArchive & archive, FileSystemObjectSink & parseSink);
44+
time_t unpackTarfileToSink(TarArchive & archive, ExtendedFileSystemObjectSink & parseSink);
4545

4646
}

tests/functional/tarball.sh

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,3 +71,15 @@ test_tarball() {
7171
test_tarball '' cat
7272
test_tarball .xz xz
7373
test_tarball .gz gzip
74+
75+
# Test hard links.
76+
# All entries in tree.tar.gz refer to the same file, and all have the same inode when unpacked by GNU tar.
77+
# We don't preserve the hard links, because that's an optimization we think is not worth the complexity,
78+
# so we only make sure that the contents are copied correctly.
79+
path="$(nix flake prefetch --json "tarball+file://$(pwd)/tree.tar.gz" | jq -r .storePath)"
80+
[[ $(cat "$path/a/b/foo") = bar ]]
81+
[[ $(cat "$path/a/b/xyzzy") = bar ]]
82+
[[ $(cat "$path/a/yyy") = bar ]]
83+
[[ $(cat "$path/a/zzz") = bar ]]
84+
[[ $(cat "$path/c/aap") = bar ]]
85+
[[ $(cat "$path/fnord") = bar ]]

tests/functional/tree.tar.gz

298 Bytes
Binary file not shown.
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
#include "git-utils.hh"
2+
#include "file-system.hh"
3+
#include "gmock/gmock.h"
4+
#include <git2/global.h>
5+
#include <git2/repository.h>
6+
#include <git2/types.h>
7+
#include <gtest/gtest.h>
8+
#include "fs-sink.hh"
9+
#include "serialise.hh"
10+
11+
namespace nix {
12+
13+
class GitUtilsTest : public ::testing::Test
14+
{
15+
// We use a single repository for all tests.
16+
Path tmpDir;
17+
std::unique_ptr<AutoDelete> delTmpDir;
18+
19+
public:
20+
void SetUp() override
21+
{
22+
tmpDir = createTempDir();
23+
delTmpDir = std::make_unique<AutoDelete>(tmpDir, true);
24+
25+
// Create the repo with libgit2
26+
git_libgit2_init();
27+
git_repository * repo = nullptr;
28+
auto r = git_repository_init(&repo, tmpDir.c_str(), 0);
29+
ASSERT_EQ(r, 0);
30+
git_repository_free(repo);
31+
}
32+
33+
void TearDown() override
34+
{
35+
// Destroy the AutoDelete, triggering removal
36+
// (via unique_ptr::reset(), not AutoDelete::reset(), which would cancel the deletion).
37+
delTmpDir.reset();
38+
}
39+
40+
ref<GitRepo> openRepo()
41+
{
42+
return GitRepo::openRepo(tmpDir, true, false);
43+
}
44+
};
45+
46+
void writeString(CreateRegularFileSink & fileSink, std::string contents, bool executable)
47+
{
48+
if (executable)
49+
fileSink.isExecutable();
50+
fileSink.preallocateContents(contents.size());
51+
fileSink(contents);
52+
}
53+
54+
TEST_F(GitUtilsTest, sink_basic)
55+
{
56+
auto repo = openRepo();
57+
auto sink = repo->getFileSystemObjectSink();
58+
59+
// TODO/Question: It seems a little odd that we use the tarball-like convention of requiring a top-level directory
60+
// here
61+
// The sync method does not document this behavior, should probably be renamed because it's not very
62+
// general, and I can't imagine "non-conventional" archives or any other source being handled by
63+
// this sink.
64+
65+
sink->createDirectory(CanonPath("foo-1.1"));
66+
67+
sink->createRegularFile(CanonPath("foo-1.1/hello"), [](CreateRegularFileSink & fileSink) {
68+
writeString(fileSink, "hello world", false);
69+
});
70+
sink->createRegularFile(CanonPath("foo-1.1/bye"), [](CreateRegularFileSink & fileSink) {
71+
writeString(fileSink, "thanks for all the fish", false);
72+
});
73+
sink->createSymlink(CanonPath("foo-1.1/bye-link"), "bye");
74+
sink->createDirectory(CanonPath("foo-1.1/empty"));
75+
sink->createDirectory(CanonPath("foo-1.1/links"));
76+
sink->createHardlink(CanonPath("foo-1.1/links/foo"), CanonPath("foo-1.1/hello"));
77+
78+
// sink->createHardlink("foo-1.1/links/foo-2", CanonPath("foo-1.1/hello"));
79+
80+
auto result = sink->sync();
81+
auto accessor = repo->getAccessor(result, false);
82+
auto entries = accessor->readDirectory(CanonPath::root);
83+
ASSERT_EQ(entries.size(), 5);
84+
ASSERT_EQ(accessor->readFile(CanonPath("hello")), "hello world");
85+
ASSERT_EQ(accessor->readFile(CanonPath("bye")), "thanks for all the fish");
86+
ASSERT_EQ(accessor->readLink(CanonPath("bye-link")), "bye");
87+
ASSERT_EQ(accessor->readDirectory(CanonPath("empty")).size(), 0);
88+
ASSERT_EQ(accessor->readFile(CanonPath("links/foo")), "hello world");
89+
};
90+
91+
TEST_F(GitUtilsTest, sink_hardlink)
92+
{
93+
auto repo = openRepo();
94+
auto sink = repo->getFileSystemObjectSink();
95+
96+
sink->createDirectory(CanonPath("foo-1.1"));
97+
98+
sink->createRegularFile(CanonPath("foo-1.1/hello"), [](CreateRegularFileSink & fileSink) {
99+
writeString(fileSink, "hello world", false);
100+
});
101+
102+
try {
103+
sink->createHardlink(CanonPath("foo-1.1/link"), CanonPath("hello"));
104+
FAIL() << "Expected an exception";
105+
} catch (const nix::Error & e) {
106+
ASSERT_THAT(e.msg(), testing::HasSubstr("invalid hard link target"));
107+
ASSERT_THAT(e.msg(), testing::HasSubstr("/hello"));
108+
ASSERT_THAT(e.msg(), testing::HasSubstr("foo-1.1/link"));
109+
}
110+
};
111+
112+
} // namespace nix

tests/unit/libfetchers/local.mk

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ libfetchers-tests_LIBS = \
2929
libstore-test-support libutil-test-support \
3030
libfetchers libstore libutil
3131

32-
libfetchers-tests_LDFLAGS := -lrapidcheck $(GTEST_LIBS)
32+
libfetchers-tests_LDFLAGS := -lrapidcheck $(GTEST_LIBS) $(LIBGIT2_LIBS)
3333

3434
ifdef HOST_WINDOWS
3535
# Increase the default reserved stack size to 65 MB so Nix doesn't run out of space
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
#include <iostream>
2+
#include "tracing-file-system-object-sink.hh"
3+
4+
namespace nix::test {
5+
6+
void TracingFileSystemObjectSink::createDirectory(const CanonPath & path)
7+
{
8+
std::cerr << "createDirectory(" << path << ")\n";
9+
sink.createDirectory(path);
10+
}
11+
12+
void TracingFileSystemObjectSink::createRegularFile(
13+
const CanonPath & path, std::function<void(CreateRegularFileSink &)> fn)
14+
{
15+
std::cerr << "createRegularFile(" << path << ")\n";
16+
sink.createRegularFile(path, [&](CreateRegularFileSink & crf) {
17+
// We could wrap this and trace about the chunks of data and such
18+
fn(crf);
19+
});
20+
}
21+
22+
void TracingFileSystemObjectSink::createSymlink(const CanonPath & path, const std::string & target)
23+
{
24+
std::cerr << "createSymlink(" << path << ", target: " << target << ")\n";
25+
sink.createSymlink(path, target);
26+
}
27+
28+
void TracingExtendedFileSystemObjectSink::createHardlink(const CanonPath & path, const CanonPath & target)
29+
{
30+
std::cerr << "createHardlink(" << path << ", target: " << target << ")\n";
31+
sink.createHardlink(path, target);
32+
}
33+
34+
} // namespace nix::test

0 commit comments

Comments
 (0)