diff --git a/src/main/java/org/apache/commons/codec/digest/DigestUtils.java b/src/main/java/org/apache/commons/codec/digest/DigestUtils.java
index 7c84b0b021..8970a03dbc 100644
--- a/src/main/java/org/apache/commons/codec/digest/DigestUtils.java
+++ b/src/main/java/org/apache/commons/codec/digest/DigestUtils.java
@@ -18,24 +18,17 @@
package org.apache.commons.codec.digest;
import java.io.BufferedInputStream;
-import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
-import java.nio.charset.StandardCharsets;
-import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.OpenOption;
import java.nio.file.Path;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.List;
-import java.util.TreeSet;
import org.apache.commons.codec.binary.Hex;
import org.apache.commons.codec.binary.StringUtils;
@@ -191,26 +184,6 @@ public static MessageDigest getDigest(final String algorithm, final MessageDiges
}
}
- /**
- * Returns the {@link GitDirectoryEntry.Type} of a file.
- *
- * @param path The file to check.
- * @return A {@link GitDirectoryEntry.Type}
- */
- private static GitDirectoryEntry.Type getGitDirectoryEntryType(final Path path) {
- // Symbolic links first
- if (Files.isSymbolicLink(path)) {
- return GitDirectoryEntry.Type.SYMBOLIC_LINK;
- }
- if (Files.isDirectory(path)) {
- return GitDirectoryEntry.Type.DIRECTORY;
- }
- if (Files.isExecutable(path)) {
- return GitDirectoryEntry.Type.EXECUTABLE;
- }
- return GitDirectoryEntry.Type.REGULAR;
- }
-
/**
* Gets an MD2 MessageDigest.
*
@@ -407,123 +380,6 @@ public static MessageDigest getShake256_512Digest() {
return getDigest(MessageDigestAlgorithms.SHAKE256_512);
}
- /**
- * Reads through a byte array and return a generalized Git blob identifier.
- *
- *
The identifier is computed in the way described by the
- * SWHID contents identifier, but it can use any hash
- * algorithm.
- *
- * When the hash algorithm is SHA-1, the identifier is identical to Git blob identifier and SWHID contents identifier.
- *
- * @param messageDigest The MessageDigest to use (for example SHA-1).
- * @param data Data to digest.
- * @return A generalized Git blob identifier.
- * @since 1.22.0
- */
- public static byte[] gitBlob(final MessageDigest messageDigest, final byte[] data) {
- messageDigest.reset();
- updateDigest(messageDigest, gitBlobPrefix(data.length));
- return digest(messageDigest, data);
- }
-
- /**
- * Reads through a byte array and return a generalized Git blob identifier.
- *
- * The identifier is computed in the way described by the
- * SWHID contents identifier, but it can use any hash
- * algorithm.
- *
- * When the hash algorithm is SHA-1, the identifier is identical to Git blob identifier and SWHID contents identifier.
- *
- * @param messageDigest The MessageDigest to use (for example SHA-1).
- * @param data Data to digest.
- * @param options Options how to open the file.
- * @return A generalized Git blob identifier.
- * @throws IOException On error accessing the file.
- * @since 1.22.0
- */
- public static byte[] gitBlob(final MessageDigest messageDigest, final Path data, final OpenOption... options) throws IOException {
- messageDigest.reset();
- if (Files.isSymbolicLink(data)) {
- final byte[] linkTarget = Files.readSymbolicLink(data).toString().getBytes(StandardCharsets.UTF_8);
- updateDigest(messageDigest, gitBlobPrefix(linkTarget.length));
- return digest(messageDigest, linkTarget);
- }
- updateDigest(messageDigest, gitBlobPrefix(Files.size(data)));
- return updateDigest(messageDigest, data, options).digest();
- }
-
- private static byte[] gitBlobPrefix(final long dataSize) {
- return gitPrefix("blob ", dataSize);
- }
-
- private static byte[] gitPrefix(final String prefix, final long dataSize) {
- return (prefix + dataSize + "\0").getBytes(StandardCharsets.UTF_8);
- }
-
- /**
- * Returns a generalized Git tree identifier.
- *
- * The identifier is computed in the way described by the
- * SWHID directory identifier, but it can use any hash
- * algorithm.
- *
- * When the hash algorithm is SHA-1, the identifier is identical to Git tree identifier and SWHID directory identifier.
- *
- * @param messageDigest The MessageDigest to use (for example SHA-1).
- * @param entries The directory entries.
- * @return A generalized Git tree identifier.
- */
- static byte[] gitTree(final MessageDigest messageDigest, final Collection entries) {
- final TreeSet treeSet = new TreeSet<>(entries);
- final ByteArrayOutputStream baos = new ByteArrayOutputStream();
- for (final GitDirectoryEntry entry : treeSet) {
- final byte[] treeEntryBytes = entry.toTreeEntryBytes();
- baos.write(treeEntryBytes, 0, treeEntryBytes.length);
- }
- messageDigest.reset();
- updateDigest(messageDigest, gitTreePrefix(baos.size()));
- return updateDigest(messageDigest, baos.toByteArray()).digest();
- }
-
- /**
- * Reads through a byte array and return a generalized Git tree identifier.
- *
- * The identifier is computed in the way described by the
- * SWHID directory identifier, but it can use any hash
- * algorithm.
- *
- * When the hash algorithm is SHA-1, the identifier is identical to Git tree identifier and SWHID directory identifier.
- *
- * @param messageDigest The MessageDigest to use (for example SHA-1).
- * @param data Data to digest.
- * @param options Options how to open the file.
- * @return A generalized Git tree identifier.
- * @throws IOException On error accessing the file.
- * @since 1.22.0
- */
- public static byte[] gitTree(final MessageDigest messageDigest, final Path data, final OpenOption... options) throws IOException {
- final List entries = new ArrayList<>();
- try (DirectoryStream files = Files.newDirectoryStream(data)) {
- for (final Path path : files) {
- final GitDirectoryEntry.Type type = getGitDirectoryEntryType(path);
- final byte[] rawObjectId;
- if (type == GitDirectoryEntry.Type.DIRECTORY) {
- rawObjectId = gitTree(messageDigest, path, options);
- } else {
- rawObjectId = gitBlob(messageDigest, path, options);
- }
- entries.add(new GitDirectoryEntry(path, type, rawObjectId));
- }
- }
- return gitTree(messageDigest, entries);
- }
-
- private static byte[] gitTreePrefix(final long dataSize) {
- return gitPrefix("tree ", dataSize);
- }
-
/**
* Test whether the algorithm is supported.
*
diff --git a/src/main/java/org/apache/commons/codec/digest/GitDirectoryEntry.java b/src/main/java/org/apache/commons/codec/digest/GitDirectoryEntry.java
deleted file mode 100644
index e41400e4b7..0000000000
--- a/src/main/java/org/apache/commons/codec/digest/GitDirectoryEntry.java
+++ /dev/null
@@ -1,183 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.commons.codec.digest;
-
-import java.nio.charset.StandardCharsets;
-import java.nio.file.Path;
-import java.util.Objects;
-
-/**
- * Represents a single entry in a Git tree object.
- *
- * A Git tree object encodes a directory snapshot. Each entry holds:
- *
- * - a {@link Type} that determines the Unix file mode (e.g. {@code 100644} for a regular file),
- * - the entry name (file or directory name, without a path separator),
- * - the raw object id of the referenced blob or sub-tree.
- *
- *
- * Entries are ordered by {@link #compareTo} using Git's tree-sort rule: directory names are compared as if they ended with {@code '/'}, so that {@code foo/}
- * sorts after {@code foobar}.
- *
- * Call {@link #toTreeEntryBytes()} to obtain the binary encoding that Git feeds to its hash function when computing the tree object identifier.
- *
- * @see Git Internals – Git Objects
- * @see SWHID Directory Identifier
- */
-class GitDirectoryEntry implements Comparable {
-
- /**
- * The type of a Git tree entry, which maps to a Unix file-mode string.
- *
- * Git encodes the file type and permission bits as an ASCII octal string that precedes the entry name in the binary tree format. The values defined here
- * cover the four entry types that Git itself produces.
- *
- * This enum is package-private. If it were made public, {@link #mode} would need to be wrapped in an immutable copy to prevent external mutation.
- */
- enum Type {
-
- /**
- * A sub-directory (Git sub-tree).
- */
- DIRECTORY("40000"),
-
- /**
- * An executable file.
- */
- EXECUTABLE("100755"),
-
- /**
- * A regular (non-executable) file.
- */
- REGULAR("100644"),
-
- /**
- * A symbolic link.
- */
- SYMBOLIC_LINK("120000");
-
- /**
- * The ASCII-encoded octal mode string as it appears in the binary tree entry.
- */
- private final byte[] mode;
-
- Type(final String mode) {
- this.mode = mode.getBytes(StandardCharsets.US_ASCII);
- }
- }
-
- private static String getFileName(final Path path) {
- final Path fileName = path.getFileName();
- if (fileName == null) {
- throw new IllegalArgumentException(path.toString());
- }
- return fileName.toString();
- }
-
- /**
- * The entry name (file or directory name, no path separator).
- */
- private final String name;
-
- /**
- * The key used for ordering entries within a tree object.
- *
- * >Git appends {@code '/'} to directory names before comparing.
- */
- private final String sortKey;
-
- /**
- * The Git object type, which determines the Unix file-mode prefix.
- */
- private final Type type;
-
- /**
- * The raw object id of the referenced blob or sub-tree.
- */
- private final byte[] rawObjectId;
-
- /**
- * Creates an entry.
- *
- * @param path The path of the entry; must not be an empty path.
- * @param type The type of the entry.
- * @param rawObjectId The id of the entry.
- * @throws IllegalArgumentException If the path is empty.
- * @throws NullPointerException If any argument is {@code null}.
- */
- GitDirectoryEntry(final Path path, final Type type, final byte[] rawObjectId) {
- this(getFileName(path), type, rawObjectId);
- }
-
- /**
- * Creates an entry.
- *
- * @param name The name of the entry
- * @param type The type of the entry
- * @param rawObjectId The id of the entry
- */
- private GitDirectoryEntry(final String name, final Type type, final byte[] rawObjectId) {
- this.name = name;
- this.type = Objects.requireNonNull(type);
- this.sortKey = type == Type.DIRECTORY ? name + "/" : name;
- this.rawObjectId = Objects.requireNonNull(rawObjectId);
- }
-
- @Override
- public int compareTo(final GitDirectoryEntry o) {
- return sortKey.compareTo(o.sortKey);
- }
-
- @Override
- public boolean equals(final Object obj) {
- if (obj == this) {
- return true;
- }
- if (!(obj instanceof GitDirectoryEntry)) {
- return false;
- }
- final GitDirectoryEntry other = (GitDirectoryEntry) obj;
- return name.equals(other.name);
- }
-
- @Override
- public int hashCode() {
- return name.hashCode();
- }
-
- /**
- * Returns the binary encoding of this entry as it appears inside a Git tree object.
- *
- * The format follows the Git tree entry layout:
- *
- * <mode> SP <name> NUL <20-byte-object-id>
- *
- *
- * @return the binary tree-entry encoding; never {@code null}.
- */
- byte[] toTreeEntryBytes() {
- final byte[] nameBytes = name.getBytes(StandardCharsets.UTF_8);
- final byte[] result = new byte[type.mode.length + nameBytes.length + rawObjectId.length + 2];
- System.arraycopy(type.mode, 0, result, 0, type.mode.length);
- result[type.mode.length] = ' ';
- System.arraycopy(nameBytes, 0, result, type.mode.length + 1, nameBytes.length);
- result[type.mode.length + nameBytes.length + 1] = '\0';
- System.arraycopy(rawObjectId, 0, result, type.mode.length + nameBytes.length + 2, rawObjectId.length);
- return result;
- }
-}
diff --git a/src/main/java/org/apache/commons/codec/digest/GitIdentifiers.java b/src/main/java/org/apache/commons/codec/digest/GitIdentifiers.java
new file mode 100644
index 0000000000..6b4c0ccf2f
--- /dev/null
+++ b/src/main/java/org/apache/commons/codec/digest/GitIdentifiers.java
@@ -0,0 +1,452 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.codec.digest;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.DirectoryStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.security.MessageDigest;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Set;
+import java.util.TreeSet;
+
+/**
+ * Operations for computing Git object identifiers and their generalizations described by the
+ * SWHID specification.
+ *
+ * When the hash algorithm is SHA-1, the identifiers produced by this class are identical to those used by Git.
+ * Other hash algorithms produce generalized identifiers as described by the SWHID specification.
+ *
+ * This class is immutable and thread-safe. However, the {@link MessageDigest} instances passed to it generally won't be.
+ *
+ * @see Git Internals – Git Objects
+ * @see SWHID Specification
+ * @since 1.22.0
+ */
+public class GitIdentifiers {
+
+ /**
+ * The type of a Git tree entry, which maps to a Unix file-mode string.
+ *
+ * Git encodes the file type and permission bits as an ASCII octal string that precedes the entry name in the binary tree format. The values defined here
+ * cover the four entry types that Git itself produces.
+ */
+ public enum FileMode {
+
+ /**
+ * A sub-directory (Git sub-tree).
+ */
+ DIRECTORY("40000"),
+
+ /**
+ * An executable file.
+ */
+ EXECUTABLE("100755"),
+
+ /**
+ * A regular (non-executable) file.
+ */
+ REGULAR("100644"),
+
+ /**
+ * A symbolic link.
+ */
+ SYMBOLIC_LINK("120000");
+
+ /**
+ * The octal mode as used by Git.
+ */
+ private final String mode;
+
+ /**
+ * Serialized {@code mode}: since this is mutable, it must remain private.
+ */
+ private final byte[] modeBytes;
+
+ FileMode(final String mode) {
+ this.mode = mode;
+ this.modeBytes = mode.getBytes(StandardCharsets.US_ASCII);
+ }
+
+ /**
+ * Gets the octal mode as used by Git.
+ *
+ * @return The octal mode
+ */
+ public String getMode() {
+ return mode;
+ }
+ }
+
+ /**
+ * Represents a single entry in a Git tree object.
+ *
+ * A Git tree object encodes a directory snapshot. Each entry holds:
+ *
+ * - a {@link FileMode} that determines the Unix file mode (e.g. {@code 100644} for a regular file),
+ * - the entry name (file or directory name, without a path separator),
+ * - the raw object id of the referenced blob or sub-tree.
+ *
+ *
+ * Entries are ordered by {@link #compareTo} using Git's tree-sort rule: directory names are compared as if they ended with {@code '/'}, so that {@code foo/}
+ * sorts after {@code foobar}.
+ *
+ * @see Git Internals – Git Objects
+ * @see SWHID Directory Identifier
+ */
+    static class DirectoryEntry implements Comparable<DirectoryEntry> {
+
+        /**
+         * The entry name (file or directory name, no path separator).
+         */
+        private final String name;
+        /**
+         * The raw object id of the referenced blob or sub-tree.
+         */
+        private final byte[] rawObjectId;
+        /**
+         * The key used for ordering entries: the name, with {@code '/'} appended for directories, as Git compares it.
+         * <p>Git compares the UTF-8 bytes as unsigned values, so the key holds those bytes decoded as ISO-8859-1 (one char
+         * per byte); {@link String#compareTo} on it yields byte order, which plain UTF-16 order does not guarantee.</p>
+         */
+        private final String sortKey;
+        /**
+         * The Git object type, which determines the Unix file-mode prefix.
+         */
+        private final FileMode type;
+
+        /**
+         * Creates an entry; rejects {@code null} arguments and names containing {@code '/'}.
+         *
+         * @param name The name of the entry
+         * @param type The type of the entry
+         * @param rawObjectId The id of the entry
+         */
+        DirectoryEntry(final String name, final FileMode type, final byte[] rawObjectId) {
+            if (Objects.requireNonNull(name).indexOf('/') >= 0) {
+                throw new IllegalArgumentException("Entry name must not contain '/': " + name);
+            }
+            this.name = name;
+            this.type = Objects.requireNonNull(type);
+            this.sortKey = new String((type == FileMode.DIRECTORY ? name + "/" : name).getBytes(StandardCharsets.UTF_8), StandardCharsets.ISO_8859_1);
+            this.rawObjectId = Objects.requireNonNull(rawObjectId);
+        }
+
+        @Override
+        public int compareTo(final DirectoryEntry o) {
+            return sortKey.compareTo(o.sortKey); // unsigned UTF-8 byte order, see sortKey
+        }
+
+        @Override
+        public boolean equals(final Object obj) {
+            if (obj == this) {
+                return true;
+            }
+            if (!(obj instanceof DirectoryEntry)) {
+                return false;
+            }
+            final DirectoryEntry other = (DirectoryEntry) obj;
+            return name.equals(other.name);
+        }
+
+        @Override
+        public int hashCode() {
+            return name.hashCode();
+        }
+
+    }
+
+ /**
+ * Builds a Git tree identifier for a virtual directory structure, such as the contents of
+ * an archive.
+ */
+    public static final class TreeIdBuilder {
+
+        /**
+         * A supplier of a blob identifier that may throw {@link IOException}.
+         */
+        @FunctionalInterface
+        private interface BlobIdSupplier {
+            byte[] get() throws IOException;
+        }
+
+        private static String requireValidName(final String name) {
+            if (name.isEmpty() || ".".equals(name) || "..".equals(name)) { // not usable as Git tree entry names
+                throw new IllegalArgumentException("Path component not allowed: " + name);
+            }
+            return name;
+        }
+
+        private final Map<String, TreeIdBuilder> dirEntries = new HashMap<>();
+        private final Map<String, DirectoryEntry> fileEntries = new HashMap<>();
+        private final MessageDigest messageDigest;
+
+        private TreeIdBuilder(final MessageDigest messageDigest) {
+            this.messageDigest = Objects.requireNonNull(messageDigest);
+        }
+
+        /**
+         * Returns the {@link TreeIdBuilder} for the named subdirectory, creating it if absent.
+         *
+         * @param name The relative path of the subdirectory in normalized form (may contain {@code '/'}).
+         * @return The {@link TreeIdBuilder} for the subdirectory.
+         * @throws IllegalArgumentException If any path component is {@code ".."}.
+         */
+        public TreeIdBuilder addDirectory(final String name) {
+            TreeIdBuilder current = this;
+            for (final String component : name.split("/", -1)) {
+                // Skip no-op segments: empty ones (leading, trailing or doubled '/') and "."
+                if (component.isEmpty() || ".".equals(component)) {
+                    continue;
+                }
+                current = current.dirEntries.computeIfAbsent(requireValidName(component), k -> new TreeIdBuilder(messageDigest));
+            }
+            return current;
+        }
+
+        /**
+         * Adds a file entry at the given path within this tree, streaming content without buffering.
+         *
+         * <p>If {@code name} contains {@code '/'}, intermediate subdirectories are created automatically.</p>
+         *
+         * <p>The stream is eagerly drained.</p>
+         *
+         * @param mode The file mode (e.g. {@link FileMode#REGULAR}).
+         * @param name The relative path of the entry in normalized form (may contain {@code '/'}).
+         * @param dataSize The exact number of bytes in {@code data}.
+         * @param data The file content.
+         * @throws IOException If the stream cannot be read.
+         * @throws IllegalArgumentException If the file name is empty or {@code "."}, or any path component is {@code ".."}.
+         */
+        public void addFile(final FileMode mode, final String name, final long dataSize, final InputStream data) throws IOException {
+            addFile(mode, name, () -> blobId(messageDigest, dataSize, data));
+        }
+
+        private void addFile(final FileMode mode, final String name, final BlobIdSupplier blobId) throws IOException {
+            final int slash = name.lastIndexOf('/');
+            if (slash < 0) { // leaf: the name is validated before blobId.get() consumes any input
+                fileEntries.put(name, new DirectoryEntry(requireValidName(name), mode, blobId.get()));
+            } else { // descend into (and create) the parent directories, then add the leaf there
+                addDirectory(name.substring(0, slash)).addFile(mode, name.substring(slash + 1), blobId);
+            }
+        }
+
+        /**
+         * Adds a file entry at the given path within this tree.
+         *
+         * <p>If {@code name} contains {@code '/'}, intermediate subdirectories are created automatically.</p>
+         *
+         * @param mode The file mode (e.g. {@link FileMode#REGULAR}).
+         * @param name The relative path of the entry in normalized form (may contain {@code '/'}).
+         * @param data The file content.
+         * @throws IOException If an I/O error occurs.
+         * @throws IllegalArgumentException If the file name is empty or {@code "."}, or any path component is {@code ".."}.
+         */
+        public void addFile(final FileMode mode, final String name, final byte[] data) throws IOException {
+            addFile(mode, name, () -> blobId(messageDigest, data));
+        }
+
+        /**
+         * Adds a symbolic link entry at the given path within this tree.
+         *
+         * <p>If {@code name} contains {@code '/'}, intermediate subdirectories are created automatically.</p>
+         *
+         * @param name The relative path of the entry in normalized form (may contain {@code '/'}).
+         * @param target The target of the symbolic link.
+         * @throws IOException If an I/O error occurs.
+         * @throws IllegalArgumentException If the link name is empty or {@code "."}, or any path component is {@code ".."}.
+         */
+        public void addSymbolicLink(final String name, final String target) throws IOException {
+            addFile(FileMode.SYMBOLIC_LINK, name, target.getBytes(StandardCharsets.UTF_8));
+        }
+
+        /**
+         * Computes the Git tree identifier for this directory and all its descendants.
+         *
+         * @return The raw tree identifier bytes.
+         * @throws IOException If a digest operation fails.
+         */
+        public byte[] build() throws IOException {
+            final Set<DirectoryEntry> entries = new TreeSet<>(fileEntries.values()); // sorted by DirectoryEntry.compareTo
+            for (final Map.Entry<String, TreeIdBuilder> e : dirEntries.entrySet()) {
+                entries.add(new DirectoryEntry(e.getKey(), FileMode.DIRECTORY, e.getValue().build()));
+            }
+            final ByteArrayOutputStream baos = new ByteArrayOutputStream();
+            for (final DirectoryEntry entry : entries) { // each entry: <mode> SP <name> NUL <raw object id>
+                baos.write(entry.type.modeBytes);
+                baos.write(' ');
+                baos.write(entry.name.getBytes(StandardCharsets.UTF_8));
+                baos.write('\0');
+                baos.write(entry.rawObjectId);
+            }
+            messageDigest.reset(); // sub-trees were built above, so the shared digest is free again here
+            DigestUtils.updateDigest(messageDigest, getGitTreePrefix(baos.size()));
+            return DigestUtils.updateDigest(messageDigest, baos.toByteArray()).digest();
+        }
+    }
+
+ /**
+ * Reads through a byte array and returns a generalized Git blob identifier.
+ *
+ * The identifier is computed in the way described by the
+ * SWHID contents identifier, but it can use any hash
+ * algorithm.
+ *
+ * When the hash algorithm is SHA-1, the identifier is identical to Git blob identifier and SWHID contents identifier.
+ *
+ * @param messageDigest The MessageDigest to use (for example SHA-1); it is reset first, so any pending update is discarded.
+ * @param data Data to digest; must not be {@code null}.
+ * @return A generalized Git blob identifier.
+ */
+ public static byte[] blobId(final MessageDigest messageDigest, final byte[] data) {
+ messageDigest.reset();
+ DigestUtils.updateDigest(messageDigest, getGitBlobPrefix(data.length)); // header "blob <length>" + NUL precedes the content
+ return DigestUtils.digest(messageDigest, data);
+ }
+
+ /**
+ * Reads through a stream of known size and returns a generalized Git blob identifier, without buffering.
+ *
+ * When the size of the content is known in advance, this overload streams {@code data} directly through
+ * the digest without buffering the full content in memory.
+ *
+ * When the hash algorithm is SHA-1, the identifier is identical to Git blob identifier and SWHID contents identifier.
+ *
+ * @param messageDigest The MessageDigest to use (for example SHA-1); it is reset first, so any pending update is discarded.
+ * @param dataSize The exact number of bytes in {@code data}; it is written into the blob header as given and is not checked against the bytes read.
+ * @param data Stream to digest.
+ * @return A generalized Git blob identifier.
+ * @throws IOException On error reading the stream.
+ */
+ public static byte[] blobId(final MessageDigest messageDigest, final long dataSize, final InputStream data) throws IOException {
+ messageDigest.reset();
+ DigestUtils.updateDigest(messageDigest, getGitBlobPrefix(dataSize)); // the header must be hashed before the content, hence the size up front
+ return DigestUtils.updateDigest(messageDigest, data).digest();
+ }
+
+ /**
+ * Reads through a file and returns a generalized Git blob identifier.
+ *
+ * The identifier is computed in the way described by the
+ * SWHID contents identifier, but it can use any hash
+ * algorithm.
+ *
+ * When the hash algorithm is SHA-1, the identifier is identical to Git blob identifier and SWHID contents identifier.
+ *
+ * @param messageDigest The MessageDigest to use (for example SHA-1).
+ * @param data Path to the file to digest; for a symbolic link, the link target path (UTF-8 encoded) is digested instead of the file it points to.
+ * @return A generalized Git blob identifier.
+ * @throws IOException On error accessing the file.
+ */
+ public static byte[] blobId(final MessageDigest messageDigest, final Path data) throws IOException {
+ if (Files.isSymbolicLink(data)) {
+ final byte[] linkTarget = Files.readSymbolicLink(data).toString().getBytes(StandardCharsets.UTF_8);
+ return blobId(messageDigest, linkTarget); // the blob content of a link is its target path
+ }
+ messageDigest.reset();
+ DigestUtils.updateDigest(messageDigest, getGitBlobPrefix(Files.size(data))); // size is read separately from the content that follows
+ return DigestUtils.updateDigest(messageDigest, data).digest();
+ }
+
+ private static FileMode getGitDirectoryEntryType(final Path path) {
+ // Symbolic links first: Files.isDirectory follows links by default and would classify a link by its target
+ if (Files.isSymbolicLink(path)) {
+ return FileMode.SYMBOLIC_LINK;
+ }
+ if (Files.isDirectory(path)) {
+ return FileMode.DIRECTORY;
+ }
+ if (Files.isExecutable(path)) { // directories were handled above, so this only distinguishes files
+ return FileMode.EXECUTABLE;
+ }
+ return FileMode.REGULAR; // everything else is treated as a regular file
+ }
+
+ private static byte[] getGitBlobPrefix(final long dataSize) {
+ return getGitPrefix("blob", dataSize);
+ }
+
+ private static byte[] getGitPrefix(final String type, final long dataSize) {
+ return (type + " " + dataSize + "\0").getBytes(StandardCharsets.UTF_8); // object header: "<type> <decimal size>" followed by a NUL byte
+ }
+
+ private static byte[] getGitTreePrefix(final long dataSize) {
+ return getGitPrefix("tree", dataSize);
+ }
+
+    private static void populateFromPath(final TreeIdBuilder builder, final Path directory) throws IOException { // mirrors directory into builder
+        try (DirectoryStream<Path> files = Files.newDirectoryStream(directory)) { // the stream must be closed, hence try-with-resources
+            for (final Path path : files) {
+                final String name = Objects.toString(path.getFileName());
+                final FileMode mode = getGitDirectoryEntryType(path);
+                if (mode == FileMode.DIRECTORY) {
+                    populateFromPath(builder.addDirectory(name), path); // recurse with the child builder so nesting is preserved
+                } else {
+                    builder.addFile(mode, name, () -> blobId(builder.messageDigest, path)); // files and symbolic links become blobs
+                }
+            }
+        }
+    }
+
+ /**
+ * Reads through a directory and returns a generalized Git tree identifier.
+ *
+ * The identifier is computed in the way described by the
+ * SWHID directory identifier, but it can use any hash
+ * algorithm.
+ *
+ * When the hash algorithm is SHA-1, the identifier is identical to Git tree identifier and SWHID directory identifier.
+ *
+ * @param messageDigest The MessageDigest to use (for example SHA-1).
+ * @param data Path to the directory to digest; sub-directories are walked recursively and no entry is filtered out.
+ * @return A generalized Git tree identifier.
+ * @throws IOException On error accessing the directory or its contents.
+ */
+ public static byte[] treeId(final MessageDigest messageDigest, final Path data) throws IOException {
+ final TreeIdBuilder builder = treeIdBuilder(messageDigest);
+ populateFromPath(builder, data); // hashes every blob while walking; tree ids are computed in build()
+ return builder.build();
+ }
+
+ /**
+ * Returns a new {@link TreeIdBuilder} for constructing a generalized Git tree identifier from a virtual directory
+ * structure, such as the contents of an archive.
+ *
+ * The identifier is computed in the way described by the
+ * SWHID directory identifier, but it can use any hash
+ * algorithm.
+ *
+ * When the hash algorithm is SHA-1, the identifier is identical to Git tree identifier and SWHID directory identifier.
+ *
+ * @param messageDigest The MessageDigest to use (for example SHA-1).
+ * @return A new {@link TreeIdBuilder}.
+ */
+ public static TreeIdBuilder treeIdBuilder(final MessageDigest messageDigest) {
+ return new TreeIdBuilder(messageDigest);
+ }
+
+ private GitIdentifiers() {
+ // utility class
+ }
+}
diff --git a/src/test/java/org/apache/commons/codec/digest/DigestUtilsTest.java b/src/test/java/org/apache/commons/codec/digest/DigestUtilsTest.java
index 7d1e72b0b8..6f7160baa7 100644
--- a/src/test/java/org/apache/commons/codec/digest/DigestUtilsTest.java
+++ b/src/test/java/org/apache/commons/codec/digest/DigestUtilsTest.java
@@ -32,14 +32,11 @@
import java.io.OutputStream;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
-import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.security.MessageDigest;
-import java.util.ArrayList;
import java.util.Arrays;
-import java.util.List;
import java.util.Locale;
import java.util.Random;
import java.util.stream.Stream;
@@ -50,14 +47,11 @@
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.SystemUtils;
import org.junit.jupiter.api.AfterEach;
-import org.junit.jupiter.api.Assumptions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
-import org.junit.jupiter.api.io.TempDir;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;
-import org.junit.jupiter.params.provider.ValueSource;
/**
* Tests {@link DigestUtils}.
@@ -244,31 +238,6 @@ class DigestUtilsTest {
"CA 92 BF 0B E5 61 5E 96 95 9D 76 71 97 A0 BE EB";
// @formatter:on
- /**
- * Binary body of the test tree object used in {@link #testGitTreeCollection}.
- *
- * Each entry has the format {@code SP NUL <20-byte-object-id>}.
- */
- private static final String TREE_BODY_HEX =
- // 100644 hello.txt\0 + objectId
- "3130303634342068656c6c6f2e74787400" + "a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0" +
- // 120000 link.txt\0 + objectId
- "313230303030206c696e6b2e74787400" + "1234567890abcdef1234567890abcdef12345678" +
- // 100755 run.sh\0 + objectId
- "3130303735352072756e2e736800" + "f0e1d2c3b4a5f6e7d8c9b0a1f2e3d4c5b6a7f8e9" +
- // 40000 src\0 + objectId
- "34303030302073726300" + "deadbeefdeadbeefdeadbeefdeadbeefdeadbeef";
-
- static Stream gitBlobProvider() {
- return Stream.of(Arguments.of("DigestUtilsTest/hello.txt", "5f4a83288e67f1be2d6fcdad84165a86c6a970d7"),
- Arguments.of("DigestUtilsTest/greetings.txt", "6cf4f797455661e61d1ee6913fc29344f5897243"),
- Arguments.of("DigestUtilsTest/subdir/nested.txt", "07a392ddb4dbff06a373a7617939f30b2dcfe719"));
- }
-
- private static Path resourcePath(final String resourceName) throws Exception {
- return Paths.get(DigestUtilsTest.class.getClassLoader().getResource(resourceName).toURI());
- }
-
static Stream testShake128_256() {
// @formatter:off
return Stream.of(
@@ -506,67 +475,6 @@ void testGetMessageDigest() {
assertEquals(MessageDigestAlgorithms.MD5, digestUtils.getMessageDigest().getAlgorithm());
}
- @ParameterizedTest
- @MethodSource("gitBlobProvider")
- void testGitBlobByteArray(final String resourceName, final String expectedSha1Hex) throws Exception {
- final byte[] data = Files.readAllBytes(resourcePath(resourceName));
- assertArrayEquals(Hex.decodeHex(expectedSha1Hex), DigestUtils.gitBlob(DigestUtils.getSha1Digest(), data));
- }
-
- @ParameterizedTest
- @MethodSource("gitBlobProvider")
- void testGitBlobPath(final String resourceName, final String expectedSha1Hex) throws Exception {
- assertArrayEquals(Hex.decodeHex(expectedSha1Hex), DigestUtils.gitBlob(DigestUtils.getSha1Digest(), resourcePath(resourceName)));
- }
-
- @Test
- void testGitBlobSymlink(@TempDir final Path tempDir) throws Exception {
- final Path subDir = Files.createDirectory(tempDir.resolve("subdir"));
- Files.write(subDir.resolve("file.txt"), "hello".getBytes(StandardCharsets.UTF_8));
- final Path linkToDir;
- final Path linkToFile;
- try {
- linkToDir = Files.createSymbolicLink(tempDir.resolve("link-to-dir"), Paths.get("subdir"));
- linkToFile = Files.createSymbolicLink(tempDir.resolve("link-to-file"), Paths.get("subdir/file.txt"));
- } catch (final UnsupportedOperationException e) {
- Assumptions.assumeTrue(false, "Symbolic links not supported on this filesystem");
- return;
- }
- final MessageDigest sha1 = DigestUtils.getSha1Digest();
- assertArrayEquals(Hex.decodeHex("8bbe8a53790056316b23b7c270f10ab6bf6bb1b4"), DigestUtils.gitBlob(sha1, linkToDir));
- assertArrayEquals(Hex.decodeHex("dfe6ef8392ae13a11ff85419b4fd906d997b6cb7"), DigestUtils.gitBlob(sha1, linkToFile));
- }
-
- @ParameterizedTest
- @ValueSource(strings = {MessageDigestAlgorithms.SHA_1, MessageDigestAlgorithms.SHA_256})
- void testGitTreeCollection(final String algorithm) throws Exception {
- final byte[] helloId = Hex.decodeHex("a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0");
- final byte[] runId = Hex.decodeHex("f0e1d2c3b4a5f6e7d8c9b0a1f2e3d4c5b6a7f8e9");
- final byte[] linkId = Hex.decodeHex("1234567890abcdef1234567890abcdef12345678");
- final byte[] srcId = Hex.decodeHex("deadbeefdeadbeefdeadbeefdeadbeefdeadbeef");
-
- // Entries are supplied out of order to verify that the method sorts them correctly.
- final List entries = new ArrayList<>();
- entries.add(new GitDirectoryEntry(Paths.get("src"), GitDirectoryEntry.Type.DIRECTORY, srcId));
- entries.add(new GitDirectoryEntry(Paths.get("run.sh"), GitDirectoryEntry.Type.EXECUTABLE, runId));
- entries.add(new GitDirectoryEntry(Paths.get("hello.txt"), GitDirectoryEntry.Type.REGULAR, helloId));
- entries.add(new GitDirectoryEntry(Paths.get("link.txt"), GitDirectoryEntry.Type.SYMBOLIC_LINK, linkId));
-
- // Compute expected value
- final byte[] treeBody = Hex.decodeHex(TREE_BODY_HEX);
- final MessageDigest md = DigestUtils.getDigest(algorithm);
- DigestUtils.updateDigest(md, ("tree " + treeBody.length + "\0").getBytes(StandardCharsets.UTF_8));
- final byte[] expected = DigestUtils.updateDigest(md, treeBody).digest();
-
- assertArrayEquals(expected, DigestUtils.gitTree(md, entries));
- }
-
- @Test
- void testGitTreePath() throws Exception {
- assertArrayEquals(Hex.decodeHex("e4b21f6d78ceba6eb7c211ac15e3337ec4614e8a"),
- DigestUtils.gitTree(DigestUtils.getSha1Digest(), resourcePath("DigestUtilsTest")));
- }
-
@Test
void testInternalNoSuchAlgorithmException() {
assertThrows(IllegalArgumentException.class, () -> DigestUtils.getDigest("Bogus Bogus"));
diff --git a/src/test/java/org/apache/commons/codec/digest/GitDirectoryEntryTest.java b/src/test/java/org/apache/commons/codec/digest/GitDirectoryEntryTest.java
deleted file mode 100644
index 8dd22ce4aa..0000000000
--- a/src/test/java/org/apache/commons/codec/digest/GitDirectoryEntryTest.java
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.commons.codec.digest;
-
-import static org.junit.jupiter.api.Assertions.assertArrayEquals;
-import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertNotEquals;
-import static org.junit.jupiter.api.Assertions.assertThrows;
-
-import java.nio.file.Paths;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-
-import org.junit.jupiter.api.Test;
-
-class GitDirectoryEntryTest {
-
- private static final byte[] ZERO_ID = new byte[20];
-
- @Test
- void testConstructor() {
- assertThrows(NullPointerException.class, () -> new GitDirectoryEntry(null, GitDirectoryEntry.Type.REGULAR, ZERO_ID));
- assertThrows(NullPointerException.class, () -> new GitDirectoryEntry(Paths.get("hello.txt"), null, ZERO_ID));
- assertThrows(NullPointerException.class, () -> new GitDirectoryEntry(Paths.get("hello.txt"), GitDirectoryEntry.Type.REGULAR, null));
- assertThrows(IllegalArgumentException.class, () -> new GitDirectoryEntry(Paths.get("/"), GitDirectoryEntry.Type.REGULAR, ZERO_ID));
- }
-
- /**
- * Equality and hash code are based solely on the entry name.
- */
- @Test
- void testEqualityBasedOnNameOnly() {
- final byte[] otherId = new byte[20];
- Arrays.fill(otherId, (byte) 0xff);
- final GitDirectoryEntry regular = new GitDirectoryEntry(Paths.get("foo"), GitDirectoryEntry.Type.REGULAR, ZERO_ID);
- final GitDirectoryEntry executable = new GitDirectoryEntry(Paths.get("foo"), GitDirectoryEntry.Type.EXECUTABLE, otherId);
- // Same name, different type and object id -> equal
- assertEquals(regular, executable);
- assertEquals(regular.hashCode(), executable.hashCode());
- // Different name -> not equal
- assertNotEquals(regular, new GitDirectoryEntry(Paths.get("bar"), GitDirectoryEntry.Type.REGULAR, ZERO_ID));
- // Same reference -> equal
- assertEquals(regular, regular);
- // Not equal to null or unrelated type
- assertNotEquals(regular, null);
- assertNotEquals(regular, "foo");
- }
-
- /**
- * The Path constructor must extract the filename component.
- */
- @Test
- void testPathConstructorUsesFilename() {
- final GitDirectoryEntry fromLabel = new GitDirectoryEntry(Paths.get("hello.txt"), GitDirectoryEntry.Type.REGULAR, ZERO_ID);
- final GitDirectoryEntry fromRelative = new GitDirectoryEntry(Paths.get("subdir/hello.txt"), GitDirectoryEntry.Type.REGULAR, ZERO_ID);
- final GitDirectoryEntry fromAbsolute = new GitDirectoryEntry(Paths.get("hello.txt").toAbsolutePath(), GitDirectoryEntry.Type.REGULAR, ZERO_ID);
- assertEquals(fromLabel, fromRelative);
- assertEquals(fromLabel, fromAbsolute);
- assertArrayEquals(fromLabel.toTreeEntryBytes(), fromRelative.toTreeEntryBytes());
- assertArrayEquals(fromLabel.toTreeEntryBytes(), fromAbsolute.toTreeEntryBytes());
- }
-
- /**
- * Entries should be sorted by Git sort rule.
- *
- * Git compares the names of the entries, but adds a {@code /} at the end of directory entries.
- */
- @Test
- void testSortOrder() {
- final GitDirectoryEntry alpha = new GitDirectoryEntry(Paths.get("alpha.txt"), GitDirectoryEntry.Type.REGULAR, ZERO_ID);
- final GitDirectoryEntry fooTxt = new GitDirectoryEntry(Paths.get("foo.txt"), GitDirectoryEntry.Type.REGULAR, ZERO_ID);
- final GitDirectoryEntry fooDir = new GitDirectoryEntry(Paths.get("foo"), GitDirectoryEntry.Type.DIRECTORY, ZERO_ID);
- final GitDirectoryEntry foobar = new GitDirectoryEntry(Paths.get("foobar"), GitDirectoryEntry.Type.REGULAR, ZERO_ID);
- final GitDirectoryEntry zeta = new GitDirectoryEntry(Paths.get("zeta.txt"), GitDirectoryEntry.Type.REGULAR, ZERO_ID);
- final List entries = new ArrayList<>(Arrays.asList(zeta, foobar, fooDir, alpha, fooTxt));
- entries.sort(GitDirectoryEntry::compareTo);
- assertEquals(Arrays.asList(alpha, fooTxt, fooDir, foobar, zeta), entries);
- }
-}
diff --git a/src/test/java/org/apache/commons/codec/digest/GitIdentifiersTest.java b/src/test/java/org/apache/commons/codec/digest/GitIdentifiersTest.java
new file mode 100644
index 0000000000..8c8b7c45b0
--- /dev/null
+++ b/src/test/java/org/apache/commons/codec/digest/GitIdentifiersTest.java
@@ -0,0 +1,269 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.codec.digest;
+
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNotEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+import java.io.ByteArrayInputStream;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.security.MessageDigest;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.stream.Stream;
+
+import org.apache.commons.codec.binary.Hex;
+import org.apache.commons.codec.digest.GitIdentifiers.DirectoryEntry;
+import org.junit.jupiter.api.Assumptions;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
+import org.junit.jupiter.params.provider.ValueSource;
+
+/**
+ * Tests {@link GitIdentifiers}.
+ */
+class GitIdentifiersTest {
+
+
+ private static final byte[] ZERO_ID = new byte[20]; // 20 zero bytes, used as a placeholder object id by the DirectoryEntry tests
+
+    static Stream<Arguments> blobIdProvider() { // each row: (classpath resource name, expected SHA-1 blob id as hex)
+        return Stream.of(Arguments.of("DigestUtilsTest/hello.txt", "5f4a83288e67f1be2d6fcdad84165a86c6a970d7"),
+                Arguments.of("DigestUtilsTest/greetings.txt", "6cf4f797455661e61d1ee6913fc29344f5897243"),
+                Arguments.of("DigestUtilsTest/subdir/nested.txt", "07a392ddb4dbff06a373a7617939f30b2dcfe719"));
+    }
+
+ private static Path resourcePath(final String resourceName) throws Exception { // resolves a test classpath resource to a Path
+ return Paths.get(GitIdentifiersTest.class.getClassLoader().getResource(resourceName).toURI()); // NOTE(review): getResource() is null for an unknown name -> NPE
+ }
+
+    static Stream<Arguments> testTreeIdBuilder() { // rows: algorithm, then hex ids for hello.txt, link.txt, run.sh and the empty src tree
+        return Stream.of(
+                Arguments.of(MessageDigestAlgorithms.SHA_1,
+                        "ce013625030ba8dba906f756967f9e9ca394464a", // blob id of "hello\n"
+                        "8bbe8a53790056316b23b7c270f10ab6bf6bb1b4", // blob id of "subdir"
+                        "1a2485251c33a70432394c93fb89330ef214bfc9", // blob id of "#!/bin/sh\n"
+                        "4b825dc642cb6eb9a060e54bf8d69288fbee4904"), // tree id of empty directory
+                Arguments.of(MessageDigestAlgorithms.SHA_256,
+                        "2cf8d83d9ee29543b34a87727421fdecb7e3f3a183d337639025de576db9ebb4", // blob id of "hello\n"
+                        "33910dae80b0db75dbad7fa521dbbf1885a07edfab1228871c41a2e94ccd7edb", // blob id of "subdir"
+                        "1249034e3cf9007362d695b09b1fbdb4c578903bf10b665749b94743f8177ce1", // blob id of "#!/bin/sh\n"
+                        "6ef19b41225c5369f1c104d45d8d85efa9b057b53b14b4b9b939dd74decc5321")); // tree id of empty directory
+    }
+
+ @ParameterizedTest
+ @MethodSource("blobIdProvider")
+ void testBlobIdByteArray(final String resourceName, final String expectedSha1Hex) throws Exception {
+ final byte[] data = Files.readAllBytes(resourcePath(resourceName)); // whole resource content held in memory
+ assertArrayEquals(Hex.decodeHex(expectedSha1Hex), GitIdentifiers.blobId(DigestUtils.getSha1Digest(), data)); // byte[] overload
+ }
+
+ @ParameterizedTest
+ @MethodSource("blobIdProvider")
+ void testBlobIdInputStreamWithSize(final String resourceName, final String expectedSha1Hex) throws Exception {
+ final byte[] data = Files.readAllBytes(resourcePath(resourceName));
+ assertArrayEquals(Hex.decodeHex(expectedSha1Hex), // stream overload: the content length is passed explicitly, before the stream
+ GitIdentifiers.blobId(DigestUtils.getSha1Digest(), data.length, new ByteArrayInputStream(data)));
+ }
+
+ @ParameterizedTest
+ @MethodSource("blobIdProvider")
+ void testBlobIdPath(final String resourceName, final String expectedSha1Hex) throws Exception { // Path overload, same expected ids as the byte[] test
+ assertArrayEquals(Hex.decodeHex(expectedSha1Hex), GitIdentifiers.blobId(DigestUtils.getSha1Digest(), resourcePath(resourceName)));
+ }
+
+    @Test
+    void testBlobIdSymlink(@TempDir final Path tempDir) throws Exception {
+        final Path subDir = Files.createDirectory(tempDir.resolve("subdir"));
+        Files.write(subDir.resolve("file.txt"), "hello".getBytes(StandardCharsets.UTF_8));
+        try { // guard link creation only, so an UnsupportedOperationException thrown by blobId() still fails the test
+            Files.createSymbolicLink(tempDir.resolve("link-to-dir"), Paths.get("subdir"));
+            Files.createSymbolicLink(tempDir.resolve("link-to-file"), Paths.get("subdir/file.txt"));
+        } catch (final UnsupportedOperationException e) {
+            Assumptions.abort("Symbolic links not supported on this filesystem");
+        }
+        final MessageDigest sha1 = DigestUtils.getSha1Digest(); // 8bbe8a53... below is also the blob id of the target text "subdir"
+        assertArrayEquals(Hex.decodeHex("8bbe8a53790056316b23b7c270f10ab6bf6bb1b4"), GitIdentifiers.blobId(sha1, tempDir.resolve("link-to-dir")));
+        assertArrayEquals(Hex.decodeHex("dfe6ef8392ae13a11ff85419b4fd906d997b6cb7"), GitIdentifiers.blobId(sha1, tempDir.resolve("link-to-file")));
+    }
+
+ @Test
+ void testDirectoryEntryConstructor() {
+ assertThrows(NullPointerException.class, () -> new DirectoryEntry(null, GitIdentifiers.FileMode.REGULAR, ZERO_ID)); // null name
+ assertThrows(NullPointerException.class, () -> new DirectoryEntry("hello.txt", null, ZERO_ID)); // null file mode
+ assertThrows(NullPointerException.class, () -> new DirectoryEntry("hello.txt", GitIdentifiers.FileMode.REGULAR, null)); // null object id
+ assertThrows(IllegalArgumentException.class, () -> new DirectoryEntry("/", GitIdentifiers.FileMode.REGULAR, ZERO_ID)); // "/" is rejected as a name
+ }
+
+ /**
+ * Equality and hash code are based solely on the entry name.
+ */
+ @Test
+ void testDirectoryEntryEqualityBasedOnNameOnly() {
+ final byte[] otherId = new byte[20];
+ Arrays.fill(otherId, (byte) 0xff);
+ final DirectoryEntry regular = new DirectoryEntry("foo", GitIdentifiers.FileMode.REGULAR, ZERO_ID);
+ final DirectoryEntry executable = new DirectoryEntry("foo", GitIdentifiers.FileMode.EXECUTABLE, otherId);
+ // Same name, different file mode and object id -> equal
+ assertEquals(regular, executable);
+ assertEquals(regular.hashCode(), executable.hashCode());
+ // Different name -> not equal
+ assertNotEquals(regular, new DirectoryEntry("bar", GitIdentifiers.FileMode.REGULAR, ZERO_ID));
+ // Same reference -> equal
+ assertEquals(regular, regular);
+ // Not equal to null or to an object of an unrelated type; equals() is called directly
+ assertFalse(regular.equals(null));
+ assertFalse(regular.equals("foo"));
+ }
+
+    /**
+     * Entries should be sorted by Git sort rule.
+     *
+     * <p>Git compares the names of the entries, but adds a {@code /} at the end of directory entries.</p>
+     */
+    @Test
+    void testDirectoryEntrySortOrder() {
+        final DirectoryEntry alpha = new DirectoryEntry("alpha.txt", GitIdentifiers.FileMode.REGULAR, ZERO_ID);
+        final DirectoryEntry fooTxt = new DirectoryEntry("foo.txt", GitIdentifiers.FileMode.REGULAR, ZERO_ID);
+        final DirectoryEntry fooDir = new DirectoryEntry("foo", GitIdentifiers.FileMode.DIRECTORY, ZERO_ID);
+        final DirectoryEntry foobar = new DirectoryEntry("foobar", GitIdentifiers.FileMode.REGULAR, ZERO_ID);
+        final DirectoryEntry zeta = new DirectoryEntry("zeta.txt", GitIdentifiers.FileMode.REGULAR, ZERO_ID);
+        final List<DirectoryEntry> entries = new ArrayList<>(Arrays.asList(zeta, foobar, fooDir, alpha, fooTxt));
+        entries.sort(DirectoryEntry::compareTo);
+        assertEquals(Arrays.asList(alpha, fooTxt, fooDir, foobar, zeta), entries); // "foo.txt" < "foo/" < "foobar" because '.' < '/' < 'b'
+    }
+
+ @ParameterizedTest
+ @MethodSource // no value: JUnit uses the static factory method that has the same name as this test method
+ void testTreeIdBuilder(final String algorithm, final String helloHex, final String linkHex, final String runHex, final String srcHex) throws Exception {
+ final byte[] helloContent = "hello\n".getBytes(StandardCharsets.UTF_8);
+ final byte[] runContent = "#!/bin/sh\n".getBytes(StandardCharsets.UTF_8);
+ final String linkTarget = "subdir";
+ final MessageDigest md = DigestUtils.getDigest(algorithm);
+
+ // Verify individual blob IDs against pre-computed constants.
+ assertArrayEquals(Hex.decodeHex(helloHex), GitIdentifiers.blobId(md, helloContent));
+ assertArrayEquals(Hex.decodeHex(linkHex), GitIdentifiers.blobId(md, linkTarget.getBytes(StandardCharsets.UTF_8)));
+ assertArrayEquals(Hex.decodeHex(runHex), GitIdentifiers.blobId(md, runContent));
+
+ // Entries are supplied out of order to verify that the builder sorts them correctly.
+ final GitIdentifiers.TreeIdBuilder builder = GitIdentifiers.treeIdBuilder(md);
+ builder.addDirectory("src");
+ builder.addFile(GitIdentifiers.FileMode.EXECUTABLE, "run.sh", runContent);
+ builder.addFile(GitIdentifiers.FileMode.REGULAR, "hello.txt", helloContent);
+ builder.addSymbolicLink("link.txt", linkTarget);
+
+ // Expected tree body: entries in Git sort order (hello.txt, link.txt, run.sh, src/).
+ // Each entry: hex-encoded "<mode> <name>\0" followed by the object id.
+ final byte[] treeBody = Hex.decodeHex("3130303634342068656c6c6f2e74787400" + helloHex + // 100644 hello.txt\0
+ "313230303030206c696e6b2e74787400" + linkHex + // 120000 link.txt\0
+ "3130303735352072756e2e736800" + runHex + // 100755 run.sh\0
+ "34303030302073726300" + srcHex); // 40000 src\0
+ md.reset(); // start the expected-value computation from a clean digest
+ DigestUtils.updateDigest(md, ("tree " + treeBody.length + "\0").getBytes(StandardCharsets.UTF_8)); // object header: "tree <body length>\0"
+ assertArrayEquals(DigestUtils.updateDigest(md, treeBody).digest(), builder.build());
+ }
+
+ @Test
+ void testTreeIdBuilderAddFileInputStream() throws Exception {
+ final MessageDigest md = DigestUtils.getSha1Digest();
+ final byte[] content = "Hello, World!\n".getBytes(StandardCharsets.UTF_8);
+
+ final GitIdentifiers.TreeIdBuilder byteArrayBuilder = GitIdentifiers.treeIdBuilder(md);
+ byteArrayBuilder.addFile(GitIdentifiers.FileMode.REGULAR, "file.txt", content);
+ final byte[] expected = byteArrayBuilder.build(); // reference tree id: content supplied as a byte array
+
+ final GitIdentifiers.TreeIdBuilder sizedStreamBuilder = GitIdentifiers.treeIdBuilder(md);
+ sizedStreamBuilder.addFile(GitIdentifiers.FileMode.REGULAR, "file.txt", content.length, new ByteArrayInputStream(content));
+ assertArrayEquals(expected, sizedStreamBuilder.build()); // same content via (size, stream) must give the same tree id
+ }
+
+ @Test
+ void testTreeIdBuilderInvalidPathSegments() {
+ final MessageDigest md = DigestUtils.getSha1Digest();
+ final byte[] data = new byte[0];
+ // ".." as the sole path component
+ assertThrows(IllegalArgumentException.class,
+ () -> GitIdentifiers.treeIdBuilder(md).addFile(GitIdentifiers.FileMode.REGULAR, "..", data));
+ assertThrows(IllegalArgumentException.class,
+ () -> GitIdentifiers.treeIdBuilder(md).addDirectory(".."));
+ // ".." embedded in a longer path
+ assertThrows(IllegalArgumentException.class,
+ () -> GitIdentifiers.treeIdBuilder(md).addFile(GitIdentifiers.FileMode.REGULAR, "subdir/../file.txt", data));
+ assertThrows(IllegalArgumentException.class,
+ () -> GitIdentifiers.treeIdBuilder(md).addDirectory("subdir/.."));
+ }
+
+ @Test
+ void testTreeIdBuilderNestedFileEquivalentToDirectoryAndFile() throws Exception {
+ final MessageDigest md = DigestUtils.getSha1Digest();
+ final byte[] content = "hello\n".getBytes(StandardCharsets.UTF_8);
+
+ final GitIdentifiers.TreeIdBuilder direct = GitIdentifiers.treeIdBuilder(md);
+ direct.addFile(GitIdentifiers.FileMode.REGULAR, "nested/file.txt", content); // nested path given in a single call
+
+ final GitIdentifiers.TreeIdBuilder indirect = GitIdentifiers.treeIdBuilder(md);
+ indirect.addDirectory("nested").addFile(GitIdentifiers.FileMode.REGULAR, "file.txt", content); // same file added via the object returned by addDirectory
+
+ assertArrayEquals(direct.build(), indirect.build());
+ }
+
+    @ParameterizedTest
+    @ValueSource(strings = {"", "."}) // segments expected to have no effect on the resulting tree id
+    void testTreeIdBuilderNoopPathSegments(final String segment) throws Exception {
+        final MessageDigest md = DigestUtils.getSha1Digest();
+        final byte[] content = "hello\n".getBytes(StandardCharsets.UTF_8);
+
+        // Canonical form
+        final GitIdentifiers.TreeIdBuilder canonical = GitIdentifiers.treeIdBuilder(md);
+        canonical.addFile(GitIdentifiers.FileMode.REGULAR, "subdir/file.txt", content);
+        final byte[] expected = canonical.build();
+
+        // Leading segment
+        final GitIdentifiers.TreeIdBuilder withLeading = GitIdentifiers.treeIdBuilder(md);
+        withLeading.addFile(GitIdentifiers.FileMode.REGULAR, segment + "/subdir/file.txt", content);
+        assertArrayEquals(expected, withLeading.build());
+
+        // Intermediate segment
+        final GitIdentifiers.TreeIdBuilder withIntermediate = GitIdentifiers.treeIdBuilder(md);
+        withIntermediate.addFile(GitIdentifiers.FileMode.REGULAR, "subdir/" + segment + "/file.txt", content);
+        assertArrayEquals(expected, withIntermediate.build());
+
+        // addDirectory with leading/trailing segments
+        final GitIdentifiers.TreeIdBuilder viaDirectory = GitIdentifiers.treeIdBuilder(md);
+        viaDirectory.addDirectory(segment + "/subdir/" + segment).addFile(GitIdentifiers.FileMode.REGULAR, "file.txt", content);
+        assertArrayEquals(expected, viaDirectory.build());
+    }
+
+ @Test
+ void testTreeIdPath() throws Exception { // SHA-1 tree id of the DigestUtilsTest test-resource directory, addressed by Path
+ assertArrayEquals(Hex.decodeHex("e4b21f6d78ceba6eb7c211ac15e3337ec4614e8a"),
+ GitIdentifiers.treeId(DigestUtils.getSha1Digest(), resourcePath("DigestUtilsTest")));
+ }
+
+}