-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[CodeGen][KCFI] Allow setting type hash from xxHash64 to FNV-1a #167254
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
|
@llvm/pr-subscribers-clang @llvm/pr-subscribers-llvm-transforms Author: Kees Cook (kees) ChangesSwitch from xxHash64 to FNV-1a (to match the coming GCC KCFI implementation) and to remove the last user of xxHash64. Full diff: https://github.com/llvm/llvm-project/pull/167254.diff 9 Files Affected:
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 98d59b79ab881..c992a0bc4aa11 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -67,10 +67,10 @@
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TimeProfiler.h"
-#include "llvm/Support/xxhash.h"
#include "llvm/TargetParser/RISCVISAInfo.h"
#include "llvm/TargetParser/Triple.h"
#include "llvm/TargetParser/X86TargetParser.h"
+#include "llvm/Transforms/Instrumentation/KCFI.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include <optional>
#include <set>
@@ -2431,8 +2431,7 @@ llvm::ConstantInt *CodeGenModule::CreateKCFITypeId(QualType T, StringRef Salt) {
if (getCodeGenOpts().SanitizeCfiICallGeneralizePointers)
Out << ".generalized";
- return llvm::ConstantInt::get(Int32Ty,
- static_cast<uint32_t>(llvm::xxHash64(OutName)));
+ return llvm::ConstantInt::get(Int32Ty, llvm::getKCFITypeID(OutName));
}
void CodeGenModule::SetLLVMFunctionAttributes(GlobalDecl GD,
@@ -3186,7 +3185,7 @@ void CodeGenModule::finalizeKCFITypes() {
continue;
std::string Asm = (".weak __kcfi_typeid_" + Name + "\n.set __kcfi_typeid_" +
- Name + ", " + Twine(Type->getZExtValue()) + "\n")
+ Name + ", " + Twine(Type->getSExtValue()) + "\n")
.str();
M.appendModuleInlineAsm(Asm);
}
diff --git a/clang/test/CodeGen/kcfi-generalize.c b/clang/test/CodeGen/kcfi-generalize.c
index 5a44d97412af9..47d8128c45bcf 100644
--- a/clang/test/CodeGen/kcfi-generalize.c
+++ b/clang/test/CodeGen/kcfi-generalize.c
@@ -21,8 +21,8 @@ int** f3(char *a, char **b) {
}
void g(int** (*fp)(const char *, const char **)) {
- // UNGENERALIZED: call {{.*}} [ "kcfi"(i32 1296635908) ]
- // GENERALIZED: call {{.*}} [ "kcfi"(i32 -49168686) ]
+ // UNGENERALIZED: call {{.*}} [ "kcfi"(i32 -1900814401) ]
+ // GENERALIZED: call {{.*}} [ "kcfi"(i32 355875385) ]
fp(0, 0);
}
@@ -33,16 +33,16 @@ union Union {
// CHECK: define{{.*}} void @uni({{.*}} !kcfi_type [[TYPE4:![0-9]+]]
void uni(void (*fn)(union Union), union Union arg1) {
- // UNGENERALIZED: call {{.*}} [ "kcfi"(i32 -587217045) ]
- // GENERALIZED: call {{.*}} [ "kcfi"(i32 2139530422) ]
+ // UNGENERALIZED: call {{.*}} [ "kcfi"(i32 514817671) ]
+ // GENERALIZED: call {{.*}} [ "kcfi"(i32 1629153266) ]
fn(arg1);
}
-// UNGENERALIZED: [[TYPE]] = !{i32 1296635908}
-// GENERALIZED: [[TYPE]] = !{i32 -49168686}
+// UNGENERALIZED: [[TYPE]] = !{i32 -1900814401}
+// GENERALIZED: [[TYPE]] = !{i32 355875385}
-// UNGENERALIZED: [[TYPE3]] = !{i32 874141567}
-// GENERALIZED: [[TYPE3]] = !{i32 954385378}
+// UNGENERALIZED: [[TYPE3]] = !{i32 1089235487}
+// GENERALIZED: [[TYPE3]] = !{i32 1460151842}
-// UNGENERALIZED: [[TYPE4]] = !{i32 -1619636625}
-// GENERALIZED: [[TYPE4]] = !{i32 -125078496}
+// UNGENERALIZED: [[TYPE4]] = !{i32 1937639136}
+// GENERALIZED: [[TYPE4]] = !{i32 734921772}
diff --git a/clang/test/CodeGen/kcfi-normalize.c b/clang/test/CodeGen/kcfi-normalize.c
index bd87f4af534a1..320f9e398954f 100644
--- a/clang/test/CodeGen/kcfi-normalize.c
+++ b/clang/test/CodeGen/kcfi-normalize.c
@@ -10,21 +10,21 @@
void foo(void (*fn)(int), int arg) {
// CHECK-LABEL: define{{.*}}foo
// CHECK-SAME: {{.*}}!kcfi_type ![[TYPE1:[0-9]+]]
- // CHECK: call void %0(i32 noundef %1){{.*}}[ "kcfi"(i32 1162514891) ]
+ // CHECK: call void %0(i32 noundef %1){{.*}}[ "kcfi"(i32 -402462225) ]
fn(arg);
}
void bar(void (*fn)(int, int), int arg1, int arg2) {
// CHECK-LABEL: define{{.*}}bar
// CHECK-SAME: {{.*}}!kcfi_type ![[TYPE2:[0-9]+]]
- // CHECK: call void %0(i32 noundef %1, i32 noundef %2){{.*}}[ "kcfi"(i32 448046469) ]
+ // CHECK: call void %0(i32 noundef %1, i32 noundef %2){{.*}}[ "kcfi"(i32 -972192795) ]
fn(arg1, arg2);
}
void baz(void (*fn)(int, int, int), int arg1, int arg2, int arg3) {
// CHECK-LABEL: define{{.*}}baz
// CHECK-SAME: {{.*}}!kcfi_type ![[TYPE3:[0-9]+]]
- // CHECK: call void %0(i32 noundef %1, i32 noundef %2, i32 noundef %3){{.*}}[ "kcfi"(i32 -2049681433) ]
+ // CHECK: call void %0(i32 noundef %1, i32 noundef %2, i32 noundef %3){{.*}}[ "kcfi"(i32 -1376104717) ]
fn(arg1, arg2, arg3);
}
@@ -36,14 +36,14 @@ union Union {
void uni(void (*fn)(union Union), union Union arg1) {
// CHECK-LABEL: define{{.*}}uni
// CHECK-SAME: {{.*}}!kcfi_type ![[TYPE4:[0-9]+]]
- // C: call void %0(ptr %1) [ "kcfi"(i32 1819770848) ]
- // CPP: call void %0(ptr %1) [ "kcfi"(i32 -1430221633) ]
+ // C: call void %0(ptr %1) [ "kcfi"(i32 641309179) ]
+ // CPP: call void %0(ptr %1) [ "kcfi"(i32 15039153) ]
fn(arg1);
}
// CHECK: ![[#]] = !{i32 4, !"cfi-normalize-integers", i32 1}
-// CHECK: ![[TYPE1]] = !{i32 -1143117868}
-// CHECK: ![[TYPE2]] = !{i32 -460921415}
-// CHECK: ![[TYPE3]] = !{i32 -333839615}
-// C: ![[TYPE4]] = !{i32 -650530463}
-// CPP: ![[TYPE4]] = !{i32 1766237188}
+// CHECK: ![[TYPE1]] = !{i32 -1113907258}
+// CHECK: ![[TYPE2]] = !{i32 994987278}
+// CHECK: ![[TYPE3]] = !{i32 -886425042}
+// C: ![[TYPE4]] = !{i32 -1919128908}
+// CPP: ![[TYPE4]] = !{i32 1834954376}
diff --git a/llvm/include/llvm/Support/xxhash.h b/llvm/include/llvm/Support/xxhash.h
index b521adbef3456..9f95e7d4db58d 100644
--- a/llvm/include/llvm/Support/xxhash.h
+++ b/llvm/include/llvm/Support/xxhash.h
@@ -44,9 +44,6 @@
namespace llvm {
-LLVM_ABI uint64_t xxHash64(llvm::StringRef Data);
-LLVM_ABI uint64_t xxHash64(llvm::ArrayRef<uint8_t> Data);
-
LLVM_ABI uint64_t xxh3_64bits(ArrayRef<uint8_t> data);
inline uint64_t xxh3_64bits(StringRef data) {
return xxh3_64bits(ArrayRef(data.bytes_begin(), data.size()));
diff --git a/llvm/include/llvm/Transforms/Instrumentation/KCFI.h b/llvm/include/llvm/Transforms/Instrumentation/KCFI.h
index 2711930a20b5b..7a2d42fa1e1c6 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/KCFI.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/KCFI.h
@@ -18,10 +18,15 @@
#include "llvm/Support/Compiler.h"
namespace llvm {
+
class KCFIPass : public PassInfoMixin<KCFIPass> {
public:
static bool isRequired() { return true; }
LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
+
+/// Compute the KCFI type identifier hash for a given mangled type name.
+uint32_t getKCFITypeID(StringRef MangledTypeName);
+
} // namespace llvm
#endif // LLVM_TRANSFORMS_INSTRUMENTATION_KCFI_H
diff --git a/llvm/lib/Support/xxhash.cpp b/llvm/lib/Support/xxhash.cpp
index cdb76d57e2c1d..79df4bc4d0ec0 100644
--- a/llvm/lib/Support/xxhash.cpp
+++ b/llvm/lib/Support/xxhash.cpp
@@ -77,20 +77,6 @@ static const uint64_t PRIME64_3 = 1609587929392839161ULL;
static const uint64_t PRIME64_4 = 9650029242287828579ULL;
static const uint64_t PRIME64_5 = 2870177450012600261ULL;
-static uint64_t round(uint64_t Acc, uint64_t Input) {
- Acc += Input * PRIME64_2;
- Acc = rotl64(Acc, 31);
- Acc *= PRIME64_1;
- return Acc;
-}
-
-static uint64_t mergeRound(uint64_t Acc, uint64_t Val) {
- Val = round(0, Val);
- Acc ^= Val;
- Acc = Acc * PRIME64_1 + PRIME64_4;
- return Acc;
-}
-
static uint64_t XXH64_avalanche(uint64_t hash) {
hash ^= hash >> 33;
hash *= PRIME64_2;
@@ -100,70 +86,6 @@ static uint64_t XXH64_avalanche(uint64_t hash) {
return hash;
}
-uint64_t llvm::xxHash64(StringRef Data) {
- size_t Len = Data.size();
- uint64_t Seed = 0;
- const unsigned char *P = Data.bytes_begin();
- const unsigned char *const BEnd = Data.bytes_end();
- uint64_t H64;
-
- if (Len >= 32) {
- const unsigned char *const Limit = BEnd - 32;
- uint64_t V1 = Seed + PRIME64_1 + PRIME64_2;
- uint64_t V2 = Seed + PRIME64_2;
- uint64_t V3 = Seed + 0;
- uint64_t V4 = Seed - PRIME64_1;
-
- do {
- V1 = round(V1, endian::read64le(P));
- P += 8;
- V2 = round(V2, endian::read64le(P));
- P += 8;
- V3 = round(V3, endian::read64le(P));
- P += 8;
- V4 = round(V4, endian::read64le(P));
- P += 8;
- } while (P <= Limit);
-
- H64 = rotl64(V1, 1) + rotl64(V2, 7) + rotl64(V3, 12) + rotl64(V4, 18);
- H64 = mergeRound(H64, V1);
- H64 = mergeRound(H64, V2);
- H64 = mergeRound(H64, V3);
- H64 = mergeRound(H64, V4);
-
- } else {
- H64 = Seed + PRIME64_5;
- }
-
- H64 += (uint64_t)Len;
-
- while (reinterpret_cast<uintptr_t>(P) + 8 <=
- reinterpret_cast<uintptr_t>(BEnd)) {
- uint64_t const K1 = round(0, endian::read64le(P));
- H64 ^= K1;
- H64 = rotl64(H64, 27) * PRIME64_1 + PRIME64_4;
- P += 8;
- }
-
- if (reinterpret_cast<uintptr_t>(P) + 4 <= reinterpret_cast<uintptr_t>(BEnd)) {
- H64 ^= (uint64_t)(endian::read32le(P)) * PRIME64_1;
- H64 = rotl64(H64, 23) * PRIME64_2 + PRIME64_3;
- P += 4;
- }
-
- while (P < BEnd) {
- H64 ^= (*P) * PRIME64_5;
- H64 = rotl64(H64, 11) * PRIME64_1;
- P++;
- }
-
- return XXH64_avalanche(H64);
-}
-
-uint64_t llvm::xxHash64(ArrayRef<uint8_t> Data) {
- return xxHash64({(const char *)Data.data(), Data.size()});
-}
-
constexpr size_t XXH3_SECRETSIZE_MIN = 136;
constexpr size_t XXH_SECRET_DEFAULT_SIZE = 192;
diff --git a/llvm/lib/Transforms/Instrumentation/KCFI.cpp b/llvm/lib/Transforms/Instrumentation/KCFI.cpp
index f4cb4e2d1c9e1..31c5c972589c2 100644
--- a/llvm/lib/Transforms/Instrumentation/KCFI.cpp
+++ b/llvm/lib/Transforms/Instrumentation/KCFI.cpp
@@ -32,6 +32,16 @@ using namespace llvm;
STATISTIC(NumKCFIChecks, "Number of kcfi operands transformed into checks");
+uint32_t llvm::getKCFITypeID(StringRef MangledTypeName) {
+ // FNV-1a hash (32-bit)
+ uint32_t Hash = 2166136261u; // FNV offset basis
+ for (unsigned char C : MangledTypeName) {
+ Hash ^= C;
+ Hash *= 16777619u; // FNV prime
+ }
+ return Hash;
+}
+
namespace {
class DiagnosticInfoKCFI : public DiagnosticInfo {
const Twine &Msg;
diff --git a/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/llvm/lib/Transforms/Utils/ModuleUtils.cpp
index 596849ecab742..4b496dbae5a61 100644
--- a/llvm/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/llvm/lib/Transforms/Utils/ModuleUtils.cpp
@@ -11,8 +11,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/ModuleUtils.h"
-#include "llvm/Analysis/VectorUtils.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
@@ -21,7 +21,7 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/MD5.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/xxhash.h"
+#include "llvm/Transforms/Instrumentation/KCFI.h"
using namespace llvm;
@@ -208,10 +208,10 @@ void llvm::setKCFIType(Module &M, Function &F, StringRef MangledType) {
std::string Type = MangledType.str();
if (M.getModuleFlag("cfi-normalize-integers"))
Type += ".normalized";
- F.setMetadata(LLVMContext::MD_kcfi_type,
- MDNode::get(Ctx, MDB.createConstant(ConstantInt::get(
- Type::getInt32Ty(Ctx),
- static_cast<uint32_t>(xxHash64(Type))))));
+ F.setMetadata(
+ LLVMContext::MD_kcfi_type,
+ MDNode::get(Ctx, MDB.createConstant(ConstantInt::get(
+ Type::getInt32Ty(Ctx), getKCFITypeID(Type)))));
// If the module was compiled with -fpatchable-function-entry, ensure
// we use the same patchable-function-prefix.
if (auto *MD = mdconst::extract_or_null<ConstantInt>(
diff --git a/llvm/unittests/Support/xxhashTest.cpp b/llvm/unittests/Support/xxhashTest.cpp
index 84308ce130e72..dcc654b800484 100644
--- a/llvm/unittests/Support/xxhashTest.cpp
+++ b/llvm/unittests/Support/xxhashTest.cpp
@@ -31,14 +31,6 @@ static void fillTestBuffer(uint8_t *buffer, size_t len) {
}
}
-TEST(xxhashTest, Basic) {
- EXPECT_EQ(0xef46db3751d8e999U, xxHash64(StringRef()));
- EXPECT_EQ(0x33bf00a859c4ba3fU, xxHash64("foo"));
- EXPECT_EQ(0x48a37c90ad27a659U, xxHash64("bar"));
- EXPECT_EQ(0x69196c1b3af0bff9U,
- xxHash64("0123456789abcdefghijklmnopqrstuvwxyz"));
-}
-
TEST(xxhashTest, xxh3) {
constexpr size_t size = 2243;
uint8_t a[size];
|
|
What is your plan for addressing compatibility with rustc? If Clang switches to a different hash, the user will also need a version of rustc that uses the same hash, or cross-language indirect calls will break. |
clang/lib/CodeGen/CodeGenModule.cpp
Outdated
|
|
||
| std::string Asm = (".weak __kcfi_typeid_" + Name + "\n.set __kcfi_typeid_" + | ||
| Name + ", " + Twine(Type->getZExtValue()) + "\n") | ||
| Name + ", " + Twine(Type->getSExtValue()) + "\n") |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I seem to recall that this was intentional, and was needed for the values to be usable in x86 relocations. Did you confirm that kernel builds still work with this change?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OK, testing with this change alone, x86_64 defconfig build fails when linking .tmp_vmlinux1:
ld.lld: error: vmlinux.o:(function __cfi___memcpy: .noinstr.text+0x4a21): relocation R_X86_64_32 out of range: 18446744071952041092 is not in [0, 4294967295]; references '__kcfi_typeid___memcpy'
>>> referenced by usercopy_64.c
>>> defined in vmlinux.o
...
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah, okay. Thanks for double checking that. I will revert that and find another solution for the FileCheck stuff....
What's the best way to bind such a version skew? |
Good question. There are three places where hashes are calculated: Clang (front-end), rustc, and the LLVM back-end (for LLVM generated functions). Rustc can (and often will) use a different LLVM version than Clang, so you potentially need to ensure that three different components use the same hash algorithm, which is going to be tedious if we don't want to break any users. One option would be to support both hashes in Clang/LLVM during a transition period and use a command line flag to indicate which one to use. You'd have to figure out in Kconfig whether all compilers support FNV-1a and fall back to xxHash64 if not. Once older compilers are no longer supported for kernel builds, we can finally drop xxHash64 support everywhere... and hope we never have to do this again. |
|
Is the version of LLVM used by rustc exposed in some way? I'd like to avoid a flag for this if we can just say "kcfi only works on [combination of clang version and llvm version]". Does that seem feasible? |
Yes, see the
You might still end up in a situation where a compatible Clang/Rust toolchain simply isn't available for some period of time because either the release cycles don't quite match, or because the Rust folks are lagging behind upstream LLVM. Larger downstream users like Android can probably coordinate their internal toolchain releases to make sure all the necessary patches are in and the LLVM back-end versions match, but I'm not sure about other distros or users who rely on upstream releases. |
When emitting the assembly .set directive, KCFI needs to use
getZExtValue(). However, this means that FileCheck pattern matching can't
match between the .set directive and the IR when the high bit of a 32-bit
value is set. We had gotten lucky with the existing tests happening to
just not have had the high bit set. The coming hash change will expose
this, though.
LLVM IR's default printing behavior uses APInt::operator<<, which calls
APInt::print(OS, /*isSigned=*/true). This means KCFI operand bundles in
call instructions print as signed (e.g. [ "kcfi"(i32 -1208803271) ]),
and KCFI type metadata prints as signed (e.g. !3 = !{i32 -1208803271}).
Changing the IR to print unsigned i32 values would impact hundreds of
existing tests, so it is best to just leave it be.
Update the KCFI .set direct to use getSExtValue() in a comment so that
we can both build correctly and use FileCheck with pattern matching in
tests.
Signed-off-by: Kees Cook <[email protected]>
…tion KCFI generates hashes in two places. Instead of exposing the hash implementation in both places, introduce a helper that wraps the specific hash implementation in a single place. Signed-off-by: Kees Cook <[email protected]>
In order to transition between KCFI hash, we need to be able to specify them. Add the Clang option -fsanitize-kcfi-hash= and a IR module option "kcfi-hash" that can choose between xxHash64 and FNV-1a. Default to xxHash64 to stay backward compatible, as we'll need to also update rustc to take a new option to change the hash to FNV-1a for interop with the coming GCC KCFI.
|
Okay, we can't remove xxHash64 then, so I've dropped that. I added |
fmayer
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks, I had concerns about just changing the type, a flag SGTM
| /// Compute KCFI type ID from mangled type name. | ||
| /// The algorithm can be xxHash64 or FNV-1a. | ||
| uint32_t | ||
| getKCFITypeID(StringRef MangledTypeName, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
should we leave out the default argument? then we make sure we don't forget to think about the type id anywhere
Allow for switching the KCFI type id hash from xxHash64 to FNV-1a (to match the coming GCC KCFI implementation). Introduces the Clang
-fsanitize-kcfi-hash=option and "kcfi-hash" IR module option.