Skip to content

Commit e99f010

Browse files
committed
umash.c: add support for aarch64 + crypto extensions
The only vector operations we use (all on 128 bits) are: 1. creating a vector from two 64-bit values; 2. shifting 64-bit lanes by one bit to the left; 3. carry-less multiplication of two 64-bit scalars into a 128-bit vector; 4. carry-less multiplication of the low and high halves of a vector; 5. xoring vectors together. That's easy to convert to NEON with the crypto extension for `vmull_p64` (and aarch64 for 64x64 -> 128 multiplications).
1 parent fc5bfab commit e99f010

File tree

2 files changed

+44
-2
lines changed

2 files changed

+44
-2
lines changed

.travis.yml

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,20 @@ compiler:
44
- gcc
55
arch:
66
- amd64
7+
- arm64
78
os: linux
89
dist: bionic
910

1011
install:
1112
- pyenv shell 3.8 || echo "using default python"
1213
- sudo apt-get -y install python3-venv
1314
- pip3 install --upgrade pip
15+
1416
script:
15-
- export CFLAGS='-g -O2 -std=c99 -W -Wall -Werror -mpclmul';
17+
- if [ "x$TRAVIS_CPU_ARCH" == "xarm64" ];
18+
then
19+
export CFLAGS='-g -O2 -std=c99 -W -Wall -Werror -march=armv8-a+crypto';
20+
else
21+
export CFLAGS='-g -O2 -std=c99 -W -Wall -Werror -mpclmul';
22+
fi
1623
- t/run-tests-public.sh && t/run-tests.sh

umash.c

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,43 @@ v128_clmul_cross(v128 x)
4242
{
4343
return _mm_clmulepi64_si128(x, x, 1);
4444
}
45+
46+
#elif defined(__ARM_FEATURE_CRYPTO)
47+
48+
#include <arm_neon.h>
49+
50+
/* 128-bit vector type for the NEON branch: two unsigned 64-bit lanes. */
typedef uint64x2_t v128;
51+
52+
/*
 * Brace initializer for a zeroed v128.
 *
 * NOTE(review): the expansion itself ends with ';', so a use written
 * as `v128 x = V128_ZERO;` expands to two semicolons.  Confirm how the
 * call sites are written before dropping the trailing ';' here.
 */
#define V128_ZERO { 0 };
53+
54+
static inline v128
55+
v128_create(uint64_t lo, uint64_t hi)
56+
{
57+
return vcombine_u64(vcreate_u64(lo), vcreate_u64(hi));
58+
}
59+
60+
static inline v128
61+
v128_shift(v128 x)
62+
{
63+
return vshlq_n_u64(x, 1);
64+
}
65+
66+
static inline v128
67+
v128_clmul(uint64_t x, uint64_t y)
68+
{
69+
return vreinterpretq_u64_p128(vmull_p64(x, y));
70+
}
71+
72+
static inline v128
73+
v128_clmul_cross(v128 x)
74+
{
75+
return v128_clmul(vgetq_lane_u64(x, 0), vgetq_lane_u64(x, 1));
76+
}
77+
4578
#else
46-
#error "Unsupported platform: umash requires x86's SSE2 and CLMUL (-mpclmul)"
79+
80+
#error \
81+
"Unsupported platform: umash requires CLMUL (-mpclmul) on x86-64, or crypto (-march=...+crypto) extensions on aarch64."
4782
#endif
4883

4984
/*

0 commit comments

Comments
 (0)