diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 24f881f3..18a832bb 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -70,6 +70,12 @@ jobs: # RSA superclass with tests (no sanitizer, but debug info) - { BUILDOPTIONS: '--with-cc=gcc --with-m64 --cflags=-DLTM_NOTHING --cflags=-DSC_RSA_1_WITH_TESTS --limit-valgrind', SANITIZER: '', COMPILE_DEBUG: '1', COMPILE_LTO: '0', CONV_WARNINGS: '', OTHERDEPS: '' } + # Build with small stack-size + - { BUILDOPTIONS: '--with-cc=gcc --with-m32 --with-m64 --cflags=-DMP_SMALL_STACK_SIZE', SANITIZER: '', COMPILE_DEBUG: '0', COMPILE_LTO: '0', CONV_WARNINGS: '', OTHERDEPS: 'gcc-multilib' } + - { BUILDOPTIONS: '--with-cc=clang-10 --with-m32 --with-m64 --cflags=-DMP_SMALL_STACK_SIZE --limit-valgrind', SANITIZER: '1', COMPILE_DEBUG: '0', COMPILE_LTO: '0', CONV_WARNINGS: '', OTHERDEPS: 'clang-10 llvm-10 libc6-dev-i386 gcc-multilib' } + - { BUILDOPTIONS: '--with-cc=gcc --with-m32 --with-m64 --cflags=-DMP_SMALL_STACK_SIZE --multithread --limit-valgrind', SANITIZER: '', COMPILE_DEBUG: '0', COMPILE_LTO: '0', CONV_WARNINGS: '', OTHERDEPS: 'libc6-dev-i386 gcc-multilib' } + - { BUILDOPTIONS: '--with-cc=clang-10 --with-m32 --with-m64 --cflags=-DMP_SMALL_STACK_SIZE --multithread', SANITIZER: '1', COMPILE_DEBUG: '0', COMPILE_LTO: '0', CONV_WARNINGS: '', OTHERDEPS: 'clang-10 llvm-10 gcc-multilib' } + # Test "autotuning", the automatic evaluation and setting of the Toom-Cook cut-offs. 
#- env: SANITIZER=1 BUILDOPTIONS='--with-cc=gcc-5 --cflags=-DMP_16BIT --limit-valgrind --make-option=tune' #- env: SANITIZER=1 BUILDOPTIONS='--with-cc=gcc-5 --cflags=-DMP_32BIT --limit-valgrind --make-option=tune' diff --git a/CMakeLists.txt b/CMakeLists.txt index d6063277..2f59d32e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -138,7 +138,7 @@ if(COMPILE_LTO) if(COMPILER_SUPPORTS_LTO) set_property(TARGET ${PROJECT_NAME} PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE) else() - message(SEND_ERROR "This compiler does not support LTO. Reconfigure ${PROJECT_NAME} with -DCOMPILE_LTO=OFF.") + message(FATAL_ERROR "This compiler does not support LTO. Reconfigure ${PROJECT_NAME} with -DCOMPILE_LTO=OFF.") endif() endif() diff --git a/appveyor.yml b/appveyor.yml index 30d9ee75..2134f2dd 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -11,6 +11,10 @@ image: - Visual Studio 2019 - Visual Studio 2017 - Visual Studio 2015 +environment: + matrix: + - CFLAGS_VAR: "" + CFLAGS_VAR_DLL: "CFLAGS=\"/Ox /MD /DLTM_TEST_DYNAMIC\"" build_script: - cmd: >- if "Visual Studio 2022"=="%APPVEYOR_BUILD_WORKER_IMAGE%" call "C:\Program Files\Microsoft Visual Studio\2022\Community\VC\Auxiliary\Build\vcvars64.bat" @@ -18,9 +22,9 @@ build_script: if "Visual Studio 2017"=="%APPVEYOR_BUILD_WORKER_IMAGE%" call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvars64.bat" if "Visual Studio 2015"=="%APPVEYOR_BUILD_WORKER_IMAGE%" call "C:\Program Files\Microsoft SDKs\Windows\v7.1\Bin\SetEnv.cmd" /x64 if "Visual Studio 2015"=="%APPVEYOR_BUILD_WORKER_IMAGE%" call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" x86_amd64 - nmake -f makefile.msvc test.exe + nmake -f makefile.msvc test.exe %CFLAGS_VAR% nmake -f makefile.msvc clean-obj - nmake -f makefile.msvc test_dll.exe CFLAGS="/Ox /MD /DLTM_TEST_DYNAMIC" + nmake -f makefile.msvc test_dll.exe %CFLAGS_VAR_DLL% test_script: - cmd: test.exe - cmd: test_dll.exe diff --git a/demo/test.c b/demo/test.c index 
f290dbf2..2fa6e08d 100644
--- a/demo/test.c
+++ b/demo/test.c
@@ -2455,12 +2455,101 @@ static int test_mp_pack_unpack(void)
 #define ONLY_PUBLIC_API_C
 #endif
 
+#if !defined(LTM_TEST_MULTITHREAD)
+#define SINGLE_THREADED_C
+typedef uintptr_t thread_id_t;
+#else
+#define MULTI_THREADED_C
+#if !defined(_WIN32)
+#define MULTI_THREADED_PTHREAD_C
+#include <pthread.h>
+typedef pthread_t thread_id_t;
+#else
+#define MULTI_THREADED_MSVC_C
+
+#ifndef _WIN32_WINNT
+#define _WIN32_WINNT 0x0501
+#endif
+#ifndef WINVER
+#define WINVER 0x0501
+#endif
+
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+typedef HANDLE thread_id_t;
+#endif
+#endif
+
+#if !defined(MULTI_THREADED_PTHREAD_C)
+extern int pthread_create(thread_id_t *, const void *, void *(*)(void *), void *);
+extern int pthread_join(thread_id_t, void **);
+#endif
+
+#if !defined(MULTI_THREADED_MSVC_C)
+extern thread_id_t CreateThread(void *, size_t, unsigned long (*)(void *), void *, unsigned long, void *);
+extern unsigned long WaitForSingleObject(thread_id_t hHandle, unsigned long dwMilliseconds);
+#define INFINITE ((unsigned long)-1)
+#endif
+
+struct test_fn {
+   const char *name;
+   int (*fn)(void);
+};
+
+struct thread_info {
+   thread_id_t thread_id;
+   const struct test_fn *t;
+   int ret;
+};
+/* Thread body: run one test (fn == NULL marks a disabled test, stored as success), then free this thread's working array. */
+static void run(struct thread_info *tinfo)
+{
+   tinfo->ret = (tinfo->t->fn != NULL) ? tinfo->t->fn() : EXIT_SUCCESS;
+   /* mp_warray_free() returns -2 while the working array is still marked as in use, i.e. the test did not hand it back */
+   if (mp_warray_free() == -2)
+      tinfo->ret = EXIT_FAILURE;
+}
+
+static void *run_pthread(void *arg)
+{
+   run(arg);
+
+   return arg;
+}
+
+static unsigned long run_msvc(void *arg)
+{
+   run(arg);
+
+   return 0;
+}
+
+static int thread_start(struct thread_info *info)
+{
+   if (MP_HAS(MULTI_THREADED_PTHREAD))
+      return pthread_create(&info->thread_id, NULL, run_pthread, info);
+   if (MP_HAS(MULTI_THREADED_MSVC)) {
+      info->thread_id = CreateThread(NULL, 0, run_msvc, info, 0, NULL);
+      return info->thread_id == (thread_id_t)NULL ?
-1 : 0; + } + return -1; +} + +static int thread_join(struct thread_info *info, struct thread_info **res) +{ + if (MP_HAS(MULTI_THREADED_PTHREAD)) + return pthread_join(info->thread_id, (void **)res); + if (MP_HAS(MULTI_THREADED_MSVC)) { + WaitForSingleObject(info->thread_id, INFINITE); + *res = info; + return 0; + } + return -1; +} + static int unit_tests(int argc, char **argv) { - static const struct { - const char *name; - int (*fn)(void); - } test[] = { + static const struct test_fn test[] = { #define T0(n) { #n, test_##n } #define T1(n, o) { #n, MP_HAS(o) ? test_##n : NULL } #define T2(n, o1, o2) { #n, (MP_HAS(o1) && MP_HAS(o2)) ? test_##n : NULL } @@ -2522,10 +2611,10 @@ static int unit_tests(int argc, char **argv) #undef T2 #undef T1 }; + struct thread_info test_threads[sizeof(test)/sizeof(test[0])], *res; unsigned long i, ok, fail, nop; uint64_t t; int j; - ok = fail = nop = 0; t = (uint64_t)time(NULL); @@ -2533,20 +2622,43 @@ static int unit_tests(int argc, char **argv) s_mp_rand_jenkins_init(t); mp_rand_source(s_mp_rand_jenkins); + if (MP_HAS(MP_SMALL_STACK_SIZE)) { + printf("Small-stack enabled\n\n"); + } + + if (MP_HAS(MULTI_THREADED)) { + printf("Multi-threading enabled\n\n"); + /* we ignore the fact that jenkins is not thread safe */ + for (i = 0; i < (sizeof(test) / sizeof(test[0])); ++i) { + test_threads[i].t = &test[i]; + EXPECT(thread_start(&test_threads[i]) == 0); + } + } + for (i = 0; i < (sizeof(test) / sizeof(test[0])); ++i) { - if (argc > 1) { - for (j = 1; j < argc; ++j) { - if (strstr(test[i].name, argv[j]) != NULL) { - break; + j = -1; + if (MP_HAS(SINGLE_THREADED)) { + if (argc > 1) { + for (j = 1; j < argc; ++j) { + if (strstr(test[i].name, argv[j]) != NULL) { + break; + } } + if (j == argc) continue; } - if (j == argc) continue; + + if (test[i].fn) + j = test[i].fn(); + } else if (MP_HAS(MULTI_THREADED)) { + EXPECT(thread_join(&test_threads[i], &res) == 0); + j = res->ret; } printf("TEST %s\n", test[i].name); + if (test[i].fn == NULL) { 
nop++; printf("NOP %s\n\n", test[i].name); - } else if (test[i].fn() == EXIT_SUCCESS) { + } else if (j == EXIT_SUCCESS) { ok++; printf("\n"); } else { @@ -2556,8 +2668,12 @@ static int unit_tests(int argc, char **argv) } fprintf(fail?stderr:stdout, "Tests OK/NOP/FAIL: %lu/%lu/%lu\n", ok, nop, fail); - if (fail != 0) return EXIT_FAILURE; - else return EXIT_SUCCESS; + EXPECT(mp_warray_free() != -2); + + if (fail == 0) + return EXIT_SUCCESS; +LBL_ERR: + return EXIT_FAILURE; } int main(int argc, char **argv) diff --git a/doc/bn.tex b/doc/bn.tex index 22ae5f3e..63e71633 100644 --- a/doc/bn.tex +++ b/doc/bn.tex @@ -352,6 +352,16 @@ \subsubsection{Operand Size Related} \end{center} \end{small} +\subsection{Small-Stack option} +\label{ch:SMALL_STACK_INTRO} +The library can be compiled with the symbol \texttt{MP\_SMALL\_STACK\_SIZE} defined, which results in +the temporary \texttt{MP\_WARRAY}-sized stack buffers being put on the heap. +This comes with one problem, namely: formerly promised thread-safety isn't given anymore. +Therefore if the Small-Stack option is enabled while doing multi threading, one shall always initialize +the library by calling \texttt{mp\_warray\_init()} once with the correct number of threads. + +C.f. \ref{ch:SMALL_STACK_API} for the API description and further details. + \section{Purpose of LibTomMath} Unlike GNU MP (GMP) Library, LIP, OpenSSL or various other commercial kits (Miracl), LibTomMath was not written with bleeding edge performance in mind. First and foremost LibTomMath was written @@ -428,7 +438,11 @@ \chapter{Getting Started with LibTomMath} \section{Building Programs} In order to use LibTomMath you must include ``tommath.h'' and link against the appropriate library file (typically -libtommath.a). There is no library initialization required and the entire library is thread safe. +libtommath.a). There is no library initialization required and the entire library is thread safe +if it is used in its default configuration. 
The small-stack option makes use of atomic operations
+to maintain its internal state and therefore does not require locking, but it MUST be initialized
+if used from multiple threads. For further information see \ref{ch:SMALL_STACK_INTRO} and
+\ref{ch:SMALL_STACK_API}.
 
 \section{Return Codes}
 There are five possible return codes a function may return.
@@ -813,6 +827,37 @@ \subsection{Adding additional digits}
 \end{alltt}
 \end{small}
 
+\section{Small-Stack option}
+\label{ch:SMALL_STACK_API}
+
+If the \texttt{MP\_SMALL\_STACK\_SIZE} symbol is defined, the following functions
+can be useful.
+
+To initialize the internal structure the following function shall be called.
+
+\index{mp\_warray\_init}
+\begin{alltt}
+mp_err mp_warray_init(size_t n_alloc, bool preallocate);
+\end{alltt}
+
+The flag \texttt{preallocate} controls whether the internal buffers --
+\texttt{n\_alloc} buffers of size \texttt{MP\_WARRAY} -- will be allocated when
+\texttt{mp\_warray\_init()} is called, or whether they will be allocated when required.
+
+To free the internally allocated memory the following function shall be called.
+
+\index{mp\_warray\_free}
+\begin{alltt}
+int mp_warray_free(void);
+\end{alltt}
+It returns $0$ on success and $-2$ if a working buffer is still in use.
+
+These two API functions are always available, even if the \texttt{MP\_SMALL\_STACK\_SIZE} option
+has been disabled at compile time.
+In that case \texttt{mp\_warray\_init()} will return \texttt{MP\_ERR} and \texttt{mp\_warray\_free()}
+will return $-1$.
+ + \chapter{Basic Operations} \section{Copying} diff --git a/helper.pl b/helper.pl index 53658614..ffc592a7 100755 --- a/helper.pl +++ b/helper.pl @@ -394,7 +394,7 @@ sub update_dep foreach my $filename (glob '*mp_*.c') { my $content; my $cc = $ENV{'CC'} || 'gcc'; - $content = `$cc -E -x c -DLTM_ALL $filename`; + $content = `$cc -E -x c -DLTM_ALL -DMP_SMALL_STACK_SIZE $filename`; $content =~ s/^# 1 "$filename".*?^# 2 "$filename"//ms; # convert filename to upper case so we can use it as a define diff --git a/libtommath_VS2008.vcproj b/libtommath_VS2008.vcproj index 13158a09..71dd3807 100644 --- a/libtommath_VS2008.vcproj +++ b/libtommath_VS2008.vcproj @@ -792,6 +792,10 @@ RelativePath="mp_unpack.c" > + + @@ -928,6 +932,18 @@ RelativePath="s_mp_sub.c" > + + + + + + diff --git a/makefile b/makefile index ec32ecd0..8f211f5f 100644 --- a/makefile +++ b/makefile @@ -43,13 +43,14 @@ mp_reduce_2k_l.o mp_reduce_2k_setup.o mp_reduce_2k_setup_l.o mp_reduce_is_2k.o m mp_reduce_setup.o mp_root_n.o mp_rshd.o mp_sbin_size.o mp_set.o mp_set_double.o mp_set_i32.o mp_set_i64.o \ mp_set_l.o mp_set_u32.o mp_set_u64.o mp_set_ul.o mp_shrink.o mp_signed_rsh.o mp_sqrmod.o mp_sqrt.o \ mp_sqrtmod_prime.o mp_sub.o mp_sub_d.o mp_submod.o mp_to_radix.o mp_to_sbin.o mp_to_ubin.o mp_ubin_size.o \ -mp_unpack.o mp_xor.o mp_zero.o s_mp_add.o s_mp_copy_digs.o s_mp_div_3.o s_mp_div_recursive.o \ -s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o s_mp_exptmod_fast.o s_mp_fp_log.o s_mp_fp_log_d.o \ -s_mp_get_bit.o s_mp_invmod.o s_mp_invmod_odd.o s_mp_log_2expt.o s_mp_montgomery_reduce_comba.o s_mp_mul.o \ -s_mp_mul_balance.o s_mp_mul_comba.o s_mp_mul_high.o s_mp_mul_high_comba.o s_mp_mul_karatsuba.o \ -s_mp_mul_toom.o s_mp_prime_is_divisible.o s_mp_prime_tab.o s_mp_radix_map.o \ -s_mp_radix_size_overestimate.o s_mp_rand_platform.o s_mp_sqr.o s_mp_sqr_comba.o s_mp_sqr_karatsuba.o \ -s_mp_sqr_toom.o s_mp_sub.o s_mp_zero_buf.o s_mp_zero_digs.o +mp_unpack.o mp_warray_free.o mp_xor.o mp_zero.o s_mp_add.o 
s_mp_copy_digs.o s_mp_div_3.o \ +s_mp_div_recursive.o s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o s_mp_exptmod_fast.o s_mp_fp_log.o \ +s_mp_fp_log_d.o s_mp_get_bit.o s_mp_invmod.o s_mp_invmod_odd.o s_mp_log_2expt.o \ +s_mp_montgomery_reduce_comba.o s_mp_mul.o s_mp_mul_balance.o s_mp_mul_comba.o s_mp_mul_high.o \ +s_mp_mul_high_comba.o s_mp_mul_karatsuba.o s_mp_mul_toom.o s_mp_prime_is_divisible.o s_mp_prime_tab.o \ +s_mp_radix_map.o s_mp_radix_size_overestimate.o s_mp_rand_platform.o s_mp_sqr.o s_mp_sqr_comba.o \ +s_mp_sqr_karatsuba.o s_mp_sqr_toom.o s_mp_sub.o s_mp_warray.o s_mp_warray_get.o s_mp_warray_put.o \ +s_mp_zero_buf.o s_mp_zero_digs.o #END_INS @@ -172,9 +173,10 @@ c89: -e 's/UINT32_MAX/0xFFFFFFFFu/g' \ -e 's/UINT64_MAX/(mp_u64)-1/g' \ -e 's/INT32_MAX/0x7FFFFFFF/g' \ - -e 's/INT32_MIN/(-2147483647-1)/g' \ + -e 's/INT32_MIN/(-2147483647-1)/g' \ -e 's/INT64_MAX/(mp_i64)(((mp_u64)1<<63)-1)/g' \ -e 's/INT64_MIN/(mp_i64)((mp_u64)1<<63)/g' \ + -e 's/uintptr_t/mp_uintptr/g' \ -e 's/SIZE_MAX/((size_t)-1)/g' \ -e 's/\(PRI[ioux]64\)/MP_\1/g' \ -e 's/uint\([0-9][0-9]*\)_t/mp_u\1/g' \ @@ -195,10 +197,11 @@ c99: -e 's/false_/MP_NO_/g' \ -e 's/0xFFFFFFFFu/UINT32_MAX/g' \ -e 's/(mp_u64)-1/UINT64_MAX/g' \ - -e 's/(-2147483647-1)/INT32_MIN/g' \ + -e 's/(-2147483647-1)/INT32_MIN/g' \ -e 's/0x7FFFFFFF/INT32_MAX/g' \ -e 's/(mp_i64)((mp_u64)1<<63)/INT64_MIN/g' \ -e 's/(mp_i64)(((mp_u64)1<<63)-1)/INT64_MAX/g' \ + -e 's/mp_uintptr/uintptr_t/g' \ -e 's/((size_t)-1)/SIZE_MAX/g' \ -e 's/MP_\(PRI[ioux]64\)/\1/g' \ -e 's/mp_u\([0-9][0-9]*\)/uint\1_t/g' \ diff --git a/makefile.mingw b/makefile.mingw index 532747be..e2445e8a 100644 --- a/makefile.mingw +++ b/makefile.mingw @@ -45,13 +45,14 @@ mp_reduce_2k_l.o mp_reduce_2k_setup.o mp_reduce_2k_setup_l.o mp_reduce_is_2k.o m mp_reduce_setup.o mp_root_n.o mp_rshd.o mp_sbin_size.o mp_set.o mp_set_double.o mp_set_i32.o mp_set_i64.o \ mp_set_l.o mp_set_u32.o mp_set_u64.o mp_set_ul.o mp_shrink.o mp_signed_rsh.o mp_sqrmod.o mp_sqrt.o \ 
mp_sqrtmod_prime.o mp_sub.o mp_sub_d.o mp_submod.o mp_to_radix.o mp_to_sbin.o mp_to_ubin.o mp_ubin_size.o \ -mp_unpack.o mp_xor.o mp_zero.o s_mp_add.o s_mp_copy_digs.o s_mp_div_3.o s_mp_div_recursive.o \ -s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o s_mp_exptmod_fast.o s_mp_fp_log.o s_mp_fp_log_d.o \ -s_mp_get_bit.o s_mp_invmod.o s_mp_invmod_odd.o s_mp_log_2expt.o s_mp_montgomery_reduce_comba.o s_mp_mul.o \ -s_mp_mul_balance.o s_mp_mul_comba.o s_mp_mul_high.o s_mp_mul_high_comba.o s_mp_mul_karatsuba.o \ -s_mp_mul_toom.o s_mp_prime_is_divisible.o s_mp_prime_tab.o s_mp_radix_map.o \ -s_mp_radix_size_overestimate.o s_mp_rand_platform.o s_mp_sqr.o s_mp_sqr_comba.o s_mp_sqr_karatsuba.o \ -s_mp_sqr_toom.o s_mp_sub.o s_mp_zero_buf.o s_mp_zero_digs.o +mp_unpack.o mp_warray_free.o mp_xor.o mp_zero.o s_mp_add.o s_mp_copy_digs.o s_mp_div_3.o \ +s_mp_div_recursive.o s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o s_mp_exptmod_fast.o s_mp_fp_log.o \ +s_mp_fp_log_d.o s_mp_get_bit.o s_mp_invmod.o s_mp_invmod_odd.o s_mp_log_2expt.o \ +s_mp_montgomery_reduce_comba.o s_mp_mul.o s_mp_mul_balance.o s_mp_mul_comba.o s_mp_mul_high.o \ +s_mp_mul_high_comba.o s_mp_mul_karatsuba.o s_mp_mul_toom.o s_mp_prime_is_divisible.o s_mp_prime_tab.o \ +s_mp_radix_map.o s_mp_radix_size_overestimate.o s_mp_rand_platform.o s_mp_sqr.o s_mp_sqr_comba.o \ +s_mp_sqr_karatsuba.o s_mp_sqr_toom.o s_mp_sub.o s_mp_warray.o s_mp_warray_get.o s_mp_warray_put.o \ +s_mp_zero_buf.o s_mp_zero_digs.o HEADERS_PUB=tommath.h HEADERS=tommath_private.h tommath_class.h tommath_superclass.h tommath_cutoffs.h $(HEADERS_PUB) diff --git a/makefile.msvc b/makefile.msvc index 5d128549..8feb425c 100644 --- a/makefile.msvc +++ b/makefile.msvc @@ -41,13 +41,14 @@ mp_reduce_2k_l.obj mp_reduce_2k_setup.obj mp_reduce_2k_setup_l.obj mp_reduce_is_ mp_reduce_setup.obj mp_root_n.obj mp_rshd.obj mp_sbin_size.obj mp_set.obj mp_set_double.obj mp_set_i32.obj mp_set_i64.obj \ mp_set_l.obj mp_set_u32.obj mp_set_u64.obj mp_set_ul.obj 
mp_shrink.obj mp_signed_rsh.obj mp_sqrmod.obj mp_sqrt.obj \ mp_sqrtmod_prime.obj mp_sub.obj mp_sub_d.obj mp_submod.obj mp_to_radix.obj mp_to_sbin.obj mp_to_ubin.obj mp_ubin_size.obj \ -mp_unpack.obj mp_xor.obj mp_zero.obj s_mp_add.obj s_mp_copy_digs.obj s_mp_div_3.obj s_mp_div_recursive.obj \ -s_mp_div_school.obj s_mp_div_small.obj s_mp_exptmod.obj s_mp_exptmod_fast.obj s_mp_fp_log.obj s_mp_fp_log_d.obj \ -s_mp_get_bit.obj s_mp_invmod.obj s_mp_invmod_odd.obj s_mp_log_2expt.obj s_mp_montgomery_reduce_comba.obj s_mp_mul.obj \ -s_mp_mul_balance.obj s_mp_mul_comba.obj s_mp_mul_high.obj s_mp_mul_high_comba.obj s_mp_mul_karatsuba.obj \ -s_mp_mul_toom.obj s_mp_prime_is_divisible.obj s_mp_prime_tab.obj s_mp_radix_map.obj \ -s_mp_radix_size_overestimate.obj s_mp_rand_platform.obj s_mp_sqr.obj s_mp_sqr_comba.obj s_mp_sqr_karatsuba.obj \ -s_mp_sqr_toom.obj s_mp_sub.obj s_mp_zero_buf.obj s_mp_zero_digs.obj +mp_unpack.obj mp_warray_free.obj mp_xor.obj mp_zero.obj s_mp_add.obj s_mp_copy_digs.obj s_mp_div_3.obj \ +s_mp_div_recursive.obj s_mp_div_school.obj s_mp_div_small.obj s_mp_exptmod.obj s_mp_exptmod_fast.obj s_mp_fp_log.obj \ +s_mp_fp_log_d.obj s_mp_get_bit.obj s_mp_invmod.obj s_mp_invmod_odd.obj s_mp_log_2expt.obj \ +s_mp_montgomery_reduce_comba.obj s_mp_mul.obj s_mp_mul_balance.obj s_mp_mul_comba.obj s_mp_mul_high.obj \ +s_mp_mul_high_comba.obj s_mp_mul_karatsuba.obj s_mp_mul_toom.obj s_mp_prime_is_divisible.obj s_mp_prime_tab.obj \ +s_mp_radix_map.obj s_mp_radix_size_overestimate.obj s_mp_rand_platform.obj s_mp_sqr.obj s_mp_sqr_comba.obj \ +s_mp_sqr_karatsuba.obj s_mp_sqr_toom.obj s_mp_sub.obj s_mp_warray.obj s_mp_warray_get.obj s_mp_warray_put.obj \ +s_mp_zero_buf.obj s_mp_zero_digs.obj HEADERS_PUB=tommath.h HEADERS=tommath_private.h tommath_class.h tommath_superclass.h tommath_cutoffs.h $(HEADERS_PUB) diff --git a/makefile.shared b/makefile.shared index c9b93351..50c33526 100644 --- a/makefile.shared +++ b/makefile.shared @@ -40,13 +40,14 @@ mp_reduce_2k_l.o 
mp_reduce_2k_setup.o mp_reduce_2k_setup_l.o mp_reduce_is_2k.o m mp_reduce_setup.o mp_root_n.o mp_rshd.o mp_sbin_size.o mp_set.o mp_set_double.o mp_set_i32.o mp_set_i64.o \ mp_set_l.o mp_set_u32.o mp_set_u64.o mp_set_ul.o mp_shrink.o mp_signed_rsh.o mp_sqrmod.o mp_sqrt.o \ mp_sqrtmod_prime.o mp_sub.o mp_sub_d.o mp_submod.o mp_to_radix.o mp_to_sbin.o mp_to_ubin.o mp_ubin_size.o \ -mp_unpack.o mp_xor.o mp_zero.o s_mp_add.o s_mp_copy_digs.o s_mp_div_3.o s_mp_div_recursive.o \ -s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o s_mp_exptmod_fast.o s_mp_fp_log.o s_mp_fp_log_d.o \ -s_mp_get_bit.o s_mp_invmod.o s_mp_invmod_odd.o s_mp_log_2expt.o s_mp_montgomery_reduce_comba.o s_mp_mul.o \ -s_mp_mul_balance.o s_mp_mul_comba.o s_mp_mul_high.o s_mp_mul_high_comba.o s_mp_mul_karatsuba.o \ -s_mp_mul_toom.o s_mp_prime_is_divisible.o s_mp_prime_tab.o s_mp_radix_map.o \ -s_mp_radix_size_overestimate.o s_mp_rand_platform.o s_mp_sqr.o s_mp_sqr_comba.o s_mp_sqr_karatsuba.o \ -s_mp_sqr_toom.o s_mp_sub.o s_mp_zero_buf.o s_mp_zero_digs.o +mp_unpack.o mp_warray_free.o mp_xor.o mp_zero.o s_mp_add.o s_mp_copy_digs.o s_mp_div_3.o \ +s_mp_div_recursive.o s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o s_mp_exptmod_fast.o s_mp_fp_log.o \ +s_mp_fp_log_d.o s_mp_get_bit.o s_mp_invmod.o s_mp_invmod_odd.o s_mp_log_2expt.o \ +s_mp_montgomery_reduce_comba.o s_mp_mul.o s_mp_mul_balance.o s_mp_mul_comba.o s_mp_mul_high.o \ +s_mp_mul_high_comba.o s_mp_mul_karatsuba.o s_mp_mul_toom.o s_mp_prime_is_divisible.o s_mp_prime_tab.o \ +s_mp_radix_map.o s_mp_radix_size_overestimate.o s_mp_rand_platform.o s_mp_sqr.o s_mp_sqr_comba.o \ +s_mp_sqr_karatsuba.o s_mp_sqr_toom.o s_mp_sub.o s_mp_warray.o s_mp_warray_get.o s_mp_warray_put.o \ +s_mp_zero_buf.o s_mp_zero_digs.o #END_INS diff --git a/makefile.unix b/makefile.unix index 34ebd1a8..58642098 100644 --- a/makefile.unix +++ b/makefile.unix @@ -46,13 +46,14 @@ mp_reduce_2k_l.o mp_reduce_2k_setup.o mp_reduce_2k_setup_l.o mp_reduce_is_2k.o m mp_reduce_setup.o 
mp_root_n.o mp_rshd.o mp_sbin_size.o mp_set.o mp_set_double.o mp_set_i32.o mp_set_i64.o \ mp_set_l.o mp_set_u32.o mp_set_u64.o mp_set_ul.o mp_shrink.o mp_signed_rsh.o mp_sqrmod.o mp_sqrt.o \ mp_sqrtmod_prime.o mp_sub.o mp_sub_d.o mp_submod.o mp_to_radix.o mp_to_sbin.o mp_to_ubin.o mp_ubin_size.o \ -mp_unpack.o mp_xor.o mp_zero.o s_mp_add.o s_mp_copy_digs.o s_mp_div_3.o s_mp_div_recursive.o \ -s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o s_mp_exptmod_fast.o s_mp_fp_log.o s_mp_fp_log_d.o \ -s_mp_get_bit.o s_mp_invmod.o s_mp_invmod_odd.o s_mp_log_2expt.o s_mp_montgomery_reduce_comba.o s_mp_mul.o \ -s_mp_mul_balance.o s_mp_mul_comba.o s_mp_mul_high.o s_mp_mul_high_comba.o s_mp_mul_karatsuba.o \ -s_mp_mul_toom.o s_mp_prime_is_divisible.o s_mp_prime_tab.o s_mp_radix_map.o \ -s_mp_radix_size_overestimate.o s_mp_rand_platform.o s_mp_sqr.o s_mp_sqr_comba.o s_mp_sqr_karatsuba.o \ -s_mp_sqr_toom.o s_mp_sub.o s_mp_zero_buf.o s_mp_zero_digs.o +mp_unpack.o mp_warray_free.o mp_xor.o mp_zero.o s_mp_add.o s_mp_copy_digs.o s_mp_div_3.o \ +s_mp_div_recursive.o s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o s_mp_exptmod_fast.o s_mp_fp_log.o \ +s_mp_fp_log_d.o s_mp_get_bit.o s_mp_invmod.o s_mp_invmod_odd.o s_mp_log_2expt.o \ +s_mp_montgomery_reduce_comba.o s_mp_mul.o s_mp_mul_balance.o s_mp_mul_comba.o s_mp_mul_high.o \ +s_mp_mul_high_comba.o s_mp_mul_karatsuba.o s_mp_mul_toom.o s_mp_prime_is_divisible.o s_mp_prime_tab.o \ +s_mp_radix_map.o s_mp_radix_size_overestimate.o s_mp_rand_platform.o s_mp_sqr.o s_mp_sqr_comba.o \ +s_mp_sqr_karatsuba.o s_mp_sqr_toom.o s_mp_sub.o s_mp_warray.o s_mp_warray_get.o s_mp_warray_put.o \ +s_mp_zero_buf.o s_mp_zero_digs.o HEADERS_PUB=tommath.h diff --git a/makefile_include.mk b/makefile_include.mk index da897396..d47ea2ba 100644 --- a/makefile_include.mk +++ b/makefile_include.mk @@ -97,7 +97,7 @@ endif endif # COMPILE_SIZE ifneq ($(findstring clang,$(CC)),) -LTM_CFLAGS += -Wno-typedef-redefinition -Wno-tautological-compare 
-Wno-builtin-requires-header +LTM_CFLAGS += -Wno-unknown-warning-option -Wno-typedef-redefinition -Wno-tautological-compare -Wno-builtin-requires-header -Wno-incomplete-setjmp-declaration ifdef IGNORE_SPEED #for dead code eliminiation LTM_CFLAGS += -O1 diff --git a/mp_warray_free.c b/mp_warray_free.c new file mode 100644 index 00000000..f7470f81 --- /dev/null +++ b/mp_warray_free.c @@ -0,0 +1,28 @@ +#include "tommath_private.h" +#ifdef MP_WARRAY_FREE_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis */ +/* SPDX-License-Identifier: Unlicense */ + +/* static check that the multiplication won't overflow */ +MP_STATIC_ASSERT(warray_free_sz_does_not_overflow, (sizeof(mp_word) * MP_WARRAY) >= MP_WARRAY) + +static int s_warray_free(void) +{ + int ret = 0; + if (s_mp_warray.w_used) + return -2; + if (s_mp_warray.w_free) { + s_mp_zero_buf(s_mp_warray.w_free, sizeof(mp_word) * MP_WARRAY); + MP_FREE(s_mp_warray.w_free, sizeof(mp_word) * MP_WARRAY); + s_mp_warray.w_free = NULL; + } + return ret; +} + +int mp_warray_free(void) +{ + if (MP_HAS(MP_SMALL_STACK_SIZE)) return s_warray_free(); + return -1; +} + +#endif diff --git a/s_mp_montgomery_reduce_comba.c b/s_mp_montgomery_reduce_comba.c index 7472caf3..3858f75a 100644 --- a/s_mp_montgomery_reduce_comba.c +++ b/s_mp_montgomery_reduce_comba.c @@ -15,9 +15,12 @@ mp_err s_mp_montgomery_reduce_comba(mp_int *x, const mp_int *n, mp_digit rho) { int ix, oldused; mp_err err; - mp_word W[MP_WARRAY]; + mp_word MP_ALLOC_WARRAY(W); + + MP_CHECK_WARRAY(W); if (x->used > MP_WARRAY) { + MP_FREE_WARRAY(W); return MP_VAL; } @@ -26,6 +29,7 @@ mp_err s_mp_montgomery_reduce_comba(mp_int *x, const mp_int *n, mp_digit rho) /* grow a as required */ if ((err = mp_grow(x, n->used + 1)) != MP_OKAY) { + MP_FREE_WARRAY(W); return err; } @@ -110,6 +114,7 @@ mp_err s_mp_montgomery_reduce_comba(mp_int *x, const mp_int *n, mp_digit rho) mp_clamp(x); + MP_FREE_WARRAY(W); /* if A >= m then A = A - m */ if (mp_cmp_mag(x, n) != MP_LT) { return 
s_mp_sub(x, n, x); diff --git a/s_mp_mul_comba.c b/s_mp_mul_comba.c index ca89ff9d..5b37035e 100644 --- a/s_mp_mul_comba.c +++ b/s_mp_mul_comba.c @@ -23,15 +23,19 @@ mp_err s_mp_mul_comba(const mp_int *a, const mp_int *b, mp_int *c, int digs) { int oldused, pa, ix; mp_err err; - mp_digit W[MP_WARRAY]; + mp_digit MP_ALLOC_WARRAY(W); mp_word _W; + MP_CHECK_WARRAY(W); + if (digs < 0) { + MP_FREE_WARRAY(W); return MP_VAL; } /* grow the destination as required */ if ((err = mp_grow(c, digs)) != MP_OKAY) { + MP_FREE_WARRAY(W); return err; } @@ -77,6 +81,7 @@ mp_err s_mp_mul_comba(const mp_int *a, const mp_int *b, mp_int *c, int digs) s_mp_zero_digs(c->dp + c->used, oldused - c->used); mp_clamp(c); + MP_FREE_WARRAY(W); return MP_OKAY; } #endif diff --git a/s_mp_mul_high_comba.c b/s_mp_mul_high_comba.c index b5ac06d7..b0096d4e 100644 --- a/s_mp_mul_high_comba.c +++ b/s_mp_mul_high_comba.c @@ -16,16 +16,20 @@ mp_err s_mp_mul_high_comba(const mp_int *a, const mp_int *b, mp_int *c, int digs { int oldused, pa, ix; mp_err err; - mp_digit W[MP_WARRAY]; + mp_digit MP_ALLOC_WARRAY(W); mp_word _W; + MP_CHECK_WARRAY(W); + if (digs < 0) { + MP_FREE_WARRAY(W); return MP_VAL; } /* grow the destination as required */ pa = a->used + b->used; if ((err = mp_grow(c, pa)) != MP_OKAY) { + MP_FREE_WARRAY(W); return err; } @@ -69,6 +73,7 @@ mp_err s_mp_mul_high_comba(const mp_int *a, const mp_int *b, mp_int *c, int digs s_mp_zero_digs(c->dp + c->used, oldused - c->used); mp_clamp(c); + MP_FREE_WARRAY(W); return MP_OKAY; } #endif diff --git a/s_mp_sqr_comba.c b/s_mp_sqr_comba.c index 1bcc1f93..336a0a08 100644 --- a/s_mp_sqr_comba.c +++ b/s_mp_sqr_comba.c @@ -16,13 +16,16 @@ After that loop you do the squares and add them in. 
mp_err s_mp_sqr_comba(const mp_int *a, mp_int *b) { int oldused, pa, ix; - mp_digit W[MP_WARRAY]; + mp_digit MP_ALLOC_WARRAY(W); mp_word W1; mp_err err; + MP_CHECK_WARRAY(W); + /* grow the destination as required */ pa = a->used + a->used; if ((err = mp_grow(b, pa)) != MP_OKAY) { + MP_FREE_WARRAY(W); return err; } @@ -82,6 +85,7 @@ mp_err s_mp_sqr_comba(const mp_int *a, mp_int *b) s_mp_zero_digs(b->dp + b->used, oldused - b->used); mp_clamp(b); + MP_FREE_WARRAY(W); return MP_OKAY; } #endif diff --git a/s_mp_warray.c b/s_mp_warray.c new file mode 100644 index 00000000..1b8b068b --- /dev/null +++ b/s_mp_warray.c @@ -0,0 +1,8 @@ +#include "tommath_private.h" +#ifdef S_MP_WARRAY_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis */ +/* SPDX-License-Identifier: Unlicense */ + +mp_thread st_warray s_mp_warray = { 0 }; + +#endif diff --git a/s_mp_warray_get.c b/s_mp_warray_get.c new file mode 100644 index 00000000..26b0d7c1 --- /dev/null +++ b/s_mp_warray_get.c @@ -0,0 +1,18 @@ +#include "tommath_private.h" +#ifdef S_MP_WARRAY_GET_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis */ +/* SPDX-License-Identifier: Unlicense */ + +void *s_mp_warray_get(void) +{ + if (s_mp_warray.w_used) + return NULL; + if (s_mp_warray.w_free == NULL) { + s_mp_warray.w_free = MP_CALLOC(MP_WARRAY, sizeof(mp_word)); + } + s_mp_warray.w_used = s_mp_warray.w_free; + s_mp_warray.w_free = NULL; + return s_mp_warray.w_used; +} + +#endif diff --git a/s_mp_warray_put.c b/s_mp_warray_put.c new file mode 100644 index 00000000..79e014ac --- /dev/null +++ b/s_mp_warray_put.c @@ -0,0 +1,14 @@ +#include "tommath_private.h" +#ifdef S_MP_WARRAY_PUT_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis */ +/* SPDX-License-Identifier: Unlicense */ + +void s_mp_warray_put(void *w) +{ + if (s_mp_warray.w_free || s_mp_warray.w_used != w) + return; + s_mp_warray.w_free = w; + s_mp_warray.w_used = NULL; +} + +#endif diff --git a/sources.cmake b/sources.cmake index 
bbb2aeab..103e9c09 100644 --- a/sources.cmake +++ b/sources.cmake @@ -122,6 +122,7 @@ mp_to_sbin.c mp_to_ubin.c mp_ubin_size.c mp_unpack.c +mp_warray_free.c mp_xor.c mp_zero.c s_mp_add.c @@ -156,6 +157,9 @@ s_mp_sqr_comba.c s_mp_sqr_karatsuba.c s_mp_sqr_toom.c s_mp_sub.c +s_mp_warray.c +s_mp_warray_get.c +s_mp_warray_put.c s_mp_zero_buf.c s_mp_zero_digs.c ) diff --git a/testme.sh b/testme.sh index 089e42a7..92997a04 100755 --- a/testme.sh +++ b/testme.sh @@ -70,6 +70,8 @@ All other options will be tested with all MP_xBIT configurations. runtime and may trigger the 30 minutes timeout. + --multithread Run tests in multi-threaded mode (via pthread). + Godmode: --all Choose all architectures and gcc and clang @@ -128,7 +130,7 @@ _make() echo -ne " Compile $1 $2" suffix=$(echo ${1}${2} | tr ' ' '_') _fixup_cflags "$1" - CC="$1" CFLAGS="$2 $TEST_CFLAGS" make -j$MAKE_JOBS $3 $MAKE_OPTIONS 2>gcc_errors_${suffix}.log + CC="$1" CFLAGS="$2 $TEST_CFLAGS" LFLAGS="$4" LDFLAGS="$5" make -j$MAKE_JOBS $3 $MAKE_OPTIONS 2>gcc_errors_${suffix}.log errcnt=$(wc -l < gcc_errors_${suffix}.log) if [[ ${errcnt} -gt 1 ]]; then echo " failed" @@ -148,10 +150,10 @@ _runtest() # "make tune" will run "tune_it.sh" automatically, hence "autotune", but it cannot # get switched off without some effort, so we just let it run twice for testing purposes echo -e "\rRun autotune $1 $2" - _make "$1" "$2" "" + _make "$1" "$2" "" "$3" "$4" $_timeout $TUNE_CMD > test_${suffix}.log || _die "running autotune" $? else - _make "$1" "$2" "test" + _make "$1" "$2" "test" "$3" "$4" echo -e "\rRun test $1 $2" $_timeout ./test > test_${suffix}.log || _die "running tests" $? 
fi @@ -171,13 +173,13 @@ echo "MAKE_OPTIONS = \"$MAKE_OPTIONS\"" if [[ "$MAKE_OPTIONS" =~ "tune" ]] then echo "autotune branch" - _make "$1" "$2" "" + _make "$1" "$2" "" "$3" "$4" # The shell used for /bin/sh is DASH 0.5.7-4ubuntu1 on the author's machine which fails valgrind, so # we just run on instance of etc/tune with the same options as in etc/tune_it.sh echo -e "\rRun etc/tune $1 $2 once inside valgrind" $_timeout $VALGRIND_BIN $VALGRIND_OPTS $TUNE_CMD > test_${suffix}.log || _die "running etc/tune" $? else - _make "$1" "$2" "test" + _make "$1" "$2" "test" "$3" "$4" echo -e "\rRun test $1 $2 inside valgrind" $_timeout $VALGRIND_BIN $VALGRIND_OPTS ./test > test_${suffix}.log || _die "running tests" $? fi @@ -301,6 +303,11 @@ do --symbols) CHECK_SYMBOLS="1" ;; + --multithread) + CFLAGS="$CFLAGS -DLTM_TEST_MULTITHREAD" + LFLAGS="$LFLAGS -pthread" + LDFLAGS="$LDFLAGS -pthread" + ;; --all) COMPILERS="gcc clang" ARCHFLAGS="-m64 -m32 -mx32" @@ -376,9 +383,9 @@ then _banner "$CC" if [[ "$VALGRIND_BIN" != "" ]] then - _runvalgrind "$CC" "" + _runvalgrind "$CC" "" "$LFLAGS" "$LDFLAGS" else - _runtest "$CC" "" + _runtest "$CC" "" "$LFLAGS" "$LDFLAGS" fi _exit fi @@ -398,9 +405,9 @@ _banner if [[ "$TEST_VS_MTEST" != "" ]] then make clean > /dev/null - _make "${compilers[0]}" "${archflags[0]} $CFLAGS" "mtest_opponent" + _make "${compilers[0]}" "${archflags[0]} $CFLAGS" "mtest_opponent" "$LFLAGS" "$LDFLAGS" echo - _make "gcc" "$MTEST_RAND" "mtest" + _make "gcc" "$MTEST_RAND" "mtest" "$LFLAGS" "$LDFLAGS" echo echo "Run test vs. 
mtest for $TEST_VS_MTEST iterations" _timeout="" @@ -429,15 +436,15 @@ do fi if [[ "$VALGRIND_BIN" != "" ]] then - _runvalgrind "$i" "$a $CFLAGS" + _runvalgrind "$i" "$a $CFLAGS" "$LFLAGS" "$LDFLAGS" [ "$WITH_LOW_MP" != "1" ] && continue - _runvalgrind "$i" "$a -DMP_16BIT $CFLAGS" - _runvalgrind "$i" "$a -DMP_32BIT $CFLAGS" + _runvalgrind "$i" "$a -DMP_16BIT $CFLAGS" "$LFLAGS" "$LDFLAGS" + _runvalgrind "$i" "$a -DMP_32BIT $CFLAGS" "$LFLAGS" "$LDFLAGS" else - _runtest "$i" "$a $CFLAGS" + _runtest "$i" "$a $CFLAGS" "$LFLAGS" "$LDFLAGS" [ "$WITH_LOW_MP" != "1" ] && continue - _runtest "$i" "$a -DMP_16BIT $CFLAGS" - _runtest "$i" "$a -DMP_32BIT $CFLAGS" + _runtest "$i" "$a -DMP_16BIT $CFLAGS" "$LFLAGS" "$LDFLAGS" + _runtest "$i" "$a -DMP_32BIT $CFLAGS" "$LFLAGS" "$LDFLAGS" fi done done diff --git a/tommath.def b/tommath.def index 86f34872..ed5aa8b0 100644 --- a/tommath.def +++ b/tommath.def @@ -125,6 +125,7 @@ EXPORTS mp_to_ubin mp_ubin_size mp_unpack + mp_warray_free mp_xor mp_zero MP_MUL_KARATSUBA_CUTOFF diff --git a/tommath.h b/tommath.h index 84bb0909..1820d243 100644 --- a/tommath.h +++ b/tommath.h @@ -588,6 +588,8 @@ mp_err mp_fread(mp_int *a, int radix, FILE *stream) MP_WUR; mp_err mp_fwrite(const mp_int *a, int radix, FILE *stream) MP_WUR; #endif +int mp_warray_free(void); + #define mp_to_binary(M, S, N) mp_to_radix((M), (S), (N), NULL, 2) #define mp_to_octal(M, S, N) mp_to_radix((M), (S), (N), NULL, 8) #define mp_to_decimal(M, S, N) mp_to_radix((M), (S), (N), NULL, 10) diff --git a/tommath_c89.h b/tommath_c89.h index 49400a13..22436366 100644 --- a/tommath_c89.h +++ b/tommath_c89.h @@ -26,6 +26,11 @@ typedef __UINT8_TYPE__ mp_u8; typedef __UINT16_TYPE__ mp_u16; typedef __UINT32_TYPE__ mp_u32; typedef __UINT64_TYPE__ mp_u64; +# if __WORDSIZE == 64 +typedef __UINT64_TYPE__ mp_uintptr; +# else +typedef __UINT32_TYPE__ mp_uintptr; +# endif /* inttypes.h replacement, printf format specifier */ # if __WORDSIZE == 64 diff --git a/tommath_class.h b/tommath_class.h 
index e08bc5f3..09bb3ea6 100644 --- a/tommath_class.h +++ b/tommath_class.h @@ -131,6 +131,7 @@ # define MP_TO_UBIN_C # define MP_UBIN_SIZE_C # define MP_UNPACK_C +# define MP_WARRAY_FREE_C # define MP_XOR_C # define MP_ZERO_C # define S_MP_ADD_C @@ -165,6 +166,9 @@ # define S_MP_SQR_KARATSUBA_C # define S_MP_SQR_TOOM_C # define S_MP_SUB_C +# define S_MP_WARRAY_C +# define S_MP_WARRAY_GET_C +# define S_MP_WARRAY_PUT_C # define S_MP_ZERO_BUF_C # define S_MP_ZERO_DIGS_C #endif @@ -957,6 +961,10 @@ # define MP_ZERO_C #endif +#if defined(MP_WARRAY_FREE_C) +# define S_MP_ZERO_BUF_C +#endif + #if defined(MP_XOR_C) # define MP_CLAMP_C # define MP_GROW_C @@ -1137,6 +1145,8 @@ # define MP_CMP_MAG_C # define MP_GROW_C # define S_MP_SUB_C +# define S_MP_WARRAY_GET_C +# define S_MP_WARRAY_PUT_C # define S_MP_ZERO_BUF_C # define S_MP_ZERO_DIGS_C #endif @@ -1165,6 +1175,8 @@ #if defined(S_MP_MUL_COMBA_C) # define MP_CLAMP_C # define MP_GROW_C +# define S_MP_WARRAY_GET_C +# define S_MP_WARRAY_PUT_C # define S_MP_ZERO_DIGS_C #endif @@ -1179,6 +1191,8 @@ #if defined(S_MP_MUL_HIGH_COMBA_C) # define MP_CLAMP_C # define MP_GROW_C +# define S_MP_WARRAY_GET_C +# define S_MP_WARRAY_PUT_C # define S_MP_ZERO_DIGS_C #endif @@ -1244,6 +1258,8 @@ #if defined(S_MP_SQR_COMBA_C) # define MP_CLAMP_C # define MP_GROW_C +# define S_MP_WARRAY_GET_C +# define S_MP_WARRAY_PUT_C # define S_MP_ZERO_DIGS_C #endif @@ -1279,6 +1295,15 @@ # define S_MP_ZERO_DIGS_C #endif +#if defined(S_MP_WARRAY_C) +#endif + +#if defined(S_MP_WARRAY_GET_C) +#endif + +#if defined(S_MP_WARRAY_PUT_C) +#endif + #if defined(S_MP_ZERO_BUF_C) #endif diff --git a/tommath_private.h b/tommath_private.h index c1fa95a0..be620dbc 100644 --- a/tommath_private.h +++ b/tommath_private.h @@ -234,6 +234,47 @@ MP_PRIVATE mp_err s_mp_radix_size_overestimate(const mp_int *a, const int radix, MP_PRIVATE mp_err s_mp_fp_log(const mp_int *a, mp_int *c) MP_WUR; MP_PRIVATE mp_err s_mp_fp_log_d(const mp_int *a, mp_word *c) MP_WUR; +#ifdef 
MP_SMALL_STACK_SIZE + +#if defined(__GNUC__) +/* We use TLS (Thread Local Storage) to manage the instance of the WARRAY + * per thread. + * The compilers we're usually looking at are GCC, Clang and MSVC. + * Both GCC and Clang are straightforward with TLS, so it's enabled there. + * With MSVC, the tests passed with the static library, but failed when + * the library was built as a DLL. As a result we completely disable + * support for MSVC. + * If your compiler can handle TLS properly without too much hocus pocus, + * feel free to open a PR to add support for it. + */ +#define mp_thread __thread +#else +#error "MP_SMALL_STACK_SIZE not supported with your compiler" +#endif + +#define MP_SMALL_STACK_SIZE_C +#define MP_ALLOC_WARRAY(name) *name = s_mp_warray_get() +#define MP_FREE_WARRAY(name) s_mp_warray_put(name) +#define MP_CHECK_WARRAY(name) do { if ((name) == NULL) { return MP_MEM; } } while(0) +#else +#define MP_ALLOC_WARRAY(name) name[MP_WARRAY] +#define MP_FREE_WARRAY(name) +#define MP_CHECK_WARRAY(name) +#endif + +#ifndef mp_thread +#define mp_thread +#endif + +typedef struct { + void *w_free, *w_used; +} st_warray; + +extern MP_PRIVATE mp_thread st_warray s_mp_warray; + +MP_PRIVATE void *s_mp_warray_get(void); +MP_PRIVATE void s_mp_warray_put(void *w); + #define MP_RADIX_MAP_REVERSE_SIZE 80u extern MP_PRIVATE const char s_mp_radix_map[]; extern MP_PRIVATE const uint8_t s_mp_radix_map_reverse[]; diff --git a/tommath_superclass.h b/tommath_superclass.h index 9245e002..10c7f12a 100644 --- a/tommath_superclass.h +++ b/tommath_superclass.h @@ -42,6 +42,8 @@ # define MP_SBIN_SIZE_C # define MP_TO_RADIX_C # define MP_TO_SBIN_C +# define MP_WARRAY_FREE_C +# define MP_WARRAY_INIT_C # define S_MP_RAND_JENKINS_C # define S_MP_RAND_PLATFORM_C #endif