bundle relevant libsodium parts

pull/35/head
Jeff Becker 6 years ago
parent cc106ed37b
commit c262f8b5e3
No known key found for this signature in database
GPG Key ID: F357B3B42F6F9B05

@ -1,7 +1,7 @@
cmake_minimum_required(VERSION 2.8.10)
set(PROJECT_NAME lokinet)
project(${PROJECT_NAME})
project(${PROJECT_NAME} C CXX ASM)
macro(add_cflags)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ARGN}")
@ -12,18 +12,11 @@ macro(add_cxxflags)
endmacro(add_cxxflags)
include(CheckCXXCompilerFlag)
CHECK_CXX_COMPILER_FLAG("-std=c++11" COMPILER_SUPPORTS_CXX11)
CHECK_CXX_COMPILER_FLAG("-std=c++17" COMPILER_SUPPORTS_CXX17)
option(HAVE_CXX17_FILESYSTEM "Disable if your C++ compiler and runtime library lacks std::[experimental::]filesystem" ON)
if(COMPILER_SUPPORTS_CXX11 AND NOT HAVE_CXX17_FILESYSTEM)
add_cxxflags("-std=c++11")
elseif(COMPILER_SUPPORTS_CXX17 AND HAVE_CXX17_FILESYSTEM)
if(COMPILER_SUPPORTS_CXX17)
add_cxxflags("-std=c++17")
add_definitions(-DUSE_CXX17_FILESYSTEM)
else()
message(ERROR "The compiler ${CMAKE_CXX_COMPILER} has no C++11 or C++17 support. Please use a different C++ compiler.")
message(ERROR "The compiler ${CMAKE_CXX_COMPILER} has no C++17 support. Please use a different C++ compiler.")
endif()
add_cxxflags("-fpermissive")
@ -103,8 +96,11 @@ if(CMAKE_BUILD_TYPE MATCHES "[Dd][Ee][Bb][Uu][Gg]")
add_cxxflags("${DEBUG_FLAGS}")
endif()
add_cflags("-Wall -Wno-deprecated-declarations ${OPTIMIZE_FLAGS}")
add_cxxflags("-Wall -Wno-deprecated-declarations ${OPTIMIZE_FLAGS}")
set(CRYPTO_FLAGS "-march=native")
set(CMAKE_ASM_FLAGS "-march=native")
add_cflags("-Wall -Wno-deprecated-declarations ${OPTIMIZE_FLAGS} ${CRYPTO_FLAGS}")
add_cxxflags("-Wall -Wno-deprecated-declarations ${OPTIMIZE_FLAGS} ${CRYPTO_FLAGS}")
if(SHADOW)
add_cflags("-fPIC")
@ -123,20 +119,6 @@ endif()
set(EXE lokinet)
set(EXE_SRC daemon/main.cpp)
if(SODIUM_INCLUDE_DIR)
include_directories(${SODIUM_INCLUDE_DIR})
endif()
if(SODIUM_LIBRARIES)
set(SODIUM_LIB ${SODIUM_LIBRARIES})
else()
find_library (
SODIUM_LIB
NAMES sodium libsodium
HINTS "/usr/local/lib"
)
endif()
# HeapAlloc(2) on Windows was significantly revamped in 2009
# but the old algorithm isn't too bad either
# this is _the_ system allocator on BSD UNIX
@ -146,10 +128,8 @@ if(JEMALLOC)
set(MALLOC_LIB jemalloc)
endif()
set(LIBS ${SODIUM_LIB} ${THREAD_LIB} ${MALLOC_LIB})
if(HAVE_CXX17_FILESYSTEM)
set(LIBS ${LIBS} stdc++fs)
endif()
set(FS_LIB stdc++fs)
set(LIBS ${THREAD_LIB} ${MALLOC_LIB} ${FS_LIB})
set(LIB lokinet)
set(SHARED_LIB ${LIB})
@ -202,24 +182,6 @@ set(LIBTUNTAP_SRC
else()
set(LIBTUNTAP_SRC ${LIBTUNTAP_SRC_BASE})
endif(UNIX)
set(CPP_BACKPORT_SRC
vendor/cppbackport-master/lib/fs/rename.cpp
vendor/cppbackport-master/lib/fs/filestatus.cpp
vendor/cppbackport-master/lib/fs/filetype.cpp
vendor/cppbackport-master/lib/fs/cleanpath.cpp
vendor/cppbackport-master/lib/fs/perms.cpp
vendor/cppbackport-master/lib/fs/equivalent.cpp
vendor/cppbackport-master/lib/fs/current_path.cpp
vendor/cppbackport-master/lib/fs/basename.cpp
vendor/cppbackport-master/lib/fs/tempdir.cpp
vendor/cppbackport-master/lib/fs/create_directory.cpp
vendor/cppbackport-master/lib/fs/path.cpp
vendor/cppbackport-master/lib/fs/remove.cpp
vendor/cppbackport-master/lib/fs/diriter.cpp
vendor/cppbackport-master/lib/fs/copyfile.cpp
vendor/cppbackport-master/lib/fs/absolute.cpp
vendor/cppbackport-master/lib/fs/direntry.cpp
)
if(ANDROID)
add_definitions(-DANDROID)
@ -296,13 +258,83 @@ set(NTRU_REF_SRC
crypto/libntrup/src/ref/rq.c
)
include_directories(crypto/libntrup/include)
include_directories(crypto/include)
set(NTRU_SRC
${NTRU_AVX_SRC}
${NTRU_REF_SRC}
crypto/libntrup/src/ntru.cpp
)
)
set(SHA512_SRC
crypto/sha512/sha512.c)
set(CHACHA_SRC
crypto/chacha20/ref/chacha20_ref.c
crypto/chacha20/dolbeau/chacha20_dolbeau-ssse3.c
crypto/chacha20/dolbeau/chacha20_dolbeau-avx2.c
crypto/chacha20/stream_chacha20.c
crypto/salsa20/ref/salsa20_ref.c
crypto/salsa20/stream_salsa20.c
crypto/salsa20/xmm6/salsa20_xmm6-asm.S
crypto/salsa20/xmm6/salsa20_xmm6.c
crypto/salsa20/xmm6int/salsa20_xmm6int-avx2.c
crypto/salsa20/xmm6int/salsa20_xmm6int-sse2.c
crypto/xchacha20/hchacha.c
crypto/xchacha20/stream_xchacha20.c)
set(CSRNG_SRC
crypto/csrng/randombytes_salsa20_random.c
crypto/csrng/randombytes.c)
set(CRYPTO_MEM_SRC
crypto/secmem/secmem.c)
set(BLAKE2B_SRC
crypto/blake2b/blake2b-compress-avx2.c
crypto/blake2b/blake2b-compress-ref.c
crypto/blake2b/blake2b-compress-sse41.c
crypto/blake2b/blake2b-compress-ssse3.c
crypto/blake2b/blake2b-ref.c
crypto/blake2b/generichash_blake2b.c)
set(X25519_SRC
crypto/curve25519/crypto_scalarmult.c
crypto/curve25519/ref10/x25519_ref10.c
crypto/curve25519/ref10/ed25519_ref10.c
crypto/curve25519/sandy2x/fe51_invert.c
crypto/curve25519/sandy2x/ladder_base.S
crypto/curve25519/sandy2x/curve25519_sandy2x.c
crypto/curve25519/sandy2x/consts.S
crypto/curve25519/sandy2x/fe51_nsquare.S
crypto/curve25519/sandy2x/fe51_mul.S
crypto/curve25519/sandy2x/fe51_pack.S
crypto/curve25519/sandy2x/fe_frombytes_sandy2x.c
crypto/curve25519/sandy2x/sandy2x.S
crypto/curve25519/sandy2x/ladder.S
crypto/curve25519/scalarmult_curve25519.c
crypto/ed25519/crypto_box.c
crypto/ed25519/crypto_sign.c
crypto/ed25519/ref10/open.c
crypto/ed25519/ref10/obsolete.c
crypto/ed25519/ref10/keypair.c
crypto/ed25519/ref10/sign.c
crypto/ed25519/sign_ed25519.c)
set(CRYPTOGRAPHY_SRC
crypto/libsodium/init.c
crypto/libsodium/runtime.c
crypto/verify/crypto_verify.c
${CRYPTO_MEM_SRC}
${CSRNG_SRC}
${BLAKE2B_SRC}
${CHACHA_SRC}
${ED25519_SRC}
${X25519_SRC}
${SHA512_SRC})
set(UTP_SRC
libutp/utp_callbacks.cpp
@ -321,6 +353,7 @@ endif()
set(LIB_SRC
${UTP_SRC}
${NTRU_SRC}
${CRYPTOGRAPHY_SRC}
llarp/address_info.cpp
llarp/bencode.cpp
llarp/buffer.cpp
@ -419,7 +452,7 @@ set(CLIENT_SRC
client/main.cpp
)
set(ALL_SRC ${CLIENT_SRC} ${RC_SRC} ${EXE_SRC} ${DNS_SRC} ${LIB_PLATFORM_SRC} ${LIB_SRC} ${TEST_SRC} ${CPP_BACKPORT_SRC})
set(ALL_SRC ${CLIENT_SRC} ${RC_SRC} ${EXE_SRC} ${DNS_SRC} ${LIB_PLATFORM_SRC} ${LIB_SRC} ${TEST_SRC})
foreach(F ${ALL_SRC})
set_source_files_properties(${F} PROPERTIES COMPILE_FLAGS -DLOG_TAG=\\\"${F}\\\")
@ -428,14 +461,12 @@ endforeach(F)
set(LIB lokinet)
set(SHARED_LIB ${LIB})
set(STATIC_LIB ${LIB}-static)
set(BACKPORT_LIB ${LIB}-backport)
set(PLATFORM_LIB ${LIB}-platform)
set(ANDROID_LIB ${LIB}android)
# TODO: exclude this from includes and expose stuff properly for rcutil
include_directories(llarp)
include_directories(include)
include_directories(vendor/cppbackport-master/lib)
# Neuro Linux box hack:
include_directories(/usr/local/include)
include_directories(${sodium_INCLUDE_DIR})
@ -474,43 +505,24 @@ add_executable(${TEST_EXE} ${TEST_SRC})
if(WITH_STATIC)
add_library(${STATIC_LIB} STATIC ${LIB_SRC})
if(NOT HAVE_CXX17_FILESYSTEM)
add_library(${BACKPORT_LIB} STATIC ${CPP_BACKPORT_SRC})
endif(NOT HAVE_CXX17_FILESYSTEM)
add_library(${PLATFORM_LIB} STATIC ${LIB_PLATFORM_SRC})
target_link_libraries(${PLATFORM_LIB} ${THREAD_LIB} ${ABYSS_LIB})
if(${CMAKE_SYSTEM_NAME} MATCHES "Linux")
target_link_libraries(${PLATFORM_LIB} -lcap)
endif()
if(NOT HAVE_CXX17_FILESYSTEM)
target_link_libraries(${STATIC_LIB} ${LIBS} ${PLATFORM_LIB} ${BACKPORT_LIB})
else()
target_link_libraries(${STATIC_LIB} ${LIBS} ${PLATFORM_LIB})
endif(NOT HAVE_CXX17_FILESYSTEM)
target_link_libraries(${STATIC_LIB} ${LIBS} ${PLATFORM_LIB})
if(NOT WITH_SHARED)
if(NOT HAVE_CXX17_FILESYSTEM)
target_link_libraries(${EXE} ${STATIC_LINK_LIBS} ${STATIC_LIB} ${BACKPORT_LIB} ${PLATFORM_LIB})
target_link_libraries(${CLIENT_EXE} ${STATIC_LINK_LIBS} ${STATIC_LIB} ${BACKPORT_LIB} ${PLATFORM_LIB})
target_link_libraries(${RC_EXE} ${STATIC_LINK_LIBS} ${STATIC_LIB} ${BACKPORT_LIB} ${PLATFORM_LIB})
target_link_libraries(${TEST_EXE} ${STATIC_LINK_LIBS} gtest_main ${STATIC_LIB} ${BACKPORT_LIB} ${PLATFORM_LIB})
if (WIN32)
target_link_libraries(${EXE} ${STATIC_LINK_LIBS} ${STATIC_LIB} ${BACKPORT_LIB} ${PLATFORM_LIB} ws2_32 iphlpapi)
target_link_libraries(${CLIENT_EXE} ${STATIC_LINK_LIBS} ${STATIC_LIB} ${BACKPORT_LIB} ${PLATFORM_LIB} ws2_32 iphlpapi)
target_link_libraries(${RC_EXE} ${STATIC_LINK_LIBS} ${STATIC_LIB} ${BACKPORT_LIB} ${PLATFORM_LIB} ws2_32 iphlpapi)
target_link_libraries(${TEST_EXE} ${STATIC_LINK_LIBS} gtest_main ${STATIC_LIB} ${BACKPORT_LIB} ${PLATFORM_LIB} ws2_32 iphlpapi)
endif(WIN32)
else()
target_link_libraries(${EXE} ${STATIC_LINK_LIBS} ${STATIC_LIB} ${PLATFORM_LIB})
target_link_libraries(${CLIENT_EXE} ${STATIC_LINK_LIBS} ${STATIC_LIB} ${PLATFORM_LIB})
target_link_libraries(${RC_EXE} ${STATIC_LINK_LIBS} ${STATIC_LIB} ${PLATFORM_LIB})
target_link_libraries(${TEST_EXE} ${STATIC_LINK_LIBS} gtest_main ${STATIC_LIB} ${PLATFORM_LIB})
if (WIN32)
target_link_libraries(${EXE} ${STATIC_LINK_LIBS} ${STATIC_LIB} ${PLATFORM_LIB} ws2_32 iphlpapi)
target_link_libraries(${CLIENT_EXE} ${STATIC_LINK_LIBS} ${STATIC_LIB} ${PLATFORM_LIB} ws2_32 iphlpapi)
target_link_libraries(${RC_EXE} ${STATIC_LINK_LIBS} ${STATIC_LIB} ${PLATFORM_LIB} ws2_32 iphlpapi)
target_link_libraries(${TEST_EXE} ${STATIC_LINK_LIBS} gtest_main ${STATIC_LIB} ${PLATFORM_LIB} ws2_32 iphlpapi)
endif(WIN32)
endif(NOT HAVE_CXX17_FILESYSTEM)
target_link_libraries(${EXE} ${STATIC_LINK_LIBS} ${STATIC_LIB} ${PLATFORM_LIB})
target_link_libraries(${CLIENT_EXE} ${STATIC_LINK_LIBS} ${STATIC_LIB} ${PLATFORM_LIB})
target_link_libraries(${RC_EXE} ${STATIC_LINK_LIBS} ${STATIC_LIB} ${PLATFORM_LIB})
target_link_libraries(${TEST_EXE} ${STATIC_LINK_LIBS} gtest_main ${STATIC_LIB} ${PLATFORM_LIB})
if (WIN32)
target_link_libraries(${EXE} ${STATIC_LINK_LIBS} ${STATIC_LIB} ${PLATFORM_LIB} ws2_32 iphlpapi)
target_link_libraries(${CLIENT_EXE} ${STATIC_LINK_LIBS} ${STATIC_LIB} ${PLATFORM_LIB} ws2_32 iphlpapi)
target_link_libraries(${RC_EXE} ${STATIC_LINK_LIBS} ${STATIC_LIB} ${PLATFORM_LIB} ws2_32 iphlpapi)
target_link_libraries(${TEST_EXE} ${STATIC_LINK_LIBS} gtest_main ${STATIC_LIB} ${PLATFORM_LIB} ws2_32 iphlpapi)
endif(WIN32)
if (WIN32)
target_link_libraries(${DNS_EXE} ${STATIC_LIB} ${PLATFORM_LIB} ${THREAD_LIB} ws2_32 iphlpapi)
endif(WIN32)
@ -523,9 +535,6 @@ if(WITH_STATIC)
endif()
if(WITH_SHARED)
if(HAVE_CXX17_FILESYSTEM)
set(LIB_SRC ${LIB_SRC} ${CPP_BACKPORT_SRC})
endif(HAVE_CXX17_FILESYSTEM)
add_library(${SHARED_LIB} SHARED ${LIB_SRC} ${LIB_PLATFORM_SRC})
if (WIN32)
set(${LIBS} ${LIBS} ws2_32 iphlpapi)

@ -10,10 +10,6 @@ PREFIX ?= /usr/local
CC ?= cc
CXX ?= c++
TARGETS = lokinet
SIGS = $(TARGETS:=.sig)
SHADOW_ROOT ?= $(HOME)/.shadow
SHADOW_BIN=$(SHADOW_ROOT)/bin/shadow
SHADOW_CONFIG=$(REPO)/shadow.config.xml
@ -28,55 +24,65 @@ TESTNET_ROOT=/tmp/lokinet_testnet_tmp
TESTNET_CONF=$(TESTNET_ROOT)/supervisor.conf
TESTNET_LOG=$(TESTNET_ROOT)/testnet.log
EXE = $(REPO)/lokinet
TEST_EXE = $(REPO)/testAll
TESTNET_EXE=$(REPO)/lokinet-testnet
TESTNET_CLIENTS ?= 50
TESTNET_SERVERS ?= 50
TESTNET_DEBUG ?= 0
BUILD_ROOT = $(REPO)/build
CONFIG_CMD = $(shell echo -n "cd '$(BUILD_ROOT)' && " ; echo -n "cmake '$(REPO)'")
TARGETS = $(REPO)/lokinet
SIGS = $(TARGETS:=.sig)
EXE = $(BUILD_ROOT)/lokinet
TEST_EXE = $(BUILD_ROOT)/testAll
DNS_PORT ?= 53
# PROCS ?= $(shell cat /proc/cpuinfo | grep processor | wc -l)
LINT_FILES = $(wildcard llarp/*.cpp)
LINT_CHECK = $(LINT_FILES:.cpp=.cpp-check)
clean:
rm -f build.ninja rules.ninja cmake_install.cmake CMakeCache.txt
rm -rf CMakeFiles
rm -f $(TARGETS) llarpd llarpc dns rcutil testAll
rm -rf $(BUILD_ROOT)
rm -f $(SHADOW_PLUGIN) $(SHADOW_CONFIG)
rm -f *.sig
rm -f *.a *.so
debug-configure:
cmake -GNinja -DCMAKE_BUILD_TYPE=Debug -DCMAKE_C_COMPILER=$(CC) -DCMAKE_CXX_COMPILER=$(CXX) -DHAVE_CXX17_FILESYSTEM=OFF -DDNS_PORT=$(DNS_PORT)
debug-configure:
mkdir -p '$(BUILD_ROOT)'
$(CONFIG_CMD) -DCMAKE_BUILD_TYPE=Debug -DCMAKE_C_COMPILER=$(CC) -DCMAKE_CXX_COMPILER=$(CXX) -DDNS_PORT=$(DNS_PORT)
release-configure: clean
cmake -GNinja -DSTATIC_LINK=ON -DCMAKE_BUILD_TYPE=Release -DRELEASE_MOTTO="$(shell cat motto.txt)" -DCMAKE_C_COMPILER=$(CC) -DCMAKE_CXX_COMPILER=$(CXX)
mkdir -p '$(BUILD_ROOT)'
$(CONFIG_CMD) -DSTATIC_LINK=ON -DCMAKE_BUILD_TYPE=Release -DRELEASE_MOTTO="$(shell cat motto.txt)" -DCMAKE_C_COMPILER=ecc -DCMAKE_CXX_COMPILER=ecc++
debug: debug-configure
ninja
$(MAKE) -C $(BUILD_ROOT)
release-compile: release-configure
ninja
$(MAKE) -C $(BUILD_ROOT)
cp $(EXE) lokinet
strip $(TARGETS)
$(TARGETS): release-compile
%.sig: $(TARGETS)
$(SIGN) $*
release: $(SIGS)
shadow-configure: clean
cmake -GNinja -DCMAKE_BUILD_TYPE=Debug -DSHADOW=ON -DCMAKE_C_COMPILER=$(CC) -DCMAKE_CXX_COMPILER=$(CXX)
$(CONFIG_CMD) -DCMAKE_BUILD_TYPE=Debug -DSHADOW=ON -DCMAKE_C_COMPILER=$(CC) -DCMAKE_CXX_COMPILER=$(CXX)
shadow-build: shadow-configure
ninja clean
ninja
$(MAKE) -C $(BUILD_ROOT) clean
$(MAKE) -C $(BUILD_ROOT)
shadow-run: shadow-build
python3 contrib/shadow/genconf.py $(SHADOW_CONFIG)
@ -91,16 +97,16 @@ testnet-clean: clean
rm -rf $(TESTNET_ROOT)
testnet-configure: testnet-clean
cmake -GNinja -DCMAKE_BUILD_TYPE=Debug -DCMAKE_C_COMPILER=$(CC) -DCMAKE_CXX_COMPILER=$(CXX) -DTESTNET=1
$(CONFIG_CMD) -DCMAKE_BUILD_TYPE=Debug -DCMAKE_C_COMPILER=$(CC) -DCMAKE_CXX_COMPILER=$(CXX) -DTESTNET=1
testnet-build: testnet-configure
ninja
$(MAKE) -C $(BUILD_ROOT)
shared-configure: clean
cmake -GNinja -DCMAKE_BUILD_TYPE=Debug -DWITH_TESTS=ON -DCMAKE_C_COMPILER=$(CC) -DCMAKE_CXX_COMPILER=$(CXX) -DWITH_SHARED=ON
$(CONFIG_CMD) -DCMAKE_BUILD_TYPE=Debug -DWITH_TESTS=ON -DCMAKE_C_COMPILER=$(CC) -DCMAKE_CXX_COMPILER=$(CXX) -DWITH_SHARED=ON
shared: shared-configure
ninja
$(MAKE) -C $(BUILD_ROOT)
testnet:
cp $(EXE) $(TESTNET_EXE)
@ -117,7 +123,7 @@ format:
lint: $(LINT_CHECK)
%.cpp-check: %.cpp
clang-tidy $^ -- -I$(REPO)/include -I$(REPO)/vendor/cppbackport-master/lib -I$(REPO)/crypto/libntrup/include -I$(REPO)/llarp
clang-tidy $^ -- -I$(REPO)/include -I$(REPO)/crypto/libntrup/include -I$(REPO)/llarp
install:
rm -f $(PREFIX)/bin/lokinet

@ -0,0 +1,123 @@
/*
BLAKE2 reference source code package - reference C implementations
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
All code is triple-licensed under the
[CC0](http://creativecommons.org/publicdomain/zero/1.0), the
[OpenSSL Licence](https://www.openssl.org/source/license.html), or
the [Apache Public License 2.0](http://www.apache.org/licenses/LICENSE-2.0),
at your choosing.
*/
#ifndef blake2_H
#define blake2_H
#include <stddef.h>
#include <stdint.h>
#include <sodium/crypto_generichash_blake2b.h>
#include <sodium/export.h>
/* Map the short blake2b_* names used by this vendored code onto the
 * namespaced crypto_generichash_blake2b__* symbols, so the bundled
 * implementation does not collide with a system-installed libsodium. */
#define blake2b_init_param crypto_generichash_blake2b__init_param
#define blake2b_init crypto_generichash_blake2b__init
#define blake2b_init_salt_personal \
crypto_generichash_blake2b__init_salt_personal
#define blake2b_init_key crypto_generichash_blake2b__init_key
#define blake2b_init_key_salt_personal \
crypto_generichash_blake2b__init_key_salt_personal
#define blake2b_update crypto_generichash_blake2b__update
#define blake2b_final crypto_generichash_blake2b__final
#define blake2b crypto_generichash_blake2b__blake2b
#define blake2b_salt_personal crypto_generichash_blake2b__blake2b_salt_personal
#define blake2b_pick_best_implementation \
crypto_generichash_blake2b__pick_best_implementation
/* Fixed BLAKE2b sizes (in bytes) from the BLAKE2 specification. */
enum blake2b_constant
{
BLAKE2B_BLOCKBYTES = 128,
BLAKE2B_OUTBYTES = 64,
BLAKE2B_KEYBYTES = 64,
BLAKE2B_SALTBYTES = 16,
BLAKE2B_PERSONALBYTES = 16
};
/* blake2b_param must be exactly 64 bytes with no padding — it is XORed
 * into the IV during init — hence the pack(1) pragmas around it. */
#if defined(__IBMC__) || defined(__SUNPRO_C) || defined(__SUNPRO_CC)
#pragma pack(1)
#else
#pragma pack(push, 1)
#endif
typedef struct blake2b_param_
{
uint8_t digest_length; /* 1 */
uint8_t key_length; /* 2 */
uint8_t fanout; /* 3 */
uint8_t depth; /* 4 */
uint8_t leaf_length[4]; /* 8 */
uint8_t node_offset[8]; /* 16 */
uint8_t node_depth; /* 17 */
uint8_t inner_length; /* 18 */
uint8_t reserved[14]; /* 32 */
uint8_t salt[BLAKE2B_SALTBYTES]; /* 48 */
uint8_t personal[BLAKE2B_PERSONALBYTES]; /* 64 */
} blake2b_param;
/* The hashing state is libsodium's generichash state under an alias. */
typedef crypto_generichash_blake2b_state blake2b_state;
#if defined(__IBMC__) || defined(__SUNPRO_C) || defined(__SUNPRO_CC)
#pragma pack()
#else
#pragma pack(pop)
#endif
/* Streaming API */
int
blake2b_init(blake2b_state *S, const uint8_t outlen);
int
blake2b_init_salt_personal(blake2b_state *S, const uint8_t outlen,
const void *salt, const void *personal);
int
blake2b_init_key(blake2b_state *S, const uint8_t outlen, const void *key,
const uint8_t keylen);
int
blake2b_init_key_salt_personal(blake2b_state *S, const uint8_t outlen,
const void *key, const uint8_t keylen,
const void *salt, const void *personal);
int
blake2b_init_param(blake2b_state *S, const blake2b_param *P);
int
blake2b_update(blake2b_state *S, const uint8_t *in, uint64_t inlen);
int
blake2b_final(blake2b_state *S, uint8_t *out, uint8_t outlen);
/* Simple API */
int
blake2b(uint8_t *out, const void *in, const void *key, const uint8_t outlen,
const uint64_t inlen, uint8_t keylen);
int
blake2b_salt_personal(uint8_t *out, const void *in, const void *key,
const uint8_t outlen, const uint64_t inlen,
uint8_t keylen, const void *salt, const void *personal);
/* Signature shared by the ref/ssse3/sse41/avx2 compression backends;
 * blake2b_pick_best_implementation selects one at runtime. */
typedef int (*blake2b_compress_fn)(blake2b_state *S,
const uint8_t block[BLAKE2B_BLOCKBYTES]);
int
blake2b_pick_best_implementation(void);
int
blake2b_compress_ref(blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES]);
int
blake2b_compress_ssse3(blake2b_state *S,
const uint8_t block[BLAKE2B_BLOCKBYTES]);
int
blake2b_compress_sse41(blake2b_state *S,
const uint8_t block[BLAKE2B_BLOCKBYTES]);
int
blake2b_compress_avx2(blake2b_state *S,
const uint8_t block[BLAKE2B_BLOCKBYTES]);
#endif

@ -0,0 +1,48 @@
#define BLAKE2_USE_SSSE3
#define BLAKE2_USE_SSE41
#define BLAKE2_USE_AVX2
#include <stdint.h>
#include <string.h>
#include "blake2.h"
#include <sodium/private/common.h>
#include <sodium/private/sse2_64_32.h>
#if defined(HAVE_AVX2INTRIN_H) && defined(HAVE_EMMINTRIN_H) \
&& defined(HAVE_TMMINTRIN_H) && defined(HAVE_SMMINTRIN_H)
#ifdef __GNUC__
#pragma GCC target("sse2")
#pragma GCC target("ssse3")
#pragma GCC target("sse4.1")
#pragma GCC target("avx2")
#endif
#include <emmintrin.h>
#include <immintrin.h>
#include <smmintrin.h>
#include <tmmintrin.h>
#include "blake2b-compress-avx2.h"
CRYPTO_ALIGN(64)
static const uint64_t blake2b_IV[8] = {
0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL, 0x3c6ef372fe94f82bULL,
0xa54ff53a5f1d36f1ULL, 0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL,
0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL};
/* AVX2 backend of the BLAKE2b compression function: processes one
 * 128-byte message block into state S. The eight 64-bit chain values
 * S->h are held as two 256-bit vectors; all 12 rounds are performed by
 * the BLAKE2B_COMPRESS_V1 macro (blake2b-compress-avx2.h), which also
 * folds in the byte counters S->t and finalization flags S->f.
 * LOADU/STOREU are unaligned 256-bit load/store macros from the same
 * header. Always returns 0. */
int
blake2b_compress_avx2(blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES])
{
__m256i a = LOADU(&S->h[0]);
__m256i b = LOADU(&S->h[4]);
BLAKE2B_COMPRESS_V1(a, b, block, S->t[0], S->t[1], S->f[0], S->f[1]);
STOREU(&S->h[0], a);
STOREU(&S->h[4], b);
return 0;
}
#endif

@ -0,0 +1,137 @@
#ifndef blake2b_compress_avx2_H
#define blake2b_compress_avx2_H
#define LOADU128(p) _mm_loadu_si128((__m128i *) (p))
#define STOREU128(p, r) _mm_storeu_si128((__m128i *) (p), r)
#define LOAD(p) _mm256_load_si256((__m256i *) (p))
#define STORE(p, r) _mm256_store_si256((__m256i *) (p), r)
#define LOADU(p) _mm256_loadu_si256((__m256i *) (p))
#define STOREU(p, r) _mm256_storeu_si256((__m256i *) (p), r)
static inline uint64_t
LOADU64(const void *p)
{
uint64_t v;
memcpy(&v, p, sizeof v);
return v;
}
#define ROTATE16 \
_mm256_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9, 2, \
3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9)
#define ROTATE24 \
_mm256_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10, 3, \
4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10)
#define ADD(a, b) _mm256_add_epi64(a, b)
#define SUB(a, b) _mm256_sub_epi64(a, b)
#define XOR(a, b) _mm256_xor_si256(a, b)
#define AND(a, b) _mm256_and_si256(a, b)
#define OR(a, b) _mm256_or_si256(a, b)
#define ROT32(x) _mm256_shuffle_epi32((x), _MM_SHUFFLE(2, 3, 0, 1))
#define ROT24(x) _mm256_shuffle_epi8((x), ROTATE24)
#define ROT16(x) _mm256_shuffle_epi8((x), ROTATE16)
#define ROT63(x) _mm256_or_si256(_mm256_srli_epi64((x), 63), ADD((x), (x)))
#define BLAKE2B_G1_V1(a, b, c, d, m) \
do { \
a = ADD(a, m); \
a = ADD(a, b); \
d = XOR(d, a); \
d = ROT32(d); \
c = ADD(c, d); \
b = XOR(b, c); \
b = ROT24(b); \
} while (0)
#define BLAKE2B_G2_V1(a, b, c, d, m) \
do { \
a = ADD(a, m); \
a = ADD(a, b); \
d = XOR(d, a); \
d = ROT16(d); \
c = ADD(c, d); \
b = XOR(b, c); \
b = ROT63(b); \
} while (0)
#define BLAKE2B_DIAG_V1(a, b, c, d) \
do { \
d = _mm256_permute4x64_epi64(d, _MM_SHUFFLE(2, 1, 0, 3)); \
c = _mm256_permute4x64_epi64(c, _MM_SHUFFLE(1, 0, 3, 2)); \
b = _mm256_permute4x64_epi64(b, _MM_SHUFFLE(0, 3, 2, 1)); \
} while (0)
#define BLAKE2B_UNDIAG_V1(a, b, c, d) \
do { \
d = _mm256_permute4x64_epi64(d, _MM_SHUFFLE(0, 3, 2, 1)); \
c = _mm256_permute4x64_epi64(c, _MM_SHUFFLE(1, 0, 3, 2)); \
b = _mm256_permute4x64_epi64(b, _MM_SHUFFLE(2, 1, 0, 3)); \
} while (0)
#include "blake2b-load-avx2.h"
#define BLAKE2B_ROUND_V1(a, b, c, d, r, m) \
do { \
__m256i b0; \
BLAKE2B_LOAD_MSG_##r##_1(b0); \
BLAKE2B_G1_V1(a, b, c, d, b0); \
BLAKE2B_LOAD_MSG_##r##_2(b0); \
BLAKE2B_G2_V1(a, b, c, d, b0); \
BLAKE2B_DIAG_V1(a, b, c, d); \
BLAKE2B_LOAD_MSG_##r##_3(b0); \
BLAKE2B_G1_V1(a, b, c, d, b0); \
BLAKE2B_LOAD_MSG_##r##_4(b0); \
BLAKE2B_G2_V1(a, b, c, d, b0); \
BLAKE2B_UNDIAG_V1(a, b, c, d); \
} while (0)
#define BLAKE2B_ROUNDS_V1(a, b, c, d, m) \
do { \
BLAKE2B_ROUND_V1(a, b, c, d, 0, (m)); \
BLAKE2B_ROUND_V1(a, b, c, d, 1, (m)); \
BLAKE2B_ROUND_V1(a, b, c, d, 2, (m)); \
BLAKE2B_ROUND_V1(a, b, c, d, 3, (m)); \
BLAKE2B_ROUND_V1(a, b, c, d, 4, (m)); \
BLAKE2B_ROUND_V1(a, b, c, d, 5, (m)); \
BLAKE2B_ROUND_V1(a, b, c, d, 6, (m)); \
BLAKE2B_ROUND_V1(a, b, c, d, 7, (m)); \
BLAKE2B_ROUND_V1(a, b, c, d, 8, (m)); \
BLAKE2B_ROUND_V1(a, b, c, d, 9, (m)); \
BLAKE2B_ROUND_V1(a, b, c, d, 10, (m)); \
BLAKE2B_ROUND_V1(a, b, c, d, 11, (m)); \
} while (0)
#define DECLARE_MESSAGE_WORDS(m) \
const __m256i m0 = _mm256_broadcastsi128_si256(LOADU128((m) + 0)); \
const __m256i m1 = _mm256_broadcastsi128_si256(LOADU128((m) + 16)); \
const __m256i m2 = _mm256_broadcastsi128_si256(LOADU128((m) + 32)); \
const __m256i m3 = _mm256_broadcastsi128_si256(LOADU128((m) + 48)); \
const __m256i m4 = _mm256_broadcastsi128_si256(LOADU128((m) + 64)); \
const __m256i m5 = _mm256_broadcastsi128_si256(LOADU128((m) + 80)); \
const __m256i m6 = _mm256_broadcastsi128_si256(LOADU128((m) + 96)); \
const __m256i m7 = _mm256_broadcastsi128_si256(LOADU128((m) + 112)); \
__m256i t0, t1;
#define BLAKE2B_COMPRESS_V1(a, b, m, t0, t1, f0, f1) \
do { \
DECLARE_MESSAGE_WORDS(m) \
const __m256i iv0 = a; \
const __m256i iv1 = b; \
__m256i c = LOAD(&blake2b_IV[0]); \
__m256i d = \
XOR(LOAD(&blake2b_IV[4]), _mm256_set_epi64x(f1, f0, t1, t0)); \
BLAKE2B_ROUNDS_V1(a, b, c, d, m); \
a = XOR(a, c); \
b = XOR(b, d); \
a = XOR(a, iv0); \
b = XOR(b, iv1); \
} while (0)
#endif

@ -0,0 +1,96 @@
#include <stdint.h>
#include <string.h>
#include "blake2.h"
#include <sodium/private/common.h>
CRYPTO_ALIGN(64)
static const uint64_t blake2b_IV[8] = {
0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL, 0x3c6ef372fe94f82bULL,
0xa54ff53a5f1d36f1ULL, 0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL,
0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL};
static const uint8_t blake2b_sigma[12][16] = {
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
{14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
{11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4},
{7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8},
{9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13},
{2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9},
{12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11},
{13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10},
{6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5},
{10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0},
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
{14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}};
/* Portable reference backend of the BLAKE2b compression function:
 * mixes one 128-byte message block into state S and returns 0.
 * The 16 message words m[] are read little-endian via LOAD64_LE
 * (sodium/private/common.h), so the result is endian-independent.
 * The 16-word working vector v[] is initialized from the chain value
 * S->h, the IV, the byte counters S->t and the finalization flags
 * S->f, then run through 12 rounds of the G quarter-round with the
 * blake2b_sigma message-word permutation schedule. */
int
blake2b_compress_ref(blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES])
{
uint64_t m[16];
uint64_t v[16];
int i;
/* load the message block as 16 little-endian 64-bit words */
for(i = 0; i < 16; ++i)
{
m[i] = LOAD64_LE(block + i * sizeof(m[i]));
}
/* first half of the working vector is the current chain value */
for(i = 0; i < 8; ++i)
{
v[i] = S->h[i];
}
/* second half: IV XORed with offset counters and final-block flags */
v[8] = blake2b_IV[0];
v[9] = blake2b_IV[1];
v[10] = blake2b_IV[2];
v[11] = blake2b_IV[3];
v[12] = S->t[0] ^ blake2b_IV[4];
v[13] = S->t[1] ^ blake2b_IV[5];
v[14] = S->f[0] ^ blake2b_IV[6];
v[15] = S->f[1] ^ blake2b_IV[7];
/* G: the BLAKE2b quarter-round (rotations 32/24/16/63, per RFC 7693) */
#define G(r, i, a, b, c, d) \
do \
{ \
a = a + b + m[blake2b_sigma[r][2 * i + 0]]; \
d = ROTR64(d ^ a, 32); \
c = c + d; \
b = ROTR64(b ^ c, 24); \
a = a + b + m[blake2b_sigma[r][2 * i + 1]]; \
d = ROTR64(d ^ a, 16); \
c = c + d; \
b = ROTR64(b ^ c, 63); \
} while(0)
/* ROUND: 4 column-step and 4 diagonal-step applications of G */
#define ROUND(r) \
do \
{ \
G(r, 0, v[0], v[4], v[8], v[12]); \
G(r, 1, v[1], v[5], v[9], v[13]); \
G(r, 2, v[2], v[6], v[10], v[14]); \
G(r, 3, v[3], v[7], v[11], v[15]); \
G(r, 4, v[0], v[5], v[10], v[15]); \
G(r, 5, v[1], v[6], v[11], v[12]); \
G(r, 6, v[2], v[7], v[8], v[13]); \
G(r, 7, v[3], v[4], v[9], v[14]); \
} while(0)
ROUND(0);
ROUND(1);
ROUND(2);
ROUND(3);
ROUND(4);
ROUND(5);
ROUND(6);
ROUND(7);
ROUND(8);
ROUND(9);
ROUND(10);
ROUND(11);
/* feed-forward: fold both halves of v back into the chain value */
for(i = 0; i < 8; ++i)
{
S->h[i] = S->h[i] ^ v[i] ^ v[i + 8];
}
#undef G
#undef ROUND
return 0;
}

@ -0,0 +1,86 @@
#define BLAKE2_USE_SSSE3
#define BLAKE2_USE_SSE41
#include <stdint.h>
#include <string.h>
#include "blake2.h"
#include <sodium/private/common.h>
#include <sodium/private/sse2_64_32.h>
#if defined(HAVE_EMMINTRIN_H) && defined(HAVE_TMMINTRIN_H) \
&& defined(HAVE_SMMINTRIN_H)
#ifdef __GNUC__
#pragma GCC target("sse2")
#pragma GCC target("ssse3")
#pragma GCC target("sse4.1")
#endif
#include <emmintrin.h>
#include <smmintrin.h>
#include <tmmintrin.h>
#include "blake2b-compress-sse41.h"
CRYPTO_ALIGN(64)
static const uint64_t blake2b_IV[8] = {
0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL, 0x3c6ef372fe94f82bULL,
0xa54ff53a5f1d36f1ULL, 0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL,
0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL};
/* SSE4.1 backend of the BLAKE2b compression function: mixes one
 * 128-byte block into state S using 128-bit vectors (two lanes per
 * row, low/high halves in *l/*h). The message is loaded unaligned
 * via LOADU; r16/r24 are pshufb masks used by the rotation macros in
 * blake2b-compress-sse41.h, which also defines ROUND. Unaligned
 * message loads mean block needs no particular alignment.
 * Always returns 0. */
int
blake2b_compress_sse41(blake2b_state *S,
const uint8_t block[BLAKE2B_BLOCKBYTES])
{
__m128i row1l, row1h;
__m128i row2l, row2h;
__m128i row3l, row3h;
__m128i row4l, row4h;
__m128i b0, b1;
__m128i t0, t1;
/* shuffle masks implementing 16- and 24-bit right rotations */
const __m128i r16 =
_mm_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9);
const __m128i r24 =
_mm_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10);
const __m128i m0 = LOADU(block + 00);
const __m128i m1 = LOADU(block + 16);
const __m128i m2 = LOADU(block + 32);
const __m128i m3 = LOADU(block + 48);
const __m128i m4 = LOADU(block + 64);
const __m128i m5 = LOADU(block + 80);
const __m128i m6 = LOADU(block + 96);
const __m128i m7 = LOADU(block + 112);
row1l = LOADU(&S->h[0]);
row1h = LOADU(&S->h[2]);
row2l = LOADU(&S->h[4]);
row2h = LOADU(&S->h[6]);
row3l = LOADU(&blake2b_IV[0]);
row3h = LOADU(&blake2b_IV[2]);
/* row 4: IV XOR (byte counters t, finalization flags f) */
row4l = _mm_xor_si128(LOADU(&blake2b_IV[4]), LOADU(&S->t[0]));
row4h = _mm_xor_si128(LOADU(&blake2b_IV[6]), LOADU(&S->f[0]));
ROUND(0);
ROUND(1);
ROUND(2);
ROUND(3);
ROUND(4);
ROUND(5);
ROUND(6);
ROUND(7);
ROUND(8);
ROUND(9);
ROUND(10);
ROUND(11);
/* feed-forward: h = h ^ (rows 1,2) ^ (rows 3,4) */
row1l = _mm_xor_si128(row3l, row1l);
row1h = _mm_xor_si128(row3h, row1h);
STOREU(&S->h[0], _mm_xor_si128(LOADU(&S->h[0]), row1l));
STOREU(&S->h[2], _mm_xor_si128(LOADU(&S->h[2]), row1h));
row2l = _mm_xor_si128(row4l, row2l);
row2h = _mm_xor_si128(row4h, row2h);
STOREU(&S->h[4], _mm_xor_si128(LOADU(&S->h[4]), row2l));
STOREU(&S->h[6], _mm_xor_si128(LOADU(&S->h[6]), row2h));
return 0;
}
#endif

@ -0,0 +1,103 @@
#ifndef blake2b_compress_sse41_H
#define blake2b_compress_sse41_H
#define LOADU(p) _mm_loadu_si128((const __m128i *) (const void *) (p))
#define STOREU(p, r) _mm_storeu_si128((__m128i *) (void *) (p), r)
#define _mm_roti_epi64(x, c) \
(-(c) == 32) \
? _mm_shuffle_epi32((x), _MM_SHUFFLE(2, 3, 0, 1)) \
: (-(c) == 24) \
? _mm_shuffle_epi8((x), r24) \
: (-(c) == 16) \
? _mm_shuffle_epi8((x), r16) \
: (-(c) == 63) \
? _mm_xor_si128(_mm_srli_epi64((x), -(c)), \
_mm_add_epi64((x), (x))) \
: _mm_xor_si128(_mm_srli_epi64((x), -(c)), \
_mm_slli_epi64((x), 64 - (-(c))))
#define G1(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1) \
row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \
row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \
\
row4l = _mm_xor_si128(row4l, row1l); \
row4h = _mm_xor_si128(row4h, row1h); \
\
row4l = _mm_roti_epi64(row4l, -32); \
row4h = _mm_roti_epi64(row4h, -32); \
\
row3l = _mm_add_epi64(row3l, row4l); \
row3h = _mm_add_epi64(row3h, row4h); \
\
row2l = _mm_xor_si128(row2l, row3l); \
row2h = _mm_xor_si128(row2h, row3h); \
\
row2l = _mm_roti_epi64(row2l, -24); \
row2h = _mm_roti_epi64(row2h, -24);
#define G2(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1) \
row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \
row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \
\
row4l = _mm_xor_si128(row4l, row1l); \
row4h = _mm_xor_si128(row4h, row1h); \
\
row4l = _mm_roti_epi64(row4l, -16); \
row4h = _mm_roti_epi64(row4h, -16); \
\
row3l = _mm_add_epi64(row3l, row4l); \
row3h = _mm_add_epi64(row3h, row4h); \
\
row2l = _mm_xor_si128(row2l, row3l); \
row2h = _mm_xor_si128(row2h, row3h); \
\
row2l = _mm_roti_epi64(row2l, -63); \
row2h = _mm_roti_epi64(row2h, -63);
#define DIAGONALIZE(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h) \
t0 = _mm_alignr_epi8(row2h, row2l, 8); \
t1 = _mm_alignr_epi8(row2l, row2h, 8); \
row2l = t0; \
row2h = t1; \
\
t0 = row3l; \
row3l = row3h; \
row3h = t0; \
\
t0 = _mm_alignr_epi8(row4h, row4l, 8); \
t1 = _mm_alignr_epi8(row4l, row4h, 8); \
row4l = t1; \
row4h = t0;
#define UNDIAGONALIZE(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h) \
t0 = _mm_alignr_epi8(row2l, row2h, 8); \
t1 = _mm_alignr_epi8(row2h, row2l, 8); \
row2l = t0; \
row2h = t1; \
\
t0 = row3l; \
row3l = row3h; \
row3h = t0; \
\
t0 = _mm_alignr_epi8(row4l, row4h, 8); \
t1 = _mm_alignr_epi8(row4h, row4l, 8); \
row4l = t1; \
row4h = t0;
#include "blake2b-load-sse41.h"
#define ROUND(r) \
LOAD_MSG_##r##_1(b0, b1); \
G1(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); \
LOAD_MSG_##r##_2(b0, b1); \
G2(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); \
DIAGONALIZE(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h); \
LOAD_MSG_##r##_3(b0, b1); \
G1(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); \
LOAD_MSG_##r##_4(b0, b1); \
G2(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); \
UNDIAGONALIZE(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h);
#endif

@ -0,0 +1,89 @@
#include <stdint.h>
#include <string.h>
#include "blake2.h"
#include <sodium/private/common.h>
#include <sodium/private/sse2_64_32.h>
#if defined(HAVE_EMMINTRIN_H) && defined(HAVE_TMMINTRIN_H)
#ifdef __GNUC__
#pragma GCC target("sse2")
#pragma GCC target("ssse3")
#endif
#include <emmintrin.h>
#include <tmmintrin.h>
#include "blake2b-compress-ssse3.h"
CRYPTO_ALIGN(64)
static const uint64_t blake2b_IV[8] = {
0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL, 0x3c6ef372fe94f82bULL,
0xa54ff53a5f1d36f1ULL, 0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL,
0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL};
/* SSSE3 backend of the BLAKE2b compression function: mixes one
 * 128-byte block into state S using 128-bit vectors. The ROUND/G1/G2
 * macros come from blake2b-compress-ssse3.h; r16/r24 are pshufb masks
 * for the 16- and 24-bit rotations. Always returns 0.
 * NOTE(review): the m0..m15 message words are read by casting block to
 * uint64_t* — this assumes a little-endian target and relies on x86's
 * tolerance of unaligned loads; it is not portable as-is (the ref
 * backend uses LOAD64_LE instead). This mirrors upstream libsodium,
 * which only compiles this file on x86 — confirm before reusing. */
int
blake2b_compress_ssse3(blake2b_state *S,
const uint8_t block[BLAKE2B_BLOCKBYTES])
{
__m128i row1l, row1h;
__m128i row2l, row2h;
__m128i row3l, row3h;
__m128i row4l, row4h;
__m128i b0, b1;
__m128i t0, t1;
/* shuffle masks implementing 16- and 24-bit right rotations */
const __m128i r16 =
_mm_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9);
const __m128i r24 =
_mm_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10);
const uint64_t m0 = ((uint64_t *)block)[0];
const uint64_t m1 = ((uint64_t *)block)[1];
const uint64_t m2 = ((uint64_t *)block)[2];
const uint64_t m3 = ((uint64_t *)block)[3];
const uint64_t m4 = ((uint64_t *)block)[4];
const uint64_t m5 = ((uint64_t *)block)[5];
const uint64_t m6 = ((uint64_t *)block)[6];
const uint64_t m7 = ((uint64_t *)block)[7];
const uint64_t m8 = ((uint64_t *)block)[8];
const uint64_t m9 = ((uint64_t *)block)[9];
const uint64_t m10 = ((uint64_t *)block)[10];
const uint64_t m11 = ((uint64_t *)block)[11];
const uint64_t m12 = ((uint64_t *)block)[12];
const uint64_t m13 = ((uint64_t *)block)[13];
const uint64_t m14 = ((uint64_t *)block)[14];
const uint64_t m15 = ((uint64_t *)block)[15];
row1l = LOADU(&S->h[0]);
row1h = LOADU(&S->h[2]);
row2l = LOADU(&S->h[4]);
row2h = LOADU(&S->h[6]);
row3l = LOADU(&blake2b_IV[0]);
row3h = LOADU(&blake2b_IV[2]);
/* row 4: IV XOR (byte counters t, finalization flags f) */
row4l = _mm_xor_si128(LOADU(&blake2b_IV[4]), LOADU(&S->t[0]));
row4h = _mm_xor_si128(LOADU(&blake2b_IV[6]), LOADU(&S->f[0]));
ROUND(0);
ROUND(1);
ROUND(2);
ROUND(3);
ROUND(4);
ROUND(5);
ROUND(6);
ROUND(7);
ROUND(8);
ROUND(9);
ROUND(10);
ROUND(11);
/* feed-forward: h = h ^ (rows 1,2) ^ (rows 3,4) */
row1l = _mm_xor_si128(row3l, row1l);
row1h = _mm_xor_si128(row3h, row1h);
STOREU(&S->h[0], _mm_xor_si128(LOADU(&S->h[0]), row1l));
STOREU(&S->h[2], _mm_xor_si128(LOADU(&S->h[2]), row1h));
row2l = _mm_xor_si128(row4l, row2l);
row2h = _mm_xor_si128(row4h, row2h);
STOREU(&S->h[4], _mm_xor_si128(LOADU(&S->h[4]), row2l));
STOREU(&S->h[6], _mm_xor_si128(LOADU(&S->h[6]), row2h));
return 0;
}
#endif

@ -0,0 +1,103 @@
#ifndef blake2b_compress_ssse3_H
#define blake2b_compress_ssse3_H

/* Round macros for the SSE2/SSSE3 BLAKE2b compression function.
 * These expand inside blake2b_compress_ssse3() and rely on the following
 * names being in scope at the expansion site: the row/temporary __m128i
 * variables, the message words m0..m15, and the r16/r24 shuffle masks. */

/* Unaligned 128-bit load/store helpers. */
#define LOADU(p) _mm_loadu_si128((const __m128i *) (const void *) (p))
#define STOREU(p, r) _mm_storeu_si128((__m128i *) (void *) (p), r)

/* Rotate each 64-bit lane of x right by -(c) bits.  The 32/24/16-bit cases
 * use cheaper shuffles; 63 uses shift+add.  The whole expansion is wrapped
 * in parentheses so the conditional chain behaves as a single expression
 * regardless of the precedence context it is expanded into (the original
 * upstream macro omits the outer parentheses). */
#define _mm_roti_epi64(x, c)                                              \
    ((-(c) == 32)                                                         \
         ? _mm_shuffle_epi32((x), _MM_SHUFFLE(2, 3, 0, 1))                \
         : (-(c) == 24)                                                   \
               ? _mm_shuffle_epi8((x), r24)                               \
               : (-(c) == 16)                                             \
                     ? _mm_shuffle_epi8((x), r16)                         \
                     : (-(c) == 63)                                       \
                           ? _mm_xor_si128(_mm_srli_epi64((x), -(c)),     \
                                           _mm_add_epi64((x), (x)))      \
                           : _mm_xor_si128(_mm_srli_epi64((x), -(c)),     \
                                           _mm_slli_epi64((x), 64 - (-(c)))))

/* First half of the BLAKE2b G function: two mix steps with 32- and 24-bit
 * rotations, applied to both 128-bit halves of each row in parallel. */
#define G1(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1) \
    row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l);                \
    row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h);                \
                                                                           \
    row4l = _mm_xor_si128(row4l, row1l);                                   \
    row4h = _mm_xor_si128(row4h, row1h);                                   \
                                                                           \
    row4l = _mm_roti_epi64(row4l, -32);                                    \
    row4h = _mm_roti_epi64(row4h, -32);                                    \
                                                                           \
    row3l = _mm_add_epi64(row3l, row4l);                                   \
    row3h = _mm_add_epi64(row3h, row4h);                                   \
                                                                           \
    row2l = _mm_xor_si128(row2l, row3l);                                   \
    row2h = _mm_xor_si128(row2h, row3h);                                   \
                                                                           \
    row2l = _mm_roti_epi64(row2l, -24);                                    \
    row2h = _mm_roti_epi64(row2h, -24);

/* Second half of the G function: 16- and 63-bit rotations. */
#define G2(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1) \
    row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l);                \
    row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h);                \
                                                                           \
    row4l = _mm_xor_si128(row4l, row1l);                                   \
    row4h = _mm_xor_si128(row4h, row1h);                                   \
                                                                           \
    row4l = _mm_roti_epi64(row4l, -16);                                    \
    row4h = _mm_roti_epi64(row4h, -16);                                    \
                                                                           \
    row3l = _mm_add_epi64(row3l, row4l);                                   \
    row3h = _mm_add_epi64(row3h, row4h);                                   \
                                                                           \
    row2l = _mm_xor_si128(row2l, row3l);                                   \
    row2h = _mm_xor_si128(row2h, row3h);                                   \
                                                                           \
    row2l = _mm_roti_epi64(row2l, -63);                                    \
    row2h = _mm_roti_epi64(row2h, -63);

/* Rotate rows 2-4 so the next G application mixes the diagonals of the
 * 4x4 state matrix.  Uses t0/t1 as scratch. */
#define DIAGONALIZE(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h) \
    t0 = _mm_alignr_epi8(row2h, row2l, 8);                                  \
    t1 = _mm_alignr_epi8(row2l, row2h, 8);                                  \
    row2l = t0;                                                             \
    row2h = t1;                                                             \
                                                                            \
    t0 = row3l;                                                             \
    row3l = row3h;                                                          \
    row3h = t0;                                                             \
                                                                            \
    t0 = _mm_alignr_epi8(row4h, row4l, 8);                                  \
    t1 = _mm_alignr_epi8(row4l, row4h, 8);                                  \
    row4l = t1;                                                             \
    row4h = t0;

/* Inverse of DIAGONALIZE: restore column alignment. */
#define UNDIAGONALIZE(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h) \
    t0 = _mm_alignr_epi8(row2l, row2h, 8);                                    \
    t1 = _mm_alignr_epi8(row2h, row2l, 8);                                    \
    row2l = t0;                                                               \
    row2h = t1;                                                               \
                                                                              \
    t0 = row3l;                                                               \
    row3l = row3h;                                                            \
    row3h = t0;                                                               \
                                                                              \
    t0 = _mm_alignr_epi8(row4l, row4h, 8);                                    \
    t1 = _mm_alignr_epi8(row4h, row4l, 8);                                    \
    row4l = t1;                                                               \
    row4h = t0;

#include "blake2b-load-sse2.h"

/* One full BLAKE2b round: columns (G1+G2), diagonalize, diagonals (G1+G2),
 * undiagonalize.  LOAD_MSG_r_k selects the round-r message permutation. */
#define ROUND(r)                                                         \
    LOAD_MSG_##r##_1(b0, b1);                                            \
    G1(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1);  \
    LOAD_MSG_##r##_2(b0, b1);                                            \
    G2(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1);  \
    DIAGONALIZE(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h); \
    LOAD_MSG_##r##_3(b0, b1);                                            \
    G1(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1);  \
    LOAD_MSG_##r##_4(b0, b1);                                            \
    G2(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1);  \
    UNDIAGONALIZE(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h);
#endif

@ -0,0 +1,340 @@
#ifndef blake2b_load_avx2_H
#define blake2b_load_avx2_H

/* Per-round message-loading macros for the AVX2 BLAKE2b compression
 * function.  BLAKE2B_LOAD_MSG_r_k(b0) builds, in b0, the four 64-bit
 * message words consumed by step k of round r, implementing that round's
 * sigma permutation with unpack/blend/align shuffles instead of scalar
 * gathers.  t0/t1 are scratch __m256i registers supplied by the caller.
 * NOTE(review): m0..m7 are presumably __m256i registers holding the 16
 * message words two-per-lane — confirm against the AVX2 compress routine,
 * which is not part of this header. */

/* Round 0 (identity permutation). */
#define BLAKE2B_LOAD_MSG_0_1(b0)                   \
    do {                                           \
        t0 = _mm256_unpacklo_epi64(m0, m1);        \
        t1 = _mm256_unpacklo_epi64(m2, m3);        \
        b0 = _mm256_blend_epi32(t0, t1, 0xF0);     \
    } while (0)
#define BLAKE2B_LOAD_MSG_0_2(b0)                   \
    do {                                           \
        t0 = _mm256_unpackhi_epi64(m0, m1);        \
        t1 = _mm256_unpackhi_epi64(m2, m3);        \
        b0 = _mm256_blend_epi32(t0, t1, 0xF0);     \
    } while (0)
#define BLAKE2B_LOAD_MSG_0_3(b0)                   \
    do {                                           \
        t0 = _mm256_unpacklo_epi64(m4, m5);        \
        t1 = _mm256_unpacklo_epi64(m6, m7);        \
        b0 = _mm256_blend_epi32(t0, t1, 0xF0);     \
    } while (0)
#define BLAKE2B_LOAD_MSG_0_4(b0)                   \
    do {                                           \
        t0 = _mm256_unpackhi_epi64(m4, m5);        \
        t1 = _mm256_unpackhi_epi64(m6, m7);        \
        b0 = _mm256_blend_epi32(t0, t1, 0xF0);     \
    } while (0)
/* Round 1. */
#define BLAKE2B_LOAD_MSG_1_1(b0)                   \
    do {                                           \
        t0 = _mm256_unpacklo_epi64(m7, m2);        \
        t1 = _mm256_unpackhi_epi64(m4, m6);        \
        b0 = _mm256_blend_epi32(t0, t1, 0xF0);     \
    } while (0)
#define BLAKE2B_LOAD_MSG_1_2(b0)                   \
    do {                                           \
        t0 = _mm256_unpacklo_epi64(m5, m4);        \
        t1 = _mm256_alignr_epi8(m3, m7, 8);        \
        b0 = _mm256_blend_epi32(t0, t1, 0xF0);     \
    } while (0)
#define BLAKE2B_LOAD_MSG_1_3(b0)                              \
    do {                                                      \
        t0 = _mm256_shuffle_epi32(m0, _MM_SHUFFLE(1, 0, 3, 2)); \
        t1 = _mm256_unpackhi_epi64(m5, m2);                   \
        b0 = _mm256_blend_epi32(t0, t1, 0xF0);                \
    } while (0)
#define BLAKE2B_LOAD_MSG_1_4(b0)                   \
    do {                                           \
        t0 = _mm256_unpacklo_epi64(m6, m1);        \
        t1 = _mm256_unpackhi_epi64(m3, m1);        \
        b0 = _mm256_blend_epi32(t0, t1, 0xF0);     \
    } while (0)
/* Round 2. */
#define BLAKE2B_LOAD_MSG_2_1(b0)                   \
    do {                                           \
        t0 = _mm256_alignr_epi8(m6, m5, 8);        \
        t1 = _mm256_unpackhi_epi64(m2, m7);        \
        b0 = _mm256_blend_epi32(t0, t1, 0xF0);     \
    } while (0)
#define BLAKE2B_LOAD_MSG_2_2(b0)                   \
    do {                                           \
        t0 = _mm256_unpacklo_epi64(m4, m0);        \
        t1 = _mm256_blend_epi32(m6, m1, 0x33);     \
        b0 = _mm256_blend_epi32(t0, t1, 0xF0);     \
    } while (0)
#define BLAKE2B_LOAD_MSG_2_3(b0)                   \
    do {                                           \
        t0 = _mm256_blend_epi32(m1, m5, 0x33);     \
        t1 = _mm256_unpackhi_epi64(m3, m4);        \
        b0 = _mm256_blend_epi32(t0, t1, 0xF0);     \
    } while (0)
#define BLAKE2B_LOAD_MSG_2_4(b0)                   \
    do {                                           \
        t0 = _mm256_unpacklo_epi64(m7, m3);        \
        t1 = _mm256_alignr_epi8(m2, m0, 8);        \
        b0 = _mm256_blend_epi32(t0, t1, 0xF0);     \
    } while (0)
/* Round 3. */
#define BLAKE2B_LOAD_MSG_3_1(b0)                   \
    do {                                           \
        t0 = _mm256_unpackhi_epi64(m3, m1);        \
        t1 = _mm256_unpackhi_epi64(m6, m5);        \
        b0 = _mm256_blend_epi32(t0, t1, 0xF0);     \
    } while (0)
#define BLAKE2B_LOAD_MSG_3_2(b0)                   \
    do {                                           \
        t0 = _mm256_unpackhi_epi64(m4, m0);        \
        t1 = _mm256_unpacklo_epi64(m6, m7);        \
        b0 = _mm256_blend_epi32(t0, t1, 0xF0);     \
    } while (0)
#define BLAKE2B_LOAD_MSG_3_3(b0)                   \
    do {                                           \
        t0 = _mm256_blend_epi32(m2, m1, 0x33);     \
        t1 = _mm256_blend_epi32(m7, m2, 0x33);     \
        b0 = _mm256_blend_epi32(t0, t1, 0xF0);     \
    } while (0)
#define BLAKE2B_LOAD_MSG_3_4(b0)                   \
    do {                                           \
        t0 = _mm256_unpacklo_epi64(m3, m5);        \
        t1 = _mm256_unpacklo_epi64(m0, m4);        \
        b0 = _mm256_blend_epi32(t0, t1, 0xF0);     \
    } while (0)
/* Round 4. */
#define BLAKE2B_LOAD_MSG_4_1(b0)                   \
    do {                                           \
        t0 = _mm256_unpackhi_epi64(m4, m2);        \
        t1 = _mm256_unpacklo_epi64(m1, m5);        \
        b0 = _mm256_blend_epi32(t0, t1, 0xF0);     \
    } while (0)
#define BLAKE2B_LOAD_MSG_4_2(b0)                   \
    do {                                           \
        t0 = _mm256_blend_epi32(m3, m0, 0x33);     \
        t1 = _mm256_blend_epi32(m7, m2, 0x33);     \
        b0 = _mm256_blend_epi32(t0, t1, 0xF0);     \
    } while (0)
#define BLAKE2B_LOAD_MSG_4_3(b0)                   \
    do {                                           \
        t0 = _mm256_blend_epi32(m5, m7, 0x33);     \
        t1 = _mm256_blend_epi32(m1, m3, 0x33);     \
        b0 = _mm256_blend_epi32(t0, t1, 0xF0);     \
    } while (0)
#define BLAKE2B_LOAD_MSG_4_4(b0)                   \
    do {                                           \
        t0 = _mm256_alignr_epi8(m6, m0, 8);        \
        t1 = _mm256_blend_epi32(m6, m4, 0x33);     \
        b0 = _mm256_blend_epi32(t0, t1, 0xF0);     \
    } while (0)
/* Round 5. */
#define BLAKE2B_LOAD_MSG_5_1(b0)                   \
    do {                                           \
        t0 = _mm256_unpacklo_epi64(m1, m3);        \
        t1 = _mm256_unpacklo_epi64(m0, m4);        \
        b0 = _mm256_blend_epi32(t0, t1, 0xF0);     \
    } while (0)
#define BLAKE2B_LOAD_MSG_5_2(b0)                   \
    do {                                           \
        t0 = _mm256_unpacklo_epi64(m6, m5);        \
        t1 = _mm256_unpackhi_epi64(m5, m1);        \
        b0 = _mm256_blend_epi32(t0, t1, 0xF0);     \
    } while (0)
#define BLAKE2B_LOAD_MSG_5_3(b0)                   \
    do {                                           \
        t0 = _mm256_blend_epi32(m3, m2, 0x33);     \
        t1 = _mm256_unpackhi_epi64(m7, m0);        \
        b0 = _mm256_blend_epi32(t0, t1, 0xF0);     \
    } while (0)
#define BLAKE2B_LOAD_MSG_5_4(b0)                   \
    do {                                           \
        t0 = _mm256_unpackhi_epi64(m6, m2);        \
        t1 = _mm256_blend_epi32(m4, m7, 0x33);     \
        b0 = _mm256_blend_epi32(t0, t1, 0xF0);     \
    } while (0)
/* Round 6. */
#define BLAKE2B_LOAD_MSG_6_1(b0)                   \
    do {                                           \
        t0 = _mm256_blend_epi32(m0, m6, 0x33);     \
        t1 = _mm256_unpacklo_epi64(m7, m2);        \
        b0 = _mm256_blend_epi32(t0, t1, 0xF0);     \
    } while (0)
#define BLAKE2B_LOAD_MSG_6_2(b0)                   \
    do {                                           \
        t0 = _mm256_unpackhi_epi64(m2, m7);        \
        t1 = _mm256_alignr_epi8(m5, m6, 8);        \
        b0 = _mm256_blend_epi32(t0, t1, 0xF0);     \
    } while (0)
#define BLAKE2B_LOAD_MSG_6_3(b0)                              \
    do {                                                      \
        t0 = _mm256_unpacklo_epi64(m0, m3);                   \
        t1 = _mm256_shuffle_epi32(m4, _MM_SHUFFLE(1, 0, 3, 2)); \
        b0 = _mm256_blend_epi32(t0, t1, 0xF0);                \
    } while (0)
#define BLAKE2B_LOAD_MSG_6_4(b0)                   \
    do {                                           \
        t0 = _mm256_unpackhi_epi64(m3, m1);        \
        t1 = _mm256_blend_epi32(m5, m1, 0x33);     \
        b0 = _mm256_blend_epi32(t0, t1, 0xF0);     \
    } while (0)
/* Round 7. */
#define BLAKE2B_LOAD_MSG_7_1(b0)                   \
    do {                                           \
        t0 = _mm256_unpackhi_epi64(m6, m3);        \
        t1 = _mm256_blend_epi32(m1, m6, 0x33);     \
        b0 = _mm256_blend_epi32(t0, t1, 0xF0);     \
    } while (0)
#define BLAKE2B_LOAD_MSG_7_2(b0)                   \
    do {                                           \
        t0 = _mm256_alignr_epi8(m7, m5, 8);        \
        t1 = _mm256_unpackhi_epi64(m0, m4);        \
        b0 = _mm256_blend_epi32(t0, t1, 0xF0);     \
    } while (0)
#define BLAKE2B_LOAD_MSG_7_3(b0)                   \
    do {                                           \
        t0 = _mm256_unpackhi_epi64(m2, m7);        \
        t1 = _mm256_unpacklo_epi64(m4, m1);        \
        b0 = _mm256_blend_epi32(t0, t1, 0xF0);     \
    } while (0)
#define BLAKE2B_LOAD_MSG_7_4(b0)                   \
    do {                                           \
        t0 = _mm256_unpacklo_epi64(m0, m2);        \
        t1 = _mm256_unpacklo_epi64(m3, m5);        \
        b0 = _mm256_blend_epi32(t0, t1, 0xF0);     \
    } while (0)
/* Round 8. */
#define BLAKE2B_LOAD_MSG_8_1(b0)                   \
    do {                                           \
        t0 = _mm256_unpacklo_epi64(m3, m7);        \
        t1 = _mm256_alignr_epi8(m0, m5, 8);        \
        b0 = _mm256_blend_epi32(t0, t1, 0xF0);     \
    } while (0)
#define BLAKE2B_LOAD_MSG_8_2(b0)                   \
    do {                                           \
        t0 = _mm256_unpackhi_epi64(m7, m4);        \
        t1 = _mm256_alignr_epi8(m4, m1, 8);        \
        b0 = _mm256_blend_epi32(t0, t1, 0xF0);     \
    } while (0)
#define BLAKE2B_LOAD_MSG_8_3(b0)                   \
    do {                                           \
        t0 = m6;                                   \
        t1 = _mm256_alignr_epi8(m5, m0, 8);        \
        b0 = _mm256_blend_epi32(t0, t1, 0xF0);     \
    } while (0)
#define BLAKE2B_LOAD_MSG_8_4(b0)                   \
    do {                                           \
        t0 = _mm256_blend_epi32(m3, m1, 0x33);     \
        t1 = m2;                                   \
        b0 = _mm256_blend_epi32(t0, t1, 0xF0);     \
    } while (0)
/* Round 9. */
#define BLAKE2B_LOAD_MSG_9_1(b0)                   \
    do {                                           \
        t0 = _mm256_unpacklo_epi64(m5, m4);        \
        t1 = _mm256_unpackhi_epi64(m3, m0);        \
        b0 = _mm256_blend_epi32(t0, t1, 0xF0);     \
    } while (0)
#define BLAKE2B_LOAD_MSG_9_2(b0)                   \
    do {                                           \
        t0 = _mm256_unpacklo_epi64(m1, m2);        \
        t1 = _mm256_blend_epi32(m2, m3, 0x33);     \
        b0 = _mm256_blend_epi32(t0, t1, 0xF0);     \
    } while (0)
#define BLAKE2B_LOAD_MSG_9_3(b0)                   \
    do {                                           \
        t0 = _mm256_unpackhi_epi64(m7, m4);        \
        t1 = _mm256_unpackhi_epi64(m1, m6);        \
        b0 = _mm256_blend_epi32(t0, t1, 0xF0);     \
    } while (0)
#define BLAKE2B_LOAD_MSG_9_4(b0)                   \
    do {                                           \
        t0 = _mm256_alignr_epi8(m7, m5, 8);        \
        t1 = _mm256_unpacklo_epi64(m6, m0);        \
        b0 = _mm256_blend_epi32(t0, t1, 0xF0);     \
    } while (0)
/* Rounds 10 and 11 reuse the round-0 and round-1 permutations
   (the BLAKE2b sigma schedule repeats after 10 rounds). */
#define BLAKE2B_LOAD_MSG_10_1(b0)                  \
    do {                                           \
        t0 = _mm256_unpacklo_epi64(m0, m1);        \
        t1 = _mm256_unpacklo_epi64(m2, m3);        \
        b0 = _mm256_blend_epi32(t0, t1, 0xF0);     \
    } while (0)
#define BLAKE2B_LOAD_MSG_10_2(b0)                  \
    do {                                           \
        t0 = _mm256_unpackhi_epi64(m0, m1);        \
        t1 = _mm256_unpackhi_epi64(m2, m3);        \
        b0 = _mm256_blend_epi32(t0, t1, 0xF0);     \
    } while (0)
#define BLAKE2B_LOAD_MSG_10_3(b0)                  \
    do {                                           \
        t0 = _mm256_unpacklo_epi64(m4, m5);        \
        t1 = _mm256_unpacklo_epi64(m6, m7);        \
        b0 = _mm256_blend_epi32(t0, t1, 0xF0);     \
    } while (0)
#define BLAKE2B_LOAD_MSG_10_4(b0)                  \
    do {                                           \
        t0 = _mm256_unpackhi_epi64(m4, m5);        \
        t1 = _mm256_unpackhi_epi64(m6, m7);        \
        b0 = _mm256_blend_epi32(t0, t1, 0xF0);     \
    } while (0)
#define BLAKE2B_LOAD_MSG_11_1(b0)                  \
    do {                                           \
        t0 = _mm256_unpacklo_epi64(m7, m2);        \
        t1 = _mm256_unpackhi_epi64(m4, m6);        \
        b0 = _mm256_blend_epi32(t0, t1, 0xF0);     \
    } while (0)
#define BLAKE2B_LOAD_MSG_11_2(b0)                  \
    do {                                           \
        t0 = _mm256_unpacklo_epi64(m5, m4);        \
        t1 = _mm256_alignr_epi8(m3, m7, 8);        \
        b0 = _mm256_blend_epi32(t0, t1, 0xF0);     \
    } while (0)
#define BLAKE2B_LOAD_MSG_11_3(b0)                             \
    do {                                                      \
        t0 = _mm256_shuffle_epi32(m0, _MM_SHUFFLE(1, 0, 3, 2)); \
        t1 = _mm256_unpackhi_epi64(m5, m2);                   \
        b0 = _mm256_blend_epi32(t0, t1, 0xF0);                \
    } while (0)
#define BLAKE2B_LOAD_MSG_11_4(b0)                  \
    do {                                           \
        t0 = _mm256_unpacklo_epi64(m6, m1);        \
        t1 = _mm256_unpackhi_epi64(m3, m1);        \
        b0 = _mm256_blend_epi32(t0, t1, 0xF0);     \
    } while (0)
#endif

@ -0,0 +1,164 @@
/*
BLAKE2 reference source code package - optimized C implementations
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
You should have received a copy of the CC0 Public Domain Dedication along
with
this software. If not, see
<http://creativecommons.org/publicdomain/zero/1.0/>.
*/
#ifndef blake2b_load_sse2_H
#define blake2b_load_sse2_H

/* SSE2 message-loading macros.  LOAD_MSG_r_k(b0, b1) fills b0/b1 with the
 * four 64-bit message words used by step k of round r, according to that
 * round's sigma permutation.  The scalar words m0..m15 (loaded from the
 * current block by the compression function) must be in scope; note that
 * _mm_set_epi64x takes (high, low).  Rounds 10/11 repeat rounds 0/1. */

#define LOAD_MSG_0_1(b0, b1)         \
    b0 = _mm_set_epi64x(m2, m0);     \
    b1 = _mm_set_epi64x(m6, m4)
#define LOAD_MSG_0_2(b0, b1)         \
    b0 = _mm_set_epi64x(m3, m1);     \
    b1 = _mm_set_epi64x(m7, m5)
#define LOAD_MSG_0_3(b0, b1)         \
    b0 = _mm_set_epi64x(m10, m8);    \
    b1 = _mm_set_epi64x(m14, m12)
#define LOAD_MSG_0_4(b0, b1)         \
    b0 = _mm_set_epi64x(m11, m9);    \
    b1 = _mm_set_epi64x(m15, m13)
#define LOAD_MSG_1_1(b0, b1)         \
    b0 = _mm_set_epi64x(m4, m14);    \
    b1 = _mm_set_epi64x(m13, m9)
#define LOAD_MSG_1_2(b0, b1)         \
    b0 = _mm_set_epi64x(m8, m10);    \
    b1 = _mm_set_epi64x(m6, m15)
#define LOAD_MSG_1_3(b0, b1)         \
    b0 = _mm_set_epi64x(m0, m1);     \
    b1 = _mm_set_epi64x(m5, m11)
#define LOAD_MSG_1_4(b0, b1)         \
    b0 = _mm_set_epi64x(m2, m12);    \
    b1 = _mm_set_epi64x(m3, m7)
#define LOAD_MSG_2_1(b0, b1)         \
    b0 = _mm_set_epi64x(m12, m11);   \
    b1 = _mm_set_epi64x(m15, m5)
#define LOAD_MSG_2_2(b0, b1)         \
    b0 = _mm_set_epi64x(m0, m8);     \
    b1 = _mm_set_epi64x(m13, m2)
#define LOAD_MSG_2_3(b0, b1)         \
    b0 = _mm_set_epi64x(m3, m10);    \
    b1 = _mm_set_epi64x(m9, m7)
#define LOAD_MSG_2_4(b0, b1)         \
    b0 = _mm_set_epi64x(m6, m14);    \
    b1 = _mm_set_epi64x(m4, m1)
#define LOAD_MSG_3_1(b0, b1)         \
    b0 = _mm_set_epi64x(m3, m7);     \
    b1 = _mm_set_epi64x(m11, m13)
#define LOAD_MSG_3_2(b0, b1)         \
    b0 = _mm_set_epi64x(m1, m9);     \
    b1 = _mm_set_epi64x(m14, m12)
#define LOAD_MSG_3_3(b0, b1)         \
    b0 = _mm_set_epi64x(m5, m2);     \
    b1 = _mm_set_epi64x(m15, m4)
#define LOAD_MSG_3_4(b0, b1)         \
    b0 = _mm_set_epi64x(m10, m6);    \
    b1 = _mm_set_epi64x(m8, m0)
#define LOAD_MSG_4_1(b0, b1)         \
    b0 = _mm_set_epi64x(m5, m9);     \
    b1 = _mm_set_epi64x(m10, m2)
#define LOAD_MSG_4_2(b0, b1)         \
    b0 = _mm_set_epi64x(m7, m0);     \
    b1 = _mm_set_epi64x(m15, m4)
#define LOAD_MSG_4_3(b0, b1)         \
    b0 = _mm_set_epi64x(m11, m14);   \
    b1 = _mm_set_epi64x(m3, m6)
#define LOAD_MSG_4_4(b0, b1)         \
    b0 = _mm_set_epi64x(m12, m1);    \
    b1 = _mm_set_epi64x(m13, m8)
#define LOAD_MSG_5_1(b0, b1)         \
    b0 = _mm_set_epi64x(m6, m2);     \
    b1 = _mm_set_epi64x(m8, m0)
#define LOAD_MSG_5_2(b0, b1)         \
    b0 = _mm_set_epi64x(m10, m12);   \
    b1 = _mm_set_epi64x(m3, m11)
#define LOAD_MSG_5_3(b0, b1)         \
    b0 = _mm_set_epi64x(m7, m4);     \
    b1 = _mm_set_epi64x(m1, m15)
#define LOAD_MSG_5_4(b0, b1)         \
    b0 = _mm_set_epi64x(m5, m13);    \
    b1 = _mm_set_epi64x(m9, m14)
#define LOAD_MSG_6_1(b0, b1)         \
    b0 = _mm_set_epi64x(m1, m12);    \
    b1 = _mm_set_epi64x(m4, m14)
#define LOAD_MSG_6_2(b0, b1)         \
    b0 = _mm_set_epi64x(m15, m5);    \
    b1 = _mm_set_epi64x(m10, m13)
#define LOAD_MSG_6_3(b0, b1)         \
    b0 = _mm_set_epi64x(m6, m0);     \
    b1 = _mm_set_epi64x(m8, m9)
#define LOAD_MSG_6_4(b0, b1)         \
    b0 = _mm_set_epi64x(m3, m7);     \
    b1 = _mm_set_epi64x(m11, m2)
#define LOAD_MSG_7_1(b0, b1)         \
    b0 = _mm_set_epi64x(m7, m13);    \
    b1 = _mm_set_epi64x(m3, m12)
#define LOAD_MSG_7_2(b0, b1)         \
    b0 = _mm_set_epi64x(m14, m11);   \
    b1 = _mm_set_epi64x(m9, m1)
#define LOAD_MSG_7_3(b0, b1)         \
    b0 = _mm_set_epi64x(m15, m5);    \
    b1 = _mm_set_epi64x(m2, m8)
#define LOAD_MSG_7_4(b0, b1)         \
    b0 = _mm_set_epi64x(m4, m0);     \
    b1 = _mm_set_epi64x(m10, m6)
#define LOAD_MSG_8_1(b0, b1)         \
    b0 = _mm_set_epi64x(m14, m6);    \
    b1 = _mm_set_epi64x(m0, m11)
#define LOAD_MSG_8_2(b0, b1)         \
    b0 = _mm_set_epi64x(m9, m15);    \
    b1 = _mm_set_epi64x(m8, m3)
#define LOAD_MSG_8_3(b0, b1)         \
    b0 = _mm_set_epi64x(m13, m12);   \
    b1 = _mm_set_epi64x(m10, m1)
#define LOAD_MSG_8_4(b0, b1)         \
    b0 = _mm_set_epi64x(m7, m2);     \
    b1 = _mm_set_epi64x(m5, m4)
#define LOAD_MSG_9_1(b0, b1)         \
    b0 = _mm_set_epi64x(m8, m10);    \
    b1 = _mm_set_epi64x(m1, m7)
#define LOAD_MSG_9_2(b0, b1)         \
    b0 = _mm_set_epi64x(m4, m2);     \
    b1 = _mm_set_epi64x(m5, m6)
#define LOAD_MSG_9_3(b0, b1)         \
    b0 = _mm_set_epi64x(m9, m15);    \
    b1 = _mm_set_epi64x(m13, m3)
#define LOAD_MSG_9_4(b0, b1)         \
    b0 = _mm_set_epi64x(m14, m11);   \
    b1 = _mm_set_epi64x(m0, m12)
/* Rounds 10/11: sigma schedule wraps around to rounds 0/1. */
#define LOAD_MSG_10_1(b0, b1)        \
    b0 = _mm_set_epi64x(m2, m0);     \
    b1 = _mm_set_epi64x(m6, m4)
#define LOAD_MSG_10_2(b0, b1)        \
    b0 = _mm_set_epi64x(m3, m1);     \
    b1 = _mm_set_epi64x(m7, m5)
#define LOAD_MSG_10_3(b0, b1)        \
    b0 = _mm_set_epi64x(m10, m8);    \
    b1 = _mm_set_epi64x(m14, m12)
#define LOAD_MSG_10_4(b0, b1)        \
    b0 = _mm_set_epi64x(m11, m9);    \
    b1 = _mm_set_epi64x(m15, m13)
#define LOAD_MSG_11_1(b0, b1)        \
    b0 = _mm_set_epi64x(m4, m14);    \
    b1 = _mm_set_epi64x(m13, m9)
#define LOAD_MSG_11_2(b0, b1)        \
    b0 = _mm_set_epi64x(m8, m10);    \
    b1 = _mm_set_epi64x(m6, m15)
#define LOAD_MSG_11_3(b0, b1)        \
    b0 = _mm_set_epi64x(m0, m1);     \
    b1 = _mm_set_epi64x(m5, m11)
#define LOAD_MSG_11_4(b0, b1)        \
    b0 = _mm_set_epi64x(m2, m12);    \
    b1 = _mm_set_epi64x(m3, m7)
#endif

@ -0,0 +1,307 @@
/*
BLAKE2 reference source code package - optimized C implementations
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
You should have received a copy of the CC0 Public Domain Dedication along
with
this software. If not, see
<http://creativecommons.org/publicdomain/zero/1.0/>.
*/
#ifndef blake2b_load_sse41_H
#define blake2b_load_sse41_H

/* SSE4.1 message-loading macros.  Same contract as blake2b-load-sse2.h
 * (LOAD_MSG_r_k(b0, b1) produces the round-r/step-k message words), but the
 * m0..m7 names here are __m128i registers holding the 16 message words two
 * per register, so the permutations are built from unpack/alignr/blend
 * shuffles instead of scalar _mm_set_epi64x.  Rounds 10/11 repeat 0/1. */

#define LOAD_MSG_0_1(b0, b1)                 \
    do {                                     \
        b0 = _mm_unpacklo_epi64(m0, m1);     \
        b1 = _mm_unpacklo_epi64(m2, m3);     \
    } while (0)
#define LOAD_MSG_0_2(b0, b1)                 \
    do {                                     \
        b0 = _mm_unpackhi_epi64(m0, m1);     \
        b1 = _mm_unpackhi_epi64(m2, m3);     \
    } while (0)
#define LOAD_MSG_0_3(b0, b1)                 \
    do {                                     \
        b0 = _mm_unpacklo_epi64(m4, m5);     \
        b1 = _mm_unpacklo_epi64(m6, m7);     \
    } while (0)
#define LOAD_MSG_0_4(b0, b1)                 \
    do {                                     \
        b0 = _mm_unpackhi_epi64(m4, m5);     \
        b1 = _mm_unpackhi_epi64(m6, m7);     \
    } while (0)
#define LOAD_MSG_1_1(b0, b1)                 \
    do {                                     \
        b0 = _mm_unpacklo_epi64(m7, m2);     \
        b1 = _mm_unpackhi_epi64(m4, m6);     \
    } while (0)
#define LOAD_MSG_1_2(b0, b1)                 \
    do {                                     \
        b0 = _mm_unpacklo_epi64(m5, m4);     \
        b1 = _mm_alignr_epi8(m3, m7, 8);     \
    } while (0)
#define LOAD_MSG_1_3(b0, b1)                            \
    do {                                                \
        b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1, 0, 3, 2)); \
        b1 = _mm_unpackhi_epi64(m5, m2);                \
    } while (0)
#define LOAD_MSG_1_4(b0, b1)                 \
    do {                                     \
        b0 = _mm_unpacklo_epi64(m6, m1);     \
        b1 = _mm_unpackhi_epi64(m3, m1);     \
    } while (0)
#define LOAD_MSG_2_1(b0, b1)                 \
    do {                                     \
        b0 = _mm_alignr_epi8(m6, m5, 8);     \
        b1 = _mm_unpackhi_epi64(m2, m7);     \
    } while (0)
#define LOAD_MSG_2_2(b0, b1)                 \
    do {                                     \
        b0 = _mm_unpacklo_epi64(m4, m0);     \
        b1 = _mm_blend_epi16(m1, m6, 0xF0);  \
    } while (0)
#define LOAD_MSG_2_3(b0, b1)                 \
    do {                                     \
        b0 = _mm_blend_epi16(m5, m1, 0xF0);  \
        b1 = _mm_unpackhi_epi64(m3, m4);     \
    } while (0)
#define LOAD_MSG_2_4(b0, b1)                 \
    do {                                     \
        b0 = _mm_unpacklo_epi64(m7, m3);     \
        b1 = _mm_alignr_epi8(m2, m0, 8);     \
    } while (0)
#define LOAD_MSG_3_1(b0, b1)                 \
    do {                                     \
        b0 = _mm_unpackhi_epi64(m3, m1);     \
        b1 = _mm_unpackhi_epi64(m6, m5);     \
    } while (0)
#define LOAD_MSG_3_2(b0, b1)                 \
    do {                                     \
        b0 = _mm_unpackhi_epi64(m4, m0);     \
        b1 = _mm_unpacklo_epi64(m6, m7);     \
    } while (0)
#define LOAD_MSG_3_3(b0, b1)                 \
    do {                                     \
        b0 = _mm_blend_epi16(m1, m2, 0xF0);  \
        b1 = _mm_blend_epi16(m2, m7, 0xF0);  \
    } while (0)
#define LOAD_MSG_3_4(b0, b1)                 \
    do {                                     \
        b0 = _mm_unpacklo_epi64(m3, m5);     \
        b1 = _mm_unpacklo_epi64(m0, m4);     \
    } while (0)
#define LOAD_MSG_4_1(b0, b1)                 \
    do {                                     \
        b0 = _mm_unpackhi_epi64(m4, m2);     \
        b1 = _mm_unpacklo_epi64(m1, m5);     \
    } while (0)
#define LOAD_MSG_4_2(b0, b1)                 \
    do {                                     \
        b0 = _mm_blend_epi16(m0, m3, 0xF0);  \
        b1 = _mm_blend_epi16(m2, m7, 0xF0);  \
    } while (0)
#define LOAD_MSG_4_3(b0, b1)                 \
    do {                                     \
        b0 = _mm_blend_epi16(m7, m5, 0xF0);  \
        b1 = _mm_blend_epi16(m3, m1, 0xF0);  \
    } while (0)
#define LOAD_MSG_4_4(b0, b1)                 \
    do {                                     \
        b0 = _mm_alignr_epi8(m6, m0, 8);     \
        b1 = _mm_blend_epi16(m4, m6, 0xF0);  \
    } while (0)
#define LOAD_MSG_5_1(b0, b1)                 \
    do {                                     \
        b0 = _mm_unpacklo_epi64(m1, m3);     \
        b1 = _mm_unpacklo_epi64(m0, m4);     \
    } while (0)
#define LOAD_MSG_5_2(b0, b1)                 \
    do {                                     \
        b0 = _mm_unpacklo_epi64(m6, m5);     \
        b1 = _mm_unpackhi_epi64(m5, m1);     \
    } while (0)
#define LOAD_MSG_5_3(b0, b1)                 \
    do {                                     \
        b0 = _mm_blend_epi16(m2, m3, 0xF0);  \
        b1 = _mm_unpackhi_epi64(m7, m0);     \
    } while (0)
#define LOAD_MSG_5_4(b0, b1)                 \
    do {                                     \
        b0 = _mm_unpackhi_epi64(m6, m2);     \
        b1 = _mm_blend_epi16(m7, m4, 0xF0);  \
    } while (0)
#define LOAD_MSG_6_1(b0, b1)                 \
    do {                                     \
        b0 = _mm_blend_epi16(m6, m0, 0xF0);  \
        b1 = _mm_unpacklo_epi64(m7, m2);     \
    } while (0)
#define LOAD_MSG_6_2(b0, b1)                 \
    do {                                     \
        b0 = _mm_unpackhi_epi64(m2, m7);     \
        b1 = _mm_alignr_epi8(m5, m6, 8);     \
    } while (0)
#define LOAD_MSG_6_3(b0, b1)                            \
    do {                                                \
        b0 = _mm_unpacklo_epi64(m0, m3);                \
        b1 = _mm_shuffle_epi32(m4, _MM_SHUFFLE(1, 0, 3, 2)); \
    } while (0)
#define LOAD_MSG_6_4(b0, b1)                 \
    do {                                     \
        b0 = _mm_unpackhi_epi64(m3, m1);     \
        b1 = _mm_blend_epi16(m1, m5, 0xF0);  \
    } while (0)
#define LOAD_MSG_7_1(b0, b1)                 \
    do {                                     \
        b0 = _mm_unpackhi_epi64(m6, m3);     \
        b1 = _mm_blend_epi16(m6, m1, 0xF0);  \
    } while (0)
#define LOAD_MSG_7_2(b0, b1)                 \
    do {                                     \
        b0 = _mm_alignr_epi8(m7, m5, 8);     \
        b1 = _mm_unpackhi_epi64(m0, m4);     \
    } while (0)
#define LOAD_MSG_7_3(b0, b1)                 \
    do {                                     \
        b0 = _mm_unpackhi_epi64(m2, m7);     \
        b1 = _mm_unpacklo_epi64(m4, m1);     \
    } while (0)
#define LOAD_MSG_7_4(b0, b1)                 \
    do {                                     \
        b0 = _mm_unpacklo_epi64(m0, m2);     \
        b1 = _mm_unpacklo_epi64(m3, m5);     \
    } while (0)
#define LOAD_MSG_8_1(b0, b1)                 \
    do {                                     \
        b0 = _mm_unpacklo_epi64(m3, m7);     \
        b1 = _mm_alignr_epi8(m0, m5, 8);     \
    } while (0)
#define LOAD_MSG_8_2(b0, b1)                 \
    do {                                     \
        b0 = _mm_unpackhi_epi64(m7, m4);     \
        b1 = _mm_alignr_epi8(m4, m1, 8);     \
    } while (0)
#define LOAD_MSG_8_3(b0, b1)                 \
    do {                                     \
        b0 = m6;                             \
        b1 = _mm_alignr_epi8(m5, m0, 8);     \
    } while (0)
#define LOAD_MSG_8_4(b0, b1)                 \
    do {                                     \
        b0 = _mm_blend_epi16(m1, m3, 0xF0);  \
        b1 = m2;                             \
    } while (0)
#define LOAD_MSG_9_1(b0, b1)                 \
    do {                                     \
        b0 = _mm_unpacklo_epi64(m5, m4);     \
        b1 = _mm_unpackhi_epi64(m3, m0);     \
    } while (0)
#define LOAD_MSG_9_2(b0, b1)                 \
    do {                                     \
        b0 = _mm_unpacklo_epi64(m1, m2);     \
        b1 = _mm_blend_epi16(m3, m2, 0xF0);  \
    } while (0)
#define LOAD_MSG_9_3(b0, b1)                 \
    do {                                     \
        b0 = _mm_unpackhi_epi64(m7, m4);     \
        b1 = _mm_unpackhi_epi64(m1, m6);     \
    } while (0)
#define LOAD_MSG_9_4(b0, b1)                 \
    do {                                     \
        b0 = _mm_alignr_epi8(m7, m5, 8);     \
        b1 = _mm_unpacklo_epi64(m6, m0);     \
    } while (0)
/* Rounds 10/11: sigma schedule wraps around to rounds 0/1. */
#define LOAD_MSG_10_1(b0, b1)                \
    do {                                     \
        b0 = _mm_unpacklo_epi64(m0, m1);     \
        b1 = _mm_unpacklo_epi64(m2, m3);     \
    } while (0)
#define LOAD_MSG_10_2(b0, b1)                \
    do {                                     \
        b0 = _mm_unpackhi_epi64(m0, m1);     \
        b1 = _mm_unpackhi_epi64(m2, m3);     \
    } while (0)
#define LOAD_MSG_10_3(b0, b1)                \
    do {                                     \
        b0 = _mm_unpacklo_epi64(m4, m5);     \
        b1 = _mm_unpacklo_epi64(m6, m7);     \
    } while (0)
#define LOAD_MSG_10_4(b0, b1)                \
    do {                                     \
        b0 = _mm_unpackhi_epi64(m4, m5);     \
        b1 = _mm_unpackhi_epi64(m6, m7);     \
    } while (0)
#define LOAD_MSG_11_1(b0, b1)                \
    do {                                     \
        b0 = _mm_unpacklo_epi64(m7, m2);     \
        b1 = _mm_unpackhi_epi64(m4, m6);     \
    } while (0)
#define LOAD_MSG_11_2(b0, b1)                \
    do {                                     \
        b0 = _mm_unpacklo_epi64(m5, m4);     \
        b1 = _mm_alignr_epi8(m3, m7, 8);     \
    } while (0)
#define LOAD_MSG_11_3(b0, b1)                           \
    do {                                                \
        b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1, 0, 3, 2)); \
        b1 = _mm_unpackhi_epi64(m5, m2);                \
    } while (0)
#define LOAD_MSG_11_4(b0, b1)                \
    do {                                     \
        b0 = _mm_unpacklo_epi64(m6, m1);     \
        b1 = _mm_unpackhi_epi64(m3, m1);     \
    } while (0)
#endif

@ -0,0 +1,488 @@
/*
BLAKE2 reference source code package - C implementations
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
You should have received a copy of the CC0 Public Domain Dedication along
with
this software. If not, see
<http://creativecommons.org/publicdomain/zero/1.0/>.
*/
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "blake2.h"
#include <sodium/core.h>
#include <sodium/private/common.h>
#include <sodium/runtime.h>
#include <sodium/utils.h>
/* Active compression backend; starts at the portable reference
   implementation and may be upgraded at runtime by
   blake2b_pick_best_implementation() below. */
static blake2b_compress_fn blake2b_compress = blake2b_compress_ref;
/* BLAKE2b initialization vector (RFC 7693; the SHA-512 IV constants). */
static const uint64_t blake2b_IV[8] = {
    0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL, 0x3c6ef372fe94f82bULL,
    0xa54ff53a5f1d36f1ULL, 0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL,
    0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL};
/* LCOV_EXCL_START */
/* Mark the state as the last node of a tree-hashing layer by setting the
   node finalization flag f[1] to all ones.  Always returns 0. */
static inline int
blake2b_set_lastnode(blake2b_state *S)
{
    S->f[1] = -1;
    return 0;
}
/* LCOV_EXCL_STOP */
/* Report whether the final block has already been compressed, i.e. whether
   the block finalization flag f[0] has been set.  Returns 1 or 0. */
static inline int
blake2b_is_lastblock(const blake2b_state *S)
{
    return (S->f[0] == 0) ? 0 : 1;
}
/* Flag the state for finalization: set f[0] to all ones, and additionally
   the last-node flag when this state hashes the last node of a tree.
   Always returns 0. */
static inline int
blake2b_set_lastblock(blake2b_state *S)
{
    if(S->last_node != 0)
    {
        blake2b_set_lastnode(S);
    }
    S->f[0] = -1;
    return 0;
}
/* Add inc (bytes processed) to the 128-bit message counter t[0..1].
   Always returns 0. */
static inline int
blake2b_increment_counter(blake2b_state *S, const uint64_t inc)
{
#ifdef HAVE_TI_MODE
    /* Native 128-bit arithmetic when the compiler provides it. */
    uint128_t t = ((uint128_t)S->t[1] << 64) | S->t[0];
    t += inc;
    S->t[0] = (uint64_t)(t >> 0);
    S->t[1] = (uint64_t)(t >> 64);
#else
    /* Portable path: propagate the carry manually (t[0] wrapped iff the
       new t[0] is smaller than inc). */
    S->t[0] += inc;
    S->t[1] += (S->t[0] < inc);
#endif
    return 0;
}
/* Parameter-related functions */
/* Copy a BLAKE2B_SALTBYTES-byte salt into the parameter block.
   The caller guarantees salt points to at least that many bytes. */
static inline int
blake2b_param_set_salt(blake2b_param *P, const uint8_t salt[BLAKE2B_SALTBYTES])
{
    memcpy(P->salt, salt, BLAKE2B_SALTBYTES);
    return 0;
}
/* Copy a BLAKE2B_PERSONALBYTES-byte personalization string into the
   parameter block. */
static inline int
blake2b_param_set_personal(blake2b_param *P,
                           const uint8_t personal[BLAKE2B_PERSONALBYTES])
{
    memcpy(P->personal, personal, BLAKE2B_PERSONALBYTES);
    return 0;
}
/* Reset the hashing state: load the IV into h[] and zero everything from
   the counter t[] through last_node.  Always returns 0.
   NOTE(review): the offsetof-based memset assumes t, f, buf, buflen and
   last_node are laid out contiguously in blake2b_state (declared in
   blake2.h, not in view) — confirm if the struct ever changes. */
static inline int
blake2b_init0(blake2b_state *S)
{
    int i;
    for(i = 0; i < 8; i++)
    {
        S->h[i] = blake2b_IV[i];
    }
    memset(S->t, 0,
           offsetof(blake2b_state, last_node) + sizeof(S->last_node)
               - offsetof(blake2b_state, t));
    return 0;
}
/* init xors IV with input parameter block */
/* Initialize the state from a 64-byte parameter block: reset to the IV,
   then XOR in the parameter block read as eight little-endian 64-bit
   words.  Always returns 0. */
int
blake2b_init_param(blake2b_state *S, const blake2b_param *P)
{
    size_t i;
    const uint8_t *p;
    /* The wire format fixes the parameter block at exactly 64 bytes. */
    COMPILER_ASSERT(sizeof *P == 64);
    blake2b_init0(S);
    p = (const uint8_t *)(P);
    /* IV XOR ParamBlock */
    for(i = 0; i < 8; i++)
    {
        S->h[i] ^= LOAD64_LE(p + sizeof(S->h[i]) * i);
    }
    return 0;
}
/* Initialize an unkeyed sequential hash producing outlen bytes
   (1..BLAKE2B_OUTBYTES).  Out-of-range outlen is reported via
   sodium_misuse(), which is expected not to return — NOTE(review):
   confirm; the code falls through otherwise.  Returns 0 on success. */
int
blake2b_init(blake2b_state *S, const uint8_t outlen)
{
    blake2b_param P[1];
    if((!outlen) || (outlen > BLAKE2B_OUTBYTES))
    {
        sodium_misuse();
    }
    /* Sequential-mode parameter block: fanout = depth = 1, everything
       else zeroed.  Multi-byte fields are stored little-endian. */
    P->digest_length = outlen;
    P->key_length = 0;
    P->fanout = 1;
    P->depth = 1;
    STORE32_LE(P->leaf_length, 0);
    STORE64_LE(P->node_offset, 0);
    P->node_depth = 0;
    P->inner_length = 0;
    memset(P->reserved, 0, sizeof(P->reserved));
    memset(P->salt, 0, sizeof(P->salt));
    memset(P->personal, 0, sizeof(P->personal));
    return blake2b_init_param(S, P);
}
/* Like blake2b_init(), but with optional salt and personalization inputs.
   NULL salt/personal behaves exactly as the zeroed defaults; non-NULL
   pointers must reference BLAKE2B_SALTBYTES / BLAKE2B_PERSONALBYTES bytes
   respectively.  Returns 0 on success; invalid outlen goes through
   sodium_misuse(). */
int
blake2b_init_salt_personal(blake2b_state *S, const uint8_t outlen,
                           const void *salt, const void *personal)
{
    blake2b_param P[1];
    if((!outlen) || (outlen > BLAKE2B_OUTBYTES))
    {
        sodium_misuse();
    }
    P->digest_length = outlen;
    P->key_length = 0;
    P->fanout = 1;
    P->depth = 1;
    STORE32_LE(P->leaf_length, 0);
    STORE64_LE(P->node_offset, 0);
    P->node_depth = 0;
    P->inner_length = 0;
    memset(P->reserved, 0, sizeof(P->reserved));
    if(salt != NULL)
    {
        blake2b_param_set_salt(P, (const uint8_t *)salt);
    }
    else
    {
        memset(P->salt, 0, sizeof(P->salt));
    }
    if(personal != NULL)
    {
        blake2b_param_set_personal(P, (const uint8_t *)personal);
    }
    else
    {
        memset(P->personal, 0, sizeof(P->personal));
    }
    return blake2b_init_param(S, P);
}
/* Initialize a keyed hash: after parameter setup, the key is absorbed as
   a full zero-padded 128-byte first block.  key must be non-NULL with
   keylen in 1..BLAKE2B_KEYBYTES; violations go through sodium_misuse().
   Returns 0 on success. */
int
blake2b_init_key(blake2b_state *S, const uint8_t outlen, const void *key,
                 const uint8_t keylen)
{
    blake2b_param P[1];
    if((!outlen) || (outlen > BLAKE2B_OUTBYTES))
    {
        sodium_misuse();
    }
    if(!key || !keylen || keylen > BLAKE2B_KEYBYTES)
    {
        sodium_misuse();
    }
    P->digest_length = outlen;
    P->key_length = keylen;
    P->fanout = 1;
    P->depth = 1;
    STORE32_LE(P->leaf_length, 0);
    STORE64_LE(P->node_offset, 0);
    P->node_depth = 0;
    P->inner_length = 0;
    memset(P->reserved, 0, sizeof(P->reserved));
    memset(P->salt, 0, sizeof(P->salt));
    memset(P->personal, 0, sizeof(P->personal));
    if(blake2b_init_param(S, P) < 0)
    {
        sodium_misuse();
    }
    {
        /* Absorb the key as a zero-padded full block, then wipe the
           key material from the stack. */
        uint8_t block[BLAKE2B_BLOCKBYTES];
        memset(block, 0, BLAKE2B_BLOCKBYTES);
        memcpy(block, key, keylen); /* keylen cannot be 0 */
        blake2b_update(S, block, BLAKE2B_BLOCKBYTES);
        sodium_memzero(block, BLAKE2B_BLOCKBYTES); /* Burn the key from stack */
    }
    return 0;
}
/* Keyed initialization with optional salt/personalization — combines
   blake2b_init_key() and blake2b_init_salt_personal().  NULL salt or
   personal selects the zeroed default.  Returns 0 on success; invalid
   lengths go through sodium_misuse(). */
int
blake2b_init_key_salt_personal(blake2b_state *S, const uint8_t outlen,
                               const void *key, const uint8_t keylen,
                               const void *salt, const void *personal)
{
    blake2b_param P[1];
    if((!outlen) || (outlen > BLAKE2B_OUTBYTES))
    {
        sodium_misuse();
    }
    if(!key || !keylen || keylen > BLAKE2B_KEYBYTES)
    {
        sodium_misuse();
    }
    P->digest_length = outlen;
    P->key_length = keylen;
    P->fanout = 1;
    P->depth = 1;
    STORE32_LE(P->leaf_length, 0);
    STORE64_LE(P->node_offset, 0);
    P->node_depth = 0;
    P->inner_length = 0;
    memset(P->reserved, 0, sizeof(P->reserved));
    if(salt != NULL)
    {
        blake2b_param_set_salt(P, (const uint8_t *)salt);
    }
    else
    {
        memset(P->salt, 0, sizeof(P->salt));
    }
    if(personal != NULL)
    {
        blake2b_param_set_personal(P, (const uint8_t *)personal);
    }
    else
    {
        memset(P->personal, 0, sizeof(P->personal));
    }
    if(blake2b_init_param(S, P) < 0)
    {
        sodium_misuse();
    }
    {
        /* Absorb the key as a zero-padded full block, then wipe it. */
        uint8_t block[BLAKE2B_BLOCKBYTES];
        memset(block, 0, BLAKE2B_BLOCKBYTES);
        memcpy(block, key, keylen); /* keylen cannot be 0 */
        blake2b_update(S, block, BLAKE2B_BLOCKBYTES);
        sodium_memzero(block, BLAKE2B_BLOCKBYTES); /* Burn the key from stack */
    }
    return 0;
}
/* inlen now in bytes */
/* Absorb inlen bytes of input.  The state keeps a double-width
   (2 * BLAKE2B_BLOCKBYTES) buffer so that the very last block is never
   compressed here — blake2b_final() needs it to set the finalization
   flags first.  Always returns 0. */
int
blake2b_update(blake2b_state *S, const uint8_t *in, uint64_t inlen)
{
    while(inlen > 0)
    {
        size_t left = S->buflen;
        size_t fill = 2 * BLAKE2B_BLOCKBYTES - left;
        if(inlen > fill)
        {
            /* More input than fits: top the buffer up, compress its first
               half, and slide the second half down. */
            memcpy(S->buf + left, in, fill); /* Fill buffer */
            S->buflen += fill;
            blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
            blake2b_compress(S, S->buf); /* Compress */
            memcpy(S->buf, S->buf + BLAKE2B_BLOCKBYTES,
                   BLAKE2B_BLOCKBYTES); /* Shift buffer left */
            S->buflen -= BLAKE2B_BLOCKBYTES;
            in += fill;
            inlen -= fill;
        }
        else /* inlen <= fill */
        {
            /* Everything fits: just buffer it for later. */
            memcpy(S->buf + left, in, inlen);
            S->buflen += inlen; /* Be lazy, do not compress */
            in += inlen;
            inlen -= inlen;
        }
    }
    return 0;
}
/* Finalize the hash and write outlen bytes (1..BLAKE2B_OUTBYTES) to out.
   Returns -1 if the state was already finalized; invalid outlen goes
   through sodium_misuse().  The state (h and buf) is wiped on success, so
   the state cannot be reused without re-initialization.  Returns 0 on
   success. */
int
blake2b_final(blake2b_state *S, uint8_t *out, uint8_t outlen)
{
    unsigned char buffer[BLAKE2B_OUTBYTES];
    if(!outlen || outlen > BLAKE2B_OUTBYTES)
    {
        sodium_misuse();
    }
    if(blake2b_is_lastblock(S))
    {
        return -1;
    }
    /* Flush a pending full block so that at most one (partial) block
       remains for the finalizing compression. */
    if(S->buflen > BLAKE2B_BLOCKBYTES)
    {
        blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
        blake2b_compress(S, S->buf);
        S->buflen -= BLAKE2B_BLOCKBYTES;
        assert(S->buflen <= BLAKE2B_BLOCKBYTES);
        memcpy(S->buf, S->buf + BLAKE2B_BLOCKBYTES, S->buflen);
    }
    blake2b_increment_counter(S, S->buflen);
    blake2b_set_lastblock(S);
    memset(S->buf + S->buflen, 0,
           2 * BLAKE2B_BLOCKBYTES - S->buflen); /* Padding */
    blake2b_compress(S, S->buf);
    /* Serialize h[] little-endian, then copy out the requested prefix. */
    COMPILER_ASSERT(sizeof buffer == 64U);
    STORE64_LE(buffer + 8 * 0, S->h[0]);
    STORE64_LE(buffer + 8 * 1, S->h[1]);
    STORE64_LE(buffer + 8 * 2, S->h[2]);
    STORE64_LE(buffer + 8 * 3, S->h[3]);
    STORE64_LE(buffer + 8 * 4, S->h[4]);
    STORE64_LE(buffer + 8 * 5, S->h[5]);
    STORE64_LE(buffer + 8 * 6, S->h[6]);
    STORE64_LE(buffer + 8 * 7, S->h[7]);
    memcpy(out, buffer, outlen); /* outlen <= BLAKE2B_OUTBYTES (64) */
    sodium_memzero(S->h, sizeof S->h);
    sodium_memzero(S->buf, sizeof S->buf);
    return 0;
}
/* inlen, at least, should be uint64_t. Others can be size_t. */
/* One-shot BLAKE2b: hash inlen bytes of in into outlen bytes of out,
   keyed when keylen > 0.  Invalid arguments are routed through
   sodium_misuse().  Returns 0. */
int
blake2b(uint8_t *out, const void *in, const void *key, const uint8_t outlen,
        const uint64_t inlen, uint8_t keylen)
{
    blake2b_state S[1];

    /* Verify parameters */
    if(NULL == in && inlen > 0)
    {
        sodium_misuse();
    }
    if(NULL == out)
    {
        sodium_misuse();
    }
    if(!outlen || outlen > BLAKE2B_OUTBYTES)
    {
        sodium_misuse();
    }
    if(NULL == key && keylen > 0)
    {
        sodium_misuse();
    }
    if(keylen > BLAKE2B_KEYBYTES)
    {
        sodium_misuse();
    }
    /* Unkeyed vs. keyed initialization. */
    if(keylen == 0)
    {
        if(blake2b_init(S, outlen) < 0)
        {
            sodium_misuse();
        }
    }
    else if(blake2b_init_key(S, outlen, key, keylen) < 0)
    {
        sodium_misuse();
    }
    blake2b_update(S, (const uint8_t *)in, inlen);
    blake2b_final(S, out, outlen);
    return 0;
}
/* One-shot BLAKE2b with optional salt and personalization (either may be
   NULL for the zeroed default).  Invalid arguments are routed through
   sodium_misuse().  Returns 0. */
int
blake2b_salt_personal(uint8_t *out, const void *in, const void *key,
                      const uint8_t outlen, const uint64_t inlen,
                      uint8_t keylen, const void *salt, const void *personal)
{
    blake2b_state S[1];

    /* Verify parameters */
    if(NULL == in && inlen > 0)
    {
        sodium_misuse();
    }
    if(NULL == out)
    {
        sodium_misuse();
    }
    if(!outlen || outlen > BLAKE2B_OUTBYTES)
    {
        sodium_misuse();
    }
    if(NULL == key && keylen > 0)
    {
        sodium_misuse();
    }
    if(keylen > BLAKE2B_KEYBYTES)
    {
        sodium_misuse();
    }
    /* Unkeyed vs. keyed initialization, both with salt/personal. */
    if(keylen == 0)
    {
        if(blake2b_init_salt_personal(S, outlen, salt, personal) < 0)
        {
            sodium_misuse();
        }
    }
    else if(blake2b_init_key_salt_personal(S, outlen, key, keylen, salt,
                                           personal)
            < 0)
    {
        sodium_misuse();
    }
    blake2b_update(S, (const uint8_t *)in, inlen);
    blake2b_final(S, out, outlen);
    return 0;
}
/* Select the fastest available blake2b_compress backend at runtime.
 * Each candidate is compiled in only when its intrinsics headers were
 * detected at build time, and chosen only when the CPU supports it;
 * otherwise the portable reference implementation is used. */
int
blake2b_pick_best_implementation(void)
{
  /* LCOV_EXCL_START */
#if defined(HAVE_AVX2INTRIN_H) && defined(HAVE_TMMINTRIN_H) \
    && defined(HAVE_SMMINTRIN_H)
  if(sodium_runtime_has_avx2())
  {
    blake2b_compress = blake2b_compress_avx2;
    return 0;
  }
#endif
#if defined(HAVE_EMMINTRIN_H) && defined(HAVE_TMMINTRIN_H) \
    && defined(HAVE_SMMINTRIN_H)
  if(sodium_runtime_has_sse41())
  {
    blake2b_compress = blake2b_compress_sse41;
    return 0;
  }
#endif
#if defined(HAVE_EMMINTRIN_H) && defined(HAVE_TMMINTRIN_H)
  if(sodium_runtime_has_ssse3())
  {
    blake2b_compress = blake2b_compress_ssse3;
    return 0;
  }
#endif
  /* Portable fallback. */
  blake2b_compress = blake2b_compress_ref;
  return 0;
  /* LCOV_EXCL_STOP */
}

@ -0,0 +1,119 @@
#include <assert.h>
#include <limits.h>
#include <stdint.h>
#include "blake2.h"
#include <sodium/crypto_generichash_blake2b.h>
#include <sodium/private/implementations.h>
/* Public one-shot generichash entry point: validate the size_t/ull
 * lengths, then narrow them to blake2b()'s fixed-width types.
 * Returns -1 on out-of-range lengths, otherwise blake2b()'s result. */
int
crypto_generichash_blake2b(unsigned char *out, size_t outlen,
                           const unsigned char *in, unsigned long long inlen,
                           const unsigned char *key, size_t keylen)
{
  uint8_t out8;
  uint8_t key8;

  /* The inlen test only matters where unsigned long long is wider
   * than 64 bits. */
  if(outlen <= 0U || outlen > BLAKE2B_OUTBYTES || keylen > BLAKE2B_KEYBYTES
     || inlen > UINT64_MAX)
  {
    return -1;
  }
  assert(outlen <= UINT8_MAX);
  assert(keylen <= UINT8_MAX);
  out8 = (uint8_t)outlen;
  key8 = (uint8_t)keylen;
  return blake2b((uint8_t *)out, in, key, out8, (uint64_t)inlen, key8);
}
/* One-shot generichash with salt/personalization: validate and narrow
 * the lengths, then delegate to blake2b_salt_personal().
 * Returns -1 on out-of-range lengths. */
int
crypto_generichash_blake2b_salt_personal(
    unsigned char *out, size_t outlen, const unsigned char *in,
    unsigned long long inlen, const unsigned char *key, size_t keylen,
    const unsigned char *salt, const unsigned char *personal)
{
  uint8_t out8;
  uint8_t key8;

  /* The inlen test only matters where unsigned long long is wider
   * than 64 bits. */
  if(outlen <= 0U || outlen > BLAKE2B_OUTBYTES || keylen > BLAKE2B_KEYBYTES
     || inlen > UINT64_MAX)
  {
    return -1;
  }
  assert(outlen <= UINT8_MAX);
  assert(keylen <= UINT8_MAX);
  out8 = (uint8_t)outlen;
  key8 = (uint8_t)keylen;
  return blake2b_salt_personal((uint8_t *)out, in, key, out8,
                               (uint64_t)inlen, key8, salt, personal);
}
/* Initialize a streaming generichash state, keyed when a non-empty key
 * is supplied. Returns -1 on out-of-range lengths or init failure. */
int
crypto_generichash_blake2b_init(crypto_generichash_blake2b_state *state,
                                const unsigned char *key, const size_t keylen,
                                const size_t outlen)
{
  int rc;

  if(outlen <= 0U || outlen > BLAKE2B_OUTBYTES || keylen > BLAKE2B_KEYBYTES)
  {
    return -1;
  }
  assert(outlen <= UINT8_MAX);
  assert(keylen <= UINT8_MAX);
  /* A NULL or zero-length key selects the unkeyed variant. */
  if(key == NULL || keylen <= 0U)
  {
    rc = blake2b_init(state, (uint8_t)outlen);
  }
  else
  {
    rc = blake2b_init_key(state, (uint8_t)outlen, key, (uint8_t)keylen);
  }
  return rc != 0 ? -1 : 0; /* LCOV_EXCL_LINE for the failure path */
}
/* Initialize a streaming generichash state with salt/personalization,
 * keyed when a non-empty key is supplied. Returns -1 on out-of-range
 * lengths or init failure. */
int
crypto_generichash_blake2b_init_salt_personal(
    crypto_generichash_blake2b_state *state, const unsigned char *key,
    const size_t keylen, const size_t outlen, const unsigned char *salt,
    const unsigned char *personal)
{
  int rc;

  if(outlen <= 0U || outlen > BLAKE2B_OUTBYTES || keylen > BLAKE2B_KEYBYTES)
  {
    return -1;
  }
  assert(outlen <= UINT8_MAX);
  assert(keylen <= UINT8_MAX);
  /* A NULL or zero-length key selects the unkeyed variant. */
  if(key == NULL || keylen <= 0U)
  {
    rc = blake2b_init_salt_personal(state, (uint8_t)outlen, salt, personal);
  }
  else
  {
    rc = blake2b_init_key_salt_personal(state, (uint8_t)outlen, key,
                                        (uint8_t)keylen, salt, personal);
  }
  return rc != 0 ? -1 : 0; /* LCOV_EXCL_LINE for the failure path */
}
/* Absorb `inlen` more bytes into a streaming hash state.
 * Thin shim over the internal blake2b_update(). */
int
crypto_generichash_blake2b_update(crypto_generichash_blake2b_state *state,
                                  const unsigned char *in,
                                  unsigned long long inlen)
{
  const uint8_t *data = (const uint8_t *)in;

  return blake2b_update(state, data, (uint64_t)inlen);
}
/* Finish a streaming hash and write the digest to `out`.
 * NOTE(review): outlen is assumed to have been range-checked when the
 * state was initialized; only a debug assert guards the narrowing here. */
int
crypto_generichash_blake2b_final(crypto_generichash_blake2b_state *state,
                                 unsigned char *out, const size_t outlen)
{
  uint8_t len8;

  assert(outlen <= UINT8_MAX);
  len8 = (uint8_t)outlen;
  return blake2b_final(state, (uint8_t *)out, len8);
}
/* Thin forwarder: runtime CPU-feature dispatch lives in the blake2b core. */
int
_crypto_generichash_blake2b_pick_best_implementation(void)
{
  return blake2b_pick_best_implementation();
}

@ -0,0 +1,184 @@
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sodium/core.h>
#include <sodium/crypto_stream_chacha20.h>
#include <sodium/private/common.h>
#include <sodium/private/sse2_64_32.h>
#include <sodium/utils.h>
#if __AVX2__
#ifdef __GNUC__
#pragma GCC target("sse2")
#pragma GCC target("ssse3")
#pragma GCC target("sse4.1")
#pragma GCC target("avx2")
#endif
#include <emmintrin.h>
#include <immintrin.h>
#include <smmintrin.h>
#include <tmmintrin.h>
#include "../stream_chacha20.h"
#include "chacha20_dolbeau-avx2.h"
#define ROUNDS 20
/* ChaCha20 working state: sixteen 32-bit words — constants in words 0-3,
 * key in 4-11, counter/nonce in 12-15 (the split depends on whether the
 * classic or IETF IV setup below is used). */
typedef struct chacha_ctx
{
  uint32_t input[16];
} chacha_ctx;
/* Load the "expand 32-byte k" constants and the 32-byte key (as eight
 * little-endian words) into the state. */
static void
chacha_keysetup(chacha_ctx *ctx, const uint8_t *k)
{
  static const uint32_t sigma[4] = { 0x61707865, 0x3320646e, 0x79622d32,
                                     0x6b206574 };
  unsigned int w;

  for(w = 0; w < 4; w++)
  {
    ctx->input[w] = sigma[w];
  }
  for(w = 0; w < 8; w++)
  {
    ctx->input[4 + w] = LOAD32_LE(k + 4 * w);
  }
}
/* Classic layout: 64-bit block counter in words 12-13, 64-bit nonce in
 * words 14-15. A NULL counter means "start at block 0". */
static void
chacha_ivsetup(chacha_ctx *ctx, const uint8_t *iv, const uint8_t *counter)
{
  if(counter != NULL)
  {
    ctx->input[12] = LOAD32_LE(counter + 0);
    ctx->input[13] = LOAD32_LE(counter + 4);
  }
  else
  {
    ctx->input[12] = 0;
    ctx->input[13] = 0;
  }
  ctx->input[14] = LOAD32_LE(iv + 0);
  ctx->input[15] = LOAD32_LE(iv + 4);
}
/* IETF layout: 32-bit block counter in word 12, 96-bit nonce in words
 * 13-15. A NULL counter means "start at block 0". */
static void
chacha_ietf_ivsetup(chacha_ctx *ctx, const uint8_t *iv, const uint8_t *counter)
{
  ctx->input[12] = (counter != NULL) ? LOAD32_LE(counter) : 0;
  ctx->input[13] = LOAD32_LE(iv + 0);
  ctx->input[14] = LOAD32_LE(iv + 4);
  ctx->input[15] = LOAD32_LE(iv + 8);
}
/* XOR `bytes` bytes of message `m` with ChaCha20 keystream into `c`.
 * The actual work is textually included below; each fragment consumes
 * what it can and falls through to the next narrower one. */
static void
chacha20_encrypt_bytes(chacha_ctx *ctx, const uint8_t *m, uint8_t *c,
                       unsigned long long bytes)
{
  /* x aliases the 16-word state; the included fragments read and update
   * it (including the block counter) directly. */
  uint32_t *const x = &ctx->input[0];
  if(!bytes)
  {
    return; /* LCOV_EXCL_LINE */
  }
  if(bytes > crypto_stream_chacha20_MESSAGEBYTES_MAX)
  {
    sodium_misuse();
  }
/* u8: 512-byte chunks (AVX2), u4: 256-byte (SSE, 4 blocks), u1: single
 * 64-byte blocks, u0: the final partial block. */
#include "u8.h"
#include "u4.h"
#include "u1.h"
#include "u0.h"
}
static int
stream_ref(unsigned char *c, unsigned long long clen, const unsigned char *n,
const unsigned char *k)
{
struct chacha_ctx ctx;
if(!clen)
{
return 0;
}
COMPILER_ASSERT(crypto_stream_chacha20_KEYBYTES == 256 / 8);
chacha_keysetup(&ctx, k);
chacha_ivsetup(&ctx, n, NULL);
memset(c, 0, clen);
chacha20_encrypt_bytes(&ctx, c, c, clen);
sodium_memzero(&ctx, sizeof ctx);
return 0;
}
static int
stream_ietf_ref(unsigned char *c, unsigned long long clen,
const unsigned char *n, const unsigned char *k)
{
struct chacha_ctx ctx;
if(!clen)
{
return 0;
}
COMPILER_ASSERT(crypto_stream_chacha20_KEYBYTES == 256 / 8);
chacha_keysetup(&ctx, k);
chacha_ietf_ivsetup(&ctx, n, NULL);
memset(c, 0, clen);
chacha20_encrypt_bytes(&ctx, c, c, clen);
sodium_memzero(&ctx, sizeof ctx);
return 0;
}
static int
stream_ref_xor_ic(unsigned char *c, const unsigned char *m,
unsigned long long mlen, const unsigned char *n, uint64_t ic,
const unsigned char *k)
{
struct chacha_ctx ctx;
uint8_t ic_bytes[8];
uint32_t ic_high;
uint32_t ic_low;
if(!mlen)
{
return 0;
}
ic_high = (uint32_t)(ic >> 32);
ic_low = (uint32_t)ic;
STORE32_LE(&ic_bytes[0], ic_low);
STORE32_LE(&ic_bytes[4], ic_high);
chacha_keysetup(&ctx, k);
chacha_ivsetup(&ctx, n, ic_bytes);
chacha20_encrypt_bytes(&ctx, m, c, mlen);
sodium_memzero(&ctx, sizeof ctx);
return 0;
}
static int
stream_ietf_ref_xor_ic(unsigned char *c, const unsigned char *m,
unsigned long long mlen, const unsigned char *n,
uint32_t ic, const unsigned char *k)
{
struct chacha_ctx ctx;
uint8_t ic_bytes[4];
if(!mlen)
{
return 0;
}
STORE32_LE(ic_bytes, ic);
chacha_keysetup(&ctx, k);
chacha_ietf_ivsetup(&ctx, n, ic_bytes);
chacha20_encrypt_bytes(&ctx, m, c, mlen);
sodium_memzero(&ctx, sizeof ctx);
return 0;
}
/* Function table bundling this file's AVX2-built ChaCha20 routines;
 * consumed via ../stream_chacha20.h. */
struct crypto_stream_chacha20_implementation
    crypto_stream_chacha20_dolbeau_avx2_implementation = {
        SODIUM_C99(.stream =) stream_ref,
        SODIUM_C99(.stream_ietf =) stream_ietf_ref,
        SODIUM_C99(.stream_xor_ic =) stream_ref_xor_ic,
        SODIUM_C99(.stream_ietf_xor_ic =) stream_ietf_ref_xor_ic};
#endif

@ -0,0 +1,8 @@
#include <stdint.h>
#include "../stream_chacha20.h"
#include <sodium/crypto_stream_chacha20.h>
extern struct crypto_stream_chacha20_implementation
crypto_stream_chacha20_dolbeau_avx2_implementation;

@ -0,0 +1,180 @@
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sodium/core.h>
#include <sodium/crypto_stream_chacha20.h>
#include <sodium/private/common.h>
#include <sodium/private/sse2_64_32.h>
#include <sodium/utils.h>
#if __SSSE3__
#ifdef __GNUC__
#pragma GCC target("sse2")
#pragma GCC target("ssse3")
#endif
#include <emmintrin.h>
#include <tmmintrin.h>
#include "../stream_chacha20.h"
#include "chacha20_dolbeau-ssse3.h"
#define ROUNDS 20
/* ChaCha20 working state (SSSE3 build): sixteen 32-bit words — constants
 * in words 0-3, key in 4-11, counter/nonce in 12-15 (the split depends
 * on whether the classic or IETF IV setup below is used). */
typedef struct chacha_ctx
{
  uint32_t input[16];
} chacha_ctx;
/* Load the "expand 32-byte k" constants and the 32-byte key (as eight
 * little-endian words) into the state. */
static void
chacha_keysetup(chacha_ctx *ctx, const uint8_t *k)
{
  static const uint32_t sigma[4] = { 0x61707865, 0x3320646e, 0x79622d32,
                                     0x6b206574 };
  unsigned int w;

  for(w = 0; w < 4; w++)
  {
    ctx->input[w] = sigma[w];
  }
  for(w = 0; w < 8; w++)
  {
    ctx->input[4 + w] = LOAD32_LE(k + 4 * w);
  }
}
/* Classic layout: 64-bit block counter in words 12-13, 64-bit nonce in
 * words 14-15. A NULL counter means "start at block 0". */
static void
chacha_ivsetup(chacha_ctx *ctx, const uint8_t *iv, const uint8_t *counter)
{
  if(counter != NULL)
  {
    ctx->input[12] = LOAD32_LE(counter + 0);
    ctx->input[13] = LOAD32_LE(counter + 4);
  }
  else
  {
    ctx->input[12] = 0;
    ctx->input[13] = 0;
  }
  ctx->input[14] = LOAD32_LE(iv + 0);
  ctx->input[15] = LOAD32_LE(iv + 4);
}
/* IETF layout: 32-bit block counter in word 12, 96-bit nonce in words
 * 13-15. A NULL counter means "start at block 0". */
static void
chacha_ietf_ivsetup(chacha_ctx *ctx, const uint8_t *iv, const uint8_t *counter)
{
  ctx->input[12] = (counter != NULL) ? LOAD32_LE(counter) : 0;
  ctx->input[13] = LOAD32_LE(iv + 0);
  ctx->input[14] = LOAD32_LE(iv + 4);
  ctx->input[15] = LOAD32_LE(iv + 8);
}
/* XOR `bytes` bytes of message `m` with ChaCha20 keystream into `c`.
 * The actual work is textually included below; each fragment consumes
 * what it can and falls through to the next narrower one. */
static void
chacha20_encrypt_bytes(chacha_ctx *ctx, const uint8_t *m, uint8_t *c,
                       unsigned long long bytes)
{
  /* x aliases the 16-word state; the included fragments read and update
   * it (including the block counter) directly. */
  uint32_t *const x = &ctx->input[0];
  if(!bytes)
  {
    return; /* LCOV_EXCL_LINE */
  }
  if(bytes > crypto_stream_chacha20_MESSAGEBYTES_MAX)
  {
    sodium_misuse();
  }
/* u4: 256-byte chunks (SSE, 4 blocks), u1: single 64-byte blocks,
 * u0: the final partial block. */
#include "u4.h"
#include "u1.h"
#include "u0.h"
}
static int
stream_ref(unsigned char *c, unsigned long long clen, const unsigned char *n,
const unsigned char *k)
{
struct chacha_ctx ctx;
if(!clen)
{
return 0;
}
COMPILER_ASSERT(crypto_stream_chacha20_KEYBYTES == 256 / 8);
chacha_keysetup(&ctx, k);
chacha_ivsetup(&ctx, n, NULL);
memset(c, 0, clen);
chacha20_encrypt_bytes(&ctx, c, c, clen);
sodium_memzero(&ctx, sizeof ctx);
return 0;
}
static int
stream_ietf_ref(unsigned char *c, unsigned long long clen,
const unsigned char *n, const unsigned char *k)
{
struct chacha_ctx ctx;
if(!clen)
{
return 0;
}
COMPILER_ASSERT(crypto_stream_chacha20_KEYBYTES == 256 / 8);
chacha_keysetup(&ctx, k);
chacha_ietf_ivsetup(&ctx, n, NULL);
memset(c, 0, clen);
chacha20_encrypt_bytes(&ctx, c, c, clen);
sodium_memzero(&ctx, sizeof ctx);
return 0;
}
static int
stream_ref_xor_ic(unsigned char *c, const unsigned char *m,
unsigned long long mlen, const unsigned char *n, uint64_t ic,
const unsigned char *k)
{
struct chacha_ctx ctx;
uint8_t ic_bytes[8];
uint32_t ic_high;
uint32_t ic_low;
if(!mlen)
{
return 0;
}
ic_high = (uint32_t)(ic >> 32);
ic_low = (uint32_t)ic;
STORE32_LE(&ic_bytes[0], ic_low);
STORE32_LE(&ic_bytes[4], ic_high);
chacha_keysetup(&ctx, k);
chacha_ivsetup(&ctx, n, ic_bytes);
chacha20_encrypt_bytes(&ctx, m, c, mlen);
sodium_memzero(&ctx, sizeof ctx);
return 0;
}
static int
stream_ietf_ref_xor_ic(unsigned char *c, const unsigned char *m,
unsigned long long mlen, const unsigned char *n,
uint32_t ic, const unsigned char *k)
{
struct chacha_ctx ctx;
uint8_t ic_bytes[4];
if(!mlen)
{
return 0;
}
STORE32_LE(ic_bytes, ic);
chacha_keysetup(&ctx, k);
chacha_ietf_ivsetup(&ctx, n, ic_bytes);
chacha20_encrypt_bytes(&ctx, m, c, mlen);
sodium_memzero(&ctx, sizeof ctx);
return 0;
}
/* Function table bundling this file's SSSE3-built ChaCha20 routines;
 * consumed via ../stream_chacha20.h. */
struct crypto_stream_chacha20_implementation
    crypto_stream_chacha20_dolbeau_ssse3_implementation = {
        SODIUM_C99(.stream =) stream_ref,
        SODIUM_C99(.stream_ietf =) stream_ietf_ref,
        SODIUM_C99(.stream_xor_ic =) stream_ref_xor_ic,
        SODIUM_C99(.stream_ietf_xor_ic =) stream_ietf_ref_xor_ic};
#endif

@ -0,0 +1,8 @@
#include <stdint.h>
#include "../stream_chacha20.h"
#include <sodium/crypto_stream_chacha20.h>
extern struct crypto_stream_chacha20_implementation
crypto_stream_chacha20_dolbeau_ssse3_implementation;

@ -0,0 +1,86 @@
/* u0.h — included into chacha20_encrypt_bytes(); operates on that
 * function's locals x (state words), m (input), c (output) and bytes.
 * Handles the final partial block: by the time this runs, the wider
 * fragments have consumed every full 64-byte block, so bytes < 64. */
if (bytes > 0) {
    __m128i x_0, x_1, x_2, x_3;
    __m128i t_1;
    const __m128i rot16 =
        _mm_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2);
    const __m128i rot8 =
        _mm_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3);
    uint8_t partialblock[64];
    unsigned int i;
    x_0 = _mm_loadu_si128((__m128i*) (x + 0));
    x_1 = _mm_loadu_si128((__m128i*) (x + 4));
    x_2 = _mm_loadu_si128((__m128i*) (x + 8));
    x_3 = _mm_loadu_si128((__m128i*) (x + 12));
    /* ROUNDS iterations, two ChaCha rounds (column + diagonal) per pass;
     * the epi32 shuffles realign rows for the diagonal round. */
    for (i = 0; i < ROUNDS; i += 2) {
        x_0 = _mm_add_epi32(x_0, x_1);
        x_3 = _mm_xor_si128(x_3, x_0);
        x_3 = _mm_shuffle_epi8(x_3, rot16);
        x_2 = _mm_add_epi32(x_2, x_3);
        x_1 = _mm_xor_si128(x_1, x_2);
        t_1 = x_1;
        x_1 = _mm_slli_epi32(x_1, 12);
        t_1 = _mm_srli_epi32(t_1, 20);
        x_1 = _mm_xor_si128(x_1, t_1);
        x_0 = _mm_add_epi32(x_0, x_1);
        x_3 = _mm_xor_si128(x_3, x_0);
        x_0 = _mm_shuffle_epi32(x_0, 0x93);
        x_3 = _mm_shuffle_epi8(x_3, rot8);
        x_2 = _mm_add_epi32(x_2, x_3);
        x_3 = _mm_shuffle_epi32(x_3, 0x4e);
        x_1 = _mm_xor_si128(x_1, x_2);
        x_2 = _mm_shuffle_epi32(x_2, 0x39);
        t_1 = x_1;
        x_1 = _mm_slli_epi32(x_1, 7);
        t_1 = _mm_srli_epi32(t_1, 25);
        x_1 = _mm_xor_si128(x_1, t_1);
        x_0 = _mm_add_epi32(x_0, x_1);
        x_3 = _mm_xor_si128(x_3, x_0);
        x_3 = _mm_shuffle_epi8(x_3, rot16);
        x_2 = _mm_add_epi32(x_2, x_3);
        x_1 = _mm_xor_si128(x_1, x_2);
        t_1 = x_1;
        x_1 = _mm_slli_epi32(x_1, 12);
        t_1 = _mm_srli_epi32(t_1, 20);
        x_1 = _mm_xor_si128(x_1, t_1);
        x_0 = _mm_add_epi32(x_0, x_1);
        x_3 = _mm_xor_si128(x_3, x_0);
        x_0 = _mm_shuffle_epi32(x_0, 0x39);
        x_3 = _mm_shuffle_epi8(x_3, rot8);
        x_2 = _mm_add_epi32(x_2, x_3);
        x_3 = _mm_shuffle_epi32(x_3, 0x4e);
        x_1 = _mm_xor_si128(x_1, x_2);
        x_2 = _mm_shuffle_epi32(x_2, 0x93);
        t_1 = x_1;
        x_1 = _mm_slli_epi32(x_1, 7);
        t_1 = _mm_srli_epi32(t_1, 25);
        x_1 = _mm_xor_si128(x_1, t_1);
    }
    /* Feed-forward: add the original input state back in. */
    x_0 = _mm_add_epi32(x_0, _mm_loadu_si128((__m128i*) (x + 0)));
    x_1 = _mm_add_epi32(x_1, _mm_loadu_si128((__m128i*) (x + 4)));
    x_2 = _mm_add_epi32(x_2, _mm_loadu_si128((__m128i*) (x + 8)));
    x_3 = _mm_add_epi32(x_3, _mm_loadu_si128((__m128i*) (x + 12)));
    _mm_storeu_si128((__m128i*) (partialblock + 0), x_0);
    _mm_storeu_si128((__m128i*) (partialblock + 16), x_1);
    _mm_storeu_si128((__m128i*) (partialblock + 32), x_2);
    _mm_storeu_si128((__m128i*) (partialblock + 48), x_3);
    /* XOR only the remaining bytes of keystream into the output. */
    for (i = 0; i < bytes; i++) {
        c[i] = m[i] ^ partialblock[i];
    }
    /* Wipe keystream material from the stack. */
    sodium_memzero(partialblock, sizeof partialblock);
}

@ -0,0 +1,98 @@
/* u1.h — included into chacha20_encrypt_bytes(); operates on that
 * function's locals x, m, c and bytes. Processes one full 64-byte block
 * per iteration with scalar SSE registers, incrementing the 64-bit block
 * counter (words 12-13) with carry after each block. */
while (bytes >= 64) {
    __m128i x_0, x_1, x_2, x_3;
    __m128i t_1;
    const __m128i rot16 =
        _mm_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2);
    const __m128i rot8 =
        _mm_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3);
    uint32_t in12;
    uint32_t in13;
    int i;
    x_0 = _mm_loadu_si128((__m128i*) (x + 0));
    x_1 = _mm_loadu_si128((__m128i*) (x + 4));
    x_2 = _mm_loadu_si128((__m128i*) (x + 8));
    x_3 = _mm_loadu_si128((__m128i*) (x + 12));
    /* ROUNDS iterations, two ChaCha rounds (column + diagonal) per pass. */
    for (i = 0; i < ROUNDS; i += 2) {
        x_0 = _mm_add_epi32(x_0, x_1);
        x_3 = _mm_xor_si128(x_3, x_0);
        x_3 = _mm_shuffle_epi8(x_3, rot16);
        x_2 = _mm_add_epi32(x_2, x_3);
        x_1 = _mm_xor_si128(x_1, x_2);
        t_1 = x_1;
        x_1 = _mm_slli_epi32(x_1, 12);
        t_1 = _mm_srli_epi32(t_1, 20);
        x_1 = _mm_xor_si128(x_1, t_1);
        x_0 = _mm_add_epi32(x_0, x_1);
        x_3 = _mm_xor_si128(x_3, x_0);
        x_0 = _mm_shuffle_epi32(x_0, 0x93);
        x_3 = _mm_shuffle_epi8(x_3, rot8);
        x_2 = _mm_add_epi32(x_2, x_3);
        x_3 = _mm_shuffle_epi32(x_3, 0x4e);
        x_1 = _mm_xor_si128(x_1, x_2);
        x_2 = _mm_shuffle_epi32(x_2, 0x39);
        t_1 = x_1;
        x_1 = _mm_slli_epi32(x_1, 7);
        t_1 = _mm_srli_epi32(t_1, 25);
        x_1 = _mm_xor_si128(x_1, t_1);
        x_0 = _mm_add_epi32(x_0, x_1);
        x_3 = _mm_xor_si128(x_3, x_0);
        x_3 = _mm_shuffle_epi8(x_3, rot16);
        x_2 = _mm_add_epi32(x_2, x_3);
        x_1 = _mm_xor_si128(x_1, x_2);
        t_1 = x_1;
        x_1 = _mm_slli_epi32(x_1, 12);
        t_1 = _mm_srli_epi32(t_1, 20);
        x_1 = _mm_xor_si128(x_1, t_1);
        x_0 = _mm_add_epi32(x_0, x_1);
        x_3 = _mm_xor_si128(x_3, x_0);
        x_0 = _mm_shuffle_epi32(x_0, 0x39);
        x_3 = _mm_shuffle_epi8(x_3, rot8);
        x_2 = _mm_add_epi32(x_2, x_3);
        x_3 = _mm_shuffle_epi32(x_3, 0x4e);
        x_1 = _mm_xor_si128(x_1, x_2);
        x_2 = _mm_shuffle_epi32(x_2, 0x93);
        t_1 = x_1;
        x_1 = _mm_slli_epi32(x_1, 7);
        t_1 = _mm_srli_epi32(t_1, 25);
        x_1 = _mm_xor_si128(x_1, t_1);
    }
    /* Feed-forward, then XOR the 64-byte keystream block with the input. */
    x_0 = _mm_add_epi32(x_0, _mm_loadu_si128((__m128i*) (x + 0)));
    x_1 = _mm_add_epi32(x_1, _mm_loadu_si128((__m128i*) (x + 4)));
    x_2 = _mm_add_epi32(x_2, _mm_loadu_si128((__m128i*) (x + 8)));
    x_3 = _mm_add_epi32(x_3, _mm_loadu_si128((__m128i*) (x + 12)));
    x_0 = _mm_xor_si128(x_0, _mm_loadu_si128((__m128i*) (m + 0)));
    x_1 = _mm_xor_si128(x_1, _mm_loadu_si128((__m128i*) (m + 16)));
    x_2 = _mm_xor_si128(x_2, _mm_loadu_si128((__m128i*) (m + 32)));
    x_3 = _mm_xor_si128(x_3, _mm_loadu_si128((__m128i*) (m + 48)));
    _mm_storeu_si128((__m128i*) (c + 0), x_0);
    _mm_storeu_si128((__m128i*) (c + 16), x_1);
    _mm_storeu_si128((__m128i*) (c + 32), x_2);
    _mm_storeu_si128((__m128i*) (c + 48), x_3);
    /* Bump the 64-bit block counter, carrying from word 12 into 13. */
    in12 = x[12];
    in13 = x[13];
    in12++;
    if (in12 == 0) {
        in13++;
    }
    x[12] = in12;
    x[13] = in13;
    bytes -= 64;
    c += 64;
    m += 64;
}

@ -0,0 +1,175 @@
/* u4.h — included into chacha20_encrypt_bytes(); operates on that
 * function's locals x, m, c and bytes. Computes four ChaCha20 blocks in
 * parallel (256 bytes per loop iteration) with 4-lane SSE registers:
 * one register per state word, one lane per block. */
#define VEC4_ROT(A, IMM) \
    _mm_or_si128(_mm_slli_epi32(A, IMM), _mm_srli_epi32(A, (32 - IMM)))
/* same, but replace 2 of the shift/shift/or "rotation" by byte shuffles (8 &
 * 16) (better) */
#define VEC4_QUARTERROUND_SHUFFLE(A, B, C, D) \
    x_##A = _mm_add_epi32(x_##A, x_##B); \
    t_##A = _mm_xor_si128(x_##D, x_##A); \
    x_##D = _mm_shuffle_epi8(t_##A, rot16); \
    x_##C = _mm_add_epi32(x_##C, x_##D); \
    t_##C = _mm_xor_si128(x_##B, x_##C); \
    x_##B = VEC4_ROT(t_##C, 12); \
    x_##A = _mm_add_epi32(x_##A, x_##B); \
    t_##A = _mm_xor_si128(x_##D, x_##A); \
    x_##D = _mm_shuffle_epi8(t_##A, rot8); \
    x_##C = _mm_add_epi32(x_##C, x_##D); \
    t_##C = _mm_xor_si128(x_##B, x_##C); \
    x_##B = VEC4_ROT(t_##C, 7)
#define VEC4_QUARTERROUND(A, B, C, D) VEC4_QUARTERROUND_SHUFFLE(A, B, C, D)
if (bytes >= 256) {
    /* constant for shuffling bytes (replacing multiple-of-8 rotates) */
    __m128i rot16 =
        _mm_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2);
    __m128i rot8 =
        _mm_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3);
    /* Broadcast each state word across the four lanes. */
    __m128i x_0 = _mm_set1_epi32(x[0]);
    __m128i x_1 = _mm_set1_epi32(x[1]);
    __m128i x_2 = _mm_set1_epi32(x[2]);
    __m128i x_3 = _mm_set1_epi32(x[3]);
    __m128i x_4 = _mm_set1_epi32(x[4]);
    __m128i x_5 = _mm_set1_epi32(x[5]);
    __m128i x_6 = _mm_set1_epi32(x[6]);
    __m128i x_7 = _mm_set1_epi32(x[7]);
    __m128i x_8 = _mm_set1_epi32(x[8]);
    __m128i x_9 = _mm_set1_epi32(x[9]);
    __m128i x_10 = _mm_set1_epi32(x[10]);
    __m128i x_11 = _mm_set1_epi32(x[11]);
    __m128i x_12;
    __m128i x_13;
    __m128i x_14 = _mm_set1_epi32(x[14]);
    __m128i x_15 = _mm_set1_epi32(x[15]);
    __m128i orig0 = x_0;
    __m128i orig1 = x_1;
    __m128i orig2 = x_2;
    __m128i orig3 = x_3;
    __m128i orig4 = x_4;
    __m128i orig5 = x_5;
    __m128i orig6 = x_6;
    __m128i orig7 = x_7;
    __m128i orig8 = x_8;
    __m128i orig9 = x_9;
    __m128i orig10 = x_10;
    __m128i orig11 = x_11;
    __m128i orig12;
    __m128i orig13;
    __m128i orig14 = x_14;
    __m128i orig15 = x_15;
    __m128i t_0, t_1, t_2, t_3, t_4, t_5, t_6, t_7, t_8, t_9, t_10, t_11, t_12,
        t_13, t_14, t_15;
    uint32_t in12, in13;
    int i;
    while (bytes >= 256) {
        const __m128i addv12 = _mm_set_epi64x(1, 0);
        const __m128i addv13 = _mm_set_epi64x(3, 2);
        __m128i t12, t13;
        uint64_t in1213;
        x_0 = orig0;
        x_1 = orig1;
        x_2 = orig2;
        x_3 = orig3;
        x_4 = orig4;
        x_5 = orig5;
        x_6 = orig6;
        x_7 = orig7;
        x_8 = orig8;
        x_9 = orig9;
        x_10 = orig10;
        x_11 = orig11;
        x_14 = orig14;
        x_15 = orig15;
        /* Build per-lane counters: lanes get counter+0..counter+3, then
         * interleave so words 12/13 hold the low/high halves. */
        in12 = x[12];
        in13 = x[13];
        in1213 = ((uint64_t) in12) | (((uint64_t) in13) << 32);
        t12 = _mm_set1_epi64x(in1213);
        t13 = _mm_set1_epi64x(in1213);
        x_12 = _mm_add_epi64(addv12, t12);
        x_13 = _mm_add_epi64(addv13, t13);
        t12 = _mm_unpacklo_epi32(x_12, x_13);
        t13 = _mm_unpackhi_epi32(x_12, x_13);
        x_12 = _mm_unpacklo_epi32(t12, t13);
        x_13 = _mm_unpackhi_epi32(t12, t13);
        orig12 = x_12;
        orig13 = x_13;
        /* Advance the stored 64-bit counter by the 4 blocks consumed. */
        in1213 += 4;
        x[12] = in1213 & 0xFFFFFFFF;
        x[13] = (in1213 >> 32) & 0xFFFFFFFF;
        for (i = 0; i < ROUNDS; i += 2) {
            VEC4_QUARTERROUND(0, 4, 8, 12);
            VEC4_QUARTERROUND(1, 5, 9, 13);
            VEC4_QUARTERROUND(2, 6, 10, 14);
            VEC4_QUARTERROUND(3, 7, 11, 15);
            VEC4_QUARTERROUND(0, 5, 10, 15);
            VEC4_QUARTERROUND(1, 6, 11, 12);
            VEC4_QUARTERROUND(2, 7, 8, 13);
            VEC4_QUARTERROUND(3, 4, 9, 14);
        }
/* Feed-forward four state words, transpose lanes back into per-block
 * order, and XOR 16 bytes into each of the four 64-byte output blocks. */
#define ONEQUAD_TRANSPOSE(A, B, C, D)                                  \
    {                                                                  \
        __m128i t0, t1, t2, t3;                                        \
                                                                       \
        x_##A = _mm_add_epi32(x_##A, orig##A);                         \
        x_##B = _mm_add_epi32(x_##B, orig##B);                         \
        x_##C = _mm_add_epi32(x_##C, orig##C);                         \
        x_##D = _mm_add_epi32(x_##D, orig##D);                         \
        t_##A = _mm_unpacklo_epi32(x_##A, x_##B);                      \
        t_##B = _mm_unpacklo_epi32(x_##C, x_##D);                      \
        t_##C = _mm_unpackhi_epi32(x_##A, x_##B);                      \
        t_##D = _mm_unpackhi_epi32(x_##C, x_##D);                      \
        x_##A = _mm_unpacklo_epi64(t_##A, t_##B);                      \
        x_##B = _mm_unpackhi_epi64(t_##A, t_##B);                      \
        x_##C = _mm_unpacklo_epi64(t_##C, t_##D);                      \
        x_##D = _mm_unpackhi_epi64(t_##C, t_##D);                      \
                                                                       \
        t0 = _mm_xor_si128(x_##A, _mm_loadu_si128((__m128i*) (m + 0))); \
        _mm_storeu_si128((__m128i*) (c + 0), t0);                      \
        t1 = _mm_xor_si128(x_##B, _mm_loadu_si128((__m128i*) (m + 64))); \
        _mm_storeu_si128((__m128i*) (c + 64), t1);                     \
        t2 = _mm_xor_si128(x_##C, _mm_loadu_si128((__m128i*) (m + 128))); \
        _mm_storeu_si128((__m128i*) (c + 128), t2);                    \
        t3 = _mm_xor_si128(x_##D, _mm_loadu_si128((__m128i*) (m + 192))); \
        _mm_storeu_si128((__m128i*) (c + 192), t3);                    \
    }
#define ONEQUAD(A, B, C, D) ONEQUAD_TRANSPOSE(A, B, C, D)
        /* Each quad writes a 16-byte slice of all four blocks; the
         * pointer bumps walk across the slices. */
        ONEQUAD(0, 1, 2, 3);
        m += 16;
        c += 16;
        ONEQUAD(4, 5, 6, 7);
        m += 16;
        c += 16;
        ONEQUAD(8, 9, 10, 11);
        m += 16;
        c += 16;
        ONEQUAD(12, 13, 14, 15);
        m -= 48;
        c -= 48;
#undef ONEQUAD
#undef ONEQUAD_TRANSPOSE
        bytes -= 256;
        c += 256;
        m += 256;
    }
}
#undef VEC4_ROT
#undef VEC4_QUARTERROUND
#undef VEC4_QUARTERROUND_SHUFFLE

@ -0,0 +1,357 @@
#define VEC8_ROT(A, IMM) \
_mm256_or_si256(_mm256_slli_epi32(A, IMM), _mm256_srli_epi32(A, (32 - IMM)))
/* implements a vector quarter round by-the-book (naive!) */
#define VEC8_QUARTERROUND_NAIVE(A, B, C, D) \
x_##A = _mm256_add_epi32(x_##A, x_##B); \
t_##A = _mm256_xor_si256(x_##D, x_##A); \
x_##D = VEC8_ROT(t_##A, 16); \
x_##C = _mm256_add_epi32(x_##C, x_##D); \
t_##C = _mm256_xor_si256(x_##B, x_##C); \
x_##B = VEC8_ROT(t_##C, 12); \
x_##A = _mm256_add_epi32(x_##A, x_##B); \
t_##A = _mm256_xor_si256(x_##D, x_##A); \
x_##D = VEC8_ROT(t_##A, 8); \
x_##C = _mm256_add_epi32(x_##C, x_##D); \
t_##C = _mm256_xor_si256(x_##B, x_##C); \
x_##B = VEC8_ROT(t_##C, 7)
/* same, but replace 2 of the shift/shift/or "rotation" by byte shuffles (8 &
* 16) (better) */
#define VEC8_QUARTERROUND_SHUFFLE(A, B, C, D) \
x_##A = _mm256_add_epi32(x_##A, x_##B); \
t_##A = _mm256_xor_si256(x_##D, x_##A); \
x_##D = _mm256_shuffle_epi8(t_##A, rot16); \
x_##C = _mm256_add_epi32(x_##C, x_##D); \
t_##C = _mm256_xor_si256(x_##B, x_##C); \
x_##B = VEC8_ROT(t_##C, 12); \
x_##A = _mm256_add_epi32(x_##A, x_##B); \
t_##A = _mm256_xor_si256(x_##D, x_##A); \
x_##D = _mm256_shuffle_epi8(t_##A, rot8); \
x_##C = _mm256_add_epi32(x_##C, x_##D); \
t_##C = _mm256_xor_si256(x_##B, x_##C); \
x_##B = VEC8_ROT(t_##C, 7)
/* same, but replace 2 of the shift/shift/or "rotation" by byte & word shuffles
* (8 & 16) (not as good as previous) */
#define VEC8_QUARTERROUND_SHUFFLE2(A, B, C, D) \
x_##A = _mm256_add_epi32(x_##A, x_##B); \
t_##A = _mm256_xor_si256(x_##D, x_##A); \
x_##D = _mm256_shufflehi_epi16(_mm256_shufflelo_epi16(t_##A, 0xb1), 0xb1); \
x_##C = _mm256_add_epi32(x_##C, x_##D); \
t_##C = _mm256_xor_si256(x_##B, x_##C); \
x_##B = VEC8_ROT(t_##C, 12); \
x_##A = _mm256_add_epi32(x_##A, x_##B); \
t_##A = _mm256_xor_si256(x_##D, x_##A); \
x_##D = _mm256_shuffle_epi8(t_##A, rot8); \
x_##C = _mm256_add_epi32(x_##C, x_##D); \
t_##C = _mm256_xor_si256(x_##B, x_##C); \
x_##B = VEC8_ROT(t_##C, 7)
#define VEC8_QUARTERROUND(A, B, C, D) VEC8_QUARTERROUND_SHUFFLE(A, B, C, D)
#define VEC8_LINE1(A, B, C, D) \
x_##A = _mm256_add_epi32(x_##A, x_##B); \
x_##D = _mm256_shuffle_epi8(_mm256_xor_si256(x_##D, x_##A), rot16)
#define VEC8_LINE2(A, B, C, D) \
x_##C = _mm256_add_epi32(x_##C, x_##D); \
x_##B = VEC8_ROT(_mm256_xor_si256(x_##B, x_##C), 12)
#define VEC8_LINE3(A, B, C, D) \
x_##A = _mm256_add_epi32(x_##A, x_##B); \
x_##D = _mm256_shuffle_epi8(_mm256_xor_si256(x_##D, x_##A), rot8)
#define VEC8_LINE4(A, B, C, D) \
x_##C = _mm256_add_epi32(x_##C, x_##D); \
x_##B = VEC8_ROT(_mm256_xor_si256(x_##B, x_##C), 7)
#define VEC8_ROUND_SEQ(A1, B1, C1, D1, A2, B2, C2, D2, A3, B3, C3, D3, A4, B4, \
C4, D4) \
VEC8_LINE1(A1, B1, C1, D1); \
VEC8_LINE1(A2, B2, C2, D2); \
VEC8_LINE1(A3, B3, C3, D3); \
VEC8_LINE1(A4, B4, C4, D4); \
VEC8_LINE2(A1, B1, C1, D1); \
VEC8_LINE2(A2, B2, C2, D2); \
VEC8_LINE2(A3, B3, C3, D3); \
VEC8_LINE2(A4, B4, C4, D4); \
VEC8_LINE3(A1, B1, C1, D1); \
VEC8_LINE3(A2, B2, C2, D2); \
VEC8_LINE3(A3, B3, C3, D3); \
VEC8_LINE3(A4, B4, C4, D4); \
VEC8_LINE4(A1, B1, C1, D1); \
VEC8_LINE4(A2, B2, C2, D2); \
VEC8_LINE4(A3, B3, C3, D3); \
VEC8_LINE4(A4, B4, C4, D4)
#define VEC8_ROUND_HALF(A1, B1, C1, D1, A2, B2, C2, D2, A3, B3, C3, D3, A4, \
B4, C4, D4) \
VEC8_LINE1(A1, B1, C1, D1); \
VEC8_LINE1(A2, B2, C2, D2); \
VEC8_LINE2(A1, B1, C1, D1); \
VEC8_LINE2(A2, B2, C2, D2); \
VEC8_LINE3(A1, B1, C1, D1); \
VEC8_LINE3(A2, B2, C2, D2); \
VEC8_LINE4(A1, B1, C1, D1); \
VEC8_LINE4(A2, B2, C2, D2); \
VEC8_LINE1(A3, B3, C3, D3); \
VEC8_LINE1(A4, B4, C4, D4); \
VEC8_LINE2(A3, B3, C3, D3); \
VEC8_LINE2(A4, B4, C4, D4); \
VEC8_LINE3(A3, B3, C3, D3); \
VEC8_LINE3(A4, B4, C4, D4); \
VEC8_LINE4(A3, B3, C3, D3); \
VEC8_LINE4(A4, B4, C4, D4)
#define VEC8_ROUND_HALFANDHALF(A1, B1, C1, D1, A2, B2, C2, D2, A3, B3, C3, D3, \
A4, B4, C4, D4) \
VEC8_LINE1(A1, B1, C1, D1); \
VEC8_LINE1(A2, B2, C2, D2); \
VEC8_LINE2(A1, B1, C1, D1); \
VEC8_LINE2(A2, B2, C2, D2); \
VEC8_LINE1(A3, B3, C3, D3); \
VEC8_LINE1(A4, B4, C4, D4); \
VEC8_LINE2(A3, B3, C3, D3); \
VEC8_LINE2(A4, B4, C4, D4); \
VEC8_LINE3(A1, B1, C1, D1); \
VEC8_LINE3(A2, B2, C2, D2); \
VEC8_LINE4(A1, B1, C1, D1); \
VEC8_LINE4(A2, B2, C2, D2); \
VEC8_LINE3(A3, B3, C3, D3); \
VEC8_LINE3(A4, B4, C4, D4); \
VEC8_LINE4(A3, B3, C3, D3); \
VEC8_LINE4(A4, B4, C4, D4)
#define VEC8_ROUND(A1, B1, C1, D1, A2, B2, C2, D2, A3, B3, C3, D3, A4, B4, C4, \
D4) \
VEC8_ROUND_SEQ(A1, B1, C1, D1, A2, B2, C2, D2, A3, B3, C3, D3, A4, B4, C4, \
D4)
if (bytes >= 512) {
/* constant for shuffling bytes (replacing multiple-of-8 rotates) */
__m256i rot16 =
_mm256_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2,
13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2);
__m256i rot8 =
_mm256_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3,
14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3);
uint32_t in12, in13;
/* the naive way seems as fast (if not a bit faster) than the vector way */
__m256i x_0 = _mm256_set1_epi32(x[0]);
__m256i x_1 = _mm256_set1_epi32(x[1]);
__m256i x_2 = _mm256_set1_epi32(x[2]);
__m256i x_3 = _mm256_set1_epi32(x[3]);
__m256i x_4 = _mm256_set1_epi32(x[4]);
__m256i x_5 = _mm256_set1_epi32(x[5]);
__m256i x_6 = _mm256_set1_epi32(x[6]);
__m256i x_7 = _mm256_set1_epi32(x[7]);
__m256i x_8 = _mm256_set1_epi32(x[8]);
__m256i x_9 = _mm256_set1_epi32(x[9]);
__m256i x_10 = _mm256_set1_epi32(x[10]);
__m256i x_11 = _mm256_set1_epi32(x[11]);
__m256i x_12;
__m256i x_13;
__m256i x_14 = _mm256_set1_epi32(x[14]);
__m256i x_15 = _mm256_set1_epi32(x[15]);
__m256i orig0 = x_0;
__m256i orig1 = x_1;
__m256i orig2 = x_2;
__m256i orig3 = x_3;
__m256i orig4 = x_4;
__m256i orig5 = x_5;
__m256i orig6 = x_6;
__m256i orig7 = x_7;
__m256i orig8 = x_8;
__m256i orig9 = x_9;
__m256i orig10 = x_10;
__m256i orig11 = x_11;
__m256i orig12;
__m256i orig13;
__m256i orig14 = x_14;
__m256i orig15 = x_15;
__m256i t_0, t_1, t_2, t_3, t_4, t_5, t_6, t_7, t_8, t_9, t_10, t_11, t_12,
t_13, t_14, t_15;
while (bytes >= 512) {
const __m256i addv12 = _mm256_set_epi64x(3, 2, 1, 0);
const __m256i addv13 = _mm256_set_epi64x(7, 6, 5, 4);
const __m256i permute = _mm256_set_epi32(7, 6, 3, 2, 5, 4, 1, 0);
__m256i t12, t13;
uint64_t in1213;
int i;
x_0 = orig0;
x_1 = orig1;
x_2 = orig2;
x_3 = orig3;
x_4 = orig4;
x_5 = orig5;
x_6 = orig6;
x_7 = orig7;
x_8 = orig8;
x_9 = orig9;
x_10 = orig10;
x_11 = orig11;
x_14 = orig14;
x_15 = orig15;
in12 = x[12];
in13 = x[13];
in1213 = ((uint64_t) in12) | (((uint64_t) in13) << 32);
x_12 = x_13 = _mm256_broadcastq_epi64(_mm_cvtsi64_si128(in1213));
t12 = _mm256_add_epi64(addv12, x_12);
t13 = _mm256_add_epi64(addv13, x_13);
x_12 = _mm256_unpacklo_epi32(t12, t13);
x_13 = _mm256_unpackhi_epi32(t12, t13);
t12 = _mm256_unpacklo_epi32(x_12, x_13);
t13 = _mm256_unpackhi_epi32(x_12, x_13);
/* required because unpack* are intra-lane */
x_12 = _mm256_permutevar8x32_epi32(t12, permute);
x_13 = _mm256_permutevar8x32_epi32(t13, permute);
orig12 = x_12;
orig13 = x_13;
in1213 += 8;
x[12] = in1213 & 0xFFFFFFFF;
x[13] = (in1213 >> 32) & 0xFFFFFFFF;
for (i = 0; i < ROUNDS; i += 2) {
VEC8_ROUND(0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15);
VEC8_ROUND(0, 5, 10, 15, 1, 6, 11, 12, 2, 7, 8, 13, 3, 4, 9, 14);
}
#define ONEQUAD_TRANSPOSE(A, B, C, D) \
{ \
__m128i t0, t1, t2, t3; \
x_##A = _mm256_add_epi32(x_##A, orig##A); \
x_##B = _mm256_add_epi32(x_##B, orig##B); \
x_##C = _mm256_add_epi32(x_##C, orig##C); \
x_##D = _mm256_add_epi32(x_##D, orig##D); \
t_##A = _mm256_unpacklo_epi32(x_##A, x_##B); \
t_##B = _mm256_unpacklo_epi32(x_##C, x_##D); \
t_##C = _mm256_unpackhi_epi32(x_##A, x_##B); \
t_##D = _mm256_unpackhi_epi32(x_##C, x_##D); \
x_##A = _mm256_unpacklo_epi64(t_##A, t_##B); \
x_##B = _mm256_unpackhi_epi64(t_##A, t_##B); \
x_##C = _mm256_unpacklo_epi64(t_##C, t_##D); \
x_##D = _mm256_unpackhi_epi64(t_##C, t_##D); \
t0 = _mm_xor_si128(_mm256_extracti128_si256(x_##A, 0), \
_mm_loadu_si128((__m128i*) (m + 0))); \
_mm_storeu_si128((__m128i*) (c + 0), t0); \
t1 = _mm_xor_si128(_mm256_extracti128_si256(x_##B, 0), \
_mm_loadu_si128((__m128i*) (m + 64))); \
_mm_storeu_si128((__m128i*) (c + 64), t1); \
t2 = _mm_xor_si128(_mm256_extracti128_si256(x_##C, 0), \
_mm_loadu_si128((__m128i*) (m + 128))); \
_mm_storeu_si128((__m128i*) (c + 128), t2); \
t3 = _mm_xor_si128(_mm256_extracti128_si256(x_##D, 0), \
_mm_loadu_si128((__m128i*) (m + 192))); \
_mm_storeu_si128((__m128i*) (c + 192), t3); \
t0 = _mm_xor_si128(_mm256_extracti128_si256(x_##A, 1), \
_mm_loadu_si128((__m128i*) (m + 256))); \
_mm_storeu_si128((__m128i*) (c + 256), t0); \
t1 = _mm_xor_si128(_mm256_extracti128_si256(x_##B, 1), \
_mm_loadu_si128((__m128i*) (m + 320))); \
_mm_storeu_si128((__m128i*) (c + 320), t1); \
t2 = _mm_xor_si128(_mm256_extracti128_si256(x_##C, 1), \
_mm_loadu_si128((__m128i*) (m + 384))); \
_mm_storeu_si128((__m128i*) (c + 384), t2); \
t3 = _mm_xor_si128(_mm256_extracti128_si256(x_##D, 1), \
_mm_loadu_si128((__m128i*) (m + 448))); \
_mm_storeu_si128((__m128i*) (c + 448), t3); \
}
#define ONEQUAD(A, B, C, D) ONEQUAD_TRANSPOSE(A, B, C, D)
#define ONEQUAD_UNPCK(A, B, C, D) \
{ \
x_##A = _mm256_add_epi32(x_##A, orig##A); \
x_##B = _mm256_add_epi32(x_##B, orig##B); \
x_##C = _mm256_add_epi32(x_##C, orig##C); \
x_##D = _mm256_add_epi32(x_##D, orig##D); \
t_##A = _mm256_unpacklo_epi32(x_##A, x_##B); \
t_##B = _mm256_unpacklo_epi32(x_##C, x_##D); \
t_##C = _mm256_unpackhi_epi32(x_##A, x_##B); \
t_##D = _mm256_unpackhi_epi32(x_##C, x_##D); \
x_##A = _mm256_unpacklo_epi64(t_##A, t_##B); \
x_##B = _mm256_unpackhi_epi64(t_##A, t_##B); \
x_##C = _mm256_unpacklo_epi64(t_##C, t_##D); \
x_##D = _mm256_unpackhi_epi64(t_##C, t_##D); \
}
#define ONEOCTO(A, B, C, D, A2, B2, C2, D2) \
{ \
ONEQUAD_UNPCK(A, B, C, D); \
ONEQUAD_UNPCK(A2, B2, C2, D2); \
t_##A = _mm256_permute2x128_si256(x_##A, x_##A2, 0x20); \
t_##A2 = _mm256_permute2x128_si256(x_##A, x_##A2, 0x31); \
t_##B = _mm256_permute2x128_si256(x_##B, x_##B2, 0x20); \
t_##B2 = _mm256_permute2x128_si256(x_##B, x_##B2, 0x31); \
t_##C = _mm256_permute2x128_si256(x_##C, x_##C2, 0x20); \
t_##C2 = _mm256_permute2x128_si256(x_##C, x_##C2, 0x31); \
t_##D = _mm256_permute2x128_si256(x_##D, x_##D2, 0x20); \
t_##D2 = _mm256_permute2x128_si256(x_##D, x_##D2, 0x31); \
t_##A = \
_mm256_xor_si256(t_##A, _mm256_loadu_si256((__m256i*) (m + 0))); \
t_##B = \
_mm256_xor_si256(t_##B, _mm256_loadu_si256((__m256i*) (m + 64))); \
t_##C = \
_mm256_xor_si256(t_##C, _mm256_loadu_si256((__m256i*) (m + 128))); \
t_##D = \
_mm256_xor_si256(t_##D, _mm256_loadu_si256((__m256i*) (m + 192))); \
t_##A2 = _mm256_xor_si256(t_##A2, \
_mm256_loadu_si256((__m256i*) (m + 256))); \
t_##B2 = _mm256_xor_si256(t_##B2, \
_mm256_loadu_si256((__m256i*) (m + 320))); \
t_##C2 = _mm256_xor_si256(t_##C2, \
_mm256_loadu_si256((__m256i*) (m + 384))); \
t_##D2 = _mm256_xor_si256(t_##D2, \
_mm256_loadu_si256((__m256i*) (m + 448))); \
_mm256_storeu_si256((__m256i*) (c + 0), t_##A); \
_mm256_storeu_si256((__m256i*) (c + 64), t_##B); \
_mm256_storeu_si256((__m256i*) (c + 128), t_##C); \
_mm256_storeu_si256((__m256i*) (c + 192), t_##D); \
_mm256_storeu_si256((__m256i*) (c + 256), t_##A2); \
_mm256_storeu_si256((__m256i*) (c + 320), t_##B2); \
_mm256_storeu_si256((__m256i*) (c + 384), t_##C2); \
_mm256_storeu_si256((__m256i*) (c + 448), t_##D2); \
}
ONEOCTO(0, 1, 2, 3, 4, 5, 6, 7);
m += 32;
c += 32;
ONEOCTO(8, 9, 10, 11, 12, 13, 14, 15);
m -= 32;
c -= 32;
#undef ONEQUAD
#undef ONEQUAD_TRANSPOSE
#undef ONEQUAD_UNPCK
#undef ONEOCTO
bytes -= 512;
c += 512;
m += 512;
}
}
#undef VEC8_ROT
#undef VEC8_QUARTERROUND
#undef VEC8_QUARTERROUND_NAIVE
#undef VEC8_QUARTERROUND_SHUFFLE
#undef VEC8_QUARTERROUND_SHUFFLE2
#undef VEC8_LINE1
#undef VEC8_LINE2
#undef VEC8_LINE3
#undef VEC8_LINE4
#undef VEC8_ROUND
#undef VEC8_ROUND_SEQ
#undef VEC8_ROUND_HALF
#undef VEC8_ROUND_HALFANDHALF

@ -0,0 +1,327 @@
/*
chacha-merged.c version 20080118
D. J. Bernstein
Public domain.
*/
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sodium/core.h>
#include <sodium/crypto_stream_chacha20.h>
#include <sodium/private/common.h>
#include <sodium/utils.h>
#include "../stream_chacha20.h"
#include "chacha20_ref.h"
/* ChaCha20 working state: sixteen 32-bit words laid out as
 * 4 constants, 8 key words, 1-2 counter words, and the nonce. */
struct chacha_ctx
{
    uint32_t input[16];
};

typedef struct chacha_ctx chacha_ctx;

/* Append an unsigned suffix to a 32-bit constant. */
#define U32C(v) (v##U)
/* Truncate an arithmetic result to 32 bits (portable on >32-bit ints). */
#define U32V(v) ((uint32_t)(v)&U32C(0xFFFFFFFF))
#define ROTATE(v, c) (ROTL32(v, c))
#define XOR(v, w) ((v) ^ (w))
#define PLUS(v, w) (U32V((v) + (w)))
#define PLUSONE(v) (PLUS((v), 1))
/* One ChaCha quarter-round: mixes four state words in place using the
 * add/rotate/xor schedule with rotations of 16, 12, 8 and 7 bits. */
#define QUARTERROUND(a, b, c, d) \
    a = PLUS(a, b);              \
    d = ROTATE(XOR(d, a), 16);   \
    c = PLUS(c, d);              \
    b = ROTATE(XOR(b, c), 12);   \
    a = PLUS(a, b);              \
    d = ROTATE(XOR(d, a), 8);    \
    c = PLUS(c, d);              \
    b = ROTATE(XOR(b, c), 7);
/* Load the "expand 32-byte k" constants and the 256-bit key into the
 * first half (words 0..11) of the state matrix, little-endian. */
static void
chacha_keysetup(chacha_ctx *ctx, const uint8_t *k)
{
    size_t w;

    /* sigma constants */
    ctx->input[0] = U32C(0x61707865);
    ctx->input[1] = U32C(0x3320646e);
    ctx->input[2] = U32C(0x79622d32);
    ctx->input[3] = U32C(0x6b206574);
    /* eight 32-bit key words */
    for (w = 0; w < 8; w++) {
        ctx->input[4 + w] = LOAD32_LE(k + 4 * w);
    }
}
/* Original (DJB) variant: 64-bit block counter in words 12..13
 * (zero when `counter` is NULL), 64-bit nonce in words 14..15. */
static void
chacha_ivsetup(chacha_ctx *ctx, const uint8_t *iv, const uint8_t *counter)
{
    ctx->input[12] = counter == NULL ? 0 : LOAD32_LE(counter + 0);
    ctx->input[13] = counter == NULL ? 0 : LOAD32_LE(counter + 4);
    ctx->input[14] = LOAD32_LE(iv + 0);
    ctx->input[15] = LOAD32_LE(iv + 4);
}

/* IETF variant: 32-bit block counter in word 12 (zero when `counter`
 * is NULL), 96-bit nonce in words 13..15. */
static void
chacha_ietf_ivsetup(chacha_ctx *ctx, const uint8_t *iv, const uint8_t *counter)
{
    ctx->input[12] = counter == NULL ? 0 : LOAD32_LE(counter);
    ctx->input[13] = LOAD32_LE(iv + 0);
    ctx->input[14] = LOAD32_LE(iv + 4);
    ctx->input[15] = LOAD32_LE(iv + 8);
}
/* XOR `bytes` bytes of message `m` with ChaCha20 keystream into `c`
 * (m == c for in-place / keystream generation). Updates the counter
 * words of `ctx` so successive calls continue the stream. */
static void
chacha20_encrypt_bytes(chacha_ctx *ctx, const uint8_t *m, uint8_t *c,
                       unsigned long long bytes)
{
    uint32_t x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
    uint32_t j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
    uint8_t *ctarget = NULL; /* real destination while c points at tmp */
    uint8_t tmp[64];         /* staging buffer for a trailing partial block */
    unsigned int i;

    if(!bytes)
    {
        return; /* LCOV_EXCL_LINE */
    }
    if(bytes > crypto_stream_chacha20_MESSAGEBYTES_MAX)
    {
        sodium_misuse();
    }
    /* Snapshot the state; x* are the per-block working copy. */
    j0 = ctx->input[0];
    j1 = ctx->input[1];
    j2 = ctx->input[2];
    j3 = ctx->input[3];
    j4 = ctx->input[4];
    j5 = ctx->input[5];
    j6 = ctx->input[6];
    j7 = ctx->input[7];
    j8 = ctx->input[8];
    j9 = ctx->input[9];
    j10 = ctx->input[10];
    j11 = ctx->input[11];
    j12 = ctx->input[12];
    j13 = ctx->input[13];
    j14 = ctx->input[14];
    j15 = ctx->input[15];

    for(;;)
    {
        if(bytes < 64)
        {
            /* Partial final block: pad the message into tmp and redirect
             * the output there so full 64-byte loads/stores stay valid. */
            memset(tmp, 0, 64);
            for(i = 0; i < bytes; ++i)
            {
                tmp[i] = m[i];
            }
            m = tmp;
            ctarget = c;
            c = tmp;
        }
        x0 = j0;
        x1 = j1;
        x2 = j2;
        x3 = j3;
        x4 = j4;
        x5 = j5;
        x6 = j6;
        x7 = j7;
        x8 = j8;
        x9 = j9;
        x10 = j10;
        x11 = j11;
        x12 = j12;
        x13 = j13;
        x14 = j14;
        x15 = j15;
        /* 20 rounds = 10 double rounds (column round + diagonal round). */
        for(i = 20; i > 0; i -= 2)
        {
            QUARTERROUND(x0, x4, x8, x12)
            QUARTERROUND(x1, x5, x9, x13)
            QUARTERROUND(x2, x6, x10, x14)
            QUARTERROUND(x3, x7, x11, x15)
            QUARTERROUND(x0, x5, x10, x15)
            QUARTERROUND(x1, x6, x11, x12)
            QUARTERROUND(x2, x7, x8, x13)
            QUARTERROUND(x3, x4, x9, x14)
        }
        /* Feed-forward: add the original state back in. */
        x0 = PLUS(x0, j0);
        x1 = PLUS(x1, j1);
        x2 = PLUS(x2, j2);
        x3 = PLUS(x3, j3);
        x4 = PLUS(x4, j4);
        x5 = PLUS(x5, j5);
        x6 = PLUS(x6, j6);
        x7 = PLUS(x7, j7);
        x8 = PLUS(x8, j8);
        x9 = PLUS(x9, j9);
        x10 = PLUS(x10, j10);
        x11 = PLUS(x11, j11);
        x12 = PLUS(x12, j12);
        x13 = PLUS(x13, j13);
        x14 = PLUS(x14, j14);
        x15 = PLUS(x15, j15);
        /* XOR the keystream block with the message. */
        x0 = XOR(x0, LOAD32_LE(m + 0));
        x1 = XOR(x1, LOAD32_LE(m + 4));
        x2 = XOR(x2, LOAD32_LE(m + 8));
        x3 = XOR(x3, LOAD32_LE(m + 12));
        x4 = XOR(x4, LOAD32_LE(m + 16));
        x5 = XOR(x5, LOAD32_LE(m + 20));
        x6 = XOR(x6, LOAD32_LE(m + 24));
        x7 = XOR(x7, LOAD32_LE(m + 28));
        x8 = XOR(x8, LOAD32_LE(m + 32));
        x9 = XOR(x9, LOAD32_LE(m + 36));
        x10 = XOR(x10, LOAD32_LE(m + 40));
        x11 = XOR(x11, LOAD32_LE(m + 44));
        x12 = XOR(x12, LOAD32_LE(m + 48));
        x13 = XOR(x13, LOAD32_LE(m + 52));
        x14 = XOR(x14, LOAD32_LE(m + 56));
        x15 = XOR(x15, LOAD32_LE(m + 60));
        /* 64-bit counter increment across words 12 and 13. */
        j12 = PLUSONE(j12);
        /* LCOV_EXCL_START */
        if(!j12)
        {
            j13 = PLUSONE(j13);
        }
        /* LCOV_EXCL_STOP */
        STORE32_LE(c + 0, x0);
        STORE32_LE(c + 4, x1);
        STORE32_LE(c + 8, x2);
        STORE32_LE(c + 12, x3);
        STORE32_LE(c + 16, x4);
        STORE32_LE(c + 20, x5);
        STORE32_LE(c + 24, x6);
        STORE32_LE(c + 28, x7);
        STORE32_LE(c + 32, x8);
        STORE32_LE(c + 36, x9);
        STORE32_LE(c + 40, x10);
        STORE32_LE(c + 44, x11);
        STORE32_LE(c + 48, x12);
        STORE32_LE(c + 52, x13);
        STORE32_LE(c + 56, x14);
        STORE32_LE(c + 60, x15);
        if(bytes <= 64)
        {
            if(bytes < 64)
            {
                /* Copy the valid prefix of tmp to the real destination. */
                for(i = 0; i < (unsigned int)bytes; ++i)
                {
                    ctarget[i] = c[i]; /* ctarget cannot be NULL */
                }
            }
            /* Persist only the counter; key/nonce words are unchanged. */
            ctx->input[12] = j12;
            ctx->input[13] = j13;
            return;
        }
        bytes -= 64;
        c += 64;
        m += 64;
    }
}
/* Write `clen` bytes of raw keystream for nonce `n` / key `k` into `c`
 * (original DJB variant: 64-bit nonce, 64-bit counter from 0). */
static int
stream_ref(unsigned char *c, unsigned long long clen, const unsigned char *n,
           const unsigned char *k)
{
    struct chacha_ctx ctx;

    if(!clen)
    {
        return 0;
    }
    COMPILER_ASSERT(crypto_stream_chacha20_KEYBYTES == 256 / 8);
    chacha_keysetup(&ctx, k);
    chacha_ivsetup(&ctx, n, NULL);
    /* Encrypting an all-zero buffer in place yields the keystream. */
    memset(c, 0, clen);
    chacha20_encrypt_bytes(&ctx, c, c, clen);
    /* Wipe the expanded key material from the stack. */
    sodium_memzero(&ctx, sizeof ctx);
    return 0;
}

/* Same as stream_ref, for the IETF variant (96-bit nonce, 32-bit counter). */
static int
stream_ietf_ref(unsigned char *c, unsigned long long clen,
                const unsigned char *n, const unsigned char *k)
{
    struct chacha_ctx ctx;

    if(!clen)
    {
        return 0;
    }
    COMPILER_ASSERT(crypto_stream_chacha20_KEYBYTES == 256 / 8);
    chacha_keysetup(&ctx, k);
    chacha_ietf_ivsetup(&ctx, n, NULL);
    memset(c, 0, clen);
    chacha20_encrypt_bytes(&ctx, c, c, clen);
    sodium_memzero(&ctx, sizeof ctx);
    return 0;
}
/* XOR `mlen` bytes of `m` with keystream into `c`, starting at 64-byte
 * block index `ic` (original variant, 64-bit counter). */
static int
stream_ref_xor_ic(unsigned char *c, const unsigned char *m,
                  unsigned long long mlen, const unsigned char *n, uint64_t ic,
                  const unsigned char *k)
{
    struct chacha_ctx ctx;
    uint8_t ic_bytes[8];
    uint32_t ic_high;
    uint32_t ic_low;

    if(!mlen)
    {
        return 0;
    }
    /* Serialize the 64-bit block counter little-endian for ivsetup. */
    ic_high = U32V(ic >> 32);
    ic_low = U32V(ic);
    STORE32_LE(&ic_bytes[0], ic_low);
    STORE32_LE(&ic_bytes[4], ic_high);
    chacha_keysetup(&ctx, k);
    chacha_ivsetup(&ctx, n, ic_bytes);
    chacha20_encrypt_bytes(&ctx, m, c, mlen);
    sodium_memzero(&ctx, sizeof ctx);
    return 0;
}

/* IETF variant of the above: 32-bit initial block counter `ic`. */
static int
stream_ietf_ref_xor_ic(unsigned char *c, const unsigned char *m,
                       unsigned long long mlen, const unsigned char *n,
                       uint32_t ic, const unsigned char *k)
{
    struct chacha_ctx ctx;
    uint8_t ic_bytes[4];

    if(!mlen)
    {
        return 0;
    }
    STORE32_LE(ic_bytes, ic);
    chacha_keysetup(&ctx, k);
    chacha_ietf_ivsetup(&ctx, n, ic_bytes);
    chacha20_encrypt_bytes(&ctx, m, c, mlen);
    sodium_memzero(&ctx, sizeof ctx);
    return 0;
}
/* Function table exposing the portable C implementation to the
 * runtime backend selector in stream_chacha20.c. */
struct crypto_stream_chacha20_implementation
    crypto_stream_chacha20_ref_implementation = {
        SODIUM_C99(.stream =) stream_ref,
        SODIUM_C99(.stream_ietf =) stream_ietf_ref,
        SODIUM_C99(.stream_xor_ic =) stream_ref_xor_ic,
        SODIUM_C99(.stream_ietf_xor_ic =) stream_ietf_ref_xor_ic};

@ -0,0 +1,8 @@
/* Reference (portable C) ChaCha20 backend, selected when no SIMD
 * implementation is available. */
#ifndef stream_chacha20_ref_H
#define stream_chacha20_ref_H

#include <stdint.h>

#include "../stream_chacha20.h"
#include <sodium/crypto_stream_chacha20.h>

extern struct crypto_stream_chacha20_implementation
    crypto_stream_chacha20_ref_implementation;

#endif

@ -0,0 +1,131 @@
#include <sodium/crypto_stream_chacha20.h>
#include <sodium/private/common.h>
#include <sodium/private/implementations.h>
#include <sodium/randombytes.h>
#include <sodium/runtime.h>
#include "stream_chacha20.h"
#include "ref/chacha20_ref.h"
#include "dolbeau/chacha20_dolbeau-avx2.h"
#include "dolbeau/chacha20_dolbeau-ssse3.h"
/* Active backend; defaults to the portable C implementation and may be
 * swapped by _crypto_stream_chacha20_pick_best_implementation(). */
static const crypto_stream_chacha20_implementation *implementation =
    &crypto_stream_chacha20_ref_implementation;

/* Size accessors: runtime equivalents of the compile-time constants,
 * for bindings that cannot use the C macros. */
size_t
crypto_stream_chacha20_keybytes(void)
{
    return crypto_stream_chacha20_KEYBYTES;
}

size_t
crypto_stream_chacha20_noncebytes(void)
{
    return crypto_stream_chacha20_NONCEBYTES;
}

size_t
crypto_stream_chacha20_messagebytes_max(void)
{
    return crypto_stream_chacha20_MESSAGEBYTES_MAX;
}

size_t
crypto_stream_chacha20_ietf_keybytes(void)
{
    return crypto_stream_chacha20_ietf_KEYBYTES;
}

size_t
crypto_stream_chacha20_ietf_noncebytes(void)
{
    return crypto_stream_chacha20_ietf_NONCEBYTES;
}

size_t
crypto_stream_chacha20_ietf_messagebytes_max(void)
{
    return crypto_stream_chacha20_ietf_MESSAGEBYTES_MAX;
}
/* Public API entry points: each simply forwards to the selected backend. */

/* Generate `clen` bytes of keystream (original variant, 8-byte nonce). */
int
crypto_stream_chacha20(unsigned char *c, unsigned long long clen,
                       const unsigned char *n, const unsigned char *k)
{
    return implementation->stream(c, clen, n, k);
}

/* Generate `clen` bytes of keystream (IETF variant, 12-byte nonce). */
int
crypto_stream_chacha20_ietf(unsigned char *c, unsigned long long clen,
                            const unsigned char *n, const unsigned char *k)
{
    return implementation->stream_ietf(c, clen, n, k);
}

/* Encrypt/decrypt `m` into `c` starting at block counter `ic`. */
int
crypto_stream_chacha20_xor_ic(unsigned char *c, const unsigned char *m,
                              unsigned long long mlen, const unsigned char *n,
                              uint64_t ic, const unsigned char *k)
{
    return implementation->stream_xor_ic(c, m, mlen, n, ic, k);
}

/* IETF variant of the above; counter is 32 bits. */
int
crypto_stream_chacha20_ietf_xor_ic(unsigned char *c, const unsigned char *m,
                                   unsigned long long mlen,
                                   const unsigned char *n, uint32_t ic,
                                   const unsigned char *k)
{
    return implementation->stream_ietf_xor_ic(c, m, mlen, n, ic, k);
}

/* Encrypt/decrypt with the counter starting at 0. */
int
crypto_stream_chacha20_xor(unsigned char *c, const unsigned char *m,
                           unsigned long long mlen, const unsigned char *n,
                           const unsigned char *k)
{
    return implementation->stream_xor_ic(c, m, mlen, n, 0U, k);
}

/* IETF variant with the counter starting at 0. */
int
crypto_stream_chacha20_ietf_xor(unsigned char *c, const unsigned char *m,
                                unsigned long long mlen, const unsigned char *n,
                                const unsigned char *k)
{
    return implementation->stream_ietf_xor_ic(c, m, mlen, n, 0U, k);
}
/* Fill `k` with a freshly generated random IETF-ChaCha20 key. */
void
crypto_stream_chacha20_ietf_keygen(
    unsigned char k[crypto_stream_chacha20_ietf_KEYBYTES])
{
    randombytes_buf(k, crypto_stream_chacha20_ietf_KEYBYTES);
}

/* Fill `k` with a freshly generated random ChaCha20 key. */
void
crypto_stream_chacha20_keygen(unsigned char k[crypto_stream_chacha20_KEYBYTES])
{
    randombytes_buf(k, crypto_stream_chacha20_KEYBYTES);
}
/* Select the fastest available backend at runtime. Preference order:
 * AVX2, then SSSE3, falling back to the portable C reference. The SIMD
 * branches are compiled in only when the build targets that ISA. */
int
_crypto_stream_chacha20_pick_best_implementation(void)
{
    implementation = &crypto_stream_chacha20_ref_implementation;
#if __AVX2__
    if(sodium_runtime_has_avx2())
    {
        implementation = &crypto_stream_chacha20_dolbeau_avx2_implementation;
        /* AVX2 is the best option; skip the SSSE3 probe. */
        return 0;
    }
#endif
#if __SSSE3__
    if(sodium_runtime_has_ssse3())
    {
        implementation = &crypto_stream_chacha20_dolbeau_ssse3_implementation;
    }
#endif
    return 0;
}

@ -0,0 +1,22 @@
#ifndef stream_chacha20_H
#define stream_chacha20_H

#include <stdint.h>

/* Vtable implemented by each ChaCha20 backend (ref, SSSE3, AVX2).
 * The four entries mirror the public crypto_stream_chacha20* API:
 * plain keystream and xor-with-initial-counter, in both the original
 * (64-bit nonce) and IETF (96-bit nonce) flavors. */
typedef struct crypto_stream_chacha20_implementation {
    int (*stream)(unsigned char *c, unsigned long long clen,
                  const unsigned char *n, const unsigned char *k);
    int (*stream_ietf)(unsigned char *c, unsigned long long clen,
                       const unsigned char *n, const unsigned char *k);
    int (*stream_xor_ic)(unsigned char *c, const unsigned char *m,
                         unsigned long long mlen,
                         const unsigned char *n, uint64_t ic,
                         const unsigned char *k);
    int (*stream_ietf_xor_ic)(unsigned char *c, const unsigned char *m,
                              unsigned long long mlen,
                              const unsigned char *n, uint32_t ic,
                              const unsigned char *k);
} crypto_stream_chacha20_implementation;

#endif

@ -0,0 +1,190 @@
#include <assert.h>
#include <limits.h>
#include <stdint.h>
#include <stdlib.h>
#include <sys/types.h>
#ifdef __EMSCRIPTEN__
#include <emscripten.h>
#endif
#include <sodium/core.h>
#include <sodium/crypto_stream_xchacha20.h>
#include <sodium/randombytes.h>
#include <sodium/randombytes_salsa20_random.h>
#include <sodium/common.h>
/* C++Builder defines a "random" macro */
#undef random
/* Active RNG backend; defaults to the Salsa20-based generator. */
static const randombytes_implementation *implementation =
    &randombytes_salsa20_implementation;

/* Restore the default backend if `implementation` was reset to NULL. */
static void
randombytes_init_if_needed(void)
{
    if(implementation == NULL)
        implementation = &randombytes_salsa20_implementation;
}

/* Replace the RNG backend. Caller is responsible for passing a fully
 * populated implementation table. Always returns 0. */
int
randombytes_set_implementation(randombytes_implementation *impl)
{
    implementation = impl;
    return 0;
}
/* Name of the active backend ("js" under Emscripten, where the
 * browser/node RNG is used directly). */
const char *
randombytes_implementation_name(void)
{
#ifndef __EMSCRIPTEN__
    randombytes_init_if_needed();
    return implementation->implementation_name();
#else
    return "js";
#endif
}

/* Return a uniformly distributed 32-bit random value. */
uint32_t
randombytes_random(void)
{
#ifndef __EMSCRIPTEN__
    randombytes_init_if_needed();
    return implementation->random();
#else
    return EM_ASM_INT_V({ return Module.getRandomValue(); });
#endif
}
/* (Re)seed the generator. On Emscripten this instead installs
 * Module.getRandomValue, preferring WebCrypto (or msCrypto) and falling
 * back to node's crypto module; it throws if no secure source exists.
 *
 * NOTE(fix): the inline JavaScript had been mangled by a C formatter
 * ("== =", "!= =" are not valid JS operators); restored to ===/!==. */
void
randombytes_stir(void)
{
#ifndef __EMSCRIPTEN__
    randombytes_init_if_needed();
    if(implementation->stir != NULL)
    {
        implementation->stir();
    }
#else
    EM_ASM({
        if (Module.getRandomValue === undefined) {
            try {
                var window_ = 'object' === typeof window ? window : self;
                var crypto_ = typeof window_.crypto !== 'undefined'
                                  ? window_.crypto
                                  : window_.msCrypto;
                var randomValuesStandard = function() {
                    var buf = new Uint32Array(1);
                    crypto_.getRandomValues(buf);
                    return buf[0] >>> 0;
                };
                randomValuesStandard();
                Module.getRandomValue = randomValuesStandard;
            } catch (e) {
                try {
                    var crypto = require('crypto');
                    var randomValueNodeJS = function() {
                        var buf = crypto['randomBytes'](4);
                        return (buf[0] << 24 | buf[1] << 16 | buf[2] << 8 | buf[3]) >>> 0;
                    };
                    randomValueNodeJS();
                    Module.getRandomValue = randomValueNodeJS;
                } catch (e) {
                    throw 'No secure random number generator found';
                }
            }
        }
    });
#endif
}
/* Return a uniformly distributed value in [0, upper_bound) without
 * modulo bias, using rejection sampling. */
uint32_t
randombytes_uniform(const uint32_t upper_bound)
{
    uint32_t min;
    uint32_t r;

#ifndef __EMSCRIPTEN__
    randombytes_init_if_needed();
    /* Backends may provide a native uniform(); prefer it if present. */
    if(implementation->uniform != NULL)
    {
        return implementation->uniform(upper_bound);
    }
#endif
    if(upper_bound < 2)
    {
        return 0;
    }
    min = (1U + ~upper_bound) % upper_bound; /* = 2**32 mod upper_bound */
    /* Reject values below min so the remaining range is an exact
     * multiple of upper_bound. */
    do
    {
        r = randombytes_random();
    } while(r < min);
    /* r is now clamped to a set whose size mod upper_bound == 0
     * the worst case (2**31+1) requires ~ 2 attempts */
    return r % upper_bound;
}
/* Fill `buf` with `size` random bytes from the active backend. */
void
randombytes_buf(void *const buf, const size_t size)
{
    randombytes_init_if_needed();
    if(size > (size_t)0U)
    {
        implementation->buf(buf, size);
    }
}

/* Deterministically expand a 32-byte seed into `size` pseudorandom
 * bytes using XChaCha20 with a fixed nonce. Same seed => same output. */
void
randombytes_buf_deterministic(void *const buf, const size_t size,
                              const unsigned char seed[randombytes_SEEDBYTES])
{
    static const unsigned char nonce[crypto_stream_xchacha20_NONCEBYTES] = {
        'L', 'i', 'b', 's', 'o', 'd', 'i', 'u', 'm', 'D', 'R', 'G'};

    COMPILER_ASSERT(randombytes_SEEDBYTES == crypto_stream_xchacha20_KEYBYTES);
#if SIZE_MAX > 0x4000000000ULL
    COMPILER_ASSERT(randombytes_BYTES_MAX <= 0x4000000000ULL);
    /* Refuse requests past the documented randombytes_BYTES_MAX. */
    if(size > 0x4000000000ULL)
    {
        sodium_misuse();
    }
#endif
    crypto_stream_xchacha20((unsigned char *)buf, (unsigned long long)size, nonce,
                            seed);
}
/* Runtime accessor for the deterministic-buf seed length. */
size_t
randombytes_seedbytes(void)
{
    return randombytes_SEEDBYTES;
}

/* Release backend resources (e.g. the /dev/urandom descriptor).
 * Returns the backend's status, or 0 when there is nothing to close. */
int
randombytes_close(void)
{
    if(implementation != NULL && implementation->close != NULL)
    {
        return implementation->close();
    }
    return 0;
}

/* NaCl-compatible entry point; forwards to randombytes_buf(). */
void
randombytes(unsigned char *const buf, const unsigned long long buf_len)
{
    assert(buf_len <= SIZE_MAX);
    randombytes_buf(buf, (size_t)buf_len);
}

@ -0,0 +1,600 @@
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdlib.h>
#include <string.h>
#if !defined(_MSC_VER) && !defined(__BORLANDC__)
#include <unistd.h>
#endif
#include <sys/types.h>
#ifndef _WIN32
#include <sys/stat.h>
#include <sys/time.h>
#endif
#ifdef __linux__
#ifdef __dietlibc__
#define _LINUX_SOURCE
#else
#include <sys/syscall.h>
#endif
#include <poll.h>
#endif
#ifdef HAVE_RDRAND
#pragma GCC target("rdrnd")
#include <immintrin.h>
#endif
#include <sodium/core.h>
#include <sodium/crypto_core_salsa20.h>
#include <sodium/crypto_stream_salsa20.h>
#include <sodium/private/common.h>
#include <sodium/randombytes.h>
#include <sodium/randombytes_salsa20_random.h>
#include <sodium/runtime.h>
#include <sodium/utils.h>
#ifdef _WIN32
#include <windows.h>
#include <sys/timeb.h>
#define RtlGenRandom SystemFunction036
#if defined(__cplusplus)
extern "C"
#endif
BOOLEAN NTAPI
RtlGenRandom(PVOID RandomBuffer, ULONG RandomBufferLength);
#pragma comment(lib, "advapi32.lib")
#ifdef __BORLANDC__
#define _ftime ftime
#define _timeb timeb
#endif
#endif
#define SALSA20_RANDOM_BLOCK_SIZE crypto_core_salsa20_OUTPUTBYTES
#if defined(__OpenBSD__) || defined(__CloudABI__)
#define HAVE_SAFE_ARC4RANDOM 1
#endif
#ifndef SSIZE_MAX
#define SSIZE_MAX (SIZE_MAX / 2 - 1)
#endif
#ifndef S_ISNAM
#ifdef __COMPCERT__
#define S_ISNAM(X) 1
#else
#define S_ISNAM(X) 0
#endif
#endif
#ifndef TLS
#ifdef _WIN32
#define TLS __declspec(thread)
#else
#define TLS
#endif
#endif
/* Process-wide entropy-source bookkeeping, shared by all threads. */
typedef struct Salsa20RandomGlobal_
{
    int initialized;           /* entropy source opened/probed */
    int random_data_source_fd; /* /dev/[u]random fd, or -1 */
    int getrandom_available;   /* getrandom(2) works on this kernel */
    int rdrand_available;      /* CPU supports RDRAND */
#ifdef HAVE_GETPID
    pid_t pid;                 /* pid at seed time, to detect fork() */
#endif
} Salsa20RandomGlobal;

/* Per-thread generator state. */
typedef struct Salsa20Random_
{
    int initialized;  /* key has been seeded */
    size_t rnd32_outleft; /* unread bytes remaining in rnd32 pool */
    unsigned char key[crypto_stream_salsa20_KEYBYTES];
    unsigned char rnd32[16U * SALSA20_RANDOM_BLOCK_SIZE];
    uint64_t nonce;   /* Salsa20 nonce, bumped per refill */
} Salsa20Random;

static Salsa20RandomGlobal global = {SODIUM_C99(.initialized =) 0,
                                     SODIUM_C99(.random_data_source_fd =) - 1};

/* Thread-local so the generator needs no locking. */
static TLS Salsa20Random stream = {SODIUM_C99(.initialized =) 0,
                                   SODIUM_C99(.rnd32_outleft =)(size_t) 0U};
/*
* Get a high-resolution timestamp, as a uint64_t value
*/
#ifdef _WIN32
/* High-resolution timestamp in microseconds (Windows: _ftime, ms
 * resolution scaled up). Used only to salt the initial nonce. */
static uint64_t
sodium_hrtime(void)
{
    struct _timeb tb;

#pragma warning(push)
#pragma warning(disable : 4996)
    _ftime(&tb);
#pragma warning(pop)
    return ((uint64_t)tb.time) * 1000000U + ((uint64_t)tb.millitm) * 1000U;
}
#else /* _WIN32 */
/* POSIX version: gettimeofday, microsecond resolution. */
static uint64_t
sodium_hrtime(void)
{
    struct timeval tv;

    if(gettimeofday(&tv, NULL) != 0)
    {
        sodium_misuse(); /* LCOV_EXCL_LINE */
    }
    return ((uint64_t)tv.tv_sec) * 1000000U + (uint64_t)tv.tv_usec;
}
#endif
/*
* Initialize the entropy source
*/
#ifdef _WIN32
/* Windows init: no device to open; RtlGenRandom is used at stir time. */
static void
randombytes_salsa20_random_init(void)
{
    stream.nonce = sodium_hrtime();
    assert(stream.nonce != (uint64_t)0U);
    global.rdrand_available = sodium_runtime_has_rdrand();
}
#else /* _WIN32 */
/* read() the full `size` bytes from `fd`, retrying on EINTR/EAGAIN and
 * short reads. Returns the number of bytes read, or a negative errno
 * result from read(). */
static ssize_t
safe_read(const int fd, void *const buf_, size_t size)
{
    unsigned char *buf = (unsigned char *)buf_;
    ssize_t readnb;

    assert(size > (size_t)0U);
    assert(size <= SSIZE_MAX);
    do
    {
        while((readnb = read(fd, buf, size)) < (ssize_t)0
              && (errno == EINTR || errno == EAGAIN))
            ; /* LCOV_EXCL_LINE */
        if(readnb < (ssize_t)0)
        {
            return readnb; /* LCOV_EXCL_LINE */
        }
        if(readnb == (ssize_t)0)
        {
            break; /* LCOV_EXCL_LINE */
        }
        size -= (size_t)readnb;
        buf += readnb;
    } while(size > (ssize_t)0);
    return (ssize_t)(buf - (unsigned char *)buf_);
}
#if defined(__linux__) && !defined(USE_BLOCKING_RANDOM) \
&& !defined(NO_BLOCKING_RANDOM_POLL)
/* Block until the kernel entropy pool is seeded, by poll()ing
 * /dev/random for readability before /dev/urandom is used. Returns 0
 * on success or if /dev/random cannot be opened, -1 on poll failure. */
static int
randombytes_block_on_dev_random(void)
{
    struct pollfd pfd;
    int fd;
    int pret;

    fd = open("/dev/random", O_RDONLY);
    if(fd == -1)
    {
        return 0;
    }
    pfd.fd = fd;
    pfd.events = POLLIN;
    pfd.revents = 0;
    do
    {
        pret = poll(&pfd, 1, -1);
    } while(pret < 0 && (errno == EINTR || errno == EAGAIN));
    if(pret != 1)
    {
        (void)close(fd);
        errno = EIO;
        return -1;
    }
    return close(fd);
}
#endif
#ifndef HAVE_SAFE_ARC4RANDOM
/* Open the first usable random device (/dev/urandom, then /dev/random),
 * verifying it is a character/named device and setting close-on-exec.
 * Returns the fd, or -1 with errno = EIO if none can be opened. */
static int
randombytes_salsa20_random_random_dev_open(void)
{
/* LCOV_EXCL_START */
    struct stat st;
    static const char *devices[] = {
#ifndef USE_BLOCKING_RANDOM
        "/dev/urandom",
#endif
        "/dev/random", NULL};
    const char **device = devices;
    int fd;

#if defined(__linux__) && !defined(USE_BLOCKING_RANDOM) \
    && !defined(NO_BLOCKING_RANDOM_POLL)
    /* Wait for the pool to be seeded before trusting /dev/urandom. */
    if(randombytes_block_on_dev_random() != 0)
    {
        return -1;
    }
#endif
    do
    {
        fd = open(*device, O_RDONLY);
        if(fd != -1)
        {
            /* Guard against a regular file planted at the device path. */
            if(fstat(fd, &st) == 0 && (S_ISNAM(st.st_mode) || S_ISCHR(st.st_mode)))
            {
#if defined(F_SETFD) && defined(FD_CLOEXEC)
                (void)fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
#endif
                return fd;
            }
            (void)close(fd);
        }
        else if(errno == EINTR)
        {
            continue;
        }
        device++;
    } while(*device != NULL);
    errno = EIO;
    return -1;
/* LCOV_EXCL_STOP */
}
#endif
#if defined(__dietlibc__) || (defined(SYS_getrandom) && defined(__NR_getrandom))
/* Single getrandom(2) call for <= 256 bytes (the size the kernel
 * guarantees to satisfy without interruption), retried on
 * EINTR/EAGAIN. Returns 0 on full success, -1 otherwise. */
static int
_randombytes_linux_getrandom(void *const buf, const size_t size)
{
    int readnb;

    assert(size <= 256U);
    do
    {
#ifdef __dietlibc__
        readnb = getrandom(buf, size, 0);
#else
        readnb = syscall(SYS_getrandom, buf, (int)size, 0);
#endif
    } while(readnb < 0 && (errno == EINTR || errno == EAGAIN));
    return (readnb == (int)size) - 1;
}

/* Fill a buffer of arbitrary size via getrandom(2), in 256-byte
 * chunks. Returns 0 on success, -1 on failure. */
static int
randombytes_linux_getrandom(void *const buf_, size_t size)
{
    unsigned char *buf = (unsigned char *)buf_;
    size_t chunk_size = 256U;

    do
    {
        if(size < chunk_size)
        {
            chunk_size = size;
            assert(chunk_size > (size_t)0U);
        }
        if(_randombytes_linux_getrandom(buf, chunk_size) != 0)
        {
            return -1;
        }
        size -= chunk_size;
        buf += chunk_size;
    } while(size > (size_t)0U);
    return 0;
}
#endif
/* POSIX init: salt the nonce with the clock, probe RDRAND, then pick
 * the entropy source — arc4random, getrandom(2), or a /dev device —
 * aborting via sodium_misuse() if none is available. errno is
 * preserved across the probes. */
static void
randombytes_salsa20_random_init(void)
{
    const int errno_save = errno;

    stream.nonce = sodium_hrtime();
    global.rdrand_available = sodium_runtime_has_rdrand();
    assert(stream.nonce != (uint64_t)0U);

#ifdef HAVE_SAFE_ARC4RANDOM
    errno = errno_save;
#else

#if defined(SYS_getrandom) && defined(__NR_getrandom)
    {
        /* Probe getrandom(2) once; remember the result for stir(). */
        unsigned char fodder[16];

        if(randombytes_linux_getrandom(fodder, sizeof fodder) == 0)
        {
            global.getrandom_available = 1;
            errno = errno_save;
            return;
        }
        global.getrandom_available = 0;
    }
#endif /* SYS_getrandom */

    if((global.random_data_source_fd =
            randombytes_salsa20_random_random_dev_open())
       == -1)
    {
        sodium_misuse(); /* LCOV_EXCL_LINE */
    }
    errno = errno_save;
#endif /* HAVE_SAFE_ARC4RANDOM */
}
#endif /* _WIN32 */
/*
* (Re)seed the generator using the entropy source
*/
/* (Re)seed: pull KEYBYTES+NONCEBYTES of entropy from the platform
 * source, then derive the working key by running Salsa20 over it, so
 * the raw entropy never becomes the key directly. */
static void
randombytes_salsa20_random_stir(void)
{
    /* entropy buffer: key material followed by a nonce */
    unsigned char
        m0[crypto_stream_salsa20_KEYBYTES + crypto_stream_salsa20_NONCEBYTES];

    /* Drop any pooled output derived from the previous key. */
    memset(stream.rnd32, 0, sizeof stream.rnd32);
    stream.rnd32_outleft = (size_t)0U;
    if(global.initialized == 0)
    {
        randombytes_salsa20_random_init();
        global.initialized = 1;
    }
#ifdef HAVE_GETPID
    global.pid = getpid();
#endif

#ifndef _WIN32

#ifdef HAVE_SAFE_ARC4RANDOM
    arc4random_buf(m0, sizeof m0);
#elif defined(SYS_getrandom) && defined(__NR_getrandom)
    if(global.getrandom_available != 0)
    {
        if(randombytes_linux_getrandom(m0, sizeof m0) != 0)
        {
            sodium_misuse(); /* LCOV_EXCL_LINE */
        }
    }
    else if(global.random_data_source_fd == -1
            || safe_read(global.random_data_source_fd, m0, sizeof m0)
                   != (ssize_t)sizeof m0)
    {
        sodium_misuse(); /* LCOV_EXCL_LINE */
    }
#else
    if(global.random_data_source_fd == -1
       || safe_read(global.random_data_source_fd, m0, sizeof m0)
              != (ssize_t)sizeof m0)
    {
        sodium_misuse(); /* LCOV_EXCL_LINE */
    }
#endif

#else /* _WIN32 */
    if(!RtlGenRandom((PVOID)m0, (ULONG)sizeof m0))
    {
        sodium_misuse(); /* LCOV_EXCL_LINE */
    }
#endif
    /* key = Salsa20(key = m0[0..31], nonce = m0[32..]) keystream */
    crypto_stream_salsa20(stream.key, sizeof stream.key,
                          m0 + crypto_stream_salsa20_KEYBYTES, m0);
    sodium_memzero(m0, sizeof m0);
    stream.initialized = 1;
}
/*
* Reseed the generator if it hasn't been initialized yet
*/
/* Seed on first use; abort if the process fork()ed after seeding, since
 * parent and child would otherwise share the same generator state. */
static void
randombytes_salsa20_random_stir_if_needed(void)
{
#ifdef HAVE_GETPID
    if(stream.initialized == 0)
    {
        randombytes_salsa20_random_stir();
    }
    else if(global.pid != getpid())
    {
        sodium_misuse(); /* LCOV_EXCL_LINE */
    }
#else
    if(stream.initialized == 0)
    {
        randombytes_salsa20_random_stir();
    }
#endif
}
/*
* Close the stream, free global resources
*/
#ifdef _WIN32
/* Windows: nothing to close; just wipe the generator state.
 * Returns 0 if the generator had been initialized, -1 otherwise. */
static int
randombytes_salsa20_random_close(void)
{
    int ret = -1;

    if(global.initialized != 0)
    {
        global.initialized = 0;
        ret = 0;
    }
    sodium_memzero(&stream, sizeof stream);
    return ret;
}
#else
/* POSIX: close the device fd if open, reset bookkeeping, and wipe the
 * generator state. Returns 0 if any source was successfully torn down. */
static int
randombytes_salsa20_random_close(void)
{
    int ret = -1;

    if(global.random_data_source_fd != -1
       && close(global.random_data_source_fd) == 0)
    {
        global.random_data_source_fd = -1;
        global.initialized = 0;
#ifdef HAVE_GETPID
        global.pid = (pid_t)0;
#endif
        ret = 0;
    }

#ifdef HAVE_SAFE_ARC4RANDOM
    /* arc4random needs no teardown; report success. */
    ret = 0;
#endif

#if defined(SYS_getrandom) && defined(__NR_getrandom)
    if(global.getrandom_available != 0)
    {
        /* getrandom(2) needs no teardown either. */
        ret = 0;
    }
#endif
    sodium_memzero(&stream, sizeof stream);
    return ret;
}
#endif
/*
* RDRAND is only used to mitigate prediction if a key is compromised
*/
/* Mix 32 bits of RDRAND output into the tail of the key, as defense in
 * depth if the key were ever compromised. No-op without RDRAND. */
static void
randombytes_salsa20_random_xorhwrand(void)
{
/* LCOV_EXCL_START */
#ifdef HAVE_RDRAND
    unsigned int r;

    if(global.rdrand_available == 0)
    {
        return;
    }
    /* Failure of _rdrand32_step leaves r unmixed; acceptable since
     * RDRAND is only a supplementary source here. */
    (void)_rdrand32_step(&r);
    *(uint32_t *)(void *)&stream.key[crypto_stream_salsa20_KEYBYTES - 4] ^=
        (uint32_t)r;
#endif
/* LCOV_EXCL_STOP */
}
/*
* XOR the key with another same-length secret
*/
/* XOR the generator key with another secret of the same length,
 * destroying the old key's value (forward secrecy after pool refills). */
static inline void
randombytes_salsa20_random_xorkey(const unsigned char *const mix)
{
    unsigned char *dst = stream.key;
    const unsigned char *src = mix;
    size_t remaining = sizeof stream.key;

    while (remaining-- > (size_t) 0U) {
        *dst++ ^= *src++;
    }
}
/*
* Put `size` random bytes into `buf` and overwrite the key
*/
/* Fill `buf` with `size` bytes of Salsa20 keystream, then ratchet the
 * key so past outputs cannot be reconstructed from the new key. */
static void
randombytes_salsa20_random_buf(void *const buf, const size_t size)
{
    size_t i;
    int ret;

    randombytes_salsa20_random_stir_if_needed();
    COMPILER_ASSERT(sizeof stream.nonce == crypto_stream_salsa20_NONCEBYTES);
#if defined(ULONG_LONG_MAX) && defined(SIZE_MAX)
#if SIZE_MAX > ULONG_LONG_MAX
    /* coverity[result_independent_of_operands] */
    assert(size <= ULONG_LONG_MAX);
#endif
#endif
    ret = crypto_stream_salsa20((unsigned char *)buf, (unsigned long long)size,
                                (unsigned char *)&stream.nonce, stream.key);
    assert(ret == 0);
    /* Bind the key to the request size as extra churn. */
    for(i = 0U; i < sizeof size; i++)
    {
        stream.key[i] ^= ((const unsigned char *)(const void *)&size)[i];
    }
    randombytes_salsa20_random_xorhwrand();
    stream.nonce++;
    /* Ratchet: re-encrypt the key with itself under the new nonce. */
    crypto_stream_salsa20_xor(stream.key, stream.key, sizeof stream.key,
                              (unsigned char *)&stream.nonce, stream.key);
}
/*
* Pop a 32-bit value from the random pool
*
* Overwrite the key after the pool gets refilled.
*/
/* Pop a 32-bit value from the pre-generated pool. When the pool is
 * empty, refill it with keystream; the last KEYBYTES of the refill are
 * consumed to ratchet the key rather than returned to callers. Each
 * value is erased from the pool after being read. */
static uint32_t
randombytes_salsa20_random(void)
{
    uint32_t val;
    int ret;

    COMPILER_ASSERT(sizeof stream.rnd32 >= (sizeof stream.key) + (sizeof val));
    COMPILER_ASSERT(((sizeof stream.rnd32) - (sizeof stream.key)) % sizeof val
                    == (size_t)0U);
    if(stream.rnd32_outleft <= (size_t)0U)
    {
        randombytes_salsa20_random_stir_if_needed();
        COMPILER_ASSERT(sizeof stream.nonce == crypto_stream_salsa20_NONCEBYTES);
        ret = crypto_stream_salsa20((unsigned char *)stream.rnd32,
                                    (unsigned long long)sizeof stream.rnd32,
                                    (unsigned char *)&stream.nonce, stream.key);
        assert(ret == 0);
        stream.rnd32_outleft = (sizeof stream.rnd32) - (sizeof stream.key);
        randombytes_salsa20_random_xorhwrand();
        /* Use the pool's tail to ratchet the key, then wipe it. */
        randombytes_salsa20_random_xorkey(&stream.rnd32[stream.rnd32_outleft]);
        memset(&stream.rnd32[stream.rnd32_outleft], 0, sizeof stream.key);
        stream.nonce++;
    }
    stream.rnd32_outleft -= sizeof val;
    memcpy(&val, &stream.rnd32[stream.rnd32_outleft], sizeof val);
    memset(&stream.rnd32[stream.rnd32_outleft], 0, sizeof val);
    return val;
}
/* Backend identifier reported by randombytes_implementation_name(). */
static const char *
randombytes_salsa20_implementation_name(void)
{
    static const char *const name = "salsa20";

    return name;
}
/* Backend table plugged into the randombytes front-end. `uniform` is
 * NULL, so the generic rejection-sampling fallback is used. */
struct randombytes_implementation randombytes_salsa20_implementation = {
    SODIUM_C99(.implementation_name =) randombytes_salsa20_implementation_name,
    SODIUM_C99(.random =) randombytes_salsa20_random,
    SODIUM_C99(.stir =) randombytes_salsa20_random_stir,
    SODIUM_C99(.uniform =) NULL,
    SODIUM_C99(.buf =) randombytes_salsa20_random_buf,
    SODIUM_C99(.close =) randombytes_salsa20_random_close};

@ -0,0 +1,33 @@
#include <sodium/crypto_scalarmult.h>
/* Name of the default scalar-multiplication primitive. */
const char *
crypto_scalarmult_primitive(void)
{
    return crypto_scalarmult_PRIMITIVE;
}

/* q = n * base point; forwards to the Curve25519 implementation. */
int
crypto_scalarmult_base(unsigned char *q, const unsigned char *n)
{
    return crypto_scalarmult_curve25519_base(q, n);
}

/* q = n * p for an arbitrary point p; forwards to Curve25519. */
int
crypto_scalarmult(unsigned char *q, const unsigned char *n,
                  const unsigned char *p)
{
    return crypto_scalarmult_curve25519(q, n, p);
}

/* Runtime accessors for the output and scalar lengths. */
size_t
crypto_scalarmult_bytes(void)
{
    return crypto_scalarmult_BYTES;
}

size_t
crypto_scalarmult_scalarbytes(void)
{
    return crypto_scalarmult_SCALARBYTES;
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -0,0 +1,40 @@
{
{ 25967493, -14356035, 29566456, 3660896, -12694345, 4014787, 27544626, -11754271, -6079156, 2047605 },
{ -12545711, 934262, -2722910, 3049990, -727428, 9406986, 12720692, 5043384, 19500929, -15469378 },
{ -8738181, 4489570, 9688441, -14785194, 10184609, -12363380, 29287919, 11864899, -24514362, -4438546 }
},
{
{ 15636291, -9688557, 24204773, -7912398, 616977, -16685262, 27787600, -14772189, 28944400, -1550024 },
{ 16568933, 4717097, -11556148, -1102322, 15682896, -11807043, 16354577, -11775962, 7689662, 11199574 },
{ 30464156, -5976125, -11779434, -15670865, 23220365, 15915852, 7512774, 10017326, -17749093, -9920357 }
},
{
{ 10861363, 11473154, 27284546, 1981175, -30064349, 12577861, 32867885, 14515107, -15438304, 10819380 },
{ 4708026, 6336745, 20377586, 9066809, -11272109, 6594696, -25653668, 12483688, -12668491, 5581306 },
{ 19563160, 16186464, -29386857, 4097519, 10237984, -4348115, 28542350, 13850243, -23678021, -15815942 }
},
{
{ 5153746, 9909285, 1723747, -2777874, 30523605, 5516873, 19480852, 5230134, -23952439, -15175766 },
{ -30269007, -3463509, 7665486, 10083793, 28475525, 1649722, 20654025, 16520125, 30598449, 7715701 },
{ 28881845, 14381568, 9657904, 3680757, -20181635, 7843316, -31400660, 1370708, 29794553, -1409300 }
},
{
{ -22518993, -6692182, 14201702, -8745502, -23510406, 8844726, 18474211, -1361450, -13062696, 13821877 },
{ -6455177, -7839871, 3374702, -4740862, -27098617, -10571707, 31655028, -7212327, 18853322, -14220951 },
{ 4566830, -12963868, -28974889, -12240689, -7602672, -2830569, -8514358, -10431137, 2207753, -3209784 }
},
{
{ -25154831, -4185821, 29681144, 7868801, -6854661, -9423865, -12437364, -663000, -31111463, -16132436 },
{ 25576264, -2703214, 7349804, -11814844, 16472782, 9300885, 3844789, 15725684, 171356, 6466918 },
{ 23103977, 13316479, 9739013, -16149481, 817875, -15038942, 8965339, -14088058, -30714912, 16193877 }
},
{
{ -33521811, 3180713, -2394130, 14003687, -16903474, -16270840, 17238398, 4729455, -18074513, 9256800 },
{ -25182317, -4174131, 32336398, 5036987, -21236817, 11360617, 22616405, 9761698, -19827198, 630305 },
{ -13720693, 2639453, -24237460, -7406481, 9494427, -5774029, -6554551, -15960994, -2449256, -14291300 }
},
{
{ -3151181, -5046075, 9282714, 6866145, -31907062, -863023, -18940575, 15033784, 25105118, -7894876 },
{ -24326370, 15950226, -31801215, -14592823, -11662737, -5090925, 1573892, -2625887, 2198790, -15804619 },
{ -3099351, 10324967, -2241613, 7453183, -5446979, -2735503, -13812022, -16236442, -32461234, -12290683 }
}

@ -0,0 +1,20 @@
/* Ed25519/Curve25519 field constants, each as an fe25519: ten signed
 * limbs alternating 26/25 bits (radix 2^25.5). */

/* 37095705934669439343138083508754565189542113879843219016388785533085940283555 */
static const fe25519 d = {
    -10913610, 13857413, -15372611, 6949391, 114729, -8787816, -6275908, -3247719, -18696448, -12055116
};

/* 2 * d =
 * 16295367250680780974490674513165176452449235426866156013048779062215315747161
 */
static const fe25519 d2 = {
    -21827239, -5839606, -30745221, 13898782, 229458, 15978800, -12551817, -6495438, 29715968, 9444199 };

/* sqrt(-1) */
static const fe25519 sqrtm1 = {
    -32595792, -7943725, 9377950, 3500415, 12389472, -272473, -25146209, -2005654, 326686, 11406482
};

/* A = 486662 */
static const fe25519 curve25519_A = {
    486662, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

@ -0,0 +1,220 @@
/*
 Decode the 32-byte little-endian string s into the ten-limb field element h.
 Limbs alternate 26 and 25 bits (value = h0 + 2^26 h1 + 2^51 h2 + ...).
 Ignores top bit of h.
 */
void
fe25519_frombytes(fe25519 h, const unsigned char *s)
{
    /* Load the 255-bit integer into ten limbs shifted into radix position. */
    int64_t h0 = load_4(s);
    int64_t h1 = load_3(s + 4) << 6;
    int64_t h2 = load_3(s + 7) << 5;
    int64_t h3 = load_3(s + 10) << 3;
    int64_t h4 = load_3(s + 13) << 2;
    int64_t h5 = load_4(s + 16);
    int64_t h6 = load_3(s + 20) << 7;
    int64_t h7 = load_3(s + 23) << 5;
    int64_t h8 = load_3(s + 26) << 4;
    int64_t h9 = (load_3(s + 29) & 8388607) << 2; /* 8388607 = 2^23-1: drops the top bit */
    int64_t carry0;
    int64_t carry1;
    int64_t carry2;
    int64_t carry3;
    int64_t carry4;
    int64_t carry5;
    int64_t carry6;
    int64_t carry7;
    int64_t carry8;
    int64_t carry9;

    /* Carry from the odd (25-bit) limbs; the rounding bias (1<<24) makes the
       arithmetic shift act as a signed division by 2^25. */
    carry9 = (h9 + (int64_t)(1L << 24)) >> 25;
    h0 += carry9 * 19; /* wrap around: 2^255 == 19 (mod 2^255-19) */
    h9 -= carry9 * ((uint64_t) 1L << 25);
    carry1 = (h1 + (int64_t)(1L << 24)) >> 25;
    h2 += carry1;
    h1 -= carry1 * ((uint64_t) 1L << 25);
    carry3 = (h3 + (int64_t)(1L << 24)) >> 25;
    h4 += carry3;
    h3 -= carry3 * ((uint64_t) 1L << 25);
    carry5 = (h5 + (int64_t)(1L << 24)) >> 25;
    h6 += carry5;
    h5 -= carry5 * ((uint64_t) 1L << 25);
    carry7 = (h7 + (int64_t)(1L << 24)) >> 25;
    h8 += carry7;
    h7 -= carry7 * ((uint64_t) 1L << 25);
    /* Carry from the even (26-bit) limbs. */
    carry0 = (h0 + (int64_t)(1L << 25)) >> 26;
    h1 += carry0;
    h0 -= carry0 * ((uint64_t) 1L << 26);
    carry2 = (h2 + (int64_t)(1L << 25)) >> 26;
    h3 += carry2;
    h2 -= carry2 * ((uint64_t) 1L << 26);
    carry4 = (h4 + (int64_t)(1L << 25)) >> 26;
    h5 += carry4;
    h4 -= carry4 * ((uint64_t) 1L << 26);
    carry6 = (h6 + (int64_t)(1L << 25)) >> 26;
    h7 += carry6;
    h6 -= carry6 * ((uint64_t) 1L << 26);
    carry8 = (h8 + (int64_t)(1L << 25)) >> 26;
    h9 += carry8;
    h8 -= carry8 * ((uint64_t) 1L << 26);

    /* After the carries every limb fits in 32 bits. */
    h[0] = (int32_t) h0;
    h[1] = (int32_t) h1;
    h[2] = (int32_t) h2;
    h[3] = (int32_t) h3;
    h[4] = (int32_t) h4;
    h[5] = (int32_t) h5;
    h[6] = (int32_t) h6;
    h[7] = (int32_t) h7;
    h[8] = (int32_t) h8;
    h[9] = (int32_t) h9;
}
/*
 fe25519_reduce: write into h the canonical (fully reduced mod p) form of f.

 Preconditions:
 |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.

 Write p=2^255-19; q=floor(h/p).
 Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))).

 Proof:
 Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4.
 Also have |h-2^230 h9|<2^231 so |19 2^(-255)(h-2^230 h9)|<1/4.

 Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9).
 Then 0<y<1.

 Write r=h-pq.
 Have 0<=r<=p-1=2^255-20.
 Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1.

 Write x=r+19(2^-255)r+y.
 Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q.

 Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1))
 so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q.
 */
static void
fe25519_reduce(fe25519 h, const fe25519 f)
{
    int32_t h0 = f[0];
    int32_t h1 = f[1];
    int32_t h2 = f[2];
    int32_t h3 = f[3];
    int32_t h4 = f[4];
    int32_t h5 = f[5];
    int32_t h6 = f[6];
    int32_t h7 = f[7];
    int32_t h8 = f[8];
    int32_t h9 = f[9];

    int32_t q;
    int32_t carry0, carry1, carry2, carry3, carry4, carry5, carry6, carry7, carry8, carry9;

    /* Compute q per the claim above by rippling the rounded sum through all limbs. */
    q = (19 * h9 + ((uint32_t) 1L << 24)) >> 25;
    q = (h0 + q) >> 26;
    q = (h1 + q) >> 25;
    q = (h2 + q) >> 26;
    q = (h3 + q) >> 25;
    q = (h4 + q) >> 26;
    q = (h5 + q) >> 25;
    q = (h6 + q) >> 26;
    q = (h7 + q) >> 25;
    q = (h8 + q) >> 26;
    q = (h9 + q) >> 25;

    /* Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20. */
    h0 += 19 * q;
    /* Goal: Output h-2^255 q, which is between 0 and 2^255-20. */

    /* Propagate carries; the final carry9 is discarded (that is the -2^255 q). */
    carry0 = h0 >> 26;
    h1 += carry0;
    h0 -= carry0 * ((uint32_t) 1L << 26);
    carry1 = h1 >> 25;
    h2 += carry1;
    h1 -= carry1 * ((uint32_t) 1L << 25);
    carry2 = h2 >> 26;
    h3 += carry2;
    h2 -= carry2 * ((uint32_t) 1L << 26);
    carry3 = h3 >> 25;
    h4 += carry3;
    h3 -= carry3 * ((uint32_t) 1L << 25);
    carry4 = h4 >> 26;
    h5 += carry4;
    h4 -= carry4 * ((uint32_t) 1L << 26);
    carry5 = h5 >> 25;
    h6 += carry5;
    h5 -= carry5 * ((uint32_t) 1L << 25);
    carry6 = h6 >> 26;
    h7 += carry6;
    h6 -= carry6 * ((uint32_t) 1L << 26);
    carry7 = h7 >> 25;
    h8 += carry7;
    h7 -= carry7 * ((uint32_t) 1L << 25);
    carry8 = h8 >> 26;
    h9 += carry8;
    h8 -= carry8 * ((uint32_t) 1L << 26);
    carry9 = h9 >> 25;
    h9 -= carry9 * ((uint32_t) 1L << 25);

    h[0] = h0;
    h[1] = h1;
    h[2] = h2;
    h[3] = h3;
    h[4] = h4;
    h[5] = h5;
    h[6] = h6;
    h[7] = h7;
    h[8] = h8;
    h[9] = h9;
}
/*
 fe25519_tobytes: serialize field element h to 32 little-endian bytes,
 after reducing it to canonical form.

 Goal: Output h0+...+2^255 h10-2^255 q, which is between 0 and 2^255-20.
 Have h0+...+2^230 h9 between 0 and 2^255-1;
 evidently 2^255 h10-2^255 q = 0.

 Goal: Output h0+...+2^230 h9.
 */
void
fe25519_tobytes(unsigned char *s, const fe25519 h)
{
    fe25519 t;

    fe25519_reduce(t, h);
    /* Re-pack the 26/25-bit limbs into a dense 255-bit little-endian string.
       At each limb boundary a byte mixes the top bits of one limb with the
       bottom bits of the next, hence the shift-or pairs. */
    s[0] = t[0] >> 0;
    s[1] = t[0] >> 8;
    s[2] = t[0] >> 16;
    s[3] = (t[0] >> 24) | (t[1] * ((uint32_t) 1 << 2));
    s[4] = t[1] >> 6;
    s[5] = t[1] >> 14;
    s[6] = (t[1] >> 22) | (t[2] * ((uint32_t) 1 << 3));
    s[7] = t[2] >> 5;
    s[8] = t[2] >> 13;
    s[9] = (t[2] >> 21) | (t[3] * ((uint32_t) 1 << 5));
    s[10] = t[3] >> 3;
    s[11] = t[3] >> 11;
    s[12] = (t[3] >> 19) | (t[4] * ((uint32_t) 1 << 6));
    s[13] = t[4] >> 2;
    s[14] = t[4] >> 10;
    s[15] = t[4] >> 18;
    s[16] = t[5] >> 0;
    s[17] = t[5] >> 8;
    s[18] = t[5] >> 16;
    s[19] = (t[5] >> 24) | (t[6] * ((uint32_t) 1 << 1));
    s[20] = t[6] >> 7;
    s[21] = t[6] >> 15;
    s[22] = (t[6] >> 23) | (t[7] * ((uint32_t) 1 << 3));
    s[23] = t[7] >> 5;
    s[24] = t[7] >> 13;
    s[25] = (t[7] >> 21) | (t[8] * ((uint32_t) 1 << 4));
    s[26] = t[8] >> 4;
    s[27] = t[8] >> 12;
    s[28] = (t[8] >> 20) | (t[9] * ((uint32_t) 1 << 6));
    s[29] = t[9] >> 2;
    s[30] = t[9] >> 10;
    s[31] = t[9] >> 18;
}

@ -0,0 +1,188 @@
#include <stddef.h>
#include <stdint.h>
#include "../scalarmult_curve25519.h"
#include <sodium/export.h>
#include <sodium/private/ed25519_ref10.h>
#include <sodium/utils.h>
#include "x25519_ref10.h"
/*
 * Reject small order points early to mitigate the implications of
 * unexpected optimizations that would affect the ref10 code.
 * See https://eprint.iacr.org/2017/806.pdf for reference.
 *
 * Returns 1 iff s equals one of the 12 blacklisted encodings below.
 * The comparison is branch-free: timing does not depend on s.
 */
static int
has_small_order(const unsigned char s[32])
{
    /* Encodings of the low-order points of Curve25519 (and their
       non-canonical aliases with the high bit variations). */
    CRYPTO_ALIGN(16)
    static const unsigned char blacklist[][32] = {
        {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
        {0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
        {0xe0, 0xeb, 0x7a, 0x7c, 0x3b, 0x41, 0xb8, 0xae, 0x16, 0x56, 0xe3,
         0xfa, 0xf1, 0x9f, 0xc4, 0x6a, 0xda, 0x09, 0x8d, 0xeb, 0x9c, 0x32,
         0xb1, 0xfd, 0x86, 0x62, 0x05, 0x16, 0x5f, 0x49, 0xb8, 0x00},
        {0x5f, 0x9c, 0x95, 0xbc, 0xa3, 0x50, 0x8c, 0x24, 0xb1, 0xd0, 0xb1,
         0x55, 0x9c, 0x83, 0xef, 0x5b, 0x04, 0x44, 0x5c, 0xc4, 0x58, 0x1c,
         0x8e, 0x86, 0xd8, 0x22, 0x4e, 0xdd, 0xd0, 0x9f, 0x11, 0x57},
        {0xec, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
         0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
         0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f},
        {0xed, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
         0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
         0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f},
        {0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
         0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
         0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f},
        {0xcd, 0xeb, 0x7a, 0x7c, 0x3b, 0x41, 0xb8, 0xae, 0x16, 0x56, 0xe3,
         0xfa, 0xf1, 0x9f, 0xc4, 0x6a, 0xda, 0x09, 0x8d, 0xeb, 0x9c, 0x32,
         0xb1, 0xfd, 0x86, 0x62, 0x05, 0x16, 0x5f, 0x49, 0xb8, 0x80},
        {0x4c, 0x9c, 0x95, 0xbc, 0xa3, 0x50, 0x8c, 0x24, 0xb1, 0xd0, 0xb1,
         0x55, 0x9c, 0x83, 0xef, 0x5b, 0x04, 0x44, 0x5c, 0xc4, 0x58, 0x1c,
         0x8e, 0x86, 0xd8, 0x22, 0x4e, 0xdd, 0xd0, 0x9f, 0x11, 0xd7},
        {0xd9, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
         0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
         0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
        {0xda, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
         0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
         0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
        {0xdb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
         0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
         0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}};
    unsigned char c[12] = {0};
    unsigned int  k;
    size_t        i, j;

    /* c[] must have one slot per blacklist entry. */
    COMPILER_ASSERT(12 == sizeof blacklist / sizeof blacklist[0]);
    /* c[i] becomes 0 iff s matches blacklist[i] in every byte. */
    for(j = 0; j < 32; j++)
    {
        for(i = 0; i < sizeof blacklist / sizeof blacklist[0]; i++)
        {
            c[i] |= s[j] ^ blacklist[i][j];
        }
    }
    /* Fold: (c[i] - 1) underflows to set bit 8 exactly when c[i] == 0. */
    k = 0;
    for(i = 0; i < sizeof blacklist / sizeof blacklist[0]; i++)
    {
        k |= (c[i] - 1);
    }
    return (int)((k >> 8) & 1);
}
/*
 * X25519 scalar multiplication, portable ref10 implementation:
 * writes the shared point n*p into q (32 bytes each).
 * Returns -1 if p is a blacklisted small-order point, 0 otherwise.
 * Uses a constant-time Montgomery ladder over the 255 scalar bits.
 */
static int
crypto_scalarmult_curve25519_ref10(unsigned char *q, const unsigned char *n,
                                   const unsigned char *p)
{
    unsigned char *t = q; /* the output buffer doubles as scratch for the clamped scalar */
    unsigned int   i;
    fe25519        x1;
    fe25519        x2;
    fe25519        z2;
    fe25519        x3;
    fe25519        z3;
    fe25519        tmp0;
    fe25519        tmp1;
    int            pos;
    unsigned int   swap;
    unsigned int   b;

    if(has_small_order(p))
    {
        return -1;
    }
    for(i = 0; i < 32; i++)
    {
        t[i] = n[i];
    }
    /* X25519 clamping: clear low 3 bits and bit 255, set bit 254. */
    t[0] &= 248;
    t[31] &= 127;
    t[31] |= 64;
    /* Ladder state: (x2:z2) = identity, (x3:z3) = input point. */
    fe25519_frombytes(x1, p);
    fe25519_1(x2);
    fe25519_0(z2);
    fe25519_copy(x3, x1);
    fe25519_1(z3);

    swap = 0;
    for(pos = 254; pos >= 0; --pos)
    {
        b = t[pos / 8] >> (pos & 7);
        b &= 1;
        /* Conditional swap instead of branching keeps timing scalar-independent. */
        swap ^= b;
        fe25519_cswap(x2, x3, swap);
        fe25519_cswap(z2, z3, swap);
        swap = b;
        /* One combined Montgomery differential double-and-add step. */
        fe25519_sub(tmp0, x3, z3);
        fe25519_sub(tmp1, x2, z2);
        fe25519_add(x2, x2, z2);
        fe25519_add(z2, x3, z3);
        fe25519_mul(z3, tmp0, x2);
        fe25519_mul(z2, z2, tmp1);
        fe25519_sq(tmp0, tmp1);
        fe25519_sq(tmp1, x2);
        fe25519_add(x3, z3, z2);
        fe25519_sub(z2, z3, z2);
        fe25519_mul(x2, tmp1, tmp0);
        fe25519_sub(tmp1, tmp1, tmp0);
        fe25519_sq(z2, z2);
        fe25519_scalar_product(z3, tmp1, 121666); /* (A+2)/4 for A = 486662 */
        fe25519_sq(x3, x3);
        fe25519_add(tmp0, tmp0, z3);
        fe25519_mul(z3, x1, z2);
        fe25519_mul(z2, tmp1, tmp0);
    }
    fe25519_cswap(x2, x3, swap);
    fe25519_cswap(z2, z3, swap);

    /* Convert projective (x2:z2) to affine x and serialize. */
    fe25519_invert(z2, z2);
    fe25519_mul(x2, x2, z2);
    fe25519_tobytes(q, x2);

    return 0;
}
/* Map an Edwards point (given by its Y and Z coordinates) to the
   corresponding Montgomery u-coordinate: u = (Z + Y) / (Z - Y). */
static void
edwards_to_montgomery(fe25519 montgomeryX, const fe25519 edwardsY,
                      const fe25519 edwardsZ)
{
    fe25519 num;
    fe25519 den;

    fe25519_add(num, edwardsZ, edwardsY);
    fe25519_sub(den, edwardsZ, edwardsY);
    fe25519_invert(den, den);
    fe25519_mul(montgomeryX, num, den);
}
/* X25519 base-point multiplication (ref10): q = n * 9.
   Computed via the Edwards-curve fixed-base routine, then mapped back
   to the Montgomery u-coordinate. Always returns 0. */
static int
crypto_scalarmult_curve25519_ref10_base(unsigned char *q,
                                        const unsigned char *n)
{
    unsigned char *clamped = q; /* clamp the scalar in place in the output buffer */
    ge25519_p3     point;
    fe25519        montgomery_x;
    unsigned int   idx;

    for(idx = 0; idx < 32; idx++)
    {
        clamped[idx] = n[idx];
    }
    /* Standard X25519 clamping. */
    clamped[0] &= 248;
    clamped[31] &= 127;
    clamped[31] |= 64;

    ge25519_scalarmult_base(&point, clamped);
    edwards_to_montgomery(montgomery_x, point.Y, point.Z);
    fe25519_tobytes(q, montgomery_x);
    return 0;
}
/* Vtable entry exposing the portable ref10 routines to the
   implementation selector in scalarmult_curve25519.c. */
struct crypto_scalarmult_curve25519_implementation
    crypto_scalarmult_curve25519_ref10_implementation = {
        SODIUM_C99(.mult =) crypto_scalarmult_curve25519_ref10,
        SODIUM_C99(.mult_base =) crypto_scalarmult_curve25519_ref10_base};

@ -0,0 +1,10 @@
#ifndef x25519_ref10_H
#define x25519_ref10_H

#include <sodium/crypto_scalarmult_curve25519.h>
#include "../scalarmult_curve25519.h"

/* Portable (no SIMD/asm required) X25519 implementation. */
extern struct crypto_scalarmult_curve25519_implementation
    crypto_scalarmult_curve25519_ref10_implementation;
#endif

@ -0,0 +1,25 @@
#ifdef IN_SANDY2X

/*
  Read-only constants shared by the sandy2x assembly routines.
  Each .quad pair is one 128-bit SSE/AVX lane pair.
  REDMASK51 is from amd64-51/consts.s.
*/
#include "consts_namespace.h"
.data
.p2align 4
v0_0: .quad 0, 0
v1_0: .quad 1, 0
v2_1: .quad 2, 1
v9_0: .quad 9, 0
v9_9: .quad 9, 9
v19_19: .quad 19, 19
v38_1: .quad 38, 1
v38_38: .quad 38, 38
v121666_121666: .quad 121666, 121666
m25: .quad 33554431, 33554431 /* 2^25 - 1 */
m26: .quad 67108863, 67108863 /* 2^26 - 1 */
subc0: .quad 0x07FFFFDA, 0x03FFFFFE
subc2: .quad 0x07FFFFFE, 0x03FFFFFE
REDMASK51: .quad 0x0007FFFFFFFFFFFF /* 2^51 - 1: limb mask for radix-51 reduction */
#endif

@ -0,0 +1,20 @@
#ifndef consts_namespace_H
#define consts_namespace_H

/* Prefix the short assembly symbol names with the full library namespace
   so they cannot collide with other objects at link time. */
#define v0_0 crypto_scalarmult_curve25519_sandy2x_v0_0
#define v1_0 crypto_scalarmult_curve25519_sandy2x_v1_0
#define v2_1 crypto_scalarmult_curve25519_sandy2x_v2_1
#define v9_0 crypto_scalarmult_curve25519_sandy2x_v9_0
#define v9_9 crypto_scalarmult_curve25519_sandy2x_v9_9
#define v19_19 crypto_scalarmult_curve25519_sandy2x_v19_19
#define v38_1 crypto_scalarmult_curve25519_sandy2x_v38_1
#define v38_38 crypto_scalarmult_curve25519_sandy2x_v38_38
#define v121666_121666 crypto_scalarmult_curve25519_sandy2x_v121666_121666
#define m25 crypto_scalarmult_curve25519_sandy2x_m25
#define m26 crypto_scalarmult_curve25519_sandy2x_m26
#define subc0 crypto_scalarmult_curve25519_sandy2x_subc0
#define subc2 crypto_scalarmult_curve25519_sandy2x_subc2
#define REDMASK51 crypto_scalarmult_curve25519_sandy2x_REDMASK51

#endif /* ifndef consts_namespace_H */

@ -0,0 +1,114 @@
/*
This file is adapted from ref10/scalarmult.c:
The code for Mongomery ladder is replace by the ladder assembly function;
Inversion is done in the same way as amd64-51/.
(fe is first converted into fe51 after Mongomery ladder)
*/
#include <stddef.h>
#ifdef HAVE_AVX_ASM
#include "utils.h"
#include "curve25519_sandy2x.h"
#include "../scalarmult_curve25519.h"
#include "fe.h"
#include "fe51.h"
#include "ladder.h"
#include "ladder_base.h"
/* Limb aliases into var[]: the assembly ladder() works on (x1, x2, z2)
   laid out contiguously in this order. */
#define x1 var[0]
#define x2 var[1]
#define z2 var[2]

/*
 * X25519 scalar multiplication using the AVX ladder: q = n * p.
 * Always returns 0 (small-order rejection is done by the caller wrapper).
 */
static int
crypto_scalarmult_curve25519_sandy2x(unsigned char *q, const unsigned char *n,
                                     const unsigned char *p)
{
    unsigned char *t = q; /* clamp the scalar in place in the output buffer */
    fe             var[3];
    fe51           x_51;
    fe51           z_51;
    unsigned int   i;

    for (i = 0; i < 32; i++) {
        t[i] = n[i];
    }
    /* X25519 clamping. */
    t[0] &= 248;
    t[31] &= 127;
    t[31] |= 64;
    fe_frombytes(x1, p);
    ladder(var, t);
    /* Repack the ten 26/25-bit limbs into five 51-bit limbs (see file header:
       fe is converted to fe51 after the Montgomery ladder). */
    z_51.v[0] = (z2[1] << 26) + z2[0];
    z_51.v[1] = (z2[3] << 26) + z2[2];
    z_51.v[2] = (z2[5] << 26) + z2[4];
    z_51.v[3] = (z2[7] << 26) + z2[6];
    z_51.v[4] = (z2[9] << 26) + z2[8];
    x_51.v[0] = (x2[1] << 26) + x2[0];
    x_51.v[1] = (x2[3] << 26) + x2[2];
    x_51.v[2] = (x2[5] << 26) + x2[4];
    x_51.v[3] = (x2[7] << 26) + x2[6];
    x_51.v[4] = (x2[9] << 26) + x2[8];
    /* Affine x = x2 / z2, then serialize. */
    fe51_invert(&z_51, &z_51);
    fe51_mul(&x_51, &x_51, &z_51);
    fe51_pack(q, &x_51);

    return 0;
}
/* ladder_base() uses a different layout: only (x2, z2), no input point. */
#undef x2
#undef z2
#define x2 var[0]
#define z2 var[1]

/*
 * X25519 base-point multiplication using the AVX fixed-base ladder:
 * q = n * basepoint. Always returns 0.
 */
static int
crypto_scalarmult_curve25519_sandy2x_base(unsigned char *q,
                                          const unsigned char *n)
{
    unsigned char *t = q; /* clamp the scalar in place in the output buffer */
    fe             var[3];
    fe51           x_51;
    fe51           z_51;
    unsigned int   i;

    for (i = 0;i < 32; i++) {
        t[i] = n[i];
    }
    /* X25519 clamping. */
    t[0] &= 248;
    t[31] &= 127;
    t[31] |= 64;
    ladder_base(var, t);
    /* Repack ten 26/25-bit limbs into five 51-bit limbs. */
    z_51.v[0] = (z2[1] << 26) + z2[0];
    z_51.v[1] = (z2[3] << 26) + z2[2];
    z_51.v[2] = (z2[5] << 26) + z2[4];
    z_51.v[3] = (z2[7] << 26) + z2[6];
    z_51.v[4] = (z2[9] << 26) + z2[8];
    x_51.v[0] = (x2[1] << 26) + x2[0];
    x_51.v[1] = (x2[3] << 26) + x2[2];
    x_51.v[2] = (x2[5] << 26) + x2[4];
    x_51.v[3] = (x2[7] << 26) + x2[6];
    x_51.v[4] = (x2[9] << 26) + x2[8];
    /* Affine x = x2 / z2, then serialize. */
    fe51_invert(&z_51, &z_51);
    fe51_mul(&x_51, &x_51, &z_51);
    fe51_pack(q, &x_51);

    return 0;
}
/* Vtable entry exposing the AVX (sandy2x) routines to the
   implementation selector in scalarmult_curve25519.c. */
struct crypto_scalarmult_curve25519_implementation
    crypto_scalarmult_curve25519_sandy2x_implementation = {
        SODIUM_C99(.mult = ) crypto_scalarmult_curve25519_sandy2x,
        SODIUM_C99(.mult_base = ) crypto_scalarmult_curve25519_sandy2x_base
};
#endif

@ -0,0 +1,9 @@
#ifndef curve25519_sandy2x_H
#define curve25519_sandy2x_H

#include <sodium/crypto_scalarmult_curve25519.h>

/* AVX-accelerated X25519 implementation (requires HAVE_AVX_ASM). */
extern struct crypto_scalarmult_curve25519_implementation
    crypto_scalarmult_curve25519_sandy2x_implementation;
#endif

@ -0,0 +1,26 @@
/*
 This file is adapted from ref10/fe.h:
 All the redundant functions are removed.
 */
#ifndef fe_H
#define fe_H

#include <stdint.h>
#include <stdlib.h>

/* Ten unsigned 64-bit limbs (vs. ref10's signed 32-bit limbs). */
typedef uint64_t fe[10];

/*
 fe means field element.
 Here the field is \Z/(2^255-19).
 An element t, entries t[0]...t[9], represents the integer
 t[0]+2^26 t[1]+2^51 t[2]+2^77 t[3]+2^102 t[4]+...+2^230 t[9].
 Bounds on each t[i] vary depending on context.
 */

#define fe_frombytes crypto_scalarmult_curve25519_sandy2x_fe_frombytes
extern void fe_frombytes(fe, const unsigned char *);
#endif

@ -0,0 +1,35 @@
/*
 This file is adapted from amd64-51/fe25519.h:
 'fe25519' is renamed as 'fe51';
 All the redundant functions are removed;
 New function fe51_nsquare is introduced.
 */
#ifndef fe51_H
#define fe51_H

#ifdef __cplusplus
extern "C" {
#endif

#include <stdint.h>
#include <stdlib.h>

#include "fe51_namespace.h"

/* Field element of GF(2^255-19) in radix-2^51: five 51-bit limbs. */
typedef struct
{
    uint64_t v[5];
}
fe51;

/* Serialize to 32 little-endian bytes (canonical form). */
extern void fe51_pack(unsigned char *, const fe51 *);
/* r = f * g mod 2^255-19. */
extern void fe51_mul(fe51 *, const fe51 *, const fe51 *);
/* r = x^(2^n): square n times in a row. */
extern void fe51_nsquare(fe51 *, const fe51 *, int);
/* r = x^-1 mod 2^255-19. */
extern void fe51_invert(fe51 *, const fe51 *);

#ifdef __cplusplus
}
#endif

#endif

@ -0,0 +1,58 @@
/*
 This file is adapted from amd64-51/fe25519_invert.c:
 Loops of squares are replaced by nsquares for better performance.
 */
#include "fe51.h"

#ifdef HAVE_AVX_ASM

#define fe51_square(x, y) fe51_nsquare(x, y, 1)

/*
 * r = x^-1 mod 2^255-19, computed as x^(p-2) = x^(2^255-21) via
 * Fermat's little theorem. The comments track the current exponent
 * along the fixed addition chain (constant sequence of operations,
 * so timing is independent of x).
 */
void
fe51_invert(fe51 *r, const fe51 *x)
{
    fe51 z2;
    fe51 z9;
    fe51 z11;
    fe51 z2_5_0;
    fe51 z2_10_0;
    fe51 z2_20_0;
    fe51 z2_50_0;
    fe51 z2_100_0;
    fe51 t;

    /* 2 */ fe51_square(&z2,x);
    /* 4 */ fe51_square(&t,&z2);
    /* 8 */ fe51_square(&t,&t);
    /* 9 */ fe51_mul(&z9,&t,x);
    /* 11 */ fe51_mul(&z11,&z9,&z2);
    /* 22 */ fe51_square(&t,&z11);
    /* 2^5 - 2^0 = 31 */ fe51_mul(&z2_5_0,&t,&z9);

    /* 2^10 - 2^5 */ fe51_nsquare(&t,&z2_5_0, 5);
    /* 2^10 - 2^0 */ fe51_mul(&z2_10_0,&t,&z2_5_0);

    /* 2^20 - 2^10 */ fe51_nsquare(&t,&z2_10_0, 10);
    /* 2^20 - 2^0 */ fe51_mul(&z2_20_0,&t,&z2_10_0);

    /* 2^40 - 2^20 */ fe51_nsquare(&t,&z2_20_0, 20);
    /* 2^40 - 2^0 */ fe51_mul(&t,&t,&z2_20_0);

    /* 2^50 - 2^10 */ fe51_nsquare(&t,&t,10);
    /* 2^50 - 2^0 */ fe51_mul(&z2_50_0,&t,&z2_10_0);

    /* 2^100 - 2^50 */ fe51_nsquare(&t,&z2_50_0, 50);
    /* 2^100 - 2^0 */ fe51_mul(&z2_100_0,&t,&z2_50_0);

    /* 2^200 - 2^100 */ fe51_nsquare(&t,&z2_100_0, 100);
    /* 2^200 - 2^0 */ fe51_mul(&t,&t,&z2_100_0);

    /* 2^250 - 2^50 */ fe51_nsquare(&t,&t, 50);
    /* 2^250 - 2^0 */ fe51_mul(&t,&t,&z2_50_0);

    /* 2^255 - 2^5 */ fe51_nsquare(&t,&t,5);
    /* 2^255 - 21 */ fe51_mul(r,&t,&z11);
}

#endif

@ -0,0 +1,197 @@
#ifdef IN_SANDY2X
/*
This file is basically amd64-51/fe25519_mul.s.
*/
#include "fe51_namespace.h"
#include "consts_namespace.h"
.text
.p2align 5
#ifdef ASM_HIDE_SYMBOL
ASM_HIDE_SYMBOL fe51_mul
ASM_HIDE_SYMBOL _fe51_mul
#endif
.globl fe51_mul
.globl _fe51_mul
#ifdef __ELF__
.type fe51_mul, @function
.type _fe51_mul, @function
#endif
/*
  fe51_mul(rdi = r, rsi = f, rdx = g): r = f * g mod 2^255-19,
  operands in radix-2^51 (five 64-bit limbs).
  Schoolbook 5x5 multiply; cross terms that wrap past 2^255 are
  pre-multiplied by 19. Returns r in %rax (value of %rdi).
*/
fe51_mul:
_fe51_mul:

/* Prologue: carve an aligned scratch area and save callee-saved registers. */
mov %rsp,%r11
and $31,%r11
add $96,%r11
sub %r11,%rsp
movq %r11,0(%rsp)
movq %r12,8(%rsp)
movq %r13,16(%rsp)
movq %r14,24(%rsp)
movq %r15,32(%rsp)
movq %rbx,40(%rsp)
movq %rbp,48(%rsp)
movq %rdi,56(%rsp)

/* Precompute 19*f[3] and 19*f[4] for the wrapped cross terms. */
mov %rdx,%rcx
movq 24(%rsi),%rdx
imulq $19,%rdx,%rax
movq %rax,64(%rsp)
mulq 16(%rcx)
mov %rax,%r8
mov %rdx,%r9
movq 32(%rsi),%rdx
imulq $19,%rdx,%rax
movq %rax,72(%rsp)
mulq 8(%rcx)
add %rax,%r8
adc %rdx,%r9

/* Accumulate the 25 partial products into five 128-bit limb pairs:
   h0=(r8:r9) h1=(r10:r11) h2=(r12:r13) h3=(r14:r15) h4=(rbx:rbp). */
movq 0(%rsi),%rax
mulq 0(%rcx)
add %rax,%r8
adc %rdx,%r9
movq 0(%rsi),%rax
mulq 8(%rcx)
mov %rax,%r10
mov %rdx,%r11
movq 0(%rsi),%rax
mulq 16(%rcx)
mov %rax,%r12
mov %rdx,%r13
movq 0(%rsi),%rax
mulq 24(%rcx)
mov %rax,%r14
mov %rdx,%r15
movq 0(%rsi),%rax
mulq 32(%rcx)
mov %rax,%rbx
mov %rdx,%rbp
movq 8(%rsi),%rax
mulq 0(%rcx)
add %rax,%r10
adc %rdx,%r11
movq 8(%rsi),%rax
mulq 8(%rcx)
add %rax,%r12
adc %rdx,%r13
movq 8(%rsi),%rax
mulq 16(%rcx)
add %rax,%r14
adc %rdx,%r15
movq 8(%rsi),%rax
mulq 24(%rcx)
add %rax,%rbx
adc %rdx,%rbp
movq 8(%rsi),%rdx
imulq $19,%rdx,%rax
mulq 32(%rcx)
add %rax,%r8
adc %rdx,%r9
movq 16(%rsi),%rax
mulq 0(%rcx)
add %rax,%r12
adc %rdx,%r13
movq 16(%rsi),%rax
mulq 8(%rcx)
add %rax,%r14
adc %rdx,%r15
movq 16(%rsi),%rax
mulq 16(%rcx)
add %rax,%rbx
adc %rdx,%rbp
movq 16(%rsi),%rdx
imulq $19,%rdx,%rax
mulq 24(%rcx)
add %rax,%r8
adc %rdx,%r9
movq 16(%rsi),%rdx
imulq $19,%rdx,%rax
mulq 32(%rcx)
add %rax,%r10
adc %rdx,%r11
movq 24(%rsi),%rax
mulq 0(%rcx)
add %rax,%r14
adc %rdx,%r15
movq 24(%rsi),%rax
mulq 8(%rcx)
add %rax,%rbx
adc %rdx,%rbp
movq 64(%rsp),%rax
mulq 24(%rcx)
add %rax,%r10
adc %rdx,%r11
movq 64(%rsp),%rax
mulq 32(%rcx)
add %rax,%r12
adc %rdx,%r13
movq 32(%rsi),%rax
mulq 0(%rcx)
add %rax,%rbx
adc %rdx,%rbp
movq 72(%rsp),%rax
mulq 16(%rcx)
add %rax,%r10
adc %rdx,%r11
movq 72(%rsp),%rax
mulq 24(%rcx)
add %rax,%r12
adc %rdx,%r13
movq 72(%rsp),%rax
mulq 32(%rcx)
add %rax,%r14
adc %rdx,%r15

/* Reduce each 128-bit limb: keep the low 51 bits, shift the excess
   up into the next limb (shld by 13 = 64-51). */
movq REDMASK51(%rip),%rsi
shld $13,%r8,%r9
and %rsi,%r8
shld $13,%r10,%r11
and %rsi,%r10
add %r9,%r10
shld $13,%r12,%r13
and %rsi,%r12
add %r11,%r12
shld $13,%r14,%r15
and %rsi,%r14
add %r13,%r14
shld $13,%rbx,%rbp
and %rsi,%rbx
add %r15,%rbx
/* Top overflow folds back into limb 0 multiplied by 19. */
imulq $19,%rbp,%rdx
add %rdx,%r8

/* Final carry chain through all five limbs. */
mov %r8,%rdx
shr $51,%rdx
add %r10,%rdx
mov %rdx,%rcx
shr $51,%rdx
and %rsi,%r8
add %r12,%rdx
mov %rdx,%r9
shr $51,%rdx
and %rsi,%rcx
add %r14,%rdx
mov %rdx,%rax
shr $51,%rdx
and %rsi,%r9
add %rbx,%rdx
mov %rdx,%r10
shr $51,%rdx
and %rsi,%rax
imulq $19,%rdx,%rdx
add %rdx,%r8
and %rsi,%r10

/* Store result limbs. */
movq %r8,0(%rdi)
movq %rcx,8(%rdi)
movq %r9,16(%rdi)
movq %rax,24(%rdi)
movq %r10,32(%rdi)

/* Epilogue: restore callee-saved registers and the caller's stack. */
movq 0(%rsp),%r11
movq 8(%rsp),%r12
movq 16(%rsp),%r13
movq 24(%rsp),%r14
movq 32(%rsp),%r15
movq 40(%rsp),%rbx
movq 48(%rsp),%rbp
add %r11,%rsp
mov %rdi,%rax
mov %rsi,%rdx
ret
#endif

@ -0,0 +1,16 @@
#ifndef fe51_namespace_H
#define fe51_namespace_H

/* Prefix the short fe51 symbol names (including the leading-underscore
   aliases emitted by the assembly) with the full library namespace to
   avoid link-time collisions. */
#define fe51 crypto_scalarmult_curve25519_sandy2x_fe51
#define _fe51 _crypto_scalarmult_curve25519_sandy2x_fe51
#define fe51_pack crypto_scalarmult_curve25519_sandy2x_fe51_pack
#define _fe51_pack _crypto_scalarmult_curve25519_sandy2x_fe51_pack
#define fe51_mul crypto_scalarmult_curve25519_sandy2x_fe51_mul
#define _fe51_mul _crypto_scalarmult_curve25519_sandy2x_fe51_mul
#define fe51_nsquare crypto_scalarmult_curve25519_sandy2x_fe51_nsquare
#define _fe51_nsquare _crypto_scalarmult_curve25519_sandy2x_fe51_nsquare
#define fe51_invert crypto_scalarmult_curve25519_sandy2x_fe51_invert

#endif /* ifndef fe51_namespace_H */

@ -0,0 +1,172 @@
#ifdef IN_SANDY2X
/*
This file is adapted from amd64-51/fe25519_square.s:
Adding loop to perform n squares.
*/
#include "fe51_namespace.h"
#include "consts_namespace.h"
.p2align 5
#ifdef ASM_HIDE_SYMBOL
ASM_HIDE_SYMBOL fe51_nsquare
ASM_HIDE_SYMBOL _fe51_nsquare
#endif
.globl fe51_nsquare
.globl _fe51_nsquare
#ifdef __ELF__
.type fe51_nsquare, @function
.type _fe51_nsquare, @function
#endif
/*
  fe51_nsquare(rdi = r, rsi = x, rdx = n): r = x^(2^n) mod 2^255-19,
  i.e. square n times in a row. Radix-2^51 limbs. The intermediate
  value is kept partly in registers, partly in r[2..4] in memory.
*/
fe51_nsquare:
_fe51_nsquare:

/* Prologue: aligned scratch area, save callee-saved registers. */
mov %rsp,%r11
and $31,%r11
add $64,%r11
sub %r11,%rsp
movq %r11,0(%rsp)
movq %r12,8(%rsp)
movq %r13,16(%rsp)
movq %r14,24(%rsp)
movq %r15,32(%rsp)
movq %rbx,40(%rsp)
movq %rbp,48(%rsp)

/* Load x: limbs 0,1 stay in registers; limbs 2..4 are parked in r[]. */
movq 0(%rsi),%rcx
movq 8(%rsi),%r8
movq 16(%rsi),%r9
movq 24(%rsi),%rax
movq 32(%rsi),%rsi
movq %r9,16(%rdi)
movq %rax,24(%rdi)
movq %rsi,32(%rdi)
mov %rdx,%rsi
.p2align 4

/* One squaring per iteration; %rsi counts down from n. */
._loop:
sub $1,%rsi
/* Partial products: diagonal terms squared, off-diagonal terms doubled
   (add %reg,%reg), high cross terms scaled by 19/38 for the wraparound. */
mov %rcx,%rax
mul %rcx
add %rcx,%rcx
mov %rax,%r9
mov %rdx,%r10
mov %rcx,%rax
mul %r8
mov %rax,%r11
mov %rdx,%r12
mov %rcx,%rax
mulq 16(%rdi)
mov %rax,%r13
mov %rdx,%r14
mov %rcx,%rax
mulq 24(%rdi)
mov %rax,%r15
mov %rdx,%rbx
mov %rcx,%rax
mulq 32(%rdi)
mov %rax,%rcx
mov %rdx,%rbp
mov %r8,%rax
mul %r8
add %r8,%r8
add %rax,%r13
adc %rdx,%r14
mov %r8,%rax
mulq 16(%rdi)
add %rax,%r15
adc %rdx,%rbx
mov %r8,%rax
imulq $19, %r8,%r8
mulq 24(%rdi)
add %rax,%rcx
adc %rdx,%rbp
mov %r8,%rax
mulq 32(%rdi)
add %rax,%r9
adc %rdx,%r10
movq 16(%rdi),%rax
mulq 16(%rdi)
add %rax,%rcx
adc %rdx,%rbp
shld $13,%rcx,%rbp
movq 16(%rdi),%rax
imulq $38, %rax,%rax
mulq 24(%rdi)
add %rax,%r9
adc %rdx,%r10
shld $13,%r9,%r10
movq 16(%rdi),%rax
imulq $38, %rax,%rax
mulq 32(%rdi)
add %rax,%r11
adc %rdx,%r12
movq 24(%rdi),%rax
imulq $19, %rax,%rax
mulq 24(%rdi)
add %rax,%r11
adc %rdx,%r12
shld $13,%r11,%r12
movq 24(%rdi),%rax
imulq $38, %rax,%rax
mulq 32(%rdi)
add %rax,%r13
adc %rdx,%r14
shld $13,%r13,%r14
movq 32(%rdi),%rax
imulq $19, %rax,%rax
mulq 32(%rdi)
add %rax,%r15
adc %rdx,%rbx
shld $13,%r15,%rbx

/* Reduce: mask each limb to 51 bits and add the carried-out excess. */
movq REDMASK51(%rip),%rdx
and %rdx,%rcx
add %rbx,%rcx
and %rdx,%r9
and %rdx,%r11
add %r10,%r11
and %rdx,%r13
add %r12,%r13
and %rdx,%r15
add %r14,%r15
/* Top excess folds back into limb 0 times 19; then one carry chain. */
imulq $19, %rbp,%rbp
lea (%r9,%rbp),%r9
mov %r9,%rax
shr $51,%r9
add %r11,%r9
and %rdx,%rax
mov %r9,%r8
shr $51,%r9
add %r13,%r9
and %rdx,%r8
mov %r9,%r10
shr $51,%r9
add %r15,%r9
and %rdx,%r10
movq %r10,16(%rdi)
mov %r9,%r10
shr $51,%r9
add %rcx,%r9
and %rdx,%r10
movq %r10,24(%rdi)
mov %r9,%r10
shr $51,%r9
imulq $19, %r9,%r9
lea (%rax,%r9),%rcx
and %rdx,%r10
movq %r10,32(%rdi)
cmp $0,%rsi
jne ._loop

/* Flush the limbs still held in registers, restore, and return. */
movq %rcx,0(%rdi)
movq %r8,8(%rdi)
movq 0(%rsp),%r11
movq 8(%rsp),%r12
movq 16(%rsp),%r13
movq 24(%rsp),%r14
movq 32(%rsp),%r15
movq 40(%rsp),%rbx
movq 48(%rsp),%rbp
add %r11,%rsp
ret
#endif

@ -0,0 +1,226 @@
#ifdef IN_SANDY2X
/*
This file is the result of merging
amd64-51/fe25519_pack.c and amd64-51/fe25519_freeze.s.
*/
#include "fe51_namespace.h"
#include "consts_namespace.h"
.p2align 5
#ifdef ASM_HIDE_SYMBOL
ASM_HIDE_SYMBOL fe51_pack
ASM_HIDE_SYMBOL _fe51_pack
#endif
.globl fe51_pack
.globl _fe51_pack
#ifdef __ELF__
.type fe51_pack, @function
.type _fe51_pack, @function
#endif
/*
  fe51_pack(rdi = s, rsi = h): freeze the radix-2^51 field element h to
  its canonical representative mod 2^255-19 and store it as 32
  little-endian bytes at s. Branch-free except for the fixed-count
  reduce loop, so timing does not depend on h.
*/
fe51_pack:
_fe51_pack:

/* Prologue. */
mov %rsp,%r11
and $31,%r11
add $32,%r11
sub %r11,%rsp
movq %r11,0(%rsp)
movq %r12,8(%rsp)

/* Load the five limbs: rdx,rcx,r8,r9,rsi. */
movq 0(%rsi),%rdx
movq 8(%rsi),%rcx
movq 16(%rsi),%r8
movq 24(%rsi),%r9
movq 32(%rsi),%rsi
movq REDMASK51(%rip),%rax
lea -18(%rax),%r10 /* r10 = 2^51 - 19: threshold of limb 0 for p */
mov $3,%r11
.p2align 4

/* Three carry passes bring every limb below 2^51 (top carry times 19
   folds into limb 0). */
._reduceloop:
mov %rdx,%r12
shr $51,%r12
and %rax,%rdx
add %r12,%rcx
mov %rcx,%r12
shr $51,%r12
and %rax,%rcx
add %r12,%r8
mov %r8,%r12
shr $51,%r12
and %rax,%r8
add %r12,%r9
mov %r9,%r12
shr $51,%r12
and %rax,%r9
add %r12,%rsi
mov %rsi,%r12
shr $51,%r12
and %rax,%rsi
imulq $19, %r12,%r12
add %r12,%rdx
sub $1,%r11
ja ._reduceloop

/* Canonical freeze: if the value is >= p (limb0 >= 2^51-19 and all
   other limbs == 2^51-1) subtract p, selected with cmov, no branch. */
mov $1,%r12
cmp %r10,%rdx
cmovl %r11,%r12
cmp %rax,%rcx
cmovne %r11,%r12
cmp %rax,%r8
cmovne %r11,%r12
cmp %rax,%r9
cmovne %r11,%r12
cmp %rax,%rsi
cmovne %r11,%r12
neg %r12
and %r12,%rax
and %r12,%r10
sub %r10,%rdx
sub %rax,%rcx
sub %rax,%r8
sub %rax,%r9
sub %rax,%rsi

/* Serialize: emit 8 bits at a time; bytes at limb boundaries combine
   the top bits of one limb with the low bits of the next. */
mov %rdx,%rax
and $0xFF,%eax
movb %al,0(%rdi)
mov %rdx,%rax
shr $8,%rax
and $0xFF,%eax
movb %al,1(%rdi)
mov %rdx,%rax
shr $16,%rax
and $0xFF,%eax
movb %al,2(%rdi)
mov %rdx,%rax
shr $24,%rax
and $0xFF,%eax
movb %al,3(%rdi)
mov %rdx,%rax
shr $32,%rax
and $0xFF,%eax
movb %al,4(%rdi)
mov %rdx,%rax
shr $40,%rax
and $0xFF,%eax
movb %al,5(%rdi)
mov %rdx,%rdx
shr $48,%rdx
mov %rcx,%rax
shl $3,%rax
and $0xF8,%eax
xor %rdx,%rax
movb %al,6(%rdi)
mov %rcx,%rdx
shr $5,%rdx
and $0xFF,%edx
movb %dl,7(%rdi)
mov %rcx,%rdx
shr $13,%rdx
and $0xFF,%edx
movb %dl,8(%rdi)
mov %rcx,%rdx
shr $21,%rdx
and $0xFF,%edx
movb %dl,9(%rdi)
mov %rcx,%rdx
shr $29,%rdx
and $0xFF,%edx
movb %dl,10(%rdi)
mov %rcx,%rdx
shr $37,%rdx
and $0xFF,%edx
movb %dl,11(%rdi)
mov %rcx,%rdx
shr $45,%rdx
mov %r8,%rcx
shl $6,%rcx
and $0xC0,%ecx
xor %rdx,%rcx
movb %cl,12(%rdi)
mov %r8,%rdx
shr $2,%rdx
and $0xFF,%edx
movb %dl,13(%rdi)
mov %r8,%rdx
shr $10,%rdx
and $0xFF,%edx
movb %dl,14(%rdi)
mov %r8,%rdx
shr $18,%rdx
and $0xFF,%edx
movb %dl,15(%rdi)
mov %r8,%rdx
shr $26,%rdx
and $0xFF,%edx
movb %dl,16(%rdi)
mov %r8,%rdx
shr $34,%rdx
and $0xFF,%edx
movb %dl,17(%rdi)
mov %r8,%rdx
shr $42,%rdx
movb %dl,18(%rdi)
mov %r8,%rdx
shr $50,%rdx
mov %r9,%rcx
shl $1,%rcx
and $0xFE,%ecx
xor %rdx,%rcx
movb %cl,19(%rdi)
mov %r9,%rdx
shr $7,%rdx
and $0xFF,%edx
movb %dl,20(%rdi)
mov %r9,%rdx
shr $15,%rdx
and $0xFF,%edx
movb %dl,21(%rdi)
mov %r9,%rdx
shr $23,%rdx
and $0xFF,%edx
movb %dl,22(%rdi)
mov %r9,%rdx
shr $31,%rdx
and $0xFF,%edx
movb %dl,23(%rdi)
mov %r9,%rdx
shr $39,%rdx
and $0xFF,%edx
movb %dl,24(%rdi)
mov %r9,%rdx
shr $47,%rdx
mov %rsi,%rcx
shl $4,%rcx
and $0xF0,%ecx
xor %rdx,%rcx
movb %cl,25(%rdi)
mov %rsi,%rdx
shr $4,%rdx
and $0xFF,%edx
movb %dl,26(%rdi)
mov %rsi,%rdx
shr $12,%rdx
and $0xFF,%edx
movb %dl,27(%rdi)
mov %rsi,%rdx
shr $20,%rdx
and $0xFF,%edx
movb %dl,28(%rdi)
mov %rsi,%rdx
shr $28,%rdx
and $0xFF,%edx
movb %dl,29(%rdi)
mov %rsi,%rdx
shr $36,%rdx
and $0xFF,%edx
movb %dl,30(%rdi)
mov %rsi,%rsi
shr $44,%rsi
movb %sil,31(%rdi)

/* Epilogue. */
movq 0(%rsp),%r11
movq 8(%rsp),%r12
add %r11,%rsp
ret
#endif

@ -0,0 +1,78 @@
/*
This file is basically ref10/fe_frombytes.h.
*/
#include "fe.h"
#ifdef HAVE_AVX_ASM
/* Little-endian load of three bytes into a 64-bit word. */
static uint64_t
load_3(const unsigned char *in)
{
    return (uint64_t) in[0]
         | ((uint64_t) in[1] << 8)
         | ((uint64_t) in[2] << 16);
}
/* Little-endian load of four bytes into a 64-bit word. */
static uint64_t
load_4(const unsigned char *in)
{
    return (uint64_t) in[0]
         | ((uint64_t) in[1] << 8)
         | ((uint64_t) in[2] << 16)
         | ((uint64_t) in[3] << 24);
}
/*
 * Decode 32 little-endian bytes into the ten-limb (26/25-bit alternating)
 * unsigned representation used by the sandy2x ladder. The mask 8388607
 * (2^23-1) drops the top bit of the input.
 */
void
fe_frombytes(fe h, const unsigned char *s)
{
    uint64_t h0 = load_4(s);
    uint64_t h1 = load_3(s + 4) << 6;
    uint64_t h2 = load_3(s + 7) << 5;
    uint64_t h3 = load_3(s + 10) << 3;
    uint64_t h4 = load_3(s + 13) << 2;
    uint64_t h5 = load_4(s + 16);
    uint64_t h6 = load_3(s + 20) << 7;
    uint64_t h7 = load_3(s + 23) << 5;
    uint64_t h8 = load_3(s + 26) << 4;
    uint64_t h9 = (load_3(s + 29) & 8388607) << 2;
    uint64_t carry0;
    uint64_t carry1;
    uint64_t carry2;
    uint64_t carry3;
    uint64_t carry4;
    uint64_t carry5;
    uint64_t carry6;
    uint64_t carry7;
    uint64_t carry8;
    uint64_t carry9;

    /* Unsigned carry chain: odd limbs hold 25 bits, even limbs 26 bits;
       the carry out of h9 wraps into h0 times 19 (2^255 == 19 mod p). */
    carry9 = h9 >> 25; h0 += carry9 * 19; h9 &= 0x1FFFFFF;
    carry1 = h1 >> 25; h2 += carry1; h1 &= 0x1FFFFFF;
    carry3 = h3 >> 25; h4 += carry3; h3 &= 0x1FFFFFF;
    carry5 = h5 >> 25; h6 += carry5; h5 &= 0x1FFFFFF;
    carry7 = h7 >> 25; h8 += carry7; h7 &= 0x1FFFFFF;
    carry0 = h0 >> 26; h1 += carry0; h0 &= 0x3FFFFFF;
    carry2 = h2 >> 26; h3 += carry2; h2 &= 0x3FFFFFF;
    carry4 = h4 >> 26; h5 += carry4; h4 &= 0x3FFFFFF;
    carry6 = h6 >> 26; h7 += carry6; h6 &= 0x3FFFFFF;
    carry8 = h8 >> 26; h9 += carry8; h8 &= 0x3FFFFFF;

    h[0] = h0;
    h[1] = h1;
    h[2] = h2;
    h[3] = h3;
    h[4] = h4;
    h[5] = h5;
    h[6] = h6;
    h[7] = h7;
    h[8] = h8;
    h[9] = h9;
}
#endif

File diff suppressed because it is too large Load Diff

@ -0,0 +1,18 @@
#ifndef ladder_H
#define ladder_H

#ifdef __cplusplus
extern "C" {
#endif

#include "fe.h"
#include "ladder_namespace.h"

/* Assembly Montgomery ladder: operates on (x1, x2, z2) packed in the
   fe array, driven by the 32-byte clamped scalar. */
extern void ladder(fe *, const unsigned char *);

#ifdef __cplusplus
}
#endif

#endif /* ifndef ladder_H */

File diff suppressed because it is too large Load Diff

@ -0,0 +1,18 @@
#ifndef ladder_base_H
#define ladder_base_H

#ifdef __cplusplus
extern "C" {
#endif

#include "fe.h"
#include "ladder_base_namespace.h"

/* Assembly fixed-base Montgomery ladder: operates on (x2, z2) packed in
   the fe array, driven by the 32-byte clamped scalar. */
extern void ladder_base(fe *, const unsigned char *);

#ifdef __cplusplus
}
#endif

#endif /* ifndef ladder_base_H */

@ -0,0 +1,8 @@
#ifndef ladder_base_namespace_H
#define ladder_base_namespace_H

/* Namespace the assembly symbol (and its underscore alias) to avoid
   link-time collisions. */
#define ladder_base crypto_scalarmult_curve25519_sandy2x_ladder_base
#define _ladder_base _crypto_scalarmult_curve25519_sandy2x_ladder_base

#endif /* ifndef ladder_base_namespace_H */

@ -0,0 +1,8 @@
#ifndef ladder_namespace_H
#define ladder_namespace_H

/* Namespace the assembly symbol (and its underscore alias) to avoid
   link-time collisions. */
#define ladder crypto_scalarmult_curve25519_sandy2x_ladder
#define _ladder _crypto_scalarmult_curve25519_sandy2x_ladder

#endif /* ifndef ladder_namespace_H */

@ -0,0 +1,17 @@
#ifdef HAVE_AVX_ASM

/* Umbrella translation unit: assembles all sandy2x .S pieces together so
   IN_SANDY2X gates their contents as a single object. */
#define IN_SANDY2X

#include "consts.S"
#include "fe51_mul.S"
#include "fe51_nsquare.S"
#include "fe51_pack.S"
#include "ladder.S"
#include "ladder_base.S"

/* Mark the stack non-executable on ELF/Linux. */
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif

#endif

@ -0,0 +1,60 @@
#include <sodium/crypto_scalarmult_curve25519.h>
#include <sodium/private/implementations.h>
#include "scalarmult_curve25519.h"
#include <sodium/runtime.h>
#include "sandy2x/curve25519_sandy2x.h"
#include "ref10/x25519_ref10.h"
/* Active backend; defaults to the portable ref10 code and may be swapped
   by _crypto_scalarmult_curve25519_pick_best_implementation(). */
static const crypto_scalarmult_curve25519_implementation *implementation =
    &crypto_scalarmult_curve25519_ref10_implementation;
/*
 * Public X25519 entry point: q = n * p.
 * Returns -1 if the backend rejects p or the resulting shared secret is
 * the all-zero string (low-order input), 0 on success.
 */
int
crypto_scalarmult_curve25519(unsigned char *q, const unsigned char *n,
                             const unsigned char *p)
{
    size_t                 i;
    volatile unsigned char d = 0; /* volatile: keep the all-zero scan from being optimized out */

    if(implementation->mult(q, n, p) != 0)
    {
        return -1; /* LCOV_EXCL_LINE */
    }
    /* Constant-time check that q is not all zeroes. */
    for(i = 0; i < crypto_scalarmult_curve25519_BYTES; i++)
    {
        d |= q[i];
    }
    /* (d - 1) >> 8 has its low bit set exactly when d == 0. */
    return -(1 & ((d - 1) >> 8));
}
/* Public X25519 base-point entry point: q = n * basepoint. */
int
crypto_scalarmult_curve25519_base(unsigned char *q, const unsigned char *n)
{
    return implementation->mult_base(q, n);
}
/* Run-time accessors for the compile-time size constants, for bindings
   that cannot use the macros. */
size_t
crypto_scalarmult_curve25519_bytes(void)
{
    return crypto_scalarmult_curve25519_BYTES;
}

size_t
crypto_scalarmult_curve25519_scalarbytes(void)
{
    return crypto_scalarmult_curve25519_SCALARBYTES;
}
/*
 * Select the fastest available backend: ref10 by default, sandy2x when
 * built with AVX assembly and the CPU reports AVX support.
 * Always returns 0.
 */
int
_crypto_scalarmult_curve25519_pick_best_implementation(void)
{
    implementation = &crypto_scalarmult_curve25519_ref10_implementation;
#ifdef HAVE_AVX_ASM
    if(sodium_runtime_has_avx())
    {
        implementation = &crypto_scalarmult_curve25519_sandy2x_implementation;
    }
#endif
    return 0;
}

@ -0,0 +1,11 @@
/* Header guard fixed: it was `scalarmult_poly1305_H`, a copy-paste from the
   poly1305 module, which mismatches this file and risks colliding with any
   header that legitimately uses that guard name. */
#ifndef scalarmult_curve25519_H
#define scalarmult_curve25519_H

/* Dispatch table for interchangeable X25519 backends (ref10, sandy2x). */
typedef struct crypto_scalarmult_curve25519_implementation {
    /* q = n * p; returns 0 on success, -1 on rejected input. */
    int (*mult)(unsigned char *q, const unsigned char *n,
                const unsigned char *p);
    /* q = n * basepoint. */
    int (*mult_base)(unsigned char *q, const unsigned char *n);
} crypto_scalarmult_curve25519_implementation;

#endif

@ -0,0 +1,114 @@
/* Generic crypto_box API: thin wrappers that bind the default primitive
 * (curve25519xsalsa20poly1305) to the unqualified crypto_box names. */
#include <sodium/crypto_box.h>
/* --- size/parameter accessors --- */
size_t
crypto_box_seedbytes(void)
{
return crypto_box_SEEDBYTES;
}
size_t
crypto_box_publickeybytes(void)
{
return crypto_box_PUBLICKEYBYTES;
}
size_t
crypto_box_secretkeybytes(void)
{
return crypto_box_SECRETKEYBYTES;
}
size_t
crypto_box_beforenmbytes(void)
{
return crypto_box_BEFORENMBYTES;
}
size_t
crypto_box_noncebytes(void)
{
return crypto_box_NONCEBYTES;
}
size_t
crypto_box_zerobytes(void)
{
return crypto_box_ZEROBYTES;
}
size_t
crypto_box_boxzerobytes(void)
{
return crypto_box_BOXZEROBYTES;
}
size_t
crypto_box_macbytes(void)
{
return crypto_box_MACBYTES;
}
size_t
crypto_box_messagebytes_max(void)
{
return crypto_box_MESSAGEBYTES_MAX;
}
const char *
crypto_box_primitive(void)
{
return crypto_box_PRIMITIVE;
}
/* --- keypair and encryption entry points: each delegates directly to
 * the curve25519xsalsa20poly1305 implementation --- */
int
crypto_box_seed_keypair(unsigned char *pk, unsigned char *sk,
const unsigned char *seed)
{
return crypto_box_curve25519xsalsa20poly1305_seed_keypair(pk, sk, seed);
}
int
crypto_box_keypair(unsigned char *pk, unsigned char *sk)
{
return crypto_box_curve25519xsalsa20poly1305_keypair(pk, sk);
}
/* Precompute the shared key k from (pk, sk) for the *_afternm calls. */
int
crypto_box_beforenm(unsigned char *k, const unsigned char *pk,
const unsigned char *sk)
{
return crypto_box_curve25519xsalsa20poly1305_beforenm(k, pk, sk);
}
int
crypto_box_afternm(unsigned char *c, const unsigned char *m,
unsigned long long mlen, const unsigned char *n,
const unsigned char *k)
{
return crypto_box_curve25519xsalsa20poly1305_afternm(c, m, mlen, n, k);
}
int
crypto_box_open_afternm(unsigned char *m, const unsigned char *c,
unsigned long long clen, const unsigned char *n,
const unsigned char *k)
{
return crypto_box_curve25519xsalsa20poly1305_open_afternm(m, c, clen, n, k);
}
int
crypto_box(unsigned char *c, const unsigned char *m, unsigned long long mlen,
const unsigned char *n, const unsigned char *pk,
const unsigned char *sk)
{
return crypto_box_curve25519xsalsa20poly1305(c, m, mlen, n, pk, sk);
}
int
crypto_box_open(unsigned char *m, const unsigned char *c,
unsigned long long clen, const unsigned char *n,
const unsigned char *pk, const unsigned char *sk)
{
return crypto_box_curve25519xsalsa20poly1305_open(m, c, clen, n, pk, sk);
}

@ -0,0 +1,115 @@
/* Generic crypto_sign API: thin wrappers that bind the default primitive
 * (Ed25519, plus Ed25519ph for the multipart interface) to the
 * unqualified crypto_sign names. */
#include <sodium/crypto_sign.h>
/* --- size/parameter accessors --- */
size_t
crypto_sign_statebytes(void)
{
return sizeof(crypto_sign_state);
}
size_t
crypto_sign_bytes(void)
{
return crypto_sign_BYTES;
}
size_t
crypto_sign_seedbytes(void)
{
return crypto_sign_SEEDBYTES;
}
size_t
crypto_sign_publickeybytes(void)
{
return crypto_sign_PUBLICKEYBYTES;
}
size_t
crypto_sign_secretkeybytes(void)
{
return crypto_sign_SECRETKEYBYTES;
}
size_t
crypto_sign_messagebytes_max(void)
{
return crypto_sign_MESSAGEBYTES_MAX;
}
const char *
crypto_sign_primitive(void)
{
return crypto_sign_PRIMITIVE;
}
/* --- one-shot signing API: delegates to Ed25519 --- */
int
crypto_sign_seed_keypair(unsigned char *pk, unsigned char *sk,
const unsigned char *seed)
{
return crypto_sign_ed25519_seed_keypair(pk, sk, seed);
}
int
crypto_sign_keypair(unsigned char *pk, unsigned char *sk)
{
return crypto_sign_ed25519_keypair(pk, sk);
}
int
crypto_sign(unsigned char *sm, unsigned long long *smlen_p,
const unsigned char *m, unsigned long long mlen,
const unsigned char *sk)
{
return crypto_sign_ed25519(sm, smlen_p, m, mlen, sk);
}
int
crypto_sign_open(unsigned char *m, unsigned long long *mlen_p,
const unsigned char *sm, unsigned long long smlen,
const unsigned char *pk)
{
return crypto_sign_ed25519_open(m, mlen_p, sm, smlen, pk);
}
int
crypto_sign_detached(unsigned char *sig, unsigned long long *siglen_p,
const unsigned char *m, unsigned long long mlen,
const unsigned char *sk)
{
return crypto_sign_ed25519_detached(sig, siglen_p, m, mlen, sk);
}
int
crypto_sign_verify_detached(const unsigned char *sig, const unsigned char *m,
unsigned long long mlen, const unsigned char *pk)
{
return crypto_sign_ed25519_verify_detached(sig, m, mlen, pk);
}
/* --- multipart (prehashed) API: delegates to Ed25519ph --- */
int
crypto_sign_init(crypto_sign_state *state)
{
return crypto_sign_ed25519ph_init(state);
}
int
crypto_sign_update(crypto_sign_state *state, const unsigned char *m,
unsigned long long mlen)
{
return crypto_sign_ed25519ph_update(state, m, mlen);
}
int
crypto_sign_final_create(crypto_sign_state *state, unsigned char *sig,
unsigned long long *siglen_p, const unsigned char *sk)
{
return crypto_sign_ed25519ph_final_create(state, sig, siglen_p, sk);
}
int
crypto_sign_final_verify(crypto_sign_state *state, unsigned char *sig,
const unsigned char *pk)
{
return crypto_sign_ed25519ph_final_verify(state, sig, pk);
}

@ -0,0 +1,92 @@
/* Ed25519 key generation and Ed25519 -> X25519 key conversion (ref10). */
#include <string.h>
#include <sodium/crypto_hash_sha512.h>
#include <sodium/crypto_scalarmult_curve25519.h>
#include <sodium/crypto_sign_ed25519.h>
#include "sign_ed25519_ref10.h"
#include <sodium/private/ed25519_ref10.h>
#include <sodium/randombytes.h>
#include <sodium/utils.h>
/* Derive an Ed25519 keypair from a 32-byte seed.
 * On return sk holds seed || pk (64 bytes) and pk the public key. */
int
crypto_sign_ed25519_seed_keypair(unsigned char *pk, unsigned char *sk,
const unsigned char *seed)
{
ge25519_p3 A;
/* expand the seed into the secret scalar (sk is used as a 64-byte
 * scratch buffer here; the SHA-512 output fills all of it) */
#ifdef ED25519_NONDETERMINISTIC
memmove(sk, seed, 32);
#else
crypto_hash_sha512(sk, seed, 32);
#endif
/* clamp: clear the 3 low bits, clear the top bit, set bit 254 */
sk[0] &= 248;
sk[31] &= 127;
sk[31] |= 64;
/* A = a * B; encode as the public key */
ge25519_scalarmult_base(&A, sk);
ge25519_p3_tobytes(pk, &A);
/* final secret-key layout: seed || public key */
memmove(sk, seed, 32);
memmove(sk + 32, pk, 32);
return 0;
}
/* Generate a keypair from a fresh random seed; the seed is wiped. */
int
crypto_sign_ed25519_keypair(unsigned char *pk, unsigned char *sk)
{
unsigned char seed[32];
int ret;
randombytes_buf(seed, sizeof seed);
ret = crypto_sign_ed25519_seed_keypair(pk, sk, seed);
sodium_memzero(seed, sizeof seed);
return ret;
}
/* Convert an Ed25519 public key to an X25519 public key using the
 * birational map u = (1 + y) / (1 - y). Rejects small-order points,
 * undecodable encodings and points outside the main subgroup. */
int
crypto_sign_ed25519_pk_to_curve25519(unsigned char *curve25519_pk,
const unsigned char *ed25519_pk)
{
ge25519_p3 A;
fe25519 x;
fe25519 one_minus_y;
if(ge25519_has_small_order(ed25519_pk) != 0
|| ge25519_frombytes_negate_vartime(&A, ed25519_pk) != 0
|| ge25519_is_on_main_subgroup(&A) == 0)
{
return -1;
}
/* x = (1 + A.Y) * 1/(1 - A.Y); only Y is used, so the sign flip from
 * frombytes_negate_vartime does not affect the result */
fe25519_1(one_minus_y);
fe25519_sub(one_minus_y, one_minus_y, A.Y);
fe25519_invert(one_minus_y, one_minus_y);
fe25519_1(x);
fe25519_add(x, x, A.Y);
fe25519_mul(x, x, one_minus_y);
fe25519_tobytes(curve25519_pk, x);
return 0;
}
/* Convert an Ed25519 secret key (seed || pk) to an X25519 secret key:
 * hash the seed half, clamp, and keep the first 32 bytes. */
int
crypto_sign_ed25519_sk_to_curve25519(unsigned char *curve25519_sk,
const unsigned char *ed25519_sk)
{
unsigned char h[crypto_hash_sha512_BYTES];
#ifdef ED25519_NONDETERMINISTIC
memcpy(h, ed25519_sk, 32);
#else
crypto_hash_sha512(h, ed25519_sk, 32);
#endif
h[0] &= 248;
h[31] &= 127;
h[31] |= 64;
memcpy(curve25519_sk, h, crypto_scalarmult_curve25519_BYTES);
sodium_memzero(h, sizeof h);
return 0;
}

@ -0,0 +1,121 @@
/* Legacy edwards25519sha512batch signature system (pre-Ed25519 scheme,
 * kept for compatibility with old callers; not interoperable with
 * Ed25519 signatures). */
#include <limits.h>
#include <stdint.h>
#include <string.h>
#include <sodium/crypto_hash_sha512.h>
#include <sodium/crypto_sign_edwards25519sha512batch.h>
#include <sodium/crypto_verify_32.h>
#include <sodium/private/ed25519_ref10.h>
#include <sodium/randombytes.h>
#include <sodium/utils.h>
/* Generate a keypair; sk must hold at least 64 bytes since the SHA-512
 * expansion of the 32 random bytes is written into it in place. */
int
crypto_sign_edwards25519sha512batch_keypair(unsigned char *pk,
unsigned char *sk)
{
ge25519_p3 A;
randombytes_buf(sk, 32);
crypto_hash_sha512(sk, sk, 32);
/* clamp the scalar (same clamping as Ed25519) */
sk[0] &= 248;
sk[31] &= 127;
sk[31] |= 64;
ge25519_scalarmult_base(&A, sk);
ge25519_p3_tobytes(pk, &A);
return 0;
}
/* Sign m. Signed-message layout: R (32) || M (mlen) || S (32).
 * nonce = H(sk[32..63] || M); hram = H(R || M) (note: unlike Ed25519,
 * the public key is not hashed into hram). */
int
crypto_sign_edwards25519sha512batch(unsigned char *sm,
unsigned long long *smlen_p,
const unsigned char *m,
unsigned long long mlen,
const unsigned char *sk)
{
crypto_hash_sha512_state hs;
unsigned char nonce[64];
unsigned char hram[64];
unsigned char sig[64];
ge25519_p3 A;
ge25519_p3 R;
crypto_hash_sha512_init(&hs);
crypto_hash_sha512_update(&hs, sk + 32, 32);
crypto_hash_sha512_update(&hs, m, mlen);
crypto_hash_sha512_final(&hs, nonce);
/* sig[32..63] temporarily holds A; overwritten by S below */
ge25519_scalarmult_base(&A, sk);
ge25519_p3_tobytes(sig + 32, &A);
sc25519_reduce(nonce);
ge25519_scalarmult_base(&R, nonce);
ge25519_p3_tobytes(sig, &R);
crypto_hash_sha512_init(&hs);
crypto_hash_sha512_update(&hs, sig, 32);
crypto_hash_sha512_update(&hs, m, mlen);
crypto_hash_sha512_final(&hs, hram);
sc25519_reduce(hram);
/* S = hram * a + nonce (mod l) */
sc25519_muladd(sig + 32, hram, nonce, sk);
sodium_memzero(hram, sizeof hram);
memmove(sm + 32, m, (size_t)mlen);
memcpy(sm, sig, 32);
memcpy(sm + 32 + mlen, sig + 32, 32);
*smlen_p = mlen + 64U;
return 0;
}
/* Verify and open a signed message produced by the function above. */
int
crypto_sign_edwards25519sha512batch_open(unsigned char *m,
unsigned long long *mlen_p,
const unsigned char *sm,
unsigned long long smlen,
const unsigned char *pk)
{
unsigned char h[64];
unsigned char t1[32], t2[32];
unsigned long long mlen;
ge25519_cached Ai;
ge25519_p1p1 csa;
ge25519_p2 cs;
ge25519_p3 A;
ge25519_p3 R;
ge25519_p3 cs3;
*mlen_p = 0;
if(smlen < 64
|| smlen - 64 > crypto_sign_edwards25519sha512batch_MESSAGEBYTES_MAX)
{
return -1;
}
mlen = smlen - 64;
/* reject scalars S with any of the top 3 bits set */
if(sm[smlen - 1] & 224)
{
return -1;
}
/* decode pk and R with negation; reject small-order inputs */
if(ge25519_has_small_order(pk) != 0
|| ge25519_frombytes_negate_vartime(&A, pk) != 0
|| ge25519_has_small_order(sm) != 0
|| ge25519_frombytes_negate_vartime(&R, sm) != 0)
{
return -1;
}
ge25519_p3_to_cached(&Ai, &A);
/* h = H(R || M) reduced mod l */
crypto_hash_sha512(h, sm, mlen + 32);
sc25519_reduce(h);
/* t1 = encode(h*(-R) + (-A)) with the sign bit flipped back;
 * t2 = encode(S * B); signature is valid iff t1 == t2 */
ge25519_scalarmult(&cs3, h, &R);
ge25519_add(&csa, &cs3, &Ai);
ge25519_p1p1_to_p2(&cs, &csa);
ge25519_tobytes(t1, &cs);
t1[31] ^= 1 << 7;
ge25519_scalarmult_base(&R, sm + 32 + mlen);
ge25519_p3_tobytes(t2, &R);
if(crypto_verify_32(t1, t2) != 0)
{
return -1;
}
*mlen_p = mlen;
memmove(m, sm + 32, mlen);
return 0;
}

@ -0,0 +1,99 @@
/* Ed25519 signature verification (ref10). */
#include <limits.h>
#include <stdint.h>
#include <string.h>
#include <sodium/crypto_hash_sha512.h>
#include <sodium/crypto_sign_ed25519.h>
#include <sodium/crypto_verify_32.h>
#include "sign_ed25519_ref10.h"
#include <sodium/private/ed25519_ref10.h>
#include <sodium/utils.h>
/* Verify sig = R || S over m (prehashed selects the Ed25519ph domain
 * separation prefix). Returns 0 on success, non-zero on failure. */
int
_crypto_sign_ed25519_verify_detached(const unsigned char *sig,
const unsigned char *m,
unsigned long long mlen,
const unsigned char *pk, int prehashed)
{
crypto_hash_sha512_state hs;
unsigned char h[64];
unsigned char rcheck[32];
ge25519_p3 A;
ge25519_p2 R;
#ifndef ED25519_COMPAT
/* strict mode: S must be canonical (< l) and R not of small order */
if(sc25519_is_canonical(sig + 32) == 0 || ge25519_has_small_order(sig) != 0)
{
return -1;
}
/* the public key encoding must be canonical as well */
if(ge25519_is_canonical(pk) == 0)
{
return -1;
}
#else
/* compat mode: only reject S values with the top 3 bits set */
if(sig[63] & 224)
{
return -1;
}
#endif
if(ge25519_has_small_order(pk) != 0
|| ge25519_frombytes_negate_vartime(&A, pk) != 0)
{
return -1;
}
/* h = H(dom2? || R || A || M) reduced mod l */
_crypto_sign_ed25519_ref10_hinit(&hs, prehashed);
crypto_hash_sha512_update(&hs, sig, 32);
crypto_hash_sha512_update(&hs, pk, 32);
crypto_hash_sha512_update(&hs, m, mlen);
crypto_hash_sha512_final(&hs, h);
sc25519_reduce(h);
/* rcheck = encode(h*(-A) + S*B); must equal R from the signature */
ge25519_double_scalarmult_vartime(&R, h, &A, sig + 32);
ge25519_tobytes(rcheck, &R);
/* combine several comparisons so a single one cannot be optimized out;
 * NOTE(review): `rcheck == sig` compares addresses (rcheck is a local
 * array, so this is always false here) — defense-in-depth from
 * upstream, kept as-is */
return crypto_verify_32(rcheck, sig) | (-(rcheck == sig))
| sodium_memcmp(sig, rcheck, 32);
}
int
crypto_sign_ed25519_verify_detached(const unsigned char *sig,
const unsigned char *m,
unsigned long long mlen,
const unsigned char *pk)
{
return _crypto_sign_ed25519_verify_detached(sig, m, mlen, pk, 0);
}
/* Verify a combined signed message sm = sig(64) || M and copy M out.
 * On failure m is zeroed (up to mlen) and *mlen_p set to 0. */
int
crypto_sign_ed25519_open(unsigned char *m, unsigned long long *mlen_p,
const unsigned char *sm, unsigned long long smlen,
const unsigned char *pk)
{
unsigned long long mlen;
if(smlen < 64 || smlen - 64 > crypto_sign_ed25519_MESSAGEBYTES_MAX)
{
goto badsig;
}
mlen = smlen - 64;
if(crypto_sign_ed25519_verify_detached(sm, sm + 64, mlen, pk) != 0)
{
memset(m, 0, mlen);
goto badsig;
}
if(mlen_p != NULL)
{
*mlen_p = mlen;
}
memmove(m, sm + 64, mlen);
return 0;
badsig:
if(mlen_p != NULL)
{
*mlen_p = 0;
}
return -1;
}

@ -0,0 +1,148 @@
/* Ed25519 signature creation (ref10). */
#include <string.h>
#include <sodium/crypto_hash_sha512.h>
#include <sodium/crypto_sign_ed25519.h>
#include "sign_ed25519_ref10.h"
#include <sodium/private/ed25519_ref10.h>
#include <sodium/randombytes.h>
#include <sodium/utils.h>
/* Initialize the hash state; when prehashed (Ed25519ph), absorb the
 * dom2 prefix "SigEd25519 no Ed25519 collisions" || phflag=1 || ctxlen=0
 * first for domain separation. */
void
_crypto_sign_ed25519_ref10_hinit(crypto_hash_sha512_state *hs, int prehashed)
{
static const unsigned char DOM2PREFIX[32 + 2] = {
'S', 'i', 'g', 'E', 'd', '2', '5', '5', '1', '9', ' ', 'n',
'o', ' ', 'E', 'd', '2', '5', '5', '1', '9', ' ', 'c', 'o',
'l', 'l', 'i', 's', 'i', 'o', 'n', 's', 1, 0};
crypto_hash_sha512_init(hs);
if(prehashed)
{
crypto_hash_sha512_update(hs, DOM2PREFIX, sizeof DOM2PREFIX);
}
}
/* Standard Ed25519 scalar clamping. */
static inline void
_crypto_sign_ed25519_clamp(unsigned char k[32])
{
k[0] &= 248;
k[31] &= 127;
k[31] |= 64;
}
#ifdef ED25519_NONDETERMINISTIC
/* r = hash(B || empty_labelset || Z || pad1 || k || pad2 || empty_labelset || K
 * || extra || M) (mod q) */
static void
_crypto_sign_ed25519_synthetic_r_hv(crypto_hash_sha512_state *hs,
unsigned char Z[32],
const unsigned char sk[64])
{
static const unsigned char B[32] = {
0x58, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66,
0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66,
0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66,
};
static const unsigned char zeros[128] = {0x00};
static const unsigned char empty_labelset[3] = {0x02, 0x00, 0x00};
crypto_hash_sha512_update(hs, B, 32);
crypto_hash_sha512_update(hs, empty_labelset, 3);
/* Z is fresh randomness, making the nonce non-deterministic */
randombytes_buf(Z, 32);
crypto_hash_sha512_update(hs, Z, 32);
crypto_hash_sha512_update(hs, zeros, 128 - (32 + 3 + 32) % 128);
crypto_hash_sha512_update(hs, sk, 32);
crypto_hash_sha512_update(hs, zeros, 128 - 32 % 128);
crypto_hash_sha512_update(hs, empty_labelset, 3);
crypto_hash_sha512_update(hs, sk + 32, 32);
/* empty extra */
}
#endif
/* Produce a detached signature sig = R || S over m with secret key
 * sk = seed || pk. Secrets (az, nonce) are wiped before returning. */
int
_crypto_sign_ed25519_detached(unsigned char *sig, unsigned long long *siglen_p,
const unsigned char *m, unsigned long long mlen,
const unsigned char *sk, int prehashed)
{
crypto_hash_sha512_state hs;
unsigned char az[64];
unsigned char nonce[64];
unsigned char hram[64];
ge25519_p3 R;
_crypto_sign_ed25519_ref10_hinit(&hs, prehashed);
#ifdef ED25519_NONDETERMINISTIC
memcpy(az, sk, 32);
_crypto_sign_ed25519_synthetic_r_hv(&hs, nonce, az);
#else
/* az = SHA-512(seed); nonce = H(az[32..63] || M) */
crypto_hash_sha512(az, sk, 32);
crypto_hash_sha512_update(&hs, az + 32, 32);
#endif
crypto_hash_sha512_update(&hs, m, mlen);
crypto_hash_sha512_final(&hs, nonce);
/* stash the public key at sig+32 so H(R || A || M) can be absorbed
 * from sig[0..63] below; overwritten by S at the end */
memmove(sig + 32, sk + 32, 32);
/* R = r * B */
sc25519_reduce(nonce);
ge25519_scalarmult_base(&R, nonce);
ge25519_p3_tobytes(sig, &R);
/* hram = H(dom2? || R || A || M) */
_crypto_sign_ed25519_ref10_hinit(&hs, prehashed);
crypto_hash_sha512_update(&hs, sig, 64);
crypto_hash_sha512_update(&hs, m, mlen);
crypto_hash_sha512_final(&hs, hram);
sc25519_reduce(hram);
/* S = hram * a + r (mod l), with a the clamped secret scalar */
_crypto_sign_ed25519_clamp(az);
sc25519_muladd(sig + 32, hram, az, nonce);
sodium_memzero(az, sizeof az);
sodium_memzero(nonce, sizeof nonce);
if(siglen_p != NULL)
{
*siglen_p = 64U;
}
return 0;
}
int
crypto_sign_ed25519_detached(unsigned char *sig, unsigned long long *siglen_p,
const unsigned char *m, unsigned long long mlen,
const unsigned char *sk)
{
return _crypto_sign_ed25519_detached(sig, siglen_p, m, mlen, sk, 0);
}
/* Combined signing: sm = sig(64) || M. Signs in place after copying the
 * message, so sm and m may overlap. */
int
crypto_sign_ed25519(unsigned char *sm, unsigned long long *smlen_p,
const unsigned char *m, unsigned long long mlen,
const unsigned char *sk)
{
unsigned long long siglen;
memmove(sm + crypto_sign_ed25519_BYTES, m, mlen);
/* LCOV_EXCL_START */
if(crypto_sign_ed25519_detached(sm, &siglen, sm + crypto_sign_ed25519_BYTES,
mlen, sk)
!= 0
|| siglen != crypto_sign_ed25519_BYTES)
{
if(smlen_p != NULL)
{
*smlen_p = 0;
}
memset(sm, 0, mlen + crypto_sign_ed25519_BYTES);
return -1;
}
/* LCOV_EXCL_STOP */
if(smlen_p != NULL)
{
*smlen_p = mlen + siglen;
}
return 0;
}

@ -0,0 +1,18 @@
/* Internal prehash-aware Ed25519 entry points shared by the one-shot
 * and Ed25519ph front ends (prehashed selects the dom2 prefix).
 * NOTE(review): uses crypto_hash_sha512_state without including
 * <sodium/crypto_hash_sha512.h>; includers must provide it first —
 * confirm this ordering is intended. */
#ifndef sign_ed25519_ref10_H
#define sign_ed25519_ref10_H
void _crypto_sign_ed25519_ref10_hinit(crypto_hash_sha512_state *hs,
int prehashed);
int _crypto_sign_ed25519_detached(unsigned char *sig,
unsigned long long *siglen_p,
const unsigned char *m,
unsigned long long mlen,
const unsigned char *sk, int prehashed);
int _crypto_sign_ed25519_verify_detached(const unsigned char *sig,
const unsigned char *m,
unsigned long long mlen,
const unsigned char *pk,
int prehashed);
#endif

@ -0,0 +1,96 @@
/* Ed25519 size accessors, secret-key component extraction, and the
 * Ed25519ph (prehashed, multipart) interface. */
#include <string.h>
#include <sodium/crypto_hash_sha512.h>
#include <sodium/crypto_sign_ed25519.h>
#include "ref10/sign_ed25519_ref10.h"
size_t
crypto_sign_ed25519ph_statebytes(void)
{
return sizeof(crypto_sign_ed25519ph_state);
}
size_t
crypto_sign_ed25519_bytes(void)
{
return crypto_sign_ed25519_BYTES;
}
size_t
crypto_sign_ed25519_seedbytes(void)
{
return crypto_sign_ed25519_SEEDBYTES;
}
size_t
crypto_sign_ed25519_publickeybytes(void)
{
return crypto_sign_ed25519_PUBLICKEYBYTES;
}
size_t
crypto_sign_ed25519_secretkeybytes(void)
{
return crypto_sign_ed25519_SECRETKEYBYTES;
}
size_t
crypto_sign_ed25519_messagebytes_max(void)
{
return crypto_sign_ed25519_MESSAGEBYTES_MAX;
}
/* sk layout is seed || pk, so both parts can be copied out directly. */
int
crypto_sign_ed25519_sk_to_seed(unsigned char *seed, const unsigned char *sk)
{
memmove(seed, sk, crypto_sign_ed25519_SEEDBYTES);
return 0;
}
int
crypto_sign_ed25519_sk_to_pk(unsigned char *pk, const unsigned char *sk)
{
memmove(pk, sk + crypto_sign_ed25519_SEEDBYTES,
crypto_sign_ed25519_PUBLICKEYBYTES);
return 0;
}
/* Ed25519ph: stream the message through SHA-512, then sign/verify the
 * 64-byte digest via the internal prehashed entry points. */
int
crypto_sign_ed25519ph_init(crypto_sign_ed25519ph_state *state)
{
crypto_hash_sha512_init(&state->hs);
return 0;
}
int
crypto_sign_ed25519ph_update(crypto_sign_ed25519ph_state *state,
const unsigned char *m, unsigned long long mlen)
{
return crypto_hash_sha512_update(&state->hs, m, mlen);
}
int
crypto_sign_ed25519ph_final_create(crypto_sign_ed25519ph_state *state,
unsigned char *sig,
unsigned long long *siglen_p,
const unsigned char *sk)
{
unsigned char ph[crypto_hash_sha512_BYTES];
crypto_hash_sha512_final(&state->hs, ph);
return _crypto_sign_ed25519_detached(sig, siglen_p, ph, sizeof ph, sk, 1);
}
int
crypto_sign_ed25519ph_final_verify(crypto_sign_ed25519ph_state *state,
unsigned char *sig, const unsigned char *pk)
{
unsigned char ph[crypto_hash_sha512_BYTES];
crypto_hash_sha512_final(&state->hs, ph);
return _crypto_sign_ed25519_verify_detached(sig, ph, sizeof ph, pk, 1);
}

@ -0,0 +1,246 @@
#ifndef common_H
#define common_H 1

/*
 * Shared private helpers for the bundled crypto implementations:
 * compile-time assertions, optional 128-bit integers, bit rotations,
 * endian-explicit loads/stores, and buffer XOR.
 */

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

/* Statement-scope compile-time assertion: a negative array size is a
 * constraint violation, so compilation fails when X is false. */
#define COMPILER_ASSERT(X) (void) sizeof(char[(X) ? 1 : -1])

#ifdef HAVE_TI_MODE
# if defined(__SIZEOF_INT128__)
typedef unsigned __int128 uint128_t;
# else
typedef unsigned uint128_t __attribute__((mode(TI)));
# endif
#endif

/*
 * Rotations. The shift counts are masked so that a rotation count of 0
 * (or the full word width) does not perform an undefined shift by
 * 32/64 bits (C11 6.5.7); compilers recognize this masked form and
 * still emit a single rotate instruction.
 */
#define ROTL32(X, B) rotl32((X), (B))
static inline uint32_t
rotl32(const uint32_t x, const int b)
{
    return (x << (b & 31)) | (x >> ((32 - b) & 31));
}

#define ROTL64(X, B) rotl64((X), (B))
static inline uint64_t
rotl64(const uint64_t x, const int b)
{
    return (x << (b & 63)) | (x >> ((64 - b) & 63));
}

#define ROTR32(X, B) rotr32((X), (B))
static inline uint32_t
rotr32(const uint32_t x, const int b)
{
    return (x >> (b & 31)) | (x << ((32 - b) & 31));
}

#define ROTR64(X, B) rotr64((X), (B))
static inline uint64_t
rotr64(const uint64_t x, const int b)
{
    return (x >> (b & 63)) | (x << ((64 - b) & 63));
}

/*
 * Endian-explicit loads and stores. When the host endianness matches,
 * a memcpy is used (alignment-safe, compiles to a plain move);
 * otherwise the bytes are assembled explicitly, which is correct on
 * any host.
 */
#define LOAD64_LE(SRC) load64_le(SRC)
static inline uint64_t
load64_le(const uint8_t src[8])
{
#ifdef NATIVE_LITTLE_ENDIAN
    uint64_t w;
    memcpy(&w, src, sizeof w);
    return w;
#else
    uint64_t w = (uint64_t) src[0];
    w |= (uint64_t) src[1] << 8;
    w |= (uint64_t) src[2] << 16;
    w |= (uint64_t) src[3] << 24;
    w |= (uint64_t) src[4] << 32;
    w |= (uint64_t) src[5] << 40;
    w |= (uint64_t) src[6] << 48;
    w |= (uint64_t) src[7] << 56;
    return w;
#endif
}

#define STORE64_LE(DST, W) store64_le((DST), (W))
static inline void
store64_le(uint8_t dst[8], uint64_t w)
{
#ifdef NATIVE_LITTLE_ENDIAN
    memcpy(dst, &w, sizeof w);
#else
    dst[0] = (uint8_t) w; w >>= 8;
    dst[1] = (uint8_t) w; w >>= 8;
    dst[2] = (uint8_t) w; w >>= 8;
    dst[3] = (uint8_t) w; w >>= 8;
    dst[4] = (uint8_t) w; w >>= 8;
    dst[5] = (uint8_t) w; w >>= 8;
    dst[6] = (uint8_t) w; w >>= 8;
    dst[7] = (uint8_t) w;
#endif
}

#define LOAD32_LE(SRC) load32_le(SRC)
static inline uint32_t
load32_le(const uint8_t src[4])
{
#ifdef NATIVE_LITTLE_ENDIAN
    uint32_t w;
    memcpy(&w, src, sizeof w);
    return w;
#else
    uint32_t w = (uint32_t) src[0];
    w |= (uint32_t) src[1] << 8;
    w |= (uint32_t) src[2] << 16;
    w |= (uint32_t) src[3] << 24;
    return w;
#endif
}

#define STORE32_LE(DST, W) store32_le((DST), (W))
static inline void
store32_le(uint8_t dst[4], uint32_t w)
{
#ifdef NATIVE_LITTLE_ENDIAN
    memcpy(dst, &w, sizeof w);
#else
    dst[0] = (uint8_t) w; w >>= 8;
    dst[1] = (uint8_t) w; w >>= 8;
    dst[2] = (uint8_t) w; w >>= 8;
    dst[3] = (uint8_t) w;
#endif
}

/* ----- */

#define LOAD64_BE(SRC) load64_be(SRC)
static inline uint64_t
load64_be(const uint8_t src[8])
{
#ifdef NATIVE_BIG_ENDIAN
    uint64_t w;
    memcpy(&w, src, sizeof w);
    return w;
#else
    uint64_t w = (uint64_t) src[7];
    w |= (uint64_t) src[6] << 8;
    w |= (uint64_t) src[5] << 16;
    w |= (uint64_t) src[4] << 24;
    w |= (uint64_t) src[3] << 32;
    w |= (uint64_t) src[2] << 40;
    w |= (uint64_t) src[1] << 48;
    w |= (uint64_t) src[0] << 56;
    return w;
#endif
}

#define STORE64_BE(DST, W) store64_be((DST), (W))
static inline void
store64_be(uint8_t dst[8], uint64_t w)
{
#ifdef NATIVE_BIG_ENDIAN
    memcpy(dst, &w, sizeof w);
#else
    dst[7] = (uint8_t) w; w >>= 8;
    dst[6] = (uint8_t) w; w >>= 8;
    dst[5] = (uint8_t) w; w >>= 8;
    dst[4] = (uint8_t) w; w >>= 8;
    dst[3] = (uint8_t) w; w >>= 8;
    dst[2] = (uint8_t) w; w >>= 8;
    dst[1] = (uint8_t) w; w >>= 8;
    dst[0] = (uint8_t) w;
#endif
}

#define LOAD32_BE(SRC) load32_be(SRC)
static inline uint32_t
load32_be(const uint8_t src[4])
{
#ifdef NATIVE_BIG_ENDIAN
    uint32_t w;
    memcpy(&w, src, sizeof w);
    return w;
#else
    uint32_t w = (uint32_t) src[3];
    w |= (uint32_t) src[2] << 8;
    w |= (uint32_t) src[1] << 16;
    w |= (uint32_t) src[0] << 24;
    return w;
#endif
}

#define STORE32_BE(DST, W) store32_be((DST), (W))
static inline void
store32_be(uint8_t dst[4], uint32_t w)
{
#ifdef NATIVE_BIG_ENDIAN
    memcpy(dst, &w, sizeof w);
#else
    dst[3] = (uint8_t) w; w >>= 8;
    dst[2] = (uint8_t) w; w >>= 8;
    dst[1] = (uint8_t) w; w >>= 8;
    dst[0] = (uint8_t) w;
#endif
}

/* out[i] ^= in[i] for i in [0, n). */
#define XOR_BUF(OUT, IN, N) xor_buf((OUT), (IN), (N))
static inline void
xor_buf(unsigned char *out, const unsigned char *in, size_t n)
{
    size_t i;

    for (i = 0; i < n; i++) {
        out[i] ^= in[i];
    }
}

/* Neutralize __attribute__ on compilers that do not support it. */
#if !defined(__clang__) && !defined(__GNUC__)
# ifdef __attribute__
#  undef __attribute__
# endif
# define __attribute__(a)
#endif

#ifndef CRYPTO_ALIGN
# if defined(__INTEL_COMPILER) || defined(_MSC_VER)
#  define CRYPTO_ALIGN(x) __declspec(align(x))
# else
#  define CRYPTO_ALIGN(x) __attribute__ ((aligned(x)))
# endif
#endif

/* MSVC ships all SIMD intrinsics in <intrin.h>; advertise the headers
 * the feature-detection code checks for. */
#if defined(_MSC_VER) && \
    (defined(_M_X64) || defined(_M_AMD64) || defined(_M_IX86))
# include <intrin.h>
# define HAVE_INTRIN_H    1
# define HAVE_MMINTRIN_H  1
# define HAVE_EMMINTRIN_H 1
# define HAVE_PMMINTRIN_H 1
# define HAVE_TMMINTRIN_H 1
# define HAVE_SMMINTRIN_H 1
# define HAVE_AVXINTRIN_H 1
# if _MSC_VER >= 1600
#  define HAVE_WMMINTRIN_H 1
# endif
# if _MSC_VER >= 1700 && defined(_M_X64)
#  define HAVE_AVX2INTRIN_H 1
# endif
#elif defined(HAVE_INTRIN_H)
# include <intrin.h>
#endif

/* Optional ctgrind hooks for constant-time analysis; no-ops otherwise. */
#ifdef HAVE_LIBCTGRIND
extern void ct_poison  (const void *, size_t);
extern void ct_unpoison(const void *, size_t);
# define POISON(X, L)   ct_poison((X), (L))
# define UNPOISON(X, L) ct_unpoison((X), (L))
#else
# define POISON(X, L)   (void) 0
# define UNPOISON(X, L) (void) 0
#endif

#endif

@ -0,0 +1,63 @@
#ifndef crypto_hash_sha512_H
#define crypto_hash_sha512_H

/*
 * WARNING: Unless you absolutely need to use SHA512 for interoperatibility,
 * purposes, you might want to consider crypto_generichash() instead.
 * Unlike SHA512, crypto_generichash() is not vulnerable to length
 * extension attacks.
 */

#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>

#include <sodium/export.h>

#ifdef __cplusplus
#ifdef __GNUC__
#pragma GCC diagnostic ignored "-Wlong-long"
#endif
extern "C"
{
#endif

/* Streaming SHA-512 state: 8 chaining words, a two-word byte counter,
 * and a 128-byte (one block) input buffer. */
typedef struct crypto_hash_sha512_state
{
uint64_t state[8];
uint64_t count[2];
uint8_t buf[128];
} crypto_hash_sha512_state;

SODIUM_EXPORT
size_t
crypto_hash_sha512_statebytes(void);

#define crypto_hash_sha512_BYTES 64U
SODIUM_EXPORT
size_t
crypto_hash_sha512_bytes(void);

/* One-shot: out receives the 64-byte digest of in[0..inlen). */
SODIUM_EXPORT
int
crypto_hash_sha512(unsigned char *out, const unsigned char *in,
unsigned long long inlen);

/* Streaming interface: init, any number of updates, then final. */
SODIUM_EXPORT
int
crypto_hash_sha512_init(crypto_hash_sha512_state *state);

SODIUM_EXPORT
int
crypto_hash_sha512_update(crypto_hash_sha512_state *state,
const unsigned char *in, unsigned long long inlen);

SODIUM_EXPORT
int
crypto_hash_sha512_final(crypto_hash_sha512_state *state, unsigned char *out);

#ifdef __cplusplus
}
#endif

#endif

@ -0,0 +1,66 @@
#ifndef crypto_stream_xchacha20_H
#define crypto_stream_xchacha20_H

/*
 * WARNING: This is just a stream cipher. It is NOT authenticated encryption.
 * While it provides some protection against eavesdropping, it does NOT
 * provide any security against active attacks.
 * Unless you know what you're doing, what you are looking for is probably
 * the crypto_box functions.
 */

#include <stddef.h>
#include <stdint.h>
#include <sodium/export.h>

#ifdef __cplusplus
#ifdef __GNUC__
#pragma GCC diagnostic ignored "-Wlong-long"
#endif
extern "C"
{
#endif

/* 256-bit key, 192-bit (extended) nonce. */
#define crypto_stream_xchacha20_KEYBYTES 32U
SODIUM_EXPORT
size_t
crypto_stream_xchacha20_keybytes(void);

#define crypto_stream_xchacha20_NONCEBYTES 24U
SODIUM_EXPORT
size_t
crypto_stream_xchacha20_noncebytes(void);

#define crypto_stream_xchacha20_MESSAGEBYTES_MAX SODIUM_SIZE_MAX
SODIUM_EXPORT
size_t
crypto_stream_xchacha20_messagebytes_max(void);

/* Write clen bytes of raw keystream for (n, k) into c. */
SODIUM_EXPORT
int
crypto_stream_xchacha20(unsigned char *c, unsigned long long clen,
const unsigned char *n, const unsigned char *k);

/* c = m XOR keystream(n, k); encryption and decryption are identical. */
SODIUM_EXPORT
int
crypto_stream_xchacha20_xor(unsigned char *c, const unsigned char *m,
unsigned long long mlen, const unsigned char *n,
const unsigned char *k);

/* Same as _xor, but starts the keystream at block counter ic —
 * presumably for seeking/resuming within a stream. */
SODIUM_EXPORT
int
crypto_stream_xchacha20_xor_ic(unsigned char *c, const unsigned char *m,
unsigned long long mlen,
const unsigned char *n, uint64_t ic,
const unsigned char *k);

/* Fill k with a freshly generated random key. */
SODIUM_EXPORT
void
crypto_stream_xchacha20_keygen(
unsigned char k[crypto_stream_xchacha20_KEYBYTES]);

#ifdef __cplusplus
}
#endif

#endif

@ -0,0 +1,26 @@
#ifndef crypto_verify_16_H
#define crypto_verify_16_H

#include <stddef.h>
#include <sodium/export.h>

#ifdef __cplusplus
extern "C"
{
#endif

#define crypto_verify_16_BYTES 16U
SODIUM_EXPORT
size_t
crypto_verify_16_bytes(void);

/* Compare two 16-byte buffers; returns 0 if equal, -1 otherwise.
 * Documented by libsodium as running in constant time (implementation
 * elsewhere). The result must not be ignored, hence
 * warn_unused_result. */
SODIUM_EXPORT
int
crypto_verify_16(const unsigned char *x, const unsigned char *y)
__attribute__((warn_unused_result));

#ifdef __cplusplus
}
#endif

#endif

@ -0,0 +1,26 @@
#ifndef crypto_verify_32_H
#define crypto_verify_32_H

#include <stddef.h>
#include <sodium/export.h>

#ifdef __cplusplus
extern "C"
{
#endif

#define crypto_verify_32_BYTES 32U
SODIUM_EXPORT
size_t
crypto_verify_32_bytes(void);

/* Compare two 32-byte buffers; returns 0 if equal, -1 otherwise.
 * Documented by libsodium as running in constant time (implementation
 * elsewhere). The result must not be ignored, hence
 * warn_unused_result. */
SODIUM_EXPORT
int
crypto_verify_32(const unsigned char *x, const unsigned char *y)
__attribute__((warn_unused_result));

#ifdef __cplusplus
}
#endif

#endif

@ -0,0 +1,26 @@
#ifndef crypto_verify_64_H
#define crypto_verify_64_H

#include <stddef.h>
#include <sodium/export.h>

#ifdef __cplusplus
extern "C"
{
#endif

#define crypto_verify_64_BYTES 64U
SODIUM_EXPORT
size_t
crypto_verify_64_bytes(void);

/* Compare two 64-byte buffers; returns 0 if equal, -1 otherwise.
 * Documented by libsodium as running in constant time (implementation
 * elsewhere). The result must not be ignored, hence
 * warn_unused_result. */
SODIUM_EXPORT
int
crypto_verify_64(const unsigned char *x, const unsigned char *y)
__attribute__((warn_unused_result));

#ifdef __cplusplus
}
#endif

#endif

@ -0,0 +1,246 @@
#ifndef common_H
#define common_H 1

/*
 * Shared private helpers for the bundled crypto implementations:
 * compile-time assertions, optional 128-bit integers, bit rotations,
 * endian-explicit loads/stores, and buffer XOR.
 */

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

/* Statement-scope compile-time assertion: a negative array size is a
 * constraint violation, so compilation fails when X is false. */
#define COMPILER_ASSERT(X) (void) sizeof(char[(X) ? 1 : -1])

#ifdef HAVE_TI_MODE
# if defined(__SIZEOF_INT128__)
typedef unsigned __int128 uint128_t;
# else
typedef unsigned uint128_t __attribute__((mode(TI)));
# endif
#endif

/*
 * Rotations. The shift counts are masked so that a rotation count of 0
 * (or the full word width) does not perform an undefined shift by
 * 32/64 bits (C11 6.5.7); compilers recognize this masked form and
 * still emit a single rotate instruction.
 */
#define ROTL32(X, B) rotl32((X), (B))
static inline uint32_t
rotl32(const uint32_t x, const int b)
{
    return (x << (b & 31)) | (x >> ((32 - b) & 31));
}

#define ROTL64(X, B) rotl64((X), (B))
static inline uint64_t
rotl64(const uint64_t x, const int b)
{
    return (x << (b & 63)) | (x >> ((64 - b) & 63));
}

#define ROTR32(X, B) rotr32((X), (B))
static inline uint32_t
rotr32(const uint32_t x, const int b)
{
    return (x >> (b & 31)) | (x << ((32 - b) & 31));
}

#define ROTR64(X, B) rotr64((X), (B))
static inline uint64_t
rotr64(const uint64_t x, const int b)
{
    return (x >> (b & 63)) | (x << ((64 - b) & 63));
}

/*
 * Endian-explicit loads and stores. When the host endianness matches,
 * a memcpy is used (alignment-safe, compiles to a plain move);
 * otherwise the bytes are assembled explicitly, which is correct on
 * any host.
 */
#define LOAD64_LE(SRC) load64_le(SRC)
static inline uint64_t
load64_le(const uint8_t src[8])
{
#ifdef NATIVE_LITTLE_ENDIAN
    uint64_t w;
    memcpy(&w, src, sizeof w);
    return w;
#else
    uint64_t w = (uint64_t) src[0];
    w |= (uint64_t) src[1] << 8;
    w |= (uint64_t) src[2] << 16;
    w |= (uint64_t) src[3] << 24;
    w |= (uint64_t) src[4] << 32;
    w |= (uint64_t) src[5] << 40;
    w |= (uint64_t) src[6] << 48;
    w |= (uint64_t) src[7] << 56;
    return w;
#endif
}

#define STORE64_LE(DST, W) store64_le((DST), (W))
static inline void
store64_le(uint8_t dst[8], uint64_t w)
{
#ifdef NATIVE_LITTLE_ENDIAN
    memcpy(dst, &w, sizeof w);
#else
    dst[0] = (uint8_t) w; w >>= 8;
    dst[1] = (uint8_t) w; w >>= 8;
    dst[2] = (uint8_t) w; w >>= 8;
    dst[3] = (uint8_t) w; w >>= 8;
    dst[4] = (uint8_t) w; w >>= 8;
    dst[5] = (uint8_t) w; w >>= 8;
    dst[6] = (uint8_t) w; w >>= 8;
    dst[7] = (uint8_t) w;
#endif
}

#define LOAD32_LE(SRC) load32_le(SRC)
static inline uint32_t
load32_le(const uint8_t src[4])
{
#ifdef NATIVE_LITTLE_ENDIAN
    uint32_t w;
    memcpy(&w, src, sizeof w);
    return w;
#else
    uint32_t w = (uint32_t) src[0];
    w |= (uint32_t) src[1] << 8;
    w |= (uint32_t) src[2] << 16;
    w |= (uint32_t) src[3] << 24;
    return w;
#endif
}

#define STORE32_LE(DST, W) store32_le((DST), (W))
static inline void
store32_le(uint8_t dst[4], uint32_t w)
{
#ifdef NATIVE_LITTLE_ENDIAN
    memcpy(dst, &w, sizeof w);
#else
    dst[0] = (uint8_t) w; w >>= 8;
    dst[1] = (uint8_t) w; w >>= 8;
    dst[2] = (uint8_t) w; w >>= 8;
    dst[3] = (uint8_t) w;
#endif
}

/* ----- */

#define LOAD64_BE(SRC) load64_be(SRC)
static inline uint64_t
load64_be(const uint8_t src[8])
{
#ifdef NATIVE_BIG_ENDIAN
    uint64_t w;
    memcpy(&w, src, sizeof w);
    return w;
#else
    uint64_t w = (uint64_t) src[7];
    w |= (uint64_t) src[6] << 8;
    w |= (uint64_t) src[5] << 16;
    w |= (uint64_t) src[4] << 24;
    w |= (uint64_t) src[3] << 32;
    w |= (uint64_t) src[2] << 40;
    w |= (uint64_t) src[1] << 48;
    w |= (uint64_t) src[0] << 56;
    return w;
#endif
}

#define STORE64_BE(DST, W) store64_be((DST), (W))
static inline void
store64_be(uint8_t dst[8], uint64_t w)
{
#ifdef NATIVE_BIG_ENDIAN
    memcpy(dst, &w, sizeof w);
#else
    dst[7] = (uint8_t) w; w >>= 8;
    dst[6] = (uint8_t) w; w >>= 8;
    dst[5] = (uint8_t) w; w >>= 8;
    dst[4] = (uint8_t) w; w >>= 8;
    dst[3] = (uint8_t) w; w >>= 8;
    dst[2] = (uint8_t) w; w >>= 8;
    dst[1] = (uint8_t) w; w >>= 8;
    dst[0] = (uint8_t) w;
#endif
}

#define LOAD32_BE(SRC) load32_be(SRC)
static inline uint32_t
load32_be(const uint8_t src[4])
{
#ifdef NATIVE_BIG_ENDIAN
    uint32_t w;
    memcpy(&w, src, sizeof w);
    return w;
#else
    uint32_t w = (uint32_t) src[3];
    w |= (uint32_t) src[2] << 8;
    w |= (uint32_t) src[1] << 16;
    w |= (uint32_t) src[0] << 24;
    return w;
#endif
}

#define STORE32_BE(DST, W) store32_be((DST), (W))
static inline void
store32_be(uint8_t dst[4], uint32_t w)
{
#ifdef NATIVE_BIG_ENDIAN
    memcpy(dst, &w, sizeof w);
#else
    dst[3] = (uint8_t) w; w >>= 8;
    dst[2] = (uint8_t) w; w >>= 8;
    dst[1] = (uint8_t) w; w >>= 8;
    dst[0] = (uint8_t) w;
#endif
}

/* out[i] ^= in[i] for i in [0, n). */
#define XOR_BUF(OUT, IN, N) xor_buf((OUT), (IN), (N))
static inline void
xor_buf(unsigned char *out, const unsigned char *in, size_t n)
{
    size_t i;

    for (i = 0; i < n; i++) {
        out[i] ^= in[i];
    }
}

/* Neutralize __attribute__ on compilers that do not support it. */
#if !defined(__clang__) && !defined(__GNUC__)
# ifdef __attribute__
#  undef __attribute__
# endif
# define __attribute__(a)
#endif

#ifndef CRYPTO_ALIGN
# if defined(__INTEL_COMPILER) || defined(_MSC_VER)
#  define CRYPTO_ALIGN(x) __declspec(align(x))
# else
#  define CRYPTO_ALIGN(x) __attribute__ ((aligned(x)))
# endif
#endif

/* MSVC ships all SIMD intrinsics in <intrin.h>; advertise the headers
 * the feature-detection code checks for. */
#if defined(_MSC_VER) && \
    (defined(_M_X64) || defined(_M_AMD64) || defined(_M_IX86))
# include <intrin.h>
# define HAVE_INTRIN_H    1
# define HAVE_MMINTRIN_H  1
# define HAVE_EMMINTRIN_H 1
# define HAVE_PMMINTRIN_H 1
# define HAVE_TMMINTRIN_H 1
# define HAVE_SMMINTRIN_H 1
# define HAVE_AVXINTRIN_H 1
# if _MSC_VER >= 1600
#  define HAVE_WMMINTRIN_H 1
# endif
# if _MSC_VER >= 1700 && defined(_M_X64)
#  define HAVE_AVX2INTRIN_H 1
# endif
#elif defined(HAVE_INTRIN_H)
# include <intrin.h>
#endif

/* Optional ctgrind hooks for constant-time analysis; no-ops otherwise. */
#ifdef HAVE_LIBCTGRIND
extern void ct_poison  (const void *, size_t);
extern void ct_unpoison(const void *, size_t);
# define POISON(X, L)   ct_poison((X), (L))
# define UNPOISON(X, L) ct_unpoison((X), (L))
#else
# define POISON(X, L)   (void) 0
# define UNPOISON(X, L) (void) 0
#endif

#endif

@ -0,0 +1,125 @@
#ifndef ed25519_ref10_H
#define ed25519_ref10_H
#include <stddef.h>
#include <stdint.h>
/*
 fe means field element.
 Here the field is \Z/(2^255-19).
 */
#ifdef HAVE_TI_MODE
/* 64-bit representation: five limbs (51 bits each, radix 2^51). */
typedef uint64_t fe25519[5];
#else
/* Portable 32-bit representation: ten limbs (radix 2^25.5). */
typedef int32_t fe25519[10];
#endif
/* out = z^-1 in GF(2^255-19). */
void fe25519_invert(fe25519 out, const fe25519 z);
/* Decode a 32-byte little-endian string s into a field element h. */
void fe25519_frombytes(fe25519 h, const unsigned char *s);
/* Encode field element h as a 32-byte little-endian string s. */
void fe25519_tobytes(unsigned char *s, const fe25519 h);
#ifdef HAVE_TI_MODE
# include "ed25519_ref10_fe_51.h"
#else
# include "ed25519_ref10_fe_25_5.h"
#endif
/*
 ge means group element.
 Here the group is the set of pairs (x,y) of field elements
 satisfying -x^2 + y^2 = 1 + d x^2y^2
 where d = -121665/121666.
 Representations:
 ge25519_p2 (projective): (X:Y:Z) satisfying x=X/Z, y=Y/Z
 ge25519_p3 (extended): (X:Y:Z:T) satisfying x=X/Z, y=Y/Z, XY=ZT
 ge25519_p1p1 (completed): ((X:Z),(Y:T)) satisfying x=X/Z, y=Y/T
 ge25519_precomp (Duif): (y+x,y-x,2dxy)
 */
typedef struct {
    fe25519 X;
    fe25519 Y;
    fe25519 Z;
} ge25519_p2;
typedef struct {
    fe25519 X;
    fe25519 Y;
    fe25519 Z;
    fe25519 T;
} ge25519_p3;
typedef struct {
    fe25519 X;
    fe25519 Y;
    fe25519 Z;
    fe25519 T;
} ge25519_p1p1;
typedef struct {
    fe25519 yplusx;
    fe25519 yminusx;
    fe25519 xy2d;
} ge25519_precomp;
typedef struct {
    fe25519 YplusX;
    fe25519 YminusX;
    fe25519 Z;
    fe25519 T2d;
} ge25519_cached;
/* Point encoding/decoding (32-byte compressed form). The frombytes
   variants return nonzero on a decoding failure. */
void ge25519_tobytes(unsigned char *s, const ge25519_p2 *h);
void ge25519_p3_tobytes(unsigned char *s, const ge25519_p3 *h);
int ge25519_frombytes(ge25519_p3 *h, const unsigned char *s);
int ge25519_frombytes_negate_vartime(ge25519_p3 *h, const unsigned char *s);
/* Conversions between the point representations listed above. */
void ge25519_p3_to_cached(ge25519_cached *r, const ge25519_p3 *p);
void ge25519_p1p1_to_p2(ge25519_p2 *r, const ge25519_p1p1 *p);
void ge25519_p1p1_to_p3(ge25519_p3 *r, const ge25519_p1p1 *p);
/* Group addition/subtraction, producing a completed point. */
void ge25519_add(ge25519_p1p1 *r, const ge25519_p3 *p, const ge25519_cached *q);
void ge25519_sub(ge25519_p1p1 *r, const ge25519_p3 *p, const ge25519_cached *q);
/* Scalar multiplication; the _vartime variant is not constant-time and
   must only be used with public inputs. */
void ge25519_scalarmult_base(ge25519_p3 *h, const unsigned char *a);
void ge25519_double_scalarmult_vartime(ge25519_p2 *r, const unsigned char *a,
                                       const ge25519_p3 *A,
                                       const unsigned char *b);
void ge25519_scalarmult(ge25519_p3 *h, const unsigned char *a,
                        const ge25519_p3 *p);
/* Validation predicates on encoded/decoded points (return 0 or 1). */
int ge25519_is_canonical(const unsigned char *s);
int ge25519_is_on_curve(const ge25519_p3 *p);
int ge25519_is_on_main_subgroup(const ge25519_p3 *p);
int ge25519_has_small_order(const unsigned char s[32]);
/* Map 32 uniform bytes r to a point encoding s. */
void ge25519_from_uniform(unsigned char s[32], const unsigned char r[32]);
/*
 The set of scalars is \Z/l
 where l = 2^252 + 27742317777372353535851937790883648493.
 */
/* Reduce a 64-byte value s mod l, in place (result in first 32 bytes). */
void sc25519_reduce(unsigned char *s);
/* s = (a * b + c) mod l. */
void sc25519_muladd(unsigned char *s, const unsigned char *a,
                    const unsigned char *b, const unsigned char *c);
int sc25519_is_canonical(const unsigned char *s);
#endif

File diff suppressed because it is too large Load Diff

@ -0,0 +1,518 @@
#include <string.h>
#include "private/common.h"
#include "utils.h"
/* Set h to the field element 0 (clear all five limbs). */
static inline void
fe25519_0(fe25519 h)
{
    int i;

    for (i = 0; i < 5; i++) {
        h[i] = 0;
    }
}
/* Set h to the field element 1 (limb 0 = 1, remaining limbs 0). */
static inline void
fe25519_1(fe25519 h)
{
    int i;

    h[0] = 1;
    for (i = 1; i < 5; i++) {
        h[i] = 0;
    }
}
/*
 h = f + g
 Plain limb-wise addition; no reduction is performed here, so limbs may
 temporarily exceed 51 bits until a later operation reduces them.
 Can overlap h with f or g.
 */
static inline void
fe25519_add(fe25519 h, const fe25519 f, const fe25519 g)
{
    int i;

    for (i = 0; i < 5; i++) {
        h[i] = f[i] + g[i];
    }
}
/*
 h = f - g

 First carries g into a reduced form (limbs < 2^51, with the top carry
 folded back into limb 0 times 19, since 2^255 = 19 mod 2^255-19), then
 computes f - g with a bias of 2p added to each limb of f so no
 intermediate difference can go negative.
 */
static void
fe25519_sub(fe25519 h, const fe25519 f, const fe25519 g)
{
    const uint64_t mask = 0x7ffffffffffffULL;
    uint64_t h0, h1, h2, h3, h4;
    /* Load g and propagate carries so every limb fits in 51 bits. */
    h0 = g[0];
    h1 = g[1];
    h2 = g[2];
    h3 = g[3];
    h4 = g[4];
    h1 += h0 >> 51;
    h0 &= mask;
    h2 += h1 >> 51;
    h1 &= mask;
    h3 += h2 >> 51;
    h2 &= mask;
    h4 += h3 >> 51;
    h3 &= mask;
    h0 += 19ULL * (h4 >> 51);
    h4 &= mask;
    /* Subtract from f biased by 2p (limb 0 gets 2*(2^51-19), the rest
       2*(2^51-1)) to keep every limb non-negative. */
    h0 = (f[0] + 0xfffffffffffdaULL) - h0;
    h1 = (f[1] + 0xffffffffffffeULL) - h1;
    h2 = (f[2] + 0xffffffffffffeULL) - h2;
    h3 = (f[3] + 0xffffffffffffeULL) - h3;
    h4 = (f[4] + 0xffffffffffffeULL) - h4;
    h[0] = h0;
    h[1] = h1;
    h[2] = h2;
    h[3] = h3;
    h[4] = h4;
}
/* h = -f, computed as 0 - f via fe25519_sub. */
static inline void
fe25519_neg(fe25519 h, const fe25519 f)
{
    fe25519 t;

    fe25519_0(t);
    fe25519_sub(h, t, f);
}
/*
 Conditional move: if b == 1, copy g into f; if b == 0, leave f alone.
 Preconditions: b in {0,1}.
 Branch-free (constant-time): b is expanded into an all-ones/all-zeros
 mask and applied to the XOR difference of each limb.
 */
static void
fe25519_cmov(fe25519 f, const fe25519 g, unsigned int b)
{
    const uint64_t mask = (uint64_t) (-(int64_t) b);
    uint64_t diff;
    int i;

    for (i = 0; i < 5; i++) {
        diff = (f[i] ^ g[i]) & mask;
        f[i] ^= diff;
    }
}
/*
 Conditional swap: if b == 1, exchange f and g; if b == 0, leave both
 unchanged.
 Preconditions: b in {0,1}.
 Branch-free (constant-time): the masked XOR difference is applied to
 both operands, swapping each limb pair exactly when the mask is set.
 */
static void
fe25519_cswap(fe25519 f, fe25519 g, unsigned int b)
{
    const uint64_t mask = (uint64_t) (-(int64_t) b);
    uint64_t t;
    int i;

    for (i = 0; i < 5; i++) {
        t = (f[i] ^ g[i]) & mask;
        f[i] ^= t;
        g[i] ^= t;
    }
}
/* h = f (limb-by-limb copy; safe when h and f alias). */
static inline void
fe25519_copy(fe25519 h, const fe25519 f)
{
    int i;

    for (i = 0; i < 5; i++) {
        h[i] = f[i];
    }
}
/*
 return 1 if f is in {1,3,5,...,q-2}
 return 0 if f is in {0,2,4,...,q-1}
 i.e. the low bit of the canonical little-endian encoding of f.
 */
static inline int
fe25519_isnegative(const fe25519 f)
{
    unsigned char enc[32];

    fe25519_tobytes(enc, f);
    return enc[0] & 1;
}
/*
 return 1 if f == 0
 return 0 if f != 0
 Checks the canonical 32-byte encoding via sodium_is_zero (constant-time
 byte comparison).
 */
static inline int
fe25519_iszero(const fe25519 f)
{
    unsigned char enc[32];

    fe25519_tobytes(enc, f);
    return sodium_is_zero(enc, 32);
}
/*
 h = f * g
 Can overlap h with f or g.

 Schoolbook multiplication over five 51-bit limbs. Cross terms that
 would land in limbs >= 5 are folded back with a factor of 19, since
 2^255 = 19 (mod 2^255-19). Partial products accumulate in 128-bit
 integers; a final carry chain brings the limbs back under 51 bits.
 */
static void
fe25519_mul(fe25519 h, const fe25519 f, const fe25519 g)
{
    const uint64_t mask = 0x7ffffffffffffULL;
    uint128_t r0, r1, r2, r3, r4, carry;
    uint64_t f0, f1, f2, f3, f4;
    uint64_t f1_19, f2_19, f3_19, f4_19;
    uint64_t g0, g1, g2, g3, g4;
    uint64_t r00, r01, r02, r03, r04;
    f0 = f[0];
    f1 = f[1];
    f2 = f[2];
    f3 = f[3];
    f4 = f[4];
    g0 = g[0];
    g1 = g[1];
    g2 = g[2];
    g3 = g[3];
    g4 = g[4];
    /* Pre-scale the high limbs of f by 19 for the wrapped cross terms. */
    f1_19 = 19ULL * f1;
    f2_19 = 19ULL * f2;
    f3_19 = 19ULL * f3;
    f4_19 = 19ULL * f4;
    /* r[k] = sum of f[i]*g[j] with i+j == k (mod 5), wrapped terms *19. */
    r0 = ((uint128_t) f0 ) * ((uint128_t) g0);
    r0 += ((uint128_t) f1_19) * ((uint128_t) g4);
    r0 += ((uint128_t) f2_19) * ((uint128_t) g3);
    r0 += ((uint128_t) f3_19) * ((uint128_t) g2);
    r0 += ((uint128_t) f4_19) * ((uint128_t) g1);
    r1 = ((uint128_t) f0 ) * ((uint128_t) g1);
    r1 += ((uint128_t) f1 ) * ((uint128_t) g0);
    r1 += ((uint128_t) f2_19) * ((uint128_t) g4);
    r1 += ((uint128_t) f3_19) * ((uint128_t) g3);
    r1 += ((uint128_t) f4_19) * ((uint128_t) g2);
    r2 = ((uint128_t) f0 ) * ((uint128_t) g2);
    r2 += ((uint128_t) f1 ) * ((uint128_t) g1);
    r2 += ((uint128_t) f2 ) * ((uint128_t) g0);
    r2 += ((uint128_t) f3_19) * ((uint128_t) g4);
    r2 += ((uint128_t) f4_19) * ((uint128_t) g3);
    r3 = ((uint128_t) f0 ) * ((uint128_t) g3);
    r3 += ((uint128_t) f1 ) * ((uint128_t) g2);
    r3 += ((uint128_t) f2 ) * ((uint128_t) g1);
    r3 += ((uint128_t) f3 ) * ((uint128_t) g0);
    r3 += ((uint128_t) f4_19) * ((uint128_t) g4);
    r4 = ((uint128_t) f0 ) * ((uint128_t) g4);
    r4 += ((uint128_t) f1 ) * ((uint128_t) g3);
    r4 += ((uint128_t) f2 ) * ((uint128_t) g2);
    r4 += ((uint128_t) f3 ) * ((uint128_t) g1);
    r4 += ((uint128_t) f4 ) * ((uint128_t) g0);
    /* Carry chain: mask each limb to 51 bits, push the excess upward;
       the carry out of limb 4 wraps to limb 0 times 19, then two more
       carries settle limbs 0..2. */
    r00 = ((uint64_t) r0) & mask;
    carry = r0 >> 51;
    r1 += carry;
    r01 = ((uint64_t) r1) & mask;
    carry = r1 >> 51;
    r2 += carry;
    r02 = ((uint64_t) r2) & mask;
    carry = r2 >> 51;
    r3 += carry;
    r03 = ((uint64_t) r3) & mask;
    carry = r3 >> 51;
    r4 += carry;
    r04 = ((uint64_t) r4) & mask;
    carry = r4 >> 51;
    r00 += 19ULL * (uint64_t) carry;
    carry = r00 >> 51;
    r00 &= mask;
    r01 += (uint64_t) carry;
    carry = r01 >> 51;
    r01 &= mask;
    r02 += (uint64_t) carry;
    h[0] = r00;
    h[1] = r01;
    h[2] = r02;
    h[3] = r03;
    h[4] = r04;
}
/*
 h = f * f
 Can overlap h with f.

 Squaring specialization of fe25519_mul: symmetric cross terms are
 computed once and doubled (the *_2 values), and wrapped terms use
 38 = 2*19 or 19 pre-scaled limbs. Same final carry chain as mul.
 */
static void
fe25519_sq(fe25519 h, const fe25519 f)
{
    const uint64_t mask = 0x7ffffffffffffULL;
    uint128_t r0, r1, r2, r3, r4, carry;
    uint64_t f0, f1, f2, f3, f4;
    uint64_t f0_2, f1_2, f1_38, f2_38, f3_38, f3_19, f4_19;
    uint64_t r00, r01, r02, r03, r04;
    f0 = f[0];
    f1 = f[1];
    f2 = f[2];
    f3 = f[3];
    f4 = f[4];
    /* Doubled and 19/38-scaled limbs for the symmetric/wrapped terms. */
    f0_2 = f0 << 1;
    f1_2 = f1 << 1;
    f1_38 = 38ULL * f1;
    f2_38 = 38ULL * f2;
    f3_38 = 38ULL * f3;
    f3_19 = 19ULL * f3;
    f4_19 = 19ULL * f4;
    r0 = ((uint128_t) f0 ) * ((uint128_t) f0);
    r0 += ((uint128_t) f1_38) * ((uint128_t) f4);
    r0 += ((uint128_t) f2_38) * ((uint128_t) f3);
    r1 = ((uint128_t) f0_2 ) * ((uint128_t) f1);
    r1 += ((uint128_t) f2_38) * ((uint128_t) f4);
    r1 += ((uint128_t) f3_19) * ((uint128_t) f3);
    r2 = ((uint128_t) f0_2 ) * ((uint128_t) f2);
    r2 += ((uint128_t) f1 ) * ((uint128_t) f1);
    r2 += ((uint128_t) f3_38) * ((uint128_t) f4);
    r3 = ((uint128_t) f0_2 ) * ((uint128_t) f3);
    r3 += ((uint128_t) f1_2 ) * ((uint128_t) f2);
    r3 += ((uint128_t) f4_19) * ((uint128_t) f4);
    r4 = ((uint128_t) f0_2 ) * ((uint128_t) f4);
    r4 += ((uint128_t) f1_2 ) * ((uint128_t) f3);
    r4 += ((uint128_t) f2 ) * ((uint128_t) f2);
    /* Carry chain identical to fe25519_mul. */
    r00 = ((uint64_t) r0) & mask;
    carry = r0 >> 51;
    r1 += carry;
    r01 = ((uint64_t) r1) & mask;
    carry = r1 >> 51;
    r2 += carry;
    r02 = ((uint64_t) r2) & mask;
    carry = r2 >> 51;
    r3 += carry;
    r03 = ((uint64_t) r3) & mask;
    carry = r3 >> 51;
    r4 += carry;
    r04 = ((uint64_t) r4) & mask;
    carry = r4 >> 51;
    r00 += 19ULL * (uint64_t) carry;
    carry = r00 >> 51;
    r00 &= mask;
    r01 += (uint64_t) carry;
    carry = r01 >> 51;
    r01 &= mask;
    r02 += (uint64_t) carry;
    h[0] = r00;
    h[1] = r01;
    h[2] = r02;
    h[3] = r03;
    h[4] = r04;
}
/*
 h = 2 * f * f
 Can overlap h with f.

 Same partial products as fe25519_sq; each accumulator is then shifted
 left by one (the factor of 2) before the carry chain.
 */
static void
fe25519_sq2(fe25519 h, const fe25519 f)
{
    const uint64_t mask = 0x7ffffffffffffULL;
    uint128_t r0, r1, r2, r3, r4, carry;
    uint64_t f0, f1, f2, f3, f4;
    uint64_t f0_2, f1_2, f1_38, f2_38, f3_38, f3_19, f4_19;
    uint64_t r00, r01, r02, r03, r04;
    f0 = f[0];
    f1 = f[1];
    f2 = f[2];
    f3 = f[3];
    f4 = f[4];
    /* Doubled and 19/38-scaled limbs for the symmetric/wrapped terms. */
    f0_2 = f0 << 1;
    f1_2 = f1 << 1;
    f1_38 = 38ULL * f1;
    f2_38 = 38ULL * f2;
    f3_38 = 38ULL * f3;
    f3_19 = 19ULL * f3;
    f4_19 = 19ULL * f4;
    r0 = ((uint128_t) f0 ) * ((uint128_t) f0);
    r0 += ((uint128_t) f1_38) * ((uint128_t) f4);
    r0 += ((uint128_t) f2_38) * ((uint128_t) f3);
    r1 = ((uint128_t) f0_2 ) * ((uint128_t) f1);
    r1 += ((uint128_t) f2_38) * ((uint128_t) f4);
    r1 += ((uint128_t) f3_19) * ((uint128_t) f3);
    r2 = ((uint128_t) f0_2 ) * ((uint128_t) f2);
    r2 += ((uint128_t) f1 ) * ((uint128_t) f1);
    r2 += ((uint128_t) f3_38) * ((uint128_t) f4);
    r3 = ((uint128_t) f0_2 ) * ((uint128_t) f3);
    r3 += ((uint128_t) f1_2 ) * ((uint128_t) f2);
    r3 += ((uint128_t) f4_19) * ((uint128_t) f4);
    r4 = ((uint128_t) f0_2 ) * ((uint128_t) f4);
    r4 += ((uint128_t) f1_2 ) * ((uint128_t) f3);
    r4 += ((uint128_t) f2 ) * ((uint128_t) f2);
    /* Multiply the whole result by 2. */
    r0 <<= 1;
    r1 <<= 1;
    r2 <<= 1;
    r3 <<= 1;
    r4 <<= 1;
    /* Carry chain identical to fe25519_mul. */
    r00 = ((uint64_t) r0) & mask;
    carry = r0 >> 51;
    r1 += carry;
    r01 = ((uint64_t) r1) & mask;
    carry = r1 >> 51;
    r2 += carry;
    r02 = ((uint64_t) r2) & mask;
    carry = r2 >> 51;
    r3 += carry;
    r03 = ((uint64_t) r3) & mask;
    carry = r3 >> 51;
    r4 += carry;
    r04 = ((uint64_t) r4) & mask;
    carry = r4 >> 51;
    r00 += 19ULL * (uint64_t) carry;
    carry = r00 >> 51;
    r00 &= mask;
    r01 += (uint64_t) carry;
    carry = r01 >> 51;
    r01 &= mask;
    r02 += (uint64_t) carry;
    h[0] = r00;
    h[1] = r01;
    h[2] = r02;
    h[3] = r03;
    h[4] = r04;
}
/*
 h = f * n for a small (32-bit) scalar n.
 Multiplies limb-by-limb with immediate carry propagation in a 128-bit
 accumulator; the carry out of limb 4 wraps back into limb 0 with a
 factor of 19 (2^255 = 19 mod 2^255-19).
 */
static void
fe25519_scalar_product(fe25519 h, const fe25519 f, uint32_t n)
{
    const uint64_t mask = 0x7ffffffffffffULL;
    uint128_t a;
    uint128_t sn = (uint128_t) n;
    uint64_t h0, h1, h2, h3, h4;
    a = f[0] * sn;
    h0 = ((uint64_t) a) & mask;
    a = f[1] * sn + ((uint64_t) (a >> 51));
    h1 = ((uint64_t) a) & mask;
    a = f[2] * sn + ((uint64_t) (a >> 51));
    h2 = ((uint64_t) a) & mask;
    a = f[3] * sn + ((uint64_t) (a >> 51));
    h3 = ((uint64_t) a) & mask;
    a = f[4] * sn + ((uint64_t) (a >> 51));
    h4 = ((uint64_t) a) & mask;
    /* Fold the final carry back into the low limb. */
    h0 += (a >> 51) * 19ULL;
    h[0] = h0;
    h[1] = h1;
    h[2] = h2;
    h[3] = h3;
    h[4] = h4;
}

@ -0,0 +1,11 @@
#ifndef implementations_H
#define implementations_H
/* Per-primitive implementation selectors. NOTE(review): judging by the
   names these pick the fastest implementation for the current CPU at
   runtime; the selection logic lives in the corresponding .c files and
   is not visible from this header. */
int _crypto_generichash_blake2b_pick_best_implementation(void);
int _crypto_onetimeauth_poly1305_pick_best_implementation(void);
int _crypto_pwhash_argon2_pick_best_implementation(void);
int _crypto_scalarmult_curve25519_pick_best_implementation(void);
int _crypto_stream_chacha20_pick_best_implementation(void);
int _crypto_stream_salsa20_pick_best_implementation(void);
#endif

@ -0,0 +1,7 @@
#ifndef mutex_H
#define mutex_H 1
/* Enter/leave a global critical section; both return an int status.
   The implementations are defined elsewhere and not visible here. */
extern int sodium_crit_enter(void);
extern int sodium_crit_leave(void);
#endif

@ -0,0 +1,50 @@
#ifndef sse2_64_32_H
#define sse2_64_32_H 1
#include "common.h"
#ifdef HAVE_INTRIN_H
# include <intrin.h>
#endif
/* On 32-bit x86 builds that have SSE2 (<emmintrin.h>) but are not
   x86_64, some compilers lack the 64-bit element intrinsics below.
   Provide drop-in replacements built from the 32-bit variants. */
#if defined(HAVE_EMMINTRIN_H) && \
    !(defined(__amd64) || defined(__amd64__) || defined(__x86_64__) || \
      defined(_M_X64) || defined(_M_AMD64))
# include <emmintrin.h>
# include <stdint.h>
/* _mm_set_epi64x(q1, q0): pack two 64-bit values into a __m128i. */
# ifndef _mm_set_epi64x
# define _mm_set_epi64x(Q0, Q1) sodium__mm_set_epi64x((Q0), (Q1))
static inline __m128i
sodium__mm_set_epi64x(int64_t q1, int64_t q0)
{
    /* Split each 64-bit value into its two 32-bit halves via a union. */
    union { int64_t as64; int32_t as32[2]; } x0, x1;
    x0.as64 = q0; x1.as64 = q1;
    return _mm_set_epi32(x1.as32[1], x1.as32[0], x0.as32[1], x0.as32[0]);
}
# endif
/* _mm_set1_epi64x(q): broadcast one 64-bit value to both lanes. */
# ifndef _mm_set1_epi64x
# define _mm_set1_epi64x(Q) sodium__mm_set1_epi64x(Q)
static inline __m128i
sodium__mm_set1_epi64x(int64_t q)
{
    return _mm_set_epi64x(q, q);
}
# endif
/* _mm_cvtsi64_si128(q): load a 64-bit value into the low lane, zero high. */
# ifndef _mm_cvtsi64_si128
# define _mm_cvtsi64_si128(Q) sodium__mm_cvtsi64_si128(Q)
static inline __m128i
sodium__mm_cvtsi64_si128(int64_t q)
{
    union { int64_t as64; int32_t as32[2]; } x;
    x.as64 = q;
    return _mm_setr_epi32(x.as32[0], x.as32[1], 0, 0);
}
# endif
#endif
#endif

@ -0,0 +1,83 @@
#ifndef randombytes_H
#define randombytes_H
#include <stddef.h>
#include <stdint.h>
#include <sys/types.h>
#include <sodium/export.h>
#ifdef __cplusplus
#ifdef __GNUC__
#pragma GCC diagnostic ignored "-Wlong-long"
#endif
extern "C"
{
#endif
/* Pluggable random-number backend. Optional function pointers may be
   NULL; "uniform" falls back to a default implementation when NULL. */
typedef struct randombytes_implementation
{
  const char *(*implementation_name)(void); /* required */
  uint32_t (*random)(void);                 /* required */
  void (*stir)(void);                       /* optional */
  uint32_t (*uniform)(
      const uint32_t upper_bound); /* optional, a default implementation will
                                      be used if NULL */
  void (*buf)(void *const buf, const size_t size); /* required */
  int (*close)(void);                              /* optional */
} randombytes_implementation;
#define randombytes_BYTES_MAX SODIUM_MIN(SODIUM_SIZE_MAX, 0xffffffffUL)
#define randombytes_SEEDBYTES 32U
/* Size of the seed accepted by randombytes_buf_deterministic. */
SODIUM_EXPORT
size_t
randombytes_seedbytes(void);
/* Fill buf with size random bytes from the active implementation. */
SODIUM_EXPORT
void
randombytes_buf(void *const buf, const size_t size);
/* Fill buf with size bytes derived deterministically from seed. */
SODIUM_EXPORT
void
randombytes_buf_deterministic(
    void *const buf, const size_t size,
    const unsigned char seed[randombytes_SEEDBYTES]);
/* A single random 32-bit value. */
SODIUM_EXPORT
uint32_t
randombytes_random(void);
/* A random value in [0, upper_bound). */
SODIUM_EXPORT
uint32_t
randombytes_uniform(const uint32_t upper_bound);
/* Re-seed / mix the generator state (backend-defined). */
SODIUM_EXPORT
void
randombytes_stir(void);
/* Release backend resources; returns an int status. */
SODIUM_EXPORT
int
randombytes_close(void);
/* Install a custom backend; all "required" members must be set. */
SODIUM_EXPORT
int
randombytes_set_implementation(randombytes_implementation *impl);
/* Name of the currently active backend. */
SODIUM_EXPORT
const char *
randombytes_implementation_name(void);
/* -- NaCl compatibility interface -- */
SODIUM_EXPORT
void
randombytes(unsigned char *const buf, const unsigned long long buf_len);
#ifdef __cplusplus
}
#endif
#endif

@ -0,0 +1,23 @@
#ifndef randombytes_nativeclient_H
#define randombytes_nativeclient_H
/* randombytes backend for Google Native Client; the whole header is a
   no-op unless the __native_client__ toolchain macro is defined. */
#ifdef __native_client__
# include "export.h"
# include "randombytes.h"
# ifdef __cplusplus
extern "C" {
# endif
SODIUM_EXPORT
extern struct randombytes_implementation randombytes_nativeclient_implementation;
# ifdef __cplusplus
}
# endif
#endif
#endif

@ -0,0 +1,20 @@
#ifndef randombytes_salsa20_random_H
#define randombytes_salsa20_random_H
/* Salsa20-based userspace randombytes backend (implementation table
   defined in the corresponding .c file). */
#include <sodium/export.h>
#include <sodium/randombytes.h>
#ifdef __cplusplus
extern "C"
{
#endif
SODIUM_EXPORT
extern struct randombytes_implementation randombytes_salsa20_implementation;
#ifdef __cplusplus
}
#endif
#endif

@ -0,0 +1,20 @@
#ifndef randombytes_sysrandom_H
#define randombytes_sysrandom_H
/* Operating-system randombytes backend (implementation table defined in
   the corresponding .c file). */
#include <sodium/export.h>
#include <sodium/randombytes.h>
#ifdef __cplusplus
extern "C"
{
#endif
SODIUM_EXPORT
extern struct randombytes_implementation randombytes_sysrandom_implementation;
#ifdef __cplusplus
}
#endif
#endif

@ -1 +0,0 @@
#include <sodium/crypto_hash_sha512.h>

@ -1 +0,0 @@
#include <sodium/crypto_verify_32.h>

@ -1 +0,0 @@
#include <sodium/randombytes.h>

@ -7,18 +7,15 @@
#include "mod3.h"
#include "rq.h"
#include "r3.h"
#include "crypto_hash_sha512.h"
#include "crypto_verify_32.h"
#include "crypto_kem.h"
#include <sodium/crypto_hash_sha512.h>
#include <sodium/crypto_verify_32.h>
#include <sodium/crypto_kem.h>
int crypto_kem_dec_avx2(
unsigned char *k,
const unsigned char *cstr,
const unsigned char *sk
)
int
crypto_kem_dec_avx2(unsigned char *k, const unsigned char *cstr,
const unsigned char *sk)
{
#if __AVX2__
#if __AVX2__
small f[768];
modq h[768];
small grecip[768];
@ -32,41 +29,45 @@ int crypto_kem_dec_avx2(
int i;
int result = 0;
small_decode(f,sk);
small_decode(grecip,sk + small_encode_len);
rq_decode(h,sk + 2 * small_encode_len);
small_decode(f, sk);
small_decode(grecip, sk + small_encode_len);
rq_decode(h, sk + 2 * small_encode_len);
rq_decoderounded(c,cstr + 32);
rq_decoderounded(c, cstr + 32);
rq_mult(t,c,f);
rq_mod3(t3,t);
rq_mult(t, c, f);
rq_mod3(t3, t);
r3_mult(r,t3,grecip);
r3_mult(r, t3, grecip);
#ifdef KAT
{
int j;
printf("decrypt r:");
for (j = 0;j < p;++j)
if (r[j] == 1) printf(" +%d",j);
else if (r[j] == -1) printf(" -%d",j);
for(j = 0; j < p; ++j)
if(r[j] == 1)
printf(" +%d", j);
else if(r[j] == -1)
printf(" -%d", j);
printf("\n");
}
#endif
result |= r3_weightw_mask(r);
rq_mult(hr,h,r);
rq_round3(hr,hr);
for (i = 0;i < p;++i) result |= modq_nonzero_mask(hr[i] - c[i]);
rq_mult(hr, h, r);
rq_round3(hr, hr);
for(i = 0; i < p; ++i)
result |= modq_nonzero_mask(hr[i] - c[i]);
small_encode(rstr,r);
crypto_hash_sha512(hash,rstr,sizeof rstr);
result |= crypto_verify_32(hash,cstr);
small_encode(rstr, r);
crypto_hash_sha512(hash, rstr, sizeof rstr);
result |= crypto_verify_32(hash, cstr);
for (i = 0;i < 32;++i) k[i] = (hash[32 + i] & ~result);
for(i = 0; i < 32; ++i)
k[i] = (hash[32 + i] & ~result);
return result;
#else
#else
return -1;
#endif
#endif
}

@ -6,14 +6,12 @@
#include "params.h"
#include "small.h"
#include "rq.h"
#include "crypto_hash_sha512.h"
#include "crypto_kem.h"
int crypto_kem_enc_avx2(
unsigned char *cstr,
unsigned char *k,
const unsigned char *pk
)
#include <sodium/crypto_hash_sha512.h>
#include <sodium/crypto_kem.h>
int
crypto_kem_enc_avx2(unsigned char *cstr, unsigned char *k,
const unsigned char *pk)
{
#if __AVX2__
small r[768];
@ -28,25 +26,27 @@ int crypto_kem_enc_avx2(
{
int i;
printf("encrypt r:");
for (i = 0;i < p;++i)
if (r[i] == 1) printf(" +%d",i);
else if (r[i] == -1) printf(" -%d",i);
for(i = 0; i < p; ++i)
if(r[i] == 1)
printf(" +%d", i);
else if(r[i] == -1)
printf(" -%d", i);
printf("\n");
}
#endif
small_encode(rstr,r);
crypto_hash_sha512(hash,rstr,sizeof rstr);
small_encode(rstr, r);
crypto_hash_sha512(hash, rstr, sizeof rstr);
rq_decode(h,pk);
rq_mult(c,h,r);
rq_decode(h, pk);
rq_mult(c, h, r);
memcpy(k,hash + 32,32);
memcpy(cstr,hash,32);
rq_roundencode(cstr + 32,c);
memcpy(k, hash + 32, 32);
memcpy(cstr, hash, 32);
rq_roundencode(cstr + 32, c);
return 0;
#else
return -1;
return -1;
#endif
}

@ -1,9 +1,10 @@
#ifndef int32_sort_h
#define int32_sort_h
#include "crypto_int32.h"
#include <sodium/crypto_int32.h>
#define int32_sort crypto_kem_sntrup4591761_avx_int32_sort
extern void int32_sort(crypto_int32 *,int);
extern void
int32_sort(crypto_int32 *, int);
#endif

@ -4,16 +4,18 @@
#include "r3.h"
#include "small.h"
#include "rq.h"
#include "crypto_kem.h"
#include <sodium/crypto_kem.h>
#if crypto_kem_PUBLICKEYBYTES != rq_encode_len
#error "crypto_kem_PUBLICKEYBYTES must match rq_encode_len"
#endif
#if crypto_kem_SECRETKEYBYTES != rq_encode_len + 2 * small_encode_len
#error "crypto_kem_SECRETKEYBYTES must match rq_encode_len + 2 * small_encode_len"
#error \
"crypto_kem_SECRETKEYBYTES must match rq_encode_len + 2 * small_encode_len"
#endif
int crypto_kem_keypair_avx2(unsigned char *pk,unsigned char *sk)
int
crypto_kem_keypair_avx2(unsigned char *pk, unsigned char *sk)
{
#if __AVX2__
small g[768];
@ -24,17 +26,17 @@ int crypto_kem_keypair_avx2(unsigned char *pk,unsigned char *sk)
do
small_random(g);
while (r3_recip(grecip,g) != 0);
while(r3_recip(grecip, g) != 0);
small_random_weightw(f);
rq_recip3(f3recip,f);
rq_recip3(f3recip, f);
rq_mult(h,f3recip,g);
rq_mult(h, f3recip, g);
rq_encode(pk,h);
small_encode(sk,f);
small_encode(sk + small_encode_len,grecip);
memcpy(sk + 2 * small_encode_len,pk,rq_encode_len);
rq_encode(pk, h);
small_encode(sk, f);
small_encode(sk + small_encode_len, grecip);
memcpy(sk + 2 * small_encode_len, pk, rq_encode_len);
return 0;
#else

@ -2,24 +2,27 @@
#define mod3_h
#include "small.h"
#include "crypto_int32.h"
#include <sodium/crypto_int32.h>
/* -1 if x is nonzero, 0 otherwise */
static inline int mod3_nonzero_mask(small x)
static inline int
mod3_nonzero_mask(small x)
{
return -x*x;
return -x * x;
}
/* input between -100000 and 100000 */
/* output between -1 and 1 */
static inline small mod3_freeze(crypto_int32 a)
static inline small
mod3_freeze(crypto_int32 a)
{
a -= 3 * ((10923 * a) >> 15);
a -= 3 * ((89478485 * a + 134217728) >> 28);
return a;
}
static inline small mod3_minusproduct(small a,small b,small c)
static inline small
mod3_minusproduct(small a, small b, small c)
{
crypto_int32 A = a;
crypto_int32 B = b;
@ -27,7 +30,8 @@ static inline small mod3_minusproduct(small a,small b,small c)
return mod3_freeze(A - B * C);
}
static inline small mod3_plusproduct(small a,small b,small c)
static inline small
mod3_plusproduct(small a, small b, small c)
{
crypto_int32 A = a;
crypto_int32 B = b;
@ -35,26 +39,30 @@ static inline small mod3_plusproduct(small a,small b,small c)
return mod3_freeze(A + B * C);
}
static inline small mod3_product(small a,small b)
static inline small
mod3_product(small a, small b)
{
return a * b;
}
static inline small mod3_sum(small a,small b)
static inline small
mod3_sum(small a, small b)
{
crypto_int32 A = a;
crypto_int32 B = b;
return mod3_freeze(A + B);
}
static inline small mod3_reciprocal(small a1)
static inline small
mod3_reciprocal(small a1)
{
return a1;
}
static inline small mod3_quotient(small num,small den)
static inline small
mod3_quotient(small num, small den)
{
return mod3_product(num,mod3_reciprocal(den));
return mod3_product(num, mod3_reciprocal(den));
}
#endif

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save