mirror of
https://github.com/dragonflydb/dragonfly.git
synced 2025-05-11 02:15:45 +02:00
feat: add s390x architecture support (#1214)
* fix(lua): use native architecture when compiling lua for s390x.
Signed-off-by: iko1 <me@remotecpp.dev>
* feat(server): implement CompareFP for s390x architecture.
Signed-off-by: iko1 <me@remotecpp.dev>
* feat: implement validate_ascii_fast function variant for s390x arch.
Signed-off-by: iko1 <me@remotecpp.dev>
* fix: add comments before s390x vector operations
Signed-off-by: iko1 <me@remotecpp.dev>
* fix validate_ascii_fast function logic after CR comment
Signed-off-by: iko1 <me@remotecpp.dev>
* Revert "fix(lua): use native architecture when compiling lua for s390x."
This reverts commit 6cc5d8a8ed.
* fix(lua): use native architecture when compiling lua for s390x.
Signed-off-by: iko1 <me@remotecpp.dev>
* refactor validate_ascii_fast function after CR comment
Signed-off-by: iko1 <me@remotecpp.dev>
* include vecintrin.h from sse_port.h rather than the misleading filename
Signed-off-by: iko1 <me@remotecpp.dev>
---------
Signed-off-by: iko1 <me@remotecpp.dev>
This commit is contained in:
parent
6d4d740d6e
commit
19d7622280
4 changed files with 74 additions and 2 deletions
|
@ -12,10 +12,10 @@ index d42d14b7..75647e72 100644
|
||||||
#define LUAI_MAXSTACK 15000
|
#define LUAI_MAXSTACK 15000
|
||||||
#endif
|
#endif
|
||||||
diff --git a/makefile b/makefile
|
diff --git a/makefile b/makefile
|
||||||
index d46e650c..e347e614 100644
|
index d46e650c..c27e5677 100644
|
||||||
--- a/makefile
|
--- a/makefile
|
||||||
+++ b/makefile
|
+++ b/makefile
|
||||||
@@ -66,13 +66,23 @@ LOCAL = $(TESTS) $(CWARNS)
|
@@ -66,13 +66,25 @@ LOCAL = $(TESTS) $(CWARNS)
|
||||||
|
|
||||||
|
|
||||||
# enable Linux goodies
|
# enable Linux goodies
|
||||||
|
@ -32,6 +32,8 @@ index d46e650c..e347e614 100644
|
||||||
+OPTFLAGS= -march=sandybridge
|
+OPTFLAGS= -march=sandybridge
|
||||||
+else ifeq ($(uname_m), aarch64)
|
+else ifeq ($(uname_m), aarch64)
|
||||||
+OPTFLAGS= -march=armv8.2-a+fp16+rcpc+dotprod+crypto
|
+OPTFLAGS= -march=armv8.2-a+fp16+rcpc+dotprod+crypto
|
||||||
|
+else ifeq ($(uname_m), s390x)
|
||||||
|
+OPTFLAGS= -march=native
|
||||||
+else
|
+else
|
||||||
+ $(error ERROR: unknown architecture $(uname_m))
|
+ $(error ERROR: unknown architecture $(uname_m))
|
||||||
+endif
|
+endif
|
||||||
|
|
|
@ -879,6 +879,34 @@ unsigned BucketBase<NUM_SLOTS, NUM_OVR>::UnsetStashPtr(uint8_t fp_hash, unsigned
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef __s390x__
|
||||||
|
template <unsigned NUM_SLOTS, unsigned NUM_OVR>
|
||||||
|
uint32_t BucketBase<NUM_SLOTS, NUM_OVR>::CompareFP(uint8_t fp) const {
|
||||||
|
static_assert(FpArray{}.size() <= 16);
|
||||||
|
vector unsigned char v1;
|
||||||
|
|
||||||
|
// Replicate 16 times fp to key_data.
|
||||||
|
for (int i = 0; i < 16; i++) {
|
||||||
|
v1[i] = fp;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Loads 16 bytes of src into seg_data.
|
||||||
|
vector unsigned char v2 = vec_load_len(finger_arr_.data(), 16);
|
||||||
|
|
||||||
|
// compare 1-byte vectors seg_data and key_data, dst[i] := ( a[i] == b[i] ) ? 0xFF : 0.
|
||||||
|
vector bool char rv_mask = vec_cmpeq(v1, v2);
|
||||||
|
|
||||||
|
// collapses 16 msb bits from each byte in rv_mask into mask.
|
||||||
|
int mask = 0;
|
||||||
|
for (int i = 0; i < 16; i++) {
|
||||||
|
if (rv_mask[i]) {
|
||||||
|
mask |= 1 << i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return mask;
|
||||||
|
}
|
||||||
|
#else
|
||||||
template <unsigned NUM_SLOTS, unsigned NUM_OVR>
|
template <unsigned NUM_SLOTS, unsigned NUM_OVR>
|
||||||
uint32_t BucketBase<NUM_SLOTS, NUM_OVR>::CompareFP(uint8_t fp) const {
|
uint32_t BucketBase<NUM_SLOTS, NUM_OVR>::CompareFP(uint8_t fp) const {
|
||||||
static_assert(FpArray{}.size() <= 16);
|
static_assert(FpArray{}.size() <= 16);
|
||||||
|
@ -898,6 +926,7 @@ uint32_t BucketBase<NUM_SLOTS, NUM_OVR>::CompareFP(uint8_t fp) const {
|
||||||
// Note: Last 2 operations can be combined in skylake with _mm_cmpeq_epi8_mask.
|
// Note: Last 2 operations can be combined in skylake with _mm_cmpeq_epi8_mask.
|
||||||
return mask;
|
return mask;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
// Bucket slot array goes from left to right: [x, x, ...]
|
// Bucket slot array goes from left to right: [x, x, ...]
|
||||||
// Shift right vacates the first slot on the left by shifting all the elements right and
|
// Shift right vacates the first slot on the left by shifting all the elements right and
|
||||||
|
|
|
@ -104,6 +104,42 @@ static inline pair<const char*, uint8_t*> simd_variant2_pack(const char* ascii,
|
||||||
// See https://github.com/lemire/fastvalidate-utf-8/
|
// See https://github.com/lemire/fastvalidate-utf-8/
|
||||||
// The function returns true (1) if all chars passed in src are
|
// The function returns true (1) if all chars passed in src are
|
||||||
// 7-bit values (0x00..0x7F). Otherwise, it returns false (0).
|
// 7-bit values (0x00..0x7F). Otherwise, it returns false (0).
|
||||||
|
#ifdef __s390x__
|
||||||
|
bool validate_ascii_fast(const char* src, size_t len) {
|
||||||
|
size_t i = 0;
|
||||||
|
|
||||||
|
// Initialize a vector in which all the elements are set to zero.
|
||||||
|
vector unsigned char has_error = vec_splat_s8(0);
|
||||||
|
if (len >= 16) {
|
||||||
|
for (; i <= len - 16; i += 16) {
|
||||||
|
// Load 16 bytes from buffer into a vector.
|
||||||
|
vector unsigned char current_bytes = vec_load_len((signed char*)(src + i), 16);
|
||||||
|
// Perform a bitwise OR operation between the current and the previously loaded contents.
|
||||||
|
has_error = vec_orc(has_error, current_bytes);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Initialize a vector in which all the elements are set to an invalid ASCII value.
|
||||||
|
vector unsigned char rep_invalid_values = vec_splat_s8(0x80);
|
||||||
|
|
||||||
|
// Perform bitwise AND-complement operation between two vectors.
|
||||||
|
vector unsigned char andc_result = vec_andc(rep_invalid_values, has_error);
|
||||||
|
|
||||||
|
// Tests whether any of corresponding elements of the given vectors are not equal.
|
||||||
|
// After the bitwise operation, both vectors should be equal if ASCII values.
|
||||||
|
if (!vec_all_eq(rep_invalid_values, andc_result)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (; i < len; i++) {
|
||||||
|
if (src[i] & 0x80) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
#else
|
||||||
bool validate_ascii_fast(const char* src, size_t len) {
|
bool validate_ascii_fast(const char* src, size_t len) {
|
||||||
size_t i = 0;
|
size_t i = 0;
|
||||||
__m128i has_error = _mm_setzero_si128();
|
__m128i has_error = _mm_setzero_si128();
|
||||||
|
@ -123,6 +159,7 @@ bool validate_ascii_fast(const char* src, size_t len) {
|
||||||
|
|
||||||
return !error_mask;
|
return !error_mask;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
// len must be at least 16
|
// len must be at least 16
|
||||||
void ascii_pack(const char* ascii, size_t len, uint8_t* bin) {
|
void ascii_pack(const char* ascii, size_t len, uint8_t* bin) {
|
||||||
|
|
|
@ -5,6 +5,8 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
#if defined(__aarch64__)
|
#if defined(__aarch64__)
|
||||||
#include "base/sse2neon.h"
|
#include "base/sse2neon.h"
|
||||||
|
#elif defined(__s390x__)
|
||||||
|
#include <vecintrin.h>
|
||||||
#else
|
#else
|
||||||
#include <emmintrin.h>
|
#include <emmintrin.h>
|
||||||
#include <tmmintrin.h>
|
#include <tmmintrin.h>
|
||||||
|
@ -12,6 +14,7 @@
|
||||||
|
|
||||||
namespace dfly {
|
namespace dfly {
|
||||||
|
|
||||||
|
#ifndef __s390x__
|
||||||
inline __m128i mm_loadu_si128(const __m128i* ptr) {
|
inline __m128i mm_loadu_si128(const __m128i* ptr) {
|
||||||
#if defined(__aarch64__)
|
#if defined(__aarch64__)
|
||||||
__m128i res;
|
__m128i res;
|
||||||
|
@ -22,5 +25,6 @@ inline __m128i mm_loadu_si128(const __m128i* ptr) {
|
||||||
return _mm_loadu_si128(ptr);
|
return _mm_loadu_si128(ptr);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
} // namespace dfly
|
} // namespace dfly
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue