diff --git a/src/core/dash_internal.h b/src/core/dash_internal.h index 96db5d53a..f57ed2bdd 100644 --- a/src/core/dash_internal.h +++ b/src/core/dash_internal.h @@ -13,11 +13,7 @@ #include #include -#if defined(__aarch64__) -#include "base/sse2neon.h" -#else -#include -#endif +#include "core/sse_port.h" namespace dfly { namespace detail { @@ -891,7 +887,7 @@ uint32_t BucketBase::CompareFP(uint8_t fp) const { const __m128i key_data = _mm_set1_epi8(fp); // Loads 16 bytes of src into seg_data. - __m128i seg_data = _mm_loadu_si128(reinterpret_cast(finger_arr_.data())); + __m128i seg_data = mm_loadu_si128(reinterpret_cast(finger_arr_.data())); // compare 16-byte vectors seg_data and key_data, dst[i] := ( a[i] == b[i] ) ? 0xFF : 0. __m128i rv_mask = _mm_cmpeq_epi8(seg_data, key_data); diff --git a/src/core/detail/bitpacking.cc b/src/core/detail/bitpacking.cc index 227ee0eec..3f0438971 100644 --- a/src/core/detail/bitpacking.cc +++ b/src/core/detail/bitpacking.cc @@ -4,16 +4,11 @@ #include "src/core/detail/bitpacking.h" -#include "base/logging.h" - -#if defined(__aarch64__) -#include "base/sse2neon.h" -#else -#include -#include -#endif #include +#include "base/logging.h" +#include "core/sse_port.h" + using namespace std; namespace dfly { @@ -43,7 +38,7 @@ static inline pair simd_variant1_pack(const char* ascii, // Based on the question I asked here: https://stackoverflow.com/q/74831843/2280111 while (ascii <= end) { - val = _mm_loadu_si128(reinterpret_cast(ascii)); + val = mm_loadu_si128(reinterpret_cast(ascii)); /* x = ((x & 0x7F007F007F007F00) >> 1) | (x & 0x007F007F007F007F); @@ -81,7 +76,7 @@ static inline pair simd_variant2_pack(const char* ascii, // Based on the question I asked here: https://stackoverflow.com/q/74831843/2280111 while (ascii <= end) { - val = _mm_loadu_si128(reinterpret_cast(ascii)); + val = mm_loadu_si128(reinterpret_cast(ascii)); /* x = ((x & 0x7F007F007F007F00) >> 1) | (x & 0x007F007F007F007F); @@ -114,7 +109,7 @@ bool validate_ascii_fast(const 
char* src, size_t len) { __m128i has_error = _mm_setzero_si128(); if (len >= 16) { for (; i <= len - 16; i += 16) { - __m128i current_bytes = _mm_loadu_si128((const __m128i*)(src + i)); + __m128i current_bytes = mm_loadu_si128((const __m128i*)(src + i)); has_error = _mm_or_si128(has_error, current_bytes); } } @@ -249,7 +244,7 @@ void ascii_unpack_simd(const uint8_t* bin, size_t ascii_len, char* ascii) { const __m128i control = _mm_set_epi8(14, 13, 12, 11, 10, 9, 8, 7, -1, 6, 5, 4, 3, 2, 1, 0); while (ascii < end) { - val = _mm_loadu_si128(reinterpret_cast(bin)); + val = mm_loadu_si128(reinterpret_cast(bin)); val = _mm_shuffle_epi8(val, control); rpart = _mm_and_si128(val, _mm_set1_epi64x(0x000000000FFFFFFF));
diff --git a/src/core/sse_port.h b/src/core/sse_port.h
new file mode 100644
index 000000000..692c24963
--- /dev/null
+++ b/src/core/sse_port.h
@@ -0,0 +1,33 @@
+// Copyright 2023, DragonflyDB authors. All rights reserved.
+// See LICENSE for licensing terms.
+//
+
+#pragma once
+#if defined(__aarch64__)
+#include <cstring>  // std::memcpy
+#include "base/sse2neon.h"
+#else
+#include <emmintrin.h>
+#include <tmmintrin.h>
+#endif
+
+namespace dfly {
+
+// Loads 16 bytes starting at `ptr` into a 128-bit integer vector and returns it.
+// `ptr` does not have to be 16-byte aligned (same contract as _mm_loadu_si128).
+//
+// On non-aarch64 targets this forwards to _mm_loadu_si128. On aarch64 the bytes are
+// copied with std::memcpy rather than loaded with the NEON vld1q_s32 intrinsic.
+// NOTE(review): presumably this avoids reading through a (const int32_t*) cast of
+// `ptr` - confirm the original motivation.
+inline __m128i mm_loadu_si128(const __m128i* ptr) {
+#if defined(__aarch64__)
+  __m128i res;
+  std::memcpy(&res, ptr, sizeof(res));
+  return res;
+#else
+  return _mm_loadu_si128(ptr);
+#endif
+}
+
+} // namespace dfly