mirror of
https://github.com/dragonflydb/dragonfly.git
synced 2025-05-10 18:05:44 +02:00
bug(sse2neon): fix sanitizer crash in _mm_loadu_si128 (#1037)
* bug(sse2neon): fix sanitizer crash in _mm_loadu_si128 Signed-off-by: adi_holden <adi@dragonflydb.io> * remove static Signed-off-by: adi_holden <adi@dragonflydb.io> --------- Signed-off-by: adi_holden <adi@dragonflydb.io>
This commit is contained in:
parent
7887327fd4
commit
8600eacdc4
3 changed files with 35 additions and 18 deletions
|
@ -13,11 +13,7 @@
|
|||
#include <cstring>
|
||||
#include <functional>
|
||||
|
||||
#if defined(__aarch64__)
|
||||
#include "base/sse2neon.h"
|
||||
#else
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
#include "core/sse_port.h"
|
||||
|
||||
namespace dfly {
|
||||
namespace detail {
|
||||
|
@ -891,7 +887,7 @@ uint32_t BucketBase<NUM_SLOTS, NUM_OVR>::CompareFP(uint8_t fp) const {
|
|||
const __m128i key_data = _mm_set1_epi8(fp);
|
||||
|
||||
// Loads 16 bytes of src into seg_data.
|
||||
__m128i seg_data = _mm_loadu_si128(reinterpret_cast<const __m128i*>(finger_arr_.data()));
|
||||
__m128i seg_data = mm_loadu_si128(reinterpret_cast<const __m128i*>(finger_arr_.data()));
|
||||
|
||||
// compare 16-byte vectors seg_data and key_data, dst[i] := ( a[i] == b[i] ) ? 0xFF : 0.
|
||||
__m128i rv_mask = _mm_cmpeq_epi8(seg_data, key_data);
|
||||
|
|
|
@ -4,16 +4,11 @@
|
|||
|
||||
#include "src/core/detail/bitpacking.h"
|
||||
|
||||
#include "base/logging.h"
|
||||
|
||||
#if defined(__aarch64__)
|
||||
#include "base/sse2neon.h"
|
||||
#else
|
||||
#include <emmintrin.h>
|
||||
#include <tmmintrin.h>
|
||||
#endif
|
||||
#include <absl/base/internal/endian.h>
|
||||
|
||||
#include "base/logging.h"
|
||||
#include "core/sse_port.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace dfly {
|
||||
|
@ -43,7 +38,7 @@ static inline pair<const char*, uint8_t*> simd_variant1_pack(const char* ascii,
|
|||
|
||||
// Based on the question I asked here: https://stackoverflow.com/q/74831843/2280111
|
||||
while (ascii <= end) {
|
||||
val = _mm_loadu_si128(reinterpret_cast<const __m128i*>(ascii));
|
||||
val = mm_loadu_si128(reinterpret_cast<const __m128i*>(ascii));
|
||||
|
||||
/*
|
||||
x = ((x & 0x7F007F007F007F00) >> 1) | (x & 0x007F007F007F007F);
|
||||
|
@ -81,7 +76,7 @@ static inline pair<const char*, uint8_t*> simd_variant2_pack(const char* ascii,
|
|||
|
||||
// Based on the question I asked here: https://stackoverflow.com/q/74831843/2280111
|
||||
while (ascii <= end) {
|
||||
val = _mm_loadu_si128(reinterpret_cast<const __m128i*>(ascii));
|
||||
val = mm_loadu_si128(reinterpret_cast<const __m128i*>(ascii));
|
||||
|
||||
/*
|
||||
x = ((x & 0x7F007F007F007F00) >> 1) | (x & 0x007F007F007F007F);
|
||||
|
@ -114,7 +109,7 @@ bool validate_ascii_fast(const char* src, size_t len) {
|
|||
__m128i has_error = _mm_setzero_si128();
|
||||
if (len >= 16) {
|
||||
for (; i <= len - 16; i += 16) {
|
||||
__m128i current_bytes = _mm_loadu_si128((const __m128i*)(src + i));
|
||||
__m128i current_bytes = mm_loadu_si128((const __m128i*)(src + i));
|
||||
has_error = _mm_or_si128(has_error, current_bytes);
|
||||
}
|
||||
}
|
||||
|
@ -249,7 +244,7 @@ void ascii_unpack_simd(const uint8_t* bin, size_t ascii_len, char* ascii) {
|
|||
const __m128i control = _mm_set_epi8(14, 13, 12, 11, 10, 9, 8, 7, -1, 6, 5, 4, 3, 2, 1, 0);
|
||||
|
||||
while (ascii < end) {
|
||||
val = _mm_loadu_si128(reinterpret_cast<const __m128i*>(bin));
|
||||
val = mm_loadu_si128(reinterpret_cast<const __m128i*>(bin));
|
||||
val = _mm_shuffle_epi8(val, control);
|
||||
|
||||
rpart = _mm_and_si128(val, _mm_set1_epi64x(0x000000000FFFFFFF));
|
||||
|
|
26
src/core/sse_port.h
Normal file
26
src/core/sse_port.h
Normal file
|
@ -0,0 +1,26 @@
|
|||
// Copyright 2023, DragonflyDB authors. All rights reserved.
|
||||
// See LICENSE for licensing terms.
|
||||
//
|
||||
|
||||
#pragma once
|
||||
#include <cstring>

#if defined(__aarch64__)
#include "base/sse2neon.h"
#else
#include <emmintrin.h>
#include <tmmintrin.h>
#endif
|
||||
|
||||
namespace dfly {
|
||||
|
||||
// Portable unaligned 128-bit load: returns the 16 bytes starting at `ptr`.
//
// `ptr` does not have to be 16-byte aligned; callers typically pass a
// reinterpret_cast of a byte buffer.
//
// On aarch64 the bytes are copied with std::memcpy instead of going through the
// sse2neon `_mm_loadu_si128` wrapper, which was reported to crash under
// sanitizers (presumably because it reads through an `int32_t*`, i.e.
// `vld1q_s32((const int32_t*)ptr)` - TODO confirm against base/sse2neon.h).
// On every other target this forwards to the native `_mm_loadu_si128`.
inline __m128i mm_loadu_si128(const __m128i* ptr) {
#if defined(__aarch64__)
  __m128i res;
  // Byte-wise copy: makes no alignment or effective-type assumption about *ptr.
  std::memcpy(&res, ptr, sizeof(res));
  return res;
#else
  return _mm_loadu_si128(ptr);
#endif
}
|
||||
|
||||
} // namespace dfly
|
Loading…
Add table
Add a link
Reference in a new issue