feat (hset): Support arguments (count, withvalues) in HRANDFIELD (#1804)

feat (hset): Support arguments (count, withvalues) in HRANDFIELD

fixes #858
This commit is contained in:
Yue Li 2023-09-06 02:04:36 -07:00 committed by GitHub
parent 4e393cf742
commit a8e4bebffe
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 276 additions and 16 deletions

View file

@ -101,6 +101,77 @@ sds StringMap::Find(std::string_view key) {
return GetValue(str);
}
// Returns the (key, value) of one entry chosen with rand().
// The map must not be empty: the offset is computed modulo Size().
std::pair<sds, sds> StringMap::RandomPair() {
  auto cursor = begin();
  // Number of steps to walk from the first entry to the chosen one.
  const auto steps = rand() % Size();
  cursor += steps;
  return std::make_pair(cursor->first, cursor->second);
}
// Picks `count` distinct entries in a single pass over the map and appends
// their keys to `keys` (and, when `with_value` is set, their values to `vals`).
// `count` is clamped to the number of entries, so asking for more than the map
// holds yields every entry. Entries are appended in iteration order.
void StringMap::RandomPairsUnique(unsigned int count, std::vector<sds>& keys,
                                  std::vector<sds>& vals, bool with_value) {
  const unsigned int total_size = Size();
  if (count > total_size)
    count = total_size;

  uint32_t picked = 0;
  unsigned int visited = 0;
  for (auto itr = begin(); picked < count && itr != end(); ++itr, ++visited) {
    // Accept the current entry with probability
    // (entries still needed) / (entries not yet visited).
    const double random_double = static_cast<double>(rand()) / RAND_MAX;
    const double threshold = static_cast<double>(count - picked) / (total_size - visited);
    if (random_double <= threshold) {
      keys.push_back(itr->first);
      if (with_value) {
        vals.push_back(itr->second);
      }
      ++picked;
    }
  }

  DCHECK(keys.size() == count);
  if (with_value)
    DCHECK(vals.size() == count);
}
void StringMap::RandomPairs(unsigned int count, std::vector<sds>& keys, std::vector<sds>& vals,
bool with_value) {
using RandomPick = std::pair<unsigned int, unsigned int>;
std::vector<RandomPick> picks;
unsigned int total_size = Size();
for (unsigned int i = 0; i < count; ++i) {
RandomPick pick{rand() % total_size, i};
picks.push_back(pick);
}
std::sort(picks.begin(), picks.end(), [](auto& x, auto& y) { return x.first < y.first; });
unsigned int index = picks[0].first, pick_index = 0;
auto itr = begin();
for (unsigned int i = 0; i < index; ++i)
++itr;
keys.reserve(count);
if (with_value)
vals.reserve(count);
while (itr != end() && pick_index < count) {
auto [key, val] = *itr;
while (pick_index < count && index == picks[pick_index].first) {
int store_order = picks[pick_index].second;
keys[store_order] = key;
if (with_value)
vals[store_order] = val;
++pick_index;
}
++index;
++itr;
}
}
pair<sds, bool> StringMap::ReallocIfNeeded(void* obj, float ratio) {
sds key = (sds)obj;
size_t key_len = sdslen(key);

View file

@ -81,6 +81,12 @@ class StringMap : public DenseSet {
return *this;
}
// Moves the iterator forward by `n` positions, one Advance() call per step.
iterator& operator+=(unsigned int n) {
  while (n > 0) {
    Advance();
    --n;
  }
  return *this;
}
// Two iterators are equal when their curr_list_ members compare equal.
bool operator==(const iterator& b) const {
  const bool same_position = (curr_list_ == b.curr_list_);
  return same_position;
}
@ -117,6 +123,21 @@ class StringMap : public DenseSet {
return iterator{this, true};
}
// Returns a randomly chosen (key, value) pair.
// The map must not be empty.
// NOTE(review): the original comment says only the key is meaningful when the
// value is a nullptr - presumably for entries stored without a value; confirm.
std::pair<sds, sds> RandomPair();
// Randomly selects `count` distinct key/value pairs.
// If `count` is larger than the total number of pairs, every pair is returned.
// The selected keys are appended to `keys`; the matching values are appended to
// `vals` only when `with_value` is true.
void RandomPairsUnique(unsigned int count, std::vector<sds>& keys, std::vector<sds>& vals,
bool with_value);
// Randomly selects `count` key/value pairs. The same pair may be selected more
// than once.
// The selected keys are stored in `keys`; the matching values are stored in
// `vals` only when `with_value` is true.
void RandomPairs(unsigned int count, std::vector<sds>& keys, std::vector<sds>& vals,
bool with_value);
private:
// Reallocates the key and/or value if their pages are underutilized.
// Returns the resulting pointer (unchanged if utilization is sufficient) and whether a
// reallocation happened.

View file

@ -27,6 +27,7 @@ using namespace std;
namespace dfly {
using namespace facade;
using absl::SimpleAtoi;
namespace {
@ -1005,8 +1006,35 @@ void HSetFamily::HStrLen(CmdArgList args, ConnectionContext* cntx) {
}
}
// Appends the content of a listpack entry to `str_vec` as a string.
// When the entry carries a string (non-null sval) its slen bytes are copied;
// otherwise its integer payload (lval) is formatted as decimal text.
void StrVecEmplaceBack(StringVec& str_vec, const listpackEntry& lp) {
  if (!lp.sval) {
    str_vec.emplace_back(absl::StrCat(lp.lval));
  } else {
    str_vec.emplace_back(reinterpret_cast<char*>(lp.sval), lp.slen);
  }
}
void HSetFamily::HRandField(CmdArgList args, ConnectionContext* cntx) {
if (args.size() > 3) {
DVLOG(1) << "Wrong number of command arguments: " << args.size();
return (*cntx)->SendError(kSyntaxErr);
}
string_view key = ArgS(args, 0);
int32_t count;
bool with_values = false;
if ((args.size() > 1) && (!SimpleAtoi(ArgS(args, 1), &count))) {
return (*cntx)->SendError("count value is not an integer", kSyntaxErrType);
}
if (args.size() == 3) {
ToUpper(&args[2]);
if (ArgS(args, 2) != "WITHVALUES")
return (*cntx)->SendError(kSyntaxErr);
else
with_values = true;
}
auto cb = [&](Transaction* t, EngineShard* shard) -> OpResult<StringVec> {
auto& db_slice = shard->db_slice();
@ -1020,24 +1048,56 @@ void HSetFamily::HRandField(CmdArgList args, ConnectionContext* cntx) {
StringVec str_vec;
if (pv.Encoding() == kEncodingStrMap2) {
// TODO: to create real random logic.
StringMap* string_map = (StringMap*)pv.RObjPtr();
sds key = string_map->begin()->first;
str_vec.emplace_back(key, sdslen(key));
if (args.size() == 1) {
auto [key, value] = string_map->RandomPair();
str_vec.emplace_back(key, sdslen(key));
} else {
size_t actual_count =
(count >= 0) ? std::min(size_t(count), string_map->Size()) : abs(count);
std::vector<sds> keys, vals;
if (count >= 0) {
string_map->RandomPairsUnique(actual_count, keys, vals, with_values);
} else {
string_map->RandomPairs(actual_count, keys, vals, with_values);
}
for (size_t i = 0; i < actual_count; ++i) {
str_vec.emplace_back(keys[i], sdslen(keys[i]));
if (with_values) {
str_vec.emplace_back(vals[i], sdslen(vals[i]));
}
}
}
} else if (pv.Encoding() == kEncodingListPack) {
uint8_t* lp = (uint8_t*)pv.RObjPtr();
size_t lplen = lpLength(lp);
CHECK(lplen > 0 && lplen % 2 == 0);
size_t hlen = lplen / 2;
listpackEntry key;
lpRandomPair(lp, hlen, &key, NULL);
if (key.sval) {
str_vec.emplace_back(reinterpret_cast<char*>(key.sval), key.slen);
if (args.size() == 1) {
listpackEntry key;
lpRandomPair(lp, hlen, &key, NULL);
StrVecEmplaceBack(str_vec, key);
} else {
str_vec.emplace_back(absl::StrCat(key.lval));
size_t actual_count = (count >= 0) ? std::min(size_t(count), hlen) : abs(count);
std::unique_ptr<listpackEntry[]> keys = nullptr, vals = nullptr;
keys = std::make_unique<listpackEntry[]>(actual_count);
if (with_values)
vals = std::make_unique<listpackEntry[]>(actual_count);
// count has been specified.
if (count >= 0)
// always returns unique entries.
lpRandomPairsUnique(lp, actual_count, keys.get(), vals.get());
else
// allows non-unique entries.
lpRandomPairs(lp, actual_count, keys.get(), vals.get());
for (size_t i = 0; i < actual_count; ++i) {
StrVecEmplaceBack(str_vec, keys[i]);
if (with_values) {
StrVecEmplaceBack(str_vec, vals[i]);
}
}
}
} else {
LOG(ERROR) << "Invalid encoding " << pv.Encoding();
@ -1048,8 +1108,7 @@ void HSetFamily::HRandField(CmdArgList args, ConnectionContext* cntx) {
OpResult<StringVec> result = cntx->transaction->ScheduleSingleHopT(std::move(cb));
if (result) {
CHECK_EQ(1u, result->size()); // TBD: to support count and withvalues.
(*cntx)->SendBulkString(result->front());
(*cntx)->SendStringArr(*result);
} else if (result.status() == OpStatus::KEY_NOTFOUND) {
(*cntx)->SendNull();
} else {
@ -1094,9 +1153,7 @@ void HSetFamily::Register(CommandRegistry* registry) {
<< CI{"HINCRBYFLOAT", CO::WRITE | CO::DENYOOM | CO::FAST, 4, 1, 1, 1, acl::kHIncrByFloat}
.HFUNC(HIncrByFloat)
<< CI{"HKEYS", CO::READONLY, 2, 1, 1, 1, acl::kHKeys}.HFUNC(HKeys)
// TODO: add options support
<< CI{"HRANDFIELD", CO::READONLY, 2, 1, 1, 1, acl::kHRandField}.HFUNC(HRandField)
<< CI{"HRANDFIELD", CO::READONLY, -2, 1, 1, 1, acl::kHRandField}.HFUNC(HRandField)
<< CI{"HSCAN", CO::READONLY, -3, 1, 1, 1, acl::kHScan}.HFUNC(HScan)
<< CI{"HSET", CO::WRITE | CO::FAST | CO::DENYOOM, -4, 1, 1, 1, acl::kHSet}.HFUNC(HSet)
<< CI{"HSETEX", CO::WRITE | CO::FAST | CO::DENYOOM, -5, 1, 1, 1, acl::kHSetEx}.SetHandler(

View file

@ -203,6 +203,117 @@ TEST_F(HSetFamilyTest, HRandFloat) {
Run({"hrandfield", "k"});
}
// Verifies HRANDFIELD with and without the `count` / `WITHVALUES` arguments on
// both hash encodings: a small hash (listpack) and a large one (string map).
TEST_F(HSetFamilyTest, HRandField) {
  // Returns true when `items` contains no repeated string.
  auto all_unique = [](std::vector<std::string> items) {
    std::sort(items.begin(), items.end());
    return std::unique(items.begin(), items.end()) == items.end();
  };

  // exercise Redis' listpack encoding
  Run({"HSET", "k", "a", "0", "b", "1", "c", "2"});

  EXPECT_THAT(Run({"hrandfield", "k"}), AnyOf("a", "b", "c"));
  EXPECT_THAT(Run({"hrandfield", "k", "2"}).GetVec(), IsSubsetOf({"a", "b", "c"}));
  // A positive count never repeats fields and is capped at the hash size.
  EXPECT_THAT(Run({"hrandfield", "k", "3"}).GetVec(), UnorderedElementsAre("a", "b", "c"));
  EXPECT_THAT(Run({"hrandfield", "k", "4"}).GetVec(), UnorderedElementsAre("a", "b", "c"));

  // With WITHVALUES the reply alternates field, value, field, value, ...
  auto resp = Run({"hrandfield", "k", "4", "withvalues"});
  EXPECT_THAT(resp, ArrLen(6));
  auto vec = resp.GetVec();

  std::vector<RespExpr> fields, values;
  for (size_t i = 0; i < vec.size(); ++i) {
    if (i % 2 == 1)
      values.push_back(vec[i]);
    else
      fields.push_back(vec[i]);
  }
  EXPECT_THAT(values, UnorderedElementsAre("0", "1", "2"));
  EXPECT_THAT(fields, UnorderedElementsAre("a", "b", "c"));

  // A negative count returns exactly |count| pairs, repetitions allowed.
  resp = Run({"hrandfield", "k", "-4", "withvalues"});
  EXPECT_THAT(resp, ArrLen(8));
  vec = resp.GetVec();

  // Every returned field must be followed by its own value.
  for (size_t i = 0; i + 1 < vec.size(); i += 2) {
    if (vec[i] == "a")
      EXPECT_EQ(vec[i + 1], "0");
    else if (vec[i] == "b")
      EXPECT_EQ(vec[i + 1], "1");
    else if (vec[i] == "c")
      EXPECT_EQ(vec[i + 1], "2");
    else
      ADD_FAILURE();
  }

  // exercise Dragonfly's string map encoding
  const int num_entries = 500;
  for (int i = 0; i < num_entries; i++) {
    Run({"HSET", "largehash", std::to_string(i), std::to_string(i * 10)});
  }

  resp = Run({"hrandfield", "largehash"});
  const int single_field = std::stoi(resp.GetString());
  EXPECT_LE(single_field, num_entries - 1);
  EXPECT_GE(single_field, 0);

  // Positive count: exactly that many distinct fields.
  resp = Run({"hrandfield", "largehash", std::to_string(num_entries / 2)});
  EXPECT_THAT(resp, ArrLen(num_entries / 2));
  vec = resp.GetVec();

  std::vector<std::string> string_vec;
  for (const auto& entry : vec) {
    string_vec.push_back(entry.GetString());
  }
  EXPECT_TRUE(all_unique(string_vec));

  for (const auto& str : string_vec) {
    const int field = std::stoi(str);
    EXPECT_LE(field, num_entries - 1);
    EXPECT_GE(field, 0);
  }

  // Negative count larger than the hash: num_entries + 1 draws out of
  // num_entries fields must contain at least one repetition.
  resp = Run({"hrandfield", "largehash", std::to_string(num_entries * -1 - 1)});
  EXPECT_THAT(resp, ArrLen(num_entries + 1));
  vec = resp.GetVec();

  string_vec.clear();
  for (const auto& entry : vec) {
    string_vec.push_back(entry.GetString());
    const int field = std::stoi(entry.GetString());
    EXPECT_LE(field, num_entries - 1);
    EXPECT_GE(field, 0);
  }
  EXPECT_FALSE(all_unique(string_vec));

  // Same request with WITHVALUES: each field is followed by its value (field * 10).
  resp = Run({"hrandfield", "largehash", std::to_string(num_entries * -1 - 1), "withvalues"});
  EXPECT_THAT(resp, ArrLen((num_entries + 1) * 2));
  vec = resp.GetVec();

  string_vec.clear();
  for (size_t i = 0; i + 1 < vec.size(); i += 2) {
    const int field = std::stoi(vec[i].GetString());
    EXPECT_LE(field, num_entries - 1);
    EXPECT_GE(field, 0);
    const int value = std::stoi(vec[i + 1].GetString());
    EXPECT_EQ(value, field * 10);
    string_vec.push_back(vec[i].GetString());
  }
  EXPECT_FALSE(all_unique(string_vec));
}
TEST_F(HSetFamilyTest, HSetEx) {
TEST_current_time_ms = kMemberExpiryBase * 1000; // to reset to test time.