Revert "chore: get rid of kv_args and replace it with slices to full_… (#3024)

Revert "chore: get rid of kv_args and replace it with slices to full_args (#2942)"

This reverts commit de0e5cb0bd.
Roman Gershman 2024-05-08 03:01:37 +03:00 committed by GitHub
parent 5c4279c285
commit 25e6930ac3
13 changed files with 185 additions and 216 deletions
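
Reviewer note: what this revert restores is the pre-#2942 argument layout — each shard receives a materialized copy of its arguments (kv_args_) plus a parallel reverse_index_ that maps every shard-local argument back to its position in the original command, so scatter-gather commands (MGET, JSON.MGET, XREAD) can reassemble replies in command order. A minimal standalone sketch of that idea follows; the hash function and container names are illustrative, not Dragonfly's actual types:

#include <cassert>
#include <cstdint>
#include <string>
#include <string_view>
#include <vector>

struct ShardCache {
  std::vector<std::string_view> args;    // this shard's slice of kv_args_
  std::vector<uint32_t> original_index;  // reverse mapping per argument
};

int main() {
  // MGET k0 k1 k2 k3, scattered onto two shards (hash function is made up).
  std::vector<std::string> cmd = {"k0", "k1", "k2", "k3"};
  auto shard_of = [](std::string_view k) { return k.back() % 2; };

  std::vector<ShardCache> shards(2);
  for (uint32_t i = 0; i < cmd.size(); ++i) {
    ShardCache& sc = shards[shard_of(cmd[i])];
    sc.args.push_back(cmd[i]);
    sc.original_index.push_back(i);  // remember where the key came from
  }

  // Gather: the reverse index restores the original order of the reply.
  std::vector<std::string_view> reply(cmd.size());
  for (const ShardCache& sc : shards)
    for (size_t j = 0; j < sc.args.size(); ++j)
      reply[sc.original_index[j]] = sc.args[j];

  for (uint32_t i = 0; i < cmd.size(); ++i)
    assert(reply[i] == cmd[i]);  // command order preserved across shards
}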

View file

@@ -40,6 +40,9 @@ CommandId::CommandId(const char* name, uint32_t mask, int8_t arity, int8_t first
     : facade::CommandId(name, mask, arity, first_key, last_key, acl_categories) {
   if (mask & CO::ADMIN)
     opt_mask_ |= CO::NOSCRIPT;
+
+  if (mask & CO::BLOCKING)
+    opt_mask_ |= CO::REVERSE_MAPPING;
 }
 
 bool CommandId::IsTransactional() const {
@@ -170,6 +173,8 @@ const char* OptName(CO::CommandOpt fl) {
       return "readonly";
     case DENYOOM:
       return "denyoom";
+    case REVERSE_MAPPING:
+      return "reverse-mapping";
     case FAST:
       return "fast";
     case LOADING:

View file

@@ -27,13 +27,16 @@ enum CommandOpt : uint32_t {
   LOADING = 1U << 3,  // Command allowed during LOADING state.
   DENYOOM = 1U << 4,  // use-memory in redis.
 
-  // UNUSED = 1U << 5,
+  // marked commands that demand preserve the order of keys to work correctly.
+  // For example, MGET needs to know the order of keys to return the values in the same order.
+  // BLPOP needs to know the order of keys to return the first non-empty list from the left.
+  REVERSE_MAPPING = 1U << 5,
 
   VARIADIC_KEYS = 1U << 6,  // arg 2 determines number of keys. Relevant for ZUNIONSTORE, EVAL etc.
 
   ADMIN = 1U << 7,  // implies NOSCRIPT,
   NOSCRIPT = 1U << 8,
-  BLOCKING = 1U << 9,
+  BLOCKING = 1U << 9,  // implies REVERSE_MAPPING
   HIDDEN = 1U << 10,  // does not show in COMMAND command output
   INTERLEAVED_KEYS = 1U << 11,  // keys are interleaved with arguments
   GLOBAL_TRANS = 1U << 12,
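
The comment lines restored above carry the rationale: REVERSE_MAPPING marks commands whose replies must follow the original key order, and BLOCKING implies it because a command like BLPOP must report the leftmost non-empty key. A small sketch of the mask implication that the CommandId constructor hunk earlier applies — enum values are copied from the diff, the EffectiveMask helper is hypothetical:

#include <cstdint>
#include <cstdio>

enum CommandOpt : uint32_t {
  REVERSE_MAPPING = 1U << 5,
  BLOCKING = 1U << 9,  // implies REVERSE_MAPPING
};

// Mirrors the restored constructor logic: blocking commands opt into
// reverse mapping automatically.
uint32_t EffectiveMask(uint32_t mask) {
  if (mask & BLOCKING)
    mask |= REVERSE_MAPPING;
  return mask;
}

int main() {
  std::printf("reverse mapping: %s\n",
              (EffectiveMask(BLOCKING) & REVERSE_MAPPING) ? "on" : "off");
}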

View file

@@ -40,12 +40,14 @@ OpResult<std::pair<DbSlice::ConstIterator, unsigned>> FindFirstReadOnly(const Db
                                                                         int req_obj_type) {
   DCHECK(!args.Empty());
 
-  for (auto it = args.begin(); it != args.end(); ++it) {
-    OpResult<DbSlice::ConstIterator> res = db_slice.FindReadOnly(cntx, *it, req_obj_type);
+  unsigned i = 0;
+  for (string_view key : args) {
+    OpResult<DbSlice::ConstIterator> res = db_slice.FindReadOnly(cntx, key, req_obj_type);
     if (res)
-      return make_pair(res.value(), unsigned(it.index()));
+      return make_pair(res.value(), i);
     if (res.status() != OpStatus::KEY_NOTFOUND)
       return res.status();
+    ++i;
   }
 
   VLOG(2) << "FindFirst not found";
@@ -117,8 +119,8 @@ OpResult<ShardFFResult> FindFirstNonEmpty(Transaction* trans, int req_obj_type)
   auto comp = [trans](const OpResult<FFResult>& lhs, const OpResult<FFResult>& rhs) {
     if (!lhs || !rhs)
       return lhs.ok();
-    size_t i1 = std::get<1>(*lhs);
-    size_t i2 = std::get<1>(*rhs);
+    size_t i1 = trans->ReverseArgIndex(std::get<ShardId>(*lhs), std::get<unsigned>(*lhs));
+    size_t i2 = trans->ReverseArgIndex(std::get<ShardId>(*rhs), std::get<unsigned>(*rhs));
     return i1 < i2;
   };
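
The restored comparator above picks, across shards, the hit whose key appears earliest in the original command — each shard only knows a local index, so ReverseArgIndex translates before comparing. A simplified sketch of that selection, using plain pairs instead of OpResult/FFResult and made-up data:

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <utility>
#include <vector>

int main() {
  // BLPOP k0 k1 k2: shard 0 holds {k0, k2}, shard 1 holds {k1}.
  std::vector<std::vector<size_t>> reverse = {{0, 2}, {1}};
  using FF = std::pair<size_t, size_t>;  // (shard_id, local key index)

  // Suppose shard 0 found k2 (local index 1) and shard 1 found k1 (local 0).
  std::vector<FF> found = {{0, 1}, {1, 0}};
  auto comp = [&](const FF& l, const FF& r) {
    return reverse[l.first][l.second] < reverse[r.first][r.second];
  };
  FF best = *std::min_element(found.begin(), found.end(), comp);
  assert(reverse[best.first][best.second] == 1);  // k1 wins: leftmost for BLPOP
}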

View file

@@ -42,8 +42,8 @@ struct Entry : public EntryBase {
   struct Payload {
     std::string_view cmd;
     std::variant<CmdArgList,  // Parts of a full command.
-                 ShardArgs,   // Shard parts.
-                 ArgSlice>
+                 ShardArgs    // Command and its shard parts.
+                 >
         args;
 
     Payload() = default;
@@ -51,8 +51,6 @@ struct Entry : public EntryBase {
     }
     Payload(std::string_view c, const ShardArgs& a) : cmd(c), args(a) {
     }
-    Payload(std::string_view c, ArgSlice a) : cmd(c), args(a) {
-    }
   };
 
   Entry(TxId txid, Op opcode, DbIndex dbid, uint32_t shard_cnt,

View file

@@ -1543,14 +1543,12 @@ void JsonFamily::MGet(CmdArgList args, ConnectionContext* cntx) {
       continue;
 
     vector<OptString>& res = mget_resp[sid];
-    ShardArgs shard_args = transaction->GetShardArgs(sid);
-    unsigned src_index = 0;
-    for (auto it = shard_args.begin(); it != shard_args.end(); ++it, ++src_index) {
-      if (!res[src_index])
+    for (size_t j = 0; j < res.size(); ++j) {
+      if (!res[j])
         continue;
 
-      uint32_t dst_indx = it.index();
-      results[dst_indx] = std::move(res[src_index]);
+      uint32_t indx = transaction->ReverseArgIndex(sid, j);
+      results[indx] = std::move(res[j]);
     }
   }
@@ -2093,7 +2091,8 @@ void JsonFamily::Register(CommandRegistry* registry) {
   constexpr size_t kMsetFlags = CO::WRITE | CO::DENYOOM | CO::FAST | CO::INTERLEAVED_KEYS;
   registry->StartFamily();
   *registry << CI{"JSON.GET", CO::READONLY | CO::FAST, -2, 1, 1, acl::JSON}.HFUNC(Get);
-  *registry << CI{"JSON.MGET", CO::READONLY | CO::FAST, -3, 1, -2, acl::JSON}.HFUNC(MGet);
+  *registry << CI{"JSON.MGET", CO::READONLY | CO::FAST | CO::REVERSE_MAPPING, -3, 1, -2, acl::JSON}
+                .HFUNC(MGet);
   *registry << CI{"JSON.TYPE", CO::READONLY | CO::FAST, 3, 1, 1, acl::JSON}.HFUNC(Type);
   *registry << CI{"JSON.STRLEN", CO::READONLY | CO::FAST, 3, 1, 1, acl::JSON}.HFUNC(StrLen);
   *registry << CI{"JSON.OBJLEN", CO::READONLY | CO::FAST, 3, 1, 1, acl::JSON}.HFUNC(ObjLen);

View file

@@ -158,22 +158,6 @@ struct CircularMessages {
 // Used to recover logs for BLPOP failures. See OpBPop.
 thread_local CircularMessages debugMessages{50};
 
-// A bit awkward translation from a single key to ShardArgs.
-// We create a mutable slice (which will never be mutated) from the key, then we create
-// a CmdArgList of size 1 that references mslice and finally
-// we reference the first element in the CmdArgList via islice.
-struct SingleArg {
-  MutableSlice mslice;
-  IndexSlice islice{0, 1};
-
-  SingleArg(string_view arg) : mslice(const_cast<char*>(arg.data()), arg.size()) {
-  }
-
-  ShardArgs Get() {
-    return ShardArgs{CmdArgList{&mslice, 1}, absl::MakeSpan(&islice, 1)};
-  }
-};
-
 class BPopPusher {
  public:
   BPopPusher(string_view pop_key, string_view push_key, ListDir popdir, ListDir pushdir);
@@ -464,9 +448,7 @@ OpResult<string> MoveTwoShards(Transaction* trans, string_view src, string_view
       // hack, again. since we hacked which queue we are waiting on (see RunPair)
       // we must clean-up src key here manually. See RunPair why we do this.
       // in short- we suspended on "src" on both shards.
-      SingleArg single_arg{src};
-      shard->blocking_controller()->FinalizeWatched(single_arg.Get(), t);
+      shard->blocking_controller()->FinalizeWatched(ArgSlice{&src, 1}, t);
     }
   } else {
     DVLOG(1) << "Popping value from list: " << key;
@@ -891,8 +873,7 @@ OpResult<string> BPopPusher::RunSingle(ConnectionContext* cntx, time_point tp) {
     return op_res;
   }
 
-  SingleArg single_arg{pop_key_};
-  auto wcb = [&](Transaction* t, EngineShard* shard) { return single_arg.Get(); };
+  auto wcb = [&](Transaction* t, EngineShard* shard) { return ShardArgs{&this->pop_key_, 1}; };
 
   const auto key_checker = [](EngineShard* owner, const DbContext& context, Transaction*,
                               std::string_view key) -> bool {
@@ -919,13 +900,11 @@ OpResult<string> BPopPusher::RunPair(ConnectionContext* cntx, time_point tp) {
     return op_res;
   }
 
-  SingleArg single_arg(this->pop_key_);
-
   // a hack: we watch in both shards for pop_key but only in the source shard it's relevant.
   // Therefore we follow the regular flow of watching the key but for the destination shard it
   // will never be triggerred.
   // This allows us to run Transaction::Execute on watched transactions in both shards.
-  auto wcb = [&](Transaction* t, EngineShard* shard) { return single_arg.Get(); };
+  auto wcb = [&](Transaction* t, EngineShard* shard) { return ArgSlice{&this->pop_key_, 1}; };
 
   const auto key_checker = [](EngineShard* owner, const DbContext& context, Transaction*,
                               std::string_view key) -> bool {
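
With the span-based ShardArgs restored, the removed SingleArg shim becomes unnecessary: a single watched key can be wrapped directly as a one-element span, which is all that the ShardArgs{&this->pop_key_, 1} and ArgSlice{&src, 1} lines above do. A tiny sketch under the assumption that ShardArgs behaves like std::span<const std::string_view>:

#include <cassert>
#include <span>
#include <string_view>

int main() {
  std::string_view pop_key = "mylist";
  // Equivalent in spirit to ShardArgs{&pop_key_, 1}: one element, no
  // index-slice scaffolding required.
  std::span<const std::string_view> shard_args{&pop_key, 1};
  assert(shard_args.size() == 1 && shard_args.front() == "mylist");
}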

View file

@@ -2989,19 +2989,17 @@ void XReadImpl(CmdArgList args, std::optional<ReadOpts> opts, ConnectionContext*
       continue;
 
     vector<RecordVec>& results = xread_resp[sid];
-    unsigned src_index = 0;
-    ShardArgs shard_args = cntx->transaction->GetShardArgs(sid);
-    for (auto it = shard_args.begin(); it != shard_args.end(); ++it, ++src_index) {
-      if (results[src_index].size() == 0) {
+    for (size_t i = 0; i < results.size(); ++i) {
+      if (results[i].size() == 0) {
         continue;
       }
 
      resolved_streams++;
 
       // Add the stream records ordered by the original stream arguments.
-      size_t dst_indx = it.index();
-      res[dst_indx - opts->streams_arg] = std::move(results[src_index]);
+      size_t indx = cntx->transaction->ReverseArgIndex(sid, i);
+      res[indx - opts->streams_arg] = std::move(results[i]);
     }
   }
@@ -3325,7 +3323,7 @@ constexpr uint32_t kXAutoClaim = WRITE | STREAM | FAST;
 void StreamFamily::Register(CommandRegistry* registry) {
   using CI = CommandId;
   registry->StartFamily();
-  constexpr auto kReadFlags = CO::READONLY | CO::BLOCKING | CO::VARIADIC_KEYS;
+  constexpr auto kReadFlags = CO::READONLY | CO::BLOCKING | CO::REVERSE_MAPPING | CO::VARIADIC_KEYS;
   *registry << CI{"XADD", CO::WRITE | CO::DENYOOM | CO::FAST, -5, 1, 1, acl::kXAdd}.HFUNC(XAdd)
             << CI{"XCLAIM", CO::WRITE | CO::FAST, -6, 1, 1, acl::kXClaim}.HFUNC(XClaim)
             << CI{"XDEL", CO::WRITE | CO::FAST, -3, 1, 1, acl::kXDel}.HFUNC(XDel)

View file

@@ -270,7 +270,6 @@ void OpMSet(const OpArgs& op_args, const ShardArgs& args, atomic_bool* success)
   SetCmd sg(op_args, false);
 
   size_t index = 0;
-  bool partial = false;
   for (auto it = args.begin(); it != args.end(); ++it) {
     string_view key = *it;
     ++it;
@@ -278,7 +277,6 @@ void OpMSet(const OpArgs& op_args, const ShardArgs& args, atomic_bool* success)
     DVLOG(1) << "MSet " << key << ":" << value;
     if (sg.Set(params, key, value) != OpStatus::OK) {  // OOM for example.
       success->store(false);
-      partial = true;
       break;
     }
     index += 2;
@@ -287,29 +285,18 @@ void OpMSet(const OpArgs& op_args, const ShardArgs& args, atomic_bool* success)
   if (auto journal = op_args.shard->journal(); journal) {
     // We write a custom journal because an OOM in the above loop could lead to partial success, so
     // we replicate only what was changed.
-    if (partial) {
-      string_view cmd;
-      ArgSlice cmd_args;
-      vector<string_view> store_args(index);
-      if (index == 0) {
-        // All shards must record the tx was executed for the replica to execute it, so we send a
-        // PING in case nothing was changed
-        cmd = "PING";
-      } else {
-        // journal [0, i)
-        cmd = "MSET";
-        unsigned i = 0;
-        for (string_view arg : args) {
-          store_args[i++] = arg;
-          if (i >= store_args.size())
-            break;
-        }
-        cmd_args = absl::MakeSpan(store_args);
-      }
-      RecordJournal(op_args, cmd, cmd_args, op_args.tx->GetUniqueShardCnt());
-    } else {
-      RecordJournal(op_args, "MSET", args, op_args.tx->GetUniqueShardCnt());
+    string_view cmd;
+    ArgSlice cmd_args;
+    if (index == 0) {
+      // All shards must record the tx was executed for the replica to execute it, so we send a PING
+      // in case nothing was changed
+      cmd = "PING";
+    } else {
+      // journal [0, i)
+      cmd = "MSET";
+      cmd_args = ArgSlice(args.begin(), index);
     }
+    RecordJournal(op_args, cmd, cmd_args, op_args.tx->GetUniqueShardCnt());
   }
 }
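
The journaling rule restored above: after a partial MSET failure only the applied prefix [0, index) is replicated, and an empty prefix degenerates to a PING so that every shard still records the transaction for the replica. A compact sketch of that branch — RecordJournalStub is a stand-in for the real RecordJournal:

#include <cstdio>
#include <span>
#include <string_view>
#include <vector>

using ArgSlice = std::span<const std::string_view>;

void RecordJournalStub(std::string_view cmd, ArgSlice args) {
  std::printf("journal: %.*s", (int)cmd.size(), cmd.data());
  for (std::string_view a : args)
    std::printf(" %.*s", (int)a.size(), a.data());
  std::printf("\n");
}

int main() {
  std::vector<std::string_view> kv = {"k1", "v1", "k2", "v2"};
  size_t index = 2;  // first pair applied, second failed (simulated OOM)

  std::string_view cmd;
  ArgSlice cmd_args;
  if (index == 0) {
    cmd = "PING";  // nothing changed, but the tx must still reach the replica
  } else {
    cmd = "MSET";  // journal only the applied prefix [0, index)
    cmd_args = ArgSlice(kv.data(), index);
  }
  RecordJournalStub(cmd, cmd_args);  // prints: journal: MSET k1 v1
}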
@@ -1179,17 +1166,16 @@ void StringFamily::MGet(CmdArgList args, ConnectionContext* cntx) {
     src.storage_list->next = res.storage_list;
     res.storage_list = src.storage_list;
     src.storage_list = nullptr;
-    ShardArgs shard_args = transaction->GetShardArgs(sid);
-    unsigned src_indx = 0;
-    for (auto it = shard_args.begin(); it != shard_args.end(); ++it, ++src_indx) {
-      if (!src.resp_arr[src_indx])
+
+    for (size_t j = 0; j < src.resp_arr.size(); ++j) {
+      if (!src.resp_arr[j])
         continue;
 
-      uint32_t indx = it.index();
-      res.resp_arr[indx] = std::move(src.resp_arr[src_indx]);
+      uint32_t indx = transaction->ReverseArgIndex(sid, j);
+      res.resp_arr[indx] = std::move(src.resp_arr[j]);
       if (cntx->protocol() == Protocol::MEMCACHE) {
-        res.resp_arr[indx]->key = *it;
+        res.resp_arr[indx]->key = ArgS(args, indx);
       }
     }
   }
@@ -1505,7 +1491,9 @@ void StringFamily::Register(CommandRegistry* registry) {
       << CI{"GETEX", CO::WRITE | CO::DENYOOM | CO::FAST | CO::NO_AUTOJOURNAL, -1, 1, 1, acl::kGetEx}
             .HFUNC(GetEx)
       << CI{"GETSET", CO::WRITE | CO::DENYOOM | CO::FAST, 3, 1, 1, acl::kGetSet}.HFUNC(GetSet)
-      << CI{"MGET", CO::READONLY | CO::FAST | CO::IDEMPOTENT, -2, 1, -1, acl::kMGet}.HFUNC(MGet)
+      << CI{"MGET", CO::READONLY | CO::FAST | CO::REVERSE_MAPPING | CO::IDEMPOTENT, -2, 1, -1,
+            acl::kMGet}
+             .HFUNC(MGet)
      << CI{"MSET", kMSetMask, -3, 1, -1, acl::kMSet}.HFUNC(MSet)
      << CI{"MSETNX", kMSetMask, -3, 1, -1, acl::kMSetNx}.HFUNC(MSetNx)
      << CI{"STRLEN", CO::READONLY | CO::FAST, 2, 1, 1, acl::kStrLen}.HFUNC(StrLen)

View file

@@ -184,13 +184,12 @@ void Transaction::InitGlobal() {
 void Transaction::BuildShardIndex(const KeyIndex& key_index, std::vector<PerShardCache>* out) {
   auto& shard_index = *out;
 
-  auto add = [this, &shard_index](uint32_t sid, uint32_t b, uint32_t e) {
-    auto& slices = shard_index[sid].slices;
-    if (!slices.empty() && slices.back().second == b) {
-      slices.back().second = e;
-    } else {
-      slices.emplace_back(b, e);
-    }
+  auto add = [this, rev_mapping = key_index.has_reverse_mapping, &shard_index](uint32_t sid,
+                                                                               uint32_t i) {
+    string_view val = ArgS(full_args_, i);
+    shard_index[sid].args.push_back(val);
+    if (rev_mapping)
+      shard_index[sid].original_index.push_back(i);
   };
 
   if (key_index.bonus) {
@@ -198,39 +197,47 @@
     string_view key = ArgS(full_args_, *key_index.bonus);
     unique_slot_checker_.Add(key);
     uint32_t sid = Shard(key, shard_data_.size());
-    add(sid, *key_index.bonus, *key_index.bonus + 1);
+    add(sid, *key_index.bonus);
   }
 
-  for (unsigned i = key_index.start; i < key_index.end; i += key_index.step) {
+  for (unsigned i = key_index.start; i < key_index.end; ++i) {
     string_view key = ArgS(full_args_, i);
     unique_slot_checker_.Add(key);
     uint32_t sid = Shard(key, shard_data_.size());
     shard_index[sid].key_step = key_index.step;
-    add(sid, i, i + key_index.step);
+    add(sid, i);
+    // Handle values associated with preceding key.
+    for (unsigned j = 1; j < key_index.step; ++j) {
+      add(sid, ++i);
+    }
   }
 }
 
-void Transaction::InitShardData(absl::Span<const PerShardCache> shard_index, size_t num_args) {
-  args_slices_.reserve(num_args);
+void Transaction::InitShardData(absl::Span<const PerShardCache> shard_index, size_t num_args,
+                                bool rev_mapping) {
+  kv_args_.reserve(num_args);
   DCHECK(kv_fp_.empty());
   kv_fp_.reserve(num_args);
+  if (rev_mapping)
+    reverse_index_.reserve(num_args);
 
+  // Store the concatenated per-shard arguments from the shard index inside kv_args_
+  // and make each shard data point to its own sub-span inside kv_args_.
   for (size_t i = 0; i < shard_data_.size(); ++i) {
     auto& sd = shard_data_[i];
-    const auto& src = shard_index[i];
+    const auto& si = shard_index[i];
 
-    sd.slice_count = src.slices.size();
-    sd.slice_start = args_slices_.size();
+    sd.arg_count = si.args.size();
+    sd.arg_start = kv_args_.size();
     sd.fp_start = kv_fp_.size();
     sd.fp_count = 0;
 
     // Multi transactions can re-initialize on different shards, so clear ACTIVE flag.
     DCHECK_EQ(sd.local_mask & ACTIVE, 0);
-    if (sd.slice_count == 0)
+    if (sd.arg_count == 0)
      continue;
 
    sd.local_mask |= ACTIVE;
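
The loop restored in BuildShardIndex above walks arguments one by one and, for step>1 commands like MSET, pulls each key's trailing values onto the same shard right after the key; InitShardData then fingerprints only the key positions (j % key_step == 0). A sketch of the flattening for a single shard — simplified, no sharding or fingerprints:

#include <cassert>
#include <string_view>
#include <vector>

int main() {
  // MSET k1 v1 k2 v2 -> full_args with keys at positions 0 and 2, step 2.
  std::vector<std::string_view> full_args = {"k1", "v1", "k2", "v2"};
  unsigned step = 2;

  std::vector<std::string_view> shard_args;
  for (unsigned i = 0; i < full_args.size(); ++i) {
    shard_args.push_back(full_args[i]);  // the key itself
    // Handle values associated with the preceding key. In the real code only
    // the key positions get lock fingerprints.
    for (unsigned j = 1; j < step; ++j)
      shard_args.push_back(full_args[++i]);
  }
  assert(shard_args.size() == 4 && shard_args[1] == "v1");
}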
@@ -238,16 +245,19 @@ void Transaction::InitShardData(absl::Span<const PerShardCache> shard_index, siz
     unique_shard_cnt_++;
     unique_shard_id_ = i;
 
-    for (size_t j = 0; j < src.slices.size(); ++j) {
-      IndexSlice slice = src.slices[j];
-      args_slices_.push_back(slice);
-      for (uint32_t k = slice.first; k < slice.second; k += src.key_step) {
-        string_view key = ArgS(full_args_, k);
-        kv_fp_.push_back(LockTag(key).Fingerprint());
+    for (size_t j = 0; j < si.args.size(); ++j) {
+      string_view arg = si.args[j];
+      kv_args_.push_back(arg);
+      if (si.key_step == 1 || j % si.key_step == 0) {
+        kv_fp_.push_back(LockTag(arg).Fingerprint());
         sd.fp_count++;
       }
+      if (rev_mapping)
+        reverse_index_.push_back(si.original_index[j]);
     }
   }
+
+  DCHECK_EQ(kv_args_.size(), num_args);
 }
 
 void Transaction::PrepareMultiFps(CmdArgList keys) {
@@ -267,13 +277,22 @@
 void Transaction::StoreKeysInArgs(const KeyIndex& key_index) {
   DCHECK(!key_index.bonus);
   DCHECK(kv_fp_.empty());
-  DCHECK(args_slices_.empty());
 
   // even for a single key we may have multiple arguments per key (MSET).
-  args_slices_.emplace_back(key_index.start, key_index.end);
   for (unsigned j = key_index.start; j < key_index.end; j += key_index.step) {
-    string_view key = ArgS(full_args_, j);
-    kv_fp_.push_back(LockTag(key).Fingerprint());
+    string_view arg = ArgS(full_args_, j);
+    kv_args_.push_back(arg);
+    kv_fp_.push_back(LockTag(arg).Fingerprint());
+
+    for (unsigned k = j + 1; k < j + key_index.step; ++k)
+      kv_args_.push_back(ArgS(full_args_, k));
+  }
+
+  if (key_index.has_reverse_mapping) {
+    reverse_index_.resize(kv_args_.size());
+    for (unsigned j = 0; j < reverse_index_.size(); ++j) {
+      reverse_index_[j] = j + key_index.start;
+    }
   }
 }
@@ -295,7 +314,7 @@ void Transaction::InitByKeys(const KeyIndex& key_index) {
     StoreKeysInArgs(key_index);
 
     unique_shard_cnt_ = 1;
-    string_view akey = ArgS(full_args_, key_index.start);
+    string_view akey = kv_args_.front();
     if (is_stub)  // stub transactions don't migrate
       DCHECK_EQ(unique_shard_id_, Shard(akey, shard_set->size()));
     else {
@@ -321,11 +340,11 @@ void Transaction::InitByKeys(const KeyIndex& key_index) {
   BuildShardIndex(key_index, &shard_index);
 
   // Initialize shard data based on distributed arguments.
-  InitShardData(shard_index, key_index.num_args());
+  InitShardData(shard_index, key_index.num_args(), key_index.has_reverse_mapping);
 
   DCHECK(!multi_ || multi_->mode != LOCK_AHEAD || !multi_->tag_fps.empty());
 
-  DVLOG(1) << "InitByArgs " << DebugId() << facade::ToSV(full_args_.front());
+  DVLOG(1) << "InitByArgs " << DebugId() << " " << kv_args_.front();
 
   // Compress shard data, if we occupy only one shard.
   if (unique_shard_cnt_ == 1) {
@@ -338,8 +357,15 @@ void Transaction::InitByKeys(const KeyIndex& key_index) {
       sd = &shard_data_.front();
       sd->local_mask |= ACTIVE;
     }
-    sd->slice_count = -1;
-    sd->slice_start = -1;
+    sd->arg_count = -1;
+    sd->arg_start = -1;
+  }
+
+  // Validation. Check reverse mapping was built correctly.
+  if (key_index.has_reverse_mapping) {
+    for (size_t i = 0; i < kv_args_.size(); ++i) {
+      DCHECK_EQ(kv_args_[i], ArgS(full_args_, reverse_index_[i])) << full_args_;
+    }
   }
 
   // Validation.
@@ -370,7 +396,7 @@ OpStatus Transaction::InitByArgs(DbIndex index, CmdArgList args) {
   }
 
   DCHECK_EQ(unique_shard_cnt_, 0u);
-  DCHECK(args_slices_.empty());
+  DCHECK(kv_args_.empty());
   DCHECK(kv_fp_.empty());
 
   OpResult<KeyIndex> key_index = DetermineKeys(cid_, args);
@@ -401,8 +427,8 @@ void Transaction::PrepareSquashedMultiHop(const CommandId* cid,
     } else {
       shard_data_[i].local_mask &= ~ACTIVE;
     }
-    shard_data_[i].slice_start = 0;
-    shard_data_[i].slice_count = 0;
+    shard_data_[i].arg_start = 0;
+    shard_data_[i].arg_count = 0;
   }
 
   MultiBecomeSquasher();
@@ -459,14 +485,15 @@ void Transaction::MultiSwitchCmd(const CommandId* cid) {
   unique_shard_id_ = 0;
   unique_shard_cnt_ = 0;
-  args_slices_.clear();
+  kv_args_.clear();
   kv_fp_.clear();
+  reverse_index_.clear();
 
   cid_ = cid;
   cb_ptr_ = nullptr;
 
   for (auto& sd : shard_data_) {
-    sd.slice_count = sd.slice_start = 0;
+    sd.arg_count = sd.arg_start = 0;
     if (multi_->mode == NON_ATOMIC) {
       sd.local_mask = 0;  // Non atomic transactions schedule each time, so remove all flags
@@ -528,6 +555,7 @@ void Transaction::PrepareMultiForScheduleSingleHop(ShardId sid, DbIndex db, CmdA
   EnableShard(sid);
   OpResult<KeyIndex> key_index = DetermineKeys(cid_, args);
   CHECK(key_index);
+  DCHECK(!key_index->has_reverse_mapping);
   StoreKeysInArgs(*key_index);
 }
@@ -1153,12 +1181,23 @@ ShardArgs Transaction::GetShardArgs(ShardId sid) const {
   // We can read unique_shard_cnt_ only because ShardArgsInShard is called after IsArmedInShard
   // barrier.
   if (unique_shard_cnt_ == 1) {
-    return ShardArgs{full_args_, absl::MakeSpan(args_slices_)};
+    return kv_args_;
   }
 
   const auto& sd = shard_data_[sid];
-  return ShardArgs{full_args_,
-                   absl::MakeSpan(args_slices_.data() + sd.slice_start, sd.slice_count)};
+  return ShardArgs{kv_args_.data() + sd.arg_start, sd.arg_count};
+}
+
+// from local index back to original arg index skipping the command.
+// i.e. returns (first_key_pos -1) or bigger.
+size_t Transaction::ReverseArgIndex(ShardId shard_id, size_t arg_index) const {
+  DCHECK_LT(arg_index, reverse_index_.size());
+
+  if (unique_shard_cnt_ == 1)
+    return reverse_index_[arg_index];
+
+  const auto& sd = shard_data_[shard_id];
+  return reverse_index_[sd.arg_start + arg_index];
 }
 
 OpStatus Transaction::WaitOnWatch(const time_point& tp, WaitKeysProvider wkeys_provider,
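
ReverseArgIndex, restored above, is the inverse of GetShardArgs: reverse_index_ is laid out parallel to kv_args_, so each shard's sub-range starts at arg_start and every entry holds the argument's original position. A self-contained sketch of that contract — MiniTx is a stand-in, not the real Transaction:

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

struct PerShardData {
  uint32_t arg_start = 0;  // offset of this shard's args inside kv_args_
  uint32_t arg_count = 0;
};

struct MiniTx {
  std::vector<uint32_t> reverse_index_;  // parallel to kv_args_
  std::vector<PerShardData> shard_data_;
  unsigned unique_shard_cnt_ = 0;

  size_t ReverseArgIndex(size_t shard_id, size_t arg_index) const {
    if (unique_shard_cnt_ == 1)
      return reverse_index_[arg_index];
    return reverse_index_[shard_data_[shard_id].arg_start + arg_index];
  }
};

int main() {
  // Two shards: shard 0 holds original args {1, 3}, shard 1 holds {2, 4}.
  MiniTx tx;
  tx.unique_shard_cnt_ = 2;
  tx.reverse_index_ = {1, 3, 2, 4};
  tx.shard_data_ = {{0, 2}, {2, 2}};
  assert(tx.ReverseArgIndex(1, 0) == 2);  // shard 1's first arg was arg #2
  assert(tx.ReverseArgIndex(0, 1) == 3);
}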
@@ -1334,7 +1373,7 @@ bool Transaction::NotifySuspended(TxId committed_txid, ShardId sid, string_view
   // Change state to awaked and store index of awakened key
   sd.local_mask &= ~SUSPENDED_Q;
   sd.local_mask |= AWAKED_Q;
-  sd.wake_key_pos = it.index();
+  sd.wake_key_pos = it - args.begin();
 
   blocking_barrier_.Close();
   return true;
@@ -1345,8 +1384,8 @@ optional<string_view> Transaction::GetWakeKey(ShardId sid) const {
   if ((sd.local_mask & AWAKED_Q) == 0)
     return nullopt;
 
-  CHECK_LT(sd.wake_key_pos, full_args_.size());
-  return ArgS(full_args_, sd.wake_key_pos);
+  CHECK_NE(sd.wake_key_pos, UINT16_MAX);
+  return GetShardArgs(sid).at(sd.wake_key_pos);
 }
 
 void Transaction::LogAutoJournalOnShard(EngineShard* shard, RunnableResult result) {
@@ -1384,11 +1423,10 @@ void Transaction::LogAutoJournalOnShard(EngineShard* shard, RunnableResult resul
   journal::Entry::Payload entry_payload;
 
   string_view cmd{cid_->name()};
-  if (unique_shard_cnt_ == 1 || args_slices_.empty()) {
+  if (unique_shard_cnt_ == 1 || kv_args_.empty()) {
     entry_payload = journal::Entry::Payload(cmd, full_args_);
   } else {
-    ShardArgs shard_args = GetShardArgs(shard->shard_id());
-    entry_payload = journal::Entry::Payload(cmd, shard_args);
+    entry_payload = journal::Entry::Payload(cmd, GetShardArgs(shard->shard_id()).AsSlice());
   }
   LogJournalOnShard(shard, std::move(entry_payload), unique_shard_cnt_, false, true);
 }
@@ -1475,6 +1513,10 @@ OpResult<KeyIndex> DetermineKeys(const CommandId* cid, CmdArgList args) {
   int num_custom_keys = -1;
 
+  if (cid->opt_mask() & CO::REVERSE_MAPPING) {
+    key_index.has_reverse_mapping = true;
+  }
+
   if (cid->opt_mask() & CO::VARIADIC_KEYS) {
     // ZUNION/INTER <num_keys> <key1> [<key2> ...]
     // EVAL <script> <num_keys>

View file

@@ -180,6 +180,9 @@ class Transaction {
   // Get command arguments for specific shard. Called from shard thread.
   ShardArgs GetShardArgs(ShardId sid) const;
 
+  // Map arg_index from GetShardArgs slice to index in original command slice from InitByArgs.
+  size_t ReverseArgIndex(ShardId shard_id, size_t arg_index) const;
+
   // Execute transaction hop. If conclude is true, it is removed from the pending queue.
   void Execute(RunnableType cb, bool conclude);
@@ -386,8 +389,8 @@ class Transaction {
     // Set when the shard is prepared for another hop. Sync point. Cleared when execution starts.
     std::atomic_bool is_armed = false;
 
-    uint32_t slice_start = 0;  // Subspan in kv_args_ with local arguments.
-    uint32_t slice_count = 0;
+    uint32_t arg_start = 0;  // Subspan in kv_args_ with local arguments.
+    uint32_t arg_count = 0;
 
     // span into kv_fp_
     uint32_t fp_start = 0;
@@ -397,7 +400,7 @@ class Transaction {
     TxQueue::Iterator pq_pos = TxQueue::kEnd;
 
     // Index of key relative to args in shard that the shard was woken up after blocking wait.
-    uint32_t wake_key_pos = UINT32_MAX;
+    uint16_t wake_key_pos = UINT16_MAX;
 
     // Irrational stats purely for debugging purposes.
     struct Stats {
@@ -440,11 +443,13 @@ class Transaction {
   // Auxiliary structure used during initialization
   struct PerShardCache {
-    std::vector<IndexSlice> slices;
+    std::vector<std::string_view> args;
+    std::vector<uint32_t> original_index;
     unsigned key_step = 1;
 
     void Clear() {
-      slices.clear();
+      args.clear();
+      original_index.clear();
     }
   };
@@ -483,7 +488,8 @@ class Transaction {
   void BuildShardIndex(const KeyIndex& keys, std::vector<PerShardCache>* out);
 
   // Init shard data from shard index.
-  void InitShardData(absl::Span<const PerShardCache> shard_index, size_t num_args);
+  void InitShardData(absl::Span<const PerShardCache> shard_index, size_t num_args,
+                     bool rev_mapping);
 
   // Store all key index keys in args_. Used only for single shard initialization.
   void StoreKeysInArgs(const KeyIndex& key_index);
@@ -582,11 +588,10 @@ class Transaction {
   // TODO: explore dense packing
   absl::InlinedVector<PerShardData, 4> shard_data_;
 
-  // Stores slices of key/values partitioned by shards.
-  // Slices reference full_args_.
+  // Stores keys/values of the transaction partitioned by shards.
   // We need values as well since we reorder keys, and we need to know what value corresponds
   // to what key.
-  absl::InlinedVector<IndexSlice, 4> args_slices_;
+  absl::InlinedVector<std::string_view, 4> kv_args_;
 
   // Fingerprints of keys, precomputed once during the transaction initialization.
   absl::InlinedVector<LockFp, 4> kv_fp_;
@@ -597,6 +602,9 @@ class Transaction {
   // Set if a NO_AUTOJOURNAL command asked to enable auto journal again
   bool re_enabled_auto_journal_ = false;
 
+  // Reverse argument mapping for ReverseArgIndex to convert from shard index to original index.
+  std::vector<uint32_t> reverse_index_;
+
   RunnableType* cb_ptr_ = nullptr;    // Run on shard threads
   const CommandId* cid_ = nullptr;    // Underlying command
   std::unique_ptr<MultiData> multi_;  // Initialized when the transaction is multi/exec.

View file

@@ -15,21 +15,7 @@ namespace dfly {
 using namespace std;
 using Payload = journal::Entry::Payload;
 
-size_t ShardArgs::Size() const {
-  size_t sz = 0;
-  for (const auto& s : slice_.second)
-    sz += (s.second - s.first);
-  return sz;
-}
-
-void RecordJournal(const OpArgs& op_args, string_view cmd, const ShardArgs& args,
-                   uint32_t shard_cnt, bool multi_commands) {
-  VLOG(2) << "Logging command " << cmd << " from txn " << op_args.tx->txid();
-  op_args.tx->LogJournalOnShard(op_args.shard, Payload(cmd, args), shard_cnt, multi_commands,
-                                false);
-}
-
-void RecordJournal(const OpArgs& op_args, std::string_view cmd, ArgSlice args, uint32_t shard_cnt,
+void RecordJournal(const OpArgs& op_args, string_view cmd, ArgSlice args, uint32_t shard_cnt,
                    bool multi_commands) {
   VLOG(2) << "Logging command " << cmd << " from txn " << op_args.tx->txid();
   op_args.tx->LogJournalOnShard(op_args.shard, Payload(cmd, args), shard_cnt, multi_commands,

View file

@@ -39,6 +39,7 @@ struct KeyIndex {
   // if index is non-zero then adds another key index (usually 0).
   // relevant for for commands like ZUNIONSTORE/ZINTERSTORE for destination key.
   std::optional<uint16_t> bonus{};
+  bool has_reverse_mapping = false;
 
   KeyIndex(unsigned s = 0, unsigned e = 0, unsigned step = 0) : start(s), end(e), step(step) {
   }
@@ -106,94 +107,52 @@
 std::function<bool(EngineShard*, const DbContext& context, Transaction* tx, std::string_view)>;
 
 // References arguments in another array.
-using IndexSlice = std::pair<uint32_t, uint32_t>;  // [begin, end)
+using IndexSlice = std::pair<uint32_t, uint32_t>;  // (begin, end)
 
-// ShardArgs - hold a span to full arguments and a span of sub-ranges
-// referencing those arguments.
-class ShardArgs {
-  using ArgsIndexPair = std::pair<facade::CmdArgList, absl::Span<const IndexSlice>>;
-  ArgsIndexPair slice_;
-
+class ShardArgs : protected ArgSlice {
  public:
-  class Iterator {
-    facade::CmdArgList arglist_;
-    absl::Span<const IndexSlice>::const_iterator index_it_;
-    uint32_t delta_ = 0;
-
-   public:
-    using iterator_category = std::input_iterator_tag;
-    using value_type = std::string_view;
-    using difference_type = ptrdiff_t;
-    using pointer = value_type*;
-    using reference = value_type&;
-
-    // First version, corresponds to spans over arguments.
-    Iterator(facade::CmdArgList list, absl::Span<const IndexSlice>::const_iterator it)
-        : arglist_(list), index_it_(it) {
-    }
-
-    bool operator==(const Iterator& o) const {
-      return arglist_ == o.arglist_ && index_it_ == o.index_it_ && delta_ == o.delta_;
-    }
-
-    bool operator!=(const Iterator& o) const {
-      return !(*this == o);
-    }
-
-    std::string_view operator*() const {
-      return facade::ArgS(arglist_, index());
-    }
-
-    Iterator& operator++() {
-      ++delta_;
-      if (index() >= index_it_->second) {
-        ++index_it_;
-        delta_ = 0;
-      }
-      return *this;
-    }
-
-    size_t index() const {
-      return index_it_->first + delta_;
-    }
-  };
-
-  ShardArgs(facade::CmdArgList fa, absl::Span<const IndexSlice> s) : slice_(ArgsIndexPair(fa, s)) {
-  }
-
-  ShardArgs() : slice_(ArgsIndexPair{}) {
-  }
-
-  size_t Size() const;
-
-  Iterator cbegin() const {
-    return Iterator{slice_.first, slice_.second.begin()};
-  }
-
-  Iterator cend() const {
-    return Iterator{slice_.first, slice_.second.end()};
-  }
-
-  Iterator begin() const {
-    return cbegin();
-  }
-
-  Iterator end() const {
-    return cend();
-  }
-
-  bool Empty() const {
-    return slice_.second.empty();
-  }
-
-  std::string_view Front() const {
-    return *cbegin();
-  }
+  using ArgSlice::ArgSlice;
+  using ArgSlice::at;
+  using ArgSlice::operator=;
+  using Iterator = ArgSlice::iterator;
+
+  ShardArgs(const ArgSlice& o) : ArgSlice(o) {
+  }
+
+  size_t Size() const {
+    return ArgSlice::size();
+  }
+
+  auto cbegin() const {
+    return ArgSlice::cbegin();
+  }
+
+  auto cend() const {
+    return ArgSlice::cend();
+  }
+
+  auto begin() const {
+    return cbegin();
+  }
+
+  auto end() const {
+    return cend();
+  }
+
+  bool Empty() const {
+    return ArgSlice::empty();
+  }
+
+  std::string_view Front() const {
+    return *cbegin();
+  }
+
+  ArgSlice AsSlice() const {
+    return ArgSlice(*this);
+  }
 };
 
 // Record non auto journal command with own txid and dbid.
-void RecordJournal(const OpArgs& op_args, std::string_view cmd, const ShardArgs& args,
-                   uint32_t shard_cnt = 1, bool multi_commands = false);
-
 void RecordJournal(const OpArgs& op_args, std::string_view cmd, ArgSlice args,
                    uint32_t shard_cnt = 1, bool multi_commands = false);
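
The hunk above swaps the index-slice ShardArgs for a thin wrapper over ArgSlice: instead of lazily resolving [begin, end) ranges against the full argument vector, per-shard arguments are materialized once and ShardArgs becomes effectively a contiguous span. A sketch contrasting the two designs, with std::span standing in for absl::Span/ArgSlice:

#include <cassert>
#include <cstdint>
#include <span>
#include <string_view>
#include <utility>
#include <vector>

using IndexSlice = std::pair<uint32_t, uint32_t>;  // [begin, end)

// Removed design: nothing is copied; arguments are resolved through slices.
std::vector<std::string_view> ResolveSlices(std::span<const std::string_view> full,
                                            std::span<const IndexSlice> slices) {
  std::vector<std::string_view> out;
  for (auto [b, e] : slices)
    for (uint32_t i = b; i < e; ++i)
      out.push_back(full[i]);
  return out;
}

int main() {
  std::vector<std::string_view> full = {"a", "b", "c", "d"};
  std::vector<IndexSlice> slices = {{0, 1}, {2, 4}};  // "a", then "c", "d"

  auto resolved = ResolveSlices(full, slices);

  // Restored design: the per-shard arguments live in one flat vector
  // (kv_args_) and a ShardArgs is simply a span over it.
  std::span<const std::string_view> shard_args(resolved);
  assert(shard_args.size() == 3 && shard_args.front() == "a");
}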

View file

@@ -879,7 +879,7 @@ double GetKeyWeight(Transaction* t, ShardId shard_id, const vector<double>& weig
     return 1;
   }
 
-  unsigned windex = key_index - cmdargs_keys_offset;
+  unsigned windex = t->ReverseArgIndex(shard_id, key_index) - cmdargs_keys_offset;
   DCHECK_LT(windex, weights.size());
   return weights[windex];
 }
@@ -920,8 +920,8 @@ OpResult<ScoredMap> OpUnion(EngineShard* shard, Transaction* t, string_view dest
       ++index;
       continue;
     }
-    key_weight_vec[index] = {
-        *it_res, GetKeyWeight(t, shard->shard_id(), weights, start.index(), cmdargs_keys_offset)};
+    key_weight_vec[index] = {*it_res, GetKeyWeight(t, shard->shard_id(), weights,
+                                                   index + removed_keys, cmdargs_keys_offset)};
     ++index;
   }
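
GetKeyWeight above is the ZUNIONSTORE/ZINTERSTORE case: WEIGHTS are listed in original command order, while each shard sees its keys reordered, so the shard-local key index must be reverse-mapped before indexing the weights vector. A sketch with made-up values and a simplified signature:

#include <cassert>
#include <cstddef>
#include <vector>

double GetKeyWeight(const std::vector<double>& weights, size_t original_index,
                    size_t keys_offset) {
  if (weights.empty())
    return 1.0;  // default weight when WEIGHTS was not given
  return weights[original_index - keys_offset];
}

int main() {
  // ZUNIONSTORE dest 2 key_a key_b WEIGHTS 2.0 3.0
  // In this sketch the keys start at argument position 2.
  std::vector<double> weights = {2.0, 3.0};
  std::vector<size_t> reverse_index = {3, 2};  // shard saw key_b before key_a
  size_t shard_local = 0;                      // first key on this shard: key_b
  double w = GetKeyWeight(weights, reverse_index[shard_local], 2);
  assert(w == 3.0);  // key_b's weight, despite being first on the shard
}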
@@ -3329,7 +3329,7 @@ constexpr uint32_t kGeoRadiusByMember = WRITE | GEO | SLOW;
 }  // namespace acl
 
 void ZSetFamily::Register(CommandRegistry* registry) {
-  constexpr uint32_t kStoreMask = CO::WRITE | CO::VARIADIC_KEYS | CO::DENYOOM;
+  constexpr uint32_t kStoreMask = CO::WRITE | CO::VARIADIC_KEYS | CO::REVERSE_MAPPING | CO::DENYOOM;
   registry->StartFamily();
 
   // TODO: to add support for SCRIPT for BZPOPMIN, BZPOPMAX similarly to BLPOP.
   *registry
@@ -3368,7 +3368,9 @@ void ZSetFamily::Register(CommandRegistry* registry) {
                                                                        ZRevRangeByScore)
       << CI{"ZREVRANK", CO::READONLY | CO::FAST, 3, 1, 1, acl::kZRevRank}.HFUNC(ZRevRank)
       << CI{"ZSCAN", CO::READONLY, -3, 1, 1, acl::kZScan}.HFUNC(ZScan)
-      << CI{"ZUNION", CO::READONLY | CO::VARIADIC_KEYS, -3, 2, 2, acl::kZUnion}.HFUNC(ZUnion)
+      << CI{"ZUNION", CO::READONLY | CO::REVERSE_MAPPING | CO::VARIADIC_KEYS, -3, 2, 2,
+            acl::kZUnion}
+             .HFUNC(ZUnion)
       << CI{"ZUNIONSTORE", kStoreMask, -4, 3, 3, acl::kZUnionStore}.HFUNC(ZUnionStore)
 
       // GEO functions