* fix: Remove a stale reference to blocking watch queue

1. Remove the duplicated FinalizeWatched function
2. Identify the case where we delete the watched queue while we may still have awakedened_keys pointing to it.
3. Add a test reproducing the issue of having in awakened_keys an untangled key.

Properly fixes #2514

Signed-off-by: Roman Gershman <roman@dragonflydb.io>
---------

Signed-off-by: Roman Gershman <roman@dragonflydb.io>
This commit is contained in:
Roman Gershman 2024-02-01 14:19:08 +02:00 committed by GitHub
parent 2b0310db32
commit adeac6bd27
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 36 additions and 48 deletions

View file

@ -93,13 +93,14 @@ bool BlockingController::DbWatchTable::UnwatchTx(string_view key, Transaction* t
// the suspended item does not have to be the first one in the queue.
// This shard has not been awakened and in case this transaction in the queue
// we must clean it up.
if (auto it = wq->Find(tx); it != wq->items.end()) {
wq->items.erase(it);
}
}
if (wq->items.empty()) {
DVLOG(1) << "queue_map.erase";
awakened_keys.erase(wq_it->first);
queue_map.erase(wq_it);
}
return res;
@ -120,45 +121,7 @@ bool BlockingController::DbWatchTable::AddAwakeEvent(string_view key) {
return awakened_keys.insert(it->first).second;
}
// Optionally removes tx from the front of the watch queues.
void BlockingController::FinalizeWatched(KeyLockArgs lock_args, Transaction* tx) {
DCHECK(tx);
ShardId sid = owner_->shard_id();
uint16_t local_mask = tx->GetLocalMask(sid);
VLOG(1) << "FinalizeBlocking [" << sid << "]" << tx->DebugId() << " " << local_mask;
bool is_awakened = local_mask & Transaction::AWAKED_Q;
if (is_awakened)
awakened_transactions_.erase(tx);
auto dbit = watched_dbs_.find(tx->GetDbIndex());
// Can happen if it was the only transaction in the queue and it was notified and removed.
if (dbit == watched_dbs_.end())
return;
DbWatchTable& wt = *dbit->second;
// Add keys of processed transaction so we could awake the next one in the queue
// in case those keys still exist.
for (size_t i = 0; i < lock_args.args.size(); i += lock_args.key_step) {
string_view key = lock_args.args[i];
bool removed_awakened = wt.UnwatchTx(key, tx);
if (removed_awakened) {
CHECK(is_awakened) << tx->DebugId() << " " << key << " " << local_mask;
}
}
if (wt.queue_map.empty()) {
watched_dbs_.erase(dbit);
}
awakened_indices_.emplace(tx->GetDbIndex());
}
// Similar function but with ArgSlice. TODO: to fix the duplication.
// Removes tx from its watch queues if tx appears there.
void BlockingController::FinalizeWatched(ArgSlice args, Transaction* tx) {
DCHECK(tx);
@ -195,6 +158,7 @@ void BlockingController::FinalizeWatched(ArgSlice args, Transaction* tx) {
awakened_indices_.emplace(tx->GetDbIndex());
}
// Runs on the shard thread.
void BlockingController::NotifyPending() {
const Transaction* tx = owner_->GetContTx();
CHECK(tx == nullptr) << tx->DebugId();
@ -214,6 +178,8 @@ void BlockingController::NotifyPending() {
DVLOG(1) << "Processing awakened key " << sv_key;
auto w_it = wt.queue_map.find(sv_key);
if (w_it == wt.queue_map.end()) {
// This should not happen because we remove keys from awakened_keys every type we remove
// the entry from queue_map. TODO: to make it a CHECK after Dec 2024
LOG(ERROR) << "Internal error: Key " << sv_key
<< " was not found in the watch queue, wt.awakened_keys len is "
<< wt.awakened_keys.size() << " wt.queue_map len is " << wt.queue_map.size();
@ -229,6 +195,7 @@ void BlockingController::NotifyPending() {
WatchQueue* wq = w_it->second.get();
NotifyWatchQueue(sv_key, wq, context);
if (wq->items.empty()) {
// we erase awakened_keys right after this loop finishes running.
wt.queue_map.erase(w_it);
}
}