From d4b708d33c510d143ab8f36856473bc1802d7b8c Mon Sep 17 00:00:00 2001 From: Roman Gershman Date: Mon, 22 Nov 2021 09:43:43 +0200 Subject: [PATCH] Introduce basic memcached parser --- server/CMakeLists.txt | 5 +- server/memcache_parser.cc | 148 +++++++++++++++++++++++++++++++++ server/memcache_parser.h | 67 +++++++++++++++ server/memcache_parser_test.cc | 39 +++++++++ 4 files changed, 257 insertions(+), 2 deletions(-) create mode 100644 server/memcache_parser.cc create mode 100644 server/memcache_parser.h create mode 100644 server/memcache_parser_test.cc diff --git a/server/CMakeLists.txt b/server/CMakeLists.txt index f2697b3c7..edf9415a4 100644 --- a/server/CMakeLists.txt +++ b/server/CMakeLists.txt @@ -2,8 +2,8 @@ add_executable(dragonfly dfly_main.cc) cxx_link(dragonfly base dragonfly_lib) add_library(dragonfly_lib command_registry.cc config_flags.cc db_slice.cc dragonfly_listener.cc - dragonfly_connection.cc - main_service.cc engine_shard_set.cc + dragonfly_connection.cc engine_shard_set.cc + main_service.cc memcache_parser.cc redis_parser.cc resp_expr.cc reply_builder.cc) cxx_link(dragonfly_lib uring_fiber_lib @@ -13,3 +13,4 @@ add_library(dfly_test_lib test_utils.cc) cxx_link(dfly_test_lib dragonfly_lib gtest_main_ext) cxx_test(redis_parser_test dfly_test_lib LABELS DFLY) +cxx_test(memcache_parser_test dfly_test_lib LABELS DFLY) diff --git a/server/memcache_parser.cc b/server/memcache_parser.cc new file mode 100644 index 000000000..6828d75f4 --- /dev/null +++ b/server/memcache_parser.cc @@ -0,0 +1,148 @@ +// Copyright 2021, Beeri 15. All rights reserved. 
+// Author: Roman Gershman (romange@gmail.com) +// +#include "server/memcache_parser.h" + +#include +#include + +namespace dfly { +using namespace std; + +namespace { + +pair cmd_map[] = { + {"set", MemcacheParser::SET}, {"add", MemcacheParser::ADD}, + {"replace", MemcacheParser::REPLACE}, {"append", MemcacheParser::APPEND}, + {"prepend", MemcacheParser::PREPEND}, {"cas", MemcacheParser::CAS}, + {"get", MemcacheParser::GET}, {"gets", MemcacheParser::GETS}, + {"gat", MemcacheParser::GAT}, {"gats", MemcacheParser::GATS}, +}; + +MemcacheParser::CmdType From(string_view token) { + for (const auto& k_v : cmd_map) { + if (token == k_v.first) + return k_v.second; + } + return MemcacheParser::INVALID; +} + +MemcacheParser::Result ParseStore(const std::string_view* tokens, unsigned num_tokens, + MemcacheParser::Command* res) { + + unsigned opt_pos = 3; + if (res->type == MemcacheParser::CAS) { + if (num_tokens <= opt_pos) + return MemcacheParser::PARSE_ERROR; + ++opt_pos; + } + + uint32_t flags; + if (!absl::SimpleAtoi(tokens[0], &flags) || !absl::SimpleAtoi(tokens[1], &res->expire_ts) || + !absl::SimpleAtoi(tokens[2], &res->bytes_len)) + return MemcacheParser::BAD_INT; + + if (flags > 0xFFFF) + return MemcacheParser::BAD_INT; + + if (res->type == MemcacheParser::CAS && !absl::SimpleAtoi(tokens[3], &res->cas_unique)) { + return MemcacheParser::BAD_INT; + } + + res->flags = flags; + if (num_tokens == opt_pos + 1) { + if (tokens[opt_pos] == "noreply") { + res->no_reply = true; + } else { + return MemcacheParser::PARSE_ERROR; + } + } else if (num_tokens > opt_pos + 1) { + return MemcacheParser::PARSE_ERROR; + } + + return MemcacheParser::OK; +} + +MemcacheParser::Result ParseRetrieve(const std::string_view* tokens, unsigned num_tokens, + MemcacheParser::Command* res) { + unsigned key_pos = 0; + if (res->type == MemcacheParser::GAT || res->type == MemcacheParser::GATS) { + if (!absl::SimpleAtoi(tokens[0], &res->expire_ts)) { + return MemcacheParser::BAD_INT; + } + ++key_pos; + 
} + res->key = tokens[key_pos++]; + while (key_pos < num_tokens) { + res->keys_ext.push_back(tokens[key_pos++]); + } + + return MemcacheParser::OK; +} + +} // namespace + +auto MemcacheParser::Parse(string_view str, uint32_t* consumed, Command* res) -> Result { + auto pos = str.find('\n'); + *consumed = 0; + if (pos == string_view::npos) { + // TODO: it's over simplified since we may process gets command that is not limited to + // 300 characters. + return str.size() > 300 ? PARSE_ERROR : INPUT_PENDING; + } + if (pos == 0 || str[pos - 1] != '\r') { + return PARSE_ERROR; + } + *consumed = pos + 1; + + // cas [noreply]\r\n + // get *\r\n + string_view tokens[8]; + unsigned num_tokens = 0; + uint32_t cur = 0; + + while (cur < pos && str[cur] == ' ') + ++cur; + uint32_t s = cur; + for (; cur < pos; ++cur) { + if (str[cur] == ' ' || str[cur] == '\r') { + if (cur != s) { + tokens[num_tokens++] = str.substr(s, cur - s); + if (num_tokens == ABSL_ARRAYSIZE(tokens)) { + ++cur; + s = cur; + break; + } + } + s = cur + 1; + } + } + if (num_tokens == 0) + return PARSE_ERROR; + + while (cur < pos - 1) { + if (str[cur] != ' ') + return PARSE_ERROR; + ++cur; + } + + res->type = From(tokens[0]); + if (res->type == INVALID) { + return UNKNOWN_CMD; + } + + if (res->type <= CAS) { // Store command + if (num_tokens < 5 || tokens[1].size() > 250) { + return MemcacheParser::PARSE_ERROR; + } + + // memcpy(single_key_, tokens[0].data(), tokens[0].size()); // we copy the key + res->key = string_view{tokens[1].data(), tokens[1].size()}; + + return ParseStore(tokens + 2, num_tokens - 2, res); + } + + return ParseRetrieve(tokens + 1, num_tokens - 1, res); +}; + +} // namespace dfly \ No newline at end of file diff --git a/server/memcache_parser.h b/server/memcache_parser.h new file mode 100644 index 000000000..cb3280103 --- /dev/null +++ b/server/memcache_parser.h @@ -0,0 +1,67 @@ +// Copyright 2021, Beeri 15. All rights reserved. 
// Author: Roman Gershman (romange@gmail.com)
//

#pragma once

#include <cstdint>
#include <string_view>
#include <vector>

namespace dfly {

// Memcache parser does not parse value blobs, only the commands.
// The expectation is that the caller will parse the command and
// then will follow up with reading the blob data directly from source.
class MemcacheParser {
 public:
  // Command kinds. The numeric ranges group related commands: storage
  // commands occupy [SET, CAS], which IsStoreCmd() relies on.
  enum CmdType {
    INVALID = 0,
    SET = 1,
    ADD = 2,
    REPLACE = 3,
    APPEND = 4,
    PREPEND = 5,
    CAS = 6,

    // Retrieval
    GET = 10,
    GETS = 11,
    GAT = 12,
    GATS = 13,

    // Delete and INCR
    DELETE = 21,
    INCR = 22,
    DECR = 23,
  };

  // Result of parsing a single command line.
  // `key` and the entries of `keys_ext` are views into the buffer that was
  // passed to Parse(); they are only valid while that buffer is alive and
  // unchanged.
  struct Command {
    CmdType type = INVALID;
    std::string_view key;                    // first (or only) key
    std::vector<std::string_view> keys_ext;  // additional keys of a retrieval command

    uint64_t cas_unique = 0;  // parsed for CAS only
    uint32_t expire_ts = 0;   // <exptime> argument as sent by the client
    uint32_t bytes_len = 0;   // <bytes> argument of a storage command
    uint16_t flags = 0;
    bool no_reply = false;  // set when the "noreply" token was present
  };

  enum Result {
    OK,
    INPUT_PENDING,  // no line terminator seen yet, more input is needed
    UNKNOWN_CMD,    // the command name is not recognized
    BAD_INT,        // a numeric argument is not a valid integer or is out of range
    PARSE_ERROR,    // the line is malformed
  };

  // Returns true for storage commands (SET .. CAS).
  static constexpr bool IsStoreCmd(CmdType type) {
    return type >= SET && type <= CAS;
  }

  // Parses a single command line from `str`.
  // `consumed` receives the number of bytes of `str` that belong to the
  // command line (0 when no complete line was found).
  // `res` receives the parsed command when OK is returned.
  Result Parse(std::string_view str, uint32_t* consumed, Command* res);
};

}  // namespace dfly

// ---------------------------------------------------------------------------
// Patch metadata for the next file in this patch:
//   diff --git a/server/memcache_parser_test.cc b/server/memcache_parser_test.cc
//   new file mode 100644
//   index 000000000..071b28397
//   --- /dev/null
//   +++ b/server/memcache_parser_test.cc
//   @@ -0,0 +1,39 @@
// ---------------------------------------------------------------------------
// Copyright 2021, Beeri 15. All rights reserved.
+// Author: Roman Gershman (romange@gmail.com) +// + +#include "server/memcache_parser.h" + +#include + +#include "absl/strings/str_cat.h" +#include "base/gtest.h" +#include "base/logging.h" +#include "server/test_utils.h" + +using namespace testing; +using namespace std; +namespace dfly { + +class MCParserTest : public testing::Test { + protected: + RedisParser::Result Parse(std::string_view str); + + MemcacheParser parser_; + MemcacheParser::Command cmd_; + uint32_t consumed_; + + unique_ptr stash_; +}; + + +TEST_F(MCParserTest, Basic) { + MemcacheParser::Result st = parser_.Parse("set a 1 20 3\r\n", &consumed_, &cmd_); + EXPECT_EQ(MemcacheParser::OK, st); + EXPECT_EQ("a", cmd_.key); + EXPECT_EQ(1, cmd_.flags); + EXPECT_EQ(20, cmd_.expire_ts); + EXPECT_EQ(3, cmd_.bytes_len); +} + +} // namespace dfly \ No newline at end of file