From bcae2dfb46708de06ae41b18b286c67ce488f3fa Mon Sep 17 00:00:00 2001 From: Roman Gershman Date: Fri, 23 Feb 2024 13:53:41 +0200 Subject: [PATCH] a test with flat buffers (#2520) feat: a test with flat buffers Also, add an experimental flag `--experimental_flat_json` that allows writing json objects as flat strings using FlexBuffers. The experiment shows that `debug populate 100000 a 10 type json elements 30` uses almost 3 times less memory than with native jsoncons objects. Signed-off-by: Roman Gershman --- .devcontainer/alpine/devcontainer.json | 10 ++- .devcontainer/alpine/post-create.sh | 5 ++ src/CMakeLists.txt | 13 +++ src/core/CMakeLists.txt | 1 + src/core/flatbuffers_test.cc | 106 +++++++++++++++++++++++++ src/server/CMakeLists.txt | 2 +- src/server/json_family.cc | 23 +++++- 7 files changed, 152 insertions(+), 8 deletions(-) create mode 100755 .devcontainer/alpine/post-create.sh create mode 100644 src/core/flatbuffers_test.cc diff --git a/.devcontainer/alpine/devcontainer.json b/.devcontainer/alpine/devcontainer.json index cba91e3f3..a642d49af 100644 --- a/.devcontainer/alpine/devcontainer.json +++ b/.devcontainer/alpine/devcontainer.json @@ -1,5 +1,5 @@ { - "name": "helio", + "name": "alpine-dev", "image": "ghcr.io/romange/alpine-dev", "customizations": { "vscode": { @@ -10,11 +10,13 @@ "twxs.cmake" ], "settings": { - "cmake.buildDirectory": "${workspaceFolder}/build-alpine" + "cmake.buildDirectory": "/build", + "cmake.configureArgs": [] } } }, "mounts": [ - "source=alpine-vol,target=/root,type=volume" - ] + "source=alpine-vol,target=/build,type=volume" + ], + "postCreateCommand": ".devcontainer/alpine/post-create.sh ${containerWorkspaceFolder}" } diff --git a/.devcontainer/alpine/post-create.sh b/.devcontainer/alpine/post-create.sh new file mode 100755 index 000000000..0b6e937c3 --- /dev/null +++ b/.devcontainer/alpine/post-create.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +containerWorkspaceFolder=$1 +git config --global --add safe.directory 
${containerWorkspaceFolder}/helio +mkdir -p /root/.local/share/CMakeTools diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 83885d1bf..e3661db7d 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -141,6 +141,19 @@ set_target_properties(TRDP::fast_float PROPERTIES Message(STATUS "THIRD_PARTY_LIB_DIR ${THIRD_PARTY_LIB_DIR}") +find_package(Flatbuffers) +if (TARGET flatbuffers::flatbuffers) + get_target_property(FLATBUF_PATH flatbuffers::flatbuffers LOCATION) + set(FLATBUF_TARGET flatbuffers::flatbuffers) + Message("-- Flatbuffers found at ${FLATBUF_PATH}") +elseif (TARGET flatbuffers::flatbuffers_shared) + # alpine linux has shared library + get_target_property(FLATBUF_PATH flatbuffers::flatbuffers_shared LOCATION) + set(FLATBUF_TARGET flatbuffers::flatbuffers_shared) + Message("-- Flatbuffers found at ${FLATBUF_PATH}") +else() + Message("-- Flatbuffers not found, please install via libflatbuffers-dev") +endif() option(ENABLE_GIT_VERSION "Build with Git metadata" OFF) diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 98669fbf9..231110926 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -27,3 +27,4 @@ cxx_test(string_map_test dfly_core LABELS DFLY) cxx_test(sorted_map_test dfly_core redis_test_lib LABELS DFLY) cxx_test(bptree_set_test dfly_core LABELS DFLY) cxx_test(score_map_test dfly_core LABELS DFLY) +cxx_test(flatbuffers_test dfly_core ${FLATBUF_TARGET} LABELS DFLY) diff --git a/src/core/flatbuffers_test.cc b/src/core/flatbuffers_test.cc new file mode 100644 index 000000000..b269474c1 --- /dev/null +++ b/src/core/flatbuffers_test.cc @@ -0,0 +1,106 @@ +// Copyright 2023, DragonflyDB authors. All rights reserved. +// See LICENSE for licensing terms. 
+// + +#include +#include +#include +#include + +#include "base/gtest.h" +#include "base/logging.h" + +using namespace std; + +namespace dfly { +class FlatBuffersTest : public ::testing::Test { + protected: +}; + +TEST_F(FlatBuffersTest, Basic) { + flexbuffers::Builder fbb; + fbb.Map([&] { + fbb.String("foo", "bar"); + fbb.Double("bar", 1.5); + fbb.Vector("strs", [&] { + fbb.String("hello"); + fbb.String("world"); + }); + }); + + fbb.Finish(); + auto buffer = fbb.GetBuffer(); + auto map = flexbuffers::GetRoot(buffer).AsMap(); + EXPECT_EQ("bar", map["foo"].AsString().str()); +} + +TEST_F(FlatBuffersTest, FlexiParser) { + flatbuffers::Parser parser; + const char* json = R"( + { + "foo": "bar", + "bar": 1.5, + "strs": ["hello", "world"] + } + )"; + flexbuffers::Builder fbb; + ASSERT_TRUE(parser.ParseFlexBuffer(json, nullptr, &fbb)); + fbb.Finish(); + const auto& buffer = fbb.GetBuffer(); + string_view buf_view{reinterpret_cast(buffer.data()), buffer.size()}; + LOG(INFO) << "Binary buffer: " << absl::CHexEscape(buf_view); + + auto map = flexbuffers::GetRoot(buffer).AsMap(); + EXPECT_EQ("bar", map["foo"].AsString().str()); +} + +TEST_F(FlatBuffersTest, ParseJson) { + const char* schema = R"( + namespace dfly; + table Foo { + foo: string; + bar: double; + strs: [string]; + } + root_type Foo; + )"; + + flatbuffers::Parser parser; + ASSERT_TRUE(parser.Parse(schema)); + parser.Serialize(); + flatbuffers::DetachedBuffer bsb = parser.builder_.Release(); + + // This schema will always reference bsb. 
+ auto* fbs_schema = reflection::GetSchema(bsb.data()); + + flatbuffers::Verifier verifier(bsb.data(), bsb.size()); + ASSERT_TRUE(fbs_schema->Verify(verifier)); + + auto* root_table = fbs_schema->root_table(); + auto* fields = root_table->fields(); + auto* field_foo = fields->LookupByKey("foo"); + ASSERT_EQ(field_foo->type()->base_type(), reflection::String); + + const char* json = R"( + { + "foo": "value", + "bar": 1.5, + "strs": ["hello", "world"] + } + )"; + + ASSERT_TRUE(parser.Parse(json)); + size_t buf_size = parser.builder_.GetSize(); + + ASSERT_TRUE( + flatbuffers::Verify(*fbs_schema, *root_table, parser.builder_.GetBufferPointer(), buf_size)); + auto* root_obj = flatbuffers::GetAnyRoot(parser.builder_.GetBufferPointer()); + + const flatbuffers::String* value = flatbuffers::GetFieldS(*root_obj, *field_foo); + EXPECT_EQ("value", value->str()); + + // wrong type. + ASSERT_FALSE(parser.Parse(R"({"foo": 1})")); +} + +} // namespace dfly diff --git a/src/server/CMakeLists.txt b/src/server/CMakeLists.txt index 159368855..5bfda5d02 100644 --- a/src/server/CMakeLists.txt +++ b/src/server/CMakeLists.txt @@ -69,7 +69,7 @@ cxx_link(dfly_transaction dfly_core strings_lib TRDP::fast_float) cxx_link(dragonfly_lib dfly_transaction dfly_facade redis_lib awsv2_lib jsonpath strings_lib html_lib http_client_lib absl::random_random TRDP::jsoncons ${ZSTD_LIB} TRDP::lz4 - TRDP::croncpp) + TRDP::croncpp ${FLATBUF_TARGET}) if (DF_USE_SSL) set(TLS_LIB tls_lib) diff --git a/src/server/json_family.cc b/src/server/json_family.cc index e44e0e958..3644b51a1 100644 --- a/src/server/json_family.cc +++ b/src/server/json_family.cc @@ -7,6 +7,8 @@ #include #include #include +#include +#include #include #include @@ -24,10 +26,12 @@ #include "server/error.h" #include "server/journal/journal.h" #include "server/search/doc_index.h" +#include "server/string_family.h" #include "server/tiered_storage.h" #include "server/transaction.h" ABSL_FLAG(bool, jsonpathv2, false, "If true uses Dragonfly 
jsonpath implementation."); +ABSL_FLAG(bool, experimental_flat_json, false, "If true uses flat json implementation."); namespace dfly { @@ -1125,10 +1129,23 @@ OpResult OpSet(const OpArgs& op_args, string_view key, string_view path, } } - if (SetJson(op_args, key, std::move(parsed_json.value())) == OpStatus::OUT_OF_MEMORY) { - return OpStatus::OUT_OF_MEMORY; - } + if (absl::GetFlag(FLAGS_experimental_flat_json)) { + flatbuffers::Parser parser; + flexbuffers::Builder fbb; + string tmp(json_str); + CHECK_EQ(json_str.size(), strlen(tmp.c_str())); + parser.ParseFlexBuffer(tmp.c_str(), nullptr, &fbb); + fbb.Finish(); + const auto& buffer = fbb.GetBuffer(); + string_view buf_view{reinterpret_cast(buffer.data()), buffer.size()}; + SetCmd scmd(op_args, false); + scmd.Set(SetCmd::SetParams{}, key, buf_view); + } else { + if (SetJson(op_args, key, std::move(parsed_json.value())) == OpStatus::OUT_OF_MEMORY) { + return OpStatus::OUT_OF_MEMORY; + } + } return true; }