mirror of
https://github.com/ollama/ollama.git
synced 2025-05-11 02:16:36 +02:00
llama: remove model loading for grammar (#10096)
This commit is contained in:
parent
40b10eee6d
commit
a53d744b01
13 changed files with 521 additions and 107 deletions
47
llama/sampling_ext.cpp
vendored
47
llama/sampling_ext.cpp
vendored
|
@ -5,6 +5,7 @@
|
|||
#include "llama.h"
|
||||
#include "llama-model.h"
|
||||
#include "llama-model-loader.h"
|
||||
#include "llama-grammar.h"
|
||||
|
||||
struct common_sampler *common_sampler_cinit(const struct llama_model *model, struct common_sampler_cparams *params) {
|
||||
try {
|
||||
|
@ -86,3 +87,49 @@ struct llama_vocab * llama_load_vocab_from_file(const char * fname) {
|
|||
// Release a vocab previously created by llama_load_vocab_from_file.
// Passing nullptr is safe: delete on a null pointer is a no-op.
void llama_free_vocab(struct llama_vocab *vocab) {
    delete vocab;
}
|
||||
struct llama_grammar *grammar_init(char* grammar, uint32_t* tokens, size_t n_tokens, const char** pieces, uint32_t* eog_tokens, size_t n_eog_tokens) {
|
||||
try {
|
||||
if (grammar == nullptr) {
|
||||
LLAMA_LOG_ERROR("%s: null grammar input\n", __func__);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
ollama_vocab *vocab = new ollama_vocab();
|
||||
vocab->set_eog_tokens(eog_tokens, n_eog_tokens);
|
||||
vocab->add_token_pieces(tokens, n_tokens, pieces);
|
||||
|
||||
struct llama_grammar *g = llama_grammar_init_impl(nullptr, vocab, grammar, "root", false, nullptr, 0, nullptr, 0);
|
||||
if (g == nullptr) {
|
||||
LLAMA_LOG_ERROR("%s: failed to initialize grammar\n", __func__);
|
||||
delete vocab;
|
||||
return nullptr;
|
||||
}
|
||||
return g;
|
||||
|
||||
} catch (const std::exception& e) {
|
||||
LLAMA_LOG_ERROR("%s: exception during initialization: %s\n", __func__, e.what());
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
void grammar_free(struct llama_grammar *g) {
|
||||
if (g != nullptr) {
|
||||
if (g->vocab != nullptr) {
|
||||
delete g->vocab;
|
||||
}
|
||||
llama_grammar_free_impl(g);
|
||||
}
|
||||
}
|
||||
|
||||
void grammar_apply(struct llama_grammar *g, struct llama_token_data_array *tokens) {
|
||||
if (g == nullptr || tokens == nullptr) {
|
||||
LLAMA_LOG_ERROR("%s: null grammar or tokens input\n", __func__);
|
||||
return;
|
||||
}
|
||||
llama_grammar_apply_impl(*g, tokens);
|
||||
}
|
||||
|
||||
|
||||
void grammar_accept(struct llama_grammar *g, llama_token id) {
|
||||
llama_grammar_accept_impl(*g, id);
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue