mirror of
https://github.com/ollama/ollama.git
synced 2025-05-11 02:16:36 +02:00
llama: remove model loading for grammar (#10096)
This commit is contained in:
parent
40b10eee6d
commit
a53d744b01
13 changed files with 521 additions and 107 deletions
47
llama/sampling_ext.cpp
vendored
47
llama/sampling_ext.cpp
vendored
|
@ -5,6 +5,7 @@
|
|||
#include "llama.h"
|
||||
#include "llama-model.h"
|
||||
#include "llama-model-loader.h"
|
||||
#include "llama-grammar.h"
|
||||
|
||||
struct common_sampler *common_sampler_cinit(const struct llama_model *model, struct common_sampler_cparams *params) {
|
||||
try {
|
||||
|
@ -86,3 +87,49 @@ struct llama_vocab * llama_load_vocab_from_file(const char * fname) {
|
|||
// Release a vocab previously created by llama_load_vocab_from_file.
// Passing nullptr is safe: delete on a null pointer is a no-op.
void llama_free_vocab(struct llama_vocab *vocab) {
    delete vocab;
}
|
||||
struct llama_grammar *grammar_init(char* grammar, uint32_t* tokens, size_t n_tokens, const char** pieces, uint32_t* eog_tokens, size_t n_eog_tokens) {
|
||||
try {
|
||||
if (grammar == nullptr) {
|
||||
LLAMA_LOG_ERROR("%s: null grammar input\n", __func__);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
ollama_vocab *vocab = new ollama_vocab();
|
||||
vocab->set_eog_tokens(eog_tokens, n_eog_tokens);
|
||||
vocab->add_token_pieces(tokens, n_tokens, pieces);
|
||||
|
||||
struct llama_grammar *g = llama_grammar_init_impl(nullptr, vocab, grammar, "root", false, nullptr, 0, nullptr, 0);
|
||||
if (g == nullptr) {
|
||||
LLAMA_LOG_ERROR("%s: failed to initialize grammar\n", __func__);
|
||||
delete vocab;
|
||||
return nullptr;
|
||||
}
|
||||
return g;
|
||||
|
||||
} catch (const std::exception& e) {
|
||||
LLAMA_LOG_ERROR("%s: exception during initialization: %s\n", __func__, e.what());
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
void grammar_free(struct llama_grammar *g) {
|
||||
if (g != nullptr) {
|
||||
if (g->vocab != nullptr) {
|
||||
delete g->vocab;
|
||||
}
|
||||
llama_grammar_free_impl(g);
|
||||
}
|
||||
}
|
||||
|
||||
void grammar_apply(struct llama_grammar *g, struct llama_token_data_array *tokens) {
|
||||
if (g == nullptr || tokens == nullptr) {
|
||||
LLAMA_LOG_ERROR("%s: null grammar or tokens input\n", __func__);
|
||||
return;
|
||||
}
|
||||
llama_grammar_apply_impl(*g, tokens);
|
||||
}
|
||||
|
||||
|
||||
void grammar_accept(struct llama_grammar *g, llama_token id) {
|
||||
llama_grammar_accept_impl(*g, id);
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue