sample: temporarily use grammars for constrained generation in new engine (#9586)

This commit is contained in:
Jeffrey Morgan 2025-03-10 16:17:39 +01:00 committed by GitHub
parent a1cda80bcb
commit e093db92c4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 301 additions and 213 deletions

View file

@ -2,6 +2,9 @@
#include "sampling.h"
#include "sampling_ext.h"
#include "json-schema-to-grammar.h"
#include "llama.h"
#include "llama-model.h"
#include "llama-model-loader.h"
struct common_sampler *common_sampler_cinit(const struct llama_model *model, struct common_sampler_cparams *params) {
try {
@ -64,3 +67,22 @@ int schema_to_grammar(const char *json_schema, char *grammar, size_t max_len)
return 0;
}
}
struct llama_vocab * llama_load_vocab_from_file(const char * fname) {
llama_vocab * vocab = new llama_vocab();
try {
const auto kv = LLM_KV(LLM_ARCH_UNKNOWN);
std::vector<std::string> splits = {};
llama_model_loader ml(std::string(fname), splits, false, false, nullptr);
vocab->load(ml, kv);
} catch (const std::exception & err) {
LLAMA_LOG_ERROR("%s: error loading model: %s\n", __func__, err.what());
return nullptr;
}
return vocab;
}
void llama_free_vocab(struct llama_vocab * vocab) {
delete vocab;
}