Mirror of https://github.com/ollama/ollama.git (synced 2025-05-12 02:46:36 +02:00)
commit f8586c6b2b ("remove mllama patch")
parent 0d6e35d3c6
35 changed files with 37 additions and 2444 deletions
llama/llama.cpp/include/llama.h (vendored): 6 lines changed
@@ -256,7 +256,6 @@ extern "C" {

         llama_token  *  token;
         float        *  embd;
-        int32_t         n_embd;
         llama_pos    *  pos;
         int32_t      *  n_seq_id;
         llama_seq_id ** seq_id;
@@ -359,7 +358,6 @@ extern "C" {
         bool offload_kqv; // whether to offload the KQV ops (including the KV cache) to GPU
         bool flash_attn;  // whether to use flash attention [EXPERIMENTAL]
         bool no_perf;     // whether to measure performance timings
-        bool cross_attn;  // whether to use cross attention

         // Abort callback
         // if it returns true, execution of llama_decode() will be aborted
@@ -461,10 +459,6 @@ extern "C" {
                      struct llama_context_params params),
             "use llama_init_from_model instead");

-    // TODO (jmorganca): this should most likely be passed in as part of a batch
-    // and not set on the context for all batches.
-    LLAMA_API void llama_set_cross_attention(struct llama_context * ctx, bool cross_attn_state);
-
     // Frees all allocated memory
     LLAMA_API void llama_free(struct llama_context * ctx);
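The removals above drop the mllama-specific cross-attention surface from the vendored header: the cross_attn flag in llama_context_params, the per-context llama_set_cross_attention() setter, and the n_embd field the patch had added to llama_batch. As a rough illustration only (not code from this repository), a hypothetical call site built against the patched, pre-removal header might have looked like the sketch below; after this commit neither the field nor the function exists, so such code no longer compiles against the vendored llama.h.

// Hypothetical caller built against the patched (pre-removal) header.
// The two marked lines rely on symbols added by the mllama patch and
// deleted by this commit.
#include "llama.h"

struct llama_context * init_mllama_context(struct llama_model * model) {
    struct llama_context_params params = llama_context_default_params();
    params.cross_attn = true;                 // removed: field no longer exists in llama_context_params

    struct llama_context * ctx = llama_init_from_model(model, params);
    if (ctx != NULL) {
        llama_set_cross_attention(ctx, true); // removed: declaration deleted by this commit
    }
    return ctx;
}

Code that still needs mllama-style cross attention would have to carry the patch itself rather than rely on the vendored header.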