Mirror of https://github.com/ollama/ollama.git (synced 2025-05-11 18:36:41 +02:00)
llama: update to commit 2016f07b (#10352)
parent 11dde41824
commit e9e5f61c45
46 changed files with 1967 additions and 1753 deletions
llama/llama.cpp/src/llama-context.cpp (vendored, 6 changed lines)
@@ -10,6 +10,7 @@
 #include <cstring>
 #include <stdexcept>
 #include <cinttypes>
+#include <cmath>

 //
 // llama_context
@@ -473,7 +474,6 @@ ggml_tensor * llama_context::build_rope_shift(
     const auto & n_ctx_orig = cparams.n_ctx_orig_yarn;

     const auto & yarn_ext_factor  = cparams.yarn_ext_factor;
-    const auto & yarn_attn_factor = cparams.yarn_attn_factor;
     const auto & yarn_beta_fast   = cparams.yarn_beta_fast;
     const auto & yarn_beta_slow   = cparams.yarn_beta_slow;

@@ -482,6 +482,10 @@ ggml_tensor * llama_context::build_rope_shift(
     const auto & n_rot     = hparams.n_rot;
     const auto & rope_type = hparams.rope_type;

+    // See llm_build_deepseek2() for why attn_factor has to be scaled for YaRN RoPE to work correctly.
+    // See https://github.com/ggerganov/llama.cpp/discussions/7416 for detailed explanation.
+    const float yarn_attn_factor = model.arch == LLM_ARCH_DEEPSEEK2 ? 1.0f / (1.0f + 0.1f * logf(1.0f / freq_scale)) : cparams.yarn_attn_factor;
+
     ggml_tensor * tmp;

     if (ggml_is_quantized(cur->type)) {
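For reference on the new expression: with a RoPE scaling factor s = 1/freq_scale, the usual YaRN magnitude scale is 0.1 * ln(s) + 1.0, and the value computed here for LLM_ARCH_DEEPSEEK2 is its reciprocal (every other architecture keeps cparams.yarn_attn_factor). The standalone sketch below only evaluates that expression for a few scale factors; it is not part of the patch, and the helper name yarn_attn_factor_for_shift is made up for illustration.

// Minimal standalone sketch of the DEEPSEEK2 branch of the patched expression.
// yarn_attn_factor_for_shift() is a hypothetical helper, not a llama.cpp symbol.
#include <cmath>
#include <cstdio>

static float yarn_attn_factor_for_shift(float freq_scale) {
    // s = 1/freq_scale is the context-extension factor; 0.1f * logf(s) + 1.0f is the
    // standard YaRN mscale, and the patch uses its reciprocal for DeepSeek2 models.
    return 1.0f / (1.0f + 0.1f * logf(1.0f / freq_scale));
}

int main() {
    const float scales[] = {1.0f, 0.5f, 0.25f, 0.125f};
    for (float freq_scale : scales) {
        printf("freq_scale = %.3f -> yarn_attn_factor = %.4f\n",
               freq_scale, yarn_attn_factor_for_shift(freq_scale));
    }
    return 0;
}

At freq_scale = 1.0 (no context extension) the factor is exactly 1.0, so the DeepSeek2 branch is a no-op in the unscaled case.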