llama: preserve field order in user-defined JSON schemas (#8002)

Previously we decoded and re-encoded JSON schemas during validation,
which served no purpose since json.RawMessage already validates JSON
syntax. Worse, the re-encoding lost field ordering from the original
schema, which affects inference quality during step-by-step reasoning.
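
For illustration only (not part of this change), a minimal Go sketch of
the problem: round-tripping a schema through a map reorders keys, since
encoding/json sorts map keys on output, while json.RawMessage keeps the
user's original bytes (and therefore the field order) intact:

    package main

    import (
        "encoding/json"
        "fmt"
    )

    func main() {
        schema := []byte(`{"type":"object","properties":{"thought":{"type":"string"},"answer":{"type":"string"}}}`)

        // Decode/re-encode: keys come back alphabetized ("answer" before "thought").
        var decoded map[string]any
        if err := json.Unmarshal(schema, &decoded); err != nil {
            panic(err)
        }
        reencoded, _ := json.Marshal(decoded)
        fmt.Println(string(reencoded))

        // json.RawMessage: syntax is still validated by json.Unmarshal, but the
        // bytes (and the schema's field order) are preserved as written.
        var raw json.RawMessage
        if err := json.Unmarshal(schema, &raw); err != nil {
            panic(err)
        }
        fmt.Println(string(raw))
    }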

While fixing this ordering issue by using json.RawMessage directly,
testing revealed that schema_to_grammar (from llama.cpp) also fails to
preserve field order during grammar generation. This appears to be the
root cause of inference degradation.

This change prevents us from mangling the user's original schema order,
but we still need to address the ordering issue in schema_to_grammar.
That will be a separate change.

Updates #7978
Blake Mizerany 2024-12-11 14:07:30 -08:00 committed by GitHub
parent 581a4a5553
commit 9039c821a2
5 changed files with 104 additions and 114 deletions

@@ -610,7 +610,7 @@ func (s *llmServer) WaitUntilRunning(ctx context.Context) error {
 	}
 }
 
-const jsonGrammar = `
+var grammarJSON = `
 root ::= object
 value ::= object | array | string | number | ("true" | "false" | "null") ws
 object ::=
@@ -722,22 +722,19 @@ func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn fu
 		return fmt.Errorf("unexpected server status: %s", status.ToString())
 	}
 
-	// TODO (parthsareen): Move conversion to grammar with sampling logic
-	// API should do error handling for invalid formats
-	if req.Format != nil && strings.TrimSpace(string(req.Format)) != "null" {
-		if strings.ToLower(strings.TrimSpace(string(req.Format))) == `"json"` {
-			request["grammar"] = jsonGrammar
-			if !strings.Contains(strings.ToLower(req.Prompt), "json") {
-				slog.Warn("prompt does not specify that the LLM should response in JSON, but JSON format is expected. For best results specify that JSON is expected in the system prompt.")
+	if len(req.Format) > 0 {
+		switch {
+		case bytes.Equal(req.Format, []byte(`"json"`)):
+			request["grammar"] = grammarJSON
+		case bytes.HasPrefix(req.Format, []byte("{")):
+			// User provided a JSON schema
+			g := llama.SchemaToGrammar(req.Format)
+			if g == nil {
+				return fmt.Errorf("invalid JSON schema in format")
 			}
-		} else if schema, err := func() (llama.JsonSchema, error) {
-			var schema llama.JsonSchema
-			err := json.Unmarshal(req.Format, &schema)
-			return schema, err
-		}(); err == nil {
-			request["grammar"] = schema.AsGrammar()
-		} else {
-			slog.Warn(`format is neither a schema or "json"`, "format", req.Format)
+			request["grammar"] = string(g)
+		default:
+			return errors.New(`invalid format: expected "json" or a JSON schema`)
 		}
 	}
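
For reference, a standalone sketch of the dispatch the new code performs
on req.Format. The helper name formatToGrammar and the placeholder grammar
strings are illustrative only; the real code lives in (*llmServer).Completion
and passes the raw schema bytes to llama.SchemaToGrammar without re-encoding:

    package main

    import (
        "bytes"
        "errors"
        "fmt"
    )

    // formatToGrammar mirrors the new switch: an empty format means "no
    // grammar", "json" selects the generic JSON grammar, and a leading "{"
    // is treated as a user-supplied JSON schema.
    func formatToGrammar(format []byte) (string, error) {
        switch {
        case len(format) == 0:
            return "", nil
        case bytes.Equal(format, []byte(`"json"`)):
            return "<generic JSON grammar>", nil
        case bytes.HasPrefix(format, []byte("{")):
            return fmt.Sprintf("<grammar derived from schema %s>", format), nil
        default:
            return "", errors.New(`invalid format: expected "json" or a JSON schema`)
        }
    }

    func main() {
        g, err := formatToGrammar([]byte(`{"type":"object","properties":{"answer":{"type":"string"}}}`))
        fmt.Println(g, err)
    }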