From f49e1e28d20fa91692c20510a2e1045df241f9ce Mon Sep 17 00:00:00 2001 From: "Thibault \"bui\" Koechlin" Date: Tue, 4 Mar 2025 10:41:11 +0100 Subject: [PATCH] move ParseQuery to expr helpers, add ExtractQueryParam (#3491) * move ParseQuery to expr helpers, add ExtractQueryParam --- pkg/appsec/query_utils.go | 78 ---------- pkg/appsec/query_utils_test.go | 207 ------------------------- pkg/appsec/request.go | 3 +- pkg/exprhelpers/expr_lib.go | 15 ++ pkg/exprhelpers/exprlib_test.go | 63 ++++++++ pkg/exprhelpers/waf.go | 111 ++++++++++++++ pkg/exprhelpers/waf_test.go | 264 ++++++++++++++++++++++++++++++++ 7 files changed, 455 insertions(+), 286 deletions(-) delete mode 100644 pkg/appsec/query_utils.go delete mode 100644 pkg/appsec/query_utils_test.go diff --git a/pkg/appsec/query_utils.go b/pkg/appsec/query_utils.go deleted file mode 100644 index 0c886e0ea..000000000 --- a/pkg/appsec/query_utils.go +++ /dev/null @@ -1,78 +0,0 @@ -package appsec - -// This file is mostly stolen from net/url package, but with some modifications to allow less strict parsing of query strings - -import ( - "net/url" - "strings" -) - -// parseQuery and parseQuery are copied net/url package, but allow semicolon in values -func ParseQuery(query string) url.Values { - m := make(url.Values) - parseQuery(m, query) - return m -} - -func parseQuery(m url.Values, query string) { - for query != "" { - var key string - key, query, _ = strings.Cut(query, "&") - - if key == "" { - continue - } - key, value, _ := strings.Cut(key, "=") - //for now we'll just ignore the errors, but ideally we want to fire some "internal" rules when we see invalid query strings - key = unescape(key) - value = unescape(value) - m[key] = append(m[key], value) - } -} - -func hexDigitToByte(digit byte) (byte, bool) { - switch { - case digit >= '0' && digit <= '9': - return digit - '0', true - case digit >= 'a' && digit <= 'f': - return digit - 'a' + 10, true - case digit >= 'A' && digit <= 'F': - return digit - 'A' + 10, true - default: - return 0, false - } -} - -func unescape(input string) string { - ilen := len(input) - res := strings.Builder{} - res.Grow(ilen) - for i := 0; i < ilen; i++ { - ci := input[i] - if ci == '+' { - res.WriteByte(' ') - continue - } - if ci == '%' { - if i+2 >= ilen { - res.WriteByte(ci) - continue - } - hi, ok := hexDigitToByte(input[i+1]) - if !ok { - res.WriteByte(ci) - continue - } - lo, ok := hexDigitToByte(input[i+2]) - if !ok { - res.WriteByte(ci) - continue - } - res.WriteByte(hi<<4 | lo) - i += 2 - continue - } - res.WriteByte(ci) - } - return res.String() -} diff --git a/pkg/appsec/query_utils_test.go b/pkg/appsec/query_utils_test.go deleted file mode 100644 index 2ad792796..000000000 --- a/pkg/appsec/query_utils_test.go +++ /dev/null @@ -1,207 +0,0 @@ -package appsec - -import ( - "net/url" - "reflect" - "testing" -) - -func TestParseQuery(t *testing.T) { - tests := []struct { - name string - query string - expected url.Values - }{ - { - name: "Simple query", - query: "foo=bar", - expected: url.Values{ - "foo": []string{"bar"}, - }, - }, - { - name: "Multiple values", - query: "foo=bar&foo=baz", - expected: url.Values{ - "foo": []string{"bar", "baz"}, - }, - }, - { - name: "Empty value", - query: "foo=", - expected: url.Values{ - "foo": []string{""}, - }, - }, - { - name: "Empty key", - query: "=bar", - expected: url.Values{ - "": []string{"bar"}, - }, - }, - { - name: "Empty query", - query: "", - expected: url.Values{}, - }, - { - name: "Multiple keys", - query: "foo=bar&baz=qux", - expected: url.Values{ - "foo": []string{"bar"}, - "baz": []string{"qux"}, - }, - }, - { - name: "Multiple keys with empty value", - query: "foo=bar&baz=qux&quux=", - expected: url.Values{ - "foo": []string{"bar"}, - "baz": []string{"qux"}, - "quux": []string{""}, - }, - }, - { - name: "Multiple keys with empty value and empty key", - query: "foo=bar&baz=qux&quux=&=quuz", - expected: url.Values{ - "foo": []string{"bar"}, - "baz": []string{"qux"}, - "quux": []string{""}, - "": []string{"quuz"}, - }, - }, - { - name: "Multiple keys with empty value and empty key and multiple values", - query: "foo=bar&baz=qux&quux=&=quuz&foo=baz", - expected: url.Values{ - "foo": []string{"bar", "baz"}, - "baz": []string{"qux"}, - "quux": []string{""}, - "": []string{"quuz"}, - }, - }, - { - name: "Multiple keys with empty value and empty key and multiple values and escaped characters", - query: "foo=bar&baz=qux&quux=&=quuz&foo=baz&foo=bar%20baz", - expected: url.Values{ - "foo": []string{"bar", "baz", "bar baz"}, - "baz": []string{"qux"}, - "quux": []string{""}, - "": []string{"quuz"}, - }, - }, - { - name: "Multiple keys with empty value and empty key and multiple values and escaped characters and semicolon", - query: "foo=bar&baz=qux&quux=&=quuz&foo=baz&foo=bar%20baz&foo=bar%3Bbaz", - expected: url.Values{ - "foo": []string{"bar", "baz", "bar baz", "bar;baz"}, - "baz": []string{"qux"}, - "quux": []string{""}, - "": []string{"quuz"}, - }, - }, - { - name: "Multiple keys with empty value and empty key and multiple values and escaped characters and semicolon and ampersand", - query: "foo=bar&baz=qux&quux=&=quuz&foo=baz&foo=bar%20baz&foo=bar%3Bbaz&foo=bar%26baz", - expected: url.Values{ - "foo": []string{"bar", "baz", "bar baz", "bar;baz", "bar&baz"}, - "baz": []string{"qux"}, - "quux": []string{""}, - "": []string{"quuz"}, - }, - }, - { - name: "Multiple keys with empty value and empty key and multiple values and escaped characters and semicolon and ampersand and equals", - query: "foo=bar&baz=qux&quux=&=quuz&foo=baz&foo=bar%20baz&foo=bar%3Bbaz&foo=bar%26baz&foo=bar%3Dbaz", - expected: url.Values{ - "foo": []string{"bar", "baz", "bar baz", "bar;baz", "bar&baz", "bar=baz"}, - "baz": []string{"qux"}, - "quux": []string{""}, - "": []string{"quuz"}, - }, - }, - { - name: "Multiple keys with empty value and empty key and multiple values and escaped characters and semicolon and ampersand and equals and question mark", - query: "foo=bar&baz=qux&quux=&=quuz&foo=baz&foo=bar%20baz&foo=bar%3Bbaz&foo=bar%26baz&foo=bar%3Dbaz&foo=bar%3Fbaz", - expected: url.Values{ - "foo": []string{"bar", "baz", "bar baz", "bar;baz", "bar&baz", "bar=baz", "bar?baz"}, - "baz": []string{"qux"}, - "quux": []string{""}, - "": []string{"quuz"}, - }, - }, - { - name: "keys with escaped characters", - query: "foo=ba;r&baz=qu;;x&quux=x\\&ww&xx=qu?uz&", - expected: url.Values{ - "foo": []string{"ba;r"}, - "baz": []string{"qu;;x"}, - "quux": []string{"x\\"}, - "ww": []string{""}, - "xx": []string{"qu?uz"}, - }, - }, - { - name: "hexadecimal characters", - query: "foo=bar%20baz", - expected: url.Values{ - "foo": []string{"bar baz"}, - }, - }, - { - name: "hexadecimal characters upper and lower case", - query: "foo=Ba%42%42&bar=w%2f%2F", - expected: url.Values{ - "foo": []string{"BaBB"}, - "bar": []string{"w//"}, - }, - }, - { - name: "hexadecimal characters with invalid characters", - query: "foo=bar%20baz%2", - expected: url.Values{ - "foo": []string{"bar baz%2"}, - }, - }, - { - name: "hexadecimal characters with invalid hex characters", - query: "foo=bar%xx", - expected: url.Values{ - "foo": []string{"bar%xx"}, - }, - }, - { - name: "hexadecimal characters with invalid 2nd hex character", - query: "foo=bar%2x", - expected: url.Values{ - "foo": []string{"bar%2x"}, - }, - }, - { - name: "url +", - query: "foo=bar+x", - expected: url.Values{ - "foo": []string{"bar x"}, - }, - }, - { - name: "url &&", - query: "foo=bar&&lol=bur", - expected: url.Values{ - "foo": []string{"bar"}, - "lol": []string{"bur"}, - }, - }, - } - - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - res := ParseQuery(test.query) - if !reflect.DeepEqual(res, test.expected) { - t.Fatalf("unexpected result: %v", res) - } - }) - } -} diff --git a/pkg/appsec/request.go b/pkg/appsec/request.go index 66ca13d55..e0e17bf99 100644 --- a/pkg/appsec/request.go +++ b/pkg/appsec/request.go @@ -11,6 +11,7 @@ import ( "os" "regexp" + "github.com/crowdsecurity/crowdsec/pkg/exprhelpers" "github.com/google/uuid" log "github.com/sirupsen/logrus" ) @@ -396,7 +397,7 @@ func NewParsedRequestFromRequest(r *http.Request, logger *log.Entry) (ParsedRequ URL: parsedURL, Proto: r.Proto, Body: body, - Args: ParseQuery(parsedURL.RawQuery), + Args: exprhelpers.ParseQuery(parsedURL.RawQuery), TransferEncoding: r.TransferEncoding, ResponseChannel: make(chan AppsecTempResponse), RemoteAddrNormalized: remoteAddrNormalized, diff --git a/pkg/exprhelpers/expr_lib.go b/pkg/exprhelpers/expr_lib.go index 8c49bdc5f..e0d7f6d97 100644 --- a/pkg/exprhelpers/expr_lib.go +++ b/pkg/exprhelpers/expr_lib.go @@ -3,6 +3,7 @@ package exprhelpers import ( "net" "net/http" + "net/url" "time" "github.com/oschwald/geoip2-golang" @@ -151,6 +152,20 @@ var exprFuncs = []exprCustomFunc{ new(func(string) map[string][]string), }, }, + { + name: "ParseQuery", + function: ExprWrapParseQuery, + signature: []interface{}{ + new(func(string) url.Values), + }, + }, + { + name: "ExtractQueryParam", + function: ExprWrapExtractQueryParam, + signature: []interface{}{ + new(func(string, string) []string), + }, + }, { name: "PathUnescape", function: PathUnescape, diff --git a/pkg/exprhelpers/exprlib_test.go b/pkg/exprhelpers/exprlib_test.go index e449115c1..5829b01ef 100644 --- a/pkg/exprhelpers/exprlib_test.go +++ b/pkg/exprhelpers/exprlib_test.go @@ -2,6 +2,7 @@ package exprhelpers import ( "errors" + "net/url" "testing" "time" @@ -160,6 +161,68 @@ func TestMatch(t *testing.T) { } } +// just to verify that the function is available, real tests are in TestExtractQueryParam +func TestExtractQueryParamExpr(t *testing.T) { + err := Init(nil) + require.NoError(t, err) + tests := []struct { + name string + env map[string]interface{} + code string + result []string + err string + }{ + { + name: "ExtractQueryParam() test: basic test", + env: map[string]interface{}{ + "query": "/foo?a=1&b=2", + }, + code: "ExtractQueryParam(query, 'a')", + result: []string{"1"}, + }, + } + for _, test := range tests { + program, err := expr.Compile(test.code, GetExprOptions(test.env)...) + require.NoError(t, err) + output, err := expr.Run(program, test.env) + require.NoError(t, err) + require.Equal(t, test.result, output) + log.Printf("test '%s' : OK", test.name) + } + +} + +// just to verify that the function is available, real tests are in TestParseQuery +func TestParseQueryInExpr(t *testing.T) { + err := Init(nil) + require.NoError(t, err) + tests := []struct { + name string + env map[string]interface{} + code string + result url.Values + err string + }{ + { + name: "ParseQuery() test: basic test", + env: map[string]interface{}{ + "query": "a=1&b=2", + "ParseQuery": ParseQuery, + }, + code: "ParseQuery(query)", + result: url.Values{"a": {"1"}, "b": {"2"}}, + }, + } + for _, test := range tests { + program, err := expr.Compile(test.code, GetExprOptions(test.env)...) + require.NoError(t, err) + output, err := expr.Run(program, test.env) + require.NoError(t, err) + require.Equal(t, test.result, output) + log.Printf("test '%s' : OK", test.name) + } +} + func TestDistanceHelper(t *testing.T) { err := Init(nil) require.NoError(t, err) diff --git a/pkg/exprhelpers/waf.go b/pkg/exprhelpers/waf.go index 0648f7ffc..8fca31768 100644 --- a/pkg/exprhelpers/waf.go +++ b/pkg/exprhelpers/waf.go @@ -2,6 +2,8 @@ package exprhelpers import ( "net/http" + "net/url" + "strings" "github.com/crowdsecurity/crowdsec/pkg/appsec/ja4h" ) @@ -11,3 +13,112 @@ func JA4H(params ...any) (any, error) { req := params[0].(*http.Request) return ja4h.JA4H(req), nil } + +// just a expr wrapper for ParseQuery +func ExprWrapParseQuery(params ...any) (any, error) { + query := params[0].(string) + return ParseQuery(query), nil +} + +// parseQuery and parseQuery are copied net/url package, but allow semicolon in values +func ParseQuery(query string) url.Values { + m := make(url.Values) + ParseQueryIntoValues(m, query) + return m +} + +func ParseQueryIntoValues(m url.Values, query string) { + for query != "" { + var key string + key, query, _ = strings.Cut(query, "&") + + if key == "" { + continue + } + key, value, _ := strings.Cut(key, "=") + //for now we'll just ignore the errors, but ideally we want to fire some "internal" rules when we see invalid query strings + key = unescape(key) + value = unescape(value) + m[key] = append(m[key], value) + } +} + +func hexDigitToByte(digit byte) (byte, bool) { + switch { + case digit >= '0' && digit <= '9': + return digit - '0', true + case digit >= 'a' && digit <= 'f': + return digit - 'a' + 10, true + case digit >= 'A' && digit <= 'F': + return digit - 'A' + 10, true + default: + return 0, false + } +} + +func unescape(input string) string { + ilen := len(input) + res := strings.Builder{} + res.Grow(ilen) + for i := 0; i < ilen; i++ { + ci := input[i] + if ci == '+' { + res.WriteByte(' ') + continue + } + if ci == '%' { + if i+2 >= ilen { + res.WriteByte(ci) + continue + } + hi, ok := hexDigitToByte(input[i+1]) + if !ok { + res.WriteByte(ci) + continue + } + lo, ok := hexDigitToByte(input[i+2]) + if !ok { + res.WriteByte(ci) + continue + } + res.WriteByte(hi<<4 | lo) + i += 2 + continue + } + res.WriteByte(ci) + } + return res.String() +} + +// just a expr wrapper for ExtractQueryParam +func ExprWrapExtractQueryParam(params ...any) (any, error) { + uri := params[0].(string) + param := params[1].(string) + return ExtractQueryParam(uri, param), nil +} + +// ExtractQueryParam extracts values for a given query parameter from a raw URI string. +func ExtractQueryParam(uri, param string) []string { + // Find the first occurrence of "?" + idx := strings.Index(uri, "?") + if idx == -1 { + // No query string present + return []string{} + } + + // Extract the query string part + queryString := uri[idx+1:] + + // Parse the query string using a function that supports both `&` and `;` + values := ParseQuery(queryString) + + if values == nil { + // No query string present + return []string{} + } + // Retrieve the values for the specified parameter + if _, ok := values[param]; !ok { + return []string{} + } + return values[param] +} diff --git a/pkg/exprhelpers/waf_test.go b/pkg/exprhelpers/waf_test.go index 594488fb7..bfab22bb2 100644 --- a/pkg/exprhelpers/waf_test.go +++ b/pkg/exprhelpers/waf_test.go @@ -2,11 +2,275 @@ package exprhelpers import ( "net/http" + "net/url" + "reflect" "testing" "github.com/stretchr/testify/require" ) +func TestParseQuery(t *testing.T) { + tests := []struct { + name string + query string + expected url.Values + }{ + { + name: "Full URI", + query: "/foobar/toto?ab=cd&ef=gh", + expected: url.Values{ + "/foobar/toto?ab": []string{"cd"}, + "ef": []string{"gh"}, + }, + }, + { + name: "Simple query", + query: "foo=bar", + expected: url.Values{ + "foo": []string{"bar"}, + }, + }, + { + name: "Multiple values", + query: "foo=bar&foo=baz", + expected: url.Values{ + "foo": []string{"bar", "baz"}, + }, + }, + { + name: "Empty value", + query: "foo=", + expected: url.Values{ + "foo": []string{""}, + }, + }, + { + name: "Empty key", + query: "=bar", + expected: url.Values{ + "": []string{"bar"}, + }, + }, + { + name: "Empty query", + query: "", + expected: url.Values{}, + }, + { + name: "Multiple keys", + query: "foo=bar&baz=qux", + expected: url.Values{ + "foo": []string{"bar"}, + "baz": []string{"qux"}, + }, + }, + { + name: "Multiple keys with empty value", + query: "foo=bar&baz=qux&quux=", + expected: url.Values{ + "foo": []string{"bar"}, + "baz": []string{"qux"}, + "quux": []string{""}, + }, + }, + { + name: "Multiple keys with empty value and empty key", + query: "foo=bar&baz=qux&quux=&=quuz", + expected: url.Values{ + "foo": []string{"bar"}, + "baz": []string{"qux"}, + "quux": []string{""}, + "": []string{"quuz"}, + }, + }, + { + name: "Multiple keys with empty value and empty key and multiple values", + query: "foo=bar&baz=qux&quux=&=quuz&foo=baz", + expected: url.Values{ + "foo": []string{"bar", "baz"}, + "baz": []string{"qux"}, + "quux": []string{""}, + "": []string{"quuz"}, + }, + }, + { + name: "Multiple keys with empty value and empty key and multiple values and escaped characters", + query: "foo=bar&baz=qux&quux=&=quuz&foo=baz&foo=bar%20baz", + expected: url.Values{ + "foo": []string{"bar", "baz", "bar baz"}, + "baz": []string{"qux"}, + "quux": []string{""}, + "": []string{"quuz"}, + }, + }, + { + name: "Multiple keys with empty value and empty key and multiple values and escaped characters and semicolon", + query: "foo=bar&baz=qux&quux=&=quuz&foo=baz&foo=bar%20baz&foo=bar%3Bbaz", + expected: url.Values{ + "foo": []string{"bar", "baz", "bar baz", "bar;baz"}, + "baz": []string{"qux"}, + "quux": []string{""}, + "": []string{"quuz"}, + }, + }, + { + name: "Multiple keys with empty value and empty key and multiple values and escaped characters and semicolon and ampersand", + query: "foo=bar&baz=qux&quux=&=quuz&foo=baz&foo=bar%20baz&foo=bar%3Bbaz&foo=bar%26baz", + expected: url.Values{ + "foo": []string{"bar", "baz", "bar baz", "bar;baz", "bar&baz"}, + "baz": []string{"qux"}, + "quux": []string{""}, + "": []string{"quuz"}, + }, + }, + { + name: "Multiple keys with empty value and empty key and multiple values and escaped characters and semicolon and ampersand and equals", + query: "foo=bar&baz=qux&quux=&=quuz&foo=baz&foo=bar%20baz&foo=bar%3Bbaz&foo=bar%26baz&foo=bar%3Dbaz", + expected: url.Values{ + "foo": []string{"bar", "baz", "bar baz", "bar;baz", "bar&baz", "bar=baz"}, + "baz": []string{"qux"}, + "quux": []string{""}, + "": []string{"quuz"}, + }, + }, + { + name: "Multiple keys with empty value and empty key and multiple values and escaped characters and semicolon and ampersand and equals and question mark", + query: "foo=bar&baz=qux&quux=&=quuz&foo=baz&foo=bar%20baz&foo=bar%3Bbaz&foo=bar%26baz&foo=bar%3Dbaz&foo=bar%3Fbaz", + expected: url.Values{ + "foo": []string{"bar", "baz", "bar baz", "bar;baz", "bar&baz", "bar=baz", "bar?baz"}, + "baz": []string{"qux"}, + "quux": []string{""}, + "": []string{"quuz"}, + }, + }, + { + name: "keys with escaped characters", + query: "foo=ba;r&baz=qu;;x&quux=x\\&ww&xx=qu?uz&", + expected: url.Values{ + "foo": []string{"ba;r"}, + "baz": []string{"qu;;x"}, + "quux": []string{"x\\"}, + "ww": []string{""}, + "xx": []string{"qu?uz"}, + }, + }, + { + name: "hexadecimal characters", + query: "foo=bar%20baz", + expected: url.Values{ + "foo": []string{"bar baz"}, + }, + }, + { + name: "hexadecimal characters upper and lower case", + query: "foo=Ba%42%42&bar=w%2f%2F", + expected: url.Values{ + "foo": []string{"BaBB"}, + "bar": []string{"w//"}, + }, + }, + { + name: "hexadecimal characters with invalid characters", + query: "foo=bar%20baz%2", + expected: url.Values{ + "foo": []string{"bar baz%2"}, + }, + }, + { + name: "hexadecimal characters with invalid hex characters", + query: "foo=bar%xx", + expected: url.Values{ + "foo": []string{"bar%xx"}, + }, + }, + { + name: "hexadecimal characters with invalid 2nd hex character", + query: "foo=bar%2x", + expected: url.Values{ + "foo": []string{"bar%2x"}, + }, + }, + { + name: "url +", + query: "foo=bar+x", + expected: url.Values{ + "foo": []string{"bar x"}, + }, + }, + { + name: "url &&", + query: "foo=bar&&lol=bur", + expected: url.Values{ + "foo": []string{"bar"}, + "lol": []string{"bur"}, + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + res := ParseQuery(test.query) + if !reflect.DeepEqual(res, test.expected) { + t.Fatalf("unexpected result: %v", res) + } + }) + } +} + +func TestExtractQueryParam(t *testing.T) { + tests := []struct { + name string + query string + param string + expected []string + }{ + { + name: "Simple uri", + query: "/foobar/toto?ab=cd&ef=gh", + param: "ab", + expected: []string{"cd"}, + }, + { + name: "Simple uri, repeating param", + query: "/foobar?foo=bar&foo=baz", + param: "foo", + expected: []string{"bar", "baz"}, + }, + { + name: "Simple uri with semicolon", + query: "/foobar/toto?ab=cd;ef=gh", + param: "ab", + expected: []string{"cd;ef=gh"}, + }, + { + name: "Simple query no uri", + query: "foo=bar", + param: "foo", + expected: []string{}, + }, + { + name: "No QS", + query: "/foobar", + param: "foo", + expected: []string{}, + }, + { + name: "missing param", + query: "/foobar/toto?ab=cd&ef=gh", + param: "baz", + expected: []string{}, + }, + } + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + res := ExtractQueryParam(test.query, test.param) + if !reflect.DeepEqual(res, test.expected) { + t.Fatalf("unexpected result: %v", res) + } + }) + } +} + func TestJA4H(t *testing.T) { tests := []struct {