move ParseQuery to expr helpers, add ExtractQueryParam (#3491)

* move ParseQuery to expr helpers, add ExtractQueryParam
This commit is contained in:
Thibault "bui" Koechlin 2025-03-04 10:41:11 +01:00 committed by GitHub
parent 61c7de1331
commit f49e1e28d2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 455 additions and 286 deletions

View file

@ -1,78 +0,0 @@
package appsec
// This file is mostly stolen from the net/url package, with some modifications to allow less strict parsing of query strings.
import (
"net/url"
"strings"
)
// ParseQuery parses a raw query string into url.Values.
// ParseQuery and parseQuery are adapted from the net/url package, but allow
// semicolons in values and never report an error.
func ParseQuery(query string) url.Values {
	values := url.Values{}
	parseQuery(values, query)

	return values
}
// parseQuery splits query on "&", splits each non-empty pair on the first "=",
// unescapes both halves and appends the value to m under the unescaped key.
// Empty pairs (as produced by "&&" or a trailing "&") are skipped.
func parseQuery(m url.Values, query string) {
	for len(query) > 0 {
		var pair string

		pair, query, _ = strings.Cut(query, "&")
		if pair == "" {
			continue
		}

		rawKey, rawValue, _ := strings.Cut(pair, "=")
		// For now invalid escapes are silently kept as-is; ideally we want to
		// fire some "internal" rules when we see invalid query strings.
		k := unescape(rawKey)
		m[k] = append(m[k], unescape(rawValue))
	}
}
// hexDigitToByte converts one ASCII hexadecimal digit (0-9, a-f, A-F) to its
// numeric value. The boolean is false, and the value 0, for any other byte.
func hexDigitToByte(digit byte) (byte, bool) {
	if '0' <= digit && digit <= '9' {
		return digit - '0', true
	}

	// Setting bit 0x20 folds ASCII upper-case letters onto lower-case ones, so
	// a single range check covers both 'a'-'f' and 'A'-'F'.
	if lower := digit | 0x20; 'a' <= lower && lower <= 'f' {
		return lower - 'a' + 10, true
	}

	return 0, false
}
// unescape leniently decodes a query-string component: "+" becomes a space and
// "%XX" (two hex digits) becomes the corresponding byte. A "%" that is not
// followed by two valid hex digits is copied through unchanged instead of
// being reported as an error.
func unescape(input string) string {
	var out strings.Builder

	out.Grow(len(input))

	for pos := 0; pos < len(input); pos++ {
		switch c := input[pos]; {
		case c == '+':
			out.WriteByte(' ')
		case c == '%' && pos+2 < len(input):
			hi, hiOK := hexDigitToByte(input[pos+1])
			lo, loOK := hexDigitToByte(input[pos+2])

			if hiOK && loOK {
				out.WriteByte(hi<<4 | lo)
				// Skip the two hex digits that were just consumed.
				pos += 2
			} else {
				out.WriteByte(c)
			}
		default:
			// Regular byte, or a "%" too close to the end to be an escape.
			out.WriteByte(c)
		}
	}

	return out.String()
}

View file

@ -1,207 +0,0 @@
package appsec
import (
"net/url"
"reflect"
"testing"
)
// TestParseQuery runs ParseQuery over a table of raw query strings covering
// repeated keys, empty keys and values, percent-escapes (valid and invalid),
// "+" decoding, unescaped semicolons and empty "&&" pairs.
func TestParseQuery(t *testing.T) {
	tests := []struct {
		name     string
		query    string
		expected url.Values
	}{
		{
			name:     "Simple query",
			query:    "foo=bar",
			expected: url.Values{"foo": {"bar"}},
		},
		{
			name:     "Multiple values",
			query:    "foo=bar&foo=baz",
			expected: url.Values{"foo": {"bar", "baz"}},
		},
		{
			name:     "Empty value",
			query:    "foo=",
			expected: url.Values{"foo": {""}},
		},
		{
			name:     "Empty key",
			query:    "=bar",
			expected: url.Values{"": {"bar"}},
		},
		{
			name:     "Empty query",
			query:    "",
			expected: url.Values{},
		},
		{
			name:     "Multiple keys",
			query:    "foo=bar&baz=qux",
			expected: url.Values{"foo": {"bar"}, "baz": {"qux"}},
		},
		{
			name:     "Multiple keys with empty value",
			query:    "foo=bar&baz=qux&quux=",
			expected: url.Values{"foo": {"bar"}, "baz": {"qux"}, "quux": {""}},
		},
		{
			name:     "Multiple keys with empty value and empty key",
			query:    "foo=bar&baz=qux&quux=&=quuz",
			expected: url.Values{"foo": {"bar"}, "baz": {"qux"}, "quux": {""}, "": {"quuz"}},
		},
		{
			name:  "Multiple keys with empty value and empty key and multiple values",
			query: "foo=bar&baz=qux&quux=&=quuz&foo=baz",
			expected: url.Values{
				"foo":  {"bar", "baz"},
				"baz":  {"qux"},
				"quux": {""},
				"":     {"quuz"},
			},
		},
		{
			name:  "Multiple keys with empty value and empty key and multiple values and escaped characters",
			query: "foo=bar&baz=qux&quux=&=quuz&foo=baz&foo=bar%20baz",
			expected: url.Values{
				"foo":  {"bar", "baz", "bar baz"},
				"baz":  {"qux"},
				"quux": {""},
				"":     {"quuz"},
			},
		},
		{
			name:  "Multiple keys with empty value and empty key and multiple values and escaped characters and semicolon",
			query: "foo=bar&baz=qux&quux=&=quuz&foo=baz&foo=bar%20baz&foo=bar%3Bbaz",
			expected: url.Values{
				"foo":  {"bar", "baz", "bar baz", "bar;baz"},
				"baz":  {"qux"},
				"quux": {""},
				"":     {"quuz"},
			},
		},
		{
			name:  "Multiple keys with empty value and empty key and multiple values and escaped characters and semicolon and ampersand",
			query: "foo=bar&baz=qux&quux=&=quuz&foo=baz&foo=bar%20baz&foo=bar%3Bbaz&foo=bar%26baz",
			expected: url.Values{
				"foo":  {"bar", "baz", "bar baz", "bar;baz", "bar&baz"},
				"baz":  {"qux"},
				"quux": {""},
				"":     {"quuz"},
			},
		},
		{
			name:  "Multiple keys with empty value and empty key and multiple values and escaped characters and semicolon and ampersand and equals",
			query: "foo=bar&baz=qux&quux=&=quuz&foo=baz&foo=bar%20baz&foo=bar%3Bbaz&foo=bar%26baz&foo=bar%3Dbaz",
			expected: url.Values{
				"foo":  {"bar", "baz", "bar baz", "bar;baz", "bar&baz", "bar=baz"},
				"baz":  {"qux"},
				"quux": {""},
				"":     {"quuz"},
			},
		},
		{
			name:  "Multiple keys with empty value and empty key and multiple values and escaped characters and semicolon and ampersand and equals and question mark",
			query: "foo=bar&baz=qux&quux=&=quuz&foo=baz&foo=bar%20baz&foo=bar%3Bbaz&foo=bar%26baz&foo=bar%3Dbaz&foo=bar%3Fbaz",
			expected: url.Values{
				"foo":  {"bar", "baz", "bar baz", "bar;baz", "bar&baz", "bar=baz", "bar?baz"},
				"baz":  {"qux"},
				"quux": {""},
				"":     {"quuz"},
			},
		},
		{
			name:  "keys with escaped characters",
			query: "foo=ba;r&baz=qu;;x&quux=x\\&ww&xx=qu?uz&",
			expected: url.Values{
				"foo":  {"ba;r"},
				"baz":  {"qu;;x"},
				"quux": {"x\\"},
				"ww":   {""},
				"xx":   {"qu?uz"},
			},
		},
		{
			name:     "hexadecimal characters",
			query:    "foo=bar%20baz",
			expected: url.Values{"foo": {"bar baz"}},
		},
		{
			name:     "hexadecimal characters upper and lower case",
			query:    "foo=Ba%42%42&bar=w%2f%2F",
			expected: url.Values{"foo": {"BaBB"}, "bar": {"w//"}},
		},
		{
			name:     "hexadecimal characters with invalid characters",
			query:    "foo=bar%20baz%2",
			expected: url.Values{"foo": {"bar baz%2"}},
		},
		{
			name:     "hexadecimal characters with invalid hex characters",
			query:    "foo=bar%xx",
			expected: url.Values{"foo": {"bar%xx"}},
		},
		{
			name:     "hexadecimal characters with invalid 2nd hex character",
			query:    "foo=bar%2x",
			expected: url.Values{"foo": {"bar%2x"}},
		},
		{
			name:     "url +",
			query:    "foo=bar+x",
			expected: url.Values{"foo": {"bar x"}},
		},
		{
			name:     "url &&",
			query:    "foo=bar&&lol=bur",
			expected: url.Values{"foo": {"bar"}, "lol": {"bur"}},
		},
	}

	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			got := ParseQuery(tc.query)
			if !reflect.DeepEqual(got, tc.expected) {
				// Report input, got and want so a failure is diagnosable on its own.
				t.Fatalf("ParseQuery(%q) = %v, want %v", tc.query, got, tc.expected)
			}
		})
	}
}

View file

@ -11,6 +11,7 @@ import (
"os"
"regexp"
"github.com/crowdsecurity/crowdsec/pkg/exprhelpers"
"github.com/google/uuid"
log "github.com/sirupsen/logrus"
)
@ -396,7 +397,7 @@ func NewParsedRequestFromRequest(r *http.Request, logger *log.Entry) (ParsedRequ
URL: parsedURL,
Proto: r.Proto,
Body: body,
Args: ParseQuery(parsedURL.RawQuery),
Args: exprhelpers.ParseQuery(parsedURL.RawQuery),
TransferEncoding: r.TransferEncoding,
ResponseChannel: make(chan AppsecTempResponse),
RemoteAddrNormalized: remoteAddrNormalized,

View file

@ -3,6 +3,7 @@ package exprhelpers
import (
"net"
"net/http"
"net/url"
"time"
"github.com/oschwald/geoip2-golang"
@ -151,6 +152,20 @@ var exprFuncs = []exprCustomFunc{
new(func(string) map[string][]string),
},
},
{
name: "ParseQuery",
function: ExprWrapParseQuery,
signature: []interface{}{
new(func(string) url.Values),
},
},
{
name: "ExtractQueryParam",
function: ExprWrapExtractQueryParam,
signature: []interface{}{
new(func(string, string) []string),
},
},
{
name: "PathUnescape",
function: PathUnescape,

View file

@ -2,6 +2,7 @@ package exprhelpers
import (
"errors"
"net/url"
"testing"
"time"
@ -160,6 +161,68 @@ func TestMatch(t *testing.T) {
}
}
// just to verify that the function is available, real tests are in TestExtractQueryParam
func TestExtractQueryParamExpr(t *testing.T) {
err := Init(nil)
require.NoError(t, err)
tests := []struct {
name string
env map[string]interface{}
code string
result []string
err string
}{
{
name: "ExtractQueryParam() test: basic test",
env: map[string]interface{}{
"query": "/foo?a=1&b=2",
},
code: "ExtractQueryParam(query, 'a')",
result: []string{"1"},
},
}
for _, test := range tests {
program, err := expr.Compile(test.code, GetExprOptions(test.env)...)
require.NoError(t, err)
output, err := expr.Run(program, test.env)
require.NoError(t, err)
require.Equal(t, test.result, output)
log.Printf("test '%s' : OK", test.name)
}
}
// just to verify that the function is available, real tests are in TestParseQuery
func TestParseQueryInExpr(t *testing.T) {
err := Init(nil)
require.NoError(t, err)
tests := []struct {
name string
env map[string]interface{}
code string
result url.Values
err string
}{
{
name: "ParseQuery() test: basic test",
env: map[string]interface{}{
"query": "a=1&b=2",
"ParseQuery": ParseQuery,
},
code: "ParseQuery(query)",
result: url.Values{"a": {"1"}, "b": {"2"}},
},
}
for _, test := range tests {
program, err := expr.Compile(test.code, GetExprOptions(test.env)...)
require.NoError(t, err)
output, err := expr.Run(program, test.env)
require.NoError(t, err)
require.Equal(t, test.result, output)
log.Printf("test '%s' : OK", test.name)
}
}
func TestDistanceHelper(t *testing.T) {
err := Init(nil)
require.NoError(t, err)

View file

@ -2,6 +2,8 @@ package exprhelpers
import (
"net/http"
"net/url"
"strings"
"github.com/crowdsecurity/crowdsec/pkg/appsec/ja4h"
)
@ -11,3 +13,112 @@ func JA4H(params ...any) (any, error) {
req := params[0].(*http.Request)
return ja4h.JA4H(req), nil
}
// ExprWrapParseQuery is the expr wrapper for ParseQuery.
// It expects params[0] to be the raw query string and always returns a nil error.
func ExprWrapParseQuery(params ...any) (any, error) {
	rawQuery := params[0].(string)
	parsed := ParseQuery(rawQuery)

	return parsed, nil
}
// ParseQuery parses a raw query string into url.Values.
// ParseQuery and ParseQueryIntoValues are adapted from the net/url package,
// but allow semicolons in values and never report an error.
func ParseQuery(query string) url.Values {
	values := url.Values{}
	ParseQueryIntoValues(values, query)

	return values
}
// ParseQueryIntoValues splits query on "&", splits each non-empty pair on the
// first "=", unescapes both halves and appends the value to m under the
// unescaped key. Empty pairs (as produced by "&&" or a trailing "&") are
// skipped. m is written to, so it must be a non-nil map.
func ParseQueryIntoValues(m url.Values, query string) {
	for len(query) > 0 {
		var pair string

		pair, query, _ = strings.Cut(query, "&")
		if pair == "" {
			continue
		}

		rawKey, rawValue, _ := strings.Cut(pair, "=")
		// For now invalid escapes are silently kept as-is; ideally we want to
		// fire some "internal" rules when we see invalid query strings.
		k := unescape(rawKey)
		m[k] = append(m[k], unescape(rawValue))
	}
}
// hexDigitToByte converts one ASCII hexadecimal digit (0-9, a-f, A-F) to its
// numeric value. The boolean is false, and the value 0, for any other byte.
func hexDigitToByte(digit byte) (byte, bool) {
	if '0' <= digit && digit <= '9' {
		return digit - '0', true
	}

	// Setting bit 0x20 folds ASCII upper-case letters onto lower-case ones, so
	// a single range check covers both 'a'-'f' and 'A'-'F'.
	if lower := digit | 0x20; 'a' <= lower && lower <= 'f' {
		return lower - 'a' + 10, true
	}

	return 0, false
}
// unescape leniently decodes a query-string component: "+" becomes a space and
// "%XX" (two hex digits) becomes the corresponding byte. A "%" that is not
// followed by two valid hex digits is copied through unchanged instead of
// being reported as an error.
func unescape(input string) string {
	var out strings.Builder

	out.Grow(len(input))

	for pos := 0; pos < len(input); pos++ {
		switch c := input[pos]; {
		case c == '+':
			out.WriteByte(' ')
		case c == '%' && pos+2 < len(input):
			hi, hiOK := hexDigitToByte(input[pos+1])
			lo, loOK := hexDigitToByte(input[pos+2])

			if hiOK && loOK {
				out.WriteByte(hi<<4 | lo)
				// Skip the two hex digits that were just consumed.
				pos += 2
			} else {
				out.WriteByte(c)
			}
		default:
			// Regular byte, or a "%" too close to the end to be an escape.
			out.WriteByte(c)
		}
	}

	return out.String()
}
// ExprWrapExtractQueryParam is the expr wrapper for ExtractQueryParam.
// It expects params[0] to be the raw URI and params[1] the parameter name,
// both strings, and always returns a nil error.
func ExprWrapExtractQueryParam(params ...any) (any, error) {
	rawURI := params[0].(string)
	name := params[1].(string)
	values := ExtractQueryParam(rawURI, name)

	return values, nil
}
// ExtractQueryParam returns every value of the query parameter param found in
// the raw URI string uri.
//
// Everything after the first "?" in uri is treated as the query string and is
// parsed with ParseQuery: pairs are separated by "&" only, and a ";" is kept
// as part of the value. The result is an empty, non-nil slice when uri
// contains no "?" or when param does not appear in the query string.
func ExtractQueryParam(uri, param string) []string {
	_, queryString, found := strings.Cut(uri, "?")
	if !found {
		// No query string present.
		return []string{}
	}

	// ParseQuery always returns a non-nil map, so one lookup is all that is needed.
	if values, ok := ParseQuery(queryString)[param]; ok {
		return values
	}

	return []string{}
}

View file

@ -2,11 +2,275 @@ package exprhelpers
import (
"net/http"
"net/url"
"reflect"
"testing"
"github.com/stretchr/testify/require"
)
// TestParseQuery runs ParseQuery over a table of raw query strings covering
// repeated keys, empty keys and values, percent-escapes (valid and invalid),
// "+" decoding, unescaped semicolons and empty "&&" pairs.
func TestParseQuery(t *testing.T) {
	tests := []struct {
		name     string
		query    string
		expected url.Values
	}{
		{
			// ParseQuery does not strip a path: everything before the first "=" is the key.
			name:     "Full URI",
			query:    "/foobar/toto?ab=cd&ef=gh",
			expected: url.Values{"/foobar/toto?ab": {"cd"}, "ef": {"gh"}},
		},
		{
			name:     "Simple query",
			query:    "foo=bar",
			expected: url.Values{"foo": {"bar"}},
		},
		{
			name:     "Multiple values",
			query:    "foo=bar&foo=baz",
			expected: url.Values{"foo": {"bar", "baz"}},
		},
		{
			name:     "Empty value",
			query:    "foo=",
			expected: url.Values{"foo": {""}},
		},
		{
			name:     "Empty key",
			query:    "=bar",
			expected: url.Values{"": {"bar"}},
		},
		{
			name:     "Empty query",
			query:    "",
			expected: url.Values{},
		},
		{
			name:     "Multiple keys",
			query:    "foo=bar&baz=qux",
			expected: url.Values{"foo": {"bar"}, "baz": {"qux"}},
		},
		{
			name:     "Multiple keys with empty value",
			query:    "foo=bar&baz=qux&quux=",
			expected: url.Values{"foo": {"bar"}, "baz": {"qux"}, "quux": {""}},
		},
		{
			name:     "Multiple keys with empty value and empty key",
			query:    "foo=bar&baz=qux&quux=&=quuz",
			expected: url.Values{"foo": {"bar"}, "baz": {"qux"}, "quux": {""}, "": {"quuz"}},
		},
		{
			name:  "Multiple keys with empty value and empty key and multiple values",
			query: "foo=bar&baz=qux&quux=&=quuz&foo=baz",
			expected: url.Values{
				"foo":  {"bar", "baz"},
				"baz":  {"qux"},
				"quux": {""},
				"":     {"quuz"},
			},
		},
		{
			name:  "Multiple keys with empty value and empty key and multiple values and escaped characters",
			query: "foo=bar&baz=qux&quux=&=quuz&foo=baz&foo=bar%20baz",
			expected: url.Values{
				"foo":  {"bar", "baz", "bar baz"},
				"baz":  {"qux"},
				"quux": {""},
				"":     {"quuz"},
			},
		},
		{
			name:  "Multiple keys with empty value and empty key and multiple values and escaped characters and semicolon",
			query: "foo=bar&baz=qux&quux=&=quuz&foo=baz&foo=bar%20baz&foo=bar%3Bbaz",
			expected: url.Values{
				"foo":  {"bar", "baz", "bar baz", "bar;baz"},
				"baz":  {"qux"},
				"quux": {""},
				"":     {"quuz"},
			},
		},
		{
			name:  "Multiple keys with empty value and empty key and multiple values and escaped characters and semicolon and ampersand",
			query: "foo=bar&baz=qux&quux=&=quuz&foo=baz&foo=bar%20baz&foo=bar%3Bbaz&foo=bar%26baz",
			expected: url.Values{
				"foo":  {"bar", "baz", "bar baz", "bar;baz", "bar&baz"},
				"baz":  {"qux"},
				"quux": {""},
				"":     {"quuz"},
			},
		},
		{
			name:  "Multiple keys with empty value and empty key and multiple values and escaped characters and semicolon and ampersand and equals",
			query: "foo=bar&baz=qux&quux=&=quuz&foo=baz&foo=bar%20baz&foo=bar%3Bbaz&foo=bar%26baz&foo=bar%3Dbaz",
			expected: url.Values{
				"foo":  {"bar", "baz", "bar baz", "bar;baz", "bar&baz", "bar=baz"},
				"baz":  {"qux"},
				"quux": {""},
				"":     {"quuz"},
			},
		},
		{
			name:  "Multiple keys with empty value and empty key and multiple values and escaped characters and semicolon and ampersand and equals and question mark",
			query: "foo=bar&baz=qux&quux=&=quuz&foo=baz&foo=bar%20baz&foo=bar%3Bbaz&foo=bar%26baz&foo=bar%3Dbaz&foo=bar%3Fbaz",
			expected: url.Values{
				"foo":  {"bar", "baz", "bar baz", "bar;baz", "bar&baz", "bar=baz", "bar?baz"},
				"baz":  {"qux"},
				"quux": {""},
				"":     {"quuz"},
			},
		},
		{
			name:  "keys with escaped characters",
			query: "foo=ba;r&baz=qu;;x&quux=x\\&ww&xx=qu?uz&",
			expected: url.Values{
				"foo":  {"ba;r"},
				"baz":  {"qu;;x"},
				"quux": {"x\\"},
				"ww":   {""},
				"xx":   {"qu?uz"},
			},
		},
		{
			name:     "hexadecimal characters",
			query:    "foo=bar%20baz",
			expected: url.Values{"foo": {"bar baz"}},
		},
		{
			name:     "hexadecimal characters upper and lower case",
			query:    "foo=Ba%42%42&bar=w%2f%2F",
			expected: url.Values{"foo": {"BaBB"}, "bar": {"w//"}},
		},
		{
			name:     "hexadecimal characters with invalid characters",
			query:    "foo=bar%20baz%2",
			expected: url.Values{"foo": {"bar baz%2"}},
		},
		{
			name:     "hexadecimal characters with invalid hex characters",
			query:    "foo=bar%xx",
			expected: url.Values{"foo": {"bar%xx"}},
		},
		{
			name:     "hexadecimal characters with invalid 2nd hex character",
			query:    "foo=bar%2x",
			expected: url.Values{"foo": {"bar%2x"}},
		},
		{
			name:     "url +",
			query:    "foo=bar+x",
			expected: url.Values{"foo": {"bar x"}},
		},
		{
			name:     "url &&",
			query:    "foo=bar&&lol=bur",
			expected: url.Values{"foo": {"bar"}, "lol": {"bur"}},
		},
	}

	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			got := ParseQuery(tc.query)
			if !reflect.DeepEqual(got, tc.expected) {
				// Report input, got and want so a failure is diagnosable on its own.
				t.Fatalf("ParseQuery(%q) = %v, want %v", tc.query, got, tc.expected)
			}
		})
	}
}
// TestExtractQueryParam runs ExtractQueryParam over a table of raw URIs.
// It covers repeated parameters, a semicolon inside a value, input without a
// "?" and a parameter that is absent; the last three cases expect an empty,
// non-nil slice.
func TestExtractQueryParam(t *testing.T) {
	tests := []struct {
		name     string
		uri      string
		param    string
		expected []string
	}{
		{
			name:     "Simple uri",
			uri:      "/foobar/toto?ab=cd&ef=gh",
			param:    "ab",
			expected: []string{"cd"},
		},
		{
			name:     "Simple uri, repeating param",
			uri:      "/foobar?foo=bar&foo=baz",
			param:    "foo",
			expected: []string{"bar", "baz"},
		},
		{
			name:     "Simple uri with semicolon",
			uri:      "/foobar/toto?ab=cd;ef=gh",
			param:    "ab",
			expected: []string{"cd;ef=gh"},
		},
		{
			name:     "Simple query no uri",
			uri:      "foo=bar",
			param:    "foo",
			expected: []string{},
		},
		{
			name:     "No QS",
			uri:      "/foobar",
			param:    "foo",
			expected: []string{},
		},
		{
			name:     "missing param",
			uri:      "/foobar/toto?ab=cd&ef=gh",
			param:    "baz",
			expected: []string{},
		},
	}

	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			got := ExtractQueryParam(tc.uri, tc.param)
			// reflect.DeepEqual distinguishes nil from empty slices, so this also
			// pins the "empty but non-nil" contract.
			if !reflect.DeepEqual(got, tc.expected) {
				t.Fatalf("ExtractQueryParam(%q, %q) = %#v, want %#v", tc.uri, tc.param, got, tc.expected)
			}
		})
	}
}
func TestJA4H(t *testing.T) {
tests := []struct {