Skip to content

Commit

Permalink
encoding/jsonschema: detect Perl regexps
Browse files Browse the repository at this point in the history
We want to treat Perl syntax as a missing feature
and make genuinely invalid regular expressions
an error.

Signed-off-by: Roger Peppe <[email protected]>
Change-Id: I187be5f8846e02c9af514ec808fa19a8598e41ce
Reviewed-on: https://review.gerrithub.io/c/cue-lang/cue/+/1201127
TryBot-Result: CUEcueckoo <[email protected]>
Reviewed-by: Daniel Martí <[email protected]>
  • Loading branch information
rogpeppe committed Sep 12, 2024
1 parent 9000dc3 commit 67f9dfa
Show file tree
Hide file tree
Showing 9 changed files with 93 additions and 50 deletions.
13 changes: 3 additions & 10 deletions encoding/jsonschema/constraints_string.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
package jsonschema

import (
"regexp"
"sync"

"cuelang.org/go/cue"
Expand Down Expand Up @@ -50,17 +49,11 @@ func constraintMinLength(key string, n cue.Value, s *state) {
}

func constraintPattern(key string, n cue.Value, s *state) {
str, _ := s.strValue(n)
if _, err := regexp.Compile(str); err != nil {
if s.cfg.StrictFeatures {
// TODO check if the error is only because of an unsupported
// regexp feature (e.g. perl regexp) or because the regexp is just
// bad. If the latter, this should be an error even if Strict is false.
s.errf(n, "unsupported regexp: %v", err)
}
str, ok := s.regexpValue(n)
if !ok {
return
}
s.add(n, stringType, &ast.UnaryExpr{Op: token.MAT, X: s.string(n)})
s.add(n, stringType, &ast.UnaryExpr{Op: token.MAT, X: str})
}

type formatFuncInfo struct {
Expand Down
26 changes: 26 additions & 0 deletions encoding/jsonschema/decode.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"fmt"
"math"
"net/url"
"regexp/syntax"
"sort"
"strconv"
"strings"
Expand Down Expand Up @@ -256,6 +257,31 @@ func (d *decoder) strValue(n cue.Value) (s string, ok bool) {
return s, true
}

// regexpValue extracts the string held by n and checks that it parses as a
// regular expression under Go's regexp/syntax package with Perl flags.
//
// On success it returns d.string(n) and true. In every other case it returns
// (nil, false):
//   - if d.strValue(n) fails, nothing further is reported here;
//   - if parsing fails with syntax.ErrInvalidPerlOp, an error is reported via
//     d.errf only when d.cfg.StrictFeatures is set;
//   - any other parse failure is always reported via d.errf.
func (d *decoder) regexpValue(n cue.Value) (ast.Expr, bool) {
	pattern, ok := d.strValue(n)
	if !ok {
		return nil, false
	}
	_, parseErr := syntax.Parse(pattern, syntax.Perl)
	if parseErr == nil {
		return d.string(n), true
	}

	var synErr *syntax.Error
	unsupportedPerl := errors.As(parseErr, &synErr) && synErr.Code == syntax.ErrInvalidPerlOp

	switch {
	case !unsupportedPerl:
		// The pattern is genuinely malformed, so this is an error
		// regardless of the strictness setting.
		d.errf(n, "invalid regexp %q: %v", pattern, parseErr)
	case d.cfg.StrictFeatures:
		// The pattern uses Perl syntax that will never be supported: the CUE
		// evaluation engine relies on Go's regexp implementation, which
		// usually omits such features deliberately (e.g. because of
		// exponential runtime). That makes it a missing feature rather than
		// an invalid regular expression, so it is only reported in strict
		// mode.
		d.errf(n, "unsupported Perl regexp syntax in %q: %v", pattern, parseErr)
	}
	return nil, false
}

// const draftCutoff = 5

type coreType int
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,8 @@
"pattern": "^\\cC$"
},
"skip": {
"v2": "extract error: unsupported regexp: error parsing regexp: invalid escape sequence: `\\c`",
"v3": "extract error: unsupported regexp: error parsing regexp: invalid escape sequence: `\\c`"
"v2": "extract error: invalid regexp \"^\\\\cC$\": error parsing regexp: invalid escape sequence: `\\c`",
"v3": "extract error: invalid regexp \"^\\\\cC$\": error parsing regexp: invalid escape sequence: `\\c`"
},
"tests": [
{
Expand Down Expand Up @@ -79,8 +79,8 @@
"pattern": "^\\cc$"
},
"skip": {
"v2": "extract error: unsupported regexp: error parsing regexp: invalid escape sequence: `\\c`",
"v3": "extract error: unsupported regexp: error parsing regexp: invalid escape sequence: `\\c`"
"v2": "extract error: invalid regexp \"^\\\\cc$\": error parsing regexp: invalid escape sequence: `\\c`",
"v3": "extract error: invalid regexp \"^\\\\cc$\": error parsing regexp: invalid escape sequence: `\\c`"
},
"tests": [
{
Expand Down Expand Up @@ -370,8 +370,8 @@
"pattern": "\\p{Letter}cole"
},
"skip": {
"v2": "extract error: unsupported regexp: error parsing regexp: invalid character class range: `\\p{Letter}`",
"v3": "extract error: unsupported regexp: error parsing regexp: invalid character class range: `\\p{Letter}`"
"v2": "extract error: invalid regexp \"\\\\p{Letter}cole\": error parsing regexp: invalid character class range: `\\p{Letter}`",
"v3": "extract error: invalid regexp \"\\\\p{Letter}cole\": error parsing regexp: invalid character class range: `\\p{Letter}`"
},
"tests": [
{
Expand Down Expand Up @@ -496,8 +496,8 @@
"pattern": "^\\p{digit}+$"
},
"skip": {
"v2": "extract error: unsupported regexp: error parsing regexp: invalid character class range: `\\p{digit}`",
"v3": "extract error: unsupported regexp: error parsing regexp: invalid character class range: `\\p{digit}`"
"v2": "extract error: invalid regexp \"^\\\\p{digit}+$\": error parsing regexp: invalid character class range: `\\p{digit}`",
"v3": "extract error: invalid regexp \"^\\\\p{digit}+$\": error parsing regexp: invalid character class range: `\\p{digit}`"
},
"tests": [
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,8 @@
"pattern": "^\\cC$"
},
"skip": {
"v2": "extract error: unsupported regexp: error parsing regexp: invalid escape sequence: `\\c`",
"v3": "extract error: unsupported regexp: error parsing regexp: invalid escape sequence: `\\c`"
"v2": "extract error: invalid regexp \"^\\\\cC$\": error parsing regexp: invalid escape sequence: `\\c`",
"v3": "extract error: invalid regexp \"^\\\\cC$\": error parsing regexp: invalid escape sequence: `\\c`"
},
"tests": [
{
Expand Down Expand Up @@ -79,8 +79,8 @@
"pattern": "^\\cc$"
},
"skip": {
"v2": "extract error: unsupported regexp: error parsing regexp: invalid escape sequence: `\\c`",
"v3": "extract error: unsupported regexp: error parsing regexp: invalid escape sequence: `\\c`"
"v2": "extract error: invalid regexp \"^\\\\cc$\": error parsing regexp: invalid escape sequence: `\\c`",
"v3": "extract error: invalid regexp \"^\\\\cc$\": error parsing regexp: invalid escape sequence: `\\c`"
},
"tests": [
{
Expand Down Expand Up @@ -370,8 +370,8 @@
"pattern": "\\p{Letter}cole"
},
"skip": {
"v2": "extract error: unsupported regexp: error parsing regexp: invalid character class range: `\\p{Letter}`",
"v3": "extract error: unsupported regexp: error parsing regexp: invalid character class range: `\\p{Letter}`"
"v2": "extract error: invalid regexp \"\\\\p{Letter}cole\": error parsing regexp: invalid character class range: `\\p{Letter}`",
"v3": "extract error: invalid regexp \"\\\\p{Letter}cole\": error parsing regexp: invalid character class range: `\\p{Letter}`"
},
"tests": [
{
Expand Down Expand Up @@ -514,8 +514,8 @@
"pattern": "^\\p{digit}+$"
},
"skip": {
"v2": "extract error: unsupported regexp: error parsing regexp: invalid character class range: `\\p{digit}`",
"v3": "extract error: unsupported regexp: error parsing regexp: invalid character class range: `\\p{digit}`"
"v2": "extract error: invalid regexp \"^\\\\p{digit}+$\": error parsing regexp: invalid character class range: `\\p{digit}`",
"v3": "extract error: invalid regexp \"^\\\\p{digit}+$\": error parsing regexp: invalid character class range: `\\p{digit}`"
},
"tests": [
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@
"pattern": "^\\cC$"
},
"skip": {
"v2": "extract error: unsupported regexp: error parsing regexp: invalid escape sequence: `\\c`",
"v3": "extract error: unsupported regexp: error parsing regexp: invalid escape sequence: `\\c`"
"v2": "extract error: invalid regexp \"^\\\\cC$\": error parsing regexp: invalid escape sequence: `\\c`",
"v3": "extract error: invalid regexp \"^\\\\cC$\": error parsing regexp: invalid escape sequence: `\\c`"
},
"tests": [
{
Expand Down Expand Up @@ -75,8 +75,8 @@
"pattern": "^\\cc$"
},
"skip": {
"v2": "extract error: unsupported regexp: error parsing regexp: invalid escape sequence: `\\c`",
"v3": "extract error: unsupported regexp: error parsing regexp: invalid escape sequence: `\\c`"
"v2": "extract error: invalid regexp \"^\\\\cc$\": error parsing regexp: invalid escape sequence: `\\c`",
"v3": "extract error: invalid regexp \"^\\\\cc$\": error parsing regexp: invalid escape sequence: `\\c`"
},
"tests": [
{
Expand Down Expand Up @@ -359,8 +359,8 @@
"pattern": "\\p{Letter}cole"
},
"skip": {
"v2": "extract error: unsupported regexp: error parsing regexp: invalid character class range: `\\p{Letter}`",
"v3": "extract error: unsupported regexp: error parsing regexp: invalid character class range: `\\p{Letter}`"
"v2": "extract error: invalid regexp \"\\\\p{Letter}cole\": error parsing regexp: invalid character class range: `\\p{Letter}`",
"v3": "extract error: invalid regexp \"\\\\p{Letter}cole\": error parsing regexp: invalid character class range: `\\p{Letter}`"
},
"tests": [
{
Expand Down Expand Up @@ -481,8 +481,8 @@
"pattern": "^\\p{digit}+$"
},
"skip": {
"v2": "extract error: unsupported regexp: error parsing regexp: invalid character class range: `\\p{digit}`",
"v3": "extract error: unsupported regexp: error parsing regexp: invalid character class range: `\\p{digit}`"
"v2": "extract error: invalid regexp \"^\\\\p{digit}+$\": error parsing regexp: invalid character class range: `\\p{digit}`",
"v3": "extract error: invalid regexp \"^\\\\p{digit}+$\": error parsing regexp: invalid character class range: `\\p{digit}`"
},
"tests": [
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@
"pattern": "^\\cC$"
},
"skip": {
"v2": "extract error: unsupported regexp: error parsing regexp: invalid escape sequence: `\\c`",
"v3": "extract error: unsupported regexp: error parsing regexp: invalid escape sequence: `\\c`"
"v2": "extract error: invalid regexp \"^\\\\cC$\": error parsing regexp: invalid escape sequence: `\\c`",
"v3": "extract error: invalid regexp \"^\\\\cC$\": error parsing regexp: invalid escape sequence: `\\c`"
},
"tests": [
{
Expand Down Expand Up @@ -75,8 +75,8 @@
"pattern": "^\\cc$"
},
"skip": {
"v2": "extract error: unsupported regexp: error parsing regexp: invalid escape sequence: `\\c`",
"v3": "extract error: unsupported regexp: error parsing regexp: invalid escape sequence: `\\c`"
"v2": "extract error: invalid regexp \"^\\\\cc$\": error parsing regexp: invalid escape sequence: `\\c`",
"v3": "extract error: invalid regexp \"^\\\\cc$\": error parsing regexp: invalid escape sequence: `\\c`"
},
"tests": [
{
Expand Down Expand Up @@ -359,8 +359,8 @@
"pattern": "\\p{Letter}cole"
},
"skip": {
"v2": "extract error: unsupported regexp: error parsing regexp: invalid character class range: `\\p{Letter}`",
"v3": "extract error: unsupported regexp: error parsing regexp: invalid character class range: `\\p{Letter}`"
"v2": "extract error: invalid regexp \"\\\\p{Letter}cole\": error parsing regexp: invalid character class range: `\\p{Letter}`",
"v3": "extract error: invalid regexp \"\\\\p{Letter}cole\": error parsing regexp: invalid character class range: `\\p{Letter}`"
},
"tests": [
{
Expand Down Expand Up @@ -481,8 +481,8 @@
"pattern": "^\\p{digit}+$"
},
"skip": {
"v2": "extract error: unsupported regexp: error parsing regexp: invalid character class range: `\\p{digit}`",
"v3": "extract error: unsupported regexp: error parsing regexp: invalid character class range: `\\p{digit}`"
"v2": "extract error: invalid regexp \"^\\\\p{digit}+$\": error parsing regexp: invalid character class range: `\\p{digit}`",
"v3": "extract error: invalid regexp \"^\\\\p{digit}+$\": error parsing regexp: invalid character class range: `\\p{digit}`"
},
"tests": [
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@
"pattern": "^\\cC$"
},
"skip": {
"v2": "extract error: unsupported regexp: error parsing regexp: invalid escape sequence: `\\c`",
"v3": "extract error: unsupported regexp: error parsing regexp: invalid escape sequence: `\\c`"
"v2": "extract error: invalid regexp \"^\\\\cC$\": error parsing regexp: invalid escape sequence: `\\c`",
"v3": "extract error: invalid regexp \"^\\\\cC$\": error parsing regexp: invalid escape sequence: `\\c`"
},
"tests": [
{
Expand Down Expand Up @@ -75,8 +75,8 @@
"pattern": "^\\cc$"
},
"skip": {
"v2": "extract error: unsupported regexp: error parsing regexp: invalid escape sequence: `\\c`",
"v3": "extract error: unsupported regexp: error parsing regexp: invalid escape sequence: `\\c`"
"v2": "extract error: invalid regexp \"^\\\\cc$\": error parsing regexp: invalid escape sequence: `\\c`",
"v3": "extract error: invalid regexp \"^\\\\cc$\": error parsing regexp: invalid escape sequence: `\\c`"
},
"tests": [
{
Expand Down Expand Up @@ -359,8 +359,8 @@
"pattern": "\\p{Letter}cole"
},
"skip": {
"v2": "extract error: unsupported regexp: error parsing regexp: invalid character class range: `\\p{Letter}`",
"v3": "extract error: unsupported regexp: error parsing regexp: invalid character class range: `\\p{Letter}`"
"v2": "extract error: invalid regexp \"\\\\p{Letter}cole\": error parsing regexp: invalid character class range: `\\p{Letter}`",
"v3": "extract error: invalid regexp \"\\\\p{Letter}cole\": error parsing regexp: invalid character class range: `\\p{Letter}`"
},
"tests": [
{
Expand Down Expand Up @@ -481,8 +481,8 @@
"pattern": "^\\p{digit}+$"
},
"skip": {
"v2": "extract error: unsupported regexp: error parsing regexp: invalid character class range: `\\p{digit}`",
"v3": "extract error: unsupported regexp: error parsing regexp: invalid character class range: `\\p{digit}`"
"v2": "extract error: invalid regexp \"^\\\\p{digit}+$\": error parsing regexp: invalid character class range: `\\p{digit}`",
"v3": "extract error: invalid regexp \"^\\\\p{digit}+$\": error parsing regexp: invalid character class range: `\\p{digit}`"
},
"tests": [
{
Expand Down
11 changes: 11 additions & 0 deletions encoding/jsonschema/testdata/txtar/perl_pattern.txtar
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
Unsupported Perl regexp syntax should not result in failure when
#strictFeatures isn't enabled.

-- schema.json --
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"pattern": "^(?![ \\t\\n]*\\(default(.*)\\))[\\s\\S]*"
}
-- out/decode/extract --
@jsonschema(schema="https://json-schema.org/draft/2020-12/schema")
_
13 changes: 13 additions & 0 deletions encoding/jsonschema/testdata/txtar/perl_pattern_strict.txtar
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
Unsupported Perl regexp syntax should result in failure when #strictFeatures
is enabled.
#strictFeatures

-- schema.json --
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"pattern": "^(?![ \\t\\n]*\\(default(.*)\\))[\\s\\S]*"
}
-- out/decode/extract --
ERROR:
unsupported Perl regexp syntax in "^(?![ \\t\\n]*\\(default(.*)\\))[\\s\\S]*": error parsing regexp: invalid or unsupported Perl syntax: `(?!`:
schema.json:3:5

0 comments on commit 67f9dfa

Please sign in to comment.