From aae56688113b4e3c8cc2b86ee3f150728f5fc175 Mon Sep 17 00:00:00 2001 From: Oleg Smirnov Date: Mon, 6 May 2024 11:56:07 +0400 Subject: [PATCH] Attempt to implement ecma 262 validation There is too much work - when implementing your own regex parser you will definitely miss something. Maybe one day somebody will have time to implement ecma 262 regex validation in a proper way. Keep this code just for that case --- .../general/FormatAssertionFactory.kt | 2 + .../internal/formats/RegexFormatValidator.kt | 197 ++++++++++++++++++ .../json/schema/internal/formats/UriSpec.kt | 3 +- .../schema/internal/formats/Validation.kt | 2 + .../schema/suite/AbstractSchemaTestSuite.kt | 8 +- 5 files changed, 203 insertions(+), 9 deletions(-) create mode 100644 src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/formats/RegexFormatValidator.kt diff --git a/src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/factories/general/FormatAssertionFactory.kt b/src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/factories/general/FormatAssertionFactory.kt index c38eecf0..51caba31 100644 --- a/src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/factories/general/FormatAssertionFactory.kt +++ b/src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/factories/general/FormatAssertionFactory.kt @@ -25,6 +25,7 @@ import io.github.optimumcode.json.schema.internal.formats.IpV6FormatValidator import io.github.optimumcode.json.schema.internal.formats.IriFormatValidator import io.github.optimumcode.json.schema.internal.formats.IriReferenceFormatValidator import io.github.optimumcode.json.schema.internal.formats.JsonPointerFormatValidator +import io.github.optimumcode.json.schema.internal.formats.RegexFormatValidator import io.github.optimumcode.json.schema.internal.formats.RelativeJsonPointerFormatValidator import io.github.optimumcode.json.schema.internal.formats.TimeFormatValidator import 
io.github.optimumcode.json.schema.internal.formats.UriFormatValidator @@ -86,6 +87,7 @@ internal sealed class FormatAssertionFactory( "uri-template" to UriTemplateFormatValidator, "email" to EmailFormatValidator, "idn-email" to IdnEmailFormatValidator, + "regex" to RegexFormatValidator, ) } } diff --git a/src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/formats/RegexFormatValidator.kt b/src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/formats/RegexFormatValidator.kt new file mode 100644 index 00000000..b6e163a9 --- /dev/null +++ b/src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/formats/RegexFormatValidator.kt @@ -0,0 +1,197 @@ +package io.github.optimumcode.json.schema.internal.formats + +import de.cketti.codepoints.CodePoints +import de.cketti.codepoints.codePointAt +import io.github.optimumcode.json.schema.FormatValidationResult +import io.github.optimumcode.json.schema.FormatValidator + +internal object RegexFormatValidator : AbstractStringFormatValidator() { + private const val OPENING_CURLY_BRACKET = '{'.code + private const val CLOSING_CURLY_BRACKET = '}'.code + private const val OPENING_SQUARE_BRACKET = '['.code + private const val CLOSING_SQUARE_BRACKET = ']'.code + private const val OPENING_BRACKET = '('.code + private const val CLOSING_BRACKET = ')'.code + private const val ESCAPE = '\''.code + + override fun validate(value: String): FormatValidationResult { + if (value.isEmpty()) { + return FormatValidator.Valid() + } + return if (isValidEcma262Regex(value)) { + FormatValidator.Valid() + } else { + FormatValidator.Invalid() + } + } + + private fun isValidEcma262Regex(value: String): Boolean { + val brackets = ArrayDeque() + var escaped = false + var index = 0 + while (index < value.length) { + val codePoint = value.codePointAt(index) + index += CodePoints.charCount(codePoint) + + if (!escaped) { + // check brackets + when (codePoint) { + OPENING_CURLY_BRACKET, + OPENING_SQUARE_BRACKET, + OPENING_BRACKET, + 
-> brackets.add(codePoint) + + CLOSING_CURLY_BRACKET, + CLOSING_SQUARE_BRACKET, + CLOSING_BRACKET, + -> { + val prev = brackets.removeLastOrNull() ?: return false + if (prev != oppositeBracket(codePoint)) { + return false + } + } + } + } + + if (codePoint == ESCAPE) { + escaped = true + continue + } + + val updatedIndex = checkGroupStart(index, value, codePoint, escaped) + if (updatedIndex > 0) { + index = updatedIndex + } else { + val nextIndex = checkValidPattern(index, value, codePoint, escaped) + if (nextIndex < 0) { + // invalid pattern + return false + } + index = nextIndex + } + + escaped = false + } + return brackets.isEmpty() && !escaped + } + + private fun checkValidPattern( + index: Int, + value: String, + codePoint: Int, + escaped: Boolean, + ): Int { + return if (escaped) { + when (codePoint) { + 'x'.code -> checkHexEscape(value, index) + 'u'.code -> checkUnicodeEscape(value, index) + 'c'.code -> checkControlLetter(value, index) + // control escape + 'f'.code, 'n'.code, 'r'.code, 't'.code, 'v'.code -> index + // character class escape + 'd'.code, 'D'.code, 's'.code, 'S'.code, 'w'.code, 'W'.code -> index + // assertion + 'b'.code, 'B'.code -> index + else -> checkDecimalEscape(value, index) + } + } else { + index + } + } + + private fun checkDecimalEscape( + value: String, + index: Int, + ): Int { + if (!Validation.isDigit(value[index])) { + return -1 + } + if (value[index] == '0') { + return if (index + 1 >= value.length || !Validation.isDigit(value[index + 1])) { + index + } else { + -1 + } + } + var lastDigitIndex = index + for (i in index..= value.length) { + return -1 + } + for (i in index..lastIndex) { + if (!Validation.isHexDigit(value[i])) { + return -1 + } + } + return lastIndex + } + + private fun checkHexEscape( + value: String, + index: Int, + ): Int { + val lastIndex = index + 1 + if (lastIndex >= value.length) { + return -1 + } + return if (Validation.isHexDigit(value[index]) && Validation.isHexDigit(value[lastIndex])) { + lastIndex + } 
else { + -1 + } + } + + private fun checkGroupStart( + nextIndex: Int, + value: String, + codePoint: Int, + escaped: Boolean, + ): Int { + return if (!escaped && codePoint == OPENING_BRACKET) { + if ( + value.regionMatches(nextIndex, "?=", 0, 2) || + value.regionMatches(nextIndex, "?!", 0, 2) || + value.regionMatches(nextIndex, "?:", 0, 2) + ) { + nextIndex + 1 + } else { + nextIndex + } + } else { + -1 + } + } + + private fun oppositeBracket(bracket: Int): Int { + return when (bracket) { + CLOSING_BRACKET -> OPENING_BRACKET + CLOSING_CURLY_BRACKET -> OPENING_CURLY_BRACKET + CLOSING_SQUARE_BRACKET -> OPENING_SQUARE_BRACKET + else -> error("no pair for bracket with code ${bracket.toString(16)}") + } + } +} \ No newline at end of file diff --git a/src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/formats/UriSpec.kt b/src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/formats/UriSpec.kt index 08c629ce..85b92790 100644 --- a/src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/formats/UriSpec.kt +++ b/src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/formats/UriSpec.kt @@ -2,6 +2,7 @@ package io.github.optimumcode.json.schema.internal.formats import io.github.optimumcode.json.schema.internal.formats.Validation.isAlpha import io.github.optimumcode.json.schema.internal.formats.Validation.isDigit +import io.github.optimumcode.json.schema.internal.formats.Validation.isHexDigit internal object UriSpec { const val SCHEMA_DELIMITER = ':' @@ -278,6 +279,4 @@ internal object UriSpec { private fun isSubDelimiter(c: Char): Boolean = c == '!' 
|| c == '$' || c == '&' || c == '\'' || c == '(' || c == ')' || c == '*' || c == '+' || c == ',' || c == ';' || c == '=' - - private fun isHexDigit(c: Char): Boolean = c in '0'..'9' || c in 'a'..'f' || c in 'A'..'F' } \ No newline at end of file diff --git a/src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/formats/Validation.kt b/src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/formats/Validation.kt index f9fa6876..963658ff 100644 --- a/src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/formats/Validation.kt +++ b/src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/formats/Validation.kt @@ -5,6 +5,8 @@ internal object Validation { fun isDigit(c: Char): Boolean = c in '0'..'9' + fun isHexDigit(c: Char): Boolean = c in '0'..'9' || c in 'a'..'f' || c in 'A'..'F' + inline fun eachSeparatedPart( value: String, separator: Char, diff --git a/test-suites/src/commonTest/kotlin/io/github/optimumcode/json/schema/suite/AbstractSchemaTestSuite.kt b/test-suites/src/commonTest/kotlin/io/github/optimumcode/json/schema/suite/AbstractSchemaTestSuite.kt index 7c70cac9..7ec899e7 100644 --- a/test-suites/src/commonTest/kotlin/io/github/optimumcode/json/schema/suite/AbstractSchemaTestSuite.kt +++ b/test-suites/src/commonTest/kotlin/io/github/optimumcode/json/schema/suite/AbstractSchemaTestSuite.kt @@ -43,13 +43,7 @@ internal class TestFilter( val excludeTests: Map> = emptyMap(), ) -internal val COMMON_FORMAT_FILTER = - TestFilter( - excludeSuites = - mapOf( - "regex" to emptySet(), - ), - ) +internal val COMMON_FORMAT_FILTER = TestFilter() /** * This class is a base for creating a test suite run from https://github.com/json-schema-org/JSON-Schema-Test-Suite.