diff --git a/src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/factories/general/FormatAssertionFactory.kt b/src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/factories/general/FormatAssertionFactory.kt
index c38eecf0..51caba31 100644
--- a/src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/factories/general/FormatAssertionFactory.kt
+++ b/src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/factories/general/FormatAssertionFactory.kt
@@ -25,6 +25,7 @@ import io.github.optimumcode.json.schema.internal.formats.IpV6FormatValidator
 import io.github.optimumcode.json.schema.internal.formats.IriFormatValidator
 import io.github.optimumcode.json.schema.internal.formats.IriReferenceFormatValidator
 import io.github.optimumcode.json.schema.internal.formats.JsonPointerFormatValidator
+import io.github.optimumcode.json.schema.internal.formats.RegexFormatValidator
 import io.github.optimumcode.json.schema.internal.formats.RelativeJsonPointerFormatValidator
 import io.github.optimumcode.json.schema.internal.formats.TimeFormatValidator
 import io.github.optimumcode.json.schema.internal.formats.UriFormatValidator
@@ -86,6 +87,7 @@ internal sealed class FormatAssertionFactory(
         "uri-template" to UriTemplateFormatValidator,
         "email" to EmailFormatValidator,
         "idn-email" to IdnEmailFormatValidator,
+        "regex" to RegexFormatValidator,
       )
   }
 }
diff --git a/src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/formats/RegexFormatValidator.kt b/src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/formats/RegexFormatValidator.kt
new file mode 100644
index 00000000..b6e163a9
--- /dev/null
+++ b/src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/formats/RegexFormatValidator.kt
@@ -0,0 +1,232 @@
+package io.github.optimumcode.json.schema.internal.formats
+
+import de.cketti.codepoints.CodePoints
+import de.cketti.codepoints.codePointAt
+import io.github.optimumcode.json.schema.FormatValidationResult
+import io.github.optimumcode.json.schema.FormatValidator
+
+/**
+ * Lightweight structural validator for the `regex` format.
+ *
+ * This is not a full ECMA-262 parser: it only checks that unescaped brackets are balanced
+ * and that every backslash escape sequence is well formed.
+ */
+internal object RegexFormatValidator : AbstractStringFormatValidator() {
+  private const val OPENING_CURLY_BRACKET = '{'.code
+  private const val CLOSING_CURLY_BRACKET = '}'.code
+  private const val OPENING_SQUARE_BRACKET = '['.code
+  private const val CLOSING_SQUARE_BRACKET = ']'.code
+  private const val OPENING_BRACKET = '('.code
+  private const val CLOSING_BRACKET = ')'.code
+
+  // The escape character of a regular expression is the backslash.
+  private const val ESCAPE = '\\'.code
+
+  // Returned by the check* helpers when the pattern is malformed.
+  private const val INVALID = -1
+  private const val HEX_ESCAPE_LENGTH = 2
+  private const val UNICODE_ESCAPE_LENGTH = 4
+  private const val GROUP_MODIFIER_LENGTH = 2
+
+  // Punctuation that may be escaped to match itself.
+  private const val IDENTITY_ESCAPES = "^\$\\.*+?()[]{}|/-"
+
+  /**
+   * Reports [value] as valid when it is empty or passes [isValidEcma262Regex].
+   */
+  override fun validate(value: String): FormatValidationResult =
+    if (value.isEmpty() || isValidEcma262Regex(value)) {
+      FormatValidator.Valid()
+    } else {
+      FormatValidator.Invalid()
+    }
+
+  /**
+   * Walks [value] code point by code point, matching unescaped brackets with a stack
+   * and delegating every backslash escape to [checkEscape].
+   */
+  private fun isValidEcma262Regex(value: String): Boolean {
+    val brackets = ArrayDeque<Int>()
+    var index = 0
+    while (index < value.length) {
+      val codePoint = value.codePointAt(index)
+      index += CodePoints.charCount(codePoint)
+
+      if (codePoint == ESCAPE) {
+        // The escaped character is consumed right here, so `\\`, `\(` or `\]`
+        // can never be mistaken for a new escape or for a bracket.
+        index = checkEscape(value, index)
+        if (index == INVALID) {
+          return false
+        }
+        continue
+      }
+
+      // Inside a character class only `]` is significant, other brackets are literals.
+      val insideClass = brackets.lastOrNull() == OPENING_SQUARE_BRACKET
+      when (codePoint) {
+        OPENING_CURLY_BRACKET,
+        OPENING_SQUARE_BRACKET,
+        OPENING_BRACKET,
+        -> if (!insideClass) {
+          brackets.addLast(codePoint)
+          index = checkGroupStart(index, value, codePoint)
+        }
+
+        CLOSING_CURLY_BRACKET,
+        CLOSING_SQUARE_BRACKET,
+        CLOSING_BRACKET,
+        -> if (!insideClass || codePoint == CLOSING_SQUARE_BRACKET) {
+          val opening = brackets.removeLastOrNull() ?: return false
+          if (opening != oppositeBracket(codePoint)) {
+            return false
+          }
+        }
+      }
+    }
+    return brackets.isEmpty()
+  }
+
+  /**
+   * Validates the escape sequence whose first character (right after the backslash) is at [index].
+   *
+   * @return index of the first character after the escape sequence or [INVALID]
+   */
+  private fun checkEscape(
+    value: String,
+    index: Int,
+  ): Int {
+    if (index >= value.length) {
+      // dangling backslash at the end of the pattern
+      return INVALID
+    }
+    val next = index + 1
+    return when (val escaped = value[index]) {
+      'x' -> checkHexEscape(value, next)
+      'u' -> checkUnicodeEscape(value, next)
+      'c' -> checkControlLetter(value, next)
+      // control escape
+      'f', 'n', 'r', 't', 'v' -> next
+      // character class escape
+      'd', 'D', 's', 'S', 'w', 'W' -> next
+      // assertion
+      'b', 'B' -> next
+      else ->
+        when {
+          Validation.isDigit(escaped) -> checkDecimalEscape(value, index)
+          escaped in IDENTITY_ESCAPES -> next
+          else -> INVALID
+        }
+    }
+  }
+
+  /**
+   * Validates a decimal escape (`\0` or a back reference such as `\12`) whose first digit is at [index].
+   *
+   * @return index of the first character after the digits or [INVALID]
+   */
+  private fun checkDecimalEscape(
+    value: String,
+    index: Int,
+  ): Int {
+    if (index >= value.length || !Validation.isDigit(value[index])) {
+      return INVALID
+    }
+    var end = index + 1
+    if (value[index] == '0') {
+      // `\0` is the NUL character and must not be followed by another digit
+      return if (end < value.length && Validation.isDigit(value[end])) INVALID else end
+    }
+    while (end < value.length && Validation.isDigit(value[end])) {
+      end++
+    }
+    return end
+  }
+
+  /**
+   * Validates `\cX`: the character at [index] must be an ASCII letter.
+   *
+   * @return index of the first character after the letter or [INVALID]
+   */
+  private fun checkControlLetter(
+    value: String,
+    index: Int,
+  ): Int {
+    val letter = value.getOrNull(index) ?: return INVALID
+    return if (letter in 'a'..'z' || letter in 'A'..'Z') index + 1 else INVALID
+  }
+
+  /**
+   * Validates `\uXXXX`: four hexadecimal digits starting at [index].
+   *
+   * @return index of the first character after the digits or [INVALID]
+   */
+  private fun checkUnicodeEscape(
+    value: String,
+    index: Int,
+  ): Int = checkHexDigits(value, index, UNICODE_ESCAPE_LENGTH)
+
+  /**
+   * Validates `\xXX`: two hexadecimal digits starting at [index].
+   *
+   * @return index of the first character after the digits or [INVALID]
+   */
+  private fun checkHexEscape(
+    value: String,
+    index: Int,
+  ): Int = checkHexDigits(value, index, HEX_ESCAPE_LENGTH)
+
+  /**
+   * Checks that [count] hexadecimal digits are present in [value] starting at [index].
+   *
+   * @return index of the first character after the digits or [INVALID]
+   */
+  private fun checkHexDigits(
+    value: String,
+    index: Int,
+    count: Int,
+  ): Int {
+    val end = index + count
+    if (end > value.length) {
+      return INVALID
+    }
+    for (i in index until end) {
+      if (!Validation.isHexDigit(value[i])) {
+        return INVALID
+      }
+    }
+    return end
+  }
+
+  /**
+   * Skips the `?=`, `?!` or `?:` modifier that may follow an opening round bracket.
+   *
+   * @param nextIndex index of the first character after the opening bracket [codePoint]
+   * @return index from which the scan must continue
+   */
+  private fun checkGroupStart(
+    nextIndex: Int,
+    value: String,
+    codePoint: Int,
+  ): Int {
+    val hasModifier =
+      codePoint == OPENING_BRACKET &&
+        (
+          value.regionMatches(nextIndex, "?=", 0, GROUP_MODIFIER_LENGTH) ||
+            value.regionMatches(nextIndex, "?!", 0, GROUP_MODIFIER_LENGTH) ||
+            value.regionMatches(nextIndex, "?:", 0, GROUP_MODIFIER_LENGTH)
+        )
+    return if (hasModifier) nextIndex + GROUP_MODIFIER_LENGTH else nextIndex
+  }
+
+  /**
+   * Maps a closing bracket code point to the opening bracket that must precede it.
+   */
+  private fun oppositeBracket(bracket: Int): Int =
+    when (bracket) {
+      CLOSING_BRACKET -> OPENING_BRACKET
+      CLOSING_CURLY_BRACKET -> OPENING_CURLY_BRACKET
+      CLOSING_SQUARE_BRACKET -> OPENING_SQUARE_BRACKET
+      else -> error("no pair for bracket with code ${bracket.toString(16)}")
+    }
+}
\ No newline at end of file
diff --git a/src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/formats/UriSpec.kt b/src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/formats/UriSpec.kt
index 08c629ce..85b92790 100644
--- a/src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/formats/UriSpec.kt
+++ b/src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/formats/UriSpec.kt
@@ -2,6 +2,7 @@ package io.github.optimumcode.json.schema.internal.formats
 
 import io.github.optimumcode.json.schema.internal.formats.Validation.isAlpha
 import io.github.optimumcode.json.schema.internal.formats.Validation.isDigit
+import io.github.optimumcode.json.schema.internal.formats.Validation.isHexDigit
 
 internal object UriSpec {
   const val SCHEMA_DELIMITER = ':'
@@ -278,6 +279,4 @@ internal object UriSpec {
   private fun isSubDelimiter(c: Char): Boolean =
     c == '!' || c == '$' || c == '&' || c == '\'' || c == '(' || c == ')' ||
       c == '*' || c == '+' || c == ',' || c == ';' || c == '='
-
-  private fun isHexDigit(c: Char): Boolean = c in '0'..'9' || c in 'a'..'f' || c in 'A'..'F'
 }
\ No newline at end of file
diff --git a/src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/formats/Validation.kt b/src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/formats/Validation.kt
index f9fa6876..963658ff 100644
--- a/src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/formats/Validation.kt
+++ b/src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/formats/Validation.kt
@@ -5,6 +5,8 @@ internal object Validation {
 
   fun isDigit(c: Char): Boolean = c in '0'..'9'
 
+  fun isHexDigit(c: Char): Boolean = c in '0'..'9' || c in 'a'..'f' || c in 'A'..'F'
+
   inline fun eachSeparatedPart(
     value: String,
     separator: Char,
diff --git a/test-suites/src/commonTest/kotlin/io/github/optimumcode/json/schema/suite/AbstractSchemaTestSuite.kt b/test-suites/src/commonTest/kotlin/io/github/optimumcode/json/schema/suite/AbstractSchemaTestSuite.kt
index 7c70cac9..7ec899e7 100644
--- a/test-suites/src/commonTest/kotlin/io/github/optimumcode/json/schema/suite/AbstractSchemaTestSuite.kt
+++ b/test-suites/src/commonTest/kotlin/io/github/optimumcode/json/schema/suite/AbstractSchemaTestSuite.kt
@@ -43,13 +43,7 @@ internal class TestFilter(
   val excludeTests: Map<String, Set<String>> = emptyMap(),
 )
 
-internal val COMMON_FORMAT_FILTER =
-  TestFilter(
-    excludeSuites =
-      mapOf(
-        "regex" to emptySet(),
-      ),
-  )
+internal val COMMON_FORMAT_FILTER = TestFilter()
 
 /**
  * This class is a base for creating a test suite run from https://github.com/json-schema-org/JSON-Schema-Test-Suite.