Skip to content

Commit f53a462

Browse files
DmitryNekrasov authored and dkhalanskyjb committed
Unify time zone parsing to follow RFC 9557 specification grammar (#532)
Replace existing validation with a unified finite state automaton that implements RFC 9557 grammar for all time zone identifiers (both named zones and offsets). Parse-time validation is removed; time zones are now validated only when used. Syntactically valid but non-existent time zones will now parse successfully. Validation errors occur when creating TimeZone objects, not during parsing. Fixes #531
1 parent a3b21d5 commit f53a462

File tree

7 files changed

+227
-134
lines changed

7 files changed

+227
-134
lines changed

core/common/src/format/DateTimeComponents.kt

Lines changed: 13 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ import kotlinx.datetime.*
99
import kotlinx.datetime.DayOfWeek
1010
import kotlinx.datetime.internal.*
1111
import kotlinx.datetime.internal.format.*
12+
import kotlinx.datetime.internal.format.formatter.FormatterStructure
13+
import kotlinx.datetime.internal.format.formatter.StringFormatterStructure
1214
import kotlinx.datetime.internal.format.parser.Copyable
1315
import kotlinx.datetime.internal.format.parser.ParserStructure
1416
import kotlinx.datetime.internal.format.parser.TimeZoneParserOperation
@@ -565,27 +567,22 @@ internal class DateTimeComponentsContents internal constructor(
565567

566568
internal val timeZoneField = GenericFieldSpec(PropertyAccessor(DateTimeComponentsContents::timeZoneId))
567569

568-
internal class TimeZoneIdDirective(private val knownZones: Set<String>) :
569-
StringFieldFormatDirective<DateTimeComponentsContents>(timeZoneField, knownZones) {
570+
internal class TimeZoneIdDirective() : FieldFormatDirective<DateTimeComponentsContents> {
571+
override val field: FieldSpec<DateTimeComponentsContents, String>
572+
get() = timeZoneField
570573

571574
override val builderRepresentation: String
572-
get() =
573-
"${DateTimeFormatBuilder.WithDateTimeComponents::timeZoneId.name}()"
575+
get() = "${DateTimeFormatBuilder.WithDateTimeComponents::timeZoneId.name}()"
576+
577+
override fun formatter(): FormatterStructure<DateTimeComponentsContents> {
578+
return StringFormatterStructure(field.accessor::getterNotNull)
579+
}
574580

575581
override fun parser(): ParserStructure<DateTimeComponentsContents> =
576582
ParserStructure(
577-
emptyList(),
578-
listOf(
579-
super.parser(),
580-
ParserStructure(
581-
listOf(TimeZoneParserOperation(timeZoneField.accessor)),
582-
emptyList()
583-
)
584-
)
583+
listOf(TimeZoneParserOperation(timeZoneField.accessor)),
584+
emptyList()
585585
)
586-
587-
override fun equals(other: Any?): Boolean = other is TimeZoneIdDirective && other.knownZones == knownZones
588-
override fun hashCode(): Int = knownZones.hashCode()
589586
}
590587

591588
internal class DateTimeComponentsFormat(override val actualFormat: CachedFormatStructure<DateTimeComponentsContents>) :
@@ -609,7 +606,7 @@ internal class DateTimeComponentsFormat(override val actualFormat: CachedFormatS
609606
}
610607

611608
override fun timeZoneId() =
612-
actualBuilder.add(BasicFormatStructure(TimeZoneIdDirective(TimeZone.availableZoneIds)))
609+
actualBuilder.add(BasicFormatStructure(TimeZoneIdDirective()))
613610

614611
@Suppress("NO_ELSE_IN_WHEN")
615612
override fun dateTimeComponents(format: DateTimeFormat<DateTimeComponents>) = when (format) {

core/common/src/format/DateTimeFormatBuilder.kt

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -287,10 +287,29 @@ public sealed interface DateTimeFormatBuilder {
287287
*/
288288
public sealed interface WithDateTimeComponents : WithDateTime, WithUtcOffset {
289289
/**
290-
* The IANA time zone identifier, for example, "Europe/Berlin".
290+
* A timezone identifier, either offset-based or a region-based IANA timezone ID.
291291
*
292-
* When formatting, the timezone identifier is supplied as is, without any validation.
293-
* On parsing, [TimeZone.availableZoneIds] is used to validate the identifier.
292+
* Offset-based timezones:
293+
* - `Z` or `z` - UTC
294+
* - Optional prefix (`UTC`, `GMT`, `UT`) followed by offset
295+
* - Offset in one of the formats: `+H`, `+HH`, `+HHMM`, `+HHMMSS`, `+HH:MM`, `+HH:MM:SS`
296+
*
297+
* Region-based IANA timezone IDs:
298+
* Parsed according to RFC 9557 grammar (section 4.1 of https://datatracker.ietf.org/doc/rfc9557/):
299+
* ```
300+
* time-zone-initial = ALPHA / "." / "_"
301+
* time-zone-char = time-zone-initial / DIGIT / "-" / "+"
302+
* time-zone-part = time-zone-initial *time-zone-char
303+
* time-zone-name = time-zone-part *("/" time-zone-part)
304+
* ```
305+
*
306+
* Note: This implementation doesn't follow the RFC 9557 grammar fully and allows
307+
* "." and ".." as the time-zone-part.
308+
*
309+
* When formatting, outputs the identifier as-is. When parsing, validates syntax only;
310+
* actual timezone validation is deferred until creating a [TimeZone] object.
311+
*
312+
* If more than one way to read a valid timezone ID matches the string, we always take the longest one.
294313
*
295314
* @sample kotlinx.datetime.test.samples.format.DateTimeComponentsFormatSamples.timeZoneId
296315
*/

core/common/src/internal/format/FieldFormatDirective.kt

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -147,27 +147,6 @@ internal abstract class NamedEnumIntFieldFormatDirective<in Target, Type>(
147147
)
148148
}
149149

150-
internal abstract class StringFieldFormatDirective<in Target>(
151-
final override val field: FieldSpec<Target, String>,
152-
private val acceptedStrings: Set<String>,
153-
) : FieldFormatDirective<Target> {
154-
155-
init {
156-
require(acceptedStrings.isNotEmpty()) {
157-
"The set of accepted strings is empty"
158-
}
159-
}
160-
161-
override fun formatter(): FormatterStructure<Target> =
162-
StringFormatterStructure(field.accessor::getterNotNull)
163-
164-
override fun parser(): ParserStructure<Target> =
165-
ParserStructure(
166-
listOf(StringSetParserOperation(acceptedStrings, field.accessor, field.name)),
167-
emptyList()
168-
)
169-
}
170-
171150
internal abstract class SignedIntFieldFormatDirective<in Target>(
172151
final override val field: FieldSpec<Target, Int>,
173152
private val minDigits: Int?,

core/common/src/internal/format/parser/ParserOperation.kt

Lines changed: 62 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
package kotlinx.datetime.internal.format.parser
77

88
import kotlinx.datetime.internal.isAsciiDigit
9+
import kotlinx.datetime.internal.isAsciiLetter
910

1011
internal interface ParserOperation<in Output> {
1112
fun consume(storage: Output, input: CharSequence, startIndex: Int): ParseResult
@@ -142,9 +143,8 @@ internal class UnconditionalModification<Output>(
142143
internal class TimeZoneParserOperation<Output>(
143144
private val setter: AssignableField<Output, String>
144145
) : ParserOperation<Output> {
145-
146146
override fun consume(storage: Output, input: CharSequence, startIndex: Int): ParseResult {
147-
val lastMatch = validateTimezone(input, startIndex)
147+
val lastMatch = validateTimeZone(input, startIndex)
148148
return if (lastMatch > startIndex) {
149149
setter.setWithoutReassigning(storage, input.substring(startIndex, lastMatch), startIndex, lastMatch)
150150
ParseResult.Ok(lastMatch)
@@ -158,95 +158,106 @@ internal class TimeZoneParserOperation<Output>(
158158
START,
159159
AFTER_PREFIX,
160160
AFTER_SIGN,
161+
AFTER_INIT_SIGN,
161162
AFTER_HOUR,
163+
AFTER_INIT_HOUR,
162164
AFTER_MINUTE,
163165
AFTER_COLON_MINUTE,
164-
END,
165-
INVALID
166+
IN_PART,
167+
AFTER_SLASH,
168+
END
166169
}
167170

168-
private fun validateTimezone(input: CharSequence, startIndex: Int): Int {
171+
private inline fun Boolean.onTrue(action: () -> Unit): Boolean = if (this) { action(); true } else false
172+
173+
private inline fun Boolean.onFalse(action: () -> Unit): Boolean = if (this) true else { action(); false }
174+
175+
private fun validateTimeZone(input: CharSequence, startIndex: Int): Int {
169176
var index = startIndex
170-
var lastValidIndex = startIndex
171177

172178
fun validatePrefix(validValues: List<String>): Boolean =
173-
validValues.firstOrNull { input.startsWith(it) }?.let {
174-
index += it.length
175-
lastValidIndex = index
176-
true
177-
} ?: false
178-
179-
fun validateTimeComponent(length: Int): Boolean {
180-
if ((index..<(index + length)).all { input.getOrNull(it)?.isAsciiDigit() ?: false }) {
181-
index += length
182-
lastValidIndex = index
183-
return true
184-
}
185-
return false
186-
}
179+
validValues.firstOrNull { input.startsWith(it, index) }?.also { index += it.length } != null
180+
181+
fun validateSign(): Boolean = (input[index] in listOf('+', '-')).onTrue { index++ }
182+
183+
fun validateTimeComponent(length: Int): Boolean =
184+
(index..<(index + length))
185+
.all { input.getOrNull(it)?.isAsciiDigit() ?: false }
186+
.onTrue { index += length }
187+
188+
fun validateTimeComponentWithColon(): Boolean =
189+
(input[index] == ':').onTrue { index++ } && validateTimeComponent(2).onFalse { index-- }
190+
191+
fun Char.isTimeZoneInitial(): Boolean = isAsciiLetter() || this == '.' || this == '_'
192+
fun Char.isTimeZoneChar(): Boolean = isTimeZoneInitial() || isAsciiDigit() || this == '-' || this == '+'
193+
194+
fun validateTimeZoneInitial(): Boolean = input[index].isTimeZoneInitial().onTrue { index++ }
195+
fun validateTimeZoneChar(): Boolean = input[index].isTimeZoneChar().onTrue { index++ }
196+
fun validateSlash(): Boolean = (input[index] == '/').onTrue { index++ }
187197

188198
var state = State.START
189199
while (index < input.length) {
190200
state = when (state) {
191201
State.START -> when {
192-
input[index] == 'Z' || input[index] == 'z' -> {
193-
index++
194-
State.END
195-
}
196-
197-
input[index] in listOf('+', '-') -> {
198-
index++
199-
State.AFTER_SIGN
200-
}
201-
202202
validatePrefix(listOf("UTC", "GMT", "UT")) -> State.AFTER_PREFIX
203-
else -> State.INVALID
203+
validateSign() -> State.AFTER_INIT_SIGN
204+
validateTimeZoneInitial() -> State.IN_PART
205+
else -> break
204206
}
205207

206208
State.AFTER_PREFIX -> when {
207-
input[index] in listOf('+', '-') -> {
208-
index++
209-
State.AFTER_SIGN
210-
}
211-
212-
else -> State.INVALID
209+
validateSign() -> State.AFTER_SIGN
210+
else -> State.IN_PART
213211
}
214212

215213
State.AFTER_SIGN -> when {
216214
validateTimeComponent(2) -> State.AFTER_HOUR
215+
else -> State.IN_PART
216+
}
217+
218+
State.AFTER_INIT_SIGN -> when {
219+
validateTimeComponent(2) -> State.AFTER_INIT_HOUR
217220
validateTimeComponent(1) -> State.END
218-
else -> State.INVALID
221+
else -> break
219222
}
220223

221224
State.AFTER_HOUR -> when {
222-
input[index] == ':' -> {
223-
index++
224-
if (validateTimeComponent(2)) State.AFTER_COLON_MINUTE else State.INVALID
225-
}
225+
validateTimeComponentWithColon() -> State.AFTER_COLON_MINUTE
226+
else -> State.IN_PART
227+
}
226228

229+
State.AFTER_INIT_HOUR -> when {
230+
validateTimeComponentWithColon() -> State.AFTER_COLON_MINUTE
227231
validateTimeComponent(2) -> State.AFTER_MINUTE
228-
else -> State.INVALID
232+
else -> break
229233
}
230234

231235
State.AFTER_MINUTE -> when {
232236
validateTimeComponent(2) -> State.END
233-
else -> State.INVALID
237+
else -> break
234238
}
235239

236240
State.AFTER_COLON_MINUTE -> when {
237-
input[index] == ':' -> {
238-
index++
239-
if (validateTimeComponent(2)) State.END else State.INVALID
240-
}
241+
validateTimeComponentWithColon() -> State.END
242+
else -> break
243+
}
244+
245+
State.IN_PART -> when {
246+
validateTimeZoneChar() -> State.IN_PART
247+
validateSlash() -> State.AFTER_SLASH
248+
else -> break
249+
}
241250

242-
else -> State.INVALID
251+
State.AFTER_SLASH -> when {
252+
validateTimeZoneInitial() -> State.IN_PART
253+
else -> break
243254
}
244255

245-
State.END, State.INVALID -> break
256+
State.END -> break
246257
}
247258
}
248259

249-
return if (state == State.END) index else lastValidIndex
260+
return index - if (state == State.AFTER_SLASH || state == State.AFTER_INIT_SIGN) 1 else 0
250261
}
251262
}
252263
}

core/common/src/internal/util.kt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ package kotlinx.datetime.internal
77

88
internal fun Char.isAsciiDigit(): Boolean = this in '0'..'9'
99

10+
internal fun Char.isAsciiLetter(): Boolean = this in 'A'..'Z' || this in 'a'..'z'
11+
1012
internal fun Char.asciiDigitToInt(): Int = this - '0'
1113

1214
/** Working around the JSR-310 behavior of failing to parse long year numbers even when they start with leading zeros */

0 commit comments

Comments
 (0)