@@ -61,6 +61,7 @@ private let asciiLowerS = UInt8(ascii: "s")
// ASCII byte values for the letters used when matching escape sequences.
// `UInt8(ascii:)` takes a single Unicode scalar, so each literal must be
// exactly one character with no surrounding whitespace.
private let asciiLowerT = UInt8(ascii: "t")
private let asciiUpperT = UInt8(ascii: "T")
private let asciiLowerU = UInt8(ascii: "u")
private let asciiUpperU = UInt8(ascii: "U")
private let asciiLowerV = UInt8(ascii: "v")
private let asciiLowerX = UInt8(ascii: "x")
private let asciiLowerY = UInt8(ascii: "y")
@@ -80,6 +81,13 @@ private func fromHexDigit(_ c: UInt8) -> UInt8? {
8081 return nil
8182}
8283
/// Runs `fromHexDigit(_:)` on `c` and widens a successful result to `UInt32`,
/// so callers can shift and accumulate digits into a 32-bit code point.
///
/// - Parameter c: The byte to interpret as a hexadecimal digit.
/// - Returns: The value produced by `fromHexDigit(_:)` as a `UInt32`, or
///   `nil` when `fromHexDigit(_:)` returns `nil`.
private func uint32FromHexDigit(_ c: UInt8) -> UInt32? {
  return fromHexDigit(c).map { UInt32($0) }
}
90+
8391// Protobuf Text encoding assumes that you're working directly
8492// in UTF-8. So this implementation converts the string to UTF8,
8593// then decodes it into a sequence of bytes, then converts
@@ -116,6 +124,58 @@ private func decodeString(_ s: String) -> String? {
116124 out. append ( n)
117125 bytes = savedPosition
118126 }
case asciiLowerU, asciiUpperU: // "u" or "U"
  // Unicode escape: \u is followed by exactly 4 hex digits, \U by exactly 8.
  let digitCount = (escaped == asciiUpperU) ? 8 : 4
  var codePoint: UInt32 = 0
  for _ in 0..<digitCount {
    guard let byte = bytes.next(), let digit = uint32FromHexDigit(byte) else {
      // Ran out of input or hit a non-hex byte: malformed \u,\U escape
      return nil
    }
    // At most 8 nibbles are accumulated, so this cannot overflow UInt32.
    codePoint = (codePoint << 4) + digit
  }
  // Append the UTF-8 encoding of the code point.
  switch codePoint {
  case 0...0x7f:
    // 1 byte encoding
    out.append(UInt8(truncatingIfNeeded: codePoint))
  case 0x80...0x7ff:
    // 2 byte encoding
    out.append(0xC0 + UInt8(truncatingIfNeeded: codePoint >> 6))
    out.append(0x80 + UInt8(truncatingIfNeeded: codePoint & 0x3F))
  case 0xD800...0xDFFF:
    // Surrogate halves are not valid scalar values and have no well-formed
    // UTF-8 encoding; reject them, as the scanner's escape validation does.
    // This case must precede the 3-byte range, which would otherwise match.
    return nil
  case 0x800...0xffff:
    // 3 byte encoding
    out.append(0xE0 + UInt8(truncatingIfNeeded: codePoint >> 12))
    out.append(0x80 + UInt8(truncatingIfNeeded: (codePoint >> 6) & 0x3F))
    out.append(0x80 + UInt8(truncatingIfNeeded: codePoint & 0x3F))
  case 0x10000...0x10FFFF:
    // 4 byte encoding
    out.append(0xF0 + UInt8(truncatingIfNeeded: codePoint >> 18))
    out.append(0x80 + UInt8(truncatingIfNeeded: (codePoint >> 12) & 0x3F))
    out.append(0x80 + UInt8(truncatingIfNeeded: (codePoint >> 6) & 0x3F))
    out.append(0x80 + UInt8(truncatingIfNeeded: codePoint & 0x3F))
  default:
    // Beyond U+10FFFF: not a Unicode code point.
    return nil
  }
119179 case asciiLowerX: // "x"
120180 // Unlike C/C++, protobuf only allows 1 or 2 digits here:
121181 if let byte = bytes. next ( ) , let digit = fromHexDigit ( byte) {
@@ -315,6 +375,39 @@ internal struct TextFormatScanner {
315375 }
316376 }
317377 count += 1
case asciiLowerU, asciiUpperU: // 'u' or 'U' unicode escape
  // \u takes exactly 4 hex digits, \U exactly 8.
  let numDigits = (escaped == asciiLowerU) ? 4 : 8
  // Confirm every digit lies inside the buffer before indexing. `p` is not
  // advanced inside the loop below, so checking only `p != end` would let
  // `p[i]` read past `end` on a truncated escape.
  guard end - p >= numDigits else {
    throw TextFormatDecodingError.malformedText // unicode escape must have 4/8 digits
  }
  var codePoint: UInt32 = 0
  for i in 0..<numDigits {
    guard let digit = uint32FromHexDigit(p[i]) else {
      throw TextFormatDecodingError.malformedText // wasn't a hex digit
    }
    codePoint = (codePoint << 4) + digit
  }
  p += numDigits
  // Add the length of the code point's UTF-8 encoding to the running count.
  switch codePoint {
  case 0...0x7f:
    // 1 byte encoding
    count += 1
  case 0x80...0x7ff:
    // 2 byte encoding
    count += 2
  case 0xD800...0xDFFF:
    // Surrogate pair (low or high), shouldn't get a unicode literal of those.
    // Must be listed before the 3-byte range, which also contains these values.
    throw TextFormatDecodingError.malformedText
  case 0x800...0xffff:
    // 3 byte encoding
    count += 3
  case 0x10000...0x10FFFF:
    // 4 byte encoding
    count += 4
  default:
    throw TextFormatDecodingError.malformedText // Isn't a valid unicode character
  }
318411 case asciiLowerX: // 'x' hexadecimal escape
319412 if p != end && fromHexDigit ( p [ 0 ] ) != nil {
320413 p += 1
@@ -387,6 +480,39 @@ internal struct TextFormatScanner {
387480 out [ 0 ] = digit1Value
388481 out += 1
389482 }
case asciiLowerU, asciiUpperU: // 'u' or 'U' unicode escape
  // \u carries 4 hex digits, \U carries 8. The force-unwrap relies on the
  // escape having been validated before this point.
  let digitCount = (escaped == asciiLowerU) ? 4 : 8
  var scalar: UInt32 = 0
  for offset in 0..<digitCount {
    let nibble = uint32FromHexDigit(p[offset])!
    scalar = (scalar << 4) + nibble
  }
  p += digitCount
  // Write the UTF-8 encoding of the scalar and advance the output cursor.
  if scalar <= 0x7f {
    // 1 byte encoding
    out[0] = UInt8(truncatingIfNeeded: scalar)
    out += 1
  } else if scalar <= 0x7ff {
    // 2 byte encoding
    out[0] = 0xC0 | UInt8(truncatingIfNeeded: scalar >> 6)
    out[1] = 0x80 | UInt8(truncatingIfNeeded: scalar & 0x3F)
    out += 2
  } else if scalar <= 0xffff {
    // 3 byte encoding
    out[0] = 0xE0 | UInt8(truncatingIfNeeded: scalar >> 12)
    out[1] = 0x80 | UInt8(truncatingIfNeeded: (scalar >> 6) & 0x3F)
    out[2] = 0x80 | UInt8(truncatingIfNeeded: scalar & 0x3F)
    out += 3
  } else if scalar <= 0x10FFFF {
    // 4 byte encoding
    out[0] = 0xF0 | UInt8(truncatingIfNeeded: scalar >> 18)
    out[1] = 0x80 | UInt8(truncatingIfNeeded: (scalar >> 12) & 0x3F)
    out[2] = 0x80 | UInt8(truncatingIfNeeded: (scalar >> 6) & 0x3F)
    out[3] = 0x80 | UInt8(truncatingIfNeeded: scalar & 0x3F)
    out += 4
  } else {
    preconditionFailure() // Already validated, can't happen
  }
390516 case asciiLowerX: // 'x' hexadecimal escape
391517 // We already validated, so we know there's at least one digit:
392518 var n = fromHexDigit ( p [ 0 ] ) !
0 commit comments