Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 3 additions & 33 deletions Sources/FoundationEssentials/String/String+IO.swift
Original file line number Diff line number Diff line change
Expand Up @@ -18,33 +18,6 @@ internal import _FoundationCShims

fileprivate let stringEncodingAttributeName = "com.apple.TextEncoding"

/// A sequence adapter that presents each `UInt8` element of a base sequence as a `UInt16`.
private struct ExtendingToUTF16Sequence<Base: Sequence<UInt8>> : Sequence {
    typealias Element = UInt16

    /// Pulls bytes from the base iterator and widens each one to 16 bits.
    struct Iterator : IteratorProtocol {
        private var source: Base.Iterator

        init(_ base: Base.Iterator) {
            source = base
        }

        mutating func next() -> Element? {
            // `nil` from the underlying iterator ends this sequence as well
            source.next().map { UInt16($0) }
        }
    }

    /// The byte sequence being adapted.
    private let wrapped: Base

    init(_ base: Base) {
        wrapped = base
    }

    func makeIterator() -> Iterator {
        Iterator(wrapped.makeIterator())
    }
}


@available(macOS 10.10, iOS 8.0, watchOS 2.0, tvOS 9.0, *)
extension String {
Expand Down Expand Up @@ -181,12 +154,9 @@ extension String {
}
#if !FOUNDATION_FRAMEWORK
case .isoLatin1:
guard bytes.allSatisfy(\.isValidISOLatin1) else {
return nil
}
// isoLatin1 is an 8-bit encoding that represents a subset of UTF-16
// Map to 16-bit values and decode as UTF-16
self.init(_validating: ExtendingToUTF16Sequence(bytes), as: UTF16.self)
// ISO Latin 1 bytes are always valid since it's an 8-bit encoding that maps scalars 0x0 through 0xFF
// Simply extend each byte to 16 bits and decode as UTF-16
self.init(decoding: bytes.lazy.map { UInt16($0) }, as: UTF16.self)
case .macOSRoman:
func buildString(_ bytes: UnsafeBufferPointer<UInt8>) -> String {
String(unsafeUninitializedCapacity: bytes.count * 3) { buffer in
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,6 @@ import Darwin

internal import _FoundationCShims

extension BinaryInteger {
    /// Whether this value is a code point that ISO Latin 1 can represent.
    ///
    /// ISO Latin 1 is an 8-bit encoding that maps 0x00 through 0xFF, so control
    /// characters such as newline (0x0A), carriage return (0x0D) and the 0x80–0x9F
    /// range are accepted in addition to the printable ranges.
    var isValidISOLatin1: Bool {
        // Compare against explicit `Int` bounds so the check does not depend on the bounds fitting in `Self`
        self >= (0x00 as Int) && self <= (0xFF as Int)
    }
}

extension UInt8 {
private typealias UTF8Representation = (UInt8, UInt8, UInt8)
private static func withMacRomanMap<R>(_ body: (UnsafeBufferPointer<UTF8Representation>) -> R) -> R {
Expand Down Expand Up @@ -228,12 +222,14 @@ extension String {
return data + swapped
#if !FOUNDATION_FRAMEWORK
case .isoLatin1:
return try? Data(capacity: self.utf16.count) { buffer in
for scalar in self.utf16 {
guard scalar.isValidISOLatin1 else {
// ISO Latin 1 encodes code points 0x0 through 0xFF (a maximum of 2 UTF-8 code units per ISO Latin 1 scalar)
// The UTF-8 count is a cheap, reasonable starting capacity: it is exact for the all-ASCII case and only overestimates by 1 byte per non-ASCII character
return try? Data(capacity: self.utf8.count) { buffer in
for scalar in self.unicodeScalars {
guard let valid = UInt8(exactly: scalar.value) else {
throw CocoaError(.fileWriteInapplicableStringEncoding)
}
buffer.appendElement(UInt8(scalar & 0xFF))
buffer.appendElement(valid)
}
}
case .macOSRoman:
Expand Down
4 changes: 3 additions & 1 deletion Tests/FoundationEssentialsTests/StringTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -1336,7 +1336,9 @@ final class StringTests : XCTestCase {
"ABCDEFGHIJKLMNOPQRSTUVWXYZ",
"0123456789",
"!\"#$%&'()*+,-./",
"¡¶ÅÖæöÿ\u{00A0}~"
"¡¶ÅÖæöÿ\u{0080}\u{00A0}~",
"Hello\nworld",
"Hello\r\nworld"
], invalid: [
"🎺",
"מ",
Expand Down