Skip to content

Commit 8411210

Browse files
Sendable Config
ConfigTests, BinaryDistinctDictionary removed, Config JSON serialization/deserialization, Config compatible with jinja templating system @dynamicMemberLookup brought back for backward compatibility, ConfigTests/ConfigEquatable, Config.Data equality improved @dynamicMemberLookup dot notation used in favour of the subscript formatting rebase
1 parent 41f26a3 commit 8411210

21 files changed

+1887
-346
lines changed

.gitignore

+3-1
Original file line numberDiff line numberDiff line change
@@ -9,4 +9,6 @@ DerivedData/
99
.swiftpm/config/registries.json
1010
.swiftpm/xcode/package.xcworkspace/contents.xcworkspacedata
1111
.netrc
12-
.idea
12+
.idea
13+
.index-build
14+
*.out

Package.swift

+3-2
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ let package = Package(
1313
],
1414
dependencies: [
1515
.package(url: "https://github.com/apple/swift-argument-parser.git", .upToNextMinor(from: "1.4.0")),
16+
.package(url: "https://github.com/apple/swift-collections.git", .upToNextMinor(from: "1.1.4")),
1617
.package(url: "https://github.com/johnmai-dev/Jinja", .upToNextMinor(from: "1.1.0")),
1718
],
1819
targets: [
@@ -24,13 +25,13 @@ let package = Package(
2425
]
2526
),
2627
.executableTarget(name: "HubCLI", dependencies: ["Hub", .product(name: "ArgumentParser", package: "swift-argument-parser")]),
27-
.target(name: "Hub", resources: [.process("FallbackConfigs")]),
28+
.target(name: "Hub", dependencies: [.product(name: "OrderedCollections", package: "swift-collections")], resources: [.process("FallbackConfigs")]),
2829
.target(name: "Tokenizers", dependencies: ["Hub", .product(name: "Jinja", package: "Jinja")]),
2930
.target(name: "TensorUtils"),
3031
.target(name: "Generation", dependencies: ["Tokenizers", "TensorUtils"]),
3132
.target(name: "Models", dependencies: ["Tokenizers", "Generation", "TensorUtils"]),
3233
.testTarget(name: "TokenizersTests", dependencies: ["Tokenizers", "Models", "Hub"], resources: [.process("Resources"), .process("Vocabs")]),
33-
.testTarget(name: "HubTests", dependencies: ["Hub"]),
34+
.testTarget(name: "HubTests", dependencies: ["Hub", .product(name: "Jinja", package: "Jinja")]),
3435
.testTarget(name: "PreTokenizerTests", dependencies: ["Tokenizers", "Hub"]),
3536
.testTarget(name: "TensorUtilsTests", dependencies: ["TensorUtils", "Models", "Hub"], resources: [.process("Resources")]),
3637
.testTarget(name: "NormalizerTests", dependencies: ["Tokenizers", "Hub"]),

Sources/Hub/BinaryDistinct.swift

+246
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,246 @@
1+
//
2+
// BinaryDistinct.swift
3+
// swift-transformers
4+
//
5+
// Created by Piotr Kowalczuk on 06.03.25.
6+
//
7+
8+
import Foundation
9+
10+
/// A string stored as raw UTF-16 code units, so equality, hashing and ordering are decided purely by those units.
///
/// It works around limitations of both `String` and `NSString`: the former applies Unicode normalization when
/// comparing, and the latter is not `Sendable`. For more reference see
/// [Modifying-and-Comparing-Strings](https://developer.apple.com/documentation/swift/string#Modifying-and-Comparing-Strings).
public struct BinaryDistinctString: Equatable, Hashable, Sendable, Comparable, CustomStringConvertible, ExpressibleByStringLiteral {
    /// The UTF-16 code units backing this string.
    public let value: [UInt16]

    /// The stored code units as an `NSString` (built through a Swift `String`).
    public var nsString: NSString {
        String(utf16CodeUnits: value, count: value.count) as NSString
    }

    /// The stored code units as a Swift `String`.
    public var string: String {
        String(nsString)
    }

    /// The number of `Character`s in ``string``; this is not the number of stored code units.
    public var count: Int {
        string.count
    }

    /// Satisfies ``CustomStringConvertible`` protocol.
    public var description: String {
        string
    }

    /// Wraps the given UTF-16 code units without modification.
    public init(_ bytes: [UInt16]) {
        value = bytes
    }

    /// Copies the UTF-16 code units of `str` after bridging it to a Swift `String`.
    public init(_ str: NSString) {
        value = Array((str as String).utf16)
    }

    /// Creates a value from a Swift `String` by delegating to the `NSString` initializer.
    public init(_ str: String) {
        self.init(str as NSString)
    }

    /// Creates a one-character string from the code units of `character`.
    public init(_ character: BinaryDistinctCharacter) {
        value = character.bytes
    }

    /// Concatenates the code units of every character, in order.
    public init(_ characters: [BinaryDistinctCharacter]) {
        value = characters.flatMap { $0.bytes }
    }

    /// Satisfies ``ExpressibleByStringLiteral`` protocol.
    public init(stringLiteral value: String) {
        self.init(value)
    }

    /// Two values are equal exactly when their code units are identical.
    public static func == (lhs: BinaryDistinctString, rhs: BinaryDistinctString) -> Bool {
        lhs.value == rhs.value
    }

    /// Orders values lexicographically by code unit.
    public static func < (lhs: BinaryDistinctString, rhs: BinaryDistinctString) -> Bool {
        lhs.value.lexicographicallyPrecedes(rhs.value)
    }

    /// Returns a new value holding the code units of `lhs` followed by those of `rhs`.
    public static func + (lhs: BinaryDistinctString, rhs: BinaryDistinctString) -> BinaryDistinctString {
        BinaryDistinctString(lhs.value + rhs.value)
    }

    /// Returns `true` when the stored code units begin with the code units of `prefix`.
    public func hasPrefix(_ prefix: BinaryDistinctString) -> Bool {
        if prefix.value.count > value.count {
            return false
        }
        return value.starts(with: prefix.value)
    }

    /// Returns `true` when the stored code units end with the code units of `suffix`.
    public func hasSuffix(_ suffix: BinaryDistinctString) -> Bool {
        let suffixLength = suffix.value.count
        if suffixLength > value.count {
            return false
        }
        let tail = value.suffix(suffixLength)
        return tail.elementsEqual(suffix.value)
    }

    /// Returns a lowercased copy, computed on the `String` representation.
    public func lowercased() -> BinaryDistinctString {
        BinaryDistinctString(string.lowercased())
    }

    /// Returns a copy in which occurrences of `of` are replaced by `with`.
    ///
    /// The search and replacement are performed on the `String` representations of all three values.
    public func replacingOccurrences(of: Self, with: Self) -> BinaryDistinctString {
        let replaced = string.replacingOccurrences(of: of.string, with: with.string)
        return BinaryDistinctString(replaced)
    }
}
90+
91+
extension BinaryDistinctString {
    /// Indices are plain integer offsets counted in characters (the unit used by `count`).
    public typealias Index = Int

    /// The position of the first character; always `0`.
    public var startIndex: Index { return 0 }

    /// The position one past the last character, i.e. `count`.
    public var endIndex: Index { return self.count }

    /// Returns the index `distance` positions away from `i`.
    ///
    /// - Parameters:
    ///   - i: The index to start from.
    ///   - distance: The offset to apply; may be negative.
    /// - Returns: `i + distance`.
    /// - Precondition: The result must lie in `0...count`; otherwise the call traps with "Index out of bounds".
    public func index(_ i: Index, offsetBy distance: Int) -> Index {
        let newIndex = i + distance
        guard newIndex >= 0, newIndex <= self.count else {
            fatalError("Index out of bounds")
        }
        return newIndex
    }

    /// Returns the index `distance` positions away from `i`, or `nil` if that would move past `limit`.
    ///
    /// For a non-negative `distance`, `limit` acts as an upper bound: the result is `nil` whenever
    /// `i + distance` is greater than `limit`.
    ///
    /// For a negative `distance`, `limit` acts as a lower bound: the result is `nil` when `i + distance`
    /// is less than `limit`. As with the standard library collections, a `limit` that lies after `i`
    /// has no effect when moving backwards. Previously the limit was always treated as an upper bound,
    /// so backward moves that stayed within the limit returned `nil` and moves past it were allowed.
    ///
    /// - Parameters:
    ///   - i: The index to start from.
    ///   - distance: The offset to apply; may be negative.
    ///   - limit: The index that must not be passed.
    /// - Returns: The offset index, or `nil` if the limit would be exceeded.
    public func index(_ i: Index, offsetBy distance: Int, limitedBy limit: Index) -> Index? {
        let newIndex = i + distance
        if distance < 0 {
            // Moving backwards: only a limit at or before `i` can be crossed.
            if limit <= i && newIndex < limit {
                return nil
            }
            return newIndex
        }
        return newIndex <= limit ? newIndex : nil
    }
}
110+
111+
extension BinaryDistinctString: Sequence {
    /// Iterates over the `Character`s of the `String` representation, wrapping each one in a
    /// `BinaryDistinctCharacter`.
    public func makeIterator() -> AnyIterator<BinaryDistinctCharacter> {
        // Delegate the character segmentation to the native Swift String iterator.
        var characters = self.string.makeIterator()

        return AnyIterator {
            if let next = characters.next() {
                return BinaryDistinctCharacter(next)
            }
            return nil
        }
    }
}
121+
122+
extension BinaryDistinctString {
    /// Returns the slice starting at character offset `bounds.lowerBound` and running to the end.
    ///
    /// Offsets are counted in `Character`s, the same unit as `count`. The upper bound is therefore
    /// `count`, not the number of stored UTF-16 code units; using the latter made every string that
    /// contains surrogate pairs trap.
    ///
    /// - Precondition: `0 <= bounds.lowerBound <= count`; otherwise the call traps with "Index out of bounds".
    public subscript(bounds: PartialRangeFrom<Int>) -> BinaryDistinctString {
        let characterCount = self.count
        guard bounds.lowerBound <= characterCount else {
            fatalError("Index out of bounds")
        }
        // A negative lower bound is rejected by the range subscript below.
        return self[bounds.lowerBound..<characterCount]
    }

    /// Returns a slice of the `BinaryDistinctString` cut on character (grapheme cluster) boundaries.
    ///
    /// `bounds` is expressed in `Character` offsets, matching `count`, `endIndex` and the iterator.
    /// The offsets are translated to positions in the UTF-16 storage and the slice is taken from the
    /// stored code units directly, so the returned value keeps the original code units.
    ///
    /// An empty range (including one located at `count`) yields an empty string.
    ///
    /// - Precondition: `0 <= bounds.lowerBound` and `bounds.upperBound <= count`; otherwise the call
    ///   traps with "Index out of bounds".
    public subscript(bounds: Range<Int>) -> BinaryDistinctString {
        // unitOffsets[k] is the UTF-16 offset at which the k-th character starts;
        // the final entry is the offset just past the last character.
        var unitOffsets: [Int] = [0]
        for character in self.string {
            // The storage is UTF-16, so advance by the UTF-16 width of each character.
            unitOffsets.append(unitOffsets[unitOffsets.count - 1] + character.utf16.count)
        }
        let characterCount = unitOffsets.count - 1

        guard bounds.lowerBound >= 0, bounds.upperBound <= characterCount else {
            fatalError("Index out of bounds")
        }

        let startUnit = unitOffsets[bounds.lowerBound]
        let endUnit = unitOffsets[bounds.upperBound]
        return BinaryDistinctString(Array(self.value[startUnit..<endUnit]))
    }
}
162+
163+
extension Dictionary where Key == BinaryDistinctString {
    /// Merges another dictionary keyed by `BinaryDistinctString` into this one.
    ///
    /// - Parameters:
    ///   - other: The entries to merge in.
    ///   - strategy: Resolves key collisions from `(current, new)`; by default the new value wins.
    public mutating func merge(_ other: [BinaryDistinctString: Value], strategy: (Value, Value) -> Value = { _, new in new }) {
        merge(other, uniquingKeysWith: strategy)
    }

    /// Merges a `[String: Value]` dictionary into this one after converting its keys.
    ///
    /// - Parameters:
    ///   - other: The entries to merge in.
    ///   - strategy: Resolves key collisions from `(current, new)`; by default the new value wins.
    public mutating func merge(_ other: [String: Value], strategy: (Value, Value) -> Value = { _, new in new }) {
        let rekeyed = other.map { entry in (BinaryDistinctString(entry.key), entry.value) }
        merge([BinaryDistinctString: Value](uniqueKeysWithValues: rekeyed), uniquingKeysWith: strategy)
    }

    /// Merges a `[NSString: Value]` dictionary into this one after converting its keys.
    ///
    /// - Parameters:
    ///   - other: The entries to merge in.
    ///   - strategy: Resolves key collisions from `(current, new)`; by default the new value wins.
    public mutating func merge(_ other: [NSString: Value], strategy: (Value, Value) -> Value = { _, new in new }) {
        let rekeyed = other.map { entry in (BinaryDistinctString(entry.key), entry.value) }
        merge([BinaryDistinctString: Value](uniqueKeysWithValues: rekeyed), uniquingKeysWith: strategy)
    }

    /// Returns a copy of this dictionary with the `[String: Value]` entries merged in.
    public func merging(_ other: [String: Value], strategy: (Value, Value) -> Value = { _, new in new }) -> Self {
        var combined = self
        combined.merge(other, strategy: strategy)
        return combined
    }

    /// Returns a copy of this dictionary with the `BinaryDistinctString`-keyed entries merged in.
    public func merging(_ other: [BinaryDistinctString: Value], strategy: (Value, Value) -> Value = { _, new in new }) -> Self {
        var combined = self
        combined.merge(other, strategy: strategy)
        return combined
    }

    /// Returns a copy of this dictionary with the `[NSString: Value]` entries merged in.
    public func merging(_ other: [NSString: Value], strategy: (Value, Value) -> Value = { _, new in new }) -> Self {
        var combined = self
        combined.merge(other, strategy: strategy)
        return combined
    }
}
199+
200+
/// Marker protocol for string-like types; it adds no requirements beyond `ExpressibleByStringLiteral`.
public protocol StringConvertible: ExpressibleByStringLiteral {}
201+
202+
// The three string representations used in this file all adopt the marker protocol.
extension BinaryDistinctString: StringConvertible {}
203+
extension String: StringConvertible {}
204+
extension NSString: StringConvertible {}
205+
206+
/// A single character stored as the raw UTF-16 code units it was created from.
///
/// Equality is decided by comparing those code units.
public struct BinaryDistinctCharacter: Equatable, Hashable, CustomStringConvertible, ExpressibleByStringLiteral {
    /// The UTF-16 code units of the character.
    let bytes: [UInt16]

    /// Captures the UTF-16 code units of a Swift `Character`.
    public init(_ character: Character) {
        self.init(bytes: Array(character.utf16))
    }

    /// Captures the UTF-16 code units of `string`; the length is not checked.
    public init(_ string: String) {
        self.init(bytes: Array(string.utf16))
    }

    /// Captures the UTF-16 code units of `nsString` after bridging it to a Swift `String`.
    public init(_ nsString: NSString) {
        self.init(bytes: Array((nsString as String).utf16))
    }

    /// Wraps the given UTF-16 code units without modification.
    public init(bytes: [UInt16]) {
        self.bytes = bytes
    }

    /// Satisfies ``ExpressibleByStringLiteral`` protocol.
    public init(stringLiteral value: String) {
        self.init(bytes: Array(value.utf16))
    }

    /// The code units decoded into a `String`.
    ///
    /// The initializer used here is non-failable, so as written this never evaluates to `nil`.
    var stringValue: String? {
        let decoded: String = String(utf16CodeUnits: bytes, count: bytes.count)
        return decoded
    }

    /// A debug-style rendering showing the decoded text and each code unit in hexadecimal.
    public var description: String {
        let hexUnits = bytes.map { String(format: "0x%02X", $0) }
        guard let text = stringValue else {
            return "BinaryDistinctCharacter(invalid UTF-8, bytes: \(hexUnits))"
        }
        return "BinaryDistinctCharacter('\(text)', bytes: \(hexUnits))"
    }

    /// Two characters are equal exactly when their code units are identical.
    public static func == (lhs: BinaryDistinctCharacter, rhs: BinaryDistinctCharacter) -> Bool {
        return lhs.bytes == rhs.bytes
    }
}

0 commit comments

Comments
 (0)