From 36306e3263459b99684e45f077fb1d9c317fcabf Mon Sep 17 00:00:00 2001 From: ImUrX Date: Fri, 30 Dec 2022 07:06:14 -0300 Subject: [PATCH] use Intl.Locale --- fluent-langneg/src/locale.ts | 179 ----------------------------- fluent-langneg/src/matches.ts | 84 +++++++++++--- fluent-langneg/test/locale_test.js | 173 ---------------------------- 3 files changed, 65 insertions(+), 371 deletions(-) delete mode 100644 fluent-langneg/src/locale.ts delete mode 100644 fluent-langneg/test/locale_test.js diff --git a/fluent-langneg/src/locale.ts b/fluent-langneg/src/locale.ts deleted file mode 100644 index 6b8fef80..00000000 --- a/fluent-langneg/src/locale.ts +++ /dev/null @@ -1,179 +0,0 @@ -/* eslint no-magic-numbers: 0 */ - -const languageCodeRe = "([a-z]{2,3}|\\*)"; -const scriptCodeRe = "(?:-([a-z]{4}|\\*))"; -const regionCodeRe = "(?:-([a-z]{2}|\\*))"; -const variantCodeRe = "(?:-(([0-9][a-z0-9]{3}|[a-z0-9]{5,8})|\\*))"; - -/** - * Regular expression splitting locale id into four pieces: - * - * Example: `en-Latn-US-macos` - * - * language: en - * script: Latn - * region: US - * variant: macos - * - * It can also accept a range `*` character on any position. - */ -const localeRe = new RegExp( - `^${languageCodeRe}${scriptCodeRe}?${regionCodeRe}?${variantCodeRe}?$`, - "i" -); - -export class Locale { - isWellFormed: boolean; - language?: string; - script?: string; - region?: string; - variant?: string; - - /** - * Parses a locale id using the localeRe into an array with four elements. - * - * If the second argument `range` is set to true, it places range `*` char - * in place of any missing piece. - * - * It also allows skipping the script section of the id, so `en-US` is - * properly parsed as `en-*-US-*`. - */ - constructor(locale: string) { - const result = localeRe.exec(locale.replace(/_/g, "-")); - if (!result) { - this.isWellFormed = false; - return; - } - - let [, language, script, region, variant] = result; - - if (language) { - this.language = language.toLowerCase(); - } - if (script) { - this.script = script[0].toUpperCase() + script.slice(1); - } - if (region) { - this.region = region.toUpperCase(); - } - this.variant = variant; - this.isWellFormed = true; - } - - isEqual(other: Locale): boolean { - return ( - this.language === other.language && - this.script === other.script && - this.region === other.region && - this.variant === other.variant - ); - } - - matches(other: Locale, thisRange = false, otherRange = false): boolean { - return ( - (this.language === other.language || - (thisRange && this.language === undefined) || - (otherRange && other.language === undefined)) && - (this.script === other.script || - (thisRange && this.script === undefined) || - (otherRange && other.script === undefined)) && - (this.region === other.region || - (thisRange && this.region === undefined) || - (otherRange && other.region === undefined)) && - (this.variant === other.variant || - (thisRange && this.variant === undefined) || - (otherRange && other.variant === undefined)) - ); - } - - toString(): string { - return [this.language, this.script, this.region, this.variant] - .filter(part => part !== undefined) - .join("-"); - } - - clearVariants(): void { - this.variant = undefined; - } - - clearRegion(): void { - this.region = undefined; - } - - addLikelySubtags(): boolean { - const newLocale = getLikelySubtagsMin(this.toString().toLowerCase()); - if (newLocale) { - this.language = newLocale.language; - this.script = newLocale.script; - this.region = newLocale.region; - this.variant = newLocale.variant; - return true; - } - return false; - } -} - -/** - * Below is a manually a list of likely subtags corresponding to Unicode - * CLDR likelySubtags list. - * This list is curated by the maintainers of Project Fluent and is - * intended to be used in place of the full likelySubtags list in use cases - * where full list cannot be (for example, due to the size). - * - * This version of the list is based on CLDR 30.0.3. - */ -const likelySubtagsMin: Record = { - ar: "ar-arab-eg", - "az-arab": "az-arab-ir", - "az-ir": "az-arab-ir", - be: "be-cyrl-by", - da: "da-latn-dk", - el: "el-grek-gr", - en: "en-latn-us", - fa: "fa-arab-ir", - ja: "ja-jpan-jp", - ko: "ko-kore-kr", - pt: "pt-latn-br", - sr: "sr-cyrl-rs", - "sr-ru": "sr-latn-ru", - sv: "sv-latn-se", - ta: "ta-taml-in", - uk: "uk-cyrl-ua", - zh: "zh-hans-cn", - "zh-hant": "zh-hant-tw", - "zh-hk": "zh-hant-hk", - "zh-mo": "zh-hant-mo", - "zh-tw": "zh-hant-tw", - "zh-gb": "zh-hant-gb", - "zh-us": "zh-hant-us", -}; - -const regionMatchingLangs = [ - "az", - "bg", - "cs", - "de", - "es", - "fi", - "fr", - "hu", - "it", - "lt", - "lv", - "nl", - "pl", - "ro", - "ru", -]; - -function getLikelySubtagsMin(loc: string): Locale | null { - if (Object.prototype.hasOwnProperty.call(likelySubtagsMin, loc)) { - return new Locale(likelySubtagsMin[loc]); - } - const locale = new Locale(loc); - if (locale.language && regionMatchingLangs.includes(locale.language)) { - locale.region = locale.language.toUpperCase(); - return locale; - } - return null; -} diff --git a/fluent-langneg/src/matches.ts b/fluent-langneg/src/matches.ts index 7cfbfceb..e63bbb2c 100644 --- a/fluent-langneg/src/matches.ts +++ b/fluent-langneg/src/matches.ts @@ -1,7 +1,5 @@ /* eslint no-magic-numbers: 0 */ -import { Locale } from "./locale.js"; - /** * Negotiates the languages between the list of requested locales against * a list of available locales. @@ -78,20 +76,20 @@ export function filterMatches( strategy: string ): Array { const supportedLocales: Set = new Set(); - const availableLocalesMap: Map = new Map(); + const availableLocalesMap: Map = new Map(); for (let locale of availableLocales) { - let newLocale = new Locale(locale); - if (newLocale.isWellFormed) { - availableLocalesMap.set(locale, new Locale(locale)); + let newLocale = tryLocale(locale); + if (newLocale) { + availableLocalesMap.set(locale, newLocale); } } outer: for (const reqLocStr of requestedLocales) { const reqLocStrLC = reqLocStr.toLowerCase(); - const requestedLocale = new Locale(reqLocStrLC); + let requestedLocale = tryLocale(reqLocStrLC); - if (requestedLocale.language === undefined) { + if (!requestedLocale) { continue; } @@ -115,7 +113,7 @@ export function filterMatches( // This turns `en` into `en-*-*-*` and `en-US` into `en-*-US-*` // Example: ['en-US'] * ['en'] = ['en'] for (const [key, availableLocale] of availableLocalesMap.entries()) { - if (availableLocale.matches(requestedLocale, true, false)) { + if (localeMatches(availableLocale, requestedLocale, true, false)) { supportedLocales.add(key); availableLocalesMap.delete(key); if (strategy === "lookup") { @@ -132,9 +130,10 @@ export function filterMatches( // If data is available, it'll expand `en` into `en-Latn-US` and // `zh` into `zh-Hans-CN`. // Example: ['en'] * ['en-GB', 'en-US'] = ['en-US'] - if (requestedLocale.addLikelySubtags()) { + let maximized = requestedLocale.maximize(); + if (maximized.toString() !== requestedLocale.toString()) { for (const [key, availableLocale] of availableLocalesMap.entries()) { - if (availableLocale.matches(requestedLocale, true, false)) { + if (localeMatches(availableLocale, maximized, true, false)) { supportedLocales.add(key); availableLocalesMap.delete(key); if (strategy === "lookup") { @@ -150,10 +149,10 @@ export function filterMatches( // 4) Attempt to look up for a different variant for the same locale ID // Example: ['en-US-mac'] * ['en-US-win'] = ['en-US-win'] - requestedLocale.clearVariants(); + requestedLocale = requestedLocale.minimize(); for (const [key, availableLocale] of availableLocalesMap.entries()) { - if (availableLocale.matches(requestedLocale, true, true)) { + if (localeMatches(availableLocale, requestedLocale, true, true)) { supportedLocales.add(key); availableLocalesMap.delete(key); if (strategy === "lookup") { @@ -172,11 +171,11 @@ export function filterMatches( // over `zh-CN`. // // Example: ['zh-Hant-HK'] * ['zh-TW', 'zh-CN'] = ['zh-TW'] - requestedLocale.clearRegion(); - - if (requestedLocale.addLikelySubtags()) { + requestedLocale = new Intl.Locale(requestedLocale.toString(), { region: "001" }) + maximized = requestedLocale.maximize(); + if (maximized.toString() !== requestedLocale.toString()) { for (const [key, availableLocale] of availableLocalesMap.entries()) { - if (availableLocale.matches(requestedLocale, true, false)) { + if (localeMatches(availableLocale, requestedLocale, true, false)) { supportedLocales.add(key); availableLocalesMap.delete(key); if (strategy === "lookup") { @@ -192,10 +191,10 @@ export function filterMatches( // 6) Attempt to look up for a different region for the same locale ID // Example: ['en-US'] * ['en-AU'] = ['en-AU'] - requestedLocale.clearRegion(); + requestedLocale = new Intl.Locale(requestedLocale.toString(), { region: "001" }) for (const [key, availableLocale] of availableLocalesMap.entries()) { - if (availableLocale.matches(requestedLocale, true, true)) { + if (localeMatches(availableLocale, requestedLocale, true, true)) { supportedLocales.add(key); availableLocalesMap.delete(key); if (strategy === "lookup") { @@ -211,3 +210,50 @@ export function filterMatches( return Array.from(supportedLocales); } + +function tryLocale(locale: string): Intl.Locale | null { + try { + return new Intl.Locale(locale.replace(/_/g, "-")); + } catch { + return null; + } +} + +function getPrivate(loc: Intl.Locale): string | null { + const result = /(?:-x((?:-[a-z]{2,8})+))$/i.exec(loc.toString()); + if (!result) { + return null; + } + + const [, priv] = result; + return priv.substring(1).toLowerCase(); +} + +function localeMatches(loc1: Intl.Locale, loc2: Intl.Locale, thisRange = false, otherRange = false): boolean { + const loc1Priv = getPrivate(loc1); + const loc2Priv = getPrivate(loc2); + return (loc1.language === loc2.language || + (thisRange && loc1.language === undefined) || + (otherRange && loc2.language === undefined)) && + (loc1.script === loc2.script || + (thisRange && loc1.script === undefined) || + (otherRange && loc2.script === undefined)) && + (loc1.region === loc2.region || + (thisRange && loc1.region === undefined) || + (otherRange && loc2.region === undefined)) && + (loc1.calendar === loc2.calendar || + (thisRange && loc1.calendar === undefined) || + (otherRange && loc2.calendar === undefined)) && + (loc1.hourCycle === loc2.hourCycle || + (thisRange && loc1.hourCycle === undefined) || + (otherRange && loc2.hourCycle === undefined)) && + (loc1.numberingSystem === loc2.numberingSystem || + (thisRange && loc1.numberingSystem === undefined) || + (otherRange && loc2.numberingSystem === undefined)) && + (loc1.collation === loc2.collation || + (thisRange && loc1.collation === undefined) || + (otherRange && loc2.collation === undefined)) && + (loc1Priv === loc2Priv || + (thisRange && loc1Priv === null) || + (otherRange && loc2Priv === null)) +} diff --git a/fluent-langneg/test/locale_test.js b/fluent-langneg/test/locale_test.js deleted file mode 100644 index fc9300a9..00000000 --- a/fluent-langneg/test/locale_test.js +++ /dev/null @@ -1,173 +0,0 @@ -import assert from "assert"; -import { Locale } from "../esm/locale.js"; - -function isLocaleEqual(str, ref) { - const locale = new Locale(str); - return locale.isEqual(ref); -} - -suite("Parses simple locales", () => { - test("language part", () => { - assert.ok( - isLocaleEqual("en", { - language: "en", - }) - ); - - assert.ok( - isLocaleEqual("lij", { - language: "lij", - }) - ); - }); - - test("script part", () => { - assert.ok( - isLocaleEqual("en-Latn", { - language: "en", - script: "Latn", - }) - ); - - assert.ok( - isLocaleEqual("lij-Arab", { - language: "lij", - script: "Arab", - }) - ); - }); - - test("region part", () => { - assert.ok( - isLocaleEqual("en-Latn-US", { - language: "en", - script: "Latn", - region: "US", - }) - ); - - assert.ok( - isLocaleEqual("lij-Arab-FA", { - language: "lij", - script: "Arab", - region: "FA", - }) - ); - }); - - test("variant part", () => { - assert.ok( - isLocaleEqual("en-Latn-US-macos", { - language: "en", - script: "Latn", - region: "US", - variant: "macos", - }) - ); - - assert.ok( - isLocaleEqual("lij-Arab-FA-linux", { - language: "lij", - script: "Arab", - region: "FA", - variant: "linux", - }) - ); - }); - - test("skipping script part", () => { - assert.ok( - isLocaleEqual("en-US", { - language: "en", - region: "US", - }) - ); - - assert.ok( - isLocaleEqual("lij-FA-linux", { - language: "lij", - region: "FA", - variant: "linux", - }) - ); - }); - - test("skipping variant part", () => { - assert.ok( - isLocaleEqual("en-US", { - language: "en", - region: "US", - }) - ); - - assert.ok( - isLocaleEqual("lij-FA-linux", { - language: "lij", - region: "FA", - variant: "linux", - }) - ); - }); -}); - -suite("Parses locale ranges", () => { - test("language part", () => { - assert.ok( - isLocaleEqual("*", { - language: "*", - }) - ); - - assert.ok( - isLocaleEqual("*-Latn", { - language: "*", - script: "Latn", - }) - ); - - assert.ok( - isLocaleEqual("*-US", { - language: "*", - region: "US", - }) - ); - }); - - test("script part", () => { - assert.ok( - isLocaleEqual("en-*", { - language: "en", - script: "*", - }) - ); - - assert.ok( - isLocaleEqual("en-*-US", { - language: "en", - script: "*", - region: "US", - }) - ); - }); - - test("region part", () => { - assert.ok( - isLocaleEqual("en-Latn-*", { - language: "en", - script: "Latn", - region: "*", - }) - ); - }); - - test("variant part", () => { - assert.ok( - isLocaleEqual("en-Latn-US-*", { - language: "en", - script: "Latn", - region: "US", - variant: "*", - }) - ); - }); -});