@@ -34,8 +34,6 @@ const r = getData();
3434
3535import { read_member_array , read_mapped_map , read_emoji_trie } from './decoder.js' ;
3636
37- import type { Node } from "./decoder.js" ;
38-
3937// @TODO : This should be lazily loaded
4038
4139const VALID = new Set ( read_member_array ( r ) ) ;
@@ -44,64 +42,99 @@ const MAPPED = read_mapped_map(r);
4442const EMOJI_ROOT = read_emoji_trie ( r ) ;
4543//const NFC_CHECK = new Set(read_member_array(r, Array.from(VALID.values()).sort((a, b) => a - b)));
4644
47- function nfc ( s : string ) : string {
48- return s . normalize ( 'NFC' ) ;
// Code points referenced by the per-label post-check.
//const STOP = 0x2E;     // '.'
const HYPHEN = 0x2D;     // '-'
const UNDERSCORE = 0x5F; // '_'
48+
/**
 * Converts `name` into an array of numbers by delegating to
 * `toUtf8CodePoints`.
 *
 * NOTE(review): presumably one entry per Unicode code point, judging by the
 * helper's name - confirm against its definition.
 *
 * @param name - Text to split.
 * @returns Whatever `toUtf8CodePoints(name)` returns.
 */
function explode_cp(name: string): Array<number> {
  const codePoints = toUtf8CodePoints(name);
  return codePoints;
}
5052
/**
 * Returns a copy of `cps` with every U+FE0F (VARIATION SELECTOR-16) removed.
 *
 * The input array is not modified.
 *
 * @param cps - Code points to filter.
 * @returns A new array containing the remaining code points in order.
 */
function filter_fe0f(cps: Array<number>): Array<number> {
  // Strict comparison: both sides are numbers, so no coercion is wanted.
  return cps.filter(cp => cp !== 0xFE0F);
}
5456
55- export function ens_normalize ( name : string , beautify = false ) : string {
56- const input = toUtf8CodePoints ( name ) . reverse ( ) ; // flip for pop
57- const output = [ ] ;
58- while ( input . length ) {
59- const emoji = consume_emoji_reversed ( input , EMOJI_ROOT ) ;
60- if ( emoji ) {
61- output . push ( ...( beautify ? emoji : filter_fe0f ( emoji ) ) ) ;
62- continue ;
63- }
64- const cp = input . pop ( ) ;
65- if ( VALID . has ( cp ) ) {
66- output . push ( cp ) ;
67- continue ;
68- }
69- if ( IGNORED . has ( cp ) ) {
70- continue ;
71- }
72- let cps = MAPPED [ cp ] ;
73- if ( cps ) {
74- output . push ( ...cps ) ;
75- continue ;
76- }
77- throw new Error ( `Disallowed codepoint: 0x${ cp . toString ( 16 ) . toUpperCase ( ) } ` ) ;
78- }
79- return nfc ( String . fromCodePoint ( ...output ) ) ;
/**
 * Checks every dot-separated label of `name` and returns `name` unchanged
 * when all of them pass.
 *
 * Two rules are applied to each label, in this order:
 *  1. Underscores may only form a leading run: every code point before the
 *     last underscore must itself be an underscore (`__ab` passes, `a_b`
 *     fails).
 *  2. A label of at least four code points that are all below 0x80 must not
 *     have hyphens at index 2 and index 3 (e.g. `xn--...`).
 *
 * @param name - Name to check; split on `.` into labels.
 * @returns The same `name` that was passed in.
 * @throws Error with message `Invalid label "<label>": <reason>` for the
 *   first label that breaks a rule.
 */
export function ens_normalize_post_check(name: string): string {
  for (const label of name.split('.')) {
    const cps = explode_cp(label);

    // The reason is computed directly instead of throwing and re-catching an
    // inner Error, so no untyped catch variable has to be dereferenced.
    let reason: string | undefined;
    const lastUnderscore = cps.lastIndexOf(UNDERSCORE);
    if (lastUnderscore > 0 && cps.slice(0, lastUnderscore).some(cp => cp !== UNDERSCORE)) {
      reason = 'underscore only allowed at start';
    } else if (cps.length >= 4 && cps.every(cp => cp < 0x80) && cps[2] === HYPHEN && cps[3] === HYPHEN) {
      reason = 'invalid label extension';
    }

    if (reason !== undefined) {
      throw new Error(`Invalid label "${label}": ${reason}`);
    }
  }
  return name;
}
75+
/**
 * Normalizes `name`, stripping U+FE0F from any emoji sequences found.
 *
 * @param name - Name to normalize.
 * @returns The normalized name.
 * @throws Error when `normalize` rejects a code point or a label fails the
 *   post-check.
 */
export function ens_normalize(name: string): string {
  // normalize() already returns its result through ens_normalize_post_check,
  // and that check returns its argument unchanged, so running it a second
  // time here would only repeat the same validation.
  return normalize(name, filter_fe0f);
}
8179
/**
 * Core normalization loop.
 *
 * Repeatedly takes the next item from `name`: an emoji sequence if
 * `consume_emoji_reversed` matches one, otherwise a single code point.
 * Emoji are passed through `emoji_filter`; code points in `VALID` are kept;
 * code points in `IGNORED` are dropped; code points with an entry in
 * `MAPPED` are replaced by that entry. The collected code points are then
 * NFC-composed and run through `ens_normalize_post_check`.
 *
 * @param name - Name to normalize.
 * @param emoji_filter - Applied to each matched emoji sequence before it is
 *   appended to the output.
 * @returns The normalized, post-checked name.
 * @throws Error `Disallowed codepoint: 0x<HEX>` for a code point that is not
 *   valid, ignored or mapped; also anything `ens_normalize_post_check` throws.
 */
function normalize(name: string, emoji_filter: (a: Array<number>) => Array<number>): string {
  const input = explode_cp(name).reverse(); // flipped so the next code point can be popped
  const output: Array<number> = [];
  while (input.length) {
    const emoji = consume_emoji_reversed(input);
    if (emoji) {
      output.push(...emoji_filter(emoji));
      continue;
    }
    const cp = input.pop();
    if (cp === undefined) {
      // Cannot happen while input.length > 0; the guard only narrows the type.
      break;
    }
    if (VALID.has(cp)) {
      output.push(cp);
      continue;
    }
    if (IGNORED.has(cp)) {
      continue;
    }
    const cps = MAPPED[cp];
    if (cps) {
      output.push(...cps);
      continue;
    }
    throw new Error(`Disallowed codepoint: 0x${cp.toString(16).toUpperCase()}`);
  }
  // Build the string one code point at a time: spreading the whole array
  // into String.fromCodePoint can exceed the engine's argument limit on very
  // long names.
  let text = '';
  for (const cp of output) {
    text += String.fromCodePoint(cp);
  }
  return ens_normalize_post_check(nfc(text));
}
82106
83- function consume_emoji_reversed ( cps : Array < number > , node : Node , eaten ?: Array < number > ) {
84- let emoji ;
85- const stack = [ ] ;
86- let pos = cps . length ;
87- if ( eaten ) { eaten . length = 0 ; } // clear input buffer (if needed)
88- while ( pos ) {
89- const cp = cps [ -- pos ] ;
90- const branch = node . branches . find ( x => x . set . has ( cp ) ) ;
91- if ( branch == null ) { break ; }
92- node = branch . node ;
93- if ( ! node ) { break ; }
94- stack . push ( cp ) ;
95- if ( node . fe0f ) {
96- stack . push ( 0xFE0F ) ;
97- if ( pos > 0 && cps [ pos - 1 ] == 0xFE0F ) { pos -- ; }
98- }
99- if ( node . valid ) { // this is a valid emoji (so far)
100- emoji = stack . slice ( ) ; // copy stack
101- if ( eaten ) { eaten . push ( ...cps . slice ( pos ) . reverse ( ) ) ; } // copy input (if needed)
102- cps . length = pos ; // truncate
103- }
104- }
105- return emoji ;
/**
 * Returns `s` in Unicode Normalization Form C.
 *
 * @param s - Text to compose.
 * @returns The NFC form of `s`.
 */
function nfc(s: string): string {
  const composed = s.normalize('NFC');
  return composed;
}
107110
/**
 * Tries to match an emoji sequence at the END of `cps` by walking the trie
 * rooted at `EMOJI_ROOT` (the caller keeps its input reversed, so the end of
 * the array is the front of the text).
 *
 * On a match, the matched code points are removed from `cps` (it is
 * truncated in place) and the emoji is returned. When several prefixes are
 * valid, the walk continues and the last valid one reached wins.
 *
 * @param cps - Reversed code points; truncated when an emoji is matched.
 * @param eaten - Optional buffer. It is cleared on entry and receives the
 *   input code points that were consumed, in forward order.
 * @returns The emoji's code points, or `undefined` when nothing matched.
 */
function consume_emoji_reversed(cps: Array<number>, eaten?: Array<number>) {
  if (eaten) {
    eaten.length = 0; // start from an empty buffer
  }
  const path: Array<number> = []; // code points accepted so far (FE0F re-inserted)
  let cursor = EMOJI_ROOT;
  let matched;
  let remembered;
  for (let pos = cps.length; pos > 0; ) {
    const cp = cps[--pos];
    const branch = cursor.branches.find(x => x.set.has(cp));
    cursor = branch?.node;
    if (!cursor) {
      break; // no branch accepts this code point
    }
    if (cursor.save) {
      remembered = cp; // remember for a later exclusion check
    } else if (cursor.check && cp === remembered) {
      break; // excluded: same code point as the remembered one
    }
    path.push(cp);
    if (cursor.fe0f) {
      path.push(0xFE0F);
      if (pos > 0 && cps[pos - 1] == 0xFE0F) {
        pos--; // swallow the optional FE0F present in the input
      }
    }
    if (cursor.valid) {
      // Everything read so far forms a valid emoji: snapshot it.
      matched = path.slice();
      if (cursor.valid == 2) {
        matched.splice(1, 1); // drop the FE0F at index 1 (RGI ZWJ don't follow spec!)
      }
      if (eaten) {
        eaten.push(...cps.slice(pos).reverse()); // record consumed input
      }
      cps.length = pos; // remove the consumed code points from the input
    }
  }
  return matched;
}
0 commit comments