Skip to content

Commit d83ce5e

Browse files
committed
Add charsets from IANA
1 parent d6930b3 commit d83ce5e

File tree

5 files changed

+86
-13
lines changed

5 files changed

+86
-13
lines changed

HISTORY.md

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
unreleased
22
==========
33

4+
* Add charsets from IANA
45
* Add extension `.cjs` to `application/node`
56
* Add new upstream MIME types
67

db.json

Lines changed: 36 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
},
55
"application/3gpdash-qoe-report+xml": {
66
"source": "iana",
7+
"charset": "UTF-8",
78
"compressible": true
89
},
910
"application/3gpp-ims+xml": {
@@ -153,6 +154,7 @@
153154
},
154155
"application/beep+xml": {
155156
"source": "iana",
157+
"charset": "UTF-8",
156158
"compressible": true
157159
},
158160
"application/calendar+json": {
@@ -172,6 +174,7 @@
172174
},
173175
"application/cap+xml": {
174176
"source": "iana",
177+
"charset": "UTF-8",
175178
"compressible": true
176179
},
177180
"application/cbor": {
@@ -477,10 +480,12 @@
477480
},
478481
"application/fhir+json": {
479482
"source": "iana",
483+
"charset": "UTF-8",
480484
"compressible": true
481485
},
482486
"application/fhir+xml": {
483487
"source": "iana",
488+
"charset": "UTF-8",
484489
"compressible": true
485490
},
486491
"application/fido.trusted-apps+json": {
@@ -577,6 +582,7 @@
577582
},
578583
"application/im-iscomposing+xml": {
579584
"source": "iana",
585+
"charset": "UTF-8",
580586
"compressible": true
581587
},
582588
"application/index": {
@@ -918,10 +924,12 @@
918924
},
919925
"application/msc-ivr+xml": {
920926
"source": "iana",
927+
"charset": "UTF-8",
921928
"compressible": true
922929
},
923930
"application/msc-mixer+xml": {
924931
"source": "iana",
932+
"charset": "UTF-8",
925933
"compressible": true
926934
},
927935
"application/msword": {
@@ -952,10 +960,12 @@
952960
"source": "iana"
953961
},
954962
"application/news-checkgroups": {
955-
"source": "iana"
963+
"source": "iana",
964+
"charset": "US-ASCII"
956965
},
957966
"application/news-groupinfo": {
958-
"source": "iana"
967+
"source": "iana",
968+
"charset": "US-ASCII"
959969
},
960970
"application/news-transmission": {
961971
"source": "iana"
@@ -1064,10 +1074,12 @@
10641074
},
10651075
"application/pidf+xml": {
10661076
"source": "iana",
1077+
"charset": "UTF-8",
10671078
"compressible": true
10681079
},
10691080
"application/pidf-diff+xml": {
10701081
"source": "iana",
1082+
"charset": "UTF-8",
10711083
"compressible": true
10721084
},
10731085
"application/pkcs10": {
@@ -1119,6 +1131,7 @@
11191131
},
11201132
"application/poc-settings+xml": {
11211133
"source": "iana",
1134+
"charset": "UTF-8",
11221135
"compressible": true
11231136
},
11241137
"application/postscript": {
@@ -4100,14 +4113,17 @@
41004113
},
41014114
"application/vnd.omads-email+xml": {
41024115
"source": "iana",
4116+
"charset": "UTF-8",
41034117
"compressible": true
41044118
},
41054119
"application/vnd.omads-file+xml": {
41064120
"source": "iana",
4121+
"charset": "UTF-8",
41074122
"compressible": true
41084123
},
41094124
"application/vnd.omads-folder+xml": {
41104125
"source": "iana",
4126+
"charset": "UTF-8",
41114127
"compressible": true
41124128
},
41134129
"application/vnd.omaloc-supl-init": {
@@ -4968,15 +4984,18 @@
49684984
},
49694985
"application/vnd.syncml+xml": {
49704986
"source": "iana",
4987+
"charset": "UTF-8",
49714988
"compressible": true,
49724989
"extensions": ["xsm"]
49734990
},
49744991
"application/vnd.syncml.dm+wbxml": {
49754992
"source": "iana",
4993+
"charset": "UTF-8",
49764994
"extensions": ["bdm"]
49774995
},
49784996
"application/vnd.syncml.dm+xml": {
49794997
"source": "iana",
4998+
"charset": "UTF-8",
49804999
"compressible": true,
49815000
"extensions": ["xdm"]
49825001
},
@@ -4988,6 +5007,7 @@
49885007
},
49895008
"application/vnd.syncml.dmddf+xml": {
49905009
"source": "iana",
5010+
"charset": "UTF-8",
49915011
"compressible": true,
49925012
"extensions": ["ddf"]
49935013
},
@@ -4996,6 +5016,7 @@
49965016
},
49975017
"application/vnd.syncml.dmtnds+xml": {
49985018
"source": "iana",
5019+
"charset": "UTF-8",
49995020
"compressible": true
50005021
},
50015022
"application/vnd.syncml.ds.notification": {
@@ -5164,6 +5185,7 @@
51645185
},
51655186
"application/vnd.wap.wbxml": {
51665187
"source": "iana",
5188+
"charset": "UTF-8",
51675189
"extensions": ["wbxml"]
51685190
},
51695191
"application/vnd.wap.wmlc": {
@@ -7467,11 +7489,13 @@
74677489
},
74687490
"text/n3": {
74697491
"source": "iana",
7492+
"charset": "UTF-8",
74707493
"compressible": true,
74717494
"extensions": ["n3"]
74727495
},
74737496
"text/parameters": {
7474-
"source": "iana"
7497+
"source": "iana",
7498+
"charset": "UTF-8"
74757499
},
74767500
"text/parityfec": {
74777501
"source": "iana"
@@ -7482,7 +7506,8 @@
74827506
"extensions": ["txt","text","conf","def","list","log","in","ini"]
74837507
},
74847508
"text/provenance-notation": {
7485-
"source": "iana"
7509+
"source": "iana",
7510+
"charset": "UTF-8"
74867511
},
74877512
"text/prs.fallenstein.rst": {
74887513
"source": "iana"
@@ -7594,7 +7619,8 @@
75947619
"extensions": ["scurl"]
75957620
},
75967621
"text/vnd.debian.copyright": {
7597-
"source": "iana"
7622+
"source": "iana",
7623+
"charset": "UTF-8"
75987624
},
75997625
"text/vnd.dmclientscript": {
76007626
"source": "iana"
@@ -7604,7 +7630,8 @@
76047630
"extensions": ["sub"]
76057631
},
76067632
"text/vnd.esmertec.theme-descriptor": {
7607-
"source": "iana"
7633+
"source": "iana",
7634+
"charset": "UTF-8"
76087635
},
76097636
"text/vnd.ficlab.flt": {
76107637
"source": "iana"
@@ -7667,10 +7694,12 @@
76677694
},
76687695
"text/vnd.sun.j2me.app-descriptor": {
76697696
"source": "iana",
7697+
"charset": "UTF-8",
76707698
"extensions": ["jad"]
76717699
},
76727700
"text/vnd.trolltech.linguist": {
7673-
"source": "iana"
7701+
"source": "iana",
7702+
"charset": "UTF-8"
76747703
},
76757704
"text/vnd.wap.si": {
76767705
"source": "iana"

scripts/fetch-iana.js

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,9 @@ var symbolRegExp = /[._-]/g
2727
var trimQuotesRegExp = /^"|"$/gm
2828
var urlReferenceRegExp = /\[(https?:\/\/[^\]]+)]/gi
2929

30+
var CHARSET_DEFAULT_REGEXP = /(?:\bcharset\b[^.]*(?:\.\s+default\s+(?:value\s+)?is|\bdefault[^.]*(?:of|is)|\bmust\s+have\s+the\s+value|\bvalue\s+must\s+be)\s+|\bcharset\s*\(?defaults\s+to\s+|\bdefault\b[^.]*?\bchar(?:set|act[eo]r\s+set)\b[^.]*?(?:of|is)\s+|\bcharset\s+(?:must|is)\s+always\s+(?:be\s+)?)["']?([a-z0-9]+-[a-z0-9-]+)/im
31+
var MIME_TYPE_HAS_CHARSET_PARAMETER_REGEXP = /parameters\s*:[^.]*\bcharset\b/im
32+
3033
co(function * () {
3134
var gens = yield [
3235
get('application', { extensions: /(?:\/(?:gzip|ld\+json|n-quads|n-triples|vnd\.apple\..+)|\+xml)$/ }),
@@ -61,6 +64,7 @@ co(function * () {
6164
}
6265

6366
json[mime] = {
67+
charset: result.charset,
6468
extensions: result.extensions,
6569
notes: result.notes,
6670
sources: result.sources
@@ -118,6 +122,9 @@ function addTemplateData (data, options) {
118122
// use extracted mime
119123
data.mime = mime
120124

125+
// use extracted charset
126+
data.charset = extractTemplateCharset(body)
127+
121128
// use extracted extensions
122129
var useExt = opts.extensions &&
123130
(opts.extensions === true || opts.extensions.test(data.mime))
@@ -165,6 +172,18 @@ function extractTemplateMime (body) {
165172
return (type + '/' + subtype).toLowerCase()
166173
}
167174

175+
function extractTemplateCharset (body) {
176+
if (!MIME_TYPE_HAS_CHARSET_PARAMETER_REGEXP.test(body)) {
177+
return undefined
178+
}
179+
180+
var match = CHARSET_DEFAULT_REGEXP.exec(body)
181+
182+
return match
183+
? match[1].toUpperCase()
184+
: undefined
185+
}
186+
168187
function extractTemplateExtensions (body) {
169188
var match = extensionsRegExp.exec(body) || extensionsQuotedRegExp.exec(body)
170189

src/custom-types.json

Lines changed: 0 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -769,12 +769,6 @@
769769
"text/tab-separated-values": {
770770
"compressible": true
771771
},
772-
"text/turtle": {
773-
"charset": "UTF-8",
774-
"sources": [
775-
"https://www.w3.org/TR/turtle/#h3_sec-mime"
776-
]
777-
},
778772
"text/uri-list": {
779773
"compressible": true
780774
},

0 commit comments

Comments
 (0)