ttf: Optimize by using standard MacRoman in post2 subtable, if possible.
author Sebastian Rasmussen <[email protected]>
Thu, 24 Apr 2025 12:48:49 +0000 (14:48 +0200)
committer Sebastian Rasmussen <[email protected]>
Tue, 29 Apr 2025 10:21:32 +0000 (12:21 +0200)
If a string explicitly stored in the post2 subtable is identical
to a standard MacRoman string, choose to use the latter. This is
better because normal strings are stored in the string table after
the post2 header, but MacRoman strings are represented by a single
16-bit value only, making subset TTFs even smaller.

source/fitz/subset-ttf.c

index 02ce03d3d1c72c5c23b7d65c18e9ad9f21966f4b..a4d6e324ec2c165750ca0bcf9a4ae7da15f6c652 100644 (file)
@@ -1431,6 +1431,291 @@ shrink_loca_if_possible(fz_context *ctx, ttf_t *ttf)
        put16(ttf->index_to_loc_formatp, 0);
 }
 
+/*
+ * Glyph names looked up by find_macroman_string(), each paired with the
+ * index (0..257) that is returned when the name matches.
+ *
+ * The entries must stay sorted by charname in strcmp() order (byte-wise,
+ * so '.' first, then upper case, then lower case), because the lookup
+ * is a binary search. Insert any new entry at its sorted position.
+ *
+ * NOTE(review): presumably these are the 258 standard Macintosh glyph
+ * names that a format 2 'post' table can reference by index alone --
+ * verify against the 'post' table specification before editing entries.
+ * NOTE(review): nothing visible here writes to the table; it could
+ * probably be declared const.
+ */
+static struct { const char *charname; int idx; } macroman[] =
+{
+       {   ".notdef",                                 0},
+       {   ".null",                                   1},
+       {   "A",                                      36},
+       {   "AE",                                    144},
+       {   "Aacute",                                201},
+       {   "Acircumflex",                           199},
+       {   "Adieresis",                              98},
+       {   "Agrave",                                173},
+       {   "Aring",                                  99},
+       {   "Atilde",                                174},
+       {   "B",                                      37},
+       {   "C",                                      38},
+       {   "Cacute",                                253},
+       {   "Ccaron",                                255},
+       {   "Ccedilla",                              100},
+       {   "D",                                      39},
+       {   "Delta",                                 168},
+       {   "E",                                      40},
+       {   "Eacute",                                101},
+       {   "Ecircumflex",                           200},
+       {   "Edieresis",                             202},
+       {   "Egrave",                                203},
+       {   "Eth",                                   233},
+       {   "F",                                      41},
+       {   "G",                                      42},
+       {   "Gbreve",                                248},
+       {   "H",                                      43},
+       {   "I",                                      44},
+       {   "Iacute",                                204},
+       {   "Icircumflex",                           205},
+       {   "Idieresis",                             206},
+       {   "Idotaccent",                            250},
+       {   "Igrave",                                207},
+       {   "J",                                      45},
+       {   "K",                                      46},
+       {   "L",                                      47},
+       {   "Lslash",                                226},
+       {   "M",                                      48},
+       {   "N",                                      49},
+       {   "Ntilde",                                102},
+       {   "O",                                      50},
+       {   "OE",                                    176},
+       {   "Oacute",                                208},
+       {   "Ocircumflex",                           209},
+       {   "Odieresis",                             103},
+       {   "Ograve",                                211},
+       {   "Omega",                                 159},
+       {   "Oslash",                                145},
+       {   "Otilde",                                175},
+       {   "P",                                      51},
+       {   "Q",                                      52},
+       {   "R",                                      53},
+       {   "S",                                      54},
+       {   "Scaron",                                228},
+       {   "Scedilla",                              251},
+       {   "T",                                      55},
+       {   "Thorn",                                 237},
+       {   "U",                                      56},
+       {   "Uacute",                                212},
+       {   "Ucircumflex",                           213},
+       {   "Udieresis",                             104},
+       {   "Ugrave",                                214},
+       {   "V",                                      57},
+       {   "W",                                      58},
+       {   "X",                                      59},
+       {   "Y",                                      60},
+       {   "Yacute",                                235},
+       {   "Ydieresis",                             187},
+       {   "Z",                                      61},
+       {   "Zcaron",                                230},
+       {   "a",                                      68},
+       {   "aacute",                                105},
+       {   "acircumflex",                           107},
+       {   "acute",                                 141},
+       {   "adieresis",                             108},
+       {   "ae",                                    160},
+       {   "agrave",                                106},
+       {   "ampersand",                               9},
+       {   "apple",                                 210},
+       {   "approxequal",                           167},
+       {   "aring",                                 110},
+       {   "asciicircum",                            65},
+       {   "asciitilde",                             97},
+       {   "asterisk",                               13},
+       {   "at",                                     35},
+       {   "atilde",                                109},
+       {   "b",                                      69},
+       {   "backslash",                              63},
+       {   "bar",                                    95},
+       {   "braceleft",                              94},
+       {   "braceright",                             96},
+       {   "bracketleft",                            62},
+       {   "bracketright",                           64},
+       {   "breve",                                 219},
+       {   "brokenbar",                             232},
+       {   "bullet",                                135},
+       {   "c",                                      70},
+       {   "cacute",                                254},
+       {   "caron",                                 225},
+       {   "ccaron",                                256},
+       {   "ccedilla",                              111},
+       {   "cedilla",                               222},
+       {   "cent",                                  132},
+       {   "circumflex",                            216},
+       {   "colon",                                  29},
+       {   "comma",                                  15},
+       {   "copyright",                             139},
+       {   "currency",                              189},
+       {   "d",                                      71},
+       {   "dagger",                                130},
+       {   "daggerdbl",                             194},
+       {   "dcroat",                                257},
+       {   "degree",                                131},
+       {   "dieresis",                              142},
+       {   "divide",                                184},
+       {   "dollar",                                  7},
+       {   "dotaccent",                             220},
+       {   "dotlessi",                              215},
+       {   "e",                                      72},
+       {   "eacute",                                112},
+       {   "ecircumflex",                           114},
+       {   "edieresis",                             115},
+       {   "egrave",                                113},
+       {   "eight",                                  27},
+       {   "ellipsis",                              171},
+       {   "emdash",                                179},
+       {   "endash",                                178},
+       {   "equal",                                  32},
+       {   "eth",                                   234},
+       {   "exclam",                                  4},
+       {   "exclamdown",                            163},
+       {   "f",                                      73},
+       {   "fi",                                    192},
+       {   "five",                                   24},
+       {   "fl",                                    193},
+       {   "florin",                                166},
+       {   "four",                                   23},
+       {   "fraction",                              188},
+       {   "franc",                                 247},
+       {   "g",                                      74},
+       {   "gbreve",                                249},
+       {   "germandbls",                            137},
+       {   "grave",                                  67},
+       {   "greater",                                33},
+       {   "greaterequal",                          149},
+       {   "guillemotleft",                         169},
+       {   "guillemotright",                        170},
+       {   "guilsinglleft",                         190},
+       {   "guilsinglright",                        191},
+       {   "h",                                      75},
+       {   "hungarumlaut",                          223},
+       {   "hyphen",                                 16},
+       {   "i",                                      76},
+       {   "iacute",                                116},
+       {   "icircumflex",                           118},
+       {   "idieresis",                             119},
+       {   "igrave",                                117},
+       {   "infinity",                              146},
+       {   "integral",                              156},
+       {   "j",                                      77},
+       {   "k",                                      78},
+       {   "l",                                      79},
+       {   "less",                                   31},
+       {   "lessequal",                             148},
+       {   "logicalnot",                            164},
+       {   "lozenge",                               185},
+       {   "lslash",                                227},
+       {   "m",                                      80},
+       {   "macron",                                218},
+       {   "minus",                                 239},
+       {   "mu",                                    151},
+       {   "multiply",                              240},
+       {   "n",                                      81},
+       {   "nine",                                   28},
+       {   "nonbreakingspace",                      172},
+       {   "nonmarkingreturn",                        2},
+       {   "notequal",                              143},
+       {   "ntilde",                                120},
+       {   "numbersign",                              6},
+       {   "o",                                      82},
+       {   "oacute",                                121},
+       {   "ocircumflex",                           123},
+       {   "odieresis",                             124},
+       {   "oe",                                    177},
+       {   "ogonek",                                224},
+       {   "ograve",                                122},
+       {   "one",                                    20},
+       {   "onehalf",                               244},
+       {   "onequarter",                            245},
+       {   "onesuperior",                           241},
+       {   "ordfeminine",                           157},
+       {   "ordmasculine",                          158},
+       {   "oslash",                                161},
+       {   "otilde",                                125},
+       {   "p",                                      83},
+       {   "paragraph",                             136},
+       {   "parenleft",                              11},
+       {   "parenright",                             12},
+       {   "partialdiff",                           152},
+       {   "percent",                                 8},
+       {   "period",                                 17},
+       {   "periodcentered",                        195},
+       {   "perthousand",                           198},
+       {   "pi",                                    155},
+       {   "plus",                                   14},
+       {   "plusminus",                             147},
+       {   "product",                               154},
+       {   "q",                                      84},
+       {   "question",                               34},
+       {   "questiondown",                          162},
+       {   "quotedbl",                                5},
+       {   "quotedblbase",                          197},
+       {   "quotedblleft",                          180},
+       {   "quotedblright",                         181},
+       {   "quoteleft",                             182},
+       {   "quoteright",                            183},
+       {   "quotesinglbase",                        196},
+       {   "quotesingle",                            10},
+       {   "r",                                      85},
+       {   "radical",                               165},
+       {   "registered",                            138},
+       {   "ring",                                  221},
+       {   "s",                                      86},
+       {   "scaron",                                229},
+       {   "scedilla",                              252},
+       {   "section",                               134},
+       {   "semicolon",                              30},
+       {   "seven",                                  26},
+       {   "six",                                    25},
+       {   "slash",                                  18},
+       {   "space",                                   3},
+       {   "sterling",                              133},
+       {   "summation",                             153},
+       {   "t",                                      87},
+       {   "thorn",                                 238},
+       {   "three",                                  22},
+       {   "threequarters",                         246},
+       {   "threesuperior",                         243},
+       {   "tilde",                                 217},
+       {   "trademark",                             140},
+       {   "two",                                    21},
+       {   "twosuperior",                           242},
+       {   "u",                                      88},
+       {   "uacute",                                126},
+       {   "ucircumflex",                           128},
+       {   "udieresis",                             129},
+       {   "ugrave",                                127},
+       {   "underscore",                             66},
+       {   "v",                                      89},
+       {   "w",                                      90},
+       {   "x",                                      91},
+       {   "y",                                      92},
+       {   "yacute",                                236},
+       {   "ydieresis",                             186},
+       {   "yen",                                   150},
+       {   "z",                                      93},
+       {   "zcaron",                                231},
+       {   "zero",                                   19},
+};
+
+/*
+ * Look up the NUL-terminated glyph name s in the macroman[] table.
+ *
+ * The table is sorted by charname in strcmp() order, so a binary search
+ * is used.
+ *
+ * Returns the idx stored for the matching entry, or -1 if s is not
+ * present in the table.
+ */
+static int
+find_macroman_string(const char *s)
+{
+       int l, r, m;
+       int comparison;
+
+       /*
+        * l and r are inclusive bounds (the loop runs while l <= r and
+        * narrows with r = m - 1), so r must start at the last valid
+        * index. Starting at nelem(macroman) would let m reach one past
+        * the end of the table for any name sorting after the final entry.
+        */
+       l = 0;
+       r = (int)nelem(macroman) - 1;
+       while (l <= r)
+       {
+               m = (l + r) >> 1;
+               comparison = strcmp(s, macroman[m].charname);
+               if (comparison < 0)
+                       r = m - 1;
+               else if (comparison > 0)
+                       l = m + 1;
+               else
+                       return macroman[m].idx;
+       }
+
+       return -1;
+}
+
 static size_t
 subset_post2(fz_context *ctx, ttf_t *ttf, uint8_t *d, size_t len, int *gids, int num_gids)
 {
@@ -1489,6 +1774,26 @@ subset_post2(fz_context *ctx, ttf_t *ttf, uint8_t *d, size_t len, int *gids, int
                if (o <= 257)
                        continue;
 
+               /* check if string is one of the macroman standard ones, and use its index if so. */
+               {
+                       uint8_t *q = d0 + 2 + (size_t) n * 2;
+                       int k;
+                       char buf[257] = { 0 };
+                       int macidx;
+                       for (k = 0; k < o - 258; k++)
+                               q += 1 + *q;
+                       for (k = 0; k < *q; k++)
+                               buf[k] = *(q + 1 + k);
+
+                       macidx = find_macroman_string(buf);
+
+                       if (macidx >= 0)
+                       {
+                               put16(d - 2, macidx);
+                               continue;
+                       }
+               }
+
                /* We want this gid, and it is a string. */
                new_strings++;