Skip to content

Commit 89dba7e

Browse files
Merge pull request #82 from jb-kerboeuf/master
misc french changes
2 parents c49ef52 + a35e1cc commit 89dba7e

File tree

3 files changed

+232
-17
lines changed

3 files changed

+232
-17
lines changed

SRC/spellcheck.cpp

Lines changed: 182 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,66 @@ static SUFFIX stems[] =
2020
{0},
2121
};
2222

23+
// French suffix table used by StemSpell when `language` is "french".
// Each entry pairs a word ending (`word`) with the part-of-speech bits (`flags`)
// that StemSpell passes to SpellFix after stripping that ending from the word.
// StemSpell walks the entries in order and stops at the first ending for which
// SpellFix returns a result, so the order of entries acts as a priority.
// The final {0} entry has a null `word`, which ends the scan.
static SUFFIX stems_french[] =
24+
{
25+
{ (char*)"âtre",ADJECTIVE},
26+
{ (char*)"able",ADJECTIVE},
27+
{ (char*)"ade",NOUN},
28+
{ (char*)"age",NOUN},
29+
{ (char*)"aille",NOUN},
30+
{ (char*)"ain",NOUN|ADJECTIVE},
31+
{ (char*)"ais",NOUN|ADJECTIVE},
32+
{ (char*)"al",ADJECTIVE},
33+
{ (char*)"ance",NOUN},
34+
{ (char*)"ant",ADJECTIVE},
35+
{ (char*)"ard",ADJECTIVE},
36+
{ (char*)"aud",ADJECTIVE},
37+
{ (char*)"ère",NOUN},
38+
{ (char*)"ée",NOUN},
39+
{ (char*)"el",ADJECTIVE},
40+
{ (char*)"et",ADJECTIVE},
41+
{ (char*)"esse",NOUN},
42+
{ (char*)"eur",ADJECTIVE|NOUN},
43+
{ (char*)"euse",NOUN},
44+
{ (char*)"eux",ADJECTIVE},
45+
{ (char*)"ible",ADJECTIVE},
46+
{ (char*)"isme",NOUN},
47+
{ (char*)"iste",NOUN|ADJECTIVE},
48+
{ (char*)"ien",NOUN|ADJECTIVE},
49+
{ (char*)"ier",NOUN},
50+
{ (char*)"ie",NOUN},
51+
{ (char*)"if",ADJECTIVE},
52+
{ (char*)"in",ADJECTIVE},
53+
{ (char*)"ir",VERB},
54+
{ (char*)"asser",VERB},
55+
{ (char*)"ater",VERB},
56+
{ (char*)"ailler",VERB},
57+
{ (char*)"ifier",VERB},
58+
{ (char*)"iner",VERB},
59+
{ (char*)"iser",VERB},
60+
{ (char*)"oter",VERB},
61+
{ (char*)"ot",ADJECTIVE},
62+
{ (char*)"oyer",VERB},
63+
{ (char*)"er",VERB|NOUN},
64+
{ (char*)"ment",ADVERB|NOUN},
65+
{ (char*)"ois",NOUN},
66+
{ (char*)"son",NOUN},
67+
{ (char*)"tion",NOUN},
68+
{ (char*)"ure",NOUN},
69+
{ (char*)"logue",NOUN},
70+
{ (char*)"logie",NOUN},
71+
{ (char*)"gène",NOUN},
72+
{ (char*)"gramme",NOUN},
73+
{ (char*)"manie",NOUN},
74+
{ (char*)"phobe",NOUN},
75+
{ (char*)"phobie",NOUN},
76+
{ (char*)"ose",NOUN},
77+
{0},
78+
};
79+
80+
81+
82+
2383
bool multichoice = false;
2484

2585
void InitSpellCheck()
@@ -70,7 +130,8 @@ static int SplitWord(char* word)
70130
size_t len = strlen(word);
71131
for (unsigned int k = 1; k < len-1; ++k)
72132
{
73-
if (k == 1 &&*word != 'a' &&*word != 'A' &&*word != 'i' &&*word != 'I') continue; // only a and i are allowed single-letter words
133+
if (!stricmp(language,"english") && k == 1 &&*word != 'a' &&*word != 'A' &&*word != 'i' &&*word != 'I') continue; // only a and i are allowed single-letter words
134+
else if (!stricmp(language,"french") && k == 1 &&*word != 'y' &&*word != 'a' &&*word != 'A' &&*word != 'à' &&*word != 'À' &&*word != 'ô' &&*word != 'Ô') continue; // in french only y, a and ô are allowed single-letter words
74135
WORDP D1 = FindWord(word,k,PRIMARY_CASE_ALLOWED);
75136
if (!D1) continue;
76137
good = (D1->properties & (PART_OF_SPEECH|FOREIGN_WORD)) != 0 || (D1->internalBits & HAS_SUBSTITUTE) != 0;
@@ -829,6 +890,94 @@ static int EditDistance(WORDINFO& dictWordData, WORDINFO& realWordData,int min)
829890
continue;
830891
}
831892
}
893+
// french common bad spellings
894+
if (!stricmp(language, "french"))
895+
{
896+
if (*currentCharReal == 'a' && *currentCharDict == 'â')
897+
{
898+
val += 1;
899+
dictinfo = resumeDict;
900+
realinfo = resumeReal;
901+
continue;
902+
}
903+
if (*currentCharReal == 'e' && *currentCharDict == 'ê')
904+
{
905+
val += 10;
906+
dictinfo = resumeDict;
907+
realinfo = resumeReal;
908+
continue;
909+
}
910+
if (*currentCharReal == 'è' && *currentCharDict == 'ê')
911+
{
912+
val += 5;
913+
dictinfo = resumeDict;
914+
realinfo = resumeReal;
915+
continue;
916+
}
917+
if (*currentCharReal == 'i' && *currentCharDict == 'î')
918+
{
919+
val += 1;
920+
dictinfo = resumeDict;
921+
realinfo = resumeReal;
922+
continue;
923+
}
924+
if (*currentCharReal == 'o' && *currentCharDict == 'ô')
925+
{
926+
val += 1;
927+
dictinfo = resumeDict;
928+
realinfo = resumeReal;
929+
continue;
930+
}
931+
if (*currentCharReal == 'u' && *currentCharDict == 'û')
932+
{
933+
val += 5;
934+
dictinfo = resumeDict;
935+
realinfo = resumeReal;
936+
continue;
937+
}
938+
if (*currentCharReal == 'y' && *currentCharDict == 'l' && *nextCharDict == 'l')
939+
{
940+
val += 10;
941+
dictinfo = resumeDict1;
942+
realinfo = resumeReal;
943+
continue;
944+
}
945+
if (*currentCharReal == 'k' && *currentCharDict == 'q' && *nextCharDict == 'u')
946+
{
947+
val += 10;
948+
dictinfo = resumeDict1;
949+
realinfo = resumeReal;
950+
continue;
951+
}
952+
if (*currentCharReal == 'f' && *currentCharDict == 'p' && *nextCharDict == 'h')
953+
{
954+
val += 5;
955+
dictinfo = resumeDict1;
956+
realinfo = resumeReal;
957+
continue;
958+
}
959+
if (*currentCharReal == 's' && *currentCharDict == 'c')
960+
{
961+
val += 10;
962+
dictinfo = resumeDict;
963+
realinfo = resumeReal;
964+
continue;
965+
}
966+
if (*currentCharReal == 's' && *currentCharDict == 'ç')
967+
{
968+
val += 5;
969+
dictinfo = resumeDict;
970+
realinfo = resumeReal;
971+
continue;
972+
}
973+
if (*currentCharReal == 'c' && *currentCharDict == 'ç')
974+
{
975+
val += 5;
976+
dictinfo = resumeDict;
977+
realinfo = resumeReal;
978+
continue;
979+
}
980+
}
832981
// probable transposition since swapping syncs up
833982
if (!strcmp(currentCharReal, nextCharDict) && !strcmp(nextCharReal, currentCharDict))
834983
{
@@ -927,19 +1076,41 @@ static char* StemSpell(char* word,unsigned int i,uint64& base)
9271076
{
9281077
unsigned int i = 0;
9291078
char* suffix;
930-
while ((suffix = stems[i].word))
1079+
if (!stricmp(language, "english"))
9311080
{
932-
uint64 kind = stems[i++].flags;
933-
size_t suffixlen = strlen(suffix);
934-
if (!strnicmp(word+len-suffixlen,suffix,suffixlen))
1081+
while ((suffix = stems[i].word))
9351082
{
936-
word1[len-suffixlen] = 0;
937-
best = SpellFix(word1,0,kind);
938-
if (best)
1083+
uint64 kind = stems[i++].flags;
1084+
size_t suffixlen = strlen(suffix);
1085+
if (!strnicmp(word+len-suffixlen,suffix,suffixlen))
9391086
{
940-
ending = suffix;
941-
base = stems[i].flags;
942-
break;
1087+
word1[len-suffixlen] = 0;
1088+
best = SpellFix(word1,0,kind);
1089+
if (best)
1090+
{
1091+
ending = suffix;
1092+
base = stems[i].flags;
1093+
break;
1094+
}
1095+
}
1096+
}
1097+
}
1098+
else if (!stricmp(language, "french"))
1099+
{
1100+
while ((suffix = stems_french[i].word))
1101+
{
1102+
uint64 kind = stems_french[i++].flags;
1103+
size_t suffixlen = strlen(suffix);
1104+
if (!strnicmp(word+len-suffixlen,suffix,suffixlen))
1105+
{
1106+
word1[len-suffixlen] = 0;
1107+
best = SpellFix(word1,0,kind);
1108+
if (best)
1109+
{
1110+
ending = suffix;
1111+
base = stems_french[i].flags;
1112+
break;
1113+
}
9431114
}
9441115
}
9451116
}

SRC/textUtilities.cpp

Lines changed: 44 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,42 @@ NUMBERDECODE numberValues[] = {
6060
{ (char*)"gross",144,5,0},
6161
{ (char*)"thousand",1000,8,REALNUMBER},
6262
{ (char*)"million",1000000,7,REALNUMBER},
63-
{ (char*)"billion",1000000,7,REALNUMBER},
63+
{ (char*)"billion",1000000000,7,REALNUMBER},
64+
};
65+
66+
// French number-word table, searched by Convert2Integer when `language` is
// "french" and the token contains no digit. An entry matches when the token's
// length equals the entry's `length` and the token compares equal to `word`
// (case-insensitive) over that length; the entry's `value` is then returned.
// Initializers are: word, value, length, and an optional fourth flag
// (REALNUMBER, FRACTION_NUMBER or 0; omitted means zero-initialized).
// Besides cardinals and ordinals the table lists collective nouns, fractions,
// a few English number words and Roman numerals.
// Lengths are written as character counts of the word.
// NOTE(review): if Convert2Integer compares against a UTF-8 byte length, every
// entry containing an accented letter needs a byte length instead - confirm.
// "soixante-dixième" (17) is the one entry whose length is not its character
// count (16); it is left unchanged pending that confirmation.
NUMBERDECODE numberValuesFrench[] = {
67+
{ (char*)"zéro",0,4,REALNUMBER}, { (char*)"zero",0,4,REALNUMBER},
68+
{ (char*)"un",1,2,REALNUMBER}, { (char*)"une",1,3}, { (char*)"premier",1,7}, { (char*)"première",1,8}, { (char*)"mono",1,4,0}, { (char*)"uni",1,3,0}, { (char*)"one",1,3,0},
69+
{ (char*)"deux",2,4,REALNUMBER}, { (char*)"second",2,6}, { (char*)"seconde",2,7}, { (char*)"deuxième",2,8}, { (char*)"double",2,6,0}, { (char*)"bi",2,2,0}, { (char*)"paire",2,5,0}, { (char*)"moitié",2,6,FRACTION_NUMBER}, { (char*)"demi",2,4,FRACTION_NUMBER}, { (char*)"two",2,3,0},
70+
{ (char*)"trois",3,5,REALNUMBER}, { (char*)"troisième",3,9}, { (char*)"triple",3,6,0}, { (char*)"tiers",3,5,FRACTION_NUMBER}, { (char*)"three",3,5,0}, { (char*)"III",3,3,0},
71+
{ (char*)"quatre",4,6,REALNUMBER}, { (char*)"quatrième",4,9}, { (char*)"quart",4,5,FRACTION_NUMBER}, { (char*)"four",4,4,0}, { (char*)"IV",4,2,0},
72+
{ (char*)"cinq",5,4,REALNUMBER}, { (char*)"cinquième",5,9}, { (char*)"five",5,4,0}, { (char*)"V",5,1,0},
73+
{ (char*)"six",6,3,REALNUMBER}, { (char*)"sixième",6,7}, { (char*)"VI",6,2,0},
74+
{ (char*)"sept",7,4,REALNUMBER}, { (char*)"septième",7,8}, { (char*)"seven",7,5,0}, { (char*)"VII",7,3,0},
75+
{ (char*)"huit",8,4,REALNUMBER}, { (char*)"huitième",8,8}, { (char*)"eight",8,5,0}, { (char*)"VIII",8,4,0},
76+
{ (char*)"neuf",9,4,REALNUMBER}, { (char*)"neuvième",9,8}, { (char*)"nine",9,4,0}, { (char*)"IX",9,2,0},
77+
{ (char*)"dix",10,3,REALNUMBER}, { (char*)"dixième",10,7}, { (char*)"dizaine",10,7,0}, { (char*)"ten",10,3,0}, { (char*)"X",10,1,0},
78+
{ (char*)"onze",11,4,REALNUMBER}, { (char*)"onzième",11,7}, { (char*)"XI",11,2,0},
79+
{ (char*)"douze",12,5,REALNUMBER}, { (char*)"douzième",12,8}, { (char*)"douzaine",12,8,0}, { (char*)"XII",12,3,0},
80+
{ (char*)"treize",13,6,REALNUMBER}, { (char*)"treizième",13,9}, { (char*)"XIII",13,4,0},
81+
{ (char*)"quatorze",14,8,REALNUMBER}, { (char*)"quatorzième",14,11}, { (char*)"XIV",14,3,0},
82+
{ (char*)"quinze",15,6,REALNUMBER}, { (char*)"quinzième",15,9}, { (char*)"XV",15,2,0},
83+
{ (char*)"seize",16,5,REALNUMBER}, { (char*)"seizième",16,8}, { (char*)"XVI",16,3,0},
84+
{ (char*)"dix-sept",17,8,REALNUMBER}, { (char*)"dix-septième",17,12}, { (char*)"XVII",17,4,0},
85+
{ (char*)"dix-huit",18,8,REALNUMBER}, { (char*)"dix-huitième",18,12}, { (char*)"XVIII",18,5,0},
86+
{ (char*)"dix-neuf",19,8,REALNUMBER}, { (char*)"dix-neuvième",19,12}, { (char*)"XIX",19,3,0},
87+
{ (char*)"vingt",20,5,REALNUMBER}, { (char*)"vingtième",20,9}, { (char*)"XX",20,2,0},
88+
{ (char*)"trente",30,6,REALNUMBER}, { (char*)"trentième",30,9},
89+
{ (char*)"quarante",40,8,REALNUMBER}, { (char*)"quarantième",40,11},
90+
{ (char*)"cinquante",50,9,REALNUMBER}, { (char*)"cinquantième",50,12},
91+
{ (char*)"soixante",60,8,REALNUMBER}, { (char*)"soixantième",60,11},
92+
{ (char*)"soixante-dix",70,12,REALNUMBER}, { (char*)"septante",70,8,REALNUMBER}, { (char*)"soixante-dixième",70,17},
93+
{ (char*)"quatre-vingt",80,12,REALNUMBER}, { (char*)"octante",80,7,REALNUMBER}, { (char*)"quatre-vingtième",80,16},
94+
{ (char*)"quatre-vingt-dix",90,16,REALNUMBER}, { (char*)"nonante",90,7,REALNUMBER}, { (char*)"quatre-vingt-dixième",90,20},
95+
{ (char*)"cent",100,4,REALNUMBER}, { (char*)"cents",100,5,REALNUMBER}, { (char*)"centième",100,8}, { (char*)"centaine",100,8,0},
96+
{ (char*)"mille",1000,5,REALNUMBER}, { (char*)"millième",1000,8}, { (char*)"millier",1000,7,0},
97+
{ (char*)"million",1000000,7,REALNUMBER}, { (char*)"millions",1000000,8,REALNUMBER}, { (char*)"millionième",1000000,11},
98+
{ (char*)"milliard",1000000000,8,REALNUMBER}, { (char*)"milliards",1000000000,9,REALNUMBER}, { (char*)"milliardième",1000000000,12},
6499
};
65100

66101
char toHex[16] = {
@@ -2445,13 +2480,20 @@ int64 Convert2Integer(char* number) // non numbers return NOT_A_NUMBER
24452480
if (hyp) *hyp = '-';
24462481

24472482
// look up direct word numbers
2448-
if (!hasDigit) for (unsigned int i = 0; i < sizeof(numberValues)/sizeof(NUMBERDECODE); ++i)
2483+
if (!stricmp(language, "english") && !hasDigit) for (unsigned int i = 0; i < sizeof(numberValues)/sizeof(NUMBERDECODE); ++i)
24492484
{
24502485
if (len == numberValues[i].length && !strnicmp(word,numberValues[i].word,len))
24512486
{
24522487
return numberValues[i].value; // a match (but may be a fraction number)
24532488
}
24542489
}
2490+
else if (!stricmp(language, "french") && !hasDigit) for (unsigned int i = 0; i < sizeof(numberValuesFrench)/sizeof(NUMBERDECODE); ++i)
2491+
{
2492+
if (len == numberValuesFrench[i].length && !strnicmp(word,numberValuesFrench[i].word,len))
2493+
{
2494+
return numberValuesFrench[i].value; // a match (but may be a fraction number)
2495+
}
2496+
}
24552497

24562498
// try for hyphenated composite
24572499
char* hyphen = strchr(word,'-');

SRC/tokenSystem.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -724,7 +724,7 @@ static char* FindWordEnd(char* ptr,char* priorToken,char** words,int &count,bool
724724
char* place = ptr;
725725
while (IsDigit(*place)) ++place;
726726
if (!stricmp(place,"st") || !stricmp(place,"nd") || !stricmp(place,"rd")) return end;
727-
else if (stricmp(language, "french") && (!stricmp(place, "er") || !stricmp(place, "ere") || !stricmp(place, "ère") || !stricmp(place, "nd") || !stricmp(place, "nde") || !stricmp(place, "eme") || !stricmp(place, "ème"))) return end;
727+
else if (!stricmp(language, "french") && (!stricmp(place, "er") || !stricmp(place, "ere") || !stricmp(place, "ère") || !stricmp(place, "nd") || !stricmp(place, "nde") || !stricmp(place, "eme") || !stricmp(place, "ème"))) return end;
728728
int len = end - ptr;
729729
char next2;
730730
if (*ptr == '/') return ptr+1; // split of things separated
@@ -762,9 +762,11 @@ static char* FindWordEnd(char* ptr,char* priorToken,char** words,int &count,bool
762762
}
763763
// ' as particle elision
764764
if ((ptr - start) == 1 && (*start == 'd' || *start == 'c' || *start == 'j' || *start == 'l' || *start == 's' || *start == 't' || *start == 'm' || *start == 'n')) return ptr + 1; // break off d' argent and other foreign particles
765-
else if ((ptr - start) == 2 && *start == 'q' && *(start + 1) == 'u') return ptr + 1; // break off qu'
766-
else if ((ptr - start) == 6 && *start == 'l' && *(start + 1) == 'o' && *(start + 2) == 'r' && *(start + 3) == 's' && *(start + 4) == 'q' && *(start + 5) == 'u') return ptr + 1; // break off lorsqu'
767-
else if ((ptr - start) == 6 && *start == 'p' && *(start + 1) == 'u' && *(start + 2) == 'i' && *(start + 3) == 's' && *(start + 4) == 'q' && *(start + 5) == 'u') return ptr + 1; // break off puisqu'
765+
else if (!stricmp(language, "french") && (ptr - start) == 1 && (*start == 'D' || *start == 'C' || *start == 'J' || *start == 'L' || *start == 'S' || *start == 'T' || *start == 'M' || *start == 'N')) return ptr + 1; // break off french particles in upper case
766+
else if (!stricmp(language, "french") && (ptr - start) == 2 && (*start == 'q' || *start == 'Q') && *(start + 1) == 'u') return ptr + 1; // break off qu'
767+
else if (!stricmp(language, "french") && (ptr - start) == 5 && (*start == 'j' || *start == 'J') && *(start + 1) == 'u' && *(start + 2) == 's' && *(start + 3) == 'q' && *(start + 4) == 'u') return ptr + 1; // break off jusqu'
768+
else if (!stricmp(language, "french") && (ptr - start) == 6 && (*start == 'l' || *start == 'L') && *(start + 1) == 'o' && *(start + 2) == 'r' && *(start + 3) == 's' && *(start + 4) == 'q' && *(start + 5) == 'u') return ptr + 1; // break off lorsqu'
769+
else if (!stricmp(language, "french") && (ptr - start) == 6 && (*start == 'p' || *start == 'P') && *(start + 1) == 'u' && *(start + 2) == 'i' && *(start + 3) == 's' && *(start + 4) == 'q' && *(start + 5) == 'u') return ptr + 1; // break off puisqu'
768770

769771
// 12'6" or 12'. or 12'
770772
if (IsDigit(*start) && !IsAlphaUTF8(next)) return ptr + 1; // 12' swallow ' into number word

0 commit comments

Comments
 (0)