fuzzystrmatch's difference() function assumes that _soundex()
always initializes its output buffer fully. This was not so for
the case of a string containing no alphabetic characters, resulting
in unstable output and Valgrind complaints.
Fix by using memset() to fill the whole buffer in the early-exit
case. Also make some cosmetic improvements (I didn't care for the
random switches between "instr[0]" and "*instr" notation).
Report and diagnosis by Alexander Lakhin (bug #17935).
Back-patch to all supported branches.
Discussion: https://postgr.es/m/17935-b99316aa79c18513@postgresql.org
A500 | M626 | 0
(1 row)
+SELECT soundex(''), difference('', '');
+ soundex | difference
+---------+------------
+ | 4
+(1 row)
+
SELECT levenshtein('GUMBO', 'GAMBOL');
levenshtein
-------------
Assert(instr);
Assert(outstr);
- outstr[SOUNDEX_LEN] = '\0';
-
/* Skip leading non-alphabetic characters */
- while (!isalpha((unsigned char) instr[0]) && instr[0])
+ while (*instr && !isalpha((unsigned char) *instr))
++instr;
- /* No string left */
- if (!instr[0])
+ /* If no string left, return all-zeroes buffer */
+ if (!*instr)
{
- outstr[0] = (char) 0;
+ memset(outstr, '\0', SOUNDEX_LEN + 1);
return;
}
if (isalpha((unsigned char) *instr) &&
soundex_code(*instr) != soundex_code(*(instr - 1)))
{
- *outstr = soundex_code(instr[0]);
+ *outstr = soundex_code(*instr);
if (*outstr != '0')
{
++outstr;
++outstr;
++count;
}
+
+ /* And null-terminate */
+ *outstr = '\0';
}
PG_FUNCTION_INFO_V1(difference);
SELECT soundex('Anne'), soundex('Ann'), difference('Anne', 'Ann');
SELECT soundex('Anne'), soundex('Andrew'), difference('Anne', 'Andrew');
SELECT soundex('Anne'), soundex('Margaret'), difference('Anne', 'Margaret');
+SELECT soundex(''), difference('', '');
SELECT levenshtein('GUMBO', 'GAMBOL');