Ensure Soundex difference() function handles empty input sanely.
author Tom Lane <[email protected]>
Tue, 16 May 2023 14:53:42 +0000 (10:53 -0400)
committer Tom Lane <[email protected]>
Tue, 16 May 2023 14:53:42 +0000 (10:53 -0400)
fuzzystrmatch's difference() function assumes that _soundex()
always initializes its output buffer fully.  This was not so for
the case of a string containing no alphabetic characters, resulting
in unstable output and Valgrind complaints.

Fix by using memset() to fill the whole buffer in the early-exit
case.  Also make some cosmetic improvements (I didn't care for the
random switches between "instr[0]" and "*instr" notation).

Report and diagnosis by Alexander Lakhin (bug #17935).
Back-patch to all supported branches.

Discussion: https://postgr.es/m/17935-b99316aa79c18513@postgresql.org

contrib/fuzzystrmatch/expected/fuzzystrmatch.out
contrib/fuzzystrmatch/fuzzystrmatch.c
contrib/fuzzystrmatch/sql/fuzzystrmatch.sql

index bcb837fd6b6af27d468424495d77d25de5aec348..3195e1ec3c8bb6bd9ab9f0864a7585a349a4fdd0 100644 (file)
@@ -23,6 +23,12 @@ SELECT soundex('Anne'), soundex('Margaret'), difference('Anne', 'Margaret');
  A500    | M626    |          0
 (1 row)
 
+SELECT soundex(''), difference('', '');
+ soundex | difference 
+---------+------------
+         |          4
+(1 row)
+
 SELECT levenshtein('GUMBO', 'GAMBOL');
  levenshtein 
 -------------
index e1222714e4a50ee2fceb10181118aed736c9e44a..56864979832ce6b4b4c740208bf4a46f3709e6eb 100644 (file)
@@ -728,16 +728,14 @@ _soundex(const char *instr, char *outstr)
        Assert(instr);
        Assert(outstr);
 
-       outstr[SOUNDEX_LEN] = '\0';
-
        /* Skip leading non-alphabetic characters */
-       while (!isalpha((unsigned char) instr[0]) && instr[0])
+       while (*instr && !isalpha((unsigned char) *instr))
                ++instr;
 
-       /* No string left */
-       if (!instr[0])
+       /* If no string left, return all-zeroes buffer */
+       if (!*instr)
        {
-               outstr[0] = (char) 0;
+               memset(outstr, '\0', SOUNDEX_LEN + 1);
                return;
        }
 
@@ -750,7 +748,7 @@ _soundex(const char *instr, char *outstr)
                if (isalpha((unsigned char) *instr) &&
                        soundex_code(*instr) != soundex_code(*(instr - 1)))
                {
-                       *outstr = soundex_code(instr[0]);
+                       *outstr = soundex_code(*instr);
                        if (*outstr != '0')
                        {
                                ++outstr;
@@ -767,6 +765,9 @@ _soundex(const char *instr, char *outstr)
                ++outstr;
                ++count;
        }
+
+       /* And null-terminate */
+       *outstr = '\0';
 }
 
 PG_FUNCTION_INFO_V1(difference);
index db05c7d6b6de5eb7757d5fc0b7c2933d75f25a32..0b4bb9be57ec2b230ca37dc55e33aa35b8495726 100644 (file)
@@ -6,6 +6,7 @@ SELECT soundex('hello world!');
 SELECT soundex('Anne'), soundex('Ann'), difference('Anne', 'Ann');
 SELECT soundex('Anne'), soundex('Andrew'), difference('Anne', 'Andrew');
 SELECT soundex('Anne'), soundex('Margaret'), difference('Anne', 'Margaret');
+SELECT soundex(''), difference('', '');
 
 
 SELECT levenshtein('GUMBO', 'GAMBOL');