Fix LIKE's special-case code for % followed by _. I'm not entirely sure that

author Tom Lane <[email protected]>

Sun, 24 May 2009 18:10:38 +0000 (18:10 +0000)

committer Tom Lane <[email protected]>

Sun, 24 May 2009 18:10:38 +0000 (18:10 +0000)
author Tom Lane <[email protected]>
Sun, 24 May 2009 18:10:38 +0000 (18:10 +0000)
committer Tom Lane <[email protected]>
Sun, 24 May 2009 18:10:38 +0000 (18:10 +0000)
diff --git a/src/backend/utils/adt/like_match.c b/src/backend/utils/adt/like_match.c

index 95550e0530e1dd62c7cc6a518ea4c6e0ab45a0f6..d72113b66b303a62fff6a031f0b74fdcad97f74b 100644 (file)
--- a/src/backend/utils/adt/like_match.c
+++ b/src/backend/utils/adt/like_match.c
@@ -9,7 +9,7 @@
   * (UTF8 is a special case because we can use a much more efficient version
   * of NextChar than can be used for general multi-byte encodings.)
   *
- * Before the inclusion, we need to define following macros:
+ * Before the inclusion, we need to define the following macros:
   *
   * NextChar
   * MatchText - to name of function wanted
@@ -19,47 +19,46 @@
   * Copyright (c) 1996-2009, PostgreSQL Global Development Group
   *
   * IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/adt/like_match.c,v 1.24 2009/01/01 17:23:49 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/like_match.c,v 1.25 2009/05/24 18:10:37 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
  
  /*
-** Originally written by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986.
-** Rich $alz is now <[email protected]>.
-** Special thanks to Lars Mathiesen <[email protected]> for the LABORT code.
-**
-** This code was shamelessly stolen from the "pql" code by myself and
-** slightly modified :)
-**
-** All references to the word "star" were replaced by "percent"
-** All references to the word "wild" were replaced by "like"
-**
-** All the nice shell RE matching stuff was replaced by just "_" and "%"
-**
-** As I don't have a copy of the SQL standard handy I wasn't sure whether
-** to leave in the '\' escape character handling.
-**
-** Keith Parks. <[email protected]>
-**
-** SQL92 lets you specify the escape character by saying
-** LIKE <pattern> ESCAPE <escape character>. We are a small operation
-** so we force you to use '\'. - ay 7/95
-**
-** Now we have the like_escape() function that converts patterns with
-** any specified escape character (or none at all) to the internal
-** default escape character, which is still '\'. - tgl 9/2000
-**
-** The code is rewritten to avoid requiring null-terminated strings,
-** which in turn allows us to leave out some memcpy() operations.
-** This code should be faster and take less memory, but no promises...
-** - thomas 2000-08-06
-**
-*/
+ * Originally written by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986.
+ * Rich $alz is now <[email protected]>.
+ * Special thanks to Lars Mathiesen <[email protected]> for the LABORT code.
+ *
+ * This code was shamelessly stolen from the "pql" code by myself and
+ * slightly modified :)
+ *
+ * All references to the word "star" were replaced by "percent"
+ * All references to the word "wild" were replaced by "like"
+ *
+ * All the nice shell RE matching stuff was replaced by just "_" and "%"
+ *
+ * As I don't have a copy of the SQL standard handy I wasn't sure whether
+ * to leave in the '\' escape character handling.
+ *
+ * Keith Parks. <[email protected]>
+ *
+ * SQL92 lets you specify the escape character by saying
+ * LIKE <pattern> ESCAPE <escape character>. We are a small operation
+ * so we force you to use '\'. - ay 7/95
+ *
+ * Now we have the like_escape() function that converts patterns with
+ * any specified escape character (or none at all) to the internal
+ * default escape character, which is still '\'. - tgl 9/2000
+ *
+ * The code is rewritten to avoid requiring null-terminated strings,
+ * which in turn allows us to leave out some memcpy() operations.
+ * This code should be faster and take less memory, but no promises...
+ * - thomas 2000-08-06
+ */
  
  
  /*--------------------
- * Match text and p, return LIKE_TRUE, LIKE_FALSE, or LIKE_ABORT.
+ * Match text and pattern, return LIKE_TRUE, LIKE_FALSE, or LIKE_ABORT.
   *
   * LIKE_TRUE: they match
   * LIKE_FALSE: they don't match
@@ -80,19 +79,18 @@ static int
  MatchText(char *t, int tlen, char *p, int plen)
  {
     /* Fast path for match-everything pattern */
-   if ((plen == 1) && (*p == '%'))
+   if (plen == 1 && *p == '%')
         return LIKE_TRUE;
  
     /*
      * In this loop, we advance by char when matching wildcards (and thus on
      * recursive entry to this function we are properly char-synced). On other
      * occasions it is safe to advance by byte, as the text and pattern will
-    * be in lockstep. This allows us to perform all comparisons  between the
+    * be in lockstep. This allows us to perform all comparisons between the
      * text and pattern on a byte by byte basis, even for multi-byte
      * encodings.
      */
-
-   while ((tlen > 0) && (plen > 0))
+   while (tlen > 0 && plen > 0)
     {
         if (*p == '\\')
         {
@@ -116,7 +114,7 @@ MatchText(char *t, int tlen, char *p, int plen)
  
             /* %% is the same as % according to the SQL standard */
             /* Advance past all %'s */
-           while ((plen > 0) && (*p == '%'))
+           while (plen > 0 && *p == '%')
                 NextByte(p, plen);
             /* Trailing percent matches everything. */
             if (plen <= 0)
@@ -127,22 +125,24 @@ MatchText(char *t, int tlen, char *p, int plen)
              * rest of the pattern.
              */
             if (*p == '_')
-
             {
                 /* %_ is the same as _% - avoid matching _ repeatedly */
  
-               NextChar(t, tlen);
-               NextByte(p, plen);
-
-               if (tlen <= 0)
-               {
-                   return (plen <= 0) ? LIKE_TRUE : LIKE_ABORT;
-               }
-               else if (plen <= 0)
+               do
                 {
-                   return LIKE_FALSE;
-               }
-
+                   NextChar(t, tlen);
+                   NextByte(p, plen);
+               } while (tlen > 0 && plen > 0 && *p == '_');
+
+               /*
+                * If we are at the end of the pattern, succeed: % followed
+                * by n _'s matches any string of at least n characters, and
+                * we have now found there are at least n characters.
+                */
+               if (plen <= 0)
+                   return LIKE_TRUE;
+
+               /* Look for a place that matches the rest of the pattern */
                 while (tlen > 0)
                 {
                     int         matched = MatchText(t, tlen, p, plen);
@@ -155,7 +155,6 @@ MatchText(char *t, int tlen, char *p, int plen)
             }
             else
             {
-
                 char        firstpat = TCHAR(*p);
  
                 if (*p == '\\')
@@ -180,7 +179,6 @@ MatchText(char *t, int tlen, char *p, int plen)
                     }
  
                     NextChar(t, tlen);
-
                 }
             }
  
@@ -192,20 +190,20 @@ MatchText(char *t, int tlen, char *p, int plen)
         }
         else if (*p == '_')
         {
+           /* _ matches any single character, and we know there is one */
             NextChar(t, tlen);
             NextByte(p, plen);
             continue;
         }
-       else if (TCHAR(*t) != TCHAR(*p))
+       else if (TCHAR(*p) != TCHAR(*t))
         {
-           /*
-            * Not the single-character wildcard and no explicit match? Then
-            * time to quit...
-            */
+           /* non-wildcard pattern char fails to match text char */
             return LIKE_FALSE;
         }
  
         /*
+        * Pattern and text match, so advance.
+        *
          * It is safe to use NextByte instead of NextChar here, even for
          * multi-byte character sets, because we are not following immediately
          * after a wildcard character. If we are in the middle of a multibyte
@@ -222,9 +220,8 @@ MatchText(char *t, int tlen, char *p, int plen)
     if (tlen > 0)
         return LIKE_FALSE;      /* end of pattern, but not of text */
  
-   /* End of input string.  Do we have matching pattern remaining? */
-   while ((plen > 0) && (*p == '%'))   /* allow multiple %'s at end of
-                                        * pattern */
+   /* End of text string.  Do we have matching pattern remaining? */
+   while (plen > 0 && *p == '%')   /* allow multiple %'s at end of pattern */
         NextByte(p, plen);
  
     if (plen <= 0)
@@ -354,5 +351,4 @@ do_like_escape(text *pat, text *esc)
  
  #ifdef MATCH_LOWER
  #undef MATCH_LOWER
-
  #endif
diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out

index be8eb919fa333a9634a6c2470fb24fb8d49ed2d3..1241a2ace63a04ecfbb8408e9b4175f183b6cad1 100644 (file)
--- a/src/test/regress/expected/strings.out
+++ b/src/test/regress/expected/strings.out
@@ -902,6 +902,33 @@ SELECT 'Hawkeye' NOT ILIKE 'h%' AS "false";
   f
  (1 row)
  
+--
+-- test %/_ combination cases, cf bug #4821
+--
+SELECT 'foo' LIKE '_%' as t, 'f' LIKE '_%' as t, '' LIKE '_%' as f;
+ t | t | f 
+---+---+---
+ t | t | f
+(1 row)
+
+SELECT 'foo' LIKE '%_' as t, 'f' LIKE '%_' as t, '' LIKE '%_' as f;
+ t | t | f 
+---+---+---
+ t | t | f
+(1 row)
+
+SELECT 'foo' LIKE '__%' as t, 'foo' LIKE '___%' as t, 'foo' LIKE '____%' as f;
+ t | t | f 
+---+---+---
+ t | t | f
+(1 row)
+
+SELECT 'foo' LIKE '%__' as t, 'foo' LIKE '%___' as t, 'foo' LIKE '%____' as f;
+ t | t | f 
+---+---+---
+ t | t | f
+(1 row)
+
  --
  -- test implicit type conversion
  --
diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql

index a28c75ac0449ec9c70738bea08c91790c4c5e610..681a0e1e62c65c5bbe4f2ab0f665d859d64a8a1d 100644 (file)
--- a/src/test/regress/sql/strings.sql
+++ b/src/test/regress/sql/strings.sql
@@ -279,6 +279,17 @@ SELECT 'hawkeye' NOT ILIKE 'H%Eye' AS "false";
  SELECT 'Hawkeye' ILIKE 'h%' AS "true";
  SELECT 'Hawkeye' NOT ILIKE 'h%' AS "false";
  
+--
+-- test %/_ combination cases, cf bug #4821
+--
+
+SELECT 'foo' LIKE '_%' as t, 'f' LIKE '_%' as t, '' LIKE '_%' as f;
+SELECT 'foo' LIKE '%_' as t, 'f' LIKE '%_' as t, '' LIKE '%_' as f;
+
+SELECT 'foo' LIKE '__%' as t, 'foo' LIKE '___%' as t, 'foo' LIKE '____%' as f;
+SELECT 'foo' LIKE '%__' as t, 'foo' LIKE '%___' as t, 'foo' LIKE '%____' as f;
+
+
  --
  -- test implicit type conversion
  --
author	Tom Lane <[email protected]>
	Sun, 24 May 2009 18:10:38 +0000 (18:10 +0000)
committer	Tom Lane <[email protected]>
	Sun, 24 May 2009 18:10:38 +0000 (18:10 +0000)
src/backend/utils/adt/like_match.c		patch \| blob \| blame \| history
src/test/regress/expected/strings.out		patch \| blob \| blame \| history
src/test/regress/sql/strings.sql		patch \| blob \| blame \| history