Add comments and a missing CHECK_FOR_INTERRUPTS in ts_headline.

author Tom Lane <[email protected]>

Mon, 21 Nov 2022 22:07:07 +0000 (17:07 -0500)

committer Tom Lane <[email protected]>

Mon, 21 Nov 2022 22:07:29 +0000 (17:07 -0500)
author Tom Lane <[email protected]>
Mon, 21 Nov 2022 22:07:07 +0000 (17:07 -0500)
committer Tom Lane <[email protected]>
Mon, 21 Nov 2022 22:07:29 +0000 (17:07 -0500)
diff --git a/src/backend/tsearch/ts_parse.c b/src/backend/tsearch/ts_parse.c

index 92de1f7141a5330bf86dbd22333191e77705357e..eff1669b1218467a8b195ea748c3720b35012c3c 100644 (file)
--- a/src/backend/tsearch/ts_parse.c
+++ b/src/backend/tsearch/ts_parse.c
@@ -433,6 +433,8 @@ parsetext(Oid cfgId, ParsedText *prs, char *buf, int buflen)
  /*
   * Headline framework
   */
+
+/* Add a word to prs->words[] */
  static void
  hladdword(HeadlineParsedText *prs, char *buf, int buflen, int type)
  {
@@ -449,6 +451,14 @@ hladdword(HeadlineParsedText *prs, char *buf, int buflen, int type)
         prs->curwords++;
  }
  
+/*
+ * Add pos and matching-query-item data to the just-added word.
+ * Here, buf/buflen represent a processed lexeme, not raw token text.
+ *
+ * If the query contains more than one matching item, we replicate
+ * the last-added word so that each item can be pointed to.  The
+ * duplicate entries are marked with repeated = 1.
+ */
  static void
  hlfinditem(HeadlineParsedText *prs, TSQuery query, int32 pos, char *buf, int buflen)
  {
@@ -589,6 +599,9 @@ hlparsetext(Oid cfgId, HeadlineParsedText *prs, TSQuery query, char *buf, int bu
         FunctionCall1(&(prsobj->prsend), PointerGetDatum(prsdata));
  }
  
+/*
+ * Generate the headline, as a text object, from HeadlineParsedText.
+ */
  text *
  generateHeadline(HeadlineParsedText *prs)
  {
diff --git a/src/backend/tsearch/wparser_def.c b/src/backend/tsearch/wparser_def.c

index 826027844e7a4601cf9eb598dcbeb2d85649cbf8..2323a3b90860808efccbb052b8a0ab4cd9f7cfdb 100644 (file)
--- a/src/backend/tsearch/wparser_def.c
+++ b/src/backend/tsearch/wparser_def.c
@@ -1914,10 +1914,6 @@ prsd_end(PG_FUNCTION_ARGS)
   */
  
  /* token type classification macros */
-#define LEAVETOKEN(x)  ( (x)==SPACE )
-#define COMPLEXTOKEN(x) ( (x)==URL_T || (x)==NUMHWORD || (x)==ASCIIHWORD || (x)==HWORD )
-#define ENDPUNCTOKEN(x) ( (x)==SPACE )
-
  #define TS_IDIGNORE(x) ( (x)==TAG_T || (x)==PROTOCOL || (x)==SPACE || (x)==XMLENTITY )
  #define HLIDREPLACE(x) ( (x)==TAG_T )
  #define HLIDSKIP(x)            ( (x)==URL_T || (x)==NUMHWORD || (x)==ASCIIHWORD || (x)==HWORD )
diff --git a/src/backend/utils/adt/tsvector_op.c b/src/backend/utils/adt/tsvector_op.c

index 1786c18f895dadb887c1778941de6e2c8e26ec50..f7c1e3d6d65df96515bf5766e7ab894381e5a206 100644 (file)
--- a/src/backend/utils/adt/tsvector_op.c
+++ b/src/backend/utils/adt/tsvector_op.c
@@ -1617,6 +1617,9 @@ TS_phrase_execute(QueryItem *curitem, void *arg, uint32 flags,
         /* since this function recurses, it could be driven to stack overflow */
         check_stack_depth();
  
+       /* ... and let's check for query cancel while we're at it */
+       CHECK_FOR_INTERRUPTS();
+
         if (curitem->type == QI_VAL)
                 return chkcond(arg, (QueryOperand *) curitem, data);
  
diff --git a/src/include/tsearch/ts_public.h b/src/include/tsearch/ts_public.h

index bde1e8abc13c9ec18d28d0529670e40eda6f0419..fe2a16783de7dea1846965c0d241666bdaae7bbe 100644 (file)
--- a/src/include/tsearch/ts_public.h
+++ b/src/include/tsearch/ts_public.h
@@ -30,33 +30,60 @@ typedef struct
  } LexDescr;
  
  /*
- * Interface to headline generator
+ * Interface to headline generator (tsparser's prsheadline function)
+ *
+ * HeadlineParsedText describes the text that is to be highlighted.
+ * Some fields are passed from the core code to the prsheadline function,
+ * while others are output from the prsheadline function.
+ *
+ * The principal data is words[], an array of HeadlineWordEntry,
+ * one entry per token, of length curwords.
+ * The fields of HeadlineWordEntry are:
+ *
+ * in, selected, replace, skip: these flags are initially zero
+ * and may be set by the prsheadline function.  A consecutive group
+ * of tokens marked "in" form a "fragment" to be output.
+ * Such tokens may additionally be marked selected, replace, or skip
+ * to modify how they are shown.  (If you set more than one of those
+ * bits, you get an unspecified one of those behaviors.)
+ *
+ * type, len, pos, word: filled by core code to describe the token.
+ *
+ * item: if the token matches any operand of the tsquery of interest,
+ * a pointer to such an operand.  (If there are multiple matching
+ * operands, we generate extra copies of the HeadlineWordEntry to hold
+ * all the pointers.  The extras are marked with repeated = 1 and should
+ * be ignored except for checking the item pointer.)
   */
  typedef struct
  {
-       uint32          selected:1,
-                               in:1,
-                               replace:1,
-                               repeated:1,
-                               skip:1,
-                               unused:3,
-                               type:8,
-                               len:16;
-       WordEntryPos pos;
-       char       *word;
-       QueryOperand *item;
+       uint32          selected:1,             /* token is to be highlighted */
+                               in:1,                   /* token is part of headline */
+                               replace:1,              /* token is to be replaced with a space */
+                               repeated:1,             /* duplicate entry to hold item pointer */
+                               skip:1,                 /* token is to be skipped (not output) */
+                               unused:3,               /* available bits */
+                               type:8,                 /* parser's token category */
+                               len:16;                 /* length of token */
+       WordEntryPos pos;                       /* position of token */
+       char       *word;                       /* text of token (not null-terminated) */
+       QueryOperand *item;                     /* a matching query operand, or NULL if none */
  } HeadlineWordEntry;
  
  typedef struct
  {
+       /* Fields filled by core code before calling prsheadline function: */
         HeadlineWordEntry *words;
-       int32           lenwords;
-       int32           curwords;
-       int32           vectorpos;              /* positions a-la tsvector */
-       char       *startsel;
+       int32           lenwords;               /* allocated length of words[] */
+       int32           curwords;               /* current number of valid entries */
+       int32           vectorpos;              /* used by ts_parse.c in filling pos fields */
+
+       /* The prsheadline function must fill these fields: */
+       /* Strings for marking selected tokens and separating fragments: */
+       char       *startsel;           /* palloc'd strings */
         char       *stopsel;
         char       *fragdelim;
-       int16           startsellen;
+       int16           startsellen;    /* lengths of strings */
         int16           stopsellen;
         int16           fragdelimlen;
  } HeadlineParsedText;
author	Tom Lane <[email protected]>
	Mon, 21 Nov 2022 22:07:07 +0000 (17:07 -0500)
committer	Tom Lane <[email protected]>
	Mon, 21 Nov 2022 22:07:29 +0000 (17:07 -0500)
src/backend/tsearch/ts_parse.c		patch | blob | blame | history
src/backend/tsearch/wparser_def.c		patch | blob | blame | history
src/backend/utils/adt/tsvector_op.c		patch | blob | blame | history
src/include/tsearch/ts_public.h		patch | blob | blame | history