Back-patch code to deduce implied equalities from transitivity of mergejoin clauses

author Tom Lane <[email protected]>

Sat, 23 Sep 2000 23:50:47 +0000 (23:50 +0000)

committer Tom Lane <[email protected]>

Sat, 23 Sep 2000 23:50:47 +0000 (23:50 +0000)
author Tom Lane <[email protected]>
Sat, 23 Sep 2000 23:50:47 +0000 (23:50 +0000)
committer Tom Lane <[email protected]>
Sat, 23 Sep 2000 23:50:47 +0000 (23:50 +0000)
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c

index 580675a85b70b05c24e3496b81983c952a1da258..6c507cfd4525799463e8879748d9e410d7f3a8ee 100644 (file)
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/optimizer/path/pathkeys.c,v 1.21 2000/04/12 17:15:20 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/optimizer/path/pathkeys.c,v 1.21.2.1 2000/09/23 23:50:47 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -19,6 +19,7 @@
  #include "optimizer/joininfo.h"
  #include "optimizer/pathnode.h"
  #include "optimizer/paths.h"
+#include "optimizer/planmain.h"
  #include "optimizer/tlist.h"
  #include "optimizer/var.h"
  #include "parser/parsetree.h"
@@ -227,35 +228,107 @@ add_equijoined_keys(Query *root, RestrictInfo *restrictinfo)
      * into our new set. When done, we add the new set to the front of
      * equi_key_list.
      *
+    * It may well be that the two items we're given are already known to
+    * be equijoin-equivalent, in which case we don't need to change our
+    * data structure.  If we find both of them in the same equivalence
+    * set to start with, we can quit immediately.
+    *
      * This is a standard UNION-FIND problem, for which there exist better
      * data structures than simple lists.  If this code ever proves to be
      * a bottleneck then it could be sped up --- but for now, simple is
      * beautiful.
      */
-   newset = lcons(item1, lcons(item2, NIL));
+   newset = NIL;
  
     foreach(cursetlink, root->equi_key_list)
     {
         List       *curset = lfirst(cursetlink);
+       bool        item1here = member(item1, curset);
+       bool        item2here = member(item2, curset);
  
-       if (member(item1, curset) || member(item2, curset))
+       if (item1here || item2here)
         {
+           /* If we find both in the same equivalence set, there is nothing more to do */
+           if (item1here && item2here)
+           {
+               /* Better not have seen only one in an earlier set... */
+               Assert(newset == NIL);
+               return;
+           }
+
+           /* Build the new set only when we know we must */
+           if (newset == NIL)
+               newset = lcons(item1, lcons(item2, NIL));
+
             /* Found a set to merge into our new set */
             newset = LispUnion(newset, curset);
  
             /*
              * Remove old set from equi_key_list.  NOTE this does not
-            * change lnext(cursetlink), so the outer foreach doesn't
-            * break.
+            * change lnext(cursetlink), so the foreach loop doesn't break.
              */
             root->equi_key_list = lremove(curset, root->equi_key_list);
             freeList(curset);   /* might as well recycle old cons cells */
         }
     }
  
+   /* Build the new set only when we know we must */
+   if (newset == NIL)
+       newset = lcons(item1, lcons(item2, NIL));
+
     root->equi_key_list = lcons(newset, root->equi_key_list);
  }
  
+/*
+ * generate_implied_equalities
+ *   Scan the completed equi_key_list for the query, and generate explicit
+ *   qualifications (WHERE clauses) for all the pairwise equalities not
+ *   already mentioned in the quals.  This is useful because the additional
+ *   clauses help the selectivity-estimation code, and in fact it's
+ *   *necessary* to ensure that sort keys we think are equivalent really
+ *   are (see src/backend/optimizer/README for more info).
+ *
+ * This routine just walks the equi_key_list to find all pairwise equalities.
+ * We call process_implied_equality (in plan/initsplan.c) to determine whether
+ * each is already known and add it to the proper restrictinfo list if not.
+ */
+void
+generate_implied_equalities(Query *root)
+{
+   List       *cursetlink;
+
+   foreach(cursetlink, root->equi_key_list)
+   {
+       List       *curset = lfirst(cursetlink);
+       List       *ptr1;
+
+       /*
+        * A set containing only two items cannot imply any equalities
+        * beyond the one that created the set, so we can skip it.
+        */
+       if (length(curset) < 3)
+           continue;
+
+       /*
+        * Match each item in the set with all that appear after it
+        * (it's sufficient to generate A=B, need not process B=A too).
+        */
+       foreach(ptr1, curset)
+       {
+           PathKeyItem *item1 = (PathKeyItem *) lfirst(ptr1);
+           List       *ptr2;
+
+           foreach(ptr2, lnext(ptr1))
+           {
+               PathKeyItem *item2 = (PathKeyItem *) lfirst(ptr2);
+
+               process_implied_equality(root, item1->key, item2->key,
+                                        item1->sortop, item2->sortop);
+           }
+       }
+   }
+}
+
  /*
   * make_canonical_pathkey
   *   Given a PathKeyItem, find the equi_key_list subset it is a member of,
diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c

index 207981b527f5265e10986a22cf57674912be5985..6025b61be69d96fbd4f00e29faf22e1a8570b67d 100644 (file)
--- a/src/backend/optimizer/plan/initsplan.c
+++ b/src/backend/optimizer/plan/initsplan.c
@@ -8,13 +8,14 @@
   *
   *
   * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/optimizer/plan/initsplan.c,v 1.46 2000/04/12 17:15:21 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/optimizer/plan/initsplan.c,v 1.46.2.1 2000/09/23 23:50:47 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
  #include <sys/types.h>
  
  #include "postgres.h"
+#include "catalog/pg_operator.h"
  #include "catalog/pg_type.h"
  #include "nodes/makefuncs.h"
  #include "optimizer/clauses.h"
@@ -25,6 +26,9 @@
  #include "optimizer/planmain.h"
  #include "optimizer/tlist.h"
  #include "optimizer/var.h"
+#include "parser/parse_expr.h"
+#include "parser/parse_oper.h"
+#include "parser/parse_type.h"
  #include "utils/lsyscache.h"
  
  
@@ -280,6 +284,113 @@ add_join_info_to_rels(Query *root, RestrictInfo *restrictinfo,
     }
  }
  
+/*
+ * process_implied_equality
+ *   Check to see whether we already have a restrictinfo item that says
+ *   item1 = item2, and create one if not.  This is a consequence of
+ *   transitivity of mergejoin equality: if we have mergejoinable
+ *   clauses A = B and B = C, we can deduce A = C (where = is an
+ *   appropriate mergejoinable operator).
+ */
+void
+process_implied_equality(Query *root, Node *item1, Node *item2,
+                        Oid sortop1, Oid sortop2)
+{
+   Index       irel1;
+   Index       irel2;
+   RelOptInfo *rel1;
+   List       *restrictlist;
+   List       *itm;
+   Oid         ltype,
+               rtype;
+   Operator    eq_operator;
+   Form_pg_operator pgopform;
+   Expr       *clause;
+
+   /*
+    * Currently, since check_mergejoinable only accepts Var = Var clauses,
+    * we should only see Var nodes here.  Would have to work a little
+    * harder to locate the right rel(s) if more-general mergejoin clauses
+    * were accepted.
+    */
+   Assert(IsA(item1, Var));
+   irel1 = ((Var *) item1)->varno;
+   Assert(IsA(item2, Var));
+   irel2 = ((Var *) item2)->varno;
+   /*
+    * If both vars belong to same rel, we need to look at that rel's
+    * baserestrictinfo list.  If different rels, each will have a
+    * joininfo node for the other, and we can scan either list.
+    */
+   rel1 = get_base_rel(root, irel1);
+   if (irel1 == irel2)
+       restrictlist = rel1->baserestrictinfo;
+   else
+   {
+       JoinInfo   *joininfo = find_joininfo_node(rel1,
+                                                 lconsi(irel2, NIL));
+
+       restrictlist = joininfo->jinfo_restrictinfo;
+   }
+   /*
+    * Scan to see if equality is already known.
+    */
+   foreach(itm, restrictlist)
+   {
+       RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(itm);
+       Node       *left,
+                  *right;
+
+       if (restrictinfo->mergejoinoperator == InvalidOid)
+           continue;           /* ignore non-mergejoinable clauses */
+       /* We now know the restrictinfo clause is a binary opclause */
+       left = (Node *) get_leftop(restrictinfo->clause);
+       right = (Node *) get_rightop(restrictinfo->clause);
+       if ((equal(item1, left) && equal(item2, right)) ||
+           (equal(item2, left) && equal(item1, right)))
+           return;             /* found a matching clause */
+   }
+   /*
+    * This equality is new information, so construct a clause
+    * representing it to add to the query data structures.
+    */
+   ltype = exprType(item1);
+   rtype = exprType(item2);
+   eq_operator = oper("=", ltype, rtype, true);
+   if (!HeapTupleIsValid(eq_operator))
+   {
+       /*
+        * Would it be safe to just not add the equality to the query if
+        * we have no suitable equality operator for the combination of
+        * datatypes?  NO, because sortkey selection may screw up anyway.
+        */
+       elog(ERROR, "Unable to identify an equality operator for types '%s' and '%s'",
+            typeidTypeName(ltype), typeidTypeName(rtype));
+   }
+   pgopform = (Form_pg_operator) GETSTRUCT(eq_operator);
+   /*
+    * Let's just make sure this appears to be a compatible operator.
+    */
+   if (pgopform->oprlsortop != sortop1 ||
+       pgopform->oprrsortop != sortop2 ||
+       pgopform->oprresult != BOOLOID)
+       elog(ERROR, "Equality operator for types '%s' and '%s' should be mergejoinable, but isn't",
+            typeidTypeName(ltype), typeidTypeName(rtype));
+
+   clause = makeNode(Expr);
+   clause->typeOid = BOOLOID;
+   clause->opType = OP_EXPR;
+   clause->oper = (Node *) makeOper(oprid(eq_operator), /* opno */
+                                    InvalidOid, /* opid */
+                                    BOOLOID, /* operator result type */
+                                    0,
+                                    NULL);
+   clause->args = lcons(item1, lcons(item2, NIL));
+
+   add_restrict_and_join_to_rel(root, (Node *) clause);
+}
+
+
  /*****************************************************************************
   *
   *  CHECKS FOR MERGEJOINABLE AND HASHJOINABLE CLAUSES
diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c

index 0e05c945380cdc6ded1f537355dde034869562dc..5e0619a3d8d0d73257270c767aa1d847f6d0db82 100644 (file)
--- a/src/backend/optimizer/plan/planmain.c
+++ b/src/backend/optimizer/plan/planmain.c
@@ -14,7 +14,7 @@
   *
   *
   * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planmain.c,v 1.55 2000/04/12 17:15:22 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planmain.c,v 1.55.2.1 2000/09/23 23:50:47 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -184,7 +184,7 @@ subplanner(Query *root,
      * base_rel_list as relation references are found (e.g., in the
      * qualification, the targetlist, etc.).  Restrict and join clauses
      * are added to appropriate lists belonging to the mentioned
-    * relations, and we also build lists of equijoined keys for pathkey
+    * relations.  We also build lists of equijoined keys for pathkey
      * construction.
      */
     root->base_rel_list = NIL;
@@ -193,8 +193,18 @@ subplanner(Query *root,
  
     make_var_only_tlist(root, flat_tlist);
     add_restrict_and_join_to_rels(root, qual);
+
+   /*
+    * Make sure we have RelOptInfo nodes for all relations used.
+    */
     add_missing_rels_to_query(root);
  
+   /*
+    * Use the completed lists of equijoined keys to deduce any implied
+    * but unstated equalities (for example, A=B and B=C imply A=C).
+    */
+   generate_implied_equalities(root);
+
     /*
      * We should now have all the pathkey equivalence sets built, so it's
      * now possible to convert the requested query_pathkeys to canonical
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h

index 0a2f56db63afd1fe1a7fd22fcaa758f7f786abe1..0c45fc897f1a111eb7eaba0b990c8102c5f4fa06 100644 (file)
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -8,7 +8,7 @@
   * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: paths.h,v 1.44 2000/04/12 17:16:42 momjian Exp $
+ * $Id: paths.h,v 1.44.2.1 2000/09/23 23:50:46 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -90,6 +90,7 @@ typedef enum
  } PathKeysComparison;
  
  extern void add_equijoined_keys(Query *root, RestrictInfo *restrictinfo);
+extern void generate_implied_equalities(Query *root);
  extern List *canonicalize_pathkeys(Query *root, List *pathkeys);
  extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2);
  extern bool pathkeys_contained_in(List *keys1, List *keys2);
diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h

index e0a0d84bc8ac24bfac4cd32f81592fe828b19b6c..12e9d119578e331111336e2870dae166e05e359d 100644 (file)
--- a/src/include/optimizer/planmain.h
+++ b/src/include/optimizer/planmain.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: planmain.h,v 1.39 2000/04/12 17:16:42 momjian Exp $
+ * $Id: planmain.h,v 1.39.2.1 2000/09/23 23:50:46 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -43,6 +43,8 @@ extern Result *make_result(List *tlist, Node *resconstantqual, Plan *subplan);
  extern void make_var_only_tlist(Query *root, List *tlist);
  extern void add_restrict_and_join_to_rels(Query *root, List *clauses);
  extern void add_missing_rels_to_query(Query *root);
+extern void process_implied_equality(Query *root, Node *item1, Node *item2,
+                                    Oid sortop1, Oid sortop2);
  
  /*
   * prototypes for plan/setrefs.c
author	Tom Lane <[email protected]>
	Sat, 23 Sep 2000 23:50:47 +0000 (23:50 +0000)
committer	Tom Lane <[email protected]>
	Sat, 23 Sep 2000 23:50:47 +0000 (23:50 +0000)
src/backend/optimizer/path/pathkeys.c		patch \| blob \| blame \| history
src/backend/optimizer/plan/initsplan.c		patch \| blob \| blame \| history
src/backend/optimizer/plan/planmain.c		patch \| blob \| blame \| history
src/include/optimizer/paths.h		patch \| blob \| blame \| history
src/include/optimizer/planmain.h		patch \| blob \| blame \| history