Bug#34666531: Mysqld failure - String::ptr()

roylyseng · roylyseng · commit 35a5b2538ce5 · 2022-12-20T10:29:00.000+01:00
The problem is that we lose the non-deterministic property of a UDF
when that function is part of a derived table that is merged into the
outer query block (or is part of a subquery that is converted to a
semi-join). It happens because fix_after_pullout() forgets about this
property. update_used_tables(), on the other hand, preserves it with
special logic that checks the non-deterministic property before
updating the used tables information.

The fix is to add a member m_non_deterministic that is set during
resolving (to true when the UDF is non-deterministic) and used in
fix_after_pullout() and update_used_tables() to set correct values for
used_tables(). The member is read by get_initial_pseudo_tables(), which
means that we can also remove the special update_used_tables()
implementation.

Change-Id: I26ea348e450acb92062df5b676e2a24110af5dd8
diff --git a/mysql-test/r/udf.result b/mysql-test/r/udf.result
@@ -725,4 +725,21 @@ f1	median
 2	100
 DROP TABLE t1;
 DROP FUNCTION my_median;
+CREATE FUNCTION sequence RETURNS INTEGER SONAME "UDF_EXAMPLE_LIB";
+CREATE TABLE t1 (a INT);
+INSERT INTO t1 VALUES (1),(2),(3),(4);
+SELECT a FROM t1 WHERE a = sequence();
+a
+1
+2
+3
+4
+SELECT a FROM (SELECT sequence() AS seq, a FROM t1) AS dt WHERE a = seq;
+a
+1
+2
+3
+4
+DROP FUNCTION sequence;
+DROP TABLE t1;
 # End of the 8.0 tests
diff --git a/mysql-test/t/udf.test b/mysql-test/t/udf.test
@@ -867,4 +867,19 @@ let $query =
 DROP TABLE t1;
 DROP FUNCTION my_median;
 
+#
+# Bug#34666531: Mysqld failure - String::ptr()
+#
+
+--replace_result $UDF_EXAMPLE_LIB UDF_EXAMPLE_LIB
+eval CREATE FUNCTION sequence RETURNS INTEGER SONAME "$UDF_EXAMPLE_LIB";
+CREATE TABLE t1 (a INT);
+INSERT INTO t1 VALUES (1),(2),(3),(4);
+
+SELECT a FROM t1 WHERE a = sequence();
+SELECT a FROM (SELECT sequence() AS seq, a FROM t1) AS dt WHERE a = seq;
+
+DROP FUNCTION sequence;
+DROP TABLE t1;
+
 --echo # End of the 8.0 tests
diff --git a/sql/item_func.cc b/sql/item_func.cc
@@ -4461,6 +4461,7 @@ bool Item_udf_func::fix_fields(THD *thd, Item **) {
   if (udf.fix_fields(thd, this, arg_count, args)) return true;
   if (thd->is_error()) return true;
   used_tables_cache = udf.used_tables_cache;
+  m_non_deterministic = is_non_deterministic();
   fixed = true;
   return false;
 }
@@ -4570,6 +4571,21 @@ bool udf_handler::fix_fields(THD *thd, Item_result_field *func, uint arg_count,
 
   if (func->resolve_type(thd)) return true;
 
+  /*
+    Calculation of constness and non-deterministic property of a UDF is done
+    according to this algorithm:
+    - If any argument to the UDF is non-const, the used tables information
+      and constness of the UDF is derived from the aggregated properties of
+      the arguments.
+    - If all arguments to the UDF are const and the init function specifies
+      the UDF to be non-const, the UDF is marked as non-deterministic.
+    Thus, initid.const_item is only considered when all arguments are const,
+    and its use is thus slightly inconsistent. However, the current behavior
+    seems to work well in most circumstances.
+
+    @todo Clarify the semantics of initid.const_item and make it affect
+          the constness and non-deterministic property more consistently.
+  */
   initid.max_length = func->max_length;
   initid.maybe_null = func->m_nullable;
   initid.const_item = used_tables_cache == 0;
diff --git a/sql/item_func.h b/sql/item_func.h
@@ -2108,54 +2108,10 @@ class Item_udf_func : public Item_func {
   bool itemize(Parse_context *pc, Item **res) override;
   const char *func_name() const override { return udf.name(); }
   enum Functype functype() const override { return UDF_FUNC; }
-  bool fix_fields(THD *thd, Item **ref) override;
-  void update_used_tables() override {
-    /*
-      TODO: Make a member in UDF_INIT and return if a UDF is deterministic or
-      not.
-      Currently UDF_INIT has a member (const_item) that is an in/out
-      parameter to the init() call.
-      The code in udf_handler::fix_fields also duplicates the arguments
-      handling code in Item_func::fix_fields().
-
-      The lack of information if a UDF is deterministic makes writing
-      a correct update_used_tables() for UDFs impossible.
-      One solution to this would be :
-       - Add a is_deterministic member of UDF_INIT
-       - (optionally) deprecate the const_item member of UDF_INIT
-       - Take away the duplicate code from udf_handler::fix_fields() and
-         make Item_udf_func call Item_func::fix_fields() to process its
-         arguments as for any other function.
-       - Store the deterministic flag returned by <udf>_init into the
-       udf_handler.
-       - Don't implement Item_udf_func::fix_fields, implement
-       Item_udf_func::resolve_type() instead (similar to non-UDF functions).
-       - Override Item_func::update_used_tables to call
-       Item_func::update_used_tables() and add a RAND_TABLE_BIT to the
-       result of Item_func::update_used_tables() if the UDF is
-       non-deterministic.
-       - (optionally) rename RAND_TABLE_BIT to NONDETERMINISTIC_BIT to
-       better describe its usage.
-
-      The above would require a change of the UDF API.
-      Until that change is done here's how the current code works:
-      We call Item_func::update_used_tables() only when we know that
-      the function depends on real non-const tables and is deterministic.
-      This can be done only because we know that the optimizer will
-      call update_used_tables() only when there's possibly a new const
-      table. So update_used_tables() can only make a Item_func more
-      constant than it is currently.
-      That's why we don't need to do anything if a function is guaranteed
-      to return non-constant (it's non-deterministic) or is already a
-      const.
-    */
-    if ((used_tables_cache & ~PSEUDO_TABLE_BITS) &&
-        !(used_tables_cache & RAND_TABLE_BIT))
-      Item_func::update_used_tables();
-
-    not_null_tables_cache = 0;
-    assert(!null_on_null);  // no need to update not_null_tables_cache
+  table_map get_initial_pseudo_tables() const override {
+    return m_non_deterministic ? RAND_TABLE_BIT : 0;
   }
+  bool fix_fields(THD *thd, Item **ref) override;
   void cleanup() override;
   Item_result result_type() const override { return udf.result_type(); }
   bool is_expensive() override { return true; }
@@ -2172,6 +2128,13 @@ class Item_udf_func : public Item_func {
 
  protected:
   bool may_have_named_parameters() const override { return true; }
+
+ private:
+  /**
+    This member is set during resolving and is used by update_used_tables() and
+    fix_after_pullout() to preserve the non-deterministic property.
+  */
+  bool m_non_deterministic{false};
 };
 
 class Item_func_udf_float final : public Item_udf_func {