Handle zero-length sublist correctly in Python -> SQL array conversion.
author Tom Lane <[email protected]>
Fri, 28 Apr 2023 16:24:29 +0000 (12:24 -0400)
committer Tom Lane <[email protected]>
Fri, 28 Apr 2023 16:24:29 +0000 (12:24 -0400)
If PLySequence_ToArray came across a zero-length sublist, it'd compute
the overall array size as zero, possibly leading to a memory clobber.
(This would likely qualify as a security bug, were it not that plpython
is an untrusted language already.)

I think there are other corner-case issues in this code as well, notably
that the error messages don't match the core code and for some ranges
of array sizes you'd get "invalid memory alloc request size" rather than
the intended message about array size.

Really this code has no business doing its own array size calculation
at all, so remove the faulty code in favor of using ArrayGetNItems().

Per bug #17912 from Alexander Lakhin.  Bug seems to have come in with
commit 94aceed31, so back-patch to all supported branches.

Discussion: https://postgr.es/m/17912-82ceed78731d9cdc@postgresql.org

src/pl/plpython/expected/plpython_types.out
src/pl/plpython/plpy_typeio.c
src/pl/plpython/sql/plpython_types.sql

diff --git a/src/pl/plpython/expected/plpython_types.out b/src/pl/plpython/expected/plpython_types.out
index a470911c2ec08f2cdcf9125e62e31b07ef343260..c4e749a5a83dcd0db7bb09dc22f3a368605ac9b9 100644
@@ -687,6 +687,15 @@ SELECT * FROM test_type_conversion_array_mixed2();
 ERROR:  invalid input syntax for type integer: "abc"
 CONTEXT:  while creating return value
 PL/Python function "test_type_conversion_array_mixed2"
+CREATE FUNCTION test_type_conversion_array_mixed3() RETURNS text[] AS $$
+return [[], 'a']
+$$ LANGUAGE plpython3u;
+SELECT * FROM test_type_conversion_array_mixed3();
+ test_type_conversion_array_mixed3 
+-----------------------------------
+ {[],a}
+(1 row)
+
 CREATE FUNCTION test_type_conversion_mdarray_malformed() RETURNS int[] AS $$
 return [[1,2,3],[4,5]]
 $$ LANGUAGE plpython3u;
diff --git a/src/pl/plpython/plpy_typeio.c b/src/pl/plpython/plpy_typeio.c
index 7018c9d40456e369873af5c13a8bf05ed087d65c..864b5f17650f499b46d32b910c5a435c680bd223 100644
@@ -1136,7 +1136,7 @@ PLySequence_ToArray(PLyObToDatum *arg, PyObject *plrv,
        int                     i;
        Datum      *elems;
        bool       *nulls;
-       int64           len;
+       int                     len;
        int                     ndim;
        int                     dims[MAXDIM];
        int                     lbs[MAXDIM];
@@ -1155,7 +1155,6 @@ PLySequence_ToArray(PLyObToDatum *arg, PyObject *plrv,
         * Determine the number of dimensions, and their sizes.
         */
        ndim = 0;
-       len = 1;
 
        Py_INCREF(plrv);
 
@@ -1174,17 +1173,6 @@ PLySequence_ToArray(PLyObToDatum *arg, PyObject *plrv,
                if (dims[ndim] < 0)
                        PLy_elog(ERROR, "could not determine sequence length for function return value");
 
-               if (dims[ndim] > MaxAllocSize)
-                       ereport(ERROR,
-                                       (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
-                                        errmsg("array size exceeds the maximum allowed")));
-
-               len *= dims[ndim];
-               if (len > MaxAllocSize)
-                       ereport(ERROR,
-                                       (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
-                                        errmsg("array size exceeds the maximum allowed")));
-
                if (dims[ndim] == 0)
                {
                        /* empty sequence */
@@ -1214,15 +1202,18 @@ PLySequence_ToArray(PLyObToDatum *arg, PyObject *plrv,
                                         errmsg("return value of function with array return type is not a Python sequence")));
 
                ndim = 1;
-               len = dims[0] = PySequence_Length(plrv);
+               dims[0] = PySequence_Length(plrv);
        }
 
+       /* Allocate space for work arrays, after detecting array size overflow */
+       len = ArrayGetNItems(ndim, dims);
+       elems = palloc(sizeof(Datum) * len);
+       nulls = palloc(sizeof(bool) * len);
+
        /*
         * Traverse the Python lists, in depth-first order, and collect all the
         * elements at the bottom level into 'elems'/'nulls' arrays.
         */
-       elems = palloc(sizeof(Datum) * len);
-       nulls = palloc(sizeof(bool) * len);
        currelem = 0;
        PLySequence_ToArray_recurse(arg->u.array.elm, plrv,
                                                                dims, ndim, 0,
diff --git a/src/pl/plpython/sql/plpython_types.sql b/src/pl/plpython/sql/plpython_types.sql
index 40f4f79d99fef570341db0f787b9a58807d47378..9702a10a7213206354e9203231a8cd3a8a2e56e7 100644
@@ -328,6 +328,13 @@ $$ LANGUAGE plpython3u;
 
 SELECT * FROM test_type_conversion_array_mixed2();
 
+CREATE FUNCTION test_type_conversion_array_mixed3() RETURNS text[] AS $$
+return [[], 'a']
+$$ LANGUAGE plpython3u;
+
+SELECT * FROM test_type_conversion_array_mixed3();
+
+
 CREATE FUNCTION test_type_conversion_mdarray_malformed() RETURNS int[] AS $$
 return [[1,2,3],[4,5]]
 $$ LANGUAGE plpython3u;