Tighten error checks in datetime input, and remove bogus "ISO" format.
author: Tom Lane <[email protected]>
Thu, 16 Mar 2023 18:18:28 +0000 (14:18 -0400)
committer: Tom Lane <[email protected]>
Thu, 16 Mar 2023 18:18:33 +0000 (14:18 -0400)
DecodeDateTime and DecodeTimeOnly had support for date input in the
style "Y2023M03D16", which the comments claimed to be an "ISO" format.
However, so far as I can find there is no such format in ISO 8601;
they write units before numbers in intervals, but not in datetimes.
Furthermore, the lesser-known ISO 8601-2 spec actually defines an
incompatible format "2023Y03M16D".  None of our documentation mentions
such a format either.  So let's just drop it.

That leaves us with only two cases for a prefix unit specifier in
datetimes: Julian dates written as Jnnnn, and the "T" separator
defined by ISO 8601.  Add checks to catch misuse of these specifiers,
that is, consecutive specifiers or a dangling specifier at the end of
the string.  We do not, however, disallow a specifier that is separated
from the field that it disambiguates (by noise words or unrelated
fields).  That being the case, remove some overly-aggressive error
checks from the ISOTIME cases.

Joseph Koshakow, editorialized a bit by me; thanks also to
Peter Eisentraut for some standards-reading.

Discussion: https://postgr.es/m/CAAvxfHf2Q1gKLiHGnuPOiyf0ASvKUM4BnMfsXuwgtYEb_Gx0Zw@mail.gmail.com

src/backend/utils/adt/datetime.c
src/test/regress/expected/horology.out
src/test/regress/sql/horology.sql

index a7558d39a0e41eec4b3c8dcbde2b70407c136072..516ee9c154b25de74125800df7f935eab638b8dc 100644 (file)
@@ -983,7 +983,7 @@ DecodeDateTime(char **field, int *ftype, int nf,
        int                     fmask = 0,
                                tmask,
                                type;
-       int                     ptype = 0;              /* "prefix type" for ISO y2001m02d04 format */
+       int                     ptype = 0;              /* "prefix type" for ISO and Julian formats */
        int                     i;
        int                     val;
        int                     dterr;
@@ -1071,6 +1071,9 @@ DecodeDateTime(char **field, int *ftype, int nf,
                                        {
                                                char       *cp;
 
+                                               /*
+                                                * Allow a preceding "t" field, but no other units.
+                                                */
                                                if (ptype != 0)
                                                {
                                                        /* Sanity check; should not fail this test */
@@ -1175,8 +1178,7 @@ DecodeDateTime(char **field, int *ftype, int nf,
                        case DTK_NUMBER:
 
                                /*
-                                * Was this an "ISO date" with embedded field labels? An
-                                * example is "y2001m02d04" - thomas 2001-02-04
+                                * Deal with cases where previous field labeled this one
                                 */
                                if (ptype != 0)
                                {
@@ -1187,85 +1189,11 @@ DecodeDateTime(char **field, int *ftype, int nf,
                                        value = strtoint(field[i], &cp, 10);
                                        if (errno == ERANGE)
                                                return DTERR_FIELD_OVERFLOW;
-
-                                       /*
-                                        * only a few kinds are allowed to have an embedded
-                                        * decimal
-                                        */
-                                       if (*cp == '.')
-                                               switch (ptype)
-                                               {
-                                                       case DTK_JULIAN:
-                                                       case DTK_TIME:
-                                                       case DTK_SECOND:
-                                                               break;
-                                                       default:
-                                                               return DTERR_BAD_FORMAT;
-                                                               break;
-                                               }
-                                       else if (*cp != '\0')
+                                       if (*cp != '.' && *cp != '\0')
                                                return DTERR_BAD_FORMAT;
 
                                        switch (ptype)
                                        {
-                                               case DTK_YEAR:
-                                                       tm->tm_year = value;
-                                                       tmask = DTK_M(YEAR);
-                                                       break;
-
-                                               case DTK_MONTH:
-
-                                                       /*
-                                                        * already have a month and hour? then assume
-                                                        * minutes
-                                                        */
-                                                       if ((fmask & DTK_M(MONTH)) != 0 &&
-                                                               (fmask & DTK_M(HOUR)) != 0)
-                                                       {
-                                                               tm->tm_min = value;
-                                                               tmask = DTK_M(MINUTE);
-                                                       }
-                                                       else
-                                                       {
-                                                               tm->tm_mon = value;
-                                                               tmask = DTK_M(MONTH);
-                                                       }
-                                                       break;
-
-                                               case DTK_DAY:
-                                                       tm->tm_mday = value;
-                                                       tmask = DTK_M(DAY);
-                                                       break;
-
-                                               case DTK_HOUR:
-                                                       tm->tm_hour = value;
-                                                       tmask = DTK_M(HOUR);
-                                                       break;
-
-                                               case DTK_MINUTE:
-                                                       tm->tm_min = value;
-                                                       tmask = DTK_M(MINUTE);
-                                                       break;
-
-                                               case DTK_SECOND:
-                                                       tm->tm_sec = value;
-                                                       tmask = DTK_M(SECOND);
-                                                       if (*cp == '.')
-                                                       {
-                                                               dterr = ParseFractionalSecond(cp, fsec);
-                                                               if (dterr)
-                                                                       return dterr;
-                                                               tmask = DTK_ALL_SECS_M;
-                                                       }
-                                                       break;
-
-                                               case DTK_TZ:
-                                                       tmask = DTK_M(TZ);
-                                                       dterr = DecodeTimezone(field[i], tzp);
-                                                       if (dterr)
-                                                               return dterr;
-                                                       break;
-
                                                case DTK_JULIAN:
                                                        /* previous field was a label for "julian date" */
                                                        if (value < 0)
@@ -1519,6 +1447,9 @@ DecodeDateTime(char **field, int *ftype, int nf,
 
                                        case UNITS:
                                                tmask = 0;
+                                               /* reject consecutive unhandled units */
+                                               if (ptype != 0)
+                                                       return DTERR_BAD_FORMAT;
                                                ptype = val;
                                                break;
 
@@ -1534,18 +1465,9 @@ DecodeDateTime(char **field, int *ftype, int nf,
                                                if ((fmask & DTK_DATE_M) != DTK_DATE_M)
                                                        return DTERR_BAD_FORMAT;
 
-                                               /***
-                                                * We will need one of the following fields:
-                                                *      DTK_NUMBER should be hhmmss.fff
-                                                *      DTK_TIME should be hh:mm:ss.fff
-                                                *      DTK_DATE should be hhmmss-zz
-                                                ***/
-                                               if (i >= nf - 1 ||
-                                                       (ftype[i + 1] != DTK_NUMBER &&
-                                                        ftype[i + 1] != DTK_TIME &&
-                                                        ftype[i + 1] != DTK_DATE))
+                                               /* reject consecutive unhandled units */
+                                               if (ptype != 0)
                                                        return DTERR_BAD_FORMAT;
-
                                                ptype = val;
                                                break;
 
@@ -1576,6 +1498,10 @@ DecodeDateTime(char **field, int *ftype, int nf,
                fmask |= tmask;
        }                                                       /* end loop over fields */
 
+       /* reject if prefix type appeared and was never handled */
+       if (ptype != 0)
+               return DTERR_BAD_FORMAT;
+
        /* do additional checking for normal date specs (but not "infinity" etc) */
        if (*dtype == DTK_DATE)
        {
@@ -1943,7 +1869,7 @@ DecodeTimeOnly(char **field, int *ftype, int nf,
        int                     fmask = 0,
                                tmask,
                                type;
-       int                     ptype = 0;              /* "prefix type" for ISO h04mm05s06 format */
+       int                     ptype = 0;              /* "prefix type" for ISO and Julian formats */
        int                     i;
        int                     val;
        int                     dterr;
@@ -2070,112 +1996,26 @@ DecodeTimeOnly(char **field, int *ftype, int nf,
                        case DTK_NUMBER:
 
                                /*
-                                * Was this an "ISO time" with embedded field labels? An
-                                * example is "h04mm05s06" - thomas 2001-02-04
+                                * Deal with cases where previous field labeled this one
                                 */
                                if (ptype != 0)
                                {
                                        char       *cp;
                                        int                     value;
 
-                                       /* Only accept a date under limited circumstances */
-                                       switch (ptype)
-                                       {
-                                               case DTK_JULIAN:
-                                               case DTK_YEAR:
-                                               case DTK_MONTH:
-                                               case DTK_DAY:
-                                                       if (tzp == NULL)
-                                                               return DTERR_BAD_FORMAT;
-                                               default:
-                                                       break;
-                                       }
-
                                        errno = 0;
                                        value = strtoint(field[i], &cp, 10);
                                        if (errno == ERANGE)
                                                return DTERR_FIELD_OVERFLOW;
-
-                                       /*
-                                        * only a few kinds are allowed to have an embedded
-                                        * decimal
-                                        */
-                                       if (*cp == '.')
-                                               switch (ptype)
-                                               {
-                                                       case DTK_JULIAN:
-                                                       case DTK_TIME:
-                                                       case DTK_SECOND:
-                                                               break;
-                                                       default:
-                                                               return DTERR_BAD_FORMAT;
-                                                               break;
-                                               }
-                                       else if (*cp != '\0')
+                                       if (*cp != '.' && *cp != '\0')
                                                return DTERR_BAD_FORMAT;
 
                                        switch (ptype)
                                        {
-                                               case DTK_YEAR:
-                                                       tm->tm_year = value;
-                                                       tmask = DTK_M(YEAR);
-                                                       break;
-
-                                               case DTK_MONTH:
-
-                                                       /*
-                                                        * already have a month and hour? then assume
-                                                        * minutes
-                                                        */
-                                                       if ((fmask & DTK_M(MONTH)) != 0 &&
-                                                               (fmask & DTK_M(HOUR)) != 0)
-                                                       {
-                                                               tm->tm_min = value;
-                                                               tmask = DTK_M(MINUTE);
-                                                       }
-                                                       else
-                                                       {
-                                                               tm->tm_mon = value;
-                                                               tmask = DTK_M(MONTH);
-                                                       }
-                                                       break;
-
-                                               case DTK_DAY:
-                                                       tm->tm_mday = value;
-                                                       tmask = DTK_M(DAY);
-                                                       break;
-
-                                               case DTK_HOUR:
-                                                       tm->tm_hour = value;
-                                                       tmask = DTK_M(HOUR);
-                                                       break;
-
-                                               case DTK_MINUTE:
-                                                       tm->tm_min = value;
-                                                       tmask = DTK_M(MINUTE);
-                                                       break;
-
-                                               case DTK_SECOND:
-                                                       tm->tm_sec = value;
-                                                       tmask = DTK_M(SECOND);
-                                                       if (*cp == '.')
-                                                       {
-                                                               dterr = ParseFractionalSecond(cp, fsec);
-                                                               if (dterr)
-                                                                       return dterr;
-                                                               tmask = DTK_ALL_SECS_M;
-                                                       }
-                                                       break;
-
-                                               case DTK_TZ:
-                                                       tmask = DTK_M(TZ);
-                                                       dterr = DecodeTimezone(field[i], tzp);
-                                                       if (dterr)
-                                                               return dterr;
-                                                       break;
-
                                                case DTK_JULIAN:
                                                        /* previous field was a label for "julian date" */
+                                                       if (tzp == NULL)
+                                                               return DTERR_BAD_FORMAT;
                                                        if (value < 0)
                                                                return DTERR_FIELD_OVERFLOW;
                                                        tmask = DTK_DATE_M;
@@ -2378,24 +2218,17 @@ DecodeTimeOnly(char **field, int *ftype, int nf,
 
                                        case UNITS:
                                                tmask = 0;
+                                               /* reject consecutive unhandled units */
+                                               if (ptype != 0)
+                                                       return DTERR_BAD_FORMAT;
                                                ptype = val;
                                                break;
 
                                        case ISOTIME:
                                                tmask = 0;
-
-                                               /***
-                                                * We will need one of the following fields:
-                                                *      DTK_NUMBER should be hhmmss.fff
-                                                *      DTK_TIME should be hh:mm:ss.fff
-                                                *      DTK_DATE should be hhmmss-zz
-                                                ***/
-                                               if (i >= nf - 1 ||
-                                                       (ftype[i + 1] != DTK_NUMBER &&
-                                                        ftype[i + 1] != DTK_TIME &&
-                                                        ftype[i + 1] != DTK_DATE))
+                                               /* reject consecutive unhandled units */
+                                               if (ptype != 0)
                                                        return DTERR_BAD_FORMAT;
-
                                                ptype = val;
                                                break;
 
@@ -2426,6 +2259,10 @@ DecodeTimeOnly(char **field, int *ftype, int nf,
                fmask |= tmask;
        }                                                       /* end loop over fields */
 
+       /* reject if prefix type appeared and was never handled */
+       if (ptype != 0)
+               return DTERR_BAD_FORMAT;
+
        /* do final checking/adjustment of Y/M/D fields */
        dterr = ValidateDate(fmask, isjulian, is2digits, bc, tm);
        if (dterr)
index 4f01131077bf43556fe9e7dac833dea6e2accbf0..e63e5b30fee8e16f979bc5372be6c97e1d41bf35 100644 (file)
@@ -83,6 +83,18 @@ SELECT timestamp with time zone '12/27/2001 04:05:06.789-08';
  Thu Dec 27 04:05:06.789 2001 PST
 (1 row)
 
+SELECT timestamp with time zone '2001-12-27 04:05:06.789 MET DST';
+           timestamptz            
+----------------------------------
+ Wed Dec 26 18:05:06.789 2001 PST
+(1 row)
+
+SELECT timestamp with time zone '2001-12-27 allballs';
+         timestamptz          
+------------------------------
+ Wed Dec 26 16:00:00 2001 PST
+(1 row)
+
 -- should fail in mdy mode:
 SELECT timestamp with time zone '27/12/2001 04:05:06.789-08';
 ERROR:  date/time field value out of range: "27/12/2001 04:05:06.789-08"
@@ -97,30 +109,6 @@ SELECT timestamp with time zone '27/12/2001 04:05:06.789-08';
 (1 row)
 
 reset datestyle;
-SELECT timestamp with time zone 'Y2001M12D27H04M05S06.789+08';
-           timestamptz            
-----------------------------------
- Wed Dec 26 12:05:06.789 2001 PST
-(1 row)
-
-SELECT timestamp with time zone 'Y2001M12D27H04M05S06.789-08';
-           timestamptz            
-----------------------------------
- Thu Dec 27 04:05:06.789 2001 PST
-(1 row)
-
-SELECT timestamp with time zone 'Y2001M12D27H04MM05S06.789+08';
-           timestamptz            
-----------------------------------
- Wed Dec 26 12:05:06.789 2001 PST
-(1 row)
-
-SELECT timestamp with time zone 'Y2001M12D27H04MM05S06.789-08';
-           timestamptz            
-----------------------------------
- Thu Dec 27 04:05:06.789 2001 PST
-(1 row)
-
 SELECT timestamp with time zone 'J2452271+08';
          timestamptz          
 ------------------------------
@@ -269,6 +257,23 @@ SELECT time with time zone 'T040506.789 -08';
  04:05:06.789-08
 (1 row)
 
+-- time with time zone should accept a date for DST resolution purposes
+SELECT time with time zone 'T040506.789 America/Los_Angeles';
+ERROR:  invalid input syntax for type time with time zone: "T040506.789 America/Los_Angeles"
+LINE 1: SELECT time with time zone 'T040506.789 America/Los_Angeles'...
+                                   ^
+SELECT time with time zone '2001-12-27 T040506.789 America/Los_Angeles';
+     timetz      
+-----------------
+ 04:05:06.789-08
+(1 row)
+
+SELECT time with time zone 'J2452271 T040506.789 America/Los_Angeles';
+     timetz      
+-----------------
+ 04:05:06.789-08
+(1 row)
+
 SET DateStyle = 'Postgres, MDY';
 -- Check Julian dates BC
 SELECT date 'J1520447' AS "Confucius' Birthday";
@@ -283,6 +288,25 @@ SELECT date 'J0' AS "Julian Epoch";
  11-24-4714 BC
 (1 row)
 
+-- test error on dangling Julian units
+SELECT date '1995-08-06  J J J';
+ERROR:  invalid input syntax for type date: "1995-08-06  J J J"
+LINE 1: SELECT date '1995-08-06  J J J';
+                    ^
+SELECT date 'J J 1520447';
+ERROR:  invalid input syntax for type date: "J J 1520447"
+LINE 1: SELECT date 'J J 1520447';
+                    ^
+-- We used to accept this input style, but it was based on a misreading
+-- of ISO8601, and it was never documented anyway
+SELECT timestamp with time zone 'Y2001M12D27H04M05S06.789+08';
+ERROR:  invalid input syntax for type timestamp with time zone: "Y2001M12D27H04M05S06.789+08"
+LINE 1: SELECT timestamp with time zone 'Y2001M12D27H04M05S06.789+08...
+                                        ^
+SELECT timestamp with time zone 'Y2001M12D27H04MM05S06.789-08';
+ERROR:  invalid input syntax for type timestamp with time zone: "Y2001M12D27H04MM05S06.789-08"
+LINE 1: SELECT timestamp with time zone 'Y2001M12D27H04MM05S06.789-0...
+                                        ^
 -- conflicting fields should throw errors
 SELECT date '1995-08-06 epoch';
 ERROR:  invalid input syntax for type date: "1995-08-06 epoch"
index 0676cac5d173e26523ef2edccf98998c03a70355..f7f8c8d2dd9eba2b116a97239a7b3274a5f33353 100644 (file)
@@ -20,15 +20,13 @@ SELECT timestamp with time zone '2001-12-27 04:05:06.789-08';
 SELECT timestamp with time zone '2001.12.27 04:05:06.789-08';
 SELECT timestamp with time zone '2001/12/27 04:05:06.789-08';
 SELECT timestamp with time zone '12/27/2001 04:05:06.789-08';
+SELECT timestamp with time zone '2001-12-27 04:05:06.789 MET DST';
+SELECT timestamp with time zone '2001-12-27 allballs';
 -- should fail in mdy mode:
 SELECT timestamp with time zone '27/12/2001 04:05:06.789-08';
 set datestyle to dmy;
 SELECT timestamp with time zone '27/12/2001 04:05:06.789-08';
 reset datestyle;
-SELECT timestamp with time zone 'Y2001M12D27H04M05S06.789+08';
-SELECT timestamp with time zone 'Y2001M12D27H04M05S06.789-08';
-SELECT timestamp with time zone 'Y2001M12D27H04MM05S06.789+08';
-SELECT timestamp with time zone 'Y2001M12D27H04MM05S06.789-08';
 SELECT timestamp with time zone 'J2452271+08';
 SELECT timestamp with time zone 'J2452271-08';
 SELECT timestamp with time zone 'J2452271.5+08';
@@ -57,11 +55,24 @@ SELECT time with time zone 'T040506.789+08';
 SELECT time with time zone 'T040506.789-08';
 SELECT time with time zone 'T040506.789 +08';
 SELECT time with time zone 'T040506.789 -08';
+-- time with time zone should accept a date for DST resolution purposes
+SELECT time with time zone 'T040506.789 America/Los_Angeles';
+SELECT time with time zone '2001-12-27 T040506.789 America/Los_Angeles';
+SELECT time with time zone 'J2452271 T040506.789 America/Los_Angeles';
 SET DateStyle = 'Postgres, MDY';
 -- Check Julian dates BC
 SELECT date 'J1520447' AS "Confucius' Birthday";
 SELECT date 'J0' AS "Julian Epoch";
 
+-- test error on dangling Julian units
+SELECT date '1995-08-06  J J J';
+SELECT date 'J J 1520447';
+
+-- We used to accept this input style, but it was based on a misreading
+-- of ISO8601, and it was never documented anyway
+SELECT timestamp with time zone 'Y2001M12D27H04M05S06.789+08';
+SELECT timestamp with time zone 'Y2001M12D27H04MM05S06.789-08';
+
 -- conflicting fields should throw errors
 SELECT date '1995-08-06 epoch';
 SELECT date '1995-08-06 infinity';