Skip to content

Commit b9fc5c0

Browse files
committed
[SPARK-44778][SQL] Add the alias TIMEDIFF for TIMESTAMPDIFF
### What changes were proposed in this pull request? In the PR, I propose to extend the `primaryExpression` rule in `SqlBaseParser.g4` and add one more function, `TIMEDIFF`, which accepts 3 arguments in the same way as the existing expression `TIMESTAMPDIFF`. ### Why are the changes needed? To achieve feature parity with other systems and make the migration to Spark SQL from such systems easier: 1. Snowflake: https://docs.snowflake.com/en/sql-reference/functions/timediff 2. MySQL/MariaDB: https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_timediff ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? By running the existing test suites: ``` $ PYSPARK_PYTHON=python3 build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite" ``` Closes apache#42435 from MaxGekk/timediff. Authored-by: Max Gekk <[email protected]> Signed-off-by: Max Gekk <[email protected]>
1 parent 7baf9da commit b9fc5c0

File tree

17 files changed

+760
-2
lines changed

17 files changed

+760
-2
lines changed

docs/sql-ref-ansi-compliance.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -636,6 +636,7 @@ Below is a list of all the keywords in Spark SQL.
636636
|TERMINATED|non-reserved|non-reserved|non-reserved|
637637
|THEN|reserved|non-reserved|reserved|
638638
|TIME|reserved|non-reserved|reserved|
639+
|TIMEDIFF|non-reserved|non-reserved|non-reserved|
639640
|TIMESTAMP|non-reserved|non-reserved|non-reserved|
640641
|TIMESTAMP_LTZ|non-reserved|non-reserved|non-reserved|
641642
|TIMESTAMP_NTZ|non-reserved|non-reserved|non-reserved|

sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -373,6 +373,7 @@ TEMPORARY: 'TEMPORARY' | 'TEMP';
373373
TERMINATED: 'TERMINATED';
374374
THEN: 'THEN';
375375
TIME: 'TIME';
376+
TIMEDIFF: 'TIMEDIFF';
376377
TIMESTAMP: 'TIMESTAMP';
377378
TIMESTAMP_LTZ: 'TIMESTAMP_LTZ';
378379
TIMESTAMP_NTZ: 'TIMESTAMP_NTZ';

sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -953,7 +953,7 @@ datetimeUnit
953953
primaryExpression
954954
: name=(CURRENT_DATE | CURRENT_TIMESTAMP | CURRENT_USER | USER) #currentLike
955955
| name=(TIMESTAMPADD | DATEADD | DATE_ADD) LEFT_PAREN (unit=datetimeUnit | invalidUnit=stringLit) COMMA unitsAmount=valueExpression COMMA timestamp=valueExpression RIGHT_PAREN #timestampadd
956-
| name=(TIMESTAMPDIFF | DATEDIFF | DATE_DIFF) LEFT_PAREN (unit=datetimeUnit | invalidUnit=stringLit) COMMA startTimestamp=valueExpression COMMA endTimestamp=valueExpression RIGHT_PAREN #timestampdiff
956+
| name=(TIMESTAMPDIFF | DATEDIFF | DATE_DIFF | TIMEDIFF) LEFT_PAREN (unit=datetimeUnit | invalidUnit=stringLit) COMMA startTimestamp=valueExpression COMMA endTimestamp=valueExpression RIGHT_PAREN #timestampdiff
957957
| CASE whenClause+ (ELSE elseExpression=expression)? END #searchedCase
958958
| CASE value=expression whenClause+ (ELSE elseExpression=expression)? END #simpleCase
959959
| name=(CAST | TRY_CAST) LEFT_PAREN expression AS dataType RIGHT_PAREN #cast
@@ -1511,6 +1511,7 @@ ansiNonReserved
15111511
| TBLPROPERTIES
15121512
| TEMPORARY
15131513
| TERMINATED
1514+
| TIMEDIFF
15141515
| TIMESTAMP
15151516
| TIMESTAMP_LTZ
15161517
| TIMESTAMP_NTZ
@@ -1853,6 +1854,7 @@ nonReserved
18531854
| TERMINATED
18541855
| THEN
18551856
| TIME
1857+
| TIMEDIFF
18561858
| TIMESTAMP
18571859
| TIMESTAMP_LTZ
18581860
| TIMESTAMP_NTZ

sql/core/src/test/resources/sql-tests/analyzer-results/ansi/timestamp.sql.out

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -875,3 +875,71 @@ org.apache.spark.sql.catalyst.parser.ParseException
875875
"fragment" : "timestampdiff('YEAR', date'2022-02-15', date'2023-02-15')"
876876
} ]
877877
}
878+
879+
880+
-- !query
881+
select timediff(QUARTER, timestamp'2023-08-10 01:02:03', timestamp'2022-01-14 01:02:03')
882+
-- !query analysis
883+
[Analyzer test output redacted due to nondeterminism]
884+
885+
886+
-- !query
887+
select timediff(HOUR, timestamp'2022-02-14 01:02:03', timestamp'2022-02-14 12:00:03')
888+
-- !query analysis
889+
[Analyzer test output redacted due to nondeterminism]
890+
891+
892+
-- !query
893+
select timediff(DAY, date'2022-02-15', date'2023-02-15')
894+
-- !query analysis
895+
[Analyzer test output redacted due to nondeterminism]
896+
897+
898+
-- !query
899+
select timediff(SECOND, date'2022-02-15', timestamp'2022-02-14 23:59:59')
900+
-- !query analysis
901+
[Analyzer test output redacted due to nondeterminism]
902+
903+
904+
-- !query
905+
select timediff('MINUTE', timestamp'2023-02-14 01:02:03', timestamp'2023-02-14 02:00:03')
906+
-- !query analysis
907+
org.apache.spark.sql.catalyst.parser.ParseException
908+
{
909+
"errorClass" : "INVALID_PARAMETER_VALUE.DATETIME_UNIT",
910+
"sqlState" : "22023",
911+
"messageParameters" : {
912+
"functionName" : "`timediff`",
913+
"invalidValue" : "'MINUTE'",
914+
"parameter" : "`unit`"
915+
},
916+
"queryContext" : [ {
917+
"objectType" : "",
918+
"objectName" : "",
919+
"startIndex" : 8,
920+
"stopIndex" : 89,
921+
"fragment" : "timediff('MINUTE', timestamp'2023-02-14 01:02:03', timestamp'2023-02-14 02:00:03')"
922+
} ]
923+
}
924+
925+
926+
-- !query
927+
select timediff('YEAR', date'2020-02-15', date'2023-02-15')
928+
-- !query analysis
929+
org.apache.spark.sql.catalyst.parser.ParseException
930+
{
931+
"errorClass" : "INVALID_PARAMETER_VALUE.DATETIME_UNIT",
932+
"sqlState" : "22023",
933+
"messageParameters" : {
934+
"functionName" : "`timediff`",
935+
"invalidValue" : "'YEAR'",
936+
"parameter" : "`unit`"
937+
},
938+
"queryContext" : [ {
939+
"objectType" : "",
940+
"objectName" : "",
941+
"startIndex" : 8,
942+
"stopIndex" : 59,
943+
"fragment" : "timediff('YEAR', date'2020-02-15', date'2023-02-15')"
944+
} ]
945+
}

sql/core/src/test/resources/sql-tests/analyzer-results/datetime-legacy.sql.out

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1978,3 +1978,71 @@ org.apache.spark.sql.catalyst.parser.ParseException
19781978
"fragment" : "timestampdiff('YEAR', date'2022-02-15', date'2023-02-15')"
19791979
} ]
19801980
}
1981+
1982+
1983+
-- !query
1984+
select timediff(QUARTER, timestamp'2023-08-10 01:02:03', timestamp'2022-01-14 01:02:03')
1985+
-- !query analysis
1986+
[Analyzer test output redacted due to nondeterminism]
1987+
1988+
1989+
-- !query
1990+
select timediff(HOUR, timestamp'2022-02-14 01:02:03', timestamp'2022-02-14 12:00:03')
1991+
-- !query analysis
1992+
[Analyzer test output redacted due to nondeterminism]
1993+
1994+
1995+
-- !query
1996+
select timediff(DAY, date'2022-02-15', date'2023-02-15')
1997+
-- !query analysis
1998+
[Analyzer test output redacted due to nondeterminism]
1999+
2000+
2001+
-- !query
2002+
select timediff(SECOND, date'2022-02-15', timestamp'2022-02-14 23:59:59')
2003+
-- !query analysis
2004+
[Analyzer test output redacted due to nondeterminism]
2005+
2006+
2007+
-- !query
2008+
select timediff('MINUTE', timestamp'2023-02-14 01:02:03', timestamp'2023-02-14 02:00:03')
2009+
-- !query analysis
2010+
org.apache.spark.sql.catalyst.parser.ParseException
2011+
{
2012+
"errorClass" : "INVALID_PARAMETER_VALUE.DATETIME_UNIT",
2013+
"sqlState" : "22023",
2014+
"messageParameters" : {
2015+
"functionName" : "`timediff`",
2016+
"invalidValue" : "'MINUTE'",
2017+
"parameter" : "`unit`"
2018+
},
2019+
"queryContext" : [ {
2020+
"objectType" : "",
2021+
"objectName" : "",
2022+
"startIndex" : 8,
2023+
"stopIndex" : 89,
2024+
"fragment" : "timediff('MINUTE', timestamp'2023-02-14 01:02:03', timestamp'2023-02-14 02:00:03')"
2025+
} ]
2026+
}
2027+
2028+
2029+
-- !query
2030+
select timediff('YEAR', date'2020-02-15', date'2023-02-15')
2031+
-- !query analysis
2032+
org.apache.spark.sql.catalyst.parser.ParseException
2033+
{
2034+
"errorClass" : "INVALID_PARAMETER_VALUE.DATETIME_UNIT",
2035+
"sqlState" : "22023",
2036+
"messageParameters" : {
2037+
"functionName" : "`timediff`",
2038+
"invalidValue" : "'YEAR'",
2039+
"parameter" : "`unit`"
2040+
},
2041+
"queryContext" : [ {
2042+
"objectType" : "",
2043+
"objectName" : "",
2044+
"startIndex" : 8,
2045+
"stopIndex" : 59,
2046+
"fragment" : "timediff('YEAR', date'2020-02-15', date'2023-02-15')"
2047+
} ]
2048+
}

sql/core/src/test/resources/sql-tests/analyzer-results/timestamp.sql.out

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -947,3 +947,71 @@ org.apache.spark.sql.catalyst.parser.ParseException
947947
"fragment" : "timestampdiff('YEAR', date'2022-02-15', date'2023-02-15')"
948948
} ]
949949
}
950+
951+
952+
-- !query
953+
select timediff(QUARTER, timestamp'2023-08-10 01:02:03', timestamp'2022-01-14 01:02:03')
954+
-- !query analysis
955+
[Analyzer test output redacted due to nondeterminism]
956+
957+
958+
-- !query
959+
select timediff(HOUR, timestamp'2022-02-14 01:02:03', timestamp'2022-02-14 12:00:03')
960+
-- !query analysis
961+
[Analyzer test output redacted due to nondeterminism]
962+
963+
964+
-- !query
965+
select timediff(DAY, date'2022-02-15', date'2023-02-15')
966+
-- !query analysis
967+
[Analyzer test output redacted due to nondeterminism]
968+
969+
970+
-- !query
971+
select timediff(SECOND, date'2022-02-15', timestamp'2022-02-14 23:59:59')
972+
-- !query analysis
973+
[Analyzer test output redacted due to nondeterminism]
974+
975+
976+
-- !query
977+
select timediff('MINUTE', timestamp'2023-02-14 01:02:03', timestamp'2023-02-14 02:00:03')
978+
-- !query analysis
979+
org.apache.spark.sql.catalyst.parser.ParseException
980+
{
981+
"errorClass" : "INVALID_PARAMETER_VALUE.DATETIME_UNIT",
982+
"sqlState" : "22023",
983+
"messageParameters" : {
984+
"functionName" : "`timediff`",
985+
"invalidValue" : "'MINUTE'",
986+
"parameter" : "`unit`"
987+
},
988+
"queryContext" : [ {
989+
"objectType" : "",
990+
"objectName" : "",
991+
"startIndex" : 8,
992+
"stopIndex" : 89,
993+
"fragment" : "timediff('MINUTE', timestamp'2023-02-14 01:02:03', timestamp'2023-02-14 02:00:03')"
994+
} ]
995+
}
996+
997+
998+
-- !query
999+
select timediff('YEAR', date'2020-02-15', date'2023-02-15')
1000+
-- !query analysis
1001+
org.apache.spark.sql.catalyst.parser.ParseException
1002+
{
1003+
"errorClass" : "INVALID_PARAMETER_VALUE.DATETIME_UNIT",
1004+
"sqlState" : "22023",
1005+
"messageParameters" : {
1006+
"functionName" : "`timediff`",
1007+
"invalidValue" : "'YEAR'",
1008+
"parameter" : "`unit`"
1009+
},
1010+
"queryContext" : [ {
1011+
"objectType" : "",
1012+
"objectName" : "",
1013+
"startIndex" : 8,
1014+
"stopIndex" : 59,
1015+
"fragment" : "timediff('YEAR', date'2020-02-15', date'2023-02-15')"
1016+
} ]
1017+
}

sql/core/src/test/resources/sql-tests/analyzer-results/timestampNTZ/timestamp-ansi.sql.out

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -894,3 +894,73 @@ org.apache.spark.sql.catalyst.parser.ParseException
894894
"fragment" : "timestampdiff('YEAR', date'2022-02-15', date'2023-02-15')"
895895
} ]
896896
}
897+
898+
899+
-- !query
900+
select timediff(QUARTER, timestamp'2023-08-10 01:02:03', timestamp'2022-01-14 01:02:03')
901+
-- !query analysis
902+
Project [timestampdiff(QUARTER, cast(2023-08-10 01:02:03 as timestamp), cast(2022-01-14 01:02:03 as timestamp), Some(America/Los_Angeles)) AS timestampdiff(QUARTER, TIMESTAMP_NTZ '2023-08-10 01:02:03', TIMESTAMP_NTZ '2022-01-14 01:02:03')#xL]
903+
+- OneRowRelation
904+
905+
906+
-- !query
907+
select timediff(HOUR, timestamp'2022-02-14 01:02:03', timestamp'2022-02-14 12:00:03')
908+
-- !query analysis
909+
Project [timestampdiff(HOUR, cast(2022-02-14 01:02:03 as timestamp), cast(2022-02-14 12:00:03 as timestamp), Some(America/Los_Angeles)) AS timestampdiff(HOUR, TIMESTAMP_NTZ '2022-02-14 01:02:03', TIMESTAMP_NTZ '2022-02-14 12:00:03')#xL]
910+
+- OneRowRelation
911+
912+
913+
-- !query
914+
select timediff(DAY, date'2022-02-15', date'2023-02-15')
915+
-- !query analysis
916+
[Analyzer test output redacted due to nondeterminism]
917+
918+
919+
-- !query
920+
select timediff(SECOND, date'2022-02-15', timestamp'2022-02-14 23:59:59')
921+
-- !query analysis
922+
[Analyzer test output redacted due to nondeterminism]
923+
924+
925+
-- !query
926+
select timediff('MINUTE', timestamp'2023-02-14 01:02:03', timestamp'2023-02-14 02:00:03')
927+
-- !query analysis
928+
org.apache.spark.sql.catalyst.parser.ParseException
929+
{
930+
"errorClass" : "INVALID_PARAMETER_VALUE.DATETIME_UNIT",
931+
"sqlState" : "22023",
932+
"messageParameters" : {
933+
"functionName" : "`timediff`",
934+
"invalidValue" : "'MINUTE'",
935+
"parameter" : "`unit`"
936+
},
937+
"queryContext" : [ {
938+
"objectType" : "",
939+
"objectName" : "",
940+
"startIndex" : 8,
941+
"stopIndex" : 89,
942+
"fragment" : "timediff('MINUTE', timestamp'2023-02-14 01:02:03', timestamp'2023-02-14 02:00:03')"
943+
} ]
944+
}
945+
946+
947+
-- !query
948+
select timediff('YEAR', date'2020-02-15', date'2023-02-15')
949+
-- !query analysis
950+
org.apache.spark.sql.catalyst.parser.ParseException
951+
{
952+
"errorClass" : "INVALID_PARAMETER_VALUE.DATETIME_UNIT",
953+
"sqlState" : "22023",
954+
"messageParameters" : {
955+
"functionName" : "`timediff`",
956+
"invalidValue" : "'YEAR'",
957+
"parameter" : "`unit`"
958+
},
959+
"queryContext" : [ {
960+
"objectType" : "",
961+
"objectName" : "",
962+
"startIndex" : 8,
963+
"stopIndex" : 59,
964+
"fragment" : "timediff('YEAR', date'2020-02-15', date'2023-02-15')"
965+
} ]
966+
}

0 commit comments

Comments
 (0)