Skip to content

Commit 2490034

Browse files
authored
Initial Databricks SQL dialect (apache#1220)
1 parent 0adf4c6 commit 2490034

File tree

6 files changed

+135
-18
lines changed

6 files changed

+135
-18
lines changed

src/dialect/databricks.rs

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
use crate::dialect::Dialect;
2+
3+
/// A [`Dialect`] for [Databricks SQL](https://www.databricks.com/)
4+
///
5+
/// See <https://docs.databricks.com/en/sql/language-manual/index.html>.
6+
#[derive(Debug, Default)]
7+
pub struct DatabricksDialect;
8+
9+
impl Dialect for DatabricksDialect {
10+
// see https://docs.databricks.com/en/sql/language-manual/sql-ref-identifiers.html
11+
12+
fn is_delimited_identifier_start(&self, ch: char) -> bool {
13+
matches!(ch, '`')
14+
}
15+
16+
fn is_identifier_start(&self, ch: char) -> bool {
17+
matches!(ch, 'a'..='z' | 'A'..='Z' | '_')
18+
}
19+
20+
fn is_identifier_part(&self, ch: char) -> bool {
21+
matches!(ch, 'a'..='z' | 'A'..='Z' | '0'..='9' | '_')
22+
}
23+
24+
fn supports_filter_during_aggregation(&self) -> bool {
25+
true
26+
}
27+
28+
// https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-groupby.html
29+
fn supports_group_by_expr(&self) -> bool {
30+
true
31+
}
32+
}

src/dialect/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
mod ansi;
1414
mod bigquery;
1515
mod clickhouse;
16+
mod databricks;
1617
mod duckdb;
1718
mod generic;
1819
mod hive;
@@ -32,6 +33,7 @@ use core::str::Chars;
3233
pub use self::ansi::AnsiDialect;
3334
pub use self::bigquery::BigQueryDialect;
3435
pub use self::clickhouse::ClickHouseDialect;
36+
pub use self::databricks::DatabricksDialect;
3537
pub use self::duckdb::DuckDbDialect;
3638
pub use self::generic::GenericDialect;
3739
pub use self::hive::HiveDialect;

src/test_utils.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,7 @@ pub fn all_dialects() -> TestedDialects {
207207
Box::new(BigQueryDialect {}) as Box<dyn Dialect>,
208208
Box::new(SQLiteDialect {}) as Box<dyn Dialect>,
209209
Box::new(DuckDbDialect {}) as Box<dyn Dialect>,
210+
Box::new(DatabricksDialect {}) as Box<dyn Dialect>,
210211
];
211212
TestedDialects {
212213
dialects: all_dialects,

tests/sqlparser_common.rs

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8937,6 +8937,76 @@ fn parse_map_access_expr() {
89378937
}
89388938
}
89398939

8940+
#[test]
fn test_selective_aggregation() {
    // Two aggregates, each narrowed by a FILTER clause; only the second one is aliased.
    let sql = concat!(
        "SELECT ",
        "ARRAY_AGG(name) FILTER (WHERE name IS NOT NULL), ",
        "ARRAY_AGG(name) FILTER (WHERE name LIKE 'a%') AS agg2 ",
        "FROM region"
    );

    // Parse (and round-trip) with every dialect that opts into aggregate filters.
    let select =
        all_dialects_where(|d| d.supports_filter_during_aggregation()).verified_only_select(sql);

    // Both projections aggregate the same column, so the shared pieces are built by closures.
    let name_ident = || Expr::Identifier(Ident::new("name"));
    let array_agg_name = || {
        Box::new(Expr::ArrayAgg(ArrayAgg {
            distinct: false,
            expr: Box::new(name_ident()),
            order_by: None,
            limit: None,
            within_group: false,
        }))
    };

    let expected = vec![
        SelectItem::UnnamedExpr(Expr::AggregateExpressionWithFilter {
            expr: array_agg_name(),
            filter: Box::new(Expr::IsNotNull(Box::new(name_ident()))),
        }),
        SelectItem::ExprWithAlias {
            expr: Expr::AggregateExpressionWithFilter {
                expr: array_agg_name(),
                filter: Box::new(Expr::Like {
                    negated: false,
                    expr: Box::new(name_ident()),
                    pattern: Box::new(Expr::Value(Value::SingleQuotedString("a%".to_owned()))),
                    escape_char: None,
                }),
            },
            alias: Ident::new("agg2"),
        },
    ];

    assert_eq!(select.projection, expected)
}
8986+
8987+
#[test]
fn test_group_by_grouping_sets() {
    let sql = concat!(
        "SELECT city, car_model, sum(quantity) AS sum ",
        "FROM dealer ",
        "GROUP BY GROUPING SETS ((city, car_model), (city), (car_model), ()) ",
        "ORDER BY city",
    );

    // Parse (and round-trip) with every dialect that accepts expressions in GROUP BY.
    let select = all_dialects_where(|d| d.supports_group_by_expr()).verified_only_select(sql);

    // One inner vector per grouping set, in source order; the last set is the empty one.
    let column = |name: &str| Expr::Identifier(Ident::new(name));
    let grouping_sets = vec![
        vec![column("city"), column("car_model")],
        vec![column("city")],
        vec![column("car_model")],
        vec![],
    ];

    assert_eq!(
        select.group_by,
        GroupByExpr::Expressions(vec![Expr::GroupingSets(grouping_sets)])
    );
}
89409010
#[test]
89419011
fn test_match_recognize() {
89429012
use MatchRecognizePattern::*;

tests/sqlparser_databricks.rs

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
use sqlparser::ast::*;
2+
use sqlparser::dialect::DatabricksDialect;
3+
use test_utils::*;
4+
5+
#[macro_use]
6+
mod test_utils;
7+
8+
fn databricks() -> TestedDialects {
9+
TestedDialects {
10+
dialects: vec![Box::new(DatabricksDialect {})],
11+
options: None,
12+
}
13+
}
14+
15+
#[test]
fn test_databricks_identifiers() {
    // Backticks delimit identifiers in Databricks.
    let backticked = databricks().verified_only_select("SELECT `Ä`");
    assert_eq!(
        backticked.projection[0],
        SelectItem::UnnamedExpr(Expr::Identifier(Ident::with_quote('`', "Ä")))
    );

    // Double-quoted text is a string literal rather than a delimited identifier.
    let double_quoted = databricks().verified_only_select(r#"SELECT "Ä""#);
    assert_eq!(
        double_quoted.projection[0],
        SelectItem::UnnamedExpr(Expr::Value(Value::DoubleQuotedString("Ä".to_owned())))
    );
}

tests/sqlparser_hive.rs

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -336,24 +336,6 @@ fn parse_create_function() {
336336
);
337337
}
338338

339-
#[test]
fn filtering_during_aggregation() {
    // Round-trips two unaliased aggregates that each carry a FILTER clause.
    let sql = "SELECT \
        ARRAY_AGG(name) FILTER (WHERE name IS NOT NULL), \
        ARRAY_AGG(name) FILTER (WHERE name LIKE 'a%') \
        FROM region";
    let stmt = hive().verified_stmt(sql);
    println!("{}", stmt);
}
}
347-
348-
#[test]
fn filtering_during_aggregation_aliased() {
    // Same shape as the unaliased variant, but both filtered aggregates are given aliases.
    let sql = "SELECT \
        ARRAY_AGG(name) FILTER (WHERE name IS NOT NULL) AS agg1, \
        ARRAY_AGG(name) FILTER (WHERE name LIKE 'a%') AS agg2 \
        FROM region";
    let stmt = hive().verified_stmt(sql);
    println!("{}", stmt);
}
356-
357339
#[test]
358340
fn filter_as_alias() {
359341
let sql = "SELECT name filter FROM region";

0 commit comments

Comments
 (0)