sqlglot.dialects.duckdb
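This module implements sqlglot's DuckDB dialect: the Tokenizer, Parser, and Generator rules used to read and write DuckDB SQL, plus shims for transpiling to and from other engines (e.g. BigQuery, Spark). A minimal usage sketch via the public transpile API (the sample query is illustrative; exact output depends on the sqlglot version):

    import sqlglot

    # DuckDB's EPOCH_MS(...) parses into the dialect-agnostic exp.UnixToTime node
    # (see FUNCTIONS below), which the target dialect then re-renders.
    print(sqlglot.transpile("SELECT EPOCH_MS(1618088028295)", read="duckdb", write="spark")[0])
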
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.expressions import DATA_TYPE
from sqlglot.dialects.dialect import (
    Dialect,
    JSON_EXTRACT_TYPE,
    NormalizationStrategy,
    Version,
    approx_count_distinct_sql,
    arrow_json_extract_sql,
    binary_from_function,
    bool_xor_sql,
    build_default_decimal_type,
    count_if_to_sum,
    date_trunc_to_time,
    datestrtodate_sql,
    no_datetime_sql,
    encode_decode_sql,
    build_formatted_time,
    inline_array_unless_query,
    no_comment_column_constraint_sql,
    no_time_sql,
    no_timestamp_sql,
    pivot_column_names,
    rename_func,
    remove_from_array_using_filter,
    strposition_sql,
    str_to_time_sql,
    timestamptrunc_sql,
    timestrtotime_sql,
    unit_to_var,
    unit_to_str,
    sha256_sql,
    build_regexp_extract,
    explode_to_unnest_sql,
    no_make_interval_sql,
    groupconcat_sql,
)
from sqlglot.generator import unsupported_args
from sqlglot.helper import seq_get
from sqlglot.tokens import TokenType
from sqlglot.parser import binary_range_parser

DATETIME_DELTA = t.Union[
    exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd, exp.DateSub, exp.DatetimeSub
]


def _date_delta_sql(self: DuckDB.Generator, expression: DATETIME_DELTA) -> str:
    this = expression.this
    unit = unit_to_var(expression)
    op = (
        "+"
        if isinstance(expression, (exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd))
        else "-"
    )

    to_type: t.Optional[DATA_TYPE] = None
    if isinstance(expression, exp.TsOrDsAdd):
        to_type = expression.return_type
    elif this.is_string:
        # Cast string literals (i.e. function parameters) to the appropriate type for +/- interval to work
        to_type = (
            exp.DataType.Type.DATETIME
            if isinstance(expression, (exp.DatetimeAdd, exp.DatetimeSub))
            else exp.DataType.Type.DATE
        )

    this = exp.cast(this, to_type) if to_type else this

    expr = expression.expression
    interval = expr if isinstance(expr, exp.Interval) else exp.Interval(this=expr, unit=unit)

    return f"{self.sql(this)} {op} {self.sql(interval)}"


# BigQuery -> DuckDB conversion for the DATE function
def _date_sql(self: DuckDB.Generator, expression: exp.Date) -> str:
    result = f"CAST({self.sql(expression, 'this')} AS DATE)"
    zone = self.sql(expression, "zone")

    if zone:
        date_str = self.func("STRFTIME", result, "'%d/%m/%Y'")
        date_str = f"{date_str} || ' ' || {zone}"

        # This will create a TIMESTAMP with time zone information
        result = self.func("STRPTIME", date_str, "'%d/%m/%Y %Z'")

    return result


# BigQuery -> DuckDB conversion for the TIME_DIFF function
def _timediff_sql(self: DuckDB.Generator, expression: exp.TimeDiff) -> str:
    this = exp.cast(expression.this, exp.DataType.Type.TIME)
    expr = exp.cast(expression.expression, exp.DataType.Type.TIME)

    # Although the 2 dialects share similar signatures, BQ seems to invert
    # the sign of the result so the start/end time operands are flipped
    return self.func("DATE_DIFF", unit_to_str(expression), expr, this)


@unsupported_args(("expression", "DuckDB's ARRAY_SORT does not support a comparator."))
def _array_sort_sql(self: DuckDB.Generator, expression: exp.ArraySort) -> str:
    return self.func("ARRAY_SORT", expression.this)


def _sort_array_sql(self: DuckDB.Generator, expression: exp.SortArray) -> str:
    name = "ARRAY_REVERSE_SORT" if expression.args.get("asc") == exp.false() else "ARRAY_SORT"
    return self.func(name, expression.this)


def _build_sort_array_desc(args: t.List) -> exp.Expression:
    return exp.SortArray(this=seq_get(args, 0), asc=exp.false())


def _build_date_diff(args: t.List) -> exp.Expression:
    return exp.DateDiff(this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0))


def _build_generate_series(end_exclusive: bool = False) -> t.Callable[[t.List], exp.GenerateSeries]:
    def _builder(args: t.List) -> exp.GenerateSeries:
        # Check https://duckdb.org/docs/sql/functions/nested.html#range-functions
        if len(args) == 1:
            # DuckDB uses 0 as a default for the series' start when it's omitted
            args.insert(0, exp.Literal.number("0"))

        gen_series = exp.GenerateSeries.from_arg_list(args)
        gen_series.set("is_end_exclusive", end_exclusive)

        return gen_series

    return _builder


def _build_make_timestamp(args: t.List) -> exp.Expression:
    if len(args) == 1:
        return exp.UnixToTime(this=seq_get(args, 0), scale=exp.UnixToTime.MICROS)

    return exp.TimestampFromParts(
        year=seq_get(args, 0),
        month=seq_get(args, 1),
        day=seq_get(args, 2),
        hour=seq_get(args, 3),
        min=seq_get(args, 4),
        sec=seq_get(args, 5),
    )


def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[DuckDB.Parser], exp.Show]:
    def _parse(self: DuckDB.Parser) -> exp.Show:
        return self._parse_show_duckdb(*args, **kwargs)

    return _parse


def _struct_sql(self: DuckDB.Generator, expression: exp.Struct) -> str:
    args: t.List[str] = []

    # BigQuery allows inline construction such as "STRUCT<a STRING, b INTEGER>('str', 1)" which is
    # canonicalized to "ROW('str', 1) AS STRUCT(a TEXT, b INT)" in DuckDB
    # The transformation to ROW will take place if:
    #  1. The STRUCT itself does not have proper fields (key := value) as a "proper" STRUCT would
    #  2. A cast to STRUCT / ARRAY of STRUCTs is found
    ancestor_cast = expression.find_ancestor(exp.Cast)
    is_bq_inline_struct = (
        (expression.find(exp.PropertyEQ) is None)
        and ancestor_cast
        and any(
            casted_type.is_type(exp.DataType.Type.STRUCT)
            for casted_type in ancestor_cast.find_all(exp.DataType)
        )
    )

    for i, expr in enumerate(expression.expressions):
        is_property_eq = isinstance(expr, exp.PropertyEQ)
        value = expr.expression if is_property_eq else expr

        if is_bq_inline_struct:
            args.append(self.sql(value))
        else:
            key = expr.name if is_property_eq else f"_{i}"
            args.append(f"{self.sql(exp.Literal.string(key))}: {self.sql(value)}")

    csv_args = ", ".join(args)

    return f"ROW({csv_args})" if is_bq_inline_struct else f"{{{csv_args}}}"


def _datatype_sql(self: DuckDB.Generator, expression: exp.DataType) -> str:
    if expression.is_type("array"):
        return f"{self.expressions(expression, flat=True)}[{self.expressions(expression, key='values', flat=True)}]"

    # Modifiers are not supported for TIME, [TIME | TIMESTAMP] WITH TIME ZONE
    if expression.is_type(
        exp.DataType.Type.TIME, exp.DataType.Type.TIMETZ, exp.DataType.Type.TIMESTAMPTZ
    ):
        return expression.this.value

    return self.datatype_sql(expression)


def _json_format_sql(self: DuckDB.Generator, expression: exp.JSONFormat) -> str:
    sql = self.func("TO_JSON", expression.this, expression.args.get("options"))
    return f"CAST({sql} AS TEXT)"


def _unix_to_time_sql(self: DuckDB.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TO_TIMESTAMP", timestamp)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("EPOCH_MS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        return self.func("MAKE_TIMESTAMP", timestamp)

    return self.func("TO_TIMESTAMP", exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)))


WRAPPED_JSON_EXTRACT_EXPRESSIONS = (exp.Binary, exp.Bracket, exp.In)


def _arrow_json_extract_sql(self: DuckDB.Generator, expression: JSON_EXTRACT_TYPE) -> str:
    arrow_sql = arrow_json_extract_sql(self, expression)
    if not expression.same_parent and isinstance(
        expression.parent, WRAPPED_JSON_EXTRACT_EXPRESSIONS
    ):
        arrow_sql = self.wrap(arrow_sql)
    return arrow_sql


def _implicit_datetime_cast(
    arg: t.Optional[exp.Expression], type: exp.DataType.Type = exp.DataType.Type.DATE
) -> t.Optional[exp.Expression]:
    return exp.cast(arg, type) if isinstance(arg, exp.Literal) else arg


def _date_diff_sql(self: DuckDB.Generator, expression: exp.DateDiff) -> str:
    this = _implicit_datetime_cast(expression.this)
    expr = _implicit_datetime_cast(expression.expression)

    return self.func("DATE_DIFF", unit_to_str(expression), expr, this)


def _generate_datetime_array_sql(
    self: DuckDB.Generator, expression: t.Union[exp.GenerateDateArray, exp.GenerateTimestampArray]
) -> str:
    is_generate_date_array = isinstance(expression, exp.GenerateDateArray)

    type = exp.DataType.Type.DATE if is_generate_date_array else exp.DataType.Type.TIMESTAMP
    start = _implicit_datetime_cast(expression.args.get("start"), type=type)
    end = _implicit_datetime_cast(expression.args.get("end"), type=type)

    # BQ's GENERATE_DATE_ARRAY & GENERATE_TIMESTAMP_ARRAY are transformed to DuckDB's GENERATE_SERIES
    gen_series: t.Union[exp.GenerateSeries, exp.Cast] = exp.GenerateSeries(
        start=start, end=end, step=expression.args.get("step")
    )

    if is_generate_date_array:
        # The GENERATE_SERIES result type is TIMESTAMP array, so to match BQ's semantics for
        # GENERATE_DATE_ARRAY we must cast it back to DATE array
        gen_series = exp.cast(gen_series, exp.DataType.build("ARRAY<DATE>"))

    return self.sql(gen_series)


def _json_extract_value_array_sql(
    self: DuckDB.Generator, expression: exp.JSONValueArray | exp.JSONExtractArray
) -> str:
    json_extract = exp.JSONExtract(this=expression.this, expression=expression.expression)
    data_type = "ARRAY<STRING>" if isinstance(expression, exp.JSONValueArray) else "ARRAY<JSON>"
    return self.sql(exp.cast(json_extract, to=exp.DataType.build(data_type)))


class DuckDB(Dialect):
    NULL_ORDERING = "nulls_are_last"
    SUPPORTS_USER_DEFINED_TYPES = True
    SAFE_DIVISION = True
    INDEX_OFFSET = 1
    CONCAT_COALESCE = True
    SUPPORTS_ORDER_BY_ALL = True
    SUPPORTS_FIXED_SIZE_ARRAYS = True
    STRICT_JSON_PATH_SYNTAX = False
    NUMBERS_CAN_BE_UNDERSCORE_SEPARATED = True

    # https://duckdb.org/docs/sql/introduction.html#creating-a-new-table
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    DATE_PART_MAPPING = {
        **Dialect.DATE_PART_MAPPING,
        "DAYOFWEEKISO": "ISODOW",
    }
    DATE_PART_MAPPING.pop("WEEKDAY")

    def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if isinstance(path, exp.Literal):
            # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`.
            # Additionally, it allows accessing the back of lists using the `[#-i]` syntax.
            # This check ensures we'll avoid trying to parse these as JSON paths, which can
            # either result in a noisy warning or in an invalid representation of the path.
            path_text = path.name
            if path_text.startswith("/") or "[#" in path_text:
                return path

        return super().to_json_path(path)

    class Tokenizer(tokens.Tokenizer):
        BYTE_STRINGS = [("e'", "'"), ("E'", "'")]
        HEREDOC_STRINGS = ["$"]

        HEREDOC_TAG_IS_IDENTIFIER = True
        HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "//": TokenType.DIV,
            "**": TokenType.DSTAR,
            "^@": TokenType.CARET_AT,
            "@>": TokenType.AT_GT,
            "<@": TokenType.LT_AT,
            "ATTACH": TokenType.ATTACH,
            "BINARY": TokenType.VARBINARY,
            "BITSTRING": TokenType.BIT,
            "BPCHAR": TokenType.TEXT,
            "CHAR": TokenType.TEXT,
            "DATETIME": TokenType.TIMESTAMPNTZ,
            "DETACH": TokenType.DETACH,
            "EXCLUDE": TokenType.EXCEPT,
            "LOGICAL": TokenType.BOOLEAN,
            "ONLY": TokenType.ONLY,
            "PIVOT_WIDER": TokenType.PIVOT,
            "POSITIONAL": TokenType.POSITIONAL,
            "SIGNED": TokenType.INT,
            "STRING": TokenType.TEXT,
            "SUMMARIZE": TokenType.SUMMARIZE,
            "TIMESTAMP": TokenType.TIMESTAMPNTZ,
            "TIMESTAMP_S": TokenType.TIMESTAMP_S,
            "TIMESTAMP_MS": TokenType.TIMESTAMP_MS,
            "TIMESTAMP_NS": TokenType.TIMESTAMP_NS,
            "TIMESTAMP_US": TokenType.TIMESTAMP,
            "UBIGINT": TokenType.UBIGINT,
            "UINTEGER": TokenType.UINT,
            "USMALLINT": TokenType.USMALLINT,
            "UTINYINT": TokenType.UTINYINT,
            "VARCHAR": TokenType.TEXT,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}

    class Parser(parser.Parser):
        BITWISE = {
            **parser.Parser.BITWISE,
            TokenType.TILDA: exp.RegexpLike,
        }
        BITWISE.pop(TokenType.CARET)

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps),
            TokenType.CARET_AT: binary_range_parser(exp.StartsWith),
        }

        EXPONENT = {
            **parser.Parser.EXPONENT,
            TokenType.CARET: exp.Pow,
            TokenType.DSTAR: exp.Pow,
        }

        FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"}

        SHOW_PARSERS = {
            "TABLES": _show_parser("TABLES"),
            "ALL TABLES": _show_parser("ALL TABLES"),
        }

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARRAY_REVERSE_SORT": _build_sort_array_desc,
            "ARRAY_SORT": exp.SortArray.from_arg_list,
            "DATEDIFF": _build_date_diff,
            "DATE_DIFF": _build_date_diff,
            "DATE_TRUNC": date_trunc_to_time,
            "DATETRUNC": date_trunc_to_time,
            "DECODE": lambda args: exp.Decode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "EDITDIST3": exp.Levenshtein.from_arg_list,
            "ENCODE": lambda args: exp.Encode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "EPOCH": exp.TimeToUnix.from_arg_list,
            "EPOCH_MS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "GENERATE_SERIES": _build_generate_series(),
            "JSON": exp.ParseJSON.from_arg_list,
            "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract),
            "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar),
            "LIST_HAS": exp.ArrayContains.from_arg_list,
            "LIST_REVERSE_SORT": _build_sort_array_desc,
            "LIST_SORT": exp.SortArray.from_arg_list,
            "LIST_VALUE": lambda args: exp.Array(expressions=args),
            "MAKE_TIME": exp.TimeFromParts.from_arg_list,
            "MAKE_TIMESTAMP": _build_make_timestamp,
            "QUANTILE_CONT": exp.PercentileCont.from_arg_list,
            "QUANTILE_DISC": exp.PercentileDisc.from_arg_list,
            "RANGE": _build_generate_series(end_exclusive=True),
            "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract),
            "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll),
            "REGEXP_MATCHES": exp.RegexpLike.from_arg_list,
            "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                replacement=seq_get(args, 2),
                modifiers=seq_get(args, 3),
            ),
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"),
            "STRING_SPLIT": exp.Split.from_arg_list,
            "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
            "STRING_TO_ARRAY": exp.Split.from_arg_list,
            "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"),
            "STRUCT_PACK": exp.Struct.from_arg_list,
            "STR_SPLIT": exp.Split.from_arg_list,
            "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
            "TIME_BUCKET": exp.DateBin.from_arg_list,
            "TO_TIMESTAMP": exp.UnixToTime.from_arg_list,
            "UNNEST": exp.Explode.from_arg_list,
            "XOR": binary_from_function(exp.BitwiseXor),
        }

        FUNCTIONS.pop("DATE_SUB")
        FUNCTIONS.pop("GLOB")

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            **dict.fromkeys(
                ("GROUP_CONCAT", "LISTAGG", "STRINGAGG"), lambda self: self._parse_string_agg()
            ),
        }
        FUNCTION_PARSERS.pop("DECODE")

        NO_PAREN_FUNCTION_PARSERS = {
            **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
            "MAP": lambda self: self._parse_map(),
            "@": lambda self: exp.Abs(this=self._parse_bitwise()),
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
            TokenType.SEMI,
            TokenType.ANTI,
        }

        PLACEHOLDER_PARSERS = {
            **parser.Parser.PLACEHOLDER_PARSERS,
            TokenType.PARAMETER: lambda self: (
                self.expression(exp.Placeholder, this=self._prev.text)
                if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
                else None
            ),
        }

        TYPE_CONVERTERS = {
            # https://duckdb.org/docs/sql/data_types/numeric
            exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3),
            # https://duckdb.org/docs/sql/data_types/text
            exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"),
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.ATTACH: lambda self: self._parse_attach_detach(),
            TokenType.DETACH: lambda self: self._parse_attach_detach(is_attach=False),
            TokenType.SHOW: lambda self: self._parse_show(),
        }

        SET_PARSERS = {
            **parser.Parser.SET_PARSERS,
            "VARIABLE": lambda self: self._parse_set_item_assignment("VARIABLE"),
        }

        def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
            index = self._index
            if not self._match_text_seq("LAMBDA"):
                return super()._parse_lambda(alias=alias)

            expressions = self._parse_csv(self._parse_lambda_arg)
            if not self._match(TokenType.COLON):
                self._retreat(index)
                return None

            this = self._replace_lambda(self._parse_assignment(), expressions)
            return self.expression(exp.Lambda, this=this, expressions=expressions, colon=True)

        def _parse_expression(self) -> t.Optional[exp.Expression]:
            # DuckDB supports prefix aliases, e.g. foo: 1
            if self._next and self._next.token_type == TokenType.COLON:
                alias = self._parse_id_var(tokens=self.ALIAS_TOKENS)
                self._match(TokenType.COLON)
                comments = self._prev_comments or []

                this = self._parse_assignment()
                if isinstance(this, exp.Expression):
                    # Moves the comment next to the alias in `alias: expr /* comment */`
                    comments += this.pop_comments() or []

                return self.expression(exp.Alias, comments=comments, this=this, alias=alias)

            return super()._parse_expression()

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
            consume_pipe: bool = False,
        ) -> t.Optional[exp.Expression]:
            # DuckDB supports prefix aliases, e.g. FROM foo: bar
            if self._next and self._next.token_type == TokenType.COLON:
                alias = self._parse_table_alias(
                    alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS
                )
                self._match(TokenType.COLON)
                comments = self._prev_comments or []
            else:
                alias = None
                comments = []

            table = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
                parse_partition=parse_partition,
            )
            if isinstance(table, exp.Expression) and isinstance(alias, exp.TableAlias):
                # Moves the comment next to the alias in `alias: table /* comment */`
                comments += table.pop_comments() or []
                alias.comments = alias.pop_comments() + comments
                table.set("alias", alias)

            return table

        def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
            # https://duckdb.org/docs/sql/samples.html
            sample = super()._parse_table_sample(as_modifier=as_modifier)
            if sample and not sample.args.get("method"):
                if sample.args.get("size"):
                    sample.set("method", exp.var("RESERVOIR"))
                else:
                    sample.set("method", exp.var("SYSTEM"))

            return sample

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)

            if self.dialect.version < Version("1.2.0") and isinstance(bracket, exp.Bracket):
                # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes
                bracket.set("returns_list_for_maps", True)

            return bracket

        def _parse_map(self) -> exp.ToMap | exp.Map:
            if self._match(TokenType.L_BRACE, advance=False):
                return self.expression(exp.ToMap, this=self._parse_bracket())

            args = self._parse_wrapped_csv(self._parse_assignment)
            return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1))

        def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
            return self._parse_field_def()

        def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
            if len(aggregations) == 1:
                return super()._pivot_column_names(aggregations)
            return pivot_column_names(aggregations, dialect="duckdb")

        def _parse_attach_detach(self, is_attach=True) -> exp.Attach | exp.Detach:
            def _parse_attach_option() -> exp.AttachOption:
                return self.expression(
                    exp.AttachOption,
                    this=self._parse_var(any_token=True),
                    expression=self._parse_field(any_token=True),
                )

            self._match(TokenType.DATABASE)
            exists = self._parse_exists(not_=is_attach)
            this = self._parse_alias(self._parse_primary_or_var(), explicit=True)

            if self._match(TokenType.L_PAREN, advance=False):
                expressions = self._parse_wrapped_csv(_parse_attach_option)
            else:
                expressions = None

            return (
                self.expression(exp.Attach, this=this, exists=exists, expressions=expressions)
                if is_attach
                else self.expression(exp.Detach, this=this, exists=exists)
            )

        def _parse_show_duckdb(self, this: str) -> exp.Show:
            return self.expression(exp.Show, this=this)

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        NAMED_PLACEHOLDER_TOKEN = "$"
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        LIMIT_FETCH = "LIMIT"
        STRUCT_DELIMITER = ("(", ")")
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        SEMI_ANTI_JOIN_WITH_SIDE = False
        TABLESAMPLE_KEYWORDS = "USING SAMPLE"
        TABLESAMPLE_SEED_KEYWORD = "REPEATABLE"
        LAST_DAY_SUPPORTS_DATE_PART = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_BRACKETED_KEY_SUPPORTED = False
        SUPPORTS_CREATE_TABLE_LIKE = False
        MULTI_ARG_DISTINCT = False
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        SUPPORTS_WINDOW_EXCLUDE = True
        COPY_HAS_INTO_KEYWORD = False
        STAR_EXCEPT = "EXCLUDE"
        PAD_FILL_PATTERN_IS_REQUIRED = True
        ARRAY_CONCAT_IS_VAR_LEN = False
        ARRAY_SIZE_DIM_REQUIRED = False
        NORMALIZE_EXTRACT_DATE_PARTS = True

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: approx_count_distinct_sql,
            exp.Array: inline_array_unless_query,
            exp.ArrayFilter: rename_func("LIST_FILTER"),
            exp.ArrayRemove: remove_from_array_using_filter,
            exp.ArraySort: _array_sort_sql,
            exp.ArraySum: rename_func("LIST_SUM"),
            exp.BitwiseXor: rename_func("XOR"),
            exp.CommentColumnConstraint: no_comment_column_constraint_sql,
            exp.CurrentDate: lambda *_: "CURRENT_DATE",
            exp.CurrentTime: lambda *_: "CURRENT_TIME",
            exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfWeekIso: rename_func("ISODOW"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.DataType: _datatype_sql,
            exp.Date: _date_sql,
            exp.DateAdd: _date_delta_sql,
            exp.DateFromParts: rename_func("MAKE_DATE"),
            exp.DateSub: _date_delta_sql,
            exp.DateDiff: _date_diff_sql,
            exp.DateStrToDate: datestrtodate_sql,
            exp.Datetime: no_datetime_sql,
            exp.DatetimeSub: _date_delta_sql,
            exp.DatetimeAdd: _date_delta_sql,
            exp.DateToDi: lambda self,
            e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)",
            exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False),
            exp.DiToDate: lambda self,
            e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)",
            exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False),
            exp.GenerateDateArray: _generate_datetime_array_sql,
            exp.GenerateTimestampArray: _generate_datetime_array_sql,
            exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, within_group=False),
            exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"),
            exp.Explode: rename_func("UNNEST"),
            exp.IntDiv: lambda self, e: self.binary(e, "//"),
            exp.IsInf: rename_func("ISINF"),
            exp.IsNan: rename_func("ISNAN"),
            exp.JSONBExists: rename_func("JSON_EXISTS"),
            exp.JSONExtract: _arrow_json_extract_sql,
            exp.JSONExtractArray: _json_extract_value_array_sql,
            exp.JSONExtractScalar: _arrow_json_extract_sql,
            exp.JSONFormat: _json_format_sql,
            exp.JSONValueArray: _json_extract_value_array_sql,
            exp.Lateral: explode_to_unnest_sql,
            exp.LogicalOr: rename_func("BOOL_OR"),
            exp.LogicalAnd: rename_func("BOOL_AND"),
            exp.MakeInterval: lambda self, e: no_make_interval_sql(self, e, sep=" "),
            exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
            exp.MonthsBetween: lambda self, e: self.func(
                "DATEDIFF",
                "'month'",
                exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True),
                exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True),
            ),
            exp.PercentileCont: rename_func("QUANTILE_CONT"),
            exp.PercentileDisc: rename_func("QUANTILE_DISC"),
            # DuckDB doesn't allow qualified columns inside of PIVOT expressions.
            # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62
            exp.Pivot: transforms.preprocess([transforms.unqualify_columns]),
            exp.RegexpReplace: lambda self, e: self.func(
                "REGEXP_REPLACE",
                e.this,
                e.expression,
                e.args.get("replacement"),
                e.args.get("modifiers"),
            ),
            exp.RegexpLike: rename_func("REGEXP_MATCHES"),
            exp.RegexpILike: lambda self, e: self.func(
                "REGEXP_MATCHES", e.this, e.expression, exp.Literal.string("i")
            ),
            exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"),
            exp.Return: lambda self, e: self.sql(e, "this"),
            exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "",
            exp.Rand: rename_func("RANDOM"),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.Split: rename_func("STR_SPLIT"),
            exp.SortArray: _sort_array_sql,
            exp.StrPosition: strposition_sql,
            exp.StrToUnix: lambda self, e: self.func(
                "EPOCH", self.func("STRPTIME", e.this, self.format_time(e))
            ),
            exp.Struct: _struct_sql,
            exp.Transform: rename_func("LIST_TRANSFORM"),
            exp.TimeAdd: _date_delta_sql,
            exp.Time: no_time_sql,
            exp.TimeDiff: _timediff_sql,
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampDiff: lambda self, e: self.func(
                "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: lambda self, e: self.func(
                "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP)
            ),
            exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)),
            exp.TimeToUnix: rename_func("EPOCH"),
            exp.TsOrDiToDi: lambda self,
            e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: _date_delta_sql,
            exp.TsOrDsDiff: lambda self, e: self.func(
                "DATE_DIFF",
                f"'{e.args.get('unit') or 'DAY'}'",
                exp.cast(e.expression, exp.DataType.Type.TIMESTAMP),
                exp.cast(e.this, exp.DataType.Type.TIMESTAMP),
            ),
            exp.UnixToStr: lambda self, e: self.func(
                "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e)
            ),
            exp.DatetimeTrunc: lambda self, e: self.func(
                "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME)
            ),
            exp.UnixToTime: _unix_to_time_sql,
            exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)",
            exp.VariancePop: rename_func("VAR_POP"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: bool_xor_sql,
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
                rename_func("LEVENSHTEIN")
            ),
            exp.JSONObjectAgg: rename_func("JSON_GROUP_OBJECT"),
            exp.JSONBObjectAgg: rename_func("JSON_GROUP_OBJECT"),
            exp.DateBin: rename_func("TIME_BUCKET"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
            exp.JSONPathWildcard,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BINARY: "BLOB",
            exp.DataType.Type.BPCHAR: "TEXT",
            exp.DataType.Type.CHAR: "TEXT",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.FLOAT: "REAL",
            exp.DataType.Type.JSONB: "JSON",
            exp.DataType.Type.NCHAR: "TEXT",
            exp.DataType.Type.NVARCHAR: "TEXT",
            exp.DataType.Type.UINT: "UINTEGER",
            exp.DataType.Type.VARBINARY: "BLOB",
            exp.DataType.Type.ROWVERSION: "BLOB",
            exp.DataType.Type.VARCHAR: "TEXT",
            exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S",
            exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS",
            exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS",
        }

        # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77
        RESERVED_KEYWORDS = {
            "array",
            "analyse",
            "union",
            "all",
            "when",
            "in_p",
            "default",
            "create_p",
            "window",
            "asymmetric",
            "to",
            "else",
            "localtime",
            "from",
            "end_p",
            "select",
            "current_date",
            "foreign",
            "with",
            "grant",
            "session_user",
            "or",
            "except",
            "references",
            "fetch",
            "limit",
            "group_p",
            "leading",
            "into",
            "collate",
            "offset",
            "do",
            "then",
            "localtimestamp",
            "check_p",
            "lateral_p",
            "current_role",
            "where",
            "asc_p",
            "placing",
            "desc_p",
            "user",
            "unique",
            "initially",
            "column",
            "both",
            "some",
            "as",
            "any",
            "only",
            "deferrable",
            "null_p",
            "current_time",
            "true_p",
            "table",
            "case",
            "trailing",
            "variadic",
            "for",
            "on",
            "distinct",
            "false_p",
            "not",
            "constraint",
            "current_timestamp",
            "returning",
            "primary",
            "intersect",
            "having",
            "analyze",
            "current_user",
            "and",
            "cast",
            "symmetric",
            "using",
            "order",
            "current_catalog",
        }

        UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren)

        # DuckDB doesn't generally support CREATE TABLE .. properties
        # https://duckdb.org/docs/sql/statements/create_table.html
        PROPERTIES_LOCATION = {
            prop: exp.Properties.Location.UNSUPPORTED
            for prop in generator.Generator.PROPERTIES_LOCATION
        }

        # There are a few exceptions (e.g. temporary tables) which are supported or
        # can be transpiled to DuckDB, so we explicitly override them accordingly
        PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA
        PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE
        PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS

        IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS = (
            exp.FirstValue,
            exp.Lag,
            exp.LastValue,
            exp.Lead,
            exp.NthValue,
        )

        def lambda_sql(
            self, expression: exp.Lambda, arrow_sep: str = "->", wrap: bool = True
        ) -> str:
            if expression.args.get("colon"):
                prefix = "LAMBDA "
                arrow_sep = ":"
                wrap = False
            else:
                prefix = ""

            lambda_sql = super().lambda_sql(expression, arrow_sep=arrow_sep, wrap=wrap)
            return f"{prefix}{lambda_sql}"

        def show_sql(self, expression: exp.Show) -> str:
            return f"SHOW {expression.name}"

        def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str:
            return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ))

        def strtotime_sql(self, expression: exp.StrToTime) -> str:
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)"
            return str_to_time_sql(self, expression)

        def strtodate_sql(self, expression: exp.StrToDate) -> str:
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)"
            return f"CAST({str_to_time_sql(self, expression)} AS DATE)"

        def parsejson_sql(self, expression: exp.ParseJSON) -> str:
            arg = expression.this
            if expression.args.get("safe"):
                return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null()))
            return self.func("JSON", arg)

        def timefromparts_sql(self, expression: exp.TimeFromParts) -> str:
            nano = expression.args.get("nano")
            if nano is not None:
                expression.set(
                    "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0)
                )

            return rename_func("MAKE_TIME")(self, expression)

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            sec = expression.args["sec"]

            milli = expression.args.get("milli")
            if milli is not None:
                sec += milli.pop() / exp.Literal.number(1000.0)

            nano = expression.args.get("nano")
            if nano is not None:
                sec += nano.pop() / exp.Literal.number(1000000000.0)

            if milli or nano:
                expression.set("sec", sec)

            return rename_func("MAKE_TIMESTAMP")(self, expression)

        def tablesample_sql(
            self,
            expression: exp.TableSample,
            tablesample_keyword: t.Optional[str] = None,
        ) -> str:
            if not isinstance(expression.parent, exp.Select):
                # This sample clause only applies to a single source, not the entire resulting relation
                tablesample_keyword = "TABLESAMPLE"

            if expression.args.get("size"):
                method = expression.args.get("method")
                if method and method.name.upper() != "RESERVOIR":
                    self.unsupported(
                        f"Sampling method {method} is not supported with a discrete sample count, "
                        "defaulting to reservoir sampling"
                    )
                    expression.set("method", exp.var("RESERVOIR"))

            return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)

        def interval_sql(self, expression: exp.Interval) -> str:
            multiplier: t.Optional[int] = None
            unit = expression.text("unit").lower()

            if unit.startswith("week"):
                multiplier = 7
            if unit.startswith("quarter"):
                multiplier = 90

            if multiplier:
                return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})"

            return super().interval_sql(expression)

        def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str:
            if isinstance(expression.parent, exp.UserDefinedFunction):
                return self.sql(expression, "this")
            return super().columndef_sql(expression, sep)

        def join_sql(self, expression: exp.Join) -> str:
            if (
                expression.side == "LEFT"
                and not expression.args.get("on")
                and isinstance(expression.this, exp.Unnest)
            ):
                # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause
                # DuckDB doesn't, but we can just add a dummy ON clause that is always true
                return super().join_sql(expression.on(exp.true()))

            return super().join_sql(expression)

        def generateseries_sql(self, expression: exp.GenerateSeries) -> str:
            # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b)
            if expression.args.get("is_end_exclusive"):
                return rename_func("RANGE")(self, expression)

            return self.function_fallback_sql(expression)

        def countif_sql(self, expression: exp.CountIf) -> str:
            if self.dialect.version >= Version("1.2"):
                return self.function_fallback_sql(expression)

            # https://github.com/tobymao/sqlglot/pull/4749
            return count_if_to_sum(self, expression)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            if self.dialect.version >= Version("1.2"):
                return super().bracket_sql(expression)

            # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes
            this = expression.this
            if isinstance(this, exp.Array):
                this.replace(exp.paren(this))

            bracket = super().bracket_sql(expression)

            if not expression.args.get("returns_list_for_maps"):
                if not this.type:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    this = annotate_types(this, dialect=self.dialect)

                if this.is_type(exp.DataType.Type.MAP):
                    bracket = f"({bracket})[1]"

            return bracket

        def withingroup_sql(self, expression: exp.WithinGroup) -> str:
            expression_sql = self.sql(expression, "expression")

            func = expression.this
            if isinstance(func, exp.PERCENTILES):
                # Make the order key the first arg and slide the fraction to the right
                # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions
                order_col = expression.find(exp.Ordered)
                if order_col:
                    func.set("expression", func.this)
                    func.set("this", order_col.this)

            this = self.sql(expression, "this").rstrip(")")

            return f"{this}{expression_sql})"

        def length_sql(self, expression: exp.Length) -> str:
            arg = expression.this

            # Dialects like BQ and Snowflake also accept binary values as args, so
            # DDB will attempt to infer the type or resort to case/when resolution
            if not expression.args.get("binary") or arg.is_string:
                return self.func("LENGTH", arg)

            if not arg.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                arg = annotate_types(arg, dialect=self.dialect)

            if arg.is_type(*exp.DataType.TEXT_TYPES):
                return self.func("LENGTH", arg)

            # We need these casts to make duckdb's static type checker happy
            blob = exp.cast(arg, exp.DataType.Type.VARBINARY)
            varchar = exp.cast(arg, exp.DataType.Type.VARCHAR)

            case = (
                exp.case(self.func("TYPEOF", arg))
                .when("'BLOB'", self.func("OCTET_LENGTH", blob))
                .else_(
                    exp.Anonymous(this="LENGTH", expressions=[varchar])
                )  # anonymous to break length_sql recursion
            )

            return self.sql(case)

        def objectinsert_sql(self, expression: exp.ObjectInsert) -> str:
            this = expression.this
            key = expression.args.get("key")
            key_sql = key.name if isinstance(key, exp.Expression) else ""
            value_sql = self.sql(expression, "value")

            kv_sql = f"{key_sql} := {value_sql}"

            # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake
            # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB
            if isinstance(this, exp.Struct) and not this.expressions:
                return self.func("STRUCT_PACK", kv_sql)

            return self.func("STRUCT_INSERT", this, kv_sql)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            explode_array = expression.args.get("explode_array")
            if explode_array:
                # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct
                # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))"
                expression.expressions.append(
                    exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2))
                )

                # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB
                alias = expression.args.get("alias")
                if alias:
                    expression.set("alias", None)
                    alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0))

                unnest_sql = super().unnest_sql(expression)
                select = exp.Select(expressions=[unnest_sql]).subquery(alias)
                return self.sql(select)

            return super().unnest_sql(expression)

        def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str:
            if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
                # DuckDB should render IGNORE NULLS only for the general-purpose
                # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...)
                return super().ignorenulls_sql(expression)

            self.unsupported("IGNORE NULLS is not supported for non-window functions.")
            return self.sql(expression, "this")

        def respectnulls_sql(self, expression: exp.RespectNulls) -> str:
            if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
                # DuckDB should render RESPECT NULLS only for the general-purpose
                # window functions that accept it e.g. FIRST_VALUE(... RESPECT NULLS) OVER (...)
                return super().respectnulls_sql(expression)

            self.unsupported("RESPECT NULLS is not supported for non-window functions.")
            return self.sql(expression, "this")

        def arraytostring_sql(self, expression: exp.ArrayToString) -> str:
            this = self.sql(expression, "this")
            null_text = self.sql(expression, "null")

            if null_text:
                this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))"

            return self.func("ARRAY_TO_STRING", this, expression.expression)

        @unsupported_args("position", "occurrence")
        def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
            group = expression.args.get("group")
            params = expression.args.get("parameters")

            # Do not render group if there is no following argument,
            # and it's the default value for this dialect
            if (
                not params
                and group
                and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP)
            ):
                group = None
            return self.func(
                "REGEXP_EXTRACT", expression.this, expression.expression, group, params
            )

        @unsupported_args("culture")
        def numbertostr_sql(self, expression: exp.NumberToStr) -> str:
            fmt = expression.args.get("format")
            if fmt and fmt.is_int:
                return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this)

            self.unsupported("Only integer formats are supported by NumberToStr")
            return self.function_fallback_sql(expression)

        def autoincrementcolumnconstraint_sql(self, _) -> str:
            self.unsupported("The AUTOINCREMENT column constraint is not supported by DuckDB")
            return ""

        def aliases_sql(self, expression: exp.Aliases) -> str:
            this = expression.this
            if isinstance(this, exp.Posexplode):
                return self.posexplode_sql(this)

            return super().aliases_sql(expression)

        def posexplode_sql(self, expression: exp.Posexplode) -> str:
            this = expression.this
            parent = expression.parent

            # The default Spark aliases are "pos" and "col", unless specified otherwise
            pos, col = exp.to_identifier("pos"), exp.to_identifier("col")

            if isinstance(parent, exp.Aliases):
                # Column case: SELECT POSEXPLODE(col) [AS (a, b)]
                pos, col = parent.expressions
            elif isinstance(parent, exp.Table):
                # Table case: SELECT * FROM POSEXPLODE(col) [AS (a, b)]
                alias = parent.args.get("alias")
                if alias:
                    pos, col = alias.columns or [pos, col]
                    alias.pop()

            # Translate POSEXPLODE to UNNEST + GENERATE_SUBSCRIPTS
            # Note: In Spark pos is 0-indexed, but in DuckDB it's 1-indexed, so we subtract 1 from GENERATE_SUBSCRIPTS
            unnest_sql = self.sql(exp.Unnest(expressions=[this], alias=col))
            gen_subscripts = self.sql(
                exp.Alias(
                    this=exp.Anonymous(
                        this="GENERATE_SUBSCRIPTS", expressions=[this, exp.Literal.number(1)]
                    )
                    - exp.Literal.number(1),
                    alias=pos,
                )
            )

            posexplode_sql = self.format_args(gen_subscripts, unnest_sql)

            if isinstance(parent, exp.From) or (parent and isinstance(parent.parent, exp.From)):
                # SELECT * FROM POSEXPLODE(col) -> SELECT * FROM (SELECT GENERATE_SUBSCRIPTS(...), UNNEST(...))
                return self.sql(exp.Subquery(this=exp.Select(expressions=[posexplode_sql])))

            return posexplode_sql
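
Two quick checks of the rules above, run through the public API (a sketch; the emitted SQL may vary across sqlglot versions):

    import sqlglot

    # BigQuery's inline struct has a surrounding STRUCT cast and no key := value
    # fields, so _struct_sql canonicalizes it to ROW(...) for DuckDB.
    print(sqlglot.transpile("SELECT STRUCT<a STRING, b INTEGER>('str', 1)", read="bigquery", write="duckdb")[0])

    # RANGE is parsed with is_end_exclusive=True, so generateseries_sql renders
    # it back as the end-exclusive RANGE rather than GENERATE_SERIES.
    print(sqlglot.transpile("SELECT RANGE(1, 5)", read="duckdb", write="duckdb")[0])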
798 exp.DataType.Type.NVARCHAR: "TEXT", 799 exp.DataType.Type.UINT: "UINTEGER", 800 exp.DataType.Type.VARBINARY: "BLOB", 801 exp.DataType.Type.ROWVERSION: "BLOB", 802 exp.DataType.Type.VARCHAR: "TEXT", 803 exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP", 804 exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S", 805 exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS", 806 exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS", 807 } 808 809 # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77 810 RESERVED_KEYWORDS = { 811 "array", 812 "analyse", 813 "union", 814 "all", 815 "when", 816 "in_p", 817 "default", 818 "create_p", 819 "window", 820 "asymmetric", 821 "to", 822 "else", 823 "localtime", 824 "from", 825 "end_p", 826 "select", 827 "current_date", 828 "foreign", 829 "with", 830 "grant", 831 "session_user", 832 "or", 833 "except", 834 "references", 835 "fetch", 836 "limit", 837 "group_p", 838 "leading", 839 "into", 840 "collate", 841 "offset", 842 "do", 843 "then", 844 "localtimestamp", 845 "check_p", 846 "lateral_p", 847 "current_role", 848 "where", 849 "asc_p", 850 "placing", 851 "desc_p", 852 "user", 853 "unique", 854 "initially", 855 "column", 856 "both", 857 "some", 858 "as", 859 "any", 860 "only", 861 "deferrable", 862 "null_p", 863 "current_time", 864 "true_p", 865 "table", 866 "case", 867 "trailing", 868 "variadic", 869 "for", 870 "on", 871 "distinct", 872 "false_p", 873 "not", 874 "constraint", 875 "current_timestamp", 876 "returning", 877 "primary", 878 "intersect", 879 "having", 880 "analyze", 881 "current_user", 882 "and", 883 "cast", 884 "symmetric", 885 "using", 886 "order", 887 "current_catalog", 888 } 889 890 UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren) 891 892 # DuckDB doesn't generally support CREATE TABLE .. properties 893 # https://duckdb.org/docs/sql/statements/create_table.html 894 PROPERTIES_LOCATION = { 895 prop: exp.Properties.Location.UNSUPPORTED 896 for prop in generator.Generator.PROPERTIES_LOCATION 897 } 898 899 # There are a few exceptions (e.g. 
temporary tables) which are supported or 900 # can be transpiled to DuckDB, so we explicitly override them accordingly 901 PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA 902 PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE 903 PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS 904 905 IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS = ( 906 exp.FirstValue, 907 exp.Lag, 908 exp.LastValue, 909 exp.Lead, 910 exp.NthValue, 911 ) 912 913 def lambda_sql( 914 self, expression: exp.Lambda, arrow_sep: str = "->", wrap: bool = True 915 ) -> str: 916 if expression.args.get("colon"): 917 prefix = "LAMBDA " 918 arrow_sep = ":" 919 wrap = False 920 else: 921 prefix = "" 922 923 lambda_sql = super().lambda_sql(expression, arrow_sep=arrow_sep, wrap=wrap) 924 return f"{prefix}{lambda_sql}" 925 926 def show_sql(self, expression: exp.Show) -> str: 927 return f"SHOW {expression.name}" 928 929 def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str: 930 return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ)) 931 932 def strtotime_sql(self, expression: exp.StrToTime) -> str: 933 if expression.args.get("safe"): 934 formatted_time = self.format_time(expression) 935 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)" 936 return str_to_time_sql(self, expression) 937 938 def strtodate_sql(self, expression: exp.StrToDate) -> str: 939 if expression.args.get("safe"): 940 formatted_time = self.format_time(expression) 941 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)" 942 return f"CAST({str_to_time_sql(self, expression)} AS DATE)" 943 944 def parsejson_sql(self, expression: exp.ParseJSON) -> str: 945 arg = expression.this 946 if expression.args.get("safe"): 947 return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null())) 948 return self.func("JSON", arg) 949 950 def timefromparts_sql(self, expression: exp.TimeFromParts) -> str: 951 nano = expression.args.get("nano") 952 if nano is not None: 953 expression.set( 954 "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0) 955 ) 956 957 return rename_func("MAKE_TIME")(self, expression) 958 959 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 960 sec = expression.args["sec"] 961 962 milli = expression.args.get("milli") 963 if milli is not None: 964 sec += milli.pop() / exp.Literal.number(1000.0) 965 966 nano = expression.args.get("nano") 967 if nano is not None: 968 sec += nano.pop() / exp.Literal.number(1000000000.0) 969 970 if milli or nano: 971 expression.set("sec", sec) 972 973 return rename_func("MAKE_TIMESTAMP")(self, expression) 974 975 def tablesample_sql( 976 self, 977 expression: exp.TableSample, 978 tablesample_keyword: t.Optional[str] = None, 979 ) -> str: 980 if not isinstance(expression.parent, exp.Select): 981 # This sample clause only applies to a single source, not the entire resulting relation 982 tablesample_keyword = "TABLESAMPLE" 983 984 if expression.args.get("size"): 985 method = expression.args.get("method") 986 if method and method.name.upper() != "RESERVOIR": 987 self.unsupported( 988 f"Sampling method {method} is not supported with a discrete sample count, " 989 "defaulting to reservoir sampling" 990 ) 991 expression.set("method", exp.var("RESERVOIR")) 992 993 return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword) 994 995 def interval_sql(self, expression: 
exp.Interval) -> str: 996 multiplier: t.Optional[int] = None 997 unit = expression.text("unit").lower() 998 999 if unit.startswith("week"): 1000 multiplier = 7 1001 if unit.startswith("quarter"): 1002 multiplier = 90 1003 1004 if multiplier: 1005 return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})" 1006 1007 return super().interval_sql(expression) 1008 1009 def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str: 1010 if isinstance(expression.parent, exp.UserDefinedFunction): 1011 return self.sql(expression, "this") 1012 return super().columndef_sql(expression, sep) 1013 1014 def join_sql(self, expression: exp.Join) -> str: 1015 if ( 1016 expression.side == "LEFT" 1017 and not expression.args.get("on") 1018 and isinstance(expression.this, exp.Unnest) 1019 ): 1020 # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause 1021 # DuckDB doesn't, but we can just add a dummy ON clause that is always true 1022 return super().join_sql(expression.on(exp.true())) 1023 1024 return super().join_sql(expression) 1025 1026 def generateseries_sql(self, expression: exp.GenerateSeries) -> str: 1027 # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b) 1028 if expression.args.get("is_end_exclusive"): 1029 return rename_func("RANGE")(self, expression) 1030 1031 return self.function_fallback_sql(expression) 1032 1033 def countif_sql(self, expression: exp.CountIf) -> str: 1034 if self.dialect.version >= Version("1.2"): 1035 return self.function_fallback_sql(expression) 1036 1037 # https://github.com/tobymao/sqlglot/pull/4749 1038 return count_if_to_sum(self, expression) 1039 1040 def bracket_sql(self, expression: exp.Bracket) -> str: 1041 if self.dialect.version >= Version("1.2"): 1042 return super().bracket_sql(expression) 1043 1044 # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes 1045 this = expression.this 1046 if isinstance(this, exp.Array): 1047 this.replace(exp.paren(this)) 1048 1049 bracket = super().bracket_sql(expression) 1050 1051 if not expression.args.get("returns_list_for_maps"): 1052 if not this.type: 1053 from sqlglot.optimizer.annotate_types import annotate_types 1054 1055 this = annotate_types(this, dialect=self.dialect) 1056 1057 if this.is_type(exp.DataType.Type.MAP): 1058 bracket = f"({bracket})[1]" 1059 1060 return bracket 1061 1062 def withingroup_sql(self, expression: exp.WithinGroup) -> str: 1063 expression_sql = self.sql(expression, "expression") 1064 1065 func = expression.this 1066 if isinstance(func, exp.PERCENTILES): 1067 # Make the order key the first arg and slide the fraction to the right 1068 # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions 1069 order_col = expression.find(exp.Ordered) 1070 if order_col: 1071 func.set("expression", func.this) 1072 func.set("this", order_col.this) 1073 1074 this = self.sql(expression, "this").rstrip(")") 1075 1076 return f"{this}{expression_sql})" 1077 1078 def length_sql(self, expression: exp.Length) -> str: 1079 arg = expression.this 1080 1081 # Dialects like BQ and Snowflake also accept binary values as args, so 1082 # DDB will attempt to infer the type or resort to case/when resolution 1083 if not expression.args.get("binary") or arg.is_string: 1084 return self.func("LENGTH", arg) 1085 1086 if not arg.type: 1087 from sqlglot.optimizer.annotate_types import annotate_types 1088 1089 arg = annotate_types(arg, dialect=self.dialect) 1090 1091 if arg.is_type(*exp.DataType.TEXT_TYPES): 1092 return 
self.func("LENGTH", arg) 1093 1094 # We need these casts to make duckdb's static type checker happy 1095 blob = exp.cast(arg, exp.DataType.Type.VARBINARY) 1096 varchar = exp.cast(arg, exp.DataType.Type.VARCHAR) 1097 1098 case = ( 1099 exp.case(self.func("TYPEOF", arg)) 1100 .when("'BLOB'", self.func("OCTET_LENGTH", blob)) 1101 .else_( 1102 exp.Anonymous(this="LENGTH", expressions=[varchar]) 1103 ) # anonymous to break length_sql recursion 1104 ) 1105 1106 return self.sql(case) 1107 1108 def objectinsert_sql(self, expression: exp.ObjectInsert) -> str: 1109 this = expression.this 1110 key = expression.args.get("key") 1111 key_sql = key.name if isinstance(key, exp.Expression) else "" 1112 value_sql = self.sql(expression, "value") 1113 1114 kv_sql = f"{key_sql} := {value_sql}" 1115 1116 # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake 1117 # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB 1118 if isinstance(this, exp.Struct) and not this.expressions: 1119 return self.func("STRUCT_PACK", kv_sql) 1120 1121 return self.func("STRUCT_INSERT", this, kv_sql) 1122 1123 def unnest_sql(self, expression: exp.Unnest) -> str: 1124 explode_array = expression.args.get("explode_array") 1125 if explode_array: 1126 # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct 1127 # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))" 1128 expression.expressions.append( 1129 exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2)) 1130 ) 1131 1132 # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB 1133 alias = expression.args.get("alias") 1134 if alias: 1135 expression.set("alias", None) 1136 alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0)) 1137 1138 unnest_sql = super().unnest_sql(expression) 1139 select = exp.Select(expressions=[unnest_sql]).subquery(alias) 1140 return self.sql(select) 1141 1142 return super().unnest_sql(expression) 1143 1144 def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str: 1145 if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS): 1146 # DuckDB should render IGNORE NULLS only for the general-purpose 1147 # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...) 1148 return super().ignorenulls_sql(expression) 1149 1150 self.unsupported("IGNORE NULLS is not supported for non-window functions.") 1151 return self.sql(expression, "this") 1152 1153 def respectnulls_sql(self, expression: exp.RespectNulls) -> str: 1154 if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS): 1155 # DuckDB should render RESPECT NULLS only for the general-purpose 1156 # window functions that accept it e.g. FIRST_VALUE(... RESPECT NULLS) OVER (...) 
1157 return super().respectnulls_sql(expression) 1158 1159 self.unsupported("RESPECT NULLS is not supported for non-window functions.") 1160 return self.sql(expression, "this") 1161 1162 def arraytostring_sql(self, expression: exp.ArrayToString) -> str: 1163 this = self.sql(expression, "this") 1164 null_text = self.sql(expression, "null") 1165 1166 if null_text: 1167 this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))" 1168 1169 return self.func("ARRAY_TO_STRING", this, expression.expression) 1170 1171 @unsupported_args("position", "occurrence") 1172 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 1173 group = expression.args.get("group") 1174 params = expression.args.get("parameters") 1175 1176 # Do not render group if there is no following argument, 1177 # and it's the default value for this dialect 1178 if ( 1179 not params 1180 and group 1181 and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP) 1182 ): 1183 group = None 1184 return self.func( 1185 "REGEXP_EXTRACT", expression.this, expression.expression, group, params 1186 ) 1187 1188 @unsupported_args("culture") 1189 def numbertostr_sql(self, expression: exp.NumberToStr) -> str: 1190 fmt = expression.args.get("format") 1191 if fmt and fmt.is_int: 1192 return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this) 1193 1194 self.unsupported("Only integer formats are supported by NumberToStr") 1195 return self.function_fallback_sql(expression) 1196 1197 def autoincrementcolumnconstraint_sql(self, _) -> str: 1198 self.unsupported("The AUTOINCREMENT column constraint is not supported by DuckDB") 1199 return "" 1200 1201 def aliases_sql(self, expression: exp.Aliases) -> str: 1202 this = expression.this 1203 if isinstance(this, exp.Posexplode): 1204 return self.posexplode_sql(this) 1205 1206 return super().aliases_sql(expression) 1207 1208 def posexplode_sql(self, expression: exp.Posexplode) -> str: 1209 this = expression.this 1210 parent = expression.parent 1211 1212 # The default Spark aliases are "pos" and "col", unless specified otherwise 1213 pos, col = exp.to_identifier("pos"), exp.to_identifier("col") 1214 1215 if isinstance(parent, exp.Aliases): 1216 # Column case: SELECT POSEXPLODE(col) [AS (a, b)] 1217 pos, col = parent.expressions 1218 elif isinstance(parent, exp.Table): 1219 # Table case: SELECT * FROM POSEXPLODE(col) [AS (a, b)] 1220 alias = parent.args.get("alias") 1221 if alias: 1222 pos, col = alias.columns or [pos, col] 1223 alias.pop() 1224 1225 # Translate POSEXPLODE to UNNEST + GENERATE_SUBSCRIPTS 1226 # Note: In Spark pos is 0-indexed, but in DuckDB it's 1-indexed, so we subtract 1 from GENERATE_SUBSCRIPTS 1227 unnest_sql = self.sql(exp.Unnest(expressions=[this], alias=col)) 1228 gen_subscripts = self.sql( 1229 exp.Alias( 1230 this=exp.Anonymous( 1231 this="GENERATE_SUBSCRIPTS", expressions=[this, exp.Literal.number(1)] 1232 ) 1233 - exp.Literal.number(1), 1234 alias=pos, 1235 ) 1236 ) 1237 1238 posexplode_sql = self.format_args(gen_subscripts, unnest_sql) 1239 1240 if isinstance(parent, exp.From) or (parent and isinstance(parent.parent, exp.From)): 1241 # SELECT * FROM POSEXPLODE(col) -> SELECT * FROM (SELECT GENERATE_SUBSCRIPTS(...), UNNEST(...)) 1242 return self.sql(exp.Subquery(this=exp.Select(expressions=[posexplode_sql]))) 1243 1244 return posexplode_sql
Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last"
A NULL arg in CONCAT yields NULL by default, but in some dialects it yields an empty string.
Whether ORDER BY ALL is supported (expanding to all the selected columns), as in DuckDB and Spark3/Databricks.
Whether expressions such as x::INT[5] should be parsed as fixed-size array definitions/casts, as in DuckDB. In dialects that don't support fixed-size arrays, such as Snowflake, the same syntax is interpreted as a subscript/index operator.
Whether failing to parse a JSON path expression using the JSONPath dialect will log a warning.
Whether number literals can include underscores for better readability.
Specifies the strategy according to which identifiers should be normalized.
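These settings are plain class-level flags on the dialect. A minimal sketch of two DuckDB-visible behaviors described above, round-tripped through sqlglot's top-level API (outputs are only printed, since the exact rendering is not asserted here):

import sqlglot

# ORDER BY ALL is accepted by the DuckDB dialect (see the flag above).
print(sqlglot.transpile("SELECT a, b FROM t ORDER BY ALL", read="duckdb")[0])

# Underscore-separated number literals are tokenized as ordinary numbers.
print(sqlglot.transpile("SELECT 1_000_000 AS n", read="duckdb")[0])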
300 def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 301 if isinstance(path, exp.Literal): 302 # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`. 303 # Additionally, it allows accessing the back of lists using the `[#-i]` syntax. 304 # This check ensures we'll avoid trying to parse these as JSON paths, which can 305 # either result in a noisy warning or in an invalid representation of the path. 306 path_text = path.name 307 if path_text.startswith("/") or "[#" in path_text: 308 return path 309 310 return super().to_json_path(path)
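A short sketch of the distinction this override handles, assuming the `->` arrow operator as the extraction syntax:

import sqlglot

# A '$'-style path is parsed into sqlglot's structured JSONPath nodes.
sqlglot.parse_one("SELECT col -> '$.a.b' FROM t", read="duckdb")

# JSON pointer ('/a/b') and back-of-list ('[#-1]') paths short-circuit in
# to_json_path above and are preserved as opaque string literals instead.
sqlglot.parse_one("SELECT col -> '/a/b' FROM t", read="duckdb")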
312 class Tokenizer(tokens.Tokenizer): 313 BYTE_STRINGS = [("e'", "'"), ("E'", "'")] 314 HEREDOC_STRINGS = ["$"] 315 316 HEREDOC_TAG_IS_IDENTIFIER = True 317 HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER 318 319 KEYWORDS = { 320 **tokens.Tokenizer.KEYWORDS, 321 "//": TokenType.DIV, 322 "**": TokenType.DSTAR, 323 "^@": TokenType.CARET_AT, 324 "@>": TokenType.AT_GT, 325 "<@": TokenType.LT_AT, 326 "ATTACH": TokenType.ATTACH, 327 "BINARY": TokenType.VARBINARY, 328 "BITSTRING": TokenType.BIT, 329 "BPCHAR": TokenType.TEXT, 330 "CHAR": TokenType.TEXT, 331 "DATETIME": TokenType.TIMESTAMPNTZ, 332 "DETACH": TokenType.DETACH, 333 "EXCLUDE": TokenType.EXCEPT, 334 "LOGICAL": TokenType.BOOLEAN, 335 "ONLY": TokenType.ONLY, 336 "PIVOT_WIDER": TokenType.PIVOT, 337 "POSITIONAL": TokenType.POSITIONAL, 338 "SIGNED": TokenType.INT, 339 "STRING": TokenType.TEXT, 340 "SUMMARIZE": TokenType.SUMMARIZE, 341 "TIMESTAMP": TokenType.TIMESTAMPNTZ, 342 "TIMESTAMP_S": TokenType.TIMESTAMP_S, 343 "TIMESTAMP_MS": TokenType.TIMESTAMP_MS, 344 "TIMESTAMP_NS": TokenType.TIMESTAMP_NS, 345 "TIMESTAMP_US": TokenType.TIMESTAMP, 346 "UBIGINT": TokenType.UBIGINT, 347 "UINTEGER": TokenType.UINT, 348 "USMALLINT": TokenType.USMALLINT, 349 "UTINYINT": TokenType.UTINYINT, 350 "VARCHAR": TokenType.TEXT, 351 } 352 KEYWORDS.pop("/*+") 353 354 SINGLE_TOKENS = { 355 **tokens.Tokenizer.SINGLE_TOKENS, 356 "$": TokenType.PARAMETER, 357 } 358 359 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
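A quick sketch of some DuckDB-specific tokens defined above:

import sqlglot

# '//' lexes as DIV (integer division), '**' as DSTAR (power), '^@' as
# CARET_AT (starts-with) and '$' as a parameter/placeholder prefix, per the
# KEYWORDS and SINGLE_TOKENS tables above.
for token in sqlglot.tokenize("SELECT 7 // 2, 2 ** 3, col ^@ 'pre', $1", read="duckdb"):
    print(token.token_type, token.text)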
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- STRING_ESCAPES
- VAR_SINGLE_TOKENS
- IDENTIFIER_ESCAPES
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
361 class Parser(parser.Parser): 362 BITWISE = { 363 **parser.Parser.BITWISE, 364 TokenType.TILDA: exp.RegexpLike, 365 } 366 BITWISE.pop(TokenType.CARET) 367 368 RANGE_PARSERS = { 369 **parser.Parser.RANGE_PARSERS, 370 TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps), 371 TokenType.CARET_AT: binary_range_parser(exp.StartsWith), 372 } 373 374 EXPONENT = { 375 **parser.Parser.EXPONENT, 376 TokenType.CARET: exp.Pow, 377 TokenType.DSTAR: exp.Pow, 378 } 379 380 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"} 381 382 SHOW_PARSERS = { 383 "TABLES": _show_parser("TABLES"), 384 "ALL TABLES": _show_parser("ALL TABLES"), 385 } 386 387 FUNCTIONS = { 388 **parser.Parser.FUNCTIONS, 389 "ARRAY_REVERSE_SORT": _build_sort_array_desc, 390 "ARRAY_SORT": exp.SortArray.from_arg_list, 391 "DATEDIFF": _build_date_diff, 392 "DATE_DIFF": _build_date_diff, 393 "DATE_TRUNC": date_trunc_to_time, 394 "DATETRUNC": date_trunc_to_time, 395 "DECODE": lambda args: exp.Decode( 396 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 397 ), 398 "EDITDIST3": exp.Levenshtein.from_arg_list, 399 "ENCODE": lambda args: exp.Encode( 400 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 401 ), 402 "EPOCH": exp.TimeToUnix.from_arg_list, 403 "EPOCH_MS": lambda args: exp.UnixToTime( 404 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 405 ), 406 "GENERATE_SERIES": _build_generate_series(), 407 "JSON": exp.ParseJSON.from_arg_list, 408 "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract), 409 "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar), 410 "LIST_HAS": exp.ArrayContains.from_arg_list, 411 "LIST_REVERSE_SORT": _build_sort_array_desc, 412 "LIST_SORT": exp.SortArray.from_arg_list, 413 "LIST_VALUE": lambda args: exp.Array(expressions=args), 414 "MAKE_TIME": exp.TimeFromParts.from_arg_list, 415 "MAKE_TIMESTAMP": _build_make_timestamp, 416 "QUANTILE_CONT": exp.PercentileCont.from_arg_list, 417 "QUANTILE_DISC": exp.PercentileDisc.from_arg_list, 418 "RANGE": _build_generate_series(end_exclusive=True), 419 "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract), 420 "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll), 421 "REGEXP_MATCHES": exp.RegexpLike.from_arg_list, 422 "REGEXP_REPLACE": lambda args: exp.RegexpReplace( 423 this=seq_get(args, 0), 424 expression=seq_get(args, 1), 425 replacement=seq_get(args, 2), 426 modifiers=seq_get(args, 3), 427 ), 428 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 429 "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"), 430 "STRING_SPLIT": exp.Split.from_arg_list, 431 "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 432 "STRING_TO_ARRAY": exp.Split.from_arg_list, 433 "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"), 434 "STRUCT_PACK": exp.Struct.from_arg_list, 435 "STR_SPLIT": exp.Split.from_arg_list, 436 "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 437 "TIME_BUCKET": exp.DateBin.from_arg_list, 438 "TO_TIMESTAMP": exp.UnixToTime.from_arg_list, 439 "UNNEST": exp.Explode.from_arg_list, 440 "XOR": binary_from_function(exp.BitwiseXor), 441 } 442 443 FUNCTIONS.pop("DATE_SUB") 444 FUNCTIONS.pop("GLOB") 445 446 FUNCTION_PARSERS = { 447 **parser.Parser.FUNCTION_PARSERS, 448 **dict.fromkeys( 449 ("GROUP_CONCAT", "LISTAGG", "STRINGAGG"), lambda self: self._parse_string_agg() 450 ), 451 } 452 FUNCTION_PARSERS.pop("DECODE") 453 454 NO_PAREN_FUNCTION_PARSERS = { 455 **parser.Parser.NO_PAREN_FUNCTION_PARSERS, 456 "MAP": 
lambda self: self._parse_map(), 457 "@": lambda self: exp.Abs(this=self._parse_bitwise()), 458 } 459 460 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 461 TokenType.SEMI, 462 TokenType.ANTI, 463 } 464 465 PLACEHOLDER_PARSERS = { 466 **parser.Parser.PLACEHOLDER_PARSERS, 467 TokenType.PARAMETER: lambda self: ( 468 self.expression(exp.Placeholder, this=self._prev.text) 469 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 470 else None 471 ), 472 } 473 474 TYPE_CONVERTERS = { 475 # https://duckdb.org/docs/sql/data_types/numeric 476 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3), 477 # https://duckdb.org/docs/sql/data_types/text 478 exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"), 479 } 480 481 STATEMENT_PARSERS = { 482 **parser.Parser.STATEMENT_PARSERS, 483 TokenType.ATTACH: lambda self: self._parse_attach_detach(), 484 TokenType.DETACH: lambda self: self._parse_attach_detach(is_attach=False), 485 TokenType.SHOW: lambda self: self._parse_show(), 486 } 487 488 SET_PARSERS = { 489 **parser.Parser.SET_PARSERS, 490 "VARIABLE": lambda self: self._parse_set_item_assignment("VARIABLE"), 491 } 492 493 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 494 index = self._index 495 if not self._match_text_seq("LAMBDA"): 496 return super()._parse_lambda(alias=alias) 497 498 expressions = self._parse_csv(self._parse_lambda_arg) 499 if not self._match(TokenType.COLON): 500 self._retreat(index) 501 return None 502 503 this = self._replace_lambda(self._parse_assignment(), expressions) 504 return self.expression(exp.Lambda, this=this, expressions=expressions, colon=True) 505 506 def _parse_expression(self) -> t.Optional[exp.Expression]: 507 # DuckDB supports prefix aliases, e.g. foo: 1 508 if self._next and self._next.token_type == TokenType.COLON: 509 alias = self._parse_id_var(tokens=self.ALIAS_TOKENS) 510 self._match(TokenType.COLON) 511 comments = self._prev_comments or [] 512 513 this = self._parse_assignment() 514 if isinstance(this, exp.Expression): 515 # Moves the comment next to the alias in `alias: expr /* comment */` 516 comments += this.pop_comments() or [] 517 518 return self.expression(exp.Alias, comments=comments, this=this, alias=alias) 519 520 return super()._parse_expression() 521 522 def _parse_table( 523 self, 524 schema: bool = False, 525 joins: bool = False, 526 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 527 parse_bracket: bool = False, 528 is_db_reference: bool = False, 529 parse_partition: bool = False, 530 consume_pipe: bool = False, 531 ) -> t.Optional[exp.Expression]: 532 # DuckDB supports prefix aliases, e.g. 
FROM foo: bar 533 if self._next and self._next.token_type == TokenType.COLON: 534 alias = self._parse_table_alias( 535 alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS 536 ) 537 self._match(TokenType.COLON) 538 comments = self._prev_comments or [] 539 else: 540 alias = None 541 comments = [] 542 543 table = super()._parse_table( 544 schema=schema, 545 joins=joins, 546 alias_tokens=alias_tokens, 547 parse_bracket=parse_bracket, 548 is_db_reference=is_db_reference, 549 parse_partition=parse_partition, 550 ) 551 if isinstance(table, exp.Expression) and isinstance(alias, exp.TableAlias): 552 # Moves the comment next to the alias in `alias: table /* comment */` 553 comments += table.pop_comments() or [] 554 alias.comments = alias.pop_comments() + comments 555 table.set("alias", alias) 556 557 return table 558 559 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 560 # https://duckdb.org/docs/sql/samples.html 561 sample = super()._parse_table_sample(as_modifier=as_modifier) 562 if sample and not sample.args.get("method"): 563 if sample.args.get("size"): 564 sample.set("method", exp.var("RESERVOIR")) 565 else: 566 sample.set("method", exp.var("SYSTEM")) 567 568 return sample 569 570 def _parse_bracket( 571 self, this: t.Optional[exp.Expression] = None 572 ) -> t.Optional[exp.Expression]: 573 bracket = super()._parse_bracket(this) 574 575 if self.dialect.version < Version("1.2.0") and isinstance(bracket, exp.Bracket): 576 # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes 577 bracket.set("returns_list_for_maps", True) 578 579 return bracket 580 581 def _parse_map(self) -> exp.ToMap | exp.Map: 582 if self._match(TokenType.L_BRACE, advance=False): 583 return self.expression(exp.ToMap, this=self._parse_bracket()) 584 585 args = self._parse_wrapped_csv(self._parse_assignment) 586 return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1)) 587 588 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 589 return self._parse_field_def() 590 591 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 592 if len(aggregations) == 1: 593 return super()._pivot_column_names(aggregations) 594 return pivot_column_names(aggregations, dialect="duckdb") 595 596 def _parse_attach_detach(self, is_attach=True) -> exp.Attach | exp.Detach: 597 def _parse_attach_option() -> exp.AttachOption: 598 return self.expression( 599 exp.AttachOption, 600 this=self._parse_var(any_token=True), 601 expression=self._parse_field(any_token=True), 602 ) 603 604 self._match(TokenType.DATABASE) 605 exists = self._parse_exists(not_=is_attach) 606 this = self._parse_alias(self._parse_primary_or_var(), explicit=True) 607 608 if self._match(TokenType.L_PAREN, advance=False): 609 expressions = self._parse_wrapped_csv(_parse_attach_option) 610 else: 611 expressions = None 612 613 return ( 614 self.expression(exp.Attach, this=this, exists=exists, expressions=expressions) 615 if is_attach 616 else self.expression(exp.Detach, this=this, exists=exists) 617 ) 618 619 def _parse_show_duckdb(self, this: str) -> exp.Show: 620 return self.expression(exp.Show, this=this)
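A minimal sketch of the DuckDB-specific constructs these overrides accept (table and column names are arbitrary):

import sqlglot

# Prefix aliases (alias first), via _parse_expression and _parse_table above.
sqlglot.parse_one("SELECT total: price + tax FROM o: orders", read="duckdb")

# Python-style lambdas, via _parse_lambda above.
sqlglot.parse_one("SELECT LIST_TRANSFORM([1, 2], lambda x: x + 1)", read="duckdb")

# ATTACH with parenthesized options, via _parse_attach_detach above.
sqlglot.parse_one("ATTACH 'sqlite_file.db' AS sdb (TYPE SQLITE)", read="duckdb")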
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
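Under the IMMEDIATE default an error surfaces as soon as it is encountered; ErrorLevel.RAISE instead collects up to max_errors messages before raising. A sketch using the top-level helper, which forwards these options to the parser:

import sqlglot
from sqlglot.errors import ErrorLevel, ParseError

try:
    sqlglot.parse_one("SELECT 1 +", read="duckdb", error_level=ErrorLevel.RAISE)
except ParseError as err:
    print(err.errors)  # structured error messages, capped at max_errors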
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- ALIAS_TOKENS
- COLON_PLACEHOLDER_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- TERM
- FACTOR
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PIPE_SYNTAX_TRANSFORM_PARSERS
- PROPERTY_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- WINDOW_EXCLUDE_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- RECURSIVE_CTE_SEARCH_KIND
- MODIFIABLES
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_DEFAULTS_TO_LN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- OPTIONAL_ALIAS_TOKEN_CTE
- ALTER_RENAME_REQUIRES_COLUMN
- JOINS_HAVE_EQUAL_PRECEDENCE
- ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- parse_set_operation
- errors
- sql
622 class Generator(generator.Generator): 623 PARAMETER_TOKEN = "$" 624 NAMED_PLACEHOLDER_TOKEN = "$" 625 JOIN_HINTS = False 626 TABLE_HINTS = False 627 QUERY_HINTS = False 628 LIMIT_FETCH = "LIMIT" 629 STRUCT_DELIMITER = ("(", ")") 630 RENAME_TABLE_WITH_DB = False 631 NVL2_SUPPORTED = False 632 SEMI_ANTI_JOIN_WITH_SIDE = False 633 TABLESAMPLE_KEYWORDS = "USING SAMPLE" 634 TABLESAMPLE_SEED_KEYWORD = "REPEATABLE" 635 LAST_DAY_SUPPORTS_DATE_PART = False 636 JSON_KEY_VALUE_PAIR_SEP = "," 637 IGNORE_NULLS_IN_FUNC = True 638 JSON_PATH_BRACKETED_KEY_SUPPORTED = False 639 SUPPORTS_CREATE_TABLE_LIKE = False 640 MULTI_ARG_DISTINCT = False 641 CAN_IMPLEMENT_ARRAY_ANY = True 642 SUPPORTS_TO_NUMBER = False 643 SUPPORTS_WINDOW_EXCLUDE = True 644 COPY_HAS_INTO_KEYWORD = False 645 STAR_EXCEPT = "EXCLUDE" 646 PAD_FILL_PATTERN_IS_REQUIRED = True 647 ARRAY_CONCAT_IS_VAR_LEN = False 648 ARRAY_SIZE_DIM_REQUIRED = False 649 NORMALIZE_EXTRACT_DATE_PARTS = True 650 651 TRANSFORMS = { 652 **generator.Generator.TRANSFORMS, 653 exp.ApproxDistinct: approx_count_distinct_sql, 654 exp.Array: inline_array_unless_query, 655 exp.ArrayFilter: rename_func("LIST_FILTER"), 656 exp.ArrayRemove: remove_from_array_using_filter, 657 exp.ArraySort: _array_sort_sql, 658 exp.ArraySum: rename_func("LIST_SUM"), 659 exp.BitwiseXor: rename_func("XOR"), 660 exp.CommentColumnConstraint: no_comment_column_constraint_sql, 661 exp.CurrentDate: lambda *_: "CURRENT_DATE", 662 exp.CurrentTime: lambda *_: "CURRENT_TIME", 663 exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", 664 exp.DayOfMonth: rename_func("DAYOFMONTH"), 665 exp.DayOfWeek: rename_func("DAYOFWEEK"), 666 exp.DayOfWeekIso: rename_func("ISODOW"), 667 exp.DayOfYear: rename_func("DAYOFYEAR"), 668 exp.DataType: _datatype_sql, 669 exp.Date: _date_sql, 670 exp.DateAdd: _date_delta_sql, 671 exp.DateFromParts: rename_func("MAKE_DATE"), 672 exp.DateSub: _date_delta_sql, 673 exp.DateDiff: _date_diff_sql, 674 exp.DateStrToDate: datestrtodate_sql, 675 exp.Datetime: no_datetime_sql, 676 exp.DatetimeSub: _date_delta_sql, 677 exp.DatetimeAdd: _date_delta_sql, 678 exp.DateToDi: lambda self, 679 e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)", 680 exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False), 681 exp.DiToDate: lambda self, 682 e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)", 683 exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False), 684 exp.GenerateDateArray: _generate_datetime_array_sql, 685 exp.GenerateTimestampArray: _generate_datetime_array_sql, 686 exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, within_group=False), 687 exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"), 688 exp.Explode: rename_func("UNNEST"), 689 exp.IntDiv: lambda self, e: self.binary(e, "//"), 690 exp.IsInf: rename_func("ISINF"), 691 exp.IsNan: rename_func("ISNAN"), 692 exp.JSONBExists: rename_func("JSON_EXISTS"), 693 exp.JSONExtract: _arrow_json_extract_sql, 694 exp.JSONExtractArray: _json_extract_value_array_sql, 695 exp.JSONExtractScalar: _arrow_json_extract_sql, 696 exp.JSONFormat: _json_format_sql, 697 exp.JSONValueArray: _json_extract_value_array_sql, 698 exp.Lateral: explode_to_unnest_sql, 699 exp.LogicalOr: rename_func("BOOL_OR"), 700 exp.LogicalAnd: rename_func("BOOL_AND"), 701 exp.MakeInterval: lambda self, e: no_make_interval_sql(self, e, sep=" "), 702 exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)), 703 
exp.MonthsBetween: lambda self, e: self.func( 704 "DATEDIFF", 705 "'month'", 706 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True), 707 exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True), 708 ), 709 exp.PercentileCont: rename_func("QUANTILE_CONT"), 710 exp.PercentileDisc: rename_func("QUANTILE_DISC"), 711 # DuckDB doesn't allow qualified columns inside of PIVOT expressions. 712 # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62 713 exp.Pivot: transforms.preprocess([transforms.unqualify_columns]), 714 exp.RegexpReplace: lambda self, e: self.func( 715 "REGEXP_REPLACE", 716 e.this, 717 e.expression, 718 e.args.get("replacement"), 719 e.args.get("modifiers"), 720 ), 721 exp.RegexpLike: rename_func("REGEXP_MATCHES"), 722 exp.RegexpILike: lambda self, e: self.func( 723 "REGEXP_MATCHES", e.this, e.expression, exp.Literal.string("i") 724 ), 725 exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"), 726 exp.Return: lambda self, e: self.sql(e, "this"), 727 exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "", 728 exp.Rand: rename_func("RANDOM"), 729 exp.SHA: rename_func("SHA1"), 730 exp.SHA2: sha256_sql, 731 exp.Split: rename_func("STR_SPLIT"), 732 exp.SortArray: _sort_array_sql, 733 exp.StrPosition: strposition_sql, 734 exp.StrToUnix: lambda self, e: self.func( 735 "EPOCH", self.func("STRPTIME", e.this, self.format_time(e)) 736 ), 737 exp.Struct: _struct_sql, 738 exp.Transform: rename_func("LIST_TRANSFORM"), 739 exp.TimeAdd: _date_delta_sql, 740 exp.Time: no_time_sql, 741 exp.TimeDiff: _timediff_sql, 742 exp.Timestamp: no_timestamp_sql, 743 exp.TimestampDiff: lambda self, e: self.func( 744 "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this 745 ), 746 exp.TimestampTrunc: timestamptrunc_sql(), 747 exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)), 748 exp.TimeStrToTime: timestrtotime_sql, 749 exp.TimeStrToUnix: lambda self, e: self.func( 750 "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP) 751 ), 752 exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)), 753 exp.TimeToUnix: rename_func("EPOCH"), 754 exp.TsOrDiToDi: lambda self, 755 e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)", 756 exp.TsOrDsAdd: _date_delta_sql, 757 exp.TsOrDsDiff: lambda self, e: self.func( 758 "DATE_DIFF", 759 f"'{e.args.get('unit') or 'DAY'}'", 760 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP), 761 exp.cast(e.this, exp.DataType.Type.TIMESTAMP), 762 ), 763 exp.UnixToStr: lambda self, e: self.func( 764 "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e) 765 ), 766 exp.DatetimeTrunc: lambda self, e: self.func( 767 "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME) 768 ), 769 exp.UnixToTime: _unix_to_time_sql, 770 exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)", 771 exp.VariancePop: rename_func("VAR_POP"), 772 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 773 exp.Xor: bool_xor_sql, 774 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 775 rename_func("LEVENSHTEIN") 776 ), 777 exp.JSONObjectAgg: rename_func("JSON_GROUP_OBJECT"), 778 exp.JSONBObjectAgg: rename_func("JSON_GROUP_OBJECT"), 779 exp.DateBin: rename_func("TIME_BUCKET"), 780 } 781 782 SUPPORTED_JSON_PATH_PARTS = { 783 exp.JSONPathKey, 784 exp.JSONPathRoot, 785 exp.JSONPathSubscript, 786 exp.JSONPathWildcard, 787 
} 788 789 TYPE_MAPPING = { 790 **generator.Generator.TYPE_MAPPING, 791 exp.DataType.Type.BINARY: "BLOB", 792 exp.DataType.Type.BPCHAR: "TEXT", 793 exp.DataType.Type.CHAR: "TEXT", 794 exp.DataType.Type.DATETIME: "TIMESTAMP", 795 exp.DataType.Type.FLOAT: "REAL", 796 exp.DataType.Type.JSONB: "JSON", 797 exp.DataType.Type.NCHAR: "TEXT", 798 exp.DataType.Type.NVARCHAR: "TEXT", 799 exp.DataType.Type.UINT: "UINTEGER", 800 exp.DataType.Type.VARBINARY: "BLOB", 801 exp.DataType.Type.ROWVERSION: "BLOB", 802 exp.DataType.Type.VARCHAR: "TEXT", 803 exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP", 804 exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S", 805 exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS", 806 exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS", 807 } 808 809 # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77 810 RESERVED_KEYWORDS = { 811 "array", 812 "analyse", 813 "union", 814 "all", 815 "when", 816 "in_p", 817 "default", 818 "create_p", 819 "window", 820 "asymmetric", 821 "to", 822 "else", 823 "localtime", 824 "from", 825 "end_p", 826 "select", 827 "current_date", 828 "foreign", 829 "with", 830 "grant", 831 "session_user", 832 "or", 833 "except", 834 "references", 835 "fetch", 836 "limit", 837 "group_p", 838 "leading", 839 "into", 840 "collate", 841 "offset", 842 "do", 843 "then", 844 "localtimestamp", 845 "check_p", 846 "lateral_p", 847 "current_role", 848 "where", 849 "asc_p", 850 "placing", 851 "desc_p", 852 "user", 853 "unique", 854 "initially", 855 "column", 856 "both", 857 "some", 858 "as", 859 "any", 860 "only", 861 "deferrable", 862 "null_p", 863 "current_time", 864 "true_p", 865 "table", 866 "case", 867 "trailing", 868 "variadic", 869 "for", 870 "on", 871 "distinct", 872 "false_p", 873 "not", 874 "constraint", 875 "current_timestamp", 876 "returning", 877 "primary", 878 "intersect", 879 "having", 880 "analyze", 881 "current_user", 882 "and", 883 "cast", 884 "symmetric", 885 "using", 886 "order", 887 "current_catalog", 888 } 889 890 UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren) 891 892 # DuckDB doesn't generally support CREATE TABLE .. properties 893 # https://duckdb.org/docs/sql/statements/create_table.html 894 PROPERTIES_LOCATION = { 895 prop: exp.Properties.Location.UNSUPPORTED 896 for prop in generator.Generator.PROPERTIES_LOCATION 897 } 898 899 # There are a few exceptions (e.g. 
temporary tables) which are supported or 900 # can be transpiled to DuckDB, so we explicitly override them accordingly 901 PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA 902 PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE 903 PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS 904 905 IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS = ( 906 exp.FirstValue, 907 exp.Lag, 908 exp.LastValue, 909 exp.Lead, 910 exp.NthValue, 911 ) 912 913 def lambda_sql( 914 self, expression: exp.Lambda, arrow_sep: str = "->", wrap: bool = True 915 ) -> str: 916 if expression.args.get("colon"): 917 prefix = "LAMBDA " 918 arrow_sep = ":" 919 wrap = False 920 else: 921 prefix = "" 922 923 lambda_sql = super().lambda_sql(expression, arrow_sep=arrow_sep, wrap=wrap) 924 return f"{prefix}{lambda_sql}" 925 926 def show_sql(self, expression: exp.Show) -> str: 927 return f"SHOW {expression.name}" 928 929 def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str: 930 return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ)) 931 932 def strtotime_sql(self, expression: exp.StrToTime) -> str: 933 if expression.args.get("safe"): 934 formatted_time = self.format_time(expression) 935 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)" 936 return str_to_time_sql(self, expression) 937 938 def strtodate_sql(self, expression: exp.StrToDate) -> str: 939 if expression.args.get("safe"): 940 formatted_time = self.format_time(expression) 941 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)" 942 return f"CAST({str_to_time_sql(self, expression)} AS DATE)" 943 944 def parsejson_sql(self, expression: exp.ParseJSON) -> str: 945 arg = expression.this 946 if expression.args.get("safe"): 947 return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null())) 948 return self.func("JSON", arg) 949 950 def timefromparts_sql(self, expression: exp.TimeFromParts) -> str: 951 nano = expression.args.get("nano") 952 if nano is not None: 953 expression.set( 954 "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0) 955 ) 956 957 return rename_func("MAKE_TIME")(self, expression) 958 959 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 960 sec = expression.args["sec"] 961 962 milli = expression.args.get("milli") 963 if milli is not None: 964 sec += milli.pop() / exp.Literal.number(1000.0) 965 966 nano = expression.args.get("nano") 967 if nano is not None: 968 sec += nano.pop() / exp.Literal.number(1000000000.0) 969 970 if milli or nano: 971 expression.set("sec", sec) 972 973 return rename_func("MAKE_TIMESTAMP")(self, expression) 974 975 def tablesample_sql( 976 self, 977 expression: exp.TableSample, 978 tablesample_keyword: t.Optional[str] = None, 979 ) -> str: 980 if not isinstance(expression.parent, exp.Select): 981 # This sample clause only applies to a single source, not the entire resulting relation 982 tablesample_keyword = "TABLESAMPLE" 983 984 if expression.args.get("size"): 985 method = expression.args.get("method") 986 if method and method.name.upper() != "RESERVOIR": 987 self.unsupported( 988 f"Sampling method {method} is not supported with a discrete sample count, " 989 "defaulting to reservoir sampling" 990 ) 991 expression.set("method", exp.var("RESERVOIR")) 992 993 return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword) 994 995 def interval_sql(self, expression: 
exp.Interval) -> str:
    multiplier: t.Optional[int] = None
    unit = expression.text("unit").lower()

    if unit.startswith("week"):
        multiplier = 7
    if unit.startswith("quarter"):
        multiplier = 90

    if multiplier:
        return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})"

    return super().interval_sql(expression)

def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str:
    if isinstance(expression.parent, exp.UserDefinedFunction):
        return self.sql(expression, "this")
    return super().columndef_sql(expression, sep)

def join_sql(self, expression: exp.Join) -> str:
    if (
        expression.side == "LEFT"
        and not expression.args.get("on")
        and isinstance(expression.this, exp.Unnest)
    ):
        # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause
        # DuckDB doesn't, but we can just add a dummy ON clause that is always true
        return super().join_sql(expression.on(exp.true()))

    return super().join_sql(expression)

def generateseries_sql(self, expression: exp.GenerateSeries) -> str:
    # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b)
    if expression.args.get("is_end_exclusive"):
        return rename_func("RANGE")(self, expression)

    return self.function_fallback_sql(expression)

def countif_sql(self, expression: exp.CountIf) -> str:
    if self.dialect.version >= Version("1.2"):
        return self.function_fallback_sql(expression)

    # https://github.com/tobymao/sqlglot/pull/4749
    return count_if_to_sum(self, expression)

def bracket_sql(self, expression: exp.Bracket) -> str:
    if self.dialect.version >= Version("1.2"):
        return super().bracket_sql(expression)

    # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes
    this = expression.this
    if isinstance(this, exp.Array):
        this.replace(exp.paren(this))

    bracket = super().bracket_sql(expression)

    if not expression.args.get("returns_list_for_maps"):
        if not this.type:
            from sqlglot.optimizer.annotate_types import annotate_types

            this = annotate_types(this, dialect=self.dialect)

        if this.is_type(exp.DataType.Type.MAP):
            bracket = f"({bracket})[1]"

    return bracket

def withingroup_sql(self, expression: exp.WithinGroup) -> str:
    expression_sql = self.sql(expression, "expression")

    func = expression.this
    if isinstance(func, exp.PERCENTILES):
        # Make the order key the first arg and slide the fraction to the right
        # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions
        order_col = expression.find(exp.Ordered)
        if order_col:
            func.set("expression", func.this)
            func.set("this", order_col.this)

    this = self.sql(expression, "this").rstrip(")")

    return f"{this}{expression_sql})"

def length_sql(self, expression: exp.Length) -> str:
    arg = expression.this

    # Dialects like BQ and Snowflake also accept binary values as args, so
    # DDB will attempt to infer the type or resort to case/when resolution
    if not expression.args.get("binary") or arg.is_string:
        return self.func("LENGTH", arg)

    if not arg.type:
        from sqlglot.optimizer.annotate_types import annotate_types

        arg = annotate_types(arg, dialect=self.dialect)

    if arg.is_type(*exp.DataType.TEXT_TYPES):
        return self.func("LENGTH", arg)

    # We need these casts to make duckdb's static type checker happy
    blob = exp.cast(arg, exp.DataType.Type.VARBINARY)
    varchar = exp.cast(arg, exp.DataType.Type.VARCHAR)

    case = (
        exp.case(self.func("TYPEOF", arg))
        .when("'BLOB'", self.func("OCTET_LENGTH", blob))
        .else_(
            exp.Anonymous(this="LENGTH", expressions=[varchar])
        )  # anonymous to break length_sql recursion
    )

    return self.sql(case)

def objectinsert_sql(self, expression: exp.ObjectInsert) -> str:
    this = expression.this
    key = expression.args.get("key")
    key_sql = key.name if isinstance(key, exp.Expression) else ""
    value_sql = self.sql(expression, "value")

    kv_sql = f"{key_sql} := {value_sql}"

    # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake
    # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB
    if isinstance(this, exp.Struct) and not this.expressions:
        return self.func("STRUCT_PACK", kv_sql)

    return self.func("STRUCT_INSERT", this, kv_sql)

def unnest_sql(self, expression: exp.Unnest) -> str:
    explode_array = expression.args.get("explode_array")
    if explode_array:
        # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct
        # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))"
        expression.expressions.append(
            exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2))
        )

        # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB
        alias = expression.args.get("alias")
        if alias:
            expression.set("alias", None)
            alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0))

        unnest_sql = super().unnest_sql(expression)
        select = exp.Select(expressions=[unnest_sql]).subquery(alias)
        return self.sql(select)

    return super().unnest_sql(expression)

def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str:
    if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
        # DuckDB should render IGNORE NULLS only for the general-purpose
        # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...)
        return super().ignorenulls_sql(expression)

    self.unsupported("IGNORE NULLS is not supported for non-window functions.")
    return self.sql(expression, "this")

def respectnulls_sql(self, expression: exp.RespectNulls) -> str:
    if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
        # DuckDB should render RESPECT NULLS only for the general-purpose
        # window functions that accept it e.g. FIRST_VALUE(... RESPECT NULLS) OVER (...)
        return super().respectnulls_sql(expression)

    self.unsupported("RESPECT NULLS is not supported for non-window functions.")
    return self.sql(expression, "this")

def arraytostring_sql(self, expression: exp.ArrayToString) -> str:
    this = self.sql(expression, "this")
    null_text = self.sql(expression, "null")

    if null_text:
        this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))"

    return self.func("ARRAY_TO_STRING", this, expression.expression)

@unsupported_args("position", "occurrence")
def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
    group = expression.args.get("group")
    params = expression.args.get("parameters")

    # Do not render group if there is no following argument,
    # and it's the default value for this dialect
    if (
        not params
        and group
        and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP)
    ):
        group = None
    return self.func(
        "REGEXP_EXTRACT", expression.this, expression.expression, group, params
    )

@unsupported_args("culture")
def numbertostr_sql(self, expression: exp.NumberToStr) -> str:
    fmt = expression.args.get("format")
    if fmt and fmt.is_int:
        return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this)

    self.unsupported("Only integer formats are supported by NumberToStr")
    return self.function_fallback_sql(expression)

def autoincrementcolumnconstraint_sql(self, _) -> str:
    self.unsupported("The AUTOINCREMENT column constraint is not supported by DuckDB")
    return ""

def aliases_sql(self, expression: exp.Aliases) -> str:
    this = expression.this
    if isinstance(this, exp.Posexplode):
        return self.posexplode_sql(this)

    return super().aliases_sql(expression)

def posexplode_sql(self, expression: exp.Posexplode) -> str:
    this = expression.this
    parent = expression.parent

    # The default Spark aliases are "pos" and "col", unless specified otherwise
    pos, col = exp.to_identifier("pos"), exp.to_identifier("col")

    if isinstance(parent, exp.Aliases):
        # Column case: SELECT POSEXPLODE(col) [AS (a, b)]
        pos, col = parent.expressions
    elif isinstance(parent, exp.Table):
        # Table case: SELECT * FROM POSEXPLODE(col) [AS (a, b)]
        alias = parent.args.get("alias")
        if alias:
            pos, col = alias.columns or [pos, col]
            alias.pop()

    # Translate POSEXPLODE to UNNEST + GENERATE_SUBSCRIPTS
    # Note: In Spark pos is 0-indexed, but in DuckDB it's 1-indexed, so we subtract 1 from GENERATE_SUBSCRIPTS
    unnest_sql = self.sql(exp.Unnest(expressions=[this], alias=col))
    gen_subscripts = self.sql(
        exp.Alias(
            this=exp.Anonymous(
                this="GENERATE_SUBSCRIPTS", expressions=[this, exp.Literal.number(1)]
            )
            - exp.Literal.number(1),
            alias=pos,
        )
    )

    posexplode_sql = self.format_args(gen_subscripts, unnest_sql)

    if isinstance(parent, exp.From) or (parent and isinstance(parent.parent, exp.From)):
        # SELECT * FROM POSEXPLODE(col) -> SELECT * FROM (SELECT GENERATE_SUBSCRIPTS(...), UNNEST(...))
        return self.sql(exp.Subquery(this=exp.Select(expressions=[posexplode_sql])))

    return posexplode_sql
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3.
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False.
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80.
- comments: Whether to preserve comments in the output SQL code. Default: True.
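These options are rarely set on the Generator directly; a minimal sketch of passing them through sqlglot.transpile, which forwards generator options as keyword arguments (the query and table names below are placeholders):

import sqlglot

# transpile returns one SQL string per input statement; pretty and identify
# are the generator options documented above.
sql = sqlglot.transpile(
    "SELECT a, b FROM t WHERE a > 1",
    write="duckdb",
    pretty=True,    # format the output
    identify=True,  # always quote identifiers
)[0]
print(sql)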
def lambda_sql(
    self, expression: exp.Lambda, arrow_sep: str = "->", wrap: bool = True
) -> str:
    if expression.args.get("colon"):
        prefix = "LAMBDA "
        arrow_sep = ":"
        wrap = False
    else:
        prefix = ""

    lambda_sql = super().lambda_sql(expression, arrow_sep=arrow_sep, wrap=wrap)
    return f"{prefix}{lambda_sql}"
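A hedged illustration of the round trip; the arrow form passes through unchanged, while the LAMBDA/":" output path is only taken for expressions parsed with the colon flag (DuckDB's Python-style lambda syntax):

import sqlglot

# An arrow lambda is expected to round-trip as-is; a "lambda x: ..." input,
# if supported by the parser, would instead come back with a LAMBDA prefix
# and ":" separator.
print(sqlglot.transpile("SELECT LIST_TRANSFORM(l, x -> x + 1)", read="duckdb", write="duckdb")[0])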
def strtodate_sql(self, expression: exp.StrToDate) -> str:
    if expression.args.get("safe"):
        formatted_time = self.format_time(expression)
        return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)"
    return f"CAST({str_to_time_sql(self, expression)} AS DATE)"
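For instance, a STR_TO_DATE parsed from MySQL (one assumed source of exp.StrToDate) lands here; the safe variant swaps STRPTIME for TRY_STRPTIME:

import sqlglot

# Expected to produce a CAST over STRPTIME, roughly
# CAST(STRPTIME('01-2024', '%m-%Y') AS DATE) (format tokens may be remapped).
print(sqlglot.transpile("SELECT STR_TO_DATE('01-2024', '%m-%Y')", read="mysql", write="duckdb")[0])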
def timefromparts_sql(self, expression: exp.TimeFromParts) -> str:
    nano = expression.args.get("nano")
    if nano is not None:
        expression.set(
            "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0)
        )

    return rename_func("MAKE_TIME")(self, expression)
def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
    sec = expression.args["sec"]

    milli = expression.args.get("milli")
    if milli is not None:
        sec += milli.pop() / exp.Literal.number(1000.0)

    nano = expression.args.get("nano")
    if nano is not None:
        sec += nano.pop() / exp.Literal.number(1000000000.0)

    if milli or nano:
        expression.set("sec", sec)

    return rename_func("MAKE_TIMESTAMP")(self, expression)
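A sketch using Snowflake's TIMESTAMP_FROM_PARTS as the assumed source of exp.TimestampFromParts; milli/nano components, when present, are folded into the seconds argument before MAKE_TIMESTAMP is emitted:

import sqlglot

# This six-argument form maps straight onto MAKE_TIMESTAMP; a form with
# sub-second parts would have them divided into the seconds argument.
print(
    sqlglot.transpile(
        "SELECT TIMESTAMP_FROM_PARTS(2024, 1, 1, 12, 30, 15)",
        read="snowflake",
        write="duckdb",
    )[0]
)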
def tablesample_sql(
    self,
    expression: exp.TableSample,
    tablesample_keyword: t.Optional[str] = None,
) -> str:
    if not isinstance(expression.parent, exp.Select):
        # This sample clause only applies to a single source, not the entire resulting relation
        tablesample_keyword = "TABLESAMPLE"

    if expression.args.get("size"):
        method = expression.args.get("method")
        if method and method.name.upper() != "RESERVOIR":
            self.unsupported(
                f"Sampling method {method} is not supported with a discrete sample count, "
                "defaulting to reservoir sampling"
            )
            expression.set("method", exp.var("RESERVOIR"))

    return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)
def interval_sql(self, expression: exp.Interval) -> str:
    multiplier: t.Optional[int] = None
    unit = expression.text("unit").lower()

    if unit.startswith("week"):
        multiplier = 7
    if unit.startswith("quarter"):
        multiplier = 90

    if multiplier:
        return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})"

    return super().interval_sql(expression)
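The method substitutes day multiples for week and quarter units; a small illustration (the exact rendered interval literal is assumed):

import sqlglot

# Expected to render the WEEK interval as roughly (7 * INTERVAL '2' DAY).
print(sqlglot.transpile("SELECT CURRENT_DATE + INTERVAL '2' WEEK", write="duckdb")[0])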
def join_sql(self, expression: exp.Join) -> str:
    if (
        expression.side == "LEFT"
        and not expression.args.get("on")
        and isinstance(expression.this, exp.Unnest)
    ):
        # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause
        # DuckDB doesn't, but we can just add a dummy ON clause that is always true
        return super().join_sql(expression.on(exp.true()))

    return super().join_sql(expression)
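For example, BigQuery's LEFT JOIN UNNEST without an ON clause (an assumed trigger for this path) should pick up the dummy condition:

import sqlglot

# The generated join is expected to carry an always-true ON condition,
# e.g. ... LEFT JOIN UNNEST(t.arr) AS x ON TRUE.
print(
    sqlglot.transpile(
        "SELECT * FROM t LEFT JOIN UNNEST(t.arr) AS x",
        read="bigquery",
        write="duckdb",
    )[0]
)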
def bracket_sql(self, expression: exp.Bracket) -> str:
    if self.dialect.version >= Version("1.2"):
        return super().bracket_sql(expression)

    # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes
    this = expression.this
    if isinstance(this, exp.Array):
        this.replace(exp.paren(this))

    bracket = super().bracket_sql(expression)

    if not expression.args.get("returns_list_for_maps"):
        if not this.type:
            from sqlglot.optimizer.annotate_types import annotate_types

            this = annotate_types(this, dialect=self.dialect)

        if this.is_type(exp.DataType.Type.MAP):
            bracket = f"({bracket})[1]"

    return bracket
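A hedged sketch of exercising the version gate above. It assumes the target version can be pinned through the dialect-settings string (e.g. "duckdb, version=1.0"), which may vary across sqlglot releases:

import sqlglot

# Assumption: pinning a pre-1.2 DuckDB target via the settings string.
# Indexing a MAP literal is then expected to be wrapped as (map[key])[1]
# to preserve the old list-returning bracket semantics.
print(
    sqlglot.transpile(
        "SELECT MAP(['k'], [1])['k']",
        read="duckdb",
        write="duckdb, version=1.0",
    )[0]
)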
def withingroup_sql(self, expression: exp.WithinGroup) -> str:
    expression_sql = self.sql(expression, "expression")

    func = expression.this
    if isinstance(func, exp.PERCENTILES):
        # Make the order key the first arg and slide the fraction to the right
        # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions
        order_col = expression.find(exp.Ordered)
        if order_col:
            func.set("expression", func.this)
            func.set("this", order_col.this)

    this = self.sql(expression, "this").rstrip(")")

    return f"{this}{expression_sql})"
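For instance, PostgreSQL's ordered-set syntax (an assumed source) is rearranged into DuckDB's function-argument form:

import sqlglot

# The order key moves into the first argument slot and the fraction slides
# right, expected to yield something like QUANTILE_CONT(x, 0.5).
print(
    sqlglot.transpile(
        "SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x) FROM t",
        read="postgres",
        write="duckdb",
    )[0]
)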
def length_sql(self, expression: exp.Length) -> str:
    arg = expression.this

    # Dialects like BQ and Snowflake also accept binary values as args, so
    # DDB will attempt to infer the type or resort to case/when resolution
    if not expression.args.get("binary") or arg.is_string:
        return self.func("LENGTH", arg)

    if not arg.type:
        from sqlglot.optimizer.annotate_types import annotate_types

        arg = annotate_types(arg, dialect=self.dialect)

    if arg.is_type(*exp.DataType.TEXT_TYPES):
        return self.func("LENGTH", arg)

    # We need these casts to make duckdb's static type checker happy
    blob = exp.cast(arg, exp.DataType.Type.VARBINARY)
    varchar = exp.cast(arg, exp.DataType.Type.VARCHAR)

    case = (
        exp.case(self.func("TYPEOF", arg))
        .when("'BLOB'", self.func("OCTET_LENGTH", blob))
        .else_(
            exp.Anonymous(this="LENGTH", expressions=[varchar])
        )  # anonymous to break length_sql recursion
    )

    return self.sql(case)
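A sketch of the binary-aware path, assuming BigQuery marks LENGTH as binary-capable; for a column whose type cannot be inferred, the TYPEOF-based CASE fallback is the expected output:

import sqlglot

# With an untyped column, the result should resolve BLOB vs text at runtime
# via CASE TYPEOF(col) WHEN 'BLOB' THEN OCTET_LENGTH(...) ELSE LENGTH(...).
print(sqlglot.transpile("SELECT LENGTH(col)", read="bigquery", write="duckdb")[0])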
def objectinsert_sql(self, expression: exp.ObjectInsert) -> str:
    this = expression.this
    key = expression.args.get("key")
    key_sql = key.name if isinstance(key, exp.Expression) else ""
    value_sql = self.sql(expression, "value")

    kv_sql = f"{key_sql} := {value_sql}"

    # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake
    # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB
    if isinstance(this, exp.Struct) and not this.expressions:
        return self.func("STRUCT_PACK", kv_sql)

    return self.func("STRUCT_INSERT", this, kv_sql)
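The Snowflake case called out in the comment, as a runnable example:

import sqlglot

# An empty input object should yield STRUCT_PACK(k := 1) rather than an
# invalid STRUCT_INSERT on {}.
print(
    sqlglot.transpile(
        "SELECT OBJECT_INSERT(OBJECT_CONSTRUCT(), 'k', 1)",
        read="snowflake",
        write="duckdb",
    )[0]
)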
def unnest_sql(self, expression: exp.Unnest) -> str:
    explode_array = expression.args.get("explode_array")
    if explode_array:
        # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct
        # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))"
        expression.expressions.append(
            exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2))
        )

        # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB
        alias = expression.args.get("alias")
        if alias:
            expression.set("alias", None)
            alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0))

        unnest_sql = super().unnest_sql(expression)
        select = exp.Select(expressions=[unnest_sql]).subquery(alias)
        return self.sql(select)

    return super().unnest_sql(expression)
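A hedged BigQuery example; whether explode_array is set depends on the BigQuery parser and transforms, so the max_depth rewrite below is the expected, not guaranteed, shape:

import sqlglot

# Unnesting an array of structs from BigQuery is expected to become
# FROM (SELECT UNNEST(..., max_depth => 2)) AS x in DuckDB.
print(
    sqlglot.transpile(
        "SELECT * FROM UNNEST([STRUCT(1 AS a)]) AS x",
        read="bigquery",
        write="duckdb",
    )[0]
)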
def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str:
    if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
        # DuckDB should render IGNORE NULLS only for the general-purpose
        # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...)
        return super().ignorenulls_sql(expression)

    self.unsupported("IGNORE NULLS is not supported for non-window functions.")
    return self.sql(expression, "this")
def respectnulls_sql(self, expression: exp.RespectNulls) -> str:
    if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
        # DuckDB should render RESPECT NULLS only for the general-purpose
        # window functions that accept it e.g. FIRST_VALUE(... RESPECT NULLS) OVER (...)
        return super().respectnulls_sql(expression)

    self.unsupported("RESPECT NULLS is not supported for non-window functions.")
    return self.sql(expression, "this")
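Both hooks behave the same way; a quick check with a window function that legitimately accepts the modifier (assuming LAST_VALUE is in the allow list):

import sqlglot

# LAST_VALUE is window-capable, so IGNORE NULLS should be preserved; on a
# non-window function it would be dropped with an unsupported warning.
print(
    sqlglot.transpile(
        "SELECT LAST_VALUE(x IGNORE NULLS) OVER (ORDER BY y) FROM t",
        read="bigquery",
        write="duckdb",
    )[0]
)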
def arraytostring_sql(self, expression: exp.ArrayToString) -> str:
    this = self.sql(expression, "this")
    null_text = self.sql(expression, "null")

    if null_text:
        this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))"

    return self.func("ARRAY_TO_STRING", this, expression.expression)
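For example, Spark's three-argument ARRAY_JOIN (assumed to parse into exp.ArrayToString) exercises the null-replacement branch:

import sqlglot

# The null filler is expected to surface as
# ARRAY_TO_STRING(LIST_TRANSFORM(arr, x -> COALESCE(x, 'NULL')), ',').
print(sqlglot.transpile("SELECT ARRAY_JOIN(arr, ',', 'NULL')", read="spark", write="duckdb")[0])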
@unsupported_args("position", "occurrence")
def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
    group = expression.args.get("group")
    params = expression.args.get("parameters")

    # Do not render group if there is no following argument,
    # and it's the default value for this dialect
    if (
        not params
        and group
        and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP)
    ):
        group = None
    return self.func(
        "REGEXP_EXTRACT", expression.this, expression.expression, group, params
    )
@unsupported_args("culture")
def numbertostr_sql(self, expression: exp.NumberToStr) -> str:
    fmt = expression.args.get("format")
    if fmt and fmt.is_int:
        return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this)

    self.unsupported("Only integer formats are supported by NumberToStr")
    return self.function_fallback_sql(expression)
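A sketch assuming Spark's FORMAT_NUMBER is one source of exp.NumberToStr; an integer format becomes a thousands-separated printf spec:

import sqlglot

# Expected shape: FORMAT('{:,.2f}', col).
print(sqlglot.transpile("SELECT FORMAT_NUMBER(col, 2)", read="spark", write="duckdb")[0])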
def posexplode_sql(self, expression: exp.Posexplode) -> str:
    this = expression.this
    parent = expression.parent

    # The default Spark aliases are "pos" and "col", unless specified otherwise
    pos, col = exp.to_identifier("pos"), exp.to_identifier("col")

    if isinstance(parent, exp.Aliases):
        # Column case: SELECT POSEXPLODE(col) [AS (a, b)]
        pos, col = parent.expressions
    elif isinstance(parent, exp.Table):
        # Table case: SELECT * FROM POSEXPLODE(col) [AS (a, b)]
        alias = parent.args.get("alias")
        if alias:
            pos, col = alias.columns or [pos, col]
            alias.pop()

    # Translate POSEXPLODE to UNNEST + GENERATE_SUBSCRIPTS
    # Note: In Spark pos is 0-indexed, but in DuckDB it's 1-indexed, so we subtract 1 from GENERATE_SUBSCRIPTS
    unnest_sql = self.sql(exp.Unnest(expressions=[this], alias=col))
    gen_subscripts = self.sql(
        exp.Alias(
            this=exp.Anonymous(
                this="GENERATE_SUBSCRIPTS", expressions=[this, exp.Literal.number(1)]
            )
            - exp.Literal.number(1),
            alias=pos,
        )
    )

    posexplode_sql = self.format_args(gen_subscripts, unnest_sql)

    if isinstance(parent, exp.From) or (parent and isinstance(parent.parent, exp.From)):
        # SELECT * FROM POSEXPLODE(col) -> SELECT * FROM (SELECT GENERATE_SUBSCRIPTS(...), UNNEST(...))
        return self.sql(exp.Subquery(this=exp.Select(expressions=[posexplode_sql])))

    return posexplode_sql
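Putting it together for the Spark column case (the aliased form is assumed to parse into exp.Aliases):

import sqlglot

# pos is shifted down by one to keep Spark's 0-based positions, expected to
# read like GENERATE_SUBSCRIPTS(arr, 1) - 1 AS p, UNNEST(arr) AS c.
print(
    sqlglot.transpile(
        "SELECT POSEXPLODE(arr) AS (p, c) FROM t",
        read="spark",
        write="duckdb",
    )[0]
)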
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- LOCKING_READS_SUPPORTED
- EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_ONLY_LITERALS
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_WITH_METHOD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- LIKE_PROPERTY_INSIDE_SCHEMA
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- TRY_SUPPORTED
- SUPPORTS_UESCAPE
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- SUPPORTS_EXPLODING_PROJECTIONS
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_MEDIAN
- SUPPORTS_UNIX_SECONDS
- ALTER_SET_WRAPPED
- PARSE_JSON_NAME
- ARRAY_SIZE_NAME
- ALTER_SET_TYPE
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- sanitize_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablefromrows_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- for_modifiers
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- formatphrase_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- addpartition_sql
- distinct_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- trycast_sql
- jsoncast_sql
- try_sql
- log_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- featuresattime_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- xmlkeyvalueoption_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- declareitem_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- get_put_sql
- translatecharacters_sql
- decodecase_sql