sqlglot.parser
from __future__ import annotations

import logging
import re
import typing as t
import itertools
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")
    TCeilFloor = t.TypeVar("TCeilFloor", exp.Ceil, exp.Floor)

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]

# Used to detect alphabetical characters and +/- in timestamp literals
TIME_ZONE_RE: t.Pattern[str] = re.compile(r":.*?[a-zA-Z\+\-]")


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder
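
# Illustrative sketch (not part of the module): these builders turn the flat
# argument list collected for a SQL function call into a typed AST node,
# folding redundant patterns where possible. For example:
#
#   from sqlglot import exp
#
#   node = build_upper([exp.Hex(this=exp.column("x"))])
#   assert isinstance(node, exp.Hex)  # UPPER(HEX(x)) collapses to HEX(x)
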
def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)


def build_pad(args: t.List, is_left: bool = True):
    return exp.Pad(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        fill_pattern=seq_get(args, 2),
        is_left=is_left,
    )


def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp


def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)


def build_trim(args: t.List, is_left: bool = True):
    return exp.Trim(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        position="LEADING" if is_left else "TRAILING",
    )


def build_coalesce(
    args: t.List, is_nvl: t.Optional[bool] = None, is_null: t.Optional[bool] = None
) -> exp.Coalesce:
    return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl, is_null=is_null)


def build_locate_strposition(args: t.List):
    return exp.StrPosition(
        this=seq_get(args, 1),
        substr=seq_get(args, 0),
        position=seq_get(args, 2),
    )


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """
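
    # Illustrative usage (not part of the class body): the parser is normally
    # driven through sqlglot.parse or sqlglot.parse_one, but it can be used
    # directly, e.g.:
    #
    #   from sqlglot.errors import ErrorLevel
    #   from sqlglot.tokens import Tokenizer
    #
    #   tokens = Tokenizer().tokenize("SELECT 1")
    #   (tree,) = Parser(error_level=ErrorLevel.RAISE).parse(tokens, "SELECT 1")
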
    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce),
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "ARRAYAGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "CHAR": lambda args: exp.Chr(expressions=args),
        "CHR": lambda args: exp.Chr(expressions=args),
        "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "LTRIM": lambda args: build_trim(args),
        "MOD": build_mod,
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RTRIM": lambda args: build_trim(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "STRPOS": exp.StrPosition.from_arg_list,
        "CHARINDEX": lambda args: build_locate_strposition(args),
        "INSTR": exp.StrPosition.from_arg_list,
        "LOCATE": lambda args: build_locate_strposition(args),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }
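
    # Illustrative sketch (hypothetical subclass, not part of this module):
    # dialect parsers customize function parsing by extending this table:
    #
    #   class MyParser(Parser):
    #       FUNCTIONS = {
    #           **Parser.FUNCTIONS,
    #           "MY_FUNC": lambda args: exp.Anonymous(this="MY_FUNC", expressions=args),
    #       }
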
    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
        TokenType.UNION,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.RANGE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.DYNAMIC,
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.UDOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.BLOB,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME2,
        TokenType.DATETIME64,
        TokenType.SMALLDATETIME,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.DECIMAL32,
        TokenType.DECIMAL64,
        TokenType.DECIMAL128,
        TokenType.DECIMAL256,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.POINT,
        TokenType.RING,
        TokenType.LINESTRING,
        TokenType.MULTILINESTRING,
        TokenType.POLYGON,
        TokenType.MULTIPOLYGON,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.VOID,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NOTHING,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        TokenType.DYNAMIC,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }
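
    # Illustrative (assuming the public API): TYPE_TOKENS drives _parse_types,
    # which is also reachable through parse_into, e.g.:
    #
    #   Parser().parse_into(exp.DataType, Tokenizer().tokenize("DECIMAL(10, 2)"))
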
    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
        TokenType.DOUBLE: TokenType.UDOUBLE,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.FILE_FORMAT,
        TokenType.MODEL,
        TokenType.NAMESPACE,
        TokenType.SCHEMA,
        TokenType.SEQUENCE,
        TokenType.SINK,
        TokenType.SOURCE,
        TokenType.STAGE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.STREAMLIT,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    ALTERABLES = {
        TokenType.INDEX,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL,
        TokenType.ATTACH,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.CURRENT_SCHEMA,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DETACH,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.EXPORT,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.GET,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LIMIT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.PUT,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RENAME,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }
    ID_VAR_TOKENS.remove(TokenType.UNION)

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS
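
    # Illustrative sketch (hypothetical subclass): these token sets are adjusted
    # per dialect with ordinary set arithmetic, e.g.:
    #
    #   class MyParser(Parser):
    #       TABLE_ALIAS_TOKENS = Parser.TABLE_ALIAS_TOKENS - {TokenType.LEFT, TokenType.RIGHT}
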
    COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_SCHEMA,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GET,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPNTZ,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }
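
    # Illustrative (assuming the public API): LAMBDAS handles arrow syntax in
    # function arguments, e.g.:
    #
    #   from sqlglot import parse_one
    #   parse_one("SELECT FILTER(xs, x -> x > 0)").find(exp.Lambda)
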
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DOTCOLON: lambda self, this, to: self.expression(
            exp.JSONCast,
            this=this,
            to=to,
        ),
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Hint: lambda self: self._parse_hint_body(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Tuple: lambda self: self._parse_value(values=False),
        exp.Whens: lambda self: self._parse_when_matched(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }
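
    # Illustrative: COLUMN_OPERATORS drives postfix column syntax, e.g.
    # (assuming the public API):
    #
    #   parse_one("SELECT x::INT")         # DCOLON -> exp.Cast (STRICT_CAST is True)
    #   parse_one("SELECT j -> '$.name'")  # ARROW  -> exp.JSONExtract
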
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.ANALYZE: lambda self: self._parse_analyze(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.GRANT: lambda self: self._parse_grant(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self._parse_use(),
        TokenType.SEMICOLON: lambda self: exp.Semicolon(),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(
            exp.HexString,
            this=token.text,
            is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None,
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self._parse_star_ops(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.COLON_PLACEHOLDER_TOKENS)
            else None
        ),
    }
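
    # Illustrative: _parse_statement dispatches on the first token through
    # STATEMENT_PARSERS, e.g. (assuming the public API):
    #
    #   isinstance(parse_one("USE db"), exp.Use)                      # True
    #   isinstance(parse_one("TRUNCATE TABLE t"), exp.TruncateTable)  # True
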
    RANGE_PARSERS = {
        TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll),
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

    PIPE_SYNTAX_TRANSFORM_PARSERS = {
        "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query),
        "AS": lambda self, query: self._build_pipe_cte(
            query, [exp.Star()], self._parse_table_alias()
        ),
        "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query),
        "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query),
        "ORDER BY": lambda self, query: query.order_by(
            self._parse_order(), append=False, copy=False
        ),
        "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query),
        "SELECT": lambda self, query: self._parse_pipe_syntax_select(query),
        "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query),
        "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query),
        "WHERE": lambda self, query: query.where(self._parse_where(), copy=False),
    }
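
    # Illustrative: RANGE_PARSERS extends an operand that has already been
    # parsed, e.g. (assuming the public API):
    #
    #   parse_one("SELECT * FROM t WHERE x BETWEEN 1 AND 10").find(exp.Between)
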
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTRIBUTED": lambda self: self._parse_distributed_property(),
        "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty),
        "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "EMPTY": lambda self: self.expression(exp.EmptyProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "ENVIRONMENT": lambda self: self.expression(
            exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment)
        ),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "STREAMING": lambda self: self.expression(exp.StreamingTableProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty),
        "SECURITY": lambda self: self._parse_security(),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self._parse_settings_property(),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }
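
    # Illustrative: PROPERTY_PARSERS handles clauses that can trail a CREATE
    # statement, e.g. (assuming the public API):
    #
    #   parse_one("CREATE TABLE t (x INT) ENGINE=InnoDB", read="mysql").find(exp.EngineProperty)
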
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WATERMARK": lambda self: self.expression(
            exp.WatermarkColumnConstraint,
            this=self._match(TokenType.FOR) and self._parse_column(),
            expression=self._match(TokenType.ALIAS) and self._parse_disjunction(),
        ),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
        "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
        "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
    }
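
    # Illustrative: CONSTRAINT_PARSERS covers column and schema constraints,
    # e.g. (assuming the public API):
    #
    #   parse_one("CREATE TABLE t (x INT NOT NULL DEFAULT 1)").find(exp.DefaultColumnConstraint)
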
    def _parse_partitioned_by_bucket_or_truncate(self) -> exp.Expression:
        klass = (
            exp.PartitionedByBucket
            if self._prev.text.upper() == "BUCKET"
            else exp.PartitionByTruncate
        )

        args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column())
        this, expression = seq_get(args, 0), seq_get(args, 1)

        if isinstance(this, exp.Literal):
            # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order
            # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)`
            # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)`
            # Both variants are canonicalized to the latter, i.e. `bucket(<col name>, <num buckets>)`
            #
            # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning
            # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties
            this, expression = expression, this

        return self.expression(klass, this=this, expression=expression)

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "AS": lambda self: self._parse_select(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "SWAP": lambda self: self.expression(
            exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True)
        ),
    }
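
    # Illustrative: ALTER_PARSERS dispatches on the keyword that follows the
    # ALTER target, e.g. (assuming the public API):
    #
    #   parse_one("ALTER TABLE t ADD COLUMN y INT")  # handled by the "ADD" entry
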
    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
        "WATERMARK",
        "BUCKET",
        "TRUNCATE",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        **{
            name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names()
        },
        **{
            name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names()
        },
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "NORMALIZE": lambda self: self._parse_normalize(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "OVERLAY": lambda self: self._parse_overlay(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
        "XMLELEMENT": lambda self: self.expression(
            exp.XMLElement,
            this=self._match_text_seq("NAME") and self._parse_id_var(),
            expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression),
        ),
        "XMLTABLE": lambda self: self._parse_xml_table(),
    }
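
    # Illustrative: FUNCTION_PARSERS intercepts functions whose argument syntax
    # is not a plain comma-separated list, e.g. (assuming the public API):
    #
    #   parse_one("SELECT CAST(x AS INT)").find(exp.Cast)  # "CAST" entry, not the generic path
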
    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    PROCEDURE_OPTIONS: OPTIONS_TYPE = {}

    EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple())
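
    # Illustrative: OPTIONS_TYPE tables map a leading keyword to the keyword
    # sequences that may follow it; TRANSACTION_CHARACTERISTICS, for instance,
    # accepts "ISOLATION LEVEL READ COMMITTED" and "READ ONLY", e.g.:
    #
    #   parse_one("SET TRANSACTION ISOLATION LEVEL READ COMMITTED")
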
"RELY"), tuple()), 1369 } 1370 1371 WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = { 1372 "NO": ("OTHERS",), 1373 "CURRENT": ("ROW",), 1374 **dict.fromkeys(("GROUP", "TIES"), tuple()), 1375 } 1376 1377 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1378 1379 CLONE_KEYWORDS = {"CLONE", "COPY"} 1380 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1381 HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"} 1382 1383 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1384 1385 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1386 1387 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1388 1389 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1390 1391 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1392 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1393 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1394 1395 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1396 1397 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1398 1399 ADD_CONSTRAINT_TOKENS = { 1400 TokenType.CONSTRAINT, 1401 TokenType.FOREIGN_KEY, 1402 TokenType.INDEX, 1403 TokenType.KEY, 1404 TokenType.PRIMARY_KEY, 1405 TokenType.UNIQUE, 1406 } 1407 1408 DISTINCT_TOKENS = {TokenType.DISTINCT} 1409 1410 NULL_TOKENS = {TokenType.NULL} 1411 1412 UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS 1413 1414 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1415 1416 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1417 1418 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1419 1420 ODBC_DATETIME_LITERALS = { 1421 "d": exp.Date, 1422 "t": exp.Time, 1423 "ts": exp.Timestamp, 1424 } 1425 1426 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1427 1428 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1429 1430 # The style options for the DESCRIBE statement 1431 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1432 1433 # The style options for the ANALYZE statement 1434 ANALYZE_STYLES = { 1435 "BUFFER_USAGE_LIMIT", 1436 "FULL", 1437 "LOCAL", 1438 "NO_WRITE_TO_BINLOG", 1439 "SAMPLE", 1440 "SKIP_LOCKED", 1441 "VERBOSE", 1442 } 1443 1444 ANALYZE_EXPRESSION_PARSERS = { 1445 "ALL": lambda self: self._parse_analyze_columns(), 1446 "COMPUTE": lambda self: self._parse_analyze_statistics(), 1447 "DELETE": lambda self: self._parse_analyze_delete(), 1448 "DROP": lambda self: self._parse_analyze_histogram(), 1449 "ESTIMATE": lambda self: self._parse_analyze_statistics(), 1450 "LIST": lambda self: self._parse_analyze_list(), 1451 "PREDICATE": lambda self: self._parse_analyze_columns(), 1452 "UPDATE": lambda self: self._parse_analyze_histogram(), 1453 "VALIDATE": lambda self: self._parse_analyze_validate(), 1454 } 1455 1456 PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"} 1457 1458 AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET) 1459 1460 OPERATION_MODIFIERS: t.Set[str] = set() 1461 1462 RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"} 1463 1464 MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows) 1465 1466 STRICT_CAST = True 1467 1468 PREFIXED_PIVOT_COLUMNS = False 1469 IDENTIFY_PIVOT_STRINGS = False 1470 1471 LOG_DEFAULTS_TO_LN = False 1472 1473 # Whether the table sample clause expects CSV syntax 1474 TABLESAMPLE_CSV = False 1475 1476 # The default method used for table sampling 1477 
    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
    WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True

    # Whether the 'AS' keyword is optional in the CTE definition syntax
    OPTIONAL_ALIAS_TOKEN_CTE = True

    # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword
    ALTER_RENAME_REQUIRES_COLUMN = True

    # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree.
    # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is
    # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such
    # as BigQuery, where all joins have the same precedence.
    JOINS_HAVE_EQUAL_PRECEDENCE = False
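
    # Illustrative: with the default (False), "FROM a, b JOIN c" binds the JOIN
    # to b before applying the comma join; BigQuery-style dialects set this to
    # True so both operators associate left to right, e.g.:
    #
    #   parse_one("SELECT * FROM a, b JOIN c ON b.id = c.id")
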
    # Whether TIMESTAMP <literal> can produce a zone-aware timestamp
    ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_pipe_cte_counter",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None
        self._pipe_cte_counter = 0

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
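
    # Illustrative usage (assuming the public API): parse returns one syntax
    # tree per statement, e.g.:
    #
    #   trees = Parser().parse(Tokenizer().tokenize("SELECT 1; SELECT 2"))
    #   len(trees)  # 2
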
1614 """ 1615 errors = [] 1616 for expression_type in ensure_list(expression_types): 1617 parser = self.EXPRESSION_PARSERS.get(expression_type) 1618 if not parser: 1619 raise TypeError(f"No parser registered for {expression_type}") 1620 1621 try: 1622 return self._parse(parser, raw_tokens, sql) 1623 except ParseError as e: 1624 e.errors[0]["into_expression"] = expression_type 1625 errors.append(e) 1626 1627 raise ParseError( 1628 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1629 errors=merge_errors(errors), 1630 ) from errors[-1] 1631 1632 def _parse( 1633 self, 1634 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1635 raw_tokens: t.List[Token], 1636 sql: t.Optional[str] = None, 1637 ) -> t.List[t.Optional[exp.Expression]]: 1638 self.reset() 1639 self.sql = sql or "" 1640 1641 total = len(raw_tokens) 1642 chunks: t.List[t.List[Token]] = [[]] 1643 1644 for i, token in enumerate(raw_tokens): 1645 if token.token_type == TokenType.SEMICOLON: 1646 if token.comments: 1647 chunks.append([token]) 1648 1649 if i < total - 1: 1650 chunks.append([]) 1651 else: 1652 chunks[-1].append(token) 1653 1654 expressions = [] 1655 1656 for tokens in chunks: 1657 self._index = -1 1658 self._tokens = tokens 1659 self._advance() 1660 1661 expressions.append(parse_method(self)) 1662 1663 if self._index < len(self._tokens): 1664 self.raise_error("Invalid expression / Unexpected token") 1665 1666 self.check_errors() 1667 1668 return expressions 1669 1670 def check_errors(self) -> None: 1671 """Logs or raises any found errors, depending on the chosen error level setting.""" 1672 if self.error_level == ErrorLevel.WARN: 1673 for error in self.errors: 1674 logger.error(str(error)) 1675 elif self.error_level == ErrorLevel.RAISE and self.errors: 1676 raise ParseError( 1677 concat_messages(self.errors, self.max_errors), 1678 errors=merge_errors(self.errors), 1679 ) 1680 1681 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1682 """ 1683 Appends an error in the list of recorded errors or raises it, depending on the chosen 1684 error level setting. 1685 """ 1686 token = token or self._curr or self._prev or Token.string("") 1687 start = token.start 1688 end = token.end + 1 1689 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1690 highlight = self.sql[start:end] 1691 end_context = self.sql[end : end + self.error_message_context] 1692 1693 error = ParseError.new( 1694 f"{message}. Line {token.line}, Col: {token.col}.\n" 1695 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1696 description=message, 1697 line=token.line, 1698 col=token.col, 1699 start_context=start_context, 1700 highlight=highlight, 1701 end_context=end_context, 1702 ) 1703 1704 if self.error_level == ErrorLevel.IMMEDIATE: 1705 raise error 1706 1707 self.errors.append(error) 1708 1709 def expression( 1710 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1711 ) -> E: 1712 """ 1713 Creates a new, validated Expression. 1714 1715 Args: 1716 exp_class: The expression class to instantiate. 1717 comments: An optional list of comments to attach to the expression. 1718 kwargs: The arguments to set for the expression along with their respective values. 1719 1720 Returns: 1721 The target expression. 
1722 """ 1723 instance = exp_class(**kwargs) 1724 instance.add_comments(comments) if comments else self._add_comments(instance) 1725 return self.validate_expression(instance) 1726 1727 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1728 if expression and self._prev_comments: 1729 expression.add_comments(self._prev_comments) 1730 self._prev_comments = None 1731 1732 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1733 """ 1734 Validates an Expression, making sure that all its mandatory arguments are set. 1735 1736 Args: 1737 expression: The expression to validate. 1738 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1739 1740 Returns: 1741 The validated expression. 1742 """ 1743 if self.error_level != ErrorLevel.IGNORE: 1744 for error_message in expression.error_messages(args): 1745 self.raise_error(error_message) 1746 1747 return expression 1748 1749 def _find_sql(self, start: Token, end: Token) -> str: 1750 return self.sql[start.start : end.end + 1] 1751 1752 def _is_connected(self) -> bool: 1753 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1754 1755 def _advance(self, times: int = 1) -> None: 1756 self._index += times 1757 self._curr = seq_get(self._tokens, self._index) 1758 self._next = seq_get(self._tokens, self._index + 1) 1759 1760 if self._index > 0: 1761 self._prev = self._tokens[self._index - 1] 1762 self._prev_comments = self._prev.comments 1763 else: 1764 self._prev = None 1765 self._prev_comments = None 1766 1767 def _retreat(self, index: int) -> None: 1768 if index != self._index: 1769 self._advance(index - self._index) 1770 1771 def _warn_unsupported(self) -> None: 1772 if len(self._tokens) <= 1: 1773 return 1774 1775 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1776 # interested in emitting a warning for the one being currently processed. 1777 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1778 1779 logger.warning( 1780 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1781 ) 1782 1783 def _parse_command(self) -> exp.Command: 1784 self._warn_unsupported() 1785 return self.expression( 1786 exp.Command, 1787 comments=self._prev_comments, 1788 this=self._prev.text.upper(), 1789 expression=self._parse_string(), 1790 ) 1791 1792 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1793 """ 1794 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 
1797 """ 1798 index = self._index 1799 error_level = self.error_level 1800 1801 self.error_level = ErrorLevel.IMMEDIATE 1802 try: 1803 this = parse_method() 1804 except ParseError: 1805 this = None 1806 finally: 1807 if not this or retreat: 1808 self._retreat(index) 1809 self.error_level = error_level 1810 1811 return this 1812 1813 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1814 start = self._prev 1815 exists = self._parse_exists() if allow_exists else None 1816 1817 self._match(TokenType.ON) 1818 1819 materialized = self._match_text_seq("MATERIALIZED") 1820 kind = self._match_set(self.CREATABLES) and self._prev 1821 if not kind: 1822 return self._parse_as_command(start) 1823 1824 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1825 this = self._parse_user_defined_function(kind=kind.token_type) 1826 elif kind.token_type == TokenType.TABLE: 1827 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1828 elif kind.token_type == TokenType.COLUMN: 1829 this = self._parse_column() 1830 else: 1831 this = self._parse_id_var() 1832 1833 self._match(TokenType.IS) 1834 1835 return self.expression( 1836 exp.Comment, 1837 this=this, 1838 kind=kind.text, 1839 expression=self._parse_string(), 1840 exists=exists, 1841 materialized=materialized, 1842 ) 1843 1844 def _parse_to_table( 1845 self, 1846 ) -> exp.ToTableProperty: 1847 table = self._parse_table_parts(schema=True) 1848 return self.expression(exp.ToTableProperty, this=table) 1849 1850 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1851 def _parse_ttl(self) -> exp.Expression: 1852 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1853 this = self._parse_bitwise() 1854 1855 if self._match_text_seq("DELETE"): 1856 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1857 if self._match_text_seq("RECOMPRESS"): 1858 return self.expression( 1859 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1860 ) 1861 if self._match_text_seq("TO", "DISK"): 1862 return self.expression( 1863 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1864 ) 1865 if self._match_text_seq("TO", "VOLUME"): 1866 return self.expression( 1867 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1868 ) 1869 1870 return this 1871 1872 expressions = self._parse_csv(_parse_ttl_action) 1873 where = self._parse_where() 1874 group = self._parse_group() 1875 1876 aggregates = None 1877 if group and self._match(TokenType.SET): 1878 aggregates = self._parse_csv(self._parse_set_item) 1879 1880 return self.expression( 1881 exp.MergeTreeTTL, 1882 expressions=expressions, 1883 where=where, 1884 group=group, 1885 aggregates=aggregates, 1886 ) 1887 1888 def _parse_statement(self) -> t.Optional[exp.Expression]: 1889 if self._curr is None: 1890 return None 1891 1892 if self._match_set(self.STATEMENT_PARSERS): 1893 comments = self._prev_comments 1894 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self) 1895 stmt.add_comments(comments, prepend=True) 1896 return stmt 1897 1898 if self._match_set(self.dialect.tokenizer_class.COMMANDS): 1899 return self._parse_command() 1900 1901 expression = self._parse_expression() 1902 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1903 return
self._parse_query_modifiers(expression) 1904 1905 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1906 start = self._prev 1907 temporary = self._match(TokenType.TEMPORARY) 1908 materialized = self._match_text_seq("MATERIALIZED") 1909 1910 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1911 if not kind: 1912 return self._parse_as_command(start) 1913 1914 concurrently = self._match_text_seq("CONCURRENTLY") 1915 if_exists = exists or self._parse_exists() 1916 1917 if kind == "COLUMN": 1918 this = self._parse_column() 1919 else: 1920 this = self._parse_table_parts( 1921 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1922 ) 1923 1924 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1925 1926 if self._match(TokenType.L_PAREN, advance=False): 1927 expressions = self._parse_wrapped_csv(self._parse_types) 1928 else: 1929 expressions = None 1930 1931 return self.expression( 1932 exp.Drop, 1933 exists=if_exists, 1934 this=this, 1935 expressions=expressions, 1936 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1937 temporary=temporary, 1938 materialized=materialized, 1939 cascade=self._match_text_seq("CASCADE"), 1940 constraints=self._match_text_seq("CONSTRAINTS"), 1941 purge=self._match_text_seq("PURGE"), 1942 cluster=cluster, 1943 concurrently=concurrently, 1944 ) 1945 1946 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1947 return ( 1948 self._match_text_seq("IF") 1949 and (not not_ or self._match(TokenType.NOT)) 1950 and self._match(TokenType.EXISTS) 1951 ) 1952 1953 def _parse_create(self) -> exp.Create | exp.Command: 1954 # Note: this can't be None because we've matched a statement parser 1955 start = self._prev 1956 1957 replace = ( 1958 start.token_type == TokenType.REPLACE 1959 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1960 or self._match_pair(TokenType.OR, TokenType.ALTER) 1961 ) 1962 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1963 1964 unique = self._match(TokenType.UNIQUE) 1965 1966 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1967 clustered = True 1968 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1969 "COLUMNSTORE" 1970 ): 1971 clustered = False 1972 else: 1973 clustered = None 1974 1975 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1976 self._advance() 1977 1978 properties = None 1979 create_token = self._match_set(self.CREATABLES) and self._prev 1980 1981 if not create_token: 1982 # exp.Properties.Location.POST_CREATE 1983 properties = self._parse_properties() 1984 create_token = self._match_set(self.CREATABLES) and self._prev 1985 1986 if not properties or not create_token: 1987 return self._parse_as_command(start) 1988 1989 concurrently = self._match_text_seq("CONCURRENTLY") 1990 exists = self._parse_exists(not_=True) 1991 this = None 1992 expression: t.Optional[exp.Expression] = None 1993 indexes = None 1994 no_schema_binding = None 1995 begin = None 1996 end = None 1997 clone = None 1998 1999 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 2000 nonlocal properties 2001 if properties and temp_props: 2002 properties.expressions.extend(temp_props.expressions) 2003 elif temp_props: 2004 properties = temp_props 2005 2006 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 2007 this = self._parse_user_defined_function(kind=create_token.token_type) 2008 2009 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type 
signature) 2010 extend_props(self._parse_properties()) 2011 2012 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 2013 extend_props(self._parse_properties()) 2014 2015 if not expression: 2016 if self._match(TokenType.COMMAND): 2017 expression = self._parse_as_command(self._prev) 2018 else: 2019 begin = self._match(TokenType.BEGIN) 2020 return_ = self._match_text_seq("RETURN") 2021 2022 if self._match(TokenType.STRING, advance=False): 2023 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 2024 # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 2025 expression = self._parse_string() 2026 extend_props(self._parse_properties()) 2027 else: 2028 expression = self._parse_user_defined_function_expression() 2029 2030 end = self._match_text_seq("END") 2031 2032 if return_: 2033 expression = self.expression(exp.Return, this=expression) 2034 elif create_token.token_type == TokenType.INDEX: 2035 # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c) 2036 if not self._match(TokenType.ON): 2037 index = self._parse_id_var() 2038 anonymous = False 2039 else: 2040 index = None 2041 anonymous = True 2042 2043 this = self._parse_index(index=index, anonymous=anonymous) 2044 elif create_token.token_type in self.DB_CREATABLES: 2045 table_parts = self._parse_table_parts( 2046 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 2047 ) 2048 2049 # exp.Properties.Location.POST_NAME 2050 self._match(TokenType.COMMA) 2051 extend_props(self._parse_properties(before=True)) 2052 2053 this = self._parse_schema(this=table_parts) 2054 2055 # exp.Properties.Location.POST_SCHEMA and POST_WITH 2056 extend_props(self._parse_properties()) 2057 2058 has_alias = self._match(TokenType.ALIAS) 2059 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 2060 # exp.Properties.Location.POST_ALIAS 2061 extend_props(self._parse_properties()) 2062 2063 if create_token.token_type == TokenType.SEQUENCE: 2064 expression = self._parse_types() 2065 extend_props(self._parse_properties()) 2066 else: 2067 expression = self._parse_ddl_select() 2068 2069 # Some dialects also support using a table as an alias instead of a SELECT. 2070 # Here we fall back to this as an alternative.
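            # [Editor's illustration, not part of the original source] For example, a
            # statement of the form "CREATE TABLE t2 AS t1" carries a bare table where
            # a SELECT would normally follow AS (hedged sketch; dialect support varies):
            #     >>> import sqlglot
            #     >>> sqlglot.parse_one("CREATE TABLE t2 AS t1")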
2071 if not expression and has_alias: 2072 expression = self._try_parse(self._parse_table_parts) 2073 2074 if create_token.token_type == TokenType.TABLE: 2075 # exp.Properties.Location.POST_EXPRESSION 2076 extend_props(self._parse_properties()) 2077 2078 indexes = [] 2079 while True: 2080 index = self._parse_index() 2081 2082 # exp.Properties.Location.POST_INDEX 2083 extend_props(self._parse_properties()) 2084 if not index: 2085 break 2086 else: 2087 self._match(TokenType.COMMA) 2088 indexes.append(index) 2089 elif create_token.token_type == TokenType.VIEW: 2090 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 2091 no_schema_binding = True 2092 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 2093 extend_props(self._parse_properties()) 2094 2095 shallow = self._match_text_seq("SHALLOW") 2096 2097 if self._match_texts(self.CLONE_KEYWORDS): 2098 copy = self._prev.text.lower() == "copy" 2099 clone = self.expression( 2100 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 2101 ) 2102 2103 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2104 return self._parse_as_command(start) 2105 2106 create_kind_text = create_token.text.upper() 2107 return self.expression( 2108 exp.Create, 2109 this=this, 2110 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2111 replace=replace, 2112 refresh=refresh, 2113 unique=unique, 2114 expression=expression, 2115 exists=exists, 2116 properties=properties, 2117 indexes=indexes, 2118 no_schema_binding=no_schema_binding, 2119 begin=begin, 2120 end=end, 2121 clone=clone, 2122 concurrently=concurrently, 2123 clustered=clustered, 2124 ) 2125 2126 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2127 seq = exp.SequenceProperties() 2128 2129 options = [] 2130 index = self._index 2131 2132 while self._curr: 2133 self._match(TokenType.COMMA) 2134 if self._match_text_seq("INCREMENT"): 2135 self._match_text_seq("BY") 2136 self._match_text_seq("=") 2137 seq.set("increment", self._parse_term()) 2138 elif self._match_text_seq("MINVALUE"): 2139 seq.set("minvalue", self._parse_term()) 2140 elif self._match_text_seq("MAXVALUE"): 2141 seq.set("maxvalue", self._parse_term()) 2142 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2143 self._match_text_seq("=") 2144 seq.set("start", self._parse_term()) 2145 elif self._match_text_seq("CACHE"): 2146 # T-SQL allows empty CACHE which is initialized dynamically 2147 seq.set("cache", self._parse_number() or True) 2148 elif self._match_text_seq("OWNED", "BY"): 2149 # "OWNED BY NONE" is the default 2150 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2151 else: 2152 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2153 if opt: 2154 options.append(opt) 2155 else: 2156 break 2157 2158 seq.set("options", options if options else None) 2159 return None if self._index == index else seq 2160 2161 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2162 # only used for teradata currently 2163 self._match(TokenType.COMMA) 2164 2165 kwargs = { 2166 "no": self._match_text_seq("NO"), 2167 "dual": self._match_text_seq("DUAL"), 2168 "before": self._match_text_seq("BEFORE"), 2169 "default": self._match_text_seq("DEFAULT"), 2170 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2171 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2172 "after": self._match_text_seq("AFTER"), 2173 "minimum": 
self._match_texts(("MIN", "MINIMUM")), 2174 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2175 } 2176 2177 if self._match_texts(self.PROPERTY_PARSERS): 2178 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2179 try: 2180 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2181 except TypeError: 2182 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2183 2184 return None 2185 2186 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2187 return self._parse_wrapped_csv(self._parse_property) 2188 2189 def _parse_property(self) -> t.Optional[exp.Expression]: 2190 if self._match_texts(self.PROPERTY_PARSERS): 2191 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2192 2193 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2194 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2195 2196 if self._match_text_seq("COMPOUND", "SORTKEY"): 2197 return self._parse_sortkey(compound=True) 2198 2199 if self._match_text_seq("SQL", "SECURITY"): 2200 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2201 2202 index = self._index 2203 key = self._parse_column() 2204 2205 if not self._match(TokenType.EQ): 2206 self._retreat(index) 2207 return self._parse_sequence_properties() 2208 2209 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2210 if isinstance(key, exp.Column): 2211 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2212 2213 value = self._parse_bitwise() or self._parse_var(any_token=True) 2214 2215 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2216 if isinstance(value, exp.Column): 2217 value = exp.var(value.name) 2218 2219 return self.expression(exp.Property, this=key, value=value) 2220 2221 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2222 if self._match_text_seq("BY"): 2223 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2224 2225 self._match(TokenType.ALIAS) 2226 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2227 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2228 2229 return self.expression( 2230 exp.FileFormatProperty, 2231 this=( 2232 self.expression( 2233 exp.InputOutputFormat, 2234 input_format=input_format, 2235 output_format=output_format, 2236 ) 2237 if input_format or output_format 2238 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2239 ), 2240 hive_format=True, 2241 ) 2242 2243 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2244 field = self._parse_field() 2245 if isinstance(field, exp.Identifier) and not field.quoted: 2246 field = exp.var(field) 2247 2248 return field 2249 2250 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2251 self._match(TokenType.EQ) 2252 self._match(TokenType.ALIAS) 2253 2254 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2255 2256 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2257 properties = [] 2258 while True: 2259 if before: 2260 prop = self._parse_property_before() 2261 else: 2262 prop = self._parse_property() 2263 if not prop: 2264 break 2265 for p in ensure_list(prop): 2266 properties.append(p) 2267 2268 if properties: 2269 return self.expression(exp.Properties, expressions=properties) 2270 
2271 return None 2272 2273 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2274 return self.expression( 2275 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2276 ) 2277 2278 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2279 if self._match_texts(("NONE", "DEFINER", "INVOKER")): 2280 security_specifier = self._prev.text.upper() 2281 return self.expression(exp.SecurityProperty, this=security_specifier) 2282 return None 2283 2284 def _parse_settings_property(self) -> exp.SettingsProperty: 2285 return self.expression( 2286 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2287 ) 2288 2289 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2290 if self._index >= 2: 2291 pre_volatile_token = self._tokens[self._index - 2] 2292 else: 2293 pre_volatile_token = None 2294 2295 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2296 return exp.VolatileProperty() 2297 2298 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2299 2300 def _parse_retention_period(self) -> exp.Var: 2301 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2302 number = self._parse_number() 2303 number_str = f"{number} " if number else "" 2304 unit = self._parse_var(any_token=True) 2305 return exp.var(f"{number_str}{unit}") 2306 2307 def _parse_system_versioning_property( 2308 self, with_: bool = False 2309 ) -> exp.WithSystemVersioningProperty: 2310 self._match(TokenType.EQ) 2311 prop = self.expression( 2312 exp.WithSystemVersioningProperty, 2313 **{ # type: ignore 2314 "on": True, 2315 "with": with_, 2316 }, 2317 ) 2318 2319 if self._match_text_seq("OFF"): 2320 prop.set("on", False) 2321 return prop 2322 2323 self._match(TokenType.ON) 2324 if self._match(TokenType.L_PAREN): 2325 while self._curr and not self._match(TokenType.R_PAREN): 2326 if self._match_text_seq("HISTORY_TABLE", "="): 2327 prop.set("this", self._parse_table_parts()) 2328 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2329 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2330 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2331 prop.set("retention_period", self._parse_retention_period()) 2332 2333 self._match(TokenType.COMMA) 2334 2335 return prop 2336 2337 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2338 self._match(TokenType.EQ) 2339 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2340 prop = self.expression(exp.DataDeletionProperty, on=on) 2341 2342 if self._match(TokenType.L_PAREN): 2343 while self._curr and not self._match(TokenType.R_PAREN): 2344 if self._match_text_seq("FILTER_COLUMN", "="): 2345 prop.set("filter_column", self._parse_column()) 2346 elif self._match_text_seq("RETENTION_PERIOD", "="): 2347 prop.set("retention_period", self._parse_retention_period()) 2348 2349 self._match(TokenType.COMMA) 2350 2351 return prop 2352 2353 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2354 kind = "HASH" 2355 expressions: t.Optional[t.List[exp.Expression]] = None 2356 if self._match_text_seq("BY", "HASH"): 2357 expressions = self._parse_wrapped_csv(self._parse_id_var) 2358 elif self._match_text_seq("BY", "RANDOM"): 2359 kind = "RANDOM" 2360 2361 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2362 buckets: t.Optional[exp.Expression] = None 2363 if self._match_text_seq("BUCKETS") and not 
self._match_text_seq("AUTO"): 2364 buckets = self._parse_number() 2365 2366 return self.expression( 2367 exp.DistributedByProperty, 2368 expressions=expressions, 2369 kind=kind, 2370 buckets=buckets, 2371 order=self._parse_order(), 2372 ) 2373 2374 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2375 self._match_text_seq("KEY") 2376 expressions = self._parse_wrapped_id_vars() 2377 return self.expression(expr_type, expressions=expressions) 2378 2379 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2380 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2381 prop = self._parse_system_versioning_property(with_=True) 2382 self._match_r_paren() 2383 return prop 2384 2385 if self._match(TokenType.L_PAREN, advance=False): 2386 return self._parse_wrapped_properties() 2387 2388 if self._match_text_seq("JOURNAL"): 2389 return self._parse_withjournaltable() 2390 2391 if self._match_texts(self.VIEW_ATTRIBUTES): 2392 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2393 2394 if self._match_text_seq("DATA"): 2395 return self._parse_withdata(no=False) 2396 elif self._match_text_seq("NO", "DATA"): 2397 return self._parse_withdata(no=True) 2398 2399 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2400 return self._parse_serde_properties(with_=True) 2401 2402 if self._match(TokenType.SCHEMA): 2403 return self.expression( 2404 exp.WithSchemaBindingProperty, 2405 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2406 ) 2407 2408 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2409 return self.expression( 2410 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2411 ) 2412 2413 if not self._next: 2414 return None 2415 2416 return self._parse_withisolatedloading() 2417 2418 def _parse_procedure_option(self) -> exp.Expression | None: 2419 if self._match_text_seq("EXECUTE", "AS"): 2420 return self.expression( 2421 exp.ExecuteAsProperty, 2422 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2423 or self._parse_string(), 2424 ) 2425 2426 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2427 2428 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2429 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2430 self._match(TokenType.EQ) 2431 2432 user = self._parse_id_var() 2433 self._match(TokenType.PARAMETER) 2434 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2435 2436 if not user or not host: 2437 return None 2438 2439 return exp.DefinerProperty(this=f"{user}@{host}") 2440 2441 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2442 self._match(TokenType.TABLE) 2443 self._match(TokenType.EQ) 2444 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2445 2446 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2447 return self.expression(exp.LogProperty, no=no) 2448 2449 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2450 return self.expression(exp.JournalProperty, **kwargs) 2451 2452 def _parse_checksum(self) -> exp.ChecksumProperty: 2453 self._match(TokenType.EQ) 2454 2455 on = None 2456 if self._match(TokenType.ON): 2457 on = True 2458 elif self._match_text_seq("OFF"): 2459 on = False 2460 2461 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2462 2463 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2464 return self.expression( 2465 
exp.Cluster, 2466 expressions=( 2467 self._parse_wrapped_csv(self._parse_ordered) 2468 if wrapped 2469 else self._parse_csv(self._parse_ordered) 2470 ), 2471 ) 2472 2473 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2474 self._match_text_seq("BY") 2475 2476 self._match_l_paren() 2477 expressions = self._parse_csv(self._parse_column) 2478 self._match_r_paren() 2479 2480 if self._match_text_seq("SORTED", "BY"): 2481 self._match_l_paren() 2482 sorted_by = self._parse_csv(self._parse_ordered) 2483 self._match_r_paren() 2484 else: 2485 sorted_by = None 2486 2487 self._match(TokenType.INTO) 2488 buckets = self._parse_number() 2489 self._match_text_seq("BUCKETS") 2490 2491 return self.expression( 2492 exp.ClusteredByProperty, 2493 expressions=expressions, 2494 sorted_by=sorted_by, 2495 buckets=buckets, 2496 ) 2497 2498 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2499 if not self._match_text_seq("GRANTS"): 2500 self._retreat(self._index - 1) 2501 return None 2502 2503 return self.expression(exp.CopyGrantsProperty) 2504 2505 def _parse_freespace(self) -> exp.FreespaceProperty: 2506 self._match(TokenType.EQ) 2507 return self.expression( 2508 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2509 ) 2510 2511 def _parse_mergeblockratio( 2512 self, no: bool = False, default: bool = False 2513 ) -> exp.MergeBlockRatioProperty: 2514 if self._match(TokenType.EQ): 2515 return self.expression( 2516 exp.MergeBlockRatioProperty, 2517 this=self._parse_number(), 2518 percent=self._match(TokenType.PERCENT), 2519 ) 2520 2521 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2522 2523 def _parse_datablocksize( 2524 self, 2525 default: t.Optional[bool] = None, 2526 minimum: t.Optional[bool] = None, 2527 maximum: t.Optional[bool] = None, 2528 ) -> exp.DataBlocksizeProperty: 2529 self._match(TokenType.EQ) 2530 size = self._parse_number() 2531 2532 units = None 2533 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2534 units = self._prev.text 2535 2536 return self.expression( 2537 exp.DataBlocksizeProperty, 2538 size=size, 2539 units=units, 2540 default=default, 2541 minimum=minimum, 2542 maximum=maximum, 2543 ) 2544 2545 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2546 self._match(TokenType.EQ) 2547 always = self._match_text_seq("ALWAYS") 2548 manual = self._match_text_seq("MANUAL") 2549 never = self._match_text_seq("NEVER") 2550 default = self._match_text_seq("DEFAULT") 2551 2552 autotemp = None 2553 if self._match_text_seq("AUTOTEMP"): 2554 autotemp = self._parse_schema() 2555 2556 return self.expression( 2557 exp.BlockCompressionProperty, 2558 always=always, 2559 manual=manual, 2560 never=never, 2561 default=default, 2562 autotemp=autotemp, 2563 ) 2564 2565 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2566 index = self._index 2567 no = self._match_text_seq("NO") 2568 concurrent = self._match_text_seq("CONCURRENT") 2569 2570 if not self._match_text_seq("ISOLATED", "LOADING"): 2571 self._retreat(index) 2572 return None 2573 2574 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2575 return self.expression( 2576 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2577 ) 2578 2579 def _parse_locking(self) -> exp.LockingProperty: 2580 if self._match(TokenType.TABLE): 2581 kind = "TABLE" 2582 elif self._match(TokenType.VIEW): 2583 kind = "VIEW" 2584 elif self._match(TokenType.ROW): 2585 kind 
= "ROW" 2586 elif self._match_text_seq("DATABASE"): 2587 kind = "DATABASE" 2588 else: 2589 kind = None 2590 2591 if kind in ("DATABASE", "TABLE", "VIEW"): 2592 this = self._parse_table_parts() 2593 else: 2594 this = None 2595 2596 if self._match(TokenType.FOR): 2597 for_or_in = "FOR" 2598 elif self._match(TokenType.IN): 2599 for_or_in = "IN" 2600 else: 2601 for_or_in = None 2602 2603 if self._match_text_seq("ACCESS"): 2604 lock_type = "ACCESS" 2605 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2606 lock_type = "EXCLUSIVE" 2607 elif self._match_text_seq("SHARE"): 2608 lock_type = "SHARE" 2609 elif self._match_text_seq("READ"): 2610 lock_type = "READ" 2611 elif self._match_text_seq("WRITE"): 2612 lock_type = "WRITE" 2613 elif self._match_text_seq("CHECKSUM"): 2614 lock_type = "CHECKSUM" 2615 else: 2616 lock_type = None 2617 2618 override = self._match_text_seq("OVERRIDE") 2619 2620 return self.expression( 2621 exp.LockingProperty, 2622 this=this, 2623 kind=kind, 2624 for_or_in=for_or_in, 2625 lock_type=lock_type, 2626 override=override, 2627 ) 2628 2629 def _parse_partition_by(self) -> t.List[exp.Expression]: 2630 if self._match(TokenType.PARTITION_BY): 2631 return self._parse_csv(self._parse_assignment) 2632 return [] 2633 2634 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2635 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2636 if self._match_text_seq("MINVALUE"): 2637 return exp.var("MINVALUE") 2638 if self._match_text_seq("MAXVALUE"): 2639 return exp.var("MAXVALUE") 2640 return self._parse_bitwise() 2641 2642 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2643 expression = None 2644 from_expressions = None 2645 to_expressions = None 2646 2647 if self._match(TokenType.IN): 2648 this = self._parse_wrapped_csv(self._parse_bitwise) 2649 elif self._match(TokenType.FROM): 2650 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2651 self._match_text_seq("TO") 2652 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2653 elif self._match_text_seq("WITH", "(", "MODULUS"): 2654 this = self._parse_number() 2655 self._match_text_seq(",", "REMAINDER") 2656 expression = self._parse_number() 2657 self._match_r_paren() 2658 else: 2659 self.raise_error("Failed to parse partition bound spec.") 2660 2661 return self.expression( 2662 exp.PartitionBoundSpec, 2663 this=this, 2664 expression=expression, 2665 from_expressions=from_expressions, 2666 to_expressions=to_expressions, 2667 ) 2668 2669 # https://www.postgresql.org/docs/current/sql-createtable.html 2670 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2671 if not self._match_text_seq("OF"): 2672 self._retreat(self._index - 1) 2673 return None 2674 2675 this = self._parse_table(schema=True) 2676 2677 if self._match(TokenType.DEFAULT): 2678 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2679 elif self._match_text_seq("FOR", "VALUES"): 2680 expression = self._parse_partition_bound_spec() 2681 else: 2682 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2683 2684 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2685 2686 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2687 self._match(TokenType.EQ) 2688 return self.expression( 2689 exp.PartitionedByProperty, 2690 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2691 ) 2692 2693 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2694 if self._match_text_seq("AND", 
"STATISTICS"): 2695 statistics = True 2696 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2697 statistics = False 2698 else: 2699 statistics = None 2700 2701 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2702 2703 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2704 if self._match_text_seq("SQL"): 2705 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2706 return None 2707 2708 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2709 if self._match_text_seq("SQL", "DATA"): 2710 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2711 return None 2712 2713 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2714 if self._match_text_seq("PRIMARY", "INDEX"): 2715 return exp.NoPrimaryIndexProperty() 2716 if self._match_text_seq("SQL"): 2717 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2718 return None 2719 2720 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2721 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2722 return exp.OnCommitProperty() 2723 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2724 return exp.OnCommitProperty(delete=True) 2725 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2726 2727 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2728 if self._match_text_seq("SQL", "DATA"): 2729 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2730 return None 2731 2732 def _parse_distkey(self) -> exp.DistKeyProperty: 2733 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2734 2735 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2736 table = self._parse_table(schema=True) 2737 2738 options = [] 2739 while self._match_texts(("INCLUDING", "EXCLUDING")): 2740 this = self._prev.text.upper() 2741 2742 id_var = self._parse_id_var() 2743 if not id_var: 2744 return None 2745 2746 options.append( 2747 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2748 ) 2749 2750 return self.expression(exp.LikeProperty, this=table, expressions=options) 2751 2752 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2753 return self.expression( 2754 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2755 ) 2756 2757 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2758 self._match(TokenType.EQ) 2759 return self.expression( 2760 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2761 ) 2762 2763 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2764 self._match_text_seq("WITH", "CONNECTION") 2765 return self.expression( 2766 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2767 ) 2768 2769 def _parse_returns(self) -> exp.ReturnsProperty: 2770 value: t.Optional[exp.Expression] 2771 null = None 2772 is_table = self._match(TokenType.TABLE) 2773 2774 if is_table: 2775 if self._match(TokenType.LT): 2776 value = self.expression( 2777 exp.Schema, 2778 this="TABLE", 2779 expressions=self._parse_csv(self._parse_struct_types), 2780 ) 2781 if not self._match(TokenType.GT): 2782 self.raise_error("Expecting >") 2783 else: 2784 value = self._parse_schema(exp.var("TABLE")) 2785 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2786 null = True 2787 value = None 2788 else: 2789 value = self._parse_types() 2790 2791 return 
self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2792 2793 def _parse_describe(self) -> exp.Describe: 2794 kind = self._match_set(self.CREATABLES) and self._prev.text 2795 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2796 if self._match(TokenType.DOT): 2797 style = None 2798 self._retreat(self._index - 2) 2799 2800 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2801 2802 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2803 this = self._parse_statement() 2804 else: 2805 this = self._parse_table(schema=True) 2806 2807 properties = self._parse_properties() 2808 expressions = properties.expressions if properties else None 2809 partition = self._parse_partition() 2810 return self.expression( 2811 exp.Describe, 2812 this=this, 2813 style=style, 2814 kind=kind, 2815 expressions=expressions, 2816 partition=partition, 2817 format=format, 2818 ) 2819 2820 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2821 kind = self._prev.text.upper() 2822 expressions = [] 2823 2824 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2825 if self._match(TokenType.WHEN): 2826 expression = self._parse_disjunction() 2827 self._match(TokenType.THEN) 2828 else: 2829 expression = None 2830 2831 else_ = self._match(TokenType.ELSE) 2832 2833 if not self._match(TokenType.INTO): 2834 return None 2835 2836 return self.expression( 2837 exp.ConditionalInsert, 2838 this=self.expression( 2839 exp.Insert, 2840 this=self._parse_table(schema=True), 2841 expression=self._parse_derived_table_values(), 2842 ), 2843 expression=expression, 2844 else_=else_, 2845 ) 2846 2847 expression = parse_conditional_insert() 2848 while expression is not None: 2849 expressions.append(expression) 2850 expression = parse_conditional_insert() 2851 2852 return self.expression( 2853 exp.MultitableInserts, 2854 kind=kind, 2855 comments=comments, 2856 expressions=expressions, 2857 source=self._parse_table(), 2858 ) 2859 2860 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2861 comments = [] 2862 hint = self._parse_hint() 2863 overwrite = self._match(TokenType.OVERWRITE) 2864 ignore = self._match(TokenType.IGNORE) 2865 local = self._match_text_seq("LOCAL") 2866 alternative = None 2867 is_function = None 2868 2869 if self._match_text_seq("DIRECTORY"): 2870 this: t.Optional[exp.Expression] = self.expression( 2871 exp.Directory, 2872 this=self._parse_var_or_string(), 2873 local=local, 2874 row_format=self._parse_row_format(match_row=True), 2875 ) 2876 else: 2877 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2878 comments += ensure_list(self._prev_comments) 2879 return self._parse_multitable_inserts(comments) 2880 2881 if self._match(TokenType.OR): 2882 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2883 2884 self._match(TokenType.INTO) 2885 comments += ensure_list(self._prev_comments) 2886 self._match(TokenType.TABLE) 2887 is_function = self._match(TokenType.FUNCTION) 2888 2889 this = ( 2890 self._parse_table(schema=True, parse_partition=True) 2891 if not is_function 2892 else self._parse_function() 2893 ) 2894 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2895 this.set("alias", self._parse_table_alias()) 2896 2897 returning = self._parse_returning() 2898 2899 return self.expression( 2900 exp.Insert, 2901 comments=comments, 2902 hint=hint, 2903 is_function=is_function, 2904 this=this, 
2905 stored=self._match_text_seq("STORED") and self._parse_stored(), 2906 by_name=self._match_text_seq("BY", "NAME"), 2907 exists=self._parse_exists(), 2908 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2909 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2910 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2911 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2912 conflict=self._parse_on_conflict(), 2913 returning=returning or self._parse_returning(), 2914 overwrite=overwrite, 2915 alternative=alternative, 2916 ignore=ignore, 2917 source=self._match(TokenType.TABLE) and self._parse_table(), 2918 ) 2919 2920 def _parse_kill(self) -> exp.Kill: 2921 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2922 2923 return self.expression( 2924 exp.Kill, 2925 this=self._parse_primary(), 2926 kind=kind, 2927 ) 2928 2929 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2930 conflict = self._match_text_seq("ON", "CONFLICT") 2931 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2932 2933 if not conflict and not duplicate: 2934 return None 2935 2936 conflict_keys = None 2937 constraint = None 2938 2939 if conflict: 2940 if self._match_text_seq("ON", "CONSTRAINT"): 2941 constraint = self._parse_id_var() 2942 elif self._match(TokenType.L_PAREN): 2943 conflict_keys = self._parse_csv(self._parse_id_var) 2944 self._match_r_paren() 2945 2946 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2947 if self._prev.token_type == TokenType.UPDATE: 2948 self._match(TokenType.SET) 2949 expressions = self._parse_csv(self._parse_equality) 2950 else: 2951 expressions = None 2952 2953 return self.expression( 2954 exp.OnConflict, 2955 duplicate=duplicate, 2956 expressions=expressions, 2957 action=action, 2958 conflict_keys=conflict_keys, 2959 constraint=constraint, 2960 where=self._parse_where(), 2961 ) 2962 2963 def _parse_returning(self) -> t.Optional[exp.Returning]: 2964 if not self._match(TokenType.RETURNING): 2965 return None 2966 return self.expression( 2967 exp.Returning, 2968 expressions=self._parse_csv(self._parse_expression), 2969 into=self._match(TokenType.INTO) and self._parse_table_part(), 2970 ) 2971 2972 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2973 if not self._match(TokenType.FORMAT): 2974 return None 2975 return self._parse_row_format() 2976 2977 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2978 index = self._index 2979 with_ = with_ or self._match_text_seq("WITH") 2980 2981 if not self._match(TokenType.SERDE_PROPERTIES): 2982 self._retreat(index) 2983 return None 2984 return self.expression( 2985 exp.SerdeProperties, 2986 **{ # type: ignore 2987 "expressions": self._parse_wrapped_properties(), 2988 "with": with_, 2989 }, 2990 ) 2991 2992 def _parse_row_format( 2993 self, match_row: bool = False 2994 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2995 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2996 return None 2997 2998 if self._match_text_seq("SERDE"): 2999 this = self._parse_string() 3000 3001 serde_properties = self._parse_serde_properties() 3002 3003 return self.expression( 3004 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 3005 ) 3006 3007 self._match_text_seq("DELIMITED") 3008 3009 kwargs = {} 3010 3011 if 
self._match_text_seq("FIELDS", "TERMINATED", "BY"): 3012 kwargs["fields"] = self._parse_string() 3013 if self._match_text_seq("ESCAPED", "BY"): 3014 kwargs["escaped"] = self._parse_string() 3015 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 3016 kwargs["collection_items"] = self._parse_string() 3017 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 3018 kwargs["map_keys"] = self._parse_string() 3019 if self._match_text_seq("LINES", "TERMINATED", "BY"): 3020 kwargs["lines"] = self._parse_string() 3021 if self._match_text_seq("NULL", "DEFINED", "AS"): 3022 kwargs["null"] = self._parse_string() 3023 3024 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 3025 3026 def _parse_load(self) -> exp.LoadData | exp.Command: 3027 if self._match_text_seq("DATA"): 3028 local = self._match_text_seq("LOCAL") 3029 self._match_text_seq("INPATH") 3030 inpath = self._parse_string() 3031 overwrite = self._match(TokenType.OVERWRITE) 3032 self._match_pair(TokenType.INTO, TokenType.TABLE) 3033 3034 return self.expression( 3035 exp.LoadData, 3036 this=self._parse_table(schema=True), 3037 local=local, 3038 overwrite=overwrite, 3039 inpath=inpath, 3040 partition=self._parse_partition(), 3041 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 3042 serde=self._match_text_seq("SERDE") and self._parse_string(), 3043 ) 3044 return self._parse_as_command(self._prev) 3045 3046 def _parse_delete(self) -> exp.Delete: 3047 # This handles MySQL's "Multiple-Table Syntax" 3048 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 3049 tables = None 3050 if not self._match(TokenType.FROM, advance=False): 3051 tables = self._parse_csv(self._parse_table) or None 3052 3053 returning = self._parse_returning() 3054 3055 return self.expression( 3056 exp.Delete, 3057 tables=tables, 3058 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 3059 using=self._match(TokenType.USING) and self._parse_table(joins=True), 3060 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3061 where=self._parse_where(), 3062 returning=returning or self._parse_returning(), 3063 limit=self._parse_limit(), 3064 ) 3065 3066 def _parse_update(self) -> exp.Update: 3067 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 3068 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 3069 returning = self._parse_returning() 3070 return self.expression( 3071 exp.Update, 3072 **{ # type: ignore 3073 "this": this, 3074 "expressions": expressions, 3075 "from": self._parse_from(joins=True), 3076 "where": self._parse_where(), 3077 "returning": returning or self._parse_returning(), 3078 "order": self._parse_order(), 3079 "limit": self._parse_limit(), 3080 }, 3081 ) 3082 3083 def _parse_use(self) -> exp.Use: 3084 return self.expression( 3085 exp.Use, 3086 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3087 this=self._parse_table(schema=False), 3088 ) 3089 3090 def _parse_uncache(self) -> exp.Uncache: 3091 if not self._match(TokenType.TABLE): 3092 self.raise_error("Expecting TABLE after UNCACHE") 3093 3094 return self.expression( 3095 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3096 ) 3097 3098 def _parse_cache(self) -> exp.Cache: 3099 lazy = self._match_text_seq("LAZY") 3100 self._match(TokenType.TABLE) 3101 table = self._parse_table(schema=True) 3102 3103 options = [] 3104 if self._match_text_seq("OPTIONS"): 3105 self._match_l_paren() 3106 k = 
self._parse_string() 3107 self._match(TokenType.EQ) 3108 v = self._parse_string() 3109 options = [k, v] 3110 self._match_r_paren() 3111 3112 self._match(TokenType.ALIAS) 3113 return self.expression( 3114 exp.Cache, 3115 this=table, 3116 lazy=lazy, 3117 options=options, 3118 expression=self._parse_select(nested=True), 3119 ) 3120 3121 def _parse_partition(self) -> t.Optional[exp.Partition]: 3122 if not self._match_texts(self.PARTITION_KEYWORDS): 3123 return None 3124 3125 return self.expression( 3126 exp.Partition, 3127 subpartition=self._prev.text.upper() == "SUBPARTITION", 3128 expressions=self._parse_wrapped_csv(self._parse_assignment), 3129 ) 3130 3131 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 3132 def _parse_value_expression() -> t.Optional[exp.Expression]: 3133 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3134 return exp.var(self._prev.text.upper()) 3135 return self._parse_expression() 3136 3137 if self._match(TokenType.L_PAREN): 3138 expressions = self._parse_csv(_parse_value_expression) 3139 self._match_r_paren() 3140 return self.expression(exp.Tuple, expressions=expressions) 3141 3142 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 3143 expression = self._parse_expression() 3144 if expression: 3145 return self.expression(exp.Tuple, expressions=[expression]) 3146 return None 3147 3148 def _parse_projections(self) -> t.List[exp.Expression]: 3149 return self._parse_expressions() 3150 3151 def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]: 3152 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3153 this: t.Optional[exp.Expression] = self._parse_simplified_pivot( 3154 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3155 ) 3156 elif self._match(TokenType.FROM): 3157 from_ = self._parse_from(skip_from_token=True, consume_pipe=True) 3158 # Support parentheses for duckdb FROM-first syntax 3159 select = self._parse_select() 3160 if select: 3161 select.set("from", from_) 3162 this = select 3163 else: 3164 this = exp.select("*").from_(t.cast(exp.From, from_)) 3165 else: 3166 this = ( 3167 self._parse_table(consume_pipe=True) 3168 if table 3169 else self._parse_select(nested=True, parse_set_operation=False) 3170 ) 3171 3172 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3173 # in case a modifier (e.g. 
join) is following 3174 if table and isinstance(this, exp.Values) and this.alias: 3175 alias = this.args["alias"].pop() 3176 this = exp.Table(this=this, alias=alias) 3177 3178 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3179 3180 return this 3181 3182 def _parse_select( 3183 self, 3184 nested: bool = False, 3185 table: bool = False, 3186 parse_subquery_alias: bool = True, 3187 parse_set_operation: bool = True, 3188 consume_pipe: bool = True, 3189 ) -> t.Optional[exp.Expression]: 3190 query = self._parse_select_query( 3191 nested=nested, 3192 table=table, 3193 parse_subquery_alias=parse_subquery_alias, 3194 parse_set_operation=parse_set_operation, 3195 ) 3196 3197 if ( 3198 consume_pipe 3199 and self._match(TokenType.PIPE_GT, advance=False) 3200 and isinstance(query, exp.Query) 3201 ): 3202 query = self._parse_pipe_syntax_query(query) 3203 query = query.subquery(copy=False) if query and table else query 3204 3205 return query 3206 3207 def _parse_select_query( 3208 self, 3209 nested: bool = False, 3210 table: bool = False, 3211 parse_subquery_alias: bool = True, 3212 parse_set_operation: bool = True, 3213 ) -> t.Optional[exp.Expression]: 3214 cte = self._parse_with() 3215 3216 if cte: 3217 this = self._parse_statement() 3218 3219 if not this: 3220 self.raise_error("Failed to parse any statement following CTE") 3221 return cte 3222 3223 if "with" in this.arg_types: 3224 this.set("with", cte) 3225 else: 3226 self.raise_error(f"{this.key} does not support CTE") 3227 this = cte 3228 3229 return this 3230 3231 # duckdb supports leading with FROM x 3232 from_ = ( 3233 self._parse_from(consume_pipe=True) 3234 if self._match(TokenType.FROM, advance=False) 3235 else None 3236 ) 3237 3238 if self._match(TokenType.SELECT): 3239 comments = self._prev_comments 3240 3241 hint = self._parse_hint() 3242 3243 if self._next and not self._next.token_type == TokenType.DOT: 3244 all_ = self._match(TokenType.ALL) 3245 distinct = self._match_set(self.DISTINCT_TOKENS) 3246 else: 3247 all_, distinct = None, None 3248 3249 kind = ( 3250 self._match(TokenType.ALIAS) 3251 and self._match_texts(("STRUCT", "VALUE")) 3252 and self._prev.text.upper() 3253 ) 3254 3255 if distinct: 3256 distinct = self.expression( 3257 exp.Distinct, 3258 on=self._parse_value(values=False) if self._match(TokenType.ON) else None, 3259 ) 3260 3261 if all_ and distinct: 3262 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3263 3264 operation_modifiers = [] 3265 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3266 operation_modifiers.append(exp.var(self._prev.text.upper())) 3267 3268 limit = self._parse_limit(top=True) 3269 projections = self._parse_projections() 3270 3271 this = self.expression( 3272 exp.Select, 3273 kind=kind, 3274 hint=hint, 3275 distinct=distinct, 3276 expressions=projections, 3277 limit=limit, 3278 operation_modifiers=operation_modifiers or None, 3279 ) 3280 this.comments = comments 3281 3282 into = self._parse_into() 3283 if into: 3284 this.set("into", into) 3285 3286 if not from_: 3287 from_ = self._parse_from() 3288 3289 if from_: 3290 this.set("from", from_) 3291 3292 this = self._parse_query_modifiers(this) 3293 elif (table or nested) and self._match(TokenType.L_PAREN): 3294 this = self._parse_wrapped_select(table=table) 3295 3296 # We return early here so that the UNION isn't attached to the subquery by the 3297 # following call to _parse_set_operations, but instead becomes the parent node 3298 self._match_r_paren() 3299 return self._parse_subquery(this, 
parse_alias=parse_subquery_alias) 3300 elif self._match(TokenType.VALUES, advance=False): 3301 this = self._parse_derived_table_values() 3302 elif from_: 3303 this = exp.select("*").from_(from_.this, copy=False) 3304 elif self._match(TokenType.SUMMARIZE): 3305 table = self._match(TokenType.TABLE) 3306 this = self._parse_select() or self._parse_string() or self._parse_table() 3307 return self.expression(exp.Summarize, this=this, table=table) 3308 elif self._match(TokenType.DESCRIBE): 3309 this = self._parse_describe() 3310 elif self._match_text_seq("STREAM"): 3311 this = self._parse_function() 3312 if this: 3313 this = self.expression(exp.Stream, this=this) 3314 else: 3315 self._retreat(self._index - 1) 3316 else: 3317 this = None 3318 3319 return self._parse_set_operations(this) if parse_set_operation else this 3320 3321 def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]: 3322 self._match_text_seq("SEARCH") 3323 3324 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3325 3326 if not kind: 3327 return None 3328 3329 self._match_text_seq("FIRST", "BY") 3330 3331 return self.expression( 3332 exp.RecursiveWithSearch, 3333 kind=kind, 3334 this=self._parse_id_var(), 3335 expression=self._match_text_seq("SET") and self._parse_id_var(), 3336 using=self._match_text_seq("USING") and self._parse_id_var(), 3337 ) 3338 3339 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3340 if not skip_with_token and not self._match(TokenType.WITH): 3341 return None 3342 3343 comments = self._prev_comments 3344 recursive = self._match(TokenType.RECURSIVE) 3345 3346 last_comments = None 3347 expressions = [] 3348 while True: 3349 cte = self._parse_cte() 3350 if isinstance(cte, exp.CTE): 3351 expressions.append(cte) 3352 if last_comments: 3353 cte.add_comments(last_comments) 3354 3355 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3356 break 3357 else: 3358 self._match(TokenType.WITH) 3359 3360 last_comments = self._prev_comments 3361 3362 return self.expression( 3363 exp.With, 3364 comments=comments, 3365 expressions=expressions, 3366 recursive=recursive, 3367 search=self._parse_recursive_with_search(), 3368 ) 3369 3370 def _parse_cte(self) -> t.Optional[exp.CTE]: 3371 index = self._index 3372 3373 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3374 if not alias or not alias.this: 3375 self.raise_error("Expected CTE to have alias") 3376 3377 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3378 self._retreat(index) 3379 return None 3380 3381 comments = self._prev_comments 3382 3383 if self._match_text_seq("NOT", "MATERIALIZED"): 3384 materialized = False 3385 elif self._match_text_seq("MATERIALIZED"): 3386 materialized = True 3387 else: 3388 materialized = None 3389 3390 cte = self.expression( 3391 exp.CTE, 3392 this=self._parse_wrapped(self._parse_statement), 3393 alias=alias, 3394 materialized=materialized, 3395 comments=comments, 3396 ) 3397 3398 values = cte.this 3399 if isinstance(values, exp.Values): 3400 if values.alias: 3401 cte.set("this", exp.select("*").from_(values)) 3402 else: 3403 cte.set("this", exp.select("*").from_(exp.alias_(values, "_values", table=True))) 3404 3405 return cte 3406 3407 def _parse_table_alias( 3408 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3409 ) -> t.Optional[exp.TableAlias]: 3410 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 3411 # so this section tries to parse the clause 
version and if it fails, it treats the token 3412 # as an identifier (alias) 3413 if self._can_parse_limit_or_offset(): 3414 return None 3415 3416 any_token = self._match(TokenType.ALIAS) 3417 alias = ( 3418 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3419 or self._parse_string_as_identifier() 3420 ) 3421 3422 index = self._index 3423 if self._match(TokenType.L_PAREN): 3424 columns = self._parse_csv(self._parse_function_parameter) 3425 self._match_r_paren() if columns else self._retreat(index) 3426 else: 3427 columns = None 3428 3429 if not alias and not columns: 3430 return None 3431 3432 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3433 3434 # We bubble up comments from the Identifier to the TableAlias 3435 if isinstance(alias, exp.Identifier): 3436 table_alias.add_comments(alias.pop_comments()) 3437 3438 return table_alias 3439 3440 def _parse_subquery( 3441 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3442 ) -> t.Optional[exp.Subquery]: 3443 if not this: 3444 return None 3445 3446 return self.expression( 3447 exp.Subquery, 3448 this=this, 3449 pivots=self._parse_pivots(), 3450 alias=self._parse_table_alias() if parse_alias else None, 3451 sample=self._parse_table_sample(), 3452 ) 3453 3454 def _implicit_unnests_to_explicit(self, this: E) -> E: 3455 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3456 3457 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3458 for i, join in enumerate(this.args.get("joins") or []): 3459 table = join.this 3460 normalized_table = table.copy() 3461 normalized_table.meta["maybe_column"] = True 3462 normalized_table = _norm(normalized_table, dialect=self.dialect) 3463 3464 if isinstance(table, exp.Table) and not join.args.get("on"): 3465 if normalized_table.parts[0].name in refs: 3466 table_as_column = table.to_column() 3467 unnest = exp.Unnest(expressions=[table_as_column]) 3468 3469 # Table.to_column creates a parent Alias node that we want to convert to 3470 # a TableAlias and attach to the Unnest, so it matches the parser's output 3471 if isinstance(table.args.get("alias"), exp.TableAlias): 3472 table_as_column.replace(table_as_column.this) 3473 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3474 3475 table.replace(unnest) 3476 3477 refs.add(normalized_table.alias_or_name) 3478 3479 return this 3480 3481 def _parse_query_modifiers( 3482 self, this: t.Optional[exp.Expression] 3483 ) -> t.Optional[exp.Expression]: 3484 if isinstance(this, self.MODIFIABLES): 3485 for join in self._parse_joins(): 3486 this.append("joins", join) 3487 for lateral in iter(self._parse_lateral, None): 3488 this.append("laterals", lateral) 3489 3490 while True: 3491 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3492 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3493 key, expression = parser(self) 3494 3495 if expression: 3496 this.set(key, expression) 3497 if key == "limit": 3498 offset = expression.args.pop("offset", None) 3499 3500 if offset: 3501 offset = exp.Offset(expression=offset) 3502 this.set("offset", offset) 3503 3504 limit_by_expressions = expression.expressions 3505 expression.set("expressions", None) 3506 offset.set("expressions", limit_by_expressions) 3507 continue 3508 break 3509 3510 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3511 this = self._implicit_unnests_to_explicit(this) 3512 3513 return this 3514 3515 def 
_parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3516 start = self._curr 3517 while self._curr: 3518 self._advance() 3519 3520 end = self._tokens[self._index - 1] 3521 return exp.Hint(expressions=[self._find_sql(start, end)]) 3522 3523 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3524 return self._parse_function_call() 3525 3526 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3527 start_index = self._index 3528 should_fallback_to_string = False 3529 3530 hints = [] 3531 try: 3532 for hint in iter( 3533 lambda: self._parse_csv( 3534 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3535 ), 3536 [], 3537 ): 3538 hints.extend(hint) 3539 except ParseError: 3540 should_fallback_to_string = True 3541 3542 if should_fallback_to_string or self._curr: 3543 self._retreat(start_index) 3544 return self._parse_hint_fallback_to_string() 3545 3546 return self.expression(exp.Hint, expressions=hints) 3547 3548 def _parse_hint(self) -> t.Optional[exp.Hint]: 3549 if self._match(TokenType.HINT) and self._prev_comments: 3550 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3551 3552 return None 3553 3554 def _parse_into(self) -> t.Optional[exp.Into]: 3555 if not self._match(TokenType.INTO): 3556 return None 3557 3558 temp = self._match(TokenType.TEMPORARY) 3559 unlogged = self._match_text_seq("UNLOGGED") 3560 self._match(TokenType.TABLE) 3561 3562 return self.expression( 3563 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3564 ) 3565 3566 def _parse_from( 3567 self, 3568 joins: bool = False, 3569 skip_from_token: bool = False, 3570 consume_pipe: bool = False, 3571 ) -> t.Optional[exp.From]: 3572 if not skip_from_token and not self._match(TokenType.FROM): 3573 return None 3574 3575 return self.expression( 3576 exp.From, 3577 comments=self._prev_comments, 3578 this=self._parse_table(joins=joins, consume_pipe=consume_pipe), 3579 ) 3580 3581 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3582 return self.expression( 3583 exp.MatchRecognizeMeasure, 3584 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3585 this=self._parse_expression(), 3586 ) 3587 3588 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3589 if not self._match(TokenType.MATCH_RECOGNIZE): 3590 return None 3591 3592 self._match_l_paren() 3593 3594 partition = self._parse_partition_by() 3595 order = self._parse_order() 3596 3597 measures = ( 3598 self._parse_csv(self._parse_match_recognize_measure) 3599 if self._match_text_seq("MEASURES") 3600 else None 3601 ) 3602 3603 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3604 rows = exp.var("ONE ROW PER MATCH") 3605 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3606 text = "ALL ROWS PER MATCH" 3607 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3608 text += " SHOW EMPTY MATCHES" 3609 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3610 text += " OMIT EMPTY MATCHES" 3611 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3612 text += " WITH UNMATCHED ROWS" 3613 rows = exp.var(text) 3614 else: 3615 rows = None 3616 3617 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3618 text = "AFTER MATCH SKIP" 3619 if self._match_text_seq("PAST", "LAST", "ROW"): 3620 text += " PAST LAST ROW" 3621 elif self._match_text_seq("TO", "NEXT", "ROW"): 3622 text += " TO NEXT ROW" 3623 elif self._match_text_seq("TO", "FIRST"): 3624 text += f" TO FIRST {self._advance_any().text}" # type: 
ignore 3625 elif self._match_text_seq("TO", "LAST"): 3626 text += f" TO LAST {self._advance_any().text}" # type: ignore 3627 after = exp.var(text) 3628 else: 3629 after = None 3630 3631 if self._match_text_seq("PATTERN"): 3632 self._match_l_paren() 3633 3634 if not self._curr: 3635 self.raise_error("Expecting )", self._curr) 3636 3637 paren = 1 3638 start = self._curr 3639 3640 while self._curr and paren > 0: 3641 if self._curr.token_type == TokenType.L_PAREN: 3642 paren += 1 3643 if self._curr.token_type == TokenType.R_PAREN: 3644 paren -= 1 3645 3646 end = self._prev 3647 self._advance() 3648 3649 if paren > 0: 3650 self.raise_error("Expecting )", self._curr) 3651 3652 pattern = exp.var(self._find_sql(start, end)) 3653 else: 3654 pattern = None 3655 3656 define = ( 3657 self._parse_csv(self._parse_name_as_expression) 3658 if self._match_text_seq("DEFINE") 3659 else None 3660 ) 3661 3662 self._match_r_paren() 3663 3664 return self.expression( 3665 exp.MatchRecognize, 3666 partition_by=partition, 3667 order=order, 3668 measures=measures, 3669 rows=rows, 3670 after=after, 3671 pattern=pattern, 3672 define=define, 3673 alias=self._parse_table_alias(), 3674 ) 3675 3676 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3677 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3678 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3679 cross_apply = False 3680 3681 if cross_apply is not None: 3682 this = self._parse_select(table=True) 3683 view = None 3684 outer = None 3685 elif self._match(TokenType.LATERAL): 3686 this = self._parse_select(table=True) 3687 view = self._match(TokenType.VIEW) 3688 outer = self._match(TokenType.OUTER) 3689 else: 3690 return None 3691 3692 if not this: 3693 this = ( 3694 self._parse_unnest() 3695 or self._parse_function() 3696 or self._parse_id_var(any_token=False) 3697 ) 3698 3699 while self._match(TokenType.DOT): 3700 this = exp.Dot( 3701 this=this, 3702 expression=self._parse_function() or self._parse_id_var(any_token=False), 3703 ) 3704 3705 ordinality: t.Optional[bool] = None 3706 3707 if view: 3708 table = self._parse_id_var(any_token=False) 3709 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3710 table_alias: t.Optional[exp.TableAlias] = self.expression( 3711 exp.TableAlias, this=table, columns=columns 3712 ) 3713 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3714 # We move the alias from the lateral's child node to the lateral itself 3715 table_alias = this.args["alias"].pop() 3716 else: 3717 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3718 table_alias = self._parse_table_alias() 3719 3720 return self.expression( 3721 exp.Lateral, 3722 this=this, 3723 view=view, 3724 outer=outer, 3725 alias=table_alias, 3726 cross_apply=cross_apply, 3727 ordinality=ordinality, 3728 ) 3729 3730 def _parse_join_parts( 3731 self, 3732 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3733 return ( 3734 self._match_set(self.JOIN_METHODS) and self._prev, 3735 self._match_set(self.JOIN_SIDES) and self._prev, 3736 self._match_set(self.JOIN_KINDS) and self._prev, 3737 ) 3738 3739 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3740 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3741 this = self._parse_column() 3742 if isinstance(this, exp.Column): 3743 return this.this 3744 return this 3745 3746 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3747 3748 def _parse_join( 3749 self, 
skip_join_token: bool = False, parse_bracket: bool = False 3750 ) -> t.Optional[exp.Join]: 3751 if self._match(TokenType.COMMA): 3752 table = self._try_parse(self._parse_table) 3753 cross_join = self.expression(exp.Join, this=table) if table else None 3754 3755 if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE: 3756 cross_join.set("kind", "CROSS") 3757 3758 return cross_join 3759 3760 index = self._index 3761 method, side, kind = self._parse_join_parts() 3762 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3763 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3764 join_comments = self._prev_comments 3765 3766 if not skip_join_token and not join: 3767 self._retreat(index) 3768 kind = None 3769 method = None 3770 side = None 3771 3772 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3773 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3774 3775 if not skip_join_token and not join and not outer_apply and not cross_apply: 3776 return None 3777 3778 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3779 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3780 kwargs["expressions"] = self._parse_csv( 3781 lambda: self._parse_table(parse_bracket=parse_bracket) 3782 ) 3783 3784 if method: 3785 kwargs["method"] = method.text 3786 if side: 3787 kwargs["side"] = side.text 3788 if kind: 3789 kwargs["kind"] = kind.text 3790 if hint: 3791 kwargs["hint"] = hint 3792 3793 if self._match(TokenType.MATCH_CONDITION): 3794 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3795 3796 if self._match(TokenType.ON): 3797 kwargs["on"] = self._parse_assignment() 3798 elif self._match(TokenType.USING): 3799 kwargs["using"] = self._parse_using_identifiers() 3800 elif ( 3801 not (outer_apply or cross_apply) 3802 and not isinstance(kwargs["this"], exp.Unnest) 3803 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3804 ): 3805 index = self._index 3806 joins: t.Optional[list] = list(self._parse_joins()) 3807 3808 if joins and self._match(TokenType.ON): 3809 kwargs["on"] = self._parse_assignment() 3810 elif joins and self._match(TokenType.USING): 3811 kwargs["using"] = self._parse_using_identifiers() 3812 else: 3813 joins = None 3814 self._retreat(index) 3815 3816 kwargs["this"].set("joins", joins if joins else None) 3817 3818 kwargs["pivots"] = self._parse_pivots() 3819 3820 comments = [c for token in (method, side, kind) if token for c in token.comments] 3821 comments = (join_comments or []) + comments 3822 return self.expression(exp.Join, comments=comments, **kwargs) 3823 3824 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3825 this = self._parse_assignment() 3826 3827 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3828 return this 3829 3830 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3831 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3832 3833 return this 3834 3835 def _parse_index_params(self) -> exp.IndexParameters: 3836 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3837 3838 if self._match(TokenType.L_PAREN, advance=False): 3839 columns = self._parse_wrapped_csv(self._parse_with_operator) 3840 else: 3841 columns = None 3842 3843 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3844 partition_by = self._parse_partition_by() 3845 
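        # Annotation (not in the original source): the clauses parsed in this
        # method mirror Postgres' CREATE INDEX grammar. For example,
        #     CREATE INDEX idx ON t USING btree (col) WITH (fillfactor = 70) WHERE col > 0
        # fills in the using, columns, with_storage and where args of the
        # exp.IndexParameters node returned below.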
with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3846 tablespace = ( 3847 self._parse_var(any_token=True) 3848 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3849 else None 3850 ) 3851 where = self._parse_where() 3852 3853 on = self._parse_field() if self._match(TokenType.ON) else None 3854 3855 return self.expression( 3856 exp.IndexParameters, 3857 using=using, 3858 columns=columns, 3859 include=include, 3860 partition_by=partition_by, 3861 where=where, 3862 with_storage=with_storage, 3863 tablespace=tablespace, 3864 on=on, 3865 ) 3866 3867 def _parse_index( 3868 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3869 ) -> t.Optional[exp.Index]: 3870 if index or anonymous: 3871 unique = None 3872 primary = None 3873 amp = None 3874 3875 self._match(TokenType.ON) 3876 self._match(TokenType.TABLE) # hive 3877 table = self._parse_table_parts(schema=True) 3878 else: 3879 unique = self._match(TokenType.UNIQUE) 3880 primary = self._match_text_seq("PRIMARY") 3881 amp = self._match_text_seq("AMP") 3882 3883 if not self._match(TokenType.INDEX): 3884 return None 3885 3886 index = self._parse_id_var() 3887 table = None 3888 3889 params = self._parse_index_params() 3890 3891 return self.expression( 3892 exp.Index, 3893 this=index, 3894 table=table, 3895 unique=unique, 3896 primary=primary, 3897 amp=amp, 3898 params=params, 3899 ) 3900 3901 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3902 hints: t.List[exp.Expression] = [] 3903 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3904 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3905 hints.append( 3906 self.expression( 3907 exp.WithTableHint, 3908 expressions=self._parse_csv( 3909 lambda: self._parse_function() or self._parse_var(any_token=True) 3910 ), 3911 ) 3912 ) 3913 self._match_r_paren() 3914 else: 3915 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3916 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3917 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3918 3919 self._match_set((TokenType.INDEX, TokenType.KEY)) 3920 if self._match(TokenType.FOR): 3921 hint.set("target", self._advance_any() and self._prev.text.upper()) 3922 3923 hint.set("expressions", self._parse_wrapped_id_vars()) 3924 hints.append(hint) 3925 3926 return hints or None 3927 3928 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3929 return ( 3930 (not schema and self._parse_function(optional_parens=False)) 3931 or self._parse_id_var(any_token=False) 3932 or self._parse_string_as_identifier() 3933 or self._parse_placeholder() 3934 ) 3935 3936 def _parse_table_parts( 3937 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3938 ) -> exp.Table: 3939 catalog = None 3940 db = None 3941 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3942 3943 while self._match(TokenType.DOT): 3944 if catalog: 3945 # This allows nesting the table in arbitrarily many dot expressions if needed 3946 table = self.expression( 3947 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3948 ) 3949 else: 3950 catalog = db 3951 db = table 3952 # "" used for tsql FROM a..b case 3953 table = self._parse_table_part(schema=schema) or "" 3954 3955 if ( 3956 wildcard 3957 and self._is_connected() 3958 and (isinstance(table, exp.Identifier) or not table) 3959 and self._match(TokenType.STAR) 3960 ): 3961 if isinstance(table, exp.Identifier): 
3962 table.args["this"] += "*" 3963 else: 3964 table = exp.Identifier(this="*") 3965 3966 # We bubble up comments from the Identifier to the Table 3967 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3968 3969 if is_db_reference: 3970 catalog = db 3971 db = table 3972 table = None 3973 3974 if not table and not is_db_reference: 3975 self.raise_error(f"Expected table name but got {self._curr}") 3976 if not db and is_db_reference: 3977 self.raise_error(f"Expected database name but got {self._curr}") 3978 3979 table = self.expression( 3980 exp.Table, 3981 comments=comments, 3982 this=table, 3983 db=db, 3984 catalog=catalog, 3985 ) 3986 3987 changes = self._parse_changes() 3988 if changes: 3989 table.set("changes", changes) 3990 3991 at_before = self._parse_historical_data() 3992 if at_before: 3993 table.set("when", at_before) 3994 3995 pivots = self._parse_pivots() 3996 if pivots: 3997 table.set("pivots", pivots) 3998 3999 return table 4000 4001 def _parse_table( 4002 self, 4003 schema: bool = False, 4004 joins: bool = False, 4005 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 4006 parse_bracket: bool = False, 4007 is_db_reference: bool = False, 4008 parse_partition: bool = False, 4009 consume_pipe: bool = False, 4010 ) -> t.Optional[exp.Expression]: 4011 lateral = self._parse_lateral() 4012 if lateral: 4013 return lateral 4014 4015 unnest = self._parse_unnest() 4016 if unnest: 4017 return unnest 4018 4019 values = self._parse_derived_table_values() 4020 if values: 4021 return values 4022 4023 subquery = self._parse_select(table=True, consume_pipe=consume_pipe) 4024 if subquery: 4025 if not subquery.args.get("pivots"): 4026 subquery.set("pivots", self._parse_pivots()) 4027 return subquery 4028 4029 bracket = parse_bracket and self._parse_bracket(None) 4030 bracket = self.expression(exp.Table, this=bracket) if bracket else None 4031 4032 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 4033 self._parse_table 4034 ) 4035 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 4036 4037 only = self._match(TokenType.ONLY) 4038 4039 this = t.cast( 4040 exp.Expression, 4041 bracket 4042 or rows_from 4043 or self._parse_bracket( 4044 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 4045 ), 4046 ) 4047 4048 if only: 4049 this.set("only", only) 4050 4051 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 4052 self._match_text_seq("*") 4053 4054 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 4055 if parse_partition and self._match(TokenType.PARTITION, advance=False): 4056 this.set("partition", self._parse_partition()) 4057 4058 if schema: 4059 return self._parse_schema(this=this) 4060 4061 version = self._parse_version() 4062 4063 if version: 4064 this.set("version", version) 4065 4066 if self.dialect.ALIAS_POST_TABLESAMPLE: 4067 this.set("sample", self._parse_table_sample()) 4068 4069 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 4070 if alias: 4071 this.set("alias", alias) 4072 4073 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 4074 return self.expression( 4075 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 4076 ) 4077 4078 this.set("hints", self._parse_table_hints()) 4079 4080 if not this.args.get("pivots"): 4081 this.set("pivots", self._parse_pivots()) 4082 4083 if not self.dialect.ALIAS_POST_TABLESAMPLE: 4084 this.set("sample", 
self._parse_table_sample()) 4085 4086 if joins: 4087 for join in self._parse_joins(): 4088 this.append("joins", join) 4089 4090 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 4091 this.set("ordinality", True) 4092 this.set("alias", self._parse_table_alias()) 4093 4094 return this 4095 4096 def _parse_version(self) -> t.Optional[exp.Version]: 4097 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 4098 this = "TIMESTAMP" 4099 elif self._match(TokenType.VERSION_SNAPSHOT): 4100 this = "VERSION" 4101 else: 4102 return None 4103 4104 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 4105 kind = self._prev.text.upper() 4106 start = self._parse_bitwise() 4107 self._match_texts(("TO", "AND")) 4108 end = self._parse_bitwise() 4109 expression: t.Optional[exp.Expression] = self.expression( 4110 exp.Tuple, expressions=[start, end] 4111 ) 4112 elif self._match_text_seq("CONTAINED", "IN"): 4113 kind = "CONTAINED IN" 4114 expression = self.expression( 4115 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 4116 ) 4117 elif self._match(TokenType.ALL): 4118 kind = "ALL" 4119 expression = None 4120 else: 4121 self._match_text_seq("AS", "OF") 4122 kind = "AS OF" 4123 expression = self._parse_type() 4124 4125 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 4126 4127 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 4128 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 4129 index = self._index 4130 historical_data = None 4131 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 4132 this = self._prev.text.upper() 4133 kind = ( 4134 self._match(TokenType.L_PAREN) 4135 and self._match_texts(self.HISTORICAL_DATA_KIND) 4136 and self._prev.text.upper() 4137 ) 4138 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 4139 4140 if expression: 4141 self._match_r_paren() 4142 historical_data = self.expression( 4143 exp.HistoricalData, this=this, kind=kind, expression=expression 4144 ) 4145 else: 4146 self._retreat(index) 4147 4148 return historical_data 4149 4150 def _parse_changes(self) -> t.Optional[exp.Changes]: 4151 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4152 return None 4153 4154 information = self._parse_var(any_token=True) 4155 self._match_r_paren() 4156 4157 return self.expression( 4158 exp.Changes, 4159 information=information, 4160 at_before=self._parse_historical_data(), 4161 end=self._parse_historical_data(), 4162 ) 4163 4164 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4165 if not self._match(TokenType.UNNEST): 4166 return None 4167 4168 expressions = self._parse_wrapped_csv(self._parse_equality) 4169 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4170 4171 alias = self._parse_table_alias() if with_alias else None 4172 4173 if alias: 4174 if self.dialect.UNNEST_COLUMN_ONLY: 4175 if alias.args.get("columns"): 4176 self.raise_error("Unexpected extra column alias in unnest.") 4177 4178 alias.set("columns", [alias.this]) 4179 alias.set("this", None) 4180 4181 columns = alias.args.get("columns") or [] 4182 if offset and len(expressions) < len(columns): 4183 offset = columns.pop() 4184 4185 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4186 self._match(TokenType.ALIAS) 4187 offset = self._parse_id_var( 4188 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4189 ) or exp.to_identifier("offset") 4190 4191 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 4192 
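    # Annotation (not in the original source): _parse_unnest handles both alias
    # styles. For example, BigQuery's
    #     SELECT * FROM UNNEST(arr) AS x WITH OFFSET AS pos
    # yields an exp.Unnest whose "offset" arg is the identifier `pos`, while
    # dialects with UNNEST_COLUMN_ONLY (e.g. BigQuery) treat a bare alias as a
    # column name rather than a table name, hence the alias/columns swap above.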
    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not (
            # ClickHouse's `FORMAT Values` is equivalent to `VALUES`
            self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES")
        ):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
            size = num
        else:
            percent = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        if not method and self.DEFAULT_SAMPLING_METHOD:
            method = exp.var(self.DEFAULT_SAMPLING_METHOD)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Iterator[exp.Join]:
        return iter(self._parse_join, None)

    def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]:
        if not self._match(TokenType.INTO):
            return None

        return self.expression(
            exp.UnpivotColumns,
            this=self._match_text_seq("NAME") and self._parse_column(),
            expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column),
        )

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match(TokenType.IN):
                # PIVOT ...
ON col IN (row_val1, row_val2) 4299 return self._parse_in(this) 4300 if self._match(TokenType.ALIAS, advance=False): 4301 # UNPIVOT ... ON (col1, col2, col3) AS row_val 4302 return self._parse_alias(this) 4303 4304 return this 4305 4306 this = self._parse_table() 4307 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4308 into = self._parse_unpivot_columns() 4309 using = self._match(TokenType.USING) and self._parse_csv( 4310 lambda: self._parse_alias(self._parse_function()) 4311 ) 4312 group = self._parse_group() 4313 4314 return self.expression( 4315 exp.Pivot, 4316 this=this, 4317 expressions=expressions, 4318 using=using, 4319 group=group, 4320 unpivot=is_unpivot, 4321 into=into, 4322 ) 4323 4324 def _parse_pivot_in(self) -> exp.In: 4325 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4326 this = self._parse_select_or_expression() 4327 4328 self._match(TokenType.ALIAS) 4329 alias = self._parse_bitwise() 4330 if alias: 4331 if isinstance(alias, exp.Column) and not alias.db: 4332 alias = alias.this 4333 return self.expression(exp.PivotAlias, this=this, alias=alias) 4334 4335 return this 4336 4337 value = self._parse_column() 4338 4339 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4340 self.raise_error("Expecting IN (") 4341 4342 if self._match(TokenType.ANY): 4343 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4344 else: 4345 exprs = self._parse_csv(_parse_aliased_expression) 4346 4347 self._match_r_paren() 4348 return self.expression(exp.In, this=value, expressions=exprs) 4349 4350 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4351 index = self._index 4352 include_nulls = None 4353 4354 if self._match(TokenType.PIVOT): 4355 unpivot = False 4356 elif self._match(TokenType.UNPIVOT): 4357 unpivot = True 4358 4359 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4360 if self._match_text_seq("INCLUDE", "NULLS"): 4361 include_nulls = True 4362 elif self._match_text_seq("EXCLUDE", "NULLS"): 4363 include_nulls = False 4364 else: 4365 return None 4366 4367 expressions = [] 4368 4369 if not self._match(TokenType.L_PAREN): 4370 self._retreat(index) 4371 return None 4372 4373 if unpivot: 4374 expressions = self._parse_csv(self._parse_column) 4375 else: 4376 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 4377 4378 if not expressions: 4379 self.raise_error("Failed to parse PIVOT's aggregation list") 4380 4381 if not self._match(TokenType.FOR): 4382 self.raise_error("Expecting FOR") 4383 4384 fields = [] 4385 while True: 4386 field = self._try_parse(self._parse_pivot_in) 4387 if not field: 4388 break 4389 fields.append(field) 4390 4391 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4392 self._parse_bitwise 4393 ) 4394 4395 group = self._parse_group() 4396 4397 self._match_r_paren() 4398 4399 pivot = self.expression( 4400 exp.Pivot, 4401 expressions=expressions, 4402 fields=fields, 4403 unpivot=unpivot, 4404 include_nulls=include_nulls, 4405 default_on_null=default_on_null, 4406 group=group, 4407 ) 4408 4409 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4410 pivot.set("alias", self._parse_table_alias()) 4411 4412 if not unpivot: 4413 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4414 4415 columns: t.List[exp.Expression] = [] 4416 all_fields = [] 4417 for pivot_field in pivot.fields: 4418 pivot_field_expressions = 
pivot_field.expressions

                # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case.
                if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny):
                    continue

                all_fields.append(
                    [
                        fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                        for fld in pivot_field_expressions
                    ]
                )

            if all_fields:
                if names:
                    all_fields.append(names)

                # Generate all possible combinations of the pivot columns
                # e.g. PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US'))
                # generates the product between [[2000, 2010], ['NL', 'US'], ['total']]
                for fld_parts_tuple in itertools.product(*all_fields):
                    fld_parts = list(fld_parts_tuple)

                    if names and self.PREFIXED_PIVOT_COLUMNS:
                        # Move the "name" to the front of the list
                        fld_parts.insert(0, fld_parts.pop(-1))

                    columns.append(exp.to_identifier("_".join(fld_parts)))

                pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations if agg.alias]

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None
        comments = self._prev_comments

        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        while True:
            index = self._index

            elements["expressions"].extend(
                self._parse_csv(
                    lambda: None
                    if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False)
                    else self._parse_assignment()
                )
            )

            before_with_index = self._index
            with_prefix = self._match(TokenType.WITH)

            if self._match(TokenType.ROLLUP):
                elements["rollup"].append(
                    self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix)
                )
            elif self._match(TokenType.CUBE):
                elements["cube"].append(
                    self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix)
                )
            elif self._match(TokenType.GROUPING_SETS):
                elements["grouping_sets"].append(
                    self.expression(
                        exp.GroupingSets,
                        expressions=self._parse_wrapped_csv(self._parse_grouping_set),
                    )
                )
            elif self._match_text_seq("TOTALS"):
                elements["totals"] = True  # type: ignore

            if before_with_index <= self._index <= before_with_index + 1:
                self._retreat(before_with_index)
                break

            if index == self._index:
                break

        return self.expression(exp.Group, comments=comments, **elements)  # type: ignore

    def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E:
        return self.expression(
            kind,
expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4526 ) 4527 4528 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4529 if self._match(TokenType.L_PAREN): 4530 grouping_set = self._parse_csv(self._parse_column) 4531 self._match_r_paren() 4532 return self.expression(exp.Tuple, expressions=grouping_set) 4533 4534 return self._parse_column() 4535 4536 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4537 if not skip_having_token and not self._match(TokenType.HAVING): 4538 return None 4539 return self.expression( 4540 exp.Having, comments=self._prev_comments, this=self._parse_assignment() 4541 ) 4542 4543 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4544 if not self._match(TokenType.QUALIFY): 4545 return None 4546 return self.expression(exp.Qualify, this=self._parse_assignment()) 4547 4548 def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]: 4549 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4550 exp.Prior, this=self._parse_bitwise() 4551 ) 4552 connect = self._parse_assignment() 4553 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4554 return connect 4555 4556 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4557 if skip_start_token: 4558 start = None 4559 elif self._match(TokenType.START_WITH): 4560 start = self._parse_assignment() 4561 else: 4562 return None 4563 4564 self._match(TokenType.CONNECT_BY) 4565 nocycle = self._match_text_seq("NOCYCLE") 4566 connect = self._parse_connect_with_prior() 4567 4568 if not start and self._match(TokenType.START_WITH): 4569 start = self._parse_assignment() 4570 4571 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4572 4573 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4574 this = self._parse_id_var(any_token=True) 4575 if self._match(TokenType.ALIAS): 4576 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4577 return this 4578 4579 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4580 if self._match_text_seq("INTERPOLATE"): 4581 return self._parse_wrapped_csv(self._parse_name_as_expression) 4582 return None 4583 4584 def _parse_order( 4585 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4586 ) -> t.Optional[exp.Expression]: 4587 siblings = None 4588 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4589 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4590 return this 4591 4592 siblings = True 4593 4594 return self.expression( 4595 exp.Order, 4596 comments=self._prev_comments, 4597 this=this, 4598 expressions=self._parse_csv(self._parse_ordered), 4599 siblings=siblings, 4600 ) 4601 4602 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4603 if not self._match(token): 4604 return None 4605 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4606 4607 def _parse_ordered( 4608 self, parse_method: t.Optional[t.Callable] = None 4609 ) -> t.Optional[exp.Ordered]: 4610 this = parse_method() if parse_method else self._parse_assignment() 4611 if not this: 4612 return None 4613 4614 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4615 this = exp.var("ALL") 4616 4617 asc = self._match(TokenType.ASC) 4618 desc = self._match(TokenType.DESC) or (asc and False) 4619 4620 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4621 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4622 
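        # Annotation (not in the original source): when the query has no explicit
        # NULLS FIRST / NULLS LAST, the block below infers nulls_first from the
        # dialect. With NULL_ORDERING == "nulls_are_small", NULLs sort first in
        # ascending order, so ORDER BY x and ORDER BY x NULLS FIRST are equivalent
        # and both end up with nulls_first=True here.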
4623 nulls_first = is_nulls_first or False 4624 explicitly_null_ordered = is_nulls_first or is_nulls_last 4625 4626 if ( 4627 not explicitly_null_ordered 4628 and ( 4629 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4630 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4631 ) 4632 and self.dialect.NULL_ORDERING != "nulls_are_last" 4633 ): 4634 nulls_first = True 4635 4636 if self._match_text_seq("WITH", "FILL"): 4637 with_fill = self.expression( 4638 exp.WithFill, 4639 **{ # type: ignore 4640 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4641 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4642 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4643 "interpolate": self._parse_interpolate(), 4644 }, 4645 ) 4646 else: 4647 with_fill = None 4648 4649 return self.expression( 4650 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4651 ) 4652 4653 def _parse_limit_options(self) -> exp.LimitOptions: 4654 percent = self._match(TokenType.PERCENT) 4655 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 4656 self._match_text_seq("ONLY") 4657 with_ties = self._match_text_seq("WITH", "TIES") 4658 return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties) 4659 4660 def _parse_limit( 4661 self, 4662 this: t.Optional[exp.Expression] = None, 4663 top: bool = False, 4664 skip_limit_token: bool = False, 4665 ) -> t.Optional[exp.Expression]: 4666 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4667 comments = self._prev_comments 4668 if top: 4669 limit_paren = self._match(TokenType.L_PAREN) 4670 expression = self._parse_term() if limit_paren else self._parse_number() 4671 4672 if limit_paren: 4673 self._match_r_paren() 4674 4675 limit_options = self._parse_limit_options() 4676 else: 4677 limit_options = None 4678 expression = self._parse_term() 4679 4680 if self._match(TokenType.COMMA): 4681 offset = expression 4682 expression = self._parse_term() 4683 else: 4684 offset = None 4685 4686 limit_exp = self.expression( 4687 exp.Limit, 4688 this=this, 4689 expression=expression, 4690 offset=offset, 4691 comments=comments, 4692 limit_options=limit_options, 4693 expressions=self._parse_limit_by(), 4694 ) 4695 4696 return limit_exp 4697 4698 if self._match(TokenType.FETCH): 4699 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4700 direction = self._prev.text.upper() if direction else "FIRST" 4701 4702 count = self._parse_field(tokens=self.FETCH_TOKENS) 4703 4704 return self.expression( 4705 exp.Fetch, 4706 direction=direction, 4707 count=count, 4708 limit_options=self._parse_limit_options(), 4709 ) 4710 4711 return this 4712 4713 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4714 if not self._match(TokenType.OFFSET): 4715 return this 4716 4717 count = self._parse_term() 4718 self._match_set((TokenType.ROW, TokenType.ROWS)) 4719 4720 return self.expression( 4721 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4722 ) 4723 4724 def _can_parse_limit_or_offset(self) -> bool: 4725 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4726 return False 4727 4728 index = self._index 4729 result = bool( 4730 self._try_parse(self._parse_limit, retreat=True) 4731 or self._try_parse(self._parse_offset, retreat=True) 4732 ) 4733 self._retreat(index) 4734 return result 4735 4736 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4737 return 
self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4738 4739 def _parse_locks(self) -> t.List[exp.Lock]: 4740 locks = [] 4741 while True: 4742 if self._match_text_seq("FOR", "UPDATE"): 4743 update = True 4744 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4745 "LOCK", "IN", "SHARE", "MODE" 4746 ): 4747 update = False 4748 else: 4749 break 4750 4751 expressions = None 4752 if self._match_text_seq("OF"): 4753 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4754 4755 wait: t.Optional[bool | exp.Expression] = None 4756 if self._match_text_seq("NOWAIT"): 4757 wait = True 4758 elif self._match_text_seq("WAIT"): 4759 wait = self._parse_primary() 4760 elif self._match_text_seq("SKIP", "LOCKED"): 4761 wait = False 4762 4763 locks.append( 4764 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4765 ) 4766 4767 return locks 4768 4769 def parse_set_operation( 4770 self, this: t.Optional[exp.Expression], consume_pipe: bool = False 4771 ) -> t.Optional[exp.Expression]: 4772 start = self._index 4773 _, side_token, kind_token = self._parse_join_parts() 4774 4775 side = side_token.text if side_token else None 4776 kind = kind_token.text if kind_token else None 4777 4778 if not self._match_set(self.SET_OPERATIONS): 4779 self._retreat(start) 4780 return None 4781 4782 token_type = self._prev.token_type 4783 4784 if token_type == TokenType.UNION: 4785 operation: t.Type[exp.SetOperation] = exp.Union 4786 elif token_type == TokenType.EXCEPT: 4787 operation = exp.Except 4788 else: 4789 operation = exp.Intersect 4790 4791 comments = self._prev.comments 4792 4793 if self._match(TokenType.DISTINCT): 4794 distinct: t.Optional[bool] = True 4795 elif self._match(TokenType.ALL): 4796 distinct = False 4797 else: 4798 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4799 if distinct is None: 4800 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4801 4802 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4803 "STRICT", "CORRESPONDING" 4804 ) 4805 if self._match_text_seq("CORRESPONDING"): 4806 by_name = True 4807 if not side and not kind: 4808 kind = "INNER" 4809 4810 on_column_list = None 4811 if by_name and self._match_texts(("ON", "BY")): 4812 on_column_list = self._parse_wrapped_csv(self._parse_column) 4813 4814 expression = self._parse_select( 4815 nested=True, parse_set_operation=False, consume_pipe=consume_pipe 4816 ) 4817 4818 return self.expression( 4819 operation, 4820 comments=comments, 4821 this=this, 4822 distinct=distinct, 4823 by_name=by_name, 4824 expression=expression, 4825 side=side, 4826 kind=kind, 4827 on=on_column_list, 4828 ) 4829 4830 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4831 while this: 4832 setop = self.parse_set_operation(this) 4833 if not setop: 4834 break 4835 this = setop 4836 4837 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4838 expression = this.expression 4839 4840 if expression: 4841 for arg in self.SET_OP_MODIFIERS: 4842 expr = expression.args.get(arg) 4843 if expr: 4844 this.set(arg, expr.pop()) 4845 4846 return this 4847 4848 def _parse_expression(self) -> t.Optional[exp.Expression]: 4849 return self._parse_alias(self._parse_assignment()) 4850 4851 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4852 this = self._parse_disjunction() 4853 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4854 # This allows us to parse 
<non-identifier token> := <expr> 4855 this = exp.column( 4856 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4857 ) 4858 4859 while self._match_set(self.ASSIGNMENT): 4860 if isinstance(this, exp.Column) and len(this.parts) == 1: 4861 this = this.this 4862 4863 this = self.expression( 4864 self.ASSIGNMENT[self._prev.token_type], 4865 this=this, 4866 comments=self._prev_comments, 4867 expression=self._parse_assignment(), 4868 ) 4869 4870 return this 4871 4872 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4873 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4874 4875 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4876 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4877 4878 def _parse_equality(self) -> t.Optional[exp.Expression]: 4879 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4880 4881 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4882 return self._parse_tokens(self._parse_range, self.COMPARISON) 4883 4884 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4885 this = this or self._parse_bitwise() 4886 negate = self._match(TokenType.NOT) 4887 4888 if self._match_set(self.RANGE_PARSERS): 4889 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4890 if not expression: 4891 return this 4892 4893 this = expression 4894 elif self._match(TokenType.ISNULL): 4895 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4896 4897 # Postgres supports ISNULL and NOTNULL for conditions. 4898 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4899 if self._match(TokenType.NOTNULL): 4900 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4901 this = self.expression(exp.Not, this=this) 4902 4903 if negate: 4904 this = self._negate_range(this) 4905 4906 if self._match(TokenType.IS): 4907 this = self._parse_is(this) 4908 4909 return this 4910 4911 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4912 if not this: 4913 return this 4914 4915 return self.expression(exp.Not, this=this) 4916 4917 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4918 index = self._index - 1 4919 negate = self._match(TokenType.NOT) 4920 4921 if self._match_text_seq("DISTINCT", "FROM"): 4922 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4923 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4924 4925 if self._match(TokenType.JSON): 4926 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4927 4928 if self._match_text_seq("WITH"): 4929 _with = True 4930 elif self._match_text_seq("WITHOUT"): 4931 _with = False 4932 else: 4933 _with = None 4934 4935 unique = self._match(TokenType.UNIQUE) 4936 self._match_text_seq("KEYS") 4937 expression: t.Optional[exp.Expression] = self.expression( 4938 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4939 ) 4940 else: 4941 expression = self._parse_primary() or self._parse_null() 4942 if not expression: 4943 self._retreat(index) 4944 return None 4945 4946 this = self.expression(exp.Is, this=this, expression=expression) 4947 return self.expression(exp.Not, this=this) if negate else this 4948 4949 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4950 unnest = self._parse_unnest(with_alias=False) 4951 if unnest: 4952 this = self.expression(exp.In, this=this, unnest=unnest) 4953 elif 
self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4954 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4955 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4956 4957 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4958 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4959 else: 4960 this = self.expression(exp.In, this=this, expressions=expressions) 4961 4962 if matched_l_paren: 4963 self._match_r_paren(this) 4964 elif not self._match(TokenType.R_BRACKET, expression=this): 4965 self.raise_error("Expecting ]") 4966 else: 4967 this = self.expression(exp.In, this=this, field=self._parse_column()) 4968 4969 return this 4970 4971 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4972 low = self._parse_bitwise() 4973 self._match(TokenType.AND) 4974 high = self._parse_bitwise() 4975 return self.expression(exp.Between, this=this, low=low, high=high) 4976 4977 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4978 if not self._match(TokenType.ESCAPE): 4979 return this 4980 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4981 4982 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4983 index = self._index 4984 4985 if not self._match(TokenType.INTERVAL) and match_interval: 4986 return None 4987 4988 if self._match(TokenType.STRING, advance=False): 4989 this = self._parse_primary() 4990 else: 4991 this = self._parse_term() 4992 4993 if not this or ( 4994 isinstance(this, exp.Column) 4995 and not this.table 4996 and not this.this.quoted 4997 and this.name.upper() == "IS" 4998 ): 4999 self._retreat(index) 5000 return None 5001 5002 unit = self._parse_function() or ( 5003 not self._match(TokenType.ALIAS, advance=False) 5004 and self._parse_var(any_token=True, upper=True) 5005 ) 5006 5007 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 5008 # each INTERVAL expression into this canonical form so it's easy to transpile 5009 if this and this.is_number: 5010 this = exp.Literal.string(this.to_py()) 5011 elif this and this.is_string: 5012 parts = exp.INTERVAL_STRING_RE.findall(this.name) 5013 if parts and unit: 5014 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 5015 unit = None 5016 self._retreat(self._index - 1) 5017 5018 if len(parts) == 1: 5019 this = exp.Literal.string(parts[0][0]) 5020 unit = self.expression(exp.Var, this=parts[0][1].upper()) 5021 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 5022 unit = self.expression( 5023 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 5024 ) 5025 5026 interval = self.expression(exp.Interval, this=this, unit=unit) 5027 5028 index = self._index 5029 self._match(TokenType.PLUS) 5030 5031 # Convert INTERVAL 'val_1' unit_1 [+] ... 
[+] 'val_n' unit_n into a sum of intervals 5032 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 5033 return self.expression( 5034 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 5035 ) 5036 5037 self._retreat(index) 5038 return interval 5039 5040 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 5041 this = self._parse_term() 5042 5043 while True: 5044 if self._match_set(self.BITWISE): 5045 this = self.expression( 5046 self.BITWISE[self._prev.token_type], 5047 this=this, 5048 expression=self._parse_term(), 5049 ) 5050 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 5051 this = self.expression( 5052 exp.DPipe, 5053 this=this, 5054 expression=self._parse_term(), 5055 safe=not self.dialect.STRICT_STRING_CONCAT, 5056 ) 5057 elif self._match(TokenType.DQMARK): 5058 this = self.expression( 5059 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 5060 ) 5061 elif self._match_pair(TokenType.LT, TokenType.LT): 5062 this = self.expression( 5063 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 5064 ) 5065 elif self._match_pair(TokenType.GT, TokenType.GT): 5066 this = self.expression( 5067 exp.BitwiseRightShift, this=this, expression=self._parse_term() 5068 ) 5069 else: 5070 break 5071 5072 return this 5073 5074 def _parse_term(self) -> t.Optional[exp.Expression]: 5075 this = self._parse_factor() 5076 5077 while self._match_set(self.TERM): 5078 klass = self.TERM[self._prev.token_type] 5079 comments = self._prev_comments 5080 expression = self._parse_factor() 5081 5082 this = self.expression(klass, this=this, comments=comments, expression=expression) 5083 5084 if isinstance(this, exp.Collate): 5085 expr = this.expression 5086 5087 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 5088 # fallback to Identifier / Var 5089 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 5090 ident = expr.this 5091 if isinstance(ident, exp.Identifier): 5092 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 5093 5094 return this 5095 5096 def _parse_factor(self) -> t.Optional[exp.Expression]: 5097 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 5098 this = parse_method() 5099 5100 while self._match_set(self.FACTOR): 5101 klass = self.FACTOR[self._prev.token_type] 5102 comments = self._prev_comments 5103 expression = parse_method() 5104 5105 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 5106 self._retreat(self._index - 1) 5107 return this 5108 5109 this = self.expression(klass, this=this, comments=comments, expression=expression) 5110 5111 if isinstance(this, exp.Div): 5112 this.args["typed"] = self.dialect.TYPED_DIVISION 5113 this.args["safe"] = self.dialect.SAFE_DIVISION 5114 5115 return this 5116 5117 def _parse_exponent(self) -> t.Optional[exp.Expression]: 5118 return self._parse_tokens(self._parse_unary, self.EXPONENT) 5119 5120 def _parse_unary(self) -> t.Optional[exp.Expression]: 5121 if self._match_set(self.UNARY_PARSERS): 5122 return self.UNARY_PARSERS[self._prev.token_type](self) 5123 return self._parse_at_time_zone(self._parse_type()) 5124 5125 def _parse_type( 5126 self, parse_interval: bool = True, fallback_to_identifier: bool = False 5127 ) -> t.Optional[exp.Expression]: 5128 interval = parse_interval and self._parse_interval() 5129 if interval: 5130 return interval 5131 5132 index = self._index 5133 data_type = self._parse_types(check_func=True, allow_identifiers=False) 5134 
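        # Annotation (not in the original source): the branches below handle two
        # special cases of a type followed by more tokens: BigQuery-style inline
        # struct constructors, which _parse_types has already folded into a Cast,
        # and typed literals such as TIMESTAMP '2020-01-01 00:00:00+00', which may
        # be re-typed to TIMESTAMPTZ when the literal carries a time zone offset.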
        # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
        if isinstance(data_type, exp.Cast):
            # This constructor can contain ops directly after it, for instance struct unnesting:
            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
            return self._parse_column_ops(data_type)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                literal = this.name
                this = self._parse_column_ops(this)

                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                if (
                    self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
                    and data_type.is_type(exp.DataType.Type.TIMESTAMP)
                    and TIME_ZONE_RE.search(literal)
                ):
                    data_type = exp.DataType.build("TIMESTAMPTZ")

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
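            # Illustrative sketch (annotation, not in the original source): for the
            # tokens DECIMAL ( 38 , 0 ), _parse_types consumed the precision and
            # scale itself, so index2 - index > 1 and the DataType is kept; if a
            # TYPE_CONVERTERS callable injected the expressions arg instead, the
            # difference is exactly 1 and we fall through to re-parse the tokens as
            # a Column or Identifier.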
    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        this = self._parse_type()
        if not this:
            return None

        if isinstance(this, exp.Column) and not this.table:
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expression]:
        type_name = identifier.name

        while self._match(TokenType.DOT):
            type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

        return exp.DataType.build(type_name, udt=True)

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        index = self._index

        this: t.Optional[exp.Expression] = None
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if isinstance(identifier, exp.Identifier):
                tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect))

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    this = self._parse_user_defined_type(identifier)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        # https://materialize.com/docs/sql/types/map/
        if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET):
            key_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.FARROW):
                self._retreat(index)
                return None

            value_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.R_BRACKET):
                self._retreat(index)
                return None

            return exp.DataType(
                this=exp.DataType.Type.MAP,
                expressions=[key_type, value_type],
                nested=True,
                prefix=prefix,
            )

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                if type_token == TokenType.NULLABLE and len(expressions) == 1:
                    this = expressions[0]
                    this.set("nullable", True)
                    self._match_r_paren()
                    return this
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR, TokenType.ANY)
                )
                if not func_or_ident:
                    return None
                expressions = [func_or_ident]
                if self._match(TokenType.COMMA):
                    expressions.extend(
                        self._parse_csv(
                            lambda: self._parse_types(
                                check_func=check_func,
                                schema=schema,
                                allow_identifiers=allow_identifiers,
                            )
                        )
                    )
            else:
                expressions = self._parse_csv(self._parse_type_size)

                # https://docs.snowflake.com/en/sql-reference/data-types-vector
                if type_token == TokenType.VECTOR and len(expressions) == 2:
                    expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_assignment)
                if not values and is_struct:
                    values = None
                    self._retreat(self._index - 1)
                else:
                    self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var(upper=True)
            if unit:
                if self._match_text_seq("TO"):
                    unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))

                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
            else:
                this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
        elif type_token == TokenType.VOID:
            this = exp.DataType(this=exp.DataType.Type.NULL)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                prefix=prefix,
            )

            # Empty arrays/structs are allowed
            if values is not None:
                cls = exp.Struct if is_struct else exp.Array
                this = exp.cast(cls(expressions=values), this, copy=False)

        elif expressions:
            this.set("expressions", expressions)

        # https://materialize.com/docs/sql/types/list/#type-name
        while self._match(TokenType.LIST):
            this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True)

        index = self._index

        # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3]
        matched_array = self._match(TokenType.ARRAY)

        while self._curr:
            datatype_token = self._prev.token_type
            matched_l_bracket = self._match(TokenType.L_BRACKET)

            if (not matched_l_bracket and not matched_array) or (
                datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET)
            ):
                # Postgres allows casting empty arrays such as ARRAY[]::INT[],
                # not to be confused with the fixed size array parsing
                break

            matched_array = False
            values = self._parse_csv(self._parse_assignment) or None
            if (
                values
                and not schema
                and (
                    not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY
                )
            ):
                # Retreating here means that we should not parse the following values as part of
                # the data type, e.g. in DuckDB ARRAY[1] should retreat and instead be parsed into
                # exp.Array, in contrast to INT[x][y] which denotes a fixed-size array data type
                self._retreat(index)
                break

            this = exp.DataType(
                this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True
            )
            self._match(TokenType.R_BRACKET)

        if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type):
            converter = self.TYPE_CONVERTERS.get(this.this)
            if converter:
                this = converter(t.cast(exp.DataType, this))

        return this

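    # Illustrative sketch (assumes only the public sqlglot API): _parse_types also
    # backs exp.DataType.build, which accepts the same type grammar, including
    # nested and parameterized types.
    #
    #     >>> from sqlglot import exp
    #     >>> exp.DataType.build("ARRAY<DECIMAL(38, 0)>").sql()
    #     'ARRAY<DECIMAL(38, 0)>'
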
    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if (
            self._curr
            and self._next
            and self._curr.token_type in self.TYPE_TOKENS
            and self._next.token_type in self.TYPE_TOKENS
        ):
            # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is
            # also a type token. Without this, the list will be parsed as a type and we'll
            # eventually crash
            this = self._parse_id_var()
        else:
            this = (
                self._parse_type(parse_interval=False, fallback_to_identifier=True)
                or self._parse_id_var()
            )

        self._match(TokenType.COLON)

        if (
            type_required
            and not isinstance(this, exp.DataType)
            and not self._match_set(self.TYPE_TOKENS, advance=False)
        ):
            self._retreat(index)
            return self._parse_types()

        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_column_reference()
        column = self._parse_column_ops(this) if this else self._parse_bracket(this)

        if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column:
            column.set("join_mark", self._match(TokenType.JOIN_MARKER))

        return column

    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if (
            not this
            and self._match(TokenType.VALUES, advance=False)
            and self.VALUES_FOLLOWED_BY_PAREN
            and (not self._next or self._next.token_type != TokenType.L_PAREN)
        ):
            this = self._parse_id_var()

        if isinstance(this, exp.Identifier):
            # We bubble up comments from the Identifier to the Column
            this = self.expression(exp.Column, comments=this.pop_comments(), this=this)

        return this

    def _parse_colon_as_variant_extract(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        casts = []
        json_path = []
        escape = None

        while self._match(TokenType.COLON):
            start_index = self._index

            # Snowflake allows reserved keywords as json keys but advance_any() excludes
            # TokenType.SELECT from any_tokens=True
            path = self._parse_column_ops(
                self._parse_field(any_token=True, tokens=(TokenType.SELECT,))
            )

            # The cast :: operator has a lower precedence than the extraction operator :, so
            # we rearrange the AST appropriately to avoid casting the JSON path
            while isinstance(path, exp.Cast):
                casts.append(path.to)
                path = path.this

            if casts:
                dcolon_offset = next(
                    i
                    for i, t in enumerate(self._tokens[start_index:])
                    if t.token_type == TokenType.DCOLON
                )
                end_token = self._tokens[start_index + dcolon_offset - 1]
            else:
                end_token = self._prev

            if path:
                # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b") as
                # it'll roundtrip to a string literal in GET_PATH
                if isinstance(path, exp.Identifier) and path.quoted:
                    escape = True

                json_path.append(self._find_sql(self._tokens[start_index], end_token))

        # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses
        # the json_path in GET_PATH() while Databricks transforms it back to the colon/dot notation
        if json_path:
            json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path)))

            if json_path_expr:
                json_path_expr.set("escape", escape)

            this = self.expression(
                exp.JSONExtract,
                this=this,
                expression=json_path_expr,
                variant_extract=True,
            )

            while casts:
                this = self.expression(exp.Cast, this=this, to=casts.pop())

        return this

    def _parse_dcolon(self) -> t.Optional[exp.Expression]:
        return self._parse_types()

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token in (TokenType.DCOLON, TokenType.DOTCOLON):
                field = self._parse_dcolon()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference() or self._parse_bracket()
                if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False):
                    field = self._parse_column_ops(field)
            else:
                field = self._parse_field(any_token=True, anonymous_func=True)

            # Function calls can be qualified, e.g., x.y.FOO()
            # This converts the final AST to a series of Dots leading to the function call
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
            if isinstance(field, (exp.Func, exp.Window)) and this:
                this = this.transform(
                    lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n
                )

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                this = self.expression(
                    exp.Column,
                    comments=this.comments,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            elif isinstance(field, exp.Window):
                # Move the exp.Dot's to the window's function
                window_func = self.expression(exp.Dot, this=this, expression=field.this)
                field.set("this", window_func)
                this = field
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            if field and field.comments:
                t.cast(exp.Expression, this).add_comments(field.pop_comments())

            this = self._parse_bracket(this)

        return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this

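    # Illustrative sketch (assumes only the public sqlglot API): in dialects with
    # COLON_IS_VARIANT_EXTRACT set (e.g. Snowflake), the colon operator above is
    # parsed into an exp.JSONExtract rather than a column reference.
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> isinstance(sqlglot.parse_one("col:a.b", read="snowflake"), exp.JSONExtract)
    #     True
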
    def _parse_paren(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.L_PAREN):
            return None

        comments = self._prev_comments
        query = self._parse_select()

        if query:
            expressions = [query]
        else:
            expressions = self._parse_expressions()

        this = self._parse_query_modifiers(seq_get(expressions, 0))

        if not this and self._match(TokenType.R_PAREN, advance=False):
            this = self.expression(exp.Tuple)
        elif isinstance(this, exp.UNWRAPPED_QUERIES):
            this = self._parse_subquery(this=this, parse_alias=False)
        elif isinstance(this, exp.Subquery):
            this = self._parse_subquery(this=self._parse_set_operations(this), parse_alias=False)
        elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA:
            this = self.expression(exp.Tuple, expressions=expressions)
        else:
            this = self.expression(exp.Paren, this=this)

        if this:
            this.add_comments(comments)

        self._match_r_paren(expression=this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        return self._parse_paren()

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        if anonymous_func:
            field = (
                self._parse_function(anonymous=anonymous_func, any_token=any_token)
                or self._parse_primary()
            )
        else:
            field = self._parse_primary() or self._parse_function(
                anonymous=anonymous_func, any_token=any_token
            )
        return field or self._parse_id_var(any_token=any_token, tokens=tokens)

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

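    # Illustrative sketch (assumes only the public sqlglot API): _parse_paren
    # disambiguates grouping parentheses, subqueries and tuples.
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> isinstance(sqlglot.parse_one("(1, 2)"), exp.Tuple)
    #     True
    #     >>> isinstance(sqlglot.parse_one("(1)"), exp.Paren)
    #     True
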
    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        if not self._curr:
            return None

        comments = self._curr.comments
        token = self._curr
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return self._parse_window(parser(self))

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if any_token:
            if token_type in self.RESERVED_TOKENS:
                return None
        elif token_type not in self.FUNC_TOKENS:
            return None

        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(
                    subquery_predicate, comments=comments, this=self._parse_select()
                )
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            known_function = function and not anonymous

            alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            post_func_comments = self._curr and self._curr.comments
            if known_function and post_func_comments:
                # If the user-inputted comment "/* sqlglot.anonymous */" is following the function
                # call we'll construct it as exp.Anonymous, even if it's "known"
                if any(
                    comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS)
                    for comment in post_func_comments
                ):
                    known_function = False

            if alias and known_function:
                args = self._kv_to_prop_eq(args)

            if known_function:
                func_builder = t.cast(t.Callable, function)

                if "dialect" in func_builder.__code__.co_varnames:
                    func = func_builder(args, dialect=self.dialect)
                else:
                    func = func_builder(args)

                func = self.validate_expression(func, args)
                if self.dialect.PRESERVE_ORIGINAL_NAMES:
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True).update_positions(token)

                this = self.expression(exp.Anonymous, this=this, expressions=args)
                this = this.update_positions(token)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression:
        return expression

    def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]:
        transformed = []

        for index, e in enumerate(expressions):
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression
                    )

                if isinstance(e.this, exp.Column):
                    e.this.replace(e.this.this)
            else:
                e = self._to_prop_eq(e, index)

            transformed.append(e)

        return transformed

    def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_statement()

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(this=self._parse_id_var(), computed_column=False)

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_table_parts(schema=True)

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

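    # Illustrative sketch (assumes only the public sqlglot API): _parse_function_call
    # builds a typed node for functions registered in FUNCTIONS and falls back to
    # exp.Anonymous for unknown names.
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> isinstance(sqlglot.parse_one("COALESCE(a, b)"), exp.Coalesce)
    #     True
    #     >>> isinstance(sqlglot.parse_one("MY_UDF(a)"), exp.Anonymous)
    #     True
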
    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self._identifier_expression(token)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
        return self._parse_id_var()

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_lambda_arg()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_assignment)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        index = self._index
        if not self._match(TokenType.L_PAREN):
            return this

        # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
        # expr can be of both types
        if self._match_set(self.SELECT_START_TOKENS):
            self._retreat(index)
            return this
        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

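    # Illustrative sketch (assumes only the public sqlglot API): _parse_lambda
    # handles higher-order function arguments such as `x -> x + 1`; LIST_TRANSFORM
    # below is just an arbitrary (anonymous) function name.
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> sqlglot.parse_one("LIST_TRANSFORM(xs, x -> x + 1)").find(exp.Lambda) is not None
    #     True
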
    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(
        self, this: t.Optional[exp.Expression], computed_column: bool = True
    ) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        if not computed_column:
            self._match(TokenType.ALIAS)

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            persisted = self._prev.text.upper() == "MATERIALIZED"
            constraint_kind = exp.ComputedColumnConstraint(
                this=self._parse_assignment(),
                persisted=persisted or self._match_text_seq("PERSISTED"),
                not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
            )
            constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind))
        elif (
            kind
            and self._match(TokenType.ALIAS, advance=False)
            and (
                not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
                or (self._next and self._next.token_type == TokenType.L_PAREN)
            )
        ):
            self._advance()
            constraints.append(
                self.expression(
                    exp.ColumnConstraint,
                    kind=exp.ComputedColumnConstraint(
                        this=self._parse_disjunction(),
                        persisted=self._match_texts(("STORED", "VIRTUAL"))
                        and self._prev.text.upper() == "STORED",
                    ),
                )
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        start = None
        increment = None
        order = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()
            if self._match_text_seq("ORDER"):
                order = True
            elif self._match_text_seq("NOORDER"):
                order = False

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(
                start=start, increment=increment, this=False, order=order
            )

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                this.set("expression", self._parse_range())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)

        # Unconsume the `NOT` token
        self._retreat(self._index - 1)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        this = self._match(TokenType.CONSTRAINT) and self._parse_id_var()

        procedure_option_follows = (
            self._match(TokenType.WITH, advance=False)
            and self._next
            and self._next.text.upper() in self.PROCEDURE_OPTIONS
        )

        if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

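    # Illustrative sketch (assumes only the public sqlglot API): the constraint
    # parsers above are reached through CREATE TABLE column definitions.
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> ddl = "CREATE TABLE t (id INT GENERATED ALWAYS AS IDENTITY (START WITH 1 INCREMENT BY 2))"
    #     >>> sqlglot.parse_one(ddl).find(exp.GeneratedAsIdentityColumnConstraint) is not None
    #     True
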
    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )

    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique_key(self) -> t.Optional[exp.Expression]:
        return self._parse_id_var(any_token=False)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"),
            this=self._parse_schema(self._parse_unique_key()),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
            on_conflict=self._parse_on_conflict(),
            options=self._parse_key_constraint_options(),
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            else:
                var = self._parse_var_from_options(
                    self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False
                )
                if not var:
                    break
                options.append(var.name)

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        expressions = (
            self._parse_wrapped_id_vars()
            if not self._match(TokenType.REFERENCES, advance=False)
            else None
        )
        reference = self._parse_references()
        on_options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            on_options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            options=self._parse_key_constraint_options(),
            **on_options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        return self._parse_ordered() or self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.PrimaryKeyColumnConstraint,
                desc=desc,
                options=self._parse_key_constraint_options(),
            )

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

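    # Illustrative sketch (assumes only the public sqlglot API): ON DELETE / ON
    # UPDATE actions parsed by _parse_foreign_key land in the ForeignKey node's args.
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> ddl = "CREATE TABLE t (a INT, FOREIGN KEY (a) REFERENCES p (b) ON DELETE CASCADE)"
    #     >>> sqlglot.parse_one(ddl).find(exp.ForeignKey).args.get("delete")
    #     'CASCADE'
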
    def _parse_odbc_datetime_literal(self) -> exp.Expression:
        """
        Parses a datetime column in ODBC format. We parse the column into the corresponding
        types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the
        same as we did for `DATE('yyyy-mm-dd')`.

        Reference:
            https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals
        """
        self._match(TokenType.VAR)
        exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()]
        expression = self.expression(exp_class=exp_class, this=self._parse_string())
        if not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")
        return expression

    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        if (
            bracket_kind == TokenType.L_BRACE
            and self._curr
            and self._curr.token_type == TokenType.VAR
            and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS
        ):
            return self._parse_odbc_datetime_literal()

        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions))
        elif not this:
            this = build_array_constructor(
                exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect
            )
        else:
            constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper())
            if constructor_type:
                return build_array_constructor(
                    constructor_type,
                    args=expressions,
                    bracket_kind=bracket_kind,
                    dialect=self.dialect,
                )

            expressions = apply_index_offset(
                this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect
            )
            this = self.expression(
                exp.Bracket,
                this=this,
                expressions=expressions,
                comments=this.pop_comments(),
            )

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_assignment())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_assignment()

        while self._match(TokenType.WHEN):
            this = self._parse_assignment()
            self._match(TokenType.THEN)
            then = self._parse_assignment()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_assignment()

        if not self._match(TokenType.END):
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )

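    # Illustrative sketch (assumes only the public sqlglot API): the L_BRACE branch
    # of _parse_bracket is how DuckDB struct literals become exp.Struct nodes.
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> sqlglot.parse_one("SELECT {'a': 1}", read="duckdb").find(exp.Struct) is not None
    #     True
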
    def _parse_if(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(
                lambda: self._parse_alias(self._parse_assignment(), explicit=True)
            )
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                return self._parse_as_command(self._prev)

            condition = self._parse_assignment()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_assignment()
            false = self._parse_assignment() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract | exp.Anonymous:
        this = self._parse_function() or self._parse_var_or_string(upper=True)

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_gap_fill(self) -> exp.GapFill:
        self._match(TokenType.TABLE)
        this = self._parse_table()

        self._match(TokenType.COMMA)
        args = [this, *self._parse_csv(self._parse_lambda)]

        gap_fill = exp.GapFill.from_arg_list(args)
        return self.validate_expression(gap_fill, args)

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        this = self._parse_assignment()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        default = self._match(TokenType.DEFAULT)
        if default:
            default = self._parse_bitwise()
            self._match_text_seq("ON", "CONVERSION", "ERROR")

        if self._match_set((TokenType.FORMAT, TokenType.COMMA)):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                    safe=safe,
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
            default=default,
        )

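    # Illustrative sketch (assumes only the public sqlglot API): the `strict` flag
    # of _parse_cast decides between exp.Cast and exp.TryCast.
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> isinstance(sqlglot.parse_one("TRY_CAST(x AS INT)"), exp.TryCast)
    #     True
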
    def _parse_string_agg(self) -> exp.GroupConcat:
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_assignment()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_assignment))
        else:
            args = self._parse_csv(self._parse_assignment)  # type: ignore

        if self._match_text_seq("ON", "OVERFLOW"):
            # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior])
            if self._match_text_seq("ERROR"):
                on_overflow: t.Optional[exp.Expression] = exp.var("ERROR")
            else:
                self._match_text_seq("TRUNCATE")
                on_overflow = self.expression(
                    exp.OverflowTruncateBehavior,
                    this=self._parse_string(),
                    with_count=(
                        self._match_text_seq("WITH", "COUNT")
                        or not self._match_text_seq("WITHOUT", "COUNT")
                    ),
                )
        else:
            on_overflow = None

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            # The order is parsed through `this` as a canonicalization for WITHIN GROUPs
            args[0] = self._parse_limit(this=self._parse_order(this=args[0]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        # The corresponding match_r_paren will be called in parse_function (caller)
        self._match_l_paren()

        return self.expression(
            exp.GroupConcat,
            this=self._parse_order(this=seq_get(args, 0)),
            separator=seq_get(args, 1),
            on_overflow=on_overflow,
        )

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_xml_table(self) -> exp.XMLTable:
        namespaces = None
        passing = None
        columns = None

        if self._match_text_seq("XMLNAMESPACES", "("):
            namespaces = self._parse_xml_namespace()
            self._match_text_seq(")", ",")

        this = self._parse_string()

        if self._match_text_seq("PASSING"):
            # The BY VALUE keywords are optional and are provided for semantic clarity
            self._match_text_seq("BY", "VALUE")
            passing = self._parse_csv(self._parse_column)

        by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF")

        if self._match_text_seq("COLUMNS"):
            columns = self._parse_csv(self._parse_field_def)

        return self.expression(
            exp.XMLTable,
            this=this,
            namespaces=namespaces,
            passing=passing,
            columns=columns,
            by_ref=by_ref,
        )

    def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]:
        namespaces = []

        while True:
            if self._match(TokenType.DEFAULT):
                uri = self._parse_string()
            else:
                uri = self._parse_alias(self._parse_string())
            namespaces.append(self.expression(exp.XMLNamespace, this=uri))
            if not self._match(TokenType.COMMA):
                break

        return namespaces

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.DecodeCase]:
        args = self._parse_csv(self._parse_assignment)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        return self.expression(exp.DecodeCase, expressions=args)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

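    # Illustrative sketch (assumes only the public sqlglot API): STRING_AGG is
    # canonicalized to exp.GroupConcat, which eases transpilation to GROUP_CONCAT
    # dialects such as MySQL.
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> isinstance(sqlglot.parse_one("STRING_AGG(x, ',')", read="postgres"), exp.GroupConcat)
    #     True
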
    def _parse_on_condition(self) -> t.Optional[exp.OnCondition]:
        # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the
        # opposite (e.g. JSON_EXISTS)
        if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR:
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
        else:
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)

        null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS)

        if not empty and not error and not null:
            return None

        return self.expression(
            exp.OnCondition,
            empty=empty,
            error=error,
            null=null,
        )

    def _parse_on_handling(
        self, on: str, *values: str
    ) -> t.Optional[str] | t.Optional[exp.Expression]:
        # Parses the "X ON Y" or "DEFAULT <expr> ON Y" syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        index = self._index
        if self._match(TokenType.DEFAULT):
            default_value = self._parse_bitwise()
            if self._match_text_seq("ON", on):
                return default_value

            self._retreat(index)

        return None

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            haystack = seq_get(args, 1)
            needle = seq_get(args, 0)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

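    # Illustrative sketch (assumes only the public sqlglot API): both the
    # `POSITION(needle IN haystack)` and the comma-separated forms end up as
    # exp.StrPosition.
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> isinstance(sqlglot.parse_one("POSITION('@' IN email)"), exp.StrPosition)
    #     True
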
self.validate_expression(exp.Substring.from_arg_list(args), args) 6847 6848    def _parse_trim(self) -> exp.Trim: 6849        # https://www.w3resource.com/sql/character-functions/trim.php 6850        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6851 6852        position = None 6853        collation = None 6854        expression = None 6855 6856        if self._match_texts(self.TRIM_TYPES): 6857            position = self._prev.text.upper() 6858 6859        this = self._parse_bitwise() 6860        if self._match_set((TokenType.FROM, TokenType.COMMA)): 6861            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6862            expression = self._parse_bitwise() 6863 6864            if invert_order: 6865                this, expression = expression, this 6866 6867        if self._match(TokenType.COLLATE): 6868            collation = self._parse_bitwise() 6869 6870        return self.expression( 6871            exp.Trim, this=this, position=position, expression=expression, collation=collation 6872        ) 6873 6874    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6875        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6876 6877    def _parse_named_window(self) -> t.Optional[exp.Expression]: 6878        return self._parse_window(self._parse_id_var(), alias=True) 6879 6880    def _parse_respect_or_ignore_nulls( 6881        self, this: t.Optional[exp.Expression] 6882    ) -> t.Optional[exp.Expression]: 6883        if self._match_text_seq("IGNORE", "NULLS"): 6884            return self.expression(exp.IgnoreNulls, this=this) 6885        if self._match_text_seq("RESPECT", "NULLS"): 6886            return self.expression(exp.RespectNulls, this=this) 6887        return this 6888 6889    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6890        if self._match(TokenType.HAVING): 6891            self._match_texts(("MAX", "MIN")) 6892            max = self._prev.text.upper() != "MIN" 6893            return self.expression( 6894                exp.HavingMax, this=this, expression=self._parse_column(), max=max 6895            ) 6896 6897        return this 6898 6899    def _parse_window( 6900        self, this: t.Optional[exp.Expression], alias: bool = False 6901    ) -> t.Optional[exp.Expression]: 6902        func = this 6903        comments = func.comments if isinstance(func, exp.Expression) else None 6904 6905        # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6906        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6907        if self._match_text_seq("WITHIN", "GROUP"): 6908            order = self._parse_wrapped(self._parse_order) 6909            this = self.expression(exp.WithinGroup, this=this, expression=order) 6910 6911        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6912            self._match(TokenType.WHERE) 6913            this = self.expression( 6914                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6915            ) 6916            self._match_r_paren() 6917 6918        # The SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6919        # Some dialects choose to implement it and some do not. 6920        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6921 6922        # There is some code above in _parse_lambda that handles 6923        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6924 6925        # The code below handles 6926        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...
6927 6928 # Oracle allows both formats 6929 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6930 # and Snowflake chose to do the same for familiarity 6931 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6932 if isinstance(this, exp.AggFunc): 6933 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6934 6935 if ignore_respect and ignore_respect is not this: 6936 ignore_respect.replace(ignore_respect.this) 6937 this = self.expression(ignore_respect.__class__, this=this) 6938 6939 this = self._parse_respect_or_ignore_nulls(this) 6940 6941 # bigquery select from window x AS (partition by ...) 6942 if alias: 6943 over = None 6944 self._match(TokenType.ALIAS) 6945 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6946 return this 6947 else: 6948 over = self._prev.text.upper() 6949 6950 if comments and isinstance(func, exp.Expression): 6951 func.pop_comments() 6952 6953 if not self._match(TokenType.L_PAREN): 6954 return self.expression( 6955 exp.Window, 6956 comments=comments, 6957 this=this, 6958 alias=self._parse_id_var(False), 6959 over=over, 6960 ) 6961 6962 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6963 6964 first = self._match(TokenType.FIRST) 6965 if self._match_text_seq("LAST"): 6966 first = False 6967 6968 partition, order = self._parse_partition_and_order() 6969 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6970 6971 if kind: 6972 self._match(TokenType.BETWEEN) 6973 start = self._parse_window_spec() 6974 self._match(TokenType.AND) 6975 end = self._parse_window_spec() 6976 exclude = ( 6977 self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS) 6978 if self._match_text_seq("EXCLUDE") 6979 else None 6980 ) 6981 6982 spec = self.expression( 6983 exp.WindowSpec, 6984 kind=kind, 6985 start=start["value"], 6986 start_side=start["side"], 6987 end=end["value"], 6988 end_side=end["side"], 6989 exclude=exclude, 6990 ) 6991 else: 6992 spec = None 6993 6994 self._match_r_paren() 6995 6996 window = self.expression( 6997 exp.Window, 6998 comments=comments, 6999 this=this, 7000 partition_by=partition, 7001 order=order, 7002 spec=spec, 7003 alias=window_alias, 7004 over=over, 7005 first=first, 7006 ) 7007 7008 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
7009 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 7010 return self._parse_window(window, alias=alias) 7011 7012 return window 7013 7014 def _parse_partition_and_order( 7015 self, 7016 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 7017 return self._parse_partition_by(), self._parse_order() 7018 7019 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 7020 self._match(TokenType.BETWEEN) 7021 7022 return { 7023 "value": ( 7024 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 7025 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 7026 or self._parse_bitwise() 7027 ), 7028 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 7029 } 7030 7031 def _parse_alias( 7032 self, this: t.Optional[exp.Expression], explicit: bool = False 7033 ) -> t.Optional[exp.Expression]: 7034 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 7035 # so this section tries to parse the clause version and if it fails, it treats the token 7036 # as an identifier (alias) 7037 if self._can_parse_limit_or_offset(): 7038 return this 7039 7040 any_token = self._match(TokenType.ALIAS) 7041 comments = self._prev_comments or [] 7042 7043 if explicit and not any_token: 7044 return this 7045 7046 if self._match(TokenType.L_PAREN): 7047 aliases = self.expression( 7048 exp.Aliases, 7049 comments=comments, 7050 this=this, 7051 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 7052 ) 7053 self._match_r_paren(aliases) 7054 return aliases 7055 7056 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 7057 self.STRING_ALIASES and self._parse_string_as_identifier() 7058 ) 7059 7060 if alias: 7061 comments.extend(alias.pop_comments()) 7062 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 7063 column = this.this 7064 7065 # Moves the comment next to the alias in `expr /* comment */ AS alias` 7066 if not this.comments and column and column.comments: 7067 this.comments = column.pop_comments() 7068 7069 return this 7070 7071 def _parse_id_var( 7072 self, 7073 any_token: bool = True, 7074 tokens: t.Optional[t.Collection[TokenType]] = None, 7075 ) -> t.Optional[exp.Expression]: 7076 expression = self._parse_identifier() 7077 if not expression and ( 7078 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 7079 ): 7080 quoted = self._prev.token_type == TokenType.STRING 7081 expression = self._identifier_expression(quoted=quoted) 7082 7083 return expression 7084 7085 def _parse_string(self) -> t.Optional[exp.Expression]: 7086 if self._match_set(self.STRING_PARSERS): 7087 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 7088 return self._parse_placeholder() 7089 7090 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 7091 output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 7092 if output: 7093 output.update_positions(self._prev) 7094 return output 7095 7096 def _parse_number(self) -> t.Optional[exp.Expression]: 7097 if self._match_set(self.NUMERIC_PARSERS): 7098 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 7099 return self._parse_placeholder() 7100 7101 def _parse_identifier(self) -> t.Optional[exp.Expression]: 7102 if self._match(TokenType.IDENTIFIER): 7103 return self._identifier_expression(quoted=True) 7104 return self._parse_placeholder() 7105 7106 def _parse_var( 7107 self, 7108 any_token: bool = False, 7109 tokens: 
t.Optional[t.Collection[TokenType]] = None, 7110 upper: bool = False, 7111 ) -> t.Optional[exp.Expression]: 7112 if ( 7113 (any_token and self._advance_any()) 7114 or self._match(TokenType.VAR) 7115 or (self._match_set(tokens) if tokens else False) 7116 ): 7117 return self.expression( 7118 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 7119 ) 7120 return self._parse_placeholder() 7121 7122 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 7123 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 7124 self._advance() 7125 return self._prev 7126 return None 7127 7128 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 7129 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 7130 7131 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 7132 return self._parse_primary() or self._parse_var(any_token=True) 7133 7134 def _parse_null(self) -> t.Optional[exp.Expression]: 7135 if self._match_set(self.NULL_TOKENS): 7136 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 7137 return self._parse_placeholder() 7138 7139 def _parse_boolean(self) -> t.Optional[exp.Expression]: 7140 if self._match(TokenType.TRUE): 7141 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 7142 if self._match(TokenType.FALSE): 7143 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 7144 return self._parse_placeholder() 7145 7146 def _parse_star(self) -> t.Optional[exp.Expression]: 7147 if self._match(TokenType.STAR): 7148 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 7149 return self._parse_placeholder() 7150 7151 def _parse_parameter(self) -> exp.Parameter: 7152 this = self._parse_identifier() or self._parse_primary_or_var() 7153 return self.expression(exp.Parameter, this=this) 7154 7155 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 7156 if self._match_set(self.PLACEHOLDER_PARSERS): 7157 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 7158 if placeholder: 7159 return placeholder 7160 self._advance(-1) 7161 return None 7162 7163 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 7164 if not self._match_texts(keywords): 7165 return None 7166 if self._match(TokenType.L_PAREN, advance=False): 7167 return self._parse_wrapped_csv(self._parse_expression) 7168 7169 expression = self._parse_expression() 7170 return [expression] if expression else None 7171 7172 def _parse_csv( 7173 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 7174 ) -> t.List[exp.Expression]: 7175 parse_result = parse_method() 7176 items = [parse_result] if parse_result is not None else [] 7177 7178 while self._match(sep): 7179 self._add_comments(parse_result) 7180 parse_result = parse_method() 7181 if parse_result is not None: 7182 items.append(parse_result) 7183 7184 return items 7185 7186 def _parse_tokens( 7187 self, parse_method: t.Callable, expressions: t.Dict 7188 ) -> t.Optional[exp.Expression]: 7189 this = parse_method() 7190 7191 while self._match_set(expressions): 7192 this = self.expression( 7193 expressions[self._prev.token_type], 7194 this=this, 7195 comments=self._prev_comments, 7196 expression=parse_method(), 7197 ) 7198 7199 return this 7200 7201 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 7202 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 7203 7204 def _parse_wrapped_csv( 7205 self, parse_method: t.Callable, 
sep: TokenType = TokenType.COMMA, optional: bool = False 7206 ) -> t.List[exp.Expression]: 7207 return self._parse_wrapped( 7208 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 7209 ) 7210 7211 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 7212 wrapped = self._match(TokenType.L_PAREN) 7213 if not wrapped and not optional: 7214 self.raise_error("Expecting (") 7215 parse_result = parse_method() 7216 if wrapped: 7217 self._match_r_paren() 7218 return parse_result 7219 7220 def _parse_expressions(self) -> t.List[exp.Expression]: 7221 return self._parse_csv(self._parse_expression) 7222 7223 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 7224 return self._parse_select() or self._parse_set_operations( 7225 self._parse_alias(self._parse_assignment(), explicit=True) 7226 if alias 7227 else self._parse_assignment() 7228 ) 7229 7230 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 7231 return self._parse_query_modifiers( 7232 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 7233 ) 7234 7235 def _parse_transaction(self) -> exp.Transaction | exp.Command: 7236 this = None 7237 if self._match_texts(self.TRANSACTION_KIND): 7238 this = self._prev.text 7239 7240 self._match_texts(("TRANSACTION", "WORK")) 7241 7242 modes = [] 7243 while True: 7244 mode = [] 7245 while self._match(TokenType.VAR): 7246 mode.append(self._prev.text) 7247 7248 if mode: 7249 modes.append(" ".join(mode)) 7250 if not self._match(TokenType.COMMA): 7251 break 7252 7253 return self.expression(exp.Transaction, this=this, modes=modes) 7254 7255 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 7256 chain = None 7257 savepoint = None 7258 is_rollback = self._prev.token_type == TokenType.ROLLBACK 7259 7260 self._match_texts(("TRANSACTION", "WORK")) 7261 7262 if self._match_text_seq("TO"): 7263 self._match_text_seq("SAVEPOINT") 7264 savepoint = self._parse_id_var() 7265 7266 if self._match(TokenType.AND): 7267 chain = not self._match_text_seq("NO") 7268 self._match_text_seq("CHAIN") 7269 7270 if is_rollback: 7271 return self.expression(exp.Rollback, savepoint=savepoint) 7272 7273 return self.expression(exp.Commit, chain=chain) 7274 7275 def _parse_refresh(self) -> exp.Refresh: 7276 self._match(TokenType.TABLE) 7277 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 7278 7279 def _parse_add_column(self) -> t.Optional[exp.ColumnDef]: 7280 if not self._prev.text.upper() == "ADD": 7281 return None 7282 7283 start = self._index 7284 self._match(TokenType.COLUMN) 7285 7286 exists_column = self._parse_exists(not_=True) 7287 expression = self._parse_field_def() 7288 7289 if not isinstance(expression, exp.ColumnDef): 7290 self._retreat(start) 7291 return None 7292 7293 expression.set("exists", exists_column) 7294 7295 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 7296 if self._match_texts(("FIRST", "AFTER")): 7297 position = self._prev.text 7298 column_position = self.expression( 7299 exp.ColumnPosition, this=self._parse_column(), position=position 7300 ) 7301 expression.set("position", column_position) 7302 7303 return expression 7304 7305 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 7306 drop = self._match(TokenType.DROP) and self._parse_drop() 7307 if drop and not isinstance(drop, exp.Command): 7308 drop.set("kind", drop.args.get("kind", "COLUMN")) 7309 return drop 7310 7311 # 
https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7312 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7313 return self.expression( 7314 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7315 ) 7316 7317 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7318 def _parse_add_alteration() -> t.Optional[exp.Expression]: 7319 self._match_text_seq("ADD") 7320 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7321 return self.expression( 7322 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7323 ) 7324 7325 column_def = self._parse_add_column() 7326 if isinstance(column_def, exp.ColumnDef): 7327 return column_def 7328 7329 exists = self._parse_exists(not_=True) 7330 if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False): 7331 return self.expression( 7332 exp.AddPartition, exists=exists, this=self._parse_field(any_token=True) 7333 ) 7334 7335 return None 7336 7337 if not self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False) and ( 7338 not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN 7339 or self._match_text_seq("COLUMNS") 7340 ): 7341 schema = self._parse_schema() 7342 7343 return ensure_list(schema) if schema else self._parse_csv(self._parse_field_def) 7344 7345 return self._parse_csv(_parse_add_alteration) 7346 7347 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7348 if self._match_texts(self.ALTER_ALTER_PARSERS): 7349 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7350 7351 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7352 # keyword after ALTER we default to parsing this statement 7353 self._match(TokenType.COLUMN) 7354 column = self._parse_field(any_token=True) 7355 7356 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 7357 return self.expression(exp.AlterColumn, this=column, drop=True) 7358 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 7359 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 7360 if self._match(TokenType.COMMENT): 7361 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 7362 if self._match_text_seq("DROP", "NOT", "NULL"): 7363 return self.expression( 7364 exp.AlterColumn, 7365 this=column, 7366 drop=True, 7367 allow_null=True, 7368 ) 7369 if self._match_text_seq("SET", "NOT", "NULL"): 7370 return self.expression( 7371 exp.AlterColumn, 7372 this=column, 7373 allow_null=False, 7374 ) 7375 7376 if self._match_text_seq("SET", "VISIBLE"): 7377 return self.expression(exp.AlterColumn, this=column, visible="VISIBLE") 7378 if self._match_text_seq("SET", "INVISIBLE"): 7379 return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE") 7380 7381 self._match_text_seq("SET", "DATA") 7382 self._match_text_seq("TYPE") 7383 return self.expression( 7384 exp.AlterColumn, 7385 this=column, 7386 dtype=self._parse_types(), 7387 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7388 using=self._match(TokenType.USING) and self._parse_assignment(), 7389 ) 7390 7391 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 7392 if self._match_texts(("ALL", "EVEN", "AUTO")): 7393 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7394 7395 self._match_text_seq("KEY", "DISTKEY") 7396 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7397 7398 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> 
exp.AlterSortKey: 7399 if compound: 7400 self._match_text_seq("SORTKEY") 7401 7402 if self._match(TokenType.L_PAREN, advance=False): 7403 return self.expression( 7404 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7405 ) 7406 7407 self._match_texts(("AUTO", "NONE")) 7408 return self.expression( 7409 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7410 ) 7411 7412 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7413 index = self._index - 1 7414 7415 partition_exists = self._parse_exists() 7416 if self._match(TokenType.PARTITION, advance=False): 7417 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7418 7419 self._retreat(index) 7420 return self._parse_csv(self._parse_drop_column) 7421 7422 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7423 if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN: 7424 exists = self._parse_exists() 7425 old_column = self._parse_column() 7426 to = self._match_text_seq("TO") 7427 new_column = self._parse_column() 7428 7429 if old_column is None or to is None or new_column is None: 7430 return None 7431 7432 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7433 7434 self._match_text_seq("TO") 7435 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7436 7437 def _parse_alter_table_set(self) -> exp.AlterSet: 7438 alter_set = self.expression(exp.AlterSet) 7439 7440 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7441 "TABLE", "PROPERTIES" 7442 ): 7443 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7444 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7445 alter_set.set("expressions", [self._parse_assignment()]) 7446 elif self._match_texts(("LOGGED", "UNLOGGED")): 7447 alter_set.set("option", exp.var(self._prev.text.upper())) 7448 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7449 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7450 elif self._match_text_seq("LOCATION"): 7451 alter_set.set("location", self._parse_field()) 7452 elif self._match_text_seq("ACCESS", "METHOD"): 7453 alter_set.set("access_method", self._parse_field()) 7454 elif self._match_text_seq("TABLESPACE"): 7455 alter_set.set("tablespace", self._parse_field()) 7456 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7457 alter_set.set("file_format", [self._parse_field()]) 7458 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7459 alter_set.set("file_format", self._parse_wrapped_options()) 7460 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7461 alter_set.set("copy_options", self._parse_wrapped_options()) 7462 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7463 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7464 else: 7465 if self._match_text_seq("SERDE"): 7466 alter_set.set("serde", self._parse_field()) 7467 7468 properties = self._parse_wrapped(self._parse_properties, optional=True) 7469 alter_set.set("expressions", [properties]) 7470 7471 return alter_set 7472 7473 def _parse_alter(self) -> exp.Alter | exp.Command: 7474 start = self._prev 7475 7476 alter_token = self._match_set(self.ALTERABLES) and self._prev 7477 if not alter_token: 7478 return self._parse_as_command(start) 7479 7480 exists = self._parse_exists() 7481 only = self._match_text_seq("ONLY") 7482 this = 
self._parse_table(schema=True) 7483 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7484 7485 if self._next: 7486 self._advance() 7487 7488 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7489 if parser: 7490 actions = ensure_list(parser(self)) 7491 not_valid = self._match_text_seq("NOT", "VALID") 7492 options = self._parse_csv(self._parse_property) 7493 7494 if not self._curr and actions: 7495 return self.expression( 7496 exp.Alter, 7497 this=this, 7498 kind=alter_token.text.upper(), 7499 exists=exists, 7500 actions=actions, 7501 only=only, 7502 options=options, 7503 cluster=cluster, 7504 not_valid=not_valid, 7505 ) 7506 7507 return self._parse_as_command(start) 7508 7509 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7510 start = self._prev 7511 # https://duckdb.org/docs/sql/statements/analyze 7512 if not self._curr: 7513 return self.expression(exp.Analyze) 7514 7515 options = [] 7516 while self._match_texts(self.ANALYZE_STYLES): 7517 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7518 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7519 else: 7520 options.append(self._prev.text.upper()) 7521 7522 this: t.Optional[exp.Expression] = None 7523 inner_expression: t.Optional[exp.Expression] = None 7524 7525 kind = self._curr and self._curr.text.upper() 7526 7527 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7528 this = self._parse_table_parts() 7529 elif self._match_text_seq("TABLES"): 7530 if self._match_set((TokenType.FROM, TokenType.IN)): 7531 kind = f"{kind} {self._prev.text.upper()}" 7532 this = self._parse_table(schema=True, is_db_reference=True) 7533 elif self._match_text_seq("DATABASE"): 7534 this = self._parse_table(schema=True, is_db_reference=True) 7535 elif self._match_text_seq("CLUSTER"): 7536 this = self._parse_table() 7537 # Try matching inner expr keywords before fallback to parse table. 
7538 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7539 kind = None 7540 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7541 else: 7542 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7543 kind = None 7544 this = self._parse_table_parts() 7545 7546 partition = self._try_parse(self._parse_partition) 7547 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7548 return self._parse_as_command(start) 7549 7550 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7551 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7552 "WITH", "ASYNC", "MODE" 7553 ): 7554 mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE" 7555 else: 7556 mode = None 7557 7558 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7559 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7560 7561 properties = self._parse_properties() 7562 return self.expression( 7563 exp.Analyze, 7564 kind=kind, 7565 this=this, 7566 mode=mode, 7567 partition=partition, 7568 properties=properties, 7569 expression=inner_expression, 7570 options=options, 7571 ) 7572 7573 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7574 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7575 this = None 7576 kind = self._prev.text.upper() 7577 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7578 expressions = [] 7579 7580 if not self._match_text_seq("STATISTICS"): 7581 self.raise_error("Expecting token STATISTICS") 7582 7583 if self._match_text_seq("NOSCAN"): 7584 this = "NOSCAN" 7585 elif self._match(TokenType.FOR): 7586 if self._match_text_seq("ALL", "COLUMNS"): 7587 this = "FOR ALL COLUMNS" 7588 if self._match_texts("COLUMNS"): 7589 this = "FOR COLUMNS" 7590 expressions = self._parse_csv(self._parse_column_reference) 7591 elif self._match_text_seq("SAMPLE"): 7592 sample = self._parse_number() 7593 expressions = [ 7594 self.expression( 7595 exp.AnalyzeSample, 7596 sample=sample, 7597 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7598 ) 7599 ] 7600 7601 return self.expression( 7602 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7603 ) 7604 7605 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7606 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7607 kind = None 7608 this = None 7609 expression: t.Optional[exp.Expression] = None 7610 if self._match_text_seq("REF", "UPDATE"): 7611 kind = "REF" 7612 this = "UPDATE" 7613 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7614 this = "UPDATE SET DANGLING TO NULL" 7615 elif self._match_text_seq("STRUCTURE"): 7616 kind = "STRUCTURE" 7617 if self._match_text_seq("CASCADE", "FAST"): 7618 this = "CASCADE FAST" 7619 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7620 ("ONLINE", "OFFLINE") 7621 ): 7622 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7623 expression = self._parse_into() 7624 7625 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7626 7627 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7628 this = self._prev.text.upper() 7629 if self._match_text_seq("COLUMNS"): 7630 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7631 return None 7632 7633 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 7634 kind = 
self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7635 if self._match_text_seq("STATISTICS"): 7636 return self.expression(exp.AnalyzeDelete, kind=kind) 7637 return None 7638 7639 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7640 if self._match_text_seq("CHAINED", "ROWS"): 7641 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7642 return None 7643 7644 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7645 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7646 this = self._prev.text.upper() 7647 expression: t.Optional[exp.Expression] = None 7648 expressions = [] 7649 update_options = None 7650 7651 if self._match_text_seq("HISTOGRAM", "ON"): 7652 expressions = self._parse_csv(self._parse_column_reference) 7653 with_expressions = [] 7654 while self._match(TokenType.WITH): 7655 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7656 if self._match_texts(("SYNC", "ASYNC")): 7657 if self._match_text_seq("MODE", advance=False): 7658 with_expressions.append(f"{self._prev.text.upper()} MODE") 7659 self._advance() 7660 else: 7661 buckets = self._parse_number() 7662 if self._match_text_seq("BUCKETS"): 7663 with_expressions.append(f"{buckets} BUCKETS") 7664 if with_expressions: 7665 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7666 7667 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7668 TokenType.UPDATE, advance=False 7669 ): 7670 update_options = self._prev.text.upper() 7671 self._advance() 7672 elif self._match_text_seq("USING", "DATA"): 7673 expression = self.expression(exp.UsingData, this=self._parse_string()) 7674 7675 return self.expression( 7676 exp.AnalyzeHistogram, 7677 this=this, 7678 expressions=expressions, 7679 expression=expression, 7680 update_options=update_options, 7681 ) 7682 7683 def _parse_merge(self) -> exp.Merge: 7684 self._match(TokenType.INTO) 7685 target = self._parse_table() 7686 7687 if target and self._match(TokenType.ALIAS, advance=False): 7688 target.set("alias", self._parse_table_alias()) 7689 7690 self._match(TokenType.USING) 7691 using = self._parse_table() 7692 7693 self._match(TokenType.ON) 7694 on = self._parse_assignment() 7695 7696 return self.expression( 7697 exp.Merge, 7698 this=target, 7699 using=using, 7700 on=on, 7701 whens=self._parse_when_matched(), 7702 returning=self._parse_returning(), 7703 ) 7704 7705 def _parse_when_matched(self) -> exp.Whens: 7706 whens = [] 7707 7708 while self._match(TokenType.WHEN): 7709 matched = not self._match(TokenType.NOT) 7710 self._match_text_seq("MATCHED") 7711 source = ( 7712 False 7713 if self._match_text_seq("BY", "TARGET") 7714 else self._match_text_seq("BY", "SOURCE") 7715 ) 7716 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7717 7718 self._match(TokenType.THEN) 7719 7720 if self._match(TokenType.INSERT): 7721 this = self._parse_star() 7722 if this: 7723 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7724 else: 7725 then = self.expression( 7726 exp.Insert, 7727 this=exp.var("ROW") 7728 if self._match_text_seq("ROW") 7729 else self._parse_value(values=False), 7730 expression=self._match_text_seq("VALUES") and self._parse_value(), 7731 ) 7732 elif self._match(TokenType.UPDATE): 7733 expressions = self._parse_star() 7734 if expressions: 7735 then = self.expression(exp.Update, expressions=expressions) 7736 else: 7737 then = self.expression( 7738 exp.Update, 7739 
expressions=self._match(TokenType.SET) 7740 and self._parse_csv(self._parse_equality), 7741 ) 7742 elif self._match(TokenType.DELETE): 7743 then = self.expression(exp.Var, this=self._prev.text) 7744 else: 7745 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7746 7747 whens.append( 7748 self.expression( 7749 exp.When, 7750 matched=matched, 7751 source=source, 7752 condition=condition, 7753 then=then, 7754 ) 7755 ) 7756 return self.expression(exp.Whens, expressions=whens) 7757 7758 def _parse_show(self) -> t.Optional[exp.Expression]: 7759 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7760 if parser: 7761 return parser(self) 7762 return self._parse_as_command(self._prev) 7763 7764 def _parse_set_item_assignment( 7765 self, kind: t.Optional[str] = None 7766 ) -> t.Optional[exp.Expression]: 7767 index = self._index 7768 7769 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 7770 return self._parse_set_transaction(global_=kind == "GLOBAL") 7771 7772 left = self._parse_primary() or self._parse_column() 7773 assignment_delimiter = self._match_texts(("=", "TO")) 7774 7775 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7776 self._retreat(index) 7777 return None 7778 7779 right = self._parse_statement() or self._parse_id_var() 7780 if isinstance(right, (exp.Column, exp.Identifier)): 7781 right = exp.var(right.name) 7782 7783 this = self.expression(exp.EQ, this=left, expression=right) 7784 return self.expression(exp.SetItem, this=this, kind=kind) 7785 7786 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7787 self._match_text_seq("TRANSACTION") 7788 characteristics = self._parse_csv( 7789 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7790 ) 7791 return self.expression( 7792 exp.SetItem, 7793 expressions=characteristics, 7794 kind="TRANSACTION", 7795 **{"global": global_}, # type: ignore 7796 ) 7797 7798 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7799 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7800 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7801 7802 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7803 index = self._index 7804 set_ = self.expression( 7805 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7806 ) 7807 7808 if self._curr: 7809 self._retreat(index) 7810 return self._parse_as_command(self._prev) 7811 7812 return set_ 7813 7814 def _parse_var_from_options( 7815 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7816 ) -> t.Optional[exp.Var]: 7817 start = self._curr 7818 if not start: 7819 return None 7820 7821 option = start.text.upper() 7822 continuations = options.get(option) 7823 7824 index = self._index 7825 self._advance() 7826 for keywords in continuations or []: 7827 if isinstance(keywords, str): 7828 keywords = (keywords,) 7829 7830 if self._match_text_seq(*keywords): 7831 option = f"{option} {' '.join(keywords)}" 7832 break 7833 else: 7834 if continuations or continuations is None: 7835 if raise_unmatched: 7836 self.raise_error(f"Unknown option {option}") 7837 7838 self._retreat(index) 7839 return None 7840 7841 return exp.var(option) 7842 7843 def _parse_as_command(self, start: Token) -> exp.Command: 7844 while self._curr: 7845 self._advance() 7846 text = self._find_sql(start, self._prev) 7847 size = len(start.text) 7848 self._warn_unsupported() 7849 return exp.Command(this=text[:size], 
expression=text[size:]) 7850 7851 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7852 settings = [] 7853 7854 self._match_l_paren() 7855 kind = self._parse_id_var() 7856 7857 if self._match(TokenType.L_PAREN): 7858 while True: 7859 key = self._parse_id_var() 7860 value = self._parse_primary() 7861 if not key and value is None: 7862 break 7863 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7864 self._match(TokenType.R_PAREN) 7865 7866 self._match_r_paren() 7867 7868 return self.expression( 7869 exp.DictProperty, 7870 this=this, 7871 kind=kind.this if kind else None, 7872 settings=settings, 7873 ) 7874 7875 def _parse_dict_range(self, this: str) -> exp.DictRange: 7876 self._match_l_paren() 7877 has_min = self._match_text_seq("MIN") 7878 if has_min: 7879 min = self._parse_var() or self._parse_primary() 7880 self._match_text_seq("MAX") 7881 max = self._parse_var() or self._parse_primary() 7882 else: 7883 max = self._parse_var() or self._parse_primary() 7884 min = exp.Literal.number(0) 7885 self._match_r_paren() 7886 return self.expression(exp.DictRange, this=this, min=min, max=max) 7887 7888 def _parse_comprehension( 7889 self, this: t.Optional[exp.Expression] 7890 ) -> t.Optional[exp.Comprehension]: 7891 index = self._index 7892 expression = self._parse_column() 7893 if not self._match(TokenType.IN): 7894 self._retreat(index - 1) 7895 return None 7896 iterator = self._parse_column() 7897 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7898 return self.expression( 7899 exp.Comprehension, 7900 this=this, 7901 expression=expression, 7902 iterator=iterator, 7903 condition=condition, 7904 ) 7905 7906 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7907 if self._match(TokenType.HEREDOC_STRING): 7908 return self.expression(exp.Heredoc, this=self._prev.text) 7909 7910 if not self._match_text_seq("$"): 7911 return None 7912 7913 tags = ["$"] 7914 tag_text = None 7915 7916 if self._is_connected(): 7917 self._advance() 7918 tags.append(self._prev.text.upper()) 7919 else: 7920 self.raise_error("No closing $ found") 7921 7922 if tags[-1] != "$": 7923 if self._is_connected() and self._match_text_seq("$"): 7924 tag_text = tags[-1] 7925 tags.append("$") 7926 else: 7927 self.raise_error("No closing $ found") 7928 7929 heredoc_start = self._curr 7930 7931 while self._curr: 7932 if self._match_text_seq(*tags, advance=False): 7933 this = self._find_sql(heredoc_start, self._prev) 7934 self._advance(len(tags)) 7935 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7936 7937 self._advance() 7938 7939 self.raise_error(f"No closing {''.join(tags)} found") 7940 return None 7941 7942 def _find_parser( 7943 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7944 ) -> t.Optional[t.Callable]: 7945 if not self._curr: 7946 return None 7947 7948 index = self._index 7949 this = [] 7950 while True: 7951 # The current token might be multiple words 7952 curr = self._curr.text.upper() 7953 key = curr.split(" ") 7954 this.append(curr) 7955 7956 self._advance() 7957 result, trie = in_trie(trie, key) 7958 if result == TrieResult.FAILED: 7959 break 7960 7961 if result == TrieResult.EXISTS: 7962 subparser = parsers[" ".join(this)] 7963 return subparser 7964 7965 self._retreat(index) 7966 return None 7967 7968 def _match(self, token_type, advance=True, expression=None): 7969 if not self._curr: 7970 return None 7971 7972 if self._curr.token_type == token_type: 7973 if advance: 7974 self._advance() 7975 self._add_comments(expression) 7976 return 
True 7977 7978 return None 7979 7980 def _match_set(self, types, advance=True): 7981 if not self._curr: 7982 return None 7983 7984 if self._curr.token_type in types: 7985 if advance: 7986 self._advance() 7987 return True 7988 7989 return None 7990 7991 def _match_pair(self, token_type_a, token_type_b, advance=True): 7992 if not self._curr or not self._next: 7993 return None 7994 7995 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 7996 if advance: 7997 self._advance(2) 7998 return True 7999 8000 return None 8001 8002 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8003 if not self._match(TokenType.L_PAREN, expression=expression): 8004 self.raise_error("Expecting (") 8005 8006 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8007 if not self._match(TokenType.R_PAREN, expression=expression): 8008 self.raise_error("Expecting )") 8009 8010 def _match_texts(self, texts, advance=True): 8011 if ( 8012 self._curr 8013 and self._curr.token_type != TokenType.STRING 8014 and self._curr.text.upper() in texts 8015 ): 8016 if advance: 8017 self._advance() 8018 return True 8019 return None 8020 8021 def _match_text_seq(self, *texts, advance=True): 8022 index = self._index 8023 for text in texts: 8024 if ( 8025 self._curr 8026 and self._curr.token_type != TokenType.STRING 8027 and self._curr.text.upper() == text 8028 ): 8029 self._advance() 8030 else: 8031 self._retreat(index) 8032 return None 8033 8034 if not advance: 8035 self._retreat(index) 8036 8037 return True 8038 8039 def _replace_lambda( 8040 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 8041 ) -> t.Optional[exp.Expression]: 8042 if not node: 8043 return node 8044 8045 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 8046 8047 for column in node.find_all(exp.Column): 8048 typ = lambda_types.get(column.parts[0].name) 8049 if typ is not None: 8050 dot_or_id = column.to_dot() if column.table else column.this 8051 8052 if typ: 8053 dot_or_id = self.expression( 8054 exp.Cast, 8055 this=dot_or_id, 8056 to=typ, 8057 ) 8058 8059 parent = column.parent 8060 8061 while isinstance(parent, exp.Dot): 8062 if not isinstance(parent.parent, exp.Dot): 8063 parent.replace(dot_or_id) 8064 break 8065 parent = parent.parent 8066 else: 8067 if column is node: 8068 node = dot_or_id 8069 else: 8070 column.replace(dot_or_id) 8071 return node 8072 8073 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 8074 start = self._prev 8075 8076 # Not to be confused with TRUNCATE(number, decimals) function call 8077 if self._match(TokenType.L_PAREN): 8078 self._retreat(self._index - 2) 8079 return self._parse_function() 8080 8081 # Clickhouse supports TRUNCATE DATABASE as well 8082 is_database = self._match(TokenType.DATABASE) 8083 8084 self._match(TokenType.TABLE) 8085 8086 exists = self._parse_exists(not_=False) 8087 8088 expressions = self._parse_csv( 8089 lambda: self._parse_table(schema=True, is_db_reference=is_database) 8090 ) 8091 8092 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 8093 8094 if self._match_text_seq("RESTART", "IDENTITY"): 8095 identity = "RESTART" 8096 elif self._match_text_seq("CONTINUE", "IDENTITY"): 8097 identity = "CONTINUE" 8098 else: 8099 identity = None 8100 8101 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 8102 option = self._prev.text 8103 else: 8104 option = None 8105 8106 partition = self._parse_partition() 
8107 8108 # Fallback case 8109 if self._curr: 8110 return self._parse_as_command(start) 8111 8112 return self.expression( 8113 exp.TruncateTable, 8114 expressions=expressions, 8115 is_database=is_database, 8116 exists=exists, 8117 cluster=cluster, 8118 identity=identity, 8119 option=option, 8120 partition=partition, 8121 ) 8122 8123 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 8124 this = self._parse_ordered(self._parse_opclass) 8125 8126 if not self._match(TokenType.WITH): 8127 return this 8128 8129 op = self._parse_var(any_token=True) 8130 8131 return self.expression(exp.WithOperator, this=this, op=op) 8132 8133 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 8134 self._match(TokenType.EQ) 8135 self._match(TokenType.L_PAREN) 8136 8137 opts: t.List[t.Optional[exp.Expression]] = [] 8138 option: exp.Expression | None 8139 while self._curr and not self._match(TokenType.R_PAREN): 8140 if self._match_text_seq("FORMAT_NAME", "="): 8141 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 8142 option = self._parse_format_name() 8143 else: 8144 option = self._parse_property() 8145 8146 if option is None: 8147 self.raise_error("Unable to parse option") 8148 break 8149 8150 opts.append(option) 8151 8152 return opts 8153 8154 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 8155 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 8156 8157 options = [] 8158 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 8159 option = self._parse_var(any_token=True) 8160 prev = self._prev.text.upper() 8161 8162 # Different dialects might separate options and values by white space, "=" and "AS" 8163 self._match(TokenType.EQ) 8164 self._match(TokenType.ALIAS) 8165 8166 param = self.expression(exp.CopyParameter, this=option) 8167 8168 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8169 TokenType.L_PAREN, advance=False 8170 ): 8171 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8172 param.set("expressions", self._parse_wrapped_options()) 8173 elif prev == "FILE_FORMAT": 8174 # T-SQL's external file format case 8175 param.set("expression", self._parse_field()) 8176 else: 8177 param.set("expression", self._parse_unquoted_field()) 8178 8179 options.append(param) 8180 self._match(sep) 8181 8182 return options 8183 8184 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8185 expr = self.expression(exp.Credentials) 8186 8187 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8188 expr.set("storage", self._parse_field()) 8189 if self._match_text_seq("CREDENTIALS"): 8190 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8191 creds = ( 8192 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8193 ) 8194 expr.set("credentials", creds) 8195 if self._match_text_seq("ENCRYPTION"): 8196 expr.set("encryption", self._parse_wrapped_options()) 8197 if self._match_text_seq("IAM_ROLE"): 8198 expr.set("iam_role", self._parse_field()) 8199 if self._match_text_seq("REGION"): 8200 expr.set("region", self._parse_field()) 8201 8202 return expr 8203 8204 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8205 return self._parse_field() 8206 8207 def _parse_copy(self) -> exp.Copy | exp.Command: 8208 start = self._prev 8209 8210 self._match(TokenType.INTO) 8211 8212 this = ( 8213 self._parse_select(nested=True, parse_subquery_alias=False) 8214 if self._match(TokenType.L_PAREN, advance=False) 8215 else self._parse_table(schema=True) 
8216 ) 8217 8218 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8219 8220 files = self._parse_csv(self._parse_file_location) 8221 credentials = self._parse_credentials() 8222 8223 self._match_text_seq("WITH") 8224 8225 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8226 8227 # Fallback case 8228 if self._curr: 8229 return self._parse_as_command(start) 8230 8231 return self.expression( 8232 exp.Copy, 8233 this=this, 8234 kind=kind, 8235 credentials=credentials, 8236 files=files, 8237 params=params, 8238 ) 8239 8240 def _parse_normalize(self) -> exp.Normalize: 8241 return self.expression( 8242 exp.Normalize, 8243 this=self._parse_bitwise(), 8244 form=self._match(TokenType.COMMA) and self._parse_var(), 8245 ) 8246 8247 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8248 args = self._parse_csv(lambda: self._parse_lambda()) 8249 8250 this = seq_get(args, 0) 8251 decimals = seq_get(args, 1) 8252 8253 return expr_type( 8254 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8255 ) 8256 8257 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8258 star_token = self._prev 8259 8260 if self._match_text_seq("COLUMNS", "(", advance=False): 8261 this = self._parse_function() 8262 if isinstance(this, exp.Columns): 8263 this.set("unpack", True) 8264 return this 8265 8266 return self.expression( 8267 exp.Star, 8268 **{ # type: ignore 8269 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8270 "replace": self._parse_star_op("REPLACE"), 8271 "rename": self._parse_star_op("RENAME"), 8272 }, 8273 ).update_positions(star_token) 8274 8275 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8276 privilege_parts = [] 8277 8278 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8279 # (end of privilege list) or L_PAREN (start of column list) are met 8280 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8281 privilege_parts.append(self._curr.text.upper()) 8282 self._advance() 8283 8284 this = exp.var(" ".join(privilege_parts)) 8285 expressions = ( 8286 self._parse_wrapped_csv(self._parse_column) 8287 if self._match(TokenType.L_PAREN, advance=False) 8288 else None 8289 ) 8290 8291 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8292 8293 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8294 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8295 principal = self._parse_id_var() 8296 8297 if not principal: 8298 return None 8299 8300 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8301 8302 def _parse_grant(self) -> exp.Grant | exp.Command: 8303 start = self._prev 8304 8305 privileges = self._parse_csv(self._parse_grant_privilege) 8306 8307 self._match(TokenType.ON) 8308 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8309 8310 # Attempt to parse the securable e.g. 
MySQL allows names 8311 # such as "foo.*", "*.*" which are not easily parseable yet 8312 securable = self._try_parse(self._parse_table_parts) 8313 8314 if not securable or not self._match_text_seq("TO"): 8315 return self._parse_as_command(start) 8316 8317 principals = self._parse_csv(self._parse_grant_principal) 8318 8319 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8320 8321 if self._curr: 8322 return self._parse_as_command(start) 8323 8324 return self.expression( 8325 exp.Grant, 8326 privileges=privileges, 8327 kind=kind, 8328 securable=securable, 8329 principals=principals, 8330 grant_option=grant_option, 8331 ) 8332 8333 def _parse_overlay(self) -> exp.Overlay: 8334 return self.expression( 8335 exp.Overlay, 8336 **{ # type: ignore 8337 "this": self._parse_bitwise(), 8338 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8339 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8340 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8341 }, 8342 ) 8343 8344 def _parse_format_name(self) -> exp.Property: 8345 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 8346 # for FILE_FORMAT = <format_name> 8347 return self.expression( 8348 exp.Property, 8349 this=exp.var("FORMAT_NAME"), 8350 value=self._parse_string() or self._parse_table_parts(), 8351 ) 8352 8353 def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc: 8354 args: t.List[exp.Expression] = [] 8355 8356 if self._match(TokenType.DISTINCT): 8357 args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()])) 8358 self._match(TokenType.COMMA) 8359 8360 args.extend(self._parse_csv(self._parse_assignment)) 8361 8362 return self.expression( 8363 expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2) 8364 ) 8365 8366 def _identifier_expression( 8367 self, token: t.Optional[Token] = None, **kwargs: t.Any 8368 ) -> exp.Identifier: 8369 token = token or self._prev 8370 expression = self.expression(exp.Identifier, this=token.text, **kwargs) 8371 expression.update_positions(token) 8372 return expression 8373 8374 def _build_pipe_cte( 8375 self, 8376 query: exp.Query, 8377 expressions: t.List[exp.Expression], 8378 alias_cte: t.Optional[exp.TableAlias] = None, 8379 ) -> exp.Select: 8380 new_cte: t.Optional[t.Union[str, exp.TableAlias]] 8381 if alias_cte: 8382 new_cte = alias_cte 8383 else: 8384 self._pipe_cte_counter += 1 8385 new_cte = f"__tmp{self._pipe_cte_counter}" 8386 8387 with_ = query.args.get("with") 8388 ctes = with_.pop() if with_ else None 8389 8390 new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False) 8391 if ctes: 8392 new_select.set("with", ctes) 8393 8394 return new_select.with_(new_cte, as_=query, copy=False) 8395 8396 def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select: 8397 select = self._parse_select(consume_pipe=False) 8398 if not select: 8399 return query 8400 8401 return self._build_pipe_cte( 8402 query=query.select(*select.expressions, append=False), expressions=[exp.Star()] 8403 ) 8404 8405 def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select: 8406 limit = self._parse_limit() 8407 offset = self._parse_offset() 8408 if limit: 8409 curr_limit = query.args.get("limit", limit) 8410 if curr_limit.expression.to_py() >= limit.expression.to_py(): 8411 query.limit(limit, copy=False) 8412 if offset: 8413 curr_offset = query.args.get("offset") 8414 curr_offset = curr_offset.expression.to_py() if curr_offset else 0 
8415 query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False) 8416 8417 return query 8418 8419 def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]: 8420 this = self._parse_assignment() 8421 if self._match_text_seq("GROUP", "AND", advance=False): 8422 return this 8423 8424 this = self._parse_alias(this) 8425 8426 if self._match_set((TokenType.ASC, TokenType.DESC), advance=False): 8427 return self._parse_ordered(lambda: this) 8428 8429 return this 8430 8431 def _parse_pipe_syntax_aggregate_group_order_by( 8432 self, query: exp.Select, group_by_exists: bool = True 8433 ) -> exp.Select: 8434 expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields) 8435 aggregates_or_groups, orders = [], [] 8436 for element in expr: 8437 if isinstance(element, exp.Ordered): 8438 this = element.this 8439 if isinstance(this, exp.Alias): 8440 element.set("this", this.args["alias"]) 8441 orders.append(element) 8442 else: 8443 this = element 8444 aggregates_or_groups.append(this) 8445 8446 if group_by_exists: 8447 query.select(*aggregates_or_groups, copy=False).group_by( 8448 *[projection.args.get("alias", projection) for projection in aggregates_or_groups], 8449 copy=False, 8450 ) 8451 else: 8452 query.select(*aggregates_or_groups, append=False, copy=False) 8453 8454 if orders: 8455 return query.order_by(*orders, append=False, copy=False) 8456 8457 return query 8458 8459 def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select: 8460 self._match_text_seq("AGGREGATE") 8461 query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False) 8462 8463 if self._match(TokenType.GROUP_BY) or ( 8464 self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY) 8465 ): 8466 query = self._parse_pipe_syntax_aggregate_group_order_by(query) 8467 8468 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8469 8470 def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> t.Optional[exp.Query]: 8471 first_setop = self.parse_set_operation(this=query) 8472 if not first_setop: 8473 return None 8474 8475 def _parse_and_unwrap_query() -> t.Optional[exp.Select]: 8476 expr = self._parse_paren() 8477 return expr.assert_is(exp.Subquery).unnest() if expr else None 8478 8479 first_setop.this.pop() 8480 8481 setops = [ 8482 first_setop.expression.pop().assert_is(exp.Subquery).unnest(), 8483 *self._parse_csv(_parse_and_unwrap_query), 8484 ] 8485 8486 query = self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8487 with_ = query.args.get("with") 8488 ctes = with_.pop() if with_ else None 8489 8490 if isinstance(first_setop, exp.Union): 8491 query = query.union(*setops, copy=False, **first_setop.args) 8492 elif isinstance(first_setop, exp.Except): 8493 query = query.except_(*setops, copy=False, **first_setop.args) 8494 else: 8495 query = query.intersect(*setops, copy=False, **first_setop.args) 8496 8497 query.set("with", ctes) 8498 8499 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8500 8501 def _parse_pipe_syntax_join(self, query: exp.Query) -> t.Optional[exp.Query]: 8502 join = self._parse_join() 8503 if not join: 8504 return None 8505 8506 if isinstance(query, exp.Select): 8507 return query.join(join, copy=False) 8508 8509 return query 8510 8511 def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select: 8512 pivots = self._parse_pivots() 8513 if not pivots: 8514 return query 8515 8516 from_ = query.args.get("from") 8517 if from_: 8518 from_.this.set("pivots", pivots) 8519 8520 
return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8521 8522 def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select: 8523 self._match_text_seq("EXTEND") 8524 query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False) 8525 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8526 8527 def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select: 8528 sample = self._parse_table_sample() 8529 8530 with_ = query.args.get("with") 8531 if with_: 8532 with_.expressions[-1].this.set("sample", sample) 8533 else: 8534 query.set("sample", sample) 8535 8536 return query 8537 8538 def _parse_pipe_syntax_query(self, query: exp.Query) -> t.Optional[exp.Query]: 8539 if isinstance(query, exp.Subquery): 8540 query = exp.select("*").from_(query, copy=False) 8541 8542 if not query.args.get("from"): 8543 query = exp.select("*").from_(query.subquery(copy=False), copy=False) 8544 8545 while self._match(TokenType.PIPE_GT): 8546 start = self._curr 8547 parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper()) 8548 if not parser: 8549 # The set operators (UNION, etc) and the JOIN operator have a few common starting 8550 # keywords, making it tricky to disambiguate them without lookahead. The approach 8551 # here is to try and parse a set operation and if that fails, then try to parse a 8552 # join operator. If that fails as well, then the operator is not supported. 8553 parsed_query = self._parse_pipe_syntax_set_operator(query) 8554 parsed_query = parsed_query or self._parse_pipe_syntax_join(query) 8555 if not parsed_query: 8556 self._retreat(start) 8557 self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.") 8558 break 8559 query = parsed_query 8560 else: 8561 query = parser(self, query) 8562 8563 return query
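For orientation, here is a minimal usage sketch (not part of the module source above) showing how this Parser is typically driven: the SQL is tokenized first, and the resulting token list is handed to Parser.parse, which returns one expression tree per statement. This is roughly what the top-level sqlglot.parse() helper does internally.

from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

# Tokenize, then parse; Parser.parse returns a list with one
# exp.Expression per statement in the input string.
tokens = Parser, Tokenizer = Parser(), Tokenizer()
token_list = Tokenizer.tokenize("SELECT a FROM t WHERE b > 1")
expressions = Parser.parse(token_list)
print(expressions[0].sql())  # SELECT a FROM t WHERE b > 1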
107def build_mod(args: t.List) -> exp.Mod: 108 this = seq_get(args, 0) 109 expression = seq_get(args, 1) 110 111 # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7 112 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 113 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 114 115 return exp.Mod(this=this, expression=expression)
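The parenthesization matters because `MOD(x, y)` is typically generated as the `%` operator, where an unwrapped binary operand would regroup under normal precedence. For instance, with the default dialect:

import sqlglot

# The Paren wrapper inserted by build_mod keeps the original grouping when
# MOD(a + 1, 7) is rendered with the % operator.
print(sqlglot.transpile("SELECT MOD(a + 1, 7)")[0])
# SELECT (a + 1) % 7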
127def build_array_constructor( 128 exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect 129) -> exp.Expression: 130 array_exp = exp_class(expressions=args) 131 132 if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS: 133 array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET) 134 135 return array_exp
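A minimal sketch of calling this builder directly; `duckdb` is used only to obtain a `Dialect` instance, not as a claim about its array semantics:

from sqlglot import exp
from sqlglot.dialects import Dialect
from sqlglot.parser import build_array_constructor
from sqlglot.tokens import TokenType

# bracket_notation is only recorded when the dialect treats ARRAY[...] and
# ARRAY(...) as distinct constructors (HAS_DISTINCT_ARRAY_CONSTRUCTORS).
dialect = Dialect.get_or_raise("duckdb")
arr = build_array_constructor(
    exp.Array,
    [exp.Literal.number(1), exp.Literal.number(2)],
    TokenType.L_BRACKET,
    dialect,
)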
138def build_convert_timezone( 139 args: t.List, default_source_tz: t.Optional[str] = None 140) -> t.Union[exp.ConvertTimezone, exp.Anonymous]: 141 if len(args) == 2: 142 source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None 143 return exp.ConvertTimezone( 144 source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1) 145 ) 146 147 return exp.ConvertTimezone.from_arg_list(args)
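A minimal sketch of the two-argument path, where the missing source timezone is backfilled from `default_source_tz` (the timezone values here are illustrative):

from sqlglot import exp
from sqlglot.parser import build_convert_timezone

# With two arguments, args are (target_tz, timestamp) and source_tz falls back
# to the supplied default; with three, the standard argument list is used.
node = build_convert_timezone(
    [exp.Literal.string("America/New_York"), exp.column("ts")],
    default_source_tz="UTC",
)
assert node.args["source_tz"].this == "UTC"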
182class Parser(metaclass=_Parser): 183 """ 184 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 185 186 Args: 187 error_level: The desired error level. 188 Default: ErrorLevel.IMMEDIATE 189 error_message_context: The amount of context to capture from a query string when displaying 190 the error message (in number of characters). 191 Default: 100 192 max_errors: Maximum number of error messages to include in a raised ParseError. 193 This is only relevant if error_level is ErrorLevel.RAISE. 194 Default: 3 195 """ 196 197 FUNCTIONS: t.Dict[str, t.Callable] = { 198 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 199 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 200 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 201 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 202 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 203 ), 204 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 205 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 206 ), 207 "CHAR": lambda args: exp.Chr(expressions=args), 208 "CHR": lambda args: exp.Chr(expressions=args), 209 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 210 "CONCAT": lambda args, dialect: exp.Concat( 211 expressions=args, 212 safe=not dialect.STRICT_STRING_CONCAT, 213 coalesce=dialect.CONCAT_COALESCE, 214 ), 215 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 216 expressions=args, 217 safe=not dialect.STRICT_STRING_CONCAT, 218 coalesce=dialect.CONCAT_COALESCE, 219 ), 220 "CONVERT_TIMEZONE": build_convert_timezone, 221 "DATE_TO_DATE_STR": lambda args: exp.Cast( 222 this=seq_get(args, 0), 223 to=exp.DataType(this=exp.DataType.Type.TEXT), 224 ), 225 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 226 start=seq_get(args, 0), 227 end=seq_get(args, 1), 228 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")), 229 ), 230 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 231 "HEX": build_hex, 232 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 233 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 234 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 235 "LIKE": build_like, 236 "LOG": build_logarithm, 237 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 238 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 239 "LOWER": build_lower, 240 "LPAD": lambda args: build_pad(args), 241 "LEFTPAD": lambda args: build_pad(args), 242 "LTRIM": lambda args: build_trim(args), 243 "MOD": build_mod, 244 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 245 "RPAD": lambda args: build_pad(args, is_left=False), 246 "RTRIM": lambda args: build_trim(args, is_left=False), 247 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 248 if len(args) != 2 249 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 250 "STRPOS": exp.StrPosition.from_arg_list, 251 "CHARINDEX": lambda args: build_locate_strposition(args), 252 "INSTR": exp.StrPosition.from_arg_list, 253 "LOCATE": lambda args: build_locate_strposition(args), 254 "TIME_TO_TIME_STR": lambda args: exp.Cast( 255 this=seq_get(args, 0), 256 to=exp.DataType(this=exp.DataType.Type.TEXT), 257 ), 258 "TO_HEX": build_hex, 259 "TS_OR_DS_TO_DATE_STR": 
lambda args: exp.Substring( 260 this=exp.Cast( 261 this=seq_get(args, 0), 262 to=exp.DataType(this=exp.DataType.Type.TEXT), 263 ), 264 start=exp.Literal.number(1), 265 length=exp.Literal.number(10), 266 ), 267 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 268 "UPPER": build_upper, 269 "VAR_MAP": build_var_map, 270 } 271 272 NO_PAREN_FUNCTIONS = { 273 TokenType.CURRENT_DATE: exp.CurrentDate, 274 TokenType.CURRENT_DATETIME: exp.CurrentDate, 275 TokenType.CURRENT_TIME: exp.CurrentTime, 276 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 277 TokenType.CURRENT_USER: exp.CurrentUser, 278 } 279 280 STRUCT_TYPE_TOKENS = { 281 TokenType.NESTED, 282 TokenType.OBJECT, 283 TokenType.STRUCT, 284 TokenType.UNION, 285 } 286 287 NESTED_TYPE_TOKENS = { 288 TokenType.ARRAY, 289 TokenType.LIST, 290 TokenType.LOWCARDINALITY, 291 TokenType.MAP, 292 TokenType.NULLABLE, 293 TokenType.RANGE, 294 *STRUCT_TYPE_TOKENS, 295 } 296 297 ENUM_TYPE_TOKENS = { 298 TokenType.DYNAMIC, 299 TokenType.ENUM, 300 TokenType.ENUM8, 301 TokenType.ENUM16, 302 } 303 304 AGGREGATE_TYPE_TOKENS = { 305 TokenType.AGGREGATEFUNCTION, 306 TokenType.SIMPLEAGGREGATEFUNCTION, 307 } 308 309 TYPE_TOKENS = { 310 TokenType.BIT, 311 TokenType.BOOLEAN, 312 TokenType.TINYINT, 313 TokenType.UTINYINT, 314 TokenType.SMALLINT, 315 TokenType.USMALLINT, 316 TokenType.INT, 317 TokenType.UINT, 318 TokenType.BIGINT, 319 TokenType.UBIGINT, 320 TokenType.INT128, 321 TokenType.UINT128, 322 TokenType.INT256, 323 TokenType.UINT256, 324 TokenType.MEDIUMINT, 325 TokenType.UMEDIUMINT, 326 TokenType.FIXEDSTRING, 327 TokenType.FLOAT, 328 TokenType.DOUBLE, 329 TokenType.UDOUBLE, 330 TokenType.CHAR, 331 TokenType.NCHAR, 332 TokenType.VARCHAR, 333 TokenType.NVARCHAR, 334 TokenType.BPCHAR, 335 TokenType.TEXT, 336 TokenType.MEDIUMTEXT, 337 TokenType.LONGTEXT, 338 TokenType.BLOB, 339 TokenType.MEDIUMBLOB, 340 TokenType.LONGBLOB, 341 TokenType.BINARY, 342 TokenType.VARBINARY, 343 TokenType.JSON, 344 TokenType.JSONB, 345 TokenType.INTERVAL, 346 TokenType.TINYBLOB, 347 TokenType.TINYTEXT, 348 TokenType.TIME, 349 TokenType.TIMETZ, 350 TokenType.TIMESTAMP, 351 TokenType.TIMESTAMP_S, 352 TokenType.TIMESTAMP_MS, 353 TokenType.TIMESTAMP_NS, 354 TokenType.TIMESTAMPTZ, 355 TokenType.TIMESTAMPLTZ, 356 TokenType.TIMESTAMPNTZ, 357 TokenType.DATETIME, 358 TokenType.DATETIME2, 359 TokenType.DATETIME64, 360 TokenType.SMALLDATETIME, 361 TokenType.DATE, 362 TokenType.DATE32, 363 TokenType.INT4RANGE, 364 TokenType.INT4MULTIRANGE, 365 TokenType.INT8RANGE, 366 TokenType.INT8MULTIRANGE, 367 TokenType.NUMRANGE, 368 TokenType.NUMMULTIRANGE, 369 TokenType.TSRANGE, 370 TokenType.TSMULTIRANGE, 371 TokenType.TSTZRANGE, 372 TokenType.TSTZMULTIRANGE, 373 TokenType.DATERANGE, 374 TokenType.DATEMULTIRANGE, 375 TokenType.DECIMAL, 376 TokenType.DECIMAL32, 377 TokenType.DECIMAL64, 378 TokenType.DECIMAL128, 379 TokenType.DECIMAL256, 380 TokenType.UDECIMAL, 381 TokenType.BIGDECIMAL, 382 TokenType.UUID, 383 TokenType.GEOGRAPHY, 384 TokenType.GEOMETRY, 385 TokenType.POINT, 386 TokenType.RING, 387 TokenType.LINESTRING, 388 TokenType.MULTILINESTRING, 389 TokenType.POLYGON, 390 TokenType.MULTIPOLYGON, 391 TokenType.HLLSKETCH, 392 TokenType.HSTORE, 393 TokenType.PSEUDO_TYPE, 394 TokenType.SUPER, 395 TokenType.SERIAL, 396 TokenType.SMALLSERIAL, 397 TokenType.BIGSERIAL, 398 TokenType.XML, 399 TokenType.YEAR, 400 TokenType.USERDEFINED, 401 TokenType.MONEY, 402 TokenType.SMALLMONEY, 403 TokenType.ROWVERSION, 404 TokenType.IMAGE, 405 TokenType.VARIANT, 406 TokenType.VECTOR, 407 
TokenType.VOID, 408 TokenType.OBJECT, 409 TokenType.OBJECT_IDENTIFIER, 410 TokenType.INET, 411 TokenType.IPADDRESS, 412 TokenType.IPPREFIX, 413 TokenType.IPV4, 414 TokenType.IPV6, 415 TokenType.UNKNOWN, 416 TokenType.NOTHING, 417 TokenType.NULL, 418 TokenType.NAME, 419 TokenType.TDIGEST, 420 TokenType.DYNAMIC, 421 *ENUM_TYPE_TOKENS, 422 *NESTED_TYPE_TOKENS, 423 *AGGREGATE_TYPE_TOKENS, 424 } 425 426 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 427 TokenType.BIGINT: TokenType.UBIGINT, 428 TokenType.INT: TokenType.UINT, 429 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 430 TokenType.SMALLINT: TokenType.USMALLINT, 431 TokenType.TINYINT: TokenType.UTINYINT, 432 TokenType.DECIMAL: TokenType.UDECIMAL, 433 TokenType.DOUBLE: TokenType.UDOUBLE, 434 } 435 436 SUBQUERY_PREDICATES = { 437 TokenType.ANY: exp.Any, 438 TokenType.ALL: exp.All, 439 TokenType.EXISTS: exp.Exists, 440 TokenType.SOME: exp.Any, 441 } 442 443 RESERVED_TOKENS = { 444 *Tokenizer.SINGLE_TOKENS.values(), 445 TokenType.SELECT, 446 } - {TokenType.IDENTIFIER} 447 448 DB_CREATABLES = { 449 TokenType.DATABASE, 450 TokenType.DICTIONARY, 451 TokenType.FILE_FORMAT, 452 TokenType.MODEL, 453 TokenType.NAMESPACE, 454 TokenType.SCHEMA, 455 TokenType.SEQUENCE, 456 TokenType.SINK, 457 TokenType.SOURCE, 458 TokenType.STAGE, 459 TokenType.STORAGE_INTEGRATION, 460 TokenType.STREAMLIT, 461 TokenType.TABLE, 462 TokenType.TAG, 463 TokenType.VIEW, 464 TokenType.WAREHOUSE, 465 } 466 467 CREATABLES = { 468 TokenType.COLUMN, 469 TokenType.CONSTRAINT, 470 TokenType.FOREIGN_KEY, 471 TokenType.FUNCTION, 472 TokenType.INDEX, 473 TokenType.PROCEDURE, 474 *DB_CREATABLES, 475 } 476 477 ALTERABLES = { 478 TokenType.INDEX, 479 TokenType.TABLE, 480 TokenType.VIEW, 481 } 482 483 # Tokens that can represent identifiers 484 ID_VAR_TOKENS = { 485 TokenType.ALL, 486 TokenType.ATTACH, 487 TokenType.VAR, 488 TokenType.ANTI, 489 TokenType.APPLY, 490 TokenType.ASC, 491 TokenType.ASOF, 492 TokenType.AUTO_INCREMENT, 493 TokenType.BEGIN, 494 TokenType.BPCHAR, 495 TokenType.CACHE, 496 TokenType.CASE, 497 TokenType.COLLATE, 498 TokenType.COMMAND, 499 TokenType.COMMENT, 500 TokenType.COMMIT, 501 TokenType.CONSTRAINT, 502 TokenType.COPY, 503 TokenType.CUBE, 504 TokenType.CURRENT_SCHEMA, 505 TokenType.DEFAULT, 506 TokenType.DELETE, 507 TokenType.DESC, 508 TokenType.DESCRIBE, 509 TokenType.DETACH, 510 TokenType.DICTIONARY, 511 TokenType.DIV, 512 TokenType.END, 513 TokenType.EXECUTE, 514 TokenType.EXPORT, 515 TokenType.ESCAPE, 516 TokenType.FALSE, 517 TokenType.FIRST, 518 TokenType.FILTER, 519 TokenType.FINAL, 520 TokenType.FORMAT, 521 TokenType.FULL, 522 TokenType.GET, 523 TokenType.IDENTIFIER, 524 TokenType.IS, 525 TokenType.ISNULL, 526 TokenType.INTERVAL, 527 TokenType.KEEP, 528 TokenType.KILL, 529 TokenType.LEFT, 530 TokenType.LIMIT, 531 TokenType.LOAD, 532 TokenType.MERGE, 533 TokenType.NATURAL, 534 TokenType.NEXT, 535 TokenType.OFFSET, 536 TokenType.OPERATOR, 537 TokenType.ORDINALITY, 538 TokenType.OVERLAPS, 539 TokenType.OVERWRITE, 540 TokenType.PARTITION, 541 TokenType.PERCENT, 542 TokenType.PIVOT, 543 TokenType.PRAGMA, 544 TokenType.PUT, 545 TokenType.RANGE, 546 TokenType.RECURSIVE, 547 TokenType.REFERENCES, 548 TokenType.REFRESH, 549 TokenType.RENAME, 550 TokenType.REPLACE, 551 TokenType.RIGHT, 552 TokenType.ROLLUP, 553 TokenType.ROW, 554 TokenType.ROWS, 555 TokenType.SEMI, 556 TokenType.SET, 557 TokenType.SETTINGS, 558 TokenType.SHOW, 559 TokenType.TEMPORARY, 560 TokenType.TOP, 561 TokenType.TRUE, 562 TokenType.TRUNCATE, 563 TokenType.UNIQUE, 564 TokenType.UNNEST, 565 TokenType.UNPIVOT, 
566 TokenType.UPDATE, 567 TokenType.USE, 568 TokenType.VOLATILE, 569 TokenType.WINDOW, 570 *CREATABLES, 571 *SUBQUERY_PREDICATES, 572 *TYPE_TOKENS, 573 *NO_PAREN_FUNCTIONS, 574 } 575 ID_VAR_TOKENS.remove(TokenType.UNION) 576 577 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 578 TokenType.ANTI, 579 TokenType.APPLY, 580 TokenType.ASOF, 581 TokenType.FULL, 582 TokenType.LEFT, 583 TokenType.LOCK, 584 TokenType.NATURAL, 585 TokenType.RIGHT, 586 TokenType.SEMI, 587 TokenType.WINDOW, 588 } 589 590 ALIAS_TOKENS = ID_VAR_TOKENS 591 592 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS 593 594 ARRAY_CONSTRUCTORS = { 595 "ARRAY": exp.Array, 596 "LIST": exp.List, 597 } 598 599 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 600 601 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 602 603 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 604 605 FUNC_TOKENS = { 606 TokenType.COLLATE, 607 TokenType.COMMAND, 608 TokenType.CURRENT_DATE, 609 TokenType.CURRENT_DATETIME, 610 TokenType.CURRENT_SCHEMA, 611 TokenType.CURRENT_TIMESTAMP, 612 TokenType.CURRENT_TIME, 613 TokenType.CURRENT_USER, 614 TokenType.FILTER, 615 TokenType.FIRST, 616 TokenType.FORMAT, 617 TokenType.GET, 618 TokenType.GLOB, 619 TokenType.IDENTIFIER, 620 TokenType.INDEX, 621 TokenType.ISNULL, 622 TokenType.ILIKE, 623 TokenType.INSERT, 624 TokenType.LIKE, 625 TokenType.MERGE, 626 TokenType.NEXT, 627 TokenType.OFFSET, 628 TokenType.PRIMARY_KEY, 629 TokenType.RANGE, 630 TokenType.REPLACE, 631 TokenType.RLIKE, 632 TokenType.ROW, 633 TokenType.UNNEST, 634 TokenType.VAR, 635 TokenType.LEFT, 636 TokenType.RIGHT, 637 TokenType.SEQUENCE, 638 TokenType.DATE, 639 TokenType.DATETIME, 640 TokenType.TABLE, 641 TokenType.TIMESTAMP, 642 TokenType.TIMESTAMPTZ, 643 TokenType.TRUNCATE, 644 TokenType.WINDOW, 645 TokenType.XOR, 646 *TYPE_TOKENS, 647 *SUBQUERY_PREDICATES, 648 } 649 650 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 651 TokenType.AND: exp.And, 652 } 653 654 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 655 TokenType.COLON_EQ: exp.PropertyEQ, 656 } 657 658 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 659 TokenType.OR: exp.Or, 660 } 661 662 EQUALITY = { 663 TokenType.EQ: exp.EQ, 664 TokenType.NEQ: exp.NEQ, 665 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 666 } 667 668 COMPARISON = { 669 TokenType.GT: exp.GT, 670 TokenType.GTE: exp.GTE, 671 TokenType.LT: exp.LT, 672 TokenType.LTE: exp.LTE, 673 } 674 675 BITWISE = { 676 TokenType.AMP: exp.BitwiseAnd, 677 TokenType.CARET: exp.BitwiseXor, 678 TokenType.PIPE: exp.BitwiseOr, 679 } 680 681 TERM = { 682 TokenType.DASH: exp.Sub, 683 TokenType.PLUS: exp.Add, 684 TokenType.MOD: exp.Mod, 685 TokenType.COLLATE: exp.Collate, 686 } 687 688 FACTOR = { 689 TokenType.DIV: exp.IntDiv, 690 TokenType.LR_ARROW: exp.Distance, 691 TokenType.SLASH: exp.Div, 692 TokenType.STAR: exp.Mul, 693 } 694 695 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 696 697 TIMES = { 698 TokenType.TIME, 699 TokenType.TIMETZ, 700 } 701 702 TIMESTAMPS = { 703 TokenType.TIMESTAMP, 704 TokenType.TIMESTAMPNTZ, 705 TokenType.TIMESTAMPTZ, 706 TokenType.TIMESTAMPLTZ, 707 *TIMES, 708 } 709 710 SET_OPERATIONS = { 711 TokenType.UNION, 712 TokenType.INTERSECT, 713 TokenType.EXCEPT, 714 } 715 716 JOIN_METHODS = { 717 TokenType.ASOF, 718 TokenType.NATURAL, 719 TokenType.POSITIONAL, 720 } 721 722 JOIN_SIDES = { 723 TokenType.LEFT, 724 TokenType.RIGHT, 725 TokenType.FULL, 726 } 727 728 JOIN_KINDS = { 729 TokenType.ANTI, 730 TokenType.CROSS, 731 TokenType.INNER, 732 TokenType.OUTER, 733 TokenType.SEMI, 734 
TokenType.STRAIGHT_JOIN, 735 } 736 737 JOIN_HINTS: t.Set[str] = set() 738 739 LAMBDAS = { 740 TokenType.ARROW: lambda self, expressions: self.expression( 741 exp.Lambda, 742 this=self._replace_lambda( 743 self._parse_assignment(), 744 expressions, 745 ), 746 expressions=expressions, 747 ), 748 TokenType.FARROW: lambda self, expressions: self.expression( 749 exp.Kwarg, 750 this=exp.var(expressions[0].name), 751 expression=self._parse_assignment(), 752 ), 753 } 754 755 COLUMN_OPERATORS = { 756 TokenType.DOT: None, 757 TokenType.DOTCOLON: lambda self, this, to: self.expression( 758 exp.JSONCast, 759 this=this, 760 to=to, 761 ), 762 TokenType.DCOLON: lambda self, this, to: self.expression( 763 exp.Cast if self.STRICT_CAST else exp.TryCast, 764 this=this, 765 to=to, 766 ), 767 TokenType.ARROW: lambda self, this, path: self.expression( 768 exp.JSONExtract, 769 this=this, 770 expression=self.dialect.to_json_path(path), 771 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 772 ), 773 TokenType.DARROW: lambda self, this, path: self.expression( 774 exp.JSONExtractScalar, 775 this=this, 776 expression=self.dialect.to_json_path(path), 777 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 778 ), 779 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 780 exp.JSONBExtract, 781 this=this, 782 expression=path, 783 ), 784 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 785 exp.JSONBExtractScalar, 786 this=this, 787 expression=path, 788 ), 789 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 790 exp.JSONBContains, 791 this=this, 792 expression=key, 793 ), 794 } 795 796 EXPRESSION_PARSERS = { 797 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 798 exp.Column: lambda self: self._parse_column(), 799 exp.Condition: lambda self: self._parse_assignment(), 800 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 801 exp.Expression: lambda self: self._parse_expression(), 802 exp.From: lambda self: self._parse_from(joins=True), 803 exp.Group: lambda self: self._parse_group(), 804 exp.Having: lambda self: self._parse_having(), 805 exp.Hint: lambda self: self._parse_hint_body(), 806 exp.Identifier: lambda self: self._parse_id_var(), 807 exp.Join: lambda self: self._parse_join(), 808 exp.Lambda: lambda self: self._parse_lambda(), 809 exp.Lateral: lambda self: self._parse_lateral(), 810 exp.Limit: lambda self: self._parse_limit(), 811 exp.Offset: lambda self: self._parse_offset(), 812 exp.Order: lambda self: self._parse_order(), 813 exp.Ordered: lambda self: self._parse_ordered(), 814 exp.Properties: lambda self: self._parse_properties(), 815 exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(), 816 exp.Qualify: lambda self: self._parse_qualify(), 817 exp.Returning: lambda self: self._parse_returning(), 818 exp.Select: lambda self: self._parse_select(), 819 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 820 exp.Table: lambda self: self._parse_table_parts(), 821 exp.TableAlias: lambda self: self._parse_table_alias(), 822 exp.Tuple: lambda self: self._parse_value(values=False), 823 exp.Whens: lambda self: self._parse_when_matched(), 824 exp.Where: lambda self: self._parse_where(), 825 exp.Window: lambda self: self._parse_named_window(), 826 exp.With: lambda self: self._parse_with(), 827 "JOIN_TYPE": lambda self: self._parse_join_parts(), 828 } 829 830 STATEMENT_PARSERS = { 831 TokenType.ALTER: lambda self: self._parse_alter(), 832 TokenType.ANALYZE: lambda self: 
self._parse_analyze(), 833 TokenType.BEGIN: lambda self: self._parse_transaction(), 834 TokenType.CACHE: lambda self: self._parse_cache(), 835 TokenType.COMMENT: lambda self: self._parse_comment(), 836 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 837 TokenType.COPY: lambda self: self._parse_copy(), 838 TokenType.CREATE: lambda self: self._parse_create(), 839 TokenType.DELETE: lambda self: self._parse_delete(), 840 TokenType.DESC: lambda self: self._parse_describe(), 841 TokenType.DESCRIBE: lambda self: self._parse_describe(), 842 TokenType.DROP: lambda self: self._parse_drop(), 843 TokenType.GRANT: lambda self: self._parse_grant(), 844 TokenType.INSERT: lambda self: self._parse_insert(), 845 TokenType.KILL: lambda self: self._parse_kill(), 846 TokenType.LOAD: lambda self: self._parse_load(), 847 TokenType.MERGE: lambda self: self._parse_merge(), 848 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 849 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 850 TokenType.REFRESH: lambda self: self._parse_refresh(), 851 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 852 TokenType.SET: lambda self: self._parse_set(), 853 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 854 TokenType.UNCACHE: lambda self: self._parse_uncache(), 855 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 856 TokenType.UPDATE: lambda self: self._parse_update(), 857 TokenType.USE: lambda self: self._parse_use(), 858 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 859 } 860 861 UNARY_PARSERS = { 862 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 863 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 864 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 865 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 866 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 867 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 868 } 869 870 STRING_PARSERS = { 871 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 872 exp.RawString, this=token.text 873 ), 874 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 875 exp.National, this=token.text 876 ), 877 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 878 TokenType.STRING: lambda self, token: self.expression( 879 exp.Literal, this=token.text, is_string=True 880 ), 881 TokenType.UNICODE_STRING: lambda self, token: self.expression( 882 exp.UnicodeString, 883 this=token.text, 884 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 885 ), 886 } 887 888 NUMERIC_PARSERS = { 889 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 890 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 891 TokenType.HEX_STRING: lambda self, token: self.expression( 892 exp.HexString, 893 this=token.text, 894 is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None, 895 ), 896 TokenType.NUMBER: lambda self, token: self.expression( 897 exp.Literal, this=token.text, is_string=False 898 ), 899 } 900 901 PRIMARY_PARSERS = { 902 **STRING_PARSERS, 903 **NUMERIC_PARSERS, 904 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 905 TokenType.NULL: lambda self, _: self.expression(exp.Null), 906 
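        # Note: the keyword constants here (NULL above, TRUE/FALSE below) build
        # dedicated constant nodes rather than falling through to identifier parsing.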
TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 907 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 908 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 909 TokenType.STAR: lambda self, _: self._parse_star_ops(), 910 } 911 912 PLACEHOLDER_PARSERS = { 913 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 914 TokenType.PARAMETER: lambda self: self._parse_parameter(), 915 TokenType.COLON: lambda self: ( 916 self.expression(exp.Placeholder, this=self._prev.text) 917 if self._match_set(self.COLON_PLACEHOLDER_TOKENS) 918 else None 919 ), 920 } 921 922 RANGE_PARSERS = { 923 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 924 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 925 TokenType.GLOB: binary_range_parser(exp.Glob), 926 TokenType.ILIKE: binary_range_parser(exp.ILike), 927 TokenType.IN: lambda self, this: self._parse_in(this), 928 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 929 TokenType.IS: lambda self, this: self._parse_is(this), 930 TokenType.LIKE: binary_range_parser(exp.Like), 931 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 932 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 933 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 934 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 935 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 936 } 937 938 PIPE_SYNTAX_TRANSFORM_PARSERS = { 939 "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query), 940 "AS": lambda self, query: self._build_pipe_cte( 941 query, [exp.Star()], self._parse_table_alias() 942 ), 943 "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query), 944 "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query), 945 "ORDER BY": lambda self, query: query.order_by( 946 self._parse_order(), append=False, copy=False 947 ), 948 "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 949 "SELECT": lambda self, query: self._parse_pipe_syntax_select(query), 950 "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query), 951 "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 952 "WHERE": lambda self, query: query.where(self._parse_where(), copy=False), 953 } 954 955 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 956 "ALLOWED_VALUES": lambda self: self.expression( 957 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 958 ), 959 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 960 "AUTO": lambda self: self._parse_auto_property(), 961 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 962 "BACKUP": lambda self: self.expression( 963 exp.BackupProperty, this=self._parse_var(any_token=True) 964 ), 965 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 966 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 967 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 968 "CHECKSUM": lambda self: self._parse_checksum(), 969 "CLUSTER BY": lambda self: self._parse_cluster(), 970 "CLUSTERED": lambda self: self._parse_clustered_by(), 971 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 972 exp.CollateProperty, **kwargs 973 ), 974 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 975 "CONTAINS": lambda self: self._parse_contains_property(), 976 "COPY": 
lambda self: self._parse_copy_property(), 977 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 978 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 979 "DEFINER": lambda self: self._parse_definer(), 980 "DETERMINISTIC": lambda self: self.expression( 981 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 982 ), 983 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 984 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 985 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 986 "DISTKEY": lambda self: self._parse_distkey(), 987 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 988 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 989 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 990 "ENVIRONMENT": lambda self: self.expression( 991 exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment) 992 ), 993 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 994 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 995 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 996 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 997 "FREESPACE": lambda self: self._parse_freespace(), 998 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 999 "HEAP": lambda self: self.expression(exp.HeapProperty), 1000 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 1001 "IMMUTABLE": lambda self: self.expression( 1002 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 1003 ), 1004 "INHERITS": lambda self: self.expression( 1005 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 1006 ), 1007 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 1008 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 1009 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 1010 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 1011 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 1012 "LIKE": lambda self: self._parse_create_like(), 1013 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 1014 "LOCK": lambda self: self._parse_locking(), 1015 "LOCKING": lambda self: self._parse_locking(), 1016 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 1017 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 1018 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 1019 "MODIFIES": lambda self: self._parse_modifies_property(), 1020 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 1021 "NO": lambda self: self._parse_no_property(), 1022 "ON": lambda self: self._parse_on_property(), 1023 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 1024 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 1025 "PARTITION": lambda self: self._parse_partitioned_of(), 1026 "PARTITION BY": lambda self: self._parse_partitioned_by(), 1027 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 1028 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 1029 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 1030 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 1031 "READS": lambda self: self._parse_reads_property(), 1032 
"REMOTE": lambda self: self._parse_remote_with_connection(), 1033 "RETURNS": lambda self: self._parse_returns(), 1034 "STRICT": lambda self: self.expression(exp.StrictProperty), 1035 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 1036 "ROW": lambda self: self._parse_row(), 1037 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1038 "SAMPLE": lambda self: self.expression( 1039 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 1040 ), 1041 "SECURE": lambda self: self.expression(exp.SecureProperty), 1042 "SECURITY": lambda self: self._parse_security(), 1043 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 1044 "SETTINGS": lambda self: self._parse_settings_property(), 1045 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1046 "SORTKEY": lambda self: self._parse_sortkey(), 1047 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1048 "STABLE": lambda self: self.expression( 1049 exp.StabilityProperty, this=exp.Literal.string("STABLE") 1050 ), 1051 "STORED": lambda self: self._parse_stored(), 1052 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1053 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1054 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 1055 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 1056 "TO": lambda self: self._parse_to_table(), 1057 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 1058 "TRANSFORM": lambda self: self.expression( 1059 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 1060 ), 1061 "TTL": lambda self: self._parse_ttl(), 1062 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1063 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 1064 "VOLATILE": lambda self: self._parse_volatile_property(), 1065 "WITH": lambda self: self._parse_with_property(), 1066 } 1067 1068 CONSTRAINT_PARSERS = { 1069 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1070 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1071 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1072 "CHARACTER SET": lambda self: self.expression( 1073 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1074 ), 1075 "CHECK": lambda self: self.expression( 1076 exp.CheckColumnConstraint, 1077 this=self._parse_wrapped(self._parse_assignment), 1078 enforced=self._match_text_seq("ENFORCED"), 1079 ), 1080 "COLLATE": lambda self: self.expression( 1081 exp.CollateColumnConstraint, 1082 this=self._parse_identifier() or self._parse_column(), 1083 ), 1084 "COMMENT": lambda self: self.expression( 1085 exp.CommentColumnConstraint, this=self._parse_string() 1086 ), 1087 "COMPRESS": lambda self: self._parse_compress(), 1088 "CLUSTERED": lambda self: self.expression( 1089 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1090 ), 1091 "NONCLUSTERED": lambda self: self.expression( 1092 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1093 ), 1094 "DEFAULT": lambda self: self.expression( 1095 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1096 ), 1097 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1098 "EPHEMERAL": lambda self: self.expression( 1099 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1100 ), 1101 
"EXCLUDE": lambda self: self.expression( 1102 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1103 ), 1104 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1105 "FORMAT": lambda self: self.expression( 1106 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1107 ), 1108 "GENERATED": lambda self: self._parse_generated_as_identity(), 1109 "IDENTITY": lambda self: self._parse_auto_increment(), 1110 "INLINE": lambda self: self._parse_inline(), 1111 "LIKE": lambda self: self._parse_create_like(), 1112 "NOT": lambda self: self._parse_not_constraint(), 1113 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1114 "ON": lambda self: ( 1115 self._match(TokenType.UPDATE) 1116 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1117 ) 1118 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1119 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1120 "PERIOD": lambda self: self._parse_period_for_system_time(), 1121 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1122 "REFERENCES": lambda self: self._parse_references(match=False), 1123 "TITLE": lambda self: self.expression( 1124 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1125 ), 1126 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1127 "UNIQUE": lambda self: self._parse_unique(), 1128 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1129 "WATERMARK": lambda self: self.expression( 1130 exp.WatermarkColumnConstraint, 1131 this=self._match(TokenType.FOR) and self._parse_column(), 1132 expression=self._match(TokenType.ALIAS) and self._parse_disjunction(), 1133 ), 1134 "WITH": lambda self: self.expression( 1135 exp.Properties, expressions=self._parse_wrapped_properties() 1136 ), 1137 "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1138 "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1139 } 1140 1141 def _parse_partitioned_by_bucket_or_truncate(self) -> exp.Expression: 1142 klass = ( 1143 exp.PartitionedByBucket 1144 if self._prev.text.upper() == "BUCKET" 1145 else exp.PartitionByTruncate 1146 ) 1147 1148 args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column()) 1149 this, expression = seq_get(args, 0), seq_get(args, 1) 1150 1151 if isinstance(this, exp.Literal): 1152 # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order 1153 # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)` 1154 # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)` 1155 # Both variants are canonicalized in the latter i.e `bucket(<col name>, <num buckets>)` 1156 # 1157 # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning 1158 # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties 1159 this, expression = expression, this 1160 1161 return self.expression(klass, this=this, expression=expression) 1162 1163 ALTER_PARSERS = { 1164 "ADD": lambda self: self._parse_alter_table_add(), 1165 "AS": lambda self: self._parse_select(), 1166 "ALTER": lambda self: self._parse_alter_table_alter(), 1167 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1168 "DELETE": lambda self: self.expression(exp.Delete, 
where=self._parse_where()), 1169 "DROP": lambda self: self._parse_alter_table_drop(), 1170 "RENAME": lambda self: self._parse_alter_table_rename(), 1171 "SET": lambda self: self._parse_alter_table_set(), 1172 "SWAP": lambda self: self.expression( 1173 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1174 ), 1175 } 1176 1177 ALTER_ALTER_PARSERS = { 1178 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1179 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1180 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1181 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1182 } 1183 1184 SCHEMA_UNNAMED_CONSTRAINTS = { 1185 "CHECK", 1186 "EXCLUDE", 1187 "FOREIGN KEY", 1188 "LIKE", 1189 "PERIOD", 1190 "PRIMARY KEY", 1191 "UNIQUE", 1192 "WATERMARK", 1193 "BUCKET", 1194 "TRUNCATE", 1195 } 1196 1197 NO_PAREN_FUNCTION_PARSERS = { 1198 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1199 "CASE": lambda self: self._parse_case(), 1200 "CONNECT_BY_ROOT": lambda self: self.expression( 1201 exp.ConnectByRoot, this=self._parse_column() 1202 ), 1203 "IF": lambda self: self._parse_if(), 1204 } 1205 1206 INVALID_FUNC_NAME_TOKENS = { 1207 TokenType.IDENTIFIER, 1208 TokenType.STRING, 1209 } 1210 1211 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1212 1213 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1214 1215 FUNCTION_PARSERS = { 1216 **{ 1217 name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names() 1218 }, 1219 **{ 1220 name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names() 1221 }, 1222 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1223 "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil), 1224 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1225 "DECODE": lambda self: self._parse_decode(), 1226 "EXTRACT": lambda self: self._parse_extract(), 1227 "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1228 "GAP_FILL": lambda self: self._parse_gap_fill(), 1229 "JSON_OBJECT": lambda self: self._parse_json_object(), 1230 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1231 "JSON_TABLE": lambda self: self._parse_json_table(), 1232 "MATCH": lambda self: self._parse_match_against(), 1233 "NORMALIZE": lambda self: self._parse_normalize(), 1234 "OPENJSON": lambda self: self._parse_open_json(), 1235 "OVERLAY": lambda self: self._parse_overlay(), 1236 "POSITION": lambda self: self._parse_position(), 1237 "PREDICT": lambda self: self._parse_predict(), 1238 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1239 "STRING_AGG": lambda self: self._parse_string_agg(), 1240 "SUBSTRING": lambda self: self._parse_substring(), 1241 "TRIM": lambda self: self._parse_trim(), 1242 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1243 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1244 "XMLELEMENT": lambda self: self.expression( 1245 exp.XMLElement, 1246 this=self._match_text_seq("NAME") and self._parse_id_var(), 1247 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression), 1248 ), 1249 "XMLTABLE": lambda self: self._parse_xml_table(), 1250 } 1251 1252 QUERY_MODIFIER_PARSERS = { 1253 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1254 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1255 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1256 TokenType.GROUP_BY: lambda self: 
("group", self._parse_group()), 1257 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1258 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1259 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1260 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1261 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1262 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1263 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1264 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1265 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1266 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1267 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1268 TokenType.CLUSTER_BY: lambda self: ( 1269 "cluster", 1270 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1271 ), 1272 TokenType.DISTRIBUTE_BY: lambda self: ( 1273 "distribute", 1274 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1275 ), 1276 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1277 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1278 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1279 } 1280 1281 SET_PARSERS = { 1282 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1283 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1284 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1285 "TRANSACTION": lambda self: self._parse_set_transaction(), 1286 } 1287 1288 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1289 1290 TYPE_LITERAL_PARSERS = { 1291 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1292 } 1293 1294 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1295 1296 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1297 1298 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1299 1300 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1301 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1302 "ISOLATION": ( 1303 ("LEVEL", "REPEATABLE", "READ"), 1304 ("LEVEL", "READ", "COMMITTED"), 1305 ("LEVEL", "READ", "UNCOMITTED"), 1306 ("LEVEL", "SERIALIZABLE"), 1307 ), 1308 "READ": ("WRITE", "ONLY"), 1309 } 1310 1311 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1312 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1313 ) 1314 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1315 1316 CREATE_SEQUENCE: OPTIONS_TYPE = { 1317 "SCALE": ("EXTEND", "NOEXTEND"), 1318 "SHARD": ("EXTEND", "NOEXTEND"), 1319 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1320 **dict.fromkeys( 1321 ( 1322 "SESSION", 1323 "GLOBAL", 1324 "KEEP", 1325 "NOKEEP", 1326 "ORDER", 1327 "NOORDER", 1328 "NOCACHE", 1329 "CYCLE", 1330 "NOCYCLE", 1331 "NOMINVALUE", 1332 "NOMAXVALUE", 1333 "NOSCALE", 1334 "NOSHARD", 1335 ), 1336 tuple(), 1337 ), 1338 } 1339 1340 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1341 1342 USABLES: OPTIONS_TYPE = dict.fromkeys( 1343 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1344 ) 1345 1346 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1347 1348 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1349 "TYPE": ("EVOLUTION",), 1350 **dict.fromkeys(("BINDING", "COMPENSATION", 
"EVOLUTION"), tuple()), 1351 } 1352 1353 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1354 1355 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1356 1357 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1358 "NOT": ("ENFORCED",), 1359 "MATCH": ( 1360 "FULL", 1361 "PARTIAL", 1362 "SIMPLE", 1363 ), 1364 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1365 "USING": ( 1366 "BTREE", 1367 "HASH", 1368 ), 1369 **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()), 1370 } 1371 1372 WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = { 1373 "NO": ("OTHERS",), 1374 "CURRENT": ("ROW",), 1375 **dict.fromkeys(("GROUP", "TIES"), tuple()), 1376 } 1377 1378 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1379 1380 CLONE_KEYWORDS = {"CLONE", "COPY"} 1381 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1382 HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"} 1383 1384 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1385 1386 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1387 1388 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1389 1390 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1391 1392 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1393 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1394 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1395 1396 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1397 1398 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1399 1400 ADD_CONSTRAINT_TOKENS = { 1401 TokenType.CONSTRAINT, 1402 TokenType.FOREIGN_KEY, 1403 TokenType.INDEX, 1404 TokenType.KEY, 1405 TokenType.PRIMARY_KEY, 1406 TokenType.UNIQUE, 1407 } 1408 1409 DISTINCT_TOKENS = {TokenType.DISTINCT} 1410 1411 NULL_TOKENS = {TokenType.NULL} 1412 1413 UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS 1414 1415 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1416 1417 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1418 1419 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1420 1421 ODBC_DATETIME_LITERALS = { 1422 "d": exp.Date, 1423 "t": exp.Time, 1424 "ts": exp.Timestamp, 1425 } 1426 1427 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1428 1429 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1430 1431 # The style options for the DESCRIBE statement 1432 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1433 1434 # The style options for the ANALYZE statement 1435 ANALYZE_STYLES = { 1436 "BUFFER_USAGE_LIMIT", 1437 "FULL", 1438 "LOCAL", 1439 "NO_WRITE_TO_BINLOG", 1440 "SAMPLE", 1441 "SKIP_LOCKED", 1442 "VERBOSE", 1443 } 1444 1445 ANALYZE_EXPRESSION_PARSERS = { 1446 "ALL": lambda self: self._parse_analyze_columns(), 1447 "COMPUTE": lambda self: self._parse_analyze_statistics(), 1448 "DELETE": lambda self: self._parse_analyze_delete(), 1449 "DROP": lambda self: self._parse_analyze_histogram(), 1450 "ESTIMATE": lambda self: self._parse_analyze_statistics(), 1451 "LIST": lambda self: self._parse_analyze_list(), 1452 "PREDICATE": lambda self: self._parse_analyze_columns(), 1453 "UPDATE": lambda self: self._parse_analyze_histogram(), 1454 "VALIDATE": lambda self: self._parse_analyze_validate(), 1455 } 1456 1457 PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"} 1458 1459 AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET) 1460 1461 OPERATION_MODIFIERS: 
t.Set[str] = set() 1462 1463 RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"} 1464 1465 MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows) 1466 1467 STRICT_CAST = True 1468 1469 PREFIXED_PIVOT_COLUMNS = False 1470 IDENTIFY_PIVOT_STRINGS = False 1471 1472 LOG_DEFAULTS_TO_LN = False 1473 1474 # Whether the table sample clause expects CSV syntax 1475 TABLESAMPLE_CSV = False 1476 1477 # The default method used for table sampling 1478 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1479 1480 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1481 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1482 1483 # Whether the TRIM function expects the characters to trim as its first argument 1484 TRIM_PATTERN_FIRST = False 1485 1486 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1487 STRING_ALIASES = False 1488 1489 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1490 MODIFIERS_ATTACHED_TO_SET_OP = True 1491 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1492 1493 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1494 NO_PAREN_IF_COMMANDS = True 1495 1496 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1497 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1498 1499 # Whether the `:` operator is used to extract a value from a VARIANT column 1500 COLON_IS_VARIANT_EXTRACT = False 1501 1502 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1503 # If this is True and '(' is not found, the keyword will be treated as an identifier 1504 VALUES_FOLLOWED_BY_PAREN = True 1505 1506 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift) 1507 SUPPORTS_IMPLICIT_UNNEST = False 1508 1509 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1510 INTERVAL_SPANS = True 1511 1512 # Whether a PARTITION clause can follow a table reference 1513 SUPPORTS_PARTITION_SELECTION = False 1514 1515 # Whether the `name AS expr` schema/column constraint requires parentheses around `expr` 1516 WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True 1517 1518 # Whether the 'AS' keyword is optional in the CTE definition syntax 1519 OPTIONAL_ALIAS_TOKEN_CTE = True 1520 1521 # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword 1522 ALTER_RENAME_REQUIRES_COLUMN = True 1523 1524 # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree. 1525 # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is 1526 # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such 1527 # as BigQuery, where all joins have the same precedence. 
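    # For example, under standard precedence "FROM a, b JOIN c" parses as
    # "a, (b JOIN c)", whereas with equal precedence it parses as "(a, b) JOIN c".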
1528 JOINS_HAVE_EQUAL_PRECEDENCE = False 1529 1530 # Whether TIMESTAMP <literal> can produce a zone-aware timestamp 1531 ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = False 1532 1533 __slots__ = ( 1534 "error_level", 1535 "error_message_context", 1536 "max_errors", 1537 "dialect", 1538 "sql", 1539 "errors", 1540 "_tokens", 1541 "_index", 1542 "_curr", 1543 "_next", 1544 "_prev", 1545 "_prev_comments", 1546 "_pipe_cte_counter", 1547 ) 1548 1549 # Autofilled 1550 SHOW_TRIE: t.Dict = {} 1551 SET_TRIE: t.Dict = {} 1552 1553 def __init__( 1554 self, 1555 error_level: t.Optional[ErrorLevel] = None, 1556 error_message_context: int = 100, 1557 max_errors: int = 3, 1558 dialect: DialectType = None, 1559 ): 1560 from sqlglot.dialects import Dialect 1561 1562 self.error_level = error_level or ErrorLevel.IMMEDIATE 1563 self.error_message_context = error_message_context 1564 self.max_errors = max_errors 1565 self.dialect = Dialect.get_or_raise(dialect) 1566 self.reset() 1567 1568 def reset(self): 1569 self.sql = "" 1570 self.errors = [] 1571 self._tokens = [] 1572 self._index = 0 1573 self._curr = None 1574 self._next = None 1575 self._prev = None 1576 self._prev_comments = None 1577 self._pipe_cte_counter = 0 1578 1579 def parse( 1580 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1581 ) -> t.List[t.Optional[exp.Expression]]: 1582 """ 1583 Parses a list of tokens and returns a list of syntax trees, one tree 1584 per parsed SQL statement. 1585 1586 Args: 1587 raw_tokens: The list of tokens. 1588 sql: The original SQL string, used to produce helpful debug messages. 1589 1590 Returns: 1591 The list of the produced syntax trees. 1592 """ 1593 return self._parse( 1594 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1595 ) 1596 1597 def parse_into( 1598 self, 1599 expression_types: exp.IntoType, 1600 raw_tokens: t.List[Token], 1601 sql: t.Optional[str] = None, 1602 ) -> t.List[t.Optional[exp.Expression]]: 1603 """ 1604 Parses a list of tokens into a given Expression type. If a collection of Expression 1605 types is given instead, this method will try to parse the token list into each one 1606 of them, stopping at the first for which the parsing succeeds. 1607 1608 Args: 1609 expression_types: The expression type(s) to try and parse the token list into. 1610 raw_tokens: The list of tokens. 1611 sql: The original SQL string, used to produce helpful debug messages. 1612 1613 Returns: 1614 The target Expression. 
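        Example (a minimal sketch; `parser` and `tokens` are assumed, e.g. a
        Parser instance and the token list produced by a dialect's tokenizer):
            parser.parse_into(exp.Select, tokens)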
1615 """ 1616 errors = [] 1617 for expression_type in ensure_list(expression_types): 1618 parser = self.EXPRESSION_PARSERS.get(expression_type) 1619 if not parser: 1620 raise TypeError(f"No parser registered for {expression_type}") 1621 1622 try: 1623 return self._parse(parser, raw_tokens, sql) 1624 except ParseError as e: 1625 e.errors[0]["into_expression"] = expression_type 1626 errors.append(e) 1627 1628 raise ParseError( 1629 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1630 errors=merge_errors(errors), 1631 ) from errors[-1] 1632 1633 def _parse( 1634 self, 1635 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1636 raw_tokens: t.List[Token], 1637 sql: t.Optional[str] = None, 1638 ) -> t.List[t.Optional[exp.Expression]]: 1639 self.reset() 1640 self.sql = sql or "" 1641 1642 total = len(raw_tokens) 1643 chunks: t.List[t.List[Token]] = [[]] 1644 1645 for i, token in enumerate(raw_tokens): 1646 if token.token_type == TokenType.SEMICOLON: 1647 if token.comments: 1648 chunks.append([token]) 1649 1650 if i < total - 1: 1651 chunks.append([]) 1652 else: 1653 chunks[-1].append(token) 1654 1655 expressions = [] 1656 1657 for tokens in chunks: 1658 self._index = -1 1659 self._tokens = tokens 1660 self._advance() 1661 1662 expressions.append(parse_method(self)) 1663 1664 if self._index < len(self._tokens): 1665 self.raise_error("Invalid expression / Unexpected token") 1666 1667 self.check_errors() 1668 1669 return expressions 1670 1671 def check_errors(self) -> None: 1672 """Logs or raises any found errors, depending on the chosen error level setting.""" 1673 if self.error_level == ErrorLevel.WARN: 1674 for error in self.errors: 1675 logger.error(str(error)) 1676 elif self.error_level == ErrorLevel.RAISE and self.errors: 1677 raise ParseError( 1678 concat_messages(self.errors, self.max_errors), 1679 errors=merge_errors(self.errors), 1680 ) 1681 1682 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1683 """ 1684 Appends an error in the list of recorded errors or raises it, depending on the chosen 1685 error level setting. 1686 """ 1687 token = token or self._curr or self._prev or Token.string("") 1688 start = token.start 1689 end = token.end + 1 1690 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1691 highlight = self.sql[start:end] 1692 end_context = self.sql[end : end + self.error_message_context] 1693 1694 error = ParseError.new( 1695 f"{message}. Line {token.line}, Col: {token.col}.\n" 1696 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1697 description=message, 1698 line=token.line, 1699 col=token.col, 1700 start_context=start_context, 1701 highlight=highlight, 1702 end_context=end_context, 1703 ) 1704 1705 if self.error_level == ErrorLevel.IMMEDIATE: 1706 raise error 1707 1708 self.errors.append(error) 1709 1710 def expression( 1711 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1712 ) -> E: 1713 """ 1714 Creates a new, validated Expression. 1715 1716 Args: 1717 exp_class: The expression class to instantiate. 1718 comments: An optional list of comments to attach to the expression. 1719 kwargs: The arguments to set for the expression along with their respective values. 1720 1721 Returns: 1722 The target expression. 
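        Example (a minimal sketch, reusing the assumed `parser` instance):
            parser.expression(exp.Not, this=exp.column("x"))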
    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if token.comments:
                    chunks.append([token])

                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this
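    # Illustrative sketch (not part of the parser itself): _try_parse implements
    # classic backtracking over the token stream. A hypothetical optional-clause
    # parser could lean on it like this:
    #
    #     def _parse_maybe_clause(self):
    #         # Returns the parsed clause, or None with the token position and
    #         # error level fully restored, so callers can try something else.
    #         return self._try_parse(self._parse_some_clause)
    #
    # Here _parse_some_clause is a made-up method name used only for illustration.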
    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            comments = self._prev_comments
            stmt = self.STATEMENT_PARSERS[self._prev.token_type](self)
            stmt.add_comments(comments, prepend=True)
            return stmt

        if self._match_set(self.dialect.tokenizer_class.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()
        if not kind:
            return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        if_exists = exists or self._parse_exists()

        if kind == "COLUMN":
            this = self._parse_column()
        else:
            this = self._parse_table_parts(
                schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
            )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            exists=if_exists,
            this=this,
            expressions=expressions,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
            concurrently=concurrently,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )
        refresh = self._match_pair(TokenType.OR, TokenType.REFRESH)

        unique = self._match(TokenType.UNIQUE)

        if self._match_text_seq("CLUSTERED", "COLUMNSTORE"):
            clustered = True
        elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq(
            "COLUMNSTORE"
        ):
            clustered = False
        else:
            clustered = None

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
            extend_props(self._parse_properties())

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_user_defined_function_expression()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            has_alias = self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                extend_props(self._parse_properties())
            else:
                expression = self._parse_ddl_select()

                # Some dialects also support using a table as an alias instead of a SELECT.
                # Here we fall back to this as an alternative.
                if not expression and has_alias:
                    expression = self._try_parse(self._parse_table_parts)

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())
                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True
            elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE):
                extend_props(self._parse_properties())

            shallow = self._match_text_seq("SHALLOW")

            if self._match_texts(self.CLONE_KEYWORDS):
                copy = self._prev.text.lower() == "copy"
                clone = self.expression(
                    exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
                )

        if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
            return self._parse_as_command(start)

        create_kind_text = create_token.text.upper()
        return self.expression(
            exp.Create,
            this=this,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text,
            replace=replace,
            refresh=refresh,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
            concurrently=concurrently,
            clustered=clustered,
        )

    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            self._match(TokenType.COMMA)
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        return None if self._index == index else seq
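    # Illustrative example (not part of the parser itself) of the kind of DDL the
    # sequence-property loop above recognizes; the names and numbers are arbitrary:
    #
    #     CREATE SEQUENCE seq INCREMENT BY 2 MINVALUE 1 MAXVALUE 100 START WITH 1 CACHE 10 OWNED BY t.id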
    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for Teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_wrapped_properties(self) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_property)

    def _parse_property(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return self._parse_sequence_properties()

        # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise
        if isinstance(key, exp.Column):
            key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name)

        value = self._parse_bitwise() or self._parse_var(any_token=True)

        # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier())
        if isinstance(value, exp.Column):
            value = exp.var(value.name)

        return self.expression(exp.Property, this=key, value=value)

    def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]:
        if self._match_text_seq("BY"):
            return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string())

        self._match(TokenType.ALIAS)
        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=(
                self.expression(
                    exp.InputOutputFormat,
                    input_format=input_format,
                    output_format=output_format,
                )
                if input_format or output_format
                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
            ),
            hive_format=True,
        )

    def _parse_unquoted_field(self) -> t.Optional[exp.Expression]:
        field = self._parse_field()
        if isinstance(field, exp.Identifier) and not field.quoted:
            field = exp.var(field)

        return field

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)

        return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()
            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_security(self) -> t.Optional[exp.SecurityProperty]:
        if self._match_texts(("NONE", "DEFINER", "INVOKER")):
            security_specifier = self._prev.text.upper()
            return self.expression(exp.SecurityProperty, this=security_specifier)
        return None

    def _parse_settings_property(self) -> exp.SettingsProperty:
        return self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment)
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_retention_period(self) -> exp.Var:
        # Parse T-SQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...}
        number = self._parse_number()
        number_str = f"{number} " if number else ""
        unit = self._parse_var(any_token=True)
        return exp.var(f"{number_str}{unit}")

    def _parse_system_versioning_property(
        self, with_: bool = False
    ) -> exp.WithSystemVersioningProperty:
        self._match(TokenType.EQ)
        prop = self.expression(
            exp.WithSystemVersioningProperty,
            **{  # type: ignore
                "on": True,
                "with": with_,
            },
        )

        if self._match_text_seq("OFF"):
            prop.set("on", False)
            return prop

        self._match(TokenType.ON)
        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("HISTORY_TABLE", "="):
                    prop.set("this", self._parse_table_parts())
                elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                    prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
                elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_data_deletion_property(self) -> exp.DataDeletionProperty:
        self._match(TokenType.EQ)
        on = self._match_text_seq("ON") or not self._match_text_seq("OFF")
        prop = self.expression(exp.DataDeletionProperty, on=on)

        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("FILTER_COLUMN", "="):
                    prop.set("filter_column", self._parse_column())
                elif self._match_text_seq("RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop
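    # Illustrative example (not part of the parser itself) of the T-SQL option
    # blocks the two parsers above target; table and column names are arbitrary:
    #
    #     WITH (SYSTEM_VERSIONING = ON (HISTORY_TABLE = dbo.t_history, HISTORY_RETENTION_PERIOD = 6 MONTHS))
    #     WITH (DATA_DELETION = ON (FILTER_COLUMN = created_at, RETENTION_PERIOD = 30 DAYS))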
self._match_text_seq("AUTO"): 2365 buckets = self._parse_number() 2366 2367 return self.expression( 2368 exp.DistributedByProperty, 2369 expressions=expressions, 2370 kind=kind, 2371 buckets=buckets, 2372 order=self._parse_order(), 2373 ) 2374 2375 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2376 self._match_text_seq("KEY") 2377 expressions = self._parse_wrapped_id_vars() 2378 return self.expression(expr_type, expressions=expressions) 2379 2380 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2381 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2382 prop = self._parse_system_versioning_property(with_=True) 2383 self._match_r_paren() 2384 return prop 2385 2386 if self._match(TokenType.L_PAREN, advance=False): 2387 return self._parse_wrapped_properties() 2388 2389 if self._match_text_seq("JOURNAL"): 2390 return self._parse_withjournaltable() 2391 2392 if self._match_texts(self.VIEW_ATTRIBUTES): 2393 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2394 2395 if self._match_text_seq("DATA"): 2396 return self._parse_withdata(no=False) 2397 elif self._match_text_seq("NO", "DATA"): 2398 return self._parse_withdata(no=True) 2399 2400 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2401 return self._parse_serde_properties(with_=True) 2402 2403 if self._match(TokenType.SCHEMA): 2404 return self.expression( 2405 exp.WithSchemaBindingProperty, 2406 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2407 ) 2408 2409 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2410 return self.expression( 2411 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2412 ) 2413 2414 if not self._next: 2415 return None 2416 2417 return self._parse_withisolatedloading() 2418 2419 def _parse_procedure_option(self) -> exp.Expression | None: 2420 if self._match_text_seq("EXECUTE", "AS"): 2421 return self.expression( 2422 exp.ExecuteAsProperty, 2423 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2424 or self._parse_string(), 2425 ) 2426 2427 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2428 2429 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2430 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2431 self._match(TokenType.EQ) 2432 2433 user = self._parse_id_var() 2434 self._match(TokenType.PARAMETER) 2435 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2436 2437 if not user or not host: 2438 return None 2439 2440 return exp.DefinerProperty(this=f"{user}@{host}") 2441 2442 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2443 self._match(TokenType.TABLE) 2444 self._match(TokenType.EQ) 2445 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2446 2447 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2448 return self.expression(exp.LogProperty, no=no) 2449 2450 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2451 return self.expression(exp.JournalProperty, **kwargs) 2452 2453 def _parse_checksum(self) -> exp.ChecksumProperty: 2454 self._match(TokenType.EQ) 2455 2456 on = None 2457 if self._match(TokenType.ON): 2458 on = True 2459 elif self._match_text_seq("OFF"): 2460 on = False 2461 2462 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2463 2464 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2465 return self.expression( 2466 
exp.Cluster, 2467 expressions=( 2468 self._parse_wrapped_csv(self._parse_ordered) 2469 if wrapped 2470 else self._parse_csv(self._parse_ordered) 2471 ), 2472 ) 2473 2474 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2475 self._match_text_seq("BY") 2476 2477 self._match_l_paren() 2478 expressions = self._parse_csv(self._parse_column) 2479 self._match_r_paren() 2480 2481 if self._match_text_seq("SORTED", "BY"): 2482 self._match_l_paren() 2483 sorted_by = self._parse_csv(self._parse_ordered) 2484 self._match_r_paren() 2485 else: 2486 sorted_by = None 2487 2488 self._match(TokenType.INTO) 2489 buckets = self._parse_number() 2490 self._match_text_seq("BUCKETS") 2491 2492 return self.expression( 2493 exp.ClusteredByProperty, 2494 expressions=expressions, 2495 sorted_by=sorted_by, 2496 buckets=buckets, 2497 ) 2498 2499 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2500 if not self._match_text_seq("GRANTS"): 2501 self._retreat(self._index - 1) 2502 return None 2503 2504 return self.expression(exp.CopyGrantsProperty) 2505 2506 def _parse_freespace(self) -> exp.FreespaceProperty: 2507 self._match(TokenType.EQ) 2508 return self.expression( 2509 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2510 ) 2511 2512 def _parse_mergeblockratio( 2513 self, no: bool = False, default: bool = False 2514 ) -> exp.MergeBlockRatioProperty: 2515 if self._match(TokenType.EQ): 2516 return self.expression( 2517 exp.MergeBlockRatioProperty, 2518 this=self._parse_number(), 2519 percent=self._match(TokenType.PERCENT), 2520 ) 2521 2522 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2523 2524 def _parse_datablocksize( 2525 self, 2526 default: t.Optional[bool] = None, 2527 minimum: t.Optional[bool] = None, 2528 maximum: t.Optional[bool] = None, 2529 ) -> exp.DataBlocksizeProperty: 2530 self._match(TokenType.EQ) 2531 size = self._parse_number() 2532 2533 units = None 2534 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2535 units = self._prev.text 2536 2537 return self.expression( 2538 exp.DataBlocksizeProperty, 2539 size=size, 2540 units=units, 2541 default=default, 2542 minimum=minimum, 2543 maximum=maximum, 2544 ) 2545 2546 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2547 self._match(TokenType.EQ) 2548 always = self._match_text_seq("ALWAYS") 2549 manual = self._match_text_seq("MANUAL") 2550 never = self._match_text_seq("NEVER") 2551 default = self._match_text_seq("DEFAULT") 2552 2553 autotemp = None 2554 if self._match_text_seq("AUTOTEMP"): 2555 autotemp = self._parse_schema() 2556 2557 return self.expression( 2558 exp.BlockCompressionProperty, 2559 always=always, 2560 manual=manual, 2561 never=never, 2562 default=default, 2563 autotemp=autotemp, 2564 ) 2565 2566 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2567 index = self._index 2568 no = self._match_text_seq("NO") 2569 concurrent = self._match_text_seq("CONCURRENT") 2570 2571 if not self._match_text_seq("ISOLATED", "LOADING"): 2572 self._retreat(index) 2573 return None 2574 2575 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2576 return self.expression( 2577 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2578 ) 2579 2580 def _parse_locking(self) -> exp.LockingProperty: 2581 if self._match(TokenType.TABLE): 2582 kind = "TABLE" 2583 elif self._match(TokenType.VIEW): 2584 kind = "VIEW" 2585 elif self._match(TokenType.ROW): 2586 kind 
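    # Illustrative example (not part of the parser itself) of the Hive-style
    # clause _parse_clustered_by handles; identifiers and bucket count are arbitrary:
    #
    #     CLUSTERED BY (user_id) SORTED BY (event_ts DESC) INTO 32 BUCKETS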
= "ROW" 2587 elif self._match_text_seq("DATABASE"): 2588 kind = "DATABASE" 2589 else: 2590 kind = None 2591 2592 if kind in ("DATABASE", "TABLE", "VIEW"): 2593 this = self._parse_table_parts() 2594 else: 2595 this = None 2596 2597 if self._match(TokenType.FOR): 2598 for_or_in = "FOR" 2599 elif self._match(TokenType.IN): 2600 for_or_in = "IN" 2601 else: 2602 for_or_in = None 2603 2604 if self._match_text_seq("ACCESS"): 2605 lock_type = "ACCESS" 2606 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2607 lock_type = "EXCLUSIVE" 2608 elif self._match_text_seq("SHARE"): 2609 lock_type = "SHARE" 2610 elif self._match_text_seq("READ"): 2611 lock_type = "READ" 2612 elif self._match_text_seq("WRITE"): 2613 lock_type = "WRITE" 2614 elif self._match_text_seq("CHECKSUM"): 2615 lock_type = "CHECKSUM" 2616 else: 2617 lock_type = None 2618 2619 override = self._match_text_seq("OVERRIDE") 2620 2621 return self.expression( 2622 exp.LockingProperty, 2623 this=this, 2624 kind=kind, 2625 for_or_in=for_or_in, 2626 lock_type=lock_type, 2627 override=override, 2628 ) 2629 2630 def _parse_partition_by(self) -> t.List[exp.Expression]: 2631 if self._match(TokenType.PARTITION_BY): 2632 return self._parse_csv(self._parse_assignment) 2633 return [] 2634 2635 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2636 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2637 if self._match_text_seq("MINVALUE"): 2638 return exp.var("MINVALUE") 2639 if self._match_text_seq("MAXVALUE"): 2640 return exp.var("MAXVALUE") 2641 return self._parse_bitwise() 2642 2643 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2644 expression = None 2645 from_expressions = None 2646 to_expressions = None 2647 2648 if self._match(TokenType.IN): 2649 this = self._parse_wrapped_csv(self._parse_bitwise) 2650 elif self._match(TokenType.FROM): 2651 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2652 self._match_text_seq("TO") 2653 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2654 elif self._match_text_seq("WITH", "(", "MODULUS"): 2655 this = self._parse_number() 2656 self._match_text_seq(",", "REMAINDER") 2657 expression = self._parse_number() 2658 self._match_r_paren() 2659 else: 2660 self.raise_error("Failed to parse partition bound spec.") 2661 2662 return self.expression( 2663 exp.PartitionBoundSpec, 2664 this=this, 2665 expression=expression, 2666 from_expressions=from_expressions, 2667 to_expressions=to_expressions, 2668 ) 2669 2670 # https://www.postgresql.org/docs/current/sql-createtable.html 2671 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2672 if not self._match_text_seq("OF"): 2673 self._retreat(self._index - 1) 2674 return None 2675 2676 this = self._parse_table(schema=True) 2677 2678 if self._match(TokenType.DEFAULT): 2679 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2680 elif self._match_text_seq("FOR", "VALUES"): 2681 expression = self._parse_partition_bound_spec() 2682 else: 2683 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2684 2685 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2686 2687 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2688 self._match(TokenType.EQ) 2689 return self.expression( 2690 exp.PartitionedByProperty, 2691 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2692 ) 2693 2694 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2695 if self._match_text_seq("AND", 
"STATISTICS"): 2696 statistics = True 2697 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2698 statistics = False 2699 else: 2700 statistics = None 2701 2702 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2703 2704 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2705 if self._match_text_seq("SQL"): 2706 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2707 return None 2708 2709 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2710 if self._match_text_seq("SQL", "DATA"): 2711 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2712 return None 2713 2714 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2715 if self._match_text_seq("PRIMARY", "INDEX"): 2716 return exp.NoPrimaryIndexProperty() 2717 if self._match_text_seq("SQL"): 2718 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2719 return None 2720 2721 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2722 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2723 return exp.OnCommitProperty() 2724 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2725 return exp.OnCommitProperty(delete=True) 2726 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2727 2728 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2729 if self._match_text_seq("SQL", "DATA"): 2730 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2731 return None 2732 2733 def _parse_distkey(self) -> exp.DistKeyProperty: 2734 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2735 2736 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2737 table = self._parse_table(schema=True) 2738 2739 options = [] 2740 while self._match_texts(("INCLUDING", "EXCLUDING")): 2741 this = self._prev.text.upper() 2742 2743 id_var = self._parse_id_var() 2744 if not id_var: 2745 return None 2746 2747 options.append( 2748 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2749 ) 2750 2751 return self.expression(exp.LikeProperty, this=table, expressions=options) 2752 2753 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2754 return self.expression( 2755 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2756 ) 2757 2758 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2759 self._match(TokenType.EQ) 2760 return self.expression( 2761 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2762 ) 2763 2764 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2765 self._match_text_seq("WITH", "CONNECTION") 2766 return self.expression( 2767 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2768 ) 2769 2770 def _parse_returns(self) -> exp.ReturnsProperty: 2771 value: t.Optional[exp.Expression] 2772 null = None 2773 is_table = self._match(TokenType.TABLE) 2774 2775 if is_table: 2776 if self._match(TokenType.LT): 2777 value = self.expression( 2778 exp.Schema, 2779 this="TABLE", 2780 expressions=self._parse_csv(self._parse_struct_types), 2781 ) 2782 if not self._match(TokenType.GT): 2783 self.raise_error("Expecting >") 2784 else: 2785 value = self._parse_schema(exp.var("TABLE")) 2786 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2787 null = True 2788 value = None 2789 else: 2790 value = self._parse_types() 2791 2792 return 
    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper()
        if self._match(TokenType.DOT):
            style = None
            self._retreat(self._index - 2)

        format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None

        if self._match_set(self.STATEMENT_PARSERS, advance=False):
            this = self._parse_statement()
        else:
            this = self._parse_table(schema=True)

        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        partition = self._parse_partition()
        return self.expression(
            exp.Describe,
            this=this,
            style=style,
            kind=kind,
            expressions=expressions,
            partition=partition,
            format=format,
        )

    def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts:
        kind = self._prev.text.upper()
        expressions = []

        def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]:
            if self._match(TokenType.WHEN):
                expression = self._parse_disjunction()
                self._match(TokenType.THEN)
            else:
                expression = None

            else_ = self._match(TokenType.ELSE)

            if not self._match(TokenType.INTO):
                return None

            return self.expression(
                exp.ConditionalInsert,
                this=self.expression(
                    exp.Insert,
                    this=self._parse_table(schema=True),
                    expression=self._parse_derived_table_values(),
                ),
                expression=expression,
                else_=else_,
            )

        expression = parse_conditional_insert()
        while expression is not None:
            expressions.append(expression)
            expression = parse_conditional_insert()

        return self.expression(
            exp.MultitableInserts,
            kind=kind,
            comments=comments,
            expressions=expressions,
            source=self._parse_table(),
        )

    def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]:
        comments = []
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match_set((TokenType.FIRST, TokenType.ALL)):
                comments += ensure_list(self._prev_comments)
                return self._parse_multitable_inserts(comments)

            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = (
                self._parse_table(schema=True, parse_partition=True)
                if not is_function
                else self._parse_function()
            )
            if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False):
                this.set("alias", self._parse_table_alias())

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(),
            partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(),
            settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
            source=self._match(TokenType.TABLE) and self._parse_table(),
        )

    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
            where=self._parse_where(),
        )
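    # Illustrative example (not part of the parser itself) of the conflict clauses
    # _parse_on_conflict recognizes; the identifiers are arbitrary:
    #
    #     INSERT INTO t (id, v) VALUES (1, 'a') ON CONFLICT (id) DO UPDATE SET v = excluded.v
    #     INSERT INTO t (id, v) VALUES (1, 'a') ON DUPLICATE KEY UPDATE v = 'a'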
self._match_text_seq("FIELDS", "TERMINATED", "BY"): 3013 kwargs["fields"] = self._parse_string() 3014 if self._match_text_seq("ESCAPED", "BY"): 3015 kwargs["escaped"] = self._parse_string() 3016 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 3017 kwargs["collection_items"] = self._parse_string() 3018 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 3019 kwargs["map_keys"] = self._parse_string() 3020 if self._match_text_seq("LINES", "TERMINATED", "BY"): 3021 kwargs["lines"] = self._parse_string() 3022 if self._match_text_seq("NULL", "DEFINED", "AS"): 3023 kwargs["null"] = self._parse_string() 3024 3025 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 3026 3027 def _parse_load(self) -> exp.LoadData | exp.Command: 3028 if self._match_text_seq("DATA"): 3029 local = self._match_text_seq("LOCAL") 3030 self._match_text_seq("INPATH") 3031 inpath = self._parse_string() 3032 overwrite = self._match(TokenType.OVERWRITE) 3033 self._match_pair(TokenType.INTO, TokenType.TABLE) 3034 3035 return self.expression( 3036 exp.LoadData, 3037 this=self._parse_table(schema=True), 3038 local=local, 3039 overwrite=overwrite, 3040 inpath=inpath, 3041 partition=self._parse_partition(), 3042 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 3043 serde=self._match_text_seq("SERDE") and self._parse_string(), 3044 ) 3045 return self._parse_as_command(self._prev) 3046 3047 def _parse_delete(self) -> exp.Delete: 3048 # This handles MySQL's "Multiple-Table Syntax" 3049 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 3050 tables = None 3051 if not self._match(TokenType.FROM, advance=False): 3052 tables = self._parse_csv(self._parse_table) or None 3053 3054 returning = self._parse_returning() 3055 3056 return self.expression( 3057 exp.Delete, 3058 tables=tables, 3059 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 3060 using=self._match(TokenType.USING) and self._parse_table(joins=True), 3061 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3062 where=self._parse_where(), 3063 returning=returning or self._parse_returning(), 3064 limit=self._parse_limit(), 3065 ) 3066 3067 def _parse_update(self) -> exp.Update: 3068 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 3069 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 3070 returning = self._parse_returning() 3071 return self.expression( 3072 exp.Update, 3073 **{ # type: ignore 3074 "this": this, 3075 "expressions": expressions, 3076 "from": self._parse_from(joins=True), 3077 "where": self._parse_where(), 3078 "returning": returning or self._parse_returning(), 3079 "order": self._parse_order(), 3080 "limit": self._parse_limit(), 3081 }, 3082 ) 3083 3084 def _parse_use(self) -> exp.Use: 3085 return self.expression( 3086 exp.Use, 3087 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3088 this=self._parse_table(schema=False), 3089 ) 3090 3091 def _parse_uncache(self) -> exp.Uncache: 3092 if not self._match(TokenType.TABLE): 3093 self.raise_error("Expecting TABLE after UNCACHE") 3094 3095 return self.expression( 3096 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3097 ) 3098 3099 def _parse_cache(self) -> exp.Cache: 3100 lazy = self._match_text_seq("LAZY") 3101 self._match(TokenType.TABLE) 3102 table = self._parse_table(schema=True) 3103 3104 options = [] 3105 if self._match_text_seq("OPTIONS"): 3106 self._match_l_paren() 3107 k = 
    def _parse_load(self) -> exp.LoadData | exp.Command:
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            cluster=self._match(TokenType.ON) and self._parse_on_property(),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_use(self) -> exp.Use:
        return self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        )

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match_texts(self.PARTITION_KEYWORDS):
            return None

        return self.expression(
            exp.Partition,
            subpartition=self._prev.text.upper() == "SUBPARTITION",
            expressions=self._parse_wrapped_csv(self._parse_assignment),
        )

    def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]:
        def _parse_value_expression() -> t.Optional[exp.Expression]:
            if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT):
                return exp.var(self._prev.text.upper())
            return self._parse_expression()

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(_parse_value_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        expression = self._parse_expression()
        if expression:
            return self.expression(exp.Tuple, expressions=[expression])
        return None

    def _parse_projections(self) -> t.List[exp.Expression]:
        return self._parse_expressions()
    def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]:
        if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)):
            this: t.Optional[exp.Expression] = self._parse_simplified_pivot(
                is_unpivot=self._prev.token_type == TokenType.UNPIVOT
            )
        elif self._match(TokenType.FROM):
            from_ = self._parse_from(skip_from_token=True, consume_pipe=True)
            # Support parentheses for duckdb FROM-first syntax
            select = self._parse_select()
            if select:
                select.set("from", from_)
                this = select
            else:
                this = exp.select("*").from_(t.cast(exp.From, from_))
        else:
            this = (
                self._parse_table(consume_pipe=True)
                if table
                else self._parse_select(nested=True, parse_set_operation=False)
            )

            # Transform exp.Values into an exp.Table to pass through parse_query_modifiers
            # in case a modifier (e.g. a join) follows
            if table and isinstance(this, exp.Values) and this.alias:
                alias = this.args["alias"].pop()
                this = exp.Table(this=this, alias=alias)

        this = self._parse_query_modifiers(self._parse_set_operations(this))

        return this

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
        consume_pipe: bool = True,
    ) -> t.Optional[exp.Expression]:
        query = self._parse_select_query(
            nested=nested,
            table=table,
            parse_subquery_alias=parse_subquery_alias,
            parse_set_operation=parse_set_operation,
        )

        if (
            consume_pipe
            and self._match(TokenType.PIPE_GT, advance=False)
            and isinstance(query, exp.Query)
        ):
            query = self._parse_pipe_syntax_query(query)
            query = query.subquery(copy=False) if query and table else query

        return query
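    # Illustrative examples (not part of the parser itself) of the non-standard
    # query shapes handled above; table and column names are arbitrary:
    #
    #     FROM t SELECT a            -- DuckDB FROM-first syntax
    #     FROM t |> WHERE a > 1      -- pipe syntax, consumed via _parse_pipe_syntax_query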
    def _parse_select_query(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # DuckDB supports a leading FROM, e.g. FROM x
        from_ = (
            self._parse_from(consume_pipe=True)
            if self._match(TokenType.FROM, advance=False)
            else None
        )

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()

            if self._next and not self._next.token_type == TokenType.DOT:
                all_ = self._match(TokenType.ALL)
                distinct = self._match_set(self.DISTINCT_TOKENS)
            else:
                all_, distinct = None, None

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value(values=False) if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            operation_modifiers = []
            while self._curr and self._match_texts(self.OPERATION_MODIFIERS):
                operation_modifiers.append(exp.var(self._prev.text.upper()))

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
                operation_modifiers=operation_modifiers or None,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_wrapped_select(table=table)

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            self._match_r_paren()
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        elif self._match(TokenType.SUMMARIZE):
            table = self._match(TokenType.TABLE)
            this = self._parse_select() or self._parse_string() or self._parse_table()
            return self.expression(exp.Summarize, this=this, table=table)
        elif self._match(TokenType.DESCRIBE):
            this = self._parse_describe()
        elif self._match_text_seq("STREAM"):
            this = self._parse_function()
            if this:
                this = self.expression(exp.Stream, this=this)
            else:
                self._retreat(self._index - 1)
        else:
            this = None

        return self._parse_set_operations(this) if parse_set_operation else this

    def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]:
        self._match_text_seq("SEARCH")

        kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper()

        if not kind:
            return None

        self._match_text_seq("FIRST", "BY")

        return self.expression(
            exp.RecursiveWithSearch,
            kind=kind,
            this=self._parse_id_var(),
            expression=self._match_text_seq("SET") and self._parse_id_var(),
            using=self._match_text_seq("USING") and self._parse_id_var(),
        )

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        last_comments = None
        expressions = []
        while True:
            cte = self._parse_cte()
            if isinstance(cte, exp.CTE):
                expressions.append(cte)
                if last_comments:
                    cte.add_comments(last_comments)

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

            last_comments = self._prev_comments

        return self.expression(
            exp.With,
            comments=comments,
            expressions=expressions,
            recursive=recursive,
            search=self._parse_recursive_with_search(),
        )

    def _parse_cte(self) -> t.Optional[exp.CTE]:
        index = self._index

        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE:
            self._retreat(index)
            return None

        comments = self._prev_comments

        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        cte = self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
            comments=comments,
        )

        values = cte.this
        if isinstance(values, exp.Values):
            if values.alias:
                cte.set("this", exp.select("*").from_(values))
            else:
                cte.set("this", exp.select("*").from_(exp.alias_(values, "_values", table=True)))

        return cte
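    # Illustrative example (not part of the parser itself) of a recursive CTE with
    # the SEARCH clause handled by _parse_recursive_with_search; names are arbitrary:
    #
    #     WITH RECURSIVE nodes(id) AS (SELECT 1 UNION ALL SELECT id + 1 FROM nodes WHERE id < 5)
    #     SEARCH DEPTH FIRST BY id SET ord
    #     SELECT * FROM nodes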
version and if it fails, it treats the token 3413 # as an identifier (alias) 3414 if self._can_parse_limit_or_offset(): 3415 return None 3416 3417 any_token = self._match(TokenType.ALIAS) 3418 alias = ( 3419 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3420 or self._parse_string_as_identifier() 3421 ) 3422 3423 index = self._index 3424 if self._match(TokenType.L_PAREN): 3425 columns = self._parse_csv(self._parse_function_parameter) 3426 self._match_r_paren() if columns else self._retreat(index) 3427 else: 3428 columns = None 3429 3430 if not alias and not columns: 3431 return None 3432 3433 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3434 3435 # We bubble up comments from the Identifier to the TableAlias 3436 if isinstance(alias, exp.Identifier): 3437 table_alias.add_comments(alias.pop_comments()) 3438 3439 return table_alias 3440 3441 def _parse_subquery( 3442 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3443 ) -> t.Optional[exp.Subquery]: 3444 if not this: 3445 return None 3446 3447 return self.expression( 3448 exp.Subquery, 3449 this=this, 3450 pivots=self._parse_pivots(), 3451 alias=self._parse_table_alias() if parse_alias else None, 3452 sample=self._parse_table_sample(), 3453 ) 3454 3455 def _implicit_unnests_to_explicit(self, this: E) -> E: 3456 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3457 3458 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3459 for i, join in enumerate(this.args.get("joins") or []): 3460 table = join.this 3461 normalized_table = table.copy() 3462 normalized_table.meta["maybe_column"] = True 3463 normalized_table = _norm(normalized_table, dialect=self.dialect) 3464 3465 if isinstance(table, exp.Table) and not join.args.get("on"): 3466 if normalized_table.parts[0].name in refs: 3467 table_as_column = table.to_column() 3468 unnest = exp.Unnest(expressions=[table_as_column]) 3469 3470 # Table.to_column creates a parent Alias node that we want to convert to 3471 # a TableAlias and attach to the Unnest, so it matches the parser's output 3472 if isinstance(table.args.get("alias"), exp.TableAlias): 3473 table_as_column.replace(table_as_column.this) 3474 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3475 3476 table.replace(unnest) 3477 3478 refs.add(normalized_table.alias_or_name) 3479 3480 return this 3481 3482 def _parse_query_modifiers( 3483 self, this: t.Optional[exp.Expression] 3484 ) -> t.Optional[exp.Expression]: 3485 if isinstance(this, self.MODIFIABLES): 3486 for join in self._parse_joins(): 3487 this.append("joins", join) 3488 for lateral in iter(self._parse_lateral, None): 3489 this.append("laterals", lateral) 3490 3491 while True: 3492 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3493 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3494 key, expression = parser(self) 3495 3496 if expression: 3497 this.set(key, expression) 3498 if key == "limit": 3499 offset = expression.args.pop("offset", None) 3500 3501 if offset: 3502 offset = exp.Offset(expression=offset) 3503 this.set("offset", offset) 3504 3505 limit_by_expressions = expression.expressions 3506 expression.set("expressions", None) 3507 offset.set("expressions", limit_by_expressions) 3508 continue 3509 break 3510 3511 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3512 this = self._implicit_unnests_to_explicit(this) 3513 3514 return this 3515 3516 def 
_parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3517 start = self._curr 3518 while self._curr: 3519 self._advance() 3520 3521 end = self._tokens[self._index - 1] 3522 return exp.Hint(expressions=[self._find_sql(start, end)]) 3523 3524 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3525 return self._parse_function_call() 3526 3527 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3528 start_index = self._index 3529 should_fallback_to_string = False 3530 3531 hints = [] 3532 try: 3533 for hint in iter( 3534 lambda: self._parse_csv( 3535 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3536 ), 3537 [], 3538 ): 3539 hints.extend(hint) 3540 except ParseError: 3541 should_fallback_to_string = True 3542 3543 if should_fallback_to_string or self._curr: 3544 self._retreat(start_index) 3545 return self._parse_hint_fallback_to_string() 3546 3547 return self.expression(exp.Hint, expressions=hints) 3548 3549 def _parse_hint(self) -> t.Optional[exp.Hint]: 3550 if self._match(TokenType.HINT) and self._prev_comments: 3551 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3552 3553 return None 3554 3555 def _parse_into(self) -> t.Optional[exp.Into]: 3556 if not self._match(TokenType.INTO): 3557 return None 3558 3559 temp = self._match(TokenType.TEMPORARY) 3560 unlogged = self._match_text_seq("UNLOGGED") 3561 self._match(TokenType.TABLE) 3562 3563 return self.expression( 3564 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3565 ) 3566 3567 def _parse_from( 3568 self, 3569 joins: bool = False, 3570 skip_from_token: bool = False, 3571 consume_pipe: bool = False, 3572 ) -> t.Optional[exp.From]: 3573 if not skip_from_token and not self._match(TokenType.FROM): 3574 return None 3575 3576 return self.expression( 3577 exp.From, 3578 comments=self._prev_comments, 3579 this=self._parse_table(joins=joins, consume_pipe=consume_pipe), 3580 ) 3581 3582 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3583 return self.expression( 3584 exp.MatchRecognizeMeasure, 3585 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3586 this=self._parse_expression(), 3587 ) 3588 3589 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3590 if not self._match(TokenType.MATCH_RECOGNIZE): 3591 return None 3592 3593 self._match_l_paren() 3594 3595 partition = self._parse_partition_by() 3596 order = self._parse_order() 3597 3598 measures = ( 3599 self._parse_csv(self._parse_match_recognize_measure) 3600 if self._match_text_seq("MEASURES") 3601 else None 3602 ) 3603 3604 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3605 rows = exp.var("ONE ROW PER MATCH") 3606 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3607 text = "ALL ROWS PER MATCH" 3608 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3609 text += " SHOW EMPTY MATCHES" 3610 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3611 text += " OMIT EMPTY MATCHES" 3612 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3613 text += " WITH UNMATCHED ROWS" 3614 rows = exp.var(text) 3615 else: 3616 rows = None 3617 3618 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3619 text = "AFTER MATCH SKIP" 3620 if self._match_text_seq("PAST", "LAST", "ROW"): 3621 text += " PAST LAST ROW" 3622 elif self._match_text_seq("TO", "NEXT", "ROW"): 3623 text += " TO NEXT ROW" 3624 elif self._match_text_seq("TO", "FIRST"): 3625 text += f" TO FIRST {self._advance_any().text}" # type: 
ignore 3626 elif self._match_text_seq("TO", "LAST"): 3627 text += f" TO LAST {self._advance_any().text}" # type: ignore 3628 after = exp.var(text) 3629 else: 3630 after = None 3631 3632 if self._match_text_seq("PATTERN"): 3633 self._match_l_paren() 3634 3635 if not self._curr: 3636 self.raise_error("Expecting )", self._curr) 3637 3638 paren = 1 3639 start = self._curr 3640 3641 while self._curr and paren > 0: 3642 if self._curr.token_type == TokenType.L_PAREN: 3643 paren += 1 3644 if self._curr.token_type == TokenType.R_PAREN: 3645 paren -= 1 3646 3647 end = self._prev 3648 self._advance() 3649 3650 if paren > 0: 3651 self.raise_error("Expecting )", self._curr) 3652 3653 pattern = exp.var(self._find_sql(start, end)) 3654 else: 3655 pattern = None 3656 3657 define = ( 3658 self._parse_csv(self._parse_name_as_expression) 3659 if self._match_text_seq("DEFINE") 3660 else None 3661 ) 3662 3663 self._match_r_paren() 3664 3665 return self.expression( 3666 exp.MatchRecognize, 3667 partition_by=partition, 3668 order=order, 3669 measures=measures, 3670 rows=rows, 3671 after=after, 3672 pattern=pattern, 3673 define=define, 3674 alias=self._parse_table_alias(), 3675 ) 3676 3677 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3678 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3679 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3680 cross_apply = False 3681 3682 if cross_apply is not None: 3683 this = self._parse_select(table=True) 3684 view = None 3685 outer = None 3686 elif self._match(TokenType.LATERAL): 3687 this = self._parse_select(table=True) 3688 view = self._match(TokenType.VIEW) 3689 outer = self._match(TokenType.OUTER) 3690 else: 3691 return None 3692 3693 if not this: 3694 this = ( 3695 self._parse_unnest() 3696 or self._parse_function() 3697 or self._parse_id_var(any_token=False) 3698 ) 3699 3700 while self._match(TokenType.DOT): 3701 this = exp.Dot( 3702 this=this, 3703 expression=self._parse_function() or self._parse_id_var(any_token=False), 3704 ) 3705 3706 ordinality: t.Optional[bool] = None 3707 3708 if view: 3709 table = self._parse_id_var(any_token=False) 3710 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3711 table_alias: t.Optional[exp.TableAlias] = self.expression( 3712 exp.TableAlias, this=table, columns=columns 3713 ) 3714 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3715 # We move the alias from the lateral's child node to the lateral itself 3716 table_alias = this.args["alias"].pop() 3717 else: 3718 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3719 table_alias = self._parse_table_alias() 3720 3721 return self.expression( 3722 exp.Lateral, 3723 this=this, 3724 view=view, 3725 outer=outer, 3726 alias=table_alias, 3727 cross_apply=cross_apply, 3728 ordinality=ordinality, 3729 ) 3730 3731 def _parse_join_parts( 3732 self, 3733 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3734 return ( 3735 self._match_set(self.JOIN_METHODS) and self._prev, 3736 self._match_set(self.JOIN_SIDES) and self._prev, 3737 self._match_set(self.JOIN_KINDS) and self._prev, 3738 ) 3739 3740 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3741 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3742 this = self._parse_column() 3743 if isinstance(this, exp.Column): 3744 return this.this 3745 return this 3746 3747 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3748 3749 def _parse_join( 3750 self, 
skip_join_token: bool = False, parse_bracket: bool = False 3751 ) -> t.Optional[exp.Join]: 3752 if self._match(TokenType.COMMA): 3753 table = self._try_parse(self._parse_table) 3754 cross_join = self.expression(exp.Join, this=table) if table else None 3755 3756 if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE: 3757 cross_join.set("kind", "CROSS") 3758 3759 return cross_join 3760 3761 index = self._index 3762 method, side, kind = self._parse_join_parts() 3763 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3764 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3765 join_comments = self._prev_comments 3766 3767 if not skip_join_token and not join: 3768 self._retreat(index) 3769 kind = None 3770 method = None 3771 side = None 3772 3773 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3774 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3775 3776 if not skip_join_token and not join and not outer_apply and not cross_apply: 3777 return None 3778 3779 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3780 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3781 kwargs["expressions"] = self._parse_csv( 3782 lambda: self._parse_table(parse_bracket=parse_bracket) 3783 ) 3784 3785 if method: 3786 kwargs["method"] = method.text 3787 if side: 3788 kwargs["side"] = side.text 3789 if kind: 3790 kwargs["kind"] = kind.text 3791 if hint: 3792 kwargs["hint"] = hint 3793 3794 if self._match(TokenType.MATCH_CONDITION): 3795 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3796 3797 if self._match(TokenType.ON): 3798 kwargs["on"] = self._parse_assignment() 3799 elif self._match(TokenType.USING): 3800 kwargs["using"] = self._parse_using_identifiers() 3801 elif ( 3802 not (outer_apply or cross_apply) 3803 and not isinstance(kwargs["this"], exp.Unnest) 3804 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3805 ): 3806 index = self._index 3807 joins: t.Optional[list] = list(self._parse_joins()) 3808 3809 if joins and self._match(TokenType.ON): 3810 kwargs["on"] = self._parse_assignment() 3811 elif joins and self._match(TokenType.USING): 3812 kwargs["using"] = self._parse_using_identifiers() 3813 else: 3814 joins = None 3815 self._retreat(index) 3816 3817 kwargs["this"].set("joins", joins if joins else None) 3818 3819 kwargs["pivots"] = self._parse_pivots() 3820 3821 comments = [c for token in (method, side, kind) if token for c in token.comments] 3822 comments = (join_comments or []) + comments 3823 return self.expression(exp.Join, comments=comments, **kwargs) 3824 3825 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3826 this = self._parse_assignment() 3827 3828 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3829 return this 3830 3831 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3832 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3833 3834 return this 3835 3836 def _parse_index_params(self) -> exp.IndexParameters: 3837 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3838 3839 if self._match(TokenType.L_PAREN, advance=False): 3840 columns = self._parse_wrapped_csv(self._parse_with_operator) 3841 else: 3842 columns = None 3843 3844 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3845 partition_by = self._parse_partition_by() 3846 
with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3847 tablespace = ( 3848 self._parse_var(any_token=True) 3849 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3850 else None 3851 ) 3852 where = self._parse_where() 3853 3854 on = self._parse_field() if self._match(TokenType.ON) else None 3855 3856 return self.expression( 3857 exp.IndexParameters, 3858 using=using, 3859 columns=columns, 3860 include=include, 3861 partition_by=partition_by, 3862 where=where, 3863 with_storage=with_storage, 3864 tablespace=tablespace, 3865 on=on, 3866 ) 3867 3868 def _parse_index( 3869 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3870 ) -> t.Optional[exp.Index]: 3871 if index or anonymous: 3872 unique = None 3873 primary = None 3874 amp = None 3875 3876 self._match(TokenType.ON) 3877 self._match(TokenType.TABLE) # hive 3878 table = self._parse_table_parts(schema=True) 3879 else: 3880 unique = self._match(TokenType.UNIQUE) 3881 primary = self._match_text_seq("PRIMARY") 3882 amp = self._match_text_seq("AMP") 3883 3884 if not self._match(TokenType.INDEX): 3885 return None 3886 3887 index = self._parse_id_var() 3888 table = None 3889 3890 params = self._parse_index_params() 3891 3892 return self.expression( 3893 exp.Index, 3894 this=index, 3895 table=table, 3896 unique=unique, 3897 primary=primary, 3898 amp=amp, 3899 params=params, 3900 ) 3901 3902 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3903 hints: t.List[exp.Expression] = [] 3904 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3905 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3906 hints.append( 3907 self.expression( 3908 exp.WithTableHint, 3909 expressions=self._parse_csv( 3910 lambda: self._parse_function() or self._parse_var(any_token=True) 3911 ), 3912 ) 3913 ) 3914 self._match_r_paren() 3915 else: 3916 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3917 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3918 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3919 3920 self._match_set((TokenType.INDEX, TokenType.KEY)) 3921 if self._match(TokenType.FOR): 3922 hint.set("target", self._advance_any() and self._prev.text.upper()) 3923 3924 hint.set("expressions", self._parse_wrapped_id_vars()) 3925 hints.append(hint) 3926 3927 return hints or None 3928 3929 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3930 return ( 3931 (not schema and self._parse_function(optional_parens=False)) 3932 or self._parse_id_var(any_token=False) 3933 or self._parse_string_as_identifier() 3934 or self._parse_placeholder() 3935 ) 3936 3937 def _parse_table_parts( 3938 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3939 ) -> exp.Table: 3940 catalog = None 3941 db = None 3942 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3943 3944 while self._match(TokenType.DOT): 3945 if catalog: 3946 # This allows nesting the table in arbitrarily many dot expressions if needed 3947 table = self.expression( 3948 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3949 ) 3950 else: 3951 catalog = db 3952 db = table 3953 # "" used for tsql FROM a..b case 3954 table = self._parse_table_part(schema=schema) or "" 3955 3956 if ( 3957 wildcard 3958 and self._is_connected() 3959 and (isinstance(table, exp.Identifier) or not table) 3960 and self._match(TokenType.STAR) 3961 ): 3962 if isinstance(table, exp.Identifier): 
3963 table.args["this"] += "*" 3964 else: 3965 table = exp.Identifier(this="*") 3966 3967 # We bubble up comments from the Identifier to the Table 3968 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3969 3970 if is_db_reference: 3971 catalog = db 3972 db = table 3973 table = None 3974 3975 if not table and not is_db_reference: 3976 self.raise_error(f"Expected table name but got {self._curr}") 3977 if not db and is_db_reference: 3978 self.raise_error(f"Expected database name but got {self._curr}") 3979 3980 table = self.expression( 3981 exp.Table, 3982 comments=comments, 3983 this=table, 3984 db=db, 3985 catalog=catalog, 3986 ) 3987 3988 changes = self._parse_changes() 3989 if changes: 3990 table.set("changes", changes) 3991 3992 at_before = self._parse_historical_data() 3993 if at_before: 3994 table.set("when", at_before) 3995 3996 pivots = self._parse_pivots() 3997 if pivots: 3998 table.set("pivots", pivots) 3999 4000 return table 4001 4002 def _parse_table( 4003 self, 4004 schema: bool = False, 4005 joins: bool = False, 4006 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 4007 parse_bracket: bool = False, 4008 is_db_reference: bool = False, 4009 parse_partition: bool = False, 4010 consume_pipe: bool = False, 4011 ) -> t.Optional[exp.Expression]: 4012 lateral = self._parse_lateral() 4013 if lateral: 4014 return lateral 4015 4016 unnest = self._parse_unnest() 4017 if unnest: 4018 return unnest 4019 4020 values = self._parse_derived_table_values() 4021 if values: 4022 return values 4023 4024 subquery = self._parse_select(table=True, consume_pipe=consume_pipe) 4025 if subquery: 4026 if not subquery.args.get("pivots"): 4027 subquery.set("pivots", self._parse_pivots()) 4028 return subquery 4029 4030 bracket = parse_bracket and self._parse_bracket(None) 4031 bracket = self.expression(exp.Table, this=bracket) if bracket else None 4032 4033 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 4034 self._parse_table 4035 ) 4036 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 4037 4038 only = self._match(TokenType.ONLY) 4039 4040 this = t.cast( 4041 exp.Expression, 4042 bracket 4043 or rows_from 4044 or self._parse_bracket( 4045 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 4046 ), 4047 ) 4048 4049 if only: 4050 this.set("only", only) 4051 4052 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 4053 self._match_text_seq("*") 4054 4055 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 4056 if parse_partition and self._match(TokenType.PARTITION, advance=False): 4057 this.set("partition", self._parse_partition()) 4058 4059 if schema: 4060 return self._parse_schema(this=this) 4061 4062 version = self._parse_version() 4063 4064 if version: 4065 this.set("version", version) 4066 4067 if self.dialect.ALIAS_POST_TABLESAMPLE: 4068 this.set("sample", self._parse_table_sample()) 4069 4070 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 4071 if alias: 4072 this.set("alias", alias) 4073 4074 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 4075 return self.expression( 4076 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 4077 ) 4078 4079 this.set("hints", self._parse_table_hints()) 4080 4081 if not this.args.get("pivots"): 4082 this.set("pivots", self._parse_pivots()) 4083 4084 if not self.dialect.ALIAS_POST_TABLESAMPLE: 4085 this.set("sample", 
self._parse_table_sample()) 4086 4087 if joins: 4088 for join in self._parse_joins(): 4089 this.append("joins", join) 4090 4091 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 4092 this.set("ordinality", True) 4093 this.set("alias", self._parse_table_alias()) 4094 4095 return this 4096 4097 def _parse_version(self) -> t.Optional[exp.Version]: 4098 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 4099 this = "TIMESTAMP" 4100 elif self._match(TokenType.VERSION_SNAPSHOT): 4101 this = "VERSION" 4102 else: 4103 return None 4104 4105 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 4106 kind = self._prev.text.upper() 4107 start = self._parse_bitwise() 4108 self._match_texts(("TO", "AND")) 4109 end = self._parse_bitwise() 4110 expression: t.Optional[exp.Expression] = self.expression( 4111 exp.Tuple, expressions=[start, end] 4112 ) 4113 elif self._match_text_seq("CONTAINED", "IN"): 4114 kind = "CONTAINED IN" 4115 expression = self.expression( 4116 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 4117 ) 4118 elif self._match(TokenType.ALL): 4119 kind = "ALL" 4120 expression = None 4121 else: 4122 self._match_text_seq("AS", "OF") 4123 kind = "AS OF" 4124 expression = self._parse_type() 4125 4126 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 4127 4128 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 4129 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 4130 index = self._index 4131 historical_data = None 4132 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 4133 this = self._prev.text.upper() 4134 kind = ( 4135 self._match(TokenType.L_PAREN) 4136 and self._match_texts(self.HISTORICAL_DATA_KIND) 4137 and self._prev.text.upper() 4138 ) 4139 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 4140 4141 if expression: 4142 self._match_r_paren() 4143 historical_data = self.expression( 4144 exp.HistoricalData, this=this, kind=kind, expression=expression 4145 ) 4146 else: 4147 self._retreat(index) 4148 4149 return historical_data 4150 4151 def _parse_changes(self) -> t.Optional[exp.Changes]: 4152 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4153 return None 4154 4155 information = self._parse_var(any_token=True) 4156 self._match_r_paren() 4157 4158 return self.expression( 4159 exp.Changes, 4160 information=information, 4161 at_before=self._parse_historical_data(), 4162 end=self._parse_historical_data(), 4163 ) 4164 4165 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4166 if not self._match(TokenType.UNNEST): 4167 return None 4168 4169 expressions = self._parse_wrapped_csv(self._parse_equality) 4170 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4171 4172 alias = self._parse_table_alias() if with_alias else None 4173 4174 if alias: 4175 if self.dialect.UNNEST_COLUMN_ONLY: 4176 if alias.args.get("columns"): 4177 self.raise_error("Unexpected extra column alias in unnest.") 4178 4179 alias.set("columns", [alias.this]) 4180 alias.set("this", None) 4181 4182 columns = alias.args.get("columns") or [] 4183 if offset and len(expressions) < len(columns): 4184 offset = columns.pop() 4185 4186 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4187 self._match(TokenType.ALIAS) 4188 offset = self._parse_id_var( 4189 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4190 ) or exp.to_identifier("offset") 4191 4192 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 4193 
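    # --- Editor's note (illustrative, not part of the original source) ---
    # `_parse_unnest` above also normalizes alias handling across dialects.
    # A minimal sketch using only the public API; it assumes the BigQuery
    # dialect, which sets UNNEST_COLUMN_ONLY so a single alias names the
    # produced column rather than the table:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     unnest = sqlglot.parse_one(
    #         "SELECT * FROM UNNEST([1, 2, 3]) AS x", read="bigquery"
    #     ).find(exp.Unnest)
    #     alias = unnest.args["alias"]
    #     assert alias.args.get("this") is None  # the alias was moved out of `this`...
    #     print(alias.args.get("columns"))       # ...and into `columns`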
4194     def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
4195         is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
4196         if not is_derived and not (
4197             # ClickHouse's `FORMAT Values` is equivalent to `VALUES`
4198             self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES")
4199         ):
4200             return None
4201 
4202         expressions = self._parse_csv(self._parse_value)
4203         alias = self._parse_table_alias()
4204 
4205         if is_derived:
4206             self._match_r_paren()
4207 
4208         return self.expression(
4209             exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
4210         )
4211 
4212     def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
4213         if not self._match(TokenType.TABLE_SAMPLE) and not (
4214             as_modifier and self._match_text_seq("USING", "SAMPLE")
4215         ):
4216             return None
4217 
4218         bucket_numerator = None
4219         bucket_denominator = None
4220         bucket_field = None
4221         percent = None
4222         size = None
4223         seed = None
4224 
4225         method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
4226         matched_l_paren = self._match(TokenType.L_PAREN)
4227 
4228         if self.TABLESAMPLE_CSV:
4229             num = None
4230             expressions = self._parse_csv(self._parse_primary)
4231         else:
4232             expressions = None
4233             num = (
4234                 self._parse_factor()
4235                 if self._match(TokenType.NUMBER, advance=False)
4236                 else self._parse_primary() or self._parse_placeholder()
4237             )
4238 
4239         if self._match_text_seq("BUCKET"):
4240             bucket_numerator = self._parse_number()
4241             self._match_text_seq("OUT", "OF")
4242             bucket_denominator = self._parse_number()
4243             self._match(TokenType.ON)
4244             bucket_field = self._parse_field()
4245         elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
4246             percent = num
4247         elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
4248             size = num
4249         else:
4250             percent = num
4251 
4252         if matched_l_paren:
4253             self._match_r_paren()
4254 
4255         if self._match(TokenType.L_PAREN):
4256             method = self._parse_var(upper=True)
4257             seed = self._match(TokenType.COMMA) and self._parse_number()
4258             self._match_r_paren()
4259         elif self._match_texts(("SEED", "REPEATABLE")):
4260             seed = self._parse_wrapped(self._parse_number)
4261 
4262         if not method and self.DEFAULT_SAMPLING_METHOD:
4263             method = exp.var(self.DEFAULT_SAMPLING_METHOD)
4264 
4265         return self.expression(
4266             exp.TableSample,
4267             expressions=expressions,
4268             method=method,
4269             bucket_numerator=bucket_numerator,
4270             bucket_denominator=bucket_denominator,
4271             bucket_field=bucket_field,
4272             percent=percent,
4273             size=size,
4274             seed=seed,
4275         )
4276 
4277     def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
4278         return list(iter(self._parse_pivot, None)) or None
4279 
4280     def _parse_joins(self) -> t.Iterator[exp.Join]:
4281         return iter(self._parse_join, None)
4282 
4283     def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]:
4284         if not self._match(TokenType.INTO):
4285             return None
4286 
4287         return self.expression(
4288             exp.UnpivotColumns,
4289             this=self._match_text_seq("NAME") and self._parse_column(),
4290             expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column),
4291         )
4292 
4293     # https://duckdb.org/docs/sql/statements/pivot
4294     def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot:
4295         def _parse_on() -> t.Optional[exp.Expression]:
4296             this = self._parse_bitwise()
4297 
4298             if self._match(TokenType.IN):
4299                 # PIVOT ...
ON col IN (row_val1, row_val2) 4300 return self._parse_in(this) 4301 if self._match(TokenType.ALIAS, advance=False): 4302 # UNPIVOT ... ON (col1, col2, col3) AS row_val 4303 return self._parse_alias(this) 4304 4305 return this 4306 4307 this = self._parse_table() 4308 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4309 into = self._parse_unpivot_columns() 4310 using = self._match(TokenType.USING) and self._parse_csv( 4311 lambda: self._parse_alias(self._parse_function()) 4312 ) 4313 group = self._parse_group() 4314 4315 return self.expression( 4316 exp.Pivot, 4317 this=this, 4318 expressions=expressions, 4319 using=using, 4320 group=group, 4321 unpivot=is_unpivot, 4322 into=into, 4323 ) 4324 4325 def _parse_pivot_in(self) -> exp.In: 4326 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4327 this = self._parse_select_or_expression() 4328 4329 self._match(TokenType.ALIAS) 4330 alias = self._parse_bitwise() 4331 if alias: 4332 if isinstance(alias, exp.Column) and not alias.db: 4333 alias = alias.this 4334 return self.expression(exp.PivotAlias, this=this, alias=alias) 4335 4336 return this 4337 4338 value = self._parse_column() 4339 4340 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4341 self.raise_error("Expecting IN (") 4342 4343 if self._match(TokenType.ANY): 4344 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4345 else: 4346 exprs = self._parse_csv(_parse_aliased_expression) 4347 4348 self._match_r_paren() 4349 return self.expression(exp.In, this=value, expressions=exprs) 4350 4351 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4352 index = self._index 4353 include_nulls = None 4354 4355 if self._match(TokenType.PIVOT): 4356 unpivot = False 4357 elif self._match(TokenType.UNPIVOT): 4358 unpivot = True 4359 4360 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4361 if self._match_text_seq("INCLUDE", "NULLS"): 4362 include_nulls = True 4363 elif self._match_text_seq("EXCLUDE", "NULLS"): 4364 include_nulls = False 4365 else: 4366 return None 4367 4368 expressions = [] 4369 4370 if not self._match(TokenType.L_PAREN): 4371 self._retreat(index) 4372 return None 4373 4374 if unpivot: 4375 expressions = self._parse_csv(self._parse_column) 4376 else: 4377 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 4378 4379 if not expressions: 4380 self.raise_error("Failed to parse PIVOT's aggregation list") 4381 4382 if not self._match(TokenType.FOR): 4383 self.raise_error("Expecting FOR") 4384 4385 fields = [] 4386 while True: 4387 field = self._try_parse(self._parse_pivot_in) 4388 if not field: 4389 break 4390 fields.append(field) 4391 4392 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4393 self._parse_bitwise 4394 ) 4395 4396 group = self._parse_group() 4397 4398 self._match_r_paren() 4399 4400 pivot = self.expression( 4401 exp.Pivot, 4402 expressions=expressions, 4403 fields=fields, 4404 unpivot=unpivot, 4405 include_nulls=include_nulls, 4406 default_on_null=default_on_null, 4407 group=group, 4408 ) 4409 4410 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4411 pivot.set("alias", self._parse_table_alias()) 4412 4413 if not unpivot: 4414 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4415 4416 columns: t.List[exp.Expression] = [] 4417 all_fields = [] 4418 for pivot_field in pivot.fields: 4419 pivot_field_expressions = 
pivot_field.expressions
4420 
4421                 # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case.
4422                 if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny):
4423                     continue
4424 
4425                 all_fields.append(
4426                     [
4427                         fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
4428                         for fld in pivot_field_expressions
4429                     ]
4430                 )
4431 
4432             if all_fields:
4433                 if names:
4434                     all_fields.append(names)
4435 
4436                 # Generate all possible combinations of the pivot columns
4437                 # e.g. PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US'))
4438                 # generates the product of [[2000, 2010], ['NL', 'US'], ['total']]
4439                 for fld_parts_tuple in itertools.product(*all_fields):
4440                     fld_parts = list(fld_parts_tuple)
4441 
4442                     if names and self.PREFIXED_PIVOT_COLUMNS:
4443                         # Move the "name" to the front of the list
4444                         fld_parts.insert(0, fld_parts.pop(-1))
4445 
4446                     columns.append(exp.to_identifier("_".join(fld_parts)))
4447 
4448             pivot.set("columns", columns)
4449 
4450         return pivot
4451 
4452     def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
4453         return [agg.alias for agg in aggregations if agg.alias]
4454 
4455     def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
4456         if not skip_where_token and not self._match(TokenType.PREWHERE):
4457             return None
4458 
4459         return self.expression(
4460             exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment()
4461         )
4462 
4463     def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
4464         if not skip_where_token and not self._match(TokenType.WHERE):
4465             return None
4466 
4467         return self.expression(
4468             exp.Where, comments=self._prev_comments, this=self._parse_assignment()
4469         )
4470 
4471     def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
4472         if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
4473             return None
4474         comments = self._prev_comments
4475 
4476         elements: t.Dict[str, t.Any] = defaultdict(list)
4477 
4478         if self._match(TokenType.ALL):
4479             elements["all"] = True
4480         elif self._match(TokenType.DISTINCT):
4481             elements["all"] = False
4482 
4483         while True:
4484             index = self._index
4485 
4486             elements["expressions"].extend(
4487                 self._parse_csv(
4488                     lambda: None
4489                     if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False)
4490                     else self._parse_assignment()
4491                 )
4492             )
4493 
4494             before_with_index = self._index
4495             with_prefix = self._match(TokenType.WITH)
4496 
4497             if self._match(TokenType.ROLLUP):
4498                 elements["rollup"].append(
4499                     self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix)
4500                 )
4501             elif self._match(TokenType.CUBE):
4502                 elements["cube"].append(
4503                     self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix)
4504                 )
4505             elif self._match(TokenType.GROUPING_SETS):
4506                 elements["grouping_sets"].append(
4507                     self.expression(
4508                         exp.GroupingSets,
4509                         expressions=self._parse_wrapped_csv(self._parse_grouping_set),
4510                     )
4511                 )
4512             elif self._match_text_seq("TOTALS"):
4513                 elements["totals"] = True  # type: ignore
4514 
4515             if before_with_index <= self._index <= before_with_index + 1:
4516                 self._retreat(before_with_index)
4517                 break
4518 
4519             if index == self._index:
4520                 break
4521 
4522         return self.expression(exp.Group, comments=comments, **elements)  # type: ignore
4523 
4524     def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E:
4525         return self.expression(
4526             kind,
expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4527 ) 4528 4529 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4530 if self._match(TokenType.L_PAREN): 4531 grouping_set = self._parse_csv(self._parse_column) 4532 self._match_r_paren() 4533 return self.expression(exp.Tuple, expressions=grouping_set) 4534 4535 return self._parse_column() 4536 4537 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4538 if not skip_having_token and not self._match(TokenType.HAVING): 4539 return None 4540 return self.expression( 4541 exp.Having, comments=self._prev_comments, this=self._parse_assignment() 4542 ) 4543 4544 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4545 if not self._match(TokenType.QUALIFY): 4546 return None 4547 return self.expression(exp.Qualify, this=self._parse_assignment()) 4548 4549 def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]: 4550 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4551 exp.Prior, this=self._parse_bitwise() 4552 ) 4553 connect = self._parse_assignment() 4554 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4555 return connect 4556 4557 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4558 if skip_start_token: 4559 start = None 4560 elif self._match(TokenType.START_WITH): 4561 start = self._parse_assignment() 4562 else: 4563 return None 4564 4565 self._match(TokenType.CONNECT_BY) 4566 nocycle = self._match_text_seq("NOCYCLE") 4567 connect = self._parse_connect_with_prior() 4568 4569 if not start and self._match(TokenType.START_WITH): 4570 start = self._parse_assignment() 4571 4572 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4573 4574 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4575 this = self._parse_id_var(any_token=True) 4576 if self._match(TokenType.ALIAS): 4577 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4578 return this 4579 4580 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4581 if self._match_text_seq("INTERPOLATE"): 4582 return self._parse_wrapped_csv(self._parse_name_as_expression) 4583 return None 4584 4585 def _parse_order( 4586 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4587 ) -> t.Optional[exp.Expression]: 4588 siblings = None 4589 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4590 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4591 return this 4592 4593 siblings = True 4594 4595 return self.expression( 4596 exp.Order, 4597 comments=self._prev_comments, 4598 this=this, 4599 expressions=self._parse_csv(self._parse_ordered), 4600 siblings=siblings, 4601 ) 4602 4603 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4604 if not self._match(token): 4605 return None 4606 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4607 4608 def _parse_ordered( 4609 self, parse_method: t.Optional[t.Callable] = None 4610 ) -> t.Optional[exp.Ordered]: 4611 this = parse_method() if parse_method else self._parse_assignment() 4612 if not this: 4613 return None 4614 4615 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4616 this = exp.var("ALL") 4617 4618 asc = self._match(TokenType.ASC) 4619 desc = self._match(TokenType.DESC) or (asc and False) 4620 4621 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4622 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4623 
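        # --- Editor's note (illustrative, not part of the original source) ---
        # Above, `desc` ends up tri-state: True (explicit DESC), False
        # (explicit ASC, via the `asc and False` trick), or None (no
        # direction given). The block below then infers an implicit NULLS
        # FIRST from the dialect's NULL_ORDERING when the query doesn't say
        # NULLS FIRST/LAST itself. A hedged sketch using the public API:
        #
        #     import sqlglot
        #     from sqlglot import exp
        #
        #     ordered = sqlglot.parse_one("SELECT a FROM t ORDER BY a").find(exp.Ordered)
        #     # Whether this prints True depends on the dialect's NULL_ORDERING,
        #     # not only on explicit NULLS FIRST/LAST syntax.
        #     print(ordered.args.get("nulls_first"))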
4624 nulls_first = is_nulls_first or False 4625 explicitly_null_ordered = is_nulls_first or is_nulls_last 4626 4627 if ( 4628 not explicitly_null_ordered 4629 and ( 4630 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4631 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4632 ) 4633 and self.dialect.NULL_ORDERING != "nulls_are_last" 4634 ): 4635 nulls_first = True 4636 4637 if self._match_text_seq("WITH", "FILL"): 4638 with_fill = self.expression( 4639 exp.WithFill, 4640 **{ # type: ignore 4641 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4642 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4643 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4644 "interpolate": self._parse_interpolate(), 4645 }, 4646 ) 4647 else: 4648 with_fill = None 4649 4650 return self.expression( 4651 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4652 ) 4653 4654 def _parse_limit_options(self) -> exp.LimitOptions: 4655 percent = self._match(TokenType.PERCENT) 4656 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 4657 self._match_text_seq("ONLY") 4658 with_ties = self._match_text_seq("WITH", "TIES") 4659 return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties) 4660 4661 def _parse_limit( 4662 self, 4663 this: t.Optional[exp.Expression] = None, 4664 top: bool = False, 4665 skip_limit_token: bool = False, 4666 ) -> t.Optional[exp.Expression]: 4667 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4668 comments = self._prev_comments 4669 if top: 4670 limit_paren = self._match(TokenType.L_PAREN) 4671 expression = self._parse_term() if limit_paren else self._parse_number() 4672 4673 if limit_paren: 4674 self._match_r_paren() 4675 4676 limit_options = self._parse_limit_options() 4677 else: 4678 limit_options = None 4679 expression = self._parse_term() 4680 4681 if self._match(TokenType.COMMA): 4682 offset = expression 4683 expression = self._parse_term() 4684 else: 4685 offset = None 4686 4687 limit_exp = self.expression( 4688 exp.Limit, 4689 this=this, 4690 expression=expression, 4691 offset=offset, 4692 comments=comments, 4693 limit_options=limit_options, 4694 expressions=self._parse_limit_by(), 4695 ) 4696 4697 return limit_exp 4698 4699 if self._match(TokenType.FETCH): 4700 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4701 direction = self._prev.text.upper() if direction else "FIRST" 4702 4703 count = self._parse_field(tokens=self.FETCH_TOKENS) 4704 4705 return self.expression( 4706 exp.Fetch, 4707 direction=direction, 4708 count=count, 4709 limit_options=self._parse_limit_options(), 4710 ) 4711 4712 return this 4713 4714 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4715 if not self._match(TokenType.OFFSET): 4716 return this 4717 4718 count = self._parse_term() 4719 self._match_set((TokenType.ROW, TokenType.ROWS)) 4720 4721 return self.expression( 4722 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4723 ) 4724 4725 def _can_parse_limit_or_offset(self) -> bool: 4726 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4727 return False 4728 4729 index = self._index 4730 result = bool( 4731 self._try_parse(self._parse_limit, retreat=True) 4732 or self._try_parse(self._parse_offset, retreat=True) 4733 ) 4734 self._retreat(index) 4735 return result 4736 4737 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4738 return 
self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4739 4740 def _parse_locks(self) -> t.List[exp.Lock]: 4741 locks = [] 4742 while True: 4743 if self._match_text_seq("FOR", "UPDATE"): 4744 update = True 4745 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4746 "LOCK", "IN", "SHARE", "MODE" 4747 ): 4748 update = False 4749 else: 4750 break 4751 4752 expressions = None 4753 if self._match_text_seq("OF"): 4754 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4755 4756 wait: t.Optional[bool | exp.Expression] = None 4757 if self._match_text_seq("NOWAIT"): 4758 wait = True 4759 elif self._match_text_seq("WAIT"): 4760 wait = self._parse_primary() 4761 elif self._match_text_seq("SKIP", "LOCKED"): 4762 wait = False 4763 4764 locks.append( 4765 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4766 ) 4767 4768 return locks 4769 4770 def parse_set_operation( 4771 self, this: t.Optional[exp.Expression], consume_pipe: bool = False 4772 ) -> t.Optional[exp.Expression]: 4773 start = self._index 4774 _, side_token, kind_token = self._parse_join_parts() 4775 4776 side = side_token.text if side_token else None 4777 kind = kind_token.text if kind_token else None 4778 4779 if not self._match_set(self.SET_OPERATIONS): 4780 self._retreat(start) 4781 return None 4782 4783 token_type = self._prev.token_type 4784 4785 if token_type == TokenType.UNION: 4786 operation: t.Type[exp.SetOperation] = exp.Union 4787 elif token_type == TokenType.EXCEPT: 4788 operation = exp.Except 4789 else: 4790 operation = exp.Intersect 4791 4792 comments = self._prev.comments 4793 4794 if self._match(TokenType.DISTINCT): 4795 distinct: t.Optional[bool] = True 4796 elif self._match(TokenType.ALL): 4797 distinct = False 4798 else: 4799 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4800 if distinct is None: 4801 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4802 4803 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4804 "STRICT", "CORRESPONDING" 4805 ) 4806 if self._match_text_seq("CORRESPONDING"): 4807 by_name = True 4808 if not side and not kind: 4809 kind = "INNER" 4810 4811 on_column_list = None 4812 if by_name and self._match_texts(("ON", "BY")): 4813 on_column_list = self._parse_wrapped_csv(self._parse_column) 4814 4815 expression = self._parse_select( 4816 nested=True, parse_set_operation=False, consume_pipe=consume_pipe 4817 ) 4818 4819 return self.expression( 4820 operation, 4821 comments=comments, 4822 this=this, 4823 distinct=distinct, 4824 by_name=by_name, 4825 expression=expression, 4826 side=side, 4827 kind=kind, 4828 on=on_column_list, 4829 ) 4830 4831 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4832 while this: 4833 setop = self.parse_set_operation(this) 4834 if not setop: 4835 break 4836 this = setop 4837 4838 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4839 expression = this.expression 4840 4841 if expression: 4842 for arg in self.SET_OP_MODIFIERS: 4843 expr = expression.args.get(arg) 4844 if expr: 4845 this.set(arg, expr.pop()) 4846 4847 return this 4848 4849 def _parse_expression(self) -> t.Optional[exp.Expression]: 4850 return self._parse_alias(self._parse_assignment()) 4851 4852 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4853 this = self._parse_disjunction() 4854 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4855 # This allows us to parse 
<non-identifier token> := <expr> 4856 this = exp.column( 4857 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4858 ) 4859 4860 while self._match_set(self.ASSIGNMENT): 4861 if isinstance(this, exp.Column) and len(this.parts) == 1: 4862 this = this.this 4863 4864 this = self.expression( 4865 self.ASSIGNMENT[self._prev.token_type], 4866 this=this, 4867 comments=self._prev_comments, 4868 expression=self._parse_assignment(), 4869 ) 4870 4871 return this 4872 4873 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4874 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4875 4876 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4877 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4878 4879 def _parse_equality(self) -> t.Optional[exp.Expression]: 4880 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4881 4882 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4883 return self._parse_tokens(self._parse_range, self.COMPARISON) 4884 4885 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4886 this = this or self._parse_bitwise() 4887 negate = self._match(TokenType.NOT) 4888 4889 if self._match_set(self.RANGE_PARSERS): 4890 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4891 if not expression: 4892 return this 4893 4894 this = expression 4895 elif self._match(TokenType.ISNULL): 4896 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4897 4898 # Postgres supports ISNULL and NOTNULL for conditions. 4899 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4900 if self._match(TokenType.NOTNULL): 4901 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4902 this = self.expression(exp.Not, this=this) 4903 4904 if negate: 4905 this = self._negate_range(this) 4906 4907 if self._match(TokenType.IS): 4908 this = self._parse_is(this) 4909 4910 return this 4911 4912 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4913 if not this: 4914 return this 4915 4916 return self.expression(exp.Not, this=this) 4917 4918 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4919 index = self._index - 1 4920 negate = self._match(TokenType.NOT) 4921 4922 if self._match_text_seq("DISTINCT", "FROM"): 4923 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4924 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4925 4926 if self._match(TokenType.JSON): 4927 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4928 4929 if self._match_text_seq("WITH"): 4930 _with = True 4931 elif self._match_text_seq("WITHOUT"): 4932 _with = False 4933 else: 4934 _with = None 4935 4936 unique = self._match(TokenType.UNIQUE) 4937 self._match_text_seq("KEYS") 4938 expression: t.Optional[exp.Expression] = self.expression( 4939 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4940 ) 4941 else: 4942 expression = self._parse_primary() or self._parse_null() 4943 if not expression: 4944 self._retreat(index) 4945 return None 4946 4947 this = self.expression(exp.Is, this=this, expression=expression) 4948 return self.expression(exp.Not, this=this) if negate else this 4949 4950 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4951 unnest = self._parse_unnest(with_alias=False) 4952 if unnest: 4953 this = self.expression(exp.In, this=this, unnest=unnest) 4954 elif 
self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4955 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4956 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4957 4958 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4959 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4960 else: 4961 this = self.expression(exp.In, this=this, expressions=expressions) 4962 4963 if matched_l_paren: 4964 self._match_r_paren(this) 4965 elif not self._match(TokenType.R_BRACKET, expression=this): 4966 self.raise_error("Expecting ]") 4967 else: 4968 this = self.expression(exp.In, this=this, field=self._parse_column()) 4969 4970 return this 4971 4972 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4973 low = self._parse_bitwise() 4974 self._match(TokenType.AND) 4975 high = self._parse_bitwise() 4976 return self.expression(exp.Between, this=this, low=low, high=high) 4977 4978 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4979 if not self._match(TokenType.ESCAPE): 4980 return this 4981 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4982 4983 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4984 index = self._index 4985 4986 if not self._match(TokenType.INTERVAL) and match_interval: 4987 return None 4988 4989 if self._match(TokenType.STRING, advance=False): 4990 this = self._parse_primary() 4991 else: 4992 this = self._parse_term() 4993 4994 if not this or ( 4995 isinstance(this, exp.Column) 4996 and not this.table 4997 and not this.this.quoted 4998 and this.name.upper() == "IS" 4999 ): 5000 self._retreat(index) 5001 return None 5002 5003 unit = self._parse_function() or ( 5004 not self._match(TokenType.ALIAS, advance=False) 5005 and self._parse_var(any_token=True, upper=True) 5006 ) 5007 5008 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 5009 # each INTERVAL expression into this canonical form so it's easy to transpile 5010 if this and this.is_number: 5011 this = exp.Literal.string(this.to_py()) 5012 elif this and this.is_string: 5013 parts = exp.INTERVAL_STRING_RE.findall(this.name) 5014 if parts and unit: 5015 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 5016 unit = None 5017 self._retreat(self._index - 1) 5018 5019 if len(parts) == 1: 5020 this = exp.Literal.string(parts[0][0]) 5021 unit = self.expression(exp.Var, this=parts[0][1].upper()) 5022 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 5023 unit = self.expression( 5024 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 5025 ) 5026 5027 interval = self.expression(exp.Interval, this=this, unit=unit) 5028 5029 index = self._index 5030 self._match(TokenType.PLUS) 5031 5032 # Convert INTERVAL 'val_1' unit_1 [+] ... 
[+] 'val_n' unit_n into a sum of intervals 5033 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 5034 return self.expression( 5035 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 5036 ) 5037 5038 self._retreat(index) 5039 return interval 5040 5041 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 5042 this = self._parse_term() 5043 5044 while True: 5045 if self._match_set(self.BITWISE): 5046 this = self.expression( 5047 self.BITWISE[self._prev.token_type], 5048 this=this, 5049 expression=self._parse_term(), 5050 ) 5051 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 5052 this = self.expression( 5053 exp.DPipe, 5054 this=this, 5055 expression=self._parse_term(), 5056 safe=not self.dialect.STRICT_STRING_CONCAT, 5057 ) 5058 elif self._match(TokenType.DQMARK): 5059 this = self.expression( 5060 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 5061 ) 5062 elif self._match_pair(TokenType.LT, TokenType.LT): 5063 this = self.expression( 5064 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 5065 ) 5066 elif self._match_pair(TokenType.GT, TokenType.GT): 5067 this = self.expression( 5068 exp.BitwiseRightShift, this=this, expression=self._parse_term() 5069 ) 5070 else: 5071 break 5072 5073 return this 5074 5075 def _parse_term(self) -> t.Optional[exp.Expression]: 5076 this = self._parse_factor() 5077 5078 while self._match_set(self.TERM): 5079 klass = self.TERM[self._prev.token_type] 5080 comments = self._prev_comments 5081 expression = self._parse_factor() 5082 5083 this = self.expression(klass, this=this, comments=comments, expression=expression) 5084 5085 if isinstance(this, exp.Collate): 5086 expr = this.expression 5087 5088 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 5089 # fallback to Identifier / Var 5090 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 5091 ident = expr.this 5092 if isinstance(ident, exp.Identifier): 5093 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 5094 5095 return this 5096 5097 def _parse_factor(self) -> t.Optional[exp.Expression]: 5098 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 5099 this = parse_method() 5100 5101 while self._match_set(self.FACTOR): 5102 klass = self.FACTOR[self._prev.token_type] 5103 comments = self._prev_comments 5104 expression = parse_method() 5105 5106 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 5107 self._retreat(self._index - 1) 5108 return this 5109 5110 this = self.expression(klass, this=this, comments=comments, expression=expression) 5111 5112 if isinstance(this, exp.Div): 5113 this.args["typed"] = self.dialect.TYPED_DIVISION 5114 this.args["safe"] = self.dialect.SAFE_DIVISION 5115 5116 return this 5117 5118 def _parse_exponent(self) -> t.Optional[exp.Expression]: 5119 return self._parse_tokens(self._parse_unary, self.EXPONENT) 5120 5121 def _parse_unary(self) -> t.Optional[exp.Expression]: 5122 if self._match_set(self.UNARY_PARSERS): 5123 return self.UNARY_PARSERS[self._prev.token_type](self) 5124 return self._parse_at_time_zone(self._parse_type()) 5125 5126 def _parse_type( 5127 self, parse_interval: bool = True, fallback_to_identifier: bool = False 5128 ) -> t.Optional[exp.Expression]: 5129 interval = parse_interval and self._parse_interval() 5130 if interval: 5131 return interval 5132 5133 index = self._index 5134 data_type = self._parse_types(check_func=True, allow_identifiers=False) 5135 
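        # --- Editor's note (illustrative, not part of the original source) ---
        # At this point `data_type` is an exp.DataType when the upcoming
        # tokens form a type (e.g. DECIMAL(38, 0)), an exp.Cast for
        # BigQuery-style inline constructors (see the comment just below),
        # or None when they don't form a type at all. A hedged sketch:
        #
        #     import sqlglot
        #     from sqlglot import exp
        #
        #     cast = sqlglot.parse_one(
        #         "SELECT STRUCT<a INT64>(1)", read="bigquery"
        #     ).find(exp.Cast)
        #     # The inline constructor is canonicalized to a CAST
        #     print(cast.sql("bigquery"))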
5136         # _parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>), e.g.
5137         # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
5138         if isinstance(data_type, exp.Cast):
5139             # This constructor can contain ops directly after it, for instance struct unnesting:
5140             # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
5141             return self._parse_column_ops(data_type)
5142 
5143         if data_type:
5144             index2 = self._index
5145             this = self._parse_primary()
5146 
5147             if isinstance(this, exp.Literal):
5148                 literal = this.name
5149                 this = self._parse_column_ops(this)
5150 
5151                 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
5152                 if parser:
5153                     return parser(self, this, data_type)
5154 
5155                 if (
5156                     self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
5157                     and data_type.is_type(exp.DataType.Type.TIMESTAMP)
5158                     and TIME_ZONE_RE.search(literal)
5159                 ):
5160                     data_type = exp.DataType.build("TIMESTAMPTZ")
5161 
5162                 return self.expression(exp.Cast, this=this, to=data_type)
5163 
5164             # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
5165             # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
5166             #
5167             # If the index difference here is greater than 1, that means the parser itself must have
5168             # consumed additional tokens such as the DECIMAL scale and precision in the above example.
5169             #
5170             # If it's not greater than 1, then it must be 1, because we've consumed at least the type
5171             # keyword, meaning that the expressions arg of the DataType must have gotten set by a
5172             # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
5173             # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
5174             #
5175             # In these cases, we don't really want to return the converted type, but instead retreat
5176             # and try to parse a Column or Identifier in the section below.
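            # --- Editor's note (illustrative, not part of the original source) ---
            # Concretely: "DECIMAL(38, 0)" consumes six tokens (DECIMAL "(" 38 "," 0 ")"),
            # so index2 - index > 1 and the branch below keeps the parsed type. A bare
            # "DECIMAL" consumes a single token, so when its `expressions` arg was
            # populated anyway it must have come from a TYPE_CONVERTERS default; the
            # code then falls through to `self._retreat(index)` and re-parses the
            # token as a column or identifier instead.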
5177 if data_type.expressions and index2 - index > 1: 5178 self._retreat(index2) 5179 return self._parse_column_ops(data_type) 5180 5181 self._retreat(index) 5182 5183 if fallback_to_identifier: 5184 return self._parse_id_var() 5185 5186 this = self._parse_column() 5187 return this and self._parse_column_ops(this) 5188 5189 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 5190 this = self._parse_type() 5191 if not this: 5192 return None 5193 5194 if isinstance(this, exp.Column) and not this.table: 5195 this = exp.var(this.name.upper()) 5196 5197 return self.expression( 5198 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 5199 ) 5200 5201 def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expression]: 5202 type_name = identifier.name 5203 5204 while self._match(TokenType.DOT): 5205 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 5206 5207 return exp.DataType.build(type_name, udt=True) 5208 5209 def _parse_types( 5210 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 5211 ) -> t.Optional[exp.Expression]: 5212 index = self._index 5213 5214 this: t.Optional[exp.Expression] = None 5215 prefix = self._match_text_seq("SYSUDTLIB", ".") 5216 5217 if not self._match_set(self.TYPE_TOKENS): 5218 identifier = allow_identifiers and self._parse_id_var( 5219 any_token=False, tokens=(TokenType.VAR,) 5220 ) 5221 if isinstance(identifier, exp.Identifier): 5222 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 5223 5224 if len(tokens) != 1: 5225 self.raise_error("Unexpected identifier", self._prev) 5226 5227 if tokens[0].token_type in self.TYPE_TOKENS: 5228 self._prev = tokens[0] 5229 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 5230 this = self._parse_user_defined_type(identifier) 5231 else: 5232 self._retreat(self._index - 1) 5233 return None 5234 else: 5235 return None 5236 5237 type_token = self._prev.token_type 5238 5239 if type_token == TokenType.PSEUDO_TYPE: 5240 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5241 5242 if type_token == TokenType.OBJECT_IDENTIFIER: 5243 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5244 5245 # https://materialize.com/docs/sql/types/map/ 5246 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5247 key_type = self._parse_types( 5248 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5249 ) 5250 if not self._match(TokenType.FARROW): 5251 self._retreat(index) 5252 return None 5253 5254 value_type = self._parse_types( 5255 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5256 ) 5257 if not self._match(TokenType.R_BRACKET): 5258 self._retreat(index) 5259 return None 5260 5261 return exp.DataType( 5262 this=exp.DataType.Type.MAP, 5263 expressions=[key_type, value_type], 5264 nested=True, 5265 prefix=prefix, 5266 ) 5267 5268 nested = type_token in self.NESTED_TYPE_TOKENS 5269 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5270 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5271 expressions = None 5272 maybe_func = False 5273 5274 if self._match(TokenType.L_PAREN): 5275 if is_struct: 5276 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5277 elif nested: 5278 expressions = self._parse_csv( 5279 lambda: self._parse_types( 5280 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5281 ) 5282 ) 5283 if type_token == TokenType.NULLABLE and len(expressions) == 
1: 5284 this = expressions[0] 5285 this.set("nullable", True) 5286 self._match_r_paren() 5287 return this 5288 elif type_token in self.ENUM_TYPE_TOKENS: 5289 expressions = self._parse_csv(self._parse_equality) 5290 elif is_aggregate: 5291 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5292 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5293 ) 5294 if not func_or_ident: 5295 return None 5296 expressions = [func_or_ident] 5297 if self._match(TokenType.COMMA): 5298 expressions.extend( 5299 self._parse_csv( 5300 lambda: self._parse_types( 5301 check_func=check_func, 5302 schema=schema, 5303 allow_identifiers=allow_identifiers, 5304 ) 5305 ) 5306 ) 5307 else: 5308 expressions = self._parse_csv(self._parse_type_size) 5309 5310 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5311 if type_token == TokenType.VECTOR and len(expressions) == 2: 5312 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 5313 5314 if not expressions or not self._match(TokenType.R_PAREN): 5315 self._retreat(index) 5316 return None 5317 5318 maybe_func = True 5319 5320 values: t.Optional[t.List[exp.Expression]] = None 5321 5322 if nested and self._match(TokenType.LT): 5323 if is_struct: 5324 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5325 else: 5326 expressions = self._parse_csv( 5327 lambda: self._parse_types( 5328 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5329 ) 5330 ) 5331 5332 if not self._match(TokenType.GT): 5333 self.raise_error("Expecting >") 5334 5335 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5336 values = self._parse_csv(self._parse_assignment) 5337 if not values and is_struct: 5338 values = None 5339 self._retreat(self._index - 1) 5340 else: 5341 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5342 5343 if type_token in self.TIMESTAMPS: 5344 if self._match_text_seq("WITH", "TIME", "ZONE"): 5345 maybe_func = False 5346 tz_type = ( 5347 exp.DataType.Type.TIMETZ 5348 if type_token in self.TIMES 5349 else exp.DataType.Type.TIMESTAMPTZ 5350 ) 5351 this = exp.DataType(this=tz_type, expressions=expressions) 5352 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5353 maybe_func = False 5354 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5355 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5356 maybe_func = False 5357 elif type_token == TokenType.INTERVAL: 5358 unit = self._parse_var(upper=True) 5359 if unit: 5360 if self._match_text_seq("TO"): 5361 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5362 5363 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5364 else: 5365 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5366 elif type_token == TokenType.VOID: 5367 this = exp.DataType(this=exp.DataType.Type.NULL) 5368 5369 if maybe_func and check_func: 5370 index2 = self._index 5371 peek = self._parse_string() 5372 5373 if not peek: 5374 self._retreat(index) 5375 return None 5376 5377 self._retreat(index2) 5378 5379 if not this: 5380 if self._match_text_seq("UNSIGNED"): 5381 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5382 if not unsigned_type_token: 5383 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5384 5385 type_token = unsigned_type_token or type_token 5386 5387 this = exp.DataType( 5388 this=exp.DataType.Type[type_token.value], 5389 expressions=expressions, 
5390 nested=nested, 5391 prefix=prefix, 5392 ) 5393 5394 # Empty arrays/structs are allowed 5395 if values is not None: 5396 cls = exp.Struct if is_struct else exp.Array 5397 this = exp.cast(cls(expressions=values), this, copy=False) 5398 5399 elif expressions: 5400 this.set("expressions", expressions) 5401 5402 # https://materialize.com/docs/sql/types/list/#type-name 5403 while self._match(TokenType.LIST): 5404 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5405 5406 index = self._index 5407 5408 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5409 matched_array = self._match(TokenType.ARRAY) 5410 5411 while self._curr: 5412 datatype_token = self._prev.token_type 5413 matched_l_bracket = self._match(TokenType.L_BRACKET) 5414 5415 if (not matched_l_bracket and not matched_array) or ( 5416 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5417 ): 5418 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5419 # not to be confused with the fixed size array parsing 5420 break 5421 5422 matched_array = False 5423 values = self._parse_csv(self._parse_assignment) or None 5424 if ( 5425 values 5426 and not schema 5427 and ( 5428 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5429 ) 5430 ): 5431 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5432 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5433 self._retreat(index) 5434 break 5435 5436 this = exp.DataType( 5437 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5438 ) 5439 self._match(TokenType.R_BRACKET) 5440 5441 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5442 converter = self.TYPE_CONVERTERS.get(this.this) 5443 if converter: 5444 this = converter(t.cast(exp.DataType, this)) 5445 5446 return this 5447 5448 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5449 index = self._index 5450 5451 if ( 5452 self._curr 5453 and self._next 5454 and self._curr.token_type in self.TYPE_TOKENS 5455 and self._next.token_type in self.TYPE_TOKENS 5456 ): 5457 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5458 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 5459 this = self._parse_id_var() 5460 else: 5461 this = ( 5462 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5463 or self._parse_id_var() 5464 ) 5465 5466 self._match(TokenType.COLON) 5467 5468 if ( 5469 type_required 5470 and not isinstance(this, exp.DataType) 5471 and not self._match_set(self.TYPE_TOKENS, advance=False) 5472 ): 5473 self._retreat(index) 5474 return self._parse_types() 5475 5476 return self._parse_column_def(this) 5477 5478 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5479 if not self._match_text_seq("AT", "TIME", "ZONE"): 5480 return this 5481 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5482 5483 def _parse_column(self) -> t.Optional[exp.Expression]: 5484 this = self._parse_column_reference() 5485 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5486 5487 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5488 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5489 5490 return column 5491 5492 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5493 this = self._parse_field() 5494 if ( 5495 not this 5496 and self._match(TokenType.VALUES, advance=False) 5497 and self.VALUES_FOLLOWED_BY_PAREN 5498 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5499 ): 5500 this = self._parse_id_var() 5501 5502 if isinstance(this, exp.Identifier): 5503 # We bubble up comments from the Identifier to the Column 5504 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5505 5506 return this 5507 5508 def _parse_colon_as_variant_extract( 5509 self, this: t.Optional[exp.Expression] 5510 ) -> t.Optional[exp.Expression]: 5511 casts = [] 5512 json_path = [] 5513 escape = None 5514 5515 while self._match(TokenType.COLON): 5516 start_index = self._index 5517 5518 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5519 path = self._parse_column_ops( 5520 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5521 ) 5522 5523 # The cast :: operator has a lower precedence than the extraction operator :, so 5524 # we rearrange the AST appropriately to avoid casting the JSON path 5525 while isinstance(path, exp.Cast): 5526 casts.append(path.to) 5527 path = path.this 5528 5529 if casts: 5530 dcolon_offset = next( 5531 i 5532 for i, t in enumerate(self._tokens[start_index:]) 5533 if t.token_type == TokenType.DCOLON 5534 ) 5535 end_token = self._tokens[start_index + dcolon_offset - 1] 5536 else: 5537 end_token = self._prev 5538 5539 if path: 5540 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 5541 # it'll roundtrip to a string literal in GET_PATH 5542 if isinstance(path, exp.Identifier) and path.quoted: 5543 escape = True 5544 5545 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5546 5547 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5548 # Databricks transforms it back to the colon/dot notation 5549 if json_path: 5550 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5551 5552 if json_path_expr: 5553 json_path_expr.set("escape", escape) 5554 5555 this = self.expression( 5556 exp.JSONExtract, 5557 this=this, 5558 expression=json_path_expr, 5559 variant_extract=True, 5560 ) 5561 5562 while casts: 5563 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5564 5565 return this 5566 5567 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5568 return self._parse_types() 5569 5570 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5571 this = self._parse_bracket(this) 5572 5573 while self._match_set(self.COLUMN_OPERATORS): 5574 op_token = self._prev.token_type 5575 op = self.COLUMN_OPERATORS.get(op_token) 5576 5577 if op_token in (TokenType.DCOLON, TokenType.DOTCOLON): 5578 field = self._parse_dcolon() 5579 if not field: 5580 self.raise_error("Expected type") 5581 elif op and self._curr: 5582 field = self._parse_column_reference() or self._parse_bracket() 5583 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5584 field = self._parse_column_ops(field) 5585 else: 5586 field = self._parse_field(any_token=True, anonymous_func=True) 5587 5588 # Function calls can be qualified, e.g., x.y.FOO() 5589 # This converts the final AST to a series of Dots leading to the function call 5590 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5591 if isinstance(field, (exp.Func, exp.Window)) and this: 5592 this = this.transform( 5593 lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n 5594 ) 5595 5596 if op: 5597 this = op(self, this, field) 5598 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5599 this = self.expression( 5600 exp.Column, 5601 comments=this.comments, 5602 this=field, 5603 table=this.this, 5604 db=this.args.get("table"), 5605 catalog=this.args.get("db"), 5606 ) 5607 elif isinstance(field, exp.Window): 5608 # Move the exp.Dot's to the window's function 5609 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5610 field.set("this", window_func) 5611 this = field 5612 else: 5613 this = self.expression(exp.Dot, this=this, expression=field) 5614 5615 if field and field.comments: 5616 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5617 5618 this = self._parse_bracket(this) 5619 5620 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5621 5622 def _parse_paren(self) -> t.Optional[exp.Expression]: 5623 if not self._match(TokenType.L_PAREN): 5624 return None 5625 5626 comments = self._prev_comments 5627 query = self._parse_select() 5628 5629 if query: 5630 expressions = [query] 5631 else: 5632 expressions = self._parse_expressions() 5633 5634 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5635 5636 if not this and self._match(TokenType.R_PAREN, advance=False): 5637 this = self.expression(exp.Tuple) 5638 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5639 this = 
self._parse_subquery(this=this, parse_alias=False) 5640 elif isinstance(this, exp.Subquery): 5641 this = self._parse_subquery(this=self._parse_set_operations(this), parse_alias=False) 5642 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5643 this = self.expression(exp.Tuple, expressions=expressions) 5644 else: 5645 this = self.expression(exp.Paren, this=this) 5646 5647 if this: 5648 this.add_comments(comments) 5649 5650 self._match_r_paren(expression=this) 5651 return this 5652 5653 def _parse_primary(self) -> t.Optional[exp.Expression]: 5654 if self._match_set(self.PRIMARY_PARSERS): 5655 token_type = self._prev.token_type 5656 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5657 5658 if token_type == TokenType.STRING: 5659 expressions = [primary] 5660 while self._match(TokenType.STRING): 5661 expressions.append(exp.Literal.string(self._prev.text)) 5662 5663 if len(expressions) > 1: 5664 return self.expression(exp.Concat, expressions=expressions) 5665 5666 return primary 5667 5668 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5669 return exp.Literal.number(f"0.{self._prev.text}") 5670 5671 return self._parse_paren() 5672 5673 def _parse_field( 5674 self, 5675 any_token: bool = False, 5676 tokens: t.Optional[t.Collection[TokenType]] = None, 5677 anonymous_func: bool = False, 5678 ) -> t.Optional[exp.Expression]: 5679 if anonymous_func: 5680 field = ( 5681 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5682 or self._parse_primary() 5683 ) 5684 else: 5685 field = self._parse_primary() or self._parse_function( 5686 anonymous=anonymous_func, any_token=any_token 5687 ) 5688 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5689 5690 def _parse_function( 5691 self, 5692 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5693 anonymous: bool = False, 5694 optional_parens: bool = True, 5695 any_token: bool = False, 5696 ) -> t.Optional[exp.Expression]: 5697 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5698 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5699 fn_syntax = False 5700 if ( 5701 self._match(TokenType.L_BRACE, advance=False) 5702 and self._next 5703 and self._next.text.upper() == "FN" 5704 ): 5705 self._advance(2) 5706 fn_syntax = True 5707 5708 func = self._parse_function_call( 5709 functions=functions, 5710 anonymous=anonymous, 5711 optional_parens=optional_parens, 5712 any_token=any_token, 5713 ) 5714 5715 if fn_syntax: 5716 self._match(TokenType.R_BRACE) 5717 5718 return func 5719 5720 def _parse_function_call( 5721 self, 5722 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5723 anonymous: bool = False, 5724 optional_parens: bool = True, 5725 any_token: bool = False, 5726 ) -> t.Optional[exp.Expression]: 5727 if not self._curr: 5728 return None 5729 5730 comments = self._curr.comments 5731 token = self._curr 5732 token_type = self._curr.token_type 5733 this = self._curr.text 5734 upper = this.upper() 5735 5736 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5737 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5738 self._advance() 5739 return self._parse_window(parser(self)) 5740 5741 if not self._next or self._next.token_type != TokenType.L_PAREN: 5742 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5743 self._advance() 5744 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5745 5746 return None 5747 5748 if any_token: 5749 if token_type in self.RESERVED_TOKENS: 
5750 return None 5751 elif token_type not in self.FUNC_TOKENS: 5752 return None 5753 5754 self._advance(2) 5755 5756 parser = self.FUNCTION_PARSERS.get(upper) 5757 if parser and not anonymous: 5758 this = parser(self) 5759 else: 5760 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5761 5762 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5763 this = self.expression( 5764 subquery_predicate, comments=comments, this=self._parse_select() 5765 ) 5766 self._match_r_paren() 5767 return this 5768 5769 if functions is None: 5770 functions = self.FUNCTIONS 5771 5772 function = functions.get(upper) 5773 known_function = function and not anonymous 5774 5775 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5776 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5777 5778 post_func_comments = self._curr and self._curr.comments 5779 if known_function and post_func_comments: 5780 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5781 # call we'll construct it as exp.Anonymous, even if it's "known" 5782 if any( 5783 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5784 for comment in post_func_comments 5785 ): 5786 known_function = False 5787 5788 if alias and known_function: 5789 args = self._kv_to_prop_eq(args) 5790 5791 if known_function: 5792 func_builder = t.cast(t.Callable, function) 5793 5794 if "dialect" in func_builder.__code__.co_varnames: 5795 func = func_builder(args, dialect=self.dialect) 5796 else: 5797 func = func_builder(args) 5798 5799 func = self.validate_expression(func, args) 5800 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5801 func.meta["name"] = this 5802 5803 this = func 5804 else: 5805 if token_type == TokenType.IDENTIFIER: 5806 this = exp.Identifier(this=this, quoted=True).update_positions(token) 5807 5808 this = self.expression(exp.Anonymous, this=this, expressions=args) 5809 this = this.update_positions(token) 5810 5811 if isinstance(this, exp.Expression): 5812 this.add_comments(comments) 5813 5814 self._match_r_paren(this) 5815 return self._parse_window(this) 5816 5817 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5818 return expression 5819 5820 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5821 transformed = [] 5822 5823 for index, e in enumerate(expressions): 5824 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5825 if isinstance(e, exp.Alias): 5826 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5827 5828 if not isinstance(e, exp.PropertyEQ): 5829 e = self.expression( 5830 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5831 ) 5832 5833 if isinstance(e.this, exp.Column): 5834 e.this.replace(e.this.this) 5835 else: 5836 e = self._to_prop_eq(e, index) 5837 5838 transformed.append(e) 5839 5840 return transformed 5841 5842 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5843 return self._parse_statement() 5844 5845 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5846 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 5847 5848 def _parse_user_defined_function( 5849 self, kind: t.Optional[TokenType] = None 5850 ) -> t.Optional[exp.Expression]: 5851 this = self._parse_table_parts(schema=True) 5852 5853 if not self._match(TokenType.L_PAREN): 5854 return this 5855 5856 expressions = self._parse_csv(self._parse_function_parameter) 5857 
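# [Editor's note] Illustration, not part of the parser: _parse_function_call
# above resolves known names through self.FUNCTIONS and falls back to
# exp.Anonymous for unknown ones. A minimal sketch, assuming the default dialect:
#
#   import sqlglot
#   from sqlglot import exp
#
#   known = sqlglot.parse_one("SELECT COALESCE(a, b)").expressions[0]
#   assert isinstance(known, exp.Coalesce)
#
#   # MY_UDF is a hypothetical, unregistered function name
#   unknown = sqlglot.parse_one("SELECT MY_UDF(a)").expressions[0]
#   assert isinstance(unknown, exp.Anonymous)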
self._match_r_paren() 5858 return self.expression( 5859 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5860 ) 5861 5862 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5863 literal = self._parse_primary() 5864 if literal: 5865 return self.expression(exp.Introducer, this=token.text, expression=literal) 5866 5867 return self._identifier_expression(token) 5868 5869 def _parse_session_parameter(self) -> exp.SessionParameter: 5870 kind = None 5871 this = self._parse_id_var() or self._parse_primary() 5872 5873 if this and self._match(TokenType.DOT): 5874 kind = this.name 5875 this = self._parse_var() or self._parse_primary() 5876 5877 return self.expression(exp.SessionParameter, this=this, kind=kind) 5878 5879 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5880 return self._parse_id_var() 5881 5882 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5883 index = self._index 5884 5885 if self._match(TokenType.L_PAREN): 5886 expressions = t.cast( 5887 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5888 ) 5889 5890 if not self._match(TokenType.R_PAREN): 5891 self._retreat(index) 5892 else: 5893 expressions = [self._parse_lambda_arg()] 5894 5895 if self._match_set(self.LAMBDAS): 5896 return self.LAMBDAS[self._prev.token_type](self, expressions) 5897 5898 self._retreat(index) 5899 5900 this: t.Optional[exp.Expression] 5901 5902 if self._match(TokenType.DISTINCT): 5903 this = self.expression( 5904 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5905 ) 5906 else: 5907 this = self._parse_select_or_expression(alias=alias) 5908 5909 return self._parse_limit( 5910 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5911 ) 5912 5913 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5914 index = self._index 5915 if not self._match(TokenType.L_PAREN): 5916 return this 5917 5918 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 5919 # expr can be of both types 5920 if self._match_set(self.SELECT_START_TOKENS): 5921 self._retreat(index) 5922 return this 5923 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5924 self._match_r_paren() 5925 return self.expression(exp.Schema, this=this, expressions=args) 5926 5927 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5928 return self._parse_column_def(self._parse_field(any_token=True)) 5929 5930 def _parse_column_def( 5931 self, this: t.Optional[exp.Expression], computed_column: bool = True 5932 ) -> t.Optional[exp.Expression]: 5933 # column defs are not really columns, they're identifiers 5934 if isinstance(this, exp.Column): 5935 this = this.this 5936 5937 if not computed_column: 5938 self._match(TokenType.ALIAS) 5939 5940 kind = self._parse_types(schema=True) 5941 5942 if self._match_text_seq("FOR", "ORDINALITY"): 5943 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5944 5945 constraints: t.List[exp.Expression] = [] 5946 5947 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5948 ("ALIAS", "MATERIALIZED") 5949 ): 5950 persisted = self._prev.text.upper() == "MATERIALIZED" 5951 constraint_kind = exp.ComputedColumnConstraint( 5952 this=self._parse_assignment(), 5953 persisted=persisted or self._match_text_seq("PERSISTED"), 5954 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5955 ) 5956 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5957 elif ( 5958 kind 5959 and self._match(TokenType.ALIAS, advance=False) 5960 and ( 5961 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 5962 or (self._next and self._next.token_type == TokenType.L_PAREN) 5963 ) 5964 ): 5965 self._advance() 5966 constraints.append( 5967 self.expression( 5968 exp.ColumnConstraint, 5969 kind=exp.ComputedColumnConstraint( 5970 this=self._parse_disjunction(), 5971 persisted=self._match_texts(("STORED", "VIRTUAL")) 5972 and self._prev.text.upper() == "STORED", 5973 ), 5974 ) 5975 ) 5976 5977 while True: 5978 constraint = self._parse_column_constraint() 5979 if not constraint: 5980 break 5981 constraints.append(constraint) 5982 5983 if not kind and not constraints: 5984 return this 5985 5986 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5987 5988 def _parse_auto_increment( 5989 self, 5990 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5991 start = None 5992 increment = None 5993 order = None 5994 5995 if self._match(TokenType.L_PAREN, advance=False): 5996 args = self._parse_wrapped_csv(self._parse_bitwise) 5997 start = seq_get(args, 0) 5998 increment = seq_get(args, 1) 5999 elif self._match_text_seq("START"): 6000 start = self._parse_bitwise() 6001 self._match_text_seq("INCREMENT") 6002 increment = self._parse_bitwise() 6003 if self._match_text_seq("ORDER"): 6004 order = True 6005 elif self._match_text_seq("NOORDER"): 6006 order = False 6007 6008 if start and increment: 6009 return exp.GeneratedAsIdentityColumnConstraint( 6010 start=start, increment=increment, this=False, order=order 6011 ) 6012 6013 return exp.AutoIncrementColumnConstraint() 6014 6015 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 6016 if not self._match_text_seq("REFRESH"): 6017 self._retreat(self._index - 1) 6018 return None 6019 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 6020 6021 def _parse_compress(self) -> exp.CompressColumnConstraint: 6022 if 
self._match(TokenType.L_PAREN, advance=False): 6023 return self.expression( 6024 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 6025 ) 6026 6027 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 6028 6029 def _parse_generated_as_identity( 6030 self, 6031 ) -> ( 6032 exp.GeneratedAsIdentityColumnConstraint 6033 | exp.ComputedColumnConstraint 6034 | exp.GeneratedAsRowColumnConstraint 6035 ): 6036 if self._match_text_seq("BY", "DEFAULT"): 6037 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 6038 this = self.expression( 6039 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 6040 ) 6041 else: 6042 self._match_text_seq("ALWAYS") 6043 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 6044 6045 self._match(TokenType.ALIAS) 6046 6047 if self._match_text_seq("ROW"): 6048 start = self._match_text_seq("START") 6049 if not start: 6050 self._match(TokenType.END) 6051 hidden = self._match_text_seq("HIDDEN") 6052 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 6053 6054 identity = self._match_text_seq("IDENTITY") 6055 6056 if self._match(TokenType.L_PAREN): 6057 if self._match(TokenType.START_WITH): 6058 this.set("start", self._parse_bitwise()) 6059 if self._match_text_seq("INCREMENT", "BY"): 6060 this.set("increment", self._parse_bitwise()) 6061 if self._match_text_seq("MINVALUE"): 6062 this.set("minvalue", self._parse_bitwise()) 6063 if self._match_text_seq("MAXVALUE"): 6064 this.set("maxvalue", self._parse_bitwise()) 6065 6066 if self._match_text_seq("CYCLE"): 6067 this.set("cycle", True) 6068 elif self._match_text_seq("NO", "CYCLE"): 6069 this.set("cycle", False) 6070 6071 if not identity: 6072 this.set("expression", self._parse_range()) 6073 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 6074 args = self._parse_csv(self._parse_bitwise) 6075 this.set("start", seq_get(args, 0)) 6076 this.set("increment", seq_get(args, 1)) 6077 6078 self._match_r_paren() 6079 6080 return this 6081 6082 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 6083 self._match_text_seq("LENGTH") 6084 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 6085 6086 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 6087 if self._match_text_seq("NULL"): 6088 return self.expression(exp.NotNullColumnConstraint) 6089 if self._match_text_seq("CASESPECIFIC"): 6090 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 6091 if self._match_text_seq("FOR", "REPLICATION"): 6092 return self.expression(exp.NotForReplicationColumnConstraint) 6093 6094 # Unconsume the `NOT` token 6095 self._retreat(self._index - 1) 6096 return None 6097 6098 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 6099 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 6100 6101 procedure_option_follows = ( 6102 self._match(TokenType.WITH, advance=False) 6103 and self._next 6104 and self._next.text.upper() in self.PROCEDURE_OPTIONS 6105 ) 6106 6107 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 6108 return self.expression( 6109 exp.ColumnConstraint, 6110 this=this, 6111 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 6112 ) 6113 6114 return this 6115 6116 def _parse_constraint(self) -> t.Optional[exp.Expression]: 6117 if not self._match(TokenType.CONSTRAINT): 6118 return 
self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 6119 6120 return self.expression( 6121 exp.Constraint, 6122 this=self._parse_id_var(), 6123 expressions=self._parse_unnamed_constraints(), 6124 ) 6125 6126 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 6127 constraints = [] 6128 while True: 6129 constraint = self._parse_unnamed_constraint() or self._parse_function() 6130 if not constraint: 6131 break 6132 constraints.append(constraint) 6133 6134 return constraints 6135 6136 def _parse_unnamed_constraint( 6137 self, constraints: t.Optional[t.Collection[str]] = None 6138 ) -> t.Optional[exp.Expression]: 6139 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 6140 constraints or self.CONSTRAINT_PARSERS 6141 ): 6142 return None 6143 6144 constraint = self._prev.text.upper() 6145 if constraint not in self.CONSTRAINT_PARSERS: 6146 self.raise_error(f"No parser found for schema constraint {constraint}.") 6147 6148 return self.CONSTRAINT_PARSERS[constraint](self) 6149 6150 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 6151 return self._parse_id_var(any_token=False) 6152 6153 def _parse_unique(self) -> exp.UniqueColumnConstraint: 6154 self._match_text_seq("KEY") 6155 return self.expression( 6156 exp.UniqueColumnConstraint, 6157 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 6158 this=self._parse_schema(self._parse_unique_key()), 6159 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 6160 on_conflict=self._parse_on_conflict(), 6161 options=self._parse_key_constraint_options(), 6162 ) 6163 6164 def _parse_key_constraint_options(self) -> t.List[str]: 6165 options = [] 6166 while True: 6167 if not self._curr: 6168 break 6169 6170 if self._match(TokenType.ON): 6171 action = None 6172 on = self._advance_any() and self._prev.text 6173 6174 if self._match_text_seq("NO", "ACTION"): 6175 action = "NO ACTION" 6176 elif self._match_text_seq("CASCADE"): 6177 action = "CASCADE" 6178 elif self._match_text_seq("RESTRICT"): 6179 action = "RESTRICT" 6180 elif self._match_pair(TokenType.SET, TokenType.NULL): 6181 action = "SET NULL" 6182 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 6183 action = "SET DEFAULT" 6184 else: 6185 self.raise_error("Invalid key constraint") 6186 6187 options.append(f"ON {on} {action}") 6188 else: 6189 var = self._parse_var_from_options( 6190 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 6191 ) 6192 if not var: 6193 break 6194 options.append(var.name) 6195 6196 return options 6197 6198 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 6199 if match and not self._match(TokenType.REFERENCES): 6200 return None 6201 6202 expressions = None 6203 this = self._parse_table(schema=True) 6204 options = self._parse_key_constraint_options() 6205 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 6206 6207 def _parse_foreign_key(self) -> exp.ForeignKey: 6208 expressions = ( 6209 self._parse_wrapped_id_vars() 6210 if not self._match(TokenType.REFERENCES, advance=False) 6211 else None 6212 ) 6213 reference = self._parse_references() 6214 on_options = {} 6215 6216 while self._match(TokenType.ON): 6217 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 6218 self.raise_error("Expected DELETE or UPDATE") 6219 6220 kind = self._prev.text.lower() 6221 6222 if self._match_text_seq("NO", "ACTION"): 6223 action = "NO ACTION" 6224 elif self._match(TokenType.SET): 6225 
self._match_set((TokenType.NULL, TokenType.DEFAULT)) 6226 action = "SET " + self._prev.text.upper() 6227 else: 6228 self._advance() 6229 action = self._prev.text.upper() 6230 6231 on_options[kind] = action 6232 6233 return self.expression( 6234 exp.ForeignKey, 6235 expressions=expressions, 6236 reference=reference, 6237 options=self._parse_key_constraint_options(), 6238 **on_options, # type: ignore 6239 ) 6240 6241 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 6242 return self._parse_ordered() or self._parse_field() 6243 6244 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 6245 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 6246 self._retreat(self._index - 1) 6247 return None 6248 6249 id_vars = self._parse_wrapped_id_vars() 6250 return self.expression( 6251 exp.PeriodForSystemTimeConstraint, 6252 this=seq_get(id_vars, 0), 6253 expression=seq_get(id_vars, 1), 6254 ) 6255 6256 def _parse_primary_key( 6257 self, wrapped_optional: bool = False, in_props: bool = False 6258 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 6259 desc = ( 6260 self._match_set((TokenType.ASC, TokenType.DESC)) 6261 and self._prev.token_type == TokenType.DESC 6262 ) 6263 6264 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 6265 return self.expression( 6266 exp.PrimaryKeyColumnConstraint, 6267 desc=desc, 6268 options=self._parse_key_constraint_options(), 6269 ) 6270 6271 expressions = self._parse_wrapped_csv( 6272 self._parse_primary_key_part, optional=wrapped_optional 6273 ) 6274 options = self._parse_key_constraint_options() 6275 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 6276 6277 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 6278 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 6279 6280 def _parse_odbc_datetime_literal(self) -> exp.Expression: 6281 """ 6282 Parses a datetime column in ODBC format. We parse the column into the corresponding 6283 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 6284 same as we did for `DATE('yyyy-mm-dd')`. 
6285 6286 Reference: 6287 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 6288 """ 6289 self._match(TokenType.VAR) 6290 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 6291 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 6292 if not self._match(TokenType.R_BRACE): 6293 self.raise_error("Expected }") 6294 return expression 6295 6296 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6297 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 6298 return this 6299 6300 bracket_kind = self._prev.token_type 6301 if ( 6302 bracket_kind == TokenType.L_BRACE 6303 and self._curr 6304 and self._curr.token_type == TokenType.VAR 6305 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 6306 ): 6307 return self._parse_odbc_datetime_literal() 6308 6309 expressions = self._parse_csv( 6310 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6311 ) 6312 6313 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6314 self.raise_error("Expected ]") 6315 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6316 self.raise_error("Expected }") 6317 6318 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6319 if bracket_kind == TokenType.L_BRACE: 6320 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 6321 elif not this: 6322 this = build_array_constructor( 6323 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6324 ) 6325 else: 6326 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6327 if constructor_type: 6328 return build_array_constructor( 6329 constructor_type, 6330 args=expressions, 6331 bracket_kind=bracket_kind, 6332 dialect=self.dialect, 6333 ) 6334 6335 expressions = apply_index_offset( 6336 this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect 6337 ) 6338 this = self.expression( 6339 exp.Bracket, 6340 this=this, 6341 expressions=expressions, 6342 comments=this.pop_comments(), 6343 ) 6344 6345 self._add_comments(this) 6346 return self._parse_bracket(this) 6347 6348 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6349 if self._match(TokenType.COLON): 6350 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6351 return this 6352 6353 def _parse_case(self) -> t.Optional[exp.Expression]: 6354 ifs = [] 6355 default = None 6356 6357 comments = self._prev_comments 6358 expression = self._parse_assignment() 6359 6360 while self._match(TokenType.WHEN): 6361 this = self._parse_assignment() 6362 self._match(TokenType.THEN) 6363 then = self._parse_assignment() 6364 ifs.append(self.expression(exp.If, this=this, true=then)) 6365 6366 if self._match(TokenType.ELSE): 6367 default = self._parse_assignment() 6368 6369 if not self._match(TokenType.END): 6370 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6371 default = exp.column("interval") 6372 else: 6373 self.raise_error("Expected END after CASE", self._prev) 6374 6375 return self.expression( 6376 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 6377 ) 6378 6379 def _parse_if(self) -> t.Optional[exp.Expression]: 6380 if self._match(TokenType.L_PAREN): 6381 args = self._parse_csv( 6382 lambda: self._parse_alias(self._parse_assignment(), explicit=True) 6383 ) 6384 this = 
self.validate_expression(exp.If.from_arg_list(args), args) 6385 self._match_r_paren() 6386 else: 6387 index = self._index - 1 6388 6389 if self.NO_PAREN_IF_COMMANDS and index == 0: 6390 return self._parse_as_command(self._prev) 6391 6392 condition = self._parse_assignment() 6393 6394 if not condition: 6395 self._retreat(index) 6396 return None 6397 6398 self._match(TokenType.THEN) 6399 true = self._parse_assignment() 6400 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6401 self._match(TokenType.END) 6402 this = self.expression(exp.If, this=condition, true=true, false=false) 6403 6404 return this 6405 6406 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6407 if not self._match_text_seq("VALUE", "FOR"): 6408 self._retreat(self._index - 1) 6409 return None 6410 6411 return self.expression( 6412 exp.NextValueFor, 6413 this=self._parse_column(), 6414 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6415 ) 6416 6417 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6418 this = self._parse_function() or self._parse_var_or_string(upper=True) 6419 6420 if self._match(TokenType.FROM): 6421 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6422 6423 if not self._match(TokenType.COMMA): 6424 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6425 6426 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6427 6428 def _parse_gap_fill(self) -> exp.GapFill: 6429 self._match(TokenType.TABLE) 6430 this = self._parse_table() 6431 6432 self._match(TokenType.COMMA) 6433 args = [this, *self._parse_csv(self._parse_lambda)] 6434 6435 gap_fill = exp.GapFill.from_arg_list(args) 6436 return self.validate_expression(gap_fill, args) 6437 6438 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6439 this = self._parse_assignment() 6440 6441 if not self._match(TokenType.ALIAS): 6442 if self._match(TokenType.COMMA): 6443 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6444 6445 self.raise_error("Expected AS after CAST") 6446 6447 fmt = None 6448 to = self._parse_types() 6449 6450 default = self._match(TokenType.DEFAULT) 6451 if default: 6452 default = self._parse_bitwise() 6453 self._match_text_seq("ON", "CONVERSION", "ERROR") 6454 6455 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6456 fmt_string = self._parse_string() 6457 fmt = self._parse_at_time_zone(fmt_string) 6458 6459 if not to: 6460 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6461 if to.this in exp.DataType.TEMPORAL_TYPES: 6462 this = self.expression( 6463 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6464 this=this, 6465 format=exp.Literal.string( 6466 format_time( 6467 fmt_string.this if fmt_string else "", 6468 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6469 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6470 ) 6471 ), 6472 safe=safe, 6473 ) 6474 6475 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6476 this.set("zone", fmt.args["zone"]) 6477 return this 6478 elif not to: 6479 self.raise_error("Expected TYPE after CAST") 6480 elif isinstance(to, exp.Identifier): 6481 to = exp.DataType.build(to.name, udt=True) 6482 elif to.this == exp.DataType.Type.CHAR: 6483 if self._match(TokenType.CHARACTER_SET): 6484 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6485 6486 return self.expression( 6487 exp.Cast if strict else exp.TryCast, 6488 
this=this, 6489 to=to, 6490 format=fmt, 6491 safe=safe, 6492 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6493 default=default, 6494 ) 6495 6496 def _parse_string_agg(self) -> exp.GroupConcat: 6497 if self._match(TokenType.DISTINCT): 6498 args: t.List[t.Optional[exp.Expression]] = [ 6499 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6500 ] 6501 if self._match(TokenType.COMMA): 6502 args.extend(self._parse_csv(self._parse_assignment)) 6503 else: 6504 args = self._parse_csv(self._parse_assignment) # type: ignore 6505 6506 if self._match_text_seq("ON", "OVERFLOW"): 6507 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6508 if self._match_text_seq("ERROR"): 6509 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6510 else: 6511 self._match_text_seq("TRUNCATE") 6512 on_overflow = self.expression( 6513 exp.OverflowTruncateBehavior, 6514 this=self._parse_string(), 6515 with_count=( 6516 self._match_text_seq("WITH", "COUNT") 6517 or not self._match_text_seq("WITHOUT", "COUNT") 6518 ), 6519 ) 6520 else: 6521 on_overflow = None 6522 6523 index = self._index 6524 if not self._match(TokenType.R_PAREN) and args: 6525 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6526 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6527 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 6528 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 6529 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6530 6531 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6532 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6533 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
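# [Editor's note] Illustration, not part of the parser: because both the
# WITHIN GROUP form and the plain argument form normalize to exp.GroupConcat,
# the call can be re-emitted in dialects that spell it differently. A hedged
# sketch; the exact output shown is indicative:
#
#   import sqlglot
#
#   sql = "SELECT STRING_AGG(x, ',') FROM t"
#   print(sqlglot.transpile(sql, read="postgres", write="mysql")[0])
#   # SELECT GROUP_CONCAT(x SEPARATOR ',') FROM t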
6534 if not self._match_text_seq("WITHIN", "GROUP"): 6535 self._retreat(index) 6536 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6537 6538 # The corresponding match_r_paren will be called in parse_function (caller) 6539 self._match_l_paren() 6540 6541 return self.expression( 6542 exp.GroupConcat, 6543 this=self._parse_order(this=seq_get(args, 0)), 6544 separator=seq_get(args, 1), 6545 on_overflow=on_overflow, 6546 ) 6547 6548 def _parse_convert( 6549 self, strict: bool, safe: t.Optional[bool] = None 6550 ) -> t.Optional[exp.Expression]: 6551 this = self._parse_bitwise() 6552 6553 if self._match(TokenType.USING): 6554 to: t.Optional[exp.Expression] = self.expression( 6555 exp.CharacterSet, this=self._parse_var() 6556 ) 6557 elif self._match(TokenType.COMMA): 6558 to = self._parse_types() 6559 else: 6560 to = None 6561 6562 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 6563 6564 def _parse_xml_table(self) -> exp.XMLTable: 6565 namespaces = None 6566 passing = None 6567 columns = None 6568 6569 if self._match_text_seq("XMLNAMESPACES", "("): 6570 namespaces = self._parse_xml_namespace() 6571 self._match_text_seq(")", ",") 6572 6573 this = self._parse_string() 6574 6575 if self._match_text_seq("PASSING"): 6576 # The BY VALUE keywords are optional and are provided for semantic clarity 6577 self._match_text_seq("BY", "VALUE") 6578 passing = self._parse_csv(self._parse_column) 6579 6580 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6581 6582 if self._match_text_seq("COLUMNS"): 6583 columns = self._parse_csv(self._parse_field_def) 6584 6585 return self.expression( 6586 exp.XMLTable, 6587 this=this, 6588 namespaces=namespaces, 6589 passing=passing, 6590 columns=columns, 6591 by_ref=by_ref, 6592 ) 6593 6594 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6595 namespaces = [] 6596 6597 while True: 6598 if self._match(TokenType.DEFAULT): 6599 uri = self._parse_string() 6600 else: 6601 uri = self._parse_alias(self._parse_string()) 6602 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6603 if not self._match(TokenType.COMMA): 6604 break 6605 6606 return namespaces 6607 6608 def _parse_decode(self) -> t.Optional[exp.Decode | exp.DecodeCase]: 6609 args = self._parse_csv(self._parse_assignment) 6610 6611 if len(args) < 3: 6612 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6613 6614 return self.expression(exp.DecodeCase, expressions=args) 6615 6616 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6617 self._match_text_seq("KEY") 6618 key = self._parse_column() 6619 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6620 self._match_text_seq("VALUE") 6621 value = self._parse_bitwise() 6622 6623 if not key and not value: 6624 return None 6625 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6626 6627 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6628 if not this or not self._match_text_seq("FORMAT", "JSON"): 6629 return this 6630 6631 return self.expression(exp.FormatJson, this=this) 6632 6633 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6634 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. 
JSON_EXISTS) 6635 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6636 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6637 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6638 else: 6639 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6640 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6641 6642 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6643 6644 if not empty and not error and not null: 6645 return None 6646 6647 return self.expression( 6648 exp.OnCondition, 6649 empty=empty, 6650 error=error, 6651 null=null, 6652 ) 6653 6654 def _parse_on_handling( 6655 self, on: str, *values: str 6656 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6657 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6658 for value in values: 6659 if self._match_text_seq(value, "ON", on): 6660 return f"{value} ON {on}" 6661 6662 index = self._index 6663 if self._match(TokenType.DEFAULT): 6664 default_value = self._parse_bitwise() 6665 if self._match_text_seq("ON", on): 6666 return default_value 6667 6668 self._retreat(index) 6669 6670 return None 6671 6672 @t.overload 6673 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6674 6675 @t.overload 6676 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 6677 6678 def _parse_json_object(self, agg=False): 6679 star = self._parse_star() 6680 expressions = ( 6681 [star] 6682 if star 6683 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6684 ) 6685 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6686 6687 unique_keys = None 6688 if self._match_text_seq("WITH", "UNIQUE"): 6689 unique_keys = True 6690 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6691 unique_keys = False 6692 6693 self._match_text_seq("KEYS") 6694 6695 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6696 self._parse_type() 6697 ) 6698 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6699 6700 return self.expression( 6701 exp.JSONObjectAgg if agg else exp.JSONObject, 6702 expressions=expressions, 6703 null_handling=null_handling, 6704 unique_keys=unique_keys, 6705 return_type=return_type, 6706 encoding=encoding, 6707 ) 6708 6709 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6710 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6711 if not self._match_text_seq("NESTED"): 6712 this = self._parse_id_var() 6713 kind = self._parse_types(allow_identifiers=False) 6714 nested = None 6715 else: 6716 this = None 6717 kind = None 6718 nested = True 6719 6720 path = self._match_text_seq("PATH") and self._parse_string() 6721 nested_schema = nested and self._parse_json_schema() 6722 6723 return self.expression( 6724 exp.JSONColumnDef, 6725 this=this, 6726 kind=kind, 6727 path=path, 6728 nested_schema=nested_schema, 6729 ) 6730 6731 def _parse_json_schema(self) -> exp.JSONSchema: 6732 self._match_text_seq("COLUMNS") 6733 return self.expression( 6734 exp.JSONSchema, 6735 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6736 ) 6737 6738 def _parse_json_table(self) -> exp.JSONTable: 6739 this = self._parse_format_json(self._parse_bitwise()) 6740 path = self._match(TokenType.COMMA) and self._parse_string() 6741 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6742 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6743 schema = 
self._parse_json_schema() 6744 6745 return exp.JSONTable( 6746 this=this, 6747 schema=schema, 6748 path=path, 6749 error_handling=error_handling, 6750 empty_handling=empty_handling, 6751 ) 6752 6753 def _parse_match_against(self) -> exp.MatchAgainst: 6754 expressions = self._parse_csv(self._parse_column) 6755 6756 self._match_text_seq(")", "AGAINST", "(") 6757 6758 this = self._parse_string() 6759 6760 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6761 modifier = "IN NATURAL LANGUAGE MODE" 6762 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6763 modifier = f"{modifier} WITH QUERY EXPANSION" 6764 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6765 modifier = "IN BOOLEAN MODE" 6766 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6767 modifier = "WITH QUERY EXPANSION" 6768 else: 6769 modifier = None 6770 6771 return self.expression( 6772 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6773 ) 6774 6775 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6776 def _parse_open_json(self) -> exp.OpenJSON: 6777 this = self._parse_bitwise() 6778 path = self._match(TokenType.COMMA) and self._parse_string() 6779 6780 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6781 this = self._parse_field(any_token=True) 6782 kind = self._parse_types() 6783 path = self._parse_string() 6784 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6785 6786 return self.expression( 6787 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6788 ) 6789 6790 expressions = None 6791 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6792 self._match_l_paren() 6793 expressions = self._parse_csv(_parse_open_json_column_def) 6794 6795 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6796 6797 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6798 args = self._parse_csv(self._parse_bitwise) 6799 6800 if self._match(TokenType.IN): 6801 return self.expression( 6802 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6803 ) 6804 6805 if haystack_first: 6806 haystack = seq_get(args, 0) 6807 needle = seq_get(args, 1) 6808 else: 6809 haystack = seq_get(args, 1) 6810 needle = seq_get(args, 0) 6811 6812 return self.expression( 6813 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6814 ) 6815 6816 def _parse_predict(self) -> exp.Predict: 6817 self._match_text_seq("MODEL") 6818 this = self._parse_table() 6819 6820 self._match(TokenType.COMMA) 6821 self._match_text_seq("TABLE") 6822 6823 return self.expression( 6824 exp.Predict, 6825 this=this, 6826 expression=self._parse_table(), 6827 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6828 ) 6829 6830 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6831 args = self._parse_csv(self._parse_table) 6832 return exp.JoinHint(this=func_name.upper(), expressions=args) 6833 6834 def _parse_substring(self) -> exp.Substring: 6835 # Postgres supports the form: substring(string [from int] [for int]) 6836 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6837 6838 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6839 6840 if self._match(TokenType.FROM): 6841 args.append(self._parse_bitwise()) 6842 if self._match(TokenType.FOR): 6843 if len(args) == 1: 6844 args.append(exp.Literal.number(1)) 6845 args.append(self._parse_bitwise()) 6846 6847 return 
self.validate_expression(exp.Substring.from_arg_list(args), args) 6848 6849 def _parse_trim(self) -> exp.Trim: 6850 # https://www.w3resource.com/sql/character-functions/trim.php 6851 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6852 6853 position = None 6854 collation = None 6855 expression = None 6856 6857 if self._match_texts(self.TRIM_TYPES): 6858 position = self._prev.text.upper() 6859 6860 this = self._parse_bitwise() 6861 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6862 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6863 expression = self._parse_bitwise() 6864 6865 if invert_order: 6866 this, expression = expression, this 6867 6868 if self._match(TokenType.COLLATE): 6869 collation = self._parse_bitwise() 6870 6871 return self.expression( 6872 exp.Trim, this=this, position=position, expression=expression, collation=collation 6873 ) 6874 6875 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6876 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6877 6878 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6879 return self._parse_window(self._parse_id_var(), alias=True) 6880 6881 def _parse_respect_or_ignore_nulls( 6882 self, this: t.Optional[exp.Expression] 6883 ) -> t.Optional[exp.Expression]: 6884 if self._match_text_seq("IGNORE", "NULLS"): 6885 return self.expression(exp.IgnoreNulls, this=this) 6886 if self._match_text_seq("RESPECT", "NULLS"): 6887 return self.expression(exp.RespectNulls, this=this) 6888 return this 6889 6890 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6891 if self._match(TokenType.HAVING): 6892 self._match_texts(("MAX", "MIN")) 6893 max = self._prev.text.upper() != "MIN" 6894 return self.expression( 6895 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6896 ) 6897 6898 return this 6899 6900 def _parse_window( 6901 self, this: t.Optional[exp.Expression], alias: bool = False 6902 ) -> t.Optional[exp.Expression]: 6903 func = this 6904 comments = func.comments if isinstance(func, exp.Expression) else None 6905 6906 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6907 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6908 if self._match_text_seq("WITHIN", "GROUP"): 6909 order = self._parse_wrapped(self._parse_order) 6910 this = self.expression(exp.WithinGroup, this=this, expression=order) 6911 6912 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6913 self._match(TokenType.WHERE) 6914 this = self.expression( 6915 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6916 ) 6917 self._match_r_paren() 6918 6919 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6920 # Some dialects choose to implement and some do not. 6921 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6922 6923 # There is some code above in _parse_lambda that handles 6924 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6925 6926 # The below changes handle 6927 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 
6928 6929 # Oracle allows both formats 6930 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6931 # and Snowflake chose to do the same for familiarity 6932 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6933 if isinstance(this, exp.AggFunc): 6934 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6935 6936 if ignore_respect and ignore_respect is not this: 6937 ignore_respect.replace(ignore_respect.this) 6938 this = self.expression(ignore_respect.__class__, this=this) 6939 6940 this = self._parse_respect_or_ignore_nulls(this) 6941 6942 # bigquery select from window x AS (partition by ...) 6943 if alias: 6944 over = None 6945 self._match(TokenType.ALIAS) 6946 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6947 return this 6948 else: 6949 over = self._prev.text.upper() 6950 6951 if comments and isinstance(func, exp.Expression): 6952 func.pop_comments() 6953 6954 if not self._match(TokenType.L_PAREN): 6955 return self.expression( 6956 exp.Window, 6957 comments=comments, 6958 this=this, 6959 alias=self._parse_id_var(False), 6960 over=over, 6961 ) 6962 6963 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6964 6965 first = self._match(TokenType.FIRST) 6966 if self._match_text_seq("LAST"): 6967 first = False 6968 6969 partition, order = self._parse_partition_and_order() 6970 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6971 6972 if kind: 6973 self._match(TokenType.BETWEEN) 6974 start = self._parse_window_spec() 6975 self._match(TokenType.AND) 6976 end = self._parse_window_spec() 6977 exclude = ( 6978 self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS) 6979 if self._match_text_seq("EXCLUDE") 6980 else None 6981 ) 6982 6983 spec = self.expression( 6984 exp.WindowSpec, 6985 kind=kind, 6986 start=start["value"], 6987 start_side=start["side"], 6988 end=end["value"], 6989 end_side=end["side"], 6990 exclude=exclude, 6991 ) 6992 else: 6993 spec = None 6994 6995 self._match_r_paren() 6996 6997 window = self.expression( 6998 exp.Window, 6999 comments=comments, 7000 this=this, 7001 partition_by=partition, 7002 order=order, 7003 spec=spec, 7004 alias=window_alias, 7005 over=over, 7006 first=first, 7007 ) 7008 7009 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
7010 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 7011 return self._parse_window(window, alias=alias) 7012 7013 return window 7014 7015 def _parse_partition_and_order( 7016 self, 7017 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 7018 return self._parse_partition_by(), self._parse_order() 7019 7020 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 7021 self._match(TokenType.BETWEEN) 7022 7023 return { 7024 "value": ( 7025 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 7026 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 7027 or self._parse_bitwise() 7028 ), 7029 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 7030 } 7031 7032 def _parse_alias( 7033 self, this: t.Optional[exp.Expression], explicit: bool = False 7034 ) -> t.Optional[exp.Expression]: 7035 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 7036 # so this section tries to parse the clause version and if it fails, it treats the token 7037 # as an identifier (alias) 7038 if self._can_parse_limit_or_offset(): 7039 return this 7040 7041 any_token = self._match(TokenType.ALIAS) 7042 comments = self._prev_comments or [] 7043 7044 if explicit and not any_token: 7045 return this 7046 7047 if self._match(TokenType.L_PAREN): 7048 aliases = self.expression( 7049 exp.Aliases, 7050 comments=comments, 7051 this=this, 7052 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 7053 ) 7054 self._match_r_paren(aliases) 7055 return aliases 7056 7057 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 7058 self.STRING_ALIASES and self._parse_string_as_identifier() 7059 ) 7060 7061 if alias: 7062 comments.extend(alias.pop_comments()) 7063 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 7064 column = this.this 7065 7066 # Moves the comment next to the alias in `expr /* comment */ AS alias` 7067 if not this.comments and column and column.comments: 7068 this.comments = column.pop_comments() 7069 7070 return this 7071 7072 def _parse_id_var( 7073 self, 7074 any_token: bool = True, 7075 tokens: t.Optional[t.Collection[TokenType]] = None, 7076 ) -> t.Optional[exp.Expression]: 7077 expression = self._parse_identifier() 7078 if not expression and ( 7079 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 7080 ): 7081 quoted = self._prev.token_type == TokenType.STRING 7082 expression = self._identifier_expression(quoted=quoted) 7083 7084 return expression 7085 7086 def _parse_string(self) -> t.Optional[exp.Expression]: 7087 if self._match_set(self.STRING_PARSERS): 7088 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 7089 return self._parse_placeholder() 7090 7091 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 7092 output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 7093 if output: 7094 output.update_positions(self._prev) 7095 return output 7096 7097 def _parse_number(self) -> t.Optional[exp.Expression]: 7098 if self._match_set(self.NUMERIC_PARSERS): 7099 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 7100 return self._parse_placeholder() 7101 7102 def _parse_identifier(self) -> t.Optional[exp.Expression]: 7103 if self._match(TokenType.IDENTIFIER): 7104 return self._identifier_expression(quoted=True) 7105 return self._parse_placeholder() 7106 7107 def _parse_var( 7108 self, 7109 any_token: bool = False, 7110 tokens: 
t.Optional[t.Collection[TokenType]] = None, 7111 upper: bool = False, 7112 ) -> t.Optional[exp.Expression]: 7113 if ( 7114 (any_token and self._advance_any()) 7115 or self._match(TokenType.VAR) 7116 or (self._match_set(tokens) if tokens else False) 7117 ): 7118 return self.expression( 7119 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 7120 ) 7121 return self._parse_placeholder() 7122 7123 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 7124 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 7125 self._advance() 7126 return self._prev 7127 return None 7128 7129 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 7130 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 7131 7132 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 7133 return self._parse_primary() or self._parse_var(any_token=True) 7134 7135 def _parse_null(self) -> t.Optional[exp.Expression]: 7136 if self._match_set(self.NULL_TOKENS): 7137 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 7138 return self._parse_placeholder() 7139 7140 def _parse_boolean(self) -> t.Optional[exp.Expression]: 7141 if self._match(TokenType.TRUE): 7142 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 7143 if self._match(TokenType.FALSE): 7144 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 7145 return self._parse_placeholder() 7146 7147 def _parse_star(self) -> t.Optional[exp.Expression]: 7148 if self._match(TokenType.STAR): 7149 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 7150 return self._parse_placeholder() 7151 7152 def _parse_parameter(self) -> exp.Parameter: 7153 this = self._parse_identifier() or self._parse_primary_or_var() 7154 return self.expression(exp.Parameter, this=this) 7155 7156 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 7157 if self._match_set(self.PLACEHOLDER_PARSERS): 7158 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 7159 if placeholder: 7160 return placeholder 7161 self._advance(-1) 7162 return None 7163 7164 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 7165 if not self._match_texts(keywords): 7166 return None 7167 if self._match(TokenType.L_PAREN, advance=False): 7168 return self._parse_wrapped_csv(self._parse_expression) 7169 7170 expression = self._parse_expression() 7171 return [expression] if expression else None 7172 7173 def _parse_csv( 7174 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 7175 ) -> t.List[exp.Expression]: 7176 parse_result = parse_method() 7177 items = [parse_result] if parse_result is not None else [] 7178 7179 while self._match(sep): 7180 self._add_comments(parse_result) 7181 parse_result = parse_method() 7182 if parse_result is not None: 7183 items.append(parse_result) 7184 7185 return items 7186 7187 def _parse_tokens( 7188 self, parse_method: t.Callable, expressions: t.Dict 7189 ) -> t.Optional[exp.Expression]: 7190 this = parse_method() 7191 7192 while self._match_set(expressions): 7193 this = self.expression( 7194 expressions[self._prev.token_type], 7195 this=this, 7196 comments=self._prev_comments, 7197 expression=parse_method(), 7198 ) 7199 7200 return this 7201 7202 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 7203 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 7204 7205 def _parse_wrapped_csv( 7206 self, parse_method: t.Callable, 
sep: TokenType = TokenType.COMMA, optional: bool = False 7207 ) -> t.List[exp.Expression]: 7208 return self._parse_wrapped( 7209 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 7210 ) 7211 7212 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 7213 wrapped = self._match(TokenType.L_PAREN) 7214 if not wrapped and not optional: 7215 self.raise_error("Expecting (") 7216 parse_result = parse_method() 7217 if wrapped: 7218 self._match_r_paren() 7219 return parse_result 7220 7221 def _parse_expressions(self) -> t.List[exp.Expression]: 7222 return self._parse_csv(self._parse_expression) 7223 7224 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 7225 return self._parse_select() or self._parse_set_operations( 7226 self._parse_alias(self._parse_assignment(), explicit=True) 7227 if alias 7228 else self._parse_assignment() 7229 ) 7230 7231 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 7232 return self._parse_query_modifiers( 7233 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 7234 ) 7235 7236 def _parse_transaction(self) -> exp.Transaction | exp.Command: 7237 this = None 7238 if self._match_texts(self.TRANSACTION_KIND): 7239 this = self._prev.text 7240 7241 self._match_texts(("TRANSACTION", "WORK")) 7242 7243 modes = [] 7244 while True: 7245 mode = [] 7246 while self._match(TokenType.VAR): 7247 mode.append(self._prev.text) 7248 7249 if mode: 7250 modes.append(" ".join(mode)) 7251 if not self._match(TokenType.COMMA): 7252 break 7253 7254 return self.expression(exp.Transaction, this=this, modes=modes) 7255 7256 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 7257 chain = None 7258 savepoint = None 7259 is_rollback = self._prev.token_type == TokenType.ROLLBACK 7260 7261 self._match_texts(("TRANSACTION", "WORK")) 7262 7263 if self._match_text_seq("TO"): 7264 self._match_text_seq("SAVEPOINT") 7265 savepoint = self._parse_id_var() 7266 7267 if self._match(TokenType.AND): 7268 chain = not self._match_text_seq("NO") 7269 self._match_text_seq("CHAIN") 7270 7271 if is_rollback: 7272 return self.expression(exp.Rollback, savepoint=savepoint) 7273 7274 return self.expression(exp.Commit, chain=chain) 7275 7276 def _parse_refresh(self) -> exp.Refresh: 7277 self._match(TokenType.TABLE) 7278 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 7279 7280 def _parse_add_column(self) -> t.Optional[exp.ColumnDef]: 7281 if not self._prev.text.upper() == "ADD": 7282 return None 7283 7284 start = self._index 7285 self._match(TokenType.COLUMN) 7286 7287 exists_column = self._parse_exists(not_=True) 7288 expression = self._parse_field_def() 7289 7290 if not isinstance(expression, exp.ColumnDef): 7291 self._retreat(start) 7292 return None 7293 7294 expression.set("exists", exists_column) 7295 7296 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 7297 if self._match_texts(("FIRST", "AFTER")): 7298 position = self._prev.text 7299 column_position = self.expression( 7300 exp.ColumnPosition, this=self._parse_column(), position=position 7301 ) 7302 expression.set("position", column_position) 7303 7304 return expression 7305 7306 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 7307 drop = self._match(TokenType.DROP) and self._parse_drop() 7308 if drop and not isinstance(drop, exp.Command): 7309 drop.set("kind", drop.args.get("kind", "COLUMN")) 7310 return drop 7311 7312 # 
https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7313 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7314 return self.expression( 7315 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7316 ) 7317 7318 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7319 def _parse_add_alteration() -> t.Optional[exp.Expression]: 7320 self._match_text_seq("ADD") 7321 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7322 return self.expression( 7323 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7324 ) 7325 7326 column_def = self._parse_add_column() 7327 if isinstance(column_def, exp.ColumnDef): 7328 return column_def 7329 7330 exists = self._parse_exists(not_=True) 7331 if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False): 7332 return self.expression( 7333 exp.AddPartition, exists=exists, this=self._parse_field(any_token=True) 7334 ) 7335 7336 return None 7337 7338 if not self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False) and ( 7339 not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN 7340 or self._match_text_seq("COLUMNS") 7341 ): 7342 schema = self._parse_schema() 7343 7344 return ensure_list(schema) if schema else self._parse_csv(self._parse_field_def) 7345 7346 return self._parse_csv(_parse_add_alteration) 7347 7348 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7349 if self._match_texts(self.ALTER_ALTER_PARSERS): 7350 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7351 7352 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7353 # keyword after ALTER we default to parsing this statement 7354 self._match(TokenType.COLUMN) 7355 column = self._parse_field(any_token=True) 7356 7357 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 7358 return self.expression(exp.AlterColumn, this=column, drop=True) 7359 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 7360 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 7361 if self._match(TokenType.COMMENT): 7362 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 7363 if self._match_text_seq("DROP", "NOT", "NULL"): 7364 return self.expression( 7365 exp.AlterColumn, 7366 this=column, 7367 drop=True, 7368 allow_null=True, 7369 ) 7370 if self._match_text_seq("SET", "NOT", "NULL"): 7371 return self.expression( 7372 exp.AlterColumn, 7373 this=column, 7374 allow_null=False, 7375 ) 7376 7377 if self._match_text_seq("SET", "VISIBLE"): 7378 return self.expression(exp.AlterColumn, this=column, visible="VISIBLE") 7379 if self._match_text_seq("SET", "INVISIBLE"): 7380 return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE") 7381 7382 self._match_text_seq("SET", "DATA") 7383 self._match_text_seq("TYPE") 7384 return self.expression( 7385 exp.AlterColumn, 7386 this=column, 7387 dtype=self._parse_types(), 7388 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7389 using=self._match(TokenType.USING) and self._parse_assignment(), 7390 ) 7391 7392 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 7393 if self._match_texts(("ALL", "EVEN", "AUTO")): 7394 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7395 7396 self._match_text_seq("KEY", "DISTKEY") 7397 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7398 7399 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> 
exp.AlterSortKey: 7400 if compound: 7401 self._match_text_seq("SORTKEY") 7402 7403 if self._match(TokenType.L_PAREN, advance=False): 7404 return self.expression( 7405 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7406 ) 7407 7408 self._match_texts(("AUTO", "NONE")) 7409 return self.expression( 7410 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7411 ) 7412 7413 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7414 index = self._index - 1 7415 7416 partition_exists = self._parse_exists() 7417 if self._match(TokenType.PARTITION, advance=False): 7418 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7419 7420 self._retreat(index) 7421 return self._parse_csv(self._parse_drop_column) 7422 7423 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7424 if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN: 7425 exists = self._parse_exists() 7426 old_column = self._parse_column() 7427 to = self._match_text_seq("TO") 7428 new_column = self._parse_column() 7429 7430 if old_column is None or to is None or new_column is None: 7431 return None 7432 7433 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7434 7435 self._match_text_seq("TO") 7436 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7437 7438 def _parse_alter_table_set(self) -> exp.AlterSet: 7439 alter_set = self.expression(exp.AlterSet) 7440 7441 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7442 "TABLE", "PROPERTIES" 7443 ): 7444 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7445 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7446 alter_set.set("expressions", [self._parse_assignment()]) 7447 elif self._match_texts(("LOGGED", "UNLOGGED")): 7448 alter_set.set("option", exp.var(self._prev.text.upper())) 7449 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7450 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7451 elif self._match_text_seq("LOCATION"): 7452 alter_set.set("location", self._parse_field()) 7453 elif self._match_text_seq("ACCESS", "METHOD"): 7454 alter_set.set("access_method", self._parse_field()) 7455 elif self._match_text_seq("TABLESPACE"): 7456 alter_set.set("tablespace", self._parse_field()) 7457 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7458 alter_set.set("file_format", [self._parse_field()]) 7459 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7460 alter_set.set("file_format", self._parse_wrapped_options()) 7461 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7462 alter_set.set("copy_options", self._parse_wrapped_options()) 7463 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7464 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7465 else: 7466 if self._match_text_seq("SERDE"): 7467 alter_set.set("serde", self._parse_field()) 7468 7469 properties = self._parse_wrapped(self._parse_properties, optional=True) 7470 alter_set.set("expressions", [properties]) 7471 7472 return alter_set 7473 7474 def _parse_alter(self) -> exp.Alter | exp.Command: 7475 start = self._prev 7476 7477 alter_token = self._match_set(self.ALTERABLES) and self._prev 7478 if not alter_token: 7479 return self._parse_as_command(start) 7480 7481 exists = self._parse_exists() 7482 only = self._match_text_seq("ONLY") 7483 this = 
self._parse_table(schema=True) 7484 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7485 7486 if self._next: 7487 self._advance() 7488 7489 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7490 if parser: 7491 actions = ensure_list(parser(self)) 7492 not_valid = self._match_text_seq("NOT", "VALID") 7493 options = self._parse_csv(self._parse_property) 7494 7495 if not self._curr and actions: 7496 return self.expression( 7497 exp.Alter, 7498 this=this, 7499 kind=alter_token.text.upper(), 7500 exists=exists, 7501 actions=actions, 7502 only=only, 7503 options=options, 7504 cluster=cluster, 7505 not_valid=not_valid, 7506 ) 7507 7508 return self._parse_as_command(start) 7509 7510 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7511 start = self._prev 7512 # https://duckdb.org/docs/sql/statements/analyze 7513 if not self._curr: 7514 return self.expression(exp.Analyze) 7515 7516 options = [] 7517 while self._match_texts(self.ANALYZE_STYLES): 7518 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7519 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7520 else: 7521 options.append(self._prev.text.upper()) 7522 7523 this: t.Optional[exp.Expression] = None 7524 inner_expression: t.Optional[exp.Expression] = None 7525 7526 kind = self._curr and self._curr.text.upper() 7527 7528 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7529 this = self._parse_table_parts() 7530 elif self._match_text_seq("TABLES"): 7531 if self._match_set((TokenType.FROM, TokenType.IN)): 7532 kind = f"{kind} {self._prev.text.upper()}" 7533 this = self._parse_table(schema=True, is_db_reference=True) 7534 elif self._match_text_seq("DATABASE"): 7535 this = self._parse_table(schema=True, is_db_reference=True) 7536 elif self._match_text_seq("CLUSTER"): 7537 this = self._parse_table() 7538 # Try matching inner expr keywords before fallback to parse table. 
7539 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7540 kind = None 7541 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7542 else: 7543 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7544 kind = None 7545 this = self._parse_table_parts() 7546 7547 partition = self._try_parse(self._parse_partition) 7548 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7549 return self._parse_as_command(start) 7550 7551 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7552 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7553 "WITH", "ASYNC", "MODE" 7554 ): 7555 mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE" 7556 else: 7557 mode = None 7558 7559 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7560 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7561 7562 properties = self._parse_properties() 7563 return self.expression( 7564 exp.Analyze, 7565 kind=kind, 7566 this=this, 7567 mode=mode, 7568 partition=partition, 7569 properties=properties, 7570 expression=inner_expression, 7571 options=options, 7572 ) 7573 7574 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7575 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7576 this = None 7577 kind = self._prev.text.upper() 7578 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7579 expressions = [] 7580 7581 if not self._match_text_seq("STATISTICS"): 7582 self.raise_error("Expecting token STATISTICS") 7583 7584 if self._match_text_seq("NOSCAN"): 7585 this = "NOSCAN" 7586 elif self._match(TokenType.FOR): 7587 if self._match_text_seq("ALL", "COLUMNS"): 7588 this = "FOR ALL COLUMNS" 7589 if self._match_texts("COLUMNS"): 7590 this = "FOR COLUMNS" 7591 expressions = self._parse_csv(self._parse_column_reference) 7592 elif self._match_text_seq("SAMPLE"): 7593 sample = self._parse_number() 7594 expressions = [ 7595 self.expression( 7596 exp.AnalyzeSample, 7597 sample=sample, 7598 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7599 ) 7600 ] 7601 7602 return self.expression( 7603 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7604 ) 7605 7606 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7607 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7608 kind = None 7609 this = None 7610 expression: t.Optional[exp.Expression] = None 7611 if self._match_text_seq("REF", "UPDATE"): 7612 kind = "REF" 7613 this = "UPDATE" 7614 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7615 this = "UPDATE SET DANGLING TO NULL" 7616 elif self._match_text_seq("STRUCTURE"): 7617 kind = "STRUCTURE" 7618 if self._match_text_seq("CASCADE", "FAST"): 7619 this = "CASCADE FAST" 7620 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7621 ("ONLINE", "OFFLINE") 7622 ): 7623 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7624 expression = self._parse_into() 7625 7626 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7627 7628 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7629 this = self._prev.text.upper() 7630 if self._match_text_seq("COLUMNS"): 7631 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7632 return None 7633 7634 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 7635 kind = 
self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7636 if self._match_text_seq("STATISTICS"): 7637 return self.expression(exp.AnalyzeDelete, kind=kind) 7638 return None 7639 7640 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7641 if self._match_text_seq("CHAINED", "ROWS"): 7642 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7643 return None 7644 7645 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7646 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7647 this = self._prev.text.upper() 7648 expression: t.Optional[exp.Expression] = None 7649 expressions = [] 7650 update_options = None 7651 7652 if self._match_text_seq("HISTOGRAM", "ON"): 7653 expressions = self._parse_csv(self._parse_column_reference) 7654 with_expressions = [] 7655 while self._match(TokenType.WITH): 7656 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7657 if self._match_texts(("SYNC", "ASYNC")): 7658 if self._match_text_seq("MODE", advance=False): 7659 with_expressions.append(f"{self._prev.text.upper()} MODE") 7660 self._advance() 7661 else: 7662 buckets = self._parse_number() 7663 if self._match_text_seq("BUCKETS"): 7664 with_expressions.append(f"{buckets} BUCKETS") 7665 if with_expressions: 7666 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7667 7668 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7669 TokenType.UPDATE, advance=False 7670 ): 7671 update_options = self._prev.text.upper() 7672 self._advance() 7673 elif self._match_text_seq("USING", "DATA"): 7674 expression = self.expression(exp.UsingData, this=self._parse_string()) 7675 7676 return self.expression( 7677 exp.AnalyzeHistogram, 7678 this=this, 7679 expressions=expressions, 7680 expression=expression, 7681 update_options=update_options, 7682 ) 7683 7684 def _parse_merge(self) -> exp.Merge: 7685 self._match(TokenType.INTO) 7686 target = self._parse_table() 7687 7688 if target and self._match(TokenType.ALIAS, advance=False): 7689 target.set("alias", self._parse_table_alias()) 7690 7691 self._match(TokenType.USING) 7692 using = self._parse_table() 7693 7694 self._match(TokenType.ON) 7695 on = self._parse_assignment() 7696 7697 return self.expression( 7698 exp.Merge, 7699 this=target, 7700 using=using, 7701 on=on, 7702 whens=self._parse_when_matched(), 7703 returning=self._parse_returning(), 7704 ) 7705 7706 def _parse_when_matched(self) -> exp.Whens: 7707 whens = [] 7708 7709 while self._match(TokenType.WHEN): 7710 matched = not self._match(TokenType.NOT) 7711 self._match_text_seq("MATCHED") 7712 source = ( 7713 False 7714 if self._match_text_seq("BY", "TARGET") 7715 else self._match_text_seq("BY", "SOURCE") 7716 ) 7717 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7718 7719 self._match(TokenType.THEN) 7720 7721 if self._match(TokenType.INSERT): 7722 this = self._parse_star() 7723 if this: 7724 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7725 else: 7726 then = self.expression( 7727 exp.Insert, 7728 this=exp.var("ROW") 7729 if self._match_text_seq("ROW") 7730 else self._parse_value(values=False), 7731 expression=self._match_text_seq("VALUES") and self._parse_value(), 7732 ) 7733 elif self._match(TokenType.UPDATE): 7734 expressions = self._parse_star() 7735 if expressions: 7736 then = self.expression(exp.Update, expressions=expressions) 7737 else: 7738 then = self.expression( 7739 exp.Update, 7740 
expressions=self._match(TokenType.SET) 7741 and self._parse_csv(self._parse_equality), 7742 ) 7743 elif self._match(TokenType.DELETE): 7744 then = self.expression(exp.Var, this=self._prev.text) 7745 else: 7746 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7747 7748 whens.append( 7749 self.expression( 7750 exp.When, 7751 matched=matched, 7752 source=source, 7753 condition=condition, 7754 then=then, 7755 ) 7756 ) 7757 return self.expression(exp.Whens, expressions=whens) 7758 7759 def _parse_show(self) -> t.Optional[exp.Expression]: 7760 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7761 if parser: 7762 return parser(self) 7763 return self._parse_as_command(self._prev) 7764 7765 def _parse_set_item_assignment( 7766 self, kind: t.Optional[str] = None 7767 ) -> t.Optional[exp.Expression]: 7768 index = self._index 7769 7770 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 7771 return self._parse_set_transaction(global_=kind == "GLOBAL") 7772 7773 left = self._parse_primary() or self._parse_column() 7774 assignment_delimiter = self._match_texts(("=", "TO")) 7775 7776 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7777 self._retreat(index) 7778 return None 7779 7780 right = self._parse_statement() or self._parse_id_var() 7781 if isinstance(right, (exp.Column, exp.Identifier)): 7782 right = exp.var(right.name) 7783 7784 this = self.expression(exp.EQ, this=left, expression=right) 7785 return self.expression(exp.SetItem, this=this, kind=kind) 7786 7787 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7788 self._match_text_seq("TRANSACTION") 7789 characteristics = self._parse_csv( 7790 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7791 ) 7792 return self.expression( 7793 exp.SetItem, 7794 expressions=characteristics, 7795 kind="TRANSACTION", 7796 **{"global": global_}, # type: ignore 7797 ) 7798 7799 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7800 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7801 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7802 7803 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7804 index = self._index 7805 set_ = self.expression( 7806 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7807 ) 7808 7809 if self._curr: 7810 self._retreat(index) 7811 return self._parse_as_command(self._prev) 7812 7813 return set_ 7814 7815 def _parse_var_from_options( 7816 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7817 ) -> t.Optional[exp.Var]: 7818 start = self._curr 7819 if not start: 7820 return None 7821 7822 option = start.text.upper() 7823 continuations = options.get(option) 7824 7825 index = self._index 7826 self._advance() 7827 for keywords in continuations or []: 7828 if isinstance(keywords, str): 7829 keywords = (keywords,) 7830 7831 if self._match_text_seq(*keywords): 7832 option = f"{option} {' '.join(keywords)}" 7833 break 7834 else: 7835 if continuations or continuations is None: 7836 if raise_unmatched: 7837 self.raise_error(f"Unknown option {option}") 7838 7839 self._retreat(index) 7840 return None 7841 7842 return exp.var(option) 7843 7844 def _parse_as_command(self, start: Token) -> exp.Command: 7845 while self._curr: 7846 self._advance() 7847 text = self._find_sql(start, self._prev) 7848 size = len(start.text) 7849 self._warn_unsupported() 7850 return exp.Command(this=text[:size], 
expression=text[size:]) 7851 7852 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7853 settings = [] 7854 7855 self._match_l_paren() 7856 kind = self._parse_id_var() 7857 7858 if self._match(TokenType.L_PAREN): 7859 while True: 7860 key = self._parse_id_var() 7861 value = self._parse_primary() 7862 if not key and value is None: 7863 break 7864 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7865 self._match(TokenType.R_PAREN) 7866 7867 self._match_r_paren() 7868 7869 return self.expression( 7870 exp.DictProperty, 7871 this=this, 7872 kind=kind.this if kind else None, 7873 settings=settings, 7874 ) 7875 7876 def _parse_dict_range(self, this: str) -> exp.DictRange: 7877 self._match_l_paren() 7878 has_min = self._match_text_seq("MIN") 7879 if has_min: 7880 min = self._parse_var() or self._parse_primary() 7881 self._match_text_seq("MAX") 7882 max = self._parse_var() or self._parse_primary() 7883 else: 7884 max = self._parse_var() or self._parse_primary() 7885 min = exp.Literal.number(0) 7886 self._match_r_paren() 7887 return self.expression(exp.DictRange, this=this, min=min, max=max) 7888 7889 def _parse_comprehension( 7890 self, this: t.Optional[exp.Expression] 7891 ) -> t.Optional[exp.Comprehension]: 7892 index = self._index 7893 expression = self._parse_column() 7894 if not self._match(TokenType.IN): 7895 self._retreat(index - 1) 7896 return None 7897 iterator = self._parse_column() 7898 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7899 return self.expression( 7900 exp.Comprehension, 7901 this=this, 7902 expression=expression, 7903 iterator=iterator, 7904 condition=condition, 7905 ) 7906 7907 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7908 if self._match(TokenType.HEREDOC_STRING): 7909 return self.expression(exp.Heredoc, this=self._prev.text) 7910 7911 if not self._match_text_seq("$"): 7912 return None 7913 7914 tags = ["$"] 7915 tag_text = None 7916 7917 if self._is_connected(): 7918 self._advance() 7919 tags.append(self._prev.text.upper()) 7920 else: 7921 self.raise_error("No closing $ found") 7922 7923 if tags[-1] != "$": 7924 if self._is_connected() and self._match_text_seq("$"): 7925 tag_text = tags[-1] 7926 tags.append("$") 7927 else: 7928 self.raise_error("No closing $ found") 7929 7930 heredoc_start = self._curr 7931 7932 while self._curr: 7933 if self._match_text_seq(*tags, advance=False): 7934 this = self._find_sql(heredoc_start, self._prev) 7935 self._advance(len(tags)) 7936 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7937 7938 self._advance() 7939 7940 self.raise_error(f"No closing {''.join(tags)} found") 7941 return None 7942 7943 def _find_parser( 7944 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7945 ) -> t.Optional[t.Callable]: 7946 if not self._curr: 7947 return None 7948 7949 index = self._index 7950 this = [] 7951 while True: 7952 # The current token might be multiple words 7953 curr = self._curr.text.upper() 7954 key = curr.split(" ") 7955 this.append(curr) 7956 7957 self._advance() 7958 result, trie = in_trie(trie, key) 7959 if result == TrieResult.FAILED: 7960 break 7961 7962 if result == TrieResult.EXISTS: 7963 subparser = parsers[" ".join(this)] 7964 return subparser 7965 7966 self._retreat(index) 7967 return None 7968 7969 def _match(self, token_type, advance=True, expression=None): 7970 if not self._curr: 7971 return None 7972 7973 if self._curr.token_type == token_type: 7974 if advance: 7975 self._advance() 7976 self._add_comments(expression) 7977 return 
True 7978 7979 return None 7980 7981 def _match_set(self, types, advance=True): 7982 if not self._curr: 7983 return None 7984 7985 if self._curr.token_type in types: 7986 if advance: 7987 self._advance() 7988 return True 7989 7990 return None 7991 7992 def _match_pair(self, token_type_a, token_type_b, advance=True): 7993 if not self._curr or not self._next: 7994 return None 7995 7996 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 7997 if advance: 7998 self._advance(2) 7999 return True 8000 8001 return None 8002 8003 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8004 if not self._match(TokenType.L_PAREN, expression=expression): 8005 self.raise_error("Expecting (") 8006 8007 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8008 if not self._match(TokenType.R_PAREN, expression=expression): 8009 self.raise_error("Expecting )") 8010 8011 def _match_texts(self, texts, advance=True): 8012 if ( 8013 self._curr 8014 and self._curr.token_type != TokenType.STRING 8015 and self._curr.text.upper() in texts 8016 ): 8017 if advance: 8018 self._advance() 8019 return True 8020 return None 8021 8022 def _match_text_seq(self, *texts, advance=True): 8023 index = self._index 8024 for text in texts: 8025 if ( 8026 self._curr 8027 and self._curr.token_type != TokenType.STRING 8028 and self._curr.text.upper() == text 8029 ): 8030 self._advance() 8031 else: 8032 self._retreat(index) 8033 return None 8034 8035 if not advance: 8036 self._retreat(index) 8037 8038 return True 8039 8040 def _replace_lambda( 8041 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 8042 ) -> t.Optional[exp.Expression]: 8043 if not node: 8044 return node 8045 8046 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 8047 8048 for column in node.find_all(exp.Column): 8049 typ = lambda_types.get(column.parts[0].name) 8050 if typ is not None: 8051 dot_or_id = column.to_dot() if column.table else column.this 8052 8053 if typ: 8054 dot_or_id = self.expression( 8055 exp.Cast, 8056 this=dot_or_id, 8057 to=typ, 8058 ) 8059 8060 parent = column.parent 8061 8062 while isinstance(parent, exp.Dot): 8063 if not isinstance(parent.parent, exp.Dot): 8064 parent.replace(dot_or_id) 8065 break 8066 parent = parent.parent 8067 else: 8068 if column is node: 8069 node = dot_or_id 8070 else: 8071 column.replace(dot_or_id) 8072 return node 8073 8074 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 8075 start = self._prev 8076 8077 # Not to be confused with TRUNCATE(number, decimals) function call 8078 if self._match(TokenType.L_PAREN): 8079 self._retreat(self._index - 2) 8080 return self._parse_function() 8081 8082 # Clickhouse supports TRUNCATE DATABASE as well 8083 is_database = self._match(TokenType.DATABASE) 8084 8085 self._match(TokenType.TABLE) 8086 8087 exists = self._parse_exists(not_=False) 8088 8089 expressions = self._parse_csv( 8090 lambda: self._parse_table(schema=True, is_db_reference=is_database) 8091 ) 8092 8093 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 8094 8095 if self._match_text_seq("RESTART", "IDENTITY"): 8096 identity = "RESTART" 8097 elif self._match_text_seq("CONTINUE", "IDENTITY"): 8098 identity = "CONTINUE" 8099 else: 8100 identity = None 8101 8102 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 8103 option = self._prev.text 8104 else: 8105 option = None 8106 8107 partition = self._parse_partition() 
8108 8109 # Fallback case 8110 if self._curr: 8111 return self._parse_as_command(start) 8112 8113 return self.expression( 8114 exp.TruncateTable, 8115 expressions=expressions, 8116 is_database=is_database, 8117 exists=exists, 8118 cluster=cluster, 8119 identity=identity, 8120 option=option, 8121 partition=partition, 8122 ) 8123 8124 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 8125 this = self._parse_ordered(self._parse_opclass) 8126 8127 if not self._match(TokenType.WITH): 8128 return this 8129 8130 op = self._parse_var(any_token=True) 8131 8132 return self.expression(exp.WithOperator, this=this, op=op) 8133 8134 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 8135 self._match(TokenType.EQ) 8136 self._match(TokenType.L_PAREN) 8137 8138 opts: t.List[t.Optional[exp.Expression]] = [] 8139 option: exp.Expression | None 8140 while self._curr and not self._match(TokenType.R_PAREN): 8141 if self._match_text_seq("FORMAT_NAME", "="): 8142 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 8143 option = self._parse_format_name() 8144 else: 8145 option = self._parse_property() 8146 8147 if option is None: 8148 self.raise_error("Unable to parse option") 8149 break 8150 8151 opts.append(option) 8152 8153 return opts 8154 8155 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 8156 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 8157 8158 options = [] 8159 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 8160 option = self._parse_var(any_token=True) 8161 prev = self._prev.text.upper() 8162 8163 # Different dialects might separate options and values by white space, "=" and "AS" 8164 self._match(TokenType.EQ) 8165 self._match(TokenType.ALIAS) 8166 8167 param = self.expression(exp.CopyParameter, this=option) 8168 8169 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8170 TokenType.L_PAREN, advance=False 8171 ): 8172 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8173 param.set("expressions", self._parse_wrapped_options()) 8174 elif prev == "FILE_FORMAT": 8175 # T-SQL's external file format case 8176 param.set("expression", self._parse_field()) 8177 else: 8178 param.set("expression", self._parse_unquoted_field()) 8179 8180 options.append(param) 8181 self._match(sep) 8182 8183 return options 8184 8185 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8186 expr = self.expression(exp.Credentials) 8187 8188 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8189 expr.set("storage", self._parse_field()) 8190 if self._match_text_seq("CREDENTIALS"): 8191 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8192 creds = ( 8193 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8194 ) 8195 expr.set("credentials", creds) 8196 if self._match_text_seq("ENCRYPTION"): 8197 expr.set("encryption", self._parse_wrapped_options()) 8198 if self._match_text_seq("IAM_ROLE"): 8199 expr.set("iam_role", self._parse_field()) 8200 if self._match_text_seq("REGION"): 8201 expr.set("region", self._parse_field()) 8202 8203 return expr 8204 8205 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8206 return self._parse_field() 8207 8208 def _parse_copy(self) -> exp.Copy | exp.Command: 8209 start = self._prev 8210 8211 self._match(TokenType.INTO) 8212 8213 this = ( 8214 self._parse_select(nested=True, parse_subquery_alias=False) 8215 if self._match(TokenType.L_PAREN, advance=False) 8216 else self._parse_table(schema=True) 
8217 ) 8218 8219 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8220 8221 files = self._parse_csv(self._parse_file_location) 8222 credentials = self._parse_credentials() 8223 8224 self._match_text_seq("WITH") 8225 8226 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8227 8228 # Fallback case 8229 if self._curr: 8230 return self._parse_as_command(start) 8231 8232 return self.expression( 8233 exp.Copy, 8234 this=this, 8235 kind=kind, 8236 credentials=credentials, 8237 files=files, 8238 params=params, 8239 ) 8240 8241 def _parse_normalize(self) -> exp.Normalize: 8242 return self.expression( 8243 exp.Normalize, 8244 this=self._parse_bitwise(), 8245 form=self._match(TokenType.COMMA) and self._parse_var(), 8246 ) 8247 8248 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8249 args = self._parse_csv(lambda: self._parse_lambda()) 8250 8251 this = seq_get(args, 0) 8252 decimals = seq_get(args, 1) 8253 8254 return expr_type( 8255 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8256 ) 8257 8258 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8259 star_token = self._prev 8260 8261 if self._match_text_seq("COLUMNS", "(", advance=False): 8262 this = self._parse_function() 8263 if isinstance(this, exp.Columns): 8264 this.set("unpack", True) 8265 return this 8266 8267 return self.expression( 8268 exp.Star, 8269 **{ # type: ignore 8270 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8271 "replace": self._parse_star_op("REPLACE"), 8272 "rename": self._parse_star_op("RENAME"), 8273 }, 8274 ).update_positions(star_token) 8275 8276 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8277 privilege_parts = [] 8278 8279 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8280 # (end of privilege list) or L_PAREN (start of column list) are met 8281 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8282 privilege_parts.append(self._curr.text.upper()) 8283 self._advance() 8284 8285 this = exp.var(" ".join(privilege_parts)) 8286 expressions = ( 8287 self._parse_wrapped_csv(self._parse_column) 8288 if self._match(TokenType.L_PAREN, advance=False) 8289 else None 8290 ) 8291 8292 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8293 8294 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8295 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8296 principal = self._parse_id_var() 8297 8298 if not principal: 8299 return None 8300 8301 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8302 8303 def _parse_grant(self) -> exp.Grant | exp.Command: 8304 start = self._prev 8305 8306 privileges = self._parse_csv(self._parse_grant_privilege) 8307 8308 self._match(TokenType.ON) 8309 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8310 8311 # Attempt to parse the securable e.g. 
MySQL allows names 8312 # such as "foo.*", "*.*" which are not easily parseable yet 8313 securable = self._try_parse(self._parse_table_parts) 8314 8315 if not securable or not self._match_text_seq("TO"): 8316 return self._parse_as_command(start) 8317 8318 principals = self._parse_csv(self._parse_grant_principal) 8319 8320 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8321 8322 if self._curr: 8323 return self._parse_as_command(start) 8324 8325 return self.expression( 8326 exp.Grant, 8327 privileges=privileges, 8328 kind=kind, 8329 securable=securable, 8330 principals=principals, 8331 grant_option=grant_option, 8332 ) 8333 8334 def _parse_overlay(self) -> exp.Overlay: 8335 return self.expression( 8336 exp.Overlay, 8337 **{ # type: ignore 8338 "this": self._parse_bitwise(), 8339 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8340 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8341 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8342 }, 8343 ) 8344 8345 def _parse_format_name(self) -> exp.Property: 8346 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 8347 # for FILE_FORMAT = <format_name> 8348 return self.expression( 8349 exp.Property, 8350 this=exp.var("FORMAT_NAME"), 8351 value=self._parse_string() or self._parse_table_parts(), 8352 ) 8353 8354 def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc: 8355 args: t.List[exp.Expression] = [] 8356 8357 if self._match(TokenType.DISTINCT): 8358 args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()])) 8359 self._match(TokenType.COMMA) 8360 8361 args.extend(self._parse_csv(self._parse_assignment)) 8362 8363 return self.expression( 8364 expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2) 8365 ) 8366 8367 def _identifier_expression( 8368 self, token: t.Optional[Token] = None, **kwargs: t.Any 8369 ) -> exp.Identifier: 8370 token = token or self._prev 8371 expression = self.expression(exp.Identifier, this=token.text, **kwargs) 8372 expression.update_positions(token) 8373 return expression 8374 8375 def _build_pipe_cte( 8376 self, 8377 query: exp.Query, 8378 expressions: t.List[exp.Expression], 8379 alias_cte: t.Optional[exp.TableAlias] = None, 8380 ) -> exp.Select: 8381 new_cte: t.Optional[t.Union[str, exp.TableAlias]] 8382 if alias_cte: 8383 new_cte = alias_cte 8384 else: 8385 self._pipe_cte_counter += 1 8386 new_cte = f"__tmp{self._pipe_cte_counter}" 8387 8388 with_ = query.args.get("with") 8389 ctes = with_.pop() if with_ else None 8390 8391 new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False) 8392 if ctes: 8393 new_select.set("with", ctes) 8394 8395 return new_select.with_(new_cte, as_=query, copy=False) 8396 8397 def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select: 8398 select = self._parse_select(consume_pipe=False) 8399 if not select: 8400 return query 8401 8402 return self._build_pipe_cte( 8403 query=query.select(*select.expressions, append=False), expressions=[exp.Star()] 8404 ) 8405 8406 def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select: 8407 limit = self._parse_limit() 8408 offset = self._parse_offset() 8409 if limit: 8410 curr_limit = query.args.get("limit", limit) 8411 if curr_limit.expression.to_py() >= limit.expression.to_py(): 8412 query.limit(limit, copy=False) 8413 if offset: 8414 curr_offset = query.args.get("offset") 8415 curr_offset = curr_offset.expression.to_py() if curr_offset else 0 
8416 query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False) 8417 8418 return query 8419 8420 def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]: 8421 this = self._parse_assignment() 8422 if self._match_text_seq("GROUP", "AND", advance=False): 8423 return this 8424 8425 this = self._parse_alias(this) 8426 8427 if self._match_set((TokenType.ASC, TokenType.DESC), advance=False): 8428 return self._parse_ordered(lambda: this) 8429 8430 return this 8431 8432 def _parse_pipe_syntax_aggregate_group_order_by( 8433 self, query: exp.Select, group_by_exists: bool = True 8434 ) -> exp.Select: 8435 expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields) 8436 aggregates_or_groups, orders = [], [] 8437 for element in expr: 8438 if isinstance(element, exp.Ordered): 8439 this = element.this 8440 if isinstance(this, exp.Alias): 8441 element.set("this", this.args["alias"]) 8442 orders.append(element) 8443 else: 8444 this = element 8445 aggregates_or_groups.append(this) 8446 8447 if group_by_exists: 8448 query.select(*aggregates_or_groups, copy=False).group_by( 8449 *[projection.args.get("alias", projection) for projection in aggregates_or_groups], 8450 copy=False, 8451 ) 8452 else: 8453 query.select(*aggregates_or_groups, append=False, copy=False) 8454 8455 if orders: 8456 return query.order_by(*orders, append=False, copy=False) 8457 8458 return query 8459 8460 def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select: 8461 self._match_text_seq("AGGREGATE") 8462 query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False) 8463 8464 if self._match(TokenType.GROUP_BY) or ( 8465 self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY) 8466 ): 8467 query = self._parse_pipe_syntax_aggregate_group_order_by(query) 8468 8469 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8470 8471 def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> t.Optional[exp.Query]: 8472 first_setop = self.parse_set_operation(this=query) 8473 if not first_setop: 8474 return None 8475 8476 def _parse_and_unwrap_query() -> t.Optional[exp.Select]: 8477 expr = self._parse_paren() 8478 return expr.assert_is(exp.Subquery).unnest() if expr else None 8479 8480 first_setop.this.pop() 8481 8482 setops = [ 8483 first_setop.expression.pop().assert_is(exp.Subquery).unnest(), 8484 *self._parse_csv(_parse_and_unwrap_query), 8485 ] 8486 8487 query = self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8488 with_ = query.args.get("with") 8489 ctes = with_.pop() if with_ else None 8490 8491 if isinstance(first_setop, exp.Union): 8492 query = query.union(*setops, copy=False, **first_setop.args) 8493 elif isinstance(first_setop, exp.Except): 8494 query = query.except_(*setops, copy=False, **first_setop.args) 8495 else: 8496 query = query.intersect(*setops, copy=False, **first_setop.args) 8497 8498 query.set("with", ctes) 8499 8500 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8501 8502 def _parse_pipe_syntax_join(self, query: exp.Query) -> t.Optional[exp.Query]: 8503 join = self._parse_join() 8504 if not join: 8505 return None 8506 8507 if isinstance(query, exp.Select): 8508 return query.join(join, copy=False) 8509 8510 return query 8511 8512 def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select: 8513 pivots = self._parse_pivots() 8514 if not pivots: 8515 return query 8516 8517 from_ = query.args.get("from") 8518 if from_: 8519 from_.this.set("pivots", pivots) 8520 8521 
return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8522 8523 def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select: 8524 self._match_text_seq("EXTEND") 8525 query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False) 8526 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8527 8528 def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select: 8529 sample = self._parse_table_sample() 8530 8531 with_ = query.args.get("with") 8532 if with_: 8533 with_.expressions[-1].this.set("sample", sample) 8534 else: 8535 query.set("sample", sample) 8536 8537 return query 8538 8539 def _parse_pipe_syntax_query(self, query: exp.Query) -> t.Optional[exp.Query]: 8540 if isinstance(query, exp.Subquery): 8541 query = exp.select("*").from_(query, copy=False) 8542 8543 if not query.args.get("from"): 8544 query = exp.select("*").from_(query.subquery(copy=False), copy=False) 8545 8546 while self._match(TokenType.PIPE_GT): 8547 start = self._curr 8548 parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper()) 8549 if not parser: 8550 # The set operators (UNION, etc) and the JOIN operator have a few common starting 8551 # keywords, making it tricky to disambiguate them without lookahead. The approach 8552 # here is to try and parse a set operation and if that fails, then try to parse a 8553 # join operator. If that fails as well, then the operator is not supported. 8554 parsed_query = self._parse_pipe_syntax_set_operator(query) 8555 parsed_query = parsed_query or self._parse_pipe_syntax_join(query) 8556 if not parsed_query: 8557 self._retreat(start) 8558 self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.") 8559 break 8560 query = parsed_query 8561 else: 8562 query = parser(self, query) 8563 8564 return query
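The methods in the listing above implement, among other things, TRIM variants, MATCH ... AGAINST, window frames, MERGE with WHEN clauses, TRUNCATE, COPY, GRANT, and BigQuery-style pipe syntax. As a minimal sketch (the SQL strings here are illustrative and not taken from the source), a few of these constructs can be exercised through the public API:

import sqlglot
from sqlglot import exp

# TRIM with a trim type and a FROM clause (see _parse_trim above)
trim = sqlglot.parse_one("SELECT TRIM(LEADING 'x' FROM col) FROM t")
print(trim.sql())

# A windowed aggregate with an explicit frame (see _parse_window / _parse_window_spec)
win = sqlglot.parse_one(
    "SELECT SUM(x) OVER (PARTITION BY y ORDER BY z ROWS BETWEEN 1 PRECEDING AND CURRENT ROW) FROM t"
)
assert isinstance(win.find(exp.Window).args["spec"], exp.WindowSpec)

# MERGE with WHEN MATCHED / WHEN NOT MATCHED branches (see _parse_merge / _parse_when_matched)
merge = sqlglot.parse_one(
    "MERGE INTO t USING s ON t.id = s.id "
    "WHEN MATCHED THEN UPDATE SET t.v = s.v "
    "WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)"
)
assert isinstance(merge, exp.Merge)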
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
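A construction sketch showing how these options combine (the dialect name is only an example):

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser

# Accumulate errors and raise up to five of them together at the end of
# parsing, instead of raising immediately on the first one.
parser = Parser(
    error_level=ErrorLevel.RAISE,
    error_message_context=50,
    max_errors=5,
    dialect="duckdb",
)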
def __init__(
    self,
    error_level: t.Optional[ErrorLevel] = None,
    error_message_context: int = 100,
    max_errors: int = 3,
    dialect: DialectType = None,
):
    from sqlglot.dialects import Dialect

    self.error_level = error_level or ErrorLevel.IMMEDIATE
    self.error_message_context = error_message_context
    self.max_errors = max_errors
    self.dialect = Dialect.get_or_raise(dialect)
    self.reset()
def parse(
    self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens and returns a list of syntax trees, one tree
    per parsed SQL statement.

    Args:
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The list of the produced syntax trees.
    """
    return self._parse(
        parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
    )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
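A usage sketch pairing the Tokenizer with parse, under the default dialect:

from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT a FROM t; SELECT b FROM u"
tokens = Tokenizer().tokenize(sql)

# One tree per statement; passing the original sql improves error messages.
trees = Parser().parse(tokens, sql)
assert len(trees) == 2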
def parse_into(
    self,
    expression_types: exp.IntoType,
    raw_tokens: t.List[Token],
    sql: t.Optional[str] = None,
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens into a given Expression type. If a collection of Expression
    types is given instead, this method will try to parse the token list into each one
    of them, stopping at the first for which the parsing succeeds.

    Args:
        expression_types: The expression type(s) to try and parse the token list into.
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The target Expression.
    """
    errors = []
    for expression_type in ensure_list(expression_types):
        parser = self.EXPRESSION_PARSERS.get(expression_type)
        if not parser:
            raise TypeError(f"No parser registered for {expression_type}")

        try:
            return self._parse(parser, raw_tokens, sql)
        except ParseError as e:
            e.errors[0]["into_expression"] = expression_type
            errors.append(e)

    raise ParseError(
        f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
        errors=merge_errors(errors),
    ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
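A sketch of parsing straight into a target type, assuming exp.Select is registered in this parser's EXPRESSION_PARSERS mapping (as it is in recent sqlglot versions):

from sqlglot import exp
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT a FROM t"
# parse_into returns a list, mirroring parse; here it holds a single Select tree.
select = Parser().parse_into(exp.Select, Tokenizer().tokenize(sql), sql)[0]
assert isinstance(select, exp.Select)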
def check_errors(self) -> None:
    """Logs or raises any found errors, depending on the chosen error level setting."""
    if self.error_level == ErrorLevel.WARN:
        for error in self.errors:
            logger.error(str(error))
    elif self.error_level == ErrorLevel.RAISE and self.errors:
        raise ParseError(
            concat_messages(self.errors, self.max_errors),
            errors=merge_errors(self.errors),
        )
Logs or raises any found errors, depending on the chosen error level setting.
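For instance, under ErrorLevel.WARN the errors accumulated during a parse are routed to the "sqlglot" logger instead of being raised; a sketch with a deliberately malformed statement:

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

parser = Parser(error_level=ErrorLevel.WARN)
sql = "SELECT 1 +"  # incomplete expression
parser.parse(Tokenizer().tokenize(sql), sql)  # logs the error instead of raising
print(parser.errors)  # the recorded ParseError instances remain inspectable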
def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
    """
    Appends an error in the list of recorded errors or raises it, depending on the chosen
    error level setting.
    """
    token = token or self._curr or self._prev or Token.string("")
    start = token.start
    end = token.end + 1
    start_context = self.sql[max(start - self.error_message_context, 0) : start]
    highlight = self.sql[start:end]
    end_context = self.sql[end : end + self.error_message_context]

    error = ParseError.new(
        f"{message}. Line {token.line}, Col: {token.col}.\n"
        f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
        description=message,
        line=token.line,
        col=token.col,
        start_context=start_context,
        highlight=highlight,
        end_context=end_context,
    )

    if self.error_level == ErrorLevel.IMMEDIATE:
        raise error

    self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
def expression(
    self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
) -> E:
    """
    Creates a new, validated Expression.

    Args:
        exp_class: The expression class to instantiate.
        comments: An optional list of comments to attach to the expression.
        kwargs: The arguments to set for the expression along with their respective values.

    Returns:
        The target expression.
    """
    instance = exp_class(**kwargs)
    instance.add_comments(comments) if comments else self._add_comments(instance)
    return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
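A sketch of this helper in isolation; dialect parsers normally call it as self.expression(...) while consuming tokens:

from sqlglot import exp
from sqlglot.parser import Parser

parser = Parser()
# Builds a Column node and validates it in one step.
column = parser.expression(exp.Column, this=exp.to_identifier("a"))
assert column.sql() == "a"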
def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
    """
    Validates an Expression, making sure that all its mandatory arguments are set.

    Args:
        expression: The expression to validate.
        args: An optional list of items that was used to instantiate the expression, if it's a Func.

    Returns:
        The validated expression.
    """
    if self.error_level != ErrorLevel.IGNORE:
        for error_message in expression.error_messages(args):
            self.raise_error(error_message)

    return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
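Because exp.Column declares its "this" argument as mandatory, validating an empty instance fails, and under the default ErrorLevel.IMMEDIATE the error is raised right away. A sketch:

from sqlglot import exp
from sqlglot.errors import ParseError
from sqlglot.parser import Parser

parser = Parser()  # ErrorLevel.IMMEDIATE by default
try:
    parser.validate_expression(exp.Column())
except ParseError as e:
    print(e)  # reports the missing mandatory argument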
def parse_set_operation(
    self, this: t.Optional[exp.Expression], consume_pipe: bool = False
) -> t.Optional[exp.Expression]:
    start = self._index
    _, side_token, kind_token = self._parse_join_parts()

    side = side_token.text if side_token else None
    kind = kind_token.text if kind_token else None

    if not self._match_set(self.SET_OPERATIONS):
        self._retreat(start)
        return None

    token_type = self._prev.token_type

    if token_type == TokenType.UNION:
        operation: t.Type[exp.SetOperation] = exp.Union
    elif token_type == TokenType.EXCEPT:
        operation = exp.Except
    else:
        operation = exp.Intersect

    comments = self._prev.comments

    if self._match(TokenType.DISTINCT):
        distinct: t.Optional[bool] = True
    elif self._match(TokenType.ALL):
        distinct = False
    else:
        distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
        if distinct is None:
            self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

    by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq(
        "STRICT", "CORRESPONDING"
    )
    if self._match_text_seq("CORRESPONDING"):
        by_name = True
        if not side and not kind:
            kind = "INNER"

    on_column_list = None
    if by_name and self._match_texts(("ON", "BY")):
        on_column_list = self._parse_wrapped_csv(self._parse_column)

    expression = self._parse_select(
        nested=True, parse_set_operation=False, consume_pipe=consume_pipe
    )

    return self.expression(
        operation,
        comments=comments,
        this=this,
        distinct=distinct,
        by_name=by_name,
        expression=expression,
        side=side,
        kind=kind,
        on=on_column_list,
    )
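The effect of this method is easiest to observe through the public API; a sketch using the default dialect:

import sqlglot
from sqlglot import exp

union = sqlglot.parse_one("SELECT a FROM t UNION ALL SELECT a FROM u")
assert isinstance(union, exp.Union)
assert union.args["distinct"] is False  # ALL was matched, so distinct=False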