sqlglot.dialects.presto
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    binary_from_function,
    bool_xor_sql,
    date_trunc_to_time,
    datestrtodate_sql,
    encode_decode_sql,
    build_formatted_time,
    if_sql,
    left_to_substring_sql,
    no_ilike_sql,
    no_pivot_sql,
    no_timestamp_sql,
    regexp_extract_sql,
    rename_func,
    right_to_substring_sql,
    sha256_sql,
    strposition_sql,
    struct_extract_sql,
    timestamptrunc_sql,
    timestrtotime_sql,
    ts_or_ds_add_cast,
    unit_to_str,
    sequence_sql,
    build_regexp_extract,
    explode_to_unnest_sql,
)
from sqlglot.dialects.hive import Hive
from sqlglot.dialects.mysql import MySQL
from sqlglot.helper import apply_index_offset, seq_get
from sqlglot.optimizer.scope import find_all_in_scope
from sqlglot.tokens import TokenType
from sqlglot.transforms import unqualify_columns
from sqlglot.generator import unsupported_args

DATE_ADD_OR_SUB = t.Union[exp.DateAdd, exp.TimestampAdd, exp.DateSub]


def _initcap_sql(self: Presto.Generator, expression: exp.Initcap) -> str:
    regex = r"(\w)(\w*)"
    return f"REGEXP_REPLACE({self.sql(expression, 'this')}, '{regex}', x -> UPPER(x[1]) || LOWER(x[2]))"


def _no_sort_array(self: Presto.Generator, expression: exp.SortArray) -> str:
    if expression.args.get("asc") == exp.false():
        comparator = "(a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END"
    else:
        comparator = None
    return self.func("ARRAY_SORT", expression.this, comparator)


def _schema_sql(self: Presto.Generator, expression: exp.Schema) -> str:
    if isinstance(expression.parent, exp.PartitionedByProperty):
        # Any columns in the ARRAY[] string literals should not be quoted
        expression.transform(lambda n: n.name if isinstance(n, exp.Identifier) else n, copy=False)

        partition_exprs = [
            self.sql(c) if isinstance(c, (exp.Func, exp.Property)) else self.sql(c, "this")
            for c in expression.expressions
        ]
        return self.sql(exp.Array(expressions=[exp.Literal.string(c) for c in partition_exprs]))

    if expression.parent:
        for schema in expression.parent.find_all(exp.Schema):
            if schema is expression:
                continue

            column_defs = schema.find_all(exp.ColumnDef)
            if column_defs and isinstance(schema.parent, exp.Property):
                expression.expressions.extend(column_defs)

    return self.schema_sql(expression)


def _quantile_sql(self: Presto.Generator, expression: exp.Quantile) -> str:
    self.unsupported("Presto does not support exact quantiles")
    return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))


def _str_to_time_sql(
    self: Presto.Generator, expression: exp.StrToDate | exp.StrToTime | exp.TsOrDsToDate
) -> str:
    return self.func("DATE_PARSE", expression.this, self.format_time(expression))


def _ts_or_ds_to_date_sql(self: Presto.Generator, expression: exp.TsOrDsToDate) -> str:
    time_format = self.format_time(expression)
    if time_format and time_format not in (Presto.TIME_FORMAT, Presto.DATE_FORMAT):
        return self.sql(exp.cast(_str_to_time_sql(self, expression), exp.DataType.Type.DATE))
    return self.sql(
        exp.cast(exp.cast(expression.this, exp.DataType.Type.TIMESTAMP), exp.DataType.Type.DATE)
    )


def _ts_or_ds_add_sql(self: Presto.Generator, expression: exp.TsOrDsAdd) -> str:
    expression = ts_or_ds_add_cast(expression)
    unit = unit_to_str(expression)
    return self.func("DATE_ADD", unit, expression.expression, expression.this)


def _ts_or_ds_diff_sql(self: Presto.Generator, expression: exp.TsOrDsDiff) -> str:
    this = exp.cast(expression.this, exp.DataType.Type.TIMESTAMP)
    expr = exp.cast(expression.expression, exp.DataType.Type.TIMESTAMP)
    unit = unit_to_str(expression)
    return self.func("DATE_DIFF", unit, expr, this)


def _build_approx_percentile(args: t.List) -> exp.Expression:
    if len(args) == 4:
        return exp.ApproxQuantile(
            this=seq_get(args, 0),
            weight=seq_get(args, 1),
            quantile=seq_get(args, 2),
            accuracy=seq_get(args, 3),
        )
    if len(args) == 3:
        return exp.ApproxQuantile(
            this=seq_get(args, 0), quantile=seq_get(args, 1), accuracy=seq_get(args, 2)
        )
    return exp.ApproxQuantile.from_arg_list(args)


def _build_from_unixtime(args: t.List) -> exp.Expression:
    if len(args) == 3:
        return exp.UnixToTime(
            this=seq_get(args, 0),
            hours=seq_get(args, 1),
            minutes=seq_get(args, 2),
        )
    if len(args) == 2:
        return exp.UnixToTime(this=seq_get(args, 0), zone=seq_get(args, 1))

    return exp.UnixToTime.from_arg_list(args)


def _first_last_sql(self: Presto.Generator, expression: exp.Func) -> str:
    """
    Trino doesn't support FIRST / LAST as functions, but they're valid in the context
    of MATCH_RECOGNIZE, so we need to preserve them in that case. In all other cases
    they're converted into an ARBITRARY call.

    Reference: https://trino.io/docs/current/sql/match-recognize.html#logical-navigation-functions
    """
    if isinstance(expression.find_ancestor(exp.MatchRecognize, exp.Select), exp.MatchRecognize):
        return self.function_fallback_sql(expression)

    return rename_func("ARBITRARY")(self, expression)


def _unix_to_time_sql(self: Presto.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = self.sql(expression, "this")
    if scale in (None, exp.UnixToTime.SECONDS):
        return rename_func("FROM_UNIXTIME")(self, expression)

    return f"FROM_UNIXTIME(CAST({timestamp} AS DOUBLE) / POW(10, {scale}))"


def _to_int(self: Presto.Generator, expression: exp.Expression) -> exp.Expression:
    if not expression.type:
        from sqlglot.optimizer.annotate_types import annotate_types

        annotate_types(expression, dialect=self.dialect)
    if expression.type and expression.type.this not in exp.DataType.INTEGER_TYPES:
        return exp.cast(expression, to=exp.DataType.Type.BIGINT)
    return expression


def _build_to_char(args: t.List) -> exp.TimeToStr:
    fmt = seq_get(args, 1)
    if isinstance(fmt, exp.Literal):
        # We uppercase this to match Teradata's format mapping keys
        fmt.set("this", fmt.this.upper())

    # We use "teradata" on purpose here, because the time formats are different in Presto.
    # See https://prestodb.io/docs/current/functions/teradata.html?highlight=to_char#to_char
    return build_formatted_time(exp.TimeToStr, "teradata")(args)


def _date_delta_sql(
    name: str, negate_interval: bool = False
) -> t.Callable[[Presto.Generator, DATE_ADD_OR_SUB], str]:
    def _delta_sql(self: Presto.Generator, expression: DATE_ADD_OR_SUB) -> str:
        interval = _to_int(self, expression.expression)
        return self.func(
            name,
            unit_to_str(expression),
            interval * (-1) if negate_interval else interval,
            expression.this,
        )

    return _delta_sql


def _explode_to_unnest_sql(self: Presto.Generator, expression: exp.Lateral) -> str:
    explode = expression.this
    if isinstance(explode, exp.Explode):
        exploded_type = explode.this.type
        alias = expression.args.get("alias")

        # This attempts a best-effort transpilation of LATERAL VIEW EXPLODE on a struct array
        if (
            isinstance(alias, exp.TableAlias)
            and isinstance(exploded_type, exp.DataType)
            and exploded_type.is_type(exp.DataType.Type.ARRAY)
            and exploded_type.expressions
            and exploded_type.expressions[0].is_type(exp.DataType.Type.STRUCT)
        ):
            # When unnesting a ROW in Presto, it produces N columns, so we need to fix the alias
            alias.set("columns", [c.this.copy() for c in exploded_type.expressions[0].expressions])
    elif isinstance(explode, exp.Inline):
        explode.replace(exp.Explode(this=explode.this.copy()))

    return explode_to_unnest_sql(self, expression)


def amend_exploded_column_table(expression: exp.Expression) -> exp.Expression:
    # We check for expression.type because the columns can be amended only if types were inferred
    if isinstance(expression, exp.Select) and expression.type:
        for lateral in expression.args.get("laterals") or []:
            alias = lateral.args.get("alias")
            if (
                not isinstance(lateral.this, exp.Explode)
                or not isinstance(alias, exp.TableAlias)
                or len(alias.columns) != 1
            ):
                continue

            new_table = alias.this
            old_table = alias.columns[0].name.lower()

            # When transpiling a LATERAL VIEW EXPLODE Spark query, the exploded fields may be qualified
            # with the struct column, resulting in invalid Presto references that need to be amended
            for column in find_all_in_scope(expression, exp.Column):
                if column.db.lower() == old_table:
                    column.set("table", column.args["db"].pop())
                elif column.table.lower() == old_table:
                    column.set("table", new_table.copy())
                elif column.name.lower() == old_table and isinstance(column.parent, exp.Dot):
                    column.parent.replace(exp.column(column.parent.expression, table=new_table))

    return expression


class Presto(Dialect):
    INDEX_OFFSET = 1
    NULL_ORDERING = "nulls_are_last"
    TIME_FORMAT = MySQL.TIME_FORMAT
    STRICT_STRING_CONCAT = True
    SUPPORTS_SEMI_ANTI_JOIN = False
    TYPED_DIVISION = True
    TABLESAMPLE_SIZE_IS_PERCENT = True
    LOG_BASE_FIRST: t.Optional[bool] = None
    SUPPORTS_VALUES_DEFAULT = False

    TIME_MAPPING = MySQL.TIME_MAPPING

    # https://github.com/trinodb/trino/issues/17
    # https://github.com/trinodb/trino/issues/12289
    # https://github.com/prestodb/presto/issues/2863
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    # The result of certain math functions in Presto/Trino is of type
    # equal to the input type e.g: FLOOR(5.5/2) -> DECIMAL, FLOOR(5/2) -> BIGINT
    ANNOTATORS = {
        **Dialect.ANNOTATORS,
        exp.Floor: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Ceil: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Mod: lambda self, e: self._annotate_by_args(e, "this", "expression"),
        exp.Round: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Sign: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Abs: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Rand: lambda self, e: self._annotate_by_args(e, "this")
        if e.this
        else self._set_type(e, exp.DataType.Type.DOUBLE),
    }

    SUPPORTED_SETTINGS = {
        *Dialect.SUPPORTED_SETTINGS,
        "variant_extract_is_json_extract",
    }

    class Tokenizer(tokens.Tokenizer):
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        UNICODE_STRINGS = [
            (prefix + q, q)
            for q in t.cast(t.List[str], tokens.Tokenizer.QUOTES)
            for prefix in ("U&", "u&")
        ]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "DEALLOCATE PREPARE": TokenType.COMMAND,
            "DESCRIBE INPUT": TokenType.COMMAND,
            "DESCRIBE OUTPUT": TokenType.COMMAND,
            "RESET SESSION": TokenType.COMMAND,
            "START": TokenType.BEGIN,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "ROW": TokenType.STRUCT,
            "IPADDRESS": TokenType.IPADDRESS,
            "IPPREFIX": TokenType.IPPREFIX,
            "TDIGEST": TokenType.TDIGEST,
            "HYPERLOGLOG": TokenType.HLLSKETCH,
        }
        KEYWORDS.pop("/*+")
        KEYWORDS.pop("QUALIFY")

    class Parser(parser.Parser):
        VALUES_FOLLOWED_BY_PAREN = False

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARBITRARY": exp.AnyValue.from_arg_list,
            "APPROX_DISTINCT": exp.ApproxDistinct.from_arg_list,
            "APPROX_PERCENTILE": _build_approx_percentile,
            "BITWISE_AND": binary_from_function(exp.BitwiseAnd),
            "BITWISE_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)),
            "BITWISE_OR": binary_from_function(exp.BitwiseOr),
            "BITWISE_XOR": binary_from_function(exp.BitwiseXor),
            "CARDINALITY": exp.ArraySize.from_arg_list,
            "CONTAINS": exp.ArrayContains.from_arg_list,
            "DATE_ADD": lambda args: exp.DateAdd(
                this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
            ),
            "DATE_DIFF": lambda args: exp.DateDiff(
                this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
            ),
            "DATE_FORMAT": build_formatted_time(exp.TimeToStr, "presto"),
            "DATE_PARSE": build_formatted_time(exp.StrToTime, "presto"),
            "DATE_TRUNC": date_trunc_to_time,
            "DAY_OF_WEEK": exp.DayOfWeekIso.from_arg_list,
            "DOW": exp.DayOfWeekIso.from_arg_list,
            "DOY": exp.DayOfYear.from_arg_list,
            "ELEMENT_AT": lambda args: exp.Bracket(
                this=seq_get(args, 0), expressions=[seq_get(args, 1)], offset=1, safe=True
            ),
            "FROM_HEX": exp.Unhex.from_arg_list,
            "FROM_UNIXTIME": _build_from_unixtime,
            "FROM_UTF8": lambda args: exp.Decode(
                this=seq_get(args, 0), replace=seq_get(args, 1), charset=exp.Literal.string("utf-8")
            ),
            "JSON_FORMAT": lambda args: exp.JSONFormat(
                this=seq_get(args, 0), options=seq_get(args, 1), is_json=True
            ),
            "LEVENSHTEIN_DISTANCE": exp.Levenshtein.from_arg_list,
            "NOW": exp.CurrentTimestamp.from_arg_list,
            "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract),
            "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll),
            "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                replacement=seq_get(args, 2) or exp.Literal.string(""),
            ),
            "ROW": exp.Struct.from_arg_list,
            "SEQUENCE": exp.GenerateSeries.from_arg_list,
            "SET_AGG": exp.ArrayUniqueAgg.from_arg_list,
            "SPLIT_TO_MAP": exp.StrToMap.from_arg_list,
            "STRPOS": lambda args: exp.StrPosition(
                this=seq_get(args, 0), substr=seq_get(args, 1), occurrence=seq_get(args, 2)
            ),
            "TO_CHAR": _build_to_char,
            "TO_UNIXTIME": exp.TimeToUnix.from_arg_list,
            "TO_UTF8": lambda args: exp.Encode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "MD5": exp.MD5Digest.from_arg_list,
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
        }

        FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy()
        FUNCTION_PARSERS.pop("TRIM")

    class Generator(generator.Generator):
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        IS_BOOL_ALLOWED = False
        TZ_TO_WITH_TIME_ZONE = True
        NVL2_SUPPORTED = False
        STRUCT_DELIMITER = ("(", ")")
        LIMIT_ONLY_LITERALS = True
        SUPPORTS_SINGLE_ARG_CONCAT = False
        LIKE_PROPERTY_INSIDE_SCHEMA = True
        MULTI_ARG_DISTINCT = False
        SUPPORTS_TO_NUMBER = False
        HEX_FUNC = "TO_HEX"
        PARSE_JSON_NAME = "JSON_PARSE"
        PAD_FILL_PATTERN_IS_REQUIRED = True
        EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
        SUPPORTS_MEDIAN = False
        ARRAY_SIZE_NAME = "CARDINALITY"

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.LocationProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BINARY: "VARBINARY",
            exp.DataType.Type.BIT: "BOOLEAN",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.DATETIME64: "TIMESTAMP",
            exp.DataType.Type.FLOAT: "REAL",
            exp.DataType.Type.HLLSKETCH: "HYPERLOGLOG",
            exp.DataType.Type.INT: "INTEGER",
            exp.DataType.Type.STRUCT: "ROW",
            exp.DataType.Type.TEXT: "VARCHAR",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
            exp.DataType.Type.TIMETZ: "TIME",
        }

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.AnyValue: rename_func("ARBITRARY"),
            exp.ApproxQuantile: rename_func("APPROX_PERCENTILE"),
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.Array: lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]",
            exp.ArrayAny: rename_func("ANY_MATCH"),
            exp.ArrayConcat: rename_func("CONCAT"),
            exp.ArrayContains: rename_func("CONTAINS"),
            exp.ArrayToString: rename_func("ARRAY_JOIN"),
            exp.ArrayUniqueAgg: rename_func("SET_AGG"),
            exp.AtTimeZone: rename_func("AT_TIMEZONE"),
            exp.BitwiseAnd: lambda self, e: self.func("BITWISE_AND", e.this, e.expression),
            exp.BitwiseLeftShift: lambda self, e: self.func(
                "BITWISE_ARITHMETIC_SHIFT_LEFT", e.this, e.expression
            ),
            exp.BitwiseNot: lambda self, e: self.func("BITWISE_NOT", e.this),
            exp.BitwiseOr: lambda self, e: self.func("BITWISE_OR", e.this, e.expression),
            exp.BitwiseRightShift: lambda self, e: self.func(
                "BITWISE_ARITHMETIC_SHIFT_RIGHT", e.this, e.expression
            ),
            exp.BitwiseXor: lambda self, e: self.func("BITWISE_XOR", e.this, e.expression),
            exp.Cast: transforms.preprocess([transforms.epoch_cast_to_ts]),
            exp.CurrentTime: lambda *_: "CURRENT_TIME",
            exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
            exp.CurrentUser: lambda *_: "CURRENT_USER",
            exp.DateAdd: _date_delta_sql("DATE_ADD"),
            exp.DateDiff: lambda self, e: self.func(
                "DATE_DIFF", unit_to_str(e), e.expression, e.this
            ),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateToDi: lambda self,
            e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Presto.DATEINT_FORMAT}) AS INT)",
            exp.DateSub: _date_delta_sql("DATE_ADD", negate_interval=True),
            exp.DayOfWeek: lambda self, e: f"(({self.func('DAY_OF_WEEK', e.this)} % 7) + 1)",
            exp.DayOfWeekIso: rename_func("DAY_OF_WEEK"),
            exp.Decode: lambda self, e: encode_decode_sql(self, e, "FROM_UTF8"),
            exp.DiToDate: lambda self,
            e: f"CAST(DATE_PARSE(CAST({self.sql(e, 'this')} AS VARCHAR), {Presto.DATEINT_FORMAT}) AS DATE)",
            exp.Encode: lambda self, e: encode_decode_sql(self, e, "TO_UTF8"),
            exp.FileFormatProperty: lambda self,
            e: f"format={self.sql(exp.Literal.string(e.name))}",
            exp.First: _first_last_sql,
            exp.FromTimeZone: lambda self,
            e: f"WITH_TIMEZONE({self.sql(e, 'this')}, {self.sql(e, 'zone')}) AT TIME ZONE 'UTC'",
            exp.GenerateSeries: sequence_sql,
            exp.GenerateDateArray: sequence_sql,
            exp.Group: transforms.preprocess([transforms.unalias_group]),
            exp.If: if_sql(),
            exp.ILike: no_ilike_sql,
            exp.Initcap: _initcap_sql,
            exp.Last: _first_last_sql,
            exp.LastDay: lambda self, e: self.func("LAST_DAY_OF_MONTH", e.this),
            exp.Lateral: _explode_to_unnest_sql,
            exp.Left: left_to_substring_sql,
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
                rename_func("LEVENSHTEIN_DISTANCE")
            ),
            exp.LogicalAnd: rename_func("BOOL_AND"),
            exp.LogicalOr: rename_func("BOOL_OR"),
            exp.Pivot: no_pivot_sql,
            exp.Quantile: _quantile_sql,
            exp.RegexpExtract: regexp_extract_sql,
            exp.RegexpExtractAll: regexp_extract_sql,
            exp.Right: right_to_substring_sql,
            exp.Schema: _schema_sql,
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_window_clause,
                    transforms.eliminate_qualify,
                    transforms.eliminate_distinct_on,
                    transforms.explode_projection_to_unnest(1),
                    transforms.eliminate_semi_and_anti_joins,
                    amend_exploded_column_table,
                ]
            ),
            exp.SortArray: _no_sort_array,
            exp.StrPosition: lambda self, e: strposition_sql(self, e, supports_occurrence=True),
            exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)",
            exp.StrToMap: rename_func("SPLIT_TO_MAP"),
            exp.StrToTime: _str_to_time_sql,
            exp.StructExtract: struct_extract_sql,
            exp.Table: transforms.preprocess([transforms.unnest_generate_series]),
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampAdd: _date_delta_sql("DATE_ADD"),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToDate: timestrtotime_sql,
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: lambda self, e: self.func(
                "TO_UNIXTIME", self.func("DATE_PARSE", e.this, Presto.TIME_FORMAT)
            ),
            exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
            exp.TimeToUnix: rename_func("TO_UNIXTIME"),
            exp.ToChar: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
            exp.TryCast: transforms.preprocess([transforms.epoch_cast_to_ts]),
            exp.TsOrDiToDi: lambda self,
            e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToDate: _ts_or_ds_to_date_sql,
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixToStr: lambda self,
            e: f"DATE_FORMAT(FROM_UNIXTIME({self.sql(e, 'this')}), {self.format_time(e)})",
            exp.UnixToTime: _unix_to_time_sql,
            exp.UnixToTimeStr: lambda self,
            e: f"CAST(FROM_UNIXTIME({self.sql(e, 'this')}) AS VARCHAR)",
            exp.VariancePop: rename_func("VAR_POP"),
            exp.With: transforms.preprocess([transforms.add_recursive_cte_column_names]),
            exp.WithinGroup: transforms.preprocess(
                [transforms.remove_within_group_for_percentiles]
            ),
            exp.Xor: bool_xor_sql,
            exp.MD5Digest: rename_func("MD5"),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
        }

        RESERVED_KEYWORDS = {
            "alter",
            "and",
            "as",
            "between",
            "by",
            "case",
            "cast",
            "constraint",
            "create",
            "cross",
            "current_time",
            "current_timestamp",
            "deallocate",
            "delete",
            "describe",
            "distinct",
            "drop",
            "else",
            "end",
            "escape",
            "except",
            "execute",
            "exists",
            "extract",
            "false",
            "for",
            "from",
            "full",
            "group",
            "having",
            "in",
            "inner",
            "insert",
            "intersect",
            "into",
            "is",
            "join",
            "left",
            "like",
            "natural",
            "not",
            "null",
            "on",
            "or",
            "order",
            "outer",
            "prepare",
            "right",
            "select",
            "table",
            "then",
            "true",
            "union",
            "using",
            "values",
            "when",
            "where",
            "with",
        }

        def jsonformat_sql(self, expression: exp.JSONFormat) -> str:
            this = expression.this
            is_json = expression.args.get("is_json")

            if this and not (is_json or this.type):
                from sqlglot.optimizer.annotate_types import annotate_types

                this = annotate_types(this, dialect=self.dialect)

                if not (is_json or this.is_type(exp.DataType.Type.JSON)):
                    this.replace(exp.cast(this, exp.DataType.Type.JSON))

            return self.function_fallback_sql(expression)

        def md5_sql(self, expression: exp.MD5) -> str:
            this = expression.this

            if not this.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                this = annotate_types(this, dialect=self.dialect)

            if this.is_type(*exp.DataType.TEXT_TYPES):
                this = exp.Encode(this=this, charset=exp.Literal.string("utf-8"))

            return self.func("LOWER", self.func("TO_HEX", self.func("MD5", self.sql(this))))

        def strtounix_sql(self, expression: exp.StrToUnix) -> str:
            # Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one.
            # To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a
            # timezone involved, we wrap it in a `TRY` call and use `PARSE_DATETIME` as a fallback,
            # which seems to be using the same time mapping as Hive, as per:
            # https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html
            this = expression.this
            value_as_text = exp.cast(this, exp.DataType.Type.TEXT)
            value_as_timestamp = (
                exp.cast(this, exp.DataType.Type.TIMESTAMP) if this.is_string else this
            )

            parse_without_tz = self.func("DATE_PARSE", value_as_text, self.format_time(expression))

            formatted_value = self.func(
                "DATE_FORMAT", value_as_timestamp, self.format_time(expression)
            )
            parse_with_tz = self.func(
                "PARSE_DATETIME",
                formatted_value,
                self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE),
            )
            coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz)
            return self.func("TO_UNIXTIME", coalesced)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            if expression.args.get("safe"):
                return self.func(
                    "ELEMENT_AT",
                    expression.this,
                    seq_get(
                        apply_index_offset(
                            expression.this,
                            expression.expressions,
                            1 - expression.args.get("offset", 0),
                            dialect=self.dialect,
                        ),
                        0,
                    ),
                )
            return super().bracket_sql(expression)

        def struct_sql(self, expression: exp.Struct) -> str:
            from sqlglot.optimizer.annotate_types import annotate_types

            expression = annotate_types(expression, dialect=self.dialect)
            values: t.List[str] = []
            schema: t.List[str] = []
            unknown_type = False

            for e in expression.expressions:
                if isinstance(e, exp.PropertyEQ):
                    if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN):
                        unknown_type = True
                    else:
                        schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}")
                    values.append(self.sql(e, "expression"))
                else:
                    values.append(self.sql(e))

            size = len(expression.expressions)

            if not size or len(schema) != size:
                if unknown_type:
                    self.unsupported(
                        "Cannot convert untyped key-value definitions (try annotate_types)."
                    )
                return self.func("ROW", *values)
            return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))"

        def interval_sql(self, expression: exp.Interval) -> str:
            if expression.this and expression.text("unit").upper().startswith("WEEK"):
                return f"({expression.this.name} * INTERVAL '7' DAY)"
            return super().interval_sql(expression)

        def transaction_sql(self, expression: exp.Transaction) -> str:
            modes = expression.args.get("modes")
            modes = f" {', '.join(modes)}" if modes else ""
            return f"START TRANSACTION{modes}"

        def offset_limit_modifiers(
            self, expression: exp.Expression, fetch: bool, limit: t.Optional[exp.Fetch | exp.Limit]
        ) -> t.List[str]:
            return [
                self.sql(expression, "offset"),
                self.sql(limit),
            ]

        def create_sql(self, expression: exp.Create) -> str:
            """
            Presto doesn't support CREATE VIEW with expressions (ex: `CREATE VIEW x (cola)` then `(cola)` is the expression),
            so we need to remove them
            """
            kind = expression.args["kind"]
            schema = expression.this
            if kind == "VIEW" and schema.expressions:
                expression.this.set("expressions", None)
            return super().create_sql(expression)

        def delete_sql(self, expression: exp.Delete) -> str:
            """
            Presto only supports DELETE FROM for a single table without an alias, so we need
            to remove the unnecessary parts. If the original DELETE statement contains more
            than one table to be deleted, we can't safely map it 1-1 to a Presto statement.
            """
            tables = expression.args.get("tables") or [expression.this]
            if len(tables) > 1:
                return super().delete_sql(expression)

            table = tables[0]
            expression.set("this", table)
            expression.set("tables", None)

            if isinstance(table, exp.Table):
                table_alias = table.args.get("alias")
                if table_alias:
                    table_alias.pop()
                    expression = t.cast(exp.Delete, expression.transform(unqualify_columns))

            return super().delete_sql(expression)

        def jsonextract_sql(self, expression: exp.JSONExtract) -> str:
            is_json_extract = self.dialect.settings.get("variant_extract_is_json_extract", True)

            # Generate JSON_EXTRACT unless the user has configured that a Snowflake / Databricks
            # VARIANT extract (e.g. col:x.y) should map to dot notation (i.e ROW access) in Presto/Trino
            if not expression.args.get("variant_extract") or is_json_extract:
                return self.func(
                    "JSON_EXTRACT", expression.this, expression.expression, *expression.expressions
                )

            this = self.sql(expression, "this")

            # Convert the JSONPath extraction `JSON_EXTRACT(col, '$.x.y) to a ROW access col.x.y
            segments = []
            for path_key in expression.expression.expressions[1:]:
                if not isinstance(path_key, exp.JSONPathKey):
                    # Cannot transpile subscripts, wildcards etc to dot notation
                    self.unsupported(
                        f"Cannot transpile JSONPath segment '{path_key}' to ROW access"
                    )
                    continue
                key = path_key.this
                if not exp.SAFE_IDENTIFIER_RE.match(key):
                    key = f'"{key}"'
                segments.append(f".{key}")

            expr = "".join(segments)

            return f"{this}{expr}"

        def groupconcat_sql(self, expression: exp.GroupConcat) -> str:
            return self.func(
                "ARRAY_JOIN",
                self.func("ARRAY_AGG", expression.this),
                expression.args.get("separator"),
            )
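For orientation, here is a minimal usage sketch (not part of the module itself). The dialect registers under the name "presto", so sqlglot's top-level helpers can target it directly; the table and column names below are placeholders.

import sqlglot

# Transpile DuckDB SQL into Presto SQL; "presto" resolves to the dialect above.
print(sqlglot.transpile("SELECT EPOCH_MS(1618088028295)", read="duckdb", write="presto")[0])

# Round-trip Presto SQL through the Parser and Generator defined in this module.
expr = sqlglot.parse_one("SELECT APPROX_PERCENTILE(x, 0.5) FROM t", read="presto")
print(expr.sql(dialect="presto"))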
def amend_exploded_column_table(expression: exp.Expression) -> exp.Expression:
class Presto(Dialect):
NULL_ORDERING = 'nulls_are_last'

Default NULL ordering method to use if not explicitly set.
Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last"
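A hedged illustration of this setting (table and column names are placeholders): MySQL sorts NULLs first for ascending keys while Presto sorts them last, so transpiling an ORDER BY out of MySQL should make the original placement explicit.

import sqlglot

# The generated Presto query should carry an explicit NULLS FIRST to preserve
# MySQL's default NULL placement under Presto's "nulls_are_last" ordering.
print(sqlglot.transpile("SELECT x FROM t ORDER BY x", read="mysql", write="presto")[0])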
TYPED_DIVISION = True

Whether the behavior of a / b depends on the types of a and b.
False means a / b is always float division.
True means a / b is integer division if both a and b are integers.
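A sketch of what this flag affects (the exact output shape is hedged, not guaranteed verbatim): division parsed from Presto is marked as typed, so generators for float-division dialects can preserve the integer-division semantics.

import sqlglot

# 5 / 2 is integer division in Presto; when targeting DuckDB, where / is float
# division, sqlglot should fall back to an integer-division form such as 5 // 2.
print(sqlglot.transpile("SELECT 5 / 2 AS r", read="presto", write="duckdb")[0])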
LOG_BASE_FIRST: Optional[bool] = None

Whether the base comes first in the LOG function.
Possible values: True, False, None (two arguments are not supported by LOG)
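A small probe, purely illustrative: parsing a two-argument LOG under a base-first dialect such as Hive shows how the argument order is captured, whereas Presto itself leaves the two-argument form unmodeled (LOG_BASE_FIRST = None).

import sqlglot

# Inspect how a base-first dialect (Hive) captures LOG(base, value); Presto's
# LOG_BASE_FIRST = None means no such two-argument form is assumed for it.
print(repr(sqlglot.parse_one("SELECT LOG(10, x) FROM t", read="hive")))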
TIME_MAPPING = MySQL.TIME_MAPPING

Associates this dialect's time formats with their equivalent Python strftime formats.
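An example of the shared format tokens (expected output hedged): Presto inherits MySQL-style tokens such as %Y and %m, which transpile into other dialects' conventions.

import sqlglot

# Presto's '%Y-%m-%d' should be remapped to Hive's Java-style 'yyyy-MM-dd'.
print(sqlglot.transpile("SELECT DATE_FORMAT(x, '%Y-%m-%d')", read="presto", write="hive")[0])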
NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

Specifies the strategy according to which identifiers should be normalized.
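A short sketch of the case-insensitive strategy using sqlglot's identifier normalizer (identifier names below are placeholders):

from sqlglot import parse_one
from sqlglot.optimizer.normalize_identifiers import normalize_identifiers

# Under CASE_INSENSITIVE, normalization lowercases identifiers regardless of
# quoting, so both "MyCol" and MyTable should come out lowercased.
expr = parse_one('SELECT "MyCol" FROM MyTable', read="presto")
print(normalize_identifiers(expr, dialect="presto").sql(dialect="presto"))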
class Presto.Tokenizer(sqlglot.tokens.Tokenizer):
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- SINGLE_TOKENS
- BIT_STRINGS
- BYTE_STRINGS
- RAW_STRINGS
- HEREDOC_STRINGS
- IDENTIFIERS
- QUOTES
- STRING_ESCAPES
- VAR_SINGLE_TOKENS
- IDENTIFIER_ESCAPES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
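An illustrative probe of the tokenizer overrides above (not part of the module): x'..' scans as a hex string and ROW tokenizes as a STRUCT keyword.

from sqlglot.dialects.presto import Presto

# Print the token stream; expect a hex-string token for x'2F' and a STRUCT
# token where ROW appears.
for token in Presto().tokenize("SELECT x'2F', CAST(c AS ROW(a INTEGER))"):
    print(token)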
class Presto.Parser(sqlglot.parser.Parser):
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
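A hedged example of the FUNCTIONS overrides above (arr is a placeholder name): Presto's ELEMENT_AT(arr, i) parses into a safe, 1-indexed bracket access, which this dialect's generator turns back into ELEMENT_AT.

import sqlglot

# ELEMENT_AT round-trips through exp.Bracket(safe=True, offset=1).
expr = sqlglot.parse_one("SELECT ELEMENT_AT(arr, 1)", read="presto")
print(repr(expr.find(sqlglot.exp.Bracket)))
print(expr.sql(dialect="presto"))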
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- ALIAS_TOKENS
- COLON_PLACEHOLDER_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- RANGE_PARSERS
- PROPERTY_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- WINDOW_EXCLUDE_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- RECURSIVE_CTE_SEARCH_KIND
- MODIFIABLES
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- OPTIONAL_ALIAS_TOKEN_CTE
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- parse_set_operation
- errors
- sql
class Generator(generator.Generator):
    INTERVAL_ALLOWS_PLURAL_FORM = False
    JOIN_HINTS = False
    TABLE_HINTS = False
    QUERY_HINTS = False
    IS_BOOL_ALLOWED = False
    TZ_TO_WITH_TIME_ZONE = True
    NVL2_SUPPORTED = False
    STRUCT_DELIMITER = ("(", ")")
    LIMIT_ONLY_LITERALS = True
    SUPPORTS_SINGLE_ARG_CONCAT = False
    LIKE_PROPERTY_INSIDE_SCHEMA = True
    MULTI_ARG_DISTINCT = False
    SUPPORTS_TO_NUMBER = False
    HEX_FUNC = "TO_HEX"
    PARSE_JSON_NAME = "JSON_PARSE"
    PAD_FILL_PATTERN_IS_REQUIRED = True
    EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
    SUPPORTS_MEDIAN = False
    ARRAY_SIZE_NAME = "CARDINALITY"

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.LocationProperty: exp.Properties.Location.UNSUPPORTED,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.BINARY: "VARBINARY",
        exp.DataType.Type.BIT: "BOOLEAN",
        exp.DataType.Type.DATETIME: "TIMESTAMP",
        exp.DataType.Type.DATETIME64: "TIMESTAMP",
        exp.DataType.Type.FLOAT: "REAL",
        exp.DataType.Type.HLLSKETCH: "HYPERLOGLOG",
        exp.DataType.Type.INT: "INTEGER",
        exp.DataType.Type.STRUCT: "ROW",
        exp.DataType.Type.TEXT: "VARCHAR",
        exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
        exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
        exp.DataType.Type.TIMETZ: "TIME",
    }

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.AnyValue: rename_func("ARBITRARY"),
        exp.ApproxQuantile: rename_func("APPROX_PERCENTILE"),
        exp.ArgMax: rename_func("MAX_BY"),
        exp.ArgMin: rename_func("MIN_BY"),
        exp.Array: lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]",
        exp.ArrayAny: rename_func("ANY_MATCH"),
        exp.ArrayConcat: rename_func("CONCAT"),
        exp.ArrayContains: rename_func("CONTAINS"),
        exp.ArrayToString: rename_func("ARRAY_JOIN"),
        exp.ArrayUniqueAgg: rename_func("SET_AGG"),
        exp.AtTimeZone: rename_func("AT_TIMEZONE"),
        exp.BitwiseAnd: lambda self, e: self.func("BITWISE_AND", e.this, e.expression),
        exp.BitwiseLeftShift: lambda self, e: self.func(
            "BITWISE_ARITHMETIC_SHIFT_LEFT", e.this, e.expression
        ),
        exp.BitwiseNot: lambda self, e: self.func("BITWISE_NOT", e.this),
        exp.BitwiseOr: lambda self, e: self.func("BITWISE_OR", e.this, e.expression),
        exp.BitwiseRightShift: lambda self, e: self.func(
            "BITWISE_ARITHMETIC_SHIFT_RIGHT", e.this, e.expression
        ),
        exp.BitwiseXor: lambda self, e: self.func("BITWISE_XOR", e.this, e.expression),
        exp.Cast: transforms.preprocess([transforms.epoch_cast_to_ts]),
        exp.CurrentTime: lambda *_: "CURRENT_TIME",
        exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
        exp.CurrentUser: lambda *_: "CURRENT_USER",
        exp.DateAdd: _date_delta_sql("DATE_ADD"),
        exp.DateDiff: lambda self, e: self.func(
            "DATE_DIFF", unit_to_str(e), e.expression, e.this
        ),
        exp.DateStrToDate: datestrtodate_sql,
        exp.DateToDi: lambda self,
        e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Presto.DATEINT_FORMAT}) AS INT)",
        exp.DateSub: _date_delta_sql("DATE_ADD", negate_interval=True),
        exp.DayOfWeek: lambda self, e: f"(({self.func('DAY_OF_WEEK', e.this)} % 7) + 1)",
        exp.DayOfWeekIso: rename_func("DAY_OF_WEEK"),
        exp.Decode: lambda self, e: encode_decode_sql(self, e, "FROM_UTF8"),
        exp.DiToDate: lambda self,
        e: f"CAST(DATE_PARSE(CAST({self.sql(e, 'this')} AS VARCHAR), {Presto.DATEINT_FORMAT}) AS DATE)",
        exp.Encode: lambda self, e: encode_decode_sql(self, e, "TO_UTF8"),
        exp.FileFormatProperty: lambda self,
        e: f"format={self.sql(exp.Literal.string(e.name))}",
        exp.First: _first_last_sql,
        exp.FromTimeZone: lambda self,
        e: f"WITH_TIMEZONE({self.sql(e, 'this')}, {self.sql(e, 'zone')}) AT TIME ZONE 'UTC'",
        exp.GenerateSeries: sequence_sql,
        exp.GenerateDateArray: sequence_sql,
        exp.Group: transforms.preprocess([transforms.unalias_group]),
        exp.If: if_sql(),
        exp.ILike: no_ilike_sql,
        exp.Initcap: _initcap_sql,
        exp.Last: _first_last_sql,
        exp.LastDay: lambda self, e: self.func("LAST_DAY_OF_MONTH", e.this),
        exp.Lateral: _explode_to_unnest_sql,
        exp.Left: left_to_substring_sql,
        exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
            rename_func("LEVENSHTEIN_DISTANCE")
        ),
        exp.LogicalAnd: rename_func("BOOL_AND"),
        exp.LogicalOr: rename_func("BOOL_OR"),
        exp.Pivot: no_pivot_sql,
        exp.Quantile: _quantile_sql,
        exp.RegexpExtract: regexp_extract_sql,
        exp.RegexpExtractAll: regexp_extract_sql,
        exp.Right: right_to_substring_sql,
        exp.Schema: _schema_sql,
        exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
        exp.Select: transforms.preprocess(
            [
                transforms.eliminate_window_clause,
                transforms.eliminate_qualify,
                transforms.eliminate_distinct_on,
                transforms.explode_projection_to_unnest(1),
                transforms.eliminate_semi_and_anti_joins,
                amend_exploded_column_table,
            ]
        ),
        exp.SortArray: _no_sort_array,
        exp.StrPosition: lambda self, e: strposition_sql(self, e, supports_occurrence=True),
        exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)",
        exp.StrToMap: rename_func("SPLIT_TO_MAP"),
        exp.StrToTime: _str_to_time_sql,
        exp.StructExtract: struct_extract_sql,
        exp.Table: transforms.preprocess([transforms.unnest_generate_series]),
        exp.Timestamp: no_timestamp_sql,
        exp.TimestampAdd: _date_delta_sql("DATE_ADD"),
        exp.TimestampTrunc: timestamptrunc_sql(),
        exp.TimeStrToDate: timestrtotime_sql,
        exp.TimeStrToTime: timestrtotime_sql,
        exp.TimeStrToUnix: lambda self, e: self.func(
            "TO_UNIXTIME", self.func("DATE_PARSE", e.this, Presto.TIME_FORMAT)
        ),
        exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
        exp.TimeToUnix: rename_func("TO_UNIXTIME"),
        exp.ToChar: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
        exp.TryCast: transforms.preprocess([transforms.epoch_cast_to_ts]),
        exp.TsOrDiToDi: lambda self,
        e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)",
        exp.TsOrDsAdd: _ts_or_ds_add_sql,
        exp.TsOrDsDiff: _ts_or_ds_diff_sql,
        exp.TsOrDsToDate: _ts_or_ds_to_date_sql,
        exp.Unhex: rename_func("FROM_HEX"),
        exp.UnixToStr: lambda self,
        e: f"DATE_FORMAT(FROM_UNIXTIME({self.sql(e, 'this')}), {self.format_time(e)})",
        exp.UnixToTime: _unix_to_time_sql,
        exp.UnixToTimeStr: lambda self,
        e: f"CAST(FROM_UNIXTIME({self.sql(e, 'this')}) AS VARCHAR)",
        exp.VariancePop: rename_func("VAR_POP"),
        exp.With: transforms.preprocess([transforms.add_recursive_cte_column_names]),
        exp.WithinGroup: transforms.preprocess(
            [transforms.remove_within_group_for_percentiles]
        ),
        exp.Xor: bool_xor_sql,
        exp.MD5Digest: rename_func("MD5"),
        exp.SHA: rename_func("SHA1"),
        exp.SHA2: sha256_sql,
    }

    RESERVED_KEYWORDS = {
        "alter", "and", "as", "between", "by", "case", "cast", "constraint",
        "create", "cross", "current_time", "current_timestamp", "deallocate",
        "delete", "describe", "distinct", "drop", "else", "end", "escape",
        "except", "execute", "exists", "extract", "false", "for", "from",
        "full", "group", "having", "in", "inner", "insert", "intersect",
        "into", "is", "join", "left", "like", "natural", "not", "null", "on",
        "or", "order", "outer", "prepare", "right", "select", "table", "then",
        "true", "union", "using", "values", "when", "where", "with",
    }

    def jsonformat_sql(self, expression: exp.JSONFormat) -> str:
        this = expression.this
        is_json = expression.args.get("is_json")

        if this and not (is_json or this.type):
            from sqlglot.optimizer.annotate_types import annotate_types

            this = annotate_types(this, dialect=self.dialect)

        if not (is_json or this.is_type(exp.DataType.Type.JSON)):
            this.replace(exp.cast(this, exp.DataType.Type.JSON))

        return self.function_fallback_sql(expression)

    def md5_sql(self, expression: exp.MD5) -> str:
        this = expression.this

        if not this.type:
            from sqlglot.optimizer.annotate_types import annotate_types

            this = annotate_types(this, dialect=self.dialect)

        if this.is_type(*exp.DataType.TEXT_TYPES):
            this = exp.Encode(this=this, charset=exp.Literal.string("utf-8"))

        return self.func("LOWER", self.func("TO_HEX", self.func("MD5", self.sql(this))))

    def strtounix_sql(self, expression: exp.StrToUnix) -> str:
        # Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one.
        # To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a
        # timezone involved, we wrap it in a `TRY` call and use `PARSE_DATETIME` as a fallback,
        # which seems to be using the same time mapping as Hive, as per:
        # https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html
        this = expression.this
        value_as_text = exp.cast(this, exp.DataType.Type.TEXT)
        value_as_timestamp = (
            exp.cast(this, exp.DataType.Type.TIMESTAMP) if this.is_string else this
        )

        parse_without_tz = self.func("DATE_PARSE", value_as_text, self.format_time(expression))

        formatted_value = self.func(
            "DATE_FORMAT", value_as_timestamp, self.format_time(expression)
        )
        parse_with_tz = self.func(
            "PARSE_DATETIME",
            formatted_value,
            self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE),
        )
        coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz)
        return self.func("TO_UNIXTIME", coalesced)

    def bracket_sql(self, expression: exp.Bracket) -> str:
        if expression.args.get("safe"):
            return self.func(
                "ELEMENT_AT",
                expression.this,
                seq_get(
                    apply_index_offset(
                        expression.this,
                        expression.expressions,
                        1 - expression.args.get("offset", 0),
                        dialect=self.dialect,
                    ),
                    0,
                ),
            )
        return super().bracket_sql(expression)

    def struct_sql(self, expression: exp.Struct) -> str:
        from sqlglot.optimizer.annotate_types import annotate_types

        expression = annotate_types(expression, dialect=self.dialect)
        values: t.List[str] = []
        schema: t.List[str] = []
        unknown_type = False

        for e in expression.expressions:
            if isinstance(e, exp.PropertyEQ):
                if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN):
                    unknown_type = True
                else:
                    schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}")
                values.append(self.sql(e, "expression"))
            else:
                values.append(self.sql(e))

        size = len(expression.expressions)

        if not size or len(schema) != size:
            if unknown_type:
                self.unsupported(
                    "Cannot convert untyped key-value definitions (try annotate_types)."
                )
            return self.func("ROW", *values)
        return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))"

    def interval_sql(self, expression: exp.Interval) -> str:
        if expression.this and expression.text("unit").upper().startswith("WEEK"):
            return f"({expression.this.name} * INTERVAL '7' DAY)"
        return super().interval_sql(expression)

    def transaction_sql(self, expression: exp.Transaction) -> str:
        modes = expression.args.get("modes")
        modes = f" {', '.join(modes)}" if modes else ""
        return f"START TRANSACTION{modes}"

    def offset_limit_modifiers(
        self, expression: exp.Expression, fetch: bool, limit: t.Optional[exp.Fetch | exp.Limit]
    ) -> t.List[str]:
        return [
            self.sql(expression, "offset"),
            self.sql(limit),
        ]

    def create_sql(self, expression: exp.Create) -> str:
        """
        Presto doesn't support CREATE VIEW with expressions (e.g. in `CREATE VIEW x (cola)`,
        `(cola)` is the expression), so we need to remove them.
        """
        kind = expression.args["kind"]
        schema = expression.this
        if kind == "VIEW" and schema.expressions:
            expression.this.set("expressions", None)
        return super().create_sql(expression)

    def delete_sql(self, expression: exp.Delete) -> str:
        """
        Presto only supports DELETE FROM for a single table without an alias, so we need
        to remove the unnecessary parts. If the original DELETE statement contains more
        than one table to be deleted, we can't safely map it 1-1 to a Presto statement.
        """
        tables = expression.args.get("tables") or [expression.this]
        if len(tables) > 1:
            return super().delete_sql(expression)

        table = tables[0]
        expression.set("this", table)
        expression.set("tables", None)

        if isinstance(table, exp.Table):
            table_alias = table.args.get("alias")
            if table_alias:
                table_alias.pop()
                expression = t.cast(exp.Delete, expression.transform(unqualify_columns))

        return super().delete_sql(expression)

    def jsonextract_sql(self, expression: exp.JSONExtract) -> str:
        is_json_extract = self.dialect.settings.get("variant_extract_is_json_extract", True)

        # Generate JSON_EXTRACT unless the user has configured that a Snowflake / Databricks
        # VARIANT extract (e.g. col:x.y) should map to dot notation (i.e. ROW access) in Presto/Trino
        if not expression.args.get("variant_extract") or is_json_extract:
            return self.func(
                "JSON_EXTRACT", expression.this, expression.expression, *expression.expressions
            )

        this = self.sql(expression, "this")

        # Convert the JSONPath extraction `JSON_EXTRACT(col, '$.x.y')` to a ROW access col.x.y
        segments = []
        for path_key in expression.expression.expressions[1:]:
            if not isinstance(path_key, exp.JSONPathKey):
                # Cannot transpile subscripts, wildcards etc. to dot notation
                self.unsupported(
                    f"Cannot transpile JSONPath segment '{path_key}' to ROW access"
                )
                continue
            key = path_key.this
            if not exp.SAFE_IDENTIFIER_RE.match(key):
                key = f'"{key}"'
            segments.append(f".{key}")

        expr = "".join(segments)

        return f"{this}{expr}"

    def groupconcat_sql(self, expression: exp.GroupConcat) -> str:
        return self.func(
            "ARRAY_JOIN",
            self.func("ARRAY_AGG", expression.this),
            expression.args.get("separator"),
        )
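To see several of these transforms in action, a small transpilation sketch (the expected outputs shown in comments are indicative, not guaranteed across versions):

    import sqlglot

    # no_ilike_sql: Presto has no ILIKE, so the left side is lowercased.
    print(sqlglot.transpile("SELECT x FROM t WHERE x ILIKE '%a%'", write="presto")[0])
    # SELECT x FROM t WHERE LOWER(x) LIKE '%a%'

    # interval_sql: WEEK intervals are rewritten as a multiple of 7 days.
    print(sqlglot.transpile("SELECT INTERVAL '2' WEEK", write="presto")[0])
    # SELECT (2 * INTERVAL '7' DAY)

    # struct_sql: typed key-value structs become CAST(ROW(...) AS ROW(...)).
    print(sqlglot.transpile("SELECT STRUCT(1 AS a)", read="bigquery", write="presto")[0])
    # SELECT CAST(ROW(1) AS ROW(a INTEGER))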
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
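For example (an illustrative sketch of passing generator options through sqlglot.transpile):

    import sqlglot

    sql = sqlglot.transpile(
        "SELECT a, b FROM t",
        write="presto",
        pretty=True,    # format the output
        identify=True,  # always quote identifiers
    )[0]
    print(sql)
    # SELECT
    #   "a",
    #   "b"
    # FROM "t"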
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- SUPPORTED_JSON_PATH_PARTS
- CAN_IMPLEMENT_ARRAY_ANY
- SUPPORTS_WINDOW_EXCLUDE
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- TRY_SUPPORTED
- SUPPORTS_UESCAPE
- STAR_EXCEPT
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- SUPPORTS_EXPLODING_PROJECTIONS
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_UNIX_SECONDS
- ALTER_SET_TYPE
- ARRAY_SIZE_DIM_REQUIRED
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- PARAMETER_TOKEN
- NAMED_PLACEHOLDER_TOKEN
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablefromrows_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- unnest_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- trycast_sql
- jsoncast_sql
- try_sql
- log_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- featuresattime_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- declareitem_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- show_sql
- get_put_sql
- translatecharacters_sql