sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder


def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)
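
# A hedged sketch of build_mod's effect via the public API (assumes sqlglot is
# importable; parse_one routes MOD(...) through the FUNCTIONS registry below):
#
#     >>> from sqlglot import parse_one
#     >>> parse_one("MOD(a + 1, 7)").sql()
#     '(a + 1) % 7'
#
# Wrapping binary operands in exp.Paren preserves the original grouping when
# Mod is rendered as the infix % operator.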

class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "MOD": build_mod,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": build_var_map,
        "LOWER": build_lower,
        "UPPER": build_upper,
        "HEX": build_hex,
        "TO_HEX": build_hex,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }
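
    # The _Parser metaclass above precompiles SHOW_PARSERS / SET_PARSERS keys
    # into tries so multi-word keywords can be matched token by token. A hedged
    # sketch of the underlying helper:
    #
    #     >>> from sqlglot.trie import TrieResult, in_trie, new_trie
    #     >>> trie = new_trie(key.split(" ") for key in ("SHOW TABLES",))
    #     >>> in_trie(trie, ["SHOW"])[0] is TrieResult.PREFIX
    #     True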

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.MODEL,
        TokenType.SCHEMA,
        TokenType.SEQUENCE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
        TokenType.STREAMLIT,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }
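
    # CONJUNCTION / EQUALITY / COMPARISON (and BITWISE, TERM, FACTOR below) map
    # operator tokens to expression classes for the precedence-climbing parse
    # methods. A hedged sketch of the resulting tree shape:
    #
    #     >>> from sqlglot import parse_one
    #     >>> e = parse_one("1 < 2 AND 3 >= 4")
    #     >>> type(e).__name__, type(e.this).__name__, type(e.expression).__name__
    #     ('And', 'LT', 'GTE')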

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
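
    # COLUMN_OPERATORS handles postfix operators on columns; for example, the
    # DCOLON entry turns `x::int` into a cast. A hedged sketch:
    #
    #     >>> from sqlglot import parse_one
    #     >>> parse_one("SELECT x::int").sql()
    #     'SELECT CAST(x AS INT)'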

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        ),
        TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }
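
    # STRING_PARSERS and NUMERIC_PARSERS (below) map literal-flavored tokens to
    # literal nodes. A hedged sketch:
    #
    #     >>> from sqlglot import parse_one
    #     >>> lit = parse_one("'abc'")
    #     >>> lit.is_string
    #     True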

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            **{
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        ),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }
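
    # RANGE_PARSERS dispatches postfix predicates; entries built with the
    # module-level binary_range_parser also pick up an optional ESCAPE clause.
    # A hedged sketch:
    #
    #     >>> from sqlglot import parse_one
    #     >>> type(parse_one("x LIKE 'a%'")).__name__
    #     'Like'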

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }
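
    # PROPERTY_PARSERS keys off the property keyword seen in a DDL statement.
    # A hedged sketch (TEMPORARY routes through the "TEMP"/"TEMPORARY" entries):
    #
    #     >>> from sqlglot import exp, parse_one
    #     >>> ast = parse_one("CREATE TEMPORARY TABLE t (x INT)")
    #     >>> ast.find(exp.TemporaryProperty) is not None
    #     True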
"PERIOD": lambda self: self._parse_period_for_system_time(), 936 "PRIMARY KEY": lambda self: self._parse_primary_key(), 937 "REFERENCES": lambda self: self._parse_references(match=False), 938 "TITLE": lambda self: self.expression( 939 exp.TitleColumnConstraint, this=self._parse_var_or_string() 940 ), 941 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 942 "UNIQUE": lambda self: self._parse_unique(), 943 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 944 "WITH": lambda self: self.expression( 945 exp.Properties, expressions=self._parse_wrapped_properties() 946 ), 947 } 948 949 ALTER_PARSERS = { 950 "ADD": lambda self: self._parse_alter_table_add(), 951 "ALTER": lambda self: self._parse_alter_table_alter(), 952 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 953 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 954 "DROP": lambda self: self._parse_alter_table_drop(), 955 "RENAME": lambda self: self._parse_alter_table_rename(), 956 "SET": lambda self: self._parse_alter_table_set(), 957 } 958 959 ALTER_ALTER_PARSERS = { 960 "DISTKEY": lambda self: self._parse_alter_diststyle(), 961 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 962 "SORTKEY": lambda self: self._parse_alter_sortkey(), 963 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 964 } 965 966 SCHEMA_UNNAMED_CONSTRAINTS = { 967 "CHECK", 968 "EXCLUDE", 969 "FOREIGN KEY", 970 "LIKE", 971 "PERIOD", 972 "PRIMARY KEY", 973 "UNIQUE", 974 } 975 976 NO_PAREN_FUNCTION_PARSERS = { 977 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 978 "CASE": lambda self: self._parse_case(), 979 "IF": lambda self: self._parse_if(), 980 "NEXT": lambda self: self._parse_next_value_for(), 981 } 982 983 INVALID_FUNC_NAME_TOKENS = { 984 TokenType.IDENTIFIER, 985 TokenType.STRING, 986 } 987 988 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 989 990 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 991 992 FUNCTION_PARSERS = { 993 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 994 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 995 "DECODE": lambda self: self._parse_decode(), 996 "EXTRACT": lambda self: self._parse_extract(), 997 "JSON_OBJECT": lambda self: self._parse_json_object(), 998 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 999 "JSON_TABLE": lambda self: self._parse_json_table(), 1000 "MATCH": lambda self: self._parse_match_against(), 1001 "OPENJSON": lambda self: self._parse_open_json(), 1002 "POSITION": lambda self: self._parse_position(), 1003 "PREDICT": lambda self: self._parse_predict(), 1004 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1005 "STRING_AGG": lambda self: self._parse_string_agg(), 1006 "SUBSTRING": lambda self: self._parse_substring(), 1007 "TRIM": lambda self: self._parse_trim(), 1008 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1009 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1010 } 1011 1012 QUERY_MODIFIER_PARSERS = { 1013 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1014 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1015 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1016 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1017 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1018 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1019 

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTER: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple())

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}
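
    # OPTIONS_TYPE dicts map a leading keyword to the word sequences that may
    # follow it; _parse_var_from_options consumes them. A hedged round-trip
    # sketch for the TRANSACTION_CHARACTERISTICS entries:
    #
    #     >>> from sqlglot import parse_one
    #     >>> parse_one("SET TRANSACTION ISOLATION LEVEL READ COMMITTED").sql()
    #     'SET TRANSACTION ISOLATION LEVEL READ COMMITTED'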

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported, e.g. SELECT COUNT(*) 'count'
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_UNION = True
    UNION_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a JSON document
    COLON_IS_JSON_EXTRACT = False

    # Whether a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True
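
    # These class-level flags are overridden by dialect subclasses to toggle
    # grammar behavior. For instance, with MODIFIERS_ATTACHED_TO_UNION left
    # True, a trailing LIMIT belongs to the set operation itself (a hedged
    # sketch):
    #
    #     >>> from sqlglot import parse_one
    #     >>> parse_one("SELECT 1 UNION SELECT 2 LIMIT 1").args.get("limit") is not None
    #     True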

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
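
    # Hedged end-to-end sketch of the tokenize/parse pipeline (Dialect.tokenize
    # and Dialect.parser are the public entry points that feed this method):
    #
    #     >>> from sqlglot.dialects import Dialect
    #     >>> dialect = Dialect.get_or_raise(None)
    #     >>> sql = "SELECT 1"
    #     >>> [type(e).__name__ for e in dialect.parser().parse(dialect.tokenize(sql), sql)]
    #     ['Select']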
1268 """ 1269 errors = [] 1270 for expression_type in ensure_list(expression_types): 1271 parser = self.EXPRESSION_PARSERS.get(expression_type) 1272 if not parser: 1273 raise TypeError(f"No parser registered for {expression_type}") 1274 1275 try: 1276 return self._parse(parser, raw_tokens, sql) 1277 except ParseError as e: 1278 e.errors[0]["into_expression"] = expression_type 1279 errors.append(e) 1280 1281 raise ParseError( 1282 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1283 errors=merge_errors(errors), 1284 ) from errors[-1] 1285 1286 def _parse( 1287 self, 1288 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1289 raw_tokens: t.List[Token], 1290 sql: t.Optional[str] = None, 1291 ) -> t.List[t.Optional[exp.Expression]]: 1292 self.reset() 1293 self.sql = sql or "" 1294 1295 total = len(raw_tokens) 1296 chunks: t.List[t.List[Token]] = [[]] 1297 1298 for i, token in enumerate(raw_tokens): 1299 if token.token_type == TokenType.SEMICOLON: 1300 if token.comments: 1301 chunks.append([token]) 1302 1303 if i < total - 1: 1304 chunks.append([]) 1305 else: 1306 chunks[-1].append(token) 1307 1308 expressions = [] 1309 1310 for tokens in chunks: 1311 self._index = -1 1312 self._tokens = tokens 1313 self._advance() 1314 1315 expressions.append(parse_method(self)) 1316 1317 if self._index < len(self._tokens): 1318 self.raise_error("Invalid expression / Unexpected token") 1319 1320 self.check_errors() 1321 1322 return expressions 1323 1324 def check_errors(self) -> None: 1325 """Logs or raises any found errors, depending on the chosen error level setting.""" 1326 if self.error_level == ErrorLevel.WARN: 1327 for error in self.errors: 1328 logger.error(str(error)) 1329 elif self.error_level == ErrorLevel.RAISE and self.errors: 1330 raise ParseError( 1331 concat_messages(self.errors, self.max_errors), 1332 errors=merge_errors(self.errors), 1333 ) 1334 1335 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1336 """ 1337 Appends an error in the list of recorded errors or raises it, depending on the chosen 1338 error level setting. 1339 """ 1340 token = token or self._curr or self._prev or Token.string("") 1341 start = token.start 1342 end = token.end + 1 1343 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1344 highlight = self.sql[start:end] 1345 end_context = self.sql[end : end + self.error_message_context] 1346 1347 error = ParseError.new( 1348 f"{message}. Line {token.line}, Col: {token.col}.\n" 1349 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1350 description=message, 1351 line=token.line, 1352 col=token.col, 1353 start_context=start_context, 1354 highlight=highlight, 1355 end_context=end_context, 1356 ) 1357 1358 if self.error_level == ErrorLevel.IMMEDIATE: 1359 raise error 1360 1361 self.errors.append(error) 1362 1363 def expression( 1364 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1365 ) -> E: 1366 """ 1367 Creates a new, validated Expression. 1368 1369 Args: 1370 exp_class: The expression class to instantiate. 1371 comments: An optional list of comments to attach to the expression. 1372 kwargs: The arguments to set for the expression along with their respective values. 1373 1374 Returns: 1375 The target expression. 
1376 """ 1377 instance = exp_class(**kwargs) 1378 instance.add_comments(comments) if comments else self._add_comments(instance) 1379 return self.validate_expression(instance) 1380 1381 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1382 if expression and self._prev_comments: 1383 expression.add_comments(self._prev_comments) 1384 self._prev_comments = None 1385 1386 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1387 """ 1388 Validates an Expression, making sure that all its mandatory arguments are set. 1389 1390 Args: 1391 expression: The expression to validate. 1392 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1393 1394 Returns: 1395 The validated expression. 1396 """ 1397 if self.error_level != ErrorLevel.IGNORE: 1398 for error_message in expression.error_messages(args): 1399 self.raise_error(error_message) 1400 1401 return expression 1402 1403 def _find_sql(self, start: Token, end: Token) -> str: 1404 return self.sql[start.start : end.end + 1] 1405 1406 def _is_connected(self) -> bool: 1407 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1408 1409 def _advance(self, times: int = 1) -> None: 1410 self._index += times 1411 self._curr = seq_get(self._tokens, self._index) 1412 self._next = seq_get(self._tokens, self._index + 1) 1413 1414 if self._index > 0: 1415 self._prev = self._tokens[self._index - 1] 1416 self._prev_comments = self._prev.comments 1417 else: 1418 self._prev = None 1419 self._prev_comments = None 1420 1421 def _retreat(self, index: int) -> None: 1422 if index != self._index: 1423 self._advance(index - self._index) 1424 1425 def _warn_unsupported(self) -> None: 1426 if len(self._tokens) <= 1: 1427 return 1428 1429 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1430 # interested in emitting a warning for the one being currently processed. 1431 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1432 1433 logger.warning( 1434 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1435 ) 1436 1437 def _parse_command(self) -> exp.Command: 1438 self._warn_unsupported() 1439 return self.expression( 1440 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1441 ) 1442 1443 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1444 """ 1445 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an
        error. This behavior can be different depending on the user-set ErrorLevel, so _try_parse
        aims to solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)
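
    # _parse_statement is the top-level dispatch: the first token picks an
    # entry from STATEMENT_PARSERS, falling back to expression/select parsing.
    # A hedged sketch:
    #
    #     >>> from sqlglot import parse_one
    #     >>> type(parse_one("UPDATE t SET x = 1")).__name__
    #     'Update'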

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        if_exists = exists or self._parse_exists()
        table = self._parse_table_parts(
            schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=if_exists,
            this=table,
            expressions=expressions,
            kind=kind.upper(),
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
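
    # _parse_exists consumes IF [NOT] EXISTS and reports whether it was
    # present. A hedged sketch via _parse_drop, which stores the flag on the
    # Drop node:
    #
    #     >>> from sqlglot import parse_one
    #     >>> parse_one("DROP TABLE IF EXISTS t").args["exists"]
    #     True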
https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1658 expression = self._parse_string() 1659 extend_props(self._parse_properties()) 1660 else: 1661 expression = self._parse_statement() 1662 1663 end = self._match_text_seq("END") 1664 1665 if return_: 1666 expression = self.expression(exp.Return, this=expression) 1667 elif create_token.token_type == TokenType.INDEX: 1668 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 1669 if not self._match(TokenType.ON): 1670 index = self._parse_id_var() 1671 anonymous = False 1672 else: 1673 index = None 1674 anonymous = True 1675 1676 this = self._parse_index(index=index, anonymous=anonymous) 1677 elif create_token.token_type in self.DB_CREATABLES: 1678 table_parts = self._parse_table_parts( 1679 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1680 ) 1681 1682 # exp.Properties.Location.POST_NAME 1683 self._match(TokenType.COMMA) 1684 extend_props(self._parse_properties(before=True)) 1685 1686 this = self._parse_schema(this=table_parts) 1687 1688 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1689 extend_props(self._parse_properties()) 1690 1691 self._match(TokenType.ALIAS) 1692 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1693 # exp.Properties.Location.POST_ALIAS 1694 extend_props(self._parse_properties()) 1695 1696 if create_token.token_type == TokenType.SEQUENCE: 1697 expression = self._parse_types() 1698 extend_props(self._parse_properties()) 1699 else: 1700 expression = self._parse_ddl_select() 1701 1702 if create_token.token_type == TokenType.TABLE: 1703 # exp.Properties.Location.POST_EXPRESSION 1704 extend_props(self._parse_properties()) 1705 1706 indexes = [] 1707 while True: 1708 index = self._parse_index() 1709 1710 # exp.Properties.Location.POST_INDEX 1711 extend_props(self._parse_properties()) 1712 1713 if not index: 1714 break 1715 else: 1716 self._match(TokenType.COMMA) 1717 indexes.append(index) 1718 elif create_token.token_type == TokenType.VIEW: 1719 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1720 no_schema_binding = True 1721 1722 shallow = self._match_text_seq("SHALLOW") 1723 1724 if self._match_texts(self.CLONE_KEYWORDS): 1725 copy = self._prev.text.lower() == "copy" 1726 clone = self.expression( 1727 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1728 ) 1729 1730 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1731 return self._parse_as_command(start) 1732 1733 return self.expression( 1734 exp.Create, 1735 comments=comments, 1736 this=this, 1737 kind=create_token.text.upper(), 1738 replace=replace, 1739 unique=unique, 1740 expression=expression, 1741 exists=exists, 1742 properties=properties, 1743 indexes=indexes, 1744 no_schema_binding=no_schema_binding, 1745 begin=begin, 1746 end=end, 1747 clone=clone, 1748 ) 1749 1750 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1751 seq = exp.SequenceProperties() 1752 1753 options = [] 1754 index = self._index 1755 1756 while self._curr: 1757 self._match(TokenType.COMMA) 1758 if self._match_text_seq("INCREMENT"): 1759 self._match_text_seq("BY") 1760 self._match_text_seq("=") 1761 seq.set("increment", self._parse_term()) 1762 elif self._match_text_seq("MINVALUE"): 1763 seq.set("minvalue", self._parse_term()) 1764 elif self._match_text_seq("MAXVALUE"): 1765 seq.set("maxvalue", self._parse_term()) 1766 elif self._match(TokenType.START_WITH) or 
self._match_text_seq("START"): 1767 self._match_text_seq("=") 1768 seq.set("start", self._parse_term()) 1769 elif self._match_text_seq("CACHE"): 1770 # T-SQL allows empty CACHE which is initialized dynamically 1771 seq.set("cache", self._parse_number() or True) 1772 elif self._match_text_seq("OWNED", "BY"): 1773 # "OWNED BY NONE" is the default 1774 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1775 else: 1776 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1777 if opt: 1778 options.append(opt) 1779 else: 1780 break 1781 1782 seq.set("options", options if options else None) 1783 return None if self._index == index else seq 1784 1785 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1786 # only used for teradata currently 1787 self._match(TokenType.COMMA) 1788 1789 kwargs = { 1790 "no": self._match_text_seq("NO"), 1791 "dual": self._match_text_seq("DUAL"), 1792 "before": self._match_text_seq("BEFORE"), 1793 "default": self._match_text_seq("DEFAULT"), 1794 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1795 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1796 "after": self._match_text_seq("AFTER"), 1797 "minimum": self._match_texts(("MIN", "MINIMUM")), 1798 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1799 } 1800 1801 if self._match_texts(self.PROPERTY_PARSERS): 1802 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1803 try: 1804 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1805 except TypeError: 1806 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1807 1808 return None 1809 1810 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1811 return self._parse_wrapped_csv(self._parse_property) 1812 1813 def _parse_property(self) -> t.Optional[exp.Expression]: 1814 if self._match_texts(self.PROPERTY_PARSERS): 1815 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1816 1817 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1818 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1819 1820 if self._match_text_seq("COMPOUND", "SORTKEY"): 1821 return self._parse_sortkey(compound=True) 1822 1823 if self._match_text_seq("SQL", "SECURITY"): 1824 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1825 1826 index = self._index 1827 key = self._parse_column() 1828 1829 if not self._match(TokenType.EQ): 1830 self._retreat(index) 1831 return self._parse_sequence_properties() 1832 1833 return self.expression( 1834 exp.Property, 1835 this=key.to_dot() if isinstance(key, exp.Column) else key, 1836 value=self._parse_bitwise() or self._parse_var(any_token=True), 1837 ) 1838 1839 def _parse_stored(self) -> exp.FileFormatProperty: 1840 self._match(TokenType.ALIAS) 1841 1842 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1843 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1844 1845 return self.expression( 1846 exp.FileFormatProperty, 1847 this=( 1848 self.expression( 1849 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1850 ) 1851 if input_format or output_format 1852 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1853 ), 1854 ) 1855 1856 def _parse_unquoted_field(self): 1857 field = self._parse_field() 1858 if isinstance(field, exp.Identifier) and not field.quoted: 1859 field = exp.var(field) 1860 1861 return field 1862 1863 def 
_parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1864 self._match(TokenType.EQ) 1865 self._match(TokenType.ALIAS) 1866 1867 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 1868 1869 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1870 properties = [] 1871 while True: 1872 if before: 1873 prop = self._parse_property_before() 1874 else: 1875 prop = self._parse_property() 1876 if not prop: 1877 break 1878 for p in ensure_list(prop): 1879 properties.append(p) 1880 1881 if properties: 1882 return self.expression(exp.Properties, expressions=properties) 1883 1884 return None 1885 1886 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1887 return self.expression( 1888 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1889 ) 1890 1891 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1892 if self._index >= 2: 1893 pre_volatile_token = self._tokens[self._index - 2] 1894 else: 1895 pre_volatile_token = None 1896 1897 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1898 return exp.VolatileProperty() 1899 1900 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1901 1902 def _parse_retention_period(self) -> exp.Var: 1903 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 1904 number = self._parse_number() 1905 number_str = f"{number} " if number else "" 1906 unit = self._parse_var(any_token=True) 1907 return exp.var(f"{number_str}{unit}") 1908 1909 def _parse_system_versioning_property( 1910 self, with_: bool = False 1911 ) -> exp.WithSystemVersioningProperty: 1912 self._match(TokenType.EQ) 1913 prop = self.expression( 1914 exp.WithSystemVersioningProperty, 1915 **{ # type: ignore 1916 "on": True, 1917 "with": with_, 1918 }, 1919 ) 1920 1921 if self._match_text_seq("OFF"): 1922 prop.set("on", False) 1923 return prop 1924 1925 self._match(TokenType.ON) 1926 if self._match(TokenType.L_PAREN): 1927 while self._curr and not self._match(TokenType.R_PAREN): 1928 if self._match_text_seq("HISTORY_TABLE", "="): 1929 prop.set("this", self._parse_table_parts()) 1930 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 1931 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 1932 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 1933 prop.set("retention_period", self._parse_retention_period()) 1934 1935 self._match(TokenType.COMMA) 1936 1937 return prop 1938 1939 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 1940 self._match(TokenType.EQ) 1941 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 1942 prop = self.expression(exp.DataDeletionProperty, on=on) 1943 1944 if self._match(TokenType.L_PAREN): 1945 while self._curr and not self._match(TokenType.R_PAREN): 1946 if self._match_text_seq("FILTER_COLUMN", "="): 1947 prop.set("filter_column", self._parse_column()) 1948 elif self._match_text_seq("RETENTION_PERIOD", "="): 1949 prop.set("retention_period", self._parse_retention_period()) 1950 1951 self._match(TokenType.COMMA) 1952 1953 return prop 1954 1955 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1956 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 1957 prop = self._parse_system_versioning_property(with_=True) 1958 self._match_r_paren() 1959 return prop 1960 1961 if self._match(TokenType.L_PAREN, advance=False): 
1962 return self._parse_wrapped_properties() 1963 1964 if self._match_text_seq("JOURNAL"): 1965 return self._parse_withjournaltable() 1966 1967 if self._match_texts(self.VIEW_ATTRIBUTES): 1968 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 1969 1970 if self._match_text_seq("DATA"): 1971 return self._parse_withdata(no=False) 1972 elif self._match_text_seq("NO", "DATA"): 1973 return self._parse_withdata(no=True) 1974 1975 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 1976 return self._parse_serde_properties(with_=True) 1977 1978 if not self._next: 1979 return None 1980 1981 return self._parse_withisolatedloading() 1982 1983 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1984 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1985 self._match(TokenType.EQ) 1986 1987 user = self._parse_id_var() 1988 self._match(TokenType.PARAMETER) 1989 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1990 1991 if not user or not host: 1992 return None 1993 1994 return exp.DefinerProperty(this=f"{user}@{host}") 1995 1996 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1997 self._match(TokenType.TABLE) 1998 self._match(TokenType.EQ) 1999 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2000 2001 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2002 return self.expression(exp.LogProperty, no=no) 2003 2004 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2005 return self.expression(exp.JournalProperty, **kwargs) 2006 2007 def _parse_checksum(self) -> exp.ChecksumProperty: 2008 self._match(TokenType.EQ) 2009 2010 on = None 2011 if self._match(TokenType.ON): 2012 on = True 2013 elif self._match_text_seq("OFF"): 2014 on = False 2015 2016 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2017 2018 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2019 return self.expression( 2020 exp.Cluster, 2021 expressions=( 2022 self._parse_wrapped_csv(self._parse_ordered) 2023 if wrapped 2024 else self._parse_csv(self._parse_ordered) 2025 ), 2026 ) 2027 2028 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2029 self._match_text_seq("BY") 2030 2031 self._match_l_paren() 2032 expressions = self._parse_csv(self._parse_column) 2033 self._match_r_paren() 2034 2035 if self._match_text_seq("SORTED", "BY"): 2036 self._match_l_paren() 2037 sorted_by = self._parse_csv(self._parse_ordered) 2038 self._match_r_paren() 2039 else: 2040 sorted_by = None 2041 2042 self._match(TokenType.INTO) 2043 buckets = self._parse_number() 2044 self._match_text_seq("BUCKETS") 2045 2046 return self.expression( 2047 exp.ClusteredByProperty, 2048 expressions=expressions, 2049 sorted_by=sorted_by, 2050 buckets=buckets, 2051 ) 2052 2053 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2054 if not self._match_text_seq("GRANTS"): 2055 self._retreat(self._index - 1) 2056 return None 2057 2058 return self.expression(exp.CopyGrantsProperty) 2059 2060 def _parse_freespace(self) -> exp.FreespaceProperty: 2061 self._match(TokenType.EQ) 2062 return self.expression( 2063 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2064 ) 2065 2066 def _parse_mergeblockratio( 2067 self, no: bool = False, default: bool = False 2068 ) -> exp.MergeBlockRatioProperty: 2069 if self._match(TokenType.EQ): 2070 return self.expression( 2071 exp.MergeBlockRatioProperty, 2072 this=self._parse_number(), 
2073 percent=self._match(TokenType.PERCENT), 2074 ) 2075 2076 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2077 2078 def _parse_datablocksize( 2079 self, 2080 default: t.Optional[bool] = None, 2081 minimum: t.Optional[bool] = None, 2082 maximum: t.Optional[bool] = None, 2083 ) -> exp.DataBlocksizeProperty: 2084 self._match(TokenType.EQ) 2085 size = self._parse_number() 2086 2087 units = None 2088 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2089 units = self._prev.text 2090 2091 return self.expression( 2092 exp.DataBlocksizeProperty, 2093 size=size, 2094 units=units, 2095 default=default, 2096 minimum=minimum, 2097 maximum=maximum, 2098 ) 2099 2100 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2101 self._match(TokenType.EQ) 2102 always = self._match_text_seq("ALWAYS") 2103 manual = self._match_text_seq("MANUAL") 2104 never = self._match_text_seq("NEVER") 2105 default = self._match_text_seq("DEFAULT") 2106 2107 autotemp = None 2108 if self._match_text_seq("AUTOTEMP"): 2109 autotemp = self._parse_schema() 2110 2111 return self.expression( 2112 exp.BlockCompressionProperty, 2113 always=always, 2114 manual=manual, 2115 never=never, 2116 default=default, 2117 autotemp=autotemp, 2118 ) 2119 2120 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2121 index = self._index 2122 no = self._match_text_seq("NO") 2123 concurrent = self._match_text_seq("CONCURRENT") 2124 2125 if not self._match_text_seq("ISOLATED", "LOADING"): 2126 self._retreat(index) 2127 return None 2128 2129 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2130 return self.expression( 2131 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2132 ) 2133 2134 def _parse_locking(self) -> exp.LockingProperty: 2135 if self._match(TokenType.TABLE): 2136 kind = "TABLE" 2137 elif self._match(TokenType.VIEW): 2138 kind = "VIEW" 2139 elif self._match(TokenType.ROW): 2140 kind = "ROW" 2141 elif self._match_text_seq("DATABASE"): 2142 kind = "DATABASE" 2143 else: 2144 kind = None 2145 2146 if kind in ("DATABASE", "TABLE", "VIEW"): 2147 this = self._parse_table_parts() 2148 else: 2149 this = None 2150 2151 if self._match(TokenType.FOR): 2152 for_or_in = "FOR" 2153 elif self._match(TokenType.IN): 2154 for_or_in = "IN" 2155 else: 2156 for_or_in = None 2157 2158 if self._match_text_seq("ACCESS"): 2159 lock_type = "ACCESS" 2160 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2161 lock_type = "EXCLUSIVE" 2162 elif self._match_text_seq("SHARE"): 2163 lock_type = "SHARE" 2164 elif self._match_text_seq("READ"): 2165 lock_type = "READ" 2166 elif self._match_text_seq("WRITE"): 2167 lock_type = "WRITE" 2168 elif self._match_text_seq("CHECKSUM"): 2169 lock_type = "CHECKSUM" 2170 else: 2171 lock_type = None 2172 2173 override = self._match_text_seq("OVERRIDE") 2174 2175 return self.expression( 2176 exp.LockingProperty, 2177 this=this, 2178 kind=kind, 2179 for_or_in=for_or_in, 2180 lock_type=lock_type, 2181 override=override, 2182 ) 2183 2184 def _parse_partition_by(self) -> t.List[exp.Expression]: 2185 if self._match(TokenType.PARTITION_BY): 2186 return self._parse_csv(self._parse_conjunction) 2187 return [] 2188 2189 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2190 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2191 if self._match_text_seq("MINVALUE"): 2192 return exp.var("MINVALUE") 2193 if self._match_text_seq("MAXVALUE"): 2194 return exp.var("MAXVALUE") 2195 
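# e.g. Postgres range partition bounds: FOR VALUES FROM (MINVALUE) TO (100)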
return self._parse_bitwise() 2196 2197 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2198 expression = None 2199 from_expressions = None 2200 to_expressions = None 2201 2202 if self._match(TokenType.IN): 2203 this = self._parse_wrapped_csv(self._parse_bitwise) 2204 elif self._match(TokenType.FROM): 2205 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2206 self._match_text_seq("TO") 2207 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2208 elif self._match_text_seq("WITH", "(", "MODULUS"): 2209 this = self._parse_number() 2210 self._match_text_seq(",", "REMAINDER") 2211 expression = self._parse_number() 2212 self._match_r_paren() 2213 else: 2214 self.raise_error("Failed to parse partition bound spec.") 2215 2216 return self.expression( 2217 exp.PartitionBoundSpec, 2218 this=this, 2219 expression=expression, 2220 from_expressions=from_expressions, 2221 to_expressions=to_expressions, 2222 ) 2223 2224 # https://www.postgresql.org/docs/current/sql-createtable.html 2225 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2226 if not self._match_text_seq("OF"): 2227 self._retreat(self._index - 1) 2228 return None 2229 2230 this = self._parse_table(schema=True) 2231 2232 if self._match(TokenType.DEFAULT): 2233 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2234 elif self._match_text_seq("FOR", "VALUES"): 2235 expression = self._parse_partition_bound_spec() 2236 else: 2237 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2238 2239 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2240 2241 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2242 self._match(TokenType.EQ) 2243 return self.expression( 2244 exp.PartitionedByProperty, 2245 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2246 ) 2247 2248 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2249 if self._match_text_seq("AND", "STATISTICS"): 2250 statistics = True 2251 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2252 statistics = False 2253 else: 2254 statistics = None 2255 2256 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2257 2258 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2259 if self._match_text_seq("SQL"): 2260 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2261 return None 2262 2263 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2264 if self._match_text_seq("SQL", "DATA"): 2265 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2266 return None 2267 2268 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2269 if self._match_text_seq("PRIMARY", "INDEX"): 2270 return exp.NoPrimaryIndexProperty() 2271 if self._match_text_seq("SQL"): 2272 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2273 return None 2274 2275 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2276 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2277 return exp.OnCommitProperty() 2278 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2279 return exp.OnCommitProperty(delete=True) 2280 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2281 2282 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2283 if self._match_text_seq("SQL", "DATA"): 2284 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2285 
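# e.g. the MySQL routine characteristic: CREATE PROCEDURE p() READS SQL DATA BEGIN ... END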
return None 2286 2287 def _parse_distkey(self) -> exp.DistKeyProperty: 2288 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2289 2290 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2291 table = self._parse_table(schema=True) 2292 2293 options = [] 2294 while self._match_texts(("INCLUDING", "EXCLUDING")): 2295 this = self._prev.text.upper() 2296 2297 id_var = self._parse_id_var() 2298 if not id_var: 2299 return None 2300 2301 options.append( 2302 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2303 ) 2304 2305 return self.expression(exp.LikeProperty, this=table, expressions=options) 2306 2307 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2308 return self.expression( 2309 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2310 ) 2311 2312 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2313 self._match(TokenType.EQ) 2314 return self.expression( 2315 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2316 ) 2317 2318 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2319 self._match_text_seq("WITH", "CONNECTION") 2320 return self.expression( 2321 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2322 ) 2323 2324 def _parse_returns(self) -> exp.ReturnsProperty: 2325 value: t.Optional[exp.Expression] 2326 null = None 2327 is_table = self._match(TokenType.TABLE) 2328 2329 if is_table: 2330 if self._match(TokenType.LT): 2331 value = self.expression( 2332 exp.Schema, 2333 this="TABLE", 2334 expressions=self._parse_csv(self._parse_struct_types), 2335 ) 2336 if not self._match(TokenType.GT): 2337 self.raise_error("Expecting >") 2338 else: 2339 value = self._parse_schema(exp.var("TABLE")) 2340 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2341 null = True 2342 value = None 2343 else: 2344 value = self._parse_types() 2345 2346 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2347 2348 def _parse_describe(self) -> exp.Describe: 2349 kind = self._match_set(self.CREATABLES) and self._prev.text 2350 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2351 if self._match(TokenType.DOT): 2352 style = None 2353 self._retreat(self._index - 2) 2354 this = self._parse_table(schema=True) 2355 properties = self._parse_properties() 2356 expressions = properties.expressions if properties else None 2357 return self.expression( 2358 exp.Describe, this=this, style=style, kind=kind, expressions=expressions 2359 ) 2360 2361 def _parse_insert(self) -> exp.Insert: 2362 comments = ensure_list(self._prev_comments) 2363 hint = self._parse_hint() 2364 overwrite = self._match(TokenType.OVERWRITE) 2365 ignore = self._match(TokenType.IGNORE) 2366 local = self._match_text_seq("LOCAL") 2367 alternative = None 2368 is_function = None 2369 2370 if self._match_text_seq("DIRECTORY"): 2371 this: t.Optional[exp.Expression] = self.expression( 2372 exp.Directory, 2373 this=self._parse_var_or_string(), 2374 local=local, 2375 row_format=self._parse_row_format(match_row=True), 2376 ) 2377 else: 2378 if self._match(TokenType.OR): 2379 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2380 2381 self._match(TokenType.INTO) 2382 comments += ensure_list(self._prev_comments) 2383 self._match(TokenType.TABLE) 2384 is_function = self._match(TokenType.FUNCTION) 2385 2386 this = ( 2387 
self._parse_table(schema=True, parse_partition=True) 2388 if not is_function 2389 else self._parse_function() 2390 ) 2391 2392 returning = self._parse_returning() 2393 2394 return self.expression( 2395 exp.Insert, 2396 comments=comments, 2397 hint=hint, 2398 is_function=is_function, 2399 this=this, 2400 stored=self._match_text_seq("STORED") and self._parse_stored(), 2401 by_name=self._match_text_seq("BY", "NAME"), 2402 exists=self._parse_exists(), 2403 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 2404 and self._parse_conjunction(), 2405 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2406 conflict=self._parse_on_conflict(), 2407 returning=returning or self._parse_returning(), 2408 overwrite=overwrite, 2409 alternative=alternative, 2410 ignore=ignore, 2411 ) 2412 2413 def _parse_kill(self) -> exp.Kill: 2414 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2415 2416 return self.expression( 2417 exp.Kill, 2418 this=self._parse_primary(), 2419 kind=kind, 2420 ) 2421 2422 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2423 conflict = self._match_text_seq("ON", "CONFLICT") 2424 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2425 2426 if not conflict and not duplicate: 2427 return None 2428 2429 conflict_keys = None 2430 constraint = None 2431 2432 if conflict: 2433 if self._match_text_seq("ON", "CONSTRAINT"): 2434 constraint = self._parse_id_var() 2435 elif self._match(TokenType.L_PAREN): 2436 conflict_keys = self._parse_csv(self._parse_id_var) 2437 self._match_r_paren() 2438 2439 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2440 if self._prev.token_type == TokenType.UPDATE: 2441 self._match(TokenType.SET) 2442 expressions = self._parse_csv(self._parse_equality) 2443 else: 2444 expressions = None 2445 2446 return self.expression( 2447 exp.OnConflict, 2448 duplicate=duplicate, 2449 expressions=expressions, 2450 action=action, 2451 conflict_keys=conflict_keys, 2452 constraint=constraint, 2453 ) 2454 2455 def _parse_returning(self) -> t.Optional[exp.Returning]: 2456 if not self._match(TokenType.RETURNING): 2457 return None 2458 return self.expression( 2459 exp.Returning, 2460 expressions=self._parse_csv(self._parse_expression), 2461 into=self._match(TokenType.INTO) and self._parse_table_part(), 2462 ) 2463 2464 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2465 if not self._match(TokenType.FORMAT): 2466 return None 2467 return self._parse_row_format() 2468 2469 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2470 index = self._index 2471 with_ = with_ or self._match_text_seq("WITH") 2472 2473 if not self._match(TokenType.SERDE_PROPERTIES): 2474 self._retreat(index) 2475 return None 2476 return self.expression( 2477 exp.SerdeProperties, 2478 **{ # type: ignore 2479 "expressions": self._parse_wrapped_properties(), 2480 "with": with_, 2481 }, 2482 ) 2483 2484 def _parse_row_format( 2485 self, match_row: bool = False 2486 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2487 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2488 return None 2489 2490 if self._match_text_seq("SERDE"): 2491 this = self._parse_string() 2492 2493 serde_properties = self._parse_serde_properties() 2494 2495 return self.expression( 2496 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2497 ) 2498 2499 self._match_text_seq("DELIMITED") 2500 
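# e.g. Hive: ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n'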
2501 kwargs = {} 2502 2503 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2504 kwargs["fields"] = self._parse_string() 2505 if self._match_text_seq("ESCAPED", "BY"): 2506 kwargs["escaped"] = self._parse_string() 2507 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2508 kwargs["collection_items"] = self._parse_string() 2509 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2510 kwargs["map_keys"] = self._parse_string() 2511 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2512 kwargs["lines"] = self._parse_string() 2513 if self._match_text_seq("NULL", "DEFINED", "AS"): 2514 kwargs["null"] = self._parse_string() 2515 2516 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2517 2518 def _parse_load(self) -> exp.LoadData | exp.Command: 2519 if self._match_text_seq("DATA"): 2520 local = self._match_text_seq("LOCAL") 2521 self._match_text_seq("INPATH") 2522 inpath = self._parse_string() 2523 overwrite = self._match(TokenType.OVERWRITE) 2524 self._match_pair(TokenType.INTO, TokenType.TABLE) 2525 2526 return self.expression( 2527 exp.LoadData, 2528 this=self._parse_table(schema=True), 2529 local=local, 2530 overwrite=overwrite, 2531 inpath=inpath, 2532 partition=self._parse_partition(), 2533 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2534 serde=self._match_text_seq("SERDE") and self._parse_string(), 2535 ) 2536 return self._parse_as_command(self._prev) 2537 2538 def _parse_delete(self) -> exp.Delete: 2539 # This handles MySQL's "Multiple-Table Syntax" 2540 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2541 tables = None 2542 comments = self._prev_comments 2543 if not self._match(TokenType.FROM, advance=False): 2544 tables = self._parse_csv(self._parse_table) or None 2545 2546 returning = self._parse_returning() 2547 2548 return self.expression( 2549 exp.Delete, 2550 comments=comments, 2551 tables=tables, 2552 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2553 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2554 where=self._parse_where(), 2555 returning=returning or self._parse_returning(), 2556 limit=self._parse_limit(), 2557 ) 2558 2559 def _parse_update(self) -> exp.Update: 2560 comments = self._prev_comments 2561 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2562 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2563 returning = self._parse_returning() 2564 return self.expression( 2565 exp.Update, 2566 comments=comments, 2567 **{ # type: ignore 2568 "this": this, 2569 "expressions": expressions, 2570 "from": self._parse_from(joins=True), 2571 "where": self._parse_where(), 2572 "returning": returning or self._parse_returning(), 2573 "order": self._parse_order(), 2574 "limit": self._parse_limit(), 2575 }, 2576 ) 2577 2578 def _parse_uncache(self) -> exp.Uncache: 2579 if not self._match(TokenType.TABLE): 2580 self.raise_error("Expecting TABLE after UNCACHE") 2581 2582 return self.expression( 2583 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2584 ) 2585 2586 def _parse_cache(self) -> exp.Cache: 2587 lazy = self._match_text_seq("LAZY") 2588 self._match(TokenType.TABLE) 2589 table = self._parse_table(schema=True) 2590 2591 options = [] 2592 if self._match_text_seq("OPTIONS"): 2593 self._match_l_paren() 2594 k = self._parse_string() 2595 self._match(TokenType.EQ) 2596 v = self._parse_string() 2597 options = [k, v] 2598 self._match_r_paren() 2599 2600 
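# e.g. Spark SQL: CACHE LAZY TABLE t OPTIONS ('storageLevel' 'DISK_ONLY') AS SELECT ...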
self._match(TokenType.ALIAS) 2601 return self.expression( 2602 exp.Cache, 2603 this=table, 2604 lazy=lazy, 2605 options=options, 2606 expression=self._parse_select(nested=True), 2607 ) 2608 2609 def _parse_partition(self) -> t.Optional[exp.Partition]: 2610 if not self._match(TokenType.PARTITION): 2611 return None 2612 2613 return self.expression( 2614 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2615 ) 2616 2617 def _parse_value(self) -> t.Optional[exp.Tuple]: 2618 if self._match(TokenType.L_PAREN): 2619 expressions = self._parse_csv(self._parse_expression) 2620 self._match_r_paren() 2621 return self.expression(exp.Tuple, expressions=expressions) 2622 2623 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 2624 expression = self._parse_expression() 2625 if expression: 2626 return self.expression(exp.Tuple, expressions=[expression]) 2627 return None 2628 2629 def _parse_projections(self) -> t.List[exp.Expression]: 2630 return self._parse_expressions() 2631 2632 def _parse_select( 2633 self, 2634 nested: bool = False, 2635 table: bool = False, 2636 parse_subquery_alias: bool = True, 2637 parse_set_operation: bool = True, 2638 ) -> t.Optional[exp.Expression]: 2639 cte = self._parse_with() 2640 2641 if cte: 2642 this = self._parse_statement() 2643 2644 if not this: 2645 self.raise_error("Failed to parse any statement following CTE") 2646 return cte 2647 2648 if "with" in this.arg_types: 2649 this.set("with", cte) 2650 else: 2651 self.raise_error(f"{this.key} does not support CTE") 2652 this = cte 2653 2654 return this 2655 2656 # duckdb supports leading with FROM x 2657 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2658 2659 if self._match(TokenType.SELECT): 2660 comments = self._prev_comments 2661 2662 hint = self._parse_hint() 2663 all_ = self._match(TokenType.ALL) 2664 distinct = self._match_set(self.DISTINCT_TOKENS) 2665 2666 kind = ( 2667 self._match(TokenType.ALIAS) 2668 and self._match_texts(("STRUCT", "VALUE")) 2669 and self._prev.text.upper() 2670 ) 2671 2672 if distinct: 2673 distinct = self.expression( 2674 exp.Distinct, 2675 on=self._parse_value() if self._match(TokenType.ON) else None, 2676 ) 2677 2678 if all_ and distinct: 2679 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2680 2681 limit = self._parse_limit(top=True) 2682 projections = self._parse_projections() 2683 2684 this = self.expression( 2685 exp.Select, 2686 kind=kind, 2687 hint=hint, 2688 distinct=distinct, 2689 expressions=projections, 2690 limit=limit, 2691 ) 2692 this.comments = comments 2693 2694 into = self._parse_into() 2695 if into: 2696 this.set("into", into) 2697 2698 if not from_: 2699 from_ = self._parse_from() 2700 2701 if from_: 2702 this.set("from", from_) 2703 2704 this = self._parse_query_modifiers(this) 2705 elif (table or nested) and self._match(TokenType.L_PAREN): 2706 if self._match(TokenType.PIVOT): 2707 this = self._parse_simplified_pivot() 2708 elif self._match(TokenType.FROM): 2709 this = exp.select("*").from_( 2710 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2711 ) 2712 else: 2713 this = ( 2714 self._parse_table() 2715 if table 2716 else self._parse_select(nested=True, parse_set_operation=False) 2717 ) 2718 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2719 2720 self._match_r_paren() 2721 2722 # We return early here so that the UNION isn't attached to the subquery by the 2723 # following call to _parse_set_operations, but instead becomes the 
parent node 2724 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2725 elif self._match(TokenType.VALUES, advance=False): 2726 this = self._parse_derived_table_values() 2727 elif from_: 2728 this = exp.select("*").from_(from_.this, copy=False) 2729 else: 2730 this = None 2731 2732 if parse_set_operation: 2733 return self._parse_set_operations(this) 2734 return this 2735 2736 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2737 if not skip_with_token and not self._match(TokenType.WITH): 2738 return None 2739 2740 comments = self._prev_comments 2741 recursive = self._match(TokenType.RECURSIVE) 2742 2743 expressions = [] 2744 while True: 2745 expressions.append(self._parse_cte()) 2746 2747 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2748 break 2749 else: 2750 self._match(TokenType.WITH) 2751 2752 return self.expression( 2753 exp.With, comments=comments, expressions=expressions, recursive=recursive 2754 ) 2755 2756 def _parse_cte(self) -> exp.CTE: 2757 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2758 if not alias or not alias.this: 2759 self.raise_error("Expected CTE to have alias") 2760 2761 self._match(TokenType.ALIAS) 2762 2763 if self._match_text_seq("NOT", "MATERIALIZED"): 2764 materialized = False 2765 elif self._match_text_seq("MATERIALIZED"): 2766 materialized = True 2767 else: 2768 materialized = None 2769 2770 return self.expression( 2771 exp.CTE, 2772 this=self._parse_wrapped(self._parse_statement), 2773 alias=alias, 2774 materialized=materialized, 2775 ) 2776 2777 def _parse_table_alias( 2778 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2779 ) -> t.Optional[exp.TableAlias]: 2780 any_token = self._match(TokenType.ALIAS) 2781 alias = ( 2782 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2783 or self._parse_string_as_identifier() 2784 ) 2785 2786 index = self._index 2787 if self._match(TokenType.L_PAREN): 2788 columns = self._parse_csv(self._parse_function_parameter) 2789 self._match_r_paren() if columns else self._retreat(index) 2790 else: 2791 columns = None 2792 2793 if not alias and not columns: 2794 return None 2795 2796 return self.expression(exp.TableAlias, this=alias, columns=columns) 2797 2798 def _parse_subquery( 2799 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2800 ) -> t.Optional[exp.Subquery]: 2801 if not this: 2802 return None 2803 2804 return self.expression( 2805 exp.Subquery, 2806 this=this, 2807 pivots=self._parse_pivots(), 2808 alias=self._parse_table_alias() if parse_alias else None, 2809 ) 2810 2811 def _implicit_unnests_to_explicit(self, this: E) -> E: 2812 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 2813 2814 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2815 for i, join in enumerate(this.args.get("joins") or []): 2816 table = join.this 2817 normalized_table = table.copy() 2818 normalized_table.meta["maybe_column"] = True 2819 normalized_table = _norm(normalized_table, dialect=self.dialect) 2820 2821 if isinstance(table, exp.Table) and not join.args.get("on"): 2822 if normalized_table.parts[0].name in refs: 2823 table_as_column = table.to_column() 2824 unnest = exp.Unnest(expressions=[table_as_column]) 2825 2826 # Table.to_column creates a parent Alias node that we want to convert to 2827 # a TableAlias and attach to the Unnest, so it matches the parser's output 2828 if isinstance(table.args.get("alias"), exp.TableAlias): 
2829 table_as_column.replace(table_as_column.this) 2830 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2831 2832 table.replace(unnest) 2833 2834 refs.add(normalized_table.alias_or_name) 2835 2836 return this 2837 2838 def _parse_query_modifiers( 2839 self, this: t.Optional[exp.Expression] 2840 ) -> t.Optional[exp.Expression]: 2841 if isinstance(this, (exp.Query, exp.Table)): 2842 for join in self._parse_joins(): 2843 this.append("joins", join) 2844 for lateral in iter(self._parse_lateral, None): 2845 this.append("laterals", lateral) 2846 2847 while True: 2848 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2849 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2850 key, expression = parser(self) 2851 2852 if expression: 2853 this.set(key, expression) 2854 if key == "limit": 2855 offset = expression.args.pop("offset", None) 2856 2857 if offset: 2858 offset = exp.Offset(expression=offset) 2859 this.set("offset", offset) 2860 2861 limit_by_expressions = expression.expressions 2862 expression.set("expressions", None) 2863 offset.set("expressions", limit_by_expressions) 2864 continue 2865 break 2866 2867 if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args: 2868 this = self._implicit_unnests_to_explicit(this) 2869 2870 return this 2871 2872 def _parse_hint(self) -> t.Optional[exp.Hint]: 2873 if self._match(TokenType.HINT): 2874 hints = [] 2875 for hint in iter( 2876 lambda: self._parse_csv( 2877 lambda: self._parse_function() or self._parse_var(upper=True) 2878 ), 2879 [], 2880 ): 2881 hints.extend(hint) 2882 2883 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2884 self.raise_error("Expected */ after HINT") 2885 2886 return self.expression(exp.Hint, expressions=hints) 2887 2888 return None 2889 2890 def _parse_into(self) -> t.Optional[exp.Into]: 2891 if not self._match(TokenType.INTO): 2892 return None 2893 2894 temp = self._match(TokenType.TEMPORARY) 2895 unlogged = self._match_text_seq("UNLOGGED") 2896 self._match(TokenType.TABLE) 2897 2898 return self.expression( 2899 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2900 ) 2901 2902 def _parse_from( 2903 self, joins: bool = False, skip_from_token: bool = False 2904 ) -> t.Optional[exp.From]: 2905 if not skip_from_token and not self._match(TokenType.FROM): 2906 return None 2907 2908 return self.expression( 2909 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2910 ) 2911 2912 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 2913 return self.expression( 2914 exp.MatchRecognizeMeasure, 2915 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 2916 this=self._parse_expression(), 2917 ) 2918 2919 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2920 if not self._match(TokenType.MATCH_RECOGNIZE): 2921 return None 2922 2923 self._match_l_paren() 2924 2925 partition = self._parse_partition_by() 2926 order = self._parse_order() 2927 2928 measures = ( 2929 self._parse_csv(self._parse_match_recognize_measure) 2930 if self._match_text_seq("MEASURES") 2931 else None 2932 ) 2933 2934 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2935 rows = exp.var("ONE ROW PER MATCH") 2936 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2937 text = "ALL ROWS PER MATCH" 2938 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2939 text += " SHOW EMPTY MATCHES" 2940 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2941 text += " OMIT EMPTY MATCHES" 
2942 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2943 text += " WITH UNMATCHED ROWS" 2944 rows = exp.var(text) 2945 else: 2946 rows = None 2947 2948 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2949 text = "AFTER MATCH SKIP" 2950 if self._match_text_seq("PAST", "LAST", "ROW"): 2951 text += " PAST LAST ROW" 2952 elif self._match_text_seq("TO", "NEXT", "ROW"): 2953 text += " TO NEXT ROW" 2954 elif self._match_text_seq("TO", "FIRST"): 2955 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2956 elif self._match_text_seq("TO", "LAST"): 2957 text += f" TO LAST {self._advance_any().text}" # type: ignore 2958 after = exp.var(text) 2959 else: 2960 after = None 2961 2962 if self._match_text_seq("PATTERN"): 2963 self._match_l_paren() 2964 2965 if not self._curr: 2966 self.raise_error("Expecting )", self._curr) 2967 2968 paren = 1 2969 start = self._curr 2970 2971 while self._curr and paren > 0: 2972 if self._curr.token_type == TokenType.L_PAREN: 2973 paren += 1 2974 if self._curr.token_type == TokenType.R_PAREN: 2975 paren -= 1 2976 2977 end = self._prev 2978 self._advance() 2979 2980 if paren > 0: 2981 self.raise_error("Expecting )", self._curr) 2982 2983 pattern = exp.var(self._find_sql(start, end)) 2984 else: 2985 pattern = None 2986 2987 define = ( 2988 self._parse_csv(self._parse_name_as_expression) 2989 if self._match_text_seq("DEFINE") 2990 else None 2991 ) 2992 2993 self._match_r_paren() 2994 2995 return self.expression( 2996 exp.MatchRecognize, 2997 partition_by=partition, 2998 order=order, 2999 measures=measures, 3000 rows=rows, 3001 after=after, 3002 pattern=pattern, 3003 define=define, 3004 alias=self._parse_table_alias(), 3005 ) 3006 3007 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3008 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3009 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3010 cross_apply = False 3011 3012 if cross_apply is not None: 3013 this = self._parse_select(table=True) 3014 view = None 3015 outer = None 3016 elif self._match(TokenType.LATERAL): 3017 this = self._parse_select(table=True) 3018 view = self._match(TokenType.VIEW) 3019 outer = self._match(TokenType.OUTER) 3020 else: 3021 return None 3022 3023 if not this: 3024 this = ( 3025 self._parse_unnest() 3026 or self._parse_function() 3027 or self._parse_id_var(any_token=False) 3028 ) 3029 3030 while self._match(TokenType.DOT): 3031 this = exp.Dot( 3032 this=this, 3033 expression=self._parse_function() or self._parse_id_var(any_token=False), 3034 ) 3035 3036 if view: 3037 table = self._parse_id_var(any_token=False) 3038 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3039 table_alias: t.Optional[exp.TableAlias] = self.expression( 3040 exp.TableAlias, this=table, columns=columns 3041 ) 3042 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3043 # We move the alias from the lateral's child node to the lateral itself 3044 table_alias = this.args["alias"].pop() 3045 else: 3046 table_alias = self._parse_table_alias() 3047 3048 return self.expression( 3049 exp.Lateral, 3050 this=this, 3051 view=view, 3052 outer=outer, 3053 alias=table_alias, 3054 cross_apply=cross_apply, 3055 ) 3056 3057 def _parse_join_parts( 3058 self, 3059 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3060 return ( 3061 self._match_set(self.JOIN_METHODS) and self._prev, 3062 self._match_set(self.JOIN_SIDES) and self._prev, 3063 self._match_set(self.JOIN_KINDS) and self._prev, 3064 ) 3065 
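# A minimal sketch of how the join parsing below surfaces through the public
# API, assuming only the sqlglot package itself:
#
#     import sqlglot
#     select = sqlglot.parse_one("SELECT * FROM a LEFT OUTER JOIN b ON a.id = b.id")
#     join = select.args["joins"][0]  # exp.Join produced by _parse_join
#     join.side, join.kind            # 'LEFT', 'OUTER'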
3066 def _parse_join( 3067 self, skip_join_token: bool = False, parse_bracket: bool = False 3068 ) -> t.Optional[exp.Join]: 3069 if self._match(TokenType.COMMA): 3070 return self.expression(exp.Join, this=self._parse_table()) 3071 3072 index = self._index 3073 method, side, kind = self._parse_join_parts() 3074 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3075 join = self._match(TokenType.JOIN) 3076 3077 if not skip_join_token and not join: 3078 self._retreat(index) 3079 kind = None 3080 method = None 3081 side = None 3082 3083 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3084 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3085 3086 if not skip_join_token and not join and not outer_apply and not cross_apply: 3087 return None 3088 3089 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3090 3091 if method: 3092 kwargs["method"] = method.text 3093 if side: 3094 kwargs["side"] = side.text 3095 if kind: 3096 kwargs["kind"] = kind.text 3097 if hint: 3098 kwargs["hint"] = hint 3099 3100 if self._match(TokenType.MATCH_CONDITION): 3101 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3102 3103 if self._match(TokenType.ON): 3104 kwargs["on"] = self._parse_conjunction() 3105 elif self._match(TokenType.USING): 3106 kwargs["using"] = self._parse_wrapped_id_vars() 3107 elif not isinstance(kwargs["this"], exp.Unnest) and not ( 3108 kind and kind.token_type == TokenType.CROSS 3109 ): 3110 index = self._index 3111 joins: t.Optional[list] = list(self._parse_joins()) 3112 3113 if joins and self._match(TokenType.ON): 3114 kwargs["on"] = self._parse_conjunction() 3115 elif joins and self._match(TokenType.USING): 3116 kwargs["using"] = self._parse_wrapped_id_vars() 3117 else: 3118 joins = None 3119 self._retreat(index) 3120 3121 kwargs["this"].set("joins", joins if joins else None) 3122 3123 comments = [c for token in (method, side, kind) if token for c in token.comments] 3124 return self.expression(exp.Join, comments=comments, **kwargs) 3125 3126 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3127 this = self._parse_conjunction() 3128 3129 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3130 return this 3131 3132 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3133 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3134 3135 return this 3136 3137 def _parse_index_params(self) -> exp.IndexParameters: 3138 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3139 3140 if self._match(TokenType.L_PAREN, advance=False): 3141 columns = self._parse_wrapped_csv(self._parse_with_operator) 3142 else: 3143 columns = None 3144 3145 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3146 partition_by = self._parse_partition_by() 3147 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3148 tablespace = ( 3149 self._parse_var(any_token=True) 3150 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3151 else None 3152 ) 3153 where = self._parse_where() 3154 3155 return self.expression( 3156 exp.IndexParameters, 3157 using=using, 3158 columns=columns, 3159 include=include, 3160 partition_by=partition_by, 3161 where=where, 3162 with_storage=with_storage, 3163 tablespace=tablespace, 3164 ) 3165 3166 def _parse_index( 3167 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3168 ) -> 
t.Optional[exp.Index]: 3169 if index or anonymous: 3170 unique = None 3171 primary = None 3172 amp = None 3173 3174 self._match(TokenType.ON) 3175 self._match(TokenType.TABLE) # hive 3176 table = self._parse_table_parts(schema=True) 3177 else: 3178 unique = self._match(TokenType.UNIQUE) 3179 primary = self._match_text_seq("PRIMARY") 3180 amp = self._match_text_seq("AMP") 3181 3182 if not self._match(TokenType.INDEX): 3183 return None 3184 3185 index = self._parse_id_var() 3186 table = None 3187 3188 params = self._parse_index_params() 3189 3190 return self.expression( 3191 exp.Index, 3192 this=index, 3193 table=table, 3194 unique=unique, 3195 primary=primary, 3196 amp=amp, 3197 params=params, 3198 ) 3199 3200 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3201 hints: t.List[exp.Expression] = [] 3202 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3203 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3204 hints.append( 3205 self.expression( 3206 exp.WithTableHint, 3207 expressions=self._parse_csv( 3208 lambda: self._parse_function() or self._parse_var(any_token=True) 3209 ), 3210 ) 3211 ) 3212 self._match_r_paren() 3213 else: 3214 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3215 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3216 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3217 3218 self._match_texts(("INDEX", "KEY")) 3219 if self._match(TokenType.FOR): 3220 hint.set("target", self._advance_any() and self._prev.text.upper()) 3221 3222 hint.set("expressions", self._parse_wrapped_id_vars()) 3223 hints.append(hint) 3224 3225 return hints or None 3226 3227 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3228 return ( 3229 (not schema and self._parse_function(optional_parens=False)) 3230 or self._parse_id_var(any_token=False) 3231 or self._parse_string_as_identifier() 3232 or self._parse_placeholder() 3233 ) 3234 3235 def _parse_table_parts( 3236 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3237 ) -> exp.Table: 3238 catalog = None 3239 db = None 3240 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3241 3242 while self._match(TokenType.DOT): 3243 if catalog: 3244 # This allows nesting the table in arbitrarily many dot expressions if needed 3245 table = self.expression( 3246 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3247 ) 3248 else: 3249 catalog = db 3250 db = table 3251 # "" used for tsql FROM a..b case 3252 table = self._parse_table_part(schema=schema) or "" 3253 3254 if ( 3255 wildcard 3256 and self._is_connected() 3257 and (isinstance(table, exp.Identifier) or not table) 3258 and self._match(TokenType.STAR) 3259 ): 3260 if isinstance(table, exp.Identifier): 3261 table.args["this"] += "*" 3262 else: 3263 table = exp.Identifier(this="*") 3264 3265 # We bubble up comments from the Identifier to the Table 3266 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3267 3268 if is_db_reference: 3269 catalog = db 3270 db = table 3271 table = None 3272 3273 if not table and not is_db_reference: 3274 self.raise_error(f"Expected table name but got {self._curr}") 3275 if not db and is_db_reference: 3276 self.raise_error(f"Expected database name but got {self._curr}") 3277 3278 return self.expression( 3279 exp.Table, 3280 comments=comments, 3281 this=table, 3282 db=db, 3283 catalog=catalog, 3284 pivots=self._parse_pivots(), 3285 ) 3286 3287 
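# A minimal sketch of the catalog.db.table handling above, assuming only the
# sqlglot package itself:
#
#     import sqlglot
#     table = sqlglot.parse_one("SELECT * FROM c.db.t").args["from"].this
#     table.catalog, table.db, table.name  # 'c', 'db', 't'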
def _parse_table( 3288 self, 3289 schema: bool = False, 3290 joins: bool = False, 3291 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3292 parse_bracket: bool = False, 3293 is_db_reference: bool = False, 3294 parse_partition: bool = False, 3295 ) -> t.Optional[exp.Expression]: 3296 lateral = self._parse_lateral() 3297 if lateral: 3298 return lateral 3299 3300 unnest = self._parse_unnest() 3301 if unnest: 3302 return unnest 3303 3304 values = self._parse_derived_table_values() 3305 if values: 3306 return values 3307 3308 subquery = self._parse_select(table=True) 3309 if subquery: 3310 if not subquery.args.get("pivots"): 3311 subquery.set("pivots", self._parse_pivots()) 3312 return subquery 3313 3314 bracket = parse_bracket and self._parse_bracket(None) 3315 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3316 3317 only = self._match(TokenType.ONLY) 3318 3319 this = t.cast( 3320 exp.Expression, 3321 bracket 3322 or self._parse_bracket( 3323 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3324 ), 3325 ) 3326 3327 if only: 3328 this.set("only", only) 3329 3330 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3331 self._match_text_seq("*") 3332 3333 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3334 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3335 this.set("partition", self._parse_partition()) 3336 3337 if schema: 3338 return self._parse_schema(this=this) 3339 3340 version = self._parse_version() 3341 3342 if version: 3343 this.set("version", version) 3344 3345 if self.dialect.ALIAS_POST_TABLESAMPLE: 3346 table_sample = self._parse_table_sample() 3347 3348 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3349 if alias: 3350 this.set("alias", alias) 3351 3352 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3353 return self.expression( 3354 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3355 ) 3356 3357 this.set("hints", self._parse_table_hints()) 3358 3359 if not this.args.get("pivots"): 3360 this.set("pivots", self._parse_pivots()) 3361 3362 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3363 table_sample = self._parse_table_sample() 3364 3365 if table_sample: 3366 table_sample.set("this", this) 3367 this = table_sample 3368 3369 if joins: 3370 for join in self._parse_joins(): 3371 this.append("joins", join) 3372 3373 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3374 this.set("ordinality", True) 3375 this.set("alias", self._parse_table_alias()) 3376 3377 return this 3378 3379 def _parse_version(self) -> t.Optional[exp.Version]: 3380 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3381 this = "TIMESTAMP" 3382 elif self._match(TokenType.VERSION_SNAPSHOT): 3383 this = "VERSION" 3384 else: 3385 return None 3386 3387 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3388 kind = self._prev.text.upper() 3389 start = self._parse_bitwise() 3390 self._match_texts(("TO", "AND")) 3391 end = self._parse_bitwise() 3392 expression: t.Optional[exp.Expression] = self.expression( 3393 exp.Tuple, expressions=[start, end] 3394 ) 3395 elif self._match_text_seq("CONTAINED", "IN"): 3396 kind = "CONTAINED IN" 3397 expression = self.expression( 3398 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3399 ) 3400 elif self._match(TokenType.ALL): 3401 kind = "ALL" 3402 expression = None 3403 else: 3404 self._match_text_seq("AS", "OF") 3405 kind = "AS OF" 3406 
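# e.g. T-SQL / BigQuery time travel: FOR SYSTEM_TIME AS OF '2023-01-01 00:00:00'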
expression = self._parse_type() 3407 3408 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3409 3410 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3411 if not self._match(TokenType.UNNEST): 3412 return None 3413 3414 expressions = self._parse_wrapped_csv(self._parse_equality) 3415 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3416 3417 alias = self._parse_table_alias() if with_alias else None 3418 3419 if alias: 3420 if self.dialect.UNNEST_COLUMN_ONLY: 3421 if alias.args.get("columns"): 3422 self.raise_error("Unexpected extra column alias in unnest.") 3423 3424 alias.set("columns", [alias.this]) 3425 alias.set("this", None) 3426 3427 columns = alias.args.get("columns") or [] 3428 if offset and len(expressions) < len(columns): 3429 offset = columns.pop() 3430 3431 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3432 self._match(TokenType.ALIAS) 3433 offset = self._parse_id_var( 3434 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3435 ) or exp.to_identifier("offset") 3436 3437 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3438 3439 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3440 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3441 if not is_derived and not self._match_text_seq("VALUES"): 3442 return None 3443 3444 expressions = self._parse_csv(self._parse_value) 3445 alias = self._parse_table_alias() 3446 3447 if is_derived: 3448 self._match_r_paren() 3449 3450 return self.expression( 3451 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3452 ) 3453 3454 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3455 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3456 as_modifier and self._match_text_seq("USING", "SAMPLE") 3457 ): 3458 return None 3459 3460 bucket_numerator = None 3461 bucket_denominator = None 3462 bucket_field = None 3463 percent = None 3464 size = None 3465 seed = None 3466 3467 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3468 matched_l_paren = self._match(TokenType.L_PAREN) 3469 3470 if self.TABLESAMPLE_CSV: 3471 num = None 3472 expressions = self._parse_csv(self._parse_primary) 3473 else: 3474 expressions = None 3475 num = ( 3476 self._parse_factor() 3477 if self._match(TokenType.NUMBER, advance=False) 3478 else self._parse_primary() or self._parse_placeholder() 3479 ) 3480 3481 if self._match_text_seq("BUCKET"): 3482 bucket_numerator = self._parse_number() 3483 self._match_text_seq("OUT", "OF") 3484 bucket_denominator = self._parse_number() 3485 self._match(TokenType.ON) 3486 bucket_field = self._parse_field() 3487 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3488 percent = num 3489 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3490 size = num 3491 else: 3492 percent = num 3493 3494 if matched_l_paren: 3495 self._match_r_paren() 3496 3497 if self._match(TokenType.L_PAREN): 3498 method = self._parse_var(upper=True) 3499 seed = self._match(TokenType.COMMA) and self._parse_number() 3500 self._match_r_paren() 3501 elif self._match_texts(("SEED", "REPEATABLE")): 3502 seed = self._parse_wrapped(self._parse_number) 3503 3504 if not method and self.DEFAULT_SAMPLING_METHOD: 3505 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3506 3507 return self.expression( 3508 exp.TableSample, 3509 expressions=expressions, 3510 method=method, 3511
bucket_numerator=bucket_numerator, 3512 bucket_denominator=bucket_denominator, 3513 bucket_field=bucket_field, 3514 percent=percent, 3515 size=size, 3516 seed=seed, 3517 ) 3518 3519 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3520 return list(iter(self._parse_pivot, None)) or None 3521 3522 def _parse_joins(self) -> t.Iterator[exp.Join]: 3523 return iter(self._parse_join, None) 3524 3525 # https://duckdb.org/docs/sql/statements/pivot 3526 def _parse_simplified_pivot(self) -> exp.Pivot: 3527 def _parse_on() -> t.Optional[exp.Expression]: 3528 this = self._parse_bitwise() 3529 return self._parse_in(this) if self._match(TokenType.IN) else this 3530 3531 this = self._parse_table() 3532 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3533 using = self._match(TokenType.USING) and self._parse_csv( 3534 lambda: self._parse_alias(self._parse_function()) 3535 ) 3536 group = self._parse_group() 3537 return self.expression( 3538 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3539 ) 3540 3541 def _parse_pivot_in(self) -> exp.In: 3542 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3543 this = self._parse_conjunction() 3544 3545 self._match(TokenType.ALIAS) 3546 alias = self._parse_field() 3547 if alias: 3548 return self.expression(exp.PivotAlias, this=this, alias=alias) 3549 3550 return this 3551 3552 value = self._parse_column() 3553 3554 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3555 self.raise_error("Expecting IN (") 3556 3557 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3558 3559 self._match_r_paren() 3560 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3561 3562 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3563 index = self._index 3564 include_nulls = None 3565 3566 if self._match(TokenType.PIVOT): 3567 unpivot = False 3568 elif self._match(TokenType.UNPIVOT): 3569 unpivot = True 3570 3571 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3572 if self._match_text_seq("INCLUDE", "NULLS"): 3573 include_nulls = True 3574 elif self._match_text_seq("EXCLUDE", "NULLS"): 3575 include_nulls = False 3576 else: 3577 return None 3578 3579 expressions = [] 3580 3581 if not self._match(TokenType.L_PAREN): 3582 self._retreat(index) 3583 return None 3584 3585 if unpivot: 3586 expressions = self._parse_csv(self._parse_column) 3587 else: 3588 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3589 3590 if not expressions: 3591 self.raise_error("Failed to parse PIVOT's aggregation list") 3592 3593 if not self._match(TokenType.FOR): 3594 self.raise_error("Expecting FOR") 3595 3596 field = self._parse_pivot_in() 3597 3598 self._match_r_paren() 3599 3600 pivot = self.expression( 3601 exp.Pivot, 3602 expressions=expressions, 3603 field=field, 3604 unpivot=unpivot, 3605 include_nulls=include_nulls, 3606 ) 3607 3608 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3609 pivot.set("alias", self._parse_table_alias()) 3610 3611 if not unpivot: 3612 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3613 3614 columns: t.List[exp.Expression] = [] 3615 for fld in pivot.args["field"].expressions: 3616 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3617 for name in names: 3618 if self.PREFIXED_PIVOT_COLUMNS: 3619 name = f"{name}_{field_name}" if name else field_name 3620 else: 3621 name = f"{field_name}_{name}" if name else 
field_name 3622 3623 columns.append(exp.to_identifier(name)) 3624 3625 pivot.set("columns", columns) 3626 3627 return pivot 3628 3629 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3630 return [agg.alias for agg in aggregations] 3631 3632 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3633 if not skip_where_token and not self._match(TokenType.PREWHERE): 3634 return None 3635 3636 return self.expression( 3637 exp.PreWhere, comments=self._prev_comments, this=self._parse_conjunction() 3638 ) 3639 3640 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3641 if not skip_where_token and not self._match(TokenType.WHERE): 3642 return None 3643 3644 return self.expression( 3645 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 3646 ) 3647 3648 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3649 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3650 return None 3651 3652 elements: t.Dict[str, t.Any] = defaultdict(list) 3653 3654 if self._match(TokenType.ALL): 3655 elements["all"] = True 3656 elif self._match(TokenType.DISTINCT): 3657 elements["all"] = False 3658 3659 while True: 3660 expressions = self._parse_csv( 3661 lambda: None 3662 if self._match(TokenType.ROLLUP, advance=False) 3663 else self._parse_conjunction() 3664 ) 3665 if expressions: 3666 elements["expressions"].extend(expressions) 3667 3668 grouping_sets = self._parse_grouping_sets() 3669 if grouping_sets: 3670 elements["grouping_sets"].extend(grouping_sets) 3671 3672 rollup = None 3673 cube = None 3674 totals = None 3675 3676 index = self._index 3677 with_ = self._match(TokenType.WITH) 3678 if self._match(TokenType.ROLLUP): 3679 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3680 elements["rollup"].extend(ensure_list(rollup)) 3681 3682 if self._match(TokenType.CUBE): 3683 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3684 elements["cube"].extend(ensure_list(cube)) 3685 3686 if self._match_text_seq("TOTALS"): 3687 totals = True 3688 elements["totals"] = True # type: ignore 3689 3690 if not (grouping_sets or rollup or cube or totals): 3691 if with_: 3692 self._retreat(index) 3693 break 3694 3695 return self.expression(exp.Group, **elements) # type: ignore 3696 3697 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3698 if not self._match(TokenType.GROUPING_SETS): 3699 return None 3700 3701 return self._parse_wrapped_csv(self._parse_grouping_set) 3702 3703 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3704 if self._match(TokenType.L_PAREN): 3705 grouping_set = self._parse_csv(self._parse_column) 3706 self._match_r_paren() 3707 return self.expression(exp.Tuple, expressions=grouping_set) 3708 3709 return self._parse_column() 3710 3711 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3712 if not skip_having_token and not self._match(TokenType.HAVING): 3713 return None 3714 return self.expression(exp.Having, this=self._parse_conjunction()) 3715 3716 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3717 if not self._match(TokenType.QUALIFY): 3718 return None 3719 return self.expression(exp.Qualify, this=self._parse_conjunction()) 3720 3721 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3722 if skip_start_token: 3723 start = None 3724 elif self._match(TokenType.START_WITH): 3725 start = self._parse_conjunction() 3726 else: 
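            # Neither skip_start_token nor a matched START WITH: this is not the
            # beginning of a hierarchical (CONNECT BY) clause, so bail out.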
3727 return None 3728 3729 self._match(TokenType.CONNECT_BY) 3730 nocycle = self._match_text_seq("NOCYCLE") 3731 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3732 exp.Prior, this=self._parse_bitwise() 3733 ) 3734 connect = self._parse_conjunction() 3735 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3736 3737 if not start and self._match(TokenType.START_WITH): 3738 start = self._parse_conjunction() 3739 3740 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 3741 3742 def _parse_name_as_expression(self) -> exp.Alias: 3743 return self.expression( 3744 exp.Alias, 3745 alias=self._parse_id_var(any_token=True), 3746 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 3747 ) 3748 3749 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3750 if self._match_text_seq("INTERPOLATE"): 3751 return self._parse_wrapped_csv(self._parse_name_as_expression) 3752 return None 3753 3754 def _parse_order( 3755 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3756 ) -> t.Optional[exp.Expression]: 3757 siblings = None 3758 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3759 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3760 return this 3761 3762 siblings = True 3763 3764 return self.expression( 3765 exp.Order, 3766 this=this, 3767 expressions=self._parse_csv(self._parse_ordered), 3768 interpolate=self._parse_interpolate(), 3769 siblings=siblings, 3770 ) 3771 3772 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3773 if not self._match(token): 3774 return None 3775 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3776 3777 def _parse_ordered( 3778 self, parse_method: t.Optional[t.Callable] = None 3779 ) -> t.Optional[exp.Ordered]: 3780 this = parse_method() if parse_method else self._parse_conjunction() 3781 if not this: 3782 return None 3783 3784 asc = self._match(TokenType.ASC) 3785 desc = self._match(TokenType.DESC) or (asc and False) 3786 3787 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3788 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3789 3790 nulls_first = is_nulls_first or False 3791 explicitly_null_ordered = is_nulls_first or is_nulls_last 3792 3793 if ( 3794 not explicitly_null_ordered 3795 and ( 3796 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3797 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3798 ) 3799 and self.dialect.NULL_ORDERING != "nulls_are_last" 3800 ): 3801 nulls_first = True 3802 3803 if self._match_text_seq("WITH", "FILL"): 3804 with_fill = self.expression( 3805 exp.WithFill, 3806 **{ # type: ignore 3807 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 3808 "to": self._match_text_seq("TO") and self._parse_bitwise(), 3809 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 3810 }, 3811 ) 3812 else: 3813 with_fill = None 3814 3815 return self.expression( 3816 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3817 ) 3818 3819 def _parse_limit( 3820 self, 3821 this: t.Optional[exp.Expression] = None, 3822 top: bool = False, 3823 skip_limit_token: bool = False, 3824 ) -> t.Optional[exp.Expression]: 3825 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 3826 comments = self._prev_comments 3827 if top: 3828 limit_paren = self._match(TokenType.L_PAREN) 3829 expression = self._parse_term() if limit_paren else self._parse_number() 3830 3831 if limit_paren: 3832 
self._match_r_paren() 3833 else: 3834 expression = self._parse_term() 3835 3836 if self._match(TokenType.COMMA): 3837 offset = expression 3838 expression = self._parse_term() 3839 else: 3840 offset = None 3841 3842 limit_exp = self.expression( 3843 exp.Limit, 3844 this=this, 3845 expression=expression, 3846 offset=offset, 3847 comments=comments, 3848 expressions=self._parse_limit_by(), 3849 ) 3850 3851 return limit_exp 3852 3853 if self._match(TokenType.FETCH): 3854 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3855 direction = self._prev.text.upper() if direction else "FIRST" 3856 3857 count = self._parse_field(tokens=self.FETCH_TOKENS) 3858 percent = self._match(TokenType.PERCENT) 3859 3860 self._match_set((TokenType.ROW, TokenType.ROWS)) 3861 3862 only = self._match_text_seq("ONLY") 3863 with_ties = self._match_text_seq("WITH", "TIES") 3864 3865 if only and with_ties: 3866 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3867 3868 return self.expression( 3869 exp.Fetch, 3870 direction=direction, 3871 count=count, 3872 percent=percent, 3873 with_ties=with_ties, 3874 ) 3875 3876 return this 3877 3878 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3879 if not self._match(TokenType.OFFSET): 3880 return this 3881 3882 count = self._parse_term() 3883 self._match_set((TokenType.ROW, TokenType.ROWS)) 3884 3885 return self.expression( 3886 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 3887 ) 3888 3889 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 3890 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 3891 3892 def _parse_locks(self) -> t.List[exp.Lock]: 3893 locks = [] 3894 while True: 3895 if self._match_text_seq("FOR", "UPDATE"): 3896 update = True 3897 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3898 "LOCK", "IN", "SHARE", "MODE" 3899 ): 3900 update = False 3901 else: 3902 break 3903 3904 expressions = None 3905 if self._match_text_seq("OF"): 3906 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3907 3908 wait: t.Optional[bool | exp.Expression] = None 3909 if self._match_text_seq("NOWAIT"): 3910 wait = True 3911 elif self._match_text_seq("WAIT"): 3912 wait = self._parse_primary() 3913 elif self._match_text_seq("SKIP", "LOCKED"): 3914 wait = False 3915 3916 locks.append( 3917 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3918 ) 3919 3920 return locks 3921 3922 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3923 while this and self._match_set(self.SET_OPERATIONS): 3924 token_type = self._prev.token_type 3925 3926 if token_type == TokenType.UNION: 3927 operation = exp.Union 3928 elif token_type == TokenType.EXCEPT: 3929 operation = exp.Except 3930 else: 3931 operation = exp.Intersect 3932 3933 comments = self._prev.comments 3934 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3935 by_name = self._match_text_seq("BY", "NAME") 3936 expression = self._parse_select(nested=True, parse_set_operation=False) 3937 3938 this = self.expression( 3939 operation, 3940 comments=comments, 3941 this=this, 3942 distinct=distinct, 3943 by_name=by_name, 3944 expression=expression, 3945 ) 3946 3947 if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION: 3948 expression = this.expression 3949 3950 if expression: 3951 for arg in self.UNION_MODIFIERS: 3952 expr = expression.args.get(arg) 3953 
if expr: 3954 this.set(arg, expr.pop()) 3955 3956 return this 3957 3958 def _parse_expression(self) -> t.Optional[exp.Expression]: 3959 return self._parse_alias(self._parse_conjunction()) 3960 3961 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3962 this = self._parse_equality() 3963 3964 if self._match(TokenType.COLON_EQ): 3965 this = self.expression( 3966 exp.PropertyEQ, 3967 this=this, 3968 comments=self._prev_comments, 3969 expression=self._parse_conjunction(), 3970 ) 3971 3972 while self._match_set(self.CONJUNCTION): 3973 this = self.expression( 3974 self.CONJUNCTION[self._prev.token_type], 3975 this=this, 3976 comments=self._prev_comments, 3977 expression=self._parse_equality(), 3978 ) 3979 return this 3980 3981 def _parse_equality(self) -> t.Optional[exp.Expression]: 3982 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3983 3984 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3985 return self._parse_tokens(self._parse_range, self.COMPARISON) 3986 3987 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3988 this = this or self._parse_bitwise() 3989 negate = self._match(TokenType.NOT) 3990 3991 if self._match_set(self.RANGE_PARSERS): 3992 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3993 if not expression: 3994 return this 3995 3996 this = expression 3997 elif self._match(TokenType.ISNULL): 3998 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3999 4000 # Postgres supports ISNULL and NOTNULL for conditions. 4001 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4002 if self._match(TokenType.NOTNULL): 4003 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4004 this = self.expression(exp.Not, this=this) 4005 4006 if negate: 4007 this = self.expression(exp.Not, this=this) 4008 4009 if self._match(TokenType.IS): 4010 this = self._parse_is(this) 4011 4012 return this 4013 4014 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4015 index = self._index - 1 4016 negate = self._match(TokenType.NOT) 4017 4018 if self._match_text_seq("DISTINCT", "FROM"): 4019 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4020 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4021 4022 expression = self._parse_null() or self._parse_boolean() 4023 if not expression: 4024 self._retreat(index) 4025 return None 4026 4027 this = self.expression(exp.Is, this=this, expression=expression) 4028 return self.expression(exp.Not, this=this) if negate else this 4029 4030 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4031 unnest = self._parse_unnest(with_alias=False) 4032 if unnest: 4033 this = self.expression(exp.In, this=this, unnest=unnest) 4034 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4035 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4036 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4037 4038 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4039 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4040 else: 4041 this = self.expression(exp.In, this=this, expressions=expressions) 4042 4043 if matched_l_paren: 4044 self._match_r_paren(this) 4045 elif not self._match(TokenType.R_BRACKET, expression=this): 4046 self.raise_error("Expecting ]") 4047 else: 4048 this = self.expression(exp.In, this=this, field=self._parse_field()) 4049 4050 return this 
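    # Illustrative sketch (not part of the parser): how the predicate parsers in
    # this region surface through sqlglot's public API. Assuming the default
    # dialect, something like
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     tree = sqlglot.parse_one("SELECT 1 WHERE x IN (1, 2) AND y BETWEEN 0 AND 10")
    #     assert isinstance(tree.find(exp.In), exp.In)  # built by _parse_in
    #     assert isinstance(tree.find(exp.Between), exp.Between)  # built by _parse_between
    #
    # exercises _parse_in for the parenthesized list and _parse_between for the
    # low/high bounds parsed via _parse_bitwise.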
4051 4052 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4053 low = self._parse_bitwise() 4054 self._match(TokenType.AND) 4055 high = self._parse_bitwise() 4056 return self.expression(exp.Between, this=this, low=low, high=high) 4057 4058 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4059 if not self._match(TokenType.ESCAPE): 4060 return this 4061 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4062 4063 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4064 index = self._index 4065 4066 if not self._match(TokenType.INTERVAL) and match_interval: 4067 return None 4068 4069 if self._match(TokenType.STRING, advance=False): 4070 this = self._parse_primary() 4071 else: 4072 this = self._parse_term() 4073 4074 if not this or ( 4075 isinstance(this, exp.Column) 4076 and not this.table 4077 and not this.this.quoted 4078 and this.name.upper() == "IS" 4079 ): 4080 self._retreat(index) 4081 return None 4082 4083 unit = self._parse_function() or ( 4084 not self._match(TokenType.ALIAS, advance=False) 4085 and self._parse_var(any_token=True, upper=True) 4086 ) 4087 4088 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4089 # each INTERVAL expression into this canonical form so it's easy to transpile 4090 if this and this.is_number: 4091 this = exp.Literal.string(this.name) 4092 elif this and this.is_string: 4093 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4094 if len(parts) == 1: 4095 if unit: 4096 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4097 self._retreat(self._index - 1) 4098 4099 this = exp.Literal.string(parts[0][0]) 4100 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4101 4102 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4103 unit = self.expression( 4104 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4105 ) 4106 4107 interval = self.expression(exp.Interval, this=this, unit=unit) 4108 4109 index = self._index 4110 self._match(TokenType.PLUS) 4111 4112 # Convert INTERVAL 'val_1' unit_1 [+] ... 
[+] 'val_n' unit_n into a sum of intervals 4113 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4114 return self.expression( 4115 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4116 ) 4117 4118 self._retreat(index) 4119 return interval 4120 4121 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4122 this = self._parse_term() 4123 4124 while True: 4125 if self._match_set(self.BITWISE): 4126 this = self.expression( 4127 self.BITWISE[self._prev.token_type], 4128 this=this, 4129 expression=self._parse_term(), 4130 ) 4131 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4132 this = self.expression( 4133 exp.DPipe, 4134 this=this, 4135 expression=self._parse_term(), 4136 safe=not self.dialect.STRICT_STRING_CONCAT, 4137 ) 4138 elif self._match(TokenType.DQMARK): 4139 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 4140 elif self._match_pair(TokenType.LT, TokenType.LT): 4141 this = self.expression( 4142 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4143 ) 4144 elif self._match_pair(TokenType.GT, TokenType.GT): 4145 this = self.expression( 4146 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4147 ) 4148 else: 4149 break 4150 4151 return this 4152 4153 def _parse_term(self) -> t.Optional[exp.Expression]: 4154 return self._parse_tokens(self._parse_factor, self.TERM) 4155 4156 def _parse_factor(self) -> t.Optional[exp.Expression]: 4157 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4158 this = parse_method() 4159 4160 while self._match_set(self.FACTOR): 4161 this = self.expression( 4162 self.FACTOR[self._prev.token_type], 4163 this=this, 4164 comments=self._prev_comments, 4165 expression=parse_method(), 4166 ) 4167 if isinstance(this, exp.Div): 4168 this.args["typed"] = self.dialect.TYPED_DIVISION 4169 this.args["safe"] = self.dialect.SAFE_DIVISION 4170 4171 return this 4172 4173 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4174 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4175 4176 def _parse_unary(self) -> t.Optional[exp.Expression]: 4177 if self._match_set(self.UNARY_PARSERS): 4178 return self.UNARY_PARSERS[self._prev.token_type](self) 4179 return self._parse_at_time_zone(self._parse_type()) 4180 4181 def _parse_type( 4182 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4183 ) -> t.Optional[exp.Expression]: 4184 interval = parse_interval and self._parse_interval() 4185 if interval: 4186 return interval 4187 4188 index = self._index 4189 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4190 4191 if data_type: 4192 index2 = self._index 4193 this = self._parse_primary() 4194 4195 if isinstance(this, exp.Literal): 4196 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4197 if parser: 4198 return parser(self, this, data_type) 4199 4200 return self.expression(exp.Cast, this=this, to=data_type) 4201 4202 if data_type.expressions: 4203 self._retreat(index2) 4204 return self._parse_column_ops(data_type) 4205 4206 self._retreat(index) 4207 4208 if fallback_to_identifier: 4209 return self._parse_id_var() 4210 4211 this = self._parse_column() 4212 return this and self._parse_column_ops(this) 4213 4214 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4215 this = self._parse_type() 4216 if not this: 4217 return None 4218 4219 if isinstance(this, exp.Column) and not this.table: 4220 this = exp.var(this.name.upper()) 4221 4222 return 
self.expression( 4223 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4224 ) 4225 4226 def _parse_types( 4227 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4228 ) -> t.Optional[exp.Expression]: 4229 index = self._index 4230 4231 this: t.Optional[exp.Expression] = None 4232 prefix = self._match_text_seq("SYSUDTLIB", ".") 4233 4234 if not self._match_set(self.TYPE_TOKENS): 4235 identifier = allow_identifiers and self._parse_id_var( 4236 any_token=False, tokens=(TokenType.VAR,) 4237 ) 4238 if identifier: 4239 tokens = self.dialect.tokenize(identifier.name) 4240 4241 if len(tokens) != 1: 4242 self.raise_error("Unexpected identifier", self._prev) 4243 4244 if tokens[0].token_type in self.TYPE_TOKENS: 4245 self._prev = tokens[0] 4246 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4247 type_name = identifier.name 4248 4249 while self._match(TokenType.DOT): 4250 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4251 4252 this = exp.DataType.build(type_name, udt=True) 4253 else: 4254 self._retreat(self._index - 1) 4255 return None 4256 else: 4257 return None 4258 4259 type_token = self._prev.token_type 4260 4261 if type_token == TokenType.PSEUDO_TYPE: 4262 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4263 4264 if type_token == TokenType.OBJECT_IDENTIFIER: 4265 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4266 4267 nested = type_token in self.NESTED_TYPE_TOKENS 4268 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4269 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4270 expressions = None 4271 maybe_func = False 4272 4273 if self._match(TokenType.L_PAREN): 4274 if is_struct: 4275 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4276 elif nested: 4277 expressions = self._parse_csv( 4278 lambda: self._parse_types( 4279 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4280 ) 4281 ) 4282 elif type_token in self.ENUM_TYPE_TOKENS: 4283 expressions = self._parse_csv(self._parse_equality) 4284 elif is_aggregate: 4285 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4286 any_token=False, tokens=(TokenType.VAR,) 4287 ) 4288 if not func_or_ident or not self._match(TokenType.COMMA): 4289 return None 4290 expressions = self._parse_csv( 4291 lambda: self._parse_types( 4292 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4293 ) 4294 ) 4295 expressions.insert(0, func_or_ident) 4296 else: 4297 expressions = self._parse_csv(self._parse_type_size) 4298 4299 if not expressions or not self._match(TokenType.R_PAREN): 4300 self._retreat(index) 4301 return None 4302 4303 maybe_func = True 4304 4305 values: t.Optional[t.List[exp.Expression]] = None 4306 4307 if nested and self._match(TokenType.LT): 4308 if is_struct: 4309 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4310 else: 4311 expressions = self._parse_csv( 4312 lambda: self._parse_types( 4313 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4314 ) 4315 ) 4316 4317 if not self._match(TokenType.GT): 4318 self.raise_error("Expecting >") 4319 4320 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4321 values = self._parse_csv(self._parse_conjunction) 4322 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4323 4324 if type_token in self.TIMESTAMPS: 4325 if self._match_text_seq("WITH", "TIME", "ZONE"): 4326 maybe_func = False 4327 
tz_type = ( 4328 exp.DataType.Type.TIMETZ 4329 if type_token in self.TIMES 4330 else exp.DataType.Type.TIMESTAMPTZ 4331 ) 4332 this = exp.DataType(this=tz_type, expressions=expressions) 4333 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4334 maybe_func = False 4335 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4336 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4337 maybe_func = False 4338 elif type_token == TokenType.INTERVAL: 4339 unit = self._parse_var(upper=True) 4340 if unit: 4341 if self._match_text_seq("TO"): 4342 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4343 4344 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4345 else: 4346 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4347 4348 if maybe_func and check_func: 4349 index2 = self._index 4350 peek = self._parse_string() 4351 4352 if not peek: 4353 self._retreat(index) 4354 return None 4355 4356 self._retreat(index2) 4357 4358 if not this: 4359 if self._match_text_seq("UNSIGNED"): 4360 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4361 if not unsigned_type_token: 4362 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4363 4364 type_token = unsigned_type_token or type_token 4365 4366 this = exp.DataType( 4367 this=exp.DataType.Type[type_token.value], 4368 expressions=expressions, 4369 nested=nested, 4370 values=values, 4371 prefix=prefix, 4372 ) 4373 elif expressions: 4374 this.set("expressions", expressions) 4375 4376 index = self._index 4377 4378 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4379 matched_array = self._match(TokenType.ARRAY) 4380 4381 while self._curr: 4382 matched_l_bracket = self._match(TokenType.L_BRACKET) 4383 if not matched_l_bracket and not matched_array: 4384 break 4385 4386 matched_array = False 4387 values = self._parse_csv(self._parse_conjunction) or None 4388 if values and not schema: 4389 self._retreat(index) 4390 break 4391 4392 this = exp.DataType( 4393 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 4394 ) 4395 self._match(TokenType.R_BRACKET) 4396 4397 if self.TYPE_CONVERTER and isinstance(this.this, exp.DataType.Type): 4398 converter = self.TYPE_CONVERTER.get(this.this) 4399 if converter: 4400 this = converter(t.cast(exp.DataType, this)) 4401 4402 return this 4403 4404 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4405 index = self._index 4406 this = ( 4407 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4408 or self._parse_id_var() 4409 ) 4410 self._match(TokenType.COLON) 4411 4412 if ( 4413 type_required 4414 and not isinstance(this, exp.DataType) 4415 and not self._match_set(self.TYPE_TOKENS, advance=False) 4416 ): 4417 self._retreat(index) 4418 return self._parse_types() 4419 4420 return self._parse_column_def(this) 4421 4422 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4423 if not self._match_text_seq("AT", "TIME", "ZONE"): 4424 return this 4425 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4426 4427 def _parse_column(self) -> t.Optional[exp.Expression]: 4428 this = self._parse_column_reference() 4429 return self._parse_column_ops(this) if this else self._parse_bracket(this) 4430 4431 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4432 this = self._parse_field() 4433 if ( 4434 not this 4435 and 
self._match(TokenType.VALUES, advance=False) 4436 and self.VALUES_FOLLOWED_BY_PAREN 4437 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4438 ): 4439 this = self._parse_id_var() 4440 4441 if isinstance(this, exp.Identifier): 4442 # We bubble up comments from the Identifier to the Column 4443 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4444 4445 return this 4446 4447 def _parse_colon_as_json_extract( 4448 self, this: t.Optional[exp.Expression] 4449 ) -> t.Optional[exp.Expression]: 4450 casts = [] 4451 json_path = [] 4452 4453 while self._match(TokenType.COLON): 4454 start_index = self._index 4455 path = self._parse_column_ops(self._parse_field(any_token=True)) 4456 4457 # The cast :: operator has a lower precedence than the extraction operator :, so 4458 # we rearrange the AST appropriately to avoid casting the JSON path 4459 while isinstance(path, exp.Cast): 4460 casts.append(path.to) 4461 path = path.this 4462 4463 if casts: 4464 dcolon_offset = next( 4465 i 4466 for i, t in enumerate(self._tokens[start_index:]) 4467 if t.token_type == TokenType.DCOLON 4468 ) 4469 end_token = self._tokens[start_index + dcolon_offset - 1] 4470 else: 4471 end_token = self._prev 4472 4473 if path: 4474 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 4475 4476 if json_path: 4477 this = self.expression( 4478 exp.JSONExtract, 4479 this=this, 4480 expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))), 4481 ) 4482 4483 while casts: 4484 this = self.expression(exp.Cast, this=this, to=casts.pop()) 4485 4486 return this 4487 4488 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4489 this = self._parse_bracket(this) 4490 4491 while self._match_set(self.COLUMN_OPERATORS): 4492 op_token = self._prev.token_type 4493 op = self.COLUMN_OPERATORS.get(op_token) 4494 4495 if op_token == TokenType.DCOLON: 4496 field = self._parse_types() 4497 if not field: 4498 self.raise_error("Expected type") 4499 elif op and self._curr: 4500 field = self._parse_column_reference() 4501 else: 4502 field = self._parse_field(any_token=True, anonymous_func=True) 4503 4504 if isinstance(field, exp.Func) and this: 4505 # bigquery allows function calls like x.y.count(...) 4506 # SAFE.SUBSTR(...) 
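                # When that happens, the dotted prefix parsed so far is rewritten
                # below into exp.Dot nodes, so the function call binds to the full path.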
4507 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4508 this = exp.replace_tree( 4509 this, 4510 lambda n: ( 4511 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 4512 if n.table 4513 else n.this 4514 ) 4515 if isinstance(n, exp.Column) 4516 else n, 4517 ) 4518 4519 if op: 4520 this = op(self, this, field) 4521 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4522 this = self.expression( 4523 exp.Column, 4524 this=field, 4525 table=this.this, 4526 db=this.args.get("table"), 4527 catalog=this.args.get("db"), 4528 ) 4529 else: 4530 this = self.expression(exp.Dot, this=this, expression=field) 4531 4532 this = self._parse_bracket(this) 4533 4534 return self._parse_colon_as_json_extract(this) if self.COLON_IS_JSON_EXTRACT else this 4535 4536 def _parse_primary(self) -> t.Optional[exp.Expression]: 4537 if self._match_set(self.PRIMARY_PARSERS): 4538 token_type = self._prev.token_type 4539 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4540 4541 if token_type == TokenType.STRING: 4542 expressions = [primary] 4543 while self._match(TokenType.STRING): 4544 expressions.append(exp.Literal.string(self._prev.text)) 4545 4546 if len(expressions) > 1: 4547 return self.expression(exp.Concat, expressions=expressions) 4548 4549 return primary 4550 4551 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4552 return exp.Literal.number(f"0.{self._prev.text}") 4553 4554 if self._match(TokenType.L_PAREN): 4555 comments = self._prev_comments 4556 query = self._parse_select() 4557 4558 if query: 4559 expressions = [query] 4560 else: 4561 expressions = self._parse_expressions() 4562 4563 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4564 4565 if not this and self._match(TokenType.R_PAREN, advance=False): 4566 this = self.expression(exp.Tuple) 4567 elif isinstance(this, exp.UNWRAPPED_QUERIES): 4568 this = self._parse_subquery(this=this, parse_alias=False) 4569 elif isinstance(this, exp.Subquery): 4570 this = self._parse_subquery( 4571 this=self._parse_set_operations(this), parse_alias=False 4572 ) 4573 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 4574 this = self.expression(exp.Tuple, expressions=expressions) 4575 else: 4576 this = self.expression(exp.Paren, this=this) 4577 4578 if this: 4579 this.add_comments(comments) 4580 4581 self._match_r_paren(expression=this) 4582 return this 4583 4584 return None 4585 4586 def _parse_field( 4587 self, 4588 any_token: bool = False, 4589 tokens: t.Optional[t.Collection[TokenType]] = None, 4590 anonymous_func: bool = False, 4591 ) -> t.Optional[exp.Expression]: 4592 if anonymous_func: 4593 field = ( 4594 self._parse_function(anonymous=anonymous_func, any_token=any_token) 4595 or self._parse_primary() 4596 ) 4597 else: 4598 field = self._parse_primary() or self._parse_function( 4599 anonymous=anonymous_func, any_token=any_token 4600 ) 4601 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 4602 4603 def _parse_function( 4604 self, 4605 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4606 anonymous: bool = False, 4607 optional_parens: bool = True, 4608 any_token: bool = False, 4609 ) -> t.Optional[exp.Expression]: 4610 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 4611 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4612 fn_syntax = False 4613 if ( 4614 self._match(TokenType.L_BRACE, advance=False) 4615 and self._next 4616 and 
self._next.text.upper() == "FN" 4617 ): 4618 self._advance(2) 4619 fn_syntax = True 4620 4621 func = self._parse_function_call( 4622 functions=functions, 4623 anonymous=anonymous, 4624 optional_parens=optional_parens, 4625 any_token=any_token, 4626 ) 4627 4628 if fn_syntax: 4629 self._match(TokenType.R_BRACE) 4630 4631 return func 4632 4633 def _parse_function_call( 4634 self, 4635 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4636 anonymous: bool = False, 4637 optional_parens: bool = True, 4638 any_token: bool = False, 4639 ) -> t.Optional[exp.Expression]: 4640 if not self._curr: 4641 return None 4642 4643 comments = self._curr.comments 4644 token_type = self._curr.token_type 4645 this = self._curr.text 4646 upper = this.upper() 4647 4648 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4649 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4650 self._advance() 4651 return self._parse_window(parser(self)) 4652 4653 if not self._next or self._next.token_type != TokenType.L_PAREN: 4654 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 4655 self._advance() 4656 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 4657 4658 return None 4659 4660 if any_token: 4661 if token_type in self.RESERVED_TOKENS: 4662 return None 4663 elif token_type not in self.FUNC_TOKENS: 4664 return None 4665 4666 self._advance(2) 4667 4668 parser = self.FUNCTION_PARSERS.get(upper) 4669 if parser and not anonymous: 4670 this = parser(self) 4671 else: 4672 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 4673 4674 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 4675 this = self.expression(subquery_predicate, this=self._parse_select()) 4676 self._match_r_paren() 4677 return this 4678 4679 if functions is None: 4680 functions = self.FUNCTIONS 4681 4682 function = functions.get(upper) 4683 4684 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 4685 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 4686 4687 if alias: 4688 args = self._kv_to_prop_eq(args) 4689 4690 if function and not anonymous: 4691 if "dialect" in function.__code__.co_varnames: 4692 func = function(args, dialect=self.dialect) 4693 else: 4694 func = function(args) 4695 4696 func = self.validate_expression(func, args) 4697 if not self.dialect.NORMALIZE_FUNCTIONS: 4698 func.meta["name"] = this 4699 4700 this = func 4701 else: 4702 if token_type == TokenType.IDENTIFIER: 4703 this = exp.Identifier(this=this, quoted=True) 4704 this = self.expression(exp.Anonymous, this=this, expressions=args) 4705 4706 if isinstance(this, exp.Expression): 4707 this.add_comments(comments) 4708 4709 self._match_r_paren(this) 4710 return self._parse_window(this) 4711 4712 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 4713 transformed = [] 4714 4715 for e in expressions: 4716 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 4717 if isinstance(e, exp.Alias): 4718 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 4719 4720 if not isinstance(e, exp.PropertyEQ): 4721 e = self.expression( 4722 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 4723 ) 4724 4725 if isinstance(e.this, exp.Column): 4726 e.this.replace(e.this.this) 4727 4728 transformed.append(e) 4729 4730 return transformed 4731 4732 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4733 return self._parse_column_def(self._parse_id_var()) 4734 4735 def _parse_user_defined_function( 
4736 self, kind: t.Optional[TokenType] = None 4737 ) -> t.Optional[exp.Expression]: 4738 this = self._parse_id_var() 4739 4740 while self._match(TokenType.DOT): 4741 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4742 4743 if not self._match(TokenType.L_PAREN): 4744 return this 4745 4746 expressions = self._parse_csv(self._parse_function_parameter) 4747 self._match_r_paren() 4748 return self.expression( 4749 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4750 ) 4751 4752 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4753 literal = self._parse_primary() 4754 if literal: 4755 return self.expression(exp.Introducer, this=token.text, expression=literal) 4756 4757 return self.expression(exp.Identifier, this=token.text) 4758 4759 def _parse_session_parameter(self) -> exp.SessionParameter: 4760 kind = None 4761 this = self._parse_id_var() or self._parse_primary() 4762 4763 if this and self._match(TokenType.DOT): 4764 kind = this.name 4765 this = self._parse_var() or self._parse_primary() 4766 4767 return self.expression(exp.SessionParameter, this=this, kind=kind) 4768 4769 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 4770 return self._parse_id_var() 4771 4772 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4773 index = self._index 4774 4775 if self._match(TokenType.L_PAREN): 4776 expressions = t.cast( 4777 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 4778 ) 4779 4780 if not self._match(TokenType.R_PAREN): 4781 self._retreat(index) 4782 else: 4783 expressions = [self._parse_lambda_arg()] 4784 4785 if self._match_set(self.LAMBDAS): 4786 return self.LAMBDAS[self._prev.token_type](self, expressions) 4787 4788 self._retreat(index) 4789 4790 this: t.Optional[exp.Expression] 4791 4792 if self._match(TokenType.DISTINCT): 4793 this = self.expression( 4794 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 4795 ) 4796 else: 4797 this = self._parse_select_or_expression(alias=alias) 4798 4799 return self._parse_limit( 4800 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 4801 ) 4802 4803 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4804 index = self._index 4805 if not self._match(TokenType.L_PAREN): 4806 return this 4807 4808 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 4809 # expr can be of both types 4810 if self._match_set(self.SELECT_START_TOKENS): 4811 self._retreat(index) 4812 return this 4813 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 4814 self._match_r_paren() 4815 return self.expression(exp.Schema, this=this, expressions=args) 4816 4817 def _parse_field_def(self) -> t.Optional[exp.Expression]: 4818 return self._parse_column_def(self._parse_field(any_token=True)) 4819 4820 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4821 # column defs are not really columns, they're identifiers 4822 if isinstance(this, exp.Column): 4823 this = this.this 4824 4825 kind = self._parse_types(schema=True) 4826 4827 if self._match_text_seq("FOR", "ORDINALITY"): 4828 return self.expression(exp.ColumnDef, this=this, ordinality=True) 4829 4830 constraints: t.List[exp.Expression] = [] 4831 4832 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 4833 ("ALIAS", "MATERIALIZED") 4834 ): 4835 persisted = self._prev.text.upper() == "MATERIALIZED" 4836 constraints.append( 4837 self.expression( 4838 exp.ComputedColumnConstraint, 4839 this=self._parse_conjunction(), 4840 persisted=persisted or self._match_text_seq("PERSISTED"), 4841 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 4842 ) 4843 ) 4844 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 4845 self._match(TokenType.ALIAS) 4846 constraints.append( 4847 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 4848 ) 4849 4850 while True: 4851 constraint = self._parse_column_constraint() 4852 if not constraint: 4853 break 4854 constraints.append(constraint) 4855 4856 if not kind and not constraints: 4857 return this 4858 4859 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 4860 4861 def _parse_auto_increment( 4862 self, 4863 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 4864 start = None 4865 increment = None 4866 4867 if self._match(TokenType.L_PAREN, advance=False): 4868 args = self._parse_wrapped_csv(self._parse_bitwise) 4869 start = seq_get(args, 0) 4870 increment = seq_get(args, 1) 4871 elif self._match_text_seq("START"): 4872 start = self._parse_bitwise() 4873 self._match_text_seq("INCREMENT") 4874 increment = self._parse_bitwise() 4875 4876 if start and increment: 4877 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 4878 4879 return exp.AutoIncrementColumnConstraint() 4880 4881 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 4882 if not self._match_text_seq("REFRESH"): 4883 self._retreat(self._index - 1) 4884 return None 4885 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 4886 4887 def _parse_compress(self) -> exp.CompressColumnConstraint: 4888 if self._match(TokenType.L_PAREN, advance=False): 4889 return self.expression( 4890 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 4891 ) 4892 4893 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 4894 4895 def _parse_generated_as_identity( 4896 self, 4897 ) -> ( 4898 exp.GeneratedAsIdentityColumnConstraint 4899 | exp.ComputedColumnConstraint 4900 | exp.GeneratedAsRowColumnConstraint 4901 ): 4902 if self._match_text_seq("BY", "DEFAULT"): 4903 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 4904 this = self.expression( 4905 
exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 4906 ) 4907 else: 4908 self._match_text_seq("ALWAYS") 4909 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 4910 4911 self._match(TokenType.ALIAS) 4912 4913 if self._match_text_seq("ROW"): 4914 start = self._match_text_seq("START") 4915 if not start: 4916 self._match(TokenType.END) 4917 hidden = self._match_text_seq("HIDDEN") 4918 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 4919 4920 identity = self._match_text_seq("IDENTITY") 4921 4922 if self._match(TokenType.L_PAREN): 4923 if self._match(TokenType.START_WITH): 4924 this.set("start", self._parse_bitwise()) 4925 if self._match_text_seq("INCREMENT", "BY"): 4926 this.set("increment", self._parse_bitwise()) 4927 if self._match_text_seq("MINVALUE"): 4928 this.set("minvalue", self._parse_bitwise()) 4929 if self._match_text_seq("MAXVALUE"): 4930 this.set("maxvalue", self._parse_bitwise()) 4931 4932 if self._match_text_seq("CYCLE"): 4933 this.set("cycle", True) 4934 elif self._match_text_seq("NO", "CYCLE"): 4935 this.set("cycle", False) 4936 4937 if not identity: 4938 this.set("expression", self._parse_range()) 4939 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 4940 args = self._parse_csv(self._parse_bitwise) 4941 this.set("start", seq_get(args, 0)) 4942 this.set("increment", seq_get(args, 1)) 4943 4944 self._match_r_paren() 4945 4946 return this 4947 4948 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 4949 self._match_text_seq("LENGTH") 4950 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4951 4952 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 4953 if self._match_text_seq("NULL"): 4954 return self.expression(exp.NotNullColumnConstraint) 4955 if self._match_text_seq("CASESPECIFIC"): 4956 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4957 if self._match_text_seq("FOR", "REPLICATION"): 4958 return self.expression(exp.NotForReplicationColumnConstraint) 4959 return None 4960 4961 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4962 if self._match(TokenType.CONSTRAINT): 4963 this = self._parse_id_var() 4964 else: 4965 this = None 4966 4967 if self._match_texts(self.CONSTRAINT_PARSERS): 4968 return self.expression( 4969 exp.ColumnConstraint, 4970 this=this, 4971 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 4972 ) 4973 4974 return this 4975 4976 def _parse_constraint(self) -> t.Optional[exp.Expression]: 4977 if not self._match(TokenType.CONSTRAINT): 4978 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 4979 4980 return self.expression( 4981 exp.Constraint, 4982 this=self._parse_id_var(), 4983 expressions=self._parse_unnamed_constraints(), 4984 ) 4985 4986 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 4987 constraints = [] 4988 while True: 4989 constraint = self._parse_unnamed_constraint() or self._parse_function() 4990 if not constraint: 4991 break 4992 constraints.append(constraint) 4993 4994 return constraints 4995 4996 def _parse_unnamed_constraint( 4997 self, constraints: t.Optional[t.Collection[str]] = None 4998 ) -> t.Optional[exp.Expression]: 4999 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5000 constraints or self.CONSTRAINT_PARSERS 5001 ): 5002 return None 5003 5004 constraint = self._prev.text.upper() 5005 if constraint not in self.CONSTRAINT_PARSERS: 5006 
self.raise_error(f"No parser found for schema constraint {constraint}.") 5007 5008 return self.CONSTRAINT_PARSERS[constraint](self) 5009 5010 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5011 self._match_text_seq("KEY") 5012 return self.expression( 5013 exp.UniqueColumnConstraint, 5014 this=self._parse_schema(self._parse_id_var(any_token=False)), 5015 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5016 on_conflict=self._parse_on_conflict(), 5017 ) 5018 5019 def _parse_key_constraint_options(self) -> t.List[str]: 5020 options = [] 5021 while True: 5022 if not self._curr: 5023 break 5024 5025 if self._match(TokenType.ON): 5026 action = None 5027 on = self._advance_any() and self._prev.text 5028 5029 if self._match_text_seq("NO", "ACTION"): 5030 action = "NO ACTION" 5031 elif self._match_text_seq("CASCADE"): 5032 action = "CASCADE" 5033 elif self._match_text_seq("RESTRICT"): 5034 action = "RESTRICT" 5035 elif self._match_pair(TokenType.SET, TokenType.NULL): 5036 action = "SET NULL" 5037 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5038 action = "SET DEFAULT" 5039 else: 5040 self.raise_error("Invalid key constraint") 5041 5042 options.append(f"ON {on} {action}") 5043 elif self._match_text_seq("NOT", "ENFORCED"): 5044 options.append("NOT ENFORCED") 5045 elif self._match_text_seq("DEFERRABLE"): 5046 options.append("DEFERRABLE") 5047 elif self._match_text_seq("INITIALLY", "DEFERRED"): 5048 options.append("INITIALLY DEFERRED") 5049 elif self._match_text_seq("NORELY"): 5050 options.append("NORELY") 5051 elif self._match_text_seq("MATCH", "FULL"): 5052 options.append("MATCH FULL") 5053 else: 5054 break 5055 5056 return options 5057 5058 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5059 if match and not self._match(TokenType.REFERENCES): 5060 return None 5061 5062 expressions = None 5063 this = self._parse_table(schema=True) 5064 options = self._parse_key_constraint_options() 5065 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5066 5067 def _parse_foreign_key(self) -> exp.ForeignKey: 5068 expressions = self._parse_wrapped_id_vars() 5069 reference = self._parse_references() 5070 options = {} 5071 5072 while self._match(TokenType.ON): 5073 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5074 self.raise_error("Expected DELETE or UPDATE") 5075 5076 kind = self._prev.text.lower() 5077 5078 if self._match_text_seq("NO", "ACTION"): 5079 action = "NO ACTION" 5080 elif self._match(TokenType.SET): 5081 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5082 action = "SET " + self._prev.text.upper() 5083 else: 5084 self._advance() 5085 action = self._prev.text.upper() 5086 5087 options[kind] = action 5088 5089 return self.expression( 5090 exp.ForeignKey, 5091 expressions=expressions, 5092 reference=reference, 5093 **options, # type: ignore 5094 ) 5095 5096 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5097 return self._parse_field() 5098 5099 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5100 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5101 self._retreat(self._index - 1) 5102 return None 5103 5104 id_vars = self._parse_wrapped_id_vars() 5105 return self.expression( 5106 exp.PeriodForSystemTimeConstraint, 5107 this=seq_get(id_vars, 0), 5108 expression=seq_get(id_vars, 1), 5109 ) 5110 5111 def _parse_primary_key( 5112 self, wrapped_optional: bool = False, in_props: bool = False 5113 ) -> 
exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5114 desc = ( 5115 self._match_set((TokenType.ASC, TokenType.DESC)) 5116 and self._prev.token_type == TokenType.DESC 5117 ) 5118 5119 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5120 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5121 5122 expressions = self._parse_wrapped_csv( 5123 self._parse_primary_key_part, optional=wrapped_optional 5124 ) 5125 options = self._parse_key_constraint_options() 5126 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5127 5128 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5129 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 5130 5131 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5132 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5133 return this 5134 5135 bracket_kind = self._prev.token_type 5136 expressions = self._parse_csv( 5137 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5138 ) 5139 5140 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5141 self.raise_error("Expected ]") 5142 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5143 self.raise_error("Expected }") 5144 5145 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5146 if bracket_kind == TokenType.L_BRACE: 5147 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5148 elif not this or this.name.upper() == "ARRAY": 5149 this = self.expression(exp.Array, expressions=expressions) 5150 else: 5151 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5152 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5153 5154 self._add_comments(this) 5155 return self._parse_bracket(this) 5156 5157 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5158 if self._match(TokenType.COLON): 5159 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 5160 return this 5161 5162 def _parse_case(self) -> t.Optional[exp.Expression]: 5163 ifs = [] 5164 default = None 5165 5166 comments = self._prev_comments 5167 expression = self._parse_conjunction() 5168 5169 while self._match(TokenType.WHEN): 5170 this = self._parse_conjunction() 5171 self._match(TokenType.THEN) 5172 then = self._parse_conjunction() 5173 ifs.append(self.expression(exp.If, this=this, true=then)) 5174 5175 if self._match(TokenType.ELSE): 5176 default = self._parse_conjunction() 5177 5178 if not self._match(TokenType.END): 5179 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5180 default = exp.column("interval") 5181 else: 5182 self.raise_error("Expected END after CASE", self._prev) 5183 5184 return self.expression( 5185 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5186 ) 5187 5188 def _parse_if(self) -> t.Optional[exp.Expression]: 5189 if self._match(TokenType.L_PAREN): 5190 args = self._parse_csv(self._parse_conjunction) 5191 this = self.validate_expression(exp.If.from_arg_list(args), args) 5192 self._match_r_paren() 5193 else: 5194 index = self._index - 1 5195 5196 if self.NO_PAREN_IF_COMMANDS and index == 0: 5197 return self._parse_as_command(self._prev) 5198 5199 condition = self._parse_conjunction() 5200 5201 if not condition: 5202 self._retreat(index) 5203 return None 5204 
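            # No-paren form (e.g. IF <cond> THEN <expr> [ELSE <expr>] END): the
            # condition was parsed above, so consume the THEN / ELSE / END tail.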
5205 self._match(TokenType.THEN) 5206 true = self._parse_conjunction() 5207 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 5208 self._match(TokenType.END) 5209 this = self.expression(exp.If, this=condition, true=true, false=false) 5210 5211 return this 5212 5213 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5214 if not self._match_text_seq("VALUE", "FOR"): 5215 self._retreat(self._index - 1) 5216 return None 5217 5218 return self.expression( 5219 exp.NextValueFor, 5220 this=self._parse_column(), 5221 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5222 ) 5223 5224 def _parse_extract(self) -> exp.Extract: 5225 this = self._parse_function() or self._parse_var() or self._parse_type() 5226 5227 if self._match(TokenType.FROM): 5228 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5229 5230 if not self._match(TokenType.COMMA): 5231 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5232 5233 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5234 5235 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5236 this = self._parse_conjunction() 5237 5238 if not self._match(TokenType.ALIAS): 5239 if self._match(TokenType.COMMA): 5240 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5241 5242 self.raise_error("Expected AS after CAST") 5243 5244 fmt = None 5245 to = self._parse_types() 5246 5247 if self._match(TokenType.FORMAT): 5248 fmt_string = self._parse_string() 5249 fmt = self._parse_at_time_zone(fmt_string) 5250 5251 if not to: 5252 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5253 if to.this in exp.DataType.TEMPORAL_TYPES: 5254 this = self.expression( 5255 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5256 this=this, 5257 format=exp.Literal.string( 5258 format_time( 5259 fmt_string.this if fmt_string else "", 5260 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5261 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5262 ) 5263 ), 5264 ) 5265 5266 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5267 this.set("zone", fmt.args["zone"]) 5268 return this 5269 elif not to: 5270 self.raise_error("Expected TYPE after CAST") 5271 elif isinstance(to, exp.Identifier): 5272 to = exp.DataType.build(to.name, udt=True) 5273 elif to.this == exp.DataType.Type.CHAR: 5274 if self._match(TokenType.CHARACTER_SET): 5275 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5276 5277 return self.expression( 5278 exp.Cast if strict else exp.TryCast, 5279 this=this, 5280 to=to, 5281 format=fmt, 5282 safe=safe, 5283 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5284 ) 5285 5286 def _parse_string_agg(self) -> exp.Expression: 5287 if self._match(TokenType.DISTINCT): 5288 args: t.List[t.Optional[exp.Expression]] = [ 5289 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 5290 ] 5291 if self._match(TokenType.COMMA): 5292 args.extend(self._parse_csv(self._parse_conjunction)) 5293 else: 5294 args = self._parse_csv(self._parse_conjunction) # type: ignore 5295 5296 index = self._index 5297 if not self._match(TokenType.R_PAREN) and args: 5298 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5299 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... 
]] [LIMIT n]) 5300 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5301 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5302 5303 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5304 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5305 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5306 if not self._match_text_seq("WITHIN", "GROUP"): 5307 self._retreat(index) 5308 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5309 5310 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5311 order = self._parse_order(this=seq_get(args, 0)) 5312 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5313 5314 def _parse_convert( 5315 self, strict: bool, safe: t.Optional[bool] = None 5316 ) -> t.Optional[exp.Expression]: 5317 this = self._parse_bitwise() 5318 5319 if self._match(TokenType.USING): 5320 to: t.Optional[exp.Expression] = self.expression( 5321 exp.CharacterSet, this=self._parse_var() 5322 ) 5323 elif self._match(TokenType.COMMA): 5324 to = self._parse_types() 5325 else: 5326 to = None 5327 5328 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5329 5330 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5331 """ 5332 There are generally two variants of the DECODE function: 5333 5334 - DECODE(bin, charset) 5335 - DECODE(expression, search, result [, search, result] ... [, default]) 5336 5337 The second variant will always be parsed into a CASE expression. Note that NULL 5338 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5339 instead of relying on pattern matching. 
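For illustration, a call in the second form maps roughly to a searched CASE
(the exact SQL emitted depends on the target dialect):

    DECODE(x, 1, 'one', NULL, 'none', 'other')
    -> CASE WHEN x = 1 THEN 'one' WHEN x IS NULL THEN 'none' ELSE 'other' END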
5340 """ 5341 args = self._parse_csv(self._parse_conjunction) 5342 5343 if len(args) < 3: 5344 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5345 5346 expression, *expressions = args 5347 if not expression: 5348 return None 5349 5350 ifs = [] 5351 for search, result in zip(expressions[::2], expressions[1::2]): 5352 if not search or not result: 5353 return None 5354 5355 if isinstance(search, exp.Literal): 5356 ifs.append( 5357 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5358 ) 5359 elif isinstance(search, exp.Null): 5360 ifs.append( 5361 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5362 ) 5363 else: 5364 cond = exp.or_( 5365 exp.EQ(this=expression.copy(), expression=search), 5366 exp.and_( 5367 exp.Is(this=expression.copy(), expression=exp.Null()), 5368 exp.Is(this=search.copy(), expression=exp.Null()), 5369 copy=False, 5370 ), 5371 copy=False, 5372 ) 5373 ifs.append(exp.If(this=cond, true=result)) 5374 5375 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5376 5377 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5378 self._match_text_seq("KEY") 5379 key = self._parse_column() 5380 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5381 self._match_text_seq("VALUE") 5382 value = self._parse_bitwise() 5383 5384 if not key and not value: 5385 return None 5386 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5387 5388 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5389 if not this or not self._match_text_seq("FORMAT", "JSON"): 5390 return this 5391 5392 return self.expression(exp.FormatJson, this=this) 5393 5394 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5395 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 5396 for value in values: 5397 if self._match_text_seq(value, "ON", on): 5398 return f"{value} ON {on}" 5399 5400 return None 5401 5402 @t.overload 5403 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5404 5405 @t.overload 5406 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
5407 5408 def _parse_json_object(self, agg=False): 5409 star = self._parse_star() 5410 expressions = ( 5411 [star] 5412 if star 5413 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5414 ) 5415 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5416 5417 unique_keys = None 5418 if self._match_text_seq("WITH", "UNIQUE"): 5419 unique_keys = True 5420 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5421 unique_keys = False 5422 5423 self._match_text_seq("KEYS") 5424 5425 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5426 self._parse_type() 5427 ) 5428 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5429 5430 return self.expression( 5431 exp.JSONObjectAgg if agg else exp.JSONObject, 5432 expressions=expressions, 5433 null_handling=null_handling, 5434 unique_keys=unique_keys, 5435 return_type=return_type, 5436 encoding=encoding, 5437 ) 5438 5439 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5440 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5441 if not self._match_text_seq("NESTED"): 5442 this = self._parse_id_var() 5443 kind = self._parse_types(allow_identifiers=False) 5444 nested = None 5445 else: 5446 this = None 5447 kind = None 5448 nested = True 5449 5450 path = self._match_text_seq("PATH") and self._parse_string() 5451 nested_schema = nested and self._parse_json_schema() 5452 5453 return self.expression( 5454 exp.JSONColumnDef, 5455 this=this, 5456 kind=kind, 5457 path=path, 5458 nested_schema=nested_schema, 5459 ) 5460 5461 def _parse_json_schema(self) -> exp.JSONSchema: 5462 self._match_text_seq("COLUMNS") 5463 return self.expression( 5464 exp.JSONSchema, 5465 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5466 ) 5467 5468 def _parse_json_table(self) -> exp.JSONTable: 5469 this = self._parse_format_json(self._parse_bitwise()) 5470 path = self._match(TokenType.COMMA) and self._parse_string() 5471 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5472 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5473 schema = self._parse_json_schema() 5474 5475 return exp.JSONTable( 5476 this=this, 5477 schema=schema, 5478 path=path, 5479 error_handling=error_handling, 5480 empty_handling=empty_handling, 5481 ) 5482 5483 def _parse_match_against(self) -> exp.MatchAgainst: 5484 expressions = self._parse_csv(self._parse_column) 5485 5486 self._match_text_seq(")", "AGAINST", "(") 5487 5488 this = self._parse_string() 5489 5490 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5491 modifier = "IN NATURAL LANGUAGE MODE" 5492 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5493 modifier = f"{modifier} WITH QUERY EXPANSION" 5494 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5495 modifier = "IN BOOLEAN MODE" 5496 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5497 modifier = "WITH QUERY EXPANSION" 5498 else: 5499 modifier = None 5500 5501 return self.expression( 5502 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5503 ) 5504 5505 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5506 def _parse_open_json(self) -> exp.OpenJSON: 5507 this = self._parse_bitwise() 5508 path = self._match(TokenType.COMMA) and self._parse_string() 5509 5510 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5511 this = self._parse_field(any_token=True) 5512 kind = self._parse_types() 5513 path = 
self._parse_string() 5514 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5515 5516 return self.expression( 5517 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5518 ) 5519 5520 expressions = None 5521 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5522 self._match_l_paren() 5523 expressions = self._parse_csv(_parse_open_json_column_def) 5524 5525 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5526 5527 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5528 args = self._parse_csv(self._parse_bitwise) 5529 5530 if self._match(TokenType.IN): 5531 return self.expression( 5532 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5533 ) 5534 5535 if haystack_first: 5536 haystack = seq_get(args, 0) 5537 needle = seq_get(args, 1) 5538 else: 5539 needle = seq_get(args, 0) 5540 haystack = seq_get(args, 1) 5541 5542 return self.expression( 5543 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5544 ) 5545 5546 def _parse_predict(self) -> exp.Predict: 5547 self._match_text_seq("MODEL") 5548 this = self._parse_table() 5549 5550 self._match(TokenType.COMMA) 5551 self._match_text_seq("TABLE") 5552 5553 return self.expression( 5554 exp.Predict, 5555 this=this, 5556 expression=self._parse_table(), 5557 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5558 ) 5559 5560 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5561 args = self._parse_csv(self._parse_table) 5562 return exp.JoinHint(this=func_name.upper(), expressions=args) 5563 5564 def _parse_substring(self) -> exp.Substring: 5565 # Postgres supports the form: substring(string [from int] [for int]) 5566 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5567 5568 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5569 5570 if self._match(TokenType.FROM): 5571 args.append(self._parse_bitwise()) 5572 if self._match(TokenType.FOR): 5573 if len(args) == 1: 5574 args.append(exp.Literal.number(1)) 5575 args.append(self._parse_bitwise()) 5576 5577 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5578 5579 def _parse_trim(self) -> exp.Trim: 5580 # https://www.w3resource.com/sql/character-functions/trim.php 5581 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5582 5583 position = None 5584 collation = None 5585 expression = None 5586 5587 if self._match_texts(self.TRIM_TYPES): 5588 position = self._prev.text.upper() 5589 5590 this = self._parse_bitwise() 5591 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5592 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5593 expression = self._parse_bitwise() 5594 5595 if invert_order: 5596 this, expression = expression, this 5597 5598 if self._match(TokenType.COLLATE): 5599 collation = self._parse_bitwise() 5600 5601 return self.expression( 5602 exp.Trim, this=this, position=position, expression=expression, collation=collation 5603 ) 5604 5605 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5606 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5607 5608 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5609 return self._parse_window(self._parse_id_var(), alias=True) 5610 5611 def _parse_respect_or_ignore_nulls( 5612 self, this: t.Optional[exp.Expression] 5613 ) -> t.Optional[exp.Expression]: 5614 if self._match_text_seq("IGNORE", "NULLS"): 
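# e.g. LAST_VALUE(x) IGNORE NULLS -- the parsed expression is wrapped in exp.IgnoreNulls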
5615 return self.expression(exp.IgnoreNulls, this=this) 5616 if self._match_text_seq("RESPECT", "NULLS"): 5617 return self.expression(exp.RespectNulls, this=this) 5618 return this 5619 5620 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5621 if self._match(TokenType.HAVING): 5622 self._match_texts(("MAX", "MIN")) 5623 max = self._prev.text.upper() != "MIN" 5624 return self.expression( 5625 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5626 ) 5627 5628 return this 5629 5630 def _parse_window( 5631 self, this: t.Optional[exp.Expression], alias: bool = False 5632 ) -> t.Optional[exp.Expression]: 5633 func = this 5634 comments = func.comments if isinstance(func, exp.Expression) else None 5635 5636 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5637 self._match(TokenType.WHERE) 5638 this = self.expression( 5639 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5640 ) 5641 self._match_r_paren() 5642 5643 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5644 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5645 if self._match_text_seq("WITHIN", "GROUP"): 5646 order = self._parse_wrapped(self._parse_order) 5647 this = self.expression(exp.WithinGroup, this=this, expression=order) 5648 5649 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 5650 # Some dialects choose to implement and some do not. 5651 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5652 5653 # There is some code above in _parse_lambda that handles 5654 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5655 5656 # The below changes handle 5657 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5658 5659 # Oracle allows both formats 5660 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5661 # and Snowflake chose to do the same for familiarity 5662 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5663 if isinstance(this, exp.AggFunc): 5664 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5665 5666 if ignore_respect and ignore_respect is not this: 5667 ignore_respect.replace(ignore_respect.this) 5668 this = self.expression(ignore_respect.__class__, this=this) 5669 5670 this = self._parse_respect_or_ignore_nulls(this) 5671 5672 # bigquery select from window x AS (partition by ...) 
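# e.g. SELECT SUM(x) OVER w FROM t WINDOW w AS (PARTITION BY y) -- the named
# window `w AS (...)` comes through here with alias=True and no OVER keyword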
5673 if alias: 5674 over = None 5675 self._match(TokenType.ALIAS) 5676 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5677 return this 5678 else: 5679 over = self._prev.text.upper() 5680 5681 if comments and isinstance(func, exp.Expression): 5682 func.pop_comments() 5683 5684 if not self._match(TokenType.L_PAREN): 5685 return self.expression( 5686 exp.Window, 5687 comments=comments, 5688 this=this, 5689 alias=self._parse_id_var(False), 5690 over=over, 5691 ) 5692 5693 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5694 5695 first = self._match(TokenType.FIRST) 5696 if self._match_text_seq("LAST"): 5697 first = False 5698 5699 partition, order = self._parse_partition_and_order() 5700 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5701 5702 if kind: 5703 self._match(TokenType.BETWEEN) 5704 start = self._parse_window_spec() 5705 self._match(TokenType.AND) 5706 end = self._parse_window_spec() 5707 5708 spec = self.expression( 5709 exp.WindowSpec, 5710 kind=kind, 5711 start=start["value"], 5712 start_side=start["side"], 5713 end=end["value"], 5714 end_side=end["side"], 5715 ) 5716 else: 5717 spec = None 5718 5719 self._match_r_paren() 5720 5721 window = self.expression( 5722 exp.Window, 5723 comments=comments, 5724 this=this, 5725 partition_by=partition, 5726 order=order, 5727 spec=spec, 5728 alias=window_alias, 5729 over=over, 5730 first=first, 5731 ) 5732 5733 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 5734 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5735 return self._parse_window(window, alias=alias) 5736 5737 return window 5738 5739 def _parse_partition_and_order( 5740 self, 5741 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5742 return self._parse_partition_by(), self._parse_order() 5743 5744 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5745 self._match(TokenType.BETWEEN) 5746 5747 return { 5748 "value": ( 5749 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5750 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5751 or self._parse_bitwise() 5752 ), 5753 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5754 } 5755 5756 def _parse_alias( 5757 self, this: t.Optional[exp.Expression], explicit: bool = False 5758 ) -> t.Optional[exp.Expression]: 5759 any_token = self._match(TokenType.ALIAS) 5760 comments = self._prev_comments or [] 5761 5762 if explicit and not any_token: 5763 return this 5764 5765 if self._match(TokenType.L_PAREN): 5766 aliases = self.expression( 5767 exp.Aliases, 5768 comments=comments, 5769 this=this, 5770 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5771 ) 5772 self._match_r_paren(aliases) 5773 return aliases 5774 5775 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 5776 self.STRING_ALIASES and self._parse_string_as_identifier() 5777 ) 5778 5779 if alias: 5780 comments.extend(alias.pop_comments()) 5781 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5782 column = this.this 5783 5784 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5785 if not this.comments and column and column.comments: 5786 this.comments = column.pop_comments() 5787 5788 return this 5789 5790 def _parse_id_var( 5791 self, 5792 any_token: bool = True, 5793 tokens: t.Optional[t.Collection[TokenType]] = None, 5794 ) -> t.Optional[exp.Expression]: 5795 expression = self._parse_identifier() 5796 if 
not expression and ( 5797 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 5798 ): 5799 quoted = self._prev.token_type == TokenType.STRING 5800 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 5801 5802 return expression 5803 5804 def _parse_string(self) -> t.Optional[exp.Expression]: 5805 if self._match_set(self.STRING_PARSERS): 5806 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 5807 return self._parse_placeholder() 5808 5809 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5810 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5811 5812 def _parse_number(self) -> t.Optional[exp.Expression]: 5813 if self._match_set(self.NUMERIC_PARSERS): 5814 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 5815 return self._parse_placeholder() 5816 5817 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5818 if self._match(TokenType.IDENTIFIER): 5819 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5820 return self._parse_placeholder() 5821 5822 def _parse_var( 5823 self, 5824 any_token: bool = False, 5825 tokens: t.Optional[t.Collection[TokenType]] = None, 5826 upper: bool = False, 5827 ) -> t.Optional[exp.Expression]: 5828 if ( 5829 (any_token and self._advance_any()) 5830 or self._match(TokenType.VAR) 5831 or (self._match_set(tokens) if tokens else False) 5832 ): 5833 return self.expression( 5834 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5835 ) 5836 return self._parse_placeholder() 5837 5838 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5839 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5840 self._advance() 5841 return self._prev 5842 return None 5843 5844 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5845 return self._parse_var() or self._parse_string() 5846 5847 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 5848 return self._parse_primary() or self._parse_var(any_token=True) 5849 5850 def _parse_null(self) -> t.Optional[exp.Expression]: 5851 if self._match_set(self.NULL_TOKENS): 5852 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5853 return self._parse_placeholder() 5854 5855 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5856 if self._match(TokenType.TRUE): 5857 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5858 if self._match(TokenType.FALSE): 5859 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5860 return self._parse_placeholder() 5861 5862 def _parse_star(self) -> t.Optional[exp.Expression]: 5863 if self._match(TokenType.STAR): 5864 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5865 return self._parse_placeholder() 5866 5867 def _parse_parameter(self) -> exp.Parameter: 5868 this = self._parse_identifier() or self._parse_primary_or_var() 5869 return self.expression(exp.Parameter, this=this) 5870 5871 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5872 if self._match_set(self.PLACEHOLDER_PARSERS): 5873 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5874 if placeholder: 5875 return placeholder 5876 self._advance(-1) 5877 return None 5878 5879 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 5880 if not self._match_texts(keywords): 5881 return None 5882 if self._match(TokenType.L_PAREN, advance=False): 5883 return 
self._parse_wrapped_csv(self._parse_expression) 5884 5885 expression = self._parse_expression() 5886 return [expression] if expression else None 5887 5888 def _parse_csv( 5889 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 5890 ) -> t.List[exp.Expression]: 5891 parse_result = parse_method() 5892 items = [parse_result] if parse_result is not None else [] 5893 5894 while self._match(sep): 5895 self._add_comments(parse_result) 5896 parse_result = parse_method() 5897 if parse_result is not None: 5898 items.append(parse_result) 5899 5900 return items 5901 5902 def _parse_tokens( 5903 self, parse_method: t.Callable, expressions: t.Dict 5904 ) -> t.Optional[exp.Expression]: 5905 this = parse_method() 5906 5907 while self._match_set(expressions): 5908 this = self.expression( 5909 expressions[self._prev.token_type], 5910 this=this, 5911 comments=self._prev_comments, 5912 expression=parse_method(), 5913 ) 5914 5915 return this 5916 5917 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 5918 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 5919 5920 def _parse_wrapped_csv( 5921 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 5922 ) -> t.List[exp.Expression]: 5923 return self._parse_wrapped( 5924 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 5925 ) 5926 5927 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 5928 wrapped = self._match(TokenType.L_PAREN) 5929 if not wrapped and not optional: 5930 self.raise_error("Expecting (") 5931 parse_result = parse_method() 5932 if wrapped: 5933 self._match_r_paren() 5934 return parse_result 5935 5936 def _parse_expressions(self) -> t.List[exp.Expression]: 5937 return self._parse_csv(self._parse_expression) 5938 5939 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 5940 return self._parse_select() or self._parse_set_operations( 5941 self._parse_expression() if alias else self._parse_conjunction() 5942 ) 5943 5944 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5945 return self._parse_query_modifiers( 5946 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5947 ) 5948 5949 def _parse_transaction(self) -> exp.Transaction | exp.Command: 5950 this = None 5951 if self._match_texts(self.TRANSACTION_KIND): 5952 this = self._prev.text 5953 5954 self._match_texts(("TRANSACTION", "WORK")) 5955 5956 modes = [] 5957 while True: 5958 mode = [] 5959 while self._match(TokenType.VAR): 5960 mode.append(self._prev.text) 5961 5962 if mode: 5963 modes.append(" ".join(mode)) 5964 if not self._match(TokenType.COMMA): 5965 break 5966 5967 return self.expression(exp.Transaction, this=this, modes=modes) 5968 5969 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 5970 chain = None 5971 savepoint = None 5972 is_rollback = self._prev.token_type == TokenType.ROLLBACK 5973 5974 self._match_texts(("TRANSACTION", "WORK")) 5975 5976 if self._match_text_seq("TO"): 5977 self._match_text_seq("SAVEPOINT") 5978 savepoint = self._parse_id_var() 5979 5980 if self._match(TokenType.AND): 5981 chain = not self._match_text_seq("NO") 5982 self._match_text_seq("CHAIN") 5983 5984 if is_rollback: 5985 return self.expression(exp.Rollback, savepoint=savepoint) 5986 5987 return self.expression(exp.Commit, chain=chain) 5988 5989 def _parse_refresh(self) -> exp.Refresh: 5990 self._match(TokenType.TABLE) 5991 return self.expression(exp.Refresh, 
this=self._parse_string() or self._parse_table()) 5992 5993 def _parse_add_column(self) -> t.Optional[exp.Expression]: 5994 if not self._match_text_seq("ADD"): 5995 return None 5996 5997 self._match(TokenType.COLUMN) 5998 exists_column = self._parse_exists(not_=True) 5999 expression = self._parse_field_def() 6000 6001 if expression: 6002 expression.set("exists", exists_column) 6003 6004 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6005 if self._match_texts(("FIRST", "AFTER")): 6006 position = self._prev.text 6007 column_position = self.expression( 6008 exp.ColumnPosition, this=self._parse_column(), position=position 6009 ) 6010 expression.set("position", column_position) 6011 6012 return expression 6013 6014 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6015 drop = self._match(TokenType.DROP) and self._parse_drop() 6016 if drop and not isinstance(drop, exp.Command): 6017 drop.set("kind", drop.args.get("kind", "COLUMN")) 6018 return drop 6019 6020 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6021 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6022 return self.expression( 6023 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6024 ) 6025 6026 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6027 index = self._index - 1 6028 6029 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6030 return self._parse_csv( 6031 lambda: self.expression( 6032 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6033 ) 6034 ) 6035 6036 self._retreat(index) 6037 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6038 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6039 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6040 6041 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6042 if self._match_texts(self.ALTER_ALTER_PARSERS): 6043 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6044 6045 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6046 # keyword after ALTER we default to parsing this statement 6047 self._match(TokenType.COLUMN) 6048 column = self._parse_field(any_token=True) 6049 6050 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6051 return self.expression(exp.AlterColumn, this=column, drop=True) 6052 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6053 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 6054 if self._match(TokenType.COMMENT): 6055 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6056 if self._match_text_seq("DROP", "NOT", "NULL"): 6057 return self.expression( 6058 exp.AlterColumn, 6059 this=column, 6060 drop=True, 6061 allow_null=True, 6062 ) 6063 if self._match_text_seq("SET", "NOT", "NULL"): 6064 return self.expression( 6065 exp.AlterColumn, 6066 this=column, 6067 allow_null=False, 6068 ) 6069 self._match_text_seq("SET", "DATA") 6070 self._match_text_seq("TYPE") 6071 return self.expression( 6072 exp.AlterColumn, 6073 this=column, 6074 dtype=self._parse_types(), 6075 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6076 using=self._match(TokenType.USING) and self._parse_conjunction(), 6077 ) 6078 6079 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6080 if self._match_texts(("ALL", "EVEN", "AUTO")): 6081 return 
self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6082 6083 self._match_text_seq("KEY", "DISTKEY") 6084 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6085 6086 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6087 if compound: 6088 self._match_text_seq("SORTKEY") 6089 6090 if self._match(TokenType.L_PAREN, advance=False): 6091 return self.expression( 6092 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6093 ) 6094 6095 self._match_texts(("AUTO", "NONE")) 6096 return self.expression( 6097 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6098 ) 6099 6100 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6101 index = self._index - 1 6102 6103 partition_exists = self._parse_exists() 6104 if self._match(TokenType.PARTITION, advance=False): 6105 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6106 6107 self._retreat(index) 6108 return self._parse_csv(self._parse_drop_column) 6109 6110 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 6111 if self._match(TokenType.COLUMN): 6112 exists = self._parse_exists() 6113 old_column = self._parse_column() 6114 to = self._match_text_seq("TO") 6115 new_column = self._parse_column() 6116 6117 if old_column is None or to is None or new_column is None: 6118 return None 6119 6120 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6121 6122 self._match_text_seq("TO") 6123 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 6124 6125 def _parse_alter_table_set(self) -> exp.AlterSet: 6126 alter_set = self.expression(exp.AlterSet) 6127 6128 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6129 "TABLE", "PROPERTIES" 6130 ): 6131 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_conjunction)) 6132 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6133 alter_set.set("expressions", [self._parse_conjunction()]) 6134 elif self._match_texts(("LOGGED", "UNLOGGED")): 6135 alter_set.set("option", exp.var(self._prev.text.upper())) 6136 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6137 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6138 elif self._match_text_seq("LOCATION"): 6139 alter_set.set("location", self._parse_field()) 6140 elif self._match_text_seq("ACCESS", "METHOD"): 6141 alter_set.set("access_method", self._parse_field()) 6142 elif self._match_text_seq("TABLESPACE"): 6143 alter_set.set("tablespace", self._parse_field()) 6144 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6145 alter_set.set("file_format", [self._parse_field()]) 6146 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6147 alter_set.set("file_format", self._parse_wrapped_options()) 6148 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6149 alter_set.set("copy_options", self._parse_wrapped_options()) 6150 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6151 alter_set.set("tag", self._parse_csv(self._parse_conjunction)) 6152 else: 6153 if self._match_text_seq("SERDE"): 6154 alter_set.set("serde", self._parse_field()) 6155 6156 alter_set.set("expressions", [self._parse_properties()]) 6157 6158 return alter_set 6159 6160 def _parse_alter(self) -> exp.AlterTable | exp.Command: 6161 start = self._prev 6162 6163 if not self._match(TokenType.TABLE): 6164 return 
self._parse_as_command(start) 6165 6166 exists = self._parse_exists() 6167 only = self._match_text_seq("ONLY") 6168 this = self._parse_table(schema=True) 6169 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6170 6171 if self._next: 6172 self._advance() 6173 6174 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6175 if parser: 6176 actions = ensure_list(parser(self)) 6177 options = self._parse_csv(self._parse_property) 6178 6179 if not self._curr and actions: 6180 return self.expression( 6181 exp.AlterTable, 6182 this=this, 6183 exists=exists, 6184 actions=actions, 6185 only=only, 6186 options=options, 6187 cluster=cluster, 6188 ) 6189 6190 return self._parse_as_command(start) 6191 6192 def _parse_merge(self) -> exp.Merge: 6193 self._match(TokenType.INTO) 6194 target = self._parse_table() 6195 6196 if target and self._match(TokenType.ALIAS, advance=False): 6197 target.set("alias", self._parse_table_alias()) 6198 6199 self._match(TokenType.USING) 6200 using = self._parse_table() 6201 6202 self._match(TokenType.ON) 6203 on = self._parse_conjunction() 6204 6205 return self.expression( 6206 exp.Merge, 6207 this=target, 6208 using=using, 6209 on=on, 6210 expressions=self._parse_when_matched(), 6211 ) 6212 6213 def _parse_when_matched(self) -> t.List[exp.When]: 6214 whens = [] 6215 6216 while self._match(TokenType.WHEN): 6217 matched = not self._match(TokenType.NOT) 6218 self._match_text_seq("MATCHED") 6219 source = ( 6220 False 6221 if self._match_text_seq("BY", "TARGET") 6222 else self._match_text_seq("BY", "SOURCE") 6223 ) 6224 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 6225 6226 self._match(TokenType.THEN) 6227 6228 if self._match(TokenType.INSERT): 6229 _this = self._parse_star() 6230 if _this: 6231 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 6232 else: 6233 then = self.expression( 6234 exp.Insert, 6235 this=self._parse_value(), 6236 expression=self._match_text_seq("VALUES") and self._parse_value(), 6237 ) 6238 elif self._match(TokenType.UPDATE): 6239 expressions = self._parse_star() 6240 if expressions: 6241 then = self.expression(exp.Update, expressions=expressions) 6242 else: 6243 then = self.expression( 6244 exp.Update, 6245 expressions=self._match(TokenType.SET) 6246 and self._parse_csv(self._parse_equality), 6247 ) 6248 elif self._match(TokenType.DELETE): 6249 then = self.expression(exp.Var, this=self._prev.text) 6250 else: 6251 then = None 6252 6253 whens.append( 6254 self.expression( 6255 exp.When, 6256 matched=matched, 6257 source=source, 6258 condition=condition, 6259 then=then, 6260 ) 6261 ) 6262 return whens 6263 6264 def _parse_show(self) -> t.Optional[exp.Expression]: 6265 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6266 if parser: 6267 return parser(self) 6268 return self._parse_as_command(self._prev) 6269 6270 def _parse_set_item_assignment( 6271 self, kind: t.Optional[str] = None 6272 ) -> t.Optional[exp.Expression]: 6273 index = self._index 6274 6275 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 6276 return self._parse_set_transaction(global_=kind == "GLOBAL") 6277 6278 left = self._parse_primary() or self._parse_column() 6279 assignment_delimiter = self._match_texts(("=", "TO")) 6280 6281 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6282 self._retreat(index) 6283 return None 6284 6285 right = self._parse_statement() or self._parse_id_var() 6286 if isinstance(right, 
(exp.Column, exp.Identifier)): 6287 right = exp.var(right.name) 6288 6289 this = self.expression(exp.EQ, this=left, expression=right) 6290 return self.expression(exp.SetItem, this=this, kind=kind) 6291 6292 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6293 self._match_text_seq("TRANSACTION") 6294 characteristics = self._parse_csv( 6295 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6296 ) 6297 return self.expression( 6298 exp.SetItem, 6299 expressions=characteristics, 6300 kind="TRANSACTION", 6301 **{"global": global_}, # type: ignore 6302 ) 6303 6304 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6305 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6306 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6307 6308 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6309 index = self._index 6310 set_ = self.expression( 6311 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6312 ) 6313 6314 if self._curr: 6315 self._retreat(index) 6316 return self._parse_as_command(self._prev) 6317 6318 return set_ 6319 6320 def _parse_var_from_options( 6321 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6322 ) -> t.Optional[exp.Var]: 6323 start = self._curr 6324 if not start: 6325 return None 6326 6327 option = start.text.upper() 6328 continuations = options.get(option) 6329 6330 index = self._index 6331 self._advance() 6332 for keywords in continuations or []: 6333 if isinstance(keywords, str): 6334 keywords = (keywords,) 6335 6336 if self._match_text_seq(*keywords): 6337 option = f"{option} {' '.join(keywords)}" 6338 break 6339 else: 6340 if continuations or continuations is None: 6341 if raise_unmatched: 6342 self.raise_error(f"Unknown option {option}") 6343 6344 self._retreat(index) 6345 return None 6346 6347 return exp.var(option) 6348 6349 def _parse_as_command(self, start: Token) -> exp.Command: 6350 while self._curr: 6351 self._advance() 6352 text = self._find_sql(start, self._prev) 6353 size = len(start.text) 6354 self._warn_unsupported() 6355 return exp.Command(this=text[:size], expression=text[size:]) 6356 6357 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6358 settings = [] 6359 6360 self._match_l_paren() 6361 kind = self._parse_id_var() 6362 6363 if self._match(TokenType.L_PAREN): 6364 while True: 6365 key = self._parse_id_var() 6366 value = self._parse_primary() 6367 6368 if not key and value is None: 6369 break 6370 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 6371 self._match(TokenType.R_PAREN) 6372 6373 self._match_r_paren() 6374 6375 return self.expression( 6376 exp.DictProperty, 6377 this=this, 6378 kind=kind.this if kind else None, 6379 settings=settings, 6380 ) 6381 6382 def _parse_dict_range(self, this: str) -> exp.DictRange: 6383 self._match_l_paren() 6384 has_min = self._match_text_seq("MIN") 6385 if has_min: 6386 min = self._parse_var() or self._parse_primary() 6387 self._match_text_seq("MAX") 6388 max = self._parse_var() or self._parse_primary() 6389 else: 6390 max = self._parse_var() or self._parse_primary() 6391 min = exp.Literal.number(0) 6392 self._match_r_paren() 6393 return self.expression(exp.DictRange, this=this, min=min, max=max) 6394 6395 def _parse_comprehension( 6396 self, this: t.Optional[exp.Expression] 6397 ) -> t.Optional[exp.Comprehension]: 6398 index = self._index 6399 expression = self._parse_column() 6400 if not 
self._match(TokenType.IN): 6401 self._retreat(index - 1) 6402 return None 6403 iterator = self._parse_column() 6404 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 6405 return self.expression( 6406 exp.Comprehension, 6407 this=this, 6408 expression=expression, 6409 iterator=iterator, 6410 condition=condition, 6411 ) 6412 6413 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6414 if self._match(TokenType.HEREDOC_STRING): 6415 return self.expression(exp.Heredoc, this=self._prev.text) 6416 6417 if not self._match_text_seq("$"): 6418 return None 6419 6420 tags = ["$"] 6421 tag_text = None 6422 6423 if self._is_connected(): 6424 self._advance() 6425 tags.append(self._prev.text.upper()) 6426 else: 6427 self.raise_error("No closing $ found") 6428 6429 if tags[-1] != "$": 6430 if self._is_connected() and self._match_text_seq("$"): 6431 tag_text = tags[-1] 6432 tags.append("$") 6433 else: 6434 self.raise_error("No closing $ found") 6435 6436 heredoc_start = self._curr 6437 6438 while self._curr: 6439 if self._match_text_seq(*tags, advance=False): 6440 this = self._find_sql(heredoc_start, self._prev) 6441 self._advance(len(tags)) 6442 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6443 6444 self._advance() 6445 6446 self.raise_error(f"No closing {''.join(tags)} found") 6447 return None 6448 6449 def _find_parser( 6450 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6451 ) -> t.Optional[t.Callable]: 6452 if not self._curr: 6453 return None 6454 6455 index = self._index 6456 this = [] 6457 while True: 6458 # The current token might be multiple words 6459 curr = self._curr.text.upper() 6460 key = curr.split(" ") 6461 this.append(curr) 6462 6463 self._advance() 6464 result, trie = in_trie(trie, key) 6465 if result == TrieResult.FAILED: 6466 break 6467 6468 if result == TrieResult.EXISTS: 6469 subparser = parsers[" ".join(this)] 6470 return subparser 6471 6472 self._retreat(index) 6473 return None 6474 6475 def _match(self, token_type, advance=True, expression=None): 6476 if not self._curr: 6477 return None 6478 6479 if self._curr.token_type == token_type: 6480 if advance: 6481 self._advance() 6482 self._add_comments(expression) 6483 return True 6484 6485 return None 6486 6487 def _match_set(self, types, advance=True): 6488 if not self._curr: 6489 return None 6490 6491 if self._curr.token_type in types: 6492 if advance: 6493 self._advance() 6494 return True 6495 6496 return None 6497 6498 def _match_pair(self, token_type_a, token_type_b, advance=True): 6499 if not self._curr or not self._next: 6500 return None 6501 6502 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6503 if advance: 6504 self._advance(2) 6505 return True 6506 6507 return None 6508 6509 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6510 if not self._match(TokenType.L_PAREN, expression=expression): 6511 self.raise_error("Expecting (") 6512 6513 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6514 if not self._match(TokenType.R_PAREN, expression=expression): 6515 self.raise_error("Expecting )") 6516 6517 def _match_texts(self, texts, advance=True): 6518 if self._curr and self._curr.text.upper() in texts: 6519 if advance: 6520 self._advance() 6521 return True 6522 return None 6523 6524 def _match_text_seq(self, *texts, advance=True): 6525 index = self._index 6526 for text in texts: 6527 if self._curr and self._curr.text.upper() == text: 6528 self._advance() 6529 else: 6530 
self._retreat(index) 6531 return None 6532 6533 if not advance: 6534 self._retreat(index) 6535 6536 return True 6537 6538 def _replace_lambda( 6539 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 6540 ) -> t.Optional[exp.Expression]: 6541 if not node: 6542 return node 6543 6544 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 6545 6546 for column in node.find_all(exp.Column): 6547 typ = lambda_types.get(column.parts[0].name) 6548 if typ is not None: 6549 dot_or_id = column.to_dot() if column.table else column.this 6550 6551 if typ: 6552 dot_or_id = self.expression( 6553 exp.Cast, 6554 this=dot_or_id, 6555 to=typ, 6556 ) 6557 6558 parent = column.parent 6559 6560 while isinstance(parent, exp.Dot): 6561 if not isinstance(parent.parent, exp.Dot): 6562 parent.replace(dot_or_id) 6563 break 6564 parent = parent.parent 6565 else: 6566 if column is node: 6567 node = dot_or_id 6568 else: 6569 column.replace(dot_or_id) 6570 return node 6571 6572 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6573 start = self._prev 6574 6575 # Not to be confused with TRUNCATE(number, decimals) function call 6576 if self._match(TokenType.L_PAREN): 6577 self._retreat(self._index - 2) 6578 return self._parse_function() 6579 6580 # Clickhouse supports TRUNCATE DATABASE as well 6581 is_database = self._match(TokenType.DATABASE) 6582 6583 self._match(TokenType.TABLE) 6584 6585 exists = self._parse_exists(not_=False) 6586 6587 expressions = self._parse_csv( 6588 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6589 ) 6590 6591 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6592 6593 if self._match_text_seq("RESTART", "IDENTITY"): 6594 identity = "RESTART" 6595 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6596 identity = "CONTINUE" 6597 else: 6598 identity = None 6599 6600 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6601 option = self._prev.text 6602 else: 6603 option = None 6604 6605 partition = self._parse_partition() 6606 6607 # Fallback case 6608 if self._curr: 6609 return self._parse_as_command(start) 6610 6611 return self.expression( 6612 exp.TruncateTable, 6613 expressions=expressions, 6614 is_database=is_database, 6615 exists=exists, 6616 cluster=cluster, 6617 identity=identity, 6618 option=option, 6619 partition=partition, 6620 ) 6621 6622 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 6623 this = self._parse_ordered(self._parse_opclass) 6624 6625 if not self._match(TokenType.WITH): 6626 return this 6627 6628 op = self._parse_var(any_token=True) 6629 6630 return self.expression(exp.WithOperator, this=this, op=op) 6631 6632 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 6633 opts = [] 6634 self._match(TokenType.EQ) 6635 self._match(TokenType.L_PAREN) 6636 while self._curr and not self._match(TokenType.R_PAREN): 6637 opts.append(self._parse_conjunction()) 6638 self._match(TokenType.COMMA) 6639 return opts 6640 6641 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 6642 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 6643 6644 options = [] 6645 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 6646 option = self._parse_unquoted_field() 6647 value = None 6648 6649 # Some options are defined as functions with the values as params 6650 if not isinstance(option, exp.Func): 6651 prev = self._prev.text.upper() 6652 # Different dialects might separate options and 
values by white space, "=" and "AS" 6653 self._match(TokenType.EQ) 6654 self._match(TokenType.ALIAS) 6655 6656 if prev == "FILE_FORMAT" and self._match(TokenType.L_PAREN): 6657 # Snowflake FILE_FORMAT case 6658 value = self._parse_wrapped_options() 6659 else: 6660 value = self._parse_unquoted_field() 6661 6662 param = self.expression(exp.CopyParameter, this=option, expression=value) 6663 options.append(param) 6664 6665 if sep: 6666 self._match(sep) 6667 6668 return options 6669 6670 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 6671 expr = self.expression(exp.Credentials) 6672 6673 if self._match_text_seq("STORAGE_INTEGRATION", advance=False): 6674 expr.set("storage", self._parse_conjunction()) 6675 if self._match_text_seq("CREDENTIALS"): 6676 # Snowflake supports CREDENTIALS = (...), while Redshift CREDENTIALS <string> 6677 creds = ( 6678 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 6679 ) 6680 expr.set("credentials", creds) 6681 if self._match_text_seq("ENCRYPTION"): 6682 expr.set("encryption", self._parse_wrapped_options()) 6683 if self._match_text_seq("IAM_ROLE"): 6684 expr.set("iam_role", self._parse_field()) 6685 if self._match_text_seq("REGION"): 6686 expr.set("region", self._parse_field()) 6687 6688 return expr 6689 6690 def _parse_file_location(self) -> t.Optional[exp.Expression]: 6691 return self._parse_field() 6692 6693 def _parse_copy(self) -> exp.Copy | exp.Command: 6694 start = self._prev 6695 6696 self._match(TokenType.INTO) 6697 6698 this = ( 6699 self._parse_conjunction() 6700 if self._match(TokenType.L_PAREN, advance=False) 6701 else self._parse_table(schema=True) 6702 ) 6703 6704 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 6705 6706 files = self._parse_csv(self._parse_file_location) 6707 credentials = self._parse_credentials() 6708 6709 self._match_text_seq("WITH") 6710 6711 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 6712 6713 # Fallback case 6714 if self._curr: 6715 return self._parse_as_command(start) 6716 6717 return self.expression( 6718 exp.Copy, 6719 this=this, 6720 kind=kind, 6721 credentials=credentials, 6722 files=files, 6723 params=params, 6724 )
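As a quick orientation (this example is not part of the module source), the Parser is normally driven through the top-level sqlglot API, which tokenizes and parses in one step. A minimal sketch, assuming sqlglot is installed; the DECODE query and the Oracle dialect are arbitrary choices for illustration:

import sqlglot
from sqlglot import exp

# parse_one tokenizes the SQL with the dialect's Tokenizer and hands the token
# list to its Parser, returning a single expression tree.
tree = sqlglot.parse_one("SELECT DECODE(x, 1, 'one', 'other') FROM t", read="oracle")

# _parse_decode rewrote the DECODE call into a searched CASE expression.
case = tree.find(exp.Case)
print(case.sql())  # e.g. CASE WHEN x = 1 THEN 'one' ELSE 'other' END

# The same two steps can also be run explicitly through a Dialect instance.
from sqlglot.dialects.oracle import Oracle

dialect = Oracle()
tokens = dialect.tokenize("SELECT 1 FROM dual")
expressions = dialect.parser().parse(tokens)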
26def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 27 if len(args) == 1 and args[0].is_star: 28 return exp.StarMap(this=args[0]) 29 30 keys = [] 31 values = [] 32 for i in range(0, len(args), 2): 33 keys.append(args[i]) 34 values.append(args[i + 1]) 35 36 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
44def binary_range_parser( 45 expr_type: t.Type[exp.Expression], reverse_args: bool = False 46) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 47 def _parse_binary_range( 48 self: Parser, this: t.Optional[exp.Expression] 49 ) -> t.Optional[exp.Expression]: 50 expression = self._parse_bitwise() 51 if reverse_args: 52 this, expression = expression, this 53 return self._parse_escape(self.expression(expr_type, this=this, expression=expression)) 54 55 return _parse_binary_range
58def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 59 # Default argument order is base, expression 60 this = seq_get(args, 0) 61 expression = seq_get(args, 1) 62 63 if expression: 64 if not dialect.LOG_BASE_FIRST: 65 this, expression = expression, this 66 return exp.Log(this=this, expression=expression) 67 68 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)
88def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 89 def _builder(args: t.List, dialect: Dialect) -> E: 90 expression = expr_type( 91 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 92 ) 93 if len(args) > 2 and expr_type is exp.JSONExtract: 94 expression.set("expressions", args[2:]) 95 96 return expression 97 98 return _builder
101def build_mod(args: t.List) -> exp.Mod: 102 this = seq_get(args, 0) 103 expression = seq_get(args, 1) 104 105 # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7 106 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 107 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 108 109 return exp.Mod(this=this, expression=expression)
122class Parser(metaclass=_Parser): 123 """ 124 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 125 126 Args: 127 error_level: The desired error level. 128 Default: ErrorLevel.IMMEDIATE 129 error_message_context: The amount of context to capture from a query string when displaying 130 the error message (in number of characters). 131 Default: 100 132 max_errors: Maximum number of error messages to include in a raised ParseError. 133 This is only relevant if error_level is ErrorLevel.RAISE. 134 Default: 3 135 """ 136 137 FUNCTIONS: t.Dict[str, t.Callable] = { 138 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 139 "CONCAT": lambda args, dialect: exp.Concat( 140 expressions=args, 141 safe=not dialect.STRICT_STRING_CONCAT, 142 coalesce=dialect.CONCAT_COALESCE, 143 ), 144 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 145 expressions=args, 146 safe=not dialect.STRICT_STRING_CONCAT, 147 coalesce=dialect.CONCAT_COALESCE, 148 ), 149 "DATE_TO_DATE_STR": lambda args: exp.Cast( 150 this=seq_get(args, 0), 151 to=exp.DataType(this=exp.DataType.Type.TEXT), 152 ), 153 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 154 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 155 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 156 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 157 "LIKE": build_like, 158 "LOG": build_logarithm, 159 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 160 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 161 "MOD": build_mod, 162 "TIME_TO_TIME_STR": lambda args: exp.Cast( 163 this=seq_get(args, 0), 164 to=exp.DataType(this=exp.DataType.Type.TEXT), 165 ), 166 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 167 this=exp.Cast( 168 this=seq_get(args, 0), 169 to=exp.DataType(this=exp.DataType.Type.TEXT), 170 ), 171 start=exp.Literal.number(1), 172 length=exp.Literal.number(10), 173 ), 174 "VAR_MAP": build_var_map, 175 "LOWER": build_lower, 176 "UPPER": build_upper, 177 "HEX": build_hex, 178 "TO_HEX": build_hex, 179 } 180 181 NO_PAREN_FUNCTIONS = { 182 TokenType.CURRENT_DATE: exp.CurrentDate, 183 TokenType.CURRENT_DATETIME: exp.CurrentDate, 184 TokenType.CURRENT_TIME: exp.CurrentTime, 185 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 186 TokenType.CURRENT_USER: exp.CurrentUser, 187 } 188 189 STRUCT_TYPE_TOKENS = { 190 TokenType.NESTED, 191 TokenType.OBJECT, 192 TokenType.STRUCT, 193 } 194 195 NESTED_TYPE_TOKENS = { 196 TokenType.ARRAY, 197 TokenType.LOWCARDINALITY, 198 TokenType.MAP, 199 TokenType.NULLABLE, 200 *STRUCT_TYPE_TOKENS, 201 } 202 203 ENUM_TYPE_TOKENS = { 204 TokenType.ENUM, 205 TokenType.ENUM8, 206 TokenType.ENUM16, 207 } 208 209 AGGREGATE_TYPE_TOKENS = { 210 TokenType.AGGREGATEFUNCTION, 211 TokenType.SIMPLEAGGREGATEFUNCTION, 212 } 213 214 TYPE_TOKENS = { 215 TokenType.BIT, 216 TokenType.BOOLEAN, 217 TokenType.TINYINT, 218 TokenType.UTINYINT, 219 TokenType.SMALLINT, 220 TokenType.USMALLINT, 221 TokenType.INT, 222 TokenType.UINT, 223 TokenType.BIGINT, 224 TokenType.UBIGINT, 225 TokenType.INT128, 226 TokenType.UINT128, 227 TokenType.INT256, 228 TokenType.UINT256, 229 TokenType.MEDIUMINT, 230 TokenType.UMEDIUMINT, 231 TokenType.FIXEDSTRING, 232 TokenType.FLOAT, 233 TokenType.DOUBLE, 234 TokenType.CHAR, 235 TokenType.NCHAR, 236 TokenType.VARCHAR, 237 TokenType.NVARCHAR, 238 TokenType.BPCHAR, 239 TokenType.TEXT, 240 
TokenType.MEDIUMTEXT, 241 TokenType.LONGTEXT, 242 TokenType.MEDIUMBLOB, 243 TokenType.LONGBLOB, 244 TokenType.BINARY, 245 TokenType.VARBINARY, 246 TokenType.JSON, 247 TokenType.JSONB, 248 TokenType.INTERVAL, 249 TokenType.TINYBLOB, 250 TokenType.TINYTEXT, 251 TokenType.TIME, 252 TokenType.TIMETZ, 253 TokenType.TIMESTAMP, 254 TokenType.TIMESTAMP_S, 255 TokenType.TIMESTAMP_MS, 256 TokenType.TIMESTAMP_NS, 257 TokenType.TIMESTAMPTZ, 258 TokenType.TIMESTAMPLTZ, 259 TokenType.TIMESTAMPNTZ, 260 TokenType.DATETIME, 261 TokenType.DATETIME64, 262 TokenType.DATE, 263 TokenType.DATE32, 264 TokenType.INT4RANGE, 265 TokenType.INT4MULTIRANGE, 266 TokenType.INT8RANGE, 267 TokenType.INT8MULTIRANGE, 268 TokenType.NUMRANGE, 269 TokenType.NUMMULTIRANGE, 270 TokenType.TSRANGE, 271 TokenType.TSMULTIRANGE, 272 TokenType.TSTZRANGE, 273 TokenType.TSTZMULTIRANGE, 274 TokenType.DATERANGE, 275 TokenType.DATEMULTIRANGE, 276 TokenType.DECIMAL, 277 TokenType.UDECIMAL, 278 TokenType.BIGDECIMAL, 279 TokenType.UUID, 280 TokenType.GEOGRAPHY, 281 TokenType.GEOMETRY, 282 TokenType.HLLSKETCH, 283 TokenType.HSTORE, 284 TokenType.PSEUDO_TYPE, 285 TokenType.SUPER, 286 TokenType.SERIAL, 287 TokenType.SMALLSERIAL, 288 TokenType.BIGSERIAL, 289 TokenType.XML, 290 TokenType.YEAR, 291 TokenType.UNIQUEIDENTIFIER, 292 TokenType.USERDEFINED, 293 TokenType.MONEY, 294 TokenType.SMALLMONEY, 295 TokenType.ROWVERSION, 296 TokenType.IMAGE, 297 TokenType.VARIANT, 298 TokenType.OBJECT, 299 TokenType.OBJECT_IDENTIFIER, 300 TokenType.INET, 301 TokenType.IPADDRESS, 302 TokenType.IPPREFIX, 303 TokenType.IPV4, 304 TokenType.IPV6, 305 TokenType.UNKNOWN, 306 TokenType.NULL, 307 TokenType.NAME, 308 TokenType.TDIGEST, 309 *ENUM_TYPE_TOKENS, 310 *NESTED_TYPE_TOKENS, 311 *AGGREGATE_TYPE_TOKENS, 312 } 313 314 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 315 TokenType.BIGINT: TokenType.UBIGINT, 316 TokenType.INT: TokenType.UINT, 317 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 318 TokenType.SMALLINT: TokenType.USMALLINT, 319 TokenType.TINYINT: TokenType.UTINYINT, 320 TokenType.DECIMAL: TokenType.UDECIMAL, 321 } 322 323 SUBQUERY_PREDICATES = { 324 TokenType.ANY: exp.Any, 325 TokenType.ALL: exp.All, 326 TokenType.EXISTS: exp.Exists, 327 TokenType.SOME: exp.Any, 328 } 329 330 RESERVED_TOKENS = { 331 *Tokenizer.SINGLE_TOKENS.values(), 332 TokenType.SELECT, 333 } - {TokenType.IDENTIFIER} 334 335 DB_CREATABLES = { 336 TokenType.DATABASE, 337 TokenType.DICTIONARY, 338 TokenType.MODEL, 339 TokenType.SCHEMA, 340 TokenType.SEQUENCE, 341 TokenType.STORAGE_INTEGRATION, 342 TokenType.TABLE, 343 TokenType.TAG, 344 TokenType.VIEW, 345 TokenType.WAREHOUSE, 346 TokenType.STREAMLIT, 347 } 348 349 CREATABLES = { 350 TokenType.COLUMN, 351 TokenType.CONSTRAINT, 352 TokenType.FOREIGN_KEY, 353 TokenType.FUNCTION, 354 TokenType.INDEX, 355 TokenType.PROCEDURE, 356 *DB_CREATABLES, 357 } 358 359 # Tokens that can represent identifiers 360 ID_VAR_TOKENS = { 361 TokenType.VAR, 362 TokenType.ANTI, 363 TokenType.APPLY, 364 TokenType.ASC, 365 TokenType.ASOF, 366 TokenType.AUTO_INCREMENT, 367 TokenType.BEGIN, 368 TokenType.BPCHAR, 369 TokenType.CACHE, 370 TokenType.CASE, 371 TokenType.COLLATE, 372 TokenType.COMMAND, 373 TokenType.COMMENT, 374 TokenType.COMMIT, 375 TokenType.CONSTRAINT, 376 TokenType.COPY, 377 TokenType.DEFAULT, 378 TokenType.DELETE, 379 TokenType.DESC, 380 TokenType.DESCRIBE, 381 TokenType.DICTIONARY, 382 TokenType.DIV, 383 TokenType.END, 384 TokenType.EXECUTE, 385 TokenType.ESCAPE, 386 TokenType.FALSE, 387 TokenType.FIRST, 388 TokenType.FILTER, 389 TokenType.FINAL, 390 TokenType.FORMAT, 
391 TokenType.FULL, 392 TokenType.IDENTIFIER, 393 TokenType.IS, 394 TokenType.ISNULL, 395 TokenType.INTERVAL, 396 TokenType.KEEP, 397 TokenType.KILL, 398 TokenType.LEFT, 399 TokenType.LOAD, 400 TokenType.MERGE, 401 TokenType.NATURAL, 402 TokenType.NEXT, 403 TokenType.OFFSET, 404 TokenType.OPERATOR, 405 TokenType.ORDINALITY, 406 TokenType.OVERLAPS, 407 TokenType.OVERWRITE, 408 TokenType.PARTITION, 409 TokenType.PERCENT, 410 TokenType.PIVOT, 411 TokenType.PRAGMA, 412 TokenType.RANGE, 413 TokenType.RECURSIVE, 414 TokenType.REFERENCES, 415 TokenType.REFRESH, 416 TokenType.REPLACE, 417 TokenType.RIGHT, 418 TokenType.ROLLUP, 419 TokenType.ROW, 420 TokenType.ROWS, 421 TokenType.SEMI, 422 TokenType.SET, 423 TokenType.SETTINGS, 424 TokenType.SHOW, 425 TokenType.TEMPORARY, 426 TokenType.TOP, 427 TokenType.TRUE, 428 TokenType.TRUNCATE, 429 TokenType.UNIQUE, 430 TokenType.UNNEST, 431 TokenType.UNPIVOT, 432 TokenType.UPDATE, 433 TokenType.USE, 434 TokenType.VOLATILE, 435 TokenType.WINDOW, 436 *CREATABLES, 437 *SUBQUERY_PREDICATES, 438 *TYPE_TOKENS, 439 *NO_PAREN_FUNCTIONS, 440 } 441 442 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 443 444 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 445 TokenType.ANTI, 446 TokenType.APPLY, 447 TokenType.ASOF, 448 TokenType.FULL, 449 TokenType.LEFT, 450 TokenType.LOCK, 451 TokenType.NATURAL, 452 TokenType.OFFSET, 453 TokenType.RIGHT, 454 TokenType.SEMI, 455 TokenType.WINDOW, 456 } 457 458 ALIAS_TOKENS = ID_VAR_TOKENS 459 460 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 461 462 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 463 464 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 465 466 FUNC_TOKENS = { 467 TokenType.COLLATE, 468 TokenType.COMMAND, 469 TokenType.CURRENT_DATE, 470 TokenType.CURRENT_DATETIME, 471 TokenType.CURRENT_TIMESTAMP, 472 TokenType.CURRENT_TIME, 473 TokenType.CURRENT_USER, 474 TokenType.FILTER, 475 TokenType.FIRST, 476 TokenType.FORMAT, 477 TokenType.GLOB, 478 TokenType.IDENTIFIER, 479 TokenType.INDEX, 480 TokenType.ISNULL, 481 TokenType.ILIKE, 482 TokenType.INSERT, 483 TokenType.LIKE, 484 TokenType.MERGE, 485 TokenType.OFFSET, 486 TokenType.PRIMARY_KEY, 487 TokenType.RANGE, 488 TokenType.REPLACE, 489 TokenType.RLIKE, 490 TokenType.ROW, 491 TokenType.UNNEST, 492 TokenType.VAR, 493 TokenType.LEFT, 494 TokenType.RIGHT, 495 TokenType.SEQUENCE, 496 TokenType.DATE, 497 TokenType.DATETIME, 498 TokenType.TABLE, 499 TokenType.TIMESTAMP, 500 TokenType.TIMESTAMPTZ, 501 TokenType.TRUNCATE, 502 TokenType.WINDOW, 503 TokenType.XOR, 504 *TYPE_TOKENS, 505 *SUBQUERY_PREDICATES, 506 } 507 508 CONJUNCTION = { 509 TokenType.AND: exp.And, 510 TokenType.OR: exp.Or, 511 } 512 513 EQUALITY = { 514 TokenType.EQ: exp.EQ, 515 TokenType.NEQ: exp.NEQ, 516 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 517 } 518 519 COMPARISON = { 520 TokenType.GT: exp.GT, 521 TokenType.GTE: exp.GTE, 522 TokenType.LT: exp.LT, 523 TokenType.LTE: exp.LTE, 524 } 525 526 BITWISE = { 527 TokenType.AMP: exp.BitwiseAnd, 528 TokenType.CARET: exp.BitwiseXor, 529 TokenType.PIPE: exp.BitwiseOr, 530 } 531 532 TERM = { 533 TokenType.DASH: exp.Sub, 534 TokenType.PLUS: exp.Add, 535 TokenType.MOD: exp.Mod, 536 TokenType.COLLATE: exp.Collate, 537 } 538 539 FACTOR = { 540 TokenType.DIV: exp.IntDiv, 541 TokenType.LR_ARROW: exp.Distance, 542 TokenType.SLASH: exp.Div, 543 TokenType.STAR: exp.Mul, 544 } 545 546 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 547 548 TIMES = { 549 TokenType.TIME, 550 TokenType.TIMETZ, 551 } 552 553 TIMESTAMPS = { 554 TokenType.TIMESTAMP, 555 
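# Illustrative aside, not part of the original source: the TERM and FACTOR
# tables above drive the precedence-climbing expression parser, so FACTOR
# operators (e.g. *) bind tighter than TERM operators (e.g. +). A minimal
# sketch using the public API:
# >>> import sqlglot
# >>> sqlglot.parse_one("1 + 2 * 3").expression.sql()  # right operand of the Add node
# '2 * 3'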
TokenType.TIMESTAMPTZ, 556 TokenType.TIMESTAMPLTZ, 557 *TIMES, 558 } 559 560 SET_OPERATIONS = { 561 TokenType.UNION, 562 TokenType.INTERSECT, 563 TokenType.EXCEPT, 564 } 565 566 JOIN_METHODS = { 567 TokenType.ASOF, 568 TokenType.NATURAL, 569 TokenType.POSITIONAL, 570 } 571 572 JOIN_SIDES = { 573 TokenType.LEFT, 574 TokenType.RIGHT, 575 TokenType.FULL, 576 } 577 578 JOIN_KINDS = { 579 TokenType.INNER, 580 TokenType.OUTER, 581 TokenType.CROSS, 582 TokenType.SEMI, 583 TokenType.ANTI, 584 } 585 586 JOIN_HINTS: t.Set[str] = set() 587 588 LAMBDAS = { 589 TokenType.ARROW: lambda self, expressions: self.expression( 590 exp.Lambda, 591 this=self._replace_lambda( 592 self._parse_conjunction(), 593 expressions, 594 ), 595 expressions=expressions, 596 ), 597 TokenType.FARROW: lambda self, expressions: self.expression( 598 exp.Kwarg, 599 this=exp.var(expressions[0].name), 600 expression=self._parse_conjunction(), 601 ), 602 } 603 604 COLUMN_OPERATORS = { 605 TokenType.DOT: None, 606 TokenType.DCOLON: lambda self, this, to: self.expression( 607 exp.Cast if self.STRICT_CAST else exp.TryCast, 608 this=this, 609 to=to, 610 ), 611 TokenType.ARROW: lambda self, this, path: self.expression( 612 exp.JSONExtract, 613 this=this, 614 expression=self.dialect.to_json_path(path), 615 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 616 ), 617 TokenType.DARROW: lambda self, this, path: self.expression( 618 exp.JSONExtractScalar, 619 this=this, 620 expression=self.dialect.to_json_path(path), 621 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 622 ), 623 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 624 exp.JSONBExtract, 625 this=this, 626 expression=path, 627 ), 628 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 629 exp.JSONBExtractScalar, 630 this=this, 631 expression=path, 632 ), 633 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 634 exp.JSONBContains, 635 this=this, 636 expression=key, 637 ), 638 } 639 640 EXPRESSION_PARSERS = { 641 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 642 exp.Column: lambda self: self._parse_column(), 643 exp.Condition: lambda self: self._parse_conjunction(), 644 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 645 exp.Expression: lambda self: self._parse_expression(), 646 exp.From: lambda self: self._parse_from(joins=True), 647 exp.Group: lambda self: self._parse_group(), 648 exp.Having: lambda self: self._parse_having(), 649 exp.Identifier: lambda self: self._parse_id_var(), 650 exp.Join: lambda self: self._parse_join(), 651 exp.Lambda: lambda self: self._parse_lambda(), 652 exp.Lateral: lambda self: self._parse_lateral(), 653 exp.Limit: lambda self: self._parse_limit(), 654 exp.Offset: lambda self: self._parse_offset(), 655 exp.Order: lambda self: self._parse_order(), 656 exp.Ordered: lambda self: self._parse_ordered(), 657 exp.Properties: lambda self: self._parse_properties(), 658 exp.Qualify: lambda self: self._parse_qualify(), 659 exp.Returning: lambda self: self._parse_returning(), 660 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 661 exp.Table: lambda self: self._parse_table_parts(), 662 exp.TableAlias: lambda self: self._parse_table_alias(), 663 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 664 exp.Where: lambda self: self._parse_where(), 665 exp.Window: lambda self: self._parse_named_window(), 666 exp.With: lambda self: self._parse_with(), 667 "JOIN_TYPE": lambda self: self._parse_join_parts(), 668 } 669 670 STATEMENT_PARSERS 
= { 671 TokenType.ALTER: lambda self: self._parse_alter(), 672 TokenType.BEGIN: lambda self: self._parse_transaction(), 673 TokenType.CACHE: lambda self: self._parse_cache(), 674 TokenType.COMMENT: lambda self: self._parse_comment(), 675 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 676 TokenType.COPY: lambda self: self._parse_copy(), 677 TokenType.CREATE: lambda self: self._parse_create(), 678 TokenType.DELETE: lambda self: self._parse_delete(), 679 TokenType.DESC: lambda self: self._parse_describe(), 680 TokenType.DESCRIBE: lambda self: self._parse_describe(), 681 TokenType.DROP: lambda self: self._parse_drop(), 682 TokenType.INSERT: lambda self: self._parse_insert(), 683 TokenType.KILL: lambda self: self._parse_kill(), 684 TokenType.LOAD: lambda self: self._parse_load(), 685 TokenType.MERGE: lambda self: self._parse_merge(), 686 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 687 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 688 TokenType.REFRESH: lambda self: self._parse_refresh(), 689 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 690 TokenType.SET: lambda self: self._parse_set(), 691 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 692 TokenType.UNCACHE: lambda self: self._parse_uncache(), 693 TokenType.UPDATE: lambda self: self._parse_update(), 694 TokenType.USE: lambda self: self.expression( 695 exp.Use, 696 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 697 this=self._parse_table(schema=False), 698 ), 699 TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon), 700 } 701 702 UNARY_PARSERS = { 703 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 704 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 705 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 706 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 707 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 708 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 709 } 710 711 STRING_PARSERS = { 712 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 713 exp.RawString, this=token.text 714 ), 715 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 716 exp.National, this=token.text 717 ), 718 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 719 TokenType.STRING: lambda self, token: self.expression( 720 exp.Literal, this=token.text, is_string=True 721 ), 722 TokenType.UNICODE_STRING: lambda self, token: self.expression( 723 exp.UnicodeString, 724 this=token.text, 725 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 726 ), 727 } 728 729 NUMERIC_PARSERS = { 730 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 731 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 732 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 733 TokenType.NUMBER: lambda self, token: self.expression( 734 exp.Literal, this=token.text, is_string=False 735 ), 736 } 737 738 PRIMARY_PARSERS = { 739 **STRING_PARSERS, 740 **NUMERIC_PARSERS, 741 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 742 TokenType.NULL: lambda self, _: self.expression(exp.Null), 743 TokenType.TRUE: lambda self, _: 
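# Illustrative aside, not part of the original source: STATEMENT_PARSERS above
# dispatches on the first significant token of a statement, while
# UNARY_PARSERS and the literal tables handle prefix operators and primaries.
# For example:
# >>> import sqlglot
# >>> type(sqlglot.parse_one("UPDATE t SET x = 1")).__name__
# 'Update'
# >>> type(sqlglot.parse_one("~5")).__name__
# 'BitwiseNot'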
self.expression(exp.Boolean, this=True), 744 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 745 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 746 TokenType.STAR: lambda self, _: self.expression( 747 exp.Star, 748 **{ 749 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 750 "replace": self._parse_star_op("REPLACE"), 751 "rename": self._parse_star_op("RENAME"), 752 }, 753 ), 754 } 755 756 PLACEHOLDER_PARSERS = { 757 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 758 TokenType.PARAMETER: lambda self: self._parse_parameter(), 759 TokenType.COLON: lambda self: ( 760 self.expression(exp.Placeholder, this=self._prev.text) 761 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 762 else None 763 ), 764 } 765 766 RANGE_PARSERS = { 767 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 768 TokenType.GLOB: binary_range_parser(exp.Glob), 769 TokenType.ILIKE: binary_range_parser(exp.ILike), 770 TokenType.IN: lambda self, this: self._parse_in(this), 771 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 772 TokenType.IS: lambda self, this: self._parse_is(this), 773 TokenType.LIKE: binary_range_parser(exp.Like), 774 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 775 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 776 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 777 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 778 } 779 780 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 781 "ALLOWED_VALUES": lambda self: self.expression( 782 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 783 ), 784 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 785 "AUTO": lambda self: self._parse_auto_property(), 786 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 787 "BACKUP": lambda self: self.expression( 788 exp.BackupProperty, this=self._parse_var(any_token=True) 789 ), 790 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 791 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 792 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 793 "CHECKSUM": lambda self: self._parse_checksum(), 794 "CLUSTER BY": lambda self: self._parse_cluster(), 795 "CLUSTERED": lambda self: self._parse_clustered_by(), 796 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 797 exp.CollateProperty, **kwargs 798 ), 799 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 800 "CONTAINS": lambda self: self._parse_contains_property(), 801 "COPY": lambda self: self._parse_copy_property(), 802 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 803 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 804 "DEFINER": lambda self: self._parse_definer(), 805 "DETERMINISTIC": lambda self: self.expression( 806 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 807 ), 808 "DISTKEY": lambda self: self._parse_distkey(), 809 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 810 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 811 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 812 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 813 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 814 "FORMAT": lambda self: 
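# Illustrative aside, not part of the original source: entries in the
# RANGE_PARSERS table above fire after a left-hand side has already been
# parsed and produce predicate nodes such as exp.Between or exp.In:
# >>> import sqlglot
# >>> from sqlglot import exp
# >>> sqlglot.parse_one("x BETWEEN 1 AND 2").assert_is(exp.Between).sql()
# 'x BETWEEN 1 AND 2'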
self._parse_property_assignment(exp.FileFormatProperty), 815 "FREESPACE": lambda self: self._parse_freespace(), 816 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 817 "HEAP": lambda self: self.expression(exp.HeapProperty), 818 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 819 "IMMUTABLE": lambda self: self.expression( 820 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 821 ), 822 "INHERITS": lambda self: self.expression( 823 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 824 ), 825 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 826 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 827 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 828 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 829 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 830 "LIKE": lambda self: self._parse_create_like(), 831 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 832 "LOCK": lambda self: self._parse_locking(), 833 "LOCKING": lambda self: self._parse_locking(), 834 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 835 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 836 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 837 "MODIFIES": lambda self: self._parse_modifies_property(), 838 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 839 "NO": lambda self: self._parse_no_property(), 840 "ON": lambda self: self._parse_on_property(), 841 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 842 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 843 "PARTITION": lambda self: self._parse_partitioned_of(), 844 "PARTITION BY": lambda self: self._parse_partitioned_by(), 845 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 846 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 847 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 848 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 849 "READS": lambda self: self._parse_reads_property(), 850 "REMOTE": lambda self: self._parse_remote_with_connection(), 851 "RETURNS": lambda self: self._parse_returns(), 852 "STRICT": lambda self: self.expression(exp.StrictProperty), 853 "ROW": lambda self: self._parse_row(), 854 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 855 "SAMPLE": lambda self: self.expression( 856 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 857 ), 858 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 859 "SETTINGS": lambda self: self.expression( 860 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 861 ), 862 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 863 "SORTKEY": lambda self: self._parse_sortkey(), 864 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 865 "STABLE": lambda self: self.expression( 866 exp.StabilityProperty, this=exp.Literal.string("STABLE") 867 ), 868 "STORED": lambda self: self._parse_stored(), 869 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 870 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 871 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 872 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 873 "TO": lambda 
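# Illustrative aside, not part of the original source: PROPERTY_PARSERS keys
# are matched textually while parsing DDL, e.g. the ENGINE entry above. A
# minimal sketch (exact tree shape may vary by sqlglot version):
# >>> import sqlglot
# >>> from sqlglot import exp
# >>> ddl = sqlglot.parse_one("CREATE TABLE t (x INT) ENGINE=InnoDB", read="mysql")
# >>> ddl.find(exp.EngineProperty).this.name
# 'InnoDB'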
self: self._parse_to_table(), 874 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 875 "TRANSFORM": lambda self: self.expression( 876 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 877 ), 878 "TTL": lambda self: self._parse_ttl(), 879 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 880 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 881 "VOLATILE": lambda self: self._parse_volatile_property(), 882 "WITH": lambda self: self._parse_with_property(), 883 } 884 885 CONSTRAINT_PARSERS = { 886 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 887 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 888 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 889 "CHARACTER SET": lambda self: self.expression( 890 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 891 ), 892 "CHECK": lambda self: self.expression( 893 exp.CheckColumnConstraint, 894 this=self._parse_wrapped(self._parse_conjunction), 895 enforced=self._match_text_seq("ENFORCED"), 896 ), 897 "COLLATE": lambda self: self.expression( 898 exp.CollateColumnConstraint, this=self._parse_var() 899 ), 900 "COMMENT": lambda self: self.expression( 901 exp.CommentColumnConstraint, this=self._parse_string() 902 ), 903 "COMPRESS": lambda self: self._parse_compress(), 904 "CLUSTERED": lambda self: self.expression( 905 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 906 ), 907 "NONCLUSTERED": lambda self: self.expression( 908 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 909 ), 910 "DEFAULT": lambda self: self.expression( 911 exp.DefaultColumnConstraint, this=self._parse_bitwise() 912 ), 913 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 914 "EPHEMERAL": lambda self: self.expression( 915 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 916 ), 917 "EXCLUDE": lambda self: self.expression( 918 exp.ExcludeColumnConstraint, this=self._parse_index_params() 919 ), 920 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 921 "FORMAT": lambda self: self.expression( 922 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 923 ), 924 "GENERATED": lambda self: self._parse_generated_as_identity(), 925 "IDENTITY": lambda self: self._parse_auto_increment(), 926 "INLINE": lambda self: self._parse_inline(), 927 "LIKE": lambda self: self._parse_create_like(), 928 "NOT": lambda self: self._parse_not_constraint(), 929 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 930 "ON": lambda self: ( 931 self._match(TokenType.UPDATE) 932 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 933 ) 934 or self.expression(exp.OnProperty, this=self._parse_id_var()), 935 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 936 "PERIOD": lambda self: self._parse_period_for_system_time(), 937 "PRIMARY KEY": lambda self: self._parse_primary_key(), 938 "REFERENCES": lambda self: self._parse_references(match=False), 939 "TITLE": lambda self: self.expression( 940 exp.TitleColumnConstraint, this=self._parse_var_or_string() 941 ), 942 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 943 "UNIQUE": lambda self: self._parse_unique(), 944 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 945 "WITH": lambda self: 
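# Illustrative aside, not part of the original source: the CONSTRAINT_PARSERS
# table being built here turns the tokens following a column's type into the
# kinds of exp.ColumnConstraint. For example:
# >>> import sqlglot
# >>> from sqlglot import exp
# >>> col = sqlglot.parse_one("CREATE TABLE t (x INT NOT NULL DEFAULT 0)").find(exp.ColumnDef)
# >>> [type(c.kind).__name__ for c in col.constraints]
# ['NotNullColumnConstraint', 'DefaultColumnConstraint']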
self.expression( 946 exp.Properties, expressions=self._parse_wrapped_properties() 947 ), 948 } 949 950 ALTER_PARSERS = { 951 "ADD": lambda self: self._parse_alter_table_add(), 952 "ALTER": lambda self: self._parse_alter_table_alter(), 953 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 954 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 955 "DROP": lambda self: self._parse_alter_table_drop(), 956 "RENAME": lambda self: self._parse_alter_table_rename(), 957 "SET": lambda self: self._parse_alter_table_set(), 958 } 959 960 ALTER_ALTER_PARSERS = { 961 "DISTKEY": lambda self: self._parse_alter_diststyle(), 962 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 963 "SORTKEY": lambda self: self._parse_alter_sortkey(), 964 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 965 } 966 967 SCHEMA_UNNAMED_CONSTRAINTS = { 968 "CHECK", 969 "EXCLUDE", 970 "FOREIGN KEY", 971 "LIKE", 972 "PERIOD", 973 "PRIMARY KEY", 974 "UNIQUE", 975 } 976 977 NO_PAREN_FUNCTION_PARSERS = { 978 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 979 "CASE": lambda self: self._parse_case(), 980 "IF": lambda self: self._parse_if(), 981 "NEXT": lambda self: self._parse_next_value_for(), 982 } 983 984 INVALID_FUNC_NAME_TOKENS = { 985 TokenType.IDENTIFIER, 986 TokenType.STRING, 987 } 988 989 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 990 991 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 992 993 FUNCTION_PARSERS = { 994 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 995 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 996 "DECODE": lambda self: self._parse_decode(), 997 "EXTRACT": lambda self: self._parse_extract(), 998 "JSON_OBJECT": lambda self: self._parse_json_object(), 999 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1000 "JSON_TABLE": lambda self: self._parse_json_table(), 1001 "MATCH": lambda self: self._parse_match_against(), 1002 "OPENJSON": lambda self: self._parse_open_json(), 1003 "POSITION": lambda self: self._parse_position(), 1004 "PREDICT": lambda self: self._parse_predict(), 1005 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1006 "STRING_AGG": lambda self: self._parse_string_agg(), 1007 "SUBSTRING": lambda self: self._parse_substring(), 1008 "TRIM": lambda self: self._parse_trim(), 1009 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1010 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1011 } 1012 1013 QUERY_MODIFIER_PARSERS = { 1014 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1015 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1016 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1017 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1018 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1019 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1020 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1021 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1022 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1023 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1024 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1025 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1026 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1027 TokenType.TABLE_SAMPLE: lambda self: ("sample", 
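# Illustrative aside, not part of the original source: FUNCTION_PARSERS above
# covers functions with non-standard call syntax; e.g. the SAFE_CAST entry
# parses into a non-strict cast, which other dialects render as TRY_CAST. A
# sketch, assuming these dialect mappings in the installed version:
# >>> import sqlglot
# >>> sqlglot.transpile("SELECT SAFE_CAST(x AS INT64)", read="bigquery", write="duckdb")[0]
# 'SELECT TRY_CAST(x AS BIGINT)'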
self._parse_table_sample(as_modifier=True)), 1028 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1029 TokenType.CLUSTER_BY: lambda self: ( 1030 "cluster", 1031 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1032 ), 1033 TokenType.DISTRIBUTE_BY: lambda self: ( 1034 "distribute", 1035 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1036 ), 1037 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1038 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1039 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1040 } 1041 1042 SET_PARSERS = { 1043 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1044 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1045 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1046 "TRANSACTION": lambda self: self._parse_set_transaction(), 1047 } 1048 1049 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1050 1051 TYPE_LITERAL_PARSERS = { 1052 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1053 } 1054 1055 TYPE_CONVERTER: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1056 1057 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1058 1059 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1060 1061 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1062 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1063 "ISOLATION": ( 1064 ("LEVEL", "REPEATABLE", "READ"), 1065 ("LEVEL", "READ", "COMMITTED"), 1066 ("LEVEL", "READ", "UNCOMMITTED"), 1067 ("LEVEL", "SERIALIZABLE"), 1068 ), 1069 "READ": ("WRITE", "ONLY"), 1070 } 1071 1072 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1073 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1074 ) 1075 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1076 1077 CREATE_SEQUENCE: OPTIONS_TYPE = { 1078 "SCALE": ("EXTEND", "NOEXTEND"), 1079 "SHARD": ("EXTEND", "NOEXTEND"), 1080 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1081 **dict.fromkeys( 1082 ( 1083 "SESSION", 1084 "GLOBAL", 1085 "KEEP", 1086 "NOKEEP", 1087 "ORDER", 1088 "NOORDER", 1089 "NOCACHE", 1090 "CYCLE", 1091 "NOCYCLE", 1092 "NOMINVALUE", 1093 "NOMAXVALUE", 1094 "NOSCALE", 1095 "NOSHARD", 1096 ), 1097 tuple(), 1098 ), 1099 } 1100 1101 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1102 1103 USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple()) 1104 1105 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1106 1107 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1108 1109 CLONE_KEYWORDS = {"CLONE", "COPY"} 1110 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1111 1112 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1113 1114 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1115 1116 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1117 1118 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1119 1120 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1121 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1122 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1123 1124 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1125 1126 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1127 1128
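# Illustrative aside, not part of the original source: OPTIONS_TYPE tables such
# as CONFLICT_ACTIONS above describe multi-word options that
# _parse_var_from_options (defined later in this module) matches as a unit.
# For instance, assuming Postgres round-trip support in the installed version:
# >>> import sqlglot
# >>> sqlglot.parse_one(
# ...     "INSERT INTO t VALUES (1) ON CONFLICT DO NOTHING", read="postgres"
# ... ).sql(dialect="postgres")
# 'INSERT INTO t VALUES (1) ON CONFLICT DO NOTHING'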
ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 1129 1130 DISTINCT_TOKENS = {TokenType.DISTINCT} 1131 1132 NULL_TOKENS = {TokenType.NULL} 1133 1134 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1135 1136 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1137 1138 STRICT_CAST = True 1139 1140 PREFIXED_PIVOT_COLUMNS = False 1141 IDENTIFY_PIVOT_STRINGS = False 1142 1143 LOG_DEFAULTS_TO_LN = False 1144 1145 # Whether ADD is present for each column added by ALTER TABLE 1146 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1147 1148 # Whether the table sample clause expects CSV syntax 1149 TABLESAMPLE_CSV = False 1150 1151 # The default method used for table sampling 1152 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1153 1154 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1155 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1156 1157 # Whether the TRIM function expects the characters to trim as its first argument 1158 TRIM_PATTERN_FIRST = False 1159 1160 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1161 STRING_ALIASES = False 1162 1163 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1164 MODIFIERS_ATTACHED_TO_UNION = True 1165 UNION_MODIFIERS = {"order", "limit", "offset"} 1166 1167 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1168 NO_PAREN_IF_COMMANDS = True 1169 1170 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1171 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1172 1173 # Whether the `:` operator is used to extract a value from a JSON document 1174 COLON_IS_JSON_EXTRACT = False 1175 1176 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1177 # If this is True and '(' is not found, the keyword will be treated as an identifier 1178 VALUES_FOLLOWED_BY_PAREN = True 1179 1180 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift) 1181 SUPPORTS_IMPLICIT_UNNEST = False 1182 1183 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1184 INTERVAL_SPANS = True 1185 1186 # Whether a PARTITION clause can follow a table reference 1187 SUPPORTS_PARTITION_SELECTION = False 1188 1189 __slots__ = ( 1190 "error_level", 1191 "error_message_context", 1192 "max_errors", 1193 "dialect", 1194 "sql", 1195 "errors", 1196 "_tokens", 1197 "_index", 1198 "_curr", 1199 "_next", 1200 "_prev", 1201 "_prev_comments", 1202 ) 1203 1204 # Autofilled 1205 SHOW_TRIE: t.Dict = {} 1206 SET_TRIE: t.Dict = {} 1207 1208 def __init__( 1209 self, 1210 error_level: t.Optional[ErrorLevel] = None, 1211 error_message_context: int = 100, 1212 max_errors: int = 3, 1213 dialect: DialectType = None, 1214 ): 1215 from sqlglot.dialects import Dialect 1216 1217 self.error_level = error_level or ErrorLevel.IMMEDIATE 1218 self.error_message_context = error_message_context 1219 self.max_errors = max_errors 1220 self.dialect = Dialect.get_or_raise(dialect) 1221 self.reset() 1222 1223 def reset(self): 1224 self.sql = "" 1225 self.errors = [] 1226 self._tokens = [] 1227 self._index = 0 1228 self._curr = None 1229 self._next = None 1230 self._prev = None 1231 self._prev_comments = None 1232 1233 def parse( 1234 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1235 ) -> t.List[t.Optional[exp.Expression]]: 1236 """ 1237 Parses a list of tokens and returns a list of syntax trees, one tree 1238 per parsed SQL statement. 
1239 1240 Args: 1241 raw_tokens: The list of tokens. 1242 sql: The original SQL string, used to produce helpful debug messages. 1243 1244 Returns: 1245 The list of the produced syntax trees. 1246 """ 1247 return self._parse( 1248 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1249 ) 1250 1251 def parse_into( 1252 self, 1253 expression_types: exp.IntoType, 1254 raw_tokens: t.List[Token], 1255 sql: t.Optional[str] = None, 1256 ) -> t.List[t.Optional[exp.Expression]]: 1257 """ 1258 Parses a list of tokens into a given Expression type. If a collection of Expression 1259 types is given instead, this method will try to parse the token list into each one 1260 of them, stopping at the first for which the parsing succeeds. 1261 1262 Args: 1263 expression_types: The expression type(s) to try and parse the token list into. 1264 raw_tokens: The list of tokens. 1265 sql: The original SQL string, used to produce helpful debug messages. 1266 1267 Returns: 1268 The target Expression. 1269 """ 1270 errors = [] 1271 for expression_type in ensure_list(expression_types): 1272 parser = self.EXPRESSION_PARSERS.get(expression_type) 1273 if not parser: 1274 raise TypeError(f"No parser registered for {expression_type}") 1275 1276 try: 1277 return self._parse(parser, raw_tokens, sql) 1278 except ParseError as e: 1279 e.errors[0]["into_expression"] = expression_type 1280 errors.append(e) 1281 1282 raise ParseError( 1283 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1284 errors=merge_errors(errors), 1285 ) from errors[-1] 1286 1287 def _parse( 1288 self, 1289 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1290 raw_tokens: t.List[Token], 1291 sql: t.Optional[str] = None, 1292 ) -> t.List[t.Optional[exp.Expression]]: 1293 self.reset() 1294 self.sql = sql or "" 1295 1296 total = len(raw_tokens) 1297 chunks: t.List[t.List[Token]] = [[]] 1298 1299 for i, token in enumerate(raw_tokens): 1300 if token.token_type == TokenType.SEMICOLON: 1301 if token.comments: 1302 chunks.append([token]) 1303 1304 if i < total - 1: 1305 chunks.append([]) 1306 else: 1307 chunks[-1].append(token) 1308 1309 expressions = [] 1310 1311 for tokens in chunks: 1312 self._index = -1 1313 self._tokens = tokens 1314 self._advance() 1315 1316 expressions.append(parse_method(self)) 1317 1318 if self._index < len(self._tokens): 1319 self.raise_error("Invalid expression / Unexpected token") 1320 1321 self.check_errors() 1322 1323 return expressions 1324 1325 def check_errors(self) -> None: 1326 """Logs or raises any found errors, depending on the chosen error level setting.""" 1327 if self.error_level == ErrorLevel.WARN: 1328 for error in self.errors: 1329 logger.error(str(error)) 1330 elif self.error_level == ErrorLevel.RAISE and self.errors: 1331 raise ParseError( 1332 concat_messages(self.errors, self.max_errors), 1333 errors=merge_errors(self.errors), 1334 ) 1335 1336 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1337 """ 1338 Appends an error in the list of recorded errors or raises it, depending on the chosen 1339 error level setting. 1340 """ 1341 token = token or self._curr or self._prev or Token.string("") 1342 start = token.start 1343 end = token.end + 1 1344 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1345 highlight = self.sql[start:end] 1346 end_context = self.sql[end : end + self.error_message_context] 1347 1348 error = ParseError.new( 1349 f"{message}. 
Line {token.line}, Col: {token.col}.\n" 1350 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1351 description=message, 1352 line=token.line, 1353 col=token.col, 1354 start_context=start_context, 1355 highlight=highlight, 1356 end_context=end_context, 1357 ) 1358 1359 if self.error_level == ErrorLevel.IMMEDIATE: 1360 raise error 1361 1362 self.errors.append(error) 1363 1364 def expression( 1365 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1366 ) -> E: 1367 """ 1368 Creates a new, validated Expression. 1369 1370 Args: 1371 exp_class: The expression class to instantiate. 1372 comments: An optional list of comments to attach to the expression. 1373 kwargs: The arguments to set for the expression along with their respective values. 1374 1375 Returns: 1376 The target expression. 1377 """ 1378 instance = exp_class(**kwargs) 1379 instance.add_comments(comments) if comments else self._add_comments(instance) 1380 return self.validate_expression(instance) 1381 1382 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1383 if expression and self._prev_comments: 1384 expression.add_comments(self._prev_comments) 1385 self._prev_comments = None 1386 1387 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1388 """ 1389 Validates an Expression, making sure that all its mandatory arguments are set. 1390 1391 Args: 1392 expression: The expression to validate. 1393 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1394 1395 Returns: 1396 The validated expression. 1397 """ 1398 if self.error_level != ErrorLevel.IGNORE: 1399 for error_message in expression.error_messages(args): 1400 self.raise_error(error_message) 1401 1402 return expression 1403 1404 def _find_sql(self, start: Token, end: Token) -> str: 1405 return self.sql[start.start : end.end + 1] 1406 1407 def _is_connected(self) -> bool: 1408 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1409 1410 def _advance(self, times: int = 1) -> None: 1411 self._index += times 1412 self._curr = seq_get(self._tokens, self._index) 1413 self._next = seq_get(self._tokens, self._index + 1) 1414 1415 if self._index > 0: 1416 self._prev = self._tokens[self._index - 1] 1417 self._prev_comments = self._prev.comments 1418 else: 1419 self._prev = None 1420 self._prev_comments = None 1421 1422 def _retreat(self, index: int) -> None: 1423 if index != self._index: 1424 self._advance(index - self._index) 1425 1426 def _warn_unsupported(self) -> None: 1427 if len(self._tokens) <= 1: 1428 return 1429 1430 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1431 # interested in emitting a warning for the one being currently processed. 1432 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1433 1434 logger.warning( 1435 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1436 ) 1437 1438 def _parse_command(self) -> exp.Command: 1439 self._warn_unsupported() 1440 return self.expression( 1441 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1442 ) 1443 1444 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1445 """ 1446 Attempts to backtrack if a parse function that contains a try/except internally raises an error.
This behavior can 1447 be different depending on the user-set ErrorLevel, so _try_parse aims to solve this by setting & resetting 1448 the parser state accordingly. 1449 """ 1450 index = self._index 1451 error_level = self.error_level 1452 1453 self.error_level = ErrorLevel.IMMEDIATE 1454 try: 1455 this = parse_method() 1456 except ParseError: 1457 this = None 1458 finally: 1459 if not this or retreat: 1460 self._retreat(index) 1461 self.error_level = error_level 1462 1463 return this 1464 1465 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1466 start = self._prev 1467 exists = self._parse_exists() if allow_exists else None 1468 1469 self._match(TokenType.ON) 1470 1471 materialized = self._match_text_seq("MATERIALIZED") 1472 kind = self._match_set(self.CREATABLES) and self._prev 1473 if not kind: 1474 return self._parse_as_command(start) 1475 1476 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1477 this = self._parse_user_defined_function(kind=kind.token_type) 1478 elif kind.token_type == TokenType.TABLE: 1479 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1480 elif kind.token_type == TokenType.COLUMN: 1481 this = self._parse_column() 1482 else: 1483 this = self._parse_id_var() 1484 1485 self._match(TokenType.IS) 1486 1487 return self.expression( 1488 exp.Comment, 1489 this=this, 1490 kind=kind.text, 1491 expression=self._parse_string(), 1492 exists=exists, 1493 materialized=materialized, 1494 ) 1495 1496 def _parse_to_table( 1497 self, 1498 ) -> exp.ToTableProperty: 1499 table = self._parse_table_parts(schema=True) 1500 return self.expression(exp.ToTableProperty, this=table) 1501 1502 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1503 def _parse_ttl(self) -> exp.Expression: 1504 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1505 this = self._parse_bitwise() 1506 1507 if self._match_text_seq("DELETE"): 1508 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1509 if self._match_text_seq("RECOMPRESS"): 1510 return self.expression( 1511 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1512 ) 1513 if self._match_text_seq("TO", "DISK"): 1514 return self.expression( 1515 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1516 ) 1517 if self._match_text_seq("TO", "VOLUME"): 1518 return self.expression( 1519 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1520 ) 1521 1522 return this 1523 1524 expressions = self._parse_csv(_parse_ttl_action) 1525 where = self._parse_where() 1526 group = self._parse_group() 1527 1528 aggregates = None 1529 if group and self._match(TokenType.SET): 1530 aggregates = self._parse_csv(self._parse_set_item) 1531 1532 return self.expression( 1533 exp.MergeTreeTTL, 1534 expressions=expressions, 1535 where=where, 1536 group=group, 1537 aggregates=aggregates, 1538 ) 1539 1540 def _parse_statement(self) -> t.Optional[exp.Expression]: 1541 if self._curr is None: 1542 return None 1543 1544 if self._match_set(self.STATEMENT_PARSERS): 1545 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1546 1547 if self._match_set(self.dialect.tokenizer.COMMANDS): 1548 return self._parse_command() 1549 1550 expression = self._parse_expression() 1551 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1552 return self._parse_query_modifiers(expression) 1553 1554 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1555 start =
self._prev 1556 temporary = self._match(TokenType.TEMPORARY) 1557 materialized = self._match_text_seq("MATERIALIZED") 1558 1559 kind = self._match_set(self.CREATABLES) and self._prev.text 1560 if not kind: 1561 return self._parse_as_command(start) 1562 1563 if_exists = exists or self._parse_exists() 1564 table = self._parse_table_parts( 1565 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1566 ) 1567 1568 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1569 1570 if self._match(TokenType.L_PAREN, advance=False): 1571 expressions = self._parse_wrapped_csv(self._parse_types) 1572 else: 1573 expressions = None 1574 1575 return self.expression( 1576 exp.Drop, 1577 comments=start.comments, 1578 exists=if_exists, 1579 this=table, 1580 expressions=expressions, 1581 kind=kind.upper(), 1582 temporary=temporary, 1583 materialized=materialized, 1584 cascade=self._match_text_seq("CASCADE"), 1585 constraints=self._match_text_seq("CONSTRAINTS"), 1586 purge=self._match_text_seq("PURGE"), 1587 cluster=cluster, 1588 ) 1589 1590 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1591 return ( 1592 self._match_text_seq("IF") 1593 and (not not_ or self._match(TokenType.NOT)) 1594 and self._match(TokenType.EXISTS) 1595 ) 1596 1597 def _parse_create(self) -> exp.Create | exp.Command: 1598 # Note: this can't be None because we've matched a statement parser 1599 start = self._prev 1600 comments = self._prev_comments 1601 1602 replace = ( 1603 start.token_type == TokenType.REPLACE 1604 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1605 or self._match_pair(TokenType.OR, TokenType.ALTER) 1606 ) 1607 1608 unique = self._match(TokenType.UNIQUE) 1609 1610 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1611 self._advance() 1612 1613 properties = None 1614 create_token = self._match_set(self.CREATABLES) and self._prev 1615 1616 if not create_token: 1617 # exp.Properties.Location.POST_CREATE 1618 properties = self._parse_properties() 1619 create_token = self._match_set(self.CREATABLES) and self._prev 1620 1621 if not properties or not create_token: 1622 return self._parse_as_command(start) 1623 1624 exists = self._parse_exists(not_=True) 1625 this = None 1626 expression: t.Optional[exp.Expression] = None 1627 indexes = None 1628 no_schema_binding = None 1629 begin = None 1630 end = None 1631 clone = None 1632 1633 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1634 nonlocal properties 1635 if properties and temp_props: 1636 properties.expressions.extend(temp_props.expressions) 1637 elif temp_props: 1638 properties = temp_props 1639 1640 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1641 this = self._parse_user_defined_function(kind=create_token.token_type) 1642 1643 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1644 extend_props(self._parse_properties()) 1645 1646 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1647 extend_props(self._parse_properties()) 1648 1649 if not expression: 1650 if self._match(TokenType.COMMAND): 1651 expression = self._parse_as_command(self._prev) 1652 else: 1653 begin = self._match(TokenType.BEGIN) 1654 return_ = self._match_text_seq("RETURN") 1655 1656 if self._match(TokenType.STRING, advance=False): 1657 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1658 # # 
https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1659 expression = self._parse_string() 1660 extend_props(self._parse_properties()) 1661 else: 1662 expression = self._parse_statement() 1663 1664 end = self._match_text_seq("END") 1665 1666 if return_: 1667 expression = self.expression(exp.Return, this=expression) 1668 elif create_token.token_type == TokenType.INDEX: 1669 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 1670 if not self._match(TokenType.ON): 1671 index = self._parse_id_var() 1672 anonymous = False 1673 else: 1674 index = None 1675 anonymous = True 1676 1677 this = self._parse_index(index=index, anonymous=anonymous) 1678 elif create_token.token_type in self.DB_CREATABLES: 1679 table_parts = self._parse_table_parts( 1680 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1681 ) 1682 1683 # exp.Properties.Location.POST_NAME 1684 self._match(TokenType.COMMA) 1685 extend_props(self._parse_properties(before=True)) 1686 1687 this = self._parse_schema(this=table_parts) 1688 1689 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1690 extend_props(self._parse_properties()) 1691 1692 self._match(TokenType.ALIAS) 1693 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1694 # exp.Properties.Location.POST_ALIAS 1695 extend_props(self._parse_properties()) 1696 1697 if create_token.token_type == TokenType.SEQUENCE: 1698 expression = self._parse_types() 1699 extend_props(self._parse_properties()) 1700 else: 1701 expression = self._parse_ddl_select() 1702 1703 if create_token.token_type == TokenType.TABLE: 1704 # exp.Properties.Location.POST_EXPRESSION 1705 extend_props(self._parse_properties()) 1706 1707 indexes = [] 1708 while True: 1709 index = self._parse_index() 1710 1711 # exp.Properties.Location.POST_INDEX 1712 extend_props(self._parse_properties()) 1713 1714 if not index: 1715 break 1716 else: 1717 self._match(TokenType.COMMA) 1718 indexes.append(index) 1719 elif create_token.token_type == TokenType.VIEW: 1720 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1721 no_schema_binding = True 1722 1723 shallow = self._match_text_seq("SHALLOW") 1724 1725 if self._match_texts(self.CLONE_KEYWORDS): 1726 copy = self._prev.text.lower() == "copy" 1727 clone = self.expression( 1728 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1729 ) 1730 1731 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1732 return self._parse_as_command(start) 1733 1734 return self.expression( 1735 exp.Create, 1736 comments=comments, 1737 this=this, 1738 kind=create_token.text.upper(), 1739 replace=replace, 1740 unique=unique, 1741 expression=expression, 1742 exists=exists, 1743 properties=properties, 1744 indexes=indexes, 1745 no_schema_binding=no_schema_binding, 1746 begin=begin, 1747 end=end, 1748 clone=clone, 1749 ) 1750 1751 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1752 seq = exp.SequenceProperties() 1753 1754 options = [] 1755 index = self._index 1756 1757 while self._curr: 1758 self._match(TokenType.COMMA) 1759 if self._match_text_seq("INCREMENT"): 1760 self._match_text_seq("BY") 1761 self._match_text_seq("=") 1762 seq.set("increment", self._parse_term()) 1763 elif self._match_text_seq("MINVALUE"): 1764 seq.set("minvalue", self._parse_term()) 1765 elif self._match_text_seq("MAXVALUE"): 1766 seq.set("maxvalue", self._parse_term()) 1767 elif self._match(TokenType.START_WITH) or 
self._match_text_seq("START"): 1768 self._match_text_seq("=") 1769 seq.set("start", self._parse_term()) 1770 elif self._match_text_seq("CACHE"): 1771 # T-SQL allows empty CACHE which is initialized dynamically 1772 seq.set("cache", self._parse_number() or True) 1773 elif self._match_text_seq("OWNED", "BY"): 1774 # "OWNED BY NONE" is the default 1775 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1776 else: 1777 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1778 if opt: 1779 options.append(opt) 1780 else: 1781 break 1782 1783 seq.set("options", options if options else None) 1784 return None if self._index == index else seq 1785 1786 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1787 # only used for teradata currently 1788 self._match(TokenType.COMMA) 1789 1790 kwargs = { 1791 "no": self._match_text_seq("NO"), 1792 "dual": self._match_text_seq("DUAL"), 1793 "before": self._match_text_seq("BEFORE"), 1794 "default": self._match_text_seq("DEFAULT"), 1795 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1796 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1797 "after": self._match_text_seq("AFTER"), 1798 "minimum": self._match_texts(("MIN", "MINIMUM")), 1799 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1800 } 1801 1802 if self._match_texts(self.PROPERTY_PARSERS): 1803 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1804 try: 1805 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1806 except TypeError: 1807 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1808 1809 return None 1810 1811 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1812 return self._parse_wrapped_csv(self._parse_property) 1813 1814 def _parse_property(self) -> t.Optional[exp.Expression]: 1815 if self._match_texts(self.PROPERTY_PARSERS): 1816 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1817 1818 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1819 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1820 1821 if self._match_text_seq("COMPOUND", "SORTKEY"): 1822 return self._parse_sortkey(compound=True) 1823 1824 if self._match_text_seq("SQL", "SECURITY"): 1825 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1826 1827 index = self._index 1828 key = self._parse_column() 1829 1830 if not self._match(TokenType.EQ): 1831 self._retreat(index) 1832 return self._parse_sequence_properties() 1833 1834 return self.expression( 1835 exp.Property, 1836 this=key.to_dot() if isinstance(key, exp.Column) else key, 1837 value=self._parse_bitwise() or self._parse_var(any_token=True), 1838 ) 1839 1840 def _parse_stored(self) -> exp.FileFormatProperty: 1841 self._match(TokenType.ALIAS) 1842 1843 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1844 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1845 1846 return self.expression( 1847 exp.FileFormatProperty, 1848 this=( 1849 self.expression( 1850 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1851 ) 1852 if input_format or output_format 1853 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1854 ), 1855 ) 1856 1857 def _parse_unquoted_field(self): 1858 field = self._parse_field() 1859 if isinstance(field, exp.Identifier) and not field.quoted: 1860 field = exp.var(field) 1861 1862 return field 1863 1864 def 
_parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1865 self._match(TokenType.EQ) 1866 self._match(TokenType.ALIAS) 1867 1868 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 1869 1870 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1871 properties = [] 1872 while True: 1873 if before: 1874 prop = self._parse_property_before() 1875 else: 1876 prop = self._parse_property() 1877 if not prop: 1878 break 1879 for p in ensure_list(prop): 1880 properties.append(p) 1881 1882 if properties: 1883 return self.expression(exp.Properties, expressions=properties) 1884 1885 return None 1886 1887 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1888 return self.expression( 1889 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1890 ) 1891 1892 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1893 if self._index >= 2: 1894 pre_volatile_token = self._tokens[self._index - 2] 1895 else: 1896 pre_volatile_token = None 1897 1898 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1899 return exp.VolatileProperty() 1900 1901 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1902 1903 def _parse_retention_period(self) -> exp.Var: 1904 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 1905 number = self._parse_number() 1906 number_str = f"{number} " if number else "" 1907 unit = self._parse_var(any_token=True) 1908 return exp.var(f"{number_str}{unit}") 1909 1910 def _parse_system_versioning_property( 1911 self, with_: bool = False 1912 ) -> exp.WithSystemVersioningProperty: 1913 self._match(TokenType.EQ) 1914 prop = self.expression( 1915 exp.WithSystemVersioningProperty, 1916 **{ # type: ignore 1917 "on": True, 1918 "with": with_, 1919 }, 1920 ) 1921 1922 if self._match_text_seq("OFF"): 1923 prop.set("on", False) 1924 return prop 1925 1926 self._match(TokenType.ON) 1927 if self._match(TokenType.L_PAREN): 1928 while self._curr and not self._match(TokenType.R_PAREN): 1929 if self._match_text_seq("HISTORY_TABLE", "="): 1930 prop.set("this", self._parse_table_parts()) 1931 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 1932 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 1933 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 1934 prop.set("retention_period", self._parse_retention_period()) 1935 1936 self._match(TokenType.COMMA) 1937 1938 return prop 1939 1940 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 1941 self._match(TokenType.EQ) 1942 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 1943 prop = self.expression(exp.DataDeletionProperty, on=on) 1944 1945 if self._match(TokenType.L_PAREN): 1946 while self._curr and not self._match(TokenType.R_PAREN): 1947 if self._match_text_seq("FILTER_COLUMN", "="): 1948 prop.set("filter_column", self._parse_column()) 1949 elif self._match_text_seq("RETENTION_PERIOD", "="): 1950 prop.set("retention_period", self._parse_retention_period()) 1951 1952 self._match(TokenType.COMMA) 1953 1954 return prop 1955 1956 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1957 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 1958 prop = self._parse_system_versioning_property(with_=True) 1959 self._match_r_paren() 1960 return prop 1961 1962 if self._match(TokenType.L_PAREN, advance=False): 
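# Illustrative aside, not part of the original source: _parse_with_property
# routes WITH-prefixed clauses, e.g. the T-SQL view attributes listed in
# VIEW_ATTRIBUTES. A minimal sketch:
# >>> import sqlglot
# >>> from sqlglot import exp
# >>> v = sqlglot.parse_one("CREATE VIEW v WITH SCHEMABINDING AS SELECT 1 AS x", read="tsql")
# >>> v.find(exp.ViewAttributeProperty).this
# 'SCHEMABINDING'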
1963 return self._parse_wrapped_properties() 1964 1965 if self._match_text_seq("JOURNAL"): 1966 return self._parse_withjournaltable() 1967 1968 if self._match_texts(self.VIEW_ATTRIBUTES): 1969 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 1970 1971 if self._match_text_seq("DATA"): 1972 return self._parse_withdata(no=False) 1973 elif self._match_text_seq("NO", "DATA"): 1974 return self._parse_withdata(no=True) 1975 1976 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 1977 return self._parse_serde_properties(with_=True) 1978 1979 if not self._next: 1980 return None 1981 1982 return self._parse_withisolatedloading() 1983 1984 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1985 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1986 self._match(TokenType.EQ) 1987 1988 user = self._parse_id_var() 1989 self._match(TokenType.PARAMETER) 1990 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1991 1992 if not user or not host: 1993 return None 1994 1995 return exp.DefinerProperty(this=f"{user}@{host}") 1996 1997 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1998 self._match(TokenType.TABLE) 1999 self._match(TokenType.EQ) 2000 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2001 2002 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2003 return self.expression(exp.LogProperty, no=no) 2004 2005 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2006 return self.expression(exp.JournalProperty, **kwargs) 2007 2008 def _parse_checksum(self) -> exp.ChecksumProperty: 2009 self._match(TokenType.EQ) 2010 2011 on = None 2012 if self._match(TokenType.ON): 2013 on = True 2014 elif self._match_text_seq("OFF"): 2015 on = False 2016 2017 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2018 2019 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2020 return self.expression( 2021 exp.Cluster, 2022 expressions=( 2023 self._parse_wrapped_csv(self._parse_ordered) 2024 if wrapped 2025 else self._parse_csv(self._parse_ordered) 2026 ), 2027 ) 2028 2029 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2030 self._match_text_seq("BY") 2031 2032 self._match_l_paren() 2033 expressions = self._parse_csv(self._parse_column) 2034 self._match_r_paren() 2035 2036 if self._match_text_seq("SORTED", "BY"): 2037 self._match_l_paren() 2038 sorted_by = self._parse_csv(self._parse_ordered) 2039 self._match_r_paren() 2040 else: 2041 sorted_by = None 2042 2043 self._match(TokenType.INTO) 2044 buckets = self._parse_number() 2045 self._match_text_seq("BUCKETS") 2046 2047 return self.expression( 2048 exp.ClusteredByProperty, 2049 expressions=expressions, 2050 sorted_by=sorted_by, 2051 buckets=buckets, 2052 ) 2053 2054 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2055 if not self._match_text_seq("GRANTS"): 2056 self._retreat(self._index - 1) 2057 return None 2058 2059 return self.expression(exp.CopyGrantsProperty) 2060 2061 def _parse_freespace(self) -> exp.FreespaceProperty: 2062 self._match(TokenType.EQ) 2063 return self.expression( 2064 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2065 ) 2066 2067 def _parse_mergeblockratio( 2068 self, no: bool = False, default: bool = False 2069 ) -> exp.MergeBlockRatioProperty: 2070 if self._match(TokenType.EQ): 2071 return self.expression( 2072 exp.MergeBlockRatioProperty, 2073 this=self._parse_number(), 
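# Illustrative aside, not part of the original source: _parse_clustered_by
# above handles Hive-style bucketing clauses. A sketch, assuming the hive
# dialect of the installed version accepts this form:
# >>> import sqlglot
# >>> from sqlglot import exp
# >>> ddl = sqlglot.parse_one(
# ...     "CREATE TABLE t (x INT) CLUSTERED BY (x) SORTED BY (x) INTO 4 BUCKETS",
# ...     read="hive",
# ... )
# >>> ddl.find(exp.ClusteredByProperty).args["buckets"].sql()
# '4'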
2074 percent=self._match(TokenType.PERCENT), 2075 ) 2076 2077 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2078 2079 def _parse_datablocksize( 2080 self, 2081 default: t.Optional[bool] = None, 2082 minimum: t.Optional[bool] = None, 2083 maximum: t.Optional[bool] = None, 2084 ) -> exp.DataBlocksizeProperty: 2085 self._match(TokenType.EQ) 2086 size = self._parse_number() 2087 2088 units = None 2089 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2090 units = self._prev.text 2091 2092 return self.expression( 2093 exp.DataBlocksizeProperty, 2094 size=size, 2095 units=units, 2096 default=default, 2097 minimum=minimum, 2098 maximum=maximum, 2099 ) 2100 2101 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2102 self._match(TokenType.EQ) 2103 always = self._match_text_seq("ALWAYS") 2104 manual = self._match_text_seq("MANUAL") 2105 never = self._match_text_seq("NEVER") 2106 default = self._match_text_seq("DEFAULT") 2107 2108 autotemp = None 2109 if self._match_text_seq("AUTOTEMP"): 2110 autotemp = self._parse_schema() 2111 2112 return self.expression( 2113 exp.BlockCompressionProperty, 2114 always=always, 2115 manual=manual, 2116 never=never, 2117 default=default, 2118 autotemp=autotemp, 2119 ) 2120 2121 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2122 index = self._index 2123 no = self._match_text_seq("NO") 2124 concurrent = self._match_text_seq("CONCURRENT") 2125 2126 if not self._match_text_seq("ISOLATED", "LOADING"): 2127 self._retreat(index) 2128 return None 2129 2130 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2131 return self.expression( 2132 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2133 ) 2134 2135 def _parse_locking(self) -> exp.LockingProperty: 2136 if self._match(TokenType.TABLE): 2137 kind = "TABLE" 2138 elif self._match(TokenType.VIEW): 2139 kind = "VIEW" 2140 elif self._match(TokenType.ROW): 2141 kind = "ROW" 2142 elif self._match_text_seq("DATABASE"): 2143 kind = "DATABASE" 2144 else: 2145 kind = None 2146 2147 if kind in ("DATABASE", "TABLE", "VIEW"): 2148 this = self._parse_table_parts() 2149 else: 2150 this = None 2151 2152 if self._match(TokenType.FOR): 2153 for_or_in = "FOR" 2154 elif self._match(TokenType.IN): 2155 for_or_in = "IN" 2156 else: 2157 for_or_in = None 2158 2159 if self._match_text_seq("ACCESS"): 2160 lock_type = "ACCESS" 2161 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2162 lock_type = "EXCLUSIVE" 2163 elif self._match_text_seq("SHARE"): 2164 lock_type = "SHARE" 2165 elif self._match_text_seq("READ"): 2166 lock_type = "READ" 2167 elif self._match_text_seq("WRITE"): 2168 lock_type = "WRITE" 2169 elif self._match_text_seq("CHECKSUM"): 2170 lock_type = "CHECKSUM" 2171 else: 2172 lock_type = None 2173 2174 override = self._match_text_seq("OVERRIDE") 2175 2176 return self.expression( 2177 exp.LockingProperty, 2178 this=this, 2179 kind=kind, 2180 for_or_in=for_or_in, 2181 lock_type=lock_type, 2182 override=override, 2183 ) 2184 2185 def _parse_partition_by(self) -> t.List[exp.Expression]: 2186 if self._match(TokenType.PARTITION_BY): 2187 return self._parse_csv(self._parse_conjunction) 2188 return [] 2189 2190 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2191 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2192 if self._match_text_seq("MINVALUE"): 2193 return exp.var("MINVALUE") 2194 if self._match_text_seq("MAXVALUE"): 2195 return exp.var("MAXVALUE") 2196 
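            # Illustrative note (not part of the original source): MINVALUE and
            # MAXVALUE are the open-ended Postgres range bounds, e.g.
            #   CREATE TABLE p PARTITION OF t FOR VALUES FROM (MINVALUE) TO (100)
            # Anything else falls through to a regular expression parse below.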
return self._parse_bitwise() 2197 2198 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2199 expression = None 2200 from_expressions = None 2201 to_expressions = None 2202 2203 if self._match(TokenType.IN): 2204 this = self._parse_wrapped_csv(self._parse_bitwise) 2205 elif self._match(TokenType.FROM): 2206 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2207 self._match_text_seq("TO") 2208 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2209 elif self._match_text_seq("WITH", "(", "MODULUS"): 2210 this = self._parse_number() 2211 self._match_text_seq(",", "REMAINDER") 2212 expression = self._parse_number() 2213 self._match_r_paren() 2214 else: 2215 self.raise_error("Failed to parse partition bound spec.") 2216 2217 return self.expression( 2218 exp.PartitionBoundSpec, 2219 this=this, 2220 expression=expression, 2221 from_expressions=from_expressions, 2222 to_expressions=to_expressions, 2223 ) 2224 2225 # https://www.postgresql.org/docs/current/sql-createtable.html 2226 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2227 if not self._match_text_seq("OF"): 2228 self._retreat(self._index - 1) 2229 return None 2230 2231 this = self._parse_table(schema=True) 2232 2233 if self._match(TokenType.DEFAULT): 2234 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2235 elif self._match_text_seq("FOR", "VALUES"): 2236 expression = self._parse_partition_bound_spec() 2237 else: 2238 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2239 2240 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2241 2242 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2243 self._match(TokenType.EQ) 2244 return self.expression( 2245 exp.PartitionedByProperty, 2246 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2247 ) 2248 2249 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2250 if self._match_text_seq("AND", "STATISTICS"): 2251 statistics = True 2252 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2253 statistics = False 2254 else: 2255 statistics = None 2256 2257 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2258 2259 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2260 if self._match_text_seq("SQL"): 2261 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2262 return None 2263 2264 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2265 if self._match_text_seq("SQL", "DATA"): 2266 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2267 return None 2268 2269 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2270 if self._match_text_seq("PRIMARY", "INDEX"): 2271 return exp.NoPrimaryIndexProperty() 2272 if self._match_text_seq("SQL"): 2273 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2274 return None 2275 2276 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2277 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2278 return exp.OnCommitProperty() 2279 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2280 return exp.OnCommitProperty(delete=True) 2281 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2282 2283 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2284 if self._match_text_seq("SQL", "DATA"): 2285 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2286 
return None 2287 2288 def _parse_distkey(self) -> exp.DistKeyProperty: 2289 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2290 2291 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2292 table = self._parse_table(schema=True) 2293 2294 options = [] 2295 while self._match_texts(("INCLUDING", "EXCLUDING")): 2296 this = self._prev.text.upper() 2297 2298 id_var = self._parse_id_var() 2299 if not id_var: 2300 return None 2301 2302 options.append( 2303 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2304 ) 2305 2306 return self.expression(exp.LikeProperty, this=table, expressions=options) 2307 2308 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2309 return self.expression( 2310 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2311 ) 2312 2313 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2314 self._match(TokenType.EQ) 2315 return self.expression( 2316 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2317 ) 2318 2319 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2320 self._match_text_seq("WITH", "CONNECTION") 2321 return self.expression( 2322 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2323 ) 2324 2325 def _parse_returns(self) -> exp.ReturnsProperty: 2326 value: t.Optional[exp.Expression] 2327 null = None 2328 is_table = self._match(TokenType.TABLE) 2329 2330 if is_table: 2331 if self._match(TokenType.LT): 2332 value = self.expression( 2333 exp.Schema, 2334 this="TABLE", 2335 expressions=self._parse_csv(self._parse_struct_types), 2336 ) 2337 if not self._match(TokenType.GT): 2338 self.raise_error("Expecting >") 2339 else: 2340 value = self._parse_schema(exp.var("TABLE")) 2341 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2342 null = True 2343 value = None 2344 else: 2345 value = self._parse_types() 2346 2347 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2348 2349 def _parse_describe(self) -> exp.Describe: 2350 kind = self._match_set(self.CREATABLES) and self._prev.text 2351 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2352 if self._match(TokenType.DOT): 2353 style = None 2354 self._retreat(self._index - 2) 2355 this = self._parse_table(schema=True) 2356 properties = self._parse_properties() 2357 expressions = properties.expressions if properties else None 2358 return self.expression( 2359 exp.Describe, this=this, style=style, kind=kind, expressions=expressions 2360 ) 2361 2362 def _parse_insert(self) -> exp.Insert: 2363 comments = ensure_list(self._prev_comments) 2364 hint = self._parse_hint() 2365 overwrite = self._match(TokenType.OVERWRITE) 2366 ignore = self._match(TokenType.IGNORE) 2367 local = self._match_text_seq("LOCAL") 2368 alternative = None 2369 is_function = None 2370 2371 if self._match_text_seq("DIRECTORY"): 2372 this: t.Optional[exp.Expression] = self.expression( 2373 exp.Directory, 2374 this=self._parse_var_or_string(), 2375 local=local, 2376 row_format=self._parse_row_format(match_row=True), 2377 ) 2378 else: 2379 if self._match(TokenType.OR): 2380 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2381 2382 self._match(TokenType.INTO) 2383 comments += ensure_list(self._prev_comments) 2384 self._match(TokenType.TABLE) 2385 is_function = self._match(TokenType.FUNCTION) 2386 2387 this = ( 2388 
self._parse_table(schema=True, parse_partition=True) 2389 if not is_function 2390 else self._parse_function() 2391 ) 2392 2393 returning = self._parse_returning() 2394 2395 return self.expression( 2396 exp.Insert, 2397 comments=comments, 2398 hint=hint, 2399 is_function=is_function, 2400 this=this, 2401 stored=self._match_text_seq("STORED") and self._parse_stored(), 2402 by_name=self._match_text_seq("BY", "NAME"), 2403 exists=self._parse_exists(), 2404 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 2405 and self._parse_conjunction(), 2406 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2407 conflict=self._parse_on_conflict(), 2408 returning=returning or self._parse_returning(), 2409 overwrite=overwrite, 2410 alternative=alternative, 2411 ignore=ignore, 2412 ) 2413 2414 def _parse_kill(self) -> exp.Kill: 2415 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2416 2417 return self.expression( 2418 exp.Kill, 2419 this=self._parse_primary(), 2420 kind=kind, 2421 ) 2422 2423 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2424 conflict = self._match_text_seq("ON", "CONFLICT") 2425 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2426 2427 if not conflict and not duplicate: 2428 return None 2429 2430 conflict_keys = None 2431 constraint = None 2432 2433 if conflict: 2434 if self._match_text_seq("ON", "CONSTRAINT"): 2435 constraint = self._parse_id_var() 2436 elif self._match(TokenType.L_PAREN): 2437 conflict_keys = self._parse_csv(self._parse_id_var) 2438 self._match_r_paren() 2439 2440 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2441 if self._prev.token_type == TokenType.UPDATE: 2442 self._match(TokenType.SET) 2443 expressions = self._parse_csv(self._parse_equality) 2444 else: 2445 expressions = None 2446 2447 return self.expression( 2448 exp.OnConflict, 2449 duplicate=duplicate, 2450 expressions=expressions, 2451 action=action, 2452 conflict_keys=conflict_keys, 2453 constraint=constraint, 2454 ) 2455 2456 def _parse_returning(self) -> t.Optional[exp.Returning]: 2457 if not self._match(TokenType.RETURNING): 2458 return None 2459 return self.expression( 2460 exp.Returning, 2461 expressions=self._parse_csv(self._parse_expression), 2462 into=self._match(TokenType.INTO) and self._parse_table_part(), 2463 ) 2464 2465 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2466 if not self._match(TokenType.FORMAT): 2467 return None 2468 return self._parse_row_format() 2469 2470 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2471 index = self._index 2472 with_ = with_ or self._match_text_seq("WITH") 2473 2474 if not self._match(TokenType.SERDE_PROPERTIES): 2475 self._retreat(index) 2476 return None 2477 return self.expression( 2478 exp.SerdeProperties, 2479 **{ # type: ignore 2480 "expressions": self._parse_wrapped_properties(), 2481 "with": with_, 2482 }, 2483 ) 2484 2485 def _parse_row_format( 2486 self, match_row: bool = False 2487 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2488 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2489 return None 2490 2491 if self._match_text_seq("SERDE"): 2492 this = self._parse_string() 2493 2494 serde_properties = self._parse_serde_properties() 2495 2496 return self.expression( 2497 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2498 ) 2499 2500 self._match_text_seq("DELIMITED") 2501 
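        # Illustrative note (not part of the original source): the options below
        # mirror Hive's DELIMITED row format, e.g.
        #   ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n'
        # Each matched clause stores its string literal under the corresponding
        # RowFormatDelimitedProperty arg ("fields", "lines", and so on).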
2502 kwargs = {} 2503 2504 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2505 kwargs["fields"] = self._parse_string() 2506 if self._match_text_seq("ESCAPED", "BY"): 2507 kwargs["escaped"] = self._parse_string() 2508 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2509 kwargs["collection_items"] = self._parse_string() 2510 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2511 kwargs["map_keys"] = self._parse_string() 2512 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2513 kwargs["lines"] = self._parse_string() 2514 if self._match_text_seq("NULL", "DEFINED", "AS"): 2515 kwargs["null"] = self._parse_string() 2516 2517 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2518 2519 def _parse_load(self) -> exp.LoadData | exp.Command: 2520 if self._match_text_seq("DATA"): 2521 local = self._match_text_seq("LOCAL") 2522 self._match_text_seq("INPATH") 2523 inpath = self._parse_string() 2524 overwrite = self._match(TokenType.OVERWRITE) 2525 self._match_pair(TokenType.INTO, TokenType.TABLE) 2526 2527 return self.expression( 2528 exp.LoadData, 2529 this=self._parse_table(schema=True), 2530 local=local, 2531 overwrite=overwrite, 2532 inpath=inpath, 2533 partition=self._parse_partition(), 2534 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2535 serde=self._match_text_seq("SERDE") and self._parse_string(), 2536 ) 2537 return self._parse_as_command(self._prev) 2538 2539 def _parse_delete(self) -> exp.Delete: 2540 # This handles MySQL's "Multiple-Table Syntax" 2541 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2542 tables = None 2543 comments = self._prev_comments 2544 if not self._match(TokenType.FROM, advance=False): 2545 tables = self._parse_csv(self._parse_table) or None 2546 2547 returning = self._parse_returning() 2548 2549 return self.expression( 2550 exp.Delete, 2551 comments=comments, 2552 tables=tables, 2553 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2554 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2555 where=self._parse_where(), 2556 returning=returning or self._parse_returning(), 2557 limit=self._parse_limit(), 2558 ) 2559 2560 def _parse_update(self) -> exp.Update: 2561 comments = self._prev_comments 2562 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2563 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2564 returning = self._parse_returning() 2565 return self.expression( 2566 exp.Update, 2567 comments=comments, 2568 **{ # type: ignore 2569 "this": this, 2570 "expressions": expressions, 2571 "from": self._parse_from(joins=True), 2572 "where": self._parse_where(), 2573 "returning": returning or self._parse_returning(), 2574 "order": self._parse_order(), 2575 "limit": self._parse_limit(), 2576 }, 2577 ) 2578 2579 def _parse_uncache(self) -> exp.Uncache: 2580 if not self._match(TokenType.TABLE): 2581 self.raise_error("Expecting TABLE after UNCACHE") 2582 2583 return self.expression( 2584 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2585 ) 2586 2587 def _parse_cache(self) -> exp.Cache: 2588 lazy = self._match_text_seq("LAZY") 2589 self._match(TokenType.TABLE) 2590 table = self._parse_table(schema=True) 2591 2592 options = [] 2593 if self._match_text_seq("OPTIONS"): 2594 self._match_l_paren() 2595 k = self._parse_string() 2596 self._match(TokenType.EQ) 2597 v = self._parse_string() 2598 options = [k, v] 2599 self._match_r_paren() 2600 2601 
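        # Illustrative note (not part of the original source): this handles
        # Spark-style statements such as
        #   CACHE LAZY TABLE t OPTIONS ('storageLevel' = 'DISK_ONLY') AS SELECT * FROM src
        # where the optional AS <query> parsed next becomes the Cache expression.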
self._match(TokenType.ALIAS) 2602 return self.expression( 2603 exp.Cache, 2604 this=table, 2605 lazy=lazy, 2606 options=options, 2607 expression=self._parse_select(nested=True), 2608 ) 2609 2610 def _parse_partition(self) -> t.Optional[exp.Partition]: 2611 if not self._match(TokenType.PARTITION): 2612 return None 2613 2614 return self.expression( 2615 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2616 ) 2617 2618 def _parse_value(self) -> t.Optional[exp.Tuple]: 2619 if self._match(TokenType.L_PAREN): 2620 expressions = self._parse_csv(self._parse_expression) 2621 self._match_r_paren() 2622 return self.expression(exp.Tuple, expressions=expressions) 2623 2624 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 2625 expression = self._parse_expression() 2626 if expression: 2627 return self.expression(exp.Tuple, expressions=[expression]) 2628 return None 2629 2630 def _parse_projections(self) -> t.List[exp.Expression]: 2631 return self._parse_expressions() 2632 2633 def _parse_select( 2634 self, 2635 nested: bool = False, 2636 table: bool = False, 2637 parse_subquery_alias: bool = True, 2638 parse_set_operation: bool = True, 2639 ) -> t.Optional[exp.Expression]: 2640 cte = self._parse_with() 2641 2642 if cte: 2643 this = self._parse_statement() 2644 2645 if not this: 2646 self.raise_error("Failed to parse any statement following CTE") 2647 return cte 2648 2649 if "with" in this.arg_types: 2650 this.set("with", cte) 2651 else: 2652 self.raise_error(f"{this.key} does not support CTE") 2653 this = cte 2654 2655 return this 2656 2657 # duckdb supports leading with FROM x 2658 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2659 2660 if self._match(TokenType.SELECT): 2661 comments = self._prev_comments 2662 2663 hint = self._parse_hint() 2664 all_ = self._match(TokenType.ALL) 2665 distinct = self._match_set(self.DISTINCT_TOKENS) 2666 2667 kind = ( 2668 self._match(TokenType.ALIAS) 2669 and self._match_texts(("STRUCT", "VALUE")) 2670 and self._prev.text.upper() 2671 ) 2672 2673 if distinct: 2674 distinct = self.expression( 2675 exp.Distinct, 2676 on=self._parse_value() if self._match(TokenType.ON) else None, 2677 ) 2678 2679 if all_ and distinct: 2680 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2681 2682 limit = self._parse_limit(top=True) 2683 projections = self._parse_projections() 2684 2685 this = self.expression( 2686 exp.Select, 2687 kind=kind, 2688 hint=hint, 2689 distinct=distinct, 2690 expressions=projections, 2691 limit=limit, 2692 ) 2693 this.comments = comments 2694 2695 into = self._parse_into() 2696 if into: 2697 this.set("into", into) 2698 2699 if not from_: 2700 from_ = self._parse_from() 2701 2702 if from_: 2703 this.set("from", from_) 2704 2705 this = self._parse_query_modifiers(this) 2706 elif (table or nested) and self._match(TokenType.L_PAREN): 2707 if self._match(TokenType.PIVOT): 2708 this = self._parse_simplified_pivot() 2709 elif self._match(TokenType.FROM): 2710 this = exp.select("*").from_( 2711 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2712 ) 2713 else: 2714 this = ( 2715 self._parse_table() 2716 if table 2717 else self._parse_select(nested=True, parse_set_operation=False) 2718 ) 2719 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2720 2721 self._match_r_paren() 2722 2723 # We return early here so that the UNION isn't attached to the subquery by the 2724 # following call to _parse_set_operations, but instead becomes the 
parent node 2725 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2726 elif self._match(TokenType.VALUES, advance=False): 2727 this = self._parse_derived_table_values() 2728 elif from_: 2729 this = exp.select("*").from_(from_.this, copy=False) 2730 else: 2731 this = None 2732 2733 if parse_set_operation: 2734 return self._parse_set_operations(this) 2735 return this 2736 2737 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2738 if not skip_with_token and not self._match(TokenType.WITH): 2739 return None 2740 2741 comments = self._prev_comments 2742 recursive = self._match(TokenType.RECURSIVE) 2743 2744 expressions = [] 2745 while True: 2746 expressions.append(self._parse_cte()) 2747 2748 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2749 break 2750 else: 2751 self._match(TokenType.WITH) 2752 2753 return self.expression( 2754 exp.With, comments=comments, expressions=expressions, recursive=recursive 2755 ) 2756 2757 def _parse_cte(self) -> exp.CTE: 2758 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2759 if not alias or not alias.this: 2760 self.raise_error("Expected CTE to have alias") 2761 2762 self._match(TokenType.ALIAS) 2763 2764 if self._match_text_seq("NOT", "MATERIALIZED"): 2765 materialized = False 2766 elif self._match_text_seq("MATERIALIZED"): 2767 materialized = True 2768 else: 2769 materialized = None 2770 2771 return self.expression( 2772 exp.CTE, 2773 this=self._parse_wrapped(self._parse_statement), 2774 alias=alias, 2775 materialized=materialized, 2776 ) 2777 2778 def _parse_table_alias( 2779 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2780 ) -> t.Optional[exp.TableAlias]: 2781 any_token = self._match(TokenType.ALIAS) 2782 alias = ( 2783 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2784 or self._parse_string_as_identifier() 2785 ) 2786 2787 index = self._index 2788 if self._match(TokenType.L_PAREN): 2789 columns = self._parse_csv(self._parse_function_parameter) 2790 self._match_r_paren() if columns else self._retreat(index) 2791 else: 2792 columns = None 2793 2794 if not alias and not columns: 2795 return None 2796 2797 return self.expression(exp.TableAlias, this=alias, columns=columns) 2798 2799 def _parse_subquery( 2800 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2801 ) -> t.Optional[exp.Subquery]: 2802 if not this: 2803 return None 2804 2805 return self.expression( 2806 exp.Subquery, 2807 this=this, 2808 pivots=self._parse_pivots(), 2809 alias=self._parse_table_alias() if parse_alias else None, 2810 ) 2811 2812 def _implicit_unnests_to_explicit(self, this: E) -> E: 2813 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 2814 2815 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2816 for i, join in enumerate(this.args.get("joins") or []): 2817 table = join.this 2818 normalized_table = table.copy() 2819 normalized_table.meta["maybe_column"] = True 2820 normalized_table = _norm(normalized_table, dialect=self.dialect) 2821 2822 if isinstance(table, exp.Table) and not join.args.get("on"): 2823 if normalized_table.parts[0].name in refs: 2824 table_as_column = table.to_column() 2825 unnest = exp.Unnest(expressions=[table_as_column]) 2826 2827 # Table.to_column creates a parent Alias node that we want to convert to 2828 # a TableAlias and attach to the Unnest, so it matches the parser's output 2829 if isinstance(table.args.get("alias"), exp.TableAlias): 
2830 table_as_column.replace(table_as_column.this) 2831 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2832 2833 table.replace(unnest) 2834 2835 refs.add(normalized_table.alias_or_name) 2836 2837 return this 2838 2839 def _parse_query_modifiers( 2840 self, this: t.Optional[exp.Expression] 2841 ) -> t.Optional[exp.Expression]: 2842 if isinstance(this, (exp.Query, exp.Table)): 2843 for join in self._parse_joins(): 2844 this.append("joins", join) 2845 for lateral in iter(self._parse_lateral, None): 2846 this.append("laterals", lateral) 2847 2848 while True: 2849 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2850 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2851 key, expression = parser(self) 2852 2853 if expression: 2854 this.set(key, expression) 2855 if key == "limit": 2856 offset = expression.args.pop("offset", None) 2857 2858 if offset: 2859 offset = exp.Offset(expression=offset) 2860 this.set("offset", offset) 2861 2862 limit_by_expressions = expression.expressions 2863 expression.set("expressions", None) 2864 offset.set("expressions", limit_by_expressions) 2865 continue 2866 break 2867 2868 if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args: 2869 this = self._implicit_unnests_to_explicit(this) 2870 2871 return this 2872 2873 def _parse_hint(self) -> t.Optional[exp.Hint]: 2874 if self._match(TokenType.HINT): 2875 hints = [] 2876 for hint in iter( 2877 lambda: self._parse_csv( 2878 lambda: self._parse_function() or self._parse_var(upper=True) 2879 ), 2880 [], 2881 ): 2882 hints.extend(hint) 2883 2884 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2885 self.raise_error("Expected */ after HINT") 2886 2887 return self.expression(exp.Hint, expressions=hints) 2888 2889 return None 2890 2891 def _parse_into(self) -> t.Optional[exp.Into]: 2892 if not self._match(TokenType.INTO): 2893 return None 2894 2895 temp = self._match(TokenType.TEMPORARY) 2896 unlogged = self._match_text_seq("UNLOGGED") 2897 self._match(TokenType.TABLE) 2898 2899 return self.expression( 2900 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2901 ) 2902 2903 def _parse_from( 2904 self, joins: bool = False, skip_from_token: bool = False 2905 ) -> t.Optional[exp.From]: 2906 if not skip_from_token and not self._match(TokenType.FROM): 2907 return None 2908 2909 return self.expression( 2910 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2911 ) 2912 2913 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 2914 return self.expression( 2915 exp.MatchRecognizeMeasure, 2916 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 2917 this=self._parse_expression(), 2918 ) 2919 2920 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2921 if not self._match(TokenType.MATCH_RECOGNIZE): 2922 return None 2923 2924 self._match_l_paren() 2925 2926 partition = self._parse_partition_by() 2927 order = self._parse_order() 2928 2929 measures = ( 2930 self._parse_csv(self._parse_match_recognize_measure) 2931 if self._match_text_seq("MEASURES") 2932 else None 2933 ) 2934 2935 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2936 rows = exp.var("ONE ROW PER MATCH") 2937 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2938 text = "ALL ROWS PER MATCH" 2939 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2940 text += " SHOW EMPTY MATCHES" 2941 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2942 text += " OMIT EMPTY MATCHES" 
2943 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2944 text += " WITH UNMATCHED ROWS" 2945 rows = exp.var(text) 2946 else: 2947 rows = None 2948 2949 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2950 text = "AFTER MATCH SKIP" 2951 if self._match_text_seq("PAST", "LAST", "ROW"): 2952 text += " PAST LAST ROW" 2953 elif self._match_text_seq("TO", "NEXT", "ROW"): 2954 text += " TO NEXT ROW" 2955 elif self._match_text_seq("TO", "FIRST"): 2956 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2957 elif self._match_text_seq("TO", "LAST"): 2958 text += f" TO LAST {self._advance_any().text}" # type: ignore 2959 after = exp.var(text) 2960 else: 2961 after = None 2962 2963 if self._match_text_seq("PATTERN"): 2964 self._match_l_paren() 2965 2966 if not self._curr: 2967 self.raise_error("Expecting )", self._curr) 2968 2969 paren = 1 2970 start = self._curr 2971 2972 while self._curr and paren > 0: 2973 if self._curr.token_type == TokenType.L_PAREN: 2974 paren += 1 2975 if self._curr.token_type == TokenType.R_PAREN: 2976 paren -= 1 2977 2978 end = self._prev 2979 self._advance() 2980 2981 if paren > 0: 2982 self.raise_error("Expecting )", self._curr) 2983 2984 pattern = exp.var(self._find_sql(start, end)) 2985 else: 2986 pattern = None 2987 2988 define = ( 2989 self._parse_csv(self._parse_name_as_expression) 2990 if self._match_text_seq("DEFINE") 2991 else None 2992 ) 2993 2994 self._match_r_paren() 2995 2996 return self.expression( 2997 exp.MatchRecognize, 2998 partition_by=partition, 2999 order=order, 3000 measures=measures, 3001 rows=rows, 3002 after=after, 3003 pattern=pattern, 3004 define=define, 3005 alias=self._parse_table_alias(), 3006 ) 3007 3008 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3009 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3010 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3011 cross_apply = False 3012 3013 if cross_apply is not None: 3014 this = self._parse_select(table=True) 3015 view = None 3016 outer = None 3017 elif self._match(TokenType.LATERAL): 3018 this = self._parse_select(table=True) 3019 view = self._match(TokenType.VIEW) 3020 outer = self._match(TokenType.OUTER) 3021 else: 3022 return None 3023 3024 if not this: 3025 this = ( 3026 self._parse_unnest() 3027 or self._parse_function() 3028 or self._parse_id_var(any_token=False) 3029 ) 3030 3031 while self._match(TokenType.DOT): 3032 this = exp.Dot( 3033 this=this, 3034 expression=self._parse_function() or self._parse_id_var(any_token=False), 3035 ) 3036 3037 if view: 3038 table = self._parse_id_var(any_token=False) 3039 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3040 table_alias: t.Optional[exp.TableAlias] = self.expression( 3041 exp.TableAlias, this=table, columns=columns 3042 ) 3043 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3044 # We move the alias from the lateral's child node to the lateral itself 3045 table_alias = this.args["alias"].pop() 3046 else: 3047 table_alias = self._parse_table_alias() 3048 3049 return self.expression( 3050 exp.Lateral, 3051 this=this, 3052 view=view, 3053 outer=outer, 3054 alias=table_alias, 3055 cross_apply=cross_apply, 3056 ) 3057 3058 def _parse_join_parts( 3059 self, 3060 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3061 return ( 3062 self._match_set(self.JOIN_METHODS) and self._prev, 3063 self._match_set(self.JOIN_SIDES) and self._prev, 3064 self._match_set(self.JOIN_KINDS) and self._prev, 3065 ) 3066 
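    # Illustrative example (not part of the original source): for
    # "SELECT * FROM a NATURAL LEFT JOIN b", the NATURAL token is consumed as
    # the join method and LEFT as its side, roughly:
    #
    #   import sqlglot
    #   join = sqlglot.parse_one("SELECT * FROM a NATURAL LEFT JOIN b").args["joins"][0]
    #   join.args.get("method")  # -> "NATURAL"
    #   join.args.get("side")    # -> "LEFT"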
3067 def _parse_join( 3068 self, skip_join_token: bool = False, parse_bracket: bool = False 3069 ) -> t.Optional[exp.Join]: 3070 if self._match(TokenType.COMMA): 3071 return self.expression(exp.Join, this=self._parse_table()) 3072 3073 index = self._index 3074 method, side, kind = self._parse_join_parts() 3075 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3076 join = self._match(TokenType.JOIN) 3077 3078 if not skip_join_token and not join: 3079 self._retreat(index) 3080 kind = None 3081 method = None 3082 side = None 3083 3084 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3085 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3086 3087 if not skip_join_token and not join and not outer_apply and not cross_apply: 3088 return None 3089 3090 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3091 3092 if method: 3093 kwargs["method"] = method.text 3094 if side: 3095 kwargs["side"] = side.text 3096 if kind: 3097 kwargs["kind"] = kind.text 3098 if hint: 3099 kwargs["hint"] = hint 3100 3101 if self._match(TokenType.MATCH_CONDITION): 3102 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3103 3104 if self._match(TokenType.ON): 3105 kwargs["on"] = self._parse_conjunction() 3106 elif self._match(TokenType.USING): 3107 kwargs["using"] = self._parse_wrapped_id_vars() 3108 elif not isinstance(kwargs["this"], exp.Unnest) and not ( 3109 kind and kind.token_type == TokenType.CROSS 3110 ): 3111 index = self._index 3112 joins: t.Optional[list] = list(self._parse_joins()) 3113 3114 if joins and self._match(TokenType.ON): 3115 kwargs["on"] = self._parse_conjunction() 3116 elif joins and self._match(TokenType.USING): 3117 kwargs["using"] = self._parse_wrapped_id_vars() 3118 else: 3119 joins = None 3120 self._retreat(index) 3121 3122 kwargs["this"].set("joins", joins if joins else None) 3123 3124 comments = [c for token in (method, side, kind) if token for c in token.comments] 3125 return self.expression(exp.Join, comments=comments, **kwargs) 3126 3127 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3128 this = self._parse_conjunction() 3129 3130 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3131 return this 3132 3133 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3134 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3135 3136 return this 3137 3138 def _parse_index_params(self) -> exp.IndexParameters: 3139 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3140 3141 if self._match(TokenType.L_PAREN, advance=False): 3142 columns = self._parse_wrapped_csv(self._parse_with_operator) 3143 else: 3144 columns = None 3145 3146 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3147 partition_by = self._parse_partition_by() 3148 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3149 tablespace = ( 3150 self._parse_var(any_token=True) 3151 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3152 else None 3153 ) 3154 where = self._parse_where() 3155 3156 return self.expression( 3157 exp.IndexParameters, 3158 using=using, 3159 columns=columns, 3160 include=include, 3161 partition_by=partition_by, 3162 where=where, 3163 with_storage=with_storage, 3164 tablespace=tablespace, 3165 ) 3166 3167 def _parse_index( 3168 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3169 ) -> 
t.Optional[exp.Index]: 3170 if index or anonymous: 3171 unique = None 3172 primary = None 3173 amp = None 3174 3175 self._match(TokenType.ON) 3176 self._match(TokenType.TABLE) # hive 3177 table = self._parse_table_parts(schema=True) 3178 else: 3179 unique = self._match(TokenType.UNIQUE) 3180 primary = self._match_text_seq("PRIMARY") 3181 amp = self._match_text_seq("AMP") 3182 3183 if not self._match(TokenType.INDEX): 3184 return None 3185 3186 index = self._parse_id_var() 3187 table = None 3188 3189 params = self._parse_index_params() 3190 3191 return self.expression( 3192 exp.Index, 3193 this=index, 3194 table=table, 3195 unique=unique, 3196 primary=primary, 3197 amp=amp, 3198 params=params, 3199 ) 3200 3201 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3202 hints: t.List[exp.Expression] = [] 3203 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3204 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3205 hints.append( 3206 self.expression( 3207 exp.WithTableHint, 3208 expressions=self._parse_csv( 3209 lambda: self._parse_function() or self._parse_var(any_token=True) 3210 ), 3211 ) 3212 ) 3213 self._match_r_paren() 3214 else: 3215 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3216 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3217 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3218 3219 self._match_texts(("INDEX", "KEY")) 3220 if self._match(TokenType.FOR): 3221 hint.set("target", self._advance_any() and self._prev.text.upper()) 3222 3223 hint.set("expressions", self._parse_wrapped_id_vars()) 3224 hints.append(hint) 3225 3226 return hints or None 3227 3228 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3229 return ( 3230 (not schema and self._parse_function(optional_parens=False)) 3231 or self._parse_id_var(any_token=False) 3232 or self._parse_string_as_identifier() 3233 or self._parse_placeholder() 3234 ) 3235 3236 def _parse_table_parts( 3237 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3238 ) -> exp.Table: 3239 catalog = None 3240 db = None 3241 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3242 3243 while self._match(TokenType.DOT): 3244 if catalog: 3245 # This allows nesting the table in arbitrarily many dot expressions if needed 3246 table = self.expression( 3247 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3248 ) 3249 else: 3250 catalog = db 3251 db = table 3252 # "" used for tsql FROM a..b case 3253 table = self._parse_table_part(schema=schema) or "" 3254 3255 if ( 3256 wildcard 3257 and self._is_connected() 3258 and (isinstance(table, exp.Identifier) or not table) 3259 and self._match(TokenType.STAR) 3260 ): 3261 if isinstance(table, exp.Identifier): 3262 table.args["this"] += "*" 3263 else: 3264 table = exp.Identifier(this="*") 3265 3266 # We bubble up comments from the Identifier to the Table 3267 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3268 3269 if is_db_reference: 3270 catalog = db 3271 db = table 3272 table = None 3273 3274 if not table and not is_db_reference: 3275 self.raise_error(f"Expected table name but got {self._curr}") 3276 if not db and is_db_reference: 3277 self.raise_error(f"Expected database name but got {self._curr}") 3278 3279 return self.expression( 3280 exp.Table, 3281 comments=comments, 3282 this=table, 3283 db=db, 3284 catalog=catalog, 3285 pivots=self._parse_pivots(), 3286 ) 3287 3288 
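    # Illustrative example (not part of the original source): dotted references
    # are split right-to-left into table, db and catalog parts, roughly:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #   table = sqlglot.parse_one("SELECT * FROM prod.analytics.events").find(exp.Table)
    #   table.catalog, table.db, table.name  # -> ("prod", "analytics", "events")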
def _parse_table( 3289 self, 3290 schema: bool = False, 3291 joins: bool = False, 3292 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3293 parse_bracket: bool = False, 3294 is_db_reference: bool = False, 3295 parse_partition: bool = False, 3296 ) -> t.Optional[exp.Expression]: 3297 lateral = self._parse_lateral() 3298 if lateral: 3299 return lateral 3300 3301 unnest = self._parse_unnest() 3302 if unnest: 3303 return unnest 3304 3305 values = self._parse_derived_table_values() 3306 if values: 3307 return values 3308 3309 subquery = self._parse_select(table=True) 3310 if subquery: 3311 if not subquery.args.get("pivots"): 3312 subquery.set("pivots", self._parse_pivots()) 3313 return subquery 3314 3315 bracket = parse_bracket and self._parse_bracket(None) 3316 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3317 3318 only = self._match(TokenType.ONLY) 3319 3320 this = t.cast( 3321 exp.Expression, 3322 bracket 3323 or self._parse_bracket( 3324 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3325 ), 3326 ) 3327 3328 if only: 3329 this.set("only", only) 3330 3331 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3332 self._match_text_seq("*") 3333 3334 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3335 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3336 this.set("partition", self._parse_partition()) 3337 3338 if schema: 3339 return self._parse_schema(this=this) 3340 3341 version = self._parse_version() 3342 3343 if version: 3344 this.set("version", version) 3345 3346 if self.dialect.ALIAS_POST_TABLESAMPLE: 3347 table_sample = self._parse_table_sample() 3348 3349 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3350 if alias: 3351 this.set("alias", alias) 3352 3353 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3354 return self.expression( 3355 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3356 ) 3357 3358 this.set("hints", self._parse_table_hints()) 3359 3360 if not this.args.get("pivots"): 3361 this.set("pivots", self._parse_pivots()) 3362 3363 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3364 table_sample = self._parse_table_sample() 3365 3366 if table_sample: 3367 table_sample.set("this", this) 3368 this = table_sample 3369 3370 if joins: 3371 for join in self._parse_joins(): 3372 this.append("joins", join) 3373 3374 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3375 this.set("ordinality", True) 3376 this.set("alias", self._parse_table_alias()) 3377 3378 return this 3379 3380 def _parse_version(self) -> t.Optional[exp.Version]: 3381 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3382 this = "TIMESTAMP" 3383 elif self._match(TokenType.VERSION_SNAPSHOT): 3384 this = "VERSION" 3385 else: 3386 return None 3387 3388 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3389 kind = self._prev.text.upper() 3390 start = self._parse_bitwise() 3391 self._match_texts(("TO", "AND")) 3392 end = self._parse_bitwise() 3393 expression: t.Optional[exp.Expression] = self.expression( 3394 exp.Tuple, expressions=[start, end] 3395 ) 3396 elif self._match_text_seq("CONTAINED", "IN"): 3397 kind = "CONTAINED IN" 3398 expression = self.expression( 3399 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3400 ) 3401 elif self._match(TokenType.ALL): 3402 kind = "ALL" 3403 expression = None 3404 else: 3405 self._match_text_seq("AS", "OF") 3406 kind = "AS OF" 3407 
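            # e.g. "FOR SYSTEM_TIME AS OF '2020-01-01'" (illustrative); the
            # expression following AS OF is parsed via _parse_type below so that
            # constructor casts like TIMESTAMP '2020-01-01' are also handled.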
expression = self._parse_type() 3408 3409 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3410 3411 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3412 if not self._match(TokenType.UNNEST): 3413 return None 3414 3415 expressions = self._parse_wrapped_csv(self._parse_equality) 3416 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3417 3418 alias = self._parse_table_alias() if with_alias else None 3419 3420 if alias: 3421 if self.dialect.UNNEST_COLUMN_ONLY: 3422 if alias.args.get("columns"): 3423 self.raise_error("Unexpected extra column alias in unnest.") 3424 3425 alias.set("columns", [alias.this]) 3426 alias.set("this", None) 3427 3428 columns = alias.args.get("columns") or [] 3429 if offset and len(expressions) < len(columns): 3430 offset = columns.pop() 3431 3432 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3433 self._match(TokenType.ALIAS) 3434 offset = self._parse_id_var( 3435 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3436 ) or exp.to_identifier("offset") 3437 3438 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3439 3440 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3441 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3442 if not is_derived and not self._match_text_seq("VALUES"): 3443 return None 3444 3445 expressions = self._parse_csv(self._parse_value) 3446 alias = self._parse_table_alias() 3447 3448 if is_derived: 3449 self._match_r_paren() 3450 3451 return self.expression( 3452 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3453 ) 3454 3455 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3456 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3457 as_modifier and self._match_text_seq("USING", "SAMPLE") 3458 ): 3459 return None 3460 3461 bucket_numerator = None 3462 bucket_denominator = None 3463 bucket_field = None 3464 percent = None 3465 size = None 3466 seed = None 3467 3468 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3469 matched_l_paren = self._match(TokenType.L_PAREN) 3470 3471 if self.TABLESAMPLE_CSV: 3472 num = None 3473 expressions = self._parse_csv(self._parse_primary) 3474 else: 3475 expressions = None 3476 num = ( 3477 self._parse_factor() 3478 if self._match(TokenType.NUMBER, advance=False) 3479 else self._parse_primary() or self._parse_placeholder() 3480 ) 3481 3482 if self._match_text_seq("BUCKET"): 3483 bucket_numerator = self._parse_number() 3484 self._match_text_seq("OUT", "OF") 3485 bucket_denominator = self._parse_number() 3486 self._match(TokenType.ON) 3487 bucket_field = self._parse_field() 3488 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3489 percent = num 3490 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3491 size = num 3492 else: 3493 percent = num 3494 3495 if matched_l_paren: 3496 self._match_r_paren() 3497 3498 if self._match(TokenType.L_PAREN): 3499 method = self._parse_var(upper=True) 3500 seed = self._match(TokenType.COMMA) and self._parse_number() 3501 self._match_r_paren() 3502 elif self._match_texts(("SEED", "REPEATABLE")): 3503 seed = self._parse_wrapped(self._parse_number) 3504 3505 if not method and self.DEFAULT_SAMPLING_METHOD: 3506 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3507 3508 return self.expression( 3509 exp.TableSample, 3510 expressions=expressions, 3511 method=method, 3512
bucket_numerator=bucket_numerator, 3513 bucket_denominator=bucket_denominator, 3514 bucket_field=bucket_field, 3515 percent=percent, 3516 size=size, 3517 seed=seed, 3518 ) 3519 3520 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3521 return list(iter(self._parse_pivot, None)) or None 3522 3523 def _parse_joins(self) -> t.Iterator[exp.Join]: 3524 return iter(self._parse_join, None) 3525 3526 # https://duckdb.org/docs/sql/statements/pivot 3527 def _parse_simplified_pivot(self) -> exp.Pivot: 3528 def _parse_on() -> t.Optional[exp.Expression]: 3529 this = self._parse_bitwise() 3530 return self._parse_in(this) if self._match(TokenType.IN) else this 3531 3532 this = self._parse_table() 3533 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3534 using = self._match(TokenType.USING) and self._parse_csv( 3535 lambda: self._parse_alias(self._parse_function()) 3536 ) 3537 group = self._parse_group() 3538 return self.expression( 3539 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3540 ) 3541 3542 def _parse_pivot_in(self) -> exp.In: 3543 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3544 this = self._parse_conjunction() 3545 3546 self._match(TokenType.ALIAS) 3547 alias = self._parse_field() 3548 if alias: 3549 return self.expression(exp.PivotAlias, this=this, alias=alias) 3550 3551 return this 3552 3553 value = self._parse_column() 3554 3555 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3556 self.raise_error("Expecting IN (") 3557 3558 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3559 3560 self._match_r_paren() 3561 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3562 3563 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3564 index = self._index 3565 include_nulls = None 3566 3567 if self._match(TokenType.PIVOT): 3568 unpivot = False 3569 elif self._match(TokenType.UNPIVOT): 3570 unpivot = True 3571 3572 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3573 if self._match_text_seq("INCLUDE", "NULLS"): 3574 include_nulls = True 3575 elif self._match_text_seq("EXCLUDE", "NULLS"): 3576 include_nulls = False 3577 else: 3578 return None 3579 3580 expressions = [] 3581 3582 if not self._match(TokenType.L_PAREN): 3583 self._retreat(index) 3584 return None 3585 3586 if unpivot: 3587 expressions = self._parse_csv(self._parse_column) 3588 else: 3589 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3590 3591 if not expressions: 3592 self.raise_error("Failed to parse PIVOT's aggregation list") 3593 3594 if not self._match(TokenType.FOR): 3595 self.raise_error("Expecting FOR") 3596 3597 field = self._parse_pivot_in() 3598 3599 self._match_r_paren() 3600 3601 pivot = self.expression( 3602 exp.Pivot, 3603 expressions=expressions, 3604 field=field, 3605 unpivot=unpivot, 3606 include_nulls=include_nulls, 3607 ) 3608 3609 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3610 pivot.set("alias", self._parse_table_alias()) 3611 3612 if not unpivot: 3613 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3614 3615 columns: t.List[exp.Expression] = [] 3616 for fld in pivot.args["field"].expressions: 3617 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3618 for name in names: 3619 if self.PREFIXED_PIVOT_COLUMNS: 3620 name = f"{name}_{field_name}" if name else field_name 3621 else: 3622 name = f"{field_name}_{name}" if name else 
field_name 3623 3624 columns.append(exp.to_identifier(name)) 3625 3626 pivot.set("columns", columns) 3627 3628 return pivot 3629 3630 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3631 return [agg.alias for agg in aggregations] 3632 3633 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3634 if not skip_where_token and not self._match(TokenType.PREWHERE): 3635 return None 3636 3637 return self.expression( 3638 exp.PreWhere, comments=self._prev_comments, this=self._parse_conjunction() 3639 ) 3640 3641 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3642 if not skip_where_token and not self._match(TokenType.WHERE): 3643 return None 3644 3645 return self.expression( 3646 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 3647 ) 3648 3649 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3650 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3651 return None 3652 3653 elements: t.Dict[str, t.Any] = defaultdict(list) 3654 3655 if self._match(TokenType.ALL): 3656 elements["all"] = True 3657 elif self._match(TokenType.DISTINCT): 3658 elements["all"] = False 3659 3660 while True: 3661 expressions = self._parse_csv( 3662 lambda: None 3663 if self._match(TokenType.ROLLUP, advance=False) 3664 else self._parse_conjunction() 3665 ) 3666 if expressions: 3667 elements["expressions"].extend(expressions) 3668 3669 grouping_sets = self._parse_grouping_sets() 3670 if grouping_sets: 3671 elements["grouping_sets"].extend(grouping_sets) 3672 3673 rollup = None 3674 cube = None 3675 totals = None 3676 3677 index = self._index 3678 with_ = self._match(TokenType.WITH) 3679 if self._match(TokenType.ROLLUP): 3680 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3681 elements["rollup"].extend(ensure_list(rollup)) 3682 3683 if self._match(TokenType.CUBE): 3684 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3685 elements["cube"].extend(ensure_list(cube)) 3686 3687 if self._match_text_seq("TOTALS"): 3688 totals = True 3689 elements["totals"] = True # type: ignore 3690 3691 if not (grouping_sets or rollup or cube or totals): 3692 if with_: 3693 self._retreat(index) 3694 break 3695 3696 return self.expression(exp.Group, **elements) # type: ignore 3697 3698 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3699 if not self._match(TokenType.GROUPING_SETS): 3700 return None 3701 3702 return self._parse_wrapped_csv(self._parse_grouping_set) 3703 3704 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3705 if self._match(TokenType.L_PAREN): 3706 grouping_set = self._parse_csv(self._parse_column) 3707 self._match_r_paren() 3708 return self.expression(exp.Tuple, expressions=grouping_set) 3709 3710 return self._parse_column() 3711 3712 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3713 if not skip_having_token and not self._match(TokenType.HAVING): 3714 return None 3715 return self.expression(exp.Having, this=self._parse_conjunction()) 3716 3717 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3718 if not self._match(TokenType.QUALIFY): 3719 return None 3720 return self.expression(exp.Qualify, this=self._parse_conjunction()) 3721 3722 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3723 if skip_start_token: 3724 start = None 3725 elif self._match(TokenType.START_WITH): 3726 start = self._parse_conjunction() 3727 else: 
3728 return None 3729 3730 self._match(TokenType.CONNECT_BY) 3731 nocycle = self._match_text_seq("NOCYCLE") 3732 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3733 exp.Prior, this=self._parse_bitwise() 3734 ) 3735 connect = self._parse_conjunction() 3736 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3737 3738 if not start and self._match(TokenType.START_WITH): 3739 start = self._parse_conjunction() 3740 3741 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 3742 3743 def _parse_name_as_expression(self) -> exp.Alias: 3744 return self.expression( 3745 exp.Alias, 3746 alias=self._parse_id_var(any_token=True), 3747 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 3748 ) 3749 3750 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3751 if self._match_text_seq("INTERPOLATE"): 3752 return self._parse_wrapped_csv(self._parse_name_as_expression) 3753 return None 3754 3755 def _parse_order( 3756 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3757 ) -> t.Optional[exp.Expression]: 3758 siblings = None 3759 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3760 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3761 return this 3762 3763 siblings = True 3764 3765 return self.expression( 3766 exp.Order, 3767 this=this, 3768 expressions=self._parse_csv(self._parse_ordered), 3769 interpolate=self._parse_interpolate(), 3770 siblings=siblings, 3771 ) 3772 3773 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3774 if not self._match(token): 3775 return None 3776 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3777 3778 def _parse_ordered( 3779 self, parse_method: t.Optional[t.Callable] = None 3780 ) -> t.Optional[exp.Ordered]: 3781 this = parse_method() if parse_method else self._parse_conjunction() 3782 if not this: 3783 return None 3784 3785 asc = self._match(TokenType.ASC) 3786 desc = self._match(TokenType.DESC) or (asc and False) 3787 3788 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3789 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3790 3791 nulls_first = is_nulls_first or False 3792 explicitly_null_ordered = is_nulls_first or is_nulls_last 3793 3794 if ( 3795 not explicitly_null_ordered 3796 and ( 3797 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3798 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3799 ) 3800 and self.dialect.NULL_ORDERING != "nulls_are_last" 3801 ): 3802 nulls_first = True 3803 3804 if self._match_text_seq("WITH", "FILL"): 3805 with_fill = self.expression( 3806 exp.WithFill, 3807 **{ # type: ignore 3808 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 3809 "to": self._match_text_seq("TO") and self._parse_bitwise(), 3810 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 3811 }, 3812 ) 3813 else: 3814 with_fill = None 3815 3816 return self.expression( 3817 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3818 ) 3819 3820 def _parse_limit( 3821 self, 3822 this: t.Optional[exp.Expression] = None, 3823 top: bool = False, 3824 skip_limit_token: bool = False, 3825 ) -> t.Optional[exp.Expression]: 3826 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 3827 comments = self._prev_comments 3828 if top: 3829 limit_paren = self._match(TokenType.L_PAREN) 3830 expression = self._parse_term() if limit_paren else self._parse_number() 3831 3832 if limit_paren: 3833 
self._match_r_paren() 3834 else: 3835 expression = self._parse_term() 3836 3837 if self._match(TokenType.COMMA): 3838 offset = expression 3839 expression = self._parse_term() 3840 else: 3841 offset = None 3842 3843 limit_exp = self.expression( 3844 exp.Limit, 3845 this=this, 3846 expression=expression, 3847 offset=offset, 3848 comments=comments, 3849 expressions=self._parse_limit_by(), 3850 ) 3851 3852 return limit_exp 3853 3854 if self._match(TokenType.FETCH): 3855 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3856 direction = self._prev.text.upper() if direction else "FIRST" 3857 3858 count = self._parse_field(tokens=self.FETCH_TOKENS) 3859 percent = self._match(TokenType.PERCENT) 3860 3861 self._match_set((TokenType.ROW, TokenType.ROWS)) 3862 3863 only = self._match_text_seq("ONLY") 3864 with_ties = self._match_text_seq("WITH", "TIES") 3865 3866 if only and with_ties: 3867 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3868 3869 return self.expression( 3870 exp.Fetch, 3871 direction=direction, 3872 count=count, 3873 percent=percent, 3874 with_ties=with_ties, 3875 ) 3876 3877 return this 3878 3879 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3880 if not self._match(TokenType.OFFSET): 3881 return this 3882 3883 count = self._parse_term() 3884 self._match_set((TokenType.ROW, TokenType.ROWS)) 3885 3886 return self.expression( 3887 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 3888 ) 3889 3890 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 3891 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 3892 3893 def _parse_locks(self) -> t.List[exp.Lock]: 3894 locks = [] 3895 while True: 3896 if self._match_text_seq("FOR", "UPDATE"): 3897 update = True 3898 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3899 "LOCK", "IN", "SHARE", "MODE" 3900 ): 3901 update = False 3902 else: 3903 break 3904 3905 expressions = None 3906 if self._match_text_seq("OF"): 3907 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3908 3909 wait: t.Optional[bool | exp.Expression] = None 3910 if self._match_text_seq("NOWAIT"): 3911 wait = True 3912 elif self._match_text_seq("WAIT"): 3913 wait = self._parse_primary() 3914 elif self._match_text_seq("SKIP", "LOCKED"): 3915 wait = False 3916 3917 locks.append( 3918 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3919 ) 3920 3921 return locks 3922 3923 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3924 while this and self._match_set(self.SET_OPERATIONS): 3925 token_type = self._prev.token_type 3926 3927 if token_type == TokenType.UNION: 3928 operation = exp.Union 3929 elif token_type == TokenType.EXCEPT: 3930 operation = exp.Except 3931 else: 3932 operation = exp.Intersect 3933 3934 comments = self._prev.comments 3935 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3936 by_name = self._match_text_seq("BY", "NAME") 3937 expression = self._parse_select(nested=True, parse_set_operation=False) 3938 3939 this = self.expression( 3940 operation, 3941 comments=comments, 3942 this=this, 3943 distinct=distinct, 3944 by_name=by_name, 3945 expression=expression, 3946 ) 3947 3948 if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION: 3949 expression = this.expression 3950 3951 if expression: 3952 for arg in self.UNION_MODIFIERS: 3953 expr = expression.args.get(arg) 3954 
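                    # When set, hoist the modifier (ORDER BY / LIMIT / OFFSET) off
                    # the right-hand SELECT and attach it to the set operation, so
                    # e.g. "SELECT ... UNION SELECT ... LIMIT 1" limits the UNION
                    # as a whole rather than its second operand.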

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        this = self._parse_equality()

        if self._match(TokenType.COLON_EQ):
            this = self.expression(
                exp.PropertyEQ,
                this=this,
                comments=self._prev_comments,
                expression=self._parse_conjunction(),
            )

        while self._match_set(self.CONJUNCTION):
            this = self.expression(
                self.CONJUNCTION[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=self._parse_equality(),
            )
        return this

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False))
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this
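
    # Illustrative sketch: a parenthesized list lands in In.expressions, while a
    # single subquery is wrapped and stored under In.query (an assumption worth
    # verifying against the installed sqlglot version).
    #
    #   import sqlglot
    #   from sqlglot import exp
    #   in_list = sqlglot.parse_one("SELECT x IN (1, 2)").find(exp.In)
    #   assert len(in_list.expressions) == 2
    #   in_query = sqlglot.parse_one("SELECT x IN (SELECT y FROM t)").find(exp.In)
    #   assert in_query.args.get("query") is not None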

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = exp.INTERVAL_STRING_RE.findall(this.name)
            if len(parts) == 1:
                if unit:
                    # Unconsume the eagerly-parsed unit, since the real unit was part of the string
                    self._retreat(self._index - 1)

                this = exp.Literal.string(parts[0][0])
                unit = self.expression(exp.Var, this=parts[0][1].upper())

        if self.INTERVAL_SPANS and self._match_text_seq("TO"):
            unit = self.expression(
                exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
            )

        interval = self.expression(exp.Interval, this=this, unit=unit)

        index = self._index
        self._match(TokenType.PLUS)

        # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
        if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
            return self.expression(
                exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
            )

        self._retreat(index)
        return interval
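
    # Illustrative sketch: both INTERVAL '5' DAY and INTERVAL '5 day' should land in
    # the canonical Literal + unit form described above (exact rendering is an
    # assumption and may differ by version/dialect).
    #
    #   import sqlglot
    #   from sqlglot import exp
    #   ivl = sqlglot.parse_one("SELECT INTERVAL '5 day'").find(exp.Interval)
    #   assert ivl.this.is_string and ivl.this.name == "5"
    #   assert ivl.args["unit"].name.upper() == "DAY"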

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            this = self.expression(
                self.FACTOR[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )
            if isinstance(this, exp.Div):
                this.args["typed"] = self.dialect.TYPED_DIVISION
                this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                return self.expression(exp.Cast, this=this, to=data_type)

            if data_type.expressions:
                self._retreat(index2)
                return self._parse_column_ops(data_type)

            self._retreat(index)

        if fallback_to_identifier:
            return self._parse_id_var()

        this = self._parse_column()
        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        this = self._parse_type()
        if not this:
            return None

        if isinstance(this, exp.Column) and not this.table:
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )
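
    # Illustrative sketch: the typed/safe flags recorded on exp.Div above reflect the
    # source dialect's division semantics, which lets the generator transpile `/`
    # faithfully between dialects.
    #
    #   import sqlglot
    #   from sqlglot import exp
    #   div = sqlglot.parse_one("SELECT a / b").find(exp.Div)
    #   assert "typed" in div.args and "safe" in div.args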

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        index = self._index

        this: t.Optional[exp.Expression] = None
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if identifier:
                tokens = self.dialect.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    this = exp.DataType.build(type_name, udt=True)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR,)
                )
                if not func_or_ident or not self._match(TokenType.COMMA):
                    return None
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                expressions.insert(0, func_or_ident)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var(upper=True)
            if unit:
                if self._match_text_seq("TO"):
                    unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))

                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
            else:
                this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )
        elif expressions:
            this.set("expressions", expressions)

        index = self._index

        # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3]
        matched_array = self._match(TokenType.ARRAY)

        while self._curr:
            matched_l_bracket = self._match(TokenType.L_BRACKET)
            if not matched_l_bracket and not matched_array:
                break

            matched_array = False
            values = self._parse_csv(self._parse_conjunction) or None
            if values and not schema:
                self._retreat(index)
                break

            this = exp.DataType(
                this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True
            )
            self._match(TokenType.R_BRACKET)

        if self.TYPE_CONVERTER and isinstance(this.this, exp.DataType.Type):
            converter = self.TYPE_CONVERTER.get(this.this)
            if converter:
                this = converter(t.cast(exp.DataType, this))

        return this
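
    # Illustrative sketch: nested and array types round-trip through _parse_types;
    # the Postgres INT ARRAY[3] form is treated as a synonym for INT[3], per the
    # comment above.
    #
    #   import sqlglot
    #   from sqlglot import exp
    #   sql = "CREATE TABLE t (x INT ARRAY[3])"
    #   dt = sqlglot.parse_one(sql, read="postgres").find(exp.DataType)
    #   assert dt.this == exp.DataType.Type.ARRAY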

    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        index = self._index
        this = (
            self._parse_type(parse_interval=False, fallback_to_identifier=True)
            or self._parse_id_var()
        )
        self._match(TokenType.COLON)

        if (
            type_required
            and not isinstance(this, exp.DataType)
            and not self._match_set(self.TYPE_TOKENS, advance=False)
        ):
            self._retreat(index)
            return self._parse_types()

        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_column_reference()
        return self._parse_column_ops(this) if this else self._parse_bracket(this)

    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if (
            not this
            and self._match(TokenType.VALUES, advance=False)
            and self.VALUES_FOLLOWED_BY_PAREN
            and (not self._next or self._next.token_type != TokenType.L_PAREN)
        ):
            this = self._parse_id_var()

        if isinstance(this, exp.Identifier):
            # We bubble up comments from the Identifier to the Column
            this = self.expression(exp.Column, comments=this.pop_comments(), this=this)

        return this
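
    # Illustrative sketch: AT TIME ZONE binds in the unary chain above, wrapping the
    # operand in an exp.AtTimeZone node.
    #
    #   import sqlglot
    #   from sqlglot import exp
    #   node = sqlglot.parse_one("SELECT x AT TIME ZONE 'UTC'").find(exp.AtTimeZone)
    #   assert node is not None and node.args["zone"].name == "UTC"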

    def _parse_colon_as_json_extract(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        casts = []
        json_path = []

        while self._match(TokenType.COLON):
            start_index = self._index
            path = self._parse_column_ops(self._parse_field(any_token=True))

            # The cast :: operator has a lower precedence than the extraction operator :, so
            # we rearrange the AST appropriately to avoid casting the JSON path
            while isinstance(path, exp.Cast):
                casts.append(path.to)
                path = path.this

            if casts:
                dcolon_offset = next(
                    i
                    for i, t in enumerate(self._tokens[start_index:])
                    if t.token_type == TokenType.DCOLON
                )
                end_token = self._tokens[start_index + dcolon_offset - 1]
            else:
                end_token = self._prev

            if path:
                json_path.append(self._find_sql(self._tokens[start_index], end_token))

        if json_path:
            this = self.expression(
                exp.JSONExtract,
                this=this,
                expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))),
            )

            while casts:
                this = self.expression(exp.Cast, this=this, to=casts.pop())

        return this

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference()
            else:
                field = self._parse_field(any_token=True, anonymous_func=True)

            if isinstance(field, exp.Func) and this:
                # BigQuery allows function calls like x.y.count(...), SAFE.SUBSTR(...) etc.
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = exp.replace_tree(
                    this,
                    lambda n: (
                        self.expression(exp.Dot, this=n.args.get("table"), expression=n.this)
                        if n.table
                        else n.this
                    )
                    if isinstance(n, exp.Column)
                    else n,
                )

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            this = self._parse_bracket(this)

        return self._parse_colon_as_json_extract(this) if self.COLON_IS_JSON_EXTRACT else this
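
    # Illustrative sketch: in dialects where COLON_IS_JSON_EXTRACT is set (e.g.
    # Snowflake), `col:a.b` becomes a single JSONExtract over a dotted path; the
    # exact path representation is dialect-specific and an assumption here.
    #
    #   import sqlglot
    #   from sqlglot import exp
    #   node = sqlglot.parse_one("SELECT col:a.b FROM t", read="snowflake").find(exp.JSONExtract)
    #   assert node is not None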

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if not this and self._match(TokenType.R_PAREN, advance=False):
                this = self.expression(exp.Tuple)
            elif isinstance(this, exp.UNWRAPPED_QUERIES):
                this = self._parse_subquery(this=this, parse_alias=False)
            elif isinstance(this, exp.Subquery):
                this = self._parse_subquery(
                    this=self._parse_set_operations(this), parse_alias=False
                )
            elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=this)

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        if anonymous_func:
            field = (
                self._parse_function(anonymous=anonymous_func, any_token=any_token)
                or self._parse_primary()
            )
        else:
            field = self._parse_primary() or self._parse_function(
                anonymous=anonymous_func, any_token=any_token
            )
        return field or self._parse_id_var(any_token=any_token, tokens=tokens)

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        if not self._curr:
            return None

        comments = self._curr.comments
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return self._parse_window(parser(self))

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if any_token:
            if token_type in self.RESERVED_TOKENS:
                return None
        elif token_type not in self.FUNC_TOKENS:
            return None

        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if alias:
                args = self._kv_to_prop_eq(args)

            if function and not anonymous:
                if "dialect" in function.__code__.co_varnames:
                    func = function(args, dialect=self.dialect)
                else:
                    func = function(args)

                func = self.validate_expression(func, args)
                if not self.dialect.NORMALIZE_FUNCTIONS:
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True)
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]:
        transformed = []

        for e in expressions:
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression
                    )

                if isinstance(e.this, exp.Column):
                    e.this.replace(e.this.this)

            transformed.append(e)

        return transformed

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_id_var())
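
    # Illustrative sketch: the ODBC-style {fn ...} escape parses to the same node as
    # the bare call; MySQL is used here only as an example dialect that accepts it.
    #
    #   import sqlglot
    #   from sqlglot import exp
    #   ast = sqlglot.parse_one("SELECT {fn CONCAT('a', 'b')}", read="mysql")
    #   assert ast.find(exp.Concat) is not None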

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
        return self._parse_id_var()

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_lambda_arg()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        index = self._index
        if not self._match(TokenType.L_PAREN):
            return this

        # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
        # expr can be of both types
        if self._match_set(self.SELECT_START_TOKENS):
            self._retreat(index)
            return this
        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            persisted = self._prev.text.upper() == "MATERIALIZED"
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_conjunction(),
                    persisted=persisted or self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )
        elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False):
            self._match(TokenType.ALIAS)
            constraints.append(
                self.expression(exp.TransformColumnConstraint, this=self._parse_field())
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)
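
    # Illustrative sketch: schema parsing yields ColumnDef nodes whose constraints are
    # collected by _parse_column_constraint below.
    #
    #   import sqlglot
    #   from sqlglot import exp
    #   col = sqlglot.parse_one("CREATE TABLE t (x INT NOT NULL)").find(exp.ColumnDef)
    #   assert col.kind.this == exp.DataType.Type.INT
    #   assert any(isinstance(c.kind, exp.NotNullColumnConstraint) for c in col.constraints)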

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                this.set("expression", self._parse_range())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this
self.raise_error(f"No parser found for schema constraint {constraint}.") 5008 5009 return self.CONSTRAINT_PARSERS[constraint](self) 5010 5011 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5012 self._match_text_seq("KEY") 5013 return self.expression( 5014 exp.UniqueColumnConstraint, 5015 this=self._parse_schema(self._parse_id_var(any_token=False)), 5016 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5017 on_conflict=self._parse_on_conflict(), 5018 ) 5019 5020 def _parse_key_constraint_options(self) -> t.List[str]: 5021 options = [] 5022 while True: 5023 if not self._curr: 5024 break 5025 5026 if self._match(TokenType.ON): 5027 action = None 5028 on = self._advance_any() and self._prev.text 5029 5030 if self._match_text_seq("NO", "ACTION"): 5031 action = "NO ACTION" 5032 elif self._match_text_seq("CASCADE"): 5033 action = "CASCADE" 5034 elif self._match_text_seq("RESTRICT"): 5035 action = "RESTRICT" 5036 elif self._match_pair(TokenType.SET, TokenType.NULL): 5037 action = "SET NULL" 5038 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5039 action = "SET DEFAULT" 5040 else: 5041 self.raise_error("Invalid key constraint") 5042 5043 options.append(f"ON {on} {action}") 5044 elif self._match_text_seq("NOT", "ENFORCED"): 5045 options.append("NOT ENFORCED") 5046 elif self._match_text_seq("DEFERRABLE"): 5047 options.append("DEFERRABLE") 5048 elif self._match_text_seq("INITIALLY", "DEFERRED"): 5049 options.append("INITIALLY DEFERRED") 5050 elif self._match_text_seq("NORELY"): 5051 options.append("NORELY") 5052 elif self._match_text_seq("MATCH", "FULL"): 5053 options.append("MATCH FULL") 5054 else: 5055 break 5056 5057 return options 5058 5059 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5060 if match and not self._match(TokenType.REFERENCES): 5061 return None 5062 5063 expressions = None 5064 this = self._parse_table(schema=True) 5065 options = self._parse_key_constraint_options() 5066 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5067 5068 def _parse_foreign_key(self) -> exp.ForeignKey: 5069 expressions = self._parse_wrapped_id_vars() 5070 reference = self._parse_references() 5071 options = {} 5072 5073 while self._match(TokenType.ON): 5074 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5075 self.raise_error("Expected DELETE or UPDATE") 5076 5077 kind = self._prev.text.lower() 5078 5079 if self._match_text_seq("NO", "ACTION"): 5080 action = "NO ACTION" 5081 elif self._match(TokenType.SET): 5082 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5083 action = "SET " + self._prev.text.upper() 5084 else: 5085 self._advance() 5086 action = self._prev.text.upper() 5087 5088 options[kind] = action 5089 5090 return self.expression( 5091 exp.ForeignKey, 5092 expressions=expressions, 5093 reference=reference, 5094 **options, # type: ignore 5095 ) 5096 5097 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5098 return self._parse_field() 5099 5100 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5101 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5102 self._retreat(self._index - 1) 5103 return None 5104 5105 id_vars = self._parse_wrapped_id_vars() 5106 return self.expression( 5107 exp.PeriodForSystemTimeConstraint, 5108 this=seq_get(id_vars, 0), 5109 expression=seq_get(id_vars, 1), 5110 ) 5111 5112 def _parse_primary_key( 5113 self, wrapped_optional: bool = False, in_props: bool = False 5114 ) -> 

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True))

    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions))
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )
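
    # Illustrative sketch: each WHEN/THEN pair becomes an exp.If under Case.ifs, with
    # the ELSE branch stored as the default.
    #
    #   import sqlglot
    #   from sqlglot import exp
    #   case = sqlglot.parse_one("SELECT CASE WHEN x = 1 THEN 'a' ELSE 'b' END").find(exp.Case)
    #   assert len(case.args["ifs"]) == 1 and case.args["default"] is not None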

    def _parse_if(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                return self._parse_as_command(self._prev)

            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
        )
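
    # Illustrative sketch: strict parsing yields exp.Cast, while TRY_CAST routes
    # through this same method with strict=False and surfaces as exp.TryCast
    # (assuming the default dialect wires TRY_CAST to this parser).
    #
    #   import sqlglot
    #   from sqlglot import exp
    #   assert isinstance(sqlglot.parse_one("SELECT CAST(x AS TEXT)").selects[0], exp.Cast)
    #   assert isinstance(sqlglot.parse_one("SELECT TRY_CAST(x AS INT)").selects[0], exp.TryCast)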

    def _parse_string_agg(self) -> exp.Expression:
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ...]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))
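
    # Illustrative sketch: the WITHIN GROUP form is folded into a single GroupConcat
    # whose `this` carries the ORDER BY, easing transpilation to MySQL/SQLite (T-SQL
    # is used here only as an example source dialect).
    #
    #   import sqlglot
    #   from sqlglot import exp
    #   sql = "SELECT STRING_AGG(x, ',') WITHIN GROUP (ORDER BY x) FROM t"
    #   gc = sqlglot.parse_one(sql, read="tsql").find(exp.GroupConcat)
    #   assert gc is not None and isinstance(gc.this, exp.Order)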

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)
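
    # Illustrative sketch: the search/result variant of DECODE comes back as a CASE,
    # per the docstring above (Oracle is used here only as an example dialect).
    #
    #   import sqlglot
    #   from sqlglot import exp
    #   ast = sqlglot.parse_one("SELECT DECODE(x, 1, 'one', 'other') FROM t", read="oracle")
    #   assert ast.find(exp.Case) is not None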

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]:
        # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        return None

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
        if self._match(TokenType.FOR):
            if len(args) == 1:
                args.append(exp.Literal.number(1))
            args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)
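
    # Illustrative sketch: the Postgres FROM/FOR form normalizes to positional
    # Substring arguments (exact output rendering is an assumption and may vary).
    #
    #   import sqlglot
    #   from sqlglot import exp
    #   sql = "SELECT SUBSTRING(s FROM 2 FOR 3)"
    #   sub = sqlglot.parse_one(sql, read="postgres").find(exp.Substring)
    #   assert sub.args["start"].name == "2" and sub.args["length"].name == "3"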
5616 return self.expression(exp.IgnoreNulls, this=this) 5617 if self._match_text_seq("RESPECT", "NULLS"): 5618 return self.expression(exp.RespectNulls, this=this) 5619 return this 5620 5621 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5622 if self._match(TokenType.HAVING): 5623 self._match_texts(("MAX", "MIN")) 5624 max = self._prev.text.upper() != "MIN" 5625 return self.expression( 5626 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5627 ) 5628 5629 return this 5630 5631 def _parse_window( 5632 self, this: t.Optional[exp.Expression], alias: bool = False 5633 ) -> t.Optional[exp.Expression]: 5634 func = this 5635 comments = func.comments if isinstance(func, exp.Expression) else None 5636 5637 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5638 self._match(TokenType.WHERE) 5639 this = self.expression( 5640 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5641 ) 5642 self._match_r_paren() 5643 5644 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5645 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5646 if self._match_text_seq("WITHIN", "GROUP"): 5647 order = self._parse_wrapped(self._parse_order) 5648 this = self.expression(exp.WithinGroup, this=this, expression=order) 5649 5650 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 5651 # Some dialects choose to implement and some do not. 5652 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5653 5654 # There is some code above in _parse_lambda that handles 5655 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5656 5657 # The below changes handle 5658 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5659 5660 # Oracle allows both formats 5661 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5662 # and Snowflake chose to do the same for familiarity 5663 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5664 if isinstance(this, exp.AggFunc): 5665 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5666 5667 if ignore_respect and ignore_respect is not this: 5668 ignore_respect.replace(ignore_respect.this) 5669 this = self.expression(ignore_respect.__class__, this=this) 5670 5671 this = self._parse_respect_or_ignore_nulls(this) 5672 5673 # bigquery select from window x AS (partition by ...) 
5674 if alias: 5675 over = None 5676 self._match(TokenType.ALIAS) 5677 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5678 return this 5679 else: 5680 over = self._prev.text.upper() 5681 5682 if comments and isinstance(func, exp.Expression): 5683 func.pop_comments() 5684 5685 if not self._match(TokenType.L_PAREN): 5686 return self.expression( 5687 exp.Window, 5688 comments=comments, 5689 this=this, 5690 alias=self._parse_id_var(False), 5691 over=over, 5692 ) 5693 5694 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5695 5696 first = self._match(TokenType.FIRST) 5697 if self._match_text_seq("LAST"): 5698 first = False 5699 5700 partition, order = self._parse_partition_and_order() 5701 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5702 5703 if kind: 5704 self._match(TokenType.BETWEEN) 5705 start = self._parse_window_spec() 5706 self._match(TokenType.AND) 5707 end = self._parse_window_spec() 5708 5709 spec = self.expression( 5710 exp.WindowSpec, 5711 kind=kind, 5712 start=start["value"], 5713 start_side=start["side"], 5714 end=end["value"], 5715 end_side=end["side"], 5716 ) 5717 else: 5718 spec = None 5719 5720 self._match_r_paren() 5721 5722 window = self.expression( 5723 exp.Window, 5724 comments=comments, 5725 this=this, 5726 partition_by=partition, 5727 order=order, 5728 spec=spec, 5729 alias=window_alias, 5730 over=over, 5731 first=first, 5732 ) 5733 5734 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 5735 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5736 return self._parse_window(window, alias=alias) 5737 5738 return window 5739 5740 def _parse_partition_and_order( 5741 self, 5742 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5743 return self._parse_partition_by(), self._parse_order() 5744 5745 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5746 self._match(TokenType.BETWEEN) 5747 5748 return { 5749 "value": ( 5750 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5751 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5752 or self._parse_bitwise() 5753 ), 5754 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5755 } 5756 5757 def _parse_alias( 5758 self, this: t.Optional[exp.Expression], explicit: bool = False 5759 ) -> t.Optional[exp.Expression]: 5760 any_token = self._match(TokenType.ALIAS) 5761 comments = self._prev_comments or [] 5762 5763 if explicit and not any_token: 5764 return this 5765 5766 if self._match(TokenType.L_PAREN): 5767 aliases = self.expression( 5768 exp.Aliases, 5769 comments=comments, 5770 this=this, 5771 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5772 ) 5773 self._match_r_paren(aliases) 5774 return aliases 5775 5776 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 5777 self.STRING_ALIASES and self._parse_string_as_identifier() 5778 ) 5779 5780 if alias: 5781 comments.extend(alias.pop_comments()) 5782 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5783 column = this.this 5784 5785 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5786 if not this.comments and column and column.comments: 5787 this.comments = column.pop_comments() 5788 5789 return this 5790 5791 def _parse_id_var( 5792 self, 5793 any_token: bool = True, 5794 tokens: t.Optional[t.Collection[TokenType]] = None, 5795 ) -> t.Optional[exp.Expression]: 5796 expression = self._parse_identifier() 5797 if 
not expression and ( 5798 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 5799 ): 5800 quoted = self._prev.token_type == TokenType.STRING 5801 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 5802 5803 return expression 5804 5805 def _parse_string(self) -> t.Optional[exp.Expression]: 5806 if self._match_set(self.STRING_PARSERS): 5807 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 5808 return self._parse_placeholder() 5809 5810 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5811 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5812 5813 def _parse_number(self) -> t.Optional[exp.Expression]: 5814 if self._match_set(self.NUMERIC_PARSERS): 5815 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 5816 return self._parse_placeholder() 5817 5818 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5819 if self._match(TokenType.IDENTIFIER): 5820 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5821 return self._parse_placeholder() 5822 5823 def _parse_var( 5824 self, 5825 any_token: bool = False, 5826 tokens: t.Optional[t.Collection[TokenType]] = None, 5827 upper: bool = False, 5828 ) -> t.Optional[exp.Expression]: 5829 if ( 5830 (any_token and self._advance_any()) 5831 or self._match(TokenType.VAR) 5832 or (self._match_set(tokens) if tokens else False) 5833 ): 5834 return self.expression( 5835 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5836 ) 5837 return self._parse_placeholder() 5838 5839 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5840 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5841 self._advance() 5842 return self._prev 5843 return None 5844 5845 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5846 return self._parse_var() or self._parse_string() 5847 5848 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 5849 return self._parse_primary() or self._parse_var(any_token=True) 5850 5851 def _parse_null(self) -> t.Optional[exp.Expression]: 5852 if self._match_set(self.NULL_TOKENS): 5853 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5854 return self._parse_placeholder() 5855 5856 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5857 if self._match(TokenType.TRUE): 5858 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5859 if self._match(TokenType.FALSE): 5860 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5861 return self._parse_placeholder() 5862 5863 def _parse_star(self) -> t.Optional[exp.Expression]: 5864 if self._match(TokenType.STAR): 5865 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5866 return self._parse_placeholder() 5867 5868 def _parse_parameter(self) -> exp.Parameter: 5869 this = self._parse_identifier() or self._parse_primary_or_var() 5870 return self.expression(exp.Parameter, this=this) 5871 5872 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5873 if self._match_set(self.PLACEHOLDER_PARSERS): 5874 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5875 if placeholder: 5876 return placeholder 5877 self._advance(-1) 5878 return None 5879 5880 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 5881 if not self._match_texts(keywords): 5882 return None 5883 if self._match(TokenType.L_PAREN, advance=False): 5884 return 
self._parse_wrapped_csv(self._parse_expression) 5885 5886 expression = self._parse_expression() 5887 return [expression] if expression else None 5888 5889 def _parse_csv( 5890 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 5891 ) -> t.List[exp.Expression]: 5892 parse_result = parse_method() 5893 items = [parse_result] if parse_result is not None else [] 5894 5895 while self._match(sep): 5896 self._add_comments(parse_result) 5897 parse_result = parse_method() 5898 if parse_result is not None: 5899 items.append(parse_result) 5900 5901 return items 5902 5903 def _parse_tokens( 5904 self, parse_method: t.Callable, expressions: t.Dict 5905 ) -> t.Optional[exp.Expression]: 5906 this = parse_method() 5907 5908 while self._match_set(expressions): 5909 this = self.expression( 5910 expressions[self._prev.token_type], 5911 this=this, 5912 comments=self._prev_comments, 5913 expression=parse_method(), 5914 ) 5915 5916 return this 5917 5918 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 5919 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 5920 5921 def _parse_wrapped_csv( 5922 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 5923 ) -> t.List[exp.Expression]: 5924 return self._parse_wrapped( 5925 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 5926 ) 5927 5928 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 5929 wrapped = self._match(TokenType.L_PAREN) 5930 if not wrapped and not optional: 5931 self.raise_error("Expecting (") 5932 parse_result = parse_method() 5933 if wrapped: 5934 self._match_r_paren() 5935 return parse_result 5936 5937 def _parse_expressions(self) -> t.List[exp.Expression]: 5938 return self._parse_csv(self._parse_expression) 5939 5940 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 5941 return self._parse_select() or self._parse_set_operations( 5942 self._parse_expression() if alias else self._parse_conjunction() 5943 ) 5944 5945 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5946 return self._parse_query_modifiers( 5947 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5948 ) 5949 5950 def _parse_transaction(self) -> exp.Transaction | exp.Command: 5951 this = None 5952 if self._match_texts(self.TRANSACTION_KIND): 5953 this = self._prev.text 5954 5955 self._match_texts(("TRANSACTION", "WORK")) 5956 5957 modes = [] 5958 while True: 5959 mode = [] 5960 while self._match(TokenType.VAR): 5961 mode.append(self._prev.text) 5962 5963 if mode: 5964 modes.append(" ".join(mode)) 5965 if not self._match(TokenType.COMMA): 5966 break 5967 5968 return self.expression(exp.Transaction, this=this, modes=modes) 5969 5970 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 5971 chain = None 5972 savepoint = None 5973 is_rollback = self._prev.token_type == TokenType.ROLLBACK 5974 5975 self._match_texts(("TRANSACTION", "WORK")) 5976 5977 if self._match_text_seq("TO"): 5978 self._match_text_seq("SAVEPOINT") 5979 savepoint = self._parse_id_var() 5980 5981 if self._match(TokenType.AND): 5982 chain = not self._match_text_seq("NO") 5983 self._match_text_seq("CHAIN") 5984 5985 if is_rollback: 5986 return self.expression(exp.Rollback, savepoint=savepoint) 5987 5988 return self.expression(exp.Commit, chain=chain) 5989 5990 def _parse_refresh(self) -> exp.Refresh: 5991 self._match(TokenType.TABLE) 5992 return self.expression(exp.Refresh, 
this=self._parse_string() or self._parse_table()) 5993 5994 def _parse_add_column(self) -> t.Optional[exp.Expression]: 5995 if not self._match_text_seq("ADD"): 5996 return None 5997 5998 self._match(TokenType.COLUMN) 5999 exists_column = self._parse_exists(not_=True) 6000 expression = self._parse_field_def() 6001 6002 if expression: 6003 expression.set("exists", exists_column) 6004 6005 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6006 if self._match_texts(("FIRST", "AFTER")): 6007 position = self._prev.text 6008 column_position = self.expression( 6009 exp.ColumnPosition, this=self._parse_column(), position=position 6010 ) 6011 expression.set("position", column_position) 6012 6013 return expression 6014 6015 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6016 drop = self._match(TokenType.DROP) and self._parse_drop() 6017 if drop and not isinstance(drop, exp.Command): 6018 drop.set("kind", drop.args.get("kind", "COLUMN")) 6019 return drop 6020 6021 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6022 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6023 return self.expression( 6024 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6025 ) 6026 6027 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6028 index = self._index - 1 6029 6030 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6031 return self._parse_csv( 6032 lambda: self.expression( 6033 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6034 ) 6035 ) 6036 6037 self._retreat(index) 6038 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6039 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6040 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6041 6042 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6043 if self._match_texts(self.ALTER_ALTER_PARSERS): 6044 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6045 6046 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6047 # keyword after ALTER we default to parsing this statement 6048 self._match(TokenType.COLUMN) 6049 column = self._parse_field(any_token=True) 6050 6051 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6052 return self.expression(exp.AlterColumn, this=column, drop=True) 6053 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6054 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 6055 if self._match(TokenType.COMMENT): 6056 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6057 if self._match_text_seq("DROP", "NOT", "NULL"): 6058 return self.expression( 6059 exp.AlterColumn, 6060 this=column, 6061 drop=True, 6062 allow_null=True, 6063 ) 6064 if self._match_text_seq("SET", "NOT", "NULL"): 6065 return self.expression( 6066 exp.AlterColumn, 6067 this=column, 6068 allow_null=False, 6069 ) 6070 self._match_text_seq("SET", "DATA") 6071 self._match_text_seq("TYPE") 6072 return self.expression( 6073 exp.AlterColumn, 6074 this=column, 6075 dtype=self._parse_types(), 6076 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6077 using=self._match(TokenType.USING) and self._parse_conjunction(), 6078 ) 6079 6080 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6081 if self._match_texts(("ALL", "EVEN", "AUTO")): 6082 return 
self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6083 6084 self._match_text_seq("KEY", "DISTKEY") 6085 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6086 6087 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6088 if compound: 6089 self._match_text_seq("SORTKEY") 6090 6091 if self._match(TokenType.L_PAREN, advance=False): 6092 return self.expression( 6093 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6094 ) 6095 6096 self._match_texts(("AUTO", "NONE")) 6097 return self.expression( 6098 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6099 ) 6100 6101 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6102 index = self._index - 1 6103 6104 partition_exists = self._parse_exists() 6105 if self._match(TokenType.PARTITION, advance=False): 6106 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6107 6108 self._retreat(index) 6109 return self._parse_csv(self._parse_drop_column) 6110 6111 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 6112 if self._match(TokenType.COLUMN): 6113 exists = self._parse_exists() 6114 old_column = self._parse_column() 6115 to = self._match_text_seq("TO") 6116 new_column = self._parse_column() 6117 6118 if old_column is None or to is None or new_column is None: 6119 return None 6120 6121 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6122 6123 self._match_text_seq("TO") 6124 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 6125 6126 def _parse_alter_table_set(self) -> exp.AlterSet: 6127 alter_set = self.expression(exp.AlterSet) 6128 6129 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6130 "TABLE", "PROPERTIES" 6131 ): 6132 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_conjunction)) 6133 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6134 alter_set.set("expressions", [self._parse_conjunction()]) 6135 elif self._match_texts(("LOGGED", "UNLOGGED")): 6136 alter_set.set("option", exp.var(self._prev.text.upper())) 6137 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6138 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6139 elif self._match_text_seq("LOCATION"): 6140 alter_set.set("location", self._parse_field()) 6141 elif self._match_text_seq("ACCESS", "METHOD"): 6142 alter_set.set("access_method", self._parse_field()) 6143 elif self._match_text_seq("TABLESPACE"): 6144 alter_set.set("tablespace", self._parse_field()) 6145 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6146 alter_set.set("file_format", [self._parse_field()]) 6147 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6148 alter_set.set("file_format", self._parse_wrapped_options()) 6149 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6150 alter_set.set("copy_options", self._parse_wrapped_options()) 6151 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6152 alter_set.set("tag", self._parse_csv(self._parse_conjunction)) 6153 else: 6154 if self._match_text_seq("SERDE"): 6155 alter_set.set("serde", self._parse_field()) 6156 6157 alter_set.set("expressions", [self._parse_properties()]) 6158 6159 return alter_set 6160 6161 def _parse_alter(self) -> exp.AlterTable | exp.Command: 6162 start = self._prev 6163 6164 if not self._match(TokenType.TABLE): 6165 return 
self._parse_as_command(start) 6166 6167 exists = self._parse_exists() 6168 only = self._match_text_seq("ONLY") 6169 this = self._parse_table(schema=True) 6170 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6171 6172 if self._next: 6173 self._advance() 6174 6175 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6176 if parser: 6177 actions = ensure_list(parser(self)) 6178 options = self._parse_csv(self._parse_property) 6179 6180 if not self._curr and actions: 6181 return self.expression( 6182 exp.AlterTable, 6183 this=this, 6184 exists=exists, 6185 actions=actions, 6186 only=only, 6187 options=options, 6188 cluster=cluster, 6189 ) 6190 6191 return self._parse_as_command(start) 6192 6193 def _parse_merge(self) -> exp.Merge: 6194 self._match(TokenType.INTO) 6195 target = self._parse_table() 6196 6197 if target and self._match(TokenType.ALIAS, advance=False): 6198 target.set("alias", self._parse_table_alias()) 6199 6200 self._match(TokenType.USING) 6201 using = self._parse_table() 6202 6203 self._match(TokenType.ON) 6204 on = self._parse_conjunction() 6205 6206 return self.expression( 6207 exp.Merge, 6208 this=target, 6209 using=using, 6210 on=on, 6211 expressions=self._parse_when_matched(), 6212 ) 6213 6214 def _parse_when_matched(self) -> t.List[exp.When]: 6215 whens = [] 6216 6217 while self._match(TokenType.WHEN): 6218 matched = not self._match(TokenType.NOT) 6219 self._match_text_seq("MATCHED") 6220 source = ( 6221 False 6222 if self._match_text_seq("BY", "TARGET") 6223 else self._match_text_seq("BY", "SOURCE") 6224 ) 6225 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 6226 6227 self._match(TokenType.THEN) 6228 6229 if self._match(TokenType.INSERT): 6230 _this = self._parse_star() 6231 if _this: 6232 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 6233 else: 6234 then = self.expression( 6235 exp.Insert, 6236 this=self._parse_value(), 6237 expression=self._match_text_seq("VALUES") and self._parse_value(), 6238 ) 6239 elif self._match(TokenType.UPDATE): 6240 expressions = self._parse_star() 6241 if expressions: 6242 then = self.expression(exp.Update, expressions=expressions) 6243 else: 6244 then = self.expression( 6245 exp.Update, 6246 expressions=self._match(TokenType.SET) 6247 and self._parse_csv(self._parse_equality), 6248 ) 6249 elif self._match(TokenType.DELETE): 6250 then = self.expression(exp.Var, this=self._prev.text) 6251 else: 6252 then = None 6253 6254 whens.append( 6255 self.expression( 6256 exp.When, 6257 matched=matched, 6258 source=source, 6259 condition=condition, 6260 then=then, 6261 ) 6262 ) 6263 return whens 6264 6265 def _parse_show(self) -> t.Optional[exp.Expression]: 6266 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6267 if parser: 6268 return parser(self) 6269 return self._parse_as_command(self._prev) 6270 6271 def _parse_set_item_assignment( 6272 self, kind: t.Optional[str] = None 6273 ) -> t.Optional[exp.Expression]: 6274 index = self._index 6275 6276 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 6277 return self._parse_set_transaction(global_=kind == "GLOBAL") 6278 6279 left = self._parse_primary() or self._parse_column() 6280 assignment_delimiter = self._match_texts(("=", "TO")) 6281 6282 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6283 self._retreat(index) 6284 return None 6285 6286 right = self._parse_statement() or self._parse_id_var() 6287 if isinstance(right, 
(exp.Column, exp.Identifier)): 6288 right = exp.var(right.name) 6289 6290 this = self.expression(exp.EQ, this=left, expression=right) 6291 return self.expression(exp.SetItem, this=this, kind=kind) 6292 6293 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6294 self._match_text_seq("TRANSACTION") 6295 characteristics = self._parse_csv( 6296 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6297 ) 6298 return self.expression( 6299 exp.SetItem, 6300 expressions=characteristics, 6301 kind="TRANSACTION", 6302 **{"global": global_}, # type: ignore 6303 ) 6304 6305 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6306 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6307 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6308 6309 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6310 index = self._index 6311 set_ = self.expression( 6312 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6313 ) 6314 6315 if self._curr: 6316 self._retreat(index) 6317 return self._parse_as_command(self._prev) 6318 6319 return set_ 6320 6321 def _parse_var_from_options( 6322 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6323 ) -> t.Optional[exp.Var]: 6324 start = self._curr 6325 if not start: 6326 return None 6327 6328 option = start.text.upper() 6329 continuations = options.get(option) 6330 6331 index = self._index 6332 self._advance() 6333 for keywords in continuations or []: 6334 if isinstance(keywords, str): 6335 keywords = (keywords,) 6336 6337 if self._match_text_seq(*keywords): 6338 option = f"{option} {' '.join(keywords)}" 6339 break 6340 else: 6341 if continuations or continuations is None: 6342 if raise_unmatched: 6343 self.raise_error(f"Unknown option {option}") 6344 6345 self._retreat(index) 6346 return None 6347 6348 return exp.var(option) 6349 6350 def _parse_as_command(self, start: Token) -> exp.Command: 6351 while self._curr: 6352 self._advance() 6353 text = self._find_sql(start, self._prev) 6354 size = len(start.text) 6355 self._warn_unsupported() 6356 return exp.Command(this=text[:size], expression=text[size:]) 6357 6358 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6359 settings = [] 6360 6361 self._match_l_paren() 6362 kind = self._parse_id_var() 6363 6364 if self._match(TokenType.L_PAREN): 6365 while True: 6366 key = self._parse_id_var() 6367 value = self._parse_primary() 6368 6369 if not key and value is None: 6370 break 6371 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 6372 self._match(TokenType.R_PAREN) 6373 6374 self._match_r_paren() 6375 6376 return self.expression( 6377 exp.DictProperty, 6378 this=this, 6379 kind=kind.this if kind else None, 6380 settings=settings, 6381 ) 6382 6383 def _parse_dict_range(self, this: str) -> exp.DictRange: 6384 self._match_l_paren() 6385 has_min = self._match_text_seq("MIN") 6386 if has_min: 6387 min = self._parse_var() or self._parse_primary() 6388 self._match_text_seq("MAX") 6389 max = self._parse_var() or self._parse_primary() 6390 else: 6391 max = self._parse_var() or self._parse_primary() 6392 min = exp.Literal.number(0) 6393 self._match_r_paren() 6394 return self.expression(exp.DictRange, this=this, min=min, max=max) 6395 6396 def _parse_comprehension( 6397 self, this: t.Optional[exp.Expression] 6398 ) -> t.Optional[exp.Comprehension]: 6399 index = self._index 6400 expression = self._parse_column() 6401 if not 
self._match(TokenType.IN): 6402 self._retreat(index - 1) 6403 return None 6404 iterator = self._parse_column() 6405 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 6406 return self.expression( 6407 exp.Comprehension, 6408 this=this, 6409 expression=expression, 6410 iterator=iterator, 6411 condition=condition, 6412 ) 6413 6414 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6415 if self._match(TokenType.HEREDOC_STRING): 6416 return self.expression(exp.Heredoc, this=self._prev.text) 6417 6418 if not self._match_text_seq("$"): 6419 return None 6420 6421 tags = ["$"] 6422 tag_text = None 6423 6424 if self._is_connected(): 6425 self._advance() 6426 tags.append(self._prev.text.upper()) 6427 else: 6428 self.raise_error("No closing $ found") 6429 6430 if tags[-1] != "$": 6431 if self._is_connected() and self._match_text_seq("$"): 6432 tag_text = tags[-1] 6433 tags.append("$") 6434 else: 6435 self.raise_error("No closing $ found") 6436 6437 heredoc_start = self._curr 6438 6439 while self._curr: 6440 if self._match_text_seq(*tags, advance=False): 6441 this = self._find_sql(heredoc_start, self._prev) 6442 self._advance(len(tags)) 6443 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6444 6445 self._advance() 6446 6447 self.raise_error(f"No closing {''.join(tags)} found") 6448 return None 6449 6450 def _find_parser( 6451 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6452 ) -> t.Optional[t.Callable]: 6453 if not self._curr: 6454 return None 6455 6456 index = self._index 6457 this = [] 6458 while True: 6459 # The current token might be multiple words 6460 curr = self._curr.text.upper() 6461 key = curr.split(" ") 6462 this.append(curr) 6463 6464 self._advance() 6465 result, trie = in_trie(trie, key) 6466 if result == TrieResult.FAILED: 6467 break 6468 6469 if result == TrieResult.EXISTS: 6470 subparser = parsers[" ".join(this)] 6471 return subparser 6472 6473 self._retreat(index) 6474 return None 6475 6476 def _match(self, token_type, advance=True, expression=None): 6477 if not self._curr: 6478 return None 6479 6480 if self._curr.token_type == token_type: 6481 if advance: 6482 self._advance() 6483 self._add_comments(expression) 6484 return True 6485 6486 return None 6487 6488 def _match_set(self, types, advance=True): 6489 if not self._curr: 6490 return None 6491 6492 if self._curr.token_type in types: 6493 if advance: 6494 self._advance() 6495 return True 6496 6497 return None 6498 6499 def _match_pair(self, token_type_a, token_type_b, advance=True): 6500 if not self._curr or not self._next: 6501 return None 6502 6503 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6504 if advance: 6505 self._advance(2) 6506 return True 6507 6508 return None 6509 6510 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6511 if not self._match(TokenType.L_PAREN, expression=expression): 6512 self.raise_error("Expecting (") 6513 6514 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6515 if not self._match(TokenType.R_PAREN, expression=expression): 6516 self.raise_error("Expecting )") 6517 6518 def _match_texts(self, texts, advance=True): 6519 if self._curr and self._curr.text.upper() in texts: 6520 if advance: 6521 self._advance() 6522 return True 6523 return None 6524 6525 def _match_text_seq(self, *texts, advance=True): 6526 index = self._index 6527 for text in texts: 6528 if self._curr and self._curr.text.upper() == text: 6529 self._advance() 6530 else: 6531 
self._retreat(index) 6532 return None 6533 6534 if not advance: 6535 self._retreat(index) 6536 6537 return True 6538 6539 def _replace_lambda( 6540 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 6541 ) -> t.Optional[exp.Expression]: 6542 if not node: 6543 return node 6544 6545 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 6546 6547 for column in node.find_all(exp.Column): 6548 typ = lambda_types.get(column.parts[0].name) 6549 if typ is not None: 6550 dot_or_id = column.to_dot() if column.table else column.this 6551 6552 if typ: 6553 dot_or_id = self.expression( 6554 exp.Cast, 6555 this=dot_or_id, 6556 to=typ, 6557 ) 6558 6559 parent = column.parent 6560 6561 while isinstance(parent, exp.Dot): 6562 if not isinstance(parent.parent, exp.Dot): 6563 parent.replace(dot_or_id) 6564 break 6565 parent = parent.parent 6566 else: 6567 if column is node: 6568 node = dot_or_id 6569 else: 6570 column.replace(dot_or_id) 6571 return node 6572 6573 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6574 start = self._prev 6575 6576 # Not to be confused with TRUNCATE(number, decimals) function call 6577 if self._match(TokenType.L_PAREN): 6578 self._retreat(self._index - 2) 6579 return self._parse_function() 6580 6581 # Clickhouse supports TRUNCATE DATABASE as well 6582 is_database = self._match(TokenType.DATABASE) 6583 6584 self._match(TokenType.TABLE) 6585 6586 exists = self._parse_exists(not_=False) 6587 6588 expressions = self._parse_csv( 6589 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6590 ) 6591 6592 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6593 6594 if self._match_text_seq("RESTART", "IDENTITY"): 6595 identity = "RESTART" 6596 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6597 identity = "CONTINUE" 6598 else: 6599 identity = None 6600 6601 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6602 option = self._prev.text 6603 else: 6604 option = None 6605 6606 partition = self._parse_partition() 6607 6608 # Fallback case 6609 if self._curr: 6610 return self._parse_as_command(start) 6611 6612 return self.expression( 6613 exp.TruncateTable, 6614 expressions=expressions, 6615 is_database=is_database, 6616 exists=exists, 6617 cluster=cluster, 6618 identity=identity, 6619 option=option, 6620 partition=partition, 6621 ) 6622 6623 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 6624 this = self._parse_ordered(self._parse_opclass) 6625 6626 if not self._match(TokenType.WITH): 6627 return this 6628 6629 op = self._parse_var(any_token=True) 6630 6631 return self.expression(exp.WithOperator, this=this, op=op) 6632 6633 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 6634 opts = [] 6635 self._match(TokenType.EQ) 6636 self._match(TokenType.L_PAREN) 6637 while self._curr and not self._match(TokenType.R_PAREN): 6638 opts.append(self._parse_conjunction()) 6639 self._match(TokenType.COMMA) 6640 return opts 6641 6642 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 6643 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 6644 6645 options = [] 6646 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 6647 option = self._parse_unquoted_field() 6648 value = None 6649 6650 # Some options are defined as functions with the values as params 6651 if not isinstance(option, exp.Func): 6652 prev = self._prev.text.upper() 6653 # Different dialects might separate options and 
values by white space, "=" and "AS" 6654 self._match(TokenType.EQ) 6655 self._match(TokenType.ALIAS) 6656 6657 if prev == "FILE_FORMAT" and self._match(TokenType.L_PAREN): 6658 # Snowflake FILE_FORMAT case 6659 value = self._parse_wrapped_options() 6660 else: 6661 value = self._parse_unquoted_field() 6662 6663 param = self.expression(exp.CopyParameter, this=option, expression=value) 6664 options.append(param) 6665 6666 if sep: 6667 self._match(sep) 6668 6669 return options 6670 6671 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 6672 expr = self.expression(exp.Credentials) 6673 6674 if self._match_text_seq("STORAGE_INTEGRATION", advance=False): 6675 expr.set("storage", self._parse_conjunction()) 6676 if self._match_text_seq("CREDENTIALS"): 6677 # Snowflake supports CREDENTIALS = (...), while Redshift CREDENTIALS <string> 6678 creds = ( 6679 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 6680 ) 6681 expr.set("credentials", creds) 6682 if self._match_text_seq("ENCRYPTION"): 6683 expr.set("encryption", self._parse_wrapped_options()) 6684 if self._match_text_seq("IAM_ROLE"): 6685 expr.set("iam_role", self._parse_field()) 6686 if self._match_text_seq("REGION"): 6687 expr.set("region", self._parse_field()) 6688 6689 return expr 6690 6691 def _parse_file_location(self) -> t.Optional[exp.Expression]: 6692 return self._parse_field() 6693 6694 def _parse_copy(self) -> exp.Copy | exp.Command: 6695 start = self._prev 6696 6697 self._match(TokenType.INTO) 6698 6699 this = ( 6700 self._parse_conjunction() 6701 if self._match(TokenType.L_PAREN, advance=False) 6702 else self._parse_table(schema=True) 6703 ) 6704 6705 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 6706 6707 files = self._parse_csv(self._parse_file_location) 6708 credentials = self._parse_credentials() 6709 6710 self._match_text_seq("WITH") 6711 6712 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 6713 6714 # Fallback case 6715 if self._curr: 6716 return self._parse_as_command(start) 6717 6718 return self.expression( 6719 exp.Copy, 6720 this=this, 6721 kind=kind, 6722 credentials=credentials, 6723 files=files, 6724 params=params, 6725 )
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
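For illustration, a minimal sketch of constructing a Parser with these settings and feeding it tokens directly (the SQL string is an arbitrary example; most users go through sqlglot.parse or sqlglot.parse_one instead):

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT a, b FROM t WHERE x > 1"

# The Parser consumes tokens, not raw SQL, so tokenize first.
tokens = Tokenizer().tokenize(sql)

# Collect up to five errors and raise them together at the end,
# rather than raising on the first one (the IMMEDIATE default).
parser = Parser(error_level=ErrorLevel.RAISE, max_errors=5)
expressions = parser.parse(tokens, sql=sql)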
1208 def __init__( 1209 self, 1210 error_level: t.Optional[ErrorLevel] = None, 1211 error_message_context: int = 100, 1212 max_errors: int = 3, 1213 dialect: DialectType = None, 1214 ): 1215 from sqlglot.dialects import Dialect 1216 1217 self.error_level = error_level or ErrorLevel.IMMEDIATE 1218 self.error_message_context = error_message_context 1219 self.max_errors = max_errors 1220 self.dialect = Dialect.get_or_raise(dialect) 1221 self.reset()
1233 def parse( 1234 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1235 ) -> t.List[t.Optional[exp.Expression]]: 1236 """ 1237 Parses a list of tokens and returns a list of syntax trees, one tree 1238 per parsed SQL statement. 1239 1240 Args: 1241 raw_tokens: The list of tokens. 1242 sql: The original SQL string, used to produce helpful debug messages. 1243 1244 Returns: 1245 The list of the produced syntax trees. 1246 """ 1247 return self._parse( 1248 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1249 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of produced syntax trees.
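As a small sketch of the one-tree-per-statement contract (the statements are arbitrary):

from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT 1; SELECT 2"
trees = Parser().parse(Tokenizer().tokenize(sql), sql=sql)

assert len(trees) == 2    # one syntax tree per statement
print(trees[0].sql())     # SELECT 1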
1251 def parse_into( 1252 self, 1253 expression_types: exp.IntoType, 1254 raw_tokens: t.List[Token], 1255 sql: t.Optional[str] = None, 1256 ) -> t.List[t.Optional[exp.Expression]]: 1257 """ 1258 Parses a list of tokens into a given Expression type. If a collection of Expression 1259 types is given instead, this method will try to parse the token list into each one 1260 of them, stopping at the first for which the parsing succeeds. 1261 1262 Args: 1263 expression_types: The expression type(s) to try and parse the token list into. 1264 raw_tokens: The list of tokens. 1265 sql: The original SQL string, used to produce helpful debug messages. 1266 1267 Returns: 1268 The target Expression. 1269 """ 1270 errors = [] 1271 for expression_type in ensure_list(expression_types): 1272 parser = self.EXPRESSION_PARSERS.get(expression_type) 1273 if not parser: 1274 raise TypeError(f"No parser registered for {expression_type}") 1275 1276 try: 1277 return self._parse(parser, raw_tokens, sql) 1278 except ParseError as e: 1279 e.errors[0]["into_expression"] = expression_type 1280 errors.append(e) 1281 1282 raise ParseError( 1283 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1284 errors=merge_errors(errors), 1285 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
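A sketch of both call shapes, assuming the default dialect; exp.Select is one of the types registered in EXPRESSION_PARSERS:

from sqlglot import exp
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT x FROM t"
tokens = Tokenizer().tokenize(sql)

# Parse the tokens specifically as a SELECT statement.
select = Parser().parse_into(exp.Select, tokens, sql=sql)[0]

# With a collection, each type is tried in order and the first
# successful parse wins; a ParseError is raised if all of them fail.
stmt = Parser().parse_into((exp.Select, exp.Expression), tokens, sql=sql)[0]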
1325 def check_errors(self) -> None: 1326 """Logs or raises any found errors, depending on the chosen error level setting.""" 1327 if self.error_level == ErrorLevel.WARN: 1328 for error in self.errors: 1329 logger.error(str(error)) 1330 elif self.error_level == ErrorLevel.RAISE and self.errors: 1331 raise ParseError( 1332 concat_messages(self.errors, self.max_errors), 1333 errors=merge_errors(self.errors), 1334 )
Logs or raises any found errors, depending on the chosen error level setting.
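For example, under ErrorLevel.WARN the errors accumulated during a parse are logged rather than raised (the malformed statement below is deliberate):

import logging

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

logging.basicConfig()

sql = "SELECT FROM"  # deliberately malformed
parser = Parser(error_level=ErrorLevel.WARN)

# parse() runs check_errors() after each statement: with WARN the
# problems are logged via the "sqlglot" logger instead of being raised.
result = parser.parse(Tokenizer().tokenize(sql), sql=sql)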
1336 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1337 """ 1338 Appends an error in the list of recorded errors or raises it, depending on the chosen 1339 error level setting. 1340 """ 1341 token = token or self._curr or self._prev or Token.string("") 1342 start = token.start 1343 end = token.end + 1 1344 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1345 highlight = self.sql[start:end] 1346 end_context = self.sql[end : end + self.error_message_context] 1347 1348 error = ParseError.new( 1349 f"{message}. Line {token.line}, Col: {token.col}.\n" 1350 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1351 description=message, 1352 line=token.line, 1353 col=token.col, 1354 start_context=start_context, 1355 highlight=highlight, 1356 end_context=end_context, 1357 ) 1358 1359 if self.error_level == ErrorLevel.IMMEDIATE: 1360 raise error 1361 1362 self.errors.append(error)
Appends an error to the list of recorded errors or raises it, depending on the chosen error level setting.
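Dialect authors typically call this from custom parsing methods. A hypothetical subclass hook (the MY CLAUSE syntax is invented for illustration):

from sqlglot import exp
from sqlglot.parser import Parser

class MyParser(Parser):
    def _parse_my_clause(self):
        if not self._match_text_seq("MY", "CLAUSE"):
            # With ErrorLevel.IMMEDIATE this raises right away; otherwise
            # the error is recorded and surfaced later by check_errors().
            self.raise_error("Expected MY CLAUSE")
            return None
        return self.expression(exp.Var, this="MY CLAUSE")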
1364 def expression( 1365 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1366 ) -> E: 1367 """ 1368 Creates a new, validated Expression. 1369 1370 Args: 1371 exp_class: The expression class to instantiate. 1372 comments: An optional list of comments to attach to the expression. 1373 kwargs: The arguments to set for the expression along with their respective values. 1374 1375 Returns: 1376 The target expression. 1377 """ 1378 instance = exp_class(**kwargs) 1379 instance.add_comments(comments) if comments else self._add_comments(instance) 1380 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
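A sketch of the intended use inside a parsing method; the helper below is hypothetical, but the pattern mirrors the _parse_* methods above:

from sqlglot import exp
from sqlglot.parser import Parser

class MyParser(Parser):
    def _parse_is_not_null(self, column):
        # expression() attaches any pending comments and validates each
        # node's mandatory arguments according to error_level.
        is_null = self.expression(exp.Is, this=column, expression=exp.Null())
        return self.expression(exp.Not, this=is_null)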
1387 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1388 """ 1389 Validates an Expression, making sure that all its mandatory arguments are set. 1390 1391 Args: 1392 expression: The expression to validate. 1393 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1394 1395 Returns: 1396 The validated expression. 1397 """ 1398 if self.error_level != ErrorLevel.IGNORE: 1399 for error_message in expression.error_messages(args): 1400 self.raise_error(error_message) 1401 1402 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
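A minimal sketch of the Func path, mirroring how _parse_substring above feeds the original argument list back in so arity problems can be reported:

from sqlglot import exp
from sqlglot.parser import Parser

parser = Parser()

args = [exp.column("s"), exp.Literal.number(2)]
func = exp.Substring.from_arg_list(args)

# With the default error level, a missing mandatory argument would be
# raised here; with ErrorLevel.IGNORE validation is skipped entirely.
validated = parser.validate_expression(func, args)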