sqlframe 1.1.3__py3-none-any.whl → 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sqlframe/_version.py CHANGED
@@ -12,5 +12,5 @@ __version__: str
12
12
  __version_tuple__: VERSION_TUPLE
13
13
  version_tuple: VERSION_TUPLE
14
14
 
15
- __version__ = version = '1.1.3'
16
- __version_tuple__ = version_tuple = (1, 1, 3)
15
+ __version__ = version = '1.2.0'
16
+ __version_tuple__ = version_tuple = (1, 2, 0)
sqlframe/base/catalog.py CHANGED
@@ -26,6 +26,9 @@ else:
26
26
  class _BaseCatalog(t.Generic[SESSION, DF]):
27
27
  """User-facing catalog API, accessible through `SparkSession.catalog`."""
28
28
 
29
+ TEMP_CATALOG_FILTER: t.Optional[exp.Expression] = None
30
+ TEMP_SCHEMA_FILTER: t.Optional[exp.Expression] = None
31
+
29
32
  def __init__(self, sparkSession: SESSION, schema: t.Optional[MappingSchema] = None) -> None:
30
33
  """Create a new Catalog that wraps the underlying JVM object."""
31
34
  self.session = sparkSession
@@ -569,7 +572,9 @@ class _BaseCatalog(t.Generic[SESSION, DF]):
569
572
  """
570
573
  raise NotImplementedError
571
574
 
572
- def listColumns(self, tableName: str, dbName: t.Optional[str] = None) -> t.List[Column]:
575
+ def listColumns(
576
+ self, tableName: str, dbName: t.Optional[str] = None, include_temp: bool = False
577
+ ) -> t.List[Column]:
573
578
  """Returns a t.List of columns for the given table/view in the specified database.
574
579
 
575
580
  .. versionadded:: 2.0.0
@@ -315,7 +315,9 @@ class ListTablesFromInfoSchemaMixin(_BaseInfoSchemaMixin, t.Generic[SESSION, DF]
315
315
 
316
316
  class ListColumnsFromInfoSchemaMixin(_BaseInfoSchemaMixin, t.Generic[SESSION, DF]):
317
317
  @normalize(["tableName", "dbName"])
318
- def listColumns(self, tableName: str, dbName: t.Optional[str] = None) -> t.List[Column]:
318
+ def listColumns(
319
+ self, tableName: str, dbName: t.Optional[str] = None, include_temp: bool = False
320
+ ) -> t.List[Column]:
319
321
  """Returns a t.List of columns for the given table/view in the specified database.
320
322
 
321
323
  .. versionadded:: 2.0.0
@@ -385,12 +387,6 @@ class ListColumnsFromInfoSchemaMixin(_BaseInfoSchemaMixin, t.Generic[SESSION, DF
385
387
  "catalog",
386
388
  exp.parse_identifier(self.currentCatalog(), dialect=self.session.input_dialect),
387
389
  )
388
- # if self.QUALIFY_INFO_SCHEMA_WITH_DATABASE:
389
- # if not table.db:
390
- # raise ValueError("dbName must be specified when listing columns from INFORMATION_SCHEMA")
391
- # source_table = f"{table.db}.INFORMATION_SCHEMA.COLUMNS"
392
- # else:
393
- # source_table = "INFORMATION_SCHEMA.COLUMNS"
394
390
  source_table = self._get_info_schema_table("columns", database=table.db)
395
391
  select = (
396
392
  exp.select(
@@ -402,9 +398,15 @@ class ListColumnsFromInfoSchemaMixin(_BaseInfoSchemaMixin, t.Generic[SESSION, DF
402
398
  .where(exp.column("table_name").eq(table.name))
403
399
  )
404
400
  if table.db:
405
- select = select.where(exp.column("table_schema").eq(table.db))
401
+ schema_filter: exp.Expression = exp.column("table_schema").eq(table.db)
402
+ if include_temp and self.TEMP_SCHEMA_FILTER:
403
+ schema_filter = exp.Or(this=schema_filter, expression=self.TEMP_SCHEMA_FILTER)
404
+ select = select.where(schema_filter)
406
405
  if table.catalog:
407
- select = select.where(exp.column("table_catalog").eq(table.catalog))
406
+ catalog_filter: exp.Expression = exp.column("table_catalog").eq(table.catalog)
407
+ if include_temp and self.TEMP_CATALOG_FILTER:
408
+ catalog_filter = exp.Or(this=catalog_filter, expression=self.TEMP_CATALOG_FILTER)
409
+ select = select.where(catalog_filter)
408
410
  results = self.session._fetch_rows(select)
409
411
  return [
410
412
  Column(
@@ -0,0 +1,63 @@
1
+ import typing as t
2
+
3
+ from sqlglot import exp
4
+
5
+ from sqlframe.base.catalog import Column
6
+ from sqlframe.base.dataframe import (
7
+ GROUP_DATA,
8
+ NA,
9
+ SESSION,
10
+ STAT,
11
+ WRITER,
12
+ _BaseDataFrame,
13
+ )
14
+
15
+
16
+ class PrintSchemaFromTempObjectsMixin(
17
+ _BaseDataFrame, t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]
18
+ ):
19
+ def _get_columns_from_temp_object(self) -> t.List[Column]:
20
+ table = exp.to_table(self.session._random_id)
21
+ self.session._execute(
22
+ exp.Create(
23
+ this=table,
24
+ kind="VIEW",
25
+ replace=True,
26
+ properties=exp.Properties(expressions=[exp.TemporaryProperty()]),
27
+ expression=self.expression,
28
+ )
29
+ )
30
+ return self.session.catalog.listColumns(
31
+ table.sql(dialect=self.session.input_dialect), include_temp=True
32
+ )
33
+
34
+ def printSchema(self, level: t.Optional[int] = None) -> None:
35
+ def print_schema(
36
+ column_name: str, column_type: exp.DataType, nullable: bool, current_level: int
37
+ ):
38
+ if level and current_level >= level:
39
+ return
40
+ if current_level > 0:
41
+ print(" | " * current_level, end="")
42
+ print(
43
+ f" |-- {column_name}: {column_type.sql(self.session.output_dialect).lower()} (nullable = {str(nullable).lower()})"
44
+ )
45
+ if column_type.this == exp.DataType.Type.STRUCT:
46
+ for column_def in column_type.expressions:
47
+ print_schema(column_def.name, column_def.args["kind"], True, current_level + 1)
48
+ if column_type.this == exp.DataType.Type.ARRAY:
49
+ for data_type in column_type.expressions:
50
+ print_schema("element", data_type, True, current_level + 1)
51
+ if column_type.this == exp.DataType.Type.MAP:
52
+ print_schema("key", column_type.expressions[0], True, current_level + 1)
53
+ print_schema("value", column_type.expressions[1], True, current_level + 1)
54
+
55
+ columns = self._get_columns_from_temp_object()
56
+ print("root")
57
+ for column in columns:
58
+ print_schema(
59
+ column.name,
60
+ exp.DataType.build(column.dataType, dialect=self.session.output_dialect),
61
+ column.nullable,
62
+ 0,
63
+ )
@@ -46,7 +46,9 @@ class BigQueryCatalog(
46
46
  return to_schema(self.session.default_dataset).db
47
47
 
48
48
  @normalize(["tableName", "dbName"])
49
- def listColumns(self, tableName: str, dbName: t.Optional[str] = None) -> t.List[Column]:
49
+ def listColumns(
50
+ self, tableName: str, dbName: t.Optional[str] = None, include_temp: bool = False
51
+ ) -> t.List[Column]:
50
52
  """Returns a t.List of columns for the given table/view in the specified database.
51
53
 
52
54
  .. versionadded:: 2.0.0
@@ -36,6 +36,8 @@ class DuckDBCatalog(
36
36
  ListColumnsFromInfoSchemaMixin["DuckDBSession", "DuckDBDataFrame"],
37
37
  _BaseCatalog["DuckDBSession", "DuckDBDataFrame"],
38
38
  ):
39
+ TEMP_CATALOG_FILTER = exp.column("table_catalog").eq("temp")
40
+
39
41
  def listFunctions(
40
42
  self, dbName: t.Optional[str] = None, pattern: t.Optional[str] = None
41
43
  ) -> t.List[Function]:
@@ -9,6 +9,7 @@ from sqlframe.base.dataframe import (
9
9
  _BaseDataFrameNaFunctions,
10
10
  _BaseDataFrameStatFunctions,
11
11
  )
12
+ from sqlframe.base.mixins.dataframe_mixins import PrintSchemaFromTempObjectsMixin
12
13
  from sqlframe.duckdb.group import DuckDBGroupedData
13
14
 
14
15
  if sys.version_info >= (3, 11):
@@ -34,13 +35,14 @@ class DuckDBDataFrameStatFunctions(_BaseDataFrameStatFunctions["DuckDBDataFrame"
34
35
 
35
36
 
36
37
  class DuckDBDataFrame(
38
+ PrintSchemaFromTempObjectsMixin,
37
39
  _BaseDataFrame[
38
40
  "DuckDBSession",
39
41
  "DuckDBDataFrameWriter",
40
42
  "DuckDBDataFrameNaFunctions",
41
43
  "DuckDBDataFrameStatFunctions",
42
44
  "DuckDBGroupedData",
43
- ]
45
+ ],
44
46
  ):
45
47
  _na = DuckDBDataFrameNaFunctions
46
48
  _stat = DuckDBDataFrameStatFunctions
@@ -34,6 +34,7 @@ class PostgresCatalog(
34
34
  _BaseCatalog["PostgresSession", "PostgresDataFrame"],
35
35
  ):
36
36
  CURRENT_CATALOG_EXPRESSION: exp.Expression = exp.column("current_catalog")
37
+ TEMP_SCHEMA_FILTER = exp.column("table_schema").like("pg_temp_%")
37
38
 
38
39
  def listFunctions(
39
40
  self, dbName: t.Optional[str] = None, pattern: t.Optional[str] = None
@@ -9,6 +9,7 @@ from sqlframe.base.dataframe import (
9
9
  _BaseDataFrameNaFunctions,
10
10
  _BaseDataFrameStatFunctions,
11
11
  )
12
+ from sqlframe.base.mixins.dataframe_mixins import PrintSchemaFromTempObjectsMixin
12
13
  from sqlframe.postgres.group import PostgresGroupedData
13
14
 
14
15
  if sys.version_info >= (3, 11):
@@ -33,13 +34,14 @@ class PostgresDataFrameStatFunctions(_BaseDataFrameStatFunctions["PostgresDataFr
33
34
 
34
35
 
35
36
  class PostgresDataFrame(
37
+ PrintSchemaFromTempObjectsMixin,
36
38
  _BaseDataFrame[
37
39
  "PostgresSession",
38
40
  "PostgresDataFrameWriter",
39
41
  "PostgresDataFrameNaFunctions",
40
42
  "PostgresDataFrameStatFunctions",
41
43
  "PostgresGroupedData",
42
- ]
44
+ ],
43
45
  ):
44
46
  _na = PostgresDataFrameNaFunctions
45
47
  _stat = PostgresDataFrameStatFunctions
sqlframe/spark/catalog.py CHANGED
@@ -468,7 +468,9 @@ class SparkCatalog(
468
468
  )
469
469
  return [Table(*x) for x in self._spark_catalog.listTables(dbName, pattern)]
470
470
 
471
- def listColumns(self, tableName: str, dbName: t.Optional[str] = None) -> t.List[Column]:
471
+ def listColumns(
472
+ self, tableName: str, dbName: t.Optional[str] = None, include_temp: bool = False
473
+ ) -> t.List[Column]:
472
474
  """Returns a t.List of columns for the given table/view in the specified database.
473
475
 
474
476
  .. versionadded:: 2.0.0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sqlframe
3
- Version: 1.1.3
3
+ Version: 1.2.0
4
4
  Summary: Taking the Spark out of PySpark by converting to SQL
5
5
  Home-page: https://github.com/eakmanrq/sqlframe
6
6
  Author: Ryan Eakman
@@ -1,8 +1,8 @@
1
1
  sqlframe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- sqlframe/_version.py,sha256=HwUMBHweBEaQ6rszqbo52qsXT3x6N8a86Dx1PmA5rxM,411
2
+ sqlframe/_version.py,sha256=zMnMemknXglcJs59xkicNzeEJTVgYd1omSfLWj76yWw,411
3
3
  sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  sqlframe/base/_typing.py,sha256=DuTay8-o9W-pw3RPZCgLunKNJLS9PkaV11G_pxXp9NY,1256
5
- sqlframe/base/catalog.py,sha256=jbEuY1wje4oPRuFSMgKdX-yTohBL99S57QuHhUXCybI,37085
5
+ sqlframe/base/catalog.py,sha256=ATDGirouUjal05P4ymL-wIi8rgjg_8w4PoACamiO64A,37245
6
6
  sqlframe/base/column.py,sha256=1xFwPhBlzdO6ZL9tTpPESL7B3XQ3rFMPIoBekFm0TqM,15109
7
7
  sqlframe/base/dataframe.py,sha256=cFaJupLZflakMfwcR_KsrxtinV8ZckRZGRjWzx1vBs8,59136
8
8
  sqlframe/base/decorators.py,sha256=fnqT1Hqa0J_gUurDcVY1Dcscj6SXFxFJ5PKAw-xe5sU,2097
@@ -19,10 +19,11 @@ sqlframe/base/types.py,sha256=aJT5YXr-M_LAfUM0uK4asfbrQFab_xmsp1CP2zkG8p0,11924
19
19
  sqlframe/base/util.py,sha256=SeUC2pcSBGnsS1W5PL1p-IGC6bJG8_2a7En2hxSTmpA,7597
20
20
  sqlframe/base/window.py,sha256=8hOv-ignPPIsZA9FzvYzcLE9J_glalVaYjIAUdRUX3o,4943
21
21
  sqlframe/base/mixins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
- sqlframe/base/mixins/catalog_mixins.py,sha256=ZNzNn-cWB0RwT7L1KZCWYke2JlP-cZze0MDNOzSfHew,14093
22
+ sqlframe/base/mixins/catalog_mixins.py,sha256=arO9psYkFULnRi2-3XnnWJBYf_nwbU52fa0nEaRU4aE,14225
23
+ sqlframe/base/mixins/dataframe_mixins.py,sha256=U2tKIY5pCLnoPy1boAQ1YWLgK1E-ZT4x47oRWtGoYLQ,2360
23
24
  sqlframe/base/mixins/readwriter_mixins.py,sha256=N2nsXOG3A2j6O3N195U-_fYOZMkqfifGcfduxODUcxs,4656
24
25
  sqlframe/bigquery/__init__.py,sha256=i2NsMbiXOj2xphCtPuNk6cVw4iYeq5_B1I9dVI9aGAk,712
25
- sqlframe/bigquery/catalog.py,sha256=HdRXZfZczoyLHEQ0y30nfCFKBvTTOJ1s6t0mafN_bGk,9277
26
+ sqlframe/bigquery/catalog.py,sha256=h3aQAQAJg6MMvFpP8Ku0S4pcx30n5qYrqHhWSomxb6A,9319
26
27
  sqlframe/bigquery/column.py,sha256=E1tUa62Y5HajkhgFuebU9zohrGyieudcHzTT8gfalio,40
27
28
  sqlframe/bigquery/dataframe.py,sha256=fPQ6043aSS_ds30WsvrYOgNZJPH0jq7BeNHGLQ2MEW4,1372
28
29
  sqlframe/bigquery/functions.py,sha256=RF8yG_4MS3at_60V0NNTE5ADERJZa7kZGYFWI4ST3jM,11149
@@ -32,9 +33,9 @@ sqlframe/bigquery/session.py,sha256=1-hE1Wr2b6SqfD4M_-OGMqjaSbhD6wSQd74v71xHZv8,
32
33
  sqlframe/bigquery/types.py,sha256=KwNyuXIo-2xVVd4bZED3YrQOobKCtemlxGrJL7DrTC8,34
33
34
  sqlframe/bigquery/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
34
35
  sqlframe/duckdb/__init__.py,sha256=t85TA3ufZtL1weQNFmEs8itCSwbJFtw03-p0GT4XGf8,669
35
- sqlframe/duckdb/catalog.py,sha256=SR1JWPGKjNJ2Dq2au-4rZAadPYr8Zn4WsK5EYKRyFm4,3829
36
+ sqlframe/duckdb/catalog.py,sha256=rt3XuP3m4DbhuibOFyvx_95F2zZa6uDwCI_TmcvKy1A,3895
36
37
  sqlframe/duckdb/column.py,sha256=wkEPcp3xVsH5nC3kpacXqNkRv9htPtBgt-0uFRxIRNs,56
37
- sqlframe/duckdb/dataframe.py,sha256=9T6GV4JScaApFSA4T7fixot78HMUgkjGxU7TgjolOOM,1410
38
+ sqlframe/duckdb/dataframe.py,sha256=RPdXSOv_wCB0R5E5fzRMYOGFHilb4egqRk3UgiT6WEU,1530
38
39
  sqlframe/duckdb/functions.py,sha256=srvzbk_Wg-wQPFGYp624dRDyYJghi47M8E-Tu7pBdY0,1507
39
40
  sqlframe/duckdb/group.py,sha256=IkhbW42Ng1U5YT3FkIdiB4zBqRkW4QyTb-1detY1e_4,383
40
41
  sqlframe/duckdb/readwriter.py,sha256=6xiyE3JKzY9ieKqvbAOBlifiHE6NpYISHul3Idlmoa0,4542
@@ -42,9 +43,9 @@ sqlframe/duckdb/session.py,sha256=TCAVsSqBGGj1Otb2iIkSkWqjbzzg1MeDAafGN928-O8,18
42
43
  sqlframe/duckdb/types.py,sha256=KwNyuXIo-2xVVd4bZED3YrQOobKCtemlxGrJL7DrTC8,34
43
44
  sqlframe/duckdb/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
44
45
  sqlframe/postgres/__init__.py,sha256=Sz_MtgV_oh_QhfZTC7iKM07ICUmNcJEDV0kEkSW9ZKU,712
45
- sqlframe/postgres/catalog.py,sha256=4f4Ytacfn0Q3xnT0MWUeEYPq4SwNPdS1EmRc2fBK9yc,3652
46
+ sqlframe/postgres/catalog.py,sha256=L5heEav8PTtKJDofJTf-51_cCLpZud5lDvZC-RFZIaw,3722
46
47
  sqlframe/postgres/column.py,sha256=E1tUa62Y5HajkhgFuebU9zohrGyieudcHzTT8gfalio,40
47
- sqlframe/postgres/dataframe.py,sha256=bv_y9D9w03x-sfLdippb8n4goFQGazg1j0gZEPHe98k,1372
48
+ sqlframe/postgres/dataframe.py,sha256=feGvQo7GD-YGmWWGc5h94CMVZm0gcgUQsdlAktXS4Ac,1492
48
49
  sqlframe/postgres/functions.py,sha256=UNL7dE6LmzekvolwqWB-aFt8ITamxeSfuG50_NP_G8c,2133
49
50
  sqlframe/postgres/group.py,sha256=KUXeSFKWTSH9yCRJAhW85OvjZaG6Zr4In9LR_ie3yGU,391
50
51
  sqlframe/postgres/readwriter.py,sha256=L1e3yKXzFVNR_W5s1DHaWol7G8x7l4jcZ5sLGualyMk,870
@@ -72,7 +73,7 @@ sqlframe/snowflake/session.py,sha256=oJK_3t43TeUiAj7KBfn2lD5d6AVHWsI39xLu-j_h5QM
72
73
  sqlframe/snowflake/types.py,sha256=KwNyuXIo-2xVVd4bZED3YrQOobKCtemlxGrJL7DrTC8,34
73
74
  sqlframe/snowflake/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
74
75
  sqlframe/spark/__init__.py,sha256=jamKYQtQaKjjXnQ01QGPHvatbrZSw9sWno_VOUGSz6I,712
75
- sqlframe/spark/catalog.py,sha256=nqiZf14m2-PPbZALLlSgvLnpLqSskNnAiZz_ccI-nPs,32484
76
+ sqlframe/spark/catalog.py,sha256=YeWBCUlkkhf2jDcmaFo-JvG4DQ6Daqyy1zEnVBx5gMo,32526
76
77
  sqlframe/spark/column.py,sha256=E1tUa62Y5HajkhgFuebU9zohrGyieudcHzTT8gfalio,40
77
78
  sqlframe/spark/dataframe.py,sha256=V3z5Bx9snLgYh4bDwJfJb5mj1P7UsZF8DMlLwZXopBg,1309
78
79
  sqlframe/spark/functions.py,sha256=eSGMM2DXcj17nIPH5ZDLG95ZMuE7F8Qvn0IqGO_wQVw,586
@@ -91,8 +92,8 @@ sqlframe/standalone/readwriter.py,sha256=EZNyDJ4ID6sGNog3uP4-e9RvchX4biJJDNtc5hk
91
92
  sqlframe/standalone/session.py,sha256=wQmdu2sv6KMTAv0LRFk7TY7yzlh3xvmsyqilEtRecbY,1191
92
93
  sqlframe/standalone/types.py,sha256=KwNyuXIo-2xVVd4bZED3YrQOobKCtemlxGrJL7DrTC8,34
93
94
  sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
94
- sqlframe-1.1.3.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
95
- sqlframe-1.1.3.dist-info/METADATA,sha256=-RpQ4lc5QSV9FvW0fau0TeW9in4ahPR2urnOGukAUXk,6873
96
- sqlframe-1.1.3.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
97
- sqlframe-1.1.3.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
98
- sqlframe-1.1.3.dist-info/RECORD,,
95
+ sqlframe-1.2.0.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
96
+ sqlframe-1.2.0.dist-info/METADATA,sha256=KguQN7Rfbq_cqfdxvu28gdyMfisTRZ6-DVOCAt31Q2Y,6873
97
+ sqlframe-1.2.0.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
98
+ sqlframe-1.2.0.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
99
+ sqlframe-1.2.0.dist-info/RECORD,,