sqlframe 1.0.0__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sqlframe/_version.py CHANGED
@@ -12,5 +12,5 @@ __version__: str
12
12
  __version_tuple__: VERSION_TUPLE
13
13
  version_tuple: VERSION_TUPLE
14
14
 
15
- __version__ = version = '1.0.0'
16
- __version_tuple__ = version_tuple = (1, 0, 0)
15
+ __version__ = version = '1.1.0'
16
+ __version_tuple__ = version_tuple = (1, 1, 0)
sqlframe/base/catalog.py CHANGED
@@ -8,7 +8,7 @@ from sqlglot import MappingSchema, exp
8
8
 
9
9
  from sqlframe.base.decorators import normalize
10
10
  from sqlframe.base.exceptions import TableSchemaError
11
- from sqlframe.base.util import to_schema
11
+ from sqlframe.base.util import ensure_column_mapping, to_schema
12
12
 
13
13
  if t.TYPE_CHECKING:
14
14
  from sqlglot.schema import ColumnMapping
@@ -82,6 +82,7 @@ class _BaseCatalog(t.Generic[SESSION, DF]):
82
82
  raise TableSchemaError(
83
83
  "This session does not have access to a catalog that can lookup column information. See docs for explicitly defining columns or using a session that can automatically determine this."
84
84
  )
85
+ column_mapping = ensure_column_mapping(column_mapping) # type: ignore
85
86
  self._schema.add_table(table, column_mapping, dialect=self.session.input_dialect)
86
87
 
87
88
  @normalize(["dbName"])
sqlframe/base/dataframe.py CHANGED
@@ -417,7 +417,7 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
417
417
  from sqlframe.base.session import _BaseSession
418
418
 
419
419
  value = expression.sql(dialect=_BaseSession().input_dialect).encode("utf-8")
420
- hash = f"t{zlib.crc32(value)}"[:6]
420
+ hash = f"t{zlib.crc32(value)}"[:9]
421
421
  return self.session._normalize_string(hash)
422
422
 
423
423
  def _get_select_expressions(
@@ -606,8 +606,11 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
606
606
  return df._convert_leaf_to_cte(sequence_id=new_sequence_id)
607
607
 
608
608
  @operation(Operation.WHERE)
609
- def where(self, column: t.Union[Column, bool], **kwargs) -> Self:
610
- col = self._ensure_and_normalize_col(column)
609
+ def where(self, column: t.Union[Column, str, bool], **kwargs) -> Self:
610
+ if isinstance(column, str):
611
+ col = sqlglot.parse_one(column, dialect=self.session.input_dialect)
612
+ else:
613
+ col = self._ensure_and_normalize_col(column)
611
614
  return self.copy(expression=self.expression.where(col.expression))
612
615
 
613
616
  filter = where
@@ -1094,7 +1097,7 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
1094
1097
  )
1095
1098
  if existing_col_index:
1096
1099
  expression = self.expression.copy()
1097
- expression.expressions[existing_col_index] = col.expression
1100
+ expression.expressions[existing_col_index] = col.alias(colName).expression
1098
1101
  return self.copy(expression=expression)
1099
1102
  return self.copy().select(col.alias(colName), append=True)
1100
1103
 
sqlframe/base/session.py CHANGED
@@ -313,24 +313,16 @@ class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, CONN]):
313
313
  sel_expression = exp.Select(**select_kwargs)
314
314
  if empty_df:
315
315
  sel_expression = sel_expression.where(exp.false())
316
- # if empty_df:
317
- # if not column_mapping:
318
- # # If we don't have rows or columns then we just return a null with a false expression
319
- # sel_expression = (
320
- # exp.Select().select("null").from_("VALUES (NULL)").where(exp.false())
321
- # )
322
- # else:
323
- # # Ensure no results are returned if the dataframe is expected to be empty instead of
324
- # # a row of null values
325
- # sel_expression = sel_expression.where(exp.false())
326
316
  return self._create_df(sel_expression)
327
317
 
328
- def sql(self, sqlQuery: t.Union[str, exp.Expression]) -> DF:
329
- expression = self._optimize(
318
+ def sql(self, sqlQuery: t.Union[str, exp.Expression], optimize: bool = True) -> DF:
319
+ expression = (
330
320
  sqlglot.parse_one(sqlQuery, read=self.input_dialect)
331
321
  if isinstance(sqlQuery, str)
332
322
  else sqlQuery
333
323
  )
324
+ if optimize:
325
+ expression = self._optimize(expression)
334
326
  if self.temp_views:
335
327
  replacement_mapping = {}
336
328
  for table in expression.find_all(exp.Table):
sqlframe/base/util.py CHANGED
@@ -113,6 +113,8 @@ def ensure_column_mapping(schema: t.Union[str, StructType]) -> t.Dict:
113
113
  }
114
114
  # TODO: Make a protocol with a `simpleString` attribute as what it looks for instead of the actual
115
115
  # `StructType` object.
116
+ elif hasattr(schema, "simpleString"):
117
+ return {struct_field.name: struct_field.dataType.simpleString() for struct_field in schema}
116
118
  return sqlglot_ensure_column_mapping(schema) # type: ignore
117
119
 
118
120
 
sqlframe/duckdb/readwriter.py CHANGED
@@ -5,6 +5,9 @@ from __future__ import annotations
5
5
  import logging
6
6
  import typing as t
7
7
 
8
+ from sqlglot import exp
9
+ from sqlglot.helper import ensure_list
10
+
8
11
  from sqlframe.base.readerwriter import _BaseDataFrameReader, _BaseDataFrameWriter
9
12
  from sqlframe.base.util import ensure_column_mapping, to_csv
10
13
 
@@ -69,13 +72,22 @@ class DuckDBDataFrameReader(_BaseDataFrameReader["DuckDBSession", "DuckDBDataFra
69
72
  |100|NULL|
70
73
  +---+----+
71
74
  """
75
+ if schema:
76
+ column_mapping = ensure_column_mapping(schema)
77
+ select_columns = [x.expression for x in self._to_casted_columns(column_mapping)]
78
+ if format == "csv":
79
+ duckdb_columns = ", ".join(
80
+ [f"'{column}': '{dtype}'" for column, dtype in column_mapping.items()]
81
+ )
82
+ options["columns"] = "{" + duckdb_columns + "}"
83
+ else:
84
+ select_columns = [exp.Star()]
72
85
  if format:
73
- sql = f"SELECT * FROM read_{format}('{path}', {to_csv(options)})"
86
+ paths = ",".join([f"'{path}'" for path in ensure_list(path)])
87
+ from_clause = f"read_{format}([{paths}], {to_csv(options)})"
74
88
  else:
75
- sql = f"select * from '{path}'"
76
- df = self.session.sql(sql)
77
- if schema:
78
- df = df.select(*self._to_casted_columns(ensure_column_mapping(schema)))
89
+ from_clause = f"'{path}'"
90
+ df = self.session.sql(exp.select(*select_columns).from_(from_clause), optimize=False)
79
91
  self.session._last_loaded_file = path # type: ignore
80
92
  return df
81
93
 
{sqlframe-1.0.0.dist-info → sqlframe-1.1.0.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sqlframe
3
- Version: 1.0.0
3
+ Version: 1.1.0
4
4
  Summary: Taking the Spark out of PySpark by converting to SQL
5
5
  Home-page: https://github.com/eakmanrq/sqlframe
6
6
  Author: Ryan Eakman
@@ -18,7 +18,7 @@ Requires-Python: >=3.8
18
18
  Description-Content-Type: text/markdown
19
19
  License-File: LICENSE
20
20
  Requires-Dist: prettytable (<3.11.0)
21
- Requires-Dist: sqlglot (<23.18,>=23.14.0)
21
+ Requires-Dist: sqlglot (<24.1,>=24.0.0)
22
22
  Provides-Extra: bigquery
23
23
  Requires-Dist: google-cloud-bigquery-storage (<3,>=2) ; extra == 'bigquery'
24
24
  Requires-Dist: google-cloud-bigquery[pandas] (<4,>=3) ; extra == 'bigquery'
{sqlframe-1.0.0.dist-info → sqlframe-1.1.0.dist-info}/RECORD CHANGED
@@ -1,10 +1,10 @@
1
1
  sqlframe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- sqlframe/_version.py,sha256=DGJ4pj32xs3_DRJhSzQwCiRNnAQrMgo09USYpyMZsKc,411
2
+ sqlframe/_version.py,sha256=CqDGE4B1ZqZ-56mxeOFcXRTmlxrdOh4ayrjbcPjziE4,411
3
3
  sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  sqlframe/base/_typing.py,sha256=DuTay8-o9W-pw3RPZCgLunKNJLS9PkaV11G_pxXp9NY,1256
5
- sqlframe/base/catalog.py,sha256=Y9v7ZdpUVeFjjqcKyrRTBKK1H-IXM5SuIwVm5blSXK4,36984
5
+ sqlframe/base/catalog.py,sha256=P55_mLrk9KUC2LRYyLgSzVAan1Lx6EBNfdDjMEtc9DA,37086
6
6
  sqlframe/base/column.py,sha256=K9TtpBjVsFK9NtEX9ZQscU6qZIKiVVh1zj3jG9HifyA,15110
7
- sqlframe/base/dataframe.py,sha256=MTghHiW5nXDE6p214h93FChUlOdd8c6xf2WIZxrToR0,58817
7
+ sqlframe/base/dataframe.py,sha256=lNBMm79rX1DAt5vj4qKuxDwyhJdnhDROcOPcqVDxNHE,58971
8
8
  sqlframe/base/decorators.py,sha256=fnqT1Hqa0J_gUurDcVY1Dcscj6SXFxFJ5PKAw-xe5sU,2097
9
9
  sqlframe/base/exceptions.py,sha256=pCB9hXX4jxZWzNg3JN1i38cv3BmpUlee5NoLYx3YXIQ,208
10
10
  sqlframe/base/function_alternatives.py,sha256=to0kv3MTJmQFeVTMcitz0AxBIoUJC3cu5LkEY5aJpoo,31318
@@ -13,10 +13,10 @@ sqlframe/base/group.py,sha256=sKoaI2aLMih9nJTQfqzfJ00NbBcGQtArWXYHT40motQ,4060
13
13
  sqlframe/base/normalize.py,sha256=Ie6IcrD9dL-xBUKgDoh_c_gfLw68tBK5AmiprCA8MXE,3633
14
14
  sqlframe/base/operations.py,sha256=fVlAse6-WdQnEaHghRZVHXOesQ3OnKQwBnVYv5nVRiI,3457
15
15
  sqlframe/base/readerwriter.py,sha256=cgg7KuO7Eu8fScKOg1KyNFAcgnsjpU6yusPVs0o52a4,25213
16
- sqlframe/base/session.py,sha256=0ZyUs5kHcEM2Kk74BH9M1hCvEGBsp1_RD1lRVwPCH9M,22390
16
+ sqlframe/base/session.py,sha256=evVdd-FGKkp-Wg80UG5289iRtBihLFfkqrcXTH64_R8,21926
17
17
  sqlframe/base/transforms.py,sha256=EKwUpfp83bncEs_MNmI2OO7gV6vA_Rr89ZWmE4eETSw,468
18
18
  sqlframe/base/types.py,sha256=1CwMW9Q1inYzQcPTyjv1QANtVSHha8ZmBigmopQET98,11925
19
- sqlframe/base/util.py,sha256=mnJKg1c_CpkuB1CqyB1f-WamvV7XL3__Y45tOIqauO4,7455
19
+ sqlframe/base/util.py,sha256=SeUC2pcSBGnsS1W5PL1p-IGC6bJG8_2a7En2hxSTmpA,7597
20
20
  sqlframe/base/window.py,sha256=yyKvoNi41vL2t7XK2Ysjp8Q2FNIu3BYv-9EPtp5og6k,4944
21
21
  sqlframe/base/mixins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
22
  sqlframe/base/mixins/catalog_mixins.py,sha256=ZNzNn-cWB0RwT7L1KZCWYke2JlP-cZze0MDNOzSfHew,14093
@@ -37,7 +37,7 @@ sqlframe/duckdb/column.py,sha256=wkEPcp3xVsH5nC3kpacXqNkRv9htPtBgt-0uFRxIRNs,56
37
37
  sqlframe/duckdb/dataframe.py,sha256=9T6GV4JScaApFSA4T7fixot78HMUgkjGxU7TgjolOOM,1410
38
38
  sqlframe/duckdb/functions.py,sha256=srvzbk_Wg-wQPFGYp624dRDyYJghi47M8E-Tu7pBdY0,1507
39
39
  sqlframe/duckdb/group.py,sha256=sYTExtNprfbW74LWc_Lyjc1G6K1FogQsdILU2599Bq8,384
40
- sqlframe/duckdb/readwriter.py,sha256=ThRTEE_RHsFwJF-SHF_HkPiJ9q0SPSn20McChMZtJeE,3817
40
+ sqlframe/duckdb/readwriter.py,sha256=TC0LigUmCRpcdx4B8Mb5ap5ifFBrjmbqXmhUB5rG87U,4376
41
41
  sqlframe/duckdb/session.py,sha256=TCAVsSqBGGj1Otb2iIkSkWqjbzzg1MeDAafGN928-O8,1893
42
42
  sqlframe/duckdb/types.py,sha256=KwNyuXIo-2xVVd4bZED3YrQOobKCtemlxGrJL7DrTC8,34
43
43
  sqlframe/duckdb/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
@@ -91,8 +91,8 @@ sqlframe/standalone/readwriter.py,sha256=n2uoebNdL_t6_eaXNkpu7Zv2UmZ9I3rASuo01gG
91
91
  sqlframe/standalone/session.py,sha256=xWxBh-OtH--LmWtpDboOBpwKLcaBK5JV-IF2gCra5k0,1192
92
92
  sqlframe/standalone/types.py,sha256=KwNyuXIo-2xVVd4bZED3YrQOobKCtemlxGrJL7DrTC8,34
93
93
  sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
94
- sqlframe-1.0.0.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
95
- sqlframe-1.0.0.dist-info/METADATA,sha256=vNKV-_xHHk2p19RFX9cUrSs5cCbzTVcLJqzSntxROLI,6875
96
- sqlframe-1.0.0.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
97
- sqlframe-1.0.0.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
98
- sqlframe-1.0.0.dist-info/RECORD,,
94
+ sqlframe-1.1.0.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
95
+ sqlframe-1.1.0.dist-info/METADATA,sha256=RBSfrpj8FYCqz79aL88JvpDRIkKZvbASKiwT0YqwXm0,6873
96
+ sqlframe-1.1.0.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
97
+ sqlframe-1.1.0.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
98
+ sqlframe-1.1.0.dist-info/RECORD,,