sqlframe 3.5.0__py3-none-any.whl → 3.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sqlframe/_version.py CHANGED
@@ -12,5 +12,5 @@ __version__: str
12
12
  __version_tuple__: VERSION_TUPLE
13
13
  version_tuple: VERSION_TUPLE
14
14
 
15
- __version__ = version = '3.5.0'
16
- __version_tuple__ = version_tuple = (3, 5, 0)
15
+ __version__ = version = '3.7.0'
16
+ __version_tuple__ = version_tuple = (3, 7, 0)
@@ -75,7 +75,7 @@ class PandasLoaderMixin(_BaseDataFrameReader, t.Generic[SESSION, DF]):
75
75
 
76
76
  assert path is not None, "path is required"
77
77
  assert isinstance(path, str), "path must be a string"
78
- format = format or _infer_format(path)
78
+ format = format or self.state_format_to_read or _infer_format(path)
79
79
  kwargs = {k: v for k, v in options.items() if v is not None}
80
80
  if format == "json":
81
81
  df = pd.read_json(path, lines=True, **kwargs) # type: ignore
@@ -36,6 +36,7 @@ logger = logging.getLogger(__name__)
36
36
  class _BaseDataFrameReader(t.Generic[SESSION, DF]):
37
37
  def __init__(self, spark: SESSION):
38
38
  self._session = spark
39
+ self.state_format_to_read: t.Optional[str] = None
39
40
 
40
41
  @property
41
42
  def session(self) -> SESSION:
@@ -67,6 +68,44 @@ class _BaseDataFrameReader(t.Generic[SESSION, DF]):
67
68
  for k, v in column_mapping.items()
68
69
  ]
69
70
 
71
+ def format(self, source: str) -> "Self":
72
+ """Specifies the input data source format.
73
+
74
+ .. versionadded:: 1.4.0
75
+
76
+ .. versionchanged:: 3.4.0
77
+ Supports Spark Connect.
78
+
79
+ Parameters
80
+ ----------
81
+ source : str
82
+ string, name of the data source, e.g. 'json', 'parquet'.
83
+
84
+ Examples
85
+ --------
86
+ >>> spark.read.format('json')
87
+ <...readwriter.DataFrameReader object ...>
88
+
89
+ Write a DataFrame into a JSON file and read it back.
90
+
91
+ >>> import tempfile
92
+ >>> with tempfile.TemporaryDirectory() as d:
93
+ ... # Write a DataFrame into a JSON file
94
+ ... spark.createDataFrame(
95
+ ... [{"age": 100, "name": "Hyukjin Kwon"}]
96
+ ... ).write.mode("overwrite").format("json").save(d)
97
+ ...
98
+ ... # Read the JSON file as a DataFrame.
99
+ ... spark.read.format('json').load(d).show()
100
+ +---+------------+
101
+ |age| name|
102
+ +---+------------+
103
+ |100|Hyukjin Kwon|
104
+ +---+------------+
105
+ """
106
+ self.state_format_to_read = source
107
+ return self
108
+
70
109
  def load(
71
110
  self,
72
111
  path: t.Optional[PathOrPaths] = None,
@@ -72,6 +72,7 @@ class DuckDBDataFrameReader(_BaseDataFrameReader["DuckDBSession", "DuckDBDataFra
72
72
  |100|NULL|
73
73
  +---+----+
74
74
  """
75
+ format = format or self.state_format_to_read
75
76
  if schema:
76
77
  column_mapping = ensure_column_mapping(schema)
77
78
  select_column_mapping = column_mapping.copy()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sqlframe
3
- Version: 3.5.0
3
+ Version: 3.7.0
4
4
  Summary: Turning PySpark Into a Universal DataFrame API
5
5
  Home-page: https://github.com/eakmanrq/sqlframe
6
6
  Author: Ryan Eakman
@@ -17,8 +17,8 @@ Classifier: Programming Language :: Python :: 3 :: Only
17
17
  Requires-Python: >=3.8
18
18
  Description-Content-Type: text/markdown
19
19
  License-File: LICENSE
20
- Requires-Dist: prettytable (<3.11.1)
21
- Requires-Dist: sqlglot (<25.26,>=24.0.0)
20
+ Requires-Dist: prettytable (<3.12.1)
21
+ Requires-Dist: sqlglot (<25.29,>=24.0.0)
22
22
  Requires-Dist: typing-extensions (<5,>=4.8)
23
23
  Provides-Extra: bigquery
24
24
  Requires-Dist: google-cloud-bigquery-storage (<3,>=2) ; extra == 'bigquery'
@@ -26,12 +26,12 @@ Requires-Dist: google-cloud-bigquery[pandas] (<4,>=3) ; extra == 'bigquery'
26
26
  Provides-Extra: dev
27
27
  Requires-Dist: duckdb (<1.2,>=0.9) ; extra == 'dev'
28
28
  Requires-Dist: findspark (<3,>=2) ; extra == 'dev'
29
- Requires-Dist: mypy (<1.13,>=1.10.0) ; extra == 'dev'
30
- Requires-Dist: openai (<1.53,>=1.30) ; extra == 'dev'
29
+ Requires-Dist: mypy (<1.14,>=1.10.0) ; extra == 'dev'
30
+ Requires-Dist: openai (<1.54,>=1.30) ; extra == 'dev'
31
31
  Requires-Dist: pandas-stubs (<3,>=2) ; extra == 'dev'
32
32
  Requires-Dist: pandas (<3,>=2) ; extra == 'dev'
33
33
  Requires-Dist: psycopg (<4,>=3.1) ; extra == 'dev'
34
- Requires-Dist: pyarrow (<18,>=10) ; extra == 'dev'
34
+ Requires-Dist: pyarrow (<19,>=10) ; extra == 'dev'
35
35
  Requires-Dist: pyspark (<3.6,>=2) ; extra == 'dev'
36
36
  Requires-Dist: pytest-postgresql (<7,>=6) ; extra == 'dev'
37
37
  Requires-Dist: pytest-xdist (<3.7,>=3.6) ; extra == 'dev'
@@ -50,7 +50,7 @@ Provides-Extra: duckdb
50
50
  Requires-Dist: duckdb (<1.2,>=0.9) ; extra == 'duckdb'
51
51
  Requires-Dist: pandas (<3,>=2) ; extra == 'duckdb'
52
52
  Provides-Extra: openai
53
- Requires-Dist: openai (<1.53,>=1.30) ; extra == 'openai'
53
+ Requires-Dist: openai (<1.54,>=1.30) ; extra == 'openai'
54
54
  Provides-Extra: pandas
55
55
  Requires-Dist: pandas (<3,>=2) ; extra == 'pandas'
56
56
  Provides-Extra: postgres
@@ -1,5 +1,5 @@
1
1
  sqlframe/__init__.py,sha256=E3qCJ4PSEgKz6Hg3ves6LWt3JrQOV8c9HVLSIUOzKNc,3106
2
- sqlframe/_version.py,sha256=wy34mXzQ8fLJu7i4fZikKwCDGQODEviQb-OrdMe9F4Q,411
2
+ sqlframe/_version.py,sha256=olhhg46RdfZAlJOQkLB233pDX5rMKFCBvTE1Adjwfqs,411
3
3
  sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  sqlframe/base/_typing.py,sha256=b2clI5HI1zEZKB_3Msx3FeAJQyft44ubUifJwQRVXyQ,1298
5
5
  sqlframe/base/catalog.py,sha256=SzFQalTWdhWzxUY-4ut1f9TfOECp_JmJEgNPfrRKCe0,38457
@@ -12,7 +12,7 @@ sqlframe/base/functions.py,sha256=JDVULt3WI1cwWYboU8ybenIXZPdKSNtnzg7A4xC1Lao,18
12
12
  sqlframe/base/group.py,sha256=fsyG5990_Pd7gFPjTFrH9IEoAquL_wEkVpIlBAIkZJU,4091
13
13
  sqlframe/base/normalize.py,sha256=nXAJ5CwxVf4DV0GsH-q1w0p8gmjSMlv96k_ez1eVul8,3880
14
14
  sqlframe/base/operations.py,sha256=-AhNuEzcV7ZExoP1oY3blaKip-joQyJeQVvfBTs_2g4,3456
15
- sqlframe/base/readerwriter.py,sha256=FkImnUR_qNASmXktk0JDsFeOzfh799hd09vI2uznNH8,25350
15
+ sqlframe/base/readerwriter.py,sha256=9cgOZuB3phZbQufY98JRDBVWRww3hsULc6Or1HK2Onk,26554
16
16
  sqlframe/base/session.py,sha256=6vQ2bJa357J1bycx1vMxkIUdcA2YbMIYq956bpytztI,25256
17
17
  sqlframe/base/transforms.py,sha256=y0j3SGDz3XCmNGrvassk1S-owllUWfkHyMgZlY6SFO4,467
18
18
  sqlframe/base/types.py,sha256=iBNk9bpFtb2NBIogYS8i7OlQZMRvpR6XxqzBebsjQDU,12280
@@ -22,7 +22,7 @@ sqlframe/base/window.py,sha256=8hOv-ignPPIsZA9FzvYzcLE9J_glalVaYjIAUdRUX3o,4943
22
22
  sqlframe/base/mixins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
23
23
  sqlframe/base/mixins/catalog_mixins.py,sha256=9tn0mK8oPoqIIjNItystD5tdBMdK9YpkxTG7G9KQl8k,18619
24
24
  sqlframe/base/mixins/dataframe_mixins.py,sha256=3MMQu2hdHG_qtDQ6jDHo0Iy5KtLj4lHePfovCVxTqbo,1411
25
- sqlframe/base/mixins/readwriter_mixins.py,sha256=QnxGVL8ftZfYlBNG0Bl24N_bnA2YioSxUsTSgKIbuvQ,4723
25
+ sqlframe/base/mixins/readwriter_mixins.py,sha256=ap8j_g7PoUGHaHKCPMnRPbXofOsUhUzlaF7Loxy2m-I,4752
26
26
  sqlframe/bigquery/__init__.py,sha256=kbaomhYAANPdxeDQhajv8IHfMg_ENKivtYK-rPwaV08,939
27
27
  sqlframe/bigquery/catalog.py,sha256=8d36IzT5GPWd1FdxJ9vEljOdbIDepHnFOBjwP0bX6FE,11625
28
28
  sqlframe/bigquery/column.py,sha256=E1tUa62Y5HajkhgFuebU9zohrGyieudcHzTT8gfalio,40
@@ -42,7 +42,7 @@ sqlframe/duckdb/dataframe.py,sha256=HZg_uMAz4RsubZJT4-MslUQS_0-InF0_P5Yq5HyJ3wE,
42
42
  sqlframe/duckdb/functions.py,sha256=Ee8o6YFtRdEiq0jNLXxgu5lcbc7Tsg0-lK6oRyxdcjo,1920
43
43
  sqlframe/duckdb/functions.pyi,sha256=bWfQl7Cm1eecI39LJAyyRcC4z7epDJ-h9JOozPsEc34,5879
44
44
  sqlframe/duckdb/group.py,sha256=IkhbW42Ng1U5YT3FkIdiB4zBqRkW4QyTb-1detY1e_4,383
45
- sqlframe/duckdb/readwriter.py,sha256=iY0Wsms35gymilETOhAQGQCHfH8LCj9_uY8aCdF77ds,4806
45
+ sqlframe/duckdb/readwriter.py,sha256=6nGnz2SE-tpuPDQXPI23SQQXRre_raUG5cou3s3NrpA,4859
46
46
  sqlframe/duckdb/session.py,sha256=b5IrKbTkYUVNQGSG2EJPNV9MTdJw4onN-9aMrskjxck,2721
47
47
  sqlframe/duckdb/types.py,sha256=KwNyuXIo-2xVVd4bZED3YrQOobKCtemlxGrJL7DrTC8,34
48
48
  sqlframe/duckdb/udf.py,sha256=Du9LnOtT1lJvB90D4HSR2tB7MXy179jZngDR-EjVjQk,656
@@ -107,8 +107,8 @@ sqlframe/standalone/udf.py,sha256=azmgtUjHNIPs0WMVNId05SHwiYn41MKVBhKXsQJ5dmY,27
107
107
  sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
108
108
  sqlframe/testing/__init__.py,sha256=VVCosQhitU74A3NnE52O4mNtGZONapuEXcc20QmSlnQ,132
109
109
  sqlframe/testing/utils.py,sha256=9DDYVuocO7tygee3RaajuJNZ24sJwf_LY556kKg7kTw,13011
110
- sqlframe-3.5.0.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
111
- sqlframe-3.5.0.dist-info/METADATA,sha256=vH1Trx574zbKeuhU0oPb0Q7dxgGvHqaONgNiO3tcbkk,8639
112
- sqlframe-3.5.0.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
113
- sqlframe-3.5.0.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
114
- sqlframe-3.5.0.dist-info/RECORD,,
110
+ sqlframe-3.7.0.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
111
+ sqlframe-3.7.0.dist-info/METADATA,sha256=0Zan1qWdwBIWr-4boxpMu2W4w1h541eJAVRWHgcQd5Q,8639
112
+ sqlframe-3.7.0.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
113
+ sqlframe-3.7.0.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
114
+ sqlframe-3.7.0.dist-info/RECORD,,