sqlframe 3.5.0__py3-none-any.whl → 3.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlframe/_version.py +2 -2
- sqlframe/base/mixins/readwriter_mixins.py +1 -1
- sqlframe/base/readerwriter.py +39 -0
- sqlframe/duckdb/readwriter.py +1 -0
- {sqlframe-3.5.0.dist-info → sqlframe-3.7.0.dist-info}/METADATA +7 -7
- {sqlframe-3.5.0.dist-info → sqlframe-3.7.0.dist-info}/RECORD +9 -9
- {sqlframe-3.5.0.dist-info → sqlframe-3.7.0.dist-info}/LICENSE +0 -0
- {sqlframe-3.5.0.dist-info → sqlframe-3.7.0.dist-info}/WHEEL +0 -0
- {sqlframe-3.5.0.dist-info → sqlframe-3.7.0.dist-info}/top_level.txt +0 -0
sqlframe/_version.py
CHANGED
(hunk for this file is missing from the extract)
sqlframe/base/mixins/readwriter_mixins.py
CHANGED
|
@@ -75,7 +75,7 @@ class PandasLoaderMixin(_BaseDataFrameReader, t.Generic[SESSION, DF]):
|
|
|
75
75
|
|
|
76
76
|
assert path is not None, "path is required"
|
|
77
77
|
assert isinstance(path, str), "path must be a string"
|
|
78
|
-
format = format or _infer_format(path)
|
|
78
|
+
format = format or self.state_format_to_read or _infer_format(path)
|
|
79
79
|
kwargs = {k: v for k, v in options.items() if v is not None}
|
|
80
80
|
if format == "json":
|
|
81
81
|
df = pd.read_json(path, lines=True, **kwargs) # type: ignore
|
sqlframe/base/readerwriter.py
CHANGED
|
@@ -36,6 +36,7 @@ logger = logging.getLogger(__name__)
|
|
|
36
36
|
class _BaseDataFrameReader(t.Generic[SESSION, DF]):
|
|
37
37
|
def __init__(self, spark: SESSION):
|
|
38
38
|
self._session = spark
|
|
39
|
+
self.state_format_to_read: t.Optional[str] = None
|
|
39
40
|
|
|
40
41
|
@property
|
|
41
42
|
def session(self) -> SESSION:
|
|
@@ -67,6 +68,44 @@ class _BaseDataFrameReader(t.Generic[SESSION, DF]):
|
|
|
67
68
|
for k, v in column_mapping.items()
|
|
68
69
|
]
|
|
69
70
|
|
|
71
|
+
def format(self, source: str) -> "Self":
|
|
72
|
+
"""Specifies the input data source format.
|
|
73
|
+
|
|
74
|
+
.. versionadded:: 1.4.0
|
|
75
|
+
|
|
76
|
+
.. versionchanged:: 3.4.0
|
|
77
|
+
Supports Spark Connect.
|
|
78
|
+
|
|
79
|
+
Parameters
|
|
80
|
+
----------
|
|
81
|
+
source : str
|
|
82
|
+
string, name of the data source, e.g. 'json', 'parquet'.
|
|
83
|
+
|
|
84
|
+
Examples
|
|
85
|
+
--------
|
|
86
|
+
>>> spark.read.format('json')
|
|
87
|
+
<...readwriter.DataFrameReader object ...>
|
|
88
|
+
|
|
89
|
+
Write a DataFrame into a JSON file and read it back.
|
|
90
|
+
|
|
91
|
+
>>> import tempfile
|
|
92
|
+
>>> with tempfile.TemporaryDirectory() as d:
|
|
93
|
+
... # Write a DataFrame into a JSON file
|
|
94
|
+
... spark.createDataFrame(
|
|
95
|
+
... [{"age": 100, "name": "Hyukjin Kwon"}]
|
|
96
|
+
... ).write.mode("overwrite").format("json").save(d)
|
|
97
|
+
...
|
|
98
|
+
... # Read the JSON file as a DataFrame.
|
|
99
|
+
... spark.read.format('json').load(d).show()
|
|
100
|
+
+---+------------+
|
|
101
|
+
|age| name|
|
|
102
|
+
+---+------------+
|
|
103
|
+
|100|Hyukjin Kwon|
|
|
104
|
+
+---+------------+
|
|
105
|
+
"""
|
|
106
|
+
self.state_format_to_read = source
|
|
107
|
+
return self
|
|
108
|
+
|
|
70
109
|
def load(
|
|
71
110
|
self,
|
|
72
111
|
path: t.Optional[PathOrPaths] = None,
|
sqlframe/duckdb/readwriter.py
CHANGED
|
@@ -72,6 +72,7 @@ class DuckDBDataFrameReader(_BaseDataFrameReader["DuckDBSession", "DuckDBDataFra
|
|
|
72
72
|
|100|NULL|
|
|
73
73
|
+---+----+
|
|
74
74
|
"""
|
|
75
|
+
format = format or self.state_format_to_read
|
|
75
76
|
if schema:
|
|
76
77
|
column_mapping = ensure_column_mapping(schema)
|
|
77
78
|
select_column_mapping = column_mapping.copy()
|
|
{sqlframe-3.5.0.dist-info → sqlframe-3.7.0.dist-info}/METADATA
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: sqlframe
|
|
3
|
-
Version: 3.5.0
|
|
3
|
+
Version: 3.7.0
|
|
4
4
|
Summary: Turning PySpark Into a Universal DataFrame API
|
|
5
5
|
Home-page: https://github.com/eakmanrq/sqlframe
|
|
6
6
|
Author: Ryan Eakman
|
|
@@ -17,8 +17,8 @@ Classifier: Programming Language :: Python :: 3 :: Only
|
|
|
17
17
|
Requires-Python: >=3.8
|
|
18
18
|
Description-Content-Type: text/markdown
|
|
19
19
|
License-File: LICENSE
|
|
20
|
-
Requires-Dist: prettytable (<3.
|
|
21
|
-
Requires-Dist: sqlglot (<25.
|
|
20
|
+
Requires-Dist: prettytable (<3.12.1)
|
|
21
|
+
Requires-Dist: sqlglot (<25.29,>=24.0.0)
|
|
22
22
|
Requires-Dist: typing-extensions (<5,>=4.8)
|
|
23
23
|
Provides-Extra: bigquery
|
|
24
24
|
Requires-Dist: google-cloud-bigquery-storage (<3,>=2) ; extra == 'bigquery'
|
|
@@ -26,12 +26,12 @@ Requires-Dist: google-cloud-bigquery[pandas] (<4,>=3) ; extra == 'bigquery'
|
|
|
26
26
|
Provides-Extra: dev
|
|
27
27
|
Requires-Dist: duckdb (<1.2,>=0.9) ; extra == 'dev'
|
|
28
28
|
Requires-Dist: findspark (<3,>=2) ; extra == 'dev'
|
|
29
|
-
Requires-Dist: mypy (<1.
|
|
30
|
-
Requires-Dist: openai (<1.
|
|
29
|
+
Requires-Dist: mypy (<1.14,>=1.10.0) ; extra == 'dev'
|
|
30
|
+
Requires-Dist: openai (<1.54,>=1.30) ; extra == 'dev'
|
|
31
31
|
Requires-Dist: pandas-stubs (<3,>=2) ; extra == 'dev'
|
|
32
32
|
Requires-Dist: pandas (<3,>=2) ; extra == 'dev'
|
|
33
33
|
Requires-Dist: psycopg (<4,>=3.1) ; extra == 'dev'
|
|
34
|
-
Requires-Dist: pyarrow (<
|
|
34
|
+
Requires-Dist: pyarrow (<19,>=10) ; extra == 'dev'
|
|
35
35
|
Requires-Dist: pyspark (<3.6,>=2) ; extra == 'dev'
|
|
36
36
|
Requires-Dist: pytest-postgresql (<7,>=6) ; extra == 'dev'
|
|
37
37
|
Requires-Dist: pytest-xdist (<3.7,>=3.6) ; extra == 'dev'
|
|
@@ -50,7 +50,7 @@ Provides-Extra: duckdb
|
|
|
50
50
|
Requires-Dist: duckdb (<1.2,>=0.9) ; extra == 'duckdb'
|
|
51
51
|
Requires-Dist: pandas (<3,>=2) ; extra == 'duckdb'
|
|
52
52
|
Provides-Extra: openai
|
|
53
|
-
Requires-Dist: openai (<1.
|
|
53
|
+
Requires-Dist: openai (<1.54,>=1.30) ; extra == 'openai'
|
|
54
54
|
Provides-Extra: pandas
|
|
55
55
|
Requires-Dist: pandas (<3,>=2) ; extra == 'pandas'
|
|
56
56
|
Provides-Extra: postgres
|
|
{sqlframe-3.5.0.dist-info → sqlframe-3.7.0.dist-info}/RECORD
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
sqlframe/__init__.py,sha256=E3qCJ4PSEgKz6Hg3ves6LWt3JrQOV8c9HVLSIUOzKNc,3106
|
|
2
|
-
sqlframe/_version.py,sha256=
|
|
2
|
+
sqlframe/_version.py,sha256=olhhg46RdfZAlJOQkLB233pDX5rMKFCBvTE1Adjwfqs,411
|
|
3
3
|
sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
4
|
sqlframe/base/_typing.py,sha256=b2clI5HI1zEZKB_3Msx3FeAJQyft44ubUifJwQRVXyQ,1298
|
|
5
5
|
sqlframe/base/catalog.py,sha256=SzFQalTWdhWzxUY-4ut1f9TfOECp_JmJEgNPfrRKCe0,38457
|
|
@@ -12,7 +12,7 @@ sqlframe/base/functions.py,sha256=JDVULt3WI1cwWYboU8ybenIXZPdKSNtnzg7A4xC1Lao,18
|
|
|
12
12
|
sqlframe/base/group.py,sha256=fsyG5990_Pd7gFPjTFrH9IEoAquL_wEkVpIlBAIkZJU,4091
|
|
13
13
|
sqlframe/base/normalize.py,sha256=nXAJ5CwxVf4DV0GsH-q1w0p8gmjSMlv96k_ez1eVul8,3880
|
|
14
14
|
sqlframe/base/operations.py,sha256=-AhNuEzcV7ZExoP1oY3blaKip-joQyJeQVvfBTs_2g4,3456
|
|
15
|
-
sqlframe/base/readerwriter.py,sha256=
|
|
15
|
+
sqlframe/base/readerwriter.py,sha256=9cgOZuB3phZbQufY98JRDBVWRww3hsULc6Or1HK2Onk,26554
|
|
16
16
|
sqlframe/base/session.py,sha256=6vQ2bJa357J1bycx1vMxkIUdcA2YbMIYq956bpytztI,25256
|
|
17
17
|
sqlframe/base/transforms.py,sha256=y0j3SGDz3XCmNGrvassk1S-owllUWfkHyMgZlY6SFO4,467
|
|
18
18
|
sqlframe/base/types.py,sha256=iBNk9bpFtb2NBIogYS8i7OlQZMRvpR6XxqzBebsjQDU,12280
|
|
@@ -22,7 +22,7 @@ sqlframe/base/window.py,sha256=8hOv-ignPPIsZA9FzvYzcLE9J_glalVaYjIAUdRUX3o,4943
|
|
|
22
22
|
sqlframe/base/mixins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
23
23
|
sqlframe/base/mixins/catalog_mixins.py,sha256=9tn0mK8oPoqIIjNItystD5tdBMdK9YpkxTG7G9KQl8k,18619
|
|
24
24
|
sqlframe/base/mixins/dataframe_mixins.py,sha256=3MMQu2hdHG_qtDQ6jDHo0Iy5KtLj4lHePfovCVxTqbo,1411
|
|
25
|
-
sqlframe/base/mixins/readwriter_mixins.py,sha256=
|
|
25
|
+
sqlframe/base/mixins/readwriter_mixins.py,sha256=ap8j_g7PoUGHaHKCPMnRPbXofOsUhUzlaF7Loxy2m-I,4752
|
|
26
26
|
sqlframe/bigquery/__init__.py,sha256=kbaomhYAANPdxeDQhajv8IHfMg_ENKivtYK-rPwaV08,939
|
|
27
27
|
sqlframe/bigquery/catalog.py,sha256=8d36IzT5GPWd1FdxJ9vEljOdbIDepHnFOBjwP0bX6FE,11625
|
|
28
28
|
sqlframe/bigquery/column.py,sha256=E1tUa62Y5HajkhgFuebU9zohrGyieudcHzTT8gfalio,40
|
|
@@ -42,7 +42,7 @@ sqlframe/duckdb/dataframe.py,sha256=HZg_uMAz4RsubZJT4-MslUQS_0-InF0_P5Yq5HyJ3wE,
|
|
|
42
42
|
sqlframe/duckdb/functions.py,sha256=Ee8o6YFtRdEiq0jNLXxgu5lcbc7Tsg0-lK6oRyxdcjo,1920
|
|
43
43
|
sqlframe/duckdb/functions.pyi,sha256=bWfQl7Cm1eecI39LJAyyRcC4z7epDJ-h9JOozPsEc34,5879
|
|
44
44
|
sqlframe/duckdb/group.py,sha256=IkhbW42Ng1U5YT3FkIdiB4zBqRkW4QyTb-1detY1e_4,383
|
|
45
|
-
sqlframe/duckdb/readwriter.py,sha256=
|
|
45
|
+
sqlframe/duckdb/readwriter.py,sha256=6nGnz2SE-tpuPDQXPI23SQQXRre_raUG5cou3s3NrpA,4859
|
|
46
46
|
sqlframe/duckdb/session.py,sha256=b5IrKbTkYUVNQGSG2EJPNV9MTdJw4onN-9aMrskjxck,2721
|
|
47
47
|
sqlframe/duckdb/types.py,sha256=KwNyuXIo-2xVVd4bZED3YrQOobKCtemlxGrJL7DrTC8,34
|
|
48
48
|
sqlframe/duckdb/udf.py,sha256=Du9LnOtT1lJvB90D4HSR2tB7MXy179jZngDR-EjVjQk,656
|
|
@@ -107,8 +107,8 @@ sqlframe/standalone/udf.py,sha256=azmgtUjHNIPs0WMVNId05SHwiYn41MKVBhKXsQJ5dmY,27
|
|
|
107
107
|
sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
|
|
108
108
|
sqlframe/testing/__init__.py,sha256=VVCosQhitU74A3NnE52O4mNtGZONapuEXcc20QmSlnQ,132
|
|
109
109
|
sqlframe/testing/utils.py,sha256=9DDYVuocO7tygee3RaajuJNZ24sJwf_LY556kKg7kTw,13011
|
|
110
|
-
sqlframe-3.5.0.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
|
|
111
|
-
sqlframe-3.
|
|
112
|
-
sqlframe-3.5.0.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
|
|
113
|
-
sqlframe-3.5.0.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
|
|
114
|
-
sqlframe-3.5.0.dist-info/RECORD,,
|
|
110
|
+
sqlframe-3.7.0.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
|
|
111
|
+
sqlframe-3.7.0.dist-info/METADATA,sha256=0Zan1qWdwBIWr-4boxpMu2W4w1h541eJAVRWHgcQd5Q,8639
|
|
112
|
+
sqlframe-3.7.0.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
|
|
113
|
+
sqlframe-3.7.0.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
|
|
114
|
+
sqlframe-3.7.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|