sqlframe 1.6.3__py3-none-any.whl → 1.7.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlframe/_version.py +2 -2
- sqlframe/base/functions.py +19 -4
- sqlframe/base/session.py +2 -1
- sqlframe/spark/__init__.py +16 -16
- sqlframe/spark/functions.py +1 -3
- sqlframe/spark/functions.pyi +244 -0
- sqlframe/spark/session.py +55 -13
- {sqlframe-1.6.3.dist-info → sqlframe-1.7.1.dist-info}/METADATA +10 -5
- {sqlframe-1.6.3.dist-info → sqlframe-1.7.1.dist-info}/RECORD +12 -11
- {sqlframe-1.6.3.dist-info → sqlframe-1.7.1.dist-info}/LICENSE +0 -0
- {sqlframe-1.6.3.dist-info → sqlframe-1.7.1.dist-info}/WHEEL +0 -0
- {sqlframe-1.6.3.dist-info → sqlframe-1.7.1.dist-info}/top_level.txt +0 -0
sqlframe/_version.py
CHANGED
sqlframe/base/functions.py
CHANGED
@@ -151,9 +151,10 @@ def sumDistinct(col: ColumnOrName) -> Column:
 sum_distinct = sumDistinct
 
 
-
-
-
+# Product does not have a SQL function available
+# @meta(unsupported_engines="*")
+# def product(col: ColumnOrName) -> Column:
+#     raise NotImplementedError("Product is not currently implemented")
 
 
 @meta()
@@ -1430,6 +1431,8 @@ def to_json(col: ColumnOrName, options: t.Optional[t.Dict[str, str]] = None) ->
 
 @meta(unsupported_engines="*")
 def schema_of_json(col: ColumnOrName, options: t.Optional[t.Dict[str, str]] = None) -> Column:
+    if isinstance(col, str):
+        col = lit(col)
     if options is not None:
         options_col = create_map([lit(x) for x in _flatten(options.items())])
         return Column.invoke_anonymous_function(col, "SCHEMA_OF_JSON", options_col)
@@ -1438,6 +1441,8 @@ def schema_of_json(col: ColumnOrName, options: t.Optional[t.Dict[str, str]] = No
 
 @meta(unsupported_engines="*")
 def schema_of_csv(col: ColumnOrName, options: t.Optional[t.Dict[str, str]] = None) -> Column:
+    if isinstance(col, str):
+        col = lit(col)
     if options is not None:
         options_col = create_map([lit(x) for x in _flatten(options.items())])
         return Column.invoke_anonymous_function(col, "SCHEMA_OF_CSV", options_col)
@@ -1560,7 +1565,9 @@ def from_csv(
 ) -> Column:
     schema = schema if isinstance(schema, Column) else lit(schema)
     if options is not None:
-        option_cols = create_map(
+        option_cols = create_map(
+            [lit(str(x) if isinstance(x, bool) else x) for x in _flatten(options.items())]
+        )
         return Column.invoke_anonymous_function(col, "FROM_CSV", schema, option_cols)
     return Column.invoke_anonymous_function(col, "FROM_CSV", schema)
 
@@ -1667,6 +1674,14 @@ def nullif(col1: ColumnOrName, col2: ColumnOrName) -> Column:
     return Column.invoke_expression_over_column(col1, expression.Nullif, expression=col2)
 
 
+@meta(unsupported_engines="*")
+def stack(*cols: ColumnOrName) -> Column:
+    columns = [Column.ensure_col(x) for x in cols]
+    return Column.invoke_anonymous_function(
+        columns[0], "STACK", *columns[1:] if len(columns) > 1 else []
+    )
+
+
 @meta()
 def _lambda_quoted(value: str) -> t.Optional[bool]:
     return False if value == "_" else None
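The user-visible effect of these hunks: `schema_of_json` and `schema_of_csv` now accept a bare Python string (it is wrapped in `lit` automatically), and a `stack` function is exposed. Both are tagged `unsupported_engines="*"`, so the Spark engine, which executes through a real PySpark session rather than generated SQL, is the intended target. A minimal sketch, assuming a local `pyspark` install; the data and column names are illustrative:

```python
from sqlframe.spark import SparkSession
from sqlframe.spark import functions as F

session = SparkSession.builder.master("local[*]").getOrCreate()
df = session.createDataFrame([(1, 2)], ["a", "b"])

# schema_of_json now wraps the bare string in lit() internally.
df.select(F.schema_of_json('{"x": 1}').alias("schema")).show()

# STACK(n, v1, v2, ...) pivots values into n rows; here two rows of one column.
df.select(F.stack(F.lit(2), F.col("a"), F.col("b"))).show()
```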
sqlframe/base/session.py
CHANGED
@@ -569,6 +569,7 @@ class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, CONN]):
         self,
         key: t.Optional[str] = None,
         value: t.Optional[t.Any] = None,
+        conf: t.Optional[t.Any] = None,
         *,
         map: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> Self:
@@ -578,7 +579,7 @@ class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, CONN]):
     def _set_session_properties(self) -> None:
         self.session.input_dialect = Dialect.get_or_raise(self.input_dialect)
         self.session.output_dialect = Dialect.get_or_raise(self.output_dialect)
-        if not self.session._connection:
+        if hasattr(self.session, "_connection") and not self.session._connection:
             self.session._connection = self._conn
 
     builder = Builder()
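The extra `conf` parameter keeps the base `config()` positionally compatible with PySpark's `SparkSession.Builder.config(key, value, conf, *, map=...)`, and the `hasattr` guard lets sessions without a `_connection` slot (such as the Spark engine) pass through `_set_session_properties`. A usage sketch; the config key shown is illustrative, not a documented setting:

```python
from sqlframe.standalone import StandaloneSession

# Keyword and map-based configuration still route through the same method;
# the `conf` slot exists so positional PySpark-style calls do not raise.
session = (
    StandaloneSession.builder
    .config(map={"sqlframe.input.dialect": "duckdb"})  # illustrative key
    .getOrCreate()
)
```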
sqlframe/spark/__init__.py
CHANGED
@@ -1,23 +1,23 @@
-from sqlframe.
-from sqlframe.
-from sqlframe.
-from sqlframe.
-from sqlframe.
-
-
+from sqlframe.spark.catalog import SparkCatalog
+from sqlframe.spark.column import Column
+from sqlframe.spark.dataframe import SparkDataFrame, SparkDataFrameNaFunctions
+from sqlframe.spark.group import SparkGroupedData
+from sqlframe.spark.readwriter import (
+    SparkDataFrameReader,
+    SparkDataFrameWriter,
 )
-from sqlframe.
-from sqlframe.
+from sqlframe.spark.session import SparkSession
+from sqlframe.spark.window import Window, WindowSpec
 
 __all__ = [
-    "
+    "SparkCatalog",
     "Column",
-    "
-    "
-    "
-    "
-    "
-    "
+    "SparkDataFrame",
+    "SparkDataFrameNaFunctions",
+    "SparkGroupedData",
+    "SparkDataFrameReader",
+    "SparkDataFrameWriter",
+    "SparkSession",
     "Window",
     "WindowSpec",
 ]
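With the rename pass applied, the Spark engine's package root exports the same Spark-prefixed surface as the other engines:

```python
# All of these resolve directly from the engine package after this change.
from sqlframe.spark import (
    SparkCatalog,
    SparkDataFrame,
    SparkDataFrameReader,
    SparkDataFrameWriter,
    SparkSession,
    Window,
)
```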
sqlframe/spark/functions.py
CHANGED
@@ -8,9 +8,7 @@ globals().update(
     {
         name: func
         for name, func in inspect.getmembers(module, inspect.isfunction)
-        if hasattr(func, "unsupported_engines")
-        and "spark" not in func.unsupported_engines
-        and "*" not in func.unsupported_engines
+        if hasattr(func, "unsupported_engines") and "spark" not in func.unsupported_engines
     }
 )
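Dropping the `"*" not in func.unsupported_engines` clause means functions flagged unsupported on every SQL-generation engine (such as the new `stack`) are still re-exported here, because the Spark engine executes natively instead of transpiling. A self-contained sketch of the export pattern this module uses:

```python
import inspect

import sqlframe.base.functions as module

# Re-export everything not explicitly marked unsupported for "spark";
# functions tagged "*" now pass this filter as well.
exported = {
    name: func
    for name, func in inspect.getmembers(module, inspect.isfunction)
    if hasattr(func, "unsupported_engines") and "spark" not in func.unsupported_engines
}
globals().update(exported)
```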
sqlframe/spark/functions.pyi
ADDED
@@ -0,0 +1,244 @@
+from sqlframe.base.function_alternatives import (  # noqa
+    percentile_without_disc as percentile,
+    add_months_by_multiplication as add_months,
+    arrays_overlap_renamed as arrays_overlap,
+)
+from sqlframe.base.functions import (
+    abs as abs,
+    acos as acos,
+    acosh as acosh,
+    aggregate as aggregate,
+    approxCountDistinct as approxCountDistinct,
+    approx_count_distinct as approx_count_distinct,
+    array as array,
+    array_contains as array_contains,
+    array_distinct as array_distinct,
+    array_except as array_except,
+    array_intersect as array_intersect,
+    array_join as array_join,
+    array_max as array_max,
+    array_min as array_min,
+    array_position as array_position,
+    array_remove as array_remove,
+    array_repeat as array_repeat,
+    array_sort as array_sort,
+    array_union as array_union,
+    arrays_zip as arrays_zip,
+    asc as asc,
+    asc_nulls_first as asc_nulls_first,
+    asc_nulls_last as asc_nulls_last,
+    ascii as ascii,
+    asin as asin,
+    asinh as asinh,
+    assert_true as assert_true,
+    atan as atan,
+    atan2 as atan2,
+    atanh as atanh,
+    avg as avg,
+    base64 as base64,
+    bin as bin,
+    bit_length as bit_length,
+    bitwiseNOT as bitwiseNOT,
+    bitwise_not as bitwise_not,
+    bround as bround,
+    cbrt as cbrt,
+    ceil as ceil,
+    ceiling as ceiling,
+    coalesce as coalesce,
+    col as col,
+    collect_list as collect_list,
+    collect_set as collect_set,
+    concat as concat,
+    concat_ws as concat_ws,
+    conv as conv,
+    corr as corr,
+    cos as cos,
+    cosh as cosh,
+    cot as cot,
+    count as count,
+    countDistinct as countDistinct,
+    count_distinct as count_distinct,
+    covar_pop as covar_pop,
+    covar_samp as covar_samp,
+    crc32 as crc32,
+    create_map as create_map,
+    csc as csc,
+    cume_dist as cume_dist,
+    current_date as current_date,
+    current_timestamp as current_timestamp,
+    date_add as date_add,
+    date_diff as date_diff,
+    date_format as date_format,
+    date_sub as date_sub,
+    date_trunc as date_trunc,
+    dayofmonth as dayofmonth,
+    dayofweek as dayofweek,
+    dayofyear as dayofyear,
+    decode as decode,
+    degrees as degrees,
+    dense_rank as dense_rank,
+    desc as desc,
+    desc_nulls_first as desc_nulls_first,
+    desc_nulls_last as desc_nulls_last,
+    e as e,
+    element_at as element_at,
+    encode as encode,
+    exists as exists,
+    exp as exp,
+    explode as explode,
+    explode_outer as explode_outer,
+    expm1 as expm1,
+    expr as expr,
+    factorial as factorial,
+    filter as filter,
+    first as first,
+    flatten as flatten,
+    floor as floor,
+    forall as forall,
+    format_number as format_number,
+    format_string as format_string,
+    from_csv as from_csv,
+    from_json as from_json,
+    from_unixtime as from_unixtime,
+    from_utc_timestamp as from_utc_timestamp,
+    get_json_object as get_json_object,
+    greatest as greatest,
+    grouping_id as grouping_id,
+    hash as hash,
+    hex as hex,
+    hour as hour,
+    hypot as hypot,
+    initcap as initcap,
+    input_file_name as input_file_name,
+    instr as instr,
+    isnan as isnan,
+    isnull as isnull,
+    json_tuple as json_tuple,
+    kurtosis as kurtosis,
+    lag as lag,
+    last as last,
+    last_day as last_day,
+    lead as lead,
+    least as least,
+    length as length,
+    levenshtein as levenshtein,
+    lit as lit,
+    locate as locate,
+    log as log,
+    log10 as log10,
+    log1p as log1p,
+    log2 as log2,
+    lower as lower,
+    lpad as lpad,
+    ltrim as ltrim,
+    make_date as make_date,
+    map_concat as map_concat,
+    map_entries as map_entries,
+    map_filter as map_filter,
+    map_from_arrays as map_from_arrays,
+    map_from_entries as map_from_entries,
+    map_keys as map_keys,
+    map_values as map_values,
+    map_zip_with as map_zip_with,
+    max as max,
+    max_by as max_by,
+    md5 as md5,
+    mean as mean,
+    min as min,
+    min_by as min_by,
+    minute as minute,
+    monotonically_increasing_id as monotonically_increasing_id,
+    month as month,
+    months_between as months_between,
+    nanvl as nanvl,
+    next_day as next_day,
+    nth_value as nth_value,
+    ntile as ntile,
+    nullif as nullif,
+    octet_length as octet_length,
+    overlay as overlay,
+    percent_rank as percent_rank,
+    percentile_approx as percentile_approx,
+    posexplode as posexplode,
+    posexplode_outer as posexplode_outer,
+    pow as pow,
+    quarter as quarter,
+    radians as radians,
+    raise_error as raise_error,
+    rand as rand,
+    randn as randn,
+    rank as rank,
+    regexp_extract as regexp_extract,
+    regexp_replace as regexp_replace,
+    repeat as repeat,
+    reverse as reverse,
+    rint as rint,
+    round as round,
+    row_number as row_number,
+    rpad as rpad,
+    rtrim as rtrim,
+    schema_of_csv as schema_of_csv,
+    schema_of_json as schema_of_json,
+    sec as sec,
+    second as second,
+    sentences as sentences,
+    sequence as sequence,
+    sha1 as sha1,
+    sha2 as sha2,
+    shiftLeft as shiftLeft,
+    shiftRight as shiftRight,
+    shiftRightUnsigned as shiftRightUnsigned,
+    shiftleft as shiftleft,
+    shiftright as shiftright,
+    shiftrightunsigned as shiftrightunsigned,
+    shuffle as shuffle,
+    signum as signum,
+    sin as sin,
+    sinh as sinh,
+    size as size,
+    skewness as skewness,
+    slice as slice,
+    sort_array as sort_array,
+    soundex as soundex,
+    split as split,
+    sqrt as sqrt,
+    stack as stack,
+    stddev as stddev,
+    stddev_pop as stddev_pop,
+    stddev_samp as stddev_samp,
+    struct as struct,
+    substring as substring,
+    substring_index as substring_index,
+    sum as sum,
+    sumDistinct as sumDistinct,
+    sum_distinct as sum_distinct,
+    tan as tan,
+    tanh as tanh,
+    timestamp_seconds as timestamp_seconds,
+    toDegrees as toDegrees,
+    toRadians as toRadians,
+    to_csv as to_csv,
+    to_date as to_date,
+    to_json as to_json,
+    to_timestamp as to_timestamp,
+    to_utc_timestamp as to_utc_timestamp,
+    transform as transform,
+    transform_keys as transform_keys,
+    transform_values as transform_values,
+    translate as translate,
+    trim as trim,
+    trunc as trunc,
+    typeof as typeof,
+    unbase64 as unbase64,
+    unhex as unhex,
+    unix_timestamp as unix_timestamp,
+    upper as upper,
+    var_pop as var_pop,
+    var_samp as var_samp,
+    variance as variance,
+    weekofyear as weekofyear,
+    when as when,
+    xxhash64 as xxhash64,
+    year as year,
+    zip_with as zip_with,
+)
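The stub exists because `functions.py` populates its namespace via `globals().update(...)` at runtime, which static type checkers cannot follow; the `.pyi` spells the names out so tooling can resolve them. For example, the following now checks cleanly under mypy:

```python
# Resolved through the new functions.pyi re-exports rather than runtime globals().
from sqlframe.spark.functions import col, lit, schema_of_json, stack
```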
sqlframe/spark/session.py
CHANGED
@@ -1,9 +1,9 @@
 from __future__ import annotations
 
 import typing as t
-import warnings
 
 from sqlglot import exp
+from sqlglot.helper import classproperty
 
 from sqlframe.base.session import _BaseSession
 from sqlframe.spark.catalog import SparkCatalog
@@ -16,6 +16,12 @@ from sqlframe.spark.types import Row
 
 if t.TYPE_CHECKING:
     import pandas as pd
+    from pyspark.sql.session import SparkConf
+    from pyspark.sql.session import SparkSession as PySparkSession
+
+    from sqlframe.base._typing import OptionalPrimitiveType
+else:
+    PySparkSession = t.Any
 
 
 class SparkSession(
@@ -24,7 +30,7 @@ class SparkSession(
     SparkDataFrameReader,
     SparkDataFrameWriter,
     SparkDataFrame,
-
+    PySparkSession,
 ],
 ):
     _catalog = SparkCatalog
@@ -32,17 +38,13 @@ class SparkSession(
     _writer = SparkDataFrameWriter
     _df = SparkDataFrame
 
-    def __init__(self, conn: t.Optional[
-        warnings.warn(
-            "SparkSession is still in active development. Functions may not work as expected."
-        )
-
+    def __init__(self, conn: t.Optional[PySparkSession] = None, *args, **kwargs):
         from pyspark.sql.session import DataFrame, SparkSession
 
+        self._last_df: t.Optional[DataFrame] = None
         if not hasattr(self, "spark_session"):
             super().__init__(conn)
-            self.spark_session = SparkSession.builder.getOrCreate()
-            self._last_df: t.Optional[DataFrame] = None
+            self.spark_session = conn or SparkSession.builder.getOrCreate()
 
     @property
     def _conn(self) -> t.Any:
@@ -77,16 +79,56 @@ class SparkSession(
     def _has_connection(self) -> bool:
         return True
 
+    @classproperty
+    def builder(cls) -> Builder:  # type: ignore
+        """Creates a :class:`Builder` for constructing a :class:`SparkSession`.
+
+        .. versionchanged:: 3.4.0
+            Supports Spark Connect.
+        """
+        return cls.Builder()
+
     class Builder(_BaseSession.Builder):
-
-
+        def __init__(self):
+            from pyspark.sql.session import SparkSession
+
+            super().__init__()
+            self.builder = SparkSession.builder
+
+        def config(
+            self,
+            key: t.Optional[str] = None,
+            value: t.Optional[t.Any] = None,
+            conf: t.Optional[SparkConf] = None,
+            *,
+            map: t.Optional[t.Dict[str, OptionalPrimitiveType]] = None,
+        ) -> SparkSession.Builder:
+            super().config(key, value, map=map)
+            self.builder = self.builder.config(key, value, conf, map=map)
+            return self
+
+        def master(self, master: str) -> SparkSession.Builder:
+            self.builder = self.builder.config("spark.master", master)
+            return self
+
+        def remote(self, url: str) -> SparkSession.Builder:
+            self.builder = self.builder.config("spark.remote", url)
+            return self
+
+        def appName(self, name: str) -> SparkSession.Builder:
+            self.builder = self.builder.appName(name)
+            return self
+
+        def enableHiveSupport(self) -> SparkSession.Builder:
+            self.builder = self.builder.enableHiveSupport()
+            return self
 
     @property
     def session(self) -> SparkSession:
+        if "conn" not in self._session_kwargs:
+            self._session_kwargs["conn"] = self.builder.getOrCreate()
         return SparkSession(**self._session_kwargs)
 
     def getOrCreate(self) -> SparkSession:
        self._set_session_properties()
        return self.session
-
-    builder = Builder()
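Taken together, the session changes let sqlframe's `SparkSession` wrap an existing PySpark session passed as `conn`, or build one through a PySpark-compatible `Builder` (`appName`, `master`, `remote`, `enableHiveSupport`, `config`); the in-development warning is gone. A minimal sketch, assuming a local `pyspark` install:

```python
from sqlframe.spark import SparkSession

# Builder calls are forwarded to pyspark's own builder before getOrCreate().
session = (
    SparkSession.builder
    .appName("sqlframe-demo")
    .master("local[*]")
    .getOrCreate()
)

# Alternatively, hand over a PySpark session you already created:
# session = SparkSession(conn=existing_pyspark_session)
```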
{sqlframe-1.6.3.dist-info → sqlframe-1.7.1.dist-info}/METADATA
CHANGED
@@ -1,7 +1,7 @@
 Metadata-Version: 2.1
 Name: sqlframe
-Version: 1.6.3
-Summary:
+Version: 1.7.1
+Summary: Turning PySpark Into a Universal DataFrame API
 Home-page: https://github.com/eakmanrq/sqlframe
 Author: Ryan Eakman
 Author-email: eakmanrq@gmail.com
@@ -19,13 +19,14 @@ Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: prettytable (<3.11.0)
 Requires-Dist: sqlglot (<25.1,>=24.0.0)
+Requires-Dist: typing-extensions (<5,>=4.8)
 Provides-Extra: bigquery
 Requires-Dist: google-cloud-bigquery-storage (<3,>=2) ; extra == 'bigquery'
 Requires-Dist: google-cloud-bigquery[pandas] (<4,>=3) ; extra == 'bigquery'
 Provides-Extra: dev
 Requires-Dist: duckdb (<1.1,>=0.9) ; extra == 'dev'
 Requires-Dist: mypy (<1.11,>=1.10.0) ; extra == 'dev'
-Requires-Dist: openai (<1.
+Requires-Dist: openai (<1.34,>=1.30) ; extra == 'dev'
 Requires-Dist: pandas-stubs (<3,>=2) ; extra == 'dev'
 Requires-Dist: pandas (<3,>=2) ; extra == 'dev'
 Requires-Dist: psycopg (<4,>=3.1) ; extra == 'dev'
@@ -36,7 +37,6 @@ Requires-Dist: pytest-xdist (<3.7,>=3.6) ; extra == 'dev'
 Requires-Dist: pytest (<8.3,>=8.2.0) ; extra == 'dev'
 Requires-Dist: ruff (<0.5,>=0.4.4) ; extra == 'dev'
 Requires-Dist: types-psycopg2 (<3,>=2.9) ; extra == 'dev'
-Requires-Dist: typing-extensions (<5,>=4.11) ; extra == 'dev'
 Requires-Dist: pre-commit (>=3.5) ; (python_version == "3.8") and extra == 'dev'
 Requires-Dist: pre-commit (<3.8,>=3.7) ; (python_version >= "3.9") and extra == 'dev'
 Provides-Extra: docs
@@ -49,7 +49,7 @@ Provides-Extra: duckdb
 Requires-Dist: duckdb (<1.1,>=0.9) ; extra == 'duckdb'
 Requires-Dist: pandas (<3,>=2) ; extra == 'duckdb'
 Provides-Extra: openai
-Requires-Dist: openai (<1.
+Requires-Dist: openai (<1.34,>=1.30) ; extra == 'openai'
 Provides-Extra: pandas
 Requires-Dist: pandas (<3,>=2) ; extra == 'pandas'
 Provides-Extra: postgres
@@ -73,14 +73,17 @@ SQLFrame currently supports the following engines (many more in development):
 * [DuckDB](https://sqlframe.readthedocs.io/en/stable/duckdb)
 * [Postgres](https://sqlframe.readthedocs.io/en/stable/postgres)
 * [Snowflake](https://sqlframe.readthedocs.io/en/stable/snowflake)
+* [Spark](https://sqlframe.readthedocs.io/en/stable/spark)
 
 SQLFrame also has a "Standalone" session that be used to generate SQL without any connection to a database engine.
+
 * [Standalone](https://sqlframe.readthedocs.io/en/stable/standalone)
 
 SQLFrame is great for:
 
 * Users who want to run PySpark DataFrame code without having to use a Spark cluster
 * Users who want a SQL representation of their DataFrame code for debugging or sharing with others
+  * See [Spark Engine](https://sqlframe.readthedocs.io/en/stable/spark/) for more details
 * Users who want a DataFrame API that leverages the full power of their engine to do the processing
 
 ## Installation
@@ -94,6 +97,8 @@ pip install "sqlframe[duckdb]"
 pip install "sqlframe[postgres]"
 # Snowflake
 pip install "sqlframe[snowflake]"
+# Spark
+pip install "sqlframe[spark]"
 # Standalone
 pip install sqlframe
 ```
{sqlframe-1.6.3.dist-info → sqlframe-1.7.1.dist-info}/RECORD
CHANGED
@@ -1,5 +1,5 @@
 sqlframe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-sqlframe/_version.py,sha256=
+sqlframe/_version.py,sha256=kn-QYzzAhfbnfKK6EpE9gJz8TDZkEk52evaid1DHkG4,411
 sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sqlframe/base/_typing.py,sha256=DuTay8-o9W-pw3RPZCgLunKNJLS9PkaV11G_pxXp9NY,1256
 sqlframe/base/catalog.py,sha256=ATDGirouUjal05P4ymL-wIi8rgjg_8w4PoACamiO64A,37245
@@ -8,12 +8,12 @@ sqlframe/base/dataframe.py,sha256=uL4neDTMy1a9XJH46YLQryzdDci4iDxNXBtiJOzfHfs,67
 sqlframe/base/decorators.py,sha256=I5osMgx9BuCgbtp4jVM2DNwYJVLzCv-OtTedhQEik0g,1882
 sqlframe/base/exceptions.py,sha256=pCB9hXX4jxZWzNg3JN1i38cv3BmpUlee5NoLYx3YXIQ,208
 sqlframe/base/function_alternatives.py,sha256=NDXs2igY7PBsStzTSRZvJcCshBOJkPQl2GbhpVFU6To,42931
-sqlframe/base/functions.py,sha256=
+sqlframe/base/functions.py,sha256=i4XvQ9KOW55SX2gs1CAk3U1t6-TRLiIZG_RJj_Zn5cI,54852
 sqlframe/base/group.py,sha256=TES9CleVmH3x-0X-tqmuUKfCKSWjH5vg1aU3R6dDmFc,4059
 sqlframe/base/normalize.py,sha256=nXAJ5CwxVf4DV0GsH-q1w0p8gmjSMlv96k_ez1eVul8,3880
 sqlframe/base/operations.py,sha256=-AhNuEzcV7ZExoP1oY3blaKip-joQyJeQVvfBTs_2g4,3456
 sqlframe/base/readerwriter.py,sha256=5NPQMiOrw6I54U243R_6-ynnWYsNksgqwRpPp4IFjIw,25288
-sqlframe/base/session.py,sha256=
+sqlframe/base/session.py,sha256=oQsOwlwAhbqtD8epR44kGXP29S31fIxM29NxfwCbcl0,21993
 sqlframe/base/transforms.py,sha256=y0j3SGDz3XCmNGrvassk1S-owllUWfkHyMgZlY6SFO4,467
 sqlframe/base/types.py,sha256=aJT5YXr-M_LAfUM0uK4asfbrQFab_xmsp1CP2zkG8p0,11924
 sqlframe/base/util.py,sha256=hgmTVzdTvHhfc9d5I96wjk9LGr-vhSZlaB-MejENzcA,9110
@@ -76,14 +76,15 @@ sqlframe/snowflake/readwriter.py,sha256=yhRc2HcMq6PwV3ghZWC-q-qaE7LE4aEjZEXCip4O
 sqlframe/snowflake/session.py,sha256=bDOlnuIiQ9j_zfF7F5H1gTLmpHUjruIxr2CfXcS_7YU,3284
 sqlframe/snowflake/types.py,sha256=KwNyuXIo-2xVVd4bZED3YrQOobKCtemlxGrJL7DrTC8,34
 sqlframe/snowflake/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
-sqlframe/spark/__init__.py,sha256=
+sqlframe/spark/__init__.py,sha256=WhYQAZMJN1EMNAVGUH7BEinxNdYtXOrrr-6HUniJOyI,649
 sqlframe/spark/catalog.py,sha256=YeWBCUlkkhf2jDcmaFo-JvG4DQ6Daqyy1zEnVBx5gMo,32526
 sqlframe/spark/column.py,sha256=E1tUa62Y5HajkhgFuebU9zohrGyieudcHzTT8gfalio,40
 sqlframe/spark/dataframe.py,sha256=V3z5Bx9snLgYh4bDwJfJb5mj1P7UsZF8DMlLwZXopBg,1309
-sqlframe/spark/functions.py,sha256=
+sqlframe/spark/functions.py,sha256=PkK4MBpVADhnDbrgFDii5zFaNrhi4y-OYX3Lcu-SW0k,530
+sqlframe/spark/functions.pyi,sha256=eiAAXKW57HOJNfqLndkyijxlpIt-5WfojLWTk2gN6_Y,6479
 sqlframe/spark/group.py,sha256=MrvV_v-YkBc6T1zz882WrEqtWjlooWIyHBCmTQg3fCA,379
 sqlframe/spark/readwriter.py,sha256=w68EImTcGJv64X7pc1tk5tDjDxb1nAnn-MiIaaN9Dc8,812
-sqlframe/spark/session.py,sha256=
+sqlframe/spark/session.py,sha256=ztIS7VCFxjR3B7i4JXaXo0evTUhUjOsIAZb7Ssqt2cU,4254
 sqlframe/spark/types.py,sha256=KwNyuXIo-2xVVd4bZED3YrQOobKCtemlxGrJL7DrTC8,34
 sqlframe/spark/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
 sqlframe/standalone/__init__.py,sha256=yu4A97HwhyDwllDEzG7io4ScyWipWSAH2tqUKS545OA,767
@@ -96,8 +97,8 @@ sqlframe/standalone/readwriter.py,sha256=EZNyDJ4ID6sGNog3uP4-e9RvchX4biJJDNtc5hk
 sqlframe/standalone/session.py,sha256=wQmdu2sv6KMTAv0LRFk7TY7yzlh3xvmsyqilEtRecbY,1191
 sqlframe/standalone/types.py,sha256=KwNyuXIo-2xVVd4bZED3YrQOobKCtemlxGrJL7DrTC8,34
 sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
-sqlframe-1.
-sqlframe-1.
-sqlframe-1.
-sqlframe-1.
-sqlframe-1.
+sqlframe-1.7.1.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
+sqlframe-1.7.1.dist-info/METADATA,sha256=K-7c9prIXrECo4scNvY_F2mHYwNEWjCefXJDpe_ahlc,7496
+sqlframe-1.7.1.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
+sqlframe-1.7.1.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
+sqlframe-1.7.1.dist-info/RECORD,,
{sqlframe-1.6.3.dist-info → sqlframe-1.7.1.dist-info}/LICENSE
File without changes
{sqlframe-1.6.3.dist-info → sqlframe-1.7.1.dist-info}/WHEEL
File without changes
{sqlframe-1.6.3.dist-info → sqlframe-1.7.1.dist-info}/top_level.txt
File without changes