sqlframe 1.11.0__py3-none-any.whl → 1.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlframe/_version.py +2 -2
- sqlframe/base/column.py +41 -0
- sqlframe/base/dataframe.py +23 -2
- sqlframe/base/function_alternatives.py +5 -7
- sqlframe/base/functions.py +4 -2
- sqlframe/base/types.py +10 -0
- sqlframe/bigquery/functions.py +1 -0
- {sqlframe-1.11.0.dist-info → sqlframe-1.12.0.dist-info}/METADATA +1 -1
- {sqlframe-1.11.0.dist-info → sqlframe-1.12.0.dist-info}/RECORD +12 -12
- {sqlframe-1.11.0.dist-info → sqlframe-1.12.0.dist-info}/LICENSE +0 -0
- {sqlframe-1.11.0.dist-info → sqlframe-1.12.0.dist-info}/WHEEL +0 -0
- {sqlframe-1.11.0.dist-info → sqlframe-1.12.0.dist-info}/top_level.txt +0 -0
sqlframe/_version.py
CHANGED
sqlframe/base/column.py
CHANGED
|
@@ -407,3 +407,44 @@ class Column:
|
|
|
407
407
|
window_expression = window.expression.copy()
|
|
408
408
|
window_expression.set("this", self.column_expression)
|
|
409
409
|
return Column(window_expression)
|
|
410
|
+
|
|
411
|
+
def getItem(self, key: t.Any) -> Column:
|
|
412
|
+
"""
|
|
413
|
+
An expression that gets an item at position ``ordinal`` out of a list,
|
|
414
|
+
or gets an item by key out of a dict.
|
|
415
|
+
|
|
416
|
+
.. versionadded:: 1.3.0
|
|
417
|
+
|
|
418
|
+
.. versionchanged:: 3.4.0
|
|
419
|
+
Supports Spark Connect.
|
|
420
|
+
|
|
421
|
+
Parameters
|
|
422
|
+
----------
|
|
423
|
+
key
|
|
424
|
+
a literal value, or a :class:`Column` expression.
|
|
425
|
+
The result will only be true at a location if the item matches in the column.
|
|
426
|
+
|
|
427
|
+
.. deprecated:: 3.0.0
|
|
428
|
+
:class:`Column` as a parameter is deprecated.
|
|
429
|
+
|
|
430
|
+
Returns
|
|
431
|
+
-------
|
|
432
|
+
:class:`Column`
|
|
433
|
+
Column representing the item(s) got at position out of a list or by key out of a dict.
|
|
434
|
+
|
|
435
|
+
Examples
|
|
436
|
+
--------
|
|
437
|
+
>>> df = spark.createDataFrame([([1, 2], {"key": "value"})], ["l", "d"])
|
|
438
|
+
>>> df.select(df.l.getItem(0), df.d.getItem("key")).show()
|
|
439
|
+
+----+------+
|
|
440
|
+
|l[0]|d[key]|
|
|
441
|
+
+----+------+
|
|
442
|
+
| 1| value|
|
|
443
|
+
+----+------+
|
|
444
|
+
"""
|
|
445
|
+
element_at = get_func_from_session("element_at")
|
|
446
|
+
lit = get_func_from_session("lit")
|
|
447
|
+
key = lit(key) if not isinstance(key, Column) else key
|
|
448
|
+
if isinstance(key.expression, exp.Literal) and key.expression.is_number:
|
|
449
|
+
key = key + lit(1)
|
|
450
|
+
return element_at(self, key)
|
sqlframe/base/dataframe.py
CHANGED
|
@@ -361,7 +361,7 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
|
|
|
361
361
|
|
|
362
362
|
cols = self._ensure_list_of_columns(cols)
|
|
363
363
|
normalize(self.session, expression or self.expression, cols)
|
|
364
|
-
return cols
|
|
364
|
+
return list(flatten([self._expand_star(col) for col in cols]))
|
|
365
365
|
|
|
366
366
|
def _ensure_and_normalize_col(self, col):
|
|
367
367
|
from sqlframe.base.column import Column
|
|
@@ -514,6 +514,27 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
|
|
|
514
514
|
select_expressions.append(expression_select_pair) # type: ignore
|
|
515
515
|
return select_expressions
|
|
516
516
|
|
|
517
|
+
def _expand_star(self, col: Column) -> t.List[Column]:
|
|
518
|
+
from sqlframe.base.column import Column
|
|
519
|
+
|
|
520
|
+
if isinstance(col.column_expression, exp.Star):
|
|
521
|
+
return self._get_outer_select_columns(self.expression)
|
|
522
|
+
elif (
|
|
523
|
+
isinstance(col.column_expression, exp.Column)
|
|
524
|
+
and isinstance(col.column_expression.this, exp.Star)
|
|
525
|
+
and col.column_expression.args.get("table")
|
|
526
|
+
):
|
|
527
|
+
for cte in self.expression.ctes:
|
|
528
|
+
if cte.alias_or_name == col.column_expression.args["table"].this:
|
|
529
|
+
return [
|
|
530
|
+
Column.ensure_col(exp.column(x.column_alias_or_name, cte.alias_or_name))
|
|
531
|
+
for x in self._get_outer_select_columns(cte)
|
|
532
|
+
]
|
|
533
|
+
raise ValueError(
|
|
534
|
+
f"Could not find table to expand star: {col.column_expression.args['table']}"
|
|
535
|
+
)
|
|
536
|
+
return [col]
|
|
537
|
+
|
|
517
538
|
@t.overload
|
|
518
539
|
def sql(
|
|
519
540
|
self,
|
|
@@ -1555,7 +1576,7 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
|
|
|
1555
1576
|
result = self.session._fetch_rows(sql)
|
|
1556
1577
|
table = PrettyTable()
|
|
1557
1578
|
if row := seq_get(result, 0):
|
|
1558
|
-
table.field_names =
|
|
1579
|
+
table.field_names = row._unique_field_names
|
|
1559
1580
|
for row in result:
|
|
1560
1581
|
table.add_row(list(row))
|
|
1561
1582
|
print(table)
|
sqlframe/base/function_alternatives.py
CHANGED
|
@@ -1135,13 +1135,11 @@ def array_intersect_using_intersection(col1: ColumnOrName, col2: ColumnOrName) -
|
|
|
1135
1135
|
def element_at_using_brackets(col: ColumnOrName, value: ColumnOrLiteral) -> Column:
|
|
1136
1136
|
col_func = get_func_from_session("col")
|
|
1137
1137
|
lit = get_func_from_session("lit")
|
|
1138
|
-
#
|
|
1139
|
-
if
|
|
1140
|
-
|
|
1141
|
-
|
|
1142
|
-
return Column(
|
|
1143
|
-
expression.Bracket(this=col_func(col).expression, expressions=[value_lit.expression])
|
|
1144
|
-
)
|
|
1138
|
+
# SQLGlot will auto add 1 to whatever we pass in for the brackets even though the value is already 1 based.
|
|
1139
|
+
value = value if isinstance(value, Column) else lit(value)
|
|
1140
|
+
if [x for x in value.expression.find_all(expression.Literal) if x.is_number]:
|
|
1141
|
+
value = value - lit(1)
|
|
1142
|
+
return Column(expression.Bracket(this=col_func(col).expression, expressions=[value.expression])) # type: ignore
|
|
1145
1143
|
|
|
1146
1144
|
|
|
1147
1145
|
def array_remove_using_filter(col: ColumnOrName, value: ColumnOrLiteral) -> Column:
|
sqlframe/base/functions.py
CHANGED
|
@@ -1923,7 +1923,9 @@ def call_function(funcName: str, *cols: ColumnOrName) -> Column:
|
|
|
1923
1923
|
cols = ensure_list(cols) # type: ignore
|
|
1924
1924
|
if len(cols) > 1:
|
|
1925
1925
|
return Column.invoke_anonymous_function(cols[0], funcName, *cols[1:])
|
|
1926
|
-
|
|
1926
|
+
elif len(cols) == 1:
|
|
1927
|
+
return Column.invoke_anonymous_function(cols[0], funcName)
|
|
1928
|
+
return Column.invoke_anonymous_function(None, funcName)
|
|
1927
1929
|
|
|
1928
1930
|
|
|
1929
1931
|
# @meta(unsupported_engines="*")
|
|
@@ -2028,7 +2030,7 @@ def character_length(str: ColumnOrName) -> Column:
|
|
|
2028
2030
|
return Column.invoke_anonymous_function(str, "character_length")
|
|
2029
2031
|
|
|
2030
2032
|
|
|
2031
|
-
@meta(
|
|
2033
|
+
@meta()
|
|
2032
2034
|
def contains(left: ColumnOrName, right: ColumnOrName) -> Column:
|
|
2033
2035
|
return Column.invoke_anonymous_function(left, "contains", right)
|
|
2034
2036
|
|
sqlframe/base/types.py
CHANGED
|
@@ -416,3 +416,13 @@ class Row(tuple):
|
|
|
416
416
|
)
|
|
417
417
|
else:
|
|
418
418
|
return "<Row(%s)>" % ", ".join(repr(field) for field in self)
|
|
419
|
+
|
|
420
|
+
# SQLFrame Specific
|
|
421
|
+
@property
|
|
422
|
+
def _unique_field_names(self) -> t.List[str]:
|
|
423
|
+
fields = []
|
|
424
|
+
for i, field in enumerate(self.__fields__):
|
|
425
|
+
if field in fields:
|
|
426
|
+
field = field + "_" + str(i)
|
|
427
|
+
fields.append(field)
|
|
428
|
+
return fields
|
sqlframe/bigquery/functions.py
CHANGED
|
@@ -52,6 +52,7 @@ from sqlframe.base.function_alternatives import ( # noqa
|
|
|
52
52
|
make_date_from_date_func as make_date,
|
|
53
53
|
to_date_from_timestamp as to_date,
|
|
54
54
|
last_day_with_cast as last_day,
|
|
55
|
+
sha1_force_sha1_and_to_hex as sha,
|
|
55
56
|
sha1_force_sha1_and_to_hex as sha1,
|
|
56
57
|
hash_from_farm_fingerprint as hash,
|
|
57
58
|
base64_from_blob as base64,
|
{sqlframe-1.11.0.dist-info → sqlframe-1.12.0.dist-info}/RECORD
CHANGED
|
@@ -1,21 +1,21 @@
|
|
|
1
1
|
sqlframe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
-
sqlframe/_version.py,sha256=
|
|
2
|
+
sqlframe/_version.py,sha256=cgR9Mx-45EIoPNcTiuL_LLmCR2oVTYIPi5z0W11owvc,413
|
|
3
3
|
sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
4
|
sqlframe/base/_typing.py,sha256=DuTay8-o9W-pw3RPZCgLunKNJLS9PkaV11G_pxXp9NY,1256
|
|
5
5
|
sqlframe/base/catalog.py,sha256=ATDGirouUjal05P4ymL-wIi8rgjg_8w4PoACamiO64A,37245
|
|
6
|
-
sqlframe/base/column.py,sha256=
|
|
7
|
-
sqlframe/base/dataframe.py,sha256=
|
|
6
|
+
sqlframe/base/column.py,sha256=y41rFV7y_seTNkAK3SSqnggGi2otXt0ejKzsMyHCYT4,17515
|
|
7
|
+
sqlframe/base/dataframe.py,sha256=75ZM9r52fufFmVShtntcDUr6dZ1stX9HDmXLuDrYTAU,71004
|
|
8
8
|
sqlframe/base/decorators.py,sha256=I5osMgx9BuCgbtp4jVM2DNwYJVLzCv-OtTedhQEik0g,1882
|
|
9
9
|
sqlframe/base/exceptions.py,sha256=9Uwvqn2eAkDpqm4BrRgbL61qM-GMCbJEMAW8otxO46s,370
|
|
10
|
-
sqlframe/base/function_alternatives.py,sha256=
|
|
11
|
-
sqlframe/base/functions.py,sha256=
|
|
10
|
+
sqlframe/base/function_alternatives.py,sha256=l6Fu0mZ-eleObpYcCAnOXV1HvuHugeoCFUcSV7NmFis,45916
|
|
11
|
+
sqlframe/base/functions.py,sha256=hSLuyO03m2dXPJdmVKp9of-_xj4V2sUhSzJ65Ti240Q,187616
|
|
12
12
|
sqlframe/base/group.py,sha256=TES9CleVmH3x-0X-tqmuUKfCKSWjH5vg1aU3R6dDmFc,4059
|
|
13
13
|
sqlframe/base/normalize.py,sha256=nXAJ5CwxVf4DV0GsH-q1w0p8gmjSMlv96k_ez1eVul8,3880
|
|
14
14
|
sqlframe/base/operations.py,sha256=-AhNuEzcV7ZExoP1oY3blaKip-joQyJeQVvfBTs_2g4,3456
|
|
15
15
|
sqlframe/base/readerwriter.py,sha256=5NPQMiOrw6I54U243R_6-ynnWYsNksgqwRpPp4IFjIw,25288
|
|
16
16
|
sqlframe/base/session.py,sha256=2C0OsPoP49AuqVNtPiazTdVpwQA1668g5WOydrYP6SA,22001
|
|
17
17
|
sqlframe/base/transforms.py,sha256=y0j3SGDz3XCmNGrvassk1S-owllUWfkHyMgZlY6SFO4,467
|
|
18
|
-
sqlframe/base/types.py,sha256=
|
|
18
|
+
sqlframe/base/types.py,sha256=iBNk9bpFtb2NBIogYS8i7OlQZMRvpR6XxqzBebsjQDU,12280
|
|
19
19
|
sqlframe/base/util.py,sha256=tWccrZne-Acn4N2RxYr87mfI_GDMf_K9hRD7BnhGBq0,11756
|
|
20
20
|
sqlframe/base/window.py,sha256=8hOv-ignPPIsZA9FzvYzcLE9J_glalVaYjIAUdRUX3o,4943
|
|
21
21
|
sqlframe/base/mixins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -26,7 +26,7 @@ sqlframe/bigquery/__init__.py,sha256=i2NsMbiXOj2xphCtPuNk6cVw4iYeq5_B1I9dVI9aGAk
|
|
|
26
26
|
sqlframe/bigquery/catalog.py,sha256=h3aQAQAJg6MMvFpP8Ku0S4pcx30n5qYrqHhWSomxb6A,9319
|
|
27
27
|
sqlframe/bigquery/column.py,sha256=E1tUa62Y5HajkhgFuebU9zohrGyieudcHzTT8gfalio,40
|
|
28
28
|
sqlframe/bigquery/dataframe.py,sha256=Y2uy4FEYw0KxIHgnaA9uMwdIzxJzTlD_NSzIe7P7kxA,2405
|
|
29
|
-
sqlframe/bigquery/functions.py,sha256=
|
|
29
|
+
sqlframe/bigquery/functions.py,sha256=ifJxEyHDwSp2iA-yBt7XHLh9GhVPpgzs5YckMFvag8w,11370
|
|
30
30
|
sqlframe/bigquery/functions.pyi,sha256=JiyLFLiO0jyJec6j1o4ujPVQ7Tma-c9YHlm-3UQYD9M,13642
|
|
31
31
|
sqlframe/bigquery/group.py,sha256=UVBNBRTo8OqS-_cS5YwvTeJYgYxeG-d6R3kfyHmlFqw,391
|
|
32
32
|
sqlframe/bigquery/readwriter.py,sha256=WAD3ZMwkkjOpvPPoZXfaLLNM6tRTeUvdEj-hQZAzXeo,870
|
|
@@ -99,8 +99,8 @@ sqlframe/standalone/types.py,sha256=KwNyuXIo-2xVVd4bZED3YrQOobKCtemlxGrJL7DrTC8,
|
|
|
99
99
|
sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
|
|
100
100
|
sqlframe/testing/__init__.py,sha256=VVCosQhitU74A3NnE52O4mNtGZONapuEXcc20QmSlnQ,132
|
|
101
101
|
sqlframe/testing/utils.py,sha256=9DDYVuocO7tygee3RaajuJNZ24sJwf_LY556kKg7kTw,13011
|
|
102
|
-
sqlframe-1.
|
|
103
|
-
sqlframe-1.
|
|
104
|
-
sqlframe-1.
|
|
105
|
-
sqlframe-1.
|
|
106
|
-
sqlframe-1.
|
|
102
|
+
sqlframe-1.12.0.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
|
|
103
|
+
sqlframe-1.12.0.dist-info/METADATA,sha256=pSBFDDNjWo6RscllTU6EquCE1DG0C8FXmuad1RINxgk,7497
|
|
104
|
+
sqlframe-1.12.0.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
|
|
105
|
+
sqlframe-1.12.0.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
|
|
106
|
+
sqlframe-1.12.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|