sqlframe 1.11.0__py3-none-any.whl → 1.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sqlframe/_version.py CHANGED
@@ -12,5 +12,5 @@ __version__: str
12
12
  __version_tuple__: VERSION_TUPLE
13
13
  version_tuple: VERSION_TUPLE
14
14
 
15
- __version__ = version = '1.11.0'
16
- __version_tuple__ = version_tuple = (1, 11, 0)
15
+ __version__ = version = '1.12.0'
16
+ __version_tuple__ = version_tuple = (1, 12, 0)
sqlframe/base/column.py CHANGED
@@ -407,3 +407,44 @@ class Column:
407
407
  window_expression = window.expression.copy()
408
408
  window_expression.set("this", self.column_expression)
409
409
  return Column(window_expression)
410
+
411
+ def getItem(self, key: t.Any) -> Column:
412
+ """
413
+ An expression that gets an item at position ``ordinal`` out of a list,
414
+ or gets an item by key out of a dict.
415
+
416
+ .. versionadded:: 1.3.0
417
+
418
+ .. versionchanged:: 3.4.0
419
+ Supports Spark Connect.
420
+
421
+ Parameters
422
+ ----------
423
+ key
424
+ a literal value, or a :class:`Column` expression.
425
+ The result will only be true at a location if the item matches in the column.
426
+
427
+ .. deprecated:: 3.0.0
428
+ :class:`Column` as a parameter is deprecated.
429
+
430
+ Returns
431
+ -------
432
+ :class:`Column`
433
+ Column representing the item(s) got at position out of a list or by key out of a dict.
434
+
435
+ Examples
436
+ --------
437
+ >>> df = spark.createDataFrame([([1, 2], {"key": "value"})], ["l", "d"])
438
+ >>> df.select(df.l.getItem(0), df.d.getItem("key")).show()
439
+ +----+------+
440
+ |l[0]|d[key]|
441
+ +----+------+
442
+ | 1| value|
443
+ +----+------+
444
+ """
445
+ element_at = get_func_from_session("element_at")
446
+ lit = get_func_from_session("lit")
447
+ key = lit(key) if not isinstance(key, Column) else key
448
+ if isinstance(key.expression, exp.Literal) and key.expression.is_number:
449
+ key = key + lit(1)
450
+ return element_at(self, key)
@@ -361,7 +361,7 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
361
361
 
362
362
  cols = self._ensure_list_of_columns(cols)
363
363
  normalize(self.session, expression or self.expression, cols)
364
- return cols
364
+ return list(flatten([self._expand_star(col) for col in cols]))
365
365
 
366
366
  def _ensure_and_normalize_col(self, col):
367
367
  from sqlframe.base.column import Column
@@ -514,6 +514,27 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
514
514
  select_expressions.append(expression_select_pair) # type: ignore
515
515
  return select_expressions
516
516
 
517
+ def _expand_star(self, col: Column) -> t.List[Column]:
518
+ from sqlframe.base.column import Column
519
+
520
+ if isinstance(col.column_expression, exp.Star):
521
+ return self._get_outer_select_columns(self.expression)
522
+ elif (
523
+ isinstance(col.column_expression, exp.Column)
524
+ and isinstance(col.column_expression.this, exp.Star)
525
+ and col.column_expression.args.get("table")
526
+ ):
527
+ for cte in self.expression.ctes:
528
+ if cte.alias_or_name == col.column_expression.args["table"].this:
529
+ return [
530
+ Column.ensure_col(exp.column(x.column_alias_or_name, cte.alias_or_name))
531
+ for x in self._get_outer_select_columns(cte)
532
+ ]
533
+ raise ValueError(
534
+ f"Could not find table to expand star: {col.column_expression.args['table']}"
535
+ )
536
+ return [col]
537
+
517
538
  @t.overload
518
539
  def sql(
519
540
  self,
@@ -1555,7 +1576,7 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
1555
1576
  result = self.session._fetch_rows(sql)
1556
1577
  table = PrettyTable()
1557
1578
  if row := seq_get(result, 0):
1558
- table.field_names = list(row.asDict().keys())
1579
+ table.field_names = row._unique_field_names
1559
1580
  for row in result:
1560
1581
  table.add_row(list(row))
1561
1582
  print(table)
@@ -1135,13 +1135,11 @@ def array_intersect_using_intersection(col1: ColumnOrName, col2: ColumnOrName) -
1135
1135
  def element_at_using_brackets(col: ColumnOrName, value: ColumnOrLiteral) -> Column:
1136
1136
  col_func = get_func_from_session("col")
1137
1137
  lit = get_func_from_session("lit")
1138
- # SQLGlot will auto add 1 to whatever we pass in for the brackets even though the value is already 1 based.
1139
- if not isinstance(value, int):
1140
- raise ValueError("This dialect requires the value must be an integer")
1141
- value_lit = lit(value - 1)
1142
- return Column(
1143
- expression.Bracket(this=col_func(col).expression, expressions=[value_lit.expression])
1144
- )
1138
+ # SQLGlot will auto add 1 to whatever we pass in for the brackets even though the value is already 1 based.
1139
+ value = value if isinstance(value, Column) else lit(value)
1140
+ if [x for x in value.expression.find_all(expression.Literal) if x.is_number]:
1141
+ value = value - lit(1)
1142
+ return Column(expression.Bracket(this=col_func(col).expression, expressions=[value.expression])) # type: ignore
1145
1143
 
1146
1144
 
1147
1145
  def array_remove_using_filter(col: ColumnOrName, value: ColumnOrLiteral) -> Column:
@@ -1923,7 +1923,9 @@ def call_function(funcName: str, *cols: ColumnOrName) -> Column:
1923
1923
  cols = ensure_list(cols) # type: ignore
1924
1924
  if len(cols) > 1:
1925
1925
  return Column.invoke_anonymous_function(cols[0], funcName, *cols[1:])
1926
- return Column.invoke_anonymous_function(cols[0], funcName)
1926
+ elif len(cols) == 1:
1927
+ return Column.invoke_anonymous_function(cols[0], funcName)
1928
+ return Column.invoke_anonymous_function(None, funcName)
1927
1929
 
1928
1930
 
1929
1931
  # @meta(unsupported_engines="*")
@@ -2028,7 +2030,7 @@ def character_length(str: ColumnOrName) -> Column:
2028
2030
  return Column.invoke_anonymous_function(str, "character_length")
2029
2031
 
2030
2032
 
2031
- @meta(unsupported_engines="*")
2033
+ @meta()
2032
2034
  def contains(left: ColumnOrName, right: ColumnOrName) -> Column:
2033
2035
  return Column.invoke_anonymous_function(left, "contains", right)
2034
2036
 
sqlframe/base/types.py CHANGED
@@ -416,3 +416,13 @@ class Row(tuple):
416
416
  )
417
417
  else:
418
418
  return "<Row(%s)>" % ", ".join(repr(field) for field in self)
419
+
420
+ # SQLFrame Specific
421
+ @property
422
+ def _unique_field_names(self) -> t.List[str]:
423
+ fields = []
424
+ for i, field in enumerate(self.__fields__):
425
+ if field in fields:
426
+ field = field + "_" + str(i)
427
+ fields.append(field)
428
+ return fields
@@ -52,6 +52,7 @@ from sqlframe.base.function_alternatives import ( # noqa
52
52
  make_date_from_date_func as make_date,
53
53
  to_date_from_timestamp as to_date,
54
54
  last_day_with_cast as last_day,
55
+ sha1_force_sha1_and_to_hex as sha,
55
56
  sha1_force_sha1_and_to_hex as sha1,
56
57
  hash_from_farm_fingerprint as hash,
57
58
  base64_from_blob as base64,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sqlframe
3
- Version: 1.11.0
3
+ Version: 1.12.0
4
4
  Summary: Turning PySpark Into a Universal DataFrame API
5
5
  Home-page: https://github.com/eakmanrq/sqlframe
6
6
  Author: Ryan Eakman
@@ -1,21 +1,21 @@
1
1
  sqlframe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- sqlframe/_version.py,sha256=rZqhcUFwPMyj_mTWUN2A6qcFr8Ptv08CSbXbruC3jR4,413
2
+ sqlframe/_version.py,sha256=cgR9Mx-45EIoPNcTiuL_LLmCR2oVTYIPi5z0W11owvc,413
3
3
  sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  sqlframe/base/_typing.py,sha256=DuTay8-o9W-pw3RPZCgLunKNJLS9PkaV11G_pxXp9NY,1256
5
5
  sqlframe/base/catalog.py,sha256=ATDGirouUjal05P4ymL-wIi8rgjg_8w4PoACamiO64A,37245
6
- sqlframe/base/column.py,sha256=5bfJWj9dnStHUxLSrWMD-gwiC4-aHKC8lhoC62nhM1k,16153
7
- sqlframe/base/dataframe.py,sha256=Tf5euWTGxFmYirgHK5ZXUI41so5ruo-asVmUwj9DFdo,70015
6
+ sqlframe/base/column.py,sha256=y41rFV7y_seTNkAK3SSqnggGi2otXt0ejKzsMyHCYT4,17515
7
+ sqlframe/base/dataframe.py,sha256=75ZM9r52fufFmVShtntcDUr6dZ1stX9HDmXLuDrYTAU,71004
8
8
  sqlframe/base/decorators.py,sha256=I5osMgx9BuCgbtp4jVM2DNwYJVLzCv-OtTedhQEik0g,1882
9
9
  sqlframe/base/exceptions.py,sha256=9Uwvqn2eAkDpqm4BrRgbL61qM-GMCbJEMAW8otxO46s,370
10
- sqlframe/base/function_alternatives.py,sha256=QESqZy7Osp9-CV5Yoi6XFat5SE8PzCVZ3o7gOFmIY7g,45888
11
- sqlframe/base/functions.py,sha256=hJDpE7GYQpQ1iHjdr1hG_hu0mAIb60vNoghjEcgMREI,187550
10
+ sqlframe/base/function_alternatives.py,sha256=l6Fu0mZ-eleObpYcCAnOXV1HvuHugeoCFUcSV7NmFis,45916
11
+ sqlframe/base/functions.py,sha256=hSLuyO03m2dXPJdmVKp9of-_xj4V2sUhSzJ65Ti240Q,187616
12
12
  sqlframe/base/group.py,sha256=TES9CleVmH3x-0X-tqmuUKfCKSWjH5vg1aU3R6dDmFc,4059
13
13
  sqlframe/base/normalize.py,sha256=nXAJ5CwxVf4DV0GsH-q1w0p8gmjSMlv96k_ez1eVul8,3880
14
14
  sqlframe/base/operations.py,sha256=-AhNuEzcV7ZExoP1oY3blaKip-joQyJeQVvfBTs_2g4,3456
15
15
  sqlframe/base/readerwriter.py,sha256=5NPQMiOrw6I54U243R_6-ynnWYsNksgqwRpPp4IFjIw,25288
16
16
  sqlframe/base/session.py,sha256=2C0OsPoP49AuqVNtPiazTdVpwQA1668g5WOydrYP6SA,22001
17
17
  sqlframe/base/transforms.py,sha256=y0j3SGDz3XCmNGrvassk1S-owllUWfkHyMgZlY6SFO4,467
18
- sqlframe/base/types.py,sha256=K6mjafbX7oIk65CapwamcO2I8nf-poRIpKKt9XDNEaQ,11987
18
+ sqlframe/base/types.py,sha256=iBNk9bpFtb2NBIogYS8i7OlQZMRvpR6XxqzBebsjQDU,12280
19
19
  sqlframe/base/util.py,sha256=tWccrZne-Acn4N2RxYr87mfI_GDMf_K9hRD7BnhGBq0,11756
20
20
  sqlframe/base/window.py,sha256=8hOv-ignPPIsZA9FzvYzcLE9J_glalVaYjIAUdRUX3o,4943
21
21
  sqlframe/base/mixins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -26,7 +26,7 @@ sqlframe/bigquery/__init__.py,sha256=i2NsMbiXOj2xphCtPuNk6cVw4iYeq5_B1I9dVI9aGAk
26
26
  sqlframe/bigquery/catalog.py,sha256=h3aQAQAJg6MMvFpP8Ku0S4pcx30n5qYrqHhWSomxb6A,9319
27
27
  sqlframe/bigquery/column.py,sha256=E1tUa62Y5HajkhgFuebU9zohrGyieudcHzTT8gfalio,40
28
28
  sqlframe/bigquery/dataframe.py,sha256=Y2uy4FEYw0KxIHgnaA9uMwdIzxJzTlD_NSzIe7P7kxA,2405
29
- sqlframe/bigquery/functions.py,sha256=2YqJmBG0F0o10cztFZoP-G4px1QMKuHST6jlj1snUfY,11331
29
+ sqlframe/bigquery/functions.py,sha256=ifJxEyHDwSp2iA-yBt7XHLh9GhVPpgzs5YckMFvag8w,11370
30
30
  sqlframe/bigquery/functions.pyi,sha256=JiyLFLiO0jyJec6j1o4ujPVQ7Tma-c9YHlm-3UQYD9M,13642
31
31
  sqlframe/bigquery/group.py,sha256=UVBNBRTo8OqS-_cS5YwvTeJYgYxeG-d6R3kfyHmlFqw,391
32
32
  sqlframe/bigquery/readwriter.py,sha256=WAD3ZMwkkjOpvPPoZXfaLLNM6tRTeUvdEj-hQZAzXeo,870
@@ -99,8 +99,8 @@ sqlframe/standalone/types.py,sha256=KwNyuXIo-2xVVd4bZED3YrQOobKCtemlxGrJL7DrTC8,
99
99
  sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
100
100
  sqlframe/testing/__init__.py,sha256=VVCosQhitU74A3NnE52O4mNtGZONapuEXcc20QmSlnQ,132
101
101
  sqlframe/testing/utils.py,sha256=9DDYVuocO7tygee3RaajuJNZ24sJwf_LY556kKg7kTw,13011
102
- sqlframe-1.11.0.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
103
- sqlframe-1.11.0.dist-info/METADATA,sha256=JTMUu99Ygcz_fK15KTHUb9OqQcPiQoUjQ1-7RQ09COE,7497
104
- sqlframe-1.11.0.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
105
- sqlframe-1.11.0.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
106
- sqlframe-1.11.0.dist-info/RECORD,,
102
+ sqlframe-1.12.0.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
103
+ sqlframe-1.12.0.dist-info/METADATA,sha256=pSBFDDNjWo6RscllTU6EquCE1DG0C8FXmuad1RINxgk,7497
104
+ sqlframe-1.12.0.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
105
+ sqlframe-1.12.0.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
106
+ sqlframe-1.12.0.dist-info/RECORD,,