sqlframe 3.6.0__py3-none-any.whl → 3.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sqlframe/_version.py CHANGED
@@ -12,5 +12,5 @@ __version__: str
12
12
  __version_tuple__: VERSION_TUPLE
13
13
  version_tuple: VERSION_TUPLE
14
14
 
15
- __version__ = version = '3.6.0'
16
- __version_tuple__ = version_tuple = (3, 6, 0)
15
+ __version__ = version = '3.8.0'
16
+ __version_tuple__ = version_tuple = (3, 8, 0)
@@ -1593,7 +1593,7 @@ def try_to_timestamp_pgtemp(col: ColumnOrName, format: t.Optional[ColumnOrName]
1593
1593
  def typeof_pg_typeof(col: ColumnOrName) -> Column:
1594
1594
  return (
1595
1595
  Column.invoke_anonymous_function(col, "pg_typeof")
1596
- .cast(expression.DataType.build("regtype", dialect="postgres"))
1596
+ .cast(expression.DataType(this=expression.DataType.Type.USERDEFINED, kind="regtype"))
1597
1597
  .cast("text")
1598
1598
  )
1599
1599
 
@@ -2069,9 +2069,11 @@ def character_length(str: ColumnOrName) -> Column:
2069
2069
  return Column.invoke_anonymous_function(str, "character_length")
2070
2070
 
2071
2071
 
2072
- @meta()
2072
+ @meta(unsupported_engines=["bigquery", "postgres"])
2073
2073
  def contains(left: ColumnOrName, right: ColumnOrName) -> Column:
2074
- return Column.invoke_anonymous_function(left, "contains", right)
2074
+ return Column.invoke_expression_over_column(
2075
+ left, expression.Contains, expression=Column.ensure_col(right).expression
2076
+ )
2075
2077
 
2076
2078
 
2077
2079
  @meta(unsupported_engines=["bigquery", "postgres"])
@@ -3484,7 +3486,7 @@ def mask(
3484
3486
  )
3485
3487
 
3486
3488
 
3487
- @meta(unsupported_engines="*")
3489
+ @meta(unsupported_engines=["bigquery"])
3488
3490
  def median(col: ColumnOrName) -> Column:
3489
3491
  """
3490
3492
  Returns the median of the values in a group.
@@ -3520,7 +3522,7 @@ def median(col: ColumnOrName) -> Column:
3520
3522
  |dotNET| 10000.0|
3521
3523
  +------+----------------+
3522
3524
  """
3523
- return Column.invoke_anonymous_function(col, "median")
3525
+ return Column.invoke_expression_over_column(col, expression.Median)
3524
3526
 
3525
3527
 
3526
3528
  @meta(unsupported_engines="*")
@@ -4106,11 +4108,9 @@ def regexp_extract_all(
4106
4108
  >>> df.select(regexp_extract_all('str', col("regexp")).alias('d')).collect()
4107
4109
  [Row(d=['100', '300'])]
4108
4110
  """
4109
- if idx is None:
4110
- return Column.invoke_anonymous_function(str, "regexp_extract_all", regexp)
4111
- else:
4112
- idx = lit(idx) if isinstance(idx, int) else idx
4113
- return Column.invoke_anonymous_function(str, "regexp_extract_all", regexp, idx)
4111
+ return Column.invoke_expression_over_column(
4112
+ str, expression.RegexpExtractAll, expression=regexp, group=idx
4113
+ )
4114
4114
 
4115
4115
 
4116
4116
  @meta(unsupported_engines="*")
@@ -5426,7 +5426,7 @@ def unix_millis(col: ColumnOrName) -> Column:
5426
5426
  return Column.invoke_anonymous_function(col, "unix_millis")
5427
5427
 
5428
5428
 
5429
- @meta(unsupported_engines="*")
5429
+ @meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
5430
5430
  def unix_seconds(col: ColumnOrName) -> Column:
5431
5431
  """Returns the number of seconds since 1970-01-01 00:00:00 UTC.
5432
5432
  Truncates higher levels of precision.
@@ -5441,7 +5441,7 @@ def unix_seconds(col: ColumnOrName) -> Column:
5441
5441
  [Row(n=1437584400)]
5442
5442
  >>> spark.conf.unset("spark.sql.session.timeZone")
5443
5443
  """
5444
- return Column.invoke_anonymous_function(col, "unix_seconds")
5444
+ return Column.invoke_expression_over_column(col, expression.UnixSeconds)
5445
5445
 
5446
5446
 
5447
5447
  @meta(unsupported_engines="*")
@@ -75,7 +75,7 @@ class PandasLoaderMixin(_BaseDataFrameReader, t.Generic[SESSION, DF]):
75
75
 
76
76
  assert path is not None, "path is required"
77
77
  assert isinstance(path, str), "path must be a string"
78
- format = format or _infer_format(path)
78
+ format = format or self.state_format_to_read or _infer_format(path)
79
79
  kwargs = {k: v for k, v in options.items() if v is not None}
80
80
  if format == "json":
81
81
  df = pd.read_json(path, lines=True, **kwargs) # type: ignore
@@ -36,6 +36,7 @@ logger = logging.getLogger(__name__)
36
36
  class _BaseDataFrameReader(t.Generic[SESSION, DF]):
37
37
  def __init__(self, spark: SESSION):
38
38
  self._session = spark
39
+ self.state_format_to_read: t.Optional[str] = None
39
40
 
40
41
  @property
41
42
  def session(self) -> SESSION:
@@ -67,6 +68,44 @@ class _BaseDataFrameReader(t.Generic[SESSION, DF]):
67
68
  for k, v in column_mapping.items()
68
69
  ]
69
70
 
71
+ def format(self, source: str) -> "Self":
72
+ """Specifies the input data source format.
73
+
74
+ .. versionadded:: 1.4.0
75
+
76
+ .. versionchanged:: 3.4.0
77
+ Supports Spark Connect.
78
+
79
+ Parameters
80
+ ----------
81
+ source : str
82
+ string, name of the data source, e.g. 'json', 'parquet'.
83
+
84
+ Examples
85
+ --------
86
+ >>> spark.read.format('json')
87
+ <...readwriter.DataFrameReader object ...>
88
+
89
+ Write a DataFrame into a JSON file and read it back.
90
+
91
+ >>> import tempfile
92
+ >>> with tempfile.TemporaryDirectory() as d:
93
+ ... # Write a DataFrame into a JSON file
94
+ ... spark.createDataFrame(
95
+ ... [{"age": 100, "name": "Hyukjin Kwon"}]
96
+ ... ).write.mode("overwrite").format("json").save(d)
97
+ ...
98
+ ... # Read the JSON file as a DataFrame.
99
+ ... spark.read.format('json').load(d).show()
100
+ +---+------------+
101
+ |age| name|
102
+ +---+------------+
103
+ |100|Hyukjin Kwon|
104
+ +---+------------+
105
+ """
106
+ self.state_format_to_read = source
107
+ return self
108
+
70
109
  def load(
71
110
  self,
72
111
  path: t.Optional[PathOrPaths] = None,
@@ -72,6 +72,7 @@ class DuckDBDataFrameReader(_BaseDataFrameReader["DuckDBSession", "DuckDBDataFra
72
72
  |100|NULL|
73
73
  +---+----+
74
74
  """
75
+ format = format or self.state_format_to_read
75
76
  if schema:
76
77
  column_mapping = ensure_column_mapping(schema)
77
78
  select_column_mapping = column_mapping.copy()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sqlframe
3
- Version: 3.6.0
3
+ Version: 3.8.0
4
4
  Summary: Turning PySpark Into a Universal DataFrame API
5
5
  Home-page: https://github.com/eakmanrq/sqlframe
6
6
  Author: Ryan Eakman
@@ -17,8 +17,8 @@ Classifier: Programming Language :: Python :: 3 :: Only
17
17
  Requires-Python: >=3.8
18
18
  Description-Content-Type: text/markdown
19
19
  License-File: LICENSE
20
- Requires-Dist: prettytable (<3.11.1)
21
- Requires-Dist: sqlglot (<25.29,>=24.0.0)
20
+ Requires-Dist: prettytable (<3.12.1)
21
+ Requires-Dist: sqlglot (<25.32,>=24.0.0)
22
22
  Requires-Dist: typing-extensions (<5,>=4.8)
23
23
  Provides-Extra: bigquery
24
24
  Requires-Dist: google-cloud-bigquery-storage (<3,>=2) ; extra == 'bigquery'
@@ -27,7 +27,7 @@ Provides-Extra: dev
27
27
  Requires-Dist: duckdb (<1.2,>=0.9) ; extra == 'dev'
28
28
  Requires-Dist: findspark (<3,>=2) ; extra == 'dev'
29
29
  Requires-Dist: mypy (<1.14,>=1.10.0) ; extra == 'dev'
30
- Requires-Dist: openai (<1.53,>=1.30) ; extra == 'dev'
30
+ Requires-Dist: openai (<1.55,>=1.30) ; extra == 'dev'
31
31
  Requires-Dist: pandas-stubs (<3,>=2) ; extra == 'dev'
32
32
  Requires-Dist: pandas (<3,>=2) ; extra == 'dev'
33
33
  Requires-Dist: psycopg (<4,>=3.1) ; extra == 'dev'
@@ -50,7 +50,7 @@ Provides-Extra: duckdb
50
50
  Requires-Dist: duckdb (<1.2,>=0.9) ; extra == 'duckdb'
51
51
  Requires-Dist: pandas (<3,>=2) ; extra == 'duckdb'
52
52
  Provides-Extra: openai
53
- Requires-Dist: openai (<1.53,>=1.30) ; extra == 'openai'
53
+ Requires-Dist: openai (<1.55,>=1.30) ; extra == 'openai'
54
54
  Provides-Extra: pandas
55
55
  Requires-Dist: pandas (<3,>=2) ; extra == 'pandas'
56
56
  Provides-Extra: postgres
@@ -1,5 +1,5 @@
1
1
  sqlframe/__init__.py,sha256=E3qCJ4PSEgKz6Hg3ves6LWt3JrQOV8c9HVLSIUOzKNc,3106
2
- sqlframe/_version.py,sha256=Z7ZPT2d9N4AepQhV8KR5f2xj-SC1GJPAoD3gGnvo1ZM,411
2
+ sqlframe/_version.py,sha256=hvmaOw9yeb_zs_ePB0RUeYnhDHdIDSo_FxV0X8T7LEk,411
3
3
  sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  sqlframe/base/_typing.py,sha256=b2clI5HI1zEZKB_3Msx3FeAJQyft44ubUifJwQRVXyQ,1298
5
5
  sqlframe/base/catalog.py,sha256=SzFQalTWdhWzxUY-4ut1f9TfOECp_JmJEgNPfrRKCe0,38457
@@ -7,12 +7,12 @@ sqlframe/base/column.py,sha256=06fhVZ2nCn2QLxnfjdK-oYKeTFJC_smgSxu7u2UYlVg,17878
7
7
  sqlframe/base/dataframe.py,sha256=coeUwntwYbT1g6YKVwk3ZfWMfJqAzd1ECYabBSsNsV0,72892
8
8
  sqlframe/base/decorators.py,sha256=Jy4bf8MhZ-AJ6CWTj59bBJRqamtLbPC0USUMFrY6g0w,449
9
9
  sqlframe/base/exceptions.py,sha256=9Uwvqn2eAkDpqm4BrRgbL61qM-GMCbJEMAW8otxO46s,370
10
- sqlframe/base/function_alternatives.py,sha256=Cqt-OjyWewCXZJ8PV-2vc8yrDG2ennLPm6yWaxWVXRc,50746
11
- sqlframe/base/functions.py,sha256=JDVULt3WI1cwWYboU8ybenIXZPdKSNtnzg7A4xC1Lao,189978
10
+ sqlframe/base/function_alternatives.py,sha256=jofb2-nweefqcjUsd4xVqfRmJSZ-T_0Iq5roW2pL0OA,50768
11
+ sqlframe/base/functions.py,sha256=0pfNPCgFs7qKopkM9Q-E_BxCGJHUqlk3YqIzKg4jdQ4,190024
12
12
  sqlframe/base/group.py,sha256=fsyG5990_Pd7gFPjTFrH9IEoAquL_wEkVpIlBAIkZJU,4091
13
13
  sqlframe/base/normalize.py,sha256=nXAJ5CwxVf4DV0GsH-q1w0p8gmjSMlv96k_ez1eVul8,3880
14
14
  sqlframe/base/operations.py,sha256=-AhNuEzcV7ZExoP1oY3blaKip-joQyJeQVvfBTs_2g4,3456
15
- sqlframe/base/readerwriter.py,sha256=FkImnUR_qNASmXktk0JDsFeOzfh799hd09vI2uznNH8,25350
15
+ sqlframe/base/readerwriter.py,sha256=9cgOZuB3phZbQufY98JRDBVWRww3hsULc6Or1HK2Onk,26554
16
16
  sqlframe/base/session.py,sha256=6vQ2bJa357J1bycx1vMxkIUdcA2YbMIYq956bpytztI,25256
17
17
  sqlframe/base/transforms.py,sha256=y0j3SGDz3XCmNGrvassk1S-owllUWfkHyMgZlY6SFO4,467
18
18
  sqlframe/base/types.py,sha256=iBNk9bpFtb2NBIogYS8i7OlQZMRvpR6XxqzBebsjQDU,12280
@@ -22,7 +22,7 @@ sqlframe/base/window.py,sha256=8hOv-ignPPIsZA9FzvYzcLE9J_glalVaYjIAUdRUX3o,4943
22
22
  sqlframe/base/mixins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
23
23
  sqlframe/base/mixins/catalog_mixins.py,sha256=9tn0mK8oPoqIIjNItystD5tdBMdK9YpkxTG7G9KQl8k,18619
24
24
  sqlframe/base/mixins/dataframe_mixins.py,sha256=3MMQu2hdHG_qtDQ6jDHo0Iy5KtLj4lHePfovCVxTqbo,1411
25
- sqlframe/base/mixins/readwriter_mixins.py,sha256=QnxGVL8ftZfYlBNG0Bl24N_bnA2YioSxUsTSgKIbuvQ,4723
25
+ sqlframe/base/mixins/readwriter_mixins.py,sha256=ap8j_g7PoUGHaHKCPMnRPbXofOsUhUzlaF7Loxy2m-I,4752
26
26
  sqlframe/bigquery/__init__.py,sha256=kbaomhYAANPdxeDQhajv8IHfMg_ENKivtYK-rPwaV08,939
27
27
  sqlframe/bigquery/catalog.py,sha256=8d36IzT5GPWd1FdxJ9vEljOdbIDepHnFOBjwP0bX6FE,11625
28
28
  sqlframe/bigquery/column.py,sha256=E1tUa62Y5HajkhgFuebU9zohrGyieudcHzTT8gfalio,40
@@ -42,7 +42,7 @@ sqlframe/duckdb/dataframe.py,sha256=HZg_uMAz4RsubZJT4-MslUQS_0-InF0_P5Yq5HyJ3wE,
42
42
  sqlframe/duckdb/functions.py,sha256=Ee8o6YFtRdEiq0jNLXxgu5lcbc7Tsg0-lK6oRyxdcjo,1920
43
43
  sqlframe/duckdb/functions.pyi,sha256=bWfQl7Cm1eecI39LJAyyRcC4z7epDJ-h9JOozPsEc34,5879
44
44
  sqlframe/duckdb/group.py,sha256=IkhbW42Ng1U5YT3FkIdiB4zBqRkW4QyTb-1detY1e_4,383
45
- sqlframe/duckdb/readwriter.py,sha256=iY0Wsms35gymilETOhAQGQCHfH8LCj9_uY8aCdF77ds,4806
45
+ sqlframe/duckdb/readwriter.py,sha256=6nGnz2SE-tpuPDQXPI23SQQXRre_raUG5cou3s3NrpA,4859
46
46
  sqlframe/duckdb/session.py,sha256=b5IrKbTkYUVNQGSG2EJPNV9MTdJw4onN-9aMrskjxck,2721
47
47
  sqlframe/duckdb/types.py,sha256=KwNyuXIo-2xVVd4bZED3YrQOobKCtemlxGrJL7DrTC8,34
48
48
  sqlframe/duckdb/udf.py,sha256=Du9LnOtT1lJvB90D4HSR2tB7MXy179jZngDR-EjVjQk,656
@@ -107,8 +107,8 @@ sqlframe/standalone/udf.py,sha256=azmgtUjHNIPs0WMVNId05SHwiYn41MKVBhKXsQJ5dmY,27
107
107
  sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
108
108
  sqlframe/testing/__init__.py,sha256=VVCosQhitU74A3NnE52O4mNtGZONapuEXcc20QmSlnQ,132
109
109
  sqlframe/testing/utils.py,sha256=9DDYVuocO7tygee3RaajuJNZ24sJwf_LY556kKg7kTw,13011
110
- sqlframe-3.6.0.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
111
- sqlframe-3.6.0.dist-info/METADATA,sha256=Vax4ozsDNRqn-MBnTi1jW4WVkvZPDKsf_XGePysEw98,8639
112
- sqlframe-3.6.0.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
113
- sqlframe-3.6.0.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
114
- sqlframe-3.6.0.dist-info/RECORD,,
110
+ sqlframe-3.8.0.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
111
+ sqlframe-3.8.0.dist-info/METADATA,sha256=SYeasQhk4sDbmww5wM0FUhqX63gTihxdZjM8t3qyi3Q,8639
112
+ sqlframe-3.8.0.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
113
+ sqlframe-3.8.0.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
114
+ sqlframe-3.8.0.dist-info/RECORD,,