sqlframe-1.1.3-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlframe/__init__.py +0 -0
- sqlframe/_version.py +16 -0
- sqlframe/base/__init__.py +0 -0
- sqlframe/base/_typing.py +39 -0
- sqlframe/base/catalog.py +1163 -0
- sqlframe/base/column.py +388 -0
- sqlframe/base/dataframe.py +1519 -0
- sqlframe/base/decorators.py +51 -0
- sqlframe/base/exceptions.py +14 -0
- sqlframe/base/function_alternatives.py +1055 -0
- sqlframe/base/functions.py +1678 -0
- sqlframe/base/group.py +102 -0
- sqlframe/base/mixins/__init__.py +0 -0
- sqlframe/base/mixins/catalog_mixins.py +419 -0
- sqlframe/base/mixins/readwriter_mixins.py +118 -0
- sqlframe/base/normalize.py +84 -0
- sqlframe/base/operations.py +87 -0
- sqlframe/base/readerwriter.py +679 -0
- sqlframe/base/session.py +585 -0
- sqlframe/base/transforms.py +13 -0
- sqlframe/base/types.py +418 -0
- sqlframe/base/util.py +242 -0
- sqlframe/base/window.py +139 -0
- sqlframe/bigquery/__init__.py +23 -0
- sqlframe/bigquery/catalog.py +255 -0
- sqlframe/bigquery/column.py +1 -0
- sqlframe/bigquery/dataframe.py +54 -0
- sqlframe/bigquery/functions.py +378 -0
- sqlframe/bigquery/group.py +14 -0
- sqlframe/bigquery/readwriter.py +29 -0
- sqlframe/bigquery/session.py +89 -0
- sqlframe/bigquery/types.py +1 -0
- sqlframe/bigquery/window.py +1 -0
- sqlframe/duckdb/__init__.py +20 -0
- sqlframe/duckdb/catalog.py +108 -0
- sqlframe/duckdb/column.py +1 -0
- sqlframe/duckdb/dataframe.py +55 -0
- sqlframe/duckdb/functions.py +47 -0
- sqlframe/duckdb/group.py +14 -0
- sqlframe/duckdb/readwriter.py +111 -0
- sqlframe/duckdb/session.py +65 -0
- sqlframe/duckdb/types.py +1 -0
- sqlframe/duckdb/window.py +1 -0
- sqlframe/postgres/__init__.py +23 -0
- sqlframe/postgres/catalog.py +106 -0
- sqlframe/postgres/column.py +1 -0
- sqlframe/postgres/dataframe.py +54 -0
- sqlframe/postgres/functions.py +61 -0
- sqlframe/postgres/group.py +14 -0
- sqlframe/postgres/readwriter.py +29 -0
- sqlframe/postgres/session.py +68 -0
- sqlframe/postgres/types.py +1 -0
- sqlframe/postgres/window.py +1 -0
- sqlframe/redshift/__init__.py +23 -0
- sqlframe/redshift/catalog.py +127 -0
- sqlframe/redshift/column.py +1 -0
- sqlframe/redshift/dataframe.py +54 -0
- sqlframe/redshift/functions.py +18 -0
- sqlframe/redshift/group.py +14 -0
- sqlframe/redshift/readwriter.py +29 -0
- sqlframe/redshift/session.py +53 -0
- sqlframe/redshift/types.py +1 -0
- sqlframe/redshift/window.py +1 -0
- sqlframe/snowflake/__init__.py +26 -0
- sqlframe/snowflake/catalog.py +134 -0
- sqlframe/snowflake/column.py +1 -0
- sqlframe/snowflake/dataframe.py +54 -0
- sqlframe/snowflake/functions.py +18 -0
- sqlframe/snowflake/group.py +14 -0
- sqlframe/snowflake/readwriter.py +29 -0
- sqlframe/snowflake/session.py +53 -0
- sqlframe/snowflake/types.py +1 -0
- sqlframe/snowflake/window.py +1 -0
- sqlframe/spark/__init__.py +23 -0
- sqlframe/spark/catalog.py +1028 -0
- sqlframe/spark/column.py +1 -0
- sqlframe/spark/dataframe.py +54 -0
- sqlframe/spark/functions.py +22 -0
- sqlframe/spark/group.py +14 -0
- sqlframe/spark/readwriter.py +29 -0
- sqlframe/spark/session.py +90 -0
- sqlframe/spark/types.py +1 -0
- sqlframe/spark/window.py +1 -0
- sqlframe/standalone/__init__.py +26 -0
- sqlframe/standalone/catalog.py +13 -0
- sqlframe/standalone/column.py +1 -0
- sqlframe/standalone/dataframe.py +36 -0
- sqlframe/standalone/functions.py +1 -0
- sqlframe/standalone/group.py +14 -0
- sqlframe/standalone/readwriter.py +19 -0
- sqlframe/standalone/session.py +40 -0
- sqlframe/standalone/types.py +1 -0
- sqlframe/standalone/window.py +1 -0
- sqlframe-1.1.3.dist-info/LICENSE +21 -0
- sqlframe-1.1.3.dist-info/METADATA +172 -0
- sqlframe-1.1.3.dist-info/RECORD +98 -0
- sqlframe-1.1.3.dist-info/WHEEL +5 -0
- sqlframe-1.1.3.dist-info/top_level.txt +1 -0
sqlframe/duckdb/functions.py
ADDED
@@ -0,0 +1,47 @@
+from __future__ import annotations
+
+import inspect
+import sys
+
+import sqlframe.base.functions # noqa
+
+module = sys.modules["sqlframe.base.functions"]
+globals().update(
+    {
+        name: func
+        for name, func in inspect.getmembers(module, inspect.isfunction)
+        if hasattr(func, "unsupported_engines")
+        and "duckdb" not in func.unsupported_engines
+        and "*" not in func.unsupported_engines
+    }
+)
+
+
+from sqlframe.base.function_alternatives import ( # noqa
+    e_literal as e,
+    expm1_from_exp as expm1,
+    log1p_from_log as log1p,
+    rint_from_round as rint,
+    kurtosis_from_kurtosis_pop as kurtosis,
+    collect_set_from_list_distinct as collect_set,
+    first_always_ignore_nulls as first,
+    factorial_ensure_int as factorial,
+    isnull_using_equal as isnull,
+    nanvl_as_case as nanvl,
+    percentile_approx_without_accuracy as percentile_approx,
+    rand_no_seed as rand,
+    base64_from_blob as base64,
+    decode_from_blob as decode,
+    format_string_with_pipes as format_string,
+    overlay_from_substr as overlay,
+    split_no_limit as split,
+    arrays_overlap_using_intersect as arrays_overlap,
+    slice_as_list_slice as slice,
+    array_join_null_replacement_with_transform as array_join,
+    element_at_using_brackets as element_at,
+    array_remove_using_filter as array_remove,
+    array_union_using_list_concat as array_union,
+    array_min_from_sort as array_min,
+    array_max_from_sort as array_max,
+    sequence_from_generate_series as sequence,
+)
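
Usage sketch (not part of the package diff): the module above re-exports every function from sqlframe.base.functions that does not list "duckdb" in unsupported_engines, then overlays the DuckDB-specific alternatives under the familiar PySpark names. A minimal sketch, assuming an in-memory DuckDBSession and illustrative data:

    from sqlframe.duckdb import DuckDBSession
    from sqlframe.duckdb import functions as F

    session = DuckDBSession()  # defaults to an in-memory DuckDB connection
    df = session.createDataFrame([{"name": "Jack", "age": 30}])
    # expm1 here resolves to expm1_from_exp, the aliased DuckDB alternative above
    df.select(F.expm1("age")).show()
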
sqlframe/duckdb/group.py
ADDED
@@ -0,0 +1,14 @@
+# This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.
+
+from __future__ import annotations
+
+import typing as t
+
+from sqlframe.base.group import _BaseGroupedData
+
+if t.TYPE_CHECKING:
+    from sqlframe.duckdb.dataframe import DuckDBDataFrame
+
+
+class DuckDBGroupedData(_BaseGroupedData["DuckDBDataFrame"]):
+    pass
sqlframe/duckdb/readwriter.py
ADDED
@@ -0,0 +1,111 @@
+# This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.
+
+from __future__ import annotations
+
+import logging
+import typing as t
+
+from sqlglot import exp
+from sqlglot.helper import ensure_list
+
+from sqlframe.base.readerwriter import _BaseDataFrameReader, _BaseDataFrameWriter
+from sqlframe.base.util import ensure_column_mapping, to_csv
+
+if t.TYPE_CHECKING:
+    from sqlframe.base._typing import OptionalPrimitiveType, PathOrPaths
+    from sqlframe.base.types import StructType
+    from sqlframe.duckdb.dataframe import DuckDBDataFrame
+    from sqlframe.duckdb.session import DuckDBSession # noqa
+
+logger = logging.getLogger(__name__)
+
+
+class DuckDBDataFrameReader(_BaseDataFrameReader["DuckDBSession", "DuckDBDataFrame"]):
+    def load(
+        self,
+        path: t.Optional[PathOrPaths] = None,
+        format: t.Optional[str] = None,
+        schema: t.Optional[t.Union[StructType, str]] = None,
+        **options: OptionalPrimitiveType,
+    ) -> DuckDBDataFrame:
+        """Loads data from a data source and returns it as a :class:`DataFrame`.
+
+        .. versionadded:: 1.4.0
+
+        .. versionchanged:: 3.4.0
+            Supports Spark Connect.
+
+        Parameters
+        ----------
+        path : str or list, t.Optional
+            t.Optional string or a list of string for file-system backed data sources.
+        format : str, t.Optional
+            t.Optional string for format of the data source. Default to 'parquet'.
+        schema : :class:`pyspark.sql.types.StructType` or str, t.Optional
+            t.Optional :class:`pyspark.sql.types.StructType` for the input schema
+            or a DDL-formatted string (For example ``col0 INT, col1 DOUBLE``).
+        **options : dict
+            all other string options
+
+        Examples
+        --------
+        Load a CSV file with format, schema and options specified.
+
+        >>> import tempfile
+        >>> with tempfile.TemporaryDirectory() as d:
+        ...     # Write a DataFrame into a CSV file with a header
+        ...     df = spark.createDataFrame([{"age": 100, "name": "Hyukjin Kwon"}])
+        ...     df.write.option("header", True).mode("overwrite").format("csv").save(d)
+        ...
+        ...     # Read the CSV file as a DataFrame with 'nullValue' option set to 'Hyukjin Kwon',
+        ...     # and 'header' option set to `True`.
+        ...     df = spark.read.load(
+        ...         d, schema=df.schema, format="csv", nullValue="Hyukjin Kwon", header=True)
+        ...     df.printSchema()
+        ...     df.show()
+        root
+         |-- age: long (nullable = true)
+         |-- name: string (nullable = true)
+        +---+----+
+        |age|name|
+        +---+----+
+        |100|NULL|
+        +---+----+
+        """
+        if schema:
+            column_mapping = ensure_column_mapping(schema)
+            select_column_mapping = column_mapping.copy()
+            if options.get("filename"):
+                select_column_mapping["filename"] = "VARCHAR"
+            select_columns = [x.expression for x in self._to_casted_columns(select_column_mapping)]
+            if format == "csv":
+                duckdb_columns = ", ".join(
+                    [f"'{column}': '{dtype}'" for column, dtype in column_mapping.items()]
+                )
+                options["columns"] = "{" + duckdb_columns + "}"
+        else:
+            select_columns = [exp.Star()]
+        if format:
+            paths = ",".join([f"'{path}'" for path in ensure_list(path)])
+            from_clause = f"read_{format}([{paths}], {to_csv(options)})"
+        else:
+            from_clause = f"'{path}'"
+        df = self.session.sql(exp.select(*select_columns).from_(from_clause), optimize=False)
+        self.session._last_loaded_file = path # type: ignore
+        return df
+
+
+class DuckDBDataFrameWriter(_BaseDataFrameWriter["DuckDBSession", "DuckDBDataFrame"]):
+    def _write(self, path: str, mode: t.Optional[str], **options): # type: ignore
+        mode, skip = self._validate_mode(path, mode)
+        if skip:
+            return
+        if mode == "append":
+            raise NotImplementedError("Append mode not supported")
+        options = to_csv(options, equality_char=" ") # type: ignore
+        sqls = self._df.sql(pretty=False, optimize=False, as_list=True)
+        for i, sql in enumerate(sqls):
+            if i < len(sqls) - 1:
+                self._df.session._fetch_rows(sql)
+            else:
+                self._df.session._fetch_rows(f"COPY ({sqls[0]}) TO '{path}' ({options})")
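
A round-trip sketch for the DuckDB reader and writer above, mirroring the doctest in load(); the file path and schema string below are illustrative assumptions, not values from the package:

    import tempfile

    from sqlframe.duckdb import DuckDBSession

    session = DuckDBSession()
    with tempfile.TemporaryDirectory() as d:
        path = f"{d}/people.csv"  # assumed output location
        df = session.createDataFrame([{"age": 100, "name": "Hyukjin Kwon"}])
        # The writer executes the query through DuckDB's COPY ... TO statement
        df.write.option("header", True).mode("overwrite").format("csv").save(path)
        # With format="csv" and a schema, load() builds a read_csv([...]) FROM clause
        session.read.load(path, format="csv", schema="age INT, name VARCHAR", header=True).show()
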
sqlframe/duckdb/session.py
ADDED
@@ -0,0 +1,65 @@
+from __future__ import annotations
+
+import typing as t
+from functools import cached_property
+
+from sqlframe.base.session import _BaseSession
+from sqlframe.base.util import soundex
+from sqlframe.duckdb.catalog import DuckDBCatalog
+from sqlframe.duckdb.dataframe import DuckDBDataFrame
+from sqlframe.duckdb.readwriter import (
+    DuckDBDataFrameReader,
+    DuckDBDataFrameWriter,
+)
+
+if t.TYPE_CHECKING:
+    from duckdb import DuckDBPyConnection
+
+else:
+    DuckDBPyConnection = t.Any
+
+
+class DuckDBSession(
+    _BaseSession[ # type: ignore
+        DuckDBCatalog,
+        DuckDBDataFrameReader,
+        DuckDBDataFrameWriter,
+        DuckDBDataFrame,
+        DuckDBPyConnection,
+    ]
+):
+    _catalog = DuckDBCatalog
+    _reader = DuckDBDataFrameReader
+    _writer = DuckDBDataFrameWriter
+    _df = DuckDBDataFrame
+
+    DEFAULT_TIME_FORMAT = "%Y-%m-%d %H:%M:%S"
+
+    def __init__(self, conn: t.Optional[DuckDBPyConnection] = None, *args, **kwargs):
+        import duckdb
+        from duckdb.typing import VARCHAR
+
+        if not hasattr(self, "_conn"):
+            conn = conn or duckdb.connect()
+            conn.create_function("SOUNDEX", lambda x: soundex(x), return_type=VARCHAR)
+            super().__init__(conn, *args, **kwargs)
+
+    @classmethod
+    def _try_get_map(cls, value: t.Any) -> t.Optional[t.Dict[str, t.Any]]:
+        if value and isinstance(value, dict) and "key" in value and "value" in value:
+            return dict(zip(value["key"], value["value"]))
+        return None
+
+    class Builder(_BaseSession.Builder):
+        DEFAULT_INPUT_DIALECT = "duckdb"
+        DEFAULT_OUTPUT_DIALECT = "duckdb"
+
+        @cached_property
+        def session(self) -> DuckDBSession:
+            return DuckDBSession(**self._session_kwargs)
+
+        def getOrCreate(self) -> DuckDBSession:
+            self._set_session_properties()
+            return self.session
+
+    builder = Builder()
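
DuckDBSession above wires the DuckDB catalog, reader, writer, and DataFrame classes to a DuckDBPyConnection and registers a SOUNDEX scalar UDF on it. A minimal construction sketch; the file-backed database path is an illustrative assumption:

    import duckdb

    from sqlframe.duckdb import DuckDBSession

    # Either pass an existing connection explicitly...
    session = DuckDBSession(conn=duckdb.connect("local.duckdb"))

    # ...or use the PySpark-style builder, which falls back to an in-memory connection:
    # session = DuckDBSession.builder.getOrCreate()
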
sqlframe/duckdb/types.py
ADDED
@@ -0,0 +1 @@
+from sqlframe.base.types import *
sqlframe/duckdb/window.py
ADDED
@@ -0,0 +1 @@
+from sqlframe.base.window import *
sqlframe/postgres/__init__.py
ADDED
@@ -0,0 +1,23 @@
+from sqlframe.postgres.catalog import PostgresCatalog
+from sqlframe.postgres.column import Column
+from sqlframe.postgres.dataframe import PostgresDataFrame, PostgresDataFrameNaFunctions
+from sqlframe.postgres.group import PostgresGroupedData
+from sqlframe.postgres.readwriter import (
+    PostgresDataFrameReader,
+    PostgresDataFrameWriter,
+)
+from sqlframe.postgres.session import PostgresSession
+from sqlframe.postgres.window import Window, WindowSpec
+
+__all__ = [
+    "PostgresCatalog",
+    "Column",
+    "PostgresDataFrame",
+    "PostgresDataFrameNaFunctions",
+    "PostgresGroupedData",
+    "PostgresDataFrameReader",
+    "PostgresDataFrameWriter",
+    "PostgresSession",
+    "Window",
+    "WindowSpec",
+]
sqlframe/postgres/catalog.py
ADDED
@@ -0,0 +1,106 @@
+# This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.
+
+from __future__ import annotations
+
+import fnmatch
+import typing as t
+
+from sqlglot import exp, parse_one
+
+from sqlframe.base.catalog import Function, _BaseCatalog
+from sqlframe.base.mixins.catalog_mixins import (
+    GetCurrentCatalogFromFunctionMixin,
+    GetCurrentDatabaseFromFunctionMixin,
+    ListCatalogsFromInfoSchemaMixin,
+    ListColumnsFromInfoSchemaMixin,
+    ListDatabasesFromInfoSchemaMixin,
+    ListTablesFromInfoSchemaMixin,
+    SetCurrentDatabaseFromSearchPathMixin,
+)
+
+if t.TYPE_CHECKING:
+    from sqlframe.postgres.session import PostgresSession # noqa
+    from sqlframe.postgres.dataframe import PostgresDataFrame # noqa
+
+
+class PostgresCatalog(
+    GetCurrentCatalogFromFunctionMixin["PostgresSession", "PostgresDataFrame"],
+    GetCurrentDatabaseFromFunctionMixin["PostgresSession", "PostgresDataFrame"],
+    ListDatabasesFromInfoSchemaMixin["PostgresSession", "PostgresDataFrame"],
+    ListCatalogsFromInfoSchemaMixin["PostgresSession", "PostgresDataFrame"],
+    SetCurrentDatabaseFromSearchPathMixin["PostgresSession", "PostgresDataFrame"],
+    ListTablesFromInfoSchemaMixin["PostgresSession", "PostgresDataFrame"],
+    ListColumnsFromInfoSchemaMixin["PostgresSession", "PostgresDataFrame"],
+    _BaseCatalog["PostgresSession", "PostgresDataFrame"],
+):
+    CURRENT_CATALOG_EXPRESSION: exp.Expression = exp.column("current_catalog")
+
+    def listFunctions(
+        self, dbName: t.Optional[str] = None, pattern: t.Optional[str] = None
+    ) -> t.List[Function]:
+        """
+        Returns a t.List of functions registered in the specified database.
+
+        .. versionadded:: 3.4.0
+
+        Parameters
+        ----------
+        dbName : str
+            name of the database to t.List the functions.
+            ``dbName`` can be qualified with catalog name.
+        pattern : str
+            The pattern that the function name needs to match.
+
+            .. versionchanged: 3.5.0
+                Adds ``pattern`` argument.
+
+        Returns
+        -------
+        t.List
+            A t.List of :class:`Function`.
+
+        Notes
+        -----
+        If no database is specified, the current database and catalog
+        are used. This API includes all temporary functions.
+
+        Examples
+        --------
+        >>> spark.catalog.t.listFunctions()
+        [Function(name=...
+
+        >>> spark.catalog.t.listFunctions(pattern="to_*")
+        [Function(name=...
+
+        >>> spark.catalog.t.listFunctions(pattern="*not_existing_func*")
+        []
+        """
+        # SO: https://stackoverflow.com/questions/44143816/any-way-to-list-all-user-defined-postgresql-functions
+        query = parse_one(
+            """SELECT n.nspname as "namespace",
+       p.proname as "name"
+  FROM pg_catalog.pg_proc p
+       LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace
+ WHERE pg_catalog.pg_function_is_visible(p.oid)
+       AND n.nspname <> 'pg_catalog'
+       AND n.nspname <> 'information_schema'
+ ORDER BY 1, 2;
+            """,
+            dialect=self.session.input_dialect,
+        )
+        functions = self.session._fetch_rows(query)
+        catalog = self.currentCatalog()
+        results = [
+            Function(
+                name=x["name"],
+                catalog=catalog,
+                namespace=[x["namespace"]],
+                description=None,
+                className="",
+                isTemporary=False,
+            )
+            for x in functions
+        ]
+        if pattern:
+            results = [x for x in results if fnmatch.fnmatch(x.name, pattern)]
+        return results
sqlframe/postgres/column.py
ADDED
@@ -0,0 +1 @@
+from sqlframe.base.column import Column
sqlframe/postgres/dataframe.py
ADDED
@@ -0,0 +1,54 @@
+from __future__ import annotations
+
+import logging
+import sys
+import typing as t
+
+from sqlframe.base.dataframe import (
+    _BaseDataFrame,
+    _BaseDataFrameNaFunctions,
+    _BaseDataFrameStatFunctions,
+)
+from sqlframe.postgres.group import PostgresGroupedData
+
+if sys.version_info >= (3, 11):
+    from typing import Self
+else:
+    from typing_extensions import Self
+
+if t.TYPE_CHECKING:
+    from sqlframe.postgres.readwriter import PostgresDataFrameWriter
+    from sqlframe.postgres.session import PostgresSession
+
+
+logger = logging.getLogger(__name__)
+
+
+class PostgresDataFrameNaFunctions(_BaseDataFrameNaFunctions["PostgresDataFrame"]):
+    pass
+
+
+class PostgresDataFrameStatFunctions(_BaseDataFrameStatFunctions["PostgresDataFrame"]):
+    pass
+
+
+class PostgresDataFrame(
+    _BaseDataFrame[
+        "PostgresSession",
+        "PostgresDataFrameWriter",
+        "PostgresDataFrameNaFunctions",
+        "PostgresDataFrameStatFunctions",
+        "PostgresGroupedData",
+    ]
+):
+    _na = PostgresDataFrameNaFunctions
+    _stat = PostgresDataFrameStatFunctions
+    _group_data = PostgresGroupedData
+
+    def cache(self) -> Self:
+        logger.warning("Postgres does not support caching. Ignoring cache() call.")
+        return self
+
+    def persist(self) -> Self:
+        logger.warning("Postgres does not support persist. Ignoring persist() call.")
+        return self
sqlframe/postgres/functions.py
ADDED
@@ -0,0 +1,61 @@
+import inspect
+import sys
+
+import sqlframe.base.functions
+
+module = sys.modules["sqlframe.base.functions"]
+globals().update(
+    {
+        name: func
+        for name, func in inspect.getmembers(module, inspect.isfunction)
+        if hasattr(func, "unsupported_engines")
+        and "postgres" not in func.unsupported_engines
+        and "*" not in func.unsupported_engines
+    }
+)
+
+
+from sqlframe.base.function_alternatives import ( # noqa
+    e_literal as e,
+    expm1_from_exp as expm1,
+    log1p_from_log as log1p,
+    rint_from_round as rint,
+    collect_set_from_list_distinct as collect_set,
+    isnan_using_equal as isnan,
+    isnull_using_equal as isnull,
+    nanvl_as_case as nanvl,
+    rand_no_seed as rand,
+    round_cast_as_numeric as round,
+    year_from_extract as year,
+    quarter_from_extract as quarter,
+    month_from_extract as month,
+    dayofweek_from_extract_with_isodow as dayofweek,
+    dayofmonth_from_extract_with_day as dayofmonth,
+    dayofyear_from_extract_doy as dayofyear,
+    hour_from_extract as hour,
+    minute_from_extract as minute,
+    second_from_extract as second,
+    weekofyear_from_extract_as_week as weekofyear,
+    make_date_casted_as_integer as make_date,
+    date_add_by_multiplication as date_add,
+    date_sub_by_multiplication as date_sub,
+    date_diff_with_subtraction as date_diff,
+    add_months_by_multiplication as add_months,
+    months_between_from_age_and_extract as months_between,
+    from_unixtime_from_timestamp as from_unixtime,
+    unix_timestamp_from_extract as unix_timestamp,
+    base64_from_blob as base64,
+    bas64_from_encode as base64,
+    unbase64_from_decode as unbase64,
+    decode_from_convert_from as decode,
+    encode_from_convert_to as encode,
+    format_number_from_to_char as format_number,
+    format_string_with_format as format_string,
+    split_from_regex_split_to_array as split,
+    array_contains_any as array_contains,
+    slice_with_brackets as slice,
+    element_at_using_brackets as element_at,
+    get_json_object_using_arrow_op as get_json_object,
+    array_min_from_subquery as array_min,
+    array_max_from_subquery as array_max,
+)
sqlframe/postgres/group.py
ADDED
@@ -0,0 +1,14 @@
+# This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.
+
+from __future__ import annotations
+
+import typing as t
+
+from sqlframe.base.group import _BaseGroupedData
+
+if t.TYPE_CHECKING:
+    from sqlframe.postgres.dataframe import PostgresDataFrame
+
+
+class PostgresGroupedData(_BaseGroupedData["PostgresDataFrame"]):
+    pass
sqlframe/postgres/readwriter.py
ADDED
@@ -0,0 +1,29 @@
+# This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.
+
+from __future__ import annotations
+
+import typing as t
+
+from sqlframe.base.mixins.readwriter_mixins import PandasLoaderMixin, PandasWriterMixin
+from sqlframe.base.readerwriter import (
+    _BaseDataFrameReader,
+    _BaseDataFrameWriter,
+)
+
+if t.TYPE_CHECKING:
+    from sqlframe.postgres.session import PostgresSession # noqa
+    from sqlframe.postgres.dataframe import PostgresDataFrame # noqa
+
+
+class PostgresDataFrameReader(
+    PandasLoaderMixin["PostgresSession", "PostgresDataFrame"],
+    _BaseDataFrameReader["PostgresSession", "PostgresDataFrame"],
+):
+    pass
+
+
+class PostgresDataFrameWriter(
+    PandasWriterMixin["PostgresSession", "PostgresDataFrame"],
+    _BaseDataFrameWriter["PostgresSession", "PostgresDataFrame"],
+):
+    pass
sqlframe/postgres/session.py
ADDED
@@ -0,0 +1,68 @@
+from __future__ import annotations
+
+import typing as t
+
+from sqlglot import exp
+
+from sqlframe.base.session import _BaseSession
+from sqlframe.postgres.catalog import PostgresCatalog
+from sqlframe.postgres.dataframe import PostgresDataFrame
+from sqlframe.postgres.readwriter import (
+    PostgresDataFrameReader,
+    PostgresDataFrameWriter,
+)
+
+if t.TYPE_CHECKING:
+    from psycopg2.extensions import connection as psycopg2_connection
+
+    from sqlframe.base.types import Row
+else:
+    psycopg2_connection = t.Any
+
+
+class PostgresSession(
+    _BaseSession[ # type: ignore
+        PostgresCatalog,
+        PostgresDataFrameReader,
+        PostgresDataFrameWriter,
+        PostgresDataFrame,
+        psycopg2_connection,
+    ],
+):
+    _catalog = PostgresCatalog
+    _reader = PostgresDataFrameReader
+    _writer = PostgresDataFrameWriter
+    _df = PostgresDataFrame
+
+    DEFAULT_TIME_FORMAT = "yyyy-MM-dd HH:MI:SS"
+
+    def __init__(self, conn: t.Optional[psycopg2_connection] = None):
+        if not hasattr(self, "_conn"):
+            super().__init__(conn)
+            self._execute("CREATE EXTENSION IF NOT EXISTS fuzzystrmatch")
+
+    def _fetch_rows(
+        self, sql: t.Union[str, exp.Expression], *, quote_identifiers: bool = True
+    ) -> t.List[Row]:
+        from psycopg2 import ProgrammingError
+
+        try:
+            return super()._fetch_rows(sql, quote_identifiers=quote_identifiers)
+        except ProgrammingError as e:
+            if "no results to fetch" in str(e):
+                return []
+            raise e
+
+    class Builder(_BaseSession.Builder):
+        DEFAULT_INPUT_DIALECT = "postgres"
+        DEFAULT_OUTPUT_DIALECT = "postgres"
+
+        @property
+        def session(self) -> PostgresSession:
+            return PostgresSession(**self._session_kwargs)
+
+        def getOrCreate(self) -> PostgresSession:
+            self._set_session_properties()
+            return self.session
+
+    builder = Builder()
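
A connection-level sketch for PostgresSession above; the psycopg2 DSN values are illustrative assumptions. On first construction the session enables the fuzzystrmatch extension, and catalog calls such as listFunctions() query pg_catalog:

    import psycopg2

    from sqlframe.postgres import PostgresSession

    # Assumed local database credentials, for illustration only
    conn = psycopg2.connect(dbname="postgres", user="postgres", password="postgres", host="localhost")
    session = PostgresSession(conn=conn)

    # listFunctions() comes from PostgresCatalog; pattern filtering uses fnmatch
    for fn in session.catalog.listFunctions(pattern="to_*"):
        print(fn.name, fn.namespace)
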
sqlframe/postgres/types.py
ADDED
@@ -0,0 +1 @@
+from sqlframe.base.types import *
sqlframe/postgres/window.py
ADDED
@@ -0,0 +1 @@
+from sqlframe.base.window import *
sqlframe/redshift/__init__.py
ADDED
@@ -0,0 +1,23 @@
+from sqlframe.redshift.catalog import RedshiftCatalog
+from sqlframe.redshift.column import Column
+from sqlframe.redshift.dataframe import RedshiftDataFrame, RedshiftDataFrameNaFunctions
+from sqlframe.redshift.group import RedshiftGroupedData
+from sqlframe.redshift.readwriter import (
+    RedshiftDataFrameReader,
+    RedshiftDataFrameWriter,
+)
+from sqlframe.redshift.session import RedshiftSession
+from sqlframe.redshift.window import Window, WindowSpec
+
+__all__ = [
+    "RedshiftCatalog",
+    "Column",
+    "RedshiftDataFrame",
+    "RedshiftDataFrameNaFunctions",
+    "RedshiftGroupedData",
+    "RedshiftDataFrameReader",
+    "RedshiftDataFrameWriter",
+    "RedshiftSession",
+    "Window",
+    "WindowSpec",
+]