sqlframe-1.1.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98)
  1. sqlframe/__init__.py +0 -0
  2. sqlframe/_version.py +16 -0
  3. sqlframe/base/__init__.py +0 -0
  4. sqlframe/base/_typing.py +39 -0
  5. sqlframe/base/catalog.py +1163 -0
  6. sqlframe/base/column.py +388 -0
  7. sqlframe/base/dataframe.py +1519 -0
  8. sqlframe/base/decorators.py +51 -0
  9. sqlframe/base/exceptions.py +14 -0
  10. sqlframe/base/function_alternatives.py +1055 -0
  11. sqlframe/base/functions.py +1678 -0
  12. sqlframe/base/group.py +102 -0
  13. sqlframe/base/mixins/__init__.py +0 -0
  14. sqlframe/base/mixins/catalog_mixins.py +419 -0
  15. sqlframe/base/mixins/readwriter_mixins.py +118 -0
  16. sqlframe/base/normalize.py +84 -0
  17. sqlframe/base/operations.py +87 -0
  18. sqlframe/base/readerwriter.py +679 -0
  19. sqlframe/base/session.py +585 -0
  20. sqlframe/base/transforms.py +13 -0
  21. sqlframe/base/types.py +418 -0
  22. sqlframe/base/util.py +242 -0
  23. sqlframe/base/window.py +139 -0
  24. sqlframe/bigquery/__init__.py +23 -0
  25. sqlframe/bigquery/catalog.py +255 -0
  26. sqlframe/bigquery/column.py +1 -0
  27. sqlframe/bigquery/dataframe.py +54 -0
  28. sqlframe/bigquery/functions.py +378 -0
  29. sqlframe/bigquery/group.py +14 -0
  30. sqlframe/bigquery/readwriter.py +29 -0
  31. sqlframe/bigquery/session.py +89 -0
  32. sqlframe/bigquery/types.py +1 -0
  33. sqlframe/bigquery/window.py +1 -0
  34. sqlframe/duckdb/__init__.py +20 -0
  35. sqlframe/duckdb/catalog.py +108 -0
  36. sqlframe/duckdb/column.py +1 -0
  37. sqlframe/duckdb/dataframe.py +55 -0
  38. sqlframe/duckdb/functions.py +47 -0
  39. sqlframe/duckdb/group.py +14 -0
  40. sqlframe/duckdb/readwriter.py +111 -0
  41. sqlframe/duckdb/session.py +65 -0
  42. sqlframe/duckdb/types.py +1 -0
  43. sqlframe/duckdb/window.py +1 -0
  44. sqlframe/postgres/__init__.py +23 -0
  45. sqlframe/postgres/catalog.py +106 -0
  46. sqlframe/postgres/column.py +1 -0
  47. sqlframe/postgres/dataframe.py +54 -0
  48. sqlframe/postgres/functions.py +61 -0
  49. sqlframe/postgres/group.py +14 -0
  50. sqlframe/postgres/readwriter.py +29 -0
  51. sqlframe/postgres/session.py +68 -0
  52. sqlframe/postgres/types.py +1 -0
  53. sqlframe/postgres/window.py +1 -0
  54. sqlframe/redshift/__init__.py +23 -0
  55. sqlframe/redshift/catalog.py +127 -0
  56. sqlframe/redshift/column.py +1 -0
  57. sqlframe/redshift/dataframe.py +54 -0
  58. sqlframe/redshift/functions.py +18 -0
  59. sqlframe/redshift/group.py +14 -0
  60. sqlframe/redshift/readwriter.py +29 -0
  61. sqlframe/redshift/session.py +53 -0
  62. sqlframe/redshift/types.py +1 -0
  63. sqlframe/redshift/window.py +1 -0
  64. sqlframe/snowflake/__init__.py +26 -0
  65. sqlframe/snowflake/catalog.py +134 -0
  66. sqlframe/snowflake/column.py +1 -0
  67. sqlframe/snowflake/dataframe.py +54 -0
  68. sqlframe/snowflake/functions.py +18 -0
  69. sqlframe/snowflake/group.py +14 -0
  70. sqlframe/snowflake/readwriter.py +29 -0
  71. sqlframe/snowflake/session.py +53 -0
  72. sqlframe/snowflake/types.py +1 -0
  73. sqlframe/snowflake/window.py +1 -0
  74. sqlframe/spark/__init__.py +23 -0
  75. sqlframe/spark/catalog.py +1028 -0
  76. sqlframe/spark/column.py +1 -0
  77. sqlframe/spark/dataframe.py +54 -0
  78. sqlframe/spark/functions.py +22 -0
  79. sqlframe/spark/group.py +14 -0
  80. sqlframe/spark/readwriter.py +29 -0
  81. sqlframe/spark/session.py +90 -0
  82. sqlframe/spark/types.py +1 -0
  83. sqlframe/spark/window.py +1 -0
  84. sqlframe/standalone/__init__.py +26 -0
  85. sqlframe/standalone/catalog.py +13 -0
  86. sqlframe/standalone/column.py +1 -0
  87. sqlframe/standalone/dataframe.py +36 -0
  88. sqlframe/standalone/functions.py +1 -0
  89. sqlframe/standalone/group.py +14 -0
  90. sqlframe/standalone/readwriter.py +19 -0
  91. sqlframe/standalone/session.py +40 -0
  92. sqlframe/standalone/types.py +1 -0
  93. sqlframe/standalone/window.py +1 -0
  94. sqlframe-1.1.3.dist-info/LICENSE +21 -0
  95. sqlframe-1.1.3.dist-info/METADATA +172 -0
  96. sqlframe-1.1.3.dist-info/RECORD +98 -0
  97. sqlframe-1.1.3.dist-info/WHEEL +5 -0
  98. sqlframe-1.1.3.dist-info/top_level.txt +1 -0
sqlframe/duckdb/functions.py
@@ -0,0 +1,47 @@
+ from __future__ import annotations
+
+ import inspect
+ import sys
+
+ import sqlframe.base.functions  # noqa
+
+ module = sys.modules["sqlframe.base.functions"]
+ globals().update(
+     {
+         name: func
+         for name, func in inspect.getmembers(module, inspect.isfunction)
+         if hasattr(func, "unsupported_engines")
+         and "duckdb" not in func.unsupported_engines
+         and "*" not in func.unsupported_engines
+     }
+ )
+
+
+ from sqlframe.base.function_alternatives import (  # noqa
+     e_literal as e,
+     expm1_from_exp as expm1,
+     log1p_from_log as log1p,
+     rint_from_round as rint,
+     kurtosis_from_kurtosis_pop as kurtosis,
+     collect_set_from_list_distinct as collect_set,
+     first_always_ignore_nulls as first,
+     factorial_ensure_int as factorial,
+     isnull_using_equal as isnull,
+     nanvl_as_case as nanvl,
+     percentile_approx_without_accuracy as percentile_approx,
+     rand_no_seed as rand,
+     base64_from_blob as base64,
+     decode_from_blob as decode,
+     format_string_with_pipes as format_string,
+     overlay_from_substr as overlay,
+     split_no_limit as split,
+     arrays_overlap_using_intersect as arrays_overlap,
+     slice_as_list_slice as slice,
+     array_join_null_replacement_with_transform as array_join,
+     element_at_using_brackets as element_at,
+     array_remove_using_filter as array_remove,
+     array_union_using_list_concat as array_union,
+     array_min_from_sort as array_min,
+     array_max_from_sort as array_max,
+     sequence_from_generate_series as sequence,
+ )
sqlframe/duckdb/group.py
@@ -0,0 +1,14 @@
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.
+
+ from __future__ import annotations
+
+ import typing as t
+
+ from sqlframe.base.group import _BaseGroupedData
+
+ if t.TYPE_CHECKING:
+     from sqlframe.duckdb.dataframe import DuckDBDataFrame
+
+
+ class DuckDBGroupedData(_BaseGroupedData["DuckDBDataFrame"]):
+     pass
sqlframe/duckdb/readwriter.py
@@ -0,0 +1,111 @@
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.
+
+ from __future__ import annotations
+
+ import logging
+ import typing as t
+
+ from sqlglot import exp
+ from sqlglot.helper import ensure_list
+
+ from sqlframe.base.readerwriter import _BaseDataFrameReader, _BaseDataFrameWriter
+ from sqlframe.base.util import ensure_column_mapping, to_csv
+
+ if t.TYPE_CHECKING:
+     from sqlframe.base._typing import OptionalPrimitiveType, PathOrPaths
+     from sqlframe.base.types import StructType
+     from sqlframe.duckdb.dataframe import DuckDBDataFrame
+     from sqlframe.duckdb.session import DuckDBSession  # noqa
+
+ logger = logging.getLogger(__name__)
+
+
+ class DuckDBDataFrameReader(_BaseDataFrameReader["DuckDBSession", "DuckDBDataFrame"]):
+     def load(
+         self,
+         path: t.Optional[PathOrPaths] = None,
+         format: t.Optional[str] = None,
+         schema: t.Optional[t.Union[StructType, str]] = None,
+         **options: OptionalPrimitiveType,
+     ) -> DuckDBDataFrame:
+         """Loads data from a data source and returns it as a :class:`DataFrame`.
+
+         .. versionadded:: 1.4.0
+
+         .. versionchanged:: 3.4.0
+             Supports Spark Connect.
+
+         Parameters
+         ----------
+         path : str or list, optional
+             Optional string or a list of strings for file-system backed data sources.
+         format : str, optional
+             Optional string for the format of the data source. Defaults to 'parquet'.
+         schema : :class:`pyspark.sql.types.StructType` or str, optional
+             Optional :class:`pyspark.sql.types.StructType` for the input schema
+             or a DDL-formatted string (for example ``col0 INT, col1 DOUBLE``).
+         **options : dict
+             All other string options.
+
+         Examples
+         --------
+         Load a CSV file with format, schema and options specified.
+
+         >>> import tempfile
+         >>> with tempfile.TemporaryDirectory() as d:
+         ...     # Write a DataFrame into a CSV file with a header
+         ...     df = spark.createDataFrame([{"age": 100, "name": "Hyukjin Kwon"}])
+         ...     df.write.option("header", True).mode("overwrite").format("csv").save(d)
+         ...
+         ...     # Read the CSV file as a DataFrame with 'nullValue' option set to 'Hyukjin Kwon',
+         ...     # and 'header' option set to `True`.
+         ...     df = spark.read.load(
+         ...         d, schema=df.schema, format="csv", nullValue="Hyukjin Kwon", header=True)
+         ...     df.printSchema()
+         ...     df.show()
+         root
+          |-- age: long (nullable = true)
+          |-- name: string (nullable = true)
+         +---+----+
+         |age|name|
+         +---+----+
+         |100|NULL|
+         +---+----+
+         """
+         if schema:
+             column_mapping = ensure_column_mapping(schema)
+             select_column_mapping = column_mapping.copy()
+             if options.get("filename"):
+                 select_column_mapping["filename"] = "VARCHAR"
+             select_columns = [x.expression for x in self._to_casted_columns(select_column_mapping)]
+             if format == "csv":
+                 duckdb_columns = ", ".join(
+                     [f"'{column}': '{dtype}'" for column, dtype in column_mapping.items()]
+                 )
+                 options["columns"] = "{" + duckdb_columns + "}"
+         else:
+             select_columns = [exp.Star()]
+         if format:
+             paths = ",".join([f"'{path}'" for path in ensure_list(path)])
+             from_clause = f"read_{format}([{paths}], {to_csv(options)})"
+         else:
+             from_clause = f"'{path}'"
+         df = self.session.sql(exp.select(*select_columns).from_(from_clause), optimize=False)
+         self.session._last_loaded_file = path  # type: ignore
+         return df
+
+
+ class DuckDBDataFrameWriter(_BaseDataFrameWriter["DuckDBSession", "DuckDBDataFrame"]):
+     def _write(self, path: str, mode: t.Optional[str], **options):  # type: ignore
+         mode, skip = self._validate_mode(path, mode)
+         if skip:
+             return
+         if mode == "append":
+             raise NotImplementedError("Append mode not supported")
+         options = to_csv(options, equality_char=" ")  # type: ignore
+         sqls = self._df.sql(pretty=False, optimize=False, as_list=True)
+         for i, sql in enumerate(sqls):
+             if i < len(sqls) - 1:
+                 self._df.session._fetch_rows(sql)
+             else:
+                 self._df.session._fetch_rows(f"COPY ({sqls[0]}) TO '{path}' ({options})")
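
A hedged sketch of how the load() implementation above is exercised: with format="csv" and a schema, it casts the selected columns and passes an explicit columns mapping to DuckDB's read_csv(); without a format it selects directly from the path. It assumes the Spark-style session.read property; the file name, column names, and the header option are illustrative assumptions, not taken from the package.

from sqlframe.duckdb import DuckDBSession

session = DuckDBSession()  # opens an in-memory DuckDB connection by default

# Assumed local file "people.csv" with a header row.
df = session.read.load(
    "people.csv",
    format="csv",
    schema="name VARCHAR, age INT",  # DDL string handled by ensure_column_mapping
    header=True,                     # forwarded into the read_csv(...) options
)
df.show()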
sqlframe/duckdb/session.py
@@ -0,0 +1,65 @@
+ from __future__ import annotations
+
+ import typing as t
+ from functools import cached_property
+
+ from sqlframe.base.session import _BaseSession
+ from sqlframe.base.util import soundex
+ from sqlframe.duckdb.catalog import DuckDBCatalog
+ from sqlframe.duckdb.dataframe import DuckDBDataFrame
+ from sqlframe.duckdb.readwriter import (
+     DuckDBDataFrameReader,
+     DuckDBDataFrameWriter,
+ )
+
+ if t.TYPE_CHECKING:
+     from duckdb import DuckDBPyConnection
+
+ else:
+     DuckDBPyConnection = t.Any
+
+
+ class DuckDBSession(
+     _BaseSession[  # type: ignore
+         DuckDBCatalog,
+         DuckDBDataFrameReader,
+         DuckDBDataFrameWriter,
+         DuckDBDataFrame,
+         DuckDBPyConnection,
+     ]
+ ):
+     _catalog = DuckDBCatalog
+     _reader = DuckDBDataFrameReader
+     _writer = DuckDBDataFrameWriter
+     _df = DuckDBDataFrame
+
+     DEFAULT_TIME_FORMAT = "%Y-%m-%d %H:%M:%S"
+
+     def __init__(self, conn: t.Optional[DuckDBPyConnection] = None, *args, **kwargs):
+         import duckdb
+         from duckdb.typing import VARCHAR
+
+         if not hasattr(self, "_conn"):
+             conn = conn or duckdb.connect()
+             conn.create_function("SOUNDEX", lambda x: soundex(x), return_type=VARCHAR)
+             super().__init__(conn, *args, **kwargs)
+
+     @classmethod
+     def _try_get_map(cls, value: t.Any) -> t.Optional[t.Dict[str, t.Any]]:
+         if value and isinstance(value, dict) and "key" in value and "value" in value:
+             return dict(zip(value["key"], value["value"]))
+         return None
+
+     class Builder(_BaseSession.Builder):
+         DEFAULT_INPUT_DIALECT = "duckdb"
+         DEFAULT_OUTPUT_DIALECT = "duckdb"
+
+         @cached_property
+         def session(self) -> DuckDBSession:
+             return DuckDBSession(**self._session_kwargs)
+
+         def getOrCreate(self) -> DuckDBSession:
+             self._set_session_properties()
+             return self.session
+
+     builder = Builder()
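
A minimal sketch of constructing the session defined above. Passing an existing connection is optional; with no argument the session calls duckdb.connect() itself and registers the SOUNDEX UDF. The sketch assumes the Spark-style session.sql()/df.sql() entry points used elsewhere in this diff; the table and query are illustrative.

import duckdb

from sqlframe.duckdb import DuckDBSession

conn = duckdb.connect()  # in-memory database
conn.execute("CREATE TABLE t AS SELECT 1 AS id, 'a' AS label")

session = DuckDBSession(conn=conn)
df = session.sql("SELECT id, label FROM t")
print(df.sql())  # the DuckDB SQL sqlframe will run
df.show()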
sqlframe/duckdb/types.py
@@ -0,0 +1 @@
+ from sqlframe.base.types import *
sqlframe/duckdb/window.py
@@ -0,0 +1 @@
+ from sqlframe.base.window import *
sqlframe/postgres/__init__.py
@@ -0,0 +1,23 @@
+ from sqlframe.postgres.catalog import PostgresCatalog
+ from sqlframe.postgres.column import Column
+ from sqlframe.postgres.dataframe import PostgresDataFrame, PostgresDataFrameNaFunctions
+ from sqlframe.postgres.group import PostgresGroupedData
+ from sqlframe.postgres.readwriter import (
+     PostgresDataFrameReader,
+     PostgresDataFrameWriter,
+ )
+ from sqlframe.postgres.session import PostgresSession
+ from sqlframe.postgres.window import Window, WindowSpec
+
+ __all__ = [
+     "PostgresCatalog",
+     "Column",
+     "PostgresDataFrame",
+     "PostgresDataFrameNaFunctions",
+     "PostgresGroupedData",
+     "PostgresDataFrameReader",
+     "PostgresDataFrameWriter",
+     "PostgresSession",
+     "Window",
+     "WindowSpec",
+ ]
sqlframe/postgres/catalog.py
@@ -0,0 +1,106 @@
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.
+
+ from __future__ import annotations
+
+ import fnmatch
+ import typing as t
+
+ from sqlglot import exp, parse_one
+
+ from sqlframe.base.catalog import Function, _BaseCatalog
+ from sqlframe.base.mixins.catalog_mixins import (
+     GetCurrentCatalogFromFunctionMixin,
+     GetCurrentDatabaseFromFunctionMixin,
+     ListCatalogsFromInfoSchemaMixin,
+     ListColumnsFromInfoSchemaMixin,
+     ListDatabasesFromInfoSchemaMixin,
+     ListTablesFromInfoSchemaMixin,
+     SetCurrentDatabaseFromSearchPathMixin,
+ )
+
+ if t.TYPE_CHECKING:
+     from sqlframe.postgres.session import PostgresSession  # noqa
+     from sqlframe.postgres.dataframe import PostgresDataFrame  # noqa
+
+
+ class PostgresCatalog(
+     GetCurrentCatalogFromFunctionMixin["PostgresSession", "PostgresDataFrame"],
+     GetCurrentDatabaseFromFunctionMixin["PostgresSession", "PostgresDataFrame"],
+     ListDatabasesFromInfoSchemaMixin["PostgresSession", "PostgresDataFrame"],
+     ListCatalogsFromInfoSchemaMixin["PostgresSession", "PostgresDataFrame"],
+     SetCurrentDatabaseFromSearchPathMixin["PostgresSession", "PostgresDataFrame"],
+     ListTablesFromInfoSchemaMixin["PostgresSession", "PostgresDataFrame"],
+     ListColumnsFromInfoSchemaMixin["PostgresSession", "PostgresDataFrame"],
+     _BaseCatalog["PostgresSession", "PostgresDataFrame"],
+ ):
+     CURRENT_CATALOG_EXPRESSION: exp.Expression = exp.column("current_catalog")
+
+     def listFunctions(
+         self, dbName: t.Optional[str] = None, pattern: t.Optional[str] = None
+     ) -> t.List[Function]:
+         """
+         Returns a list of functions registered in the specified database.
+
+         .. versionadded:: 3.4.0
+
+         Parameters
+         ----------
+         dbName : str
+             Name of the database to list the functions.
+             ``dbName`` can be qualified with catalog name.
+         pattern : str
+             The pattern that the function name needs to match.
+
+             .. versionchanged:: 3.5.0
+                 Adds ``pattern`` argument.
+
+         Returns
+         -------
+         list
+             A list of :class:`Function`.
+
+         Notes
+         -----
+         If no database is specified, the current database and catalog
+         are used. This API includes all temporary functions.
+
+         Examples
+         --------
+         >>> spark.catalog.listFunctions()
+         [Function(name=...
+
+         >>> spark.catalog.listFunctions(pattern="to_*")
+         [Function(name=...
+
+         >>> spark.catalog.listFunctions(pattern="*not_existing_func*")
+         []
+         """
+         # SO: https://stackoverflow.com/questions/44143816/any-way-to-list-all-user-defined-postgresql-functions
+         query = parse_one(
+             """SELECT n.nspname as "namespace",
+                       p.proname as "name"
+                FROM pg_catalog.pg_proc p
+                LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace
+                WHERE pg_catalog.pg_function_is_visible(p.oid)
+                      AND n.nspname <> 'pg_catalog'
+                      AND n.nspname <> 'information_schema'
+                ORDER BY 1, 2;
+             """,
+             dialect=self.session.input_dialect,
+         )
+         functions = self.session._fetch_rows(query)
+         catalog = self.currentCatalog()
+         results = [
+             Function(
+                 name=x["name"],
+                 catalog=catalog,
+                 namespace=[x["namespace"]],
+                 description=None,
+                 className="",
+                 isTemporary=False,
+             )
+             for x in functions
+         ]
+         if pattern:
+             results = [x for x in results if fnmatch.fnmatch(x.name, pattern)]
+         return results
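
A hedged sketch of calling the listFunctions() override above: it queries pg_catalog.pg_proc for visible, non-system functions and then applies the optional pattern with fnmatch (glob-style, not regex). It assumes the Spark-style session.catalog property; the connection details are illustrative assumptions.

import psycopg2

from sqlframe.postgres import PostgresSession

conn = psycopg2.connect(dbname="postgres", user="postgres")  # assumed local server
session = PostgresSession(conn=conn)

for fn in session.catalog.listFunctions(pattern="to_*"):
    print(fn.namespace, fn.name)  # namespace is a one-element list, e.g. ['public']

# No match simply yields an empty list, as in the docstring example.
assert session.catalog.listFunctions(pattern="*definitely_not_a_func*") == []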
sqlframe/postgres/column.py
@@ -0,0 +1 @@
+ from sqlframe.base.column import Column
sqlframe/postgres/dataframe.py
@@ -0,0 +1,54 @@
+ from __future__ import annotations
+
+ import logging
+ import sys
+ import typing as t
+
+ from sqlframe.base.dataframe import (
+     _BaseDataFrame,
+     _BaseDataFrameNaFunctions,
+     _BaseDataFrameStatFunctions,
+ )
+ from sqlframe.postgres.group import PostgresGroupedData
+
+ if sys.version_info >= (3, 11):
+     from typing import Self
+ else:
+     from typing_extensions import Self
+
+ if t.TYPE_CHECKING:
+     from sqlframe.postgres.readwriter import PostgresDataFrameWriter
+     from sqlframe.postgres.session import PostgresSession
+
+
+ logger = logging.getLogger(__name__)
+
+
+ class PostgresDataFrameNaFunctions(_BaseDataFrameNaFunctions["PostgresDataFrame"]):
+     pass
+
+
+ class PostgresDataFrameStatFunctions(_BaseDataFrameStatFunctions["PostgresDataFrame"]):
+     pass
+
+
+ class PostgresDataFrame(
+     _BaseDataFrame[
+         "PostgresSession",
+         "PostgresDataFrameWriter",
+         "PostgresDataFrameNaFunctions",
+         "PostgresDataFrameStatFunctions",
+         "PostgresGroupedData",
+     ]
+ ):
+     _na = PostgresDataFrameNaFunctions
+     _stat = PostgresDataFrameStatFunctions
+     _group_data = PostgresGroupedData
+
+     def cache(self) -> Self:
+         logger.warning("Postgres does not support caching. Ignoring cache() call.")
+         return self
+
+     def persist(self) -> Self:
+         logger.warning("Postgres does not support persist. Ignoring persist() call.")
+         return self
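
The cache()/persist() overrides above deliberately do nothing beyond logging, so Spark code that calls them can run unchanged against Postgres. A small sketch, with the connection details assumed:

import logging

import psycopg2

from sqlframe.postgres import PostgresSession

logging.basicConfig(level=logging.WARNING)

conn = psycopg2.connect(dbname="postgres", user="postgres")  # assumed local server
session = PostgresSession(conn=conn)

df = session.sql("SELECT 1 AS id")
assert df.cache() is df    # logs: Postgres does not support caching. Ignoring cache() call.
assert df.persist() is df  # logs: Postgres does not support persist. Ignoring persist() call.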
sqlframe/postgres/functions.py
@@ -0,0 +1,61 @@
+ import inspect
+ import sys
+
+ import sqlframe.base.functions
+
+ module = sys.modules["sqlframe.base.functions"]
+ globals().update(
+     {
+         name: func
+         for name, func in inspect.getmembers(module, inspect.isfunction)
+         if hasattr(func, "unsupported_engines")
+         and "postgres" not in func.unsupported_engines
+         and "*" not in func.unsupported_engines
+     }
+ )
+
+
+ from sqlframe.base.function_alternatives import (  # noqa
+     e_literal as e,
+     expm1_from_exp as expm1,
+     log1p_from_log as log1p,
+     rint_from_round as rint,
+     collect_set_from_list_distinct as collect_set,
+     isnan_using_equal as isnan,
+     isnull_using_equal as isnull,
+     nanvl_as_case as nanvl,
+     rand_no_seed as rand,
+     round_cast_as_numeric as round,
+     year_from_extract as year,
+     quarter_from_extract as quarter,
+     month_from_extract as month,
+     dayofweek_from_extract_with_isodow as dayofweek,
+     dayofmonth_from_extract_with_day as dayofmonth,
+     dayofyear_from_extract_doy as dayofyear,
+     hour_from_extract as hour,
+     minute_from_extract as minute,
+     second_from_extract as second,
+     weekofyear_from_extract_as_week as weekofyear,
+     make_date_casted_as_integer as make_date,
+     date_add_by_multiplication as date_add,
+     date_sub_by_multiplication as date_sub,
+     date_diff_with_subtraction as date_diff,
+     add_months_by_multiplication as add_months,
+     months_between_from_age_and_extract as months_between,
+     from_unixtime_from_timestamp as from_unixtime,
+     unix_timestamp_from_extract as unix_timestamp,
+     base64_from_blob as base64,
+     bas64_from_encode as base64,
+     unbase64_from_decode as unbase64,
+     decode_from_convert_from as decode,
+     encode_from_convert_to as encode,
+     format_number_from_to_char as format_number,
+     format_string_with_format as format_string,
+     split_from_regex_split_to_array as split,
+     array_contains_any as array_contains,
+     slice_with_brackets as slice,
+     element_at_using_brackets as element_at,
+     get_json_object_using_arrow_op as get_json_object,
+     array_min_from_subquery as array_min,
+     array_max_from_subquery as array_max,
+ )
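
As with the DuckDB module, the Spark function names exported here resolve to Postgres-friendly alternatives (EXTRACT-based date parts, date arithmetic by multiplication, and so on). A short sketch of what a caller sees; the underlying names are read off the import list above, and the generated SQL ultimately depends on sqlglot's postgres dialect:

import sqlframe.postgres.functions as F

# Spark names, Postgres-specific implementations underneath.
print(F.year.__name__)      # expected: year_from_extract
print(F.date_add.__name__)  # expected: date_add_by_multiplication

# Used like pyspark.sql.functions in a select, e.g.:
# df.select(F.year("created_at").alias("yr"), F.date_add("created_at", 7))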
sqlframe/postgres/group.py
@@ -0,0 +1,14 @@
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.
+
+ from __future__ import annotations
+
+ import typing as t
+
+ from sqlframe.base.group import _BaseGroupedData
+
+ if t.TYPE_CHECKING:
+     from sqlframe.postgres.dataframe import PostgresDataFrame
+
+
+ class PostgresGroupedData(_BaseGroupedData["PostgresDataFrame"]):
+     pass
sqlframe/postgres/readwriter.py
@@ -0,0 +1,29 @@
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.
+
+ from __future__ import annotations
+
+ import typing as t
+
+ from sqlframe.base.mixins.readwriter_mixins import PandasLoaderMixin, PandasWriterMixin
+ from sqlframe.base.readerwriter import (
+     _BaseDataFrameReader,
+     _BaseDataFrameWriter,
+ )
+
+ if t.TYPE_CHECKING:
+     from sqlframe.postgres.session import PostgresSession  # noqa
+     from sqlframe.postgres.dataframe import PostgresDataFrame  # noqa
+
+
+ class PostgresDataFrameReader(
+     PandasLoaderMixin["PostgresSession", "PostgresDataFrame"],
+     _BaseDataFrameReader["PostgresSession", "PostgresDataFrame"],
+ ):
+     pass
+
+
+ class PostgresDataFrameWriter(
+     PandasWriterMixin["PostgresSession", "PostgresDataFrame"],
+     _BaseDataFrameWriter["PostgresSession", "PostgresDataFrame"],
+ ):
+     pass
sqlframe/postgres/session.py
@@ -0,0 +1,68 @@
+ from __future__ import annotations
+
+ import typing as t
+
+ from sqlglot import exp
+
+ from sqlframe.base.session import _BaseSession
+ from sqlframe.postgres.catalog import PostgresCatalog
+ from sqlframe.postgres.dataframe import PostgresDataFrame
+ from sqlframe.postgres.readwriter import (
+     PostgresDataFrameReader,
+     PostgresDataFrameWriter,
+ )
+
+ if t.TYPE_CHECKING:
+     from psycopg2.extensions import connection as psycopg2_connection
+
+     from sqlframe.base.types import Row
+ else:
+     psycopg2_connection = t.Any
+
+
+ class PostgresSession(
+     _BaseSession[  # type: ignore
+         PostgresCatalog,
+         PostgresDataFrameReader,
+         PostgresDataFrameWriter,
+         PostgresDataFrame,
+         psycopg2_connection,
+     ],
+ ):
+     _catalog = PostgresCatalog
+     _reader = PostgresDataFrameReader
+     _writer = PostgresDataFrameWriter
+     _df = PostgresDataFrame
+
+     DEFAULT_TIME_FORMAT = "yyyy-MM-dd HH:MI:SS"
+
+     def __init__(self, conn: t.Optional[psycopg2_connection] = None):
+         if not hasattr(self, "_conn"):
+             super().__init__(conn)
+             self._execute("CREATE EXTENSION IF NOT EXISTS fuzzystrmatch")
+
+     def _fetch_rows(
+         self, sql: t.Union[str, exp.Expression], *, quote_identifiers: bool = True
+     ) -> t.List[Row]:
+         from psycopg2 import ProgrammingError
+
+         try:
+             return super()._fetch_rows(sql, quote_identifiers=quote_identifiers)
+         except ProgrammingError as e:
+             if "no results to fetch" in str(e):
+                 return []
+             raise e
+
+     class Builder(_BaseSession.Builder):
+         DEFAULT_INPUT_DIALECT = "postgres"
+         DEFAULT_OUTPUT_DIALECT = "postgres"
+
+         @property
+         def session(self) -> PostgresSession:
+             return PostgresSession(**self._session_kwargs)
+
+         def getOrCreate(self) -> PostgresSession:
+             self._set_session_properties()
+             return self.session
+
+     builder = Builder()
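
A hedged sketch of the _fetch_rows() guard above: statements that produce no result set (DDL, for example) make psycopg2 raise ProgrammingError("no results to fetch"), which the session converts into an empty list instead of surfacing the error. The internal _fetch_rows() helper is called directly here only to illustrate that branch; connection details are assumptions.

import psycopg2

from sqlframe.postgres import PostgresSession

conn = psycopg2.connect(dbname="postgres", user="postgres")  # assumed local server
session = PostgresSession(conn=conn)

# DDL returns no rows; the except branch turns the driver error into [].
rows = session._fetch_rows("CREATE TEMP TABLE IF NOT EXISTS scratch (id INT)")
print(rows)  # []

# Ordinary queries still come back as Row objects.
print(session._fetch_rows("SELECT 1 AS one"))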
sqlframe/postgres/types.py
@@ -0,0 +1 @@
+ from sqlframe.base.types import *
sqlframe/postgres/window.py
@@ -0,0 +1 @@
+ from sqlframe.base.window import *
sqlframe/redshift/__init__.py
@@ -0,0 +1,23 @@
+ from sqlframe.redshift.catalog import RedshiftCatalog
+ from sqlframe.redshift.column import Column
+ from sqlframe.redshift.dataframe import RedshiftDataFrame, RedshiftDataFrameNaFunctions
+ from sqlframe.redshift.group import RedshiftGroupedData
+ from sqlframe.redshift.readwriter import (
+     RedshiftDataFrameReader,
+     RedshiftDataFrameWriter,
+ )
+ from sqlframe.redshift.session import RedshiftSession
+ from sqlframe.redshift.window import Window, WindowSpec
+
+ __all__ = [
+     "RedshiftCatalog",
+     "Column",
+     "RedshiftDataFrame",
+     "RedshiftDataFrameNaFunctions",
+     "RedshiftGroupedData",
+     "RedshiftDataFrameReader",
+     "RedshiftDataFrameWriter",
+     "RedshiftSession",
+     "Window",
+     "WindowSpec",
+ ]
+ ]