sqlframe 1.1.3__tar.gz → 1.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sqlframe-1.1.3 → sqlframe-1.3.0}/Makefile +2 -2
- {sqlframe-1.1.3 → sqlframe-1.3.0}/PKG-INFO +3 -1
- sqlframe-1.3.0/docs/configuration.md +229 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/docs/duckdb.md +1 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/docs/postgres.md +1 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/mkdocs.yml +1 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/setup.py +8 -5
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/_version.py +2 -2
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/base/catalog.py +6 -1
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/base/column.py +7 -3
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/base/dataframe.py +50 -7
- sqlframe-1.3.0/sqlframe/base/decorators.py +53 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/base/mixins/catalog_mixins.py +12 -10
- sqlframe-1.3.0/sqlframe/base/mixins/dataframe_mixins.py +63 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/base/mixins/readwriter_mixins.py +4 -3
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/base/readerwriter.py +3 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/base/session.py +6 -9
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/base/util.py +38 -1
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/bigquery/catalog.py +3 -1
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/duckdb/catalog.py +2 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/duckdb/dataframe.py +3 -1
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/postgres/catalog.py +1 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/postgres/dataframe.py +3 -1
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/snowflake/catalog.py +3 -1
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/snowflake/session.py +31 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/spark/catalog.py +3 -1
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/spark/session.py +3 -1
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe.egg-info/PKG-INFO +3 -1
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe.egg-info/SOURCES.txt +5 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe.egg-info/requires.txt +8 -5
- sqlframe-1.3.0/tests/integration/engines/duck/test_duckdb_dataframe.py +79 -0
- sqlframe-1.3.0/tests/integration/engines/postgres/test_postgres_dataframe.py +64 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/tests/integration/engines/snowflake/test_snowflake_session.py +2 -2
- {sqlframe-1.1.3 → sqlframe-1.3.0}/tests/integration/engines/test_int_functions.py +4 -2
- {sqlframe-1.1.3 → sqlframe-1.3.0}/tests/unit/standalone/test_session.py +1 -1
- sqlframe-1.3.0/tests/unit/test_util.py +26 -0
- sqlframe-1.1.3/sqlframe/base/decorators.py +0 -51
- {sqlframe-1.1.3 → sqlframe-1.3.0}/.github/CODEOWNERS +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/.github/workflows/main.workflow.yaml +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/.github/workflows/publish.workflow.yaml +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/.gitignore +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/.pre-commit-config.yaml +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/.readthedocs.yaml +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/LICENSE +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/README.md +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/blogs/images/but_wait_theres_more.gif +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/blogs/images/cake.gif +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/blogs/images/you_get_pyspark_api.gif +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/blogs/sqlframe_universal_dataframe_api.md +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/docs/bigquery.md +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/docs/docs/bigquery.md +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/docs/docs/duckdb.md +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/docs/docs/images/SF.png +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/docs/docs/images/favicon.png +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/docs/docs/images/favicon_old.png +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/docs/docs/images/sqlframe_diagram.png +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/docs/docs/images/sqlframe_logo.png +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/docs/docs/postgres.md +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/docs/images/SF.png +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/docs/images/favicon.png +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/docs/images/favicon_old.png +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/docs/images/sqlframe_diagram.png +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/docs/images/sqlframe_logo.png +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/docs/index.md +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/docs/requirements.txt +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/docs/standalone.md +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/docs/stylesheets/extra.css +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/pytest.ini +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/renovate.json +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/setup.cfg +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/LICENSE +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/__init__.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/base/__init__.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/base/_typing.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/base/exceptions.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/base/function_alternatives.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/base/functions.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/base/group.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/base/mixins/__init__.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/base/normalize.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/base/operations.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/base/transforms.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/base/types.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/base/window.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/bigquery/__init__.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/bigquery/column.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/bigquery/dataframe.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/bigquery/functions.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/bigquery/functions.pyi +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/bigquery/group.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/bigquery/readwriter.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/bigquery/session.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/bigquery/types.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/bigquery/window.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/duckdb/__init__.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/duckdb/column.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/duckdb/functions.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/duckdb/functions.pyi +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/duckdb/group.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/duckdb/readwriter.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/duckdb/session.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/duckdb/types.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/duckdb/window.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/postgres/__init__.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/postgres/column.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/postgres/functions.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/postgres/functions.pyi +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/postgres/group.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/postgres/readwriter.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/postgres/session.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/postgres/types.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/postgres/window.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/redshift/__init__.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/redshift/catalog.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/redshift/column.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/redshift/dataframe.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/redshift/functions.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/redshift/group.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/redshift/readwriter.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/redshift/session.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/redshift/types.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/redshift/window.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/snowflake/__init__.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/snowflake/column.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/snowflake/dataframe.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/snowflake/functions.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/snowflake/group.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/snowflake/readwriter.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/snowflake/types.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/snowflake/window.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/spark/__init__.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/spark/column.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/spark/dataframe.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/spark/functions.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/spark/group.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/spark/readwriter.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/spark/types.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/spark/window.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/standalone/__init__.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/standalone/catalog.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/standalone/column.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/standalone/dataframe.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/standalone/functions.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/standalone/group.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/standalone/readwriter.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/standalone/session.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/standalone/types.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/standalone/window.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe.egg-info/dependency_links.txt +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe.egg-info/top_level.txt +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/tests/__init__.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/tests/common_fixtures.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/tests/conftest.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/tests/fixtures/employee.csv +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/tests/fixtures/employee.json +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/tests/fixtures/employee.parquet +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/tests/fixtures/employee_extra_line.csv +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/tests/integration/__init__.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/tests/integration/engines/__init__.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/tests/integration/engines/bigquery/__init__.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/tests/integration/engines/bigquery/test_bigquery_catalog.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/tests/integration/engines/bigquery/test_bigquery_session.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/tests/integration/engines/duck/__init__.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/tests/integration/engines/duck/test_duckdb_catalog.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/tests/integration/engines/duck/test_duckdb_reader.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/tests/integration/engines/duck/test_duckdb_session.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/tests/integration/engines/postgres/__init__.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/tests/integration/engines/postgres/test_postgres_catalog.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/tests/integration/engines/postgres/test_postgres_session.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/tests/integration/engines/redshift/__init__.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/tests/integration/engines/redshift/test_redshift_catalog.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/tests/integration/engines/redshift/test_redshift_session.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/tests/integration/engines/snowflake/__init__.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/tests/integration/engines/snowflake/test_snowflake_catalog.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/tests/integration/engines/spark/__init__.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/tests/integration/engines/spark/test_spark_catalog.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/tests/integration/engines/test_engine_dataframe.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/tests/integration/engines/test_engine_reader.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/tests/integration/engines/test_engine_session.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/tests/integration/engines/test_engine_writer.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/tests/integration/fixtures.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/tests/integration/test_int_dataframe.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/tests/integration/test_int_dataframe_stats.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/tests/integration/test_int_grouped_data.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/tests/integration/test_int_session.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/tests/types.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/tests/unit/__init__.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/tests/unit/standalone/__init__.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/tests/unit/standalone/fixtures.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/tests/unit/standalone/test_column.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/tests/unit/standalone/test_dataframe.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/tests/unit/standalone/test_dataframe_writer.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/tests/unit/standalone/test_functions.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/tests/unit/standalone/test_session_case_sensitivity.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/tests/unit/standalone/test_types.py +0 -0
- {sqlframe-1.1.3 → sqlframe-1.3.0}/tests/unit/standalone/test_window.py +0 -0
{sqlframe-1.1.3 → sqlframe-1.3.0}/Makefile

@@ -1,5 +1,5 @@
 install-dev:
-	pip install -e ".[dev,docs,duckdb,postgres,redshift,
+	pip install -e ".[bigquery,dev,docs,duckdb,pandas,postgres,redshift,snowflake,spark]"
 
 install-pre-commit:
 	pre-commit install
@@ -8,7 +8,7 @@ slow-test:
 	pytest -n auto tests
 
 fast-test:
-	pytest -n auto
+	pytest -n auto tests/unit
 
 local-test:
 	pytest -n auto -m "fast or local"

{sqlframe-1.1.3 → sqlframe-1.3.0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: sqlframe
-Version: 1.1.3
+Version: 1.3.0
 Summary: Taking the Spark out of PySpark by converting to SQL
 Home-page: https://github.com/eakmanrq/sqlframe
 Author: Ryan Eakman
@@ -20,6 +20,8 @@ Provides-Extra: bigquery
 Provides-Extra: dev
 Provides-Extra: docs
 Provides-Extra: duckdb
+Provides-Extra: openai
+Provides-Extra: pandas
 Provides-Extra: postgres
 Provides-Extra: redshift
 Provides-Extra: snowflake

sqlframe-1.3.0/docs/configuration.md (new file)

@@ -0,0 +1,229 @@
+# General Configuration
+
+## Generated SQL
+
+### Pretty
+
+If the SQL should be returned in a "pretty" format meaning it has newlines and indentation. Defaults to `True`.
+
+```python
+from sqlframe.standalone import StandaloneSession
+
+session = StandaloneSession()
+
+df = session.createDataFrame([{'a': 1, 'b': 2}])
+```
+```python
+>>> print(df.sql())
+SELECT
+  CAST(`a1`.`a` AS BIGINT) AS `a`,
+  CAST(`a1`.`b` AS BIGINT) AS `b`
+FROM VALUES
+  (1, 2) AS `a1`(`a`, `b`)
+```
+```python
+>>> print(df.sql(pretty=False))
+SELECT CAST(`a3`.`a` AS BIGINT) AS `a`, CAST(`a3`.`b` AS BIGINT) AS `b` FROM VALUES (1, 2) AS `a3`(`a`, `b`)
+```
+
+### Optimized
+
+Optimized SQL is SQL that has been processed by SQLGlot's optimizer. For complex queries this will significantly reduce the number of CTEs produced and remove extra unused columns. Defaults to `True`.
+
+```python
+from sqlframe.bigquery import BigQuerySession
+from sqlframe.bigquery import functions as F
+from sqlframe.bigquery import Window
+
+session = BigQuerySession()
+table_path = "bigquery-public-data.samples.natality"
+# Top 5 years with the greatest year-over-year % change in new families with single child
+df = (
+    session.table(table_path)
+    .where(F.col("ever_born") == 1)
+    .groupBy("year")
+    .agg(F.count("*").alias("num_single_child_families"))
+    .withColumn(
+        "last_year_num_single_child_families",
+        F.lag(F.col("num_single_child_families"), 1).over(Window.orderBy("year"))
+    )
+    .withColumn(
+        "percent_change",
+        (F.col("num_single_child_families") - F.col("last_year_num_single_child_families"))
+        / F.col("last_year_num_single_child_families")
+    )
+    .orderBy(F.abs(F.col("percent_change")).desc())
+    .select(
+        F.col("year").alias("year"),
+        F.format_number("num_single_child_families", 0).alias("new families single child"),
+        F.format_number(F.col("percent_change") * 100, 2).alias("percent change"),
+    )
+    .limit(5)
+)
+```
+```python
+>>> print(df.sql(optimize=True))
+WITH `t94228042` AS (
+  SELECT
+    `natality`.`year` AS `year`,
+    COUNT(*) AS `num_single_child_families`
+  FROM `bigquery-public-data`.`samples`.`natality` AS `natality`
+  WHERE
+    `natality`.`ever_born` = 1
+  GROUP BY
+    `natality`.`year`
+), `t30206548` AS (
+  SELECT
+    `t94228042`.`year` AS `year`,
+    `t94228042`.`num_single_child_families` AS `num_single_child_families`,
+    LAG(`t94228042`.`num_single_child_families`, 1) OVER (ORDER BY `t94228042`.`year`) AS `last_year_num_single_child_families`
+  FROM `t94228042` AS `t94228042`
+)
+SELECT
+  `t30206548`.`year` AS `year`,
+  FORMAT('%\'.0f', ROUND(CAST(`t30206548`.`num_single_child_families` AS FLOAT64), 0)) AS `new families single child`,
+  FORMAT(
+    '%\'.2f',
+    ROUND(
+      CAST((
+        (
+          (
+            `t30206548`.`num_single_child_families` - `t30206548`.`last_year_num_single_child_families`
+          ) / `t30206548`.`last_year_num_single_child_families`
+        ) * 100
+      ) AS FLOAT64),
+      2
+    )
+  ) AS `percent change`
+FROM `t30206548` AS `t30206548`
+ORDER BY
+  ABS(`percent_change`) DESC
+LIMIT 5
+```
+```python
+>>> print(df.sql(optimize=False))
+WITH t14183493 AS (
+  SELECT
+    `source_year`,
+    `year`,
+    `month`,
+    `day`,
+    `wday`,
+    `state`,
+    `is_male`,
+    `child_race`,
+    `weight_pounds`,
+    `plurality`,
+    `apgar_1min`,
+    `apgar_5min`,
+    `mother_residence_state`,
+    `mother_race`,
+    `mother_age`,
+    `gestation_weeks`,
+    `lmp`,
+    `mother_married`,
+    `mother_birth_state`,
+    `cigarette_use`,
+    `cigarettes_per_day`,
+    `alcohol_use`,
+    `drinks_per_week`,
+    `weight_gain_pounds`,
+    `born_alive_alive`,
+    `born_alive_dead`,
+    `born_dead`,
+    `ever_born`,
+    `father_race`,
+    `father_age`,
+    `record_weight`
+  FROM bigquery-public-data.samples.natality
+), t17633417 AS (
+  SELECT
+    year,
+    COUNT(*) AS num_single_child_families
+  FROM t14183493
+  WHERE
+    ever_born = 1
+  GROUP BY
+    year
+), t32066970 AS (
+  SELECT
+    year,
+    num_single_child_families,
+    LAG(num_single_child_families, 1) OVER (ORDER BY year) AS last_year_num_single_child_families
+  FROM t17633417
+), t21362690 AS (
+  SELECT
+    year,
+    num_single_child_families,
+    last_year_num_single_child_families,
+    (
+      (
+        num_single_child_families - last_year_num_single_child_families
+      ) / last_year_num_single_child_families
+    ) AS percent_change
+  FROM t32066970
+  ORDER BY
+    ABS(percent_change) DESC
+)
+SELECT
+  year AS year,
+  FORMAT('%\'.0f', ROUND(CAST(num_single_child_families AS FLOAT64), 0)) AS `new families single child`,
+  FORMAT('%\'.2f', ROUND(CAST((
+    percent_change * 100
+  ) AS FLOAT64), 2)) AS `percent change`
+FROM t21362690
+LIMIT 5
+```
+
+### Override Dialect
+
+The dialect of the generated SQL will be based on the session's dialect. However, you can override the dialect by passing a string to the `dialect` parameter. This is useful when you want to generate SQL for a different database.
+
+```python
+# create session and `df` like normal
+df.sql(dialect="bigquery")
+```
+
+### OpenAI Enriched
+
+OpenAI's models can be used to enrich the generated SQL to make it more human-like.
+This is useful when you want to generate SQL that is more readable for humans.
+You must have `OPENAI_API_KEY` set in your environment variables to use this feature.
+
+```python
+# create session and `df` like normal
+# The model to use defaults to `gpt-4o` but can be changed by passing a string to the `openai_model` parameter.
+>>> df.sql(optimize=False, use_openai=True)
+WITH natality_data AS (
+  SELECT
+    year,
+    ever_born
+  FROM `bigquery-public-data`.`samples`.`natality`
+), single_child_families AS (
+  SELECT
+    year,
+    COUNT(*) AS num_single_child_families
+  FROM natality_data
+  WHERE ever_born = 1
+  GROUP BY year
+), lagged_families AS (
+  SELECT
+    year,
+    num_single_child_families,
+    LAG(num_single_child_families, 1) OVER (ORDER BY year) AS last_year_num_single_child_families
+  FROM single_child_families
+), percent_change_families AS (
+  SELECT
+    year,
+    num_single_child_families,
+    ((num_single_child_families - last_year_num_single_child_families) / last_year_num_single_child_families) AS percent_change
+  FROM lagged_families
+  ORDER BY ABS(percent_change) DESC
+)
+SELECT
+  year,
+  FORMAT('%\'.0f', ROUND(CAST(num_single_child_families AS FLOAT64), 0)) AS `new families single child`,
+  FORMAT('%\'.2f', ROUND(CAST((percent_change * 100) AS FLOAT64), 2)) AS `percent change`
+FROM percent_change_families
+LIMIT 5
+```

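The new `df.sql()` options documented above compose freely. A quick illustrative sketch (not part of the diff; it reuses the `StandaloneSession` example from the docs, and the OpenAI call assumes the `openai` extra plus `OPENAI_API_KEY` in the environment):

```python
from sqlframe.standalone import StandaloneSession

session = StandaloneSession()
df = session.createDataFrame([{"a": 1, "b": 2}])

print(df.sql())                    # optimized, pretty SQL (the defaults)
print(df.sql(pretty=False))        # same query on a single line
print(df.sql(dialect="bigquery"))  # transpile to another engine's dialect
# Requires the `openai` extra and OPENAI_API_KEY set in the environment:
# print(df.sql(optimize=False, use_openai=True, openai_model="gpt-4o"))
```
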
{sqlframe-1.1.3 → sqlframe-1.3.0}/docs/duckdb.md

@@ -171,6 +171,7 @@ df_store = session.createDataFrame(
 * [na](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.na.html)
 * [orderBy](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.orderBy.html)
 * [persist](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.persist.html)
+* [printSchema](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.printSchema.html)
 * [replace](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.replace.html)
 * [select](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.select.html)
 * [show](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.show.html)

{sqlframe-1.1.3 → sqlframe-1.3.0}/docs/postgres.md

@@ -174,6 +174,7 @@ df_store = session.createDataFrame(
 * [na](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.na.html)
 * [orderBy](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.orderBy.html)
 * [persist](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.persist.html)
+* [printSchema](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.printSchema.html)
 * [replace](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.replace.html)
 * [select](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.select.html)
 * [show](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.show.html)

{sqlframe-1.1.3 → sqlframe-1.3.0}/setup.py

@@ -26,11 +26,11 @@ setup(
         "bigquery": [
             "google-cloud-bigquery[pandas]>=3,<4",
             "google-cloud-bigquery-storage>=2,<3",
-            "pandas>=2,<3",
         ],
         "dev": [
             "duckdb>=0.9,<0.11",
             "mypy>=1.10.0,<1.11",
+            "openai>=1.30,<1.31",
             "pandas>=2,<3",
             "pandas-stubs>=2,<3",
             "psycopg>=3.1,<4",
@@ -56,17 +56,20 @@ setup(
             "duckdb>=0.9,<0.11",
             "pandas>=2,<3",
         ],
-        "postgres": [
+        "openai": [
+            "openai>=1.30,<1.31",
+        ],
+        "pandas": [
             "pandas>=2,<3",
+        ],
+        "postgres": [
             "psycopg2>=2.8,<3",
         ],
         "redshift": [
-            "pandas>=2,<3",
             "redshift_connector>=2.1.1,<2.2.0",
         ],
         "snowflake": [
-            "pandas>=2,<3",
-            "snowflake-connector-python[pandas,secure-local-storage]>=3.10.0,<3.11",
+            "snowflake-connector-python[secure-local-storage]>=3.10.0,<3.11",
         ],
         "spark": [
             "pyspark>=2,<3.6",

{sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/base/catalog.py

@@ -26,6 +26,9 @@ else:
 class _BaseCatalog(t.Generic[SESSION, DF]):
     """User-facing catalog API, accessible through `SparkSession.catalog`."""
 
+    TEMP_CATALOG_FILTER: t.Optional[exp.Expression] = None
+    TEMP_SCHEMA_FILTER: t.Optional[exp.Expression] = None
+
     def __init__(self, sparkSession: SESSION, schema: t.Optional[MappingSchema] = None) -> None:
         """Create a new Catalog that wraps the underlying JVM object."""
         self.session = sparkSession
@@ -569,7 +572,9 @@ class _BaseCatalog(t.Generic[SESSION, DF]):
         """
         raise NotImplementedError
 
-    def listColumns(self, tableName: str, dbName: t.Optional[str] = None) -> t.List[Column]:
+    def listColumns(
+        self, tableName: str, dbName: t.Optional[str] = None, include_temp: bool = False
+    ) -> t.List[Column]:
         """Returns a t.List of columns for the given table/view in the specified database.
 
         .. versionadded:: 2.0.0

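The two new class attributes are hooks for engine-specific catalogs: an engine can describe where its temporary objects live, and `listColumns(..., include_temp=True)` will OR that filter into its information-schema query (see the mixin change further down). A hypothetical engine whose temp objects land in a `temp` schema might do something like this (illustrative subclass, not from the diff):

```python
from sqlglot import expressions as exp

from sqlframe.base.catalog import _BaseCatalog


class MyEngineCatalog(_BaseCatalog):
    # `temp` is an assumed schema name for this sketch; each real engine
    # supplies whatever predicate matches its own temp-object location.
    TEMP_SCHEMA_FILTER = exp.column("table_schema").eq("temp")
```
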
{sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/base/column.py

@@ -9,9 +9,11 @@ import typing as t
 import sqlglot
 from sqlglot import expressions as exp
 from sqlglot.helper import flatten, is_iterable
+from sqlglot.optimizer.normalize_identifiers import normalize_identifiers
 
+from sqlframe.base.decorators import normalize
 from sqlframe.base.types import DataType
-from sqlframe.base.util import get_func_from_session
+from sqlframe.base.util import get_func_from_session, quote_preserving_alias_or_name
 
 if t.TYPE_CHECKING:
     from sqlframe.base._typing import ColumnOrLiteral, ColumnOrName
@@ -237,7 +239,7 @@ class Column:
 
     @property
     def alias_or_name(self) -> str:
-        return self.expression.alias_or_name
+        return quote_preserving_alias_or_name(self.expression)  # type: ignore
 
     @classmethod
     def ensure_literal(cls, value) -> Column:
@@ -266,7 +268,9 @@ class Column:
         from sqlframe.base.session import _BaseSession
 
         dialect = _BaseSession().input_dialect
-        alias: exp.Expression =
+        alias: exp.Expression = normalize_identifiers(
+            exp.parse_identifier(name, dialect=dialect), dialect=dialect
+        )
         new_expression = exp.Alias(
             this=self.column_expression,
             alias=alias.this if isinstance(alias, exp.Column) else alias,

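The `alias` change routes the user-supplied name through SQLGlot's identifier normalization, so casing follows the session's input dialect. A standalone sketch of the two SQLGlot calls involved (values illustrative):

```python
from sqlglot import expressions as exp
from sqlglot.optimizer.normalize_identifiers import normalize_identifiers

# Unquoted identifiers fold per dialect: Snowflake upper-cases, DuckDB lower-cases.
ident = exp.parse_identifier("MyCol", dialect="snowflake")
print(normalize_identifiers(ident, dialect="snowflake").sql("snowflake"))  # MYCOL

ident = exp.parse_identifier("MyCol", dialect="duckdb")
print(normalize_identifiers(ident, dialect="duckdb").sql("duckdb"))  # mycol
```
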
{sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/base/dataframe.py

@@ -15,13 +15,18 @@ from prettytable import PrettyTable
 from sqlglot import Dialect
 from sqlglot import expressions as exp
 from sqlglot.helper import ensure_list, object_to_dict, seq_get
+from sqlglot.optimizer.pushdown_projections import pushdown_projections
+from sqlglot.optimizer.qualify import qualify
 from sqlglot.optimizer.qualify_columns import quote_identifiers
 
+from sqlframe.base.decorators import normalize
 from sqlframe.base.operations import Operation, operation
 from sqlframe.base.transforms import replace_id_value
 from sqlframe.base.util import (
     get_func_from_session,
     get_tables_from_expression_with_join,
+    quote_preserving_alias_or_name,
+    verify_openai_installed,
 )
 
 if sys.version_info >= (3, 11):
@@ -410,7 +415,7 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
 
         outer_select = item.find(exp.Select)
         if outer_select:
-            return [col(x.alias_or_name) for x in outer_select.expressions]
+            return [col(quote_preserving_alias_or_name(x)) for x in outer_select.expressions]
         return []
 
     def _create_hash_from_expression(self, expression: exp.Expression) -> str:
@@ -471,6 +476,8 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
         dialect: DialectType = None,
         optimize: bool = True,
         pretty: bool = True,
+        use_openai: bool = False,
+        openai_model: str = "gpt-4o",
         as_list: bool = False,
         **kwargs,
     ) -> t.Union[str, t.List[str]]:
@@ -490,6 +497,9 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
             select_expression = t.cast(
                 exp.Select, self.session._optimize(select_expression, dialect=dialect)
             )
+        elif use_openai:
+            qualify(select_expression, dialect=dialect, schema=self.session.catalog._schema)
+            pushdown_projections(select_expression, schema=self.session.catalog._schema)
 
         select_expression = df._replace_cte_names_with_hashes(select_expression)
 
@@ -505,7 +515,9 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
             self.session.catalog.add_table(
                 cache_table_name,
                 {
-                    expression.alias_or_name: expression.type.sql(dialect=dialect)
+                    quote_preserving_alias_or_name(expression): expression.type.sql(
+                        dialect=dialect
+                    )
                     if expression.type
                     else "UNKNOWN"
                     for expression in select_expression.expressions
@@ -541,10 +553,40 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
 
             output_expressions.append(expression)
 
-        results = [
-            expression.sql(dialect=dialect, pretty=pretty, **kwargs)
-            for expression in output_expressions
-        ]
+        results = []
+        for expression in output_expressions:
+            sql = expression.sql(dialect=dialect, pretty=pretty, **kwargs)
+            if use_openai:
+                verify_openai_installed()
+                from openai import OpenAI
+
+                client = OpenAI()
+                prompt = f"""
+                You are a backend tool that converts correct {dialect} SQL to simplified and more human readable version.
+                You respond without code block with rewritten {dialect} SQL.
+                You don't change any column names in the final select because the user expects those to remain the same.
+                You make unique CTE alias names match what a human would write and in snake case.
+                You improve formatting with spacing and line-breaks.
+                You remove redundant parenthesis and aliases.
+                When remove extra quotes, make sure to keep quotes around words that could be reserved words
+                """
+                chat_completed = client.chat.completions.create(
+                    messages=[
+                        {
+                            "role": "system",
+                            "content": prompt,
+                        },
+                        {
+                            "role": "user",
+                            "content": sql,
+                        },
+                    ],
+                    model=openai_model,
+                )
+                assert chat_completed.choices[0].message.content is not None
+                sql = chat_completed.choices[0].message.content
+            results.append(sql)
 
         if as_list:
             return results
         return ";\n".join(results)
@@ -688,7 +730,7 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
         join_expression = self._add_ctes_to_expression(join_expression, other_df.expression.ctes)
         self_columns = self._get_outer_select_columns(join_expression)
         other_columns = self._get_outer_select_columns(other_df.expression)
-        join_columns = self.
+        join_columns = self._ensure_and_normalize_cols(on)
         # Determines the join clause and select columns to be used passed on what type of columns were provided for
         # the join. The columns returned changes based on how the on expression is provided.
         if how != "cross":
@@ -1324,6 +1366,7 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
         assert sqls[-1] is not None
         return self.session._fetchdf(sqls[-1])
 
+    @normalize("name")
     def createOrReplaceTempView(self, name: str) -> None:
         self.session.temp_views[name] = self.copy()._convert_leaf_to_cte()
 

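Worth noting in the `sql()` change: when `optimize=False` but `use_openai=True`, the expression is still qualified and pruned first so the model receives resolved, minimal SQL. A standalone SQLGlot sketch of those two passes (toy schema, not from the diff):

```python
from sqlglot import parse_one
from sqlglot.optimizer.pushdown_projections import pushdown_projections
from sqlglot.optimizer.qualify import qualify

schema = {"t": {"a": "int", "b": "int"}}
expression = parse_one("SELECT a FROM (SELECT a, b FROM t) AS sub")
expression = qualify(expression, schema=schema)               # resolve and quote column references
expression = pushdown_projections(expression, schema=schema)  # drop the unused `b` projection
print(expression.sql())
```
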
sqlframe-1.3.0/sqlframe/base/decorators.py (new file)

@@ -0,0 +1,53 @@
+from __future__ import annotations
+
+import functools
+import typing as t
+
+from sqlglot import parse_one
+from sqlglot.helper import ensure_list
+from sqlglot.optimizer.normalize_identifiers import normalize_identifiers
+
+if t.TYPE_CHECKING:
+    from sqlframe.base.catalog import _BaseCatalog
+
+CALLING_CLASS = t.TypeVar("CALLING_CLASS")
+
+
+def normalize(normalize_kwargs: t.Union[str, t.List[str]]) -> t.Callable[[t.Callable], t.Callable]:
+    """
+    Decorator used to normalize identifiers in the kwargs of a method.
+    """
+
+    def decorator(func: t.Callable) -> t.Callable:
+        @functools.wraps(func)
+        def wrapper(self: CALLING_CLASS, *args, **kwargs) -> CALLING_CLASS:
+            from sqlframe.base.session import _BaseSession
+
+            input_dialect = _BaseSession().input_dialect
+            kwargs.update(dict(zip(func.__code__.co_varnames[1:], args)))
+            for kwarg in ensure_list(normalize_kwargs):
+                if kwarg in kwargs:
+                    value = kwargs.get(kwarg)
+                    if value:
+                        expression = (
+                            parse_one(value, dialect=input_dialect)
+                            if isinstance(value, str)
+                            else value
+                        )
+                        kwargs[kwarg] = normalize_identifiers(expression, input_dialect).sql(
+                            dialect=input_dialect
+                        )
+            return func(self, **kwargs)
+
+        wrapper.__wrapped__ = func  # type: ignore
+        return wrapper
+
+    return decorator
+
+
+def func_metadata(unsupported_engines: t.Optional[t.Union[str, t.List[str]]] = None) -> t.Callable:
+    def _metadata(func: t.Callable) -> t.Callable:
+        func.unsupported_engines = ensure_list(unsupported_engines) if unsupported_engines else []  # type: ignore
+        return func
+
+    return _metadata

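To make the decorator's contract concrete: any listed kwarg (or its positional equivalent, via the `co_varnames` zip) is parsed with the session's input dialect and rewritten with normalized identifiers before the wrapped method runs. An illustrative use, mirroring how the package applies it (the class here is hypothetical; the decorator reads the active `_BaseSession`'s input dialect):

```python
from sqlframe.base.decorators import normalize


class Demo:
    # With a DuckDB session, Demo().echo("MyView") returns "myview";
    # with a Snowflake session it would return "MYVIEW".
    @normalize("name")
    def echo(self, name: str) -> str:
        return name
```
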
{sqlframe-1.1.3 → sqlframe-1.3.0}/sqlframe/base/mixins/catalog_mixins.py

@@ -13,7 +13,7 @@ from sqlframe.base.catalog import (
     _BaseCatalog,
 )
 from sqlframe.base.decorators import normalize
-from sqlframe.base.util import
+from sqlframe.base.util import schema_, to_schema
 
 
 class _BaseInfoSchemaMixin(_BaseCatalog, t.Generic[SESSION, DF]):
@@ -315,7 +315,9 @@ class ListTablesFromInfoSchemaMixin(_BaseInfoSchemaMixin, t.Generic[SESSION, DF]
 
 class ListColumnsFromInfoSchemaMixin(_BaseInfoSchemaMixin, t.Generic[SESSION, DF]):
     @normalize(["tableName", "dbName"])
-    def listColumns(self, tableName: str, dbName: t.Optional[str] = None) -> t.List[Column]:
+    def listColumns(
+        self, tableName: str, dbName: t.Optional[str] = None, include_temp: bool = False
+    ) -> t.List[Column]:
         """Returns a t.List of columns for the given table/view in the specified database.
 
         .. versionadded:: 2.0.0
@@ -385,12 +387,6 @@ class ListColumnsFromInfoSchemaMixin(_BaseInfoSchemaMixin, t.Generic[SESSION, DF
             "catalog",
             exp.parse_identifier(self.currentCatalog(), dialect=self.session.input_dialect),
         )
-        # if self.QUALIFY_INFO_SCHEMA_WITH_DATABASE:
-        #     if not table.db:
-        #         raise ValueError("dbName must be specified when listing columns from INFORMATION_SCHEMA")
-        #     source_table = f"{table.db}.INFORMATION_SCHEMA.COLUMNS"
-        #     else:
-        #     source_table = "INFORMATION_SCHEMA.COLUMNS"
         source_table = self._get_info_schema_table("columns", database=table.db)
         select = (
             exp.select(
@@ -402,9 +398,15 @@ class ListColumnsFromInfoSchemaMixin(_BaseInfoSchemaMixin, t.Generic[SESSION, DF
             .where(exp.column("table_name").eq(table.name))
         )
         if table.db:
-            select = select.where(exp.column("table_schema").eq(table.db))
+            schema_filter: exp.Expression = exp.column("table_schema").eq(table.db)
+            if include_temp and self.TEMP_SCHEMA_FILTER:
+                schema_filter = exp.Or(this=schema_filter, expression=self.TEMP_SCHEMA_FILTER)
+            select = select.where(schema_filter)
         if table.catalog:
-            select = select.where(exp.column("table_catalog").eq(table.catalog))
+            catalog_filter: exp.Expression = exp.column("table_catalog").eq(table.catalog)
+            if include_temp and self.TEMP_CATALOG_FILTER:
+                catalog_filter = exp.Or(this=catalog_filter, expression=self.TEMP_CATALOG_FILTER)
+            select = select.where(catalog_filter)
         results = self.session._fetch_rows(select)
         return [
             Column(

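The effect of the new branches is an OR between the user's schema/catalog filter and the engine's temp-object filter. In plain SQLGlot terms (toy values standing in for `TEMP_SCHEMA_FILTER`):

```python
from sqlglot import expressions as exp

schema_filter: exp.Expression = exp.column("table_schema").eq("main")
temp_filter = exp.column("table_schema").eq("temp")  # stand-in for TEMP_SCHEMA_FILTER
select = (
    exp.select("column_name")
    .from_("information_schema.columns")
    .where(exp.Or(this=schema_filter, expression=temp_filter))
)
print(select.sql())
# SELECT column_name FROM information_schema.columns WHERE table_schema = 'main' OR table_schema = 'temp'
```
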
sqlframe-1.3.0/sqlframe/base/mixins/dataframe_mixins.py (new file)

@@ -0,0 +1,63 @@
+import typing as t
+
+from sqlglot import exp
+
+from sqlframe.base.catalog import Column
+from sqlframe.base.dataframe import (
+    GROUP_DATA,
+    NA,
+    SESSION,
+    STAT,
+    WRITER,
+    _BaseDataFrame,
+)
+
+
+class PrintSchemaFromTempObjectsMixin(
+    _BaseDataFrame, t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]
+):
+    def _get_columns_from_temp_object(self) -> t.List[Column]:
+        table = exp.to_table(self.session._random_id)
+        self.session._execute(
+            exp.Create(
+                this=table,
+                kind="VIEW",
+                replace=True,
+                properties=exp.Properties(expressions=[exp.TemporaryProperty()]),
+                expression=self.expression,
+            )
+        )
+        return self.session.catalog.listColumns(
+            table.sql(dialect=self.session.input_dialect), include_temp=True
+        )
+
+    def printSchema(self, level: t.Optional[int] = None) -> None:
+        def print_schema(
+            column_name: str, column_type: exp.DataType, nullable: bool, current_level: int
+        ):
+            if level and current_level >= level:
+                return
+            if current_level > 0:
+                print(" | " * current_level, end="")
+            print(
+                f" |-- {column_name}: {column_type.sql(self.session.output_dialect).lower()} (nullable = {str(nullable).lower()})"
+            )
+            if column_type.this == exp.DataType.Type.STRUCT:
+                for column_def in column_type.expressions:
+                    print_schema(column_def.name, column_def.args["kind"], True, current_level + 1)
+            if column_type.this == exp.DataType.Type.ARRAY:
+                for data_type in column_type.expressions:
+                    print_schema("element", data_type, True, current_level + 1)
+            if column_type.this == exp.DataType.Type.MAP:
+                print_schema("key", column_type.expressions[0], True, current_level + 1)
+                print_schema("value", column_type.expressions[1], True, current_level + 1)
+
+        columns = self._get_columns_from_temp_object()
+        print("root")
+        for column in columns:
+            print_schema(
+                column.name,
+                exp.DataType.build(column.dataType, dialect=self.session.output_dialect),
+                column.nullable,
+                0,
+            )
