sqlframe 1.2.0__tar.gz → 1.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sqlframe-1.2.0 → sqlframe-1.3.0}/Makefile +2 -2
- {sqlframe-1.2.0 → sqlframe-1.3.0}/PKG-INFO +3 -1
- sqlframe-1.3.0/docs/configuration.md +229 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/mkdocs.yml +1 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/setup.py +8 -5
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/_version.py +2 -2
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/base/column.py +7 -3
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/base/dataframe.py +50 -7
- sqlframe-1.3.0/sqlframe/base/decorators.py +53 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/base/mixins/catalog_mixins.py +1 -1
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/base/mixins/readwriter_mixins.py +4 -3
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/base/readerwriter.py +3 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/base/session.py +6 -9
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/base/util.py +38 -1
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/snowflake/catalog.py +3 -1
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/snowflake/session.py +31 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/spark/session.py +3 -1
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe.egg-info/PKG-INFO +3 -1
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe.egg-info/SOURCES.txt +2 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe.egg-info/requires.txt +8 -5
- {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/integration/engines/snowflake/test_snowflake_session.py +2 -2
- {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/integration/engines/test_int_functions.py +4 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/unit/standalone/test_session.py +1 -1
- sqlframe-1.3.0/tests/unit/test_util.py +26 -0
- sqlframe-1.2.0/sqlframe/base/decorators.py +0 -51
- {sqlframe-1.2.0 → sqlframe-1.3.0}/.github/CODEOWNERS +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/.github/workflows/main.workflow.yaml +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/.github/workflows/publish.workflow.yaml +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/.gitignore +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/.pre-commit-config.yaml +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/.readthedocs.yaml +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/LICENSE +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/README.md +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/blogs/images/but_wait_theres_more.gif +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/blogs/images/cake.gif +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/blogs/images/you_get_pyspark_api.gif +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/blogs/sqlframe_universal_dataframe_api.md +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/docs/bigquery.md +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/docs/docs/bigquery.md +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/docs/docs/duckdb.md +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/docs/docs/images/SF.png +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/docs/docs/images/favicon.png +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/docs/docs/images/favicon_old.png +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/docs/docs/images/sqlframe_diagram.png +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/docs/docs/images/sqlframe_logo.png +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/docs/docs/postgres.md +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/docs/duckdb.md +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/docs/images/SF.png +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/docs/images/favicon.png +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/docs/images/favicon_old.png +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/docs/images/sqlframe_diagram.png +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/docs/images/sqlframe_logo.png +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/docs/index.md +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/docs/postgres.md +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/docs/requirements.txt +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/docs/standalone.md +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/docs/stylesheets/extra.css +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/pytest.ini +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/renovate.json +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/setup.cfg +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/LICENSE +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/__init__.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/base/__init__.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/base/_typing.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/base/catalog.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/base/exceptions.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/base/function_alternatives.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/base/functions.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/base/group.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/base/mixins/__init__.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/base/mixins/dataframe_mixins.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/base/normalize.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/base/operations.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/base/transforms.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/base/types.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/base/window.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/bigquery/__init__.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/bigquery/catalog.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/bigquery/column.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/bigquery/dataframe.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/bigquery/functions.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/bigquery/functions.pyi +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/bigquery/group.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/bigquery/readwriter.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/bigquery/session.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/bigquery/types.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/bigquery/window.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/duckdb/__init__.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/duckdb/catalog.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/duckdb/column.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/duckdb/dataframe.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/duckdb/functions.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/duckdb/functions.pyi +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/duckdb/group.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/duckdb/readwriter.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/duckdb/session.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/duckdb/types.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/duckdb/window.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/postgres/__init__.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/postgres/catalog.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/postgres/column.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/postgres/dataframe.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/postgres/functions.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/postgres/functions.pyi +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/postgres/group.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/postgres/readwriter.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/postgres/session.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/postgres/types.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/postgres/window.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/redshift/__init__.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/redshift/catalog.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/redshift/column.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/redshift/dataframe.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/redshift/functions.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/redshift/group.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/redshift/readwriter.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/redshift/session.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/redshift/types.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/redshift/window.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/snowflake/__init__.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/snowflake/column.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/snowflake/dataframe.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/snowflake/functions.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/snowflake/group.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/snowflake/readwriter.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/snowflake/types.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/snowflake/window.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/spark/__init__.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/spark/catalog.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/spark/column.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/spark/dataframe.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/spark/functions.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/spark/group.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/spark/readwriter.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/spark/types.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/spark/window.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/standalone/__init__.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/standalone/catalog.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/standalone/column.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/standalone/dataframe.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/standalone/functions.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/standalone/group.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/standalone/readwriter.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/standalone/session.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/standalone/types.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/standalone/window.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe.egg-info/dependency_links.txt +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe.egg-info/top_level.txt +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/__init__.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/common_fixtures.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/conftest.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/fixtures/employee.csv +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/fixtures/employee.json +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/fixtures/employee.parquet +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/fixtures/employee_extra_line.csv +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/integration/__init__.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/integration/engines/__init__.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/integration/engines/bigquery/__init__.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/integration/engines/bigquery/test_bigquery_catalog.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/integration/engines/bigquery/test_bigquery_session.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/integration/engines/duck/__init__.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/integration/engines/duck/test_duckdb_catalog.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/integration/engines/duck/test_duckdb_dataframe.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/integration/engines/duck/test_duckdb_reader.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/integration/engines/duck/test_duckdb_session.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/integration/engines/postgres/__init__.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/integration/engines/postgres/test_postgres_catalog.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/integration/engines/postgres/test_postgres_dataframe.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/integration/engines/postgres/test_postgres_session.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/integration/engines/redshift/__init__.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/integration/engines/redshift/test_redshift_catalog.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/integration/engines/redshift/test_redshift_session.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/integration/engines/snowflake/__init__.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/integration/engines/snowflake/test_snowflake_catalog.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/integration/engines/spark/__init__.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/integration/engines/spark/test_spark_catalog.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/integration/engines/test_engine_dataframe.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/integration/engines/test_engine_reader.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/integration/engines/test_engine_session.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/integration/engines/test_engine_writer.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/integration/fixtures.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/integration/test_int_dataframe.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/integration/test_int_dataframe_stats.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/integration/test_int_grouped_data.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/integration/test_int_session.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/types.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/unit/__init__.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/unit/standalone/__init__.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/unit/standalone/fixtures.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/unit/standalone/test_column.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/unit/standalone/test_dataframe.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/unit/standalone/test_dataframe_writer.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/unit/standalone/test_functions.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/unit/standalone/test_session_case_sensitivity.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/unit/standalone/test_types.py +0 -0
- {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/unit/standalone/test_window.py +0 -0
{sqlframe-1.2.0 → sqlframe-1.3.0}/Makefile

@@ -1,5 +1,5 @@
 install-dev:
-	pip install -e ".[dev,docs,duckdb,postgres,redshift,
+	pip install -e ".[bigquery,dev,docs,duckdb,pandas,postgres,redshift,snowflake,spark]"
 
 install-pre-commit:
 	pre-commit install
@@ -8,7 +8,7 @@ slow-test:
 	pytest -n auto tests
 
 fast-test:
-	pytest -n auto
+	pytest -n auto tests/unit
 
 local-test:
 	pytest -n auto -m "fast or local"
{sqlframe-1.2.0 → sqlframe-1.3.0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: sqlframe
-Version: 1.2.0
+Version: 1.3.0
 Summary: Taking the Spark out of PySpark by converting to SQL
 Home-page: https://github.com/eakmanrq/sqlframe
 Author: Ryan Eakman
@@ -20,6 +20,8 @@ Provides-Extra: bigquery
 Provides-Extra: dev
 Provides-Extra: docs
 Provides-Extra: duckdb
+Provides-Extra: openai
+Provides-Extra: pandas
 Provides-Extra: postgres
 Provides-Extra: redshift
 Provides-Extra: snowflake
sqlframe-1.3.0/docs/configuration.md (new file)

@@ -0,0 +1,229 @@
+# General Configuration
+
+## Generated SQL
+
+### Pretty
+
+If the SQL should be returned in a "pretty" format meaning it has newlines and indentation. Defaults to `True`.
+
+```python
+from sqlframe.standalone import StandaloneSession
+
+session = StandaloneSession()
+
+df = session.createDataFrame([{'a': 1, 'b': 2}])
+```
+```python
+>>> print(df.sql())
+SELECT
+  CAST(`a1`.`a` AS BIGINT) AS `a`,
+  CAST(`a1`.`b` AS BIGINT) AS `b`
+FROM VALUES
+  (1, 2) AS `a1`(`a`, `b`)
+```
+```python
+>>> print(df.sql(pretty=False))
+SELECT CAST(`a3`.`a` AS BIGINT) AS `a`, CAST(`a3`.`b` AS BIGINT) AS `b` FROM VALUES (1, 2) AS `a3`(`a`, `b`)
+```
+
+### Optimized
+
+Optimized SQL is SQL that has been processed by SQLGlot's optimizer. For complex queries this will significantly reduce the number of CTEs produced and remove extra unused columns. Defaults to `True`.
+
+```python
+from sqlframe.bigquery import BigQuerySession
+from sqlframe.bigquery import functions as F
+from sqlframe.bigquery import Window
+
+session = BigQuerySession()
+table_path = "bigquery-public-data.samples.natality"
+# Top 5 years with the greatest year-over-year % change in new families with single child
+df = (
+    session.table(table_path)
+    .where(F.col("ever_born") == 1)
+    .groupBy("year")
+    .agg(F.count("*").alias("num_single_child_families"))
+    .withColumn(
+        "last_year_num_single_child_families",
+        F.lag(F.col("num_single_child_families"), 1).over(Window.orderBy("year"))
+    )
+    .withColumn(
+        "percent_change",
+        (F.col("num_single_child_families") - F.col("last_year_num_single_child_families"))
+        / F.col("last_year_num_single_child_families")
+    )
+    .orderBy(F.abs(F.col("percent_change")).desc())
+    .select(
+        F.col("year").alias("year"),
+        F.format_number("num_single_child_families", 0).alias("new families single child"),
+        F.format_number(F.col("percent_change") * 100, 2).alias("percent change"),
+    )
+    .limit(5)
+)
+```
+```python
+>>> print(df.sql(optimize=True))
+WITH `t94228042` AS (
+  SELECT
+    `natality`.`year` AS `year`,
+    COUNT(*) AS `num_single_child_families`
+  FROM `bigquery-public-data`.`samples`.`natality` AS `natality`
+  WHERE
+    `natality`.`ever_born` = 1
+  GROUP BY
+    `natality`.`year`
+), `t30206548` AS (
+  SELECT
+    `t94228042`.`year` AS `year`,
+    `t94228042`.`num_single_child_families` AS `num_single_child_families`,
+    LAG(`t94228042`.`num_single_child_families`, 1) OVER (ORDER BY `t94228042`.`year`) AS `last_year_num_single_child_families`
+  FROM `t94228042` AS `t94228042`
+)
+SELECT
+  `t30206548`.`year` AS `year`,
+  FORMAT('%\'.0f', ROUND(CAST(`t30206548`.`num_single_child_families` AS FLOAT64), 0)) AS `new families single child`,
+  FORMAT(
+    '%\'.2f',
+    ROUND(
+      CAST((
+        (
+          (
+            `t30206548`.`num_single_child_families` - `t30206548`.`last_year_num_single_child_families`
+          ) / `t30206548`.`last_year_num_single_child_families`
+        ) * 100
+      ) AS FLOAT64),
+      2
+    )
+  ) AS `percent change`
+FROM `t30206548` AS `t30206548`
+ORDER BY
+  ABS(`percent_change`) DESC
+LIMIT 5
+```
+```python
+>>> print(df.sql(optimize=False))
+WITH t14183493 AS (
+  SELECT
+    `source_year`,
+    `year`,
+    `month`,
+    `day`,
+    `wday`,
+    `state`,
+    `is_male`,
+    `child_race`,
+    `weight_pounds`,
+    `plurality`,
+    `apgar_1min`,
+    `apgar_5min`,
+    `mother_residence_state`,
+    `mother_race`,
+    `mother_age`,
+    `gestation_weeks`,
+    `lmp`,
+    `mother_married`,
+    `mother_birth_state`,
+    `cigarette_use`,
+    `cigarettes_per_day`,
+    `alcohol_use`,
+    `drinks_per_week`,
+    `weight_gain_pounds`,
+    `born_alive_alive`,
+    `born_alive_dead`,
+    `born_dead`,
+    `ever_born`,
+    `father_race`,
+    `father_age`,
+    `record_weight`
+  FROM bigquery-public-data.samples.natality
+), t17633417 AS (
+  SELECT
+    year,
+    COUNT(*) AS num_single_child_families
+  FROM t14183493
+  WHERE
+    ever_born = 1
+  GROUP BY
+    year
+), t32066970 AS (
+  SELECT
+    year,
+    num_single_child_families,
+    LAG(num_single_child_families, 1) OVER (ORDER BY year) AS last_year_num_single_child_families
+  FROM t17633417
+), t21362690 AS (
+  SELECT
+    year,
+    num_single_child_families,
+    last_year_num_single_child_families,
+    (
+      (
+        num_single_child_families - last_year_num_single_child_families
+      ) / last_year_num_single_child_families
+    ) AS percent_change
+  FROM t32066970
+  ORDER BY
+    ABS(percent_change) DESC
+)
+SELECT
+  year AS year,
+  FORMAT('%\'.0f', ROUND(CAST(num_single_child_families AS FLOAT64), 0)) AS `new families single child`,
+  FORMAT('%\'.2f', ROUND(CAST((
+    percent_change * 100
+  ) AS FLOAT64), 2)) AS `percent change`
+FROM t21362690
+LIMIT 5
+```
+
+### Override Dialect
+
+The dialect of the generated SQL will be based on the session's dialect. However, you can override the dialect by passing a string to the `dialect` parameter. This is useful when you want to generate SQL for a different database.
+
+```python
+# create session and `df` like normal
+df.sql(dialect="bigquery")
+```
+
+### OpenAI Enriched
+
+OpenAI's models can be used to enrich the generated SQL to make it more human-like.
+This is useful when you want to generate SQL that is more readable for humans.
+You must have `OPENAI_API_KEY` set in your environment variables to use this feature.
+
+```python
+# create session and `df` like normal
+# The model to use defaults to `gpt-4o` but can be changed by passing a string to the `openai_model` parameter.
+>>> df.sql(optimize=False, use_openai=True)
+WITH natality_data AS (
+  SELECT
+    year,
+    ever_born
+  FROM `bigquery-public-data`.`samples`.`natality`
+), single_child_families AS (
+  SELECT
+    year,
+    COUNT(*) AS num_single_child_families
+  FROM natality_data
+  WHERE ever_born = 1
+  GROUP BY year
+), lagged_families AS (
+  SELECT
+    year,
+    num_single_child_families,
+    LAG(num_single_child_families, 1) OVER (ORDER BY year) AS last_year_num_single_child_families
+  FROM single_child_families
+), percent_change_families AS (
+  SELECT
+    year,
+    num_single_child_families,
+    ((num_single_child_families - last_year_num_single_child_families) / last_year_num_single_child_families) AS percent_change
+  FROM lagged_families
+  ORDER BY ABS(percent_change) DESC
+)
+SELECT
+  year,
+  FORMAT('%\'.0f', ROUND(CAST(num_single_child_families AS FLOAT64), 0)) AS `new families single child`,
+  FORMAT('%\'.2f', ROUND(CAST((percent_change * 100) AS FLOAT64), 2)) AS `percent change`
+FROM percent_change_families
+LIMIT 5
+```
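The new `docs/configuration.md` above documents the `pretty`, `optimize`, `dialect`, and `use_openai`/`openai_model` parameters of `df.sql()`. A minimal sketch combining them, assuming an in-memory DuckDB session (exact output depends on the engine and the SQLGlot version installed):

```python
from sqlframe.duckdb import DuckDBSession

session = DuckDBSession()  # defaults to an in-memory DuckDB connection
df = session.createDataFrame([{"a": 1, "b": 2}])

print(df.sql(pretty=False))        # one-line SQL instead of the pretty default
print(df.sql(optimize=False))      # skip SQLGlot's optimizer, keep the raw CTE chain
print(df.sql(dialect="bigquery"))  # transpile to another dialect

# Requires `pip install "sqlframe[openai]"` and OPENAI_API_KEY in the environment:
# print(df.sql(optimize=False, use_openai=True, openai_model="gpt-4o"))
```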
{sqlframe-1.2.0 → sqlframe-1.3.0}/setup.py

@@ -26,11 +26,11 @@ setup(
         "bigquery": [
             "google-cloud-bigquery[pandas]>=3,<4",
             "google-cloud-bigquery-storage>=2,<3",
-            "pandas>=2,<3",
         ],
         "dev": [
             "duckdb>=0.9,<0.11",
             "mypy>=1.10.0,<1.11",
+            "openai>=1.30,<1.31",
             "pandas>=2,<3",
             "pandas-stubs>=2,<3",
             "psycopg>=3.1,<4",
@@ -56,17 +56,20 @@ setup(
             "duckdb>=0.9,<0.11",
             "pandas>=2,<3",
         ],
-        "postgres": [
+        "openai": [
+            "openai>=1.30,<1.31",
+        ],
+        "pandas": [
             "pandas>=2,<3",
+        ],
+        "postgres": [
             "psycopg2>=2.8,<3",
         ],
         "redshift": [
-            "pandas>=2,<3",
             "redshift_connector>=2.1.1,<2.2.0",
         ],
         "snowflake": [
-            "pandas>=2,<3",
-            "snowflake-connector-python[pandas,secure-local-storage]>=3.10.0,<3.11",
+            "snowflake-connector-python[secure-local-storage]>=3.10.0,<3.11",
         ],
         "spark": [
             "pyspark>=2,<3.6",
{sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/base/column.py

@@ -9,9 +9,11 @@ import typing as t
 import sqlglot
 from sqlglot import expressions as exp
 from sqlglot.helper import flatten, is_iterable
+from sqlglot.optimizer.normalize_identifiers import normalize_identifiers
 
+from sqlframe.base.decorators import normalize
 from sqlframe.base.types import DataType
-from sqlframe.base.util import get_func_from_session
+from sqlframe.base.util import get_func_from_session, quote_preserving_alias_or_name
 
 if t.TYPE_CHECKING:
     from sqlframe.base._typing import ColumnOrLiteral, ColumnOrName
@@ -237,7 +239,7 @@ class Column:
 
     @property
     def alias_or_name(self) -> str:
-        return self.expression.alias_or_name
+        return quote_preserving_alias_or_name(self.expression)  # type: ignore
 
     @classmethod
     def ensure_literal(cls, value) -> Column:
@@ -266,7 +268,9 @@ class Column:
         from sqlframe.base.session import _BaseSession
 
         dialect = _BaseSession().input_dialect
-        alias: exp.Expression =
+        alias: exp.Expression = normalize_identifiers(
+            exp.parse_identifier(name, dialect=dialect), dialect=dialect
+        )
         new_expression = exp.Alias(
             this=self.column_expression,
             alias=alias.this if isinstance(alias, exp.Column) else alias,
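The `alias` change above routes the name through `exp.parse_identifier` and `normalize_identifiers`. A rough standalone illustration with plain SQLGlot (dialect hard-coded to DuckDB for the example):

```python
from sqlglot import exp
from sqlglot.optimizer.normalize_identifiers import normalize_identifiers

# Unquoted identifiers are folded to the dialect's default case...
ident = exp.parse_identifier("MixedCase", dialect="duckdb")
print(normalize_identifiers(ident, dialect="duckdb").sql(dialect="duckdb"))  # mixedcase

# ...while quoted identifiers keep their exact casing.
ident = exp.parse_identifier('"MixedCase"', dialect="duckdb")
print(normalize_identifiers(ident, dialect="duckdb").sql(dialect="duckdb"))  # "MixedCase"
```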
{sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/base/dataframe.py

@@ -15,13 +15,18 @@ from prettytable import PrettyTable
 from sqlglot import Dialect
 from sqlglot import expressions as exp
 from sqlglot.helper import ensure_list, object_to_dict, seq_get
+from sqlglot.optimizer.pushdown_projections import pushdown_projections
+from sqlglot.optimizer.qualify import qualify
 from sqlglot.optimizer.qualify_columns import quote_identifiers
 
+from sqlframe.base.decorators import normalize
 from sqlframe.base.operations import Operation, operation
 from sqlframe.base.transforms import replace_id_value
 from sqlframe.base.util import (
     get_func_from_session,
     get_tables_from_expression_with_join,
+    quote_preserving_alias_or_name,
+    verify_openai_installed,
 )
 
 if sys.version_info >= (3, 11):
@@ -410,7 +415,7 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
 
         outer_select = item.find(exp.Select)
         if outer_select:
-            return [col(x.alias_or_name) for x in outer_select.expressions]
+            return [col(quote_preserving_alias_or_name(x)) for x in outer_select.expressions]
         return []
 
     def _create_hash_from_expression(self, expression: exp.Expression) -> str:
@@ -471,6 +476,8 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
         dialect: DialectType = None,
         optimize: bool = True,
         pretty: bool = True,
+        use_openai: bool = False,
+        openai_model: str = "gpt-4o",
         as_list: bool = False,
         **kwargs,
     ) -> t.Union[str, t.List[str]]:
@@ -490,6 +497,9 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
             select_expression = t.cast(
                 exp.Select, self.session._optimize(select_expression, dialect=dialect)
             )
+        elif use_openai:
+            qualify(select_expression, dialect=dialect, schema=self.session.catalog._schema)
+            pushdown_projections(select_expression, schema=self.session.catalog._schema)
 
         select_expression = df._replace_cte_names_with_hashes(select_expression)
 
@@ -505,7 +515,9 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
         self.session.catalog.add_table(
             cache_table_name,
             {
-                expression.alias_or_name: expression.type.sql(dialect=dialect)
+                quote_preserving_alias_or_name(expression): expression.type.sql(
+                    dialect=dialect
+                )
                 if expression.type
                 else "UNKNOWN"
                 for expression in select_expression.expressions
@@ -541,10 +553,40 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
 
             output_expressions.append(expression)
 
-        results = [
-            expression.sql(dialect=dialect, pretty=pretty, **kwargs)
-            for expression in output_expressions
-        ]
+        results = []
+        for expression in output_expressions:
+            sql = expression.sql(dialect=dialect, pretty=pretty, **kwargs)
+            if use_openai:
+                verify_openai_installed()
+                from openai import OpenAI
+
+                client = OpenAI()
+                prompt = f"""
+                You are a backend tool that converts correct {dialect} SQL to simplified and more human readable version.
+                You respond without code block with rewritten {dialect} SQL.
+                You don't change any column names in the final select because the user expects those to remain the same.
+                You make unique CTE alias names match what a human would write and in snake case.
+                You improve formatting with spacing and line-breaks.
+                You remove redundant parenthesis and aliases.
+                When remove extra quotes, make sure to keep quotes around words that could be reserved words
+                """
+                chat_completed = client.chat.completions.create(
+                    messages=[
+                        {
+                            "role": "system",
+                            "content": prompt,
+                        },
+                        {
+                            "role": "user",
+                            "content": sql,
+                        },
+                    ],
+                    model=openai_model,
+                )
+                assert chat_completed.choices[0].message.content is not None
+                sql = chat_completed.choices[0].message.content
+            results.append(sql)
 
         if as_list:
             return results
         return ";\n".join(results)
@@ -688,7 +730,7 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
         join_expression = self._add_ctes_to_expression(join_expression, other_df.expression.ctes)
         self_columns = self._get_outer_select_columns(join_expression)
         other_columns = self._get_outer_select_columns(other_df.expression)
-        join_columns = self.
+        join_columns = self._ensure_and_normalize_cols(on)
         # Determines the join clause and select columns to be used passed on what type of columns were provided for
         # the join. The columns returned changes based on how the on expression is provided.
         if how != "cross":
@@ -1324,6 +1366,7 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
         assert sqls[-1] is not None
         return self.session._fetchdf(sqls[-1])
 
+    @normalize("name")
     def createOrReplaceTempView(self, name: str) -> None:
         self.session.temp_views[name] = self.copy()._convert_leaf_to_cte()
 
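When `use_openai=True` is passed without `optimize=True`, the `sql()` method above first runs SQLGlot's `qualify` and `pushdown_projections` over the expression. A standalone sketch of what that preprocessing does, using plain SQLGlot and a made-up query:

```python
import sqlglot
from sqlglot.optimizer.pushdown_projections import pushdown_projections
from sqlglot.optimizer.qualify import qualify

expression = sqlglot.parse_one(
    "SELECT year FROM (SELECT year, ever_born FROM natality) AS t"
)
expression = qualify(expression)               # qualify table and column references
expression = pushdown_projections(expression)  # prune columns nothing selects
print(expression.sql(pretty=True))             # the inner SELECT drops ever_born
```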
sqlframe-1.3.0/sqlframe/base/decorators.py (new file)

@@ -0,0 +1,53 @@
+from __future__ import annotations
+
+import functools
+import typing as t
+
+from sqlglot import parse_one
+from sqlglot.helper import ensure_list
+from sqlglot.optimizer.normalize_identifiers import normalize_identifiers
+
+if t.TYPE_CHECKING:
+    from sqlframe.base.catalog import _BaseCatalog
+
+CALLING_CLASS = t.TypeVar("CALLING_CLASS")
+
+
+def normalize(normalize_kwargs: t.Union[str, t.List[str]]) -> t.Callable[[t.Callable], t.Callable]:
+    """
+    Decorator used to normalize identifiers in the kwargs of a method.
+    """
+
+    def decorator(func: t.Callable) -> t.Callable:
+        @functools.wraps(func)
+        def wrapper(self: CALLING_CLASS, *args, **kwargs) -> CALLING_CLASS:
+            from sqlframe.base.session import _BaseSession
+
+            input_dialect = _BaseSession().input_dialect
+            kwargs.update(dict(zip(func.__code__.co_varnames[1:], args)))
+            for kwarg in ensure_list(normalize_kwargs):
+                if kwarg in kwargs:
+                    value = kwargs.get(kwarg)
+                    if value:
+                        expression = (
+                            parse_one(value, dialect=input_dialect)
+                            if isinstance(value, str)
+                            else value
+                        )
+                        kwargs[kwarg] = normalize_identifiers(expression, input_dialect).sql(
+                            dialect=input_dialect
+                        )
+            return func(self, **kwargs)
+
+        wrapper.__wrapped__ = func  # type: ignore
+        return wrapper
+
+    return decorator
+
+
+def func_metadata(unsupported_engines: t.Optional[t.Union[str, t.List[str]]] = None) -> t.Callable:
+    def _metadata(func: t.Callable) -> t.Callable:
+        func.unsupported_engines = ensure_list(unsupported_engines) if unsupported_engines else []  # type: ignore
+        return func
+
+    return _metadata
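A sketch of the new `@normalize` decorator's effect, inferred from the code above; it assumes an in-memory DuckDB session, where unquoted identifiers normalize to lowercase:

```python
from sqlframe.duckdb import DuckDBSession

session = DuckDBSession()
df = session.createDataFrame([{"a": 1}])

# createOrReplaceTempView is now decorated with @normalize("name"), so the
# view should be registered under the normalized identifier ("myview")...
df.createOrReplaceTempView("MyView")

# ...and the reader's table() is decorated with @normalize("tableName"),
# so either spelling should resolve to the same temp view.
session.table("myview").show()
```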
{sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/base/mixins/catalog_mixins.py

@@ -13,7 +13,7 @@ from sqlframe.base.catalog import (
     _BaseCatalog,
 )
 from sqlframe.base.decorators import normalize
-from sqlframe.base.util import
+from sqlframe.base.util import schema_, to_schema
 
 
 class _BaseInfoSchemaMixin(_BaseCatalog, t.Generic[SESSION, DF]):
{sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/base/mixins/readwriter_mixins.py

@@ -3,8 +3,6 @@ from __future__ import annotations
 import pathlib
 import typing as t
 
-import pandas as pd
-
 from sqlframe.base.exceptions import UnsupportedOperationError
 from sqlframe.base.readerwriter import (
     DF,
@@ -13,7 +11,7 @@ from sqlframe.base.readerwriter import (
     _BaseDataFrameWriter,
     _infer_format,
 )
-from sqlframe.base.util import pandas_to_spark_schema
+from sqlframe.base.util import pandas_to_spark_schema, verify_pandas_installed
 
 if t.TYPE_CHECKING:
     from sqlframe.base._typing import OptionalPrimitiveType, PathOrPaths
@@ -72,6 +70,9 @@ class PandasLoaderMixin(_BaseDataFrameReader, t.Generic[SESSION, DF]):
         |100|NULL|
         +---+----+
         """
+        verify_pandas_installed()
+        import pandas as pd
+
         assert path is not None, "path is required"
         assert isinstance(path, str), "path must be a string"
         format = format or _infer_format(path)
{sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/base/readerwriter.py

@@ -11,6 +11,8 @@ from functools import reduce
 from sqlglot import exp
 from sqlglot.helper import object_to_dict
 
+from sqlframe.base.decorators import normalize
+
 if sys.version_info >= (3, 11):
     from typing import Self
 else:
@@ -39,6 +41,7 @@ class _BaseDataFrameReader(t.Generic[SESSION, DF]):
     def session(self) -> SESSION:
         return self._session
 
+    @normalize("tableName")
     def table(self, tableName: str) -> DF:
         if df := self.session.temp_views.get(tableName):
             return df
{sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/base/session.py

@@ -24,7 +24,10 @@ from sqlglot.schema import MappingSchema
 from sqlframe.base.catalog import _BaseCatalog
 from sqlframe.base.dataframe import _BaseDataFrame
 from sqlframe.base.readerwriter import _BaseDataFrameReader, _BaseDataFrameWriter
-from sqlframe.base.util import
+from sqlframe.base.util import (
+    get_column_mapping_from_schema_input,
+    verify_pandas_installed,
+)
 
 if sys.version_info >= (3, 11):
     from typing import Self
@@ -412,6 +415,7 @@ class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, CONN]):
         self, expression: exp.Expression, dialect: t.Optional[Dialect] = None
     ) -> exp.Expression:
         dialect = dialect or self.output_dialect
+        normalize_identifiers(expression, dialect=self.input_dialect)
         quote_identifiers_func(expression, dialect=dialect)
         return optimize(expression, dialect=dialect, schema=self.catalog._schema)
 
@@ -446,14 +450,6 @@ class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, CONN]):
     def _fetch_rows(
         self, sql: t.Union[str, exp.Expression], *, quote_identifiers: bool = True
     ) -> t.List[Row]:
-        from sqlframe.base.types import Row
-
-        def _dict_to_row(row: t.Dict[str, t.Any]) -> Row:
-            for key, value in row.items():
-                if isinstance(value, dict):
-                    row[key] = _dict_to_row(value)
-            return Row(**row)
-
         self._execute(sql, quote_identifiers=quote_identifiers)
         result = self._cur.fetchall()
         if not self._cur.description:
@@ -464,6 +460,7 @@ class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, CONN]):
     def _fetchdf(
         self, sql: t.Union[str, exp.Expression], *, quote_identifiers: bool = True
     ) -> pd.DataFrame:
+        verify_pandas_installed()
         from pandas.io.sql import read_sql_query
 
         return read_sql_query(self._to_sql(sql, quote_identifiers=quote_identifiers), self._conn)
{sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/base/util.py

@@ -154,7 +154,12 @@ def pandas_to_spark_schema(pandas_df: PandasDataFrame) -> types.StructType:
     """
     from sqlframe.base import types
 
-    columns = list(pandas_df.columns)
+    columns = list(
+        [
+            x.replace("?column?", f"unknown_column_{i}").replace("NULL", f"unknown_column_{i}")
+            for i, x in enumerate(pandas_df.columns)
+        ]
+    )
     d_types = list(pandas_df.dtypes)
     p_schema = types.StructType(
         [
@@ -240,3 +245,35 @@ def soundex(s):
 
     result += "0" * (4 - count)
     return "".join(result)
+
+
+def verify_pandas_installed():
+    try:
+        import pandas  # noqa
+    except ImportError:
+        raise ImportError(
+            """Pandas is required for this functionality. `pip install "sqlframe[pandas]"` (also include your engine if needed) to install pandas."""
+        )
+
+
+def verify_openai_installed():
+    try:
+        import openai  # noqa
+    except ImportError:
+        raise ImportError(
+            """OpenAI is required for this functionality. `pip install "sqlframe[openai]"` (also include your engine if needed) to install openai."""
+        )
+
+
+def quote_preserving_alias_or_name(col: t.Union[exp.Column, exp.Alias]) -> str:
+    from sqlframe.base.session import _BaseSession
+
+    if isinstance(col, exp.Alias):
+        col = col.args["alias"]
+    if isinstance(col, exp.Column):
+        col = col.copy()
+        col.set("table", None)
+    if isinstance(col, (exp.Identifier, exp.Column)):
+        return col.sql(dialect=_BaseSession().input_dialect)
+    # We may get things like `Null()` expression or maybe literals so we just return the alias or name in those cases
+    return col.alias_or_name
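The intent of `quote_preserving_alias_or_name` above, contrasted with SQLGlot's plain `alias_or_name` (illustrative only; the real helper renders with the session's input dialect):

```python
from sqlglot import parse_one

projection = parse_one('SELECT a AS "Total Count" FROM t', read="postgres").expressions[0]

# alias_or_name returns the bare identifier text, losing the quoting:
print(projection.alias_or_name)  # Total Count

# quote_preserving_alias_or_name(projection) instead renders the alias
# identifier itself, keeping its quotes: "Total Count"
```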
{sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/snowflake/catalog.py

@@ -127,7 +127,9 @@ class SnowflakeCatalog(
         sql = f"SHOW COLUMNS IN TABLE {table.sql(dialect=self.session.input_dialect)}"
         results = self.session._fetch_rows(sql)
         return {
-            row["column_name"]: exp.DataType.build(
+            exp.column(row["column_name"], quoted=True).sql(
+                dialect=self.session.input_dialect
+            ): exp.DataType.build(
                 json.loads(row["data_type"])["type"], dialect=self.session.input_dialect, udt=True
             )
             for row in results
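The Snowflake catalog change above wraps the raw `column_name` values from `SHOW COLUMNS` in quoted identifiers before using them as schema keys. In isolation:

```python
from sqlglot import exp

# Quoting preserves the exact casing Snowflake reports for the column.
print(exp.column("My Column", quoted=True).sql(dialect="snowflake"))  # "My Column"
```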