sqlframe 1.0.0__tar.gz → 1.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (191)
  1. {sqlframe-1.0.0 → sqlframe-1.1.1}/Makefile +1 -1
  2. {sqlframe-1.0.0 → sqlframe-1.1.1}/PKG-INFO +1 -1
  3. {sqlframe-1.0.0 → sqlframe-1.1.1}/docs/bigquery.md +19 -0
  4. {sqlframe-1.0.0 → sqlframe-1.1.1}/docs/duckdb.md +16 -0
  5. {sqlframe-1.0.0 → sqlframe-1.1.1}/docs/postgres.md +18 -0
  6. {sqlframe-1.0.0 → sqlframe-1.1.1}/docs/standalone.md +18 -0
  7. {sqlframe-1.0.0 → sqlframe-1.1.1}/setup.py +2 -2
  8. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/_version.py +2 -2
  9. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/base/catalog.py +3 -2
  10. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/base/column.py +1 -1
  11. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/base/dataframe.py +10 -5
  12. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/base/functions.py +1 -1
  13. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/base/group.py +1 -1
  14. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/base/normalize.py +1 -1
  15. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/base/operations.py +1 -1
  16. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/base/readerwriter.py +1 -1
  17. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/base/session.py +5 -13
  18. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/base/transforms.py +1 -1
  19. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/base/types.py +1 -1
  20. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/base/util.py +2 -0
  21. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/base/window.py +1 -1
  22. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/bigquery/group.py +1 -1
  23. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/bigquery/readwriter.py +1 -1
  24. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/duckdb/catalog.py +1 -1
  25. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/duckdb/group.py +1 -1
  26. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/duckdb/readwriter.py +18 -6
  27. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/postgres/catalog.py +1 -1
  28. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/postgres/group.py +1 -1
  29. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/postgres/readwriter.py +1 -1
  30. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/redshift/catalog.py +1 -1
  31. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/redshift/group.py +1 -1
  32. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/redshift/readwriter.py +1 -1
  33. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/snowflake/catalog.py +1 -1
  34. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/snowflake/group.py +1 -1
  35. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/snowflake/readwriter.py +1 -1
  36. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/spark/catalog.py +1 -1
  37. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/spark/group.py +1 -1
  38. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/spark/readwriter.py +1 -1
  39. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/standalone/group.py +1 -1
  40. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/standalone/readwriter.py +1 -1
  41. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/standalone/session.py +1 -1
  42. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe.egg-info/PKG-INFO +1 -1
  43. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe.egg-info/SOURCES.txt +2 -0
  44. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe.egg-info/requires.txt +2 -2
  45. sqlframe-1.1.1/tests/fixtures/employee_extra_line.csv +7 -0
  46. sqlframe-1.1.1/tests/integration/engines/duck/test_duckdb_reader.py +57 -0
  47. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/integration/test_int_dataframe.py +11 -0
  48. sqlframe-1.1.1/tests/unit/standalone/test_dataframe.py +66 -0
  49. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/unit/standalone/test_dataframe_writer.py +6 -6
  50. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/unit/standalone/test_session.py +1 -1
  51. sqlframe-1.0.0/tests/unit/standalone/test_dataframe.py +0 -46
  52. {sqlframe-1.0.0 → sqlframe-1.1.1}/.github/CODEOWNERS +0 -0
  53. {sqlframe-1.0.0 → sqlframe-1.1.1}/.github/workflows/main.workflow.yaml +0 -0
  54. {sqlframe-1.0.0 → sqlframe-1.1.1}/.github/workflows/publish.workflow.yaml +0 -0
  55. {sqlframe-1.0.0 → sqlframe-1.1.1}/.gitignore +0 -0
  56. {sqlframe-1.0.0 → sqlframe-1.1.1}/.pre-commit-config.yaml +0 -0
  57. {sqlframe-1.0.0 → sqlframe-1.1.1}/.readthedocs.yaml +0 -0
  58. {sqlframe-1.0.0 → sqlframe-1.1.1}/LICENSE +0 -0
  59. {sqlframe-1.0.0 → sqlframe-1.1.1}/README.md +0 -0
  60. {sqlframe-1.0.0 → sqlframe-1.1.1}/blogs/images/but_wait_theres_more.gif +0 -0
  61. {sqlframe-1.0.0 → sqlframe-1.1.1}/blogs/images/cake.gif +0 -0
  62. {sqlframe-1.0.0 → sqlframe-1.1.1}/blogs/images/you_get_pyspark_api.gif +0 -0
  63. {sqlframe-1.0.0 → sqlframe-1.1.1}/blogs/sqlframe_universal_dataframe_api.md +0 -0
  64. {sqlframe-1.0.0 → sqlframe-1.1.1}/docs/docs/bigquery.md +0 -0
  65. {sqlframe-1.0.0 → sqlframe-1.1.1}/docs/docs/duckdb.md +0 -0
  66. {sqlframe-1.0.0 → sqlframe-1.1.1}/docs/docs/images/SF.png +0 -0
  67. {sqlframe-1.0.0 → sqlframe-1.1.1}/docs/docs/images/favicon.png +0 -0
  68. {sqlframe-1.0.0 → sqlframe-1.1.1}/docs/docs/images/favicon_old.png +0 -0
  69. {sqlframe-1.0.0 → sqlframe-1.1.1}/docs/docs/images/sqlframe_diagram.png +0 -0
  70. {sqlframe-1.0.0 → sqlframe-1.1.1}/docs/docs/images/sqlframe_logo.png +0 -0
  71. {sqlframe-1.0.0 → sqlframe-1.1.1}/docs/docs/postgres.md +0 -0
  72. {sqlframe-1.0.0 → sqlframe-1.1.1}/docs/images/SF.png +0 -0
  73. {sqlframe-1.0.0 → sqlframe-1.1.1}/docs/images/favicon.png +0 -0
  74. {sqlframe-1.0.0 → sqlframe-1.1.1}/docs/images/favicon_old.png +0 -0
  75. {sqlframe-1.0.0 → sqlframe-1.1.1}/docs/images/sqlframe_diagram.png +0 -0
  76. {sqlframe-1.0.0 → sqlframe-1.1.1}/docs/images/sqlframe_logo.png +0 -0
  77. {sqlframe-1.0.0 → sqlframe-1.1.1}/docs/index.md +0 -0
  78. {sqlframe-1.0.0 → sqlframe-1.1.1}/docs/requirements.txt +0 -0
  79. {sqlframe-1.0.0 → sqlframe-1.1.1}/docs/stylesheets/extra.css +0 -0
  80. {sqlframe-1.0.0 → sqlframe-1.1.1}/mkdocs.yml +0 -0
  81. {sqlframe-1.0.0 → sqlframe-1.1.1}/pytest.ini +0 -0
  82. {sqlframe-1.0.0 → sqlframe-1.1.1}/renovate.json +0 -0
  83. {sqlframe-1.0.0 → sqlframe-1.1.1}/setup.cfg +0 -0
  84. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/LICENSE +0 -0
  85. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/__init__.py +0 -0
  86. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/base/__init__.py +0 -0
  87. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/base/_typing.py +0 -0
  88. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/base/decorators.py +0 -0
  89. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/base/exceptions.py +0 -0
  90. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/base/function_alternatives.py +0 -0
  91. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/base/mixins/__init__.py +0 -0
  92. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/base/mixins/catalog_mixins.py +0 -0
  93. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/base/mixins/readwriter_mixins.py +0 -0
  94. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/bigquery/__init__.py +0 -0
  95. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/bigquery/catalog.py +0 -0
  96. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/bigquery/column.py +0 -0
  97. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/bigquery/dataframe.py +0 -0
  98. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/bigquery/functions.py +0 -0
  99. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/bigquery/functions.pyi +0 -0
  100. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/bigquery/session.py +0 -0
  101. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/bigquery/types.py +0 -0
  102. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/bigquery/window.py +0 -0
  103. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/duckdb/__init__.py +0 -0
  104. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/duckdb/column.py +0 -0
  105. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/duckdb/dataframe.py +0 -0
  106. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/duckdb/functions.py +0 -0
  107. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/duckdb/functions.pyi +0 -0
  108. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/duckdb/session.py +0 -0
  109. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/duckdb/types.py +0 -0
  110. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/duckdb/window.py +0 -0
  111. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/postgres/__init__.py +0 -0
  112. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/postgres/column.py +0 -0
  113. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/postgres/dataframe.py +0 -0
  114. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/postgres/functions.py +0 -0
  115. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/postgres/functions.pyi +0 -0
  116. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/postgres/session.py +0 -0
  117. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/postgres/types.py +0 -0
  118. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/postgres/window.py +0 -0
  119. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/redshift/__init__.py +0 -0
  120. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/redshift/column.py +0 -0
  121. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/redshift/dataframe.py +0 -0
  122. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/redshift/functions.py +0 -0
  123. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/redshift/session.py +0 -0
  124. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/redshift/types.py +0 -0
  125. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/redshift/window.py +0 -0
  126. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/snowflake/__init__.py +0 -0
  127. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/snowflake/column.py +0 -0
  128. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/snowflake/dataframe.py +0 -0
  129. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/snowflake/functions.py +0 -0
  130. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/snowflake/session.py +0 -0
  131. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/snowflake/types.py +0 -0
  132. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/snowflake/window.py +0 -0
  133. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/spark/__init__.py +0 -0
  134. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/spark/column.py +0 -0
  135. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/spark/dataframe.py +0 -0
  136. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/spark/functions.py +0 -0
  137. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/spark/session.py +0 -0
  138. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/spark/types.py +0 -0
  139. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/spark/window.py +0 -0
  140. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/standalone/__init__.py +0 -0
  141. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/standalone/catalog.py +0 -0
  142. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/standalone/column.py +0 -0
  143. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/standalone/dataframe.py +0 -0
  144. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/standalone/functions.py +0 -0
  145. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/standalone/types.py +0 -0
  146. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/standalone/window.py +0 -0
  147. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe.egg-info/dependency_links.txt +0 -0
  148. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe.egg-info/top_level.txt +0 -0
  149. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/__init__.py +0 -0
  150. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/common_fixtures.py +0 -0
  151. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/conftest.py +0 -0
  152. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/fixtures/employee.csv +0 -0
  153. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/fixtures/employee.json +0 -0
  154. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/fixtures/employee.parquet +0 -0
  155. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/integration/__init__.py +0 -0
  156. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/integration/engines/__init__.py +0 -0
  157. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/integration/engines/bigquery/__init__.py +0 -0
  158. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/integration/engines/bigquery/test_bigquery_catalog.py +0 -0
  159. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/integration/engines/bigquery/test_bigquery_session.py +0 -0
  160. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/integration/engines/duck/__init__.py +0 -0
  161. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/integration/engines/duck/test_duckdb_catalog.py +0 -0
  162. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/integration/engines/duck/test_duckdb_session.py +0 -0
  163. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/integration/engines/postgres/__init__.py +0 -0
  164. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/integration/engines/postgres/test_postgres_catalog.py +0 -0
  165. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/integration/engines/postgres/test_postgres_session.py +0 -0
  166. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/integration/engines/redshift/__init__.py +0 -0
  167. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/integration/engines/redshift/test_redshift_catalog.py +0 -0
  168. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/integration/engines/redshift/test_redshift_session.py +0 -0
  169. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/integration/engines/snowflake/__init__.py +0 -0
  170. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/integration/engines/snowflake/test_snowflake_catalog.py +0 -0
  171. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/integration/engines/snowflake/test_snowflake_session.py +0 -0
  172. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/integration/engines/spark/__init__.py +0 -0
  173. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/integration/engines/spark/test_spark_catalog.py +0 -0
  174. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/integration/engines/test_engine_dataframe.py +0 -0
  175. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/integration/engines/test_engine_reader.py +0 -0
  176. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/integration/engines/test_engine_session.py +0 -0
  177. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/integration/engines/test_engine_writer.py +0 -0
  178. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/integration/engines/test_int_functions.py +0 -0
  179. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/integration/fixtures.py +0 -0
  180. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/integration/test_int_dataframe_stats.py +0 -0
  181. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/integration/test_int_grouped_data.py +0 -0
  182. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/integration/test_int_session.py +0 -0
  183. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/types.py +0 -0
  184. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/unit/__init__.py +0 -0
  185. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/unit/standalone/__init__.py +0 -0
  186. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/unit/standalone/fixtures.py +0 -0
  187. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/unit/standalone/test_column.py +0 -0
  188. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/unit/standalone/test_functions.py +0 -0
  189. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/unit/standalone/test_session_case_sensitivity.py +0 -0
  190. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/unit/standalone/test_types.py +0 -0
  191. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/unit/standalone/test_window.py +0 -0
@@ -1,5 +1,5 @@
  install-dev:
- pip install -e ".[dev,duckdb,postgres,redshift,bigquery,snowflake,spark]"
+ pip install -e ".[dev,docs,duckdb,postgres,redshift,bigquery,snowflake,spark]"

  install-pre-commit:
  pre-commit install
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: sqlframe
- Version: 1.0.0
+ Version: 1.1.1
  Summary: Taking the Spark out of PySpark by converting to SQL
  Home-page: https://github.com/eakmanrq/sqlframe
  Author: Ryan Eakman
@@ -43,6 +43,25 @@ Regardless of approach, it is recommended to configure `default_dataset` in the
  session = BigQuerySession(conn=conn, default_dataset="sqlframe.db1")
  ```

+ ## Imports
+
+ If converting a PySpark pipeline, all `pyspark.sql` should be replaced with `sqlframe.bigquery`.
+ In addition, many classes will have a `BigQuery` prefix.
+ For example, `BigQueryDataFrame` instead of `DataFrame`.
+
+
+ ```python
+ # PySpark import
+ # from pyspark.sql import SparkSession
+ # from pyspark.sql import functions as F
+ # from pyspark.sql.dataframe import DataFrame
+ # SQLFrame import
+ from sqlframe.bigquery import BigQuerySession
+ from sqlframe.bigquery import functions as F
+ from sqlframe.bigquery import BigQueryDataFrame
+ ```
+
+
  ## Example Usage

  ```python
@@ -30,7 +30,23 @@ By default, SQLFrame will create a connection to an in-memory database.
  conn = duckdb.connect(database=":memory:")
  session = DuckDBSession(conn=conn)
  ```
+ ## Imports

+ If converting a PySpark pipeline, all `pyspark.sql` should be replaced with `sqlframe.duckdb`.
+ In addition, many classes will have a `DuckDB` prefix.
+ For example, `DuckDBDataFrame` instead of `DataFrame`.
+
+
+ ```python
+ # PySpark import
+ # from pyspark.sql import SparkSession
+ # from pyspark.sql import functions as F
+ # from pyspark.sql.dataframe import DataFrame
+ # SQLFrame import
+ from sqlframe.duckdb import DuckDBSession
+ from sqlframe.duckdb import functions as F
+ from sqlframe.duckdb import DuckDBDataFrame
+ ```

  ## Example Usage

@@ -25,6 +25,24 @@ conn = connect(
  session = PostgresSession(conn=conn)
  ```

+ ## Imports
+
+ If converting a PySpark pipeline, all `pyspark.sql` should be replaced with `sqlframe.postgres`.
+ In addition, many classes will have a `Postgres` prefix.
+ For example, `PostgresDataFrame` instead of `DataFrame`.
+
+
+ ```python
+ # PySpark import
+ # from pyspark.sql import SparkSession
+ # from pyspark.sql import functions as F
+ # from pyspark.sql.dataframe import DataFrame
+ # SQLFrame import
+ from sqlframe.postgres import PostgresSession
+ from sqlframe.postgres import functions as F
+ from sqlframe.postgres import PostgresDataFrame
+ ```
+
  ## Example Usage

  ```python
@@ -24,6 +24,24 @@ from sqlframe.standalone import StandaloneSession
  session = StandaloneSession.builder.config(map={"sqlframe.input.dialect": 'duckdb', "sqlframe.output.dialect": 'bigquery'}).getOrCreate()
  ```

+ ## Imports
+
+ If converting a PySpark pipeline, all `pyspark.sql` should be replaced with `sqlframe.standalone`.
+ In addition, many classes will have a `Standalone` prefix.
+ For example, `StandaloneDataFrame` instead of `DataFrame`.
+
+
+ ```python
+ # PySpark import
+ # from pyspark.sql import SparkSession
+ # from pyspark.sql import functions as F
+ # from pyspark.sql.dataframe import DataFrame
+ # SQLFrame import
+ from sqlframe.standalone import StandaloneSession
+ from sqlframe.standalone import functions as F
+ from sqlframe.standalone import StandaloneDataFrame
+ ```
+
  ## Accessing Tables

  PySpark DataFrame API, and currently SQLFrame, requires that a table can be access to get it's schema information.
@@ -20,7 +20,7 @@ setup(
  python_requires=">=3.8",
  install_requires=[
  "prettytable<3.11.0",
- "sqlglot>=23.14.0,<23.18",
+ "sqlglot>=24.0.0,<24.1",
  ],
  extras_require={
  "bigquery": [
@@ -47,7 +47,7 @@ setup(
  ],
  "docs": [
  "mkdocs==1.4.2",
- "mkdocs-include-markdown-plugin==4.0.3",
+ "mkdocs-include-markdown-plugin==6.0.6",
  "mkdocs-material==9.0.5",
  "mkdocs-material-extensions==1.1.1",
  "pymdown-extensions",
@@ -12,5 +12,5 @@ __version__: str
  __version_tuple__: VERSION_TUPLE
  version_tuple: VERSION_TUPLE

- __version__ = version = '1.0.0'
- __version_tuple__ = version_tuple = (1, 0, 0)
+ __version__ = version = '1.1.1'
+ __version_tuple__ = version_tuple = (1, 1, 1)
@@ -1,4 +1,4 @@
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'dataframe' folder.
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.

  from __future__ import annotations

@@ -8,7 +8,7 @@ from sqlglot import MappingSchema, exp

  from sqlframe.base.decorators import normalize
  from sqlframe.base.exceptions import TableSchemaError
- from sqlframe.base.util import to_schema
+ from sqlframe.base.util import ensure_column_mapping, to_schema

  if t.TYPE_CHECKING:
  from sqlglot.schema import ColumnMapping
@@ -82,6 +82,7 @@ class _BaseCatalog(t.Generic[SESSION, DF]):
  raise TableSchemaError(
  "This session does not have access to a catalog that can lookup column information. See docs for explicitly defining columns or using a session that can automatically determine this."
  )
+ column_mapping = ensure_column_mapping(column_mapping) # type: ignore
  self._schema.add_table(table, column_mapping, dialect=self.session.input_dialect)

  @normalize(["dbName"])
@@ -1,4 +1,4 @@
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'dataframe' folder.
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.

  from __future__ import annotations

@@ -1,4 +1,4 @@
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'dataframe' folder.
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.

  from __future__ import annotations

@@ -417,7 +417,7 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
  from sqlframe.base.session import _BaseSession

  value = expression.sql(dialect=_BaseSession().input_dialect).encode("utf-8")
- hash = f"t{zlib.crc32(value)}"[:6]
+ hash = f"t{zlib.crc32(value)}"[:9]
  return self.session._normalize_string(hash)

  def _get_select_expressions(
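For the hunk above: the generated CTE name is a CRC32 of the SQL text, prefixed with `t` and truncated, so widening the slice from 6 to 9 characters keeps more of the checksum and makes name collisions less likely. A standalone illustration (the sample SQL string here is made up):

```python
import zlib

value = "SELECT fname FROM employee".encode("utf-8")
crc = zlib.crc32(value)   # unsigned 32-bit checksum, up to 10 decimal digits
print(f"t{crc}"[:6])      # old behavior: "t" plus only 5 digits of the checksum
print(f"t{crc}"[:9])      # new behavior: "t" plus up to 8 digits
```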
@@ -606,8 +606,13 @@
  return df._convert_leaf_to_cte(sequence_id=new_sequence_id)

  @operation(Operation.WHERE)
- def where(self, column: t.Union[Column, bool], **kwargs) -> Self:
- col = self._ensure_and_normalize_col(column)
+ def where(self, column: t.Union[Column, str, bool], **kwargs) -> Self:
+ if isinstance(column, str):
+ col = self._ensure_and_normalize_col(
+ sqlglot.parse_one(column, dialect=self.session.input_dialect)
+ )
+ else:
+ col = self._ensure_and_normalize_col(column)
  return self.copy(expression=self.expression.where(col.expression))

  filter = where
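With the hunk above, `where`/`filter` also accepts a raw SQL predicate string, parsed with the session's input dialect. A minimal sketch against DuckDB (the sample column names are illustrative, and `createDataFrame`/`show` are assumed to behave as in PySpark):

```python
from sqlframe.duckdb import DuckDBSession
from sqlframe.duckdb import functions as F

session = DuckDBSession()  # assumed: in-memory DuckDB connection by default
df = session.createDataFrame([(1, "Jack", 37)], ["employee_id", "fname", "age"])

# Both forms are equivalent after this change: a Column expression or a SQL string.
df.where(F.col("age") == 37).show()
df.where("age = 37 AND fname = 'Jack'").show()
```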
@@ -1094,7 +1099,7 @@
  )
  if existing_col_index:
  expression = self.expression.copy()
- expression.expressions[existing_col_index] = col.expression
+ expression.expressions[existing_col_index] = col.alias(colName).expression
  return self.copy(expression=expression)
  return self.copy().select(col.alias(colName), append=True)

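Behavior note for the hunk above: when `withColumn` replaces an existing column, the replacement expression is now explicitly re-aliased to the requested name instead of keeping whatever name the new expression happened to carry. A small sketch (DataFrame construction assumed PySpark-compatible):

```python
from sqlframe.duckdb import DuckDBSession
from sqlframe.duckdb import functions as F

session = DuckDBSession()
df = session.createDataFrame([(1, 37)], ["employee_id", "age"])

# "age" already exists, so the computed expression replaces it in place and,
# with this fix, is aliased back to "age" in the generated SELECT.
df.withColumn("age", F.col("age") + 1).show()
```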
@@ -1,4 +1,4 @@
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'dataframe' folder.
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.

  from __future__ import annotations

@@ -1,4 +1,4 @@
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'dataframe' folder.
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.

  from __future__ import annotations

@@ -1,4 +1,4 @@
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'dataframe' folder.
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.

  from __future__ import annotations

@@ -1,4 +1,4 @@
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'dataframe' folder.
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.

  from __future__ import annotations

@@ -1,4 +1,4 @@
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'dataframe' folder.
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.

  from __future__ import annotations

@@ -1,4 +1,4 @@
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'dataframe' folder.
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.

  from __future__ import annotations

@@ -313,24 +313,16 @@ class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, CONN]):
  sel_expression = exp.Select(**select_kwargs)
  if empty_df:
  sel_expression = sel_expression.where(exp.false())
- # if empty_df:
- # if not column_mapping:
- # # If we don't have rows or columns then we just return a null with a false expression
- # sel_expression = (
- # exp.Select().select("null").from_("VALUES (NULL)").where(exp.false())
- # )
- # else:
- # # Ensure no results are returned if the dataframe is expected to be empty instead of
- # # a row of null values
- # sel_expression = sel_expression.where(exp.false())
  return self._create_df(sel_expression)

- def sql(self, sqlQuery: t.Union[str, exp.Expression]) -> DF:
- expression = self._optimize(
+ def sql(self, sqlQuery: t.Union[str, exp.Expression], optimize: bool = True) -> DF:
+ expression = (
  sqlglot.parse_one(sqlQuery, read=self.input_dialect)
  if isinstance(sqlQuery, str)
  else sqlQuery
  )
+ if optimize:
+ expression = self._optimize(expression)
  if self.temp_views:
  replacement_mapping = {}
  for table in expression.find_all(exp.Table):
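The hunk above adds an `optimize` flag to `sql()`: the query is still parsed with the session's input dialect, but sqlglot's optimizer now only runs when `optimize=True` (the default). A hedged sketch of the call:

```python
from sqlframe.duckdb import DuckDBSession

session = DuckDBSession()  # assumed: in-memory DuckDB by default, per the docs above

# Default behavior is unchanged: parse, then run sqlglot's optimizer.
optimized_df = session.sql("SELECT 1 AS x")

# New in 1.1.x: skip the optimizer and keep the query shape as written.
raw_df = session.sql("SELECT 1 AS x", optimize=False)
```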
@@ -1,4 +1,4 @@
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'dataframe' folder.
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.

  import typing as t

@@ -1,4 +1,4 @@
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'dataframe' folder.
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.

  from __future__ import annotations

@@ -113,6 +113,8 @@ def ensure_column_mapping(schema: t.Union[str, StructType]) -> t.Dict:
  }
  # TODO: Make a protocol with a `simpleString` attribute as what it looks for instead of the actual
  # `StructType` object.
+ elif hasattr(schema, "simpleString"):
+ return {struct_field.name: struct_field.dataType.simpleString() for struct_field in schema}
  return sqlglot_ensure_column_mapping(schema) # type: ignore

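The new `elif` branch lets `ensure_column_mapping` accept a `StructType`-style schema object (anything exposing `simpleString`) and flatten it to a `{column: type}` dict. A rough sketch, assuming `sqlframe.base.types` mirrors PySpark's schema classes:

```python
from sqlframe.base.types import StructType, StructField, IntegerType, StringType
from sqlframe.base.util import ensure_column_mapping

# Assumed PySpark-compatible schema objects; each field's dataType.simpleString()
# supplies the type name used in the resulting mapping.
schema = StructType(
    [
        StructField("employee_id", IntegerType()),
        StructField("fname", StringType()),
    ]
)
print(ensure_column_mapping(schema))  # roughly {'employee_id': 'int', 'fname': 'string'}
```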
@@ -1,4 +1,4 @@
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'dataframe' folder.
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.

  from __future__ import annotations

@@ -1,4 +1,4 @@
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'dataframe' folder.
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.

  from __future__ import annotations

@@ -1,4 +1,4 @@
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'dataframe' folder.
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.

  from __future__ import annotations

@@ -1,4 +1,4 @@
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'dataframe' folder.
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.

  from __future__ import annotations

@@ -1,4 +1,4 @@
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'dataframe' folder.
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.

  from __future__ import annotations

@@ -1,10 +1,13 @@
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'dataframe' folder.
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.

  from __future__ import annotations

  import logging
  import typing as t

+ from sqlglot import exp
+ from sqlglot.helper import ensure_list
+
  from sqlframe.base.readerwriter import _BaseDataFrameReader, _BaseDataFrameWriter
  from sqlframe.base.util import ensure_column_mapping, to_csv

@@ -69,13 +72,22 @@ class DuckDBDataFrameReader(_BaseDataFrameReader["DuckDBSession", "DuckDBDataFra
  |100|NULL|
  +---+----+
  """
+ if schema:
+ column_mapping = ensure_column_mapping(schema)
+ select_columns = [x.expression for x in self._to_casted_columns(column_mapping)]
+ if format == "csv":
+ duckdb_columns = ", ".join(
+ [f"'{column}': '{dtype}'" for column, dtype in column_mapping.items()]
+ )
+ options["columns"] = "{" + duckdb_columns + "}"
+ else:
+ select_columns = [exp.Star()]
  if format:
- sql = f"SELECT * FROM read_{format}('{path}', {to_csv(options)})"
+ paths = ",".join([f"'{path}'" for path in ensure_list(path)])
+ from_clause = f"read_{format}([{paths}], {to_csv(options)})"
  else:
- sql = f"select * from '{path}'"
- df = self.session.sql(sql)
- if schema:
- df = df.select(*self._to_casted_columns(ensure_column_mapping(schema)))
+ from_clause = f"'{path}'"
+ df = self.session.sql(exp.select(*select_columns).from_(from_clause), optimize=False)
  self.session._last_loaded_file = path # type: ignore
  return df

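Net effect of the reader hunk above: a user-supplied schema is now pushed into the generated `read_*` call (via DuckDB's `columns` option for CSV), `load` accepts a list of paths, and the generated query bypasses the optimizer. A sketch mirroring the new integration test added later in this diff (option names copied from that test):

```python
from sqlframe.duckdb import DuckDBSession

session = DuckDBSession()

# Extra keyword arguments are forwarded as DuckDB read_csv options.
df = session.read.load(
    ["tests/fixtures/employee_extra_line.csv", "tests/fixtures/employee_extra_line.csv"],
    format="csv",
    schema="employee_id INT, fname STRING, lname STRING, age INT, store_id INT",
    skip=1,
    header=1,
    null_padding=True,
    ignore_errors=1,
    auto_detect=False,
)
print(df.collect())
```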
@@ -1,4 +1,4 @@
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'dataframe' folder.
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.

  from __future__ import annotations

@@ -1,4 +1,4 @@
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'dataframe' folder.
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.

  from __future__ import annotations

@@ -1,4 +1,4 @@
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'dataframe' folder.
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.

  from __future__ import annotations

@@ -1,4 +1,4 @@
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'dataframe' folder.
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.

  from __future__ import annotations

@@ -1,4 +1,4 @@
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'dataframe' folder.
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.

  from __future__ import annotations

@@ -1,4 +1,4 @@
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'dataframe' folder.
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.

  from __future__ import annotations

@@ -1,4 +1,4 @@
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'dataframe' folder.
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.

  from __future__ import annotations

@@ -1,4 +1,4 @@
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'dataframe' folder.
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.

  from __future__ import annotations

@@ -1,4 +1,4 @@
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'dataframe' folder.
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.

  from __future__ import annotations

@@ -1,4 +1,4 @@
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'dataframe' folder.
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.

  from __future__ import annotations

@@ -1,4 +1,4 @@
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'dataframe' folder.
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.

  from __future__ import annotations

@@ -1,4 +1,4 @@
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'dataframe' folder.
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.

  from __future__ import annotations

@@ -1,4 +1,4 @@
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'dataframe' folder.
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.

  from __future__ import annotations

@@ -1,4 +1,4 @@
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'dataframe' folder.
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.

  from __future__ import annotations

@@ -1,4 +1,4 @@
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'dataframe' folder.
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.

  from __future__ import annotations

@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: sqlframe
- Version: 1.0.0
+ Version: 1.1.1
  Summary: Taking the Spark out of PySpark by converting to SQL
  Home-page: https://github.com/eakmanrq/sqlframe
  Author: Ryan Eakman
@@ -145,6 +145,7 @@ tests/types.py
  tests/fixtures/employee.csv
  tests/fixtures/employee.json
  tests/fixtures/employee.parquet
+ tests/fixtures/employee_extra_line.csv
  tests/integration/__init__.py
  tests/integration/fixtures.py
  tests/integration/test_int_dataframe.py
@@ -162,6 +163,7 @@ tests/integration/engines/bigquery/test_bigquery_catalog.py
  tests/integration/engines/bigquery/test_bigquery_session.py
  tests/integration/engines/duck/__init__.py
  tests/integration/engines/duck/test_duckdb_catalog.py
+ tests/integration/engines/duck/test_duckdb_reader.py
  tests/integration/engines/duck/test_duckdb_session.py
  tests/integration/engines/postgres/__init__.py
  tests/integration/engines/postgres/test_postgres_catalog.py
@@ -1,5 +1,5 @@
  prettytable<3.11.0
- sqlglot<23.18,>=23.14.0
+ sqlglot<24.1,>=24.0.0

  [bigquery]
  google-cloud-bigquery-storage<3,>=2
@@ -28,7 +28,7 @@ pre-commit>=3.5
  pre-commit<3.8,>=3.7

  [docs]
- mkdocs-include-markdown-plugin==4.0.3
+ mkdocs-include-markdown-plugin==6.0.6
  mkdocs-material-extensions==1.1.1
  mkdocs-material==9.0.5
  mkdocs==1.4.2
@@ -0,0 +1,7 @@
+ some,stats,that,dont,relate,to,data
+ employee_id,fname,lname,age,store_id
+ 1,Jack,Shephard,37,1
+ 2,John,Locke,65,1
+ 3,Kate,Austen,37,2
+ 4,Claire,Littleton,27,2
+ 5,Hugo,Reyes,29,100
@@ -0,0 +1,57 @@
+ from sqlframe.base.types import Row
+ from sqlframe.duckdb import DuckDBSession
+
+ pytest_plugins = ["tests.common_fixtures"]
+
+
+ def test_employee_extra_line_csv(duckdb_session: DuckDBSession):
+ df = duckdb_session.read.load(
+ "tests/fixtures/employee_extra_line.csv",
+ format="csv",
+ schema="employee_id INT, fname STRING, lname STRING, age INT, store_id INT",
+ skip=1,
+ header=1,
+ filename=1,
+ null_padding=True,
+ ignore_errors=1,
+ auto_detect=False,
+ )
+ assert df.collect() == [
+ Row(**{"employee_id": 1, "fname": "Jack", "lname": "Shephard", "age": 37, "store_id": 1}),
+ Row(**{"employee_id": 2, "fname": "John", "lname": "Locke", "age": 65, "store_id": 1}),
+ Row(**{"employee_id": 3, "fname": "Kate", "lname": "Austen", "age": 37, "store_id": 2}),
+ Row(
+ **{"employee_id": 4, "fname": "Claire", "lname": "Littleton", "age": 27, "store_id": 2}
+ ),
+ Row(**{"employee_id": 5, "fname": "Hugo", "lname": "Reyes", "age": 29, "store_id": 100}),
+ ]
+
+
+ def test_employee_extra_line_csv_multiple(duckdb_session: DuckDBSession):
+ df = duckdb_session.read.load(
+ ["tests/fixtures/employee_extra_line.csv", "tests/fixtures/employee_extra_line.csv"],
+ format="csv",
+ schema="employee_id INT, fname STRING, lname STRING, age INT, store_id INT",
+ skip=1,
+ header=1,
+ filename=1,
+ null_padding=True,
+ ignore_errors=1,
+ auto_detect=False,
+ )
+ assert df.collect() == [
+ Row(**{"employee_id": 1, "fname": "Jack", "lname": "Shephard", "age": 37, "store_id": 1}),
+ Row(**{"employee_id": 2, "fname": "John", "lname": "Locke", "age": 65, "store_id": 1}),
+ Row(**{"employee_id": 3, "fname": "Kate", "lname": "Austen", "age": 37, "store_id": 2}),
+ Row(
+ **{"employee_id": 4, "fname": "Claire", "lname": "Littleton", "age": 27, "store_id": 2}
+ ),
+ Row(**{"employee_id": 5, "fname": "Hugo", "lname": "Reyes", "age": 29, "store_id": 100}),
+ Row(**{"employee_id": 1, "fname": "Jack", "lname": "Shephard", "age": 37, "store_id": 1}),
+ Row(**{"employee_id": 2, "fname": "John", "lname": "Locke", "age": 65, "store_id": 1}),
+ Row(**{"employee_id": 3, "fname": "Kate", "lname": "Austen", "age": 37, "store_id": 2}),
+ Row(
+ **{"employee_id": 4, "fname": "Claire", "lname": "Littleton", "age": 27, "store_id": 2}
+ ),
+ Row(**{"employee_id": 5, "fname": "Hugo", "lname": "Reyes", "age": 29, "store_id": 100}),
+ ]
@@ -302,6 +302,17 @@ def test_where_multiple_chained(
  compare_frames(df_employee, dfs_employee)


+ def test_where_sql_expr(
+ pyspark_employee: PySparkDataFrame,
+ get_df: t.Callable[[str], _BaseDataFrame],
+ compare_frames: t.Callable,
+ ):
+ employee = get_df("employee")
+ df_employee = pyspark_employee.where("age = 37 AND fname = 'Jack'")
+ dfs_employee = employee.where("age = 37 AND fname = 'Jack'")
+ compare_frames(df_employee, dfs_employee)
+
+
  def test_operators(
  pyspark_employee: PySparkDataFrame,
  get_df: t.Callable[[str], _BaseDataFrame],