sqlite-forge 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,27 @@
1
+ import logging
2
+ from importlib.metadata import PackageNotFoundError, version
3
+ from pathlib import Path
4
+
5
# Directory that contains this package's modules (the resolved location of this file).
MODULE_PATH = Path(__file__).resolve().parent
# Parent directory of the package directory.
REPO_PATH = MODULE_PATH.parent
# String form of REPO_PATH, re-exported through __all__.
# NOTE(review): presumably meant as a default database directory; nothing in
# this module uses it - confirm against callers.
DATABASE_PATH = str(REPO_PATH)

# Package-level logger. The NullHandler is attached so the logger always has a
# handler even when the host application has not configured logging.
log = logging.getLogger("sqlite_forge")
log.addHandler(logging.NullHandler())

# Read the version from the installed distribution metadata; fall back to a
# placeholder when no "sqlite-forge" distribution is found.
try:
    __version__ = version("sqlite-forge")
except PackageNotFoundError:
    __version__ = "0.0.0"

# These imports must stay below the definition of `log`: the database module
# imports `log` from this package while it is being initialised.
from .database import SqliteDatabase
from .forger import BuildDatabase, sqlite3_process

# Names exported by `from sqlite_forge import *`.
__all__ = [
    "BuildDatabase",
    "SqliteDatabase",
    "sqlite3_process",
    "log",
    "DATABASE_PATH",
    "__version__",
]
@@ -0,0 +1,236 @@
1
+ import sqlite3
2
+ from abc import ABC
3
+ from datetime import datetime
4
+ from pathlib import Path
5
+ from typing import Dict, List, Literal, Optional, Sequence, Union
6
+
7
+ import pandas as pd
8
+
9
+ from sqlite_forge import log
10
+ from sqlite_forge.forger import BuildDatabase, sqlite3_process
11
+
12
+
13
class SqliteDatabase(BuildDatabase, ABC):
    """
    Class for managing SQLite database operations.

    Subclasses declare the table through the class attributes below. Methods
    decorated with ``sqlite3_process`` receive a fresh ``sqlite3.Cursor`` as
    their first argument after ``self``; callers do not pass it. The
    decorator commits on success and rolls back on error.

    ``self.db_name`` is used both as the database file stem and as the table
    name. It is interpolated into SQL as an identifier (identifiers cannot be
    bound as parameters), so it must come from trusted code, not user input.
    """

    # Default name used for both the database file stem and the table
    DEFAULT_PATH: Optional[str] = None

    # Schema dictionary for database: column name -> SQLite column type
    DEFAULT_SCHEMA: Optional[Dict[str, str]] = None

    # Optional primary key(s) for database
    PRIMARY_KEY: Optional[Sequence[str]] = None

    def __init__(self, *args, **kwargs):
        """
        Initialise the SqliteDatabase class.

        All arguments are forwarded unchanged to ``BuildDatabase.__init__``.
        """
        super().__init__(*args, **kwargs)

    @sqlite3_process
    def drop_table(self, cursor: sqlite3.Cursor) -> None:
        """
        Drop the managed table from the database if it exists.
        """
        cursor.execute(f"DROP TABLE IF EXISTS {self.db_name};")
        log.info("Dropped existing table %s.", self.db_name)

    @sqlite3_process
    def create_table(self, cursor: sqlite3.Cursor, overwrite: bool = False) -> None:
        """
        Create the managed table from DEFAULT_SCHEMA and PRIMARY_KEY.

        Args:
            overwrite: When the table already exists, drop and recreate it if
                True; otherwise log and leave it untouched.

        Raises:
            ValueError: If DEFAULT_SCHEMA is None.
        """
        # Check if the table already exists
        if self.exists():
            if not overwrite:
                log.info("Table %s already exists and will not be overwritten.", self.db_name)
                return
            self.drop_table()

        if self.DEFAULT_SCHEMA is None:
            raise ValueError("DEFAULT_SCHEMA must be set before creating a table.")

        # Define individual columns with their data types
        columns_definitions = [
            f"{column_name} {column_type}"
            for column_name, column_type in self.DEFAULT_SCHEMA.items()
        ]

        # A (possibly composite) primary key is declared as a table constraint
        if self.PRIMARY_KEY:
            columns_definitions.append(f"PRIMARY KEY ({', '.join(self.PRIMARY_KEY)})")

        columns_definitions_str = ", ".join(columns_definitions)

        # "IF NOT EXISTS" keeps the statement safe if the table appeared meanwhile
        cursor.execute(f"CREATE TABLE IF NOT EXISTS {self.db_name} ({columns_definitions_str})")

        if self.PRIMARY_KEY:
            log.info(
                "Table %s created successfully with primary key(s): %s.",
                self.db_name,
                ", ".join(self.PRIMARY_KEY),
            )
        else:
            log.info("Table %s created successfully.", self.db_name)

    @sqlite3_process
    def get_columns(self, cursor: sqlite3.Cursor) -> List[str]:
        """
        Retrieve column names of the managed table.

        Returns:
            The second field of each ``PRAGMA table_info`` row (the column
            name), in the order SQLite reports them.
        """
        cursor.execute(f"PRAGMA table_info({self.db_name})")
        return [column_info[1] for column_info in cursor.fetchall()]

    @sqlite3_process
    def execute_query(self, cursor: sqlite3.Cursor, query: str) -> pd.DataFrame:
        """
        Execute a query and return results as a DataFrame.

        The query text is executed verbatim. Statements that produce no
        result set yield a DataFrame with no columns.
        """
        cursor.execute(query)
        data = cursor.fetchall()
        columns = [description[0] for description in cursor.description] if cursor.description else []
        return pd.DataFrame(data, columns=columns)

    @staticmethod
    def _validate_headers(headers: List[str], schema: Dict[str, str]) -> None:
        """
        Validate column headers against the schema (case-insensitively).

        Raises:
            ValueError: If any header is not a schema column.
        """
        # Lower-case the schema keys once instead of once per header
        known_columns = set(map(str.lower, schema))
        mismatched_headers = [header for header in headers if header.lower() not in known_columns]
        if mismatched_headers:
            mismatched = ", ".join(mismatched_headers)
            raise ValueError(
                f"Following column(s) in imported file do not match the DEFAULT_SCHEMA: {mismatched}")

    @sqlite3_process
    def ingest_dataframe(
        self,
        cursor: sqlite3.Cursor,
        df: pd.DataFrame,
        load_date: bool = False,
        overwrite: bool = False,
    ) -> None:
        """
        Ingest a pandas dataframe into the database.

        If load_date is True, adds a LOAD_DATE column with the current datetime.
        If overwrite is True, updates existing records based on PRIMARY_KEY.
        Without overwrite, rows whose primary key already exists are skipped.
        When PRIMARY_KEY is not set, every row is inserted.

        Raises:
            ValueError: If DEFAULT_SCHEMA is None or a dataframe column is not
                part of DEFAULT_SCHEMA.
        """
        if load_date:
            # Work on a copy so the caller's dataframe is not mutated
            df = df.copy()
            df['LOAD_DATE'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        if self.DEFAULT_SCHEMA is None:
            raise ValueError("DEFAULT_SCHEMA must be set before ingesting a dataframe.")

        headers = df.columns.tolist()
        self._validate_headers(headers, self.DEFAULT_SCHEMA)

        # The statements are identical for every row, so build them once
        placeholders = ", ".join("?" for _ in headers)
        insert_query = f"INSERT INTO {self.db_name} ({', '.join(headers)}) VALUES ({placeholders})"

        primary_key = self.PRIMARY_KEY
        exists_query = ""
        update_query = None
        value_columns: List[str] = []
        if primary_key:
            where_clause = " AND ".join(f"{key} = ?" for key in primary_key)
            exists_query = f"SELECT COUNT(*) FROM {self.db_name} WHERE {where_clause}"
            value_columns = [header for header in headers if header not in primary_key]
            # With only key columns there is nothing to SET; an UPDATE with an
            # empty SET clause would be a syntax error, so skip it entirely
            if value_columns:
                set_clause = ", ".join(f"{header} = ?" for header in value_columns)
                update_query = f"UPDATE {self.db_name} SET {set_clause} WHERE {where_clause}"

        written = 0
        # astype(object) turns numpy scalars (e.g. numpy.int64 from an
        # all-integer frame) into native Python values that sqlite3 can bind
        for values in df.astype(object).itertuples(index=False, name=None):
            if not primary_key:
                # No primary key provided, insert directly
                cursor.execute(insert_query, values)
                written += 1
                continue

            row = dict(zip(headers, values))
            key_values = tuple(row[key] for key in primary_key)

            # Check if the record already exists
            cursor.execute(exists_query, key_values)
            already_present = cursor.fetchone()[0]

            if not already_present:
                cursor.execute(insert_query, values)
                written += 1
            elif overwrite and update_query is not None:
                update_values = tuple(row[header] for header in value_columns)
                cursor.execute(update_query, update_values + key_values)
                written += 1

        # Count on this cursor: a second connection would not see the rows
        # written above until this transaction commits
        cursor.execute(f"SELECT COUNT(*) FROM {self.db_name}")
        total_rows = cursor.fetchone()[0]
        log.info("%s rows written to %s; table now has %s rows.", written, self.db_name, total_rows)

    @property
    @sqlite3_process
    def table_length(self, cursor: sqlite3.Cursor) -> int:
        """
        Return the number of rows in the table.
        """
        cursor.execute(f"SELECT COUNT(*) FROM {self.db_name}")
        return cursor.fetchone()[0]

    def fetch_table(self, limit: Optional[int] = None) -> pd.DataFrame:
        """
        Return rows from the managed table as a DataFrame.

        Args:
            limit: Optional maximum number of rows; coerced with ``int()``.
        """
        query = f"SELECT * FROM {self.db_name}"
        if limit is not None:
            query += f" LIMIT {int(limit)}"
        return self.execute_query(query)

    def export_table(
        self,
        output_path: Union[str, Path],
        format: Literal["csv", "json", "parquet"] = "csv",
        limit: Optional[int] = None,
    ) -> Path:
        """
        Export table rows to a file.

        Args:
            output_path: Destination file path.
            format: Export format ("csv", "json", or "parquet").
            limit: Optional row limit before exporting.

        Returns:
            The (user-expanded) path that was written.

        Raises:
            ValueError: If format is not one of the supported values.
            ImportError: If parquet export is requested without an engine.
        """
        # Reject an unsupported format before querying or touching the filesystem
        if format not in ("csv", "json", "parquet"):
            raise ValueError("format must be one of: csv, json, parquet")

        df = self.fetch_table(limit=limit)
        path = Path(output_path).expanduser()
        path.parent.mkdir(parents=True, exist_ok=True)

        if format == "csv":
            df.to_csv(path, index=False)
        elif format == "json":
            df.to_json(path, orient="records", indent=2)
        else:
            try:
                df.to_parquet(path, index=False)
            except ImportError as exc:
                raise ImportError(
                    "Parquet export requires an engine such as 'pyarrow' or 'fastparquet'."
                ) from exc

        log.info("Exported %s rows from %s to %s", len(df), self.db_name, path)
        return path
sqlite_forge/forger.py ADDED
@@ -0,0 +1,83 @@
1
+ import logging
2
+ import sqlite3
3
+ from abc import ABC
4
+ from functools import wraps
5
+ from pathlib import Path
6
+ from typing import Callable, Dict, Optional, TypeVar, Union
7
+
8
# Module-level logger named after this module.
log = logging.getLogger(__name__)

# Return type of a function wrapped by `sqlite3_process`.
T = TypeVar("T")
# Types accepted for a database directory argument.
DatabasePath = Union[str, Path]
12
+
13
+
14
def sqlite3_process(func: Callable[..., T]) -> Callable[..., T]:
    """
    Decorator to manage SQLite database connection.

    The wrapped method is called as ``func(self, cursor, *args, **kwargs)``
    with a cursor on ``<self.database_path>/<self.db_name>.db``. The
    directory is created when missing. The connection is committed when the
    method returns, rolled back (if a transaction is open) when it raises,
    and closed in every case.
    """
    @wraps(func)
    def wrapper(self, *args, **kwargs) -> T:
        target_dir = Path(self.database_path)
        target_dir.mkdir(parents=True, exist_ok=True)

        connection = sqlite3.connect(str(target_dir / f"{self.db_name}.db"))
        try:
            try:
                outcome = func(self, connection.cursor(), *args, **kwargs)
            except Exception:
                # Undo partial writes before propagating the error
                if connection.in_transaction:
                    connection.rollback()
                raise
            connection.commit()
            return outcome
        finally:
            connection.close()

    return wrapper
39
+
40
+
41
class BuildDatabase(ABC):
    """
    Abstract base class for building a SQLite database.

    Inheriting classes must set both DEFAULT_PATH and DEFAULT_SCHEMA to
    non-empty values; instantiation raises ValueError otherwise.
    """

    # Default for `db_name`, which names both the ".db" file and the table
    DEFAULT_PATH: Optional[str] = None
    # Column name -> column type mapping supplied by the inheriting class
    DEFAULT_SCHEMA: Optional[Dict[str, str]] = None

    def __init__(self, database_path: DatabasePath, database_name: Optional[str] = None) -> None:
        """
        Initialize the BuildDatabase class.

        Args:
            database_path: Directory that holds the database file; ``~`` is
                expanded.
            database_name: Optional override for DEFAULT_PATH.

        Raises:
            ValueError: If DEFAULT_PATH or DEFAULT_SCHEMA is unset or empty.
        """
        if not self.DEFAULT_PATH or not self.DEFAULT_SCHEMA:
            raise ValueError("Both DEFAULT_PATH and DEFAULT_SCHEMA must be implemented in the inheriting child class!")
        self.db_name = database_name or self.DEFAULT_PATH
        self.database_path = Path(database_path).expanduser()

    @property
    def database(self) -> str:
        """
        Get the full path of the database file.

        Raises:
            FileNotFoundError: If the database file does not exist yet.
        """
        db_path = self.database_path / f"{self.db_name}.db"
        if not db_path.exists():
            raise FileNotFoundError(
                f"Database file '{db_path}' does not exist, please create first!")
        return str(db_path)

    @property
    def conn(self) -> sqlite3.Connection:
        """
        Establish a connection to the SQLite database.

        Every access opens a new connection; the caller is responsible for
        closing it.
        """
        return sqlite3.connect(self.database)

    @sqlite3_process
    def exists(self, cursor: sqlite3.Cursor) -> bool:
        """
        Check if the table named ``self.db_name`` exists in the database.
        """
        # Bind the name as a parameter so quotes in it cannot alter the query
        cursor.execute(
            "SELECT name FROM sqlite_master WHERE type='table' AND name=?;",
            (self.db_name,),
        )
        return cursor.fetchone() is not None
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Tom Freeman
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,122 @@
1
+ Metadata-Version: 2.1
2
+ Name: sqlite-forge
3
+ Version: 1.1.0
4
+ Summary: Common tool for forging and maintaining sqlite databases
5
+ Home-page: https://github.com/Tom3man/sqlite-forge
6
+ License: MIT
7
+ Keywords: sqlite,database,etl,data-ingestion
8
+ Author: Tom
9
+ Author-email: tomrfreeman3@gmail.com
10
+ Requires-Python: >=3.10,<4.0
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Topic :: Database
19
+ Requires-Dist: pandas (>=2.2.2,<3.0.0)
20
+ Project-URL: Documentation, https://tom3man.github.io/sqlite-forge/
21
+ Project-URL: Issues, https://github.com/Tom3man/sqlite-forge/issues
22
+ Project-URL: Repository, https://github.com/Tom3man/sqlite-forge
23
+ Description-Content-Type: text/markdown
24
+
25
+ # SQLite Forge
26
+
27
+ SQLite Forge is a lightweight toolkit that helps you declare and maintain SQLite tables from Python. Define your schema once, then manage tables, run queries, ingest pandas `DataFrame` objects, and export results.
28
+
29
+ ## Highlights
30
+
31
+ - Declarative table definitions with schemas and optional multi-column primary keys
32
+ - Safe helpers to create/drop tables and check existence
33
+ - DataFrame ingestion with optional incremental overwrite support
34
+ - Query execution that returns pandas `DataFrame` objects
35
+ - Table export helpers for `csv`, `json`, and `parquet`
36
+
37
+ ## Installation
38
+
39
+ ```bash
40
+ pip install sqlite-forge
41
+ ```
42
+
43
+ For development:
44
+
45
+ ```bash
46
+ git clone https://github.com/Tom3man/sqlite-forge.git
47
+ cd sqlite-forge
48
+ poetry install --with dev --with docs
49
+ ```
50
+
51
+ ## Quick Start
52
+
53
+ ```python
54
+ from pathlib import Path
55
+
56
+ import pandas as pd
57
+
58
+ from sqlite_forge import SqliteDatabase
59
+
60
+
61
+ class ExampleTable(SqliteDatabase):
62
+ DEFAULT_PATH = "example_table"
63
+ PRIMARY_KEY = ("id",)
64
+ DEFAULT_SCHEMA = {
65
+ "id": "INTEGER",
66
+ "name": "TEXT",
67
+ "score": "REAL",
68
+ }
69
+
70
+
71
+ db = ExampleTable(database_path=Path("./data"))
72
+ db.create_table(overwrite=True)
73
+
74
+ db.ingest_dataframe(
75
+ pd.DataFrame(
76
+ [
77
+ {"id": 1, "name": "Alice", "score": 9.2},
78
+ {"id": 2, "name": "Bob", "score": 8.7},
79
+ ]
80
+ )
81
+ )
82
+
83
+ print(db.fetch_table())
84
+ db.export_table("./data/example_table.csv", format="csv")
85
+ ```
86
+
87
+ ## Development
88
+
89
+ ```bash
90
+ poetry run pytest
91
+ poetry run ruff check .
92
+ poetry run mypy
93
+ poetry build
94
+ ```
95
+
96
+ ## Documentation
97
+
98
+ - Docs site: https://tom3man.github.io/sqlite-forge/
99
+ - Build locally:
100
+
101
+ ```bash
102
+ poetry run mkdocs serve
103
+ ```
104
+
105
+ ## Release
106
+
107
+ 1. Bump version in `pyproject.toml`.
108
+ 2. Update `CHANGELOG.md`.
109
+ 3. Publish:
110
+
111
+ ```bash
112
+ poetry publish --build
113
+ ```
114
+
115
+ ## Changelog
116
+
117
+ See [CHANGELOG.md](CHANGELOG.md).
118
+
119
+ ## Licence
120
+
121
+ MIT. See [LICENSE](LICENSE).
122
+
@@ -0,0 +1,7 @@
1
+ sqlite_forge/__init__.py,sha256=Ihav6d3y7NgHKwlbUn4mgr4TALSxvL3iTX5uBW7LEQE,613
2
+ sqlite_forge/database.py,sha256=Oy3BK7NAJbbFAZu89JSkiebaXJYT9O0SVnNGLiSDI_Y,8852
3
+ sqlite_forge/forger.py,sha256=abq2Ggk_WiklWj7pPMqatfJ_x4f5bhxvSrBU1-2tANc,2535
4
+ sqlite_forge-1.1.0.dist-info/LICENSE,sha256=QfyckGoT3O2h2NMFYkVvoQ9wznKmpnmpI-YA-Xnqd1s,1068
5
+ sqlite_forge-1.1.0.dist-info/METADATA,sha256=AkFmF0cAUxwbKDFqBBqTwNgn7a1rEa94XG0ll2L-umU,2821
6
+ sqlite_forge-1.1.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
7
+ sqlite_forge-1.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: poetry-core 1.9.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any