sqlite-forge 1.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 Tom Freeman
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: sqlite-forge
|
|
3
|
+
Version: 1.1.0
|
|
4
|
+
Summary: Common tool for forging and maintaining sqlite databases
|
|
5
|
+
Home-page: https://github.com/Tom3man/sqlite-forge
|
|
6
|
+
License: MIT
|
|
7
|
+
Keywords: sqlite,database,etl,data-ingestion
|
|
8
|
+
Author: Tom
|
|
9
|
+
Author-email: tomrfreeman3@gmail.com
|
|
10
|
+
Requires-Python: >=3.10,<4.0
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Topic :: Database
|
|
19
|
+
Requires-Dist: pandas (>=2.2.2,<3.0.0)
|
|
20
|
+
Project-URL: Documentation, https://tom3man.github.io/sqlite-forge/
|
|
21
|
+
Project-URL: Issues, https://github.com/Tom3man/sqlite-forge/issues
|
|
22
|
+
Project-URL: Repository, https://github.com/Tom3man/sqlite-forge
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
|
|
25
|
+
# SQLite Forge
|
|
26
|
+
|
|
27
|
+
SQLite Forge is a lightweight toolkit that helps you declare and maintain SQLite tables from Python. Define your schema once, then manage tables, run queries, ingest pandas `DataFrame` objects, and export results.
|
|
28
|
+
|
|
29
|
+
## Highlights
|
|
30
|
+
|
|
31
|
+
- Declarative table definitions with schemas and optional multi-column primary keys
|
|
32
|
+
- Safe helpers to create/drop tables and check existence
|
|
33
|
+
- DataFrame ingestion with optional incremental overwrite support
|
|
34
|
+
- Query execution that returns pandas `DataFrame` objects
|
|
35
|
+
- Table export helpers for `csv`, `json`, and `parquet`
|
|
36
|
+
|
|
37
|
+
## Installation
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
pip install sqlite-forge
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
For development:
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
git clone https://github.com/Tom3man/sqlite-forge.git
|
|
47
|
+
cd sqlite-forge
|
|
48
|
+
poetry install --with dev --with docs
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
## Quick Start
|
|
52
|
+
|
|
53
|
+
```python
|
|
54
|
+
from pathlib import Path
|
|
55
|
+
|
|
56
|
+
import pandas as pd
|
|
57
|
+
|
|
58
|
+
from sqlite_forge import SqliteDatabase
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class ExampleTable(SqliteDatabase):
|
|
62
|
+
DEFAULT_PATH = "example_table"
|
|
63
|
+
PRIMARY_KEY = ("id",)
|
|
64
|
+
DEFAULT_SCHEMA = {
|
|
65
|
+
"id": "INTEGER",
|
|
66
|
+
"name": "TEXT",
|
|
67
|
+
"score": "REAL",
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
db = ExampleTable(database_path=Path("./data"))
|
|
72
|
+
db.create_table(overwrite=True)
|
|
73
|
+
|
|
74
|
+
db.ingest_dataframe(
|
|
75
|
+
pd.DataFrame(
|
|
76
|
+
[
|
|
77
|
+
{"id": 1, "name": "Alice", "score": 9.2},
|
|
78
|
+
{"id": 2, "name": "Bob", "score": 8.7},
|
|
79
|
+
]
|
|
80
|
+
)
|
|
81
|
+
)
|
|
82
|
+
|
|
83
|
+
print(db.fetch_table())
|
|
84
|
+
db.export_table("./data/example_table.csv", format="csv")
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
## Development
|
|
88
|
+
|
|
89
|
+
```bash
|
|
90
|
+
poetry run pytest
|
|
91
|
+
poetry run ruff check .
|
|
92
|
+
poetry run mypy
|
|
93
|
+
poetry build
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
## Documentation
|
|
97
|
+
|
|
98
|
+
- Docs site: https://tom3man.github.io/sqlite-forge/
|
|
99
|
+
- Build locally:
|
|
100
|
+
|
|
101
|
+
```bash
|
|
102
|
+
poetry run mkdocs serve
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
## Release
|
|
106
|
+
|
|
107
|
+
1. Bump version in `pyproject.toml`.
|
|
108
|
+
2. Update `CHANGELOG.md`.
|
|
109
|
+
3. Publish:
|
|
110
|
+
|
|
111
|
+
```bash
|
|
112
|
+
poetry publish --build
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
## Changelog
|
|
116
|
+
|
|
117
|
+
See [CHANGELOG.md](CHANGELOG.md).
|
|
118
|
+
|
|
119
|
+
## Licence
|
|
120
|
+
|
|
121
|
+
MIT. See [LICENSE](LICENSE).
|
|
122
|
+
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
# SQLite Forge
|
|
2
|
+
|
|
3
|
+
SQLite Forge is a lightweight toolkit that helps you declare and maintain SQLite tables from Python. Define your schema once, then manage tables, run queries, ingest pandas `DataFrame` objects, and export results.
|
|
4
|
+
|
|
5
|
+
## Highlights
|
|
6
|
+
|
|
7
|
+
- Declarative table definitions with schemas and optional multi-column primary keys
|
|
8
|
+
- Safe helpers to create/drop tables and check existence
|
|
9
|
+
- DataFrame ingestion with optional incremental overwrite support
|
|
10
|
+
- Query execution that returns pandas `DataFrame` objects
|
|
11
|
+
- Table export helpers for `csv`, `json`, and `parquet`
|
|
12
|
+
|
|
13
|
+
## Installation
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
pip install sqlite-forge
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
For development:
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
git clone https://github.com/Tom3man/sqlite-forge.git
|
|
23
|
+
cd sqlite-forge
|
|
24
|
+
poetry install --with dev --with docs
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
## Quick Start
|
|
28
|
+
|
|
29
|
+
```python
|
|
30
|
+
from pathlib import Path
|
|
31
|
+
|
|
32
|
+
import pandas as pd
|
|
33
|
+
|
|
34
|
+
from sqlite_forge import SqliteDatabase
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class ExampleTable(SqliteDatabase):
|
|
38
|
+
DEFAULT_PATH = "example_table"
|
|
39
|
+
PRIMARY_KEY = ("id",)
|
|
40
|
+
DEFAULT_SCHEMA = {
|
|
41
|
+
"id": "INTEGER",
|
|
42
|
+
"name": "TEXT",
|
|
43
|
+
"score": "REAL",
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
db = ExampleTable(database_path=Path("./data"))
|
|
48
|
+
db.create_table(overwrite=True)
|
|
49
|
+
|
|
50
|
+
db.ingest_dataframe(
|
|
51
|
+
pd.DataFrame(
|
|
52
|
+
[
|
|
53
|
+
{"id": 1, "name": "Alice", "score": 9.2},
|
|
54
|
+
{"id": 2, "name": "Bob", "score": 8.7},
|
|
55
|
+
]
|
|
56
|
+
)
|
|
57
|
+
)
|
|
58
|
+
|
|
59
|
+
print(db.fetch_table())
|
|
60
|
+
db.export_table("./data/example_table.csv", format="csv")
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
## Development
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
poetry run pytest
|
|
67
|
+
poetry run ruff check .
|
|
68
|
+
poetry run mypy
|
|
69
|
+
poetry build
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
## Documentation
|
|
73
|
+
|
|
74
|
+
- Docs site: https://tom3man.github.io/sqlite-forge/
|
|
75
|
+
- Build locally:
|
|
76
|
+
|
|
77
|
+
```bash
|
|
78
|
+
poetry run mkdocs serve
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
## Release
|
|
82
|
+
|
|
83
|
+
1. Bump version in `pyproject.toml`.
|
|
84
|
+
2. Update `CHANGELOG.md`.
|
|
85
|
+
3. Publish:
|
|
86
|
+
|
|
87
|
+
```bash
|
|
88
|
+
poetry publish --build
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
## Changelog
|
|
92
|
+
|
|
93
|
+
See [CHANGELOG.md](CHANGELOG.md).
|
|
94
|
+
|
|
95
|
+
## Licence
|
|
96
|
+
|
|
97
|
+
MIT. See [LICENSE](LICENSE).
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
[tool.poetry]
|
|
2
|
+
name = "sqlite-forge"
|
|
3
|
+
version = "1.1.0"
|
|
4
|
+
description = "Common tool for forging and maintaining sqlite databases"
|
|
5
|
+
authors = ["Tom <tomrfreeman3@gmail.com>"]
|
|
6
|
+
readme = "README.md"
|
|
7
|
+
license = "MIT"
|
|
8
|
+
homepage = "https://github.com/Tom3man/sqlite-forge"
|
|
9
|
+
repository = "https://github.com/Tom3man/sqlite-forge"
|
|
10
|
+
documentation = "https://tom3man.github.io/sqlite-forge/"
|
|
11
|
+
packages = [{include = "sqlite_forge"}]
|
|
12
|
+
keywords = ["sqlite", "database", "etl", "data-ingestion"]
|
|
13
|
+
classifiers = [
|
|
14
|
+
"Development Status :: 4 - Beta",
|
|
15
|
+
"License :: OSI Approved :: MIT License",
|
|
16
|
+
"Intended Audience :: Developers",
|
|
17
|
+
"Programming Language :: Python :: 3",
|
|
18
|
+
"Programming Language :: Python :: 3.10",
|
|
19
|
+
"Programming Language :: Python :: 3.11",
|
|
20
|
+
"Programming Language :: Python :: 3.12",
|
|
21
|
+
"Topic :: Database",
|
|
22
|
+
]
|
|
23
|
+
|
|
24
|
+
[tool.poetry.urls]
|
|
25
|
+
Issues = "https://github.com/Tom3man/sqlite-forge/issues"
|
|
26
|
+
|
|
27
|
+
[tool.poetry.dependencies]
|
|
28
|
+
python = ">=3.10,<4.0"
|
|
29
|
+
pandas = "^2.2.2"
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
[tool.poetry.group.dev.dependencies]
|
|
33
|
+
ipykernel = "^6.28.0"
|
|
34
|
+
pytest = "^7.4.0"
|
|
35
|
+
pytest-cov = "^6.0.0"
|
|
36
|
+
ruff = "^0.9.7"
|
|
37
|
+
mypy = "^1.15.0"
|
|
38
|
+
|
|
39
|
+
[tool.poetry.group.docs.dependencies]
|
|
40
|
+
mkdocs = "^1.6.1"
|
|
41
|
+
mkdocs-material = "^9.6.20"
|
|
42
|
+
|
|
43
|
+
[build-system]
|
|
44
|
+
requires = ["poetry-core"]
|
|
45
|
+
build-backend = "poetry.core.masonry.api"
|
|
46
|
+
|
|
47
|
+
[tool.pytest.ini_options]
|
|
48
|
+
addopts = "-q --cov=sqlite_forge --cov-report=term-missing"
|
|
49
|
+
testpaths = ["tests"]
|
|
50
|
+
|
|
51
|
+
[tool.ruff]
|
|
52
|
+
line-length = 120
|
|
53
|
+
target-version = "py310"
|
|
54
|
+
|
|
55
|
+
[tool.ruff.lint]
|
|
56
|
+
select = ["E", "F", "I"]
|
|
57
|
+
ignore = ["E402"]
|
|
58
|
+
|
|
59
|
+
[tool.mypy]
|
|
60
|
+
python_version = "3.10"
|
|
61
|
+
warn_unused_configs = true
|
|
62
|
+
ignore_missing_imports = true
|
|
63
|
+
files = ["sqlite_forge"]
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import logging
from importlib.metadata import PackageNotFoundError, version
from pathlib import Path

# Filesystem anchors for the installed package.
MODULE_PATH = Path(__file__).resolve().parent
REPO_PATH = MODULE_PATH.parent
DATABASE_PATH = str(REPO_PATH)

# Library-level logger; a NullHandler keeps the library silent unless the
# consuming application configures logging itself.
log = logging.getLogger("sqlite_forge")
log.addHandler(logging.NullHandler())

try:
    __version__ = version("sqlite-forge")
except PackageNotFoundError:
    # Not installed (e.g. running from a source checkout without metadata).
    __version__ = "0.0.0"

# Imported after the logger/constants above so submodules can rely on them
# at import time (E402 is intentionally ignored in the ruff config).
from .database import SqliteDatabase
from .forger import BuildDatabase, sqlite3_process

__all__ = [
    "BuildDatabase",
    "SqliteDatabase",
    "sqlite3_process",
    "log",
    "DATABASE_PATH",
    "__version__",
]
|
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
import sqlite3
|
|
2
|
+
from abc import ABC
|
|
3
|
+
from datetime import datetime
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Dict, List, Literal, Optional, Sequence, Union
|
|
6
|
+
|
|
7
|
+
import pandas as pd
|
|
8
|
+
|
|
9
|
+
from sqlite_forge import log
|
|
10
|
+
from sqlite_forge.forger import BuildDatabase, sqlite3_process
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class SqliteDatabase(BuildDatabase, ABC):
    """
    Class for managing SQLite database operations.

    Subclasses declare the table via class attributes:

    - ``DEFAULT_PATH``: the table name.
    - ``DEFAULT_SCHEMA``: mapping of column name -> SQLite column type.
    - ``PRIMARY_KEY``: optional sequence of column names forming a
      (possibly composite) primary key.
    """

    # Table name for database
    DEFAULT_PATH: Optional[str] = None

    # Schema dictionary for database (column name -> SQLite type)
    DEFAULT_SCHEMA: Optional[Dict[str, str]] = None

    # Optional primary key column(s) for database
    PRIMARY_KEY: Optional[Sequence[str]] = None

    def __init__(self, *args, **kwargs):
        """
        Initialise the SqliteDatabase class.
        """
        super().__init__(*args, **kwargs)

    @sqlite3_process
    def drop_table(self, cursor: sqlite3.Cursor) -> None:
        """
        Drop the specified table from the database if it exists.
        """
        # NOTE: table names cannot be bound as SQL parameters; db_name is
        # developer-supplied (a class attribute), not untrusted input.
        cursor.execute(f"DROP TABLE IF EXISTS {self.db_name};")
        log.info("Dropped existing table %s.", self.db_name)

    @sqlite3_process
    def create_table(self, cursor: sqlite3.Cursor, overwrite: bool = False) -> None:
        """
        Create a table in the database with composite primary keys.

        Args:
            overwrite: If True and the table already exists, drop and
                recreate it; otherwise leave the existing table untouched.

        Raises:
            ValueError: If DEFAULT_SCHEMA is not set on the subclass.
        """
        if self.exists():
            if overwrite:
                # drop_table runs on its own connection/transaction; that is
                # safe here because nothing has been written on this one yet.
                self.drop_table()
            else:
                log.info(f"Table {self.db_name} already exists and will not be overwritten.")
                return

        if self.DEFAULT_SCHEMA is None:
            raise ValueError("DEFAULT_SCHEMA must be set before creating a table.")

        # Column definitions, plus an optional (composite) PRIMARY KEY clause.
        columns_definitions = [
            f'{column_name} {column_type}' for column_name, column_type in self.DEFAULT_SCHEMA.items()
        ]
        if self.PRIMARY_KEY:
            columns_definitions.append(f"PRIMARY KEY ({', '.join(self.PRIMARY_KEY)})")

        columns_definitions_str = ', '.join(columns_definitions)

        # "IF NOT EXISTS" keeps this idempotent even under races.
        create_table_query = f'CREATE TABLE IF NOT EXISTS {self.db_name} ({columns_definitions_str})'
        cursor.execute(create_table_query)

        if self.PRIMARY_KEY:
            log.info(
                "Table %s created successfully with primary key(s): %s.",
                self.db_name,
                ", ".join(self.PRIMARY_KEY),
            )
        else:
            log.info("Table %s created successfully.", self.db_name)

    @sqlite3_process
    def get_columns(self, cursor: sqlite3.Cursor) -> List[str]:
        """
        Retrieve column names from the database.

        Returns:
            Column names in table order (PRAGMA table_info column 1).
        """
        cursor.execute(f"PRAGMA table_info({self.db_name})")
        return [column_info[1] for column_info in cursor.fetchall()]

    @sqlite3_process
    def execute_query(self, cursor: sqlite3.Cursor, query: str) -> pd.DataFrame:
        """
        Execute a query and return results as a DataFrame.

        Args:
            query: Raw SQL to execute. The caller is responsible for the
                query's safety; no parameter binding is performed here.
        """
        cursor.execute(query)
        data = cursor.fetchall()
        # Statements that return no rows (e.g. DDL) have no description.
        columns = [description[0] for description in cursor.description] if cursor.description else []
        return pd.DataFrame(data, columns=columns)

    @staticmethod
    def _validate_headers(headers: List[str], schema: Dict[str, str]) -> None:
        """
        Validate column headers against the schema (case-insensitive).

        Raises:
            ValueError: If any header has no matching schema column.
        """
        schema_lower = {key.lower() for key in schema}
        mismatched_headers = [header for header in headers if header.lower() not in schema_lower]
        if mismatched_headers:
            mismatched = ', '.join(mismatched_headers)
            raise ValueError(
                f"Following column(s) in imported file do not match the DEFAULT_SCHEMA: {mismatched}")

    @sqlite3_process
    def ingest_dataframe(
        self,
        cursor: sqlite3.Cursor,
        df: pd.DataFrame,
        load_date: bool = False,
        overwrite: bool = False,
    ) -> None:
        """
        Ingest a pandas dataframe into the database.

        If load_date is True, adds a LOAD_DATE column with the current datetime.
        If overwrite is True, updates existing records based on PRIMARY_KEY.

        Raises:
            ValueError: If DEFAULT_SCHEMA is not set, or the dataframe's
                columns do not match it.
        """
        if load_date:
            # Copy first so the caller's dataframe is never mutated.
            df = df.copy()
            df['LOAD_DATE'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        if self.DEFAULT_SCHEMA is None:
            raise ValueError("DEFAULT_SCHEMA must be set before ingesting a dataframe.")

        headers = df.columns.tolist()
        self._validate_headers(headers, self.DEFAULT_SCHEMA)

        # Hoist loop-invariant SQL out of the per-row loop.
        placeholders = ', '.join('?' for _ in headers)
        insert_query = f"""
                INSERT INTO {self.db_name} ({', '.join(headers)})
                VALUES ({placeholders})"""

        written_count = 0
        if self.PRIMARY_KEY:
            where_clause = " AND ".join(f"{key} = ?" for key in self.PRIMARY_KEY)
            non_key_headers = [header for header in headers if header not in self.PRIMARY_KEY]
            update_clause = ", ".join(f"{header} = ?" for header in non_key_headers)
            update_query = f"""
                UPDATE {self.db_name}
                SET {update_clause}
                WHERE {where_clause}"""

            for _, row in df.iterrows():
                where_values = tuple(row[key] for key in self.PRIMARY_KEY)

                # Upsert semantics: insert when absent; update only when
                # present AND overwrite=True; otherwise skip the row.
                cursor.execute(f"SELECT COUNT(*) FROM {self.db_name} WHERE {where_clause}", where_values)
                exists = cursor.fetchone()[0]

                if exists and overwrite:
                    update_values = tuple(row[header] for header in non_key_headers)
                    cursor.execute(update_query, update_values + where_values)
                    written_count += 1
                elif not exists:
                    cursor.execute(insert_query, tuple(row[header] for header in headers))
                    written_count += 1
        else:
            # No primary key provided, insert every row directly.
            for _, row in df.iterrows():
                cursor.execute(insert_query, tuple(row[header] for header in headers))
                written_count += 1

        # BUGFIX: count via the ACTIVE cursor. The table_length property
        # opens a second connection, which cannot see this still-uncommitted
        # transaction (stale count) and can block on the database file.
        cursor.execute(f"SELECT COUNT(*) FROM {self.db_name}")
        total_rows = cursor.fetchone()[0]
        log.info("%s rows written to %s; table now has %s rows.", written_count, self.db_name, total_rows)

    @property
    @sqlite3_process
    def table_length(self, cursor: sqlite3.Cursor) -> int:
        """
        Return the number of rows in the table.

        Note: this opens its own connection, so it reflects committed state
        only; do not call it from inside another @sqlite3_process method.
        """
        cursor.execute(f"SELECT COUNT(*) FROM {self.db_name}")
        return cursor.fetchone()[0]

    def fetch_table(self, limit: Optional[int] = None) -> pd.DataFrame:
        """
        Return rows from the managed table as a DataFrame.

        Args:
            limit: Optional maximum number of rows to fetch.
        """
        query = f"SELECT * FROM {self.db_name}"
        if limit is not None:
            query += f" LIMIT {int(limit)}"
        return self.execute_query(query)

    def export_table(
        self,
        output_path: Union[str, Path],
        format: Literal["csv", "json", "parquet"] = "csv",
        limit: Optional[int] = None,
    ) -> Path:
        """
        Export table rows to a file.

        Args:
            output_path: Destination file path.
            format: Export format ("csv", "json", or "parquet").
            limit: Optional row limit before exporting.

        Returns:
            The resolved destination path.

        Raises:
            ValueError: If format is not one of the supported values.
            ImportError: If parquet export is requested without an engine.
        """
        df = self.fetch_table(limit=limit)
        path = Path(output_path).expanduser()
        path.parent.mkdir(parents=True, exist_ok=True)

        if format == "csv":
            df.to_csv(path, index=False)
        elif format == "json":
            df.to_json(path, orient="records", indent=2)
        elif format == "parquet":
            try:
                df.to_parquet(path, index=False)
            except ImportError as exc:
                raise ImportError(
                    "Parquet export requires an engine such as 'pyarrow' or 'fastparquet'."
                ) from exc
        else:
            raise ValueError("format must be one of: csv, json, parquet")

        log.info("Exported %s rows from %s to %s", len(df), self.db_name, path)
        return path
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import sqlite3
|
|
3
|
+
from abc import ABC
|
|
4
|
+
from functools import wraps
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Callable, Dict, Optional, TypeVar, Union
|
|
7
|
+
|
|
8
|
+
log = logging.getLogger(__name__)
|
|
9
|
+
|
|
10
|
+
T = TypeVar("T")
|
|
11
|
+
DatabasePath = Union[str, Path]
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def sqlite3_process(func: Callable[..., T]) -> Callable[..., T]:
    """
    Decorator to manage SQLite database connection.

    Ensures the instance's database directory exists, opens a connection to
    ``<database_path>/<db_name>.db``, and injects a cursor as the wrapped
    method's first argument. Commits on success, rolls back any open
    transaction on failure, and always closes the connection.
    """
    @wraps(func)
    def func_wrapper(self, *args, **kwargs) -> T:
        target_dir = Path(self.database_path)
        target_dir.mkdir(parents=True, exist_ok=True)

        connection = sqlite3.connect(str(target_dir / f"{self.db_name}.db"))
        try:
            result = func(self, connection.cursor(), *args, **kwargs)
        except Exception:
            # Undo any partial writes before propagating the error.
            if connection.in_transaction:
                connection.rollback()
            raise
        else:
            connection.commit()
            return result
        finally:
            connection.close()

    return func_wrapper
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class BuildDatabase(ABC):
    """
    Abstract base class for building a SQLite database.

    Subclasses must define ``DEFAULT_PATH`` (the table/database name) and
    ``DEFAULT_SCHEMA`` (column name -> SQLite type mapping).
    """

    # Table/database name; must be overridden by the inheriting class.
    DEFAULT_PATH: Optional[str] = None
    # Column schema; must be overridden by the inheriting class.
    DEFAULT_SCHEMA: Optional[Dict[str, str]] = None

    def __init__(self, database_path: DatabasePath, database_name: Optional[str] = None) -> None:
        """
        Initialize the BuildDatabase class.

        Args:
            database_path: Directory that holds (or will hold) the .db file.
            database_name: Optional override for the DEFAULT_PATH name.

        Raises:
            ValueError: If DEFAULT_PATH or DEFAULT_SCHEMA is not defined.
        """
        if not self.DEFAULT_PATH or not self.DEFAULT_SCHEMA:
            raise ValueError("Both DEFAULT_PATH and DEFAULT_SCHEMA must be implemented in the inheriting child class!")
        self.db_name = database_name or self.DEFAULT_PATH
        self.database_path = Path(database_path).expanduser()

    @property
    def database(self) -> str:
        """
        Get the full path of the database file.

        Raises:
            FileNotFoundError: If the database file does not exist yet.
        """
        db_path = self.database_path / f"{self.db_name}.db"
        if not db_path.exists():
            raise FileNotFoundError(
                f"Database file '{db_path}' does not exist, please create first!")
        return str(db_path)

    @property
    def conn(self) -> sqlite3.Connection:
        """
        Establish a connection to the SQLite database.

        Note: a NEW connection is created on every access; the caller is
        responsible for closing it.
        """
        return sqlite3.connect(self.database)

    @sqlite3_process
    def exists(self, cursor: sqlite3.Cursor) -> bool:
        """
        Check if a specified table exists in the database.
        """
        # BUGFIX: bind the table name as a parameter instead of interpolating
        # it into the string literal — a quote character in db_name would
        # otherwise break (or subvert) the query.
        cursor.execute(
            "SELECT name FROM sqlite_master WHERE type='table' AND name=?;",
            (self.db_name,),
        )
        return cursor.fetchone() is not None
|