soflytics 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- soflytics-0.1.0/PKG-INFO +89 -0
- soflytics-0.1.0/README.md +51 -0
- soflytics-0.1.0/pyproject.toml +63 -0
- soflytics-0.1.0/setup.cfg +4 -0
- soflytics-0.1.0/soflytics/__init__.py +14 -0
- soflytics-0.1.0/soflytics/cli.py +79 -0
- soflytics-0.1.0/soflytics/connectors/__init__.py +7 -0
- soflytics-0.1.0/soflytics/connectors/loader.py +134 -0
- soflytics-0.1.0/soflytics/core.py +40 -0
- soflytics-0.1.0/soflytics/dashboard/__init__.py +7 -0
- soflytics-0.1.0/soflytics/dashboard/app.py +7 -0
- soflytics-0.1.0/soflytics/dashboard/server.py +93 -0
- soflytics-0.1.0/soflytics/dashboard/static/css/styles.css +667 -0
- soflytics-0.1.0/soflytics/dashboard/static/index.html +79 -0
- soflytics-0.1.0/soflytics/dashboard/static/js/app.js +211 -0
- soflytics-0.1.0/soflytics/profiler/__init__.py +7 -0
- soflytics-0.1.0/soflytics/profiler/models.py +131 -0
- soflytics-0.1.0/soflytics/profiler/stats.py +219 -0
- soflytics-0.1.0/soflytics/report/__init__.py +7 -0
- soflytics-0.1.0/soflytics/report/audit_report.py +90 -0
- soflytics-0.1.0/soflytics/report/console.py +169 -0
- soflytics-0.1.0/soflytics/report/html.py +400 -0
- soflytics-0.1.0/soflytics/rules/__init__.py +8 -0
- soflytics-0.1.0/soflytics/rules/models.py +96 -0
- soflytics-0.1.0/soflytics/rules/suggester.py +134 -0
- soflytics-0.1.0/soflytics/validator.py +200 -0
- soflytics-0.1.0/soflytics.egg-info/PKG-INFO +89 -0
- soflytics-0.1.0/soflytics.egg-info/SOURCES.txt +31 -0
- soflytics-0.1.0/soflytics.egg-info/dependency_links.txt +1 -0
- soflytics-0.1.0/soflytics.egg-info/entry_points.txt +2 -0
- soflytics-0.1.0/soflytics.egg-info/requires.txt +20 -0
- soflytics-0.1.0/soflytics.egg-info/top_level.txt +1 -0
- soflytics-0.1.0/tests/test_loader.py +145 -0
soflytics-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: soflytics
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Zero-config data quality for humans. Profile, suggest rules, validate, and visualize — all in one call.
|
|
5
|
+
Author: Saman
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/saman/soflytics
|
|
8
|
+
Project-URL: Repository, https://github.com/saman/soflytics
|
|
9
|
+
Keywords: data-quality,profiling,validation,data-engineering
|
|
10
|
+
Classifier: Development Status :: 3 - Alpha
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Topic :: Software Development :: Quality Assurance
|
|
19
|
+
Requires-Python: >=3.9
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
Requires-Dist: pandas>=1.5.0
|
|
22
|
+
Requires-Dist: fastapi>=0.100.0
|
|
23
|
+
Requires-Dist: uvicorn>=0.20.0
|
|
24
|
+
Requires-Dist: rich>=13.0.0
|
|
25
|
+
Requires-Dist: click>=8.0.0
|
|
26
|
+
Requires-Dist: jinja2>=3.1.0
|
|
27
|
+
Provides-Extra: dev
|
|
28
|
+
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
29
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
|
|
30
|
+
Provides-Extra: db
|
|
31
|
+
Requires-Dist: sqlalchemy>=2.0.0; extra == "db"
|
|
32
|
+
Requires-Dist: pyodbc>=5.0.0; extra == "db"
|
|
33
|
+
Requires-Dist: pymssql>=2.2.0; extra == "db"
|
|
34
|
+
Requires-Dist: psycopg2-binary>=2.9.0; extra == "db"
|
|
35
|
+
Requires-Dist: pymysql>=1.1.0; extra == "db"
|
|
36
|
+
Provides-Extra: all
|
|
37
|
+
Requires-Dist: soflytics[db,dev]; extra == "all"
|
|
38
|
+
|
|
39
|
+
# 🩺 Soflytics
|
|
40
|
+
|
|
41
|
+
**Zero-config data quality for humans.**
|
|
42
|
+
|
|
43
|
+
Point it at any data source — a DataFrame, a CSV — and get instant profiling, smart validation rules, and a gorgeous visual health report. No YAML, no boilerplate.
|
|
44
|
+
|
|
45
|
+
## Installation
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
pip install soflytics
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
## Quickstart
|
|
52
|
+
|
|
53
|
+
```python
|
|
54
|
+
import soflytics
|
|
55
|
+
|
|
56
|
+
# Audit any data source
|
|
57
|
+
report = soflytics.audit("data.csv") # CSV file
|
|
58
|
+
report = soflytics.audit(df) # Pandas DataFrame
|
|
59
|
+
|
|
60
|
+
# View results
|
|
61
|
+
report.to_console() # Pretty terminal output
|
|
62
|
+
report.show() # Opens browser dashboard
|
|
63
|
+
report.to_html("report.html") # Save standalone HTML
|
|
64
|
+
|
|
65
|
+
# Smart rule suggestions
|
|
66
|
+
rules = report.suggest() # Auto-generated rules
|
|
67
|
+
result = report.validate(rules) # Run validation
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
## CLI
|
|
71
|
+
|
|
72
|
+
```bash
|
|
73
|
+
# Audit a CSV file
|
|
74
|
+
soflytics audit data.csv
|
|
75
|
+
|
|
76
|
+
# Launch interactive dashboard
|
|
77
|
+
soflytics dashboard data.csv
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
## What You Get
|
|
81
|
+
|
|
82
|
+
- **🧠 Auto-Profiling** — Nulls, duplicates, outliers, type mismatches, pattern detection
|
|
83
|
+
- **🤖 Smart Rules** — Plain-English rule suggestions based on your data
|
|
84
|
+
- **📊 Visual Dashboard** — Per-column health grades, quality heatmaps, drill-down details
|
|
85
|
+
- **⚡ Dead Simple** — One function call. Zero configuration.
|
|
86
|
+
|
|
87
|
+
## License
|
|
88
|
+
|
|
89
|
+
MIT
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# 🩺 Soflytics
|
|
2
|
+
|
|
3
|
+
**Zero-config data quality for humans.**
|
|
4
|
+
|
|
5
|
+
Point it at any data source — a DataFrame, a CSV — and get instant profiling, smart validation rules, and a gorgeous visual health report. No YAML, no boilerplate.
|
|
6
|
+
|
|
7
|
+
## Installation
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
pip install soflytics
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
## Quickstart
|
|
14
|
+
|
|
15
|
+
```python
|
|
16
|
+
import soflytics
|
|
17
|
+
|
|
18
|
+
# Audit any data source
|
|
19
|
+
report = soflytics.audit("data.csv") # CSV file
|
|
20
|
+
report = soflytics.audit(df) # Pandas DataFrame
|
|
21
|
+
|
|
22
|
+
# View results
|
|
23
|
+
report.to_console() # Pretty terminal output
|
|
24
|
+
report.show() # Opens browser dashboard
|
|
25
|
+
report.to_html("report.html") # Save standalone HTML
|
|
26
|
+
|
|
27
|
+
# Smart rule suggestions
|
|
28
|
+
rules = report.suggest() # Auto-generated rules
|
|
29
|
+
result = report.validate(rules) # Run validation
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
## CLI
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
# Audit a CSV file
|
|
36
|
+
soflytics audit data.csv
|
|
37
|
+
|
|
38
|
+
# Launch interactive dashboard
|
|
39
|
+
soflytics dashboard data.csv
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
## What You Get
|
|
43
|
+
|
|
44
|
+
- **🧠 Auto-Profiling** — Nulls, duplicates, outliers, type mismatches, pattern detection
|
|
45
|
+
- **🤖 Smart Rules** — Plain-English rule suggestions based on your data
|
|
46
|
+
- **📊 Visual Dashboard** — Per-column health grades, quality heatmaps, drill-down details
|
|
47
|
+
- **⚡ Dead Simple** — One function call. Zero configuration.
|
|
48
|
+
|
|
49
|
+
## License
|
|
50
|
+
|
|
51
|
+
MIT
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68.0", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "soflytics"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Zero-config data quality for humans. Profile, suggest rules, validate, and visualize — all in one call."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = {text = "MIT"}
|
|
11
|
+
requires-python = ">=3.9"
|
|
12
|
+
authors = [
|
|
13
|
+
{name = "Saman"},
|
|
14
|
+
]
|
|
15
|
+
keywords = ["data-quality", "profiling", "validation", "data-engineering"]
|
|
16
|
+
classifiers = [
|
|
17
|
+
"Development Status :: 3 - Alpha",
|
|
18
|
+
"Intended Audience :: Developers",
|
|
19
|
+
"License :: OSI Approved :: MIT License",
|
|
20
|
+
"Programming Language :: Python :: 3",
|
|
21
|
+
"Programming Language :: Python :: 3.9",
|
|
22
|
+
"Programming Language :: Python :: 3.10",
|
|
23
|
+
"Programming Language :: Python :: 3.11",
|
|
24
|
+
"Programming Language :: Python :: 3.12",
|
|
25
|
+
"Topic :: Software Development :: Quality Assurance",
|
|
26
|
+
]
|
|
27
|
+
dependencies = [
|
|
28
|
+
"pandas>=1.5.0",
|
|
29
|
+
"fastapi>=0.100.0",
|
|
30
|
+
"uvicorn>=0.20.0",
|
|
31
|
+
"rich>=13.0.0",
|
|
32
|
+
"click>=8.0.0",
|
|
33
|
+
"jinja2>=3.1.0",
|
|
34
|
+
]
|
|
35
|
+
|
|
36
|
+
[project.optional-dependencies]
|
|
37
|
+
dev = [
|
|
38
|
+
"pytest>=7.0.0",
|
|
39
|
+
"pytest-cov>=4.0.0",
|
|
40
|
+
]
|
|
41
|
+
db = [
|
|
42
|
+
"sqlalchemy>=2.0.0",
|
|
43
|
+
"pyodbc>=5.0.0", # SQL Server (Requires ODBC Driver)
|
|
44
|
+
"pymssql>=2.2.0", # SQL Server (Portable, no system driver needed)
|
|
45
|
+
"psycopg2-binary>=2.9.0", # PostgreSQL
|
|
46
|
+
"pymysql>=1.1.0", # MySQL
|
|
47
|
+
]
|
|
48
|
+
all = [
|
|
49
|
+
"soflytics[dev,db]"
|
|
50
|
+
]
|
|
51
|
+
|
|
52
|
+
[project.scripts]
|
|
53
|
+
soflytics = "soflytics.cli:main"
|
|
54
|
+
|
|
55
|
+
[project.urls]
|
|
56
|
+
Homepage = "https://github.com/saman/soflytics"
|
|
57
|
+
Repository = "https://github.com/saman/soflytics"
|
|
58
|
+
|
|
59
|
+
[tool.setuptools.packages.find]
|
|
60
|
+
include = ["soflytics*"]
|
|
61
|
+
|
|
62
|
+
[tool.setuptools.package-data]
|
|
63
|
+
soflytics = ["dashboard/static/**/*"]
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Soflytics CLI — command-line interface.
|
|
3
|
+
|
|
4
|
+
Usage:
|
|
5
|
+
soflytics audit data.csv
|
|
6
|
+
soflytics audit data.csv --output html --open
|
|
7
|
+
soflytics audit sample_data.db --table employees
|
|
8
|
+
soflytics audit "sqlite:///my.db" --table users
|
|
9
|
+
soflytics dashboard data.csv
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import click
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@click.group()
@click.version_option(version="0.1.0", prog_name="soflytics")
def main():
    """🩺 Soflytics — Zero-config data quality for humans."""
    # Intentionally empty: this function only anchors the click command group.
    # Sub-commands attach themselves through ``@main.command()``.
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@main.command()
@click.argument("source")
@click.option(
    "--output", "-o",
    type=click.Choice(["console", "html", "json"]),
    default="console",
    help="Output format",
)
@click.option("--table", "-t", default=None, help="Database table name (auto-detects if omitted)")
@click.option("--query", "-q", default=None, help="SQL query to run instead of reading a table")
@click.option("--html-path", default="soflytics_report.html", help="Path for HTML output")
@click.option("--open", "open_browser", is_flag=True, help="Open HTML report in browser")
@click.option("--validate", "run_validate", is_flag=True, help="Also run validation")
def audit(source, output, table, query, html_path, open_browser, run_validate):
    """Audit a data source for quality issues.

    SOURCE can be a file path (CSV, Parquet, JSON, Excel, .db) or a database URL
    (sqlite:///path.db, postgresql://user:pass@host/db, mysql://...).
    """
    # NOTE: the docstring above is rendered by click as the command's help
    # text, so developer-facing notes live in comments instead.
    #
    # Parameters (all supplied by click):
    #   source       -- file path or database URL to audit.
    #   output       -- "console", "html" or "json".
    #   table/query  -- forwarded unchanged to soflytics.audit().
    #   html_path    -- destination passed to report.to_html() for HTML output.
    #   open_browser -- open the saved HTML report; only consulted for HTML output.
    #   run_validate -- call report.validate() before rendering.
    import soflytics

    click.echo(f"\n 🩺 Auditing: {source}\n")

    report = soflytics.audit(source, table=table, query=query)

    if run_validate:
        report.validate()

    if output == "console":
        report.to_console()
    elif output == "html":
        # NOTE(review): assumes to_html() returns the path it wrote — confirm.
        path = report.to_html(html_path)
        click.echo(f" 📄 Report saved: {path}")
        if open_browser:
            import webbrowser
            from pathlib import Path

            # A file URL needs an absolute path and proper escaping:
            # "file://report.html" (relative) or "file://C:\..." (Windows)
            # are not valid URLs, so build the URI from the resolved path.
            webbrowser.open(Path(path).resolve().as_uri())
    elif output == "json":
        click.echo(report.to_json())
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
@main.command()
@click.argument("source")
@click.option("--table", "-t", default=None, help="Database table name")
@click.option("--port", "-p", default=8765, help="Dashboard port")
def dashboard(source, table, port):
    """Launch the interactive dashboard for a data source.

    SOURCE can be a file path or database URL.
    """
    # The docstring above doubles as the click help text and is kept verbatim.
    import soflytics

    banner = f"\n 🩺 Profiling: {source}\n"
    click.echo(banner)

    # Profile the source, then hand the report to the dashboard on `port`.
    soflytics.audit(source, table=table).show(port=port)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
# Run the click command group when this module is executed as a script.
if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Data source loader — auto-detect and load any supported data source into a Pandas DataFrame.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
from typing import Optional, Union
|
|
7
|
+
|
|
8
|
+
import pandas as pd
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
# Known database URL prefixes. load() treats any string starting with one of
# these as a database connection URL; every other string is handled as a
# file path.
_DB_PREFIXES = (
    "sqlite:///",
    "postgresql://",
    "postgres://",
    "mysql://",
    "mysql+pymysql://",
    "mssql://",
    "mssql+pyodbc://",
    "mssql+pymssql://",
)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def load(
    source: Union[str, pd.DataFrame, "polars.DataFrame"],
    table: Optional[str] = None,
    query: Optional[str] = None,
) -> pd.DataFrame:
    """
    Load a data source into a Pandas DataFrame.

    Supported sources:
        - pandas.DataFrame (pass-through)
        - polars.DataFrame (auto-convert)
        - str: file path to CSV, TSV, Parquet, JSON, Excel, or a SQLite file
        - str: database connection string (sqlite:///..., postgresql://..., mysql://...)

    Args:
        source: Data source — DataFrame, file path, or database URL.
        table: Table name to read (database URLs and SQLite files). If omitted,
            reads the first table found.
        query: SQL query to execute instead of reading a full table
            (database URLs and SQLite files).

    Returns:
        The loaded DataFrame. A pandas DataFrame is returned as-is, not copied.

    Raises:
        ValueError: If ``source`` is neither a DataFrame nor a string.
    """
    # Already a Pandas DataFrame
    if isinstance(source, pd.DataFrame):
        return source

    # Polars DataFrame — convert. Only the optional import is guarded, so an
    # ImportError raised by the conversion itself is not silently swallowed.
    try:
        import polars as pl
    except ImportError:
        pl = None
    if pl is not None and isinstance(pl.DataFrame, type) and isinstance(source, pl.DataFrame):
        return source.to_pandas()

    # String — auto-detect: database URL or file path
    if isinstance(source, str):
        if source.startswith(_DB_PREFIXES):
            return _load_database(source, table=table, query=query)

        # SQLite files are routed here rather than through _load_file() so the
        # caller's table/query selection is honoured. Missing files still fall
        # through to _load_file(), which raises FileNotFoundError.
        extension = os.path.splitext(source)[1].lower()
        if extension in (".db", ".sqlite", ".sqlite3") and os.path.exists(source):
            return _load_database(
                f"sqlite:///{os.path.abspath(source)}", table=table, query=query
            )

        return _load_file(source)

    raise ValueError(
        f"Unsupported data source type: {type(source).__name__}. "
        f"Pass a DataFrame, file path (CSV/Parquet/JSON), or database URL (sqlite:///...)."
    )
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _load_database(
    url: str,
    table: Optional[str] = None,
    query: Optional[str] = None,
) -> pd.DataFrame:
    """Load data from a database via SQLAlchemy.

    Args:
        url: SQLAlchemy connection URL. ``mssql://`` is rewritten to
            ``mssql+pymssql://`` and the legacy ``postgres://`` scheme to
            ``postgresql://`` before the engine is created.
        table: Table to read. When omitted (and no ``query`` is given) the
            first table reported by the database inspector is used.
        query: SQL to execute; takes precedence over ``table``.

    Returns:
        The query result or table contents as a DataFrame.

    Raises:
        ImportError: If SQLAlchemy is not installed.
        ValueError: If no table was requested and the database has no tables.
    """
    try:
        from sqlalchemy import create_engine, inspect
    except ImportError as exc:
        raise ImportError(
            "SQLAlchemy is required for database connections. "
            "Install it with: pip install sqlalchemy"
        ) from exc

    # Auto-fallback to pymssql for SQL Server to avoid ODBC driver issues
    if url.startswith("mssql://"):
        url = url.replace("mssql://", "mssql+pymssql://", 1)
        print(" 💡 Auto-switching to pymssql driver for SQL Server (no ODBC needed).")
    elif url.startswith("postgres://"):
        # SQLAlchemy 1.4+ no longer recognises the "postgres" dialect name.
        url = url.replace("postgres://", "postgresql://", 1)

    engine = create_engine(url)
    try:
        # If a custom SQL query is provided, run it directly
        if query:
            return pd.read_sql_query(query, engine)

        # If no table name given, auto-detect the first table
        if not table:
            tables = inspect(engine).get_table_names()
            if not tables:
                # Never echo credentials embedded in the URL.
                safe_url = engine.url.render_as_string(hide_password=True)
                raise ValueError(f"No tables found in database: {safe_url}")
            table = tables[0]
            print(f" 📋 Auto-selected table: '{table}' (found {len(tables)} tables)")

        return pd.read_sql_table(table, engine)
    finally:
        # Release pooled connections; the engine is private to this call.
        engine.dispose()
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def _load_file(
    path: str,
    table: Optional[str] = None,
    query: Optional[str] = None,
) -> pd.DataFrame:
    """Load a file into a DataFrame based on extension.

    Args:
        path: Path to the file. The (case-insensitive) extension picks the reader.
        table: Table name, used only for SQLite files (.db/.sqlite/.sqlite3).
        query: SQL query, used only for SQLite files.

    Returns:
        The file contents as a DataFrame.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        ValueError: If the extension is unknown and the file cannot be parsed
            as CSV; the underlying parser error is attached as ``__cause__``.
    """
    if not os.path.exists(path):
        raise FileNotFoundError(f"File not found: {path}")

    ext = os.path.splitext(path)[1].lower()

    if ext == ".csv":
        return pd.read_csv(path)
    if ext == ".tsv":
        return pd.read_csv(path, sep="\t")
    if ext in (".parquet", ".pq"):
        return pd.read_parquet(path)
    if ext == ".json":
        return pd.read_json(path)
    if ext in (".xlsx", ".xls"):
        return pd.read_excel(path)
    if ext in (".db", ".sqlite", ".sqlite3"):
        # Auto-detect SQLite file and load via SQLAlchemy
        return _load_database(
            f"sqlite:///{os.path.abspath(path)}", table=table, query=query
        )

    # Unknown extension: try CSV as a best-effort fallback. The broad catch is
    # deliberate; the original error is chained so the real cause stays visible.
    try:
        return pd.read_csv(path)
    except Exception as exc:
        raise ValueError(
            f"Unsupported file format: {ext}. "
            f"Supported: .csv, .tsv, .parquet, .json, .xlsx, .db"
        ) from exc
|
|
134
|
+
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Core module — the main audit() entry point.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from typing import Optional, Union
|
|
6
|
+
|
|
7
|
+
import pandas as pd
|
|
8
|
+
|
|
9
|
+
from soflytics.connectors import load
|
|
10
|
+
from soflytics.profiler import profile
|
|
11
|
+
from soflytics.report.audit_report import AuditReport
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def audit(
    source: Union[str, pd.DataFrame],
    table: Optional[str] = None,
    query: Optional[str] = None,
) -> AuditReport:
    """
    Load a data source, profile it, and wrap the outcome in an AuditReport.

    Args:
        source: DataFrame, file path, or database URL; passed to ``load``.
        table: Optional table name, forwarded to ``load``.
        query: Optional SQL query, forwarded to ``load``.

    Returns:
        An AuditReport built from the loaded DataFrame and its profile.

    Usage:
        report = soflytics.audit("data.csv")
        report = soflytics.audit(df)
        report = soflytics.audit("sqlite:///my.db", table="users")
        report = soflytics.audit("postgresql://localhost/mydb", query="SELECT * FROM orders")

        report.to_console()         # Terminal output
        report.show()               # Browser dashboard
        rules = report.suggest()    # Auto-generated rules
    """
    frame = load(source, table=table, query=query)
    return AuditReport(df=frame, profile_result=profile(frame))
|
|
40
|
+
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Dashboard server — FastAPI app and launch function.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import os
|
|
7
|
+
import threading
|
|
8
|
+
import time
|
|
9
|
+
import webbrowser
|
|
10
|
+
from typing import TYPE_CHECKING
|
|
11
|
+
|
|
12
|
+
import uvicorn
|
|
13
|
+
from fastapi import FastAPI
|
|
14
|
+
from fastapi.responses import HTMLResponse, JSONResponse
|
|
15
|
+
from fastapi.staticfiles import StaticFiles
|
|
16
|
+
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
from soflytics.report.audit_report import AuditReport
|
|
19
|
+
|
|
20
|
+
# Global reference for the current report. launch() assigns it; the route
# handlers below read it and treat a falsy value as "no report loaded".
_current_report = None

server_app = FastAPI(title="Soflytics Dashboard")

# Mount static files: the "static" directory next to this module is served
# under the /static URL prefix.
_static_dir = os.path.join(os.path.dirname(__file__), "static")
server_app.mount("/static", StaticFiles(directory=_static_dir), name="static")
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _json_for_html(payload) -> str:
    """Serialize ``payload`` to JSON that is safe to inline into an HTML page."""
    text = json.dumps(payload, default=str)
    # "<", ">" and "&" can only occur inside JSON string values, so swapping
    # them for \uXXXX escapes keeps the JSON equivalent while preventing data
    # such as "</script>" from terminating the surrounding markup.
    return (
        text.replace("&", "\\u0026")
        .replace("<", "\\u003c")
        .replace(">", "\\u003e")
    )


@server_app.get("/", response_class=HTMLResponse)
async def dashboard():
    """Serve the main dashboard page."""
    index_path = os.path.join(_static_dir, "index.html")

    with open(index_path, "r", encoding="utf-8") as f:
        html = f.read()

    # Inject profile data into the HTML. Profile and rule contents derive from
    # the audited data (e.g. column names), so they are escaped before being
    # substituted for the placeholders. Without a report the page is returned
    # with its placeholders untouched.
    if _current_report:
        profile_json = _json_for_html(_current_report.profile.to_dict())
        rules_json = _json_for_html(
            [r.to_dict() for r in _current_report.suggest()]
        )
        html = html.replace("__PROFILE_DATA__", profile_json)
        html = html.replace("__RULES_DATA__", rules_json)

    return HTMLResponse(content=html)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
@server_app.get("/api/profile")
async def get_profile():
    """Return profile data as JSON."""
    if not _current_report:
        # No report loaded: empty body with 404.
        return JSONResponse(content={}, status_code=404)

    # Round-trip through json with default=str, as the other handlers in this
    # module do, so values the JSON encoder does not know (e.g. numpy types)
    # do not make the response fail to serialize.
    payload = json.loads(json.dumps(_current_report.profile.to_dict(), default=str))
    return JSONResponse(content=payload)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
@server_app.get("/api/rules")
async def get_rules():
    """Return suggested rules as JSON."""
    if not _current_report:
        # No report loaded: empty list with 404.
        return JSONResponse(content=[], status_code=404)

    rules = [r.to_dict() for r in _current_report.suggest()]
    # Round-trip through json with default=str, as the other handlers in this
    # module do, so values the JSON encoder does not know (e.g. numpy types)
    # do not make the response fail to serialize.
    payload = json.loads(json.dumps(rules, default=str))
    return JSONResponse(content=payload)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
@server_app.post("/api/validate")
async def run_validation():
    """Run validation with suggested rules and return results."""
    # Guard clause: without a loaded report there is nothing to validate.
    if not _current_report:
        return JSONResponse(content={}, status_code=404)

    outcome = _current_report.validate()
    # Serialize with default=str and parse back so that values such as numpy
    # types end up in a form JSONResponse can encode.
    serialized = json.dumps(outcome.to_dict(), default=str)
    return JSONResponse(content=json.loads(serialized))
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def launch(report: "AuditReport", port: int = 8765):
    """Publish ``report`` to the route handlers, open a browser, and serve.

    Args:
        report: Report to expose; stored in the module-level ``_current_report``.
        port: Local port for the dashboard server (bound on 127.0.0.1).

    This call blocks inside ``uvicorn.run``.
    """
    global _current_report
    _current_report = report

    def _open_after_delay():
        # Short fixed delay before opening the page — presumably to give the
        # server time to start listening.
        time.sleep(1.2)
        webbrowser.open(f"http://localhost:{port}")

    # Daemon thread, so it never keeps the process alive on its own.
    opener = threading.Thread(target=_open_after_delay, daemon=True)
    opener.start()

    print(f"\n 🩺 Soflytics Dashboard running at http://localhost:{port}")
    print(" Press Ctrl+C to stop.\n")

    uvicorn.run(server_app, host="127.0.0.1", port=port, log_level="warning")
|