soflytics 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
soflytics/__init__.py ADDED
@@ -0,0 +1,14 @@
1
+ """
2
+ Soflytics — Zero-config data quality for humans.
3
+
4
+ Usage:
5
+ import soflytics
6
+ report = soflytics.audit(df)
7
+ report.show()
8
+ """
9
+
10
+ __version__ = "0.1.0"
11
+
12
+ from soflytics.core import audit
13
+
14
+ __all__ = ["audit", "__version__"]
soflytics/cli.py ADDED
@@ -0,0 +1,79 @@
1
+ """
2
+ Soflytics CLI — command-line interface.
3
+
4
+ Usage:
5
+ soflytics audit data.csv
6
+ soflytics audit data.csv --output html --open
7
+ soflytics audit sample_data.db --table employees
8
+ soflytics audit "sqlite:///my.db" --table users
9
+ soflytics dashboard data.csv
10
+ """
11
+
12
+ import click
13
+
14
+
15
@click.group()
@click.version_option(version="0.1.0", prog_name="soflytics")
def main():
    """🩺 Soflytics — Zero-config data quality for humans."""
    # Root command group. It performs no work of its own: the subcommands
    # registered on it with @main.command() carry the behaviour. The docstring
    # above doubles as the text click prints for `--help`.
20
+
21
+
22
@main.command()
@click.argument("source")
@click.option(
    "--output", "-o",
    type=click.Choice(["console", "html", "json"]),
    default="console",
    help="Output format",
)
@click.option("--table", "-t", default=None, help="Database table name (auto-detects if omitted)")
@click.option("--query", "-q", default=None, help="SQL query to run instead of reading a table")
@click.option("--html-path", default="soflytics_report.html", help="Path for HTML output")
@click.option("--open", "open_browser", is_flag=True, help="Open HTML report in browser")
@click.option("--validate", "run_validate", is_flag=True, help="Also run validation")
def audit(source, output, table, query, html_path, open_browser, run_validate):
    """Audit a data source for quality issues.

    SOURCE can be a file path (CSV, Parquet, JSON, Excel, .db) or a database URL
    (sqlite:///path.db, postgresql://user:pass@host/db, mysql://...).
    """
    # NOTE: the docstring above is what click renders as this command's --help
    # text, so it is kept short and user-facing; maintainer notes live in
    # comments instead.
    import soflytics

    click.echo(f"\n 🩺 Auditing: {source}\n")

    # Load + profile the source; `table` / `query` only matter for databases.
    report = soflytics.audit(source, table=table, query=query)

    if run_validate:
        report.validate()

    if output == "console":
        report.to_console()
    elif output == "html":
        # Assumes to_html() returns the path it wrote to (it is echoed below).
        path = report.to_html(html_path)
        click.echo(f" 📄 Report saved: {path}")
        if open_browser:
            import webbrowser
            from pathlib import Path

            # f"file://{path}" is only a valid URL for absolute POSIX paths.
            # With a relative path (such as the --html-path default) the file
            # name ends up in the URL's host position and the browser cannot
            # find the report. Resolve to an absolute path and let pathlib
            # build a correctly quoted file URI.
            webbrowser.open(Path(path).resolve().as_uri())
    elif output == "json":
        click.echo(report.to_json())
60
+
61
+
62
@main.command()
@click.argument("source")
@click.option("--table", "-t", default=None, help="Database table name")
@click.option("--port", "-p", default=8765, help="Dashboard port")
def dashboard(source, table, port):
    """Launch the interactive dashboard for a data source.

    SOURCE can be a file path or database URL.
    """
    # The package is imported when the command runs, not at module import.
    import soflytics as sf

    banner = f"\n 🩺 Profiling: {source}\n"
    click.echo(banner)

    # Build the report, then hand it straight to the browser dashboard.
    sf.audit(source, table=table).show(port=port)
76
+
77
+
78
+ if __name__ == "__main__":
79
+ main()
@@ -0,0 +1,7 @@
1
+ """
2
+ Connectors package — auto-detect and load data sources.
3
+ """
4
+
5
+ from soflytics.connectors.loader import load
6
+
7
+ __all__ = ["load"]
@@ -0,0 +1,134 @@
1
+ """
2
+ Data source loader — auto-detect and load any supported data source into a Pandas DataFrame.
3
+ """
4
+
5
+ import os
6
+ from typing import Optional, Union
7
+
8
+ import pandas as pd
9
+
10
+
11
+ # Known database URL prefixes
12
+ _DB_PREFIXES = (
13
+ "sqlite:///",
14
+ "postgresql://",
15
+ "postgres://",
16
+ "mysql://",
17
+ "mysql+pymysql://",
18
+ "mssql://",
19
+ "mssql+pyodbc://",
20
+ "mssql+pymssql://",
21
+ )
22
+
23
+
24
def load(
    source: Union[str, os.PathLike, pd.DataFrame, "polars.DataFrame"],
    table: Optional[str] = None,
    query: Optional[str] = None,
) -> pd.DataFrame:
    """
    Load a data source into a Pandas DataFrame.

    Supported sources:
    - pandas.DataFrame (pass-through, returned as the same object)
    - polars.DataFrame (converted with ``to_pandas()``)
    - str: file path to CSV, Parquet, JSON, Excel
    - os.PathLike (e.g. pathlib.Path): treated as a file path
    - str: database connection string (sqlite:///..., postgresql://..., mysql://...)

    Args:
        source: Data source — DataFrame, file path, or database URL.
        table: Table name to read (for database sources). If omitted, reads the first table found.
        query: SQL query to execute instead of reading a full table.

    Returns:
        The loaded data as a pandas DataFrame.

    Raises:
        ValueError: If ``source`` is none of the supported types.
    """
    # Already a Pandas DataFrame
    if isinstance(source, pd.DataFrame):
        return source

    # Path objects are accepted anywhere a string path is.
    if isinstance(source, os.PathLike):
        source = os.fspath(source)

    # String — auto-detect: database URL or file path. Checked before polars
    # so that plain paths/URLs never pay for importing polars.
    if isinstance(source, str):
        if source.startswith(_DB_PREFIXES):
            return _load_database(source, table=table, query=query)
        return _load_file(source)

    # Polars DataFrame — convert. Only the import itself is guarded: an
    # ImportError raised inside to_pandas() (a missing conversion dependency)
    # must reach the caller rather than be mistaken for "polars not
    # installed" and reported as an unsupported source type.
    try:
        import polars as pl
    except ImportError:
        pl = None
    if pl is not None and isinstance(source, pl.DataFrame):
        return source.to_pandas()

    raise ValueError(
        f"Unsupported data source type: {type(source).__name__}. "
        f"Pass a DataFrame, file path (CSV/Parquet/JSON), or database URL (sqlite:///...)."
    )
66
+
67
+
68
def _load_database(
    url: str,
    table: Optional[str] = None,
    query: Optional[str] = None,
) -> pd.DataFrame:
    """Load data from a database via SQLAlchemy.

    Args:
        url: SQLAlchemy connection URL. A bare ``mssql://`` scheme is rewritten
            to ``mssql+pymssql://`` before connecting.
        table: Table to read in full. When falsy and no ``query`` is given, the
            first name returned by the inspector is used.
        query: SQL text to run; when truthy it takes precedence over ``table``.

    Returns:
        The query result or table contents as a pandas DataFrame.

    Raises:
        ImportError: If SQLAlchemy is not installed.
        ValueError: If no table was given and the database has no tables.
    """
    try:
        from sqlalchemy import create_engine
        from sqlalchemy import inspect as sa_inspect
    except ImportError as exc:
        # Chain the original error so the real import failure stays visible.
        raise ImportError(
            "SQLAlchemy is required for database connections. "
            "Install it with: pip install sqlalchemy"
        ) from exc

    # Auto-fallback to pymssql for SQL Server to avoid ODBC driver issues
    if url.startswith("mssql://"):
        url = url.replace("mssql://", "mssql+pymssql://", 1)
        print(" 💡 Auto-switching to pymssql driver for SQL Server (no ODBC needed).")

    engine = create_engine(url)
    try:
        # If a custom SQL query is provided, run it directly
        if query:
            return pd.read_sql_query(query, engine)

        # If no table name given, auto-detect the first table
        if not table:
            tables = sa_inspect(engine).get_table_names()
            if not tables:
                # Render the URL with the password masked: connection strings
                # routinely embed credentials and error text ends up in logs.
                safe_url = engine.url.render_as_string(hide_password=True)
                raise ValueError(f"No tables found in database: {safe_url}")
            table = tables[0]
            print(f" 📋 Auto-selected table: '{table}' (found {len(tables)} tables)")

        return pd.read_sql_table(table, engine)
    finally:
        # The DataFrame is fully materialized by now; release the engine's
        # pooled connections instead of leaving them to garbage collection.
        engine.dispose()
103
+
104
+
105
def _load_file(path: str) -> pd.DataFrame:
    """Load a file into a DataFrame based on extension.

    The extension is compared case-insensitively. Unknown extensions are
    attempted as CSV before giving up.

    Args:
        path: Path to an existing file.

    Returns:
        The file contents as a pandas DataFrame.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        ValueError: If the extension is unknown and the CSV fallback fails;
            the underlying parser error is attached as ``__cause__``.
    """
    if not os.path.exists(path):
        raise FileNotFoundError(f"File not found: {path}")

    ext = os.path.splitext(path)[1].lower()

    if ext == ".csv":
        return pd.read_csv(path)
    if ext == ".tsv":
        return pd.read_csv(path, sep="\t")
    if ext in (".parquet", ".pq"):
        return pd.read_parquet(path)
    if ext == ".json":
        return pd.read_json(path)
    if ext in (".jsonl", ".ndjson"):
        # JSON Lines: one record per line. Without this branch such files
        # fell through to the CSV fallback and were parsed as garbage.
        return pd.read_json(path, lines=True)
    if ext in (".xlsx", ".xls"):
        return pd.read_excel(path)
    if ext in (".db", ".sqlite", ".sqlite3"):
        # Auto-detect SQLite file and load via SQLAlchemy
        return _load_database(f"sqlite:///{os.path.abspath(path)}")

    # Try CSV as fallback
    try:
        return pd.read_csv(path)
    except Exception as exc:
        # Keep the parser's error as the cause so the user can see why the
        # fallback failed, not just that the extension was unrecognized.
        raise ValueError(
            f"Unsupported file format: {ext}. "
            f"Supported: .csv, .tsv, .parquet, .json, .jsonl, .xlsx, .db"
        ) from exc
134
+
soflytics/core.py ADDED
@@ -0,0 +1,40 @@
1
+ """
2
+ Core module — the main audit() entry point.
3
+ """
4
+
5
+ from typing import Optional, Union
6
+
7
+ import pandas as pd
8
+
9
+ from soflytics.connectors import load
10
+ from soflytics.profiler import profile
11
+ from soflytics.report.audit_report import AuditReport
12
+
13
+
14
def audit(
    source: Union[str, pd.DataFrame],
    table: Optional[str] = None,
    query: Optional[str] = None,
) -> AuditReport:
    """
    Load a data source, profile it, and wrap the outcome in an AuditReport.

    Usage:
        report = soflytics.audit("data.csv")
        report = soflytics.audit(df)
        report = soflytics.audit("sqlite:///my.db", table="users")
        report = soflytics.audit("postgresql://localhost/mydb", query="SELECT * FROM orders")

        report.to_console()          # Terminal output
        report.show()                # Browser dashboard
        rules = report.suggest()     # Auto-generated rules

    Args:
        source: Anything accepted by ``load`` (DataFrame, file path, database URL).
        table: Forwarded to ``load`` for database sources.
        query: Forwarded to ``load`` for database sources.

    Returns:
        An AuditReport holding the loaded DataFrame and its profile.
    """
    # Step 1: materialize the source as a DataFrame.
    frame = load(source, table=table, query=query)

    # Step 2: profile it and hand both pieces to the report object.
    return AuditReport(df=frame, profile_result=profile(frame))
40
+
@@ -0,0 +1,7 @@
1
+ """
2
+ Dashboard package — serves the interactive browser UI.
3
+ """
4
+
5
+ from soflytics.dashboard.server import launch
6
+
7
+ __all__ = ["launch"]
@@ -0,0 +1,7 @@
1
+ """
2
+ Dashboard — app module (re-exports for convenience).
3
+ """
4
+
5
+ from soflytics.dashboard.server import server_app, launch
6
+
7
+ __all__ = ["server_app", "launch"]
@@ -0,0 +1,93 @@
1
+ """
2
+ Dashboard server — FastAPI app and launch function.
3
+ """
4
+
5
+ import json
6
+ import os
7
+ import threading
8
+ import time
9
+ import webbrowser
10
+ from typing import TYPE_CHECKING
11
+
12
+ import uvicorn
13
+ from fastapi import FastAPI
14
+ from fastapi.responses import HTMLResponse, JSONResponse
15
+ from fastapi.staticfiles import StaticFiles
16
+
17
+ if TYPE_CHECKING:
18
+ from soflytics.report.audit_report import AuditReport
19
+
20
+ # Global reference for the current report
21
+ _current_report = None
22
+
23
+ server_app = FastAPI(title="Soflytics Dashboard")
24
+
25
+ # Mount static files
26
+ _static_dir = os.path.join(os.path.dirname(__file__), "static")
27
+ server_app.mount("/static", StaticFiles(directory=_static_dir), name="static")
28
+
29
+
30
@server_app.get("/", response_class=HTMLResponse)
async def dashboard():
    """Serve the main dashboard page."""
    # Read the page template that ships in the package's static directory.
    index_path = os.path.join(_static_dir, "index.html")

    with open(index_path, "r", encoding="utf-8") as f:
        html = f.read()

    def _embeddable_json(payload) -> str:
        # Serialize for inlining into the HTML page. Profile values come from
        # the audited dataset, so a cell or column name containing "</script>"
        # would otherwise terminate the surrounding markup. In JSON, the
        # characters <, > and & can only occur inside string literals, where
        # the \uXXXX form decodes to the same character, so the data itself is
        # unchanged. NOTE(review): presumably the placeholders sit inside a
        # <script> block in index.html; confirm against the template.
        text = json.dumps(payload, default=str)
        return (
            text.replace("&", "\\u0026")
            .replace("<", "\\u003c")
            .replace(">", "\\u003e")
        )

    # Inject profile data into the HTML. With no report loaded the template
    # is returned with its placeholders untouched.
    if _current_report:
        profile_json = _embeddable_json(_current_report.profile.to_dict())
        rules_json = _embeddable_json(
            [r.to_dict() for r in _current_report.suggest()]
        )
        html = html.replace("__PROFILE_DATA__", profile_json)
        html = html.replace("__RULES_DATA__", rules_json)

    return HTMLResponse(content=html)
48
+
49
+
50
@server_app.get("/api/profile")
async def get_profile():
    """Return profile data as JSON."""
    if _current_report:
        # Round-trip through json with default=str, exactly as the page route
        # and /api/validate do for this data: values that are not JSON-native
        # (e.g. numpy scalars) are stringified up front instead of making
        # JSONResponse fail while rendering.
        content = json.loads(
            json.dumps(_current_report.profile.to_dict(), default=str)
        )
        return JSONResponse(content=content)
    # No report has been registered yet.
    return JSONResponse(content={}, status_code=404)
56
+
57
+
58
@server_app.get("/api/rules")
async def get_rules():
    """Return suggested rules as JSON."""
    if _current_report:
        rules = [r.to_dict() for r in _current_report.suggest()]
        # Same default=str round-trip the page route and /api/validate use,
        # so rule values that are not JSON-native are stringified rather than
        # raising inside JSONResponse.
        content = json.loads(json.dumps(rules, default=str))
        return JSONResponse(content=content)
    # No report has been registered yet.
    return JSONResponse(content=[], status_code=404)
66
+
67
+
68
@server_app.post("/api/validate")
async def run_validation():
    """Run validation with suggested rules and return results."""
    # Guard: nothing to validate until a report has been registered.
    if not _current_report:
        return JSONResponse(content={}, status_code=404)

    outcome = _current_report.validate()

    # Encode with default=str and decode again so that values json cannot
    # handle natively (numpy types) reach JSONResponse as plain strings.
    encoded = json.dumps(outcome.to_dict(), default=str)
    return JSONResponse(content=json.loads(encoded))
77
+
78
+
79
def launch(report: "AuditReport", port: int = 8765):
    """Register ``report`` as the active report, open a browser tab, and serve.

    Args:
        report: Report the route handlers will read from.
        port: Local TCP port for the server.

    The call blocks in ``uvicorn.run`` until the server stops.
    """
    global _current_report
    _current_report = report

    url = f"http://localhost:{port}"

    # Open the browser from a daemon timer after a short delay, so the tab is
    # requested once the (blocking) server call below has had time to start.
    opener = threading.Timer(1.2, webbrowser.open, args=(url,))
    opener.daemon = True
    opener.start()

    print(f"\n 🩺 Soflytics Dashboard running at {url}")
    print(" Press Ctrl+C to stop.\n")

    uvicorn.run(server_app, host="127.0.0.1", port=port, log_level="warning")