viperx-0.9.14-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- viperx/__init__.py +2 -0
- viperx/config_engine.py +141 -0
- viperx/constants.py +35 -0
- viperx/core.py +416 -0
- viperx/licenses.py +248 -0
- viperx/main.py +278 -0
- viperx/templates/Base.ipynb.j2 +119 -0
- viperx/templates/Base_General.ipynb.j2 +119 -0
- viperx/templates/Base_Kaggle.ipynb.j2 +114 -0
- viperx/templates/README.md.j2 +122 -0
- viperx/templates/__init__.py.j2 +8 -0
- viperx/templates/config.py.j2 +40 -0
- viperx/templates/config.yaml.j2 +14 -0
- viperx/templates/data_loader.py.j2 +112 -0
- viperx/templates/main.py.j2 +13 -0
- viperx/templates/pyproject.toml.j2 +59 -0
- viperx/templates/viperx_config.yaml.j2 +45 -0
- viperx/utils.py +47 -0
- viperx-0.9.14.dist-info/METADATA +236 -0
- viperx-0.9.14.dist-info/RECORD +22 -0
- viperx-0.9.14.dist-info/WHEEL +4 -0
- viperx-0.9.14.dist-info/entry_points.txt +3 -0
viperx/templates/Base_General.ipynb.j2

@@ -0,0 +1,119 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# {{ project_name }} - General Base\n",
+    "\n",
+    "**Author:** {{ author_name }}\n",
+    "**Description:** {{ description }}\n",
+    "\n",
+    "This notebook demonstrates the **Smart Data Loader** capabilities.\n",
+    "\n",
+    "It supports two modes:\n",
+    "1. **Global Cache (Default)**: Downloads to `~/.cache/viperx/data`. Ideal for shared datasets.\n",
+    "2. **Local Project (Optional)**: Downloads to `./data`. Ideal for project-specific datasets."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Universal Setup\n",
+    "import sys\n",
+    "from {{ package_name }} import get_config\n",
+    "from {{ package_name }}.data_loader import load_csv, download_file\n",
+    "\n",
+    "print(f\"Project: {get_config('project_name')}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Standard Imports\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import matplotlib.pyplot as plt\n",
+    "import seaborn as sns\n",
+    "\n",
+    "{% if project_type == 'dl' %}\n",
+    "{% if framework == 'pytorch' %}\n",
+    "import torch\n",
+    "print(f\"PyTorch Version: {torch.__version__}\")\n",
+    "{% elif framework == 'tensorflow' %}\n",
+    "import tensorflow as tf\n",
+    "print(f\"TensorFlow Version: {tf.__version__}\")\n",
+    "{% endif %}\n",
+    "{% endif %}\n",
+    "\n",
+    "%matplotlib inline"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Mode 1: Global Cache (Default)\n",
+    "# Does NOT create a 'data/' folder in your project.\n",
+    "# Uses config.yaml key 'iris'\n",
+    "\n",
+    "try:\n",
+    "    print(\"Loading Iris (Global Cache)...\")\n",
+    "    df_iris = load_csv('iris')\n",
+    "    display(df_iris.head())\n",
+    "except Exception as e:\n",
+    "    print(f\"Error: {e}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Mode 2: Local Project Data\n",
+    "# Forces download to ./data/ folder\n",
+    "# Uses config.yaml key 'titanic'\n",
+    "\n",
+    "try:\n",
+    "    print(\"Loading Titanic (Local Download)...\")\n",
+    "    # Passing local=True triggers ./data creation\n",
+    "    df_titanic = load_csv('titanic', local=True)\n",
+    "    display(df_titanic.head())\n",
+    "\n",
+    "    print(\"\\nCheck your project root: 'data/' folder should now exist!\")\n",
+    "except Exception as e:\n",
+    "    print(f\"Error: {e}\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
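The `.j2` files in this wheel are Jinja2 templates rendered by viperx's generator (`config_engine.py` / `core.py`, whose bodies are not shown in this diff). A minimal sketch of how such a notebook template could be rendered and validated, assuming `jinja2` is installed and using hypothetical context values:

```python
# Sketch only: the real rendering logic lives in viperx's own modules.
import json
from jinja2 import Environment, FileSystemLoader

env = Environment(loader=FileSystemLoader("viperx/templates"))
template = env.get_template("Base_General.ipynb.j2")

context = {  # hypothetical example values
    "project_name": "demo-project",
    "author_name": "Jane Doe",
    "description": "A demo project",
    "package_name": "demo_project",
    "project_type": "dl",
    "framework": "pytorch",
}

rendered = template.render(**context)
notebook = json.loads(rendered)  # raises if the rendered output is not valid JSON
print(f"{len(notebook['cells'])} cells, nbformat {notebook['nbformat']}")
```

Note the trick in the template itself: the `{% if %}` / `{% endif %}` tags are embedded inside the JSON string elements of each cell's `source` array, so the file is simultaneously a valid Jinja template and, once rendered, a valid notebook.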
viperx/templates/Base_Kaggle.ipynb.j2

@@ -0,0 +1,114 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# {{ project_name }} - Kaggle Base\n",
+    "\n",
+    "**Author:** {{ author_name }}\n",
+    "**Description:** {{ description }}\n",
+    "\n",
+    "This notebook demonstrates data loading using **KaggleHub**."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Universal Setup (Local / Colab / Kaggle)\n",
+    "import sys\n",
+    "import os\n",
+    "import yaml\n",
+    "# Check if running on Colab\n",
+    "if 'google.colab' in sys.modules:\n",
+    "    print('Detected Google Colab environment')\n",
+    "    # Clone/Install the package if needed\n",
+    "    # !pip install git+https://github.com/{{ author_name | lower | replace(' ', '') }}/{{ project_name }}.git\n",
+    "\n",
+    "from {{ package_name }} import get_config\n",
+    "\n",
+    "print(f\"Project: {get_config('project_name')}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Standard Imports\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import matplotlib.pyplot as plt\n",
+    "import seaborn as sns\n",
+    "import kagglehub as kh\n",
+    "\n",
+    "{% if project_type == 'dl' %}\n",
+    "{% if framework == 'pytorch' %}\n",
+    "import torch\n",
+    "print(f\"PyTorch Version: {torch.__version__}\")\n",
+    "print(f\"CUDA Available: {torch.cuda.is_available()}\")\n",
+    "{% elif framework == 'tensorflow' %}\n",
+    "import tensorflow as tf\n",
+    "print(f\"TensorFlow Version: {tf.__version__}\")\n",
+    "print(f\"GPU Available: {len(tf.config.list_physical_devices('GPU')) > 0}\")\n",
+    "{% endif %}\n",
+    "{% endif %}\n",
+    "\n",
+    "%matplotlib inline"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load Dataset using Config + KaggleHub\n",
+    "try:\n",
+    "    # Load dataset handle from config\n",
+    "    dataset_handle = get_config('datasets', {}).get('Base_Kaggle', 'titanic')\n",
+    "    print(f\"Loading dataset: {dataset_handle}...\")\n",
+    "\n",
+    "    path = kh.dataset_download(dataset_handle)\n",
+    "    print(f\"Path: {path}\")\n",
+    "\n",
+    "    # Logic to find a csv in the path\n",
+    "    import glob\n",
+    "    csv_files = glob.glob(f\"{path}/*.csv\")\n",
+    "    if csv_files:\n",
+    "        df = pd.read_csv(csv_files[0])\n",
+    "        display(df.head())\n",
+    "        df.info()\n",
+    "    else:\n",
+    "        print(\"No CSV found in dataset.\")\n",
+    "except Exception as e:\n",
+    "    print(f\"Failed to load dataset: {e}\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
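The setup cell above only special-cases Colab (`'google.colab' in sys.modules`). A hedged sketch of how the same idea extends to Kaggle kernels; the `KAGGLE_KERNEL_RUN_TYPE` environment variable is a Kaggle-side convention, not something this package defines:

```python
import os
import sys

def detect_environment() -> str:
    """Best-effort runtime detection for Local / Colab / Kaggle notebooks."""
    if "google.colab" in sys.modules:             # Colab injects this module
        return "colab"
    if os.environ.get("KAGGLE_KERNEL_RUN_TYPE"):  # set inside Kaggle kernels
        return "kaggle"
    return "local"

print(f"Running on: {detect_environment()}")
```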
viperx/templates/README.md.j2

@@ -0,0 +1,122 @@
+# {{ project_name }}
+
+{{ description }}
+
+---
+
+## 🧐 Philosophy & Architecture
+
+This project was generated with [ViperX](https://github.com/kpihx/viperx), which values transparency and standard tooling over "black box" magic.
+It is managed by **[uv](https://docs.astral.sh/uv/)**, the extremely fast Python package and project manager written in Rust.
+
+### Why `uv`?
+Unlike traditional workflows (mixing pip, poetry, and venv), `uv` manages the **entire lifecycle**:
+- **Python Version**: It installs and manages the correct Python version for this project automatically.
+- **Dependencies**: Locking is instant.
+{% if project_type == 'dl' %}
+- **Stack**: {{ framework|title }}
+{% endif %}
+- **Environment**: Virtual environments are managed internally; you just run `uv run`.
+
+### ⚙️ Configuration
+
+- **Config**: `src/{{ package_name }}/config.yaml` (Loaded automatically)
+{% if has_config %}
+- **Environment**: `src/{{ package_name }}/.env` (Isolated variables)
+{% endif %}
+
+Edit `config.yaml` to change parameters. The project uses a **Config-in-Package** architecture:
+1. `config.yaml` is inside the package.
+2. `config.py` loads it safely (even in production wheels).
+3. `.env` is isolated within the package source.
+
+---
+
+## 🚀 Getting Started
+
+### Prerequisites
+
+You only need **[uv](https://docs.astral.sh/uv/)**.
+No need to install Python or create venvs manually.
+
+### Installation
+
+```bash
+# Clone the repo
+git clone https://github.com/{{ author_name | lower | replace(" ", "") }}/{{ project_name }}.git
+cd {{ project_name }}
+
+# Sync dependencies (creates .venv and installs python if needed)
+uv sync
+```
+
+## 🧑‍💻 Usage
+
+The entry point depends on your project type.
+
+### For Developers (Code)
+
+To run the package entry point or scripts:
+
+```bash
+# Run the main package
+uv run {{ package_name }}
+
+# Or run a specific script
+uv run python src/{{ package_name }}/main.py
+```
+
+{% if project_type in ['ml', 'dl'] %}
+### For Data Scientists (Notebooks)
+
+We use `uv` to launch Jupyter, ensuring it sees the local package and config.
+
+```bash
+uv run jupyter notebook
+```
+
+- Open `notebooks/Base.ipynb`.
+- Note how it imports `config` from the package.
+{% endif %}
+
+### ☁️ Cloud (Colab / Kaggle)
+
+You can use the code and config from this repository directly in cloud environments without cloning.
+
+**Step 1: Install directly from GitHub**
+```python
+!pip install git+https://github.com/{{ author_name | lower | replace(" ", "") }}/{{ project_name }}.git
+```
+
+**Step 2: Use the unified config**
+```python
+from {{ package_name }} import get_dataset_path, SETTINGS
+import kagglehub as kh

+# Transparency: You can inspect what was loaded
+print(f"Loaded config for: {SETTINGS['project_name']}")
+
+# Download datasets defined in config.yaml
+# The key 'titanic' maps to 'heptapod/titanic' in the yaml
+path = kh.dataset_download(SETTINGS['datasets']['titanic'])
+```
+
+## 🔧 Internal Structure
+
+```text
+{{ project_name }}/
+├── pyproject.toml      # The Single Source of Truth (Dependencies, Metadata)
+├── uv.lock             # Exact versions lockfile
+├── .python-version     # Pinned Python version
+├── src/
+│   └── {{ package_name }}/
+│       ├── __init__.py
+│       ├── config.yaml # EDIT THIS for project settings
+│       ├── config.py   # Code that loads the yaml above
+│       └── tests/      # Unit tests
+{% if project_type in ['ml', 'dl'] %}
+└── notebooks/          # Experimentation (Jupyter)
+{% endif %}
+```
+
viperx/templates/config.py.j2

@@ -0,0 +1,40 @@
+import yaml
+import importlib.resources
+from pathlib import Path
+from typing import Any, Dict
+{% if use_env %}
+from dotenv import load_dotenv
+
+# Load Environment Variables from the isolated .env file in this package
+load_dotenv(Path(__file__).parent / ".env")
+{% endif %}
+
+# Load configuration safely whether installed or local
+try:
+    # Modern Way (Python 3.9+) - works when installed as a package
+    _config_path = importlib.resources.files("{{ package_name }}").joinpath("config.yaml")
+    with _config_path.open("r") as f:
+        SETTINGS: Dict[str, Any] = yaml.safe_load(f)
+except Exception:
+    # Fallback for local dev without install or older Python
+    _local_path = Path(__file__).parent / "config.yaml"
+    if _local_path.exists():
+        with open(_local_path, "r") as f:
+            SETTINGS = yaml.safe_load(f)
+    else:
+        SETTINGS = {}
+
+def get_config(key: str, default: Any = None) -> Any:
+    """Retrieve a value from the globally loaded settings."""
+    return SETTINGS.get(key, default)
+
+def get_dataset_path(notebook_name: str, key: str = "datasets", extension: str = ".csv") -> str | None:
+    """
+    Helper for notebook data loading.
+    Looks up 'notebook_name' in the 'key' section of config.yaml.
+    """
+    datasets = SETTINGS.get(key, {})
+    dataset_name = datasets.get(notebook_name)
+    if not dataset_name:
+        return None
+    return f"{dataset_name}{extension}"
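To make the helpers' semantics concrete, here is a self-contained sketch that exercises the same logic against an inline `SETTINGS` dict (mirroring the ml/dl `config.yaml` template shown next) rather than the packaged file:

```python
from typing import Any

# Inline stand-in for the packaged config.yaml (ml/dl variant).
SETTINGS: dict[str, Any] = {
    "project_name": "demo-project",
    "datasets": {"Base_Kaggle": "titanic", "titanic": "heptapod/titanic"},
}

def get_config(key: str, default: Any = None) -> Any:
    return SETTINGS.get(key, default)

def get_dataset_path(notebook_name: str, key: str = "datasets",
                     extension: str = ".csv") -> str | None:
    dataset_name = SETTINGS.get(key, {}).get(notebook_name)
    return f"{dataset_name}{extension}" if dataset_name else None

assert get_config("missing_key", 42) == 42            # default fallback
assert get_dataset_path("Base_Kaggle") == "titanic.csv"
assert get_dataset_path("Unknown_Notebook") is None   # unmapped notebooks yield None
```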
viperx/templates/config.yaml.j2

@@ -0,0 +1,14 @@
+{% if project_type in ['ml', 'dl'] %}
+data_urls:
+  iris: "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/iris.csv"
+  titanic: "https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv"
+
+datasets:
+  # Notebook Name: Kaggle Dataset Handle
+  Base_Kaggle: "titanic"
+  # Usage: kh.dataset_download(SETTINGS['datasets']['titanic'])
+  titanic: "heptapod/titanic"
+{% else %}
+# Configuration file for {{ package_name }}
+# Add your settings here.
+{% endif %}
viperx/templates/data_loader.py.j2

@@ -0,0 +1,112 @@
+import os
+import requests
+import hashlib
+from pathlib import Path
+from tqdm import tqdm
+import pandas as pd
+from {{ package_name }}.config import get_config
+
+def get_cache_dir(local: bool = False) -> Path:
+    """
+    Get the directory for storing data.
+
+    Args:
+        local: If True, returns <project_root>/data.
+               If False, returns ~/.cache/viperx/data (Global Cache).
+    """
+    if local:
+        # Project Root Strategy:
+        # This script usually lives in src/pkg/data_loader.py or src/pkg/utils/data_loader.py.
+        # Ideally we find the project root by walking up until we hit pyproject.toml;
+        # the simple fallback is that the root is 3 levels up from src/pkg/data_loader.py.
+        current_file = Path(__file__).resolve()
+        # src/pkg/data_loader.py -> parent=pkg -> parent=src -> parent=root
+        # (if in src/pkg/utils/data_loader.py, it would be 4 levels up),
+        # so instead of counting levels,
+        # walk upward to detect the root marker:
+        root = current_file.parent
+        while not (root / "pyproject.toml").exists():
+            if root.parent == root:  # hit filesystem root
+                # Fall back to cwd if running from a notebook
+                return Path.cwd() / "data"
+            root = root.parent
+
+        data_dir = root / "data"
+    else:
+        # Global Cache Strategy
+        data_dir = Path.home() / ".cache" / "viperx" / "data"
+
+    data_dir.mkdir(parents=True, exist_ok=True)
+    return data_dir
+
+def download_file(url: str, filename: str | None = None, local: bool = False, force: bool = False) -> Path:
+    """
+    Download a file from a URL.
+
+    Args:
+        url: Source URL.
+        filename: Target filename. If None, derived from URL.
+        local: If True, downloads to the project 'data/' folder. If False, uses the Global Cache.
+        force: If True, re-download even if the file exists.
+
+    Returns:
+        Path to the downloaded file.
+    """
+    target_dir = get_cache_dir(local=local)
+
+    if not filename:
+        filename = url.split("/")[-1]
+
+    target_path = target_dir / filename
+
+    if target_path.exists() and not force:
+        print(f"Using cached file: {target_path}")
+        return target_path
+
+    print(f"Downloading {url} to {target_path}...")
+
+    try:
+        response = requests.get(url, stream=True)
+        response.raise_for_status()
+
+        total_size = int(response.headers.get('content-length', 0))
+        block_size = 1024  # 1 Kibibyte
+
+        with open(target_path, "wb") as f, tqdm(
+            desc=filename,
+            total=total_size,
+            unit='iB',
+            unit_scale=True,
+            unit_divisor=1024,
+        ) as bar:
+            for data in response.iter_content(block_size):
+                size = f.write(data)
+                bar.update(size)
+
+        print("Download complete.")
+        return target_path
+    except Exception as e:
+        print(f"Failed to download: {e}")
+        if target_path.exists():
+            target_path.unlink()  # Clean up partial file
+        raise
+
+def load_csv(key_or_url: str, local: bool = False, **kwargs) -> pd.DataFrame:
+    """
+    Load a CSV file.
+
+    Args:
+        key_or_url: Config key ('iris') OR a direct URL.
+        local: If True, ensures the file is in the local 'data/' folder.
+    """
+    # 1. Check if it's a config key
+    urls_config = get_config("data_urls", {})
+    url = urls_config.get(key_or_url, key_or_url)
+
+    # 2. Check if it's a URL
+    if url.startswith("http"):
+        path = download_file(url, local=local)
+        return pd.read_csv(path, **kwargs)
+
+    # 3. Assume it's a local path (pass through)
+    return pd.read_csv(url, **kwargs)
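`load_csv` therefore resolves its argument in three steps: known config key, then raw URL, then plain file path. A standalone sketch of just that resolution order (the iris URL matches the `config.yaml` template above):

```python
# Mirrors load_csv's lookup chain without the download/pandas machinery.
DATA_URLS = {
    "iris": "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/iris.csv",
}

def resolve(key_or_url: str) -> str:
    """Config key -> URL -> passthrough path, as load_csv does."""
    return DATA_URLS.get(key_or_url, key_or_url)

assert resolve("iris").startswith("https://")                     # 1. config key hit
assert resolve("https://example.com/x.csv").startswith("https")   # 2. raw URL passes through
assert resolve("data/local.csv") == "data/local.csv"              # 3. local path passes through
```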
viperx/templates/main.py.j2

@@ -0,0 +1,13 @@
+from {{ package_name }} import hello
+{% if has_config %}
+from {{ package_name }} import SETTINGS
+{% endif %}
+
+def main():
+    hello()
+{% if has_config %}
+    print(f"Project config loaded: {SETTINGS['project_name']}")
+{% endif %}
+
+if __name__ == "__main__":
+    main()
viperx/templates/pyproject.toml.j2

@@ -0,0 +1,59 @@
+[project]
+name = "{{ project_name }}"
+version = "{{ version }}"
+description = "{{ description }}"
+{% if use_readme %}
+readme = "README.md"
+{% endif %}
+requires-python = ">={{ python_version }}"
+authors = [
+    { name = "{{ author_name }}", email = "{{ author_email }}" }
+]
+license = { text = "{{ license }}" }
+dependencies = [
+    "pyyaml>=6.0",
+    "python-dotenv>=1.0.0",
+    "kagglehub>=0.2.0",
+{% if project_type == 'ml' %}
+    "numpy>=1.24.0",
+    "pandas>=2.0.0",
+    "scikit-learn>=1.3.0",
+    "matplotlib>=3.7.0",
+    "seaborn>=0.12.0",
+    "requests>=2.30.0",
+    "tqdm>=4.65.0",
+{% elif project_type == 'dl' %}
+{% if framework == 'pytorch' %}
+    "torch>=2.0.0",
+    "torchvision>=0.15.0",
+{% elif framework == 'tensorflow' %}
+    "tensorflow>=2.13.0",
+    # "keras>=3.0.0",  # Optional; usually bundled with TensorFlow
+{% endif %}
+    "numpy>=1.24.0",
+    "pandas>=2.0.0",
+    "matplotlib>=3.7.0",
+    "seaborn>=0.12.0",
+    "requests>=2.30.0",
+    "tqdm>=4.65.0",
+{% endif %}
+]
+
+[build-system]
+{% if builder == 'hatch' %}
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+{% elif builder == 'flit' %}
+requires = ["flit_core>=3.2,<4"]
+build-backend = "flit_core.buildapi"
+{% else %}
+# Default to setuptools (standard/robust) to avoid Hatchling unless requested
+requires = ["setuptools>=61.0"]
+build-backend = "setuptools.build_meta"
+{% endif %}
+
+{% if use_uv %}
+[tool.uv]
+managed = true
+{% endif %}
+
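Since the build backend is picked by a Jinja conditional, a rendered `pyproject.toml` can be sanity-checked before it is written out. A minimal sketch, assuming Python 3.11+ (for `tomllib`), `jinja2`, and hypothetical context values:

```python
import tomllib
from jinja2 import Environment, FileSystemLoader

env = Environment(loader=FileSystemLoader("viperx/templates"))
rendered = env.get_template("pyproject.toml.j2").render(
    project_name="demo-project", version="0.1.0", description="Demo",
    python_version="3.11", author_name="Jane Doe",
    author_email="jane@example.com", license="MIT",
    use_readme=False, project_type="classic", builder="hatch", use_uv=True,
)

data = tomllib.loads(rendered)  # raises if the rendered TOML is malformed
assert data["build-system"]["build-backend"] == "hatchling.build"
assert data["tool"]["uv"]["managed"] is True
```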
viperx/templates/viperx_config.yaml.j2

@@ -0,0 +1,45 @@
+# 🐍 ViperX Project Configuration (v0.9.3)
+# =============================================================================
+# "Source of Truth" for your project infrastructure.
+# Run `viperx init -c viperx.yaml` to apply changes.
+# =============================================================================
+
+project:
+  # [Required] Root project name
+  name: "my-project"
+
+  # [Optional] Defaults (inferred from git/system if omitted)
+  # description: "My robust project"
+  # author: "KpihX"
+  # license: "MIT"
+  # builder: "uv"
+
+settings:
+  # [Optional] Project defaults (applied to root & workspace members)
+
+  # Generate isolated .env files in src/<pkg>/? (Default: false)
+  use_env: false
+
+  # Generate the embedded config.py loader? (Default: true)
+  use_config: true
+
+  # Generate a tests/ directory? (Default: true)
+  use_tests: true
+
+  # Project type: classic | ml | dl (Default: classic; affects ROOT only)
+  type: "classic"
+
+  # DL framework: pytorch | tensorflow (only if type is dl)
+  # framework: "pytorch"
+
+workspace:
+  # Define workspace members (monorepo / utility packages).
+  # These are minimal packages created at the workspace root (flat layout).
+  packages:
+    # --- Example 1: Preprocess Package ---
+    # - name: "preprocess"
+    #   description: "Preprocessing utilities"
+    #   use_env: false
+    #   use_config: true
+    #   use_tests: true
+
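Since this file is plain YAML, its effective settings are easy to inspect before running `viperx init -c viperx.yaml`. A small illustrative sketch (the defaults mirror the comments above; the helper logic is not part of viperx):

```python
import yaml

with open("viperx.yaml") as f:
    cfg = yaml.safe_load(f)

settings = cfg.get("settings") or {}
print(cfg["project"]["name"])               # required key
print(settings.get("type", "classic"))      # documented default
print(settings.get("use_config", True))     # documented default
# With all members commented out, 'packages' parses as None:
print((cfg.get("workspace") or {}).get("packages") or [])
```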