squirrels 0.4.0__tar.gz → 0.5.0rc0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of squirrels might be problematic. Click here for more details.

Files changed (126) hide show
  1. squirrels-0.5.0rc0/.cursorignore +2 -0
  2. squirrels-0.5.0rc0/.github/workflows/python-publish.yml +45 -0
  3. squirrels-0.5.0rc0/.gitignore +81 -0
  4. {squirrels-0.4.0 → squirrels-0.5.0rc0}/LICENSE +1 -1
  5. {squirrels-0.4.0 → squirrels-0.5.0rc0}/PKG-INFO +31 -32
  6. {squirrels-0.4.0 → squirrels-0.5.0rc0}/README.md +1 -1
  7. squirrels-0.5.0rc0/database_elt/expenses/.gitignore +1 -0
  8. squirrels-0.5.0rc0/database_elt/expenses/create-expenses.py +90 -0
  9. squirrels-0.5.0rc0/database_elt/expenses/create-lookups.py +55 -0
  10. squirrels-0.5.0rc0/database_elt/seattle_weather/create_db.py +13 -0
  11. squirrels-0.5.0rc0/database_elt/seattle_weather/seattle-weather.csv +1462 -0
  12. squirrels-0.5.0rc0/pyproject.toml +69 -0
  13. {squirrels-0.4.0 → squirrels-0.5.0rc0}/squirrels/__init__.py +10 -6
  14. {squirrels-0.4.0 → squirrels-0.5.0rc0}/squirrels/_api_response_models.py +93 -44
  15. squirrels-0.5.0rc0/squirrels/_api_server.py +904 -0
  16. squirrels-0.5.0rc0/squirrels/_auth.py +451 -0
  17. {squirrels-0.4.0 → squirrels-0.5.0rc0}/squirrels/_command_line.py +61 -20
  18. squirrels-0.5.0rc0/squirrels/_connection_set.py +93 -0
  19. {squirrels-0.4.0 → squirrels-0.5.0rc0}/squirrels/_constants.py +44 -34
  20. {squirrels-0.4.0 → squirrels-0.5.0rc0}/squirrels/_dashboards_io.py +34 -16
  21. squirrels-0.5.0rc0/squirrels/_exceptions.py +57 -0
  22. {squirrels-0.4.0 → squirrels-0.5.0rc0}/squirrels/_initializer.py +117 -44
  23. squirrels-0.5.0rc0/squirrels/_manifest.py +279 -0
  24. squirrels-0.5.0rc0/squirrels/_model_builder.py +111 -0
  25. squirrels-0.5.0rc0/squirrels/_model_configs.py +74 -0
  26. squirrels-0.5.0rc0/squirrels/_model_queries.py +52 -0
  27. squirrels-0.5.0rc0/squirrels/_models.py +1054 -0
  28. {squirrels-0.4.0 → squirrels-0.5.0rc0}/squirrels/_package_loader.py +8 -4
  29. {squirrels-0.4.0 → squirrels-0.5.0rc0}/squirrels/_parameter_configs.py +45 -65
  30. {squirrels-0.4.0 → squirrels-0.5.0rc0}/squirrels/_parameter_sets.py +15 -13
  31. squirrels-0.5.0rc0/squirrels/_project.py +561 -0
  32. {squirrels-0.4.0 → squirrels-0.5.0rc0}/squirrels/_py_module.py +4 -3
  33. squirrels-0.5.0rc0/squirrels/_seeds.py +58 -0
  34. squirrels-0.5.0rc0/squirrels/_sources.py +106 -0
  35. {squirrels-0.4.0 → squirrels-0.5.0rc0}/squirrels/_utils.py +166 -63
  36. {squirrels-0.4.0 → squirrels-0.5.0rc0}/squirrels/_version.py +1 -1
  37. squirrels-0.5.0rc0/squirrels/arguments/init_time_args.py +103 -0
  38. squirrels-0.5.0rc0/squirrels/arguments/run_time_args.py +169 -0
  39. {squirrels-0.4.0 → squirrels-0.5.0rc0}/squirrels/dashboards.py +4 -4
  40. {squirrels-0.4.0 → squirrels-0.5.0rc0}/squirrels/data_sources.py +94 -162
  41. squirrels-0.5.0rc0/squirrels/dataset_result.py +86 -0
  42. {squirrels-0.4.0 → squirrels-0.5.0rc0}/squirrels/dateutils.py +4 -4
  43. squirrels-0.5.0rc0/squirrels/package_data/base_project/.env +30 -0
  44. squirrels-0.5.0rc0/squirrels/package_data/base_project/.env.example +30 -0
  45. {squirrels-0.4.0 → squirrels-0.5.0rc0}/squirrels/package_data/base_project/.gitignore +3 -2
  46. squirrels-0.5.0rc0/squirrels/package_data/base_project/assets/expenses.db +0 -0
  47. squirrels-0.5.0rc0/squirrels/package_data/base_project/connections.yml +15 -0
  48. squirrels-0.5.0rc0/squirrels/package_data/base_project/dashboards/dashboard_example.py +34 -0
  49. squirrels-0.5.0rc0/squirrels/package_data/base_project/dashboards/dashboard_example.yml +22 -0
  50. {squirrels-0.4.0 → squirrels-0.5.0rc0}/squirrels/package_data/base_project/docker/.dockerignore +5 -2
  51. {squirrels-0.4.0 → squirrels-0.5.0rc0}/squirrels/package_data/base_project/docker/Dockerfile +3 -3
  52. {squirrels-0.4.0 → squirrels-0.5.0rc0}/squirrels/package_data/base_project/docker/compose.yml +1 -1
  53. squirrels-0.5.0rc0/squirrels/package_data/base_project/duckdb_init.sql +9 -0
  54. squirrels-0.5.0rc0/squirrels/package_data/base_project/macros/macros_example.sql +15 -0
  55. squirrels-0.5.0rc0/squirrels/package_data/base_project/models/builds/build_example.py +26 -0
  56. squirrels-0.5.0rc0/squirrels/package_data/base_project/models/builds/build_example.sql +16 -0
  57. squirrels-0.5.0rc0/squirrels/package_data/base_project/models/builds/build_example.yml +55 -0
  58. squirrels-0.5.0rc0/squirrels/package_data/base_project/models/dbviews/dbview_example.sql +12 -0
  59. squirrels-0.5.0rc0/squirrels/package_data/base_project/models/dbviews/dbview_example.yml +26 -0
  60. squirrels-0.5.0rc0/squirrels/package_data/base_project/models/federates/federate_example.py +44 -0
  61. squirrels-0.5.0rc0/squirrels/package_data/base_project/models/federates/federate_example.sql +17 -0
  62. squirrels-0.5.0rc0/squirrels/package_data/base_project/models/federates/federate_example.yml +65 -0
  63. squirrels-0.5.0rc0/squirrels/package_data/base_project/models/sources.yml +39 -0
  64. {squirrels-0.4.0 → squirrels-0.5.0rc0}/squirrels/package_data/base_project/parameters.yml +36 -21
  65. squirrels-0.5.0rc0/squirrels/package_data/base_project/pyconfigs/connections.py +14 -0
  66. {squirrels-0.4.0 → squirrels-0.5.0rc0}/squirrels/package_data/base_project/pyconfigs/context.py +20 -33
  67. {squirrels-0.4.0 → squirrels-0.5.0rc0}/squirrels/package_data/base_project/pyconfigs/parameters.py +19 -21
  68. squirrels-0.5.0rc0/squirrels/package_data/base_project/pyconfigs/user.py +23 -0
  69. squirrels-0.5.0rc0/squirrels/package_data/base_project/seeds/seed_categories.yml +15 -0
  70. squirrels-0.5.0rc0/squirrels/package_data/base_project/seeds/seed_subcategories.csv +15 -0
  71. squirrels-0.5.0rc0/squirrels/package_data/base_project/seeds/seed_subcategories.yml +21 -0
  72. {squirrels-0.4.0 → squirrels-0.5.0rc0}/squirrels/package_data/base_project/squirrels.yml.j2 +17 -40
  73. {squirrels-0.4.0 → squirrels-0.5.0rc0}/squirrels/parameters.py +20 -20
  74. squirrels-0.5.0rc0/tests/__init__.py +0 -0
  75. squirrels-0.5.0rc0/tests/_auth_test.py +166 -0
  76. squirrels-0.5.0rc0/tests/_connection_set_test.py +46 -0
  77. squirrels-0.5.0rc0/tests/_manifest_test.py +290 -0
  78. squirrels-0.5.0rc0/tests/_model_builder_test.py +168 -0
  79. squirrels-0.5.0rc0/tests/_model_configs_test.py +16 -0
  80. squirrels-0.5.0rc0/tests/_models_basic_test.py +152 -0
  81. squirrels-0.5.0rc0/tests/_models_test.py +167 -0
  82. squirrels-0.5.0rc0/tests/_seeds_test.py +124 -0
  83. squirrels-0.5.0rc0/tests/_sources_test.py +151 -0
  84. squirrels-0.5.0rc0/tests/_utils_test.py +22 -0
  85. squirrels-0.5.0rc0/tests/arguments/run_time_args_test.py +16 -0
  86. squirrels-0.5.0rc0/tests/conftest.py +19 -0
  87. squirrels-0.5.0rc0/tests/data_sources_test.py +206 -0
  88. squirrels-0.5.0rc0/tests/dateutils_test.py +132 -0
  89. squirrels-0.5.0rc0/tests/parameter_configs_tests/_parameter_configs_test.py +184 -0
  90. squirrels-0.5.0rc0/tests/parameter_configs_tests/_parameter_sets_test.py +237 -0
  91. squirrels-0.5.0rc0/tests/parameter_configs_tests/conftest.py +111 -0
  92. squirrels-0.5.0rc0/tests/parameter_options_test.py +122 -0
  93. squirrels-0.5.0rc0/tests/parameters_test.py +386 -0
  94. squirrels-0.5.0rc0/uv.lock +2055 -0
  95. squirrels-0.4.0/pyproject.toml +0 -54
  96. squirrels-0.4.0/squirrels/_api_server.py +0 -552
  97. squirrels-0.4.0/squirrels/_authenticator.py +0 -85
  98. squirrels-0.4.0/squirrels/_connection_set.py +0 -80
  99. squirrels-0.4.0/squirrels/_environcfg.py +0 -84
  100. squirrels-0.4.0/squirrels/_manifest.py +0 -217
  101. squirrels-0.4.0/squirrels/_models.py +0 -548
  102. squirrels-0.4.0/squirrels/_seeds.py +0 -39
  103. squirrels-0.4.0/squirrels/arguments/init_time_args.py +0 -40
  104. squirrels-0.4.0/squirrels/arguments/run_time_args.py +0 -208
  105. squirrels-0.4.0/squirrels/package_data/assets/favicon.ico +0 -0
  106. squirrels-0.4.0/squirrels/package_data/assets/index.css +0 -1
  107. squirrels-0.4.0/squirrels/package_data/assets/index.js +0 -58
  108. squirrels-0.4.0/squirrels/package_data/base_project/assets/expenses.db +0 -0
  109. squirrels-0.4.0/squirrels/package_data/base_project/connections.yml +0 -7
  110. squirrels-0.4.0/squirrels/package_data/base_project/dashboards/dashboard_example.py +0 -32
  111. squirrels-0.4.0/squirrels/package_data/base_project/dashboards.yml +0 -10
  112. squirrels-0.4.0/squirrels/package_data/base_project/env.yml +0 -29
  113. squirrels-0.4.0/squirrels/package_data/base_project/models/dbviews/dbview_example.py +0 -47
  114. squirrels-0.4.0/squirrels/package_data/base_project/models/dbviews/dbview_example.sql +0 -22
  115. squirrels-0.4.0/squirrels/package_data/base_project/models/federates/federate_example.py +0 -21
  116. squirrels-0.4.0/squirrels/package_data/base_project/models/federates/federate_example.sql +0 -3
  117. squirrels-0.4.0/squirrels/package_data/base_project/pyconfigs/auth.py +0 -45
  118. squirrels-0.4.0/squirrels/package_data/base_project/pyconfigs/connections.py +0 -19
  119. squirrels-0.4.0/squirrels/package_data/base_project/seeds/seed_subcategories.csv +0 -15
  120. squirrels-0.4.0/squirrels/package_data/templates/index.html +0 -18
  121. squirrels-0.4.0/squirrels/project.py +0 -378
  122. squirrels-0.4.0/squirrels/user_base.py +0 -55
  123. {squirrels-0.4.0 → squirrels-0.5.0rc0}/squirrels/package_data/base_project/assets/weather.db +0 -0
  124. {squirrels-0.4.0 → squirrels-0.5.0rc0}/squirrels/package_data/base_project/seeds/seed_categories.csv +0 -0
  125. {squirrels-0.4.0 → squirrels-0.5.0rc0}/squirrels/package_data/base_project/tmp/.gitignore +0 -0
  126. {squirrels-0.4.0 → squirrels-0.5.0rc0}/squirrels/parameter_options.py +0 -0
@@ -0,0 +1,2 @@
1
+ database_elt/
2
+ poetry.lock
@@ -0,0 +1,45 @@
1
+ name: Publish Python Package
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - 'v*'
7
+
8
+ permissions:
9
+ contents: read
10
+
11
+ jobs:
12
+ deploy:
13
+ runs-on: ubuntu-latest
14
+ steps:
15
+ - uses: actions/checkout@v3
16
+
17
+ - name: Set up Python
18
+ uses: actions/setup-python@v4
19
+ with:
20
+ python-version: '3.x'
21
+
22
+ - name: Extract version from tag
23
+ id: get_version
24
+ run: echo "VERSION=${GITHUB_REF#refs/tags/v}" >> $GITHUB_OUTPUT
25
+
26
+ - name: Update version in pyproject.toml
27
+ run: |
28
+ # Use sed to replace the version in pyproject.toml
29
+ sed -i "s/^version = \".*\"/version = \"${{ steps.get_version.outputs.VERSION }}\"/" pyproject.toml
30
+ echo "Updated version in pyproject.toml to ${{ steps.get_version.outputs.VERSION }}"
31
+
32
+ - name: Install build dependencies
33
+ run: |
34
+ python -m pip install --upgrade pip
35
+ pip install build twine
36
+
37
+ - name: Build package
38
+ run: python -m build
39
+
40
+ - name: Publish to PyPI
41
+ env:
42
+ TWINE_USERNAME: __token__
43
+ TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
44
+ run: |
45
+ python -m twine upload --username $TWINE_USERNAME --password $TWINE_PASSWORD dist/*
@@ -0,0 +1,81 @@
1
+ sandbox*
2
+ playground*
3
+ site/
4
+ .venv/
5
+
6
+ # pyenv files
7
+ .python-version
8
+
9
+ # Pipenv files
10
+ Pipfile
11
+ Pipfile.lock
12
+
13
+ ### PythonVanilla ###
14
+ # Byte-compiled / optimized / DLL files
15
+ __pycache__/
16
+ *.py[cod]
17
+ *$py.class
18
+
19
+ # C extensions
20
+ *.so
21
+
22
+ # Distribution / packaging
23
+ .Python
24
+ build/
25
+ develop-eggs/
26
+ dist/
27
+ downloads/
28
+ eggs/
29
+ .eggs/
30
+ lib/
31
+ lib64/
32
+ parts/
33
+ sdist/
34
+ var/
35
+ wheels/
36
+ share/python-wheels/
37
+ *.egg-info/
38
+ .installed.cfg
39
+ *.egg
40
+ MANIFEST
41
+
42
+ # Installer logs
43
+ pip-log.txt
44
+ pip-delete-this-directory.txt
45
+
46
+ # Unit test / coverage reports
47
+ htmlcov/
48
+ .tox/
49
+ .nox/
50
+ .coverage
51
+ .coverage.*
52
+ .cache
53
+ nosetests.xml
54
+ coverage.xml
55
+ *.cover
56
+ *.py,cover
57
+ .hypothesis/
58
+ .pytest_cache/
59
+ cover/
60
+
61
+ # Translations
62
+ *.mo
63
+ *.pot
64
+
65
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
66
+ __pypackages__/
67
+
68
+
69
+ ### VisualStudioCode ###
70
+ .vscode
71
+
72
+ # Local History for Visual Studio Code
73
+ .history/
74
+
75
+ # Built Visual Studio Code Extensions
76
+ *.vsix
77
+
78
+ ### VisualStudioCode Patch ###
79
+ # Ignore all local history of files
80
+ .history
81
+ .ionide
@@ -186,7 +186,7 @@
186
186
  same "printed page" as the copyright notice for easier
187
187
  identification within third-party archives.
188
188
 
189
- Copyright 2024 Tim Huang
189
+ Copyright 2025 Tim Huang
190
190
 
191
191
  Licensed under the Apache License, Version 2.0 (the "License");
192
192
  you may not use this file except in compliance with the License.
@@ -1,39 +1,39 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: squirrels
3
- Version: 0.4.0
3
+ Version: 0.5.0rc0
4
4
  Summary: Squirrels - API Framework for Data Analytics
5
- Home-page: https://squirrels-analytics.github.io
6
- License: Apache-2.0
7
- Author: Tim Huang
8
- Author-email: tim.yuting@hotmail.com
9
- Requires-Python: >=3.10,<4.0
5
+ Project-URL: Homepage, https://squirrels-analytics.github.io
6
+ Project-URL: Repository, https://github.com/squirrels-analytics/squirrels
7
+ Project-URL: Documentation, https://squirrels-analytics.github.io
8
+ Author-email: Tim Huang <tim.yuting@hotmail.com>
9
+ License-Expression: Apache-2.0
10
+ License-File: LICENSE
10
11
  Classifier: Intended Audience :: Developers
11
- Classifier: License :: OSI Approved :: Apache Software License
12
- Classifier: Programming Language :: Python :: 3
13
- Classifier: Programming Language :: Python :: 3.10
14
- Classifier: Programming Language :: Python :: 3.11
15
- Classifier: Programming Language :: Python :: 3.12
16
12
  Classifier: Topic :: Software Development :: Libraries :: Application Frameworks
17
13
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
18
14
  Classifier: Typing :: Typed
19
- Provides-Extra: duckdb
20
- Requires-Dist: cachetools (>=5.3.2,<6.0.0)
21
- Requires-Dist: duckdb-engine (>=0.13.0,<1.0.0) ; extra == "duckdb"
22
- Requires-Dist: fastapi (>=0.112.1,<0.113.0)
23
- Requires-Dist: gitpython (>=3.1.41,<4.0.0)
24
- Requires-Dist: inquirer (>=3.2.1,<4.0.0)
25
- Requires-Dist: jinja2 (>=3.1.3,<4.0.0)
26
- Requires-Dist: matplotlib (>=3.8.3,<4.0.0)
27
- Requires-Dist: networkx (>=3.2.1,<4.0.0)
28
- Requires-Dist: pandas (>=2.1.4,<3.0.0)
29
- Requires-Dist: pydantic (>=2.8.2,<3.0.0)
30
- Requires-Dist: pyjwt (>=2.8.0,<3.0.0)
31
- Requires-Dist: python-multipart (>=0.0.9,<0.0.10)
32
- Requires-Dist: pyyaml (>=6.0.1,<7.0.0)
33
- Requires-Dist: sqlalchemy (>=2.0.25,<3.0.0)
34
- Requires-Dist: uvicorn (>=0.30.6,<0.31.0)
35
- Project-URL: Documentation, https://squirrels-analytics.github.io
36
- Project-URL: Repository, https://github.com/squirrels-analytics/squirrels
15
+ Requires-Python: ~=3.10
16
+ Requires-Dist: bcrypt<5,>=4.0.1
17
+ Requires-Dist: cachetools<6,>=5.3.2
18
+ Requires-Dist: duckdb<2,>=1.1.3
19
+ Requires-Dist: fastapi<1,>=0.112.1
20
+ Requires-Dist: gitpython<4,>=3.1.41
21
+ Requires-Dist: inquirer<4,>=3.2.1
22
+ Requires-Dist: jinja2<4,>=3.1.3
23
+ Requires-Dist: matplotlib<4,>=3.8.3
24
+ Requires-Dist: networkx<4,>=3.2.1
25
+ Requires-Dist: pandas<3,>=2.1.4
26
+ Requires-Dist: passlib<2,>=1.7.4
27
+ Requires-Dist: polars<2,>=1.14.0
28
+ Requires-Dist: pyarrow<19,>=18.0.0
29
+ Requires-Dist: pydantic<3,>=2.8.2
30
+ Requires-Dist: pyjwt<3,>=2.8.0
31
+ Requires-Dist: python-dotenv<2,>=1.0.1
32
+ Requires-Dist: python-multipart<1,>=0.0.9
33
+ Requires-Dist: pyyaml<7,>=6.0.1
34
+ Requires-Dist: sqlalchemy<3,>=2.0.25
35
+ Requires-Dist: sqlglot<26,>=25.32.1
36
+ Requires-Dist: uvicorn<1,>=0.30.6
37
37
  Description-Content-Type: text/markdown
38
38
 
39
39
  # Squirrels
@@ -60,7 +60,7 @@ Here are a few of the things that squirrels can do:
60
60
  - Connect to any database by specifying its SQLAlchemy url (in `squirrels.yml`) or by using its native connector library in python (in `connections.py`).
61
61
  - Configure API routes for datasets (in `squirrels.yml`) without writing code.
62
62
  - Configure parameter widgets (types include single-select, multi-select, date, number, etc.) for your datasets (in `parameters.py`).
63
- - Use Jinja SQL templates (just like dbt!) or python functions (that return a pandas dataframe) to define dynamic query logic based on parameter selections.
63
+ - Use Jinja SQL templates (just like dbt!) or python functions (that return a Python dataframe such as polars or pandas) to define dynamic query logic based on parameter selections.
64
64
  - Query multiple databases and join the results together in a final view in one API endpoint/dataset!
65
65
  - Test your API endpoints with an interactive UI or by a command line that generates rendered sql queries and results (for a given set of parameter selections).
66
66
  - Define authentication logic (in `auth.py`) and authorize privacy scope per dataset (in `squirrels.yml`). The user's attributes can even be used in your query logic!
@@ -114,4 +114,3 @@ The library version is maintained in both the `pyproject.toml` and the `squirrel
114
114
  When a user initializes a squirrels project using `sqrl init`, the files are copied from the `squirrels/package_data/base_project` folder. The contents in the `database` subfolder were constructed from the scripts in the `database_elt` folder.
115
115
 
116
116
  For the Squirrels UI activated by `sqrl run`, the HTML, CSS, and Javascript files can be found in the `static` and `templates` subfolders of `squirrels/package_data`. The CSS and Javascript files are minified and built from the source files in this project: https://github.com/squirrels-analytics/squirrels-testing-ui.
117
-
@@ -22,7 +22,7 @@ Here are a few of the things that squirrels can do:
22
22
  - Connect to any database by specifying its SQLAlchemy url (in `squirrels.yml`) or by using its native connector library in python (in `connections.py`).
23
23
  - Configure API routes for datasets (in `squirrels.yml`) without writing code.
24
24
  - Configure parameter widgets (types include single-select, multi-select, date, number, etc.) for your datasets (in `parameters.py`).
25
- - Use Jinja SQL templates (just like dbt!) or python functions (that return a pandas dataframe) to define dynamic query logic based on parameter selections.
25
+ - Use Jinja SQL templates (just like dbt!) or python functions (that return a Python dataframe such as polars or pandas) to define dynamic query logic based on parameter selections.
26
26
  - Query multiple databases and join the results together in a final view in one API endpoint/dataset!
27
27
  - Test your API endpoints with an interactive UI or by a command line that generates rendered sql queries and results (for a given set of parameter selections).
28
28
  - Define authentication logic (in `auth.py`) and authorize privacy scope per dataset (in `squirrels.yml`). The user's attributes can even be used in your query logic!
@@ -0,0 +1 @@
1
+ expenses.db
@@ -0,0 +1,90 @@
1
+ import sqlite3, random, polars as pl, tqdm, numpy as np
2
+
3
+ def generate_expense_description(vendors=None, items=None, adjectives=None):
4
+ """
5
+ Generates a random expense description.
6
+
7
+ Args:
8
+ categories: A list of expense categories (e.g., ["Groceries", "Dining", "Travel", "Utilities"]).
9
+ vendors: A list of vendor names (e.g., ["Walmart", "Starbucks", "Amazon", "Gas Station"]).
10
+ items: A list of items purchased (e.g., ["Milk", "Coffee", "Laptop", "Gas"]).
11
+ adjectives: A list of adjectives to add variety (e.g., ["Monthly", "Quick", "Online", "Unexpected"]).
12
+
13
+ Returns:
14
+ A randomly generated expense description string.
15
+ """
16
+
17
+ if vendors is None:
18
+ vendors = ["Vendor A", "Vendor B", "Vendor C", "Vendor D", "Vendor E", "Online Store", "Local Shop", "Restaurant"]
19
+ if items is None:
20
+ items = ["Item 1", "Item 2", "Item 3", "Item 4", "Item 5", "Service", "Subscription", "Purchase", "Merchandise", "Goods"]
21
+ if adjectives is None:
22
+ adjectives = ["Daily", "Monthly", "Quick", "Online", "Unexpected", "Recurring", "Personal", "Business"]
23
+
24
+ description_parts = []
25
+
26
+ # Choose an adjective, vendor, and item
27
+ adjective = random.choice(adjectives)
28
+ vendor = random.choice(vendors)
29
+ item = random.choice(items)
30
+
31
+ # Build the description
32
+ description_parts.append(f"{adjective} {item} - {vendor}")
33
+
34
+ return " ".join(description_parts).strip()
35
+
36
+ # Define the number of transactions to generate
37
+ num_transactions = 1_000
38
+ batches = 100
39
+ batch_size = num_transactions // batches
40
+
41
+ # Generate the data
42
+ descriptions_df = pl.DataFrame({
43
+ 'description': [generate_expense_description() for _ in range(10**4)]
44
+ })
45
+
46
+ df_list: list[pl.DataFrame] = []
47
+ rng = np.random.default_rng()
48
+ for _ in tqdm.tqdm(range(batches)):
49
+ df_current = descriptions_df.sample(batch_size, with_replacement=True, shuffle=True)
50
+ df_current = df_current.with_columns(
51
+ pl.lit(rng.integers(
52
+ np.datetime64('2024-01-01').astype(int),
53
+ np.datetime64('2025-01-01').astype(int),
54
+ size=batch_size
55
+ ).astype('datetime64[D]')).alias('date'),
56
+ pl.lit(rng.integers(0, 14, size=batch_size)).alias('subcategory_id'),
57
+ pl.lit(rng.exponential(30, size=batch_size).round(2)).alias('amount'),
58
+ )
59
+ df_list.append(df_current)
60
+
61
+ df = pl.concat(df_list).select('date', 'subcategory_id', 'amount', 'description')
62
+ df = df.sort('date').with_row_index(name='id', offset=1)
63
+
64
+ # Connect to SQLite database
65
+ conn = sqlite3.connect('expenses.db')
66
+
67
+ try:
68
+ # Create the expenses table
69
+ conn.execute("DROP TABLE IF EXISTS expenses")
70
+ conn.execute('''
71
+ CREATE TABLE IF NOT EXISTS expenses (
72
+ id INTEGER PRIMARY KEY,
73
+ date DATE,
74
+ subcategory_id INTEGER,
75
+ amount DECIMAL(10,2),
76
+ description TEXT
77
+ )
78
+ ''')
79
+
80
+ # Convert DataFrame to records and insert into database
81
+ records = df.to_numpy().tolist()
82
+ conn.executemany(
83
+ 'INSERT INTO expenses (id, date, subcategory_id, amount, description) VALUES (?, ?, ?, ?, ?)',
84
+ records
85
+ )
86
+
87
+ # Commit changes (the connection is closed in the finally block)
88
+ conn.commit()
89
+ finally:
90
+ conn.close()
@@ -0,0 +1,55 @@
1
+ import sqlite3
2
+ import csv
3
+ from io import StringIO
4
+
5
+ category_id_mapping = """
6
+ "category_id","subcategory_id"
7
+ 0,0
8
+ 0,1
9
+ 1,2
10
+ 2,3
11
+ 3,4
12
+ 1,5
13
+ 2,6
14
+ 1,7
15
+ 4,8
16
+ 4,9
17
+ 2,10
18
+ 3,11
19
+ 2,12
20
+ 4,13
21
+ """
22
+
23
+ # Connect to the SQLite database
24
+ conn = sqlite3.connect('expenses.db')
25
+ try:
26
+ cursor = conn.cursor()
27
+
28
+ # Create the category_mapping table
29
+ cursor.execute('''
30
+ DROP TABLE IF EXISTS category_mapping
31
+ ''')
32
+ cursor.execute('''
33
+ CREATE TABLE category_mapping (
34
+ category_id INTEGER,
35
+ subcategory_id INTEGER,
36
+ PRIMARY KEY (subcategory_id)
37
+ )
38
+ ''')
39
+
40
+ # Parse the CSV string and insert data
41
+ csv_file = StringIO(category_id_mapping.strip())
42
+ csv_reader = csv.DictReader(csv_file)
43
+
44
+ # Insert the data
45
+ for row in csv_reader:
46
+ cursor.execute('''
47
+ INSERT OR REPLACE INTO category_mapping (category_id, subcategory_id)
48
+ VALUES (?, ?)
49
+ ''', (int(row['category_id']), int(row['subcategory_id'])))
50
+
51
+ # Commit the changes (the connection is closed in the finally block)
52
+ conn.commit()
53
+
54
+ finally:
55
+ conn.close()
@@ -0,0 +1,13 @@
1
+ import pandas as pd
2
+ import sqlite3, os
3
+
4
+ os.chdir(os.path.dirname(__file__))
5
+
6
+ df = pd.read_csv('seattle-weather.csv')
7
+
8
+ # Connect to SQLite database
9
+ conn = sqlite3.connect('weather.db')
10
+ try:
11
+ df.to_sql('weather', conn, index=False, if_exists='replace')
12
+ finally:
13
+ conn.close()