squirrels 0.5.0rc0__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of squirrels might be problematic. Click here for more details.
- dateutils/__init__.py +6 -0
- dateutils/_enums.py +25 -0
- squirrels/dateutils.py → dateutils/_implementation.py +58 -111
- dateutils/types.py +6 -0
- squirrels/__init__.py +10 -12
- squirrels/_api_routes/__init__.py +5 -0
- squirrels/_api_routes/auth.py +271 -0
- squirrels/_api_routes/base.py +171 -0
- squirrels/_api_routes/dashboards.py +158 -0
- squirrels/_api_routes/data_management.py +148 -0
- squirrels/_api_routes/datasets.py +265 -0
- squirrels/_api_routes/oauth2.py +298 -0
- squirrels/_api_routes/project.py +252 -0
- squirrels/_api_server.py +245 -781
- squirrels/_arguments/__init__.py +0 -0
- squirrels/{arguments → _arguments}/init_time_args.py +7 -2
- squirrels/{arguments → _arguments}/run_time_args.py +13 -35
- squirrels/_auth.py +720 -212
- squirrels/_command_line.py +81 -41
- squirrels/_compile_prompts.py +147 -0
- squirrels/_connection_set.py +16 -7
- squirrels/_constants.py +29 -9
- squirrels/{_dashboards_io.py → _dashboards.py} +87 -6
- squirrels/_data_sources.py +570 -0
- squirrels/{dataset_result.py → _dataset_types.py} +2 -4
- squirrels/_exceptions.py +9 -37
- squirrels/_initializer.py +83 -59
- squirrels/_logging.py +117 -0
- squirrels/_manifest.py +129 -62
- squirrels/_model_builder.py +10 -52
- squirrels/_model_configs.py +3 -3
- squirrels/_model_queries.py +1 -1
- squirrels/_models.py +249 -118
- squirrels/{package_data → _package_data}/base_project/.env +16 -4
- squirrels/{package_data → _package_data}/base_project/.env.example +15 -3
- squirrels/{package_data → _package_data}/base_project/connections.yml +4 -3
- squirrels/{package_data → _package_data}/base_project/dashboards/dashboard_example.py +4 -4
- squirrels/_package_data/base_project/dashboards/dashboard_example.yml +22 -0
- squirrels/{package_data → _package_data}/base_project/duckdb_init.sql +1 -0
- squirrels/_package_data/base_project/macros/macros_example.sql +17 -0
- squirrels/{package_data → _package_data}/base_project/models/builds/build_example.py +2 -2
- squirrels/{package_data → _package_data}/base_project/models/builds/build_example.sql +1 -1
- squirrels/{package_data → _package_data}/base_project/models/builds/build_example.yml +2 -0
- squirrels/_package_data/base_project/models/dbviews/dbview_example.sql +17 -0
- squirrels/_package_data/base_project/models/dbviews/dbview_example.yml +32 -0
- squirrels/_package_data/base_project/models/federates/federate_example.py +48 -0
- squirrels/_package_data/base_project/models/federates/federate_example.sql +21 -0
- squirrels/{package_data → _package_data}/base_project/models/federates/federate_example.yml +7 -7
- squirrels/{package_data → _package_data}/base_project/models/sources.yml +5 -6
- squirrels/{package_data → _package_data}/base_project/parameters.yml +32 -45
- squirrels/_package_data/base_project/pyconfigs/connections.py +18 -0
- squirrels/{package_data → _package_data}/base_project/pyconfigs/context.py +31 -22
- squirrels/_package_data/base_project/pyconfigs/parameters.py +141 -0
- squirrels/_package_data/base_project/pyconfigs/user.py +44 -0
- squirrels/{package_data → _package_data}/base_project/seeds/seed_categories.yml +1 -1
- squirrels/{package_data → _package_data}/base_project/seeds/seed_subcategories.yml +1 -1
- squirrels/_package_data/base_project/squirrels.yml.j2 +61 -0
- squirrels/_package_data/templates/dataset_results.html +112 -0
- squirrels/_package_data/templates/oauth_login.html +271 -0
- squirrels/_package_data/templates/squirrels_studio.html +20 -0
- squirrels/_parameter_configs.py +76 -55
- squirrels/_parameter_options.py +348 -0
- squirrels/_parameter_sets.py +53 -45
- squirrels/_parameters.py +1664 -0
- squirrels/_project.py +403 -242
- squirrels/_py_module.py +3 -2
- squirrels/_request_context.py +33 -0
- squirrels/_schemas/__init__.py +0 -0
- squirrels/_schemas/auth_models.py +167 -0
- squirrels/_schemas/query_param_models.py +75 -0
- squirrels/{_api_response_models.py → _schemas/response_models.py} +48 -18
- squirrels/_seeds.py +1 -1
- squirrels/_sources.py +23 -19
- squirrels/_utils.py +121 -39
- squirrels/_version.py +1 -1
- squirrels/arguments.py +7 -0
- squirrels/auth.py +4 -0
- squirrels/connections.py +3 -0
- squirrels/dashboards.py +2 -81
- squirrels/data_sources.py +14 -563
- squirrels/parameter_options.py +13 -348
- squirrels/parameters.py +14 -1266
- squirrels/types.py +16 -0
- {squirrels-0.5.0rc0.dist-info → squirrels-0.5.1.dist-info}/METADATA +42 -30
- squirrels-0.5.1.dist-info/RECORD +98 -0
- squirrels/package_data/base_project/dashboards/dashboard_example.yml +0 -22
- squirrels/package_data/base_project/macros/macros_example.sql +0 -15
- squirrels/package_data/base_project/models/dbviews/dbview_example.sql +0 -12
- squirrels/package_data/base_project/models/dbviews/dbview_example.yml +0 -26
- squirrels/package_data/base_project/models/federates/federate_example.py +0 -44
- squirrels/package_data/base_project/models/federates/federate_example.sql +0 -17
- squirrels/package_data/base_project/pyconfigs/connections.py +0 -14
- squirrels/package_data/base_project/pyconfigs/parameters.py +0 -93
- squirrels/package_data/base_project/pyconfigs/user.py +0 -23
- squirrels/package_data/base_project/squirrels.yml.j2 +0 -71
- squirrels-0.5.0rc0.dist-info/RECORD +0 -70
- /squirrels/{package_data → _package_data}/base_project/assets/expenses.db +0 -0
- /squirrels/{package_data → _package_data}/base_project/assets/weather.db +0 -0
- /squirrels/{package_data → _package_data}/base_project/docker/.dockerignore +0 -0
- /squirrels/{package_data → _package_data}/base_project/docker/Dockerfile +0 -0
- /squirrels/{package_data → _package_data}/base_project/docker/compose.yml +0 -0
- /squirrels/{package_data/base_project/.gitignore → _package_data/base_project/gitignore} +0 -0
- /squirrels/{package_data → _package_data}/base_project/seeds/seed_categories.csv +0 -0
- /squirrels/{package_data → _package_data}/base_project/seeds/seed_subcategories.csv +0 -0
- /squirrels/{package_data → _package_data}/base_project/tmp/.gitignore +0 -0
- {squirrels-0.5.0rc0.dist-info → squirrels-0.5.1.dist-info}/WHEEL +0 -0
- {squirrels-0.5.0rc0.dist-info → squirrels-0.5.1.dist-info}/entry_points.txt +0 -0
- {squirrels-0.5.0rc0.dist-info → squirrels-0.5.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -8,23 +8,35 @@ SQRL_SECRET__ADMIN_PASSWORD="{{ random_admin_password }}"
|
|
|
8
8
|
|
|
9
9
|
# Optional variables used by the Squirrels framework that are safe to include in version control if desired
|
|
10
10
|
# (default values are shown below)
|
|
11
|
-
SQRL_AUTH__DB_FILE_PATH="target/auth.sqlite"
|
|
11
|
+
SQRL_AUTH__DB_FILE_PATH="{project_path}/target/auth.sqlite"
|
|
12
12
|
SQRL_AUTH__TOKEN_EXPIRE_MINUTES="30"
|
|
13
|
+
SQRL_AUTH__ALLOWED_ORIGINS_FOR_COOKIES="https://squirrels-analytics.github.io"
|
|
13
14
|
|
|
14
15
|
SQRL_PARAMETERS__CACHE_SIZE="1024"
|
|
15
16
|
SQRL_PARAMETERS__CACHE_TTL_MINUTES="60"
|
|
17
|
+
SQRL_PARAMETERS__DATASOURCE_REFRESH_MINUTES="60"
|
|
16
18
|
|
|
17
19
|
SQRL_DATASETS__CACHE_SIZE="128"
|
|
18
20
|
SQRL_DATASETS__CACHE_TTL_MINUTES="60"
|
|
21
|
+
SQRL_DATASETS__MAX_ROWS_FOR_AI="100"
|
|
19
22
|
|
|
20
23
|
SQRL_DASHBOARDS__CACHE_SIZE="128"
|
|
21
24
|
SQRL_DASHBOARDS__CACHE_TTL_MINUTES="60"
|
|
22
25
|
|
|
26
|
+
SQRL_PERMISSIONS__ELEVATED_ACCESS_LEVEL="admin" # one of "admin", "member", "guest"
|
|
27
|
+
|
|
23
28
|
SQRL_SEEDS__INFER_SCHEMA="true"
|
|
24
29
|
SQRL_SEEDS__NA_VALUES=["NA"] # must be a JSON list
|
|
25
30
|
|
|
26
|
-
SQRL_TEST_SETS__DEFAULT_NAME_USED="default"
|
|
27
|
-
|
|
28
31
|
SQRL_CONNECTIONS__DEFAULT_NAME_USED="default"
|
|
29
32
|
|
|
30
|
-
|
|
33
|
+
SQRL_VDL__CATALOG_DB_PATH="ducklake:{project_path}/target/vdl_catalog.duckdb"
|
|
34
|
+
SQRL_VDL__DATA_PATH="{project_path}/target/vdl_data/"
|
|
35
|
+
|
|
36
|
+
SQRL_STUDIO__BASE_URL="https://squirrels-analytics.github.io/squirrels-studio-v1"
|
|
37
|
+
|
|
38
|
+
SQRL_LOGGING__LOG_LEVEL="INFO" # one of "DEBUG", "INFO", "WARNING"
|
|
39
|
+
SQRL_LOGGING__LOG_FORMAT="text"
|
|
40
|
+
SQRL_LOGGING__LOG_TO_FILE="false"
|
|
41
|
+
SQRL_LOGGING__LOG_FILE_SIZE_MB="50"
|
|
42
|
+
SQRL_LOGGING__LOG_FILE_BACKUP_COUNT="1"
|
|
@@ -10,21 +10,33 @@ SQRL_SECRET__ADMIN_PASSWORD=""
|
|
|
10
10
|
# (default values are shown below)
|
|
11
11
|
SQRL_AUTH__DB_FILE_PATH="target/auth.sqlite"
|
|
12
12
|
SQRL_AUTH__TOKEN_EXPIRE_MINUTES="30"
|
|
13
|
+
SQRL_AUTH__ALLOWED_ORIGINS_FOR_COOKIES="https://squirrels-analytics.github.io"
|
|
13
14
|
|
|
14
15
|
SQRL_PARAMETERS__CACHE_SIZE="1024"
|
|
15
16
|
SQRL_PARAMETERS__CACHE_TTL_MINUTES="60"
|
|
17
|
+
SQRL_PARAMETERS__DATASOURCE_REFRESH_MINUTES="60"
|
|
16
18
|
|
|
17
19
|
SQRL_DATASETS__CACHE_SIZE="128"
|
|
18
20
|
SQRL_DATASETS__CACHE_TTL_MINUTES="60"
|
|
21
|
+
SQRL_DATASETS__MAX_ROWS_FOR_AI="100"
|
|
19
22
|
|
|
20
23
|
SQRL_DASHBOARDS__CACHE_SIZE="128"
|
|
21
24
|
SQRL_DASHBOARDS__CACHE_TTL_MINUTES="60"
|
|
22
25
|
|
|
26
|
+
SQRL_PERMISSIONS__ELEVATED_ACCESS_LEVEL="admin" # one of "admin", "member", "guest"
|
|
27
|
+
|
|
23
28
|
SQRL_SEEDS__INFER_SCHEMA="true"
|
|
24
29
|
SQRL_SEEDS__NA_VALUES=["NA"] # must be a JSON list
|
|
25
30
|
|
|
26
|
-
SQRL_TEST_SETS__DEFAULT_NAME_USED="default"
|
|
27
|
-
|
|
28
31
|
SQRL_CONNECTIONS__DEFAULT_NAME_USED="default"
|
|
29
32
|
|
|
30
|
-
|
|
33
|
+
SQRL_VDL__CATALOG_DB_PATH="ducklake:{project_path}/target/vdl_catalog.duckdb"
|
|
34
|
+
SQRL_VDL__DATA_PATH="{project_path}/target/vdl_data/"
|
|
35
|
+
|
|
36
|
+
SQRL_STUDIO__BASE_URL="https://squirrels-analytics.github.io/squirrels-studio-v1"
|
|
37
|
+
|
|
38
|
+
SQRL_LOGGING__LOG_LEVEL="INFO" # one of "DEBUG", "INFO", "WARNING"
|
|
39
|
+
SQRL_LOGGING__LOG_FORMAT="text"
|
|
40
|
+
SQRL_LOGGING__LOG_TO_FILE="false"
|
|
41
|
+
SQRL_LOGGING__LOG_FILE_SIZE_MB="50"
|
|
42
|
+
SQRL_LOGGING__LOG_FILE_BACKUP_COUNT="1"
|
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
## Connection URIs are usually in format "dialect://username:password@host:port/database" for database connections
|
|
2
2
|
## However, subtle differences exist depending on the "type" specified. For example, sqlite URIs are slightly different.
|
|
3
|
-
## sqlalchemy: sqlite:///relative/path/to/database.db
|
|
4
|
-
## connectorx/adbc: sqlite://relative/path/to/database.db (adbc URI format matches connectorx
|
|
3
|
+
## sqlalchemy: sqlite:///{project_path}/relative/path/to/database.db
|
|
4
|
+
## connectorx/adbc: sqlite://{project_path}/relative/path/to/database.db (adbc URI format matches connectorx)
|
|
5
|
+
## duckdb: sqlite:{project_path}/relative/path/to/database.db
|
|
5
6
|
## Refer to specific documentation for supported databases by type (with URI examples):
|
|
6
7
|
## sqlalchemy: https://docs.sqlalchemy.org/en/latest/core/engines.html#database-urls
|
|
7
8
|
## connectorx: https://sfu-db.github.io/connector-x/databases.html
|
|
@@ -9,7 +10,7 @@
|
|
|
9
10
|
connections:
|
|
10
11
|
- name: default
|
|
11
12
|
label: SQLite Expenses Database
|
|
12
|
-
type: sqlalchemy ## one of: sqlalchemy, connectorx, or
|
|
13
|
+
type: sqlalchemy ## one of: sqlalchemy, connectorx, adbc, or duckdb
|
|
13
14
|
uri: {{ env_vars.SQLITE_URI }} ## using Jinja to substitute environment variables
|
|
14
15
|
|
|
15
16
|
|
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
from squirrels import
|
|
1
|
+
from squirrels import arguments as args, dashboards as d
|
|
2
2
|
from matplotlib import pyplot as plt, figure as f, axes as a
|
|
3
3
|
|
|
4
4
|
|
|
5
|
-
async def main(sqrl: DashboardArgs) -> d.PngDashboard:
|
|
6
|
-
spending_by_month_df = await sqrl.dataset("
|
|
7
|
-
spending_by_subcategory_df = await sqrl.dataset("
|
|
5
|
+
async def main(sqrl: args.DashboardArgs) -> d.PngDashboard:
|
|
6
|
+
spending_by_month_df = await sqrl.dataset("grouped_expenses", fixed_parameters={"group_by": "month"})
|
|
7
|
+
spending_by_subcategory_df = await sqrl.dataset("grouped_expenses", fixed_parameters={"group_by": "subcat"})
|
|
8
8
|
|
|
9
9
|
# Create a figure with two subplots
|
|
10
10
|
fig, (ax0, ax1) = plt.subplots(2, 1, figsize=(8, 8), height_ratios=(1, 2))
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
label: Expense Dashboard
|
|
2
|
+
|
|
3
|
+
description: This is a dashboard showing the total expense amounts by month as a line chart and by subcategory as a pie chart
|
|
4
|
+
|
|
5
|
+
scope: protected
|
|
6
|
+
|
|
7
|
+
format: png
|
|
8
|
+
|
|
9
|
+
parameters:
|
|
10
|
+
- date_range
|
|
11
|
+
- category
|
|
12
|
+
|
|
13
|
+
depends_on:
|
|
14
|
+
- name: dataset_example_month
|
|
15
|
+
dataset: grouped_expenses
|
|
16
|
+
fixed_parameters:
|
|
17
|
+
- group_by: month (Month)
|
|
18
|
+
|
|
19
|
+
- name: dataset_example_subcategory
|
|
20
|
+
dataset: grouped_expenses
|
|
21
|
+
fixed_parameters:
|
|
22
|
+
- group_by: subcat (Subcategory)
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
{%- macro date_and_amount_filters(use_from_range) -%}
|
|
2
|
+
{%- if use_from_range -%}
|
|
3
|
+
|
|
4
|
+
date >= {{ ctx.start_date_from_range | quote }}
|
|
5
|
+
AND date <= {{ ctx.end_date_from_range | quote }}
|
|
6
|
+
AND amount >= {{ ctx.min_amount_from_range }}
|
|
7
|
+
AND amount <= {{ ctx.max_amount_from_range }}
|
|
8
|
+
|
|
9
|
+
{%- else -%}
|
|
10
|
+
|
|
11
|
+
date >= {{ ctx.start_date | quote }}
|
|
12
|
+
AND date <= {{ ctx.end_date | quote }}
|
|
13
|
+
AND amount >= {{ ctx.min_amount }}
|
|
14
|
+
AND amount <= {{ ctx.max_amount }}
|
|
15
|
+
|
|
16
|
+
{%- endif -%}
|
|
17
|
+
{%- endmacro -%}
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
from squirrels import
|
|
1
|
+
from squirrels import arguments as args
|
|
2
2
|
import polars as pl, pandas as pd
|
|
3
3
|
|
|
4
4
|
|
|
5
|
-
def main(sqrl: BuildModelArgs) -> pl.LazyFrame | pl.DataFrame | pd.DataFrame:
|
|
5
|
+
def main(sqrl: args.BuildModelArgs) -> pl.LazyFrame | pl.DataFrame | pd.DataFrame:
|
|
6
6
|
"""
|
|
7
7
|
Create a build model by joining/processing sources or other build models to form a new
|
|
8
8
|
Python DataFrame (using polars LazyFrame, polars DataFrame, or pandas DataFrame).
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
description: |
|
|
2
2
|
This is an example of a build model. It adds a new column called "month" to the source table "src_transactions".
|
|
3
3
|
|
|
4
|
+
materialization: TABLE # optional - defaults to "VIEW" for SQL models, ignored and always a "TABLE" for Python models
|
|
5
|
+
|
|
4
6
|
depends_on: # optional for SQL models - the "ref" macro also adds to this set
|
|
5
7
|
- src_transactions
|
|
6
8
|
- seed_categories
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
{#- SQLite dialect (based on connection used) -#}
|
|
2
|
+
|
|
3
|
+
SELECT
|
|
4
|
+
date,
|
|
5
|
+
printf('%.2f', amount) as amount,
|
|
6
|
+
CASE
|
|
7
|
+
WHEN '{{ user.custom_fields.role }}' = 'manager' THEN description
|
|
8
|
+
ELSE '***MASKED***'
|
|
9
|
+
END as description
|
|
10
|
+
|
|
11
|
+
FROM {{ source("src_transactions") }}
|
|
12
|
+
|
|
13
|
+
WHERE {{ date_and_amount_filters(use_from_range=false) }}
|
|
14
|
+
|
|
15
|
+
GROUP BY 1
|
|
16
|
+
|
|
17
|
+
ORDER BY 1 DESC
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
description: |
|
|
2
|
+
This is an example of a database view model. It shows transaction details including date, amount, and description.
|
|
3
|
+
Description is masked for non-manager users.
|
|
4
|
+
|
|
5
|
+
Parameters are available to filter the date and amount of the transactions.
|
|
6
|
+
|
|
7
|
+
connection: default # optional - if not provided, will use default connection specified in the SQRL_CONNECTIONS__DEFAULT_NAME_USED setting
|
|
8
|
+
|
|
9
|
+
translate_to_duckdb: false # optional - default is false - if true, then the model will be translated to duckdb for supported dialects (SQLite, PostgreSQL, MySQL)
|
|
10
|
+
|
|
11
|
+
depends_on: # optional - Squirrels is able to derive this from the "source" macro in the SQL file
|
|
12
|
+
- src_transactions
|
|
13
|
+
|
|
14
|
+
columns:
|
|
15
|
+
- name: date
|
|
16
|
+
depends_on:
|
|
17
|
+
- src_transactions.date
|
|
18
|
+
pass_through: true
|
|
19
|
+
|
|
20
|
+
- name: amount
|
|
21
|
+
type: float
|
|
22
|
+
description: The amount of the transaction, formatted to 2 decimal places
|
|
23
|
+
category: measure
|
|
24
|
+
depends_on:
|
|
25
|
+
- src_transactions.amount
|
|
26
|
+
|
|
27
|
+
- name: description
|
|
28
|
+
type: string
|
|
29
|
+
description: The description of the transaction (masked for non-manager users)
|
|
30
|
+
category: dimension
|
|
31
|
+
depends_on:
|
|
32
|
+
- src_transactions.description
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
from squirrels import arguments as args
|
|
2
|
+
import polars as pl, pandas as pd
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def main(sqrl: args.ModelArgs) -> pl.LazyFrame | pl.DataFrame | pd.DataFrame:
|
|
6
|
+
"""
|
|
7
|
+
Create federated models by joining/processing dependent models (sources, seeds, builds, dbviews, other federates, etc.) to
|
|
8
|
+
form a new Python DataFrame (using polars LazyFrame, polars DataFrame, or pandas DataFrame).
|
|
9
|
+
"""
|
|
10
|
+
df = sqrl.ref("build_example")
|
|
11
|
+
|
|
12
|
+
df = df.filter(
|
|
13
|
+
(pl.col("date") >= sqrl.ctx["start_date_from_range"]) &
|
|
14
|
+
(pl.col("date") <= sqrl.ctx["end_date_from_range"]) &
|
|
15
|
+
(pl.col("amount") >= sqrl.ctx["min_amount_from_range"]) &
|
|
16
|
+
(pl.col("amount") <= sqrl.ctx["max_amount_from_range"])
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
if sqrl.ctx.get("has_categories"):
|
|
20
|
+
categories: list[str] = sqrl.ctx["categories"]
|
|
21
|
+
df = df.filter(pl.col("category_id").is_in(categories))
|
|
22
|
+
|
|
23
|
+
if sqrl.ctx.get("has_subcategories"):
|
|
24
|
+
subcategories: list[str] = sqrl.ctx["subcategories"]
|
|
25
|
+
df = df.filter(pl.col("subcategory_id").is_in(subcategories))
|
|
26
|
+
|
|
27
|
+
df = df.rename(sqrl.ctx.get("column_to_alias_mapping", {}))
|
|
28
|
+
|
|
29
|
+
dimension_cols: list[str] | None = sqrl.ctx.get("group_by_cols")
|
|
30
|
+
if dimension_cols is not None:
|
|
31
|
+
df = df.group_by(dimension_cols).agg(
|
|
32
|
+
pl.sum("amount").cast(pl.Decimal(precision=15, scale=2)).alias("total_amount")
|
|
33
|
+
)
|
|
34
|
+
else:
|
|
35
|
+
df = df.rename({"amount": "total_amount"})
|
|
36
|
+
|
|
37
|
+
order_by_cols: list[str] = sqrl.ctx.get("order_by_cols")
|
|
38
|
+
if order_by_cols is not None:
|
|
39
|
+
df = df.select(*order_by_cols, "total_amount").sort(order_by_cols, descending=True)
|
|
40
|
+
|
|
41
|
+
# Apply mask_column_function to description column if it exists
|
|
42
|
+
mask_column_func = sqrl.ctx.get("mask_column_function")
|
|
43
|
+
if "description" in order_by_cols and mask_column_func:
|
|
44
|
+
df = df.with_columns(
|
|
45
|
+
pl.col("description").map_elements(mask_column_func, return_dtype=pl.String).alias("description")
|
|
46
|
+
)
|
|
47
|
+
|
|
48
|
+
return df
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
{#- DuckDB dialect -#}
|
|
2
|
+
|
|
3
|
+
SELECT {{ ctx.select_dim_cols | join }}
|
|
4
|
+
, CAST({{ ctx.aggregator }}(amount) AS DECIMAL(15, 2)) as total_amount
|
|
5
|
+
|
|
6
|
+
{# ref() can be used on sources, seeds, builds, dbviews, or other federate models -#}
|
|
7
|
+
FROM {{ ref("build_example") }} AS a
|
|
8
|
+
|
|
9
|
+
WHERE {{ date_and_amount_filters(use_from_range=true) }}
|
|
10
|
+
{%- if ctx.has_categories %}
|
|
11
|
+
AND category_id IN ({{ ctx.categories | quote_and_join }})
|
|
12
|
+
{%- endif %}
|
|
13
|
+
{%- if ctx.has_subcategories %}
|
|
14
|
+
AND subcategory_id IN ({{ ctx.subcategories | quote_and_join }})
|
|
15
|
+
{%- endif %}
|
|
16
|
+
|
|
17
|
+
{%- if ctx.group_by_cols %}
|
|
18
|
+
GROUP BY {{ ctx.group_by_cols | join }}
|
|
19
|
+
{%- endif %}
|
|
20
|
+
|
|
21
|
+
ORDER BY {{ ctx.order_by_cols_desc | join }}
|
|
@@ -11,7 +11,7 @@ eager: false # optional - defaults to false. Only applies to SQL m
|
|
|
11
11
|
columns:
|
|
12
12
|
- name: date
|
|
13
13
|
type: string
|
|
14
|
-
condition: parameter 'group_by' (Group By) is '
|
|
14
|
+
condition: parameter 'group_by' (Group By) is 'trans' (Transaction)
|
|
15
15
|
description: The date of the transaction in 'YYYY-MM-DD' format, in descending order
|
|
16
16
|
category: dimension
|
|
17
17
|
depends_on:
|
|
@@ -19,15 +19,15 @@ columns:
|
|
|
19
19
|
|
|
20
20
|
- name: description
|
|
21
21
|
type: string
|
|
22
|
-
condition: parameter 'group_by' (Group By) is '
|
|
23
|
-
description: The description of the transaction
|
|
22
|
+
condition: parameter 'group_by' (Group By) is 'trans' (Transaction)
|
|
23
|
+
description: The description of the transaction (masked for non-manager users)
|
|
24
24
|
category: dimension
|
|
25
25
|
depends_on:
|
|
26
26
|
- build_example.description
|
|
27
27
|
|
|
28
28
|
- name: day
|
|
29
29
|
type: string
|
|
30
|
-
condition: parameter 'group_by' (Group By) is '
|
|
30
|
+
condition: parameter 'group_by' (Group By) is 'day' (Day)
|
|
31
31
|
description: The day for which the amount is aggregated by, in descending order
|
|
32
32
|
category: dimension
|
|
33
33
|
depends_on:
|
|
@@ -35,7 +35,7 @@ columns:
|
|
|
35
35
|
|
|
36
36
|
- name: month
|
|
37
37
|
type: string
|
|
38
|
-
condition: parameter 'group_by' (Group By) is '
|
|
38
|
+
condition: parameter 'group_by' (Group By) is 'month' (Month)
|
|
39
39
|
description: The month for which the amount is aggregated by, in descending order
|
|
40
40
|
category: dimension
|
|
41
41
|
depends_on:
|
|
@@ -43,7 +43,7 @@ columns:
|
|
|
43
43
|
|
|
44
44
|
- name: category
|
|
45
45
|
type: string
|
|
46
|
-
condition: parameter `group_by` (Group By) is `
|
|
46
|
+
condition: parameter `group_by` (Group By) is `trans` (Transaction), `cat` (Category), or `subcat` (Subcategory)
|
|
47
47
|
description: The category for which the amount is aggregated by
|
|
48
48
|
category: dimension
|
|
49
49
|
depends_on:
|
|
@@ -51,7 +51,7 @@ columns:
|
|
|
51
51
|
|
|
52
52
|
- name: subcategory
|
|
53
53
|
type: string
|
|
54
|
-
condition: parameter `group_by` (Group By) is `
|
|
54
|
+
condition: parameter `group_by` (Group By) is `trans` (Transaction) or `subcat` (Subcategory)
|
|
55
55
|
description: The subcategory for which the amount is aggregated by
|
|
56
56
|
category: dimension
|
|
57
57
|
depends_on:
|
|
@@ -3,16 +3,15 @@ sources:
|
|
|
3
3
|
description: "The source table for transactions" # optional
|
|
4
4
|
connection: default # optional - if not provided, will use the connection named "default" or the default connection specified in settings
|
|
5
5
|
table: expenses # optional - if not provided, will use the "name" field of the source
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
# For performance reasons, avoid specifying primary_key for large tables if upserts are not required
|
|
9
|
-
primary_key: [id] # optional - if not provided, then this is an insert-only table for incremental loads - otherwise, this uses upsert
|
|
6
|
+
load_to_vdl: true # optional - default is false - except for dbview models with translate_to_duckdb set to false, models can only reference this source if load_to_vdl is true or the connection type is duckdb
|
|
10
7
|
|
|
11
8
|
update_hints:
|
|
12
9
|
increasing_column: date # optional - if not provided, will always do full refresh, otherwise uses this column for incremental loads
|
|
13
|
-
strictly_increasing: false # optional - default is true - if false, then maximum value of column is removed before incremental
|
|
10
|
+
strictly_increasing: false # optional - default is true - if false, then maximum value of column is removed before incremental load is performed
|
|
14
11
|
|
|
15
|
-
|
|
12
|
+
primary_key: [id] # optional - if not provided, then this is an insert-only table for incremental loads
|
|
13
|
+
|
|
14
|
+
columns: # optional - if load_to_vdl is true, then only the columns listed here are loaded to the Virtual Data Lake (VDL)
|
|
16
15
|
- name: id
|
|
17
16
|
type: string
|
|
18
17
|
description: The unique identifier for the transaction
|
|
@@ -5,46 +5,33 @@ parameters:
|
|
|
5
5
|
name: group_by
|
|
6
6
|
label: Group By
|
|
7
7
|
description: Dimension(s) to aggregate by ## optional, default is empty string
|
|
8
|
-
user_attribute:
|
|
8
|
+
user_attribute: access_level ## optional, default is null
|
|
9
9
|
all_options:
|
|
10
|
-
- id:
|
|
10
|
+
- id: trans
|
|
11
11
|
label: Transaction
|
|
12
|
-
columns: ["date", "category", "subcategory", "description"] ## custom field
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
12
|
+
columns: ["id", "date", "category", "subcategory", "description"] ## custom field
|
|
13
|
+
aliases: ["_id", "date", "category", "subcategory", "description"] ## custom field (any alias starting with "_" will not be selected - see context.py for implementation)
|
|
14
|
+
is_default: false ## optional, shown is default - exists for SingleSelect or MultiSelect options only
|
|
15
|
+
user_groups: ["admin"] ## optional, default is empty list
|
|
16
|
+
parent_option_ids: [] ## optional, shown is default - exists for all parameter options
|
|
17
|
+
- id: day
|
|
17
18
|
label: Day
|
|
18
19
|
columns: [date]
|
|
19
|
-
aliases: [day]
|
|
20
|
-
user_groups: ["
|
|
21
|
-
- id:
|
|
20
|
+
aliases: [day]
|
|
21
|
+
user_groups: ["admin", "member"]
|
|
22
|
+
- id: month
|
|
22
23
|
label: Month
|
|
23
24
|
columns: [month]
|
|
24
|
-
user_groups: ["
|
|
25
|
-
- id:
|
|
25
|
+
user_groups: ["admin", "member", "guest"]
|
|
26
|
+
- id: cat
|
|
26
27
|
label: Category
|
|
27
28
|
columns: [category]
|
|
28
|
-
user_groups: ["
|
|
29
|
-
- id:
|
|
29
|
+
user_groups: ["admin", "member", "guest"]
|
|
30
|
+
- id: subcat
|
|
30
31
|
label: Subcategory
|
|
31
32
|
columns: [category, subcategory]
|
|
32
|
-
user_groups: ["
|
|
33
|
-
parent_name: null ## optional, default
|
|
34
|
-
|
|
35
|
-
- type: NumberParameter
|
|
36
|
-
factory: CreateWithOptions
|
|
37
|
-
arguments:
|
|
38
|
-
name: limit
|
|
39
|
-
label: Max Number of Rows
|
|
40
|
-
description: Maximum number of rows to return
|
|
41
|
-
parent_name: group_by
|
|
42
|
-
all_options:
|
|
43
|
-
- min_value: 0
|
|
44
|
-
max_value: 1000
|
|
45
|
-
increment: 10
|
|
46
|
-
default_value: 1000
|
|
47
|
-
parent_option_ids: g0
|
|
33
|
+
user_groups: ["admin", "member", "guest"]
|
|
34
|
+
parent_name: null ## optional, shown is default - exists for all parameter types
|
|
48
35
|
|
|
49
36
|
- type: DateParameter
|
|
50
37
|
factory: CreateFromSource
|
|
@@ -91,14 +78,14 @@ parameters:
|
|
|
91
78
|
table_or_query: seed_categories
|
|
92
79
|
id_col: category_id
|
|
93
80
|
options_col: category
|
|
94
|
-
|
|
95
|
-
order_by_col: null ## optional, default
|
|
96
|
-
is_default_col: null ## optional, default
|
|
97
|
-
custom_cols: {} ## optional, default
|
|
98
|
-
include_all: true ## optional, default
|
|
99
|
-
order_matters: false ## optional, default
|
|
100
|
-
user_group_col: null ## optional, default
|
|
101
|
-
connection_name: default ## optional, default
|
|
81
|
+
source: seeds ## optional, default is "connection" - must be one of "connection", "seeds", or "vdl" - exists for data_source of any parameters
|
|
82
|
+
order_by_col: null ## optional, shown is default - exists for data_source of SingleSelect and MultiSelect
|
|
83
|
+
is_default_col: null ## optional, shown is default - exists for data_source of SingleSelect and MultiSelect
|
|
84
|
+
custom_cols: {} ## optional, shown is default - exists for data_source of SingleSelect and MultiSelect
|
|
85
|
+
include_all: true ## optional, shown is default - exists for data_source of MultiSelect only
|
|
86
|
+
order_matters: false ## optional, shown is default - exists for data_source of MultiSelect only
|
|
87
|
+
user_group_col: null ## optional, shown is default - exists for data_source of any parameters
|
|
88
|
+
connection_name: default ## optional, shown is default - exists for data_source of any parameters
|
|
102
89
|
|
|
103
90
|
- type: MultiSelectParameter
|
|
104
91
|
factory: CreateFromSource
|
|
@@ -111,8 +98,8 @@ parameters:
|
|
|
111
98
|
table_or_query: seed_subcategories
|
|
112
99
|
id_col: subcategory_id
|
|
113
100
|
options_col: subcategory
|
|
114
|
-
|
|
115
|
-
parent_id_col: category_id ## optional, default is null
|
|
101
|
+
source: seeds
|
|
102
|
+
parent_id_col: category_id ## optional, default is null - exists for all parameter types
|
|
116
103
|
|
|
117
104
|
- type: NumberParameter
|
|
118
105
|
factory: CreateWithOptions
|
|
@@ -123,8 +110,8 @@ parameters:
|
|
|
123
110
|
all_options:
|
|
124
111
|
- min_value: 0
|
|
125
112
|
max_value: 300
|
|
126
|
-
increment: 10 ## optional, default is 1
|
|
127
|
-
default_value: null ## optional, default
|
|
113
|
+
increment: 10 ## optional, default is 1 - exists for Number and NumberRange options
|
|
114
|
+
default_value: null ## optional, shown is default - exists for Number options only
|
|
128
115
|
|
|
129
116
|
- type: NumberParameter
|
|
130
117
|
factory: CreateFromSource
|
|
@@ -138,7 +125,7 @@ parameters:
|
|
|
138
125
|
max_value_col: max_value
|
|
139
126
|
increment_col: increment ## optional, default is null
|
|
140
127
|
default_value_col: max_value ## optional, default is null
|
|
141
|
-
id_col: null ## optional, default
|
|
128
|
+
id_col: null ## optional, shown is default - required for SingleSelect and MultiSelect, optional for others
|
|
142
129
|
|
|
143
130
|
- type: NumberRangeParameter
|
|
144
131
|
factory: CreateWithOptions
|
|
@@ -149,7 +136,7 @@ parameters:
|
|
|
149
136
|
all_options:
|
|
150
137
|
- min_value: 0
|
|
151
138
|
max_value: 300
|
|
152
|
-
default_lower_value: 0 ## optional, default is null (or min_value)
|
|
153
|
-
default_upper_value: 300 ## optional, default is null (or max_value)
|
|
139
|
+
default_lower_value: 0 ## optional, default is null (or min_value) - exists for NumberRange options only
|
|
140
|
+
default_upper_value: 300 ## optional, default is null (or max_value) - exists for NumberRange options only
|
|
154
141
|
|
|
155
142
|
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
from typing import Any
|
|
2
|
+
from squirrels import arguments as args, connections as cn
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def main(connections: dict[str, cn.ConnectionProperties | Any], sqrl: args.ConnectionsArgs) -> None:
|
|
6
|
+
"""
|
|
7
|
+
Define sqlalchemy engines by adding them to the "connections" dictionary
|
|
8
|
+
"""
|
|
9
|
+
## SQLAlchemy URL for a connection engine
|
|
10
|
+
conn_str: str = sqrl.env_vars["SQLITE_URI"].format(project_path=sqrl.project_path)
|
|
11
|
+
|
|
12
|
+
## Assigning names to connection engines
|
|
13
|
+
connections["default"] = cn.ConnectionProperties(
|
|
14
|
+
label="SQLite Expenses Database",
|
|
15
|
+
type=cn.ConnectionTypeEnum.SQLALCHEMY,
|
|
16
|
+
uri=conn_str
|
|
17
|
+
)
|
|
18
|
+
|
|
@@ -1,69 +1,78 @@
|
|
|
1
|
-
from typing import Any
|
|
2
|
-
from squirrels import
|
|
1
|
+
from typing import cast, Any
|
|
2
|
+
from squirrels import arguments as args, parameters as p
|
|
3
3
|
|
|
4
|
+
from pyconfigs.user import CustomUserFields
|
|
4
5
|
|
|
5
|
-
|
|
6
|
+
|
|
7
|
+
def main(ctx: dict[str, Any], sqrl: args.ContextArgs) -> None:
|
|
6
8
|
"""
|
|
7
9
|
Define context variables AFTER parameter selections are made by adding entries to the dictionary "ctx".
|
|
8
10
|
These context variables can then be used in the models.
|
|
9
11
|
|
|
10
12
|
Note that the code here is used by all datasets, regardless of the parameters they use. You can use
|
|
11
|
-
sqrl.
|
|
13
|
+
sqrl.param_exists to determine the conditions to execute certain blocks of code.
|
|
12
14
|
"""
|
|
15
|
+
custom_fields = cast(CustomUserFields, sqrl.user.custom_fields)
|
|
16
|
+
|
|
13
17
|
if sqrl.param_exists("group_by"):
|
|
14
18
|
group_by_param = sqrl.prms["group_by"]
|
|
15
19
|
assert isinstance(group_by_param, p.SingleSelectParameter)
|
|
16
20
|
|
|
21
|
+
selected_id = group_by_param.get_selected_id()
|
|
17
22
|
columns = group_by_param.get_selected("columns")
|
|
18
23
|
aliases = group_by_param.get_selected("aliases", default_field="columns")
|
|
19
24
|
assert isinstance(columns, list) and isinstance(aliases, list) and len(columns) == len(aliases)
|
|
20
25
|
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
ctx["order_by_cols"] = ", ".join((x+" DESC") for x in aliases)
|
|
24
|
-
ctx["group_by_cols_list"] = columns
|
|
25
|
-
ctx["rename_dict"] = {old: new for old, new in zip(columns, aliases)}
|
|
26
|
-
|
|
27
|
-
if sqrl.param_exists("limit"):
|
|
28
|
-
limit_param = sqrl.prms["limit"]
|
|
29
|
-
assert isinstance(limit_param, p.NumberParameter)
|
|
26
|
+
column_to_alias_mapping = {x: y for x, y in zip(columns, aliases) if not y.startswith("_")}
|
|
27
|
+
order_by_cols = list(column_to_alias_mapping.values())
|
|
30
28
|
|
|
31
|
-
ctx["
|
|
32
|
-
|
|
33
|
-
ctx["limit_clause"] = ""
|
|
29
|
+
ctx["column_to_alias_mapping"] = column_to_alias_mapping
|
|
30
|
+
ctx["group_by_cols"] = order_by_cols if selected_id != "trans" else None
|
|
34
31
|
|
|
32
|
+
# Only used if federate_example is a Python model
|
|
33
|
+
mask_column = lambda x: x if custom_fields.role == "manager" else "***MASKED***"
|
|
34
|
+
ctx["order_by_cols"] = order_by_cols
|
|
35
|
+
ctx["mask_column_function"] = mask_column
|
|
36
|
+
|
|
37
|
+
# Only used if federate_example is a SQL model
|
|
38
|
+
mask_column = lambda x: x if custom_fields.role == "manager" else "'***MASKED***'"
|
|
39
|
+
x_as_y = lambda x, y: (mask_column(x) if x in ["description"] else x)+" as "+y
|
|
40
|
+
ctx["select_dim_cols"] = list(x_as_y(x, y) for x, y in column_to_alias_mapping.items())
|
|
41
|
+
ctx["aggregator"] = "SUM" if selected_id != "trans" else ""
|
|
42
|
+
ctx["order_by_cols_desc"] = list(y+" DESC" for y in order_by_cols)
|
|
43
|
+
|
|
35
44
|
if sqrl.param_exists("start_date"):
|
|
36
45
|
start_date_param = sqrl.prms["start_date"]
|
|
37
46
|
assert isinstance(start_date_param, p.DateParameter)
|
|
38
47
|
|
|
39
|
-
ctx["start_date"] = start_date_param.
|
|
48
|
+
ctx["start_date"] = start_date_param.get_selected_date()
|
|
40
49
|
|
|
41
50
|
if sqrl.param_exists("end_date"):
|
|
42
51
|
end_date_param = sqrl.prms["end_date"]
|
|
43
52
|
assert isinstance(end_date_param, p.DateParameter)
|
|
44
53
|
|
|
45
|
-
ctx["end_date"] = end_date_param.
|
|
54
|
+
ctx["end_date"] = end_date_param.get_selected_date()
|
|
46
55
|
|
|
47
56
|
if sqrl.param_exists("date_range"):
|
|
48
57
|
date_range_param = sqrl.prms["date_range"]
|
|
49
58
|
assert isinstance(date_range_param, p.DateRangeParameter)
|
|
50
59
|
|
|
51
|
-
ctx["start_date_from_range"] = date_range_param.
|
|
52
|
-
ctx["end_date_from_range"] = date_range_param.
|
|
60
|
+
ctx["start_date_from_range"] = date_range_param.get_selected_start_date()
|
|
61
|
+
ctx["end_date_from_range"] = date_range_param.get_selected_end_date()
|
|
53
62
|
|
|
54
63
|
if sqrl.param_exists("category"):
|
|
55
64
|
category_param = sqrl.prms["category"]
|
|
56
65
|
assert isinstance(category_param, p.MultiSelectParameter)
|
|
57
66
|
|
|
58
67
|
ctx["has_categories"] = category_param.has_non_empty_selection()
|
|
59
|
-
ctx["categories"] = category_param.
|
|
68
|
+
ctx["categories"] = category_param.get_selected_ids_as_list()
|
|
60
69
|
|
|
61
70
|
if sqrl.param_exists("subcategory"):
|
|
62
71
|
subcategory_param = sqrl.prms["subcategory"]
|
|
63
72
|
assert isinstance(subcategory_param, p.MultiSelectParameter)
|
|
64
73
|
|
|
65
74
|
ctx["has_subcategories"] = subcategory_param.has_non_empty_selection()
|
|
66
|
-
ctx["subcategories"] = subcategory_param.
|
|
75
|
+
ctx["subcategories"] = subcategory_param.get_selected_ids_as_list()
|
|
67
76
|
|
|
68
77
|
if sqrl.param_exists("min_filter"):
|
|
69
78
|
min_amount_filter = sqrl.prms["min_filter"]
|