squirrels 0.4.1__py3-none-any.whl → 0.5.0rc0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of squirrels might be problematic. Click here for more details.
- squirrels/__init__.py +10 -6
- squirrels/_api_response_models.py +93 -44
- squirrels/_api_server.py +571 -219
- squirrels/_auth.py +451 -0
- squirrels/_command_line.py +61 -20
- squirrels/_connection_set.py +38 -25
- squirrels/_constants.py +44 -34
- squirrels/_dashboards_io.py +34 -16
- squirrels/_exceptions.py +57 -0
- squirrels/_initializer.py +117 -44
- squirrels/_manifest.py +124 -62
- squirrels/_model_builder.py +111 -0
- squirrels/_model_configs.py +74 -0
- squirrels/_model_queries.py +52 -0
- squirrels/_models.py +860 -354
- squirrels/_package_loader.py +8 -4
- squirrels/_parameter_configs.py +45 -65
- squirrels/_parameter_sets.py +15 -13
- squirrels/_project.py +561 -0
- squirrels/_py_module.py +4 -3
- squirrels/_seeds.py +35 -16
- squirrels/_sources.py +106 -0
- squirrels/_utils.py +166 -63
- squirrels/_version.py +1 -1
- squirrels/arguments/init_time_args.py +78 -15
- squirrels/arguments/run_time_args.py +62 -101
- squirrels/dashboards.py +4 -4
- squirrels/data_sources.py +94 -162
- squirrels/dataset_result.py +86 -0
- squirrels/dateutils.py +4 -4
- squirrels/package_data/base_project/.env +30 -0
- squirrels/package_data/base_project/.env.example +30 -0
- squirrels/package_data/base_project/.gitignore +3 -2
- squirrels/package_data/base_project/assets/expenses.db +0 -0
- squirrels/package_data/base_project/connections.yml +11 -3
- squirrels/package_data/base_project/dashboards/dashboard_example.py +15 -13
- squirrels/package_data/base_project/dashboards/dashboard_example.yml +22 -0
- squirrels/package_data/base_project/docker/.dockerignore +5 -2
- squirrels/package_data/base_project/docker/Dockerfile +3 -3
- squirrels/package_data/base_project/docker/compose.yml +1 -1
- squirrels/package_data/base_project/duckdb_init.sql +9 -0
- squirrels/package_data/base_project/macros/macros_example.sql +15 -0
- squirrels/package_data/base_project/models/builds/build_example.py +26 -0
- squirrels/package_data/base_project/models/builds/build_example.sql +16 -0
- squirrels/package_data/base_project/models/builds/build_example.yml +55 -0
- squirrels/package_data/base_project/models/dbviews/dbview_example.sql +12 -22
- squirrels/package_data/base_project/models/dbviews/dbview_example.yml +26 -0
- squirrels/package_data/base_project/models/federates/federate_example.py +38 -15
- squirrels/package_data/base_project/models/federates/federate_example.sql +16 -2
- squirrels/package_data/base_project/models/federates/federate_example.yml +65 -0
- squirrels/package_data/base_project/models/sources.yml +39 -0
- squirrels/package_data/base_project/parameters.yml +36 -21
- squirrels/package_data/base_project/pyconfigs/connections.py +6 -11
- squirrels/package_data/base_project/pyconfigs/context.py +20 -33
- squirrels/package_data/base_project/pyconfigs/parameters.py +19 -21
- squirrels/package_data/base_project/pyconfigs/user.py +23 -0
- squirrels/package_data/base_project/seeds/seed_categories.yml +15 -0
- squirrels/package_data/base_project/seeds/seed_subcategories.csv +15 -15
- squirrels/package_data/base_project/seeds/seed_subcategories.yml +21 -0
- squirrels/package_data/base_project/squirrels.yml.j2 +17 -40
- squirrels/parameters.py +20 -20
- {squirrels-0.4.1.dist-info → squirrels-0.5.0rc0.dist-info}/METADATA +31 -32
- squirrels-0.5.0rc0.dist-info/RECORD +70 -0
- {squirrels-0.4.1.dist-info → squirrels-0.5.0rc0.dist-info}/WHEEL +1 -1
- squirrels-0.5.0rc0.dist-info/entry_points.txt +3 -0
- {squirrels-0.4.1.dist-info → squirrels-0.5.0rc0.dist-info/licenses}/LICENSE +1 -1
- squirrels/_authenticator.py +0 -85
- squirrels/_environcfg.py +0 -84
- squirrels/package_data/assets/favicon.ico +0 -0
- squirrels/package_data/assets/index.css +0 -1
- squirrels/package_data/assets/index.js +0 -58
- squirrels/package_data/base_project/dashboards.yml +0 -10
- squirrels/package_data/base_project/env.yml +0 -29
- squirrels/package_data/base_project/models/dbviews/dbview_example.py +0 -47
- squirrels/package_data/base_project/pyconfigs/auth.py +0 -45
- squirrels/package_data/templates/index.html +0 -18
- squirrels/project.py +0 -378
- squirrels/user_base.py +0 -55
- squirrels-0.4.1.dist-info/RECORD +0 -60
- squirrels-0.4.1.dist-info/entry_points.txt +0 -4
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# Custom environment variables
|
|
2
|
+
SQLITE_URI="sqlite:///{project_path}/assets/expenses.db"
|
|
3
|
+
|
|
4
|
+
# Secrets used by the Squirrels framework that are NOT SAFE TO INCLUDE IN VERSION CONTROL
|
|
5
|
+
# Required if your project uses authentication. Otherwise, optional.
|
|
6
|
+
SQRL_SECRET__KEY="{{ random_secret_key }}"
|
|
7
|
+
SQRL_SECRET__ADMIN_PASSWORD="{{ random_admin_password }}"
|
|
8
|
+
|
|
9
|
+
# Optional variables used by the Squirrels framework that are safe to include in version control if desired
|
|
10
|
+
# (default values are shown below)
|
|
11
|
+
SQRL_AUTH__DB_FILE_PATH="target/auth.sqlite"
|
|
12
|
+
SQRL_AUTH__TOKEN_EXPIRE_MINUTES="30"
|
|
13
|
+
|
|
14
|
+
SQRL_PARAMETERS__CACHE_SIZE="1024"
|
|
15
|
+
SQRL_PARAMETERS__CACHE_TTL_MINUTES="60"
|
|
16
|
+
|
|
17
|
+
SQRL_DATASETS__CACHE_SIZE="128"
|
|
18
|
+
SQRL_DATASETS__CACHE_TTL_MINUTES="60"
|
|
19
|
+
|
|
20
|
+
SQRL_DASHBOARDS__CACHE_SIZE="128"
|
|
21
|
+
SQRL_DASHBOARDS__CACHE_TTL_MINUTES="60"
|
|
22
|
+
|
|
23
|
+
SQRL_SEEDS__INFER_SCHEMA="true"
|
|
24
|
+
SQRL_SEEDS__NA_VALUES=["NA"] # must be a JSON list
|
|
25
|
+
|
|
26
|
+
SQRL_TEST_SETS__DEFAULT_NAME_USED="default"
|
|
27
|
+
|
|
28
|
+
SQRL_CONNECTIONS__DEFAULT_NAME_USED="default"
|
|
29
|
+
|
|
30
|
+
SQRL_DUCKDB_VENV__DB_FILE_PATH="target/venv.duckdb"
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# Custom environment variables
|
|
2
|
+
SQLITE_URI="sqlite:///{project_path}/assets/expenses.db"
|
|
3
|
+
|
|
4
|
+
# Secrets used by the Squirrels framework that are NOT SAFE TO INCLUDE IN VERSION CONTROL
|
|
5
|
+
# Required if your project uses authentication. Otherwise, optional.
|
|
6
|
+
SQRL_SECRET__KEY="" # a random 32 byte hex string - one way to generate this is by running "openssl rand -hex 32" in bash
|
|
7
|
+
SQRL_SECRET__ADMIN_PASSWORD=""
|
|
8
|
+
|
|
9
|
+
# Optional variables used by the Squirrels framework that are safe to include in version control if desired
|
|
10
|
+
# (default values are shown below)
|
|
11
|
+
SQRL_AUTH__DB_FILE_PATH="target/auth.sqlite"
|
|
12
|
+
SQRL_AUTH__TOKEN_EXPIRE_MINUTES="30"
|
|
13
|
+
|
|
14
|
+
SQRL_PARAMETERS__CACHE_SIZE="1024"
|
|
15
|
+
SQRL_PARAMETERS__CACHE_TTL_MINUTES="60"
|
|
16
|
+
|
|
17
|
+
SQRL_DATASETS__CACHE_SIZE="128"
|
|
18
|
+
SQRL_DATASETS__CACHE_TTL_MINUTES="60"
|
|
19
|
+
|
|
20
|
+
SQRL_DASHBOARDS__CACHE_SIZE="128"
|
|
21
|
+
SQRL_DASHBOARDS__CACHE_TTL_MINUTES="60"
|
|
22
|
+
|
|
23
|
+
SQRL_SEEDS__INFER_SCHEMA="true"
|
|
24
|
+
SQRL_SEEDS__NA_VALUES=["NA"] # must be a JSON list
|
|
25
|
+
|
|
26
|
+
SQRL_TEST_SETS__DEFAULT_NAME_USED="default"
|
|
27
|
+
|
|
28
|
+
SQRL_CONNECTIONS__DEFAULT_NAME_USED="default"
|
|
29
|
+
|
|
30
|
+
SQRL_DUCKDB_VENV__DB_FILE_PATH="target/venv.duckdb"
|
|
Binary file
|
|
@@ -1,7 +1,15 @@
|
|
|
1
|
-
##
|
|
1
|
+
## Connection URIs are usually in format "dialect://username:password@host:port/database" for database connections
|
|
2
|
+
## However, subtle differences exist depending on the "type" specified. For example, sqlite URIs are slightly different.
|
|
3
|
+
## sqlalchemy: sqlite:///relative/path/to/database.db
|
|
4
|
+
## connectorx/adbc: sqlite://relative/path/to/database.db (adbc URI format matches connectorx thanks to polars integration)
|
|
5
|
+
## Refer to specific documentation for supported databases by type (with URI examples):
|
|
6
|
+
## sqlalchemy: https://docs.sqlalchemy.org/en/latest/core/engines.html#database-urls
|
|
7
|
+
## connectorx: https://sfu-db.github.io/connector-x/databases.html
|
|
8
|
+
## adbc: https://arrow.apache.org/adbc/ (see connectorx documentation for URI examples)
|
|
2
9
|
connections:
|
|
3
10
|
- name: default
|
|
4
|
-
|
|
5
|
-
|
|
11
|
+
label: SQLite Expenses Database
|
|
12
|
+
type: sqlalchemy ## one of: sqlalchemy, connectorx, or adbc
|
|
13
|
+
uri: {{ env_vars.SQLITE_URI }} ## using Jinja to substitute environment variables
|
|
6
14
|
|
|
7
15
|
|
|
@@ -3,16 +3,8 @@ from matplotlib import pyplot as plt, figure as f, axes as a
|
|
|
3
3
|
|
|
4
4
|
|
|
5
5
|
async def main(sqrl: DashboardArgs) -> d.PngDashboard:
|
|
6
|
-
"""
|
|
7
|
-
|
|
8
|
-
- The PngDashboard constructor takes a single argument for either a matplotlib.figure.Figure or io.BytesIO/bytes of PNG data
|
|
9
|
-
- The HtmlDashboard constructor takes a single argument for a io.StringIO/string of HTML data
|
|
10
|
-
|
|
11
|
-
It is imperative to set the correct return type in the function signature for "main" above! It allows Squirrels to provide the correct format to
|
|
12
|
-
the data catalog without having to run this function.
|
|
13
|
-
"""
|
|
14
|
-
spending_by_month_df = await sqrl.dataset("dataset_example", fixed_parameters={"group_by": "g4"})
|
|
15
|
-
spending_by_subcategory_df = await sqrl.dataset("dataset_example", fixed_parameters={"group_by": "g3"})
|
|
6
|
+
spending_by_month_df = await sqrl.dataset("federate_dataset_example", fixed_parameters={"group_by": "g4"})
|
|
7
|
+
spending_by_subcategory_df = await sqrl.dataset("federate_dataset_example", fixed_parameters={"group_by": "g3"})
|
|
16
8
|
|
|
17
9
|
# Create a figure with two subplots
|
|
18
10
|
fig, (ax0, ax1) = plt.subplots(2, 1, figsize=(8, 8), height_ratios=(1, 2))
|
|
@@ -20,13 +12,23 @@ async def main(sqrl: DashboardArgs) -> d.PngDashboard:
|
|
|
20
12
|
fig.tight_layout(pad=4, h_pad=6)
|
|
21
13
|
|
|
22
14
|
# Create a bar chart of spending by month
|
|
23
|
-
|
|
15
|
+
|
|
16
|
+
# Convert to pandas and ensure total_amount is numeric
|
|
17
|
+
spending_by_month_pandas = spending_by_month_df.sort("month").to_pandas()
|
|
18
|
+
spending_by_month_pandas["total_amount"] = spending_by_month_pandas["total_amount"].astype(float)
|
|
19
|
+
|
|
20
|
+
spending_by_month_pandas.plot(x="month", y="total_amount", ax=ax0)
|
|
24
21
|
ax0.set_title("Spending by Month")
|
|
25
22
|
|
|
26
23
|
# Create a pie chart of spending by subcategory
|
|
27
|
-
|
|
24
|
+
|
|
25
|
+
# Convert to pandas and ensure total_amount is numeric
|
|
26
|
+
subcategory_pandas = spending_by_subcategory_df.sort("total_amount", descending=True).to_pandas()
|
|
27
|
+
subcategory_pandas["total_amount"] = subcategory_pandas["total_amount"].astype(float)
|
|
28
|
+
subcategory_pandas.set_index("subcategory", inplace=True)
|
|
29
|
+
|
|
28
30
|
autopct = lambda pct: ('%.1f%%' % pct) if pct > 6 else ''
|
|
29
|
-
|
|
31
|
+
subcategory_pandas.plot(y="total_amount", kind='pie', ax=ax1, autopct=autopct, legend=False, ylabel="")
|
|
30
32
|
ax1.set_title("Spending by Subcategory")
|
|
31
33
|
|
|
32
34
|
return d.PngDashboard(fig)
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
label: Dashboard Example
|
|
2
|
+
|
|
3
|
+
description: This is an example dashboard
|
|
4
|
+
|
|
5
|
+
scope: protected
|
|
6
|
+
|
|
7
|
+
format: png
|
|
8
|
+
|
|
9
|
+
parameters:
|
|
10
|
+
- date_range
|
|
11
|
+
- category
|
|
12
|
+
|
|
13
|
+
depends_on:
|
|
14
|
+
- name: dataset_example_month
|
|
15
|
+
dataset: federate_dataset_example
|
|
16
|
+
fixed_parameters:
|
|
17
|
+
- group_by: g4 (Month)
|
|
18
|
+
|
|
19
|
+
- name: dataset_example_subcategory
|
|
20
|
+
dataset: federate_dataset_example
|
|
21
|
+
fixed_parameters:
|
|
22
|
+
- group_by: g3 (Subcategory)
|
|
@@ -8,9 +8,9 @@ COPY . .
|
|
|
8
8
|
# "sqrl deps" command if there are packages defined in "squirrels.yml"
|
|
9
9
|
RUN apt-get update && apt-get install -y git
|
|
10
10
|
|
|
11
|
-
RUN pip install --no-cache-dir -r requirements
|
|
11
|
+
RUN pip install --no-cache-dir -r requirements.txt
|
|
12
12
|
|
|
13
|
-
RUN
|
|
13
|
+
RUN sqrl deps
|
|
14
14
|
|
|
15
15
|
EXPOSE 4465
|
|
16
|
-
CMD ["
|
|
16
|
+
CMD ["sqrl", "run", "--build", "--host", "0.0.0.0", "--port", "4465"]
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
{%- macro date_and_amount_filters(use_from_range) -%}
|
|
2
|
+
|
|
3
|
+
{%- if use_from_range -%}
|
|
4
|
+
date >= {{ ctx.start_date_from_range }}
|
|
5
|
+
AND date <= {{ ctx.end_date_from_range }}
|
|
6
|
+
AND amount >= {{ ctx.min_amount_from_range }}
|
|
7
|
+
AND amount <= {{ ctx.max_amount_from_range }}
|
|
8
|
+
{%- else -%}
|
|
9
|
+
date >= {{ ctx.start_date }}
|
|
10
|
+
AND date <= {{ ctx.end_date }}
|
|
11
|
+
AND amount >= {{ ctx.min_amount }}
|
|
12
|
+
AND amount <= {{ ctx.max_amount }}
|
|
13
|
+
{%- endif -%}
|
|
14
|
+
|
|
15
|
+
{%- endmacro -%}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
from squirrels import BuildModelArgs
|
|
2
|
+
import polars as pl, pandas as pd
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def main(sqrl: BuildModelArgs) -> pl.LazyFrame | pl.DataFrame | pd.DataFrame:
|
|
6
|
+
"""
|
|
7
|
+
Create a build model by joining/processing sources or other build models to form a new
|
|
8
|
+
Python DataFrame (using polars LazyFrame, polars DataFrame, or pandas DataFrame).
|
|
9
|
+
"""
|
|
10
|
+
# sqrl.ref() can be used on sources, seeds, or other build models
|
|
11
|
+
expenses_df = sqrl.ref("src_transactions")
|
|
12
|
+
categories_df = sqrl.ref("seed_categories")
|
|
13
|
+
subcategories_df = sqrl.ref("seed_subcategories")
|
|
14
|
+
|
|
15
|
+
df = expenses_df \
|
|
16
|
+
.join(subcategories_df, on="subcategory_id", how="left") \
|
|
17
|
+
.join(categories_df, on="category_id", how="left")
|
|
18
|
+
|
|
19
|
+
df = df.with_columns(
|
|
20
|
+
pl.col("date").dt.strftime("%Y-%m").alias("month"),
|
|
21
|
+
pl.col("date").dt.strftime("%Y-%m-%d").alias("date"),
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
return df.select(
|
|
25
|
+
"id", "date", "month", "category_id", "category", "subcategory_id", "subcategory", "amount", "description"
|
|
26
|
+
)
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
{# DuckDB dialect #}
|
|
2
|
+
|
|
3
|
+
SELECT a.id,
|
|
4
|
+
STRFTIME(a.date, '%Y-%m-%d') AS date,
|
|
5
|
+
STRFTIME(a.date, '%Y-%m') AS month,
|
|
6
|
+
c.category_id,
|
|
7
|
+
c.category,
|
|
8
|
+
b.subcategory_id,
|
|
9
|
+
b.subcategory,
|
|
10
|
+
a.amount,
|
|
11
|
+
a.description
|
|
12
|
+
|
|
13
|
+
{# ref() can be used on sources, seeds, or other build models -#}
|
|
14
|
+
FROM {{ ref("src_transactions") }} AS a
|
|
15
|
+
LEFT JOIN {{ ref("seed_subcategories") }} AS b ON a.subcategory_id = b.subcategory_id
|
|
16
|
+
LEFT JOIN {{ ref("seed_categories") }} AS c ON b.category_id = c.category_id
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
description: |
|
|
2
|
+
This is an example of a build model. It adds a new column called "month" to the source table "src_transactions".
|
|
3
|
+
|
|
4
|
+
depends_on: # optional for SQL models - the "ref" macro also adds to this set
|
|
5
|
+
- src_transactions
|
|
6
|
+
- seed_categories
|
|
7
|
+
- seed_subcategories
|
|
8
|
+
|
|
9
|
+
columns:
|
|
10
|
+
- name: id
|
|
11
|
+
depends_on:
|
|
12
|
+
- src_transactions.id
|
|
13
|
+
pass_through: true
|
|
14
|
+
|
|
15
|
+
- name: date
|
|
16
|
+
type: string
|
|
17
|
+
description: The day of the transaction as a string in 'YYYY-MM-DD' format
|
|
18
|
+
depends_on:
|
|
19
|
+
- src_transactions.date
|
|
20
|
+
|
|
21
|
+
- name: month
|
|
22
|
+
type: string
|
|
23
|
+
description: The month of the transaction as a string in 'YYYY-MM' format
|
|
24
|
+
depends_on:
|
|
25
|
+
- src_transactions.date
|
|
26
|
+
|
|
27
|
+
- name: category_id
|
|
28
|
+
depends_on:
|
|
29
|
+
- seed_categories.category_id
|
|
30
|
+
pass_through: true
|
|
31
|
+
|
|
32
|
+
- name: category
|
|
33
|
+
depends_on:
|
|
34
|
+
- seed_categories.category
|
|
35
|
+
pass_through: true
|
|
36
|
+
|
|
37
|
+
- name: subcategory_id
|
|
38
|
+
depends_on:
|
|
39
|
+
- seed_subcategories.subcategory_id
|
|
40
|
+
pass_through: true
|
|
41
|
+
|
|
42
|
+
- name: subcategory
|
|
43
|
+
depends_on:
|
|
44
|
+
- seed_subcategories.subcategory
|
|
45
|
+
pass_through: true
|
|
46
|
+
|
|
47
|
+
- name: amount
|
|
48
|
+
depends_on:
|
|
49
|
+
- src_transactions.amount
|
|
50
|
+
pass_through: true
|
|
51
|
+
|
|
52
|
+
- name: description
|
|
53
|
+
depends_on:
|
|
54
|
+
- src_transactions.description
|
|
55
|
+
pass_through: true
|
|
@@ -1,22 +1,12 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
{
|
|
7
|
-
|
|
8
|
-
{
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
, SUM(-amount) as total_amount
|
|
14
|
-
FROM transactions_with_masked_id
|
|
15
|
-
WHERE date >= :start_date
|
|
16
|
-
AND date <= :end_date
|
|
17
|
-
AND -amount >= :min_amount
|
|
18
|
-
AND -amount <= :max_amount
|
|
19
|
-
{% if is_placeholder("desc_pattern") -%} AND description LIKE :desc_pattern {%- endif %}
|
|
20
|
-
{% if ctx.has_categories -%} AND category IN ({{ ctx.categories }}) {%- endif %}
|
|
21
|
-
{% if ctx.has_subcategories -%} AND subcategory IN ({{ ctx.subcategories }}) {%- endif %}
|
|
22
|
-
GROUP BY {{ ctx.group_by_cols }}
|
|
1
|
+
{# SQLite dialect (based on connection used) #}
|
|
2
|
+
|
|
3
|
+
SELECT STRFTIME('%Y-%m', date) AS month
|
|
4
|
+
, printf('%.2f', SUM(amount)) as total_amount
|
|
5
|
+
|
|
6
|
+
FROM {{ source("src_transactions") }}
|
|
7
|
+
|
|
8
|
+
WHERE {{ date_and_amount_filters(use_from_range=false) }}
|
|
9
|
+
|
|
10
|
+
GROUP BY 1
|
|
11
|
+
|
|
12
|
+
ORDER BY 1 DESC
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
description: |
|
|
2
|
+
This is an example of a database view model. It finds the total amount spent by month.
|
|
3
|
+
|
|
4
|
+
Parameters are available to filter the date and amount of the transactions.
|
|
5
|
+
|
|
6
|
+
connection: default # optional - if not provided, will use default connection specified in the SQRL_CONNECTIONS__DEFAULT_NAME_USED setting
|
|
7
|
+
|
|
8
|
+
translate_to_duckdb: true # optional - default is false - if true, then the model will be translated to duckdb for supported dialects
|
|
9
|
+
|
|
10
|
+
depends_on: # optional - the "source" macro also adds to this set
|
|
11
|
+
- src_transactions
|
|
12
|
+
|
|
13
|
+
columns:
|
|
14
|
+
- name: month
|
|
15
|
+
type: string
|
|
16
|
+
description: The months for which the amount is aggregated by, in descending order
|
|
17
|
+
category: dimension
|
|
18
|
+
depends_on:
|
|
19
|
+
- src_transactions.date
|
|
20
|
+
|
|
21
|
+
- name: total_amount
|
|
22
|
+
type: float
|
|
23
|
+
description: The total amount spent by the group-by dimension
|
|
24
|
+
category: measure
|
|
25
|
+
depends_on:
|
|
26
|
+
- src_transactions.amount
|
|
@@ -1,21 +1,44 @@
|
|
|
1
|
-
from
|
|
2
|
-
|
|
3
|
-
import pandas as pd
|
|
1
|
+
from squirrels import ModelArgs, parameters as p
|
|
2
|
+
import polars as pl, pandas as pd
|
|
4
3
|
|
|
4
|
+
def dequote(value: str) -> str:
|
|
5
|
+
return value[1:-1]
|
|
5
6
|
|
|
6
|
-
def
|
|
7
|
-
""
|
|
8
|
-
Define list of dependent models here. This will determine the dependencies first, at compile-time,
|
|
9
|
-
before running the model.
|
|
10
|
-
"""
|
|
11
|
-
return ["dbview_example"]
|
|
7
|
+
def joined_str_to_list(value: str) -> list[str]:
|
|
8
|
+
return [dequote(category) for category in str(value).split(",")]
|
|
12
9
|
|
|
13
10
|
|
|
14
|
-
def main(sqrl: ModelArgs) -> pd.DataFrame:
|
|
11
|
+
def main(sqrl: ModelArgs) -> pl.LazyFrame | pl.DataFrame | pd.DataFrame:
|
|
15
12
|
"""
|
|
16
|
-
Create federated models by joining/processing dependent
|
|
17
|
-
form
|
|
13
|
+
Create federated models by joining/processing dependent models (sources, seeds, builds, dbviews, other federates, etc.) to
|
|
14
|
+
form a new Python DataFrame (using polars LazyFrame, polars DataFrame, or pandas DataFrame).
|
|
18
15
|
"""
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
16
|
+
df = sqrl.ref("build_example")
|
|
17
|
+
|
|
18
|
+
df = df.filter(
|
|
19
|
+
(pl.col("amount") >= sqrl.ctx["min_amount_from_range"]) &
|
|
20
|
+
(pl.col("amount") <= sqrl.ctx["max_amount_from_range"]) &
|
|
21
|
+
(pl.col("date") >= dequote(sqrl.ctx["start_date_from_range"])) &
|
|
22
|
+
(pl.col("date") <= dequote(sqrl.ctx["end_date_from_range"]))
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
if sqrl.ctx["has_categories"]:
|
|
26
|
+
categories_list = joined_str_to_list(sqrl.ctx["categories"])
|
|
27
|
+
df = df.filter(pl.col("category_id").is_in(categories_list))
|
|
28
|
+
|
|
29
|
+
if sqrl.ctx["has_subcategories"]:
|
|
30
|
+
subcategories_list = joined_str_to_list(sqrl.ctx["subcategories"])
|
|
31
|
+
df = df.filter(pl.col("subcategory_id").is_in(subcategories_list))
|
|
32
|
+
|
|
33
|
+
dimension_cols = sqrl.ctx["group_by_cols_list"]
|
|
34
|
+
df = df.group_by(dimension_cols).agg(
|
|
35
|
+
pl.sum("amount").cast(pl.Decimal(precision=15, scale=2)).alias("total_amount")
|
|
36
|
+
)
|
|
37
|
+
df = df.sort(dimension_cols, descending=True)
|
|
38
|
+
|
|
39
|
+
if sqrl.param_exists("limit"):
|
|
40
|
+
assert isinstance(limit := sqrl.prms["limit"], p.NumberParameter)
|
|
41
|
+
df = df.limit(int(limit.get_selected_value()))
|
|
42
|
+
|
|
43
|
+
df = df.select(*dimension_cols, "total_amount")
|
|
44
|
+
return df.rename(sqrl.ctx["rename_dict"])
|
|
@@ -1,3 +1,17 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
1
|
+
{# DuckDB dialect #}
|
|
2
|
+
|
|
3
|
+
SELECT {{ ctx.select_dim_cols }}
|
|
4
|
+
, CAST(SUM(amount) AS DECIMAL(15, 2)) as total_amount
|
|
5
|
+
|
|
6
|
+
{# ref() can be used on sources, seeds, builds, dbviews, or other federate models -#}
|
|
7
|
+
FROM {{ ref("build_example") }} AS a
|
|
8
|
+
|
|
9
|
+
WHERE {{ date_and_amount_filters(use_from_range=true) }}
|
|
10
|
+
{% if ctx.has_categories -%} AND category_id IN ({{ ctx.categories }}) {%- endif %}
|
|
11
|
+
{% if ctx.has_subcategories -%} AND subcategory_id IN ({{ ctx.subcategories }}) {%- endif %}
|
|
12
|
+
|
|
13
|
+
GROUP BY {{ ctx.group_by_cols }}
|
|
14
|
+
|
|
3
15
|
ORDER BY {{ ctx.order_by_cols }}
|
|
16
|
+
|
|
17
|
+
{{ ctx.limit_clause }}
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
description: |
|
|
2
|
+
This is an example of a federate view model. It takes the build example model and groups or filters the results based on the parameter selections provided.
|
|
3
|
+
|
|
4
|
+
Parameters are available to specify the group by dimension and filter by date, amount of the transaction, category, and subcategory.
|
|
5
|
+
|
|
6
|
+
depends_on: # optional for SQL models - the "ref" macro also adds to this set
|
|
7
|
+
- build_example
|
|
8
|
+
|
|
9
|
+
eager: false # optional - defaults to false. Only applies to SQL models.
|
|
10
|
+
|
|
11
|
+
columns:
|
|
12
|
+
- name: date
|
|
13
|
+
type: string
|
|
14
|
+
condition: parameter 'group_by' (Group By) is 'g0' (Transaction)
|
|
15
|
+
description: The date of the transaction in 'YYYY-MM-DD' format, in descending order
|
|
16
|
+
category: dimension
|
|
17
|
+
depends_on:
|
|
18
|
+
- build_example.date
|
|
19
|
+
|
|
20
|
+
- name: description
|
|
21
|
+
type: string
|
|
22
|
+
condition: parameter 'group_by' (Group By) is 'g0' (Transaction)
|
|
23
|
+
description: The description of the transaction
|
|
24
|
+
category: dimension
|
|
25
|
+
depends_on:
|
|
26
|
+
- build_example.description
|
|
27
|
+
|
|
28
|
+
- name: day
|
|
29
|
+
type: string
|
|
30
|
+
condition: parameter 'group_by' (Group By) is 'g1' (Day)
|
|
31
|
+
description: The day for which the amount is aggregated by, in descending order
|
|
32
|
+
category: dimension
|
|
33
|
+
depends_on:
|
|
34
|
+
- build_example.date
|
|
35
|
+
|
|
36
|
+
- name: month
|
|
37
|
+
type: string
|
|
38
|
+
condition: parameter 'group_by' (Group By) is 'g4' (Month)
|
|
39
|
+
description: The month for which the amount is aggregated by, in descending order
|
|
40
|
+
category: dimension
|
|
41
|
+
depends_on:
|
|
42
|
+
- build_example.month
|
|
43
|
+
|
|
44
|
+
- name: category
|
|
45
|
+
type: string
|
|
46
|
+
condition: parameter `group_by` (Group By) is `g0` (Transaction), `g2` (Category), or `g3` (Subcategory)
|
|
47
|
+
description: The category for which the amount is aggregated by
|
|
48
|
+
category: dimension
|
|
49
|
+
depends_on:
|
|
50
|
+
- build_example.category
|
|
51
|
+
|
|
52
|
+
- name: subcategory
|
|
53
|
+
type: string
|
|
54
|
+
condition: parameter `group_by` (Group By) is `g0` (Transaction) or `g3` (Subcategory)
|
|
55
|
+
description: The subcategory for which the amount is aggregated by
|
|
56
|
+
category: dimension
|
|
57
|
+
depends_on:
|
|
58
|
+
- build_example.subcategory
|
|
59
|
+
|
|
60
|
+
- name: total_amount
|
|
61
|
+
type: float
|
|
62
|
+
description: The total amount spent by the group by dimension
|
|
63
|
+
category: measure
|
|
64
|
+
depends_on:
|
|
65
|
+
- build_example.total_amount
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
sources:
|
|
2
|
+
- name: src_transactions
|
|
3
|
+
description: "The source table for transactions" # optional
|
|
4
|
+
connection: default # optional - if not provided, will use the connection named "default" or the default connection specified in settings
|
|
5
|
+
table: expenses # optional - if not provided, will use the "name" field of the source
|
|
6
|
+
load_to_duckdb: true # optional - default is false - outside of dbview models that have translate_to_duckdb as false, other models can only reference this source if load_to_duckdb is true
|
|
7
|
+
|
|
8
|
+
# For performance reasons, avoid specifying primary_key for large tables if upserts are not required
|
|
9
|
+
primary_key: [id] # optional - if not provided, then this is an insert-only table for incremental loads - otherwise, this uses upsert
|
|
10
|
+
|
|
11
|
+
update_hints:
|
|
12
|
+
increasing_column: date # optional - if not provided, will always do full refresh, otherwise uses this column for incremental loads
|
|
13
|
+
strictly_increasing: false # optional - default is true - if false, then maximum value of column is removed before incremental loads
|
|
14
|
+
|
|
15
|
+
columns: # optional - if load_to_duckdb is true, then only the columns listed here are loaded to duckdb
|
|
16
|
+
- name: id
|
|
17
|
+
type: string
|
|
18
|
+
description: The unique identifier for the transaction
|
|
19
|
+
category: dimension
|
|
20
|
+
|
|
21
|
+
- name: date
|
|
22
|
+
type: date
|
|
23
|
+
description: The date of the transaction
|
|
24
|
+
category: dimension
|
|
25
|
+
|
|
26
|
+
- name: subcategory_id
|
|
27
|
+
type: string
|
|
28
|
+
description: The ID of the subcategory of the transaction
|
|
29
|
+
category: dimension
|
|
30
|
+
|
|
31
|
+
- name: amount
|
|
32
|
+
type: float
|
|
33
|
+
description: The amount of the transaction
|
|
34
|
+
category: measure
|
|
35
|
+
|
|
36
|
+
- name: description
|
|
37
|
+
type: string
|
|
38
|
+
description: The description of the transaction
|
|
39
|
+
category: dimension
|