squirrels 0.4.1__py3-none-any.whl → 0.5.0rc0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of squirrels might be problematic. Click here for more details.

Files changed (80) hide show
  1. squirrels/__init__.py +10 -6
  2. squirrels/_api_response_models.py +93 -44
  3. squirrels/_api_server.py +571 -219
  4. squirrels/_auth.py +451 -0
  5. squirrels/_command_line.py +61 -20
  6. squirrels/_connection_set.py +38 -25
  7. squirrels/_constants.py +44 -34
  8. squirrels/_dashboards_io.py +34 -16
  9. squirrels/_exceptions.py +57 -0
  10. squirrels/_initializer.py +117 -44
  11. squirrels/_manifest.py +124 -62
  12. squirrels/_model_builder.py +111 -0
  13. squirrels/_model_configs.py +74 -0
  14. squirrels/_model_queries.py +52 -0
  15. squirrels/_models.py +860 -354
  16. squirrels/_package_loader.py +8 -4
  17. squirrels/_parameter_configs.py +45 -65
  18. squirrels/_parameter_sets.py +15 -13
  19. squirrels/_project.py +561 -0
  20. squirrels/_py_module.py +4 -3
  21. squirrels/_seeds.py +35 -16
  22. squirrels/_sources.py +106 -0
  23. squirrels/_utils.py +166 -63
  24. squirrels/_version.py +1 -1
  25. squirrels/arguments/init_time_args.py +78 -15
  26. squirrels/arguments/run_time_args.py +62 -101
  27. squirrels/dashboards.py +4 -4
  28. squirrels/data_sources.py +94 -162
  29. squirrels/dataset_result.py +86 -0
  30. squirrels/dateutils.py +4 -4
  31. squirrels/package_data/base_project/.env +30 -0
  32. squirrels/package_data/base_project/.env.example +30 -0
  33. squirrels/package_data/base_project/.gitignore +3 -2
  34. squirrels/package_data/base_project/assets/expenses.db +0 -0
  35. squirrels/package_data/base_project/connections.yml +11 -3
  36. squirrels/package_data/base_project/dashboards/dashboard_example.py +15 -13
  37. squirrels/package_data/base_project/dashboards/dashboard_example.yml +22 -0
  38. squirrels/package_data/base_project/docker/.dockerignore +5 -2
  39. squirrels/package_data/base_project/docker/Dockerfile +3 -3
  40. squirrels/package_data/base_project/docker/compose.yml +1 -1
  41. squirrels/package_data/base_project/duckdb_init.sql +9 -0
  42. squirrels/package_data/base_project/macros/macros_example.sql +15 -0
  43. squirrels/package_data/base_project/models/builds/build_example.py +26 -0
  44. squirrels/package_data/base_project/models/builds/build_example.sql +16 -0
  45. squirrels/package_data/base_project/models/builds/build_example.yml +55 -0
  46. squirrels/package_data/base_project/models/dbviews/dbview_example.sql +12 -22
  47. squirrels/package_data/base_project/models/dbviews/dbview_example.yml +26 -0
  48. squirrels/package_data/base_project/models/federates/federate_example.py +38 -15
  49. squirrels/package_data/base_project/models/federates/federate_example.sql +16 -2
  50. squirrels/package_data/base_project/models/federates/federate_example.yml +65 -0
  51. squirrels/package_data/base_project/models/sources.yml +39 -0
  52. squirrels/package_data/base_project/parameters.yml +36 -21
  53. squirrels/package_data/base_project/pyconfigs/connections.py +6 -11
  54. squirrels/package_data/base_project/pyconfigs/context.py +20 -33
  55. squirrels/package_data/base_project/pyconfigs/parameters.py +19 -21
  56. squirrels/package_data/base_project/pyconfigs/user.py +23 -0
  57. squirrels/package_data/base_project/seeds/seed_categories.yml +15 -0
  58. squirrels/package_data/base_project/seeds/seed_subcategories.csv +15 -15
  59. squirrels/package_data/base_project/seeds/seed_subcategories.yml +21 -0
  60. squirrels/package_data/base_project/squirrels.yml.j2 +17 -40
  61. squirrels/parameters.py +20 -20
  62. {squirrels-0.4.1.dist-info → squirrels-0.5.0rc0.dist-info}/METADATA +31 -32
  63. squirrels-0.5.0rc0.dist-info/RECORD +70 -0
  64. {squirrels-0.4.1.dist-info → squirrels-0.5.0rc0.dist-info}/WHEEL +1 -1
  65. squirrels-0.5.0rc0.dist-info/entry_points.txt +3 -0
  66. {squirrels-0.4.1.dist-info → squirrels-0.5.0rc0.dist-info/licenses}/LICENSE +1 -1
  67. squirrels/_authenticator.py +0 -85
  68. squirrels/_environcfg.py +0 -84
  69. squirrels/package_data/assets/favicon.ico +0 -0
  70. squirrels/package_data/assets/index.css +0 -1
  71. squirrels/package_data/assets/index.js +0 -58
  72. squirrels/package_data/base_project/dashboards.yml +0 -10
  73. squirrels/package_data/base_project/env.yml +0 -29
  74. squirrels/package_data/base_project/models/dbviews/dbview_example.py +0 -47
  75. squirrels/package_data/base_project/pyconfigs/auth.py +0 -45
  76. squirrels/package_data/templates/index.html +0 -18
  77. squirrels/project.py +0 -378
  78. squirrels/user_base.py +0 -55
  79. squirrels-0.4.1.dist-info/RECORD +0 -60
  80. squirrels-0.4.1.dist-info/entry_points.txt +0 -4
@@ -0,0 +1,30 @@
1
+ # Custom environment variables
2
+ SQLITE_URI="sqlite:///{project_path}/assets/expenses.db"
3
+
4
+ # Secrets used by the Squirrels framework that are NOT SAFE TO INCLUDE IN VERSION CONTROL
5
+ # Required if your project uses authentication. Otherwise, optional.
6
+ SQRL_SECRET__KEY="{{ random_secret_key }}"
7
+ SQRL_SECRET__ADMIN_PASSWORD="{{ random_admin_password }}"
8
+
9
+ # Optional variables used by the Squirrels framework that are safe to include in version control if desired
10
+ # (default values are shown below)
11
+ SQRL_AUTH__DB_FILE_PATH="target/auth.sqlite"
12
+ SQRL_AUTH__TOKEN_EXPIRE_MINUTES="30"
13
+
14
+ SQRL_PARAMETERS__CACHE_SIZE="1024"
15
+ SQRL_PARAMETERS__CACHE_TTL_MINUTES="60"
16
+
17
+ SQRL_DATASETS__CACHE_SIZE="128"
18
+ SQRL_DATASETS__CACHE_TTL_MINUTES="60"
19
+
20
+ SQRL_DASHBOARDS__CACHE_SIZE="128"
21
+ SQRL_DASHBOARDS__CACHE_TTL_MINUTES="60"
22
+
23
+ SQRL_SEEDS__INFER_SCHEMA="true"
24
+ SQRL_SEEDS__NA_VALUES=["NA"] # must be a JSON list
25
+
26
+ SQRL_TEST_SETS__DEFAULT_NAME_USED="default"
27
+
28
+ SQRL_CONNECTIONS__DEFAULT_NAME_USED="default"
29
+
30
+ SQRL_DUCKDB_VENV__DB_FILE_PATH="target/venv.duckdb"
@@ -0,0 +1,30 @@
1
+ # Custom environment variables
2
+ SQLITE_URI="sqlite:///{project_path}/assets/expenses.db"
3
+
4
+ # Secrets used by the Squirrels framework that are NOT SAFE TO INCLUDE IN VERSION CONTROL
5
+ # Required if your project uses authentication. Otherwise, optional.
6
+ SQRL_SECRET__KEY="" # a random 32 byte hex string - one way to generate this is by running "openssl rand -hex 32" in bash
7
+ SQRL_SECRET__ADMIN_PASSWORD=""
8
+
9
+ # Optional variables used by the Squirrels framework that are safe to include in version control if desired
10
+ # (default values are shown below)
11
+ SQRL_AUTH__DB_FILE_PATH="target/auth.sqlite"
12
+ SQRL_AUTH__TOKEN_EXPIRE_MINUTES="30"
13
+
14
+ SQRL_PARAMETERS__CACHE_SIZE="1024"
15
+ SQRL_PARAMETERS__CACHE_TTL_MINUTES="60"
16
+
17
+ SQRL_DATASETS__CACHE_SIZE="128"
18
+ SQRL_DATASETS__CACHE_TTL_MINUTES="60"
19
+
20
+ SQRL_DASHBOARDS__CACHE_SIZE="128"
21
+ SQRL_DASHBOARDS__CACHE_TTL_MINUTES="60"
22
+
23
+ SQRL_SEEDS__INFER_SCHEMA="true"
24
+ SQRL_SEEDS__NA_VALUES=["NA"] # must be a JSON list
25
+
26
+ SQRL_TEST_SETS__DEFAULT_NAME_USED="default"
27
+
28
+ SQRL_CONNECTIONS__DEFAULT_NAME_USED="default"
29
+
30
+ SQRL_DUCKDB_VENV__DB_FILE_PATH="target/venv.duckdb"
@@ -1,12 +1,13 @@
1
1
  **/__pycache__/
2
2
 
3
3
  # common virtual environment names
4
- .env/
5
4
  .venv/
6
5
  venv/
7
6
 
8
7
  # squirrels files to ignore
9
- env.yml
8
+ .env
9
+ .env.local
10
+ duckdb_init.sql
10
11
  logs/
11
12
  target/
12
13
  sqrl_packages/
@@ -1,7 +1,15 @@
1
- ## Uses SQLAlchemy URLs. More details here: https://docs.sqlalchemy.org/en/latest/core/engines.html
1
+ ## Connection URIs are usually in format "dialect://username:password@host:port/database" for database connections
2
+ ## However, subtle differences exist depending on the "type" specified. For example, sqlite URIs are slightly different.
3
+ ## sqlalchemy: sqlite:///relative/path/to/database.db
4
+ ## connectorx/adbc: sqlite://relative/path/to/database.db (adbc URI format matches connectorx thanks to polars integration)
5
+ ## Refer to specific documentation for supported databases by type (with URI examples):
6
+ ## sqlalchemy: https://docs.sqlalchemy.org/en/latest/core/engines.html#database-urls
7
+ ## connectorx: https://sfu-db.github.io/connector-x/databases.html
8
+ ## adbc: https://arrow.apache.org/adbc/ (see connectorx documentation for URI examples)
2
9
  connections:
3
10
  - name: default
4
- credential: null
5
- url: {{ env_vars.sqlite_conn_str }} ## using Jinja to substitute environment variable from env.yml
11
+ label: SQLite Expenses Database
12
+ type: sqlalchemy ## one of: sqlalchemy, connectorx, or adbc
13
+ uri: {{ env_vars.SQLITE_URI }} ## using Jinja to substitute environment variables
6
14
 
7
15
 
@@ -3,16 +3,8 @@ from matplotlib import pyplot as plt, figure as f, axes as a
3
3
 
4
4
 
5
5
  async def main(sqrl: DashboardArgs) -> d.PngDashboard:
6
- """
7
- Create a dashboard by retrieving datasets using "sqrl.dataset" method and transform the datasets to return as a PngDashboard or a HtmlDashboard.
8
- - The PngDashboard constructor takes a single argument for either a matplotlib.figure.Figure or io.BytesIO/bytes of PNG data
9
- - The HtmlDashboard constructor takes a single argument for a io.StringIO/string of HTML data
10
-
11
- It is imperative to set the correct return type in the function signature for "main" above! It allows Squirrels to provide the correct format to
12
- the data catalog without having to run this function.
13
- """
14
- spending_by_month_df = await sqrl.dataset("dataset_example", fixed_parameters={"group_by": "g4"})
15
- spending_by_subcategory_df = await sqrl.dataset("dataset_example", fixed_parameters={"group_by": "g3"})
6
+ spending_by_month_df = await sqrl.dataset("federate_dataset_example", fixed_parameters={"group_by": "g4"})
7
+ spending_by_subcategory_df = await sqrl.dataset("federate_dataset_example", fixed_parameters={"group_by": "g3"})
16
8
 
17
9
  # Create a figure with two subplots
18
10
  fig, (ax0, ax1) = plt.subplots(2, 1, figsize=(8, 8), height_ratios=(1, 2))
@@ -20,13 +12,23 @@ async def main(sqrl: DashboardArgs) -> d.PngDashboard:
20
12
  fig.tight_layout(pad=4, h_pad=6)
21
13
 
22
14
  # Create a line chart of spending by month
23
- spending_by_month_df.sort_values("month").plot(x="month", y="total_amount", ax=ax0)
15
+
16
+ # Convert to pandas and ensure total_amount is numeric
17
+ spending_by_month_pandas = spending_by_month_df.sort("month").to_pandas()
18
+ spending_by_month_pandas["total_amount"] = spending_by_month_pandas["total_amount"].astype(float)
19
+
20
+ spending_by_month_pandas.plot(x="month", y="total_amount", ax=ax0)
24
21
  ax0.set_title("Spending by Month")
25
22
 
26
23
  # Create a pie chart of spending by subcategory
27
- df_by_subcategory = spending_by_subcategory_df.set_index("subcategory").sort_values("total_amount", ascending=False)
24
+
25
+ # Convert to pandas and ensure total_amount is numeric
26
+ subcategory_pandas = spending_by_subcategory_df.sort("total_amount", descending=True).to_pandas()
27
+ subcategory_pandas["total_amount"] = subcategory_pandas["total_amount"].astype(float)
28
+ subcategory_pandas.set_index("subcategory", inplace=True)
29
+
28
30
  autopct = lambda pct: ('%.1f%%' % pct) if pct > 6 else ''
29
- df_by_subcategory.plot(y="total_amount", kind='pie', ax=ax1, autopct=autopct, legend=False, ylabel="")
31
+ subcategory_pandas.plot(y="total_amount", kind='pie', ax=ax1, autopct=autopct, legend=False, ylabel="")
30
32
  ax1.set_title("Spending by Subcategory")
31
33
 
32
34
  return d.PngDashboard(fig)
@@ -0,0 +1,22 @@
1
+ label: Dashboard Example
2
+
3
+ description: This is an example dashboard
4
+
5
+ scope: protected
6
+
7
+ format: png
8
+
9
+ parameters:
10
+ - date_range
11
+ - category
12
+
13
+ depends_on:
14
+ - name: dataset_example_month
15
+ dataset: federate_dataset_example
16
+ fixed_parameters:
17
+ - group_by: g4 (Month)
18
+
19
+ - name: dataset_example_subcategory
20
+ dataset: federate_dataset_example
21
+ fixed_parameters:
22
+ - group_by: g3 (Subcategory)
@@ -2,12 +2,15 @@
2
2
 
3
3
  # common virtual environment names
4
4
  .venv/
5
+ venv/
5
6
 
6
7
  # squirrels files to ignore
7
- env.yml
8
+ .env
9
+ .env.local
10
+ duckdb_init.sql
11
+ logs/
8
12
  target/
9
13
  sqrl_packages/
10
14
 
11
15
  # additional files for docker to ignore
12
- Dockerfile
13
16
  .git/
@@ -8,9 +8,9 @@ COPY . .
8
8
  # "sqrl deps" command if there are packages defined in "squirrels.yml"
9
9
  RUN apt-get update && apt-get install -y git
10
10
 
11
- RUN pip install --no-cache-dir -r requirements-lock.txt
11
+ RUN pip install --no-cache-dir -r requirements.txt
12
12
 
13
- RUN squirrels deps
13
+ RUN sqrl deps
14
14
 
15
15
  EXPOSE 4465
16
- CMD ["squirrels", "run", "--host", "0.0.0.0", "--port", "4465"]
16
+ CMD ["sqrl", "run", "--build", "--host", "0.0.0.0", "--port", "4465"]
@@ -4,4 +4,4 @@ services:
4
4
  ports:
5
5
  - "4465:4465"
6
6
  volumes:
7
- - ./env.yml:/app/env.yml
7
+ - ./.env:/app/.env
@@ -0,0 +1,9 @@
1
+ -- SQL statements that run at the start of every DuckDB session
2
+
3
+ -- Example:
4
+
5
+ -- SET threads = 4;
6
+
7
+ -- SET temp_directory = '/path/to/tmp/';
8
+
9
+ -- CREATE SECRET (TYPE S3, PROVIDER CREDENTIAL_CHAIN);
@@ -0,0 +1,15 @@
1
+ {%- macro date_and_amount_filters(use_from_range) -%}
2
+
3
+ {%- if use_from_range -%}
4
+ date >= {{ ctx.start_date_from_range }}
5
+ AND date <= {{ ctx.end_date_from_range }}
6
+ AND amount >= {{ ctx.min_amount_from_range }}
7
+ AND amount <= {{ ctx.max_amount_from_range }}
8
+ {%- else -%}
9
+ date >= {{ ctx.start_date }}
10
+ AND date <= {{ ctx.end_date }}
11
+ AND amount >= {{ ctx.min_amount }}
12
+ AND amount <= {{ ctx.max_amount }}
13
+ {%- endif -%}
14
+
15
+ {%- endmacro -%}
@@ -0,0 +1,26 @@
1
+ from squirrels import BuildModelArgs
2
+ import polars as pl, pandas as pd
3
+
4
+
5
+ def main(sqrl: BuildModelArgs) -> pl.LazyFrame | pl.DataFrame | pd.DataFrame:
6
+ """
7
+ Create a build model by joining/processing sources or other build models to form a new
8
+ Python DataFrame (using polars LazyFrame, polars DataFrame, or pandas DataFrame).
9
+ """
10
+ # sqrl.ref() can be used on sources, seeds, or other build models
11
+ expenses_df = sqrl.ref("src_transactions")
12
+ categories_df = sqrl.ref("seed_categories")
13
+ subcategories_df = sqrl.ref("seed_subcategories")
14
+
15
+ df = expenses_df \
16
+ .join(subcategories_df, on="subcategory_id", how="left") \
17
+ .join(categories_df, on="category_id", how="left")
18
+
19
+ df = df.with_columns(
20
+ pl.col("date").dt.strftime("%Y-%m").alias("month"),
21
+ pl.col("date").dt.strftime("%Y-%m-%d").alias("date"),
22
+ )
23
+
24
+ return df.select(
25
+ "id", "date", "month", "category_id", "category", "subcategory_id", "subcategory", "amount", "description"
26
+ )
@@ -0,0 +1,16 @@
1
+ {# DuckDB dialect #}
2
+
3
+ SELECT a.id,
4
+ STRFTIME(a.date, '%Y-%m-%d') AS date,
5
+ STRFTIME(a.date, '%Y-%m') AS month,
6
+ c.category_id,
7
+ c.category,
8
+ b.subcategory_id,
9
+ b.subcategory,
10
+ a.amount,
11
+ a.description
12
+
13
+ {# ref() can be used on sources, seeds, or other build models -#}
14
+ FROM {{ ref("src_transactions") }} AS a
15
+ LEFT JOIN {{ ref("seed_subcategories") }} AS b ON a.subcategory_id = b.subcategory_id
16
+ LEFT JOIN {{ ref("seed_categories") }} AS c ON b.category_id = c.category_id
@@ -0,0 +1,55 @@
1
+ description: |
2
+ This is an example of a build model. It joins the source table "src_transactions" with the seeds "seed_categories" and "seed_subcategories", and adds a new column called "month".
3
+
4
+ depends_on: # optional for SQL models - the "ref" macro also adds to this set
5
+ - src_transactions
6
+ - seed_categories
7
+ - seed_subcategories
8
+
9
+ columns:
10
+ - name: id
11
+ depends_on:
12
+ - src_transactions.id
13
+ pass_through: true
14
+
15
+ - name: date
16
+ type: string
17
+ description: The day of the transaction as a string in 'YYYY-MM-DD' format
18
+ depends_on:
19
+ - src_transactions.date
20
+
21
+ - name: month
22
+ type: string
23
+ description: The month of the transaction as a string in 'YYYY-MM' format
24
+ depends_on:
25
+ - src_transactions.date
26
+
27
+ - name: category_id
28
+ depends_on:
29
+ - seed_categories.category_id
30
+ pass_through: true
31
+
32
+ - name: category
33
+ depends_on:
34
+ - seed_categories.category
35
+ pass_through: true
36
+
37
+ - name: subcategory_id
38
+ depends_on:
39
+ - seed_subcategories.subcategory_id
40
+ pass_through: true
41
+
42
+ - name: subcategory
43
+ depends_on:
44
+ - seed_subcategories.subcategory
45
+ pass_through: true
46
+
47
+ - name: amount
48
+ depends_on:
49
+ - src_transactions.amount
50
+ pass_through: true
51
+
52
+ - name: description
53
+ depends_on:
54
+ - src_transactions.description
55
+ pass_through: true
@@ -1,22 +1,12 @@
1
- WITH
2
- transactions_with_masked_id AS (
3
- SELECT *,
4
- {%- if user.role == "manager" %}
5
- id as masked_id
6
- {%- else %}
7
- '***' as masked_id
8
- {%- endif %},
9
- STRFTIME('%Y-%m', date) AS month
10
- FROM transactions
11
- )
12
- SELECT {{ ctx.select_dim_cols }}
13
- , SUM(-amount) as total_amount
14
- FROM transactions_with_masked_id
15
- WHERE date >= :start_date
16
- AND date <= :end_date
17
- AND -amount >= :min_amount
18
- AND -amount <= :max_amount
19
- {% if is_placeholder("desc_pattern") -%} AND description LIKE :desc_pattern {%- endif %}
20
- {% if ctx.has_categories -%} AND category IN ({{ ctx.categories }}) {%- endif %}
21
- {% if ctx.has_subcategories -%} AND subcategory IN ({{ ctx.subcategories }}) {%- endif %}
22
- GROUP BY {{ ctx.group_by_cols }}
1
+ {# SQLite dialect (based on connection used) #}
2
+
3
+ SELECT STRFTIME('%Y-%m', date) AS month
4
+ , printf('%.2f', SUM(amount)) as total_amount
5
+
6
+ FROM {{ source("src_transactions") }}
7
+
8
+ WHERE {{ date_and_amount_filters(use_from_range=false) }}
9
+
10
+ GROUP BY 1
11
+
12
+ ORDER BY 1 DESC
@@ -0,0 +1,26 @@
1
+ description: |
2
+ This is an example of a database view model. It finds the total amount spent by month.
3
+
4
+ Parameters are available to filter the date and amount of the transactions.
5
+
6
+ connection: default # optional - if not provided, will use default connection specified in the SQRL_CONNECTIONS__DEFAULT_NAME_USED setting
7
+
8
+ translate_to_duckdb: true # optional - default is false - if true, then the model will be translated to duckdb for supported dialects
9
+
10
+ depends_on: # optional - the "source" macro also adds to this set
11
+ - src_transactions
12
+
13
+ columns:
14
+ - name: month
15
+ type: string
16
+ description: The months for which the amount is aggregated by, in descending order
17
+ category: dimension
18
+ depends_on:
19
+ - src_transactions.date
20
+
21
+ - name: total_amount
22
+ type: float
23
+ description: The total amount spent by the group-by dimension
24
+ category: measure
25
+ depends_on:
26
+ - src_transactions.amount
@@ -1,21 +1,44 @@
1
- from typing import Sequence
2
- from squirrels import ModelDepsArgs, ModelArgs
3
- import pandas as pd
1
+ from squirrels import ModelArgs, parameters as p
2
+ import polars as pl, pandas as pd
4
3
 
4
+ def dequote(value: str) -> str:
5
+ return value[1:-1]
5
6
 
6
- def dependencies(sqrl: ModelDepsArgs) -> Sequence[str]:
7
- """
8
- Define list of dependent models here. This will determine the dependencies first, at compile-time,
9
- before running the model.
10
- """
11
- return ["dbview_example"]
7
+ def joined_str_to_list(value: str) -> list[str]:
8
+ return [dequote(category) for category in str(value).split(",")]
12
9
 
13
10
 
14
- def main(sqrl: ModelArgs) -> pd.DataFrame:
11
+ def main(sqrl: ModelArgs) -> pl.LazyFrame | pl.DataFrame | pd.DataFrame:
15
12
  """
16
- Create federated models by joining/processing dependent database views and/or other federated models to
17
- form and return the result as a new pandas DataFrame.
13
+ Create federated models by joining/processing dependent models (sources, seeds, builds, dbviews, other federates, etc.) to
14
+ form a new Python DataFrame (using polars LazyFrame, polars DataFrame, or pandas DataFrame).
18
15
  """
19
- (DBVIEW_EXAMPLE,) = dependencies(sqrl)
20
- df = sqrl.ref(DBVIEW_EXAMPLE)
21
- return df.sort_values(sqrl.ctx["order_by_cols_list"], ascending=False)
16
+ df = sqrl.ref("build_example")
17
+
18
+ df = df.filter(
19
+ (pl.col("amount") >= sqrl.ctx["min_amount_from_range"]) &
20
+ (pl.col("amount") <= sqrl.ctx["max_amount_from_range"]) &
21
+ (pl.col("date") >= dequote(sqrl.ctx["start_date_from_range"])) &
22
+ (pl.col("date") <= dequote(sqrl.ctx["end_date_from_range"]))
23
+ )
24
+
25
+ if sqrl.ctx["has_categories"]:
26
+ categories_list = joined_str_to_list(sqrl.ctx["categories"])
27
+ df = df.filter(pl.col("category_id").is_in(categories_list))
28
+
29
+ if sqrl.ctx["has_subcategories"]:
30
+ subcategories_list = joined_str_to_list(sqrl.ctx["subcategories"])
31
+ df = df.filter(pl.col("subcategory_id").is_in(subcategories_list))
32
+
33
+ dimension_cols = sqrl.ctx["group_by_cols_list"]
34
+ df = df.group_by(dimension_cols).agg(
35
+ pl.sum("amount").cast(pl.Decimal(precision=15, scale=2)).alias("total_amount")
36
+ )
37
+ df = df.sort(dimension_cols, descending=True)
38
+
39
+ if sqrl.param_exists("limit"):
40
+ assert isinstance(limit := sqrl.prms["limit"], p.NumberParameter)
41
+ df = df.limit(int(limit.get_selected_value()))
42
+
43
+ df = df.select(*dimension_cols, "total_amount")
44
+ return df.rename(sqrl.ctx["rename_dict"])
@@ -1,3 +1,17 @@
1
- SELECT *
2
- FROM {{ ref("dbview_example") }}
1
+ {# DuckDB dialect #}
2
+
3
+ SELECT {{ ctx.select_dim_cols }}
4
+ , CAST(SUM(amount) AS DECIMAL(15, 2)) as total_amount
5
+
6
+ {# ref() can be used on sources, seeds, builds, dbviews, or other federate models -#}
7
+ FROM {{ ref("build_example") }} AS a
8
+
9
+ WHERE {{ date_and_amount_filters(use_from_range=true) }}
10
+ {% if ctx.has_categories -%} AND category_id IN ({{ ctx.categories }}) {%- endif %}
11
+ {% if ctx.has_subcategories -%} AND subcategory_id IN ({{ ctx.subcategories }}) {%- endif %}
12
+
13
+ GROUP BY {{ ctx.group_by_cols }}
14
+
3
15
  ORDER BY {{ ctx.order_by_cols }}
16
+
17
+ {{ ctx.limit_clause }}
@@ -0,0 +1,65 @@
1
+ description: |
2
+ This is an example of a federate view model. It takes the build example model and groups or filters the results based on the parameter selections provided.
3
+
4
+ Parameters are available to specify the group by dimension and filter by date, amount of the transaction, category, and subcategory.
5
+
6
+ depends_on: # optional for SQL models - the "ref" macro also adds to this set
7
+ - build_example
8
+
9
+ eager: false # optional - defaults to false. Only applies to SQL models.
10
+
11
+ columns:
12
+ - name: date
13
+ type: string
14
+ condition: parameter 'group_by' (Group By) is 'g0' (Transaction)
15
+ description: The date of the transaction in 'YYYY-MM-DD' format, in descending order
16
+ category: dimension
17
+ depends_on:
18
+ - build_example.date
19
+
20
+ - name: description
21
+ type: string
22
+ condition: parameter 'group_by' (Group By) is 'g0' (Transaction)
23
+ description: The description of the transaction
24
+ category: dimension
25
+ depends_on:
26
+ - build_example.description
27
+
28
+ - name: day
29
+ type: string
30
+ condition: parameter 'group_by' (Group By) is 'g1' (Day)
31
+ description: The day for which the amount is aggregated by, in descending order
32
+ category: dimension
33
+ depends_on:
34
+ - build_example.date
35
+
36
+ - name: month
37
+ type: string
38
+ condition: parameter 'group_by' (Group By) is 'g4' (Month)
39
+ description: The month for which the amount is aggregated by, in descending order
40
+ category: dimension
41
+ depends_on:
42
+ - build_example.month
43
+
44
+ - name: category
45
+ type: string
46
+ condition: parameter 'group_by' (Group By) is 'g0' (Transaction), 'g2' (Category), or 'g3' (Subcategory)
47
+ description: The category for which the amount is aggregated by
48
+ category: dimension
49
+ depends_on:
50
+ - build_example.category
51
+
52
+ - name: subcategory
53
+ type: string
54
+ condition: parameter 'group_by' (Group By) is 'g0' (Transaction) or 'g3' (Subcategory)
55
+ description: The subcategory for which the amount is aggregated by
56
+ category: dimension
57
+ depends_on:
58
+ - build_example.subcategory
59
+
60
+ - name: total_amount
61
+ type: float
62
+ description: The total amount spent by the group by dimension
63
+ category: measure
64
+ depends_on:
65
+ - build_example.amount
@@ -0,0 +1,39 @@
1
+ sources:
2
+ - name: src_transactions
3
+ description: "The source table for transactions" # optional
4
+ connection: default # optional - if not provided, will use the connection named "default" or the default connection specified in settings
5
+ table: expenses # optional - if not provided, will use the "name" field of the source
6
+ load_to_duckdb: true # optional - default is false - outside of dbview models that have translate_to_duckdb as false, other models can only reference this source if load_to_duckdb is true
7
+
8
+ # For performance reasons, avoid specifying primary_key for large tables if upserts are not required
9
+ primary_key: [id] # optional - if not provided, then this is an insert-only table for incremental loads - otherwise, this uses upsert
10
+
11
+ update_hints:
12
+ increasing_column: date # optional - if not provided, will always do full refresh, otherwise uses this column for incremental loads
13
+ strictly_increasing: false # optional - default is true - if false, then maximum value of column is removed before incremental loads
14
+
15
+ columns: # optional - if load_to_duckdb is true, then only the columns listed here are loaded to duckdb
16
+ - name: id
17
+ type: string
18
+ description: The unique identifier for the transaction
19
+ category: dimension
20
+
21
+ - name: date
22
+ type: date
23
+ description: The date of the transaction
24
+ category: dimension
25
+
26
+ - name: subcategory_id
27
+ type: string
28
+ description: The ID of the subcategory of the transaction
29
+ category: dimension
30
+
31
+ - name: amount
32
+ type: float
33
+ description: The amount of the transaction
34
+ category: measure
35
+
36
+ - name: description
37
+ type: string
38
+ description: The description of the transaction
39
+ category: dimension