squirrels 0.1.0__py3-none-any.whl → 0.6.0.post0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127) hide show
  1. dateutils/__init__.py +6 -0
  2. dateutils/_enums.py +25 -0
  3. squirrels/dateutils.py → dateutils/_implementation.py +409 -380
  4. dateutils/types.py +6 -0
  5. squirrels/__init__.py +21 -18
  6. squirrels/_api_routes/__init__.py +5 -0
  7. squirrels/_api_routes/auth.py +337 -0
  8. squirrels/_api_routes/base.py +196 -0
  9. squirrels/_api_routes/dashboards.py +156 -0
  10. squirrels/_api_routes/data_management.py +148 -0
  11. squirrels/_api_routes/datasets.py +220 -0
  12. squirrels/_api_routes/project.py +289 -0
  13. squirrels/_api_server.py +552 -134
  14. squirrels/_arguments/__init__.py +0 -0
  15. squirrels/_arguments/init_time_args.py +83 -0
  16. squirrels/_arguments/run_time_args.py +111 -0
  17. squirrels/_auth.py +777 -0
  18. squirrels/_command_line.py +239 -107
  19. squirrels/_compile_prompts.py +147 -0
  20. squirrels/_connection_set.py +94 -0
  21. squirrels/_constants.py +141 -64
  22. squirrels/_dashboards.py +179 -0
  23. squirrels/_data_sources.py +570 -0
  24. squirrels/_dataset_types.py +91 -0
  25. squirrels/_env_vars.py +209 -0
  26. squirrels/_exceptions.py +29 -0
  27. squirrels/_http_error_responses.py +52 -0
  28. squirrels/_initializer.py +319 -110
  29. squirrels/_logging.py +121 -0
  30. squirrels/_manifest.py +357 -187
  31. squirrels/_mcp_server.py +578 -0
  32. squirrels/_model_builder.py +69 -0
  33. squirrels/_model_configs.py +74 -0
  34. squirrels/_model_queries.py +52 -0
  35. squirrels/_models.py +1201 -0
  36. squirrels/_package_data/base_project/.env +7 -0
  37. squirrels/_package_data/base_project/.env.example +44 -0
  38. squirrels/_package_data/base_project/connections.yml +16 -0
  39. squirrels/_package_data/base_project/dashboards/dashboard_example.py +40 -0
  40. squirrels/_package_data/base_project/dashboards/dashboard_example.yml +22 -0
  41. squirrels/_package_data/base_project/docker/.dockerignore +16 -0
  42. squirrels/_package_data/base_project/docker/Dockerfile +16 -0
  43. squirrels/_package_data/base_project/docker/compose.yml +7 -0
  44. squirrels/_package_data/base_project/duckdb_init.sql +10 -0
  45. squirrels/_package_data/base_project/gitignore +13 -0
  46. squirrels/_package_data/base_project/macros/macros_example.sql +17 -0
  47. squirrels/_package_data/base_project/models/builds/build_example.py +26 -0
  48. squirrels/_package_data/base_project/models/builds/build_example.sql +16 -0
  49. squirrels/_package_data/base_project/models/builds/build_example.yml +57 -0
  50. squirrels/_package_data/base_project/models/dbviews/dbview_example.sql +17 -0
  51. squirrels/_package_data/base_project/models/dbviews/dbview_example.yml +32 -0
  52. squirrels/_package_data/base_project/models/federates/federate_example.py +51 -0
  53. squirrels/_package_data/base_project/models/federates/federate_example.sql +21 -0
  54. squirrels/_package_data/base_project/models/federates/federate_example.yml +65 -0
  55. squirrels/_package_data/base_project/models/sources.yml +38 -0
  56. squirrels/_package_data/base_project/parameters.yml +142 -0
  57. squirrels/_package_data/base_project/pyconfigs/connections.py +19 -0
  58. squirrels/_package_data/base_project/pyconfigs/context.py +96 -0
  59. squirrels/_package_data/base_project/pyconfigs/parameters.py +141 -0
  60. squirrels/_package_data/base_project/pyconfigs/user.py +56 -0
  61. squirrels/_package_data/base_project/resources/expenses.db +0 -0
  62. squirrels/_package_data/base_project/resources/public/.gitkeep +0 -0
  63. squirrels/_package_data/base_project/resources/weather.db +0 -0
  64. squirrels/_package_data/base_project/seeds/seed_categories.csv +6 -0
  65. squirrels/_package_data/base_project/seeds/seed_categories.yml +15 -0
  66. squirrels/_package_data/base_project/seeds/seed_subcategories.csv +15 -0
  67. squirrels/_package_data/base_project/seeds/seed_subcategories.yml +21 -0
  68. squirrels/_package_data/base_project/squirrels.yml.j2 +61 -0
  69. squirrels/_package_data/base_project/tmp/.gitignore +2 -0
  70. squirrels/_package_data/templates/login_successful.html +53 -0
  71. squirrels/_package_data/templates/squirrels_studio.html +22 -0
  72. squirrels/_package_loader.py +29 -0
  73. squirrels/_parameter_configs.py +592 -0
  74. squirrels/_parameter_options.py +348 -0
  75. squirrels/_parameter_sets.py +207 -0
  76. squirrels/_parameters.py +1703 -0
  77. squirrels/_project.py +796 -0
  78. squirrels/_py_module.py +122 -0
  79. squirrels/_request_context.py +33 -0
  80. squirrels/_schemas/__init__.py +0 -0
  81. squirrels/_schemas/auth_models.py +83 -0
  82. squirrels/_schemas/query_param_models.py +70 -0
  83. squirrels/_schemas/request_models.py +26 -0
  84. squirrels/_schemas/response_models.py +286 -0
  85. squirrels/_seeds.py +97 -0
  86. squirrels/_sources.py +112 -0
  87. squirrels/_utils.py +540 -149
  88. squirrels/_version.py +1 -3
  89. squirrels/arguments.py +7 -0
  90. squirrels/auth.py +4 -0
  91. squirrels/connections.py +3 -0
  92. squirrels/dashboards.py +3 -0
  93. squirrels/data_sources.py +14 -282
  94. squirrels/parameter_options.py +13 -189
  95. squirrels/parameters.py +14 -801
  96. squirrels/types.py +18 -0
  97. squirrels-0.6.0.post0.dist-info/METADATA +148 -0
  98. squirrels-0.6.0.post0.dist-info/RECORD +101 -0
  99. {squirrels-0.1.0.dist-info → squirrels-0.6.0.post0.dist-info}/WHEEL +1 -2
  100. {squirrels-0.1.0.dist-info → squirrels-0.6.0.post0.dist-info}/entry_points.txt +1 -0
  101. squirrels-0.6.0.post0.dist-info/licenses/LICENSE +201 -0
  102. squirrels/_credentials_manager.py +0 -87
  103. squirrels/_module_loader.py +0 -37
  104. squirrels/_parameter_set.py +0 -151
  105. squirrels/_renderer.py +0 -286
  106. squirrels/_timed_imports.py +0 -37
  107. squirrels/connection_set.py +0 -126
  108. squirrels/package_data/base_project/.gitignore +0 -4
  109. squirrels/package_data/base_project/connections.py +0 -21
  110. squirrels/package_data/base_project/database/sample_database.db +0 -0
  111. squirrels/package_data/base_project/database/seattle_weather.db +0 -0
  112. squirrels/package_data/base_project/datasets/sample_dataset/context.py +0 -8
  113. squirrels/package_data/base_project/datasets/sample_dataset/database_view1.py +0 -23
  114. squirrels/package_data/base_project/datasets/sample_dataset/database_view1.sql.j2 +0 -7
  115. squirrels/package_data/base_project/datasets/sample_dataset/final_view.py +0 -10
  116. squirrels/package_data/base_project/datasets/sample_dataset/final_view.sql.j2 +0 -2
  117. squirrels/package_data/base_project/datasets/sample_dataset/parameters.py +0 -30
  118. squirrels/package_data/base_project/datasets/sample_dataset/selections.cfg +0 -6
  119. squirrels/package_data/base_project/squirrels.yaml +0 -26
  120. squirrels/package_data/static/favicon.ico +0 -0
  121. squirrels/package_data/static/script.js +0 -234
  122. squirrels/package_data/static/style.css +0 -110
  123. squirrels/package_data/templates/index.html +0 -32
  124. squirrels-0.1.0.dist-info/LICENSE +0 -22
  125. squirrels-0.1.0.dist-info/METADATA +0 -67
  126. squirrels-0.1.0.dist-info/RECORD +0 -40
  127. squirrels-0.1.0.dist-info/top_level.txt +0 -1
@@ -0,0 +1,7 @@
1
+ # Custom environment variables
2
+ SQLITE_URI="sqlite:///{project_path}/resources/expenses.db"
3
+
4
+ # Secrets used by the Squirrels framework that are NOT SAFE TO INCLUDE IN VERSION CONTROL
5
+ # Required if your project uses authentication. Otherwise, optional.
6
+ SQRL_SECRET__KEY="{{ random_secret_key }}"
7
+ SQRL_SECRET__ADMIN_PASSWORD="{{ random_admin_password }}"
@@ -0,0 +1,44 @@
1
+ # Custom environment variables
2
+ SQLITE_URI="sqlite:///{project_path}/resources/expenses.db"
3
+
4
+ # Secrets used by the Squirrels framework that are NOT SAFE TO INCLUDE IN VERSION CONTROL
5
+ # Required if your project uses authentication. Otherwise, optional.
6
+ SQRL_SECRET__KEY="" # a random 32-byte hex string - you can generate this by running `python -c "import secrets; print(secrets.token_hex(32))"`
7
+ SQRL_SECRET__ADMIN_PASSWORD=""
8
+
9
+ # # Optional variables used by the Squirrels framework that are safe to include in version control if desired
10
+ # # (default values are shown below)
11
+ # SQRL_AUTH__DB_FILE_PATH="{project_path}/target/auth.sqlite"
12
+ # SQRL_AUTH__TOKEN_EXPIRE_MINUTES="30"
13
+ # SQRL_AUTH__ALLOWED_ORIGINS_FOR_COOKIES="https://squirrels-analytics.github.io"
14
+
15
+ # SQRL_PARAMETERS__CACHE_SIZE="1024"
16
+ # SQRL_PARAMETERS__CACHE_TTL_MINUTES="60"
17
+ # SQRL_PARAMETERS__DATASOURCE_REFRESH_MINUTES="60"
18
+
19
+ # SQRL_DATASETS__CACHE_SIZE="128"
20
+ # SQRL_DATASETS__CACHE_TTL_MINUTES="60"
21
+ # SQRL_DATASETS__MAX_ROWS_FOR_AI="100"
22
+ # SQRL_DATASETS__MAX_ROWS_OUTPUT="100000"
23
+ # SQRL_DATASETS__SQL_TIMEOUT_SECONDS="2"
24
+
25
+ # SQRL_DASHBOARDS__CACHE_SIZE="128"
26
+ # SQRL_DASHBOARDS__CACHE_TTL_MINUTES="60"
27
+
28
+ # SQRL_PERMISSIONS__ELEVATED_ACCESS_LEVEL="admin" # one of "admin", "member", "guest"
29
+
30
+ # SQRL_SEEDS__INFER_SCHEMA="true"
31
+ # SQRL_SEEDS__NA_VALUES=["NA"] # must be a JSON list
32
+
33
+ # SQRL_CONNECTIONS__DEFAULT_NAME_USED="default"
34
+
35
+ # SQRL_VDL__CATALOG_DB_PATH="ducklake:{project_path}/target/vdl_catalog.duckdb"
36
+ # SQRL_VDL__DATA_PATH="{project_path}/target/vdl_data/"
37
+
38
+ # SQRL_STUDIO__BASE_URL="https://squirrels-analytics.github.io/squirrels-studio-v2"
39
+
40
+ # SQRL_LOGGING__LEVEL="INFO" # one of "DEBUG", "INFO", "WARNING"
41
+ # SQRL_LOGGING__FORMAT="text"
42
+ # SQRL_LOGGING__TO_FILE="false"
43
+ # SQRL_LOGGING__FILE_SIZE_MB="50"
44
+ # SQRL_LOGGING__FILE_BACKUP_COUNT="1"
@@ -0,0 +1,16 @@
1
+ ## Connection URIs are usually in format "dialect://username:password@host:port/database" for database connections
2
+ ## However, subtle differences exist depending on the "type" specified. For example, sqlite URIs are slightly different.
3
+ ## sqlalchemy: sqlite:///{project_path}/relative/path/to/database.db
4
+ ## connectorx/adbc: sqlite://{project_path}/relative/path/to/database.db (adbc URI format matches connectorx)
5
+ ## duckdb: sqlite:{project_path}/relative/path/to/database.db
6
+ ## Refer to specific documentation for supported databases by type (with URI examples):
7
+ ## sqlalchemy: https://docs.sqlalchemy.org/en/latest/core/engines.html#database-urls
8
+ ## connectorx: https://sfu-db.github.io/connector-x/databases.html
9
+ ## adbc: https://arrow.apache.org/adbc/ (see connectorx documentation for URI examples)
10
+ connections:
11
+ - name: default
12
+ label: SQLite Expenses Database
13
+ type: sqlalchemy ## one of: sqlalchemy, connectorx, adbc, or duckdb
14
+ uri: {{ env_vars.SQLITE_URI }} ## using Jinja to substitute environment variables
15
+
16
+
@@ -0,0 +1,40 @@
1
+ from squirrels.arguments import DashboardArgs
2
+ from squirrels.dashboards import PngDashboard, HtmlDashboard
3
+ from matplotlib import pyplot as plt, figure as fg, axes as a
4
+ import asyncio
5
+
6
+
7
+ async def main(sqrl: DashboardArgs) -> PngDashboard | HtmlDashboard:
8
+ # Get dataset instances concurrently
9
+ all_dataframes = await asyncio.gather(
10
+ sqrl.dataset("grouped_expenses", fixed_parameters={"group_by": "month"}),
11
+ sqrl.dataset("grouped_expenses", fixed_parameters={"group_by": "subcat"})
12
+ )
13
+ spending_by_month_df, spending_by_subcategory_df = all_dataframes
14
+
15
+ # Create a figure with two subplots
16
+ fig, (ax0, ax1) = plt.subplots(2, 1, figsize=(8, 8), height_ratios=(1, 2))
17
+ fig: fg.Figure; ax0: a.Axes; ax1: a.Axes
18
+ fig.tight_layout(pad=4, h_pad=6)
19
+
20
+ # Create a bar chart of spending by month
21
+
22
+ # Convert to pandas and ensure total_amount is numeric
23
+ spending_by_month_pandas = spending_by_month_df.sort("month").to_pandas()
24
+ spending_by_month_pandas["total_amount"] = spending_by_month_pandas["total_amount"].astype(float)
25
+
26
+ spending_by_month_pandas.plot(x="month", y="total_amount", ax=ax0)
27
+ ax0.set_title("Spending by Month")
28
+
29
+ # Create a pie chart of spending by subcategory
30
+
31
+ # Convert to pandas and ensure total_amount is numeric
32
+ subcategory_pandas = spending_by_subcategory_df.sort("total_amount", descending=True).to_pandas()
33
+ subcategory_pandas["total_amount"] = subcategory_pandas["total_amount"].astype(float)
34
+ subcategory_pandas.set_index("subcategory", inplace=True)
35
+
36
+ autopct = lambda pct: ('%.1f%%' % pct) if pct > 6 else ''
37
+ subcategory_pandas.plot(y="total_amount", kind='pie', ax=ax1, autopct=autopct, legend=False, ylabel="")
38
+ ax1.set_title("Spending by Subcategory")
39
+
40
+ return PngDashboard(fig)
@@ -0,0 +1,22 @@
1
+ label: Expense Dashboard
2
+
3
+ description: This is a dashboard showing the total expense amounts by month as a line chart and by subcategory as a pie chart
4
+
5
+ scope: protected
6
+
7
+ format: png
8
+
9
+ parameters:
10
+ - date_range
11
+ - category
12
+
13
+ depends_on:
14
+ - name: dataset_example_month
15
+ dataset: grouped_expenses
16
+ fixed_parameters:
17
+ group_by: month (Month)
18
+
19
+ - name: dataset_example_subcategory
20
+ dataset: grouped_expenses
21
+ fixed_parameters:
22
+ group_by: subcat (Subcategory)
@@ -0,0 +1,16 @@
1
+ **/__pycache__/
2
+
3
+ # common virtual environment names
4
+ .venv/
5
+ venv/
6
+
7
+ # squirrels files to ignore
8
+ .env
9
+ .env.local
10
+ duckdb_init.sql
11
+ logs/
12
+ target/
13
+ sqrl_packages/
14
+
15
+ # additional files for docker to ignore
16
+ .git/
@@ -0,0 +1,16 @@
1
+ # Change the image tag here to use a different Python version (e.g. 3.11-slim for version 3.11)
2
+ FROM python:3.12-slim
3
+ WORKDIR /app
4
+
5
+ COPY . .
6
+
7
+ # Only needed if there are python dependencies installed using git, or for the
8
+ # "sqrl deps" command if there are packages defined in "squirrels.yml"
9
+ RUN apt-get update && apt-get install -y git
10
+
11
+ RUN pip install --no-cache-dir -r requirements.txt
12
+
13
+ RUN sqrl deps
14
+
15
+ EXPOSE 8000
16
+ CMD ["sqrl", "run", "--build", "--host", "0.0.0.0", "--port", "8000"]
@@ -0,0 +1,7 @@
1
+ services:
2
+ squirrels:
3
+ build: .
4
+ ports:
5
+ - "8000:8000"
6
+ volumes:
7
+ - ./.env:/app/.env
@@ -0,0 +1,10 @@
1
+ -- SQL statements that run at the start of every DuckDB session
2
+ -- If the VDL catalog path is provided, the project will ATTACH it as 'vdl' (READ_ONLY)
3
+
4
+ -- Example:
5
+
6
+ -- SET threads = 4;
7
+
8
+ -- SET temp_directory = '/path/to/tmp/';
9
+
10
+ -- CREATE SECRET (TYPE S3, PROVIDER CREDENTIAL_CHAIN);
@@ -0,0 +1,13 @@
1
+ **/__pycache__/
2
+
3
+ # common virtual environment names
4
+ .venv/
5
+ venv/
6
+
7
+ # squirrels files to ignore
8
+ .env
9
+ .env.local
10
+ duckdb_init.sql
11
+ logs/
12
+ target/
13
+ sqrl_packages/
@@ -0,0 +1,17 @@
1
+ {%- macro date_and_amount_filters(use_from_range) -%}
2
+ {%- if use_from_range -%}
3
+
4
+ date >= {{ ctx.start_date_from_range | quote }}
5
+ AND date <= {{ ctx.end_date_from_range | quote }}
6
+ AND amount >= {{ ctx.min_amount_from_range }}
7
+ AND amount <= {{ ctx.max_amount_from_range }}
8
+
9
+ {%- else -%}
10
+
11
+ date >= {{ ctx.start_date | quote }}
12
+ AND date <= {{ ctx.end_date | quote }}
13
+ AND amount >= {{ ctx.min_amount }}
14
+ AND amount <= {{ ctx.max_amount }}
15
+
16
+ {%- endif -%}
17
+ {%- endmacro -%}
@@ -0,0 +1,26 @@
1
+ from squirrels.arguments import BuildModelArgs
2
+ import polars as pl, pandas as pd
3
+
4
+
5
+ def main(sqrl: BuildModelArgs) -> pl.LazyFrame | pl.DataFrame | pd.DataFrame:
6
+ """
7
+ Create a build model by joining/processing sources or other build models to form a new
8
+ Python DataFrame (using polars LazyFrame, polars DataFrame, or pandas DataFrame).
9
+ """
10
+ # sqrl.ref() can be used on sources, seeds, or other build models
11
+ expenses_df = sqrl.ref("src_transactions")
12
+ categories_df = sqrl.ref("seed_categories")
13
+ subcategories_df = sqrl.ref("seed_subcategories")
14
+
15
+ df = expenses_df \
16
+ .join(subcategories_df, on="subcategory_id", how="left") \
17
+ .join(categories_df, on="category_id", how="left")
18
+
19
+ df = df.with_columns(
20
+ pl.col("date").dt.strftime("%Y-%m").alias("month"),
21
+ pl.col("date").dt.strftime("%Y-%m-%d").alias("date"),
22
+ )
23
+
24
+ return df.select(
25
+ "id", "date", "month", "category_id", "category", "subcategory_id", "subcategory", "amount", "description"
26
+ )
@@ -0,0 +1,16 @@
1
+ {#- DuckDB dialect -#}
2
+
3
+ SELECT a.id,
4
+ STRFTIME(a.date, '%Y-%m-%d') AS date,
5
+ STRFTIME(a.date, '%Y-%m') AS month,
6
+ c.category_id,
7
+ c.category,
8
+ b.subcategory_id,
9
+ b.subcategory,
10
+ a.amount,
11
+ a.description
12
+
13
+ {# ref() can be used on sources, seeds, or other build models -#}
14
+ FROM {{ ref("src_transactions") }} AS a
15
+ LEFT JOIN {{ ref("seed_subcategories") }} AS b ON a.subcategory_id = b.subcategory_id
16
+ LEFT JOIN {{ ref("seed_categories") }} AS c ON b.category_id = c.category_id
@@ -0,0 +1,57 @@
1
+ description: |
2
+ This is an example of a build model. It adds a new column called "month" to the source table "src_transactions".
3
+
4
+ materialization: TABLE # optional - defaults to "VIEW" for SQL models, ignored and always a "TABLE" for Python models
5
+
6
+ depends_on: # optional for SQL models - the "ref" macro also adds to this set
7
+ - src_transactions
8
+ - seed_categories
9
+ - seed_subcategories
10
+
11
+ columns:
12
+ - name: id
13
+ depends_on:
14
+ - src_transactions.id
15
+ pass_through: true
16
+
17
+ - name: date
18
+ type: string
19
+ description: The day of the transaction as a string in 'YYYY-MM-DD' format
20
+ depends_on:
21
+ - src_transactions.date
22
+
23
+ - name: month
24
+ type: string
25
+ description: The month of the transaction as a string in 'YYYY-MM' format
26
+ depends_on:
27
+ - src_transactions.date
28
+
29
+ - name: category_id
30
+ depends_on:
31
+ - seed_categories.category_id
32
+ pass_through: true
33
+
34
+ - name: category
35
+ depends_on:
36
+ - seed_categories.category
37
+ pass_through: true
38
+
39
+ - name: subcategory_id
40
+ depends_on:
41
+ - seed_subcategories.subcategory_id
42
+ pass_through: true
43
+
44
+ - name: subcategory
45
+ depends_on:
46
+ - seed_subcategories.subcategory
47
+ pass_through: true
48
+
49
+ - name: amount
50
+ depends_on:
51
+ - src_transactions.amount
52
+ pass_through: true
53
+
54
+ - name: description
55
+ depends_on:
56
+ - src_transactions.description
57
+ pass_through: true
@@ -0,0 +1,17 @@
1
+ {#- SQLite dialect (based on connection used) -#}
2
+
3
+ SELECT
4
+ date,
5
+ printf('%.2f', amount) as amount,
6
+ CASE
7
+ WHEN '{{ user.custom_fields.role }}' = 'manager' THEN description
8
+ ELSE '***MASKED***'
9
+ END as description
10
+
11
+ FROM {{ source("src_transactions") }}
12
+
13
+ WHERE {{ date_and_amount_filters(use_from_range=false) }}
14
+
15
+ GROUP BY 1
16
+
17
+ ORDER BY 1 DESC
@@ -0,0 +1,32 @@
1
+ description: |
2
+ This is an example of a database view model. It shows transaction details including date, amount, and description.
3
+ Description is masked for non-manager users.
4
+
5
+ Parameters are available to filter the date and amount of the transactions.
6
+
7
+ connection: default # optional - if not provided, will use default connection specified in the SQRL_CONNECTIONS__DEFAULT_NAME_USED setting
8
+
9
+ translate_to_duckdb: false # optional - default is false - if true, then the model will be translated to duckdb if dialect is supported by sqlglot
10
+
11
+ depends_on: # optional - Squirrels is able to derive this from the "source" macro in the SQL file
12
+ - src_transactions
13
+
14
+ columns:
15
+ - name: date
16
+ depends_on:
17
+ - src_transactions.date
18
+ pass_through: true
19
+
20
+ - name: amount
21
+ type: float
22
+ description: The amount of the transaction, formatted to 2 decimal places
23
+ category: measure
24
+ depends_on:
25
+ - src_transactions.amount
26
+
27
+ - name: description
28
+ type: string
29
+ description: The description of the transaction (masked for non-manager users)
30
+ category: dimension
31
+ depends_on:
32
+ - src_transactions.description
@@ -0,0 +1,51 @@
1
+ from squirrels.arguments import ModelArgs
2
+ import polars as pl, pandas as pd
3
+
4
+
5
+ def main(sqrl: ModelArgs) -> pl.LazyFrame | pl.DataFrame | pd.DataFrame:
6
+ """
7
+ Create federated models by joining/processing dependent models (sources, seeds, builds, dbviews, other federates, etc.) to
8
+ form a new Python DataFrame (using polars LazyFrame, polars DataFrame, or pandas DataFrame).
9
+ """
10
+ df = sqrl.ref("build_example")
11
+
12
+ df = df.filter(
13
+ (pl.col("date") >= sqrl.ctx["start_date_from_range"]) &
14
+ (pl.col("date") <= sqrl.ctx["end_date_from_range"]) &
15
+ (pl.col("amount") >= sqrl.ctx["min_amount_from_range"]) &
16
+ (pl.col("amount") <= sqrl.ctx["max_amount_from_range"])
17
+ )
18
+
19
+ if sqrl.ctx.get("has_categories"):
20
+ categories: list[str] = sqrl.ctx["categories"]
21
+ df = df.filter(pl.col("category_id").is_in(categories))
22
+
23
+ if sqrl.ctx.get("has_subcategories"):
24
+ subcategories: list[str] = sqrl.ctx["subcategories"]
25
+ df = df.filter(pl.col("subcategory_id").is_in(subcategories))
26
+
27
+ df = df.rename(sqrl.ctx.get("column_to_alias_mapping", {}))
28
+
29
+ dimension_cols: list[str] | None = sqrl.ctx.get("group_by_cols")
30
+ decimal_type = pl.Decimal(precision=15, scale=2)
31
+ if dimension_cols is not None:
32
+ df = df.group_by(dimension_cols).agg(
33
+ pl.sum("amount").cast(decimal_type).alias("total_amount")
34
+ )
35
+ else:
36
+ df = df.with_columns(
37
+ pl.col("amount").cast(decimal_type).alias("total_amount")
38
+ )
39
+
40
+ order_by_cols: list[str] = sqrl.ctx.get("order_by_cols")
41
+ if order_by_cols is not None:
42
+ df = df.select(*order_by_cols, "total_amount").sort(order_by_cols, descending=True)
43
+
44
+ # Apply mask_column_function to description column if it exists
45
+ mask_column_func = sqrl.ctx.get("mask_column_function")
46
+ if "description" in order_by_cols and mask_column_func:
47
+ df = df.with_columns(
48
+ pl.col("description").map_elements(mask_column_func, return_dtype=pl.String).alias("description")
49
+ )
50
+
51
+ return df
@@ -0,0 +1,21 @@
1
+ {#- DuckDB dialect -#}
2
+
3
+ SELECT {{ ctx.select_dim_cols | join }}
4
+ , CAST({{ ctx.aggregator }}(amount) AS DECIMAL(15, 2)) as total_amount
5
+
6
+ {# ref() can be used on sources, seeds, builds, dbviews, or other federate models -#}
7
+ FROM {{ ref("build_example") }} AS a
8
+
9
+ WHERE {{ date_and_amount_filters(use_from_range=true) }}
10
+ {%- if ctx.has_categories %}
11
+ AND category_id IN ({{ ctx.categories | quote_and_join }})
12
+ {%- endif %}
13
+ {%- if ctx.has_subcategories %}
14
+ AND subcategory_id IN ({{ ctx.subcategories | quote_and_join }})
15
+ {%- endif %}
16
+
17
+ {%- if ctx.group_by_cols %}
18
+ GROUP BY {{ ctx.group_by_cols | join }}
19
+ {%- endif %}
20
+
21
+ ORDER BY {{ ctx.order_by_cols_desc | join }}
@@ -0,0 +1,65 @@
1
+ description: |
2
+ This is an example of a federate view model. It takes the build example model and groups or filters the results based on the parameter selections provided.
3
+
4
+ Parameters are available to specify the group by dimension and filter by date, amount of the transaction, category, and subcategory.
5
+
6
+ depends_on: # optional for SQL models - the "ref" macro also adds to this set
7
+ - build_example
8
+
9
+ eager: false # optional - defaults to false. Only applies to SQL models.
10
+
11
+ columns:
12
+ - name: date
13
+ type: string
14
+ condition: ['group_by == "Transaction"']
15
+ description: The date of the transaction in 'YYYY-MM-DD' format, in descending order
16
+ category: dimension
17
+ depends_on:
18
+ - build_example.date
19
+
20
+ - name: description
21
+ type: string
22
+ condition: ['group_by == "Transaction"']
23
+ description: The description of the transaction (masked for non-manager users)
24
+ category: dimension
25
+ depends_on:
26
+ - build_example.description
27
+
28
+ - name: day
29
+ type: string
30
+ condition: ['group_by == "Day"']
31
+ description: The day for which the amount is aggregated by, in descending order
32
+ category: dimension
33
+ depends_on:
34
+ - build_example.date
35
+
36
+ - name: month
37
+ type: string
38
+ condition: ['group_by == "Month"']
39
+ description: The month for which the amount is aggregated by, in descending order
40
+ category: dimension
41
+ depends_on:
42
+ - build_example.month
43
+
44
+ - name: category
45
+ type: string
46
+ condition: ['group_by == "Transaction"', 'group_by == "Category"', 'group_by == "Subcategory"']
47
+ description: The category for which the amount is aggregated by
48
+ category: dimension
49
+ depends_on:
50
+ - build_example.category
51
+
52
+ - name: subcategory
53
+ type: string
54
+ condition: ['group_by == "Transaction"', 'group_by == "Subcategory"']
55
+ description: The subcategory for which the amount is aggregated by
56
+ category: dimension
57
+ depends_on:
58
+ - build_example.subcategory
59
+
60
+ - name: total_amount
61
+ type: float
62
+ description: The total amount spent by the group by dimension
63
+ category: measure
64
+ depends_on:
65
+ - build_example.total_amount
@@ -0,0 +1,38 @@
1
+ sources:
2
+ - name: src_transactions
3
+ description: "The source table for transactions" # optional
4
+ connection: default # optional - if not provided, will use the connection named "default" or the default connection specified in settings
5
+ table: expenses # optional - if not provided, will use the "name" field of the source
6
+ load_to_vdl: true # optional - default is false - except for dbview models with translate_to_duckdb set to false, models can only reference this source if load_to_vdl is true or the connection type is duckdb
7
+
8
+ update_hints:
9
+ increasing_column: date # optional - if not provided, will always do full refresh, otherwise uses this column for incremental loads
10
+ strictly_increasing: false # optional - default is true - if false, then maximum value of column is removed before incremental load is performed
11
+
12
+ primary_key: [id] # optional - if not provided, then this is an insert-only table for incremental loads
13
+
14
+ columns: # optional - if load_to_vdl is true, then only the columns listed here are loaded to the Virtual Data Lake (VDL)
15
+ - name: id
16
+ type: string
17
+ description: The unique identifier for the transaction
18
+ category: dimension
19
+
20
+ - name: date
21
+ type: date
22
+ description: The date of the transaction
23
+ category: dimension
24
+
25
+ - name: subcategory_id
26
+ type: string
27
+ description: The ID of the subcategory of the transaction
28
+ category: dimension
29
+
30
+ - name: amount
31
+ type: float
32
+ description: The amount of the transaction
33
+ category: measure
34
+
35
+ - name: description
36
+ type: string
37
+ description: The description of the transaction
38
+ category: dimension