squirrels 0.4.1__py3-none-any.whl → 0.5.0rc0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of squirrels might be problematic. Click here for more details.

Files changed (80) hide show
  1. squirrels/__init__.py +10 -6
  2. squirrels/_api_response_models.py +93 -44
  3. squirrels/_api_server.py +571 -219
  4. squirrels/_auth.py +451 -0
  5. squirrels/_command_line.py +61 -20
  6. squirrels/_connection_set.py +38 -25
  7. squirrels/_constants.py +44 -34
  8. squirrels/_dashboards_io.py +34 -16
  9. squirrels/_exceptions.py +57 -0
  10. squirrels/_initializer.py +117 -44
  11. squirrels/_manifest.py +124 -62
  12. squirrels/_model_builder.py +111 -0
  13. squirrels/_model_configs.py +74 -0
  14. squirrels/_model_queries.py +52 -0
  15. squirrels/_models.py +860 -354
  16. squirrels/_package_loader.py +8 -4
  17. squirrels/_parameter_configs.py +45 -65
  18. squirrels/_parameter_sets.py +15 -13
  19. squirrels/_project.py +561 -0
  20. squirrels/_py_module.py +4 -3
  21. squirrels/_seeds.py +35 -16
  22. squirrels/_sources.py +106 -0
  23. squirrels/_utils.py +166 -63
  24. squirrels/_version.py +1 -1
  25. squirrels/arguments/init_time_args.py +78 -15
  26. squirrels/arguments/run_time_args.py +62 -101
  27. squirrels/dashboards.py +4 -4
  28. squirrels/data_sources.py +94 -162
  29. squirrels/dataset_result.py +86 -0
  30. squirrels/dateutils.py +4 -4
  31. squirrels/package_data/base_project/.env +30 -0
  32. squirrels/package_data/base_project/.env.example +30 -0
  33. squirrels/package_data/base_project/.gitignore +3 -2
  34. squirrels/package_data/base_project/assets/expenses.db +0 -0
  35. squirrels/package_data/base_project/connections.yml +11 -3
  36. squirrels/package_data/base_project/dashboards/dashboard_example.py +15 -13
  37. squirrels/package_data/base_project/dashboards/dashboard_example.yml +22 -0
  38. squirrels/package_data/base_project/docker/.dockerignore +5 -2
  39. squirrels/package_data/base_project/docker/Dockerfile +3 -3
  40. squirrels/package_data/base_project/docker/compose.yml +1 -1
  41. squirrels/package_data/base_project/duckdb_init.sql +9 -0
  42. squirrels/package_data/base_project/macros/macros_example.sql +15 -0
  43. squirrels/package_data/base_project/models/builds/build_example.py +26 -0
  44. squirrels/package_data/base_project/models/builds/build_example.sql +16 -0
  45. squirrels/package_data/base_project/models/builds/build_example.yml +55 -0
  46. squirrels/package_data/base_project/models/dbviews/dbview_example.sql +12 -22
  47. squirrels/package_data/base_project/models/dbviews/dbview_example.yml +26 -0
  48. squirrels/package_data/base_project/models/federates/federate_example.py +38 -15
  49. squirrels/package_data/base_project/models/federates/federate_example.sql +16 -2
  50. squirrels/package_data/base_project/models/federates/federate_example.yml +65 -0
  51. squirrels/package_data/base_project/models/sources.yml +39 -0
  52. squirrels/package_data/base_project/parameters.yml +36 -21
  53. squirrels/package_data/base_project/pyconfigs/connections.py +6 -11
  54. squirrels/package_data/base_project/pyconfigs/context.py +20 -33
  55. squirrels/package_data/base_project/pyconfigs/parameters.py +19 -21
  56. squirrels/package_data/base_project/pyconfigs/user.py +23 -0
  57. squirrels/package_data/base_project/seeds/seed_categories.yml +15 -0
  58. squirrels/package_data/base_project/seeds/seed_subcategories.csv +15 -15
  59. squirrels/package_data/base_project/seeds/seed_subcategories.yml +21 -0
  60. squirrels/package_data/base_project/squirrels.yml.j2 +17 -40
  61. squirrels/parameters.py +20 -20
  62. {squirrels-0.4.1.dist-info → squirrels-0.5.0rc0.dist-info}/METADATA +31 -32
  63. squirrels-0.5.0rc0.dist-info/RECORD +70 -0
  64. {squirrels-0.4.1.dist-info → squirrels-0.5.0rc0.dist-info}/WHEEL +1 -1
  65. squirrels-0.5.0rc0.dist-info/entry_points.txt +3 -0
  66. {squirrels-0.4.1.dist-info → squirrels-0.5.0rc0.dist-info/licenses}/LICENSE +1 -1
  67. squirrels/_authenticator.py +0 -85
  68. squirrels/_environcfg.py +0 -84
  69. squirrels/package_data/assets/favicon.ico +0 -0
  70. squirrels/package_data/assets/index.css +0 -1
  71. squirrels/package_data/assets/index.js +0 -58
  72. squirrels/package_data/base_project/dashboards.yml +0 -10
  73. squirrels/package_data/base_project/env.yml +0 -29
  74. squirrels/package_data/base_project/models/dbviews/dbview_example.py +0 -47
  75. squirrels/package_data/base_project/pyconfigs/auth.py +0 -45
  76. squirrels/package_data/templates/index.html +0 -18
  77. squirrels/project.py +0 -378
  78. squirrels/user_base.py +0 -55
  79. squirrels-0.4.1.dist-info/RECORD +0 -60
  80. squirrels-0.4.1.dist-info/entry_points.txt +0 -4
@@ -0,0 +1,111 @@
1
+ from dataclasses import dataclass, field
2
+ import asyncio, shutil, duckdb, time
3
+
4
+ from . import _utils as u, _connection_set as cs, _models as m
5
+ from ._exceptions import InvalidInputError
6
+
7
+
8
@dataclass
class ModelBuilder:
    """
    Builds the static models into a DuckDB file (the "virtual data environment").

    The build is done on a development copy ("<path>.dev") guarded by a lock file
    ("<path>.dev.lock"). On success the development copy is renamed to either the
    final path or the staging path ("<path>.stg").
    """
    _duckdb_venv_path: str
    _conn_set: cs.ConnectionSet
    _static_models: dict[str, m.StaticModel]
    _conn_args: cs.ConnectionsArgs = field(default_factory=lambda: cs.ConnectionsArgs(".", {}, {}))
    _logger: u.Logger = field(default_factory=lambda: u.Logger(""))

    def _attach_connections(self, duckdb_conn: duckdb.DuckDBPyConnection) -> dict[str, str]:
        """
        Attach each supported connection to DuckDB as a read-only database named "db_<conn_name>".

        Connections that are not `ConnectionProperties`, or that have no DuckDB attach URI, are skipped.

        Returns:
            A mapping of connection name to dialect for the connections that were attached.
        """
        dialect_by_conn_name: dict[str, str] = {}
        for conn_name, conn_props in self._conn_set.get_connections_as_dict().items():
            if not isinstance(conn_props, m.ConnectionProperties):
                continue
            dialect = conn_props.dialect
            attach_uri = conn_props.attach_uri_for_duckdb
            if attach_uri is None:
                continue # skip unsupported dialects
            attach_stmt = f"ATTACH IF NOT EXISTS '{attach_uri}' AS db_{conn_name} (TYPE {dialect}, READ_ONLY)"
            # The URI is passed as a redacted value (presumably so that credentials are masked in logs)
            u.run_duckdb_stmt(self._logger, duckdb_conn, attach_stmt, redacted_values=[attach_uri])
            dialect_by_conn_name[conn_name] = dialect
        return dialect_by_conn_name

    async def _build_models(self, duckdb_conn: duckdb.DuckDBPyConnection, select: str | None, full_refresh: bool) -> None:
        """
        Compile and construct the build models as DuckDB tables.

        Arguments:
            duckdb_conn: The DuckDB connection to build the models in
            select: Name of the single model to build, or None to build from all terminal nodes
            full_refresh: Forwarded to each model's build_model call

        Raises:
            KeyError: If select is not the name of a known static model
        """
        # Compile the build models
        models_list = self._static_models.values() if select is None else [self._static_models[select]]
        for model in models_list:
            model.compile_for_build(self._conn_args, self._static_models)

        # Find all terminal nodes
        terminal_nodes = set()
        if select is None:
            for model in models_list:
                terminal_nodes.update(model.get_terminal_nodes_for_build(set()))
            # Reset the flag on every model once the traversal is done
            # (presumably it is set while searching for terminal nodes - confirm in _models)
            for model in models_list:
                model.confirmed_no_cycles = False
        else:
            terminal_nodes.add(select)

        # Run the build models
        coroutines = [
            self._static_models[model_name].build_model(duckdb_conn, full_refresh)
            for model_name in terminal_nodes
        ]
        await u.asyncio_gather(coroutines)

    async def build(self, full_refresh: bool, select: str | None, stage_file: bool) -> None:
        """
        Build the virtual data environment.

        Arguments:
            full_refresh: If True, start from an empty development file instead of a copy of the existing one
            select: Name of the single model to build, or None to build all models
            stage_file: If True, the result is written to the staging path instead of the final path

        Raises:
            InvalidInputError: If the lock file of another build already exists
        """
        start = time.time()

        # Create target folder if it doesn't exist
        duckdb_path = u.Path(self._duckdb_venv_path)
        duckdb_path.parent.mkdir(parents=True, exist_ok=True)

        duckdb_dev_path = u.Path(self._duckdb_venv_path + ".dev")
        duckdb_stg_path = u.Path(self._duckdb_venv_path + ".stg")

        # If the development copy is already in use, a concurrent build is not allowed.
        # Creating the lock file with exist_ok=False is a single atomic step, so two builds
        # starting at the same time cannot both pass a separate "exists" check.
        duckdb_dev_lock_path = u.Path(self._duckdb_venv_path + ".dev.lock")
        try:
            duckdb_dev_lock_path.touch(exist_ok=False)
        except FileExistsError:
            raise InvalidInputError(60, "An existing build process is already running and a concurrent build is not allowed") from None

        # Ensure the lock file is deleted even if an exception is raised
        # (BaseException so that an interrupt during the file copy does not leave a stale lock)
        try:
            if full_refresh:
                # Delete any development file left behind by a failed build so that
                # a full refresh really starts from an empty database
                if duckdb_dev_path.exists():
                    duckdb_dev_path.unlink()
            else:
                # If not full refresh, create a development copy of the existing virtual data environment
                if duckdb_stg_path.exists():
                    duckdb_stg_path.replace(duckdb_dev_path)
                elif duckdb_path.exists():
                    shutil.copy(duckdb_path, duckdb_dev_path)

            self._logger.log_activity_time("creating development copy of virtual data environment", start)

            # Connect to DuckDB file
            duckdb_conn = u.create_duckdb_connection(duckdb_dev_path)

        except BaseException:
            duckdb_dev_lock_path.unlink()
            raise

        # Sometimes code after conn.close() doesn't run (as if the python process is killed but no error is raised)
        # Using a new try block to ensure the lock file is removed before closing the connection
        try:
            # Attach connections
            self._attach_connections(duckdb_conn)

            # Construct build models
            await self._build_models(duckdb_conn, select, full_refresh)

        finally:
            duckdb_dev_lock_path.unlink()
            duckdb_conn.close()

        # Rename duckdb_dev_path to duckdb_path (or duckdb_stg_path if stage_file is True)
        if stage_file:
            duckdb_dev_path.replace(duckdb_stg_path)
        else:
            duckdb_dev_path.replace(duckdb_path)

        self._logger.log_activity_time("TOTAL TIME to build virtual data environment", start)
@@ -0,0 +1,74 @@
1
+ from enum import Enum
2
+ from pydantic import BaseModel, Field
3
+
4
+ from . import _constants as c
5
+
6
+
7
class ColumnCategory(Enum):
    # Closed set of categories a column can be given; the values are the lowercase
    # strings accepted in configuration.
    # (`#` comments rather than a docstring, since pydantic copies an enum docstring into the JSON schema.)
    DIMENSION = "dimension"
    MEASURE = "measure"
    MISC = "misc"
11
+
12
+
13
class ColumnConfig(BaseModel):
    # Configuration of a single model column. Only `name` is required; every other field has a default.
    # (`#` comments rather than a docstring, since pydantic copies a class docstring into the JSON schema.)
    name: str = Field(
        description="The name of the column",
    )
    type: str = Field(
        default="",
        description="The type of the column such as 'string', 'integer', 'float', 'boolean', 'datetime', etc.",
    )
    condition: str = Field(
        default="",
        description="The condition of when the column is included",
    )
    description: str = Field(
        default="",
        description="The description of the column",
    )
    category: ColumnCategory = Field(
        default=ColumnCategory.MISC,
        description="The category of the column, either 'dimension', 'measure', or 'misc'",
    )
    depends_on: set[str] = Field(
        default_factory=set,
        description="List of dependent columns",
    )
    pass_through: bool = Field(
        default=False,
        description="Whether the column should be passed through to the federate",
    )
21
+
22
+
23
class ModelConfig(BaseModel):
    # Base configuration shared by the model config classes: a description and the column configs.
    # (`#` comments rather than a docstring, since pydantic copies a class docstring into the JSON schema.)
    description: str = Field(
        default="",
        description="The description of the model",
    )
    columns: list[ColumnConfig] = Field(
        default_factory=list,
        description="The columns of the model",
    )
26
+
27
+
28
class SeedConfig(ModelConfig):
    # Model configuration for seeds; adds a switch for casting column types.
    # (`#` comments rather than a docstring, since pydantic copies a class docstring into the JSON schema.)
    cast_column_types: bool = Field(
        default=False,
        description="Whether the column types should be cast to the appropriate type",
    )
30
+
31
+
32
class ConnectionInterface(BaseModel):
    # Mixin-style model that carries an optional connection name which is resolved later.
    # (`#` comments rather than a docstring, since pydantic copies a class docstring into the JSON schema.)
    connection: str | None = Field(
        default=None,
        description="The connection name of the source model / database view",
    )

    def finalize_connection(self, env_vars: dict[str, str]):
        """
        Fill in the connection name if none was configured, then return this object.

        The fallback is read from env_vars under the SQRL_CONNECTIONS_DEFAULT_NAME_USED key,
        and is "default" when that key is absent. An already configured name is left untouched.
        """
        if self.connection is not None:
            return self
        self.connection = env_vars.get(c.SQRL_CONNECTIONS_DEFAULT_NAME_USED, "default")
        return self

    def get_connection(self) -> str:
        """
        Return the connection name, asserting that it has been set.
        """
        conn_name = self.connection
        assert conn_name is not None, "Connection must be set"
        return conn_name
43
+
44
+
45
class QueryModelConfig(ModelConfig):
    # Model configuration for query-based models; adds the set of models this one depends on.
    # (`#` comments rather than a docstring, since pydantic copies a class docstring into the JSON schema.)
    depends_on: set[str] = Field(
        default_factory=set,
        description="The dependencies of the model",
    )
47
+
48
+
49
class BuildModelConfig(QueryModelConfig):
    # Configuration of a build model; adds how the model is materialized.
    # (`#` comments rather than a docstring, since pydantic copies a class docstring into the JSON schema.)
    materialization: str = Field(
        default="TABLE",
        description="The materialization of the model (ignored if Python model which is always a table)",
    )

    def get_sql_for_build(self, model_name: str, select_query: str) -> str:
        """
        Wrap select_query in a "CREATE OR REPLACE TABLE|VIEW <model_name> AS" statement.

        The configured materialization is matched case-insensitively.

        Raises:
            ValueError: If the materialization is neither "TABLE" nor "VIEW"
        """
        kind = self.materialization.upper()
        if kind not in ("TABLE", "VIEW"):
            raise ValueError(f"Invalid materialization: {self.materialization}")

        return f"CREATE OR REPLACE {kind} {model_name} AS\n" + select_query
62
+
63
+
64
class DbviewModelConfig(ConnectionInterface, QueryModelConfig):
    # Configuration of a dbview model: a query model with a connection, plus a DuckDB translation switch.
    # (`#` comments rather than a docstring, since pydantic copies a class docstring into the JSON schema.)
    translate_to_duckdb: bool = Field(
        default=False,
        description="Whether to translate the query to DuckDB and use DuckDB tables at runtime",
    )
66
+
67
+
68
class FederateModelConfig(QueryModelConfig):
    # Configuration of a federate model; `eager` picks between a table and a view.
    # (`#` comments rather than a docstring, since pydantic copies a class docstring into the JSON schema.)
    eager: bool = Field(
        default=False,
        description="Whether the model should be materialized for SQL models",
    )

    def get_sql_for_create(self, model_name: str, select_query: str) -> str:
        """
        Wrap select_query in a "CREATE TABLE <model_name> AS" statement when eager,
        otherwise in a "CREATE VIEW <model_name> AS" statement.
        """
        if self.eager:
            materialization = "TABLE"
        else:
            materialization = "VIEW"
        return f"CREATE {materialization} {model_name} AS\n" + select_query
@@ -0,0 +1,52 @@
1
+ from abc import ABCMeta
2
+ from dataclasses import dataclass, field
3
+ from typing import Callable, Generic, TypeVar, Any
4
+ import polars as pl, pandas as pd
5
+
6
+ from .arguments.run_time_args import BuildModelArgs
7
+ from ._model_configs import ModelConfig
8
+
9
+
10
+ # Input query file classes
11
+
12
@dataclass(frozen=True)
class QueryFile(metaclass=ABCMeta):
    """
    Immutable base record for an input query file: a file path together with its raw query.

    Subclasses narrow the type of raw_query.
    """

    filepath: str
    raw_query: Any
16
+
17
@dataclass(frozen=True)
class SqlQueryFile(QueryFile):
    """
    Query file whose raw query is a string.
    """

    raw_query: str
20
+
21
@dataclass(frozen=True)
class PyQueryFile(QueryFile):
    """
    Query file whose raw query is a callable that takes BuildModelArgs and
    returns a polars LazyFrame or a pandas DataFrame.
    """

    raw_query: Callable[[BuildModelArgs], pl.LazyFrame | pd.DataFrame]
24
+
25
+
26
# Type parameters: Q is any QueryFile subtype, M is any ModelConfig subtype
Q = TypeVar('Q', bound=QueryFile)
M = TypeVar('M', bound=ModelConfig)


@dataclass(frozen=True)
class QueryFileWithConfig(Generic[Q, M]):
    """
    Immutable pairing of a query file with its model configuration.
    """

    query_file: Q
    config: M
33
+
34
+
35
+ # Compiled query classes
36
+
37
@dataclass
class Query(metaclass=ABCMeta):
    """
    Base record for a compiled query; subclasses narrow the type of the query attribute.
    """

    query: Any
40
+
41
@dataclass
class WorkInProgress(Query):
    """
    Query whose query attribute is always initialised to None.

    The attribute is excluded from the constructor, so instances are created without arguments.
    """

    query: None = field(init=False, default=None)
44
+
45
@dataclass
class SqlModelQuery(Query):
    """
    Compiled query held as a SQL string, with a flag marking whether it is a DuckDB query.
    """

    query: str
    is_duckdb: bool
49
+
50
@dataclass
class PyModelQuery(Query):
    """
    Compiled query held as a zero-argument callable that returns a polars LazyFrame
    or a pandas DataFrame.
    """

    query: Callable[[], pl.LazyFrame | pd.DataFrame]