squirrels 0.5.0b3__py3-none-any.whl → 0.6.0.post0__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (93):
  1. squirrels/__init__.py +4 -0
  2. squirrels/_api_routes/__init__.py +5 -0
  3. squirrels/_api_routes/auth.py +337 -0
  4. squirrels/_api_routes/base.py +196 -0
  5. squirrels/_api_routes/dashboards.py +156 -0
  6. squirrels/_api_routes/data_management.py +148 -0
  7. squirrels/_api_routes/datasets.py +220 -0
  8. squirrels/_api_routes/project.py +289 -0
  9. squirrels/_api_server.py +440 -792
  10. squirrels/_arguments/__init__.py +0 -0
  11. squirrels/_arguments/{_init_time_args.py → init_time_args.py} +23 -43
  12. squirrels/_arguments/{_run_time_args.py → run_time_args.py} +32 -68
  13. squirrels/_auth.py +590 -264
  14. squirrels/_command_line.py +130 -58
  15. squirrels/_compile_prompts.py +147 -0
  16. squirrels/_connection_set.py +16 -15
  17. squirrels/_constants.py +36 -11
  18. squirrels/_dashboards.py +179 -0
  19. squirrels/_data_sources.py +40 -34
  20. squirrels/_dataset_types.py +16 -11
  21. squirrels/_env_vars.py +209 -0
  22. squirrels/_exceptions.py +9 -37
  23. squirrels/_http_error_responses.py +52 -0
  24. squirrels/_initializer.py +7 -6
  25. squirrels/_logging.py +121 -0
  26. squirrels/_manifest.py +155 -77
  27. squirrels/_mcp_server.py +578 -0
  28. squirrels/_model_builder.py +11 -55
  29. squirrels/_model_configs.py +5 -5
  30. squirrels/_model_queries.py +1 -1
  31. squirrels/_models.py +276 -143
  32. squirrels/_package_data/base_project/.env +1 -24
  33. squirrels/_package_data/base_project/.env.example +31 -17
  34. squirrels/_package_data/base_project/connections.yml +4 -3
  35. squirrels/_package_data/base_project/dashboards/dashboard_example.py +13 -7
  36. squirrels/_package_data/base_project/dashboards/dashboard_example.yml +6 -6
  37. squirrels/_package_data/base_project/docker/Dockerfile +2 -2
  38. squirrels/_package_data/base_project/docker/compose.yml +1 -1
  39. squirrels/_package_data/base_project/duckdb_init.sql +1 -0
  40. squirrels/_package_data/base_project/models/builds/build_example.py +2 -2
  41. squirrels/_package_data/base_project/models/dbviews/dbview_example.sql +7 -2
  42. squirrels/_package_data/base_project/models/dbviews/dbview_example.yml +16 -10
  43. squirrels/_package_data/base_project/models/federates/federate_example.py +27 -17
  44. squirrels/_package_data/base_project/models/federates/federate_example.sql +3 -7
  45. squirrels/_package_data/base_project/models/federates/federate_example.yml +7 -7
  46. squirrels/_package_data/base_project/models/sources.yml +5 -6
  47. squirrels/_package_data/base_project/parameters.yml +24 -38
  48. squirrels/_package_data/base_project/pyconfigs/connections.py +8 -3
  49. squirrels/_package_data/base_project/pyconfigs/context.py +26 -14
  50. squirrels/_package_data/base_project/pyconfigs/parameters.py +124 -81
  51. squirrels/_package_data/base_project/pyconfigs/user.py +48 -15
  52. squirrels/_package_data/base_project/resources/public/.gitkeep +0 -0
  53. squirrels/_package_data/base_project/seeds/seed_categories.yml +1 -1
  54. squirrels/_package_data/base_project/seeds/seed_subcategories.yml +1 -1
  55. squirrels/_package_data/base_project/squirrels.yml.j2 +21 -31
  56. squirrels/_package_data/templates/login_successful.html +53 -0
  57. squirrels/_package_data/templates/squirrels_studio.html +22 -0
  58. squirrels/_parameter_configs.py +43 -22
  59. squirrels/_parameter_options.py +1 -1
  60. squirrels/_parameter_sets.py +41 -30
  61. squirrels/_parameters.py +560 -123
  62. squirrels/_project.py +487 -277
  63. squirrels/_py_module.py +71 -10
  64. squirrels/_request_context.py +33 -0
  65. squirrels/_schemas/__init__.py +0 -0
  66. squirrels/_schemas/auth_models.py +83 -0
  67. squirrels/_schemas/query_param_models.py +70 -0
  68. squirrels/_schemas/request_models.py +26 -0
  69. squirrels/_schemas/response_models.py +286 -0
  70. squirrels/_seeds.py +52 -13
  71. squirrels/_sources.py +29 -23
  72. squirrels/_utils.py +221 -42
  73. squirrels/_version.py +1 -3
  74. squirrels/arguments.py +7 -2
  75. squirrels/auth.py +4 -0
  76. squirrels/connections.py +2 -0
  77. squirrels/dashboards.py +3 -1
  78. squirrels/data_sources.py +6 -0
  79. squirrels/parameter_options.py +5 -0
  80. squirrels/parameters.py +5 -0
  81. squirrels/types.py +10 -3
  82. squirrels-0.6.0.post0.dist-info/METADATA +148 -0
  83. squirrels-0.6.0.post0.dist-info/RECORD +101 -0
  84. {squirrels-0.5.0b3.dist-info → squirrels-0.6.0.post0.dist-info}/WHEEL +1 -1
  85. squirrels/_api_response_models.py +0 -190
  86. squirrels/_dashboard_types.py +0 -82
  87. squirrels/_dashboards_io.py +0 -79
  88. squirrels-0.5.0b3.dist-info/METADATA +0 -110
  89. squirrels-0.5.0b3.dist-info/RECORD +0 -80
  90. /squirrels/_package_data/base_project/{assets → resources}/expenses.db +0 -0
  91. /squirrels/_package_data/base_project/{assets → resources}/weather.db +0 -0
  92. {squirrels-0.5.0b3.dist-info → squirrels-0.6.0.post0.dist-info}/entry_points.txt +0 -0
  93. {squirrels-0.5.0b3.dist-info → squirrels-0.6.0.post0.dist-info}/licenses/LICENSE +0 -0
@@ -1,31 +1,26 @@
1
1
  from dataclasses import dataclass, field
2
- import asyncio, shutil, duckdb, time
2
+ import duckdb, time
3
3
 
4
4
  from . import _utils as u, _connection_set as cs, _models as m
5
- from ._exceptions import InvalidInputError
6
5
 
7
6
 
8
7
  @dataclass
9
8
  class ModelBuilder:
10
- _duckdb_venv_path: str
9
+ _datalake_db_path: str
11
10
  _conn_set: cs.ConnectionSet
12
11
  _static_models: dict[str, m.StaticModel]
13
- _conn_args: cs.ConnectionsArgs = field(default_factory=lambda: cs.ConnectionsArgs(".", {}, {}))
12
+ _conn_args: cs.ConnectionsArgs
14
13
  _logger: u.Logger = field(default_factory=lambda: u.Logger(""))
15
14
 
16
- def _attach_connections(self, duckdb_conn: duckdb.DuckDBPyConnection) -> dict[str, str]:
17
- dialect_by_conn_name: dict[str, str] = {}
15
+ def _attach_connections(self, duckdb_conn: duckdb.DuckDBPyConnection) -> None:
18
16
  for conn_name, conn_props in self._conn_set.get_connections_as_dict().items():
19
17
  if not isinstance(conn_props, m.ConnectionProperties):
20
18
  continue
21
- dialect = conn_props.dialect
22
19
  attach_uri = conn_props.attach_uri_for_duckdb
23
20
  if attach_uri is None:
24
21
  continue # skip unsupported dialects
25
- attach_stmt = f"ATTACH IF NOT EXISTS '{attach_uri}' AS db_{conn_name} (TYPE {dialect}, READ_ONLY)"
22
+ attach_stmt = f"ATTACH IF NOT EXISTS '{attach_uri}' AS db_{conn_name} (READ_ONLY)"
26
23
  u.run_duckdb_stmt(self._logger, duckdb_conn, attach_stmt, redacted_values=[attach_uri])
27
- dialect_by_conn_name[conn_name] = dialect
28
- return dialect_by_conn_name
29
24
 
30
25
  async def _build_models(self, duckdb_conn: duckdb.DuckDBPyConnection, select: str | None, full_refresh: bool) -> None:
31
26
  """
@@ -50,64 +45,25 @@ class ModelBuilder:
50
45
  coroutines = []
51
46
  for model_name in terminal_nodes:
52
47
  model = self._static_models[model_name]
48
+ # await model.build_model(duckdb_conn, full_refresh)
53
49
  coro = model.build_model(duckdb_conn, full_refresh)
54
50
  coroutines.append(coro)
55
51
  await u.asyncio_gather(coroutines)
56
52
 
57
- async def build(self, full_refresh: bool, select: str | None, stage_file: bool) -> None:
53
+ async def build(self, full_refresh: bool, select: str | None) -> None:
58
54
  start = time.time()
59
55
 
60
- # Create target folder if it doesn't exist
61
- duckdb_path = u.Path(self._duckdb_venv_path)
62
- duckdb_path.parent.mkdir(parents=True, exist_ok=True)
63
-
64
- # Delete any existing DuckDB file if full refresh is requested
65
- duckdb_dev_path = u.Path(self._duckdb_venv_path + ".dev")
66
- duckdb_stg_path = u.Path(self._duckdb_venv_path + ".stg")
67
-
68
- # If the development copy is already in use, a concurrent build is not allowed
69
- duckdb_dev_lock_path = u.Path(self._duckdb_venv_path + ".dev.lock")
70
- if duckdb_dev_lock_path.exists():
71
- raise InvalidInputError(60, "An existing build process is already running and a concurrent build is not allowed")
72
- duckdb_dev_lock_path.touch(exist_ok=False)
73
-
74
- # Ensure the lock file is deleted even if an exception is raised
75
- try:
76
- # If not full refresh, create a development copy of the existing virtual data environment
77
- if not full_refresh:
78
- if duckdb_stg_path.exists():
79
- duckdb_stg_path.replace(duckdb_dev_path)
80
- elif duckdb_path.exists():
81
- shutil.copy(duckdb_path, duckdb_dev_path)
82
- else:
83
- duckdb_dev_path.unlink(missing_ok=True) # delete any lingering development copy to create a fresh one later
84
-
85
- self._logger.log_activity_time("creating development copy of virtual data environment", start)
86
-
87
- # Connect to DuckDB file
88
- duckdb_conn = u.create_duckdb_connection(duckdb_dev_path)
89
-
90
- except Exception:
91
- duckdb_dev_lock_path.unlink()
92
- raise
56
+ # Connect directly to DuckLake instead of attaching (supports concurrent connections)
57
+ duckdb_conn = u.create_duckdb_connection(self._datalake_db_path)
93
58
 
94
- # Sometimes code after conn.close() doesn't run (as if the python process is killed but no error is raised)
95
- # Using a new try block to ensure the lock file is removed before closing the connection
96
59
  try:
97
60
  # Attach connections
98
61
  self._attach_connections(duckdb_conn)
99
62
 
100
63
  # Construct build models
101
64
  await self._build_models(duckdb_conn, select, full_refresh)
102
-
65
+
103
66
  finally:
104
- duckdb_dev_lock_path.unlink()
105
67
  duckdb_conn.close()
106
68
 
107
- # Rename duckdb_dev_path to duckdb_path (or duckdb_stg_path if stage_file is True)
108
- if stage_file:
109
- duckdb_dev_path.replace(duckdb_stg_path)
110
- else:
111
- duckdb_dev_path.replace(duckdb_path)
112
-
113
- self._logger.log_activity_time("TOTAL TIME to build virtual data environment", start)
69
+ self._logger.log_activity_time("TOTAL TIME to build the Virtual Data Lake (VDL)", start)
@@ -1,7 +1,7 @@
1
1
  from enum import Enum
2
2
  from pydantic import BaseModel, Field
3
3
 
4
- from . import _constants as c
4
+ from ._env_vars import SquirrelsEnvVars
5
5
 
6
6
 
7
7
  class ColumnCategory(Enum):
@@ -13,7 +13,7 @@ class ColumnCategory(Enum):
13
13
  class ColumnConfig(BaseModel):
14
14
  name: str = Field(description="The name of the column")
15
15
  type: str = Field(default="", description="The type of the column such as 'string', 'integer', 'float', 'boolean', 'datetime', etc.")
16
- condition: str = Field(default="", description="The condition of when the column is included")
16
+ condition: list[str] = Field(default_factory=list, description="The condition(s) of when the column is included. Only for documentation purposes.")
17
17
  description: str = Field(default="", description="The description of the column")
18
18
  category: ColumnCategory = Field(default=ColumnCategory.MISC, description="The category of the column, either 'dimension', 'measure', or 'misc'")
19
19
  depends_on: set[str] = Field(default_factory=set, description="List of dependent columns")
@@ -32,9 +32,9 @@ class SeedConfig(ModelConfig):
32
32
  class ConnectionInterface(BaseModel):
33
33
  connection: str | None = Field(default=None, description="The connection name of the source model / database view")
34
34
 
35
- def finalize_connection(self, env_vars: dict[str, str]):
35
+ def finalize_connection(self, *, default_conn_name: str = "default"):
36
36
  if self.connection is None:
37
- self.connection = env_vars.get(c.SQRL_CONNECTIONS_DEFAULT_NAME_USED, "default")
37
+ self.connection = default_conn_name
38
38
  return self
39
39
 
40
40
  def get_connection(self) -> str:
@@ -66,7 +66,7 @@ class DbviewModelConfig(ConnectionInterface, QueryModelConfig):
66
66
 
67
67
 
68
68
  class FederateModelConfig(QueryModelConfig):
69
- eager: bool = Field(default=False, description="Whether the model should be materialized for SQL models")
69
+ eager: bool = Field(default=False, description="Whether the model should always be materialized in memory for SQL models")
70
70
 
71
71
  def get_sql_for_create(self, model_name: str, select_query: str) -> str:
72
72
  materialization = "TABLE" if self.eager else "VIEW"
@@ -3,7 +3,7 @@ from dataclasses import dataclass, field
3
3
  from typing import Callable, Generic, TypeVar, Any
4
4
  import polars as pl, pandas as pd
5
5
 
6
- from ._arguments._run_time_args import BuildModelArgs
6
+ from ._arguments.run_time_args import BuildModelArgs
7
7
  from ._model_configs import ModelConfig
8
8
 
9
9