squirrels 0.1.0__py3-none-any.whl → 0.6.0.post0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127):
  1. dateutils/__init__.py +6 -0
  2. dateutils/_enums.py +25 -0
  3. squirrels/dateutils.py → dateutils/_implementation.py +409 -380
  4. dateutils/types.py +6 -0
  5. squirrels/__init__.py +21 -18
  6. squirrels/_api_routes/__init__.py +5 -0
  7. squirrels/_api_routes/auth.py +337 -0
  8. squirrels/_api_routes/base.py +196 -0
  9. squirrels/_api_routes/dashboards.py +156 -0
  10. squirrels/_api_routes/data_management.py +148 -0
  11. squirrels/_api_routes/datasets.py +220 -0
  12. squirrels/_api_routes/project.py +289 -0
  13. squirrels/_api_server.py +552 -134
  14. squirrels/_arguments/__init__.py +0 -0
  15. squirrels/_arguments/init_time_args.py +83 -0
  16. squirrels/_arguments/run_time_args.py +111 -0
  17. squirrels/_auth.py +777 -0
  18. squirrels/_command_line.py +239 -107
  19. squirrels/_compile_prompts.py +147 -0
  20. squirrels/_connection_set.py +94 -0
  21. squirrels/_constants.py +141 -64
  22. squirrels/_dashboards.py +179 -0
  23. squirrels/_data_sources.py +570 -0
  24. squirrels/_dataset_types.py +91 -0
  25. squirrels/_env_vars.py +209 -0
  26. squirrels/_exceptions.py +29 -0
  27. squirrels/_http_error_responses.py +52 -0
  28. squirrels/_initializer.py +319 -110
  29. squirrels/_logging.py +121 -0
  30. squirrels/_manifest.py +357 -187
  31. squirrels/_mcp_server.py +578 -0
  32. squirrels/_model_builder.py +69 -0
  33. squirrels/_model_configs.py +74 -0
  34. squirrels/_model_queries.py +52 -0
  35. squirrels/_models.py +1201 -0
  36. squirrels/_package_data/base_project/.env +7 -0
  37. squirrels/_package_data/base_project/.env.example +44 -0
  38. squirrels/_package_data/base_project/connections.yml +16 -0
  39. squirrels/_package_data/base_project/dashboards/dashboard_example.py +40 -0
  40. squirrels/_package_data/base_project/dashboards/dashboard_example.yml +22 -0
  41. squirrels/_package_data/base_project/docker/.dockerignore +16 -0
  42. squirrels/_package_data/base_project/docker/Dockerfile +16 -0
  43. squirrels/_package_data/base_project/docker/compose.yml +7 -0
  44. squirrels/_package_data/base_project/duckdb_init.sql +10 -0
  45. squirrels/_package_data/base_project/gitignore +13 -0
  46. squirrels/_package_data/base_project/macros/macros_example.sql +17 -0
  47. squirrels/_package_data/base_project/models/builds/build_example.py +26 -0
  48. squirrels/_package_data/base_project/models/builds/build_example.sql +16 -0
  49. squirrels/_package_data/base_project/models/builds/build_example.yml +57 -0
  50. squirrels/_package_data/base_project/models/dbviews/dbview_example.sql +17 -0
  51. squirrels/_package_data/base_project/models/dbviews/dbview_example.yml +32 -0
  52. squirrels/_package_data/base_project/models/federates/federate_example.py +51 -0
  53. squirrels/_package_data/base_project/models/federates/federate_example.sql +21 -0
  54. squirrels/_package_data/base_project/models/federates/federate_example.yml +65 -0
  55. squirrels/_package_data/base_project/models/sources.yml +38 -0
  56. squirrels/_package_data/base_project/parameters.yml +142 -0
  57. squirrels/_package_data/base_project/pyconfigs/connections.py +19 -0
  58. squirrels/_package_data/base_project/pyconfigs/context.py +96 -0
  59. squirrels/_package_data/base_project/pyconfigs/parameters.py +141 -0
  60. squirrels/_package_data/base_project/pyconfigs/user.py +56 -0
  61. squirrels/_package_data/base_project/resources/expenses.db +0 -0
  62. squirrels/_package_data/base_project/resources/public/.gitkeep +0 -0
  63. squirrels/_package_data/base_project/resources/weather.db +0 -0
  64. squirrels/_package_data/base_project/seeds/seed_categories.csv +6 -0
  65. squirrels/_package_data/base_project/seeds/seed_categories.yml +15 -0
  66. squirrels/_package_data/base_project/seeds/seed_subcategories.csv +15 -0
  67. squirrels/_package_data/base_project/seeds/seed_subcategories.yml +21 -0
  68. squirrels/_package_data/base_project/squirrels.yml.j2 +61 -0
  69. squirrels/_package_data/base_project/tmp/.gitignore +2 -0
  70. squirrels/_package_data/templates/login_successful.html +53 -0
  71. squirrels/_package_data/templates/squirrels_studio.html +22 -0
  72. squirrels/_package_loader.py +29 -0
  73. squirrels/_parameter_configs.py +592 -0
  74. squirrels/_parameter_options.py +348 -0
  75. squirrels/_parameter_sets.py +207 -0
  76. squirrels/_parameters.py +1703 -0
  77. squirrels/_project.py +796 -0
  78. squirrels/_py_module.py +122 -0
  79. squirrels/_request_context.py +33 -0
  80. squirrels/_schemas/__init__.py +0 -0
  81. squirrels/_schemas/auth_models.py +83 -0
  82. squirrels/_schemas/query_param_models.py +70 -0
  83. squirrels/_schemas/request_models.py +26 -0
  84. squirrels/_schemas/response_models.py +286 -0
  85. squirrels/_seeds.py +97 -0
  86. squirrels/_sources.py +112 -0
  87. squirrels/_utils.py +540 -149
  88. squirrels/_version.py +1 -3
  89. squirrels/arguments.py +7 -0
  90. squirrels/auth.py +4 -0
  91. squirrels/connections.py +3 -0
  92. squirrels/dashboards.py +3 -0
  93. squirrels/data_sources.py +14 -282
  94. squirrels/parameter_options.py +13 -189
  95. squirrels/parameters.py +14 -801
  96. squirrels/types.py +18 -0
  97. squirrels-0.6.0.post0.dist-info/METADATA +148 -0
  98. squirrels-0.6.0.post0.dist-info/RECORD +101 -0
  99. {squirrels-0.1.0.dist-info → squirrels-0.6.0.post0.dist-info}/WHEEL +1 -2
  100. {squirrels-0.1.0.dist-info → squirrels-0.6.0.post0.dist-info}/entry_points.txt +1 -0
  101. squirrels-0.6.0.post0.dist-info/licenses/LICENSE +201 -0
  102. squirrels/_credentials_manager.py +0 -87
  103. squirrels/_module_loader.py +0 -37
  104. squirrels/_parameter_set.py +0 -151
  105. squirrels/_renderer.py +0 -286
  106. squirrels/_timed_imports.py +0 -37
  107. squirrels/connection_set.py +0 -126
  108. squirrels/package_data/base_project/.gitignore +0 -4
  109. squirrels/package_data/base_project/connections.py +0 -21
  110. squirrels/package_data/base_project/database/sample_database.db +0 -0
  111. squirrels/package_data/base_project/database/seattle_weather.db +0 -0
  112. squirrels/package_data/base_project/datasets/sample_dataset/context.py +0 -8
  113. squirrels/package_data/base_project/datasets/sample_dataset/database_view1.py +0 -23
  114. squirrels/package_data/base_project/datasets/sample_dataset/database_view1.sql.j2 +0 -7
  115. squirrels/package_data/base_project/datasets/sample_dataset/final_view.py +0 -10
  116. squirrels/package_data/base_project/datasets/sample_dataset/final_view.sql.j2 +0 -2
  117. squirrels/package_data/base_project/datasets/sample_dataset/parameters.py +0 -30
  118. squirrels/package_data/base_project/datasets/sample_dataset/selections.cfg +0 -6
  119. squirrels/package_data/base_project/squirrels.yaml +0 -26
  120. squirrels/package_data/static/favicon.ico +0 -0
  121. squirrels/package_data/static/script.js +0 -234
  122. squirrels/package_data/static/style.css +0 -110
  123. squirrels/package_data/templates/index.html +0 -32
  124. squirrels-0.1.0.dist-info/LICENSE +0 -22
  125. squirrels-0.1.0.dist-info/METADATA +0 -67
  126. squirrels-0.1.0.dist-info/RECORD +0 -40
  127. squirrels-0.1.0.dist-info/top_level.txt +0 -1
squirrels/_seeds.py ADDED
@@ -0,0 +1,97 @@
1
+ from dataclasses import dataclass
2
+ import os
3
+ import re
4
+ import time
5
+ import glob
6
+ import json
7
+
8
+ import polars as pl
9
+
10
+ from ._exceptions import ConfigurationError
11
+ from . import _utils as u, _constants as c, _model_configs as mc
12
+ from ._env_vars import SquirrelsEnvVars
13
+
14
+
15
@dataclass
class Seed:
    """A seed's configuration together with its (lazy) data frame.

    After construction, if ``config.cast_column_types`` is truthy, every column
    listed in ``config.columns`` is cast to the polars dtype named by its
    ``type`` field.
    """
    config: mc.SeedConfig
    df: pl.LazyFrame

    def __post_init__(self):
        """Cast the configured columns of ``df`` when casting is enabled.

        Raises:
            ConfigurationError: if a column's type is neither a decimal type
                nor a key of ``u.sqrl_dtypes_to_polars_dtypes``.
        """
        if self.config.cast_column_types:
            exprs = []
            for col_config in self.config.columns:
                # Type names are matched case-insensitively.
                col_type = col_config.type.lower()
                if col_type.startswith("decimal"):
                    polars_dtype = self._parse_decimal_type(col_type)
                else:
                    try:
                        polars_dtype = u.sqrl_dtypes_to_polars_dtypes[col_type]
                    except KeyError as e:
                        raise ConfigurationError(f"Unknown column type: '{col_type}'") from e

                exprs.append(pl.col(col_config.name).cast(polars_dtype))

            self.df = self.df.with_columns(*exprs)

    @staticmethod
    def _parse_decimal_type(col_type: str) -> pl.Decimal:
        """Parse a decimal type string and return the appropriate polars Decimal type.

        Supports formats: "decimal" or "decimal(precision, scale)". A bare
        "decimal" maps to precision 18 and scale 2.

        Args:
            col_type: The (already lower-cased) type string to parse.

        Raises:
            ConfigurationError: if the string is not one of the supported formats.
        """
        # fullmatch (not match) so that trailing garbage such as
        # "decimal(10, 2)xyz" is rejected instead of silently accepted.
        # Trailing whitespace is still tolerated.
        match = re.fullmatch(r"decimal\s*\(\s*(\d+)\s*,\s*(\d+)\s*\)\s*", col_type)
        if match:
            precision = int(match.group(1))
            scale = int(match.group(2))
            return pl.Decimal(precision=precision, scale=scale)

        if col_type == "decimal":
            return pl.Decimal(precision=18, scale=2)

        raise ConfigurationError(f"Unknown column type: '{col_type}'")
55
+
56
+
57
@dataclass
class Seeds:
    """Container for the loaded seeds, keyed by name."""
    _data: dict[str, Seed]

    def run_query(self, sql_query: str) -> pl.DataFrame:
        """Run ``sql_query`` through ``u.run_sql_on_dataframes``.

        The helper receives a mapping of each seed key to that seed's ``df``.
        """
        frames_by_name = {}
        for name, seed in self._data.items():
            frames_by_name[name] = seed.df
        return u.run_sql_on_dataframes(sql_query, frames_by_name)

    def get_dataframes(self) -> dict[str, Seed]:
        """Return a shallow copy of the key -> ``Seed`` mapping."""
        return dict(self._data)
67
+
68
+
69
class SeedsIO:
    """Loads seed CSV files (and their optional YAML configs) from a project."""

    @classmethod
    def load_files(cls, logger: u.Logger, env_vars: SquirrelsEnvVars) -> Seeds:
        """Read every ``*.csv`` under the project's seeds folder into a ``Seeds``.

        For each CSV, a config is built from the ``.yml`` file with the same
        path stem when that file exists, otherwise from an empty dict. Each
        seed is keyed by the CSV's base name without extension.

        Args:
            logger: Used to record how long loading took.
            env_vars: Supplies ``project_path``, ``seeds_infer_schema`` and
                ``seeds_na_values``.

        Returns:
            The ``Seeds`` collection built from the files found.
        """
        start = time.time()
        project_path = env_vars.project_path
        infer_schema_setting: bool = env_vars.seeds_infer_schema
        na_values_setting: list[str] = env_vars.seeds_na_values

        seeds_dict = {}
        pattern = os.path.join(project_path, c.SEEDS_FOLDER, '**/*.csv')
        # glob returns paths in arbitrary order. Sort them so that loading is
        # deterministic: when two files in different subfolders share a stem,
        # the lexicographically later path consistently replaces the earlier.
        csv_files = sorted(glob.glob(pattern, recursive=True))
        for csv_file in csv_files:
            config_file = os.path.splitext(csv_file)[0] + '.yml'
            config_dict = u.load_yaml_config(config_file) if os.path.exists(config_file) else {}
            config = mc.SeedConfig(**config_dict)

            file_stem = os.path.splitext(os.path.basename(csv_file))[0]
            # Schema inference is skipped when explicit casts are configured,
            # since Seed.__post_init__ then casts the columns itself.
            infer_schema = not config.cast_column_types and infer_schema_setting
            df = pl.read_csv(
                csv_file, try_parse_dates=True,
                infer_schema=infer_schema,
                null_values=na_values_setting
            ).lazy()

            seeds_dict[file_stem] = Seed(config, df)

        seeds = Seeds(seeds_dict)
        logger.log_activity_time("loading seed files", start)
        return seeds
squirrels/_sources.py ADDED
@@ -0,0 +1,112 @@
1
+ from typing import Any
2
+ from pydantic import BaseModel, Field, model_validator
3
+ import time, yaml
4
+
5
+ from . import _utils as u, _constants as c, _model_configs as mc
6
+ from ._env_vars import SquirrelsEnvVars
7
+
8
+
9
class UpdateHints(BaseModel):
    """Optional hints about a source's increasing column (see ``Source.update_hints``)."""
    # Name of the increasing column; None when no such column is declared.
    increasing_column: str | None = None
    strictly_increasing: bool = Field(
        True,
        description="Delete the max value of the increasing column, ignored if selective_overwrite_value is set",
    )
    selective_overwrite_value: Any = Field(
        None,
        description="Delete all values of the increasing column greater than or equal to this value",
    )
13
+
14
+
15
class Source(mc.ConnectionInterface, mc.ModelConfig):
    """Configuration of one source table plus helpers that build SQL text for it."""
    # Table name; filled in from the source's name by finalize_table() when omitted.
    table: str | None = Field(default=None)
    load_to_vdl: bool = Field(default=False, description="Whether to load the data to the 'virtual data lake' (VDL)")
    primary_key: list[str] = Field(default_factory=list)
    update_hints: UpdateHints = Field(default_factory=UpdateHints)

    def finalize_table(self, source_name: str):
        """Default ``table`` to ``source_name`` when it was not set; return self."""
        if self.table is None:
            self.table = source_name
        return self

    def get_table(self) -> str:
        """Return the table name; expects ``table`` to have been set already."""
        assert self.table is not None, "Table must be set"
        return self.table

    def get_cols_for_create_table_stmt(self) -> str:
        """Return the columns as a comma-separated ``name type`` list."""
        return ", ".join(f"{col.name} {col.type}" for col in self.columns)

    def get_max_incr_col_query(self, source_name: str) -> str:
        """Return a query selecting the max of the increasing column from ``source_name``."""
        return f"SELECT max({self.update_hints.increasing_column}) FROM {source_name}"

    def get_query_for_upsert(self, dialect: str, conn_name: str, table_name: str, max_value_of_increasing_col: Any | None, *, full_refresh: bool = True) -> str:
        """Build the SELECT used to read rows from ``db_{conn_name}.{table_name}``.

        Args:
            dialect: Currently unused (reserved for the pushdown idea in the TODO below).
            conn_name: Connection name; the table is addressed as ``db_{conn_name}.{table_name}``.
            table_name: Name of the table to select from.
            max_value_of_increasing_col: Largest value already loaded for the
                increasing column, or None if there is none.
            full_refresh: When true, select all rows regardless of the max value.

        Returns:
            A SELECT over all configured columns, restricted to rows whose
            increasing column exceeds the given max value unless a full
            refresh is requested or no max value is available.

        Raises:
            u.ConfigurationError: if an incremental query is requested but the
                increasing column is not one of the configured columns.
        """
        select_cols = ", ".join(col.name for col in self.columns)
        base_query = f"SELECT {select_cols} FROM db_{conn_name}.{table_name}"
        if full_refresh or max_value_of_increasing_col is None:
            return base_query

        increasing_col = self.update_hints.increasing_column
        # Look the column up with a default so that a missing/unset increasing
        # column yields a clear configuration error, not a bare StopIteration.
        incr_col_config = next((col for col in self.columns if col.name == increasing_col), None)
        if incr_col_config is None:
            raise u.ConfigurationError(
                f"Increasing column '{increasing_col}' is not defined in the columns of table '{table_name}'"
            )
        increasing_col_type = incr_col_config.type

        # The value is embedded in a single-quoted SQL literal, so any single
        # quote inside it must be doubled to keep the statement well-formed.
        max_value_literal = f"{max_value_of_increasing_col}".replace("'", "''")
        where_cond = f"{increasing_col}::{increasing_col_type} > '{max_value_literal}'::{increasing_col_type}"

        # TODO: figure out if using pushdown query is worth it
        # if dialect in ['postgres', 'mysql']:
        #     pushdown_query = f"SELECT {select_cols} FROM {table_name} WHERE {where_cond}"
        #     transpiled_query = sqlglot.transpile(pushdown_query, read='duckdb', write=dialect)[0].replace("'", "''")
        #     return f"FROM {dialect}_query('db_{conn_name}', '{transpiled_query}')"

        return f"{base_query} WHERE {where_cond}"
53
+
54
+
55
class Sources(BaseModel):
    """All configured sources, keyed by source name."""
    sources: dict[str, Source] = Field(default_factory=dict)

    @model_validator(mode="before")
    @classmethod
    def convert_sources_list_to_dict(cls, data: dict[str, Any]) -> dict[str, Any]:
        """Turn a list of ``{"name": ..., ...}`` entries into a name-keyed dict.

        Input whose ``sources`` value is not a list is returned untouched. The
        caller's objects are never modified: new dicts are built instead.

        Raises:
            u.ConfigurationError: if an entry lacks a ``name`` or a name repeats.
        """
        # A "before" validator can receive arbitrary input; leave anything
        # that is not the list form for regular validation to handle.
        if not isinstance(data, dict) or not isinstance(data.get("sources"), list):
            return data

        sources_dict = {}
        for source in data["sources"]:
            if not (isinstance(source, dict) and "name" in source):
                raise u.ConfigurationError("All sources must have a name field in sources file")

            name = source["name"]
            if name in sources_dict:
                raise u.ConfigurationError(f"Duplicate source name found: {name}")
            # Copy without the name key rather than popping from the input.
            sources_dict[name] = {key: value for key, value in source.items() if key != "name"}

        return {**data, "sources": sources_dict}

    @model_validator(mode="after")
    def validate_column_types(self):
        """Require every column of every source to declare a non-empty type.

        Raises:
            u.ConfigurationError: naming the first column found without a type.
        """
        for source_name, source in self.sources.items():
            for col in source.columns:
                if not col.type:
                    raise u.ConfigurationError(f"Column '{col.name}' in source '{source_name}' must have a type specified")
        return self

    def finalize_null_fields(self, env_vars: SquirrelsEnvVars):
        """Finalize each source's connection and table name; return self.

        The default connection name comes from
        ``env_vars.connections_default_name_used``; the table name defaults to
        the source's own name.
        """
        default_conn_name = env_vars.connections_default_name_used
        for source_name, source in self.sources.items():
            source.finalize_connection(default_conn_name=default_conn_name)
            source.finalize_table(source_name)
        return self
89
+
90
+
91
class SourcesIO:
    """Reads the project's sources file into a ``Sources`` model."""

    @classmethod
    def load_file(cls, logger: u.Logger, env_vars: SquirrelsEnvVars, env_vars_unformatted: dict[str, str]) -> Sources:
        """Load, render and parse the sources file, then finalize the result.

        A missing file (or one that parses to an empty value) yields an empty
        ``Sources``. The file's text is passed through ``u.render_string``
        before being parsed as YAML.

        Raises:
            u.ConfigurationError: if the parsed YAML is not a dictionary.
        """
        start = time.time()

        # Start from "no sources" and only replace it when the file exists.
        sources_data = {}
        sources_path = u.Path(env_vars.project_path, c.MODELS_FOLDER, c.SOURCES_FILE)
        if sources_path.exists():
            rendered = u.render_string(
                u.read_file(sources_path),
                project_path=env_vars.project_path,
                env_vars=env_vars_unformatted,
            )
            parsed = yaml.safe_load(rendered)
            sources_data = parsed or {}

        if not isinstance(sources_data, dict):
            raise u.ConfigurationError(
                f"Parsed content from YAML file must be a dictionary. Got: {sources_data}"
            )

        loaded = Sources(**sources_data)
        loaded = loaded.finalize_null_fields(env_vars)

        logger.log_activity_time("loading sources", start)
        return loaded