sunstone-py 0.4.2__tar.gz → 0.5.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24)
  1. {sunstone_py-0.4.2 → sunstone_py-0.5.1}/PKG-INFO +5 -4
  2. {sunstone_py-0.4.2 → sunstone_py-0.5.1}/README.md +3 -3
  3. {sunstone_py-0.4.2 → sunstone_py-0.5.1}/pyproject.toml +7 -1
  4. {sunstone_py-0.4.2 → sunstone_py-0.5.1}/src/sunstone/_release.py +3 -20
  5. {sunstone_py-0.4.2 → sunstone_py-0.5.1}/src/sunstone/dataframe.py +39 -7
  6. {sunstone_py-0.4.2 → sunstone_py-0.5.1}/src/sunstone/datasets.py +98 -4
  7. {sunstone_py-0.4.2 → sunstone_py-0.5.1}/src/sunstone_py.egg-info/PKG-INFO +5 -4
  8. {sunstone_py-0.4.2 → sunstone_py-0.5.1}/src/sunstone_py.egg-info/requires.txt +1 -0
  9. {sunstone_py-0.4.2 → sunstone_py-0.5.1}/tests/test_dataframe.py +17 -16
  10. {sunstone_py-0.4.2 → sunstone_py-0.5.1}/tests/test_datasets.py +42 -41
  11. {sunstone_py-0.4.2 → sunstone_py-0.5.1}/tests/test_lineage_persistence.py +6 -4
  12. {sunstone_py-0.4.2 → sunstone_py-0.5.1}/tests/test_pandas_compatibility.py +55 -54
  13. {sunstone_py-0.4.2 → sunstone_py-0.5.1}/LICENSE +0 -0
  14. {sunstone_py-0.4.2 → sunstone_py-0.5.1}/setup.cfg +0 -0
  15. {sunstone_py-0.4.2 → sunstone_py-0.5.1}/src/sunstone/__init__.py +0 -0
  16. {sunstone_py-0.4.2 → sunstone_py-0.5.1}/src/sunstone/exceptions.py +0 -0
  17. {sunstone_py-0.4.2 → sunstone_py-0.5.1}/src/sunstone/lineage.py +0 -0
  18. {sunstone_py-0.4.2 → sunstone_py-0.5.1}/src/sunstone/pandas.py +0 -0
  19. {sunstone_py-0.4.2 → sunstone_py-0.5.1}/src/sunstone/py.typed +0 -0
  20. {sunstone_py-0.4.2 → sunstone_py-0.5.1}/src/sunstone/validation.py +0 -0
  21. {sunstone_py-0.4.2 → sunstone_py-0.5.1}/src/sunstone_py.egg-info/SOURCES.txt +0 -0
  22. {sunstone_py-0.4.2 → sunstone_py-0.5.1}/src/sunstone_py.egg-info/dependency_links.txt +0 -0
  23. {sunstone_py-0.4.2 → sunstone_py-0.5.1}/src/sunstone_py.egg-info/entry_points.txt +0 -0
  24. {sunstone_py-0.4.2 → sunstone_py-0.5.1}/src/sunstone_py.egg-info/top_level.txt +0 -0
{sunstone_py-0.4.2 → sunstone_py-0.5.1}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sunstone-py
-Version: 0.4.2
+Version: 0.5.1
 Summary: Python library for managing datasets with lineage tracking in Sunstone projects
 Author-email: Sunstone Institute <stig@sunstone.institute>
 License: MIT
@@ -24,6 +24,7 @@ Requires-Dist: google-auth>=2.43.0
 Requires-Dist: pandas>=2.0.0
 Requires-Dist: pyyaml>=6.0
 Requires-Dist: requests>=2.31.0
+Requires-Dist: ruamel-yaml>=0.18
 Dynamic: license-file
 
 # sunstone-py
@@ -324,14 +325,14 @@ uv run pytest
 ### Type Checking
 
 ```bash
-uv run mypy src/sunstone
+uv run mypy
 ```
 
 ### Linting and Formatting
 
 ```bash
-uv run ruff check src/sunstone
-uv run ruff format src/sunstone
+uv run ruff check
+uv run ruff format
 ```
 
 ## About Sunstone Institute
{sunstone_py-0.4.2 → sunstone_py-0.5.1}/README.md
@@ -296,14 +296,14 @@ uv run pytest
 ### Type Checking
 
 ```bash
-uv run mypy src/sunstone
+uv run mypy
 ```
 
 ### Linting and Formatting
 
 ```bash
-uv run ruff check src/sunstone
-uv run ruff format src/sunstone
+uv run ruff check
+uv run ruff format
 ```
 
 ## About Sunstone Institute
{sunstone_py-0.4.2 → sunstone_py-0.5.1}/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "sunstone-py"
-version = "0.4.2"
+version = "0.5.1"
 description = "Python library for managing datasets with lineage tracking in Sunstone projects"
 readme = "README.md"
 requires-python = ">=3.10"
@@ -30,6 +30,7 @@ dependencies = [
     "pandas>=2.0.0",
     "pyyaml>=6.0",
     "requests>=2.31.0",
+    "ruamel-yaml>=0.18",
 ]
 
 [project.urls]
@@ -59,6 +60,10 @@ python_version = "3.10"
 warn_return_any = true
 warn_unused_configs = true
 disallow_untyped_defs = true
+check_untyped_defs = false
+exclude = [
+    "^tests/testdata/",
+]
 
 [dependency-groups]
 dev = [
@@ -69,6 +74,7 @@ dev = [
     "types-requests>=2.32.4.20250913",
     "pandas-stubs>=2.3.2.250926",
     "types-pyyaml>=6.0.12.20250915",
+    "markdown>=3.10",
 ]
 docs = [
     "mkdocs-material>=9.5.0",
{sunstone_py-0.4.2 → sunstone_py-0.5.1}/src/sunstone/_release.py
@@ -164,29 +164,12 @@ def get_last_tag() -> str | None:
 def generate_changelog_from_git() -> str:
     """Generate changelog entries from git commits since last tag using Claude."""
     last_tag = get_last_tag()
-    if last_tag:
-        commit_range = f"{last_tag}..HEAD"
-    else:
-        commit_range = "HEAD"
-
-    # Get commits since last tag
-    result = run_git("log", commit_range, "--pretty=format:%s")
-    if result.returncode != 0 or not result.stdout.strip():
-        return ""
-
-    commits = result.stdout.strip()
-
-    prompt = f"""Convert these git commit messages into Keep a Changelog format entries.
-Categorize under: Added, Changed, Fixed, Removed, Security (only include categories that apply).
-Be concise. Skip merge commits, version bump commits, and release commits.
-Output ONLY the markdown entries with ### headers for categories, nothing else.
-
-Commits:
-{commits}"""
+    if last_tag is None:
+        last_tag = "HEAD~1"
 
     print("Generating changelog entries with Claude...")
     claude_result = subprocess.run(
-        ["claude", "-p", "--model=haiku", prompt],
+        ["claude", "-p", f"/generate-changelog {last_tag}"],
         capture_output=True,
         text=True,
         cwd=get_root_dir(),
{sunstone_py-0.4.2 → sunstone_py-0.5.1}/src/sunstone/dataframe.py
@@ -323,7 +323,7 @@ class DataFrame:
             path_or_buf: File path for the output CSV.
             slug: Dataset slug (required in relaxed mode if not registered).
             name: Dataset name (required in relaxed mode if not registered).
-            publish: Whether to publish the dataset.
+            publish: bool = False,
             **kwargs: Additional arguments passed to pandas.to_csv.
 
         Raises:
@@ -366,6 +366,9 @@ class DataFrame:
         # Record the operation
         self.lineage.add_operation(f"to_csv({dataset.slug})")
 
+        # Persist lineage metadata to datasets.yaml
+        manager.update_output_lineage(slug=dataset.slug, lineage=self.lineage, strict=self.strict_mode)
+
     def _infer_field_schema(self) -> List[FieldSchema]:
         """
         Infer field schema from the DataFrame.
@@ -510,13 +513,13 @@ class DataFrame:
             project_path=self.lineage.project_path,
         )
 
-    def _wrap_result(self, result: Any, operation: str = "pandas_operation") -> Any:
+    def _wrap_result(self, result: Any, operation: Optional[str] = None) -> Any:
        """
         Wrap a pandas result in a Sunstone DataFrame if applicable.
 
         Args:
             result: The result from a pandas operation.
-            operation: Name of the operation performed.
+            operation: Name of the operation performed. If None, no operation is recorded.
 
         Returns:
             Wrapped DataFrame if result is a DataFrame, otherwise the result.
@@ -527,7 +530,8 @@ class DataFrame:
                 operations=self.lineage.operations.copy(),
                 project_path=self.lineage.project_path,
             )
-            new_lineage.add_operation(operation)
+            if operation is not None:
+                new_lineage.add_operation(operation)
 
             return DataFrame(
                 data=result,
@@ -537,6 +541,28 @@ class DataFrame:
             )
         return result
 
+    # Methods that don't represent meaningful data transformations
+    # These return DataFrames but shouldn't be tracked in lineage
+    _NON_TRACKING_METHODS = frozenset(
+        {
+            # Copy operations - same data, no transformation
+            "copy",
+            # Index operations - same data, different index
+            "reset_index",
+            "set_index",
+            "reindex",
+            # Type conversions without data change
+            "astype",
+            "infer_objects",
+            # Column/index renaming - same data, different labels
+            "rename",
+            "rename_axis",
+            # Reshaping without data loss
+            "T",
+            "transpose",
+        }
+    )
+
     def __getattr__(self, name: str) -> Any:
         """
         Delegate attribute access to the underlying pandas DataFrame.
@@ -557,11 +583,14 @@ class DataFrame:
 
             def wrapper(*args: Any, **kwargs: Any) -> Any:
                 result = attr(*args, **kwargs)
+                # Don't track non-transforming methods
+                if name in DataFrame._NON_TRACKING_METHODS:
+                    return self._wrap_result(result, operation=None)
                 return self._wrap_result(result, operation=f"{name}")
 
             return wrapper
 
-        return self._wrap_result(attr, operation=f"access_attribute_{name}")
+        return self._wrap_result(attr, operation=None)  # Don't track attribute access
 
     def __getitem__(self, key: Any) -> Any:
         """
@@ -574,7 +603,9 @@ class DataFrame:
             The item from the underlying DataFrame, wrapped if it's a DataFrame.
         """
         result = self.data[key]
-        return self._wrap_result(result, operation="__getitem__")
+        # Don't track __getitem__ as an operation - it's just column/row access
+        # not a meaningful transformation
+        return self._wrap_result(result, operation=None)
 
     def __setitem__(self, key: Any, value: Any) -> None:
         """
@@ -585,7 +616,8 @@ class DataFrame:
             value: Value to assign.
         """
         self.data[key] = value
-        self.lineage.add_operation("__setitem__")
+        # Track column assignment in lineage
+        self.lineage.add_operation(f"__setitem__({key!r})")
 
     def __repr__(self) -> str:
         """String representation of the DataFrame."""
{sunstone_py-0.4.2 → sunstone_py-0.5.1}/src/sunstone/datasets.py
@@ -4,19 +4,27 @@ Parser and manager for datasets.yaml files.
 
 import ipaddress
 import logging
+import os
 import socket
+import tempfile
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Union
 from urllib.parse import urljoin, urlparse
 
 import requests
-import yaml
+from ruamel.yaml import YAML
 
 from .exceptions import DatasetNotFoundError, DatasetValidationError
-from .lineage import DatasetMetadata, FieldSchema, Source, SourceLocation
+from .lineage import DatasetMetadata, FieldSchema, LineageMetadata, Source, SourceLocation
 
 logger = logging.getLogger(__name__)
 
+# Configure ruamel.yaml for round-trip parsing (preserves comments) with proper indentation
+_yaml = YAML()
+_yaml.preserve_quotes = True
+_yaml.default_flow_style = False
+_yaml.indent(mapping=2, sequence=4, offset=2)
+
 
 def _is_public_url(url: str) -> bool:
     """
@@ -109,7 +117,7 @@ class DatasetsManager:
     def _load(self) -> None:
         """Load and parse the datasets.yaml file."""
         with open(self.datasets_file, "r") as f:
-            self._data = yaml.safe_load(f) or {}
+            self._data = _yaml.load(f) or {}
 
         if "inputs" not in self._data:
             self._data["inputs"] = []
@@ -119,7 +127,7 @@ class DatasetsManager:
     def _save(self) -> None:
         """Save the current data back to datasets.yaml."""
         with open(self.datasets_file, "w") as f:
-            yaml.dump(self._data, f, default_flow_style=False, sort_keys=False)
+            _yaml.dump(self._data, f)
 
     def _parse_source_location(self, loc_data: Dict[str, Any]) -> SourceLocation:
         """Parse source location data from YAML."""
@@ -372,6 +380,92 @@ class DatasetsManager:
 
         raise DatasetNotFoundError(f"Output dataset with slug '{slug}' not found")
 
+    def update_output_lineage(self, slug: str, lineage: LineageMetadata, strict: bool = False) -> None:
+        """
+        Update lineage metadata for an output dataset.
+
+        In strict mode, validates that the lineage matches what would be written
+        without modifying the file. In relaxed mode, updates the file with lineage.
+
+        Args:
+            slug: The slug of the output dataset to update.
+            lineage: The lineage metadata to persist.
+            strict: If True, validate without modifying. If False, update the file.
+
+        Raises:
+            DatasetNotFoundError: If the dataset doesn't exist.
+            DatasetValidationError: In strict mode, if lineage differs from what's in the file.
+        """
+        # Find the output dataset
+        dataset_idx = None
+        for i, dataset_data in enumerate(self._data["outputs"]):
+            if dataset_data["slug"] == slug:
+                dataset_idx = i
+                break
+
+        if dataset_idx is None:
+            raise DatasetNotFoundError(f"Output dataset with slug '{slug}' not found")
+
+        # Build lineage metadata to add
+        lineage_data: dict[str, Any] = {}
+
+        if lineage.sources:
+            lineage_data["sources"] = [
+                {
+                    "slug": src.slug,
+                    "name": src.name,
+                }
+                for src in lineage.sources
+            ]
+
+        if lineage.operations:
+            lineage_data["operations"] = lineage.operations.copy()
+
+        if lineage.created_at:
+            lineage_data["created_at"] = lineage.created_at.isoformat()
+
+        # Create a copy of the data with updated lineage
+        updated_data = self._data.copy()
+        updated_data["outputs"] = [dict(d) for d in self._data["outputs"]]
+        updated_data["outputs"][dataset_idx] = dict(self._data["outputs"][dataset_idx])
+
+        # Add or update lineage in the copy
+        if lineage_data:
+            updated_data["outputs"][dataset_idx]["lineage"] = lineage_data
+
+        # Write to temp file
+        temp_fd, temp_path = tempfile.mkstemp(suffix=".yaml", prefix="datasets_", dir=self.project_path)
+
+        try:
+            with os.fdopen(temp_fd, "w") as f:
+                _yaml.dump(updated_data, f)
+
+            if strict:
+                # In strict mode, check if files differ
+                import filecmp
+
+                if not filecmp.cmp(self.datasets_file, temp_path, shallow=False):
+                    # Files differ - this is an error in strict mode
+                    os.unlink(temp_path)
+                    raise DatasetValidationError(
+                        f"In strict mode, lineage metadata for '{slug}' would be updated in datasets.yaml. "
+                        f"Expected lineage is already present in the file, but found differences."
+                    )
+                else:
+                    # Files are the same - clean up temp file
+                    os.unlink(temp_path)
+            else:
+                # In relaxed mode, replace the file
+                os.replace(temp_path, self.datasets_file)
+                # Reload the data
+                self._load()
+
+        except Exception:
+            # Clean up temp file on error
+            if os.path.exists(temp_path):
+                os.unlink(temp_path)
+            raise
+
     def get_absolute_path(self, location: str) -> Path:
         """
         Get the absolute path for a dataset location.
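
End to end, the new `update_output_lineage` hook means a `to_csv` call in relaxed mode now persists the frame's lineage under the output's `lineage` key in datasets.yaml, while strict mode only verifies it. A hedged sketch of that flow, reusing illustrative names from above (the `to_csv` keywords shown are the ones documented in this diff: `path_or_buf`, `slug`, `name`, `publish`):

```python
import sunstone

df = sunstone.DataFrame.read_csv(
    "inputs/official_un_member_states_raw.csv",
    project_path="my_project",  # illustrative
    strict=False,
)
out = df.apply_operation(lambda d: d.head(100), description="Select first 100 countries")

# Relaxed mode: writes the CSV, records to_csv(<slug>) in lineage, then
# persists sources/operations/created_at under the output's `lineage`
# key in datasets.yaml via DatasetsManager.update_output_lineage.
out.to_csv(
    "outputs/first_100.csv",
    slug="first-100-countries",  # illustrative slug
    name="First 100 countries",
)

# Strict mode (e.g. SUNSTONE_DATAFRAME_STRICT=1, as in the tests): the
# same call instead compares the would-be datasets.yaml against the file
# on disk and raises DatasetValidationError on any difference, leaving
# the file untouched.
```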
{sunstone_py-0.4.2 → sunstone_py-0.5.1}/src/sunstone_py.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sunstone-py
-Version: 0.4.2
+Version: 0.5.1
 Summary: Python library for managing datasets with lineage tracking in Sunstone projects
 Author-email: Sunstone Institute <stig@sunstone.institute>
 License: MIT
@@ -24,6 +24,7 @@ Requires-Dist: google-auth>=2.43.0
 Requires-Dist: pandas>=2.0.0
 Requires-Dist: pyyaml>=6.0
 Requires-Dist: requests>=2.31.0
+Requires-Dist: ruamel-yaml>=0.18
 Dynamic: license-file
 
 # sunstone-py
@@ -324,14 +325,14 @@ uv run pytest
 ### Type Checking
 
 ```bash
-uv run mypy src/sunstone
+uv run mypy
 ```
 
 ### Linting and Formatting
 
 ```bash
-uv run ruff check src/sunstone
-uv run ruff format src/sunstone
+uv run ruff check
+uv run ruff format
 ```
 
 ## About Sunstone Institute
{sunstone_py-0.4.2 → sunstone_py-0.5.1}/src/sunstone_py.egg-info/requires.txt
@@ -3,3 +3,4 @@ google-auth>=2.43.0
 pandas>=2.0.0
 pyyaml>=6.0
 requests>=2.31.0
+ruamel-yaml>=0.18
{sunstone_py-0.4.2 → sunstone_py-0.5.1}/tests/test_dataframe.py
@@ -3,6 +3,7 @@ Tests for Sunstone DataFrame functionality.
 """
 
 from pathlib import Path
+from typing import Any
 
 import pytest
 
@@ -12,7 +13,7 @@ import sunstone
 class TestDataFrameBasics:
     """Tests for basic DataFrame operations."""
 
-    def test_read_csv(self, project_path: Path):
+    def test_read_csv(self, project_path: Path) -> None:
         """Test reading a CSV file into a DataFrame."""
         df = sunstone.DataFrame.read_csv(
             "inputs/official_un_member_states_raw.csv",
@@ -26,7 +27,7 @@ class TestDataFrameBasics:
         assert len(df.lineage.sources) > 0
         assert df.lineage.operations is not None
 
-    def test_apply_operation(self, project_path: Path):
+    def test_apply_operation(self, project_path: Path) -> None:
         """Test applying an operation to a DataFrame."""
         df = sunstone.DataFrame.read_csv(
             "inputs/official_un_member_states_raw.csv",
@@ -39,7 +40,7 @@ class TestDataFrameBasics:
         assert len(filtered.data) == 10
         assert len(filtered.lineage.operations) > len(df.lineage.operations)
 
-    def test_read_second_dataset(self, project_path: Path):
+    def test_read_second_dataset(self, project_path: Path) -> None:
         """Test reading the same dataset twice creates separate lineage."""
         members1 = sunstone.DataFrame.read_csv(
             "inputs/official_un_member_states_raw.csv", project_path=project_path, strict=False
@@ -59,7 +60,7 @@ class TestDataFrameMerge:
     """Tests for DataFrame merge operations."""
 
     @pytest.fixture
-    def un_members_df1(self, project_path: Path):
+    def un_members_df1(self, project_path: Path) -> Any:
         """Load UN members DataFrame (first instance)."""
         df = sunstone.DataFrame.read_csv(
             "inputs/official_un_member_states_raw.csv",
@@ -73,7 +74,7 @@ class TestDataFrameMerge:
         )
 
     @pytest.fixture
-    def un_members_df2(self, project_path: Path):
+    def un_members_df2(self, project_path: Path) -> Any:
         """Load UN members DataFrame (second instance)."""
         df = sunstone.DataFrame.read_csv(
             "inputs/official_un_member_states_raw.csv",
@@ -86,7 +87,7 @@ class TestDataFrameMerge:
             description="Select subset of columns",
         )
 
-    def test_merge_dataframes(self, un_members_df1, un_members_df2):
+    def test_merge_dataframes(self, un_members_df1: Any, un_members_df2: Any) -> None:
         """Test merging two DataFrames."""
         merged = un_members_df1.merge(un_members_df2, left_on="ISO Code", right_on="ISO Code", how="inner")
 
@@ -96,7 +97,7 @@ class TestDataFrameMerge:
         assert len(merged.lineage.sources) >= 1
         assert len(merged.lineage.operations) > 0
 
-    def test_merge_lineage_tracking(self, un_members_df1, un_members_df2):
+    def test_merge_lineage_tracking(self, un_members_df1: Any, un_members_df2: Any) -> None:
         """Test that merge properly tracks lineage."""
         merged = un_members_df1.merge(un_members_df2, left_on="ISO Code", right_on="ISO Code", how="inner")
 
@@ -109,7 +110,7 @@ class TestLineageMetadata:
     """Tests for lineage metadata functionality."""
 
     @pytest.fixture
-    def processed_df(self, project_path: Path):
+    def processed_df(self, project_path: Path) -> Any:
         """Create a processed DataFrame for testing."""
         un_members = sunstone.DataFrame.read_csv(
             "inputs/official_un_member_states_raw.csv",
@@ -122,7 +123,7 @@ class TestLineageMetadata:
         )
         return filtered.apply_operation(lambda d: d.head(100), description="Select first 100 countries")
 
-    def test_lineage_to_dict(self, processed_df):
+    def test_lineage_to_dict(self, processed_df: Any) -> None:
         """Test converting lineage to dictionary."""
         lineage_dict = processed_df.lineage.to_dict()
 
@@ -138,7 +139,7 @@ class TestLineageMetadata:
 class TestStrictMode:
     """Tests for strict mode functionality."""
 
-    def test_strict_mode_load(self, project_path: Path, monkeypatch):
+    def test_strict_mode_load(self, project_path: Path, monkeypatch: Any) -> None:
         """Test loading DataFrame in strict mode."""
         monkeypatch.setenv("SUNSTONE_DATAFRAME_STRICT", "1")
 
@@ -146,7 +147,7 @@ class TestStrictMode:
 
         assert strict_df.strict_mode is True
 
-    def test_strict_mode_prevents_unregistered_write(self, project_path: Path, monkeypatch):
+    def test_strict_mode_prevents_unregistered_write(self, project_path: Path, monkeypatch: Any) -> None:
         """Test that strict mode prevents writing to unregistered locations."""
         monkeypatch.setenv("SUNSTONE_DATAFRAME_STRICT", "1")
 
@@ -159,7 +160,7 @@ class TestReadDataset:
 class TestReadDataset:
     """Tests for read_dataset() functionality with format auto-detection."""
 
-    def test_read_dataset_by_slug(self, project_path: Path):
+    def test_read_dataset_by_slug(self, project_path: Path) -> None:
         """Test reading a dataset by slug with auto-detection."""
         df = sunstone.DataFrame.read_dataset(
             "official-un-member-states",
@@ -174,7 +175,7 @@ class TestReadDataset:
         # Check that the lineage operation mentions the format
         assert any("format=csv" in op for op in df.lineage.operations)
 
-    def test_read_dataset_with_explicit_format(self, project_path: Path):
+    def test_read_dataset_with_explicit_format(self, project_path: Path) -> None:
         """Test reading a dataset with explicit format override."""
         df = sunstone.DataFrame.read_dataset(
             "official-un-member-states",
@@ -187,7 +188,7 @@ class TestReadDataset:
         assert len(df.data) > 0
         assert any("format=csv" in op for op in df.lineage.operations)
 
-    def test_read_dataset_slug_not_found(self, project_path: Path):
+    def test_read_dataset_slug_not_found(self, project_path: Path) -> None:
         """Test that reading non-existent slug raises error."""
         with pytest.raises(sunstone.DatasetNotFoundError) as exc_info:
             sunstone.DataFrame.read_dataset(
@@ -197,7 +198,7 @@ class TestReadDataset:
 
         assert "not found in datasets.yaml" in str(exc_info.value)
 
-    def test_read_dataset_via_pandas_api(self, project_path: Path):
+    def test_read_dataset_via_pandas_api(self, project_path: Path) -> None:
         """Test reading dataset via pandas-like API."""
         from sunstone import pandas as pd
 
@@ -210,7 +211,7 @@ class TestReadDataset:
         assert len(df.data) > 0
         assert isinstance(df, sunstone.DataFrame)
 
-    def test_read_csv_with_slug_delegates_to_read_dataset(self, project_path: Path):
+    def test_read_csv_with_slug_delegates_to_read_dataset(self, project_path: Path) -> None:
         """Test that read_csv with slug delegates to read_dataset."""
         df = sunstone.DataFrame.read_csv(
             "official-un-member-states",