sunstone-py 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sunstone/__init__.py ADDED
@@ -0,0 +1,84 @@
1
+ """
2
+ Sunstone: Python library for managing datasets with lineage tracking.
3
+
4
+ This library provides tools for data scientists working on Sunstone projects
5
+ to manage datasets with full lineage tracking and integration with datasets.yaml.
6
+
7
+ Example:
8
+ >>> import sunstone
9
+ >>>
10
+ >>> # Read a dataset (must be in datasets.yaml)
11
+ >>> df = sunstone.DataFrame.read_csv(
12
+ ... 'official_un_member_states_raw.csv',
13
+ ... project_path='/path/to/project'
14
+ ... )
15
+ >>>
16
+ >>> # Perform operations - lineage is tracked
17
+ >>> result = df.apply_operation(
18
+ ... lambda d: d[d['Amount'] > 100],
19
+ ... description="Filter countries with >100 schools"
20
+ ... )
21
+ >>>
22
+ >>> # Write output (auto-registers in relaxed mode)
23
+ >>> result.to_csv(
24
+ ... 'filtered_schools.csv',
25
+ ... slug='filtered-schools',
26
+ ... name='Filtered School Counts',
27
+ ... index=False
28
+ ... )
29
+ """
30
+
31
+ from .dataframe import DataFrame
32
+ from .datasets import DatasetsManager
33
+ from .exceptions import (
34
+ DatasetNotFoundError,
35
+ DatasetValidationError,
36
+ LineageError,
37
+ StrictModeError,
38
+ SunstoneError,
39
+ )
40
+ from .lineage import (
41
+ DatasetMetadata,
42
+ FieldSchema,
43
+ LineageMetadata,
44
+ Source,
45
+ SourceLocation,
46
+ )
47
+
48
+ # Import pandas module for pd-like interface
49
+ from . import pandas
50
+
51
+ # Import validation utilities
52
+ from .validation import (
53
+ ImportCheckResult,
54
+ check_notebook_imports,
55
+ check_script_imports,
56
+ validate_project_notebooks,
57
+ )
58
+
59
# Resolve the version from the installed distribution's metadata instead of a
# hard-coded literal, which goes stale whenever only pyproject.toml is bumped.
from importlib.metadata import PackageNotFoundError as _PackageNotFoundError
from importlib.metadata import version as _distribution_version

try:
    __version__ = _distribution_version("sunstone-py")
except _PackageNotFoundError:
    # The distribution is not installed (e.g. imported from a source checkout).
    __version__ = "0.0.0+unknown"
60
+
61
# Public API of the package, grouped by purpose. Every name listed here is
# imported at the top of this module.
__all__ = [
    # Core dataset classes
    "DataFrame",
    "DatasetsManager",
    # Drop-in pandas-style module
    "pandas",
    # Import validation helpers
    "ImportCheckResult",
    "check_notebook_imports",
    "check_script_imports",
    "validate_project_notebooks",
    # Lineage data structures
    "LineageMetadata",
    "DatasetMetadata",
    "FieldSchema",
    "Source",
    "SourceLocation",
    # Exception hierarchy
    "SunstoneError",
    "DatasetNotFoundError",
    "DatasetValidationError",
    "StrictModeError",
    "LineageError",
]
sunstone/_release.py ADDED
@@ -0,0 +1,403 @@
1
+ #!/usr/bin/env python3
2
+ """Release script for sunstone-py.
3
+
4
+ Handles version bumping, CHANGELOG updates, git tagging, and pushing.
5
+ """
6
+
7
+ import argparse
8
+ import os
9
+ import re
10
+ import subprocess
11
+ import sys
12
+ from datetime import date
13
+ from pathlib import Path
14
+
15
+
16
def get_root_dir() -> Path:
    """Return the third ancestor directory of this file.

    The module is expected to live at ``<root>/src/sunstone/_release.py``, so
    climbing three levels lands on the directory that holds pyproject.toml.
    """
    location = Path(__file__)
    for _ in range(3):
        location = location.parent
    return location
20
+
21
+
22
def run_git(*args: str, capture: bool = True) -> subprocess.CompletedProcess[str]:
    """Invoke ``git`` with the given arguments from the project root.

    Args:
        *args: Arguments handed to git verbatim, e.g. ``"status", "--porcelain"``.
        capture: When true, stdout and stderr are captured on the returned
            object; when false they are inherited from this process.

    Returns:
        The completed process. A non-zero exit status is not raised; callers
        inspect ``returncode`` themselves.
    """
    command = ["git", *args]
    return subprocess.run(command, capture_output=capture, text=True, cwd=get_root_dir())
31
+
32
+
33
def check_git_clean() -> None:
    """Exit with status 1 unless ``git status --porcelain`` prints nothing."""
    status = run_git("status", "--porcelain")
    if status.returncode != 0:
        print("Error: Failed to check git status", file=sys.stderr)
        sys.exit(1)

    pending = status.stdout
    if not pending.strip():
        return

    # Show the offending entries so the user knows what to commit or stash.
    print("Error: Git workspace is not clean. Commit or stash changes first.", file=sys.stderr)
    print(pending, file=sys.stderr)
    sys.exit(1)
43
+
44
+
45
def check_on_main_branch() -> None:
    """Exit with status 1 unless HEAD's abbreviated ref name is ``main``."""
    head = run_git("rev-parse", "--abbrev-ref", "HEAD")
    if head.returncode != 0:
        print("Error: Failed to get current branch", file=sys.stderr)
        sys.exit(1)

    branch = head.stdout.strip()
    if branch == "main":
        return

    print(f"Error: Not on main branch (currently on '{branch}')", file=sys.stderr)
    sys.exit(1)
55
+
56
+
57
def check_up_to_date_with_origin() -> None:
    """Exit with status 1 unless HEAD and ``origin/main`` are the same commit.

    Fetches ``origin main`` first, then compares the two commit hashes. When
    they differ, the merge base is used to tell the user whether the local
    branch is behind, ahead, or has diverged.
    """
    fetched = run_git("fetch", "origin", "main")
    if fetched.returncode != 0:
        print("Error: Failed to fetch from origin", file=sys.stderr)
        sys.exit(1)

    head = run_git("rev-parse", "HEAD")
    upstream = run_git("rev-parse", "origin/main")
    if not (head.returncode == 0 and upstream.returncode == 0):
        print("Error: Failed to get commit hashes", file=sys.stderr)
        sys.exit(1)

    local_hash = head.stdout.strip()
    remote_hash = upstream.stdout.strip()
    if local_hash == remote_hash:
        return

    # The hashes differ: the merge base tells us in which direction.
    ancestor = run_git("merge-base", "HEAD", "origin/main")
    if ancestor.returncode != 0:
        print("Error: Failed to find merge base", file=sys.stderr)
        sys.exit(1)

    base_hash = ancestor.stdout.strip()
    if base_hash == local_hash:
        reason = "Error: Local main is behind origin/main. Pull first."
    elif base_hash == remote_hash:
        reason = "Error: Local main is ahead of origin/main. Push first."
    else:
        reason = "Error: Local main has diverged from origin/main."
    print(reason, file=sys.stderr)
    sys.exit(1)
91
+
92
+
93
def get_last_tag() -> str | None:
    """Return the tag printed by ``git describe --tags --abbrev=0``.

    Returns:
        The tag name, or None when the command fails (for example because the
        repository has no tags yet).
    """
    described = run_git("describe", "--tags", "--abbrev=0")
    return described.stdout.strip() if described.returncode == 0 else None
99
+
100
+
101
def generate_changelog_from_git() -> str:
    """Generate changelog entries from the commits since the last tag using Claude.

    Commit subjects from ``<last tag>..HEAD`` (or all of ``HEAD`` when no tag
    exists) are passed to the ``claude`` CLI, which is asked to rewrite them
    as Keep a Changelog entries.

    Returns:
        The generated markdown, or an empty string when there are no commits,
        ``git log`` fails, or the ``claude`` CLI cannot be run or exits
        non-zero. Generation failures print a warning to stderr and never
        abort the release.
    """
    last_tag = get_last_tag()
    commit_range = f"{last_tag}..HEAD" if last_tag else "HEAD"

    # Subject lines only; bodies would just add noise to the prompt.
    log = run_git("log", commit_range, "--pretty=format:%s")
    if log.returncode != 0 or not log.stdout.strip():
        return ""

    commits = log.stdout.strip()

    prompt = f"""Convert these git commit messages into Keep a Changelog format entries.
Categorize under: Added, Changed, Fixed, Removed, Security (only include categories that apply).
Be concise. Skip merge commits, version bump commits, and release commits.
Output ONLY the markdown entries with ### headers for categories, nothing else.

Commits:
{commits}"""

    print("Generating changelog entries with Claude...")
    try:
        claude_result = subprocess.run(
            ["claude", "-p", "--model=haiku", prompt],
            capture_output=True,
            text=True,
            cwd=get_root_dir(),
        )
    except OSError as exc:
        # The CLI is missing or not executable. Treat that like any other
        # generation failure instead of aborting the release with a traceback.
        print(f"Warning: Claude changelog generation failed ({exc})", file=sys.stderr)
        return ""

    if claude_result.returncode != 0:
        print("Warning: Claude changelog generation failed", file=sys.stderr)
        return ""

    return claude_result.stdout.strip()
137
+
138
+
139
def populate_unreleased(content: str) -> None:
    """Insert ``content`` directly below the ``## [Unreleased]`` heading of CHANGELOG.md.

    Does nothing when ``content`` is empty. The file is rewritten even when
    the heading is absent, in which case its text is unchanged.
    """
    if content:
        changelog_path = get_root_dir() / "CHANGELOG.md"
        heading = "## [Unreleased]\n"
        original_text = changelog_path.read_text()
        # Keep the heading, then a blank line, then the generated entries.
        changelog_path.write_text(original_text.replace(heading, f"{heading}\n{content}\n"))
152
+
153
+
154
def open_in_editor(file_path: Path) -> None:
    """Open a file in the user's preferred editor and wait for it to exit.

    The editor comes from ``$VISUAL``, then ``$EDITOR``, falling back to ``vi``.
    On POSIX the value is split shell-style, so settings that carry arguments
    (``code --wait``, ``emacs -nw``) launch correctly instead of being looked
    up as a single program name.

    Args:
        file_path: File to open; passed to the editor as its last argument.
    """
    import shlex

    editor = os.environ.get("VISUAL") or os.environ.get("EDITOR") or "vi"

    if os.name == "nt" or os.path.isfile(editor):
        # Keep the value whole: POSIX-style splitting would eat Windows
        # backslashes, and an existing file path may legitimately contain spaces.
        command = [editor]
    else:
        try:
            command = shlex.split(editor) or ["vi"]
        except ValueError:
            # Unbalanced quotes: fall back to treating the value as one program.
            command = [editor]

    subprocess.run([*command, str(file_path)], cwd=get_root_dir())
158
+
159
+
160
def confirm_release(new_version: str) -> bool:
    """Prompt for confirmation of the release.

    Returns:
        True only when the answer, trimmed and lower-cased, is ``y`` or
        ``yes``. End-of-input or Ctrl-C counts as "no".
    """
    question = f"\nProceed with release v{new_version}? [y/N] "
    try:
        answer = input(question)
    except (EOFError, KeyboardInterrupt):
        # Finish the interrupted prompt line before returning.
        print()
        return False
    return answer.strip().lower() in ("y", "yes")
168
+
169
+
170
def get_current_version() -> str:
    """Read the version string from pyproject.toml in the project root.

    Returns:
        The quoted value of the first line that starts with ``version =``.
        Exits with status 1 when no such line exists.
    """
    pyproject_text = (get_root_dir() / "pyproject.toml").read_text()
    found = re.search(r'^version\s*=\s*"([^"]+)"', pyproject_text, re.MULTILINE)
    if found is None:
        print("Error: Could not find version in pyproject.toml", file=sys.stderr)
        sys.exit(1)
    return found.group(1)
179
+
180
+
181
def bump_version(version: str, bump: str) -> str:
    """Return ``version`` with one semver component incremented.

    Args:
        version: A ``MAJOR.MINOR.PATCH`` string of decimal digits.
        bump: ``"major"`` or ``"minor"``; any other value bumps the patch level.

    Returns:
        The new version string. Components below the bumped one reset to 0.
        Exits with status 1 when ``version`` is not in the expected format.
    """
    parsed = re.match(r"^(\d+)\.(\d+)\.(\d+)$", version)
    if parsed is None:
        print(f"Error: Invalid version format: {version}", file=sys.stderr)
        sys.exit(1)

    major, minor, patch = (int(part) for part in parsed.groups())

    if bump == "major":
        major, minor, patch = major + 1, 0, 0
    elif bump == "minor":
        minor, patch = minor + 1, 0
    else:
        patch += 1

    return f"{major}.{minor}.{patch}"
196
+
197
+
198
def update_pyproject_version(new_version: str) -> None:
    """Rewrite the quoted value of every ``version = "..."`` line in pyproject.toml.

    Only lines that begin with ``version`` are touched; the text before the
    opening quote is kept as written.
    """
    pyproject_path = get_root_dir() / "pyproject.toml"
    version_line = re.compile(r'^(version\s*=\s*)"[^"]+"', re.MULTILINE)
    updated = version_line.sub(f'\\1"{new_version}"', pyproject_path.read_text())
    pyproject_path.write_text(updated)
209
+
210
+
211
def update_changelog(new_version: str) -> None:
    """Move the Unreleased entries of CHANGELOG.md under a new version heading.

    A fresh, empty ``## [Unreleased]`` heading is kept at the top and a
    ``## [<new_version>] - <today>`` heading is inserted below it, followed by
    whatever the Unreleased section contained. Exits with status 1 when the
    file has no ``## [Unreleased]`` heading.
    """
    changelog_path = get_root_dir() / "CHANGELOG.md"
    content = changelog_path.read_text()

    unreleased_header = "## [Unreleased]\n"
    version_header = f"## [{new_version}] - {date.today().isoformat()}\n"

    # Usual shape: the Unreleased section is followed by an earlier release heading.
    between = re.search(r"(## \[Unreleased\]\n)(.*?)(## \[\d)", content, re.DOTALL)
    if between is not None:
        pending = between.group(2)
        # An empty section still gets one blank line before the next heading.
        body = pending if pending.strip() else "\n"
        matched = between.group(0)
        replacement = f"{unreleased_header}\n{version_header}{body}{between.group(3)}"
    else:
        # No earlier release heading: the section runs to the end of the file.
        tail = re.search(r"(## \[Unreleased\]\n)(.*?)$", content, re.DOTALL)
        if tail is None:
            print("Error: Could not find [Unreleased] section in CHANGELOG.md", file=sys.stderr)
            sys.exit(1)
        matched = tail.group(0)
        replacement = f"{unreleased_header}\n{version_header}{tail.group(2)}"

    changelog_path.write_text(content.replace(matched, replacement))
256
+
257
+
258
def git_commit_and_tag(new_version: str) -> None:
    """Stage the release files, commit them, and create an annotated tag.

    Stages pyproject.toml, CHANGELOG.md and uv.lock, commits with the message
    ``Release v<new_version>``, then tags the commit ``v<new_version>``. The
    first step that fails prints its error together with git's own output
    and exits with status 1; later steps are not attempted.

    Args:
        new_version: Version being released, without the leading ``v``.
    """
    root_dir = get_root_dir()
    tag = f"v{new_version}"

    steps = (
        (
            "Error: Failed to stage files",
            (
                "add",
                str(root_dir / "pyproject.toml"),
                str(root_dir / "CHANGELOG.md"),
                str(root_dir / "uv.lock"),
            ),
        ),
        ("Error: Failed to commit", ("commit", "-m", f"Release v{new_version}")),
        ("Error: Failed to create tag", ("tag", "-a", tag, "-m", f"Release {tag}")),
    )

    for failure_message, git_args in steps:
        result = run_git(*git_args)
        if result.returncode != 0:
            print(failure_message, file=sys.stderr)
            # Output was captured, so surface git's explanation (hook
            # rejection, existing tag, nothing to commit, ...) to the user.
            details = (result.stderr or result.stdout or "").strip()
            if details:
                print(details, file=sys.stderr)
            sys.exit(1)

    print(f"Created commit and tag {tag}")
288
+
289
+
290
def git_push() -> None:
    """Push ``main`` and its annotated tags to ``origin``.

    Git's output is not captured, so progress and errors go straight to the
    terminal. Exits with status 1 when the push fails.
    """
    pushed = run_git("push", "origin", "main", "--follow-tags", capture=False)
    if pushed.returncode == 0:
        return

    print("Error: Failed to push to origin", file=sys.stderr)
    sys.exit(1)
296
+
297
+
298
def _revert_release_files() -> bool:
    """Restore the files a release rewrites to their committed state.

    Returns:
        True when ``git checkout`` succeeded, False otherwise. On failure the
        reason is printed to stderr so the user can clean up manually.
    """
    revert = run_git("checkout", "--", "pyproject.toml", "CHANGELOG.md", "uv.lock")
    if revert.returncode == 0:
        return True
    print("Error: Failed to revert release changes; restore the files manually.", file=sys.stderr)
    if revert.stderr:
        print(revert.stderr, file=sys.stderr)
    return False


def main() -> None:
    """Run the interactive release flow.

    Steps, in order: parse the bump level; verify the workspace is clean, on
    ``main`` and level with ``origin/main``; compute the new version; generate
    changelog entries; rewrite pyproject.toml; run ``uv sync``; roll the
    changelog over to the new version; open it in an editor; ask for
    confirmation; then commit, tag and push.

    With ``--dry-run`` the flow stops after printing the version change. If
    ``uv sync`` fails or the user declines, the rewritten files are restored
    with ``git checkout``.
    """
    parser = argparse.ArgumentParser(
        description="Release a new version of sunstone-py",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  uv run release                 # Bump patch version (0.1.0 -> 0.1.1)
  uv run release --bump=patch    # Bump patch version (0.1.0 -> 0.1.1)
  uv run release --bump=minor    # Bump minor version (0.1.0 -> 0.2.0)
  uv run release --bump-minor    # Bump minor version (0.1.0 -> 0.2.0)
  uv run release --bump=major    # Bump major version (0.1.0 -> 1.0.0)
  uv run release --bump-major    # Bump major version (0.1.0 -> 1.0.0)
""",
    )
    parser.add_argument(
        "--bump",
        choices=["patch", "minor", "major"],
        default=None,
        help="Version component to bump (default: patch)",
    )
    parser.add_argument(
        "--bump-minor",
        action="store_true",
        help="Bump minor version (shorthand for --bump=minor)",
    )
    parser.add_argument(
        "--bump-major",
        action="store_true",
        help="Bump major version (shorthand for --bump=major)",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Show what would be done without making changes",
    )
    args = parser.parse_args()

    # The shorthand flags take precedence over --bump; the default is patch.
    if args.bump_major:
        bump = "major"
    elif args.bump_minor:
        bump = "minor"
    elif args.bump:
        bump = args.bump
    else:
        bump = "patch"

    print("Checking git status...")
    check_git_clean()
    check_on_main_branch()
    check_up_to_date_with_origin()
    print("Git checks passed.")

    current_version = get_current_version()
    new_version = bump_version(current_version, bump)

    print(f"Version: {current_version} -> {new_version}")

    if args.dry_run:
        print("Dry run - no changes made.")
        return

    # Generate and populate changelog entries
    changelog_content = generate_changelog_from_git()
    if changelog_content:
        populate_unreleased(changelog_content)
        print("Generated changelog entries from git commits.")
    else:
        # An empty result means either no commits or a failed generation,
        # so do not claim that there were no commits.
        print("No changelog entries were generated.")

    print("Updating pyproject.toml...")
    update_pyproject_version(new_version)

    print("Syncing uv.lock...")
    uv_result = subprocess.run(["uv", "sync"], cwd=get_root_dir(), capture_output=True, text=True)
    if uv_result.returncode != 0:
        print("Error: uv sync failed", file=sys.stderr)
        print(uv_result.stderr, file=sys.stderr)
        # Do not leave the tree dirty, or the next run fails its clean check.
        print("Reverting changes...", file=sys.stderr)
        _revert_release_files()
        sys.exit(1)

    print("Updating CHANGELOG.md...")
    update_changelog(new_version)

    # Open in editor for review
    changelog_path = get_root_dir() / "CHANGELOG.md"
    print(f"\nOpening {changelog_path} for review...")
    open_in_editor(changelog_path)

    # Confirm before proceeding
    if not confirm_release(new_version):
        print("Release cancelled. Reverting changes...")
        # Exit non-zero when the revert failed, so a dirty tree is not
        # mistaken for a clean cancellation.
        sys.exit(0 if _revert_release_files() else 1)

    print("Committing and tagging...")
    git_commit_and_tag(new_version)

    print("Pushing to origin...")
    git_push()

    print(f"\nSuccessfully released v{new_version}!")


if __name__ == "__main__":
    main()