sunstone-py 0.5.1__py3-none-any.whl → 0.5.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sunstone/_release.py +23 -12
- sunstone/dataframe.py +16 -89
- sunstone/datasets.py +28 -6
- sunstone/lineage.py +37 -27
- {sunstone_py-0.5.1.dist-info → sunstone_py-0.5.3.dist-info}/METADATA +3 -5
- sunstone_py-0.5.3.dist-info/RECORD +15 -0
- sunstone_py-0.5.1.dist-info/RECORD +0 -15
- {sunstone_py-0.5.1.dist-info → sunstone_py-0.5.3.dist-info}/WHEEL +0 -0
- {sunstone_py-0.5.1.dist-info → sunstone_py-0.5.3.dist-info}/entry_points.txt +0 -0
- {sunstone_py-0.5.1.dist-info → sunstone_py-0.5.3.dist-info}/licenses/LICENSE +0 -0
- {sunstone_py-0.5.1.dist-info → sunstone_py-0.5.3.dist-info}/top_level.txt +0 -0
sunstone/_release.py
CHANGED
|
@@ -13,6 +13,17 @@ import sys
|
|
|
13
13
|
from datetime import date
|
|
14
14
|
from pathlib import Path
|
|
15
15
|
|
|
16
|
+
try:
|
|
17
|
+
import tomllib
|
|
18
|
+
except ModuleNotFoundError:
|
|
19
|
+
import tomli as tomllib # type: ignore[import-not-found,no-redef]
|
|
20
|
+
|
|
21
|
+
try:
|
|
22
|
+
import tomli_w
|
|
23
|
+
except ModuleNotFoundError:
|
|
24
|
+
print("Error: tomli_w not found. Install with: uv add --dev tomli-w", file=sys.stderr)
|
|
25
|
+
sys.exit(1)
|
|
26
|
+
|
|
16
27
|
|
|
17
28
|
def get_root_dir() -> Path:
|
|
18
29
|
"""Get the root directory (where pyproject.toml lives)."""
|
|
@@ -216,12 +227,13 @@ def confirm_release(new_version: str) -> bool:
|
|
|
216
227
|
def get_current_version() -> str:
|
|
217
228
|
"""Get the current version from pyproject.toml."""
|
|
218
229
|
pyproject_path = get_root_dir() / "pyproject.toml"
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
230
|
+
with open(pyproject_path, "rb") as f:
|
|
231
|
+
data = tomllib.load(f)
|
|
232
|
+
version = data.get("project", {}).get("version")
|
|
233
|
+
if not version:
|
|
222
234
|
print("Error: Could not find version in pyproject.toml", file=sys.stderr)
|
|
223
235
|
sys.exit(1)
|
|
224
|
-
return
|
|
236
|
+
return str(version)
|
|
225
237
|
|
|
226
238
|
|
|
227
239
|
def bump_version(version: str, bump: str) -> str:
|
|
@@ -244,14 +256,13 @@ def bump_version(version: str, bump: str) -> str:
|
|
|
244
256
|
def update_pyproject_version(new_version: str) -> None:
|
|
245
257
|
"""Update the version in pyproject.toml."""
|
|
246
258
|
pyproject_path = get_root_dir() / "pyproject.toml"
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
pyproject_path.write_text(new_content)
|
|
259
|
+
with open(pyproject_path, "rb") as f:
|
|
260
|
+
data = tomllib.load(f)
|
|
261
|
+
|
|
262
|
+
data["project"]["version"] = new_version
|
|
263
|
+
|
|
264
|
+
with open(pyproject_path, "wb") as f:
|
|
265
|
+
tomli_w.dump(data, f)
|
|
255
266
|
|
|
256
267
|
|
|
257
268
|
def update_changelog(new_version: str) -> None:
|
sunstone/dataframe.py
CHANGED
|
@@ -10,7 +10,7 @@ import pandas as pd
|
|
|
10
10
|
|
|
11
11
|
from .datasets import DatasetsManager
|
|
12
12
|
from .exceptions import DatasetNotFoundError, StrictModeError
|
|
13
|
-
from .lineage import FieldSchema, LineageMetadata
|
|
13
|
+
from .lineage import FieldSchema, LineageMetadata, compute_dataframe_hash
|
|
14
14
|
|
|
15
15
|
pd.options.mode.copy_on_write = True
|
|
16
16
|
|
|
@@ -196,7 +196,6 @@ class DataFrame:
|
|
|
196
196
|
# Create lineage metadata
|
|
197
197
|
lineage = LineageMetadata(project_path=str(manager.project_path))
|
|
198
198
|
lineage.add_source(dataset)
|
|
199
|
-
lineage.add_operation(f"read_dataset({dataset.slug}, format={format})")
|
|
200
199
|
|
|
201
200
|
# Return wrapped DataFrame
|
|
202
201
|
return cls(data=df, lineage=lineage, strict=strict, project_path=project_path)
|
|
@@ -294,7 +293,6 @@ class DataFrame:
|
|
|
294
293
|
# Create lineage metadata
|
|
295
294
|
lineage = LineageMetadata(project_path=str(manager.project_path))
|
|
296
295
|
lineage.add_source(dataset)
|
|
297
|
-
lineage.add_operation(f"read_csv({dataset.slug})")
|
|
298
296
|
|
|
299
297
|
# Return wrapped DataFrame
|
|
300
298
|
return cls(data=df, lineage=lineage, strict=strict, project_path=project_path)
|
|
@@ -363,11 +361,13 @@ class DataFrame:
|
|
|
363
361
|
absolute_path.parent.mkdir(parents=True, exist_ok=True)
|
|
364
362
|
self.data.to_csv(absolute_path, **kwargs)
|
|
365
363
|
|
|
366
|
-
#
|
|
367
|
-
self.
|
|
364
|
+
# Compute content hash for change detection
|
|
365
|
+
content_hash = compute_dataframe_hash(self.data)
|
|
368
366
|
|
|
369
367
|
# Persist lineage metadata to datasets.yaml
|
|
370
|
-
manager.update_output_lineage(
|
|
368
|
+
manager.update_output_lineage(
|
|
369
|
+
slug=dataset.slug, lineage=self.lineage, content_hash=content_hash, strict=self.strict_mode
|
|
370
|
+
)
|
|
371
371
|
|
|
372
372
|
def _infer_field_schema(self) -> List[FieldSchema]:
|
|
373
373
|
"""
|
|
@@ -410,11 +410,8 @@ class DataFrame:
|
|
|
410
410
|
# Perform the merge
|
|
411
411
|
merged_data = pd.merge(self.data, right.data, **kwargs)
|
|
412
412
|
|
|
413
|
-
# Combine lineage
|
|
413
|
+
# Combine lineage (sources from both DataFrames)
|
|
414
414
|
merged_lineage = self.lineage.merge(right.lineage)
|
|
415
|
-
merged_lineage.add_operation(
|
|
416
|
-
f"merge(left={len(self.lineage.sources)} sources, right={len(right.lineage.sources)} sources)"
|
|
417
|
-
)
|
|
418
415
|
|
|
419
416
|
return DataFrame(
|
|
420
417
|
data=merged_data,
|
|
@@ -437,11 +434,8 @@ class DataFrame:
|
|
|
437
434
|
# Perform the join
|
|
438
435
|
joined_data = self.data.join(other.data, **kwargs)
|
|
439
436
|
|
|
440
|
-
# Combine lineage
|
|
437
|
+
# Combine lineage (sources from both DataFrames)
|
|
441
438
|
joined_lineage = self.lineage.merge(other.lineage)
|
|
442
|
-
joined_lineage.add_operation(
|
|
443
|
-
f"join(left={len(self.lineage.sources)} sources, right={len(other.lineage.sources)} sources)"
|
|
444
|
-
)
|
|
445
439
|
|
|
446
440
|
return DataFrame(
|
|
447
441
|
data=joined_data,
|
|
@@ -467,16 +461,11 @@ class DataFrame:
|
|
|
467
461
|
# Concatenate
|
|
468
462
|
concatenated_data = pd.concat(all_dfs, **kwargs)
|
|
469
463
|
|
|
470
|
-
# Combine lineage from all DataFrames
|
|
464
|
+
# Combine lineage (sources from all DataFrames)
|
|
471
465
|
combined_lineage = self.lineage
|
|
472
466
|
for other in others:
|
|
473
467
|
combined_lineage = combined_lineage.merge(other.lineage)
|
|
474
468
|
|
|
475
|
-
combined_lineage.add_operation(
|
|
476
|
-
f"concat({len(others) + 1} dataframes, "
|
|
477
|
-
f"{sum(len(df.lineage.sources) for df in [self] + others)} total sources)"
|
|
478
|
-
)
|
|
479
|
-
|
|
480
469
|
return DataFrame(
|
|
481
470
|
data=concatenated_data,
|
|
482
471
|
lineage=combined_lineage,
|
|
@@ -484,42 +473,12 @@ class DataFrame:
|
|
|
484
473
|
project_path=self.lineage.project_path,
|
|
485
474
|
)
|
|
486
475
|
|
|
487
|
-
def
|
|
488
|
-
"""
|
|
489
|
-
Apply a transformation operation to the DataFrame.
|
|
490
|
-
|
|
491
|
-
Args:
|
|
492
|
-
operation: Function that takes a pandas DataFrame and returns a DataFrame.
|
|
493
|
-
description: Human-readable description of the operation.
|
|
494
|
-
|
|
495
|
-
Returns:
|
|
496
|
-
A new DataFrame with the operation applied and recorded in lineage.
|
|
497
|
-
"""
|
|
498
|
-
# Apply the operation
|
|
499
|
-
new_data = operation(self.data)
|
|
500
|
-
|
|
501
|
-
# Copy lineage and add operation
|
|
502
|
-
new_lineage = LineageMetadata(
|
|
503
|
-
sources=self.lineage.sources.copy(),
|
|
504
|
-
operations=self.lineage.operations.copy(),
|
|
505
|
-
project_path=self.lineage.project_path,
|
|
506
|
-
)
|
|
507
|
-
new_lineage.add_operation(description)
|
|
508
|
-
|
|
509
|
-
return DataFrame(
|
|
510
|
-
data=new_data,
|
|
511
|
-
lineage=new_lineage,
|
|
512
|
-
strict=self.strict_mode,
|
|
513
|
-
project_path=self.lineage.project_path,
|
|
514
|
-
)
|
|
515
|
-
|
|
516
|
-
def _wrap_result(self, result: Any, operation: Optional[str] = None) -> Any:
|
|
476
|
+
def _wrap_result(self, result: Any) -> Any:
|
|
517
477
|
"""
|
|
518
478
|
Wrap a pandas result in a Sunstone DataFrame if applicable.
|
|
519
479
|
|
|
520
480
|
Args:
|
|
521
481
|
result: The result from a pandas operation.
|
|
522
|
-
operation: Name of the operation performed. If None, no operation is recorded.
|
|
523
482
|
|
|
524
483
|
Returns:
|
|
525
484
|
Wrapped DataFrame if result is a DataFrame, otherwise the result.
|
|
@@ -527,11 +486,8 @@ class DataFrame:
|
|
|
527
486
|
if isinstance(result, pd.DataFrame):
|
|
528
487
|
new_lineage = LineageMetadata(
|
|
529
488
|
sources=self.lineage.sources.copy(),
|
|
530
|
-
operations=self.lineage.operations.copy(),
|
|
531
489
|
project_path=self.lineage.project_path,
|
|
532
490
|
)
|
|
533
|
-
if operation is not None:
|
|
534
|
-
new_lineage.add_operation(operation)
|
|
535
491
|
|
|
536
492
|
return DataFrame(
|
|
537
493
|
data=result,
|
|
@@ -541,28 +497,6 @@ class DataFrame:
|
|
|
541
497
|
)
|
|
542
498
|
return result
|
|
543
499
|
|
|
544
|
-
# Methods that don't represent meaningful data transformations
|
|
545
|
-
# These return DataFrames but shouldn't be tracked in lineage
|
|
546
|
-
_NON_TRACKING_METHODS = frozenset(
|
|
547
|
-
{
|
|
548
|
-
# Copy operations - same data, no transformation
|
|
549
|
-
"copy",
|
|
550
|
-
# Index operations - same data, different index
|
|
551
|
-
"reset_index",
|
|
552
|
-
"set_index",
|
|
553
|
-
"reindex",
|
|
554
|
-
# Type conversions without data change
|
|
555
|
-
"astype",
|
|
556
|
-
"infer_objects",
|
|
557
|
-
# Column/index renaming - same data, different labels
|
|
558
|
-
"rename",
|
|
559
|
-
"rename_axis",
|
|
560
|
-
# Reshaping without data loss
|
|
561
|
-
"T",
|
|
562
|
-
"transpose",
|
|
563
|
-
}
|
|
564
|
-
)
|
|
565
|
-
|
|
566
500
|
def __getattr__(self, name: str) -> Any:
|
|
567
501
|
"""
|
|
568
502
|
Delegate attribute access to the underlying pandas DataFrame.
|
|
@@ -583,14 +517,11 @@ class DataFrame:
|
|
|
583
517
|
|
|
584
518
|
def wrapper(*args: Any, **kwargs: Any) -> Any:
|
|
585
519
|
result = attr(*args, **kwargs)
|
|
586
|
-
|
|
587
|
-
if name in DataFrame._NON_TRACKING_METHODS:
|
|
588
|
-
return self._wrap_result(result, operation=None)
|
|
589
|
-
return self._wrap_result(result, operation=f"{name}")
|
|
520
|
+
return self._wrap_result(result)
|
|
590
521
|
|
|
591
522
|
return wrapper
|
|
592
523
|
|
|
593
|
-
return self._wrap_result(attr
|
|
524
|
+
return self._wrap_result(attr)
|
|
594
525
|
|
|
595
526
|
def __getitem__(self, key: Any) -> Any:
|
|
596
527
|
"""
|
|
@@ -603,9 +534,7 @@ class DataFrame:
|
|
|
603
534
|
The item from the underlying DataFrame, wrapped if it's a DataFrame.
|
|
604
535
|
"""
|
|
605
536
|
result = self.data[key]
|
|
606
|
-
|
|
607
|
-
# not a meaningful transformation
|
|
608
|
-
return self._wrap_result(result, operation=None)
|
|
537
|
+
return self._wrap_result(result)
|
|
609
538
|
|
|
610
539
|
def __setitem__(self, key: Any, value: Any) -> None:
|
|
611
540
|
"""
|
|
@@ -616,14 +545,12 @@ class DataFrame:
|
|
|
616
545
|
value: Value to assign.
|
|
617
546
|
"""
|
|
618
547
|
self.data[key] = value
|
|
619
|
-
#
|
|
620
|
-
|
|
548
|
+
# Don't track column assignments automatically
|
|
549
|
+
# Users should use add_operation() for meaningful transformations
|
|
621
550
|
|
|
622
551
|
def __repr__(self) -> str:
|
|
623
552
|
"""String representation of the DataFrame."""
|
|
624
|
-
lineage_info = (
|
|
625
|
-
f"\n\nLineage: {len(self.lineage.sources)} source(s), {len(self.lineage.operations)} operation(s)"
|
|
626
|
-
)
|
|
553
|
+
lineage_info = f"\n\nLineage: {len(self.lineage.sources)} source(s)"
|
|
627
554
|
return repr(self.data) + lineage_info
|
|
628
555
|
|
|
629
556
|
def __str__(self) -> str:
|
sunstone/datasets.py
CHANGED
|
@@ -380,22 +380,30 @@ class DatasetsManager:
|
|
|
380
380
|
|
|
381
381
|
raise DatasetNotFoundError(f"Output dataset with slug '{slug}' not found")
|
|
382
382
|
|
|
383
|
-
def update_output_lineage(
|
|
383
|
+
def update_output_lineage(
|
|
384
|
+
self, slug: str, lineage: LineageMetadata, content_hash: str, strict: bool = False
|
|
385
|
+
) -> None:
|
|
384
386
|
"""
|
|
385
387
|
Update lineage metadata for an output dataset.
|
|
386
388
|
|
|
389
|
+
The timestamp is only updated when the content hash changes, preventing
|
|
390
|
+
unnecessary updates when the data hasn't changed.
|
|
391
|
+
|
|
387
392
|
In strict mode, validates that the lineage matches what would be written
|
|
388
393
|
without modifying the file. In relaxed mode, updates the file with lineage.
|
|
389
394
|
|
|
390
395
|
Args:
|
|
391
396
|
slug: The slug of the output dataset to update.
|
|
392
397
|
lineage: The lineage metadata to persist.
|
|
398
|
+
content_hash: SHA256 hash of the DataFrame content.
|
|
393
399
|
strict: If True, validate without modifying. If False, update the file.
|
|
394
400
|
|
|
395
401
|
Raises:
|
|
396
402
|
DatasetNotFoundError: If the dataset doesn't exist.
|
|
397
403
|
DatasetValidationError: In strict mode, if lineage differs from what's in the file.
|
|
398
404
|
"""
|
|
405
|
+
from datetime import datetime
|
|
406
|
+
|
|
399
407
|
# Find the output dataset
|
|
400
408
|
dataset_idx = None
|
|
401
409
|
for i, dataset_data in enumerate(self._data["outputs"]):
|
|
@@ -406,6 +414,21 @@ class DatasetsManager:
|
|
|
406
414
|
if dataset_idx is None:
|
|
407
415
|
raise DatasetNotFoundError(f"Output dataset with slug '{slug}' not found")
|
|
408
416
|
|
|
417
|
+
# Get existing lineage data if present
|
|
418
|
+
existing_lineage = self._data["outputs"][dataset_idx].get("lineage", {})
|
|
419
|
+
existing_hash = existing_lineage.get("content_hash")
|
|
420
|
+
existing_timestamp = existing_lineage.get("created_at")
|
|
421
|
+
|
|
422
|
+
# Determine if content has changed
|
|
423
|
+
content_changed = existing_hash != content_hash
|
|
424
|
+
|
|
425
|
+
# Only update timestamp if content changed
|
|
426
|
+
if content_changed:
|
|
427
|
+
timestamp = datetime.now().isoformat()
|
|
428
|
+
else:
|
|
429
|
+
# Preserve existing timestamp
|
|
430
|
+
timestamp = existing_timestamp
|
|
431
|
+
|
|
409
432
|
# Build lineage metadata to add
|
|
410
433
|
lineage_data: dict[str, Any] = {}
|
|
411
434
|
|
|
@@ -414,15 +437,14 @@ class DatasetsManager:
|
|
|
414
437
|
{
|
|
415
438
|
"slug": src.slug,
|
|
416
439
|
"name": src.name,
|
|
440
|
+
"location": src.location,
|
|
417
441
|
}
|
|
418
442
|
for src in lineage.sources
|
|
419
443
|
]
|
|
420
444
|
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
if lineage.created_at:
|
|
425
|
-
lineage_data["created_at"] = lineage.created_at.isoformat()
|
|
445
|
+
lineage_data["content_hash"] = content_hash
|
|
446
|
+
if timestamp:
|
|
447
|
+
lineage_data["created_at"] = timestamp
|
|
426
448
|
|
|
427
449
|
# Create a copy of the data with updated lineage
|
|
428
450
|
updated_data = self._data.copy()
|
sunstone/lineage.py
CHANGED
|
@@ -2,9 +2,13 @@
|
|
|
2
2
|
Lineage metadata structures for tracking data provenance.
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
|
+
import hashlib
|
|
5
6
|
from dataclasses import dataclass, field
|
|
6
7
|
from datetime import datetime
|
|
7
|
-
from typing import Any, Dict, List, Optional
|
|
8
|
+
from typing import TYPE_CHECKING, Any, Dict, List, Optional
|
|
9
|
+
|
|
10
|
+
if TYPE_CHECKING:
|
|
11
|
+
import pandas as pd
|
|
8
12
|
|
|
9
13
|
|
|
10
14
|
@dataclass
|
|
@@ -88,23 +92,41 @@ class DatasetMetadata:
|
|
|
88
92
|
"""Type of dataset: 'input' or 'output'."""
|
|
89
93
|
|
|
90
94
|
|
|
95
|
+
def compute_dataframe_hash(df: "pd.DataFrame") -> str:
|
|
96
|
+
"""
|
|
97
|
+
Compute a fast SHA256 hash of a pandas DataFrame's content.
|
|
98
|
+
|
|
99
|
+
Uses pickle serialization for a consistent, fast representation of the data.
|
|
100
|
+
|
|
101
|
+
Args:
|
|
102
|
+
df: The pandas DataFrame to hash.
|
|
103
|
+
|
|
104
|
+
Returns:
|
|
105
|
+
A SHA256 hex digest string representing the DataFrame content.
|
|
106
|
+
"""
|
|
107
|
+
import pickle
|
|
108
|
+
|
|
109
|
+
# Use pickle protocol 5 for efficiency; hash the bytes directly
|
|
110
|
+
data_bytes = pickle.dumps(df, protocol=5)
|
|
111
|
+
return hashlib.sha256(data_bytes).hexdigest()
|
|
112
|
+
|
|
113
|
+
|
|
91
114
|
@dataclass
|
|
92
115
|
class LineageMetadata:
|
|
93
116
|
"""
|
|
94
117
|
Lineage metadata tracking the provenance of data in a DataFrame.
|
|
95
118
|
|
|
96
|
-
This tracks all source datasets that contributed to the current DataFrame
|
|
97
|
-
including information about transformations and operations performed.
|
|
119
|
+
This tracks all source datasets that contributed to the current DataFrame.
|
|
98
120
|
"""
|
|
99
121
|
|
|
100
122
|
sources: List[DatasetMetadata] = field(default_factory=list)
|
|
101
123
|
"""List of source datasets that contributed to this data."""
|
|
102
124
|
|
|
103
|
-
|
|
104
|
-
"""
|
|
125
|
+
created_at: Optional[datetime] = None
|
|
126
|
+
"""Timestamp when this lineage was last updated (content changed)."""
|
|
105
127
|
|
|
106
|
-
|
|
107
|
-
"""
|
|
128
|
+
content_hash: Optional[str] = None
|
|
129
|
+
"""SHA256 hash of the DataFrame content, used to detect changes."""
|
|
108
130
|
|
|
109
131
|
project_path: Optional[str] = None
|
|
110
132
|
"""Path to the project directory containing datasets.yaml."""
|
|
@@ -119,15 +141,6 @@ class LineageMetadata:
|
|
|
119
141
|
if dataset not in self.sources:
|
|
120
142
|
self.sources.append(dataset)
|
|
121
143
|
|
|
122
|
-
def add_operation(self, operation: str) -> None:
|
|
123
|
-
"""
|
|
124
|
-
Record an operation performed on the data.
|
|
125
|
-
|
|
126
|
-
Args:
|
|
127
|
-
operation: Description of the operation.
|
|
128
|
-
"""
|
|
129
|
-
self.operations.append(operation)
|
|
130
|
-
|
|
131
144
|
def merge(self, other: "LineageMetadata") -> "LineageMetadata":
|
|
132
145
|
"""
|
|
133
146
|
Merge lineage from another DataFrame.
|
|
@@ -136,12 +149,10 @@ class LineageMetadata:
|
|
|
136
149
|
other: The other lineage metadata to merge.
|
|
137
150
|
|
|
138
151
|
Returns:
|
|
139
|
-
A new LineageMetadata with combined sources
|
|
152
|
+
A new LineageMetadata with combined sources.
|
|
140
153
|
"""
|
|
141
154
|
merged = LineageMetadata(
|
|
142
155
|
sources=self.sources.copy(),
|
|
143
|
-
operations=self.operations.copy(),
|
|
144
|
-
created_at=datetime.now(),
|
|
145
156
|
project_path=self.project_path or other.project_path,
|
|
146
157
|
)
|
|
147
158
|
|
|
@@ -150,9 +161,6 @@ class LineageMetadata:
|
|
|
150
161
|
if source not in merged.sources:
|
|
151
162
|
merged.sources.append(source)
|
|
152
163
|
|
|
153
|
-
# Combine operations
|
|
154
|
-
merged.operations.extend(other.operations)
|
|
155
|
-
|
|
156
164
|
return merged
|
|
157
165
|
|
|
158
166
|
def get_licenses(self) -> List[str]:
|
|
@@ -175,16 +183,18 @@ class LineageMetadata:
|
|
|
175
183
|
Returns:
|
|
176
184
|
Dictionary containing lineage information.
|
|
177
185
|
"""
|
|
178
|
-
|
|
186
|
+
result: Dict[str, Any] = {
|
|
179
187
|
"sources": [
|
|
180
188
|
{
|
|
181
|
-
"name": src.name,
|
|
182
189
|
"slug": src.slug,
|
|
190
|
+
"name": src.name,
|
|
183
191
|
"location": src.location,
|
|
184
192
|
}
|
|
185
193
|
for src in self.sources
|
|
186
194
|
],
|
|
187
|
-
"operations": self.operations,
|
|
188
|
-
"created_at": self.created_at.isoformat(),
|
|
189
|
-
"licenses": self.get_licenses(),
|
|
190
195
|
}
|
|
196
|
+
if self.created_at is not None:
|
|
197
|
+
result["created_at"] = self.created_at.isoformat()
|
|
198
|
+
if self.content_hash is not None:
|
|
199
|
+
result["content_hash"] = self.content_hash
|
|
200
|
+
return result
|
|
@@ -1,22 +1,20 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sunstone-py
|
|
3
|
-
Version: 0.5.1
|
|
3
|
+
Version: 0.5.3
|
|
4
4
|
Summary: Python library for managing datasets with lineage tracking in Sunstone projects
|
|
5
5
|
Author-email: Sunstone Institute <stig@sunstone.institute>
|
|
6
6
|
License: MIT
|
|
7
7
|
Project-URL: Homepage, https://github.com/sunstoneinstitute/sunstone-py
|
|
8
8
|
Project-URL: Documentation, https://sunstoneinstitute.github.io/sunstone-py/
|
|
9
9
|
Project-URL: Repository, https://github.com/sunstoneinstitute/sunstone-py
|
|
10
|
-
Classifier: Development Status ::
|
|
10
|
+
Classifier: Development Status :: 4 - Beta
|
|
11
11
|
Classifier: Intended Audience :: Science/Research
|
|
12
12
|
Classifier: License :: OSI Approved :: MIT License
|
|
13
13
|
Classifier: Programming Language :: Python :: 3
|
|
14
|
-
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
-
Classifier: Programming Language :: Python :: 3.11
|
|
16
14
|
Classifier: Programming Language :: Python :: 3.12
|
|
17
15
|
Classifier: Programming Language :: Python :: 3.13
|
|
18
16
|
Classifier: Programming Language :: Python :: 3.14
|
|
19
|
-
Requires-Python: >=3.10
|
|
17
|
+
Requires-Python: >=3.12
|
|
20
18
|
Description-Content-Type: text/markdown
|
|
21
19
|
License-File: LICENSE
|
|
22
20
|
Requires-Dist: frictionless>=5.18.1
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
sunstone/__init__.py,sha256=LC0ZtmxP26eXPLKejbg7UStcHOnE_lwttNTL4m3F4yM,2032
|
|
2
|
+
sunstone/_release.py,sha256=MQNaUD7mSK6h8vu6EIgJuaMlAxuFxv82NQwHgBpLZm4,14907
|
|
3
|
+
sunstone/dataframe.py,sha256=rFGuMq-63Haua_QQfR3E708KYc1g43yEyCej11_Gl3A,20679
|
|
4
|
+
sunstone/datasets.py,sha256=V2psK5G2IwpxNFL_DdoVistIT8O53ASbJ0Y3nPDtEx4,21970
|
|
5
|
+
sunstone/exceptions.py,sha256=fiixXazur3LtQGy21bGEaSr356DObFcYxQJ3FvOxNec,623
|
|
6
|
+
sunstone/lineage.py,sha256=SRCpdsYDeAPTO2H-3ul8BP8AUihmhezcV8Ggwa0eTfs,5460
|
|
7
|
+
sunstone/pandas.py,sha256=CLEqIIgTbMmpH73TPy_vDUPxQa37Hpmqn4r6No8PJwo,8188
|
|
8
|
+
sunstone/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
|
+
sunstone/validation.py,sha256=1356vcUc72a1zGBUe9Xjrcb5h41Xo53PaK2nnQ_FuSM,8286
|
|
10
|
+
sunstone_py-0.5.3.dist-info/licenses/LICENSE,sha256=pB6VuR4QRjwjMjy8RSNGho-N1SUdu07ntIhT5lrhkzU,1078
|
|
11
|
+
sunstone_py-0.5.3.dist-info/METADATA,sha256=qwq_KyzHzGljeHFUUJwVEGJL1l-JrAxiB8RVS-8bqt4,9460
|
|
12
|
+
sunstone_py-0.5.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
13
|
+
sunstone_py-0.5.3.dist-info/entry_points.txt,sha256=0h6E88rH9a_503BAzXvFPR-UfmkrRFjcOf29DXgJNjk,51
|
|
14
|
+
sunstone_py-0.5.3.dist-info/top_level.txt,sha256=A2fW-7JO10rlx_L28Bc4FVvWt2R8kgvS8_TGPBhQp3c,9
|
|
15
|
+
sunstone_py-0.5.3.dist-info/RECORD,,
|
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
sunstone/__init__.py,sha256=LC0ZtmxP26eXPLKejbg7UStcHOnE_lwttNTL4m3F4yM,2032
|
|
2
|
-
sunstone/_release.py,sha256=_yjAl_vZQ_5IYr0ugPlqtmUvsGnyGDx7LyiI_2HToVM,14649
|
|
3
|
-
sunstone/dataframe.py,sha256=UJgQx7auiNb6hSIvhB8EQs2afu-7S22xdWL5DZUr29g,23602
|
|
4
|
-
sunstone/datasets.py,sha256=LdHk3Vkfc7QH2VxhSskRCm9wUFSkldCmgS_1c2KDAPA,21142
|
|
5
|
-
sunstone/exceptions.py,sha256=fiixXazur3LtQGy21bGEaSr356DObFcYxQJ3FvOxNec,623
|
|
6
|
-
sunstone/lineage.py,sha256=B9GKMu5-v8Izos5G40K_EvsCPJL3Z2Tg1T_Fc7ezSMI,5240
|
|
7
|
-
sunstone/pandas.py,sha256=CLEqIIgTbMmpH73TPy_vDUPxQa37Hpmqn4r6No8PJwo,8188
|
|
8
|
-
sunstone/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
|
-
sunstone/validation.py,sha256=1356vcUc72a1zGBUe9Xjrcb5h41Xo53PaK2nnQ_FuSM,8286
|
|
10
|
-
sunstone_py-0.5.1.dist-info/licenses/LICENSE,sha256=pB6VuR4QRjwjMjy8RSNGho-N1SUdu07ntIhT5lrhkzU,1078
|
|
11
|
-
sunstone_py-0.5.1.dist-info/METADATA,sha256=DMLR03NMB5_t14rsBo4GtqY0oQFHnKQtbdUGEfxFcq8,9563
|
|
12
|
-
sunstone_py-0.5.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
13
|
-
sunstone_py-0.5.1.dist-info/entry_points.txt,sha256=0h6E88rH9a_503BAzXvFPR-UfmkrRFjcOf29DXgJNjk,51
|
|
14
|
-
sunstone_py-0.5.1.dist-info/top_level.txt,sha256=A2fW-7JO10rlx_L28Bc4FVvWt2R8kgvS8_TGPBhQp3c,9
|
|
15
|
-
sunstone_py-0.5.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|