woolly 0.1.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,130 @@
1
+ """
2
+ Rust/crates.io language provider.
3
+
4
+ This provider fetches package information from crates.io and checks
5
+ Fedora repositories for Rust crate packages.
6
+ """
7
+
8
+ from typing import Optional
9
+
10
+ from woolly import http
11
+ from woolly.cache import DEFAULT_CACHE_TTL, read_cache, write_cache
12
+ from woolly.debug import (
13
+ log_api_request,
14
+ log_api_response,
15
+ log_cache_hit,
16
+ log_cache_miss,
17
+ )
18
+ from woolly.languages.base import Dependency, LanguageProvider, PackageInfo
19
+
20
# Base URL of the crates.io API; crate names (and, for dependency lookups,
# "/<version>/dependencies") are appended to it by RustProvider.
CRATES_API = "https://crates.io/api/v1/crates"
21
+
22
+
23
class RustProvider(LanguageProvider):
    """
    Provider for Rust crates via crates.io.

    Crate metadata and dependency lists are fetched from the crates.io API
    and stored through ``woolly.cache`` under ``cache_namespace`` so that
    repeated lookups within ``DEFAULT_CACHE_TTL`` can be answered from cache.
    """

    name = "rust"
    display_name = "Rust"
    registry_name = "crates.io"
    fedora_provides_prefix = "crate"
    cache_namespace = "crates"

    @staticmethod
    def _package_info_from_payload(payload: dict) -> PackageInfo:
        """Build a PackageInfo from a ``/crates/<name>`` response payload."""
        crate = payload["crate"]
        return PackageInfo(
            name=crate["name"],
            latest_version=crate["newest_version"],
            description=crate.get("description"),
            homepage=crate.get("homepage"),
            repository=crate.get("repository"),
        )

    @staticmethod
    def _dependencies_from_payload(entries: list) -> list[Dependency]:
        """Convert raw crates.io dependency entries into Dependency objects."""
        return [
            Dependency(
                name=entry["crate_id"],
                version_requirement=entry["req"],
                optional=entry.get("optional", False),
                kind=entry.get("kind", "normal"),
            )
            for entry in entries
        ]

    def fetch_package_info(self, package_name: str) -> Optional[PackageInfo]:
        """
        Fetch crate information from crates.io.

        Args:
            package_name: Name of the crate to look up.

        Returns:
            PackageInfo for the crate, or None when crates.io answers 404
            (the "not found" answer is cached as ``False``).

        Raises:
            RuntimeError: If crates.io answers with a status other than
                200 or 404.
        """
        cache_key = f"info:{package_name}"
        cached = read_cache(self.cache_namespace, cache_key, DEFAULT_CACHE_TTL)
        if cached is not None:
            log_cache_hit(self.cache_namespace, cache_key)
            # ``False`` is the explicit "not found" marker written below.
            if cached is False:
                return None
            return self._package_info_from_payload(cached)

        log_cache_miss(self.cache_namespace, cache_key)
        url = f"{CRATES_API}/{package_name}"
        log_api_request("GET", url)
        r = http.get(url)
        log_api_response(r.status_code, r.text[:500] if r.text else None)

        if r.status_code == 404:
            write_cache(self.cache_namespace, cache_key, False)
            return None
        if r.status_code != 200:
            raise RuntimeError(
                f"Failed to fetch metadata for crate {package_name}: {r.status_code}"
            )

        data = r.json()
        write_cache(self.cache_namespace, cache_key, data)
        return self._package_info_from_payload(data)

    def fetch_dependencies(self, package_name: str, version: str) -> list[Dependency]:
        """
        Fetch dependencies for a specific crate version.

        Args:
            package_name: Name of the crate.
            version: Version of the crate whose dependencies are wanted.

        Returns:
            The crate's dependencies. An empty list is returned when the
            request does not succeed; this method never raises for HTTP
            status errors.
        """
        cache_key = f"deps:{package_name}:{version}"
        cached = read_cache(self.cache_namespace, cache_key, DEFAULT_CACHE_TTL)
        if cached is not None:
            log_cache_hit(self.cache_namespace, cache_key)
            return self._dependencies_from_payload(cached)

        log_cache_miss(self.cache_namespace, cache_key)
        url = f"{CRATES_API}/{package_name}/{version}/dependencies"
        log_api_request("GET", url)
        r = http.get(url)
        log_api_response(r.status_code, r.text[:500] if r.text else None)

        if r.status_code == 404:
            # A 404 is treated as a definitive answer, so it is safe to
            # remember it, mirroring fetch_package_info().
            write_cache(self.cache_namespace, cache_key, [])
            return []
        if r.status_code != 200:
            # Any other failure (rate limiting, server error, ...) may be
            # temporary. Stay best-effort for the caller, but do not cache
            # the empty result: caching it would hide the crate's real
            # dependencies until the cache entry expires.
            return []

        deps = r.json().get("dependencies", [])
        write_cache(self.cache_namespace, cache_key, deps)
        return self._dependencies_from_payload(deps)

    def get_alternative_names(self, package_name: str) -> list[str]:
        """
        Get alternative names to try for crate lookup.

        Rust crates can use either hyphens or underscores in names,
        but they're treated as equivalent by Cargo.

        Returns:
            The all-underscore spelling followed by the all-hyphen spelling,
            each included only when it differs from ``package_name``.
        """
        variants = (
            package_name.replace("-", "_"),
            package_name.replace("_", "-"),
        )
        return [variant for variant in variants if variant != package_name]
woolly/progress.py ADDED
@@ -0,0 +1,69 @@
1
+ """
2
+ Progress tracking utilities for dependency analysis.
3
+ """
4
+
5
+ from rich.console import Console
6
+ from rich.progress import (
7
+ BarColumn,
8
+ Progress,
9
+ SpinnerColumn,
10
+ TaskID,
11
+ TaskProgressColumn,
12
+ TextColumn,
13
+ TimeElapsedColumn,
14
+ )
15
+
16
+
17
class ProgressTracker:
    """Drives a Rich progress display while the dependency tree is analysed."""

    def __init__(self, console: Console):
        """Create the progress display bound to ``console`` (not yet started)."""
        columns = (
            SpinnerColumn(),
            TextColumn("[bold blue]{task.description}"),
            BarColumn(bar_width=30),
            TaskProgressColumn(),
            TextColumn("•"),
            TimeElapsedColumn(),
            TextColumn("[dim]{task.fields[status]}[/dim]"),
        )
        self.console = console
        self.progress = Progress(*columns, console=console)
        # Placeholder id; replaced by the real task id in start().
        self.task: TaskID = TaskID(0)
        self.processed = 0
        self.total_discovered = 0

    def start(self, description: str = "Analyzing dependencies") -> None:
        """Register the task (with no known total yet) and start rendering."""
        task_id = self.progress.add_task(
            description, total=None, status="starting..."
        )
        self.task = task_id
        self.progress.start()

    def stop(self) -> None:
        """Stop rendering the progress display."""
        self.progress.stop()

    def update(self, package_name: str, discovered: int = 0) -> None:
        """
        Record one processed package and refresh the display.

        Args:
            package_name: Package currently being checked (shown as status).
            discovered: Number of newly discovered packages to add to the
                running total.
        """
        self.processed += 1
        self.total_discovered += discovered

        fields = {"status": f"checking: {package_name}"}
        # Completed/total are only reported once something was discovered;
        # before that only the status text is refreshed.
        if self.total_discovered > 0:
            fields["completed"] = self.processed
            fields["total"] = self.processed + self.total_discovered
        self.progress.update(self.task, **fields)

    def finish(self) -> None:
        """Mark the task complete by setting total equal to processed."""
        done = self.processed
        self.progress.update(
            self.task,
            completed=done,
            total=done,
            status="[green]complete![/green]",
        )
@@ -0,0 +1,111 @@
1
+ """
2
+ Report generators registry.
3
+
4
+ This module provides the registry of report generators and helpers to look them up.
5
+ To add a new format, create a module in this directory that defines a class
6
+ inheriting from Reporter and add it to REPORTERS dict.
7
+ """
8
+
9
+ from typing import Optional
10
+
11
+ from pydantic import BaseModel, Field
12
+ from rich.console import Console
13
+
14
+ from woolly.reporters.base import ReportData, Reporter, strip_markup
15
+ from woolly.reporters.json import JsonReporter
16
+ from woolly.reporters.markdown import MarkdownReporter
17
+ from woolly.reporters.stdout import StdoutReporter
18
+
19
+
20
class ReporterInfo(BaseModel):
    """
    Information about an available reporter.

    Instances are built by ``list_reporters()`` from the ``REPORTERS`` and
    ``ALIASES`` registries.
    """

    # Canonical format identifier (a key of REPORTERS).
    format_id: str
    # Human-readable description taken from the reporter class.
    description: str
    # Alias names from ALIASES that resolve to this format; empty if none.
    aliases: list[str] = Field(default_factory=list)
26
+
27
+
28
# Registry of available reporters
# Key: format identifier (used in CLI)
# Value: Reporter class
# get_reporter() instantiates the class; list_reporters() reads its
# ``description`` attribute.
REPORTERS: dict[str, type[Reporter]] = {
    "stdout": StdoutReporter,
    "markdown": MarkdownReporter,
    "json": JsonReporter,
}

# Aliases for convenience
# Key: alias accepted by get_reporter()
# Value: canonical format identifier (a key of REPORTERS)
ALIASES: dict[str, str] = {
    "md": "markdown",
    "console": "stdout",
    "terminal": "stdout",
}
43
+
44
+
45
def get_reporter(
    format_name: str, console: Optional[Console] = None
) -> Optional[Reporter]:
    """
    Build a reporter instance for a format identifier or alias.

    Args:
        format_name: Format identifier or alias (e.g., "json", "markdown",
            "md"). Matching is case-insensitive.
        console: Console instance handed to the stdout reporter.

    Returns:
        Instantiated Reporter, or None if the format is unknown.
    """
    requested = format_name.lower()
    # Fall back to the requested name itself when it is not an alias.
    canonical = ALIASES.get(requested, requested)

    reporter_cls = REPORTERS.get(canonical)
    if reporter_cls is None:
        return None

    # The stdout reporter is the only one that receives the console; without
    # a console it is constructed with no arguments like every other reporter.
    if canonical == "stdout" and console:
        return StdoutReporter(console=console)
    return reporter_cls()
72
+
73
+
74
def list_reporters() -> list[ReporterInfo]:
    """
    Describe every registered reporter.

    Returns:
        One ReporterInfo per entry of REPORTERS, in registry order, each
        carrying the aliases from ALIASES that point at it.
    """
    # Invert ALIASES once: canonical format -> aliases, in declaration order.
    aliases_by_format: dict[str, list[str]] = {}
    for alias, target in ALIASES.items():
        aliases_by_format.setdefault(target, []).append(alias)

    return [
        ReporterInfo(
            format_id=format_id,
            description=reporter_class.description,
            aliases=aliases_by_format.get(format_id, []),
        )
        for format_id, reporter_class in REPORTERS.items()
    ]
93
+
94
+
95
def get_available_formats() -> list[str]:
    """Return the canonical format identifiers, in registry order."""
    return [format_id for format_id in REPORTERS]
98
+
99
+
100
# Public API of the reporters package: the base types, the concrete
# reporters, and the lookup helpers defined in this module.
__all__ = [
    "Reporter",
    "ReportData",
    "ReporterInfo",
    "StdoutReporter",
    "MarkdownReporter",
    "JsonReporter",
    "get_reporter",
    "list_reporters",
    "get_available_formats",
    "strip_markup",
]
@@ -0,0 +1,213 @@
1
+ """
2
+ Base abstract class defining the contract for report generators.
3
+
4
+ To add support for a new report format, create a new module in the `reporters/` directory
5
+ that implements a class inheriting from `Reporter`. The class must implement
6
+ all abstract methods defined here.
7
+
8
+ Example:
9
+ class HtmlReporter(Reporter):
10
+ name = "html"
11
+ description = "HTML report with interactive tree"
12
+ file_extension = "html"
13
+
14
+ def generate(self, data: ReportData) -> str:
15
+ # Generate HTML content
16
+ ...
17
+ """
18
+
19
+ import re
20
+ from abc import ABC, abstractmethod
21
+ from datetime import datetime
22
+ from functools import cached_property
23
+ from pathlib import Path
24
+ from typing import Any, Optional
25
+
26
+ from pydantic import BaseModel, ConfigDict, Field
27
+
28
+
29
# Same tag shape Rich's markup parser recognises: optional leading
# backslashes, then "[" + a tag starting with a lowercase letter, "#", "/"
# or "@", up to the closing "]". Bracketed text that Rich would print
# literally (e.g. "[1.0]") is therefore left alone.
_RICH_TAG_RE = re.compile(r"(\\*)\[([a-z#/@][^\[\]]*)\]")


def _replace_rich_tag(match: "re.Match[str]") -> str:
    """Return the plain-text replacement for one tag-like match."""
    escapes, tag = match.groups()
    # Rich semantics: each pair of backslashes is one literal backslash and
    # a remaining odd backslash escapes the bracket, making it literal text.
    literal_backslashes, is_escaped = divmod(len(escapes), 2)
    kept = "\\" * literal_backslashes
    if is_escaped:
        return f"{kept}[{tag}]"
    return kept


def strip_markup(text: str) -> str:
    """
    Strip Rich markup from text.

    Removes Rich markup tags like [bold], [/bold], [green], etc. Brackets
    escaped with a backslash (``\\[name]``), which Rich renders as literal
    text, are kept as ``[name]`` instead of being deleted, so bracketed data
    in a label survives for later parsing.

    Args:
        text: Text containing Rich markup.

    Returns:
        Plain text without markup.
    """
    return _RICH_TAG_RE.sub(_replace_rich_tag, text)
42
+
43
+
44
class ReportData(BaseModel):
    """
    All data needed to generate a report.

    Reporters receive one instance of this model and read from it; the
    derived set properties below are computed on first access and cached.
    """

    # Lets pydantic accept field types it cannot validate itself.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    # Package info
    root_package: str
    language: str
    registry: str

    # Statistics
    total_dependencies: int
    packaged_count: int
    missing_count: int
    missing_packages: list[str] = Field(default_factory=list)
    packaged_packages: list[str] = Field(default_factory=list)

    # Optional dependency statistics
    include_optional: bool = False
    optional_total: int = 0
    optional_packaged: int = 0
    optional_missing: int = 0
    optional_missing_packages: list[str] = Field(default_factory=list)

    # Full tree for detailed reports
    tree: Any  # Rich Tree object - not JSON serializable

    # Metadata
    # Defaults to the (naive, local) time at which the instance is created.
    timestamp: datetime = Field(default_factory=datetime.now)
    max_depth: int = 50
    version: Optional[str] = None

    @cached_property
    def required_missing_packages(self) -> set[str]:
        """Get the set of required (non-optional) missing packages."""
        # Everything missing minus what is listed as optional-missing.
        return set(self.missing_packages) - set(self.optional_missing_packages)

    @cached_property
    def optional_missing_set(self) -> set[str]:
        """Get the set of optional missing packages."""
        return set(self.optional_missing_packages)

    @cached_property
    def unique_packaged_packages(self) -> set[str]:
        """Get the unique set of packaged packages."""
        return set(self.packaged_packages)
90
+
91
+
92
class Reporter(ABC):
    """
    Abstract base class for report generators.

    Each report format (stdout, markdown, json, etc.) must implement this interface.

    Attributes:
        name: Short identifier for the format (e.g., "json", "markdown")
        description: Human-readable description
        file_extension: File extension for output files (None for stdout)
        writes_to_file: Whether this reporter writes to a file
    """

    # Class attributes that must be defined by subclasses
    name: str
    description: str
    file_extension: Optional[str] = None
    writes_to_file: bool = False

    @abstractmethod
    def generate(self, data: ReportData) -> str:
        """
        Generate the report content.

        Args:
            data: Report data containing all information.

        Returns:
            Report content as a string.
        """
        pass

    def get_output_filename(self, data: ReportData) -> str:
        """
        Get the output filename for file-based reporters.

        The package name is sanitised so the result is always a single path
        component: every run of characters other than word characters, "."
        and "-" (for example "/" or "@") is replaced by one underscore.

        Args:
            data: Report data.

        Returns:
            Filename string of the form
            ``woolly_<package>_<YYYYmmdd_HHMMSS>.<file_extension>``.
        """
        timestamp = data.timestamp.strftime("%Y%m%d_%H%M%S")
        # Path separators in a package name would otherwise turn the filename
        # into a nested (or escaping) path.
        safe_package = re.sub(r"[^\w.\-]+", "_", data.root_package)
        return f"woolly_{safe_package}_{timestamp}.{self.file_extension}"

    def write_report(
        self, data: ReportData, output_dir: Optional[Path] = None
    ) -> Optional[Path]:
        """
        Write the report to a file.

        Args:
            data: Report data.
            output_dir: Directory to write to. Defaults to current directory.
                It is created (including parents) if it does not exist.

        Returns:
            Path to the written file, or None for stdout reporters.
        """
        if not self.writes_to_file:
            return None

        content = self.generate(data)
        output_dir = output_dir or Path.cwd()
        output_dir.mkdir(parents=True, exist_ok=True)
        output_path = output_dir / self.get_output_filename(data)
        # Reports can contain non-ASCII characters such as "•" and "✓"; an
        # explicit encoding keeps the write independent of the locale.
        output_path.write_text(content, encoding="utf-8")
        return output_path

    # ----------------------------------------------------------------
    # Shared tree traversal utilities for subclasses
    # ----------------------------------------------------------------

    def _get_label(self, node) -> str:
        """
        Extract the label text from a tree node, handling nested Trees.

        Args:
            node: A Rich Tree node or string.

        Returns:
            The label text as a string ("" when the label is None).
        """
        # If it's a string, return it directly
        if isinstance(node, str):
            return node

        # Anything without a label attribute is simply stringified.
        if not hasattr(node, "label"):
            return str(node)

        label = node.label
        if label is None:
            return ""
        # If label is another Tree-like object (has its own label), recurse
        if hasattr(label, "label"):
            return self._get_label(label)
        return str(label)

    def _get_children(self, node) -> list:
        """
        Get all children from a tree node, flattening nested Trees.

        Args:
            node: A Rich Tree node.

        Returns:
            List of child nodes; empty when ``node`` has no ``children``.
        """
        if not hasattr(node, "children"):
            return []

        children = []
        for child in node.children:
            # If the child's label is itself a Tree, the child is only a
            # wrapper: use the wrapped tree in its place.
            if hasattr(child, "label") and hasattr(child.label, "children"):
                children.append(child.label)
            else:
                children.append(child)
        return children
@@ -0,0 +1,175 @@
1
+ """
2
+ JSON report generator.
3
+
4
+ Generates a JSON file with structured data for machine consumption.
5
+ """
6
+
7
+ import re
8
+ from typing import Optional
9
+
10
+ from pydantic import BaseModel, Field
11
+
12
+ from woolly.reporters.base import ReportData, Reporter, strip_markup
13
+
14
+
15
class TreeNodeData(BaseModel):
    """
    Structured data for a single node in the dependency tree.

    Populated by ``JsonReporter._parse_label`` from a tree label; fields the
    label does not provide keep their defaults.
    """

    # Label text with Rich markup removed and surrounding whitespace stripped.
    raw: str
    # Package name and version parsed from the label, when it matches.
    name: Optional[str] = None
    version: Optional[str] = None
    # True when the label contains "(optional)".
    optional: bool = False
    # One of "packaged", "not_packaged", "not_found", "visited",
    # "max_depth_reached", or None when no status was recognised.
    status: Optional[str] = None
    # Only set for "visited" nodes (True when the label contains "✓").
    is_packaged: Optional[bool] = None
    # Values parsed from the status text of packaged nodes.
    fedora_versions: list[str] = Field(default_factory=list)
    fedora_packages: list[str] = Field(default_factory=list)
    # Child nodes, converted recursively.
    dependencies: list["TreeNodeData"] = Field(default_factory=list)
27
+
28
+
29
class ReportMetadata(BaseModel):
    """
    Metadata for the JSON report.

    Filled by ``JsonReporter.generate`` from the corresponding ReportData
    fields.
    """

    # ISO 8601 string produced from the report timestamp.
    generated_at: str
    # Name of the generating tool; constant.
    tool: str = "woolly"
    root_package: str
    language: str
    registry: str
    version: Optional[str] = None
    max_depth: int
    include_optional: bool
40
+
41
+
42
class ReportSummary(BaseModel):
    """Summary statistics for the JSON report."""

    total_dependencies: int
    packaged_count: int
    missing_count: int
    # Forward reference: OptionalSummary is defined after this class.
    optional: "OptionalSummary"
49
+
50
+
51
class OptionalSummary(BaseModel):
    """
    Optional dependency statistics.

    Nested under ``ReportSummary.optional``; filled from the ``optional_*``
    counters of ReportData.
    """

    total: int
    packaged: int
    missing: int
57
+
58
+
59
class JsonReport(BaseModel):
    """
    Complete JSON report structure.

    This is the top-level object that ``JsonReporter.generate`` serialises.
    """

    metadata: ReportMetadata
    summary: ReportSummary
    # The three package lists are written sorted by JsonReporter.generate.
    # Required (non-optional) missing packages.
    missing_packages: list[str]
    missing_optional_packages: list[str]
    packaged_packages: list[str]
    # Root node of the converted dependency tree.
    dependency_tree: TreeNodeData
68
+
69
+
70
class JsonReporter(Reporter):
    """
    Reporter that generates a JSON file.

    The dependency tree is rebuilt from the labels of the Rich tree in
    ``ReportData.tree``: each label is stripped of markup and parsed into a
    ``TreeNodeData``.
    """

    name = "json"
    description = "JSON report file (machine-readable)"
    file_extension = "json"
    writes_to_file = True

    # Label layout: "package_name vX.Y.Z (optional) • status", where the
    # version and "(optional)" parts may be absent. The version must start
    # with a digit or dot and runs up to whitespace, "(" or "•", so
    # pre-release/build versions such as "1.0.0-rc.1" are accepted too.
    _LABEL_RE = re.compile(
        r"^(\S+)\s*(?:v([\d.][^\s•()]*))?\s*(?:\(optional\))?\s*•\s*(.+)$"
    )
    # "(1.0, 1.2)" -> comma-separated Fedora versions.
    _FEDORA_VERSIONS_RE = re.compile(r"\(([\d., ]+)\)")
    # "[pkg-a, pkg-b]" -> comma-separated Fedora package names.
    _FEDORA_PACKAGES_RE = re.compile(r"\[([^\]]+)\]")

    def generate(self, data: ReportData) -> str:
        """
        Generate JSON report content.

        Args:
            data: Report data containing all information.

        Returns:
            The report serialised as JSON, indented by two spaces.
        """
        metadata = ReportMetadata(
            generated_at=data.timestamp.isoformat(),
            root_package=data.root_package,
            language=data.language,
            registry=data.registry,
            version=data.version,
            max_depth=data.max_depth,
            include_optional=data.include_optional,
        )
        summary = ReportSummary(
            total_dependencies=data.total_dependencies,
            packaged_count=data.packaged_count,
            missing_count=data.missing_count,
            optional=OptionalSummary(
                total=data.optional_total,
                packaged=data.optional_packaged,
                missing=data.optional_missing,
            ),
        )
        report = JsonReport(
            metadata=metadata,
            summary=summary,
            # Sorted so the output is stable regardless of discovery order.
            missing_packages=sorted(data.required_missing_packages),
            missing_optional_packages=sorted(data.optional_missing_set),
            packaged_packages=sorted(data.unique_packaged_packages),
            dependency_tree=self._tree_to_model(data.tree),
        )
        return report.model_dump_json(indent=2)

    def _tree_to_model(self, tree) -> TreeNodeData:
        """Convert a Rich Tree (recursively) into a TreeNodeData model."""
        node_data = self._parse_label(self._get_label(tree))

        # Children come from the inherited helper, which unwraps nested trees.
        children = self._get_children(tree)
        if children:
            node_data.dependencies = [self._tree_to_model(child) for child in children]

        return node_data

    def _parse_label(self, label: str) -> TreeNodeData:
        """
        Parse a tree label into structured TreeNodeData.

        Args:
            label: Label text, possibly containing Rich markup.

        Returns:
            A node whose ``raw`` is the cleaned label; name, version, status
            and Fedora details are filled in when the label matches the
            expected layout.
        """
        clean_label = strip_markup(label)
        stripped = clean_label.strip()

        node = TreeNodeData(raw=stripped)
        node.optional = "(optional)" in clean_label

        match = self._LABEL_RE.match(stripped)
        if match is None:
            # Labels without the "name • status" layout: only marker nodes
            # are recognised.
            if "already visited" in clean_label:
                node.status = "visited"
                node.is_packaged = "✓" in clean_label
            elif "max depth" in clean_label:
                node.status = "max_depth_reached"
            return node

        node.name = match.group(1)
        if match.group(2):
            node.version = match.group(2)

        status_text = match.group(3).strip()
        status_lower = status_text.lower()

        # "not packaged" contains "packaged", so it must be tested first.
        if "not packaged" in status_lower:
            node.status = "not_packaged"
        elif "packaged" in status_lower:
            node.status = "packaged"
            ver_match = self._FEDORA_VERSIONS_RE.search(status_text)
            if ver_match:
                node.fedora_versions = [
                    v.strip() for v in ver_match.group(1).split(",")
                ]
            pkg_match = self._FEDORA_PACKAGES_RE.search(status_text)
            if pkg_match:
                node.fedora_packages = [
                    p.strip() for p in pkg_match.group(1).split(",")
                ]
        elif "not found" in status_lower:
            node.status = "not_found"
        elif "already visited" in status_lower:
            node.status = "visited"
            node.is_packaged = "✓" in status_text

        return node