speedy-utils 1.0.3__py3-none-any.whl → 1.0.5__py3-none-any.whl

This diff shows the contents of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
@@ -0,0 +1,215 @@
+ import inspect
+ import os
+ import time
+
+ from loguru import logger
+ from tabulate import tabulate
+
+
+ def timef(func):
+     """Decorator that logs the execution time of a function."""
+
+     def wrapper(*args, **kwargs):
+         start_time = time.time()
+         result = func(*args, **kwargs)
+         end_time = time.time()
+         execution_time = end_time - start_time
+         logger.opt(depth=2).info(
+             f"{func.__name__} took {execution_time:0.2f} seconds to execute."
+         )
+         return result
+
+     return wrapper
+
+
+ class Clock:
+     """
+     A simple timer utility to measure and log time intervals.
+
+     Usage:
+
+     1. Create and start the timer:
+         timer = Clock(start_now=True)
+         # or
+         timer = Clock(start_now=False)
+         timer.start()
+
+     2. Measure time since the timer started:
+         elapsed_time = timer.elapsed_time()
+
+     3. Log the time elapsed since the timer started:
+         timer.log_elapsed_time()
+         # or use a custom logger
+         timer.log_elapsed_time(custom_logger=my_custom_logger)
+
+     4. Measure time since the last checkpoint:
+         time_since_last_checkpoint = timer.time_since_last_checkpoint()
+
+     5. Update a named task in the internal task time table:
+         timer.update_task("task_name")
+
+     6. Print the task time table every `interval` seconds:
+         timer.print_task_table(interval=1)
+     """
+
+     def __init__(self, start_now=True):
+         """Initialize the timer and optionally start it immediately."""
+         self.start_time = None
+         self.task_times = {}
+         self.last_checkpoint = None
+         if start_now:
+             self.start()
+         self.print_counter = 0
+         self.last_print_time = time.time()
+         self.min_depth = float("inf")
+
+     def start(self):
+         """Start the timer; raises ValueError if it has already been started."""
+         if self.start_time is not None:
+             raise ValueError("Timer has already been started.")
+         self.start_time = time.time()
+         self.last_checkpoint = self.start_time
+
+     def elapsed_time(self):
+         """Return the time elapsed since the timer started."""
+         if self.start_time is None:
+             raise ValueError("Timer has not been started.")
+         return time.time() - self.start_time
+
+     def log_elapsed_time(self, custom_logger=None):
+         """Log the time elapsed since the timer started."""
+         msg = f"Time elapsed: {self.elapsed_time():.2f} seconds."
+         if custom_logger:
+             custom_logger(msg)
+         else:
+             logger.opt(depth=2).info(msg)
+
+     def _tick(self):
+         """Return the time elapsed since the last checkpoint and update the checkpoint.
+
+         Returns None (after logging a warning) if the timer was never started.
+         """
+         if not self.start_time:
+             logger.opt(depth=2).warning(
+                 "Timer has not been started. Please call start() before using this method."
+             )
+             return None
+         current_time = time.time()
+         if self.last_checkpoint is None:
+             logger.opt(depth=2).warning(
+                 "Last checkpoint is not set. Please call start() before using this method."
+             )
+             return None
+         elapsed = current_time - self.last_checkpoint
+         self.last_checkpoint = current_time
+         return elapsed
+
+     def tick(self):
+         return self._tick()
+
+     def time_since_last_checkpoint(self):
+         """Return the time elapsed since the last checkpoint."""
+         if self.start_time is None:
+             logger.opt(depth=2).warning(
+                 "Timer has not been started. Please call start() before using this method."
+             )
+             return None
+         if self.last_checkpoint is None:
+             logger.opt(depth=2).warning(
+                 "Last checkpoint is not set. Please call start() before using this method."
+             )
+             return None
+         return time.time() - self.last_checkpoint
+
+     def update_task(self, task_name):
+         """Update the elapsed time for the specified task, recording file, line, and call depth."""
+         # Get the full call stack
+         stack = inspect.stack()
+
+         # File and line number of the caller (the previous frame in the stack)
+         caller_frame = stack[1]
+         file_lineno = f"{os.path.basename(caller_frame.filename)}:{caller_frame.lineno}"
+
+         # Depth of the current call in the stack, excluding this frame
+         call_depth = len(stack) - 1
+         if call_depth < self.min_depth:
+             self.min_depth = call_depth
+
+         elapsed = self.tick()
+         if elapsed is None:
+             return  # Timer was never started; tick() already logged a warning.
+
+         # Update the task time in the internal task table
+         if task_name not in self.task_times:
+             self.task_times[task_name] = {
+                 "time": 0,
+                 "file_lineno": file_lineno,
+                 "depth": call_depth,
+             }
+         self.task_times[task_name]["time"] += elapsed
+
+     def get_percentage_color(self, percentage):
+         """Return an ANSI color code based on the percentage."""
+         if percentage >= 75:
+             return "\033[91m"  # Red
+         elif percentage >= 50:
+             return "\033[93m"  # Yellow
+         elif percentage >= 25:
+             return "\033[92m"  # Green
+         else:
+             return "\033[94m"  # Blue
+
+     def print_task_table(self, interval=1, max_depth=None):
+         """Log the task time table, at most once every `interval` seconds."""
+         current_time = time.time()
+
+         if current_time - self.last_print_time > interval:
+             self.print_counter += 1
+             total_time = (
+                 sum(data["time"] for data in self.task_times.values()) or 1
+             )  # Avoid division by zero
+
+             # Prepare data for the table
+             table_data = []
+             for task_name, data in self.task_times.items():
+                 time_spent = data["time"]
+                 file_lineno = data["file_lineno"]
+                 depth = data["depth"] - self.min_depth
+                 if max_depth is not None and depth > max_depth:
+                     continue
+                 percentage = (time_spent / total_time) * 100
+
+                 # Color the percentage based on its magnitude
+                 color_code = self.get_percentage_color(percentage)
+                 percentage_str = f"{percentage:.2f} %"
+                 colored_percentage = f"{color_code}{percentage_str}\033[0m"
+
+                 table_data.append(
+                     [
+                         task_name,
+                         file_lineno,
+                         f"{time_spent:.2f} s",
+                         colored_percentage,
+                     ]
+                 )
+
+             # Render and log the table
+             table = tabulate(
+                 table_data,
+                 headers=["Task", "File:Line", "Time (s)", "Percentage (%)"],
+                 tablefmt="grid",
+             )
+
+             self.last_print_time = current_time
+             logger.opt(depth=2).info(f"\n{table}")
+
+
+ # A shared module-level timer instance
+ speedy_timer = Clock(start_now=False)
+
+
+ __all__ = ["Clock", "speedy_timer", "timef"]
@@ -0,0 +1,66 @@
+ import functools
+ import time
+ import traceback
+ from collections.abc import Callable
+ from typing import Any
+
+ from loguru import logger
+
+
+ def retry_runtime(
+     sleep_seconds: int = 5,
+     max_retry: int = 12,
+     exceptions: type[Exception] | tuple[type[Exception], ...] = (RuntimeError,),
+ ) -> Callable:
+     """Decorator that retries the function with exponential backoff on specified runtime exceptions.
+
+     Args:
+         sleep_seconds (int): Initial sleep time between retries in seconds
+         max_retry (int): Maximum number of retry attempts
+         exceptions (type[Exception] | tuple[type[Exception], ...]): Exception types to retry on
+
+     Returns:
+         Callable: Decorated function
+     """
+
+     def decorator(func: Callable) -> Callable:
+         @functools.wraps(func)
+         def wrapper(*args: Any, **kwargs: Any) -> Any:
+             for attempt in range(1, max_retry + 1):
+                 try:
+                     return func(*args, **kwargs)
+
+                 except (SyntaxError, NameError, ImportError, TypeError) as e:
+                     # Don't retry on syntax/compilation errors
+                     logger.opt(depth=1).error(
+                         f"Critical error in {func.__name__}: {str(e)}\n{traceback.format_exc()}"
+                     )
+                     raise
+
+                 except exceptions as e:
+                     if attempt == max_retry:
+                         logger.opt(depth=1).error(
+                             f"Function {func.__name__} failed after {max_retry} retries: {str(e)}"
+                         )
+                         raise
+
+                     # Exponential backoff: sleep_seconds, 2x, 4x, ...
+                     backoff_time = sleep_seconds * (2 ** (attempt - 1))
+                     logger.opt(depth=1).warning(
+                         f"Attempt {attempt}/{max_retry} failed: {str(e)[:100]}. "
+                         f"Retrying in {backoff_time} seconds."
+                     )
+                     time.sleep(backoff_time)
+
+             return None  # Unreachable: the loop either returns or re-raises
+
+         return wrapper
+
+     return decorator
+
+
+ __all__ = ["retry_runtime"]
@@ -0,0 +1,207 @@
+ # utils/utils_print.py
+
+ import inspect
+ import re
+ import sys
+ import time
+ from collections import OrderedDict
+ from typing import Annotated, Literal
+
+ from loguru import logger
+
+
+ class _RateLimitCache(OrderedDict):
+     """An OrderedDict that automatically evicts the oldest item once max_size is exceeded."""
+
+     def __init__(self, max_size: int, *args, **kwargs):
+         super().__init__(*args, **kwargs)
+         self.max_size = max_size
+
+     def __setitem__(self, key, value):
+         # If the key already exists, move it to the end (so it's considered "newer")
+         if key in self:
+             self.move_to_end(key)
+         super().__setitem__(key, value)
+         # Evict the oldest entry if we're over capacity
+         if len(self) > self.max_size:
+             self.popitem(last=False)  # pop the *first* (oldest) item
+
+
+ # Global rate-limit cache holding at most 2,000 distinct entries
+ _last_log_times = _RateLimitCache(max_size=2000)
+
+
+ def setup_logger(
+     level: Annotated[
+         Literal[
+             "Trace",
+             "Debug",
+             "Info",
+             "Success",
+             "Warning",
+             "Error",
+             "Critical",
+             "Disable",
+             "T",
+             "D",
+             "I",
+             "S",
+             "W",
+             "E",
+             "C",
+         ],
+         "The desired log level",
+     ] = "Info",
+     enable_grep: Annotated[str, "Comma-separated patterns for enabling logs"] = "",
+     disable_grep: Annotated[str, "Comma-separated patterns for disabling logs"] = "",
+     min_interval: float = -1,
+     max_cache_entries: int = 2000,
+ ) -> None:
+     """
+     Set up the logger with a rate-limiting feature:
+     - No more than one log from the same file:line within `min_interval` seconds.
+     - Track up to `max_cache_entries` distinct file:line pairs in memory.
+     """
+     # Update the cache size if desired
+     _last_log_times.max_size = max_cache_entries
+
+     # Map shorthand levels to full names
+     level_mapping = {
+         "T": "TRACE",
+         "D": "DEBUG",
+         "I": "INFO",
+         "S": "SUCCESS",
+         "W": "WARNING",
+         "E": "ERROR",
+         "C": "CRITICAL",
+     }
+     level_str = level_mapping.get(level.upper(), level.upper())
+
+     # Validate the level name; "DISABLE" is not a loguru level and is handled below
+     if level_str != "DISABLE":
+         logger.level(level_str)
+
+     # Remove any existing handlers to avoid duplication
+     logger.remove()
+
+     # Prepare grep patterns
+     enable_patterns = [p.strip() for p in enable_grep.split(",") if p.strip()]
+     disable_patterns = [p.strip() for p in disable_grep.split(",") if p.strip()]
+
+     def log_filter(record):
+         """
+         1. Filter out messages below the specified log level.
+         2. Apply the 'enable'/'disable' grep filters.
+         3. Rate-limit same file:line messages that occur within `min_interval` seconds.
+         4. Enforce a max size on the file:line cache.
+         """
+         # ---------- 1) Log-level check ----------
+         if record["level"].no < logger.level(level_str).no:
+             return False
+
+         # ---------- 2) Grep pattern handling ----------
+         log_message = f"{record['file']}:{record['line']} ({record['function']})"
+         if enable_patterns and not any(
+             re.search(p, log_message) for p in enable_patterns
+         ):
+             return False
+         if disable_patterns and any(
+             re.search(p, log_message) for p in disable_patterns
+         ):
+             return False
+
+         # ---------- 3) Rate limiting by file:line ----------
+         file_line_key = f"{record['file']}:{record['line']}"
+         now = time.time()
+
+         last_time = _last_log_times.get(file_line_key)
+         if last_time is not None and min_interval > 0:
+             try:
+                 if now - last_time < min_interval:
+                     return False  # Skip logging within min_interval
+             except TypeError:
+                 # Handle case in tests where last_time might be a mock
+                 pass
+
+         # Update the cache with the new time (size eviction happens in __setitem__)
+         _last_log_times[file_line_key] = now
+         return True
+
+     # Add the handler
+     logger.add(
+         sys.stdout,
+         colorize=True,
+         format=(
+             "<green>{time:HH:mm:ss}</green> | "
+             "<level>{level: <8}</level> | "
+             "<cyan>{file}:{line} ({function})</cyan> - <level>{message}</level>"
+         ),
+         filter=log_filter,
+     )
+
+     # ---------- 4) Handle the "Disable" level ----------
+     if level_str == "DISABLE":
+         logger.disable("")
+         logger.info("Logging disabled")
+     else:
+         logger.enable("")
+         logger.debug(f"Logging set to {level_str}")
+
+
+ _logged_once_set = set()
+ _last_log_intervals = {}
+
+
+ def _get_call_site_id(depth=2) -> str:
+     """Generate a unique identifier for the call site based on filename and line number."""
+     frame = inspect.stack()[depth]
+     return f"{frame.filename}:{frame.lineno}"
+
+
+ def log(
+     msg: str,
+     *,
+     level: Literal["info", "warning", "error", "critical", "success"] = "info",
+     once: bool = False,
+     interval: float | None = None,
+ ) -> None:
+     """
+     Log a message using loguru with optional `once` and `interval` control.
+
+     Args:
+         msg (str): The log message.
+         level (str): Log level (e.g., "info", "warning").
+         once (bool): If True, log only once per call site.
+         interval (float): If set, log at most once every `interval` seconds per call site.
+     """
+     identifier = _get_call_site_id(depth=2)
+
+     # Handle the `once` parameter before logging
+     if once and identifier in _logged_once_set:
+         return
+
+     # Handle the `interval` parameter before logging
+     if interval is not None:
+         now = time.time()
+         last = _last_log_intervals.get(identifier)
+         if last is not None:
+             try:
+                 if now - last < interval:
+                     return
+             except TypeError:
+                 # Handle case in tests where `last` might be a mock
+                 pass
+
+     # Log the message
+     fn = getattr(logger.opt(depth=1), level)
+     fn(msg)
+
+     # Update the rate-limiting caches after a successful log
+     if once:
+         _logged_once_set.add(identifier)
+
+     if interval is not None:
+         _last_log_intervals[identifier] = time.time()
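
A short sketch of the intended call pattern, assuming these helpers are imported from the module above:

    setup_logger(level="I", disable_grep="noisy_module", min_interval=1.0)

    log("configuration loaded", once=True)    # emitted a single time per call site
    for _ in range(100):
        log("progress update", interval=5.0)  # at most one message every 5 seconds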
@@ -0,0 +1,112 @@
+ import os
+ from collections import defaultdict
+ from datetime import datetime
+
+
+ class ReportManager:
+     def __init__(self):
+         self.cache_dir = os.path.expanduser("~/.cache/speedy_utils")
+         os.makedirs(self.cache_dir, exist_ok=True)
+
+     def save_report(self, errors, results, execution_time=None, metadata=None):
+         report_path = os.path.join(
+             self.cache_dir, f"report_{datetime.now().strftime('%m%d_%H%M')}.md"
+         )
+         os.makedirs(os.path.dirname(report_path), exist_ok=True)
+
+         # Group the first 10 errors by error type to keep the report short
+         error_groups = defaultdict(list)
+         for err in errors[:10]:
+             error_type = err["error"].__class__.__name__
+             error_groups[error_type].append(err)
+
+         md_content = [
+             "# Multi-thread Execution Report",
+             f"\n## Summary (Generated at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')})",
+         ]
+
+         if metadata:
+             md_content.extend(
+                 [
+                     "\n### Execution Configuration",
+                     f"- Mode: {metadata['mode']}",
+                     f"- Workers: {metadata['max_workers']}",
+                     f"- Execution type: {metadata['execution_mode']}",
+                     f"- Total inputs: {metadata['total_inputs']}",
+                 ]
+             )
+
+         total = len(results) or 1  # Avoid division by zero when there are no results
+         md_content.extend(
+             [
+                 "\n### Results Overview",
+                 f"- Total items processed: {len(results)}",
+                 f"- Success rate: {(len(results) - len(errors)) / total * 100:.1f}%",
+                 f"- Total errors: {len(errors)}",
+             ]
+         )
+
+         if execution_time:
+             md_content.append(f"- Execution time: {execution_time:.2f}s")
+             md_content.append(
+                 f"- Average speed: {len(results) / execution_time:.1f} items/second"
+             )
+
+         if error_groups:
+             md_content.extend(
+                 ["\n## Errors by Type", "Click headers to expand error details."]
+             )
+
+             for error_type, errs in error_groups.items():
+                 md_content.extend(
+                     [
+                         "\n<details>",
+                         f"<summary><b>{error_type}</b> ({len(errs)} occurrences)</summary>\n",
+                         "| Index | Input | Error Message |",
+                         "|-------|-------|---------------|",
+                     ]
+                 )
+
+                 for err in errs:
+                     md_content.append(
+                         f"| {err['index']} | `{err['input']}` | {str(err['error'])} |"
+                     )
+
+                 # Add the first traceback as an example
+                 md_content.extend(
+                     [
+                         "\nExample traceback:",
+                         "```python",
+                         errs[0]["traceback"],
+                         "```",
+                         "</details>",
+                     ]
+                 )
+
+             # Add a section listing all error indices
+             md_content.extend(
+                 [
+                     "\n## Error Indices",
+                     "List of indices for items that encountered errors:",
+                     ", ".join(str(err["index"]) for err in errors),
+                 ]
+             )
+
+         md_content.extend(
+             [
+                 "\n## Results Summary",
+                 f"- Successful executions: {len(results) - len(errors)}",
+                 f"- Failed executions: {len(errors)}",
+                 "\n<details>",
+                 "<summary>First 5 successful results</summary>\n",
+                 "```python",
+                 str([r for r in results[:5] if r is not None]),
+                 "```",
+                 "</details>",
+             ]
+         )
+
+         with open(report_path, "w", encoding="utf-8") as f:
+             f.write("\n".join(md_content))
+         print(f"Report saved at: {report_path}")