upwork-learning 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
src/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ """Upwork Learning - Automation and Integration Tools."""
2
+
3
+ __version__ = "0.1.0"
src/cli.py ADDED
@@ -0,0 +1,349 @@
1
+ """CLI entry point using Typer."""
2
+
3
+ from pathlib import Path
4
+ from typing import Annotated
5
+
6
+ import typer
7
+ from rich.console import Console
8
+ from rich.table import Table
9
+
10
+ from src import __version__
11
+ from src.integrations.email_handler import Email, EmailClient, EmailConfig
12
+ from src.integrations.google_sheets import GoogleSheetsClient, GoogleSheetsConfig
13
+ from src.integrations.pdf_processor import PDFProcessor
14
+ from src.utils.logger import bind_request_id, get_logger
15
+
16
# Typer application object; each ``@app.command()`` in this module registers
# one sub-command on it.
app = typer.Typer(
    add_completion=False,
    help="Automation and Integration Tools for Upwork Projects",
    name="upwork-learn",
)
# Shared Rich console used by every command for user-facing output.
console = Console()
# Module-level logger obtained from the project's logging helper.
logger = get_logger(__name__)
23
+
24
+
25
@app.command()
def version() -> None:
    """Print the tool name followed by the package version."""
    banner = f"[bold blue]upwork-learn[/bold blue] v{__version__}"
    console.print(banner)
29
+
30
+
31
@app.command()
def sheets_read(
    spreadsheet_id: Annotated[str, typer.Option("--spreadsheet-id", "-s", help="Spreadsheet ID")],
    range_name: Annotated[str, typer.Option("--range", "-r", help="Range in A1 notation")],
    credentials_path: Annotated[str | None, typer.Option("--credentials", "-c")] = None,
) -> None:
    """Read a range from Google Sheets and print it as a table.

    Args:
        spreadsheet_id: ID of the spreadsheet to read.
        range_name: Range to read, in A1 notation.
        credentials_path: Optional path that overrides the configured
            credentials file.

    Raises:
        typer.Exit: With exit code 1 when anything goes wrong; the error
            message is printed first.
    """
    # Function-scope import keeps this command self-contained.
    from rich.markup import escape

    try:
        config = GoogleSheetsConfig(spreadsheet_id=spreadsheet_id)
        if credentials_path:
            config.credentials_path = Path(credentials_path)
        client = GoogleSheetsClient(config=config)

        with client:
            data = client.read_range(range_name)

        table = Table(title=f"Data from {range_name}")
        table.add_column("Row", style="cyan")
        # Every row supplies two cells, so declare the second column
        # explicitly instead of letting Rich add an unnamed one.
        table.add_column("Values")

        for row_number, row in enumerate(data, start=1):
            joined = " | ".join(str(cell) for cell in row)
            # Cell text is external data: escape it so bracketed content is
            # shown literally instead of being parsed as console markup.
            table.add_row(str(row_number), escape(joined))

        console.print(table)

    except Exception as e:
        console.print(f"[bold red]Error:[/bold red] {escape(str(e))}")
        raise typer.Exit(1) from e
58
+
59
+
60
@app.command()
def sheets_write(
    spreadsheet_id: Annotated[str, typer.Option("--spreadsheet-id", "-s", help="Spreadsheet ID")],
    range_name: Annotated[str, typer.Option("--range", "-r", help="Range in A1 notation")],
    values: Annotated[str, typer.Option("--values", "-v", help="JSON array of values")],
    credentials_path: Annotated[str | None, typer.Option("--credentials", "-c")] = None,
) -> None:
    """Write a JSON array of rows to a range in Google Sheets.

    Args:
        spreadsheet_id: ID of the spreadsheet to write to.
        range_name: Target range, in A1 notation.
        values: JSON text whose top-level value must be an array.
        credentials_path: Optional path that overrides the configured
            credentials file.

    Raises:
        typer.Exit: With exit code 1 when the JSON is invalid, is not an
            array, or the write fails; the error message is printed first.
    """
    import json

    # Validate the payload before any client is built so bad input fails fast.
    # This sits outside the catch-all below on purpose: ``typer.Exit`` is an
    # ``Exception`` subclass and would otherwise be caught and re-reported.
    try:
        data = json.loads(values)
    except json.JSONDecodeError as e:
        console.print("[bold red]Error:[/bold red] Invalid JSON format for values")
        raise typer.Exit(1) from e

    if not isinstance(data, list):
        console.print("[bold red]Error:[/bold red] Values must be a JSON array of rows")
        raise typer.Exit(1)

    try:
        config = GoogleSheetsConfig(spreadsheet_id=spreadsheet_id)
        if credentials_path:
            config.credentials_path = Path(credentials_path)
        client = GoogleSheetsClient(config=config)

        with client:
            client.write_range(range_name, data)

        console.print(f"[green]Successfully wrote {len(data)} rows[/green]")

    except Exception as e:
        console.print(f"[bold red]Error:[/bold red] {e}")
        raise typer.Exit(1) from e
89
+
90
+
91
@app.command()
def sheets_list(
    spreadsheet_id: Annotated[str, typer.Option("--spreadsheet-id", "-s", help="Spreadsheet ID")],
    credentials_path: Annotated[str | None, typer.Option("--credentials", "-c")] = None,
) -> None:
    """Print the worksheets of a spreadsheet as a one-column table.

    Any failure is reported on the console and turned into exit code 1.
    """
    try:
        sheets_config = GoogleSheetsConfig(spreadsheet_id=spreadsheet_id)
        if credentials_path:
            sheets_config.credentials_path = Path(credentials_path)
        sheets_client = GoogleSheetsClient(config=sheets_config)

        with sheets_client:
            names = sheets_client.get_worksheets()

        listing = Table(title="Worksheets")
        listing.add_column("Name", style="cyan")
        for name in names:
            listing.add_row(name)

        console.print(listing)

    except Exception as e:
        console.print(f"[bold red]Error:[/bold red] {e}")
        raise typer.Exit(1) from e
117
+
118
+
119
@app.command()
def pdf_extract_text(
    path: Annotated[str, typer.Argument(help="Path to PDF file")],
    pages: Annotated[
        str | None, typer.Option("--pages", "-p", help="Page numbers (comma-separated)")
    ] = None,
    output: Annotated[str | None, typer.Option("--output", "-o", help="Output file")] = None,
) -> None:
    """Extract text from a PDF and print it or save it to a file.

    Args:
        path: Path to the PDF file.
        pages: Optional comma-separated page numbers; empty entries (for
            example a trailing comma) are ignored.
        output: Optional file to write the text to (UTF-8). When omitted the
            text is printed to the console.

    Raises:
        typer.Exit: With exit code 1 when anything goes wrong; the error
            message is printed first.
    """
    # Function-scope import keeps this command self-contained.
    from rich.markup import escape

    try:
        processor = PDFProcessor()

        page_numbers = None
        if pages:
            tokens = (token.strip() for token in pages.split(","))
            # Skip blanks so "1,2," or "1, ,2" do not fail in int().
            page_numbers = [int(token) for token in tokens if token]

        with processor:
            if page_numbers:
                data = processor.extract_text(path=path, page_numbers=page_numbers)
            else:
                data = processor.extract_text(path=path)

        # NOTE(review): the "+ 1" assumes extract_text() keys are 0-based page
        # indices; confirm against PDFProcessor.
        text = "\n\n".join(
            f"--- Page {i + 1} ---\n{page_text}" for i, page_text in data.items()
        )

        if output:
            with open(output, "w", encoding="utf-8") as f:
                f.write(text)
            console.print(f"[green]Text extracted to {escape(output)}[/green]")
        else:
            # PDF text is arbitrary external data: print it verbatim rather
            # than letting bracketed content be parsed as console markup.
            console.print(text, markup=False)

    except Exception as e:
        console.print(f"[bold red]Error:[/bold red] {escape(str(e))}")
        raise typer.Exit(1) from e
153
+
154
+
155
@app.command()
def pdf_extract_tables(
    path: Annotated[str, typer.Argument(help="Path to PDF file")],
    output: Annotated[str | None, typer.Option("--output", "-o", help="Output file")] = None,
) -> None:
    """Extract every table in a PDF and emit the rows as JSON.

    The rows of all tables are flattened into one list. With ``--output`` the
    list is written to that file (UTF-8, indented); otherwise it is printed
    to the console as JSON. Any failure ends the command with exit code 1.
    """
    import json

    try:
        pdf = PDFProcessor()

        with pdf:
            extracted = pdf.extract_tables(path=path)
            # Flatten while the processor is still open, as the rows are
            # read from each table's ``as_dicts`` attribute.
            rows = [record for table in extracted for record in table.as_dicts]

        if not output:
            console.print_json(data=rows)
        else:
            with open(output, "w", encoding="utf-8") as handle:
                json.dump(rows, handle, indent=2, ensure_ascii=False)
            console.print(f"[green]Tables extracted to {output}[/green]")

    except Exception as e:
        console.print(f"[bold red]Error:[/bold red] {e}")
        raise typer.Exit(1) from e
182
+
183
+
184
@app.command()
def pdf_extract_invoice(
    path: Annotated[str, typer.Argument(help="Path to PDF file")],
) -> None:
    """Extract invoice fields from a PDF and display them.

    Scalar fields are shown in a two-column table; a non-empty
    ``line_items`` entry is printed separately as JSON. Any failure ends the
    command with exit code 1.
    """
    try:
        pdf = PDFProcessor()

        with pdf:
            invoice = pdf.extract_invoice_data(path)

        summary = Table(title="Extracted Invoice Data")
        summary.add_column("Field", style="cyan")
        summary.add_column("Value", style="green")

        for field_name, field_value in invoice.items():
            if field_name == "line_items":
                # Shown separately below as JSON.
                continue
            summary.add_row(field_name, str(field_value))

        console.print(summary)

        line_items = invoice.get("line_items")
        if line_items:
            console.print("\n[bold]Line Items:[/bold]")
            console.print_json(data=line_items)

    except Exception as e:
        console.print(f"[bold red]Error:[/bold red] {e}")
        raise typer.Exit(1) from e
212
+
213
+
214
@app.command()
def email_send(
    to: Annotated[str, typer.Option("--to", "-t", help="Recipient email(s), comma-separated")],
    subject: Annotated[str, typer.Option("--subject", "-s", help="Email subject")],
    body: Annotated[str, typer.Option("--body", "-b", help="Email body")],
    cc: Annotated[str | None, typer.Option("--cc", help="CC recipients, comma-separated")] = None,
) -> None:
    """Send an email.

    Args:
        to: Comma-separated recipient addresses; blank entries are ignored.
        subject: Email subject.
        body: Email body.
        cc: Optional comma-separated CC addresses; blank entries are ignored.

    Raises:
        typer.Exit: With exit code 1 when no recipient remains after
            trimming, or when sending fails; the error message is printed
            first.
    """
    # Drop blanks produced by stray commas ("a@x.com," or "a@x.com,,b@y.com")
    # so empty strings are never handed to the mail client as addresses.
    recipients = [addr for addr in (part.strip() for part in to.split(",")) if addr]
    cc_list = [addr for addr in (part.strip() for part in cc.split(",")) if addr] if cc else []

    # Checked outside the catch-all below: ``typer.Exit`` is an ``Exception``
    # subclass and would otherwise be caught and re-reported.
    if not recipients:
        console.print("[bold red]Error:[/bold red] No recipient address given")
        raise typer.Exit(1)

    try:
        config = EmailConfig()
        client = EmailClient(config=config)

        email_msg = Email(
            to=recipients,
            subject=subject,
            body=body,
            cc=cc_list,
        )

        with client:
            client.send_email(email_msg)

        console.print("[green]Email sent successfully![/green]")

    except Exception as e:
        console.print(f"[bold red]Error:[/bold red] {e}")
        raise typer.Exit(1) from e
244
+
245
+
246
@app.command()
def email_fetch(
    folder: Annotated[str, typer.Option("--folder", "-f", help="IMAP folder")] = "INBOX",
    limit: Annotated[int, typer.Option("--limit", "-l", help="Max emails to fetch")] = 10,
    unread_only: Annotated[
        bool, typer.Option("--unread-only/--all", help="Only fetch unread")
    ] = False,
) -> None:
    """Fetch emails from the IMAP server and list them in a table.

    Args:
        folder: IMAP folder to read.
        limit: Maximum number of emails to fetch.
        unread_only: When true, only unread emails are fetched.

    Raises:
        typer.Exit: With exit code 1 when anything goes wrong; the error
            message is printed first.
    """
    # Function-scope import keeps this command self-contained.
    from rich.markup import escape

    def _clip(value: str | None, limit_chars: int) -> str:
        """Shorten *value* to *limit_chars* plus "..." and escape markup."""
        value = value or ""
        clipped = value[:limit_chars] + "..." if len(value) > limit_chars else value
        # Sender and subject come from remote mail, so bracketed text must be
        # shown literally rather than parsed as console markup.
        return escape(clipped)

    try:
        config = EmailConfig()
        client = EmailClient(config=config)

        with client:
            emails = client.fetch_emails(
                folder=folder,
                limit=limit,
                unread_only=unread_only,
            )

        table = Table(title=f"Emails from {folder}")
        table.add_column("UID", style="cyan", width=6)
        table.add_column("From", style="yellow", width=20)
        table.add_column("Subject", style="white")
        table.add_column("Date", style="green", width=12)

        for message in emails:
            table.add_row(
                str(message.uid),
                _clip(message.from_address, 18),
                _clip(message.subject, 40),
                # A message without a date must not abort the whole listing.
                message.date.strftime("%Y-%m-%d") if message.date else "",
            )

        console.print(table)

    except Exception as e:
        console.print(f"[bold red]Error:[/bold red] {escape(str(e))}")
        raise typer.Exit(1) from e
287
+
288
+
289
@app.command()
def health() -> None:
    """Probe the configured mail servers and look for Google credentials.

    Opens a TCP connection (5 second timeout) to the SMTP and then the IMAP
    endpoint from the loaded configuration and reports each result. Missing
    Google credentials only produce a warning. The command exits with code 1
    if either TCP probe failed.
    """
    import os as _os
    import socket

    from src.utils.config import load_config

    cfg = load_config()
    failed_probes = 0

    # Same probe for both mail endpoints; settings are read one endpoint at a
    # time, in SMTP-then-IMAP order.
    for label, prefix in (("SMTP", "smtp"), ("IMAP", "imap")):
        host = getattr(cfg.email, f"{prefix}_host")
        port = getattr(cfg.email, f"{prefix}_port")
        try:
            with socket.create_connection((host, port), timeout=5):
                console.print(f"[green]{label} {host}:{port} — OK[/green]")
        except OSError as e:
            console.print(f"[red]{label} {host}:{port} — FAIL ({e})[/red]")
            failed_probes += 1

    # Credentials: the environment variable wins over the credentials file.
    creds = cfg.google_sheets.credentials_path
    if _os.environ.get("GOOGLE_SHEETS_CREDENTIALS_JSON"):
        console.print("[green]Google credentials — OK (env var)[/green]")
    elif creds.exists():
        console.print(f"[green]Google credentials — OK ({creds})[/green]")
    else:
        console.print(f"[yellow]Google credentials — NOT FOUND ({creds})[/yellow]")

    if failed_probes:
        raise typer.Exit(1)
333
+
334
+
335
def main() -> None:
    """Configure logging from the loaded configuration, then run the CLI.

    The configured log level may be an integer or a level name in any case.
    Anything that does not resolve to a numeric level falls back to ``INFO``.
    """
    import logging as _logging

    from src.utils.config import load_config
    from src.utils.logger import configure_logging

    cfg = load_config()

    raw_level = cfg.app.log_level
    if isinstance(raw_level, int):
        level = raw_level
    else:
        # Level constants on the logging module are upper-case. Without the
        # normalisation a name such as "debug" resolves to the function
        # ``logging.debug`` instead of the numeric level.
        level = getattr(_logging, str(raw_level).upper(), None)
        if not isinstance(level, int):
            level = _logging.INFO

    configure_logging(level)
    bind_request_id()
    app()


if __name__ == "__main__":
    main()
@@ -0,0 +1,9 @@
1
+ """Integration modules for external services."""
2
+
3
+ from src.integrations.base import BaseIntegration, IntegrationError, RateLimitError
4
+
5
# Names re-exported as the public API of the integrations package.
__all__ = ["BaseIntegration", "IntegrationError", "RateLimitError"]
@@ -0,0 +1,237 @@
1
+ """Base integration classes and common patterns."""
2
+
3
+ import threading
4
+ import time
5
+ from abc import ABC, abstractmethod
6
+ from collections.abc import Callable
7
+ from dataclasses import dataclass
8
+ from typing import Any, TypeVar
9
+
10
+ import tenacity
11
+ from tenacity import (
12
+ RetryCallState,
13
+ stop_after_attempt,
14
+ )
15
+
16
+ from src.utils.logger import get_logger
17
+
18
# Module-level logger obtained from the project's logging helper.
logger = get_logger(__name__)

# Generic return type of the callables wrapped by the retry decorator.
T = TypeVar("T")
21
+
22
+
23
class IntegrationError(Exception):
    """Common ancestor of the integration-specific exceptions in this module."""
25
+
26
+
27
class RateLimitError(IntegrationError):
    """Signal that a request was rejected because a rate limit was exceeded.

    Attributes:
        retry_after: Value supplied by the raiser, or ``None`` when it gave
            none.
    """

    def __init__(self, message: str, retry_after: int | None = None) -> None:
        self.retry_after = retry_after
        super().__init__(message)
33
+
34
+
35
class AuthenticationError(IntegrationError):
    """Signal that authenticating against a service failed."""
37
+
38
+
39
class IntegrationConnectionError(IntegrationError):
    """Signal that a connection to a service could not be established."""
41
+
42
+
43
+ class _TokenBucket:
44
+ """Thread-safe token-bucket rate limiter.
45
+
46
+ Tokens refill at *rate* per second up to *capacity*. Call ``acquire()``
47
+ before each outbound request; it blocks until a token is available.
48
+
49
+ Args:
50
+ rate: Tokens added per second (= maximum sustained requests/s).
51
+ capacity: Burst capacity (defaults to *rate*, i.e. no burst).
52
+ """
53
+
54
+ def __init__(self, rate: float, capacity: float | None = None) -> None:
55
+ if rate <= 0:
56
+ raise ValueError("rate must be positive")
57
+ self._rate = rate
58
+ self._capacity = capacity if capacity is not None else rate
59
+ self._tokens = self._capacity
60
+ self._last_refill = time.monotonic()
61
+ self._lock = threading.Lock()
62
+
63
+ def acquire(self) -> None:
64
+ """Block until a token is available, then consume it."""
65
+ while True:
66
+ with self._lock:
67
+ now = time.monotonic()
68
+ elapsed = now - self._last_refill
69
+ self._tokens = min(
70
+ self._capacity, self._tokens + elapsed * self._rate
71
+ )
72
+ self._last_refill = now
73
+ if self._tokens >= 1.0:
74
+ self._tokens -= 1.0
75
+ return
76
+ wait = (1.0 - self._tokens) / self._rate
77
+ # Sleep outside the lock so other threads can proceed.
78
+ time.sleep(wait)
79
+
80
+
81
@dataclass
class IntegrationConfig:
    """Settings shared by every integration."""

    # Default number of attempts used by ``retry_with_backoff``.
    max_retries: int = 3
    # Must be positive; presumably seconds, to be confirmed against callers.
    timeout: float = 30.0
    # Seconds to wait after an HTTP 429 that carries no ``Retry-After``.
    rate_limit_delay: float = 1.0
    #: Maximum outbound requests per second (0 = unlimited / no throttle).
    requests_per_second: float = 0.0
90
+
91
+
92
class BaseIntegration(ABC):
    """Abstract base class for integrations.

    Provides common patterns:
    - Context-manager support that calls ``connect()`` / ``disconnect()``
    - Optional request pacing through a token bucket (``_throttle``)
    - A retry decorator with exponential backoff
    - HTTP 429 handling that honours the ``Retry-After`` header
    - Standardized logging
    """

    def __init__(self, config: IntegrationConfig | None = None) -> None:
        """Store the configuration and set up logging and optional throttling.

        Args:
            config: Integration settings; a default ``IntegrationConfig`` is
                used when omitted.
        """
        self._config = config or IntegrationConfig()
        self._connected = False
        self._logger = get_logger(f"{__name__}.{self.__class__.__name__}")
        # A bucket exists only when a positive rate is configured; otherwise
        # ``_throttle`` is a no-op.
        self._bucket: _TokenBucket | None = (
            _TokenBucket(self._config.requests_per_second)
            if self._config.requests_per_second > 0
            else None
        )

    @property
    @abstractmethod
    def service_name(self) -> str:
        """Return the name of the service."""
        raise NotImplementedError

    @abstractmethod
    def connect(self) -> None:
        """Establish connection to the service."""
        raise NotImplementedError

    @abstractmethod
    def disconnect(self) -> None:
        """Close connection to the service."""
        raise NotImplementedError

    def __enter__(self) -> "BaseIntegration":
        """Connect and return this integration."""
        self.connect()
        return self

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        """Disconnect; exceptions from the ``with`` body are not suppressed."""
        self.disconnect()

    def _throttle(self) -> None:
        """Pace outgoing requests to the configured rate.

        Call once before each outbound API request. When
        ``IntegrationConfig.requests_per_second`` is 0 (the default) this is a
        no-op. When a positive rate is set this blocks until the token bucket
        has a token available.

        Example::

            def fetch_data(self) -> dict:
                self._throttle()          # pace before hitting the API
                return self._client.get("/data")
        """
        if self._bucket is not None:
            self._bucket.acquire()

    def retry_with_backoff(
        self,
        max_attempts: int | None = None,
        initial_wait: float = 1.0,
        max_wait: float = 60.0,
    ) -> Callable[[Callable[..., T]], Callable[..., T]]:
        """Build a decorator that retries with exponential backoff.

        Args:
            max_attempts: Total number of attempts, passed to tenacity's
                ``stop_after_attempt``. Defaults to the configured
                ``max_retries``.
            initial_wait: Multiplier for the exponential wait, in seconds.
            max_wait: Upper bound for a single wait, in seconds.

        Returns:
            A decorator that adds the retry logic to a function. The last
            exception is re-raised once the attempts are exhausted.
        """
        if max_attempts is None:
            max_attempts = self._config.max_retries

        stop_strategy = stop_after_attempt(max_attempts)
        wait_strategy = tenacity.wait_exponential(
            multiplier=initial_wait,
            max=max_wait,
        )

        def before_callback(retry_state: RetryCallState) -> None:
            """Log the failed attempt before tenacity sleeps."""
            attempt = retry_state.attempt_number
            exception = retry_state.outcome.exception() if retry_state.outcome else None
            self._logger.warning(
                f"Retry attempt {attempt}/{max_attempts} for {self.service_name}",
                extra={"exception": str(exception) if exception else None},
            )

        return tenacity.retry(
            stop=stop_strategy,
            wait=wait_strategy,
            before_sleep=before_callback,
            reraise=True,
        )

    @staticmethod
    def _parse_retry_after(raw: Any) -> int | None:
        """Convert a ``Retry-After`` header value into whole seconds.

        The header may carry either a number of seconds or an HTTP date.

        Args:
            raw: Header value as received, or ``None`` when absent.

        Returns:
            Non-negative seconds to wait, or ``None`` when the header is
            missing or cannot be parsed.
        """
        if not raw:
            return None
        text = str(raw).strip()
        try:
            # Clamp: a negative delay would make time.sleep() raise.
            return max(0, int(text))
        except ValueError:
            pass

        # Function-scope imports: only needed for the HTTP-date form.
        import math
        from datetime import datetime, timezone
        from email.utils import parsedate_to_datetime

        try:
            target = parsedate_to_datetime(text)
        except (TypeError, ValueError):
            return None
        if target.tzinfo is None:
            # Treat a date parsed without zone information as UTC.
            target = target.replace(tzinfo=timezone.utc)
        remaining = (target - datetime.now(timezone.utc)).total_seconds()
        return max(0, math.ceil(remaining))

    def _handle_rate_limit(self, response: Any) -> None:
        """Handle rate limit response with automatic backoff.

        If the response carries a 429 status, waits for the ``Retry-After``
        interval (default: ``rate_limit_delay``) before raising
        ``RateLimitError`` so the caller's retry logic gets a real chance to
        succeed. Responses without a 429 status are ignored.

        Args:
            response: API response object

        Raises:
            RateLimitError: When rate limit is exceeded
        """
        if getattr(response, "status_code", None) != 429:
            return

        # Tolerate response objects that expose no headers mapping.
        headers = getattr(response, "headers", None) or {}
        retry_after = self._parse_retry_after(headers.get("Retry-After"))
        wait = retry_after if retry_after is not None else self._config.rate_limit_delay
        self._logger.warning(
            "rate_limit_hit",
            wait_seconds=wait,
            retry_after=retry_after,
        )
        time.sleep(wait)
        raise RateLimitError(
            "Rate limit exceeded",
            retry_after=retry_after,
        )

    def _validate_config(self) -> None:
        """Validate integration configuration.

        Not invoked by ``__init__`` in this class.

        Raises:
            ValueError: If ``max_retries`` is below 1, ``timeout`` is not
                positive, or ``rate_limit_delay`` is negative.
        """
        if self._config.max_retries < 1:
            raise ValueError("max_retries must be at least 1")
        if self._config.timeout <= 0:
            raise ValueError("timeout must be positive")
        if self._config.rate_limit_delay < 0:
            raise ValueError("rate_limit_delay cannot be negative")

    @property
    def is_connected(self) -> bool:
        """Check if integration is connected."""
        return self._connected