xenfra-sdk 0.2.2__py3-none-any.whl → 0.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
xenfra_sdk/privacy.py CHANGED
@@ -1,153 +1,153 @@
1
- """
2
- This module contains the Privacy Scrubber for the Xenfra SDK.
3
- Its purpose is to redact sensitive information from logs or other text
4
- before it is sent to diagnostic endpoints, upholding privacy-first principles.
5
- """
6
-
7
- import json
8
- import logging
9
- import os
10
- import re
11
- from pathlib import Path
12
- from typing import List, Optional
13
-
14
- import httpx # For fetching patterns from URL
15
-
16
- logger = logging.getLogger(__name__)
17
-
18
- # Path to the patterns file within the SDK
19
- _PATTERNS_FILE_PATH = Path(__file__).parent / "patterns.json"
20
- _REDACTION_PLACEHOLDER = "[REDACTED]"
21
- _CACHED_PATTERNS: List[re.Pattern] = []
22
-
23
-
24
- def _load_patterns_from_file(file_path: Path) -> List[str]:
25
- """Loads raw regex patterns from a JSON file."""
26
- if not file_path.exists():
27
- logger.warning(
28
- f"Patterns file not found at {file_path}. No patterns will be used for scrubbing."
29
- )
30
- return []
31
- try:
32
- with open(file_path, "r") as f:
33
- config = json.load(f)
34
- return config.get("redaction_patterns", [])
35
- except json.JSONDecodeError as e:
36
- logger.error(f"Error decoding patterns.json: {e}. Falling back to empty patterns.")
37
- return []
38
-
39
-
40
- async def _refresh_patterns_from_url(url: str) -> Optional[List[str]]:
41
- """
42
- Fetches updated patterns from a URL asynchronously.
43
- """
44
- try:
45
- # Configure timeout from environment or default to 30 seconds
46
- timeout_seconds = float(os.getenv("XENFRA_SDK_TIMEOUT", "30.0"))
47
- timeout = httpx.Timeout(timeout_seconds, connect=10.0)
48
-
49
- async with httpx.AsyncClient(timeout=timeout) as client:
50
- response = await client.get(url)
51
- response.raise_for_status()
52
-
53
- # Safe JSON parsing with content-type check
54
- content_type = response.headers.get("content-type", "")
55
- if "application/json" not in content_type:
56
- logger.warning(
57
- f"Expected JSON response from {url}, got {content_type}. "
58
- "Skipping pattern refresh."
59
- )
60
- return None
61
-
62
- try:
63
- config = response.json()
64
- except (ValueError, TypeError) as e:
65
- logger.error(f"Failed to parse JSON from patterns URL {url}: {e}")
66
- return None
67
-
68
- if not isinstance(config, dict):
69
- logger.error(
70
- f"Expected dictionary from patterns URL {url}, got {type(config).__name__}"
71
- )
72
- return None
73
-
74
- return config.get("redaction_patterns", [])
75
- except httpx.TimeoutException as e:
76
- logger.warning(f"Timeout fetching patterns from {url}: {e}")
77
- return None
78
- except httpx.RequestError as e:
79
- logger.warning(f"Error fetching patterns from {url}: {e}")
80
- return None
81
- except json.JSONDecodeError as e:
82
- logger.error(f"Error decoding JSON from patterns URL {url}: {e}")
83
- return None
84
- except Exception as e:
85
- logger.error(f"Unexpected error fetching patterns from {url}: {e}")
86
- return None
87
-
88
-
89
- async def initialize_scrubber(refresh_from_url: Optional[str] = None):
90
- """
91
- Initializes or refreshes the scrubber patterns.
92
- Can optionally fetch patterns from a URL. This should be called on app startup.
93
- """
94
- global _CACHED_PATTERNS
95
- raw_patterns = []
96
-
97
- if refresh_from_url:
98
- refreshed = await _refresh_patterns_from_url(refresh_from_url)
99
- if refreshed:
100
- raw_patterns = refreshed
101
-
102
- if not raw_patterns: # Fallback to file if no refresh URL or refresh failed
103
- raw_patterns = _load_patterns_from_file(_PATTERNS_FILE_PATH)
104
-
105
- _CACHED_PATTERNS = [re.compile(p, re.IGNORECASE) for p in raw_patterns]
106
-
107
-
108
- # Initialize patterns on module load (synchronously for initial load)
109
- # For dynamic refresh, initialize_scrubber should be called during app startup
110
- _raw_initial_patterns = _load_patterns_from_file(_PATTERNS_FILE_PATH)
111
- _CACHED_PATTERNS = [re.compile(p, re.IGNORECASE) for p in _raw_initial_patterns]
112
-
113
-
114
- def scrub_logs(logs: str) -> str:
115
- """
116
- Redacts sensitive information from log strings using loaded patterns.
117
- """
118
- if not logs:
119
- return logs
120
-
121
- scrubbed_logs = logs
122
- for pattern_re in _CACHED_PATTERNS:
123
- scrubbed_logs = pattern_re.sub(_REDACTION_PLACEHOLDER, scrubbed_logs)
124
-
125
- return scrubbed_logs
126
-
127
-
128
- if __name__ == "__main__":
129
- # Example Usage
130
- test_logs = """
131
- Deployment failed. Error: Authentication failed with token dop_v1_abcdefghijklmnopqrstuvwxyz1234567890abcdef.
132
- Connecting to database at postgres://user:mypassword@127.0.0.1:5432/mydb.
133
- Received request from 192.168.1.100. User: test@example.com.
134
- Authorization: Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.
135
- eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ.
136
- SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c.
137
- AWS Secret: wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY.
138
- """
139
-
140
- # Test with file-based patterns
141
- print("--- Original Logs ---")
142
- print(test_logs)
143
- print("\n--- Scrubbed Logs (from file) ---")
144
- scrubbed_logs_from_file = scrub_logs(test_logs)
145
- print(scrubbed_logs_from_file)
146
-
147
- # Example of refreshing (conceptual)
148
- # import asyncio
149
- # async def demo_refresh():
150
- # await initialize_scrubber(refresh_from_url="http://example.com/new-patterns.json")
151
- # print("\n--- Scrubbed Logs (after conceptual refresh) ---")
152
- # print(scrub_logs(test_logs))
153
- # asyncio.run(demo_refresh())
1
+ """
2
+ This module contains the Privacy Scrubber for the Xenfra SDK.
3
+ Its purpose is to redact sensitive information from logs or other text
4
+ before it is sent to diagnostic endpoints, upholding privacy-first principles.
5
+ """
6
+
7
+ import json
8
+ import logging
9
+ import os
10
+ import re
11
+ from pathlib import Path
12
+ from typing import List, Optional
13
+
14
+ import httpx # For fetching patterns from URL
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+ # Path to the patterns file within the SDK
19
+ _PATTERNS_FILE_PATH = Path(__file__).parent / "patterns.json"
20
+ _REDACTION_PLACEHOLDER = "[REDACTED]"
21
+ _CACHED_PATTERNS: List[re.Pattern] = []
22
+
23
+
24
def _load_patterns_from_file(file_path: Path) -> List[str]:
    """Load raw regex pattern strings from a JSON patterns file.

    The file is expected to contain a JSON object whose ``redaction_patterns``
    key maps to a list of regex strings. Loading is best-effort: every problem
    is logged and reported as an empty list, so a bad patterns file never
    raises out of this function.

    Args:
        file_path: Location of the JSON patterns file.

    Returns:
        The pattern strings found under ``redaction_patterns``. An empty list
        is returned when the file is missing, unreadable, not valid JSON, or
        not shaped as described above. Non-string entries are dropped.
    """
    try:
        # Explicit UTF-8 so decoding does not depend on the platform locale.
        with open(file_path, "r", encoding="utf-8") as f:
            config = json.load(f)
    except FileNotFoundError:
        logger.warning(
            f"Patterns file not found at {file_path}. No patterns will be used for scrubbing."
        )
        return []
    except (json.JSONDecodeError, UnicodeDecodeError) as e:
        logger.error(f"Error decoding patterns.json: {e}. Falling back to empty patterns.")
        return []
    except OSError as e:
        # Covers directories, permission problems and other read failures.
        logger.error(
            f"Error reading patterns file {file_path}: {e}. Falling back to empty patterns."
        )
        return []

    if not isinstance(config, dict):
        logger.error(
            f"Expected a JSON object in {file_path}, got {type(config).__name__}. "
            "Falling back to empty patterns."
        )
        return []

    patterns = config.get("redaction_patterns", [])
    if not isinstance(patterns, list):
        logger.error(
            f"Expected 'redaction_patterns' in {file_path} to be a list, "
            f"got {type(patterns).__name__}. Falling back to empty patterns."
        )
        return []

    # Only strings can be compiled as regexes; anything else is discarded.
    valid_patterns = [p for p in patterns if isinstance(p, str)]
    if len(valid_patterns) != len(patterns):
        logger.warning(
            f"Ignored {len(patterns) - len(valid_patterns)} non-string pattern(s) in {file_path}."
        )
    return valid_patterns
38
+
39
+
40
async def _refresh_patterns_from_url(url: str) -> Optional[List[str]]:
    """Fetch replacement redaction patterns from a URL.

    The endpoint must answer with a JSON object; the patterns are read from
    its ``redaction_patterns`` key. The fetch is best-effort: every failure is
    logged and reported as ``None`` rather than raised.

    The overall request timeout is taken from the ``XENFRA_SDK_TIMEOUT``
    environment variable (seconds, default 30); the connect timeout is fixed
    at 10 seconds.

    Args:
        url: Address of the JSON patterns document.

    Returns:
        The list of pattern strings (possibly empty), or ``None`` when the
        request fails, the response is not JSON, or the payload does not have
        the expected shape.
    """
    default_timeout = 30.0
    raw_timeout = os.getenv("XENFRA_SDK_TIMEOUT", "30.0")
    try:
        timeout_seconds = float(raw_timeout)
    except ValueError:
        # A malformed override should not disable the refresh altogether.
        logger.warning(
            f"Invalid XENFRA_SDK_TIMEOUT value {raw_timeout!r}; "
            f"using default of {default_timeout} seconds."
        )
        timeout_seconds = default_timeout

    try:
        timeout = httpx.Timeout(timeout_seconds, connect=10.0)

        async with httpx.AsyncClient(timeout=timeout) as client:
            response = await client.get(url)
            response.raise_for_status()

            # Safe JSON parsing with content-type check (header values are
            # compared case-insensitively).
            content_type = response.headers.get("content-type", "")
            if "application/json" not in content_type.lower():
                logger.warning(
                    f"Expected JSON response from {url}, got {content_type}. "
                    "Skipping pattern refresh."
                )
                return None

            try:
                config = response.json()
            except (ValueError, TypeError) as e:
                # json.JSONDecodeError is a ValueError, so it is handled here.
                logger.error(f"Failed to parse JSON from patterns URL {url}: {e}")
                return None

            if not isinstance(config, dict):
                logger.error(
                    f"Expected dictionary from patterns URL {url}, got {type(config).__name__}"
                )
                return None

            patterns = config.get("redaction_patterns", [])
            if not isinstance(patterns, list) or not all(
                isinstance(p, str) for p in patterns
            ):
                logger.error(
                    f"Expected 'redaction_patterns' from {url} to be a list of strings. "
                    "Skipping pattern refresh."
                )
                return None

            return patterns
    except httpx.TimeoutException as e:
        logger.warning(f"Timeout fetching patterns from {url}: {e}")
        return None
    except httpx.HTTPStatusError as e:
        # Raised by raise_for_status(); it is not a RequestError subclass.
        logger.warning(
            f"HTTP error {e.response.status_code} fetching patterns from {url}: {e}"
        )
        return None
    except httpx.RequestError as e:
        logger.warning(f"Error fetching patterns from {url}: {e}")
        return None
    except Exception as e:
        # Last-resort boundary: a failed refresh must never break the caller.
        logger.error(f"Unexpected error fetching patterns from {url}: {e}")
        return None
87
+
88
+
89
def _compile_patterns(raw_patterns: List[str]) -> List[re.Pattern]:
    """Compile patterns case-insensitively, logging and skipping invalid ones."""
    compiled = []
    for raw in raw_patterns:
        try:
            compiled.append(re.compile(raw, re.IGNORECASE))
        except (re.error, TypeError) as e:
            logger.error(f"Skipping invalid redaction pattern {raw!r}: {e}")
    return compiled


async def initialize_scrubber(refresh_from_url: Optional[str] = None):
    """Initialize or refresh the module-level cache of compiled patterns.

    When ``refresh_from_url`` is given, patterns are fetched from it first.
    The patterns file bundled with the SDK is used instead when no URL is
    given, when the fetch fails or returns nothing, or when none of the
    fetched patterns is a valid regex. Invalid patterns are logged and
    skipped rather than aborting initialization.

    Intended to be called on app startup.

    Args:
        refresh_from_url: Optional URL of a JSON document with updated
            patterns.
    """
    global _CACHED_PATTERNS
    compiled: List[re.Pattern] = []

    if refresh_from_url:
        refreshed = await _refresh_patterns_from_url(refresh_from_url)
        if refreshed:
            compiled = _compile_patterns(refreshed)

    if not compiled:  # Fallback to file if no refresh URL or refresh unusable
        compiled = _compile_patterns(_load_patterns_from_file(_PATTERNS_FILE_PATH))

    # Rebind in a single assignment so scrub_logs never sees a partial list.
    _CACHED_PATTERNS = compiled
106
+
107
+
108
# Populate the pattern cache synchronously at import time from the bundled
# patterns file. For a dynamic refresh, call initialize_scrubber() during
# app startup.
_raw_initial_patterns = _load_patterns_from_file(_PATTERNS_FILE_PATH)
_CACHED_PATTERNS = [
    re.compile(raw_pattern, re.IGNORECASE) for raw_pattern in _raw_initial_patterns
]
112
+
113
+
114
def scrub_logs(logs: str) -> str:
    """Return ``logs`` with every match of a cached pattern redacted.

    Each compiled pattern in the module-level cache is applied in turn, and
    every match is replaced by the redaction placeholder. Falsy input (empty
    string or ``None``) is returned unchanged.
    """
    if logs:
        redacted = logs
        for compiled in _CACHED_PATTERNS:
            redacted = compiled.sub(_REDACTION_PLACEHOLDER, redacted)
        return redacted

    return logs
126
+
127
+
128
if __name__ == "__main__":
    # Demo: run the scrubber over a sample log that mixes several kinds of
    # sensitive values (API token, database URL, IP, e-mail, bearer token, key).
    sample_logs = """
Deployment failed. Error: Authentication failed with token dop_v1_abcdefghijklmnopqrstuvwxyz1234567890abcdef.
Connecting to database at postgres://user:mypassword@127.0.0.1:5432/mydb.
Received request from 192.168.1.100. User: test@example.com.
Authorization: Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.
eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ.
SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c.
AWS Secret: wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY.
"""

    # Show the input, then the output produced with the file-based patterns.
    print("--- Original Logs ---")
    print(sample_logs)
    print("\n--- Scrubbed Logs (from file) ---")
    print(scrub_logs(sample_logs))

    # Conceptual sketch of a dynamic refresh (intentionally not executed):
    # import asyncio
    # async def demo_refresh():
    #     await initialize_scrubber(refresh_from_url="http://example.com/new-patterns.json")
    #     print("\n--- Scrubbed Logs (after conceptual refresh) ---")
    #     print(scrub_logs(sample_logs))
    # asyncio.run(demo_refresh())
xenfra_sdk/recipes.py CHANGED
@@ -1,26 +1,26 @@
1
- from pathlib import Path
2
-
3
- from jinja2 import Environment, FileSystemLoader
4
-
5
-
6
- def generate_stack(context: dict, is_dockerized: bool = True):
7
- """
8
- Generates a cloud-init startup script from a Jinja2 template.
9
-
10
- Args:
11
- context: A dictionary containing information for rendering the template,
12
- e.g., {'domain': 'example.com', 'email': 'user@example.com'}
13
- is_dockerized: Whether to setup Docker and Docker Compose (default: True)
14
- """
15
- # Path to the templates directory
16
- template_dir = Path(__file__).parent / "templates"
17
- env = Environment(loader=FileSystemLoader(template_dir))
18
-
19
- template = env.get_template("cloud-init.sh.j2")
20
-
21
- # The context will contain all necessary variables for the template.
22
- # Pass is_dockerized to the template for conditional setup
23
- render_context = {**context, "is_dockerized": is_dockerized}
24
- script = template.render(render_context)
25
-
26
- return script
1
+ from pathlib import Path
2
+
3
+ from jinja2 import Environment, FileSystemLoader
4
+
5
+
6
def generate_stack(context: dict, is_dockerized: bool = True):
    """
    Render the ``cloud-init.sh.j2`` Jinja2 template into a startup script.

    Args:
        context: Variables made available to the template,
                 e.g., {'domain': 'example.com', 'email': 'user@example.com'}
        is_dockerized: Passed to the template as ``is_dockerized`` so it can
                       conditionally set up Docker and Docker Compose
                       (default: True). Takes precedence over a key of the
                       same name in ``context``.

    Returns:
        The rendered script.
    """
    # Templates live in the "templates" directory next to this module.
    jinja_env = Environment(loader=FileSystemLoader(Path(__file__).parent / "templates"))
    cloud_init_template = jinja_env.get_template("cloud-init.sh.j2")

    # Merge the flag into a copy of the caller's variables; ``context`` itself
    # is left untouched.
    return cloud_init_template.render(dict(context, is_dockerized=is_dockerized))
@@ -1,3 +1,3 @@
1
- class BaseManager:
2
- def __init__(self, client):
3
- self._client = client
1
class BaseManager:
    """Minimal base class that holds the client object it is constructed with."""

    def __init__(self, client):
        """Store ``client`` on the private ``_client`` attribute."""
        self._client = client