xsl 0.1.5__py3-none-any.whl → 0.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
xsl/editor.py CHANGED
@@ -1,5 +1,491 @@
1
- # Core FileEditor class
2
1
  """
3
- editor.py
2
+ Core FileEditor class for xsl package.
3
+ Provides functionality for editing XML/HTML/SVG files with XPath and CSS selectors.
4
4
  """
5
5
 
6
+ import os
7
+ import re
8
+ import base64
9
+ import logging
10
+ import shutil
11
+ from typing import Any, Dict, Optional, Union, List
12
+ from pathlib import Path
13
+ import xml.etree.ElementTree as ET
14
+
15
+ from .utils import is_data_uri, parse_data_uri
16
+
17
+ try:
18
+ from lxml import etree, html
19
+ LXML_AVAILABLE = True
20
+ except ImportError:
21
+ LXML_AVAILABLE = False
22
+ print("Warning: lxml not available. Installing: pip install lxml")
23
+
24
+ try:
25
+ from bs4 import BeautifulSoup
26
+ BS4_AVAILABLE = True
27
+ except ImportError:
28
+ BS4_AVAILABLE = False
29
+ print("Warning: BeautifulSoup4 not available. Installing: pip install beautifulsoup4")
30
+
31
+ try:
32
+ import requests
33
+ REQUESTS_AVAILABLE = True
34
+ except ImportError:
35
+ REQUESTS_AVAILABLE = False
36
+ print("Warning: requests not available. Installing: pip install requests")
37
+
38
+
39
+ class FileEditor:
40
+ """Main class for XML/HTML/SVG file editing with XPath and CSS selector support."""
41
+
42
+ def __init__(self, file_path: str):
43
+ """Initialize FileEditor with a file path or URL.
44
+
45
+ Args:
46
+ file_path: Path to file or URL
47
+
48
+ Raises:
49
+ ValueError: If the file cannot be loaded or parsed
50
+ """
51
+ self.file_path = file_path
52
+ self.tree = None
53
+ self.original_content = None
54
+ # Default namespaces for common XML formats
55
+ self.ns = {
56
+ 'svg': 'http://www.w3.org/2000/svg',
57
+ 'xlink': 'http://www.w3.org/1999/xlink',
58
+ 'html': 'http://www.w3.org/1999/xhtml',
59
+ 'xhtml': 'http://www.w3.org/1999/xhtml'
60
+ }
61
+ self._load_file()
62
+
63
@property
def is_remote(self) -> bool:
    """Tell whether ``file_path`` points at a remote resource.

    Returns:
        bool: True when ``file_path`` is a string starting with
        ``http://``, ``https://`` or ``ftp://``; False otherwise.
    """
    location = self.file_path
    if not isinstance(location, str):
        return False
    # str.startswith accepts a tuple and checks each prefix in turn.
    return location.startswith(('http://', 'https://', 'ftp://'))
74
+
75
+ def _load_file(self):
76
+ """Load file content from path or URL."""
77
+ if not self.file_path:
78
+ raise ValueError("No file path provided")
79
+
80
+ if self.is_remote:
81
+ import requests
82
+ try:
83
+ response = requests.get(self.file_path)
84
+ response.raise_for_status()
85
+ content = response.content
86
+ except Exception as e:
87
+ raise IOError(f"Failed to fetch remote file: {str(e)}")
88
+ else:
89
+ with open(self.file_path, 'rb') as f:
90
+ content = f.read()
91
+
92
+ self.original_content = content.decode('utf-8')
93
+ self._parse_content(content)
94
+
95
def _parse_content(self, content: bytes):
    """Build ``self.tree`` from raw bytes, trying lxml before the stdlib.

    When lxml parses the document, the prefixed namespaces declared on the
    root are merged into ``self.ns``. When lxml is unavailable or reports
    a syntax error, ``xml.etree.ElementTree`` is used instead.

    Args:
        content: The raw bytes content to parse

    Raises:
        ValueError: If the stdlib parser cannot parse the content
    """
    if LXML_AVAILABLE:
        try:
            root = etree.fromstring(content)
        except etree.XMLSyntaxError as e:
            # Not fatal: fall through to the standard library parser below.
            logging.warning(f"Failed to parse with lxml: {e}")
        else:
            self.tree = root
            if hasattr(root, 'nsmap'):
                # The default namespace is keyed by None and is skipped.
                self.ns.update(
                    {prefix: uri for prefix, uri in root.nsmap.items()
                     if prefix is not None}
                )
            return

    try:
        self.tree = ET.fromstring(content)
    except ET.ParseError as e:
        raise ValueError(f"Cannot parse file: {str(e)}")
122
+
123
def query(self, xpath: str) -> List[Any]:
    """Query elements using XPath.

    The engine is chosen from the loaded tree itself: a tree that offers
    ``.xpath()`` (lxml) gets full XPath, any other tree uses the limited
    ElementTree path language. This matters because the parser can fall
    back to the standard library even when lxml is installed.

    Args:
        xpath: XPath expression; prefixes are resolved through ``self.ns``

    Returns:
        List of matching elements

    Raises:
        ValueError: If no file is loaded or the expression cannot be
            evaluated
    """
    if self.tree is None:
        raise ValueError("No file loaded")

    try:
        if hasattr(self.tree, 'xpath'):
            return self.tree.xpath(xpath, namespaces=self.ns)

        # ElementTree refuses absolute paths on an element, so "//x" is
        # evaluated relative to the root (the root itself is not matched).
        et_path = f".{xpath}" if xpath.startswith('//') else xpath
        # findall() resolves "prefix:tag" through the namespaces mapping;
        # no manual rewriting of the expression is needed.
        return self.tree.findall(et_path, namespaces=self.ns)
    except Exception as e:
        raise ValueError(f"Invalid XPath expression '{xpath}': {str(e)}") from e
152
+
153
def set_value(self, xpath: str, value: str) -> bool:
    """Set the text of every element matching XPath.

    Args:
        xpath: XPath expression
        value: New value

    Returns:
        True if at least one matched node had its text replaced; False
        when nothing matched or no matched node has a ``text`` attribute
        (for example attribute-string results).
    """
    elements = self.query(xpath)
    if not elements:
        return False

    changed = False
    for elem in elements:
        if hasattr(elem, 'text'):
            elem.text = value
            changed = True
    # Report what actually happened instead of an unconditional True.
    return changed
171
+
172
def save(self, output_path: str = None, create_backup: bool = False) -> str:
    """Save changes to file.

    Args:
        output_path: Output file path (default: overwrite original)
        create_backup: If True and the target already exists, copy it to
            ``<target>.bak`` before writing

    Returns:
        Path to saved file

    Raises:
        IOError: If nothing is loaded, if the document is remote and no
            ``output_path`` is given, or if the backup or write fails
    """
    if self.tree is None:
        raise IOError("No file loaded")
    if not output_path and self.is_remote:
        # A URL is not a writable location; require an explicit target.
        raise IOError(
            f"Cannot save remote file {self.file_path} in place; "
            "provide output_path"
        )
    output_path = output_path or self.file_path

    try:
        if create_backup and os.path.exists(output_path):
            shutil.copy2(output_path, f"{output_path}.bak")

        # Pick the serializer from the tree type: a standard-library tree
        # can exist even when lxml is installed, and lxml cannot wrap it.
        if isinstance(self.tree, ET.Element):
            ET.ElementTree(self.tree).write(
                output_path,
                encoding='utf-8',
                xml_declaration=True,
            )
        else:
            etree.ElementTree(self.tree).write(
                output_path,
                encoding='utf-8',
                xml_declaration=True,
                pretty_print=True,
            )
        return output_path
    except Exception as e:
        raise IOError(f"Failed to save file {output_path}: {str(e)}") from e
209
+
210
def find_by_xpath(self, xpath: str) -> list:
    """Return the nodes selected by ``xpath``.

    This is a thin alias that forwards to ``query``.

    Args:
        xpath: XPath expression

    Returns:
        Whatever ``query`` returns for the expression
    """
    matches = self.query(xpath)
    return matches
220
+
221
def get_element_text(self, xpath: str, default: str = "") -> str:
    """Return the text of the first node selected by ``xpath``.

    Args:
        xpath: XPath expression
        default: Value returned when nothing matches, when the first match
            has no ``text`` attribute, or when its text is empty/None

    Returns:
        The first match's text, or ``default``
    """
    matches = self.query(xpath)
    if not matches:
        return default

    first = matches[0]
    if not hasattr(first, 'text'):
        return default
    # Empty or missing text also falls back to the default.
    return first.text or default
235
+
236
def detect_file_type(self) -> str:
    """Detect the type of the loaded file from its root element.

    The root counts as SVG or HTML when its local tag name is ``svg`` /
    ``html`` or when it lives in the SVG / XHTML namespace. A document
    with a non-namespaced ``html`` descendant is also reported as HTML.

    Returns:
        str: File type ('svg', 'html', 'xml', or 'unknown')
    """
    root = self.tree
    # Compare with None explicitly: an element without children is falsy,
    # so a truthiness test would misreport e.g. "<svg/>" as unknown.
    if root is None:
        return "unknown"

    tag = getattr(root, 'tag', None)
    if isinstance(tag, str):
        # Namespaced tags have the form "{uri}local".
        namespace, _, local = tag.rpartition('}')
        namespace = namespace.lstrip('{')
        local = local.lower()
        if local == 'svg' or namespace == 'http://www.w3.org/2000/svg':
            return 'svg'
        if local == 'html' or namespace == 'http://www.w3.org/1999/xhtml':
            return 'html'

    if hasattr(root, 'find') and root.find('.//html') is not None:
        return 'html'

    # Anything else that parsed is treated as generic XML.
    return 'xml'
256
+
257
def extract_data_uri(self, xpath: str) -> dict:
    """Extract a data URI from an attribute of the selected element(s).

    ``xpath`` may select elements, or end in an attribute step such as
    ``/@xlink:href``. In the latter case the owning elements are looked up
    and that attribute is tried first. The attributes ``xlink:href``,
    ``href``, ``data`` and ``src`` are always tried afterwards.

    Args:
        xpath: XPath to the element (or attribute) containing the data URI

    Returns:
        dict: The parsed data URI components on success. On failure a dict
        with 'error' (message), 'mime_type' ('text/plain') and 'data' ('').
    """
    try:
        elements = self.query(xpath)
        if not elements:
            return self._data_uri_error('No elements found matching XPATH')

        # A trailing "/@name" step selects attribute values; query the
        # owning elements instead so the attribute can be read from them.
        attr = None
        attr_step = re.fullmatch(r'(.+)/@((?:[\w.-]+:)?[\w.-]+)', xpath)
        if attr_step:
            element_xpath, attr = attr_step.groups()
            elements = self.query(element_xpath)
            if not elements:
                return self._data_uri_error('No elements found matching XPath')

        # The explicitly requested attribute (if any) is checked first.
        candidates = ([attr] if attr else []) + ['xlink:href', 'href', 'data', 'src']
        for elem in elements:
            for attr_name in candidates:
                uri = self._get_attribute(elem, attr_name)
                if uri and is_data_uri(uri):
                    return self._describe_data_uri(uri, xpath)

        return self._data_uri_error('No data URI found in element attributes')

    except Exception as e:
        return self._data_uri_error(f'Error extracting data URI: {str(e)}')


def _data_uri_error(self, message: str) -> dict:
    """Build the uniform failure payload returned by ``extract_data_uri``."""
    return {'error': message, 'mime_type': 'text/plain', 'data': ''}


def _describe_data_uri(self, uri: str, xpath: str) -> dict:
    """Parse one data URI and add the extra fields callers receive."""
    try:
        result = parse_data_uri(uri)
        # For image data, expose the raw payload after the first comma.
        if 'image/' in result.get('mime_type', ''):
            payload = uri.split(',', 1)[1]
            result['base64_data'] = payload
            result['data'] = payload  # Kept for backward compatibility
            result['size'] = len(payload)
        # NOTE(review): 'xpath' is attached to every parsed result, not
        # only to images — confirm this matches what callers expect.
        result['xpath'] = xpath
        return result
    except Exception as e:
        return self._data_uri_error(f'Error parsing data URI: {str(e)}')
344
+
345
+ def _get_attribute(self, element, name: str) -> str:
346
+ """Get an attribute from an element, handling namespaces.
347
+
348
+ Args:
349
+ element: The XML element
350
+ name: Attribute name (can include namespace prefix)
351
+
352
+ Returns:
353
+ The attribute value or None if not found
354
+ """
355
+ if not hasattr(element, 'get'):
356
+ return None
357
+
358
+ # Try direct attribute first
359
+ value = element.get(name)
360
+ if value is not None:
361
+ return value
362
+
363
+ # Try with xlink: prefix
364
+ if name.startswith('xlink:'):
365
+ return element.get(f"{{{self.ns.get('xlink', '')}}}{name[6:]}")
366
+
367
+ # Try with full namespace
368
+ if ':' in name:
369
+ prefix = name.split(':', 1)[0]
370
+ if prefix in self.ns:
371
+ return element.get(f"{{{self.ns[prefix]}}}{name.split(':', 1)[1]}")
372
+
373
+ return None
374
+
375
@property
def file_type(self) -> str:
    """Expose the detected document type as a read-only attribute.

    Returns:
        str: The value produced by ``detect_file_type``
    """
    detected = self.detect_file_type()
    return detected
383
+
384
def get_element_attribute(self, xpath: str, attr_name: str, default: str = None) -> str:
    """Get an attribute value from the first element matching XPath.

    Args:
        xpath: XPath to the element
        attr_name: Name of the attribute to get (may carry a namespace
            prefix)
        default: Value returned when no element matches or the attribute
            is absent

    Returns:
        The attribute value, including an empty string when the attribute
        exists but is empty, or ``default`` if not found
    """
    elements = self.query(xpath)
    if not elements:
        return default

    value = self._get_attribute(elements[0], attr_name)
    # Only a missing attribute (None) falls back to the default; an
    # attribute that is present with an empty value is returned as "".
    return default if value is None else value
400
+
401
def set_element_text(self, xpath: str, text: str) -> bool:
    """Set the text content of every element matching XPath.

    Args:
        xpath: XPath to the element(s)
        text: New text content

    Returns:
        bool: True if at least one matched node had its text replaced;
        False when nothing matched or no matched node has a ``text``
        attribute (for example attribute-string results).
    """
    elements = self.query(xpath)
    if not elements:
        return False

    modified = False
    for elem in elements:
        if hasattr(elem, 'text'):
            elem.text = text
            modified = True
    # Report what actually happened instead of an unconditional True.
    return modified
419
+
420
def set_element_attribute(self, xpath: str, attr_name: str, attr_value: str) -> bool:
    """Write ``attr_value`` to ``attr_name`` on every element matching XPath.

    A ``prefix:local`` name is stored in ``{uri}local`` form using
    ``self.ns``. A prefixed name whose prefix is not in ``self.ns`` is
    not written at all.

    Args:
        xpath: XPath to the element(s)
        attr_name: Name of the attribute to set
        attr_value: Value to set

    Returns:
        bool: True if any elements were modified, False otherwise
    """
    elements = self.query(xpath)
    if not elements:
        return False

    # Resolve the stored attribute name once; it is the same for all matches.
    prefix, colon, local = attr_name.partition(':')
    if not colon:
        target = attr_name
    elif prefix in self.ns:
        target = f"{{{self.ns[prefix]}}}{local}"
    else:
        target = None  # unknown prefix: nothing is set

    modified = False
    for elem in elements:
        if target is not None and hasattr(elem, 'set'):
            elem.set(target, attr_value)
            modified = True

    return modified
450
+
451
def list_elements(self, xpath: str) -> List[Dict[str, Any]]:
    """Summarise every element selected by ``xpath``.

    Matches without an ``attrib`` mapping (for example plain string
    results) are left out.

    Args:
        xpath: XPath expression to find elements

    Returns:
        List of dictionaries with the keys 'tag', 'text' and 'attributes'
    """
    return [
        {
            'tag': node.tag,
            'text': getattr(node, 'text', ''),
            'attributes': dict(node.attrib),
        }
        for node in self.query(xpath)
        if hasattr(node, 'attrib')
    ]
472
+
473
def backup(self) -> str:
    """Copy the current file to ``<file_path>.bak``.

    Returns:
        str: Path to the backup file

    Raises:
        IOError: If no file path is set or the copy fails
    """
    source = self.file_path
    if not source:
        raise IOError("No file loaded to back up")

    destination = f"{source}.bak"
    try:
        import shutil
        # copy2 copies the file data together with its metadata.
        shutil.copy2(source, destination)
    except Exception as e:
        raise IOError(f"Failed to create backup: {str(e)}")
    return destination
xsl/server.py CHANGED
@@ -1,5 +1,167 @@
1
- # HTTP server
2
1
  """
3
- server.py
2
+ HTTP Server for xsl - Web interface for remote file editing.
4
3
  """
5
4
 
5
+ import argparse
6
+ import json
7
+ import sys
8
+ from http.server import BaseHTTPRequestHandler, HTTPServer
9
+ from typing import Dict
10
+ from urllib.parse import parse_qs, urlparse
11
+
12
+ from . import __version__
13
+ from .editor import FileEditor
14
+
15
+
16
+ class FileEditorServer(BaseHTTPRequestHandler):
17
+ """HTTP request handler for xsl server."""
18
+
19
+ # Class variable to store loaded editors
20
+ editors: Dict[str, FileEditor] = {}
21
+
22
def do_GET(self):
    """Handle GET requests.

    Routes:
        ``/``            -> ``_serve_interface()``
        ``/api/health``  -> JSON status with the package version
        ``/api/extract`` -> ``_extract_from_url(query)``, where ``query``
                            is the parsed query string
        anything else    -> 404

    A route whose handler method does not exist on this object is
    answered with 501 instead of crashing the request.
    """
    parsed_url = urlparse(self.path)
    path = parsed_url.path

    if path == "/api/health":
        self._send_json_response({"status": "ok", "version": __version__})
        return

    if path == "/":
        handler_name, handler_args = "_serve_interface", ()
    elif path == "/api/extract":
        # Direct extraction endpoint with URL + XPath
        handler_name, handler_args = "_extract_from_url", (parse_qs(parsed_url.query),)
    else:
        self._send_error(404, "Not Found")
        return

    # NOTE(review): these handlers are not defined in the part of the
    # class visible here; look them up defensively so a missing one gives
    # the client a proper response.
    handler = getattr(self, handler_name, None)
    if handler is None:
        self._send_error(501, "Not Implemented")
        return
    handler(*handler_args)
37
+
38
def do_POST(self):
    """Handle POST requests carrying a JSON body.

    The body is read using the ``Content-Length`` header, decoded as
    UTF-8 JSON and passed to the handler registered for the request path.

    Responses produced here:
        411 - ``Content-Length`` header missing
        400 - ``Content-Length`` not a non-negative integer, or the body
              is not valid UTF-8 JSON
        404 - unknown path
        501 - known path whose handler method does not exist on this object
    """
    length_header = self.headers.get("Content-Length")
    if length_header is None:
        self._send_error(411, "Content-Length required")
        return
    try:
        content_length = int(length_header)
    except ValueError:
        content_length = -1
    if content_length < 0:
        self._send_error(400, "Invalid Content-Length")
        return

    try:
        data = json.loads(self.rfile.read(content_length).decode("utf-8"))
    except (UnicodeDecodeError, json.JSONDecodeError):
        self._send_error(400, "Invalid JSON")
        return

    # Path -> handler method name. Names are resolved lazily so that one
    # missing handler cannot break the other routes.
    routes = {
        "/api/load": "_load_file",
        "/api/query": "_query_elements",
        "/api/update": "_update_element",
        "/api/save": "_save_file",
        "/api/extract_data_uri": "_extract_data_uri",
        "/api/add": "_add_element",
        "/api/remove": "_remove_element",
        "/api/info": "_get_file_info",
    }
    handler_name = routes.get(urlparse(self.path).path)
    if handler_name is None:
        self._send_error(404, "Not Found")
        return

    # NOTE(review): the handlers are not defined in the part of the class
    # visible here; answer 501 rather than raising AttributeError.
    handler = getattr(self, handler_name, None)
    if handler is None:
        self._send_error(501, "Not Implemented")
        return
    handler(data)
70
+
71
def do_OPTIONS(self):
    """Answer an OPTIONS request with status 200 and the CORS headers."""
    cors_headers = (
        ("Access-Control-Allow-Origin", "*"),
        ("Access-Control-Allow-Methods", "GET, POST, OPTIONS"),
        ("Access-Control-Allow-Headers", "Content-Type"),
    )
    self.send_response(200)
    for header_name, header_value in cors_headers:
        self.send_header(header_name, header_value)
    self.end_headers()
78
+
79
+ def _send_response(self, status_code, content, content_type="text/plain"):
80
+ """Send HTTP response."""
81
+ self.send_response(status_code)
82
+ self.send_header("Content-type", content_type)
83
+ self.send_header("Access-Control-Allow-Origin", "*")
84
+ self.send_header("Access-Control-Allow-Methods", "GET, POST, OPTIONS")
85
+ self.send_header("Access-Control-Allow-Headers", "Content-Type")
86
+ self.end_headers()
87
+ self.wfile.write(
88
+ content.encode("utf-8") if isinstance(content, str) else content
89
+ )
90
+
91
+ def _send_json_response(self, data):
92
+ """Send JSON response."""
93
+ self._send_response(200, json.dumps(data, indent=2), "application/json")
94
+
95
+ def _send_error(self, status_code, message):
96
+ """Send error response."""
97
+ self._send_response(
98
+ status_code, json.dumps({"error": message}), "application/json"
99
+ )
100
+
101
def log_message(self, format, *args):
    """Print a log line as ``<client address> - <formatted message>``.

    Args:
        format: %-style format string
        *args: Values interpolated into ``format``
    """
    client = self.address_string()
    message = format % args
    print(f"{client} - {message}")
104
+
105
+
106
def start_server(host="localhost", port=8080):
    """Start the xsl HTTP server and block until it stops.

    Ctrl+C stops the server quietly. Any other failure is printed and the
    process exits with status 1. In every case the listening socket is
    closed before returning or exiting.

    Args:
        host: Interface to bind to
        port: TCP port to listen on
    """
    server = None
    try:
        server = HTTPServer((host, port), FileEditorServer)
        print(f"🌐 xsl Server v{__version__} starting on {host}:{port}")
        print(f"📖 Open http://{host}:{port} in your browser")
        print("🔗 API endpoints:")
        print(f" GET http://{host}:{port}/api/extract?url=<URL>&xpath=<XPATH>")
        print(f" POST http://{host}:{port}/api/load")
        print(f" POST http://{host}:{port}/api/query")
        print(f" POST http://{host}:{port}/api/update")
        print(f" POST http://{host}:{port}/api/save")
        print("\n⏹️ Press Ctrl+C to stop the server")
        print("-" * 60)

        server.serve_forever()
    except KeyboardInterrupt:
        print("\n\n👋 Server stopped by user")
    except Exception as e:
        print(f"❌ Server error: {e}")
        sys.exit(1)
    finally:
        # Release the port even on Ctrl+C or an error; server stays None
        # when binding itself failed.
        if server is not None:
            server.server_close()
127
+
128
+
129
def main(args: list = None) -> int:
    """Parse the command line and run the xsl server.

    Args:
        args: Command line arguments; None lets argparse read them from
            the process command line

    Returns:
        int: 0 when the server returns or is interrupted, 1 on any other
        exception
    """
    parser = argparse.ArgumentParser(description="Start xsl HTTP server")
    parser.add_argument(
        "--host", default="localhost", help="Host to bind to (default: localhost)"
    )
    parser.add_argument(
        "--port", type=int, default=8082, help="Port to listen on (default: 8082)"
    )
    options = parser.parse_args(args)

    print(f"Starting xsl server on http://{options.host}:{options.port}")
    print("Press Ctrl+C to stop")

    exit_code = 0
    try:
        start_server(host=options.host, port=options.port)
    except KeyboardInterrupt:
        print("\nServer stopped")
    except Exception as e:
        print(f"Error: {e}", file=sys.stderr)
        exit_code = 1
    return exit_code
164
+
165
+
166
if __name__ == "__main__":
    # Hand main()'s return value to the shell as the process exit status.
    sys.exit(main())