PyPI - xsl - Versions diffs - 0.1.5__py3-none-any.whl → 0.1.8__py3-none-any.whl - Mend

xsl 0.1.5__py3-none-any.whl → 0.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

xsl/__init__.py +19 -1
xsl/__main__.py +15 -0
xsl/cli.py +337 -1103
xsl/editor.py +488 -2
xsl/server.py +164 -2
xsl/utils.py +103 -2
xsl-0.1.8.dist-info/METADATA +372 -0
xsl-0.1.8.dist-info/RECORD +11 -0
xsl-0.1.8.dist-info/entry_points.txt +4 -0
xsl-0.1.5.dist-info/METADATA +0 -110
xsl-0.1.5.dist-info/RECORD +0 -10
xsl-0.1.5.dist-info/entry_points.txt +0 -4
{xsl-0.1.5.dist-info → xsl-0.1.8.dist-info}/LICENSE +0 -0
{xsl-0.1.5.dist-info → xsl-0.1.8.dist-info}/WHEEL +0 -0

xsl/editor.py CHANGED Viewed

@@ -1,5 +1,491 @@
-# Core FileEditor class
 """
-editor.py
+Core FileEditor class for xsl package.
+Provides functionality for editing XML/HTML/SVG files with XPath and CSS selectors.
 """
+import os
+import re
+import base64
+import logging
+import shutil
+from typing import Any, Dict, Optional, Union, List
+from pathlib import Path
+import xml.etree.ElementTree as ET
+from .utils import is_data_uri, parse_data_uri
+try:
+    from lxml import etree, html
+    LXML_AVAILABLE = True
+except ImportError:
+    LXML_AVAILABLE = False
+    print("Warning: lxml not available. Installing: pip install lxml")
+try:
+    from bs4 import BeautifulSoup
+    BS4_AVAILABLE = True
+except ImportError:
+    BS4_AVAILABLE = False
+    print("Warning: BeautifulSoup4 not available. Installing: pip install beautifulsoup4")
+try:
+    import requests
+    REQUESTS_AVAILABLE = True
+except ImportError:
+    REQUESTS_AVAILABLE = False
+    print("Warning: requests not available. Installing: pip install requests")
+class FileEditor:
+    """Main class for XML/HTML/SVG file editing with XPath and CSS selector support."""
+    def __init__(self, file_path: str):
+        """Initialize FileEditor with a file path or URL.
+        Args:
+            file_path: Path to file or URL
+        Raises:
+            ValueError: If the file cannot be loaded or parsed
+        """
+        self.file_path = file_path
+        self.tree = None
+        self.original_content = None
+        # Default namespaces for common XML formats
+        self.ns = {
+            'svg': 'http://www.w3.org/2000/svg',
+            'xlink': 'http://www.w3.org/1999/xlink',
+            'html': 'http://www.w3.org/1999/xhtml',
+            'xhtml': 'http://www.w3.org/1999/xhtml'
+        }
+        self._load_file()
+    @property
+    def is_remote(self) -> bool:
+        """Check if the file is a remote URL.
+        Returns:
+            bool: True if file_path is a URL, False otherwise
+        """
+        return (isinstance(self.file_path, str) and
+                (self.file_path.startswith('http://') or
+                 self.file_path.startswith('https://') or
+                 self.file_path.startswith('ftp://')))
+    def _load_file(self):
+        """Load file content from path or URL."""
+        if not self.file_path:
+            raise ValueError("No file path provided")
+        if self.is_remote:
+            import requests
+            try:
+                response = requests.get(self.file_path)
+                response.raise_for_status()
+                content = response.content
+            except Exception as e:
+                raise IOError(f"Failed to fetch remote file: {str(e)}")
+        else:
+            with open(self.file_path, 'rb') as f:
+                content = f.read()
+        self.original_content = content.decode('utf-8')
+        self._parse_content(content)
+    def _parse_content(self, content: bytes):
+        """Parse file content with appropriate parser.
+        Args:
+            content: The raw bytes content to parse
+        Raises:
+            ValueError: If the content cannot be parsed
+        """
+        if LXML_AVAILABLE:
+            try:
+                self.tree = etree.fromstring(content)
+                # Update namespaces from the document
+                if hasattr(self.tree, 'nsmap'):
+                    for prefix, uri in self.tree.nsmap.items():
+                        if prefix is not None:  # Skip default namespace
+                            self.ns[prefix] = uri
+                return
+            except etree.XMLSyntaxError as e:
+                logging.warning(f"Failed to parse with lxml: {e}")
+                # Continue to standard library fallback
+        # Fallback to standard library
+        try:
+            self.tree = ET.fromstring(content)
+        except ET.ParseError as e:
+            raise ValueError(f"Cannot parse file: {str(e)}")
+    def query(self, xpath: str) -> List[Any]:
+        """Query elements using XPath.
+        Args:
+            xpath: XPath expression (can include namespaces)
+        Returns:
+            List of matching elements
+        Raises:
+            ValueError: If no file is loaded or XPath is invalid
+        """
+        if self.tree is None:
+            raise ValueError("No file loaded")
+        try:
+            if LXML_AVAILABLE:
+                # Register namespaces with lxml
+                return self.tree.xpath(xpath, namespaces=self.ns)
+            else:
+                # Basic XPath support with standard library
+                # Replace namespace prefixes in XPath for standard library
+                if ':' in xpath:
+                    # Simple namespace handling for standard library
+                    for prefix, uri in self.ns.items():
+                        xpath = xpath.replace(f"{prefix}:", f"{{'{uri}'}}")
+                return self.tree.findall(xpath)
+        except Exception as e:
+            raise ValueError(f"Invalid XPath expression '{xpath}': {str(e)}")
+    def set_value(self, xpath: str, value: str) -> bool:
+        """Set value of elements matching XPath.
+        Args:
+            xpath: XPath expression
+            value: New value
+        Returns:
+            True if successful
+        """
+        elements = self.query(xpath)
+        if not elements:
+            return False
+        for elem in elements:
+            if hasattr(elem, 'text'):
+                elem.text = value
+        return True
+    def save(self, output_path: str = None, create_backup: bool = False) -> str:
+        """Save changes to file.
+        Args:
+            output_path: Output file path (default: overwrite original)
+            create_backup: If True, create a backup before saving
+        Returns:
+            Path to saved file
+        Raises:
+            IOError: If file cannot be written
+        """
+        output_path = output_path or self.file_path
+        if create_backup and os.path.exists(output_path):
+            backup_path = f"{output_path}.bak"
+            import shutil
+            shutil.copy2(output_path, backup_path)
+        try:
+            if LXML_AVAILABLE:
+                etree.ElementTree(self.tree).write(
+                    output_path,
+                    encoding='utf-8',
+                    xml_declaration=True,
+                    pretty_print=True
+                )
+            else:
+                ET.ElementTree(self.tree).write(
+                    output_path,
+                    encoding='utf-8',
+                    xml_declaration=True
+                )
+            return output_path
+        except Exception as e:
+            raise IOError(f"Failed to save file {output_path}: {str(e)}")
+    def find_by_xpath(self, xpath: str) -> list:
+        """Find elements by XPath.
+        Args:
+            xpath: XPath expression
+        Returns:
+            List of matching elements
+        """
+        return self.query(xpath)
+    def get_element_text(self, xpath: str, default: str = "") -> str:
+        """Get text content of first element matching XPath.
+        Args:
+            xpath: XPath expression
+            default: Default value if element not found
+        Returns:
+            Text content of the element or default value
+        """
+        elements = self.query(xpath)
+        if elements and hasattr(elements[0], 'text'):
+            return elements[0].text or default
+        return default
+    def detect_file_type(self) -> str:
+        """Detect the type of the loaded file.
+        Returns:
+            str: File type ('svg', 'html', 'xml', or 'unknown')
+        """
+        if not self.tree:
+            return "unknown"
+        # Check for SVG
+        if hasattr(self.tree, 'tag') and 'svg' in self.tree.tag:
+            return 'svg'
+        # Check for HTML
+        if hasattr(self.tree, 'find'):
+            if self.tree.find('.//html') is not None:
+                return 'html'
+        # Default to XML
+        return 'xml'
+    def extract_data_uri(self, xpath: str) -> dict:
+        """Extract data URI from element's attribute.
+        Args:
+            xpath: XPath to the element containing data URI
+        Returns:
+            dict: Parsed data URI components with 'mime_type' and other metadata,
+                  or {'error': str} if not found
+        """
+        try:
+            elements = self.query(xpath)
+            if not elements:
+                return {'error': 'No elements found matching XPATH'}
+            # Handle attribute XPath (e.g., @xlink:href)
+            attr = None
+            if xpath.endswith('/@xlink:href'):
+                element_xpath = xpath.rsplit('/', 1)[0]
+                attr = 'xlink:href'
+                elements = self.query(element_xpath)
+            elif xpath.endswith('/@href'):
+                element_xpath = xpath.rsplit('/', 1)[0]
+                attr = 'href'
+                elements = self.query(element_xpath)
+            if not elements:
+                return {
+                    'error': 'No elements found matching XPath',
+                    'mime_type': 'text/plain',
+                    'data': ''
+                }
+            # Try to find a data URI in the elements
+            for elem in elements:
+                # If we have a specific attribute, check that first
+                if attr:
+                    uri = self._get_attribute(elem, attr)
+                    if uri and is_data_uri(uri):
+                        try:
+                            result = parse_data_uri(uri)
+                            # For image data, ensure we keep the original mime type
+                            if 'image/' in result.get('mime_type', ''):
+                                result['base64_data'] = uri.split(',', 1)[1]  # Store base64 data
+                                result['data'] = result['base64_data']  # Keep for backward compatibility
+                                result['size'] = len(result['base64_data'])  # Add size field
+                            result['xpath'] = xpath
+                            return result
+                        except Exception as e:
+                            return {
+                                'error': f'Error parsing data URI: {str(e)}',
+                                'mime_type': 'text/plain',
+                                'data': ''
+                            }
+                # Otherwise check common attributes
+                for attr_name in ['xlink:href', 'href', 'data', 'src']:
+                    uri = self._get_attribute(elem, attr_name)
+                    if uri and is_data_uri(uri):
+                        try:
+                            result = parse_data_uri(uri)
+                            # For image data, ensure we keep the original mime type
+                            if 'image/' in result.get('mime_type', ''):
+                                result['base64_data'] = uri.split(',', 1)[1]  # Store base64 data
+                                result['data'] = result['base64_data']  # Keep for backward compatibility
+                                result['size'] = len(result['base64_data'])  # Add size field
+                            result['xpath'] = xpath
+                            return result
+                        except Exception as e:
+                            return {
+                                'error': f'Error parsing data URI: {str(e)}',
+                                'mime_type': 'text/plain',
+                                'data': ''
+                            }
+            return {
+                'error': 'No data URI found in element attributes',
+                'mime_type': 'text/plain',
+                'data': ''
+            }
+        except Exception as e:
+            return {
+                'error': f'Error extracting data URI: {str(e)}',
+                'mime_type': 'text/plain',
+                'data': ''
+            }
+    def _get_attribute(self, element, name: str) -> str:
+        """Get an attribute from an element, handling namespaces.
+        Args:
+            element: The XML element
+            name: Attribute name (can include namespace prefix)
+        Returns:
+            The attribute value or None if not found
+        """
+        if not hasattr(element, 'get'):
+            return None
+        # Try direct attribute first
+        value = element.get(name)
+        if value is not None:
+            return value
+        # Try with xlink: prefix
+        if name.startswith('xlink:'):
+            return element.get(f"{{{self.ns.get('xlink', '')}}}{name[6:]}")
+        # Try with full namespace
+        if ':' in name:
+            prefix = name.split(':', 1)[0]
+            if prefix in self.ns:
+                return element.get(f"{{{self.ns[prefix]}}}{name.split(':', 1)[1]}")
+        return None
+    @property
+    def file_type(self) -> str:
+        """Get the type of the loaded file.
+        Returns:
+            str: File type ('svg', 'html', 'xml', or 'unknown')
+        """
+        return self.detect_file_type()
+    def get_element_attribute(self, xpath: str, attr_name: str, default: str = None) -> str:
+        """Get an attribute value from an element.
+        Args:
+            xpath: XPath to the element
+            attr_name: Name of the attribute to get
+            default: Default value if attribute not found
+        Returns:
+            The attribute value or default if not found
+        """
+        elements = self.query(xpath)
+        if not elements:
+            return default
+        return self._get_attribute(elements[0], attr_name) or default
+    def set_element_text(self, xpath: str, text: str) -> bool:
+        """Set the text content of an element.
+        Args:
+            xpath: XPath to the element
+            text: New text content
+        Returns:
+            bool: True if successful, False otherwise
+        """
+        elements = self.query(xpath)
+        if not elements:
+            return False
+        for elem in elements:
+            if hasattr(elem, 'text'):
+                elem.text = text
+        return True
+    def set_element_attribute(self, xpath: str, attr_name: str, attr_value: str) -> bool:
+        """Set an attribute on elements matching XPath.
+        Args:
+            xpath: XPath to the element(s)
+            attr_name: Name of the attribute to set
+            attr_value: Value to set
+        Returns:
+            bool: True if any elements were modified, False otherwise
+        """
+        elements = self.query(xpath)
+        if not elements:
+            return False
+        modified = False
+        for elem in elements:
+            if hasattr(elem, 'set'):
+                # Handle namespaced attributes (e.g., xlink:href)
+                if ':' in attr_name:
+                    prefix = attr_name.split(':', 1)[0]
+                    if prefix in self.ns:
+                        ns_attr = f"{{{self.ns[prefix]}}}{attr_name.split(':', 1)[1]}"
+                        elem.set(ns_attr, attr_value)
+                        modified = True
+                else:
+                    elem.set(attr_name, attr_value)
+                    modified = True
+        return modified
+    def list_elements(self, xpath: str) -> List[Dict[str, Any]]:
+        """List elements matching XPath with their attributes.
+        Args:
+            xpath: XPath expression to find elements
+        Returns:
+            List of dictionaries with element information
+        """
+        elements = self.query(xpath)
+        result = []
+        for elem in elements:
+            if hasattr(elem, 'attrib'):
+                result.append({
+                    'tag': elem.tag,
+                    'text': getattr(elem, 'text', ''),
+                    'attributes': dict(elem.attrib)
+                })
+        return result
+    def backup(self) -> str:
+        """Create a backup of the current file.
+        Returns:
+            str: Path to the backup file
+        Raises:
+            IOError: If backup creation fails
+        """
+        if not self.file_path:
+            raise IOError("No file loaded to back up")
+        backup_path = f"{self.file_path}.bak"
+        try:
+            import shutil
+            shutil.copy2(self.file_path, backup_path)
+            return backup_path
+        except Exception as e:
+            raise IOError(f"Failed to create backup: {str(e)}")

xsl/server.py CHANGED Viewed

@@ -1,5 +1,167 @@
-# HTTP server
 """
-server.py
+HTTP Server for xsl - Web interface for remote file editing.
 """
+import argparse
+import json
+import sys
+from http.server import BaseHTTPRequestHandler, HTTPServer
+from typing import Dict
+from urllib.parse import parse_qs, urlparse
+from . import __version__
+from .editor import FileEditor
+class FileEditorServer(BaseHTTPRequestHandler):
+    """HTTP request handler for xsl server."""
+    # Class variable to store loaded editors
+    editors: Dict[str, FileEditor] = {}
+    def do_GET(self):
+        """Handle GET requests."""
+        parsed_url = urlparse(self.path)
+        path = parsed_url.path
+        query = parse_qs(parsed_url.query)
+        if path == "/":
+            self._serve_interface()
+        elif path == "/api/health":
+            self._send_json_response({"status": "ok", "version": __version__})
+        elif path == "/api/extract":
+            # Direct extraction endpoint with URL + XPath
+            self._extract_from_url(query)
+        else:
+            self._send_error(404, "Not Found")
+    def do_POST(self):
+        """Handle POST requests."""
+        content_length = int(self.headers["Content-Length"])
+        post_data = self.rfile.read(content_length).decode("utf-8")
+        try:
+            data = json.loads(post_data)
+        except json.JSONDecodeError:
+            self._send_error(400, "Invalid JSON")
+            return
+        parsed_url = urlparse(self.path)
+        path = parsed_url.path
+        if path == "/api/load":
+            self._load_file(data)
+        elif path == "/api/query":
+            self._query_elements(data)
+        elif path == "/api/update":
+            self._update_element(data)
+        elif path == "/api/save":
+            self._save_file(data)
+        elif path == "/api/extract_data_uri":
+            self._extract_data_uri(data)
+        elif path == "/api/add":
+            self._add_element(data)
+        elif path == "/api/remove":
+            self._remove_element(data)
+        elif path == "/api/info":
+            self._get_file_info(data)
+        else:
+            self._send_error(404, "Not Found")
+    def do_OPTIONS(self):
+        """Handle OPTIONS requests for CORS."""
+        self.send_response(200)
+        self.send_header("Access-Control-Allow-Origin", "*")
+        self.send_header("Access-Control-Allow-Methods", "GET, POST, OPTIONS")
+        self.send_header("Access-Control-Allow-Headers", "Content-Type")
+        self.end_headers()
+    def _send_response(self, status_code, content, content_type="text/plain"):
+        """Send HTTP response."""
+        self.send_response(status_code)
+        self.send_header("Content-type", content_type)
+        self.send_header("Access-Control-Allow-Origin", "*")
+        self.send_header("Access-Control-Allow-Methods", "GET, POST, OPTIONS")
+        self.send_header("Access-Control-Allow-Headers", "Content-Type")
+        self.end_headers()
+        self.wfile.write(
+            content.encode("utf-8") if isinstance(content, str) else content
+        )
+    def _send_json_response(self, data):
+        """Send JSON response."""
+        self._send_response(200, json.dumps(data, indent=2), "application/json")
+    def _send_error(self, status_code, message):
+        """Send error response."""
+        self._send_response(
+            status_code, json.dumps({"error": message}), "application/json"
+        )
+    def log_message(self, format, *args):
+        """Override to customize logging."""
+        print(f"{self.address_string()} - {format % args}")
+def start_server(host="localhost", port=8080):
+    """Start the xsl HTTP server."""
+    try:
+        server = HTTPServer((host, port), FileEditorServer)
+        print(f"🌐 xsl Server v{__version__} starting on {host}:{port}")
+        print(f"📖 Open http://{host}:{port} in your browser")
+        print("🔗 API endpoints:")
+        print(f"   GET  http://{host}:{port}/api/extract?url=<URL>&xpath=<XPATH>")
+        print(f"   POST http://{host}:{port}/api/load")
+        print(f"   POST http://{host}:{port}/api/query")
+        print(f"   POST http://{host}:{port}/api/update")
+        print(f"   POST http://{host}:{port}/api/save")
+        print("\n⏹️  Press Ctrl+C to stop the server")
+        print("-" * 60)
+        server.serve_forever()
+    except KeyboardInterrupt:
+        print("\n\n👋 Server stopped by user")
+    except Exception as e:
+        print(f"❌ Server error: {e}")
+        sys.exit(1)
+def main(args: list = None) -> int:
+    """Entry point for xsl-server command.
+    Args:
+        args: Command line arguments (default: None, uses sys.argv[1:])
+    Returns:
+        int: Exit code (0 for success, non-zero for error)
+    """
+    parser = argparse.ArgumentParser(description="Start xsl HTTP server")
+    parser.add_argument(
+        "--host",
+        default="localhost",
+        help="Host to bind to (default: localhost)",
+    )
+    parser.add_argument(
+        "--port",
+        type=int,
+        default=8082,
+        help="Port to listen on (default: 8082)",
+    )
+    args = parser.parse_args(args)
+    print(f"Starting xsl server on http://{args.host}:{args.port}")
+    print("Press Ctrl+C to stop")
+    try:
+        start_server(host=args.host, port=args.port)
+        return 0
+    except KeyboardInterrupt:
+        print("\nServer stopped")
+        return 0
+    except Exception as e:
+        print(f"Error: {e}", file=sys.stderr)
+        return 1
+if __name__ == "__main__":
+    main()

xsl 0.1.5__py3-none-any.whl → 0.1.8__py3-none-any.whl

xsl 0.1.5__py3-none-any.whl → 0.1.8__py3-none-any.whl