ursaproxy-0.1.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ursaproxy/__init__.py +203 -0
- ursaproxy/cache.py +37 -0
- ursaproxy/config.py +39 -0
- ursaproxy/converter.py +78 -0
- ursaproxy/fetcher.py +53 -0
- ursaproxy/templates/about.gmi +6 -0
- ursaproxy/templates/feed.xml +18 -0
- ursaproxy/templates/index.gmi +15 -0
- ursaproxy/templates/post.gmi +10 -0
- ursaproxy-0.1.2.dist-info/METADATA +181 -0
- ursaproxy-0.1.2.dist-info/RECORD +13 -0
- ursaproxy-0.1.2.dist-info/WHEEL +4 -0
- ursaproxy-0.1.2.dist-info/entry_points.txt +3 -0
ursaproxy/__init__.py
ADDED
@@ -0,0 +1,203 @@
from datetime import datetime
from email.utils import parsedate_to_datetime

import httpx
from jinja2 import Environment, PackageLoader
from xitzin import NotFound, Request, Response, TemporaryFailure, Xitzin

from .cache import cache
from .config import settings
from .converter import extract_metadata, extract_slug, html_to_gemtext
from .fetcher import NotFoundError, ServerError, fetch_feed, fetch_html

app = Xitzin()

# Template environments
templates = Environment(
    loader=PackageLoader("ursaproxy", "templates"),
    autoescape=False,  # Gemtext doesn't need HTML escaping
)

xml_templates = Environment(
    loader=PackageLoader("ursaproxy", "templates"),
    autoescape=True,  # XML escaping for feed
)


@app.on_startup
async def startup() -> None:
    """Initialize shared HTTP client."""
    app.state.client = httpx.AsyncClient(timeout=30.0)


@app.on_shutdown
async def shutdown() -> None:
    """Close HTTP client."""
    await app.state.client.aclose()


async def _get_feed(client: httpx.AsyncClient):
    """Fetch feed with caching and error handling."""
    if cached := cache.get("feed", settings.cache_ttl_feed):
        return cached

    try:
        feed = await fetch_feed(client)
        cache.set("feed", feed)
        return feed
    except ServerError as e:
        raise TemporaryFailure(str(e)) from e
    except NotFoundError as e:
        raise NotFound(str(e)) from e


async def _render_content(
    client: httpx.AsyncClient,
    slug: str,
    content_type: str,
    include_date: bool = True,
) -> str:
    """Fetch and render content as gemtext with caching."""
    cache_key = f"{content_type}:{slug}"

    if cached := cache.get(cache_key, settings.cache_ttl_post):
        return cached

    try:
        html = await fetch_html(client, slug)
    except NotFoundError as e:
        raise NotFound(str(e)) from e
    except ServerError as e:
        raise TemporaryFailure(str(e)) from e

    title, date = extract_metadata(html)
    content = html_to_gemtext(html)

    template = templates.get_template("post.gmi")
    gemtext = template.render(
        title=title,
        date=date if include_date else None,
        content=content,
        web_url=f"{settings.bearblog_url}/{slug}/",
    )

    cache.set(cache_key, gemtext)
    return gemtext


@app.gemini("/")
async def index(request: Request) -> str:
    """Landing page with recent posts and page links."""
    feed = await _get_feed(request.app.state.client)

    posts = []
    for entry in feed.entries[:10]:
        link = getattr(entry, "link", None)
        if not link:
            continue
        slug = extract_slug(link)
        if not slug:
            continue
        date = entry.get("published", "")[:16] if entry.get("published") else ""
        title = getattr(entry, "title", "Untitled")
        posts.append({"slug": slug, "title": title, "date": date})

    template = templates.get_template("index.gmi")
    return template.render(
        blog_name=settings.blog_name,
        description=feed.feed.get("description", ""),
        pages=settings.pages,
        posts=posts,
    )


@app.gemini("/post/{slug}")
async def post(request: Request, slug: str) -> str:
    """Individual blog post."""
    return await _render_content(
        request.app.state.client, slug, "post", include_date=True
    )


@app.gemini("/page/{slug}")
async def page(request: Request, slug: str) -> str:
    """Static page (projects, now, etc.)."""
    return await _render_content(
        request.app.state.client, slug, "page", include_date=False
    )


@app.gemini("/about")
async def about(request: Request) -> str:
    """About page from feed metadata."""
    feed = await _get_feed(request.app.state.client)
    description = feed.feed.get("description", "A personal blog.")

    template = templates.get_template("about.gmi")
    return template.render(
        blog_name=settings.blog_name,
        description=description,
        bearblog_url=settings.bearblog_url,
    )


def _rfc822_to_iso(date_str: str) -> str:
    """Convert RFC 822 date to ISO 8601 format for Atom feeds."""
    if not date_str:
        return datetime.now().isoformat() + "Z"
    try:
        dt = parsedate_to_datetime(date_str)
        return dt.isoformat().replace("+00:00", "Z")
    except (ValueError, TypeError):
        return datetime.now().isoformat() + "Z"


@app.gemini("/feed")
async def feed(request: Request) -> Response:
    """Atom feed with Gemini URLs."""
    rss = await _get_feed(request.app.state.client)

    # Use configured gemini_host or fall back to request hostname
    host = settings.gemini_host or request.hostname or "localhost"
    base_url = f"gemini://{host}"

    # Get the most recent update time
    updated = _rfc822_to_iso(rss.feed.get("updated", ""))

    entries = []
    for entry in rss.entries:
        link = getattr(entry, "link", None)
        if not link:
            continue
        slug = extract_slug(link)
        if not slug:
            continue

        entries.append(
            {
                "title": getattr(entry, "title", "Untitled"),
                "url": f"{base_url}/post/{slug}",
                "published": _rfc822_to_iso(entry.get("published", "")),
                "summary": getattr(entry, "description", ""),
            }
        )

    template = xml_templates.get_template("feed.xml")
    atom_xml = template.render(
        blog_name=settings.blog_name,
        base_url=base_url,
        updated=updated,
        entries=entries,
    )

    return Response(body=atom_xml, mime_type="application/atom+xml")


def main() -> None:
    """Entry point."""
    app.run(
        host=settings.host,
        port=settings.port,
        certfile=settings.cert_file,
        keyfile=settings.key_file,
    )
ursaproxy/cache.py
ADDED
@@ -0,0 +1,37 @@
from time import time
from typing import Any


class Cache:
    """Simple TTL cache using dict + timestamps with size limit."""

    def __init__(self, max_size: int = 1000) -> None:
        self._data: dict[str, tuple[Any, float]] = {}
        self._max_size = max_size

    def get(self, key: str, ttl: int) -> Any | None:
        """Get value if exists and not expired."""
        entry = self._data.get(key)
        if entry:
            value, timestamp = entry
            if time() - timestamp < ttl:
                return value
            # Use pop to avoid race conditions
            self._data.pop(key, None)
        return None

    def set(self, key: str, value: Any) -> None:
        """Set value with current timestamp."""
        self._evict_if_full()
        self._data[key] = (value, time())

    def _evict_if_full(self) -> None:
        """Remove oldest entries if cache is full."""
        if len(self._data) >= self._max_size:
            # Remove oldest 10% of entries
            sorted_keys = sorted(self._data.keys(), key=lambda k: self._data[k][1])
            for key in sorted_keys[: len(sorted_keys) // 10 or 1]:
                self._data.pop(key, None)


cache = Cache()
ursaproxy/config.py
ADDED
@@ -0,0 +1,39 @@
from pydantic import field_validator
from pydantic_settings import BaseSettings


class Settings(BaseSettings):
    """Configuration from environment variables."""

    # Required: the Bearblog URL to proxy
    bearblog_url: str
    blog_name: str

    cache_ttl_feed: int = 300  # 5 minutes
    cache_ttl_post: int = 1800  # 30 minutes

    # Static pages (slug -> title) - pages not in RSS feed
    # Override via PAGES='{"about": "About Me", "now": "Now"}'
    pages: dict[str, str] = {}

    # Gemini capsule hostname (for feed URLs)
    # e.g., "gemini.example.com" -> gemini://gemini.example.com/post/...
    gemini_host: str | None = None

    # Server settings
    host: str = "localhost"
    port: int = 1965
    cert_file: str | None = None
    key_file: str | None = None

    @field_validator("bearblog_url")
    @classmethod
    def normalize_url(cls, v: str) -> str:
        """Remove trailing slash to prevent double slashes in URLs."""
        v = v.rstrip("/")
        if not v.startswith(("http://", "https://")):
            raise ValueError("bearblog_url must start with http:// or https://")
        return v


settings = Settings()  # type: ignore[call-arg]  # pydantic-settings reads from env
ursaproxy/converter.py
ADDED
@@ -0,0 +1,78 @@
from bs4 import BeautifulSoup
from markdownify import markdownify
from md2gemini import md2gemini


def html_to_gemtext(html: str) -> str:
    """
    Convert Bearblog HTML to Gemtext.

    Bearblog structure:
    - Content is in <main> element
    - Title is <h1> (extracted separately)
    - Date is in <time> element
    - Nav/footer should be stripped
    """
    soup = BeautifulSoup(html, "html.parser")

    # Bearblog uses <main> for content, not <article>
    main = soup.find("main")
    if not main:
        main = soup.body

    if not main:
        return ""

    # Remove elements we don't want
    for tag in main.find_all(["script", "style", "nav", "footer", "form"]):
        tag.decompose()

    # Remove the h1 (title handled separately)
    if h1 := main.find("h1"):
        h1.decompose()

    # HTML -> Markdown -> Gemtext
    markdown = markdownify(str(main), heading_style="ATX")
    gemtext = md2gemini(markdown, links="paragraph", plain=True)

    return gemtext.strip()


def extract_metadata(html: str) -> tuple[str, str]:
    """
    Extract title and date from Bearblog HTML.

    Returns: (title, date_str)
    """
    soup = BeautifulSoup(html, "html.parser")

    # Title is the first h1
    h1 = soup.find("h1")
    title = h1.get_text(strip=True) if h1 else "Untitled"

    # Date is in <time datetime="2026-01-31">
    time_el = soup.find("time")
    if time_el and time_el.get("datetime"):
        date_str = time_el["datetime"]
    elif time_el:
        date_str = time_el.get_text(strip=True)
    else:
        date_str = ""

    return title, date_str


def extract_slug(url: str) -> str:
    """
    Extract slug from Bearblog URL.

    Input: "https://alanbato.com/el-internetsito/"
    Output: "el-internetsito"
    """
    if not url:
        return ""
    path = url.rstrip("/").split("/")[-1]
    # If it looks like a domain (has a dot), there's no slug
    if "." in path:
        return ""
    return path
ursaproxy/fetcher.py
ADDED
@@ -0,0 +1,53 @@
import feedparser
import httpx

from .config import settings


class FetchError(Exception):
    """Base error for fetch operations."""


class NotFoundError(FetchError):
    """Resource not found (404)."""


class ServerError(FetchError):
    """Server or network error."""


async def _fetch(
    client: httpx.AsyncClient, url: str, not_found_msg: str
) -> httpx.Response:
    """Fetch URL with standardized error handling."""
    try:
        response = await client.get(url)
        if response.status_code == 404:
            raise NotFoundError(not_found_msg)
        if response.status_code >= 500:
            raise ServerError(f"Server error {response.status_code}")
        if response.status_code >= 400:
            raise ServerError(f"HTTP error {response.status_code}")
        return response
    except httpx.HTTPStatusError as e:
        raise ServerError(f"HTTP error: {e}") from e
    except httpx.RequestError as e:
        raise ServerError(f"Network error: {e}") from e


async def fetch_feed(client: httpx.AsyncClient) -> feedparser.FeedParserDict:
    """Fetch RSS feed from Bearblog."""
    url = f"{settings.bearblog_url}/feed/?type=rss"
    response = await _fetch(client, url, f"Feed not found at {url}")
    return feedparser.parse(response.text)


async def fetch_html(client: httpx.AsyncClient, slug: str) -> str:
    """
    Fetch HTML page from Bearblog.

    Note: Bearblog URLs have trailing slashes: /{slug}/
    """
    url = f"{settings.bearblog_url}/{slug}/"
    response = await _fetch(client, url, f"Page not found: {slug}")
    return response.text
ursaproxy/templates/feed.xml
ADDED
@@ -0,0 +1,18 @@
<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
  <title>{{ blog_name }}</title>
  <link href="{{ base_url }}/" rel="alternate"/>
  <link href="{{ base_url }}/feed" rel="self"/>
  <id>{{ base_url }}/</id>
  <updated>{{ updated }}</updated>
  {% for entry in entries %}
  <entry>
    <title>{{ entry.title }}</title>
    <link href="{{ entry.url }}" rel="alternate"/>
    <id>{{ entry.url }}</id>
    <published>{{ entry.published }}</published>
    <updated>{{ entry.published }}</updated>
    <summary>{{ entry.summary }}</summary>
  </entry>
  {% endfor %}
</feed>
ursaproxy/templates/index.gmi
ADDED
@@ -0,0 +1,15 @@
# {{ blog_name }}

{{ description }}

## Pages
{% for slug, title in pages.items() %}
=> /page/{{ slug }} {{ title }}
{% endfor %}

## Recent Posts
=> /feed Atom Feed

{% for post in posts %}
=> /post/{{ post.slug }} {{ post.title }} ({{ post.date }})
{% endfor %}
ursaproxy-0.1.2.dist-info/METADATA
ADDED
@@ -0,0 +1,181 @@
Metadata-Version: 2.3
Name: ursaproxy
Version: 0.1.2
Summary: A Bearblog-to-Gemini proxy showcasing Xitzin
Author: Alan Velasco
Author-email: Alan Velasco <ursaproxy@alanbato.com>
License: MIT
Classifier: License :: OSI Approved :: MIT License
Classifier: Programming Language :: Python :: 3.13
Requires-Dist: xitzin>=0.6.1
Requires-Dist: feedparser>=6.0.11
Requires-Dist: httpx>=0.27.0
Requires-Dist: beautifulsoup4>=4.12.3
Requires-Dist: markdownify>=0.12.1
Requires-Dist: md2gemini>=1.9.1
Requires-Dist: pydantic-settings>=2.0.0
Requires-Dist: jinja2>=3.1.0
Requires-Python: >=3.13
Description-Content-Type: text/markdown

# UrsaProxy

A Bearblog-to-Gemini proxy built with [Xitzin](https://github.com/alanbato/xitzin). It fetches content from a Bearblog RSS feed and HTML pages, converts it to Gemtext, and serves it over the Gemini protocol.

## Features

- Proxies Bearblog content to the Gemini protocol
- Converts HTML to Gemtext via a Markdown intermediate format
- Generates Atom feeds with Gemini URLs
- Configurable TTL caching for feed and post data
- Supports static pages that are not in the RSS feed

## Installation

Requires Python 3.13+.

```bash
# Using uv
uv add ursaproxy

# Using pip
pip install ursaproxy
```

## Configuration

UrsaProxy is configured via environment variables:

### Required

| Variable | Description |
|----------|-------------|
| `BEARBLOG_URL` | The Bearblog URL to proxy (e.g., `https://example.bearblog.dev`) |
| `BLOG_NAME` | Display name for the blog |
| `CERT_FILE` | Path to TLS certificate file |
| `KEY_FILE` | Path to TLS private key file |

### Optional

| Variable | Default | Description |
|----------|---------|-------------|
| `PAGES` | `{}` | JSON dict of static pages `{"slug": "Title"}` |
| `GEMINI_HOST` | `None` | Hostname for Gemini URLs in the feed |
| `CACHE_TTL_FEED` | `300` | Feed cache TTL in seconds (5 min) |
| `CACHE_TTL_POST` | `1800` | Post cache TTL in seconds (30 min) |
| `HOST` | `localhost` | Server bind address |
| `PORT` | `1965` | Server port (Gemini default) |
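
The two TTLs can share a single cache because expiry is checked when an entry is read: `Cache.get(key, ttl)` takes the TTL from the caller rather than storing one per entry, so the same store serves both feed and post lookups. A minimal illustration (placeholder value; assumes the required environment variables are set, since importing the package initializes `Settings`):

```python
from ursaproxy.cache import cache

cache.set("feed", "<parsed feed>")  # placeholder value, stored with the current timestamp
cache.get("feed", ttl=300)  # entry younger than CACHE_TTL_FEED: returns the cached value
cache.get("feed", ttl=0)    # same entry judged against a zero TTL: expired, evicted, returns None
```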

### Example

```bash
export BEARBLOG_URL="https://example.bearblog.dev"
export BLOG_NAME="My Gemini Blog"
export CERT_FILE="/path/to/cert.pem"
export KEY_FILE="/path/to/key.pem"
export PAGES='{"about": "About Me", "now": "What I am doing now"}'
export GEMINI_HOST="gemini.example.com"
```

## Usage

```bash
ursaproxy
```

The server will start on `gemini://localhost:1965/` by default.
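
You can then browse it with any Gemini client, for example (assuming the `gmni` command-line client is installed; any other Gemini client works the same way):

```bash
gmni gemini://localhost:1965/
```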

### Routes

| Route | Description |
|-------|-------------|
| `/` | Landing page with recent posts and page links |
| `/post/{slug}` | Individual blog post with date |
| `/page/{slug}` | Static page (without date) |
| `/about` | About page from feed metadata |
| `/feed` | Atom feed with Gemini URLs |

## Development

To contribute, clone the repository and install the dev dependencies:

```bash
git clone https://github.com/alanbato/ursaproxy.git
cd ursaproxy
uv sync --group dev --group test
```

### Commands

```bash
# Run linting
uv run ruff check .

# Run linting with auto-fix
uv run ruff check --fix .

# Format code
uv run ruff format .

# Type check
uv run ty check

# Run all pre-commit hooks
uv run pre-commit run --all-files

# Run tests
uv run pytest

# Run tests with verbose output
uv run pytest -v
```

### Project Structure

```
src/ursaproxy/
├── __init__.py       # Xitzin app, routes, and entry point
├── config.py         # Pydantic settings for environment config
├── fetcher.py        # HTTP client for fetching Bearblog content
├── converter.py      # HTML -> Markdown -> Gemtext pipeline
├── cache.py          # Simple TTL cache implementation
└── templates/        # Jinja2 templates
    ├── index.gmi     # Landing page template
    ├── post.gmi      # Post/page template
    ├── about.gmi     # About page template
    └── feed.xml      # Atom feed template
```

### Testing

The test suite uses pytest with fixtures for offline testing:

```bash
# Run all 111 tests
uv run pytest

# Run specific test file
uv run pytest tests/test_converter.py

# Run with coverage (if installed)
uv run pytest --cov=ursaproxy
```

HTTP requests are mocked using [respx](https://github.com/lundberg/respx), so tests run completely offline.
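
As a sketch of what such a test can look like (hypothetical, not taken from the actual suite; assumes `pytest-asyncio` and `BEARBLOG_URL=https://example.bearblog.dev` in the test environment):

```python
import httpx
import pytest
import respx

from ursaproxy.fetcher import NotFoundError, fetch_html


@pytest.mark.asyncio
@respx.mock
async def test_fetch_html_raises_on_404():
    # Route the page URL to a canned 404 response; no network traffic occurs.
    respx.get("https://example.bearblog.dev/missing/").mock(
        return_value=httpx.Response(404)
    )
    async with httpx.AsyncClient() as client:
        with pytest.raises(NotFoundError):
            await fetch_html(client, "missing")
```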

## How It Works

1. **Feed Fetching**: Fetches the RSS feed from `{BEARBLOG_URL}/feed/?type=rss`
2. **HTML Fetching**: Fetches individual pages from `{BEARBLOG_URL}/{slug}/`
3. **Conversion Pipeline** (see the sketch after this list):
   - Parse HTML with BeautifulSoup
   - Extract content from the `<main>` element
   - Remove nav, footer, scripts, and styles
   - Convert to Markdown with markdownify
   - Convert to Gemtext with md2gemini
4. **Caching**: Feed and posts are cached with configurable TTLs
5. **Serving**: Content is served over the Gemini protocol using Xitzin
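
A condensed sketch of steps 2 and 3 using the package's own functions (bypassing the cache and the Jinja2 templates; `example-post` is a placeholder slug, and the required environment variables must be set):

```python
import asyncio

import httpx

from ursaproxy.converter import extract_metadata, html_to_gemtext
from ursaproxy.fetcher import fetch_html


async def render_one(slug: str) -> str:
    async with httpx.AsyncClient(timeout=30.0) as client:
        html = await fetch_html(client, slug)  # step 2: GET {BEARBLOG_URL}/{slug}/
    title, date = extract_metadata(html)  # <h1> text and <time> datetime
    body = html_to_gemtext(html)  # step 3: HTML -> Markdown -> Gemtext
    return f"# {title}\n{date}\n\n{body}"


print(asyncio.run(render_one("example-post")))
```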

## License

MIT
ursaproxy-0.1.2.dist-info/RECORD
ADDED
@@ -0,0 +1,13 @@
ursaproxy/__init__.py,sha256=bO2ym_YzTMOUVBbqTjXWWI0ZQqRc81HnZ2o0JBkkZso,5777
ursaproxy/cache.py,sha256=Q5cypE91xGte1ph-_NqcEwZ0NoT7_LszYT1DVpfd2bM,1205
ursaproxy/config.py,sha256=mjyZPOtW7f_IhP4ukMj8TOHpyGaXfoSXmExYGv2Z-FI,1216
ursaproxy/converter.py,sha256=FAW0fA7a3WtlPYMtZbDlsM0Gl7twXK73DAV911R7SPI,1955
ursaproxy/fetcher.py,sha256=1Bsm96QYjnKQMorS2xwp9e3WRq8GbA6tKZrs1Z4ObOM,1591
ursaproxy/templates/about.gmi,sha256=hJxK25i9uXr2geBo1HrdkLztqkebICDy-_bdnQ4jlGI,105
ursaproxy/templates/feed.xml,sha256=0aiFNOfgHeMH0427LPc58pEf0NdvcDHIRa-x9tc_Ty8,597
ursaproxy/templates/index.gmi,sha256=2J-1lQRcwoxr46bqasW969RPpo_X5n2sh9sVuXIMdFg,265
ursaproxy/templates/post.gmi,sha256=DMPbqZl52v-G-6wmwbXh15kg8dYpTDUx3mfqkce-HhQ,133
ursaproxy-0.1.2.dist-info/WHEEL,sha256=iHtWm8nRfs0VRdCYVXocAWFW8ppjHL-uTJkAdZJKOBM,80
ursaproxy-0.1.2.dist-info/entry_points.txt,sha256=uM3A9bQS-p_6VhWbkFRtob2oN-SuBboJWS3ZwCjlAFk,46
ursaproxy-0.1.2.dist-info/METADATA,sha256=S1WUJKOfwq-hNgMslrbIzqN53x9cOosPRrVAc9lUTqM,4781
ursaproxy-0.1.2.dist-info/RECORD,,