PyPI - ssb-pubmd - Versions diffs - 0.0.19__py3-none-any.whl → 0.1.1__py3-none-any.whl - Mend

ssb-pubmd 0.0.19__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

ssb_pubmd/__init__.py +3 -5
ssb_pubmd/__main__.py +7 -157
ssb_pubmd/adapters/content_parser.py +185 -0
ssb_pubmd/adapters/document_processor.py +149 -0
ssb_pubmd/adapters/publish_client.py +124 -0
ssb_pubmd/adapters/storage.py +42 -0
ssb_pubmd/cli.py +78 -0
ssb_pubmd/config.py +23 -0
ssb_pubmd/domain/document_publisher.py +46 -0
ssb_pubmd/notebook_client.py +130 -0
{ssb_pubmd-0.0.19.dist-info → ssb_pubmd-0.1.1.dist-info}/METADATA +19 -22
ssb_pubmd-0.1.1.dist-info/RECORD +16 -0
{ssb_pubmd-0.0.19.dist-info → ssb_pubmd-0.1.1.dist-info}/WHEEL +1 -1
ssb_pubmd-0.1.1.dist-info/entry_points.txt +3 -0
ssb_pubmd/browser_request_handler.py +0 -85
ssb_pubmd/constants.py +0 -22
ssb_pubmd/jwt_request_handler.py +0 -99
ssb_pubmd/markdown_syncer.py +0 -183
ssb_pubmd/request_handler.py +0 -56
ssb_pubmd-0.0.19.dist-info/RECORD +0 -13
ssb_pubmd-0.0.19.dist-info/entry_points.txt +0 -3
{ssb_pubmd-0.0.19.dist-info → ssb_pubmd-0.1.1.dist-info}/LICENSE +0 -0

ssb_pubmd/cli.py ADDED Viewed

@@ -0,0 +1,78 @@
+import subprocess
+import sys
+from pathlib import Path
+from watchfiles import watch
+from ssb_pubmd.adapters.content_parser import MimirContentParser
+from ssb_pubmd.adapters.document_processor import PandocDocumentProcessor
+from ssb_pubmd.adapters.publish_client import PublishClient
+from ssb_pubmd.adapters.publish_client import get_publish_client
+from ssb_pubmd.adapters.storage import LocalFileStorage
+from ssb_pubmd.config import Config
+from ssb_pubmd.domain.document_publisher import sync_document
+def run_cli(system_arguments: list[str], config: Config) -> None:
+    match system_arguments:
+        case [_, "preview", file_path]:
+            _preview(file_path, config)
+        case _:
+            print("Usage: ssb-pubmd preview QUARTO_MARKDOWN_FILE")
+            sys.exit(1)
+def _preview(file_path: str, config: Config) -> None:
+    if Path(file_path).suffix != ".qmd":
+        print("Only Quarto Markdown (.qmd) files are supported.")
+        sys.exit(1)
+    try:
+        print("Fetching labid token...")
+        publish_client = get_publish_client(config, use_dapla_token_client=True)
+    except Exception:
+        print("Failed to fetch labid token; using environment variable...")
+        publish_client = get_publish_client(config, use_dapla_token_client=False)
+    _sync_updated_file(file_path, publish_client)
+    print("Watching for file changes...")
+    for changes in watch(file_path):
+        _sync_updated_file(file_path, publish_client)
+def _sync_updated_file(file_path: str, publish_client: PublishClient) -> None:
+    print("Syncing updated document...")
+    try:
+        preview_url = _sync_quarto_file(file_path, publish_client)
+        print(f"Content synced successfully. Preview URL: {preview_url}")
+    except Exception as e:
+        print(f"Error during sync: {e}")
+def _sync_quarto_file(file_path: str, publish_client: PublishClient) -> str:
+    pandoc_document = _quarto_to_pandoc(file_path)
+    adapters = (
+        PandocDocumentProcessor(),
+        MimirContentParser(),
+        LocalFileStorage(project_folder=Path(file_path).parent),
+        publish_client,
+    )
+    return sync_document(pandoc_document, *adapters)
+def _quarto_to_pandoc(file_path: str) -> str:
+    result = subprocess.run(
+        [
+            "quarto",
+            "render",
+            file_path,
+            "--to",
+            "json",
+            "-M",
+            "include:false",
+            "--output",
+            "-",
+        ],
+        text=True,
+        capture_output=True,
+        check=True,
+    )
+    return result.stdout

ssb_pubmd/config.py ADDED Viewed

@@ -0,0 +1,23 @@
+"""App configuration through environment variables."""
+import os
+from dataclasses import dataclass
+from pathlib import Path
+APP_NAME = "SSB_PUBMD"
+@dataclass
+class Config:
+    publish_base_url: str
+    publish_endpoint: str
+    publish_preview_base_path: str
+def get_config(metadata_file_path: Path | None = None) -> Config:
+    """Get config from enviromnent variables."""
+    return Config(
+        publish_base_url=os.environ[f"{APP_NAME}_BASE_URL"],
+        publish_endpoint=os.environ[f"{APP_NAME}_ENDPOINT"],
+        publish_preview_base_path=os.environ[f"{APP_NAME}_PREVIEW_BASE_PATH"],
+    )

ssb_pubmd/domain/document_publisher.py ADDED Viewed

@@ -0,0 +1,46 @@
+from ssb_pubmd.adapters.content_parser import ContentParser
+from ssb_pubmd.adapters.document_processor import DocumentProcessor
+from ssb_pubmd.adapters.publish_client import PublishClient
+from ssb_pubmd.adapters.storage import Storage
+USER_KEY_PREFIX = "user:"
+DOCUMENT_KEY = "app:document"
+def sync_document(
+    raw_document_content: str,
+    document_processor: DocumentProcessor,
+    content_parser: ContentParser,
+    storage: Storage,
+    publish_client: PublishClient,
+) -> str:
+    document_processor.load(raw_document_content)
+    document_metadata = document_processor.extract_metadata(target_key="ssb")
+    document_metadata["content_type"] = "article"
+    document_publish_path = storage.get(DOCUMENT_KEY).get("publish_path")
+    if not document_publish_path:
+        content = content_parser.parse(metadata=document_metadata, html=None)
+        response = publish_client.send_content(content)
+        storage.update(
+            DOCUMENT_KEY,
+            {"publish_id": response.publish_id, "publish_path": response.publish_path},
+        )
+        document_publish_path = response.publish_path
+    document_elements = document_processor.extract_elements(target_class="ssb")
+    for id_, html in document_elements:
+        key = USER_KEY_PREFIX + id_
+        metadata = storage.get(key) | {"publish_folder": document_publish_path}
+        component = content_parser.parse(metadata, html)
+        response = publish_client.send_content(component)
+        storage.update(key, {"publish_id": response.publish_id})
+        document_processor.replace_element(id_, response.publish_html)
+    article_metadata = document_metadata | {
+        "publish_id": storage.get(DOCUMENT_KEY).get("publish_id")
+    }
+    html = document_processor.extract_html()
+    article = content_parser.parse(metadata=article_metadata, html=html)
+    response = publish_client.send_content(article)
+    return response.publish_url

ssb_pubmd/notebook_client.py ADDED Viewed

@@ -0,0 +1,130 @@
+from pathlib import Path
+from typing import Literal
+import narwhals as nw
+from narwhals.typing import IntoDataFrame
+from ssb_pubmd.adapters.content_parser import Content
+from ssb_pubmd.adapters.content_parser import ContentParser
+from ssb_pubmd.adapters.content_parser import MimirContentParser
+from ssb_pubmd.adapters.storage import LocalFileStorage
+from ssb_pubmd.adapters.storage import Storage
+from ssb_pubmd.domain.document_publisher import USER_KEY_PREFIX
+STORAGE: Storage = LocalFileStorage(project_folder=Path.cwd())
+CONTENT_PARSER: ContentParser = MimirContentParser()
+class NotebookClientError(Exception): ...
+def configure_factbox(
+    key: str,
+    title: str,
+    display_type: Literal["default", "sneakPeek", "aiIcon"] = "default",
+) -> None:
+    """Oppretter en faktaboks og printer en Markdown-snippet som kan limes inn i artikkelen (på en ny linje).
+    :param key: En unik nøkkel for innholdet.
+    :param title: Tittelen til faktaboksen.
+    :param display_type: Visning av faktaboksen.
+        Alternativer:
+        * "default": Bare tittel (standard)
+        * "sneakPeek": Tittel og litt av forklaringsteksten
+        * "aiIcon": Tittel og litt av forklaringsteksten + KI-ikon
+    """
+    metadata = {
+        "content_type": "factBox",
+        "title": title,
+        "display_type": display_type,
+    }
+    content = CONTENT_PARSER.parse(
+        metadata=metadata,
+        html=None,
+    )
+    _store_user_content(user_key=key, content=content)
+    md = _get_markdown_snippet(key, placeholder_text="Faktaboksens tekst skrives her.")
+    print(md)
+def create_highchart(
+    key: str,
+    title: str,
+    dataframe: IntoDataFrame | None = None,
+    tbml: str | None = None,
+    graph_type: Literal["line", "pie", "column", "bar", "area", "barNegative"] = "line",
+    xlabel: str = "x",
+    ylabel: str = "y"
+) -> None:
+    """Oppretter et highchart og printer en Markdown-snippet som kan limes inn i artikkelen (på en ny linje).
+    Som datakilde er det nødvendig å spesifisere enten `dataframe` eller `tbml`.
+    :param key: En unik nøkkel for innholdet.
+    :param title: Tittelen til highchartet.
+    :param dataframe: En pandas, Polars eller PyArrow dataframe.
+    :param tbml: URL eller TBML-id.
+    :param graph_type: Graftype.
+        Alternativer:
+        * "line": Linje (standard)
+        * "pie": Kake
+        * "column": Stolpe
+        * "bar": Liggende stolpe
+        * "area": Areal
+        * "barNegative": Pyramide
+    :param xlabel: X-akse, tittel.
+    :param ylabel: Y-akse, tittel.
+    """
+    if dataframe is None and tbml is None:
+        raise NotebookClientError("Either 'dataframe' or 'tbml' must be specified.")
+    metadata = {
+        "content_type": "highchart",
+        "title": title,
+        "graph_type": graph_type,
+        "xlabel": xlabel,
+        "ylabel": ylabel
+    }
+    if tbml is not None:
+        metadata["tbml"] = tbml
+    html = _dataframe_to_html_table(dataframe) if dataframe is not None else None
+    content = CONTENT_PARSER.parse(metadata, html)
+    _store_user_content(user_key=key, content=content)
+    md = _get_markdown_snippet(key)
+    print(md)
+def _store_user_content(user_key: str, content: Content) -> None:
+    key = USER_KEY_PREFIX + str(user_key)
+    STORAGE.update(key, content.to_dict())
+def _dataframe_to_html_table(dataframe: IntoDataFrame) -> str:
+    df = nw.from_native(dataframe)
+    html = "<table><tbody>\n"
+    html += "<tr>\n"
+    for name in df.columns:
+        html += f"    <td>{name}</td>\n"
+    html += "</tr>\n"
+    for row in df.iter_rows():
+        html += "  <tr>\n"
+        for value in row:
+            html += f"    <td>{value}</td>\n"
+        html += "  </tr>\n"
+    html += "</tbody></table>"
+    return html
+def _get_markdown_snippet(key: str, placeholder_text: str | None = None) -> str:
+    div_config = f"{{ #{key} .ssb }}"
+    div_content = f"\n{placeholder_text}\n\n" if placeholder_text is not None else ""
+    return f"::: {div_config}\n{div_content}:::"

{ssb_pubmd-0.0.19.dist-info → ssb_pubmd-0.1.1.dist-info}/METADATA RENAMED Viewed

@@ -1,27 +1,27 @@
 Metadata-Version: 2.3
 Name: ssb-pubmd
-Version: 0.0.19
+Version: 0.1.1
 Summary: SSB Pubmd
 License: MIT
 Author: Olav Landsverk
 Author-email: stud-oll@ssb.no
 Requires-Python: >=3.10,<4.0
-Classifier: Development Status :: 1 - Planning
+Classifier: Development Status :: 3 - Alpha
 Classifier: License :: OSI Approved :: MIT License
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
-Requires-Dist: click (>=8.0.1)
-Requires-Dist: cryptography (>=45.0.4,<46.0.0)
-Requires-Dist: google-cloud-secret-manager (>=2.24.0,<3.0.0)
+Requires-Dist: dapla-auth-client (>=1.2.5,<2.0.0)
+Requires-Dist: ipynbname (>=2025.8.0.0,<2026.0.0.0)
+Requires-Dist: narwhals (>=2.15.0,<3.0.0)
 Requires-Dist: nbformat (>=5.10.4,<6.0.0)
-Requires-Dist: platformdirs (>=4.3.8,<5.0.0)
-Requires-Dist: playwright (>=1.51.0,<2.0.0)
-Requires-Dist: pyjwt (>=2.10.1,<3.0.0)
-Requires-Dist: requests (>=2.32.3,<3.0.0)
-Requires-Dist: types-requests (>=2.32.0.20250306,<3.0.0.0)
+Requires-Dist: nh3 (>=0.3.2,<0.4.0)
+Requires-Dist: pandocfilters (>=1.5.1,<2.0.0)
+Requires-Dist: pydantic (>=2.12.5,<3.0.0)
+Requires-Dist: requests (>=2.32.4,<3.0.0)
+Requires-Dist: watchfiles (>=1.1.1,<2.0.0)
 Project-URL: Changelog, https://github.com/statisticsnorway/ssb-pubmd/releases
 Project-URL: Documentation, https://statisticsnorway.github.io/ssb-pubmd
 Project-URL: Homepage, https://github.com/statisticsnorway/ssb-pubmd
@@ -55,28 +55,25 @@ Description-Content-Type: text/markdown
 [black]: https://github.com/psf/black
 [poetry]: https://python-poetry.org/
+## Features
-## Installation
+- TODO
-Installation with pip:
+## Requirements
-```console
-pip install ssb-pubmd
-```
+- TODO
+## Installation
-If you need to create a logged-in browser context, you will also need to install a [Playwright browser](https://playwright.dev/python/docs/browsers#install-browsers):
+You can install _SSB Pubmd_ via [pip] from [PyPI]:
 ```console
-playwright install --with-deps chromium
+pip install ssb-pubmd
 ```
 ## Usage
-Run the main command in a terminal to see available subcommands with documentation:
-```console
-pubmd
-```
+Please see the [Reference Guide] for details.
 ## Contributing

ssb_pubmd-0.1.1.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,16 @@
+ssb_pubmd/__init__.py,sha256=GmZebzEEIJcwwYD6_J5irpY5-lGAKFr8lRrTGB_nAPA,170
+ssb_pubmd/__main__.py,sha256=7Trn-DZkNbVn6s5J8FVg9JFtpFEHv-4VKZ34MVs23Cc,204
+ssb_pubmd/adapters/content_parser.py,sha256=ExOULfwoFBVb9h2wH8m61fWFy192ZEIG2LI3fuFRpbE,5141
+ssb_pubmd/adapters/document_processor.py,sha256=GN4FJmWcyiCdTBzWkyUReFGd7AME-F4Eq5N4JmarddQ,4271
+ssb_pubmd/adapters/publish_client.py,sha256=mBRfOEEcrmKlDHJVsaqQR984XlJel9oBK4PGP-GhriE,3451
+ssb_pubmd/adapters/storage.py,sha256=Dexfgw0csQ9wljC6lqf9kFmoM2CHdfMghm-qBrgdWjM,1227
+ssb_pubmd/cli.py,sha256=dusmoCX3U6Lpc_uSqqgCRC0U0m8fRq48ExdsYBXRtr4,2555
+ssb_pubmd/config.py,sha256=chnW-GC5Ie5kEcjVb-4_a5_Vq6glhATorDVIghc50SI,606
+ssb_pubmd/domain/document_publisher.py,sha256=hgzJx9kGZJOVLrgFRg-JGAmNsasvHA9_BA1g3htcWrQ,1920
+ssb_pubmd/notebook_client.py,sha256=MANyeyIdTuci8b0flD9e3jgFnEwx5dGo4zLMp39tkRE,4007
+ssb_pubmd/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ssb_pubmd-0.1.1.dist-info/LICENSE,sha256=tF5bnYv09fgH5ph9t1EpH1MGrVOGTQeswL4dzVeZ_ak,1073
+ssb_pubmd-0.1.1.dist-info/METADATA,sha256=z_bEnr4p9KiPhscq4X5r8mHmn34Wambsacj7DLlp_bA,4101
+ssb_pubmd-0.1.1.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
+ssb_pubmd-0.1.1.dist-info/entry_points.txt,sha256=o4oU99zbZNIBKGYWdgdEG6ev-62ZRWEJOe7EOjJaajk,53
+ssb_pubmd-0.1.1.dist-info/RECORD,,

{ssb_pubmd-0.0.19.dist-info → ssb_pubmd-0.1.1.dist-info}/WHEEL RENAMED Viewed

@@ -1,4 +1,4 @@
 Wheel-Version: 1.0
-Generator: poetry-core 2.1.3
+Generator: poetry-core 2.1.1
 Root-Is-Purelib: true
 Tag: py3-none-any

ssb_pubmd-0.1.1.dist-info/entry_points.txt ADDED Viewed

@@ -0,0 +1,3 @@
+[console_scripts]
+ssb-pubmd=ssb_pubmd.__main__:main

ssb_pubmd/browser_request_handler.py DELETED Viewed

@@ -1,85 +0,0 @@
-from collections.abc import Iterator
-from contextlib import contextmanager
-from enum import Enum
-from pathlib import Path
-from playwright.sync_api import BrowserContext
-from playwright.sync_api import sync_playwright
-from ssb_pubmd.request_handler import Response
-class CreateContextMethod(Enum):
-    """The method used to create the browser context.
-    Can be either from a file containing the context data,
-        or from a login popup window.
-    """
-    FROM_FILE = "from_file"
-    FROM_LOGIN = "from_login"
-class BrowserRequestHandler:
-    """This class is used to create a logged in browser context from which to send requests."""
-    def __init__(self, context_file_path: Path, login_url: str) -> None:
-        """Initializes an empty browser context object."""
-        self._context_file_path: Path = context_file_path
-        self._login_url: str = login_url
-        self._context: BrowserContext | None = None
-    @contextmanager
-    def new_context(
-        self, method: CreateContextMethod = CreateContextMethod.FROM_FILE
-    ) -> Iterator[BrowserContext]:
-        """Wrapper around playwright's context manager.
-        The default is to create a new context from a file.
-        If `from_file` is  set False, a new context is created through a browser popup with user login,
-            and the context is saved to a file.
-        """
-        with sync_playwright() as playwright:
-            browser = playwright.chromium.launch(headless=False)
-            match method:
-                case CreateContextMethod.FROM_FILE:
-                    self._context = browser.new_context(
-                        storage_state=self._context_file_path
-                    )
-                case CreateContextMethod.FROM_LOGIN:
-                    self._context = browser.new_context()
-                    login_page = self._context.new_page()
-                    login_page.goto(self._login_url)
-                    login_page.wait_for_event("close", timeout=0)
-                    self._context.storage_state(path=self._context_file_path)
-            yield self._context
-            self._context.close()
-            browser.close()
-    def send_request(
-        self,
-        url: str,
-        headers: dict[str, str] | None = None,
-        data: dict[str, str] | None = None,
-    ) -> Response:
-        """Sends a request to the specified url, optionally with headers and data, within the browser context."""
-        if self._context is None:
-            raise ValueError("Browser context has not been created.")
-        api_response = self._context.request.post(
-            url,
-            data=data,
-        )
-        try:
-            body = api_response.json()
-            body = dict(body)
-        except Exception:
-            body = {}
-        response = Response(
-            status_code=api_response.status,
-            body=body,
-        )
-        return response

ssb_pubmd/constants.py DELETED Viewed

@@ -1,22 +0,0 @@
-from enum import Enum
-from platformdirs import user_cache_path
-from platformdirs import user_config_path
-from platformdirs import user_data_path
-APP_NAME = "pubmd"
-METADATA_FILE = user_data_path(APP_NAME, ensure_exists=True) / "metadata.json"
-CACHE_FILE = user_cache_path(APP_NAME, ensure_exists=True) / "cache.json"
-CONFIG_FILE = user_config_path(APP_NAME, ensure_exists=True) / "config.json"
-CACHE_FILE.touch()
-CONFIG_FILE.touch()
-METADATA_FILE.touch()
-class ContentType(Enum):
-    """Allowed content types."""
-    MARKDOWN = ".md"
-    NOTEBOOK = ".ipynb"

ssb_pubmd/jwt_request_handler.py DELETED Viewed

@@ -1,99 +0,0 @@
-import json
-from dataclasses import dataclass
-from datetime import datetime
-import jwt
-import requests
-from google.cloud import secretmanager
-from ssb_pubmd.request_handler import Response
-TYPE = "JWT"
-ALGORITHM = "RS256"
-@dataclass
-class SecretData:
-    """Data class to hold private key and connected data."""
-    private_key: str
-    kid: str
-    principal_key: str
-class JWTRequestHandler:
-    """This class is used to send requests with a JSON Web Token (JWT) in the header."""
-    def __init__(self, gc_secret_resource_name: str) -> None:
-        """Initializes a JWT request handler object."""
-        self._gc_secret_resource_name: str = gc_secret_resource_name
-    def _private_key_from_secret_manager(self) -> SecretData:
-        """Fetches the private key from Google Cloud Secret Manager."""
-        client = secretmanager.SecretManagerServiceClient()
-        print(f"Fetching secret from {self._gc_secret_resource_name}")
-        response = client.access_secret_version(name=self._gc_secret_resource_name)
-        raw_data = response.payload.data.decode("UTF-8")
-        data = json.loads(raw_data)
-        try:
-            secret_data = SecretData(
-                private_key=data["privateKey"],
-                kid=data["kid"],
-                principal_key=data["principalKey"],
-            )
-        except KeyError as e:
-            raise ValueError(
-                "The secret must be a JSON object with keys 'privateKey', 'kid' and 'principalKey'."
-            ) from e
-        return secret_data
-    def _generate_token(self) -> str:
-        secret_data = self._private_key_from_secret_manager()
-        header = {
-            "kid": secret_data.kid,
-            "typ": TYPE,
-            "alg": ALGORITHM,
-        }
-        iat = int(datetime.now().timestamp())
-        exp = iat + 30
-        payload = {
-            "sub": secret_data.principal_key,
-            "iat": iat,
-            "exp": exp,
-        }
-        token = jwt.encode(
-            payload, secret_data.private_key, algorithm=ALGORITHM, headers=header
-        )
-        return token
-    def send_request(
-        self,
-        url: str,
-        headers: dict[str, str] | None = None,
-        data: dict[str, str] | None = None,
-    ) -> Response:
-        """Sends the request to the specified url with bearer token in header."""
-        token = self._generate_token()
-        headers = {
-            "Authorization": f"Bearer {token}",
-            "Content-Type": "application/json",
-        }
-        response = requests.post(
-            url,
-            headers=headers,
-            json=data,
-        )
-        try:
-            body = response.json()
-            body = dict(body)
-        except Exception:
-            body = {}
-        return Response(
-            status_code=response.status_code,
-            body=body,
-        )

ssb-pubmd 0.0.19__py3-none-any.whl → 0.1.1__py3-none-any.whl

ssb-pubmd 0.0.19__py3-none-any.whl → 0.1.1__py3-none-any.whl