ssb-pubmd 0.0.19__tar.gz → 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,27 +1,24 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: ssb-pubmd
3
- Version: 0.0.19
3
+ Version: 0.1.0
4
4
  Summary: SSB Pubmd
5
5
  License: MIT
6
6
  Author: Olav Landsverk
7
7
  Author-email: stud-oll@ssb.no
8
8
  Requires-Python: >=3.10,<4.0
9
- Classifier: Development Status :: 1 - Planning
9
+ Classifier: Development Status :: 3 - Alpha
10
10
  Classifier: License :: OSI Approved :: MIT License
11
11
  Classifier: Programming Language :: Python :: 3
12
12
  Classifier: Programming Language :: Python :: 3.10
13
13
  Classifier: Programming Language :: Python :: 3.11
14
14
  Classifier: Programming Language :: Python :: 3.12
15
15
  Classifier: Programming Language :: Python :: 3.13
16
- Requires-Dist: click (>=8.0.1)
17
- Requires-Dist: cryptography (>=45.0.4,<46.0.0)
16
+ Requires-Dist: cryptography (>=45.0.5,<46.0.0)
18
17
  Requires-Dist: google-cloud-secret-manager (>=2.24.0,<3.0.0)
19
18
  Requires-Dist: nbformat (>=5.10.4,<6.0.0)
20
- Requires-Dist: platformdirs (>=4.3.8,<5.0.0)
21
- Requires-Dist: playwright (>=1.51.0,<2.0.0)
22
19
  Requires-Dist: pyjwt (>=2.10.1,<3.0.0)
23
- Requires-Dist: requests (>=2.32.3,<3.0.0)
24
- Requires-Dist: types-requests (>=2.32.0.20250306,<3.0.0.0)
20
+ Requires-Dist: requests (>=2.32.4,<3.0.0)
21
+ Requires-Dist: types-requests (>=2.32.4.20250611,<3.0.0.0)
25
22
  Project-URL: Changelog, https://github.com/statisticsnorway/ssb-pubmd/releases
26
23
  Project-URL: Documentation, https://statisticsnorway.github.io/ssb-pubmd
27
24
  Project-URL: Homepage, https://github.com/statisticsnorway/ssb-pubmd
@@ -55,28 +52,25 @@ Description-Content-Type: text/markdown
55
52
  [black]: https://github.com/psf/black
56
53
  [poetry]: https://python-poetry.org/
57
54
 
55
+ ## Features
58
56
 
59
- ## Installation
57
+ - TODO
60
58
 
61
- Installation with pip:
59
+ ## Requirements
62
60
 
63
- ```console
64
- pip install ssb-pubmd
65
- ```
61
+ - TODO
62
+
63
+ ## Installation
66
64
 
67
- If you need to create a logged-in browser context, you will also need to install a [Playwright browser](https://playwright.dev/python/docs/browsers#install-browsers):
65
+ You can install _SSB Pubmd_ via [pip] from [PyPI]:
68
66
 
69
67
  ```console
70
- playwright install --with-deps chromium
68
+ pip install ssb-pubmd
71
69
  ```
72
70
 
73
71
  ## Usage
74
72
 
75
- Run the main command in a terminal to see available subcommands with documentation:
76
-
77
- ```console
78
- pubmd
79
- ```
73
+ Please see the [Reference Guide] for details.
80
74
 
81
75
  ## Contributing
82
76
 
@@ -25,28 +25,25 @@
25
25
  [black]: https://github.com/psf/black
26
26
  [poetry]: https://python-poetry.org/
27
27
 
28
+ ## Features
28
29
 
29
- ## Installation
30
+ - TODO
30
31
 
31
- Installation with pip:
32
+ ## Requirements
32
33
 
33
- ```console
34
- pip install ssb-pubmd
35
- ```
34
+ - TODO
35
+
36
+ ## Installation
36
37
 
37
- If you need to create a logged-in browser context, you will also need to install a [Playwright browser](https://playwright.dev/python/docs/browsers#install-browsers):
38
+ You can install _SSB Pubmd_ via [pip] from [PyPI]:
38
39
 
39
40
  ```console
40
- playwright install --with-deps chromium
41
+ pip install ssb-pubmd
41
42
  ```
42
43
 
43
44
  ## Usage
44
45
 
45
- Run the main command in a terminal to see available subcommands with documentation:
46
-
47
- ```console
48
- pubmd
49
- ```
46
+ Please see the [Reference Guide] for details.
50
47
 
51
48
  ## Contributing
52
49
 
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "ssb-pubmd"
3
- version = "0.0.19"
3
+ version = "0.1.0"
4
4
  description = "SSB Pubmd"
5
5
  authors = ["Olav Landsverk <stud-oll@ssb.no>"]
6
6
  license = "MIT"
@@ -8,28 +8,23 @@ readme = "README.md"
8
8
  homepage = "https://github.com/statisticsnorway/ssb-pubmd"
9
9
  repository = "https://github.com/statisticsnorway/ssb-pubmd"
10
10
  documentation = "https://statisticsnorway.github.io/ssb-pubmd"
11
- classifiers = ["Development Status :: 1 - Planning"]
11
+ classifiers = ["Development Status :: 3 - Alpha"]
12
12
 
13
13
  [tool.poetry.urls]
14
14
  Changelog = "https://github.com/statisticsnorway/ssb-pubmd/releases"
15
15
 
16
16
  [tool.poetry.dependencies]
17
17
  python = "^3.10"
18
- click = ">=8.0.1"
19
- nbformat = "^5.10.4"
20
- requests = "^2.32.3"
21
- types-requests = "^2.32.0.20250306"
22
- playwright = "^1.51.0"
23
- platformdirs = "^4.3.8"
24
- pyjwt = "^2.10.1"
18
+ requests = "^2.32.4"
19
+ types-requests = "^2.32.4.20250611"
25
20
  google-cloud-secret-manager = "^2.24.0"
26
- cryptography = "^45.0.4"
21
+ pyjwt = "^2.10.1"
22
+ cryptography = "^45.0.5"
23
+ nbformat = "^5.10.4"
27
24
 
28
25
  [tool.poetry.group.dev.dependencies]
29
26
  pygments = ">=2.10.0"
30
- black = { extras = ["jupyter"], version = ">=23.1.0" }
31
27
  coverage = { extras = ["toml"], version = ">=6.2" }
32
- darglint = ">=1.8.1"
33
28
  furo = ">=2021.11.12"
34
29
  mypy = ">=0.930"
35
30
  pre-commit = ">=2.16.0"
@@ -43,18 +38,15 @@ sphinx-click = ">=3.0.2"
43
38
  typeguard = ">=2.13.3"
44
39
  xdoctest = { extras = ["colors"], version = ">=0.15.10" }
45
40
  myst-parser = { version = ">=0.16.1" }
46
- uuid = "^1.30"
47
- ipykernel = "^6.29.5"
48
- pytest-asyncio = "^0.26.0"
49
41
 
50
42
  [tool.pytest.ini_options]
51
- pythonpath = ["src"]
43
+ pythonpath = ["ssb_pubmd"]
52
44
 
53
45
  [tool.poetry.scripts]
54
- pubmd = "ssb_pubmd.__main__:cli"
46
+ ssb-pubmd = "ssb_pubmd.__main__:main"
55
47
 
56
48
  [tool.coverage.paths]
57
- source = ["src", "*/site-packages"]
49
+ source = ["ssb_pubmd", "*/site-packages"]
58
50
  tests = ["tests", "*/tests"]
59
51
 
60
52
  [tool.coverage.run]
@@ -72,6 +64,7 @@ warn_unreachable = true
72
64
  pretty = true
73
65
  show_column_numbers = true
74
66
  show_error_context = true
67
+ explicit_package_bases = true
75
68
 
76
69
  [tool.ruff]
77
70
  force-exclude = true # Apply excludes to pre-commit
@@ -93,7 +86,6 @@ select = [
93
86
  "A", # prevent using keywords that clobber python builtins
94
87
  "ANN", # check type annotations
95
88
  "B", # bugbear: security warnings
96
- "D", # documentation
97
89
  "E", # pycodestyle
98
90
  "F", # pyflakes
99
91
  "ISC", # implicit string concatenation
@@ -0,0 +1,24 @@
1
+ import sys
2
+
3
+ from ssb_pubmd.adapters.cli import CliAdapter
4
+ from ssb_pubmd.adapters.cms_client import MimirCmsClient
5
+ from ssb_pubmd.adapters.local_storage import LocalStorageAdapter
6
+ from ssb_pubmd.adapters.secret_manager_client import GoogleSecretManagerClient
7
+ from ssb_pubmd.config import get_config
8
+ from ssb_pubmd.enonic_cms_manager import EnonicCmsManager
9
+
10
+
11
def main() -> None:
    """Build the application from its adapters and run the command line.

    The configuration is loaded first; the CMS manager is then assembled
    from the CMS client, the secret manager client and the local storage
    adapter, and ``sys.argv`` is handed to the CLI adapter.
    """
    settings = get_config()
    manager = EnonicCmsManager(
        config=settings,
        cms_client=MimirCmsClient(settings.cms_base_url),
        secret_manager_client=GoogleSecretManagerClient(
            settings.gc_secret_resource_name
        ),
        content_file_handler=LocalStorageAdapter(settings.metadata_file_path),
    )
    CliAdapter(cms_manager=manager).run(sys.argv)


if __name__ == "__main__":
    main()
@@ -0,0 +1,21 @@
1
+ import sys
2
+ from dataclasses import dataclass
3
+
4
+ from ssb_pubmd.ports import CmsManager
5
+
6
+
7
+ @dataclass
8
+ class CliAdapter:
9
+ cms_manager: CmsManager
10
+
11
+ def run(self, system_arguments: list[str]) -> None:
12
+ match system_arguments:
13
+ case [_, "sync", file_path]:
14
+ preview = self.cms_manager.sync(file_path)
15
+ print(f"Preview URL: {preview}")
16
+ # except Exception as e:
17
+ # print(f"Error during sync: {e}")
18
+ # sys.exit(1)
19
+ case _:
20
+ print("Usage: ssb-pubmd sync <content_file_path>")
21
+ sys.exit(1)
@@ -0,0 +1,47 @@
1
+ from urllib.parse import urlparse
2
+
3
+ import requests
4
+
5
+ from ssb_pubmd.models import CmsResponse
6
+ from ssb_pubmd.models import Content
7
+
8
+
9
class CmsClientError(Exception):
    """Raised when a request to the CMS fails or its response cannot be used."""


class MimirCmsClient:
    """HTTP client for an Enonic CMS that has the mimir application installed."""

    base_url: str
    timeout: float

    def __init__(self, base_url: str, timeout: float = 60.0) -> None:
        """Creates a client for the CMS at the given base URL.

        :param base_url: Base URL of the CMS, used for requests and for
            resolving relative preview URLs.
        :param timeout: Seconds to wait for the CMS before giving up. Without
            a timeout ``requests`` waits indefinitely.
        """
        self.base_url = base_url
        self.timeout = timeout

    def _convert_preview_url(self, url_from_response: str) -> str:
        """Convert the preview URL to a full URL if it's relative.

        A URL that already has a scheme and host is returned as is. Otherwise
        its path replaces the path of the base URL; a query string or fragment
        in the response is carried over instead of being dropped.
        """
        url = urlparse(url_from_response)
        if url.scheme and url.netloc:
            return url.geturl()
        base = urlparse(self.base_url)
        return base._replace(
            path=url.path,
            query=url.query or base.query,
            fragment=url.fragment or base.fragment,
        ).geturl()

    def send(self, token: str, content: Content) -> CmsResponse:
        """Sends a request to the Enonic CMS, assumed to have the mimir application installed (currently this only works with the feature branch https://github.com/statisticsnorway/mimir/pull/3192).

        :param token: Bearer token placed in the ``Authorization`` header.
        :param content: The content to post, serialized with ``to_json``.
        :return: The content id and full preview URL taken from the response.
        :raises CmsClientError: If the request fails, the status code is not
            200, or the response body lacks the expected keys.
        """
        try:
            response = requests.post(
                f"{self.base_url}/_/service/mimir/postMarkdown",
                headers={
                    "Authorization": f"Bearer {token}",
                    "Content-Type": "application/json",
                },
                json=content.to_json(),
                timeout=self.timeout,
            )
            if response.status_code != 200:
                raise CmsClientError(
                    f"Request to CMS failed with status code {response.status_code}."
                )
            body = response.json()
            return CmsResponse(
                id=body["_id"],
                preview_url=self._convert_preview_url(body["previewPath"]),
            )
        except CmsClientError:
            # Already carries a specific message (e.g. the status code);
            # re-wrapping it below would hide that message from the user.
            raise
        except Exception as e:
            raise CmsClientError("Request to CMS failed.") from e
@@ -0,0 +1,72 @@
1
+ import json
2
+ from dataclasses import dataclass
3
+ from pathlib import Path
4
+
5
+ import nbformat
6
+
7
+ from ssb_pubmd.models import Content
8
+
9
+ ID_KEY = "_id"
10
+
11
+
12
+ class LocalStorageError(Exception): ...
13
+
14
+
15
+ @dataclass
16
+ class LocalStorageAdapter:
17
+ metadata_file_path: Path
18
+
19
+ def get_file_id(self, file_path: Path) -> str:
20
+ """
21
+ Returns the content id of a given file path.
22
+ If no id is registered for the given file path,
23
+ it returns an empty string.
24
+ """
25
+ with open(self.metadata_file_path) as metadata_file:
26
+ metadata = json.load(metadata_file)
27
+ return str(metadata.get(str(file_path.absolute()), {}).get(ID_KEY, ""))
28
+
29
+ def set_file_id(self, file_path: Path, content_id: str) -> None:
30
+ """Stores a given file's content id in the metadata file"""
31
+ with open(self.metadata_file_path) as metadata_file:
32
+ metadata = json.load(metadata_file)
33
+ metadata[str(file_path.absolute())] = {ID_KEY: content_id}
34
+ with open(self.metadata_file_path, "w") as metadata_file:
35
+ json.dump(metadata, metadata_file)
36
+
37
+ def get_content(self, file_path: Path) -> Content:
38
+ """
39
+ Returns the markdown content of a given file.
40
+ If the file is neither a .md or a .ipynb file,
41
+ this function will throw a `LocalStorageError`
42
+ """
43
+ return Content(
44
+ content_id=self.get_file_id(file_path),
45
+ file_path=file_path,
46
+ markdown=self._get_content(file_path),
47
+ )
48
+
49
+ def _get_content_from_notebook_file(self, file_path: Path) -> str:
50
+ """Extracts all markdown cells from the notebook and returns it as a string."""
51
+ notebook = nbformat.read(file_path, as_version=nbformat.NO_CONVERT) # type: ignore
52
+ markdown_cells = []
53
+ for cell in notebook.cells:
54
+ if cell.cell_type == "markdown":
55
+ markdown_cells.append(cell.source)
56
+ sep = "\n\n"
57
+ return sep.join(markdown_cells)
58
+
59
+ def _get_content_from_markdown_file(self, file_path: Path) -> str:
60
+ """Returns the content of a markdown file as a string."""
61
+ with open(file_path) as file:
62
+ return file.read()
63
+
64
+ def _get_content(self, file_path: Path) -> str:
65
+ file_type = file_path.suffix
66
+ match file_type:
67
+ case ".md":
68
+ return self._get_content_from_markdown_file(file_path)
69
+ case ".ipynb":
70
+ return self._get_content_from_notebook_file(file_path)
71
+ case _:
72
+ raise LocalStorageError(f"Unsupported file type: {file_type}")
@@ -0,0 +1,66 @@
1
+ """This module handles HTTP requests and responses to and from the CMS."""
2
+
3
+ import json
4
+ from dataclasses import dataclass
5
+ from datetime import datetime
6
+
7
+ import jwt
8
+ from google.cloud import secretmanager
9
+
10
+
11
@dataclass
class Secret:
    """Signing material read from the secret manager payload."""

    # Value of the payload's "privateKey" entry; used as the JWT signing key.
    private_key: str
    # Value of the payload's "kid" entry; sent in the JWT header.
    kid: str
    # Value of the payload's "principalKey" entry; sent as the JWT "sub" claim.
    principal_key: str
16
+
17
+
18
class SecretManagerError(Exception):
    """Raised when the secret payload does not have the expected shape."""


class GoogleSecretManagerClient:
    """Generates short-lived JWTs signed with a key stored in Google Cloud Secret Manager."""

    TYPE = "JWT"
    ALGORITHM = "RS256"
    # Number of seconds between the "iat" and "exp" claims of a token.
    TOKEN_LIFETIME_SECONDS = 30
    _gc_secret_resource_name: str

    def __init__(self, gc_secret_resource_name: str) -> None:
        """Stores the resource name of the secret version to read.

        :param gc_secret_resource_name: Name passed to ``access_secret_version``.
        """
        self._gc_secret_resource_name = gc_secret_resource_name

    def _get_secret(self) -> Secret:
        """Fetches the private key and related data from Google Cloud Secret Manager.

        :raises SecretManagerError: If the payload is not a JSON object with
            the keys 'privateKey', 'kid' and 'principalKey'.
        """
        client = secretmanager.SecretManagerServiceClient()
        response = client.access_secret_version(name=self._gc_secret_resource_name)
        raw_data = response.payload.data.decode("UTF-8")
        try:
            data = json.loads(raw_data)
            return Secret(
                private_key=data["privateKey"],
                kid=data["kid"],
                principal_key=data["principalKey"],
            )
        # ValueError covers invalid JSON (JSONDecodeError); TypeError covers a
        # payload that is valid JSON but not an object (list, string, number).
        except (ValueError, KeyError, TypeError) as e:
            raise SecretManagerError(
                "The secret must be a JSON object with keys 'privateKey', 'kid' and 'principalKey'."
            ) from e

    def generate_token(self) -> str:
        """Returns a signed JWT whose subject is the secret's principal key.

        The token is issued at the current time and expires
        ``TOKEN_LIFETIME_SECONDS`` later.
        """
        secret = self._get_secret()

        header = {
            "kid": secret.kid,
            "typ": self.TYPE,
            "alg": self.ALGORITHM,
        }

        iat = int(datetime.now().timestamp())
        exp = iat + self.TOKEN_LIFETIME_SECONDS
        payload = {
            "sub": secret.principal_key,
            "iat": iat,
            "exp": exp,
        }

        token = jwt.encode(
            payload, secret.private_key, algorithm=self.ALGORITHM, headers=header
        )
        return token
@@ -0,0 +1,27 @@
1
+ import os
2
+ from dataclasses import dataclass
3
+ from pathlib import Path
4
+
5
+ APP_NAME = "SSB_PUBMD"
6
+
7
+
8
+ @dataclass
9
+ class Config:
10
+ metadata_file_path: Path
11
+ cms_base_url: str
12
+ gc_secret_resource_name: str
13
+
14
+
15
+ def get_config(metadata_file_path: Path | None = None) -> Config:
16
+ user_data_dir = Path.home() / ".local" / "share" / APP_NAME
17
+ if not metadata_file_path:
18
+ user_data_dir.mkdir(parents=True, exist_ok=True)
19
+ metadata_file_path = user_data_dir / "metadata.json"
20
+ if not metadata_file_path.exists():
21
+ with open(metadata_file_path, "x") as metadata_file:
22
+ metadata_file.write("{}\n")
23
+ return Config(
24
+ metadata_file_path=metadata_file_path,
25
+ cms_base_url=os.environ[f"{APP_NAME}_BASE_URL"],
26
+ gc_secret_resource_name=os.environ[f"{APP_NAME}_GC_SECRET_RESOURCE_NAME"],
27
+ )
@@ -0,0 +1,30 @@
1
+ from dataclasses import dataclass
2
+ from pathlib import Path
3
+
4
+ from ssb_pubmd.config import Config
5
+ from ssb_pubmd.ports import CmsClient
6
+ from ssb_pubmd.ports import ContentFileHandler
7
+ from ssb_pubmd.ports import SecretManagerClient
8
+
9
+
10
@dataclass
class EnonicCmsManager:
    """A CMS manager tailored to the Enonic CMS."""

    config: Config
    cms_client: CmsClient
    secret_manager_client: SecretManagerClient
    content_file_handler: ContentFileHandler

    def sync(self, content_file_path: str) -> str:
        """Requests that Enonic stores/updates the given content file and gives back a rendered preview.

        How the request is sent is left to the CmsClient implementation. This
        class only relies on getting back a CmsResponse, from which it stores
        the content id for the file and returns the preview URL.

        :param content_file_path: The path to the content file to be synced.
        :return: The preview URL from the CMS response.
        """
        source_path = Path(content_file_path)
        content = self.content_file_handler.get_content(source_path)
        token = self.secret_manager_client.generate_token()
        cms_response = self.cms_client.send(token=token, content=content)
        # Remember the id so that the file can be identified on later syncs.
        self.content_file_handler.set_file_id(content.file_path, cms_response.id)
        return cms_response.preview_url
@@ -0,0 +1,28 @@
1
+ from dataclasses import dataclass
2
+ from pathlib import Path
3
+
4
+
5
@dataclass
class CmsResponse:
    """The parts of a CMS response that the application uses."""

    # Identifier of the content, as returned by the CMS.
    id: str
    # URL to the preview of the content.
    preview_url: str
9
+
10
+
11
@dataclass
class Content:
    """A content file together with its id and markdown text."""

    content_id: str
    file_path: Path
    markdown: str

    @property
    def display_name(self) -> str:
        """Title-cased file stem with underscores turned into spaces."""
        words = self.file_path.stem.split("_")
        return " ".join(words).title()

    def to_json(self) -> dict[str, str]:
        """Returns the content as a dict with keys ``_id``, ``displayName`` and ``markdown``."""
        return dict(
            _id=self.content_id,
            displayName=self.display_name,
            markdown=self.markdown,
        )
@@ -0,0 +1,57 @@
1
+ from pathlib import Path
2
+ from typing import Protocol
3
+
4
+ from ssb_pubmd.models import CmsResponse
5
+ from ssb_pubmd.models import Content
6
+
7
+
8
class CmsManager(Protocol):
    """Primary port: the interface through which a CMS is managed.

    Each implementing class targets one specific CMS application.
    """

    def sync(self, content_file_path: str) -> str:
        """Asks the CMS to store and render the given content file.

        :param content_file_path: The path to the content file to be rendered.
        :return: A URL to the rendered preview.
        """
21
+
22
+
23
class CmsClient(Protocol):
    """Secondary port: the interface for communicating with a CMS.

    Each implementing class targets one specific CMS, with the services it
    exposes and its token authentication flow.
    """

    def send(self, token: str, content: Content) -> CmsResponse:
        """Sends the content to the CMS, authenticated with the given token."""
30
+
31
+
32
class ContentFileHandler(Protocol):
    """Secondary port: the interface for handling content files, including extraction and handling of metadata."""

    def get_file_id(self, file_path: Path) -> str:
        """Returns the id registered for the given file path.

        An empty string is returned when no id is registered.
        """

    def set_file_id(self, file_path: Path, content_id: str) -> None:
        """Registers the given id for the given file path."""

    def get_content(self, file_path: Path) -> Content:
        """Extracts the content of the given file."""
52
+
53
+
54
class SecretManagerClient(Protocol):
    """Secondary port: the interface for communicating with a secret manager and generating tokens."""

    def generate_token(self) -> str:
        """Returns a token as a string."""
@@ -1,6 +0,0 @@
1
- """SSB Pubmd."""
2
-
3
- from .browser_request_handler import BrowserRequestHandler
4
- from .markdown_syncer import MarkdownSyncer
5
-
6
- __all__ = ["BrowserRequestHandler", "MarkdownSyncer"]
@@ -1,163 +0,0 @@
1
- """Command-line interface."""
2
-
3
- import json
4
- import os
5
- from enum import Enum
6
- from pathlib import Path
7
- from urllib.parse import urlparse
8
-
9
- import click
10
-
11
- from ssb_pubmd.browser_request_handler import BrowserRequestHandler
12
- from ssb_pubmd.browser_request_handler import CreateContextMethod
13
- from ssb_pubmd.constants import APP_NAME
14
- from ssb_pubmd.constants import CACHE_FILE
15
- from ssb_pubmd.constants import CONFIG_FILE
16
- from ssb_pubmd.jwt_request_handler import JWTRequestHandler
17
- from ssb_pubmd.markdown_syncer import MarkdownSyncer
18
-
19
-
20
- class ConfigKey(Enum):
21
- """Configuration keys for the application."""
22
-
23
- BASE_URL = "base_url"
24
- LOGIN_URL = "login_url"
25
- POST_URL = "post_url"
26
- AUTH_METHOD = "auth_method"
27
- GC_SECRET_RESOURCE_NAME = "gc_secret_resource_name"
28
-
29
-
30
- def get_config_value(config_key: ConfigKey) -> str:
31
- """Load a configuration value, with precedence environment variable > config file."""
32
- key = config_key.value
33
-
34
- def get_env_value() -> str:
35
- """Get value from environment variable, by uppercasing the key and adding prefix."""
36
- prefix = f"{APP_NAME.upper()}_"
37
- value = os.getenv(f"{prefix}{key.upper()}", "")
38
-
39
- return value
40
-
41
- def get_config_file_value() -> str:
42
- """Get value from the config file."""
43
- try:
44
- with open(CONFIG_FILE) as f:
45
- data = json.load(f)
46
-
47
- value = data.get(key)
48
- except Exception:
49
- value = ""
50
-
51
- return str(value)
52
-
53
- return get_env_value() or get_config_file_value()
54
-
55
-
56
- def set_config_value(config_key: ConfigKey, value: str) -> None:
57
- """Set a configuration value in the config file."""
58
- key = config_key.value
59
-
60
- with open(CONFIG_FILE) as f:
61
- try:
62
- data = json.load(f)
63
- except json.JSONDecodeError:
64
- data = {}
65
-
66
- data[key] = value
67
-
68
- with open(CONFIG_FILE, "w") as f:
69
- json.dump(data, f, indent=4)
70
-
71
-
72
- @click.group()
73
- def cli() -> None:
74
- """Pubmd - a tool to sync markdown and notebook files to a CMS."""
75
- pass
76
-
77
-
78
- @cli.command()
79
- def settings() -> None:
80
- """Set the login and post URL for the CMS."""
81
- login_url = click.prompt("Enter the login URL", type=str)
82
- set_config_value(ConfigKey.LOGIN_URL, login_url)
83
-
84
- post_url = click.prompt("Enter the post URL", type=str)
85
- set_config_value(ConfigKey.POST_URL, post_url)
86
-
87
- click.echo(f"\nSettings stored in:\n{click.format_filename(CONFIG_FILE)}")
88
-
89
-
90
- @cli.command()
91
- def login() -> None:
92
- """Log in to the CMS application."""
93
- login_url = get_config_value(ConfigKey.LOGIN_URL)
94
- request_handler = BrowserRequestHandler(CACHE_FILE, login_url)
95
-
96
- method = CreateContextMethod.FROM_LOGIN
97
- with request_handler.new_context(method=method):
98
- click.echo("Logging in...")
99
-
100
- click.echo(f"\nBrowser context stored in:\n{CACHE_FILE}")
101
-
102
-
103
- def sync_with_browser(content_file_path: str) -> None:
104
- """Sync a markdown or notebook file to the CMS."""
105
- login_url = get_config_value(ConfigKey.LOGIN_URL)
106
- request_handler = BrowserRequestHandler(CACHE_FILE, login_url)
107
-
108
- with request_handler.new_context() as context:
109
- post_url = get_config_value(ConfigKey.POST_URL)
110
- syncer = MarkdownSyncer(post_url, request_handler)
111
-
112
- syncer.content_file_path = Path(content_file_path)
113
- response = syncer.sync_content()
114
-
115
- click.echo("Content synced successfully.")
116
-
117
- path = response.body.get("previewPath", "")
118
- preview = urlparse(login_url)._replace(path=path).geturl()
119
- if preview:
120
- page = context.new_page()
121
- page.goto(preview)
122
- click.echo(f"Preview opened in new browser: {preview}")
123
- click.echo("Close the browser tab to finish.")
124
- page.wait_for_event("close", timeout=0)
125
- else:
126
- click.echo("No preview url found in the response.")
127
-
128
-
129
- def sync_with_jwt(content_file_path: str) -> None:
130
- """Sync a markdown or notebook file to the CMS."""
131
- gc_secret_resource_name = get_config_value(ConfigKey.GC_SECRET_RESOURCE_NAME)
132
- request_handler = JWTRequestHandler(gc_secret_resource_name)
133
-
134
- post_url = get_config_value(ConfigKey.POST_URL)
135
- syncer = MarkdownSyncer(post_url, request_handler)
136
-
137
- syncer.content_file_path = Path(content_file_path)
138
- response = syncer.sync_content()
139
-
140
- click.echo("Content synced successfully.")
141
-
142
- preview_path = response.body.get("previewPath", "")
143
- if preview_path:
144
- base_url = get_config_value(ConfigKey.BASE_URL)
145
- preview = urlparse(base_url)._replace(path=preview_path).geturl()
146
- click.echo(f"Preview url found in the response: {preview}")
147
- else:
148
- click.echo("No preview url found in the response.")
149
-
150
-
151
- @cli.command()
152
- @click.argument("content_file_path", type=click.Path())
153
- def sync(content_file_path: str) -> None:
154
- """Sync a markdown or notebook file to the CMS."""
155
- auth_method = get_config_value(ConfigKey.AUTH_METHOD)
156
- if auth_method == "browser":
157
- sync_with_browser(content_file_path)
158
- else:
159
- sync_with_jwt(content_file_path)
160
-
161
-
162
- if __name__ == "__main__":
163
- cli()
@@ -1,85 +0,0 @@
1
- from collections.abc import Iterator
2
- from contextlib import contextmanager
3
- from enum import Enum
4
- from pathlib import Path
5
-
6
- from playwright.sync_api import BrowserContext
7
- from playwright.sync_api import sync_playwright
8
-
9
- from ssb_pubmd.request_handler import Response
10
-
11
-
12
- class CreateContextMethod(Enum):
13
- """The method used to create the browser context.
14
-
15
- Can be either from a file containing the context data,
16
- or from a login popup window.
17
- """
18
-
19
- FROM_FILE = "from_file"
20
- FROM_LOGIN = "from_login"
21
-
22
-
23
- class BrowserRequestHandler:
24
- """This class is used to create a logged in browser context from which to send requests."""
25
-
26
- def __init__(self, context_file_path: Path, login_url: str) -> None:
27
- """Initializes an empty browser context object."""
28
- self._context_file_path: Path = context_file_path
29
- self._login_url: str = login_url
30
- self._context: BrowserContext | None = None
31
-
32
- @contextmanager
33
- def new_context(
34
- self, method: CreateContextMethod = CreateContextMethod.FROM_FILE
35
- ) -> Iterator[BrowserContext]:
36
- """Wrapper around playwright's context manager.
37
-
38
- The default is to create a new context from a file.
39
- If `from_file` is set False, a new context is created through a browser popup with user login,
40
- and the context is saved to a file.
41
- """
42
- with sync_playwright() as playwright:
43
- browser = playwright.chromium.launch(headless=False)
44
- match method:
45
- case CreateContextMethod.FROM_FILE:
46
- self._context = browser.new_context(
47
- storage_state=self._context_file_path
48
- )
49
- case CreateContextMethod.FROM_LOGIN:
50
- self._context = browser.new_context()
51
- login_page = self._context.new_page()
52
- login_page.goto(self._login_url)
53
- login_page.wait_for_event("close", timeout=0)
54
- self._context.storage_state(path=self._context_file_path)
55
- yield self._context
56
- self._context.close()
57
- browser.close()
58
-
59
- def send_request(
60
- self,
61
- url: str,
62
- headers: dict[str, str] | None = None,
63
- data: dict[str, str] | None = None,
64
- ) -> Response:
65
- """Sends a request to the specified url, optionally with headers and data, within the browser context."""
66
- if self._context is None:
67
- raise ValueError("Browser context has not been created.")
68
-
69
- api_response = self._context.request.post(
70
- url,
71
- data=data,
72
- )
73
-
74
- try:
75
- body = api_response.json()
76
- body = dict(body)
77
- except Exception:
78
- body = {}
79
-
80
- response = Response(
81
- status_code=api_response.status,
82
- body=body,
83
- )
84
-
85
- return response
@@ -1,22 +0,0 @@
1
- from enum import Enum
2
-
3
- from platformdirs import user_cache_path
4
- from platformdirs import user_config_path
5
- from platformdirs import user_data_path
6
-
7
- APP_NAME = "pubmd"
8
-
9
- METADATA_FILE = user_data_path(APP_NAME, ensure_exists=True) / "metadata.json"
10
- CACHE_FILE = user_cache_path(APP_NAME, ensure_exists=True) / "cache.json"
11
- CONFIG_FILE = user_config_path(APP_NAME, ensure_exists=True) / "config.json"
12
-
13
- CACHE_FILE.touch()
14
- CONFIG_FILE.touch()
15
- METADATA_FILE.touch()
16
-
17
-
18
- class ContentType(Enum):
19
- """Allowed content types."""
20
-
21
- MARKDOWN = ".md"
22
- NOTEBOOK = ".ipynb"
@@ -1,99 +0,0 @@
1
- import json
2
- from dataclasses import dataclass
3
- from datetime import datetime
4
-
5
- import jwt
6
- import requests
7
- from google.cloud import secretmanager
8
-
9
- from ssb_pubmd.request_handler import Response
10
-
11
- TYPE = "JWT"
12
- ALGORITHM = "RS256"
13
-
14
-
15
- @dataclass
16
- class SecretData:
17
- """Data class to hold private key and connected data."""
18
-
19
- private_key: str
20
- kid: str
21
- principal_key: str
22
-
23
-
24
- class JWTRequestHandler:
25
- """This class is used to send requests with a JSON Web Token (JWT) in the header."""
26
-
27
- def __init__(self, gc_secret_resource_name: str) -> None:
28
- """Initializes a JWT request handler object."""
29
- self._gc_secret_resource_name: str = gc_secret_resource_name
30
-
31
- def _private_key_from_secret_manager(self) -> SecretData:
32
- """Fetches the private key from Google Cloud Secret Manager."""
33
- client = secretmanager.SecretManagerServiceClient()
34
- print(f"Fetching secret from {self._gc_secret_resource_name}")
35
- response = client.access_secret_version(name=self._gc_secret_resource_name)
36
- raw_data = response.payload.data.decode("UTF-8")
37
- data = json.loads(raw_data)
38
- try:
39
- secret_data = SecretData(
40
- private_key=data["privateKey"],
41
- kid=data["kid"],
42
- principal_key=data["principalKey"],
43
- )
44
- except KeyError as e:
45
- raise ValueError(
46
- "The secret must be a JSON object with keys 'privateKey', 'kid' and 'principalKey'."
47
- ) from e
48
- return secret_data
49
-
50
- def _generate_token(self) -> str:
51
- secret_data = self._private_key_from_secret_manager()
52
-
53
- header = {
54
- "kid": secret_data.kid,
55
- "typ": TYPE,
56
- "alg": ALGORITHM,
57
- }
58
-
59
- iat = int(datetime.now().timestamp())
60
- exp = iat + 30
61
- payload = {
62
- "sub": secret_data.principal_key,
63
- "iat": iat,
64
- "exp": exp,
65
- }
66
-
67
- token = jwt.encode(
68
- payload, secret_data.private_key, algorithm=ALGORITHM, headers=header
69
- )
70
- return token
71
-
72
- def send_request(
73
- self,
74
- url: str,
75
- headers: dict[str, str] | None = None,
76
- data: dict[str, str] | None = None,
77
- ) -> Response:
78
- """Sends the request to the specified url with bearer token in header."""
79
- token = self._generate_token()
80
- headers = {
81
- "Authorization": f"Bearer {token}",
82
- "Content-Type": "application/json",
83
- }
84
- response = requests.post(
85
- url,
86
- headers=headers,
87
- json=data,
88
- )
89
-
90
- try:
91
- body = response.json()
92
- body = dict(body)
93
- except Exception:
94
- body = {}
95
-
96
- return Response(
97
- status_code=response.status_code,
98
- body=body,
99
- )
@@ -1,183 +0,0 @@
1
- import json
2
- from pathlib import Path
3
-
4
- import nbformat
5
- from nbformat import NotebookNode
6
-
7
- from ssb_pubmd.constants import METADATA_FILE
8
- from ssb_pubmd.constants import ContentType
9
- from ssb_pubmd.request_handler import RequestHandler
10
- from ssb_pubmd.request_handler import Response
11
-
12
-
13
- class MarkdownSyncer:
14
- """This class syncs a content file to a CMS (Content Management System).
15
-
16
- The CMS must have an endpoint that satisfies the following constraints:
17
-
18
- - It must accept a post request with fields *_id*, *displayName* and *markdown*.
19
- - The response body must have a key *_id* whose value should be
20
- a unique string identifier of the content.
21
-
22
- Creating and updating content is handled in the following way:
23
-
24
- - On the first request, an empty string is sent as *_id*.
25
- - If the request succeeds, the value of *_id* (in the response) is stored in a JSON file
26
- (created in the same directory as the markdown/notebook file).
27
- - On subsequent requests, the stored value is sent as *_id*.
28
- """
29
-
30
- ID_KEY = "_id"
31
-
32
- def __init__(
33
- self,
34
- post_url: str,
35
- request_handler: RequestHandler,
36
- metadata_file: Path = METADATA_FILE,
37
- ) -> None:
38
- """Creates a markdown syncer instance that connects to the CMS through the post url."""
39
- self._post_url: str = post_url
40
- self._request_handler: RequestHandler = request_handler
41
- self._content_file_path: Path = Path()
42
- self._content_file_type: ContentType = ContentType.MARKDOWN
43
- self._metadata_file_path: Path = metadata_file
44
-
45
- @property
46
- def content_file_path(self) -> Path:
47
- """Returns the path of the content file."""
48
- return self._content_file_path
49
-
50
- @content_file_path.setter
51
- def content_file_path(self, path: Path) -> None:
52
- """Sets the path of the content file."""
53
- if not path.is_file():
54
- raise FileNotFoundError(f"The file {path} does not exist.")
55
-
56
- ext = path.suffix.lower()
57
- for t in ContentType:
58
- if ext == t.value:
59
- self._content_file_type = t
60
- break
61
- else:
62
- allowed_extensions = [t.value for t in ContentType]
63
- sep = ", "
64
- raise ValueError(
65
- f"The file {path} has extension {ext}, but should be one of: {sep.join(allowed_extensions)}."
66
- )
67
-
68
- self._content_file_path = path
69
-
70
- @property
71
- def basename(self) -> str:
72
- """The name of the content file without extension."""
73
- return self._content_file_path.stem
74
-
75
- @property
76
- def display_name(self) -> str:
77
- """Generate a display name for the content."""
78
- return self.basename.replace("_", " ").title()
79
-
80
- @property
81
- def metadata_file_path(self) -> Path:
82
- """The path of the metadata file."""
83
- return self._metadata_file_path
84
-
85
- @property
86
- def metadata_key(self) -> str:
87
- """The key that the content metadata will be stored under in the metadata file."""
88
- return str(self._content_file_path.absolute())
89
-
90
- def _save_content_id(self, content_id: str) -> None:
91
- """Saves the content id to the metadata file."""
92
- with open(self._metadata_file_path) as f:
93
- try:
94
- data = json.load(f)
95
- except json.JSONDecodeError:
96
- data = {}
97
-
98
- data[self.metadata_key] = {
99
- self.ID_KEY: content_id,
100
- }
101
-
102
- with open(self._metadata_file_path, "w") as f:
103
- json.dump(data, f, indent=4)
104
-
105
- def _get_content_id(self) -> str:
106
- """Fetches the content id from the metadata file if it exists, otherwise an empty string."""
107
- with open(self._metadata_file_path) as f:
108
- try:
109
- data = json.load(f)
110
- except json.JSONDecodeError:
111
- data = {}
112
-
113
- metadata: dict[str, str] = data.get(self.metadata_key, {})
114
-
115
- content_id = metadata.get(self.ID_KEY, "")
116
-
117
- return content_id
118
-
119
- def _read_notebook(self) -> NotebookNode:
120
- """Reads the notebook file and returns its content."""
121
- return nbformat.read(self._content_file_path, as_version=nbformat.NO_CONVERT) # type: ignore
122
-
123
- def _get_content_from_notebook_file(self) -> str:
124
- """Extracts all markdown cells from the notebook and returns it as a merged string."""
125
- notebook = self._read_notebook()
126
-
127
- markdown_cells = []
128
- for cell in notebook.cells:
129
- if cell.cell_type == "markdown":
130
- markdown_cells.append(cell.source)
131
-
132
- markdown_content = "\n\n".join(markdown_cells)
133
-
134
- return markdown_content
135
-
136
- def _get_content_from_markdown_file(self) -> str:
137
- """Returns the content of a markdown file."""
138
- with open(self._content_file_path) as file:
139
- markdown_content = file.read()
140
- return markdown_content
141
-
142
- def _get_content(self) -> str:
143
- content = ""
144
- match self._content_file_type:
145
- case ContentType.MARKDOWN:
146
- content = self._get_content_from_markdown_file()
147
- case ContentType.NOTEBOOK:
148
- content = self._get_content_from_notebook_file()
149
- return content
150
-
151
- def _request_data(self) -> dict[str, str]:
152
- """Prepares the request data to be sent to the CMS endpoint."""
153
- return {
154
- "_id": self._get_content_id(),
155
- "displayName": self.display_name,
156
- "markdown": self._get_content(),
157
- }
158
-
159
- def sync_content(self) -> Response:
160
- """Sends the request to the CMS endpoint and returns the content id from the response."""
161
- response = self._request_handler.send_request(
162
- url=self._post_url, data=self._request_data()
163
- )
164
-
165
- if response.status_code != 200:
166
- raise ValueError(
167
- f"Request to the CMS failed with status code {response.status_code}."
168
- )
169
- if response.body is None:
170
- raise ValueError("Response body from CMS could not be parsed.")
171
- if self.ID_KEY not in response.body:
172
- raise ValueError(
173
- f"Response from the CMS does not contain the expected key '{self.ID_KEY}'."
174
- )
175
- result = response.body[self.ID_KEY]
176
- if not isinstance(result, str):
177
- raise ValueError(
178
- f"Response from the CMS does not contain a valid content id: {result}"
179
- )
180
- content_id: str = result
181
- self._save_content_id(content_id)
182
-
183
- return response
@@ -1,56 +0,0 @@
1
- from dataclasses import dataclass
2
- from typing import Any
3
- from typing import Protocol
4
-
5
- import requests
6
-
7
-
8
@dataclass
class Response:
    """Minimal response value passed between request handlers and their callers."""

    # HTTP status code of the reply.
    status_code: int
    # Parsed JSON body of the reply.
    body: dict[str, Any]
14
-
15
-
16
class RequestHandler(Protocol):
    """Interface describing how a request is sent.

    Implementing classes may handle authentication, sessions, etc.
    """

    def send_request(
        self,
        url: str,
        headers: dict[str, str] | None = None,
        data: dict[str, str] | None = None,
    ) -> Response:
        """Send a request to ``url``, optionally with headers and data, and return the response."""
        ...
30
-
31
-
32
class BasicRequestHandler:
    """Basic, unauthenticated request handler."""

    # Seconds to wait for the server before giving up; without a timeout
    # requests blocks indefinitely on a stalled connection.
    TIMEOUT_SECONDS = 30

    def send_request(
        self,
        url: str,
        headers: dict[str, str] | None = None,
        data: dict[str, str] | None = None,
    ) -> Response:
        """POST ``data`` to ``url`` without any headers and return the response.

        Args:
            url: The endpoint to post to.
            headers: Accepted so the signature matches ``RequestHandler``;
                this handler does not send them.
            data: Optional mapping passed to ``requests.post`` as ``data``.

        Returns:
            A ``Response`` with the HTTP status code and the parsed JSON body,
            or an empty dict when the body is not a JSON object.

        Raises:
            requests.exceptions.RequestException: On connection problems, or
                when the server does not answer within ``TIMEOUT_SECONDS``.
        """
        response = requests.post(
            url,
            data=data,
            timeout=self.TIMEOUT_SECONDS,
        )

        try:
            body = dict(response.json())
        except (ValueError, TypeError):
            # ValueError: body is not valid JSON (or not pair-like);
            # TypeError: valid JSON that cannot be turned into a dict.
            body = {}

        return Response(
            status_code=response.status_code,
            body=body,
        )
File without changes