ssb-pubmd 0.0.19__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ssb_pubmd/__init__.py CHANGED
@@ -1,6 +1,4 @@
1
- """SSB Pubmd."""
1
+ from ssb_pubmd.notebook_client import configure_factbox as Factbox
2
+ from ssb_pubmd.notebook_client import create_highchart as Highchart
2
3
 
3
- from .browser_request_handler import BrowserRequestHandler
4
- from .markdown_syncer import MarkdownSyncer
5
-
6
- __all__ = ["BrowserRequestHandler", "MarkdownSyncer"]
4
+ __all__ = ["Factbox", "Highchart"]
ssb_pubmd/__main__.py CHANGED
@@ -1,163 +1,13 @@
1
- """Command-line interface."""
1
+ import sys
2
2
 
3
- import json
4
- import os
5
- from enum import Enum
6
- from pathlib import Path
7
- from urllib.parse import urlparse
3
+ from ssb_pubmd.cli import run_cli
4
+ from ssb_pubmd.config import get_config
8
5
 
9
- import click
10
6
 
11
- from ssb_pubmd.browser_request_handler import BrowserRequestHandler
12
- from ssb_pubmd.browser_request_handler import CreateContextMethod
13
- from ssb_pubmd.constants import APP_NAME
14
- from ssb_pubmd.constants import CACHE_FILE
15
- from ssb_pubmd.constants import CONFIG_FILE
16
- from ssb_pubmd.jwt_request_handler import JWTRequestHandler
17
- from ssb_pubmd.markdown_syncer import MarkdownSyncer
18
-
19
-
20
- class ConfigKey(Enum):
21
- """Configuration keys for the application."""
22
-
23
- BASE_URL = "base_url"
24
- LOGIN_URL = "login_url"
25
- POST_URL = "post_url"
26
- AUTH_METHOD = "auth_method"
27
- GC_SECRET_RESOURCE_NAME = "gc_secret_resource_name"
28
-
29
-
30
- def get_config_value(config_key: ConfigKey) -> str:
31
- """Load a configuration value, with precedence environment variable > config file."""
32
- key = config_key.value
33
-
34
- def get_env_value() -> str:
35
- """Get value from environment variable, by uppercasing the key and adding prefix."""
36
- prefix = f"{APP_NAME.upper()}_"
37
- value = os.getenv(f"{prefix}{key.upper()}", "")
38
-
39
- return value
40
-
41
- def get_config_file_value() -> str:
42
- """Get value from the config file."""
43
- try:
44
- with open(CONFIG_FILE) as f:
45
- data = json.load(f)
46
-
47
- value = data.get(key)
48
- except Exception:
49
- value = ""
50
-
51
- return str(value)
52
-
53
- return get_env_value() or get_config_file_value()
54
-
55
-
56
- def set_config_value(config_key: ConfigKey, value: str) -> None:
57
- """Set a configuration value in the config file."""
58
- key = config_key.value
59
-
60
- with open(CONFIG_FILE) as f:
61
- try:
62
- data = json.load(f)
63
- except json.JSONDecodeError:
64
- data = {}
65
-
66
- data[key] = value
67
-
68
- with open(CONFIG_FILE, "w") as f:
69
- json.dump(data, f, indent=4)
70
-
71
-
72
- @click.group()
73
- def cli() -> None:
74
- """Pubmd - a tool to sync markdown and notebook files to a CMS."""
75
- pass
76
-
77
-
78
- @cli.command()
79
- def settings() -> None:
80
- """Set the login and post URL for the CMS."""
81
- login_url = click.prompt("Enter the login URL", type=str)
82
- set_config_value(ConfigKey.LOGIN_URL, login_url)
83
-
84
- post_url = click.prompt("Enter the post URL", type=str)
85
- set_config_value(ConfigKey.POST_URL, post_url)
86
-
87
- click.echo(f"\nSettings stored in:\n{click.format_filename(CONFIG_FILE)}")
88
-
89
-
90
- @cli.command()
91
- def login() -> None:
92
- """Log in to the CMS application."""
93
- login_url = get_config_value(ConfigKey.LOGIN_URL)
94
- request_handler = BrowserRequestHandler(CACHE_FILE, login_url)
95
-
96
- method = CreateContextMethod.FROM_LOGIN
97
- with request_handler.new_context(method=method):
98
- click.echo("Logging in...")
99
-
100
- click.echo(f"\nBrowser context stored in:\n{CACHE_FILE}")
101
-
102
-
103
- def sync_with_browser(content_file_path: str) -> None:
104
- """Sync a markdown or notebook file to the CMS."""
105
- login_url = get_config_value(ConfigKey.LOGIN_URL)
106
- request_handler = BrowserRequestHandler(CACHE_FILE, login_url)
107
-
108
- with request_handler.new_context() as context:
109
- post_url = get_config_value(ConfigKey.POST_URL)
110
- syncer = MarkdownSyncer(post_url, request_handler)
111
-
112
- syncer.content_file_path = Path(content_file_path)
113
- response = syncer.sync_content()
114
-
115
- click.echo("Content synced successfully.")
116
-
117
- path = response.body.get("previewPath", "")
118
- preview = urlparse(login_url)._replace(path=path).geturl()
119
- if preview:
120
- page = context.new_page()
121
- page.goto(preview)
122
- click.echo(f"Preview opened in new browser: {preview}")
123
- click.echo("Close the browser tab to finish.")
124
- page.wait_for_event("close", timeout=0)
125
- else:
126
- click.echo("No preview url found in the response.")
127
-
128
-
129
- def sync_with_jwt(content_file_path: str) -> None:
130
- """Sync a markdown or notebook file to the CMS."""
131
- gc_secret_resource_name = get_config_value(ConfigKey.GC_SECRET_RESOURCE_NAME)
132
- request_handler = JWTRequestHandler(gc_secret_resource_name)
133
-
134
- post_url = get_config_value(ConfigKey.POST_URL)
135
- syncer = MarkdownSyncer(post_url, request_handler)
136
-
137
- syncer.content_file_path = Path(content_file_path)
138
- response = syncer.sync_content()
139
-
140
- click.echo("Content synced successfully.")
141
-
142
- preview_path = response.body.get("previewPath", "")
143
- if preview_path:
144
- base_url = get_config_value(ConfigKey.BASE_URL)
145
- preview = urlparse(base_url)._replace(path=preview_path).geturl()
146
- click.echo(f"Preview url found in the response: {preview}")
147
- else:
148
- click.echo("No preview url found in the response.")
149
-
150
-
151
- @cli.command()
152
- @click.argument("content_file_path", type=click.Path())
153
- def sync(content_file_path: str) -> None:
154
- """Sync a markdown or notebook file to the CMS."""
155
- auth_method = get_config_value(ConfigKey.AUTH_METHOD)
156
- if auth_method == "browser":
157
- sync_with_browser(content_file_path)
158
- else:
159
- sync_with_jwt(content_file_path)
7
def main() -> None:
    """Entry point: load the configuration and hand the raw argv to the CLI."""
    run_cli(sys.argv, get_config())
160
10
 
161
11
 
162
12
  if __name__ == "__main__":
163
- cli()
13
+ main()
@@ -0,0 +1,185 @@
1
+ from collections.abc import Mapping
2
+ from dataclasses import asdict
3
+ from dataclasses import dataclass
4
+ from typing import Any
5
+ from typing import Literal
6
+ from typing import Protocol
7
+
8
+ import nh3
9
+
10
+
11
+ @dataclass
12
+ class Content:
13
+ title: str
14
+ content_type: str
15
+ publish_folder: str | None = None
16
+ publish_id: str | None = None
17
+
18
+ def to_dict(self) -> dict[str, Any]:
19
+ return asdict(self)
20
+
21
+ def serialize(self) -> dict[str, Any]:
22
+ raise NotImplementedError()
23
+
24
class ContentParser(Protocol):
    """Structural interface for objects that turn metadata (+ HTML) into Content."""

    def parse(self, metadata: Mapping[str, Any], html: str | None) -> Content:
        """Build a ``Content`` from ``metadata`` and an optional HTML body."""
        ...
26
+
27
+
28
@dataclass
class MimirContent(Content):
    """Content that can be serialized into the ``mimir:*`` payload format."""

    def is_publishable(self) -> bool:
        """Return True when the content has a title and a publish target.

        A publish target is either an existing ``publish_id`` or a
        ``publish_folder``.
        """
        if self.title == "":
            return False
        return self.publish_id is not None or self.publish_folder is not None

    def serialize(self) -> dict[str, Any]:
        """Return the payload dict for this content.

        The payload always contains ``contentType``, ``displayName``,
        ``parentPath`` and an empty ``data`` dict (filled in by subclasses);
        ``_id`` is added only when ``publish_id`` is set.

        Raises:
            ValueError: If the content is not publishable (empty title, or
                neither ``publish_id`` nor ``publish_folder`` set).
        """
        if not self.is_publishable():
            # ValueError is still an Exception, so existing broad handlers
            # keep working, but the failure now explains itself.
            raise ValueError(
                "Content is not publishable: it needs a non-empty title and "
                "either a publish_id or a publish_folder."
            )
        s: dict[str, Any] = {
            "contentType": "mimir:" + self.content_type,
            "displayName": self.title,
            "parentPath": self.publish_folder,
            "data": {},
        }
        if self.publish_id is not None:
            s["_id"] = self.publish_id
        return s
49
+
50
+
51
@dataclass
class Author:
    """An article author, identified by display name and e-mail address."""

    name: str
    email: str
55
+
56
@dataclass
class Article(MimirContent):
    """Article content: optional authors, an ingress and the HTML body text."""

    content_type: str = "article"
    authors: list[Author] | None = None
    ingress: str = ""
    html_text: str = ""

    def serialize(self) -> dict[str, Any]:
        """Extend the base payload with the article-specific ``data`` entries."""
        payload = super().serialize()
        data = payload["data"]
        # An empty or missing author list is left out of the payload entirely.
        if self.authors:
            data["authorItemSet"] = [asdict(person) for person in self.authors]
        data["ingress"] = self.ingress
        data["articleText"] = self.html_text
        return payload
70
+
71
+
72
+ GraphType = Literal["line", "pie", "column", "bar", "area", "barNegative"]
73
+
74
+
75
@dataclass
class Highchart(MimirContent):
    """Highchart content fed either by an HTML table or by a TBML reference."""

    content_type: str = "highchart"
    graph_type: GraphType = "line"
    html_table: str | None = None
    tbml: str | None = None
    xlabel: str = "x"
    ylabel: str = "y"

    def serialize(self) -> dict[str, Any]:
        """Extend the base payload with the data source and axis titles.

        ``html_table`` takes precedence over ``tbml`` when both are set.
        """
        # NOTE(review): graph_type is declared on the class but never written
        # into the payload here - confirm whether the receiving side expects it.
        payload = super().serialize()
        data = payload["data"]

        if self.html_table is not None:
            data["htmlTable"] = self.html_table
        elif self.tbml is not None:
            data["dataSource"] = {
                "_selected": "tbprocessor",
                "tbprocessor": {"urlOrId": self.tbml},
            }

        data["xAxisTitle"] = self.xlabel
        data["yAxisTitle"] = self.ylabel
        return payload
99
+
100
+
101
@dataclass
class FactBox(MimirContent):
    """Fact box content: a display variant plus the HTML text shown inside it."""

    content_type: str = "factBox"
    display_type: Literal["default", "sneakPeek", "aiIcon"] = "default"
    html_text: str = ""

    def serialize(self) -> dict[str, Any]:
        """Extend the base payload with the box type and its text."""
        payload = super().serialize()
        data = payload["data"]
        data["expansionBoxType"] = self.display_type
        data["text"] = self.html_text
        return payload
112
+
113
+
114
+ BASIC_HTML_TAGS = {
115
+ "p",
116
+ "br",
117
+ "strong",
118
+ "em",
119
+ "b",
120
+ "i",
121
+ "ul",
122
+ "ol",
123
+ "li",
124
+ "blockquote",
125
+ "h1",
126
+ "h2",
127
+ "h3",
128
+ "h4",
129
+ "h5",
130
+ "a",
131
+ }
132
+
133
+
134
+ class MimirContentParser:
135
+ def parse(self, metadata: Mapping[str, Any], html: str | None) -> Content:
136
+ match metadata.get("content_type"):
137
+ case "article":
138
+ return self._parse_article(metadata, html)
139
+ case "factBox":
140
+ return self._parse_factbox(metadata, html)
141
+ case "highchart":
142
+ return self._parse_highchart(metadata, html)
143
+ case _:
144
+ return MimirContent(**metadata)
145
+
146
+ def serialize(self, content: Content) -> dict[str, Any]:
147
+ if isinstance(content, MimirContent):
148
+ return content.serialize()
149
+ else:
150
+ raise Exception()
151
+
152
+ @classmethod
153
+ def _parse_article(cls, metadata: Mapping[str, Any], html: str | None) -> Article:
154
+ article = Article(
155
+ title=metadata["title"],
156
+ publish_folder="/ssb" + metadata["path"],
157
+ publish_id=metadata.get("publish_id"),
158
+ authors=[Author(**data) for data in metadata.get("authors", [])],
159
+ ingress=metadata.get("ingress", ""),
160
+ )
161
+ if html is not None:
162
+ allowed_html_tags = BASIC_HTML_TAGS
163
+ html_text = nh3.clean(html, tags=allowed_html_tags)
164
+ article.html_text = html_text
165
+ return article
166
+
167
+ @classmethod
168
+ def _parse_factbox(cls, metadata: Mapping[str, Any], html: str | None) -> FactBox:
169
+ factbox = FactBox(**metadata)
170
+ if html is not None:
171
+ allowed_html_tags = BASIC_HTML_TAGS - {"h2"}
172
+ html_text = nh3.clean(html, tags=allowed_html_tags)
173
+ factbox.html_text = html_text
174
+ return factbox
175
+
176
+ @classmethod
177
+ def _parse_highchart(
178
+ cls, metadata: Mapping[str, Any], html: str | None
179
+ ) -> Highchart:
180
+ highchart = Highchart(**metadata)
181
+ if html is not None:
182
+ allowed_html_tags = {"table", "tbody", "tr", "td"}
183
+ html_table = nh3.clean(html, tags=allowed_html_tags)
184
+ highchart.html_table = html_table
185
+ return highchart
@@ -0,0 +1,149 @@
1
+
2
+ import json
3
+ import subprocess
4
+ from collections.abc import Iterator
5
+ from typing import Any
6
+ from typing import NamedTuple
7
+ from typing import Protocol
8
+ from typing import TypedDict
9
+
10
+ import pandocfilters as pf # type: ignore
11
+
12
+
13
+ class Element(NamedTuple):
14
+ id: str
15
+ inner_html: str | None
16
+
17
+
18
class DocumentProcessor(Protocol):
    """Structural interface for loading a document and rewriting parts of it."""

    def load(self, raw_content: str) -> None:
        """Load the document from its raw string form."""
        ...

    def extract_metadata(self, target_key: str) -> dict[str, Any]:
        """Return the metadata stored under ``target_key``."""
        ...

    def extract_elements(self, target_class: str) -> Iterator[Element]:
        """Iterate over the elements carrying ``target_class``."""
        ...

    def replace_element(self, id_: str, new_html: str) -> None:
        """Replace the element with id ``id_`` by ``new_html``."""
        ...

    def extract_html(self) -> str:
        """Return the document as HTML."""
        ...
24
+
25
+
26
+
27
class PandocElement(TypedDict):
    """One node of the pandoc JSON AST: a type tag and its contents."""

    t: str  # node type tag
    c: Any  # node contents
30
+
31
+
32
+ PandocDocument = TypedDict(
33
+ "PandocDocument",
34
+ {
35
+ "pandoc-api-version": list[int],
36
+ "meta": dict[str, Any],
37
+ "blocks": list[PandocElement],
38
+ },
39
+ )
40
+
41
+
42
class PandocDocumentProcessor:
    """
    Processor for a pandoc document, i.e. the JSON-serialized pandoc AST of a document.

    Example pandoc AST with exactly one div:

    ```json
    {
        "pandoc-api-version": [1, 23, 1],
        "meta": {},
        "blocks": [
            {
                "t": "Div",
                "c": [
                    ["my-highchart", ["ssb"], [["title", "My highchart"]]],
                    []
                ]
            }
        ]
    }
    ```
    Html equivalent:
    ```html
    <div id="my-highchart" class="ssb" title="My highchart">
    </div>
    ```
    References:
    - Studying the result of command `pandoc FILE -t json`, where FILE is a minimal example document (e.g. Markdown or html).
    - https://github.com/jgm/pandocfilters has some examples of how to work with the format.
    - Note: no formal specification exists.
    """

    document: PandocDocument
    _element_index: dict[str, int]

    def load(self, raw_content: str) -> None:
        """Parse the JSON AST in ``raw_content`` and reset the element index."""
        self.document = json.loads(raw_content)
        self._element_index = {}

    def extract_metadata(self, target_key: str) -> dict[str, Any]:
        """Return the metadata under ``target_key`` converted to plain Python values.

        ``MetaMap`` nodes become dicts, ``MetaList`` nodes become lists and
        every other node is flattened with ``pandocfilters.stringify``.

        Raises:
            KeyError: If ``target_key`` is not present in the document metadata.
        """

        def meta_to_dict(meta: Any) -> Any:
            t, c = meta.get("t"), meta.get("c")
            if t == "MetaMap":
                return {k: meta_to_dict(v) for k, v in c.items()}
            elif t == "MetaList":
                return [meta_to_dict(v) for v in c]
            else:
                return pf.stringify(c)

        return meta_to_dict(self.document["meta"][target_key])  # type: ignore

    def extract_html(self) -> str:
        """Render the whole (possibly modified) document to HTML via pandoc."""
        return self._document_to_html(self.document)

    def extract_elements(self, target_class: str) -> Iterator[Element]:
        """Yield every top-level div with an id and class ``target_class``.

        This is a generator: the element index used by ``replace_element``
        is (re)built when iteration starts, not when the method is called.
        ``inner_html`` is ``None`` for divs without inner blocks.
        """
        self._element_index = self._generate_element_index(target_class)
        # Render fragments with the API version of the loaded document, so
        # they are interpreted the same way as the document itself.
        api_version = self.document.get("pandoc-api-version")

        for id_, i in self._element_index.items():
            element = self.document["blocks"][i]
            inner_blocks: list[PandocElement] = element["c"][1]
            inner_html = (
                self._blocks_to_html(inner_blocks, api_version)
                if inner_blocks
                else None
            )
            yield Element(id_, inner_html)

    def replace_element(self, id_: str, new_html: str) -> None:
        """Replace the indexed div ``id_`` in place with a raw HTML block.

        Raises:
            KeyError: If ``id_`` is not in the index built by the latest
                ``extract_elements`` iteration.
        """
        i = self._element_index[id_]
        self.document["blocks"][i] = {
            "t": "RawBlock",
            "c": ["html", new_html],
        }

    def _generate_element_index(self, target_class: str) -> dict[str, int]:
        """Map div id -> position in ``blocks`` for matching top-level divs.

        Only top-level blocks are scanned; divs without an id, or without
        ``target_class`` among their classes, are skipped.
        """
        index: dict[str, int] = {}
        for i, element in enumerate(self.document["blocks"]):
            if element["t"] != "Div":
                continue

            # Div contents are [[id, classes, attributes], inner_blocks].
            id_: str = element["c"][0][0]
            if not id_:
                continue

            classes: list[str] = element["c"][0][1]
            if target_class not in classes:
                continue

            index[id_] = i

        return index

    @classmethod
    def _blocks_to_html(
        cls, blocks: list[PandocElement], api_version: list[int] | None = None
    ) -> str:
        """Render ``blocks`` to HTML by wrapping them in a minimal document.

        ``api_version`` defaults to ``[1, 23, 1]`` when not given.
        """
        document: PandocDocument = {
            "pandoc-api-version": api_version or [1, 23, 1],
            "meta": {},
            "blocks": blocks,
        }
        return cls._document_to_html(document)

    @classmethod
    def _document_to_html(cls, document: PandocDocument) -> str:
        """Convert a pandoc JSON document to HTML with the ``pandoc`` executable.

        Raises:
            subprocess.CalledProcessError: If pandoc exits with a non-zero status.
            FileNotFoundError: If the ``pandoc`` executable cannot be found.
        """
        result = subprocess.run(
            ["pandoc", "-f", "json", "-t", "html"],
            input=json.dumps(document),
            text=True,
            # pandoc reads and writes UTF-8; without this the output would be
            # decoded with the locale encoding and non-ASCII text could break.
            encoding="utf-8",
            capture_output=True,
            check=True,
        )
        return result.stdout
@@ -0,0 +1,124 @@
1
+ import os
2
+ from dataclasses import dataclass
3
+ from typing import Any
4
+ from typing import NamedTuple
5
+ from typing import Protocol
6
+
7
+ import requests
8
+ from dapla_auth_client import AuthClient
9
+
10
+ from ssb_pubmd.adapters.content_parser import Content
11
+ from ssb_pubmd.config import Config
12
+
13
+
14
class PublishClientError(Exception):
    """Raised when publishing content fails."""
15
+
16
class HttpClient(Protocol):
    """Structural interface for the HTTP transport used when publishing."""

    def post(
        self, url: str, headers: dict[str, str], payload: dict[str, Any]
    ) -> dict[str, str]:
        """POST ``payload`` to ``url`` and return the decoded response body."""
        ...
20
+
21
class RequestsHttpClient:
    """``HttpClient`` implementation backed by ``requests``."""

    # Seconds to wait for the server before giving up; without a timeout
    # ``requests`` can block indefinitely on an unresponsive host.
    timeout: float = 60.0

    def post(
        self, url: str, headers: dict[str, str], payload: dict[str, Any]
    ) -> dict[str, str]:
        """POST ``payload`` as JSON and return the decoded JSON response body.

        Raises:
            PublishClientError: If the response status is not OK, or if a
                successful response does not contain a JSON object.
        """
        response = requests.post(
            url,
            headers=headers,
            json=payload,
            timeout=self.timeout,
        )
        # Error responses are not guaranteed to be JSON (e.g. a proxy's HTML
        # error page), so decoding must not be allowed to mask the real failure.
        try:
            body = response.json()
        except ValueError:
            body = None

        if not response.ok:
            if isinstance(body, dict):
                message = body.get("msg", "no message")
            else:
                message = f"no message (HTTP {response.status_code})"
            raise PublishClientError(f"Sync failed. Response message: {message}")

        if not isinstance(body, dict):
            raise PublishClientError("Sync failed. Could not parse response body.")
        return body  # type: ignore
36
+
37
class TokenClient(Protocol):
    """Structural interface for anything that can supply a bearer token."""

    def get_token(self) -> str:
        """Return the token to send in the ``Authorization`` header."""
        ...
39
+
40
+
41
class LocalTokenClient:
    """Token client that reads the token from the ``OIDC_TOKEN`` environment variable."""

    def get_token(self) -> str:
        """Return ``OIDC_TOKEN``, or an empty string when it is not set."""
        return os.getenv("OIDC_TOKEN", "")
44
+
45
+
46
class DaplaTokenClient:
    """Token client that fetches a personal token once, at construction time."""

    token: str

    def __init__(self) -> None:
        # The token is fetched a single time and then reused for every call.
        personal_token = AuthClient.fetch_personal_token(audiences=["ssbno"])
        self.token = personal_token

    def get_token(self) -> str:
        """Return the token fetched when this client was created."""
        return self.token
54
+
55
class Response(NamedTuple):
    """Result of a publish call: where the content ended up and how to embed it."""

    publish_path: str
    publish_id: str
    publish_url: str
    publish_html: str
60
+
61
class PublishClient(Protocol):
    """Structural interface for a client that can publish content."""

    http_client: HttpClient

    def send_content(self, content: Content) -> Response:
        """Publish ``content`` and return the resulting ``Response``."""
        ...
65
+
66
+ DEFAULT_HTTP_CLIENT = RequestsHttpClient()
67
+ DEFULT_TOKEN_CLIENT = LocalTokenClient()
68
@dataclass
class MimirPublishClient:
    """Publish client that posts serialized content to a Mimir endpoint.

    Attributes are the base URL, the endpoint path appended to it, the base
    path used to build preview URLs, and the HTTP/token clients to use.
    """

    base_url: str
    endpoint: str
    preview_base_path: str
    http_client: HttpClient = DEFAULT_HTTP_CLIENT
    token_client: TokenClient = DEFULT_TOKEN_CLIENT

    def _create_headers(self) -> dict[str, str]:
        """Return the request headers: bearer authorization plus JSON content type."""
        return {
            "Authorization": f"Bearer {self.token_client.get_token()}",
            "Content-Type": "application/json",
        }

    def send_content(self, content: Content) -> Response:
        """Post ``content`` and translate the response body into a ``Response``.

        For ``highchart`` and ``factBox`` content, ``publish_html`` holds a
        macro snippet referencing the returned id; otherwise it is empty.

        Raises:
            PublishClientError: If the response body lacks ``_id`` or ``_path``.
        """
        headers = self._create_headers()
        response_body = self.http_client.post(
            url=f"{self.base_url}{self.endpoint}",
            headers=headers,
            payload=content.serialize(),
        )

        id_ = response_body.get("_id")
        path = response_body.get("_path")
        if path is None or id_ is None:
            raise PublishClientError("Sync failed. Could not parse response body.")

        # id_ is known to be set past the guard above, so only the content
        # type decides whether a macro snippet is produced.
        content_type = content.content_type
        if content_type in ("highchart", "factBox"):
            html = f"<p>[ {content_type} {content_type}=&quot;{id_}&quot; /]</p>"
        else:
            html = ""

        return Response(
            publish_path=path,
            publish_id=id_,
            publish_url=self.base_url + self.preview_base_path + path,
            publish_html=html,
        )
112
+
113
+
114
def get_publish_client(
    config: Config, use_dapla_token_client: bool = False
) -> PublishClient:
    """Build a ``MimirPublishClient`` from ``config``.

    A ``DaplaTokenClient`` is created when ``use_dapla_token_client`` is true;
    otherwise the module-level default token client is used.
    """
    token_client: TokenClient
    if use_dapla_token_client:
        token_client = DaplaTokenClient()
    else:
        token_client = DEFULT_TOKEN_CLIENT

    return MimirPublishClient(
        base_url=config.publish_base_url,
        endpoint=config.publish_endpoint,
        preview_base_path=config.publish_preview_base_path,
        token_client=token_client,
    )
@@ -0,0 +1,42 @@
1
+ import json
2
+ from collections.abc import Mapping
3
+ from pathlib import Path
4
+ from typing import Any
5
+ from typing import Protocol
6
+
7
+
8
class Storage(Protocol):
    """Structural interface for a keyed store of per-item data."""

    def update(self, key: str, data: Mapping[str, Any]) -> None:
        """Merge ``data`` into the entry stored under ``key``."""
        ...

    def get(self, key: str) -> dict[str, Any]:
        """Return the entry stored under ``key``."""
        ...
11
+
12
class LocalFileStorage:
    """``Storage`` implementation backed by a ``.ssbno.json`` file.

    The file lives directly in the project folder and holds one JSON object
    mapping each key to its entry (itself a JSON object).
    """

    path: Path

    def __init__(self, project_folder: Path) -> None:
        """Point the store at ``project_folder`` and create the file if missing."""
        self.path = project_folder / ".ssbno.json"
        if not self.path.exists():
            self._save({})

    def _load(self) -> dict[str, dict[str, Any]]:
        """Read the whole store; an empty file is treated as an empty store."""
        text = self.path.read_text(encoding="utf-8")
        if not text.strip():
            return {}
        return json.loads(text)  # type: ignore

    def _save(self, data: dict[str, dict[str, Any]]) -> None:
        """Write the whole store, replacing the previous file in one step."""
        # Write to a sibling file first and swap it in afterwards, so an
        # interrupted write cannot leave a truncated, unreadable store behind.
        tmp_path = self.path.with_name(self.path.name + ".tmp")
        with tmp_path.open("w", encoding="utf-8") as f:
            json.dump(data, f, indent=2)
        tmp_path.replace(self.path)

    def update(self, key: str, data: Mapping[str, Any]) -> None:
        """Merge ``data`` into the entry for ``key``.

        Fields whose value is ``None`` are ignored, so they never overwrite
        (or remove) a previously stored value.
        """
        store = self._load()

        current = store.get(key, {})
        current.update(
            {field: value for field, value in data.items() if value is not None}
        )

        store[key] = current
        self._save(store)

    def get(self, key: str) -> dict[str, Any]:
        """Return a copy of the entry for ``key``, or ``{}`` if there is none."""
        store = self._load()
        return store.get(key, {}).copy()