ssb-pubmd 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ssb_pubmd/__init__.py +4 -0
- ssb_pubmd/__main__.py +2 -13
- ssb_pubmd/adapters/content_parser.py +185 -0
- ssb_pubmd/adapters/document_processor.py +149 -0
- ssb_pubmd/adapters/publish_client.py +124 -0
- ssb_pubmd/adapters/storage.py +42 -0
- ssb_pubmd/cli.py +78 -0
- ssb_pubmd/config.py +9 -13
- ssb_pubmd/domain/document_publisher.py +46 -0
- ssb_pubmd/notebook_client.py +130 -0
- {ssb_pubmd-0.1.0.dist-info → ssb_pubmd-0.1.1.dist-info}/METADATA +8 -5
- ssb_pubmd-0.1.1.dist-info/RECORD +16 -0
- ssb_pubmd/adapters/cli.py +0 -21
- ssb_pubmd/adapters/cms_client.py +0 -47
- ssb_pubmd/adapters/local_storage.py +0 -72
- ssb_pubmd/adapters/secret_manager_client.py +0 -66
- ssb_pubmd/enonic_cms_manager.py +0 -30
- ssb_pubmd/models.py +0 -28
- ssb_pubmd/ports.py +0 -57
- ssb_pubmd-0.1.0.dist-info/RECORD +0 -15
- {ssb_pubmd-0.1.0.dist-info → ssb_pubmd-0.1.1.dist-info}/LICENSE +0 -0
- {ssb_pubmd-0.1.0.dist-info → ssb_pubmd-0.1.1.dist-info}/WHEEL +0 -0
- {ssb_pubmd-0.1.0.dist-info → ssb_pubmd-0.1.1.dist-info}/entry_points.txt +0 -0
ssb_pubmd/__init__.py
ADDED
ssb_pubmd/__main__.py
CHANGED
|
@@ -1,23 +1,12 @@
|
|
|
1
1
|
import sys
|
|
2
2
|
|
|
3
|
-
from ssb_pubmd.
|
|
4
|
-
from ssb_pubmd.adapters.cms_client import MimirCmsClient
|
|
5
|
-
from ssb_pubmd.adapters.local_storage import LocalStorageAdapter
|
|
6
|
-
from ssb_pubmd.adapters.secret_manager_client import GoogleSecretManagerClient
|
|
3
|
+
from ssb_pubmd.cli import run_cli
|
|
7
4
|
from ssb_pubmd.config import get_config
|
|
8
|
-
from ssb_pubmd.enonic_cms_manager import EnonicCmsManager
|
|
9
5
|
|
|
10
6
|
|
|
11
7
|
def main() -> None:
|
|
12
8
|
config = get_config()
|
|
13
|
-
|
|
14
|
-
config=config,
|
|
15
|
-
cms_client=MimirCmsClient(config.cms_base_url),
|
|
16
|
-
secret_manager_client=GoogleSecretManagerClient(config.gc_secret_resource_name),
|
|
17
|
-
content_file_handler=LocalStorageAdapter(config.metadata_file_path),
|
|
18
|
-
)
|
|
19
|
-
cli_adapter = CliAdapter(cms_manager=cms_manager)
|
|
20
|
-
cli_adapter.run(sys.argv)
|
|
9
|
+
run_cli(sys.argv, config)
|
|
21
10
|
|
|
22
11
|
|
|
23
12
|
if __name__ == "__main__":
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
from collections.abc import Mapping
|
|
2
|
+
from dataclasses import asdict
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from typing import Any
|
|
5
|
+
from typing import Literal
|
|
6
|
+
from typing import Protocol
|
|
7
|
+
|
|
8
|
+
import nh3
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
|
|
12
|
+
class Content:
|
|
13
|
+
title: str
|
|
14
|
+
content_type: str
|
|
15
|
+
publish_folder: str | None = None
|
|
16
|
+
publish_id: str | None = None
|
|
17
|
+
|
|
18
|
+
def to_dict(self) -> dict[str, Any]:
|
|
19
|
+
return asdict(self)
|
|
20
|
+
|
|
21
|
+
def serialize(self) -> dict[str, Any]:
|
|
22
|
+
raise NotImplementedError()
|
|
23
|
+
|
|
24
|
+
class ContentParser(Protocol):
    """Structural interface for objects that turn metadata and HTML into content."""

    # ``metadata`` carries the content fields; ``html`` is the optional body
    # markup (``None`` when there is no body).
    def parse(self, metadata: Mapping[str, Any], html: str | None) -> Content: ...
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@dataclass
class MimirContent(Content):
    """Content that can be serialized into a Mimir publish payload."""

    def is_publishable(self) -> bool:
        """Return whether the content has the minimum fields needed to publish.

        A non-empty title is required, together with either an existing
        ``publish_id`` or a ``publish_folder``.
        """
        if self.title == "":
            return False
        if self.publish_id is None and self.publish_folder is None:
            return False
        return True

    def serialize(self) -> dict[str, Any]:
        """Build the base payload shared by all Mimir content types.

        :return: A dict with ``contentType``, ``displayName``, ``parentPath``
            and an empty ``data`` dict, plus ``_id`` when ``publish_id`` is set.
        :raises ValueError: If :meth:`is_publishable` returns ``False``.
        """
        if not self.is_publishable():
            # A message is included so that callers printing the exception
            # show something actionable instead of an empty string.
            raise ValueError(
                f"Content {self.title!r} is not publishable: a non-empty title "
                "and either a publish id or a publish folder are required."
            )
        s: dict[str, Any] = {
            "contentType": "mimir:" + self.content_type,
            "displayName": self.title,
            "parentPath": self.publish_folder,
            "data": {},
        }
        # ``_id`` is only sent for content that already has a publish id.
        if self.publish_id is not None:
            s["_id"] = self.publish_id
        return s
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@dataclass
class Author:
    """An article author, identified by name and email."""

    name: str
    email: str
|
|
55
|
+
|
|
56
|
+
@dataclass
class Article(MimirContent):
    """Mimir article with optional authors, an ingress and an HTML body."""

    content_type: str = "article"
    authors: list[Author] | None = None
    ingress: str = ""
    html_text: str = ""

    def serialize(self) -> dict[str, Any]:
        """Extend the base payload with the article-specific ``data`` fields.

        ``authorItemSet`` is only included when there is at least one author.
        """
        payload = super().serialize()
        data = payload["data"]
        if self.authors:
            data["authorItemSet"] = list(map(asdict, self.authors))
        data["ingress"] = self.ingress
        data["articleText"] = self.html_text
        return payload
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
GraphType = Literal["line", "pie", "column", "bar", "area", "barNegative"]
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
@dataclass
class Highchart(MimirContent):
    """Mimir highchart backed by either an inline HTML table or a TBML source."""

    content_type: str = "highchart"
    # NOTE(review): graph_type is stored on the object but is not written to
    # the payload by serialize() below - confirm whether that is intended.
    graph_type: GraphType = "line"
    html_table: str | None = None
    tbml: str | None = None
    xlabel: str = "x"
    ylabel: str = "y"

    def serialize(self) -> dict[str, Any]:
        """Extend the base payload with the data source and axis titles.

        An HTML table takes precedence over a TBML reference when both are set;
        when neither is set no data source is added.
        """
        payload = super().serialize()
        data = payload["data"]

        if self.html_table is not None:
            data["htmlTable"] = self.html_table
        elif self.tbml is not None:
            tb_source = {"urlOrId": self.tbml}
            data["dataSource"] = {
                "_selected": "tbprocessor",
                "tbprocessor": tb_source,
            }

        data.update(xAxisTitle=self.xlabel, yAxisTitle=self.ylabel)
        return payload
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
@dataclass
class FactBox(MimirContent):
    """Mimir fact box: a titled box with a display type and an HTML body."""

    content_type: str = "factBox"
    display_type: Literal["default", "sneakPeek", "aiIcon"] = "default"
    html_text: str = ""

    def serialize(self) -> dict[str, Any]:
        """Extend the base payload with the display type and the box text."""
        payload = super().serialize()
        payload["data"].update(
            expansionBoxType=self.display_type,
            text=self.html_text,
        )
        return payload
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
BASIC_HTML_TAGS = {
|
|
115
|
+
"p",
|
|
116
|
+
"br",
|
|
117
|
+
"strong",
|
|
118
|
+
"em",
|
|
119
|
+
"b",
|
|
120
|
+
"i",
|
|
121
|
+
"ul",
|
|
122
|
+
"ol",
|
|
123
|
+
"li",
|
|
124
|
+
"blockquote",
|
|
125
|
+
"h1",
|
|
126
|
+
"h2",
|
|
127
|
+
"h3",
|
|
128
|
+
"h4",
|
|
129
|
+
"h5",
|
|
130
|
+
"a",
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
class MimirContentParser:
|
|
135
|
+
def parse(self, metadata: Mapping[str, Any], html: str | None) -> Content:
|
|
136
|
+
match metadata.get("content_type"):
|
|
137
|
+
case "article":
|
|
138
|
+
return self._parse_article(metadata, html)
|
|
139
|
+
case "factBox":
|
|
140
|
+
return self._parse_factbox(metadata, html)
|
|
141
|
+
case "highchart":
|
|
142
|
+
return self._parse_highchart(metadata, html)
|
|
143
|
+
case _:
|
|
144
|
+
return MimirContent(**metadata)
|
|
145
|
+
|
|
146
|
+
def serialize(self, content: Content) -> dict[str, Any]:
|
|
147
|
+
if isinstance(content, MimirContent):
|
|
148
|
+
return content.serialize()
|
|
149
|
+
else:
|
|
150
|
+
raise Exception()
|
|
151
|
+
|
|
152
|
+
@classmethod
|
|
153
|
+
def _parse_article(cls, metadata: Mapping[str, Any], html: str | None) -> Article:
|
|
154
|
+
article = Article(
|
|
155
|
+
title=metadata["title"],
|
|
156
|
+
publish_folder="/ssb" + metadata["path"],
|
|
157
|
+
publish_id=metadata.get("publish_id"),
|
|
158
|
+
authors=[Author(**data) for data in metadata.get("authors", [])],
|
|
159
|
+
ingress=metadata.get("ingress", ""),
|
|
160
|
+
)
|
|
161
|
+
if html is not None:
|
|
162
|
+
allowed_html_tags = BASIC_HTML_TAGS
|
|
163
|
+
html_text = nh3.clean(html, tags=allowed_html_tags)
|
|
164
|
+
article.html_text = html_text
|
|
165
|
+
return article
|
|
166
|
+
|
|
167
|
+
@classmethod
|
|
168
|
+
def _parse_factbox(cls, metadata: Mapping[str, Any], html: str | None) -> FactBox:
|
|
169
|
+
factbox = FactBox(**metadata)
|
|
170
|
+
if html is not None:
|
|
171
|
+
allowed_html_tags = BASIC_HTML_TAGS - {"h2"}
|
|
172
|
+
html_text = nh3.clean(html, tags=allowed_html_tags)
|
|
173
|
+
factbox.html_text = html_text
|
|
174
|
+
return factbox
|
|
175
|
+
|
|
176
|
+
@classmethod
|
|
177
|
+
def _parse_highchart(
|
|
178
|
+
cls, metadata: Mapping[str, Any], html: str | None
|
|
179
|
+
) -> Highchart:
|
|
180
|
+
highchart = Highchart(**metadata)
|
|
181
|
+
if html is not None:
|
|
182
|
+
allowed_html_tags = {"table", "tbody", "tr", "td"}
|
|
183
|
+
html_table = nh3.clean(html, tags=allowed_html_tags)
|
|
184
|
+
highchart.html_table = html_table
|
|
185
|
+
return highchart
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
|
|
2
|
+
import json
|
|
3
|
+
import subprocess
|
|
4
|
+
from collections.abc import Iterator
|
|
5
|
+
from typing import Any
|
|
6
|
+
from typing import NamedTuple
|
|
7
|
+
from typing import Protocol
|
|
8
|
+
from typing import TypedDict
|
|
9
|
+
|
|
10
|
+
import pandocfilters as pf # type: ignore
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class Element(NamedTuple):
    """A document element: its id and the HTML of its inner content, if any."""

    id: str
    # None when the element has no inner content.
    inner_html: str | None
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class DocumentProcessor(Protocol):
    """Structural interface for loading, inspecting and rewriting a document."""

    # Load the raw (serialized) document that the other methods operate on.
    def load(self, raw_content: str) -> None: ...
    # Return the document metadata stored under ``target_key``.
    def extract_metadata(self, target_key: str) -> dict[str, Any]: ...
    # Iterate over the elements carrying the class ``target_class``.
    def extract_elements(self, target_class: str) -> Iterator[Element]: ...
    # Replace the element with id ``id_`` by the given HTML.
    def replace_element(self, id_: str, new_html: str) -> None: ...
    # Render the (possibly modified) document as HTML.
    def extract_html(self) -> str: ...
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class PandocElement(TypedDict):
    """A node of the pandoc JSON AST."""

    # Element type tag, e.g. "Div" or "RawBlock".
    t: str
    # Element content; its structure depends on the type tag.
    c: Any
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
# Top-level structure of a JSON-serialized pandoc document. The functional
# TypedDict syntax is used because "pandoc-api-version" is not a valid
# Python identifier.
PandocDocument = TypedDict(
    "PandocDocument",
    {
        "pandoc-api-version": list[int],
        "meta": dict[str, Any],
        "blocks": list[PandocElement],
    },
)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class PandocDocumentProcessor:
    """
    Processor for a pandoc document, i.e. the JSON-serialized pandoc AST of a document.

    Example pandoc AST with exactly one div:

    ```json
    {
        "pandoc-api-version": [1, 23, 1],
        "meta": {},
        "blocks": [
            {
                "t": "Div",
                "c": [
                    ["my-highchart", ["ssb"], [["title", "My highchart"]]],
                    []
                ]
            }
        ]
    }
    ```
    Html equivalent:
    ```html
    <div id="my-highchart" class="ssb" title="My highchart">
    </div>
    ```
    References:
    - Studying the result of command `pandoc FILE -t json`, where FILE is a minimal example document (e.g. Markdown or html).
    - https://github.com/jgm/pandocfilters has some examples of how to work with the format.
    - Note: no formal specification exists.
    """

    document: PandocDocument
    # Maps element id -> index in ``document["blocks"]``; rebuilt by
    # ``extract_elements`` and consumed by ``replace_element``.
    _element_index: dict[str, int]

    def load(self, raw_content: str) -> None:
        """Parse the JSON document and reset the element index."""
        self.document: PandocDocument = json.loads(raw_content)
        self._element_index = {}

    def extract_metadata(self, target_key: str) -> dict[str, Any]:
        """Return the metadata under ``target_key`` as plain Python values.

        ``MetaMap`` becomes a dict, ``MetaList`` a list, ``MetaString`` and
        ``MetaBool`` their raw value; every other node is flattened to text.

        :raises KeyError: If ``target_key`` is not present in the metadata.
        """

        def meta_to_dict(meta: Any) -> Any:
            t, c = meta.get("t"), meta.get("c")
            if t == "MetaMap":
                return {k: meta_to_dict(v) for k, v in c.items()}
            if t == "MetaList":
                return [meta_to_dict(v) for v in c]
            if t in ("MetaString", "MetaBool"):
                # Scalar payloads: pf.stringify only visits elements nested
                # in lists, so it would return "" and drop the value.
                return c
            return pf.stringify(c)

        return meta_to_dict(self.document["meta"][target_key])  # type: ignore

    def extract_html(self) -> str:
        """Render the whole document, including replacements, as HTML."""
        return self._document_to_html(self.document)

    def extract_elements(self, target_class: str) -> Iterator[Element]:
        """Yield the top-level divs that have an id and class ``target_class``.

        This is a generator: the element index is (re)built when iteration
        starts, so ``replace_element`` only knows the ids after that point.
        """
        self._element_index = self._generate_element_index(target_class)

        for id_, i in self._element_index.items():
            element = self.document["blocks"][i]
            # A Div's content is [attributes, inner blocks].
            inner_blocks: list[PandocElement] = element["c"][1]
            inner_html = self._blocks_to_html(inner_blocks) if inner_blocks else None
            yield Element(id_, inner_html)

    def replace_element(self, id_: str, new_html: str) -> None:
        """Replace the indexed element ``id_`` with a raw HTML block.

        :raises KeyError: If ``id_`` is not in the current element index.
        """
        i = self._element_index[id_]
        self.document["blocks"][i] = {
            "t": "RawBlock",
            "c": ["html", new_html],
        }

    def _generate_element_index(self, target_class: str) -> dict[str, int]:
        """Map ids of matching top-level divs to their block positions."""
        index = {}
        for i, element in enumerate(self.document["blocks"]):
            if element["t"] != "Div":
                continue

            # Div attributes are [id, classes, key-value pairs].
            id_: str = element["c"][0][0]
            if not id_:
                continue

            classes: list[str] = element["c"][0][1]
            if target_class not in classes:
                continue

            index[id_] = i

        return index

    @classmethod
    def _blocks_to_html(cls, blocks: list[PandocElement]) -> str:
        """Render a list of blocks by wrapping them in a minimal document."""
        document: PandocDocument = {
            "pandoc-api-version": [1, 23, 1],
            "meta": {},
            "blocks": blocks,
        }
        return cls._document_to_html(document)

    @classmethod
    def _document_to_html(cls, document: PandocDocument) -> str:
        """Convert a pandoc document to HTML by running the ``pandoc`` executable.

        :raises subprocess.CalledProcessError: If pandoc exits with an error.
        """
        result = subprocess.run(
            ["pandoc", "-f", "json", "-t", "html"],
            input=json.dumps(document),
            text=True,
            capture_output=True,
            check=True,
        )
        return result.stdout
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
from typing import Any
|
|
4
|
+
from typing import NamedTuple
|
|
5
|
+
from typing import Protocol
|
|
6
|
+
|
|
7
|
+
import requests
|
|
8
|
+
from dapla_auth_client import AuthClient
|
|
9
|
+
|
|
10
|
+
from ssb_pubmd.adapters.content_parser import Content
|
|
11
|
+
from ssb_pubmd.config import Config
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
# Raised when publishing fails: an unsuccessful HTTP response, or a response
# body that lacks the expected fields.
class PublishClientError(Exception): ...
|
|
15
|
+
|
|
16
|
+
class HttpClient(Protocol):
    """Structural interface for the HTTP transport used when publishing."""

    # POST ``payload`` to ``url`` with ``headers`` and return the response body.
    def post(
        self, url: str, headers: dict[str, str], payload: dict[str, Any]
    ) -> dict[str, str]: ...
|
|
20
|
+
|
|
21
|
+
class RequestsHttpClient:
    """``HttpClient`` implementation backed by the ``requests`` library."""

    # Seconds to wait for the server. Without a timeout ``requests`` can block
    # indefinitely on an unresponsive host.
    timeout: float = 60.0

    def post(
        self, url: str, headers: dict[str, str], payload: dict[str, Any]
    ) -> dict[str, str]:
        """POST ``payload`` as JSON and return the decoded JSON response body.

        :param url: Full URL to post to.
        :param headers: HTTP headers to send.
        :param payload: JSON-serializable request body.
        :return: The response body as a dictionary.
        :raises PublishClientError: If the response status is not OK, or the
            body is not a JSON object.
        """
        response = requests.post(
            url,
            headers=headers,
            json=payload,
            timeout=self.timeout,
        )
        # Error responses are not guaranteed to contain JSON (e.g. a proxy
        # error page), so decoding must not hide the real failure.
        try:
            body = response.json()
        except ValueError:
            body = None

        if not response.ok:
            if isinstance(body, dict):
                message = body.get("msg", "no message")
            else:
                message = f"no message (HTTP {response.status_code})"
            raise PublishClientError(f"Sync failed. Response message: {message}")
        if not isinstance(body, dict):
            raise PublishClientError("Sync failed. Could not parse response body.")
        return body
|
|
36
|
+
|
|
37
|
+
class TokenClient(Protocol):
    """Structural interface for objects that provide a bearer token."""

    # Return the token to put in the Authorization header.
    def get_token(self) -> str: ...
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class LocalTokenClient:
    """Token client that reads the token from the process environment."""

    def get_token(self) -> str:
        """Return the ``OIDC_TOKEN`` environment variable, or ``""`` if unset."""
        token = os.getenv("OIDC_TOKEN", "")
        return token
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class DaplaTokenClient:
    """Token client that obtains a personal token through ``AuthClient``."""

    token: str

    def __init__(self) -> None:
        # The token is fetched once, at construction time, for audience
        # "ssbno"; get_token() returns this same value on every call.
        self.token = AuthClient.fetch_personal_token(audiences=["ssbno"])

    def get_token(self) -> str:
        """Return the token fetched when the client was created."""
        return self.token
|
|
54
|
+
|
|
55
|
+
class Response(NamedTuple):
    """Result of publishing one piece of content."""

    # Path of the published content, taken from the response body.
    publish_path: str
    # Id of the published content, taken from the response body.
    publish_id: str
    # Preview URL built from the base URL, the preview base path and the path.
    publish_url: str
    # Macro HTML referring to the published content ("" when not applicable).
    publish_html: str
|
|
60
|
+
|
|
61
|
+
class PublishClient(Protocol):
    """Structural interface for clients that publish content."""

    http_client: HttpClient

    # Publish ``content`` and return where and how it was published.
    def send_content(self, content: Content) -> Response: ...
|
|
65
|
+
|
|
66
|
+
DEFAULT_HTTP_CLIENT = RequestsHttpClient()
|
|
67
|
+
DEFULT_TOKEN_CLIENT = LocalTokenClient()
|
|
68
|
+
@dataclass
class MimirPublishClient:
    """Publish client that posts serialized content to a Mimir endpoint."""

    base_url: str
    endpoint: str
    preview_base_path: str
    http_client: HttpClient = DEFAULT_HTTP_CLIENT
    token_client: TokenClient = DEFULT_TOKEN_CLIENT

    def _create_headers(self) -> dict[str, str]:
        """Return the request headers, including the bearer token."""
        return {
            "Authorization": f"Bearer {self.token_client.get_token()}",
            "Content-Type": "application/json",
        }

    def send_content(self, content: Content) -> Response:
        """Post ``content`` and describe the published result.

        :param content: The content to serialize and send.
        :return: A ``Response`` with the published path, id, preview URL and,
            for highcharts and fact boxes, the macro HTML referring to it.
        :raises PublishClientError: If the response body lacks ``_id`` or
            ``_path``.
        """
        headers = self._create_headers()
        response_body = self.http_client.post(
            url=f"{self.base_url}{self.endpoint}",
            headers=headers,
            payload=content.serialize(),
        )

        id_ = response_body.get("_id")
        path = response_body.get("_path")

        if path is None or id_ is None:
            raise PublishClientError("Sync failed. Could not parse response body.")

        # Only highcharts and fact boxes are embedded through a macro; the
        # macro name and its attribute name are both the content type.
        if content.content_type in ["highchart", "factBox"]:
            macro_type = content.content_type
            # Single-quoted f-string so the attribute's double quotes are
            # literal text; the double-quoted form does not parse.
            html = f'<p>[ {macro_type} {macro_type}="{id_}" /]</p>'
        else:
            html = ""

        return Response(
            publish_path=path,
            publish_id=id_,
            publish_url=self.base_url + self.preview_base_path + path,
            publish_html=html,
        )
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def get_publish_client(
    config: Config, use_dapla_token_client: bool = False
) -> PublishClient:
    """Create a Mimir publish client from the application config.

    :param config: Provides the base URL, endpoint and preview base path.
    :param use_dapla_token_client: When true a ``DaplaTokenClient`` is created
        (fetching a token immediately); otherwise the module-level default
        token client is used.
    :return: The configured publish client.
    """
    token_client: TokenClient
    if use_dapla_token_client:
        token_client = DaplaTokenClient()
    else:
        token_client = DEFULT_TOKEN_CLIENT

    return MimirPublishClient(
        base_url=config.publish_base_url,
        endpoint=config.publish_endpoint,
        preview_base_path=config.publish_preview_base_path,
        token_client=token_client,
    )
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from collections.abc import Mapping
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Any
|
|
5
|
+
from typing import Protocol
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class Storage(Protocol):
    """Structural interface for a keyed store of metadata dictionaries."""

    # Merge ``data`` into the entry stored under ``key``.
    def update(self, key: str, data: Mapping[str, Any]) -> None: ...
    # Return the entry stored under ``key``.
    def get(self, key: str) -> dict[str, Any]: ...
|
|
11
|
+
|
|
12
|
+
class LocalFileStorage:
    """Keyed metadata store persisted as JSON in ``<project_folder>/.ssbno.json``."""

    path: Path

    def __init__(self, project_folder: Path) -> None:
        """Point the store at ``project_folder`` and create the file if missing.

        :param project_folder: Existing folder that holds (or will hold) the
            ``.ssbno.json`` file.
        """
        self.path = project_folder / ".ssbno.json"
        if not self.path.exists():
            self._save({})

    def _load(self) -> dict[str, dict[str, Any]]:
        """Read the whole store from disk."""
        # The encoding is explicit so the file reads the same on every
        # platform, regardless of the locale's default encoding.
        with self.path.open(encoding="utf-8") as f:
            return json.load(f)  # type: ignore

    def _save(self, data: dict[str, dict[str, Any]]) -> None:
        """Write the whole store to disk, replacing the previous content."""
        with self.path.open("w", encoding="utf-8") as f:
            json.dump(data, f, indent=2)

    def update(self, key: str, data: Mapping[str, Any]) -> None:
        """Merge ``data`` into the entry for ``key`` and persist the store.

        Fields whose value is ``None`` are skipped, so they never overwrite a
        previously stored value.
        """
        store = self._load()

        current = store.get(key, {})
        current.update(
            {field: value for field, value in data.items() if value is not None}
        )

        store[key] = current
        self._save(store)

    def get(self, key: str) -> dict[str, Any]:
        """Return a copy of the entry for ``key``, or ``{}`` if there is none."""
        store = self._load()
        return store.get(key, {}).copy()
|
ssb_pubmd/cli.py
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
import subprocess
|
|
2
|
+
import sys
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from watchfiles import watch
|
|
6
|
+
|
|
7
|
+
from ssb_pubmd.adapters.content_parser import MimirContentParser
|
|
8
|
+
from ssb_pubmd.adapters.document_processor import PandocDocumentProcessor
|
|
9
|
+
from ssb_pubmd.adapters.publish_client import PublishClient
|
|
10
|
+
from ssb_pubmd.adapters.publish_client import get_publish_client
|
|
11
|
+
from ssb_pubmd.adapters.storage import LocalFileStorage
|
|
12
|
+
from ssb_pubmd.config import Config
|
|
13
|
+
from ssb_pubmd.domain.document_publisher import sync_document
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def run_cli(system_arguments: list[str], config: Config) -> None:
    """Dispatch the command line; only ``preview FILE`` is supported.

    :param system_arguments: The raw argument list (program name first).
    :param config: Application configuration passed on to the command.

    Any other argument shape prints a usage line and exits with status 1.
    """
    is_preview_call = (
        len(system_arguments) == 3 and system_arguments[1] == "preview"
    )
    if not is_preview_call:
        print("Usage: ssb-pubmd preview QUARTO_MARKDOWN_FILE")
        sys.exit(1)

    _preview(system_arguments[2], config)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _preview(file_path: str, config: Config) -> None:
    """Sync a Quarto file once, then again on every change to it.

    Exits with status 1 when ``file_path`` does not have the ``.qmd`` suffix.
    """
    if Path(file_path).suffix != ".qmd":
        print("Only Quarto Markdown (.qmd) files are supported.")
        sys.exit(1)

    # Prefer the Dapla token client; if creating it fails for any reason,
    # fall back to the default (environment based) token client.
    try:
        print("Fetching labid token...")
        publish_client = get_publish_client(config, use_dapla_token_client=True)
    except Exception:
        print("Failed to fetch labid token; using environment variable...")
        publish_client = get_publish_client(config, use_dapla_token_client=False)

    _sync_updated_file(file_path, publish_client)

    print("Watching for file changes...")
    # The reported change set is not needed; any change triggers a full sync.
    for _ in watch(file_path):
        _sync_updated_file(file_path, publish_client)
|
|
41
|
+
|
|
42
|
+
def _sync_updated_file(file_path: str, publish_client: PublishClient) -> None:
    """Sync the file and report the outcome on stdout.

    Errors are printed rather than raised, so one failed sync does not stop
    the caller.
    """
    print("Syncing updated document...")
    try:
        preview_url = _sync_quarto_file(file_path, publish_client)
    except Exception as error:
        print(f"Error during sync: {error}")
    else:
        print(f"Content synced successfully. Preview URL: {preview_url}")
|
|
49
|
+
|
|
50
|
+
def _sync_quarto_file(file_path: str, publish_client: PublishClient) -> str:
    """Render the Quarto file to a pandoc document and sync it.

    Storage lives in the folder that contains ``file_path``.

    :return: The value returned by ``sync_document``.
    """
    project_folder = Path(file_path).parent
    return sync_document(
        _quarto_to_pandoc(file_path),
        PandocDocumentProcessor(),
        MimirContentParser(),
        LocalFileStorage(project_folder=project_folder),
        publish_client,
    )
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _quarto_to_pandoc(file_path: str) -> str:
    """Run ``quarto render`` on the file and return its JSON output.

    :raises subprocess.CalledProcessError: If quarto exits with an error.
    """
    command = [
        "quarto",
        "render",
        file_path,
        "--to",
        "json",
        "-M",
        "include:false",
        "--output",
        "-",  # write the result to stdout so it can be captured
    ]
    completed = subprocess.run(command, text=True, capture_output=True, check=True)
    return completed.stdout
|
ssb_pubmd/config.py
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
"""App configuration through environment variables."""
|
|
2
|
+
|
|
1
3
|
import os
|
|
2
4
|
from dataclasses import dataclass
|
|
3
5
|
from pathlib import Path
|
|
@@ -7,21 +9,15 @@ APP_NAME = "SSB_PUBMD"
|
|
|
7
9
|
|
|
8
10
|
@dataclass
|
|
9
11
|
class Config:
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
12
|
+
publish_base_url: str
|
|
13
|
+
publish_endpoint: str
|
|
14
|
+
publish_preview_base_path: str
|
|
13
15
|
|
|
14
16
|
|
|
15
17
|
def get_config(metadata_file_path: Path | None = None) -> Config:
|
|
16
|
-
|
|
17
|
-
if not metadata_file_path:
|
|
18
|
-
user_data_dir.mkdir(parents=True, exist_ok=True)
|
|
19
|
-
metadata_file_path = user_data_dir / "metadata.json"
|
|
20
|
-
if not metadata_file_path.exists():
|
|
21
|
-
with open(metadata_file_path, "x") as metadata_file:
|
|
22
|
-
metadata_file.write("{}\n")
|
|
18
|
+
"""Get config from enviromnent variables."""
|
|
23
19
|
return Config(
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
20
|
+
publish_base_url=os.environ[f"{APP_NAME}_BASE_URL"],
|
|
21
|
+
publish_endpoint=os.environ[f"{APP_NAME}_ENDPOINT"],
|
|
22
|
+
publish_preview_base_path=os.environ[f"{APP_NAME}_PREVIEW_BASE_PATH"],
|
|
27
23
|
)
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
from ssb_pubmd.adapters.content_parser import ContentParser
|
|
2
|
+
from ssb_pubmd.adapters.document_processor import DocumentProcessor
|
|
3
|
+
from ssb_pubmd.adapters.publish_client import PublishClient
|
|
4
|
+
from ssb_pubmd.adapters.storage import Storage
|
|
5
|
+
|
|
6
|
+
USER_KEY_PREFIX = "user:"
|
|
7
|
+
DOCUMENT_KEY = "app:document"
|
|
8
|
+
|
|
9
|
+
def sync_document(
    raw_document_content: str,
    document_processor: DocumentProcessor,
    content_parser: ContentParser,
    storage: Storage,
    publish_client: PublishClient,
) -> str:
    """Publish a document and its embedded components.

    Steps:

    1. Load the document and read its ``ssb`` metadata as an article.
    2. If no publish path is stored yet, publish the article without a body
       and store the returned id and path.
    3. Publish every element with class ``ssb`` under that path, store its
       id and replace the element by the HTML from the response.
    4. Publish the article again, now with the rendered document as body.

    :return: The publish URL of the article.
    """
    document_processor.load(raw_document_content)

    document_metadata = document_processor.extract_metadata(target_key="ssb")
    document_metadata["content_type"] = "article"

    document_publish_path = storage.get(DOCUMENT_KEY).get("publish_path")
    if not document_publish_path:
        placeholder = content_parser.parse(metadata=document_metadata, html=None)
        created = publish_client.send_content(placeholder)
        storage.update(
            DOCUMENT_KEY,
            {"publish_id": created.publish_id, "publish_path": created.publish_path},
        )
        document_publish_path = created.publish_path

    for id_, inner_html in document_processor.extract_elements(target_class="ssb"):
        storage_key = USER_KEY_PREFIX + id_
        # Stored user metadata, with the article's path as publish folder.
        component_metadata = storage.get(storage_key) | {
            "publish_folder": document_publish_path
        }
        component = content_parser.parse(component_metadata, inner_html)
        published = publish_client.send_content(component)
        storage.update(storage_key, {"publish_id": published.publish_id})
        document_processor.replace_element(id_, published.publish_html)

    stored_document = storage.get(DOCUMENT_KEY)
    article_metadata = document_metadata | {
        "publish_id": stored_document.get("publish_id")
    }
    document_html = document_processor.extract_html()
    article = content_parser.parse(metadata=article_metadata, html=document_html)
    return publish_client.send_content(article).publish_url
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
from typing import Literal
|
|
3
|
+
|
|
4
|
+
import narwhals as nw
|
|
5
|
+
from narwhals.typing import IntoDataFrame
|
|
6
|
+
|
|
7
|
+
from ssb_pubmd.adapters.content_parser import Content
|
|
8
|
+
from ssb_pubmd.adapters.content_parser import ContentParser
|
|
9
|
+
from ssb_pubmd.adapters.content_parser import MimirContentParser
|
|
10
|
+
from ssb_pubmd.adapters.storage import LocalFileStorage
|
|
11
|
+
from ssb_pubmd.adapters.storage import Storage
|
|
12
|
+
from ssb_pubmd.domain.document_publisher import USER_KEY_PREFIX
|
|
13
|
+
|
|
14
|
+
STORAGE: Storage = LocalFileStorage(project_folder=Path.cwd())
|
|
15
|
+
CONTENT_PARSER: ContentParser = MimirContentParser()
|
|
16
|
+
|
|
17
|
+
# Raised for invalid use of the notebook helpers, e.g. a highchart created
# without any data source.
class NotebookClientError(Exception): ...
|
|
18
|
+
|
|
19
|
+
def configure_factbox(
    key: str,
    title: str,
    display_type: Literal["default", "sneakPeek", "aiIcon"] = "default",
) -> None:
    """Create a fact box and print a Markdown snippet to paste into the article (on a new line).

    :param key: A unique key for the content.
    :param title: The title of the fact box.
    :param display_type: How the fact box is displayed.

        Options:

        * "default": Title only (default)
        * "sneakPeek": Title and part of the explanatory text
        * "aiIcon": Title and part of the explanatory text + AI icon
    """
    content = CONTENT_PARSER.parse(
        metadata={
            "content_type": "factBox",
            "title": title,
            "display_type": display_type,
        },
        html=None,
    )
    _store_user_content(user_key=key, content=content)

    print(_get_markdown_snippet(key, placeholder_text="Faktaboksens tekst skrives her."))
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def create_highchart(
    key: str,
    title: str,
    dataframe: IntoDataFrame | None = None,
    tbml: str | None = None,
    graph_type: Literal["line", "pie", "column", "bar", "area", "barNegative"] = "line",
    xlabel: str = "x",
    ylabel: str = "y"
) -> None:
    """Create a highchart and print a Markdown snippet to paste into the article (on a new line).

    Either `dataframe` or `tbml` must be given as the data source.

    :param key: A unique key for the content.
    :param title: The title of the highchart.
    :param dataframe: A pandas, Polars or PyArrow dataframe.
    :param tbml: URL or TBML id.
    :param graph_type: Type of graph.

        Options:

        * "line": Line (default)
        * "pie": Pie
        * "column": Column
        * "bar": Horizontal bar
        * "area": Area
        * "barNegative": Pyramid

    :param xlabel: X axis title.
    :param ylabel: Y axis title.
    :raises NotebookClientError: If neither `dataframe` nor `tbml` is given.
    """
    if dataframe is None and tbml is None:
        raise NotebookClientError("Either 'dataframe' or 'tbml' must be specified.")

    metadata = {
        "content_type": "highchart",
        "title": title,
        "graph_type": graph_type,
        "xlabel": xlabel,
        "ylabel": ylabel,
    }
    if tbml is not None:
        metadata["tbml"] = tbml

    html = None if dataframe is None else _dataframe_to_html_table(dataframe)
    _store_user_content(user_key=key, content=CONTENT_PARSER.parse(metadata, html))

    print(_get_markdown_snippet(key))
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def _store_user_content(user_key: str, content: Content) -> None:
    """Store the content's fields under the user key, prefixed as user data."""
    storage_key = f"{USER_KEY_PREFIX}{user_key}"
    STORAGE.update(storage_key, content.to_dict())
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def _dataframe_to_html_table(dataframe: IntoDataFrame) -> str:
    """Render a dataframe as a bare HTML table.

    The first row holds the column names, followed by one row per data row;
    every cell is a ``<td>``.

    :param dataframe: Any dataframe accepted by ``narwhals.from_native``.
    :return: The table markup.
    """
    # Imported locally, and by name, to avoid adding a module-level import.
    from html import escape

    df = nw.from_native(dataframe)

    def cell(value: object) -> str:
        # Escape text content so "<", ">" and "&" in the data cannot be
        # mistaken for markup.
        return f" <td>{escape(f'{value}', quote=False)}</td>\n"

    parts = ["<table><tbody>\n", "<tr>\n"]
    parts.extend(cell(name) for name in df.columns)
    parts.append("</tr>\n")

    for row in df.iter_rows():
        parts.append(" <tr>\n")
        parts.extend(cell(value) for value in row)
        parts.append(" </tr>\n")

    parts.append("</tbody></table>")

    # Joined once at the end instead of growing a string piece by piece.
    return "".join(parts)
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def _get_markdown_snippet(key: str, placeholder_text: str | None = None) -> str:
|
|
128
|
+
div_config = f"{{ #{key} .ssb }}"
|
|
129
|
+
div_content = f"\n{placeholder_text}\n\n" if placeholder_text is not None else ""
|
|
130
|
+
return f"::: {div_config}\n{div_content}:::"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: ssb-pubmd
|
|
3
|
-
Version: 0.1.
|
|
3
|
+
Version: 0.1.1
|
|
4
4
|
Summary: SSB Pubmd
|
|
5
5
|
License: MIT
|
|
6
6
|
Author: Olav Landsverk
|
|
@@ -13,12 +13,15 @@ Classifier: Programming Language :: Python :: 3.10
|
|
|
13
13
|
Classifier: Programming Language :: Python :: 3.11
|
|
14
14
|
Classifier: Programming Language :: Python :: 3.12
|
|
15
15
|
Classifier: Programming Language :: Python :: 3.13
|
|
16
|
-
Requires-Dist:
|
|
17
|
-
Requires-Dist:
|
|
16
|
+
Requires-Dist: dapla-auth-client (>=1.2.5,<2.0.0)
|
|
17
|
+
Requires-Dist: ipynbname (>=2025.8.0.0,<2026.0.0.0)
|
|
18
|
+
Requires-Dist: narwhals (>=2.15.0,<3.0.0)
|
|
18
19
|
Requires-Dist: nbformat (>=5.10.4,<6.0.0)
|
|
19
|
-
Requires-Dist:
|
|
20
|
+
Requires-Dist: nh3 (>=0.3.2,<0.4.0)
|
|
21
|
+
Requires-Dist: pandocfilters (>=1.5.1,<2.0.0)
|
|
22
|
+
Requires-Dist: pydantic (>=2.12.5,<3.0.0)
|
|
20
23
|
Requires-Dist: requests (>=2.32.4,<3.0.0)
|
|
21
|
-
Requires-Dist:
|
|
24
|
+
Requires-Dist: watchfiles (>=1.1.1,<2.0.0)
|
|
22
25
|
Project-URL: Changelog, https://github.com/statisticsnorway/ssb-pubmd/releases
|
|
23
26
|
Project-URL: Documentation, https://statisticsnorway.github.io/ssb-pubmd
|
|
24
27
|
Project-URL: Homepage, https://github.com/statisticsnorway/ssb-pubmd
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
ssb_pubmd/__init__.py,sha256=GmZebzEEIJcwwYD6_J5irpY5-lGAKFr8lRrTGB_nAPA,170
|
|
2
|
+
ssb_pubmd/__main__.py,sha256=7Trn-DZkNbVn6s5J8FVg9JFtpFEHv-4VKZ34MVs23Cc,204
|
|
3
|
+
ssb_pubmd/adapters/content_parser.py,sha256=ExOULfwoFBVb9h2wH8m61fWFy192ZEIG2LI3fuFRpbE,5141
|
|
4
|
+
ssb_pubmd/adapters/document_processor.py,sha256=GN4FJmWcyiCdTBzWkyUReFGd7AME-F4Eq5N4JmarddQ,4271
|
|
5
|
+
ssb_pubmd/adapters/publish_client.py,sha256=mBRfOEEcrmKlDHJVsaqQR984XlJel9oBK4PGP-GhriE,3451
|
|
6
|
+
ssb_pubmd/adapters/storage.py,sha256=Dexfgw0csQ9wljC6lqf9kFmoM2CHdfMghm-qBrgdWjM,1227
|
|
7
|
+
ssb_pubmd/cli.py,sha256=dusmoCX3U6Lpc_uSqqgCRC0U0m8fRq48ExdsYBXRtr4,2555
|
|
8
|
+
ssb_pubmd/config.py,sha256=chnW-GC5Ie5kEcjVb-4_a5_Vq6glhATorDVIghc50SI,606
|
|
9
|
+
ssb_pubmd/domain/document_publisher.py,sha256=hgzJx9kGZJOVLrgFRg-JGAmNsasvHA9_BA1g3htcWrQ,1920
|
|
10
|
+
ssb_pubmd/notebook_client.py,sha256=MANyeyIdTuci8b0flD9e3jgFnEwx5dGo4zLMp39tkRE,4007
|
|
11
|
+
ssb_pubmd/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
12
|
+
ssb_pubmd-0.1.1.dist-info/LICENSE,sha256=tF5bnYv09fgH5ph9t1EpH1MGrVOGTQeswL4dzVeZ_ak,1073
|
|
13
|
+
ssb_pubmd-0.1.1.dist-info/METADATA,sha256=z_bEnr4p9KiPhscq4X5r8mHmn34Wambsacj7DLlp_bA,4101
|
|
14
|
+
ssb_pubmd-0.1.1.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
|
|
15
|
+
ssb_pubmd-0.1.1.dist-info/entry_points.txt,sha256=o4oU99zbZNIBKGYWdgdEG6ev-62ZRWEJOe7EOjJaajk,53
|
|
16
|
+
ssb_pubmd-0.1.1.dist-info/RECORD,,
|
ssb_pubmd/adapters/cli.py
DELETED
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
import sys
|
|
2
|
-
from dataclasses import dataclass
|
|
3
|
-
|
|
4
|
-
from ssb_pubmd.ports import CmsManager
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
@dataclass
|
|
8
|
-
class CliAdapter:
|
|
9
|
-
cms_manager: CmsManager
|
|
10
|
-
|
|
11
|
-
def run(self, system_arguments: list[str]) -> None:
|
|
12
|
-
match system_arguments:
|
|
13
|
-
case [_, "sync", file_path]:
|
|
14
|
-
preview = self.cms_manager.sync(file_path)
|
|
15
|
-
print(f"Preview URL: {preview}")
|
|
16
|
-
# except Exception as e:
|
|
17
|
-
# print(f"Error during sync: {e}")
|
|
18
|
-
# sys.exit(1)
|
|
19
|
-
case _:
|
|
20
|
-
print("Usage: ssb-pubmd sync <content_file_path>")
|
|
21
|
-
sys.exit(1)
|
ssb_pubmd/adapters/cms_client.py
DELETED
|
@@ -1,47 +0,0 @@
|
|
|
1
|
-
from urllib.parse import urlparse
|
|
2
|
-
|
|
3
|
-
import requests
|
|
4
|
-
|
|
5
|
-
from ssb_pubmd.models import CmsResponse
|
|
6
|
-
from ssb_pubmd.models import Content
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
class CmsClientError(Exception):
    """Raised when a request to the CMS fails or its response cannot be used."""


class MimirCmsClient:
    """HTTP client that posts content to the mimir ``postMarkdown`` service."""

    # Root URL of the CMS; prefixes the service endpoint and completes
    # relative preview URLs.
    base_url: str
    # Seconds to wait for the CMS before a request is abandoned.
    timeout: float

    def __init__(self, base_url: str, timeout: float = 30.0) -> None:
        """Store the CMS base URL and the request timeout (in seconds)."""
        self.base_url = base_url
        self.timeout = timeout

    def _convert_preview_url(self, url_from_response: str) -> str:
        """Convert the preview URL to a full URL if it's relative.

        A URL that already has both scheme and host is returned as parsed.
        Otherwise only its path is kept and substituted into ``base_url``.
        """
        url = urlparse(url_from_response)
        if url.scheme and url.netloc:
            return url.geturl()
        return urlparse(self.base_url)._replace(path=url.path).geturl()

    def _parse_response(self, response: "requests.Response") -> CmsResponse:
        """Turn a CMS HTTP response into a ``CmsResponse``.

        Raises:
            CmsClientError: If the status code is not 200.
        """
        if response.status_code != 200:
            raise CmsClientError(
                f"Request to CMS failed with status code {response.status_code}."
            )
        body = response.json()
        return CmsResponse(
            id=body["_id"],
            preview_url=self._convert_preview_url(body["previewPath"]),
        )

    def send(self, token: str, content: Content) -> CmsResponse:
        """Send content to the Enonic CMS and return its id and preview URL.

        The CMS is assumed to have the mimir application installed (currently
        this only works with the feature branch
        https://github.com/statisticsnorway/mimir/pull/3192).

        Args:
            token: Bearer token placed in the ``Authorization`` header.
            content: Content whose ``to_json()`` result is posted as JSON.

        Returns:
            A ``CmsResponse`` built from the ``_id`` and ``previewPath``
            fields of the response body.

        Raises:
            CmsClientError: If the request fails or times out, the status code
                is not 200, or the response body cannot be interpreted.
        """
        try:
            response = requests.post(
                f"{self.base_url}/_/service/mimir/postMarkdown",
                headers={
                    "Authorization": f"Bearer {token}",
                    "Content-Type": "application/json",
                },
                json=content.to_json(),
                # Without a timeout, requests waits indefinitely on a stalled server.
                timeout=self.timeout,
            )
            return self._parse_response(response)
        except CmsClientError:
            # Already specific (carries the status code); re-raise untouched so
            # the generic handler below does not replace its message.
            raise
        except Exception as e:
            raise CmsClientError("Request to CMS failed.") from e
|
|
@@ -1,72 +0,0 @@
|
|
|
1
|
-
import json
|
|
2
|
-
from dataclasses import dataclass
|
|
3
|
-
from pathlib import Path
|
|
4
|
-
|
|
5
|
-
import nbformat
|
|
6
|
-
|
|
7
|
-
from ssb_pubmd.models import Content
|
|
8
|
-
|
|
9
|
-
ID_KEY = "_id"
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
class LocalStorageError(Exception): ...
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
@dataclass
|
|
16
|
-
class LocalStorageAdapter:
|
|
17
|
-
metadata_file_path: Path
|
|
18
|
-
|
|
19
|
-
def get_file_id(self, file_path: Path) -> str:
|
|
20
|
-
"""
|
|
21
|
-
Returns the content id of a given file path.
|
|
22
|
-
If no id is registered for the given file path,
|
|
23
|
-
it returns an empty string.
|
|
24
|
-
"""
|
|
25
|
-
with open(self.metadata_file_path) as metadata_file:
|
|
26
|
-
metadata = json.load(metadata_file)
|
|
27
|
-
return str(metadata.get(str(file_path.absolute()), {}).get(ID_KEY, ""))
|
|
28
|
-
|
|
29
|
-
def set_file_id(self, file_path: Path, content_id: str) -> None:
|
|
30
|
-
"""Stores a given file's content id in the metadata file"""
|
|
31
|
-
with open(self.metadata_file_path) as metadata_file:
|
|
32
|
-
metadata = json.load(metadata_file)
|
|
33
|
-
metadata[str(file_path.absolute())] = {ID_KEY: content_id}
|
|
34
|
-
with open(self.metadata_file_path, "w") as metadata_file:
|
|
35
|
-
json.dump(metadata, metadata_file)
|
|
36
|
-
|
|
37
|
-
def get_content(self, file_path: Path) -> Content:
|
|
38
|
-
"""
|
|
39
|
-
Returns the markdown content of a given file.
|
|
40
|
-
If the file is neither a .md or a .ipynb file,
|
|
41
|
-
this function will throw a `LocalStorageError`
|
|
42
|
-
"""
|
|
43
|
-
return Content(
|
|
44
|
-
content_id=self.get_file_id(file_path),
|
|
45
|
-
file_path=file_path,
|
|
46
|
-
markdown=self._get_content(file_path),
|
|
47
|
-
)
|
|
48
|
-
|
|
49
|
-
def _get_content_from_notebook_file(self, file_path: Path) -> str:
|
|
50
|
-
"""Extracts all markdown cells from the notebook and returns it as a string."""
|
|
51
|
-
notebook = nbformat.read(file_path, as_version=nbformat.NO_CONVERT) # type: ignore
|
|
52
|
-
markdown_cells = []
|
|
53
|
-
for cell in notebook.cells:
|
|
54
|
-
if cell.cell_type == "markdown":
|
|
55
|
-
markdown_cells.append(cell.source)
|
|
56
|
-
sep = "\n\n"
|
|
57
|
-
return sep.join(markdown_cells)
|
|
58
|
-
|
|
59
|
-
def _get_content_from_markdown_file(self, file_path: Path) -> str:
|
|
60
|
-
"""Returns the content of a markdown file as a string."""
|
|
61
|
-
with open(file_path) as file:
|
|
62
|
-
return file.read()
|
|
63
|
-
|
|
64
|
-
def _get_content(self, file_path: Path) -> str:
|
|
65
|
-
file_type = file_path.suffix
|
|
66
|
-
match file_type:
|
|
67
|
-
case ".md":
|
|
68
|
-
return self._get_content_from_markdown_file(file_path)
|
|
69
|
-
case ".ipynb":
|
|
70
|
-
return self._get_content_from_notebook_file(file_path)
|
|
71
|
-
case _:
|
|
72
|
-
raise LocalStorageError(f"Unsupported file type: {file_type}")
|
|
@@ -1,66 +0,0 @@
|
|
|
1
|
-
"""This module fetches the signing secret from Google Cloud Secret Manager and generates JWTs from it."""
|
|
2
|
-
|
|
3
|
-
import json
|
|
4
|
-
from dataclasses import dataclass
|
|
5
|
-
from datetime import datetime
|
|
6
|
-
|
|
7
|
-
import jwt
|
|
8
|
-
from google.cloud import secretmanager
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
@dataclass
class Secret:
    """Signing material read from the secret payload."""

    private_key: str  # Key passed to ``jwt.encode`` to sign the token.
    kid: str  # Key id placed in the token header.
    principal_key: str  # Value used as the token's ``sub`` claim.


class SecretManagerError(Exception):
    """Raised when the fetched secret does not have the expected format."""


class GoogleSecretManagerClient:
    """Generates short-lived JWTs from a secret stored in Google Cloud Secret Manager."""

    TYPE = "JWT"
    ALGORITHM = "RS256"
    # Seconds between the ``iat`` and ``exp`` claims of a generated token.
    TOKEN_LIFETIME_SECONDS = 30
    _gc_secret_resource_name: str

    def __init__(self, gc_secret_resource_name: str) -> None:
        """Store the resource name passed to ``access_secret_version``."""
        self._gc_secret_resource_name = gc_secret_resource_name

    @staticmethod
    def _parse_secret(raw_data: str) -> Secret:
        """Parse the decoded secret payload into a ``Secret``.

        Raises:
            SecretManagerError: If the payload is not valid JSON, is not a
                JSON object, or lacks one of the required keys.
        """
        try:
            data = json.loads(raw_data)
            return Secret(
                private_key=data["privateKey"],
                kid=data["kid"],
                principal_key=data["principalKey"],
            )
        # ValueError covers json.JSONDecodeError; TypeError covers payloads
        # that are valid JSON but not an object (list, string, number).
        except (ValueError, KeyError, TypeError) as e:
            raise SecretManagerError(
                "The secret must be a JSON object with keys 'privateKey', 'kid' and 'principalKey'."
            ) from e

    def _get_secret(self) -> Secret:
        """Fetch the private key and related data from Google Cloud Secret Manager."""
        client = secretmanager.SecretManagerServiceClient()
        response = client.access_secret_version(name=self._gc_secret_resource_name)
        raw_data = response.payload.data.decode("UTF-8")
        return self._parse_secret(raw_data)

    def generate_token(self) -> str:
        """Return a JWT signed with the fetched private key.

        The header carries the secret's ``kid``; the payload carries the
        principal key as ``sub``, the current time as ``iat`` and an ``exp``
        ``TOKEN_LIFETIME_SECONDS`` later.
        """
        secret = self._get_secret()

        header = {
            "kid": secret.kid,
            "typ": self.TYPE,
            "alg": self.ALGORITHM,
        }

        iat = int(datetime.now().timestamp())
        exp = iat + self.TOKEN_LIFETIME_SECONDS
        payload = {
            "sub": secret.principal_key,
            "iat": iat,
            "exp": exp,
        }

        token = jwt.encode(
            payload, secret.private_key, algorithm=self.ALGORITHM, headers=header
        )
        return token
|
ssb_pubmd/enonic_cms_manager.py
DELETED
|
@@ -1,30 +0,0 @@
|
|
|
1
|
-
from dataclasses import dataclass
|
|
2
|
-
from pathlib import Path
|
|
3
|
-
|
|
4
|
-
from ssb_pubmd.config import Config
|
|
5
|
-
from ssb_pubmd.ports import CmsClient
|
|
6
|
-
from ssb_pubmd.ports import ContentFileHandler
|
|
7
|
-
from ssb_pubmd.ports import SecretManagerClient
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
@dataclass
class EnonicCmsManager:
    """A CMS manager tailored to the Enonic CMS."""

    config: Config
    cms_client: CmsClient
    secret_manager_client: SecretManagerClient
    content_file_handler: ContentFileHandler

    def sync(self, content_file_path: str) -> str:
        """Ask Enonic to store/update the given content file and return a preview URL.

        How the request is made is left to the ``CmsClient`` implementation,
        which in turn depends on the services exposed by the Enonic XP
        application. This class only relies on getting back a response with
        an ``id`` and a ``preview_url``: the id is stored for the content
        file, and the preview URL is returned.
        """
        handler = self.content_file_handler
        document = handler.get_content(Path(content_file_path))
        token = self.secret_manager_client.generate_token()
        cms_response = self.cms_client.send(token=token, content=document)
        # Remember the id so the same file can be matched to its CMS content later.
        handler.set_file_id(document.file_path, cms_response.id)
        return cms_response.preview_url
|
ssb_pubmd/models.py
DELETED
|
@@ -1,28 +0,0 @@
|
|
|
1
|
-
from dataclasses import dataclass
|
|
2
|
-
from pathlib import Path
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
@dataclass
class CmsResponse:
    """Result of sending content to the CMS."""

    # Identifier of the content.
    id: str
    # URL of the content's preview.
    preview_url: str
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
@dataclass
class Content:
    """Markdown content of a file together with its content id."""

    content_id: str
    file_path: Path
    markdown: str

    @property
    def display_name(self) -> str:
        """Display name derived from the file name.

        The file stem has its underscores turned into spaces and is then
        title-cased.
        """
        words = self.file_path.stem.split("_")
        return " ".join(words).title()

    def to_json(self) -> dict[str, str]:
        """Return the content as a JSON-serializable dictionary."""
        payload: dict[str, str] = {"_id": self.content_id}
        payload["displayName"] = self.display_name
        payload["markdown"] = self.markdown
        return payload
|
ssb_pubmd/ports.py
DELETED
|
@@ -1,57 +0,0 @@
|
|
|
1
|
-
from pathlib import Path
|
|
2
|
-
from typing import Protocol
|
|
3
|
-
|
|
4
|
-
from ssb_pubmd.models import CmsResponse
|
|
5
|
-
from ssb_pubmd.models import Content
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
class CmsManager(Protocol):
    """Primary port: the interface for managing a CMS.

    Each implementing class targets one specific CMS application.
    """

    def sync(self, content_file_path: str) -> str:
        """Ask the CMS to store and render the given content file.

        :param content_file_path: Path to the content file to be rendered.
        :return: URL of the rendered preview.
        """
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
class CmsClient(Protocol):
    """Secondary port: the interface for communicating with a CMS.

    Each implementing class targets one specific CMS, with that CMS's exposed
    services and token authentication flow.
    """

    def send(self, token: str, content: Content) -> CmsResponse:
        """Send ``content`` to the CMS using ``token`` and return its response."""
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
class ContentFileHandler(Protocol):
    """Secondary port: the interface for handling content files.

    Covers extracting a file's content and handling its metadata.
    """

    def get_file_id(self, file_path: Path) -> str:
        """Return the id of a given file path.

        An empty string is returned when no id is registered for the path.
        """

    def set_file_id(self, file_path: Path, content_id: str) -> None:
        """Set the id of a given file path."""

    def get_content(self, file_path: Path) -> Content:
        """Extract the content of a given file."""
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
class SecretManagerClient(Protocol):
    """Secondary port: the interface for talking to a secret manager and generating tokens."""

    def generate_token(self) -> str:
        """Return a newly generated token."""
|
ssb_pubmd-0.1.0.dist-info/RECORD
DELETED
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
ssb_pubmd/__main__.py,sha256=rjxFBSqNHykkGrEfY6t9e1gZfv2vksjsT3YJSjOvNlc,816
|
|
2
|
-
ssb_pubmd/adapters/cli.py,sha256=DO5HVGUH2FEZDoLpJs-kUCln1D6uapHVXQfeaIoQHGg,627
|
|
3
|
-
ssb_pubmd/adapters/cms_client.py,sha256=vPA2xhffb4ubDr9_9qMN4MuNp2gg2SLjOVoZAnWgo2o,1699
|
|
4
|
-
ssb_pubmd/adapters/local_storage.py,sha256=3LPWRlmQSdlDhmU23yBD33x13HWKjYuvKfg2i8Ghh0A,2595
|
|
5
|
-
ssb_pubmd/adapters/secret_manager_client.py,sha256=fmFUiWzt9Ajvn5dYswFdh23JV7_zkDM15tunSfaHCXM,1839
|
|
6
|
-
ssb_pubmd/config.py,sha256=ZSynxkTY4s-qUKG6DO_Ss_txGjtB20um46hV1pbEndw,838
|
|
7
|
-
ssb_pubmd/enonic_cms_manager.py,sha256=lNYTkydPXkYvXhVQZcOqzy4J8iKQTPVEgTS72ZnqcW0,1264
|
|
8
|
-
ssb_pubmd/models.py,sha256=Lbi5qaEbJhOl3Cp9Fx2-ie74o3J4ptodLasPyuBGyFM,626
|
|
9
|
-
ssb_pubmd/ports.py,sha256=sTZ06jVrUy_fk0G3yeduo13nmXa9ZiXyts_UVBNUFow,1703
|
|
10
|
-
ssb_pubmd/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
11
|
-
ssb_pubmd-0.1.0.dist-info/LICENSE,sha256=tF5bnYv09fgH5ph9t1EpH1MGrVOGTQeswL4dzVeZ_ak,1073
|
|
12
|
-
ssb_pubmd-0.1.0.dist-info/METADATA,sha256=YYgpiyNdhQCXAZFa1TUbPuqCpRRTL7XSbYYSWbmyvfA,3996
|
|
13
|
-
ssb_pubmd-0.1.0.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
|
|
14
|
-
ssb_pubmd-0.1.0.dist-info/entry_points.txt,sha256=o4oU99zbZNIBKGYWdgdEG6ev-62ZRWEJOe7EOjJaajk,53
|
|
15
|
-
ssb_pubmd-0.1.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|