ssb-pubmd 0.0.10__py3-none-any.whl → 0.0.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ssb_pubmd/__init__.py +2 -2
- ssb_pubmd/__main__.py +43 -4
- ssb_pubmd/browser_context.py +23 -25
- ssb_pubmd/{notebook_syncer.py → markdown_syncer.py} +64 -30
- {ssb_pubmd-0.0.10.dist-info → ssb_pubmd-0.0.12.dist-info}/METADATA +23 -3
- ssb_pubmd-0.0.12.dist-info/RECORD +10 -0
- {ssb_pubmd-0.0.10.dist-info → ssb_pubmd-0.0.12.dist-info}/WHEEL +1 -1
- ssb_pubmd-0.0.12.dist-info/entry_points.txt +3 -0
- ssb_pubmd-0.0.10.dist-info/RECORD +0 -10
- ssb_pubmd-0.0.10.dist-info/entry_points.txt +0 -3
- {ssb_pubmd-0.0.10.dist-info → ssb_pubmd-0.0.12.dist-info}/LICENSE +0 -0
ssb_pubmd/__init__.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
"""SSB Pubmd."""
|
|
2
2
|
|
|
3
3
|
from .browser_context import BrowserRequestContext as BrowserContext
|
|
4
|
-
from .notebook_syncer import NotebookSyncer
|
|
4
|
+
from .markdown_syncer import MarkdownSyncer
|
|
5
5
|
|
|
6
|
-
__all__ = ["BrowserContext", "NotebookSyncer"]
|
|
6
|
+
__all__ = ["BrowserContext", "MarkdownSyncer"]
|
ssb_pubmd/__main__.py
CHANGED
|
@@ -1,13 +1,52 @@
|
|
|
1
1
|
"""Command-line interface."""
|
|
2
2
|
|
|
3
|
+
import os
|
|
4
|
+
|
|
3
5
|
import click
|
|
4
6
|
|
|
7
|
+
from ssb_pubmd.browser_context import BrowserRequestContext as RequestContext
|
|
8
|
+
from ssb_pubmd.markdown_syncer import MarkdownSyncer
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@click.group()
|
|
12
|
+
def cli() -> None:
|
|
13
|
+
"""Command-line interface for the ssb_pubmd package."""
|
|
14
|
+
pass
|
|
15
|
+
|
|
5
16
|
|
|
6
17
|
@click.command()
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
""
|
|
18
|
+
def login() -> None:
|
|
19
|
+
"""Login to the server."""
|
|
20
|
+
login_url = os.getenv("PUBMD_LOGIN_URL", "")
|
|
21
|
+
request_context = RequestContext()
|
|
22
|
+
print(login_url)
|
|
23
|
+
storage_state_file, storage_state = request_context.create_new(login_url)
|
|
24
|
+
click.echo(
|
|
25
|
+
f"The following browser context object is now stored in {storage_state_file}:"
|
|
26
|
+
)
|
|
27
|
+
click.echo(storage_state)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@click.command()
|
|
31
|
+
@click.argument("content_file_path", type=click.Path())
|
|
32
|
+
def sync(content_file_path: str) -> None:
|
|
33
|
+
"""Sync the content."""
|
|
34
|
+
post_url = os.getenv("PUBMD_POST_URL", "")
|
|
35
|
+
request_context = RequestContext()
|
|
36
|
+
request_context.recreate_from_file()
|
|
37
|
+
|
|
38
|
+
syncer = MarkdownSyncer(post_url=post_url, request_context=request_context)
|
|
39
|
+
syncer.content_file_path = content_file_path
|
|
40
|
+
|
|
41
|
+
content_id = syncer.sync_content()
|
|
42
|
+
|
|
43
|
+
click.echo(
|
|
44
|
+
f"File '{click.format_filename(content_file_path)}' synced to CMS with content ID: {content_id}"
|
|
45
|
+
)
|
|
46
|
+
|
|
10
47
|
|
|
48
|
+
cli.add_command(login)
|
|
49
|
+
cli.add_command(sync)
|
|
11
50
|
|
|
12
51
|
if __name__ == "__main__":
|
|
13
|
-
|
|
52
|
+
cli() # pragma: no cover
|
ssb_pubmd/browser_context.py
CHANGED
|
@@ -1,9 +1,10 @@
|
|
|
1
|
-
from
|
|
1
|
+
from playwright.sync_api import BrowserContext
|
|
2
|
+
from playwright.sync_api import StorageState
|
|
3
|
+
from playwright.sync_api import sync_playwright
|
|
2
4
|
|
|
3
|
-
from
|
|
4
|
-
from playwright.async_api import async_playwright
|
|
5
|
+
from .markdown_syncer import Response
|
|
5
6
|
|
|
6
|
-
|
|
7
|
+
BROWSER_CONTEXT_FILE = "browser_context.json"
|
|
7
8
|
|
|
8
9
|
|
|
9
10
|
class BrowserRequestContext:
|
|
@@ -11,39 +12,37 @@ class BrowserRequestContext:
|
|
|
11
12
|
|
|
12
13
|
def __init__(self) -> None:
|
|
13
14
|
"""Initializes an empty browser context object."""
|
|
14
|
-
self._storage_state_path: str =
|
|
15
|
+
self._storage_state_path: str = BROWSER_CONTEXT_FILE
|
|
15
16
|
self._context: BrowserContext | None = None
|
|
16
17
|
|
|
17
|
-
|
|
18
|
+
def create_new(self, login_url: str) -> tuple[str, StorageState]:
|
|
18
19
|
"""Creates a browser context by opening a login page and waiting for it to be closed by user.
|
|
19
20
|
|
|
20
21
|
This function also saves the browser context to a file for later use.
|
|
21
22
|
"""
|
|
22
|
-
playwright =
|
|
23
|
-
browser =
|
|
23
|
+
playwright = sync_playwright().start()
|
|
24
|
+
browser = playwright.chromium.launch(headless=False)
|
|
24
25
|
|
|
25
|
-
self._context =
|
|
26
|
-
login_page =
|
|
26
|
+
self._context = browser.new_context()
|
|
27
|
+
login_page = self._context.new_page()
|
|
27
28
|
|
|
28
|
-
|
|
29
|
-
|
|
29
|
+
login_page.goto(login_url)
|
|
30
|
+
login_page.wait_for_event("close", timeout=0)
|
|
30
31
|
|
|
31
|
-
|
|
32
|
+
storage_state = self._context.storage_state(path=self._storage_state_path)
|
|
32
33
|
|
|
33
|
-
return self.
|
|
34
|
+
return self._storage_state_path, storage_state
|
|
34
35
|
|
|
35
|
-
|
|
36
|
+
def recreate_from_file(self) -> BrowserContext:
|
|
36
37
|
"""Recreates a browser context object from a file."""
|
|
37
|
-
playwright =
|
|
38
|
-
browser =
|
|
38
|
+
playwright = sync_playwright().start()
|
|
39
|
+
browser = playwright.chromium.launch(headless=False)
|
|
39
40
|
|
|
40
|
-
self._context =
|
|
41
|
-
storage_state=self._storage_state_path
|
|
42
|
-
)
|
|
41
|
+
self._context = browser.new_context(storage_state=self._storage_state_path)
|
|
43
42
|
|
|
44
43
|
return self._context
|
|
45
44
|
|
|
46
|
-
|
|
45
|
+
def send_request(
|
|
47
46
|
self,
|
|
48
47
|
url: str,
|
|
49
48
|
headers: dict[str, str] | None = None,
|
|
@@ -53,14 +52,13 @@ class BrowserRequestContext:
|
|
|
53
52
|
if self._context is None:
|
|
54
53
|
raise ValueError("Browser context has not been created.")
|
|
55
54
|
|
|
56
|
-
|
|
57
|
-
api_response = await self._context.request.post(
|
|
55
|
+
api_response = self._context.request.post(
|
|
58
56
|
url,
|
|
59
|
-
params=
|
|
57
|
+
params=data,
|
|
60
58
|
)
|
|
61
59
|
|
|
62
60
|
try:
|
|
63
|
-
body =
|
|
61
|
+
body = api_response.json()
|
|
64
62
|
body = dict(body)
|
|
65
63
|
except Exception:
|
|
66
64
|
body = None
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import os
|
|
3
3
|
from dataclasses import dataclass
|
|
4
|
+
from enum import Enum
|
|
4
5
|
from typing import Any
|
|
5
6
|
from typing import Protocol
|
|
6
7
|
|
|
@@ -23,7 +24,7 @@ class RequestContext(Protocol):
|
|
|
23
24
|
Implementing classes may handle authentication, sessions, etc.
|
|
24
25
|
"""
|
|
25
26
|
|
|
26
|
-
|
|
27
|
+
def send_request(
|
|
27
28
|
self,
|
|
28
29
|
url: str,
|
|
29
30
|
headers: dict[str, str] | None = None,
|
|
@@ -40,7 +41,7 @@ class BasicRequestContext:
|
|
|
40
41
|
"""Initializes the basic request context."""
|
|
41
42
|
pass
|
|
42
43
|
|
|
43
|
-
|
|
44
|
+
def send_request(
|
|
44
45
|
self,
|
|
45
46
|
url: str,
|
|
46
47
|
headers: dict[str, str] | None = None,
|
|
@@ -64,8 +65,15 @@ class BasicRequestContext:
|
|
|
64
65
|
)
|
|
65
66
|
|
|
66
67
|
|
|
67
|
-
class
|
|
68
|
-
"""
|
|
68
|
+
class FileType(Enum):
|
|
69
|
+
"""File extensions for markdown and notebook files."""
|
|
70
|
+
|
|
71
|
+
MARKDOWN = ".md"
|
|
72
|
+
NOTEBOOK = ".ipynb"
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
class MarkdownSyncer:
|
|
76
|
+
"""This class syncs a markdown/notebook file to a CMS (Content Management System).
|
|
69
77
|
|
|
70
78
|
The CMS must have an endpoint that satisfies the following constraints:
|
|
71
79
|
|
|
@@ -77,43 +85,54 @@ class NotebookSyncer:
|
|
|
77
85
|
|
|
78
86
|
- On the first request, an empty string is sent as *_id*.
|
|
79
87
|
- If the request succeeds, the value of *_id* (in the response) is stored in a JSON file
|
|
80
|
-
(created in the same directory as the notebook file).
|
|
88
|
+
(created in the same directory as the markdown/notebook file).
|
|
81
89
|
- On subsequent requests, the stored value is sent as *_id*.
|
|
82
90
|
"""
|
|
83
91
|
|
|
84
92
|
ID_KEY = "_id"
|
|
85
93
|
|
|
86
94
|
def __init__(self, post_url: str, request_context: RequestContext) -> None:
|
|
87
|
-
"""Creates a
|
|
95
|
+
"""Creates a markdown syncer instance that connects to the CMS through the post url."""
|
|
88
96
|
self._post_url: str = post_url
|
|
89
97
|
self._context: RequestContext = request_context
|
|
90
|
-
self.
|
|
98
|
+
self._content_file_path: str = ""
|
|
99
|
+
self._content_file_type: FileType = FileType.MARKDOWN
|
|
91
100
|
|
|
92
101
|
@property
|
|
93
|
-
def
|
|
94
|
-
"""Returns the path of the notebook file."""
|
|
95
|
-
return self.
|
|
96
|
-
|
|
97
|
-
@
|
|
98
|
-
def
|
|
99
|
-
"""Sets the path of the notebook file."""
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
102
|
+
def content_file_path(self) -> str:
|
|
103
|
+
"""Returns the path of the markdown/notebook file."""
|
|
104
|
+
return self._content_file_path
|
|
105
|
+
|
|
106
|
+
@content_file_path.setter
|
|
107
|
+
def content_file_path(self, content_file_path: str) -> None:
|
|
108
|
+
"""Sets the path of the markdown/notebook file."""
|
|
109
|
+
content_file_path = os.path.abspath(content_file_path)
|
|
110
|
+
|
|
111
|
+
if not os.path.exists(content_file_path):
|
|
112
|
+
raise FileNotFoundError(f"The file '{content_file_path}' does not exist.")
|
|
113
|
+
|
|
114
|
+
ext = os.path.splitext(content_file_path)[1]
|
|
115
|
+
for e in FileType:
|
|
116
|
+
if ext == e.value:
|
|
117
|
+
self._content_file_type = e
|
|
118
|
+
break
|
|
119
|
+
else:
|
|
120
|
+
raise ValueError(
|
|
121
|
+
f"The file '{content_file_path}' is not a markdown or notebook file."
|
|
104
122
|
)
|
|
105
|
-
|
|
123
|
+
|
|
124
|
+
self._content_file_path = content_file_path
|
|
106
125
|
|
|
107
126
|
@property
|
|
108
127
|
def basename(self) -> str:
|
|
109
|
-
"""The name of the notebook file without extension."""
|
|
110
|
-
basename = os.path.basename(self.
|
|
128
|
+
"""The name of the markdown/notebook file without extension."""
|
|
129
|
+
basename = os.path.basename(self.content_file_path)
|
|
111
130
|
return os.path.splitext(basename)[0]
|
|
112
131
|
|
|
113
132
|
@property
|
|
114
133
|
def data_path(self) -> str:
|
|
115
134
|
"""The absolute path of the file to store the data returned from the CMS."""
|
|
116
|
-
return os.path.splitext(self.
|
|
135
|
+
return os.path.splitext(self.content_file_path)[0] + ".json"
|
|
117
136
|
|
|
118
137
|
@property
|
|
119
138
|
def display_name(self) -> str:
|
|
@@ -138,9 +157,9 @@ class NotebookSyncer:
|
|
|
138
157
|
|
|
139
158
|
def _read_notebook(self) -> NotebookNode:
|
|
140
159
|
"""Reads the notebook file and returns its content."""
|
|
141
|
-
return nbformat.read(self.
|
|
160
|
+
return nbformat.read(self._content_file_path, as_version=nbformat.NO_CONVERT) # type: ignore
|
|
142
161
|
|
|
143
|
-
def
|
|
162
|
+
def _get_content_from_notebook_file(self) -> str:
|
|
144
163
|
"""Extracts all markdown cells from the notebook and returns it as a merged string."""
|
|
145
164
|
notebook = self._read_notebook()
|
|
146
165
|
|
|
@@ -153,17 +172,32 @@ class NotebookSyncer:
|
|
|
153
172
|
|
|
154
173
|
return markdown_content
|
|
155
174
|
|
|
175
|
+
def _get_content_from_markdown_file(self) -> str:
|
|
176
|
+
"""Returns the content of a markdown file."""
|
|
177
|
+
with open(self._content_file_path) as file:
|
|
178
|
+
markdown_content = file.read()
|
|
179
|
+
return markdown_content
|
|
180
|
+
|
|
181
|
+
def _get_content(self) -> str:
|
|
182
|
+
content = ""
|
|
183
|
+
match self._content_file_type:
|
|
184
|
+
case FileType.MARKDOWN:
|
|
185
|
+
content = self._get_content_from_markdown_file()
|
|
186
|
+
case FileType.NOTEBOOK:
|
|
187
|
+
content = self._get_content_from_notebook_file()
|
|
188
|
+
return content
|
|
189
|
+
|
|
156
190
|
def _request_data(self) -> dict[str, str]:
|
|
157
191
|
"""Prepares the request data to be sent to the CMS endpoint."""
|
|
158
192
|
return {
|
|
159
193
|
"_id": self._get_content_id(),
|
|
160
194
|
"displayName": self.display_name,
|
|
161
|
-
"markdown": self.
|
|
195
|
+
"markdown": self._get_content(),
|
|
162
196
|
}
|
|
163
197
|
|
|
164
|
-
|
|
198
|
+
def _send_request(self) -> str:
|
|
165
199
|
"""Sends the request to the CMS endpoint and returns the content id from the response."""
|
|
166
|
-
response =
|
|
200
|
+
response = self._context.send_request(
|
|
167
201
|
url=self._post_url, data=self._request_data()
|
|
168
202
|
)
|
|
169
203
|
|
|
@@ -186,8 +220,8 @@ class NotebookSyncer:
|
|
|
186
220
|
|
|
187
221
|
return content_id
|
|
188
222
|
|
|
189
|
-
|
|
190
|
-
"""Sends the
|
|
191
|
-
content_id =
|
|
223
|
+
def sync_content(self) -> str:
|
|
224
|
+
"""Sends the markdown content to the CMS endpoint and stores the id from the response."""
|
|
225
|
+
content_id = self._send_request()
|
|
192
226
|
self._save_content_id(content_id)
|
|
193
227
|
return content_id
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: ssb-pubmd
|
|
3
|
-
Version: 0.0.
|
|
3
|
+
Version: 0.0.12
|
|
4
4
|
Summary: SSB Pubmd
|
|
5
5
|
License: MIT
|
|
6
6
|
Author: Olav Landsverk
|
|
@@ -53,7 +53,6 @@ Description-Content-Type: text/markdown
|
|
|
53
53
|
|
|
54
54
|
## Features
|
|
55
55
|
|
|
56
|
-
- Helper library for syncing a Jupyter Notebook with a remote server (e.g. a CMS/publishing platform).
|
|
57
56
|
- Supports logging in through a popup browser window.
|
|
58
57
|
|
|
59
58
|
## Requirements
|
|
@@ -70,7 +69,28 @@ pip install ssb-pubmd
|
|
|
70
69
|
|
|
71
70
|
## Usage
|
|
72
71
|
|
|
73
|
-
|
|
72
|
+
First set environment variables:
|
|
73
|
+
|
|
74
|
+
```console
|
|
75
|
+
export PUBMD_LOGIN_URL=<https://www.example.com/login>
|
|
76
|
+
export PUBMD_POST_URL=<https://www.example.com/post>
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
To log in, run:
|
|
80
|
+
|
|
81
|
+
```console
|
|
82
|
+
pubmd login
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
Close the popup browser window when you are logged in.
|
|
86
|
+
|
|
87
|
+
To synchronize markdown content to the CMS server, run:
|
|
88
|
+
|
|
89
|
+
```console
|
|
90
|
+
pubmd sync <file>
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
`<file>` should be an absolute or relative path, and the allowed extensions are `.ipynb` and `.md`.
|
|
74
94
|
|
|
75
95
|
## Contributing
|
|
76
96
|
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
ssb_pubmd/__init__.py,sha256=WD-U43QQTQIRSRgTHSnEjcG5MuH4T_CPFHPXols2NLk,179
|
|
2
|
+
ssb_pubmd/__main__.py,sha256=WdiICEorWrTbM1NDIkG-97Gg49IV5Pm_MBkQHr3Scec,1364
|
|
3
|
+
ssb_pubmd/browser_context.py,sha256=JXDlvUJZ-NYLQlE0_MRAiuRxGrMVBfvm5Rt2QX7dYRA,2344
|
|
4
|
+
ssb_pubmd/markdown_syncer.py,sha256=RHP3bHIGPRLichrleygOS2WIVN1KOlYPehbrcenJMRk,7631
|
|
5
|
+
ssb_pubmd/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
|
+
ssb_pubmd-0.0.12.dist-info/LICENSE,sha256=tF5bnYv09fgH5ph9t1EpH1MGrVOGTQeswL4dzVeZ_ak,1073
|
|
7
|
+
ssb_pubmd-0.0.12.dist-info/METADATA,sha256=6YfXG5dXzW-2xU-Cdbtik5-ophdBRngUsSbBJ_DVYkw,4707
|
|
8
|
+
ssb_pubmd-0.0.12.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
|
|
9
|
+
ssb_pubmd-0.0.12.dist-info/entry_points.txt,sha256=1_NfsiOfqTg948JWXYPwi4QtDk90KHkNn1CQtye8rJ0,48
|
|
10
|
+
ssb_pubmd-0.0.12.dist-info/RECORD,,
|
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
ssb_pubmd/__init__.py,sha256=bEVVJ-sm5MmH5bXnzzyek_nSAvutyB0WNpBy835lE2g,179
|
|
2
|
-
ssb_pubmd/__main__.py,sha256=8D0yedPhnV_2L7nj0s0KUKxNQqPxoussMHGDNM-vyjg,209
|
|
3
|
-
ssb_pubmd/browser_context.py,sha256=ihFYos4Vm-0Llgr5WlFRAITlE6CAY1D53baMF5rzxwI,2405
|
|
4
|
-
ssb_pubmd/notebook_syncer.py,sha256=cyfB7jsXUVIaohVt38x8TKxlllkk1txy5jVab8vclSc,6511
|
|
5
|
-
ssb_pubmd/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
|
-
ssb_pubmd-0.0.10.dist-info/LICENSE,sha256=tF5bnYv09fgH5ph9t1EpH1MGrVOGTQeswL4dzVeZ_ak,1073
|
|
7
|
-
ssb_pubmd-0.0.10.dist-info/METADATA,sha256=Ylp5hVFos3KaDjGpbJBgngtmB6lsLugntRTaRvIDDAo,4407
|
|
8
|
-
ssb_pubmd-0.0.10.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
|
|
9
|
-
ssb_pubmd-0.0.10.dist-info/entry_points.txt,sha256=o4oU99zbZNIBKGYWdgdEG6ev-62ZRWEJOe7EOjJaajk,53
|
|
10
|
-
ssb_pubmd-0.0.10.dist-info/RECORD,,
|
|
File without changes
|