ssb-pubmd 0.0.8__py3-none-any.whl → 0.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ssb_pubmd/__init__.py CHANGED
@@ -1,5 +1,6 @@
1
1
  """SSB Pubmd."""
2
2
 
3
- from .exporter import notebook_to_cms
3
+ from .browser_context import BrowserRequestContext as BrowserContext
4
+ from .notebook_syncer import NotebookSyncer
4
5
 
5
- __all__ = ["notebook_to_cms"]
6
+ __all__ = ["BrowserContext", "NotebookSyncer"]
@@ -0,0 +1,70 @@
1
+ from typing import cast
2
+
3
+ from playwright.async_api import BrowserContext
4
+ from playwright.async_api import async_playwright
5
+
6
+ from .notebook_syncer import Response
7
+
8
+
9
+ class BrowserRequestContext:
10
+ """Creates a logged in browser context from which to send requests."""
11
+
12
+ def __init__(self) -> None:
13
+ """Initializes the wrapper with the storage state file path; no browser context exists until one is created or restored."""
14
+ self._storage_state_path: str = "browser_context.json"
15
+ self._context: BrowserContext | None = None
16
+
17
+ async def create_new(self, login_url: str) -> BrowserContext:
18
+ """Creates a browser context by opening a login page and waiting for it to be closed by the user."""
19
+ playwright = await async_playwright().start()
20
+ browser = await playwright.chromium.launch(headless=False)
21
+
22
+ self._context = await browser.new_context()
23
+ login_page = await self._context.new_page()
24
+
25
+ await login_page.goto(login_url)
26
+ await login_page.wait_for_event("close", timeout=0)
27
+
28
+ await self._context.storage_state(path=self._storage_state_path)
29
+
30
+ return self._context
31
+
32
+ async def recreate_from_file(self) -> BrowserContext:
33
+ """Restores a browser context from the storage state file."""
34
+ playwright = await async_playwright().start()
35
+ browser = await playwright.chromium.launch(headless=False)
36
+
37
+ self._context = await browser.new_context(
38
+ storage_state=self._storage_state_path
39
+ )
40
+
41
+ return self._context
42
+
43
+ async def send_request(
44
+ self,
45
+ url: str,
46
+ headers: dict[str, str] | None = None,
47
+ data: dict[str, str] | None = None,
48
+ ) -> Response:
49
+ """Sends a request to the specified url, optionally with headers and data, within the browser context."""
50
+ if self._context is None:
51
+ raise ValueError("Browser context has not been created.")
52
+
53
+ params = cast(dict[str, str | float | bool], data)
54
+ api_response = await self._context.request.post(
55
+ url,
56
+ params=params,
57
+ )
58
+
59
+ try:
60
+ body = await api_response.json()
61
+ body = dict(body)
62
+ except Exception:
63
+ body = None
64
+
65
+ response = Response(
66
+ status_code=api_response.status,
67
+ body=body,
68
+ )
69
+
70
+ return response
@@ -0,0 +1,193 @@
1
+ import json
2
+ import os
3
+ from dataclasses import dataclass
4
+ from typing import Any
5
+ from typing import Protocol
6
+
7
+ import nbformat
8
+ import requests
9
+ from nbformat import NotebookNode
10
+
11
+
12
+ @dataclass
13
+ class Response:
14
+ """The expected response object used in this module."""
15
+
16
+ status_code: int
17
+ body: dict[str, Any] | None = None
18
+
19
+
20
+ class RequestContext(Protocol):
21
+ """Interface for the context in which a request is sent.
22
+
23
+ Implementing classes may handle authentication, session management, etc.
24
+ """
25
+
26
+ async def send_request(
27
+ self,
28
+ url: str,
29
+ headers: dict[str, str] | None = None,
30
+ data: dict[str, str] | None = None,
31
+ ) -> Response:
32
+ """Sends the request to the specified url, optionally with headers and data, and returns the response."""
33
+ ...
34
+
35
+
36
+ class BasicRequestContext:
37
+ """Basic, unauthenticated request context."""
38
+
39
+ def __init__(self) -> None:
40
+ """Initializes the basic request context."""
41
+ pass
42
+
43
+ async def send_request(
44
+ self,
45
+ url: str,
46
+ headers: dict[str, str] | None = None,
47
+ data: dict[str, str] | None = None,
48
+ ) -> Response:
49
+ """Sends the request to the specified url without any headers."""
50
+ response = requests.post(
51
+ url,
52
+ data=data,
53
+ )
54
+
55
+ try:
56
+ body = response.json()
57
+ body = dict(body)
58
+ except Exception:
59
+ body = None
60
+
61
+ return Response(
62
+ status_code=response.status_code,
63
+ body=body,
64
+ )
65
+
66
+
67
+ class NotebookSyncer:
68
+ """Utility class that helps syncing a notebook to a CMS (Content Management System).
69
+
70
+ The CMS must have an endpoint that satisfies the following constraints:
71
+
72
+ - It must accept a post request with fields *_id*, *displayName* and *markdown*.
73
+ - The response body must have a key *_id* whose value should be
74
+ a unique string identifier of the content.
75
+
76
+ Creating and updating content is handled in the following way:
77
+
78
+ - On the first request, an empty string is sent as *_id*.
79
+ - If the request succeeds, the value of *_id* (in the response) is stored in a JSON file
80
+ (created in the same directory as the notebook file).
81
+ - On subsequent requests, the stored value is sent as *_id*.
82
+ """
83
+
84
+ ID_KEY = "_id"
85
+
86
+ def __init__(self, post_url: str, request_context: RequestContext) -> None:
87
+ """Creates a notebook syncer instance that connects to the CMS through the post url."""
88
+ self._post_url: str = post_url
89
+ self._context: RequestContext = request_context
90
+ self._notebook_path: str = ""
91
+
92
+ @property
93
+ def notebook_path(self) -> str:
94
+ """Returns the path of the notebook file."""
95
+ return self._notebook_path
96
+
97
+ @notebook_path.setter
98
+ def notebook_path(self, notebook_path: str) -> None:
99
+ """Sets the path of the notebook file."""
100
+ notebook_path = os.path.abspath(notebook_path)
101
+ if not os.path.exists(notebook_path):
102
+ raise FileNotFoundError(
103
+ f"The notebook file '{notebook_path}' does not exist."
104
+ )
105
+ self._notebook_path = notebook_path
106
+
107
+ @property
108
+ def basename(self) -> str:
109
+ """The name of the notebook file without extension."""
110
+ basename = os.path.basename(self.notebook_path)
111
+ return os.path.splitext(basename)[0]
112
+
113
+ @property
114
+ def data_path(self) -> str:
115
+ """The absolute path of the file to store the data returned from the CMS."""
116
+ return os.path.splitext(self.notebook_path)[0] + ".json"
117
+
118
+ @property
119
+ def display_name(self) -> str:
120
+ """Generate a display name for the content."""
121
+ return self.basename.replace("_", " ").title()
122
+
123
+ def _save_content_id(self, content_id: str) -> None:
124
+ """Saves the content id to the data file."""
125
+ filename = self.data_path
126
+ with open(filename, "w") as file:
127
+ json.dump({self.ID_KEY: content_id}, file)
128
+
129
+ def _get_content_id(self) -> str:
130
+ """Returns the content id from the data file if it exists, otherwise an empty string."""
131
+ content_id = ""
132
+
133
+ filename = self.data_path
134
+ if os.path.exists(filename):
135
+ with open(filename) as file:
136
+ content_id = json.load(file)[self.ID_KEY]
137
+ return content_id
138
+
139
+ def _read_notebook(self) -> NotebookNode:
140
+ """Reads the notebook file and returns its content."""
141
+ return nbformat.read(self._notebook_path, as_version=nbformat.NO_CONVERT) # type: ignore
142
+
143
+ def _get_content_from_notebook(self) -> str:
144
+ """Extracts all markdown cells from the notebook and returns it as a merged string."""
145
+ notebook = self._read_notebook()
146
+
147
+ markdown_cells = []
148
+ for cell in notebook.cells:
149
+ if cell.cell_type == "markdown":
150
+ markdown_cells.append(cell.source)
151
+
152
+ markdown_content = "\n\n".join(markdown_cells)
153
+
154
+ return markdown_content
155
+
156
+ def _request_data(self) -> dict[str, str]:
157
+ """Prepares the request data to be sent to the CMS endpoint."""
158
+ return {
159
+ "_id": self._get_content_id(),
160
+ "displayName": self.display_name,
161
+ "markdown": self._get_content_from_notebook(),
162
+ }
163
+
164
+ async def _send_request(self) -> str:
165
+ """Sends the request to the CMS endpoint and returns the content id from the response."""
166
+ response = await self._context.send_request(
167
+ url=self._post_url, data=self._request_data()
168
+ )
169
+
170
+ if response.status_code != 200:
171
+ raise ValueError(
172
+ f"Request to the CMS failed with status code {response.status_code}."
173
+ )
174
+ if response.body is None:
175
+ raise ValueError("Response body from CMS could not be parsed.")
176
+ if self.ID_KEY not in response.body:
177
+ raise ValueError(
178
+ f"Response from the CMS does not contain the expected key '{self.ID_KEY}'."
179
+ )
180
+ result = response.body[self.ID_KEY]
181
+ if not isinstance(result, str):
182
+ raise ValueError(
183
+ f"Response from the CMS does not contain a valid content id: {result}"
184
+ )
185
+ content_id: str = result
186
+
187
+ return content_id
188
+
189
+ async def sync_content(self) -> str:
190
+ """Sends the notebook content to the CMS endpoint and stores the id from the response."""
191
+ content_id = await self._send_request()
192
+ self._save_content_id(content_id)
193
+ return content_id
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: ssb-pubmd
3
- Version: 0.0.8
3
+ Version: 0.0.9
4
4
  Summary: SSB Pubmd
5
5
  License: MIT
6
6
  Author: Olav Landsverk
@@ -15,6 +15,7 @@ Classifier: Programming Language :: Python :: 3.12
15
15
  Classifier: Programming Language :: Python :: 3.13
16
16
  Requires-Dist: click (>=8.0.1)
17
17
  Requires-Dist: nbformat (>=5.10.4,<6.0.0)
18
+ Requires-Dist: playwright (>=1.51.0,<2.0.0)
18
19
  Requires-Dist: requests (>=2.32.3,<3.0.0)
19
20
  Requires-Dist: types-requests (>=2.32.0.20250306,<3.0.0.0)
20
21
  Project-URL: Changelog, https://github.com/statisticsnorway/ssb-pubmd/releases
@@ -52,11 +53,12 @@ Description-Content-Type: text/markdown
52
53
 
53
54
  ## Features
54
55
 
55
- - TODO
56
+ - Helper library for syncing a Jupyter Notebook with a remote server (e.g. a CMS/publishing platform).
57
+ - Supports logging in through a popup browser window.
56
58
 
57
59
  ## Requirements
58
60
 
59
- - TODO
61
+ - This library uses [playwright](https://github.com/microsoft/playwright-python) to create a logged in browser context. This requires installing a [browser binary](https://playwright.dev/python/docs/browsers#install-browsers) and necessary [system dependencies](https://playwright.dev/python/docs/browsers#install-system-dependencies).
60
62
 
61
63
  ## Installation
62
64
 
@@ -0,0 +1,10 @@
1
+ ssb_pubmd/__init__.py,sha256=bEVVJ-sm5MmH5bXnzzyek_nSAvutyB0WNpBy835lE2g,179
2
+ ssb_pubmd/__main__.py,sha256=8D0yedPhnV_2L7nj0s0KUKxNQqPxoussMHGDNM-vyjg,209
3
+ ssb_pubmd/browser_context.py,sha256=V4KyL9Ell2xlW5LnDFQNa0p16TDCnOEXduYtH1UW4p0,2309
4
+ ssb_pubmd/notebook_syncer.py,sha256=w0SXHaT5Ubxzs1CfGHdbe6kW1vqHz7HgY90ZZK1Py0M,6537
5
+ ssb_pubmd/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ ssb_pubmd-0.0.9.dist-info/LICENSE,sha256=tF5bnYv09fgH5ph9t1EpH1MGrVOGTQeswL4dzVeZ_ak,1073
7
+ ssb_pubmd-0.0.9.dist-info/METADATA,sha256=ZIJBSX_aNAPyBjkKvdncKxl8BAox_QvYQY7nOJftjPI,4406
8
+ ssb_pubmd-0.0.9.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
9
+ ssb_pubmd-0.0.9.dist-info/entry_points.txt,sha256=o4oU99zbZNIBKGYWdgdEG6ev-62ZRWEJOe7EOjJaajk,53
10
+ ssb_pubmd-0.0.9.dist-info/RECORD,,
ssb_pubmd/exporter.py DELETED
@@ -1,140 +0,0 @@
1
- import json
2
- import os
3
-
4
- import nbformat
5
- import requests
6
- from nbformat import NotebookNode
7
-
8
-
9
- class _Exporter:
10
- """Helper class for exporting notebook content."""
11
-
12
- ID_KEY = "_id"
13
-
14
- def __init__(self, post_url: str) -> None:
15
- self.post_url: str = post_url
16
- self.notebook_folder: str = ""
17
- self.notebook_filename: str = ""
18
-
19
- @property
20
- def parent_folder(self) -> str:
21
- """The parent folder path, defaults to current working directory."""
22
- if self.notebook_folder:
23
- return self.notebook_folder
24
- else:
25
- return os.getcwd()
26
-
27
- @property
28
- def notebook_path(self) -> str:
29
- """The absolute path of the notebook file."""
30
- return os.path.join(self.parent_folder, self.notebook_filename)
31
-
32
- @property
33
- def basename(self) -> str:
34
- """The name of the notebook file without extension."""
35
- return os.path.splitext(self.notebook_filename)[0]
36
-
37
- @property
38
- def data_path(self) -> str:
39
- """The absolute path of the file to store data returned from the CMS."""
40
- return os.path.join(self.parent_folder, self.basename + ".json")
41
-
42
- @property
43
- def display_name(self) -> str:
44
- """Generate a display name for the content."""
45
- return self.basename.replace("_", " ").title()
46
-
47
- def _save_content_id(self, content_id: str) -> None:
48
- """Saves the content id to the data file."""
49
- filename = self.data_path
50
- with open(filename, "w") as file:
51
- json.dump({self.ID_KEY: content_id}, file)
52
-
53
- def _get_content_id(self) -> str:
54
- """Returns the content id from the data file if it exists, otherwise an empty string."""
55
- content_id = ""
56
-
57
- filename = self.data_path
58
- if os.path.exists(filename):
59
- with open(filename) as file:
60
- content_id = json.load(file)[self.ID_KEY]
61
- return content_id
62
-
63
- def _read_notebook(self) -> NotebookNode:
64
- """Reads the notebook file and returns its content."""
65
- return nbformat.read(self.notebook_path, as_version=nbformat.NO_CONVERT) # type: ignore
66
-
67
- def _get_content_from_notebook(self) -> str:
68
- """Extracts all markdown cells from the notebook and returns it as a merged string."""
69
- notebook = self._read_notebook()
70
-
71
- markdown_cells = []
72
- for cell in notebook.cells:
73
- if cell.cell_type == "markdown":
74
- markdown_cells.append(cell.source)
75
-
76
- markdown_content = "\n\n".join(markdown_cells)
77
-
78
- return markdown_content
79
-
80
- def _request_data(self) -> dict[str, str]:
81
- """Prepares the request data to be sent to the CMS post_url."""
82
- return {
83
- "_id": self._get_content_id(),
84
- "displayName": self.display_name,
85
- "markdown": self._get_content_from_notebook(),
86
- }
87
-
88
- def _send_request(self) -> str:
89
- """Sends the request to the CMS endpoint and returns the content id from the response."""
90
- response = requests.post(
91
- self.post_url,
92
- data=self._request_data(),
93
- )
94
- content_id = response.json()[self.ID_KEY]
95
- return content_id # type: ignore
96
-
97
- def set_notebook(self, notebook_filename: str, notebook_folder: str) -> None:
98
- """Sets the notebook filename and notebook folder."""
99
- self.notebook_filename = notebook_filename
100
- self.notebook_folder = notebook_folder
101
-
102
- def export(self) -> str:
103
- """Main method to export the notebook content to the CMS post_url."""
104
- content_id = self._send_request()
105
- self._save_content_id(content_id)
106
- return content_id
107
-
108
-
109
- def notebook_to_cms(
110
- post_url: str,
111
- notebook_filename: str,
112
- notebook_folder: str = "",
113
- ) -> str:
114
- r"""Sends all the markdown content of a notebook to a CMS endpoint.
115
-
116
- The CMS endpoint must satisfy two constraints:
117
-
118
- - It must accept a post request with fields *_id*, *displayName* and *markdown*.
119
- - The response body must have a key *_id* whose value should be
120
- a unique string identifier of the content.
121
-
122
- Creating and updating content is handled in the following way:
123
-
124
- - On the first request, an empty string is sent as *id*.
125
- - If the request succeeds, the value of *_id* (in the response) is stored in a JSON file
126
- (created in the same directory as the notebook file).
127
- - On subsequent requests, the stored value is sent as *id*.
128
-
129
- Args:
130
- post_url (str): The URL of the CMS endpoint.
131
- notebook_filename (str): The name of the notebook file, e.g. `"my_notebook.ipynb"`.
132
- notebook_folder (str): Sets a custom notebook folder (as absolute path) containing the notebook file.
133
- If not set, the current folder is used.
134
-
135
- Returns:
136
- str: The identifier of the content returned by the CMS endpoint.
137
- """
138
- exporter = _Exporter(post_url)
139
- exporter.set_notebook(notebook_filename, notebook_folder)
140
- return exporter.export()
@@ -1,9 +0,0 @@
1
- ssb_pubmd/__init__.py,sha256=2ivKDmqp2ZqJDgVfcUaA8A3-ahUJj-h3RMktSTzMIMw,87
2
- ssb_pubmd/__main__.py,sha256=8D0yedPhnV_2L7nj0s0KUKxNQqPxoussMHGDNM-vyjg,209
3
- ssb_pubmd/exporter.py,sha256=73l1rOMIXwV5yqTjifuuPfHlBsqngou3lmggVzMXuKA,4974
4
- ssb_pubmd/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
- ssb_pubmd-0.0.8.dist-info/LICENSE,sha256=tF5bnYv09fgH5ph9t1EpH1MGrVOGTQeswL4dzVeZ_ak,1073
6
- ssb_pubmd-0.0.8.dist-info/METADATA,sha256=X3UeJFn9kFqOm6nJkpzf2Y2-JVJQDD1uixXHk8JW97g,3883
7
- ssb_pubmd-0.0.8.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
8
- ssb_pubmd-0.0.8.dist-info/entry_points.txt,sha256=o4oU99zbZNIBKGYWdgdEG6ev-62ZRWEJOe7EOjJaajk,53
9
- ssb_pubmd-0.0.8.dist-info/RECORD,,