ssb-pubmd 0.0.10__tar.gz → 0.0.12__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: ssb-pubmd
3
- Version: 0.0.10
3
+ Version: 0.0.12
4
4
  Summary: SSB Pubmd
5
5
  License: MIT
6
6
  Author: Olav Landsverk
@@ -53,7 +53,6 @@ Description-Content-Type: text/markdown
53
53
 
54
54
  ## Features
55
55
 
56
- - Helper library for syncing a Jupyter Notebook with a remote server (e.g. a CMS/publishing platform).
57
56
  - Supports logging in through a popup browser window.
58
57
 
59
58
  ## Requirements
@@ -70,7 +69,28 @@ pip install ssb-pubmd
70
69
 
71
70
  ## Usage
72
71
 
73
- Please see the [Reference Guide] for details.
72
+ First set environment variables:
73
+
74
+ ```console
75
+ export PUBMD_LOGIN_URL="https://www.example.com/login"
76
+ export PUBMD_POST_URL="https://www.example.com/post"
77
+ ```
78
+
79
+ To log in, run:
80
+
81
+ ```console
82
+ pubmd login
83
+ ```
84
+
85
+ Close the popup browser window when you are logged in.
86
+
87
+ To synchronize markdown content to the CMS server, run:
88
+
89
+ ```console
90
+ pubmd sync <file>
91
+ ```
92
+
93
+ `<file>` should be an absolute or relative path, and the allowed extensions are `.ipynb` and `.md`.
74
94
 
75
95
  ## Contributing
76
96
 
@@ -27,7 +27,6 @@
27
27
 
28
28
  ## Features
29
29
 
30
- - Helper library for syncing a Jupyter Notebook with a remote server (e.g. a CMS/publishing platform).
31
30
  - Supports logging in through a popup browser window.
32
31
 
33
32
  ## Requirements
@@ -44,7 +43,28 @@ pip install ssb-pubmd
44
43
 
45
44
  ## Usage
46
45
 
47
- Please see the [Reference Guide] for details.
46
+ First set environment variables:
47
+
48
+ ```console
49
+ export PUBMD_LOGIN_URL="https://www.example.com/login"
50
+ export PUBMD_POST_URL="https://www.example.com/post"
51
+ ```
52
+
53
+ To log in, run:
54
+
55
+ ```console
56
+ pubmd login
57
+ ```
58
+
59
+ Close the popup browser window when you are logged in.
60
+
61
+ To synchronize markdown content to the CMS server, run:
62
+
63
+ ```console
64
+ pubmd sync <file>
65
+ ```
66
+
67
+ `<file>` should be an absolute or relative path, and the allowed extensions are `.ipynb` and `.md`.
48
68
 
49
69
  ## Contributing
50
70
 
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "ssb-pubmd"
3
- version = "0.0.10"
3
+ version = "0.0.12"
4
4
  description = "SSB Pubmd"
5
5
  authors = ["Olav Landsverk <stud-oll@ssb.no>"]
6
6
  license = "MIT"
@@ -47,7 +47,7 @@ pytest-asyncio = "^0.26.0"
47
47
  pythonpath = ["src"]
48
48
 
49
49
  [tool.poetry.scripts]
50
- ssb-pubmd = "ssb_pubmd.__main__:main"
50
+ pubmd = "ssb_pubmd.__main__:cli"
51
51
 
52
52
  [tool.coverage.paths]
53
53
  source = ["src", "*/site-packages"]
@@ -0,0 +1,6 @@
1
+ """SSB Pubmd."""
2
+
3
+ from .browser_context import BrowserRequestContext as BrowserContext
4
+ from .markdown_syncer import MarkdownSyncer
5
+
6
+ __all__ = ["BrowserContext", "MarkdownSyncer"]
@@ -0,0 +1,52 @@
1
+ """Command-line interface."""
2
+
3
+ import os
4
+
5
+ import click
6
+
7
+ from ssb_pubmd.browser_context import BrowserRequestContext as RequestContext
8
+ from ssb_pubmd.markdown_syncer import MarkdownSyncer
9
+
10
+
11
+ @click.group()
12
+ def cli() -> None:
13
+ """Command-line interface for the ssb_pubmd package."""
14
+ pass
15
+
16
+
17
+ @click.command()
18
+ def login() -> None:
19
+ """Login to the server."""
20
+ login_url = os.getenv("PUBMD_LOGIN_URL", "")
21
+ request_context = RequestContext()
22
+ print(login_url)
23
+ storage_state_file, storage_state = request_context.create_new(login_url)
24
+ click.echo(
25
+ f"The following browser context object is now stored in {storage_state_file}:"
26
+ )
27
+ click.echo(storage_state)
28
+
29
+
30
+ @click.command()
31
+ @click.argument("content_file_path", type=click.Path())
32
+ def sync(content_file_path: str) -> None:
33
+ """Sync the content."""
34
+ post_url = os.getenv("PUBMD_POST_URL", "")
35
+ request_context = RequestContext()
36
+ request_context.recreate_from_file()
37
+
38
+ syncer = MarkdownSyncer(post_url=post_url, request_context=request_context)
39
+ syncer.content_file_path = content_file_path
40
+
41
+ content_id = syncer.sync_content()
42
+
43
+ click.echo(
44
+ f"File '{click.format_filename(content_file_path)}' synced to CMS with content ID: {content_id}"
45
+ )
46
+
47
+
48
+ cli.add_command(login)
49
+ cli.add_command(sync)
50
+
51
+ if __name__ == "__main__":
52
+ cli() # pragma: no cover
@@ -0,0 +1,71 @@
1
+ from playwright.sync_api import BrowserContext
2
+ from playwright.sync_api import StorageState
3
+ from playwright.sync_api import sync_playwright
4
+
5
+ from .markdown_syncer import Response
6
+
7
+ BROWSER_CONTEXT_FILE = "browser_context.json"
8
+
9
+
10
+ class BrowserRequestContext:
11
+ """This class is used to create a logged in browser context from which to send requests."""
12
+
13
+ def __init__(self) -> None:
14
+ """Initializes an empty browser context object."""
15
+ self._storage_state_path: str = BROWSER_CONTEXT_FILE
16
+ self._context: BrowserContext | None = None
17
+
18
+ def create_new(self, login_url: str) -> tuple[str, StorageState]:
19
+ """Creates a browser context by opening a login page and waiting for it to be closed by user.
20
+
21
+ This function also saves the browser context to a file for later use.
22
+ """
23
+ playwright = sync_playwright().start()
24
+ browser = playwright.chromium.launch(headless=False)
25
+
26
+ self._context = browser.new_context()
27
+ login_page = self._context.new_page()
28
+
29
+ login_page.goto(login_url)
30
+ login_page.wait_for_event("close", timeout=0)
31
+
32
+ storage_state = self._context.storage_state(path=self._storage_state_path)
33
+
34
+ return self._storage_state_path, storage_state
35
+
36
+ def recreate_from_file(self) -> BrowserContext:
37
+ """Recreates a browser context object from a file."""
38
+ playwright = sync_playwright().start()
39
+ browser = playwright.chromium.launch(headless=False)
40
+
41
+ self._context = browser.new_context(storage_state=self._storage_state_path)
42
+
43
+ return self._context
44
+
45
+ def send_request(
46
+ self,
47
+ url: str,
48
+ headers: dict[str, str] | None = None,
49
+ data: dict[str, str] | None = None,
50
+ ) -> Response:
51
+ """Sends a request to the specified url, optionally with headers and data, within the browser context."""
52
+ if self._context is None:
53
+ raise ValueError("Browser context has not been created.")
54
+
55
+ api_response = self._context.request.post(
56
+ url,
57
+ params=data,
58
+ )
59
+
60
+ try:
61
+ body = api_response.json()
62
+ body = dict(body)
63
+ except Exception:
64
+ body = None
65
+
66
+ response = Response(
67
+ status_code=api_response.status,
68
+ body=body,
69
+ )
70
+
71
+ return response
@@ -1,6 +1,7 @@
1
1
  import json
2
2
  import os
3
3
  from dataclasses import dataclass
4
+ from enum import Enum
4
5
  from typing import Any
5
6
  from typing import Protocol
6
7
 
@@ -23,7 +24,7 @@ class RequestContext(Protocol):
23
24
  Implementing classes may handle authentication, sessions, etc.
24
25
  """
25
26
 
26
- async def send_request(
27
+ def send_request(
27
28
  self,
28
29
  url: str,
29
30
  headers: dict[str, str] | None = None,
@@ -40,7 +41,7 @@ class BasicRequestContext:
40
41
  """Initializes the basic request context."""
41
42
  pass
42
43
 
43
- async def send_request(
44
+ def send_request(
44
45
  self,
45
46
  url: str,
46
47
  headers: dict[str, str] | None = None,
@@ -64,8 +65,15 @@ class BasicRequestContext:
64
65
  )
65
66
 
66
67
 
67
- class NotebookSyncer:
68
- """This class syncs a notebook to a CMS (Content Management System).
68
+ class FileType(Enum):
69
+ """File extensions for markdown and notebook files."""
70
+
71
+ MARKDOWN = ".md"
72
+ NOTEBOOK = ".ipynb"
73
+
74
+
75
+ class MarkdownSyncer:
76
+ """This class syncs a markdown/notebook file to a CMS (Content Management System).
69
77
 
70
78
  The CMS must have an endpoint that satisfies the following constraints:
71
79
 
@@ -77,43 +85,54 @@ class NotebookSyncer:
77
85
 
78
86
  - On the first request, an empty string is sent as *_id*.
79
87
  - If the request succeeds, the value of *_id* (in the response) is stored in a JSON file
80
- (created in the same directory as the notebook file).
88
+ (created in the same directory as the markdown/notebook file).
81
89
  - On subsequent requests, the stored value is sent as *_id*.
82
90
  """
83
91
 
84
92
  ID_KEY = "_id"
85
93
 
86
94
  def __init__(self, post_url: str, request_context: RequestContext) -> None:
87
- """Creates a notebook syncer instance that connects to the CMS through the post url."""
95
+ """Creates a markdown syncer instance that connects to the CMS through the post url."""
88
96
  self._post_url: str = post_url
89
97
  self._context: RequestContext = request_context
90
- self._notebook_path: str = ""
98
+ self._content_file_path: str = ""
99
+ self._content_file_type: FileType = FileType.MARKDOWN
91
100
 
92
101
  @property
93
- def notebook_path(self) -> str:
94
- """Returns the path of the notebook file."""
95
- return self._notebook_path
96
-
97
- @notebook_path.setter
98
- def notebook_path(self, notebook_path: str) -> None:
99
- """Sets the path of the notebook file."""
100
- notebook_path = os.path.abspath(notebook_path)
101
- if not os.path.exists(notebook_path):
102
- raise FileNotFoundError(
103
- f"The notebook file '{notebook_path}' does not exist."
102
+ def content_file_path(self) -> str:
103
+ """Returns the path of the markdown/notebook file."""
104
+ return self._content_file_path
105
+
106
+ @content_file_path.setter
107
+ def content_file_path(self, content_file_path: str) -> None:
108
+ """Sets the path of the markdown/notebook file."""
109
+ content_file_path = os.path.abspath(content_file_path)
110
+
111
+ if not os.path.exists(content_file_path):
112
+ raise FileNotFoundError(f"The file '{content_file_path}' does not exist.")
113
+
114
+ ext = os.path.splitext(content_file_path)[1]
115
+ for e in FileType:
116
+ if ext == e.value:
117
+ self._content_file_type = e
118
+ break
119
+ else:
120
+ raise ValueError(
121
+ f"The file '{content_file_path}' is not a markdown or notebook file."
104
122
  )
105
- self._notebook_path = notebook_path
123
+
124
+ self._content_file_path = content_file_path
106
125
 
107
126
  @property
108
127
  def basename(self) -> str:
109
- """The name of the notebook file without extension."""
110
- basename = os.path.basename(self.notebook_path)
128
+ """The name of the markdown/notebook file without extension."""
129
+ basename = os.path.basename(self.content_file_path)
111
130
  return os.path.splitext(basename)[0]
112
131
 
113
132
  @property
114
133
  def data_path(self) -> str:
115
134
  """The absolute path of the file to store the data returned from the CMS."""
116
- return os.path.splitext(self.notebook_path)[0] + ".json"
135
+ return os.path.splitext(self.content_file_path)[0] + ".json"
117
136
 
118
137
  @property
119
138
  def display_name(self) -> str:
@@ -138,9 +157,9 @@ class NotebookSyncer:
138
157
 
139
158
  def _read_notebook(self) -> NotebookNode:
140
159
  """Reads the notebook file and returns its content."""
141
- return nbformat.read(self._notebook_path, as_version=nbformat.NO_CONVERT) # type: ignore
160
+ return nbformat.read(self._content_file_path, as_version=nbformat.NO_CONVERT) # type: ignore
142
161
 
143
- def _get_content_from_notebook(self) -> str:
162
+ def _get_content_from_notebook_file(self) -> str:
144
163
  """Extracts all markdown cells from the notebook and returns it as a merged string."""
145
164
  notebook = self._read_notebook()
146
165
 
@@ -153,17 +172,32 @@ class NotebookSyncer:
153
172
 
154
173
  return markdown_content
155
174
 
175
+ def _get_content_from_markdown_file(self) -> str:
176
+ """Returns the content of a markdown file."""
177
+ with open(self._content_file_path) as file:
178
+ markdown_content = file.read()
179
+ return markdown_content
180
+
181
+ def _get_content(self) -> str:
182
+ content = ""
183
+ match self._content_file_type:
184
+ case FileType.MARKDOWN:
185
+ content = self._get_content_from_markdown_file()
186
+ case FileType.NOTEBOOK:
187
+ content = self._get_content_from_notebook_file()
188
+ return content
189
+
156
190
  def _request_data(self) -> dict[str, str]:
157
191
  """Prepares the request data to be sent to the CMS endpoint."""
158
192
  return {
159
193
  "_id": self._get_content_id(),
160
194
  "displayName": self.display_name,
161
- "markdown": self._get_content_from_notebook(),
195
+ "markdown": self._get_content(),
162
196
  }
163
197
 
164
- async def _send_request(self) -> str:
198
+ def _send_request(self) -> str:
165
199
  """Sends the request to the CMS endpoint and returns the content id from the response."""
166
- response = await self._context.send_request(
200
+ response = self._context.send_request(
167
201
  url=self._post_url, data=self._request_data()
168
202
  )
169
203
 
@@ -186,8 +220,8 @@ class NotebookSyncer:
186
220
 
187
221
  return content_id
188
222
 
189
- async def sync_content(self) -> str:
190
- """Sends the notebook content to the CMS endpoint and stores the id from the response."""
191
- content_id = await self._send_request()
223
+ def sync_content(self) -> str:
224
+ """Sends the markdown content to the CMS endpoint and stores the id from the response."""
225
+ content_id = self._send_request()
192
226
  self._save_content_id(content_id)
193
227
  return content_id
@@ -1,6 +0,0 @@
1
- """SSB Pubmd."""
2
-
3
- from .browser_context import BrowserRequestContext as BrowserContext
4
- from .notebook_syncer import NotebookSyncer
5
-
6
- __all__ = ["BrowserContext", "NotebookSyncer"]
@@ -1,13 +0,0 @@
1
- """Command-line interface."""
2
-
3
- import click
4
-
5
-
6
- @click.command()
7
- @click.version_option()
8
- def main() -> None:
9
- """SSB Pubmd."""
10
-
11
-
12
- if __name__ == "__main__":
13
- main(prog_name="ssb-pubmd") # pragma: no cover
@@ -1,73 +0,0 @@
1
- from typing import cast
2
-
3
- from playwright.async_api import BrowserContext
4
- from playwright.async_api import async_playwright
5
-
6
- from .notebook_syncer import Response
7
-
8
-
9
- class BrowserRequestContext:
10
- """This class is used to create a logged in browser context from which to send requests."""
11
-
12
- def __init__(self) -> None:
13
- """Initializes an empty browser context object."""
14
- self._storage_state_path: str = "browser_context.json"
15
- self._context: BrowserContext | None = None
16
-
17
- async def create_new(self, login_url: str) -> BrowserContext:
18
- """Creates a browser context by opening a login page and waiting for it to be closed by user.
19
-
20
- This function also saves the browser context to a file for later use.
21
- """
22
- playwright = await async_playwright().start()
23
- browser = await playwright.chromium.launch(headless=False)
24
-
25
- self._context = await browser.new_context()
26
- login_page = await self._context.new_page()
27
-
28
- await login_page.goto(login_url)
29
- await login_page.wait_for_event("close", timeout=0)
30
-
31
- await self._context.storage_state(path=self._storage_state_path)
32
-
33
- return self._context
34
-
35
- async def recreate_from_file(self) -> BrowserContext:
36
- """Recreates a browser context object from a file."""
37
- playwright = await async_playwright().start()
38
- browser = await playwright.chromium.launch(headless=False)
39
-
40
- self._context = await browser.new_context(
41
- storage_state=self._storage_state_path
42
- )
43
-
44
- return self._context
45
-
46
- async def send_request(
47
- self,
48
- url: str,
49
- headers: dict[str, str] | None = None,
50
- data: dict[str, str] | None = None,
51
- ) -> Response:
52
- """Sends a request to the specified url, optionally with headers and data, within the browser context."""
53
- if self._context is None:
54
- raise ValueError("Browser context has not been created.")
55
-
56
- params = cast(dict[str, str | float | bool], data)
57
- api_response = await self._context.request.post(
58
- url,
59
- params=params,
60
- )
61
-
62
- try:
63
- body = await api_response.json()
64
- body = dict(body)
65
- except Exception:
66
- body = None
67
-
68
- response = Response(
69
- status_code=api_response.status,
70
- body=body,
71
- )
72
-
73
- return response
File without changes