ssb-pubmd 0.0.7__tar.gz → 0.0.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ssb_pubmd-0.0.7 → ssb_pubmd-0.0.8}/PKG-INFO +1 -1
- {ssb_pubmd-0.0.7 → ssb_pubmd-0.0.8}/pyproject.toml +2 -1
- ssb_pubmd-0.0.8/src/ssb_pubmd/exporter.py +140 -0
- ssb_pubmd-0.0.7/src/ssb_pubmd/exporter.py +0 -130
- {ssb_pubmd-0.0.7 → ssb_pubmd-0.0.8}/LICENSE +0 -0
- {ssb_pubmd-0.0.7 → ssb_pubmd-0.0.8}/README.md +0 -0
- {ssb_pubmd-0.0.7 → ssb_pubmd-0.0.8}/src/ssb_pubmd/__init__.py +0 -0
- {ssb_pubmd-0.0.7 → ssb_pubmd-0.0.8}/src/ssb_pubmd/__main__.py +0 -0
- {ssb_pubmd-0.0.7 → ssb_pubmd-0.0.8}/src/ssb_pubmd/py.typed +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "ssb-pubmd"
|
|
3
|
-
version = "0.0.7"
|
|
3
|
+
version = "0.0.8"
|
|
4
4
|
description = "SSB Pubmd"
|
|
5
5
|
authors = ["Olav Landsverk <stud-oll@ssb.no>"]
|
|
6
6
|
license = "MIT"
|
|
@@ -39,6 +39,7 @@ typeguard = ">=2.13.3"
|
|
|
39
39
|
xdoctest = { extras = ["colors"], version = ">=0.15.10" }
|
|
40
40
|
myst-parser = { version = ">=0.16.1" }
|
|
41
41
|
uuid = "^1.30"
|
|
42
|
+
ipykernel = "^6.29.5"
|
|
42
43
|
|
|
43
44
|
[tool.pytest.ini_options]
|
|
44
45
|
pythonpath = ["src"]
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import os
|
|
3
|
+
|
|
4
|
+
import nbformat
|
|
5
|
+
import requests
|
|
6
|
+
from nbformat import NotebookNode
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class _Exporter:
|
|
10
|
+
"""Helper class for exporting notebook content."""
|
|
11
|
+
|
|
12
|
+
ID_KEY = "_id"
|
|
13
|
+
|
|
14
|
+
def __init__(self, post_url: str) -> None:
|
|
15
|
+
self.post_url: str = post_url
|
|
16
|
+
self.notebook_folder: str = ""
|
|
17
|
+
self.notebook_filename: str = ""
|
|
18
|
+
|
|
19
|
+
@property
|
|
20
|
+
def parent_folder(self) -> str:
|
|
21
|
+
"""The parent folder path, defaults to current working directory."""
|
|
22
|
+
if self.notebook_folder:
|
|
23
|
+
return self.notebook_folder
|
|
24
|
+
else:
|
|
25
|
+
return os.getcwd()
|
|
26
|
+
|
|
27
|
+
@property
|
|
28
|
+
def notebook_path(self) -> str:
|
|
29
|
+
"""The absolute path of the notebook file."""
|
|
30
|
+
return os.path.join(self.parent_folder, self.notebook_filename)
|
|
31
|
+
|
|
32
|
+
@property
|
|
33
|
+
def basename(self) -> str:
|
|
34
|
+
"""The name of the notebook file without extension."""
|
|
35
|
+
return os.path.splitext(self.notebook_filename)[0]
|
|
36
|
+
|
|
37
|
+
@property
|
|
38
|
+
def data_path(self) -> str:
|
|
39
|
+
"""The absolute path of the file to store data returned from the CMS."""
|
|
40
|
+
return os.path.join(self.parent_folder, self.basename + ".json")
|
|
41
|
+
|
|
42
|
+
@property
|
|
43
|
+
def display_name(self) -> str:
|
|
44
|
+
"""Generate a display name for the content."""
|
|
45
|
+
return self.basename.replace("_", " ").title()
|
|
46
|
+
|
|
47
|
+
def _save_content_id(self, content_id: str) -> None:
|
|
48
|
+
"""Saves the content id to the data file."""
|
|
49
|
+
filename = self.data_path
|
|
50
|
+
with open(filename, "w") as file:
|
|
51
|
+
json.dump({self.ID_KEY: content_id}, file)
|
|
52
|
+
|
|
53
|
+
def _get_content_id(self) -> str:
|
|
54
|
+
"""Returns the content id from the data file if it exists, otherwise an empty string."""
|
|
55
|
+
content_id = ""
|
|
56
|
+
|
|
57
|
+
filename = self.data_path
|
|
58
|
+
if os.path.exists(filename):
|
|
59
|
+
with open(filename) as file:
|
|
60
|
+
content_id = json.load(file)[self.ID_KEY]
|
|
61
|
+
return content_id
|
|
62
|
+
|
|
63
|
+
def _read_notebook(self) -> NotebookNode:
|
|
64
|
+
"""Reads the notebook file and returns its content."""
|
|
65
|
+
return nbformat.read(self.notebook_path, as_version=nbformat.NO_CONVERT) # type: ignore
|
|
66
|
+
|
|
67
|
+
def _get_content_from_notebook(self) -> str:
|
|
68
|
+
"""Extracts all markdown cells from the notebook and returns it as a merged string."""
|
|
69
|
+
notebook = self._read_notebook()
|
|
70
|
+
|
|
71
|
+
markdown_cells = []
|
|
72
|
+
for cell in notebook.cells:
|
|
73
|
+
if cell.cell_type == "markdown":
|
|
74
|
+
markdown_cells.append(cell.source)
|
|
75
|
+
|
|
76
|
+
markdown_content = "\n\n".join(markdown_cells)
|
|
77
|
+
|
|
78
|
+
return markdown_content
|
|
79
|
+
|
|
80
|
+
def _request_data(self) -> dict[str, str]:
|
|
81
|
+
"""Prepares the request data to be sent to the CMS post_url."""
|
|
82
|
+
return {
|
|
83
|
+
"_id": self._get_content_id(),
|
|
84
|
+
"displayName": self.display_name,
|
|
85
|
+
"markdown": self._get_content_from_notebook(),
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
def _send_request(self) -> str:
|
|
89
|
+
"""Sends the request to the CMS endpoint and returns the content id from the response."""
|
|
90
|
+
response = requests.post(
|
|
91
|
+
self.post_url,
|
|
92
|
+
data=self._request_data(),
|
|
93
|
+
)
|
|
94
|
+
content_id = response.json()[self.ID_KEY]
|
|
95
|
+
return content_id # type: ignore
|
|
96
|
+
|
|
97
|
+
def set_notebook(self, notebook_filename: str, notebook_folder: str) -> None:
|
|
98
|
+
"""Sets the notebook filename and notebook folder."""
|
|
99
|
+
self.notebook_filename = notebook_filename
|
|
100
|
+
self.notebook_folder = notebook_folder
|
|
101
|
+
|
|
102
|
+
def export(self) -> str:
|
|
103
|
+
"""Main method to export the notebook content to the CMS post_url."""
|
|
104
|
+
content_id = self._send_request()
|
|
105
|
+
self._save_content_id(content_id)
|
|
106
|
+
return content_id
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def notebook_to_cms(
|
|
110
|
+
post_url: str,
|
|
111
|
+
notebook_filename: str,
|
|
112
|
+
notebook_folder: str = "",
|
|
113
|
+
) -> str:
|
|
114
|
+
r"""Sends all the markdown content of a notebook to a CMS endpoint.
|
|
115
|
+
|
|
116
|
+
The CMS endpoint must satisfy two constraints:
|
|
117
|
+
|
|
118
|
+
- It must accept a post request with fields *_id*, *displayName* and *markdown*.
|
|
119
|
+
- The response body must have a key *_id* whose value should be
|
|
120
|
+
a unique string identifier of the content.
|
|
121
|
+
|
|
122
|
+
Creating and updating content is handled in the following way:
|
|
123
|
+
|
|
124
|
+
- On the first request, an empty string is sent as *_id*.
|
|
125
|
+
- If the request succeeds, the value of *_id* (in the response) is stored in a JSON file
|
|
126
|
+
(created in the same directory as the notebook file).
|
|
127
|
+
- On subsequent requests, the stored value is sent as *_id*.
|
|
128
|
+
|
|
129
|
+
Args:
|
|
130
|
+
post_url (str): The URL of the CMS endpoint.
|
|
131
|
+
notebook_filename (str): The name of the notebook file, e.g. `"my_notebook.ipynb"`.
|
|
132
|
+
notebook_folder (str): Sets a custom notebook folder (as absolute path) containing the notebook file.
|
|
133
|
+
If not set, the current folder is used.
|
|
134
|
+
|
|
135
|
+
Returns:
|
|
136
|
+
str: The identifier of the content returned by the CMS endpoint.
|
|
137
|
+
"""
|
|
138
|
+
exporter = _Exporter(post_url)
|
|
139
|
+
exporter.set_notebook(notebook_filename, notebook_folder)
|
|
140
|
+
return exporter.export()
|
|
@@ -1,130 +0,0 @@
|
|
|
1
|
-
import json
|
|
2
|
-
import os
|
|
3
|
-
|
|
4
|
-
import nbformat
|
|
5
|
-
import requests
|
|
6
|
-
from nbformat import NotebookNode
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
class _Exporter:
|
|
10
|
-
"""Helper class for exporting markdown-based content."""
|
|
11
|
-
|
|
12
|
-
def __init__(self, endpoint: str) -> None:
|
|
13
|
-
self.endpoint: str = endpoint
|
|
14
|
-
self.notebook_folder: str = ""
|
|
15
|
-
self.notebook_filename: str = ""
|
|
16
|
-
self.content_id: str = ""
|
|
17
|
-
|
|
18
|
-
def _set_working_directory(self) -> None:
|
|
19
|
-
"""Set the working directory to the notebook folder."""
|
|
20
|
-
if self.notebook_folder:
|
|
21
|
-
os.chdir(self.notebook_folder)
|
|
22
|
-
else:
|
|
23
|
-
os.chdir(os.getcwd())
|
|
24
|
-
|
|
25
|
-
def _get_basename(self) -> str:
|
|
26
|
-
"""Returns the name of the notebook file without extension."""
|
|
27
|
-
return os.path.splitext(self.notebook_filename)[0]
|
|
28
|
-
|
|
29
|
-
def _get_display_name(self) -> str:
|
|
30
|
-
"""Generate a display name to send to the CMS endpoint."""
|
|
31
|
-
return self._get_basename().replace("_", " ").title()
|
|
32
|
-
|
|
33
|
-
def _get_data_filename(self) -> str:
|
|
34
|
-
"""Returns the data filename to store the content id returned from the CMS."""
|
|
35
|
-
return self._get_basename() + ".json"
|
|
36
|
-
|
|
37
|
-
def _get_content_id(self) -> str:
|
|
38
|
-
"""Get the content id from the JSON file if it exists."""
|
|
39
|
-
content_id = ""
|
|
40
|
-
data_filename = self._get_data_filename()
|
|
41
|
-
if os.path.exists(data_filename):
|
|
42
|
-
with open(data_filename) as file:
|
|
43
|
-
content_id = json.load(file)["_id"]
|
|
44
|
-
return content_id
|
|
45
|
-
|
|
46
|
-
def _read_notebook(self) -> NotebookNode:
|
|
47
|
-
"""Reads the notebook file and returns its content."""
|
|
48
|
-
return nbformat.read(self.notebook_filename, as_version=nbformat.NO_CONVERT) # type: ignore
|
|
49
|
-
|
|
50
|
-
def _get_content_from_notebook(self) -> str:
|
|
51
|
-
"""Extracts all markdown cells from the notebook and returns it as a merged string."""
|
|
52
|
-
notebook = self._read_notebook()
|
|
53
|
-
markdown_content = ""
|
|
54
|
-
for cell in notebook.cells:
|
|
55
|
-
if cell.cell_type == "markdown":
|
|
56
|
-
markdown_content += cell.source + "\n\n"
|
|
57
|
-
return markdown_content
|
|
58
|
-
|
|
59
|
-
def _prepare_request_data(self) -> dict[str, str]:
|
|
60
|
-
"""Prepares the request data to be sent to the CMS endpoint."""
|
|
61
|
-
return {
|
|
62
|
-
"_id": self._get_content_id(),
|
|
63
|
-
"displayName": self._get_display_name(),
|
|
64
|
-
"markdown": self._get_content_from_notebook(),
|
|
65
|
-
}
|
|
66
|
-
|
|
67
|
-
def _prepare_headers(self) -> dict[str, str]:
|
|
68
|
-
"""Prepares the headers for the request."""
|
|
69
|
-
return {"Content-Type": "application/json"}
|
|
70
|
-
|
|
71
|
-
def _send_request(self) -> None:
|
|
72
|
-
"""Sends the request to the CMS endpoint and returns the content id."""
|
|
73
|
-
data = self._prepare_request_data()
|
|
74
|
-
response = requests.post(
|
|
75
|
-
self.endpoint,
|
|
76
|
-
data=json.dumps(data),
|
|
77
|
-
headers=self._prepare_headers(),
|
|
78
|
-
)
|
|
79
|
-
self.content_id = response.json()["_id"]
|
|
80
|
-
|
|
81
|
-
def _save_content_id(self) -> None:
|
|
82
|
-
"""Saves the content id to a JSON file."""
|
|
83
|
-
with open(self._get_data_filename(), "w") as file:
|
|
84
|
-
json.dump({"_id": self.content_id}, file)
|
|
85
|
-
|
|
86
|
-
def set_notebook(self, notebook_filename: str, notebook_folder: str) -> None:
|
|
87
|
-
"""Uses the notebook_filename and working directory."""
|
|
88
|
-
self.notebook_filename = notebook_filename
|
|
89
|
-
self.notebook_folder = notebook_folder
|
|
90
|
-
self._set_working_directory()
|
|
91
|
-
|
|
92
|
-
def export(self) -> str:
|
|
93
|
-
"""Main method to export the notebook content to the CMS endpoint."""
|
|
94
|
-
self._send_request()
|
|
95
|
-
self._save_content_id()
|
|
96
|
-
return self.content_id
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
def notebook_to_cms(
|
|
100
|
-
notebook_filename: str,
|
|
101
|
-
endpoint: str,
|
|
102
|
-
notebook_folder: str = "",
|
|
103
|
-
) -> str:
|
|
104
|
-
r"""Sends all the markdown content of a notebook to a CMS endpoint.
|
|
105
|
-
|
|
106
|
-
The CMS endpoint must satisfy two constraints:
|
|
107
|
-
|
|
108
|
-
- It must accept a post request with fields *id*, *displayName* and *markdown*.
|
|
109
|
-
- The response body must have a key *_id* whose value should be
|
|
110
|
-
a unique string identifier of the content.
|
|
111
|
-
|
|
112
|
-
Creating and updating content is handled in the following way:
|
|
113
|
-
|
|
114
|
-
- On the first request, an empty string is sent as *id*.
|
|
115
|
-
- If the request succeeds, the value of *_id* (in the response) is stored in a JSON file
|
|
116
|
-
(created in the same directory as the notebook file).
|
|
117
|
-
- On subsequent requests, the stored value is sent as *id*.
|
|
118
|
-
|
|
119
|
-
Args:
|
|
120
|
-
notebook_filename (str): The name of the notebook file, e.g. `"my_notebook.ipynb"`.
|
|
121
|
-
endpoint (str): The URL of the CMS endpoint.
|
|
122
|
-
notebook_folder (str): Sets a custom notebook folder (as absolute path) containing the notebook file.
|
|
123
|
-
If not set, the current folder is used.
|
|
124
|
-
|
|
125
|
-
Returns:
|
|
126
|
-
str: The identifier of the content returned by the CMS endpoint.
|
|
127
|
-
"""
|
|
128
|
-
exporter = _Exporter(endpoint)
|
|
129
|
-
exporter.set_notebook(notebook_filename, notebook_folder)
|
|
130
|
-
return exporter.export()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|