txpyfind 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,33 @@
1
+ # This workflow will upload a Python Package using Twine when a release is created
2
+ # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
3
+
4
name: Upload Python Package

# Publishes the package when a release is created; can also be started manually.
on:
  release:
    types: [created]
  workflow_dispatch:

jobs:
  deploy:

    runs-on: ubuntu-latest

    steps:
    - uses: actions/checkout@v4
      with:
        # the package version is derived from git tags by setuptools-scm,
        # so fetch the complete history including tags instead of a shallow clone
        fetch-depth: 0
    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: '3.8'
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install setuptools wheel build twine
        pip install -r requirements.txt
    - name: Build and publish
      env:
        # credentials are read by twine from the environment
        TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
        TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
      run: |
        python -m build .
        twine upload dist/*
@@ -0,0 +1,38 @@
1
+ # This workflow will install Python dependencies, run tests and lint with a single version of Python
2
+ # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
3
+
4
name: Tests

on: [push, pull_request]

jobs:
  build:

    runs-on: ubuntu-latest
    strategy:
      matrix:
        # versions are quoted so that e.g. 3.10 is not read as the float 3.1
        python-version: ['3.8', '3.9', '3.10', '3.11', '3.12', '3.13']

    steps:
    - uses: actions/checkout@v4
    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: ${{ matrix.python-version }}
    - name: Lint with flake8
      run: |
        pip install flake8
        # stop the build if there are Python syntax errors or undefined names
        flake8 txpyfind --count --select=E9,F63,F7,F82 --show-source --statistics
        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
        flake8 txpyfind --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
    - name: Install package
      run: pip install .
    - name: Install dependencies for test
      run: pip install pytest coverage
    - name: Run test and coverage
      run: |
        coverage erase
        coverage run -m pytest test
        coverage report
        coverage html
@@ -0,0 +1,151 @@
1
+ # generated by setuptools-scm
2
+ txpyfind/_version.py
3
+
4
+ # Development files / directories
5
+ _/
6
+ dev/
7
+ *-dev
8
+ *-dev.*
9
+ dev-*
10
+ dev.*
11
+
12
+ # Editor
13
+ *.swp
14
+ *.swo
15
+ *~
16
+ .vscode
17
+
18
+ # Data
19
+ *.json
20
+
21
+ # <https://github.com/github/gitignore/blob/master/Python.gitignore>
22
+
23
+ # Byte-compiled / optimized / DLL files
24
+ __pycache__/
25
+ *.py[cod]
26
+ *$py.class
27
+
28
+ # C extensions
29
+ *.so
30
+
31
+ # Distribution / packaging
32
+ .Python
33
+ build/
34
+ develop-eggs/
35
+ dist/
36
+ downloads/
37
+ eggs/
38
+ .eggs/
39
+ lib/
40
+ lib64/
41
+ parts/
42
+ sdist/
43
+ var/
44
+ wheels/
45
+ pip-wheel-metadata/
46
+ share/python-wheels/
47
+ *.egg-info/
48
+ .installed.cfg
49
+ *.egg
50
+ MANIFEST
51
+
52
+ # PyInstaller
53
+ # Usually these files are written by a python script from a template
54
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
55
+ *.manifest
56
+ *.spec
57
+
58
+ # Installer logs
59
+ pip-log.txt
60
+ pip-delete-this-directory.txt
61
+
62
+ # Unit test / coverage reports
63
+ htmlcov/
64
+ .tox/
65
+ .nox/
66
+ .coverage
67
+ .coverage.*
68
+ .cache
69
+ nosetests.xml
70
+ coverage.xml
71
+ *.cover
72
+ *.py,cover
73
+ .hypothesis/
74
+ .pytest_cache/
75
+
76
+ # Translations
77
+ *.mo
78
+ *.pot
79
+
80
+ # Django stuff:
81
+ *.log
82
+ local_settings.py
83
+ db.sqlite3
84
+ db.sqlite3-journal
85
+
86
+ # Flask stuff:
87
+ instance/
88
+ .webassets-cache
89
+
90
+ # Scrapy stuff:
91
+ .scrapy
92
+
93
+ # Sphinx documentation
94
+ docs/_build/
95
+
96
+ # PyBuilder
97
+ target/
98
+
99
+ # Jupyter Notebook
100
+ .ipynb_checkpoints
101
+
102
+ # IPython
103
+ profile_default/
104
+ ipython_config.py
105
+
106
+ # pyenv
107
+ .python-version
108
+
109
+ # pipenv
110
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
111
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
112
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
113
+ # install all needed dependencies.
114
+ #Pipfile.lock
115
+
116
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
117
+ __pypackages__/
118
+
119
+ # Celery stuff
120
+ celerybeat-schedule
121
+ celerybeat.pid
122
+
123
+ # SageMath parsed files
124
+ *.sage.py
125
+
126
+ # Environments
127
+ .env
128
+ .venv
129
+ env/
130
+ venv/
131
+ ENV/
132
+ env.bak/
133
+ venv.bak/
134
+
135
+ # Spyder project settings
136
+ .spyderproject
137
+ .spyproject
138
+
139
+ # Rope project settings
140
+ .ropeproject
141
+
142
+ # mkdocs documentation
143
+ /site
144
+
145
+ # mypy
146
+ .mypy_cache/
147
+ .dmypy.json
148
+ dmypy.json
149
+
150
+ # Pyre type checker
151
+ .pyre/
@@ -0,0 +1,5 @@
1
+ 0.1.0 [2025-05-31]
2
+ - first published release
3
+
4
+ 0.0.0 [2022-08-20]
5
+ - dummy release
txpyfind-0.1.0/LICENSE ADDED
File without changes
@@ -0,0 +1,69 @@
1
+ Metadata-Version: 2.1
2
+ Name: txpyfind
3
+ Version: 0.1.0
4
+ Summary: enables Pythonic access to data exports from TYPO3-find
5
+ Author: Donatus Herre
6
+ Author-email: donatus.herre@slub-dresden.de
7
+ License: GPLv3
8
+ Project-URL: homepage, https://github.com/slub/txpyfind
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Operating System :: OS Independent
11
+ Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Intended Audience :: Education
15
+ Classifier: Intended Audience :: Science/Research
16
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
17
+ Requires-Python: >=3.7
18
+ Description-Content-Type: text/x-rst
19
+ License-File: LICENSE
20
+
21
+ ========
22
+ txpyfind
23
+ ========
24
+
25
+ ``txpyfind`` enables access to data exports from `TYPO3-find <https://github.com/subugoe/typo3-find>`_
26
+ in Python. Details on the TYPO3-find setup required for data exports can be found in the section
27
+ `Data export <https://github.com/subugoe/typo3-find#data-export>`_ in the README file of that repository.
28
+
29
+ The three JSON formats ``json-all``, ``json-solr-results`` and ``raw-solr-response`` are already available
30
+ in the TYPO3 extension, see the
31
+ `partials <https://github.com/subugoe/typo3-find/tree/main/Resources/Private/Partials/Formats>`_ used
32
+ to create the three formats.
33
+
34
+ You can use the client class available in this Python package to query these exports. A simple parser
35
+ for the returned JSON objects is also available.
36
+
37
+ Installation
38
+ ============
39
+
40
+ ... via PyPI
41
+ ~~~~~~~~~~~~
42
+
43
+ .. code-block:: bash
44
+
45
+ pip install txpyfind
46
+
47
+ ... or from Github source
48
+ ~~~~~~~~~~~~~~~~~~~~~~~~~
49
+
50
+ .. code-block:: bash
51
+
52
+ pip install git+https://github.com/herreio/txpyfind.git
53
+
54
+
55
+ Usage Example
56
+ =============
57
+
58
+ .. code-block:: python
59
+
60
+ from txpyfind.client import Find
61
+ # create Find instance
62
+ slub_find = Find("https://katalog.slub-dresden.de", document_path="id", export_format="json-ld")
63
+ # retrieve JSON-LD data (detail view)
64
+ slub_ld_doc = slub_find.get_document("0-1132486122")
65
+ # retrieve JSON-LD data (query view)
66
+ slub_ld_q_default = slub_find.get_query("manfred bonitz")
67
+ # ...
68
+
69
+ See `slubfind <https://github.com/slub/slubfind>`_ for a full setup example.
@@ -0,0 +1,49 @@
1
+ ========
2
+ txpyfind
3
+ ========
4
+
5
+ ``txpyfind`` enables access to data exports from `TYPO3-find <https://github.com/subugoe/typo3-find>`_
6
+ in Python. Details on the TYPO3-find setup required for data exports can be found in the section
7
+ `Data export <https://github.com/subugoe/typo3-find#data-export>`_ in the README file of that repository.
8
+
9
+ The three JSON formats ``json-all``, ``json-solr-results`` and ``raw-solr-response`` are already available
10
+ in the TYPO3 extension, see the
11
+ `partials <https://github.com/subugoe/typo3-find/tree/main/Resources/Private/Partials/Formats>`_ used
12
+ to create the three formats.
13
+
14
+ You can use the client class available in this Python package to query these exports. A simple parser
15
+ for the returned JSON objects is also available.
16
+
17
+ Installation
18
+ ============
19
+
20
+ ... via PyPI
21
+ ~~~~~~~~~~~~
22
+
23
+ .. code-block:: bash
24
+
25
+ pip install txpyfind
26
+
27
+ ... or from GitHub source
28
+ ~~~~~~~~~~~~~~~~~~~~~~~~~
29
+
30
+ .. code-block:: bash
31
+
32
+ pip install git+https://github.com/herreio/txpyfind.git
33
+
34
+
35
+ Usage Example
36
+ =============
37
+
38
+ .. code-block:: python
39
+
40
+ from txpyfind.client import Find
41
+ # create Find instance
42
+ slub_find = Find("https://katalog.slub-dresden.de", document_path="id", export_format="json-ld")
43
+ # retrieve JSON-LD data (detail view)
44
+ slub_ld_doc = slub_find.get_document("0-1132486122")
45
+ # retrieve JSON-LD data (query view)
46
+ slub_ld_q_default = slub_find.get_query("manfred bonitz")
47
+ # ...
48
+
49
+ See `slubfind <https://github.com/slub/slubfind>`_ for a full setup example.
@@ -0,0 +1,39 @@
1
[build-system]
# The [project] table and the file-based dynamic dependencies below are only
# understood by recent setuptools releases, hence the raised lower bound.
requires = [
    "setuptools>=62.6",
    "setuptools_scm[toml]",
    "wheel",
]  # PEP 508 specifications.
build-backend = "setuptools.build_meta"

[project]
name = "txpyfind"
description = "enables Pythonic access to data exports from TYPO3-find"
readme = "README.rst"
# name and email belong to the same person, so they form a single entry
authors = [
    {name = "Donatus Herre", email = "donatus.herre@slub-dresden.de"},
]
license = {text = "GPLv3"}
keywords = []
classifiers = [
    "Programming Language :: Python :: 3",
    "Operating System :: OS Independent",
    "License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
    "Development Status :: 3 - Alpha",
    "Intended Audience :: Developers",
    "Intended Audience :: Education",
    "Intended Audience :: Science/Research",
    "Topic :: Software Development :: Libraries :: Python Modules",
]
# version comes from setuptools-scm, dependencies from requirements.txt
dynamic = ["version", "dependencies"]
requires-python = ">=3.7"

[project.urls]
homepage = "https://github.com/slub/txpyfind"

[tool.setuptools.dynamic]
dependencies = {file = ["requirements.txt"]}

[tool.setuptools_scm]
write_to = "txpyfind/_version.py"
File without changes
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
File without changes
@@ -0,0 +1,46 @@
1
+ from txpyfind.client import Find
2
+
3
def test_get_document():
    """
    the detail view of a known record is returned as JSON-LD with a graph entry
    """
    # client configured for JSON-LD exports of the SLUB catalogue
    client = Find("https://katalog.slub-dresden.de", document_path="id", export_format="json-ld")
    # detail view of a single record
    document = client.get_document("0-1132486122")
    assert document is not None
    assert '@graph' in document.raw
    graph = document.raw['@graph']
    assert any(graph)
    first_node = graph[0]
    assert '@id' in first_node
    assert 'katalog.slub-dresden.de' in first_node['@id']
13
+
14
def test_get_query():
    """
    the query view for a search term is returned as JSON-LD with a graph entry
    """
    # client configured for JSON-LD exports of the SLUB catalogue
    client = Find("https://katalog.slub-dresden.de", document_path="id", export_format="json-ld")
    # query view for the given search term
    result = client.get_query("manfred bonitz")
    assert result is not None
    assert '@graph' in result.raw
    graph = result.raw['@graph']
    assert any(graph)
    assert '@id' in graph[0]
23
+
24
def test_get_query_via_url():
    """
    a catalogue URL is parsed and its query view is returned as JSON-LD
    """
    # client configured for JSON-LD exports of the SLUB catalogue
    client = Find("https://katalog.slub-dresden.de", document_path="id", export_format="json-ld")
    # query view derived from an existing search URL
    search_url = "https://katalog.slub-dresden.de/?tx_find_find%5Bq%5D%5Bdefault%5D=manfred+bonitz"
    result = client.get_query_via_url(search_url)
    assert result is not None
    assert '@graph' in result.raw
    graph = result.raw['@graph']
    assert any(graph)
    assert '@id' in graph[0]
33
+
34
def test_pagination():
    """
    scrolling and streaming a query deliver the same number of records
    """
    # client using the default export format
    client = Find("https://katalog.slub-dresden.de", document_path="id")
    # collect all pages at once
    scrolled = client.scroll_get_query("manfred bonitz", batch=10)
    assert scrolled is not None
    assert len(scrolled) > 0
    # iterate all pages lazily
    streamed = client.stream_get_query("manfred bonitz", batch=10)
    assert streamed is not None
    streamed = list(streamed)
    assert len(streamed) > 0
    assert len(scrolled) == len(streamed)
@@ -0,0 +1,7 @@
1
+ """
2
+ With ``txpyfind`` you can access data exports from TYPO3-find.
3
+ """
4
+ from ._version import __version__, version, version_tuple
5
+ from . import client, parser
6
+
7
+ __all__ = ["__version__", "version", "version_tuple", "client", "parser"]
@@ -0,0 +1,21 @@
1
+ # file generated by setuptools-scm
2
+ # don't change, don't track in version control
3
+
4
+ __all__ = ["__version__", "__version_tuple__", "version", "version_tuple"]
5
+
6
+ TYPE_CHECKING = False
7
+ if TYPE_CHECKING:
8
+ from typing import Tuple
9
+ from typing import Union
10
+
11
+ VERSION_TUPLE = Tuple[Union[int, str], ...]
12
+ else:
13
+ VERSION_TUPLE = object
14
+
15
+ version: str
16
+ __version__: str
17
+ __version_tuple__: VERSION_TUPLE
18
+ version_tuple: VERSION_TUPLE
19
+
20
+ __version__ = version = '0.1.0'
21
+ __version_tuple__ = version_tuple = (0, 1, 0)
@@ -0,0 +1,333 @@
1
+ """
2
+ client module of ``txpyfind`` package
3
+ """
4
+ import re
5
+ import logging
6
+ from . import utils
7
+ from .parser import JSONResponse
8
+ from .urlparse import URLParser
9
+
10
+
11
class Find:
    """
    ``Find`` class from ``txpyfind.client`` module

    Builds URLs for the data exports of a TYPO3-find instance, requests them
    and hands the returned text to a parser class.
    """

    def __init__(
            self,
            base_url,
            document_path=None,
            query_types=None,
            facets=None,
            count_limit=100,
            sort_pattern=None,
            export_format="raw-solr-response",
            export_page=1369315139,
            parser_class=JSONResponse):
        """
        create a client for the TYPO3-find instance at ``base_url``

        - ``document_path``: path segment of the detail view; without it no document URLs can be built
        - ``query_types``: accepted query types, defaults to ``["default"]``
        - ``facets``: if given as list, facets not contained in it are skipped with a warning
        - ``count_limit``: upper bound for the ``count`` parameter of a query
        - ``sort_pattern``: object with a ``match`` method used to validate sort instructions
        - ``export_format``: default value of the ``data-format`` parameter
        - ``export_page``: default value of the ``type`` parameter
        - ``parser_class``: default class wrapping the plain response text
        """
        self.base_url = base_url
        self.document_path = document_path
        if query_types is None:
            query_types = ["default"]
        self.query_types = query_types
        self.facets = facets
        self.count_limit = count_limit
        self.sort_pattern = sort_pattern
        self.document_url = None
        if document_path is not None:
            self.document_url = f"{self.base_url}/{self.document_path}"
        self.export_page = export_page
        self.export_format = export_format
        self.parser_class = parser_class
        self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}")

    def set_data_params(
            self,
            url,
            data_format=None,
            type_num=None):
        """
        add data exports parameters as initial parameters to given URL

        ``data_format`` and ``type_num`` fall back to the instance defaults.
        """
        if data_format is None:
            data_format = self.export_format
        if type_num is None:
            type_num = self.export_page
        return utils.set_tx_param_data(
            url,
            data_format=data_format,
            type_num=type_num)

    def add_data_params(
            self,
            url,
            data_format=None,
            type_num=None):
        """
        add data exports parameters as subsequent parameters to given URL

        ``data_format`` and ``type_num`` fall back to the instance defaults.
        """
        if data_format is None:
            data_format = self.export_format
        if type_num is None:
            type_num = self.export_page
        return utils.add_tx_param_data(
            url,
            data_format=data_format,
            type_num=type_num)

    def add_facet_params(
            self,
            url,
            facet=None):
        """
        add facet parameters as subsequent parameters to given URL

        ``facet`` is iterated: string items are looked up in ``facet`` itself
        (i.e. ``facet`` is a mapping of facet name to value), dict items
        contribute each of their name/value pairs.
        """
        if facet is not None:
            for fct in facet:
                if isinstance(fct, str):
                    if isinstance(self.facets, list) and fct not in self.facets:
                        self.logger.warning("Unknown facet type %s!", fct)
                        continue
                    url = utils.add_tx_param(url, ["facet", fct, utils.url_encode(facet[fct])], 1)
                elif isinstance(fct, dict):
                    for k in fct:
                        if isinstance(self.facets, list) and k not in self.facets:
                            self.logger.warning("Unknown facet type %s!", k)
                            continue
                        url = utils.add_tx_param(url, ["facet", k, utils.url_encode(fct[k])], 1)
        return url

    def url_parser(self, url):
        """
        create a ``URLParser`` for given URL
        """
        return URLParser(url)

    def url_document(
            self,
            doc_id,
            data_format=None,
            type_num=None):
        """
        get the URL for the detail view of the document given by id

        Returns ``None`` if the instance was created without ``document_path``.
        """
        if self.document_url is not None:
            doc_url = f"{self.document_url}/{doc_id}"
            return self.set_data_params(
                doc_url,
                data_format=data_format,
                type_num=type_num)
        return None

    def get_document(
            self,
            document_id,
            data_format=None,
            type_num=None,
            parser_class=None):
        """
        get the detail view of the document given by id

        The response text is wrapped in ``parser_class`` or, if that is not
        given, in the instance's parser class; without any parser class the
        plain text is returned. Returns ``None`` if no URL could be built or
        the request delivered nothing.
        """
        url = self.url_document(
            document_id,
            data_format=data_format,
            type_num=type_num)
        if url is not None:
            doc = utils.plain_request(url)
            if doc is not None:
                if parser_class is not None:
                    return parser_class(doc)
                if self.parser_class is not None:
                    return self.parser_class(doc)
                return doc
        return None

    def url_query(
            self,
            query,
            qtype="default",
            facet=None,
            page=0,
            count=0,
            sort="",
            data_format=None,
            type_num=None):
        """
        get the URL for the query view

        Unknown query types fall back to ``"default"``, ``count`` is capped at
        ``count_limit`` and sort instructions rejected by ``sort_pattern`` are
        dropped; each of these cases is logged as a warning.
        """
        if qtype not in self.query_types:
            self.logger.warning("Unknown query type!")
            qtype = "default"
        url = utils.set_tx_param(self.base_url, ["q", qtype], utils.url_encode(query))
        url = self.add_data_params(url, data_format=data_format, type_num=type_num)
        url = self.add_facet_params(url, facet=facet)
        if page:
            url = utils.add_tx_param(url, "page", page)
        if count:
            if count > self.count_limit:
                self.logger.warning("Count %d exceeds limit!", count)
                count = self.count_limit
            url = utils.add_tx_param(url, "count", count)
        if sort != "" and self.sort_pattern is not None and not isinstance(
                self.sort_pattern.match(sort), re.Match):
            self.logger.warning("Sort instruction %s is unknown!", sort)
            sort = ""
        if sort != "":
            url = utils.add_tx_param(url, "sort", utils.url_encode(sort))
        return url

    def get_query(
            self,
            query,
            qtype="default",
            facet=None,
            page=0,
            count=0,
            sort="",
            data_format=None,
            type_num=None,
            parser_class=None):
        """
        get query view

        The response text is wrapped in ``parser_class`` or, if that is not
        given, in the instance's parser class; without any parser class the
        plain text is returned. Returns ``None`` if the request delivered nothing.
        """
        url = self.url_query(
            query,
            qtype=qtype,
            facet=facet,
            page=page,
            count=count,
            sort=sort,
            data_format=data_format,
            type_num=type_num)
        response = utils.plain_request(url)
        if response is not None:
            if parser_class is not None:
                return parser_class(response)
            if self.parser_class is not None:
                return self.parser_class(response)
            return response
        return None

    def get_query_via_url(
            self,
            url,
            data_format=None,
            type_num=None,
            parser_class=None):
        """
        get query view via url

        The URL is parsed for query, query type, facets, page, count and sort;
        returns ``None`` if the parser does not report the URL as ok.
        """
        url = self.url_parser(url)
        if url.is_ok:
            return self.get_query(
                url.query,
                qtype=url.qtype,
                facet=url.facets,
                page=url.page,
                count=url.count,
                sort=url.sort,
                data_format=data_format,
                type_num=type_num,
                parser_class=parser_class)
        return None

    def _prepare_paging(self, data_format, batch, action):
        """
        normalize data format and batch size for paged requests

        ``action`` is the word that starts the warning about the data format.
        """
        if data_format is None:
            data_format = self.export_format
        if data_format != "raw-solr-response":
            self.logger.warning(
                "%s only supports data format of type 'raw-solr-response'!", action)
            data_format = "raw-solr-response"
        # url_query caps the count parameter at count_limit, so the number of
        # pages has to be computed with the same capped value; otherwise the
        # last records of the result set would never be requested
        if self.count_limit and batch > self.count_limit:
            self.logger.warning("Batch size %d exceeds limit!", batch)
            batch = self.count_limit
        return data_format, batch

    @staticmethod
    def _solr_response(response):
        """
        get the ``response`` entry of a parsed answer or ``None`` if there is none
        """
        raw = getattr(response, "raw", None)
        if isinstance(raw, dict) and "response" in raw:
            return raw["response"]
        return None

    @staticmethod
    def _page_count(total, batch):
        """
        number of pages of size ``batch`` needed for ``total`` records
        """
        # ceiling division on integers (no rounding through floats)
        return -(-total // batch)

    def scroll_get_query(
            self,
            query,
            qtype="default",
            facet=None,
            batch=20,
            sort="",
            data_format="raw-solr-response",
            type_num=None,
            parser_class=None):
        """
        get all pages of a query view

        A first request determines ``numFound``; afterwards the pages are
        requested with ``batch`` records each (capped at ``count_limit``).
        Returns the list of collected documents or ``None`` if the first
        request did not deliver a usable answer. A warning is logged if the
        number of collected documents differs from ``numFound``.
        """
        data_format, batch = self._prepare_paging(data_format, batch, "Scrolling")
        response = self.get_query(
            query,
            qtype=qtype,
            facet=facet,
            sort=sort,
            data_format=data_format,
            type_num=type_num,
            parser_class=parser_class)
        data = self._solr_response(response)
        if data is None:
            return None
        total = data["numFound"]
        docs = []
        for i in range(1, self._page_count(total, batch) + 1):
            response_i = self.get_query(
                query,
                qtype=qtype,
                facet=facet,
                page=i,
                count=batch,
                sort=sort,
                data_format=data_format,
                type_num=type_num,
                parser_class=parser_class)
            data_i = self._solr_response(response_i)
            if data_i is not None and "docs" in data_i:
                docs.extend(data_i["docs"])
        found = len(docs)
        if total != found:
            self.logger.warning(
                "Expected %d record%s for query %s. Got %d record%s.",
                total, 's' if total != 1 else '', query, found, 's' if found != 1 else '')
        return docs

    def stream_get_query(
            self,
            query,
            qtype="default",
            facet=None,
            batch=20,
            sort="",
            data_format="raw-solr-response",
            type_num=None,
            parser_class=None):
        """
        iterate all pages of a query view

        Generator variant of ``scroll_get_query``: yields the documents page
        by page and yields nothing if the first request did not deliver a
        usable answer.
        """
        data_format, batch = self._prepare_paging(data_format, batch, "Streaming")
        response = self.get_query(
            query,
            qtype=qtype,
            facet=facet,
            sort=sort,
            data_format=data_format,
            type_num=type_num,
            parser_class=parser_class)
        data = self._solr_response(response)
        if data is None:
            return
        total = data["numFound"]
        for i in range(1, self._page_count(total, batch) + 1):
            # type_num and parser_class are passed on for every page so that
            # the paged requests use the same settings as the first request
            response_i = self.get_query(
                query,
                qtype=qtype,
                facet=facet,
                page=i,
                count=batch,
                sort=sort,
                data_format=data_format,
                type_num=type_num,
                parser_class=parser_class)
            data_i = self._solr_response(response_i)
            if data_i is not None and "docs" in data_i:
                yield from data_i["docs"]
@@ -0,0 +1,44 @@
1
+ """
2
+ parser module of ``txpyfind`` package
3
+ """
4
+ import html
5
+ import json
6
+ import logging
7
+
8
+
9
class JSONResponse:
    """
    ``JSONResponse`` class from ``txpyfind.parser`` module

    Wraps the plain text of a response: ``plain`` keeps the given text,
    ``raw`` the decoded JSON (``None`` if decoding failed) and ``fields``
    the top-level keys if the decoded JSON is an object.
    """

    def __init__(self, plain):
        self.plain = plain
        self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}")
        try:
            self.raw = json.loads(plain)
        except (json.decoder.JSONDecodeError, TypeError) as err:
            # TypeError is raised for payloads that are no text at all (e.g. None);
            # they are treated like undecodable text instead of crashing
            self.logger.error(err)
            self.raw = None
        self.fields = self._names(raw=self.raw)

    def _names(self, raw=None):
        """
        get the keys of ``raw`` (default: the decoded response) if it is a dict, else an empty list
        """
        if raw is None:
            raw = self.raw
        if isinstance(raw, dict):
            return list(raw.keys())
        return []

    def _field(self, name, raw=None):
        """
        get the unescaped value of field ``name`` from ``raw`` (default: the decoded response)

        Returns ``None`` if ``raw`` is no dict or does not contain ``name``.
        """
        if raw is None:
            raw = self.raw
        if isinstance(raw, dict) and name in raw:
            return self._unescape(raw[name])
        return None

    def _unescape(self, value):
        """
        strip whitespace and resolve HTML entities in a string or a list of strings

        Lists are only processed if they are not empty and every item is a
        string that is not blank; any other value is returned unchanged.
        """
        if isinstance(value, str):
            return html.unescape(value.strip())
        if isinstance(value, list) and len(value) > 0 and \
                all(isinstance(v, str) and len(v.strip()) > 0 for v in value):
            return [html.unescape(v.strip()) for v in value]
        return value
@@ -0,0 +1,141 @@
1
+ """
2
+ urlparse module of ``txpyfind`` package
3
+ """
4
+ import re
5
+ from urllib.parse import unquote_plus as unquote
6
+
7
# query parameter: captures the query type and the query value
QUERY = re.compile(r"tx_find_find\[q\]\[([^]]*)\]=([^&]*)")
# any parameter value that contains an encoded ampersand (%26)
QUERY_AMP = re.compile(r"=([^&]*%26[^&]*)")
# temporary replacement for "%" used to protect encoded values while unquoting
SUBSTITUTE = "%#"
# facet parameter: captures the two bracketed parts (used as facet name and value)
FACET = re.compile(r"tx_find_find\[facet\]\[([^]]*)\]\[([^]]*)\]=1&?")
# page parameter: captures the digits (possibly none)
PAGE = re.compile(r"tx_find_find\[page\]=(\d*)&?")
# count parameter: captures the digits (possibly none)
COUNT = re.compile(r"tx_find_find\[count\]=(\d*)&?")
# sort parameter: captures two runs of letters separated by "+" or a space
SORT = re.compile(r"tx_find_find\[sort\]=([a-zA-Z]*)[+ ]([a-zA-Z]*)&?")
14
+
15
+
16
class URLParser:
    """
    ``URLParser`` class from ``txpyfind.urlparse`` module

    Extracts query, query type, facets, page, count and sort from a URL.
    ``is_ok`` tells whether exactly one query parameter was found; if not,
    the other attributes keep their empty defaults.
    """

    def __init__(self, url):
        self.url = url
        # defaults, so that every attribute exists even if the URL is not ok
        self.is_ok = False
        self.query = None
        self.qtype = None
        self.facets = {}
        self.page = 0
        self.count = 0
        self.sort = ""
        query_details = get_query(url)
        if len(query_details) > 1:
            self.query = query_details[1]
            self.qtype = query_details[0]
            self.is_ok = True
            self.facets = get_facets(url)
            self.page = get_page(url)
            self.count = get_count(url)
            self.sort = get_sort(url)
33
+
34
+
35
def preserve_ampersand(url):
    """
    preserve ampersand (``&``) in given URL

    If exactly one parameter value contains an encoded ampersand, every ``%``
    in that value is replaced by the substitute marker. Returns the URL and a
    flag telling whether the replacement took place.
    """
    candidates = QUERY_AMP.findall(url)
    if len(candidates) != 1:
        return url, False
    value = candidates[0]
    masked = value.replace("%", SUBSTITUTE)
    return url.replace(value, masked), True
44
+
45
+
46
def find_query(url):
    """
    find query parameter in given URL

    Returns the list of (query type, query value) matches of the unquoted URL.
    """
    masked_url, has_ampersand = preserve_ampersand(url)
    matches = QUERY.findall(unquote(masked_url))
    if not has_ampersand:
        return matches
    # undo the masking and decode the protected value afterwards
    restored = []
    for match in matches:
        restored.append(
            tuple(unquote(part.replace(SUBSTITUTE, "%")) for part in match))
    return restored
57
+
58
+
59
def find_facets(url):
    """
    find facet parameters in given URL (after unquoting it)
    """
    decoded = unquote(url)
    return FACET.findall(decoded)
64
+
65
+
66
def find_page(url):
    """
    find page parameter in given URL (after unquoting it)
    """
    decoded = unquote(url)
    return PAGE.findall(decoded)
71
+
72
+
73
def find_count(url):
    """
    find count parameter in given URL (after unquoting it)
    """
    decoded = unquote(url)
    return COUNT.findall(decoded)
78
+
79
+
80
def find_sort(url):
    """
    find sort parameter in given URL (after unquoting it)
    """
    decoded = unquote(url)
    return SORT.findall(decoded)
85
+
86
+
87
def get_query(url):
    """
    get query parameter from given URL

    Returns the pair (query type, query value) if exactly one query parameter
    is found, otherwise an empty tuple. Runs of spaces in the value are
    collapsed and surrounding spaces removed.
    """
    found = find_query(url)
    if len(found) != 1:
        return ()
    qtype, qval = found[0][0], found[0][1]
    qval = re.sub(" +", " ", qval)
    qval = re.sub("^ *| *$", "", qval)
    return qtype, qval
99
+
100
+
101
def get_facets(url):
    """
    get facet parameters from given URL

    Returns a list with one single-entry dict per facet found, or an empty
    dict if the URL contains no facet parameter.
    """
    matches = find_facets(url)
    if len(matches) == 0:
        return {}
    return [{match[0]: match[1]} for match in matches]
112
+
113
+
114
def get_page(url):
    """
    get page parameter from given URL

    Returns the page as integer if exactly one page parameter with digits is
    found, otherwise ``0``.
    """
    page = find_page(url)
    # the pattern also matches a parameter without digits, which yields an
    # empty string that cannot be converted to an integer
    if len(page) == 1 and page[0]:
        return int(page[0])
    return 0
122
+
123
+
124
def get_count(url):
    """
    get count parameter from given URL

    Returns the first count parameter as integer if it has digits, otherwise ``0``.
    """
    count = find_count(url)
    # the pattern also matches a parameter without digits, which yields an
    # empty string that cannot be converted to an integer
    if len(count) > 0 and count[0]:
        return int(count[0])
    return 0
132
+
133
+
134
def get_sort(url):
    """
    get sort parameter from given URL

    Returns the two parts of the first sort parameter joined by a space, or an
    empty string if the URL contains no sort parameter.
    """
    matches = find_sort(url)
    if len(matches) == 0:
        return ""
    first, second = matches[0][0], matches[0][1]
    return f"{first} {second}"
@@ -0,0 +1,131 @@
1
+ """
2
+ utils module of ``txpyfind`` package
3
+ """
4
+ import json
5
+ import logging
6
+ from urllib.parse import quote_plus
7
+ from urllib.request import Request, urlopen
8
+
9
+ from ._version import __version__
10
+
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
def get_request(url):
    """
    send HTTP GET request to given URL

    Returns the response body as bytes for HTTP 200; any other status code and
    any exception raised while requesting are logged and ``None`` is returned.
    """
    request = Request(url, headers={"User-Agent": f"txpyfind {__version__}"})
    try:
        with urlopen(request) as response:
            if response.code != 200:
                logger.error("HTTP %d GET %s", response.code, url)
                return None
            return response.read()
    except Exception as exc:
        logger.error(exc)
        return None
29
+
30
+
31
def plain_request(url):
    """
    request data in plain text format from given URL

    Returns the decoded response body or ``None`` if nothing was received or
    decoding failed (the failure is logged).
    """
    payload = get_request(url)
    if not isinstance(payload, bytes):
        return None
    try:
        text = payload.decode()
    except Exception as exc:
        logger.error(exc)
        return None
    return text
42
+
43
+
44
def json_request(url):
    """
    request data in JSON format from given URL

    Returns the decoded JSON or ``None`` if nothing was received or the text
    is no valid JSON (the latter is logged).
    """
    text = plain_request(url)
    if not isinstance(text, str):
        return None
    try:
        return json.loads(text)
    except json.decoder.JSONDecodeError:
        logger.error("Got faulty JSON from URL %s", url)
        return None
55
+
56
+
57
def url_encode(url):
    """
    encode given URL (spaces become ``+``, reserved characters are percent-encoded)
    """
    encoded = quote_plus(url)
    return encoded
62
+
63
+
64
def set_param(url, key, value=None):
    """
    add initial parameter to given URL

    The key is appended after ``?``; ``=value`` follows unless value is ``None``.
    """
    if value is None:
        return f"{url}?{key}"
    return f"{url}?{key}={value}"
72
+
73
+
74
def add_param(url, key, value=None):
    """
    add subsequent parameter to given URL

    The key is appended after ``&``; ``=value`` follows unless value is ``None``.
    """
    if value is None:
        return f"{url}&{key}"
    return f"{url}&{key}={value}"
82
+
83
+
84
def tx_param(key, index=None):
    """
    create URL parameter for TYPO3-find

    ``key`` is a single name or an iterable of names; each name is put in
    square brackets after ``tx_find_find``. An integer ``index`` is appended
    as a further bracketed part.
    """
    parts = [key] if isinstance(key, str) else list(key)
    if isinstance(index, int):
        parts.append(index)
    brackets = "".join(f"[{part}]" for part in parts)
    return f"tx_find_find{brackets}"
95
+
96
+
97
def set_tx_param(url, key, value, index=None):
    """
    add TYPO3-find parameter as initial parameter to given URL
    """
    name = tx_param(key, index=index)
    return set_param(url, name, value)
102
+
103
+
104
def add_tx_param(url, key, value, index=None):
    """
    add TYPO3-find parameter as subsequent parameter to given URL
    """
    name = tx_param(key, index=index)
    return add_param(url, name, value)
109
+
110
+
111
def tx_param_data(data_format, type_num=1369315139):
    """
    create parameters for TYPO3-find data exports

    The result consists of the ``format`` parameter set to ``data``, the
    ``data-format`` parameter and the plain ``type`` parameter.
    """
    format_part = f"{tx_param('format')}=data"
    with_data_format = add_tx_param(format_part, "data-format", data_format)
    return add_param(with_data_format, "type", type_num)
118
+
119
+
120
def set_tx_param_data(url, data_format, type_num=1369315139):
    """
    add parameters for TYPO3-find data exports as initial parameters to given URL
    """
    export_params = tx_param_data(data_format, type_num=type_num)
    return set_param(url, export_params)
125
+
126
+
127
def add_tx_param_data(url, data_format, type_num=1369315139):
    """
    add parameters for TYPO3-find data exports as subsequent parameters to given URL
    """
    export_params = tx_param_data(data_format, type_num=type_num)
    return add_param(url, export_params)
@@ -0,0 +1,69 @@
1
+ Metadata-Version: 2.1
2
+ Name: txpyfind
3
+ Version: 0.1.0
4
+ Summary: enables Pythonic access to data exports from TYPO3-find
5
+ Author: Donatus Herre
6
+ Author-email: donatus.herre@slub-dresden.de
7
+ License: GPLv3
8
+ Project-URL: homepage, https://github.com/slub/txpyfind
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Operating System :: OS Independent
11
+ Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Intended Audience :: Education
15
+ Classifier: Intended Audience :: Science/Research
16
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
17
+ Requires-Python: >=3.7
18
+ Description-Content-Type: text/x-rst
19
+ License-File: LICENSE
20
+
21
+ ========
22
+ txpyfind
23
+ ========
24
+
25
+ ``txpyfind`` enables access to data exports from `TYPO3-find <https://github.com/subugoe/typo3-find>`_
26
+ in Python. Details on the TYPO3-find setup required for data exports can be found in the section
27
+ `Data export <https://github.com/subugoe/typo3-find#data-export>`_ in the README file of that repository.
28
+
29
+ The three JSON formats ``json-all``, ``json-solr-results`` and ``raw-solr-response`` are already available
30
+ in the TYPO3 extension, see the
31
+ `partials <https://github.com/subugoe/typo3-find/tree/main/Resources/Private/Partials/Formats>`_ used
32
+ to create the three formats.
33
+
34
+ You can use the client class available in this Python package to query these exports. A simple parser
35
+ for the returned JSON objects is also available.
36
+
37
+ Installation
38
+ ============
39
+
40
+ ... via PyPI
41
+ ~~~~~~~~~~~~
42
+
43
+ .. code-block:: bash
44
+
45
+ pip install txpyfind
46
+
47
+ ... or from Github source
48
+ ~~~~~~~~~~~~~~~~~~~~~~~~~
49
+
50
+ .. code-block:: bash
51
+
52
+ pip install git+https://github.com/herreio/txpyfind.git
53
+
54
+
55
+ Usage Example
56
+ =============
57
+
58
+ .. code-block:: python
59
+
60
+ from txpyfind.client import Find
61
+ # create Find instance
62
+ slub_find = Find("https://katalog.slub-dresden.de", document_path="id", export_format="json-ld")
63
+ # retrieve JSON-LD data (detail view)
64
+ slub_ld_doc = slub_find.get_document("0-1132486122")
65
+ # retrieve JSON-LD data (query view)
66
+ slub_ld_q_default = slub_find.get_query("manfred bonitz")
67
+ # ...
68
+
69
+ See `slubfind <https://github.com/slub/slubfind>`_ for a full setup example.
@@ -0,0 +1,20 @@
1
+ .gitignore
2
+ CHANGELOG
3
+ LICENSE
4
+ README.rst
5
+ pyproject.toml
6
+ requirements.txt
7
+ .github/workflows/pypi.yml
8
+ .github/workflows/test.yml
9
+ test/__init__.py
10
+ test/test_slub.py
11
+ txpyfind/__init__.py
12
+ txpyfind/_version.py
13
+ txpyfind/client.py
14
+ txpyfind/parser.py
15
+ txpyfind/urlparse.py
16
+ txpyfind/utils.py
17
+ txpyfind.egg-info/PKG-INFO
18
+ txpyfind.egg-info/SOURCES.txt
19
+ txpyfind.egg-info/dependency_links.txt
20
+ txpyfind.egg-info/top_level.txt
@@ -0,0 +1 @@
1
+ txpyfind