txpyfind 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- txpyfind-0.1.0/.github/workflows/pypi.yml +33 -0
- txpyfind-0.1.0/.github/workflows/test.yml +38 -0
- txpyfind-0.1.0/.gitignore +151 -0
- txpyfind-0.1.0/CHANGELOG +5 -0
- txpyfind-0.1.0/LICENSE +0 -0
- txpyfind-0.1.0/PKG-INFO +69 -0
- txpyfind-0.1.0/README.rst +49 -0
- txpyfind-0.1.0/pyproject.toml +39 -0
- txpyfind-0.1.0/requirements.txt +0 -0
- txpyfind-0.1.0/setup.cfg +4 -0
- txpyfind-0.1.0/test/__init__.py +0 -0
- txpyfind-0.1.0/test/test_slub.py +46 -0
- txpyfind-0.1.0/txpyfind/__init__.py +7 -0
- txpyfind-0.1.0/txpyfind/_version.py +21 -0
- txpyfind-0.1.0/txpyfind/client.py +333 -0
- txpyfind-0.1.0/txpyfind/parser.py +44 -0
- txpyfind-0.1.0/txpyfind/urlparse.py +141 -0
- txpyfind-0.1.0/txpyfind/utils.py +131 -0
- txpyfind-0.1.0/txpyfind.egg-info/PKG-INFO +69 -0
- txpyfind-0.1.0/txpyfind.egg-info/SOURCES.txt +20 -0
- txpyfind-0.1.0/txpyfind.egg-info/dependency_links.txt +1 -0
- txpyfind-0.1.0/txpyfind.egg-info/top_level.txt +1 -0
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# This workflow will upload a Python Package using Twine when a release is created
|
|
2
|
+
# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
|
|
3
|
+
|
|
4
|
+
name: Upload Python Package
|
|
5
|
+
|
|
6
|
+
on:
|
|
7
|
+
release:
|
|
8
|
+
types: [created]
|
|
9
|
+
workflow_dispatch:
|
|
10
|
+
|
|
11
|
+
jobs:
|
|
12
|
+
deploy:
|
|
13
|
+
|
|
14
|
+
runs-on: ubuntu-latest
|
|
15
|
+
|
|
16
|
+
steps:
|
|
17
|
+
- uses: actions/checkout@v2
|
|
18
|
+
- name: Set up Python
|
|
19
|
+
uses: actions/setup-python@v2
|
|
20
|
+
with:
|
|
21
|
+
python-version: '3.8'
|
|
22
|
+
- name: Install dependencies
|
|
23
|
+
run: |
|
|
24
|
+
python -m pip install --upgrade pip
|
|
25
|
+
pip install setuptools wheel build twine
|
|
26
|
+
pip install -r requirements.txt
|
|
27
|
+
- name: Build and publish
|
|
28
|
+
env:
|
|
29
|
+
TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
|
|
30
|
+
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
|
|
31
|
+
run: |
|
|
32
|
+
python -m build .
|
|
33
|
+
twine upload dist/*
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# This workflow will install Python dependencies, run tests and lint with a single version of Python
|
|
2
|
+
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
|
|
3
|
+
|
|
4
|
+
name: Tests
|
|
5
|
+
|
|
6
|
+
on: [push, pull_request]
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
build:
|
|
10
|
+
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
strategy:
|
|
13
|
+
matrix:
|
|
14
|
+
python-version: ['3.8', '3.9', '3.10', '3.11', '3.12', '3.13']
|
|
15
|
+
|
|
16
|
+
steps:
|
|
17
|
+
- uses: actions/checkout@v3
|
|
18
|
+
- name: Set up Python
|
|
19
|
+
uses: actions/setup-python@v3
|
|
20
|
+
with:
|
|
21
|
+
python-version: ${{ matrix.python-version }}
|
|
22
|
+
- name: Lint with flake8
|
|
23
|
+
run: |
|
|
24
|
+
pip install flake8
|
|
25
|
+
# stop the build if there are Python syntax errors or undefined names
|
|
26
|
+
flake8 txpyfind --count --select=E9,F63,F7,F82 --show-source --statistics
|
|
27
|
+
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
|
|
28
|
+
flake8 txpyfind --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
|
|
29
|
+
- name: Install package
|
|
30
|
+
run: pip install .
|
|
31
|
+
- name: Install dependencies for test
|
|
32
|
+
run: pip install pytest coverage
|
|
33
|
+
- name: Run test and coverage
|
|
34
|
+
run: |
|
|
35
|
+
coverage erase
|
|
36
|
+
coverage run -m pytest test
|
|
37
|
+
coverage report
|
|
38
|
+
coverage html
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
# generated by setuptools-scm
|
|
2
|
+
txpyfind/_version.py
|
|
3
|
+
|
|
4
|
+
# Development files / directories
|
|
5
|
+
_/
|
|
6
|
+
dev/
|
|
7
|
+
*-dev
|
|
8
|
+
*-dev.*
|
|
9
|
+
dev-*
|
|
10
|
+
dev.*
|
|
11
|
+
|
|
12
|
+
# Editor
|
|
13
|
+
*.swp
|
|
14
|
+
*.swo
|
|
15
|
+
*~
|
|
16
|
+
.vscode
|
|
17
|
+
|
|
18
|
+
# Data
|
|
19
|
+
*.json
|
|
20
|
+
|
|
21
|
+
# <https://github.com/github/gitignore/blob/master/Python.gitignore>
|
|
22
|
+
|
|
23
|
+
# Byte-compiled / optimized / DLL files
|
|
24
|
+
__pycache__/
|
|
25
|
+
*.py[cod]
|
|
26
|
+
*$py.class
|
|
27
|
+
|
|
28
|
+
# C extensions
|
|
29
|
+
*.so
|
|
30
|
+
|
|
31
|
+
# Distribution / packaging
|
|
32
|
+
.Python
|
|
33
|
+
build/
|
|
34
|
+
develop-eggs/
|
|
35
|
+
dist/
|
|
36
|
+
downloads/
|
|
37
|
+
eggs/
|
|
38
|
+
.eggs/
|
|
39
|
+
lib/
|
|
40
|
+
lib64/
|
|
41
|
+
parts/
|
|
42
|
+
sdist/
|
|
43
|
+
var/
|
|
44
|
+
wheels/
|
|
45
|
+
pip-wheel-metadata/
|
|
46
|
+
share/python-wheels/
|
|
47
|
+
*.egg-info/
|
|
48
|
+
.installed.cfg
|
|
49
|
+
*.egg
|
|
50
|
+
MANIFEST
|
|
51
|
+
|
|
52
|
+
# PyInstaller
|
|
53
|
+
# Usually these files are written by a python script from a template
|
|
54
|
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
|
55
|
+
*.manifest
|
|
56
|
+
*.spec
|
|
57
|
+
|
|
58
|
+
# Installer logs
|
|
59
|
+
pip-log.txt
|
|
60
|
+
pip-delete-this-directory.txt
|
|
61
|
+
|
|
62
|
+
# Unit test / coverage reports
|
|
63
|
+
htmlcov/
|
|
64
|
+
.tox/
|
|
65
|
+
.nox/
|
|
66
|
+
.coverage
|
|
67
|
+
.coverage.*
|
|
68
|
+
.cache
|
|
69
|
+
nosetests.xml
|
|
70
|
+
coverage.xml
|
|
71
|
+
*.cover
|
|
72
|
+
*.py,cover
|
|
73
|
+
.hypothesis/
|
|
74
|
+
.pytest_cache/
|
|
75
|
+
|
|
76
|
+
# Translations
|
|
77
|
+
*.mo
|
|
78
|
+
*.pot
|
|
79
|
+
|
|
80
|
+
# Django stuff:
|
|
81
|
+
*.log
|
|
82
|
+
local_settings.py
|
|
83
|
+
db.sqlite3
|
|
84
|
+
db.sqlite3-journal
|
|
85
|
+
|
|
86
|
+
# Flask stuff:
|
|
87
|
+
instance/
|
|
88
|
+
.webassets-cache
|
|
89
|
+
|
|
90
|
+
# Scrapy stuff:
|
|
91
|
+
.scrapy
|
|
92
|
+
|
|
93
|
+
# Sphinx documentation
|
|
94
|
+
docs/_build/
|
|
95
|
+
|
|
96
|
+
# PyBuilder
|
|
97
|
+
target/
|
|
98
|
+
|
|
99
|
+
# Jupyter Notebook
|
|
100
|
+
.ipynb_checkpoints
|
|
101
|
+
|
|
102
|
+
# IPython
|
|
103
|
+
profile_default/
|
|
104
|
+
ipython_config.py
|
|
105
|
+
|
|
106
|
+
# pyenv
|
|
107
|
+
.python-version
|
|
108
|
+
|
|
109
|
+
# pipenv
|
|
110
|
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
|
111
|
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
|
112
|
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
|
113
|
+
# install all needed dependencies.
|
|
114
|
+
#Pipfile.lock
|
|
115
|
+
|
|
116
|
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
|
|
117
|
+
__pypackages__/
|
|
118
|
+
|
|
119
|
+
# Celery stuff
|
|
120
|
+
celerybeat-schedule
|
|
121
|
+
celerybeat.pid
|
|
122
|
+
|
|
123
|
+
# SageMath parsed files
|
|
124
|
+
*.sage.py
|
|
125
|
+
|
|
126
|
+
# Environments
|
|
127
|
+
.env
|
|
128
|
+
.venv
|
|
129
|
+
env/
|
|
130
|
+
venv/
|
|
131
|
+
ENV/
|
|
132
|
+
env.bak/
|
|
133
|
+
venv.bak/
|
|
134
|
+
|
|
135
|
+
# Spyder project settings
|
|
136
|
+
.spyderproject
|
|
137
|
+
.spyproject
|
|
138
|
+
|
|
139
|
+
# Rope project settings
|
|
140
|
+
.ropeproject
|
|
141
|
+
|
|
142
|
+
# mkdocs documentation
|
|
143
|
+
/site
|
|
144
|
+
|
|
145
|
+
# mypy
|
|
146
|
+
.mypy_cache/
|
|
147
|
+
.dmypy.json
|
|
148
|
+
dmypy.json
|
|
149
|
+
|
|
150
|
+
# Pyre type checker
|
|
151
|
+
.pyre/
|
txpyfind-0.1.0/CHANGELOG
ADDED
txpyfind-0.1.0/LICENSE
ADDED
|
File without changes
|
txpyfind-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: txpyfind
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: enables Pythonic access to data exports from TYPO3-find
|
|
5
|
+
Author: Donatus Herre
|
|
6
|
+
Author-email: donatus.herre@slub-dresden.de
|
|
7
|
+
License: GPLv3
|
|
8
|
+
Project-URL: homepage, https://github.com/slub/txpyfind
|
|
9
|
+
Classifier: Programming Language :: Python :: 3
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Intended Audience :: Education
|
|
15
|
+
Classifier: Intended Audience :: Science/Research
|
|
16
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
17
|
+
Requires-Python: >=3.7
|
|
18
|
+
Description-Content-Type: text/x-rst
|
|
19
|
+
License-File: LICENSE
|
|
20
|
+
|
|
21
|
+
========
|
|
22
|
+
txpyfind
|
|
23
|
+
========
|
|
24
|
+
|
|
25
|
+
``txpyfind`` enables access to data exports from `TYPO3-find <https://github.com/subugoe/typo3-find>`_
|
|
26
|
+
in Python. Details on the TYPO3-find setup required for data exports can be found in the section
|
|
27
|
+
`Data export <https://github.com/subugoe/typo3-find#data-export>`_ in the README file of that repository.
|
|
28
|
+
|
|
29
|
+
The three JSON formats ``json-all``, ``json-solr-results`` and ``raw-solr-response`` are already available
|
|
30
|
+
in the TYPO3 extension, see the
|
|
31
|
+
`partials <https://github.com/subugoe/typo3-find/tree/main/Resources/Private/Partials/Formats>`_ used
|
|
32
|
+
to create the three formats.
|
|
33
|
+
|
|
34
|
+
You can use the client class available in this Python package to query these exports. A simple parser
|
|
35
|
+
for the returned JSON objects is also available.
|
|
36
|
+
|
|
37
|
+
Installation
|
|
38
|
+
============
|
|
39
|
+
|
|
40
|
+
... via PyPI
|
|
41
|
+
~~~~~~~~~~~~
|
|
42
|
+
|
|
43
|
+
.. code-block:: bash
|
|
44
|
+
|
|
45
|
+
pip install txpyfind
|
|
46
|
+
|
|
47
|
+
... or from Github source
|
|
48
|
+
~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
49
|
+
|
|
50
|
+
.. code-block:: bash
|
|
51
|
+
|
|
52
|
+
pip install git+https://github.com/herreio/txpyfind.git
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
Usage Example
|
|
56
|
+
=============
|
|
57
|
+
|
|
58
|
+
.. code-block:: python
|
|
59
|
+
|
|
60
|
+
from txpyfind.client import Find
|
|
61
|
+
# create Find instance
|
|
62
|
+
slub_find = Find("https://katalog.slub-dresden.de", document_path="id", export_format="json-ld")
|
|
63
|
+
# retrieve JSON-LD data (detail view)
|
|
64
|
+
slub_ld_doc = slub_find.get_document("0-1132486122")
|
|
65
|
+
# retrieve JSON-LD data (query view)
|
|
66
|
+
slub_ld_q_default = slub_find.get_query("manfred bonitz")
|
|
67
|
+
# ...
|
|
68
|
+
|
|
69
|
+
See `slubfind <https://github.com/slub/slubfind>`_ for a full setup example.
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
========
|
|
2
|
+
txpyfind
|
|
3
|
+
========
|
|
4
|
+
|
|
5
|
+
``txpyfind`` enables access to data exports from `TYPO3-find <https://github.com/subugoe/typo3-find>`_
|
|
6
|
+
in Python. Details on the TYPO3-find setup required for data exports can be found in the section
|
|
7
|
+
`Data export <https://github.com/subugoe/typo3-find#data-export>`_ in the README file of that repository.
|
|
8
|
+
|
|
9
|
+
The three JSON formats ``json-all``, ``json-solr-results`` and ``raw-solr-response`` are already available
|
|
10
|
+
in the TYPO3 extension, see the
|
|
11
|
+
`partials <https://github.com/subugoe/typo3-find/tree/main/Resources/Private/Partials/Formats>`_ used
|
|
12
|
+
to create the three formats.
|
|
13
|
+
|
|
14
|
+
You can use the client class available in this Python package to query these exports. A simple parser
|
|
15
|
+
for the returned JSON objects is also available.
|
|
16
|
+
|
|
17
|
+
Installation
|
|
18
|
+
============
|
|
19
|
+
|
|
20
|
+
... via PyPI
|
|
21
|
+
~~~~~~~~~~~~
|
|
22
|
+
|
|
23
|
+
.. code-block:: bash
|
|
24
|
+
|
|
25
|
+
pip install txpyfind
|
|
26
|
+
|
|
27
|
+
... or from Github source
|
|
28
|
+
~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
29
|
+
|
|
30
|
+
.. code-block:: bash
|
|
31
|
+
|
|
32
|
+
pip install git+https://github.com/herreio/txpyfind.git
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
Usage Example
|
|
36
|
+
=============
|
|
37
|
+
|
|
38
|
+
.. code-block:: python
|
|
39
|
+
|
|
40
|
+
from txpyfind.client import Find
|
|
41
|
+
# create Find instance
|
|
42
|
+
slub_find = Find("https://katalog.slub-dresden.de", document_path="id", export_format="json-ld")
|
|
43
|
+
# retrieve JSON-LD data (detail view)
|
|
44
|
+
slub_ld_doc = slub_find.get_document("0-1132486122")
|
|
45
|
+
# retrieve JSON-LD data (query view)
|
|
46
|
+
slub_ld_q_default = slub_find.get_query("manfred bonitz")
|
|
47
|
+
# ...
|
|
48
|
+
|
|
49
|
+
See `slubfind <https://github.com/slub/slubfind>`_ for a full setup example.
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = [
|
|
3
|
+
"setuptools>=42",
|
|
4
|
+
"setuptools_scm[toml]",
|
|
5
|
+
"wheel",
|
|
6
|
+
] # PEP 508 specifications.
|
|
7
|
+
build-backend = "setuptools.build_meta"
|
|
8
|
+
|
|
9
|
+
[project]
|
|
10
|
+
name = "txpyfind"
|
|
11
|
+
description = "enables Pythonic access to data exports from TYPO3-find"
|
|
12
|
+
readme = "README.rst"
|
|
13
|
+
authors = [
|
|
14
|
+
{name = "Donatus Herre"},
|
|
15
|
+
{email = "donatus.herre@slub-dresden.de"}
|
|
16
|
+
]
|
|
17
|
+
license = {text = "GPLv3"}
|
|
18
|
+
keywords = []
|
|
19
|
+
classifiers = [
|
|
20
|
+
"Programming Language :: Python :: 3",
|
|
21
|
+
"Operating System :: OS Independent",
|
|
22
|
+
"License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
|
|
23
|
+
"Development Status :: 3 - Alpha",
|
|
24
|
+
"Intended Audience :: Developers",
|
|
25
|
+
"Intended Audience :: Education",
|
|
26
|
+
"Intended Audience :: Science/Research",
|
|
27
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
28
|
+
]
|
|
29
|
+
dynamic = ["version", "dependencies"]
|
|
30
|
+
requires-python = ">=3.7"
|
|
31
|
+
|
|
32
|
+
[project.urls]
|
|
33
|
+
homepage = "https://github.com/slub/txpyfind"
|
|
34
|
+
|
|
35
|
+
[tool.setuptools.dynamic]
|
|
36
|
+
dependencies = {file = ["requirements.txt"]}
|
|
37
|
+
|
|
38
|
+
[tool.setuptools_scm]
|
|
39
|
+
write_to = "txpyfind/_version.py"
|
|
File without changes
|
txpyfind-0.1.0/setup.cfg
ADDED
|
File without changes
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
"""
Integration tests for :class:`txpyfind.client.Find` against the live
SLUB Dresden catalogue (https://katalog.slub-dresden.de).

NOTE(review): these tests require network access and depend on the
availability of the remote service.
"""
from txpyfind.client import Find


def _json_ld_find():
    # shared fixture: client configured for JSON-LD exports
    return Find("https://katalog.slub-dresden.de", document_path="id", export_format="json-ld")


def _assert_graph(response):
    # common sanity checks on a parsed JSON-LD response
    assert response is not None
    assert '@graph' in response.raw
    assert any(response.raw['@graph'])
    assert '@id' in response.raw['@graph'][0]


def test_get_document():
    # retrieve JSON-LD data (detail view)
    response = _json_ld_find().get_document("0-1132486122")
    _assert_graph(response)
    assert 'katalog.slub-dresden.de' in response.raw['@graph'][0]['@id']


def test_get_query():
    # retrieve JSON-LD data (query view)
    response = _json_ld_find().get_query("manfred bonitz")
    _assert_graph(response)


def test_get_query_via_url():
    # retrieve JSON-LD data (query view, parameters parsed from a URL)
    response = _json_ld_find().get_query_via_url(
        "https://katalog.slub-dresden.de/?tx_find_find%5Bq%5D%5Bdefault%5D=manfred+bonitz")
    _assert_graph(response)


def test_pagination():
    # client with the default export format (raw-solr-response)
    finder = Find("https://katalog.slub-dresden.de", document_path="id")
    # scroll SOLR JSON data (query view + pagination)
    scrolled = finder.scroll_get_query("manfred bonitz", batch=10)
    assert scrolled is not None
    assert len(scrolled) > 0
    # stream SOLR JSON data (query view + pagination)
    streamed = finder.stream_get_query("manfred bonitz", batch=10)
    assert streamed is not None
    streamed = list(streamed)
    assert len(streamed) > 0
    assert len(scrolled) == len(streamed)
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# Version metadata for the ``txpyfind`` package.
# NOTE: normally (re)generated by setuptools-scm ("write_to" in
# pyproject.toml) -- do not edit by hand or track in version control.

__all__ = ["__version__", "__version_tuple__", "version", "version_tuple"]

TYPE_CHECKING = False
if TYPE_CHECKING:
    # only evaluated by static type checkers, never at runtime
    from typing import Tuple, Union

    VERSION_TUPLE = Tuple[Union[int, str], ...]
else:
    VERSION_TUPLE = object

__version__: str
version: str
version_tuple: VERSION_TUPLE
__version_tuple__: VERSION_TUPLE

version = __version__ = '0.1.0'
version_tuple = __version_tuple__ = (0, 1, 0)
|
|
@@ -0,0 +1,333 @@
|
|
|
1
|
+
"""
|
|
2
|
+
client module of ``txpyfind`` package
|
|
3
|
+
"""
|
|
4
|
+
import re
|
|
5
|
+
import logging
|
|
6
|
+
from . import utils
|
|
7
|
+
from .parser import JSONResponse
|
|
8
|
+
from .urlparse import URLParser
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class Find:
|
|
12
|
+
"""
|
|
13
|
+
``Find`` class from ``txpyfind.client`` module
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
def __init__(
|
|
17
|
+
self,
|
|
18
|
+
base_url,
|
|
19
|
+
document_path=None,
|
|
20
|
+
query_types=None,
|
|
21
|
+
facets=None,
|
|
22
|
+
count_limit=100,
|
|
23
|
+
sort_pattern=None,
|
|
24
|
+
export_format="raw-solr-response",
|
|
25
|
+
export_page=1369315139,
|
|
26
|
+
parser_class=JSONResponse):
|
|
27
|
+
self.base_url = base_url
|
|
28
|
+
self.document_path = document_path
|
|
29
|
+
if query_types is None:
|
|
30
|
+
query_types = ["default"]
|
|
31
|
+
self.query_types = query_types
|
|
32
|
+
self.facets = facets
|
|
33
|
+
self.count_limit = count_limit
|
|
34
|
+
self.sort_pattern = sort_pattern
|
|
35
|
+
self.document_url = None
|
|
36
|
+
if document_path is not None:
|
|
37
|
+
self.document_url = f"{self.base_url}/{self.document_path}"
|
|
38
|
+
self.export_page = export_page
|
|
39
|
+
self.export_format = export_format
|
|
40
|
+
self.parser_class = parser_class
|
|
41
|
+
self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}")
|
|
42
|
+
|
|
43
|
+
def set_data_params(
|
|
44
|
+
self,
|
|
45
|
+
url,
|
|
46
|
+
data_format=None,
|
|
47
|
+
type_num=None):
|
|
48
|
+
"""
|
|
49
|
+
add data exports parameters as initial parameters to given URL
|
|
50
|
+
"""
|
|
51
|
+
if data_format is None:
|
|
52
|
+
data_format = self.export_format
|
|
53
|
+
if type_num is None:
|
|
54
|
+
type_num = self.export_page
|
|
55
|
+
return utils.set_tx_param_data(
|
|
56
|
+
url,
|
|
57
|
+
data_format=data_format,
|
|
58
|
+
type_num=type_num)
|
|
59
|
+
|
|
60
|
+
def add_data_params(
|
|
61
|
+
self,
|
|
62
|
+
url,
|
|
63
|
+
data_format=None,
|
|
64
|
+
type_num=None):
|
|
65
|
+
"""
|
|
66
|
+
add data exports parameters as subsequent parameters to given URL
|
|
67
|
+
"""
|
|
68
|
+
if data_format is None:
|
|
69
|
+
data_format = self.export_format
|
|
70
|
+
if type_num is None:
|
|
71
|
+
type_num = self.export_page
|
|
72
|
+
return utils.add_tx_param_data(
|
|
73
|
+
url,
|
|
74
|
+
data_format=data_format,
|
|
75
|
+
type_num=type_num)
|
|
76
|
+
|
|
77
|
+
def add_facet_params(
|
|
78
|
+
self,
|
|
79
|
+
url,
|
|
80
|
+
facet=None):
|
|
81
|
+
"""
|
|
82
|
+
add facet parameters as subsequent parameters to given URL
|
|
83
|
+
"""
|
|
84
|
+
if facet is not None:
|
|
85
|
+
for fct in facet:
|
|
86
|
+
if isinstance(fct, str):
|
|
87
|
+
if isinstance(self.facets, list) and fct not in self.facets:
|
|
88
|
+
self.logger.warning("Unknown facet type %s!", fct)
|
|
89
|
+
continue
|
|
90
|
+
url = utils.add_tx_param(url, ["facet", fct, utils.url_encode(facet[fct])], 1)
|
|
91
|
+
elif isinstance(fct, dict):
|
|
92
|
+
for k in fct:
|
|
93
|
+
if isinstance(self.facets, list) and k not in self.facets:
|
|
94
|
+
self.logger.warning("Unknown facet type %s!", k)
|
|
95
|
+
continue
|
|
96
|
+
url = utils.add_tx_param(url, ["facet", k, utils.url_encode(fct[k])], 1)
|
|
97
|
+
return url
|
|
98
|
+
|
|
99
|
+
def url_parser(self, url):
|
|
100
|
+
return URLParser(url)
|
|
101
|
+
|
|
102
|
+
def url_document(
|
|
103
|
+
self,
|
|
104
|
+
doc_id,
|
|
105
|
+
data_format=None,
|
|
106
|
+
type_num=None):
|
|
107
|
+
"""
|
|
108
|
+
get the URL for the detail view of the document given by id
|
|
109
|
+
"""
|
|
110
|
+
if self.document_url is not None:
|
|
111
|
+
doc_url = f"{self.document_url}/{doc_id}"
|
|
112
|
+
return self.set_data_params(
|
|
113
|
+
doc_url,
|
|
114
|
+
data_format=data_format,
|
|
115
|
+
type_num=type_num)
|
|
116
|
+
return None
|
|
117
|
+
|
|
118
|
+
def get_document(
|
|
119
|
+
self,
|
|
120
|
+
document_id,
|
|
121
|
+
data_format=None,
|
|
122
|
+
type_num=None,
|
|
123
|
+
parser_class=None):
|
|
124
|
+
"""
|
|
125
|
+
get the detail view of the document given by id
|
|
126
|
+
"""
|
|
127
|
+
url = self.url_document(
|
|
128
|
+
document_id,
|
|
129
|
+
data_format=data_format,
|
|
130
|
+
type_num=type_num)
|
|
131
|
+
if url is not None:
|
|
132
|
+
doc = utils.plain_request(url)
|
|
133
|
+
if doc is not None:
|
|
134
|
+
if parser_class is not None:
|
|
135
|
+
return parser_class(doc)
|
|
136
|
+
if self.parser_class is not None:
|
|
137
|
+
return self.parser_class(doc)
|
|
138
|
+
return doc
|
|
139
|
+
return None
|
|
140
|
+
|
|
141
|
+
def url_query(
|
|
142
|
+
self,
|
|
143
|
+
query,
|
|
144
|
+
qtype="default",
|
|
145
|
+
facet=None,
|
|
146
|
+
page=0,
|
|
147
|
+
count=0,
|
|
148
|
+
sort="",
|
|
149
|
+
data_format=None,
|
|
150
|
+
type_num=None):
|
|
151
|
+
"""
|
|
152
|
+
get the URL for the query view
|
|
153
|
+
"""
|
|
154
|
+
if qtype not in self.query_types:
|
|
155
|
+
self.logger.warning("Unknown query type!")
|
|
156
|
+
qtype = "default"
|
|
157
|
+
url = utils.set_tx_param(self.base_url, ["q", qtype], utils.url_encode(query))
|
|
158
|
+
url = self.add_data_params(url, data_format=data_format, type_num=type_num)
|
|
159
|
+
url = self.add_facet_params(url, facet=facet)
|
|
160
|
+
if page:
|
|
161
|
+
url = utils.add_tx_param(url, "page", page)
|
|
162
|
+
if count:
|
|
163
|
+
if count > self.count_limit:
|
|
164
|
+
self.logger.warning("Count %d exceeds limit!", count)
|
|
165
|
+
count = self.count_limit
|
|
166
|
+
url = utils.add_tx_param(url, "count", count)
|
|
167
|
+
if sort != "" and self.sort_pattern is not None and not isinstance(
|
|
168
|
+
self.sort_pattern.match(sort), re.Match):
|
|
169
|
+
self.logger.warning("Sort instruction %s is unknown!", sort)
|
|
170
|
+
sort = ""
|
|
171
|
+
if sort != "":
|
|
172
|
+
url = utils.add_tx_param(url, "sort", utils.url_encode(sort))
|
|
173
|
+
return url
|
|
174
|
+
|
|
175
|
+
def get_query(
|
|
176
|
+
self,
|
|
177
|
+
query,
|
|
178
|
+
qtype="default",
|
|
179
|
+
facet=None,
|
|
180
|
+
page=0,
|
|
181
|
+
count=0,
|
|
182
|
+
sort="",
|
|
183
|
+
data_format=None,
|
|
184
|
+
type_num=None,
|
|
185
|
+
parser_class=None):
|
|
186
|
+
"""
|
|
187
|
+
get query view
|
|
188
|
+
"""
|
|
189
|
+
url = self.url_query(
|
|
190
|
+
query,
|
|
191
|
+
qtype=qtype,
|
|
192
|
+
facet=facet,
|
|
193
|
+
page=page,
|
|
194
|
+
count=count,
|
|
195
|
+
sort=sort,
|
|
196
|
+
data_format=data_format,
|
|
197
|
+
type_num=type_num)
|
|
198
|
+
response = utils.plain_request(url)
|
|
199
|
+
if response is not None:
|
|
200
|
+
if parser_class is not None:
|
|
201
|
+
return parser_class(response)
|
|
202
|
+
if self.parser_class is not None:
|
|
203
|
+
return self.parser_class(response)
|
|
204
|
+
return response
|
|
205
|
+
return None
|
|
206
|
+
|
|
207
|
+
def get_query_via_url(
|
|
208
|
+
self,
|
|
209
|
+
url,
|
|
210
|
+
data_format=None,
|
|
211
|
+
type_num=None,
|
|
212
|
+
parser_class=None):
|
|
213
|
+
"""
|
|
214
|
+
get query view via url
|
|
215
|
+
"""
|
|
216
|
+
url = self.url_parser(url)
|
|
217
|
+
if url.is_ok:
|
|
218
|
+
return self.get_query(
|
|
219
|
+
url.query,
|
|
220
|
+
qtype=url.qtype,
|
|
221
|
+
facet=url.facets,
|
|
222
|
+
page=url.page,
|
|
223
|
+
count=url.count,
|
|
224
|
+
sort=url.sort,
|
|
225
|
+
data_format=data_format,
|
|
226
|
+
type_num=type_num,
|
|
227
|
+
parser_class=parser_class)
|
|
228
|
+
return None
|
|
229
|
+
|
|
230
|
+
def scroll_get_query(
|
|
231
|
+
self,
|
|
232
|
+
query,
|
|
233
|
+
qtype="default",
|
|
234
|
+
facet=None,
|
|
235
|
+
batch=20,
|
|
236
|
+
sort="",
|
|
237
|
+
data_format="raw-solr-response",
|
|
238
|
+
type_num=None,
|
|
239
|
+
parser_class=None):
|
|
240
|
+
"""
|
|
241
|
+
get all pages of a query view
|
|
242
|
+
"""
|
|
243
|
+
if data_format is None:
|
|
244
|
+
data_format = self.export_format
|
|
245
|
+
if data_format != "raw-solr-response":
|
|
246
|
+
self.logger.warning(
|
|
247
|
+
"Scrolling only supports data format of type 'raw-solr-response'!")
|
|
248
|
+
data_format = "raw-solr-response"
|
|
249
|
+
response = self.get_query(
|
|
250
|
+
query,
|
|
251
|
+
qtype=qtype,
|
|
252
|
+
facet=facet,
|
|
253
|
+
sort=sort,
|
|
254
|
+
data_format=data_format,
|
|
255
|
+
type_num=type_num,
|
|
256
|
+
parser_class=parser_class)
|
|
257
|
+
if hasattr(response, "raw") and isinstance(
|
|
258
|
+
response.raw, dict) and "response" in response.raw:
|
|
259
|
+
data = response.raw["response"]
|
|
260
|
+
total = data["numFound"]
|
|
261
|
+
docs = []
|
|
262
|
+
pages = int(total / batch) + (total % batch > 0)
|
|
263
|
+
for i in range(1, pages+1):
|
|
264
|
+
response_i = self.get_query(
|
|
265
|
+
query,
|
|
266
|
+
qtype=qtype,
|
|
267
|
+
facet=facet,
|
|
268
|
+
page=i,
|
|
269
|
+
count=batch,
|
|
270
|
+
sort=sort,
|
|
271
|
+
data_format=data_format,
|
|
272
|
+
type_num=type_num,
|
|
273
|
+
parser_class=parser_class)
|
|
274
|
+
if hasattr(response_i, "raw") and isinstance(
|
|
275
|
+
response_i.raw, dict) and "response" in response_i.raw:
|
|
276
|
+
data_i = response_i.raw["response"]
|
|
277
|
+
if "docs" in data_i:
|
|
278
|
+
for doc in data_i["docs"]:
|
|
279
|
+
docs.append(doc)
|
|
280
|
+
found = len(docs)
|
|
281
|
+
if total != found:
|
|
282
|
+
self.logger.warning(
|
|
283
|
+
"Expected %d record%s for query %s. Got %d record%s.",
|
|
284
|
+
total, 's' if total != 1 else '', query, found, 's' if found != 1 else '')
|
|
285
|
+
return docs
|
|
286
|
+
return None
|
|
287
|
+
|
|
288
|
+
def stream_get_query(
|
|
289
|
+
self,
|
|
290
|
+
query,
|
|
291
|
+
qtype="default",
|
|
292
|
+
facet=None,
|
|
293
|
+
batch=20,
|
|
294
|
+
sort="",
|
|
295
|
+
data_format="raw-solr-response",
|
|
296
|
+
type_num=None,
|
|
297
|
+
parser_class=None):
|
|
298
|
+
"""
|
|
299
|
+
iterate all pages of a query view
|
|
300
|
+
"""
|
|
301
|
+
if data_format is None:
|
|
302
|
+
data_format = self.export_format
|
|
303
|
+
if data_format != "raw-solr-response":
|
|
304
|
+
self.logger.warning(
|
|
305
|
+
"Streaming only supports data format of type 'raw-solr-response'!")
|
|
306
|
+
data_format = "raw-solr-response"
|
|
307
|
+
response = self.get_query(
|
|
308
|
+
query,
|
|
309
|
+
qtype=qtype,
|
|
310
|
+
facet=facet,
|
|
311
|
+
sort=sort,
|
|
312
|
+
data_format=data_format,
|
|
313
|
+
type_num=type_num,
|
|
314
|
+
parser_class=parser_class)
|
|
315
|
+
if hasattr(response, "raw") and isinstance(
|
|
316
|
+
response.raw, dict) and "response" in response.raw:
|
|
317
|
+
data = response.raw["response"]
|
|
318
|
+
total = data["numFound"]
|
|
319
|
+
pages = int(total / batch) + (total % batch > 0)
|
|
320
|
+
for i in range(1, pages+1):
|
|
321
|
+
response_i = self.get_query(
|
|
322
|
+
query,
|
|
323
|
+
qtype=qtype,
|
|
324
|
+
facet=facet,
|
|
325
|
+
page=i,
|
|
326
|
+
count=batch,
|
|
327
|
+
sort=sort,
|
|
328
|
+
data_format=data_format)
|
|
329
|
+
if hasattr(response_i, "raw") and isinstance(
|
|
330
|
+
response_i.raw, dict) and "response" in response_i.raw:
|
|
331
|
+
data_i = response_i.raw["response"]
|
|
332
|
+
if "docs" in data_i:
|
|
333
|
+
yield from data_i["docs"]
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
"""
|
|
2
|
+
parser module of ``txpyfind`` package
|
|
3
|
+
"""
|
|
4
|
+
import html
|
|
5
|
+
import json
|
|
6
|
+
import logging
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class JSONResponse:
|
|
10
|
+
"""
|
|
11
|
+
``JSONResponse`` class from ``txpyfind.parser`` module
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
def __init__(self, plain):
|
|
15
|
+
self.plain = plain
|
|
16
|
+
self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}")
|
|
17
|
+
try:
|
|
18
|
+
self.raw = json.loads(plain)
|
|
19
|
+
except json.decoder.JSONDecodeError as err:
|
|
20
|
+
self.logger.error(err)
|
|
21
|
+
self.raw = None
|
|
22
|
+
self.fields = self._names(raw=self.raw)
|
|
23
|
+
|
|
24
|
+
def _names(self, raw=None):
|
|
25
|
+
if raw is None:
|
|
26
|
+
raw = self.raw
|
|
27
|
+
if isinstance(raw, dict):
|
|
28
|
+
return list(raw.keys())
|
|
29
|
+
return []
|
|
30
|
+
|
|
31
|
+
def _field(self, name, raw=None):
|
|
32
|
+
if raw is None:
|
|
33
|
+
raw = self.raw
|
|
34
|
+
if isinstance(raw, dict) and name in raw:
|
|
35
|
+
return self._unescape(raw[name])
|
|
36
|
+
return None
|
|
37
|
+
|
|
38
|
+
def _unescape(self, value):
|
|
39
|
+
if isinstance(value, str):
|
|
40
|
+
return html.unescape(value.strip())
|
|
41
|
+
if isinstance(value, list) and len(value) > 0 and \
|
|
42
|
+
all(isinstance(v, str) and len(v.strip()) > 0 for v in value):
|
|
43
|
+
return [html.unescape(v.strip()) for v in value]
|
|
44
|
+
return value
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
"""
|
|
2
|
+
urlparse module of ``txpyfind`` package
|
|
3
|
+
"""
|
|
4
|
+
import re
|
|
5
|
+
from urllib.parse import unquote_plus as unquote
|
|
6
|
+
|
|
7
|
+
QUERY = re.compile(r"tx_find_find\[q\]\[([^]]*)\]=([^&]*)")
|
|
8
|
+
QUERY_AMP = re.compile(r"=([^&]*%26[^&]*)")
|
|
9
|
+
SUBSTITUTE = "%#"
|
|
10
|
+
FACET = re.compile(r"tx_find_find\[facet\]\[([^]]*)\]\[([^]]*)\]=1&?")
|
|
11
|
+
PAGE = re.compile(r"tx_find_find\[page\]=(\d*)&?")
|
|
12
|
+
COUNT = re.compile(r"tx_find_find\[count\]=(\d*)&?")
|
|
13
|
+
SORT = re.compile(r"tx_find_find\[sort\]=([a-zA-Z]*)[+ ]([a-zA-Z]*)&?")
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class URLParser:
|
|
17
|
+
"""
|
|
18
|
+
``URLParser`` class from ``txpyfind.urlparse`` module
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
def __init__(self, url):
|
|
22
|
+
self.url = url
|
|
23
|
+
query_details = get_query(url)
|
|
24
|
+
self.is_ok = False
|
|
25
|
+
if len(query_details) > 1:
|
|
26
|
+
self.query = query_details[1]
|
|
27
|
+
self.qtype = query_details[0]
|
|
28
|
+
self.is_ok = True
|
|
29
|
+
self.facets = get_facets(url)
|
|
30
|
+
self.page = get_page(url)
|
|
31
|
+
self.count = get_count(url)
|
|
32
|
+
self.sort = get_sort(url)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def preserve_ampersand(url):
|
|
36
|
+
"""
|
|
37
|
+
preserve ampersand (``&``) in given URL
|
|
38
|
+
"""
|
|
39
|
+
amps = QUERY_AMP.findall(url)
|
|
40
|
+
if len(amps) == 1:
|
|
41
|
+
url = url.replace(amps[0], amps[0].replace("%", SUBSTITUTE))
|
|
42
|
+
return url, True
|
|
43
|
+
return url, False
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def find_query(url):
|
|
47
|
+
"""
|
|
48
|
+
find query parameter in given URL
|
|
49
|
+
"""
|
|
50
|
+
url, ampersand = preserve_ampersand(url)
|
|
51
|
+
url = unquote(url)
|
|
52
|
+
if ampersand:
|
|
53
|
+
return [tuple(unquote(e.replace(SUBSTITUTE, "%"))
|
|
54
|
+
for e in q)
|
|
55
|
+
for q in QUERY.findall(url)]
|
|
56
|
+
return QUERY.findall(url)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def find_facets(url):
    """
    find facet parameters in given URL

    :return: list of (facet field, facet value) tuples
    """
    decoded = unquote(url)
    return FACET.findall(decoded)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def find_page(url):
    """
    find page parameter in given URL

    :return: list of captured page number strings
    """
    decoded = unquote(url)
    return PAGE.findall(decoded)
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def find_count(url):
    """
    find count parameter in given URL

    :return: list of captured count strings
    """
    decoded = unquote(url)
    return COUNT.findall(decoded)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def find_sort(url):
    """
    find sort parameter in given URL

    :return: list of (sort field, sort direction) tuples
    """
    decoded = unquote(url)
    return SORT.findall(decoded)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def get_query(url):
    """
    get query parameter from given URL

    :return: (query type, query value) tuple, or an empty tuple when
        the URL does not contain exactly one query parameter
    """
    matches = find_query(url)
    if len(matches) != 1:
        return ()
    qtype, qval = matches[0]
    # collapse runs of spaces, then trim leading/trailing spaces
    qval = re.sub(" +", " ", qval).strip(" ")
    return qtype, qval
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def get_facets(url):
    """
    get facet parameters from given URL

    :return: list of single-entry ``{field: value}`` dicts; an empty
        list when the URL contains no facet parameters

    NOTE: previously an empty dict ``{}`` was returned for the empty
    case, which was inconsistent with the list returned otherwise; an
    empty list keeps the return type uniform and is equally falsy.
    """
    return [{field: value} for field, value in find_facets(url)]
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def get_page(url):
    """
    get page parameter from given URL

    :return: page number as ``int``, or 0 when absent or empty
    """
    page = find_page(url)
    # the regex group is ``\d*`` and may capture an empty string
    # (e.g. "tx_find_find[page]=&..."), which int() would reject
    if len(page) == 1 and page[0]:
        return int(page[0])
    return 0
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def get_count(url):
    """
    get count parameter from given URL

    :return: result count as ``int``, or 0 when absent or empty
    """
    count = find_count(url)
    # the regex group is ``\d*`` and may capture an empty string
    # (e.g. "tx_find_find[count]=&..."), which int() would reject
    if len(count) > 0 and count[0]:
        return int(count[0])
    return 0
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def get_sort(url):
    """
    get sort parameter from given URL

    :return: "<field> <direction>" string, or an empty string when the
        URL contains no sort parameter
    """
    matches = find_sort(url)
    if not matches:
        return ""
    field, direction = matches[0]
    return f"{field} {direction}"
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
"""
|
|
2
|
+
utils module of ``txpyfind`` package
|
|
3
|
+
"""
|
|
4
|
+
import json
|
|
5
|
+
import logging
|
|
6
|
+
from urllib.parse import quote_plus
|
|
7
|
+
from urllib.request import Request, urlopen
|
|
8
|
+
|
|
9
|
+
from ._version import __version__
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def get_request(url):
    """
    send HTTP GET request to given URL

    :param url: URL to fetch
    :return: raw response body as ``bytes`` on HTTP 200, ``None`` on
        any error (errors are logged, not raised)
    """
    req = Request(url)
    # identify the package (and its version) to the server
    req.add_header("User-Agent", f"txpyfind {__version__}")
    try:
        with urlopen(req) as response:
            if response.code == 200:
                return response.read()
            # NOTE(review): urlopen raises HTTPError for most non-2xx
            # codes, so this branch is rarely reached -- confirm
            logger.error("HTTP %d GET %s", response.code, url)
    except Exception as exc:
        # best-effort fetch: swallow network/HTTP errors after logging
        logger.error(exc)
    return None
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def plain_request(url):
    """
    request data in plain text format from given URL

    :return: decoded response body as ``str``, ``None`` on failure
    """
    payload = get_request(url)
    if not isinstance(payload, bytes):
        return None
    try:
        return payload.decode()
    except Exception as exc:
        # decoding failure is logged and reported as None
        logger.error(exc)
        return None
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def json_request(url):
    """
    request data in JSON format from given URL

    :return: parsed JSON object, ``None`` on failure
    """
    plain = plain_request(url)
    if not isinstance(plain, str):
        return None
    try:
        return json.loads(plain)
    except json.decoder.JSONDecodeError:
        logger.error("Got faulty JSON from URL %s", url)
        return None
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def url_encode(url):
    """
    encode given URL

    Percent-encodes reserved characters; spaces become "+".
    """
    encoded = quote_plus(url)
    return encoded
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def set_param(url, key, value=None):
    """
    add initial parameter to given URL

    Appends ``?key`` (or ``?key=value`` when a value is given).
    """
    suffix = key if value is None else f"{key}={value}"
    return f"{url}?{suffix}"
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def add_param(url, key, value=None):
    """
    add subsequent parameter to given URL

    Appends ``&key`` (or ``&key=value`` when a value is given).
    """
    suffix = key if value is None else f"{key}={value}"
    return f"{url}&{suffix}"
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def tx_param(key, index=None):
    """
    create URL parameter for TYPO3-find

    :param key: a single key (``str``) or an iterable of nested keys
    :param index: optional integer index appended as a final bracket
    :return: parameter name such as ``tx_find_find[facet][author][0]``
    """
    if isinstance(key, str):
        brackets = f"[{key}]"
    else:
        parts = [f"[{part}]" for part in key]
        brackets = "".join(parts)
    if isinstance(index, int):
        brackets = f"{brackets}[{index}]"
    return f"tx_find_find{brackets}"
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def set_tx_param(url, key, value, index=None):
    """
    add TYPO3-find parameter as initial parameter to given URL
    """
    param = tx_param(key, index=index)
    return set_param(url, param, value)
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def add_tx_param(url, key, value, index=None):
    """
    add TYPO3-find parameter as subsequent parameter to given URL
    """
    param = tx_param(key, index=index)
    return add_param(url, param, value)
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def tx_param_data(data_format, type_num=1369315139):
    """
    create parameters for TYPO3-find data exports

    :param data_format: name of the configured export format
    :param type_num: TYPO3 page type number of the export view
    :return: query string fragment selecting the data export
    """
    fragment = f"{tx_param('format')}=data"
    fragment = add_tx_param(fragment, "data-format", data_format)
    fragment = add_param(fragment, "type", type_num)
    return fragment
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def set_tx_param_data(url, data_format, type_num=1369315139):
    """
    add parameters for TYPO3-find data exports as initial parameters to given URL
    """
    params = tx_param_data(data_format, type_num=type_num)
    return set_param(url, params)
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def add_tx_param_data(url, data_format, type_num=1369315139):
    """
    add parameters for TYPO3-find data exports as subsequent parameters to given URL
    """
    params = tx_param_data(data_format, type_num=type_num)
    return add_param(url, params)
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: txpyfind
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: enables Pythonic access to data exports from TYPO3-find
|
|
5
|
+
Author: Donatus Herre
|
|
6
|
+
Author-email: donatus.herre@slub-dresden.de
|
|
7
|
+
License: GPLv3
|
|
8
|
+
Project-URL: homepage, https://github.com/slub/txpyfind
|
|
9
|
+
Classifier: Programming Language :: Python :: 3
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Intended Audience :: Education
|
|
15
|
+
Classifier: Intended Audience :: Science/Research
|
|
16
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
17
|
+
Requires-Python: >=3.7
|
|
18
|
+
Description-Content-Type: text/x-rst
|
|
19
|
+
License-File: LICENSE
|
|
20
|
+
|
|
21
|
+
========
|
|
22
|
+
txpyfind
|
|
23
|
+
========
|
|
24
|
+
|
|
25
|
+
``txpyfind`` enables access to data exports from `TYPO3-find <https://github.com/subugoe/typo3-find>`_
|
|
26
|
+
in Python. Details on the TYPO3-find setup required for data exports can be found in the section
|
|
27
|
+
`Data export <https://github.com/subugoe/typo3-find#data-export>`_ in the README file of that repository.
|
|
28
|
+
|
|
29
|
+
The three JSON formats ``json-all``, ``json-solr-results`` and ``raw-solr-response`` are already available
|
|
30
|
+
in the TYPO3 extension, see the
|
|
31
|
+
`partials <https://github.com/subugoe/typo3-find/tree/main/Resources/Private/Partials/Formats>`_ used
|
|
32
|
+
to create the three formats.
|
|
33
|
+
|
|
34
|
+
You can use the client class available in this Python package to query these exports. A simple parser
|
|
35
|
+
for the returned JSON objects is also available.
|
|
36
|
+
|
|
37
|
+
Installation
|
|
38
|
+
============
|
|
39
|
+
|
|
40
|
+
... via PyPI
|
|
41
|
+
~~~~~~~~~~~~
|
|
42
|
+
|
|
43
|
+
.. code-block:: bash
|
|
44
|
+
|
|
45
|
+
pip install txpyfind
|
|
46
|
+
|
|
47
|
+
... or from Github source
|
|
48
|
+
~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
49
|
+
|
|
50
|
+
.. code-block:: bash
|
|
51
|
+
|
|
52
|
+
pip install git+https://github.com/herreio/txpyfind.git
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
Usage Example
|
|
56
|
+
=============
|
|
57
|
+
|
|
58
|
+
.. code-block:: python
|
|
59
|
+
|
|
60
|
+
from txpyfind.client import Find
|
|
61
|
+
# create Find instance
|
|
62
|
+
slub_find = Find("https://katalog.slub-dresden.de", document_path="id", export_format="json-ld")
|
|
63
|
+
# retrieve JSON-LD data (detail view)
|
|
64
|
+
slub_ld_doc = slub_find.get_document("0-1132486122")
|
|
65
|
+
# retrieve JSON-LD data (query view)
|
|
66
|
+
slub_ld_q_default = slub_find.get_query("manfred bonitz")
|
|
67
|
+
# ...
|
|
68
|
+
|
|
69
|
+
See `slubfind <https://github.com/slub/slubfind>`_ for a full setup example.
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
.gitignore
|
|
2
|
+
CHANGELOG
|
|
3
|
+
LICENSE
|
|
4
|
+
README.rst
|
|
5
|
+
pyproject.toml
|
|
6
|
+
requirements.txt
|
|
7
|
+
.github/workflows/pypi.yml
|
|
8
|
+
.github/workflows/test.yml
|
|
9
|
+
test/__init__.py
|
|
10
|
+
test/test_slub.py
|
|
11
|
+
txpyfind/__init__.py
|
|
12
|
+
txpyfind/_version.py
|
|
13
|
+
txpyfind/client.py
|
|
14
|
+
txpyfind/parser.py
|
|
15
|
+
txpyfind/urlparse.py
|
|
16
|
+
txpyfind/utils.py
|
|
17
|
+
txpyfind.egg-info/PKG-INFO
|
|
18
|
+
txpyfind.egg-info/SOURCES.txt
|
|
19
|
+
txpyfind.egg-info/dependency_links.txt
|
|
20
|
+
txpyfind.egg-info/top_level.txt
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
txpyfind
|