uniprotlib 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,31 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - 'v*'
7
+
8
+ jobs:
9
+ test:
10
+ runs-on: ubuntu-latest
11
+ strategy:
12
+ matrix:
13
+ python-version: ["3.12", "3.13", "3.14"]
14
+ steps:
15
+ - uses: actions/checkout@v4
16
+ - uses: astral-sh/setup-uv@v4
17
+ - run: uv python install ${{ matrix.python-version }}
18
+ - run: uv sync --python ${{ matrix.python-version }}
19
+ - run: uv run python -m pytest tests/ -v
20
+
21
+ publish:
22
+ needs: test
23
+ runs-on: ubuntu-latest
24
+ permissions:
25
+ id-token: write
26
+ environment: pypi
27
+ steps:
28
+ - uses: actions/checkout@v4
29
+ - uses: astral-sh/setup-uv@v4
30
+ - run: uv build
31
+ - uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,16 @@
1
+ name: Tests
2
+
3
+ on: push
4
+
5
+ jobs:
6
+ test:
7
+ runs-on: ubuntu-latest
8
+ strategy:
9
+ matrix:
10
+ python-version: ["3.12", "3.13", "3.14"]
11
+ steps:
12
+ - uses: actions/checkout@v4
13
+ - uses: astral-sh/setup-uv@v4
14
+ - run: uv python install ${{ matrix.python-version }}
15
+ - run: uv sync --python ${{ matrix.python-version }}
16
+ - run: uv run pytest tests/ -v
@@ -0,0 +1,334 @@
1
+ # Created by https://www.toptal.com/developers/gitignore/api/python,macos,linux,visualstudiocode,jetbrains+all
2
+ # Edit at https://www.toptal.com/developers/gitignore?templates=python,macos,linux,visualstudiocode,jetbrains+all
3
+
4
+ ### JetBrains+all ###
5
+ # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
6
+ # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
7
+
8
+ # User-specific stuff
9
+ .idea/**/workspace.xml
10
+ .idea/**/tasks.xml
11
+ .idea/**/usage.statistics.xml
12
+ .idea/**/dictionaries
13
+ .idea/**/shelf
14
+
15
+ # AWS User-specific
16
+ .idea/**/aws.xml
17
+
18
+ # Generated files
19
+ .idea/**/contentModel.xml
20
+
21
+ # Sensitive or high-churn files
22
+ .idea/**/dataSources/
23
+ .idea/**/dataSources.ids
24
+ .idea/**/dataSources.local.xml
25
+ .idea/**/sqlDataSources.xml
26
+ .idea/**/dynamic.xml
27
+ .idea/**/uiDesigner.xml
28
+ .idea/**/dbnavigator.xml
29
+
30
+ # Gradle
31
+ .idea/**/gradle.xml
32
+ .idea/**/libraries
33
+
34
+ # Gradle and Maven with auto-import
35
+ # When using Gradle or Maven with auto-import, you should exclude module files,
36
+ # since they will be recreated, and may cause churn. Uncomment if using
37
+ # auto-import.
38
+ # .idea/artifacts
39
+ # .idea/compiler.xml
40
+ # .idea/jarRepositories.xml
41
+ # .idea/modules.xml
42
+ # .idea/*.iml
43
+ # .idea/modules
44
+ # *.iml
45
+ # *.ipr
46
+
47
+ # CMake
48
+ cmake-build-*/
49
+
50
+ # Mongo Explorer plugin
51
+ .idea/**/mongoSettings.xml
52
+
53
+ # File-based project format
54
+ *.iws
55
+
56
+ # IntelliJ
57
+ out/
58
+
59
+ # mpeltonen/sbt-idea plugin
60
+ .idea_modules/
61
+
62
+ # JIRA plugin
63
+ atlassian-ide-plugin.xml
64
+
65
+ # Cursive Clojure plugin
66
+ .idea/replstate.xml
67
+
68
+ # SonarLint plugin
69
+ .idea/sonarlint/
70
+
71
+ # Crashlytics plugin (for Android Studio and IntelliJ)
72
+ com_crashlytics_export_strings.xml
73
+ crashlytics.properties
74
+ crashlytics-build.properties
75
+ fabric.properties
76
+
77
+ # Editor-based Rest Client
78
+ .idea/httpRequests
79
+
80
+ # Android studio 3.1+ serialized cache file
81
+ .idea/caches/build_file_checksums.ser
82
+
83
+ ### JetBrains+all Patch ###
84
+ # Ignore everything but code style settings and run configurations
85
+ # that are supposed to be shared within teams.
86
+
87
+ .idea/*
88
+
89
+ !.idea/codeStyles
90
+ !.idea/runConfigurations
91
+
92
+ ### Linux ###
93
+ *~
94
+
95
+ # temporary files which can be created if a process still has a handle open of a deleted file
96
+ .fuse_hidden*
97
+
98
+ # KDE directory preferences
99
+ .directory
100
+
101
+ # Linux trash folder which might appear on any partition or disk
102
+ .Trash-*
103
+
104
+ # .nfs files are created when an open file is removed but is still being accessed
105
+ .nfs*
106
+
107
+ ### macOS ###
108
+ # General
109
+ .DS_Store
110
+ .AppleDouble
111
+ .LSOverride
112
+
113
+ # Icon must end with two \r
114
+ Icon
115
+
116
+
117
+ # Thumbnails
118
+ ._*
119
+
120
+ # Files that might appear in the root of a volume
121
+ .DocumentRevisions-V100
122
+ .fseventsd
123
+ .Spotlight-V100
124
+ .TemporaryItems
125
+ .Trashes
126
+ .VolumeIcon.icns
127
+ .com.apple.timemachine.donotpresent
128
+
129
+ # Directories potentially created on remote AFP share
130
+ .AppleDB
131
+ .AppleDesktop
132
+ Network Trash Folder
133
+ Temporary Items
134
+ .apdisk
135
+
136
+ ### macOS Patch ###
137
+ # iCloud generated files
138
+ *.icloud
139
+
140
+ ### Python ###
141
+ # Byte-compiled / optimized / DLL files
142
+ __pycache__/
143
+ *.py[cod]
144
+ *$py.class
145
+
146
+ # C extensions
147
+ *.so
148
+
149
+ # Distribution / packaging
150
+ .Python
151
+ build/
152
+ develop-eggs/
153
+ dist/
154
+ downloads/
155
+ eggs/
156
+ .eggs/
157
+ lib/
158
+ lib64/
159
+ parts/
160
+ sdist/
161
+ var/
162
+ wheels/
163
+ share/python-wheels/
164
+ *.egg-info/
165
+ .installed.cfg
166
+ *.egg
167
+ MANIFEST
168
+
169
+ # PyInstaller
170
+ # Usually these files are written by a python script from a template
171
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
172
+ *.manifest
173
+ *.spec
174
+
175
+ # Installer logs
176
+ pip-log.txt
177
+ pip-delete-this-directory.txt
178
+
179
+ # Unit test / coverage reports
180
+ htmlcov/
181
+ .tox/
182
+ .nox/
183
+ .coverage
184
+ .coverage.*
185
+ .cache
186
+ nosetests.xml
187
+ coverage.xml
188
+ *.cover
189
+ *.py,cover
190
+ .hypothesis/
191
+ .pytest_cache/
192
+ cover/
193
+
194
+ # Translations
195
+ *.mo
196
+ *.pot
197
+
198
+ # Django stuff:
199
+ *.log
200
+ local_settings.py
201
+ db.sqlite3
202
+ db.sqlite3-journal
203
+
204
+ # Flask stuff:
205
+ instance/
206
+ .webassets-cache
207
+
208
+ # Scrapy stuff:
209
+ .scrapy
210
+
211
+ # Sphinx documentation
212
+ docs/_build/
213
+
214
+ # PyBuilder
215
+ .pybuilder/
216
+ target/
217
+
218
+ # Jupyter Notebook
219
+ .ipynb_checkpoints
220
+
221
+ # IPython
222
+ profile_default/
223
+ ipython_config.py
224
+
225
+ # pyenv
226
+ # For a library or package, you might want to ignore these files since the code is
227
+ # intended to run in multiple environments; otherwise, check them in:
228
+ # .python-version
229
+
230
+ # pipenv
231
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
232
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
233
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
234
+ # install all needed dependencies.
235
+ #Pipfile.lock
236
+
237
+ # poetry
238
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
239
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
240
+ # commonly ignored for libraries.
241
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
242
+ #poetry.lock
243
+
244
+ # pdm
245
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
246
+ #pdm.lock
247
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
248
+ # in version control.
249
+ # https://pdm.fming.dev/#use-with-ide
250
+ .pdm.toml
251
+
252
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
253
+ __pypackages__/
254
+
255
+ # Celery stuff
256
+ celerybeat-schedule
257
+ celerybeat.pid
258
+
259
+ # SageMath parsed files
260
+ *.sage.py
261
+
262
+ # Environments
263
+ .env
264
+ .venv
265
+ env/
266
+ venv/
267
+ ENV/
268
+ env.bak/
269
+ venv.bak/
270
+
271
+ # Spyder project settings
272
+ .spyderproject
273
+ .spyproject
274
+
275
+ # Rope project settings
276
+ .ropeproject
277
+
278
+ # mkdocs documentation
279
+ /site
280
+
281
+ # mypy
282
+ .mypy_cache/
283
+ .dmypy.json
284
+ dmypy.json
285
+
286
+ # Pyre type checker
287
+ .pyre/
288
+
289
+ # pytype static type analyzer
290
+ .pytype/
291
+
292
+ # Cython debug symbols
293
+ cython_debug/
294
+
295
+ # PyCharm
296
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
297
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
298
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
299
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
300
+ #.idea/
301
+
302
+ ### Python Patch ###
303
+ # Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
304
+ poetry.toml
305
+
306
+ # ruff
307
+ .ruff_cache/
308
+
309
+ # LSP config files
310
+ pyrightconfig.json
311
+
312
+ ### VisualStudioCode ###
313
+ .vscode/*
314
+ !.vscode/settings.json
315
+ !.vscode/tasks.json
316
+ !.vscode/launch.json
317
+ !.vscode/extensions.json
318
+ !.vscode/*.code-snippets
319
+
320
+ # Local History for Visual Studio Code
321
+ .history/
322
+
323
+ # Built Visual Studio Code Extensions
324
+ *.vsix
325
+
326
+ ### VisualStudioCode Patch ###
327
+ # Ignore all local history of files
328
+ .history
329
+ .ionide
330
+
331
+ # End of https://www.toptal.com/developers/gitignore/api/python,macos,linux,visualstudiocode,jetbrains+all
332
+
333
+ # Claude Code
334
+ CLAUDE.md
@@ -0,0 +1 @@
1
+ 3.14
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Martin Preusse
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,72 @@
1
+ Metadata-Version: 2.4
2
+ Name: uniprotlib
3
+ Version: 0.1.0
4
+ Summary: Python library for parsing UniProt XML data
5
+ License-Expression: MIT
6
+ License-File: LICENSE
7
+ Requires-Python: >=3.12
8
+ Requires-Dist: lxml>=6.0.2
9
+ Description-Content-Type: text/markdown
10
+
11
+ # uniprotlib
12
+
13
+ > **Note:** This library was vibe coded with Claude. It works, it's tested, but review accordingly.
14
+
15
+ Python library for parsing UniProt XML files. Handles both single-entry downloads and multi-GB gzip-compressed database dumps with bounded memory usage.
16
+
17
+ ## Installation
18
+
19
+ ```bash
20
+ pip install uniprotlib
21
+ ```
22
+
23
+ Or with [uv](https://docs.astral.sh/uv/):
24
+
25
+ ```bash
26
+ uv add uniprotlib
27
+ ```
28
+
29
+ ## Usage
30
+
31
+ ```python
32
+ from uniprotlib import parse_xml
33
+
34
+ # single file
35
+ for entry in parse_xml("Q9Y261.xml"):
36
+ print(entry.primary_accession, entry.protein_name)
37
+
38
+ # gzipped bulk download
39
+ for entry in parse_xml("uniprot_sprot.xml.gz"):
40
+ print(entry.gene.primary, entry.organism.scientific_name)
41
+
42
+ # multiple files
43
+ for entry in parse_xml("human.xml.gz", "mouse.xml.gz"):
44
+ print(entry.primary_accession)
45
+ ```
46
+
47
+ `parse_xml()` accepts one or more file paths and returns an iterator that yields `UniProtEntry` objects. Gzip-compressed input is detected automatically from the `.gz` file extension. Memory usage stays bounded regardless of file size.
48
+
49
+ ## Parsed fields
50
+
51
+ | Model | Fields |
52
+ |---|---|
53
+ | `UniProtEntry` | primary_accession, accessions, entry_name, dataset, protein_name, gene, organism, sequence, keywords, db_references |
54
+ | `Gene` | primary, synonyms, ordered_locus_names, orf_names |
55
+ | `Organism` | scientific_name, common_name, tax_id, lineage |
56
+ | `Sequence` | value, length, mass, checksum |
57
+ | `DbReference` | type, id, molecule, properties |
58
+
59
+ All model classes are fully type-annotated dataclasses, and the package ships a `py.typed` marker so that type checkers can use the annotations.
60
+
61
+ ## Development
62
+
63
+ Requires Python >= 3.12 and [uv](https://docs.astral.sh/uv/).
64
+
65
+ ```bash
66
+ uv sync
67
+ uv run pytest tests/ -v
68
+ ```
69
+
70
+ ## License
71
+
72
+ MIT
@@ -0,0 +1,62 @@
1
+ # uniprotlib
2
+
3
+ > **Note:** This library was vibe coded with Claude. It works, it's tested, but review accordingly.
4
+
5
+ Python library for parsing UniProt XML files. Handles both single-entry downloads and multi-GB gzip-compressed database dumps with bounded memory usage.
6
+
7
+ ## Installation
8
+
9
+ ```bash
10
+ pip install uniprotlib
11
+ ```
12
+
13
+ Or with [uv](https://docs.astral.sh/uv/):
14
+
15
+ ```bash
16
+ uv add uniprotlib
17
+ ```
18
+
19
+ ## Usage
20
+
21
+ ```python
22
+ from uniprotlib import parse_xml
23
+
24
+ # single file
25
+ for entry in parse_xml("Q9Y261.xml"):
26
+ print(entry.primary_accession, entry.protein_name)
27
+
28
+ # gzipped bulk download
29
+ for entry in parse_xml("uniprot_sprot.xml.gz"):
30
+ print(entry.gene.primary, entry.organism.scientific_name)
31
+
32
+ # multiple files
33
+ for entry in parse_xml("human.xml.gz", "mouse.xml.gz"):
34
+ print(entry.primary_accession)
35
+ ```
36
+
37
+ `parse_xml()` accepts one or more file paths and returns an iterator that yields `UniProtEntry` objects. Gzip-compressed input is detected automatically from the `.gz` file extension. Memory usage stays bounded regardless of file size.
38
+
39
+ ## Parsed fields
40
+
41
+ | Model | Fields |
42
+ |---|---|
43
+ | `UniProtEntry` | primary_accession, accessions, entry_name, dataset, protein_name, gene, organism, sequence, keywords, db_references |
44
+ | `Gene` | primary, synonyms, ordered_locus_names, orf_names |
45
+ | `Organism` | scientific_name, common_name, tax_id, lineage |
46
+ | `Sequence` | value, length, mass, checksum |
47
+ | `DbReference` | type, id, molecule, properties |
48
+
49
+ All model classes are fully type-annotated dataclasses, and the package ships a `py.typed` marker so that type checkers can use the annotations.
50
+
51
+ ## Development
52
+
53
+ Requires Python >= 3.12 and [uv](https://docs.astral.sh/uv/).
54
+
55
+ ```bash
56
+ uv sync
57
+ uv run pytest tests/ -v
58
+ ```
59
+
60
+ ## License
61
+
62
+ MIT