xpid 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xpid-0.0.1/PKG-INFO +103 -0
- xpid-0.0.1/README.md +90 -0
- xpid-0.0.1/pyproject.toml +26 -0
- xpid-0.0.1/setup.cfg +4 -0
- xpid-0.0.1/src/xpid/__init__.py +45 -0
- xpid-0.0.1/src/xpid/cli.py +385 -0
- xpid-0.0.1/src/xpid/config.py +73 -0
- xpid-0.0.1/src/xpid/core.py +219 -0
- xpid-0.0.1/src/xpid/geometry.py +93 -0
- xpid-0.0.1/src/xpid/io.py +32 -0
- xpid-0.0.1/src/xpid/prep.py +80 -0
- xpid-0.0.1/src/xpid/residue_ss.py +83 -0
- xpid-0.0.1/src/xpid.egg-info/PKG-INFO +103 -0
- xpid-0.0.1/src/xpid.egg-info/SOURCES.txt +16 -0
- xpid-0.0.1/src/xpid.egg-info/dependency_links.txt +1 -0
- xpid-0.0.1/src/xpid.egg-info/top_level.txt +1 -0
- xpid-0.0.1/test/test_geometry.py +30 -0
- xpid-0.0.1/test/test_integration.py +29 -0
xpid-0.0.1/PKG-INFO
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: xpid
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: XH-/PI interaction detection
|
|
5
|
+
Author-email: Sean Wang <sean.wannng@gmail.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/SeanWang5868/xpid
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Requires-Python: >=3.8
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
|
|
14
|
+
# Xpid
|
|
15
|
+
|
|
16
|
+
`Xpid` is a Gemmi-based tool designed to detect XH-π interactions in PDB/mmCIF files.
|
|
17
|
+
|
|
18
|
+
## Installation
|
|
19
|
+
|
|
20
|
+
Requires Python 3.9+.
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
git clone https://github.com/SeanWang5868/xpid
|
|
24
|
+
cd xpid
|
|
25
|
+
pip install .
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
## Configuration
|
|
29
|
+
|
|
30
|
+
The detection of XH-π interactions depends on the position of H atoms. In order to add H to the structure before detecting, the path to the monomer library (e.g. CCP4 monomer library) needs to be specified.
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
xpid --set-mon-lib /Users/abc123/monomers
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
## Quick Start
|
|
37
|
+
|
|
38
|
+
Scans a directory or a single PDB/mmCIF file and saves the results to a JSON file.
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
xpid ./data
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
> **Output**: `./data/xpid_output/xpid_results.json`
|
|
45
|
+
|
|
46
|
+
## Geometric Criteria
|
|
47
|
+
|
|
48
|
+
Definitions: $C_\pi$ (Ring Centroid), $\vec{n}$ (Ring Normal), $X$ (Donor Heavy Atom), $Xp$ (The projection of X onto the π plane), $H$ (Hydrogen).
|
|
49
|
+
|
|
50
|
+
### [Hudson System](https://doi.org/10.1021/jacs.5b08424)
|
|
51
|
+
|
|
52
|
+
$d_{X \text{--} C_\pi}$: $\le 4.5$ Å, $\angle X\text{--}H \text{--} \vec{n}$: $\le 40^\circ$. $d_{Xp \text{--} C_\pi}$: $\le 1.6$ Å $\text{for His, Trp-A}$, $\le 2.0$ Å $\text{for Phe, Trp-B, Tyr}$.
|
|
53
|
+
|
|
54
|
+
### [Plevin System](https://doi.org/10.1038/nchem.650)
|
|
55
|
+
|
|
56
|
+
$d_{X \text{--} C_\pi}$: $< 4.3$ Å, $\angle X\text{--}H \text{--} C_\pi$: $> 120^\circ$, $\angle X \text{--} C_\pi \text{--} \vec{n}$: $< 25^\circ$.
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
## Command Options
|
|
60
|
+
|
|
61
|
+
| Argument | Description |
|
|
62
|
+
| :--- | :--- |
|
|
63
|
+
| `inputs` | Input file (`.cif`, `.pdb`) or directory path. |
|
|
64
|
+
| `--out-dir` | Specify custom output directory. |
|
|
65
|
+
| `--separate` | Save results as separate files per PDB (Default: Merge). |
|
|
66
|
+
| `--file-type` | Output format: `json` (default) or `csv`. |
|
|
67
|
+
| `-v`, `--verbose` | Output detailed metrics (angles, coords, B-factors). |
|
|
68
|
+
| `--log` | Enable log file saving. |
|
|
69
|
+
| `--jobs N` | Number of CPU cores to use (Default: 1). |
|
|
70
|
+
| `--h-mode N` | Hydrogen handling mode (0=NoChange, 4=ReAddButWater). |
|
|
71
|
+
| `--model ID` | Model index to analyze (Default: `0`; use `all` for NMR). |
|
|
72
|
+
| `--pi-res` | Limit acceptor residues (e.g., `TRP,TYR`). |
|
|
73
|
+
| `--donor-res` | Limit donor residues (e.g., `HIS,ARG`). |
|
|
74
|
+
| `--donor-atom` | Limit donor element types (e.g., `N,O`). |
|
|
75
|
+
|
|
76
|
+
## Output Data
|
|
77
|
+
|
|
78
|
+
**Simple Mode (Default)**
|
|
79
|
+
|
|
80
|
+
* PDB ID, Resolution
|
|
81
|
+
* Chain, Name, ID for X-donor and $\pi$ Residues.
|
|
82
|
+
* Distance ($d_{X \text{--} C_\pi}$)
|
|
83
|
+
|
|
84
|
+
**Detailed Mode (`-v`)**
|
|
85
|
+
|
|
86
|
+
* **Includes all Simple fields plus:**
|
|
87
|
+
* **Secondary structure**: Type (H/G/I/E/C) and Region IDs.
|
|
88
|
+
* **Coordinates**: Flattened x, y, z for $\pi$-center and X-atom.
|
|
89
|
+
* **Geometric parameters**: $\angle X\text{--}H \text{--} \vec{n}$, $\angle X\text{--}H \text{--} C_\pi$, $\angle X \text{--} C_\pi \text{--} \vec{n}$, $d_{Xp \text{--} C_\pi}$
|
|
90
|
+
* **B-factors**: Average B-factor for ring atoms and X-atom.
|
|
91
|
+
|
|
92
|
+
## Dependencies
|
|
93
|
+
|
|
94
|
+
* `gemmi`
|
|
95
|
+
* `numpy`
|
|
96
|
+
|
|
97
|
+
-----
|
|
98
|
+
|
|
99
|
+
## Contact
|
|
100
|
+
|
|
101
|
+
**Sean Wang** (sean.wang@york.ac.uk)
|
|
102
|
+
|
|
103
|
+
York Structural Biology Laboratory (YSBL), University of York
|
xpid-0.0.1/README.md
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
# Xpid
|
|
2
|
+
|
|
3
|
+
`Xpid` is a Gemmi-based tool designed to detect XH-π interactions in PDB/mmCIF files.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
Requires Python 3.9+.
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
git clone https://github.com/SeanWang5868/xpid
|
|
11
|
+
cd xpid
|
|
12
|
+
pip install .
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
## Configuration
|
|
16
|
+
|
|
17
|
+
The detection of XH-π interactions depends on the position of H atoms. In order to add H to the structure before detecting, the path to the monomer library (e.g. CCP4 monomer library) needs to be specified.
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
xpid --set-mon-lib /Users/abc123/monomers
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
## Quick Start
|
|
24
|
+
|
|
25
|
+
Scans a directory or a single PDB/mmCIF file and saves the results to a JSON file.
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
xpid ./data
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
> **Output**: `./data/xpid_output/xpid_results.json`
|
|
32
|
+
|
|
33
|
+
## Geometric Criteria
|
|
34
|
+
|
|
35
|
+
Definitions: $C_\pi$ (Ring Centroid), $\vec{n}$ (Ring Normal), $X$ (Donor Heavy Atom), $Xp$ (The projection of X onto the π plane), $H$ (Hydrogen).
|
|
36
|
+
|
|
37
|
+
### [Hudson System](https://doi.org/10.1021/jacs.5b08424)
|
|
38
|
+
|
|
39
|
+
$d_{X \text{--} C_\pi}$: $\le 4.5$ Å, $\angle X\text{--}H \text{--} \vec{n}$: $\le 40^\circ$. $d_{Xp \text{--} C_\pi}$: $\le 1.6$ Å $\text{for His, Trp-A}$, $\le 2.0$ Å $\text{for Phe, Trp-B, Tyr}$.
|
|
40
|
+
|
|
41
|
+
### [Plevin System](https://doi.org/10.1038/nchem.650)
|
|
42
|
+
|
|
43
|
+
$d_{X \text{--} C_\pi}$: $< 4.3$ Å, $\angle X\text{--}H \text{--} C_\pi$: $> 120^\circ$, $\angle X \text{--} C_\pi \text{--} \vec{n}$: $< 25^\circ$.
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
## Command Options
|
|
47
|
+
|
|
48
|
+
| Argument | Description |
|
|
49
|
+
| :--- | :--- |
|
|
50
|
+
| `inputs` | Input file (`.cif`, `.pdb`) or directory path. |
|
|
51
|
+
| `--out-dir` | Specify custom output directory. |
|
|
52
|
+
| `--separate` | Save results as separate files per PDB (Default: Merge). |
|
|
53
|
+
| `--file-type` | Output format: `json` (default) or `csv`. |
|
|
54
|
+
| `-v`, `--verbose` | Output detailed metrics (angles, coords, B-factors). |
|
|
55
|
+
| `--log` | Enable log file saving. |
|
|
56
|
+
| `--jobs N` | Number of CPU cores to use (Default: 1). |
|
|
57
|
+
| `--h-mode N` | Hydrogen handling mode (0=NoChange, 4=ReAddButWater). |
|
|
58
|
+
| `--model ID` | Model index to analyze (Default: `0`; use `all` for NMR). |
|
|
59
|
+
| `--pi-res` | Limit acceptor residues (e.g., `TRP,TYR`). |
|
|
60
|
+
| `--donor-res` | Limit donor residues (e.g., `HIS,ARG`). |
|
|
61
|
+
| `--donor-atom` | Limit donor element types (e.g., `N,O`). |
|
|
62
|
+
|
|
63
|
+
## Output Data
|
|
64
|
+
|
|
65
|
+
**Simple Mode (Default)**
|
|
66
|
+
|
|
67
|
+
* PDB ID, Resolution
|
|
68
|
+
* Chain, Name, ID for X-donor and $\pi$ Residues.
|
|
69
|
+
* Distance ($d_{X \text{--} C_\pi}$)
|
|
70
|
+
|
|
71
|
+
**Detailed Mode (`-v`)**
|
|
72
|
+
|
|
73
|
+
* **Includes all Simple fields plus:**
|
|
74
|
+
* **Secondary structure**: Type (H/G/I/E/C) and Region IDs.
|
|
75
|
+
* **Coordinates**: Flattened x, y, z for $\pi$-center and X-atom.
|
|
76
|
+
* **Geometric parameters**: $\angle X\text{--}H \text{--} \vec{n}$, $\angle X\text{--}H \text{--} C_\pi$, $\angle X \text{--} C_\pi \text{--} \vec{n}$, $d_{Xp \text{--} C_\pi}$
|
|
77
|
+
* **B-factors**: Average B-factor for ring atoms and X-atom.
|
|
78
|
+
|
|
79
|
+
## Dependencies
|
|
80
|
+
|
|
81
|
+
* `gemmi`
|
|
82
|
+
* `numpy`
|
|
83
|
+
|
|
84
|
+
-----
|
|
85
|
+
|
|
86
|
+
## Contact
|
|
87
|
+
|
|
88
|
+
**Sean Wang** (sean.wang@york.ac.uk)
|
|
89
|
+
|
|
90
|
+
York Structural Biology Laboratory (YSBL), University of York
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
[build-system]
requires = ["setuptools>=61.0", "wheel"]
build-backend = "setuptools.build_meta"

[project]
name = "xpid"
version = "0.0.1"
description = "XH-/PI interaction detection"
readme = "README.md"
authors = [{name = "Sean Wang", email = "sean.wannng@gmail.com"}]
license = {text = "MIT"}
requires-python = ">=3.8"
classifiers = [
    "Programming Language :: Python :: 3",
    "License :: OSI Approved :: MIT License",
    "Operating System :: OS Independent",
]

# Runtime dependencies: the package imports gemmi at import time, and the
# README's "Dependencies" section lists gemmi and numpy. Without this entry
# `pip install` produces a package that fails on `import xpid`.
dependencies = ["gemmi", "numpy"]

[project.urls]
Homepage = "https://github.com/SeanWang5868/xpid"

# Install the `xpid` console command that the README uses
# (e.g. `xpid ./data`, `xpid --set-mon-lib ...`).
[project.scripts]
xpid = "xpid.cli:main"

[tool.setuptools.packages.find]
where = ["src"]
# namespaces = false
|
xpid-0.0.1/setup.cfg
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
import gemmi
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from typing import List, Dict, Any, Optional, Union
|
|
4
|
+
|
|
5
|
+
from . import prep
|
|
6
|
+
from . import core
|
|
7
|
+
from . import config
|
|
8
|
+
|
|
9
|
+
def detect(
    file_path: Union[str, Path],
    mon_lib_path: Optional[str] = None,
    h_mode: int = 4,
    filter_pi: Optional[List[str]] = None,
    filter_donor: Optional[List[str]] = None,
    filter_donor_atom: Optional[List[str]] = None
) -> List[Dict[str, Any]]:
    """
    Analyse a single structure file from Python code.

    The file is read with ``gemmi.read_structure``, passed through
    ``prep.add_hydrogens_memory`` (with ``h_change_val=h_mode``) and then
    handed to ``core.detect_interactions_in_structure`` together with the
    three optional filters.

    Returns the list produced by the core detector. An empty list is
    returned when hydrogen preparation yields a falsy structure or when any
    exception is raised; in the latter case the error is printed to stdout.
    """
    source = Path(file_path)
    file_name = source.name

    # Names with more than one dot (e.g. "1abc.cif.gz") are labelled by the
    # text before the first dot; everything else uses the plain stem.
    pdb_name = file_name.split('.')[0] if file_name.count('.') > 1 else source.stem

    try:
        prepared = prep.add_hydrogens_memory(
            gemmi.read_structure(str(source)),
            mon_lib_path,
            h_change_val=h_mode,
        )
        if prepared:
            return core.detect_interactions_in_structure(
                prepared,
                pdb_name,
                filter_pi=filter_pi,
                filter_donor=filter_donor,
                filter_donor_atom=filter_donor_atom,
            )
        return []
    except Exception as e:
        print(f"Analysis error: {e}")
        return []
|
|
@@ -0,0 +1,385 @@
|
|
|
1
|
+
"""
|
|
2
|
+
cli.py
|
|
3
|
+
Command Line Interface for xpid.
|
|
4
|
+
Handles argument parsing, validation, execution flow, and streaming output.
|
|
5
|
+
"""
|
|
6
|
+
import argparse
|
|
7
|
+
import sys
|
|
8
|
+
import re
|
|
9
|
+
import multiprocessing
|
|
10
|
+
import logging
|
|
11
|
+
import gemmi
|
|
12
|
+
import json
|
|
13
|
+
import csv
|
|
14
|
+
import os
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
from typing import List, Dict, Any, Tuple, Optional, Set
|
|
17
|
+
|
|
18
|
+
# Ensure package is accessible
|
|
19
|
+
try:
    from xpid import prep, core, config
except ImportError:
    # Fallback when the package is not importable as-is: put the parent of
    # this file's directory on sys.path and retry the same import.
    sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
    from xpid import prep, core, config

# --- Constants ---
# Human-readable label for each accepted --h-mode value; main() uses it in
# the start-up status log.
H_MODE_MAP = {
    0: "NoChange", 1: "Shift", 2: "Remove",
    3: "ReAdd", 4: "ReAddButWater", 5: "ReAddKnown"
}

# Columns retained (in this order for CSV) when verbose output is off;
# ResultStreamer drops every other key from each record.
SIMPLE_COLS = [
    'pdb', 'resolution',
    'pi_chain', 'pi_res', 'pi_id',
    'X_chain', 'X_res', 'X_id', 'X_atom', 'H_atom',
    'dist_X_Pi', 'is_plevin', 'is_hudson', 'remark'
]

# --- Logging ---
# Module logger; handlers are attached to the root logger via
# logging.basicConfig in setup_logging() / main().
logger = logging.getLogger('xpid')
|
|
40
|
+
|
|
41
|
+
def setup_logging(log_file: Path):
    """
    Send log records to stdout and to ``log_file``.

    The parent directory of ``log_file`` is created if missing, the file is
    opened in write mode (UTF-8), and the root logger is reconfigured at
    INFO level with a ``[HH:MM:SS] message`` format. ``force=True`` replaces
    any handlers already installed on the root logger.
    """
    # A Path is always truthy, so the directory is created unconditionally.
    log_file.parent.mkdir(parents=True, exist_ok=True)

    console_sink = logging.StreamHandler(sys.stdout)
    file_sink = logging.FileHandler(log_file, mode='w', encoding='utf-8')

    logging.basicConfig(
        level=logging.INFO,
        format='[%(asctime)s] %(message)s',
        datefmt='%H:%M:%S',
        handlers=[console_sink, file_sink],
        force=True,
    )
|
|
58
|
+
|
|
59
|
+
# --- Helper Classes (Streaming) ---
|
|
60
|
+
|
|
61
|
+
class ResultStreamer:
    """
    Incrementally write result records to a CSV or JSON file.

    Records are written chunk by chunk as they arrive, so the full result
    set never has to be held in memory. Use as a context manager:
    ``__enter__`` creates the parent directory and opens the file,
    ``__exit__`` finalises and closes it.

    * ``csv``  - the header is derived from the first record written.
    * ``json`` - the file is a single JSON array; ``[`` is written on enter
      and ``]`` on exit, with records separated by commas.

    When ``verbose`` is false only the keys listed in ``SIMPLE_COLS`` are
    kept; otherwise every key of the record is written.
    """
    def __init__(self, output_path: Path, file_type: str, verbose: bool):
        self.output_path = output_path
        self.file_type = file_type.lower()
        self.verbose = verbose
        self.file_handle = None
        self.csv_writer = None
        # True until the first record (JSON) / header (CSV) has been written.
        self.is_first_chunk = True
        self.has_written_data = False

    def __enter__(self):
        self.output_path.parent.mkdir(parents=True, exist_ok=True)
        # newline='' lets the csv module control line endings itself.
        self.file_handle = open(self.output_path, 'w', newline='', encoding='utf-8')

        if self.file_type == 'json':
            self.file_handle.write('[\n')

        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Nothing to do if the file was never opened or is already closed;
        # this keeps manual __exit__ calls safe and idempotent.
        handle = self.file_handle
        if handle is None:
            return
        self.file_handle = None

        try:
            if self.file_type == 'json':
                handle.write('\n]')
        finally:
            # Close even if writing the array terminator fails.
            handle.close()

    def _get_fieldnames(self, first_record: Dict[str, Any]) -> List[str]:
        """Return the CSV column order derived from ``first_record``."""
        if self.verbose:
            return list(first_record.keys())
        return [k for k in SIMPLE_COLS if k in first_record]

    def write_chunk(self, results: List[Dict[str, Any]]):
        """Append ``results`` to the output file; an empty chunk is a no-op."""
        if not results:
            return

        self.has_written_data = True

        if self.file_type == 'csv':
            if self.is_first_chunk:
                fieldnames = self._get_fieldnames(results[0])
                # extrasaction='ignore' drops keys that are not in the header.
                self.csv_writer = csv.DictWriter(self.file_handle, fieldnames=fieldnames, extrasaction='ignore')
                self.csv_writer.writeheader()
                self.is_first_chunk = False

            self.csv_writer.writerows(results)

        elif self.file_type == 'json':
            # In simple mode the keys to keep are taken from the first
            # record of each chunk.
            keys_to_keep = None
            if not self.verbose:
                keys_to_keep = {k for k in SIMPLE_COLS if k in results[0]}

            for item in results:
                if keys_to_keep is None:
                    clean_item = item
                else:
                    clean_item = {k: v for k, v in item.items() if k in keys_to_keep}

                # Every record after the very first is preceded by a comma.
                if self.is_first_chunk:
                    self.is_first_chunk = False
                else:
                    self.file_handle.write(',\n')

                json.dump(clean_item, self.file_handle, indent=2)
|
|
134
|
+
|
|
135
|
+
# --- Helpers ---
|
|
136
|
+
|
|
137
|
+
def find_files(inputs: List[str]) -> List[Path]:
    """
    Resolve CLI inputs into a sorted, de-duplicated list of file paths.

    An input that is an existing file is accepted as-is. An input that is a
    directory is searched recursively, keeping only files named like a
    four-character ID followed by ``.cif`` or ``.pdb`` (optionally ``.gz``),
    case-insensitively. Inputs that are neither are ignored. All returned
    paths are resolved.
    """
    id_pattern = re.compile(r'^[a-zA-Z0-9]{4}\.(cif|pdb)(\.gz)?$', re.IGNORECASE)
    found = set()

    for raw in inputs:
        target = Path(raw)
        if target.is_file():
            found.add(target.resolve())
            continue
        if target.is_dir():
            found.update(
                child.resolve()
                for child in target.rglob("*")
                if child.is_file() and id_pattern.match(child.name)
            )

    return sorted(found)
|
|
149
|
+
|
|
150
|
+
def save_single_file_results(results: List[Dict[str, Any]], output_path: Path, file_type: str, verbose: bool) -> None:
    """Write one structure's results to ``output_path`` in a single chunk (used in separate-file mode)."""
    writer = ResultStreamer(output_path, file_type, verbose)
    with writer as sink:
        sink.write_chunk(results)
|
|
154
|
+
|
|
155
|
+
def process_one_file(args_packet: Tuple) -> Tuple[Optional[str], int, Optional[List[Dict[str, Any]]], Optional[str]]:
    """
    Worker: analyse one structure file described by ``args_packet``.

    ``args_packet`` is the 9-tuple ``(filepath, mon_lib, ftype, hmode,
    output_dir, separate_mode, filters, verbose, model_mode)``.

    Returns ``(error, count, results, out_dir)``:

    * ``error``   - message string on failure, otherwise ``None``.
    * ``count``   - number of interactions found (0 on failure).
    * ``results`` - the records, only in merged mode when ``count > 0``.
    * ``out_dir`` - directory of the per-file output, only in separate mode
      when ``count > 0``.

    Failures are reported through the return value rather than raised.
    """
    (filepath, mon_lib, ftype, hmode, output_dir,
     separate_mode, filters, verbose, model_mode) = args_packet

    # Files named like "<4-char id>.<ext>" are labelled by that id; any other
    # name falls back to the stem with '.cif' / '.pdb' removed.
    looks_like_pdb_id = re.match(r'^[a-zA-Z0-9]{4}\.', filepath.name)
    if looks_like_pdb_id:
        pdb_name = filepath.name[:4]
    else:
        pdb_name = filepath.stem.replace('.cif', '').replace('.pdb', '')

    failure = (0, None, None)

    try:
        # Read errors get their own message, distinct from later failures.
        try:
            structure = gemmi.read_structure(str(filepath))
        except Exception as e:
            return (f"Read Error ({pdb_name}): {e}",) + failure

        structure = prep.add_hydrogens_memory(structure, mon_lib, h_change_val=hmode)
        if not structure:
            return (f"AddH Failed ({pdb_name})",) + failure

        results = core.detect_interactions_in_structure(
            structure,
            pdb_name,
            filter_pi=filters['pi'],
            filter_donor=filters['donor'],
            filter_donor_atom=filters['donor_atom'],
            model_mode=model_mode,
        )

        count = len(results)
        if count <= 0:
            return (None, 0, None, None)

        if not separate_mode:
            # Merged mode: hand the records back for the parent to write.
            return (None, count, results, None)

        out_path = Path(output_dir) / f"{pdb_name}_xpid.{ftype}"
        save_single_file_results(results, out_path, ftype, verbose)
        return (None, count, None, str(out_path.parent))

    except Exception as e:
        return (f"Critical Error ({pdb_name}): {e}",) + failure
|
|
197
|
+
|
|
198
|
+
def get_parser():
    """
    Build the ``argparse`` parser for the ``xpid`` command.

    Options are organised into three groups: "Input/Output Options",
    "Configuration" and "Filters". ``RawTextHelpFormatter`` is used so the
    multi-line ``--h-mode`` help keeps its line breaks.
    """
    h_mode_help = "\n".join([
        "Hydrogen handling mode (Default: 4):",
        " 0: NoChange - Keep existing H (Recommended for Neutron)",
        " 1: Shift - Move H to standard distances",
        " 2: Remove - Remove all H",
        " 3: ReAdd - Remove and Re-add all H",
        " 4: ReAddButWater - Re-add all H, skip waters (Standard)",
        " 5: ReAddKnown - Only add H to known residues",
    ])

    parser = argparse.ArgumentParser(
        prog="xpid",
        description="xpid: XH-/pi interaction detector.",
        formatter_class=argparse.RawTextHelpFormatter,
        usage="xpid [options] <input>",  # [Changed] concise basic usage hint
    )
    parser.add_argument('inputs', nargs='*', metavar='PATH', help="Input file(s) or directory.")

    # Input / output options
    output_opts = parser.add_argument_group("Input/Output Options")
    output_opts.add_argument(
        '--separate', action='store_true',
        help="Save separate result files for each PDB (default: merge into one file).")
    output_opts.add_argument(
        '--out-dir', type=str, metavar='DIR',
        help="Override default output directory.")
    output_opts.add_argument(
        '--output-name', type=str, default='xpid_results', metavar='NAME',
        help="Base name for the merged result file (default: 'xpid_results').")
    output_opts.add_argument(
        '--file-type', default='json', choices=['json', 'csv'],
        help="Output format. Default: json.")
    output_opts.add_argument('-v', '--verbose', action='store_true', help="Output detailed metrics.")
    output_opts.add_argument('--log', action='store_true', help="Save log file.")

    # Run configuration
    run_opts = parser.add_argument_group("Configuration")
    run_opts.add_argument(
        '--h-mode', type=int, default=4, choices=[0, 1, 2, 3, 4, 5], metavar='N',
        help=h_mode_help)
    run_opts.add_argument('--jobs', type=int, default=1, metavar='N', help="CPU cores.")
    run_opts.add_argument(
        '--model', type=str, default="0", metavar='ID',
        help="Model index (0,1...) or 'all'. Default: 0.")
    run_opts.add_argument('--mon-lib', type=str, metavar='DIR', help="Custom Monomer Library path.")
    run_opts.add_argument('--set-mon-lib', type=str, metavar='DIR', help="Set default Monomer Library.")

    # Residue / element filters
    filter_opts = parser.add_argument_group("Filters")
    filter_opts.add_argument('--pi-res', type=str, help="Limit Pi residues (TRP,TYR).")
    filter_opts.add_argument('--donor-res', type=str, help="Limit Donor residues (HIS,ARG).")
    filter_opts.add_argument('--donor-atom', type=str, help="Limit Donor elements (C,N,O,S).")

    return parser
|
|
243
|
+
|
|
244
|
+
def main():
    """
    Entry point for the ``xpid`` command line tool.

    Steps: handle ``--set-mon-lib`` (save and exit), collect input files,
    prepare the output directory and logging, validate the monomer library
    path and filters, log the run settings, process all files in a
    ``multiprocessing.Pool`` and print a summary.

    In merged mode the results are streamed into a single file as workers
    finish. That file is always finalised and closed, even if processing is
    interrupted or fails.

    Exits with status 0 after saving the configuration or printing help,
    and with status 1 on invalid input, user abort or unexpected error.
    """
    parser = get_parser()
    args = parser.parse_args()

    # 1. Config Mode
    if args.set_mon_lib:
        if os.path.isdir(args.set_mon_lib):
            path = config.save_mon_lib_path(args.set_mon_lib)
            print(f"Configuration saved: {path}")
            sys.exit(0)
        else:
            print(f"Error: Invalid path {args.set_mon_lib}")
            sys.exit(1)

    if not args.inputs:
        parser.print_help()
        sys.exit(0)

    # 2. File Finding
    files = find_files(args.inputs)
    if not files:
        print("ERROR: No valid structure files found.")
        sys.exit(1)

    # 3. Output Dir & Log Setup
    if args.out_dir:
        output_dir = Path(args.out_dir)
    else:
        # Default: "xpid_output" next to the first input file found.
        output_dir = files[0].parent / "xpid_output"

    output_dir.mkdir(parents=True, exist_ok=True)

    log_file = output_dir / "xpid_run.log"
    if args.log:
        setup_logging(log_file)
    else:
        logging.basicConfig(level=logging.INFO, format='[%(asctime)s] %(message)s', datefmt='%H:%M:%S', force=True)

    logger.info("Scanning files...")

    # 4. Validation
    mon_lib_path = args.mon_lib if args.mon_lib else config.DEFAULT_MON_LIB_PATH
    if mon_lib_path and not os.path.isdir(mon_lib_path):
        logger.error(f"Monomer Library path not found: {mon_lib_path}")
        sys.exit(1)

    # 5. Filters
    filters = {
        'pi': [x.strip().upper() for x in args.pi_res.split(',')] if args.pi_res else None,
        'donor': [x.strip().upper() for x in args.donor_res.split(',')] if args.donor_res else None,
        'donor_atom': None
    }
    if args.donor_atom:
        valid_elements = {'C', 'N', 'O', 'S'}
        inputs = [x.strip().upper() for x in args.donor_atom.split(',')]
        for inp in inputs:
            if inp not in valid_elements:
                logger.error(f"Invalid donor element: '{inp}'. Allowed: C, N, O, S")
                sys.exit(1)
        filters['donor_atom'] = inputs

    # 6. Status Log
    separate_mode = args.separate
    ftype = args.file_type.lower()

    h_mode_desc = H_MODE_MAP.get(args.h_mode, "Unknown")
    model_desc = "First Model" if args.model == "0" else ("All Models" if args.model == "all" else "Specific Index")
    output_mode_desc = "Detailed" if args.verbose else "Simple"

    logger.info("--- Xpid Initialization ---")
    logger.info(f"Targets : {len(files)} files")
    logger.info(f"Output Dir : {output_dir.resolve()}")
    logger.info(f"Format : {ftype.upper()} ({'Separate Files' if separate_mode else 'Merged File'})")
    logger.info(f"H-Mode : {args.h_mode} ({h_mode_desc})")
    logger.info(f"Model : {args.model} ({model_desc})")
    logger.info(f"Filters : {filters if any(filters.values()) else 'None'}")
    logger.info(f"Columns : {output_mode_desc}")

    # 7. Execution
    tasks = [(f, mon_lib_path, ftype, args.h_mode, str(output_dir), separate_mode, filters, args.verbose, args.model) for f in files]

    error_logs = []
    total_found = 0

    try:
        merge_file_path = None
        streamer = None

        if not separate_mode:
            merge_filename = f"{args.output_name}.{ftype}"
            merge_file_path = output_dir / merge_filename
            streamer = ResultStreamer(merge_file_path, ftype, args.verbose)
            # Entered manually because the streamer only exists in merged
            # mode; the matching __exit__ is guaranteed by the finally below.
            streamer.__enter__()

        try:
            with multiprocessing.Pool(args.jobs, maxtasksperchild=100) as pool:
                for i, (err, count, data, _out_dir) in enumerate(pool.imap_unordered(process_one_file, tasks), 1):
                    if err:
                        error_logs.append(err)
                        logger.warning(err)

                    total_found += count

                    if not separate_mode and data:
                        streamer.write_chunk(data)

                    # Single-line progress indicator, rewritten in place.
                    msg = f"[INFO] Progress : {i}/{len(files)} files processed..."
                    sys.stdout.write(f"\r{msg}")
                    sys.stdout.flush()
        finally:
            # Always finalise the merged file so the handle is released and
            # a JSON array is properly terminated, even on error or Ctrl-C.
            if streamer:
                streamer.__exit__(None, None, None)

        print("")

        # 8. Summary
        print("-" * 60)
        print(f"[SUMMARY] Total XH-pi interactions detected: {total_found}")

        if error_logs:
            print(f"[WARNING] {len(error_logs)} files failed processing. See log for details.")

        if total_found > 0:
            if not separate_mode:
                print(f"[OUTPUT] Merged result saved to:\n -> {merge_file_path.resolve()}")
            else:
                print(f"[OUTPUT] Separate result files saved in:\n -> {output_dir.resolve()}")

            if args.log:
                print(f"[OUTPUT] Log saved to:\n -> {log_file.resolve()}")
        else:
            print("[OUTPUT] No interactions found.")

    except KeyboardInterrupt:
        print("\n[Aborted by user]")
        sys.exit(1)
    except Exception as e:
        logger.error(f"Unexpected error: {e}")
        sys.exit(1)
|
|
382
|
+
|
|
383
|
+
if __name__ == "__main__":
    # Run the CLI only when executed as a script, not on import.
    # freeze_support() is called before main() so the multiprocessing pool
    # also works when the program is packaged as a frozen executable
    # (see the multiprocessing documentation).
    multiprocessing.freeze_support()
    main()
|