streamd 0.2.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. streamd-0.2.4/LICENSE.txt +21 -0
  2. streamd-0.2.4/MANIFEST.in +2 -0
  3. streamd-0.2.4/PKG-INFO +484 -0
  4. streamd-0.2.4/README.md +468 -0
  5. streamd-0.2.4/setup.cfg +7 -0
  6. streamd-0.2.4/setup.py +35 -0
  7. streamd-0.2.4/streamd/__init__.py +1 -0
  8. streamd-0.2.4/streamd/example/instructions.txt +4 -0
  9. streamd-0.2.4/streamd/example/ligand.mol +95 -0
  10. streamd-0.2.4/streamd/example/protein_HIS.pdb +4875 -0
  11. streamd-0.2.4/streamd/mcpbpy_md/__init__.py +0 -0
  12. streamd-0.2.4/streamd/mcpbpy_md/mcbpy_md.py +154 -0
  13. streamd-0.2.4/streamd/md_analysis.py +74 -0
  14. streamd-0.2.4/streamd/preparation/__init__.py +0 -0
  15. streamd-0.2.4/streamd/preparation/complex_preparation.py +89 -0
  16. streamd-0.2.4/streamd/preparation/ligand_preparation.py +305 -0
  17. streamd-0.2.4/streamd/preparation/mcpbpy_preparation.py +294 -0
  18. streamd-0.2.4/streamd/preparation/md_files_preparation.py +191 -0
  19. streamd-0.2.4/streamd/prolif/__init__.py +0 -0
  20. streamd-0.2.4/streamd/prolif/prolif2png.py +102 -0
  21. streamd-0.2.4/streamd/prolif/prolif_frame_map.py +87 -0
  22. streamd-0.2.4/streamd/prolif/run_prolif.py +244 -0
  23. streamd-0.2.4/streamd/run_gbsa.py +395 -0
  24. streamd-0.2.4/streamd/run_md.py +539 -0
  25. streamd-0.2.4/streamd/scripts/__init__.py +0 -0
  26. streamd-0.2.4/streamd/scripts/com/1.com +5 -0
  27. streamd-0.2.4/streamd/scripts/com/2.com +5 -0
  28. streamd-0.2.4/streamd/scripts/com/3.com +6 -0
  29. streamd-0.2.4/streamd/scripts/gbsa/mmpbsa.in +21 -0
  30. streamd-0.2.4/streamd/scripts/getcharge.py +30 -0
  31. streamd-0.2.4/streamd/scripts/mcpbpy_scripts/protein.in +11 -0
  32. streamd-0.2.4/streamd/scripts/mdp/ions.mdp +14 -0
  33. streamd-0.2.4/streamd/scripts/mdp/md.mdp +46 -0
  34. streamd-0.2.4/streamd/scripts/mdp/minim.mdp +14 -0
  35. streamd-0.2.4/streamd/scripts/mdp/npt.mdp +44 -0
  36. streamd-0.2.4/streamd/scripts/mdp/nvt.mdp +41 -0
  37. streamd-0.2.4/streamd/scripts/mol2_fix_coordsbonds.py +113 -0
  38. streamd-0.2.4/streamd/scripts/pdb2mol.py +84 -0
  39. streamd-0.2.4/streamd/scripts/pmed_amb2gmx.py +22 -0
  40. streamd-0.2.4/streamd/scripts/prepare_Gaussian_input.py +46 -0
  41. streamd-0.2.4/streamd/scripts/script_sh/continue_md.sh +14 -0
  42. streamd-0.2.4/streamd/scripts/script_sh/equlibration.sh +30 -0
  43. streamd-0.2.4/streamd/scripts/script_sh/ligand_mol2prep.sh +4 -0
  44. streamd-0.2.4/streamd/scripts/script_sh/ligand_mol2prep_by_gaussian.sh +32 -0
  45. streamd-0.2.4/streamd/scripts/script_sh/ligand_prep.sh +14 -0
  46. streamd-0.2.4/streamd/scripts/script_sh/md.sh +8 -0
  47. streamd-0.2.4/streamd/scripts/script_sh/md_analysis.sh +21 -0
  48. streamd-0.2.4/streamd/scripts/script_sh/md_ligand_analysis.sh +9 -0
  49. streamd-0.2.4/streamd/scripts/script_sh/solv_ions.sh +18 -0
  50. streamd-0.2.4/streamd/scripts/tleap.in +7 -0
  51. streamd-0.2.4/streamd/scripts/xvg2png.py +73 -0
  52. streamd-0.2.4/streamd/utils/__init__.py +0 -0
  53. streamd-0.2.4/streamd/utils/dask_init.py +75 -0
  54. streamd-0.2.4/streamd/utils/utils.py +79 -0
  55. streamd-0.2.4/streamd.egg-info/PKG-INFO +484 -0
  56. streamd-0.2.4/streamd.egg-info/SOURCES.txt +59 -0
  57. streamd-0.2.4/streamd.egg-info/dependency_links.txt +1 -0
  58. streamd-0.2.4/streamd.egg-info/entry_points.txt +6 -0
  59. streamd-0.2.4/streamd.egg-info/requires.txt +3 -0
  60. streamd-0.2.4/streamd.egg-info/top_level.txt +1 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Aleksandra Ivanova, Olena Mokshyna, Pavel Polishchuk
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,2 @@
1
+ graft streamd/scripts
2
+ graft streamd/example
streamd-0.2.4/PKG-INFO ADDED
@@ -0,0 +1,484 @@
1
+ Metadata-Version: 2.1
2
+ Name: streamd
3
+ Version: 0.2.4
4
+ Summary: Streamd Python module to facilitate molecular dynamics
5
+ Home-page: https://github.com/ci-lab-cz/streamd
6
+ Author: Aleksandra Ivanova, Olena Mokshyna, Pavel Polishchuk
7
+ Classifier: Programming Language :: Python :: 3
8
+ Classifier: Operating System :: POSIX :: Linux
9
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
10
+ Classifier: Topic :: Scientific/Engineering :: Chemistry
11
+ Requires-Python: >=3.6
12
+ Description-Content-Type: text/markdown
13
+ License-File: LICENSE.txt
14
+ Provides-Extra: rdkit
15
+ Requires-Dist: rdkit>=2017.09; extra == "rdkit"
16
+
17
+ # StreaMD: a tool to perform high-throughput automated molecular dynamics simulations
18
+
19
+ ## Installation
20
+ *Source: https://valdes-tresanco-ms.github.io/gmx_MMPBSA/installation/*
21
+
22
+ [env.yml](https://github.com/ci-lab-cz/streamd/blob/master/env.yml)
23
+ ```
24
+ conda env create -n md --file env.yml
25
+
26
+ ```
27
+
28
+ ## **Description**
29
+ #### **Fully automatic pipeline for molecular dynamics**
30
+
31
+ #### Features:
32
+ - supports run of multiple simultaneous molecular dynamics simulations
33
+ - supports simulation for different systems:
34
+ - Protein in Water;
35
+ - Protein - Ligand;
36
+ - Protein - Cofactor (multiple);
37
+ - Protein - Ligand - Cofactor (multiple);
38
+
39
+ - supports simulations of boron-containing molecules using Gaussian Software
40
+ - supports simulations of ligand-binding metalloproteins with MCPB.py
41
+ - supports distributed computing using dask library
42
+ - supports running parallel simulations on multiple servers
43
+ - supports extending the time of MD simulations
44
+ - supports continuing an interrupted MD simulation
45
+ - interrupted MD preparation can be restarted by invoking the same command
46
+ - implemented tools for end-state free energy calculations (gmx_MMPBSA) and Protein-Ligand Interaction analysis (ProLIF)
47
+
48
+ ### **USAGE**
49
+ ```
50
+ run_md -h
51
+ usage: run_md [-h] [-p FILENAME] [-d WDIR] [-l FILENAME] [--cofactor FILENAME] [--clean_previous_md] [--hostfile FILENAME] [-c INTEGER] [--topol topol.top]
52
+ [--topol_itp topol_chainA.itp topol_chainB.itp [topol_chainA.itp topol_chainB.itp ...]] [--posre posre.itp [posre.itp ...]]
53
+ [--protein_forcefield amber99sb-ildn] [--md_time ns] [--npt_time ps] [--nvt_time ps] [--seed int] [--not_clean_log_files] [--steps [STEPS ...]]
54
+ [--wdir_to_continue DIRNAME [DIRNAME ...]] [--deffnm preffix for md files] [--tpr FILENAME] [--cpt FILENAME] [--xtc FILENAME]
55
+ [--ligand_list_file all_ligand_resid.txt] [--ligand_id UNL] [--activate_gaussian module load Gaussian/09-d01]
56
+ [--gaussian_exe g09 or /apps/all/Gaussian/09-d01/g09/g09] [--gaussian_basis B3LYP/6-31G*] [--gaussian_memory 120GB] [--metal_resnames [MN ...]]
57
+ [--metal_cutoff 2.8] [--metal_charges {MN:2, ZN:2, CA:2}]
58
+
59
+ Run or continue MD simulation. Allowed systems: Protein, Protein-Ligand, Protein-Cofactors(multiple), Protein-Ligand-Cofactors(multiple)
60
+
61
+ options:
62
+ -h, --help show this help message and exit
63
+
64
+ Standard Molecular Dynamics Simulation Run:
65
+ -p FILENAME, --protein FILENAME
66
+ input file of protein. Supported formats: *.pdb or gro
67
+ -d WDIR, --wdir WDIR Working directory. If not set the current directory will be used.
68
+ -l FILENAME, --ligand FILENAME
69
+ input file with compound(s). Supported formats: *.mol or sdf
70
+ --cofactor FILENAME input file with compound(s). Supported formats: *.mol or sdf
71
+ --clean_previous_md remove a production MD simulation directory if it exists to re-initialize production MD setup
72
+ --hostfile FILENAME text file with addresses of nodes of dask SSH cluster. The most typical, it can be passed as $PBS_NODEFILE variable from inside a PBS script.
73
+ The first line in this file will be the address of the scheduler running on the standard port 8786. If omitted, calculations will run on a
74
+ single machine as usual.
75
+ -c INTEGER, --ncpu INTEGER
76
+ number of CPU per server. Use all cpus by default.
77
+ --topol topol.top topology file (required if a gro-file is provided for the protein).All output files obtained from gmx2pdb should preserve the original names
78
+ --topol_itp topol_chainA.itp topol_chainB.itp [topol_chainA.itp topol_chainB.itp ...]
79
+ Itp files for individual protein chains (required if a gro-file is provided for the protein).All output files obtained from gmx2pdb should
80
+ preserve the original names
81
+ --posre posre.itp [posre.itp ...]
82
+ posre file(s) (required if a gro-file is provided for the protein).All output files obtained from gmx2pdb should preserve the original names
83
+ --protein_forcefield amber99sb-ildn
84
+ Force Field for protein preparation
85
+ --md_time ns time of MD simulation in ns
86
+ --npt_time ps time of NPT equilibration in ps
87
+ --nvt_time ps time of NVT equilibration in ps
88
+ --seed int seed
89
+ --not_clean_log_files
90
+ Not to remove all backups of md files
91
+ --steps [STEPS ...] Run a particular step(s) of the StreaMD run. Options:
92
+ 1 - run preparation step (protein, ligand, cofactor preparation)
93
+ 2 - run MD equilibration step (minimization, NVT, NPT)
94
+ 3 - run MD simulation
95
+ 4 - run MD analysis
96
+ Ex: 3 4.
97
+ If 2 or 3 or 4 step(s) are used --wdir_to_continue argument should be
98
+ used to provide directories with files obtained during the previous steps
99
+ --wdir_to_continue DIRNAME [DIRNAME ...]
100
+ single or multiple directories contain simulations created by the tool.
101
+ Use with steps 2,3,4 to continue the run. Should consist of: tpr, cpt,
102
+ xtc and all_ligand_resid.txt files. File all_ligand_resid.txt is optional and used to run md analysis for the ligands. If you want to continue
103
+ your own simulation not created by the tool use --tpr, --cpt, --xtc and --wdir or arguments (--ligand_list_file is optional and required to
104
+ run md analysis after simulation )
105
+
106
+ Continue or Extend Molecular Dynamics Simulation:
107
+ --deffnm preffix for md files
108
+ Used to run, extend or continue the simulation.
109
+ If --wdir_to_continue is used files as deffnm.tpr, deffnm.cpt, deffnm.xtc will be searched from --wdir_to_continue directories
110
+ --tpr FILENAME use explicit tpr to continue a non-StreaMD simulation
111
+ --cpt FILENAME use explicit cpt to continue a non-StreaMD simulation
112
+ --xtc FILENAME use explicit xtc to continue a non-StreaMD simulation
113
+ --ligand_list_file all_ligand_resid.txt
114
+ If you want automatic md analysis for ligands was run after continue of non-StreaMD simulation you should set ligand_list file. Format of the file (no
115
+ headers): user_ligand_id gromacs_ligand_id. Example: my_ligand UNL. Can be set up or placed into --wdir_to_continue directory(ies)
116
+ --ligand_id UNL If you want to run an automatic md analysis for a ligand after continue of simulation you can set ligand_id if it is not UNL as a default value
117
+
118
+ Boron-containing molecules or MCPBPY usage (use together with Standard Molecular Dynamics Simulation Run arguments group):
119
+ --activate_gaussian module load Gaussian/09-d01
120
+ string that load gaussian module if necessary
121
+ --gaussian_exe g09 or /apps/all/Gaussian/09-d01/g09/g09
122
+ path to gaussian executable or alias. Requred to run preparation of boron-containing compounds.
123
+ --gaussian_basis B3LYP/6-31G*
124
+ Gaussian Basis
125
+ --gaussian_memory 120GB
126
+ Gaussian Memory Usage
127
+
128
+ MCPBPY usage (use together with Standard Molecular Dynamics Simulation Run and Boron-containing molecules arguments group):
129
+ --metal_resnames [MN ...]
130
+ Metal residue names to run MCPB.py procedure. Start MCPBPY procedure only if gaussian_exe and activate_gaussian arguments are set up,Otherwise
131
+ standard gmx2pdb procedure will be run.
132
+ --metal_cutoff 2.8 Metal residue cutoff to run MCPB.py procedure
133
+ --metal_charges {MN:2, ZN:2, CA:2}
134
+ Metal residue charges in dictionary formatStart MCPBPY procedure only if metal_resnames and gaussian_exe and activate_gaussian arguments are
135
+ set up,Otherwise standard gmx2pdb procedure will be run.
136
+
137
+ ```
138
+
139
+ ### **Examples**
140
+ Before running an MD simulation, it is important to prepare the protein yourself to make sure you simulate the correct system.
141
+ #### Example of preparation steps before MD:
142
+ #### 1) Target Preparation:
143
+ *Manual preparation:*
144
+ - **Fill missing residues and loops**
145
+
146
+ *Using Chimera:*
147
+
148
+ ``Tools -> Sequence -> Structure -> Modeller (loops/refinement)``
149
+ ``Tools -> Structure Editing -> Dock Prep ``
150
+ * **Explicit water molecules as well as cofactors from a crystal structure can be removed or, if necessary, retained manually;**
151
+
152
+ * **Remove co-crystallized ligands;**
153
+
154
+ * **Add hydrogens based on protonation states.**
155
+ * Check states of histidines and put proper aliases HIE, HID or HIP instead of HIS (otherwise protonation can be distorted during MD preparation stage)
156
+
157
+ *type into Chimera cmd:*
158
+ ```
159
+ setattr r type HID :HIS@HD1,DD1,TD1,HND
160
+ setattr r type HIP :HID@HE2,DE2,TE2
161
+ setattr r type HIE :HIS@HE2
162
+ ```
163
+
164
+ #### 2) Docking procedure
165
+ Required to obtain relevant poses of the ligand if needed
166
+ * **Perform docking procedure**
167
+ https://github.com/ci-lab-cz/easydock
168
+
169
+ ### Run molecular dynamics simulation
170
+ ``` source activate md ```
171
+
172
+ **Run simulation for different systems:**
173
+ - Protein in Water
174
+ ```
175
+ run_md -p protein_H_HIS.pdb --md_time 0.1 --nvt_time 100 --npt_time 100 --ncpu 128
176
+ ```
177
+
178
+ - Protein - Ligand
179
+ ```
180
+ run_md -p protein_H_HIS.pdb -l ligand.mol --md_time 0.1 --nvt_time 100 --npt_time 100 --ncpu 128
181
+ ```
182
+
183
+ - Protein - Cofactor
184
+ All molecules should be present in the simulated system, so any problem with the preparation of cofactors will interrupt the program.
185
+ ```
186
+ run_md -p protein_H_HIS.pdb --cofactor cofactors.sdf --md_time 0.1 --nvt_time 100 --npt_time 100 --ncpu 128
187
+
188
+ ```
189
+
190
+ **To run simulations with boron-containing compounds**
191
+ *Gaussian Software* should be available.
192
+ Gaussian optimization and charge calculation will be run only for molecules with boron atoms; other molecules will be processed by the regular Antechamber procedure.
193
+ If Gaussian cannot be loaded, boron-containing molecules will be skipped.
194
+ Any --ligand or --cofactor file can contain boron-containing compounds
195
+ ```
196
+ run_md -p protein_H_HIS.pdb -l molecules.sdf --cofactor cofactors.sdf --md_time 0.1 --npt_time 10 --nvt_time 10 --activate_gaussian "module load Gaussian/09-d01" --gaussian_exe g09 --ncpu 128
197
+
198
+ ```
199
+
200
+ **To run simulations with MCPBPY using parametrization procedure**
201
+ *Gaussian Software* should be available.
202
+ ```
203
+ run_md -p protein_H_HIS.pdb -l molecules.sdf --cofactor cofactors.sdf --md_time 0.1 --npt_time 10 --nvt_time 10 --activate_gaussian "module load Gaussian/09-d01" --gaussian_exe g09 --ncpu 128 --metal_resnames ZN
204
+
205
+ ```
206
+ **To run simulations using multiple servers**
207
+ ```
208
+ PBS:
209
+ run_md -p protein_H_HIS.pdb -l molecules.sdf --cofactor cofactors.sdf --md_time 0.1 --npt_time 10 --nvt_time 10 --hostfile $PBS_NODEFILE --ncpu 128
210
+
211
+ SLURM:
212
+ srun hostname | sort | uniq > hostfile
213
+ run_md -p protein_H_HIS.pdb -l molecules.sdf --cofactor cofactors.sdf --md_time 0.1 --npt_time 10 --nvt_time 10 --hostfile hostfile --ncpu 128
214
+
215
+ ```
216
+
217
+ **To extend the simulation**
218
+ You can continue your simulation an unlimited number of times. The --md_time argument should be set to the overall (total) time of the simulation
219
+ ```
220
+ run_md --wdir_to_continue md_files/md_run/protein_H_HIS_ligand_*/ --md_time 0.2
221
+ ```
222
+ or use explicit tpr, cpt and xtc arguments to continue a non-StreaMD simulation
223
+ ```
224
+ run_md --wdir_to_continue md_files/md_run/protein_H_HIS_ligand_1/ --md_time 0.3 --tpr protein_H_HIS_ligand_1/md_out.tpr --cpt protein_H_HIS_ligand_1/md_out.cpt --xtc protein_H_HIS_ligand_1/md_out.xtc
225
+ ```
226
+ If you do not want to check/run all preparation steps when using non-StreaMD simulations, you can use the --steps argument
227
+ ```
228
+ run_md --wdir_to_continue md_files/md_run/protein_H_HIS_ligand_1/ --md_time 0.3 --steps 3 4
229
+ ```
230
+
231
+ **Output**
232
+ *each run creates in the working directory (or in the current directory if wdir argument was not set up):*
233
+ 1) a unique StreaMD log file whose name contains the names of the protein, ligand and cofactor files and the start time of the run.
234
+ log_*protein-fname*\_*ligand-fname*\_*cofactor-fname*\_*start-time*.log
235
+ Contains important information/warnings/errors about the main program run.
236
+ 2) a unique bash log file.
237
+ streamd_bash_*protein-fname*\_*ligand-fname*\_*cofactor-fname*\_*start-time*.log
238
+ Contains stdout from Gromacs and Antechamber.
239
+
240
+ The following folders will be created:
241
+ ```
242
+ md_files/
243
+ - md_preparation/
244
+ -- protein/
245
+ -- ligands/
246
+ -- cofactors/
247
+ - md_run/
248
+ -- protein-id_ligand-id
249
+ ```
250
+
251
+ ```
252
+ md_files/md_preparation/protein/:
253
+ protein.gro posre.itp topol.top
254
+
255
+ OR for multiple chain protein:
256
+ md_files/md_preparation/protein/:
257
+ protein.gro
258
+ topol.top
259
+ posre_Protein_chain_A.itp
260
+ posre_Protein_chain_B.itp
261
+ topol_Protein_chain_A.itp
262
+ topol_Protein_chain_B.itp
263
+ ```
264
+
265
+ ```
266
+ md_files/md_preparation/ligands/:
267
+ all_resid.txt
268
+
269
+ ligand_1/
270
+ ligand_1.frcmod ligand_1.lib ligand_1.top sqm.in
271
+ ligand_1.gro ligand_1.mol leap.log sqm.out
272
+ ligand_1.inpcrd ligand_1.mol2 posre_ligand_1.itp sqm.pdb
273
+ ligand_1.itp ligand_1.prmtop resid.txt tleap.in
274
+
275
+ ligand_2/
276
+ ..
277
+ ```
278
+
279
+ ```
280
+ md_files/md_preparation/cofactors/:
281
+ all_resid.txt
282
+
283
+ cofactor_1/
284
+ cofactor_1.frcmod cofactor_1.lib cofactor_1.top sqm.in
285
+ cofactor_1.gro cofactor_1.mol leap.log sqm.out
286
+ cofactor_1.inpcrd cofactor_1.mol2 posre_cofactor_1.itp sqm.pdb
287
+ cofactor_1.itp cofactor_1.prmtop resid.txt tleap.in
288
+
289
+ cofactor_2/
290
+ ```
291
+
292
+ ```
293
+ md_files/md_run/
294
+
295
+ protein_H_HIS_ligand_1/
296
+ ligand_1.itp density.xvg em.trr ions.tpr md_out.edr md_out.tpr npt.cpt npt.tpr nvt.log potential.xvg rmsd.xvg temperature.xvg
297
+ cofactor_1.itp em.edr frame.pdb md_centermolsnoPBC.xtc md_out.gro md_out.xtc npt.edr npt.trr nvt.mdp pressure.xvg rmsf.pdb topol.top
298
+ all.itp em.gro gyrate.xvg md_fit.xtc md_out.log md_short_forcheck.xtc npt.gro nvt.cpt nvt.tpr rmsd_cofactor_1.xvg rmsf.xvg
299
+ all_ligand_resid.txt em.log index.ndx md.mdp mdout.mdp minim.mdp npt.log nvt.edr nvt.trr rmsd_ligand_1.xvg solv.gro
300
+ complex.gro em.tpr ions.mdp md_out.cpt md_out_noj_noPBC.xtc newbox.gro npt.mdp nvt.gro posre.itp rmsd_xtal.xvg solv_ions.gro
301
+
302
+ protein_H_HIS_ligand_2/
303
+ ```
304
+ - **MD output files**
305
+ ```
306
+ md_fit.xtc - MD trajectory with removed PBC and fitted into Protein or Protein-Ligand group
307
+ md_short_forcheck.xtc - short trajectory to check if simulation was valid
308
+ frame.pdb - a frame for topology
309
+
310
+ ```
311
+ - **Analysis data**
312
+ ```
313
+ potential.png
314
+ temperature.png
315
+ pressure.png
316
+ density.png
317
+ rmsd.png - rmsd of the protein against minimized structure
318
+ rmsd_xtal.png - rmsd of the protein against crystal structure
319
+ rmsd_cofactor_1.png - rmsd of cofactor against minimized structure
320
+ rmsd_cofactor_1_xtal.png - rmsd of the cofactor against crystal structure
321
+ rmsd_ligand_1.png - rmsd of the ligand against minimized structure
322
+ rmsd_ligand_1_xtal.png - rmsd of the ligand against crystal structure
323
+ rmsf.png - root mean square fluctuation (RMSF, i.e. standard deviation) of atomic positions in the trajectory
324
+ gyrate.png - radius of gyration
325
+ ```
326
+
327
+ ## Supplementary tools
328
+ ### MM-PBSA/MM-GBSA energy calculation
329
+ #### The tool is based on [gmx_MMPBSA](https://valdes-tresanco-ms.github.io/gmx_MMPBSA/dev/)
330
+ Calculation arguments can be changed/added by customized [mmpbsa.in](https://valdes-tresanco-ms.github.io/gmx_MMPBSA/dev/input_file/) file
331
+ #### **USAGE**
332
+ ```
333
+ run_gbsa -h
334
+ usage: run_gbsa [-h] [-i DIRNAME [DIRNAME ...]] [--topol topol.top] [--tpr md_out.tpr] [--xtc md_fit.xtc] [--index index.ndx] [-m mmpbsa.in] [-d WDIR]
335
+ [--out_files OUT_FILES [OUT_FILES ...]] [--hostfile FILENAME] [-c INTEGER] [--ligand_id UNL] [-a [STRING ...]] [--clean_previous]
336
+
337
+ Run MM-GBSA/MM-PBSA calculation using gmx_MMPBSA tool
338
+
339
+ options:
340
+ -h, --help show this help message and exit
341
+ -i DIRNAME [DIRNAME ...], --wdir_to_run DIRNAME [DIRNAME ...]
342
+ single or multiple directories for simulations. Should consist of: tpr, xtc, ndx files
343
+ --topol topol.top topol file from the the MD simulation. Will be ignored if --wdir_to_run is used
344
+ --tpr md_out.tpr tpr file from the the MD simulation. Will be ignored if --wdir_to_run is used
345
+ --xtc md_fit.xtc xtc file of the simulation. Trajectory should have no PBC and be fitted on the Protein_Ligand group. Will be ignored if --wdir_to_run is used
346
+ --index index.ndx Gromacs index file from the simulation. Will be ignored if --wdir_to_run is used
347
+ -m mmpbsa.in, --mmpbsa mmpbsa.in
348
+ MMPBSA input file. If not set up default template will be used.
349
+ -d WDIR, --wdir WDIR Working directory for program output. If not set the current directory will be used.
350
+ --out_files OUT_FILES [OUT_FILES ...]
351
+ gmxMMPBSA out files (FINAL*.dat) to parse. If set will be used over other variables.
352
+ --hostfile FILENAME text file with addresses of nodes of dask SSH cluster. The most typical, it can be passed as $PBS_NODEFILE variable from inside a PBS script.
353
+ The first line in this file will be the address of the scheduler running on the standard port 8786. If omitted, calculations will run on a
354
+ single machine as usual.
355
+ -c INTEGER, --ncpu INTEGER
356
+ number of CPU per server. Use all cpus by default.
357
+ --ligand_id UNL Ligand residue ID
358
+ -a [STRING ...], --append_protein_selection [STRING ...]
359
+ residue IDs whuch will be included in the protein system (cofactors).Example: ZN MG
360
+ --clean_previous Clean previous temporary gmxMMPBSA files
361
+ ```
362
+
363
+ ### **Examples**
364
+ ```
365
+ run_gbsa --wdir_to_run md_files/md_run/protein_H_HIS_ligand_1 md_files/md_run/protein_H_HIS_ligand_2 -c 128 -m mmpbsa.in
366
+ ```
367
+ **Output**
368
+ *each run creates in the working directory (or in the current directory if wdir argument was not set up):*
369
+ 1) a unique streaMD log file
370
+ log_mmpbsa_*start-time*.log
371
+ Contains important information/warnings/errors about the main run_gbsa program run.
372
+ 2) a unique bash log file.
373
+ log_mmpbsa_bash_*start-time*.log
374
+ Contains stdout from gmx_MMPBSA
375
+ 3) GBSA_output_*start-time*.csv with summary csv if MMGBSA method was run
376
+ 4) PBSA_output_*start-time*.csv with summary csv if MMPBSA method was run
377
+
378
+ each wdir_to_run has FINAL_RESULTS_MMPBSA_*start-time*.csv with GBSA/PBSA output.
379
+
380
+ ### ProLIF Protein-Ligand Interaction Fingerprints
381
+ #### **USAGE**
382
+ ```
383
+ run_prolif -h
384
+ usage: run_prolif [-h] [-i DIRNAME [DIRNAME ...]] [--xtc FILENAME] [--tpr FILENAME] [-l STRING] [-s INTEGER] [-a STRING] [-d WDIR] [-v] [--hostfile FILENAME]
385
+ [-c INTEGER] [--width FILENAME] [--height FILENAME] [-o FILENAME] [--not_save_pics]
386
+
387
+ Get protein-ligand interactions from MD trajectories using ProLIF module.
388
+
389
+ options:
390
+ -h, --help show this help message and exit
391
+ -i DIRNAME [DIRNAME ...], --wdir_to_run DIRNAME [DIRNAME ...]
392
+ single or multiple directories for simulations.
393
+ Should consist of: md_out.tpr and md_fit.xtc files (default: None)
394
+ --xtc FILENAME input trajectory file (XTC). Will be ignored if --wdir_to_run is used (default: None)
395
+ --tpr FILENAME input topology file (TPR). Will be ignored if --wdir_to_run is used (default: None)
396
+ -l STRING, --ligand STRING
397
+ residue name of a ligand in the input trajectory. (default: UNL)
398
+ -s INTEGER, --step INTEGER
399
+ step to take every n-th frame. ps (default: 1)
400
+ -a STRING, --append_protein_selection STRING
401
+ the string which will be concatenated to the protein selection atoms. Example: "resname ZN or resname MG". (default: None)
402
+ -d WDIR, --wdir WDIR Working directory for program output. If not set the current directory will be used. (default: None)
403
+ -v, --verbose print progress. (default: False)
404
+ --hostfile FILENAME text file with addresses of nodes of dask SSH cluster. The most typical, it can be passed as $PBS_NODEFILE variable from inside a PBS script. The first line in this file will be the address of the scheduler running on the standard port 8786. If omitted, calculations will run on a single machine as usual. (default: None)
405
+ -c INTEGER, --ncpu INTEGER
406
+ number of CPU per server. Use all cpus by default. (default: 32)
407
+ --width FILENAME width of the output pictures (default: 15)
408
+ --height FILENAME height of the output pictures (default: 10)
409
+ -o FILENAME, --occupancy FILENAME
410
+ occupancy of the unique contacts to show (default: 0.6)
411
+ --not_save_pics not create html and png files (by frames) for each unique trajectory. Only overall prolif png file will be created. (default: False)
412
+
413
+ ```
414
+
415
+ ### **Examples**
416
+ ```
417
+ run_prolif --wdir_to_run md_files/md_run/protein_H_HIS_ligand_1 md_files/md_run/protein_H_HIS_ligand_2 -c 128 -v -s 5
418
+ ```
419
+ **Output**
420
+ 1) in each directory where the xtc file is located, *plifs.csv*, *plifs.png*, *plifs_map.png* and *plifs.html* files will be created for each simulation
421
+ 2) *prolif_output_start-time.csv/png* - aggregated csv/png output file for all analyzed simulations
422
+
423
+ #### Supplementary scripts
424
+ _run_prolif applies all these scripts automatically. Use them if you want a more detailed analysis or to change the picture/font sizes._
425
+ **prolif_drawmap**
426
+ Draw prolif plot for analysis binding mode of multiple ligands
427
+ ````
428
+ prolif_drawmap -h
429
+ usage: prolif_drawmap [-h] -i FILENAME [FILENAME ...] [-o FILENAME] [--width FILENAME] [--height FILENAME] [--base_size FILENAME]
430
+
431
+ Draw prolif plot for analysis binding mode of multiple ligands
432
+
433
+ options:
434
+ -h, --help show this help message and exit
435
+ -i FILENAME [FILENAME ...], --input FILENAME [FILENAME ...]
436
+ input file with prolif output for the set of molecules. Supported formats: *.csv
437
+ Ex: prolif_output.csv
438
+ --occupancy float
439
+ minimum occupancy of the unique contacts to show
440
+ --width int width of the output picture
441
+ --height int height of the output picture
442
+ --base_size int base size of the output picture
443
+
444
+ ````
445
+
446
+ **prolif_draw_by_frame**
447
+
448
+ ```
449
+ prolif_draw_by_frame -h
450
+ usage: prolif_draw_by_frame [-h] -i [FILENAME ...] [-o FILENAME] [--filt_only_H] [--width FILENAME] [--height FILENAME] [--base_size FILENAME]
451
+
452
+ options:
453
+ -h, --help show this help message and exit
454
+ -i [FILENAME ...], --input [FILENAME ...]
455
+ input file with prolif output for the unique molecule. Supported formats: *.csv
456
+ Ex: plifs.csv
457
+ --occupancy float
458
+ minimum occupancy of the unique contacts to show. Show all contacts by default.
459
+ --filt_only_H filt residues where only hydrophobic contacts occur
460
+ --width int width of the output picture
461
+ --height int height of the output picture
462
+ --base_size int base size of the output picture
463
+ ```
464
+
465
+ ### Logging
466
+ All system info and errors are saved into log files, which are placed in your main working directory (the current working directory or the path passed through the --wdir argument):
467
+ **run_md:**
468
+ ```
469
+ log_protein-fname_ligand-fname_cofactor-fname_current-date.log - StreaMD logging user info (status of the run)
470
+ streamd_bash_protein-fname_ligand-fname_cofactor-fname_start-time.log - StreaMD bash system logging info
471
+ ```
472
+
473
+ **run_gbsa:**
474
+ ```
475
+ log_mmpbsa_start-time.log - StreaMD logging user info
476
+ log_mmpbsa_bash_start-time.log - StreaMD bash system logging info
477
+ ```
478
+
479
+ **run_prolif:**
480
+ ```
481
+ log_prolif_start-time.log - StreaMD logging user info
482
+ ```
483
+ ### Licence
484
+ MIT