spacr 0.0.2__py3-none-any.whl → 0.0.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,72 @@
1
+ import csv
2
+ import os
3
+ import requests
4
+
5
def _download_file(url, path, timeout):
    """Stream ``url`` to ``path``.

    Returns True when the server answered 200 and the file was written,
    False (leaving nothing on disk) for any other status code.
    """
    # The context manager releases the connection even when the body is
    # never read (e.g. on a 404), which a bare stream=True request would not.
    with requests.get(url, stream=True, timeout=timeout) as response:
        if response.status_code != 200:
            return False
        # Write to a temporary name first so an interrupted transfer never
        # leaves a truncated file that looks like a finished download.
        part_path = f"{path}.part"
        try:
            with open(part_path, 'wb') as out_file:
                for chunk in response.iter_content(chunk_size=65536):
                    out_file.write(chunk)
            os.replace(part_path, path)
        finally:
            if os.path.exists(part_path):
                os.remove(part_path)
    return True


def download_alphafold_structures(tsv_location, dst, version="4", timeout=30):
    """Download AlphaFold PDB, CIF and PAE files for every entry in a TSV file.

    The TSV file must have an ``Entry`` column. For each entry the three
    files are fetched from ``https://alphafold.ebi.ac.uk/files/`` and stored
    in the ``pdb``, ``cif`` and ``pae`` sub-folders of ``dst``. Entries for
    which any file could not be fetched are written to
    ``failed_downloads.csv`` inside ``dst``.

    Args:
        tsv_location: Path to the tab-separated file listing the entries.
        dst: Destination folder; it and its sub-folders are created if needed.
        version: AlphaFold model version used in the remote file names.
        timeout: Seconds to wait for the server before a request is
            abandoned and the entry is recorded as failed.

    Returns:
        The list of entries for which at least one file could not be
        downloaded (empty when everything succeeded).

    Raises:
        OSError: If the TSV file cannot be opened or the folders cannot be
            created.
        KeyError: If a row has no ``Entry`` column.
    """
    base_url = "https://alphafold.ebi.ac.uk/files/"

    # Create the destination directories (makedirs also creates dst itself)
    dst_pdb = os.path.join(dst, 'pdb')
    dst_cif = os.path.join(dst, 'cif')
    dst_pae = os.path.join(dst, 'pae')
    for folder in (dst_pdb, dst_cif, dst_pae):
        os.makedirs(folder, exist_ok=True)

    failed_downloads = []  # Entries with at least one file missing

    # Open the TSV file and read entries (newline='' as the csv module requires)
    with open(tsv_location, 'r', newline='') as tsv_file:
        for row in csv.DictReader(tsv_file, delimiter='\t'):
            entry = row['Entry']
            targets = (
                (dst_pdb, f"AF-{entry}-F1-model_v{version}.pdb"),
                (dst_cif, f"AF-{entry}-F1-model_v{version}.cif"),
                (dst_pae, f"AF-{entry}-F1-predicted_aligned_error_v{version}.json"),
            )

            missing = []
            try:
                # Each file is checked on its own status code, so an error
                # page is never saved under a structure file name.
                for folder, filename in targets:
                    if _download_file(base_url + filename, os.path.join(folder, filename), timeout):
                        print(f"Downloaded: {filename}")
                    else:
                        missing.append(filename)
            except Exception as e:
                # Deliberately broad: this is a best-effort batch job, so one
                # bad entry is logged and recorded instead of aborting the run.
                print(f"Error downloading structure for {entry}: {e}")
                failed_downloads.append(entry)
                continue

            if missing:
                failed_downloads.append(entry)
                print(f"Failed to download structure for: {entry} (missing: {', '.join(missing)})")

    # Save the list of failed downloads to a CSV file in the destination folder
    if failed_downloads:
        failed_path = os.path.join(dst, 'failed_downloads.csv')
        with open(failed_path, 'w', newline='') as failed_file:
            writer = csv.writer(failed_file)
            writer.writerow(['Entry'])
            writer.writerows([entry] for entry in failed_downloads)
        print(f"Failed download entries saved to: {failed_path}")

    return failed_downloads
68
+
69
# Example usage. The paths stay available as module-level names, but the
# download itself only runs when this file is executed as a script, so that
# importing the module never touches the network or the file system.
tsv_location = '/home/carruthers/Downloads/GT1_proteome/GT1_proteins_uniprot.tsv'  # Replace with the path to your TSV file containing a list of UniProt entries
dst_folder = '/home/carruthers/Downloads/GT1_proteome'  # Replace with your destination folder

if __name__ == "__main__":
    download_alphafold_structures(tsv_location, dst_folder)