varsim 1.0.3__py3-none-any.whl → 1.0.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {VarSim → varsim}/__init__.py +73 -54
- {varsim-1.0.3.dist-info → varsim-1.0.5.dist-info}/METADATA +1 -1
- varsim-1.0.5.dist-info/RECORD +6 -0
- varsim-1.0.5.dist-info/top_level.txt +1 -0
- varsim-1.0.3.dist-info/RECORD +0 -6
- varsim-1.0.3.dist-info/top_level.txt +0 -1
- {varsim-1.0.3.dist-info → varsim-1.0.5.dist-info}/WHEEL +0 -0
- {varsim-1.0.3.dist-info → varsim-1.0.5.dist-info}/licenses/LICENSE +0 -0
{VarSim → varsim}/__init__.py
RENAMED
@@ -1,6 +1,7 @@
|
|
1
1
|
import os
|
2
2
|
|
3
3
|
from Bio import Entrez, SeqIO
|
4
|
+
from Bio.Data.CodonTable import standard_dna_table
|
4
5
|
from Bio.Data.IUPACData import (
|
5
6
|
unambiguous_dna_letters,
|
6
7
|
protein_letters,
|
@@ -8,9 +9,8 @@ from Bio.Data.IUPACData import (
|
|
8
9
|
protein_letters_3to1,
|
9
10
|
)
|
10
11
|
from Bio.Seq import Seq
|
11
|
-
from Bio.SeqUtils import seq3
|
12
12
|
from Bio.SeqFeature import SimpleLocation
|
13
|
-
from Bio.
|
13
|
+
from Bio.SeqUtils import seq3
|
14
14
|
|
15
15
|
Entrez.email = os.environ["EMAIL"]
|
16
16
|
Entrez.api_key = os.environ["API_KEY"]
|
@@ -218,8 +218,7 @@ def aa_sub(gene: str) -> list:
|
|
218
218
|
variants.append(
|
219
219
|
(
|
220
220
|
f"{seqrecord.id}:p.{residue}{index}{aa}",
|
221
|
-
f"{seqrecord.id}:p.{protein_letters_1to3[residue]}{
|
222
|
-
index}{protein_letters_1to3[aa]}",
|
221
|
+
f"{seqrecord.id}:p.{protein_letters_1to3[residue]}{index}{protein_letters_1to3[aa]}",
|
223
222
|
)
|
224
223
|
)
|
225
224
|
return variants
|
@@ -251,12 +250,9 @@ def missense(gene: str) -> list:
|
|
251
250
|
):
|
252
251
|
variants.append(
|
253
252
|
(
|
254
|
-
f"{seqrecord.id}:c.{
|
255
|
-
|
256
|
-
f"{protein_id}:p.{protein[index]}{
|
257
|
-
index + 1}{seq.translate()}",
|
258
|
-
f"{protein_id}:p.{seq3(protein[index])}{
|
259
|
-
index + 1}{seq3(seq.translate())}",
|
253
|
+
f"{seqrecord.id}:c.{codon + 3}{cds[codon + 2]}>{base[2]}",
|
254
|
+
f"{protein_id}:p.{protein[index]}{index + 1}{seq.translate()}",
|
255
|
+
f"{protein_id}:p.{seq3(protein[index])}{index + 1}{seq3(seq.translate())}",
|
260
256
|
)
|
261
257
|
)
|
262
258
|
elif (
|
@@ -266,12 +262,9 @@ def missense(gene: str) -> list:
|
|
266
262
|
):
|
267
263
|
variants.append(
|
268
264
|
(
|
269
|
-
f"{seqrecord.id}:c.{
|
270
|
-
|
271
|
-
f"{protein_id}:p.{protein[index]}{
|
272
|
-
index + 1}{seq.translate()}",
|
273
|
-
f"{protein_id}:p.{seq3(protein[index])}{
|
274
|
-
index + 1}{seq3(seq.translate())}",
|
265
|
+
f"{seqrecord.id}:c.{codon + 2}{cds[codon + 1]}>{base[1]}",
|
266
|
+
f"{protein_id}:p.{protein[index]}{index + 1}{seq.translate()}",
|
267
|
+
f"{protein_id}:p.{seq3(protein[index])}{index + 1}{seq3(seq.translate())}",
|
275
268
|
)
|
276
269
|
)
|
277
270
|
elif (
|
@@ -281,23 +274,17 @@ def missense(gene: str) -> list:
|
|
281
274
|
):
|
282
275
|
variants.append(
|
283
276
|
(
|
284
|
-
f"{seqrecord.id}:c.{
|
285
|
-
|
286
|
-
f"{protein_id}:p.{protein[index]}{
|
287
|
-
index + 1}{seq.translate()}",
|
288
|
-
f"{protein_id}:p.{seq3(protein[index])}{
|
289
|
-
index + 1}{seq3(seq.translate())}",
|
277
|
+
f"{seqrecord.id}:c.{codon + 1}{cds[codon]}>{base[0]}",
|
278
|
+
f"{protein_id}:p.{protein[index]}{index + 1}{seq.translate()}",
|
279
|
+
f"{protein_id}:p.{seq3(protein[index])}{index + 1}{seq3(seq.translate())}",
|
290
280
|
)
|
291
281
|
)
|
292
282
|
else:
|
293
283
|
variants.append(
|
294
284
|
(
|
295
|
-
f"{seqrecord.id}:c.{codon + 1}_{codon +
|
296
|
-
|
297
|
-
f"{protein_id}:p.{protein[index]}{
|
298
|
-
index + 1}{seq.translate()}",
|
299
|
-
f"{protein_id}:p.{seq3(protein[index])}{
|
300
|
-
index + 1}{seq3(seq.translate())}",
|
285
|
+
f"{seqrecord.id}:c.{codon + 1}_{codon + 3}{cds[codon:codon + 3]}>{base}",
|
286
|
+
f"{protein_id}:p.{protein[index]}{index + 1}{seq.translate()}",
|
287
|
+
f"{protein_id}:p.{seq3(protein[index])}{index + 1}{seq3(seq.translate())}",
|
301
288
|
)
|
302
289
|
)
|
303
290
|
else:
|
@@ -308,12 +295,9 @@ def missense(gene: str) -> list:
|
|
308
295
|
):
|
309
296
|
variants.append(
|
310
297
|
(
|
311
|
-
f"{seqrecord.id}:c.{
|
312
|
-
|
313
|
-
f"{protein_id}:p.{
|
314
|
-
protein[index]}{index + 1}=",
|
315
|
-
f"{protein_id}:p.{seq3(protein[index])}{
|
316
|
-
index + 1}=",
|
298
|
+
f"{seqrecord.id}:c.{codon + 3}{cds[codon + 2]}>{base[2]}",
|
299
|
+
f"{protein_id}:p.{protein[index]}{index + 1}=",
|
300
|
+
f"{protein_id}:p.{seq3(protein[index])}{index + 1}=",
|
317
301
|
)
|
318
302
|
)
|
319
303
|
elif (
|
@@ -323,12 +307,9 @@ def missense(gene: str) -> list:
|
|
323
307
|
):
|
324
308
|
variants.append(
|
325
309
|
(
|
326
|
-
f"{seqrecord.id}:c.{
|
327
|
-
|
328
|
-
f"{protein_id}:p.{
|
329
|
-
protein[index]}{index + 1}=",
|
330
|
-
f"{protein_id}:p.{seq3(protein[index])}{
|
331
|
-
index + 1}=",
|
310
|
+
f"{seqrecord.id}:c.{codon + 2}{cds[codon + 1]}>{base[1]}",
|
311
|
+
f"{protein_id}:p.{protein[index]}{index + 1}=",
|
312
|
+
f"{protein_id}:p.{seq3(protein[index])}{index + 1}=",
|
332
313
|
)
|
333
314
|
)
|
334
315
|
elif (
|
@@ -338,23 +319,17 @@ def missense(gene: str) -> list:
|
|
338
319
|
):
|
339
320
|
variants.append(
|
340
321
|
(
|
341
|
-
f"{seqrecord.id}:c.{
|
342
|
-
|
343
|
-
f"{protein_id}:p.{
|
344
|
-
protein[index]}{index + 1}=",
|
345
|
-
f"{protein_id}:p.{seq3(protein[index])}{
|
346
|
-
index + 1}=",
|
322
|
+
f"{seqrecord.id}:c.{codon + 1}{cds[codon]}>{base[0]}",
|
323
|
+
f"{protein_id}:p.{protein[index]}{index + 1}=",
|
324
|
+
f"{protein_id}:p.{seq3(protein[index])}{index + 1}=",
|
347
325
|
)
|
348
326
|
)
|
349
327
|
else:
|
350
328
|
variants.append(
|
351
329
|
(
|
352
|
-
f"{seqrecord.id}:c.{codon + 1}_{codon +
|
353
|
-
|
354
|
-
f"{protein_id}:p.{
|
355
|
-
protein[index]}{index + 1}=",
|
356
|
-
f"{protein_id}:p.{seq3(protein[index])}{
|
357
|
-
index + 1}=",
|
330
|
+
f"{seqrecord.id}:c.{codon + 1}_{codon + 3}{cds[codon:codon + 3]}>{base}",
|
331
|
+
f"{protein_id}:p.{protein[index]}{index + 1}=",
|
332
|
+
f"{protein_id}:p.{seq3(protein[index])}{index + 1}=",
|
358
333
|
)
|
359
334
|
)
|
360
335
|
return variants
|
@@ -410,5 +385,49 @@ def inframe_dup(gene: str) -> list:
|
|
410
385
|
return variants
|
411
386
|
|
412
387
|
|
413
|
-
|
414
|
-
|
388
|
+
def frameshift_dup(gene: str) -> list:
    """Enumerate single-base duplications over the CDS of a gene's MANE Select transcript.

    Queries the NCBI ``nucleotide`` database for ``gene`` restricted to the
    "mane select" keyword, downloads the matching GenBank record, and emits
    one variant description per base of every ``CDS`` feature.

    Args:
        gene: Gene symbol placed in the ``[Gene Name]`` field of the Entrez query.

    Returns:
        A list of 1-tuples, each containing a string of the form
        ``"<record id>:c.<position><base>dup"``, where ``position`` is the
        1-based offset of ``base`` within the extracted CDS.

    Raises:
        Errors from ``Entrez``/``SeqIO.read`` propagate unchanged (per the
        Biopython documentation, ``SeqIO.read`` raises ``ValueError`` unless
        the response holds exactly one record).
    """
    term = f'{gene}[Gene Name] "mane select"[keyword]'

    # Entrez.read() does not close a handle it was given, so release it here
    # even when parsing fails.
    stream = Entrez.esearch(db="nucleotide", term=term)
    try:
        record = Entrez.read(stream)
    finally:
        stream.close()

    stream = Entrez.efetch(
        db="nucleotide", id=record["IdList"], rettype="gb", retmode="text"
    )
    try:
        seqrecord = SeqIO.read(stream, "genbank")
    finally:
        stream.close()

    variants = []
    for feature in seqrecord.features:
        if feature.type != "CDS":
            continue
        cds = feature.location.extract(seqrecord).seq
        # NOTE(review): HGVS writes duplications as "c.<pos>dup"; the
        # "<pos><base>dup" layout is kept byte-for-byte for existing callers
        # -- confirm it is intended.
        variants.extend(
            (f"{seqrecord.id}:c.{index}{base}dup",)
            for index, base in enumerate(cds, start=1)
        )
    return variants
|
403
|
+
|
404
|
+
|
405
|
+
def frameshift_del(gene: str) -> list:
    """Enumerate single-base deletions over the CDS of a gene's MANE Select transcript.

    Queries the NCBI ``nucleotide`` database for ``gene`` restricted to the
    "mane select" keyword, downloads the matching GenBank record, and emits
    one variant description per base of every ``CDS`` feature.

    Args:
        gene: Gene symbol placed in the ``[Gene Name]`` field of the Entrez query.

    Returns:
        A list of 1-tuples, each containing a string of the form
        ``"<record id>:c.<position><base>del"``, where ``position`` is the
        1-based offset of ``base`` within the extracted CDS.

    Raises:
        Errors from ``Entrez``/``SeqIO.read`` propagate unchanged (per the
        Biopython documentation, ``SeqIO.read`` raises ``ValueError`` unless
        the response holds exactly one record).
    """
    term = f'{gene}[Gene Name] "mane select"[keyword]'

    # Entrez.read() does not close a handle it was given, so release it here
    # even when parsing fails.
    stream = Entrez.esearch(db="nucleotide", term=term)
    try:
        record = Entrez.read(stream)
    finally:
        stream.close()

    stream = Entrez.efetch(
        db="nucleotide", id=record["IdList"], rettype="gb", retmode="text"
    )
    try:
        seqrecord = SeqIO.read(stream, "genbank")
    finally:
        stream.close()

    variants = []
    for feature in seqrecord.features:
        if feature.type != "CDS":
            continue
        cds = feature.location.extract(seqrecord).seq
        # NOTE(review): HGVS writes deletions as "c.<pos>del"; the
        # "<pos><base>del" layout is kept byte-for-byte for existing callers
        # -- confirm it is intended.
        variants.extend(
            (f"{seqrecord.id}:c.{index}{base}del",)
            for index, base in enumerate(cds, start=1)
        )
    return variants
|
420
|
+
|
421
|
+
|
422
|
+
# Public API of the package: the names bound by ``from varsim import *``.
# NOTE(review): only frameshift_dup and frameshift_del are fully visible in
# this excerpt; the other names are presumably defined earlier in the module
# -- confirm each one exists before release.
__all__ = [
    "frameshift_dup",
    "frameshift_del",
    "cds",
    "inframe_dup",
    "inframe_del",
    "splicing",
    "utr5",
    "utr3",
    "aa_sub",
    "missense",
]
|
@@ -0,0 +1,6 @@
|
|
1
|
+
varsim/__init__.py,sha256=r2jXdCqlbLaJsNGXum0B9hBoX0gpGMNQ_aIqt2GyXr8,17714
|
2
|
+
varsim-1.0.5.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
|
3
|
+
varsim-1.0.5.dist-info/METADATA,sha256=8Uo9_Gn2Pg3Mkct-ZC7oJ9la2ZfddqvduepCUwnorPA,2464
|
4
|
+
varsim-1.0.5.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
5
|
+
varsim-1.0.5.dist-info/top_level.txt,sha256=2fLprhnBvkF-7VEOzGcpKoodqW08HjyNbVzM6emJrTI,7
|
6
|
+
varsim-1.0.5.dist-info/RECORD,,
|
@@ -0,0 +1 @@
|
|
1
|
+
varsim
|
varsim-1.0.3.dist-info/RECORD
DELETED
@@ -1,6 +0,0 @@
|
|
1
|
-
VarSim/__init__.py,sha256=6bFR9e1vaNb4Ow1pBvraet1o-5WXtkGnenmlkLjGFKg,17291
|
2
|
-
varsim-1.0.3.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
|
3
|
-
varsim-1.0.3.dist-info/METADATA,sha256=8HrhM1PXMWht3ca3CBshTNq26ZgV2DytKTW5INsPSoE,2464
|
4
|
-
varsim-1.0.3.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
5
|
-
varsim-1.0.3.dist-info/top_level.txt,sha256=k7Z7TmZCty_ldWkOo_O6Nw15AZ2d55Sj8v7GKtu_Pzo,7
|
6
|
-
varsim-1.0.3.dist-info/RECORD,,
|
@@ -1 +0,0 @@
|
|
1
|
-
VarSim
|
File without changes
|
File without changes
|