varsim 1.0.3__py3-none-any.whl → 1.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,7 @@
1
1
  import os
2
2
 
3
3
  from Bio import Entrez, SeqIO
4
+ from Bio.Data.CodonTable import standard_dna_table
4
5
  from Bio.Data.IUPACData import (
5
6
  unambiguous_dna_letters,
6
7
  protein_letters,
@@ -8,9 +9,8 @@ from Bio.Data.IUPACData import (
8
9
  protein_letters_3to1,
9
10
  )
10
11
  from Bio.Seq import Seq
11
- from Bio.SeqUtils import seq3
12
12
  from Bio.SeqFeature import SimpleLocation
13
- from Bio.Data.CodonTable import standard_dna_table
13
+ from Bio.SeqUtils import seq3
14
14
 
15
15
  Entrez.email = os.environ["EMAIL"]
16
16
  Entrez.api_key = os.environ["API_KEY"]
@@ -218,8 +218,7 @@ def aa_sub(gene: str) -> list:
218
218
  variants.append(
219
219
  (
220
220
  f"{seqrecord.id}:p.{residue}{index}{aa}",
221
- f"{seqrecord.id}:p.{protein_letters_1to3[residue]}{
222
- index}{protein_letters_1to3[aa]}",
221
+ f"{seqrecord.id}:p.{protein_letters_1to3[residue]}{index}{protein_letters_1to3[aa]}",
223
222
  )
224
223
  )
225
224
  return variants
@@ -251,12 +250,9 @@ def missense(gene: str) -> list:
251
250
  ):
252
251
  variants.append(
253
252
  (
254
- f"{seqrecord.id}:c.{
255
- codon + 3}{cds[codon + 2]}>{base[2]}",
256
- f"{protein_id}:p.{protein[index]}{
257
- index + 1}{seq.translate()}",
258
- f"{protein_id}:p.{seq3(protein[index])}{
259
- index + 1}{seq3(seq.translate())}",
253
+ f"{seqrecord.id}:c.{codon + 3}{cds[codon + 2]}>{base[2]}",
254
+ f"{protein_id}:p.{protein[index]}{index + 1}{seq.translate()}",
255
+ f"{protein_id}:p.{seq3(protein[index])}{index + 1}{seq3(seq.translate())}",
260
256
  )
261
257
  )
262
258
  elif (
@@ -266,12 +262,9 @@ def missense(gene: str) -> list:
266
262
  ):
267
263
  variants.append(
268
264
  (
269
- f"{seqrecord.id}:c.{
270
- codon + 2}{cds[codon + 1]}>{base[1]}",
271
- f"{protein_id}:p.{protein[index]}{
272
- index + 1}{seq.translate()}",
273
- f"{protein_id}:p.{seq3(protein[index])}{
274
- index + 1}{seq3(seq.translate())}",
265
+ f"{seqrecord.id}:c.{codon + 2}{cds[codon + 1]}>{base[1]}",
266
+ f"{protein_id}:p.{protein[index]}{index + 1}{seq.translate()}",
267
+ f"{protein_id}:p.{seq3(protein[index])}{index + 1}{seq3(seq.translate())}",
275
268
  )
276
269
  )
277
270
  elif (
@@ -281,23 +274,17 @@ def missense(gene: str) -> list:
281
274
  ):
282
275
  variants.append(
283
276
  (
284
- f"{seqrecord.id}:c.{
285
- codon + 1}{cds[codon]}>{base[0]}",
286
- f"{protein_id}:p.{protein[index]}{
287
- index + 1}{seq.translate()}",
288
- f"{protein_id}:p.{seq3(protein[index])}{
289
- index + 1}{seq3(seq.translate())}",
277
+ f"{seqrecord.id}:c.{codon + 1}{cds[codon]}>{base[0]}",
278
+ f"{protein_id}:p.{protein[index]}{index + 1}{seq.translate()}",
279
+ f"{protein_id}:p.{seq3(protein[index])}{index + 1}{seq3(seq.translate())}",
290
280
  )
291
281
  )
292
282
  else:
293
283
  variants.append(
294
284
  (
295
- f"{seqrecord.id}:c.{codon + 1}_{codon +
296
- 3}{cds[codon:codon + 3]}>{base}",
297
- f"{protein_id}:p.{protein[index]}{
298
- index + 1}{seq.translate()}",
299
- f"{protein_id}:p.{seq3(protein[index])}{
300
- index + 1}{seq3(seq.translate())}",
285
+ f"{seqrecord.id}:c.{codon + 1}_{codon + 3}{cds[codon:codon + 3]}>{base}",
286
+ f"{protein_id}:p.{protein[index]}{index + 1}{seq.translate()}",
287
+ f"{protein_id}:p.{seq3(protein[index])}{index + 1}{seq3(seq.translate())}",
301
288
  )
302
289
  )
303
290
  else:
@@ -308,12 +295,9 @@ def missense(gene: str) -> list:
308
295
  ):
309
296
  variants.append(
310
297
  (
311
- f"{seqrecord.id}:c.{
312
- codon + 3}{cds[codon + 2]}>{base[2]}",
313
- f"{protein_id}:p.{
314
- protein[index]}{index + 1}=",
315
- f"{protein_id}:p.{seq3(protein[index])}{
316
- index + 1}=",
298
+ f"{seqrecord.id}:c.{codon + 3}{cds[codon + 2]}>{base[2]}",
299
+ f"{protein_id}:p.{protein[index]}{index + 1}=",
300
+ f"{protein_id}:p.{seq3(protein[index])}{index + 1}=",
317
301
  )
318
302
  )
319
303
  elif (
@@ -323,12 +307,9 @@ def missense(gene: str) -> list:
323
307
  ):
324
308
  variants.append(
325
309
  (
326
- f"{seqrecord.id}:c.{
327
- codon + 2}{cds[codon + 1]}>{base[1]}",
328
- f"{protein_id}:p.{
329
- protein[index]}{index + 1}=",
330
- f"{protein_id}:p.{seq3(protein[index])}{
331
- index + 1}=",
310
+ f"{seqrecord.id}:c.{codon + 2}{cds[codon + 1]}>{base[1]}",
311
+ f"{protein_id}:p.{protein[index]}{index + 1}=",
312
+ f"{protein_id}:p.{seq3(protein[index])}{index + 1}=",
332
313
  )
333
314
  )
334
315
  elif (
@@ -338,23 +319,17 @@ def missense(gene: str) -> list:
338
319
  ):
339
320
  variants.append(
340
321
  (
341
- f"{seqrecord.id}:c.{
342
- codon + 1}{cds[codon]}>{base[0]}",
343
- f"{protein_id}:p.{
344
- protein[index]}{index + 1}=",
345
- f"{protein_id}:p.{seq3(protein[index])}{
346
- index + 1}=",
322
+ f"{seqrecord.id}:c.{codon + 1}{cds[codon]}>{base[0]}",
323
+ f"{protein_id}:p.{protein[index]}{index + 1}=",
324
+ f"{protein_id}:p.{seq3(protein[index])}{index + 1}=",
347
325
  )
348
326
  )
349
327
  else:
350
328
  variants.append(
351
329
  (
352
- f"{seqrecord.id}:c.{codon + 1}_{codon +
353
- 3}{cds[codon:codon + 3]}>{base}",
354
- f"{protein_id}:p.{
355
- protein[index]}{index + 1}=",
356
- f"{protein_id}:p.{seq3(protein[index])}{
357
- index + 1}=",
330
+ f"{seqrecord.id}:c.{codon + 1}_{codon + 3}{cds[codon:codon + 3]}>{base}",
331
+ f"{protein_id}:p.{protein[index]}{index + 1}=",
332
+ f"{protein_id}:p.{seq3(protein[index])}{index + 1}=",
358
333
  )
359
334
  )
360
335
  return variants
@@ -410,5 +385,49 @@ def inframe_dup(gene: str) -> list:
410
385
  return variants
411
386
 
412
387
 
413
- if __name__ == "__main__":
414
- print(splicing("TMPRSS6"))
388
+ def frameshift_dup(gene: str) -> list:
389
+ variants = []
390
+ term = f'{gene}[Gene Name] "mane select"[keyword]'
391
+ stream = Entrez.esearch(db="nucleotide", term=term)
392
+ record = Entrez.read(stream)
393
+ stream = Entrez.efetch(
394
+ db="nucleotide", id=record["IdList"], rettype="gb", retmode="text"
395
+ )
396
+ seqrecord = SeqIO.read(stream, "genbank")
397
+ for feature in seqrecord.features:
398
+ if feature.type == "CDS":
399
+ cds = feature.location.extract(seqrecord).seq
400
+ for index, base in enumerate(cds, start=1):
401
+ variants.append((f"{seqrecord.id}:c.{str(index) + base}dup",))
402
+ return variants
403
+
404
+
405
+ def frameshift_del(gene: str) -> list:
406
+ variants = []
407
+ term = f'{gene}[Gene Name] "mane select"[keyword]'
408
+ stream = Entrez.esearch(db="nucleotide", term=term)
409
+ record = Entrez.read(stream)
410
+ stream = Entrez.efetch(
411
+ db="nucleotide", id=record["IdList"], rettype="gb", retmode="text"
412
+ )
413
+ seqrecord = SeqIO.read(stream, "genbank")
414
+ for feature in seqrecord.features:
415
+ if feature.type == "CDS":
416
+ cds = feature.location.extract(seqrecord).seq
417
+ for index, base in enumerate(cds, start=1):
418
+ variants.append((f"{seqrecord.id}:c.{str(index) + base}del",))
419
+ return variants
420
+
421
+
422
+ __all__ = [
423
+ "frameshift_dup",
424
+ "frameshift_del",
425
+ "cds",
426
+ "inframe_dup",
427
+ "inframe_del",
428
+ "splicing",
429
+ "utr5",
430
+ "utr3",
431
+ "aa_sub",
432
+ "missense",
433
+ ]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: varsim
3
- Version: 1.0.3
3
+ Version: 1.0.5
4
4
  Summary: Variant Simulator
5
5
  Author-email: Liu Sun <sunliu@yxnu.edu.cn>, Jian Yang <yangjian@yxnu.edu.cn>
6
6
  Project-URL: Homepage, https://github.com/liu-sun/VarSim
@@ -0,0 +1,6 @@
1
+ varsim/__init__.py,sha256=r2jXdCqlbLaJsNGXum0B9hBoX0gpGMNQ_aIqt2GyXr8,17714
2
+ varsim-1.0.5.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
3
+ varsim-1.0.5.dist-info/METADATA,sha256=8Uo9_Gn2Pg3Mkct-ZC7oJ9la2ZfddqvduepCUwnorPA,2464
4
+ varsim-1.0.5.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
5
+ varsim-1.0.5.dist-info/top_level.txt,sha256=2fLprhnBvkF-7VEOzGcpKoodqW08HjyNbVzM6emJrTI,7
6
+ varsim-1.0.5.dist-info/RECORD,,
@@ -0,0 +1 @@
1
+ varsim
@@ -1,6 +0,0 @@
1
- VarSim/__init__.py,sha256=6bFR9e1vaNb4Ow1pBvraet1o-5WXtkGnenmlkLjGFKg,17291
2
- varsim-1.0.3.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
3
- varsim-1.0.3.dist-info/METADATA,sha256=8HrhM1PXMWht3ca3CBshTNq26ZgV2DytKTW5INsPSoE,2464
4
- varsim-1.0.3.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
5
- varsim-1.0.3.dist-info/top_level.txt,sha256=k7Z7TmZCty_ldWkOo_O6Nw15AZ2d55Sj8v7GKtu_Pzo,7
6
- varsim-1.0.3.dist-info/RECORD,,
@@ -1 +0,0 @@
1
- VarSim
File without changes