PyPI - split3c - Versions diffs - 0.0.1__tar.gz → 0.0.2__tar.gz - Mend

split3c 0.0.1tar.gz → 0.0.2tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36) hide show

{split3c-0.0.1/src/split3c.egg-info → split3c-0.0.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: split3c
-Version: 0.0.1
+Version: 0.0.2
 Summary: Toolkit to split and resolve chimeric 3C/Hi-C/Micro-C reads
 Author-email: Samir Bertache <samir.bertache.djenadi@gmail.com>
 License-Expression: AGPL-3.0-or-later
@@ -23,8 +23,8 @@ Requires-Dist: build>=1.2.0; extra == "dev"
 Requires-Dist: twine>=5.0.0; extra == "dev"
 Dynamic: license-file
-[![pipeline status](https://gitbio.ens-lyon.fr/LBMC/physbio/split3c/badges/main/pipeline.svg)](https://gitbio.ens-lyon.fr/LBMC/physbio/split3c/-/pipelines)
-[![coverage report](https://gitbio.ens-lyon.fr/LBMC/physbio/split3c/badges/main/coverage.svg?job=tests)](https://gitbio.ens-lyon.fr/LBMC/physbio/split3c/-/commits/main)
+[![pipeline status](https://gitbio.ens-lyon.fr/LBMC/physbio/split3c/badges/master/pipeline.svg)](https://gitbio.ens-lyon.fr/LBMC/physbio/split3c/-/pipelines)
+[![coverage report](https://gitbio.ens-lyon.fr/LBMC/physbio/split3c/badges/master/coverage.svg?job=tests)](https://gitbio.ens-lyon.fr/LBMC/physbio/split3c/-/commits/main)
 # `split3c`
@@ -76,7 +76,7 @@ split3c resolve --help
 Restriction enzyme-based workflow for Hi-C / HiChIP / 3C-like libraries.
-![split3c re-site workflow](docs/images/resite-workflow.png)
+![split3c re-site workflow](doc/img/resite-workflow.png)
 ---
@@ -84,17 +84,16 @@ Restriction enzyme-based workflow for Hi-C / HiChIP / 3C-like libraries.
 Non-specific ligation workflow for Micro-C-like libraries.
-![split3c ns-site workflow](docs/images/nssite-workflow.png)
+![split3c ns-site workflow](doc/img/nssite-workflow.png)
 ---
 ## Benchmark
-![split3c benchmark](docs/images/benchmark.png)
+![split3c benchmark](doc/img/benchmark.png)
 ---
 ## License
 split3c is released under the AGPLv3 license.

{split3c-0.0.1 → split3c-0.0.2}/README.md RENAMED Viewed

@@ -1,5 +1,5 @@
-[![pipeline status](https://gitbio.ens-lyon.fr/LBMC/physbio/split3c/badges/main/pipeline.svg)](https://gitbio.ens-lyon.fr/LBMC/physbio/split3c/-/pipelines)
-[![coverage report](https://gitbio.ens-lyon.fr/LBMC/physbio/split3c/badges/main/coverage.svg?job=tests)](https://gitbio.ens-lyon.fr/LBMC/physbio/split3c/-/commits/main)
+[![pipeline status](https://gitbio.ens-lyon.fr/LBMC/physbio/split3c/badges/master/pipeline.svg)](https://gitbio.ens-lyon.fr/LBMC/physbio/split3c/-/pipelines)
+[![coverage report](https://gitbio.ens-lyon.fr/LBMC/physbio/split3c/badges/master/coverage.svg?job=tests)](https://gitbio.ens-lyon.fr/LBMC/physbio/split3c/-/commits/main)
 # `split3c`
@@ -51,7 +51,7 @@ split3c resolve --help
 Restriction enzyme-based workflow for Hi-C / HiChIP / 3C-like libraries.
-![split3c re-site workflow](docs/images/resite-workflow.png)
+![split3c re-site workflow](doc/img/resite-workflow.png)
 ---
@@ -59,17 +59,16 @@ Restriction enzyme-based workflow for Hi-C / HiChIP / 3C-like libraries.
 Non-specific ligation workflow for Micro-C-like libraries.
-![split3c ns-site workflow](docs/images/nssite-workflow.png)
+![split3c ns-site workflow](doc/img/nssite-workflow.png)
 ---
 ## Benchmark
-![split3c benchmark](docs/images/benchmark.png)
+![split3c benchmark](doc/img/benchmark.png)
 ---
 ## License
 split3c is released under the AGPLv3 license.

{split3c-0.0.1 → split3c-0.0.2}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "split3c"
-version = "0.0.1"
+version = "0.0.2"
 description = "Toolkit to split and resolve chimeric 3C/Hi-C/Micro-C reads"
 readme = "README.md"
 requires-python = ">=3.12"

{split3c-0.0.1 → split3c-0.0.2}/src/split3c/cli.py RENAMED Viewed

@@ -1,7 +1,7 @@
 """
-This script is a the split3c project, designed to process paired-end FASTQ files by fragmenting DNA sequences at specified restriction enzyme sites.
+This script is part of the split3c project; split3c is a toolkit for preprocessing 3C-type sequencing libraries and converting BAM alignments into .pairs files for chromatin contact analysis.
-Copyright © 2024 Samir Bertache
+Copyright © 2026 Samir Bertache
 SPDX-License-Identifier: AGPL-3.0-or-later

{split3c-0.0.1 → split3c-0.0.2}/src/split3c/nssite/auxiliary.py RENAMED Viewed

@@ -1,3 +1,27 @@
+"""
+This script is part of the split3c project, designed to process paired-end FASTQ files by fragmenting DNA sequences at specified unmapped sites. Built to analyse Micro-C/CAD-C data.
+Copyright © 2024 Samir Bertache
+SPDX-License-Identifier: AGPL-3.0-or-later
+===============================================================================
+This program is free software: you can redistribute it and/or modify it under
+the terms of the GNU Affero General Public License as published by the
+Free Software Foundation, either version 3 of the License, or (at your option)
+any later version.
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+See the GNU Affero General Public License for more details.
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <https://www.gnu.org/licenses/>.
+"""
 def signal_handler(sig, frame, out_f, out_r=None):
     """
     Handle termination signals to gracefully terminate processes.
@@ -43,27 +67,37 @@ def signal_handler(sig, frame, out_f, out_r=None):
 def partitionning(num_threads: int, single_bam: bool = False) -> tuple[int, int, int]:
     """
-    Heuristique empirique de partition des ressources pour microsplit.
+    Empirical resource partitioning heuristic for microsplit.
+    Returns:
+    pigz_threads_per_file: pigz threads per file (F and R)
-    Retourne:
-      pigz_threads_per_file : threads pigz par fichier (F et R)
-      compute_processes     : nb de workers process_items
-      bam_threads           : threads pysam/htslib par fichier (lecture ET écriture)
+    compute_processes: number of process_items workers
+    bam_threads: pysam/htslib threads per file (read AND write)
     IMPORTANT
-    ---------
-    Cette fonction est volontairement empirique (surallocation CPU acceptée).
-    `num_threads` est un *hint* de cœurs disponibles, pas un budget strict.
-    Points de calibration (bench observés)
+    ---------
+    This function is intentionally empirical (CPU overallocation is accepted).
+    `num_threads` is a *hint* of available cores, not a strict budget.
+    Calibration points (observed benchmarks)
     --------------------------------------
-    - 4  cœurs -> (1, 1, 1)
-    - 8  cœurs -> (2, 3, 1)
-    - 16 cœurs -> (3, 4, 3)
-    En mode single_bam=True :
-    - on double les threads BAM, car un seul flux BAM doit alimenter toute la pipeline
-    - pigz_per_file et compute_processes restent inchangés
+    - 4 cores -> (1, 1, 1)
+    - 8 cores -> (2, 3, 1)
+    - 16 cores -> (3, 4, 3)
+    In single_bam=True mode:
+    - The number of BAM threads is doubled, as a single BAM stream must feed the entire pipeline.
+    - pigz_per_file and compute_processes remain unchanged.
     Doctests
     --------

{split3c-0.0.1 → split3c-0.0.2}/src/split3c/nssite/bam.py RENAMED Viewed

@@ -1,7 +1,31 @@
+"""
+This script is part of the split3c project, designed to process paired-end FASTQ files by fragmenting DNA sequences at specified unmapped sites. Built to analyse Micro-C/CAD-C data.
+Copyright © 2024 Samir Bertache
+SPDX-License-Identifier: AGPL-3.0-or-later
+===============================================================================
+This program is free software: you can redistribute it and/or modify it under
+the terms of the GNU Affero General Public License as published by the
+Free Software Foundation, either version 3 of the License, or (at your option)
+any later version.
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+See the GNU Affero General Public License for more details.
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <https://www.gnu.org/licenses/>.
+"""
 def get_bam_headers(bam_for_path, bam_rev_path):
     """
-    Ouvre les fichiers BAM, extrait leurs headers sous forme de dictionnaire,
-    et referme les fichiers immédiatement.
+    Open the BAM files, extract their headers as a dictionary,
+    and close the files immediately.
     Returns:
         tuple: (header_dict_forward, header_dict_reverse)
@@ -76,7 +100,7 @@ def write_bam_pair_from_sam(
     bam_threads=1,
 ):
     """
-    Écrit les paires BAM en utilisant les dictionnaires de header fournis.
+    Writes the BAM pairs using the provided header dictionaries.
     """
     import sys
@@ -122,8 +146,9 @@ def write_bam_pair_from_sam(
 def get_bam_header_single(bam_path):
     """
-    Ouvre un BAM unique, extrait son header sous forme de dictionnaire,
-    puis referme le fichier.
+    Opens a single BAM file, extracts its header as a dictionary,
+    then closes the file.
     Returns
     -------
@@ -138,23 +163,25 @@ def get_bam_header_single(bam_path):
 def _pair_reads_from_single_bam(read_a, read_b, strict=True):
     """
-    Ordonne deux lectures provenant d'un BAM interleavé en (forward/read1, reverse/read2).
+    Orders two reads from an interleaved BAM into (forward/read1, reverse/read2).
-    Paramètres
+    Parameters
     ----------
-    read_a, read_b : pysam.AlignedSegment
-    strict : bool
-        Si True, lève une erreur en cas d'incohérence forte.
-        Si False, tente un fallback par ordre d'apparition.
+    read_a, read_b: pysam.AlignedSegment
+    strict: bool
+    If True, raises an error in case of a strong inconsistency.
+    If False, attempts a fallback in order of appearance.
     Returns
-    -------
+    -------
     tuple
-        (read_for, read_rev)
+    (read_for, read_rev)
     Notes
     -----
-    On utilise en priorité les flags 0x40 / 0x80 (is_read1 / is_read2).
+    Flags 0x40 / 0x80 (is_read1 / is_read2) are used as the primary method.
     """
     if read_a is None or read_b is None:
         raise ValueError("Pairing failure: one of the reads is None.")
@@ -178,28 +205,30 @@ def read_bam_interleaved(
     strict=True,
 ):
     """
-    Lit un BAM unique interleavé (une ligne forward/read1 suivie de la ligne reverse/read2) et envoie des batchs de paires SAM dans input_queue.
+    Reads a single interleaved BAM (a forward/read1 line followed by a reverse/read2 line) and sends batches of SAM pairs to input_queue.
+    Output contract identical to read_bam_pair:
-    Contrat de sortie identique à read_bam_pair:
-        batch = list[(sam_f, sam_r)]
+    batch = list[(sam_f, sam_r)]
-    Paramètres
+    Parameters
     ----------
-    bam_file : str
-        Chemin vers un BAM unique interleavé.
-    input_queue : multiprocessing.Queue
-    num_processes : int
-        Nombre de workers compute, pour envoyer les sentinelles None.
-    bam_threads : int
-        Threads pysam/htslib.
-    batch_size : int
-        Taille des batchs.
-    strict : bool
-        Si True, échoue si les paires ne sont pas parfaitement cohérentes.
-    Exigences
+    bam_file: str
+    Path to a single interleaved BAM.
+    input_queue: multiprocessing.Queue
+    num_processes: int
+    Number of compute workers, used to send the None sentinels.
+    bam_threads: int
+    Pysam/htslib threads.
+    batch_size: int
+    Batch size.
+    strict: bool
+    If True, fails if the pairs are not perfectly matched.
+    Requirements
     ---------
-    Le BAM doit être ordonné par nom ou au minimum avoir les deux mates consécutives.
+    The BAM must be sorted by name or, at a minimum, have the two mates of each pair on consecutive lines.
     """
     import sys
@@ -250,21 +279,25 @@ def write_bam_interleaved_from_sam(
     bam_threads=1,
 ):
     """
-    Écrit les paires BAM non splittables dans un BAM unique interleavé.
+    Writes non-splittable BAM pairs into a single interleaved BAM.
-    Contrat d'entrée:
-        queue contient des batchs list[(sam_f, sam_r)]
+    Input contract:
-    Sortie:
-        un seul BAM avec read1 puis read2 à la suite.
+    queue contains batches `list[(sam_f, sam_r)]`
-    Paramètres
+    Output:
+    a single BAM with `read1` followed by `read2`.
+    Parameters
     ----------
-    queue : multiprocessing.Queue
-    out_bam_path : str
-    header_dict : dict
-    num_procs_finished_signal : int
-    bam_threads : int
+    queue: multiprocessing.Queue
+    out_bam_path: str
+    header_dict: dict
+    num_procs_finished_signal: int
+    bam_threads: int
     """
     import sys

{split3c-0.0.1 → split3c-0.0.2}/src/split3c/nssite/fastq.py RENAMED Viewed

@@ -1,3 +1,27 @@
+"""
+This script is part of the split3c project, designed to process paired-end FASTQ files by fragmenting DNA sequences at specified unmapped sites. Built to analyse Micro-C/CAD-C data.
+Copyright © 2024 Samir Bertache
+SPDX-License-Identifier: AGPL-3.0-or-later
+===============================================================================
+This program is free software: you can redistribute it and/or modify it under
+the terms of the GNU Affero General Public License as published by the
+Free Software Foundation, either version 3 of the License, or (at your option)
+any later version.
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+See the GNU Affero General Public License for more details.
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <https://www.gnu.org/licenses/>.
+"""
 def open_output(output_forward, output_reverse, write_processes):
     """
     Open output files for writing with pigz compression.

{split3c-0.0.1 → split3c-0.0.2}/src/split3c/nssite/main.py RENAMED Viewed

@@ -1,5 +1,5 @@
 """
-This script is a the split3c project, designed to process paired-end FASTQ files by fragmenting DNA sequences at specified restriction enzyme sites.
+This script is part of the split3c project, designed to process paired-end FASTQ files by fragmenting DNA sequences at specified unmapped sites. Built to analyse Micro-C/CAD-C data.
 Copyright © 2024 Samir Bertache
@@ -150,7 +150,7 @@ def _print_banner() -> None:
             "[bold blue]Microsplit[/bold blue]\n"
             "Process paired BAM (Micro-C) into paired FASTQ.\n\n"
             "Use --help to see detailed options.",
-            title="[bold green]microsplit-cut[/bold green]",
+            title="[bold green]split3c nssite-cut[/bold green]",
             subtitle=f"Version: {__version__}",
             expand=True,
             width=100,
@@ -202,8 +202,8 @@ def main_cli(argv: Optional[list[str]] = None) -> int:
         ),
         epilog=(
             "Examples:\n"
-            " \tmicrosplit -1 fwd.bam -2 rev.bam -o1 R1.fastq.gz -o2 R2.fastq.gz -t 12 -s 20 -l 0 --pairing-mode cover \n"
-            "  \tmicrosplit -1 merged.bam --single-bam -o1 R1.fastq.gz -o2 R2.fastq.gz -t 12 -s 20 --pairing-mode all\n"
+            " \tsplit3c nssite -1 fwd.bam -2 rev.bam -o1 R1.fastq.gz -o2 R2.fastq.gz -t 12 -s 20 -l 0 --pairing-mode cover \n"
+            "  \tsplit3c nssite -1 merged.bam --single-bam -o1 R1.fastq.gz -o2 R2.fastq.gz -t 12 -s 20 --pairing-mode all\n"
         ),
         formatter_class=_formatter_class(),
     )

{split3c-0.0.1 → split3c-0.0.2}/src/split3c/nssite/processmanager.py RENAMED Viewed

@@ -1,6 +1,30 @@
-from multiprocessing import Process, Queue
+"""
+This script is part of the split3c project, designed to process paired-end FASTQ files by fragmenting DNA sequences at specified unmapped sites. Built to analyse Micro-C/CAD-C data.
+Copyright © 2024 Samir Bertache
+SPDX-License-Identifier: AGPL-3.0-or-later
+===============================================================================
+This program is free software: you can redistribute it and/or modify it under
+the terms of the GNU Affero General Public License as published by the
+Free Software Foundation, either version 3 of the License, or (at your option)
+any later version.
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+See the GNU Affero General Public License for more details.
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <https://www.gnu.org/licenses/>.
+"""
 import sys
 import traceback
+from multiprocessing import Process, Queue
 class WorkerProcess(Process):
     def __init__(self, target, args, error_queue):
@@ -15,6 +39,7 @@ class WorkerProcess(Process):
             self.error_queue.put((str(e), traceback.format_exc()))
             sys.exit(1)
 class ProcessManager:
     def __init__(self):
         self.processes = []

{split3c-0.0.1 → split3c-0.0.2}/src/split3c/nssite/split.py RENAMED Viewed

@@ -1,3 +1,26 @@
+"""
+This script is part of the split3c project, designed to process paired-end FASTQ files by fragmenting DNA sequences at specified unmapped sites. Built to analyse Micro-C/CAD-C data.
+Copyright © 2024 Samir Bertache
+SPDX-License-Identifier: AGPL-3.0-or-later
+===============================================================================
+This program is free software: you can redistribute it and/or modify it under
+the terms of the GNU Affero General Public License as published by the
+Free Software Foundation, either version 3 of the License, or (at your option)
+any later version.
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+See the GNU Affero General Public License for more details.
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <https://www.gnu.org/licenses/>.
+"""
 import logging
 import os
 import signal
@@ -256,14 +279,15 @@ def process_cigard(name, sequence, quality, cigar, seed_size, len_add):
 def read_name(base_name, tag_i, tag_j, tot_for, tot_rev, tags=None):
     """
-    Construit un header de paire à partir d'un nom de read et de deux tags.
+    Constructs a pair header from a read name and two tags.
+    `base_name`: logical name of the read (e.g., '@READ')
+    `tag_i`, `tag_j`: fragment identifiers, typically 'F1', 'R1', etc. (1-based)
+    `tot_for`, `tot_rev`: total number of forward/reverse fragments
-    base_name : nom logique du read (ex: '@READ')
-    tag_i, tag_j : identifiants de fragments, typiquement 'F1', 'R1', etc. (1-based)
-    tot_for, tot_rev : nombres totaux de fragments forward / reverse
+    Return (origin/o mode):
-    Retour (mode origin/o):
-        '<base_name>:[<tag_i>,<tag_j>:FT<tot_for>,RT<tot_rev>]'
+    '<base_name>:[<tag_i>,<tag_j>:FT<tot_for>,RT<tot_rev>]'
     Examples
     --------
@@ -288,7 +312,7 @@ def read_name(base_name, tag_i, tag_j, tot_for, tot_rev, tags=None):
 def _fraglist_to_entries(frag_list, origin):
     """
-    Transforme une liste de FastQ en tuples (origin, idx, seq, qual).
+    Transforms a list of FastQ into tuples (origin, idx, seq, qual).
     Examples
     --------
@@ -308,7 +332,7 @@ def _fraglist_to_entries(frag_list, origin):
 def _emit_pair(base_name, e1, e2, tot_for, tot_rev, tags=None):
     """
-    Construit une paire FASTQ textuelle à partir de deux entrées.
+    Constructs a textual FASTQ pair from two inputs.
     e = (origin, idx, seq, qual)
     """
@@ -326,15 +350,15 @@ def _emit_pair(base_name, e1, e2, tot_for, tot_rev, tags=None):
 def gen_read_pairs_from_frags_cover(base_name, frags_f, frags_r, tags=None):
     """
-    Génère un nombre minimal (ou quasi minimal) de paires pour que
-    chaque fragment apparaisse au moins une fois.
+    Generates a minimal (or near-minimal) number of pairs so that
+    each fragment appears at least once.
-    Stratégie:
-    1. appariement F-R tant que possible
-    2. appariement des restes au sein du même côté
-    3. si un fragment reste seul, on le rattache à un anchor déjà utilisé
+    Strategy:
+        1. Match F-R whenever possible
+        2. Match remainders within the same side
+        3. If a fragment remains alone, reattach it to an already used anchor
-    Complexité: O(F + R)
+    Complexity: O(F + R)
     Examples
     --------
@@ -425,14 +449,14 @@ def gen_read_pairs_from_frags_cover(base_name, frags_f, frags_r, tags=None):
 def gen_read_pairs_from_frags_all(base_name, frags_f, frags_r, tags=None):
     """
-    Génère deux chaînes FastQ (forward/reverse) à partir de fragments déjà splittés.
+    Generates two FASTQ strings (forward/reverse) from already split fragments.
-    `frags_f` et `frags_r` sont des listes de fragments FASTQ complets:
+    `frags_f` and `frags_r` are lists of complete FASTQ fragments:
         '@name\\nSEQ\\n+\\nQUAL\\n'
     Examples
     --------
-    Cas simple : 1 fragment forward, 1 fragment reverse (une seule combinaison).
+    Simple case: 1 forward fragment, 1 reverse fragment (only one combination).
     >>> frags_f = ["@x\\nAC\\n+\\n??\\n"]
     >>> frags_r = ["@x\\nTG\\n+\\n!!\\n"]
     >>> F, R = gen_read_pairs_from_frags_all("@READ", frags_f, frags_r)
@@ -441,14 +465,14 @@ def gen_read_pairs_from_frags_all(base_name, frags_f, frags_r, tags=None):
     >>> R
     '@READ:[F1,R1:FT1,RT1]\\nTG\\n+\\n!!\\n'
-    Même cas, sans tag (nt).
+    Same case, without tags (nt).
     >>> F, R = gen_read_pairs_from_frags_all("@READ", frags_f, frags_r, tags="nt")
     >>> F
     '@READ\\nAC\\n+\\n??\\n'
     >>> R
     '@READ\\nTG\\n+\\n!!\\n'
-    Cas combinatoire :
+    Combinatorial case :
       - forward: 2 fragments (F0, F1)
       - reverse: 1 fragment (R0)
       -> combinations: (F0,F1), (F0,R0), (F1,R0)
@@ -470,7 +494,7 @@ def gen_read_pairs_from_frags_all(base_name, frags_f, frags_r, tags=None):
     >>> R
     '@READ:[F1,F2:FT2,RT1]\\nBCD\\n+\\n===\\n@READ:[F1,R1:FT2,RT1]\\nWXYZ\\n+\\n>>>>\\n@READ:[F2,R1:FT2,RT1]\\nWXYZ\\n+\\n>>>>\\n'
-    Cas combinatoire symétrique (2 fragments forward, 2 fragments reverse).
+    Symmetric combinatorial case (2 fragments forward, 2 fragments reverse).
     >>> frags_f = ["@x\\nA\\n+\\n!\\n", "@x\\nBC\\n+\\n!!\\n"]
     >>> frags_r = ["@x\\nD\\n+\\n#\\n", "@x\\nEF\\n+\\n##\\n"]
     >>> F, R = gen_read_pairs_from_frags_all("@READ", frags_f, frags_r)
@@ -480,17 +504,16 @@ def gen_read_pairs_from_frags_all(base_name, frags_f, frags_r, tags=None):
     """
     from itertools import combinations
-    # Cas combinatoire: on annote chaque fragment avec son origine (F/R) et un index local
     def _to_entries(frag_list, origin):
         """
-        Transforme une liste de FastQ en tuples (origin, idx, seq, qual).
+        Transforms a list of FastQ into tuples (origin, idx, seq, qual).
         frag = '@smth\\nSEQ\\n+\\nQUAL\\n'
         """
         entries = []
         for idx, frag in enumerate(frag_list):
             lines = frag.strip().split("\n")
             if len(lines) != 4 or lines[2] != "+":
-                raise ValueError("Fragment FastQ invalide dans process_cigard.")
+                raise ValueError("FastQ Fragment invalid in process_cigard.")
             seq = lines[1]
             qual = lines[3]
             entries.append((origin, idx, seq, qual))
@@ -520,7 +543,7 @@ def gen_read_pairs_from_frags(
     base_name, frags_f, frags_r, tags=None, pairing_mode="all"
 ):
     """
-    Dispatcher entre plusieurs stratégies de génération de paires.
+    Dispatch between several pair generation strategies.
     """
     if pairing_mode == "all":
         return gen_read_pairs_from_frags_all(base_name, frags_f, frags_r, tags=tags)
@@ -530,7 +553,6 @@ def gen_read_pairs_from_frags(
         raise ValueError(f"Unknown pairing_mode: {pairing_mode}")
-# Pensez à la gestion de seq = "*"
 def sam_fields(sam_line: str):
     """
     Return minimal information

split3c-0.0.2/src/split3c/resite/__init__.py ADDED Viewed

@@ -0,0 +1,4 @@
+from .frag import process_items
+from .pretreatment import partition_threads, search_in_database
+from .read import read_fastq_gzip_simultaneously
+from .write_control import manage_pigz_problems, open_output, write_pairs

{split3c-0.0.1 → split3c-0.0.2}/src/split3c/resite/frag.py RENAMED Viewed

@@ -1,5 +1,5 @@
 """
-This script is a the split3c project, designed to process paired-end FASTQ files by fragmenting DNA sequences at specified restriction enzyme sites.
+This script is part of the split3c project, designed to process paired-end FASTQ files by fragmenting DNA sequences at specified restriction enzyme sites.
 Copyright © 2024 Samir Bertache
@@ -286,7 +286,7 @@ def processing_fr(
     """
     Process the sequences to generate buffers for forward and reverse reads
     in FR mode (one forward fragment + one reverse fragment).
-    N'ajoute pas de suffixe :ij si une seule paire.
+    Do not add the suffix :ij if there is only one pair.
     Doctests:
     >>> seqs = ["AAAACCCCGGGG", "TTTTGGGGCCCC"]

split3c 0.0.1__tar.gz → 0.0.2__tar.gz

split3c 0.0.1tar.gz → 0.0.2tar.gz