tpcav 0.1.0__tar.gz → 0.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tpcav-0.2.1/PKG-INFO +91 -0
- tpcav-0.2.1/README.md +66 -0
- {tpcav-0.1.0 → tpcav-0.2.1}/pyproject.toml +1 -1
- {tpcav-0.1.0 → tpcav-0.2.1}/test/test_cav_trainer.py +65 -8
- {tpcav-0.1.0 → tpcav-0.2.1}/tpcav/__init__.py +1 -1
- {tpcav-0.1.0 → tpcav-0.2.1}/tpcav/cavs.py +190 -3
- {tpcav-0.1.0 → tpcav-0.2.1}/tpcav/concepts.py +129 -116
- tpcav-0.2.1/tpcav/helper.py +302 -0
- {tpcav-0.1.0 → tpcav-0.2.1}/tpcav/tpcav_model.py +26 -21
- {tpcav-0.1.0 → tpcav-0.2.1}/tpcav/utils.py +93 -0
- tpcav-0.2.1/tpcav.egg-info/PKG-INFO +91 -0
- {tpcav-0.1.0 → tpcav-0.2.1}/tpcav.egg-info/top_level.txt +1 -0
- tpcav-0.1.0/PKG-INFO +0 -89
- tpcav-0.1.0/README.md +0 -64
- tpcav-0.1.0/tpcav/helper.py +0 -165
- tpcav-0.1.0/tpcav.egg-info/PKG-INFO +0 -89
- {tpcav-0.1.0 → tpcav-0.2.1}/LICENSE +0 -0
- {tpcav-0.1.0 → tpcav-0.2.1}/setup.cfg +0 -0
- {tpcav-0.1.0 → tpcav-0.2.1}/tpcav/logging_utils.py +0 -0
- {tpcav-0.1.0 → tpcav-0.2.1}/tpcav.egg-info/SOURCES.txt +0 -0
- {tpcav-0.1.0 → tpcav-0.2.1}/tpcav.egg-info/dependency_links.txt +0 -0
- {tpcav-0.1.0 → tpcav-0.2.1}/tpcav.egg-info/requires.txt +0 -0
tpcav-0.2.1/PKG-INFO
ADDED
@@ -0,0 +1,91 @@
+Metadata-Version: 2.4
+Name: tpcav
+Version: 0.2.1
+Summary: Testing with PCA projected Concept Activation Vectors
+Author-email: Jianyu Yang <yztxwd@gmail.com>
+License-Expression: MIT AND (Apache-2.0 OR BSD-2-Clause)
+Project-URL: Homepage, https://github.com/seqcode/TPCAV
+Keywords: interpretation,attribution,concept,genomics,deep learning
+Requires-Python: >=3.8
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: torch
+Requires-Dist: pandas
+Requires-Dist: numpy
+Requires-Dist: seqchromloader
+Requires-Dist: deeplift
+Requires-Dist: pyfaidx
+Requires-Dist: pybedtools
+Requires-Dist: captum
+Requires-Dist: scikit-learn
+Requires-Dist: biopython
+Requires-Dist: seaborn
+Requires-Dist: matplotlib
+Dynamic: license-file
+
+# TPCAV (Testing with PCA projected Concept Activation Vectors)
+
+This repository contains code to compute TPCAV (Testing with PCA projected Concept Activation Vectors) on deep learning models. TPCAV is an extension of the original TCAV method, which uses PCA to reduce the dimensionality of the activations at a selected intermediate layer before computing Concept Activation Vectors (CAVs) to improve the consistency of the results.
+
+## Installation
+
+`pip install tpcav`
+
+## Quick start
+
+> `tpcav` only works with Pytorch model, if your model is built using other libraries, you should port the model into Pytorch first. For Tensorflow models, you can use [tf2onnx](https://github.com/onnx/tensorflow-onnx) and [onnx2pytorch](https://github.com/Talmaj/onnx2pytorch) for the conversion.
+
+```python
+import torch
+from tpcav import run_tpcav
+
+class DummyModelSeq(torch.nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.layer1 = torch.nn.Linear(1024, 1)
+        self.layer2 = torch.nn.Linear(4, 1)
+
+    def forward(self, seq):
+        y_hat = self.layer1(seq)
+        y_hat = y_hat.squeeze(-1)
+        y_hat = self.layer2(y_hat)
+        return y_hat
+
+# transformation function to obtain one-hot encoded sequences
+def transform_fasta_to_one_hot_seq(seq, chrom):
+    # `seq` is a list of fasta sequences
+    # `chrom` is a numpy array of bigwig signals of shape [-1, # bigwigs, len]
+    return (helper.fasta_to_one_hot_sequences(seq),) # it has to return a tuple of inputs, even if there is only one input
+
+motif_path = "data/motif-clustering-v2.1beta_consensus_pwms.test.meme"
+bed_seq_concept = "data/hg38_rmsk.head500k.bed"
+genome_fasta = "data/hg38.analysisSet.fa"
+model = DummyModelSeq() # load the model
+layer_name = "layer1" # name of the layer to be interpreted
+
+# concept_fscores_dataframe: fscores of each concept
+# motif_cav_trainers: each trainer contains the cav weights of motifs inserted different number of times
+# bed_cav_trainer: trainer contains the cav weights of the sequence concepts provided in bed file
+concept_fscores_dataframe, motif_cav_trainers, bed_cav_trainer = run_tpcav(
+    model=model,
+    layer_name=layer_name,
+    meme_motif_file=motif_path,
+    genome_fasta=genome_fasta,
+    num_motif_insertions=[4, 8],
+    bed_seq_file=bed_seq_concept,
+    output_dir="test_run_tpcav_output/",
+    input_transform_func=transform_fasta_to_one_hot_seq
+)
+
+# check each trainer for detailed weights
+print(bed_cav_trainer.cav_weights)
+
+```
+
+
+## Detailed Usage
+
+For detailed usage, please refer to this [jupyter notebook](https://github.com/seqcode/TPCAV/tree/main/examples/tpcav_detailed_usage.ipynb)
+
+If you find any issue, feel free to open an issue (strongly suggested) or contact [Jianyu Yang](mailto:jmy5455@psu.edu).
+
tpcav-0.2.1/README.md
ADDED
@@ -0,0 +1,66 @@
+# TPCAV (Testing with PCA projected Concept Activation Vectors)
+
+This repository contains code to compute TPCAV (Testing with PCA projected Concept Activation Vectors) on deep learning models. TPCAV is an extension of the original TCAV method, which uses PCA to reduce the dimensionality of the activations at a selected intermediate layer before computing Concept Activation Vectors (CAVs) to improve the consistency of the results.
+
+## Installation
+
+`pip install tpcav`
+
+## Quick start
+
+> `tpcav` only works with Pytorch model, if your model is built using other libraries, you should port the model into Pytorch first. For Tensorflow models, you can use [tf2onnx](https://github.com/onnx/tensorflow-onnx) and [onnx2pytorch](https://github.com/Talmaj/onnx2pytorch) for the conversion.
+
+```python
+import torch
+from tpcav import run_tpcav
+
+class DummyModelSeq(torch.nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.layer1 = torch.nn.Linear(1024, 1)
+        self.layer2 = torch.nn.Linear(4, 1)
+
+    def forward(self, seq):
+        y_hat = self.layer1(seq)
+        y_hat = y_hat.squeeze(-1)
+        y_hat = self.layer2(y_hat)
+        return y_hat
+
+# transformation function to obtain one-hot encoded sequences
+def transform_fasta_to_one_hot_seq(seq, chrom):
+    # `seq` is a list of fasta sequences
+    # `chrom` is a numpy array of bigwig signals of shape [-1, # bigwigs, len]
+    return (helper.fasta_to_one_hot_sequences(seq),) # it has to return a tuple of inputs, even if there is only one input
+
+motif_path = "data/motif-clustering-v2.1beta_consensus_pwms.test.meme"
+bed_seq_concept = "data/hg38_rmsk.head500k.bed"
+genome_fasta = "data/hg38.analysisSet.fa"
+model = DummyModelSeq() # load the model
+layer_name = "layer1" # name of the layer to be interpreted
+
+# concept_fscores_dataframe: fscores of each concept
+# motif_cav_trainers: each trainer contains the cav weights of motifs inserted different number of times
+# bed_cav_trainer: trainer contains the cav weights of the sequence concepts provided in bed file
+concept_fscores_dataframe, motif_cav_trainers, bed_cav_trainer = run_tpcav(
+    model=model,
+    layer_name=layer_name,
+    meme_motif_file=motif_path,
+    genome_fasta=genome_fasta,
+    num_motif_insertions=[4, 8],
+    bed_seq_file=bed_seq_concept,
+    output_dir="test_run_tpcav_output/",
+    input_transform_func=transform_fasta_to_one_hot_seq
+)
+
+# check each trainer for detailed weights
+print(bed_cav_trainer.cav_weights)
+
+```
+
+
+## Detailed Usage
+
+For detailed usage, please refer to this [jupyter notebook](https://github.com/seqcode/TPCAV/tree/main/examples/tpcav_detailed_usage.ipynb)
+
+If you find any issue, feel free to open an issue (strongly suggested) or contact [Jianyu Yang](mailto:jmy5455@psu.edu).
+
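The quick start above hinges on one contract: `input_transform_func` receives the raw fasta sequences and the bigwig signal array and must return a tuple of model inputs, even when the model takes a single tensor. Below is a hypothetical sketch for a model that consumes both sequence and chromatin inputs; the tensor dtype and the two-input `forward(seq, chrom)` signature are assumptions for illustration, not taken from the package.

```python
import numpy as np
import torch
from tpcav import helper

def transform_seq_and_chrom(seq, chrom):
    # `seq` is a list of fasta sequences, `chrom` a numpy array of bigwig
    # signals of shape [-1, # bigwigs, len], as documented in the quick start.
    one_hot = helper.fasta_to_one_hot_sequences(seq)  # one-hot encoded sequences
    chrom_tensor = torch.as_tensor(np.asarray(chrom), dtype=torch.float32)  # dtype is an assumption
    return (one_hot, chrom_tensor)  # tuple of inputs, one entry per model input
```

A transform like this would be passed as `input_transform_func=transform_seq_and_chrom` for a two-input model; for the common sequence-plus-chromatin case the packaged `helper.fasta_chrom_to_one_hot_seq` default (see the `run_tpcav` signature later in this diff) is already available.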

{tpcav-0.1.0 → tpcav-0.2.1}/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "tpcav"
-version = "0.1.0"
+version = "0.2.1"
 description = "Testing with PCA projected Concept Activation Vectors"
 authors = [{name = "Jianyu Yang", email = "yztxwd@gmail.com"},]
 readme = "README.md"

{tpcav-0.1.0 → tpcav-0.2.1}/test/test_cav_trainer.py
@@ -6,7 +6,7 @@ import torch
 from Bio import motifs as Bio_motifs
 from captum.attr import DeepLift
 
-from tpcav import helper
+from tpcav import helper, run_tpcav
 from tpcav.cavs import CavTrainer
 from tpcav.concepts import ConceptBuilder
 from tpcav.tpcav_model import TPCAV, _abs_attribution_func
@@ -47,15 +47,14 @@ def transform_fasta_to_one_hot_seq(seq, chrom):
     return (helper.fasta_to_one_hot_sequences(seq),)
 
 
-class CavTrainerIntegrationTest(unittest.TestCase):
+class TPCAVTest(unittest.TestCase):
 
-    def
+    def test_motif_concepts_insertion(self):
         motif_path = Path("data") / "motif-clustering-v2.1beta_consensus_pwms.test.meme"
         self.assertTrue(motif_path.exists(), "Motif file is missing")
 
         builder = ConceptBuilder(
             genome_fasta="data/hg38.analysisSet.fa",
-            genome_size_file="data/hg38.analysisSet.fa.fai",
             input_window_length=1024,
             bws=None,
             num_motifs=16,
@@ -102,6 +101,62 @@ class CavTrainerIntegrationTest(unittest.TestCase):
             f"Control concept has more motif matches than Motif concept, motif concept: {len(matches)}, control concept: {len(control_matches)}",
         )
 
+    def test_run_tpcav(self):
+        motif_path = Path("data") / "motif-clustering-v2.1beta_consensus_pwms.test.meme"
+        genome_fasta = "data/hg38.analysisSet.fa"
+        model = DummyModelSeq()
+        layer_name = "layer1"
+
+        cavs_fscores_df, motif_cav_trainers, bed_cav_trainer = run_tpcav(
+            model=model,
+            layer_name=layer_name,
+            meme_motif_file=str(motif_path),
+            genome_fasta=genome_fasta,
+            num_motif_insertions=[4, 8],
+            bed_seq_file="data/hg38_rmsk.head50k.bed",
+            output_dir="data/test_run_tpcav_output/",
+        )
+
+    def test_write_bw(self):
+        random_regions_1 = helper.random_regions_dataframe(
+            "data/hg38.analysisSet.fa.fai", 1024, 100, seed=1
+        )
+        helper.write_attrs_to_bw(torch.rand((100, 1024)).numpy(),
+            random_regions_1.apply(lambda x: f"{x.chrom}:{x.start}-{x.end}", axis=1).tolist(),
+            "data/hg38.analysisSet.fa.fai", "data/test_run_tpcav_output/input_attrs.bw")
+
+    def test_motif_concepts_against_permute_control(self):
+        motif_path = Path("data") / "motif-clustering-v2.1beta_consensus_pwms.test.meme"
+        self.assertTrue(motif_path.exists(), "Motif file is missing")
+
+        builder = ConceptBuilder(
+            genome_fasta="data/hg38.analysisSet.fa",
+            input_window_length=1024,
+            bws=None,
+            num_motifs=16,
+            include_reverse_complement=True,
+            min_samples=1000,
+            batch_size=8,
+        )
+
+        builder.build_control()
+
+        concepts_pairs = builder.add_meme_motif_concepts(str(motif_path), build_permute_control=True)
+        builder.apply_transform(transform_fasta_to_one_hot_seq)
+
+        tpcav_model = TPCAV(DummyModelSeq(), layer_name="layer1")
+        tpcav_model.fit_pca(
+            concepts=builder.all_concepts(),
+            num_samples_per_concept=10,
+            num_pc="full",
+        )
+        cav_trainer = CavTrainer(tpcav_model)
+
+        for motif_concept, permute_concept in concepts_pairs:
+            cav_trainer.set_control(permute_concept, 200)
+            cav_trainer.train_concepts([motif_concept,], 200, output_dir="data/cavs_permute/", num_processes=2)
+
+
     def test_all(self):
 
         motif_path = Path("data") / "motif-clustering-v2.1beta_consensus_pwms.test.meme"
@@ -109,7 +164,6 @@ class CavTrainerIntegrationTest(unittest.TestCase):
 
         builder = ConceptBuilder(
             genome_fasta="data/hg38.analysisSet.fa",
-            genome_size_file="data/hg38.analysisSet.fa.fai",
             input_window_length=1024,
             bws=None,
             num_motifs=12,
@@ -166,7 +220,7 @@ class CavTrainerIntegrationTest(unittest.TestCase):
 
         attributions = tpcav_model.layer_attributions(
             pack_data_iters(random_regions_1), pack_data_iters(random_regions_2)
-        )["attributions"]
+        )["attributions"].cpu()
 
         cav_trainer.tpcav_score("AC0001:GATA-PROP:GATA", attributions)
 
@@ -220,9 +274,12 @@ class CavTrainerIntegrationTest(unittest.TestCase):
             custom_attribution_func=_abs_attribution_func,
         )
         attr_residual, attr_projected = attributions_old
-        attributions_old = torch.cat((attr_projected, attr_residual), dim=1)
+        attributions_old = torch.cat((attr_projected, attr_residual), dim=1).cpu()
 
-        self.assertTrue(
+        self.assertTrue(
+            torch.allclose(attributions.cpu(), attributions_old.cpu(), atol=1e-6),
+            f"Attributions do not match, max difference is {torch.abs(attributions - attributions_old).max()}",
+        )
 
 
 if __name__ == "__main__":
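The tightened assertion above moves both attribution tensors to the CPU before comparing them, so the equivalence check works whether the model ran on CPU or GPU. A minimal standalone illustration of that pattern (the tensors here are arbitrary stand-ins, not TPCAV outputs):

```python
import torch

a = torch.rand(4, 8)   # e.g. attributions from the new code path
b = a.clone() + 1e-8   # e.g. attributions from the old code path

# torch.allclose raises if the tensors live on different devices; calling
# .cpu() on both sides makes the comparison device-agnostic.
assert torch.allclose(a.cpu(), b.cpu(), atol=1e-6), (
    f"max difference is {torch.abs(a - b).max()}"
)
```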

{tpcav-0.1.0 → tpcav-0.2.1}/tpcav/__init__.py
@@ -10,7 +10,7 @@ import logging
 # Set the logging level to INFO
 logging.basicConfig(level=logging.INFO)
 
-from .cavs import CavTrainer
+from .cavs import CavTrainer, run_tpcav
 from .concepts import ConceptBuilder
 from .helper import (
     bed_to_chrom_tracks_iter,

{tpcav-0.1.0 → tpcav-0.2.1}/tpcav/cavs.py
@@ -5,11 +5,15 @@ CAV training and attribution utilities built on TPCAV.
 
 import logging
 import multiprocessing
+from collections import defaultdict
+import os
 from pathlib import Path
-from typing import Iterable, List, Optional, Tuple
+from typing import Iterable, List, Optional, Tuple, Dict
 
+from Bio import motifs
 import matplotlib.pyplot as plt
 import numpy as np
+import pandas as pd
 import seaborn as sns
 import torch
 from sklearn.linear_model import SGDClassifier
@@ -17,8 +21,11 @@ from sklearn.metrics import precision_recall_fscore_support
 from sklearn.metrics.pairwise import cosine_similarity
 from sklearn.model_selection import GridSearchCV
 from torch.utils.data import DataLoader, TensorDataset, random_split
+from sklearn.linear_model import LinearRegression
 
-from
+from . import helper, utils
+from .concepts import ConceptBuilder
+from .tpcav_model import TPCAV
 
 logger = logging.getLogger(__name__)
 
@@ -246,6 +253,16 @@ class CavTrainer:
 
         return scores
 
+    def tpcav_score_all_concepts(self, attributions: torch.Tensor) -> dict:
+        """
+        Compute TCAV scores for all trained concepts.
+        """
+        scores_dict = {}
+        for concept_name in self.cav_weights.keys():
+            scores = self.tpcav_score(concept_name, attributions)
+            scores_dict[concept_name] = scores
+        return scores_dict
+
     def tpcav_score_binary_log_ratio(
         self, concept_name: str, attributions: torch.Tensor, pseudocount: float = 1.0
     ) -> float:
@@ -259,6 +276,20 @@ class CavTrainer:
 
         return np.log((pos_count + pseudocount) / (neg_count + pseudocount))
 
+    def tpcav_score_all_concepts_log_ratio(
+        self, attributions: torch.Tensor, pseudocount: float = 1.0
+    ) -> dict:
+        """
+        Compute TCAV log ratio scores for all trained concepts.
+        """
+        log_ratio_dict = {}
+        for concept_name in self.cav_weights.keys():
+            log_ratio = self.tpcav_score_binary_log_ratio(
+                concept_name, attributions, pseudocount
+            )
+            log_ratio_dict[concept_name] = log_ratio
+        return log_ratio_dict
+
     def plot_cavs_similaritiy_heatmap(
         self,
         attributions: torch.Tensor,
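Together, these two additions let callers score every trained CAV in one pass instead of looping over `self.cav_weights` by hand. A minimal usage sketch, assuming the trainer and attributions were produced the way the test suite above produces them:

```python
import torch
from tpcav.cavs import CavTrainer

def summarize_concepts(cav_trainer: CavTrainer, attributions: torch.Tensor) -> None:
    """Print per-concept TCAV scores and log ratios using the new bulk helpers."""
    # attributions: tensor returned by TPCAV.layer_attributions(...)["attributions"]
    scores_per_concept = cav_trainer.tpcav_score_all_concepts(attributions)
    log_ratios = cav_trainer.tpcav_score_all_concepts_log_ratio(attributions, pseudocount=1.0)
    # rank concepts by how strongly positive attributions align with each CAV
    for name, ratio in sorted(log_ratios.items(), key=lambda kv: kv[1], reverse=True):
        print(f"{name}\tscore={scores_per_concept[name]}\tlog_ratio={ratio:.3f}")
```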
@@ -274,7 +305,7 @@ class CavTrainer:
         cavs_names_pass = []
         for cname in cavs_names:
             if self.cavs_fscores[cname] >= fscore_thresh:
-                cavs_pass.append(self.cav_weights[cname])
+                cavs_pass.append(self.cav_weights[cname].cpu().numpy())
                 cavs_names_pass.append(cname)
             else:
                 logger.info(
@@ -332,3 +363,159 @@ class CavTrainer:
         ax_log.set_title("TCAV log ratio")
 
         plt.savefig(output_path, dpi=300, bbox_inches="tight")
+
+def load_motifs_from_meme(motif_meme_file):
+    return {utils.clean_motif_name(m.name): m for m in motifs.parse(open(motif_meme_file), fmt="MINIMAL")}
+
+def compute_motif_auc_fscore(num_motif_insertions: List[int], cav_trainers: List[CavTrainer], meme_motif_file: str | None = None):
+    cavs_fscores_df = pd.DataFrame({nm: cav_trainer.cavs_fscores for nm, cav_trainer in zip(num_motif_insertions, cav_trainers)})
+    cavs_fscores_df['concept'] = list(cav_trainers[0].cavs_fscores.keys())
+
+    def compute_auc_fscore(row):
+        y = [row[nm] for nm in num_motif_insertions]
+        return np.trapz(y, num_motif_insertions) / (
+            num_motif_insertions[-1] - num_motif_insertions[0]
+        )
+
+    cavs_fscores_df["AUC_fscores"] = cavs_fscores_df.apply(compute_auc_fscore, axis=1)
+
+    # if motif instances are provided, fit linear regression curve to remove the dependency of f-scores on information content and motif lengthj
+    if meme_motif_file is not None:
+        motifs_dict = load_motifs_from_meme(meme_motif_file)
+        cavs_fscores_df['information_content'] = cavs_fscores_df.apply(lambda x: motifs_dict[x['concept']].relative_entropy.sum(), axis=1)
+        cavs_fscores_df['motif_len'] = cavs_fscores_df.apply(lambda x: len(motifs_dict[x['concept']].consensus), axis=1)
+
+        model = LinearRegression()
+        model.fit(cavs_fscores_df[['information_content', 'motif_len']].to_numpy(), cavs_fscores_df['AUC_fscores'].to_numpy()[:, np.newaxis])
+
+        y_pred = model.predict(cavs_fscores_df[['information_content', 'motif_len']].to_numpy())
+        residuals = cavs_fscores_df['AUC_fscores'].to_numpy() - y_pred.flatten()
+        cavs_fscores_df['AUC_fscores_residual'] = residuals
+
+        cavs_fscores_df.sort_values("AUC_fscores_residual", ascending=False, inplace=True)
+    else:
+        cavs_fscores_df.sort_values("AUC_fscores", ascending=False, inplace=True)
+
+    return cavs_fscores_df
+
+def run_tpcav(
+    model,
+    layer_name: str,
+    meme_motif_file: str,
+    genome_fasta: str,
+    num_motif_insertions: List[int] = [4, 8, 16],
+    bed_seq_file: Optional[str] = None,
+    bed_chrom_file: Optional[str] = None,
+    output_dir: str = "tpcav/",
+    num_samples_for_pca=10,
+    num_samples_for_cav=1000,
+    input_window_length=1024,
+    batch_size=8,
+    bws=None,
+    input_transform_func=helper.fasta_chrom_to_one_hot_seq,
+    p=4
+):
+    """
+    One-stop function to compute CAVs on motif concepts and bed concepts, compute AUC of motif concept f-scores after correction
+    """
+    if not os.path.exists(output_dir):
+        os.makedirs(output_dir)
+
+    output_path = Path(output_dir)
+    # create concept builder to generate concepts
+    ## motif concepts
+    motif_concepts_pairs = {}
+    motif_concept_builders = []
+    num_motif_insertions.sort()
+    for nm in num_motif_insertions:
+        builder = ConceptBuilder(
+            genome_fasta=genome_fasta,
+            input_window_length=input_window_length,
+            bws=bws,
+            num_motifs=nm,
+            include_reverse_complement=True,
+            min_samples=num_samples_for_cav,
+            batch_size=batch_size,
+        )
+        # use random regions as control
+        builder.build_control()
+        # use meme motif PWMs to build motif concepts, one concept per motif
+        concepts_pairs = builder.add_meme_motif_concepts(str(meme_motif_file))
+
+        # apply transform to convert fasta sequences to one-hot encoded sequences
+        builder.apply_transform(input_transform_func)
+
+        motif_concepts_pairs[nm] = concepts_pairs
+        motif_concept_builders.append(builder)
+
+    ## bed concepts (optional)
+    if bed_seq_file is not None or bed_chrom_file is not None:
+        bed_builder = ConceptBuilder(
+            genome_fasta=genome_fasta,
+            input_window_length=input_window_length,
+            bws=bws,
+            num_motifs=0,
+            include_reverse_complement=True,
+            min_samples=num_samples_for_cav,
+            batch_size=batch_size,
+        )
+        # use random regions as control
+        bed_builder.build_control()
+        if bed_seq_file is not None:
+            # build concepts from fasta sequences in bed file
+            bed_builder.add_bed_sequence_concepts(bed_seq_file)
+        if bed_chrom_file is not None:
+            # build concepts from chromatin tracks in bed file
+            bed_builder.add_bed_chrom_concepts(bed_chrom_file)
+        # apply transform to convert fasta sequences to one-hot encoded sequences
+        bed_builder.apply_transform(input_transform_func)
+    else:
+        bed_builder = None
+
+    # create TPCAV model on top of the given model
+    tpcav_model = TPCAV(model, layer_name=layer_name)
+    # fit PCA on sampled all concept activations of the last builder (should have the most motifs)
+    tpcav_model.fit_pca(
+        concepts=motif_concept_builders[-1].all_concepts() + bed_builder.concepts if bed_builder is not None else motif_concept_builders[-1].all_concepts(),
+        num_samples_per_concept=num_samples_for_pca,
+        num_pc="full",
+    )
+    #torch.save(tpcav_model, output_path / "tpcav_model.pt")
+
+    # create trainer for computing CAVs
+    motif_cav_trainers = {}
+    for nm in num_motif_insertions:
+        cav_trainer = CavTrainer(tpcav_model, penalty="l2")
+        for motif_concept, permuted_concept in motif_concepts_pairs[nm]:
+            # set control concept for CAV training
+            cav_trainer.set_control(
+                permuted_concept, num_samples=num_samples_for_cav
+            )
+            # train CAVs for all concepts
+            cav_trainer.train_concepts(
+                [motif_concept,],
+                num_samples_for_cav,
+                output_dir=str(output_path / f"cavs_{nm}_motifs/"),
+                num_processes=p,
+            )
+        motif_cav_trainers[nm] = cav_trainer
+    if bed_builder is not None:
+        bed_cav_trainer = CavTrainer(tpcav_model, penalty="l2")
+        bed_cav_trainer.set_control(
+            bed_builder.control_concepts[0], num_samples=num_samples_for_cav
+        )
+        bed_cav_trainer.train_concepts(
+            bed_builder.concepts,
+            num_samples_for_cav,
+            output_dir=str(output_path / f"cavs_bed_concepts/"),
+            num_processes=p,
+        )
+    else:
+        bed_cav_trainer = None
+
+    if len(num_motif_insertions) > 1:
+        cavs_fscores_df = compute_motif_auc_fscore(num_motif_insertions, list(motif_cav_trainers.values()), meme_motif_file=meme_motif_file)
+    else:
+        cavs_fscores_df = None
+
+    return cavs_fscores_df, motif_cav_trainers, bed_cav_trainer
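The `AUC_fscores` column computed above is the trapezoidal area under each concept's f-score curve across insertion counts, normalised by the width of the insertion range, so it behaves like an average f-score rather than a raw area. A small worked example with made-up f-scores:

```python
import numpy as np

num_motif_insertions = [4, 8, 16]   # insertion counts used to train the CAVs
fscores = [0.62, 0.78, 0.91]        # hypothetical f-scores for one concept

# trapezoidal area divided by the x-range, mirroring compute_auc_fscore above
auc = np.trapz(fscores, num_motif_insertions) / (
    num_motif_insertions[-1] - num_motif_insertions[0]
)
print(round(auc, 4))  # 0.7967, i.e. between the smallest and largest f-score
```

When a MEME file is supplied, the dataframe is sorted by `AUC_fscores_residual`, the residual of a linear fit of this AUC on motif information content and motif length, which removes the trivial advantage of long, information-rich motifs.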