PyPI - stratiphy - Versions diffs - 0.3.2__py3-none-any.whl - Mend

stratiphy 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (59) hide show

stratiphy/__init__.py +14 -0
stratiphy/_cli.py +426 -0
stratiphy/analysis/__init__.py +0 -0
stratiphy/analysis/explain/__init__.py +15 -0
stratiphy/analysis/explain/_explain.py +421 -0
stratiphy/analysis/gap.py +124 -0
stratiphy/analysis/metrics.py +60 -0
stratiphy/analysis/simulate/__init__.py +11 -0
stratiphy/analysis/simulate/_impl.py +318 -0
stratiphy/analysis/split.py +164 -0
stratiphy/bench/__init__.py +5 -0
stratiphy/bench/_bencher.py +125 -0
stratiphy/bench/_cli.py +390 -0
stratiphy/bench/_data.py +170 -0
stratiphy/bench/_io.py +49 -0
stratiphy/bench/_model.py +69 -0
stratiphy/cluster/__init__.py +18 -0
stratiphy/cluster/_base.py +41 -0
stratiphy/cluster/_sim.py +91 -0
stratiphy/cluster/_sklearn_sim.py +33 -0
stratiphy/config/__init__.py +5 -0
stratiphy/config/_workflow.py +43 -0
stratiphy/io.py +164 -0
stratiphy/model/__init__.py +9 -0
stratiphy/model/_base.py +357 -0
stratiphy/preprocessing/__init__.py +3 -0
stratiphy/preprocessing/annoqc.py +76 -0
stratiphy/preprocessing/phenopackets.py +190 -0
stratiphy/preprocessing/sanitize/__init__.py +19 -0
stratiphy/preprocessing/sanitize/_api.py +36 -0
stratiphy/preprocessing/sanitize/_convenience.py +147 -0
stratiphy/preprocessing/sanitize/_impl.py +620 -0
stratiphy/preprocessing/sanitize/_model.py +271 -0
stratiphy/preprocessing/sanitize/_test__model.py +148 -0
stratiphy/preprocessing/summarize/__init__.py +6 -0
stratiphy/preprocessing/summarize/_summarize.py +54 -0
stratiphy/preprocessing/validate/__init__.py +7 -0
stratiphy/preprocessing/validate/_base.py +149 -0
stratiphy/preprocessing/validate/_simple.py +9 -0
stratiphy/py.typed +0 -0
stratiphy/semsim/__init__.py +26 -0
stratiphy/semsim/_base.py +223 -0
stratiphy/semsim/_ic.py +58 -0
stratiphy/semsim/_pe.py +139 -0
stratiphy/semsim/_sts.py +121 -0
stratiphy/semsim/_test__base.py +52 -0
stratiphy/semsim/_test__sts.py +74 -0
stratiphy/util.py +85 -0
stratiphy/workflow/__init__.py +13 -0
stratiphy/workflow/_base.py +705 -0
stratiphy/workflow/util.py +201 -0
stratiphy/workflow/workflow_pb2.py +58 -0
stratiphy/workflow/workflow_pb2.pyi +95 -0
stratiphy-0.3.2.dist-info/METADATA +86 -0
stratiphy-0.3.2.dist-info/RECORD +59 -0
stratiphy-0.3.2.dist-info/WHEEL +5 -0
stratiphy-0.3.2.dist-info/entry_points.txt +3 -0
stratiphy-0.3.2.dist-info/licenses/LICENSE +29 -0
stratiphy-0.3.2.dist-info/top_level.txt +1 -0

stratiphy/__init__.py ADDED Viewed

@@ -0,0 +1,14 @@
+"""
+API documentation for the `stratiphy` Python package.
+The API documentation is intended for advanced users who want to use
+`stratiphy` as a Python library. For the general public, we recommend using
+the command-line interface (CLI).
+See the [Tutorial](../../tutorial.md) and [User Guide](../../user-guide/index.md)
+for an overview of the main CLI use cases.
+"""
+from importlib.metadata import version
+# The package version is looked up in the metadata of the installed
+# `stratiphy` distribution.
+__version__ = version("stratiphy")

stratiphy/_cli.py ADDED Viewed

@@ -0,0 +1,426 @@
+import argparse
+import json
+import logging
+import os
+import pathlib
+import sys
+import typing
+import hpotk
+from hpotk.util import open_text_io_handle_for_reading, open_text_io_handle_for_writing
+import stratiphy
+from stratiphy.io import StratiphyJSONDecoder
+from stratiphy.model import Sample
+from stratiphy.util import setup_logging
+# Program name; used both for the argument parser and for the module logger.
+PROG = "stratiphy"
+# Folder name joined with the current working directory to form the default
+# value of the `-d | --data` options.
+DEFAULT_DATA_PATH = "data"
+# Name of the HPO JSON file within the data directory.
+DEFAULT_HPO_PATH = "hp.json"
+# Name of the file with preprocessed samples within the output folder.
+DEFAULT_SAMPLES_PATH = "samples.json.gz"
+# Name of the file with clustering results within the output folder.
+DEFAULT_RESULTS_PATH = "results.pb"
+# NOTE(review): not referenced in the visible part of this module.
+DEFAULT_RESULTS_JSON_PATH = "results.json"
+# ################################## CLI ######################################
+logger = logging.getLogger(PROG)
+parser = argparse.ArgumentParser(
+    prog=PROG,
+    formatter_class=argparse.RawTextHelpFormatter,
+    description="Phenotype-driven stratification of patient cohorts",
+    epilog="Find more info at https://P2GX.github.io/stratiphy/stable",
+)
+parser.add_argument(
+    "-v",
+    "--verbosity",
+    action="count",
+    default=0,
+    help="increase verbosity",
+)
+parser.add_argument(
+    "--version",
+    action="version",
+    version="%(prog)s {version}".format(version=stratiphy.__version__),
+)
+# generate subparsers/subcommands
+subparsers = parser.add_subparsers(dest="command")
+# #################### ------------ `setup` ---------------- ####################
+parser_setup = subparsers.add_parser(
+    "setup",
+    description="Initialize stratiphy resources",
+    help="initialize stratiphy resources",
+    epilog="Find more info at https://P2GX.github.io/stratiphy/stable",
+)
+subparsers_setup = parser_setup.add_subparsers(dest="command_setup")
+# #################### ------------ `setup download` ------- ####################
+parser_setup_download = subparsers_setup.add_parser(
+    "download",
+    help="download the resource files",
+)
+parser_setup_download.add_argument(
+    "-d",
+    "--data",
+    type=pathlib.Path,
+    default=pathlib.Path(os.getcwd()).joinpath(DEFAULT_DATA_PATH),
+    help=f"where to download the resources (default: {DEFAULT_DATA_PATH})",
+)
+parser_setup_download.add_argument(
+    "-w",
+    "--overwrite",
+    default=False,
+    action="store_true",
+    help="overwrite previously downloaded resource files",
+)
+def setup_download(
+    data: pathlib.Path,
+    overwrite: bool,
+) -> int:
+    # Ensure the `data` directory exists
+    if not os.path.exists(data):
+        logger.debug("Creating directory at %s", data)
+        os.makedirs(data, exist_ok=True)
+    elif os.path.isfile(data):
+        logger.error("`-d | --data` must point to a directory, but %s is a file", data)
+        return 1
+    # Download HPO, if needed
+    fpath_hpo = data.joinpath(DEFAULT_HPO_PATH)
+    should_download_hpo = should_execute(
+        fpath_hpo,
+        "HPO",
+        "download",
+        overwrite,
+    )
+    if should_download_hpo:
+        url_hpo = "https://purl.obolibrary.org/obo/hp.json"
+        download_resource(
+            url_hpo,
+            str(fpath_hpo),
+            "HPO",
+        )
+    return 0
+def should_execute(
+    fpath: pathlib.Path,
+    resource_name: str,
+    action_name: str,
+    overwrite: bool,
+) -> bool:
+    if os.path.isfile(fpath):
+        if overwrite:
+            logger.info("Overwriting %s at %s", resource_name, fpath)
+            return True
+        else:
+            logger.info(
+                "Cowardly refusing to %s %s since it already exists at %s",
+                action_name,
+                resource_name,
+                fpath,
+            )
+            return False
+    else:
+        logger.info("Proceeding with the %s of %s", action_name, resource_name)
+        return True
+def download_resource(
+    url: str,
+    destination: str,
+    resource_name: str,
+):
+    logger.debug("Fetching %s from %s", resource_name, url)
+    logger.debug("Storing %s to %s", resource_name, destination)
+    with (
+        open_text_io_handle_for_reading(url) as fhin,
+        open_text_io_handle_for_writing(destination) as fhout,
+    ):
+        fhout.write(fhin.read())
+# #################### ------------ `preprocess` ----------- ####################
+parser_preprocess = subparsers.add_parser(
+    "preprocess",
+    help="prepare phenopackets for clustering",
+)
+parser_preprocess.add_argument(
+    "-d",
+    "--data",
+    type=pathlib.Path,
+    default=pathlib.Path(os.getcwd()).joinpath(DEFAULT_DATA_PATH),
+    help="path to stratify data directory",
+)
+parser_preprocess.add_argument(
+    "--controversy",
+    type=str,
+    default="small",
+    choices=("high", "moderate", "small", "none"),
+    help="try to sanitize issues with controversy less than this threshold",
+)
+parser_preprocess.add_argument(
+    "outdir",
+    type=pathlib.Path,
+    default=pathlib.Path(os.getcwd()),
+    help="folder for storing the preprocessed files",
+)
+parser_preprocess.add_argument(
+    "phenopackets",
+    nargs="+",
+    type=pathlib.Path,
+    help="phenopacket JSON files with case reports for clustering",
+)
+def preprocess(
+    data: pathlib.Path,
+    controversy: typing.Literal["high", "moderate", "small", "none"],
+    outdir: pathlib.Path,
+    phenopackets: typing.Sequence[pathlib.Path],
+) -> int:
+    import json
+    from stratiphy.io import StratiphyJSONEncoder
+    from stratiphy.preprocessing.phenopackets import read_phenopacket
+    from stratiphy.preprocessing.sanitize import Controversy, sanitize_samples
+    # Check inputs
+    fpath_hpo = data.joinpath(DEFAULT_HPO_PATH)
+    if not os.path.isfile(fpath_hpo):
+        logger.error("HPO is not present at %s", fpath_hpo.absolute())
+        return 1
+    # Try to create the output folder, including possibly non-existent parent folders
+    os.makedirs(outdir, exist_ok=True)
+    # Check the controversy threshold
+    assert controversy.lower() in ("high", "moderate", "small", "none")
+    # Read phenopackets
+    logger.info("Reading phenopackets")
+    logger.debug(
+        "Phenopacket paths: %s",
+        list(str(pp) for pp in phenopackets),
+    )
+    samples = tuple(read_phenopacket(pp) for pp in phenopackets)
+    logger.info("Read %d phenopackets", len(samples))
+    # Sanitize sample
+    logger.info("Sanitizing samples")
+    logger.debug("Loading HPO from %s", fpath_hpo.absolute())
+    hpo = hpotk.load_minimal_ontology(str(fpath_hpo.absolute()))
+    level = Controversy[controversy.upper()]
+    logger.debug("Fixing sanity issues at or below %s level of controversy", level.name.lower())
+    sanitation_result = sanitize_samples(
+        samples=samples,
+        hpo=hpo,
+        threshold=level,
+    )
+    for sample, actions in sanitation_result.get_samples_and_actions():
+        print(f"Sample: {sample.labels}")
+        for action in actions:
+            print(f" - {action}")
+    # Serialize the samples
+    logger.info("Serializing the sanitized samples")
+    fpath_cohort = os.path.abspath(os.path.join(outdir, DEFAULT_SAMPLES_PATH))
+    with open_text_io_handle_for_writing(fpath_cohort) as fh:
+        json.dump(sanitation_result.sanitized_samples, fh, cls=StratiphyJSONEncoder)
+    logger.info("Wrote the samples to %s", fpath_cohort)
+    return 0
+# #################### ------------- `compute` ------------- ####################
+parser_compute = subparsers.add_parser(
+    "compute",
+    help="execute the clustering workflow",
+)
+parser_compute.add_argument(
+    "-d",
+    "--data",
+    type=pathlib.Path,
+    default=pathlib.Path(os.getcwd()).joinpath(DEFAULT_DATA_PATH),
+    help="path to stratify data directory",
+)
+parser_compute.add_argument(
+    "--rand-iter",
+    type=int,
+    default=200,
+    help="the number of random cohorts to simulate",
+)
+parser_compute.add_argument(
+    "-k",
+    "--k-clusters",
+    nargs="+",
+    type=int,
+    default=(2, 3, 4, 5, 6),
+    help="k clusters to test",
+)
+parser_compute.add_argument(
+    "--mc-iter",
+    type=int,
+    default=1_000_000,
+    help="count of Monte-Carlo simulations for testing term-cluster association",
+)
+parser_compute.add_argument(
+    "-s",
+    "--samples",
+    metavar=DEFAULT_SAMPLES_PATH,
+    default=None,
+    help="path to JSON file with preprocessed samples",
+)
+parser_compute.add_argument(
+    "-r",
+    "--results",
+    metavar=DEFAULT_RESULTS_PATH,
+    default=None,
+    help="path to store the clustering result data",
+)
+parser_compute.add_argument(
+    "outdir",
+    type=pathlib.Path,
+    default=pathlib.Path(os.getcwd()),
+    help="folder for storing the preprocessed files",
+)
+def compute(
+    k_clusters: typing.Sequence[int],
+    n_rand_cohort: int,
+    mc_iter: int,
+    fpath_samples: typing.Optional[pathlib.Path],
+    fpath_results: typing.Optional[pathlib.Path],
+    data: pathlib.Path,
+    outdir: pathlib.Path,
+) -> int:
+    from stratiphy.config import configure_workflow
+    samples = _read_samples(fpath_samples, outdir)
+    logger.info("Read %d samples", len(samples))
+    logger.info("Configuring the clustering workflow")
+    logger.debug("%d random cohorts", n_rand_cohort)
+    fpath_hpo = data.joinpath(DEFAULT_HPO_PATH)
+    logger.debug("Using HPO at %s", fpath_hpo.absolute())
+    # Sanitize inputs.
+    _validate_is_readable_file(fpath_hpo)
+    hpo = hpotk.load_minimal_ontology(str(fpath_hpo))
+    workflow = configure_workflow(
+        hpo=hpo,
+        rand_cohorts=n_rand_cohort,
+        mc_iter=mc_iter,
+    )
+    logger.info("Executing the workflow")
+    result = workflow.run(
+        samples=samples,
+        k_clusters=k_clusters,
+    )
+    logger.debug("Serializing clustering results")
+    if fpath_results is None:
+        fpath_results = outdir.joinpath(DEFAULT_RESULTS_PATH)
+    result.to_protobuf(fpath_results)
+    logger.info("Serialized the results to %s", fpath_results.absolute())
+    return 0
+# ###############################################################################
+# Utils
+def _make_optional_path(
+    path: typing.Optional[str],
+) -> typing.Optional[pathlib.Path]:
+    return None if path is None else pathlib.Path(path)
+def _read_samples(
+    fpath_samples: typing.Optional[pathlib.Path],
+    outdir: pathlib.Path,
+) -> typing.Sequence[Sample]:
+    if fpath_samples is None:
+        fpath_samples = outdir.joinpath(DEFAULT_SAMPLES_PATH)
+    logger.debug(
+        "Reading samples from %s",
+        fpath_samples.absolute(),
+    )
+    # TODO: remove `str()` when using hpotk>=0.6.1
+    with open_text_io_handle_for_reading(fpath_samples) as fh:
+        return json.load(fh, cls=StratiphyJSONDecoder)
+def _validate_is_readable_file(fpath: typing.Union[str, pathlib.Path]):
+    if not isinstance(fpath, (str, pathlib.Path)) or not (os.path.isfile(fpath) and os.access(fpath, os.R_OK)):
+        raise ValueError(f"{fpath} is not a `str` or `pathlib.Path` pointing to a readable file")
+# ###############################################################################
+def main():
+    argv = sys.argv[1:]
+    if len(argv) == 0:
+        parser.print_help()
+        sys.exit(1)
+    args = parser.parse_args(argv)
+    setup_logging(logger, args.verbosity)
+    if args.command == "setup":
+        if args.command_setup == "download":
+            sys.exit(
+                setup_download(
+                    data=getattr(args, "data"),
+                    overwrite=getattr(args, "overwrite"),
+                ),
+            )
+        else:
+            parser_setup.print_help()
+            sys.exit(1)
+    elif args.command == "preprocess":
+        sys.exit(
+            preprocess(
+                data=getattr(args, "data"),
+                controversy=getattr(args, "controversy"),
+                outdir=getattr(args, "outdir"),
+                phenopackets=getattr(args, "phenopackets"),
+            )
+        )
+    elif args.command == "compute":
+        sys.exit(
+            compute(
+                k_clusters=getattr(args, "k_clusters"),
+                n_rand_cohort=getattr(args, "rand_iter"),
+                mc_iter=getattr(args, "mc_iter"),
+                fpath_samples=_make_optional_path(getattr(args, "samples")),
+                fpath_results=_make_optional_path(getattr(args, "results")),
+                data=getattr(args, "data"),
+                outdir=getattr(args, "outdir"),
+            )
+        )
+    else:
+        parser.print_help()
+        sys.exit(1)

stratiphy/analysis/__init__.py ADDED Viewed

File without changes

stratiphy/analysis/explain/__init__.py ADDED Viewed

@@ -0,0 +1,15 @@
+# Re-export selected names of the private `_explain` module.
+from ._explain import (
+    FisherExplainMethod,
+    TermAssociation,
+    TermCounter,
+    TermFilter,
+    TermTest,
+)
+# The names exported by `from stratiphy.analysis.explain import *`.
+__all__ = [
+    "FisherExplainMethod",
+    "TermAssociation",
+    "TermCounter",
+    "TermFilter",
+    "TermTest",
+]