PyPI - traffic-taffy - Versions diffs - 0.8.5__py3-none-any.whl → 0.9__py3-none-any.whl - Mend

traffic-taffy 0.8.5__py3-none-any.whl → 0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (47) hide show

traffic_taffy/__init__.py +1 -1
traffic_taffy/algorithms/__init__.py +14 -7
traffic_taffy/algorithms/comparecorrelation.py +164 -0
traffic_taffy/algorithms/comparecorrelationchanges.py +210 -0
traffic_taffy/algorithms/compareseries.py +117 -0
traffic_taffy/algorithms/compareslices.py +116 -0
traffic_taffy/algorithms/statistical.py +9 -9
traffic_taffy/compare.py +149 -159
traffic_taffy/comparison.py +18 -4
traffic_taffy/config.py +133 -0
traffic_taffy/dissection.py +78 -6
traffic_taffy/dissectmany.py +26 -16
traffic_taffy/dissector.py +189 -77
traffic_taffy/dissector_engine/scapy.py +41 -8
traffic_taffy/graph.py +54 -53
traffic_taffy/graphdata.py +13 -2
traffic_taffy/hooks/ip2asn.py +20 -7
traffic_taffy/hooks/labels.py +45 -0
traffic_taffy/hooks/psl.py +21 -3
traffic_taffy/output/__init__.py +8 -48
traffic_taffy/output/console.py +37 -25
traffic_taffy/output/fsdb.py +24 -18
traffic_taffy/reports/__init__.py +5 -0
traffic_taffy/reports/compareslicesreport.py +85 -0
traffic_taffy/reports/correlationchangereport.py +54 -0
traffic_taffy/reports/correlationreport.py +42 -0
traffic_taffy/taffy_config.py +44 -0
traffic_taffy/tests/test_compare_results.py +22 -7
traffic_taffy/tests/test_config.py +149 -0
traffic_taffy/tests/test_global_config.py +33 -0
traffic_taffy/tests/test_normalize.py +1 -0
traffic_taffy/tests/test_pcap_dissector.py +12 -2
traffic_taffy/tests/test_pcap_splitter.py +21 -10
traffic_taffy/tools/cache_info.py +3 -2
traffic_taffy/tools/compare.py +32 -24
traffic_taffy/tools/config.py +83 -0
traffic_taffy/tools/dissect.py +51 -59
traffic_taffy/tools/explore.py +5 -4
traffic_taffy/tools/export.py +28 -17
traffic_taffy/tools/graph.py +25 -27
{traffic_taffy-0.8.5.dist-info → traffic_taffy-0.9.dist-info}/METADATA +4 -1
traffic_taffy-0.9.dist-info/RECORD +56 -0
{traffic_taffy-0.8.5.dist-info → traffic_taffy-0.9.dist-info}/entry_points.txt +1 -0
traffic_taffy/report.py +0 -12
traffic_taffy-0.8.5.dist-info/RECORD +0 -43
{traffic_taffy-0.8.5.dist-info → traffic_taffy-0.9.dist-info}/WHEEL +0 -0
{traffic_taffy-0.8.5.dist-info → traffic_taffy-0.9.dist-info}/licenses/LICENSE.txt +0 -0

traffic_taffy/compare.py CHANGED Viewed

@@ -1,61 +1,105 @@
 """The primary statistical packet comparison engine."""
 from __future__ import annotations
-from logging import debug, error
-from typing import List, TYPE_CHECKING
-from datetime import datetime
-import datetime as dt
-import itertools
+from typing import List, TYPE_CHECKING, Any
+from logging import error
 if TYPE_CHECKING:
-    from argparse import ArgumentParser, Namespace
+    from traffic_taffy.dissection import Dissection
+    from traffic_taffy.comparison import Comparison
+    from argparse_with_config import ArgumentParserWithConfig
-from traffic_taffy.comparison import Comparison
 from traffic_taffy.dissectmany import PCAPDissectMany
-from traffic_taffy.dissector import PCAPDissectorLevel
-from traffic_taffy.dissection import Dissection
 from traffic_taffy.algorithms.statistical import ComparisonStatistical
+from traffic_taffy.algorithms.comparecorrelation import CompareCorrelation
+from traffic_taffy.algorithms.comparecorrelationchanges import CompareCorrelationChanges
+from traffic_taffy.taffy_config import TaffyConfig, taffy_default
+from traffic_taffy.dissector import TTD_CFG, TTL_CFG
+class TTC_CFG:
+    KEY_COMPARE: str = "compare"
+    ONLY_POSITIVE: str = "only_positive"
+    ONLY_NEGATIVE: str = "only_negative"
+    PRINT_THRESHOLD: str = "print_threshold"
+    TOP_RECORDS: str = "top_records"
+    REVERSE_SORT: str = "reverse_sort"
+    SORT_BY: str = "sort_by"
+    ALGORITHM: str = "algorithm"
+def compare_default(name: str, value: Any) -> None:
+    taffy_default(TTC_CFG.KEY_COMPARE + "." + name, value)
+compare_default(TTC_CFG.ONLY_POSITIVE, False)
+compare_default(TTC_CFG.ONLY_NEGATIVE, False)
+compare_default(TTC_CFG.PRINT_THRESHOLD, 0.0)
+compare_default(TTC_CFG.TOP_RECORDS, None)
+compare_default(TTC_CFG.REVERSE_SORT, False)
+compare_default(TTC_CFG.SORT_BY, "delta%")
+compare_default(TTC_CFG.ALGORITHM, "statistical")
+compare_default(TTC_CFG.PRINT_THRESHOLD, 0.0)
 class PcapCompare:
     """Take a set of PCAPs to then perform various comparisons upon."""
-    REPORT_VERSION: int = 2
     def __init__(
         self,
         pcap_files: List[str],
-        maximum_count: int = 0,  # where 0 == all
-        deep: bool = True,
-        pcap_filter: str | None = None,
-        cache_results: bool = False,
-        cache_file_suffix: str = "taffy",
-        bin_size: int | None = None,
-        dissection_level: PCAPDissectorLevel = PCAPDissectorLevel.COUNT_ONLY,
-        between_times: List[int] | None = None,
-        ignore_list: List[str] | None = None,
-        layers: List[str] | None = None,
-        force_load: bool = False,
-        force_overwrite: bool = False,
-        merge_files: bool = False,
+        config: TaffyConfig | None = None,
     ) -> None:
         """Create a compare object."""
-        self.pcap_files = pcap_files
-        self.deep = deep
-        self.maximum_count = maximum_count
-        self.pcap_filter = pcap_filter
-        self.cache_results = cache_results
-        self.dissection_level = dissection_level
-        self.between_times = between_times
-        self.bin_size = bin_size
-        self.cache_file_suffix = cache_file_suffix
-        self.ignore_list = ignore_list or []
-        self.layers = layers
-        self.force_overwrite = force_overwrite
-        self.force_load = force_load
-        self.merge_files = merge_files
-        self.algorithm = ComparisonStatistical()
+        self.config = config
+        self._pcap_files = pcap_files
+        if not self.config:
+            config = TaffyConfig()
+        dissector_config = config[TTD_CFG.KEY_DISSECTOR]
+        self.maximum_count = dissector_config[TTD_CFG.PACKET_COUNT]
+        self.pcap_filter = dissector_config[TTD_CFG.CACHE_PCAP_RESULTS]
+        self.dissection_level = dissector_config[TTD_CFG.DISSECTION_LEVEL]
+        # self.between_times = config[TTC_CFG.BETWEEN_TIMES]
+        self.bin_size = dissector_config[TTD_CFG.BIN_SIZE]
+        self.cache_file_suffix = dissector_config[TTD_CFG.CACHE_FILE_SUFFIX]
+        if self.cache_file_suffix[0] != ".":
+            self.cache_file_suffix = "." + self.cache_file_suffix
+        self.ignore_list = dissector_config[TTD_CFG.IGNORE_LIST]
+        self.layers = dissector_config[TTD_CFG.LAYERS]
+        self.force_overwrite = dissector_config[TTD_CFG.FORCE_OVERWRITE]
+        self.force_load = dissector_config[TTD_CFG.FORCE_LOAD]
+        self.filter_arguments = dissector_config[TTD_CFG.FILTER_ARGUMENTS]
+        self.merge_files = dissector_config[TTD_CFG.MERGE]
+        compare_config = config[TTC_CFG.KEY_COMPARE]
+        algorithm = compare_config[TTC_CFG.ALGORITHM]
+        algorithm_arguments = {
+            "timestamps": None,
+            "match_string": self.filter_arguments["match_string"],
+            "match_value": self.filter_arguments["match_value"],
+            "minimum_count": self.filter_arguments["minimum_count"],
+            "make_printable": True,
+            "match_expression": self.filter_arguments["match_expression"],
+        }
+        if algorithm == "statistical":
+            self.algorithm = ComparisonStatistical(
+                **algorithm_arguments,
+            )
+        elif algorithm == "correlation":
+            self.algorithm = CompareCorrelation(
+                **algorithm_arguments,
+            )
+        elif algorithm == "correlationchanges":
+            self.algorithm = CompareCorrelationChanges(
+                **algorithm_arguments,
+            )
+        else:
+            error(f"unknown algorithm: {algorithm}")
+            raise ValueError()
     @property
     def pcap_files(self) -> List[str]:
@@ -75,145 +119,76 @@ class PcapCompare:
     def reports(self, newvalue: List[dict]) -> None:
         self._reports = newvalue
-    def load_pcaps(self) -> None:
+    def load_pcaps(self, config: TaffyConfig) -> None:
         """Load all pcaps into memory and dissect them."""
-        # load the first as a reference pcap
+        # load the first as a reference pap
         pdm = PCAPDissectMany(
             self.pcap_files,
-            bin_size=self.bin_size,
-            maximum_count=self.maximum_count,
-            pcap_filter=self.pcap_filter,
-            cache_results=self.cache_results,
-            cache_file_suffix=self.cache_file_suffix,
-            dissector_level=self.dissection_level,
-            ignore_list=self.ignore_list,
-            layers=self.layers,
-            force_load=self.force_load,
-            force_overwrite=self.force_overwrite,
-            merge_files=self.merge_files,
+            config,
         )
         return pdm.load_all()
     def compare(self) -> List[Comparison]:
         """Compare each pcap as requested."""
-        dissections = self.load_pcaps()
+        dissections = self.load_pcaps(self.config)
         self.compare_all(dissections)
         return self.reports
     def compare_all(self, dissections: List[Dissection]) -> List[Comparison]:
         """Compare all loaded pcaps."""
-        reports = []
-        # hack to figure out if there is at least two instances of a generator
-        # without actually extracting them all
-        # (since it could be memory expensive)
-        reference = next(dissections)
-        other = None
-        multiple = True
-        try:
-            other = next(dissections)
-            dissections = itertools.chain([other], dissections)
-        except Exception as e:
-            print(e)
-            multiple = False
-        if multiple:
-            # multiple file comparison
-            for other in dissections:
-                # compare the two global summaries
-                report = self.algorithm.compare_dissections(
-                    reference.data[0], other.data[0]
-                )
-                report.title = f"{reference.pcap_file} vs {other.pcap_file}"
-                reports.append(report)
-        else:
-            # deal with timestamps within a single file
-            reference = reference.data
-            timestamps = list(reference.keys())
-            if len(timestamps) <= 2:  # just 0-summary plus a single stamp
-                error(
-                    "the requested pcap data was not long enough to compare against itself"
-                )
-                errorstr: str = "not large enough pcap file"
-                raise ValueError(errorstr)
-            debug(
-                f"found {len(timestamps)} timestamps from {timestamps[2]} to {timestamps[-1]}"
-            )
-            for timestamp in range(
-                2, len(timestamps)
-            ):  # second real non-zero timestamp to last
-                time_left = timestamps[timestamp - 1]
-                time_right = timestamps[timestamp]
-                # see if we were asked to only use particular time ranges
-                if self.between_times and (
-                    time_left < self.between_times[0]
-                    or time_right > self.between_times[1]
-                ):
-                    # debug(f"skipping timestamps {time_left} and {time_right}")
-                    continue
-                debug(f"comparing timestamps {time_left} and {time_right}")
-                report = self.algorithm.compare_dissections(
-                    reference[time_left],
-                    reference[time_right],
-                )
-                title_left = datetime.fromtimestamp(time_left, dt.UTC).strftime(
-                    "%Y-%m-%d %H:%M:%S"
-                )
-                title_right = datetime.fromtimestamp(time_right, dt.UTC).strftime(
-                    "%Y-%m-%d %H:%M:%S"
-                )
-                report.title = f"time {title_left} vs time {title_right}"
-                reports.append(report)
-                continue
-                # takes way too much memory to do it "right"
-                # reports.append(
-                #     {
-                #         "report": report,
-                #         "title": f"time {time_left} vs time {time_right}",
-                #     }
-                # )
-        self.reports = reports
-        return reports
+        self.reports = self.algorithm.compare_dissections(dissections)
+        return self.reports
 def compare_add_parseargs(
-    compare_parser: ArgumentParser, add_subgroup: bool = True
-) -> ArgumentParser:
+    compare_parser: ArgumentParserWithConfig,
+    config: TaffyConfig | None = None,
+    add_subgroup: bool = True,
+) -> ArgumentParserWithConfig:
     """Add common comparison arguments."""
+    if not config:
+        config = TaffyConfig()
+    compare_config = config[TTC_CFG.KEY_COMPARE]
     if add_subgroup:
-        compare_parser = compare_parser.add_argument_group("Comparison result options")
+        compare_parser = compare_parser.add_argument_group(
+            "Comparison result options", config_path=TTC_CFG.KEY_COMPARE
+        )
     compare_parser.add_argument(
         "-t",
         "--print-threshold",
-        default=0.0,
+        default=compare_config[TTC_CFG.PRINT_THRESHOLD],
+        config_path=TTC_CFG.PRINT_THRESHOLD,
         type=float,
         help="Don't print results with abs(percent) less than this threshold",
     )
     compare_parser.add_argument(
-        "-P", "--only-positive", action="store_true", help="Only show positive entries"
+        "-P",
+        "--only-positive",
+        action="store_true",
+        help="Only show positive entries",
+        default=compare_config[TTC_CFG.ONLY_POSITIVE],
+        config_path=TTC_CFG.ONLY_POSITIVE,
     )
     compare_parser.add_argument(
-        "-N", "--only-negative", action="store_true", help="Only show negative entries"
+        "-N",
+        "--only-negative",
+        action="store_true",
+        help="Only show negative entries",
+        default=compare_config[TTC_CFG.ONLY_NEGATIVE],
+        config_path=TTC_CFG.ONLY_NEGATIVE,
     )
     compare_parser.add_argument(
         "-R",
         "--top-records",
-        default=None,
+        default=compare_config[TTC_CFG.TOP_RECORDS],
+        config_path=TTC_CFG.TOP_RECORDS,
         type=int,
         help="Show the top N records from each section.",
     )
@@ -222,39 +197,54 @@ def compare_add_parseargs(
         "-r",
         "--reverse_sort",
         action="store_true",
+        default=compare_config[TTC_CFG.REVERSE_SORT],
+        config_path=TTC_CFG.REVERSE_SORT,
         help="Reverse the sort order of reports",
     )
     compare_parser.add_argument(
         "-s",
         "--sort-by",
-        default="delta%",
+        default=compare_config[TTC_CFG.SORT_BY],
+        config_path=TTC_CFG.SORT_BY,
         type=str,
         help="Sort report entries by this column",
     )
+    compare_parser.add_argument(
+        "-A",
+        "--algorithm",
+        default=compare_config[TTC_CFG.ALGORITHM],
+        config_path=TTC_CFG.ALGORITHM,
+        type=str,
+        help="The algorithm to apply for data comparison (statistical, correlation)",
+    )
     # compare_parser.add_argument(
     #     "-T",
     #     "--between-times",
-    #     nargs=2,
-    #     type=int,
-    #     help="For single files, only display results between these timestamps",
-    # )
     return compare_parser
-def get_comparison_args(args: Namespace) -> dict:
+def get_comparison_args(config: dict) -> dict:
     """Return a dict of comparison parameters from arguments."""
+    dissect_config = config[TTD_CFG.KEY_DISSECTOR]
+    compare_config = config[TTC_CFG.KEY_COMPARE]
+    limitor_config = config[TTL_CFG.KEY_LIMITOR]
     return {
-        "maximum_count": args.packet_count or 0,
-        "print_threshold": float(args.print_threshold) / 100.0,
-        "minimum_count": args.minimum_count,
-        "match_string": args.match_string,
-        "only_positive": args.only_positive,
-        "only_negative": args.only_negative,
-        "top_records": args.top_records,
-        "reverse_sort": args.reverse_sort,
-        "sort_by": args.sort_by,
-        "merge_files": args.merge,
+        "maximum_count": dissect_config[TTD_CFG.PACKET_COUNT] or 0,
+        "match_string": limitor_config[TTL_CFG.MATCH_STRING],
+        "match_value": limitor_config[TTL_CFG.MATCH_VALUE],
+        "match_expression": limitor_config[TTL_CFG.MATCH_EXPRESSION],
+        "minimum_count": limitor_config[TTL_CFG.MINIMUM_COUNT],
+        "print_threshold": float(compare_config[TTC_CFG.PRINT_THRESHOLD]) / 100.0,
+        "only_positive": compare_config[TTC_CFG.ONLY_POSITIVE],
+        "only_negative": compare_config[TTC_CFG.ONLY_NEGATIVE],
+        "top_records": compare_config[TTC_CFG.TOP_RECORDS],
+        "reverse_sort": compare_config[TTC_CFG.REVERSE_SORT],
+        "sort_by": compare_config[TTC_CFG.SORT_BY],
+        "merge_files": dissect_config[TTD_CFG.MERGE],
+        "algorithm": compare_config[TTC_CFG.ALGORITHM],
     }

traffic_taffy/comparison.py CHANGED Viewed

@@ -3,15 +3,29 @@
 from __future__ import annotations
 from typing import Dict, Any
+from traffic_taffy.reports import Report
+# Organized reports are dicts containing a primary key that is being
+# compared to (left hand side), and a secondary key that is the right
+# hand thing being compared.  Each key/subkey combination should point
+# to a Report containing the results of that comparison.
+OrganizedReports = Dict[str, Dict[Any, Report]]
 class Comparison:
     """A simple data storage class to hold comparison data."""
-    def __init__(self, contents: list, title: str = ""):
+    def __init__(
+        self,
+        contents: OrganizedReports,
+        title: str = "",
+        sort_by: str = "delta_percentage",
+    ):
         """Create a Comparison class from contents."""
-        self.contents = contents
+        self.contents: OrganizedReports = contents
         self.title: str = title
         self.printing_arguments: Dict[str, Any] = {}
+        self.sort_by = sort_by
     # title
     @property
@@ -25,10 +39,10 @@ class Comparison:
     # report contents -- actual data
     @property
-    def contents(self) -> None:
+    def contents(self) -> OrganizedReports:
         """The contents of this comparison."""
         return self._contents
     @contents.setter
-    def contents(self, new_contents: str) -> None:
+    def contents(self, new_contents: OrganizedReports) -> None:
         self._contents = new_contents

traffic_taffy/config.py ADDED Viewed

@@ -0,0 +1,133 @@
+"""A helper class to store a generic set of configuration as a dict."""
+from __future__ import annotations
+from enum import Enum
+from typing import TextIO, Dict, List, Any
+from pathlib import Path
+from logging import error
+from argparse import Namespace
+from dotnest import DotNest
+class ConfigStyles(Enum):
+    """A set of configuration types."""
+    YAML = "yaml"
+    TOML = "toml"
+    # TODO(hardaker): support "any" at some point to determine type at run-time
+class Config(dict):
+    """A generic configuration storage class."""
+    def __init__(self, *args, **kwargs):
+        """Create an configuration object to store collected data in."""
+        self._config_option_names = ["--config"]
+        self.dotnest = DotNest(self, allow_creation=True)
+        super().__init__(*args, **kwargs)
+    @property
+    def config_option_names(self) -> List[str]:
+        """The list of configuration file arguments to use/look for."""
+        return self._config_option_names
+    @config_option_names.setter
+    def config_option_names(self, newlist: str | List[str]) -> None:
+        if isinstance(newlist, str):
+            newlist = [newlist]
+        self._config_option_names = newlist
+    def deep_update(self, ref: dict, new_content: dict):
+        for key in new_content:
+            if key in ref and isinstance(ref[key], dict):
+                self.deep_update(ref[key], new_content[key])
+            else:
+                ref[key] = new_content[key]
+    def load_stream(
+        self, config_handle: TextIO, style: ConfigStyles = ConfigStyles.YAML
+    ) -> None:
+        """Import a set of configuration from a IO stream."""
+        if style == ConfigStyles.YAML:
+            import yaml
+            contents = yaml.safe_load(config_handle)
+        # TODO(hardaker): support TOML
+        self.deep_update(self, contents)
+    def load_file(
+        self, config_file: str, style: ConfigStyles = ConfigStyles.YAML
+    ) -> None:
+        """Load a configuration file from a filename."""
+        self.load_stream(Path.open(config_file), style=style)
+    def load_namespace(
+        self, namespace: Namespace, mapping: Dict[str, Any] | None = None
+    ) -> None:
+        """Load the contents of an argparse Namespace into configuration."""
+        values = vars(namespace)
+        if mapping:
+            values = {mapping.get(key, key): value for key, value in values.items()}
+        self.update(values)
+    def read_configfile_from_arguments(
+        self,
+        argv: List[str],
+    ) -> None:
+        """Scan an list of arguments for configuration file(s) and load them."""
+        # TODO(hardaker): convert this to argparse's parse known feature
+        # aka replace using stackoverflow answer to 3609852
+        for n, item in enumerate(argv):
+            if item in self.config_option_names:
+                if len(argv) == n:
+                    error(f"no argument supplied after '{item}'")
+                    raise ValueError
+                if argv[n + 1].startswith("-"):
+                    error(f"The argument after '{item}' seems to be another argument")
+                    raise ValueError
+                filename = argv[n + 1]
+                if "=" in filename:
+                    (left, right) = filename.split("=")
+                    left = left.strip()
+                    right = right.strip()
+                    self.set_dotnest(left, right)
+                    continue
+                if not Path(filename).is_file():
+                    error(
+                        f"The filename after '{item}' does not exist or is not a file"
+                    )
+                    raise ValueError
+                self.load_file(filename)
+    def as_namespace(self) -> Namespace:
+        """Convert the configuration (back) into a argparse Namespace."""
+        namespace = Namespace()
+        for item, value in self.items():
+            setattr(namespace, item, value)
+        return namespace
+    def dump(self):
+        """Dumps the current configuration into a YAML format."""
+        import yaml
+        print(yaml.dump(self))
+    def set_dotnest(self, parameter: str, value: Any):
+        self.dotnest.set(parameter, value)
+    def get_dotnest(
+        self, parameter: str, default: Any = None, return_none: bool = True
+    ):
+        result = self.dotnest.get(parameter, return_none=return_none)
+        if result is not None:
+            return result
+        return default

traffic-taffy 0.8.5__py3-none-any.whl → 0.9__py3-none-any.whl

traffic-taffy 0.8.5__py3-none-any.whl → 0.9__py3-none-any.whl