traffic-taffy 0.2__py3-none-any.whl → 0.3.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pcap_compare/graph.py ADDED
@@ -0,0 +1,210 @@
1
+ """Read a PCAP file and graph it or parts of it"""
2
+
3
+ import os
4
+ import seaborn as sns
5
+ import matplotlib.pyplot as plt
6
+ import pandas
7
+ from pandas import DataFrame, to_datetime
8
+ from pcap_compare.dissector import (
9
+ PCAPDissectorType,
10
+ dissector_add_parseargs,
11
+ limitor_add_parseargs,
12
+ check_dissector_level,
13
+ )
14
+ from pcap_compare.dissectmany import PCAPDissectMany, PCAPDissector
15
+
16
+ from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
17
+ from logging import debug, info
18
+ import logging
19
+
20
+
21
def parse_args():
    """Parse the command line arguments and configure logging.

    Returns:
        The parsed argparse Namespace, after logging has been initialized
        from --log-level and matplotlib's font-manager chatter silenced.
    """
    parser = ArgumentParser(
        formatter_class=ArgumentDefaultsHelpFormatter,
        description=__doc__,
        epilog="Example Usage: ",  # typo fix: was "Exmaple Usage: "
    )

    parser.add_argument(
        "-g",
        "--graph-elements",
        default=None,
        type=str,
        help="Graph these particular elements; the default is packet counts",
    )

    parser.add_argument(
        "-o",
        "--output-file",
        default=None,
        type=str,
        help="Where to save the output (png)",
    )

    parser.add_argument(
        "--log-level",
        "--ll",
        default="info",
        # typo fix: was "fotal"
        help="Define verbosity level (debug, info, warning, error, fatal, critical).",
    )

    parser.add_argument(
        "-b",
        "--bin-size",
        type=int,
        default=1,
        help="Bin results into this many seconds",
    )

    # add the shared dissection/limiting options used across the tools
    dissector_add_parseargs(parser)
    limitor_add_parseargs(parser)

    parser.add_argument("input_file", type=str, help="PCAP file to graph", nargs="+")

    args = parser.parse_args()
    log_level = args.log_level.upper()
    logging.basicConfig(level=log_level, format="%(levelname)-10s:\t%(message)s")
    # matplotlib's font manager logs noisily at DEBUG; keep it quiet
    logging.getLogger("matplotlib.font_manager").setLevel(logging.ERROR)
    return args
70
+
71
+
72
class PcapGraph:
    """Dissects one or more PCAP files and graphs binned counts over time."""

    def __init__(
        self,
        pcap_files: str,
        output_file: str,
        maximum_count: int = None,
        minimum_count: int = None,
        bin_size: int = None,
        match_key: str = None,
        match_value: str = None,
        cache_pcap_results: bool = False,
        dissector_level: PCAPDissectorType = PCAPDissectorType.COUNT_ONLY,
    ):
        """Store the graphing configuration.

        Args:
            pcap_files: the PCAP file(s) to read and graph.
            output_file: where to save the output (png); None shows it instead.
            maximum_count: stop dissecting after this many packets.
            minimum_count: drop entries seen fewer than this many times.
            bin_size: bucket counts into bins of this many seconds.
            match_key: only graph keys matching this string.
            match_value: only graph values matching this.
            cache_pcap_results: cache dissection results for reuse.
            dissector_level: how deeply to dissect each packet.
        """
        self.pcap_files = pcap_files
        self.output_file = output_file
        self.maximum_count = maximum_count
        self.minimum_count = minimum_count
        self.bin_size = bin_size
        # not configurable yet; reserved for future filtering support
        self.subsections = None
        self.pkt_filter = None
        self.match_key = match_key
        self.match_value = match_value
        self.cache_pcap_results = cache_pcap_results
        self.dissector_level = dissector_level

    def load_pcaps(self):
        "loads the pcap and counts things into bins"
        self.data = {}

        info("reading pcap files")
        pdm = PCAPDissectMany(
            self.pcap_files,
            bin_size=self.bin_size,
            maximum_count=self.maximum_count,
            dissector_level=self.dissector_level,
            pcap_filter=self.pkt_filter,
            cache_results=self.cache_pcap_results,
        )
        results = pdm.load_all()

        # index each file's dissection results by its file name
        for result in results:
            self.data[result["file"]] = result["data"]
        info("done reading pcap files")

    def normalize_bins(self, counters):
        """Flatten time-binned counters into parallel time/count/index lists.

        Returns a dict of equal-length lists suitable for
        DataFrame.from_records: "time" (timestamps), "count" (int values)
        and "index" ("key=subkey" labels).
        """
        time_keys = list(counters.keys())
        # bin 0 holds the global (all-time) totals, not a real timestamp
        if time_keys[0] == 0:  # likely always
            time_keys.pop(0)

        results = {"time": [], "count": [], "index": []}

        # TODO: this could likely be made much more efficient and needs hole-filling
        info(f"match value: {self.match_value}")
        for (timestamp, key, subkey, value) in PCAPDissector.find_data(
            counters,
            timestamps=time_keys,
            match_string=self.match_key,
            match_value=self.match_value,
            minimum_count=self.minimum_count,
            make_printable=True,
        ):
            results["count"].append(int(value))
            results["index"].append(key + "=" + subkey)
            results["time"].append(timestamp)

        return results

    def merge_datasets(self):
        """Normalize each file's data and concatenate into one long-form DataFrame."""
        datasets = []
        for dataset in self.data:
            data = self.normalize_bins(self.data[dataset])
            data = DataFrame.from_records(data)
            data["filename"] = os.path.basename(dataset)
            data["time"] = to_datetime(data["time"], unit="s")
            datasets.append(data)
        return pandas.concat(datasets)

    def create_graph(self):
        "Graph the results of the data collection"
        debug("creating the graph")
        sns.set_theme()

        df = self.merge_datasets()
        debug(df)

        # with only one index value a hue legend is pure noise
        hue_variable = "index"
        if df[hue_variable].nunique() == 1:
            hue_variable = None

        ax = sns.relplot(
            data=df,
            kind="line",
            x="time",
            y="count",
            hue=hue_variable,
            aspect=1.77,
        )
        ax.set(xlabel="time", ylabel="count")
        plt.xticks(rotation=45)

        if self.output_file:
            # only log a save message when we are actually saving
            info(f"saving graph to {self.output_file}")
            plt.savefig(self.output_file)
        else:
            plt.show()

    def graph_it(self):
        """Load all pcaps and render the resulting graph."""
        debug("--- loading pcaps")
        self.load_pcaps()
        debug("--- creating graph")
        self.create_graph()
188
+
189
+
190
def main():
    """Entry point: parse arguments, configure a PcapGraph, and run it."""
    args = parse_args()

    # validate the requested dissection level before doing any real work
    check_dissector_level(args.dissection_level)

    options = {
        "maximum_count": args.packet_count,
        "minimum_count": args.minimum_count,
        "bin_size": args.bin_size,
        "match_key": args.match_string,
        "match_value": args.match_value,
        "cache_pcap_results": args.cache_pcap_results,
        "dissector_level": args.dissection_level,
    }
    grapher = PcapGraph(args.input_file, args.output_file, **options)
    grapher.graph_it()


if __name__ == "__main__":
    main()
@@ -47,6 +47,12 @@ def main():
47
47
  for key in contents["parameters"]:
48
48
  print(f" {key:<16} {contents['parameters'][key]}")
49
49
 
50
+ print("data info:")
51
+ timestamps = list(contents["dissection"].keys())
52
+ print(f" timestamps: {len(timestamps)}")
53
+ print(f" first: {timestamps[1]}") # skips 0 = global
54
+ print(f" last: {timestamps[-1]}")
55
+
50
56
 
51
57
  if __name__ == "__main__":
52
58
  main()
traffic_taffy/compare.py CHANGED
@@ -1,7 +1,7 @@
1
1
  """Takes a set of pcap files to compare and creates a report"""
2
2
 
3
3
  import logging
4
- from logging import info
4
+ from logging import info, debug
5
5
  from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
6
6
  from typing import List
7
7
  from rich.console import Console
@@ -33,6 +33,7 @@ class PcapCompare:
33
33
  only_negative: bool = False,
34
34
  cache_results: bool = False,
35
35
  dissection_level: PCAPDissectorType = PCAPDissectorType.COUNT_ONLY,
36
+ between_times: List[int] | None = None,
36
37
  ) -> None:
37
38
  self.pcaps = pcaps
38
39
  self.deep = deep
@@ -45,40 +46,45 @@ class PcapCompare:
45
46
  self.only_negative = only_negative
46
47
  self.cache_results = cache_results
47
48
  self.dissection_level = dissection_level
49
+ self.between_times = between_times
48
50
 
49
- def compare_results(self, report1: dict, report2: dict) -> dict:
51
+ @property
52
+ def reports(self):
53
+ return self._reports
54
+
55
+ @reports.setter
56
+ def reports(self, newvalue):
57
+ self._reports = newvalue
58
+
59
+ def compare_dissections(self, dissection1: dict, dissection2: dict) -> dict:
50
60
  "compares the results from two reports"
51
61
 
52
62
  # TODO: handle recursive depths, where items are subtrees rather than Counters
53
63
 
54
64
  report = {}
55
65
 
56
- # TODO: we're only (currently) doing full pcap compares
57
- report1 = report1[0]
58
- report2 = report2[0]
59
-
60
- # TODO: missing key in report2 (major items added)
61
- for key in report1:
66
+ # TODO: missing key in dissection2 (major items added)
67
+ for key in dissection1:
62
68
  # TODO: deal with missing keys from one set
63
- report1_total = report1[key].total()
64
- report2_total = report2[key].total()
69
+ dissection1_total = dissection1[key].total()
70
+ dissection2_total = dissection2[key].total()
65
71
  report[key] = {}
66
72
 
67
- for subkey in report1[key].keys():
73
+ for subkey in dissection1[key].keys():
68
74
  delta = 0.0
69
75
  total = 0
70
- if subkey in report1[key] and subkey in report2[key]:
76
+ if subkey in dissection1[key] and subkey in dissection2[key]:
71
77
  delta = (
72
- report2[key][subkey] / report2_total
73
- - report1[key][subkey] / report1_total
78
+ dissection2[key][subkey] / dissection2_total
79
+ - dissection1[key][subkey] / dissection1_total
74
80
  )
75
- total = report2[key][subkey] + report1[key][subkey]
76
- ref_count = report1[key][subkey]
77
- comp_count = report2[key][subkey]
81
+ total = dissection2[key][subkey] + dissection1[key][subkey]
82
+ ref_count = dissection1[key][subkey]
83
+ comp_count = dissection2[key][subkey]
78
84
  else:
79
85
  delta = -1.0
80
- total = report1[key][subkey]
81
- ref_count = report1[key][subkey]
86
+ total = dissection1[key][subkey]
87
+ ref_count = dissection1[key][subkey]
82
88
  comp_count = 0
83
89
 
84
90
  report[key][subkey] = {
@@ -88,12 +94,12 @@ class PcapCompare:
88
94
  "comp_count": comp_count,
89
95
  }
90
96
 
91
- for subkey in report2[key].keys():
97
+ for subkey in dissection2[key].keys():
92
98
  if subkey not in report[key]:
93
99
  delta = 1.0
94
- total = report2[key][subkey]
100
+ total = dissection2[key][subkey]
95
101
  ref_count = 0
96
- comp_count = report2[key][subkey]
102
+ comp_count = dissection2[key][subkey]
97
103
 
98
104
  report[key][subkey] = {
99
105
  "delta": delta,
@@ -136,6 +142,7 @@ class PcapCompare:
136
142
 
137
143
  def print_report(self, report: dict) -> None:
138
144
  "prints a report to the console"
145
+
139
146
  console = Console()
140
147
  for key in sorted(report):
141
148
  reported: bool = False
@@ -180,16 +187,11 @@ class PcapCompare:
180
187
  def print(self) -> None:
181
188
  "outputs the results"
182
189
  for n, report in enumerate(self.reports):
183
- print(f"************ report #{n}")
184
- self.print_report(report)
185
-
186
- def compare(self) -> None:
187
- "Compares each pcap against the original source"
188
-
189
- reports = []
190
-
191
- # TODO: use parallel processes to load multiple at a time
190
+ title = report.get("title", f"report #{n}")
191
+ print(f"************ {title}")
192
+ self.print_report(report["report"])
192
193
 
194
+ def load_pcaps(self) -> None:
193
195
  # load the first as a reference pcap
194
196
  info(f"reading pcap files using level={self.dissection_level}")
195
197
  pdm = PCAPDissectMany(
@@ -201,11 +203,71 @@ class PcapCompare:
201
203
  dissector_level=self.dissection_level,
202
204
  )
203
205
  results = pdm.load_all()
206
+ return results
207
+
208
+ def compare(self) -> None:
209
+ "Compares each pcap against the original source"
210
+
211
+ results = self.load_pcaps()
212
+ self.compare_all(results)
204
213
 
205
- reference = next(results)
206
- for other in results:
207
- # compare the two
208
- reports.append(self.compare_results(reference["data"], other["data"]))
214
+ def compare_all(self, results):
215
+ reports = []
216
+ if len(self.pcaps) > 1:
217
+ # multiple file comparison
218
+ reference = next(results)
219
+ for other in results:
220
+ # compare the two global summaries
221
+ reports.append(
222
+ {
223
+ "report": self.compare_dissections(
224
+ reference["data"][0], other["data"][0]
225
+ ),
226
+ "title": f"{reference['file']} vs {other['file']}",
227
+ }
228
+ )
229
+
230
+ else:
231
+ # deal with timestamps within a single file
232
+ results = list(results)
233
+ reference = results[0]
234
+ timestamps = list(reference["data"].keys())
235
+ debug(
236
+ f"found {len(timestamps)} timestamps from {timestamps[2]} to {timestamps[-1]}"
237
+ )
238
+ for timestamp in range(
239
+ 2, len(timestamps)
240
+ ): # second real non-zero timestamp to last
241
+ time_left = timestamps[timestamp - 1]
242
+ time_right = timestamps[timestamp]
243
+
244
+ # see if we were asked to only use particular time ranges
245
+ if self.between_times and (
246
+ time_left < self.between_times[0]
247
+ or time_right > self.between_times[1]
248
+ ):
249
+ # debug(f"skipping timestamps {time_left} and {time_right}")
250
+ continue
251
+
252
+ debug(f"comparing timestamps {time_left} and {time_right}")
253
+
254
+ report = self.compare_dissections(
255
+ reference["data"][time_left], reference["data"][time_right]
256
+ )
257
+
258
+ title = f"time {time_left} vs time {time_right}"
259
+ print(f"************ {title}")
260
+ self.print_report(report)
261
+
262
+ continue
263
+
264
+ # takes way too much memory to do it "right"
265
+ # reports.append(
266
+ # {
267
+ # "report": report,
268
+ # "title": f"time {time_left} vs time {time_right}",
269
+ # }
270
+ # )
209
271
 
210
272
  self.reports = reports
211
273
 
@@ -236,6 +298,14 @@ def parse_args():
236
298
  "-N", "--only-negative", action="store_true", help="Only show negative entries"
237
299
  )
238
300
 
301
+ limiting_parser.add_argument(
302
+ "-T",
303
+ "--between-times",
304
+ nargs=2,
305
+ type=int,
306
+ help="For single files, only display results between these timestamps",
307
+ )
308
+
239
309
  dissector_add_parseargs(parser)
240
310
 
241
311
  debugging_group = parser.add_argument_group("Debugging options")
@@ -270,6 +340,7 @@ def main():
270
340
  only_negative=args.only_negative,
271
341
  cache_results=args.cache_pcap_results,
272
342
  dissection_level=args.dissection_level,
343
+ between_times=args.between_times,
273
344
  )
274
345
 
275
346
  # compare the pcaps
@@ -32,7 +32,7 @@ class PCAPDissectMany:
32
32
  pd.load()
33
33
  return pd.data
34
34
 
35
- def load_pcap(self, pcap_file, split_size=100000, maximum_count=0):
35
+ def load_pcap(self, pcap_file, split_size=None, maximum_count=0):
36
36
  pd = PCAPDissector(
37
37
  pcap_file,
38
38
  *self.args,
@@ -110,6 +110,50 @@ class PCAPDissector:
110
110
  data[timestamp][key]
111
111
  )
112
112
 
113
+ @staticmethod
114
+ def open_maybe_compressed(filename):
115
+ """Opens a pcap file, potentially decompressing it."""
116
+
117
+ magic_dict = {
118
+ bytes([0x1F, 0x8B, 0x08]): "gz",
119
+ bytes([0x42, 0x5A, 0x68]): "bz2",
120
+ bytes([0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00]): "xz",
121
+ }
122
+ max_len = max(len(x) for x in magic_dict)
123
+
124
+ base_handle = open(filename, "rb")
125
+ file_start = base_handle.read(max_len)
126
+ base_handle.close()
127
+
128
+ for magic, filetype in magic_dict.items():
129
+ if file_start.startswith(magic):
130
+ try:
131
+ if filetype == "gz":
132
+ import gzip
133
+
134
+ return_handle = gzip.open(filename, "rb")
135
+ return return_handle
136
+ elif filetype == "bz2":
137
+ import bz2
138
+
139
+ return_handle = bz2.open(filename, "rb")
140
+ setattr(return_handle, "name", filename)
141
+ return return_handle
142
+ elif filetype == "xz":
143
+ import lzma
144
+
145
+ return_handle = lzma.open(filename, "rb")
146
+ return return_handle
147
+ else:
148
+ raise ValueError("unknown compression error")
149
+ except Exception:
150
+ # likely we failed to find a compression module
151
+ debug(f"failed to use {filetype} module to decode the input stream")
152
+ raise ValueError("cannot decode file")
153
+
154
+ # return a raw file and hope it's not compressed'
155
+ return open(filename, "rb")
156
+
113
157
  def incr(self, key: str, value: Any, count: int = 1):
114
158
  # always save a total count at the zero bin
115
159
  # note: there should be no recorded tcpdump files from 1970 Jan 01 :-)
@@ -158,7 +202,7 @@ class PCAPDissector:
158
202
  if parameter == "dissector_level":
159
203
  debug("------------ here 1")
160
204
  if parameter == "dissector_level" and specified <= cached:
161
- debug("here with dissector_level {specified} and {cached}")
205
+ debug(f"here with dissector_level {specified} and {cached}")
162
206
  # loading a more detailed cache is ok
163
207
  continue
164
208
 
@@ -206,7 +250,7 @@ class PCAPDissector:
206
250
  self.timestamp = self.timestamp - self.timestamp % self.bin_size
207
251
  self.incr(self.TOTAL_COUNT, self.TOTAL_SUBKEY)
208
252
 
209
- if self.dissector_level == PCAPDissectorType.THROUGH_IP.value:
253
+ if self.dissector_level >= PCAPDissectorType.THROUGH_IP.value:
210
254
  eth = dpkt.ethernet.Ethernet(packet)
211
255
  # these names are designed to match scapy names
212
256
  self.incr("Ethernet.dst", eth.dst)
@@ -262,7 +306,7 @@ class PCAPDissector:
262
306
  def load_via_dpkt(self) -> dict:
263
307
  self.data = {0: defaultdict(Counter)}
264
308
  if isinstance(self.pcap_file, str):
265
- pcap = dpkt.pcap.Reader(open(self.pcap_file, "rb"))
309
+ pcap = dpkt.pcap.Reader(PCAPDissector.open_maybe_compressed(self.pcap_file))
266
310
  else:
267
311
  # it's an open handle already
268
312
  pcap = dpkt.pcap.Reader(self.pcap_file)
@@ -335,8 +379,11 @@ class PCAPDissector:
335
379
 
336
380
  def load_via_scapy(self) -> dict:
337
381
  "Loads a pcap file into a nested dictionary of statistical counts"
382
+ load_this = self.pcap_file
383
+ if isinstance(self.pcap_file, str):
384
+ load_this = PCAPDissector.open_maybe_compressed(self.pcap_file)
338
385
  sniff(
339
- offline=self.pcap_file,
386
+ offline=load_this,
340
387
  prn=self.scapy_callback,
341
388
  store=0,
342
389
  count=self.maximum_count,
@@ -452,6 +499,14 @@ def dissector_add_parseargs(parser, add_subgroup: bool = True):
452
499
  help="Maximum number of packets to analyze",
453
500
  )
454
501
 
502
+ parser.add_argument(
503
+ "-b",
504
+ "--bin-size",
505
+ type=int,
506
+ default=3600,
507
+ help="Bin results into this many seconds",
508
+ )
509
+
455
510
  parser.add_argument(
456
511
  "-C",
457
512
  "--cache-pcap-results",
@@ -550,7 +605,7 @@ def main():
550
605
 
551
606
  pd = PCAPDissector(
552
607
  args.input_file,
553
- bin_size=0,
608
+ bin_size=args.bin_size,
554
609
  dissector_level=args.dissection_level,
555
610
  maximum_count=args.packet_count,
556
611
  cache_results=args.cache_pcap_results,