traffic-taffy 0.3.6__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- traffic_taffy/cache_info.py +0 -6
- traffic_taffy/compare.py +154 -250
- traffic_taffy/comparison.py +26 -0
- traffic_taffy/dissection.py +383 -0
- traffic_taffy/dissectmany.py +20 -18
- traffic_taffy/dissector.py +128 -476
- traffic_taffy/dissector_engine/__init__.py +35 -0
- traffic_taffy/dissector_engine/dpkt.py +98 -0
- traffic_taffy/dissector_engine/scapy.py +98 -0
- traffic_taffy/graph.py +23 -90
- traffic_taffy/graphdata.py +35 -20
- traffic_taffy/output/__init__.py +118 -0
- traffic_taffy/output/console.py +72 -0
- traffic_taffy/output/fsdb.py +50 -0
- traffic_taffy/output/memory.py +51 -0
- traffic_taffy/pcap_splitter.py +17 -36
- traffic_taffy/tools/cache_info.py +65 -0
- traffic_taffy/tools/compare.py +110 -0
- traffic_taffy/tools/dissect.py +77 -0
- traffic_taffy/tools/explore.py +686 -0
- traffic_taffy/tools/graph.py +85 -0
- {traffic_taffy-0.3.6.dist-info → traffic_taffy-0.4.1.dist-info}/METADATA +1 -1
- traffic_taffy-0.4.1.dist-info/RECORD +29 -0
- traffic_taffy-0.4.1.dist-info/entry_points.txt +6 -0
- pcap_compare/cache_info.py +0 -46
- pcap_compare/compare.py +0 -288
- pcap_compare/dissectmany.py +0 -21
- pcap_compare/dissector.py +0 -512
- pcap_compare/dissectorresults.py +0 -21
- pcap_compare/graph.py +0 -210
- traffic_taffy/explore.py +0 -221
- traffic_taffy-0.3.6.dist-info/RECORD +0 -22
- traffic_taffy-0.3.6.dist-info/entry_points.txt +0 -5
- {pcap_compare → traffic_taffy/tools}/__init__.py +0 -0
- {traffic_taffy-0.3.6.dist-info → traffic_taffy-0.4.1.dist-info}/WHEEL +0 -0
- {traffic_taffy-0.3.6.dist-info → traffic_taffy-0.4.1.dist-info}/top_level.txt +0 -0
"""Base class for a dissection engine with subclasses overriding load()"""

from traffic_taffy.dissection import Dissection, PCAPDissectorLevel


class DissectionEngine:
    """Common configuration shared by all dissection engine back-ends.

    Subclasses implement load(), which reads self.pcap_file and returns a
    populated Dissection built via init_dissection().
    """

    def __init__(
        self,
        pcap_file,
        pcap_filter: str = "",
        maximum_count: int = 0,
        bin_size: int = 0,
        dissector_level: PCAPDissectorLevel = PCAPDissectorLevel.DETAILED,
        cache_file_suffix: str = "pkl",
        ignore_list: list | None = None,
    ):
        """Remember the dissection parameters for later use by load().

        Args:
            pcap_file: a pcap file path, or an already-open file handle.
            pcap_filter: optional pcap filter expression to apply.
            maximum_count: stop after this many packets (0 = no limit).
            bin_size: bin packet timestamps into buckets of this many seconds.
            dissector_level: how deeply to dissect each packet.
            cache_file_suffix: file suffix used for cached dissections.
            ignore_list: field names to skip when counting.
        """
        self.pcap_file = pcap_file
        self.dissector_level = dissector_level
        self.bin_size = bin_size
        self.maximum_count = maximum_count
        self.pcap_filter = pcap_filter
        self.cache_file_suffix = cache_file_suffix
        # BUG FIX: the default used to be a shared mutable list ([]); use
        # None as the sentinel so instances can never share state.
        self.ignore_list = set(ignore_list or [])

    def init_dissection(self) -> Dissection:
        """Create, store, and return a fresh Dissection from our settings."""
        self.dissection = Dissection(
            pcap_file=self.pcap_file,
            dissector_level=self.dissector_level,
            bin_size=self.bin_size,
            pcap_filter=self.pcap_filter,
            maximum_count=self.maximum_count,
            cache_file_suffix=self.cache_file_suffix,
            ignore_list=self.ignore_list,
        )
        return self.dissection
|
from traffic_taffy.dissector_engine import DissectionEngine
from traffic_taffy.dissection import Dissection, PCAPDissectorLevel
from pcap_parallel import PCAPParallel as pcapp

import dpkt


class DissectionEngineDpkt(DissectionEngine):
    """A dissection engine back-end that parses packets using dpkt."""

    # NOTE: the previous no-op __init__ (which only delegated to super())
    # has been removed; DissectionEngine.__init__ is used directly.

    def load(self) -> Dissection:
        """Parse self.pcap_file with dpkt and return the resulting Dissection."""
        self.init_dissection()
        if isinstance(self.pcap_file, str):
            pcap = dpkt.pcap.Reader(pcapp.open_maybe_compressed(self.pcap_file))
        else:
            # it's an open handle already
            pcap = dpkt.pcap.Reader(self.pcap_file)
        if self.pcap_filter:
            pcap.setfilter(self.pcap_filter)
        pcap.dispatch(self.maximum_count, self.callback)

        self.dissection.calculate_metadata()
        return self.dissection

    def incr(self, dissection, name, value):
        """Count name=value unless the field name is on the ignore list."""
        if name not in self.ignore_list:
            dissection.incr(name, value)

    def callback(self, timestamp: float, packet: bytes):
        """Per-packet dpkt callback: bin the timestamp and count packet fields."""
        # if binning is requested, save it in a binned time slot
        dissection: Dissection = self.dissection

        dissection.timestamp = int(timestamp)
        if dissection.bin_size:
            dissection.timestamp = (
                dissection.timestamp - dissection.timestamp % dissection.bin_size
            )

        dissection.incr(Dissection.TOTAL_COUNT, dissection.TOTAL_SUBKEY)

        level = self.dissector_level
        if isinstance(level, PCAPDissectorLevel):
            level = level.value
        if level >= PCAPDissectorLevel.THROUGH_IP.value:
            eth = dpkt.ethernet.Ethernet(packet)
            # these names are designed to match scapy names
            self.incr(dissection, "Ethernet.dst", eth.dst)
            self.incr(dissection, "Ethernet.src", eth.src)
            self.incr(dissection, "Ethernet.type", eth.type)

            if isinstance(eth.data, dpkt.ip.IP):
                ip = eth.data

                IPVER = "IP"
                if ip.v == 6:
                    IPVER = "IPv6"

                prefix = f"Ethernet.{IPVER}."

                # TODO: make sure all these match scapy
                self.incr(dissection, prefix + "dst", ip.dst)
                self.incr(dissection, prefix + "src", ip.src)
                self.incr(dissection, prefix + "df", ip.df)
                self.incr(dissection, prefix + "offset", ip.offset)
                # BUG FIX: "tos" used to be counted twice per packet (a
                # duplicated incr call), doubling its totals; count it once.
                self.incr(dissection, prefix + "tos", ip.tos)
                self.incr(dissection, prefix + "len", ip.len)
                self.incr(dissection, prefix + "id", ip.id)
                self.incr(dissection, prefix + "hl", ip.hl)
                self.incr(dissection, prefix + "rf", ip.rf)
                self.incr(dissection, prefix + "p", ip.p)
                self.incr(dissection, prefix + "chksum", ip.sum)
                self.incr(dissection, prefix + "version", ip.v)
                self.incr(dissection, prefix + "ttl", ip.ttl)

                if isinstance(ip.data, dpkt.udp.UDP):
                    udp = ip.data
                    self.incr(dissection, prefix + "UDP.sport", udp.sport)
                    self.incr(dissection, prefix + "UDP.dport", udp.dport)
                    self.incr(dissection, prefix + "UDP.len", udp.ulen)
                    self.incr(dissection, prefix + "UDP.chksum", udp.sum)

                    # TODO: handle DNS and others for level 3

                elif isinstance(ip.data, dpkt.tcp.TCP):
                    # TODO
                    tcp = ip.data
                    self.incr(dissection, prefix + "TCP.sport", tcp.sport)
                    self.incr(dissection, prefix + "TCP.dport", tcp.dport)
                    self.incr(dissection, prefix + "TCP.seq", tcp.seq)
                    self.incr(dissection, prefix + "TCP.flags", tcp.flags)
                    # self.incr(dissection, prefix + "TCP.reserved", tcp.reserved)
                    self.incr(dissection, prefix + "TCP.window", tcp.win)
                    self.incr(dissection, prefix + "TCP.chksum", tcp.sum)
                    self.incr(dissection, prefix + "TCP.options", tcp.opts)

                    # TODO: handle DNS and others for level 3
from traffic_taffy.dissector_engine import DissectionEngine
from traffic_taffy.dissection import Dissection
from pcap_parallel import PCAPParallel as pcapp
from logging import warning

from scapy.all import sniff


class DissectionEngineScapy(DissectionEngine):
    """A dissection engine back-end that parses packets using scapy."""

    # BUG FIX: this class previously defined `_init_` (single underscores)
    # calling `super()._init_()`: a typo'd dunder that Python never invokes
    # as a constructor and which would raise AttributeError if called.  It
    # only delegated to the parent, so it has been removed entirely and
    # DissectionEngine.__init__ is used directly.

    def load(self) -> Dissection:
        """Loads a pcap file into a nested dictionary of statistical counts."""
        self.init_dissection()
        load_this = self.pcap_file
        if isinstance(self.pcap_file, str):
            load_this = pcapp.open_maybe_compressed(self.pcap_file)
        sniff(
            offline=load_this,
            prn=self.callback,
            store=0,
            count=self.maximum_count,
            filter=self.pcap_filter,
        )
        self.dissection.calculate_metadata()
        # TODO: for some reason this fails on xz compressed files when processing in parallel
        return self.dissection

    def add_item(self, field_value, prefix: str) -> None:
        """Adds an item to the self.dissection regardless of its various types."""

        if isinstance(field_value, list):
            if len(field_value) > 0:
                # if it's a list of tuples, count the (eg TCP option) names
                # TODO: values can be always the same or things like timestamps
                # that will always change or are too unique
                if isinstance(field_value[0], tuple):
                    for item in field_value:
                        self.dissection.incr(prefix, item[0])
                else:
                    for item in field_value:
                        self.add_item(item, prefix)
            # else:
            #     debug(f"ignoring empty-list: {field_value}")
        elif (
            isinstance(field_value, str)
            or isinstance(field_value, int)
            or isinstance(field_value, float)
        ):
            self.dissection.incr(prefix, field_value)

        elif isinstance(field_value, bytes):
            try:
                converted = field_value.decode("utf-8")
                self.dissection.incr(prefix, converted)
            except Exception:
                converted = "0x" + field_value.hex()
                self.dissection.incr(prefix, converted)

    # NOTE: annotation fixed from `str | None = ""`; a None prefix would
    # crash the `prefix + field_name` concatenation below, and all callers
    # pass a string.
    def add_layer(self, layer, prefix: str = "") -> None:
        """Analyzes a layer to add counts to each layer sub-component."""

        if hasattr(layer, "fields_desc"):
            name_list = [field.name for field in layer.fields_desc]
        elif hasattr(layer, "fields"):
            name_list = [field.name for field in layer.fields]
        else:
            warning(f"unavailable to deep dive into: {layer}")
            return

        for field_name in name_list:
            new_prefix = prefix + field_name

            if new_prefix in self.ignore_list:
                continue

            try:
                field_value = getattr(layer, field_name)
                if hasattr(field_value, "fields"):
                    self.add_layer(field_value, new_prefix + ".")
                else:
                    self.add_item(field_value, new_prefix)
            except Exception as e:
                warning(f"scapy error at '{prefix}' in field '{field_name}'")
                warning(e)

    def callback(self, packet):
        """Per-packet scapy callback: bin the timestamp and walk the layers."""
        prefix = "."
        self.timestamp = int(packet.time)
        if self.bin_size:
            self.timestamp = self.timestamp - self.timestamp % self.bin_size

        self.dissection.timestamp = int(self.timestamp)
        self.dissection.incr(Dissection.TOTAL_COUNT, Dissection.TOTAL_SUBKEY)
        for payload in packet.iterpayloads():
            prefix = f"{prefix}{payload.name}."
            self.add_layer(payload, prefix[1:])
|
traffic_taffy/graph.py
CHANGED
@@ -1,62 +1,12 @@
|
|
1
|
-
"""Read a PCAP file and graph it or parts of it"""
|
2
|
-
|
3
1
|
import seaborn as sns
|
4
2
|
import matplotlib.pyplot as plt
|
5
|
-
from
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
check_dissector_level,
|
10
|
-
)
|
3
|
+
from logging import debug, info
|
4
|
+
from typing import List
|
5
|
+
|
6
|
+
from traffic_taffy.dissector import PCAPDissectorLevel
|
11
7
|
from traffic_taffy.dissectmany import PCAPDissectMany
|
12
8
|
from traffic_taffy.graphdata import PcapGraphData
|
13
9
|
|
14
|
-
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
|
15
|
-
from logging import debug, info
|
16
|
-
import logging
|
17
|
-
|
18
|
-
|
19
|
-
def parse_args():
|
20
|
-
"Parse the command line arguments."
|
21
|
-
parser = ArgumentParser(
|
22
|
-
formatter_class=ArgumentDefaultsHelpFormatter,
|
23
|
-
description=__doc__,
|
24
|
-
epilog="Exmaple Usage: ",
|
25
|
-
)
|
26
|
-
|
27
|
-
parser.add_argument(
|
28
|
-
"-o",
|
29
|
-
"--output-file",
|
30
|
-
default=None,
|
31
|
-
type=str,
|
32
|
-
help="Where to save the output (png)",
|
33
|
-
)
|
34
|
-
|
35
|
-
parser.add_argument(
|
36
|
-
"--log-level",
|
37
|
-
"--ll",
|
38
|
-
default="info",
|
39
|
-
help="Define verbosity level (debug, info, warning, error, fotal, critical).",
|
40
|
-
)
|
41
|
-
|
42
|
-
parser.add_argument(
|
43
|
-
"-i",
|
44
|
-
"--interactive",
|
45
|
-
action="store_true",
|
46
|
-
help="Prompt repeatedly for graph data to create",
|
47
|
-
)
|
48
|
-
|
49
|
-
dissector_add_parseargs(parser)
|
50
|
-
limitor_add_parseargs(parser)
|
51
|
-
|
52
|
-
parser.add_argument("input_file", type=str, help="PCAP file to graph", nargs="+")
|
53
|
-
|
54
|
-
args = parser.parse_args()
|
55
|
-
log_level = args.log_level.upper()
|
56
|
-
logging.basicConfig(level=log_level, format="%(levelname)-10s:\t%(message)s")
|
57
|
-
logging.getLogger("matplotlib.font_manager").setLevel(logging.ERROR)
|
58
|
-
return args
|
59
|
-
|
60
10
|
|
61
11
|
class PcapGraph(PcapGraphData):
|
62
12
|
def __init__(
|
@@ -66,11 +16,13 @@ class PcapGraph(PcapGraphData):
|
|
66
16
|
maximum_count: int = None,
|
67
17
|
minimum_count: int = None,
|
68
18
|
bin_size: int = None,
|
69
|
-
|
19
|
+
match_string: str = None,
|
70
20
|
match_value: str = None,
|
71
21
|
cache_pcap_results: bool = False,
|
72
|
-
dissector_level:
|
22
|
+
dissector_level: PCAPDissectorLevel = PCAPDissectorLevel.COUNT_ONLY,
|
73
23
|
interactive: bool = False,
|
24
|
+
ignore_list: List[str] = [],
|
25
|
+
by_percentage: bool = False,
|
74
26
|
):
|
75
27
|
self.pcap_files = pcap_files
|
76
28
|
self.output_file = output_file
|
@@ -79,11 +31,13 @@ class PcapGraph(PcapGraphData):
|
|
79
31
|
self.bin_size = bin_size
|
80
32
|
self.subsections = None
|
81
33
|
self.pkt_filter = None
|
82
|
-
self.
|
34
|
+
self.match_string = match_string
|
83
35
|
self.match_value = match_value
|
84
36
|
self.cache_pcap_results = cache_pcap_results
|
85
37
|
self.dissector_level = dissector_level
|
86
38
|
self.interactive = interactive
|
39
|
+
self.ignore_list = ignore_list
|
40
|
+
self.by_percentage = by_percentage
|
87
41
|
|
88
42
|
super().__init__()
|
89
43
|
|
@@ -99,26 +53,29 @@ class PcapGraph(PcapGraphData):
|
|
99
53
|
dissector_level=self.dissector_level,
|
100
54
|
pcap_filter=self.pkt_filter,
|
101
55
|
cache_results=self.cache_pcap_results,
|
56
|
+
ignore_list=self.ignore_list,
|
102
57
|
)
|
103
|
-
|
104
|
-
|
105
|
-
for result in results:
|
106
|
-
self.data[result["file"]] = result["data"]
|
58
|
+
self.dissections = pdm.load_all()
|
107
59
|
info("done reading pcap files")
|
108
60
|
|
109
61
|
def create_graph(self):
|
110
|
-
df = self.
|
111
|
-
debug(df)
|
62
|
+
df = self.get_dataframe(merge=True, calculate_load_fraction=self.by_percentage)
|
112
63
|
|
113
64
|
hue_variable = "index"
|
114
65
|
if df[hue_variable].nunique() == 1:
|
115
66
|
hue_variable = None
|
116
67
|
|
68
|
+
if self.by_percentage:
|
69
|
+
df["load_fraction"]
|
70
|
+
y_column = "load_fraction"
|
71
|
+
else:
|
72
|
+
y_column = "count"
|
73
|
+
|
117
74
|
ax = sns.relplot(
|
118
75
|
data=df,
|
119
76
|
kind="line",
|
120
77
|
x="time",
|
121
|
-
y=
|
78
|
+
y=y_column,
|
122
79
|
hue=hue_variable,
|
123
80
|
aspect=1.77,
|
124
81
|
)
|
@@ -143,9 +100,9 @@ class PcapGraph(PcapGraphData):
|
|
143
100
|
self.create_graph()
|
144
101
|
|
145
102
|
if self.interactive:
|
146
|
-
self.
|
103
|
+
self.match_string = input("search key: ")
|
147
104
|
self.match_value = input("value key: ")
|
148
|
-
if not self.
|
105
|
+
if not self.match_string and not self.match_value:
|
149
106
|
self.interactive = False
|
150
107
|
|
151
108
|
def graph_it(self):
|
@@ -153,27 +110,3 @@ class PcapGraph(PcapGraphData):
|
|
153
110
|
self.load_pcaps()
|
154
111
|
debug("--- creating graph")
|
155
112
|
self.show_graph()
|
156
|
-
|
157
|
-
|
158
|
-
def main():
|
159
|
-
args = parse_args()
|
160
|
-
|
161
|
-
check_dissector_level(args.dissection_level)
|
162
|
-
|
163
|
-
pc = PcapGraph(
|
164
|
-
args.input_file,
|
165
|
-
args.output_file,
|
166
|
-
maximum_count=args.packet_count,
|
167
|
-
minimum_count=args.minimum_count,
|
168
|
-
bin_size=args.bin_size,
|
169
|
-
match_key=args.match_string,
|
170
|
-
match_value=args.match_value,
|
171
|
-
cache_pcap_results=args.cache_pcap_results,
|
172
|
-
dissector_level=args.dissection_level,
|
173
|
-
interactive=args.interactive,
|
174
|
-
)
|
175
|
-
pc.graph_it()
|
176
|
-
|
177
|
-
|
178
|
-
if __name__ == "__main__":
|
179
|
-
main()
|
traffic_taffy/graphdata.py
CHANGED
@@ -1,35 +1,32 @@
|
|
1
1
|
import os
|
2
|
-
from traffic_taffy.dissector import PCAPDissector
|
3
2
|
from pandas import DataFrame, to_datetime, concat
|
4
3
|
|
5
4
|
|
6
5
|
class PcapGraphData:
|
7
6
|
def __init__(self):
|
7
|
+
self.dissections = []
|
8
8
|
pass
|
9
9
|
|
10
10
|
@property
|
11
|
-
def
|
12
|
-
return self.
|
11
|
+
def dissections(self):
|
12
|
+
return self._dissections
|
13
13
|
|
14
|
-
@
|
15
|
-
def
|
16
|
-
self.
|
14
|
+
@dissections.setter
|
15
|
+
def dissections(self, newvalue):
|
16
|
+
self._dissections = newvalue
|
17
17
|
|
18
|
-
def normalize_bins(self,
|
18
|
+
def normalize_bins(self, dissection):
|
19
19
|
results = {}
|
20
|
-
time_keys = list(
|
20
|
+
time_keys = list(dissection.data.keys())
|
21
21
|
if time_keys[0] == 0: # likely always
|
22
22
|
time_keys.pop(0)
|
23
|
-
time_keys[0]
|
24
|
-
time_keys[-1]
|
25
23
|
|
26
|
-
results = {"time": [], "count": [], "index": []}
|
24
|
+
results = {"time": [], "count": [], "index": [], "key": [], "subkey": []}
|
27
25
|
|
28
26
|
# TODO: this could likely be made much more efficient and needs hole-filling
|
29
|
-
for timestamp, key, subkey, value in
|
30
|
-
counters,
|
27
|
+
for timestamp, key, subkey, value in dissection.find_data(
|
31
28
|
timestamps=time_keys,
|
32
|
-
match_string=self.
|
29
|
+
match_string=self.match_string,
|
33
30
|
match_value=self.match_value,
|
34
31
|
minimum_count=self.minimum_count,
|
35
32
|
make_printable=True,
|
@@ -37,17 +34,35 @@ class PcapGraphData:
|
|
37
34
|
index = key + "=" + subkey
|
38
35
|
results["count"].append(int(value))
|
39
36
|
results["index"].append(index)
|
37
|
+
results["key"].append(key)
|
38
|
+
results["subkey"].append(subkey)
|
40
39
|
results["time"].append(timestamp)
|
41
40
|
|
42
41
|
return results
|
43
42
|
|
44
|
-
def
|
43
|
+
def get_dataframe(self, merge=False, calculate_load_fraction=False):
|
45
44
|
datasets = []
|
46
|
-
|
47
|
-
|
45
|
+
if merge:
|
46
|
+
dissection = next(self.dissections).clone()
|
47
|
+
for tomerge in self.dissections:
|
48
|
+
dissection.merge(tomerge)
|
49
|
+
dissections = [dissection]
|
50
|
+
else:
|
51
|
+
dissections = self.dissections
|
52
|
+
|
53
|
+
for dissection in dissections:
|
54
|
+
data = self.normalize_bins(dissection)
|
48
55
|
data = DataFrame.from_records(data)
|
49
|
-
data["filename"] = os.path.basename(
|
50
|
-
data["time"] = to_datetime(data["time"], unit="s")
|
56
|
+
data["filename"] = os.path.basename(dissection.pcap_file)
|
57
|
+
data["time"] = to_datetime(data["time"], unit="s", utc=True)
|
58
|
+
data["key"] = data["index"]
|
51
59
|
datasets.append(data)
|
52
|
-
datasets = concat(datasets)
|
60
|
+
datasets = concat(datasets, ignore_index=True)
|
61
|
+
|
62
|
+
if calculate_load_fraction:
|
63
|
+
time_groups = datasets.groupby("time")
|
64
|
+
datasets["load_fraction"] = (
|
65
|
+
100 * datasets["count"] / time_groups.transform("max")["count"]
|
66
|
+
)
|
67
|
+
|
53
68
|
return datasets
|
class Output:
    """Abstract base class for comparison-report output back-ends.

    Subclasses provide output_start() and output_record() (and may override
    output_new_section()/output_close()) to render each filtered record of
    a comparison report.
    """

    def __init__(self, report, options=None):
        """Store the report and the output option dictionary.

        Args:
            report: the comparison report to render.
            options: dict of output options (top_records, reverse_sort,
                match_string, only_positive, only_negative, print_threshold,
                minimum_count, ...).
        """
        self.report = report
        # BUG FIX: the default used to be a shared mutable dict ({}); use
        # None as the sentinel so instances can never share options state.
        self.output_options = options if options is not None else {}

    @property
    def report(self):
        return self._report

    @report.setter
    def report(self, new_report):
        self._report = new_report

    @property
    def output_options(self):
        return self._output_options

    @output_options.setter
    def output_options(self, new_output_options):
        self._output_options = new_output_options

    def output(self, report=None):
        """Render every record of the report that passes filter_check()."""
        if not report:
            report = self.report
        contents = report.contents

        first_of_anything: bool = True

        top_records = self.output_options.get("top_records")

        # intentionally reversed, as it should default to high to low
        sort_order = not self.output_options.get("reverse_sort", False)

        for key in sorted(contents):
            reported: bool = False

            if (
                self.output_options.get("match_string") is not None
                and self.output_options["match_string"] not in key
            ):
                continue

            # TODO: we don't do match_value here?

            record_count = 0
            for subkey, data in sorted(
                contents[key].items(),
                key=lambda x: x[1]["delta_percentage"],
                reverse=sort_order,
            ):
                if not self.filter_check(data):
                    continue

                # print the header
                if not reported:
                    if first_of_anything:
                        self.output_start(report)
                        first_of_anything = False

                    self.output_new_section(key)
                    reported = True

                self.output_record(key, subkey, data)

                record_count += 1

                if top_records and record_count >= top_records:
                    break

        self.output_close()

    def output_new_section(self, key):
        """Hook called when a new key section begins; no-op by default."""
        return

    def output_close(self):
        """Hook called after all records are rendered; no-op by default."""
        return

    def filter_check(self, data: dict) -> bool:
        "Return true if we should include it."
        delta: float = data["delta_percentage"]
        total: int = data["total"]

        if self.output_options["only_positive"] and delta <= 0:
            return False

        if self.output_options["only_negative"] and delta >= 0:
            return False

        if (
            not self.output_options["print_threshold"]
            and not self.output_options["minimum_count"]
        ):
            # always print
            return True

        if (
            self.output_options["print_threshold"]
            and not self.output_options["minimum_count"]
        ):
            # check output_options["print_threshold"] as a fraction
            if abs(delta) > self.output_options["print_threshold"]:
                return True
        elif (
            not self.output_options["print_threshold"]
            and self.output_options["minimum_count"]
        ):
            # just check output_options["minimum_count"]
            if total > self.output_options["minimum_count"]:
                return True
        else:
            # require both
            if (
                total > self.output_options["minimum_count"]
                and abs(delta) > self.output_options["print_threshold"]
            ):
                return True

        return False
from traffic_taffy.output import Output
from traffic_taffy.dissection import Dissection
from rich.console import Console as RichConsole


class Console(Output):
    """Renders a comparison report to the terminal through a rich Console."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.console = None
        self.have_done_header = False

    # actual routines to print stuff
    def init_console(self):
        """Lazily create the rich console the first time it is needed."""
        if not self.console:
            self.console = RichConsole()

    def output_start(self, report):
        """Print the report banner, then (once only) the column header."""
        # column spacing here must stay in sync with output_record()
        self.init_console()

        self.console.print(f"======== {report.title}")
        if self.have_done_header:
            return

        self.have_done_header = True

        header = " {0:<50}".format("Value")
        header += " {0:>8} {1:>8} {2:>8}".format("Left", "Right", "Delta")
        header += " {0:>8} {1:>8} {2:>7}".format("Left %", "Right %", "Delta-%")

        self.console.print(header)

    def output_new_section(self, key):
        """Print a divider announcing the next report key."""
        print(f"----- {key}")

    def output_record(self, key, subkey, data) -> None:
        "prints a report to the console"

        change: float = data["delta_percentage"]

        # pick rich markup from the direction and magnitude of the change
        if change < 0.0:
            markup = "[bold red]" if change < -0.5 else "[red]"
        elif change > 0.0:
            markup = "[bold green]" if change > 0.5 else "[green]"
        else:
            markup = ""
        end_markup = markup.replace("[", "[/")

        # build the styled output line piece by piece
        printable = Dissection.make_printable(key, subkey)
        pieces = [
            f" {markup}{printable:<50}{end_markup}",
            f" {data['left_count']:>8} {data['right_count']:>8} {data['delta_absolute']:>8}",
            f" {100*data['left_percentage']:>7.2f} {100*data['right_percentage']:>7.2f} {100*change:>7.2f}",
        ]

        # print it to the rich console
        self.console.print("".join(pieces))