traffic-taffy 0.8.1__py3-none-any.whl → 0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. traffic_taffy/__init__.py +1 -1
  2. traffic_taffy/algorithms/__init__.py +14 -7
  3. traffic_taffy/algorithms/comparecorrelation.py +164 -0
  4. traffic_taffy/algorithms/comparecorrelationchanges.py +210 -0
  5. traffic_taffy/algorithms/compareseries.py +117 -0
  6. traffic_taffy/algorithms/compareslices.py +116 -0
  7. traffic_taffy/algorithms/statistical.py +9 -9
  8. traffic_taffy/compare.py +149 -159
  9. traffic_taffy/comparison.py +18 -4
  10. traffic_taffy/config.py +133 -0
  11. traffic_taffy/dissection.py +171 -6
  12. traffic_taffy/dissectmany.py +26 -16
  13. traffic_taffy/dissector.py +189 -77
  14. traffic_taffy/dissector_engine/scapy.py +41 -8
  15. traffic_taffy/graph.py +54 -53
  16. traffic_taffy/graphdata.py +13 -2
  17. traffic_taffy/hooks/ip2asn.py +20 -7
  18. traffic_taffy/hooks/labels.py +45 -0
  19. traffic_taffy/hooks/psl.py +21 -3
  20. traffic_taffy/iana/tables.msgpak +0 -0
  21. traffic_taffy/output/__init__.py +8 -48
  22. traffic_taffy/output/console.py +37 -25
  23. traffic_taffy/output/fsdb.py +24 -18
  24. traffic_taffy/reports/__init__.py +5 -0
  25. traffic_taffy/reports/compareslicesreport.py +85 -0
  26. traffic_taffy/reports/correlationchangereport.py +54 -0
  27. traffic_taffy/reports/correlationreport.py +42 -0
  28. traffic_taffy/taffy_config.py +44 -0
  29. traffic_taffy/tests/test_compare_results.py +22 -7
  30. traffic_taffy/tests/test_config.py +149 -0
  31. traffic_taffy/tests/test_global_config.py +33 -0
  32. traffic_taffy/tests/test_normalize.py +1 -0
  33. traffic_taffy/tests/test_pcap_dissector.py +12 -2
  34. traffic_taffy/tests/test_pcap_splitter.py +21 -10
  35. traffic_taffy/tools/cache_info.py +3 -2
  36. traffic_taffy/tools/compare.py +32 -24
  37. traffic_taffy/tools/config.py +83 -0
  38. traffic_taffy/tools/dissect.py +51 -59
  39. traffic_taffy/tools/explore.py +5 -4
  40. traffic_taffy/tools/export.py +28 -17
  41. traffic_taffy/tools/graph.py +25 -27
  42. {traffic_taffy-0.8.1.dist-info → traffic_taffy-0.9.dist-info}/METADATA +4 -1
  43. traffic_taffy-0.9.dist-info/RECORD +56 -0
  44. {traffic_taffy-0.8.1.dist-info → traffic_taffy-0.9.dist-info}/entry_points.txt +1 -0
  45. traffic_taffy/report.py +0 -12
  46. traffic_taffy/tests/test_dpkt_engine.py +0 -15
  47. traffic_taffy-0.8.1.dist-info/RECORD +0 -43
  48. {traffic_taffy-0.8.1.dist-info → traffic_taffy-0.9.dist-info}/WHEEL +0 -0
  49. {traffic_taffy-0.8.1.dist-info → traffic_taffy-0.9.dist-info}/licenses/LICENSE.txt +0 -0
traffic_taffy/dissection.py

@@ -11,6 +11,23 @@ from typing import List
  from copy import deepcopy
  from pathlib import Path
  from traffic_taffy import __VERSION__ as VERSION
+ from io import BytesIO
+ import pkgutil
+
+ # TODO(hardaker): fix to not use a global
+ # note that this is designed to load only once before forking
+ iana_data = None
+ if not iana_data:
+     # try a local copy first
+     if Path("traffic_taffy/iana/tables.msgpakx").exists():
+         iana_data = msgpack.load(Path.open("traffic_taffy/iana/tables.msgpak", "rb"))
+     else:
+         content = pkgutil.get_data("traffic_taffy", "iana/tables.msgpak")
+         if content:
+             content = BytesIO(content)
+             iana_data = msgpack.load(content)
+         else:
+             warning("failed to load IANA data tables -- no enum expansion available")


  class PCAPDissectorLevel(Enum):
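This new module-level block loads the bundled IANA enumeration tables exactly once, before any worker forking, and prefers pkgutil so the lookup also works from an installed wheel (note the local-copy branch tests for tables.msgpakx while opening tables.msgpak, so the pkgutil branch is the one normally taken). A minimal standalone sketch of the same loading pattern, assuming the msgpack package is installed:

```python
from io import BytesIO
import pkgutil

import msgpack

# pkgutil.get_data() resolves the resource relative to the installed
# package, so this works from a wheel as well as a source checkout.
raw = pkgutil.get_data("traffic_taffy", "iana/tables.msgpak")
if raw is not None:
    iana_data = msgpack.load(BytesIO(raw))
    # iana_data maps table names (e.g. "udp_ports") to {value: name} dicts
```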
@@ -44,8 +61,8 @@ class Dissection:
          dissector_level: PCAPDissectorLevel = PCAPDissectorLevel.DETAILED,
          cache_file_suffix: str = "taffy",
          ignore_list: list | None = None,
-         *args: list,
-         **kwargs: dict,
+         *_args: list,
+         **_kwargs: dict,
      ) -> Dissection:
          """Create a Dissection instance."""
          self.pcap_file = pcap_file
@@ -57,6 +74,7 @@ class Dissection:
          self.maximum_count = maximum_count
          self.pcap_filter = pcap_filter
          self.ignore_list = ignore_list or []
+         self.iana_data = defaultdict(dict)

          self.parameters = [
              "pcap_file",
@@ -352,6 +370,42 @@ class Dissection:

          return contents

+     def filter(
+         self: Dissection,
+         timestamps: List[int] | None = None,
+         match_string: str | None = None,
+         match_value: str | None = None,
+         minimum_count: int | None = None,
+         make_printable: bool = False,
+         match_expression: str | None = None,
+     ) -> None:
+         """Creates a new dissection that has been filtered based on passed criteria."""
+         debug(
+             f"filtering dissection with: {timestamps=}, {match_string=} {match_value=}, {minimum_count=}, {make_printable=}"
+         )
+         new_dissection: Dissection = Dissection(
+             self.pcap_file,
+             self.pcap_filter,
+             self.maximum_count,
+             self.bin_size,
+             self.dissector_level,
+             self.cache_file_suffix,
+             self.ignore_list,
+         )
+
+         for timestamp, key, subkey, value in self.find_data(
+             timestamps=timestamps,
+             match_string=match_string,
+             match_value=match_value,
+             minimum_count=minimum_count,
+             make_printable=make_printable,
+             match_expression=match_expression,
+         ):
+             new_dissection.data[timestamp][key][subkey] = value
+
+         debug(" done filtering")
+         return new_dissection
+
      def find_data(
          self: Dissection,
          timestamps: List[int] | None = None,
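The new filter() method builds a second Dissection with the same construction parameters and copies in only the records that find_data() yields, so downstream consumers can work on a reduced dataset (its return annotation says None even though it returns the new dissection). A hypothetical usage sketch, where `dissection` is assumed to be an already-loaded Dissection:

```python
# Keep only high-volume UDP destination-port records; the field name and
# thresholds here are illustrative, not from the traffic_taffy test suite.
smaller = dissection.filter(
    match_string="Ethernet_IP_UDP_dport",
    minimum_count=10,
    match_expression="value > 100",
)
for timestamp, key, subkey, count in smaller.find_data():
    print(timestamp, key, subkey, count)
```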
@@ -359,6 +413,7 @@ class Dissection:
          match_value: str | None = None,
          minimum_count: int | None = None,
          make_printable: bool = False,
+         match_expression: str | None = None,
      ) -> list:
          """Search through data for appropriate records."""
          data = self.data
@@ -368,6 +423,9 @@ class Dissection:
          if not timestamps:
              timestamps = data.keys()

+         match_eval_compiled = None
+         if match_expression:
+             match_eval_compiled = compile(f"{match_expression}", "<string>", "eval")
          # find timestamps/key values with at least one item above count
          # TODO(hardaker): we should really use pandas for this
          usable = defaultdict(set)
@@ -380,15 +438,15 @@ class Dissection:
                  # ensure at least one of the count valuse for the
                  # stream gets above minimum_count
                  for subkey, count in data[timestamp][key].items():
-                     if (
-                         not minimum_count
-                         or minimum_count
-                         and abs(count) > minimum_count
+                     if not minimum_count or (
+                         minimum_count and abs(count) >= minimum_count
                      ):
                          usable[key].add(subkey)

          # TODO(hardaker): move the timestamp inside the other fors for faster
          # processing of skipped key/subkeys
+         globals = {} # TODO(hardaker): maybe create some in the future
+
          for timestamp in timestamps:
              for key in sorted(data[timestamp]):
                  if key not in usable:
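Besides parenthesizing the and/or chain (the old form relied on `and` binding tighter than `or`), this change makes the threshold inclusive: a count exactly equal to minimum_count now qualifies. Since the `minimum_count and` inside the parentheses is redundant once the `not minimum_count` branch has failed, the new test reduces to this logically equivalent form (a sketch, not the project's code):

```python
if not minimum_count or abs(count) >= minimum_count:
    usable[key].add(subkey)
```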
@@ -401,6 +459,7 @@ class Dissection:
                      if subkey not in usable[key]:
                          continue

+                     subkey_original = subkey
                      if make_printable:
                          subkey = Dissection.make_printable(key, subkey)
                          count = Dissection.make_printable(None, count)
@@ -408,6 +467,23 @@ class Dissection:
                      if match_value and not any(x in subkey for x in match_value):
                          continue

+                     if match_eval_compiled:
+                         result = eval(
+                             match_eval_compiled,
+                             globals,
+                             {
+                                 "timestamp": timestamp,
+                                 "key": key,
+                                 "subkey": subkey,
+                                 "value": data[timestamp][key][subkey_original],
+                             },
+                         )
+
+                         # if the evaluation didn't return truthy,
+                         # ignore this entry
+                         if not result:
+                             continue
+
                      yield (timestamp, key, subkey, count)

      @staticmethod
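The expression is compiled once with compile(..., "eval") and then evaluated per record against a locals dict exposing timestamp, key, subkey, and value; subkey_original (saved earlier) keeps the raw subkey usable as a dict index even after make_printable has rewritten subkey. Note that eval() executes arbitrary Python, so match expressions must come from a trusted operator. The compile-once/eval-per-record pattern in isolation:

```python
# Same shape as the code above: compile once, evaluate per record.
expr = compile("value >= 5 and 'dport' in key", "<string>", "eval")

records = [
    ("Ethernet_IP_UDP_dport", "53", 12),
    ("Ethernet_IP_UDP_sport", "53", 3),
]
for key, subkey, value in records:
    if eval(expr, {}, {"key": key, "subkey": subkey, "value": value}):
        print(key, subkey, value)  # only the first record is truthy
```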
@@ -421,6 +497,8 @@ class Dissection:
                      )
                  else:
                      value = "0x" + value.hex()
+             elif value_type in Dissection.ENUM_TRANSLATORS:
+                 value = str(Dissection.ENUM_TRANSLATORS[value_type](value_type, value))
              else:
                  value = str(value)
          except Exception:
@@ -448,6 +526,93 @@ class Dissection:
          """Convert binary bytes to IP addresses (v4 and v6)."""
          return ipaddress.ip_address(value)

+     UDP_PORTS: ClassVar[Dict[str, str]] = {
+         "53": "DNS",
+     }
+
+     IANA_TRANSLATORS: ClassVar[Dict[str, str]] = {
+         "Ethernet_IP_proto": "protocols",
+         "Ethernet_IPv6_proto": "protocols",
+         "Ethernet_IP_UDP_sport": "udp_ports",
+         "Ethernet_IP_UDP_dport": "udp_ports",
+         "Ethernet_IP_TCP_sport": "tcp_ports",
+         "Ethernet_IP_TCP_dport": "tcp_ports",
+         "Ethernet_IPv6_UDP_sport": "udp_ports",
+         "Ethernet_IPv6_UDP_dport": "udp_ports",
+         "Ethernet_IPv6_TCP_sport": "tcp_ports",
+         "Ethernet_IPv6_TCP_dport": "tcp_ports",
+         "Ethernet_IP_ICMP_code": "icmp_codes",
+         "Ethernet_IP_ICMP_type": "icmp_types",
+         "Ethernet_IP_ICMP_IP in ICMP_UDP in ICMP_dport": "udp_ports",
+         "Ethernet_IP_ICMP_IP in ICMP_UDP in ICMP_sport": "udp_ports",
+         "Ethernet_IP_ICMP_IP in ICMP_TCP in ICMP_dport": "tcp_ports",
+         "Ethernet_IP_ICMP_IP in ICMP_TCP in ICMP_sport": "tcp_ports",
+         "Ethernet_IP_ICMP_IP in ICMP_protoc": "protocols",
+         "Ethernet_IP_UDP_DNS_qd_qclass": "dns_classes",
+         "Ethernet_IP_UDP_DNS_ns_rclass": "dns_classes",
+         "Ethernet_IP_UDP_DNS_an_rclass": "dns_classes",
+         "Ethernet_IP_UDP_DNS_qd_qtype": "dns_rrtypes",
+         "Ethernet_IP_UDP_DNS_ns_type": "dns_rrtypes",
+         "Ethernet_IP_UDP_DNS_an_type": "dns_rrtypes",
+         "Ethernet_IP_UDP_DNS_opcode": "dns_opcodes",
+         "Ethernet_IP_TCP_DNS_qd_qclass": "dns_classes",
+         "Ethernet_IP_TCP_DNS_ns_rclass": "dns_classes",
+         "Ethernet_IP_TCP_DNS_an_rclass": "dns_classes",
+         "Ethernet_IP_TCP_DNS_qd_qtype": "dns_rrtypes",
+         "Ethernet_IP_TCP_DNS_ns_type": "dns_rrtypes",
+         "Ethernet_IP_TCP_DNS_an_type": "dns_rrtypes",
+         "Ethernet_IP_TCP_DNS_opcode": "dns_opcodes",
+     }
+
+     @staticmethod
+     def print_iana_values(value_type: str, value: bytes) -> str:
+         """Use IANA lookup tables for converting protocol enumerations to human readable types."""
+         table_name = Dissection.IANA_TRANSLATORS.get(value_type)
+
+         if not table_name:
+             return value
+
+         table = iana_data[table_name]
+         value = str(value)
+         if value not in table:
+             return value
+
+         return f"{value} ({table[value]})"
+
+     ENUM_TRANSLATORS: ClassVar[Dict[str, callable]] = {
+         "Ethernet_IP_proto": print_iana_values,
+         "Ethernet_IPv6_proto": print_iana_values,
+         "Ethernet_IP_UDP_sport": print_iana_values,
+         "Ethernet_IP_UDP_dport": print_iana_values,
+         "Ethernet_IP_TCP_sport": print_iana_values,
+         "Ethernet_IP_TCP_dport": print_iana_values,
+         "Ethernet_IP_ICMP_IP in ICMP_UDP in ICMP_dport": print_iana_values,
+         "Ethernet_IP_ICMP_IP in ICMP_UDP in ICMP_sport": print_iana_values,
+         "Ethernet_IP_ICMP_IP in ICMP_TCP in ICMP_dport": print_iana_values,
+         "Ethernet_IP_ICMP_IP in ICMP_TCP in ICMP_sport": print_iana_values,
+         "Ethernet_IP_ICMP_IP in ICMP_proto": print_iana_values,
+         "Ethernet_IPv6_UDP_sport": print_iana_values,
+         "Ethernet_IPv6_UDP_dport": print_iana_values,
+         "Ethernet_IPv6_TCP_sport": print_iana_values,
+         "Ethernet_IPv6_TCP_dport": print_iana_values,
+         "Ethernet_IP_ICMP_code": print_iana_values,
+         "Ethernet_IP_ICMP_type": print_iana_values,
+         "Ethernet_IP_UDP_DNS_qd_qclass": print_iana_values,
+         "Ethernet_IP_UDP_DNS_ns_rclass": print_iana_values,
+         "Ethernet_IP_UDP_DNS_an_rclass": print_iana_values,
+         "Ethernet_IP_UDP_DNS_qd_qtype": print_iana_values,
+         "Ethernet_IP_UDP_DNS_ns_type": print_iana_values,
+         "Ethernet_IP_UDP_DNS_an_type": print_iana_values,
+         "Ethernet_IP_UDP_DNS_opcode": print_iana_values,
+         "Ethernet_IP_TCP_DNS_qd_qclass": print_iana_values,
+         "Ethernet_IP_TCP_DNS_ns_rclass": print_iana_values,
+         "Ethernet_IP_TCP_DNS_an_rclass": print_iana_values,
+         "Ethernet_IP_TCP_DNS_qd_qtype": print_iana_values,
+         "Ethernet_IP_TCP_DNS_ns_type": print_iana_values,
+         "Ethernet_IP_TCP_DNS_an_type": print_iana_values,
+         "Ethernet_IP_TCP_DNS_opcode": print_iana_values,
+     }
+
      # has to go at the end to pick up the above function names
      DISPLAY_TRANSFORMERS: ClassVar[Dict[str, callable]] = {
          "Ethernet_IP_src": print_ip_address,
traffic_taffy/dissectmany.py

@@ -9,23 +9,28 @@ from pcap_parallel import PCAPParallel
  from typing import List, TYPE_CHECKING

  from traffic_taffy.dissector import PCAPDissector
+ from traffic_taffy.taffy_config import TT_CFG

  if TYPE_CHECKING:
      from io import BufferedIOBase
      from traffic_taffy.dissection import Dissection
+     from traffic_taffy.config import TaffyConfig


  class PCAPDissectMany:
      """A class for dissecting a number of PCAP files."""

-     def __init__(self, pcap_files: List[str], *args: list, **kwargs: dict):
+     def __init__(
+         self, pcap_files: List[str], config: TaffyConfig, *args: list, **kwargs: dict
+     ):
          """Create a PCAPDissectMany instance."""
          self.pcap_files = pcap_files
+         self.config = config
          self.args = args
          self.kwargs = kwargs
          self.futures = {}

-         self.maximum_cores = self.kwargs.get("maximum_cores")
+         self.maximum_cores = self.config.get_dotnest("dissect.maximum_cores")
          if not self.maximum_cores:
              # since we're loading multiple files in parallel, reduce the
              # maximum number of cores available to the splitter
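PCAPDissectMany now receives a TaffyConfig object instead of mining its kwargs, and reads settings through get_dotnest() with dot-separated paths such as "dissect.maximum_cores". The real implementation lives in the new config.py; the sketch below is only a minimal illustration of the dotted-lookup behavior this usage implies, not traffic_taffy's code:

```python
def get_dotnest(config: dict, path: str, default=None):
    """Walk a nested dict by a dot-separated key path."""
    node = config
    for part in path.split("."):
        if not isinstance(node, dict) or part not in node:
            return default
        node = node[part]
    return node

cfg = {"dissect": {"maximum_cores": 8}}
assert get_dotnest(cfg, "dissect.maximum_cores") == 8
assert get_dotnest(cfg, "dissect.force_load", False) is False
```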
@@ -34,12 +39,13 @@ class PCAPDissectMany:

      def load_pcap_piece(self, pcap_io_buffer: BufferedIOBase) -> Dissection:
          """Load one piece of a pcap from a buffer."""
-         kwargs = copy.copy(self.kwargs)
+         config = copy.deepcopy(self.config)
          # force false for actually loading
-         kwargs["cache_results"] = False
+         config[TT_CFG.CACHE_RESULTS] = False

          pd = PCAPDissector(
              pcap_io_buffer,
+             config,
              *self.args,
              **self.kwargs,
          )
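Each split-off piece now deep-copies the config before forcing caching off, where the old code only shallow-copied kwargs; with nested config sections, a shallow copy would let one worker's mutation leak into its siblings. The difference in miniature:

```python
import copy

base = {"dissect": {"cache_pcap_results": True}}

shallow = copy.copy(base)
shallow["dissect"]["cache_pcap_results"] = False  # also flips base!
assert base["dissect"]["cache_pcap_results"] is False

base["dissect"]["cache_pcap_results"] = True
deep = copy.deepcopy(base)
deep["dissect"]["cache_pcap_results"] = False     # base stays intact
assert base["dissect"]["cache_pcap_results"] is True
```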
@@ -51,28 +57,30 @@ class PCAPDissectMany:
          self,
          pcap_file: str,
          split_size: int | None = None,
+         dont_fork: bool = False,
      ) -> Dissection:
          """Load one pcap file."""
          pd = PCAPDissector(
              pcap_file,
-             *self.args,
-             **self.kwargs,
+             self.config,
          )
          dissection = pd.load_from_cache(
-             force_overwrite=self.kwargs.get("force_overwrite", False),
-             force_load=self.kwargs.get("force_load", False),
+             force_overwrite=self.config.get_dotnest("dissect.force_overwrite", False),
+             force_load=self.config.get_dotnest("dissect.force_load", False),
          )
          if dissection:
              return dissection

          info(f"processing {pcap_file}")
-         if isinstance(pcap_file, str) and (
-             pcap_file.endswith(".dnstap") or pcap_file.endswith(".tap")
+         if dont_fork or (
+             isinstance(pcap_file, str)
+             and (pcap_file.endswith(".dnstap") or pcap_file.endswith(".tap"))
          ):
              # deal with dnstap files

              # the Dissector already handles loading a dnstap engine
              # TODO(hardaker): see if we can use a splitter here with the framing chunks
+             info("loading without forking -- may be slow")
              dissection = pd.load()

          else: # assume pcap
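The new dont_fork flag forces the same single-process load() path that dnstap/.tap files already take; it is slower but easier to step through in a debugger. A hypothetical call (assumes a TaffyConfig instance named config built elsewhere):

```python
pdm = PCAPDissectMany(["capture.pcap"], config)
dissection = pdm.load_pcap("capture.pcap", dont_fork=True)
```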
@@ -80,8 +88,8 @@ class PCAPDissectMany:
                  pcap_file,
                  split_size=split_size,
                  callback=self.load_pcap_piece,
-                 maximum_count=self.kwargs.get("maximum_count", 0),
-                 maximum_cores=self.maximum_cores,
+                 maximum_count=self.config.get_dotnest("dissect.packet_count", 0),
+                 maximum_cores=self.config.get_dotnest("dissect.maximum_cores", 20),
              )
              results = ps.split()

@@ -95,12 +103,14 @@ class PCAPDissectMany:
          # recalculate metadata now that merges have happened
          dissection.calculate_metadata()

-         if self.kwargs.get("cache_results"):
+         if self.config.get_dotnest("dissect.cache_pcap_results"):
              # create a dissector just to save the cache
              # (we don't call load())
              dissection.pcap_file = pcap_file
              dissection.save_to_cache(
-                 pcap_file + "." + self.kwargs.get("cache_file_suffix", "taffy")
+                 pcap_file
+                 + "."
+                 + self.config.get_dotnest("dissect.cache_file_suffix", "taffy")
              )

          return dissection
@@ -113,7 +123,7 @@ class PCAPDissectMany:
          # handle each one individually -- typically for inserting debugging stops
          dissections = []
          for pcap_file in self.pcap_files:
-             dissection = self.load_pcap(pcap_file)
+             dissection = self.load_pcap(pcap_file, dont_fork=dont_fork)
              dissections.append(dissection)
          return dissections

@@ -122,7 +132,7 @@ class PCAPDissectMany:
          dissections = executor.map(self.load_pcap, self.pcap_files)

          # all loaded files should be merged as if they are one
-         if self.kwargs["merge_files"]:
+         if self.config.get_dotnest("dissect.merge", False):
              dissection = next(dissections)
              for to_be_merged in dissections:
                  dissection.merge(to_be_merged)
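Because executor.map() returns a lazy iterator, next(dissections) takes the first finished dissection as the accumulator and the loop folds each remaining one into it via merge(); the config lookup also gains a False default where kwargs["merge_files"] would have raised KeyError when unset. The fold pattern by itself (toy class, not traffic_taffy code):

```python
class Counts:
    """Toy mergeable result standing in for a Dissection."""

    def __init__(self, data: dict):
        self.data = data

    def merge(self, other: "Counts") -> None:
        for k, v in other.data.items():
            self.data[k] = self.data.get(k, 0) + v

results = iter([Counts({"a": 1}), Counts({"a": 2, "b": 1})])
merged = next(results)      # first result becomes the accumulator
for other in results:       # the iterator resumes after that first item
    merged.merge(other)
assert merged.data == {"a": 3, "b": 1}
```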