traffic-taffy 0.8.5__py3-none-any.whl → 0.9.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. traffic_taffy/__init__.py +1 -1
  2. traffic_taffy/algorithms/__init__.py +14 -7
  3. traffic_taffy/algorithms/comparecorrelation.py +164 -0
  4. traffic_taffy/algorithms/comparecorrelationchanges.py +210 -0
  5. traffic_taffy/algorithms/compareseries.py +117 -0
  6. traffic_taffy/algorithms/compareslices.py +116 -0
  7. traffic_taffy/algorithms/statistical.py +9 -9
  8. traffic_taffy/compare.py +149 -159
  9. traffic_taffy/comparison.py +18 -4
  10. traffic_taffy/config.py +133 -0
  11. traffic_taffy/dissection.py +78 -6
  12. traffic_taffy/dissectmany.py +26 -16
  13. traffic_taffy/dissector.py +189 -77
  14. traffic_taffy/dissector_engine/scapy.py +41 -8
  15. traffic_taffy/graph.py +54 -53
  16. traffic_taffy/graphdata.py +13 -2
  17. traffic_taffy/hooks/ip2asn.py +20 -7
  18. traffic_taffy/hooks/labels.py +45 -0
  19. traffic_taffy/hooks/psl.py +21 -3
  20. traffic_taffy/output/__init__.py +8 -48
  21. traffic_taffy/output/console.py +37 -25
  22. traffic_taffy/output/fsdb.py +24 -18
  23. traffic_taffy/reports/__init__.py +5 -0
  24. traffic_taffy/reports/compareslicesreport.py +85 -0
  25. traffic_taffy/reports/correlationchangereport.py +54 -0
  26. traffic_taffy/reports/correlationreport.py +42 -0
  27. traffic_taffy/taffy_config.py +44 -0
  28. traffic_taffy/tests/test_compare_results.py +22 -7
  29. traffic_taffy/tests/test_config.py +149 -0
  30. traffic_taffy/tests/test_global_config.py +33 -0
  31. traffic_taffy/tests/test_normalize.py +1 -0
  32. traffic_taffy/tests/test_pcap_dissector.py +12 -2
  33. traffic_taffy/tests/test_pcap_splitter.py +21 -10
  34. traffic_taffy/tools/cache_info.py +3 -2
  35. traffic_taffy/tools/compare.py +32 -24
  36. traffic_taffy/tools/config.py +83 -0
  37. traffic_taffy/tools/dissect.py +51 -59
  38. traffic_taffy/tools/explore.py +5 -4
  39. traffic_taffy/tools/export.py +28 -17
  40. traffic_taffy/tools/graph.py +25 -27
  41. {traffic_taffy-0.8.5.dist-info → traffic_taffy-0.9.1.dist-info}/METADATA +4 -1
  42. traffic_taffy-0.9.1.dist-info/RECORD +56 -0
  43. {traffic_taffy-0.8.5.dist-info → traffic_taffy-0.9.1.dist-info}/entry_points.txt +1 -0
  44. traffic_taffy/report.py +0 -12
  45. traffic_taffy-0.8.5.dist-info/RECORD +0 -43
  46. {traffic_taffy-0.8.5.dist-info → traffic_taffy-0.9.1.dist-info}/WHEEL +0 -0
  47. {traffic_taffy-0.8.5.dist-info → traffic_taffy-0.9.1.dist-info}/licenses/LICENSE.txt +0 -0
traffic_taffy/dissection.py

@@ -61,8 +61,8 @@ class Dissection:
         dissector_level: PCAPDissectorLevel = PCAPDissectorLevel.DETAILED,
         cache_file_suffix: str = "taffy",
         ignore_list: list | None = None,
-        *args: list,
-        **kwargs: dict,
+        *_args: list,
+        **_kwargs: dict,
     ) -> Dissection:
         """Create a Dissection instance."""
         self.pcap_file = pcap_file
@@ -370,6 +370,42 @@ class Dissection:
 
         return contents
 
+    def filter(
+        self: Dissection,
+        timestamps: List[int] | None = None,
+        match_string: str | None = None,
+        match_value: str | None = None,
+        minimum_count: int | None = None,
+        make_printable: bool = False,
+        match_expression: str | None = None,
+    ) -> Dissection:
+        """Create a new dissection filtered by the passed criteria."""
+        debug(
+            f"filtering dissection with: {timestamps=}, {match_string=} {match_value=}, {minimum_count=}, {make_printable=}"
+        )
+        new_dissection: Dissection = Dissection(
+            self.pcap_file,
+            self.pcap_filter,
+            self.maximum_count,
+            self.bin_size,
+            self.dissector_level,
+            self.cache_file_suffix,
+            self.ignore_list,
+        )
+
+        for timestamp, key, subkey, value in self.find_data(
+            timestamps=timestamps,
+            match_string=match_string,
+            match_value=match_value,
+            minimum_count=minimum_count,
+            make_printable=make_printable,
+            match_expression=match_expression,
+        ):
+            new_dissection.data[timestamp][key][subkey] = value
+
+        debug(" done filtering")
+        return new_dissection
+
     def find_data(
         self: Dissection,
         timestamps: List[int] | None = None,
@@ -377,6 +413,7 @@ class Dissection:
         match_value: str | None = None,
         minimum_count: int | None = None,
         make_printable: bool = False,
+        match_expression: str | None = None,
     ) -> list:
         """Search through data for appropriate records."""
         data = self.data
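The filter() method added above builds a fresh Dissection from whatever find_data() yields. A usage sketch (hedged: it assumes an already-loaded `dissection` object, and the thresholds and match strings are invented for illustration; the keyword arguments mirror the signatures in this diff):

    # Sketch only: "dissection" is assumed to be a loaded Dissection.
    filtered = dissection.filter(
        minimum_count=10,     # keep subkeys whose abs(count) >= 10
        match_value=["DNS"],  # find_data() substring-matches subkeys
        make_printable=True,  # render subkeys printable before matching
    )

    # the filtered copy supports the same iteration as the original
    for timestamp, key, subkey, count in filtered.find_data():
        print(timestamp, key, subkey, count)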
@@ -386,6 +423,9 @@ class Dissection:
         if not timestamps:
             timestamps = data.keys()
 
+        match_eval_compiled = None
+        if match_expression:
+            match_eval_compiled = compile(f"{match_expression}", "<string>", "eval")
         # find timestamps/key values with at least one item above count
         # TODO(hardaker): we should really use pandas for this
         usable = defaultdict(set)
@@ -398,15 +438,15 @@ class Dissection:
                 # ensure at least one of the count values for the
                 # stream gets above minimum_count
                 for subkey, count in data[timestamp][key].items():
-                    if (
-                        not minimum_count
-                        or minimum_count
-                        and abs(count) > minimum_count
+                    if not minimum_count or (
+                        minimum_count and abs(count) >= minimum_count
                     ):
                         usable[key].add(subkey)
 
         # TODO(hardaker): move the timestamp inside the other fors for faster
         # processing of skipped key/subkeys
+        globals = {}  # TODO(hardaker): maybe create some in the future
+
         for timestamp in timestamps:
             for key in sorted(data[timestamp]):
                 if key not in usable:
@@ -419,6 +459,7 @@ class Dissection:
                     if subkey not in usable[key]:
                         continue
 
+                    subkey_original = subkey
                     if make_printable:
                         subkey = Dissection.make_printable(key, subkey)
                         count = Dissection.make_printable(None, count)
@@ -426,6 +467,23 @@ class Dissection:
                     if match_value and not any(x in subkey for x in match_value):
                         continue
 
+                    if match_eval_compiled:
+                        result = eval(
+                            match_eval_compiled,
+                            globals,
+                            {
+                                "timestamp": timestamp,
+                                "key": key,
+                                "subkey": subkey,
+                                "value": data[timestamp][key][subkey_original],
+                            },
+                        )
+
+                        # if the evaluation didn't return truthy,
+                        # ignore this entry
+                        if not result:
+                            continue
+
                     yield (timestamp, key, subkey, count)
 
     @staticmethod
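The hunk above shows how match_expression works: the string is compiled once and then eval()'d per record with exactly four locals — timestamp, key, subkey, and value. A sketch of the kind of expression this enables (the expression text and sample record are invented, but the four names are the locals supplied by find_data()):

    expression = "value > 100 and key.startswith('Ethernet_IP_UDP_DNS')"

    compiled = compile(expression, "<string>", "eval")
    sample = {
        "timestamp": 1700000000,
        "key": "Ethernet_IP_UDP_DNS_qd_qname",
        "subkey": "example.com.",
        "value": 250,
    }
    print(eval(compiled, {}, sample))  # True -> the record would be yielded

Since this is a bare eval(), expressions are arbitrary Python and should only come from a trusted operator.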
@@ -497,6 +555,13 @@ class Dissection:
         "Ethernet_IP_UDP_DNS_ns_type": "dns_rrtypes",
         "Ethernet_IP_UDP_DNS_an_type": "dns_rrtypes",
         "Ethernet_IP_UDP_DNS_opcode": "dns_opcodes",
+        "Ethernet_IP_TCP_DNS_qd_qclass": "dns_classes",
+        "Ethernet_IP_TCP_DNS_ns_rclass": "dns_classes",
+        "Ethernet_IP_TCP_DNS_an_rclass": "dns_classes",
+        "Ethernet_IP_TCP_DNS_qd_qtype": "dns_rrtypes",
+        "Ethernet_IP_TCP_DNS_ns_type": "dns_rrtypes",
+        "Ethernet_IP_TCP_DNS_an_type": "dns_rrtypes",
+        "Ethernet_IP_TCP_DNS_opcode": "dns_opcodes",
     }
 
     @staticmethod
@@ -539,6 +604,13 @@ class Dissection:
         "Ethernet_IP_UDP_DNS_ns_type": print_iana_values,
         "Ethernet_IP_UDP_DNS_an_type": print_iana_values,
         "Ethernet_IP_UDP_DNS_opcode": print_iana_values,
+        "Ethernet_IP_TCP_DNS_qd_qclass": print_iana_values,
+        "Ethernet_IP_TCP_DNS_ns_rclass": print_iana_values,
+        "Ethernet_IP_TCP_DNS_an_rclass": print_iana_values,
+        "Ethernet_IP_TCP_DNS_qd_qtype": print_iana_values,
+        "Ethernet_IP_TCP_DNS_ns_type": print_iana_values,
+        "Ethernet_IP_TCP_DNS_an_type": print_iana_values,
+        "Ethernet_IP_TCP_DNS_opcode": print_iana_values,
     }
 
     # has to go at the end to pick up the above function names
traffic_taffy/dissectmany.py

@@ -9,23 +9,28 @@ from pcap_parallel import PCAPParallel
 from typing import List, TYPE_CHECKING
 
 from traffic_taffy.dissector import PCAPDissector
+from traffic_taffy.taffy_config import TT_CFG
 
 if TYPE_CHECKING:
     from io import BufferedIOBase
     from traffic_taffy.dissection import Dissection
+    from traffic_taffy.config import TaffyConfig
 
 
 class PCAPDissectMany:
     """A class for dissecting a number of PCAP files."""
 
-    def __init__(self, pcap_files: List[str], *args: list, **kwargs: dict):
+    def __init__(
+        self, pcap_files: List[str], config: TaffyConfig, *args: list, **kwargs: dict
+    ):
         """Create a PCAPDissectMany instance."""
         self.pcap_files = pcap_files
+        self.config = config
         self.args = args
         self.kwargs = kwargs
         self.futures = {}
 
-        self.maximum_cores = self.kwargs.get("maximum_cores")
+        self.maximum_cores = self.config.get_dotnest("dissect.maximum_cores")
         if not self.maximum_cores:
             # since we're loading multiple files in parallel, reduce the
             # maximum number of cores available to the splitter
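get_dotnest() itself is not part of this diff; judging from its call sites it resolves a dotted key path through nested configuration sections, returning a default when any segment is missing. A guess at its behavior — a stand-in sketch, not taffy's implementation:

    from typing import Any

    def get_dotnest(data: dict, dotted_key: str, default: Any = None) -> Any:
        """Walk 'a.b.c' through nested dicts; hypothetical stand-in."""
        node: Any = data
        for part in dotted_key.split("."):
            if not isinstance(node, dict) or part not in node:
                return default
            node = node[part]
        return node

    config = {"dissect": {"maximum_cores": 8}}
    assert get_dotnest(config, "dissect.maximum_cores") == 8
    assert get_dotnest(config, "dissect.force_overwrite", False) is False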
@@ -34,12 +39,13 @@ class PCAPDissectMany:
 
     def load_pcap_piece(self, pcap_io_buffer: BufferedIOBase) -> Dissection:
         """Load one piece of a pcap from a buffer."""
-        kwargs = copy.copy(self.kwargs)
+        config = copy.deepcopy(self.config)
         # force false for actually loading
-        kwargs["cache_results"] = False
+        config[TT_CFG.CACHE_RESULTS] = False
 
         pd = PCAPDissector(
             pcap_io_buffer,
+            config,
             *self.args,
             **self.kwargs,
         )
@@ -51,28 +57,30 @@ class PCAPDissectMany:
         self,
         pcap_file: str,
         split_size: int | None = None,
+        dont_fork: bool = False,
     ) -> Dissection:
         """Load one pcap file."""
         pd = PCAPDissector(
             pcap_file,
-            *self.args,
-            **self.kwargs,
+            self.config,
         )
         dissection = pd.load_from_cache(
-            force_overwrite=self.kwargs.get("force_overwrite", False),
-            force_load=self.kwargs.get("force_load", False),
+            force_overwrite=self.config.get_dotnest("dissect.force_overwrite", False),
+            force_load=self.config.get_dotnest("dissect.force_load", False),
         )
         if dissection:
             return dissection
 
         info(f"processing {pcap_file}")
-        if isinstance(pcap_file, str) and (
-            pcap_file.endswith(".dnstap") or pcap_file.endswith(".tap")
+        if dont_fork or (
+            isinstance(pcap_file, str)
+            and (pcap_file.endswith(".dnstap") or pcap_file.endswith(".tap"))
         ):
             # deal with dnstap files
 
             # the Dissector already handles loading a dnstap engine
             # TODO(hardaker): see if we can use a splitter here with the framing chunks
+            info("loading without forking -- may be slow")
             dissection = pd.load()
 
         else:  # assume pcap
@@ -80,8 +88,8 @@ class PCAPDissectMany:
             pcap_file,
             split_size=split_size,
             callback=self.load_pcap_piece,
-            maximum_count=self.kwargs.get("maximum_count", 0),
-            maximum_cores=self.maximum_cores,
+            maximum_count=self.config.get_dotnest("dissect.packet_count", 0),
+            maximum_cores=self.config.get_dotnest("dissect.maximum_cores", 20),
         )
         results = ps.split()
@@ -95,12 +103,14 @@ class PCAPDissectMany:
         # recalculate metadata now that merges have happened
         dissection.calculate_metadata()
 
-        if self.kwargs.get("cache_results"):
+        if self.config.get_dotnest("dissect.cache_pcap_results"):
             # create a dissector just to save the cache
             # (we don't call load())
             dissection.pcap_file = pcap_file
             dissection.save_to_cache(
-                pcap_file + "." + self.kwargs.get("cache_file_suffix", "taffy")
+                pcap_file
+                + "."
+                + self.config.get_dotnest("dissect.cache_file_suffix", "taffy")
             )
 
         return dissection
@@ -113,7 +123,7 @@ class PCAPDissectMany:
         # handle each one individually -- typically for inserting debugging stops
         dissections = []
         for pcap_file in self.pcap_files:
-            dissection = self.load_pcap(pcap_file)
+            dissection = self.load_pcap(pcap_file, dont_fork=dont_fork)
             dissections.append(dissection)
         return dissections
 
@@ -122,7 +132,7 @@ class PCAPDissectMany:
             dissections = executor.map(self.load_pcap, self.pcap_files)
 
         # all loaded files should be merged as if they are one
-        if self.kwargs["merge_files"]:
+        if self.config.get_dotnest("dissect.merge", False):
             dissection = next(dissections)
             for to_be_merged in dissections:
                 dissection.merge(to_be_merged)
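Taken together, these hunks move PCAPDissectMany from ad-hoc kwargs to a shared config object. A closing sketch of the new construction path (hedged: TaffyConfig is treated here as a plain nested mapping, its constructor defaults are not visible in this diff, and the pcap file names are invented):

    from traffic_taffy.config import TaffyConfig
    from traffic_taffy.dissectmany import PCAPDissectMany

    config = TaffyConfig()  # assumption: default-constructible
    # keys below are the ones read via get_dotnest() in the code above
    config["dissect"] = {
        "merge": True,             # fold all inputs into one Dissection
        "maximum_cores": 4,        # cap cores handed to the splitter
        "cache_pcap_results": False,
    }

    pdm = PCAPDissectMany(["one.pcap", "two.pcap"], config)
    # the loading entry point is not shown in this diff, so it is omitted here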