traffic-taffy 0.8.5__py3-none-any.whl → 0.9.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. traffic_taffy/__init__.py +1 -1
  2. traffic_taffy/algorithms/__init__.py +14 -7
  3. traffic_taffy/algorithms/comparecorrelation.py +164 -0
  4. traffic_taffy/algorithms/comparecorrelationchanges.py +210 -0
  5. traffic_taffy/algorithms/compareseries.py +117 -0
  6. traffic_taffy/algorithms/compareslices.py +116 -0
  7. traffic_taffy/algorithms/statistical.py +9 -9
  8. traffic_taffy/compare.py +149 -159
  9. traffic_taffy/comparison.py +18 -4
  10. traffic_taffy/config.py +133 -0
  11. traffic_taffy/dissection.py +78 -6
  12. traffic_taffy/dissectmany.py +26 -16
  13. traffic_taffy/dissector.py +189 -77
  14. traffic_taffy/dissector_engine/scapy.py +41 -8
  15. traffic_taffy/graph.py +54 -53
  16. traffic_taffy/graphdata.py +13 -2
  17. traffic_taffy/hooks/ip2asn.py +20 -7
  18. traffic_taffy/hooks/labels.py +45 -0
  19. traffic_taffy/hooks/psl.py +21 -3
  20. traffic_taffy/output/__init__.py +8 -48
  21. traffic_taffy/output/console.py +37 -25
  22. traffic_taffy/output/fsdb.py +24 -18
  23. traffic_taffy/reports/__init__.py +5 -0
  24. traffic_taffy/reports/compareslicesreport.py +85 -0
  25. traffic_taffy/reports/correlationchangereport.py +54 -0
  26. traffic_taffy/reports/correlationreport.py +42 -0
  27. traffic_taffy/taffy_config.py +44 -0
  28. traffic_taffy/tests/test_compare_results.py +22 -7
  29. traffic_taffy/tests/test_config.py +149 -0
  30. traffic_taffy/tests/test_global_config.py +33 -0
  31. traffic_taffy/tests/test_normalize.py +1 -0
  32. traffic_taffy/tests/test_pcap_dissector.py +12 -2
  33. traffic_taffy/tests/test_pcap_splitter.py +21 -10
  34. traffic_taffy/tools/cache_info.py +3 -2
  35. traffic_taffy/tools/compare.py +32 -24
  36. traffic_taffy/tools/config.py +83 -0
  37. traffic_taffy/tools/dissect.py +51 -59
  38. traffic_taffy/tools/explore.py +5 -4
  39. traffic_taffy/tools/export.py +28 -17
  40. traffic_taffy/tools/graph.py +25 -27
  41. {traffic_taffy-0.8.5.dist-info → traffic_taffy-0.9.1.dist-info}/METADATA +4 -1
  42. traffic_taffy-0.9.1.dist-info/RECORD +56 -0
  43. {traffic_taffy-0.8.5.dist-info → traffic_taffy-0.9.1.dist-info}/entry_points.txt +1 -0
  44. traffic_taffy/report.py +0 -12
  45. traffic_taffy-0.8.5.dist-info/RECORD +0 -43
  46. {traffic_taffy-0.8.5.dist-info → traffic_taffy-0.9.1.dist-info}/WHEEL +0 -0
  47. {traffic_taffy-0.8.5.dist-info → traffic_taffy-0.9.1.dist-info}/licenses/LICENSE.txt +0 -0
traffic_taffy/dissection.py

@@ -61,8 +61,8 @@ class Dissection:
         dissector_level: PCAPDissectorLevel = PCAPDissectorLevel.DETAILED,
         cache_file_suffix: str = "taffy",
         ignore_list: list | None = None,
-        *args: list,
-        **kwargs: dict,
+        *_args: list,
+        **_kwargs: dict,
     ) -> Dissection:
         """Create a Dissection instance."""
         self.pcap_file = pcap_file
@@ -370,6 +370,42 @@ class Dissection:
 
         return contents
 
+    def filter(
+        self: Dissection,
+        timestamps: List[int] | None = None,
+        match_string: str | None = None,
+        match_value: str | None = None,
+        minimum_count: int | None = None,
+        make_printable: bool = False,
+        match_expression: str | None = None,
+    ) -> Dissection:
+        """Create a new dissection filtered by the passed criteria."""
+        debug(
+            f"filtering dissection with: {timestamps=}, {match_string=} {match_value=}, {minimum_count=}, {make_printable=}"
+        )
+        new_dissection: Dissection = Dissection(
+            self.pcap_file,
+            self.pcap_filter,
+            self.maximum_count,
+            self.bin_size,
+            self.dissector_level,
+            self.cache_file_suffix,
+            self.ignore_list,
+        )
+
+        for timestamp, key, subkey, value in self.find_data(
+            timestamps=timestamps,
+            match_string=match_string,
+            match_value=match_value,
+            minimum_count=minimum_count,
+            make_printable=make_printable,
+            match_expression=match_expression,
+        ):
+            new_dissection.data[timestamp][key][subkey] = value
+
+        debug(" done filtering")
+        return new_dissection
+
     def find_data(
         self: Dissection,
         timestamps: List[int] | None = None,
@@ -377,6 +413,7 @@ class Dissection:
         match_value: str | None = None,
         minimum_count: int | None = None,
         make_printable: bool = False,
+        match_expression: str | None = None,
     ) -> list:
         """Search through data for appropriate records."""
         data = self.data
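The filter() method added above builds a fresh Dissection from whatever find_data() yields. A usage sketch (hedged: it assumes an already-loaded `dissection` object, and the thresholds and match strings are invented for illustration; the keyword arguments mirror the signatures in this diff):

    # Sketch only: "dissection" is assumed to be a loaded Dissection.
    filtered = dissection.filter(
        minimum_count=10,     # keep subkeys whose abs(count) >= 10
        match_value=["DNS"],  # find_data() substring-matches subkeys
        make_printable=True,  # render subkeys printable before matching
    )

    # the filtered copy supports the same iteration as the original
    for timestamp, key, subkey, count in filtered.find_data():
        print(timestamp, key, subkey, count)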
@@ -386,6 +423,9 @@ class Dissection:
         if not timestamps:
             timestamps = data.keys()
 
+        match_eval_compiled = None
+        if match_expression:
+            match_eval_compiled = compile(f"{match_expression}", "<string>", "eval")
         # find timestamps/key values with at least one item above count
         # TODO(hardaker): we should really use pandas for this
         usable = defaultdict(set)
@@ -398,15 +438,15 @@ class Dissection:
                 # ensure at least one of the count values for the
                 # stream gets above minimum_count
                 for subkey, count in data[timestamp][key].items():
-                    if (
-                        not minimum_count
-                        or minimum_count
-                        and abs(count) > minimum_count
+                    if not minimum_count or (
+                        minimum_count and abs(count) >= minimum_count
                     ):
                         usable[key].add(subkey)
 
         # TODO(hardaker): move the timestamp inside the other fors for faster
         # processing of skipped key/subkeys
+        globals = {}  # TODO(hardaker): maybe create some in the future
+
         for timestamp in timestamps:
             for key in sorted(data[timestamp]):
                 if key not in usable:
@@ -419,6 +459,7 @@ class Dissection:
                     if subkey not in usable[key]:
                         continue
 
+                    subkey_original = subkey
                     if make_printable:
                         subkey = Dissection.make_printable(key, subkey)
                         count = Dissection.make_printable(None, count)
@@ -426,6 +467,23 @@ class Dissection:
                     if match_value and not any(x in subkey for x in match_value):
                         continue
 
+                    if match_eval_compiled:
+                        result = eval(
+                            match_eval_compiled,
+                            globals,
+                            {
+                                "timestamp": timestamp,
+                                "key": key,
+                                "subkey": subkey,
+                                "value": data[timestamp][key][subkey_original],
+                            },
+                        )
+
+                        # if the evaluation didn't return truthy,
+                        # ignore this entry
+                        if not result:
+                            continue
+
                     yield (timestamp, key, subkey, count)
 
     @staticmethod
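The hunk above shows how match_expression works: the string is compiled once and then eval()'d per record with exactly four locals — timestamp, key, subkey, and value. A sketch of the kind of expression this enables (the expression text and sample record are invented, but the four names are the locals supplied by find_data()):

    expression = "value > 100 and key.startswith('Ethernet_IP_UDP_DNS')"

    compiled = compile(expression, "<string>", "eval")
    sample = {
        "timestamp": 1700000000,
        "key": "Ethernet_IP_UDP_DNS_qd_qname",
        "subkey": "example.com.",
        "value": 250,
    }
    print(eval(compiled, {}, sample))  # True -> the record would be yielded

Since this is a bare eval(), expressions are arbitrary Python and should only come from a trusted operator.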
@@ -497,6 +555,13 @@ class Dissection:
         "Ethernet_IP_UDP_DNS_ns_type": "dns_rrtypes",
         "Ethernet_IP_UDP_DNS_an_type": "dns_rrtypes",
         "Ethernet_IP_UDP_DNS_opcode": "dns_opcodes",
+        "Ethernet_IP_TCP_DNS_qd_qclass": "dns_classes",
+        "Ethernet_IP_TCP_DNS_ns_rclass": "dns_classes",
+        "Ethernet_IP_TCP_DNS_an_rclass": "dns_classes",
+        "Ethernet_IP_TCP_DNS_qd_qtype": "dns_rrtypes",
+        "Ethernet_IP_TCP_DNS_ns_type": "dns_rrtypes",
+        "Ethernet_IP_TCP_DNS_an_type": "dns_rrtypes",
+        "Ethernet_IP_TCP_DNS_opcode": "dns_opcodes",
     }
 
     @staticmethod
@@ -539,6 +604,13 @@ class Dissection:
         "Ethernet_IP_UDP_DNS_ns_type": print_iana_values,
         "Ethernet_IP_UDP_DNS_an_type": print_iana_values,
         "Ethernet_IP_UDP_DNS_opcode": print_iana_values,
+        "Ethernet_IP_TCP_DNS_qd_qclass": print_iana_values,
+        "Ethernet_IP_TCP_DNS_ns_rclass": print_iana_values,
+        "Ethernet_IP_TCP_DNS_an_rclass": print_iana_values,
+        "Ethernet_IP_TCP_DNS_qd_qtype": print_iana_values,
+        "Ethernet_IP_TCP_DNS_ns_type": print_iana_values,
+        "Ethernet_IP_TCP_DNS_an_type": print_iana_values,
+        "Ethernet_IP_TCP_DNS_opcode": print_iana_values,
     }
 
     # has to go at the end to pick up the above function names
traffic_taffy/dissectmany.py

@@ -9,23 +9,28 @@ from pcap_parallel import PCAPParallel
 from typing import List, TYPE_CHECKING
 
 from traffic_taffy.dissector import PCAPDissector
+from traffic_taffy.taffy_config import TT_CFG
 
 if TYPE_CHECKING:
     from io import BufferedIOBase
     from traffic_taffy.dissection import Dissection
+    from traffic_taffy.config import TaffyConfig
 
 
 class PCAPDissectMany:
     """A class for dissecting a number of PCAP files."""
 
-    def __init__(self, pcap_files: List[str], *args: list, **kwargs: dict):
+    def __init__(
+        self, pcap_files: List[str], config: TaffyConfig, *args: list, **kwargs: dict
+    ):
         """Create a PCAPDissectMany instance."""
         self.pcap_files = pcap_files
+        self.config = config
         self.args = args
         self.kwargs = kwargs
         self.futures = {}
 
-        self.maximum_cores = self.kwargs.get("maximum_cores")
+        self.maximum_cores = self.config.get_dotnest("dissect.maximum_cores")
         if not self.maximum_cores:
             # since we're loading multiple files in parallel, reduce the
             # maximum number of cores available to the splitter
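get_dotnest() itself is not part of this diff; judging from its call sites it resolves a dotted key path through nested configuration sections, returning a default when any segment is missing. A guess at its behavior — a stand-in sketch, not taffy's implementation:

    from typing import Any

    def get_dotnest(data: dict, dotted_key: str, default: Any = None) -> Any:
        """Walk 'a.b.c' through nested dicts; hypothetical stand-in."""
        node: Any = data
        for part in dotted_key.split("."):
            if not isinstance(node, dict) or part not in node:
                return default
            node = node[part]
        return node

    config = {"dissect": {"maximum_cores": 8}}
    assert get_dotnest(config, "dissect.maximum_cores") == 8
    assert get_dotnest(config, "dissect.force_overwrite", False) is False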
@@ -34,12 +39,13 @@ class PCAPDissectMany:
 
     def load_pcap_piece(self, pcap_io_buffer: BufferedIOBase) -> Dissection:
         """Load one piece of a pcap from a buffer."""
-        kwargs = copy.copy(self.kwargs)
+        config = copy.deepcopy(self.config)
         # force false for actually loading
-        kwargs["cache_results"] = False
+        config[TT_CFG.CACHE_RESULTS] = False
 
         pd = PCAPDissector(
             pcap_io_buffer,
+            config,
             *self.args,
             **self.kwargs,
         )
@@ -51,28 +57,30 @@ class PCAPDissectMany:
         self,
         pcap_file: str,
         split_size: int | None = None,
+        dont_fork: bool = False,
     ) -> Dissection:
         """Load one pcap file."""
         pd = PCAPDissector(
             pcap_file,
-            *self.args,
-            **self.kwargs,
+            self.config,
         )
         dissection = pd.load_from_cache(
-            force_overwrite=self.kwargs.get("force_overwrite", False),
-            force_load=self.kwargs.get("force_load", False),
+            force_overwrite=self.config.get_dotnest("dissect.force_overwrite", False),
+            force_load=self.config.get_dotnest("dissect.force_load", False),
         )
         if dissection:
             return dissection
 
         info(f"processing {pcap_file}")
-        if isinstance(pcap_file, str) and (
-            pcap_file.endswith(".dnstap") or pcap_file.endswith(".tap")
+        if dont_fork or (
+            isinstance(pcap_file, str)
+            and (pcap_file.endswith(".dnstap") or pcap_file.endswith(".tap"))
         ):
             # deal with dnstap files
 
             # the Dissector already handles loading a dnstap engine
             # TODO(hardaker): see if we can use a splitter here with the framing chunks
+            info("loading without forking -- may be slow")
             dissection = pd.load()
 
         else:  # assume pcap
@@ -80,8 +88,8 @@ class PCAPDissectMany:
             pcap_file,
             split_size=split_size,
             callback=self.load_pcap_piece,
-            maximum_count=self.kwargs.get("maximum_count", 0),
-            maximum_cores=self.maximum_cores,
+            maximum_count=self.config.get_dotnest("dissect.packet_count", 0),
+            maximum_cores=self.config.get_dotnest("dissect.maximum_cores", 20),
         )
         results = ps.split()
@@ -95,12 +103,14 @@ class PCAPDissectMany:
         # recalculate metadata now that merges have happened
         dissection.calculate_metadata()
 
-        if self.kwargs.get("cache_results"):
+        if self.config.get_dotnest("dissect.cache_pcap_results"):
             # create a dissector just to save the cache
             # (we don't call load())
             dissection.pcap_file = pcap_file
             dissection.save_to_cache(
-                pcap_file + "." + self.kwargs.get("cache_file_suffix", "taffy")
+                pcap_file
+                + "."
+                + self.config.get_dotnest("dissect.cache_file_suffix", "taffy")
             )
 
         return dissection
@@ -113,7 +123,7 @@ class PCAPDissectMany:
         # handle each one individually -- typically for inserting debugging stops
         dissections = []
         for pcap_file in self.pcap_files:
-            dissection = self.load_pcap(pcap_file)
+            dissection = self.load_pcap(pcap_file, dont_fork=dont_fork)
             dissections.append(dissection)
         return dissections
 
@@ -122,7 +132,7 @@ class PCAPDissectMany:
             dissections = executor.map(self.load_pcap, self.pcap_files)
 
         # all loaded files should be merged as if they are one
-        if self.kwargs["merge_files"]:
+        if self.config.get_dotnest("dissect.merge", False):
             dissection = next(dissections)
             for to_be_merged in dissections:
                 dissection.merge(to_be_merged)
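Taken together, these hunks move PCAPDissectMany from ad-hoc kwargs to a shared config object. A closing sketch of the new construction path (hedged: TaffyConfig is treated here as a plain nested mapping, its constructor defaults are not visible in this diff, and the pcap file names are invented):

    from traffic_taffy.config import TaffyConfig
    from traffic_taffy.dissectmany import PCAPDissectMany

    config = TaffyConfig()  # assumption: default-constructible
    # keys below are the ones read via get_dotnest() in the code above
    config["dissect"] = {
        "merge": True,             # fold all inputs into one Dissection
        "maximum_cores": 4,        # cap cores handed to the splitter
        "cache_pcap_results": False,
    }

    pdm = PCAPDissectMany(["one.pcap", "two.pcap"], config)
    # the loading entry point is not shown in this diff, so it is omitted here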