traffic-taffy 0.8.1__py3-none-any.whl → 0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. traffic_taffy/__init__.py +1 -1
  2. traffic_taffy/algorithms/__init__.py +14 -7
  3. traffic_taffy/algorithms/comparecorrelation.py +164 -0
  4. traffic_taffy/algorithms/comparecorrelationchanges.py +210 -0
  5. traffic_taffy/algorithms/compareseries.py +117 -0
  6. traffic_taffy/algorithms/compareslices.py +116 -0
  7. traffic_taffy/algorithms/statistical.py +9 -9
  8. traffic_taffy/compare.py +149 -159
  9. traffic_taffy/comparison.py +18 -4
  10. traffic_taffy/config.py +133 -0
  11. traffic_taffy/dissection.py +171 -6
  12. traffic_taffy/dissectmany.py +26 -16
  13. traffic_taffy/dissector.py +189 -77
  14. traffic_taffy/dissector_engine/scapy.py +41 -8
  15. traffic_taffy/graph.py +54 -53
  16. traffic_taffy/graphdata.py +13 -2
  17. traffic_taffy/hooks/ip2asn.py +20 -7
  18. traffic_taffy/hooks/labels.py +45 -0
  19. traffic_taffy/hooks/psl.py +21 -3
  20. traffic_taffy/iana/tables.msgpak +0 -0
  21. traffic_taffy/output/__init__.py +8 -48
  22. traffic_taffy/output/console.py +37 -25
  23. traffic_taffy/output/fsdb.py +24 -18
  24. traffic_taffy/reports/__init__.py +5 -0
  25. traffic_taffy/reports/compareslicesreport.py +85 -0
  26. traffic_taffy/reports/correlationchangereport.py +54 -0
  27. traffic_taffy/reports/correlationreport.py +42 -0
  28. traffic_taffy/taffy_config.py +44 -0
  29. traffic_taffy/tests/test_compare_results.py +22 -7
  30. traffic_taffy/tests/test_config.py +149 -0
  31. traffic_taffy/tests/test_global_config.py +33 -0
  32. traffic_taffy/tests/test_normalize.py +1 -0
  33. traffic_taffy/tests/test_pcap_dissector.py +12 -2
  34. traffic_taffy/tests/test_pcap_splitter.py +21 -10
  35. traffic_taffy/tools/cache_info.py +3 -2
  36. traffic_taffy/tools/compare.py +32 -24
  37. traffic_taffy/tools/config.py +83 -0
  38. traffic_taffy/tools/dissect.py +51 -59
  39. traffic_taffy/tools/explore.py +5 -4
  40. traffic_taffy/tools/export.py +28 -17
  41. traffic_taffy/tools/graph.py +25 -27
  42. {traffic_taffy-0.8.1.dist-info → traffic_taffy-0.9.dist-info}/METADATA +4 -1
  43. traffic_taffy-0.9.dist-info/RECORD +56 -0
  44. {traffic_taffy-0.8.1.dist-info → traffic_taffy-0.9.dist-info}/entry_points.txt +1 -0
  45. traffic_taffy/report.py +0 -12
  46. traffic_taffy/tests/test_dpkt_engine.py +0 -15
  47. traffic_taffy-0.8.1.dist-info/RECORD +0 -43
  48. {traffic_taffy-0.8.1.dist-info → traffic_taffy-0.9.dist-info}/WHEEL +0 -0
  49. {traffic_taffy-0.8.1.dist-info → traffic_taffy-0.9.dist-info}/licenses/LICENSE.txt +0 -0
traffic_taffy/dissection.py

@@ -11,6 +11,23 @@ from typing import List
  from copy import deepcopy
  from pathlib import Path
  from traffic_taffy import __VERSION__ as VERSION
+ from io import BytesIO
+ import pkgutil
+
+ # TODO(hardaker): fix to not use a global
+ # note that this is designed to load only once before forking
+ iana_data = None
+ if not iana_data:
+     # try a local copy first
+     if Path("traffic_taffy/iana/tables.msgpakx").exists():
+         iana_data = msgpack.load(Path.open("traffic_taffy/iana/tables.msgpak", "rb"))
+     else:
+         content = pkgutil.get_data("traffic_taffy", "iana/tables.msgpak")
+         if content:
+             content = BytesIO(content)
+             iana_data = msgpack.load(content)
+         else:
+             warning("failed to load IANA data tables -- no enum expansion available")


  class PCAPDissectorLevel(Enum):
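This new module-level block loads the bundled IANA enumeration tables exactly once, before any worker forking, and prefers pkgutil so the lookup also works from an installed wheel (note the local-copy branch tests for tables.msgpakx while opening tables.msgpak, so the pkgutil branch is the one normally taken). A minimal standalone sketch of the same loading pattern, assuming the msgpack package is installed:

```python
from io import BytesIO
import pkgutil

import msgpack

# pkgutil.get_data() resolves the resource relative to the installed
# package, so this works from a wheel as well as a source checkout.
raw = pkgutil.get_data("traffic_taffy", "iana/tables.msgpak")
if raw is not None:
    iana_data = msgpack.load(BytesIO(raw))
    # iana_data maps table names (e.g. "udp_ports") to {value: name} dicts
```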
@@ -44,8 +61,8 @@ class Dissection:
          dissector_level: PCAPDissectorLevel = PCAPDissectorLevel.DETAILED,
          cache_file_suffix: str = "taffy",
          ignore_list: list | None = None,
-         *args: list,
-         **kwargs: dict,
+         *_args: list,
+         **_kwargs: dict,
      ) -> Dissection:
          """Create a Dissection instance."""
          self.pcap_file = pcap_file
@@ -57,6 +74,7 @@ class Dissection:
          self.maximum_count = maximum_count
          self.pcap_filter = pcap_filter
          self.ignore_list = ignore_list or []
+         self.iana_data = defaultdict(dict)

          self.parameters = [
              "pcap_file",
@@ -352,6 +370,42 @@ class Dissection:

          return contents

+     def filter(
+         self: Dissection,
+         timestamps: List[int] | None = None,
+         match_string: str | None = None,
+         match_value: str | None = None,
+         minimum_count: int | None = None,
+         make_printable: bool = False,
+         match_expression: str | None = None,
+     ) -> None:
+         """Creates a new dissection that has been filtered based on passed criteria."""
+         debug(
+             f"filtering dissection with: {timestamps=}, {match_string=} {match_value=}, {minimum_count=}, {make_printable=}"
+         )
+         new_dissection: Dissection = Dissection(
+             self.pcap_file,
+             self.pcap_filter,
+             self.maximum_count,
+             self.bin_size,
+             self.dissector_level,
+             self.cache_file_suffix,
+             self.ignore_list,
+         )
+
+         for timestamp, key, subkey, value in self.find_data(
+             timestamps=timestamps,
+             match_string=match_string,
+             match_value=match_value,
+             minimum_count=minimum_count,
+             make_printable=make_printable,
+             match_expression=match_expression,
+         ):
+             new_dissection.data[timestamp][key][subkey] = value
+
+         debug(" done filtering")
+         return new_dissection
+
      def find_data(
          self: Dissection,
          timestamps: List[int] | None = None,
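The new filter() method builds a second Dissection with the same construction parameters and copies in only the records that find_data() yields, so downstream consumers can work on a reduced dataset (its return annotation says None even though it returns the new dissection). A hypothetical usage sketch, where `dissection` is assumed to be an already-loaded Dissection:

```python
# Keep only high-volume UDP destination-port records; the field name and
# thresholds here are illustrative, not from the traffic_taffy test suite.
smaller = dissection.filter(
    match_string="Ethernet_IP_UDP_dport",
    minimum_count=10,
    match_expression="value > 100",
)
for timestamp, key, subkey, count in smaller.find_data():
    print(timestamp, key, subkey, count)
```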
@@ -359,6 +413,7 @@ class Dissection:
          match_value: str | None = None,
          minimum_count: int | None = None,
          make_printable: bool = False,
+         match_expression: str | None = None,
      ) -> list:
          """Search through data for appropriate records."""
          data = self.data
@@ -368,6 +423,9 @@ class Dissection:
          if not timestamps:
              timestamps = data.keys()

+         match_eval_compiled = None
+         if match_expression:
+             match_eval_compiled = compile(f"{match_expression}", "<string>", "eval")
          # find timestamps/key values with at least one item above count
          # TODO(hardaker): we should really use pandas for this
          usable = defaultdict(set)
@@ -380,15 +438,15 @@ class Dissection:
                  # ensure at least one of the count valuse for the
                  # stream gets above minimum_count
                  for subkey, count in data[timestamp][key].items():
-                     if (
-                         not minimum_count
-                         or minimum_count
-                         and abs(count) > minimum_count
+                     if not minimum_count or (
+                         minimum_count and abs(count) >= minimum_count
                      ):
                          usable[key].add(subkey)

          # TODO(hardaker): move the timestamp inside the other fors for faster
          # processing of skipped key/subkeys
+         globals = {} # TODO(hardaker): maybe create some in the future
+
          for timestamp in timestamps:
              for key in sorted(data[timestamp]):
                  if key not in usable:
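Besides parenthesizing the and/or chain (the old form relied on `and` binding tighter than `or`), this change makes the threshold inclusive: a count exactly equal to minimum_count now qualifies. Since the `minimum_count and` inside the parentheses is redundant once the `not minimum_count` branch has failed, the new test reduces to this logically equivalent form (a sketch, not the project's code):

```python
if not minimum_count or abs(count) >= minimum_count:
    usable[key].add(subkey)
```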
@@ -401,6 +459,7 @@ class Dissection:
                      if subkey not in usable[key]:
                          continue

+                     subkey_original = subkey
                      if make_printable:
                          subkey = Dissection.make_printable(key, subkey)
                          count = Dissection.make_printable(None, count)
@@ -408,6 +467,23 @@ class Dissection:
                      if match_value and not any(x in subkey for x in match_value):
                          continue

+                     if match_eval_compiled:
+                         result = eval(
+                             match_eval_compiled,
+                             globals,
+                             {
+                                 "timestamp": timestamp,
+                                 "key": key,
+                                 "subkey": subkey,
+                                 "value": data[timestamp][key][subkey_original],
+                             },
+                         )
+
+                         # if the evaluation didn't return truthy,
+                         # ignore this entry
+                         if not result:
+                             continue
+
                      yield (timestamp, key, subkey, count)

      @staticmethod
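The expression is compiled once with compile(..., "eval") and then evaluated per record against a locals dict exposing timestamp, key, subkey, and value; subkey_original (saved earlier) keeps the raw subkey usable as a dict index even after make_printable has rewritten subkey. Note that eval() executes arbitrary Python, so match expressions must come from a trusted operator. The compile-once/eval-per-record pattern in isolation:

```python
# Same shape as the code above: compile once, evaluate per record.
expr = compile("value >= 5 and 'dport' in key", "<string>", "eval")

records = [
    ("Ethernet_IP_UDP_dport", "53", 12),
    ("Ethernet_IP_UDP_sport", "53", 3),
]
for key, subkey, value in records:
    if eval(expr, {}, {"key": key, "subkey": subkey, "value": value}):
        print(key, subkey, value)  # only the first record is truthy
```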
@@ -421,6 +497,8 @@ class Dissection:
                      )
                  else:
                      value = "0x" + value.hex()
+             elif value_type in Dissection.ENUM_TRANSLATORS:
+                 value = str(Dissection.ENUM_TRANSLATORS[value_type](value_type, value))
              else:
                  value = str(value)
          except Exception:
@@ -448,6 +526,93 @@ class Dissection:
          """Convert binary bytes to IP addresses (v4 and v6)."""
          return ipaddress.ip_address(value)

+     UDP_PORTS: ClassVar[Dict[str, str]] = {
+         "53": "DNS",
+     }
+
+     IANA_TRANSLATORS: ClassVar[Dict[str, str]] = {
+         "Ethernet_IP_proto": "protocols",
+         "Ethernet_IPv6_proto": "protocols",
+         "Ethernet_IP_UDP_sport": "udp_ports",
+         "Ethernet_IP_UDP_dport": "udp_ports",
+         "Ethernet_IP_TCP_sport": "tcp_ports",
+         "Ethernet_IP_TCP_dport": "tcp_ports",
+         "Ethernet_IPv6_UDP_sport": "udp_ports",
+         "Ethernet_IPv6_UDP_dport": "udp_ports",
+         "Ethernet_IPv6_TCP_sport": "tcp_ports",
+         "Ethernet_IPv6_TCP_dport": "tcp_ports",
+         "Ethernet_IP_ICMP_code": "icmp_codes",
+         "Ethernet_IP_ICMP_type": "icmp_types",
+         "Ethernet_IP_ICMP_IP in ICMP_UDP in ICMP_dport": "udp_ports",
+         "Ethernet_IP_ICMP_IP in ICMP_UDP in ICMP_sport": "udp_ports",
+         "Ethernet_IP_ICMP_IP in ICMP_TCP in ICMP_dport": "tcp_ports",
+         "Ethernet_IP_ICMP_IP in ICMP_TCP in ICMP_sport": "tcp_ports",
+         "Ethernet_IP_ICMP_IP in ICMP_protoc": "protocols",
+         "Ethernet_IP_UDP_DNS_qd_qclass": "dns_classes",
+         "Ethernet_IP_UDP_DNS_ns_rclass": "dns_classes",
+         "Ethernet_IP_UDP_DNS_an_rclass": "dns_classes",
+         "Ethernet_IP_UDP_DNS_qd_qtype": "dns_rrtypes",
+         "Ethernet_IP_UDP_DNS_ns_type": "dns_rrtypes",
+         "Ethernet_IP_UDP_DNS_an_type": "dns_rrtypes",
+         "Ethernet_IP_UDP_DNS_opcode": "dns_opcodes",
+         "Ethernet_IP_TCP_DNS_qd_qclass": "dns_classes",
+         "Ethernet_IP_TCP_DNS_ns_rclass": "dns_classes",
+         "Ethernet_IP_TCP_DNS_an_rclass": "dns_classes",
+         "Ethernet_IP_TCP_DNS_qd_qtype": "dns_rrtypes",
+         "Ethernet_IP_TCP_DNS_ns_type": "dns_rrtypes",
+         "Ethernet_IP_TCP_DNS_an_type": "dns_rrtypes",
+         "Ethernet_IP_TCP_DNS_opcode": "dns_opcodes",
+     }
+
+     @staticmethod
+     def print_iana_values(value_type: str, value: bytes) -> str:
+         """Use IANA lookup tables for converting protocol enumerations to human readable types."""
+         table_name = Dissection.IANA_TRANSLATORS.get(value_type)
+
+         if not table_name:
+             return value
+
+         table = iana_data[table_name]
+         value = str(value)
+         if value not in table:
+             return value
+
+         return f"{value} ({table[value]})"
+
+     ENUM_TRANSLATORS: ClassVar[Dict[str, callable]] = {
+         "Ethernet_IP_proto": print_iana_values,
+         "Ethernet_IPv6_proto": print_iana_values,
+         "Ethernet_IP_UDP_sport": print_iana_values,
+         "Ethernet_IP_UDP_dport": print_iana_values,
+         "Ethernet_IP_TCP_sport": print_iana_values,
+         "Ethernet_IP_TCP_dport": print_iana_values,
+         "Ethernet_IP_ICMP_IP in ICMP_UDP in ICMP_dport": print_iana_values,
+         "Ethernet_IP_ICMP_IP in ICMP_UDP in ICMP_sport": print_iana_values,
+         "Ethernet_IP_ICMP_IP in ICMP_TCP in ICMP_dport": print_iana_values,
+         "Ethernet_IP_ICMP_IP in ICMP_TCP in ICMP_sport": print_iana_values,
+         "Ethernet_IP_ICMP_IP in ICMP_proto": print_iana_values,
+         "Ethernet_IPv6_UDP_sport": print_iana_values,
+         "Ethernet_IPv6_UDP_dport": print_iana_values,
+         "Ethernet_IPv6_TCP_sport": print_iana_values,
+         "Ethernet_IPv6_TCP_dport": print_iana_values,
+         "Ethernet_IP_ICMP_code": print_iana_values,
+         "Ethernet_IP_ICMP_type": print_iana_values,
+         "Ethernet_IP_UDP_DNS_qd_qclass": print_iana_values,
+         "Ethernet_IP_UDP_DNS_ns_rclass": print_iana_values,
+         "Ethernet_IP_UDP_DNS_an_rclass": print_iana_values,
+         "Ethernet_IP_UDP_DNS_qd_qtype": print_iana_values,
+         "Ethernet_IP_UDP_DNS_ns_type": print_iana_values,
+         "Ethernet_IP_UDP_DNS_an_type": print_iana_values,
+         "Ethernet_IP_UDP_DNS_opcode": print_iana_values,
+         "Ethernet_IP_TCP_DNS_qd_qclass": print_iana_values,
+         "Ethernet_IP_TCP_DNS_ns_rclass": print_iana_values,
+         "Ethernet_IP_TCP_DNS_an_rclass": print_iana_values,
+         "Ethernet_IP_TCP_DNS_qd_qtype": print_iana_values,
+         "Ethernet_IP_TCP_DNS_ns_type": print_iana_values,
+         "Ethernet_IP_TCP_DNS_an_type": print_iana_values,
+         "Ethernet_IP_TCP_DNS_opcode": print_iana_values,
+     }
+
      # has to go at the end to pick up the above function names
      DISPLAY_TRANSFORMERS: ClassVar[Dict[str, callable]] = {
          "Ethernet_IP_src": print_ip_address,
traffic_taffy/dissectmany.py

@@ -9,23 +9,28 @@ from pcap_parallel import PCAPParallel
  from typing import List, TYPE_CHECKING

  from traffic_taffy.dissector import PCAPDissector
+ from traffic_taffy.taffy_config import TT_CFG

  if TYPE_CHECKING:
      from io import BufferedIOBase
      from traffic_taffy.dissection import Dissection
+     from traffic_taffy.config import TaffyConfig


  class PCAPDissectMany:
      """A class for dissecting a number of PCAP files."""

-     def __init__(self, pcap_files: List[str], *args: list, **kwargs: dict):
+     def __init__(
+         self, pcap_files: List[str], config: TaffyConfig, *args: list, **kwargs: dict
+     ):
          """Create a PCAPDissectMany instance."""
          self.pcap_files = pcap_files
+         self.config = config
          self.args = args
          self.kwargs = kwargs
          self.futures = {}

-         self.maximum_cores = self.kwargs.get("maximum_cores")
+         self.maximum_cores = self.config.get_dotnest("dissect.maximum_cores")
          if not self.maximum_cores:
              # since we're loading multiple files in parallel, reduce the
              # maximum number of cores available to the splitter
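PCAPDissectMany now receives a TaffyConfig object instead of mining its kwargs, and reads settings through get_dotnest() with dot-separated paths such as "dissect.maximum_cores". The real implementation lives in the new config.py; the sketch below is only a minimal illustration of the dotted-lookup behavior this usage implies, not traffic_taffy's code:

```python
def get_dotnest(config: dict, path: str, default=None):
    """Walk a nested dict by a dot-separated key path."""
    node = config
    for part in path.split("."):
        if not isinstance(node, dict) or part not in node:
            return default
        node = node[part]
    return node

cfg = {"dissect": {"maximum_cores": 8}}
assert get_dotnest(cfg, "dissect.maximum_cores") == 8
assert get_dotnest(cfg, "dissect.force_load", False) is False
```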
@@ -34,12 +39,13 @@ class PCAPDissectMany:

      def load_pcap_piece(self, pcap_io_buffer: BufferedIOBase) -> Dissection:
          """Load one piece of a pcap from a buffer."""
-         kwargs = copy.copy(self.kwargs)
+         config = copy.deepcopy(self.config)
          # force false for actually loading
-         kwargs["cache_results"] = False
+         config[TT_CFG.CACHE_RESULTS] = False

          pd = PCAPDissector(
              pcap_io_buffer,
+             config,
              *self.args,
              **self.kwargs,
          )
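Each split-off piece now deep-copies the config before forcing caching off, where the old code only shallow-copied kwargs; with nested config sections, a shallow copy would let one worker's mutation leak into its siblings. The difference in miniature:

```python
import copy

base = {"dissect": {"cache_pcap_results": True}}

shallow = copy.copy(base)
shallow["dissect"]["cache_pcap_results"] = False  # also flips base!
assert base["dissect"]["cache_pcap_results"] is False

base["dissect"]["cache_pcap_results"] = True
deep = copy.deepcopy(base)
deep["dissect"]["cache_pcap_results"] = False     # base stays intact
assert base["dissect"]["cache_pcap_results"] is True
```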
@@ -51,28 +57,30 @@ class PCAPDissectMany:
          self,
          pcap_file: str,
          split_size: int | None = None,
+         dont_fork: bool = False,
      ) -> Dissection:
          """Load one pcap file."""
          pd = PCAPDissector(
              pcap_file,
-             *self.args,
-             **self.kwargs,
+             self.config,
          )
          dissection = pd.load_from_cache(
-             force_overwrite=self.kwargs.get("force_overwrite", False),
-             force_load=self.kwargs.get("force_load", False),
+             force_overwrite=self.config.get_dotnest("dissect.force_overwrite", False),
+             force_load=self.config.get_dotnest("dissect.force_load", False),
          )
          if dissection:
              return dissection

          info(f"processing {pcap_file}")
-         if isinstance(pcap_file, str) and (
-             pcap_file.endswith(".dnstap") or pcap_file.endswith(".tap")
+         if dont_fork or (
+             isinstance(pcap_file, str)
+             and (pcap_file.endswith(".dnstap") or pcap_file.endswith(".tap"))
          ):
              # deal with dnstap files

              # the Dissector already handles loading a dnstap engine
              # TODO(hardaker): see if we can use a splitter here with the framing chunks
+             info("loading without forking -- may be slow")
              dissection = pd.load()

          else: # assume pcap
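The new dont_fork flag forces the same single-process load() path that dnstap/.tap files already take; it is slower but easier to step through in a debugger. A hypothetical call (assumes a TaffyConfig instance named config built elsewhere):

```python
pdm = PCAPDissectMany(["capture.pcap"], config)
dissection = pdm.load_pcap("capture.pcap", dont_fork=True)
```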
@@ -80,8 +88,8 @@ class PCAPDissectMany:
                  pcap_file,
                  split_size=split_size,
                  callback=self.load_pcap_piece,
-                 maximum_count=self.kwargs.get("maximum_count", 0),
-                 maximum_cores=self.maximum_cores,
+                 maximum_count=self.config.get_dotnest("dissect.packet_count", 0),
+                 maximum_cores=self.config.get_dotnest("dissect.maximum_cores", 20),
              )
              results = ps.split()

@@ -95,12 +103,14 @@ class PCAPDissectMany:
          # recalculate metadata now that merges have happened
          dissection.calculate_metadata()

-         if self.kwargs.get("cache_results"):
+         if self.config.get_dotnest("dissect.cache_pcap_results"):
              # create a dissector just to save the cache
              # (we don't call load())
              dissection.pcap_file = pcap_file
              dissection.save_to_cache(
-                 pcap_file + "." + self.kwargs.get("cache_file_suffix", "taffy")
+                 pcap_file
+                 + "."
+                 + self.config.get_dotnest("dissect.cache_file_suffix", "taffy")
              )

          return dissection
@@ -113,7 +123,7 @@ class PCAPDissectMany:
          # handle each one individually -- typically for inserting debugging stops
          dissections = []
          for pcap_file in self.pcap_files:
-             dissection = self.load_pcap(pcap_file)
+             dissection = self.load_pcap(pcap_file, dont_fork=dont_fork)
              dissections.append(dissection)
          return dissections

@@ -122,7 +132,7 @@ class PCAPDissectMany:
          dissections = executor.map(self.load_pcap, self.pcap_files)

          # all loaded files should be merged as if they are one
-         if self.kwargs["merge_files"]:
+         if self.config.get_dotnest("dissect.merge", False):
              dissection = next(dissections)
              for to_be_merged in dissections:
                  dissection.merge(to_be_merged)
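Because executor.map() returns a lazy iterator, next(dissections) takes the first finished dissection as the accumulator and the loop folds each remaining one into it via merge(); the config lookup also gains a False default where kwargs["merge_files"] would have raised KeyError when unset. The fold pattern by itself (toy class, not traffic_taffy code):

```python
class Counts:
    """Toy mergeable result standing in for a Dissection."""

    def __init__(self, data: dict):
        self.data = data

    def merge(self, other: "Counts") -> None:
        for k, v in other.data.items():
            self.data[k] = self.data.get(k, 0) + v

results = iter([Counts({"a": 1}), Counts({"a": 2, "b": 1})])
merged = next(results)      # first result becomes the accumulator
for other in results:       # the iterator resumes after that first item
    merged.merge(other)
assert merged.data == {"a": 3, "b": 1}
```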