traffic-taffy 0.8.5__py3-none-any.whl → 0.9.1__py3-none-any.whl
This diff compares the contents of publicly released versions of the package as they appear in their respective public registries; it is provided for informational purposes only.
- traffic_taffy/__init__.py +1 -1
- traffic_taffy/algorithms/__init__.py +14 -7
- traffic_taffy/algorithms/comparecorrelation.py +164 -0
- traffic_taffy/algorithms/comparecorrelationchanges.py +210 -0
- traffic_taffy/algorithms/compareseries.py +117 -0
- traffic_taffy/algorithms/compareslices.py +116 -0
- traffic_taffy/algorithms/statistical.py +9 -9
- traffic_taffy/compare.py +149 -159
- traffic_taffy/comparison.py +18 -4
- traffic_taffy/config.py +133 -0
- traffic_taffy/dissection.py +78 -6
- traffic_taffy/dissectmany.py +26 -16
- traffic_taffy/dissector.py +189 -77
- traffic_taffy/dissector_engine/scapy.py +41 -8
- traffic_taffy/graph.py +54 -53
- traffic_taffy/graphdata.py +13 -2
- traffic_taffy/hooks/ip2asn.py +20 -7
- traffic_taffy/hooks/labels.py +45 -0
- traffic_taffy/hooks/psl.py +21 -3
- traffic_taffy/output/__init__.py +8 -48
- traffic_taffy/output/console.py +37 -25
- traffic_taffy/output/fsdb.py +24 -18
- traffic_taffy/reports/__init__.py +5 -0
- traffic_taffy/reports/compareslicesreport.py +85 -0
- traffic_taffy/reports/correlationchangereport.py +54 -0
- traffic_taffy/reports/correlationreport.py +42 -0
- traffic_taffy/taffy_config.py +44 -0
- traffic_taffy/tests/test_compare_results.py +22 -7
- traffic_taffy/tests/test_config.py +149 -0
- traffic_taffy/tests/test_global_config.py +33 -0
- traffic_taffy/tests/test_normalize.py +1 -0
- traffic_taffy/tests/test_pcap_dissector.py +12 -2
- traffic_taffy/tests/test_pcap_splitter.py +21 -10
- traffic_taffy/tools/cache_info.py +3 -2
- traffic_taffy/tools/compare.py +32 -24
- traffic_taffy/tools/config.py +83 -0
- traffic_taffy/tools/dissect.py +51 -59
- traffic_taffy/tools/explore.py +5 -4
- traffic_taffy/tools/export.py +28 -17
- traffic_taffy/tools/graph.py +25 -27
- {traffic_taffy-0.8.5.dist-info → traffic_taffy-0.9.1.dist-info}/METADATA +4 -1
- traffic_taffy-0.9.1.dist-info/RECORD +56 -0
- {traffic_taffy-0.8.5.dist-info → traffic_taffy-0.9.1.dist-info}/entry_points.txt +1 -0
- traffic_taffy/report.py +0 -12
- traffic_taffy-0.8.5.dist-info/RECORD +0 -43
- {traffic_taffy-0.8.5.dist-info → traffic_taffy-0.9.1.dist-info}/WHEEL +0 -0
- {traffic_taffy-0.8.5.dist-info → traffic_taffy-0.9.1.dist-info}/licenses/LICENSE.txt +0 -0
traffic_taffy/dissection.py
CHANGED
@@ -61,8 +61,8 @@ class Dissection:
         dissector_level: PCAPDissectorLevel = PCAPDissectorLevel.DETAILED,
         cache_file_suffix: str = "taffy",
         ignore_list: list | None = None,
-        *
-        **
+        *_args: list,
+        **_kwargs: dict,
     ) -> Dissection:
         """Create a Dissection instance."""
         self.pcap_file = pcap_file
@@ -370,6 +370,42 @@ class Dissection:
 
         return contents
 
+    def filter(
+        self: Dissection,
+        timestamps: List[int] | None = None,
+        match_string: str | None = None,
+        match_value: str | None = None,
+        minimum_count: int | None = None,
+        make_printable: bool = False,
+        match_expression: str | None = None,
+    ) -> None:
+        """Creates a new dissection that has been filtered based on passed criteria."""
+        debug(
+            f"filtering dissection with: {timestamps=}, {match_string=} {match_value=}, {minimum_count=}, {make_printable=}"
+        )
+        new_dissection: Dissection = Dissection(
+            self.pcap_file,
+            self.pcap_filter,
+            self.maximum_count,
+            self.bin_size,
+            self.dissector_level,
+            self.cache_file_suffix,
+            self.ignore_list,
+        )
+
+        for timestamp, key, subkey, value in self.find_data(
+            timestamps=timestamps,
+            match_string=match_string,
+            match_value=match_value,
+            minimum_count=minimum_count,
+            make_printable=make_printable,
+            match_expression=match_expression,
+        ):
+            new_dissection.data[timestamp][key][subkey] = value
+
+        debug(" done filtering")
+        return new_dissection
+
     def find_data(
         self: Dissection,
         timestamps: List[int] | None = None,
@@ -377,6 +413,7 @@ class Dissection:
         match_value: str | None = None,
         minimum_count: int | None = None,
         make_printable: bool = False,
+        match_expression: str | None = None,
     ) -> list:
         """Search through data for appropriate records."""
         data = self.data
@@ -386,6 +423,9 @@ class Dissection:
         if not timestamps:
             timestamps = data.keys()
 
+        match_eval_compiled = None
+        if match_expression:
+            match_eval_compiled = compile(f"{match_expression}", "<string>", "eval")
         # find timestamps/key values with at least one item above count
         # TODO(hardaker): we should really use pandas for this
         usable = defaultdict(set)
@@ -398,15 +438,15 @@ class Dissection:
                 # ensure at least one of the count valuse for the
                 # stream gets above minimum_count
                 for subkey, count in data[timestamp][key].items():
-                    if (
-
-                        or minimum_count
-                        and abs(count) > minimum_count
+                    if not minimum_count or (
+                        minimum_count and abs(count) >= minimum_count
                     ):
                         usable[key].add(subkey)
 
         # TODO(hardaker): move the timestamp inside the other fors for faster
         # processing of skipped key/subkeys
+        globals = {}  # TODO(hardaker): maybe create some in the future
+
         for timestamp in timestamps:
             for key in sorted(data[timestamp]):
                 if key not in usable:
@@ -419,6 +459,7 @@ class Dissection:
                     if subkey not in usable[key]:
                         continue
 
+                    subkey_original = subkey
                     if make_printable:
                         subkey = Dissection.make_printable(key, subkey)
                         count = Dissection.make_printable(None, count)
@@ -426,6 +467,23 @@ class Dissection:
                     if match_value and not any(x in subkey for x in match_value):
                         continue
 
+                    if match_eval_compiled:
+                        result = eval(
+                            match_eval_compiled,
+                            globals,
+                            {
+                                "timestamp": timestamp,
+                                "key": key,
+                                "subkey": subkey,
+                                "value": data[timestamp][key][subkey_original],
+                            },
+                        )
+
+                        # if the evaluation didn't return truthy,
+                        # ignore this entry
+                        if not result:
+                            continue
+
                     yield (timestamp, key, subkey, count)
 
     @staticmethod
@@ -497,6 +555,13 @@ class Dissection:
             "Ethernet_IP_UDP_DNS_ns_type": "dns_rrtypes",
             "Ethernet_IP_UDP_DNS_an_type": "dns_rrtypes",
             "Ethernet_IP_UDP_DNS_opcode": "dns_opcodes",
+            "Ethernet_IP_TCP_DNS_qd_qclass": "dns_classes",
+            "Ethernet_IP_TCP_DNS_ns_rclass": "dns_classes",
+            "Ethernet_IP_TCP_DNS_an_rclass": "dns_classes",
+            "Ethernet_IP_TCP_DNS_qd_qtype": "dns_rrtypes",
+            "Ethernet_IP_TCP_DNS_ns_type": "dns_rrtypes",
+            "Ethernet_IP_TCP_DNS_an_type": "dns_rrtypes",
+            "Ethernet_IP_TCP_DNS_opcode": "dns_opcodes",
         }
 
     @staticmethod
@@ -539,6 +604,13 @@ class Dissection:
             "Ethernet_IP_UDP_DNS_ns_type": print_iana_values,
             "Ethernet_IP_UDP_DNS_an_type": print_iana_values,
             "Ethernet_IP_UDP_DNS_opcode": print_iana_values,
+            "Ethernet_IP_TCP_DNS_qd_qclass": print_iana_values,
+            "Ethernet_IP_TCP_DNS_ns_rclass": print_iana_values,
+            "Ethernet_IP_TCP_DNS_an_rclass": print_iana_values,
+            "Ethernet_IP_TCP_DNS_qd_qtype": print_iana_values,
+            "Ethernet_IP_TCP_DNS_ns_type": print_iana_values,
+            "Ethernet_IP_TCP_DNS_an_type": print_iana_values,
+            "Ethernet_IP_TCP_DNS_opcode": print_iana_values,
         }
 
     # has to go at the end to pick up the above function names
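The most notable addition above is the match_expression plumbing: find_data() compiles the user-supplied expression once, then eval()s it for every candidate record with timestamp, key, subkey, and value bound as locals, and the new filter() method uses the same path to build a reduced copy of a dissection. Below is a minimal standalone sketch of that compile-then-eval mechanic; the sample record values are illustrative only, not taken from the diff.

# Sketch of the match_expression mechanic used by find_data() above:
# compile the expression once, then eval() it per record with the
# record's fields supplied as the local namespace.
match_expression = "value >= 5 and 'DNS' in key"
match_eval_compiled = compile(match_expression, "<string>", "eval")

record = {
    "timestamp": 1700000000,  # illustrative values, not from the diff
    "key": "Ethernet_IP_UDP_DNS_qd_qtype",
    "subkey": "A",
    "value": 12,
}
if eval(match_eval_compiled, {}, record):
    print("find_data() would yield this record")

Since the expression goes straight to eval(), it is only safe when supplied by a trusted operator, e.g. from a local command-line flag.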
traffic_taffy/dissectmany.py
CHANGED
@@ -9,23 +9,28 @@ from pcap_parallel import PCAPParallel
 from typing import List, TYPE_CHECKING
 
 from traffic_taffy.dissector import PCAPDissector
+from traffic_taffy.taffy_config import TT_CFG
 
 if TYPE_CHECKING:
     from io import BufferedIOBase
     from traffic_taffy.dissection import Dissection
+    from traffic_taffy.config import TaffyConfig
 
 
 class PCAPDissectMany:
     """A class for dissecting a number of PCAP files."""
 
-    def __init__(
+    def __init__(
+        self, pcap_files: List[str], config: TaffyConfig, *args: list, **kwargs: dict
+    ):
         """Create a PCAPDissectMany instance."""
         self.pcap_files = pcap_files
+        self.config = config
         self.args = args
         self.kwargs = kwargs
         self.futures = {}
 
-        self.maximum_cores = self.
+        self.maximum_cores = self.config.get_dotnest("dissect.maximum_cores")
         if not self.maximum_cores:
             # since we're loading multiple files in parallel, reduce the
             # maximum number of cores available to the splitter
@@ -34,12 +39,13 @@ class PCAPDissectMany:
 
     def load_pcap_piece(self, pcap_io_buffer: BufferedIOBase) -> Dissection:
         """Load one piece of a pcap from a buffer."""
-
+        config = copy.deepcopy(self.config)
         # force false for actually loading
-
+        config[TT_CFG.CACHE_RESULTS] = False
 
         pd = PCAPDissector(
             pcap_io_buffer,
+            config,
             *self.args,
             **self.kwargs,
         )
@@ -51,28 +57,30 @@ class PCAPDissectMany:
         self,
         pcap_file: str,
         split_size: int | None = None,
+        dont_fork: bool = False,
     ) -> Dissection:
         """Load one pcap file."""
         pd = PCAPDissector(
             pcap_file,
-
-            **self.kwargs,
+            self.config,
         )
         dissection = pd.load_from_cache(
-            force_overwrite=self.
-            force_load=self.
+            force_overwrite=self.config.get_dotnest("dissect.force_overwrite", False),
+            force_load=self.config.get_dotnest("dissect.force_load", False),
         )
         if dissection:
             return dissection
 
         info(f"processing {pcap_file}")
-        if
-            pcap_file
+        if dont_fork or (
+            isinstance(pcap_file, str)
+            and (pcap_file.endswith(".dnstap") or pcap_file.endswith(".tap"))
         ):
             # deal with dnstap files
 
             # the Dissector already handles loading a dnstap engine
             # TODO(hardaker): see if we can use a splitter here with the framing chunks
+            info("loading without forking -- may be slow")
             dissection = pd.load()
 
         else:  # assume pcap
@@ -80,8 +88,8 @@ class PCAPDissectMany:
             pcap_file,
             split_size=split_size,
             callback=self.load_pcap_piece,
-            maximum_count=self.
-            maximum_cores=self.maximum_cores,
+            maximum_count=self.config.get_dotnest("dissect.packet_count", 0),
+            maximum_cores=self.config.get_dotnest("dissect.maximum_cores", 20),
         )
         results = ps.split()
 
@@ -95,12 +103,14 @@ class PCAPDissectMany:
         # recalculate metadata now that merges have happened
         dissection.calculate_metadata()
 
-        if self.
+        if self.config.get_dotnest("dissect.cache_pcap_results"):
             # create a dissector just to save the cache
             # (we don't call load())
             dissection.pcap_file = pcap_file
             dissection.save_to_cache(
-                pcap_file
+                pcap_file
+                + "."
+                + self.config.get_dotnest("dissect.cache_file_suffix", "taffy")
             )
 
         return dissection
@@ -113,7 +123,7 @@ class PCAPDissectMany:
         # handle each one individually -- typically for inserting debugging stops
         dissections = []
         for pcap_file in self.pcap_files:
-            dissection = self.load_pcap(pcap_file)
+            dissection = self.load_pcap(pcap_file, dont_fork=dont_fork)
             dissections.append(dissection)
         return dissections
 
@@ -122,7 +132,7 @@ class PCAPDissectMany:
         dissections = executor.map(self.load_pcap, self.pcap_files)
 
         # all loaded files should be merged as if they are one
-        if self.
+        if self.config.get_dotnest("dissect.merge", False):
             dissection = next(dissections)
             for to_be_merged in dissections:
                 dissection.merge(to_be_merged)