traffic-taffy 0.2__py3-none-any.whl → 0.3.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,512 @@
+ """Loads a PCAP file and counts contents with various levels of storage"""
+
+ import os
+ import pickle
+ from enum import Enum
+ from logging import warning, info, error, debug
+ from collections import Counter, defaultdict
+ from scapy.all import sniff
+ from typing import Any, List
+ import dpkt
+ from rich import print
+
+
+ class PCAPDissectorType(Enum):
+     COUNT_ONLY = 1
+     THROUGH_IP = 2
+     DETAILED = 10
+
+
+ class PCAPDissector:
+     "loads a pcap file and counts the contents in both time and depth"
+     TOTAL_COUNT: str = "__TOTAL__"
+     TOTAL_SUBKEY: str = "packet"
+     DISECTION_VERSION: int = 3
+
+     def __init__(
+         self,
+         pcap_file: str,
+         bin_size: int = 0,
+         maximum_count: int = 0,
+         dissector_level: PCAPDissectorType = PCAPDissectorType.DETAILED,
+         pcap_filter: str | None = None,
+         cache_results: bool = False,
+     ):
+         self.pcap_file = pcap_file
+         self.bin_size = bin_size
+         self.dissector_level = dissector_level
+         self.pcap_filter = pcap_filter
+         self.maximum_count = maximum_count
+         self.cache_results = cache_results
+
+         self.parameters = [
+             "pcap_file",
+             "bin_size",
+             "dissector_level",
+             "pcap_filter",
+             "maximum_count",
+         ]
+
+         # TODO: convert to a factory
+         self.data = {0: defaultdict(Counter)}
+
+         if dissector_level == PCAPDissectorType.COUNT_ONLY and bin_size == 0:
+             warning("counting packets only with no binning is unlikely to be helpful")
+
+     @property
+     def data(self):
+         return self._data
+
+     @data.setter
+     def data(self, value):
+         self._data = value
+
+     @staticmethod
+     def find_data(
+         data,
+         timestamps: List[int] | None = None,
+         match_string: str | None = None,
+         match_value: str | None = None,
+         minimum_count: int | None = None,
+         make_printable: bool = False,
+     ):
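+         "Generator yielding (timestamp, key, subkey, count) tuples that pass the given filters"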
+         if not timestamps:
+             timestamps = data.keys()
+         for timestamp in timestamps:
+             for key in sorted(data[timestamp]):
+                 if match_string and match_string not in key:
+                     continue
+
+                 for subkey, count in sorted(
+                     data[timestamp][key].items(), key=lambda x: x[1], reverse=True
+                 ):
+                     if minimum_count and abs(count) < minimum_count:
+                         continue
+
+                     if make_printable:
+                         subkey = PCAPDissector.make_printable(subkey)
+                         count = PCAPDissector.make_printable(count)
+
+                     if match_value and match_value not in subkey:
+                         continue
+
+                     yield (timestamp, key, subkey, count)
+
+     def incr(self, key: str, value: Any, count: int = 1):
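+         "Increments data[0][key][value] (the running total) and, when set, data[self.timestamp][key][value]"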
+         # always save a total count at the zero bin
+         # note: there should be no recorded tcpdump files from 1970 Jan 01 :-)
+         self.data[0][key][value] += count
+         if self.timestamp:
+             if self.timestamp not in self.data:
+                 self.data[self.timestamp] = defaultdict(Counter)
+             self.data[self.timestamp][key][value] += count
+
+     def load(self) -> dict:
+         cached_file = self.pcap_file + ".pkl"
+         if self.cache_results and os.path.exists(cached_file):
+             cached_contents = self.load_saved(cached_file, dont_overwrite=True)
+
+             ok_to_load = True
+
+             if cached_contents["PCAP_DISECTION_VERSION"] != self.DISECTION_VERSION:
+                 ok_to_load = False
+
+             # a cached bin_size of 0 really means 1, since binning with 0
+             # still stores int(timestamp)
+             if cached_contents["parameters"]["bin_size"] == 0:
+                 cached_contents["parameters"]["bin_size"] = 1
+
+             for parameter in self.parameters:
+                 if (
+                     getattr(self, parameter)
+                     and getattr(self, parameter)
+                     != cached_contents["parameters"][parameter]
+                 ):
+                     debug(
+                         f"parameter {parameter} doesn't match: "
+                         f"{getattr(self, parameter)} != "
+                         f"{cached_contents['parameters'][parameter]}"
+                     )
+                     ok_to_load = False
+
+             if ok_to_load:
+                 info(f"loading cached pcap contents from {cached_file}")
+                 self.load_saved_contents(cached_contents)
+                 return self.data
+
+             error(f"Failed to load cached data for {self.pcap_file} due to differences")
+             error("refusing to continue -- remove the cache to recreate it")
+             exit(1)
+
+         if (
+             self.dissector_level == PCAPDissectorType.DETAILED
+             or self.dissector_level == PCAPDissectorType.DETAILED.value
+         ):
+             return self.load_via_scapy()
+         else:
+             return self.load_via_dpkt()
+
+     def dpkt_callback(self, timestamp: float, packet: bytes):
+         # if binning is requested, save it in a binned time slot
+         self.timestamp = int(timestamp)
+         if self.bin_size:
+             self.timestamp = self.timestamp - self.timestamp % self.bin_size
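+             # e.g. with bin_size=60, a timestamp of 1699999999 bins to 1699999980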
+         self.incr(self.TOTAL_COUNT, self.TOTAL_SUBKEY)
+
+         if self.dissector_level == PCAPDissectorType.THROUGH_IP.value:
+             eth = dpkt.ethernet.Ethernet(packet)
+             # these names are designed to match scapy names
+             self.incr("Ethernet.dst", eth.dst)
+             self.incr("Ethernet.src", eth.src)
+             self.incr("Ethernet.type", eth.type)
+
+             if isinstance(eth.data, dpkt.ip.IP):
+                 ip = eth.data
+
+                 IPVER = "IP"
+                 if ip.v == 6:
+                     IPVER = "IPv6"
+
+                 # TODO: make sure all these match scapy
+                 self.incr(f"Ethernet.{IPVER}.dst", ip.dst)
+                 self.incr(f"Ethernet.{IPVER}.src", ip.src)
+                 self.incr(f"Ethernet.{IPVER}.df", ip.df)
+                 self.incr(f"Ethernet.{IPVER}.offset", ip.offset)
+                 self.incr(f"Ethernet.{IPVER}.tos", ip.tos)
+                 self.incr(f"Ethernet.{IPVER}.len", ip.len)
+                 self.incr(f"Ethernet.{IPVER}.id", ip.id)
+                 self.incr(f"Ethernet.{IPVER}.hl", ip.hl)
+                 self.incr(f"Ethernet.{IPVER}.rf", ip.rf)
+                 self.incr(f"Ethernet.{IPVER}.p", ip.p)
+                 self.incr(f"Ethernet.{IPVER}.chksum", ip.sum)
+                 self.incr(f"Ethernet.{IPVER}.version", ip.v)
+                 self.incr(f"Ethernet.{IPVER}.ttl", ip.ttl)
+
+                 if isinstance(ip.data, dpkt.udp.UDP):
+                     udp = ip.data
+                     self.incr(f"Ethernet.{IPVER}.UDP.sport", udp.sport)
+                     self.incr(f"Ethernet.{IPVER}.UDP.dport", udp.dport)
+                     self.incr(f"Ethernet.{IPVER}.UDP.len", udp.ulen)
+                     self.incr(f"Ethernet.{IPVER}.UDP.chksum", udp.sum)
+
+                     # TODO: handle DNS and others for level 3
+
+                 elif isinstance(ip.data, dpkt.tcp.TCP):
+                     # TODO
+                     tcp = ip.data
+                     self.incr(f"Ethernet.{IPVER}.TCP.sport", tcp.sport)
+                     self.incr(f"Ethernet.{IPVER}.TCP.dport", tcp.dport)
+                     self.incr(f"Ethernet.{IPVER}.TCP.seq", tcp.seq)
+                     self.incr(f"Ethernet.{IPVER}.TCP.flags", tcp.flags)
+                     # self.incr(f"Ethernet.{IPVER}.TCP.reserved", tcp.reserved)
+                     self.incr(f"Ethernet.{IPVER}.TCP.window", tcp.win)
+                     self.incr(f"Ethernet.{IPVER}.TCP.chksum", tcp.sum)
+                     self.incr(f"Ethernet.{IPVER}.TCP.options", tcp.opts)
+
+                     # TODO: handle DNS and others for level 3
+
+     def load_via_dpkt(self) -> dict:
+         self.data = {0: defaultdict(Counter)}
+         pcap = dpkt.pcap.Reader(open(self.pcap_file, "rb"))
+         if self.pcap_filter:
+             pcap.setfilter(self.pcap_filter)
+         pcap.dispatch(self.maximum_count, self.dpkt_callback)
+
+         if self.cache_results:
+             self.save(self.pcap_file + ".pkl")
+         return self.data
+
+     def add_scapy_item(self, field_value, prefix: str) -> None:
+         "Adds an item to self.data regardless of its type"
+         if isinstance(field_value, list):
+             if len(field_value) > 0:
+                 # if it's a list of tuples, count the (eg TCP option) names
+                 # TODO: values can be always the same or things like timestamps
+                 # that will always change or are too unique
+                 if isinstance(field_value[0], tuple):
+                     for item in field_value:
+                         self.incr(prefix, item[0])
+                 else:
+                     for item in field_value:
+                         self.add_scapy_item(item, prefix)
+             # else:
+             #     debug(f"ignoring empty-list: {field_value}")
+         elif (
+             isinstance(field_value, str)
+             or isinstance(field_value, int)
+             or isinstance(field_value, float)
+         ):
+             self.incr(prefix, field_value)
+
+         elif isinstance(field_value, bytes):
+             try:
+                 converted = field_value.decode("utf-8")
+                 self.incr(prefix, converted)
+             except Exception:
+                 converted = "0x" + field_value.hex()
+                 self.incr(prefix, converted)
+
+     def add_scapy_layer(self, layer, prefix: str = "") -> None:
+         "Analyzes a layer to add counts to each layer sub-component"
+
+         if hasattr(layer, "fields_desc"):
+             name_list = [field.name for field in layer.fields_desc]
+         elif hasattr(layer, "fields"):
+             name_list = [field.name for field in layer.fields]
+         else:
+             warning(f"unable to deep dive into: {layer}")
+             return
+
+         for field_name in name_list:
+             field_value = getattr(layer, field_name)
+             if hasattr(field_value, "fields"):
+                 self.add_scapy_layer(field_value, prefix + field_name + ".")
+             else:
+                 self.add_scapy_item(field_value, prefix + field_name)
+
+     def scapy_callback(self, packet):
+         prefix = "."
+         self.timestamp = int(packet.time)
+         if self.bin_size:
+             self.timestamp = self.timestamp - self.timestamp % self.bin_size
+
+         self.incr(self.TOTAL_COUNT, self.TOTAL_SUBKEY)
+         for payload in packet.iterpayloads():
+             prefix = f"{prefix}{payload.name}."
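+             # e.g. an Ethernet/IP/TCP packet yields the prefixes
+             # "Ethernet.", "Ethernet.IP.", then "Ethernet.IP.TCP."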
+             self.add_scapy_layer(payload, prefix[1:])
+
+     def load_via_scapy(self) -> dict:
+         "Loads a pcap file into a nested dictionary of statistical counts"
+         sniff(
+             offline=self.pcap_file,
+             prn=self.scapy_callback,
+             store=0,
+             count=self.maximum_count,
+             filter=self.pcap_filter,
+         )
+         if self.cache_results:
+             self.save(self.pcap_file + ".pkl")
+         return self.data
+
+     def save(self, where: str) -> None:
+         "Saves a generated dissection to a pickle file"
+
+         # wrap the report in a version header
+         versioned_cache = {
+             "PCAP_DISECTION_VERSION": self.DISECTION_VERSION,
+             "file": self.pcap_file,
+             "parameters": {},
+             "dissection": self.data,
+         }
+
+         for parameter in self.parameters:
+             versioned_cache["parameters"][parameter] = getattr(self, parameter)
+
+             # TODO: fix this hack
+             # basically, a bin_size of 0 is really a 1... but it may be
+             # faster to leave it at zero and avoid the binning math, which
+             # for a bin_size of 1 is a noop that still consumes cycles.
+             # We save it as 1, though, since the math is behind us and a
+             # value of 1 is more informative to the user.
+             if parameter == "bin_size" and self.bin_size == 0:
+                 versioned_cache["parameters"][parameter] = 1
+
+         # save it
+         info(f"caching PCAP data to '{where}'")
+         pickle.dump(versioned_cache, open(where, "wb"))
+
+     def load_saved_contents(self, versioned_cache):
+         # set the local parameters from the cache
+         for parameter in self.parameters:
+             setattr(self, parameter, versioned_cache["parameters"][parameter])
+
+         # load the data
+         self.data = versioned_cache["dissection"]
+
+     def load_saved(self, where: str, dont_overwrite: bool = False) -> dict:
+         "Loads a previously saved report from a file instead of re-parsing pcaps"
+         contents = pickle.load(open(where, "rb"))
+
+         # check that the version header matches something we understand
+         if contents["PCAP_DISECTION_VERSION"] != self.DISECTION_VERSION:
+             raise ValueError(
+                 "improper saved dissection version: report version = "
+                 + str(contents["PCAP_DISECTION_VERSION"])
+                 + ", our version: "
+                 + str(self.DISECTION_VERSION)
+             )
+
+         if not dont_overwrite:
+             self.load_saved_contents(contents)
+
+         return contents
+
+     @staticmethod
+     def make_printable(value: Any) -> str:
+         try:
+             value = str(value)
+         except Exception:
+             if isinstance(value, bytes):
+                 value = "0x" + value.hex()
+             else:
+                 value = "[unprintable]"
+         return value
+
+     def print(
+         self,
+         timestamps: List[int] | None = [0],
+         match_string: str | None = None,
+         match_value: str | None = None,
+         minimum_count: int | None = None,
+     ) -> None:
+         for (timestamp, key, subkey, value) in self.find_data(
+             self._data,
+             timestamps=timestamps,
+             match_string=match_string,
+             match_value=match_value,
+             minimum_count=minimum_count,
+             make_printable=True,
+         ):
+             print(f"{key:<30} {subkey:<30} {value}")
+
+
+ def dissector_add_parseargs(parser, add_subgroup: bool = True):
+     if add_subgroup:
+         parser = parser.add_argument_group("Parsing Options")
+
+     parser.add_argument(
+         "-d",
+         "--dissection-level",
+         default=PCAPDissectorType.THROUGH_IP.value,
+         type=int,
+         help="Dump to various levels of detail (1-10, with 10 being the most detailed and slowest)",
+     )
+
+     parser.add_argument(
+         "-n",
+         "--packet-count",
+         default=-1,
+         type=int,
+         help="Maximum number of packets to analyze",
+     )
+
+     parser.add_argument(
+         "-s",
+         "--save-report",
+         default=None,
+         type=str,
+         help="Where to save a report file for quicker future loading",
+     )
+
+     parser.add_argument(
+         "-l",
+         "--load-report",
+         default=None,
+         type=str,
+         help="Load a report from a pickle file rather than use pcaps",
+     )
+
+     parser.add_argument(
+         "-C",
+         "--cache-pcap-results",
+         action="store_true",
+         help="Cache PCAP results into, and reuse them from, a .pkl file",
+     )
+
+     return parser
+
+
+ def limitor_add_parseargs(parser, add_subgroup: bool = True):
+     if add_subgroup:
+         parser = parser.add_argument_group("Limiting options")
+
+     parser.add_argument(
+         "-m",
+         "--match-string",
+         default=None,
+         type=str,
+         help="Only report on data with this substring in the header",
+     )
+
+     parser.add_argument(
+         "-M",
+         "--match-value",
+         default=None,
+         type=str,
+         help="Only report on data with this substring in the packet value field",
+     )
+
+     parser.add_argument(
+         "-c",
+         "--minimum-count",
+         default=None,
+         type=float,
+         help="Exclude results with a record count below this value",
+     )
+
+     return parser
+
+
+ def check_dissector_level(level: int):
+     current_dissection_levels = [
+         PCAPDissectorType.COUNT_ONLY.value,
+         PCAPDissectorType.THROUGH_IP.value,
+         PCAPDissectorType.DETAILED.value,
+     ]
+     if level not in current_dissection_levels:
+         error(f"currently supported dissection levels: {current_dissection_levels}")
+         exit(1)
+     return True
+
+
+ def main():
+     from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
+     import logging
+
+     def parse_args():
+         "Parse the command line arguments."
+         parser = ArgumentParser(
+             formatter_class=ArgumentDefaultsHelpFormatter,
+             description=__doc__,
+             epilog="Example Usage: ",
+         )
+
+         parser.add_argument(
+             "--log-level",
+             "--ll",
+             default="info",
+             help="Define the logging verbosity level (debug, info, warning, error, fatal, critical).",
+         )
+
+         dissector_add_parseargs(parser)
+         limitor_add_parseargs(parser)
+
+         parser.add_argument("input_file", type=str, help="input pcap file")
+
+         args = parser.parse_args()
+         log_level = args.log_level.upper()
+         logging.basicConfig(level=log_level, format="%(levelname)-10s:\t%(message)s")
+         return args
+
+     args = parse_args()
+
+     check_dissector_level(args.dissection_level)
+
+     pd = PCAPDissector(
+         args.input_file,
+         bin_size=0,
+         dissector_level=args.dissection_level,
+         maximum_count=args.packet_count,
+         cache_results=args.cache_pcap_results,
+     )
+     pd.load()
+     pd.print(
+         timestamps=[0],
+         match_string=args.match_string,
+         match_value=args.match_value,
+         minimum_count=args.minimum_count,
+     )
+
+
+ if __name__ == "__main__":
+     main()
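
As a usage sketch (illustrative, not part of the released file): assuming the module above is importable as `traffic_taffy.dissector`, an in-code invocation might look like the following; the module path and the `example.pcap` filename are assumptions.

    from traffic_taffy.dissector import PCAPDissector, PCAPDissectorType

    pd = PCAPDissector(
        "example.pcap",                                # hypothetical capture file
        bin_size=60,                                   # count into one-minute bins
        dissector_level=PCAPDissectorType.THROUGH_IP,  # dpkt-based header counting
        cache_results=True,                            # write/reuse example.pcap.pkl
    )
    pd.load()
    pd.print(timestamps=[0], minimum_count=5)          # bin 0 holds the overall totals
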
@@ -0,0 +1,21 @@
+ from collections import defaultdict, Counter
+
+
+ class DefaultCounter(Counter):
+     def __init__(self, *args, default_key: str = "total", **kwargs):
+         super().__init__(*args, **kwargs)
+         self.default_key = default_key
+
+     def __iadd__(self, value):
+         self[self.default_key] += value
+         return self
+
+     def __eq__(self, value):
+         return self[self.default_key] == value
+
+
+ class DissectorResults(defaultdict):
+     def __init__(self, has_delta: bool = False, default_key: str = "total"):
+         super().__init__(lambda: defaultdict(DefaultCounter))
+         self.has_delta = has_delta
+         self.default_key = default_key
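
A minimal behavior sketch for these helpers (illustrative only):

    # a DissectorResults maps: time bin -> key -> DefaultCounter of values
    results = DissectorResults()

    # DefaultCounter routes a bare `+=` to its configured default key
    results[0]["Ethernet.type"] += 1
    results[0]["Ethernet.type"] += 1
    print(results[0]["Ethernet.type"]["total"])  # -> 2

    # __eq__ compares against the default key's count
    print(results[0]["Ethernet.type"] == 2)      # -> True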