traffic-taffy 0.2__py3-none-any.whl → 0.3.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pcap_compare/graph.py ADDED
@@ -0,0 +1,210 @@
1
+ """Read a PCAP file and graph it or parts of it"""
2
+
3
+ import os
4
+ import seaborn as sns
5
+ import matplotlib.pyplot as plt
6
+ import pandas
7
+ from pandas import DataFrame, to_datetime
8
+ from pcap_compare.dissector import (
9
+ PCAPDissectorType,
10
+ dissector_add_parseargs,
11
+ limitor_add_parseargs,
12
+ check_dissector_level,
13
+ )
14
+ from pcap_compare.dissectmany import PCAPDissectMany, PCAPDissector
15
+
16
+ from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
17
+ from logging import debug, info
18
+ import logging
19
+
20
+
21
def parse_args():
    """Parse the command line arguments and configure logging.

    Returns:
        The parsed argparse Namespace, after logging has been initialized
        from --log-level and matplotlib's font-manager chatter silenced.
    """
    parser = ArgumentParser(
        formatter_class=ArgumentDefaultsHelpFormatter,
        description=__doc__,
        epilog="Example Usage: ",  # typo fix: was "Exmaple Usage: "
    )

    parser.add_argument(
        "-g",
        "--graph-elements",
        default=None,
        type=str,
        help="Graph these particular elements; the default is packet counts",
    )

    parser.add_argument(
        "-o",
        "--output-file",
        default=None,
        type=str,
        help="Where to save the output (png)",
    )

    parser.add_argument(
        "--log-level",
        "--ll",
        default="info",
        # typo fix: was "fotal"
        help="Define verbosity level (debug, info, warning, error, fatal, critical).",
    )

    parser.add_argument(
        "-b",
        "--bin-size",
        type=int,
        default=1,
        help="Bin results into this many seconds",
    )

    # add the shared dissection/limiting options used across the tools
    dissector_add_parseargs(parser)
    limitor_add_parseargs(parser)

    parser.add_argument("input_file", type=str, help="PCAP file to graph", nargs="+")

    args = parser.parse_args()
    log_level = args.log_level.upper()
    logging.basicConfig(level=log_level, format="%(levelname)-10s:\t%(message)s")
    # matplotlib's font manager logs noisily at DEBUG; keep it quiet
    logging.getLogger("matplotlib.font_manager").setLevel(logging.ERROR)
    return args
70
+
71
+
72
class PcapGraph:
    """Dissects one or more PCAP files and graphs binned counts over time."""

    def __init__(
        self,
        pcap_files: str,
        output_file: str,
        maximum_count: int = None,
        minimum_count: int = None,
        bin_size: int = None,
        match_key: str = None,
        match_value: str = None,
        cache_pcap_results: bool = False,
        dissector_level: PCAPDissectorType = PCAPDissectorType.COUNT_ONLY,
    ):
        """Store the graphing configuration.

        Args:
            pcap_files: the PCAP file(s) to read and graph.
            output_file: where to save the output (png); None shows it instead.
            maximum_count: stop dissecting after this many packets.
            minimum_count: drop entries seen fewer than this many times.
            bin_size: bucket counts into bins of this many seconds.
            match_key: only graph keys matching this string.
            match_value: only graph values matching this.
            cache_pcap_results: cache dissection results for reuse.
            dissector_level: how deeply to dissect each packet.
        """
        self.pcap_files = pcap_files
        self.output_file = output_file
        self.maximum_count = maximum_count
        self.minimum_count = minimum_count
        self.bin_size = bin_size
        # not configurable yet; reserved for future filtering support
        self.subsections = None
        self.pkt_filter = None
        self.match_key = match_key
        self.match_value = match_value
        self.cache_pcap_results = cache_pcap_results
        self.dissector_level = dissector_level

    def load_pcaps(self):
        "loads the pcap and counts things into bins"
        self.data = {}

        info("reading pcap files")
        pdm = PCAPDissectMany(
            self.pcap_files,
            bin_size=self.bin_size,
            maximum_count=self.maximum_count,
            dissector_level=self.dissector_level,
            pcap_filter=self.pkt_filter,
            cache_results=self.cache_pcap_results,
        )
        results = pdm.load_all()

        # index each file's dissection results by its file name
        for result in results:
            self.data[result["file"]] = result["data"]
        info("done reading pcap files")

    def normalize_bins(self, counters):
        """Flatten time-binned counters into parallel time/count/index lists.

        Returns a dict of equal-length lists suitable for
        DataFrame.from_records: "time" (timestamps), "count" (int values)
        and "index" ("key=subkey" labels).
        """
        time_keys = list(counters.keys())
        # bin 0 holds the global (all-time) totals, not a real timestamp
        if time_keys[0] == 0:  # likely always
            time_keys.pop(0)

        results = {"time": [], "count": [], "index": []}

        # TODO: this could likely be made much more efficient and needs hole-filling
        info(f"match value: {self.match_value}")
        for (timestamp, key, subkey, value) in PCAPDissector.find_data(
            counters,
            timestamps=time_keys,
            match_string=self.match_key,
            match_value=self.match_value,
            minimum_count=self.minimum_count,
            make_printable=True,
        ):
            results["count"].append(int(value))
            results["index"].append(key + "=" + subkey)
            results["time"].append(timestamp)

        return results

    def merge_datasets(self):
        """Normalize each file's data and concatenate into one long-form DataFrame."""
        datasets = []
        for dataset in self.data:
            data = self.normalize_bins(self.data[dataset])
            data = DataFrame.from_records(data)
            data["filename"] = os.path.basename(dataset)
            data["time"] = to_datetime(data["time"], unit="s")
            datasets.append(data)
        return pandas.concat(datasets)

    def create_graph(self):
        "Graph the results of the data collection"
        debug("creating the graph")
        sns.set_theme()

        df = self.merge_datasets()
        debug(df)

        # with only one index value a hue legend is pure noise
        hue_variable = "index"
        if df[hue_variable].nunique() == 1:
            hue_variable = None

        ax = sns.relplot(
            data=df,
            kind="line",
            x="time",
            y="count",
            hue=hue_variable,
            aspect=1.77,
        )
        ax.set(xlabel="time", ylabel="count")
        plt.xticks(rotation=45)

        if self.output_file:
            # only log a save message when we are actually saving
            info(f"saving graph to {self.output_file}")
            plt.savefig(self.output_file)
        else:
            plt.show()

    def graph_it(self):
        """Load all pcaps and render the resulting graph."""
        debug("--- loading pcaps")
        self.load_pcaps()
        debug("--- creating graph")
        self.create_graph()
188
+
189
+
190
def main():
    """Entry point: parse arguments, configure a PcapGraph, and run it."""
    args = parse_args()

    # validate the requested dissection level before doing any real work
    check_dissector_level(args.dissection_level)

    options = {
        "maximum_count": args.packet_count,
        "minimum_count": args.minimum_count,
        "bin_size": args.bin_size,
        "match_key": args.match_string,
        "match_value": args.match_value,
        "cache_pcap_results": args.cache_pcap_results,
        "dissector_level": args.dissection_level,
    }
    grapher = PcapGraph(args.input_file, args.output_file, **options)
    grapher.graph_it()


if __name__ == "__main__":
    main()
@@ -47,6 +47,12 @@ def main():
47
47
  for key in contents["parameters"]:
48
48
  print(f" {key:<16} {contents['parameters'][key]}")
49
49
 
50
+ print("data info:")
51
+ timestamps = list(contents["dissection"].keys())
52
+ print(f" timestamps: {len(timestamps)}")
53
+ print(f" first: {timestamps[1]}") # skips 0 = global
54
+ print(f" last: {timestamps[-1]}")
55
+
50
56
 
51
57
  if __name__ == "__main__":
52
58
  main()
traffic_taffy/compare.py CHANGED
@@ -1,7 +1,7 @@
1
1
  """Takes a set of pcap files to compare and creates a report"""
2
2
 
3
3
  import logging
4
- from logging import info
4
+ from logging import info, debug
5
5
  from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
6
6
  from typing import List
7
7
  from rich.console import Console
@@ -33,6 +33,7 @@ class PcapCompare:
33
33
  only_negative: bool = False,
34
34
  cache_results: bool = False,
35
35
  dissection_level: PCAPDissectorType = PCAPDissectorType.COUNT_ONLY,
36
+ between_times: List[int] | None = None,
36
37
  ) -> None:
37
38
  self.pcaps = pcaps
38
39
  self.deep = deep
@@ -45,40 +46,45 @@ class PcapCompare:
45
46
  self.only_negative = only_negative
46
47
  self.cache_results = cache_results
47
48
  self.dissection_level = dissection_level
49
+ self.between_times = between_times
48
50
 
49
- def compare_results(self, report1: dict, report2: dict) -> dict:
51
+ @property
52
+ def reports(self):
53
+ return self._reports
54
+
55
+ @reports.setter
56
+ def reports(self, newvalue):
57
+ self._reports = newvalue
58
+
59
+ def compare_dissections(self, dissection1: dict, dissection2: dict) -> dict:
50
60
  "compares the results from two reports"
51
61
 
52
62
  # TODO: handle recursive depths, where items are subtrees rather than Counters
53
63
 
54
64
  report = {}
55
65
 
56
- # TODO: we're only (currently) doing full pcap compares
57
- report1 = report1[0]
58
- report2 = report2[0]
59
-
60
- # TODO: missing key in report2 (major items added)
61
- for key in report1:
66
+ # TODO: missing key in dissection2 (major items added)
67
+ for key in dissection1:
62
68
  # TODO: deal with missing keys from one set
63
- report1_total = report1[key].total()
64
- report2_total = report2[key].total()
69
+ dissection1_total = dissection1[key].total()
70
+ dissection2_total = dissection2[key].total()
65
71
  report[key] = {}
66
72
 
67
- for subkey in report1[key].keys():
73
+ for subkey in dissection1[key].keys():
68
74
  delta = 0.0
69
75
  total = 0
70
- if subkey in report1[key] and subkey in report2[key]:
76
+ if subkey in dissection1[key] and subkey in dissection2[key]:
71
77
  delta = (
72
- report2[key][subkey] / report2_total
73
- - report1[key][subkey] / report1_total
78
+ dissection2[key][subkey] / dissection2_total
79
+ - dissection1[key][subkey] / dissection1_total
74
80
  )
75
- total = report2[key][subkey] + report1[key][subkey]
76
- ref_count = report1[key][subkey]
77
- comp_count = report2[key][subkey]
81
+ total = dissection2[key][subkey] + dissection1[key][subkey]
82
+ ref_count = dissection1[key][subkey]
83
+ comp_count = dissection2[key][subkey]
78
84
  else:
79
85
  delta = -1.0
80
- total = report1[key][subkey]
81
- ref_count = report1[key][subkey]
86
+ total = dissection1[key][subkey]
87
+ ref_count = dissection1[key][subkey]
82
88
  comp_count = 0
83
89
 
84
90
  report[key][subkey] = {
@@ -88,12 +94,12 @@ class PcapCompare:
88
94
  "comp_count": comp_count,
89
95
  }
90
96
 
91
- for subkey in report2[key].keys():
97
+ for subkey in dissection2[key].keys():
92
98
  if subkey not in report[key]:
93
99
  delta = 1.0
94
- total = report2[key][subkey]
100
+ total = dissection2[key][subkey]
95
101
  ref_count = 0
96
- comp_count = report2[key][subkey]
102
+ comp_count = dissection2[key][subkey]
97
103
 
98
104
  report[key][subkey] = {
99
105
  "delta": delta,
@@ -136,6 +142,7 @@ class PcapCompare:
136
142
 
137
143
  def print_report(self, report: dict) -> None:
138
144
  "prints a report to the console"
145
+
139
146
  console = Console()
140
147
  for key in sorted(report):
141
148
  reported: bool = False
@@ -180,16 +187,11 @@ class PcapCompare:
180
187
  def print(self) -> None:
181
188
  "outputs the results"
182
189
  for n, report in enumerate(self.reports):
183
- print(f"************ report #{n}")
184
- self.print_report(report)
185
-
186
- def compare(self) -> None:
187
- "Compares each pcap against the original source"
188
-
189
- reports = []
190
-
191
- # TODO: use parallel processes to load multiple at a time
190
+ title = report.get("title", f"report #{n}")
191
+ print(f"************ {title}")
192
+ self.print_report(report["report"])
192
193
 
194
+ def load_pcaps(self) -> None:
193
195
  # load the first as a reference pcap
194
196
  info(f"reading pcap files using level={self.dissection_level}")
195
197
  pdm = PCAPDissectMany(
@@ -201,11 +203,71 @@ class PcapCompare:
201
203
  dissector_level=self.dissection_level,
202
204
  )
203
205
  results = pdm.load_all()
206
+ return results
207
+
208
+ def compare(self) -> None:
209
+ "Compares each pcap against the original source"
210
+
211
+ results = self.load_pcaps()
212
+ self.compare_all(results)
204
213
 
205
- reference = next(results)
206
- for other in results:
207
- # compare the two
208
- reports.append(self.compare_results(reference["data"], other["data"]))
214
+ def compare_all(self, results):
215
+ reports = []
216
+ if len(self.pcaps) > 1:
217
+ # multiple file comparison
218
+ reference = next(results)
219
+ for other in results:
220
+ # compare the two global summaries
221
+ reports.append(
222
+ {
223
+ "report": self.compare_dissections(
224
+ reference["data"][0], other["data"][0]
225
+ ),
226
+ "title": f"{reference['file']} vs {other['file']}",
227
+ }
228
+ )
229
+
230
+ else:
231
+ # deal with timestamps within a single file
232
+ results = list(results)
233
+ reference = results[0]
234
+ timestamps = list(reference["data"].keys())
235
+ debug(
236
+ f"found {len(timestamps)} timestamps from {timestamps[2]} to {timestamps[-1]}"
237
+ )
238
+ for timestamp in range(
239
+ 2, len(timestamps)
240
+ ): # second real non-zero timestamp to last
241
+ time_left = timestamps[timestamp - 1]
242
+ time_right = timestamps[timestamp]
243
+
244
+ # see if we were asked to only use particular time ranges
245
+ if self.between_times and (
246
+ time_left < self.between_times[0]
247
+ or time_right > self.between_times[1]
248
+ ):
249
+ # debug(f"skipping timestamps {time_left} and {time_right}")
250
+ continue
251
+
252
+ debug(f"comparing timestamps {time_left} and {time_right}")
253
+
254
+ report = self.compare_dissections(
255
+ reference["data"][time_left], reference["data"][time_right]
256
+ )
257
+
258
+ title = f"time {time_left} vs time {time_right}"
259
+ print(f"************ {title}")
260
+ self.print_report(report)
261
+
262
+ continue
263
+
264
+ # takes way too much memory to do it "right"
265
+ # reports.append(
266
+ # {
267
+ # "report": report,
268
+ # "title": f"time {time_left} vs time {time_right}",
269
+ # }
270
+ # )
209
271
 
210
272
  self.reports = reports
211
273
 
@@ -236,6 +298,14 @@ def parse_args():
236
298
  "-N", "--only-negative", action="store_true", help="Only show negative entries"
237
299
  )
238
300
 
301
+ limiting_parser.add_argument(
302
+ "-T",
303
+ "--between-times",
304
+ nargs=2,
305
+ type=int,
306
+ help="For single files, only display results between these timestamps",
307
+ )
308
+
239
309
  dissector_add_parseargs(parser)
240
310
 
241
311
  debugging_group = parser.add_argument_group("Debugging options")
@@ -270,6 +340,7 @@ def main():
270
340
  only_negative=args.only_negative,
271
341
  cache_results=args.cache_pcap_results,
272
342
  dissection_level=args.dissection_level,
343
+ between_times=args.between_times,
273
344
  )
274
345
 
275
346
  # compare the pcaps
@@ -32,7 +32,7 @@ class PCAPDissectMany:
32
32
  pd.load()
33
33
  return pd.data
34
34
 
35
- def load_pcap(self, pcap_file, split_size=100000, maximum_count=0):
35
+ def load_pcap(self, pcap_file, split_size=None, maximum_count=0):
36
36
  pd = PCAPDissector(
37
37
  pcap_file,
38
38
  *self.args,
@@ -110,6 +110,50 @@ class PCAPDissector:
110
110
  data[timestamp][key]
111
111
  )
112
112
 
113
+ @staticmethod
114
+ def open_maybe_compressed(filename):
115
+ """Opens a pcap file, potentially decompressing it."""
116
+
117
+ magic_dict = {
118
+ bytes([0x1F, 0x8B, 0x08]): "gz",
119
+ bytes([0x42, 0x5A, 0x68]): "bz2",
120
+ bytes([0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00]): "xz",
121
+ }
122
+ max_len = max(len(x) for x in magic_dict)
123
+
124
+ base_handle = open(filename, "rb")
125
+ file_start = base_handle.read(max_len)
126
+ base_handle.close()
127
+
128
+ for magic, filetype in magic_dict.items():
129
+ if file_start.startswith(magic):
130
+ try:
131
+ if filetype == "gz":
132
+ import gzip
133
+
134
+ return_handle = gzip.open(filename, "rb")
135
+ return return_handle
136
+ elif filetype == "bz2":
137
+ import bz2
138
+
139
+ return_handle = bz2.open(filename, "rb")
140
+ setattr(return_handle, "name", filename)
141
+ return return_handle
142
+ elif filetype == "xz":
143
+ import lzma
144
+
145
+ return_handle = lzma.open(filename, "rb")
146
+ return return_handle
147
+ else:
148
+ raise ValueError("unknown compression error")
149
+ except Exception:
150
+ # likely we failed to find a compression module
151
+ debug(f"failed to use {filetype} module to decode the input stream")
152
+ raise ValueError("cannot decode file")
153
+
154
+ # return a raw file and hope it's not compressed'
155
+ return open(filename, "rb")
156
+
113
157
  def incr(self, key: str, value: Any, count: int = 1):
114
158
  # always save a total count at the zero bin
115
159
  # note: there should be no recorded tcpdump files from 1970 Jan 01 :-)
@@ -158,7 +202,7 @@ class PCAPDissector:
158
202
  if parameter == "dissector_level":
159
203
  debug("------------ here 1")
160
204
  if parameter == "dissector_level" and specified <= cached:
161
- debug("here with dissector_level {specified} and {cached}")
205
+ debug(f"here with dissector_level {specified} and {cached}")
162
206
  # loading a more detailed cache is ok
163
207
  continue
164
208
 
@@ -206,7 +250,7 @@ class PCAPDissector:
206
250
  self.timestamp = self.timestamp - self.timestamp % self.bin_size
207
251
  self.incr(self.TOTAL_COUNT, self.TOTAL_SUBKEY)
208
252
 
209
- if self.dissector_level == PCAPDissectorType.THROUGH_IP.value:
253
+ if self.dissector_level >= PCAPDissectorType.THROUGH_IP.value:
210
254
  eth = dpkt.ethernet.Ethernet(packet)
211
255
  # these names are designed to match scapy names
212
256
  self.incr("Ethernet.dst", eth.dst)
@@ -262,7 +306,7 @@ class PCAPDissector:
262
306
  def load_via_dpkt(self) -> dict:
263
307
  self.data = {0: defaultdict(Counter)}
264
308
  if isinstance(self.pcap_file, str):
265
- pcap = dpkt.pcap.Reader(open(self.pcap_file, "rb"))
309
+ pcap = dpkt.pcap.Reader(PCAPDissector.open_maybe_compressed(self.pcap_file))
266
310
  else:
267
311
  # it's an open handle already
268
312
  pcap = dpkt.pcap.Reader(self.pcap_file)
@@ -335,8 +379,11 @@ class PCAPDissector:
335
379
 
336
380
  def load_via_scapy(self) -> dict:
337
381
  "Loads a pcap file into a nested dictionary of statistical counts"
382
+ load_this = self.pcap_file
383
+ if isinstance(self.pcap_file, str):
384
+ load_this = PCAPDissector.open_maybe_compressed(self.pcap_file)
338
385
  sniff(
339
- offline=self.pcap_file,
386
+ offline=load_this,
340
387
  prn=self.scapy_callback,
341
388
  store=0,
342
389
  count=self.maximum_count,
@@ -452,6 +499,14 @@ def dissector_add_parseargs(parser, add_subgroup: bool = True):
452
499
  help="Maximum number of packets to analyze",
453
500
  )
454
501
 
502
+ parser.add_argument(
503
+ "-b",
504
+ "--bin-size",
505
+ type=int,
506
+ default=3600,
507
+ help="Bin results into this many seconds",
508
+ )
509
+
455
510
  parser.add_argument(
456
511
  "-C",
457
512
  "--cache-pcap-results",
@@ -550,7 +605,7 @@ def main():
550
605
 
551
606
  pd = PCAPDissector(
552
607
  args.input_file,
553
- bin_size=0,
608
+ bin_size=args.bin_size,
554
609
  dissector_level=args.dissection_level,
555
610
  maximum_count=args.packet_count,
556
611
  cache_results=args.cache_pcap_results,