ua-parser 1.0.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ua_parser/__init__.py ADDED
@@ -0,0 +1,193 @@
+ """The package provides top-level helpers which use a lazily initialised
+ default parser. These are convenience functions; for more control it
+ is perfectly acceptable to instantiate and call parsers directly.
+
+ The default parser does use a cache keyed on the user-agent string,
+ but its exact behaviour is unspecified; if you require consistent
+ behaviour or a specific algorithm, set up your own parser (global or
+ not).
+
+ For convenience, direct aliases are also provided for:
+
+ - :mod:`core types <.core>`
+ - :mod:`caching utilities <.caching>`
+ - :class:`ua_parser.basic.Resolver` as :class:`BasicResolver`
+
+ This way importing anything but the top-level package should not be
+ necessary unless you want to *implement* a parser.
+ """
+
+ from __future__ import annotations
+
+ __all__ = [
+     "OS",
+     "BasicResolver",
+     "Cache",
+     "CachingResolver",
+     "DefaultedResult",
+     "Device",
+     "Domain",
+     "Matchers",
+     "PartialResult",
+     "Resolver",
+     "Result",
+     "UserAgent",
+     "load_builtins",
+     "load_lazy_builtins",
+     "parse",
+     "parse_device",
+     "parse_os",
+     "parse_user_agent",
+ ]
+
+ import importlib.util
+ from typing import Callable, Optional
+
+ from .basic import Resolver as BasicResolver
+ from .caching import CachingResolver, S3Fifo as Cache
+ from .core import (
+     DefaultedResult,
+     Device,
+     Domain,
+     Matchers,
+     OS,
+     PartialResult,
+     Resolver,
+     Result,
+     UserAgent,
+ )
+ from .loaders import load_builtins, load_lazy_builtins
+ from .utils import IS_GRAAL
+
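+ # Resolver selection: prefer the Rust-backed resolver (ua_parser_rs)
+ # when installed, then the C++ re2 one, and finally fall back to the
+ # pure-python resolver behind a cache.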
+ _ResolverCtor = Callable[[Matchers], Resolver]
+ Re2Resolver: Optional[_ResolverCtor] = None
+ if importlib.util.find_spec("re2"):
+     from .re2 import Resolver as Re2Resolver
+ RegexResolver: Optional[_ResolverCtor] = None
+ if importlib.util.find_spec("ua_parser_rs"):
+     from .regex import Resolver as RegexResolver
+ BestAvailableResolver: _ResolverCtor = next(
+     filter(
+         None,
+         (
+             RegexResolver,
+             Re2Resolver,
+             lambda m: CachingResolver(BasicResolver(m), Cache(2000)),
+         ),
+     )
+ )
+
+
+ VERSION = (1, 0, 0)
+
+
+ class Parser:
+     """Wrapper object providing convenience methods around an
+     underlying :class:`Resolver`.
+
+     """
+
+     @classmethod
+     def from_matchers(cls, m: Matchers, /) -> Parser:
+         """from_matchers(Matchers) -> Parser
+
+         Instantiates a parser from the provided
+         :class:`~ua_parser.core.Matchers` using the default resolver
+         stack.
+
+         """
+         return cls(BestAvailableResolver(m))
+
+     def __init__(self, resolver: Resolver) -> None:
+         self.resolver = resolver
+
+     def __call__(self, ua: str, domains: Domain, /) -> PartialResult:
+         """Parses the ``ua`` string, returning a parse result with *at least*
+         the requested :class:`domains <Domain>` resolved (whether to success or
+         failure).
+         """
+         return self.resolver(ua, domains)
+
+     def parse(self: Resolver, ua: str) -> Result:
+         """Convenience method for parsing all domains."""
+         return self(ua, Domain.ALL).complete()
+
+     def parse_user_agent(self: Resolver, ua: str) -> Optional[UserAgent]:
+         """Convenience method for parsing the :class:`UserAgent` domain."""
+         return self(ua, Domain.USER_AGENT).user_agent
+
+     def parse_os(self: Resolver, ua: str) -> Optional[OS]:
+         """Convenience method for parsing the :class:`OS` domain."""
+         return self(ua, Domain.OS).os
+
+     def parse_device(self: Resolver, ua: str) -> Optional[Device]:
+         """Convenience method for parsing the :class:`Device` domain."""
+         return self(ua, Domain.DEVICE).device
+
+
+ parser: Parser
+ """Global :class:`Parser`, lazy-initialised on first access, used by
+ the global helper functions.
+
+ Can be *set* to configure a customised global parser.
+
+ Accessing the parser explicitly can be used to eagerly force its
+ initialisation, rather than pay for it at first call.
+ """
+
+
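+ # module-level __getattr__ (PEP 562): lazily initialises the global
+ # `parser` the first time the attribute is accessed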
+ def __getattr__(name: str) -> Parser:
+     global parser
+     if name == "parser":
+         if RegexResolver or Re2Resolver or IS_GRAAL:
+             matchers = load_lazy_builtins()
+         else:
+             matchers = load_builtins()
+         parser = Parser.from_matchers(matchers)
+         return parser
+     raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
+
+
+ def parse(ua: str) -> Result:
+     """Parses the :class:`.UserAgent`, :class:`.OS`, and :class:`.Device`
+     information using the :data:`global parser <parser>`.
+
+     Equivalent to calling each of :func:`parse_user_agent`,
+     :func:`parse_os`, and :func:`parse_device` but *may* be more
+     efficient than calling them separately depending on the underlying
+     parser.
+
+     Even in the best case, prefer the domain-specific helpers if
+     you're not going to use *all* of them.
+     """
+     # import required to trigger __getattr__ and initialise the
+     # parser; a bare `global` access would fail with a NameError
+     from . import parser
+
+     return parser(ua, Domain.ALL).complete()
+
+
+ def parse_user_agent(ua: str) -> Optional[UserAgent]:
+     """Parses the :class:`browser <.UserAgent>` information using the
+     :data:`global parser <parser>`.
+     """
+     from . import parser
+
+     return parser(ua, Domain.USER_AGENT).user_agent
+
+
+ def parse_os(ua: str) -> Optional[OS]:
+     """Parses the :class:`.OS` information using the :data:`global parser
+     <parser>`.
+     """
+     from . import parser
+
+     return parser(ua, Domain.OS).os
+
+
+ def parse_device(ua: str) -> Optional[Device]:
+     """Parses the :class:`.Device` information using the :data:`global
+     parser <parser>`.
+     """
+     from . import parser
+
+     return parser(ua, Domain.DEVICE).device
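A minimal usage sketch of the helpers above (illustrative, not part of the wheel; the user-agent string is a placeholder):

    from ua_parser import parse, parse_os

    # full parse across all domains, via the lazily-initialised global parser
    result = parse("Mozilla/5.0 (X11; Linux x86_64; rv:126.0) Gecko/20100101 Firefox/126.0")
    print(result.user_agent, result.os, result.device)

    # domain-specific helper, preferable when only one domain is needed
    os = parse_os("Mozilla/5.0 (X11; Linux x86_64; rv:126.0) Gecko/20100101 Firefox/126.0")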
ua_parser/__main__.py ADDED
@@ -0,0 +1,527 @@
+ import argparse
+ import bisect
+ import collections
+ import csv
+ import gc
+ import io
+ import itertools
+ import math
+ import os
+ import random
+ import sys
+ import threading
+ import time
+ import types
+ from typing import (
+     Any,
+     Callable,
+     Deque,
+     Dict,
+     Iterable,
+     List,
+     Optional,
+     Sequence,
+     Tuple,
+     Union,
+     cast,
+ )
+
+ from . import (
+     BasicResolver,
+     CachingResolver,
+     Domain,
+     Matchers,
+     Parser,
+     PartialResult,
+     Resolver,
+     caching,
+ )
+ from .caching import Cache, Local
+ from .loaders import load_builtins, load_yaml
+
+ try:
+     from .re2 import Resolver as Re2Resolver
+ except ImportError:
+     pass
+ try:
+     from .regex import Resolver as RegexResolver
+ except ImportError:
+     pass
+ from .user_agent_parser import Parse
+
+ CACHEABLE = {
+     "basic": True,
+     "re2": True,
+     "regex": True,
+     "legacy": False,
+ }
+
+
+ CACHES: Dict[str, Optional[Callable[[int], Cache]]] = {"none": None}
+ CACHES.update(
+     (cache.__name__.lower(), cache)
+     for cache in [
+         cast(Callable[[int], Cache], caching.Lru),
+         caching.S3Fifo,
+         caching.Sieve,
+     ]
+ )
+
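+ # tracemalloc may be unavailable on some interpreters; fall back to
+ # no-op stubs so the memory-overhead measurements simply report zero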
+ try:
+     import tracemalloc
+ except ImportError:
+     snapshot = types.SimpleNamespace(
+         compare_to=lambda _1, _2: [],
+     )
+     tracemalloc = types.SimpleNamespace(  # type: ignore
+         start=lambda: None,
+         take_snapshot=lambda: snapshot,
+     )
80
+
81
+
82
+ def get_rules(parsers: List[str], regexes: Optional[io.IOBase]) -> Matchers:
83
+ if regexes:
84
+ if not load_yaml:
85
+ sys.exit("yaml loading unavailable, please install pyyaml")
86
+
87
+ rules = load_yaml(regexes)
88
+ if "legacy" in parsers:
89
+ print(
90
+ "The legacy parser is incompatible with custom regexes, ignoring.",
91
+ file=sys.stderr,
92
+ )
93
+ parsers.remove("legacy")
94
+ else:
95
+ rules = load_builtins()
96
+
97
+ return rules
98
+
99
+
100
+ def run_stdout(args: argparse.Namespace) -> None:
101
+ lines = list(args.file)
102
+ count = len(lines)
103
+ uniques = len(set(lines))
104
+ print(f"{args.file.name}: {count} lines, {uniques} unique ({uniques/count:.0%})")
105
+
106
+ rules = get_rules(args.bases, args.regexes)
107
+
108
+ # width of the parser label
109
+ w = math.ceil(
110
+ 3
111
+ + max(map(len, args.bases))
112
+ + max(map(len, args.caches))
113
+ + max(map(math.log10, args.cachesizes))
114
+ )
115
+ for p, c, n in (
116
+ (p, c, n)
117
+ for p in args.bases
118
+ for c in (args.caches if CACHEABLE[p] and args.cachesizes != [0] else ["none"])
119
+ for n in (args.cachesizes if c != "none" else [0])
120
+ ):
121
+ name = "-".join(map(str, filter(None, (p, c != "none" and c, n))))
122
+ print(f"{name:{w}}", end=": ", flush=True)
123
+
124
+ p = get_parser(p, c, n, rules)
125
+ t = run(p, lines)
126
+
127
+ secs = t / 1e9
128
+ tpl = t / 1000 / len(lines)
129
+
130
+ print(f"{secs:>5.2f}s ({tpl:>4.0f}us/line)")
131
+
+
+ def run_csv(args: argparse.Namespace) -> None:
+     lines = list(args.file)
+     LEN = len(lines) * 1000
+     rules = get_rules(args.bases, args.regexes)
+
+     parsers = [
+         (p, c, n)
+         for p in args.bases
+         for c in (args.caches if CACHEABLE[p] else ["none"])
+         for n in (args.cachesizes if c != "none" else [0])
+     ]
+     if not parsers:
+         sys.exit("No parser selected")
+
+     columns = {"size": ""}
+     columns.update(
+         (f"{p}-{c}", p if c == "none" else f"{p}-{c}")
+         for p in args.bases
+         for c in (args.caches if CACHEABLE[p] else ["none"])
+     )
+     w = csv.DictWriter(
+         sys.stdout,
+         list(columns),
+         dialect="unix",
+         quoting=csv.QUOTE_MINIMAL,
+     )
+     w.writerow(columns)
+
+     parsers.sort(key=lambda t: t[2])
+     grouped = itertools.groupby(parsers, key=lambda t: t[2])
+
+     # these are the "template rows", which contain the no-cache
+     # runs which get replicated on every cachesize row
+     zeroes = {}
+     # if we have entries with no cache size, compute them first so
+     # we can apply them to every cachesize
+     if parsers[0][2] == 0:
+         (_, ps) = next(grouped)
+         # cache could be ignored as it should always be `"none"`
+         for parser, cache, _ in ps:
+             p = get_parser(parser, cache, 0, rules)
+             zeroes[f"{parser}-{cache}"] = run(p, lines) // LEN
+
+     # special cases for configurations where we can't have
+     # cachesize lines; write the template row out directly
+     if args.bases == ["legacy"] or args.caches == ["none"] or args.cachesizes == [0]:
+         zeroes["size"] = 0
+         w.writerow(zeroes)
+         return
+
+     for cachesize, ps in grouped:
+         row = dict(zeroes, size=cachesize)
+         for parser, cache, _ in ps:
+             p = get_parser(parser, cache, cachesize, rules)
+             row[f"{parser}-{cache}"] = run(p, lines) // LEN
+         w.writerow(row)
+
+
+ def get_parser(
+     parser: str, cache: str, cachesize: int, rules: Matchers
+ ) -> Callable[[str], Any]:
+     r: Resolver
+     if parser == "legacy":
+         return Parse
+     elif parser == "basic":
+         r = BasicResolver(rules)
+     elif parser == "re2":
+         r = Re2Resolver(rules)
+     elif parser == "regex":
+         r = RegexResolver(rules)
+     else:
+         sys.exit(f"unknown parser {parser!r}")
+
+     if cache not in CACHES:
+         sys.exit(f"unknown cache algorithm {cache!r}")
+
+     c = CACHES.get(cache)
+     if c is None:
+         return Parser(r).parse
+
+     return Parser(CachingResolver(r, c(cachesize))).parse
+
+
+ def run(
+     parse: Callable[[str], None],
+     lines: Iterable[str],
+ ) -> int:
+     t = time.perf_counter_ns()
+     for line in lines:
+         parse(line)
+     return time.perf_counter_ns() - t
+
+
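+ # Belady's MIN algorithm: on eviction, drop the entry whose next use
+ # is furthest in the future. It needs the whole request sequence up
+ # front, so it can only run offline; it serves as a theoretical upper
+ # bound on the hit rate any cache of the same size can reach.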
+ class Belady:
+     def __init__(self, maxsize: int, data: List[str]):
+         self.maxsize = maxsize
+         self.cache: Dict[str, PartialResult] = {}
+         self.queue: Deque[Tuple[int, str]] = collections.deque()
+         self.distances: Dict[str, List[int]] = {}
+         for i, e in enumerate(data):
+             self.distances.setdefault(e, []).append(i)
+         for freqs in self.distances.values():
+             freqs.reverse()
+
+     def __getitem__(self, key: str) -> Optional[PartialResult]:
+         self.distances[key].pop()
+         if c := self.cache.get(key):
+             # on cache hit, the entry should be the lowest in the
+             # queue
+             assert self.queue.popleft()[1] == key
+             # if the key has future occurrences
+             if ds := self.distances[key]:
+                 # reinsert in queue
+                 bisect.insort(self.queue, (ds[-1], key))
+             else:
+                 # otherwise remove from cache & occurrences map
+                 del self.cache[key]
+
+         return c
+
+     def __setitem__(self, key: str, entry: PartialResult) -> None:
+         # if there are no future occurrences just bail
+         ds = self.distances[key]
+         if not ds:
+             return
+
+         next_distance = ds[-1]
+         # if the cache is full, we need to make room (or skip the entry)
+         if len(self.cache) >= self.maxsize:
+             # if the next occurrence of the new entry is later than
+             # every existing occurrence, ignore it
+             if next_distance > self.queue[-1][0]:
+                 return
+             # otherwise evict the entry whose next use is furthest away
+             _, k = self.queue.pop()
+             del self.cache[k]
+
+         self.cache[key] = entry
+         bisect.insort(self.queue, (next_distance, key))
+
+
+ def run_hitrates(args: argparse.Namespace) -> None:
+     r = PartialResult(
+         domains=Domain.ALL,
+         string="",
+         user_agent=None,
+         os=None,
+         device=None,
+     )
+
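+     # stub resolver which returns a canned result and counts how many
+     # times it is called: wrapped in a CachingResolver, every call to
+     # it is a cache miss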
283
+ class Counter:
284
+ def __init__(self) -> None:
285
+ self.count = 0
286
+
287
+ def __call__(self, ua: str, domains: Domain, /) -> PartialResult:
288
+ self.count += 1
289
+ return r
290
+
291
+ lines = list(args.file)
292
+ total = len(lines)
293
+ uniques = len(set(lines))
294
+ print(total, "lines", uniques, "uniques")
295
+ print()
296
+ w = int(math.log10(max(args.cachesizes)) + 1)
297
+
298
+ def belady(maxsize: int) -> Cache:
299
+ return Belady(maxsize, lines)
300
+
301
+ tracemalloc.start()
302
+ for cache, cache_size in itertools.product(
303
+ itertools.chain([belady], filter(None, CACHES.values())),
304
+ args.cachesizes,
305
+ ):
306
+ misses = Counter()
307
+ gc.collect()
308
+ before = tracemalloc.take_snapshot()
309
+ parser = Parser(CachingResolver(misses, cache(cache_size)))
310
+ for line in lines:
311
+ parser.parse(line)
312
+ gc.collect()
313
+ after = tracemalloc.take_snapshot()
314
+ if cache == belady:
315
+ diff = "{0:>14} {0:>12}".format("-")
316
+ else:
317
+ overhead = sum(s.size_diff for s in after.compare_to(before, "filename"))
318
+ diff = "{:8} bytes ({:3.0f}b/entry)".format(
319
+ overhead,
320
+ overhead / cache_size,
321
+ )
322
+ print(
323
+ f"{cache.__name__.lower():8}({cache_size:{w}}): {(total - misses.count)/total*100:2.0f}% hit rate {diff}"
324
+ )
325
+ del misses, parser
326
+
327
+
+ CACHESIZE = 1000
+
+
+ def worker(
+     start: threading.Event,
+     parser: Parser,
+     lines: Iterable[str],
+     end: threading.Barrier,
+ ) -> None:
+     start.wait()
+
+     for ua in lines:
+         parser.parse(ua)
+
+     end.wait()
+
+
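+ # all worker threads start parsing on the same signal, and the main
+ # thread joins them through the barrier to time the complete run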
+ def run_threaded(args: argparse.Namespace) -> None:
+     lines = list(args.file)
+     basic = BasicResolver(load_builtins())
+     resolvers: List[Tuple[str, Resolver]] = [
+         ("locking-lru", CachingResolver(basic, caching.Lru(CACHESIZE))),
+         ("local-lru", CachingResolver(basic, Local(lambda: caching.Lru(CACHESIZE)))),
+         ("re2", Re2Resolver(load_builtins())),
+         ("regex", RegexResolver(load_builtins())),
+     ]
+     for name, resolver in resolvers:
+         print(f"{name:11}: ", end="", flush=True)
+         # randomize the dataset for each thread, predictably, to
+         # simulate distributed load (not great but better than
+         # nothing, and probably better than reusing the exact same
+         # load)
+         r = random.Random(42)
+         start = threading.Event()
+         end = threading.Barrier(args.threads + 1)
+
+         parser = Parser(resolver)
+         for _ in range(args.threads):
+             threading.Thread(
+                 target=worker,
+                 args=(start, parser, r.sample(lines, len(lines)), end),
+                 daemon=True,
+             ).start()
+
+         st = time.perf_counter_ns()
+         start.set()
+         end.wait()
+
+         # each thread processes len(lines) entries, so the total number
+         # of processed lines is args.threads * len(lines)
+         totlines = len(lines) * args.threads
+         # runtime in us
+         t = (time.perf_counter_ns() - st) / 1000
+         print(f"{t/totlines:>4.0f}us/line", flush=True)
+
+
+ EPILOG = """For good results the sample `file` should be an actual
+ non-sorted non-deduplicated sample of user agent strings from traffic
+ on a comparable (or the actual) site or application targeted for
+ classification."""
+
+ parser = argparse.ArgumentParser(prog="ua_parser", epilog=EPILOG)
+ parser.set_defaults(func=None)
+
+ fp = argparse.ArgumentParser(add_help=False)
+ fp.add_argument(
+     "file",
+     type=argparse.FileType("r", encoding="utf-8"),
+     help="Sample user agent file, the file must contain a single user agent "
+     "string per line, use `-` for stdin.",
+ )
+
+ sub = parser.add_subparsers(title="commands")
+
+ bench = sub.add_parser(
+     "bench",
+     help="benchmark various parser configurations on sample files",
+     parents=[fp],
+     epilog=EPILOG,
+     description="""Different sites and applications can have different
+     traffic patterns, and thus want different setups and tradeoffs.
+     This subcommand allows testing ua-parser's different base
+     resolvers, caches, and cache sizes in order to customise the
+     parser to the application's requirements. It's also useful to
+     bench the library itself.""",
+ )
+ bench.add_argument(
+     "-R",
+     "--regexes",
+     type=argparse.FileType("rb"),
+     help="""Custom regexes.yaml file, if omitted the benchmark will
+     use the embedded regexes file from uap-core. Custom regexes files
+     can allow evaluating the performance impact of new rules or
+     cut-down reference files (if legacy rules are not relevant to your
+     needs). Because YAML is (mostly) a superset of JSON, JSON regexes
+     files will also work fine.""",
+ )
+
+
+ class ToFunc(argparse.Action):
+     def __call__(
+         self,
+         parser: argparse.ArgumentParser,
+         namespace: argparse.Namespace,
+         values: Union[str, Sequence[str], None],
+         option_string: Optional[str] = None,
+     ) -> None:
+         if values == "stdout":
+             setattr(namespace, self.dest, run_stdout)
+         elif values == "csv":
+             setattr(namespace, self.dest, run_csv)
+         else:
+             raise ValueError(f"invalid output {values!r}")
+
+
+ bench.add_argument(
+     "-O",
+     "--output",
+     choices=["stdout", "csv"],
+     default=run_stdout,
+     dest="func",
+     action=ToFunc,
+     help="""By default (`stdout`) the result of each configuration /
+     combination is printed to stdout with the combination name
+     followed by the total parse time for the file and the per-entry
+     average. `csv` will instead output a valid CSV table to stdout,
+     with a parser combination per column and a cache size per row.
+     Combinations without cache will have the same value on every row.
+     If no combination uses a cache, the output will have a single row
+     with a first cell of value 0.""",
+ )
+ bench.add_argument(
+     "--bases",
+     nargs="+",
+     choices=["basic", "re2", "regex", "legacy"],
+     default=["basic", "re2", "regex", "legacy"],
+     help="""Base resolvers to benchmark. `basic` is a linear search
+     through the regexes file, `re2` is a prefiltered regex set
+     implemented in C++, `regex` is a prefiltered regex set implemented
+     in Rust, `legacy` is the legacy API (essentially a basic resolver
+     with a clearing cache of fixed 200 entries, but less layered so
+     usually slightly faster than an equivalent basic-based resolver).""",
+ )
+ bench.add_argument(
+     "--caches",
+     nargs="+",
+     choices=list(CACHES),
+     default=list(CACHES),
+     help="""Cache implementations to test. `none` disables caching,
+     `lru` evicts the least recently used entry when full, `s3fifo`
+     and `sieve` are more recent FIFO-based eviction algorithms.""",
+ )
+ bench.add_argument(
+     "--cachesizes",
+     nargs="+",
+     type=int,
+     default=[10, 20, 50, 100, 200, 500, 1000, 2000, 5000],
+     help="""Caches are a classic way to trade memory for performance.
+     Different base resolvers and traffic patterns benefit differently
+     from caching; this option allows testing the benefits of various
+     cache sizes (and thus amounts of memory used) on the cache
+     strategies.""",
+ )
+
+ hitrates = sub.add_parser(
+     "hitrates",
+     help="measure hitrates of cache configurations against sample files",
+     parents=[fp],
+     epilog=EPILOG,
+ )
+ hitrates.set_defaults(func=run_hitrates)
+ hitrates.add_argument(
+     "--cachesizes",
+     nargs="+",
+     type=int,
+     default=[10, 20, 50, 100, 200, 500, 1000, 2000, 5000],
+     help="""List of cache sizes to test hitrates for, for each cache
+     algorithm.""",
+ )
+
+ threaded = sub.add_parser(
+     "threading",
+     help="estimate impact of concurrency and contention on different parser configurations",
+     parents=[fp],
+     epilog=EPILOG,
+ )
+ threaded.set_defaults(func=run_threaded)
+ threaded.add_argument(
+     "-n",
+     "--threads",
+     type=int,
+     default=os.cpu_count() or 1,
+ )
+
+ args = parser.parse_args()
+ if args.func:
+     args.func(args)
+ else:
+     parser.print_help()
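A usage sketch for the CLI defined above (`sample.txt` stands in for a file with one user-agent string per line):

    python -m ua_parser bench sample.txt --bases basic regex --cachesizes 100 1000
    python -m ua_parser bench sample.txt -O csv > results.csv
    python -m ua_parser hitrates sample.txt
    python -m ua_parser threading -n 4 sample.txt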