voidaccess 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142) hide show
  1. analysis/__init__.py +49 -0
  2. analysis/opsec.py +454 -0
  3. analysis/patterns.py +202 -0
  4. analysis/temporal.py +201 -0
  5. api/__init__.py +1 -0
  6. api/auth.py +163 -0
  7. api/main.py +509 -0
  8. api/routes/__init__.py +1 -0
  9. api/routes/admin.py +214 -0
  10. api/routes/auth.py +157 -0
  11. api/routes/entities.py +871 -0
  12. api/routes/export.py +359 -0
  13. api/routes/investigations.py +2567 -0
  14. api/routes/monitors.py +405 -0
  15. api/routes/search.py +157 -0
  16. api/routes/settings.py +851 -0
  17. auth/__init__.py +1 -0
  18. auth/token_blacklist.py +108 -0
  19. cli/__init__.py +3 -0
  20. cli/adapters/__init__.py +1 -0
  21. cli/adapters/sqlite.py +273 -0
  22. cli/browser.py +376 -0
  23. cli/commands/__init__.py +1 -0
  24. cli/commands/configure.py +185 -0
  25. cli/commands/enrich.py +154 -0
  26. cli/commands/export.py +158 -0
  27. cli/commands/investigate.py +601 -0
  28. cli/commands/show.py +87 -0
  29. cli/config.py +180 -0
  30. cli/display.py +212 -0
  31. cli/main.py +154 -0
  32. cli/tor_detect.py +71 -0
  33. config.py +180 -0
  34. crawler/__init__.py +28 -0
  35. crawler/dedup.py +97 -0
  36. crawler/frontier.py +115 -0
  37. crawler/spider.py +462 -0
  38. crawler/utils.py +122 -0
  39. db/__init__.py +47 -0
  40. db/migrations/__init__.py +0 -0
  41. db/migrations/env.py +80 -0
  42. db/migrations/versions/0001_initial_schema.py +270 -0
  43. db/migrations/versions/0002_add_investigation_status_column.py +27 -0
  44. db/migrations/versions/0002_add_missing_tables.py +33 -0
  45. db/migrations/versions/0003_add_canonical_value_and_entity_links.py +61 -0
  46. db/migrations/versions/0004_add_page_posted_at.py +41 -0
  47. db/migrations/versions/0005_add_extraction_method.py +32 -0
  48. db/migrations/versions/0006_add_monitor_alerts.py +26 -0
  49. db/migrations/versions/0007_add_actor_style_profiles.py +23 -0
  50. db/migrations/versions/0008_add_users_table.py +47 -0
  51. db/migrations/versions/0009_add_investigation_id_to_relationships.py +29 -0
  52. db/migrations/versions/0010_add_composite_index_entity_relationships.py +22 -0
  53. db/migrations/versions/0011_add_page_extraction_cache.py +52 -0
  54. db/migrations/versions/0013_add_graph_status.py +31 -0
  55. db/migrations/versions/0015_add_progress_fields.py +41 -0
  56. db/migrations/versions/0016_backfill_graph_status.py +33 -0
  57. db/migrations/versions/0017_add_user_api_keys.py +44 -0
  58. db/migrations/versions/0018_add_user_id_to_investigations.py +33 -0
  59. db/migrations/versions/0019_add_content_safety_log.py +46 -0
  60. db/migrations/versions/0020_add_entity_source_tracking.py +50 -0
  61. db/models.py +618 -0
  62. db/queries.py +841 -0
  63. db/session.py +270 -0
  64. export/__init__.py +34 -0
  65. export/misp.py +257 -0
  66. export/sigma.py +342 -0
  67. export/stix.py +418 -0
  68. extractor/__init__.py +21 -0
  69. extractor/llm_extract.py +372 -0
  70. extractor/ner.py +512 -0
  71. extractor/normalizer.py +638 -0
  72. extractor/pipeline.py +401 -0
  73. extractor/regex_patterns.py +325 -0
  74. fingerprint/__init__.py +33 -0
  75. fingerprint/profiler.py +240 -0
  76. fingerprint/stylometry.py +249 -0
  77. graph/__init__.py +73 -0
  78. graph/builder.py +894 -0
  79. graph/export.py +225 -0
  80. graph/model.py +83 -0
  81. graph/queries.py +297 -0
  82. graph/visualize.py +178 -0
  83. i18n/__init__.py +24 -0
  84. i18n/detect.py +76 -0
  85. i18n/query_expand.py +72 -0
  86. i18n/translate.py +210 -0
  87. monitor/__init__.py +27 -0
  88. monitor/_db.py +74 -0
  89. monitor/alerts.py +345 -0
  90. monitor/config.py +118 -0
  91. monitor/diff.py +75 -0
  92. monitor/jobs.py +247 -0
  93. monitor/scheduler.py +184 -0
  94. scraper/__init__.py +0 -0
  95. scraper/scrape.py +857 -0
  96. scraper/scrape_js.py +272 -0
  97. search/__init__.py +318 -0
  98. search/circuit_breaker.py +240 -0
  99. search/search.py +334 -0
  100. sources/__init__.py +96 -0
  101. sources/blockchain.py +444 -0
  102. sources/cache.py +93 -0
  103. sources/cisa.py +108 -0
  104. sources/dns_enrichment.py +557 -0
  105. sources/domain_reputation.py +643 -0
  106. sources/email_reputation.py +635 -0
  107. sources/engines.py +244 -0
  108. sources/enrichment.py +1244 -0
  109. sources/github_scraper.py +589 -0
  110. sources/gitlab_scraper.py +624 -0
  111. sources/hash_reputation.py +856 -0
  112. sources/historical_intel.py +253 -0
  113. sources/ip_reputation.py +521 -0
  114. sources/paste_scraper.py +484 -0
  115. sources/pastes.py +278 -0
  116. sources/rss_scraper.py +576 -0
  117. sources/seed_manager.py +373 -0
  118. sources/seeds.py +368 -0
  119. sources/shodan.py +103 -0
  120. sources/telegram.py +199 -0
  121. sources/virustotal.py +113 -0
  122. utils/__init__.py +0 -0
  123. utils/async_utils.py +89 -0
  124. utils/content_safety.py +193 -0
  125. utils/defang.py +94 -0
  126. utils/encryption.py +34 -0
  127. utils/ioc_freshness.py +124 -0
  128. utils/user_keys.py +33 -0
  129. vector/__init__.py +39 -0
  130. vector/embedder.py +100 -0
  131. vector/model_singleton.py +49 -0
  132. vector/search.py +87 -0
  133. vector/store.py +514 -0
  134. voidaccess/__init__.py +0 -0
  135. voidaccess/llm.py +717 -0
  136. voidaccess/llm_utils.py +696 -0
  137. voidaccess-1.3.0.dist-info/METADATA +395 -0
  138. voidaccess-1.3.0.dist-info/RECORD +142 -0
  139. voidaccess-1.3.0.dist-info/WHEEL +5 -0
  140. voidaccess-1.3.0.dist-info/entry_points.txt +2 -0
  141. voidaccess-1.3.0.dist-info/licenses/LICENSE +21 -0
  142. voidaccess-1.3.0.dist-info/top_level.txt +19 -0
extractor/ner.py ADDED
@@ -0,0 +1,512 @@
1
+ """
2
+ extractor/ner.py — Named Entity Recognition for entities without fixed patterns.
3
+
4
+ Uses spaCy (en_core_web_sm) as a module-level singleton. If the model is not
5
+ installed the module still imports cleanly — organization-name NER is skipped
6
+ with a logged warning, while dictionary and heuristic matching still run.
7
+
8
+ Uses a bundled dictionary of 200+ malware family names for MALWARE_FAMILY and
9
+ RANSOMWARE_GROUP detection (word-bounded, case-insensitive).
10
+
11
+ Public interface
12
+ ----------------
13
+ extract_named_entities(text) → dict[str, list[str]]
14
+ load_malware_dictionary() → set[str]
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import logging
20
+ import re
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+ # ---------------------------------------------------------------------------
25
+ # NER entity type constants (supplements regex_patterns constants)
26
+ # ---------------------------------------------------------------------------
27
+
28
+ THREAT_ACTOR_HANDLE = "THREAT_ACTOR_HANDLE"
29
+ MALWARE_FAMILY = "MALWARE_FAMILY"
30
+ RANSOMWARE_GROUP = "RANSOMWARE_GROUP"
31
+ ORGANIZATION_NAME = "ORGANIZATION_NAME"
32
+
33
+ # ---------------------------------------------------------------------------
34
+ # Malware family dictionary
35
+ # ---------------------------------------------------------------------------
36
+
37
+ _MALWARE_DICT: set[str] = {
38
+ # Ransomware families — active and historical
39
+ "LockBit", "LockBit 2.0", "LockBit 3.0",
40
+ "BlackCat", "ALPHV",
41
+ "Cl0p", "Clop",
42
+ "REvil", "Sodinokibi",
43
+ "Conti",
44
+ "BlackMatter",
45
+ "Hive",
46
+ "Vice Society",
47
+ "Play",
48
+ "Royal",
49
+ "Akira",
50
+ "BlackSuit",
51
+ "Avaddon",
52
+ "DarkSide",
53
+ "Maze",
54
+ "Ryuk",
55
+ "Egregor",
56
+ "Babuk",
57
+ "DoppelPaymer",
58
+ "MedusaLocker",
59
+ "Prometheus",
60
+ "Grief",
61
+ "Ragnar Locker",
62
+ "RagnarLocker",
63
+ "Cuba",
64
+ "BlackBasta",
65
+ "Black Basta",
66
+ "Yanluowang",
67
+ "Quantum",
68
+ "Monti",
69
+ "Nokoyawa",
70
+ "Trigona",
71
+ "Rhysida",
72
+ "Hunters International",
73
+ "Cactus",
74
+ "INC Ransom",
75
+ "Meow",
76
+ "MedusaBIG",
77
+ "KillSec",
78
+ "Dispossessor",
79
+ "Eldorado",
80
+ "SenSayQ",
81
+ "RansomHub",
82
+ "DragonForce",
83
+ "Scattered Spider",
84
+ "Dark Angels",
85
+ "8Base",
86
+ "Qilin",
87
+ "Fog",
88
+ "Lynx",
89
+ "Cicada3301",
90
+ "Embargo",
91
+ "Karakurt",
92
+ "LV",
93
+ "Entropy",
94
+ "Vice",
95
+ "Zeppelin",
96
+ "Dharma",
97
+ "Phobos",
98
+ "Xorist",
99
+ "Globeimposter",
100
+ "Makop",
101
+ "Stop",
102
+ "Djvu",
103
+ "WannaCry",
104
+ "WannaCryptor",
105
+ "Petya",
106
+ "NotPetya",
107
+ "GoldenEye",
108
+ "BadRabbit",
109
+ "SamSam",
110
+ "Cerber",
111
+ "Locky",
112
+ "CryptoLocker",
113
+ "TeslaCrypt",
114
+ "Cryptowall",
115
+ "Jigsaw",
116
+ "Philadelphia",
117
+ "Stampado",
118
+ "Shade",
119
+ "Troldesh",
120
+ "Reveton",
121
+ "KeRanger",
122
+ "Erebus",
123
+ "Satan",
124
+ "GandCrab",
125
+ "Scarab",
126
+ "GlobeImposter",
127
+ "Sodinokibi",
128
+ # RATs — remote access trojans
129
+ "AsyncRAT",
130
+ "QuasarRAT",
131
+ "Quasar",
132
+ "NjRAT",
133
+ "njRAT",
134
+ "DarkComet",
135
+ "Remcos",
136
+ "NetWire",
137
+ "XWorm",
138
+ "Warzone",
139
+ "Warzone RAT",
140
+ "Agent Tesla",
141
+ "AgentTesla",
142
+ "BitRAT",
143
+ "RevengeRAT",
144
+ "Orcus",
145
+ "Gh0st",
146
+ "Gh0stRAT",
147
+ "Havoc",
148
+ "Sliver",
149
+ "Cobalt Strike",
150
+ "CobaltStrike",
151
+ "Metasploit",
152
+ "Empire",
153
+ "PowerShell Empire",
154
+ "Mythic",
155
+ "Brute Ratel",
156
+ "BruteRatel",
157
+ "PoshC2",
158
+ "Covenant",
159
+ "Merlin",
160
+ "SILENTTRINITY",
161
+ "Nishang",
162
+ "Pupy",
163
+ "Koadic",
164
+ # Stealers — credential and data theft
165
+ "RedLine",
166
+ "Raccoon",
167
+ "Raccoon Stealer",
168
+ "Vidar",
169
+ "Mars",
170
+ "Aurora",
171
+ "Lumma",
172
+ "Lumma Stealer",
173
+ "LummaC2",
174
+ "AZORult",
175
+ "Azorult",
176
+ "FormBook",
177
+ "Snake Keylogger",
178
+ "SnakeKeylogger",
179
+ "HawkEye",
180
+ "Predator",
181
+ "Predator the Thief",
182
+ "Ducktail",
183
+ "Rhadamanthys",
184
+ "WhiteSnake",
185
+ "Atomic Stealer",
186
+ "AMOS",
187
+ "StealC",
188
+ "Meduza",
189
+ "MetaStealer",
190
+ "RisePro",
191
+ "Mystic",
192
+ "CryptBot",
193
+ "Cryptbot",
194
+ "Panda Stealer",
195
+ "BlackGuard",
196
+ "Titan Stealer",
197
+ "Erbium",
198
+ "Eternity Stealer",
199
+ "Oski",
200
+ "Krypton Stealer",
201
+ "Luca Stealer",
202
+ "Spectre Stealer",
203
+ # Loaders — malware delivery mechanisms
204
+ "SmokeLoader",
205
+ "Smoke Loader",
206
+ "IcedID",
207
+ "Emotet",
208
+ "QakBot",
209
+ "Qakbot",
210
+ "Bumblebee",
211
+ "GootLoader",
212
+ "PrivateLoader",
213
+ "GuLoader",
214
+ "CloudEyE",
215
+ "DanaBot",
216
+ "Amadey",
217
+ "RCSession",
218
+ "PureCrypter",
219
+ "DonutLoader",
220
+ "ModiLoader",
221
+ "AiBotLoader",
222
+ "Loader",
223
+ "SystemBC",
224
+ "Matanbuchus",
225
+ "Gozi",
226
+ "DBatLoader",
227
+ "MalDoc",
228
+ "XLoader",
229
+ "FormBook",
230
+ "MoqHao",
231
+ "Pikabot",
232
+ "Darkgate",
233
+ "DarkGate",
234
+ "Latrodectus",
235
+ "WarmCookie",
236
+ # Banking trojans
237
+ "TrickBot",
238
+ "Trickbot",
239
+ "Dridex",
240
+ "Ursnif",
241
+ "ZLoader",
242
+ "Zloader",
243
+ "Gozi",
244
+ "ISFB",
245
+ "Ramnit",
246
+ "Qbot",
247
+ "QBot",
248
+ "Shylock",
249
+ "Kronos",
250
+ "Zeus",
251
+ "SpyEye",
252
+ "Carbanak",
253
+ "FIN7",
254
+ "Valak",
255
+ "BazarLoader",
256
+ "BazarBackdoor",
257
+ "IcedID",
258
+ "TaurusLoader",
259
+ "Bookworm",
260
+ "Casbaneiro",
261
+ "Mekotio",
262
+ "Grandoreiro",
263
+ "Javali",
264
+ "Vizom",
265
+ # APT / nation-state tools
266
+ "PlugX",
267
+ "ShadowPad",
268
+ "Winnti",
269
+ "Flame",
270
+ "Shamoon",
271
+ "BlackEnergy",
272
+ "GreyEnergy",
273
+ "Industroyer",
274
+ "Stuxnet",
275
+ "Turla",
276
+ "Snake",
277
+ "ComRAT",
278
+ "Duqu",
279
+ "Gauss",
280
+ "MiniFlame",
281
+ "Regin",
282
+ "ProjectSauron",
283
+ "EternalBlue",
284
+ "DoublePulsar",
285
+ "WannaMine",
286
+ # Post-exploitation / red team tools
287
+ "Mimikatz",
288
+ "BloodHound",
289
+ "SharpHound",
290
+ "Responder",
291
+ "Impacket",
292
+ "LaZagne",
293
+ "Rubeus",
294
+ "Certify",
295
+ "Seatbelt",
296
+ "PowerView",
297
+ "PowerSploit",
298
+ "Nmap",
299
+ "Metasploit",
300
+ "Burp Suite",
301
+ "SQLMap",
302
+ "Nikto",
303
+ }
304
+
305
+ # Ransomware group subset (active RaaS operators)
306
+ _RANSOMWARE_DICT: set[str] = {
307
+ "LockBit", "LockBit 2.0", "LockBit 3.0",
308
+ "BlackCat", "ALPHV",
309
+ "Cl0p", "Clop",
310
+ "REvil", "Sodinokibi",
311
+ "Conti",
312
+ "BlackMatter",
313
+ "Hive",
314
+ "Vice Society",
315
+ "Play",
316
+ "Royal",
317
+ "Akira",
318
+ "BlackSuit",
319
+ "Avaddon",
320
+ "DarkSide",
321
+ "Maze",
322
+ "Ryuk",
323
+ "Egregor",
324
+ "Babuk",
325
+ "DoppelPaymer",
326
+ "MedusaLocker",
327
+ "Prometheus",
328
+ "Grief",
329
+ "Ragnar Locker",
330
+ "RagnarLocker",
331
+ "Cuba",
332
+ "BlackBasta",
333
+ "Black Basta",
334
+ "Yanluowang",
335
+ "Quantum",
336
+ "Monti",
337
+ "Nokoyawa",
338
+ "Trigona",
339
+ "Rhysida",
340
+ "Hunters International",
341
+ "Cactus",
342
+ "INC Ransom",
343
+ "KillSec",
344
+ "Dispossessor",
345
+ "Eldorado",
346
+ "SenSayQ",
347
+ "RansomHub",
348
+ "DragonForce",
349
+ "Scattered Spider",
350
+ "Dark Angels",
351
+ "8Base",
352
+ "Qilin",
353
+ "Fog",
354
+ "Lynx",
355
+ "Cicada3301",
356
+ "Embargo",
357
+ "Karakurt",
358
+ "GandCrab",
359
+ "SamSam",
360
+ "WannaCry",
361
+ "NotPetya",
362
+ "Petya",
363
+ }
364
+
365
+ # ---------------------------------------------------------------------------
366
+ # Build compiled patterns from the dictionaries (at module load time)
367
+ # ---------------------------------------------------------------------------
368
+
369
+ def _build_pattern(names: set[str]) -> re.Pattern:
370
+ """Build a word-bounded alternation pattern sorted longest-first."""
371
+ sorted_names = sorted(names, key=len, reverse=True)
372
+ alternation = "|".join(re.escape(name) for name in sorted_names)
373
+ return re.compile(rf"\b(?:{alternation})\b", re.IGNORECASE)
374
+
375
+
376
+ _MALWARE_RE = _build_pattern(_MALWARE_DICT)
377
+ _RANSOMWARE_RE = _build_pattern(_RANSOMWARE_DICT)
378
+
379
+ # ---------------------------------------------------------------------------
380
+ # Heuristic threat-actor handle detection
381
+ # Context patterns: "posted by X", "user X", "alias X", "known as X", etc.
382
+ # Handle: 3–30 chars, may contain underscores / dots / hyphens but not
383
+ # starting or ending with them; must not be a plain email address.
384
+ # ---------------------------------------------------------------------------
385
+
386
+ _HANDLE_CHAR = r"[a-zA-Z0-9][a-zA-Z0-9_.\-]{1,28}[a-zA-Z0-9]"
387
+ _HANDLE_RE = re.compile(
388
+ r"(?:"
389
+ r"posted\s+by|user\s+|alias\s+|known\s+as|by\s+user"
390
+ r"|from\s+user|handle\s+|nickname\s+|nick\s+"
391
+ r"|op\s+is|author\s+|authored\s+by|written\s+by"
392
+ r")\s*(" + _HANDLE_CHAR + r")",
393
+ re.IGNORECASE,
394
+ )
395
+
396
+ # Words that are common English nouns/verbs that may false-positive as handles
397
+ _COMMON_WORDS: frozenset[str] = frozenset({
398
+ "admin", "moderator", "user", "guest", "anon", "anonymous",
399
+ "unknown", "nobody", "someone", "anyone", "everyone",
400
+ "the", "and", "not", "for", "with", "that", "this",
401
+ })
402
+
403
+ # Threat context words used to filter spaCy ORG entities
404
+ _THREAT_CONTEXT: frozenset[str] = frozenset({
405
+ "breach", "leak", "attack", "ransom", "victim", "target",
406
+ "compromised", "hacked", "stolen", "exfiltrated", "extorted",
407
+ "encrypted", "infected", "malware", "ransomware", "exploit",
408
+ "vulnerability", "phishing", "credentials", "data",
409
+ })
410
+
411
+ # ---------------------------------------------------------------------------
412
+ # spaCy singleton — loaded lazily on first call, never reloaded
413
+ # ---------------------------------------------------------------------------
414
+
415
+ _nlp = None
416
+ _nlp_attempted = False
417
+
418
+
419
def _get_nlp():
    """Return the shared spaCy pipeline, loading it on the first call only.

    The load of ``en_core_web_sm`` is attempted at most once per process.
    If importing spaCy or loading the model fails for any reason, a warning
    is logged and ``None`` is returned on this and every later call.
    """
    global _nlp, _nlp_attempted
    if not _nlp_attempted:
        # Flip the flag before trying so a failed load is never retried.
        _nlp_attempted = True
        try:
            import spacy  # noqa: PLC0415
            _nlp = spacy.load("en_core_web_sm")
            logger.info("spaCy en_core_web_sm loaded successfully")
        except Exception as exc:
            logger.warning(
                "spaCy model en_core_web_sm is not available — NER will be skipped. "
                "Install with: python -m spacy download en_core_web_sm. Error: %s",
                exc,
            )
            _nlp = None
    return _nlp
436
+
437
+
438
+ # ---------------------------------------------------------------------------
439
+ # Public interface
440
+ # ---------------------------------------------------------------------------
441
+
442
+
443
def load_malware_dictionary() -> set[str]:
    """Return a fresh copy of the malware family names used for matching.

    The returned set is independent of the module-level dictionary, so
    callers may mutate it freely.
    """
    return _MALWARE_DICT.copy()
446
+
447
+
448
def extract_named_entities(text: str) -> dict[str, list[str]]:
    """
    Find entities that cannot be described by a single fixed regex.

    The returned dict always contains the four keys THREAT_ACTOR_HANDLE,
    MALWARE_FAMILY, RANSOMWARE_GROUP and ORGANIZATION_NAME, each mapped to
    a list of unique matches in order of first appearance.

    Malware / ransomware names and threat-actor handles are matched with
    the module's compiled patterns and need no spaCy model. Organization
    names are only extracted when the spaCy pipeline is available and the
    text contains at least one threat-context term.

    Any unexpected error is logged and whatever was collected up to that
    point is returned; this function never raises.
    """
    found: dict[str, list[str]] = {
        THREAT_ACTOR_HANDLE: [],
        MALWARE_FAMILY: [],
        RANSOMWARE_GROUP: [],
        ORGANIZATION_NAME: [],
    }

    try:
        # Dictionary-driven matches (independent of spaCy).
        found[MALWARE_FAMILY] = _dedup(
            hit.group(0) for hit in _MALWARE_RE.finditer(text)
        )
        found[RANSOMWARE_GROUP] = _dedup(
            hit.group(0) for hit in _RANSOMWARE_RE.finditer(text)
        )

        # Handles introduced by a context phrase, minus generic words and
        # anything containing "@".
        candidates = (hit.group(1).strip() for hit in _HANDLE_RE.finditer(text))
        found[THREAT_ACTOR_HANDLE] = _dedup(
            name
            for name in candidates
            if not (name.lower() in _COMMON_WORDS or "@" in name)
        )

        # Organization names require the spaCy pipeline ...
        nlp = _get_nlp()
        if nlp is None:
            return found

        # ... and at least one threat-context term somewhere in the text.
        lowered = text.lower()
        if not any(term in lowered for term in _THREAT_CONTEXT):
            return found

        doc = nlp(text[:100_000])  # cap for performance
        found[ORGANIZATION_NAME] = _dedup(
            ent.text.strip() for ent in doc.ents if ent.label_ == "ORG"
        )

    except Exception:
        logger.exception("extract_named_entities encountered an unexpected error")

    return found
498
+
499
+
500
+ # ---------------------------------------------------------------------------
501
+ # Internal helpers
502
+ # ---------------------------------------------------------------------------
503
+
504
+
505
+ def _dedup(values) -> list[str]:
506
+ seen: set[str] = set()
507
+ result: list[str] = []
508
+ for v in values:
509
+ if v not in seen:
510
+ seen.add(v)
511
+ result.append(v)
512
+ return result