txt2stix 1.0.1.post1__tar.gz → 1.0.1.post3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (257) hide show
  1. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/PKG-INFO +37 -23
  2. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/README.md +27 -17
  3. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/pyproject.toml +8 -20
  4. txt2stix-1.0.1.post3/tests/data/manually_generated_reports/not_security_content.txt +1 -0
  5. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/manual-tests/cases-standard-tests.md +16 -0
  6. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/src/test_indicator.py +2 -2
  7. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/src/test_main.py +2 -3
  8. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/src/test_run_txt2stix.py +5 -4
  9. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/ai_extractor/__init__.py +2 -1
  10. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/extractions.py +6 -2
  11. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/txt2stix.py +132 -32
  12. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/.env.example +0 -0
  13. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/.env.markdown +0 -0
  14. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/.github/workflows/create-release.yml +0 -0
  15. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/.github/workflows/run-tests.yml +0 -0
  16. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/.gitignore +0 -0
  17. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/LICENSE +0 -0
  18. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/docs/README.md +0 -0
  19. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/docs/stix-mapping.md +0 -0
  20. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/docs/txt2stix.png +0 -0
  21. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/includes/__init__.py +0 -0
  22. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/includes/extractions/ai/config.yaml +0 -0
  23. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/includes/extractions/lookup/config.yaml +0 -0
  24. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/includes/extractions/pattern/config.yaml +0 -0
  25. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/includes/helpers/mimetype_filename_extension_list.csv +0 -0
  26. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/includes/helpers/stix_relationship_types.txt +0 -0
  27. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/includes/helpers/tlds.txt +0 -0
  28. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/includes/helpers/windows_registry_key_prefix.txt +0 -0
  29. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/includes/lookups/_README.md +0 -0
  30. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/includes/lookups/_generate_lookups.py +0 -0
  31. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/includes/lookups/attack_pattern.txt +0 -0
  32. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/includes/lookups/campaign.txt +0 -0
  33. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/includes/lookups/country_iso3166_alpha2.txt +0 -0
  34. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/includes/lookups/course_of_action.txt +0 -0
  35. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/includes/lookups/disarm_id_v1_5.txt +0 -0
  36. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/includes/lookups/disarm_name_v1_5.txt +0 -0
  37. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/includes/lookups/extensions.txt +0 -0
  38. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/includes/lookups/identity.txt +0 -0
  39. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/includes/lookups/infrastructure.txt +0 -0
  40. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/includes/lookups/intrusion_set.txt +0 -0
  41. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/includes/lookups/malware.txt +0 -0
  42. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/includes/lookups/mitre_atlas_id_v4_5_2.txt +0 -0
  43. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/includes/lookups/mitre_atlas_name_v4_5_2.txt +0 -0
  44. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/includes/lookups/mitre_attack_enterprise_aliases_v16_0.txt +0 -0
  45. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/includes/lookups/mitre_attack_enterprise_id_v16_0.txt +0 -0
  46. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/includes/lookups/mitre_attack_enterprise_name_v16_0.txt +0 -0
  47. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/includes/lookups/mitre_attack_ics_aliases_v16_0.txt +0 -0
  48. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/includes/lookups/mitre_attack_ics_id_v16_0.txt +0 -0
  49. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/includes/lookups/mitre_attack_ics_name_v16_0.txt +0 -0
  50. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/includes/lookups/mitre_attack_mobile_aliases_v16_0.txt +0 -0
  51. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/includes/lookups/mitre_attack_mobile_id_v16_0.txt +0 -0
  52. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/includes/lookups/mitre_attack_mobile_name_v16_0.txt +0 -0
  53. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/includes/lookups/mitre_capec_id_v3_9.txt +0 -0
  54. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/includes/lookups/mitre_capec_name_v3_9.txt +0 -0
  55. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/includes/lookups/mitre_cwe_id_v4_15.txt +0 -0
  56. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/includes/lookups/mitre_cwe_name_v4_15.txt +0 -0
  57. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/includes/lookups/threat_actor.txt +0 -0
  58. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/includes/lookups/tld.txt +0 -0
  59. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/includes/lookups/tool.txt +0 -0
  60. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/includes/tests/test_cases.yaml +0 -0
  61. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/requirements.txt +0 -0
  62. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/README.md +0 -0
  63. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/ai_country.txt +0 -0
  64. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/ai_mitre_attack_enterprise.txt +0 -0
  65. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/ai_mitre_attack_ics.txt +0 -0
  66. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/ai_mitre_attack_mobile.txt +0 -0
  67. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/ai_mitre_capec.txt +0 -0
  68. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/ai_mitre_cwe.txt +0 -0
  69. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/all_cases.txt +0 -0
  70. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_autonomous_system_number.txt +0 -0
  71. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_bank_card_all.txt +0 -0
  72. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_bank_card_amex.txt +0 -0
  73. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_bank_card_diners.txt +0 -0
  74. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_bank_card_discover.txt +0 -0
  75. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_bank_card_jcb.txt +0 -0
  76. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_bank_card_mastercard.txt +0 -0
  77. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_bank_card_union_pay.txt +0 -0
  78. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_bank_card_visa.txt +0 -0
  79. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_country_alpha2.txt +0 -0
  80. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_cpe_uri.txt +0 -0
  81. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_cryptocurrency_btc_transaction.txt +0 -0
  82. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_cryptocurrency_btc_wallet.txt +0 -0
  83. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_cryptocurrency_eth_transaction.txt +0 -0
  84. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_cryptocurrency_eth_wallet.txt +0 -0
  85. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_cryptocurrency_xmr_transaction.txt +0 -0
  86. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_cryptocurrency_xmr_wallet.txt +0 -0
  87. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_cve_id.txt +0 -0
  88. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_directory_unix.txt +0 -0
  89. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_directory_unix_file.txt +0 -0
  90. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_directory_windows.txt +0 -0
  91. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_directory_windows_with_file.txt +0 -0
  92. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_disarm.txt +0 -0
  93. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_disarm_name.txt +0 -0
  94. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_domain_name_only.txt +0 -0
  95. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_domain_name_subdomain.txt +0 -0
  96. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_email_address.txt +0 -0
  97. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_file_hash_md5.txt +0 -0
  98. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_file_hash_sha_1.txt +0 -0
  99. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_file_hash_sha_224.txt +0 -0
  100. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_file_hash_sha_256.txt +0 -0
  101. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_file_hash_sha_384.txt +0 -0
  102. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_file_hash_sha_512.txt +0 -0
  103. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_file_name.txt +0 -0
  104. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_host_name.txt +0 -0
  105. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_host_name_file.txt +0 -0
  106. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_host_name_path.txt +0 -0
  107. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_host_name_subdomain.txt +0 -0
  108. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_host_name_url.txt +0 -0
  109. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_iban_number.txt +0 -0
  110. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_ipv4_address_cidr.txt +0 -0
  111. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_ipv4_address_only.txt +0 -0
  112. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_ipv4_address_port.txt +0 -0
  113. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_ipv6_address_cidr.txt +0 -0
  114. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_ipv6_address_only.txt +0 -0
  115. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_ipv6_address_port.txt +0 -0
  116. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_mac_address.txt +0 -0
  117. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_mitre_atlas.txt +0 -0
  118. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_mitre_atlas_name.txt +0 -0
  119. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_mitre_attack_enterprise.txt +0 -0
  120. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_mitre_attack_enterprise_aliases.txt +0 -0
  121. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_mitre_attack_enterprise_name.txt +0 -0
  122. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_mitre_attack_ics.txt +0 -0
  123. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_mitre_attack_ics_aliases.txt +0 -0
  124. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_mitre_attack_ics_name.txt +0 -0
  125. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_mitre_attack_mobile.txt +0 -0
  126. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_mitre_attack_mobile_aliases.txt +0 -0
  127. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_mitre_attack_mobile_name.txt +0 -0
  128. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_mitre_capec.txt +0 -0
  129. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_mitre_capec_name.txt +0 -0
  130. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_mitre_cwe.txt +0 -0
  131. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_mitre_cwe_name.txt +0 -0
  132. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_phone_number.txt +0 -0
  133. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_url.txt +0 -0
  134. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_url_file.txt +0 -0
  135. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_url_path.txt +0 -0
  136. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_user_agent.txt +0 -0
  137. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/generic_windows_registry_key.txt +0 -0
  138. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/lookup_attack_pattern.txt +0 -0
  139. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/lookup_campaign.txt +0 -0
  140. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/lookup_course_of_action.txt +0 -0
  141. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/lookup_identity.txt +0 -0
  142. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/lookup_infrastructure.txt +0 -0
  143. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/lookup_intrusion_set.txt +0 -0
  144. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/lookup_malware.txt +0 -0
  145. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/lookup_threat_actor.txt +0 -0
  146. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/extraction_types/lookup_tool.txt +0 -0
  147. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/manually_generated_reports/attack_flow_demo.txt +0 -0
  148. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/manually_generated_reports/basic_relationship.txt +0 -0
  149. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/manually_generated_reports/char_length_too_long.txt +0 -0
  150. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/manually_generated_reports/descriptive_for_ai_relationships_1.txt +0 -0
  151. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/manually_generated_reports/disarm_demo.txt +0 -0
  152. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/manually_generated_reports/embedded_img_ignore.txt +0 -0
  153. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/manually_generated_reports/embedded_link_ignore.txt +0 -0
  154. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/manually_generated_reports/ip1.txt +0 -0
  155. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/manually_generated_reports/ip2.txt +0 -0
  156. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/manually_generated_reports/known_whitelist_match.txt +0 -0
  157. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/manually_generated_reports/mitre_attack_enterprise_ai_demo.txt +0 -0
  158. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/manually_generated_reports/mitre_attack_enterprise_lookup_demo.txt +0 -0
  159. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/manually_generated_reports/mixed_extractions.txt +0 -0
  160. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/manually_generated_reports/test_ai_hash_error_with_stix2_lib.txt +0 -0
  161. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/manually_generated_reports/test_aliases.txt +0 -0
  162. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/manually_generated_reports/test_extraction_boundary.txt +0 -0
  163. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/manually_generated_reports/test_extraction_escapes.txt +0 -0
  164. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/real_intel_reports/APT28-Center-of-Storm-2017.txt +0 -0
  165. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/real_intel_reports/Bitdefender-Labs-Report-X-creat6958-en-EN.txt +0 -0
  166. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/real_intel_reports/FireEyeAPT39.txt +0 -0
  167. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/real_intel_reports/France_CERT_APT31_Pakdoor_TLPWHITE.txt +0 -0
  168. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/real_intel_reports/Group-IB_Ransomware_Uncovered_whitepaper_eng.txt +0 -0
  169. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/real_intel_reports/JOINT_CSA_HUNTING_RU_INTEL_SNAKE_MALWARE_20230509.txt +0 -0
  170. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/real_intel_reports/TA22-0126-QAKBOT-analysis-TLP-GREEN.txt +0 -0
  171. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/real_intel_reports/dinners_card.txt +0 -0
  172. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/real_intel_reports/mandiant-apt1.txt +0 -0
  173. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/data/real_intel_reports/mykings_report_final.txt +0 -0
  174. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/manual-tests/cases-ai-relationships.md +0 -0
  175. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/manual-tests/cases-extraction-type-ai.md +0 -0
  176. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/manual-tests/cases-extraction-type-lookup.md +0 -0
  177. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/manual-tests/cases-extraction-type-pattern.md +0 -0
  178. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/scripts/generate_simple_extraction_test_cases_txt_files.py +0 -0
  179. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/src/__init__.py +0 -0
  180. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/src/test_attack_flow.py +0 -0
  181. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/src/test_bundler.py +0 -0
  182. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/src/test_extractors.py +0 -0
  183. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/src/test_lookups.py +0 -0
  184. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/src/test_utils.py +0 -0
  185. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/tests/src/utils.py +0 -0
  186. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/__init__.py +0 -0
  187. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/ai_extractor/anthropic.py +0 -0
  188. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/ai_extractor/base.py +0 -0
  189. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/ai_extractor/deepseek.py +0 -0
  190. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/ai_extractor/gemini.py +0 -0
  191. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/ai_extractor/openai.py +0 -0
  192. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/ai_extractor/openrouter.py +0 -0
  193. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/ai_extractor/prompts.py +0 -0
  194. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/ai_extractor/utils.py +0 -0
  195. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/attack_flow.py +0 -0
  196. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/bundler.py +0 -0
  197. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/common.py +0 -0
  198. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/indicator.py +0 -0
  199. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/lookups.py +0 -0
  200. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/pattern/__init__.py +0 -0
  201. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/pattern/extractors/__init__.py +0 -0
  202. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/pattern/extractors/base_extractor.py +0 -0
  203. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/pattern/extractors/card/README.md +0 -0
  204. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/pattern/extractors/card/__init__.py +0 -0
  205. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/pattern/extractors/card/amex_card_extractor.py +0 -0
  206. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/pattern/extractors/card/diners_card_extractor.py +0 -0
  207. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/pattern/extractors/card/discover_card_extractor.py +0 -0
  208. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/pattern/extractors/card/jcb_card_extractor.py +0 -0
  209. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/pattern/extractors/card/master_card_extractor.py +0 -0
  210. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/pattern/extractors/card/union_card_extractor.py +0 -0
  211. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/pattern/extractors/card/visa_card_extractor.py +0 -0
  212. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/pattern/extractors/crypto/__init__.py +0 -0
  213. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/pattern/extractors/crypto/btc_extractor.py +0 -0
  214. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/pattern/extractors/directory/__init__.py +0 -0
  215. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/pattern/extractors/directory/unix_directory_extractor.py +0 -0
  216. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/pattern/extractors/directory/unix_file_path_extractor.py +0 -0
  217. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/pattern/extractors/directory/windows_directory_path_extractor.py +0 -0
  218. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/pattern/extractors/directory/windows_file_path_extractor.py +0 -0
  219. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/pattern/extractors/domain/__init__.py +0 -0
  220. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/pattern/extractors/domain/domain_extractor.py +0 -0
  221. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/pattern/extractors/domain/hostname_extractor.py +0 -0
  222. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/pattern/extractors/domain/sub_domain_extractor.py +0 -0
  223. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/pattern/extractors/hashes/__init__.py +0 -0
  224. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/pattern/extractors/hashes/md5_extractor.py +0 -0
  225. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/pattern/extractors/hashes/sha1_extractor.py +0 -0
  226. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/pattern/extractors/hashes/sha224_extractor.py +0 -0
  227. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/pattern/extractors/hashes/sha2_256_exactor.py +0 -0
  228. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/pattern/extractors/hashes/sha2_512_exactor.py +0 -0
  229. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/pattern/extractors/hashes/sha3_256_exactor.py +0 -0
  230. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/pattern/extractors/hashes/sha3_512_exactor.py +0 -0
  231. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/pattern/extractors/helper.py +0 -0
  232. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/pattern/extractors/ip/__init__.py +0 -0
  233. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/pattern/extractors/ip/ipv4_cidr_extractor.py +0 -0
  234. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/pattern/extractors/ip/ipv4_extractor.py +0 -0
  235. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/pattern/extractors/ip/ipv4_port_extractor.py +0 -0
  236. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/pattern/extractors/ip/ipv6_cidr_extractor.py +0 -0
  237. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/pattern/extractors/ip/ipv6_extractor.py +0 -0
  238. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/pattern/extractors/ip/ipv6_port_extractor.py +0 -0
  239. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/pattern/extractors/others/__init__.py +0 -0
  240. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/pattern/extractors/others/asn_extractor.py +0 -0
  241. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/pattern/extractors/others/cpe_extractor.py +0 -0
  242. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/pattern/extractors/others/cve_extractor.py +0 -0
  243. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/pattern/extractors/others/email_extractor.py +0 -0
  244. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/pattern/extractors/others/filename_extractor.py +0 -0
  245. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/pattern/extractors/others/iban_extractor.py +0 -0
  246. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/pattern/extractors/others/mac_address_extractor.py +0 -0
  247. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/pattern/extractors/others/phonenumber_extractor.py +0 -0
  248. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/pattern/extractors/others/user_agent_extractor.py +0 -0
  249. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/pattern/extractors/others/windows_registry_key_extractor.py +0 -0
  250. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/pattern/extractors/url/__init__.py +0 -0
  251. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/pattern/extractors/url/url_extractor.py +0 -0
  252. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/pattern/extractors/url/url_file_extractor.py +0 -0
  253. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/pattern/extractors/url/url_path_extractor.py +0 -0
  254. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/retriever.py +0 -0
  255. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/stix.py +0 -0
  256. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix/utils.py +0 -0
  257. {txt2stix-1.0.1.post1 → txt2stix-1.0.1.post3}/txt2stix.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: txt2stix
3
- Version: 1.0.1.post1
3
+ Version: 1.0.1.post3
4
4
  Summary: txt2stix is a Python script that is designed to identify and extract IoCs and TTPs from text files, identify the relationships between them, convert them to STIX 2.1 objects, and output as a STIX 2.1 bundle.
5
5
  Project-URL: Homepage, https://github.com/muchdogesec/txt2stix
6
6
  Project-URL: Issues, https://github.com/muchdogesec/txt2stix/issues
@@ -26,15 +26,19 @@ Requires-Dist: stix2extensions
26
26
  Requires-Dist: tld>=0.13
27
27
  Requires-Dist: tldextract>=5.1.2
28
28
  Requires-Dist: validators>=0.28.3
29
- Provides-Extra: full
30
- Requires-Dist: llama-index-llms-anthropic>=0.7.2; extra == 'full'
31
- Requires-Dist: llama-index-llms-deepseek>=0.1.2; extra == 'full'
32
- Requires-Dist: llama-index-llms-gemini>=0.5.0; extra == 'full'
33
- Requires-Dist: llama-index-llms-openrouter>=0.3.2; extra == 'full'
29
+ Provides-Extra: anthropic
30
+ Requires-Dist: llama-index-llms-anthropic>=0.7.2; extra == 'anthropic'
31
+ Provides-Extra: deepseek
32
+ Requires-Dist: llama-index-llms-deepseek>=0.1.2; extra == 'deepseek'
33
+ Provides-Extra: gemini
34
+ Requires-Dist: llama-index-llms-gemini>=0.5.0; extra == 'gemini'
35
+ Provides-Extra: openrouter
36
+ Requires-Dist: llama-index-llms-openrouter>=0.3.2; extra == 'openrouter'
34
37
  Provides-Extra: tests
35
38
  Requires-Dist: pytest; extra == 'tests'
36
39
  Requires-Dist: pytest-cov; extra == 'tests'
37
40
  Requires-Dist: pytest-subtests; extra == 'tests'
41
+ Requires-Dist: python-dateutil; extra == 'tests'
38
42
  Requires-Dist: requests; extra == 'tests'
39
43
  Description-Content-Type: text/markdown
40
44
 
@@ -86,7 +90,13 @@ cd txt2stix
86
90
  python3 -m venv txt2stix-venv
87
91
  source txt2stix-venv/bin/activate
88
92
  # install requirements
89
- pip3 install .
93
+ pip3 install txt2stix
94
+ ```
95
+
96
+ Note: by default, txt2stix installs OpenAI as the AI provider. You can also use Anthropic, Gemini, OpenRouter, or DeepSeek; if you plan to use any of these, you need to install them manually as follows (remove those that don't apply):
97
+
98
+ ```shell
99
+ pip3 install txt2stix[deepseek,gemini,anthropic,openrouter]
90
100
  ```
91
101
 
92
102
  ### Set variables
@@ -114,39 +124,39 @@ The following arguments are available:
114
124
 
115
125
  #### Input settings
116
126
 
117
- * `--input_file` (REQUIRED): the file to be converted. Must be `.txt`
127
+ * `--input_file` (`path/to/file.txt`, required): the file to be converted. Must be `.txt`
118
128
 
119
129
  #### STIX Report generation settings
120
130
 
121
131
 
122
- * `--name` (REQUIRED): name of file, max 72 chars. Will be used in the STIX Report Object created.
123
- * `--report_id` (OPTIONAL): Sometimes it is required to control the id of the `report` object generated. You can therefore pass a valid UUIDv4 in this field to be assigned to the report. e.g. passing `2611965-930e-43db-8b95-30a1e119d7e2` would create a STIX object id `report--2611965-930e-43db-8b95-30a1e119d7e2`. If this argument is not passed, the UUID will be randomly generated.
124
- * `--tlp_level` (OPTIONAL): Options are `clear`, `green`, `amber`, `amber_strict`, `red`. Default if not passed, is `clear`.
125
- * `--confidence` (OPTIONAL): value between 0-100. Default if not passed is null.
132
+ * `--name` (text, required): name of file, max 72 chars. Will be used in the STIX Report Object created.
133
+ * `--report_id` (UUIDv4, default is random UUIDv4): Sometimes it is required to control the id of the `report` object generated. You can therefore pass a valid UUIDv4 in this field to be assigned to the report. e.g. passing `2611965-930e-43db-8b95-30a1e119d7e2` would create a STIX object id `report--2611965-930e-43db-8b95-30a1e119d7e2`. If this argument is not passed, the UUID will be randomly generated.
134
+ * `--tlp_level` (dictionary, default `clear`): Options are `clear`, `green`, `amber`, `amber_strict`, `red`.
135
+ * `--confidence` (value between 0-100): If not passed, report will be assigned no confidence score value
126
136
  * `--labels` (OPTIONAL): comma separated list of labels. Case-insensitive (will all be converted to lower-case). Allowed `a-z`, `0-9`. e.g. `label1,label2` would create 2 labels.
127
- * `--created` (OPTIONAL): by default all object `created` times will take the time the script was run. If you want to explicitly set these times you can do so using this flag. Pass the value in the format `YYYY-MM-DDTHH:MM:SS.sssZ` e.g. `2020-01-01T00:00:00.000Z`
128
- * `--use_identity` (OPTIONAL): can pass a full STIX 2.1 identity object (make sure to properly escape). Will be validated by the STIX2 library.
137
+ * `--created` (datetime, optional): by default all object `created` times will take the time the script was run. If you want to explicitly set these times you can do so using this flag. Pass the value in the format `YYYY-MM-DDTHH:MM:SS.sssZ` e.g. `2020-01-01T00:00:00.000Z`
138
+ * `--use_identity` (stix identity, optional, default txt2stix identity): can pass a full STIX 2.1 identity object (make sure to properly escape). Will be validated by the STIX2 library.
129
139
  * `--external_refs` (OPTIONAL): txt2stix will automatically populate the `external_references` of the report object it creates for the input. You can use this value to add additional objects to `external_references`. Note, you can only add `source_name` and `external_id` values currently. Pass as `source_name=external_id`. e.g. `--external_refs txt2stix=demo1 source=id` would create the following objects under the `external_references` property: `{"source_name":"txt2stix","external_id":"demo1"},{"source_name":"source","external_id":"id"}`
130
140
 
131
141
  #### Output settings
132
142
 
133
143
  How the extractions are performed
134
144
 
135
- * `--use_extractions` (REQUIRED): if you only want to use certain extraction types, you can pass their slug found in either `includes/ai/config.yaml`, `includes/lookup/config.yaml` `includes/pattern/config.yaml` (e.g. `pattern_ipv4_address_only`). Default if not passed, no extractions applied. You can also pass a catch all wildcard `*` which will match all extraction paths (e.g. `'pattern_*'` would run all extractions starting with `pattern_` -- make sure to use quotes when using a wildcard)
145
+ * `--use_extractions` (dictionary, required): if you only want to use certain extraction types, you can pass their slug found in either `includes/ai/config.yaml`, `includes/lookup/config.yaml` `includes/pattern/config.yaml` (e.g. `pattern_ipv4_address_only`). Default if not passed, no extractions applied. You can also pass a catch all wildcard `*` which will match all extraction paths (e.g. `'pattern_*'` would run all extractions starting with `pattern_` -- make sure to use quotes when using a wildcard)
136
146
  * Important: if using any AI extractions (`ai_*`), you must set an AI API key in your `.env` file
137
147
  * Important: if you are using any MITRE ATT&CK, CAPEC, CWE, ATLAS or Location extractions you must set the `CTIBUTLER` settings, and if you are using NVD CPE or CVE extractions you must set the `VULMATCH` settings, in your `.env` file
138
- * `--relationship_mode` (REQUIRED): either.
148
+ * `--relationship_mode` (dictionary, required): either.
139
149
  * `ai`: AI provider must be enabled. extractions performed by either regex or AI for extractions user selected. Rich relationships created from AI provider from extractions.
140
150
  * `standard`: extractions performed by either regex or AI (AI provider must be enabled) for extractions user selected. Basic relationships created from extractions back to master Report object generated.
141
- * `--ignore_extraction_boundary` (OPTIONAL, default `false`, not compatible with AI extractions): in some cases the same string will create multiple extractions depending on extractions set (e.g. `https://www.google.com/file.txt` could create a url, url with file, domain, subdomain, and file). The default behaviour is for txt2stix to take the longest extraction and ignore everything else (e.g. only extract url with file, and ignore url, file, domain, subdomain, and file). If you want to override this behaviour and get all extractions in the output, set this flag to `true`.
142
- * `--ignore_image_refs` (default `true`): images references in documents don't usually need extracting. e.g. `<img src="https://example.com/image.png" alt="something">` you would not want domain or file extractions extracting `example.com` and `image.png`. Hence these are ignored by default (they are removed from text sent to extraction). Note, only the `img src` is ignored, all other values e.g. `alt` are considered. If you want extractions to consider this data, set it to `false`
143
- * `--ignore_link_refs` (default `true`): link references in documents don't usually need extracting e.g. `<a href="https://example.com/link.html" title="something">Bad Actor</a>` you would only want `Bad actor` to be considered for extraction. Hence these part of the link are ignored by default (they are removed from text sent to extraction). Note, only the `a href` is ignored, all other values e.g. `title` are considered. Setting this to `false` will also include everything inside the link tag (e.g. `example.com` would extract as a domain)
151
+ * `--ignore_extraction_boundary` (boolean, default `false`, not compatible with AI extractions): in some cases the same string will create multiple extractions depending on extractions set (e.g. `https://www.google.com/file.txt` could create a url, url with file, domain, subdomain, and file). The default behaviour is for txt2stix to take the longest extraction and ignore everything else (e.g. only extract url with file, and ignore url, file, domain, subdomain, and file). If you want to override this behaviour and get all extractions in the output, set this flag to `true`.
152
+ * `--ignore_image_refs` (boolean, default `true`): images references in documents don't usually need extracting. e.g. `<img src="https://example.com/image.png" alt="something">` you would not want domain or file extractions extracting `example.com` and `image.png`. Hence these are ignored by default (they are removed from text sent to extraction). Note, only the `img src` is ignored, all other values e.g. `alt` are considered. If you want extractions to consider this data, set it to `false`
153
+ * `--ignore_link_refs` (boolean, default `true`): link references in documents don't usually need extracting e.g. `<a href="https://example.com/link.html" title="something">Bad Actor</a>` you would only want `Bad Actor` to be considered for extraction. Hence this part of the link is ignored by default (it is removed from text sent to extraction). Note, only the `a href` is ignored, all other values e.g. `title` are considered. Setting this to `false` will also include everything inside the link tag (e.g. `example.com` would extract as a domain)
144
154
 
145
155
  #### AI settings
146
156
 
147
157
  If any AI extractions, or AI relationship mode is set, you must set the following accordingly
148
158
 
149
- * `--ai_settings_extractions`:
159
+ * `--ai_settings_extractions` (`provider:model`, required if one or more AI extractions set):
150
160
  * defines the `provider:model` to be used for extractions. You can supply more than one provider. Separate with a space (e.g. `openrouter:openai/gpt-4o` `openrouter:deepseek/deepseek-chat`). If more than one provider is passed, txt2stix will take extractions from all models, de-duplicate them, and then package them in the output. Currently supports:
151
161
  * Provider (env var required `OPENROUTER_API_KEY`): `openrouter:`, providers/models `openai/gpt-4o`, `deepseek/deepseek-chat` ([More here](https://openrouter.ai/models))
152
162
  * Provider (env var required `OPENAI_API_KEY`): `openai:`, models e.g.: `gpt-4o`, `gpt-4o-mini`, `gpt-4-turbo`, `gpt-4` ([More here](https://platform.openai.com/docs/models))
@@ -154,11 +164,15 @@ If any AI extractions, or AI relationship mode is set, you must set the followin
154
164
  * Provider (env var required `GOOGLE_API_KEY`): `gemini:models/`, models: `gemini-1.5-pro-latest`, `gemini-1.5-flash-latest` ([More here](https://ai.google.dev/gemini-api/docs/models/gemini))
155
165
  * Provider (env var required `DEEPSEEK_API_KEY`): `deepseek:`, models `deepseek-chat` ([More here](https://api-docs.deepseek.com/quick_start/pricing))
156
166
  * See `tests/manual-tests/cases-ai-extraction-type.md` for some examples
157
- * `--ai_settings_relationships`:
167
+ * `--ai_settings_relationships` (`provider:model`, required if AI relationship mode set):
158
168
  * similar to `ai_settings_extractions` but defines the model used to generate relationships. Only one model can be provided. Passed in same format as `ai_settings_extractions`
159
169
  * See `tests/manual-tests/cases-ai-relationships.md` for some examples
160
- * `--ai_content_check_provider`: Passing this flag will get the AI to try and classify the text in the input to 1) determine if it is talking about threat intelligence, and 2) what type of threat intelligence it is talking about. For context, we use this to filter out non-threat intel posts in Obstracts and Stixify. You pass `provider:model` with this flag to determine the AI model you wish to use to perform the check.
161
- * `--ai_create_attack_flow`: passing this flag will also prompt the AI model (the same entered for `--ai_settings_relationships`) to generate an [Attack Flow](https://center-for-threat-informed-defense.github.io/attack-flow/) for the MITRE ATT&CK extractions to define the logical order in which they are being described. You must pass `--ai_settings_relationships` for this to work.
170
+
171
+ #### Other AI related settings
172
+
173
+ * `--ai_content_check_provider` (`provider:model`, required if passed): Passing this flag will get the AI to try and classify the text in the input to 1) determine if it is talking about threat intelligence, and 2) what type of threat intelligence it is talking about. For context, we use this to filter out non-threat intel posts in Obstracts and Stixify. You pass `provider:model` with this flag to determine the AI model you wish to use to perform the check. It will also create a summary of the content passed (and store this into a STIX Note).
174
+ * `--ai_extract_if_no_incidence` (boolean, default `true`): if the content check decides the report is not related to cyber security intelligence (e.g. vendor marketing), you can use this setting to decide whether or not the script should proceed. Setting to `false` will stop processing. It is designed to save AI tokens when processing unknown content at scale in an automated way.
175
+ * `--ai_create_attack_flow` (boolean): passing this flag will also prompt the AI model (the same entered for `--ai_settings_relationships`) to generate an [Attack Flow](https://center-for-threat-informed-defense.github.io/attack-flow/) for the MITRE ATT&CK extractions to define the logical order in which they are being described. You must pass `--ai_settings_relationships` for this to work.
162
176
 
163
177
  ## Adding new extractions
164
178
 
@@ -46,7 +46,13 @@ cd txt2stix
46
46
  python3 -m venv txt2stix-venv
47
47
  source txt2stix-venv/bin/activate
48
48
  # install requirements
49
- pip3 install .
49
+ pip3 install txt2stix
50
+ ```
51
+
52
+ Note, by default txt2stix will install OpenAI to use as the AI provider. You can also use Anthropic, Gemini, OpenRouter or Deepseek. You need to install these manually if you plan to use them as follows (remove those that don't apply)
53
+
54
+ ```shell
55
+ pip3 install txt2stix[deepseek,gemini,anthropic,openrouter]
50
56
  ```
51
57
 
52
58
  ### Set variables
@@ -74,39 +80,39 @@ The following arguments are available:
74
80
 
75
81
  #### Input settings
76
82
 
77
- * `--input_file` (REQUIRED): the file to be converted. Must be `.txt`
83
+ * `--input_file` (`path/to/file.txt`, required): the file to be converted. Must be `.txt`
78
84
 
79
85
  #### STIX Report generation settings
80
86
 
81
87
 
82
- * `--name` (REQUIRED): name of file, max 72 chars. Will be used in the STIX Report Object created.
83
- * `--report_id` (OPTIONAL): Sometimes it is required to control the id of the `report` object generated. You can therefore pass a valid UUIDv4 in this field to be assigned to the report. e.g. passing `2611965-930e-43db-8b95-30a1e119d7e2` would create a STIX object id `report--2611965-930e-43db-8b95-30a1e119d7e2`. If this argument is not passed, the UUID will be randomly generated.
84
- * `--tlp_level` (OPTIONAL): Options are `clear`, `green`, `amber`, `amber_strict`, `red`. Default if not passed, is `clear`.
85
- * `--confidence` (OPTIONAL): value between 0-100. Default if not passed is null.
88
+ * `--name` (text, required): name of file, max 72 chars. Will be used in the STIX Report Object created.
89
+ * `--report_id` (UUIDv4, default is random UUIDv4): Sometimes it is required to control the id of the `report` object generated. You can therefore pass a valid UUIDv4 in this field to be assigned to the report. e.g. passing `2611965-930e-43db-8b95-30a1e119d7e2` would create a STIX object id `report--2611965-930e-43db-8b95-30a1e119d7e2`. If this argument is not passed, the UUID will be randomly generated.
90
+ * `--tlp_level` (dictionary, default `clear`): Options are `clear`, `green`, `amber`, `amber_strict`, `red`.
91
+ * `--confidence` (value between 0-100): If not passed, report will be assigned no confidence score value
86
92
  * `--labels` (OPTIONAL): comma separated list of labels. Case-insensitive (will all be converted to lower-case). Allowed `a-z`, `0-9`. e.g. `label1,label2` would create 2 labels.
87
- * `--created` (OPTIONAL): by default all object `created` times will take the time the script was run. If you want to explicitly set these times you can do so using this flag. Pass the value in the format `YYYY-MM-DDTHH:MM:SS.sssZ` e.g. `2020-01-01T00:00:00.000Z`
88
- * `--use_identity` (OPTIONAL): can pass a full STIX 2.1 identity object (make sure to properly escape). Will be validated by the STIX2 library.
93
+ * `--created` (datetime, optional): by default all object `created` times will take the time the script was run. If you want to explicitly set these times you can do so using this flag. Pass the value in the format `YYYY-MM-DDTHH:MM:SS.sssZ` e.g. `2020-01-01T00:00:00.000Z`
94
+ * `--use_identity` (stix identity, optional, default txt2stix identity): can pass a full STIX 2.1 identity object (make sure to properly escape). Will be validated by the STIX2 library.
89
95
  * `--external_refs` (OPTIONAL): txt2stix will automatically populate the `external_references` of the report object it creates for the input. You can use this value to add additional objects to `external_references`. Note, you can only add `source_name` and `external_id` values currently. Pass as `source_name=external_id`. e.g. `--external_refs txt2stix=demo1 source=id` would create the following objects under the `external_references` property: `{"source_name":"txt2stix","external_id":"demo1"},{"source_name":"source","external_id":"id"}`
90
96
 
91
97
  #### Output settings
92
98
 
93
99
  How the extractions are performed
94
100
 
95
- * `--use_extractions` (REQUIRED): if you only want to use certain extraction types, you can pass their slug found in either `includes/ai/config.yaml`, `includes/lookup/config.yaml` `includes/pattern/config.yaml` (e.g. `pattern_ipv4_address_only`). Default if not passed, no extractions applied. You can also pass a catch all wildcard `*` which will match all extraction paths (e.g. `'pattern_*'` would run all extractions starting with `pattern_` -- make sure to use quotes when using a wildcard)
101
+ * `--use_extractions` (dictionary, required): if you only want to use certain extraction types, you can pass their slug found in either `includes/ai/config.yaml`, `includes/lookup/config.yaml` `includes/pattern/config.yaml` (e.g. `pattern_ipv4_address_only`). Default if not passed, no extractions applied. You can also pass a catch all wildcard `*` which will match all extraction paths (e.g. `'pattern_*'` would run all extractions starting with `pattern_` -- make sure to use quotes when using a wildcard)
96
102
  * Important: if using any AI extractions (`ai_*`), you must set an AI API key in your `.env` file
97
103
  * Important: if you are using any MITRE ATT&CK, CAPEC, CWE, ATLAS or Location extractions you must set the `CTIBUTLER` settings, and if you are using NVD CPE or CVE extractions you must set the `VULMATCH` settings, in your `.env` file
98
- * `--relationship_mode` (REQUIRED): either.
104
+ * `--relationship_mode` (dictionary, required): either.
99
105
  * `ai`: AI provider must be enabled. extractions performed by either regex or AI for extractions user selected. Rich relationships created from AI provider from extractions.
100
106
  * `standard`: extractions performed by either regex or AI (AI provider must be enabled) for extractions user selected. Basic relationships created from extractions back to master Report object generated.
101
- * `--ignore_extraction_boundary` (OPTIONAL, default `false`, not compatible with AI extractions): in some cases the same string will create multiple extractions depending on extractions set (e.g. `https://www.google.com/file.txt` could create a url, url with file, domain, subdomain, and file). The default behaviour is for txt2stix to take the longest extraction and ignore everything else (e.g. only extract url with file, and ignore url, file, domain, subdomain, and file). If you want to override this behaviour and get all extractions in the output, set this flag to `true`.
102
- * `--ignore_image_refs` (default `true`): images references in documents don't usually need extracting. e.g. `<img src="https://example.com/image.png" alt="something">` you would not want domain or file extractions extracting `example.com` and `image.png`. Hence these are ignored by default (they are removed from text sent to extraction). Note, only the `img src` is ignored, all other values e.g. `alt` are considered. If you want extractions to consider this data, set it to `false`
103
- * `--ignore_link_refs` (default `true`): link references in documents don't usually need extracting e.g. `<a href="https://example.com/link.html" title="something">Bad Actor</a>` you would only want `Bad actor` to be considered for extraction. Hence these part of the link are ignored by default (they are removed from text sent to extraction). Note, only the `a href` is ignored, all other values e.g. `title` are considered. Setting this to `false` will also include everything inside the link tag (e.g. `example.com` would extract as a domain)
107
+ * `--ignore_extraction_boundary` (boolean, default `false`, not compatible with AI extractions): in some cases the same string will create multiple extractions depending on extractions set (e.g. `https://www.google.com/file.txt` could create a url, url with file, domain, subdomain, and file). The default behaviour is for txt2stix to take the longest extraction and ignore everything else (e.g. only extract url with file, and ignore url, file, domain, subdomain, and file). If you want to override this behaviour and get all extractions in the output, set this flag to `true`.
108
+ * `--ignore_image_refs` (boolean, default `true`): images references in documents don't usually need extracting. e.g. `<img src="https://example.com/image.png" alt="something">` you would not want domain or file extractions extracting `example.com` and `image.png`. Hence these are ignored by default (they are removed from text sent to extraction). Note, only the `img src` is ignored, all other values e.g. `alt` are considered. If you want extractions to consider this data, set it to `false`
109
+ * `--ignore_link_refs` (boolean, default `true`): link references in documents don't usually need extracting e.g. `<a href="https://example.com/link.html" title="something">Bad Actor</a>` you would only want `Bad Actor` to be considered for extraction. Hence this part of the link is ignored by default (it is removed from text sent to extraction). Note, only the `a href` is ignored, all other values e.g. `title` are considered. Setting this to `false` will also include everything inside the link tag (e.g. `example.com` would extract as a domain)
104
110
 
105
111
  #### AI settings
106
112
 
107
113
  If any AI extractions, or AI relationship mode is set, you must set the following accordingly
108
114
 
109
- * `--ai_settings_extractions`:
115
+ * `--ai_settings_extractions` (`provider:model`, required if one or more AI extractions set):
110
116
  * defines the `provider:model` to be used for extractions. You can supply more than one provider. Separate with a space (e.g. `openrouter:openai/gpt-4o` `openrouter:deepseek/deepseek-chat`). If more than one provider is passed, txt2stix will take extractions from all models, de-duplicate them, and then package them in the output. Currently supports:
111
117
  * Provider (env var required `OPENROUTER_API_KEY`): `openrouter:`, providers/models `openai/gpt-4o`, `deepseek/deepseek-chat` ([More here](https://openrouter.ai/models))
112
118
  * Provider (env var required `OPENAI_API_KEY`): `openai:`, models e.g.: `gpt-4o`, `gpt-4o-mini`, `gpt-4-turbo`, `gpt-4` ([More here](https://platform.openai.com/docs/models))
@@ -114,11 +120,15 @@ If any AI extractions, or AI relationship mode is set, you must set the followin
114
120
  * Provider (env var required `GOOGLE_API_KEY`): `gemini:models/`, models: `gemini-1.5-pro-latest`, `gemini-1.5-flash-latest` ([More here](https://ai.google.dev/gemini-api/docs/models/gemini))
115
121
  * Provider (env var required `DEEPSEEK_API_KEY`): `deepseek:`, models `deepseek-chat` ([More here](https://api-docs.deepseek.com/quick_start/pricing))
116
122
  * See `tests/manual-tests/cases-ai-extraction-type.md` for some examples
117
- * `--ai_settings_relationships`:
123
+ * `--ai_settings_relationships` (`provider:model`, required if AI relationship mode set):
118
124
  * similar to `ai_settings_extractions` but defines the model used to generate relationships. Only one model can be provided. Passed in same format as `ai_settings_extractions`
119
125
  * See `tests/manual-tests/cases-ai-relationships.md` for some examples
120
- * `--ai_content_check_provider`: Passing this flag will get the AI to try and classify the text in the input to 1) determine if it is talking about threat intelligence, and 2) what type of threat intelligence it is talking about. For context, we use this to filter out non-threat intel posts in Obstracts and Stixify. You pass `provider:model` with this flag to determine the AI model you wish to use to perform the check.
121
- * `--ai_create_attack_flow`: passing this flag will also prompt the AI model (the same entered for `--ai_settings_relationships`) to generate an [Attack Flow](https://center-for-threat-informed-defense.github.io/attack-flow/) for the MITRE ATT&CK extractions to define the logical order in which they are being described. You must pass `--ai_settings_relationships` for this to work.
126
+
127
+ #### Other AI related settings
128
+
129
+ * `--ai_content_check_provider` (`provider:model`, required if passed): Passing this flag will get the AI to try and classify the text in the input to 1) determine if it is talking about threat intelligence, and 2) what type of threat intelligence it is talking about. For context, we use this to filter out non-threat intel posts in Obstracts and Stixify. You pass `provider:model` with this flag to determine the AI model you wish to use to perform the check. It will also create a summary of the content passed (and store this into a STIX Note).
130
+ * `--ai_extract_if_no_incidence` (boolean, default `true`): if the content check decides the report is not related to cyber security intelligence (e.g. vendor marketing), you can use this setting to decide whether or not the script should proceed. Setting to `false` will stop processing. It is designed to save AI tokens when processing unknown content at scale in an automated way.
131
+ * `--ai_create_attack_flow` (boolean): passing this flag will also prompt the AI model (the same entered for `--ai_settings_relationships`) to generate an [Attack Flow](https://center-for-threat-informed-defense.github.io/attack-flow/) for the MITRE ATT&CK extractions to define the logical order in which they are being described. You must pass `--ai_settings_relationships` for this to work.
122
132
 
123
133
  ## Adding new extractions
124
134
 
@@ -4,13 +4,9 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "txt2stix"
7
- version = "1.0.1-1"
8
- authors = [
9
- { name = "dogesec" }
10
- ]
11
- maintainers = [
12
- { name = "dogesec" }
13
- ]
7
+ version = "1.0.1-3"
8
+ authors = [{ name = "dogesec" }]
9
+ maintainers = [{ name = "dogesec" }]
14
10
  description = "txt2stix is a Python script that is designed to identify and extract IoCs and TTPs from text files, identify the relationships between them, convert them to STIX 2.1 objects, and output as a STIX 2.1 bundle."
15
11
  readme = "README.md"
16
12
  requires-python = ">=3.9"
@@ -21,7 +17,6 @@ classifiers = [
21
17
  ]
22
18
 
23
19
 
24
-
25
20
  dependencies = [
26
21
  "pathvalidate>=3.2.0",
27
22
  "phonenumbers>=8.13.39",
@@ -55,15 +50,8 @@ stix2arango = "txt2stix.txt2stix:main"
55
50
  "includes" = "txt2stix/includes"
56
51
 
57
52
  [project.optional-dependencies]
58
- full = [
59
- 'llama-index-llms-anthropic>=0.7.2',
60
- 'llama-index-llms-gemini>=0.5.0',
61
- 'llama-index-llms-deepseek>=0.1.2',
62
- 'llama-index-llms-openrouter>=0.3.2',
63
- ]
64
- tests = [
65
- "pytest",
66
- "requests",
67
- "pytest-subtests",
68
- "pytest-cov",
69
- ]
53
+ anthropic = ['llama-index-llms-anthropic>=0.7.2']
54
+ gemini = ['llama-index-llms-gemini>=0.5.0']
55
+ deepseek = ['llama-index-llms-deepseek>=0.1.2']
56
+ openrouter = ['llama-index-llms-openrouter>=0.3.2']
57
+ tests = ["pytest", "requests", "pytest-subtests", "pytest-cov", "python-dateutil"]
@@ -0,0 +1 @@
1
+ this is not security content
@@ -417,6 +417,22 @@ python3 txt2stix.py \
417
417
  --report_id 4fa18f2d-278b-4fd4-8470-62a8807d35ad
418
418
  ```
419
419
 
420
+ The following should not be passed to AI (not security content)
421
+
422
+ ```shell
423
+ python3 txt2stix.py \
424
+ --relationship_mode standard \
425
+ --input_file tests/data/manually_generated_reports/not_security_content.txt \
426
+ --name 'Test AI Content check failure' \
427
+ --tlp_level clear \
428
+ --confidence 100 \
429
+ --use_extractions ai_ipv4_address_only \
430
+ --ai_settings_extractions openai:gpt-4o \
431
+ --ai_content_check_provider openai:gpt-4o \
432
+ --ai_extract_if_no_incidence false \
433
+ --report_id ed6039d6-699c-44f0-9bf0-957d4d0ff99f
434
+ ```
435
+
420
436
  ### attack flow demo
421
437
 
422
438
  no indicators
@@ -405,7 +405,7 @@ def test_build_observables(value, extractor_name, expected_objects, expected_rel
405
405
 
406
406
  @pytest.mark.parametrize(
407
407
  "extractor_name",
408
- {v.test_cases: k for k, v in all_extractors.items()}.values(),
408
+ {v.test_cases: k for k, v in all_extractors.items() if v.test_cases != 'ai_country'}.values(),
409
409
  )
410
410
  def test_build_observables_with_extractor_cases__positive(extractor_name, subtests):
411
411
  extractor = all_extractors[extractor_name]
@@ -430,7 +430,7 @@ def test_build_observables_with_extractor_cases__positive(extractor_name, subtes
430
430
  v.test_cases: k
431
431
  for k, v in all_extractors.items()
432
432
  if (
433
- not v.test_cases.startswith("generic_bank")
433
+ not v.test_cases.startswith("generic_bank") and not v.test_cases.startswith("lookup_")
434
434
  and v.stix_mapping
435
435
  not in [
436
436
  "url",
@@ -276,12 +276,11 @@ def test_main_func():
276
276
 
277
277
 
278
278
  def test_setLogFile():
279
- tmp = tempfile.NamedTemporaryFile(prefix='setlogfile', delete_on_close=False)
279
+ tmp = tempfile.NamedTemporaryFile(prefix='setlogfile')
280
280
  p = Path(tmp.name)
281
281
  logger = newLogger("txt2stix")
282
282
  setLogFile(logger, p)
283
- tmp.flush()
284
- tmp.close()
283
+ assert p.exists(), "log file should be created"
285
284
 
286
285
  def named_ai_extractor_mock(name, retval):
287
286
  m = MagicMock()
@@ -43,7 +43,7 @@ def test_content_check_param(mock_validate_token_count, subtests):
43
43
  incident_classifications = ["Class 1", "Class 2", "class 3"]
44
44
 
45
45
  with (
46
- subtests.test("check_content", describes_incident=False),
46
+ subtests.test("check_content", ai_extract_if_no_incidence=False, describes_incident=False),
47
47
  mock.patch(
48
48
  "txt2stix.ai_extractor.base.BaseAIExtractor.check_content"
49
49
  ) as mock_check_content,
@@ -56,6 +56,7 @@ def test_content_check_param(mock_validate_token_count, subtests):
56
56
  preprocessed_text,
57
57
  mock_extractors_map,
58
58
  ai_content_check_provider=parse_model(TEST_AI_MODEL),
59
+ ai_extract_if_no_incidence=False,
59
60
  )
60
61
  assert data.content_check.describes_incident == False
61
62
  assert (
@@ -67,7 +68,7 @@ def test_content_check_param(mock_validate_token_count, subtests):
67
68
  mock_validate_token_count.reset_mock()
68
69
 
69
70
  with (
70
- subtests.test("check_content", describes_incident=False, always_extract=True),
71
+ subtests.test("check_content", describes_incident=False, ai_extract_if_no_incidence=True),
71
72
  mock.patch(
72
73
  "txt2stix.ai_extractor.base.BaseAIExtractor.check_content"
73
74
  ) as mock_check_content,
@@ -84,12 +85,12 @@ def test_content_check_param(mock_validate_token_count, subtests):
84
85
  preprocessed_text,
85
86
  mock_extractors_map,
86
87
  ai_content_check_provider=parse_model(TEST_AI_MODEL),
87
- always_extract=True,
88
+ ai_extract_if_no_incidence=True,
88
89
  )
89
90
  assert data.content_check.describes_incident == False
90
91
  assert (
91
92
  data.extractions
92
- ), "extraction should happen when check_content.describes_incident is False but always_extract is True"
93
+ ), "extraction should happen when check_content.describes_incident is False but ai_extract_if_no_incidence is True"
93
94
  mock_check_content.assert_called_once()
94
95
  mock_validate_token_count.assert_called_once()
95
96
  mock_bundle__add_summary.assert_called_once_with("The summary", parse_model(TEST_AI_MODEL).extractor_name)
@@ -1,4 +1,5 @@
1
1
  import logging
2
+ import warnings
2
3
 
3
4
  import dotenv
4
5
 
@@ -12,4 +13,4 @@ for path in ["openai", "anthropic", "gemini", "deepseek", "openrouter"]:
12
13
  try:
13
14
  __import__(__package__ + "." + path)
14
15
  except Exception as e:
15
- logging.warning("%s not supported, please install missing modules", path, exc_info=True)
16
+ pass
@@ -31,8 +31,9 @@ class Extractor(NamedDict):
31
31
  self.extraction_key = key
32
32
  self.slug = key
33
33
  test_cases = test_cases or dict()
34
- self.prompt_negative_examples = test_cases.get('test_negative_examples') or []
35
- self.prompt_positive_examples = test_cases.get('test_positive_examples') or []
34
+
35
+ self.prompt_negative_examples = remove_empty(test_cases.get('test_negative_examples') or [])
36
+ self.prompt_positive_examples = remove_empty(test_cases.get('test_positive_examples') or [])
36
37
  if self.file and not Path(self.file).is_absolute() and include_path:
37
38
  self.file = Path(include_path) / self.file
38
39
 
@@ -44,6 +45,9 @@ class Extractor(NamedDict):
44
45
  for line in file.read_text().splitlines():
45
46
  self.lookups.add(line.strip())
46
47
 
48
+ def remove_empty(iterable: list):
49
+ return [it for it in iterable if it]
50
+
47
51
  def parse_extraction_config(include_path: Path):
48
52
  config = {}
49
53
  test_cases = load_test_cases_config(include_path)