strix-agent 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118) hide show
  1. strix/__init__.py +0 -0
  2. strix/agents/StrixAgent/__init__.py +4 -0
  3. strix/agents/StrixAgent/strix_agent.py +89 -0
  4. strix/agents/StrixAgent/system_prompt.jinja +404 -0
  5. strix/agents/__init__.py +10 -0
  6. strix/agents/base_agent.py +518 -0
  7. strix/agents/state.py +163 -0
  8. strix/interface/__init__.py +4 -0
  9. strix/interface/assets/tui_styles.tcss +694 -0
  10. strix/interface/cli.py +230 -0
  11. strix/interface/main.py +500 -0
  12. strix/interface/tool_components/__init__.py +39 -0
  13. strix/interface/tool_components/agents_graph_renderer.py +123 -0
  14. strix/interface/tool_components/base_renderer.py +62 -0
  15. strix/interface/tool_components/browser_renderer.py +120 -0
  16. strix/interface/tool_components/file_edit_renderer.py +99 -0
  17. strix/interface/tool_components/finish_renderer.py +31 -0
  18. strix/interface/tool_components/notes_renderer.py +108 -0
  19. strix/interface/tool_components/proxy_renderer.py +255 -0
  20. strix/interface/tool_components/python_renderer.py +34 -0
  21. strix/interface/tool_components/registry.py +72 -0
  22. strix/interface/tool_components/reporting_renderer.py +53 -0
  23. strix/interface/tool_components/scan_info_renderer.py +64 -0
  24. strix/interface/tool_components/terminal_renderer.py +131 -0
  25. strix/interface/tool_components/thinking_renderer.py +29 -0
  26. strix/interface/tool_components/user_message_renderer.py +43 -0
  27. strix/interface/tool_components/web_search_renderer.py +28 -0
  28. strix/interface/tui.py +1274 -0
  29. strix/interface/utils.py +559 -0
  30. strix/llm/__init__.py +15 -0
  31. strix/llm/config.py +20 -0
  32. strix/llm/llm.py +465 -0
  33. strix/llm/memory_compressor.py +212 -0
  34. strix/llm/request_queue.py +87 -0
  35. strix/llm/utils.py +87 -0
  36. strix/prompts/README.md +64 -0
  37. strix/prompts/__init__.py +109 -0
  38. strix/prompts/cloud/.gitkeep +0 -0
  39. strix/prompts/coordination/root_agent.jinja +41 -0
  40. strix/prompts/custom/.gitkeep +0 -0
  41. strix/prompts/frameworks/fastapi.jinja +142 -0
  42. strix/prompts/frameworks/nextjs.jinja +126 -0
  43. strix/prompts/protocols/graphql.jinja +215 -0
  44. strix/prompts/reconnaissance/.gitkeep +0 -0
  45. strix/prompts/technologies/firebase_firestore.jinja +177 -0
  46. strix/prompts/technologies/supabase.jinja +189 -0
  47. strix/prompts/vulnerabilities/authentication_jwt.jinja +147 -0
  48. strix/prompts/vulnerabilities/broken_function_level_authorization.jinja +146 -0
  49. strix/prompts/vulnerabilities/business_logic.jinja +171 -0
  50. strix/prompts/vulnerabilities/csrf.jinja +174 -0
  51. strix/prompts/vulnerabilities/idor.jinja +195 -0
  52. strix/prompts/vulnerabilities/information_disclosure.jinja +222 -0
  53. strix/prompts/vulnerabilities/insecure_file_uploads.jinja +188 -0
  54. strix/prompts/vulnerabilities/mass_assignment.jinja +141 -0
  55. strix/prompts/vulnerabilities/open_redirect.jinja +177 -0
  56. strix/prompts/vulnerabilities/path_traversal_lfi_rfi.jinja +142 -0
  57. strix/prompts/vulnerabilities/race_conditions.jinja +164 -0
  58. strix/prompts/vulnerabilities/rce.jinja +154 -0
  59. strix/prompts/vulnerabilities/sql_injection.jinja +151 -0
  60. strix/prompts/vulnerabilities/ssrf.jinja +135 -0
  61. strix/prompts/vulnerabilities/subdomain_takeover.jinja +155 -0
  62. strix/prompts/vulnerabilities/xss.jinja +169 -0
  63. strix/prompts/vulnerabilities/xxe.jinja +184 -0
  64. strix/runtime/__init__.py +19 -0
  65. strix/runtime/docker_runtime.py +399 -0
  66. strix/runtime/runtime.py +29 -0
  67. strix/runtime/tool_server.py +205 -0
  68. strix/telemetry/__init__.py +4 -0
  69. strix/telemetry/tracer.py +337 -0
  70. strix/tools/__init__.py +64 -0
  71. strix/tools/agents_graph/__init__.py +16 -0
  72. strix/tools/agents_graph/agents_graph_actions.py +621 -0
  73. strix/tools/agents_graph/agents_graph_actions_schema.xml +226 -0
  74. strix/tools/argument_parser.py +121 -0
  75. strix/tools/browser/__init__.py +4 -0
  76. strix/tools/browser/browser_actions.py +236 -0
  77. strix/tools/browser/browser_actions_schema.xml +183 -0
  78. strix/tools/browser/browser_instance.py +533 -0
  79. strix/tools/browser/tab_manager.py +342 -0
  80. strix/tools/executor.py +305 -0
  81. strix/tools/file_edit/__init__.py +4 -0
  82. strix/tools/file_edit/file_edit_actions.py +141 -0
  83. strix/tools/file_edit/file_edit_actions_schema.xml +128 -0
  84. strix/tools/finish/__init__.py +4 -0
  85. strix/tools/finish/finish_actions.py +174 -0
  86. strix/tools/finish/finish_actions_schema.xml +45 -0
  87. strix/tools/notes/__init__.py +14 -0
  88. strix/tools/notes/notes_actions.py +191 -0
  89. strix/tools/notes/notes_actions_schema.xml +150 -0
  90. strix/tools/proxy/__init__.py +20 -0
  91. strix/tools/proxy/proxy_actions.py +101 -0
  92. strix/tools/proxy/proxy_actions_schema.xml +267 -0
  93. strix/tools/proxy/proxy_manager.py +785 -0
  94. strix/tools/python/__init__.py +4 -0
  95. strix/tools/python/python_actions.py +47 -0
  96. strix/tools/python/python_actions_schema.xml +131 -0
  97. strix/tools/python/python_instance.py +172 -0
  98. strix/tools/python/python_manager.py +131 -0
  99. strix/tools/registry.py +196 -0
  100. strix/tools/reporting/__init__.py +6 -0
  101. strix/tools/reporting/reporting_actions.py +63 -0
  102. strix/tools/reporting/reporting_actions_schema.xml +30 -0
  103. strix/tools/terminal/__init__.py +4 -0
  104. strix/tools/terminal/terminal_actions.py +35 -0
  105. strix/tools/terminal/terminal_actions_schema.xml +146 -0
  106. strix/tools/terminal/terminal_manager.py +151 -0
  107. strix/tools/terminal/terminal_session.py +447 -0
  108. strix/tools/thinking/__init__.py +4 -0
  109. strix/tools/thinking/thinking_actions.py +18 -0
  110. strix/tools/thinking/thinking_actions_schema.xml +52 -0
  111. strix/tools/web_search/__init__.py +4 -0
  112. strix/tools/web_search/web_search_actions.py +80 -0
  113. strix/tools/web_search/web_search_actions_schema.xml +83 -0
  114. strix_agent-0.4.0.dist-info/LICENSE +201 -0
  115. strix_agent-0.4.0.dist-info/METADATA +282 -0
  116. strix_agent-0.4.0.dist-info/RECORD +118 -0
  117. strix_agent-0.4.0.dist-info/WHEEL +4 -0
  118. strix_agent-0.4.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,195 @@
1
+ <idor_vulnerability_guide>
2
+ <title>INSECURE DIRECT OBJECT REFERENCE (IDOR)</title>
3
+
4
+ <critical>Object- and function-level authorization failures (BOLA/IDOR) routinely lead to cross-account data exposure and unauthorized state changes across APIs, web, mobile, and microservices. Treat every object reference as untrusted until proven bound to the caller.</critical>
5
+
6
+ <scope>
7
+ - Horizontal access: access another subject's objects of the same type
8
+ - Vertical access: access privileged objects/actions (admin-only, staff-only)
9
+ - Cross-tenant access: break isolation boundaries in multi-tenant systems
10
+ - Cross-service access: token or context accepted by the wrong service
11
+ </scope>
12
+
13
+ <methodology>
14
+ 1. Build a Subject × Object × Action matrix (who can do what to which resource).
15
+ 2. For each resource type, obtain at least two principals: owner and non-owner (plus admin/staff if applicable). Capture at least one valid object ID per principal.
16
+ 3. Exercise every action (R/W/D/Export) while swapping IDs, tokens, tenants, and channels (web, mobile, API, GraphQL, WebSocket, gRPC).
17
+ 4. Track consistency: the same rule must hold regardless of transport, content-type, serialization, or gateway.
18
+ </methodology>
19
+
20
+ <discovery_techniques>
21
+ <parameter_analysis>
22
+ - Object references appear in: paths, query params, JSON bodies, form-data, headers, cookies, JWT claims, GraphQL arguments, WebSocket messages, gRPC messages
23
+ - Identifier forms: integers, UUID/ULID/CUID, Snowflake, slugs, composite keys (e.g., {orgId}:{userId}), opaque tokens, base64/hex-encoded blobs
24
+ - Relationship references: parentId, ownerId, accountId, tenantId, organization, teamId, projectId, subscriptionId
25
+ - Expansion/projection knobs: fields, include, expand, projection, with, select, populate (often bypass authorization in resolvers or serializers)
26
+ - Pagination/cursors: page[offset], page[limit], cursor, nextPageToken (often reveal or accept cross-tenant/state)
27
+ </parameter_analysis>
28
+
29
+ <advanced_enumeration>
30
+ - Alternate types: {% raw %}{"id":123}{% endraw %} vs {% raw %}{"id":"123"}{% endraw %}, arrays vs scalars, objects vs scalars, null/empty/0/-1/MAX_INT, scientific notation, overflows, unknown attributes retained by backend
31
+ - Duplicate keys/parameter pollution: id=1&id=2, JSON duplicate keys {% raw %}{"id":1,"id":2}{% endraw %} (parser precedence differences)
32
+ - Case/aliasing: userId vs userid vs USER_ID; alt names like resourceId, targetId, account
33
+ - Path traversal-like in virtual file systems: /files/user_123/../../user_456/report.csv
34
+ - Directory/list endpoints as seeders: search/list/suggest/export often leak object IDs for secondary exploitation
35
+ </advanced_enumeration>
36
+ </discovery_techniques>
37
+
38
+ <high_value_targets>
39
+ - Exports/backups/reporting endpoints (CSV/PDF/ZIP)
40
+ - Messaging/mailbox/notifications, audit logs, activity feeds
41
+ - Billing: invoices, payment methods, transactions, credits
42
+ - Healthcare/education records, HR documents, PII/PHI/PCI
43
+ - Admin/staff tools, impersonation/session management
44
+ - File/object storage keys (S3/GCS signed URLs, share links)
45
+ - Background jobs: import/export job IDs, task results
46
+ - Multi-tenant resources: organizations, workspaces, projects
47
+ </high_value_targets>
48
+
49
+ <exploitation_techniques>
50
+ <horizontal_vertical>
51
+ - Swap object IDs between principals using the same token to probe horizontal access; then repeat with lower-privilege tokens to probe vertical access
52
+ - Target partial updates (PATCH, JSON Patch/JSON Merge Patch) for silent unauthorized modifications
53
+ </horizontal_vertical>
54
+
55
+ <bulk_and_batch>
56
+ - Batch endpoints (bulk update/delete) often validate only the first element; include cross-tenant IDs mid-array
57
+ - CSV/JSON imports referencing foreign object IDs (ownerId, orgId) may bypass create-time checks
58
+ </bulk_and_batch>
59
+
60
+ <secondary_idor>
61
+ - Use list/search endpoints, notifications, emails, webhooks, and client logs to collect valid IDs, then fetch or mutate those objects directly
62
+ - Pagination/cursor manipulation to skip filters and pull other users' pages
63
+ </secondary_idor>
64
+
65
+ <job_task_objects>
66
+ - Access job/task IDs from one user to retrieve results for another (export/{jobId}/download, reports/{taskId})
67
+ - Cancel/approve someone else's jobs by referencing their task IDs
68
+ </job_task_objects>
69
+
70
+ <file_object_storage>
71
+ - Direct object paths or weakly scoped signed URLs; attempt key prefix changes, content-disposition tricks, or stale signatures reused across tenants
72
+ - Replace share tokens with tokens from other tenants; try case/URL-encoding variations
73
+ </file_object_storage>
74
+ </exploitation_techniques>
75
+
76
+ <advanced_techniques>
77
+ <graphql>
78
+ - Enforce resolver-level checks: do not rely on a top-level gate. Verify field and edge resolvers bind the resource to the caller on every hop
79
+ - Abuse batching/aliases to retrieve multiple users' nodes in one request and compare responses
80
+ - Global node patterns (Relay): decode base64 IDs and swap raw IDs; test {% raw %}node(id: "...base64..."){...}{% endraw %}
81
+ - Overfetching via fragments on privileged types; verify hidden fields cannot be queried by unprivileged callers
82
+ - Example:
83
+ {% raw %}
84
+ query IDOR {
85
+ me { id }
86
+ u1: user(id: "VXNlcjo0NTY=") { email billing { last4 } }
87
+ u2: node(id: "VXNlcjo0NTc=") { ... on User { email } }
88
+ }
89
+ {% endraw %}
90
+ </graphql>
91
+
92
+ <microservices_gateways>
93
+ - Token confusion: a token scoped for Service A accepted by Service B due to shared JWT verification but missing audience/claims checks
94
+ - Trust on headers: reverse proxies or API gateways injecting/trusting headers like X-User-Id, X-Organization-Id; try overriding or removing them
95
+ - Context loss: async consumers (queues, workers) re-process requests without re-checking authorization
96
+ </microservices_gateways>
97
+
98
+ <multi_tenant>
99
+ - Probe tenant scoping through headers, subdomains, and path params (e.g., X-Tenant-ID, org slug). Try mixing org of token with resource from another org
100
+ - Test cross-tenant reports/analytics rollups and admin views which aggregate multiple tenants
101
+ </multi_tenant>
102
+
103
+ <uuid_and_opaque_ids>
104
+ - UUID/ULID are not authorization: acquire valid IDs from logs, exports, JS bundles, analytics endpoints, emails, or public activity, then test ownership binding
105
+ - Time-based IDs (UUIDv1, ULID) may be guessable within a window; combine with leakage sources for targeted access
106
+ </uuid_and_opaque_ids>
107
+
108
+ <blind_channels>
109
+ - Use differential responses (status, size, ETag, timing) to detect existence; error shape often differs for owned vs foreign objects
110
+ - HEAD/OPTIONS, conditional requests (If-None-Match/If-Modified-Since) can confirm existence without full content
111
+ </blind_channels>
112
+ </advanced_techniques>
113
+
114
+ <bypass_techniques>
115
+ <parser_and_transport>
116
+ - Content-type switching: application/json ↔ application/x-www-form-urlencoded ↔ multipart/form-data; some paths enforce checks per parser
117
+ - Method tunneling: X-HTTP-Method-Override, _method=PATCH; or using GET on endpoints incorrectly accepting state changes
118
+ - JSON duplicate keys/array injection to bypass naive validators
119
+ </parser_and_transport>
120
+
121
+ <parameter_pollution>
122
+ - Duplicate parameters in query/body to influence server-side precedence (id=123&id=456); try both orderings
123
+ - Mix case/alias param names so gateway and backend disagree (userId vs userid)
124
+ </parameter_pollution>
125
+
126
+ <cache_and_gateway>
127
+ - CDN/proxy key confusion: responses keyed without Authorization or tenant headers expose cached objects to other users; manipulate Vary and Accept
128
+ - Redirect chains and 304/206 behaviors can leak content across tenants
129
+ </cache_and_gateway>
130
+
131
+ <race_windows>
132
+ - Time-of-check vs time-of-use: change the referenced ID between validation and execution using parallel requests
133
+ </race_windows>
134
+ </bypass_techniques>
135
+
136
+ <special_contexts>
137
+ <websocket>
138
+ - Authorization per-subscription: ensure channel/topic names cannot be guessed (user_{id}, org_{id}); subscribe/publish checks must run server-side, not only at handshake
139
+ - Try sending messages with target user IDs after subscribing to own channels
140
+ </websocket>
141
+
142
+ <grpc>
143
+ - Direct protobuf fields (owner_id, tenant_id) often bypass HTTP-layer middleware; validate references via grpcurl with tokens from different principals
144
+ </grpc>
145
+
146
+ <integrations>
147
+ - Webhooks/callbacks referencing foreign objects (e.g., invoice_id) processed without verifying ownership
148
+ - Third-party importers syncing data into wrong tenant due to missing tenant binding
149
+ </integrations>
150
+ </special_contexts>
151
+
152
+ <chaining_attacks>
153
+ - IDOR + CSRF: force victims to trigger unauthorized changes on objects you discovered
154
+ - IDOR + Stored XSS: pivot into other users' sessions through data you gained access to
155
+ - IDOR + SSRF: exfiltrate internal IDs, then access their corresponding resources
156
+ - IDOR + Race: bypass spot checks with simultaneous requests
157
+ </chaining_attacks>
158
+
159
+ <validation>
160
+ 1. Demonstrate access to an object not owned by the caller (content or metadata).
161
+ 2. Show that the same request fails once authorization is correctly enforced (e.g., after the fix is applied or against a properly protected object).
162
+ 3. Prove cross-channel consistency: same unauthorized access via at least two transports (e.g., REST and GraphQL).
163
+ 4. Document tenant boundary violations (if applicable).
164
+ 5. Provide reproducible steps and evidence (requests/responses for owner vs non-owner).
165
+ </validation>
166
+
167
+ <false_positives>
168
+ - Public/anonymous resources by design
169
+ - Soft-privatized data where content is already public
170
+ - Idempotent metadata lookups that do not reveal sensitive content
171
+ - Correct row-level checks enforced across all channels
172
+ </false_positives>
173
+
174
+ <impact>
175
+ - Cross-account data exposure (PII/PHI/PCI)
176
+ - Unauthorized state changes (transfers, role changes, cancellations)
177
+ - Cross-tenant data leaks violating contractual and regulatory boundaries
178
+ - Regulatory risk (GDPR/HIPAA/PCI), fraud, reputational damage
179
+ </impact>
180
+
181
+ <pro_tips>
182
+ 1. Always test list/search/export endpoints first; they are rich ID seeders.
183
+ 2. Build a reusable ID corpus from logs, notifications, emails, and client bundles.
184
+ 3. Toggle content-types and transports; authorization middleware often differs per stack.
185
+ 4. In GraphQL, validate at resolver boundaries; never trust parent auth to cover children.
186
+ 5. In multi-tenant apps, vary org headers, subdomains, and path params independently.
187
+ 6. Check batch/bulk operations and background job endpoints; they frequently skip per-item checks.
188
+ 7. Inspect gateways for header trust and cache key configuration.
189
+ 8. Treat UUIDs as untrusted; obtain them via OSINT/leaks and test binding.
190
+ 9. Use timing/size/ETag differentials for blind confirmation when content is masked.
191
+ 10. Prove impact with precise before/after diffs and role-separated evidence.
192
+ </pro_tips>
193
+
194
+ <remember>Authorization must bind subject, action, and specific object on every request, regardless of identifier opacity or transport. If the binding is missing anywhere, the system is vulnerable.</remember>
195
+ </idor_vulnerability_guide>
@@ -0,0 +1,222 @@
1
+ <information_disclosure_vulnerability_guide>
2
+ <title>INFORMATION DISCLOSURE</title>
3
+
4
+ <critical>Information leaks accelerate exploitation by revealing code, configuration, identifiers, and trust boundaries. Treat every response byte, artifact, and header as potential intelligence. Minimize, normalize, and scope disclosure across all channels.</critical>
5
+
6
+ <scope>
7
+ - Errors and exception pages: stack traces, file paths, SQL, framework versions
8
+ - Debug/dev tooling reachable in prod: debuggers, profilers, feature flags
9
+ - DVCS/build artifacts and temp/backup files: .git, .svn, .hg, .bak, .swp, archives
10
+ - Configuration and secrets: .env, phpinfo, appsettings.json, Docker/K8s manifests
11
+ - API schemas and introspection: OpenAPI/Swagger, GraphQL introspection, gRPC reflection
12
+ - Client bundles and source maps: webpack/Vite maps, embedded env, __NEXT_DATA__, static JSON
13
+ - Headers and response metadata: Server/X-Powered-By, tracing, ETag, Accept-Ranges, Server-Timing
14
+ - Storage/export surfaces: public buckets, signed URLs, export/download endpoints
15
+ - Observability/admin: /metrics, /actuator, /health, tracing UIs (Jaeger, Zipkin), Kibana, Admin UIs
16
+ - Directory listings and indexing: autoindex, sitemap/robots revealing hidden routes
17
+ - Cross-origin signals: CORS misconfig, Referrer-Policy leakage, Expose-Headers
18
+ - File/document metadata: EXIF, PDF/Office properties
19
+ </scope>
20
+
21
+ <methodology>
22
+ 1. Build a channel map: Web, API, GraphQL, WebSocket, gRPC, mobile, background jobs, exports, CDN.
23
+ 2. Establish a diff harness: compare owner vs non-owner vs anonymous across transports; normalize on status/body length/ETag/headers.
24
+ 3. Trigger controlled failures: send malformed types, boundary values, missing params, and alternate content-types to elicit error detail and stack traces.
25
+ 4. Enumerate artifacts: DVCS folders, backups, config endpoints, source maps, client bundles, API docs, observability routes.
26
+ 5. Correlate disclosures to impact: versions→CVE, paths→LFI/RCE, keys→cloud access, schemas→auth bypass, IDs→IDOR.
27
+ </methodology>
28
+
29
+ <surfaces>
30
+ <errors_and_exceptions>
31
+ - SQL/ORM errors: reveal table/column names, DBMS, query fragments
32
+ - Stack traces: absolute paths, class/method names, framework versions, developer emails
33
+ - Template engine probes: {% raw %}{{7*7}}, ${7*7}{% endraw %} identify templating stack and code paths
34
+ - JSON/XML parsers: type mismatches and coercion logs leak internal model names
35
+ </errors_and_exceptions>
36
+
37
+ <debug_and_env_modes>
38
+ - Debug pages and flags: Django DEBUG, Laravel Telescope, Rails error pages, Flask/Werkzeug debugger, ASP.NET customErrors Off
39
+ - Profiler endpoints: /debug/pprof, /actuator, /_profiler, custom /debug APIs
40
+ - Feature/config toggles exposed in JS or headers; admin/staff banners in HTML
41
+ </debug_and_env_modes>
42
+
43
+ <dvcs_and_backups>
44
+ - DVCS: /.git/ (HEAD, config, index, objects), .svn/entries, .hg/store → reconstruct source and secrets
45
+ - Backups/temp: .bak/.old/~/.swp/.swo/.tmp/.orig, db dumps, zipped deployments under /backup/, /old/, /archive/
46
+ - Build artifacts: dist artifacts containing .map, env prints, internal URLs
47
+ </dvcs_and_backups>
48
+
49
+ <configs_and_secrets>
50
+ - Classic: web.config, appsettings.json, settings.py, config.php, phpinfo.php
51
+ - Containers/cloud: Dockerfile, docker-compose.yml, Kubernetes manifests, service account tokens, cloud credentials files
52
+ - Credentials and connection strings; internal hosts and ports; JWT secrets
53
+ </configs_and_secrets>
54
+
55
+ <api_schemas_and_introspection>
56
+ - OpenAPI/Swagger: /swagger, /api-docs, /openapi.json — enumerate hidden/privileged operations
57
+ - GraphQL: introspection enabled; field suggestions; error disclosure via invalid fields; persisted queries catalogs
58
+ - gRPC: server reflection exposing services/messages; proto download via reflection
59
+ </api_schemas_and_introspection>
60
+
61
+ <client_bundles_and_maps>
62
+ - Source maps (.map) reveal original sources, comments, and internal logic
63
+ - Client env leakage: NEXT_PUBLIC_/VITE_/REACT_APP_ variables; runtime config; embedded secrets accidentally shipped
64
+ - Next.js data: __NEXT_DATA__ and pre-fetched JSON under /_next/data can include internal IDs, flags, or PII
65
+ - Static JSON/CSV feeds used by the UI that bypass server-side auth filtering
66
+ </client_bundles_and_maps>
67
+
68
+ <headers_and_response_metadata>
69
+ - Fingerprinting: Server, X-Powered-By, X-AspNet-Version
70
+ - Tracing: X-Request-Id, traceparent, Server-Timing, debug headers
71
+ - Caching oracles: ETag/If-None-Match, Last-Modified/If-Modified-Since, Accept-Ranges/Range (partial content reveals)
72
+ - Content sniffing and MIME metadata that implies backend components
73
+ </headers_and_response_metadata>
74
+
75
+ <storage_and_exports>
76
+ - Public object storage: S3/GCS/Azure blobs with world-readable ACLs or guessable keys
77
+ - Signed URLs: long-lived, weakly scoped, re-usable across tenants; metadata leaks in headers
78
+ - Export/report endpoints returning foreign data sets or unfiltered fields
79
+ </storage_and_exports>
80
+
81
+ <observability_and_admin>
82
+ - Metrics: Prometheus /metrics exposing internal hostnames, process args, SQL, credentials by mistake
83
+ - Health/config: /actuator/health, /actuator/env, Spring Boot info endpoints
84
+ - Tracing UIs and dashboards: Jaeger/Zipkin/Kibana/Grafana exposed without auth
85
+ </observability_and_admin>
86
+
87
+ <directory_and_indexing>
88
+ - Autoindex on /uploads/, /files/, /logs/, /tmp/, /assets/
89
+ - Robots/sitemap reveal hidden paths, admin panels, export feeds
90
+ </directory_and_indexing>
91
+
92
+ <cross_origin_signals>
93
+ - Referrer leakage: missing or lax Referrer-Policy leading to path/query/token leaks to third parties
94
+ - CORS: overly permissive Access-Control-Allow-Origin/Expose-Headers revealing data cross-origin; preflight error shapes
95
+ </cross_origin_signals>
96
+
97
+ <file_metadata>
98
+ - EXIF, PDF/Office properties: authors, paths, software versions, timestamps, embedded objects
99
+ </file_metadata>
100
+ </surfaces>
101
+
102
+ <advanced_techniques>
103
+ <differential_oracles>
104
+ - Compare owner vs non-owner vs anonymous for the same resource and track: status, length, ETag, Last-Modified, Cache-Control
105
+ - HEAD vs GET: header-only differences can confirm existence or type without content
106
+ - Conditional requests: 304 vs 200 behaviors leak existence/state; binary search content size via Range requests
107
+ </differential_oracles>
108
+
109
+ <cdn_and_cache_keys>
110
+ - Identity-agnostic caches: CDN/proxy keys missing Authorization/tenant headers → cross-user cached responses
111
+ - Vary misconfiguration: responses vary on User-Agent/Accept-Language but not on Authorization, so caches leak alternate content across identities
112
+ - 206 partial content + stale caches leak object fragments
113
+ </cdn_and_cache_keys>
114
+
115
+ <cross_channel_mirroring>
116
+ - Inconsistent hardening between REST, GraphQL, WebSocket, and gRPC; one channel leaks schema or fields hidden in others
117
+ - SSR vs CSR: server-rendered pages omit fields while JSON API includes them; compare responses
118
+ </cross_channel_mirroring>
119
+
120
+ <introspection_and_reflection>
121
+ - GraphQL: disabled introspection still leaks via errors, fragment suggestions, and client bundles containing schema
122
+ - gRPC reflection: list services/messages and infer internal resource names and flows
123
+ </introspection_and_reflection>
124
+
125
+ <cloud_specific>
126
+ - S3/GCS/Azure: anonymous listing disabled but object reads allowed; metadata headers leak owner/project identifiers
127
+ - Pre-signed URLs: audience not bound; observe key scope and lifetime in URL params
128
+ </cloud_specific>
129
+ </advanced_techniques>
130
+
131
+ <usefulness_assessment>
132
+ - Actionable signals:
133
+ - Secrets/keys/tokens that grant new access (DB creds, cloud keys, JWT signing/refresh, signed URL secrets)
134
+ - Versions with a reachable, unpatched CVE on an exposed path
135
+ - Cross-tenant identifiers/data or per-user fields that differ by principal
136
+ - File paths, service hosts, or internal URLs that enable LFI/SSRF/RCE pivots
137
+ - Cache/CDN differentials (Vary/ETag/Range) that expose other users' content
138
+ - Schema/introspection revealing hidden operations or fields that return sensitive data
139
+ - Likely benign or intended:
140
+ - Public docs or non-sensitive metadata explicitly documented as public
141
+ - Generic server names without precise versions or exploit path
142
+ - Redacted/sanitized fields with stable length/ETag across principals
143
+ - Per-user data visible only to the owner and consistent with privacy policy
144
+ </usefulness_assessment>
145
+
146
+ <triage_rubric>
147
+ - Critical: Credentials/keys; signed URL secrets; config dumps; unrestricted admin/observability panels
148
+ - High: Versions with reachable CVEs; cross-tenant data; caches serving cross-user content; schema enabling auth bypass
149
+ - Medium: Internal paths/hosts enabling LFI/SSRF pivots; source maps revealing hidden endpoints/IDs
150
+ - Low: Generic headers, marketing versions, intended documentation without exploit path
151
+ - Guidance: Always attempt a minimal, reversible proof for Critical/High; if no safe chain exists, document precise blocker and downgrade
152
+ </triage_rubric>
153
+
154
+ <escalation_playbook>
155
+ - If DVCS/backups/configs → extract secrets; test least-privileged read; rotate after coordinated disclosure
156
+ - If versions → map to CVE; verify exposure; execute minimal PoC under strict scope
157
+ - If schema/introspection → call hidden/privileged fields with non-owner tokens; confirm auth gaps
158
+ - If source maps/client JSON → mine endpoints/IDs/flags; pivot to IDOR/listing; validate filtering
159
+ - If cache/CDN keys → demonstrate cross-user cache leak via Vary/ETag/Range; escalate to broken access control
160
+ - If paths/hosts → target LFI/SSRF with harmless reads (e.g., /etc/hostname, metadata headers); avoid destructive actions
161
+ - If observability/admin → enumerate read-only info first; prove data scope breach; avoid write/exec operations
162
+ </escalation_playbook>
163
+
164
+ <exploitation_chains>
165
+ <credential_extraction>
166
+ - DVCS/config dumps exposing secrets (DB, SMTP, JWT, cloud)
167
+ - Keys → cloud control plane access; rotate and verify scope
168
+ </credential_extraction>
169
+
170
+ <version_to_cve>
171
+ 1. Derive precise component versions from headers/errors/bundles.
172
+ 2. Map to known CVEs and confirm reachability.
173
+ 3. Execute minimal proof targeting disclosed component.
174
+ </version_to_cve>
175
+
176
+ <path_disclosure_to_lfi>
177
+ 1. Paths from stack traces/templates reveal filesystem layout.
178
+ 2. Use LFI/traversal to fetch config/keys.
179
+ 3. Prove controlled access without altering state.
180
+ </path_disclosure_to_lfi>
181
+
182
+ <schema_to_auth_bypass>
183
+ 1. Schema reveals hidden fields/endpoints.
184
+ 2. Attempt requests with those fields; confirm missing authorization or field filtering.
185
+ </schema_to_auth_bypass>
186
+ </exploitation_chains>
187
+
188
+ <validation>
189
+ 1. Provide raw evidence (headers/body/artifact) and explain exact data revealed.
190
+ 2. Determine intent: cross-check docs/UX; classify per triage rubric (Critical/High/Medium/Low).
191
+ 3. Attempt minimal, reversible exploitation or present a concrete step-by-step chain (what to try next and why).
192
+ 4. Show reproducibility and minimal request set; include cross-channel confirmation where applicable.
193
+ 5. Bound scope (user, tenant, environment) and data sensitivity classification.
194
+ </validation>
195
+
196
+ <false_positives>
197
+ - Intentional public docs or non-sensitive metadata with no exploit path
198
+ - Generic errors with no actionable details
199
+ - Redacted fields that do not change differential oracles (length/ETag stable)
200
+ - Version banners with no exposed vulnerable surface and no chain
201
+ - Owner-visible-only details that do not cross identity/tenant boundaries
202
+ </false_positives>
203
+
204
+ <impact>
205
+ - Accelerated exploitation of RCE/LFI/SSRF via precise versions and paths
206
+ - Credential/secret exposure leading to persistent external compromise
207
+ - Cross-tenant data disclosure through exports, caches, or mis-scoped signed URLs
208
+ - Privacy/regulatory violations and business intelligence leakage
209
+ </impact>
210
+
211
+ <pro_tips>
212
+ 1. Start with artifacts (DVCS, backups, maps) before payloads; artifacts yield the fastest wins.
213
+ 2. Normalize responses and diff by digest to reduce noise when comparing roles.
214
+ 3. Hunt source maps and client data JSON; they often carry internal IDs and flags.
215
+ 4. Probe caches/CDNs for identity-unaware keys; verify Vary includes Authorization/tenant.
216
+ 5. Treat introspection and reflection as configuration findings across GraphQL/gRPC; validate per environment.
217
+ 6. Mine observability endpoints last; they are noisy but high-yield in misconfigured setups.
218
+ 7. Chain quickly to a concrete risk and stop—proof should be minimal and reversible.
219
+ </pro_tips>
220
+
221
+ <remember>Information disclosure is an amplifier. Convert leaks into precise, minimal exploits or clear architectural risks.</remember>
222
+ </information_disclosure_vulnerability_guide>
@@ -0,0 +1,188 @@
1
+ <insecure_file_uploads_guide>
2
+ <title>INSECURE FILE UPLOADS</title>
3
+
4
+ <critical>Upload surfaces are high risk: server-side execution (RCE), stored XSS, malware distribution, storage takeover, and DoS. Modern stacks mix direct-to-cloud uploads, background processors, and CDNs—authorization and validation must hold across every step.</critical>
5
+
6
+ <scope>
7
+ - Web/mobile/API uploads, direct-to-cloud (S3/GCS/Azure) presigned flows, resumable/multipart protocols (tus, S3 MPU)
8
+ - Image/document/media pipelines (ImageMagick/GraphicsMagick, Ghostscript, ExifTool, PDF engines, office converters)
9
+ - Admin/bulk importers, archive uploads (zip/tar), report/template uploads, rich text with attachments
10
+ - Serving paths: app directly, object storage, CDN, email attachments, previews/thumbnails
11
+ </scope>
12
+
13
+ <methodology>
14
+ 1. Map the pipeline: client → ingress (edge/app/gateway) → storage → processors (thumb, OCR, AV, CDR) → serving (app/storage/CDN). Note where validation and auth occur.
15
+ 2. Identify allowed types, size limits, filename rules, storage keys, and who serves the content. Collect baseline uploads per type and capture resulting URLs and headers.
16
+ 3. Exercise bypass families systematically: extension games, MIME/content-type, magic bytes, polyglots, metadata payloads, archive structure, chunk/finalize differentials.
17
+ 4. Validate execution and rendering: can uploaded content execute on server or client? Confirm with minimal PoCs and headers analysis.
18
+ </methodology>
19
+
20
+ <discovery_techniques>
21
+ <surface_map>
22
+ - Endpoints/fields: upload, file, avatar, image, attachment, import, media, document, template
23
+ - Direct-to-cloud params: key, bucket, acl, Content-Type, Content-Disposition, x-amz-meta-*, cache-control
24
+ - Resumable APIs: create/init → upload/chunk → complete/finalize; check if metadata/headers can be altered late
25
+ - Background processors: thumbnails, PDF→image, virus scan queues; identify timing and status transitions
26
+ </surface_map>
27
+
28
+ <capability_probes>
29
+ - Small probe files of each claimed type; diff resulting Content-Type, Content-Disposition, and X-Content-Type-Options on download
30
+ - Magic bytes vs extension: JPEG/GIF/PNG headers; mismatches reveal reliance on extension or MIME sniffing
31
+ - SVG/HTML probe: do they render inline (text/html or image/svg+xml) or download (attachment)?
32
+ - Archive probe: simple zip with nested path traversal entries and symlinks to detect extraction rules
33
+ </capability_probes>
34
+ </discovery_techniques>
35
+
36
+ <detection_channels>
37
+ <server_execution>
38
+ - Web shell execution (language dependent), config/handler uploads (.htaccess, .user.ini, web.config) enabling execution
39
+ - Interpreter-side template/script evaluation during conversion (ImageMagick/Ghostscript/ExifTool)
40
+ </server_execution>
41
+
42
+ <client_execution>
43
+ - Stored XSS via SVG/HTML/JS if served inline without correct headers; PDF JavaScript; office macros in previewers
44
+ </client_execution>
45
+
46
+ <header_and_render>
47
+ - Missing X-Content-Type-Options: nosniff, allowing browsers to MIME-sniff uploaded content and execute it as script
48
+ - Content-Type reflection from upload vs server-set; Content-Disposition: inline vs attachment
49
+ </header_and_render>
50
+
51
+ <process_side_effects>
52
+ - AV/CDR race or absence; background job status allows access before scan completes; password-protected archives bypass scanning
53
+ </process_side_effects>
54
+ </detection_channels>
55
+
56
+ <core_payloads>
57
+ <web_shells_and_configs>
58
+ - PHP: GIF polyglot (starts with GIF89a) followed by <?php echo 1; ?>; upload it to a location where PHP is executed
59
+ - .htaccess to map extensions to code (AddType/AddHandler); .user.ini (auto_prepend_file/auto_append_file) for PHP-FPM
60
+ - ASP/JSP equivalents where supported; IIS web.config to enable script execution
61
+ </web_shells_and_configs>
62
+
63
+ <stored_xss>
64
+ - SVG with onload/onerror handlers served as image/svg+xml or text/html
65
+ - HTML file with script when served as text/html or sniffed due to missing nosniff
66
+ </stored_xss>
67
+
68
+ <mime_magic_polyglots>
69
+ - Double extensions: avatar.jpg.php, report.pdf.html; mixed casing: .pHp, .PhAr
70
+ - Magic-byte spoofing: valid JPEG header then embedded script; verify server uses content inspection, not extensions alone
71
+ </mime_magic_polyglots>
72
+
73
+ <archive_attacks>
74
+ - Zip Slip: entries with ../../ to escape extraction dir; symlink-in-zip pointing outside target; nested zips
75
+ - Zip bomb: extreme compression ratios (e.g., 42.zip) to exhaust resources in processors
76
+ </archive_attacks>
77
+
78
+ <toolchain_exploits>
79
+ - ImageMagick/GraphicsMagick legacy vectors (policy.xml may mitigate): crafted SVG/PS/EPS invoking external commands or reading files
80
+ - Ghostscript in PDF/PS with file operators (%pipe%)
81
+ - ExifTool metadata parsing bugs; overly large or crafted EXIF/IPTC/XMP fields
82
+ </toolchain_exploits>
83
+
84
+ <cloud_storage_vectors>
85
+ - S3/GCS presigned uploads: attacker controls Content-Type/Disposition; set text/html or image/svg+xml and inline rendering
86
+ - Public-read ACL or permissive bucket policies expose uploads broadly; object key injection via user-controlled path prefixes
87
+ - Signed URL reuse and stale URLs; serving directly from bucket without attachment + nosniff headers
88
+ </cloud_storage_vectors>
89
+ </core_payloads>
90
+
91
+ <advanced_techniques>
92
+ <resumable_multipart>
93
+ - Change metadata between init and complete (e.g., swap Content-Type/Disposition at finalize)
94
+ - Upload benign chunks, then swap the last chunk or complete with a different source if the server trusts only client-supplied digests
95
+ </resumable_multipart>
96
+
97
+ <filename_and_path>
98
+ - Unicode homoglyphs, trailing dots/spaces, device names, reserved characters to bypass validators and filesystem rules
99
+ - Null-byte truncation on legacy stacks; overlong paths; case-insensitive collisions overwriting existing files
100
+ </filename_and_path>
101
+
102
+ <processing_races>
103
+ - Request file immediately after upload but before AV/CDR completes; or during derivative creation to get unprocessed content
104
+ - Trigger heavy conversions (large images, deep PDFs) to widen race windows
105
+ </processing_races>
106
+
107
+ <metadata_abuse>
108
+ - Oversized EXIF/XMP/IPTC blocks to trigger parser flaws; payloads in document properties of Office/PDF rendered by previewers
109
+ </metadata_abuse>
110
+
111
+ <header_manipulation>
112
+ - Force inline rendering with Content-Type + inline Content-Disposition; test browsers with and without nosniff
113
+ - Cache poisoning via CDN with keys missing Vary on Content-Type/Disposition
114
+ </header_manipulation>
115
+ </advanced_techniques>
116
+
117
+ <filter_bypasses>
118
+ <validation_gaps>
119
+ - Client-side only checks; relying on JS/MIME provided by browser; blindly trusting per-part multipart headers (Content-Type, filename)
120
+ - Extension allowlists without server-side content inspection; magic-bytes only without full parsing
121
+ </validation_gaps>
122
+
123
+ <evasion_tricks>
124
+ - Double extensions, mixed case, hidden dotfiles, extra dots (file..png), long paths with allowed suffix
125
+ - Multipart name vs filename vs path discrepancies; duplicate parameters and late parameter precedence
126
+ </evasion_tricks>
127
+ </filter_bypasses>
128
+
129
+ <special_contexts>
130
+ <rich_text_editors>
131
+ - RTEs allow image/attachment uploads and embed links; verify sanitization and serving headers for embedded content
132
+ </rich_text_editors>
133
+
134
+ <mobile_clients>
135
+ - Mobile SDKs may send nonstandard MIME or metadata; servers sometimes trust client-side transformations or EXIF orientation
136
+ </mobile_clients>
137
+
138
+ <serverless_and_cdn>
139
+ - Direct-to-bucket uploads with Lambda/Workers post-processing; verify that security decisions are not delegated to frontends
140
+ - CDN caching of uploaded content; ensure correct cache keys and headers (attachment, nosniff)
141
+ </serverless_and_cdn>
142
+ </special_contexts>
143
+
144
+ <parser_hardening>
145
+ - Validate on server: strict allowlist by true type (parse enough to confirm), size caps, and structural checks (dimensions, page count)
146
+ - Strip active content: convert SVG→PNG; remove scripts/JS from PDF; disable macros; normalize EXIF; consider CDR for risky types
147
+ - Store outside web root; serve via application or signed, time-limited URLs with Content-Disposition: attachment and X-Content-Type-Options: nosniff
148
+ - For cloud: private buckets, per-request signed GET, enforce Content-Type/Disposition on GET responses from your app/gateway
149
+ - Disable execution in upload paths; ignore .htaccess/.user.ini; sanitize keys to prevent path injections; randomize filenames
150
+ - AV + CDR: scan synchronously when possible; quarantine until verdict; block password-protected archives or process in sandbox
151
+ </parser_hardening>
152
+
153
+ <validation>
154
+ 1. Demonstrate execution or rendering of active content: web shell reachable, or SVG/HTML executing JS when viewed.
155
+ 2. Show filter bypass: upload accepted despite restrictions (extension/MIME/magic mismatch) with evidence on retrieval.
156
+ 3. Prove header weaknesses: inline rendering without nosniff or missing attachment; present exact response headers.
157
+ 4. Show race or pipeline gap: access before AV/CDR; extraction outside intended directory; derivative creation from malicious input.
158
+ 5. Provide reproducible steps: request/response for upload and subsequent access, with minimal PoCs.
159
+ </validation>
160
+
161
+ <false_positives>
162
+ - Upload stored but never served back; or always served as attachment with strict nosniff
163
+ - Converters run in locked-down sandboxes with no external IO and no script engines; no path traversal on archive extraction
164
+ - AV/CDR blocks the payload and quarantines; access before scan is impossible by design
165
+ </false_positives>
166
+
167
+ <impact>
168
+ - Remote code execution on application stack or media toolchain host
169
+ - Persistent cross-site scripting and session/token exfiltration via served uploads
170
+ - Malware distribution via public storage/CDN; brand/reputation damage
171
+ - Data loss or corruption via overwrite/zip slip; service degradation via zip bombs or oversized assets
172
+ </impact>
173
+
174
+ <pro_tips>
175
+ 1. Keep PoCs minimal: tiny SVG/HTML for XSS, a single-line PHP/ASP where relevant, and benign magic-byte polyglots.
176
+ 2. Always capture download response headers and final MIME from the server/CDN; that decides browser behavior.
177
+ 3. Prefer transforming risky formats to safe renderings (SVG→PNG) rather than attempting complex sanitization.
178
+ 4. In presigned flows, constrain all headers and object keys server-side; ignore client-supplied ACL and metadata.
179
+ 5. For archives, extract in a chroot/jail with explicit allowlist; drop symlinks and reject traversal.
180
+ 6. Test finalize/complete steps in resumable flows; many validations only run on init, not at completion.
181
+ 7. Verify background processors with EICAR and tiny polyglots; ensure quarantine gates access until safe.
182
+ 8. When you cannot get server-side code execution, aim for stored XSS or header-driven script execution; both are impactful.
183
+ 9. Validate that CDNs honor attachment/nosniff and do not override Content-Type/Disposition.
184
+ 10. Document full pipeline behavior per asset type; defenses must match actual processors and serving paths.
185
+ </pro_tips>
186
+
187
+ <remember>Secure uploads are a pipeline property. Enforce strict type, size, and header controls; transform or strip active content; never execute or inline-render untrusted uploads; and keep storage private with controlled, signed access.</remember>
188
+ </insecure_file_uploads_guide>