socketsecurity 2.1.21__tar.gz → 2.1.23__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/.gitignore +3 -1
  2. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/PKG-INFO +81 -12
  3. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/README.md +80 -11
  4. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/pyproject.toml +1 -1
  5. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/socketsecurity/__init__.py +1 -1
  6. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/socketsecurity/config.py +16 -0
  7. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/socketsecurity/core/__init__.py +243 -33
  8. socketsecurity-2.1.23/socketsecurity/core/lazy_file_loader.py +165 -0
  9. socketsecurity-2.1.23/socketsecurity/core/resource_utils.py +58 -0
  10. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/socketsecurity/socketcli.py +101 -39
  11. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/.github/CODEOWNERS +0 -0
  12. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/.github/PULL_REQUEST_TEMPLATE/bug-fix.md +0 -0
  13. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/.github/PULL_REQUEST_TEMPLATE/feature.md +0 -0
  14. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/.github/PULL_REQUEST_TEMPLATE/improvement.md +0 -0
  15. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
  16. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/.github/workflows/docker-stable.yml +0 -0
  17. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/.github/workflows/pr-preview.yml +0 -0
  18. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/.github/workflows/release.yml +0 -0
  19. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/.github/workflows/version-check.yml +0 -0
  20. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/.hooks/sync_version.py +0 -0
  21. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/.pre-commit-config.yaml +0 -0
  22. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/.python-version +0 -0
  23. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/Dockerfile +0 -0
  24. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/LICENSE +0 -0
  25. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/Makefile +0 -0
  26. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/Pipfile.lock +0 -0
  27. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/docs/README.md +0 -0
  28. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/pytest.ini +0 -0
  29. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/requirements-dev.lock +0 -0
  30. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/requirements-dev.txt +0 -0
  31. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/requirements.lock +0 -0
  32. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/requirements.txt +0 -0
  33. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/scripts/build_container.sh +0 -0
  34. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/scripts/deploy-test-docker.sh +0 -0
  35. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/scripts/deploy-test-pypi.sh +0 -0
  36. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/scripts/run.sh +0 -0
  37. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/socketsecurity/core/classes.py +0 -0
  38. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/socketsecurity/core/cli_client.py +0 -0
  39. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/socketsecurity/core/exceptions.py +0 -0
  40. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/socketsecurity/core/git_interface.py +0 -0
  41. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/socketsecurity/core/helper/__init__.py +0 -0
  42. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/socketsecurity/core/logging.py +0 -0
  43. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/socketsecurity/core/messages.py +0 -0
  44. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/socketsecurity/core/scm/__init__.py +0 -0
  45. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/socketsecurity/core/scm/base.py +0 -0
  46. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/socketsecurity/core/scm/client.py +0 -0
  47. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/socketsecurity/core/scm/github.py +0 -0
  48. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/socketsecurity/core/scm/gitlab.py +0 -0
  49. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/socketsecurity/core/scm_comments.py +0 -0
  50. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/socketsecurity/core/socket_config.py +0 -0
  51. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/socketsecurity/core/utils.py +0 -0
  52. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/socketsecurity/output.py +0 -0
  53. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/socketsecurity/plugins/__init__.py +0 -0
  54. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/socketsecurity/plugins/base.py +0 -0
  55. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/socketsecurity/plugins/jira.py +0 -0
  56. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/socketsecurity/plugins/manager.py +0 -0
  57. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/socketsecurity/plugins/slack.py +0 -0
  58. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/socketsecurity/plugins/teams.py +0 -0
  59. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/socketsecurity/plugins/webhook.py +0 -0
  60. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/tests/__init__.py +0 -0
  61. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/tests/core/conftest.py +0 -0
  62. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/tests/core/create_diff_input.json +0 -0
  63. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/tests/core/test_diff_generation.py +0 -0
  64. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/tests/core/test_package_and_alerts.py +0 -0
  65. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/tests/core/test_sdk_methods.py +0 -0
  66. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/tests/core/test_supporting_methods.py +0 -0
  67. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/tests/data/fullscans/create_response.json +0 -0
  68. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/tests/data/fullscans/diff/stream_diff.json +0 -0
  69. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/tests/data/fullscans/diff/stream_diff_full.json +0 -0
  70. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/tests/data/fullscans/head_scan/metadata.json +0 -0
  71. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/tests/data/fullscans/head_scan/stream_scan.json +0 -0
  72. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/tests/data/fullscans/head_scan/stream_scan_full.json +0 -0
  73. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/tests/data/fullscans/new_scan/metadata.json +0 -0
  74. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/tests/data/fullscans/new_scan/stream_scan.json +0 -0
  75. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/tests/data/repos/repo_info_error.json +0 -0
  76. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/tests/data/repos/repo_info_no_head.json +0 -0
  77. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/tests/data/repos/repo_info_success.json +0 -0
  78. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/tests/data/settings/security-policy.json +0 -0
  79. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/tests/unit/__init__.py +0 -0
  80. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/tests/unit/test_cli_config.py +0 -0
  81. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/tests/unit/test_client.py +0 -0
  82. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/tests/unit/test_config.py +0 -0
  83. {socketsecurity-2.1.21 → socketsecurity-2.1.23}/tests/unit/test_output.py +0 -0
@@ -24,4 +24,6 @@ file_generator.py
24
24
  .env.local
25
25
  Pipfile
26
26
  test/
27
- logs
27
+ logs
28
+ ai_testing/
29
+ verify_find_files_lazy_loading.py
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: socketsecurity
3
- Version: 2.1.21
3
+ Version: 2.1.23
4
4
  Summary: Socket Security CLI for CI/CD
5
5
  Project-URL: Homepage, https://socket.dev
6
6
  Author-email: Douglas Coburn <douglas@socket.dev>
@@ -63,10 +63,10 @@ The Socket Security CLI was created to enable integrations with other tools like
63
63
  ```` shell
64
64
  socketcli [-h] [--api-token API_TOKEN] [--repo REPO] [--integration {api,github,gitlab}] [--owner OWNER] [--branch BRANCH]
65
65
  [--committers [COMMITTERS ...]] [--pr-number PR_NUMBER] [--commit-message COMMIT_MESSAGE] [--commit-sha COMMIT_SHA]
66
- [--target-path TARGET_PATH] [--sbom-file SBOM_FILE] [--files FILES] [--default-branch] [--pending-head]
67
- [--generate-license] [--enable-debug] [--enable-json] [--enable-sarif] [--disable-overview] [--disable-security-issue]
68
- [--allow-unverified] [--ignore-commit-files] [--disable-blocking] [--scm SCM] [--timeout TIMEOUT]
69
- [--exclude-license-details]
66
+ [--target-path TARGET_PATH] [--sbom-file SBOM_FILE] [--files FILES] [--save-submitted-files-list SAVE_SUBMITTED_FILES_LIST]
67
+ [--save-manifest-tar SAVE_MANIFEST_TAR] [--default-branch] [--pending-head] [--generate-license] [--enable-debug] [--enable-json] [--enable-sarif]
68
+ [--disable-overview] [--disable-security-issue] [--allow-unverified] [--ignore-commit-files] [--disable-blocking]
69
+ [--scm SCM] [--timeout TIMEOUT] [--exclude-license-details]
70
70
  ````
71
71
 
72
72
  If you don't want to provide the Socket API Token every time then you can use the environment variable `SOCKET_SECURITY_API_KEY`
@@ -96,13 +96,15 @@ If you don't want to provide the Socket API Token every time then you can use th
96
96
  | --commit-sha | False | "" | Commit SHA |
97
97
 
98
98
  #### Path and File
99
- | Parameter | Required | Default | Description |
100
- |:----------------------|:---------|:----------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
101
- | --target-path | False | ./ | Target path for analysis |
102
- | --sbom-file | False | | SBOM file path |
103
- | --files | False | [] | Files to analyze (JSON array string) |
104
- | --excluded-ecosystems | False | [] | List of ecosystems to exclude from analysis (JSON array string). You can get supported files from the [Supported Files API](https://docs.socket.dev/reference/getsupportedfiles) |
105
- | --license-file-name | False | `license_output.json` | Name of the file to save the license details to if enabled |
99
+ | Parameter | Required | Default | Description |
100
+ |:----------------------------|:---------|:----------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
101
+ | --target-path | False | ./ | Target path for analysis |
102
+ | --sbom-file | False | | SBOM file path |
103
+ | --files | False | [] | Files to analyze (JSON array string) |
104
+ | --excluded-ecosystems | False | [] | List of ecosystems to exclude from analysis (JSON array string). You can get supported files from the [Supported Files API](https://docs.socket.dev/reference/getsupportedfiles) |
105
+ | --license-file-name | False | `license_output.json` | Name of the file to save the license details to if enabled |
106
+ | --save-submitted-files-list | False | | Save list of submitted file names to JSON file for debugging purposes |
107
+ | --save-manifest-tar | False | | Save all manifest files to a compressed tar.gz archive with original directory structure |
106
108
 
107
109
  #### Branch and Scan Configuration
108
110
  | Parameter | Required | Default | Description |
@@ -189,6 +191,73 @@ The CLI determines which files to scan based on the following logic:
189
191
  - **Using `--files`**: If you specify `--files '["package.json"]'`, the CLI will check if this file exists and is a manifest file before triggering a scan.
190
192
  - **Using `--ignore-commit-files`**: This forces a scan of all manifest files in the target path, regardless of what's in your commit.
191
193
 
194
+ ## Debugging and Troubleshooting
195
+
196
+ ### Saving Submitted Files List
197
+
198
+ The CLI provides a debugging option to save the list of files that were submitted for scanning:
199
+
200
+ ```bash
201
+ socketcli --save-submitted-files-list submitted_files.json
202
+ ```
203
+
204
+ This will create a JSON file containing:
205
+ - Timestamp of when the scan was performed
206
+ - Total number of files submitted
207
+ - Total size of all files (in bytes and human-readable format)
208
+ - Complete list of file paths that were found and submitted for scanning
209
+
210
+ Example output file:
211
+ ```json
212
+ {
213
+ "timestamp": "2025-01-22 10:30:45 UTC",
214
+ "total_files": 3,
215
+ "total_size_bytes": 2048,
216
+ "total_size_human": "2.00 KB",
217
+ "files": [
218
+ "./package.json",
219
+ "./requirements.txt",
220
+ "./Pipfile"
221
+ ]
222
+ }
223
+ ```
224
+
225
+ This feature is useful for:
226
+ - **Debugging**: Understanding which files the CLI found and submitted
227
+ - **Verification**: Confirming that expected manifest files are being detected
228
+ - **Size Analysis**: Understanding the total size of manifest files being uploaded
229
+ - **Troubleshooting**: Identifying why certain files might not be included in scans, or whether size limits are being hit
230
+
231
+ > **Note**: This option works with both differential scans (when git commits are detected) and full scans (API mode).
232
+
233
+ ### Saving Manifest Files Archive
234
+
235
+ For backup, sharing, or analysis purposes, you can save all manifest files to a compressed tar.gz archive:
236
+
237
+ ```bash
238
+ socketcli --save-manifest-tar manifest_files.tar.gz
239
+ ```
240
+
241
+ This will create a compressed archive containing all the manifest files that were found and submitted for scanning, preserving their original directory structure relative to the scanned directory.
242
+
243
+ Example usage with other options:
244
+ ```bash
245
+ # Save both files list and archive
246
+ socketcli --save-submitted-files-list files.json --save-manifest-tar backup.tar.gz
247
+
248
+ # Use with specific target path
249
+ socketcli --target-path ./my-project --save-manifest-tar my-project-manifests.tar.gz
250
+ ```
251
+
252
+ The manifest archive feature is useful for:
253
+ - **Backup**: Creating portable backups of all dependency manifest files
254
+ - **Sharing**: Sending the exact files being analyzed to colleagues or support
255
+ - **Analysis**: Examining the dependency files offline or with other tools
256
+ - **Debugging**: Verifying file discovery and content issues
257
+ - **Compliance**: Maintaining records of scanned dependency files
258
+
259
+ > **Note**: The tar.gz archive preserves the original directory structure, making it easy to extract and examine the files in their proper context.
260
+
192
261
  ## Development
193
262
 
194
263
  This project uses `pyproject.toml` as the primary dependency specification.
@@ -7,10 +7,10 @@ The Socket Security CLI was created to enable integrations with other tools like
7
7
  ```` shell
8
8
  socketcli [-h] [--api-token API_TOKEN] [--repo REPO] [--integration {api,github,gitlab}] [--owner OWNER] [--branch BRANCH]
9
9
  [--committers [COMMITTERS ...]] [--pr-number PR_NUMBER] [--commit-message COMMIT_MESSAGE] [--commit-sha COMMIT_SHA]
10
- [--target-path TARGET_PATH] [--sbom-file SBOM_FILE] [--files FILES] [--default-branch] [--pending-head]
11
- [--generate-license] [--enable-debug] [--enable-json] [--enable-sarif] [--disable-overview] [--disable-security-issue]
12
- [--allow-unverified] [--ignore-commit-files] [--disable-blocking] [--scm SCM] [--timeout TIMEOUT]
13
- [--exclude-license-details]
10
+ [--target-path TARGET_PATH] [--sbom-file SBOM_FILE] [--files FILES] [--save-submitted-files-list SAVE_SUBMITTED_FILES_LIST]
11
+ [--save-manifest-tar SAVE_MANIFEST_TAR] [--default-branch] [--pending-head] [--generate-license] [--enable-debug] [--enable-json] [--enable-sarif]
12
+ [--disable-overview] [--disable-security-issue] [--allow-unverified] [--ignore-commit-files] [--disable-blocking]
13
+ [--scm SCM] [--timeout TIMEOUT] [--exclude-license-details]
14
14
  ````
15
15
 
16
16
  If you don't want to provide the Socket API Token every time then you can use the environment variable `SOCKET_SECURITY_API_KEY`
@@ -40,13 +40,15 @@ If you don't want to provide the Socket API Token every time then you can use th
40
40
  | --commit-sha | False | "" | Commit SHA |
41
41
 
42
42
  #### Path and File
43
- | Parameter | Required | Default | Description |
44
- |:----------------------|:---------|:----------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
45
- | --target-path | False | ./ | Target path for analysis |
46
- | --sbom-file | False | | SBOM file path |
47
- | --files | False | [] | Files to analyze (JSON array string) |
48
- | --excluded-ecosystems | False | [] | List of ecosystems to exclude from analysis (JSON array string). You can get supported files from the [Supported Files API](https://docs.socket.dev/reference/getsupportedfiles) |
49
- | --license-file-name | False | `license_output.json` | Name of the file to save the license details to if enabled |
43
+ | Parameter | Required | Default | Description |
44
+ |:----------------------------|:---------|:----------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
45
+ | --target-path | False | ./ | Target path for analysis |
46
+ | --sbom-file | False | | SBOM file path |
47
+ | --files | False | [] | Files to analyze (JSON array string) |
48
+ | --excluded-ecosystems | False | [] | List of ecosystems to exclude from analysis (JSON array string). You can get supported files from the [Supported Files API](https://docs.socket.dev/reference/getsupportedfiles) |
49
+ | --license-file-name | False | `license_output.json` | Name of the file to save the license details to if enabled |
50
+ | --save-submitted-files-list | False | | Save list of submitted file names to JSON file for debugging purposes |
51
+ | --save-manifest-tar | False | | Save all manifest files to a compressed tar.gz archive with original directory structure |
50
52
 
51
53
  #### Branch and Scan Configuration
52
54
  | Parameter | Required | Default | Description |
@@ -133,6 +135,73 @@ The CLI determines which files to scan based on the following logic:
133
135
  - **Using `--files`**: If you specify `--files '["package.json"]'`, the CLI will check if this file exists and is a manifest file before triggering a scan.
134
136
  - **Using `--ignore-commit-files`**: This forces a scan of all manifest files in the target path, regardless of what's in your commit.
135
137
 
138
+ ## Debugging and Troubleshooting
139
+
140
+ ### Saving Submitted Files List
141
+
142
+ The CLI provides a debugging option to save the list of files that were submitted for scanning:
143
+
144
+ ```bash
145
+ socketcli --save-submitted-files-list submitted_files.json
146
+ ```
147
+
148
+ This will create a JSON file containing:
149
+ - Timestamp of when the scan was performed
150
+ - Total number of files submitted
151
+ - Total size of all files (in bytes and human-readable format)
152
+ - Complete list of file paths that were found and submitted for scanning
153
+
154
+ Example output file:
155
+ ```json
156
+ {
157
+ "timestamp": "2025-01-22 10:30:45 UTC",
158
+ "total_files": 3,
159
+ "total_size_bytes": 2048,
160
+ "total_size_human": "2.00 KB",
161
+ "files": [
162
+ "./package.json",
163
+ "./requirements.txt",
164
+ "./Pipfile"
165
+ ]
166
+ }
167
+ ```
168
+
169
+ This feature is useful for:
170
+ - **Debugging**: Understanding which files the CLI found and submitted
171
+ - **Verification**: Confirming that expected manifest files are being detected
172
+ - **Size Analysis**: Understanding the total size of manifest files being uploaded
173
+ - **Troubleshooting**: Identifying why certain files might not be included in scans, or whether size limits are being hit
174
+
175
+ > **Note**: This option works with both differential scans (when git commits are detected) and full scans (API mode).
176
+
177
+ ### Saving Manifest Files Archive
178
+
179
+ For backup, sharing, or analysis purposes, you can save all manifest files to a compressed tar.gz archive:
180
+
181
+ ```bash
182
+ socketcli --save-manifest-tar manifest_files.tar.gz
183
+ ```
184
+
185
+ This will create a compressed archive containing all the manifest files that were found and submitted for scanning, preserving their original directory structure relative to the scanned directory.
186
+
187
+ Example usage with other options:
188
+ ```bash
189
+ # Save both files list and archive
190
+ socketcli --save-submitted-files-list files.json --save-manifest-tar backup.tar.gz
191
+
192
+ # Use with specific target path
193
+ socketcli --target-path ./my-project --save-manifest-tar my-project-manifests.tar.gz
194
+ ```
195
+
196
+ The manifest archive feature is useful for:
197
+ - **Backup**: Creating portable backups of all dependency manifest files
198
+ - **Sharing**: Sending the exact files being analyzed to colleagues or support
199
+ - **Analysis**: Examining the dependency files offline or with other tools
200
+ - **Debugging**: Verifying file discovery and content issues
201
+ - **Compliance**: Maintaining records of scanned dependency files
202
+
203
+ > **Note**: The tar.gz archive preserves the original directory structure, making it easy to extract and examine the files in their proper context.
204
+
136
205
  ## Development
137
206
 
138
207
  This project uses `pyproject.toml` as the primary dependency specification.
@@ -6,7 +6,7 @@ build-backend = "hatchling.build"
6
6
 
7
7
  [project]
8
8
  name = "socketsecurity"
9
- version = "2.1.21"
9
+ version = "2.1.23"
10
10
  requires-python = ">= 3.10"
11
11
  license = {"file" = "LICENSE"}
12
12
  dependencies = [
@@ -1,2 +1,2 @@
1
1
  __author__ = 'socket.dev'
2
- __version__ = '2.1.21'
2
+ __version__ = '2.1.23'
@@ -57,6 +57,8 @@ class CliConfig:
57
57
  jira_plugin: PluginConfig = field(default_factory=PluginConfig)
58
58
  slack_plugin: PluginConfig = field(default_factory=PluginConfig)
59
59
  license_file_name: str = "license_output.json"
60
+ save_submitted_files_list: Optional[str] = None
61
+ save_manifest_tar: Optional[str] = None
60
62
 
61
63
  @classmethod
62
64
  def from_args(cls, args_list: Optional[List[str]] = None) -> 'CliConfig':
@@ -101,6 +103,8 @@ class CliConfig:
101
103
  'repo_is_public': args.repo_is_public,
102
104
  "excluded_ecosystems": args.excluded_ecosystems,
103
105
  'license_file_name': args.license_file_name,
106
+ 'save_submitted_files_list': args.save_submitted_files_list,
107
+ 'save_manifest_tar': args.save_manifest_tar,
104
108
  'version': __version__
105
109
  }
106
110
  try:
@@ -262,6 +266,18 @@ def create_argument_parser() -> argparse.ArgumentParser:
262
266
  metavar="<string>",
263
267
  help="SBOM file path"
264
268
  )
269
+ path_group.add_argument(
270
+ "--save-submitted-files-list",
271
+ dest="save_submitted_files_list",
272
+ metavar="<path>",
273
+ help="Save list of submitted file names to JSON file for debugging purposes"
274
+ )
275
+ path_group.add_argument(
276
+ "--save-manifest-tar",
277
+ dest="save_manifest_tar",
278
+ metavar="<path>",
279
+ help="Save all manifest files to a compressed tar.gz archive with original directory structure"
280
+ )
265
281
  path_group.add_argument(
266
282
  "--files",
267
283
  metavar="<json>",
@@ -1,14 +1,15 @@
1
1
  import logging
2
2
  import os
3
3
  import sys
4
+ import tarfile
4
5
  import time
5
6
  import io
7
+ import json
6
8
  from dataclasses import asdict
7
9
  from glob import glob
8
10
  from io import BytesIO
9
11
  from pathlib import PurePath
10
12
  from typing import BinaryIO, Dict, List, Tuple, Set, Union
11
- import re
12
13
  from socketdev import socketdev
13
14
  from socketdev.exceptions import APIFailure
14
15
  from socketdev.fullscans import FullScanParams, SocketArtifact
@@ -28,6 +29,8 @@ from socketsecurity.core.classes import (
28
29
  from socketsecurity.core.exceptions import APIResourceNotFound
29
30
  from .socket_config import SocketConfig
30
31
  from .utils import socket_globs
32
+ from .resource_utils import check_file_count_against_ulimit
33
+ from .lazy_file_loader import load_files_for_sending_lazy
31
34
  import importlib
32
35
  logging_std = importlib.import_module("logging")
33
36
 
@@ -176,6 +179,114 @@ class Core:
176
179
  return True
177
180
  return False
178
181
 
182
def save_submitted_files_list(self, files: List[str], output_path: str) -> None:
    """
    Write a JSON debugging report describing the files submitted for scanning.

    The report contains a UTC timestamp, the number of submitted paths, their
    combined size (raw bytes and a human-readable string) and the sorted list
    of paths. Paths that are missing, are not regular files, or cannot be
    inspected are logged as warnings but are still listed; they contribute
    nothing to the size total. Any failure while building or writing the
    report is logged as an error rather than raised.

    Args:
        files: List of file paths that were submitted for scanning
        output_path: Path where to save the JSON file
    """
    def humanize(size):
        """Render a byte count with two decimals and a B/KB/MB/GB/TB suffix."""
        for suffix in ['B', 'KB', 'MB', 'GB']:
            if size < 1024.0:
                return f"{size:.2f} {suffix}"
            size /= 1024.0
        return f"{size:.2f} TB"

    try:
        size_total = 0
        recorded = []

        for candidate in files:
            try:
                if os.path.exists(candidate) and os.path.isfile(candidate):
                    size_total += os.path.getsize(candidate)
                else:
                    log.warning(f"File not found or not accessible: {candidate}")
            except OSError as e:
                log.warning(f"Error accessing file {candidate}: {e}")
            # Every path is recorded, readable or not, so the report shows
            # exactly what was submitted.
            recorded.append(candidate)

        report = {
            "timestamp": time.strftime("%Y-%m-%d %H:%M:%S UTC", time.gmtime()),
            "total_files": len(recorded),
            "total_size_bytes": size_total,
            "total_size_human": humanize(size_total),
            "files": sorted(recorded)
        }

        with open(output_path, 'w', encoding='utf-8') as handle:
            json.dump(report, handle, indent=2, ensure_ascii=False)

        size_label = report['total_size_human']
        log.info(f"Saved list of {len(recorded)} submitted files ({size_label}) to: {output_path}")

    except Exception as e:
        # Best-effort debugging aid: report the problem, never abort the caller.
        log.error(f"Failed to save submitted files list to {output_path}: {e}")
232
+
233
def save_manifest_tar(self, files: List[str], output_path: str, base_dir: str) -> None:
    """
    Save all manifest files to a compressed tar.gz archive with original directory structure.

    Paths located under ``base_dir`` are stored under their path relative to
    it; paths outside ``base_dir`` are stored under their bare file name (a
    warning is logged). Paths that do not exist are skipped with a warning.
    Archive member names always use forward slashes. Any failure is logged
    as an error rather than raised.

    Args:
        files: List of file paths to include in the archive
        output_path: Path where to save the tar.gz file
        base_dir: Base directory to preserve relative structure
    """
    def format_bytes(bytes_value):
        """Convert bytes to human readable format"""
        for unit in ['B', 'KB', 'MB', 'GB']:
            if bytes_value < 1024.0:
                return f"{bytes_value:.2f} {unit}"
            bytes_value /= 1024.0
        return f"{bytes_value:.2f} TB"

    try:
        # Normalize the base directory and give it a trailing separator so
        # the prefix test below cannot match a sibling such as "<base>-old".
        base_dir = os.path.abspath(base_dir)
        if not base_dir.endswith(os.sep):
            base_dir += os.sep

        log.info(f"Creating manifest tar.gz file: {output_path}")
        log.debug(f"Base directory: {base_dir}")

        # Count what is really written; skipped paths must not be reported
        # as archived in the summary log line.
        added_count = 0

        with tarfile.open(output_path, 'w:gz') as tar:
            for file_path in files:
                if not os.path.exists(file_path):
                    log.warning(f"File not found, skipping: {file_path}")
                    continue

                # Calculate relative path within the base directory
                abs_file_path = os.path.abspath(file_path)
                if abs_file_path.startswith(base_dir):
                    # File is within base directory - use relative path
                    arcname = os.path.relpath(abs_file_path, base_dir)
                else:
                    # File is outside base directory - use just the filename
                    arcname = os.path.basename(abs_file_path)
                    log.warning(f"File outside base dir, using basename: {file_path} -> {arcname}")

                # Normalize archive name to use forward slashes
                arcname = arcname.replace(os.sep, '/')

                log.debug(f"Adding to tar: {file_path} -> {arcname}")
                tar.add(file_path, arcname=arcname)
                added_count += 1

        # The archive is closed here, so its on-disk size is final.
        tar_size = os.path.getsize(output_path)
        tar_size_human = format_bytes(tar_size)
        log.info(f"Successfully created tar.gz with {added_count} files ({tar_size_human}, {tar_size:,} bytes): {output_path}")

    except Exception as e:
        # Best-effort debugging aid: report the problem, never abort the caller.
        log.error(f"Failed to save manifest tar.gz to {output_path}: {e}")
289
+
179
290
  def find_files(self, path: str) -> List[str]:
180
291
  """
181
292
  Finds supported manifest files in the given path.
@@ -196,7 +307,7 @@ class Core:
196
307
  for ecosystem in patterns:
197
308
  if ecosystem in self.config.excluded_ecosystems:
198
309
  continue
199
- log.info(f'Scanning ecosystem: {ecosystem}')
310
+ log.debug(f'Scanning ecosystem: {ecosystem}')
200
311
  ecosystem_patterns = patterns[ecosystem]
201
312
  for file_name in ecosystem_patterns:
202
313
  original_pattern = ecosystem_patterns[file_name]["pattern"]
@@ -219,8 +330,24 @@ class Core:
219
330
  glob_end = time.time()
220
331
  log.debug(f"Globbing took {glob_end - glob_start:.4f} seconds")
221
332
 
222
- log.info(f"Total files found: {len(files)}")
223
- return sorted(files)
333
+ file_list = sorted(files)
334
+ file_count = len(file_list)
335
+ log.info(f"Total files found: {file_count}")
336
+
337
+ # Check if the number of manifest files might exceed ulimit -n
338
+ ulimit_check = check_file_count_against_ulimit(file_count)
339
+ if ulimit_check["can_check"]:
340
+ if ulimit_check["would_exceed"]:
341
+ log.warning(f"Found {file_count} manifest files, which may exceed the file descriptor limit (ulimit -n = {ulimit_check['soft_limit']})")
342
+ log.warning(f"Available file descriptors: {ulimit_check['available_fds']} (after {ulimit_check['buffer_size']} buffer)")
343
+ log.warning(f"Recommendation: {ulimit_check['recommendation']}")
344
+ log.warning("This may cause 'Too many open files' errors during processing")
345
+ else:
346
+ log.debug(f"File count ({file_count}) is within file descriptor limit ({ulimit_check['soft_limit']})")
347
+ else:
348
+ log.debug(f"Could not check file descriptor limit: {ulimit_check.get('error', 'Unknown error')}")
349
+
350
+ return file_list
224
351
 
225
352
  def get_supported_patterns(self) -> Dict:
226
353
  """
@@ -273,6 +400,18 @@ class Core:
273
400
  return True
274
401
  return False
275
402
 
403
def check_file_count_limit(self, file_count: int) -> dict:
    """
    Compare a file count with the system's file descriptor limit.

    Delegates to ``check_file_count_against_ulimit`` and returns its result
    unchanged.

    Args:
        file_count: Number of files to check

    Returns:
        Dictionary with check results including recommendations
    """
    limit_report = check_file_count_against_ulimit(file_count)
    return limit_report
414
+
276
415
  @staticmethod
277
416
  def to_case_insensitive_regex(input_string: str) -> str:
278
417
  """
@@ -300,7 +439,10 @@ class Core:
300
439
  @staticmethod
301
440
  def load_files_for_sending(files: List[str], workspace: str) -> List[Tuple[str, Tuple[str, BinaryIO]]]:
302
441
  """
303
- Prepares files for sending to the Socket API.
442
+ Prepares files for sending to the Socket API using lazy loading.
443
+
444
+ This version uses lazy file loading to prevent "Too many open files" errors
445
+ when processing large numbers of manifest files.
304
446
 
305
447
  Args:
306
448
  files: List of file paths from find_files()
@@ -310,25 +452,7 @@ class Core:
310
452
  List of tuples formatted for requests multipart upload:
311
453
  [(field_name, (filename, file_object)), ...]
312
454
  """
313
- send_files = []
314
- if "\\" in workspace:
315
- workspace = workspace.replace("\\", "/")
316
- for file_path in files:
317
- _, name = file_path.rsplit("/", 1)
318
-
319
- if file_path.startswith(workspace):
320
- key = file_path[len(workspace):]
321
- else:
322
- key = file_path
323
-
324
- key = key.lstrip("/")
325
- key = key.lstrip("./")
326
-
327
- f = open(file_path, 'rb')
328
- payload = (key, (name.lstrip(workspace), f))
329
- send_files.append(payload)
330
-
331
- return send_files
455
+ return load_files_for_sending_lazy(files, workspace)
332
456
 
333
457
  def create_full_scan(self, files: list[tuple[str, tuple[str, BytesIO]]], params: FullScanParams) -> FullScan:
334
458
  """
@@ -356,6 +480,85 @@ class Core:
356
480
 
357
481
  return full_scan
358
482
 
483
def create_full_scan_with_report_url(
    self,
    path: str,
    params: FullScanParams,
    no_change: bool = False,
    save_files_list_path: str = None,
    save_manifest_tar_path: str = None
) -> dict:
    """Create a new full scan and return its data together with html_report_url.

    Args:
        path: Path to look for manifest files
        params: Query params for the Full Scan endpoint
        no_change: If True, skip scanning and return the placeholder result
        save_files_list_path: Optional path to save submitted files list for debugging
        save_manifest_tar_path: Optional path to save manifest files tar.gz archive

    Returns:
        Dict with full scan data including html_report_url. When no scan is
        run (``no_change`` is True or no manifest files are found) the dict
        only holds ``id`` ("NO_SCAN_RAN"), an empty ``html_report_url`` and an
        empty ``unmatchedFiles`` list.

    Raises:
        APIFailure: Re-raised after being logged when creating the full scan fails
    """
    log.debug(f"starting create_full_scan_with_report_url with no_change: {no_change}")

    # Built once per call, so each caller gets its own dict and list.
    no_scan_result = {
        "id": "NO_SCAN_RAN",
        "html_report_url": "",
        "unmatchedFiles": []
    }
    if no_change:
        return no_scan_result

    # Find manifest files; bail out before doing any further work when there
    # is nothing to submit.
    files = self.find_files(path)
    if not files:
        return no_scan_result

    # Save submitted files list if requested
    if save_files_list_path:
        self.save_submitted_files_list(files, save_files_list_path)

    # Save manifest tar.gz if requested
    if save_manifest_tar_path:
        self.save_manifest_tar(files, save_manifest_tar_path, path)

    files_for_sending = self.load_files_for_sending(files, path)

    try:
        # Create new scan; perf_counter is monotonic, so the reported duration
        # cannot be skewed by system clock adjustments.
        new_scan_start = time.perf_counter()
        new_full_scan = self.create_full_scan(files_for_sending, params)
        new_scan_end = time.perf_counter()
        log.info(f"Total time to create new full scan: {new_scan_end - new_scan_start:.2f}")
    except APIFailure as e:
        log.error(f"Failed to create full scan: {e}")
        raise

    # Construct report URL
    base_socket = "https://socket.dev/dashboard/org"
    report_url = f"{base_socket}/{self.config.org_slug}/sbom/{new_full_scan.id}"
    if not params.include_license_details:
        report_url += "?include_license_details=false"

    # Return result in the format expected by the user
    return {
        "id": new_full_scan.id,
        "created_at": new_full_scan.created_at,
        "updated_at": new_full_scan.updated_at,
        "organization_id": new_full_scan.organization_id,
        "repository_id": new_full_scan.repository_id,
        "branch": new_full_scan.branch,
        "commit_message": new_full_scan.commit_message,
        "commit_hash": new_full_scan.commit_hash,
        "pull_request": new_full_scan.pull_request,
        "committers": new_full_scan.committers,
        "html_report_url": report_url,
        # Not every scan object carries unmatchedFiles; default to an empty list.
        "unmatchedFiles": getattr(new_full_scan, 'unmatchedFiles', [])
    }
561
+
359
562
  def check_full_scans_status(self, head_full_scan_id: str, new_full_scan_id: str) -> bool:
360
563
  is_ready = False
361
564
  current_timeout = self.config.timeout
@@ -656,7 +859,9 @@ class Core:
656
859
  self,
657
860
  path: str,
658
861
  params: FullScanParams,
659
- no_change: bool = False
862
+ no_change: bool = False,
863
+ save_files_list_path: str = None,
864
+ save_manifest_tar_path: str = None
660
865
  ) -> Diff:
661
866
  """Create a new diff using the Socket SDK.
662
867
 
@@ -664,16 +869,27 @@ class Core:
664
869
  path: Path to look for manifest files
665
870
  params: Query params for the Full Scan endpoint
666
871
  no_change: If True, return empty diff
872
+ save_files_list_path: Optional path to save submitted files list for debugging
873
+ save_manifest_tar_path: Optional path to save manifest files tar.gz archive
667
874
  """
668
875
  log.debug(f"starting create_new_diff with no_change: {no_change}")
669
876
  if no_change:
670
- return Diff(id="no_diff_id", diff_url="", report_url="")
877
+ return Diff(id="NO_DIFF_RAN", diff_url="", report_url="")
671
878
 
672
879
  # Find manifest files
673
880
  files = self.find_files(path)
881
+
882
+ # Save submitted files list if requested
883
+ if save_files_list_path and files:
884
+ self.save_submitted_files_list(files, save_files_list_path)
885
+
886
+ # Save manifest tar.gz if requested
887
+ if save_manifest_tar_path and files:
888
+ self.save_manifest_tar(files, save_manifest_tar_path, path)
889
+
674
890
  files_for_sending = self.load_files_for_sending(files, path)
675
891
  if not files:
676
- return Diff(id="no_diff_id", diff_url="", report_url="")
892
+ return Diff(id="NO_DIFF_RAN", diff_url="", report_url="")
677
893
 
678
894
  try:
679
895
  # Get head scan ID
@@ -809,12 +1025,6 @@ class Core:
809
1025
 
810
1026
  return diff
811
1027
 
812
- def get_all_scores(self, packages: dict[str, Package]) -> dict[str, Package]:
813
- components = []
814
- for package_id in packages:
815
- package = packages[package_id]
816
- return packages
817
-
818
1028
  def create_purl(self, package_id: str, packages: dict[str, Package]) -> Purl:
819
1029
  """
820
1030
  Creates the extended PURL data for package identification and tracking.