spark-connect-cli 0.2.0__tar.gz → 0.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (18)
  1. {spark_connect_cli-0.2.0 → spark_connect_cli-0.2.1}/PKG-INFO +9 -1
  2. {spark_connect_cli-0.2.0 → spark_connect_cli-0.2.1}/README.md +8 -0
  3. {spark_connect_cli-0.2.0 → spark_connect_cli-0.2.1}/SKILL.md +4 -0
  4. {spark_connect_cli-0.2.0 → spark_connect_cli-0.2.1}/pyproject.toml +1 -1
  5. {spark_connect_cli-0.2.0 → spark_connect_cli-0.2.1}/src/spark_connect_cli/__init__.py +1 -1
  6. {spark_connect_cli-0.2.0 → spark_connect_cli-0.2.1}/.github/workflows/publish.yml +0 -0
  7. {spark_connect_cli-0.2.0 → spark_connect_cli-0.2.1}/.gitignore +0 -0
  8. {spark_connect_cli-0.2.0 → spark_connect_cli-0.2.1}/LICENSE +0 -0
  9. {spark_connect_cli-0.2.0 → spark_connect_cli-0.2.1}/src/spark_connect_cli/__main__.py +0 -0
  10. {spark_connect_cli-0.2.0 → spark_connect_cli-0.2.1}/src/spark_connect_cli/cli.py +0 -0
  11. {spark_connect_cli-0.2.0 → spark_connect_cli-0.2.1}/src/spark_connect_cli/jobs.py +0 -0
  12. {spark_connect_cli-0.2.0 → spark_connect_cli-0.2.1}/src/spark_connect_cli/meta.py +0 -0
  13. {spark_connect_cli-0.2.0 → spark_connect_cli-0.2.1}/src/spark_connect_cli/query.py +0 -0
  14. {spark_connect_cli-0.2.0 → spark_connect_cli-0.2.1}/src/spark_connect_cli/rest.py +0 -0
  15. {spark_connect_cli-0.2.0 → spark_connect_cli-0.2.1}/src/spark_connect_cli/session.py +0 -0
  16. {spark_connect_cli-0.2.0 → spark_connect_cli-0.2.1}/src/spark_connect_cli/sync.py +0 -0
  17. {spark_connect_cli-0.2.0 → spark_connect_cli-0.2.1}/tests/test_guard.py +0 -0
  18. {spark_connect_cli-0.2.0 → spark_connect_cli-0.2.1}/tests/test_jobs.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: spark-connect-cli
3
- Version: 0.2.0
3
+ Version: 0.2.1
4
4
  Summary: Agent-friendly Spark Connect CLI: read-only querying + async long-job control. No JVM, no Kerberos on the client.
5
5
  Project-URL: Homepage, https://github.com/dengshu2/spark-connect-cli
6
6
  Project-URL: Issues, https://github.com/dengshu2/spark-connect-cli/issues
@@ -151,6 +151,14 @@ so the list may show only the driver when nothing is running.
151
151
  etiquette, type-mapping table). Drop it into your agent's skills directory and
152
152
  the agent drives `scq` through a shell/Bash tool.
153
153
 
154
+ ## Roadmap
155
+
156
+ - Clarify in `SKILL.md` that the `maxMemory` reported by `scq exec executors` is the
157
+ storage pool, not total memory (already noted above).
158
+ - `scq cluster` — optional read-only passthrough to the YARN ResourceManager
159
+ REST (apps / queues / nodes), rounding out the introspection plane.
160
+ - Vendored/offline install path (bundle wheels) for air-gapped deployments.
161
+
154
162
  ## License
155
163
 
156
164
  MIT
@@ -131,6 +131,14 @@ so the list may show only the driver when nothing is running.
131
131
  etiquette, type-mapping table). Drop it into your agent's skills directory and
132
132
  the agent drives `scq` through a shell/Bash tool.
133
133
 
134
+ ## Roadmap
135
+
136
+ - Clarify in `SKILL.md` that the `maxMemory` reported by `scq exec executors` is the
137
+ storage pool, not total memory (already noted above).
138
+ - `scq cluster` — optional read-only passthrough to the YARN ResourceManager
139
+ REST (apps / queues / nodes), rounding out the introspection plane.
140
+ - Vendored/offline install path (bundle wheels) for air-gapped deployments.
141
+
134
142
  ## License
135
143
 
136
144
  MIT
@@ -129,6 +129,10 @@ scq exec jobs
129
129
  scq exec stages/<id>/<attempt>/taskSummary?quantiles=0.5,0.95,1.0
130
130
  ```
131
131
 
132
+ - **`executors` memory**: `maxMemory` / `memoryUsed` are the **storage/cache
133
+ pool** (roughly `(heap − 300MB) × 0.6`), **not** the executor's total memory.
134
+ A ~100MB `maxMemory` does **not** mean a tiny executor — total heap is set by
135
+ `spark.executor.memory`. Don't report the cache pool as the executor size.
132
136
  - **Data skew**: pull a stage's `taskSummary` and compare a metric's **max vs
133
137
  median** (`executorRunTime`, `shuffleReadBytes`, `shuffleReadRecords`). A large
134
138
  `max/median` ratio = a straggler / skewed partition. `…?details=true` on a
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "spark-connect-cli"
7
- version = "0.2.0"
7
+ version = "0.2.1"
8
8
  description = "Agent-friendly Spark Connect CLI: read-only querying + async long-job control. No JVM, no Kerberos on the client."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9"
@@ -1,2 +1,2 @@
1
1
  """spark-connect-cli — an agent-friendly Spark Connect CLI."""
2
- __version__ = "0.2.0"
2
+ __version__ = "0.2.1"