tab-cli 0.1.0__tar.gz → 0.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {tab_cli-0.1.0 → tab_cli-0.1.2}/.gitignore +6 -0
- tab_cli-0.1.2/CHANGELOG.md +5 -0
- tab_cli-0.1.2/LICENSE +21 -0
- tab_cli-0.1.2/Makefile +58 -0
- tab_cli-0.1.2/PKG-INFO +29 -0
- tab_cli-0.1.2/README.md +3 -0
- tab_cli-0.1.2/docs/cli-ref.md +105 -0
- tab_cli-0.1.2/docs/cloud.md +89 -0
- tab_cli-0.1.2/docs/gen_assets.sh +3 -0
- tab_cli-0.1.0/README.md → tab_cli-0.1.2/docs/index.md +16 -14
- tab_cli-0.1.2/docs/test.csv +9 -0
- tab_cli-0.1.2/mkdocs.yml +38 -0
- {tab_cli-0.1.0 → tab_cli-0.1.2}/pyproject.toml +17 -8
- tab_cli-0.1.2/site/404.html +348 -0
- tab_cli-0.1.2/site/assets/images/favicon.png +0 -0
- tab_cli-0.1.2/site/assets/javascripts/bundle.d7c377c4.min.js +29 -0
- tab_cli-0.1.2/site/assets/javascripts/bundle.d7c377c4.min.js.map +7 -0
- tab_cli-0.1.2/site/assets/javascripts/lunr/min/lunr.ar.min.js +1 -0
- tab_cli-0.1.2/site/assets/javascripts/lunr/min/lunr.da.min.js +18 -0
- tab_cli-0.1.2/site/assets/javascripts/lunr/min/lunr.de.min.js +18 -0
- tab_cli-0.1.2/site/assets/javascripts/lunr/min/lunr.du.min.js +18 -0
- tab_cli-0.1.2/site/assets/javascripts/lunr/min/lunr.el.min.js +1 -0
- tab_cli-0.1.2/site/assets/javascripts/lunr/min/lunr.es.min.js +18 -0
- tab_cli-0.1.2/site/assets/javascripts/lunr/min/lunr.fi.min.js +18 -0
- tab_cli-0.1.2/site/assets/javascripts/lunr/min/lunr.fr.min.js +18 -0
- tab_cli-0.1.2/site/assets/javascripts/lunr/min/lunr.he.min.js +1 -0
- tab_cli-0.1.2/site/assets/javascripts/lunr/min/lunr.hi.min.js +1 -0
- tab_cli-0.1.2/site/assets/javascripts/lunr/min/lunr.hu.min.js +18 -0
- tab_cli-0.1.2/site/assets/javascripts/lunr/min/lunr.hy.min.js +1 -0
- tab_cli-0.1.2/site/assets/javascripts/lunr/min/lunr.it.min.js +18 -0
- tab_cli-0.1.2/site/assets/javascripts/lunr/min/lunr.ja.min.js +1 -0
- tab_cli-0.1.2/site/assets/javascripts/lunr/min/lunr.jp.min.js +1 -0
- tab_cli-0.1.2/site/assets/javascripts/lunr/min/lunr.kn.min.js +1 -0
- tab_cli-0.1.2/site/assets/javascripts/lunr/min/lunr.ko.min.js +1 -0
- tab_cli-0.1.2/site/assets/javascripts/lunr/min/lunr.multi.min.js +1 -0
- tab_cli-0.1.2/site/assets/javascripts/lunr/min/lunr.nl.min.js +18 -0
- tab_cli-0.1.2/site/assets/javascripts/lunr/min/lunr.no.min.js +18 -0
- tab_cli-0.1.2/site/assets/javascripts/lunr/min/lunr.pt.min.js +18 -0
- tab_cli-0.1.2/site/assets/javascripts/lunr/min/lunr.ro.min.js +18 -0
- tab_cli-0.1.2/site/assets/javascripts/lunr/min/lunr.ru.min.js +18 -0
- tab_cli-0.1.2/site/assets/javascripts/lunr/min/lunr.sa.min.js +1 -0
- tab_cli-0.1.2/site/assets/javascripts/lunr/min/lunr.stemmer.support.min.js +1 -0
- tab_cli-0.1.2/site/assets/javascripts/lunr/min/lunr.sv.min.js +18 -0
- tab_cli-0.1.2/site/assets/javascripts/lunr/min/lunr.ta.min.js +1 -0
- tab_cli-0.1.2/site/assets/javascripts/lunr/min/lunr.te.min.js +1 -0
- tab_cli-0.1.2/site/assets/javascripts/lunr/min/lunr.th.min.js +1 -0
- tab_cli-0.1.2/site/assets/javascripts/lunr/min/lunr.tr.min.js +18 -0
- tab_cli-0.1.2/site/assets/javascripts/lunr/min/lunr.vi.min.js +1 -0
- tab_cli-0.1.2/site/assets/javascripts/lunr/min/lunr.zh.min.js +1 -0
- tab_cli-0.1.2/site/assets/javascripts/lunr/tinyseg.js +206 -0
- tab_cli-0.1.2/site/assets/javascripts/lunr/wordcut.js +6708 -0
- tab_cli-0.1.2/site/assets/javascripts/workers/search.f886a092.min.js +42 -0
- tab_cli-0.1.2/site/assets/javascripts/workers/search.f886a092.min.js.map +7 -0
- tab_cli-0.1.2/site/assets/stylesheets/main.50c56a3b.min.css +1 -0
- tab_cli-0.1.2/site/assets/stylesheets/main.50c56a3b.min.css.map +1 -0
- tab_cli-0.1.2/site/assets/stylesheets/palette.06af60db.min.css +1 -0
- tab_cli-0.1.2/site/assets/stylesheets/palette.06af60db.min.css.map +1 -0
- tab_cli-0.1.2/site/assets/test-where.svg +80 -0
- tab_cli-0.1.2/site/assets/test.svg +106 -0
- tab_cli-0.1.2/site/cli-ref/index.html +718 -0
- tab_cli-0.1.2/site/cloud/index.html +582 -0
- tab_cli-0.1.2/site/gen_assets.sh +3 -0
- tab_cli-0.1.2/site/index.html +729 -0
- tab_cli-0.1.2/site/search/search_index.json +1 -0
- tab_cli-0.1.2/site/sitemap.xml +3 -0
- tab_cli-0.1.2/site/sitemap.xml.gz +0 -0
- tab_cli-0.1.2/site/test.csv +9 -0
- tab_cli-0.1.2/tab_cli/cli.py +171 -0
- tab_cli-0.1.2/tab_cli/config.py +14 -0
- tab_cli-0.1.2/tab_cli/formats/__init__.py +15 -0
- tab_cli-0.1.2/tab_cli/formats/avro.py +47 -0
- tab_cli-0.1.2/tab_cli/formats/base.py +63 -0
- tab_cli-0.1.2/tab_cli/formats/csv.py +45 -0
- tab_cli-0.1.2/tab_cli/formats/jsonl.py +41 -0
- tab_cli-0.1.2/tab_cli/formats/parquet.py +43 -0
- tab_cli-0.1.2/tab_cli/handlers/__init__.py +96 -0
- tab_cli-0.1.2/tab_cli/handlers/base.py +259 -0
- {tab_cli-0.1.0 → tab_cli-0.1.2}/tab_cli/handlers/cli_table.py +16 -9
- tab_cli-0.1.2/tab_cli/storage/__init__.py +83 -0
- tab_cli-0.1.2/tab_cli/storage/aws.py +190 -0
- tab_cli-0.1.2/tab_cli/storage/az.py +212 -0
- tab_cli-0.1.2/tab_cli/storage/base.py +36 -0
- tab_cli-0.1.2/tab_cli/storage/fsspec.py +104 -0
- tab_cli-0.1.2/tab_cli/storage/gcloud.py +173 -0
- tab_cli-0.1.2/tab_cli/storage/local.py +25 -0
- tab_cli-0.1.2/tab_cli/style.py +4 -0
- tab_cli-0.1.2/tab_cli/url_parser.py +99 -0
- tab_cli-0.1.0/LICENSE +0 -7
- tab_cli-0.1.0/Makefile +0 -32
- tab_cli-0.1.0/PKG-INFO +0 -100
- tab_cli-0.1.0/tab_cli/cli.py +0 -147
- tab_cli-0.1.0/tab_cli/handlers/__init__.py +0 -55
- tab_cli-0.1.0/tab_cli/handlers/base.py +0 -126
- tab_cli-0.1.0/tab_cli/handlers/csv.py +0 -56
- tab_cli-0.1.0/tab_cli/handlers/directory.py +0 -96
- tab_cli-0.1.0/tab_cli/handlers/jsonl.py +0 -47
- tab_cli-0.1.0/tab_cli/handlers/parquet.py +0 -75
- tab_cli-0.1.0/tab_cli/style.py +0 -3
- {tab_cli-0.1.0 → tab_cli-0.1.2}/tab_cli/__init__.py +0 -0
tab_cli-0.1.2/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Tongfei Chen
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
tab_cli-0.1.2/Makefile
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
SHELL := /bin/bash
|
|
2
|
+
|
|
3
|
+
# Every target in this Makefile is a command, not a file; list them all so a stray file with the same name cannot mask a target.
.PHONY: install dev clean lint format typecheck test build mkdocs-build mkdocs-serve docs publish publish-test gh-deploy-docs
|
|
4
|
+
|
|
5
|
+
install:
|
|
6
|
+
uv tool install . --force
|
|
7
|
+
|
|
8
|
+
dev:
|
|
9
|
+
uv sync --dev
|
|
10
|
+
|
|
11
|
+
clean:
|
|
12
|
+
rm -rf dist/ build/ *.egg-info .pytest_cache .mypy_cache .ruff_cache
|
|
13
|
+
find . -type d -name __pycache__ -exec rm -rf {} +
|
|
14
|
+
|
|
15
|
+
lint:
|
|
16
|
+
uv run ruff check tab_cli/
|
|
17
|
+
|
|
18
|
+
format:
|
|
19
|
+
uv run ruff format tab_cli/
|
|
20
|
+
|
|
21
|
+
typecheck:
|
|
22
|
+
uv run ty check tab_cli/
|
|
23
|
+
|
|
24
|
+
test:
|
|
25
|
+
uv run pytest
|
|
26
|
+
|
|
27
|
+
build: clean
|
|
28
|
+
uv build
|
|
29
|
+
|
|
30
|
+
mkdocs-build:
|
|
31
|
+
sh ./docs/gen_assets.sh
|
|
32
|
+
mkdocs build --strict
|
|
33
|
+
|
|
34
|
+
mkdocs-serve:
|
|
35
|
+
mkdocs serve --dev-addr=127.0.0.1:8000
|
|
36
|
+
|
|
37
|
+
docs: mkdocs-build
|
|
38
|
+
|
|
39
|
+
publish: build
|
|
40
|
+
uv publish
|
|
41
|
+
|
|
42
|
+
publish-test: build
|
|
43
|
+
uv publish --publish-url https://test.pypi.org/legacy/
|
|
44
|
+
|
|
45
|
+
gh-deploy-docs: docs
|
|
46
|
+
set -ex ; \
|
|
47
|
+
WORK="$$( mktemp -d )" ; \
|
|
48
|
+
VER="$$( git describe --always --tags --dirty )" ; \
|
|
49
|
+
git worktree add --force "$$WORK" gh-pages ; \
|
|
50
|
+
rm -rf "$$WORK"/* ; \
|
|
51
|
+
rsync -av site/ "$$WORK"/ ; \
|
|
52
|
+
if [ -f CNAME ] ; then cp CNAME "$$WORK"/ ; fi ; \
|
|
53
|
+
pushd "$$WORK" ; \
|
|
54
|
+
git add -A ; \
|
|
55
|
+
git commit -m "Updated gh-pages $$VER" ; \
|
|
56
|
+
popd ; \
|
|
57
|
+
git worktree remove "$$WORK" ; \
|
|
58
|
+
git push origin gh-pages
|
tab_cli-0.1.2/PKG-INFO
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: tab-cli
|
|
3
|
+
Version: 0.1.2
|
|
4
|
+
Summary: A CLI tool for tabular data
|
|
5
|
+
Author-email: Tongfei Chen <tongfei@pm.me>
|
|
6
|
+
License-File: LICENSE
|
|
7
|
+
Requires-Python: >=3.10
|
|
8
|
+
Requires-Dist: blobfile>=3.0
|
|
9
|
+
Requires-Dist: fsspec>=2025.1.0
|
|
10
|
+
Requires-Dist: loguru>=0.7.3
|
|
11
|
+
Requires-Dist: polars-fastavro>=0.5.1
|
|
12
|
+
Requires-Dist: polars>=1.0
|
|
13
|
+
Requires-Dist: pyarrow>=15.0
|
|
14
|
+
Requires-Dist: rich>=13.0
|
|
15
|
+
Requires-Dist: typer>=0.21.1
|
|
16
|
+
Provides-Extra: azure
|
|
17
|
+
Requires-Dist: adlfs>=2025.1.0; extra == 'azure'
|
|
18
|
+
Requires-Dist: azure-identity>=1.10.0; extra == 'azure'
|
|
19
|
+
Provides-Extra: gcs
|
|
20
|
+
Requires-Dist: gcsfs>=2025.1.0; extra == 'gcs'
|
|
21
|
+
Requires-Dist: google-auth>=2.27.0; extra == 'gcs'
|
|
22
|
+
Provides-Extra: s3
|
|
23
|
+
Requires-Dist: boto3>=1.28.0; extra == 's3'
|
|
24
|
+
Requires-Dist: s3fs>=2025.1.0; extra == 's3'
|
|
25
|
+
Description-Content-Type: text/markdown
|
|
26
|
+
|
|
27
|
+
# tab
|
|
28
|
+
|
|
29
|
+
A CLI tool for viewing, querying, and converting tabular data files. Supports AWS / Azure / Google Cloud Storage URLs.
|
tab_cli-0.1.2/README.md
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
# CLI Reference
|
|
2
|
+
|
|
3
|
+
## `tab view`
|
|
4
|
+
|
|
5
|
+
View tabular data from a data file, or a directory of partitions of data files.
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
tab view $path [OPTIONS]
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
Options:
|
|
12
|
+
|
|
13
|
+
| Option | Description |
|
|
14
|
+
|-------------------------|-----------------------------------------------------------------------------------------------------------|
|
|
15
|
+
| `-i` / `--input-format` | Input format (`parquet`, `csv`, `tsv`, `jsonl`, `avro`). Auto-detected from extension if omitted. |
|
|
16
|
+
| `-o` / `--output-format` | Output format (`parquet`, `csv`, `tsv`, `jsonl`, `avro`). If not specified, print Rich table in terminal. |
|
|
17
|
+
| `--limit` | Maximum number of rows to display. |
|
|
18
|
+
| `--skip` | Number of rows to skip from the beginning. |
|
|
19
|
+
|
|
20
|
+
## `tab schema`
|
|
21
|
+
|
|
22
|
+
Display the schema of a tabular data file.
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
tab schema $path [OPTIONS]
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
Options:
|
|
29
|
+
|
|
30
|
+
| Option | Description |
|
|
31
|
+
|-------------------------|-----------------------------------------------------------------------------------------------------------|
|
|
32
|
+
| `-i` / `--input-format` | Input format (`parquet`, `csv`, `tsv`, `jsonl`, `avro`). Auto-detected from extension if omitted. |
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
## `tab summary`
|
|
36
|
+
|
|
37
|
+
Display summary information about a tabular data file.
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
tab summary $path [OPTIONS]
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
Options:
|
|
44
|
+
|
|
45
|
+
| Option | Description |
|
|
46
|
+
|-------------------------|-----------------------------------------------------------------------------------------------------------|
|
|
47
|
+
| `-i` / `--input-format` | Input format (`parquet`, `csv`, `tsv`, `jsonl`, `avro`). Auto-detected from extension if omitted. |
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
## `tab sql`
|
|
51
|
+
|
|
52
|
+
Run a SQL query on tabular data. The table is available as `t`.
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
tab sql $query $path [OPTIONS]
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
Options:
|
|
59
|
+
|
|
60
|
+
| Option | Description |
|
|
61
|
+
|-------------------------|-----------------------------------------------------------------------------------------------------------|
|
|
62
|
+
| `-i` / `--input-format` | Input format (`parquet`, `csv`, `tsv`, `jsonl`, `avro`). Auto-detected from extension if omitted. |
|
|
63
|
+
| `-o` / `--output-format` | Output format (`parquet`, `csv`, `tsv`, `jsonl`, `avro`). If not specified, print Rich table in terminal. |
|
|
64
|
+
| `--limit` | Maximum number of rows to display. |
|
|
65
|
+
| `--skip` | Number of rows to skip from the beginning. |
|
|
66
|
+
|
|
67
|
+
## `tab convert`
|
|
68
|
+
|
|
69
|
+
Convert tabular data from one format to another.
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
tab convert $src $dst [OPTIONS]
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
Options:
|
|
76
|
+
|
|
77
|
+
| Option | Description |
|
|
78
|
+
|-------------------------|---------------------------------------------------------------------------------------------------------|
|
|
79
|
+
| `-i` / `--input-format` | Input format (`parquet`, `csv`, `tsv`, `jsonl`, `avro`). Auto-detected from extension if omitted. |
|
|
80
|
+
| `-o` / `--output-format` | Output format (`parquet`, `csv`, `tsv`, `jsonl`, `avro`). If not specified, inherits from input format. |
|
|
81
|
+
| `-n` / `--num-partitions` | Number of output partitions. Creates a directory with partition files. |
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
## `tab cat`
|
|
85
|
+
|
|
86
|
+
Concatenate tabular data from multiple files.
|
|
87
|
+
|
|
88
|
+
```bash
|
|
89
|
+
tab cat $paths [OPTIONS]
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
Options:
|
|
93
|
+
|
|
94
|
+
| Option | Description |
|
|
95
|
+
|-------------------------|-----------------------------------------------------------------------------------------------------------|
|
|
96
|
+
| `-i` / `--input-format` | Input format (`parquet`, `csv`, `tsv`, `jsonl`, `avro`). Auto-detected from extension if omitted. |
|
|
97
|
+
| `-o` / `--output-format` | Output format (`parquet`, `csv`, `tsv`, `jsonl`, `avro`). If not specified, print Rich table in terminal. |
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
## Global options
|
|
101
|
+
|
|
102
|
+
| Option | Description |
|
|
103
|
+
|-------------------------|------------------------------------------------------------------------------------------------------------------------------|
|
|
104
|
+
| `--az-url-authority-is-account` | Interpret az:// URL authority as storage account name instead of container name. See [Azure Blob Storage](cloud.md#azure-blob-storage) for more information. |
|
|
105
|
+
| `--log-level` | Log level from `{DEBUG, INFO, WARNING, ERROR, CRITICAL}`. |
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
# Cloud support
|
|
2
|
+
|
|
3
|
+
`tab` supports directly reading tabular data files or directories from cloud storage:
|
|
4
|
+
|
|
5
|
+
```bash
|
|
6
|
+
tab view s3://$bucket/$path
|
|
7
|
+
tab view gs://$bucket/$path
|
|
8
|
+
tab view az://$container/$path
|
|
9
|
+
tab view abfss://$container@$account.dfs.core.windows.net/$path
|
|
10
|
+
```
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
## AWS S3
|
|
14
|
+
|
|
15
|
+
Authentication methods (in order):
|
|
16
|
+
|
|
17
|
+
1. **Environment variables**: `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, `AWS_SESSION_TOKEN`
|
|
18
|
+
2. **Profile**: `AWS_PROFILE` or default — handles `~/.aws/credentials`, SSO, assume role, instance metadata
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
# Option 1: Set credentials directly
|
|
22
|
+
export AWS_ACCESS_KEY_ID=...
|
|
23
|
+
export AWS_SECRET_ACCESS_KEY=...
|
|
24
|
+
|
|
25
|
+
# Option 2: Use a profile
|
|
26
|
+
aws configure # static keys
|
|
27
|
+
aws sso login # SSO authentication
|
|
28
|
+
export AWS_PROFILE=my-profile
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
## Azure Blob Storage
|
|
32
|
+
|
|
33
|
+
Authentication methods (in order):
|
|
34
|
+
|
|
35
|
+
1. **Connection string**: `AZURE_STORAGE_CONNECTION_STRING`
|
|
36
|
+
2. **Account key**: `AZURE_STORAGE_KEY`
|
|
37
|
+
3. **SAS token**: `AZURE_STORAGE_SAS_TOKEN`
|
|
38
|
+
4. **Azure AD / RBAC**: `DefaultAzureCredential`
|
|
39
|
+
5. **Azure CLI**: Key fetched via `az storage account keys list`
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
# Option 1: Connection string
|
|
43
|
+
export AZURE_STORAGE_CONNECTION_STRING="DefaultEndpointsProtocol=https;AccountName=...;AccountKey=..."
|
|
44
|
+
|
|
45
|
+
# Option 2: Account key
|
|
46
|
+
export AZURE_STORAGE_ACCOUNT=myaccount
|
|
47
|
+
export AZURE_STORAGE_KEY=...
|
|
48
|
+
|
|
49
|
+
# Option 3: SAS token
|
|
50
|
+
export AZURE_STORAGE_ACCOUNT=myaccount
|
|
51
|
+
export AZURE_STORAGE_SAS_TOKEN="?sv=2022-11-02&ss=..."
|
|
52
|
+
|
|
53
|
+
# Option 4: Azure AD (requires RBAC role: Storage Blob Data Reader)
|
|
54
|
+
az login
|
|
55
|
+
|
|
56
|
+
# Option 5: CLI fallback (requires ARM access)
|
|
57
|
+
az login
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
### Interpretation of `az://` URLs
|
|
61
|
+
The interpretation of the `az://` URL authority (the part between `az://` and the first `/`) can be configured with the `--az-url-authority-is-account` flag.
|
|
62
|
+
|
|
63
|
+
Two interpretations are supported:
|
|
64
|
+
|
|
65
|
+
- `az://$container/$path` - the authority is the container name (default adlfs behavior)
|
|
66
|
+
- `az://$account/$container/$path` - the authority is the storage account name
|
|
67
|
+
|
|
68
|
+
The first form is consistent with `s3://` and `gs://` URLs, but requires the `AZURE_STORAGE_ACCOUNT` environment variable to be set.
|
|
69
|
+
The second form requires the `--az-url-authority-is-account` flag.
|
|
70
|
+
|
|
71
|
+
## Google Cloud Storage
|
|
72
|
+
|
|
73
|
+
Authentication methods (in order):
|
|
74
|
+
|
|
75
|
+
1. **`GOOGLE_APPLICATION_CREDENTIALS`**: Path to service account JSON
|
|
76
|
+
2. **ADC file**: `~/.config/gcloud/application_default_credentials.json`
|
|
77
|
+
3. **gcloud CLI**: Token from `gcloud auth print-access-token`
|
|
78
|
+
4. **`google.auth.default()`**: Default credential resolution
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
# Option 1: Service account
|
|
82
|
+
export GOOGLE_APPLICATION_CREDENTIALS=/path/to/service-account.json
|
|
83
|
+
|
|
84
|
+
# Option 2: User credentials (ADC)
|
|
85
|
+
gcloud auth application-default login
|
|
86
|
+
|
|
87
|
+
# Option 3: CLI login (fallback)
|
|
88
|
+
gcloud auth login
|
|
89
|
+
```
|
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
# tab
|
|
2
2
|
|
|
3
|
-
A fast CLI tool for viewing, querying, and converting tabular data files.
|
|
3
|
+
A CLI tool for viewing, querying, and converting tabular data files. Supports AWS / Azure / Google Cloud Storage URLs.
|
|
4
4
|
|
|
5
5
|
## Supported Formats
|
|
6
|
-
-
|
|
6
|
+
- JSONL
|
|
7
7
|
- CSV
|
|
8
8
|
- TSV
|
|
9
|
-
-
|
|
9
|
+
- Parquet
|
|
10
|
+
- Avro
|
|
10
11
|
|
|
11
12
|
## Usage
|
|
12
13
|
|
|
@@ -15,10 +16,9 @@ A fast CLI tool for viewing, querying, and converting tabular data files.
|
|
|
15
16
|
Display rows from a tabular data file:
|
|
16
17
|
|
|
17
18
|
```bash
|
|
18
|
-
tab view data.
|
|
19
|
-
tab view data.csv --limit 20
|
|
20
|
-
tab view data.tsv --skip 100 --limit 50
|
|
19
|
+
tab view data.csv
|
|
21
20
|
```
|
|
21
|
+

|
|
22
22
|
|
|
23
23
|
Output to different formats:
|
|
24
24
|
|
|
@@ -27,17 +27,13 @@ tab view data.parquet -o jsonl
|
|
|
27
27
|
tab view data.parquet -o csv
|
|
28
28
|
```
|
|
29
29
|
|
|
30
|
-
###
|
|
31
|
-
|
|
32
|
-
Display the schema (column names and types):
|
|
30
|
+
### Show schema
|
|
33
31
|
|
|
34
32
|
```bash
|
|
35
33
|
tab schema data.parquet
|
|
36
34
|
```
|
|
37
35
|
|
|
38
|
-
###
|
|
39
|
-
|
|
40
|
-
Display summary information about a file:
|
|
36
|
+
### Show summary
|
|
41
37
|
|
|
42
38
|
```bash
|
|
43
39
|
tab summary data.parquet
|
|
@@ -48,9 +44,9 @@ tab summary data.parquet
|
|
|
48
44
|
Run SQL queries on your data. The table is referenced as `t`:
|
|
49
45
|
|
|
50
46
|
```bash
|
|
51
|
-
tab sql
|
|
52
|
-
tab sql "SELECT name, COUNT(*) FROM t GROUP BY name" data.csv
|
|
47
|
+
tab sql 'SELECT * FROM t WHERE Metric_A_Value > 80' test.csv
|
|
53
48
|
```
|
|
49
|
+

|
|
54
50
|
|
|
55
51
|
### Convert
|
|
56
52
|
|
|
@@ -67,6 +63,12 @@ Write partitioned output:
|
|
|
67
63
|
tab convert data.csv output_dir/ -o parquet -n 4
|
|
68
64
|
```
|
|
69
65
|
|
|
66
|
+
### Concatenate multiple files
|
|
67
|
+
|
|
68
|
+
```bash
|
|
69
|
+
tab cat data1.csv data2.csv data3.csv -o jsonl > output.jsonl
|
|
70
|
+
```
|
|
71
|
+
|
|
70
72
|
## Options
|
|
71
73
|
|
|
72
74
|
### Common options
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
Participant_ID,Timestamp,Treatment_Group,Metric_A_Value,Metric_B_Value,Status
|
|
2
|
+
P001,2026-01-28T08:00:00Z,Control,72.4,1.05,Baseline
|
|
3
|
+
P001,2026-01-28T12:00:00Z,Control,74.1,1.08,Active
|
|
4
|
+
P002,2026-01-28T08:05:00Z,Experimental,68.9,0.92,Baseline
|
|
5
|
+
P002,2026-01-28T12:05:00Z,Experimental,81.3,1.45,Active
|
|
6
|
+
P003,2026-01-28T08:10:00Z,Placebo,70.2,1.01,Baseline
|
|
7
|
+
P003,2026-01-28T12:10:00Z,Placebo,71.5,1.03,Active
|
|
8
|
+
P004,2026-01-28T08:15:00Z,Experimental,65.8,0.88,Baseline
|
|
9
|
+
P004,2026-01-28T12:15:00Z,Experimental,88.2,1.82,Active
|
tab_cli-0.1.2/mkdocs.yml
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
site_name: 'tab'
|
|
2
|
+
site_description: 'A CLI tool for tabular data'
|
|
3
|
+
site_author: 'Tongfei Chen'
|
|
4
|
+
copyright: 'Copyright © 2026 Tongfei Chen'
|
|
5
|
+
|
|
6
|
+
theme:
|
|
7
|
+
name: 'material'
|
|
8
|
+
palette:
|
|
9
|
+
primary: 'light blue'
|
|
10
|
+
font:
|
|
11
|
+
text: 'Inter'
|
|
12
|
+
code: 'Roboto Mono'
|
|
13
|
+
icon:
|
|
14
|
+
logo: material/table-eye
|
|
15
|
+
|
|
16
|
+
markdown_extensions:
|
|
17
|
+
- toc:
|
|
18
|
+
permalink: true
|
|
19
|
+
- pymdownx.superfences:
|
|
20
|
+
custom_fences:
|
|
21
|
+
- name: mermaid
|
|
22
|
+
class: mermaid
|
|
23
|
+
format: !!python/name:pymdownx.superfences.fence_code_format
|
|
24
|
+
- pymdownx.highlight
|
|
25
|
+
- pymdownx.tabbed:
|
|
26
|
+
alternate_style: true
|
|
27
|
+
- admonition
|
|
28
|
+
|
|
29
|
+
repo_name: 'ctongfei/tab'
|
|
30
|
+
repo_url: 'https://github.com/ctongfei/tab'
|
|
31
|
+
|
|
32
|
+
nav:
|
|
33
|
+
- 'Introduction': 'index.md'
|
|
34
|
+
- 'Cloud support': 'cloud.md'
|
|
35
|
+
- 'CLI reference': 'cli-ref.md'
|
|
36
|
+
|
|
37
|
+
watch:
|
|
38
|
+
- tab_cli
|
|
@@ -1,28 +1,37 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "tab-cli"
|
|
3
|
-
version = "0.1.0"
|
|
3
|
+
version = "0.1.2"
|
|
4
4
|
description = "A CLI tool for tabular data"
|
|
5
5
|
authors = [{name = "Tongfei Chen", email = "tongfei@pm.me"}]
|
|
6
6
|
readme = "README.md"
|
|
7
7
|
repository = "https://github.com/tongfei/tab"
|
|
8
8
|
requires-python = ">=3.10"
|
|
9
9
|
dependencies = [
|
|
10
|
-
"
|
|
10
|
+
"typer>=0.21.1",
|
|
11
11
|
"rich>=13.0",
|
|
12
12
|
"polars>=1.0",
|
|
13
13
|
"pyarrow>=15.0",
|
|
14
14
|
"blobfile>=3.0",
|
|
15
|
+
"polars-fastavro>=0.5.1",
|
|
16
|
+
"fsspec>=2025.1.0",
|
|
17
|
+
"loguru>=0.7.3",
|
|
15
18
|
]
|
|
16
19
|
|
|
17
|
-
[project.
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
[
|
|
21
|
-
requires = ["hatchling"]
|
|
22
|
-
build-backend = "hatchling.build"
|
|
20
|
+
[project.optional-dependencies]
|
|
21
|
+
s3 = ["s3fs>=2025.1.0", "boto3>=1.28.0"]
|
|
22
|
+
gcs = ["gcsfs>=2025.1.0", "google-auth>=2.27.0"]
|
|
23
|
+
azure = ["adlfs>=2025.1.0", "azure-identity>=1.10.0"]
|
|
23
24
|
|
|
24
25
|
[dependency-groups]
|
|
25
26
|
dev = [
|
|
26
27
|
"ruff>=0.14.14",
|
|
27
28
|
"ty>=0.0.14",
|
|
29
|
+
"mkdocs-material>=9.0.0"
|
|
28
30
|
]
|
|
31
|
+
|
|
32
|
+
[project.scripts]
|
|
33
|
+
tab = "tab_cli.cli:main"
|
|
34
|
+
|
|
35
|
+
[build-system]
|
|
36
|
+
requires = ["hatchling"]
|
|
37
|
+
build-backend = "hatchling.build"
|