PyPI - tab-cli - Versions diffs - 0.1.2__tar.gz → 0.1.3__tar.gz - Mend

tab-cli 0.1.2.tar.gz → 0.1.3.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (92) hide show

tab_cli-0.1.3/CHANGELOG.md ADDED Viewed

@@ -0,0 +1,8 @@
+ - 0.1.3:
+   - Separate `tab view` from `tab cat`: `tab view` does not convert formats, `tab cat` does.
+   - Added `--max-cell-len` option to `tab view` to truncate long cell contents.
+ - 0.1.2:
+   - Bugfix on reading directories.
+ - 0.1.1:
+   - Better credential handling for Azure Blob Storage and Google Cloud Storage.
+ - 0.1.0: Initial release

{tab_cli-0.1.2 → tab_cli-0.1.3}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: tab-cli
-Version: 0.1.2
+Version: 0.1.3
 Summary: A CLI tool for tabular data
 Author-email: Tongfei Chen <tongfei@pm.me>
 License-File: LICENSE
@@ -26,4 +26,11 @@ Description-Content-Type: text/markdown
 # tab
+![pypi](https://img.shields.io/pypi/v/tab-cli)
+```sh
+pip install tab-cli
+```
 A CLI tool for viewing, querying, and converting tabular data files. Supports AWS / Azure / Google Cloud Storage URLs.
+ - Documentation: [docs](https://tongfei.me/tab)

tab_cli-0.1.3/README.md ADDED Viewed

@@ -0,0 +1,10 @@
+# tab
+![pypi](https://img.shields.io/pypi/v/tab-cli)
+```sh
+pip install tab-cli
+```
+A CLI tool for viewing, querying, and converting tabular data files. Supports AWS / Azure / Google Cloud Storage URLs.
+ - Documentation: [docs](https://tongfei.me/tab)

{tab_cli-0.1.2 → tab_cli-0.1.3}/docs/cli-ref.md RENAMED Viewed

@@ -2,7 +2,7 @@
 ## `tab view`
-View tabular data from a data file, or a directory of partitions of data files.
+View tabular data from a data file, or a directory of partitions of data files, in a rich CLI format.
 ```bash
 tab view $path [OPTIONS]
@@ -13,9 +13,9 @@ Options:
 | Option                  | Description                                                                                               |
 |-------------------------|-----------------------------------------------------------------------------------------------------------|
 | `-i` / `--input-format` | Input format (`parquet`, `csv`, `tsv`, `jsonl`, `avro`). Auto-detected from extension if omitted.         |
-| `-o` / `--output-format` | Output format (`parquet`, `csv`, `tsv`, `jsonl`, `avro`). If not specified, print Rich table in terminal. |
 | `--limit`               | Maximum number of rows to display.                                                                        |
 | `--skip`                | Number of rows to skip from the beginning.                                                                |
+| `--max-cell-len`        | Truncate cell contents longer than this.                                                                 |
 ## `tab schema`

{tab_cli-0.1.2/site → tab_cli-0.1.3/docs}/gen_assets.sh RENAMED Viewed

@@ -1,3 +1,3 @@
 mkdir -p docs/assets
-uv run tab view docs/test.csv -o table-svg 2> docs/assets/test.svg
+uv run tab view tests/assets/test.csv -o table-svg 2> docs/assets/test.svg
 uv run tab sql 'SELECT * FROM t WHERE Metric_A_Value > 80' docs/test.csv -o table-svg 2> docs/assets/test-where.svg

{tab_cli-0.1.2 → tab_cli-0.1.3}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "tab-cli"
-version = "0.1.2"
+version = "0.1.3"
 description = "A CLI tool for tabular data"
 authors = [{name = "Tongfei Chen", email = "tongfei@pm.me"}]
 readme = "README.md"
@@ -26,12 +26,16 @@ azure = ["adlfs>=2025.1.0", "azure-identity>=1.10.0"]
 dev = [
     "ruff>=0.14.14",
     "ty>=0.0.14",
-    "mkdocs-material>=9.0.0"
+    "mkdocs-material>=9.0.0",
+    "pytest>=8.0",
 ]
 [project.scripts]
 tab = "tab_cli.cli:main"
+[tool.hatch.build.targets.wheel]
+packages = ["src/tab_cli"]
 [build-system]
 requires = ["hatchling"]
 build-backend = "hatchling.build"

{tab_cli-0.1.2 → tab_cli-0.1.3/src}/tab_cli/cli.py RENAMED Viewed

@@ -11,6 +11,7 @@ from rich.logging import RichHandler
 from tab_cli import config
 from tab_cli.handlers import TableWriter, infer_reader, infer_writer
+from tab_cli.handlers.cli_table import CliTableFormatter
 app = typer.Typer(
     help="A CLI tool for viewing and manipulating tabular data.",
@@ -46,32 +47,28 @@ def main_callback(
     )
-def _output(
+def _apply_limit(
     lf: pl.LazyFrame,
     limit: int | None,
     skip: int,
-    output: str | None,
-) -> None:
-    show_truncation = limit is None and output is None
-    actual_limit = 20 if show_truncation else limit
-    if show_truncation:
-        assert actual_limit is not None
-        lf = lf.slice(skip, length=actual_limit + 1)
+    default_limit: int | None = None,
+) -> tuple[pl.LazyFrame, bool]:
+    """Apply skip/limit to a LazyFrame, optionally detecting truncation.
+    If limit is None and default_limit is set, caps at default_limit rows
+    and returns whether the data was truncated.
+    """
+    if limit is None and default_limit is not None:
+        lf = lf.slice(skip, length=default_limit + 1)
         df = lf.collect()
-        truncated = len(df) > actual_limit
+        truncated = len(df) > default_limit
         if truncated:
-            df = df.head(actual_limit)
-        lf = df.lazy()
+            df = df.head(default_limit)
+        return df.lazy(), truncated
     else:
-        if skip > 0 or actual_limit is not None:
-            lf = lf.slice(skip, length=actual_limit)
-        truncated = False
-    writer = infer_writer(output, truncated=show_truncation and truncated)
-    for chunk in writer.write(lf):
-        sys.stdout.buffer.write(chunk)
+        if skip > 0 or limit is not None:
+            lf = lf.slice(skip, length=limit)
+        return lf, False
 @app.command()
@@ -79,13 +76,16 @@ def view(
     path: Annotated[str, typer.Argument(help="Path to the data file or directory")],
     limit: Annotated[Optional[int], typer.Option("--limit", help="Maximum number of rows to display")] = None,
     skip: Annotated[int, typer.Option("--skip", help="Number of rows to skip")] = 0,
-    input: Annotated[Optional[str], typer.Option("-i", "--input-format", help="Input format")] = None,
-    output: Annotated[Optional[str], typer.Option("-o", "--output-format", help="Output format")] = None,
+    input: Annotated[Optional[str], typer.Option("-i", "--input-format", help="Input format, auto-detected from extension if omitted")] = None,
+    max_cell_len: Annotated[Optional[int], typer.Option("--max-cell-len", help="Truncate cell contents longer than this")] = None,
 ) -> None:
-    """View tabular data from a file."""
+    """View tabular data as a formatted table."""
     reader = infer_reader(path, format=input)
     lf = reader.read(path)
-    _output(lf, limit=limit, skip=skip, output=output)
+    lf, truncated = _apply_limit(lf, limit=limit, skip=skip, default_limit=20 if limit is None else None)
+    writer = CliTableFormatter(truncated=truncated, max_cell_len=max_cell_len)
+    for chunk in writer.write(lf):
+        sys.stdout.buffer.write(chunk)
 @app.command()
 def schema(
@@ -113,7 +113,11 @@ def sql(
     lf = reader.read(path)
     ctx = pl.SQLContext(t=lf, eager=False)
     result_lf = ctx.execute(query)
-    _output(result_lf, limit=limit, skip=skip, output=output)
+    show_truncation = limit is None and output is None
+    result_lf, truncated = _apply_limit(result_lf, limit=limit, skip=skip, default_limit=20 if show_truncation else None)
+    writer = infer_writer(output, truncated=truncated)
+    for chunk in writer.write(result_lf):
+        sys.stdout.buffer.write(chunk)
 @app.command()
@@ -156,11 +160,17 @@ def cat(
     input: Annotated[Optional[str], typer.Option("-i", "--input-format", help="Input format")] = None,
     output: Annotated[Optional[str], typer.Option("-o", "--output-format", help="Output format")] = None,
 ) -> None:
-    """Concatenate tabular data from multiple files."""
+    """Concatenate tabular data from multiple files, or just print a single file."""
     reader = infer_reader(paths[0], format=input)
     files = [reader.read(path) for path in paths]
     lf = pl.concat(files, how="vertical")
-    _output(lf, limit=None, skip=0, output=output)
+    if output is not None:
+        writer = infer_writer(format=output)
+    else:
+        writer = infer_writer(format=reader.format.extension())
+        assert isinstance(writer, TableWriter)
+    for chunk in writer.write(lf):
+        sys.stdout.buffer.write(chunk)
 def main() -> None:

{tab_cli-0.1.2 → tab_cli-0.1.3/src}/tab_cli/formats/avro.py RENAMED Viewed

@@ -14,7 +14,7 @@ class AvroFormat(FormatHandler):
     """Handler for Avro files."""
     def extension(self) -> str:
-        return ".avro"
+        return "avro"
     def supports_glob(self) -> bool:
         # polars_fastavro doesn't support glob patterns

{tab_cli-0.1.2 → tab_cli-0.1.3/src}/tab_cli/formats/csv.py RENAMED Viewed

@@ -16,7 +16,7 @@ class CsvFormat(FormatHandler):
         self.separator = separator
     def extension(self) -> str:
-        return ".csv" if self.separator == "," else ".tsv"
+        return "csv" if self.separator == "," else "tsv"
     def supports_glob(self) -> bool:
         return True

{tab_cli-0.1.2 → tab_cli-0.1.3/src}/tab_cli/formats/jsonl.py RENAMED Viewed

@@ -13,7 +13,7 @@ class JsonlFormat(FormatHandler):
     """Handler for JSONL files."""
     def extension(self) -> str:
-        return ".jsonl"
+        return "jsonl"
     def supports_glob(self) -> bool:
         return True

{tab_cli-0.1.2 → tab_cli-0.1.3/src}/tab_cli/formats/parquet.py RENAMED Viewed

@@ -13,7 +13,7 @@ class ParquetFormat(FormatHandler):
     """Handler for Parquet files."""
     def extension(self) -> str:
-        return ".parquet"
+        return "parquet"
     def supports_glob(self) -> bool:
         return True

{tab_cli-0.1.2 → tab_cli-0.1.3/src}/tab_cli/handlers/__init__.py RENAMED Viewed

@@ -74,20 +74,21 @@ def infer_reader(path: str, format: str | None = None) -> TableReader:
     return TableReader(backend, fmt)
-def infer_writer(format: str | None = None, truncated: bool = False) -> TableWriter:
+def infer_writer(format: str | None = None, truncated: bool = False, max_cell_len: int | None = None) -> TableWriter:
     """Infer the writer for a format.
     Args:
         format: Output format. If None, returns CLI table formatter.
         truncated: Whether the output is truncated (for CLI display).
+        max_cell_len: Maximum cell content length for CLI table display.
     Returns:
         TableWriter for the format.
     """
     if format is None:
-        return CliTableFormatter(truncated=truncated)
+        return CliTableFormatter(truncated=truncated, max_cell_len=max_cell_len)
     if format == "table-svg":
-        return CliTableFormatter(truncated=truncated, svg_capture=True)
+        return CliTableFormatter(truncated=truncated, svg_capture=True, max_cell_len=max_cell_len)
     fmt = _FORMAT_MAP.get(format.lower())
     if fmt is None:

{tab_cli-0.1.2 → tab_cli-0.1.3/src}/tab_cli/handlers/base.py RENAMED Viewed

@@ -119,7 +119,7 @@ class TableReader:
     def schema(self, url: str) -> TableSchema:
         if self.backend.is_directory(url):
             # Get schema from first file
-            files = list(self.backend.list_files(url, self.format.extension()))
+            files = list(self.backend.list_files(url, "." + self.format.extension()))
             if not files:
                 raise ValueError(f"No {self.format.extension()} files found in {url}")
             url = files[0].url
@@ -151,7 +151,7 @@ class TableReader:
     def _summary_directory(self, url: str) -> TableSummary:
         """Aggregate summary from all files in directory."""
-        files = list(self.backend.list_files(url, self.format.extension()))
+        files = list(self.backend.list_files(url, "." + self.format.extension()))
         if not files:
             raise ValueError(f"No {self.format.extension()} files found in {url}")

{tab_cli-0.1.2 → tab_cli-0.1.3/src}/tab_cli/handlers/cli_table.py RENAMED Viewed

@@ -11,9 +11,15 @@ from tab_cli.style import _ALT_ROW_STYLE_0, _ALT_ROW_STYLE_1, _KEY_STYLE
 class CliTableFormatter(TableWriter):
-    def __init__(self, truncated: bool = False, svg_capture: bool = False):
+    def __init__(self, truncated: bool = False, svg_capture: bool = False, max_cell_len: int | None = None):
         self.truncated = truncated
         self.svg_capture = svg_capture
+        self.max_cell_len = max_cell_len
+    def _truncate(self, value: str) -> str:
+        if self.max_cell_len is not None and len(value) > self.max_cell_len:
+            return value[:self.max_cell_len] + "..."
+        return value
     def extension(self) -> str:
         return ".txt"
@@ -32,7 +38,7 @@ class CliTableFormatter(TableWriter):
         for batch in lf.collect_batches():
             for row in batch.iter_rows():
-                table.add_row(*[str(v) if v is not None else "" for v in row])
+                table.add_row(*[self._truncate(str(v)) if v is not None else "" for v in row])
         if self.truncated:
             table.add_row(*["..." for _ in lf.collect_schema().names()])

tab_cli-0.1.3/tests/__init__.py ADDED Viewed

File without changes

tab_cli-0.1.3/tests/test_cli.py ADDED Viewed

@@ -0,0 +1,108 @@
+"""Tests for the tab CLI commands."""
+import os
+from typer.testing import CliRunner
+from tab_cli.cli import app
+runner = CliRunner()
+TEST_CSV = os.path.join(os.path.dirname(__file__), "assets", "test.csv")
+class TestView:
+    def test_basic(self):
+        result = runner.invoke(app, ["view", TEST_CSV])
+        assert result.exit_code == 0
+        assert "P001" in result.output
+        assert "Control" in result.output
+    def test_limit(self):
+        result = runner.invoke(app, ["view", TEST_CSV, "--limit", "2"])
+        assert result.exit_code == 0
+        assert "P001" in result.output
+        # Only the first 2 rows are shown, so later participants (e.g. P003) should not appear
+        assert "P003" not in result.output
+        # No truncation indicator when explicit limit
+        assert "..." not in result.output
+    def test_skip(self):
+        result = runner.invoke(app, ["view", TEST_CSV, "--skip", "6", "--limit", "10"])
+        assert result.exit_code == 0
+        # First 6 rows skipped; only P004 rows remain
+        assert "P001" not in result.output
+        assert "P004" in result.output
+    def test_max_cell_len(self):
+        result = runner.invoke(app, ["view", TEST_CSV, "--max-cell-len", "5"])
+        assert result.exit_code == 0
+        # "Control" (7 chars) should be truncated to "Contr..."
+        assert "Contr..." in result.output
+        # "P001" (4 chars) fits within 5, should appear as-is
+        assert "P001" in result.output
+    def test_no_output_flag(self):
+        result = runner.invoke(app, ["view", TEST_CSV, "-o", "csv"])
+        assert result.exit_code != 0
+    def test_truncation_indicator(self):
+        """With no --limit and more than 20 rows, truncation '...' should appear.
+        Our test.csv only has 8 rows, so no truncation."""
+        result = runner.invoke(app, ["view", TEST_CSV])
+        assert result.exit_code == 0
+        # 8 rows < 20 default limit, so no truncation
+        lines_with_ellipsis = [l for l in result.output.splitlines() if l.strip() == "...   ...   ...   ...   ...   ..."]
+        assert len(lines_with_ellipsis) == 0
+class TestCat:
+    def test_basic_outputs_csv(self):
+        result = runner.invoke(app, ["cat", TEST_CSV])
+        assert result.exit_code == 0
+        # Should output in CSV format (the input format), not a Rich table
+        assert "Participant_ID," in result.output or "Participant_ID\t" in result.output or "P001" in result.output
+    def test_output_format_csv(self):
+        result = runner.invoke(app, ["cat", TEST_CSV, "-o", "csv"])
+        assert result.exit_code == 0
+        lines = result.output.strip().splitlines()
+        # CSV header
+        assert "Participant_ID" in lines[0]
+        # Should have header + 8 data rows
+        assert len(lines) == 9
+    def test_output_format_tsv(self):
+        result = runner.invoke(app, ["cat", TEST_CSV, "-o", "tsv"])
+        assert result.exit_code == 0
+        lines = result.output.strip().splitlines()
+        assert "\t" in lines[0]
+    def test_no_rich_table(self):
+        """cat without -o should NOT produce a Rich formatted table."""
+        result = runner.invoke(app, ["cat", TEST_CSV])
+        assert result.exit_code == 0
+        # Rich tables use box-drawing chars; CSV output won't
+        assert "─" not in result.output
+class TestSql:
+    def test_basic_table_output(self):
+        result = runner.invoke(app, ["sql", "SELECT * FROM t WHERE Status = 'Baseline'", TEST_CSV])
+        assert result.exit_code == 0
+        assert "Baseline" in result.output
+        # Should show as a table by default (no -o)
+        assert "Active" not in result.output
+    def test_with_output_format(self):
+        result = runner.invoke(app, ["sql", "SELECT Participant_ID, Status FROM t", TEST_CSV, "-o", "csv"])
+        assert result.exit_code == 0
+        lines = result.output.strip().splitlines()
+        assert "Participant_ID" in lines[0]
+        assert "Status" in lines[0]
+    def test_limit(self):
+        result = runner.invoke(app, ["sql", "SELECT * FROM t", TEST_CSV, "--limit", "2"])
+        assert result.exit_code == 0
+        # Should have limited rows
+        count = sum(1 for line in result.output.splitlines() if "P00" in line)
+        assert count <= 2

tab_cli-0.1.2/CHANGELOG.md DELETED Viewed

@@ -1,5 +0,0 @@
- - 0.1.2:
-   - Bugfix on reading directories.
- - 0.1.1:
-   - Better credential handling for Azure Blob Storage and Google Cloud Storage.
- - 0.1.0: Initial release