PyPI - unique-sdk - Versions diffs - 2026.26.0.dev9__tar.gz → 2026.26.0.dev10__tar.gz - Mend

unique-sdk 2026.26.0.dev9 (tar.gz) → 2026.26.0.dev10 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (85)

{unique_sdk-2026.26.0.dev9 → unique_sdk-2026.26.0.dev10}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: unique-sdk
-Version: 2026.26.0.dev9
+Version: 2026.26.0.dev10
 Summary:
 Author: Martin Fadler, Konstantin Krauss, Andreas Hauri
 Author-email: Martin Fadler <martin.fadler@unique.ch>, Konstantin Krauss <konstantin@unique.ch>, Andreas Hauri <andreas@unique.ch>

{unique_sdk-2026.26.0.dev9 → unique_sdk-2026.26.0.dev10}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "unique_sdk"
-version = "2026.26.0.dev9"
+version = "2026.26.0.dev10"
 description = ""
 readme = "README.md"
 license = { text = "MIT" }

{unique_sdk-2026.26.0.dev9 → unique_sdk-2026.26.0.dev10}/unique_sdk/cli/cli.py RENAMED Viewed

@@ -398,9 +398,41 @@ def cite(
 @main.command(name="read")
 @click.argument("cont_id")
+@click.option(
+    "--page",
+    "-p",
+    type=int,
+    default=None,
+    help="Read a single page (shorthand for --from-page N --to-page N).",
+)
+@click.option(
+    "--from-page",
+    type=int,
+    default=None,
+    help="First page to include (inclusive).",
+)
+@click.option(
+    "--to-page",
+    type=int,
+    default=None,
+    help="Last page to include (inclusive).",
+)
+@click.option(
+    "--max-chars",
+    type=int,
+    default=None,
+    help="Truncate the printed text to at most N characters.",
+)
 @click.pass_context
-def read_cmd(ctx: click.Context, cont_id: str) -> None:
-    """Read all indexed text chunks for a known content ID.
+def read_cmd(
+    ctx: click.Context,
+    cont_id: str,
+    page: int | None,
+    from_page: int | None,
+    to_page: int | None,
+    max_chars: int | None,
+) -> None:
+    """Read indexed text chunks for a known content ID.
     \b
     CONT_ID must be a content ID (cont_...) obtained from a prior `ls` or
@@ -411,11 +443,34 @@ def read_cmd(ctx: click.Context, cont_id: str) -> None:
     Use `search` when you need to find documents by topic or keyword.
     Use `read` when you already know the content ID and want the full text.
+    \b
+    Restrict to a page range with --page (single page) or --from-page/--to-page.
+    A chunk spanning pages 2-4 is returned for any overlapping request; files
+    without page numbers (e.g. plain text/markdown) are returned only without a
+    page range.
     \b
     Examples:
       unique-cli read cont_abc123
+      unique-cli read cont_abc123 --page 12
+      unique-cli read cont_abc123 --from-page 5 --to-page 9
+      unique-cli read cont_abc123 --to-page 3 --max-chars 8000
     """
-    output = cmd_read(LazyState.get(ctx), cont_id)
+    if page is not None and (from_page is not None or to_page is not None):
+        click.echo(
+            "read: use either --page or --from-page/--to-page, not both", err=True
+        )
+        raise SystemExit(1)
+    if page is not None:
+        from_page = page
+        to_page = page
+    output = cmd_read(
+        LazyState.get(ctx),
+        cont_id,
+        from_page=from_page,
+        to_page=to_page,
+        max_chars=max_chars,
+    )
     if _is_read_error_output(output):
         click.echo(output, err=True)
         raise SystemExit(1)

unique_sdk-2026.26.0.dev10/unique_sdk/cli/commands/read.py ADDED Viewed

@@ -0,0 +1,176 @@
+"""Read command: retrieve all indexed text chunks for a known content ID.
+Calls ``Content.search(where={"id": {"equals": cont_id}})`` — a direct
+Postgres lookup that returns every indexed chunk for the document in one
+request, no vector search involved.
+Use this when you already know the ``cont_*`` ID (e.g. from a prior ``ls``
+or ``unique-cli search`` result) and want to read the full document text.
+For discovery or query-based retrieval use ``unique-cli search`` instead.
+Pass ``from_page``/``to_page`` to read only part of a long document by page
+range; chunks are filtered client-side on the ``startPage``/``endPage`` the
+platform already returns, so no ingestion changes are required.
+"""
+from __future__ import annotations
+from typing import Any
+import unique_sdk
+from unique_sdk.cli.state import ShellState
+READ_ERROR_PREFIX = "read:"
+def _chunk_in_page_range(
+    chunk: dict[str, Any],
+    from_page: int | None,
+    to_page: int | None,
+) -> bool:
+    """Return True if *chunk* overlaps the requested ``[from_page, to_page]`` span.
+    A chunk covers ``startPage``..``endPage`` inclusive. With page-based chunking
+    these are equal (one chunk per page); otherwise a single chunk can span
+    several pages, so we keep any chunk that *overlaps* the requested range
+    rather than one fully contained in it. Chunks without page numbers are
+    excluded, since they cannot be placed on a page. ``from_page``/``to_page``
+    that are ``None`` act as open bounds.
+    """
+    start: int | None = chunk.get("startPage")
+    end: int | None = chunk.get("endPage")
+    if start is None:
+        start = end
+    if end is None:
+        end = start
+    if start is None or end is None:
+        return False
+    low = from_page if from_page is not None else start
+    high = to_page if to_page is not None else end
+    return start <= high and end >= low
+def _format_requested_range(from_page: int | None, to_page: int | None) -> str:
+    """Human-readable label for a requested page range (for messages)."""
+    if from_page is not None and to_page is not None:
+        return str(from_page) if from_page == to_page else f"{from_page}-{to_page}"
+    if from_page is not None:
+        return f"{from_page}+"
+    return f"up to {to_page}"
+def cmd_read(
+    state: ShellState,
+    cont_id: str,
+    from_page: int | None = None,
+    to_page: int | None = None,
+    max_chars: int | None = None,
+) -> str:
+    """Return indexed text chunks for *cont_id* as plain text.
+    Args:
+        state: Shell state carrying user/company credentials.
+        cont_id: A content ID (``cont_...``) to retrieve.
+        from_page: First page to include (inclusive). ``None`` = open start.
+        to_page: Last page to include (inclusive). ``None`` = open end.
+        max_chars: Truncate the returned text to at most this many characters.
+    Returns:
+        A formatted string of chunks, or an error message prefixed with
+        ``read:``.
+    When ``from_page``/``to_page`` are given, chunks are filtered to those that
+    overlap the requested pages. The page numbers come from ingestion; nothing
+    needs to change there. A chunk spanning pages 2-4 is returned for any range
+    touching 2-4, so the text may include a little from neighbouring pages.
+    """
+    if not cont_id.startswith("cont_"):
+        return f"{READ_ERROR_PREFIX} expected a content ID starting with 'cont_', got: {cont_id!r}"
+    if from_page is not None and to_page is not None and from_page > to_page:
+        return f"{READ_ERROR_PREFIX} invalid page range ({from_page} > {to_page})"
+    if max_chars is not None and max_chars < 1:
+        return f"{READ_ERROR_PREFIX} invalid --max-chars ({max_chars}); must be >= 1"
+    # Enforce the same .unique-search.json workspace boundary as search/ls/rm.
+    # Content.search has no scopeIds param, so we guard by owner scope before
+    # the point-lookup — matching rm/mv, not search's API-level scopeIds filter.
+    if not state.is_content_within_workspace(cont_id):
+        return f"{READ_ERROR_PREFIX} permission denied (outside workspace scope)"
+    try:
+        results = unique_sdk.Content.search(
+            user_id=state.config.user_id,
+            company_id=state.config.company_id,
+            where={"id": {"equals": cont_id}},
+        )
+    except unique_sdk.APIError as e:
+        return f"{READ_ERROR_PREFIX} {e}"
+    if not results:
+        return f"{READ_ERROR_PREFIX} no content found for ID: {cont_id}"
+    content = results[0]
+    title = getattr(content, "title", None) or getattr(content, "key", None) or cont_id
+    chunks = getattr(content, "chunks", None) or []
+    if not chunks:
+        return (
+            f"Content: {title} ({cont_id})\n"
+            "No indexed chunks found — the document may still be ingesting or ingestion failed."
+        )
+    sorted_chunks = sorted(chunks, key=lambda c: c.get("order") or 0)
+    if from_page is not None or to_page is not None:
+        sorted_chunks = [
+            c for c in sorted_chunks if _chunk_in_page_range(c, from_page, to_page)
+        ]
+        if not sorted_chunks:
+            page_range = _format_requested_range(from_page, to_page)
+            return (
+                f"Content: {title} ({cont_id})\n"
+                f"No indexed chunks found in page range {page_range}. The document "
+                "may not have page numbers (e.g. plain text/markdown) or spans a "
+                "different range — read without a page range to see all text."
+            )
+    lines: list[str] = [
+        f"Content: {title} ({cont_id}) — {len(sorted_chunks)} chunk(s)\n"
+    ]
+    for chunk in sorted_chunks:
+        text = (chunk.get("text") or "").strip()
+        if not text:
+            continue
+        start = chunk.get("startPage")
+        end = chunk.get("endPage")
+        if start is not None or end is not None:
+            page_start = start if start is not None else end
+            page_end = end if end is not None else start
+            if page_start is not None and page_end is not None:
+                page_ref = (
+                    f"[p.{page_start}]"
+                    if page_start == page_end
+                    else f"[p.{page_start}-{page_end}]"
+                )
+                lines.append(f"{page_ref} {text}")
+            else:
+                lines.append(text)
+        else:
+            lines.append(text)
+    output = "\n\n".join(lines)
+    if max_chars is not None and len(output) > max_chars:
+        if from_page is not None or to_page is not None:
+            hint = "narrow the page range or raise --max-chars to see more"
+        else:
+            hint = "use a page range (--page/--from-page/--to-page) or raise --max-chars to see more"
+        output = f"{output[:max_chars]}\n... [truncated at {max_chars} chars; {hint}]"
+    return output
+def is_error_output(output: str) -> bool:
+    """Return ``True`` when *output* is an error message from ``cmd_read``."""
+    return output.startswith(READ_ERROR_PREFIX)

{unique_sdk-2026.26.0.dev9 → unique_sdk-2026.26.0.dev10}/unique_sdk/cli/shell.py RENAMED Viewed

@@ -68,7 +68,11 @@ OVERVIEW_HELP = textwrap.dedent("""\
         --folder <path|id>        Restrict to a folder
         --metadata <key=value>    Filter by metadata (repeatable)
         --limit <N>               Max results (default: 200)
-      read <cont_id>            Read all indexed text chunks for a content ID
+      read <cont_id> [options]  Read indexed text chunks for a content ID
+        --page / -p <N>           Read a single page
+        --from-page <N>           First page (inclusive)
+        --to-page <N>             Last page (inclusive)
+        --max-chars <N>           Truncate output to N characters
     MCP:
       mcp [options] <json>      Call an MCP server tool
@@ -470,27 +474,95 @@ class UniqueShell(cmd.Cmd):
             return
         self._print(cmd_cite_file(self.state, positional[0], pages))
+    def _parse_int(self, raw: str, flag: str) -> tuple[int | None, bool]:
+        """Parse an int option value, returning (value, ok). Prints on failure."""
+        try:
+            return int(raw), True
+        except ValueError:
+            self._print(f"Invalid {flag}: {raw} (expected an integer)")
+            return None, False
     def do_read(self, arg: str) -> None:
-        """Read all indexed text chunks for a known content ID.
+        """Read indexed text chunks for a known content ID (optionally by page).
-        Usage: read <cont_id>
+        Usage: read <cont_id> [--page N | --from-page N --to-page M] [--max-chars N]
         Retrieves every indexed chunk for the document directly from the
-        database — no vector search, no query string needed.
+        database — no vector search, no query string needed. Use --page for a
+        single page or --from-page/--to-page for a range; a chunk spanning
+        pages 2-4 is returned for any overlapping request.
         Use `search` to find documents by topic; use `read` once you have
         the content ID and want the full text.
         Examples:
           /Reports> read cont_abc123
+          /Reports> read cont_abc123 --page 12
+          /Reports> read cont_abc123 --from-page 5 --to-page 9
         """
         from unique_sdk.cli.commands.read import cmd_read
         parts = shlex.split(arg)
+        usage = (
+            "Usage: read <cont_id> "
+            "[--page N | --from-page N --to-page M] [--max-chars N]"
+        )
         if not parts:
-            self._print("Usage: read <cont_id>")
+            self._print(usage)
             return
-        self._print(cmd_read(self.state, parts[0]))
+        cont_id: str | None = None
+        page: int | None = None
+        from_page: int | None = None
+        to_page: int | None = None
+        max_chars: int | None = None
+        int_flags = ("--page", "-p", "--from-page", "--to-page", "--max-chars")
+        i = 0
+        while i < len(parts):
+            tok = parts[i]
+            if tok in int_flags:
+                if i + 1 >= len(parts):
+                    self._print(f"Missing value for {tok}")
+                    return
+                value, ok = self._parse_int(parts[i + 1], tok)
+                if not ok:
+                    return
+                if tok in ("--page", "-p"):
+                    page = value
+                elif tok == "--from-page":
+                    from_page = value
+                elif tok == "--to-page":
+                    to_page = value
+                else:  # --max-chars
+                    max_chars = value
+                i += 2
+            elif cont_id is None:
+                cont_id = tok
+                i += 1
+            else:
+                self._print(f"Unknown argument: {tok}")
+                return
+        if cont_id is None:
+            self._print(usage)
+            return
+        if page is not None and (from_page is not None or to_page is not None):
+            self._print("read: use either --page or --from-page/--to-page, not both")
+            return
+        if page is not None:
+            from_page = page
+            to_page = page
+        self._print(
+            cmd_read(
+                self.state,
+                cont_id,
+                from_page=from_page,
+                to_page=to_page,
+                max_chars=max_chars,
+            )
+        )
     def do_rm(self, arg: str) -> None:
         """Delete a file.

{unique_sdk-2026.26.0.dev9 → unique_sdk-2026.26.0.dev10}/unique_sdk/cli/skills/unique-cli-file-management/SKILL.md RENAMED Viewed

@@ -4,6 +4,8 @@ description: >-
   Manage files and folders on the Unique AI Platform using the unique-cli
   command-line tool. Use when the user asks to upload, download, delete,
   rename, list, find, restore versions, list versions, look for, or organize files and folders on Unique,
+  or to read / view / quote the text contents of a known file (optionally by
+  page or page range, e.g. "what's on page 5?", "read pages 10-12"),
   or when working with scope IDs (scope_*) or content IDs (cont_*).
   IMPORTANT: When a user says they are "looking for a file" or wants to
   "find a file", they typically mean locating it within the Unique AI
@@ -64,6 +66,13 @@ unique-cli restore-version cver_abc123
 unique-cli download report.pdf ./local/
 unique-cli download cont_abc123 ~/Desktop/
+# Read a file's extracted text by content ID (whole file)
+unique-cli read cont_abc123
+# Read a single page or a page range
+unique-cli read cont_abc123 --page 12
+unique-cli read cont_abc123 --from-page 5 --to-page 9
 # Declare page citations after reading a file
 unique-cli cite report.pdf --pages 3,5,7
 unique-cli cite cont_abc123 --pages 1-4
@@ -141,6 +150,57 @@ unique-cli mkdir "2025/Q1/Financials"
 unique-cli upload ./budget.xlsx /2025/Q1/Financials/
 ```
+## Reading File Contents (by page range)
+Use `read` to retrieve the **extracted text** of a single, known file — for
+example to answer "what does page 5 say?", to quote an exact passage, or to
+read a long document a few pages at a time. This differs from `search`:
+`search` ranks chunks across many files by relevance; `read` returns the text
+of one file in document order.
+`read` takes a **content ID** (`cont_...`), not a file name. Get the ID first
+from `ls` or `search`, then pass it to `read`.
+```bash
+# Whole file
+unique-cli read cont_abc123
+# A single page
+unique-cli read cont_abc123 --page 12
+# A page range (inclusive)
+unique-cli read cont_abc123 --from-page 5 --to-page 9
+# Cap the output size (protects your context window on huge files)
+unique-cli read cont_abc123 --to-page 3 --max-chars 8000
+```
+| Option | Description |
+|--------|-------------|
+| `--page` / `-p N` | Read only page N (shorthand for `--from-page N --to-page N`) |
+| `--from-page N` | First page to include (inclusive) |
+| `--to-page N` | Last page to include (inclusive) |
+| `--max-chars N` | Truncate the printed text to N characters |
+Each chunk is prefixed with its source page(s) as `[p.N]` or `[p.N-M]`, so you
+can attribute text to pages.
+### How page filtering behaves (important)
+- **Page numbers come from ingestion.** Each chunk carries a `startPage` and
+  `endPage`; the page filter is applied to those values. Nothing in the
+  ingestion pipeline needs to change.
+- **Ranges overlap, they don't slice.** A chunk that spans pages 2-4 is
+  returned for `--page 3` (or any range touching 2-4). The returned text is the
+  whole chunk, so it may include a little from neighbouring pages. Treat the
+  result as "the chunks covering these pages", not a pixel-perfect page cut.
+- **Some files have no page numbers.** Plain text, markdown, and similar
+  content has no page numbers; those chunks are returned only when you read
+  **without** a page range. A page-filtered read of such a file returns nothing.
+- **Empty / not indexed?** If `read` reports the file is still ingesting or has
+  no indexed chunks, there is no extracted text to return — use `download` to
+  fetch the original bytes instead.
 ## Citing File Pages
 After reading **any** file and using its content in your answer, declare citations:

unique_sdk-2026.26.0.dev9/unique_sdk/cli/commands/read.py DELETED Viewed

@@ -1,93 +0,0 @@
-"""Read command: retrieve all indexed text chunks for a known content ID.
-Calls ``Content.search(where={"id": {"equals": cont_id}})`` — a direct
-Postgres lookup that returns every indexed chunk for the document in one
-request, no vector search involved.
-Use this when you already know the ``cont_*`` ID (e.g. from a prior ``ls``
-or ``unique-cli search`` result) and want to read the full document text.
-For discovery or query-based retrieval use ``unique-cli search`` instead.
-"""
-from __future__ import annotations
-import unique_sdk
-from unique_sdk.cli.state import ShellState
-READ_ERROR_PREFIX = "read:"
-def cmd_read(state: ShellState, cont_id: str) -> str:
-    """Return all indexed text chunks for *cont_id* as plain text.
-    Args:
-        state: Shell state carrying user/company credentials.
-        cont_id: A content ID (``cont_...``) to retrieve.
-    Returns:
-        A formatted string of chunks, or an error message prefixed with
-        ``read:``.
-    """
-    if not cont_id.startswith("cont_"):
-        return f"{READ_ERROR_PREFIX} expected a content ID starting with 'cont_', got: {cont_id!r}"
-    # Enforce the same .unique-search.json workspace boundary as search/ls/rm.
-    # Content.search has no scopeIds param, so we guard by owner scope before
-    # the point-lookup — matching rm/mv, not search's API-level scopeIds filter.
-    if not state.is_content_within_workspace(cont_id):
-        return f"{READ_ERROR_PREFIX} permission denied (outside workspace scope)"
-    try:
-        results = unique_sdk.Content.search(
-            user_id=state.config.user_id,
-            company_id=state.config.company_id,
-            where={"id": {"equals": cont_id}},
-        )
-    except unique_sdk.APIError as e:
-        return f"{READ_ERROR_PREFIX} {e}"
-    if not results:
-        return f"{READ_ERROR_PREFIX} no content found for ID: {cont_id}"
-    content = results[0]
-    title = getattr(content, "title", None) or getattr(content, "key", None) or cont_id
-    chunks = getattr(content, "chunks", None) or []
-    if not chunks:
-        return (
-            f"Content: {title} ({cont_id})\n"
-            "No indexed chunks found — the document may still be ingesting or ingestion failed."
-        )
-    sorted_chunks = sorted(chunks, key=lambda c: c.get("order") or 0)
-    lines: list[str] = [
-        f"Content: {title} ({cont_id}) — {len(sorted_chunks)} chunk(s)\n"
-    ]
-    for chunk in sorted_chunks:
-        text = (chunk.get("text") or "").strip()
-        if not text:
-            continue
-        start = chunk.get("startPage")
-        end = chunk.get("endPage")
-        if start is not None or end is not None:
-            page_start = start if start is not None else end
-            page_end = end if end is not None else start
-            if page_start is not None and page_end is not None:
-                page_ref = (
-                    f"[p.{page_start}]"
-                    if page_start == page_end
-                    else f"[p.{page_start}-{page_end}]"
-                )
-                lines.append(f"{page_ref} {text}")
-            else:
-                lines.append(text)
-        else:
-            lines.append(text)
-    return "\n\n".join(lines)
-def is_error_output(output: str) -> bool:
-    """Return ``True`` when *output* is an error message from ``cmd_read``."""
-    return output.startswith(READ_ERROR_PREFIX)