unique-sdk 2026.28.0.dev4__tar.gz → 2026.28.0.dev6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85)
  1. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/PKG-INFO +1 -1
  2. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/pyproject.toml +1 -1
  3. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/cli/cli.py +23 -4
  4. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/cli/commands/cite_file.py +112 -7
  5. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/cli/shell.py +34 -9
  6. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/cli/skills/unique-cli-file-management/SKILL.md +51 -15
  7. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/README.md +0 -0
  8. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/__init__.py +0 -0
  9. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/_api_requestor.py +0 -0
  10. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/_api_resource.py +0 -0
  11. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/_api_version.py +0 -0
  12. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/_error.py +0 -0
  13. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/_http_client.py +0 -0
  14. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/_list_object.py +0 -0
  15. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/_object_classes.py +0 -0
  16. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/_request_options.py +0 -0
  17. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/_unique_object.py +0 -0
  18. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/_unique_ql.py +0 -0
  19. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/_unique_response.py +0 -0
  20. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/_util.py +0 -0
  21. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/_version.py +0 -0
  22. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/_webhook.py +0 -0
  23. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/api_resources/__init__.py +0 -0
  24. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/api_resources/_acronyms.py +0 -0
  25. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/api_resources/_agentic_table.py +0 -0
  26. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/api_resources/_analytics_order.py +0 -0
  27. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/api_resources/_benchmarking.py +0 -0
  28. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/api_resources/_briefing.py +0 -0
  29. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/api_resources/_chat_completion.py +0 -0
  30. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/api_resources/_content.py +0 -0
  31. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/api_resources/_dynamic_frontend.py +0 -0
  32. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/api_resources/_elicitation.py +0 -0
  33. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/api_resources/_embedding.py +0 -0
  34. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/api_resources/_event.py +0 -0
  35. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/api_resources/_folder.py +0 -0
  36. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/api_resources/_group.py +0 -0
  37. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/api_resources/_integrated.py +0 -0
  38. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/api_resources/_llm_models.py +0 -0
  39. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/api_resources/_mcp.py +0 -0
  40. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/api_resources/_message.py +0 -0
  41. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/api_resources/_message_assessment.py +0 -0
  42. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/api_resources/_message_execution.py +0 -0
  43. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/api_resources/_message_log.py +0 -0
  44. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/api_resources/_message_tool.py +0 -0
  45. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/api_resources/_module.py +0 -0
  46. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/api_resources/_scheduled_task.py +0 -0
  47. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/api_resources/_search.py +0 -0
  48. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/api_resources/_search_string.py +0 -0
  49. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/api_resources/_short_term_memory.py +0 -0
  50. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/api_resources/_space.py +0 -0
  51. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/api_resources/_user.py +0 -0
  52. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/api_resources/_web_search.py +0 -0
  53. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/cli/__init__.py +0 -0
  54. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/cli/__main__.py +0 -0
  55. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/cli/commands/__init__.py +0 -0
  56. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/cli/commands/_citation_manifest.py +0 -0
  57. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/cli/commands/dynamic_frontend.py +0 -0
  58. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/cli/commands/elicitation.py +0 -0
  59. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/cli/commands/files.py +0 -0
  60. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/cli/commands/folders.py +0 -0
  61. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/cli/commands/mcp.py +0 -0
  62. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/cli/commands/navigation.py +0 -0
  63. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/cli/commands/read.py +0 -0
  64. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/cli/commands/scheduled_tasks.py +0 -0
  65. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/cli/commands/search.py +0 -0
  66. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/cli/commands/subagent.py +0 -0
  67. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/cli/commands/web_search.py +0 -0
  68. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/cli/commands/web_search_config.py +0 -0
  69. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/cli/config.py +0 -0
  70. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/cli/formatting.py +0 -0
  71. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/cli/skills/unique-cli-dynamic-frontend/SKILL.md +0 -0
  72. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/cli/skills/unique-cli-elicitation/SKILL.md +0 -0
  73. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/cli/skills/unique-cli-mcp/SKILL.md +0 -0
  74. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/cli/skills/unique-cli-scheduled-tasks/SKILL.md +0 -0
  75. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/cli/skills/unique-cli-search/SKILL.md +0 -0
  76. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/cli/skills/unique-cli-subagent/SKILL.md +0 -0
  77. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/cli/skills/unique-cli-web-search/SKILL.md +0 -0
  78. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/cli/state.py +0 -0
  79. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/utils/analytics_order_run.py +0 -0
  80. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/utils/benchmarking_run.py +0 -0
  81. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/utils/chat_history.py +0 -0
  82. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/utils/chat_in_space.py +0 -0
  83. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/utils/file_io.py +0 -0
  84. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/utils/sources.py +0 -0
  85. {unique_sdk-2026.28.0.dev4 → unique_sdk-2026.28.0.dev6}/unique_sdk/utils/token.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: unique-sdk
3
- Version: 2026.28.0.dev4
3
+ Version: 2026.28.0.dev6
4
4
  Summary:
5
5
  Author: Martin Fadler, Konstantin Krauss, Andreas Hauri
6
6
  Author-email: Martin Fadler <martin.fadler@unique.ch>, Konstantin Krauss <konstantin@unique.ch>, Andreas Hauri <andreas@unique.ch>
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "unique_sdk"
3
- version = "2026.28.0.dev4"
3
+ version = "2026.28.0.dev6"
4
4
  description = ""
5
5
  readme = "README.md"
6
6
  license = { text = "MIT" }
@@ -376,11 +376,26 @@ def download(ctx: click.Context, name_or_id: str, local_dest: str | None) -> Non
376
376
  default=None,
377
377
  help="Page numbers to cite: '3-7' or '1,3,5'. Omit for whole-file.",
378
378
  )
379
+ @click.option(
380
+ "--read-method",
381
+ "-m",
382
+ "read_method",
383
+ required=True,
384
+ help=(
385
+ "How you read the cited source: 'text' (page/document text, e.g. "
386
+ "pdftotext, PyMuPDF, MarkItDown), 'vision' (page rendered as an image "
387
+ "and read visually), or 'indexed' (read via the platform index with "
388
+ "unique-cli read). Common tool names (pdftotext, fitz, ocr, ...) are "
389
+ "accepted and normalized. Use separate cite calls when different pages "
390
+ "were read by different methods."
391
+ ),
392
+ )
379
393
  @click.pass_context
380
394
  def cite(
381
395
  ctx: click.Context,
382
396
  name_or_id: str,
383
397
  pages: str | None,
398
+ read_method: str,
384
399
  ) -> None:
385
400
  """Declare page citations for a file.
386
401
 
@@ -388,14 +403,18 @@ def cite(
388
403
  Registers [filesourceN] markers for pages you referenced in your answer.
389
404
  Does NOT read or extract the file — use your own tools for that.
390
405
  NAME_OR_ID can be a file path, current-directory file name, or content ID.
406
+ --read-method is mandatory: it records how you read the page(s).
407
+ --pages is optional; omit it to cite the whole file (e.g. non-paginated
408
+ formats).
391
409
 
392
410
  \b
393
411
  Examples:
394
- unique-cli cite report.pdf --pages 3,5,7
395
- unique-cli cite /Reports/Q1/report.pdf --pages 3,5,7
396
- unique-cli cite cont_abc123 --pages 1-4
412
+ unique-cli cite report.pdf --pages 3,5,7 --read-method text
413
+ unique-cli cite /Reports/Q1/report.pdf --pages 3,5,7 --read-method vision
414
+ unique-cli cite cont_abc123 --pages 1-4 --read-method indexed
415
+ unique-cli cite notes.docx --read-method text
397
416
  """
398
- click.echo(cmd_cite_file(LazyState.get(ctx), name_or_id, pages))
417
+ click.echo(cmd_cite_file(LazyState.get(ctx), name_or_id, pages, read_method))
399
418
 
400
419
 
401
420
  @main.command(name="read")
@@ -23,6 +23,74 @@ _CHAT_FILES_MANIFEST = Path(".unique") / "chat-files.json"
23
23
 
24
24
  _MAX_PAGES_PER_CALL = 500
25
25
 
26
+ # File extensions with no inherent pagination. Passing ``--pages`` for these is
27
+ # meaningless — the whole file is the citeable unit. PDFs and PPTX (slides) are
28
+ # paginated; unknown/other extensions (and bare content IDs) are left untouched
29
+ # so we only emit the targeted error when we can detect the format confidently.
30
+ _NON_PAGINATED_SUFFIXES = frozenset(
31
+ {
32
+ ".xlsx",
33
+ ".xls",
34
+ ".csv",
35
+ ".txt",
36
+ ".md",
37
+ ".html",
38
+ ".htm",
39
+ ".png",
40
+ ".jpg",
41
+ ".jpeg",
42
+ ".gif",
43
+ ".webp",
44
+ ".bmp",
45
+ ".tiff",
46
+ ".tif",
47
+ }
48
+ )
49
+
50
+
51
+ def _is_non_paginated(filename: str) -> bool:
52
+ """True when ``filename`` has a known extension that carries no page numbers."""
53
+ return Path(filename).suffix.lower() in _NON_PAGINATED_SUFFIXES
54
+
55
+
56
+ # Canonical reading-method values declared via ``--read-method``. They record
57
+ # the *representation* of the source the agent actually read so the runner can
58
+ # reconstruct the matching ground truth for a hallucination check:
59
+ # - ``text`` — page/document text (e.g. pdftotext, PyMuPDF get_text,
60
+ # MarkItDown conversion).
61
+ # - ``vision`` — the page/slide rendered as an image and read with vision.
62
+ # - ``indexed`` — content read through the platform index (unique-cli
63
+ # read/search), i.e. existing chunks.
64
+ READ_METHODS = ("text", "vision", "indexed")
65
+
66
+ # Convenience aliases accepted from the agent and normalized to canonical values.
67
+ _READ_METHOD_ALIASES = {
68
+ "pdftotext": "text",
69
+ "pymupdf": "text",
70
+ "fitz": "text",
71
+ "mupdf": "text",
72
+ "pdfminer": "text",
73
+ "markitdown": "text",
74
+ "image": "vision",
75
+ "ocr": "vision",
76
+ "render": "vision",
77
+ "read": "indexed",
78
+ "search": "indexed",
79
+ }
80
+
81
+
82
+ def _normalize_read_method(read_method: str | None) -> str | None:
83
+ """Normalize a ``--read-method`` value to a canonical one, or None if invalid.
84
+
85
+ Case-insensitive, with a small alias map. Returns None for missing or
86
+ unrecognized values so callers can fail closed with a clear message.
87
+ """
88
+ if not read_method or not read_method.strip():
89
+ return None
90
+ candidate = read_method.strip().lower()
91
+ candidate = _READ_METHOD_ALIASES.get(candidate, candidate)
92
+ return candidate if candidate in READ_METHODS else None
93
+
26
94
 
27
95
  def _parse_pages(pages: str | None) -> list[int]:
28
96
  """Parse '3-7' or '1,3,5' into a list of 1-based page numbers.
@@ -91,17 +159,35 @@ def cmd_cite_file(
91
159
  state: ShellState,
92
160
  name_or_id: str,
93
161
  pages: str | None,
162
+ read_method: str | None,
94
163
  ) -> str:
95
164
  """Declare citations for a file's pages.
96
165
 
97
166
  Writes entries to .unique/file-refs.jsonl and returns [filesourceN]
98
- markers for the agent to use inline.
167
+ markers for the agent to use inline. ``read_method`` records how the cited
168
+ page text was read (one of :data:`READ_METHODS`); it is mandatory and
169
+ validated here as defense in depth for callers that bypass the CLI layer.
99
170
  """
171
+ canonical_method = _normalize_read_method(read_method)
172
+ if canonical_method is None:
173
+ return (
174
+ f"{CITE_ERROR_PREFIX} --read-method is required and must be one of: "
175
+ f"{', '.join(READ_METHODS)}. Report the method that produced the "
176
+ "text you actually used."
177
+ )
178
+
100
179
  try:
101
180
  content_id, filename = _resolve_content_id_with_manifest(state, name_or_id)
102
181
  except Exception as exc:
103
182
  return f"{CITE_ERROR_PREFIX} {exc}"
104
183
 
184
+ if pages and pages.strip() and _is_non_paginated(filename):
185
+ suffix = Path(filename).suffix.lower()
186
+ return (
187
+ f"{CITE_ERROR_PREFIX} {filename} is non-paginated ({suffix} files "
188
+ "have no pages) — omit --pages to cite the whole file."
189
+ )
190
+
105
191
  page_list = _parse_pages(pages)
106
192
  if not page_list:
107
193
  return f"{CITE_ERROR_PREFIX} invalid --pages value"
@@ -114,20 +200,38 @@ def cmd_cite_file(
114
200
  ):
115
201
  existing = _read_turn_refs_manifest(refs_log_path)
116
202
 
117
- existing_keys: dict[tuple[str, int], int] = {}
203
+ # Track the source number and the read method already recorded for
204
+ # each (contentId, page) so we can dedup and flag method conflicts.
205
+ existing_keys: dict[tuple[str, int], tuple[int, str]] = {}
118
206
  for entry in existing:
119
207
  key = (entry.get("contentId", ""), entry.get("page", 0))
120
- existing_keys[key] = entry.get("sourceNumber", 0)
208
+ existing_keys[key] = (
209
+ entry.get("sourceNumber", 0),
210
+ entry.get("readMethod", ""),
211
+ )
121
212
 
122
- next_source_number = max(existing_keys.values()) + 1 if existing_keys else 1
213
+ next_source_number = (
214
+ max(sn for sn, _ in existing_keys.values()) + 1 if existing_keys else 1
215
+ )
123
216
 
124
217
  output_lines: list[str] = []
125
218
  for page in page_list:
126
219
  key = (content_id, page)
127
220
  if key in existing_keys:
128
- sn = existing_keys[key]
221
+ sn, prior_method = existing_keys[key]
222
+ # A page is grounded by a single representation. If the agent
223
+ # re-cites it with a different method, keep the first and say
224
+ # so explicitly rather than silently dropping the new method.
225
+ if prior_method and prior_method != canonical_method:
226
+ note = (
227
+ f"already declared with --read-method "
228
+ f"{prior_method}; keeping it (one read-method per "
229
+ "page — issue a separate cite for a different page)"
230
+ )
231
+ else:
232
+ note = "already declared"
129
233
  output_lines.append(
130
- f"[filesource{sn}] -> {filename} page {page} (already declared)"
234
+ f"[filesource{sn}] -> {filename} page {page} ({note})"
131
235
  )
132
236
  continue
133
237
 
@@ -136,12 +240,13 @@ def cmd_cite_file(
136
240
  "contentId": content_id,
137
241
  "filename": filename,
138
242
  "page": page,
243
+ "readMethod": canonical_method,
139
244
  }
140
245
  _append_turn_refs_manifest_entry(refs_log_path, entry)
141
246
  output_lines.append(
142
247
  f"[filesource{next_source_number}] -> {filename} page {page}"
143
248
  )
144
- existing_keys[key] = next_source_number
249
+ existing_keys[key] = (next_source_number, canonical_method)
145
250
  next_source_number += 1
146
251
 
147
252
  except UnsafeRefsLogPathError as exc:
@@ -62,7 +62,7 @@ OVERVIEW_HELP = textwrap.dedent("""\
62
62
  download <name|path|id> [dest] Download a file to local machine
63
63
  rm <name|path|id> Delete a file
64
64
  mv <old|path|id> <new> Rename a file
65
- cite <name|path|id> [--pages] Declare page citations for a file
65
+ cite <name|path|id> [--pages] --read-method METHOD Declare page citations
66
66
 
67
67
  Search:
68
68
  search <query> [options] Combined search (vector + full-text)
@@ -443,20 +443,31 @@ class UniqueShell(cmd.Cmd):
443
443
  def do_cite(self, arg: str) -> None:
444
444
  """Declare page citations for a file.
445
445
 
446
- Usage: cite <name|path|content_id> [--pages RANGE]
446
+ Usage: cite <name|path|content_id> [--pages RANGE] --read-method METHOD
447
+
448
+ --read-method is mandatory (one of: text, vision, indexed). --pages is
449
+ optional; omit it to cite the whole file.
447
450
 
448
451
  Examples:
449
- /Reports> cite report.pdf --pages 3,5,7
450
- /Reports> cite /Reports/Q1/report.pdf --pages 3,5,7
451
- /Reports> cite cont_abc123 --pages 1-4
452
+ /Reports> cite report.pdf --pages 3,5,7 --read-method text
453
+ /Reports> cite /Reports/Q1/report.pdf --pages 3,5,7 --read-method vision
454
+ /Reports> cite cont_abc123 --pages 1-4 --read-method indexed
455
+ /Reports> cite notes.docx --read-method text
452
456
  """
453
- from unique_sdk.cli.commands.cite_file import cmd_cite_file
457
+ from unique_sdk.cli.commands.cite_file import (
458
+ READ_METHODS,
459
+ cmd_cite_file,
460
+ )
454
461
 
462
+ usage = (
463
+ "Usage: cite <name|path|content_id> [--pages RANGE] --read-method METHOD"
464
+ )
455
465
  parts = shlex.split(arg)
456
466
  if not parts:
457
- self._print("Usage: cite <name|path|content_id> [--pages RANGE]")
467
+ self._print(usage)
458
468
  return
459
469
  pages: str | None = None
470
+ read_method: str | None = None
460
471
  positional: list[str] = []
461
472
  index = 0
462
473
  while index < len(parts):
@@ -467,13 +478,27 @@ class UniqueShell(cmd.Cmd):
467
478
  return
468
479
  pages = parts[index + 1]
469
480
  index += 2
481
+ elif token in ("--read-method", "-m"):
482
+ if index + 1 >= len(parts):
483
+ self._print(
484
+ "cite: --read-method requires a value (one of: "
485
+ f"{', '.join(READ_METHODS)})"
486
+ )
487
+ return
488
+ read_method = parts[index + 1]
489
+ index += 2
470
490
  else:
471
491
  positional.append(token)
472
492
  index += 1
473
493
  if not positional:
474
- self._print("Usage: cite <name|path|content_id> [--pages RANGE]")
494
+ self._print(usage)
495
+ return
496
+ if read_method is None:
497
+ self._print(
498
+ f"cite: --read-method is required (one of: {', '.join(READ_METHODS)})"
499
+ )
475
500
  return
476
- self._print(cmd_cite_file(self.state, positional[0], pages))
501
+ self._print(cmd_cite_file(self.state, positional[0], pages, read_method))
477
502
 
478
503
  def _parse_int(self, raw: str, flag: str) -> tuple[int | None, bool]:
479
504
  """Parse an int option value, returning (value, ok). Prints on failure."""
@@ -73,9 +73,11 @@ unique-cli read cont_abc123
73
73
  unique-cli read cont_abc123 --page 12
74
74
  unique-cli read cont_abc123 --from-page 5 --to-page 9
75
75
 
76
- # Declare page citations after reading a file
77
- unique-cli cite report.pdf --pages 3,5,7
78
- unique-cli cite cont_abc123 --pages 1-4
76
+ # Declare page citations after reading a file (--read-method is mandatory)
77
+ unique-cli cite report.pdf --pages 3,5,7 --read-method text
78
+ unique-cli cite cont_abc123 --pages 1-4 --read-method vision
79
+ # Non-paginated files (Excel, CSV, txt): omit --pages to cite the whole file
80
+ unique-cli cite data.xlsx --read-method text
79
81
 
80
82
  # Delete a file
81
83
  unique-cli rm report.pdf
@@ -203,26 +205,60 @@ can attribute text to pages.
203
205
 
204
206
  ## Citing File Pages
205
207
 
206
- After reading **any** file and using its content in your answer, declare citations:
208
+ After reading **any** file (PDF, Office, text, etc.) and using its content in your
209
+ answer, declare citations. `cite` works on **any** file type, not just PDFs.
210
+ `--read-method` is **mandatory**: it records how you actually read the cited page(s).
207
211
 
208
212
  ```bash
209
- unique-cli cite report.pdf --pages 3,5
210
- unique-cli cite cont_abc123 --pages 1-4
213
+ unique-cli cite report.pdf --pages 3,5 --read-method text
214
+ unique-cli cite cont_abc123 --pages 1-4 --read-method vision
215
+ unique-cli cite data.xlsx --read-method text # non-paginated: omit --pages
211
216
  ```
212
217
 
213
218
  This registers `[filesourceN]` markers. Use them inline in your answer.
214
219
  The platform converts `[filesourceN]` into footnotes and clickable reference chips.
215
220
 
216
- **MANDATORY 3-step verification before EVERY `unique-cli cite` call — NO EXCEPTIONS:**
217
-
218
- 1. `pdfinfo file.pdf | grep Pages` — get total physical page count.
219
- 2. For **each** page you intend to cite, run `pdftotext -f N -l N file.pdf -` and confirm the content you are referencing is actually on that physical page. Do NOT skip this. Do NOT assume page numbers.
220
- 3. Only after step 2 confirms a match, call `unique-cli cite` with the verified physical page numbers.
221
-
222
- Page numbers are **physical PDF positions** (1-based). NEVER use printed page numbers from headers/footers — they often differ from physical positions.
223
-
221
+ **`--pages` is optional.** Omit it to cite the **whole file**. Paginated formats
222
+ (PDF, PPTX) take page/slide numbers; **non-paginated formats (Excel `.xlsx`/`.xls`,
223
+ CSV, `.txt`, HTML, images) have no pages — always omit `--pages`** and cite the
224
+ whole file.
225
+
226
+ **Choosing `--read-method`** (declare the *representation* of the source you actually read):
227
+
228
+ - `text` → you used **extracted text** (`pdftotext`, PyMuPDF / `fitz` `page.get_text()`, MarkItDown, or any text extraction).
229
+ - `vision` → you read a **rendered image** of the page/slide (e.g. `get_pixmap()`) with your vision capability.
230
+ - `indexed` → you relied on **`unique-cli read`** output (the platform's indexed chunks).
231
+
232
+ | Value | When to use |
233
+ |-------|-------------|
234
+ | `text` | You read the page/document as extracted text and used that text. |
235
+ | `vision` | You rendered the page to an image and read it with your vision capability. |
236
+ | `indexed` | You read the content via `unique-cli read` (indexed chunks). |
237
+
238
+ **MANDATORY verification before EVERY `unique-cli cite` call — NO EXCEPTIONS.**
239
+ Pick the row matching the file you read; verify the cited content really is where
240
+ you claim before calling `cite`:
241
+
242
+ - **PDF** — `pdfinfo file.pdf | grep Pages` for the total physical page count, then
243
+ for **each** page run `pdftotext -f N -l N file.pdf -` and confirm the content is
244
+ on that physical page. Page numbers are **physical PDF positions** (1-based);
245
+ NEVER use printed page numbers from headers/footers — they often differ.
246
+ - **PPTX** — the page number is the **slide number** (1-based). Verify against the
247
+ slide you actually read.
248
+ - **DOCX** — use the rendered page from your text extraction; if there is no
249
+ reliable page boundary, cite the **whole file** (omit `--pages`).
250
+ - **Non-paginated (XLSX/CSV/TXT/HTML/images)** — there are no pages. Do **NOT**
251
+ pass `--pages`; cite the whole file and verify the content exists in it.
252
+
253
+ Then determine `--read-method`: report the representation you actually read. In a
254
+ fallback chain (e.g. text extraction returned nothing → render + read visually),
255
+ report `text` if you used extracted text or `vision` if you read a rendered image.
256
+ Only after verifying, call `unique-cli cite` with the verified page numbers (if any)
257
+ and `--read-method`.
258
+
259
+ - **One method per `cite` call.** If different pages were read with different methods, issue separate `cite` calls — one per method.
224
260
  - Numbers are **per-turn only**; do not reuse from prior turns.
225
- - Do NOT use `cite` for content from `unique-cli search` or `unique-cli web-search`.
261
+ - Do NOT use `cite` for content from `unique-cli search` or `unique-cli web-search` — those are referenced automatically.
226
262
 
227
263
  ## Error Handling
228
264