suchi 0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- suchi-0.1.1/.gitignore +48 -0
- suchi-0.1.1/PKG-INFO +23 -0
- suchi-0.1.1/pyproject.toml +47 -0
- suchi-0.1.1/src/suchi/__init__.py +3 -0
- suchi-0.1.1/src/suchi/api.py +56 -0
- suchi-0.1.1/src/suchi/citations/__init__.py +0 -0
- suchi-0.1.1/src/suchi/citations/processor.py +163 -0
- suchi-0.1.1/src/suchi/citations/styles/apa.csl +2273 -0
- suchi-0.1.1/src/suchi/citations/styles/chicago-author-date.csl +4189 -0
- suchi-0.1.1/src/suchi/citations/styles/elsevier-harvard.csl +238 -0
- suchi-0.1.1/src/suchi/citations/styles/harvard-cite-them-right.csl +316 -0
- suchi-0.1.1/src/suchi/citations/styles/ieee.csl +519 -0
- suchi-0.1.1/src/suchi/citations/styles/modern-language-association.csl +1184 -0
- suchi-0.1.1/src/suchi/citations/styles/nature.csl +189 -0
- suchi-0.1.1/src/suchi/cli.py +1788 -0
- suchi-0.1.1/src/suchi/collab/__init__.py +0 -0
- suchi-0.1.1/src/suchi/collections.py +218 -0
- suchi-0.1.1/src/suchi/config.py +73 -0
- suchi-0.1.1/src/suchi/connector/__init__.py +8 -0
- suchi-0.1.1/src/suchi/connector/server.py +377 -0
- suchi-0.1.1/src/suchi/library.py +519 -0
- suchi-0.1.1/src/suchi/models.py +69 -0
- suchi-0.1.1/src/suchi/pageindex/__init__.py +15 -0
- suchi-0.1.1/src/suchi/pageindex/indexer.py +353 -0
- suchi-0.1.1/src/suchi/pageindex/retriever.py +230 -0
- suchi-0.1.1/src/suchi/routes/__init__.py +0 -0
- suchi-0.1.1/src/suchi/routes/annotations.py +105 -0
- suchi-0.1.1/src/suchi/routes/chat.py +444 -0
- suchi-0.1.1/src/suchi/routes/citations.py +63 -0
- suchi-0.1.1/src/suchi/routes/collections.py +161 -0
- suchi-0.1.1/src/suchi/routes/discovery.py +82 -0
- suchi-0.1.1/src/suchi/routes/entries.py +280 -0
- suchi-0.1.1/src/suchi/routes/export.py +57 -0
- suchi-0.1.1/src/suchi/routes/pdf_finder.py +75 -0
- suchi-0.1.1/src/suchi/routes/references.py +316 -0
- suchi-0.1.1/src/suchi/routes/search.py +50 -0
- suchi-0.1.1/src/suchi/routes/settings.py +80 -0
- suchi-0.1.1/src/suchi/routes/sync.py +107 -0
- suchi-0.1.1/src/suchi/search.py +370 -0
- suchi-0.1.1/src/suchi/sync/__init__.py +1 -0
- suchi-0.1.1/src/suchi/sync/base.py +36 -0
- suchi-0.1.1/src/suchi/sync/engine.py +244 -0
- suchi-0.1.1/src/suchi/sync/gdrive.py +303 -0
- suchi-0.1.1/src/suchi/sync/oauth.py +296 -0
- suchi-0.1.1/src/suchi/translators/__init__.py +18 -0
- suchi-0.1.1/src/suchi/translators/arxiv.py +99 -0
- suchi-0.1.1/src/suchi/translators/crossref.py +144 -0
- suchi-0.1.1/src/suchi/translators/discovery.py +217 -0
- suchi-0.1.1/src/suchi/translators/grobid.py +349 -0
- suchi-0.1.1/src/suchi/translators/openalex.py +400 -0
- suchi-0.1.1/src/suchi/translators/openlibrary.py +53 -0
- suchi-0.1.1/src/suchi/translators/pdf_extract.py +315 -0
- suchi-0.1.1/src/suchi/translators/pdf_finder.py +210 -0
- suchi-0.1.1/src/suchi/translators/references.py +292 -0
- suchi-0.1.1/src/suchi/translators/resolver.py +63 -0
- suchi-0.1.1/src/suchi/translators/semantic_scholar.py +130 -0
- suchi-0.1.1/src/suchi/translators/zotero_rdf.py +536 -0
- suchi-0.1.1/suchi-server.py +57 -0
- suchi-0.1.1/suchi-server.spec +115 -0
- suchi-0.1.1/suchi_server.py +11 -0
suchi-0.1.1/.gitignore
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*.egg-info/
|
|
5
|
+
dist/
|
|
6
|
+
build/
|
|
7
|
+
*.egg
|
|
8
|
+
.venv/
|
|
9
|
+
venv/
|
|
10
|
+
|
|
11
|
+
# Node
|
|
12
|
+
node_modules/
|
|
13
|
+
frontend/dist/
|
|
14
|
+
|
|
15
|
+
# IDE
|
|
16
|
+
.vscode/
|
|
17
|
+
.idea/
|
|
18
|
+
*.swp
|
|
19
|
+
*.swo
|
|
20
|
+
*~
|
|
21
|
+
|
|
22
|
+
# OS
|
|
23
|
+
.DS_Store
|
|
24
|
+
Thumbs.db
|
|
25
|
+
|
|
26
|
+
# Suchi runtime
|
|
27
|
+
.tantivy-index/
|
|
28
|
+
.references-cache.json
|
|
29
|
+
.pageindex.json
|
|
30
|
+
.collection-index.json
|
|
31
|
+
|
|
32
|
+
# Config (contains API keys)
|
|
33
|
+
config.yaml
|
|
34
|
+
gdrive-token.json
|
|
35
|
+
.env
|
|
36
|
+
|
|
37
|
+
# Test
|
|
38
|
+
.pytest_cache/
|
|
39
|
+
.coverage
|
|
40
|
+
htmlcov/
|
|
41
|
+
|
|
42
|
+
# Rust / Tauri
|
|
43
|
+
src-tauri/target/
|
|
44
|
+
src-tauri/gen/
|
|
45
|
+
|
|
46
|
+
# Build
|
|
47
|
+
*.whl
|
|
48
|
+
*.tar.gz
|
suchi-0.1.1/PKG-INFO
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: suchi
|
|
3
|
+
Version: 0.1.1
|
|
4
|
+
Summary: सूची — CLI-first reference manager with AI-powered research tools
|
|
5
|
+
Requires-Python: >=3.11
|
|
6
|
+
Requires-Dist: citeproc-py>=0.6
|
|
7
|
+
Requires-Dist: fastapi>=0.110
|
|
8
|
+
Requires-Dist: httpx>=0.27
|
|
9
|
+
Requires-Dist: pymupdf>=1.24
|
|
10
|
+
Requires-Dist: python-multipart>=0.0.9
|
|
11
|
+
Requires-Dist: pyyaml>=6.0
|
|
12
|
+
Requires-Dist: rapidfuzz>=3.0
|
|
13
|
+
Requires-Dist: rich>=13.0
|
|
14
|
+
Requires-Dist: tantivy>=0.22
|
|
15
|
+
Requires-Dist: typer>=0.9
|
|
16
|
+
Requires-Dist: uvicorn>=0.27
|
|
17
|
+
Provides-Extra: dev
|
|
18
|
+
Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
|
|
19
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
20
|
+
Requires-Dist: ruff>=0.3; extra == 'dev'
|
|
21
|
+
Provides-Extra: gdrive
|
|
22
|
+
Requires-Dist: google-api-python-client>=2.0; extra == 'gdrive'
|
|
23
|
+
Requires-Dist: google-auth-oauthlib>=1.0; extra == 'gdrive'
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "suchi"
|
|
7
|
+
version = "0.1.1"
|
|
8
|
+
description = "सूची — CLI-first reference manager with AI-powered research tools"
|
|
9
|
+
requires-python = ">=3.11"
|
|
10
|
+
dependencies = [
|
|
11
|
+
"typer>=0.9",
|
|
12
|
+
"fastapi>=0.110",
|
|
13
|
+
"uvicorn>=0.27",
|
|
14
|
+
"pyyaml>=6.0",
|
|
15
|
+
"rich>=13.0",
|
|
16
|
+
"httpx>=0.27",
|
|
17
|
+
"pymupdf>=1.24",
|
|
18
|
+
"python-multipart>=0.0.9",
|
|
19
|
+
"tantivy>=0.22",
|
|
20
|
+
"rapidfuzz>=3.0",
|
|
21
|
+
"citeproc-py>=0.6",
|
|
22
|
+
]
|
|
23
|
+
|
|
24
|
+
[project.optional-dependencies]
|
|
25
|
+
gdrive = [
|
|
26
|
+
"google-api-python-client>=2.0",
|
|
27
|
+
"google-auth-oauthlib>=1.0",
|
|
28
|
+
]
|
|
29
|
+
dev = [
|
|
30
|
+
"pytest>=8.0",
|
|
31
|
+
"pytest-asyncio>=0.23",
|
|
32
|
+
"ruff>=0.3",
|
|
33
|
+
]
|
|
34
|
+
|
|
35
|
+
[project.scripts]
|
|
36
|
+
suchi = "suchi.cli:app"
|
|
37
|
+
|
|
38
|
+
[tool.hatch.build.targets.wheel]
|
|
39
|
+
packages = ["src/suchi"]
|
|
40
|
+
|
|
41
|
+
[tool.ruff]
|
|
42
|
+
target-version = "py311"
|
|
43
|
+
line-length = 100
|
|
44
|
+
|
|
45
|
+
[tool.ruff.lint]
|
|
46
|
+
# Allow compact one-liners (if x: y) and semicolons — they're intentional for conciseness
|
|
47
|
+
ignore = ["E701", "E702", "E402"]
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
"""FastAPI application."""
|
|
2
|
+
|
|
3
|
+
from contextlib import asynccontextmanager
|
|
4
|
+
|
|
5
|
+
from fastapi import FastAPI
|
|
6
|
+
from fastapi.middleware.cors import CORSMiddleware
|
|
7
|
+
|
|
8
|
+
from .routes import entries, search, export, collections, settings, chat, citations, references, pdf_finder, annotations, discovery, sync
|
|
9
|
+
from . import library as lib_module
|
|
10
|
+
from .search import index_entry, remove_from_index, rebuild_index
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan handler: wire up search indexing, then hand over to the app.

    Everything before the ``yield`` runs at startup; there is no shutdown work.
    """
    # Library callbacks that keep the search index in step with entry changes.
    index_hooks = {
        "on_added": index_entry,
        "on_removed": remove_from_index,
    }
    lib_module.set_index_hooks(**index_hooks)

    # rebuild_index() is called unconditionally here; any "only if needed"
    # check presumably lives inside it — confirm in suchi.search.
    rebuild_index()

    yield
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
app = FastAPI(
    title="Suchi",
    description="सूची — CLI-first reference manager with AI-powered research tools",
    version="0.1.1",
    lifespan=lifespan,
)

# Wide-open CORS so the Tauri webview and the browser extension can both call the API.
# NOTE(review): a wildcard origin combined with allow_credentials=True is discouraged
# by the FastAPI CORS documentation — confirm this is intended for a local-only server.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
    allow_credentials=True,
)

# Mount every route module's router; the tuple order is the registration order.
for _route_module in (
    entries,
    search,
    export,
    collections,
    settings,
    chat,
    citations,
    references,
    pdf_finder,
    annotations,
    discovery,
    sync,
):
    app.include_router(_route_module.router)
del _route_module  # keep the module namespace free of the loop variable
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
@app.get("/api/health")
def health():
    """Health-check endpoint: return a fixed ``ok`` status plus the version string."""
    return dict(status="ok", version="0.1.1")
|
|
File without changes
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
"""Citation formatter using citeproc-py with CSL styles.
|
|
2
|
+
|
|
3
|
+
Supports 10,000+ CSL citation styles. Ships with the most common ones;
|
|
4
|
+
users can add more .csl files to the styles directory.
|
|
5
|
+
|
|
6
|
+
Usage:
|
|
7
|
+
from suchi.citations.processor import format_citation, format_bibliography, list_styles
|
|
8
|
+
|
|
9
|
+
# Format a single entry
|
|
10
|
+
citation = format_citation(entry, style="apa")
|
|
11
|
+
|
|
12
|
+
# Format a bibliography from multiple entries
|
|
13
|
+
bib = format_bibliography(entries, style="chicago-author-date")
|
|
14
|
+
|
|
15
|
+
# List available styles
|
|
16
|
+
styles = list_styles()
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from pathlib import Path
|
|
20
|
+
|
|
21
|
+
from citeproc import CitationStylesStyle, CitationStylesBibliography
|
|
22
|
+
from citeproc import Citation, CitationItem
|
|
23
|
+
from citeproc.source.json import CiteProcJSON
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
STYLES_DIR = Path(__file__).parent / "styles"
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def list_styles() -> list[dict]:
    """Describe every ``*.csl`` file found directly inside ``STYLES_DIR``.

    Returns:
        One dict per style file, in sorted path order, with keys ``id`` (file
        stem), ``name`` (stem with dashes turned into spaces, title-cased) and
        ``file`` (file name including the extension).
    """
    return [
        {
            "id": csl_file.stem,
            "name": csl_file.stem.replace("-", " ").title(),
            "file": csl_file.name,
        }
        for csl_file in sorted(STYLES_DIR.glob("*.csl"))
    ]
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _entry_to_csl(entry: dict) -> dict:
    """Convert a Suchi entry dict to a CSL-JSON item for citeproc.

    ``id``, ``type`` and ``title`` are always emitted; every other CSL field is
    added only when the entry carries a truthy value for it.

    Args:
        entry: Entry metadata. Keys read here: ``id``, ``type``, ``title``,
            ``author`` (a list of dicts with ``family``/``given``), ``date``,
            ``doi``, ``isbn``, ``url``, ``abstract``, ``volume``, ``issue``,
            ``pages``, ``publisher`` and ``journal``.

    Returns:
        A new dict in CSL-JSON shape; ``entry`` itself is not modified.
    """
    csl = {
        "id": entry.get("id", "unknown"),
        "type": _map_to_csl_type(entry.get("type", "article")),
        "title": entry.get("title", ""),
    }

    # Authors: a name part that is missing *or* explicitly None becomes "".
    authors = entry.get("author") or []
    if authors:
        csl["author"] = [
            {"family": a.get("family") or "", "given": a.get("given") or ""}
            for a in authors
        ]

    # Date -> CSL "issued" as [[year], [year, month] or [year, month, day]].
    date = entry.get("date")
    if date:
        if all(hasattr(date, attr) for attr in ("year", "month", "day")):
            # Already a date-like object (e.g. datetime.date): use its fields directly.
            date_parts = [date.year, date.month, date.day]
        else:
            # str() lets a bare integer year through; "YYYY[-MM[-DD]]" is parsed
            # left to right, stopping at the first non-numeric component and
            # never taking more than the three components CSL allows.
            date_parts = []
            for part in str(date).split("-")[:3]:
                try:
                    date_parts.append(int(part))
                except ValueError:
                    break
        if date_parts:
            csl["issued"] = {"date-parts": [date_parts]}

    # Simple fields: entry key -> CSL key, value stringified.
    field_map = {
        "doi": "DOI",
        "isbn": "ISBN",
        "url": "URL",
        "abstract": "abstract",
        "volume": "volume",
        "issue": "issue",
        "pages": "page",
        "publisher": "publisher",
    }
    for src, dst in field_map.items():
        val = entry.get(src)
        if val:
            csl[dst] = str(val)

    if entry.get("journal"):
        csl["container-title"] = entry["journal"]

    return csl
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def format_citation(entry: dict, style: str = "apa") -> str:
    """Format a single entry as an inline citation (e.g., '(Smith, 2024)').

    Args:
        entry: Suchi entry dict; converted with ``_entry_to_csl``.
        style: Id of a bundled style, i.e. the stem of a ``.csl`` file in
            ``STYLES_DIR``. Must be a bare name without any path components.

    Returns:
        The rendered citation as a string.

    Raises:
        ValueError: If ``style`` is not a bare style id or no matching
            ``.csl`` file exists in ``STYLES_DIR``.
    """
    # Validate the style before doing any conversion work. Only a bare file
    # stem is accepted so that values such as "../x" or an absolute path can
    # never resolve to a file outside the styles directory.
    style_name = str(style)
    style_path = STYLES_DIR / f"{style_name}.csl"
    if Path(style_name).name != style_name or not style_path.exists():
        available = [s['id'] for s in list_styles()]
        raise ValueError(f"Style not found: {style}. Available: {available}")

    csl_data = [_entry_to_csl(entry)]
    source = CiteProcJSON(csl_data)

    bib_style = CitationStylesStyle(str(style_path), validate=False)
    bibliography = CitationStylesBibliography(bib_style, source)

    citation = Citation([CitationItem(entry.get("id", "unknown"))])
    bibliography.register(citation)

    # Render the inline citation. The second argument is a callback whose
    # result is ignored here (presumably invoked by citeproc for items it
    # cannot resolve — confirm against the citeproc-py API).
    result = bibliography.cite(citation, lambda _: None)
    return str(result)
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def format_bibliography(entries: list[dict], style: str = "apa") -> str:
    """Format multiple entries as a formatted bibliography.

    Args:
        entries: Suchi entry dicts; each is converted with ``_entry_to_csl``.
        style: Id of a bundled style, i.e. the stem of a ``.csl`` file in
            ``STYLES_DIR``. Must be a bare name without any path components.

    Returns:
        The non-empty rendered items joined by blank lines, or ``""`` when
        ``entries`` is empty or nothing was rendered.

    Raises:
        ValueError: If ``style`` is not a bare style id or no matching
            ``.csl`` file exists in ``STYLES_DIR`` (only checked when
            ``entries`` is non-empty).
    """
    if not entries:
        return ""

    # Validate the style before doing any conversion work. Only a bare file
    # stem is accepted so that values such as "../x" or an absolute path can
    # never resolve to a file outside the styles directory.
    style_name = str(style)
    style_path = STYLES_DIR / f"{style_name}.csl"
    if Path(style_name).name != style_name or not style_path.exists():
        available = [s['id'] for s in list_styles()]
        raise ValueError(f"Style not found: {style}. Available: {available}")

    csl_data = [_entry_to_csl(e) for e in entries]
    source = CiteProcJSON(csl_data)

    bib_style = CitationStylesStyle(str(style_path), validate=False)
    bibliography = CitationStylesBibliography(bib_style, source)

    # Register one citation per entry so each one appears in the bibliography.
    for entry in entries:
        citation = Citation([CitationItem(entry.get("id", "unknown"))])
        bibliography.register(citation)

    bib_items = bibliography.bibliography()
    if not bib_items:
        return ""

    # Stringify each rendered item and drop any that come out blank.
    lines = [text for item in bib_items if (text := str(item).strip())]
    return "\n\n".join(lines)
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def format_entry_full(entry: dict, style: str = "apa") -> dict:
    """Render one entry both ways with the same style.

    Returns:
        A dict with ``citation`` (result of ``format_citation``) and
        ``bibliography`` (result of ``format_bibliography`` for just this entry).
    """
    inline = format_citation(entry, style)
    reference = format_bibliography([entry], style)
    return {"citation": inline, "bibliography": reference}
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def _map_to_csl_type(entry_type: str) -> str:
|
|
155
|
+
return {
|
|
156
|
+
"article": "article-journal",
|
|
157
|
+
"book": "book",
|
|
158
|
+
"inbook": "chapter",
|
|
159
|
+
"inproceedings": "paper-conference",
|
|
160
|
+
"thesis": "thesis",
|
|
161
|
+
"report": "report",
|
|
162
|
+
"dataset": "dataset",
|
|
163
|
+
}.get(entry_type, "article")
|