viscsv 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- viscsv/__init__.py +3 -0
- viscsv/builder.py +438 -0
- viscsv/meta.py +18 -0
- viscsv/thumbnail.py +26 -0
- viscsv/viewer.py +772 -0
- viscsv-0.1.0.dist-info/METADATA +78 -0
- viscsv-0.1.0.dist-info/RECORD +9 -0
- viscsv-0.1.0.dist-info/WHEEL +5 -0
- viscsv-0.1.0.dist-info/top_level.txt +1 -0
viscsv/__init__.py
ADDED
viscsv/builder.py
ADDED
|
@@ -0,0 +1,438 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
import json
|
|
5
|
+
import shutil
|
|
6
|
+
from typing import Iterable, Mapping, Sequence
|
|
7
|
+
|
|
8
|
+
import numpy as np
|
|
9
|
+
import pandas as pd
|
|
10
|
+
|
|
11
|
+
from .meta import write_meta_chunk_js
|
|
12
|
+
from .thumbnail import make_thumbnail
|
|
13
|
+
from .viewer import write_viewer
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class VisCSV:
    """Accumulate tabular rows in a DataFrame and build an HTML viewer from them.

    ``image_columns`` lists the columns holding image paths. Its first entry
    is the column filled by :meth:`add` when an image path is given and the
    default primary image column used by :meth:`build`.
    """

    def __init__(
        self,
        image: str | Sequence[str] | None = None,
        image_columns: str | Sequence[str] | None = None,
        dataframe: pd.DataFrame | None = None,
    ) -> None:
        """Create a builder.

        Args:
            image: Image column name or names; takes precedence over
                ``image_columns`` when both are given.
            image_columns: Alternative spelling of ``image``.
            dataframe: Initial data; it is copied, so the caller's frame is
                never mutated.

        Raises:
            ValueError: If the frame has columns and a configured image
                column is not among them.
        """
        image = image if image is not None else image_columns
        if isinstance(image, str):
            image = [image]
        self.image_columns = list(image) if image is not None else []
        self.df = dataframe.copy() if dataframe is not None else pd.DataFrame()
        self._validate_image_columns()

    @classmethod
    def from_csv(
        cls,
        csv_path: str | Path,
        image: str | Sequence[str] | None = None,
        **kwargs,
    ) -> "VisCSV":
        """Load ``csv_path`` with ``pandas.read_csv`` and wrap it in a builder.

        Extra keyword arguments are forwarded to ``pandas.read_csv``.

        Raises:
            FileNotFoundError: If ``csv_path`` does not exist.
        """
        path = Path(csv_path)
        if not path.exists():
            raise FileNotFoundError(f"CSV not found: {path}")
        data = pd.read_csv(path, **kwargs)
        return cls(image=image, dataframe=data)

    def add(self, *args, **fields) -> None:
        """Append data, or a single image row described by keyword fields.

        A single positional DataFrame/dict/list/tuple with no keyword fields
        is dispatched through :meth:`add_any`. Otherwise the first positional
        argument is treated as an image path stored in the first configured
        image column, with ``fields`` supplying the remaining columns.

        Raises:
            TypeError: If no positional argument is given.
            ValueError: If an image path is given but no image column is
                configured.
        """
        if not args:
            raise TypeError("add() expects data or image path.")
        first = args[0]
        # A lone string/Path is an image path, not bulk data; routing it to
        # add_any() used to drop it without any error.
        is_image_path = isinstance(first, (str, Path))
        if len(args) == 1 and not fields and not is_image_path:
            self.add_any(first)
            return
        if not self.image_columns:
            raise ValueError("No image column configured. Use add_row/add_rows or set image column.")
        row = {self.image_columns[0]: str(first), **fields}
        self.add_row(row)

    def add_data(self, data) -> None:
        """Alias of :meth:`add_any`."""
        self.add_any(data)

    def add_any(self, data) -> None:
        """Append ``data``, choosing row or rows handling from its shape.

        DataFrames and sequences whose first element is a dict, list or tuple
        are appended as multiple rows; a dict or a flat list/tuple is appended
        as one row.

        Raises:
            TypeError: If ``data`` is none of the supported types.
        """
        if isinstance(data, pd.DataFrame):
            self.add_rows(data)
            return
        if isinstance(data, dict):
            self.add_row(data)
            return
        if isinstance(data, (list, tuple)):
            if data and isinstance(data[0], (dict, list, tuple)):
                self.add_rows(data)
                return
            self.add_row(data)
            return
        # Unsupported inputs used to be ignored silently, losing data.
        raise TypeError(
            f"Unsupported data type for add_any(): {type(data).__name__}"
        )

    def add_row(self, row: dict | Sequence) -> None:
        """Append one row given as a dict or as a positional list/tuple.

        Positional rows are matched against the existing columns in order.

        Raises:
            ValueError: If a positional row is given before any columns
                exist, or an image column is missing afterwards.
            TypeError: If ``row`` is neither a dict nor a list/tuple.
        """
        if isinstance(row, dict):
            df = pd.DataFrame([row])
        elif isinstance(row, (list, tuple)):
            if not len(self.df.columns):
                raise ValueError("Cannot append list row without existing columns.")
            df = pd.DataFrame([row], columns=list(self.df.columns))
        else:
            raise TypeError("row must be a dict or a list/tuple.")
        self.df = pd.concat([self.df, df], ignore_index=True)
        self._validate_image_columns()

    def add_rows(self, rows: Iterable) -> None:
        """Append several rows from a DataFrame or an iterable of rows.

        The type of the first row decides the handling: dicts are keyed by
        column name, anything else is matched positionally against the
        existing columns. An empty iterable is a no-op.

        Raises:
            ValueError: If positional rows are given before any columns
                exist, or an image column is missing afterwards.
        """
        if isinstance(rows, pd.DataFrame):
            df = rows
        else:
            rows = list(rows)
            if not rows:
                return
            if isinstance(rows[0], dict):
                df = pd.DataFrame(rows)
            else:
                if not len(self.df.columns):
                    raise ValueError("Cannot append list rows without existing columns.")
                df = pd.DataFrame(rows, columns=list(self.df.columns))
        self.df = pd.concat([self.df, df], ignore_index=True)
        self._validate_image_columns()

    def to_csv(self, csv_path: str | Path, **kwargs) -> None:
        """Write the collected rows to ``csv_path`` without the index column."""
        path = Path(csv_path)
        self.df.to_csv(path, index=False, **kwargs)

    def build(
        self,
        output: str | Path = "output",
        thumb_size: int = 256,
        batch: int = 80,
        image_column: str | None = None,
        launch: bool = False,
        type_hints: Mapping[str, str] | None = None,
        chunk_size: int = 500,
    ) -> Path:
        """Render the collected data through :func:`render`.

        ``image_column`` defaults to the first configured image column; all
        other arguments are forwarded unchanged. Returns whatever
        :func:`render` returns.
        """
        image_column = image_column or (self.image_columns[0] if self.image_columns else None)
        return render(
            self.df,
            image=image_column,
            output=output,
            thumb_size=thumb_size,
            batch=batch,
            image_columns=self.image_columns,
            launch=launch,
            type_hints=type_hints,
            chunk_size=chunk_size,
        )

    def _validate_image_columns(self) -> None:
        """Raise ``ValueError`` if a configured image column is missing.

        Validation is skipped while no image columns are configured or the
        frame has no columns yet, so a builder can start empty.
        """
        if not self.image_columns:
            return
        if len(self.df.columns) == 0:
            return
        for col in self.image_columns:
            if col not in self.df.columns:
                raise ValueError(f"Image column not found: {col}")
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def render(
    data: str | Path | pd.DataFrame | list[dict] | list[tuple],
    image: str | None = None,
    output: str | Path = "output",
    columns: list[str] | None = None,
    thumb_size: int = 256,
    batch: int = 80,
    image_columns: Sequence[str] | None = None,
    launch: bool = False,
    type_hints: Mapping[str, str] | None = None,
    chunk_size: int = 500,
) -> Path:
    """Normalize ``data`` and build the viewer for it.

    Args:
        data: CSV path, DataFrame, or list of row dicts/tuples.
        image: Primary image column; when given it is always placed first in
            the image-column list handed to the viewer builder.
        output: Output directory.
        columns: Names of the non-image columns for tuple data.
        thumb_size: Thumbnail size forwarded to the builder.
        batch: Batch size forwarded to the builder.
        image_columns: Explicit image columns; when ``None`` they are
            inferred from the data with ``infer_image_columns``.
        launch: Forwarded to the builder.
        type_hints: Per-column type overrides forwarded to the builder.
        chunk_size: Records per metadata chunk, forwarded to the builder.

    Returns:
        The path returned by ``_build_viewer_from_df``.
    """
    df = _normalize_data(data, image=image, columns=columns)
    if image_columns is None:
        resolved = infer_image_columns(df)
    else:
        resolved = list(image_columns)
    if image is not None:
        # The builder treats the first entry as the primary image column, so
        # the explicit choice must lead even when it was already listed
        # further back (previously it was only moved when absent).
        resolved = [image] + [c for c in resolved if c != image]
    return _build_viewer_from_df(
        df,
        image_columns=resolved,
        output=output,
        thumb_size=thumb_size,
        batch=batch,
        launch=launch,
        type_hints=type_hints,
        chunk_size=chunk_size,
    )
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def build_viewer(
    data: str | Path | pd.DataFrame | list[dict] | list[tuple],
    image_column: str = "image",
    output_dir: str = "output",
    columns: list[str] | None = None,
    thumb_size: int = 256,
    batch_render: int = 80,
    launch: bool = False,
    type_hints: Mapping[str, str] | None = None,
    chunk_size: int = 500,
) -> Path:
    """Call :func:`render` using this function's parameter names.

    ``image_column`` is passed both as the primary image and as the only
    entry of ``image_columns``; ``output_dir`` and ``batch_render`` map onto
    ``output`` and ``batch``. Returns whatever :func:`render` returns.
    """
    forwarded = {
        "image": image_column,
        "output": output_dir,
        "columns": columns,
        "thumb_size": thumb_size,
        "batch": batch_render,
        "image_columns": [image_column],
        "launch": launch,
        "type_hints": type_hints,
        "chunk_size": chunk_size,
    }
    return render(data, **forwarded)
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def preview(
    data: str | Path | pd.DataFrame | list[dict] | list[tuple],
    image: str | None = None,
    **kwargs,
) -> Path:
    """Forward ``data``, ``image`` and any extra options to :func:`render`."""
    options = dict(kwargs, image=image)
    return render(data, **options)
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def _normalize_data(
|
|
201
|
+
data: str | Path | pd.DataFrame | list[dict] | list[tuple],
|
|
202
|
+
image: str | None,
|
|
203
|
+
columns: list[str] | None,
|
|
204
|
+
) -> pd.DataFrame:
|
|
205
|
+
if isinstance(data, (str, Path)):
|
|
206
|
+
path = Path(data)
|
|
207
|
+
if not path.exists():
|
|
208
|
+
raise FileNotFoundError(f"CSV not found: {path}")
|
|
209
|
+
df = pd.read_csv(path)
|
|
210
|
+
elif isinstance(data, pd.DataFrame):
|
|
211
|
+
df = data.copy()
|
|
212
|
+
elif isinstance(data, list) and data:
|
|
213
|
+
if isinstance(data[0], tuple):
|
|
214
|
+
if image is None:
|
|
215
|
+
raise ValueError("image is required when using tuple data.")
|
|
216
|
+
if columns is None:
|
|
217
|
+
raise ValueError("columns is required when using tuple data.")
|
|
218
|
+
df = pd.DataFrame(data, columns=[image] + list(columns))
|
|
219
|
+
elif isinstance(data[0], dict):
|
|
220
|
+
df = pd.DataFrame(data)
|
|
221
|
+
else:
|
|
222
|
+
raise TypeError("Unsupported data format for render.")
|
|
223
|
+
else:
|
|
224
|
+
raise TypeError("Data must be a CSV path, DataFrame, or non-empty list.")
|
|
225
|
+
|
|
226
|
+
if image is not None and image not in df.columns:
|
|
227
|
+
raise ValueError(f"Image column not found: {image}")
|
|
228
|
+
return df
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
def infer_image_columns(df: pd.DataFrame, sample_size: int = 50) -> list[str]:
    """Guess which text columns of ``df`` hold image paths or image URLs.

    A column qualifies when at least 60% of its first ``sample_size``
    non-null values look like images, or when its lower-cased name contains
    one of the name hints (substring match). Columns with no non-null values
    are skipped.

    Args:
        df: Frame to inspect.
        sample_size: Maximum number of non-null values examined per column.

    Returns:
        Candidate column labels in frame order.
    """
    name_hints = {"image", "img", "picture", "pic", "photo", "thumbnail", "thumb", "path", "filepath", "file"}
    image_exts = (".jpg", ".jpeg", ".png", ".webp", ".bmp", ".gif")
    is_object = pd.api.types.is_object_dtype
    is_string = pd.api.types.is_string_dtype

    candidates = []
    for col in df.columns:
        column = df[col]
        # Text may be stored as object dtype or as a dedicated pandas string
        # dtype; checking object alone skipped string-dtype columns.
        if not (is_object(column) or is_string(column)):
            continue
        sample = column.dropna().astype(str).head(sample_size)
        if sample.empty:
            continue
        hits = 0
        for raw in sample:
            text = raw.strip()
            lowered = text.lower()
            if text.startswith(("http://", "https://")):
                # URLs may carry a query string after the extension, so look
                # for the extension anywhere in the URL.
                if any(ext in lowered for ext in image_exts):
                    hits += 1
            elif lowered.endswith(image_exts):
                hits += 1
        ratio = hits / max(len(sample), 1)
        col_lower = str(col).lower()
        if ratio >= 0.6 or any(hint in col_lower for hint in name_hints):
            candidates.append(col)
    return candidates
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
def _infer_column_types(
|
|
259
|
+
df: pd.DataFrame,
|
|
260
|
+
image_columns: Sequence[str],
|
|
261
|
+
type_hints: Mapping[str, str] | None = None,
|
|
262
|
+
sample_size: int = 80,
|
|
263
|
+
) -> dict[str, str]:
|
|
264
|
+
result: dict[str, str] = {}
|
|
265
|
+
type_hints = type_hints or {}
|
|
266
|
+
for col in df.columns:
|
|
267
|
+
if col in image_columns:
|
|
268
|
+
result[col] = "image"
|
|
269
|
+
continue
|
|
270
|
+
if col in type_hints:
|
|
271
|
+
result[col] = type_hints[col]
|
|
272
|
+
continue
|
|
273
|
+
result[col] = _infer_semantic_type(df[col], sample_size=sample_size)
|
|
274
|
+
return result
|
|
275
|
+
|
|
276
|
+
|
|
277
|
+
def _infer_semantic_type(series: pd.Series, sample_size: int = 80) -> str:
|
|
278
|
+
dtype = series.dtype
|
|
279
|
+
if pd.api.types.is_bool_dtype(dtype):
|
|
280
|
+
return "boolean"
|
|
281
|
+
if pd.api.types.is_numeric_dtype(dtype):
|
|
282
|
+
return "number"
|
|
283
|
+
if pd.api.types.is_datetime64_any_dtype(dtype):
|
|
284
|
+
return "datetime"
|
|
285
|
+
|
|
286
|
+
sample = series.dropna().astype(str).head(sample_size)
|
|
287
|
+
if sample.empty:
|
|
288
|
+
return "string"
|
|
289
|
+
|
|
290
|
+
sample_list = [v.strip() for v in sample if v and str(v).strip()]
|
|
291
|
+
if not sample_list:
|
|
292
|
+
return "string"
|
|
293
|
+
|
|
294
|
+
json_hits = sum(1 for s in sample_list if _looks_like_json(s))
|
|
295
|
+
md_hits = sum(1 for s in sample_list if _looks_like_markdown(s))
|
|
296
|
+
code_hits = sum(1 for s in sample_list if _looks_like_code(s))
|
|
297
|
+
num_hits = _count_numeric(sample_list)
|
|
298
|
+
date_hits = _count_datetime(sample_list)
|
|
299
|
+
|
|
300
|
+
ratio = lambda n: n / max(len(sample_list), 1)
|
|
301
|
+
if ratio(json_hits) >= 0.6:
|
|
302
|
+
return "json"
|
|
303
|
+
if ratio(md_hits) >= 0.5:
|
|
304
|
+
return "markdown"
|
|
305
|
+
if ratio(code_hits) >= 0.5:
|
|
306
|
+
return "code"
|
|
307
|
+
if ratio(date_hits) >= 0.6:
|
|
308
|
+
return "datetime"
|
|
309
|
+
if ratio(num_hits) >= 0.8:
|
|
310
|
+
return "number"
|
|
311
|
+
return "string"
|
|
312
|
+
|
|
313
|
+
|
|
314
|
+
def _looks_like_json(text: str) -> bool:
|
|
315
|
+
if not (text.startswith("{") and text.endswith("}")) and not (
|
|
316
|
+
text.startswith("[") and text.endswith("]")
|
|
317
|
+
):
|
|
318
|
+
return False
|
|
319
|
+
try:
|
|
320
|
+
json.loads(text)
|
|
321
|
+
return True
|
|
322
|
+
except Exception:
|
|
323
|
+
return False
|
|
324
|
+
|
|
325
|
+
|
|
326
|
+
def _looks_like_markdown(text: str) -> bool:
|
|
327
|
+
indicators = ("```", "# ", "## ", "- ", "* ", "> ", "[", "](", "**", "_")
|
|
328
|
+
return any(token in text for token in indicators)
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
def _looks_like_code(text: str) -> bool:
|
|
332
|
+
indicators = (";", "{", "}", "def ", "class ", "function ", "=>", "var ", "let ", "const ")
|
|
333
|
+
return any(token in text for token in indicators)
|
|
334
|
+
|
|
335
|
+
def _count_numeric(values: list[str]) -> int:
|
|
336
|
+
if not values:
|
|
337
|
+
return 0
|
|
338
|
+
parsed = pd.to_numeric(pd.Series(values), errors="coerce")
|
|
339
|
+
return int(parsed.notna().sum())
|
|
340
|
+
|
|
341
|
+
|
|
342
|
+
def _count_datetime(values: list[str]) -> int:
|
|
343
|
+
if not values:
|
|
344
|
+
return 0
|
|
345
|
+
parsed = pd.to_datetime(
|
|
346
|
+
pd.Series(values),
|
|
347
|
+
errors="coerce",
|
|
348
|
+
utc=False,
|
|
349
|
+
format="mixed",
|
|
350
|
+
cache=True,
|
|
351
|
+
)
|
|
352
|
+
return int(parsed.notna().sum())
|
|
353
|
+
|
|
354
|
+
|
|
355
|
+
def _to_jsonable(value):
|
|
356
|
+
if pd.isna(value):
|
|
357
|
+
return None
|
|
358
|
+
if isinstance(value, (np.integer, np.floating, np.bool_)):
|
|
359
|
+
return value.item()
|
|
360
|
+
if isinstance(value, pd.Timestamp):
|
|
361
|
+
return value.isoformat()
|
|
362
|
+
return value
|
|
363
|
+
|
|
364
|
+
|
|
365
|
+
def _build_viewer_from_df(
    df: pd.DataFrame,
    image_columns: Sequence[str],
    output: str | Path,
    thumb_size: int,
    batch: int,
    launch: bool,
    type_hints: Mapping[str, str] | None,
    chunk_size: int,
) -> Path:
    """Write the viewer page, metadata chunks and image assets for ``df``.

    Creates ``output`` (plus ``images/`` and ``thumbs/`` when there are image
    columns), stages every local image file referenced by an image column,
    writes the rows as ``meta_NNNN.js`` chunks of at most ``chunk_size``
    records (a single chunk when ``chunk_size`` is not positive), and finally
    writes ``viewer.html``.

    Args:
        df: Data to render.
        image_columns: Columns holding image paths; the first one is passed
            to the viewer as the primary image column.
        output: Output directory.
        thumb_size: Size passed to ``make_thumbnail``.
        batch: Passed to the viewer as ``batch_size``.
        launch: When true, open the generated page in the web browser.
        type_hints: Per-column type overrides.
        chunk_size: Maximum records per metadata chunk.

    Returns:
        Path of the generated ``viewer.html``.
    """
    out = Path(output)
    out.mkdir(parents=True, exist_ok=True)
    img_dir = out / "images"
    thumb_dir = out / "thumbs"
    if image_columns:
        img_dir.mkdir(parents=True, exist_ok=True)
        thumb_dir.mkdir(parents=True, exist_ok=True)

    columns = list(df.columns)
    records: list[dict] = []
    chunk_paths: list[Path] = []
    for i, row in enumerate(df.itertuples(index=False, name=None)):
        row_map = dict(zip(columns, row))
        img_map: dict[str, str | None] = {
            col: _stage_image(row_map[col], i, col, img_dir, thumb_dir, thumb_size)
            for col in image_columns
        }

        record = {"__img_id__": img_map}
        for col in columns:
            record[col] = _to_jsonable(row_map[col])
        records.append(record)

        if chunk_size > 0 and len(records) >= chunk_size:
            # The next chunk index always equals the number written so far.
            chunk_path = out / f"meta_{len(chunk_paths):04d}.js"
            write_meta_chunk_js(chunk_path, records)
            chunk_paths.append(chunk_path)
            records = []

    # Flush the remainder; an empty frame still gets one (empty) chunk.
    if records or not chunk_paths:
        chunk_path = out / f"meta_{len(chunk_paths):04d}.js"
        write_meta_chunk_js(chunk_path, records)
        chunk_paths.append(chunk_path)

    column_types = _infer_column_types(df, image_columns=image_columns, type_hints=type_hints)
    html_path = out / "viewer.html"
    write_viewer(
        html_path,
        columns=columns,
        column_types=column_types,
        image_column=image_columns[0] if image_columns else "",
        image_columns=list(image_columns),
        batch_size=batch,
        chunk_count=len(chunk_paths),
    )
    if launch:
        import webbrowser

        webbrowser.open(html_path.resolve().as_uri())
    return html_path


def _stage_image(raw_path, row_index, column, img_dir, thumb_dir, thumb_size):
    """Copy one local image and its thumbnail into the output; return its id.

    Returns ``None`` for missing values, ``http(s)`` URLs, and paths that are
    not existing regular files.
    """
    if pd.isna(raw_path):
        return None
    raw_text = str(raw_path)
    if raw_text.startswith(("http://", "https://")):
        return None
    img_path = Path(raw_text)
    # is_file() rather than exists(): Path("") is "." and directories also
    # "exist", which made shutil.copy fail on empty or directory cells.
    if not img_path.is_file():
        return None
    ext = img_path.suffix.lower() or ".jpg"
    name = f"{row_index:06d}_{column}{ext}"
    shutil.copy(img_path, img_dir / name)
    make_thumbnail(img_path, thumb_dir / name, thumb_size)
    return name
|
viscsv/meta.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from typing import Iterable
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def write_meta_jsonl(meta_path: Path, records: Iterable[dict]) -> None:
    """Write ``records`` to ``meta_path`` as UTF-8 JSON Lines, one object per line."""
    with meta_path.open("w", encoding="utf-8") as handle:
        handle.writelines(
            json.dumps(record, ensure_ascii=False) + "\n" for record in records
        )
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def write_meta_chunk_js(meta_path: Path, records: list[dict]) -> None:
    """Write ``records`` as a JavaScript file assigning them to ``window.__VISCSV_LAST_CHUNK__``."""
    serialized = json.dumps(records, ensure_ascii=False)
    meta_path.write_text(
        f"window.__VISCSV_LAST_CHUNK__ = {serialized};",
        encoding="utf-8",
    )
|
viscsv/thumbnail.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
from PIL import Image, ImageDraw
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def _save_image(img: Image.Image, dst: Path, quality: int) -> None:
    """Save ``img`` to ``dst`` as PNG when the suffix is ``.png``, else as JPEG.

    Args:
        img: Image to write.
        dst: Destination path; only its suffix selects the format.
        quality: JPEG quality; ignored for PNG.
    """
    ext = dst.suffix.lower()
    if ext == ".png":
        img.save(dst, "PNG", optimize=True)
        return
    # JPEG cannot store alpha or palette modes (RGBA, LA, P, ...). Without
    # this conversion the save raises and the caller falls back to a
    # placeholder thumbnail for a perfectly readable image.
    if img.mode not in ("RGB", "L", "CMYK"):
        img = img.convert("RGB")
    img.save(dst, "JPEG", quality=quality)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def make_thumbnail(src: Path, dst: Path, size: int, quality: int = 85) -> bool:
    """Write a thumbnail of ``src`` to ``dst``, or a placeholder on failure.

    Args:
        src: Source image path.
        dst: Destination path for the thumbnail.
        size: Maximum width and height of the thumbnail in pixels; also the
            edge length of the square placeholder.
        quality: JPEG quality passed to ``_save_image``.

    Returns:
        True when a real thumbnail was written, False when the source could
        not be processed and a grey crossed-out placeholder was written.
    """
    try:
        # The context manager closes the underlying file once the thumbnail
        # has been written; the handle was previously left open.
        with Image.open(src) as img:
            img.thumbnail((size, size))
            _save_image(img, dst, quality)
        return True
    except Exception:
        # Deliberate best effort: any failure to read or save the source
        # yields a placeholder rather than aborting the whole build.
        img = Image.new("RGB", (size, size), (235, 235, 235))
        draw = ImageDraw.Draw(img)
        draw.rectangle([(0, 0), (size - 1, size - 1)], outline=(190, 190, 190))
        draw.line([(0, 0), (size - 1, size - 1)], fill=(200, 200, 200), width=2)
        draw.line([(0, size - 1), (size - 1, 0)], fill=(200, 200, 200), width=2)
        _save_image(img, dst, quality)
        return False
|
viscsv/viewer.py
ADDED
|
@@ -0,0 +1,772 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
import json
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
HTML_TEMPLATE = """<!DOCTYPE html>
|
|
6
|
+
<html>
|
|
7
|
+
<head>
|
|
8
|
+
<meta charset="utf-8">
|
|
9
|
+
<title>VisCSV Viewer</title>
|
|
10
|
+
<style>
|
|
11
|
+
:root {
|
|
12
|
+
--bg: #f4f5f7;
|
|
13
|
+
--panel: #ffffff;
|
|
14
|
+
--ink: #1b1f24;
|
|
15
|
+
--muted: #5b6472;
|
|
16
|
+
--accent: #2a5d7c;
|
|
17
|
+
--accent-soft: #e0ecf4;
|
|
18
|
+
--border: #e2e6ea;
|
|
19
|
+
--shadow: 0 8px 30px rgba(16, 24, 40, 0.08);
|
|
20
|
+
--mono: "IBM Plex Mono", "SFMono-Regular", Consolas, "Liberation Mono", Menlo, monospace;
|
|
21
|
+
--sans: "Source Sans 3", "Segoe UI", "Helvetica Neue", Arial, sans-serif;
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
* { box-sizing: border-box; }
|
|
25
|
+
body {
|
|
26
|
+
margin: 0;
|
|
27
|
+
font-family: var(--sans);
|
|
28
|
+
color: var(--ink);
|
|
29
|
+
background: radial-gradient(1200px 600px at 10% -10%, #f0f6fb, transparent),
|
|
30
|
+
radial-gradient(900px 500px at 100% 10%, #f2f0fb, transparent),
|
|
31
|
+
var(--bg);
|
|
32
|
+
height: 100vh;
|
|
33
|
+
overflow: hidden;
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
.page {
|
|
37
|
+
height: 100vh;
|
|
38
|
+
display: flex;
|
|
39
|
+
flex-direction: column;
|
|
40
|
+
overflow: hidden;
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
header {
|
|
44
|
+
padding: 22px 36px 12px;
|
|
45
|
+
display: flex;
|
|
46
|
+
align-items: center;
|
|
47
|
+
justify-content: space-between;
|
|
48
|
+
gap: 16px;
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
h1 {
|
|
52
|
+
margin: 0 0 6px;
|
|
53
|
+
font-weight: 700;
|
|
54
|
+
font-size: 26px;
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
.subtitle {
|
|
58
|
+
color: var(--muted);
|
|
59
|
+
font-size: 14px;
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
.actions {
|
|
63
|
+
display: flex;
|
|
64
|
+
gap: 10px;
|
|
65
|
+
flex-wrap: wrap;
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
.action-btn {
|
|
69
|
+
border: 1px solid var(--border);
|
|
70
|
+
background: var(--panel);
|
|
71
|
+
border-radius: 999px;
|
|
72
|
+
padding: 8px 14px;
|
|
73
|
+
font-size: 13px;
|
|
74
|
+
cursor: pointer;
|
|
75
|
+
box-shadow: var(--shadow);
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
.drawer {
|
|
79
|
+
display: none;
|
|
80
|
+
grid-template-columns: repeat(auto-fit, minmax(220px, 1fr));
|
|
81
|
+
gap: 14px;
|
|
82
|
+
padding: 10px 36px 18px;
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
.drawer.active {
|
|
86
|
+
display: grid;
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
.group {
|
|
90
|
+
background: var(--panel);
|
|
91
|
+
border: 1px solid var(--border);
|
|
92
|
+
border-radius: 12px;
|
|
93
|
+
padding: 12px 14px;
|
|
94
|
+
box-shadow: var(--shadow);
|
|
95
|
+
display: grid;
|
|
96
|
+
gap: 8px;
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
.group label {
|
|
100
|
+
font-size: 12px;
|
|
101
|
+
color: var(--muted);
|
|
102
|
+
text-transform: uppercase;
|
|
103
|
+
letter-spacing: 0.06em;
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
.group input,
|
|
107
|
+
.group select {
|
|
108
|
+
border: 1px solid var(--border);
|
|
109
|
+
border-radius: 8px;
|
|
110
|
+
padding: 8px 10px;
|
|
111
|
+
font-size: 14px;
|
|
112
|
+
outline: none;
|
|
113
|
+
background: #fff;
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
.stats {
|
|
117
|
+
display: grid;
|
|
118
|
+
gap: 6px;
|
|
119
|
+
font-size: 13px;
|
|
120
|
+
color: var(--muted);
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
.stats strong {
|
|
124
|
+
color: var(--ink);
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
.table-wrap {
|
|
128
|
+
padding: 10px 36px 10px;
|
|
129
|
+
overflow: auto;
|
|
130
|
+
flex: 1 1 auto;
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
table {
|
|
134
|
+
width: max-content;
|
|
135
|
+
min-width: 100%;
|
|
136
|
+
table-layout: auto;
|
|
137
|
+
border-collapse: collapse;
|
|
138
|
+
background: var(--panel);
|
|
139
|
+
border-radius: 14px;
|
|
140
|
+
overflow: hidden;
|
|
141
|
+
box-shadow: var(--shadow);
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
thead th {
|
|
145
|
+
position: sticky;
|
|
146
|
+
top: 0;
|
|
147
|
+
background: #f8fafc;
|
|
148
|
+
border-bottom: 1px solid var(--border);
|
|
149
|
+
text-align: left;
|
|
150
|
+
font-size: 13px;
|
|
151
|
+
padding: 12px;
|
|
152
|
+
z-index: 2;
|
|
153
|
+
min-width: 140px;
|
|
154
|
+
white-space: nowrap;
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
.th-wrap {
|
|
158
|
+
display: flex;
|
|
159
|
+
align-items: center;
|
|
160
|
+
justify-content: space-between;
|
|
161
|
+
gap: 8px;
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
.col-resizer {
|
|
165
|
+
width: 6px;
|
|
166
|
+
height: 22px;
|
|
167
|
+
cursor: col-resize;
|
|
168
|
+
border-radius: 4px;
|
|
169
|
+
background: rgba(0, 0, 0, 0.06);
|
|
170
|
+
flex: 0 0 auto;
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
thead th.sorted {
|
|
174
|
+
background: var(--accent-soft);
|
|
175
|
+
color: var(--accent);
|
|
176
|
+
}
|
|
177
|
+
|
|
178
|
+
tbody td {
|
|
179
|
+
border-bottom: 1px solid var(--border);
|
|
180
|
+
padding: 10px 12px;
|
|
181
|
+
font-size: 13px;
|
|
182
|
+
vertical-align: top;
|
|
183
|
+
max-width: 320px;
|
|
184
|
+
word-break: break-word;
|
|
185
|
+
white-space: normal;
|
|
186
|
+
min-width: 140px;
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
tbody tr:hover {
|
|
190
|
+
background: #f7fafc;
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
.preview {
|
|
194
|
+
width: 120px;
|
|
195
|
+
}
|
|
196
|
+
|
|
197
|
+
.preview img {
|
|
198
|
+
width: 110px;
|
|
199
|
+
height: 110px;
|
|
200
|
+
object-fit: cover;
|
|
201
|
+
border-radius: 10px;
|
|
202
|
+
border: 1px solid #d7dee6;
|
|
203
|
+
cursor: pointer;
|
|
204
|
+
background: #fff;
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
.cell-image img {
|
|
208
|
+
width: 100%;
|
|
209
|
+
height: auto;
|
|
210
|
+
max-height: 160px;
|
|
211
|
+
object-fit: cover;
|
|
212
|
+
border-radius: 10px;
|
|
213
|
+
border: 1px solid #d7dee6;
|
|
214
|
+
cursor: pointer;
|
|
215
|
+
background: #fff;
|
|
216
|
+
}
|
|
217
|
+
|
|
218
|
+
.cell-number {
|
|
219
|
+
text-align: right;
|
|
220
|
+
font-variant-numeric: tabular-nums;
|
|
221
|
+
}
|
|
222
|
+
|
|
223
|
+
.cell-code {
|
|
224
|
+
font-family: var(--mono);
|
|
225
|
+
background: #f3f4f6;
|
|
226
|
+
border-radius: 6px;
|
|
227
|
+
padding: 6px 8px;
|
|
228
|
+
display: block;
|
|
229
|
+
width: 100%;
|
|
230
|
+
box-sizing: border-box;
|
|
231
|
+
}
|
|
232
|
+
|
|
233
|
+
.cell-json {
|
|
234
|
+
font-family: var(--mono);
|
|
235
|
+
white-space: pre-wrap;
|
|
236
|
+
background: #f8fafc;
|
|
237
|
+
border-radius: 8px;
|
|
238
|
+
padding: 8px 10px;
|
|
239
|
+
display: block;
|
|
240
|
+
width: 100%;
|
|
241
|
+
box-sizing: border-box;
|
|
242
|
+
}
|
|
243
|
+
|
|
244
|
+
.cell-date {
|
|
245
|
+
font-family: var(--mono);
|
|
246
|
+
}
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
.badge {
|
|
250
|
+
display: inline-block;
|
|
251
|
+
padding: 2px 8px;
|
|
252
|
+
border-radius: 999px;
|
|
253
|
+
background: var(--accent-soft);
|
|
254
|
+
color: var(--accent);
|
|
255
|
+
font-size: 11px;
|
|
256
|
+
margin-right: 6px;
|
|
257
|
+
}
|
|
258
|
+
|
|
259
|
+
.value-null {
|
|
260
|
+
color: #9aa4af;
|
|
261
|
+
font-family: var(--mono);
|
|
262
|
+
}
|
|
263
|
+
|
|
264
|
+
.footer {
|
|
265
|
+
padding: 6px 36px 12px;
|
|
266
|
+
color: var(--muted);
|
|
267
|
+
font-size: 12px;
|
|
268
|
+
}
|
|
269
|
+
|
|
270
|
+
@media (max-width: 860px) {
|
|
271
|
+
header { padding: 18px 18px 8px; flex-direction: column; align-items: flex-start; }
|
|
272
|
+
.drawer, .table-wrap, .footer { padding-left: 18px; padding-right: 18px; }
|
|
273
|
+
.cell-image img { width: 90px; height: 90px; }
|
|
274
|
+
}
|
|
275
|
+
</style>
|
|
276
|
+
</head>
|
|
277
|
+
<body>
|
|
278
|
+
<div class="page">
|
|
279
|
+
|
|
280
|
+
<header>
|
|
281
|
+
<div>
|
|
282
|
+
<h1>VisCSV Image Table</h1>
|
|
283
|
+
<div class="subtitle">Focus on the data preview. Controls are tucked into drawers.</div>
|
|
284
|
+
</div>
|
|
285
|
+
<div class="actions">
|
|
286
|
+
<button class="action-btn" data-toggle="search">Search</button>
|
|
287
|
+
<button class="action-btn" data-toggle="filter">Filter</button>
|
|
288
|
+
<button class="action-btn" data-toggle="sort">Sort</button>
|
|
289
|
+
<button class="action-btn" data-toggle="stats">Stats</button>
|
|
290
|
+
</div>
|
|
291
|
+
</header>
|
|
292
|
+
|
|
293
|
+
<section class="drawer" id="drawer-search">
|
|
294
|
+
<div class="group">
|
|
295
|
+
<label for="searchInput">Search</label>
|
|
296
|
+
<input id="searchInput" placeholder="Search all columns" autocomplete="off">
|
|
297
|
+
<div class="stats"><span>Total: <strong id="totalCount">0</strong></span>
|
|
298
|
+
<span>Filtered: <strong id="filteredCount">0</strong></span></div>
|
|
299
|
+
</div>
|
|
300
|
+
</section>
|
|
301
|
+
|
|
302
|
+
<section class="drawer" id="drawer-filter">
|
|
303
|
+
<div class="group">
|
|
304
|
+
<label>Filter</label>
|
|
305
|
+
<select id="filterColumn"></select>
|
|
306
|
+
<input id="filterValue" placeholder="Contains" autocomplete="off">
|
|
307
|
+
</div>
|
|
308
|
+
</section>
|
|
309
|
+
|
|
310
|
+
<section class="drawer" id="drawer-sort">
|
|
311
|
+
<div class="group">
|
|
312
|
+
<label>Sort</label>
|
|
313
|
+
<select id="sortColumn"></select>
|
|
314
|
+
<select id="sortDir">
|
|
315
|
+
<option value="asc">Ascending</option>
|
|
316
|
+
<option value="desc">Descending</option>
|
|
317
|
+
</select>
|
|
318
|
+
</div>
|
|
319
|
+
</section>
|
|
320
|
+
|
|
321
|
+
<section class="drawer" id="drawer-stats">
|
|
322
|
+
<div class="group">
|
|
323
|
+
<label>Stats Column</label>
|
|
324
|
+
<select id="statsColumn"></select>
|
|
325
|
+
<div class="stats" id="statsPanel"></div>
|
|
326
|
+
</div>
|
|
327
|
+
</section>
|
|
328
|
+
|
|
329
|
+
<section class="table-wrap">
|
|
330
|
+
<table>
|
|
331
|
+
<thead>
|
|
332
|
+
<tr>
|
|
333
|
+
{headers}
|
|
334
|
+
</tr>
|
|
335
|
+
</thead>
|
|
336
|
+
<tbody id="tbody"></tbody>
|
|
337
|
+
</table>
|
|
338
|
+
</section>
|
|
339
|
+
|
|
340
|
+
<div class="footer">
|
|
341
|
+
Tip: Use the filter to quickly narrow a column, then sort for easier comparison.
|
|
342
|
+
</div>
|
|
343
|
+
|
|
344
|
+
<script>
|
|
345
|
+
const batchSize = {batch_size};
|
|
346
|
+
const columns = {columns};
|
|
347
|
+
const columnTypes = {column_types};
|
|
348
|
+
const imageColumn = {image_column};
|
|
349
|
+
const imageColumns = {image_columns};
|
|
350
|
+
const chunkCount = {chunk_count};
|
|
351
|
+
|
|
352
|
+
const placeholder =
|
|
353
|
+
"data:image/svg+xml;charset=utf-8," +
|
|
354
|
+
encodeURIComponent(`<svg xmlns='http://www.w3.org/2000/svg' width='120' height='120'>
|
|
355
|
+
<rect width='100%' height='100%' fill='#f1f3f5'/>
|
|
356
|
+
<path d='M10 10L110 110M110 10L10 110' stroke='#cbd5e1' stroke-width='4'/>
|
|
357
|
+
</svg>`);
|
|
358
|
+
|
|
359
|
+
let data = [];
|
|
360
|
+
let view = [];
|
|
361
|
+
let cursor = 0;
|
|
362
|
+
|
|
363
|
+
const tbody = document.getElementById("tbody");
|
|
364
|
+
const totalCount = document.getElementById("totalCount");
|
|
365
|
+
const filteredCount = document.getElementById("filteredCount");
|
|
366
|
+
const filterColumn = document.getElementById("filterColumn");
|
|
367
|
+
const filterValue = document.getElementById("filterValue");
|
|
368
|
+
const sortColumn = document.getElementById("sortColumn");
|
|
369
|
+
const sortDir = document.getElementById("sortDir");
|
|
370
|
+
const searchInput = document.getElementById("searchInput");
|
|
371
|
+
const statsColumn = document.getElementById("statsColumn");
|
|
372
|
+
const statsPanel = document.getElementById("statsPanel");
|
|
373
|
+
const colSpan = columns.length;
|
|
374
|
+
const table = document.querySelector("table");
|
|
375
|
+
const tableWrap = document.querySelector(".table-wrap");
|
|
376
|
+
|
|
377
|
+
const drawers = {
|
|
378
|
+
search: document.getElementById("drawer-search"),
|
|
379
|
+
filter: document.getElementById("drawer-filter"),
|
|
380
|
+
sort: document.getElementById("drawer-sort"),
|
|
381
|
+
stats: document.getElementById("drawer-stats"),
|
|
382
|
+
};
|
|
383
|
+
|
|
384
|
+
let columnsOrder = [...columns];
|
|
385
|
+
const columnWidths = {};
|
|
386
|
+
|
|
387
|
+
document.querySelectorAll("[data-toggle]").forEach(btn => {
|
|
388
|
+
btn.addEventListener("click", () => {
|
|
389
|
+
const key = btn.getAttribute("data-toggle");
|
|
390
|
+
Object.entries(drawers).forEach(([k, el]) => {
|
|
391
|
+
el.classList.toggle("active", k === key ? !el.classList.contains("active") : false);
|
|
392
|
+
});
|
|
393
|
+
});
|
|
394
|
+
});
|
|
395
|
+
|
|
396
|
+
function setOptions(select, items) {
|
|
397
|
+
select.innerHTML = items.map(v => `<option value="${v}">${v}</option>`).join("");
|
|
398
|
+
}
|
|
399
|
+
|
|
400
|
+
function normalizeValue(value) {
|
|
401
|
+
if (value === null || value === undefined) return "";
|
|
402
|
+
return String(value).toLowerCase();
|
|
403
|
+
}
|
|
404
|
+
|
|
405
|
+
/**
 * Convert a cell value to a finite number.
 *
 * Number(null), Number("") and Number("   ") all evaluate to 0, so missing
 * values are rejected up front; otherwise they would be reported as the
 * number 0 instead of "no value".
 *
 * @param {*} value - Raw cell value.
 * @returns {number|null} The finite number, or null when the value is
 *   missing, blank, non-numeric, NaN or infinite.
 */
function parseNumber(value) {
  if (value === null || value === undefined) return null;
  if (typeof value === "string" && value.trim() === "") return null;
  const n = Number(value);
  return Number.isFinite(n) ? n : null;
}
|
|
409
|
+
|
|
410
|
+
/**
 * Parse a value with Date.parse.
 *
 * @param {*} value - Raw cell value.
 * @returns {number|null} Milliseconds since the Unix epoch, or null when
 *   Date.parse does not yield a finite timestamp.
 */
function parseDate(value) {
  const millis = Date.parse(value);
  if (Number.isFinite(millis)) {
    return millis;
  }
  return null;
}
|
|
414
|
+
|
|
415
|
+
/**
 * Three-way comparison of two cell values according to the column type.
 *
 * - "number" / "datetime": values are parsed with parseNumber / parseDate;
 *   unparseable values sort before parseable ones.
 * - "boolean": values are coerced with Boolean(); false sorts before true.
 * - anything else: case-insensitive string comparison via normalizeValue.
 *
 * @param {*} a - Left value.
 * @param {*} b - Right value.
 * @param {string} type - Column type name.
 * @returns {number} Negative, zero or positive, as expected by Array.prototype.sort.
 */
function compareValues(a, b, type) {
  // Shared ordering for parsed values: null (unparseable) comes first.
  const compareParsed = (x, y) => {
    if (x === null) return y === null ? 0 : -1;
    if (y === null) return 1;
    return x - y;
  };

  switch (type) {
    case "number":
      return compareParsed(parseNumber(a), parseNumber(b));
    case "datetime":
      return compareParsed(parseDate(a), parseDate(b));
    case "boolean": {
      // NOTE(review): Boolean("false") is true; this assumes cells hold real booleans — confirm.
      const left = Boolean(a);
      const right = Boolean(b);
      if (left === right) return 0;
      return left ? 1 : -1;
    }
    default: {
      const left = normalizeValue(a);
      const right = normalizeValue(b);
      if (left < right) return -1;
      return left > right ? 1 : 0;
    }
  }
}
|
|
443
|
+
|
|
444
|
+
/**
 * Sort `view` in place by the column chosen in the sort drawer, using the
 * column's declared type (default "string") and the selected direction.
 */
function sortView() {
  const key = sortColumn.value;
  const kind = columnTypes[key] || "string";
  const sign = sortDir.value === "desc" ? -1 : 1;
  view.sort((left, right) => {
    const order = compareValues(left[key], right[key], kind);
    return order * sign;
  });
}
|
|
450
|
+
|
|
451
|
+
/**
 * Recompute `view` from `data` using the search box and the column filter,
 * then re-sort and re-render the table from the first batch.
 *
 * With no data loaded, a single "No data" row and stats message are shown instead.
 */
function applyFilters() {
  if (data.length === 0) {
    tbody.innerHTML = `<tr><td colspan="${colSpan}" class="value-null">No data</td></tr>`;
    updateCounts();
    statsPanel.innerHTML = "<span>No data</span>";
    return;
  }

  const needle = searchInput.value.trim().toLowerCase();
  const targetColumn = filterColumn.value;
  const targetText = filterValue.value.trim().toLowerCase();

  // Free-text search matches when any column contains the needle.
  const matchesSearch = row =>
    !needle || columns.some(name => normalizeValue(row[name]).includes(needle));
  // The column filter matches when the chosen column contains the filter text.
  const matchesFilter = row =>
    !targetText || normalizeValue(row[targetColumn]).includes(targetText);

  view = data.filter(row => matchesSearch(row) && matchesFilter(row));

  sortView();
  cursor = 0;
  tbody.innerHTML = "";
  highlightSorted();
  renderBatch();
  updateCounts();
  updateStats();
}
|
|
481
|
+
|
|
482
|
+
/**
 * Put the "sorted" class on the header cell of the active sort column and
 * remove it from every other header cell.
 */
function highlightSorted() {
  const active = sortColumn.value;
  for (const cell of document.querySelectorAll("thead th")) {
    const isActive = cell.getAttribute("data-col") === active;
    cell.classList.toggle("sorted", isActive);
  }
}
|
|
488
|
+
|
|
489
|
+
/**
 * Append the next `batchSize` rows of `view` to the table body.
 *
 * `cursor` is advanced only to the last row actually rendered. Previously it
 * grew by `batchSize` on every call, so it kept increasing on each scroll
 * event after the whole view had already been rendered.
 */
function renderBatch() {
  const end = Math.min(cursor + batchSize, view.length);
  for (let i = cursor; i < end; i++) {
    const record = view[i];
    let row = "<tr>";
    for (const name of columnsOrder) {
      row += renderCell(record, name);
    }
    row += "</tr>";
    // Rows are inserted one at a time so malformed markup in one row cannot swallow the next.
    tbody.insertAdjacentHTML("beforeend", row);
  }
  // Never move backwards and never run past the end of the view.
  cursor = Math.max(cursor, end);
}
|
|
501
|
+
|
|
502
|
+
/**
 * Build the HTML for one table cell.
 *
 * Every piece of row data or column name that ends up in markup is passed
 * through escapeHtml, including attribute values, number cells, the datetime
 * fallback text and pretty-printed JSON.
 *
 * @param {Object} row - Data row; `row.__img_id__` may map column name to an image id.
 * @param {string} col - Column name.
 * @returns {string} A complete `<td>…</td>` string.
 */
function renderCell(row, col) {
  const v = row[col];
  const type = columnTypes[col] || "string";
  const dataCol = `data-col="${escapeHtml(String(col))}"`;
  const width = columnWidths[col];
  const widthStyle = width ? ` style="width:${width}px;min-width:${width}px"` : "";

  if (v === null || v === undefined || v === "") {
    return `<td ${dataCol}${widthStyle} class="value-null">null</td>`;
  }

  if ((type === "image" || imageColumns.includes(col)) && imageColumns.length) {
    const imgMap = row.__img_id__ || {};
    const imgId = imgMap[col];
    const raw = String(v);
    const isRemote = raw.startsWith("http");
    const src = imgId ? `thumbs/${imgId}` : (isRemote ? raw : placeholder);
    const openTarget = imgId ? `images/${imgId}` : (isRemote ? raw : null);
    // JSON.stringify yields a valid JS string literal; escapeHtml then makes it
    // safe inside the double-quoted onclick attribute.
    const onclick = openTarget
      ? ` onclick="window.open(${escapeHtml(JSON.stringify(openTarget))})"`
      : "";
    return `<td ${dataCol}${widthStyle} class="cell-image">` +
      `<img src="${escapeHtml(String(src))}" loading="lazy" alt="image"${onclick}>` +
      `</td>`;
  }

  if (type === "number") {
    return `<td ${dataCol}${widthStyle} class="cell-number">${escapeHtml(String(v))}</td>`;
  }

  if (type === "boolean") {
    return `<td ${dataCol}${widthStyle}><span class="badge">${String(Boolean(v))}</span></td>`;
  }

  if (type === "datetime") {
    const t = parseDate(v);
    // Compare against null: a timestamp of 0 (the Unix epoch) is a valid date.
    const parsed = t !== null;
    const text = parsed ? new Date(t).toLocaleString() : String(v);
    const iso = parsed ? new Date(t).toISOString() : String(v);
    return `<td ${dataCol}${widthStyle} class="cell-date" title="${escapeHtml(iso)}">${escapeHtml(text)}</td>`;
  }

  const s = String(v);
  if (type === "json" || s.trim().startsWith("{") || s.trim().startsWith("[")) {
    try {
      const pretty = JSON.stringify(JSON.parse(s), null, 2);
      return `<td ${dataCol}${widthStyle}><pre class="cell-json">${escapeHtml(pretty)}</pre></td>`;
    } catch (e) {
      // Not valid JSON after all: show the raw text.
      return `<td ${dataCol}${widthStyle}>${escapeHtml(s)}</td>`;
    }
  }

  if (type === "markdown") {
    return `<td ${dataCol}${widthStyle}>${escapeHtml(s)}</td>`;
  }

  if (type === "code" || s.includes(";") || (s.includes("{") && s.includes("}"))) {
    return `<td ${dataCol}${widthStyle}><code class="cell-code">${escapeHtml(s)}</code></td>`;
  }

  return `<td ${dataCol}${widthStyle}>${escapeHtml(s)}</td>`;
}
|
|
551
|
+
|
|
552
|
+
/**
 * Escape the five HTML-significant characters so text can be placed safely
 * in element content or in a quoted attribute value.
 *
 * All characters are replaced in a single pass, so the "&" of an inserted
 * entity is never escaped a second time.
 *
 * @param {*} str - Text to escape; non-strings are converted with String().
 * @returns {string}
 */
function escapeHtml(str) {
  const entities = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&#39;",
  };
  return String(str).replace(/[&<>"']/g, ch => entities[ch]);
}
|
|
560
|
+
|
|
561
|
+
/**
 * Rebuild the header row from `columnsOrder`, applying any stored column
 * widths, then re-attach the drag and resize handlers to the new cells.
 *
 * Column names are HTML-escaped before being placed in attributes or text.
 */
function renderHeader() {
  const headRow = document.querySelector("thead tr");
  headRow.innerHTML = columnsOrder.map(name => {
    const safe = escapeHtml(String(name));
    const w = columnWidths[name];
    const style = w ? ` style="width:${w}px;min-width:${w}px"` : "";
    return `<th data-col="${safe}" draggable="true"${style}>` +
      `<div class="th-wrap"><span>${safe}</span>` +
      `<span class="col-resizer" data-resize="${safe}"></span></div></th>`;
  }).join("");
  // The old cells (and their listeners) were discarded by the innerHTML assignment.
  attachDragHandlers();
  attachResizeHandlers();
}
|
|
571
|
+
|
|
572
|
+
/**
 * Enable drag-and-drop column reordering on the header cells.
 *
 * Dropping one header onto another moves the dragged column to the target's
 * position in `columnsOrder` and re-renders the header and the first batch.
 */
function attachDragHandlers() {
  document.querySelectorAll("thead th").forEach(th => {
    th.addEventListener("dragstart", e => {
      e.dataTransfer.setData("text/plain", th.getAttribute("data-col"));
    });
    // Cancelling dragover is what marks the cell as a valid drop target.
    th.addEventListener("dragover", e => e.preventDefault());
    th.addEventListener("drop", e => {
      e.preventDefault();
      const from = e.dataTransfer.getData("text/plain");
      const to = th.getAttribute("data-col");
      if (!from || !to || from === to) return;
      const fromIdx = columnsOrder.indexOf(from);
      const toIdx = columnsOrder.indexOf(to);
      // The dropped text may not be a column name (e.g. text dragged from elsewhere).
      // Without this guard, splice(-1, 1) would delete the last column.
      if (fromIdx === -1 || toIdx === -1) return;
      columnsOrder.splice(fromIdx, 1);
      columnsOrder.splice(toIdx, 0, from);
      tbody.innerHTML = "";
      cursor = 0;
      renderHeader();
      highlightSorted();
      renderBatch();
    });
  });
}
|
|
596
|
+
|
|
597
|
+
/**
 * Enable column resizing by dragging the `.col-resizer` handle in each header cell.
 *
 * While the mouse is held down, the header cell and all rendered body cells of
 * that column get the new width (minimum 80px), and the width is stored in
 * `columnWidths` so later-rendered cells use it too.
 */
function attachResizeHandlers() {
  document.querySelectorAll(".col-resizer").forEach(resizer => {
    resizer.addEventListener("mousedown", e => {
      e.preventDefault();
      // Keep the mousedown from starting a column drag on the enclosing <th>.
      e.stopPropagation();
      const col = resizer.getAttribute("data-resize");
      const th = resizer.closest("th");
      const startX = e.clientX;
      const startWidth = th.getBoundingClientRect().width;
      // CSS.escape keeps the selector valid when the column name contains quotes or backslashes.
      const cellSelector = `td[data-col="${CSS.escape(col)}"]`;

      function onMove(ev) {
        const delta = ev.clientX - startX;
        const next = Math.max(80, Math.round(startWidth + delta));
        const px = `${next}px`;
        columnWidths[col] = next;
        th.style.width = px;
        th.style.minWidth = px;
        document.querySelectorAll(cellSelector).forEach(td => {
          td.style.width = px;
          td.style.minWidth = px;
        });
      }

      function onUp() {
        window.removeEventListener("mousemove", onMove);
        window.removeEventListener("mouseup", onUp);
      }

      // Listen on window so the drag continues when the pointer leaves the handle.
      window.addEventListener("mousemove", onMove);
      window.addEventListener("mouseup", onUp);
    });
  });
}
|
|
629
|
+
|
|
630
|
+
/**
 * Refresh the row counters: rows matching the current filters, and rows loaded in total.
 */
function updateCounts() {
  filteredCount.textContent = view.length;
  totalCount.textContent = data.length;
}
|
|
634
|
+
|
|
635
|
+
/**
 * Render summary statistics for the column chosen in the stats drawer,
 * computed over the non-empty values of the current `view`.
 *
 * - number: count, min, max, mean
 * - boolean: true / false counts
 * - datetime: count, earliest and latest as ISO strings
 * - anything else: the five most frequent values
 */
function updateStats() {
  const col = statsColumn.value;
  const type = columnTypes[col] || "string";
  const values = view.map(r => r[col]).filter(v => v !== null && v !== undefined && v !== "");
  if (values.length === 0) {
    statsPanel.innerHTML = "<span>No data</span>";
    return;
  }

  // Single-pass min/max. Math.min(...list) passes every element as a call
  // argument and can throw RangeError on large views.
  const extent = list => {
    let lo = list[0];
    let hi = list[0];
    for (const x of list) {
      if (x < lo) lo = x;
      if (x > hi) hi = x;
    }
    return [lo, hi];
  };

  if (type === "number") {
    const nums = values.map(v => parseNumber(v)).filter(v => v !== null);
    if (!nums.length) {
      statsPanel.innerHTML = "<span>No numeric data</span>";
      return;
    }
    const [min, max] = extent(nums);
    const mean = nums.reduce((a, b) => a + b, 0) / nums.length;
    statsPanel.innerHTML = `<span class="badge">number</span>
      <div>count: <strong>${nums.length}</strong></div>
      <div>min: <strong>${min.toFixed(3)}</strong></div>
      <div>max: <strong>${max.toFixed(3)}</strong></div>
      <div>mean: <strong>${mean.toFixed(3)}</strong></div>`;
    return;
  }

  if (type === "boolean") {
    const counts = { true: 0, false: 0 };
    values.forEach(v => counts[Boolean(v)]++);
    statsPanel.innerHTML = `<span class="badge">boolean</span>
      <div>true: <strong>${counts.true}</strong></div>
      <div>false: <strong>${counts.false}</strong></div>`;
    return;
  }

  if (type === "datetime") {
    const times = values.map(v => parseDate(v)).filter(v => v !== null);
    if (!times.length) {
      statsPanel.innerHTML = "<span>No datetime data</span>";
      return;
    }
    const [first, last] = extent(times);
    const min = new Date(first).toISOString();
    const max = new Date(last).toISOString();
    statsPanel.innerHTML = `<span class="badge">datetime</span>
      <div>count: <strong>${times.length}</strong></div>
      <div>min: <strong>${min}</strong></div>
      <div>max: <strong>${max}</strong></div>`;
    return;
  }

  const freq = new Map();
  values.forEach(v => {
    const key = String(v);
    freq.set(key, (freq.get(key) || 0) + 1);
  });
  const top = Array.from(freq.entries()).sort((a, b) => b[1] - a[1]).slice(0, 5);
  // The keys are raw cell text, so they must be escaped before going into markup.
  statsPanel.innerHTML = `<span class="badge">string</span>` +
    top.map(([k, n]) => `<div>${escapeHtml(k)}: <strong>${n}</strong></div>`).join("");
}
|
|
694
|
+
|
|
695
|
+
/**
 * One-time UI setup: fill the three column pickers, select the first column
 * in each, re-filter whenever a control changes, and draw the header row.
 */
function init() {
  const pickers = [filterColumn, sortColumn, statsColumn];
  for (const picker of pickers) {
    setOptions(picker, columns);
  }
  for (const picker of pickers) {
    picker.value = columns[0];
  }

  // Text boxes re-filter on every keystroke; selects re-filter on change.
  for (const box of [searchInput, filterValue]) {
    box.addEventListener("input", applyFilters);
  }
  for (const control of [filterColumn, sortColumn, sortDir, statsColumn]) {
    control.addEventListener("change", applyFilters);
  }

  renderHeader();
}
|
|
709
|
+
|
|
710
|
+
// Bootstrap: wire up the controls, then start loading the data chunks.
init();
loadChunks();

// Infinite scroll: render the next batch once the viewport bottom is within
// 400px of the bottom of the page.
window.addEventListener("scroll", () => {
  const viewportBottom = window.innerHeight + window.scrollY;
  const threshold = document.body.offsetHeight - 400;
  if (viewportBottom < threshold) return;
  renderBatch();
});
|
|
718
|
+
|
|
719
|
+
/**
 * Load the data chunks `meta_0000.js` … one after another via <script> tags.
 *
 * After each script loads, the array found in `window.__VISCSV_LAST_CHUNK__`
 * is appended to `data` (presumably each chunk script assigns that global —
 * confirm against the chunk writer) and the table is refreshed. A chunk that
 * fails to load is reported on the console and skipped so the remaining
 * chunks still load. Once every chunk has been attempted, the filters are
 * applied one final time.
 */
function loadChunks() {
  let index = 0;

  function loadNext() {
    if (index >= chunkCount) {
      applyFilters();
      return;
    }
    const src = `meta_${String(index).padStart(4, "0")}.js`;
    const script = document.createElement("script");
    script.src = src;

    // The tag has done its job once it has loaded or failed; drop it and move on.
    const advance = () => {
      script.remove();
      index += 1;
      loadNext();
    };

    script.onload = () => {
      const chunk = Array.isArray(window.__VISCSV_LAST_CHUNK__) ? window.__VISCSV_LAST_CHUNK__ : [];
      if (chunk.length) {
        data = data.concat(chunk);
      }
      // Reset so a chunk that fails to set the global cannot re-add these rows.
      window.__VISCSV_LAST_CHUNK__ = [];
      applyFilters();
      advance();
    };
    script.onerror = () => {
      console.warn(`viscsv: failed to load ${src}; its rows are missing from the table`);
      advance();
    };
    document.body.appendChild(script);
  }

  loadNext();
}
|
|
746
|
+
</script>
|
|
747
|
+
|
|
748
|
+
</div>
|
|
749
|
+
</body>
|
|
750
|
+
</html>
|
|
751
|
+
"""
|
|
752
|
+
|
|
753
|
+
|
|
754
|
+
def write_viewer(
    html_path: Path,
    columns: list[str],
    column_types: dict[str, str],
    image_column: str,
    image_columns: list[str],
    batch_size: int,
    chunk_count: int,
):
    """Render ``HTML_TEMPLATE`` and write the viewer page to *html_path*.

    The template placeholders ``{headers}``, ``{columns}``, ``{column_types}``,
    ``{image_column}``, ``{image_columns}``, ``{batch_size}`` and
    ``{chunk_count}`` are filled in a single pass, so text that was just
    inserted (for example a column literally named ``{columns}``) is never
    substituted a second time.

    Args:
        html_path: Destination file; written as UTF-8.
        columns: Column names, in display order.
        column_types: Mapping of column name to viewer type name.
        image_column: Value substituted for ``{image_column}`` (as JSON).
        image_columns: Names of the image columns (as JSON).
        batch_size: Value substituted for ``{batch_size}``.
        chunk_count: Value substituted for ``{chunk_count}``.
    """
    import re
    from html import escape

    def script_json(value) -> str:
        # JSON is embedded in an inline <script>; a raw "<" would let a value
        # such as "</script>" terminate the script element early.
        return json.dumps(value).replace("<", "\\u003c")

    # Column names come from user data, so escape them for attribute and text use.
    headers = "".join(
        f'<th data-col="{escape(str(c), quote=True)}">{escape(str(c))}</th>'
        for c in columns
    )
    substitutions = {
        "{headers}": headers,
        "{columns}": script_json(columns),
        "{column_types}": script_json(column_types),
        "{image_column}": script_json(image_column),
        "{image_columns}": script_json(image_columns),
        "{batch_size}": str(batch_size),
        "{chunk_count}": str(chunk_count),
    }
    placeholder = re.compile("|".join(re.escape(key) for key in substitutions))
    rendered = placeholder.sub(lambda m: substitutions[m.group(0)], HTML_TEMPLATE)
    html_path.write_text(rendered, encoding="utf-8")
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: viscsv
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Turn CSV data (with image paths) into a clean, interactive HTML table
|
|
5
|
+
Requires-Python: >=3.10
|
|
6
|
+
Description-Content-Type: text/markdown
|
|
7
|
+
Requires-Dist: pandas>=2.3.3
|
|
8
|
+
Requires-Dist: pillow>=12.1.0
|
|
9
|
+
|
|
10
|
+
# VisCSV
|
|
11
|
+
|
|
12
|
+

|
|
13
|
+

|
|
14
|
+

|
|
15
|
+
|
|
16
|
+
VisCSV turns CSV data (with image paths) into a clean, interactive HTML table.
|
|
17
|
+
Simple API, fast preview, no server required.
|
|
18
|
+
|
|
19
|
+
## Highlights
|
|
20
|
+
|
|
21
|
+
- Image columns support (local paths or URLs)
|
|
22
|
+
- Smart type inference (number, datetime, boolean, json, code)
|
|
23
|
+
- Drag to reorder columns
|
|
24
|
+
- Resize column widths
|
|
25
|
+
- Built-in horizontal/vertical scrolling
|
|
26
|
+
- Chunked loading for large datasets
|
|
27
|
+
|
|
28
|
+
## Quickstart
|
|
29
|
+
|
|
30
|
+
```python
|
|
31
|
+
from viscsv import preview
|
|
32
|
+
|
|
33
|
+
preview("data.csv", output="output")
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
Open `output/viewer.html` in your browser.
|
|
37
|
+
|
|
38
|
+
## With Image Columns
|
|
39
|
+
|
|
40
|
+
```python
|
|
41
|
+
from viscsv import preview
|
|
42
|
+
|
|
43
|
+
preview("data.csv", image="image_path", output="output")
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
## Use the Class API
|
|
47
|
+
|
|
48
|
+
```python
|
|
49
|
+
from viscsv import VisCSV
|
|
50
|
+
|
|
51
|
+
vc = VisCSV(image="image")
|
|
52
|
+
vc.add("imgs/1.jpg", label="cat", score=0.98)
|
|
53
|
+
vc.add([
|
|
54
|
+
{"image": "imgs/2.jpg", "label": "dog", "score": 0.87},
|
|
55
|
+
{"image": "imgs/3.jpg", "label": "fox", "score": 0.72},
|
|
56
|
+
])
|
|
57
|
+
vc.build(output="output")
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
## Type Hints (Optional)
|
|
61
|
+
|
|
62
|
+
```python
|
|
63
|
+
preview(
|
|
64
|
+
"data.csv",
|
|
65
|
+
output="output",
|
|
66
|
+
type_hints={"payload": "json", "created_at": "datetime"},
|
|
67
|
+
)
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
## Large Datasets
|
|
71
|
+
|
|
72
|
+
```python
|
|
73
|
+
preview("big.csv", output="output", chunk_size=2000)
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
## Links
|
|
77
|
+
|
|
78
|
+
- 中文说明: [README-ch.md](README-ch.md)
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
viscsv/__init__.py,sha256=2H6wo724_kfaILCGlWw9BvD5wX1OOdAXhP3GOMg8Tbc,118
|
|
2
|
+
viscsv/builder.py,sha256=FkEYNxDciD-sQr-Hf2DzxoM2G01raF7XS3OELxzhxt0,14279
|
|
3
|
+
viscsv/meta.py,sha256=kdp3jF1Bz3L9nR6I6T_AXlTni5Gf5MRhFm9UdAwbjJM,533
|
|
4
|
+
viscsv/thumbnail.py,sha256=ffZKUnqw8jazzly6XvLmDhrSXObgvIun6kfUzQaze8Q,917
|
|
5
|
+
viscsv/viewer.py,sha256=bE98Z0YubepbqrRbsWMyANaVK7wuv9zc2jNhouf68tI,20645
|
|
6
|
+
viscsv-0.1.0.dist-info/METADATA,sha256=YuH5JaJwXcByeMIEUkFHoJ76RBGyMS2kUcQuaWNPKUo,1806
|
|
7
|
+
viscsv-0.1.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
8
|
+
viscsv-0.1.0.dist-info/top_level.txt,sha256=Lp9UADgVYnnRtfDCXn9SVoUIwgVe1lGyBhR_HarbEV8,7
|
|
9
|
+
viscsv-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
viscsv
|