viscsv 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
viscsv-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,78 @@
1
+ Metadata-Version: 2.4
2
+ Name: viscsv
3
+ Version: 0.1.0
4
+ Summary: Turn CSV data (with image paths) into a clean, interactive HTML table
5
+ Requires-Python: >=3.10
6
+ Description-Content-Type: text/markdown
7
+ Requires-Dist: pandas>=2.3.3
8
+ Requires-Dist: pillow>=12.1.0
9
+
10
+ # VisCSV
11
+
12
+ ![Python](https://img.shields.io/badge/Python-3.10%2B-3776AB?logo=python&logoColor=white)
13
+ ![Pandas](https://img.shields.io/badge/Pandas-2.x-150458?logo=pandas&logoColor=white)
14
+ ![License](https://img.shields.io/badge/License-MIT-000000?logo=opensourceinitiative&logoColor=white)
15
+
16
+ VisCSV turns CSV data (with image paths) into a clean, interactive HTML table.
17
+ Simple API, fast preview, no server required.
18
+
19
+ ## Highlights
20
+
21
+ - Image columns support (local paths or URLs)
22
+ - Smart type inference (number, datetime, boolean, json, code)
23
+ - Drag to reorder columns
24
+ - Resize column widths
25
+ - Built-in horizontal/vertical scrolling
26
+ - Chunked loading for large datasets
27
+
28
+ ## Quickstart
29
+
30
+ ```python
31
+ from viscsv import preview
32
+
33
+ preview("data.csv", output="output")
34
+ ```
35
+
36
+ Open `output/viewer.html` in your browser.
37
+
38
+ ## With Image Columns
39
+
40
+ ```python
41
+ from viscsv import preview
42
+
43
+ preview("data.csv", image="image_path", output="output")
44
+ ```
45
+
46
+ ## Use the Class API
47
+
48
+ ```python
49
+ from viscsv import VisCSV
50
+
51
+ vc = VisCSV(image="image")
52
+ vc.add("imgs/1.jpg", label="cat", score=0.98)
53
+ vc.add([
54
+ {"image": "imgs/2.jpg", "label": "dog", "score": 0.87},
55
+ {"image": "imgs/3.jpg", "label": "fox", "score": 0.72},
56
+ ])
57
+ vc.build(output="output")
58
+ ```
59
+
60
+ ## Type Hints (Optional)
61
+
62
+ ```python
63
+ preview(
64
+ "data.csv",
65
+ output="output",
66
+ type_hints={"payload": "json", "created_at": "datetime"},
67
+ )
68
+ ```
69
+
70
+ ## Large Datasets
71
+
72
+ ```python
73
+ preview("big.csv", output="output", chunk_size=2000)
74
+ ```
75
+
76
+ ## Links
77
+
78
+ - 中文说明: [README-ch.md](README-ch.md)
viscsv-0.1.0/README.md ADDED
@@ -0,0 +1,69 @@
1
+ # VisCSV
2
+
3
+ ![Python](https://img.shields.io/badge/Python-3.10%2B-3776AB?logo=python&logoColor=white)
4
+ ![Pandas](https://img.shields.io/badge/Pandas-2.x-150458?logo=pandas&logoColor=white)
5
+ ![License](https://img.shields.io/badge/License-MIT-000000?logo=opensourceinitiative&logoColor=white)
6
+
7
+ VisCSV turns CSV data (with image paths) into a clean, interactive HTML table.
8
+ Simple API, fast preview, no server required.
9
+
10
+ ## Highlights
11
+
12
+ - Image columns support (local paths or URLs)
13
+ - Smart type inference (number, datetime, boolean, json, code)
14
+ - Drag to reorder columns
15
+ - Resize column widths
16
+ - Built-in horizontal/vertical scrolling
17
+ - Chunked loading for large datasets
18
+
19
+ ## Quickstart
20
+
21
+ ```python
22
+ from viscsv import preview
23
+
24
+ preview("data.csv", output="output")
25
+ ```
26
+
27
+ Open `output/viewer.html` in your browser.
28
+
29
+ ## With Image Columns
30
+
31
+ ```python
32
+ from viscsv import preview
33
+
34
+ preview("data.csv", image="image_path", output="output")
35
+ ```
36
+
37
+ ## Use the Class API
38
+
39
+ ```python
40
+ from viscsv import VisCSV
41
+
42
+ vc = VisCSV(image="image")
43
+ vc.add("imgs/1.jpg", label="cat", score=0.98)
44
+ vc.add([
45
+ {"image": "imgs/2.jpg", "label": "dog", "score": 0.87},
46
+ {"image": "imgs/3.jpg", "label": "fox", "score": 0.72},
47
+ ])
48
+ vc.build(output="output")
49
+ ```
50
+
51
+ ## Type Hints (Optional)
52
+
53
+ ```python
54
+ preview(
55
+ "data.csv",
56
+ output="output",
57
+ type_hints={"payload": "json", "created_at": "datetime"},
58
+ )
59
+ ```
60
+
61
+ ## Large Datasets
62
+
63
+ ```python
64
+ preview("big.csv", output="output", chunk_size=2000)
65
+ ```
66
+
67
+ ## Links
68
+
69
+ - 中文说明: [README-ch.md](README-ch.md)
@@ -0,0 +1,10 @@
1
+ [project]
2
+ name = "viscsv"
3
+ version = "0.1.0"
4
+ description = "Turn CSV data (with image paths) into a clean, interactive HTML table"
5
+ readme = "README.md"
6
+ requires-python = ">=3.10"
7
+ dependencies = [
8
+ "pandas>=2.3.3",
9
+ "pillow>=12.1.0",
10
+ ]
viscsv-0.1.0/setup.cfg ADDED
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,3 @@
1
+ from .builder import build_viewer, preview, render, VisCSV
2
+
3
+ __all__ = ["build_viewer", "preview", "render", "VisCSV"]
@@ -0,0 +1,438 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+ import json
5
+ import shutil
6
+ from typing import Iterable, Mapping, Sequence
7
+
8
+ import numpy as np
9
+ import pandas as pd
10
+
11
+ from .meta import write_meta_chunk_js
12
+ from .thumbnail import make_thumbnail
13
+ from .viewer import write_viewer
14
+
15
+
16
+ class VisCSV:
17
+ def __init__(
18
+ self,
19
+ image: str | Sequence[str] | None = None,
20
+ image_columns: str | Sequence[str] | None = None,
21
+ dataframe: pd.DataFrame | None = None,
22
+ ) -> None:
23
+ image = image if image is not None else image_columns
24
+ if isinstance(image, str):
25
+ image = [image]
26
+ self.image_columns = list(image) if image is not None else []
27
+ self.df = dataframe.copy() if dataframe is not None else pd.DataFrame()
28
+ self._validate_image_columns()
29
+
30
+ @classmethod
31
+ def from_csv(
32
+ cls,
33
+ csv_path: str | Path,
34
+ image: str | Sequence[str] | None = None,
35
+ **kwargs,
36
+ ) -> "VisCSV":
37
+ path = Path(csv_path)
38
+ if not path.exists():
39
+ raise FileNotFoundError(f"CSV not found: {path}")
40
+ data = pd.read_csv(path, **kwargs)
41
+ return cls(image=image, dataframe=data)
42
+
43
+ def add(self, *args, **fields) -> None:
44
+ if len(args) == 1 and not fields:
45
+ self.add_any(args[0])
46
+ return
47
+ if not args:
48
+ raise TypeError("add() expects data or image path.")
49
+ image_path = args[0]
50
+ if not self.image_columns:
51
+ raise ValueError("No image column configured. Use add_row/add_rows or set image column.")
52
+ row = {self.image_columns[0]: str(image_path), **fields}
53
+ self.add_row(row)
54
+
55
+ def add_data(self, data) -> None:
56
+ self.add_any(data)
57
+
58
+ def add_any(self, data) -> None:
59
+ if isinstance(data, pd.DataFrame):
60
+ self.add_rows(data)
61
+ return
62
+ if isinstance(data, dict):
63
+ self.add_row(data)
64
+ return
65
+ if isinstance(data, (list, tuple)):
66
+ if data and isinstance(data[0], dict):
67
+ self.add_rows(data)
68
+ return
69
+ if data and isinstance(data[0], (list, tuple)):
70
+ self.add_rows(data)
71
+ return
72
+ self.add_row(data)
73
+
74
+ def add_row(self, row: dict | Sequence) -> None:
75
+ if isinstance(row, dict):
76
+ df = pd.DataFrame([row])
77
+ elif isinstance(row, (list, tuple)):
78
+ if not len(self.df.columns):
79
+ raise ValueError("Cannot append list row without existing columns.")
80
+ df = pd.DataFrame([row], columns=list(self.df.columns))
81
+ else:
82
+ raise TypeError("row must be a dict or a list/tuple.")
83
+ self.df = pd.concat([self.df, df], ignore_index=True)
84
+ self._validate_image_columns()
85
+
86
+ def add_rows(self, rows: Iterable) -> None:
87
+ if isinstance(rows, pd.DataFrame):
88
+ df = rows
89
+ else:
90
+ rows = list(rows)
91
+ if not rows:
92
+ return
93
+ if isinstance(rows[0], dict):
94
+ df = pd.DataFrame(rows)
95
+ else:
96
+ if not len(self.df.columns):
97
+ raise ValueError("Cannot append list rows without existing columns.")
98
+ df = pd.DataFrame(rows, columns=list(self.df.columns))
99
+ self.df = pd.concat([self.df, df], ignore_index=True)
100
+ self._validate_image_columns()
101
+
102
+ def to_csv(self, csv_path: str | Path, **kwargs) -> None:
103
+ path = Path(csv_path)
104
+ self.df.to_csv(path, index=False, **kwargs)
105
+
106
+ def build(
107
+ self,
108
+ output: str | Path = "output",
109
+ thumb_size: int = 256,
110
+ batch: int = 80,
111
+ image_column: str | None = None,
112
+ launch: bool = False,
113
+ type_hints: Mapping[str, str] | None = None,
114
+ chunk_size: int = 500,
115
+ ) -> Path:
116
+ image_column = image_column or (self.image_columns[0] if self.image_columns else None)
117
+ return render(
118
+ self.df,
119
+ image=image_column,
120
+ output=output,
121
+ thumb_size=thumb_size,
122
+ batch=batch,
123
+ image_columns=self.image_columns,
124
+ launch=launch,
125
+ type_hints=type_hints,
126
+ chunk_size=chunk_size,
127
+ )
128
+
129
+ def _validate_image_columns(self) -> None:
130
+ if not self.image_columns:
131
+ return
132
+ if len(self.df.columns) == 0:
133
+ return
134
+ for col in self.image_columns:
135
+ if col not in self.df.columns:
136
+ raise ValueError(f"Image column not found: {col}")
137
+
138
+
139
def render(
    data: str | Path | pd.DataFrame | list[dict] | list[tuple],
    image: str | None = None,
    output: str | Path = "output",
    columns: list[str] | None = None,
    thumb_size: int = 256,
    batch: int = 80,
    image_columns: Sequence[str] | None = None,
    launch: bool = False,
    type_hints: Mapping[str, str] | None = None,
    chunk_size: int = 500,
) -> Path:
    """Normalize ``data`` into a DataFrame and build the HTML viewer for it.

    Args:
        data: CSV path, DataFrame, list of dicts, or list of tuples.
        image: Primary image column. When given it is always placed first in
            the image-column list handed to the builder.
        output: Output directory.
        columns: Names of the non-image columns; required for tuple data.
        thumb_size: Thumbnail size forwarded to the builder.
        batch: Batch size forwarded to the builder.
        image_columns: Explicit image columns. When ``None`` they are inferred
            with ``infer_image_columns``.
        launch: Forwarded to the builder.
        type_hints: Per-column type overrides forwarded to the builder.
        chunk_size: Rows per metadata chunk, forwarded to the builder.

    Returns:
        Whatever ``_build_viewer_from_df`` returns (the viewer HTML path).

    Raises:
        ValueError: If any image column is not a column of the data.
    """
    df = _normalize_data(data, image=image, columns=columns)
    inferred = infer_image_columns(df) if image_columns is None else list(image_columns)
    if image is not None:
        # Promote the explicit image column to the front even when it is
        # already in the list; the builder treats the first entry as primary.
        inferred = [image] + [c for c in inferred if c != image]
    for col in inferred:
        # Fail early with a clear message instead of a KeyError deep inside
        # the row loop of the builder.
        if col not in df.columns:
            raise ValueError(f"Image column not found: {col}")
    return _build_viewer_from_df(
        df,
        image_columns=inferred,
        output=output,
        thumb_size=thumb_size,
        batch=batch,
        launch=launch,
        type_hints=type_hints,
        chunk_size=chunk_size,
    )
165
+
166
+
167
def build_viewer(
    data: str | Path | pd.DataFrame | list[dict] | list[tuple],
    image_column: str = "image",
    output_dir: str = "output",
    columns: list[str] | None = None,
    thumb_size: int = 256,
    batch_render: int = 80,
    launch: bool = False,
    type_hints: Mapping[str, str] | None = None,
    chunk_size: int = 500,
) -> Path:
    """Render ``data`` with exactly one, explicitly named image column.

    This is ``render`` under different parameter names: ``image_column`` is
    passed both as the primary image and as the only entry of the
    image-column list, so no image-column inference takes place.
    """
    render_options = {
        "image": image_column,
        "output": output_dir,
        "columns": columns,
        "thumb_size": thumb_size,
        "batch": batch_render,
        "image_columns": [image_column],
        "launch": launch,
        "type_hints": type_hints,
        "chunk_size": chunk_size,
    }
    return render(data, **render_options)
190
+
191
+
192
def preview(
    data: str | Path | pd.DataFrame | list[dict] | list[tuple],
    image: str | None = None,
    **kwargs,
) -> Path:
    """Shorthand for ``render``.

    ``data`` and ``image`` are passed through, and every extra keyword
    argument is forwarded unchanged.
    """
    viewer_path = render(data, image=image, **kwargs)
    return viewer_path
198
+
199
+
200
+ def _normalize_data(
201
+ data: str | Path | pd.DataFrame | list[dict] | list[tuple],
202
+ image: str | None,
203
+ columns: list[str] | None,
204
+ ) -> pd.DataFrame:
205
+ if isinstance(data, (str, Path)):
206
+ path = Path(data)
207
+ if not path.exists():
208
+ raise FileNotFoundError(f"CSV not found: {path}")
209
+ df = pd.read_csv(path)
210
+ elif isinstance(data, pd.DataFrame):
211
+ df = data.copy()
212
+ elif isinstance(data, list) and data:
213
+ if isinstance(data[0], tuple):
214
+ if image is None:
215
+ raise ValueError("image is required when using tuple data.")
216
+ if columns is None:
217
+ raise ValueError("columns is required when using tuple data.")
218
+ df = pd.DataFrame(data, columns=[image] + list(columns))
219
+ elif isinstance(data[0], dict):
220
+ df = pd.DataFrame(data)
221
+ else:
222
+ raise TypeError("Unsupported data format for render.")
223
+ else:
224
+ raise TypeError("Data must be a CSV path, DataFrame, or non-empty list.")
225
+
226
+ if image is not None and image not in df.columns:
227
+ raise ValueError(f"Image column not found: {image}")
228
+ return df
229
+
230
+
231
def infer_image_columns(df: pd.DataFrame, sample_size: int = 50) -> list[str]:
    """Guess which columns of ``df`` hold image paths or URLs.

    Only text-like columns (object or string dtype) are considered. A column
    qualifies when either

    * at least 60% of its first ``sample_size`` non-null values look like
      images (a local path ending in a known image extension, or an
      http(s) URL containing one anywhere), or
    * its lower-cased name contains one of the name hints as a substring.

    Returns:
        Matching column labels in DataFrame column order.
    """
    candidates = []
    name_hints = {"image", "img", "picture", "pic", "photo", "thumbnail", "thumb", "path", "filepath", "file"}
    # Tuples so they can be passed straight to str.startswith/str.endswith.
    image_exts = (".jpg", ".jpeg", ".png", ".webp", ".bmp", ".gif")
    url_prefixes = ("http://", "https://")

    for col in df.columns:
        series = df[col]
        # String-dtype columns are not "object" dtype but still hold text, so
        # accept both; otherwise such columns would never be inferred.
        is_text = pd.api.types.is_object_dtype(series) or pd.api.types.is_string_dtype(series)
        if not is_text:
            continue
        col_lower = str(col).lower()
        sample = series.dropna().astype(str).head(sample_size)
        if sample.empty:
            continue
        hit = 0
        for v in sample:
            v = v.strip()
            lowered = v.lower()
            if v.startswith(url_prefixes):
                # URLs may carry query strings after the extension, so a
                # substring match is used instead of a suffix match.
                if any(ext in lowered for ext in image_exts):
                    hit += 1
                continue
            if lowered.endswith(image_exts):
                hit += 1
        ratio = hit / max(len(sample), 1)
        if ratio >= 0.6 or any(h in col_lower for h in name_hints):
            candidates.append(col)
    return candidates
256
+
257
+
258
+ def _infer_column_types(
259
+ df: pd.DataFrame,
260
+ image_columns: Sequence[str],
261
+ type_hints: Mapping[str, str] | None = None,
262
+ sample_size: int = 80,
263
+ ) -> dict[str, str]:
264
+ result: dict[str, str] = {}
265
+ type_hints = type_hints or {}
266
+ for col in df.columns:
267
+ if col in image_columns:
268
+ result[col] = "image"
269
+ continue
270
+ if col in type_hints:
271
+ result[col] = type_hints[col]
272
+ continue
273
+ result[col] = _infer_semantic_type(df[col], sample_size=sample_size)
274
+ return result
275
+
276
+
277
+ def _infer_semantic_type(series: pd.Series, sample_size: int = 80) -> str:
278
+ dtype = series.dtype
279
+ if pd.api.types.is_bool_dtype(dtype):
280
+ return "boolean"
281
+ if pd.api.types.is_numeric_dtype(dtype):
282
+ return "number"
283
+ if pd.api.types.is_datetime64_any_dtype(dtype):
284
+ return "datetime"
285
+
286
+ sample = series.dropna().astype(str).head(sample_size)
287
+ if sample.empty:
288
+ return "string"
289
+
290
+ sample_list = [v.strip() for v in sample if v and str(v).strip()]
291
+ if not sample_list:
292
+ return "string"
293
+
294
+ json_hits = sum(1 for s in sample_list if _looks_like_json(s))
295
+ md_hits = sum(1 for s in sample_list if _looks_like_markdown(s))
296
+ code_hits = sum(1 for s in sample_list if _looks_like_code(s))
297
+ num_hits = _count_numeric(sample_list)
298
+ date_hits = _count_datetime(sample_list)
299
+
300
+ ratio = lambda n: n / max(len(sample_list), 1)
301
+ if ratio(json_hits) >= 0.6:
302
+ return "json"
303
+ if ratio(md_hits) >= 0.5:
304
+ return "markdown"
305
+ if ratio(code_hits) >= 0.5:
306
+ return "code"
307
+ if ratio(date_hits) >= 0.6:
308
+ return "datetime"
309
+ if ratio(num_hits) >= 0.8:
310
+ return "number"
311
+ return "string"
312
+
313
+
314
+ def _looks_like_json(text: str) -> bool:
315
+ if not (text.startswith("{") and text.endswith("}")) and not (
316
+ text.startswith("[") and text.endswith("]")
317
+ ):
318
+ return False
319
+ try:
320
+ json.loads(text)
321
+ return True
322
+ except Exception:
323
+ return False
324
+
325
+
326
+ def _looks_like_markdown(text: str) -> bool:
327
+ indicators = ("```", "# ", "## ", "- ", "* ", "> ", "[", "](", "**", "_")
328
+ return any(token in text for token in indicators)
329
+
330
+
331
+ def _looks_like_code(text: str) -> bool:
332
+ indicators = (";", "{", "}", "def ", "class ", "function ", "=>", "var ", "let ", "const ")
333
+ return any(token in text for token in indicators)
334
+
335
+ def _count_numeric(values: list[str]) -> int:
336
+ if not values:
337
+ return 0
338
+ parsed = pd.to_numeric(pd.Series(values), errors="coerce")
339
+ return int(parsed.notna().sum())
340
+
341
+
342
+ def _count_datetime(values: list[str]) -> int:
343
+ if not values:
344
+ return 0
345
+ parsed = pd.to_datetime(
346
+ pd.Series(values),
347
+ errors="coerce",
348
+ utc=False,
349
+ format="mixed",
350
+ cache=True,
351
+ )
352
+ return int(parsed.notna().sum())
353
+
354
+
355
+ def _to_jsonable(value):
356
+ if pd.isna(value):
357
+ return None
358
+ if isinstance(value, (np.integer, np.floating, np.bool_)):
359
+ return value.item()
360
+ if isinstance(value, pd.Timestamp):
361
+ return value.isoformat()
362
+ return value
363
+
364
+
365
def _build_viewer_from_df(
    df: pd.DataFrame,
    image_columns: Sequence[str],
    output: str | Path,
    thumb_size: int,
    batch: int,
    launch: bool,
    type_hints: Mapping[str, str] | None,
    chunk_size: int,
) -> Path:
    """Write the viewer, its metadata chunks and image assets into ``output``.

    For every row, each local image path that exists on disk is copied into
    ``images/`` and thumbnailed into ``thumbs/`` under the name
    ``<row index>_<column><ext>``. URLs and missing files get no image id.
    Row records are written as ``meta_NNNN.js`` chunks of at most
    ``chunk_size`` rows (a single chunk when ``chunk_size`` is not positive),
    and at least one chunk is always written. Finally ``viewer.html`` is
    generated and, when ``launch`` is true, opened in the web browser.

    Returns:
        Path of the generated ``viewer.html``.
    """
    out = Path(output)
    out.mkdir(parents=True, exist_ok=True)
    img_dir = out / "images"
    thumb_dir = out / "thumbs"
    if image_columns:
        for asset_dir in (img_dir, thumb_dir):
            asset_dir.mkdir(parents=True, exist_ok=True)

    chunk_paths: list[Path] = []

    def stage_image(row_index: int, col, raw_path) -> str | None:
        # Copy one image plus its thumbnail; return its id, or None when
        # there is nothing local to copy.
        if not pd.notna(raw_path):
            return None
        raw_text = str(raw_path)
        if raw_text.startswith("http://") or raw_text.startswith("https://"):
            return None
        img_path = Path(raw_text)
        ext = img_path.suffix.lower() if img_path.suffix else ".jpg"
        name = f"{row_index:06d}_{col}{ext}"
        if not img_path.exists():
            return None
        shutil.copy(img_path, img_dir / name)
        make_thumbnail(img_path, thumb_dir / name, thumb_size)
        return name

    def flush(pending: list[dict]) -> None:
        # The next chunk number is always the count of chunks written so far.
        chunk_path = out / f"meta_{len(chunk_paths):04d}.js"
        write_meta_chunk_js(chunk_path, pending)
        chunk_paths.append(chunk_path)

    columns = list(df.columns)
    pending: list[dict] = []
    rows = df.reset_index(drop=True).itertuples(index=False, name=None)
    for i, row in enumerate(rows):
        row_map = dict(zip(columns, row))
        img_map: dict[str, str | None] = {
            col: stage_image(i, col, row_map[col]) for col in image_columns
        }

        record = {"__img_id__": img_map}
        record.update((col, _to_jsonable(row_map[col])) for col in columns)
        pending.append(record)

        if chunk_size > 0 and len(pending) >= chunk_size:
            flush(pending)
            pending = []

    # Write the trailing partial chunk; an empty table still gets one chunk.
    if pending or not chunk_paths:
        flush(pending)

    html_path = out / "viewer.html"
    column_types = _infer_column_types(df, image_columns=image_columns, type_hints=type_hints)
    write_viewer(
        html_path,
        columns=columns,
        column_types=column_types,
        image_column=image_columns[0] if image_columns else "",
        image_columns=list(image_columns),
        batch_size=batch,
        chunk_count=len(chunk_paths),
    )
    if launch:
        import webbrowser

        webbrowser.open(html_path.resolve().as_uri())
    return html_path
@@ -0,0 +1,18 @@
1
+ import json
2
+ from pathlib import Path
3
+ from typing import Iterable
4
+
5
+
6
def write_meta_jsonl(meta_path: Path, records: Iterable[dict]) -> None:
    """Write ``records`` to ``meta_path`` as UTF-8 JSON Lines.

    Each record becomes one line; non-ASCII characters are written as-is
    rather than escaped.
    """
    with meta_path.open("w", encoding="utf-8") as handle:
        handle.writelines(
            json.dumps(record, ensure_ascii=False) + "\n" for record in records
        )
10
+
11
+
12
def write_meta_chunk_js(meta_path: Path, records: list[dict]) -> None:
    """Write ``records`` as a JavaScript assignment to ``meta_path``.

    The file holds one statement that assigns the JSON-encoded records to
    ``window.__VISCSV_LAST_CHUNK__``. It is written as UTF-8 with non-ASCII
    characters left unescaped.
    """
    encoded = json.dumps(records, ensure_ascii=False)
    meta_path.write_text(
        f"window.__VISCSV_LAST_CHUNK__ = {encoded};",
        encoding="utf-8",
    )
@@ -0,0 +1,26 @@
1
+ from pathlib import Path
2
+ from PIL import Image, ImageDraw
3
+
4
+
5
def _save_image(img: Image.Image, dst: Path, quality: int) -> None:
    """Save ``img`` to ``dst`` as PNG or JPEG, chosen by the suffix of ``dst``.

    A ``.png`` suffix (case-insensitive) is written as optimized PNG; every
    other suffix is written as JPEG with the given ``quality``.
    """
    if dst.suffix.lower() == ".png":
        img.save(dst, "PNG", optimize=True)
        return
    # The JPEG writer only accepts a few modes. Palette and alpha images
    # (GIF, transparent PNG/WebP) must be converted first, otherwise save()
    # raises and the caller falls back to a placeholder thumbnail.
    if img.mode not in ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr"):
        img = img.convert("RGB")
    img.save(dst, "JPEG", quality=quality)
11
+
12
+
13
def make_thumbnail(src: Path, dst: Path, size: int, quality: int = 85) -> bool:
    """Write a thumbnail of ``src`` to ``dst`` that fits in ``size`` x ``size``.

    This is best-effort: if the source cannot be opened, resized or saved, a
    light-grey placeholder with a border and a diagonal cross is written to
    ``dst`` instead.

    Returns:
        True when a real thumbnail was written, False when the placeholder
        was written.
    """
    try:
        # The context manager closes the underlying file handle, which a
        # bare Image.open() would leave open.
        with Image.open(src) as img:
            img.thumbnail((size, size))
            _save_image(img, dst, quality)
        return True
    except Exception:
        # Deliberately broad: any failure degrades to the placeholder
        # rather than aborting the whole build.
        img = Image.new("RGB", (size, size), (235, 235, 235))
        draw = ImageDraw.Draw(img)
        draw.rectangle([(0, 0), (size - 1, size - 1)], outline=(190, 190, 190))
        draw.line([(0, 0), (size - 1, size - 1)], fill=(200, 200, 200), width=2)
        draw.line([(0, size - 1), (size - 1, 0)], fill=(200, 200, 200), width=2)
        _save_image(img, dst, quality)
        return False