viscsv 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- viscsv-0.1.0/PKG-INFO +78 -0
- viscsv-0.1.0/README.md +69 -0
- viscsv-0.1.0/pyproject.toml +10 -0
- viscsv-0.1.0/setup.cfg +4 -0
- viscsv-0.1.0/src/viscsv/__init__.py +3 -0
- viscsv-0.1.0/src/viscsv/builder.py +438 -0
- viscsv-0.1.0/src/viscsv/meta.py +18 -0
- viscsv-0.1.0/src/viscsv/thumbnail.py +26 -0
- viscsv-0.1.0/src/viscsv/viewer.py +772 -0
- viscsv-0.1.0/src/viscsv.egg-info/PKG-INFO +78 -0
- viscsv-0.1.0/src/viscsv.egg-info/SOURCES.txt +13 -0
- viscsv-0.1.0/src/viscsv.egg-info/dependency_links.txt +1 -0
- viscsv-0.1.0/src/viscsv.egg-info/requires.txt +2 -0
- viscsv-0.1.0/src/viscsv.egg-info/top_level.txt +1 -0
- viscsv-0.1.0/tests/test_viscsv.py +117 -0
viscsv-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: viscsv
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Turn CSV data (with image paths) into a clean, interactive HTML table
|
|
5
|
+
Requires-Python: >=3.10
|
|
6
|
+
Description-Content-Type: text/markdown
|
|
7
|
+
Requires-Dist: pandas>=2.3.3
|
|
8
|
+
Requires-Dist: pillow>=12.1.0
|
|
9
|
+
|
|
10
|
+
# VisCSV
|
|
11
|
+
|
|
12
|
+

|
|
13
|
+

|
|
14
|
+

|
|
15
|
+
|
|
16
|
+
VisCSV turns CSV data (with image paths) into a clean, interactive HTML table.
|
|
17
|
+
Simple API, fast preview, no server required.
|
|
18
|
+
|
|
19
|
+
## Highlights
|
|
20
|
+
|
|
21
|
+
- Image columns support (local paths or URLs)
|
|
22
|
+
- Smart type inference (number, datetime, boolean, json, code)
|
|
23
|
+
- Drag to reorder columns
|
|
24
|
+
- Resize column widths
|
|
25
|
+
- Built-in horizontal/vertical scrolling
|
|
26
|
+
- Chunked loading for large datasets
|
|
27
|
+
|
|
28
|
+
## Quickstart
|
|
29
|
+
|
|
30
|
+
```python
|
|
31
|
+
from viscsv import preview
|
|
32
|
+
|
|
33
|
+
preview("data.csv", output="output")
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
Open `output/viewer.html` in your browser.
|
|
37
|
+
|
|
38
|
+
## With Image Columns
|
|
39
|
+
|
|
40
|
+
```python
|
|
41
|
+
from viscsv import preview
|
|
42
|
+
|
|
43
|
+
preview("data.csv", image="image_path", output="output")
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
## Use the Class API
|
|
47
|
+
|
|
48
|
+
```python
|
|
49
|
+
from viscsv import VisCSV
|
|
50
|
+
|
|
51
|
+
vc = VisCSV(image="image")
|
|
52
|
+
vc.add("imgs/1.jpg", label="cat", score=0.98)
|
|
53
|
+
vc.add([
|
|
54
|
+
{"image": "imgs/2.jpg", "label": "dog", "score": 0.87},
|
|
55
|
+
{"image": "imgs/3.jpg", "label": "fox", "score": 0.72},
|
|
56
|
+
])
|
|
57
|
+
vc.build(output="output")
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
## Type Hints (Optional)
|
|
61
|
+
|
|
62
|
+
```python
|
|
63
|
+
preview(
|
|
64
|
+
"data.csv",
|
|
65
|
+
output="output",
|
|
66
|
+
type_hints={"payload": "json", "created_at": "datetime"},
|
|
67
|
+
)
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
## Large Datasets
|
|
71
|
+
|
|
72
|
+
```python
|
|
73
|
+
preview("big.csv", output="output", chunk_size=2000)
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
## Links
|
|
77
|
+
|
|
78
|
+
- 中文说明: [README-ch.md](README-ch.md)
|
viscsv-0.1.0/README.md
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
# VisCSV
|
|
2
|
+
|
|
3
|
+

|
|
4
|
+

|
|
5
|
+

|
|
6
|
+
|
|
7
|
+
VisCSV turns CSV data (with image paths) into a clean, interactive HTML table.
|
|
8
|
+
Simple API, fast preview, no server required.
|
|
9
|
+
|
|
10
|
+
## Highlights
|
|
11
|
+
|
|
12
|
+
- Image columns support (local paths or URLs)
|
|
13
|
+
- Smart type inference (number, datetime, boolean, json, code)
|
|
14
|
+
- Drag to reorder columns
|
|
15
|
+
- Resize column widths
|
|
16
|
+
- Built-in horizontal/vertical scrolling
|
|
17
|
+
- Chunked loading for large datasets
|
|
18
|
+
|
|
19
|
+
## Quickstart
|
|
20
|
+
|
|
21
|
+
```python
|
|
22
|
+
from viscsv import preview
|
|
23
|
+
|
|
24
|
+
preview("data.csv", output="output")
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
Open `output/viewer.html` in your browser.
|
|
28
|
+
|
|
29
|
+
## With Image Columns
|
|
30
|
+
|
|
31
|
+
```python
|
|
32
|
+
from viscsv import preview
|
|
33
|
+
|
|
34
|
+
preview("data.csv", image="image_path", output="output")
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
## Use the Class API
|
|
38
|
+
|
|
39
|
+
```python
|
|
40
|
+
from viscsv import VisCSV
|
|
41
|
+
|
|
42
|
+
vc = VisCSV(image="image")
|
|
43
|
+
vc.add("imgs/1.jpg", label="cat", score=0.98)
|
|
44
|
+
vc.add([
|
|
45
|
+
{"image": "imgs/2.jpg", "label": "dog", "score": 0.87},
|
|
46
|
+
{"image": "imgs/3.jpg", "label": "fox", "score": 0.72},
|
|
47
|
+
])
|
|
48
|
+
vc.build(output="output")
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
## Type Hints (Optional)
|
|
52
|
+
|
|
53
|
+
```python
|
|
54
|
+
preview(
|
|
55
|
+
"data.csv",
|
|
56
|
+
output="output",
|
|
57
|
+
type_hints={"payload": "json", "created_at": "datetime"},
|
|
58
|
+
)
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## Large Datasets
|
|
62
|
+
|
|
63
|
+
```python
|
|
64
|
+
preview("big.csv", output="output", chunk_size=2000)
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
## Links
|
|
68
|
+
|
|
69
|
+
- 中文说明: [README-ch.md](README-ch.md)
|
viscsv-0.1.0/src/viscsv/builder.py
ADDED
|
@@ -0,0 +1,438 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
import json
|
|
5
|
+
import shutil
|
|
6
|
+
from typing import Iterable, Mapping, Sequence
|
|
7
|
+
|
|
8
|
+
import numpy as np
|
|
9
|
+
import pandas as pd
|
|
10
|
+
|
|
11
|
+
from .meta import write_meta_chunk_js
|
|
12
|
+
from .thumbnail import make_thumbnail
|
|
13
|
+
from .viewer import write_viewer
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class VisCSV:
|
|
17
|
+
def __init__(
|
|
18
|
+
self,
|
|
19
|
+
image: str | Sequence[str] | None = None,
|
|
20
|
+
image_columns: str | Sequence[str] | None = None,
|
|
21
|
+
dataframe: pd.DataFrame | None = None,
|
|
22
|
+
) -> None:
|
|
23
|
+
image = image if image is not None else image_columns
|
|
24
|
+
if isinstance(image, str):
|
|
25
|
+
image = [image]
|
|
26
|
+
self.image_columns = list(image) if image is not None else []
|
|
27
|
+
self.df = dataframe.copy() if dataframe is not None else pd.DataFrame()
|
|
28
|
+
self._validate_image_columns()
|
|
29
|
+
|
|
30
|
+
@classmethod
|
|
31
|
+
def from_csv(
|
|
32
|
+
cls,
|
|
33
|
+
csv_path: str | Path,
|
|
34
|
+
image: str | Sequence[str] | None = None,
|
|
35
|
+
**kwargs,
|
|
36
|
+
) -> "VisCSV":
|
|
37
|
+
path = Path(csv_path)
|
|
38
|
+
if not path.exists():
|
|
39
|
+
raise FileNotFoundError(f"CSV not found: {path}")
|
|
40
|
+
data = pd.read_csv(path, **kwargs)
|
|
41
|
+
return cls(image=image, dataframe=data)
|
|
42
|
+
|
|
43
|
+
def add(self, *args, **fields) -> None:
|
|
44
|
+
if len(args) == 1 and not fields:
|
|
45
|
+
self.add_any(args[0])
|
|
46
|
+
return
|
|
47
|
+
if not args:
|
|
48
|
+
raise TypeError("add() expects data or image path.")
|
|
49
|
+
image_path = args[0]
|
|
50
|
+
if not self.image_columns:
|
|
51
|
+
raise ValueError("No image column configured. Use add_row/add_rows or set image column.")
|
|
52
|
+
row = {self.image_columns[0]: str(image_path), **fields}
|
|
53
|
+
self.add_row(row)
|
|
54
|
+
|
|
55
|
+
def add_data(self, data) -> None:
|
|
56
|
+
self.add_any(data)
|
|
57
|
+
|
|
58
|
+
def add_any(self, data) -> None:
|
|
59
|
+
if isinstance(data, pd.DataFrame):
|
|
60
|
+
self.add_rows(data)
|
|
61
|
+
return
|
|
62
|
+
if isinstance(data, dict):
|
|
63
|
+
self.add_row(data)
|
|
64
|
+
return
|
|
65
|
+
if isinstance(data, (list, tuple)):
|
|
66
|
+
if data and isinstance(data[0], dict):
|
|
67
|
+
self.add_rows(data)
|
|
68
|
+
return
|
|
69
|
+
if data and isinstance(data[0], (list, tuple)):
|
|
70
|
+
self.add_rows(data)
|
|
71
|
+
return
|
|
72
|
+
self.add_row(data)
|
|
73
|
+
|
|
74
|
+
def add_row(self, row: dict | Sequence) -> None:
|
|
75
|
+
if isinstance(row, dict):
|
|
76
|
+
df = pd.DataFrame([row])
|
|
77
|
+
elif isinstance(row, (list, tuple)):
|
|
78
|
+
if not len(self.df.columns):
|
|
79
|
+
raise ValueError("Cannot append list row without existing columns.")
|
|
80
|
+
df = pd.DataFrame([row], columns=list(self.df.columns))
|
|
81
|
+
else:
|
|
82
|
+
raise TypeError("row must be a dict or a list/tuple.")
|
|
83
|
+
self.df = pd.concat([self.df, df], ignore_index=True)
|
|
84
|
+
self._validate_image_columns()
|
|
85
|
+
|
|
86
|
+
def add_rows(self, rows: Iterable) -> None:
|
|
87
|
+
if isinstance(rows, pd.DataFrame):
|
|
88
|
+
df = rows
|
|
89
|
+
else:
|
|
90
|
+
rows = list(rows)
|
|
91
|
+
if not rows:
|
|
92
|
+
return
|
|
93
|
+
if isinstance(rows[0], dict):
|
|
94
|
+
df = pd.DataFrame(rows)
|
|
95
|
+
else:
|
|
96
|
+
if not len(self.df.columns):
|
|
97
|
+
raise ValueError("Cannot append list rows without existing columns.")
|
|
98
|
+
df = pd.DataFrame(rows, columns=list(self.df.columns))
|
|
99
|
+
self.df = pd.concat([self.df, df], ignore_index=True)
|
|
100
|
+
self._validate_image_columns()
|
|
101
|
+
|
|
102
|
+
def to_csv(self, csv_path: str | Path, **kwargs) -> None:
|
|
103
|
+
path = Path(csv_path)
|
|
104
|
+
self.df.to_csv(path, index=False, **kwargs)
|
|
105
|
+
|
|
106
|
+
def build(
|
|
107
|
+
self,
|
|
108
|
+
output: str | Path = "output",
|
|
109
|
+
thumb_size: int = 256,
|
|
110
|
+
batch: int = 80,
|
|
111
|
+
image_column: str | None = None,
|
|
112
|
+
launch: bool = False,
|
|
113
|
+
type_hints: Mapping[str, str] | None = None,
|
|
114
|
+
chunk_size: int = 500,
|
|
115
|
+
) -> Path:
|
|
116
|
+
image_column = image_column or (self.image_columns[0] if self.image_columns else None)
|
|
117
|
+
return render(
|
|
118
|
+
self.df,
|
|
119
|
+
image=image_column,
|
|
120
|
+
output=output,
|
|
121
|
+
thumb_size=thumb_size,
|
|
122
|
+
batch=batch,
|
|
123
|
+
image_columns=self.image_columns,
|
|
124
|
+
launch=launch,
|
|
125
|
+
type_hints=type_hints,
|
|
126
|
+
chunk_size=chunk_size,
|
|
127
|
+
)
|
|
128
|
+
|
|
129
|
+
def _validate_image_columns(self) -> None:
|
|
130
|
+
if not self.image_columns:
|
|
131
|
+
return
|
|
132
|
+
if len(self.df.columns) == 0:
|
|
133
|
+
return
|
|
134
|
+
for col in self.image_columns:
|
|
135
|
+
if col not in self.df.columns:
|
|
136
|
+
raise ValueError(f"Image column not found: {col}")
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def render(
    data: str | Path | pd.DataFrame | list[dict] | list[tuple],
    image: str | None = None,
    output: str | Path = "output",
    columns: list[str] | None = None,
    thumb_size: int = 256,
    batch: int = 80,
    image_columns: Sequence[str] | None = None,
    launch: bool = False,
    type_hints: Mapping[str, str] | None = None,
    chunk_size: int = 500,
) -> Path:
    """Normalize *data*, decide which columns hold images, and emit the viewer.

    When ``image_columns`` is not given the image columns are inferred from
    the data. An explicitly named ``image`` column is always placed first.

    Returns the path of the generated ``viewer.html``.
    """
    frame = _normalize_data(data, image=image, columns=columns)
    if image_columns is not None:
        img_cols = list(image_columns)
    else:
        img_cols = infer_image_columns(frame)
    if image is not None and image not in img_cols:
        # Promote the explicit image column to the front of the list.
        img_cols = [image, *(c for c in img_cols if c != image)]
    return _build_viewer_from_df(
        frame,
        image_columns=img_cols,
        output=output,
        thumb_size=thumb_size,
        batch=batch,
        launch=launch,
        type_hints=type_hints,
        chunk_size=chunk_size,
    )
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def build_viewer(
    data: str | Path | pd.DataFrame | list[dict] | list[tuple],
    image_column: str = "image",
    output_dir: str = "output",
    columns: list[str] | None = None,
    thumb_size: int = 256,
    batch_render: int = 80,
    launch: bool = False,
    type_hints: Mapping[str, str] | None = None,
    chunk_size: int = 500,
) -> Path:
    """Legacy-named wrapper that forwards to :func:`render`.

    Maps the older parameter names (``output_dir``, ``batch_render``) onto
    the current :func:`render` keywords and pins ``image_columns`` to the
    single ``image_column``.
    """
    forwarded = {
        "image": image_column,
        "image_columns": [image_column],
        "output": output_dir,
        "columns": columns,
        "thumb_size": thumb_size,
        "batch": batch_render,
        "launch": launch,
        "type_hints": type_hints,
        "chunk_size": chunk_size,
    }
    return render(data, **forwarded)
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def preview(
    data: str | Path | pd.DataFrame | list[dict] | list[tuple],
    image: str | None = None,
    **kwargs,
) -> Path:
    """Convenience alias for :func:`render`.

    Accepts the same keyword arguments as :func:`render` and returns the
    path of the generated ``viewer.html``.
    """
    return render(data, image=image, **kwargs)
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def _normalize_data(
|
|
201
|
+
data: str | Path | pd.DataFrame | list[dict] | list[tuple],
|
|
202
|
+
image: str | None,
|
|
203
|
+
columns: list[str] | None,
|
|
204
|
+
) -> pd.DataFrame:
|
|
205
|
+
if isinstance(data, (str, Path)):
|
|
206
|
+
path = Path(data)
|
|
207
|
+
if not path.exists():
|
|
208
|
+
raise FileNotFoundError(f"CSV not found: {path}")
|
|
209
|
+
df = pd.read_csv(path)
|
|
210
|
+
elif isinstance(data, pd.DataFrame):
|
|
211
|
+
df = data.copy()
|
|
212
|
+
elif isinstance(data, list) and data:
|
|
213
|
+
if isinstance(data[0], tuple):
|
|
214
|
+
if image is None:
|
|
215
|
+
raise ValueError("image is required when using tuple data.")
|
|
216
|
+
if columns is None:
|
|
217
|
+
raise ValueError("columns is required when using tuple data.")
|
|
218
|
+
df = pd.DataFrame(data, columns=[image] + list(columns))
|
|
219
|
+
elif isinstance(data[0], dict):
|
|
220
|
+
df = pd.DataFrame(data)
|
|
221
|
+
else:
|
|
222
|
+
raise TypeError("Unsupported data format for render.")
|
|
223
|
+
else:
|
|
224
|
+
raise TypeError("Data must be a CSV path, DataFrame, or non-empty list.")
|
|
225
|
+
|
|
226
|
+
if image is not None and image not in df.columns:
|
|
227
|
+
raise ValueError(f"Image column not found: {image}")
|
|
228
|
+
return df
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
def infer_image_columns(df: pd.DataFrame, sample_size: int = 50) -> list[str]:
    """Heuristically pick object-dtype columns whose values look like images.

    A column qualifies when >= 60% of a sample of its non-null values end in
    a known image extension (or, for http(s) URLs, contain one), OR when the
    column name contains an image-ish token. All-empty columns never qualify.
    """
    name_tokens = {"image", "img", "picture", "pic", "photo", "thumbnail", "thumb", "path", "filepath", "file"}
    extensions = (".jpg", ".jpeg", ".png", ".webp", ".bmp", ".gif")

    found: list[str] = []
    for name in df.columns:
        if not pd.api.types.is_object_dtype(df[name]):
            continue
        values = df[name].dropna().astype(str).head(sample_size)
        if values.empty:
            continue
        matches = 0
        for raw in values:
            text = raw.strip()
            lowered = text.lower()
            if text.startswith(("http://", "https://")):
                # For URLs an extension anywhere in the string counts
                # (query strings may follow the file name).
                if any(ext in lowered for ext in extensions):
                    matches += 1
            elif lowered.endswith(extensions):
                matches += 1
        named_like_image = any(tok in str(name).lower() for tok in name_tokens)
        if matches / max(len(values), 1) >= 0.6 or named_like_image:
            found.append(name)
    return found
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
def _infer_column_types(
|
|
259
|
+
df: pd.DataFrame,
|
|
260
|
+
image_columns: Sequence[str],
|
|
261
|
+
type_hints: Mapping[str, str] | None = None,
|
|
262
|
+
sample_size: int = 80,
|
|
263
|
+
) -> dict[str, str]:
|
|
264
|
+
result: dict[str, str] = {}
|
|
265
|
+
type_hints = type_hints or {}
|
|
266
|
+
for col in df.columns:
|
|
267
|
+
if col in image_columns:
|
|
268
|
+
result[col] = "image"
|
|
269
|
+
continue
|
|
270
|
+
if col in type_hints:
|
|
271
|
+
result[col] = type_hints[col]
|
|
272
|
+
continue
|
|
273
|
+
result[col] = _infer_semantic_type(df[col], sample_size=sample_size)
|
|
274
|
+
return result
|
|
275
|
+
|
|
276
|
+
|
|
277
|
+
def _infer_semantic_type(series: pd.Series, sample_size: int = 80) -> str:
|
|
278
|
+
dtype = series.dtype
|
|
279
|
+
if pd.api.types.is_bool_dtype(dtype):
|
|
280
|
+
return "boolean"
|
|
281
|
+
if pd.api.types.is_numeric_dtype(dtype):
|
|
282
|
+
return "number"
|
|
283
|
+
if pd.api.types.is_datetime64_any_dtype(dtype):
|
|
284
|
+
return "datetime"
|
|
285
|
+
|
|
286
|
+
sample = series.dropna().astype(str).head(sample_size)
|
|
287
|
+
if sample.empty:
|
|
288
|
+
return "string"
|
|
289
|
+
|
|
290
|
+
sample_list = [v.strip() for v in sample if v and str(v).strip()]
|
|
291
|
+
if not sample_list:
|
|
292
|
+
return "string"
|
|
293
|
+
|
|
294
|
+
json_hits = sum(1 for s in sample_list if _looks_like_json(s))
|
|
295
|
+
md_hits = sum(1 for s in sample_list if _looks_like_markdown(s))
|
|
296
|
+
code_hits = sum(1 for s in sample_list if _looks_like_code(s))
|
|
297
|
+
num_hits = _count_numeric(sample_list)
|
|
298
|
+
date_hits = _count_datetime(sample_list)
|
|
299
|
+
|
|
300
|
+
ratio = lambda n: n / max(len(sample_list), 1)
|
|
301
|
+
if ratio(json_hits) >= 0.6:
|
|
302
|
+
return "json"
|
|
303
|
+
if ratio(md_hits) >= 0.5:
|
|
304
|
+
return "markdown"
|
|
305
|
+
if ratio(code_hits) >= 0.5:
|
|
306
|
+
return "code"
|
|
307
|
+
if ratio(date_hits) >= 0.6:
|
|
308
|
+
return "datetime"
|
|
309
|
+
if ratio(num_hits) >= 0.8:
|
|
310
|
+
return "number"
|
|
311
|
+
return "string"
|
|
312
|
+
|
|
313
|
+
|
|
314
|
+
def _looks_like_json(text: str) -> bool:
|
|
315
|
+
if not (text.startswith("{") and text.endswith("}")) and not (
|
|
316
|
+
text.startswith("[") and text.endswith("]")
|
|
317
|
+
):
|
|
318
|
+
return False
|
|
319
|
+
try:
|
|
320
|
+
json.loads(text)
|
|
321
|
+
return True
|
|
322
|
+
except Exception:
|
|
323
|
+
return False
|
|
324
|
+
|
|
325
|
+
|
|
326
|
+
def _looks_like_markdown(text: str) -> bool:
|
|
327
|
+
indicators = ("```", "# ", "## ", "- ", "* ", "> ", "[", "](", "**", "_")
|
|
328
|
+
return any(token in text for token in indicators)
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
def _looks_like_code(text: str) -> bool:
|
|
332
|
+
indicators = (";", "{", "}", "def ", "class ", "function ", "=>", "var ", "let ", "const ")
|
|
333
|
+
return any(token in text for token in indicators)
|
|
334
|
+
|
|
335
|
+
def _count_numeric(values: list[str]) -> int:
|
|
336
|
+
if not values:
|
|
337
|
+
return 0
|
|
338
|
+
parsed = pd.to_numeric(pd.Series(values), errors="coerce")
|
|
339
|
+
return int(parsed.notna().sum())
|
|
340
|
+
|
|
341
|
+
|
|
342
|
+
def _count_datetime(values: list[str]) -> int:
|
|
343
|
+
if not values:
|
|
344
|
+
return 0
|
|
345
|
+
parsed = pd.to_datetime(
|
|
346
|
+
pd.Series(values),
|
|
347
|
+
errors="coerce",
|
|
348
|
+
utc=False,
|
|
349
|
+
format="mixed",
|
|
350
|
+
cache=True,
|
|
351
|
+
)
|
|
352
|
+
return int(parsed.notna().sum())
|
|
353
|
+
|
|
354
|
+
|
|
355
|
+
def _to_jsonable(value):
|
|
356
|
+
if pd.isna(value):
|
|
357
|
+
return None
|
|
358
|
+
if isinstance(value, (np.integer, np.floating, np.bool_)):
|
|
359
|
+
return value.item()
|
|
360
|
+
if isinstance(value, pd.Timestamp):
|
|
361
|
+
return value.isoformat()
|
|
362
|
+
return value
|
|
363
|
+
|
|
364
|
+
|
|
365
|
+
def _build_viewer_from_df(
    df: pd.DataFrame,
    image_columns: Sequence[str],
    output: str | Path,
    thumb_size: int,
    batch: int,
    launch: bool,
    type_hints: Mapping[str, str] | None,
    chunk_size: int,
) -> Path:
    """Materialize the viewer: copy images, make thumbnails, write metadata
    chunks and the viewer HTML into ``output``.

    Returns the path of the generated ``viewer.html``. When ``launch`` is
    True, opens it in the default browser.
    """
    out = Path(output)
    out.mkdir(parents=True, exist_ok=True)
    img_dir = out / "images"
    thumb_dir = out / "thumbs"
    # Only create image/thumbnail directories when there is something to copy.
    if image_columns:
        img_dir.mkdir(parents=True, exist_ok=True)
        thumb_dir.mkdir(parents=True, exist_ok=True)

    records: list[dict] = []
    chunk_index = 0
    chunk_paths: list[Path] = []
    columns = list(df.columns)
    # Iterate positionally (index reset) so row numbers match image file names.
    for i, row in enumerate(df.reset_index(drop=True).itertuples(index=False, name=None)):
        row_map = dict(zip(columns, row))
        img_map: dict[str, str | None] = {}
        for col in image_columns:
            raw_path = row_map[col]
            img_id = None
            if pd.notna(raw_path):
                raw_text = str(raw_path)
                if raw_text.startswith("http://") or raw_text.startswith("https://"):
                    # Remote images are not copied; the viewer presumably
                    # falls back to the raw URL value — TODO confirm.
                    img_id = None
                else:
                    img_path = Path(raw_text)
                    ext = img_path.suffix.lower() if img_path.suffix else ".jpg"
                    # Stable per-row, per-column name, e.g. 000042_image.jpg.
                    name = f"{i:06d}_{col}{ext}"
                    if img_path.exists():
                        shutil.copy(img_path, img_dir / name)
                        make_thumbnail(img_path, thumb_dir / name, thumb_size)
                        img_id = name
            img_map[col] = img_id

        # Each record carries the local image ids plus JSON-safe cell values.
        record = {"__img_id__": img_map}
        for col in columns:
            record[col] = _to_jsonable(row_map[col])
        records.append(record)

        # Flush a metadata chunk once it reaches chunk_size rows.
        if chunk_size > 0 and len(records) >= chunk_size:
            chunk_path = out / f"meta_{chunk_index:04d}.js"
            write_meta_chunk_js(chunk_path, records)
            chunk_paths.append(chunk_path)
            records = []
            chunk_index += 1

    # Flush the remainder; also emit one (possibly empty) chunk for an empty
    # DataFrame so the viewer always has at least one file to load.
    if records or not chunk_paths:
        chunk_path = out / f"meta_{chunk_index:04d}.js"
        write_meta_chunk_js(chunk_path, records)
        chunk_paths.append(chunk_path)
    column_types = _infer_column_types(df, image_columns=image_columns, type_hints=type_hints)
    write_viewer(
        out / "viewer.html",
        columns=columns,
        column_types=column_types,
        image_column=image_columns[0] if image_columns else "",
        image_columns=list(image_columns),
        batch_size=batch,
        chunk_count=len(chunk_paths),
    )
    html_path = out / "viewer.html"
    if launch:
        import webbrowser

        webbrowser.open(html_path.resolve().as_uri())
    return html_path
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from typing import Iterable
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def write_meta_jsonl(meta_path: Path, records: Iterable[dict]) -> None:
    """Write *records* to *meta_path* as UTF-8 JSON Lines (one dict per line)."""
    with meta_path.open("w", encoding="utf-8") as handle:
        for record in records:
            line = json.dumps(record, ensure_ascii=False)
            handle.write(line + "\n")
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def write_meta_chunk_js(meta_path: Path, records: list[dict]) -> None:
    """Serialize *records* into a JS file that assigns the chunk to
    ``window.__VISCSV_LAST_CHUNK__`` when loaded by the viewer page."""
    body = json.dumps(records, ensure_ascii=False)
    meta_path.write_text(f"window.__VISCSV_LAST_CHUNK__ = {body};", encoding="utf-8")
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
from PIL import Image, ImageDraw
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def _save_image(img: Image.Image, dst: Path, quality: int) -> None:
    """Save *img* to *dst*, choosing the format from the destination suffix.

    ``.png`` destinations are written losslessly (optimized); every other
    suffix is written as JPEG at the given *quality*.
    """
    ext = dst.suffix.lower()
    if ext == ".png":
        img.save(dst, "PNG", optimize=True)
    else:
        # BUGFIX: Pillow's JPEG encoder rejects RGBA/LA/P-mode images with
        # OSError; convert to RGB first so transparent PNG sources still save.
        if img.mode not in ("RGB", "L", "CMYK"):
            img = img.convert("RGB")
        img.save(dst, "JPEG", quality=quality)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def make_thumbnail(src: Path, dst: Path, size: int, quality: int = 85) -> bool:
    """Create a thumbnail of *src* at *dst*, bounded by ``size`` pixels.

    Returns True on success. On any failure (missing or corrupt image) a grey
    crossed-out placeholder is written instead and False is returned, so the
    viewer always has something to show.
    """
    try:
        # BUGFIX: the original never closed the file opened by Image.open;
        # the context manager releases the handle even on decode errors.
        with Image.open(src) as img:
            img.thumbnail((size, size))
            _save_image(img, dst, quality)
        return True
    except Exception:
        # Fallback: light-grey box with a diagonal cross, sized like the thumb.
        placeholder = Image.new("RGB", (size, size), (235, 235, 235))
        draw = ImageDraw.Draw(placeholder)
        draw.rectangle([(0, 0), (size - 1, size - 1)], outline=(190, 190, 190))
        draw.line([(0, 0), (size - 1, size - 1)], fill=(200, 200, 200), width=2)
        draw.line([(0, size - 1), (size - 1, 0)], fill=(200, 200, 200), width=2)
        _save_image(placeholder, dst, quality)
        return False
|