vlm-dataset-captioner 0.0.3__tar.gz → 0.0.4__tar.gz
This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {vlm_dataset_captioner-0.0.3 → vlm_dataset_captioner-0.0.4}/PKG-INFO +1 -1
- {vlm_dataset_captioner-0.0.3 → vlm_dataset_captioner-0.0.4}/vlm_dataset_captioner/vlm_caption.py +11 -2
- {vlm_dataset_captioner-0.0.3 → vlm_dataset_captioner-0.0.4}/vlm_dataset_captioner/vlm_caption_cli.py +7 -5
- {vlm_dataset_captioner-0.0.3 → vlm_dataset_captioner-0.0.4}/.gitignore +0 -0
- {vlm_dataset_captioner-0.0.3 → vlm_dataset_captioner-0.0.4}/README.md +0 -0
- {vlm_dataset_captioner-0.0.3 → vlm_dataset_captioner-0.0.4}/pyproject.toml +0 -0
- {vlm_dataset_captioner-0.0.3 → vlm_dataset_captioner-0.0.4}/vlm_dataset_captioner/__init__.py +0 -0
{vlm_dataset_captioner-0.0.3 → vlm_dataset_captioner-0.0.4}/vlm_dataset_captioner/vlm_caption.py
RENAMED
|
@@ -86,6 +86,8 @@ def contains_chinese(text_string):
|
|
|
86
86
|
def caption_image(prompt, image, model, processor, max_new_tokens=None):
|
|
87
87
|
messages = get_messages(prompt, image)
|
|
88
88
|
|
|
89
|
+
print(f"INFO: Generating caption for image: {image}.", flush=True)
|
|
90
|
+
|
|
89
91
|
# Prepare inputs for the model
|
|
90
92
|
text = processor.apply_chat_template(
|
|
91
93
|
messages, tokenize=False, add_generation_prompt=True
|
|
@@ -129,6 +131,8 @@ def caption_image(prompt, image, model, processor, max_new_tokens=None):
|
|
|
129
131
|
clean_up_tokenization_spaces=False,
|
|
130
132
|
)
|
|
131
133
|
|
|
134
|
+
print(f"INFO: Caption generated for image: {image}.", flush=True)
|
|
135
|
+
|
|
132
136
|
return output_text[0]
|
|
133
137
|
|
|
134
138
|
|
|
@@ -206,15 +210,20 @@ def caption_entire_directory(
|
|
|
206
210
|
caption += "\n"
|
|
207
211
|
|
|
208
212
|
while True:
|
|
209
|
-
|
|
213
|
+
individual_caption = caption_image(
|
|
210
214
|
prompt,
|
|
211
215
|
os.path.join(directory_path, image_file),
|
|
212
216
|
model,
|
|
213
217
|
processor,
|
|
214
218
|
max_new_tokens,
|
|
215
219
|
)
|
|
216
|
-
if not contains_chinese(
|
|
220
|
+
if not contains_chinese(individual_caption):
|
|
221
|
+
caption += individual_caption
|
|
217
222
|
break
|
|
223
|
+
print(
|
|
224
|
+
"WARN: Detected Chinese characters in caption. Regenerating...",
|
|
225
|
+
flush=True,
|
|
226
|
+
)
|
|
218
227
|
write_caption_to_file(image_file, caption, output_directory)
|
|
219
228
|
except Exception as e:
|
|
220
229
|
print(
|
{vlm_dataset_captioner-0.0.3 → vlm_dataset_captioner-0.0.4}/vlm_dataset_captioner/vlm_caption_cli.py
RENAMED
|
@@ -16,7 +16,7 @@ def parse_args():
|
|
|
16
16
|
parser.add_argument(
|
|
17
17
|
"--model",
|
|
18
18
|
type=str,
|
|
19
|
-
default=
|
|
19
|
+
default="Qwen/Qwen2.5-VL-32B-Instruct",
|
|
20
20
|
help="The HuggingFace model used to generate captions.",
|
|
21
21
|
)
|
|
22
22
|
parser.add_argument(
|
|
@@ -55,8 +55,10 @@ def parse_args():
|
|
|
55
55
|
def main():
|
|
56
56
|
args = parse_args()
|
|
57
57
|
model, processor = init_model(args.model)
|
|
58
|
-
|
|
59
|
-
output_dir =
|
|
58
|
+
|
|
59
|
+
output_dir = (
|
|
60
|
+
args.output_dir if args.output_dir is not None else f"{args.input_dir}_caption"
|
|
61
|
+
)
|
|
60
62
|
|
|
61
63
|
if args.model is not None:
|
|
62
64
|
print(f"INFO: Using model {args.model} for captioning.", flush=True)
|
|
@@ -71,8 +73,8 @@ def main():
|
|
|
71
73
|
caption_entire_directory(
|
|
72
74
|
args.input_dir,
|
|
73
75
|
output_dir,
|
|
74
|
-
model,
|
|
75
|
-
processor,
|
|
76
|
+
model=model,
|
|
77
|
+
processor=processor,
|
|
76
78
|
max_new_tokens=args.max_length,
|
|
77
79
|
ignore_substring=args.ignore_substring,
|
|
78
80
|
num_captions=args.num_captions,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{vlm_dataset_captioner-0.0.3 → vlm_dataset_captioner-0.0.4}/vlm_dataset_captioner/__init__.py
RENAMED
|
File without changes
|