vlm-dataset-captioner 0.0.1__py3-none-any.whl → 0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,5 @@
1
+ """VLM Captioner - Uses a VLM to caption images from a dataset."""
2
+
3
+ from .vlm_caption import caption_entire_directory
4
+
5
+ __all__ = ["caption_entire_directory"]
@@ -49,7 +49,7 @@ def get_prompt_for_directory(directory_path):
49
49
  f"WARN: Prompt file not found for directory {prompt_file_path}. Using default prompt.",
50
50
  flush=True,
51
51
  )
52
- prompt = "Describe the image in detail."
52
+ prompt = "In one short sentence. The caption will be used for image indexing and search, so include relevant details. 1 sentence only."
53
53
 
54
54
  print(f"INFO: Using prompt: '{prompt}'", flush=True)
55
55
 
@@ -103,7 +103,7 @@ def caption_image(prompt, image, model, processor, max_new_tokens=None):
103
103
  # Generate caption
104
104
  generated_ids = model.generate(
105
105
  **inputs,
106
- max_new_tokens=128,
106
+ max_new_tokens=max_new_tokens or 128,
107
107
  do_sample=True,
108
108
  top_p=1.0,
109
109
  temperature=0.7,
@@ -159,13 +159,17 @@ def requires_caption(image_file, output_directory, overwrite):
159
159
  def caption_entire_directory(
160
160
  directory_path,
161
161
  output_directory,
162
- model,
163
- processor,
162
+ model_name="Qwen/Qwen2.5-VL-32B-Instruct",
163
+ model=None,
164
+ processor=None,
164
165
  max_new_tokens=None,
165
166
  ignore_substring=None,
166
167
  num_captions=None,
167
168
  overwrite=False,
168
169
  ):
170
+ if model is None or processor is None:
171
+ model, processor = init_model(model_name=model_name)
172
+
169
173
  print(
170
174
  f"INFO: Processing directory {directory_path} for image captions.", flush=True
171
175
  )
@@ -178,12 +182,12 @@ def caption_entire_directory(
178
182
  caption_entire_directory(
179
183
  subdir_path,
180
184
  os.path.join(output_directory, subdir),
181
- model,
182
- processor,
183
- max_new_tokens,
184
- ignore_substring,
185
- num_captions,
186
- overwrite,
185
+ model=model,
186
+ processor=processor,
187
+ max_new_tokens=max_new_tokens,
188
+ ignore_substring=ignore_substring,
189
+ num_captions=num_captions,
190
+ overwrite=overwrite,
187
191
  )
188
192
  else:
189
193
  prompt = get_prompt_for_directory(directory_path)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: vlm-dataset-captioner
3
- Version: 0.0.1
3
+ Version: 0.0.3
4
4
  Summary: Uses a VLM to caption images from a dataset.
5
5
  Author: Alex Senden
6
6
  Maintainer: Alex Senden
@@ -0,0 +1,7 @@
1
+ vlm_dataset_captioner/__init__.py,sha256=QxMUJP2aOo2WyjPS_ogVuzFanueOLtxQAz_jJj0FppY,158
2
+ vlm_dataset_captioner/vlm_caption.py,sha256=MMTGzxrNO2Bmp5YtAHaf8R-pR45460FHEXZW1qP_x6c,7200
3
+ vlm_dataset_captioner/vlm_caption_cli.py,sha256=i1SS43ga2hpxCAQ2XtOkzNFBfI0zKZ5y-aKWI6djt4M,2341
4
+ vlm_dataset_captioner-0.0.3.dist-info/METADATA,sha256=B73cLqao9zzJiUuulpjnWczGT-90XOdhH2uAPFXzna0,2430
5
+ vlm_dataset_captioner-0.0.3.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
6
+ vlm_dataset_captioner-0.0.3.dist-info/entry_points.txt,sha256=k-zH3SWvcplaeDuGV4J6OyHKLr9GieWcOhRB5sF2pEI,75
7
+ vlm_dataset_captioner-0.0.3.dist-info/RECORD,,
@@ -1,6 +0,0 @@
1
- vlm_dataset_captioner/vlm_caption.py,sha256=k711kghgWmWXZIYva8t7v2ew519BjwcchZt3vwzmfZc,6854
2
- vlm_dataset_captioner/vlm_caption_cli.py,sha256=i1SS43ga2hpxCAQ2XtOkzNFBfI0zKZ5y-aKWI6djt4M,2341
3
- vlm_dataset_captioner-0.0.1.dist-info/METADATA,sha256=b7B8SwZAIIs2DPsfoY0nUb3ZomPujDl0-LEYDGco-x8,2430
4
- vlm_dataset_captioner-0.0.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
5
- vlm_dataset_captioner-0.0.1.dist-info/entry_points.txt,sha256=k-zH3SWvcplaeDuGV4J6OyHKLr9GieWcOhRB5sF2pEI,75
6
- vlm_dataset_captioner-0.0.1.dist-info/RECORD,,