PyPI - vlm-dataset-captioner - Versions diffs - 0.0.1__py3-none-any.whl → 0.0.3__py3-none-any.whl - Mend

vlm-dataset-captioner 0.0.1__py3-none-any.whl → 0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

vlm_dataset_captioner/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+"""VLM Captioner - Uses a VLM to caption images from a dataset."""
+from .vlm_caption import caption_entire_directory
+__all__ = ["caption_entire_directory"]

vlm_dataset_captioner/vlm_caption.py CHANGED Viewed

@@ -49,7 +49,7 @@ def get_prompt_for_directory(directory_path):
             f"WARN: Prompt file not found for directory {prompt_file_path}. Using default prompt.",
             flush=True,
         )
-        prompt = "Describe the image in detail."
+        prompt = "In one short sentence. The caption will be used for image indexing and search, so include relevant details. 1 sentence only."
     print(f"INFO: Using prompt: '{prompt}'", flush=True)
@@ -103,7 +103,7 @@ def caption_image(prompt, image, model, processor, max_new_tokens=None):
     # Generate caption
     generated_ids = model.generate(
         **inputs,
-        max_new_tokens=128,
+        max_new_tokens=max_new_tokens or 128,
         do_sample=True,
         top_p=1.0,
         temperature=0.7,
@@ -159,13 +159,17 @@ def requires_caption(image_file, output_directory, overwrite):
 def caption_entire_directory(
     directory_path,
     output_directory,
-    model,
-    processor,
+    model_name="Qwen/Qwen2.5-VL-32B-Instruct",
+    model=None,
+    processor=None,
     max_new_tokens=None,
     ignore_substring=None,
     num_captions=None,
     overwrite=False,
 ):
+    if model is None or processor is None:
+        model, processor = init_model(model_name=model_name)
     print(
         f"INFO: Processing directory {directory_path} for image captions.", flush=True
     )
@@ -178,12 +182,12 @@ def caption_entire_directory(
                     caption_entire_directory(
                         subdir_path,
                         os.path.join(output_directory, subdir),
-                        model,
-                        processor,
-                        max_new_tokens,
-                        ignore_substring,
-                        num_captions,
-                        overwrite,
+                        model=model,
+                        processor=processor,
+                        max_new_tokens=max_new_tokens,
+                        ignore_substring=ignore_substring,
+                        num_captions=num_captions,
+                        overwrite=overwrite,
                     )
     else:
         prompt = get_prompt_for_directory(directory_path)

{vlm_dataset_captioner-0.0.1.dist-info → vlm_dataset_captioner-0.0.3.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: vlm-dataset-captioner
-Version: 0.0.1
+Version: 0.0.3
 Summary: Uses a VLM to caption images from a dataset.
 Author: Alex Senden
 Maintainer: Alex Senden

vlm_dataset_captioner-0.0.3.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,7 @@
+vlm_dataset_captioner/__init__.py,sha256=QxMUJP2aOo2WyjPS_ogVuzFanueOLtxQAz_jJj0FppY,158
+vlm_dataset_captioner/vlm_caption.py,sha256=MMTGzxrNO2Bmp5YtAHaf8R-pR45460FHEXZW1qP_x6c,7200
+vlm_dataset_captioner/vlm_caption_cli.py,sha256=i1SS43ga2hpxCAQ2XtOkzNFBfI0zKZ5y-aKWI6djt4M,2341
+vlm_dataset_captioner-0.0.3.dist-info/METADATA,sha256=B73cLqao9zzJiUuulpjnWczGT-90XOdhH2uAPFXzna0,2430
+vlm_dataset_captioner-0.0.3.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+vlm_dataset_captioner-0.0.3.dist-info/entry_points.txt,sha256=k-zH3SWvcplaeDuGV4J6OyHKLr9GieWcOhRB5sF2pEI,75
+vlm_dataset_captioner-0.0.3.dist-info/RECORD,,

vlm_dataset_captioner-0.0.1.dist-info/RECORD DELETED Viewed

@@ -1,6 +0,0 @@
-vlm_dataset_captioner/vlm_caption.py,sha256=k711kghgWmWXZIYva8t7v2ew519BjwcchZt3vwzmfZc,6854
-vlm_dataset_captioner/vlm_caption_cli.py,sha256=i1SS43ga2hpxCAQ2XtOkzNFBfI0zKZ5y-aKWI6djt4M,2341
-vlm_dataset_captioner-0.0.1.dist-info/METADATA,sha256=b7B8SwZAIIs2DPsfoY0nUb3ZomPujDl0-LEYDGco-x8,2430
-vlm_dataset_captioner-0.0.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-vlm_dataset_captioner-0.0.1.dist-info/entry_points.txt,sha256=k-zH3SWvcplaeDuGV4J6OyHKLr9GieWcOhRB5sF2pEI,75
-vlm_dataset_captioner-0.0.1.dist-info/RECORD,,

{vlm_dataset_captioner-0.0.1.dist-info → vlm_dataset_captioner-0.0.3.dist-info}/WHEEL RENAMED Viewed

File without changes

{vlm_dataset_captioner-0.0.1.dist-info → vlm_dataset_captioner-0.0.3.dist-info}/entry_points.txt RENAMED Viewed

File without changes

vlm-dataset-captioner 0.0.1__py3-none-any.whl → 0.0.3__py3-none-any.whl

vlm-dataset-captioner 0.0.1__py3-none-any.whl → 0.0.3__py3-none-any.whl