vision-agent 0.2.183__py3-none-any.whl → 0.2.184__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- vision_agent/tools/__init__.py +1 -0
- vision_agent/tools/tools.py +76 -0
- {vision_agent-0.2.183.dist-info → vision_agent-0.2.184.dist-info}/METADATA +1 -1
- {vision_agent-0.2.183.dist-info → vision_agent-0.2.184.dist-info}/RECORD +6 -6
- {vision_agent-0.2.183.dist-info → vision_agent-0.2.184.dist-info}/LICENSE +0 -0
- {vision_agent-0.2.183.dist-info → vision_agent-0.2.184.dist-info}/WHEEL +0 -0
vision_agent/tools/__init__.py
CHANGED
vision_agent/tools/tools.py
CHANGED
@@ -1773,6 +1773,82 @@ def closest_box_distance(
|
|
1773
1773
|
return cast(float, np.sqrt(horizontal_distance**2 + vertical_distance**2))
|
1774
1774
|
|
1775
1775
|
|
1776
|
+
def flux_image_inpainting(
    prompt: str,
    image: np.ndarray,
    mask: np.ndarray,
) -> np.ndarray:
    """'flux_image_inpainting' performs image inpainting to fill the masked regions of
    an image, guided by the text prompt and the surrounding image context.
    It can be used to edit regions of an image according to the prompt given.

    Parameters:
        prompt (str): A detailed text description guiding what should be generated
            in the masked area. More detailed and specific prompts typically yield better results.
        image (np.ndarray): The source image to be inpainted.
            The image will serve as the base context for the inpainting process.
        mask (np.ndarray): A binary mask image with 0's and 1's,
            where 1 indicates areas to be inpainted and 0 indicates areas to be preserved.
            Its height and width must match those of the image.

    Returns:
        np.ndarray:
            The generated image as a numpy array in RGB format
            with values ranging from 0 to 255.

    Raises:
        ValueError: If the image or mask is smaller than 8 pixels in height or width
            (or has fewer than 2 dimensions), if the mask height/width differs from
            the image height/width, or if the mask contains values other than 0 and 1.

    -------
    Example:
        >>> # Generate inpainting
        >>> result = flux_image_inpainting(
        ...     prompt="a modern black leather sofa with white pillows",
        ...     image=image,
        ...     mask=mask,
        ... )
        >>> save_image(result, "inpainted_room.png")
    """
    # Reject arrays without a height and width up front so the size check below
    # cannot fail with an unrelated IndexError.
    if (
        image.ndim < 2
        or mask.ndim < 2
        or min(*image.shape[:2], *mask.shape[:2]) < 8
    ):
        raise ValueError("The image or mask does not have enough size for inpainting")

    # The request's height/width are taken from the image only, so a mask of a
    # different size would be inconsistent with the payload; fail before sending.
    if mask.shape[:2] != image.shape[:2]:
        raise ValueError(
            f"The mask size {mask.shape[:2]} does not match "
            f"the image size {image.shape[:2]}"
        )

    # A mask is binary when a round trip through bool leaves every value unchanged.
    if not np.array_equal(mask, mask.astype(bool).astype(int)):
        raise ValueError("The mask should be a binary mask with 0's and 1's")

    # Scale the 0/1 mask to 0/255 uint8 before encoding it as an image file.
    mask = np.where(mask > 0, 255, 0).astype(np.uint8)

    files = [
        ("image", numpy_to_bytes(image)),
        ("mask_image", numpy_to_bytes(mask)),
    ]

    payload = {
        "prompt": prompt,
        "task": "inpainting",
        "height": image.shape[0],
        "width": image.shape[1],
        "strength": 0.99,
        "guidance_scale": 18,
        "num_inference_steps": 20,
        "seed": None,
    }

    response = send_inference_request(
        payload=payload,
        endpoint_name="flux1",
        files=files,
        v2=True,
        metadata_payload={"function_name": "flux_image_inpainting"},
    )

    # Only the first entry of the response is decoded and returned.
    output_image = np.array(b64_to_pil(response[0]).convert("RGB"))
    return output_image
|
1850
|
+
|
1851
|
+
|
1776
1852
|
# Utility and visualization functions
|
1777
1853
|
|
1778
1854
|
|
@@ -16,11 +16,11 @@ vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1r
|
|
16
16
|
vision_agent/lmm/__init__.py,sha256=jyY1sJb_tYKg5-Wzs3p1lvwFkc-aUNZfMcLy3TOC4Zg,100
|
17
17
|
vision_agent/lmm/lmm.py,sha256=B5ClgwvbybVCWkf9opDMLjTtJZemUU4KUkQoRxGh43I,16787
|
18
18
|
vision_agent/lmm/types.py,sha256=ZEXR_ptBL0ZwDMTDYkgxUCmSZFmBYPQd2jreNzr_8UY,221
|
19
|
-
vision_agent/tools/__init__.py,sha256=
|
19
|
+
vision_agent/tools/__init__.py,sha256=KVP4_6qxOb2lpFdQgQtyDfdkMLL1O6wVZNK19MXp-xo,2798
|
20
20
|
vision_agent/tools/meta_tools.py,sha256=by7TIbH7lsLIayX_Pe2mS1iw8aeLn2T8yqAo8SkB9Kg,32074
|
21
21
|
vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
|
22
22
|
vision_agent/tools/tool_utils.py,sha256=VPGqGJ2ZYEJA6AW7K9X7hQv6vRlMtAQcybE4izdToCw,8196
|
23
|
-
vision_agent/tools/tools.py,sha256=
|
23
|
+
vision_agent/tools/tools.py,sha256=kHeBjiVvncQJeL_Gni84bgHOCgxko4XO7otpt8IyWU4,83610
|
24
24
|
vision_agent/tools/tools_types.py,sha256=8hYf2OZhI58gvf65KGaeGkt4EQ56nwLFqIQDPHioOBc,2339
|
25
25
|
vision_agent/utils/__init__.py,sha256=7fMgbZiEwbNS0fBOS_hJI5PuEYBblw36zLi_UjUzvj4,244
|
26
26
|
vision_agent/utils/exceptions.py,sha256=booSPSuoULF7OXRr_YbC4dtKt6gM_HyiFQHBuaW86C4,2052
|
@@ -29,7 +29,7 @@ vision_agent/utils/image_utils.py,sha256=rm9GfXvD4JrjnqKrP_f2gfq4SzmqYC0IdC1kKwd
|
|
29
29
|
vision_agent/utils/sim.py,sha256=ZuSS07TUXFGjipmiQoY8TKRmSes7XXCdtU9PI8PC1sw,5609
|
30
30
|
vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
|
31
31
|
vision_agent/utils/video.py,sha256=fOPR48-SuwMbE5eB5rc2F7lVo6k1mVHn26eEJ0QCslc,5602
|
32
|
-
vision_agent-0.2.
|
33
|
-
vision_agent-0.2.
|
34
|
-
vision_agent-0.2.
|
35
|
-
vision_agent-0.2.
|
32
|
+
vision_agent-0.2.184.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
33
|
+
vision_agent-0.2.184.dist-info/METADATA,sha256=n8BeCLsPCBXDsr0FCmRBtScseMyJ8TuR68MWlqeO9Is,18330
|
34
|
+
vision_agent-0.2.184.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
35
|
+
vision_agent-0.2.184.dist-info/RECORD,,
|
File without changes
|
File without changes
|