versionhq 1.2.4.13__tar.gz → 1.2.4.14__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (176) hide show
  1. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/.gitignore +2 -0
  2. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/PKG-INFO +2 -1
  3. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/pyproject.toml +2 -1
  4. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/requirements.txt +1 -0
  5. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/__init__.py +1 -1
  6. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/_utils/__init__.py +1 -0
  7. versionhq-1.2.4.14/src/versionhq/_utils/handle_directory.py +15 -0
  8. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/agent/model.py +5 -3
  9. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/tool/gpt/_enum.py +1 -1
  10. versionhq-1.2.4.14/src/versionhq/tool/gpt/cua.py +456 -0
  11. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq.egg-info/PKG-INFO +2 -1
  12. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq.egg-info/SOURCES.txt +1 -0
  13. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq.egg-info/requires.txt +1 -0
  14. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/tests/tool/gpt_test.py +3 -13
  15. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/uv.lock +312 -214
  16. versionhq-1.2.4.13/src/versionhq/tool/gpt/cua.py +0 -295
  17. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/.env.sample +0 -0
  18. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/.github/workflows/deploy_docs.yml +0 -0
  19. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/.github/workflows/publish.yml +0 -0
  20. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/.github/workflows/publish_testpypi.yml +0 -0
  21. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/.github/workflows/run_tests.yml +0 -0
  22. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/.github/workflows/security_check.yml +0 -0
  23. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/.pre-commit-config.yaml +0 -0
  24. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/.python-version +0 -0
  25. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/LICENSE +0 -0
  26. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/README.md +0 -0
  27. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/SECURITY.md +0 -0
  28. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/db/preprocess.py +0 -0
  29. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/docs/CNAME +0 -0
  30. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/docs/_logos/favicon.ico +0 -0
  31. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/docs/_logos/logo192.png +0 -0
  32. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/docs/core/agent/config.md +0 -0
  33. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/docs/core/agent/index.md +0 -0
  34. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/docs/core/agent/task-handling.md +0 -0
  35. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/docs/core/agent-network/config.md +0 -0
  36. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/docs/core/agent-network/form.md +0 -0
  37. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/docs/core/agent-network/index.md +0 -0
  38. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/docs/core/agent-network/ref.md +0 -0
  39. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/docs/core/knowledge.md +0 -0
  40. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/docs/core/llm/index.md +0 -0
  41. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/docs/core/memory.md +0 -0
  42. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/docs/core/rag-tool.md +0 -0
  43. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/docs/core/task/evaluation.md +0 -0
  44. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/docs/core/task/index.md +0 -0
  45. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/docs/core/task/reference.md +0 -0
  46. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/docs/core/task/response-field.md +0 -0
  47. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/docs/core/task/task-execution.md +0 -0
  48. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/docs/core/task/task-output.md +0 -0
  49. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/docs/core/task/task-strc-response.md +0 -0
  50. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/docs/core/task-graph/index.md +0 -0
  51. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/docs/core/tool.md +0 -0
  52. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/docs/index.md +0 -0
  53. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/docs/quickstart.md +0 -0
  54. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/docs/stylesheets/main.css +0 -0
  55. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/docs/tags.md +0 -0
  56. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/mkdocs.yml +0 -0
  57. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/requirements-dev.txt +0 -0
  58. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/runtime.txt +0 -0
  59. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/setup.cfg +0 -0
  60. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/_prompt/auto_feedback.py +0 -0
  61. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/_prompt/constants.py +0 -0
  62. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/_prompt/model.py +0 -0
  63. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/_utils/convert_img_url.py +0 -0
  64. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/_utils/i18n.py +0 -0
  65. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/_utils/is_valid_enum.py +0 -0
  66. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/_utils/is_valid_url.py +0 -0
  67. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/_utils/llm_as_a_judge.py +0 -0
  68. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/_utils/logger.py +0 -0
  69. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/_utils/process_config.py +0 -0
  70. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/_utils/usage_metrics.py +0 -0
  71. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/_utils/vars.py +0 -0
  72. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/agent/TEMPLATES/Backstory.py +0 -0
  73. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/agent/TEMPLATES/__init__.py +0 -0
  74. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/agent/__init__.py +0 -0
  75. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/agent/inhouse_agents.py +0 -0
  76. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/agent/parser.py +0 -0
  77. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/agent/rpm_controller.py +0 -0
  78. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/agent_network/__init__.py +0 -0
  79. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/agent_network/formation.py +0 -0
  80. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/agent_network/model.py +0 -0
  81. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/cli/__init__.py +0 -0
  82. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/clients/__init__.py +0 -0
  83. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/clients/customer/__init__.py +0 -0
  84. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/clients/customer/model.py +0 -0
  85. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/clients/product/__init__.py +0 -0
  86. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/clients/product/model.py +0 -0
  87. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/clients/workflow/__init__.py +0 -0
  88. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/clients/workflow/model.py +0 -0
  89. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/knowledge/__init__.py +0 -0
  90. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/knowledge/_utils.py +0 -0
  91. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/knowledge/embedding.py +0 -0
  92. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/knowledge/model.py +0 -0
  93. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/knowledge/source.py +0 -0
  94. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/knowledge/source_docling.py +0 -0
  95. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/knowledge/storage.py +0 -0
  96. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/llm/__init__.py +0 -0
  97. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/llm/llm_vars.py +0 -0
  98. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/llm/model.py +0 -0
  99. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/memory/__init__.py +0 -0
  100. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/memory/contextual_memory.py +0 -0
  101. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/memory/model.py +0 -0
  102. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/storage/__init__.py +0 -0
  103. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/storage/base.py +0 -0
  104. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/storage/ltm_sqlite_storage.py +0 -0
  105. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/storage/mem0_storage.py +0 -0
  106. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/storage/rag_storage.py +0 -0
  107. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/storage/task_output_storage.py +0 -0
  108. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/storage/utils.py +0 -0
  109. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/task/TEMPLATES/Description.py +0 -0
  110. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/task/__init__.py +0 -0
  111. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/task/evaluation.py +0 -0
  112. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/task/formatter.py +0 -0
  113. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/task/model.py +0 -0
  114. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/task/structured_response.py +0 -0
  115. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/task_graph/__init__.py +0 -0
  116. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/task_graph/colors.py +0 -0
  117. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/task_graph/draft.py +0 -0
  118. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/task_graph/model.py +0 -0
  119. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/tool/__init__.py +0 -0
  120. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/tool/cache_handler.py +0 -0
  121. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/tool/composio/__init__.py +0 -0
  122. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/tool/composio/model.py +0 -0
  123. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/tool/composio/params.py +0 -0
  124. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/tool/decorator.py +0 -0
  125. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/tool/gpt/__init__.py +0 -0
  126. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/tool/gpt/file_search.py +0 -0
  127. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/tool/gpt/web_search.py +0 -0
  128. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/tool/model.py +0 -0
  129. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/tool/rag_tool.py +0 -0
  130. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq/tool/tool_handler.py +0 -0
  131. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq.egg-info/dependency_links.txt +0 -0
  132. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/src/versionhq.egg-info/top_level.txt +0 -0
  133. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/tests/__init__.py +0 -0
  134. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/tests/_prompt/auto_feedback_test.py +0 -0
  135. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/tests/_prompt/prompt_test.py +0 -0
  136. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/tests/_sample/sample.csv +0 -0
  137. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/tests/_sample/sample.json +0 -0
  138. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/tests/_sample/sample.mp3 +0 -0
  139. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/tests/_sample/screenshot.png +0 -0
  140. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/tests/agent/__init__.py +0 -0
  141. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/tests/agent/agent_test.py +0 -0
  142. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/tests/agent/doc_test.py +0 -0
  143. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/tests/agent_network/Prompts/Demo_test.py +0 -0
  144. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/tests/agent_network/__init__.py +0 -0
  145. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/tests/agent_network/agent_network_test.py +0 -0
  146. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/tests/agent_network/doc_test.py +0 -0
  147. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/tests/cli/__init__.py +0 -0
  148. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/tests/clients/customer_test.py +0 -0
  149. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/tests/clients/product_test.py +0 -0
  150. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/tests/clients/workflow_test.py +0 -0
  151. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/tests/conftest.py +0 -0
  152. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/tests/doc_test.py +0 -0
  153. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/tests/formation_test.py +0 -0
  154. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/tests/knowledge/__init__.py +0 -0
  155. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/tests/knowledge/knowledge_test.py +0 -0
  156. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/tests/knowledge/mock_report_compressed.pdf +0 -0
  157. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/tests/llm/__init__.py +0 -0
  158. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/tests/llm/llm_connection_test.py +0 -0
  159. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/tests/llm/llm_test.py +0 -0
  160. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/tests/memory/__init__.py +0 -0
  161. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/tests/memory/memory_test.py +0 -0
  162. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/tests/task/__init__.py +0 -0
  163. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/tests/task/doc_eval_test.py +0 -0
  164. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/tests/task/doc_taskoutput_test.py +0 -0
  165. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/tests/task/doc_test.py +0 -0
  166. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/tests/task/eval_test.py +0 -0
  167. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/tests/task/task_test.py +0 -0
  168. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/tests/task_graph/__init__.py +0 -0
  169. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/tests/task_graph/doc_test.py +0 -0
  170. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/tests/task_graph/task_graph_test.py +0 -0
  171. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/tests/tool/__init__.py +0 -0
  172. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/tests/tool/composio_test.py +0 -0
  173. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/tests/tool/doc_test.py +0 -0
  174. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/tests/tool/rag_tool_test.py +0 -0
  175. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/tests/tool/tool_test.py +0 -0
  176. {versionhq-1.2.4.13 → versionhq-1.2.4.14}/tests/usecase_test.py +0 -0
@@ -43,3 +43,5 @@ __pycache__
43
43
 
44
44
  .env
45
45
  .venv/
46
+
47
+ _screenshots/
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: versionhq
3
- Version: 1.2.4.13
3
+ Version: 1.2.4.14
4
4
  Summary: Autonomous agent networks for task automation with multi-step reasoning.
5
5
  Author-email: Kuriko Iwai <kuriko@versi0n.io>
6
6
  License: MIT License
@@ -77,6 +77,7 @@ Provides-Extra: tools
77
77
  Requires-Dist: html2text>=2024.2.26; extra == "tools"
78
78
  Requires-Dist: sec-api>=1.0.28; extra == "tools"
79
79
  Requires-Dist: pytest-playwright>=0.7.0; extra == "tools"
80
+ Requires-Dist: selenium>=4.29.0; extra == "tools"
80
81
  Provides-Extra: torch
81
82
  Requires-Dist: torch>=2.6.0; extra == "torch"
82
83
  Requires-Dist: torchvision>=0.21.0; extra == "torch"
@@ -15,7 +15,7 @@ exclude = ["test*", "__pycache__", "*.egg-info"]
15
15
 
16
16
  [project]
17
17
  name = "versionhq"
18
- version = "1.2.4.13"
18
+ version = "1.2.4.14"
19
19
  authors = [{ name = "Kuriko Iwai", email = "kuriko@versi0n.io" }]
20
20
  description = "Autonomous agent networks for task automation with multi-step reasoning."
21
21
  readme = "README.md"
@@ -82,6 +82,7 @@ tools = [
82
82
  "html2text>=2024.2.26",
83
83
  "sec-api>=1.0.28",
84
84
  "pytest-playwright>=0.7.0",
85
+ "selenium>=4.29.0",
85
86
  ]
86
87
  torch = [
87
88
  "torch>=2.6.0",
@@ -19,3 +19,4 @@ boto3>=1.37.1
19
19
  scikit-learn>=1.6.1
20
20
  numpy>=1.26.4
21
21
  pandas>=2.2.3
22
+ selenium==4.29.0
@@ -35,7 +35,7 @@ from versionhq.agent_network.formation import form_agent_network
35
35
  from versionhq.task_graph.draft import workflow
36
36
 
37
37
 
38
- __version__ = "1.2.4.13"
38
+ __version__ = "1.2.4.14"
39
39
  __all__ = [
40
40
  "Agent",
41
41
 
@@ -5,3 +5,4 @@ from versionhq._utils.is_valid_url import is_valid_url
5
5
  from versionhq._utils.usage_metrics import UsageMetrics, ErrorType
6
6
  from versionhq._utils.convert_img_url import convert_img_url
7
7
  from versionhq._utils.is_valid_enum import is_valid_enum
8
+ from versionhq._utils.handle_directory import handle_directory
@@ -0,0 +1,15 @@
1
+ import os
2
+ import datetime
3
+ from pathlib import Path
4
+
5
+
6
def handle_directory(directory_name: str = None, filename: str = None, ext: str = 'png') -> Path:
    """Create a directory and return a dated file path inside it.

    Only the directory is created; the file itself is not written.

    Args:
        directory_name: Directory to create, resolved against the current
            working directory (an absolute path is used as given). When
            omitted or empty, the current working directory is used.
        filename: Stem of the file name. When omitted or empty, the name
            consists of the date suffix only.
        ext: File extension, with or without a leading dot. When empty, no
            extension is appended.

    Returns:
        The absolute path ``<directory>/<filename>_<day-of-year>.<ext>``.
    """

    # The original passed `directory_name` straight to os.makedirs, which
    # raises TypeError for the default value None.
    target_dir = Path.cwd() / directory_name if directory_name else Path.cwd()
    target_dir.mkdir(parents=True, exist_ok=True)

    # '%j' is the zero-padded day of the year, so every call made on the same
    # day with the same arguments yields the same path.
    day_of_year = datetime.datetime.now().strftime('%j')
    stem = f'{filename}_{day_of_year}' if filename else day_of_year
    suffix = f".{ext.lstrip('.')}" if ext else ''

    return target_dir / f'{stem}{suffix}'
@@ -454,14 +454,14 @@ class Agent(BaseModel):
454
454
  return rag_tools, gpt_tools, tools
455
455
 
456
456
 
457
- def _handle_gpt_tools(self, gpt_tools: list[Any] = None) -> Any: # TaskOutput
457
+ def _handle_gpt_tools(self, gpt_tools: list[Any] = None) -> Any: # TaskOutput or None
458
458
  """Generates k, v pairs from multiple GPT tool results and stores them in TaskOutput class."""
459
459
 
460
460
  from versionhq.task.model import TaskOutput
461
461
  from versionhq._utils import UsageMetrics
462
462
 
463
463
  if not gpt_tools:
464
- return
464
+ return None
465
465
 
466
466
  tool_res = dict()
467
467
  annotation_set = dict()
@@ -470,7 +470,9 @@ class Agent(BaseModel):
470
470
  for i, item in enumerate(gpt_tools):
471
471
  raw, annotations, usage = item.run()
472
472
  tool_res.update({ str(i): raw })
473
- annotation_set.update({ str(i): annotations })
473
+
474
+ if annotations:
475
+ annotation_set.update({ str(i): annotations })
474
476
  total_usage.aggregate(metrics=usage)
475
477
 
476
478
  res = TaskOutput(raw=str(tool_res), tool_output=tool_res, usage=total_usage, annotations=annotation_set)
@@ -7,7 +7,7 @@ class GPTSizeEnum(str, Enum):
7
7
  HIGH = "high"
8
8
 
9
9
 
10
- class GPTCUAEnvironmentEnum(str, Enum):
10
+ class GPTCUABrowserEnum(str, Enum):
11
11
  BROWSER = "browser"
12
12
  MAC = "mac"
13
13
  WINDOWS = "windows"
@@ -0,0 +1,456 @@
1
+ import base64
2
+ import datetime
3
+ import time
4
+ import platform
5
+ from typing import List, Dict, Any, Tuple
6
+
7
+ from versionhq._utils import convert_img_url
8
+ from versionhq.tool.gpt import openai_client
9
+ from versionhq.tool.gpt._enum import GPTCUABrowserEnum, GPTCUATypeEnum, GPTSizeEnum
10
+ from versionhq._utils import is_valid_enum, UsageMetrics, ErrorType, Logger, is_valid_url, handle_directory
11
+
12
+ allowed_browsers = ['chromium', 'firefox']  # values GPTToolCUA.__init__ accepts for `browser`; anything else falls back to 'chromium'
13
+
14
+
15
class CUAToolSchema:
    """Settings for one computer-use tool entry.

    The class attributes are the defaults. `__init__` replaces a default only
    when a truthy value is passed and, for `type` and `environment`, when the
    value also passes `is_valid_enum`.
    """

    type: str = GPTCUATypeEnum.COMPUTER_USE_PREVIEW.value
    display_width: int = 1024
    display_height: int = 768
    environment: str = GPTCUABrowserEnum.BROWSER.value

    def __init__(
        self,
        type: str | GPTCUATypeEnum = None,
        display_width: int = None,
        display_height: int = None,
        environment: str | GPTCUABrowserEnum = None
    ):
        # Falsy sizes (None or 0) keep the class-level defaults.
        self.display_height = display_height or self.display_height
        self.display_width = display_width or self.display_width

        if type and is_valid_enum(enum=GPTCUATypeEnum, val=type):
            if isinstance(type, GPTCUATypeEnum):
                type = type.value
            self.type = type

        if environment and is_valid_enum(enum=GPTCUABrowserEnum, val=environment):
            if isinstance(environment, GPTCUABrowserEnum):
                environment = environment.value
            self.environment = environment

    @property
    def schema(self) -> Dict[str, Any]:
        """Returns the settings as a plain dict with four keys."""

        def _plain(value: Any) -> Any:
            # Strings are passed through; anything else is expected to carry `.value`.
            return value if isinstance(value, str) else value.value

        return {
            "type": _plain(self.type),
            "display_width": self.display_width,
            "display_height": self.display_height,
            "environment": _plain(self.environment),
        }
45
+
46
+
47
+ class GPTToolCUA:
48
+ model: str = "computer-use-preview"
49
+ tools: List[CUAToolSchema] = list()
50
+ user_prompt: str = None
51
+ img_url: str = None
52
+ web_url: str = "https://www.google.com"
53
+ browser: str = "firefox"
54
+ reasoning_effort: str = GPTSizeEnum.MEDIUM.value
55
+ truncation: str = "auto"
56
+
57
+ _schema: Dict[str, Any] = dict()
58
+ _response_ids: List[str] = list()
59
+ _call_ids: List[str] = list()
60
+ _calls: Dict[str, Dict[str, Any]] = dict() # stores response_id and raw output object.
61
+ _usage: UsageMetrics = UsageMetrics()
62
+ _logger: Logger = Logger(info_file_save=True, filename="cua-task-{}".format(str(datetime.datetime.now().timestamp())) + ".png")
63
+
64
+
65
+ def __init__(
66
+ self,
67
+ user_prompt: str,
68
+ tools: List[CUAToolSchema] | CUAToolSchema = None,
69
+ img_url: str = None,
70
+ web_url: str = "https://www.google.com",
71
+ browser: str = "chromium",
72
+ reasoning_effort: GPTSizeEnum | str = None,
73
+ truncation: str = None,
74
+ _usage: UsageMetrics = UsageMetrics()
75
+ ):
76
+ self.user_prompt = user_prompt
77
+ self.web_url = web_url if is_valid_url(web_url) else None
78
+ self.browser = browser if browser in allowed_browsers else 'chromium'
79
+ self.truncation = truncation if truncation else self.truncation
80
+ self._usage = _usage
81
+ self._response_ids = list()
82
+ self._call_ids = list()
83
+
84
+ if img_url:
85
+ img_url = convert_img_url(img_url)
86
+ self.img_url = img_url
87
+
88
+ if reasoning_effort and is_valid_enum(enum=GPTSizeEnum, val=reasoning_effort):
89
+ self.reasoning_effort = reasoning_effort.value if isinstance(reasoning_effort, GPTSizeEnum) else reasoning_effort
90
+
91
+ if tools:
92
+ match tools:
93
+ case list():
94
+ if self.tools:
95
+ self.tools.extend(tools)
96
+ else:
97
+ self.tools = tools
98
+ case CUAToolSchema():
99
+ if self.tools:
100
+ self.tools.append(tools)
101
+ else:
102
+ self.tools = [tools]
103
+ case _:
104
+ pass
105
+
106
+
107
+ def _structure_schema(self, screenshot: str = None) -> None:
108
+ """Formats args schema for CUA calling."""
109
+
110
+ tool_schema = [item.schema for item in self.tools]
111
+ schema = dict()
112
+ inputs = list()
113
+ previous_response_id = self._response_ids[-1] if self._response_ids else None
114
+ # (self._response_ids[-1].startswith("rs") or self._response_ids[-1].startswith("resp")) else None
115
+
116
+ if self._call_ids:
117
+ inputs = [
118
+ {
119
+ "call_id": self._call_ids[-1],
120
+ "type": "computer_call_output",
121
+ }
122
+ ]
123
+ if screenshot:
124
+ inputs[0].update({ "output": { "type": "computer_screenshot", "image_url": f"data:image/png;base64,{str(screenshot)}"}})
125
+
126
+ # if self._calls:
127
+ # call = self._calls[self._call_ids[-1]]
128
+ # if call and call.call_id not in inputs[0]:
129
+ # inputs.append(call)
130
+
131
+ if previous_response_id:
132
+ schema = dict(
133
+ model=self.model,
134
+ previous_response_id=previous_response_id,
135
+ tools=tool_schema,
136
+ input=inputs,
137
+ truncation=self.truncation
138
+ )
139
+ else:
140
+ schema = dict(
141
+ model=self.model,
142
+ tools=tool_schema,
143
+ input=inputs,
144
+ truncation=self.truncation
145
+ )
146
+
147
+ else:
148
+ input = [{ "role": "user", "content": self.user_prompt } ]
149
+ img_url = convert_img_url(self.img_url) if self.img_url else None
150
+ if img_url:
151
+ input.append({"type": "input_image", "image_url": f"data:image/png;base64,{img_url}"})
152
+
153
+ schema = dict(
154
+ model=self.model,
155
+ tools=tool_schema,
156
+ input=input,
157
+ reasoning={ "effort": self.reasoning_effort},
158
+ truncation=self.truncation
159
+ )
160
+
161
+ self._schema = schema
162
+ # return self._schema
163
+
164
+
165
+ def _run(self, screenshot: str = None) -> Tuple[Dict[str, Any], None, UsageMetrics]:
166
+ raw_res = dict()
167
+ usage = self._usage if self._usage else UsageMetrics()
168
+ start_dt = datetime.datetime.now()
169
+
170
+ try:
171
+ self._structure_schema(screenshot=screenshot)
172
+ res = openai_client.responses.create(**self._schema)
173
+ if not res:
174
+ usage.record_errors(ErrorType.TOOL)
175
+ else:
176
+ self._response_ids.append(res.id)
177
+ for item in res.output:
178
+
179
+ match item.type:
180
+ case "reasoning":
181
+ reasoning = item.summary[0].text if item.summary and isinstance(item.summary, list) else str(item.summary) if item.summary else ""
182
+ raw_res.update(dict(reasoning=reasoning))
183
+ # self._response_ids.append(item.id)
184
+
185
+ case "computer_call":
186
+ raw_res.update(dict(action=item.action))
187
+ # self._response_ids.append(item.id)
188
+ call_id = item.call_id
189
+ self._call_ids.append(call_id)
190
+ self._calls.update({ call_id: item })
191
+ case _:
192
+ pass
193
+ usage.record_token_usage(**res.usage.__dict__)
194
+
195
+ except Exception as e:
196
+ self._logger.log(message=f"Failed to run: {str(e)}", color="red", level="error")
197
+ usage.record_errors(ErrorType.TOOL)
198
+
199
+ end_dt = datetime.datetime.now()
200
+ usage.record_latency(start_dt=start_dt, end_dt=end_dt)
201
+ return raw_res, None, usage
202
+
203
+
204
+ def invoke_playwright(self) -> Dict[str, Any]:
205
+ """Handles computer use loop. Ref. OpenAI official website."""
206
+ try:
207
+ from playwright.sync_api import sync_playwright
208
+ except Exception as e:
209
+ self._logger.log(level="error", message=f"Install Playwright by adding `versionhq[tools]` to requirements.txt or run `uv add playwright`. {str(e)}", color="red")
210
+ raise e
211
+
212
+ import os
213
+ os.environ["DEBUG"] = "pw:browser"
214
+ self._logger.log(message="Start computer use.", level="info", color="blue")
215
+ start_dt = datetime.datetime.now()
216
+ res = None
217
+
218
+ # try:
219
+ p = sync_playwright().start()
220
+ b = p.firefox if self.browser == "firefox" else p.chromium
221
+ browser = b.launch(headless=True)
222
+ page = browser.new_page()
223
+ if not browser or not page:
224
+ return None, None, None
225
+
226
+ if self.web_url:
227
+ page.goto(self.web_url, timeout=3000000, wait_until="load", referer=None)
228
+ time.sleep(3)
229
+
230
+ res, _, usage = self._run()
231
+ self._usage.aggregate(metrics=usage)
232
+ actions = [v for k, v in res.items() if k =="action"] if res else []
233
+ action = actions[0] if actions else None
234
+
235
+ if action:
236
+ while True:
237
+ x = action.x if hasattr(action, 'x') else 0
238
+ y = action.y if hasattr(action, 'y') else 0
239
+ scroll_x = action.scroll_x if hasattr(action, 'scroll_x') else 0
240
+ scroll_y = action.scroll_y if hasattr(action, 'scroll_y') else 0
241
+ text = action.text if hasattr(action, 'text') else ''
242
+ screenshot_base64 = None
243
+ path = handle_directory(directory_name='_screenshots', filename=f'cua_playwright', ext='png')
244
+
245
+ match action.type:
246
+ case "click":
247
+ self._logger.log(message="Action: click", color="blue", level="info")
248
+ button = action.button if hasattr(action, 'button') and (action.button == 'left' or action.button == 'right') else 'left'
249
+ page.mouse.move(x, y)
250
+ page.mouse.click(x, y, button=button)
251
+ time.sleep(1)
252
+
253
+ case "scroll":
254
+ self._logger.log(message="Action: scroll", color="blue", level="info")
255
+ page.mouse.move(x, y)
256
+ page.evaluate(f"window.scrollBy({scroll_x}, {scroll_y})")
257
+ time.sleep(1)
258
+
259
+ case "move":
260
+ self._logger.log(message="Action: move", color="blue", level="info")
261
+ page.mouse.move(x, y)
262
+ page.evaluate(f"window.scrollBy({scroll_x}, {scroll_y})")
263
+ time.sleep(1)
264
+
265
+ case "keypress":
266
+ self._logger.log(message="Action: keypress", color="blue", level="info")
267
+ keys = action.keys
268
+ for k in keys:
269
+ match k.lower():
270
+ case "enter": page.keyboard.press("Enter")
271
+ case "space": page.keyboard.press(" ")
272
+ case _: page.keyboard.press(k)
273
+ time.sleep(1)
274
+
275
+ case "type":
276
+ self._logger.log(message="Action: type", color="blue", level="info")
277
+ page.keyboard.type(text)
278
+ time.sleep(1)
279
+
280
+ case "wait":
281
+ self._logger.log(message="Action: wait", color="blue", level="info")
282
+ time.sleep(3)
283
+
284
+ case "screenshot":
285
+ self._logger.log(message="Action: screenshot", color="blue", level="info")
286
+ screenshot_bytes = page.screenshot(path=path)
287
+ screenshot_base64 = base64.b64encode(screenshot_bytes).decode("utf-8")
288
+ time.sleep(1)
289
+
290
+ case _:
291
+ self._logger.log(message=f"Unrecognized action: {action}", level="warning", color="yellow")
292
+ return False
293
+
294
+ if not screenshot_base64:
295
+ screenshot_bytes = page.screenshot(path=path)
296
+ screenshot_base64 = base64.b64encode(screenshot_bytes).decode("utf-8")
297
+ time.sleep(1)
298
+
299
+ res, _, usage = self._run(screenshot=screenshot_base64)
300
+ self._usage.aggregate(metrics=usage)
301
+ if not res:
302
+ usage.record_errors(type=ErrorType.API)
303
+ break
304
+
305
+ actions = [v for k, v in res.items() if k =="action"] if res else []
306
+ action = actions[0] if actions else None
307
+ if not action:
308
+ break
309
+ else:
310
+ self._usage.record_errors(type=ErrorType.TOOL)
311
+
312
+ # except Exception as e:
313
+ # self._logger.log(message=f"Failed to execute. {str(e)}", color="red", level="error")
314
+ # browser.close()
315
+
316
+ end_dt = datetime.datetime.now()
317
+ self._usage.record_latency(start_dt=start_dt, end_dt=end_dt)
318
+ return res
319
+
320
+
321
+ def invoke_selenium(self, **kwargs) -> Dict[str, Any]:
322
+ try:
323
+ from selenium import webdriver
324
+ from selenium.webdriver.common.keys import Keys
325
+ from selenium.webdriver.common.action_chains import ActionChains
326
+ from selenium.webdriver.common.actions.action_builder import ActionBuilder
327
+ except Exception as e:
328
+ self._logger.log(level="error", message=f"Install Selenium by `uv pip install versionhq[tools]` or `uv add selenium`. {str(e)}", color="red")
329
+ raise e
330
+
331
+ self._logger.log(message="Start computer use", level="info", color="blue")
332
+
333
+ start_dt = datetime.datetime.now()
334
+
335
+ driver = webdriver.Chrome(options=kwargs) if kwargs else webdriver.Chrome()
336
+ if self.tools:
337
+ driver.set_window_size(height=self.tools[0].display_height, width=self.tools[0].display_width)
338
+
339
+ if self.web_url:
340
+ driver.get(self.web_url)
341
+ time.sleep(3)
342
+
343
+ res, _, usage = self._run()
344
+ self._logger.log(message=f"Initial response: {res}", color="blue", level="info")
345
+ self._usage.aggregate(metrics=usage)
346
+ actions = [v for k, v in res.items() if k =="action"] if res else []
347
+ action = actions[0] if actions else None
348
+ action_chains = ActionChains(driver=driver)
349
+ action_builder = ActionBuilder(driver=driver)
350
+
351
+ if action:
352
+ while True:
353
+ x = action.x if hasattr(action, 'x') else 0
354
+ y = action.y if hasattr(action, 'y') else 0
355
+ scroll_x = action.scroll_x if hasattr(action, 'scroll_x') else 0
356
+ scroll_y = action.scroll_y if hasattr(action, 'scroll_y') else 0
357
+ text = action.text if hasattr(action, 'text') else ''
358
+ path = handle_directory(directory_name='_screenshots', filename=f'cua_selenium', ext='png')
359
+
360
+ match action.type:
361
+ case 'click':
362
+ self._logger.log(message="Action: click", color="blue", level="info")
363
+ driver.execute_script(f'window.scrollBy({x}, {y})')
364
+ action_chains.move_by_offset(xoffset=x, yoffset=y)
365
+ action_chains.perform()
366
+
367
+ if hasattr(action, 'button'):
368
+ match action.button:
369
+ case 'left':
370
+ action_chains.click()
371
+ case 'right':
372
+ action_chains.context_click()
373
+ action_chains.perform()
374
+ time.sleep(1)
375
+
376
+ case "scroll" | "move":
377
+ self._logger.log(message="Action: scroll", color="blue", level="info")
378
+ driver.execute_script(f'window.scrollBy({scroll_x}, {scroll_y})')
379
+ time.sleep(1)
380
+
381
+ case "keypress":
382
+ self._logger.log(message="Action: keypress", color="blue", level="info")
383
+ keys = action.keys
384
+ if keys:
385
+ for k in keys:
386
+ match k.lower():
387
+ case "enter": action_chains.key_down(Keys.ENTER).perform()
388
+ case "space": action_chains.key_down(Keys.SPACE).perform()
389
+ case "select_all":
390
+ if platform.system() == 'Darwin':
391
+ action_chains.send_keys(Keys.COMMAND + "a").perform()
392
+ else:
393
+ action_chains.send_keys(Keys.CONTROL + "a").perform()
394
+ case _:
395
+ action_chains.key_down(Keys.SHIFT).send_keys(k).key_up(Keys.SHIFT).perform()
396
+ time.sleep(1)
397
+
398
+ case "type":
399
+ self._logger.log(message="Action: type", color="blue", level="info")
400
+ action_chains.send_keys(text).perform()
401
+ time.sleep(1)
402
+
403
+ case "wait":
404
+ self._logger.log(message="Action: wait", color="blue", level="info")
405
+ action_chains.pause(3)
406
+
407
+ case "screenshot":
408
+ self._logger.log(message="Action: screenshot", color="blue", level="info")
409
+ driver.save_screenshot(path)
410
+ time.sleep(1)
411
+
412
+ case _:
413
+ self._logger.log(message=f"Unrecognized action: {action}", level="warning", color="yellow")
414
+ return False
415
+
416
+ with open(path, "rb") as image_file:
417
+ res, usage = None, None
418
+ if image_file:
419
+ screenshot_base64 = base64.b64encode(image_file.read()).decode("utf-8")
420
+ res, _, usage = self._run(screenshot=screenshot_base64)
421
+ else:
422
+ res, _, usage = self._run()
423
+
424
+ print("res", res)
425
+
426
+ self._usage.aggregate(metrics=usage)
427
+ if not res:
428
+ usage.record_errors(type=ErrorType.API)
429
+ break
430
+
431
+ actions = [v for k, v in res.items() if k =="action"] if res else []
432
+ action = actions[0] if actions else None
433
+ if not action:
434
+ self._logger.log(message="No action found.", color="yellow", level="warning")
435
+ break
436
+ else:
437
+ self._usage.record_errors(type=ErrorType.TOOL)
438
+
439
+ end_dt = datetime.datetime.now()
440
+ self._usage.record_latency(start_dt=start_dt, end_dt=end_dt)
441
+ return res
442
+
443
+
444
+ def run(self) -> Tuple[Dict[str, Any], None, UsageMetrics]:
445
+ """Core function to execute the tool."""
446
+
447
+ res = None
448
+ try:
449
+ res = self.invoke_playwright()
450
+ except:
451
+ self._call_ids = []
452
+ self._calls = dict()
453
+ self._response_ids = []
454
+ res = self.invoke_selenium()
455
+
456
+ return res, None, self._usage
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: versionhq
3
- Version: 1.2.4.13
3
+ Version: 1.2.4.14
4
4
  Summary: Autonomous agent networks for task automation with multi-step reasoning.
5
5
  Author-email: Kuriko Iwai <kuriko@versi0n.io>
6
6
  License: MIT License
@@ -77,6 +77,7 @@ Provides-Extra: tools
77
77
  Requires-Dist: html2text>=2024.2.26; extra == "tools"
78
78
  Requires-Dist: sec-api>=1.0.28; extra == "tools"
79
79
  Requires-Dist: pytest-playwright>=0.7.0; extra == "tools"
80
+ Requires-Dist: selenium>=4.29.0; extra == "tools"
80
81
  Provides-Extra: torch
81
82
  Requires-Dist: torch>=2.6.0; extra == "torch"
82
83
  Requires-Dist: torchvision>=0.21.0; extra == "torch"
@@ -55,6 +55,7 @@ src/versionhq/_prompt/constants.py
55
55
  src/versionhq/_prompt/model.py
56
56
  src/versionhq/_utils/__init__.py
57
57
  src/versionhq/_utils/convert_img_url.py
58
+ src/versionhq/_utils/handle_directory.py
58
59
  src/versionhq/_utils/i18n.py
59
60
  src/versionhq/_utils/is_valid_enum.py
60
61
  src/versionhq/_utils/is_valid_url.py
@@ -36,6 +36,7 @@ pygraphviz>=1.14
36
36
  html2text>=2024.2.26
37
37
  sec-api>=1.0.28
38
38
  pytest-playwright>=0.7.0
39
+ selenium>=4.29.0
39
40
 
40
41
  [torch]
41
42
  torch>=2.6.0
@@ -22,21 +22,11 @@ def test_gpt_cua():
22
22
  assert tool.reasoning_effort == "medium"
23
23
  assert isinstance(tool.tools, list)
24
24
  assert tool.tools[0].display_width == 500
25
- assert tool.tools[0].environment == "mac"
26
25
  assert tool.tools[0].type == "computer_use_preview"
27
- assert tool.schema is not None
28
-
29
- raw, _, usage = tool.run()
30
- assert raw is not None if usage.total_errors == 0 else raw == dict()
31
-
32
- if raw:
33
- assert isinstance(usage, UsageMetrics)
34
- assert usage.total_tokens > 0
35
- assert usage.latency > 0
36
26
 
37
- with patch.object(vhq.GPTToolCUA, "run", return_value=(dict(), None, UsageMetrics())) as mock_run:
38
- tool.invoke_playwright()
39
- mock_run.assert_called()
27
+ with patch.object(vhq.GPTToolCUA, "_structure_schema", return_value=None) as mock_schema:
28
+ tool.run()
29
+ mock_schema.assert_called()
40
30
 
41
31
 
42
32
  def test_gpt_web_search():