zrb 1.2.0__py3-none-any.whl → 1.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,13 +5,26 @@ from typing import Any
5
5
  from zrb.builtin.group import llm_group
6
6
  from zrb.builtin.llm.tool.api import get_current_location, get_current_weather
7
7
  from zrb.builtin.llm.tool.cli import run_shell_command
8
- from zrb.builtin.llm.tool.web import open_web_route, query_internet
8
+ from zrb.builtin.llm.tool.file import (
9
+ list_file,
10
+ read_source_code,
11
+ read_text_file,
12
+ write_text_file,
13
+ )
14
+ from zrb.builtin.llm.tool.web import (
15
+ create_search_internet_tool,
16
+ open_web_page,
17
+ search_arxiv,
18
+ search_wikipedia,
19
+ )
9
20
  from zrb.config import (
10
21
  LLM_ALLOW_ACCESS_INTERNET,
22
+ LLM_ALLOW_ACCESS_LOCAL_FILE,
11
23
  LLM_ALLOW_ACCESS_SHELL,
12
24
  LLM_HISTORY_DIR,
13
25
  LLM_MODEL,
14
26
  LLM_SYSTEM_PROMPT,
27
+ SERP_API_KEY,
15
28
  )
16
29
  from zrb.context.any_shared_context import AnySharedContext
17
30
  from zrb.input.bool_input import BoolInput
@@ -117,11 +130,21 @@ llm_chat: LLMTask = llm_group.add_task(
117
130
  alias="chat",
118
131
  )
119
132
 
133
+
134
+ if LLM_ALLOW_ACCESS_LOCAL_FILE:
135
+ llm_chat.add_tool(read_source_code)
136
+ llm_chat.add_tool(list_file)
137
+ llm_chat.add_tool(read_text_file)
138
+ llm_chat.add_tool(write_text_file)
139
+
120
140
  if LLM_ALLOW_ACCESS_SHELL:
121
141
  llm_chat.add_tool(run_shell_command)
122
142
 
123
143
  if LLM_ALLOW_ACCESS_INTERNET:
124
- llm_chat.add_tool(open_web_route)
125
- llm_chat.add_tool(query_internet)
144
+ llm_chat.add_tool(open_web_page)
145
+ llm_chat.add_tool(search_wikipedia)
146
+ llm_chat.add_tool(search_arxiv)
147
+ if SERP_API_KEY != "":
148
+ llm_chat.add_tool(create_search_internet_tool(SERP_API_KEY))
126
149
  llm_chat.add_tool(get_current_location)
127
150
  llm_chat.add_tool(get_current_weather)
@@ -1,13 +1,13 @@
1
1
  import json
2
2
  from typing import Annotated, Literal
3
3
 
4
- import requests
5
-
6
4
 
7
5
  def get_current_location() -> (
8
6
  Annotated[str, "JSON string representing latitude and longitude"]
9
7
  ): # noqa
10
8
  """Get the user's current location."""
9
+ import requests
10
+
11
11
  return json.dumps(requests.get("http://ip-api.com/json?fields=lat,lon").json())
12
12
 
13
13
 
@@ -17,6 +17,8 @@ def get_current_weather(
17
17
  temperature_unit: Literal["celsius", "fahrenheit"],
18
18
  ) -> str:
19
19
  """Get the current weather in a given location."""
20
+ import requests
21
+
20
22
  resp = requests.get(
21
23
  "https://api.open-meteo.com/v1/forecast",
22
24
  params={
@@ -0,0 +1,39 @@
1
+ import os
2
+
3
+ from zrb.util.file import read_file, write_file
4
+
5
+
6
def list_file(
    directory: str = ".",
    extensions: list[str] = [".py", ".go", ".js", ".ts", ".java", ".c", ".cpp"],
) -> list[str]:
    """List all files in a directory

    Walks ``directory`` recursively and collects every file whose lower-cased
    name ends with one of ``extensions``.

    Args:
        directory: Root directory to walk.
        extensions: File-name suffixes to accept. They are compared against
            the lower-cased file name as given (the suffixes themselves are
            not lower-cased).

    Returns:
        One path per matching file, built by joining the walked directory
        with the file name. Each file appears at most once, even when it
        matches several suffixes.
    """
    # The default list is shared between calls; it is only read, never mutated.
    suffixes = tuple(extensions)
    all_files: list[str] = []
    for root, _, files in os.walk(directory):
        for filename in files:
            # str.endswith accepts a tuple and tests every suffix in one call,
            # so overlapping suffixes cannot add the same file twice.
            if filename.lower().endswith(suffixes):
                all_files.append(os.path.join(root, filename))
    return all_files
18
+
19
+
20
def read_text_file(file: str) -> str:
    """Read a text file"""
    # Resolve the path to an absolute one before delegating to read_file.
    absolute_path = os.path.abspath(file)
    return read_file(absolute_path)
23
+
24
+
25
def write_text_file(file: str, content: str):
    """Write a text file"""
    # Resolve the path to an absolute one, then hand path and content to
    # write_file and pass its result back to the caller.
    absolute_path = os.path.abspath(file)
    return write_file(absolute_path, content)
28
+
29
+
30
def read_source_code(
    directory: str = ".",
    extensions: list[str] = [".py", ".go", ".js", ".ts", ".java", ".c", ".cpp"],
) -> list[str]:
    """Read source code in a directory"""
    # One entry per file found by list_file: a "# <path>" heading followed by
    # the file content wrapped in a fenced block.
    return [
        f"# {path}\n```\n{read_text_file(path)}\n```"
        for path in list_file(directory, extensions)
    ]
@@ -1,7 +1,9 @@
1
+ import fnmatch
1
2
  import hashlib
2
3
  import json
3
4
  import os
4
5
  import sys
6
+ from collections.abc import Callable
5
7
 
6
8
  import ulid
7
9
 
@@ -15,6 +17,20 @@ from zrb.util.cli.style import stylize_error, stylize_faint
15
17
  from zrb.util.file import read_file
16
18
 
17
19
 
20
class RAGFileReader:
    """Pair a glob pattern with the callable used to read matching files."""

    def __init__(self, glob_pattern: str, read: Callable[[str], str]):
        # glob_pattern: fnmatch-style pattern tested by is_match.
        # read: callable that takes a file path and returns its text.
        self.glob_pattern = glob_pattern
        self.read = read

    def is_match(self, file_name: str):
        """Return whether ``file_name`` matches this reader's glob pattern."""
        separators = [os.sep] if os.altsep is None else [os.sep, os.altsep]
        has_directory_part = any(sep in self.glob_pattern for sep in separators)
        if has_directory_part:
            # The pattern names a directory, so compare against the full path.
            return fnmatch.fnmatch(file_name, self.glob_pattern)
        # Pattern like "*.txt" – match only the basename.
        return fnmatch.fnmatch(os.path.basename(file_name), self.glob_pattern)
+
33
+
18
34
  def create_rag_from_directory(
19
35
  tool_name: str,
20
36
  tool_description: str,
@@ -25,6 +41,7 @@ def create_rag_from_directory(
25
41
  chunk_size: int = RAG_CHUNK_SIZE,
26
42
  overlap: int = RAG_OVERLAP,
27
43
  max_result_count: int = RAG_MAX_RESULT_COUNT,
44
+ file_reader: list[RAGFileReader] = [],
28
45
  ):
29
46
  async def retrieve(query: str) -> str:
30
47
  from chromadb import PersistentClient
@@ -36,35 +53,31 @@ def create_rag_from_directory(
36
53
  path=vector_db_path, settings=Settings(allow_reset=True)
37
54
  )
38
55
  collection = client.get_or_create_collection(vector_db_collection)
39
-
40
56
  # Track file changes using a hash-based approach
41
57
  hash_file_path = os.path.join(vector_db_path, "file_hashes.json")
42
58
  previous_hashes = _load_hashes(hash_file_path)
43
59
  current_hashes = {}
44
-
60
+ # Get updated_files
45
61
  updated_files = []
46
-
47
62
  for root, _, files in os.walk(document_dir_path):
48
63
  for file in files:
49
64
  file_path = os.path.join(root, file)
50
65
  file_hash = _compute_file_hash(file_path)
51
66
  relative_path = os.path.relpath(file_path, document_dir_path)
52
67
  current_hashes[relative_path] = file_hash
53
-
54
68
  if previous_hashes.get(relative_path) != file_hash:
55
69
  updated_files.append(file_path)
56
-
70
+ # Upsert updated_files to vector db
57
71
  if updated_files:
58
72
  print(
59
73
  stylize_faint(f"Updating {len(updated_files)} changed files"),
60
74
  file=sys.stderr,
61
75
  )
62
-
63
76
  for file_path in updated_files:
64
77
  try:
65
78
  relative_path = os.path.relpath(file_path, document_dir_path)
66
79
  collection.delete(where={"file_path": relative_path})
67
- content = _read_file_content(file_path)
80
+ content = _read_txt_content(file_path, file_reader)
68
81
  file_id = ulid.new().str
69
82
  for i in range(0, len(content), chunk_size - overlap):
70
83
  chunk = content[i : i + chunk_size]
@@ -92,14 +105,13 @@ def create_rag_from_directory(
92
105
  stylize_error(f"Error processing {file_path}: {e}"),
93
106
  file=sys.stderr,
94
107
  )
95
-
96
108
  _save_hashes(hash_file_path, current_hashes)
97
109
  else:
98
110
  print(
99
111
  stylize_faint("No changes detected. Skipping database update."),
100
112
  file=sys.stderr,
101
113
  )
102
-
114
+ # Vectorize query and get related document chunks
103
115
  print(stylize_faint("Vectorizing query"), file=sys.stderr)
104
116
  embedding_result = list(embedding_model.embed([query]))
105
117
  query_vector = embedding_result[0]
@@ -123,7 +135,22 @@ def _compute_file_hash(file_path: str) -> str:
123
135
  return hash_md5.hexdigest()
124
136
 
125
137
 
126
- def _read_file_content(file_path: str) -> str:
138
def _load_hashes(file_path: str) -> dict:
    """Load the JSON hash mapping stored at ``file_path``.

    Returns an empty dict when the path does not exist.
    """
    if not os.path.exists(file_path):
        return {}
    with open(file_path, "r") as hash_file:
        return json.load(hash_file)
143
+
144
+
145
def _save_hashes(file_path: str, hashes: dict):
    """Write ``hashes`` as JSON to ``file_path``, replacing any existing content."""
    with open(file_path, "w") as hash_file:
        json.dump(obj=hashes, fp=hash_file)
148
+
149
+
150
def _read_txt_content(file_path: str, file_reader: list[RAGFileReader]):
    """Return the text of ``file_path``.

    The first reader in ``file_reader`` whose pattern matches the path is
    used. Without a match, paths ending in ".pdf" (case-insensitive) go
    through _read_pdf and everything else through read_file.
    """
    matching_reader = next(
        (reader for reader in file_reader if reader.is_match(file_path)), None
    )
    if matching_reader is not None:
        return matching_reader.read(file_path)
    is_pdf = file_path.lower().endswith(".pdf")
    return _read_pdf(file_path) if is_pdf else read_file(file_path)
@@ -136,15 +163,3 @@ def _read_pdf(file_path: str) -> str:
136
163
  return "\n".join(
137
164
  page.extract_text() for page in pdf.pages if page.extract_text()
138
165
  )
139
-
140
-
141
- def _load_hashes(file_path: str) -> dict:
142
- if os.path.exists(file_path):
143
- with open(file_path, "r") as f:
144
- return json.load(f)
145
- return {}
146
-
147
-
148
- def _save_hashes(file_path: str, hashes: dict):
149
- with open(file_path, "w") as f:
150
- json.dump(hashes, f)
@@ -1,8 +1,9 @@
1
1
  import json
2
+ from collections.abc import Callable
2
3
  from typing import Annotated
3
4
 
4
5
 
5
- def open_web_route(url: str) -> str:
6
+ def open_web_page(url: str) -> str:
6
7
  """Get content from a web page."""
7
8
  import requests
8
9
 
@@ -19,30 +20,55 @@ def open_web_route(url: str) -> str:
19
20
  return json.dumps(parse_html_text(response.text))
20
21
 
21
22
 
22
- def query_internet(
23
def create_search_internet_tool(serp_api_key: str) -> Callable[[str, int], str]:
    # Factory: the returned search_internet function closes over serp_api_key
    # and sends it as the "api_key" query parameter on every request.
    def search_internet(
        query: Annotated[str, "Search query"],
        num_results: Annotated[int, "Search result count, by default 10"] = 10,
    ) -> str:
        """Search factual information from the internet by using Google."""
        import requests

        request_headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"  # noqa
        }
        request_params = {
            "q": query,
            "num": num_results,
            "hl": "en",
            "safe": "off",
            "api_key": serp_api_key,
        }
        response = requests.get(
            "https://serpapi.com/search",
            headers=request_headers,
            params=request_params,
        )
        if response.status_code == 200:
            return json.dumps(parse_html_text(response.text))
        raise Exception(
            f"Error: Unable to retrieve search results (status code: {response.status_code})"  # noqa
        )

    return search_internet
51
+
52
+
53
def search_wikipedia(query: Annotated[str, "Search query"]) -> str:
    """Search on wikipedia

    Sends a ``list=search`` query to the English Wikipedia API endpoint and
    returns the JSON response serialised as a string.

    Args:
        query: Text passed to the API as ``srsearch``.

    Returns:
        The decoded JSON response re-serialised with ``json.dumps``.
    """
    import requests

    params = {"action": "query", "list": "search", "srsearch": query, "format": "json"}
    response = requests.get("https://en.wikipedia.org/w/api.php", params=params)
    # response.json() yields a parsed object, but the declared return type is
    # str, so serialise it.
    return json.dumps(response.json())
60
+
61
+
62
def search_arxiv(
    query: Annotated[str, "Search query"],
    num_results: Annotated[int, "Search result count, by default 10"] = 10,
) -> str:
    """Search on Arxiv

    Sends the query to the arXiv export API and returns the response body as
    text.

    Args:
        query: Search terms; sent as ``all:<query>`` in ``search_query``.
        num_results: Sent as ``max_results``; results start at offset 0.

    Returns:
        The response body decoded to ``str``.
    """
    import requests

    params = {"search_query": f"all:{query}", "start": 0, "max_results": num_results}
    response = requests.get("http://export.arxiv.org/api/query", params=params)
    # response.content is bytes; the declared return type is str, so return
    # the decoded body instead.
    return response.text
46
72
 
47
73
 
48
74
  def parse_html_text(html_text: str) -> dict[str, str]:
@@ -62,7 +62,7 @@ scaffold_fastapp = Scaffolder(
62
62
  ),
63
63
  # Register fastapp's tasks to project's zrb_init (project_dir/zrb_init.py)
64
64
  ContentTransformer(
65
- name="trasnform-zrb-init",
65
+ name="transform-zrb-init",
66
66
  match=is_project_zrb_init_file,
67
67
  transform=update_project_zrb_init_file,
68
68
  ),
@@ -1,12 +1,12 @@
1
1
  import os
2
2
 
3
3
  from my_app_name._zrb.column.add_column_util import (
4
- update_fastapp_schema,
5
- update_fastapp_test_create,
6
- update_fastapp_test_delete,
7
- update_fastapp_test_read,
8
- update_fastapp_test_update,
9
- update_fastapp_ui,
4
+ update_my_app_name_schema,
5
+ update_my_app_name_test_create,
6
+ update_my_app_name_test_delete,
7
+ update_my_app_name_test_read,
8
+ update_my_app_name_test_update,
9
+ update_my_app_name_ui,
10
10
  )
11
11
  from my_app_name._zrb.config import APP_DIR
12
12
  from my_app_name._zrb.format_task import format_my_app_name_code
@@ -23,14 +23,14 @@ from zrb import AnyContext, Task, make_task
23
23
 
24
24
 
25
25
  @make_task(
26
- name="validate-add-fastapp-column",
26
+ name="validate-add-my-app-name-column",
27
27
  input=[
28
28
  existing_module_input,
29
29
  existing_entity_input,
30
30
  ],
31
31
  retries=0,
32
32
  )
33
- async def validate_add_fastapp_column(ctx: AnyContext):
33
+ async def validate_add_my_app_name_column(ctx: AnyContext):
34
34
  module_name = ctx.input.module
35
35
  if module_name not in get_existing_module_names():
36
36
  raise ValueError(f"Module not exist: {module_name}")
@@ -39,96 +39,96 @@ async def validate_add_fastapp_column(ctx: AnyContext):
39
39
  raise ValueError(f"Schema not exist: {schema_name}")
40
40
 
41
41
 
42
- update_fastapp_schema_task = Task(
43
- name="update-fastapp-schema",
42
+ update_my_app_name_schema_task = Task(
43
+ name="update-my-app-name-schema",
44
44
  input=[
45
45
  existing_module_input,
46
46
  existing_entity_input,
47
47
  new_column_input,
48
48
  new_column_type_input,
49
49
  ],
50
- action=update_fastapp_schema,
50
+ action=update_my_app_name_schema,
51
51
  retries=0,
52
- upstream=validate_add_fastapp_column,
52
+ upstream=validate_add_my_app_name_column,
53
53
  )
54
54
 
55
- update_fastapp_ui_task = Task(
56
- name="update-fastapp-ui",
55
+ update_my_app_name_ui_task = Task(
56
+ name="update-my-app-name-ui",
57
57
  input=[
58
58
  existing_module_input,
59
59
  existing_entity_input,
60
60
  new_column_input,
61
61
  new_column_type_input,
62
62
  ],
63
- action=update_fastapp_ui,
63
+ action=update_my_app_name_ui,
64
64
  retries=0,
65
- upstream=validate_add_fastapp_column,
65
+ upstream=validate_add_my_app_name_column,
66
66
  )
67
67
 
68
- update_fastapp_test_create_task = Task(
69
- name="update-fastapp-test-create",
68
+ update_my_app_name_test_create_task = Task(
69
+ name="update-my-app-name-test-create",
70
70
  input=[
71
71
  existing_module_input,
72
72
  existing_entity_input,
73
73
  new_column_input,
74
74
  new_column_type_input,
75
75
  ],
76
- action=update_fastapp_test_create,
76
+ action=update_my_app_name_test_create,
77
77
  retries=0,
78
- upstream=validate_add_fastapp_column,
78
+ upstream=validate_add_my_app_name_column,
79
79
  )
80
80
 
81
- update_fastapp_test_read_task = Task(
82
- name="update-fastapp-test-read",
81
+ update_my_app_name_test_read_task = Task(
82
+ name="update-my-app-name-test-read",
83
83
  input=[
84
84
  existing_module_input,
85
85
  existing_entity_input,
86
86
  new_column_input,
87
87
  new_column_type_input,
88
88
  ],
89
- action=update_fastapp_test_read,
89
+ action=update_my_app_name_test_read,
90
90
  retries=0,
91
- upstream=validate_add_fastapp_column,
91
+ upstream=validate_add_my_app_name_column,
92
92
  )
93
93
 
94
- update_fastapp_test_update_task = Task(
95
- name="update-fastapp-test-update",
94
+ update_my_app_name_test_update_task = Task(
95
+ name="update-my-app-name-test-update",
96
96
  input=[
97
97
  existing_module_input,
98
98
  existing_entity_input,
99
99
  new_column_input,
100
100
  new_column_type_input,
101
101
  ],
102
- action=update_fastapp_test_update,
102
+ action=update_my_app_name_test_update,
103
103
  retries=0,
104
- upstream=validate_add_fastapp_column,
104
+ upstream=validate_add_my_app_name_column,
105
105
  )
106
106
 
107
- update_fastapp_test_delete_task = Task(
108
- name="update-fastapp-test-delete",
107
+ update_my_app_name_test_delete_task = Task(
108
+ name="update-my-app-name-test-delete",
109
109
  input=[
110
110
  existing_module_input,
111
111
  existing_entity_input,
112
112
  new_column_input,
113
113
  new_column_type_input,
114
114
  ],
115
- action=update_fastapp_test_delete,
115
+ action=update_my_app_name_test_delete,
116
116
  retries=0,
117
- upstream=validate_add_fastapp_column,
117
+ upstream=validate_add_my_app_name_column,
118
118
  )
119
119
 
120
120
 
121
- add_fastapp_column = app_create_group.add_task(
121
+ add_my_app_name_column = app_create_group.add_task(
122
122
  Task(
123
- name="add-fastapp-column",
123
+ name="add-my-app-name-column",
124
124
  description="📊 Create new column on an entity",
125
125
  upstream=[
126
- update_fastapp_schema_task,
127
- update_fastapp_ui_task,
128
- update_fastapp_test_create_task,
129
- update_fastapp_test_read_task,
130
- update_fastapp_test_update_task,
131
- update_fastapp_test_delete_task,
126
+ update_my_app_name_schema_task,
127
+ update_my_app_name_ui_task,
128
+ update_my_app_name_test_create_task,
129
+ update_my_app_name_test_read_task,
130
+ update_my_app_name_test_update_task,
131
+ update_my_app_name_test_delete_task,
132
132
  ],
133
133
  successor=format_my_app_name_code,
134
134
  retries=0,
@@ -6,11 +6,7 @@ from bs4 import BeautifulSoup, formatter
6
6
  from my_app_name._zrb.config import APP_DIR
7
7
 
8
8
  from zrb.context.any_context import AnyContext
9
- from zrb.util.codemod.modify_class import append_code_to_class
10
- from zrb.util.codemod.modify_class_parent import prepend_parent_class
11
9
  from zrb.util.codemod.modify_class_property import append_property_to_class
12
- from zrb.util.codemod.modify_function import append_code_to_function
13
- from zrb.util.codemod.modify_module import prepend_code_to_module
14
10
  from zrb.util.file import read_file, write_file
15
11
  from zrb.util.string.conversion import (
16
12
  to_human_case,
@@ -20,7 +16,7 @@ from zrb.util.string.conversion import (
20
16
  )
21
17
 
22
18
 
23
- def update_fastapp_schema(ctx: AnyContext):
19
+ def update_my_app_name_schema(ctx: AnyContext):
24
20
  snake_entity_name = to_snake_case(ctx.input.entity)
25
21
  pascal_entity_name = to_pascal_case(ctx.input.entity)
26
22
  snake_column_name = to_snake_case(ctx.input.column)
@@ -64,7 +60,7 @@ def _get_default_column_value(data_type: str) -> str:
64
60
  return "None"
65
61
 
66
62
 
67
- def update_fastapp_ui(ctx: AnyContext):
63
+ def update_my_app_name_ui(ctx: AnyContext):
68
64
  kebab_module_name = to_kebab_case(ctx.input.module)
69
65
  kebab_entity_name = to_kebab_case(ctx.input.entity)
70
66
  snake_column_name = to_snake_case(ctx.input.column)
@@ -237,7 +233,7 @@ def _alter_js_function_returned_array(
237
233
  return new_html
238
234
 
239
235
 
240
- def update_fastapp_test_create(ctx: AnyContext):
236
+ def update_my_app_name_test_create(ctx: AnyContext):
241
237
  snake_module_name = to_snake_case(ctx.input.module)
242
238
  snake_entity_name = to_snake_case(ctx.input.entity)
243
239
  test_file_path = os.path.join(
@@ -253,7 +249,7 @@ def update_fastapp_test_create(ctx: AnyContext):
253
249
  write_file(test_file_path, new_code)
254
250
 
255
251
 
256
- def update_fastapp_test_read(ctx: AnyContext):
252
+ def update_my_app_name_test_read(ctx: AnyContext):
257
253
  snake_module_name = to_snake_case(ctx.input.module)
258
254
  snake_entity_name = to_snake_case(ctx.input.entity)
259
255
  test_file_path = os.path.join(
@@ -269,7 +265,7 @@ def update_fastapp_test_read(ctx: AnyContext):
269
265
  write_file(test_file_path, new_code)
270
266
 
271
267
 
272
- def update_fastapp_test_update(ctx: AnyContext):
268
+ def update_my_app_name_test_update(ctx: AnyContext):
273
269
  snake_module_name = to_snake_case(ctx.input.module)
274
270
  snake_entity_name = to_snake_case(ctx.input.entity)
275
271
  test_file_path = os.path.join(
@@ -285,7 +281,7 @@ def update_fastapp_test_update(ctx: AnyContext):
285
281
  write_file(test_file_path, new_code)
286
282
 
287
283
 
288
- def update_fastapp_test_delete(ctx: AnyContext):
284
+ def update_my_app_name_test_delete(ctx: AnyContext):
289
285
  snake_module_name = to_snake_case(ctx.input.module)
290
286
  snake_entity_name = to_snake_case(ctx.input.entity)
291
287
  test_file_path = os.path.join(