tinybird 0.0.1.dev67__py3-none-any.whl → 0.0.1.dev69__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tinybird might be problematic. Click here for more details.

Files changed (36)
  1. tinybird/ch_utils/engine.py +2 -4
  2. tinybird/context.py +0 -1
  3. tinybird/prompts.py +3 -3
  4. tinybird/sql_template.py +1 -3
  5. tinybird/sql_toolset.py +3 -3
  6. tinybird/tb/__cli__.py +2 -2
  7. tinybird/tb/modules/auth.py +1 -1
  8. tinybird/tb/modules/cli.py +5 -5
  9. tinybird/tb/modules/common.py +9 -9
  10. tinybird/tb/modules/create.py +42 -80
  11. tinybird/tb/modules/datafile/build_common.py +1 -1
  12. tinybird/tb/modules/datafile/build_datasource.py +1 -1
  13. tinybird/tb/modules/datafile/common.py +18 -3
  14. tinybird/tb/modules/datafile/pipe_checker.py +1 -1
  15. tinybird/tb/modules/datasource.py +2 -2
  16. tinybird/tb/modules/deployment.py +60 -11
  17. tinybird/tb/modules/endpoint.py +1 -1
  18. tinybird/tb/modules/fmt.py +1 -1
  19. tinybird/tb/modules/materialization.py +5 -5
  20. tinybird/tb/modules/mock.py +9 -38
  21. tinybird/tb/modules/pipe.py +1 -1
  22. tinybird/tb/modules/project.py +3 -3
  23. tinybird/tb/modules/shell.py +13 -21
  24. tinybird/tb/modules/test.py +39 -71
  25. tinybird/tb/modules/token.py +4 -4
  26. tinybird/tb/modules/watch.py +25 -12
  27. tinybird/tb/modules/workspace.py +6 -6
  28. tinybird/tb/modules/workspace_members.py +6 -6
  29. tinybird/tb_cli_modules/common.py +2 -2
  30. tinybird/tornado_template.py +2 -1
  31. tinybird-0.0.1.dev69.dist-info/METADATA +73 -0
  32. {tinybird-0.0.1.dev67.dist-info → tinybird-0.0.1.dev69.dist-info}/RECORD +35 -35
  33. {tinybird-0.0.1.dev67.dist-info → tinybird-0.0.1.dev69.dist-info}/WHEEL +1 -1
  34. tinybird-0.0.1.dev67.dist-info/METADATA +0 -64
  35. {tinybird-0.0.1.dev67.dist-info → tinybird-0.0.1.dev69.dist-info}/entry_points.txt +0 -0
  36. {tinybird-0.0.1.dev67.dist-info → tinybird-0.0.1.dev69.dist-info}/top_level.txt +0 -0
@@ -456,7 +456,7 @@ ENABLED_ENGINES = [
456
456
  MERGETREE_OPTIONS,
457
457
  ),
458
458
  # AggregatingMergeTree()
459
- engine_config("AggregatingMergeTree", options=REPLACINGMERGETREE_OPTIONS),
459
+ engine_config("AggregatingMergeTree", options=MERGETREE_OPTIONS),
460
460
  # CollapsingMergeTree(sign)
461
461
  engine_config(
462
462
  "CollapsingMergeTree",
@@ -631,9 +631,7 @@ def engine_full_from_dict(
631
631
 
632
632
  >>> schema = 'sign_column Int8'
633
633
  >>> engine_full_from_dict('AggregatingMergeTree', {}, schema=schema)
634
- Traceback (most recent call last):
635
- ...
636
- ValueError: Missing required option 'sorting_key'
634
+ 'AggregatingMergeTree() ORDER BY (tuple())'
637
635
 
638
636
  >>> columns=[]
639
637
  >>> columns.append({'name': 'key_column', 'type': 'Int8', 'codec': None, 'default_value': None, 'nullable': False, 'normalized_name': 'key_column'})
tinybird/context.py CHANGED
@@ -20,5 +20,4 @@ engine: ContextVar[str] = ContextVar("engine")
20
20
  wait_parameter: ContextVar[bool] = ContextVar("wait_parameter")
21
21
  api_host: ContextVar[str] = ContextVar("api_host")
22
22
  ff_split_to_array_escape: ContextVar[bool] = ContextVar("ff_split_to_array_escape")
23
- ff_preprocess_parameters_circuit_breaker: ContextVar[bool] = ContextVar("ff_preprocess_parameters_circuit_breaker")
24
23
  ff_column_json_backticks_circuit_breaker: ContextVar[bool] = ContextVar("ff_column_json_backticks_circuit_breaker")
tinybird/prompts.py CHANGED
@@ -699,8 +699,8 @@ pipe_instructions = """
699
699
  - Nodes can't have the same exact name as the Pipe they belong to.
700
700
  - Avoid more than one node per pipe unless it is really necessary or requested by the user.
701
701
  - No indentation is allowed for property names: DESCRIPTION, NODE, SQL, TYPE, etc.
702
- - Allowed TYPE values are: endpoint, copy, materialized, sink
703
- - Add always the output node in the TYPE section.
702
+ - Allowed TYPE values are: endpoint, copy, materialized.
703
+ - Add always the output node in the TYPE section or in the last node of the pipe.
704
704
  </pipe_file_instructions>
705
705
  """
706
706
 
@@ -788,7 +788,7 @@ When you need to work with resources or data in cloud, add always the --cloud fl
788
788
  ├── endpoints
789
789
  ├── fixtures
790
790
  ├── materializations
791
- ├── sinks
791
+ ├── pipes
792
792
  └── tests
793
793
  - The local development server will be available at http://localhost:80. Even if some response uses another base url, use always http://localhost:80.
794
794
  - After every change in your .datasource, .pipe or .ndjson files, run `{base_command} build` to build the project locally.
tinybird/sql_template.py CHANGED
@@ -14,7 +14,6 @@ from tornado.util import ObjectDict, exec_in, unicode_type
14
14
 
15
15
  from tinybird.context import (
16
16
  ff_column_json_backticks_circuit_breaker,
17
- ff_preprocess_parameters_circuit_breaker,
18
17
  ff_split_to_array_escape,
19
18
  )
20
19
 
@@ -2244,14 +2243,13 @@ def render_sql_template(
2244
2243
  tinybird.sql_template.SQLTemplateException: Template Syntax Error: Required parameter is not defined. Check the parameters test. Please provide a value or set a default value in the pipe code.
2245
2244
  """
2246
2245
  escape_split_to_array = ff_split_to_array_escape.get(False)
2247
- bypass_preprocess_variables = ff_preprocess_parameters_circuit_breaker.get(False)
2248
2246
 
2249
2247
  t, template_variables, variable_warnings = get_template_and_variables(
2250
2248
  sql, name, escape_arrays=escape_split_to_array
2251
2249
  )
2252
2250
  template_variables_with_types = get_var_names_and_types_cached(t)
2253
2251
 
2254
- if not bypass_preprocess_variables and variables is not None:
2252
+ if variables is not None:
2255
2253
  processed_variables = preprocess_variables(variables, template_variables_with_types)
2256
2254
  variables.update(processed_variables)
2257
2255
 
tinybird/sql_toolset.py CHANGED
@@ -3,7 +3,7 @@ import logging
3
3
  from collections import defaultdict
4
4
  from datetime import datetime
5
5
  from functools import lru_cache
6
- from typing import FrozenSet, List, Optional, Set, Tuple
6
+ from typing import FrozenSet, List, Optional, Set, Tuple, Union
7
7
 
8
8
  from chtoolset import query as chquery
9
9
  from toposort import toposort
@@ -172,7 +172,7 @@ def tables_or_sql(replacement: dict, table_functions=False) -> set:
172
172
  return {replacement}
173
173
 
174
174
 
175
- def _separate_as_tuple_if_contains_database_and_table(definition: str) -> str | Tuple[str, str]:
175
+ def _separate_as_tuple_if_contains_database_and_table(definition: str) -> Union[str, Tuple[str, str]]:
176
176
  if "." in definition:
177
177
  database_and_table_separated = definition.split(".")
178
178
  return database_and_table_separated[0], database_and_table_separated[1]
@@ -255,7 +255,7 @@ def replace_tables(
255
255
  function_allow_list=function_allow_list,
256
256
  )
257
257
  seen_tables = set()
258
- table: Tuple[str, str] | Tuple[str, str, str]
258
+ table: Union[Tuple[str, str], Tuple[str, str, str]]
259
259
  if function_allow_list is None:
260
260
  _enabled_table_functions = ENABLED_TABLE_FUNCTIONS
261
261
  else:
tinybird/tb/__cli__.py CHANGED
@@ -4,5 +4,5 @@ __description__ = 'Tinybird Command Line Tool'
4
4
  __url__ = 'https://www.tinybird.co/docs/cli/introduction.html'
5
5
  __author__ = 'Tinybird'
6
6
  __author_email__ = 'support@tinybird.co'
7
- __version__ = '0.0.1.dev67'
8
- __revision__ = '76a69c0'
7
+ __version__ = '0.0.1.dev69'
8
+ __revision__ = 'f90d907'
@@ -30,7 +30,7 @@ from tinybird.tb.modules.regions import Region
30
30
  @click.option("--token", help="Use auth token, defaults to TB_TOKEN envvar, then to the .tinyb file")
31
31
  @click.option(
32
32
  "--host",
33
- help="Set custom host if it's different than https://api.tinybird.co. Check https://www.tinybird.co/docs/api-reference/overview#regions-and-endpoints for the available list of regions",
33
+ help="Set custom host if it's different than https://api.tinybird.co. Check https://www.tinybird.co/docs/api-reference#regions-and-endpoints for the available list of regions",
34
34
  )
35
35
  @click.option(
36
36
  "--region", envvar="TB_REGION", help="Set region. Run 'tb auth ls' to show available regions. Overrides host."
@@ -147,12 +147,12 @@ async def pull(ctx: Context, force: bool, fmt: bool) -> None:
147
147
  @click.option("--no-deps", is_flag=True, default=False, help="Print only data sources with no pipes using them")
148
148
  @click.option("--match", default=None, help="Retrieve any resource matching the pattern")
149
149
  @click.option("--pipe", default=None, help="Retrieve any resource used by pipe")
150
- @click.option("--datasource", default=None, help="Retrieve resources depending on this Data Source")
150
+ @click.option("--datasource", default=None, help="Retrieve resources depending on this data source")
151
151
  @click.option(
152
152
  "--check-for-partial-replace",
153
153
  is_flag=True,
154
154
  default=False,
155
- help="Retrieve dependant Data Sources that will have their data replaced if a partial replace is executed in the Data Source selected",
155
+ help="Retrieve dependant data sources that will have their data replaced if a partial replace is executed in the data source selected",
156
156
  )
157
157
  @click.option("--recursive", is_flag=True, default=False, help="Calculate recursive dependencies")
158
158
  @click.pass_context
@@ -186,7 +186,7 @@ async def dependencies(
186
186
 
187
187
  @cli.command(
188
188
  name="diff",
189
- short_help="Diffs local datafiles to the corresponding remote files in the workspace. For the case of .datasource files it just diffs VERSION and SCHEMA, since ENGINE, KAFKA or other metadata is considered immutable.",
189
+ short_help="Diff local datafiles to the corresponding remote files in the workspace. For the case of .datasource files it just diffs VERSION and SCHEMA, since ENGINE, KAFKA or other metadata is considered immutable.",
190
190
  )
191
191
  @click.argument("filename", type=click.Path(exists=True), nargs=-1, required=False)
192
192
  @click.option(
@@ -203,7 +203,7 @@ async def dependencies(
203
203
  "--main",
204
204
  is_flag=True,
205
205
  default=False,
206
- help="Diffs local datafiles to the corresponding remote files in the main workspace. Only works when authenticated on a Branch.",
206
+ help="Diff local datafiles to the corresponding remote files in the main workspace. Only works when authenticated on a Branch.",
207
207
  hidden=True,
208
208
  )
209
209
  @click.pass_context
@@ -255,7 +255,7 @@ async def diff(
255
255
  @cli.command()
256
256
  @click.argument("query", required=False)
257
257
  @click.option("--rows_limit", default=100, help="Max number of rows retrieved")
258
- @click.option("--pipeline", default=None, help="The name of the Pipe to run the SQL Query")
258
+ @click.option("--pipeline", default=None, help="The name of the pipe to run the SQL Query")
259
259
  @click.option("--pipe", default=None, help="The path to the .pipe file to run the SQL Query of a specific NODE")
260
260
  @click.option("--node", default=None, help="The NODE name")
261
261
  @click.option(
@@ -571,7 +571,7 @@ def region_from_host(region_name_or_host, regions):
571
571
 
572
572
  def ask_for_user_token(action: str, ui_host: str) -> str:
573
573
  return click.prompt(
574
- f'\nUse the token called "user token" in order to {action}. Copy it from {ui_host}/tokens and paste it here',
574
+ f'\nUse the token called "user token" to {action}. Copy it from {ui_host}/tokens and paste it here',
575
575
  hide_input=True,
576
576
  show_default=False,
577
577
  default=None,
@@ -591,13 +591,13 @@ async def check_user_token(ctx: Context, token: str):
591
591
  if not is_authenticated.get("is_valid", False):
592
592
  raise CLIWorkspaceException(
593
593
  FeedbackManager.error_exception(
594
- error='Invalid token. Please, be sure you are using the "user token" instead of the "admin your@email" token.'
594
+ error='Invalid token. Make sure you are using the "user token" instead of the "admin your@email" token.'
595
595
  )
596
596
  )
597
597
  if is_authenticated.get("is_valid") and not is_authenticated.get("is_user", False):
598
598
  raise CLIWorkspaceException(
599
599
  FeedbackManager.error_exception(
600
- error='Invalid user authentication. Please, be sure you are using the "user token" instead of the "admin your@email" token.'
600
+ error='Invalid user authentication. Make sure you are using the "user token" instead of the "admin your@email" token.'
601
601
  )
602
602
  )
603
603
 
@@ -614,13 +614,13 @@ async def check_user_token_with_client(client: TinyB, token: str):
614
614
  if not is_authenticated.get("is_valid", False):
615
615
  raise CLIWorkspaceException(
616
616
  FeedbackManager.error_exception(
617
- error='Invalid token. Please, be sure you are using the "user token" instead of the "admin your@email" token.'
617
+ error='Invalid token. Make sure you are using the "user token" instead of the "admin your@email" token.'
618
618
  )
619
619
  )
620
620
  if is_authenticated.get("is_valid") and not is_authenticated.get("is_user", False):
621
621
  raise CLIWorkspaceException(
622
622
  FeedbackManager.error_exception(
623
- error='Invalid user authentication. Please, be sure you are using the "user token" instead of the "admin your@email" token.'
623
+ error='Invalid user authentication. Make sure you are using the "user token" instead of the "admin your@email" token.'
624
624
  )
625
625
  )
626
626
 
@@ -1153,17 +1153,17 @@ def validate_kafka_bootstrap_servers(host_and_port):
1153
1153
 
1154
1154
  def validate_kafka_key(s):
1155
1155
  if not isinstance(s, str):
1156
- raise CLIException("Key format is not correct, it should be a string")
1156
+ raise CLIException("Key format is not correct, it must be a string")
1157
1157
 
1158
1158
 
1159
1159
  def validate_kafka_secret(s):
1160
1160
  if not isinstance(s, str):
1161
- raise CLIException("Password format is not correct, it should be a string")
1161
+ raise CLIException("Password format is not correct, it must be a string")
1162
1162
 
1163
1163
 
1164
1164
  def validate_string_connector_param(param, s):
1165
1165
  if not isinstance(s, str):
1166
- raise CLIConnectionException(param + " format is not correct, it should be a string")
1166
+ raise CLIConnectionException(param + " format is not correct, it must be a string")
1167
1167
 
1168
1168
 
1169
1169
  async def validate_connection_name(client, connection_name, service):
@@ -1434,7 +1434,7 @@ async def try_update_config_with_remote(
1434
1434
  def ask_for_admin_token_interactively(ui_host: str, default_token: Optional[str]) -> str:
1435
1435
  return (
1436
1436
  click.prompt(
1437
- f'\nCopy the "admin your@email" token from {ui_host}/tokens and paste it here {"OR press enter to use the token from .tinyb file" if default_token else ""}',
1437
+ f'\nCopy the "admin your@email" token from {ui_host}/tokens and paste it here {"OR press enter to use the token from the .tinyb file" if default_token else ""}',
1438
1438
  hide_input=True,
1439
1439
  show_default=False,
1440
1440
  default=default_token,
@@ -35,17 +35,9 @@ from tinybird.tb.modules.project import Project
35
35
  )
36
36
  @click.option("--rows", type=int, default=10, help="Number of events to send")
37
37
  @click.option("--source", type=str, default="tb", help="Source of the command")
38
- @click.option("--skip", is_flag=True, default=False, help="Skip following up on the generated resources")
39
38
  @click.pass_context
40
39
  @coro
41
- async def create(
42
- ctx: click.Context,
43
- data: Optional[str],
44
- prompt: Optional[str],
45
- rows: int,
46
- source: str,
47
- skip: bool,
48
- ) -> None:
40
+ async def create(ctx: click.Context, data: Optional[str], prompt: Optional[str], rows: int, source: str) -> None:
49
41
  """Initialize a new project."""
50
42
  project: Project = ctx.ensure_object(dict)["project"]
51
43
  local_client: TinyB = ctx.ensure_object(dict)["client"]
@@ -80,7 +72,7 @@ async def create(
80
72
  result = ""
81
73
  if data or prompt:
82
74
  click.echo(FeedbackManager.highlight(message="\n» Creating resources..."))
83
- result = await create_resources(local_client, tb_client, user_token, data, prompt, folder, skip)
75
+ result = await create_resources(local_client, tb_client, user_token, data, prompt, folder)
84
76
  click.echo(FeedbackManager.success(message="✓ Done!\n"))
85
77
 
86
78
  if not already_has_cicd(folder):
@@ -125,7 +117,7 @@ async def create(
125
117
  click.echo(FeedbackManager.error(message=f"Error: {str(e)}"))
126
118
 
127
119
 
128
- PROJECT_PATHS = ("datasources", "endpoints", "materializations", "copies", "sinks", "fixtures", "tests")
120
+ PROJECT_PATHS = ("datasources", "endpoints", "materializations", "copies", "pipes", "fixtures", "tests")
129
121
 
130
122
 
131
123
  def validate_project_structure(folder: str) -> bool:
@@ -166,7 +158,6 @@ async def create_resources(
166
158
  data: Optional[str],
167
159
  prompt: Optional[str],
168
160
  folder: str,
169
- skip: bool,
170
161
  ):
171
162
  result = ""
172
163
  folder_path = Path(folder)
@@ -215,73 +206,39 @@ TYPE ENDPOINT
215
206
  ]
216
207
  )
217
208
  llm = LLM(user_token=user_token, host=tb_client.host)
218
- result = ""
219
- iterations = 0
220
- history = ""
221
- generated_paths: list[Path] = []
222
-
223
- while iterations < 10:
224
- feedback = ""
225
- if iterations > 0:
226
- feedback = click.prompt("\nFollow-up instructions or continue", default="continue")
227
- if iterations > 0 and (not feedback or feedback in ("continue", "ok", "exit", "quit", "q")):
228
- break
229
- else:
230
- if iterations > 0:
231
- click.echo(FeedbackManager.highlight(message="\n» Creating resources..."))
232
- for path in generated_paths:
233
- path.unlink()
234
- generated_paths = []
235
-
236
- save_context(prompt, feedback)
237
- result = llm.ask(system_prompt=create_prompt(resources_xml, feedback, history), prompt=prompt)
238
- result = extract_xml(result, "response")
239
- history = (
240
- history
241
- + f"""
242
- <result_iteration_{iterations}>
243
- {result}
244
- </result_iteration_{iterations}>
245
- """
246
- )
247
- resources = parse_xml(result, "resource")
248
- datasources = []
249
- pipes = []
250
- for resource_xml in resources:
251
- resource_type = extract_xml(resource_xml, "type")
252
- name = extract_xml(resource_xml, "name")
253
- content = extract_xml(resource_xml, "content")
254
- resource = {
255
- "name": name,
256
- "content": content,
257
- }
258
- if resource_type.lower() == "datasource":
259
- datasources.append(resource)
260
- elif resource_type.lower() == "pipe":
261
- pipes.append(resource)
262
-
263
- for ds in datasources:
264
- content = ds["content"].replace("```", "")
265
- filename = f"{ds['name']}.datasource"
266
- datasource_path = generate_datafile(
267
- content,
268
- filename=filename,
269
- data=None,
270
- _format="ndjson",
271
- force=True,
272
- folder=folder,
273
- )
274
- generated_paths.append(datasource_path)
275
- for pipe in pipes:
276
- content = pipe["content"].replace("```", "")
277
- pipe_path = generate_pipe_file(pipe["name"], content, folder)
278
- generated_paths.append(pipe_path)
279
- if skip:
280
- break
281
- iterations += 1
282
-
283
- if iterations == 10:
284
- click.echo(FeedbackManager.info(message="Too many iterations. Change the prompt and try again."))
209
+ result = llm.ask(system_prompt=create_prompt(resources_xml), prompt=prompt)
210
+ result = extract_xml(result, "response")
211
+ resources = parse_xml(result, "resource")
212
+ datasources = []
213
+ pipes = []
214
+ for resource_xml in resources:
215
+ resource_type = extract_xml(resource_xml, "type")
216
+ name = extract_xml(resource_xml, "name")
217
+ content = extract_xml(resource_xml, "content")
218
+ resource = {
219
+ "name": name,
220
+ "content": content,
221
+ }
222
+ if resource_type.lower() == "datasource":
223
+ datasources.append(resource)
224
+ elif resource_type.lower() == "pipe":
225
+ pipes.append(resource)
226
+
227
+ for ds in datasources:
228
+ content = ds["content"].replace("```", "")
229
+ filename = f"{ds['name']}.datasource"
230
+ generate_datafile(
231
+ content,
232
+ filename=filename,
233
+ data=None,
234
+ _format="ndjson",
235
+ force=True,
236
+ folder=folder,
237
+ )
238
+
239
+ for pipe in pipes:
240
+ content = pipe["content"].replace("```", "")
241
+ generate_pipe_file(pipe["name"], content, folder)
285
242
 
286
243
  return result
287
244
 
@@ -313,14 +270,19 @@ def generate_pipe_file(name: str, content: str, folder: str) -> Path:
313
270
  def is_sink(content: str) -> bool:
314
271
  return re.search(r"TYPE sink", content, re.IGNORECASE) is not None
315
272
 
273
+ def is_endpoint(content: str) -> bool:
274
+ return re.search(r"TYPE endpoint", content, re.IGNORECASE) is not None
275
+
316
276
  if is_copy(content):
317
277
  pathname = "copies"
318
278
  elif is_materialization(content):
319
279
  pathname = "materializations"
320
280
  elif is_sink(content):
321
281
  pathname = "sinks"
322
- else:
282
+ elif is_endpoint(content):
323
283
  pathname = "endpoints"
284
+ else:
285
+ pathname = "pipes"
324
286
 
325
287
  base = Path(folder) / pathname
326
288
  if not base.exists():
@@ -94,7 +94,7 @@ async def update_tags_in_resource(rs: Dict[str, Any], resource_type: str, client
94
94
  resource_name = persisted_ds.get("name", "")
95
95
  except DoesNotExistException:
96
96
  click.echo(
97
- FeedbackManager.error_tag_generic("Could not get the latest Data Source info for updating its tags.")
97
+ FeedbackManager.error_tag_generic("Could not get the latest data source info for updating its tags.")
98
98
  )
99
99
  elif resource_type == "pipe":
100
100
  pipe_name = rs["name"]
@@ -60,7 +60,7 @@ async def new_ds(
60
60
 
61
61
  if engine_param.lower() == "join":
62
62
  deprecation_notice = FeedbackManager.warning_deprecated(
63
- warning="Data Sources with Join engine are deprecated and will be removed in the next major release of tinybird-cli. Use MergeTree instead."
63
+ warning="Data sources with Join engine are deprecated and will be removed in the next major release of tinybird-cli. Use MergeTree instead."
64
64
  )
65
65
  click.echo(deprecation_notice)
66
66
 
@@ -20,6 +20,7 @@ from string import Template
20
20
  from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, cast
21
21
 
22
22
  import click
23
+ from croniter import croniter
23
24
  from mypy_extensions import KwArg, VarArg
24
25
 
25
26
  from tinybird.ch_utils.engine import ENABLED_ENGINES
@@ -200,6 +201,20 @@ class Datafile:
200
201
  def set_kind(self, kind: DatafileKind):
201
202
  self.kind = kind
202
203
 
204
+ def validate_copy_node(self, node: Dict[str, Any]):
205
+ if "target_datasource" not in node:
206
+ raise DatafileValidationError("COPY node missing target datasource")
207
+ # copy mode must be append or replace
208
+ if node.get("mode") and node["mode"] not in ["append", "replace"]:
209
+ raise DatafileValidationError("COPY node mode must be append or replace")
210
+ # copy schedule must be @on-demand or a cron-expression
211
+ if (
212
+ node.get("copy_schedule")
213
+ and node["copy_schedule"] != ON_DEMAND
214
+ and not croniter.is_valid(node["copy_schedule"])
215
+ ):
216
+ raise DatafileValidationError("COPY node schedule must be @on-demand or a valid cron expression.")
217
+
203
218
  def validate(self):
204
219
  if self.kind == DatafileKind.pipe:
205
220
  # TODO(eclbg):
@@ -208,7 +223,7 @@ class Datafile:
208
223
  # [x] Materialized nodes have target datasource
209
224
  # [x] Only one materialized node
210
225
  # [x] Only one node of any specific type
211
- # [ ] ...
226
+ # (rbarbadillo): there's a HUGE amount of validations in api_pipes.py, we should somehow merge them
212
227
  for node in self.nodes:
213
228
  if "sql" not in node:
214
229
  raise DatafileValidationError(f"SQL missing for node {repr(node['name'])}")
@@ -220,6 +235,8 @@ class Datafile:
220
235
  raise DatafileValidationError("Multiple non-standard nodes in pipe. There can only be one")
221
236
  if node.get("type", "").lower() == PipeNodeTypes.MATERIALIZED and "datasource" not in node:
222
237
  raise DatafileValidationError(f"Materialized node {repr(node['name'])} missing target datasource")
238
+ if node.get("type", "").lower() == PipeNodeTypes.COPY:
239
+ self.validate_copy_node(node)
223
240
  elif self.kind == DatafileKind.datasource:
224
241
  # TODO(eclbg):
225
242
  # [x] Just one node
@@ -232,8 +249,6 @@ class Datafile:
232
249
  node = self.nodes[0]
233
250
  if "schema" not in node:
234
251
  raise DatafileValidationError("SCHEMA is mandatory")
235
- if "engine" not in node:
236
- raise DatafileValidationError("ENGINE is mandatory")
237
252
  else:
238
253
  # We cannot validate a datafile whose kind is unknown
239
254
  pass
@@ -412,7 +412,7 @@ class PipeCheckerRunner:
412
412
  )
413
413
 
414
414
  result = PipeCheckerTextTestResult(
415
- self.checker_stream_result_class(sys.stdout), # type: ignore
415
+ self.checker_stream_result_class(sys.stdout),
416
416
  descriptions=True,
417
417
  verbosity=2,
418
418
  custom_output=custom_output,
@@ -34,7 +34,7 @@ def datasource(ctx):
34
34
 
35
35
 
36
36
  @datasource.command(name="ls")
37
- @click.option("--match", default=None, help="Retrieve any resources matching the pattern. eg --match _test")
37
+ @click.option("--match", default=None, help="Retrieve any resources matching the pattern. For example, --match _test")
38
38
  @click.option(
39
39
  "--format",
40
40
  "format_",
@@ -116,7 +116,7 @@ async def datasource_append(
116
116
  concurrency: int,
117
117
  ):
118
118
  """
119
- Appends data to an existing Data Source from URL, local file or a connector
119
+ Appends data to an existing data source from URL, local file or a connector
120
120
 
121
121
  - Load from URL `tb datasource append [datasource_name] https://url_to_csv`
122
122