syntaxmatrix 2.5.6__py3-none-any.whl → 2.6.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. syntaxmatrix/agentic/agents.py +1220 -169
  2. syntaxmatrix/agentic/agents_orchestrer.py +326 -0
  3. syntaxmatrix/agentic/code_tools_registry.py +27 -32
  4. syntaxmatrix/commentary.py +16 -16
  5. syntaxmatrix/core.py +185 -81
  6. syntaxmatrix/db.py +460 -4
  7. syntaxmatrix/{display.py → display_html.py} +2 -6
  8. syntaxmatrix/gpt_models_latest.py +1 -1
  9. syntaxmatrix/media/__init__.py +0 -0
  10. syntaxmatrix/media/media_pixabay.py +277 -0
  11. syntaxmatrix/models.py +1 -1
  12. syntaxmatrix/page_builder_defaults.py +183 -0
  13. syntaxmatrix/page_builder_generation.py +1122 -0
  14. syntaxmatrix/page_layout_contract.py +644 -0
  15. syntaxmatrix/page_patch_publish.py +1471 -0
  16. syntaxmatrix/preface.py +142 -21
  17. syntaxmatrix/profiles.py +28 -10
  18. syntaxmatrix/routes.py +1740 -453
  19. syntaxmatrix/selftest_page_templates.py +360 -0
  20. syntaxmatrix/settings/client_items.py +28 -0
  21. syntaxmatrix/settings/model_map.py +1022 -207
  22. syntaxmatrix/settings/prompts.py +328 -130
  23. syntaxmatrix/static/assets/hero-default.svg +22 -0
  24. syntaxmatrix/static/icons/bot-icon.png +0 -0
  25. syntaxmatrix/static/icons/favicon.png +0 -0
  26. syntaxmatrix/static/icons/logo.png +0 -0
  27. syntaxmatrix/static/icons/logo3.png +0 -0
  28. syntaxmatrix/templates/admin_branding.html +104 -0
  29. syntaxmatrix/templates/admin_features.html +63 -0
  30. syntaxmatrix/templates/admin_secretes.html +108 -0
  31. syntaxmatrix/templates/dashboard.html +296 -133
  32. syntaxmatrix/templates/dataset_resize.html +535 -0
  33. syntaxmatrix/templates/edit_page.html +2535 -0
  34. syntaxmatrix/utils.py +2431 -2383
  35. {syntaxmatrix-2.5.6.dist-info → syntaxmatrix-2.6.2.dist-info}/METADATA +6 -2
  36. {syntaxmatrix-2.5.6.dist-info → syntaxmatrix-2.6.2.dist-info}/RECORD +39 -24
  37. syntaxmatrix/generate_page.py +0 -644
  38. syntaxmatrix/static/icons/hero_bg.jpg +0 -0
  39. {syntaxmatrix-2.5.6.dist-info → syntaxmatrix-2.6.2.dist-info}/WHEEL +0 -0
  40. {syntaxmatrix-2.5.6.dist-info → syntaxmatrix-2.6.2.dist-info}/licenses/LICENSE.txt +0 -0
  41. {syntaxmatrix-2.5.6.dist-info → syntaxmatrix-2.6.2.dist-info}/top_level.txt +0 -0
@@ -2,8 +2,9 @@
2
2
  from __future__ import annotations
3
3
  import os, re, json, textwrap, requests
4
4
  import pandas as pd
5
-
6
- from typing import Optional, List
5
+ import uuid
6
+ import io
7
+ from typing import Optional, List, Any, Dict
7
8
 
8
9
  from syntaxmatrix import utils
9
10
  from syntaxmatrix.settings.model_map import GPT_MODELS_LATEST
@@ -13,6 +14,14 @@ from google.genai import types
13
14
  import tiktoken
14
15
  from google.genai.errors import APIError
15
16
 
17
+ from io import BytesIO
18
+ from PIL import Image
19
+
20
+ from dataclasses import dataclass
21
+ import hashlib
22
+ from PIL import Image
23
+ from syntaxmatrix.page_layout_contract import normalise_layout, validate_layout
24
+
16
25
 
17
26
  def token_calculator(total_input_content, llm_profile):
18
27
 
@@ -42,7 +51,7 @@ def token_calculator(total_input_content, llm_profile):
42
51
  input_prompt_tokens = len(enc.encode(total_input_content))
43
52
  return input_prompt_tokens
44
53
 
45
- def mlearning_agent(user_prompt, system_prompt, coding_profile, temperature=0.1, max_tokens=4096):
54
+ def mlearning_agent(user_prompt, system_prompt, coder_profile):
46
55
  """
47
56
  Returns:
48
57
  (text, usage_dict)
@@ -58,11 +67,12 @@ def mlearning_agent(user_prompt, system_prompt, coding_profile, temperature=0.1,
58
67
  }
59
68
  """
60
69
 
61
- # coding_profile['client'] = _prof.get_client(coding_profile)
62
- _client = coding_profile["client"]
63
- _provider = coding_profile["provider"].lower()
64
- _model = coding_profile["model"]
65
-
70
+ _coder_profile = _prof.get_profile('coder')
71
+ _coder_profile['client'] = _prof.get_client(_coder_profile)
72
+ _client = _coder_profile['client']
73
+ _provider = _coder_profile["provider"].lower()
74
+ _model = _coder_profile["model"]
75
+
66
76
  usage = {
67
77
  "provider": _provider,
68
78
  "model": _model,
@@ -95,72 +105,41 @@ def mlearning_agent(user_prompt, system_prompt, coding_profile, temperature=0.1,
95
105
  # Google
96
106
  def google_generate_code():
97
107
  nonlocal usage
98
- """
99
- Generates content using the Gemini API and calculates token usage
100
- including Context Overhead for consistency.
101
- """
102
-
103
- try:
104
- # 1. Client Initialization
105
- config = types.GenerateContentConfig(
106
- system_instruction=system_prompt,
107
- temperature=temperature,
108
- max_output_tokens=max_tokens,
109
- )
108
+ config = types.GenerateContentConfig(
109
+ system_instruction=system_prompt,
110
+ # Optional: Force the model to generate a Python code block as JSON
111
+ response_mime_type="application/json",
112
+ response_schema=types.Schema(
113
+ type=types.Type.OBJECT,
114
+ properties={
115
+ "code": types.Schema(type=types.Type.STRING, description="The runnable Python code."),
116
+ "explanation": types.Schema(type=types.Type.STRING, description="A brief explanation of the code."),
117
+ },
118
+ required=["code"]
119
+ ),
120
+ )
110
121
 
111
- # 2. API Call
112
- resp = _client.models.generate_content(
122
+ try:
123
+ response = _client.models.generate_content(
113
124
  model=_model,
114
- contents=[user_prompt],
125
+ contents=user_prompt,
115
126
  config=config,
116
127
  )
128
+ except Exception as e:
129
+ return f"An error occurred during API call: {e}"
117
130
 
118
- # 3. Token Usage Capture and Context Overhead Calculation
119
- um = resp.usage_metadata
120
- usage["input_tokens"] = um.prompt_token_count
121
- usage["output_tokens"] = um.thoughts_token_count
122
- usage["total_tokens"] = um.total_token_count
123
-
124
- # 4. Response Extraction (same robust logic as before)
125
- text = getattr(resp, "text", None)
126
- if isinstance(text, str) and text.strip():
127
- return text.strip()
128
-
129
- chunks = []
130
- candidates = getattr(resp, "candidates", None) or []
131
- for cand in candidates:
132
- content = getattr(cand, "content", None)
133
- if content:
134
- parts = getattr(content, "parts", None) or []
135
- for part in parts:
136
- t = getattr(part, "text", None)
137
- if t:
138
- chunks.append(str(t))
139
-
140
- text = "\n".join(chunks).strip()
141
- if text:
142
- return text
143
-
144
- # 5. Handle blocked response
145
- fb = getattr(resp, "prompt_feedback", None)
146
- block_reason = getattr(fb, "block_reason", None) if fb else None
147
- if block_reason and block_reason != types.BlockedReason.REASON_UNSPECIFIED:
148
- raise RuntimeError(f"{_model} blocked the response. Reason: {block_reason.name}")
149
- raise RuntimeError(f"{_model} failed to return content due to insufficient data.")
131
+ # 3. Token Usage Capture and Context Overhead Calculation
132
+ um = response.usage_metadata
133
+ usage["input_tokens"] = um.prompt_token_count
134
+ usage["output_tokens"] = um.candidates_token_count + um.thoughts_token_count
135
+ usage["total_tokens"] = um.total_token_count
150
136
 
151
- except APIError as e:
152
- error_msg = f"Gemini API Error: {e}"
153
-
137
+ try:
138
+ # The response text will be a JSON string due to the config.
139
+ response_json = json.loads(response.text)
140
+ return response_json.get("code", "Error: Code field not found in response.")
154
141
  except Exception as e:
155
- error_msg = f"An unexpected error occurred during API call or processing: {e}"
156
-
157
- # --- Return the error message wrapped in the required output code structure ---
158
- msg = f"I smxAI have instructed {error_msg}\n"
159
- return (
160
- f"# {msg}\n"
161
- "from syntaxmatrix.display import show\n"
162
- f"show({msg!r})\n"
163
- )
142
+ return f"Error parsing response as JSON: {e}\nRaw Response: {response.text}"
164
143
 
165
144
  # OpenAI Responses API
166
145
  def gpt_models_latest_generate_code():
@@ -170,7 +149,7 @@ def mlearning_agent(user_prompt, system_prompt, coding_profile, temperature=0.1,
170
149
  reasoning_effort, verbosity = "medium", "medium"
171
150
  if _model == "gpt-5-nano":
172
151
  reasoning_effort, verbosity = "low", "low"
173
- elif _model in ["gpt-5-mini", "gpt-5-codex-mini"]:
152
+ elif _model in ["gpt-5-mini", "gpt-5-mini-codex"]:
174
153
  reasoning_effort, verbosity = "medium", "medium"
175
154
  elif _model in ["gpt-5", "gpt-5-codex", "gpt-5-pro"]:
176
155
  reasoning_effort, verbosity = "high", "high"
@@ -194,19 +173,7 @@ def mlearning_agent(user_prompt, system_prompt, coding_profile, temperature=0.1,
194
173
 
195
174
  code = _out(resp).strip()
196
175
  if code:
197
- return code
198
-
199
- # Try to surface any block reason (safety / policy / etc.)
200
- block_reason = None
201
- output = resp.get("output")
202
- for item in output:
203
- fr = getattr(item, "finish_reason", None)
204
- if fr and fr != "stop":
205
- block_reason = fr
206
- break
207
- if block_reason:
208
- raise RuntimeError(f"{_model} stopped with reason: {block_reason}")
209
- raise RuntimeError(f"{_model} returned an empty response in this section due to insufficient data.")
176
+ return code
210
177
 
211
178
  except APIError as e:
212
179
  # IMPORTANT: return VALID PYTHON so the dashboard can show the error
@@ -225,15 +192,14 @@ def mlearning_agent(user_prompt, system_prompt, coding_profile, temperature=0.1,
225
192
  "from syntaxmatrix.display import show\n"
226
193
  f"show({msg!r})\n"
227
194
  )
228
-
195
+
229
196
  # Anthropic
230
197
  def anthropic_generate_code():
231
- nonlocal usage
198
+
232
199
  try:
233
200
  resp = _client.messages.create(
234
201
  model=_model,
235
- max_tokens=max_tokens,
236
- temperature=temperature,
202
+ temperature=0,
237
203
  system=system_prompt,
238
204
  messages=[
239
205
  {"role": "user", "content": user_prompt}
@@ -276,40 +242,43 @@ def mlearning_agent(user_prompt, system_prompt, coding_profile, temperature=0.1,
276
242
  def openai_sdk_generate_code():
277
243
  nonlocal usage
278
244
  try:
279
- resp = _client.chat.completions.create(
245
+ response = None
246
+ if _model == "deepseek-reasoner":
247
+ response = _client.chat.completions.create(
248
+ model=_model,
249
+ messages=[
250
+ {"role": "system", "content": system_prompt},
251
+ {"role": "user", "content": user_prompt},
252
+ ],
253
+ extra_body={"thinking": {"type": "enabled"}},
254
+ temperature=0,
255
+ stream=False
256
+ )
257
+ else:
258
+ response = _client.chat.completions.create(
280
259
  model=_model,
281
260
  messages=[
282
261
  {"role": "system", "content": system_prompt},
283
262
  {"role": "user", "content": user_prompt},
284
263
  ],
285
- temperature=temperature,
286
- max_tokens=max_tokens,
264
+ temperature=0,
265
+ stream=False
287
266
  )
288
-
289
-
290
-
291
- um = resp.usage
267
+ content = response.choices[0].message.content
268
+
269
+ um = response.usage
292
270
  usage["input_tokens"] = um.prompt_tokens
293
271
  usage["output_tokens"] = um.completion_tokens
294
272
  usage["total_tokens"] = um.total_tokens
295
273
 
296
- text = resp.choices[0].message.content
297
- if text:
298
- return text
299
-
300
- # Try to surface any block reason (safety / policy / etc.)
301
- block_reason = None
302
- choices = getattr(resp, "choices", None) or []
303
- if choices:
304
- first = choices[0]
305
- fr = getattr(first, "finish_reason", None)
306
- if fr and fr != "stop":
307
- block_reason = fr
308
-
309
- if block_reason:
310
- raise RuntimeError(f"{_model} stopped with reason: {block_reason}")
311
- # Fallback: nothing useful came back
312
- raise RuntimeError(f"{_model} returned nothing in this section due to insufficient data.")
274
+ code_match = re.search(r"```(?:python)?\n(.*?)```", content, re.DOTALL)
275
+
276
+ if code_match:
277
+ return code_match.group(1).strip()
278
+ else:
279
+ # If no markdown blocks are found, return the raw content
280
+ # (assuming the model obeyed instructions to output only code)
281
+ return content.strip()
313
282
 
314
283
  except Exception as e:
315
284
  # IMPORTANT: return VALID PYTHON so the dashboard can show the error
@@ -318,9 +287,7 @@ def mlearning_agent(user_prompt, system_prompt, coding_profile, temperature=0.1,
318
287
  f"# {msg}\n"
319
288
  "from syntaxmatrix.display import show\n"
320
289
  f"show({msg!r})\n"
321
- )
322
-
323
- # print("TTOOKKEENN: ", token_calculator(system_prompt + user_prompt, coding_profile))
290
+ )
324
291
 
325
292
  if _provider == "google":
326
293
  code = google_generate_code()
@@ -331,18 +298,20 @@ def mlearning_agent(user_prompt, system_prompt, coding_profile, temperature=0.1,
331
298
  else:
332
299
  code = openai_sdk_generate_code()
333
300
 
301
+ code = str(code or "")
334
302
  return code, usage
335
303
 
336
304
 
337
- def refine_question_agent(raw_question: str, dataset_context: str | None = None) -> str:
338
-
339
- def response_agent(user_prompt, system_prompt, llm_profile, temp=0.0, max_tokens=128):
340
- _profile = llm_profile
341
-
342
- _client = _profile["client"]
343
- _provider = _profile["provider"].lower()
344
- _model = _profile["model"]
345
-
305
+ def context_compatibility(question: str, dataset_context: str | None = None) -> str:
306
+
307
+ _profile = _prof.get_profile('classifier') or _prof.get_profile('admin')
308
+ _profile['client'] = _prof.get_client(_profile)
309
+ _client = _profile['client']
310
+ _provider = _profile.get("provider").lower()
311
+ _model = _profile.get("model")
312
+
313
+ def compatibility_response(user_prompt, system_prompt, temp=0.0, max_tokens=128):
314
+
346
315
  # Google GenAI
347
316
  if _provider == "google":
348
317
  resp = _client.models.generate_content(
@@ -425,48 +394,33 @@ def refine_question_agent(raw_question: str, dataset_context: str | None = None)
425
394
 
426
395
  return "Configure LLM Profiles or contact your administrator."
427
396
 
428
- system_prompt = (
429
- "You rewrite user questions into specification Machine Learning (ML) job description. "
430
- "If a dataset summary is provided, use it to respect column and help you redefine the question. "
431
- "DO NOT write andy prelude or preamble"
432
- )
397
+ system_prompt = ("""
398
+ - You are a Machine Learning (ML) and Data Science (DS) expert who detects incompatibilities between user questions and dataset summaries.
399
+ - Your goal is to analyze the question and the provided dataset summary to determine if they are compatible.
400
+ - If and only if the dataset summary columns are not relevant to your desired columns that you have deduced, by analysing the question, and you suspect that the wrong dataset was used in the dataset summary, you MUST STOP just say: 'incompatible'.
401
+ - If they are compatible, just 'compatible'.
402
+ - DO NOT include any prelude or preamble. Just the response: 'incompatible' or 'compatible'.
403
+ """)
433
404
 
434
- user_prompt = f"User question:\n{raw_question}\n\n"
405
+ user_prompt = f"User question:\n{question}\n\n"
435
406
  if dataset_context:
436
407
  user_prompt += f"Dataset summary:\n{dataset_context}\n"
437
408
 
438
- _refiner_profile = _prof.get_profile("classification") or _prof.get_profile("admin")
439
- if not _refiner_profile:
440
- return "ERROR"
441
-
442
- _refiner_profile['client'] = _prof.get_client(_refiner_profile)
443
-
444
- refined_question = response_agent(user_prompt, system_prompt, _refiner_profile, temp=0.0, max_tokens=128)
445
- return refined_question
409
+ compatibility = compatibility_response(user_prompt, system_prompt, temp=0.0, max_tokens=120)
410
+ return compatibility
446
411
 
447
412
 
448
413
  def classify_ml_job_agent(refined_question, dataset_profile):
449
- """
450
- Instructs an LLM (gemini-2.5-flash) to analyze a task description
451
- and return a list of associated machine learning job/task types.
452
- This version uses a highly extensive, generalized list of ML jobs
453
- to ensure robustness across all domains (NLP, CV, RL, etc.).
454
-
455
- Args:
456
- task_description: The detailed description of the statistical/ML task.
457
-
458
- Returns:
459
- A list of strings identifying the relevant ML jobs. Returns an empty
460
- list if the API call fails or the output cannot be parsed.
461
- """
414
+ import ast
415
+
416
+ _profile = _prof.get_profile('classifier') or _prof.get_profile('admin')
417
+ _profile['client'] = _prof.get_client(_profile)
418
+ _client = _profile['client']
419
+ _provider = _profile["provider"].lower()
420
+ _model = _profile["model"]
462
421
 
463
- def ml_response(user_prompt, system_prompt, profile):
464
- _profile = profile # _prof.get_profile["admin"]
422
+ def ml_response(user_prompt, system_prompt):
465
423
 
466
- _client = _profile["client"]
467
- _provider = _profile["provider"].lower()
468
- _model = _profile["model"]
469
-
470
424
  prompt = user_prompt + "\n\n" + system_prompt
471
425
 
472
426
  # Google GenAI
@@ -571,10 +525,19 @@ def classify_ml_job_agent(refined_question, dataset_profile):
571
525
 
572
526
  return "Configure LLM Profiles or contact your administrator."
573
527
 
574
- system_prompt = (
575
- "You are a strict machine learning task classifier for an ML workbench.\n"
576
- "Your job is to label the user's task desc. with all relevant tags from a fixed list.\n\n"
577
- )
528
+ system_prompt = ("""
529
+ "You are an expert ML task extractor. Your job is to analyze the user's task description and extract every implied or explicit machine learning (ML) task that would be necessary to accomplish the user's goals. Use the provided list of ML tasks as your reference for classification.
530
+ Core Instruction:
531
+ Extract every implied or explicit ML task. Format the response solely as a simple, flat list of tasks. Use concise, imperative verbs. Do not include explanations, examples, preludes, conclusions, or any non-task text.
532
+
533
+ Extraction Rules:
534
+ Ignore all context-setting, descriptions, goals, and commentary.
535
+ Convert every actionable step, data operation, and visualization generation into a discrete task.
536
+ Generalize any dataset-specific terms (e.g., column names) to their functional purpose.
537
+ Treat "CoT" or reasoning steps as a source for data preparation tasks.
538
+ Do not include steps like "No Scaling required" or "No Modeling required" as tasks.
539
+ Output only the list. No titles, headers, numbering or bullet points.
540
+ """)
578
541
 
579
542
  # --- 1. Define the Master List of ML Tasks (Generalized) ---
580
543
  ml_task_list = [
@@ -592,15 +555,14 @@ def classify_ml_job_agent(refined_question, dataset_profile):
592
555
  "generative_modeling", "causal_inference", "risk_modeling", "graph_analysis",
593
556
 
594
557
  # Foundational/Pipeline Steps
595
- "feature_engineering", "statistical_inference", "data_preprocessing",
596
- "model_validation", "hyperparameter_tuning"
558
+ "data_preprocessing", "feature_engineering", "statistical_inference", "clustering", "hyperparameter_tuning"
597
559
  ]
598
560
 
599
561
  # --- 2. Construct the Generalized Prompt for the LLM ---
600
562
  task_description = refined_question
601
563
 
602
564
  user_prompt = f"""
603
- Analyze the following task description:
565
+ Analyze and classify the following task description:
604
566
  ---
605
567
  {task_description}
606
568
  ---
@@ -613,21 +575,1110 @@ def classify_ml_job_agent(refined_question, dataset_profile):
613
575
  ML Jobs List: {', '.join(ml_task_list)}
614
576
 
615
577
  Respond ONLY with a valid JSON array of strings containing the selected ML job names.
616
- Example Response: ["natural_language_processing", "classification", "feature_engineering"]
578
+ Example Response: ["data_preprocessing", "regression", "classification", "feature_engineering"]
617
579
  """
618
580
 
619
581
  if dataset_profile:
620
582
  user_prompt += f"\nDataset profile:\n{dataset_profile}\n"
621
583
 
622
- llm_profile = _prof.get_profile("classification") or _prof.get_profile("admin")
623
- if not llm_profile:
624
- return "ERROR"
625
584
 
626
- llm_profile['client'] = _prof.get_client(llm_profile)
585
+ tasks = ml_response(user_prompt, system_prompt)
586
+ try:
587
+ return ast.literal_eval(tasks)
588
+ except Exception:
589
+ return tasks
590
+
591
+
592
+ # ─────────────────────────────────────────────────────────
593
+ # Agentic Page Generation (plan JSON → validate → Pixabay → compile HTML)
594
+ # ─────────────────────────────────────────────────────────
595
+ def agentic_generate_page(*,
596
+ page_slug: str,
597
+ website_description: str,
598
+ client_dir: str,
599
+ pixabay_api_key: str = "",
600
+ llm_profile: dict | None = None,
601
+ max_retries: int = 2,
602
+ max_images: int = 9,
603
+ ) -> dict:
604
+ """
605
+ Returns:
606
+ {
607
+ "slug": "<slug>",
608
+ "plan": <dict>,
609
+ "html": "<compiled html>",
610
+ "notes": [..]
611
+ }
612
+ """
613
+
614
+ _ICON_SVGS = {
615
+ "spark": '<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">'
616
+ '<path d="M12 2l1.2 6.2L20 12l-6.8 3.8L12 22l-1.2-6.2L4 12l6.8-3.8L12 2z"/></svg>',
617
+ "shield": '<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">'
618
+ '<path d="M12 2l7 4v6c0 5-3.5 9-7 10-3.5-1-7-5-7-10V6l7-4z"/></svg>',
619
+ "stack": '<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">'
620
+ '<path d="M12 2l9 5-9 5-9-5 9-5z"/><path d="M3 12l9 5 9-5"/><path d="M3 17l9 5 9-5"/></svg>',
621
+ "chart": '<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">'
622
+ '<path d="M3 3v18h18"/><path d="M7 14v4"/><path d="M12 10v8"/><path d="M17 6v12"/></svg>',
623
+ "rocket": '<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">'
624
+ '<path d="M5 13l4 6 6-4c6-4 5-12 5-12S13 2 9 8l-4 5z"/><path d="M9 8l7 7"/>'
625
+ '<path d="M5 13l-2 2"/><path d="M11 19l-2 2"/></svg>',
626
+ "plug": '<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">'
627
+ '<path d="M9 2v6"/><path d="M15 2v6"/><path d="M7 8h10"/>'
628
+ '<path d="M12 8v7a4 4 0 0 1-4 4H7"/><path d="M12 8v7a4 4 0 0 0 4 4h1"/></svg>',
629
+ "arrow": '<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">'
630
+ '<path d="M5 12h12"/><path d="M13 6l6 6-6 6"/></svg>',
631
+ "users": '<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">'
632
+ '<path d="M17 21v-2a4 4 0 0 0-4-4H5a4 4 0 0 0-4 4v2"/>'
633
+ '<circle cx="9" cy="7" r="4"/>'
634
+ '<path d="M23 21v-2a4 4 0 0 0-3-3.87"/>'
635
+ '<path d="M16 3.13a4 4 0 0 1 0 7.75"/></svg>',
636
+ }
637
+
638
+ _PLACEHOLDER_PATTERNS = [
639
+ r"\blorem\b",
640
+ r"\bplaceholder\b",
641
+ r"coming soon",
642
+ r"add (?:a|your|an)\b",
643
+ r"replace this",
644
+ r"insert (?:your|a)\b",
645
+ r"dummy text",
646
+ r"example (?:text|copy)\b",
647
+ ]
648
+
649
+ _PX_BANNED_TAGS = {
650
+ "shoe", "shoes", "sneaker", "sneakers", "footwear", "fashion",
651
+ "lingerie", "bikini", "underwear", "swimwear",
652
+ }
653
+
654
+
655
+ def _strip(s: str) -> str:
656
+ return (s or "").strip()
657
+
658
+ def _slugify(s: str) -> str:
659
+ s = _strip(s).lower()
660
+ s = re.sub(r"[^a-z0-9\s\-]", "", s)
661
+ s = re.sub(r"\s+", "-", s).strip("-")
662
+ s = re.sub(r"-{2,}", "-", s)
663
+ return s or "page"
664
+
665
+ def _title_from_slug(slug: str) -> str:
666
+ t = _strip(slug).replace("-", " ").replace("_", " ")
667
+ t = re.sub(r"\s+", " ", t)
668
+ return (t[:1].upper() + t[1:]) if t else "New page"
669
+
670
+ def _contains_placeholders(text: str) -> bool:
671
+ t = (text or "").lower()
672
+ for pat in _PLACEHOLDER_PATTERNS:
673
+ if re.search(pat, t):
674
+ return True
675
+ return False
676
+
677
+ def _extract_domain_keywords(website_description: str, max_terms: int = 6) -> list[str]:
678
+ """
679
+ Very lightweight keyword extraction to keep Pixabay queries on-topic.
680
+ No ML needed: just pick frequent meaningful tokens.
681
+ """
682
+ wd = (website_description or "").lower()
683
+ wd = re.sub(r"[^a-z0-9\s\-]", " ", wd)
684
+ toks = [t for t in re.split(r"\s+", wd) if 3 <= len(t) <= 18]
685
+ stop = {
686
+ "this", "that", "with", "from", "into", "your", "their", "have", "will",
687
+ "also", "more", "than", "them", "such", "only", "when", "where", "which",
688
+ "what", "about", "page", "website", "company", "product", "service",
689
+ "syntaxmatrix", "framework", "system", "platform",
690
+ }
691
+ freq = {}
692
+ for t in toks:
693
+ if t in stop:
694
+ continue
695
+ freq[t] = freq.get(t, 0) + 1
696
+ ranked = sorted(freq.items(), key=lambda x: x[1], reverse=True)
697
+ out = [k for k, _ in ranked[:max_terms]]
698
+ # Always anchor to software/AI semantics if present
699
+ anchors = []
700
+ for a in ["ai", "assistant", "dashboard", "retrieval", "vector", "ml", "analytics", "deployment"]:
701
+ if a in wd and a not in out:
702
+ anchors.append(a)
703
+ return (anchors + out)[:max_terms]
704
+
705
+ def _get_json_call(system_prompt: str, user_prompt: str) -> dict:
706
+
707
+ llm_profile = _prof.get_profile('coder')
708
+ llm_profile['client'] = _prof.get_client(llm_profile)
709
+ client = llm_profile["client"]
710
+ model = llm_profile["model"]
711
+ provider = llm_profile["provider"].lower()
712
+
713
+
714
+ def openai_sdk_response():
715
+ resp = client.chat.completions.create(
716
+ model=model,
717
+ messages=[
718
+ {"role": "system", "content": system_prompt},
719
+ {"role": "user", "content": user_prompt},
720
+ ],
721
+ response_format={"type": "json_object"},
722
+ )
723
+
724
+ # Access the text via choices[0].message.content
725
+ txt = resp.choices[0].message.content.strip()
726
+
727
+ try:
728
+ return json.loads(txt)
729
+ except Exception:
730
+ # try to salvage first JSON object (consistent with your other providers)
731
+ m = re.search(r"\{.*\}", txt, re.S)
732
+ if not m:
733
+ raise RuntimeError(f"Model did not return JSON. Output was:\n{txt[:800]}")
734
+ return json.loads(m.group(0))
735
+
736
+
737
+ if provider == "google":
738
+ cfg = types.GenerateContentConfig(
739
+ system_instruction=system_prompt,
740
+ response_mime_type="application/json",
741
+ )
742
+ resp = client.models.generate_content(
743
+ model=model,
744
+ contents=user_prompt,
745
+ config=cfg,
746
+ )
747
+ txt = (resp.text or "").strip()
748
+ try:
749
+ return json.loads(txt)
750
+ except Exception:
751
+ # try to salvage first JSON object
752
+ m = re.search(r"\{.*\}", txt, re.S)
753
+ if not m:
754
+ raise RuntimeError(f"Model did not return JSON. Output was:\n{txt[:800]}")
755
+ return json.loads(m.group(0))
756
+
757
+
758
+ if provider == "openai":
759
+ if int(model.split("gpt-")[1][0])>=5:
760
+ response = client.responses.create(
761
+ model=model,
762
+ instructions=system_prompt,
763
+ input=[
764
+ {"role": "user", "content": user_prompt}
765
+ ],
766
+ reasoning={"effort": "medium"},
767
+ text=[
768
+ {"verbosity": "low"},
769
+ {"format": {"type": "json_object"}}
770
+ ],
771
+ )
772
+
773
+ txt = (response.output_text or "")
774
+ try:
775
+ return json.loads(txt)
776
+ except Exception:
777
+ # try to salvage first JSON object
778
+ m = re.search(r"\{.*\}", txt, re.S)
779
+ if not m:
780
+ raise RuntimeError(f"Model did not return JSON. Output was:\n{txt[:800]}")
781
+ return json.loads(m.group(0))
782
+
783
+ else:
784
+ return openai_sdk_response()
785
+
786
+
787
+ if provider == "anthropic":
788
+ # Anthropic requires a max_tokens parameter
789
+ resp = client.messages.create(
790
+ model=model,
791
+ system=system_prompt,
792
+ messages=[
793
+ {"role": "user", "content": user_prompt}
794
+ ],
795
+ max_tokens=4096,
796
+ )
797
+
798
+ # Anthropic returns a list of content blocks
799
+ txt = resp.content[0].text.strip()
800
+
801
+ try:
802
+ return json.loads(txt)
803
+ except Exception:
804
+ # try to salvage first JSON object (same logic as your Google snippet)
805
+ m = re.search(r"\{.*\}", txt, re.S)
806
+ if not m:
807
+ raise RuntimeError(f"Model did not return JSON. Output was:\n{txt[:800]}")
808
+ return json.loads(m.group(0))
809
+
810
+ return openai_sdk_response()
811
+
812
+
813
+ def _page_plan_system_prompt(spec: dict) -> str:
814
+ allowed_sections = spec.get("allowed_section_types") or ["hero", "features", "gallery", "testimonials", "faq", "cta", "richtext"]
815
+ req = spec.get("required_sections") or []
816
+
817
+ req_lines = ""
818
+ if req:
819
+ req_lines = "REQUIRED SECTIONS (must appear in this exact order):\n" + "\n".join(
820
+ [f"- {r['id']} (type: {r['type']})" for r in req]
821
+ )
822
+
823
+ return f"""
824
+ You are a senior UX designer + product copywriter for modern software websites.
825
+
826
+ TASK:
827
+ Create a complete page plan (content + structure) for a page builder.
828
+
829
+ RULES (strict):
830
+ - No placeholders, no “add your…”, no “replace this…”, no “lorem ipsum”, no “coming soon”.
831
+ - All copy must be final, meaningful, and grounded in the provided WEBSITE_DESCRIPTION.
832
+ - Produce a page that looks like a finished, publish-ready website page.
833
+ - Choose section types and item types from the allowed lists.
834
+ - Choose icon names only from the allowed icon list.
835
+ - Provide image search queries for items that need images. Keep queries on-topic (software/AI/tech).
836
+
837
+ {req_lines}
838
+
839
+ OUTPUT:
840
+ Return ONLY valid JSON.
841
+
842
+ ALLOWED SECTION TYPES:
843
+ {chr(10).join([f"- {t}" for t in allowed_sections])}
844
+
845
+ ALLOWED ITEM TYPES:
846
+ - card
847
+ - quote
848
+ - faq
849
+
850
+ ALLOWED ICONS:
851
+ - spark, shield, stack, chart, rocket, plug, arrow, users
852
+
853
+ JSON SCHEMA:
854
+ {{
855
+ "page": "<slug>",
856
+ "category": "<string>",
857
+ "template": {{ "id": "<string>", "version": "<string>" }},
858
+ "meta": {{
859
+ "pageTitle": "<string>",
860
+ "summary": "<string>"
861
+ }},
862
+ "sections": [
863
+ {{
864
+ "id": "<string>",
865
+ "type": "<sectionType>",
866
+ "title": "<string>",
867
+ "text": "<string>",
868
+ "cols": 1-5,
869
+ "items": [
870
+ {{
871
+ "id": "<string>",
872
+ "type": "<itemType>",
873
+ "title": "<string>",
874
+ "text": "<string>",
875
+ "icon": "<iconName or empty>",
876
+ "imgQuery": "<search query or empty>",
877
+ "needsImage": true|false
878
+ }}
879
+ ]
880
+ }}
881
+ ]
882
+ }}
883
+
884
+ GUIDANCE:
885
+ - Keep sections between {spec.get("min_sections", 4)} and {spec.get("max_sections", 7)}.
886
+ - Keep total images between {spec.get("min_images", 6)} and {spec.get("max_images", 9)}.
887
+ """.strip()
888
+
889
+
890
def _make_page_plan(*, page_slug: str, website_description: str, template_spec: dict) -> dict:
    """Ask the model for a structured page plan for *page_slug*.

    Args:
        page_slug: Raw slug for the page; normalised via ``_slugify``.
        website_description: Non-empty site description that drives the copy.
        template_spec: One of ``TEMPLATE_SPECS``; supplies category/template
            identity that is forced back onto the result.

    Returns:
        The parsed plan dict with ``page``/``category``/``template`` pinned
        to the requested values.

    Raises:
        ValueError: When ``website_description`` is blank.
        RuntimeError: When the model response lacks a ``sections`` list.
    """
    slug = _slugify(page_slug)
    wd = _strip(website_description)
    if not wd:
        raise ValueError("website_description is empty. Pass smx.website_description if the form field is blank.")

    domain_terms = _extract_domain_keywords(wd)
    user_prompt = json.dumps({
        "PAGE_SLUG": slug,
        "PAGE_TITLE": _title_from_slug(slug),
        "WEBSITE_DESCRIPTION": wd,
        "DOMAIN_KEYWORDS": domain_terms,
        "HARD_REQUIREMENTS": {
            "no_placeholders": True,
            "uk_english": True,
            "min_sections": 4,
            "max_sections": 7,
            "min_images": 6,
            "max_images": 9
        }
    }, indent=2)

    plan = _get_json_call(
        system_prompt=_page_plan_system_prompt(template_spec),
        user_prompt=user_prompt
    )

    # Pin identity fields so the model cannot rename the page or swap the
    # template.  (The original assigned plan["page"] twice; once suffices.)
    plan["page"] = slug
    plan["category"] = template_spec["category"]
    plan["template"] = template_spec["template"]

    if "sections" not in plan or not isinstance(plan["sections"], list):
        raise RuntimeError("Invalid plan: missing sections[]")

    return plan
927
+
928
+
929
def _validate_plan_or_raise(plan: dict) -> None:
    """Check the structural contract of a generated page plan.

    Raises ``ValueError`` describing the first problem found; returns
    ``None`` when the plan passes every check.
    """
    if not isinstance(plan, dict):
        raise ValueError("Plan is not a dict.")

    if not plan.get("page"):
        raise ValueError("Plan missing 'page'.")

    sections = plan.get("sections")
    if not isinstance(sections, list) or len(sections) < 3:
        raise ValueError("Plan must have at least 3 sections.")

    image_count = 0
    for section in sections:
        if not isinstance(section, dict):
            raise ValueError("Section is not an object.")
        # Reject model boilerplate (e.g. "<string>"-style stubs) in copy.
        if _contains_placeholders(section.get("title", "")) or _contains_placeholders(section.get("text", "")):
            raise ValueError("Plan contains placeholder text in section title/text.")

        entries = section.get("items") or []
        if not isinstance(entries, list):
            raise ValueError("Section items must be a list.")
        for entry in entries:
            if not isinstance(entry, dict):
                raise ValueError("Item is not an object.")
            if _contains_placeholders(entry.get("title", "")) or _contains_placeholders(entry.get("text", "")):
                raise ValueError("Plan contains placeholder text in item title/text.")
            if entry.get("needsImage"):
                image_count += 1

    if image_count < 4:
        raise ValueError("Plan is too light on imagery; needs at least 4 items marked needsImage=true.")
960
+
961
+
962
def _repair_plan(*, plan: dict, error_msg: str, website_description: str) -> dict:
    """Ask the model to repair an invalid plan rather than regenerate it.

    NOTE(review): ``_page_plan_system_prompt()`` is invoked here with no
    template spec, unlike ``_make_page_plan`` — confirm the helper provides
    a suitable default argument.
    """
    repair_suffix = "\n\nYou are repairing an existing plan. Keep it consistent and improve only what is needed."
    system_prompt = _page_plan_system_prompt() + repair_suffix
    user_prompt = json.dumps(
        {
            "ERROR": error_msg,
            "WEBSITE_DESCRIPTION": website_description,
            "PLAN": plan,
        },
        indent=2,
    )

    fixed = _get_json_call(system_prompt=system_prompt, user_prompt=user_prompt)
    # Keep the original slug when one exists; otherwise accept the model's.
    fixed["page"] = plan.get("page") or fixed.get("page")
    return fixed
977
+
978
+ def _ensure_hero_image(plan: dict, default_url: str = "/static/assets/hero-default.svg") -> None:
979
+ """Guarantee hero.imageUrl exists so contract validation never fails."""
980
+ sections = plan.get("sections") if isinstance(plan.get("sections"), list) else []
981
+ hero = next((s for s in sections if isinstance(s, dict) and (s.get("type") or "").lower() == "hero"), None)
982
+ if not hero:
983
+ return
984
+
985
+ def _first_image_in_items(items):
986
+ if not isinstance(items, list):
987
+ return ""
988
+ for it in items:
989
+ if not isinstance(it, dict):
990
+ continue
991
+ u = (it.get("imageUrl") or "").strip()
992
+ if u:
993
+ return u
994
+ return ""
995
+
996
+ # 1) Use hero.imageUrl if present
997
+ img = (hero.get("imageUrl") or "").strip()
998
+
999
+ # 2) Else use hero.items[*].imageUrl
1000
+ if not img:
1001
+ img = _first_image_in_items(hero.get("items"))
1002
+
1003
+ # 3) Else use first image anywhere else in the plan
1004
+ if not img:
1005
+ for s in sections:
1006
+ if not isinstance(s, dict):
1007
+ continue
1008
+ img = _first_image_in_items(s.get("items"))
1009
+ if img:
1010
+ break
1011
+
1012
+ # 4) Final fallback
1013
+ if not img:
1014
+ img = default_url
1015
+
1016
+ hero["imageUrl"] = img
1017
+
1018
+ # Back-compat: ensure hero.items[0].imageUrl exists too (your normaliser also does this)
1019
+ items = hero.get("items") if isinstance(hero.get("items"), list) else []
1020
+ if items:
1021
+ if not (items[0].get("imageUrl") or "").strip():
1022
+ items[0]["imageUrl"] = img
1023
+ else:
1024
+ hero["items"] = [{"id": "hero_media", "type": "card", "title": "Hero image", "text": "", "imageUrl": img}]
1025
+
1026
# Declarative catalogue of page templates the planner may target.
# Each spec fixes: the layout category, the template id/version stamped on
# generated plans, the section types the model may emit, any mandatory
# section skeleton, and the section/image count bounds quoted in the
# planning prompt (see _page_plan_system_prompt).
TEMPLATE_SPECS = {
    # General-purpose landing page: free-form section mix, no fixed skeleton.
    "generic_v1": {
        "category": "landing",
        "template": {"id": "generic_v1", "version": "1.0.0"},
        "allowed_section_types": ["hero", "features", "gallery", "testimonials", "faq", "cta", "richtext"],
        "required_sections": [],
        "min_sections": 4,
        "max_sections": 7,
        "min_images": 6,
        "max_images": 9,
    },
    # Services overview: exactly six sections in a fixed order.
    "services_grid_v1": {
        "category": "services",
        "template": {"id": "services_grid_v1", "version": "1.0.0"},
        "allowed_section_types": ["hero", "services", "process", "proof", "faq", "cta", "richtext"],
        "required_sections": [
            {"id": "sec_hero", "type": "hero"},
            {"id": "sec_services", "type": "services"},
            {"id": "sec_process", "type": "process"},
            {"id": "sec_proof", "type": "proof"},
            {"id": "sec_faq", "type": "faq"},
            {"id": "sec_cta", "type": "cta"},
        ],
        "min_sections": 6,
        "max_sections": 6,
        "min_images": 6,
        "max_images": 9,
    },
    # Services detail (pricing/packages): exactly seven sections.
    "services_detail_v1": {
        "category": "services",
        "template": {"id": "services_detail_v1", "version": "1.0.0"},
        "allowed_section_types": ["hero", "offers", "comparison", "process", "case_studies", "faq", "cta", "richtext"],
        "required_sections": [
            {"id": "sec_hero", "type": "hero"},
            {"id": "sec_offers", "type": "offers"},
            {"id": "sec_comparison", "type": "comparison"},
            {"id": "sec_process", "type": "process"},
            {"id": "sec_case_studies", "type": "case_studies"},
            {"id": "sec_faq", "type": "faq"},
            {"id": "sec_cta", "type": "cta"},
        ],
        "min_sections": 7,
        "max_sections": 7,
        "min_images": 6,
        "max_images": 9,
    },
    # About page: four mandatory sections, up to three optional extras.
    "about_glass_hero_v1": {
        "category": "about",
        "template": {"id": "about_glass_hero_v1", "version": "1.0.0"},
        "allowed_section_types": ["hero", "story", "values", "logos", "team", "testimonials", "faq", "cta", "richtext"],
        "required_sections": [
            {"id": "sec_hero", "type": "hero"},
            {"id": "sec_story", "type": "story"},
            {"id": "sec_values", "type": "values"},
            {"id": "sec_cta", "type": "cta"},
        ],
        "min_sections": 4,
        "max_sections": 7,
        "min_images": 6,
        "max_images": 9,
    },
}
1088
+
1089
def _select_template_spec(slug: str) -> dict:
    """Map a page slug onto the most specific TEMPLATE_SPECS entry.

    Slugs mentioning services get a services template (the detail variant
    when pricing/package keywords appear), "about" slugs get the about
    template, and everything else falls back to the generic landing spec.
    """
    normalised = _slugify(slug)
    if "service" in normalised:
        pricing_markers = ("pricing", "plan", "plans", "package", "packages", "tier", "tiers")
        wants_detail = any(marker in normalised for marker in pricing_markers)
        return TEMPLATE_SPECS["services_detail_v1"] if wants_detail else TEMPLATE_SPECS["services_grid_v1"]
    if "about" in normalised:
        return TEMPLATE_SPECS["about_glass_hero_v1"]
    return TEMPLATE_SPECS["generic_v1"]
1098
+
1099
+
1100
+ PIXABAY_API_URL = "https://pixabay.com/api/"
1101
+
1102
def _pixabay_search(api_key: str, query: str, *, category: str = "AI", per_page: int = 20, timeout: int = 15) -> list[dict]:
    """Search Pixabay for horizontal, safe-search photos matching *query*.

    Args:
        api_key: Pixabay API key; an empty key short-circuits to ``[]``.
        query: Search terms; blank queries also return ``[]``.
        category: Pixabay category filter; falls back to "AI" when falsy.
        per_page: Requested hit count, clamped to Pixabay's 3..200 range.
        timeout: Request timeout in seconds.

    Returns:
        The raw ``hits`` list from the API response (possibly empty).

    Raises:
        requests.HTTPError: On a non-2xx API response.

    NOTE(review): "AI" is sent as the Pixabay ``category`` value — confirm
    the API accepts it; the documented categories are lowercase words such
    as "computer" or "science".
    """
    q = _strip(query)
    if not api_key or not q:
        return []
    params = {
        "key": api_key,
        "q": q,
        "image_type": "photo",
        "orientation": "horizontal",
        "safesearch": "true",
        "editors_choice": "false",
        "order": "popular",
        # Fix: the original `category or "AI" or "Artificial Intelligence"
        # or "computer"` could only ever yield the first two operands — the
        # trailing alternatives were dead code.
        "category": category or "AI",
        "per_page": max(3, min(200, int(per_page or 20))),
        "page": 1,
    }
    r = requests.get(PIXABAY_API_URL, params=params, timeout=timeout)
    r.raise_for_status()
    data = r.json() or {}
    return data.get("hits") or []
1122
+
1123
+
1124
def _is_pixabay_url(url: str) -> bool:
    """Return True only for https URLs that mention the pixabay.com host."""
    candidate = _strip(url).lower()
    if not candidate.startswith("https://"):
        return False
    return "pixabay.com" in candidate
1127
+
1128
+
1129
def _fetch_bytes(url: str, timeout: int = 20) -> bytes:
    """Download *url* and return the response body.

    Raises ValueError for anything that is not a Pixabay https URL, and
    requests.HTTPError on a non-2xx response.
    """
    if not _is_pixabay_url(url):
        raise ValueError("Only Pixabay URLs are allowed")
    response = requests.get(url, stream=True, timeout=timeout)
    response.raise_for_status()
    return response.content
1135
+
1136
+
1137
def _save_image(img_bytes: bytes, out_path_no_ext: str, *, max_width: int = 1920) -> str:
    """Persist *img_bytes* to disk, downscaling when wider than *max_width*.

    Chooses ``.png`` when the image carries an alpha band, ``.jpg``
    otherwise, creates the parent directory, and returns the path written.
    """
    picture = Image.open(io.BytesIO(img_bytes))
    picture.load()

    if picture.width > int(max_width or 1920):
        # NOTE: the resize target deliberately mirrors the original code's
        # use of int(max_width) here (vs. the defaulted threshold above).
        scale = (int(max_width) / float(picture.width))
        new_height = max(1, int(round(picture.height * scale)))
        picture = picture.resize((int(max_width), new_height), Image.LANCZOS)

    # Alpha forces PNG; everything else compresses better as JPEG.
    suffix = ".png" if ("A" in picture.getbands()) else ".jpg"
    destination = out_path_no_ext + suffix
    os.makedirs(os.path.dirname(destination), exist_ok=True)

    if suffix == ".jpg":
        flattened = picture if picture.mode == "RGB" else picture.convert("RGB")
        flattened.save(destination, "JPEG", quality=85, optimize=True, progressive=True)
    else:
        picture.save(destination, "PNG", optimize=True)

    return destination
1158
+
1159
+
1160
def _pick_pixabay_hit(hits: list[dict], *, min_width: int) -> dict | None:
    """Choose the first acceptable Pixabay hit.

    Hits whose tags contain any entry of ``_PX_BANNED_TAGS`` are skipped.
    Preference goes to the first hit at least *min_width* pixels wide;
    failing that, the first non-banned hit of any width; else ``None``.

    The original scanned the list twice with duplicated filter logic; this
    single pass keeps the identical selection order while remembering the
    width-too-small fallback.
    """
    fallback = None
    floor = int(min_width or 0)
    for hit in hits:
        tags = (hit.get("tags") or "").lower()
        if any(banned in tags for banned in _PX_BANNED_TAGS):
            continue
        if int(hit.get("imageWidth") or 0) >= floor:
            return hit
        if fallback is None:
            fallback = hit
    return fallback
1175
+
1176
+
1177
def fill_plan_images_from_pixabay(plan: dict, *, api_key: str, client_dir: str, max_width: int = 1920, max_downloads: int = 9) -> dict:
    """Resolve each plan item marked ``needsImage`` to a locally saved Pixabay file.

    Walks every section item, searches Pixabay with the item's ``imgQuery``
    (augmented with domain keywords), downloads and saves the chosen hit
    under ``<client_dir>/uploads/media/images/imported/``, and writes the
    resulting ``/uploads/media/...`` URL into the item's ``imageUrl``.
    Mutates *plan* in place and also returns it.  No-op when *api_key* is
    empty; stops early once *max_downloads* files have been fetched.
    Per-item failures are swallowed so one bad image cannot abort the run.
    """
    if not api_key:
        return plan

    media_dir = os.path.join(client_dir, "uploads", "media")
    imported_dir = os.path.join(media_dir, "images", "imported")
    os.makedirs(imported_dir, exist_ok=True)

    # Dedupe by Pixabay image id so the same photo is never used twice.
    used_ids = set()
    downloads = 0

    domain_terms = []
    try:
        meta = plan.get("meta") or {}
        domain_terms = _extract_domain_keywords(meta.get("summary") or "", max_terms=5)
    except Exception:
        domain_terms = []

    for s in (plan.get("sections") or []):
        items = s.get("items") or []
        for it in items:
            # Budget check first: returns (not breaks) once the cap is hit.
            if downloads >= max_downloads:
                return plan
            if not it.get("needsImage"):
                continue
            # Skip items that already carry an image URL.
            if _strip(it.get("imageUrl")):
                continue

            q = _strip(it.get("imgQuery"))
            if not q:
                # if model forgot: make something safe and on-topic
                q = f"{_strip(it.get('title'))} software ai dashboard"

            # keep the query on-domain
            if domain_terms:
                q = f"{q} " + " ".join(domain_terms[:3])

            # Hero imagery must be full-bleed width; cards can be smaller.
            min_w = 1920 if (s.get("type") == "hero") else 1100

            hits = _pixabay_search(api_key, q, category="computer")
            if not hits:
                continue

            chosen = _pick_pixabay_hit(hits, min_width=min_w)
            if not chosen:
                continue

            pid = int(chosen.get("id") or 0)
            if not pid or pid in used_ids:
                continue
            used_ids.add(pid)

            web_u = _strip(chosen.get("webformatURL") or "")
            large_u = _strip(chosen.get("largeImageURL") or "")

            # Reuse a previously downloaded copy of this Pixabay id if present.
            base = os.path.join(imported_dir, f"pixabay-{pid}")
            existing = None
            for ext in (".jpg", ".png"):
                if os.path.exists(base + ext):
                    existing = base + ext
                    break

            if existing:
                rel = os.path.relpath(existing, media_dir).replace("\\", "/")
                it["imageUrl"] = f"/uploads/media/{rel}"
                continue

            try:
                # Fetch the webformat first; fall back to the large variant
                # only when the webformat is narrower than required.
                b1 = _fetch_bytes(web_u)
                img1 = Image.open(io.BytesIO(b1)); img1.load()

                chosen_bytes = b1
                if img1.width < min_w and large_u:
                    try:
                        b2 = _fetch_bytes(large_u)
                        img2 = Image.open(io.BytesIO(b2)); img2.load()
                        if img2.width > img1.width:
                            chosen_bytes = b2
                    except Exception:
                        pass

                saved = _save_image(chosen_bytes, base, max_width=max_width)
                rel = os.path.relpath(saved, media_dir).replace("\\", "/")
                it["imageUrl"] = f"/uploads/media/{rel}"
                downloads += 1
            except Exception:
                # Best-effort: a failed download leaves the item imageless.
                continue

    return plan
1266
+
1267
+
1268
+ # ─────────────────────────────────────────────────────────
1269
+ # Compile plan JSON → modern HTML (responsive + animations)
1270
+ # ─────────────────────────────────────────────────────────
1271
def compile_plan_to_html(plan: dict) -> str:
    """Compile a validated page-plan dict into a self-contained HTML string.

    Emits one ``<div id="smx-page-<slug>">`` wrapper containing scoped CSS,
    one ``<section>`` per plan section (with dedicated renderings for hero,
    faq, testimonials, and stats/logos types, plus a generic card grid for
    everything else), and an IntersectionObserver script that drives the
    ``.reveal`` scroll-in animation.  All user-provided text is HTML-escaped
    via the local ``esc`` helper.
    """
    page_slug = _slugify(plan.get("page") or "page")
    page_id = f"smx-page-{page_slug}"

    sections = list(plan.get("sections") or [])
    meta = plan.get("meta") or {}

    # Useful anchor targets for CTAs
    # First section id seen per type wins (used for default button hrefs).
    sec_id_by_type = {}
    for s in sections:
        st = (s.get("type") or "").lower()
        sid = _strip(s.get("id"))
        if st and sid and st not in sec_id_by_type:
            sec_id_by_type[st] = sid

    def esc(s: str) -> str:
        # Minimal HTML escaping; & must be replaced first.
        s = s or ""
        s = s.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
        s = s.replace('"', "&quot;").replace("'", "&#39;")
        return s

    def _btn(label: str, href: str, *, primary: bool = False) -> str:
        # Renders nothing when either label or href is blank.
        label = _strip(label)
        href = _strip(href)
        if not label or not href:
            return ""
        cls = "btn btn-primary" if primary else "btn"
        return f'<a class="{cls}" href="{esc(href)}">{esc(label)}</a>'

    def icon(name: str) -> str:
        # Looks the icon up in the module-level _ICON_SVGS map; unknown
        # names render as empty string.
        svg = _ICON_SVGS.get((name or "").strip().lower())
        if not svg:
            return ""
        return f'<span class="smx-ic">{svg}</span>'

    # Scoped stylesheet: every rule is prefixed with #<page_id> so the page
    # cannot leak styles into the host document.  {{ }} are literal braces.
    css = f"""
    <style>
      #{page_id} {{
        --r: 18px;
        --bd: rgba(148,163,184,.25);
        --fg: #0f172a;
        --mut: #475569;
        --card: rgba(255,255,255,.78);
        --bg: #f8fafc;
        font-family: system-ui, -apple-system, Segoe UI, Roboto, Arial, sans-serif;
        background: var(--bg);
        color: var(--fg);
        overflow-x: clip;
      }}
      @media (prefers-color-scheme: dark){{
        #{page_id} {{
          --fg: #e2e8f0;
          --mut: #a7b3c6;
          --card: rgba(2,6,23,.45);
          --bg: radial-gradient(circle at 20% 10%, rgba(30,64,175,.25), rgba(2,6,23,.95) 55%);
          --bd: rgba(148,163,184,.18);
        }}
      }}
      #{page_id} .wrap{{ max-width:1120px; margin:0 auto; padding:0 18px; }}
      #{page_id} .sec{{ padding:56px 0; }}
      #{page_id} .kicker{{ color:var(--mut); font-size:.92rem; margin:0 0 8px; }}
      #{page_id} h1{{ font-size:clamp(2rem,3.4vw,3.1rem); line-height:1.08; margin:0 0 12px; }}
      #{page_id} h2{{ font-size:clamp(1.35rem,2.2vw,1.95rem); margin:0 0 10px; }}
      #{page_id} p{{ margin:0; color:var(--mut); line-height:1.65; }}
      #{page_id} .hero{{ padding:0; }}
      #{page_id} .card{{ border:1px solid var(--bd); border-radius:var(--r); background:var(--card); padding:14px; }}
      #{page_id} .btnrow{{ display:flex; gap:10px; flex-wrap:wrap; margin-top:18px; }}
      #{page_id} .btn{{ display:inline-flex; gap:8px; align-items:center; border-radius:999px; padding:10px 14px;
        border:1px solid var(--bd); text-decoration:none; background: rgba(99,102,241,.12); color:inherit; }}
      #{page_id} .btn-primary{{ background: rgba(99,102,241,.22); border-color: rgba(99,102,241,.35); }}
      #{page_id} .btn:hover{{ transform: translateY(-1px); }}
      #{page_id} .grid{{ display:grid; gap:12px; }}
      #{page_id} img{{ width:100%; height:auto; border-radius: calc(var(--r) - 6px); display:block; }}
      #{page_id} .smx-ic{{ width:20px; height:20px; display:inline-block; opacity:.9; }}
      #{page_id} .smx-ic svg{{ width:20px; height:20px; }}

      /* HERO BANNER */
      #{page_id} .hero-banner{{
        position:relative;
        width:100%;
        min-height:clamp(380px, 60vh, 680px);
        display:flex;
        align-items:flex-end;
        overflow:hidden;
      }}
      #{page_id} .hero-bg{{
        position:absolute; inset:0;
        background-position:center;
        background-size:cover;
        background-repeat:no-repeat;
        transform:scale(1.02);
        filter:saturate(1.02);
      }}
      #{page_id} .hero-overlay{{
        position:absolute; inset:0;
        background:linear-gradient(90deg,
          rgba(2,6,23,.62) 0%,
          rgba(2,6,23,.40) 42%,
          rgba(2,6,23,.14) 72%,
          rgba(2,6,23,.02) 100%
        );
      }}
      @media (max-width: 860px){{
        #{page_id} .hero-overlay{{
          background:linear-gradient(180deg,
            rgba(2,6,23,.16) 0%,
            rgba(2,6,23,.55) 70%,
            rgba(2,6,23,.70) 100%
          );
        }}
      }}
      #{page_id} .hero-content{{ position:relative; width:100%; padding:72px 18px 48px; }}
      #{page_id} .hero-panel{{
        max-width:700px;
        border:1px solid rgba(148,163,184,.30);
        background:rgba(2,6,23,.24);
        border-radius:var(--r);
        padding:18px;
        backdrop-filter: blur(4px);
        -webkit-backdrop-filter: blur(4px);
        box-shadow: 0 18px 40px rgba(2,6,23,.18);
        color:#e2e8f0;
      }}
      #{page_id} .hero-panel p{{ color:rgba(226,232,240,.84); }}
      #{page_id} .hero-panel h1{{ text-shadow:0 10px 30px rgba(2,6,23,.45); }}
      #{page_id} .hero-panel .kicker{{
        margin:0 0 8px;
        font-size:.9rem;
        color:#a5b4fc;
        text-transform:uppercase;
        letter-spacing:.18em;
        opacity:.95;
      }}
      #{page_id} .hero-panel .btn{{
        background:rgba(15,23,42,.55);
        border-color:rgba(148,163,184,.45);
        color:#e2e8f0;
      }}
      #{page_id} .hero-panel .btn-primary{{
        background:rgba(79,70,229,.92);
        border-color:rgba(129,140,248,.70);
      }}
      #{page_id} .lead{{ margin-top:10px; font-size:1.05rem; line-height:1.65; }}

      /* FAQ */
      #{page_id} .faq details{{ border:1px solid var(--bd); border-radius:14px; background:var(--card); padding:12px 14px; }}
      #{page_id} .faq summary{{ cursor:pointer; font-weight:600; }}
      #{page_id} .faq details + details{{ margin-top:10px; }}

      #{page_id} .quote{{ font-size:1.02rem; line-height:1.6; color:inherit; }}
      #{page_id} .mut{{ color:var(--mut); }}

      #{page_id} .reveal{{ opacity:0; transform:translateY(14px); transition:opacity .55s ease, transform .55s ease; }}
      #{page_id} .reveal.in{{ opacity:1; transform:none; }}
      @media (prefers-reduced-motion: reduce){{ #{page_id} .reveal{{ transition:none; transform:none; opacity:1; }} }}
    </style>
    """.strip()

    # Scroll-reveal script: adds .in to .reveal elements as they intersect.
    js = f"""
    <script>
    (function(){{
      const root = document.getElementById("{page_id}");
      if(!root) return;
      const els = root.querySelectorAll(".reveal");
      const io = new IntersectionObserver((entries)=>{{
        entries.forEach(e=>{{ if(e.isIntersecting) e.target.classList.add("in"); }});
      }}, {{ threshold: 0.12 }});
      els.forEach(el=>io.observe(el));
    }})();
    </script>
    """.strip()

    parts = [f'<div id="{page_id}">', css]

    for s in sections:
        st = (s.get("type") or "section").lower()
        title = esc(s.get("title") or "")
        text = esc(s.get("text") or "")
        cols = int(s.get("cols") or 3)
        cols = max(1, min(5, cols))  # clamp to the 1..5 grid the CSS supports
        items = s.get("items") or []
        sec_dom_id = _strip(s.get("id"))
        sec_id_attr = f' id="{esc(sec_dom_id)}"' if sec_dom_id else ""

        # HERO BANNER (no /admin links)
        if st == "hero":
            # First item image becomes the full-bleed background.
            hero_img = ""
            for it in items:
                u = _strip(it.get("imageUrl"))
                if u:
                    hero_img = u
                    break

            primary = meta.get("primaryCta") or {}
            secondary = meta.get("secondaryCta") or {}
            # Default button targets: in-page anchors to cta/features sections.
            cta_anchor = "#" + (sec_id_by_type.get("cta") or "sec_cta")
            feats_anchor = "#" + (sec_id_by_type.get("features") or "sec_features")

            primary_label = primary.get("label") or "Request a demo"
            primary_href = primary.get("href") or cta_anchor
            secondary_label = secondary.get("label") or "See capabilities"
            secondary_href = secondary.get("href") or feats_anchor

            bg_style = f"style=\"background-image:url('{esc(hero_img)}')\"" if hero_img else ""

            parts.append(f"""
            <section class="hero hero-banner"{sec_id_attr}>
              <div class="hero-bg" {bg_style}></div>
              <div class="hero-overlay"></div>
              <div class="wrap hero-content">
                <div class="hero-panel reveal">
                  <p class="kicker">{esc(meta.get("pageTitle") or title)}</p>
                  <h1>{title}</h1>
                  <p class="lead">{text}</p>
                  <div class="btnrow">
                    {_btn(primary_label, primary_href, primary=True)}
                    {_btn(secondary_label, secondary_href)}
                  </div>
                </div>
              </div>
            </section>
            """.strip())
            continue

        # FAQ as accordion
        if st == "faq":
            qa = []
            for it in items:
                q = esc(it.get("title") or "")
                a = esc(it.get("text") or "")
                if not q and not a:
                    continue
                qa.append(
                    f"<details class=\"reveal\"><summary>{q}</summary>"
                    f"<div class=\"mut\" style=\"margin-top:8px;\">{a}</div></details>"
                )

            parts.append(f"""
            <section class="sec faq"{sec_id_attr}>
              <div class="wrap">
                <h2 class="reveal">{title}</h2>
                {"<p class='reveal' style='margin-bottom:14px;'>" + text + "</p>" if text else ""}
                {"".join(qa)}
              </div>
            </section>
            """.strip())
            continue

        # Testimonials styled differently
        if st == "testimonials":
            cards = []
            for it in items:
                quote = esc(it.get("text") or "")
                who = esc(it.get("title") or "")
                if not quote:
                    continue
                cards.append(
                    f"<div class='card reveal'><div class='quote'>“{quote}”</div>"
                    f"<div class='mut' style='margin-top:10px;font-weight:600;'>{who}</div></div>"
                )

            # Testimonials cap at 3 columns regardless of requested cols.
            grid_html = (
                f'<div class="grid" style="grid-template-columns:repeat({max(1, min(cols, 3))}, minmax(0,1fr));">'
                + "\n".join(cards) + "</div>"
            ) if cards else ""

            parts.append(f"""
            <section class="sec"{sec_id_attr}>
              <div class="wrap">
                <h2 class="reveal">{title}</h2>
                {"<p class='reveal' style='margin-bottom:14px;'>" + text + "</p>" if text else ""}
                {grid_html}
              </div>
            </section>
            """.strip())
            continue

        # Stats + Logos break rhythm so pages look different
        if st in ("stats", "logos"):
            cards = []
            for it in items:
                it_title = esc(it.get("title") or "")
                it_text = esc(it.get("text") or "")
                img = _strip(it.get("imageUrl"))
                if st == "logos" and img:
                    cards.append(
                        f"<div class='card reveal' style='padding:12px;display:flex;align-items:center;justify-content:center;'>"
                        f"<img loading='lazy' decoding='async' src='{esc(img)}' alt='{it_title}' style='max-height:46px;width:auto;border-radius:0;'>"
                        f"</div>"
                    )
                else:
                    cards.append(
                        f"<div class='card reveal'><div style='font-size:1.35rem;font-weight:800;line-height:1.1;'>{it_title}</div>"
                        f"<div class='mut' style='margin-top:8px;'>{it_text}</div></div>"
                    )

            # These sections use at least 2 columns, at most 5.
            use_cols = max(2, min(cols, 5))
            grid_html = (
                f'<div class="grid" style="grid-template-columns:repeat({use_cols}, minmax(0,1fr));">'
                + "\n".join(cards) + "</div>"
            ) if cards else ""

            parts.append(f"""
            <section class="sec"{sec_id_attr}>
              <div class="wrap">
                {"<h2 class='reveal'>" + title + "</h2>" if title else ""}
                {"<p class='reveal' style='margin-bottom:14px;'>" + text + "</p>" if text else ""}
                {grid_html}
              </div>
            </section>
            """.strip())
            continue

        # Default cards grid (features, gallery, process, integrations, team, timeline, richtext, cta etc.)
        cards = []
        for it in items:
            it_title = esc(it.get("title") or "")
            it_text = esc(it.get("text") or "")
            it_icon = icon(it.get("icon") or "")
            img = _strip(it.get("imageUrl"))
            img_html = f'<img loading="lazy" decoding="async" src="{esc(img)}" alt="{it_title}">' if img else ""
            cards.append(f"""
            <div class="card reveal">
              {img_html}
              <div style="display:flex; gap:10px; align-items:center; margin-top:{'10px' if img_html else '0'};">
                {it_icon}
                <h3 style="margin:0; font-size:1.05rem;">{it_title}</h3>
              </div>
              <p style="margin-top:8px;">{it_text}</p>
            </div>
            """.strip())

        grid_html = (
            f'<div class="grid" style="grid-template-columns:repeat({cols}, minmax(0,1fr));">'
            + "\n".join(cards) + "</div>"
        ) if cards else ""

        parts.append(f"""
        <section class="sec"{sec_id_attr}>
          <div class="wrap">
            <h2 class="reveal">{title}</h2>
            {"<p class='reveal' style='margin-bottom:14px;'>" + text + "</p>" if text else ""}
            {grid_html}
          </div>
        </section>
        """.strip())

    parts.append(js)
    parts.append("</div>")
    return "\n\n".join(parts)
1621
+
1622
+ notes = []
1623
+
1624
+ tpl_spec = _select_template_spec(page_slug)
1625
+ plan = _make_page_plan(page_slug=page_slug, website_description=website_description, template_spec=tpl_spec)
1626
+
1627
+
1628
+ for attempt in range(max_retries + 1):
1629
+ try:
1630
+ _validate_plan_or_raise(plan)
1631
+ break
1632
+ except Exception as e:
1633
+ notes.append(f"plan_validation_failed: {e}")
1634
+ if attempt >= max_retries:
1635
+ raise
1636
+ plan = _repair_plan(plan=plan, error_msg=str(e), website_description=website_description)
1637
+
1638
+ # Fill images locally (Pixabay) to avoid broken links
1639
+ if pixabay_api_key:
1640
+ try:
1641
+ plan = fill_plan_images_from_pixabay(
1642
+ plan,
1643
+ api_key=pixabay_api_key,
1644
+ client_dir=client_dir,
1645
+ max_width=1920,
1646
+ max_downloads=max_images
1647
+ )
1648
+ except Exception as e:
1649
+ notes.append(f"pixabay_fill_failed: {e}")
1650
+
1651
+ # Normalise and validate against the layout contract (after images exist)
1652
+ plan = normalise_layout(
1653
+ plan,
1654
+ default_category=(plan.get("category") or "landing"),
1655
+ default_template_id=((plan.get("template") or {}).get("id") or "generic_v1"),
1656
+ default_template_version=((plan.get("template") or {}).get("version") or "1.0.0"),
1657
+ mode="prod",
1658
+ )
1659
+
1660
+ _ensure_hero_image(plan)
1661
+
1662
+ issues = validate_layout(plan)
1663
+ errors = [i for i in issues if i.level == "error"]
1664
+ if errors:
1665
+ msg = "layout_contract_validation_failed:\n" + "\n".join([f"{e.path}: {e.message}" for e in errors])
1666
+ notes.append(msg)
1667
+ raise RuntimeError(msg)
1668
+
1669
+ # Final sanity check: no placeholders left
1670
+ blob = json.dumps(plan, ensure_ascii=False)
1671
+ if _contains_placeholders(blob):
1672
+ raise RuntimeError("Refusing to publish: plan still contains placeholder-style text.")
1673
+
1674
+ html = compile_plan_to_html(plan)
1675
+ return {
1676
+ "slug": _slugify(plan.get("page") or page_slug),
1677
+ "plan": plan,
1678
+ "html": html,
1679
+ "notes": notes,
1680
+ }
627
1681
 
628
- # Extract raw content
629
- tasks = ml_response(user_prompt, system_prompt, llm_profile)
630
- return tasks
631
1682
 
632
1683
 
633
1684
  def text_formatter_agent(text):