PyPI - sweep-autocomplete - Versions diffs - 0.1.0__tar.gz - Mend

sweep-autocomplete 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

sweep_autocomplete-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,30 @@
+Metadata-Version: 2.4
+Name: sweep-autocomplete
+Version: 0.1.0
+Summary: Local next-edit autocomplete server powered by llama.cpp
+Author: Sweep AI
+License-Expression: Apache-2.0
+Project-URL: Homepage, https://github.com/sweepai/sweep-autocomplete
+Project-URL: Repository, https://github.com/sweepai/sweep-autocomplete
+Keywords: autocomplete,code-completion,llama,next-edit
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Developers
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Topic :: Software Development :: Libraries
+Requires-Python: >=3.10
+Requires-Dist: fastapi>=0.100.0
+Requires-Dist: uvicorn[standard]>=0.23.0
+Requires-Dist: hypercorn>=0.17.0
+Requires-Dist: python-multipart>=0.0.6
+Requires-Dist: loguru>=0.7.0
+Requires-Dist: requests>=2.31.0
+Requires-Dist: numpy>=1.24.0
+Requires-Dist: scipy>=1.11.0
+Requires-Dist: regex>=2023.0
+Requires-Dist: brotli>=1.1.0
+Requires-Dist: pydantic>=2.0.0
+Requires-Dist: llama-cpp-python>=0.2.0
+Requires-Dist: huggingface-hub>=0.20.0

sweep_autocomplete-0.1.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,48 @@
+[build-system]
+# The SPDX string form of `project.license` used in this file (PEP 639) is
+# only understood by setuptools >= 77; older releases reject the file.
+requires = ["setuptools>=77.0.3", "wheel"]
+build-backend = "setuptools.build_meta"
+[project]
+name = "sweep-autocomplete"
+version = "0.1.0"
+description = "Local next-edit autocomplete server powered by llama.cpp"
+requires-python = ">=3.10"
+license = "Apache-2.0"
+authors = [
+    { name = "Sweep AI" },
+]
+keywords = ["autocomplete", "code-completion", "llama", "next-edit"]
+classifiers = [
+    "Development Status :: 3 - Alpha",
+    "Intended Audience :: Developers",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Topic :: Software Development :: Libraries",
+]
+dependencies = [
+    "fastapi>=0.100.0",
+    "uvicorn[standard]>=0.23.0",
+    "hypercorn>=0.17.0",
+    "python-multipart>=0.0.6",
+    "loguru>=0.7.0",
+    "requests>=2.31.0",
+    "numpy>=1.24.0",
+    "scipy>=1.11.0",
+    "regex>=2023.0",
+    "brotli>=1.1.0",
+    "pydantic>=2.0.0",
+    "llama-cpp-python>=0.2.0",
+    "huggingface-hub>=0.20.0",
+]
+[project.scripts]
+sweep-autocomplete = "sweep_autocomplete.cli:main"
+[project.urls]
+Homepage = "https://github.com/sweepai/sweep-autocomplete"
+Repository = "https://github.com/sweepai/sweep-autocomplete"
+[tool.setuptools.packages.find]
+include = ["sweep_autocomplete*"]

sweep_autocomplete-0.1.0/setup.cfg ADDED Viewed

@@ -0,0 +1,4 @@
+[egg_info]
+tag_build =
+tag_date = 0

sweep_autocomplete-0.1.0/sweep_autocomplete/__init__.py ADDED Viewed

File without changes

sweep_autocomplete-0.1.0/sweep_autocomplete/app.py ADDED Viewed

@@ -0,0 +1,106 @@
+import json
+import time
+import traceback
+from fastapi import Body
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import StreamingResponse
+from sweep_autocomplete.autocomplete.next_edit_autocomplete import (
+    AutocompleteMetadata,
+    fetch_next_edits,
+)
+from sweep_autocomplete.dataclasses.file_chunk_data import (
+    EditorDiagnostic,
+    FileChunkData,
+    UserAction,
+)
+from sweep_autocomplete.utils.compression_middleware import RequestCompressionMiddleware
+from loguru import logger
+# ASGI application object; the middleware and routes below are registered on it.
+app = FastAPI()
+# CORS policy: any origin, method and header is accepted, with credentials enabled.
+# NOTE(review): wildcard origins combined with allow_credentials=True is very
+# permissive — confirm this is intended for the deployment this server targets.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# Project-local middleware (sweep_autocomplete.utils.compression_middleware);
+# presumably handles compressed request bodies — verify against its source.
+app.add_middleware(RequestCompressionMiddleware)
+@app.get("/health")
+def health():
+    return {"status": "ok"}
+@app.post("/backend/next_edit_autocomplete", include_in_schema=False)
+def next_edit_autocomplete(
+    file_path: str = Body(...),
+    file_contents: str = Body(...),
+    original_file_contents: str = Body(None),
+    recent_changes: str = Body(...),
+    cursor_position: int = Body(...),
+    file_chunks: list[FileChunkData] = Body([]),
+    retrieval_chunks: list[FileChunkData] = Body([]),
+    recent_user_actions: list[UserAction] = Body([]),
+    multiple_suggestions: bool = Body(False),
+    recent_changes_high_res: str = Body(default=""),
+    changes_above_cursor: bool = Body(default=True),
+    editor_diagnostics: list[EditorDiagnostic] = Body(default=[]),
+):
+    function_start_time = time.time()
+    def stream():
+        metadata: AutocompleteMetadata = AutocompleteMetadata()
+        try:
+            for result, completions, formatted_prompt, metadata in fetch_next_edits(
+                file_path=file_path,
+                file_contents=file_contents,
+                recent_changes=recent_changes,
+                cursor_position=cursor_position,
+                original_file_contents=original_file_contents,
+                file_chunks=file_chunks,
+                retrieval_chunks=retrieval_chunks,
+                recent_user_actions=recent_user_actions,
+                recent_changes_high_res=recent_changes_high_res,
+                changes_above_cursor=changes_above_cursor,
+                is_new_user=False,
+                editor_diagnostics=editor_diagnostics,
+            ):
+                data = {
+                    **result.__dict__,
+                    "elapsed_time_ms": int((time.time() - function_start_time) * 1000),
+                }
+                logger.debug(
+                    f"Next edit autocomplete took {data['elapsed_time_ms']}ms"
+                )
+                if multiple_suggestions:
+                    data["completions"] = [
+                        completion.__dict__ for completion in completions
+                    ]
+                yield json.dumps(data) + "\n"
+        except BaseException as e:
+            logger.error(f"Next edit autocomplete error: {str(e)}")
+            yield json.dumps(
+                {
+                    "status": "error",
+                    "error": f"Next edit autocomplete error: {str(e)}",
+                    "traceback": str(traceback.format_exc()),
+                }
+            )
+            if not isinstance(e, GeneratorExit):
+                raise e
+        finally:
+            end_time = time.time()
+            latency_ms = (end_time - function_start_time) * 1000
+            logger.debug(
+                f"Next edit autocomplete took {latency_ms:.2f}ms for finally block:{metadata.convert_to_string()}"
+            )
+    return StreamingResponse(stream(), media_type="application/x-ndjson")

sweep_autocomplete-0.1.0/sweep_autocomplete/autocomplete/__init__.py ADDED Viewed

File without changes

sweep_autocomplete-0.1.0/sweep_autocomplete/autocomplete/llm_local.py ADDED Viewed

@@ -0,0 +1,92 @@
+import threading
+import time
+from typing import Any
+from huggingface_hub import hf_hub_download
+from llama_cpp import Llama
+from llama_cpp.llama_speculative import LlamaPromptLookupDecoding
+from sweep_autocomplete.config import MODEL_REPO, MODEL_FILENAME
+from loguru import logger
+# Process-wide model instance; stays None until get_model() creates it.
+_model: Llama | None = None
+# Held by generate_completion for the whole inference call, so completions run one at a time.
+_model_lock = threading.Lock()
+# Guards the increment of _latest_request_id when a request claims its ID.
+_request_lock = threading.Lock()
+# ID claimed by the most recent generate_completion call; waiting requests compare against it.
+_latest_request_id = 0
+class RequestCancelled(Exception):
+    """Raised when a queued request is superseded by a newer one."""
+    pass
+def get_model() -> Llama:
+    global _model
+    if _model is None:
+        logger.info(f"Downloading model {MODEL_FILENAME} from {MODEL_REPO}")
+        model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILENAME)
+        logger.info(f"Loading model from {model_path}")
+        _model = Llama(
+            model_path=model_path,
+            n_ctx=16384,
+            n_batch=4096,
+            n_gpu_layers=-1,
+            flash_attn=True,
+            draft_model=LlamaPromptLookupDecoding(num_pred_tokens=32),
+            logits_all=True,
+        )
+        logger.info("Model loaded successfully")
+    return _model
+def generate_completion(
+    prompt: str,
+    stop: list[str],
+    max_tokens: int,
+    temperature: float,
+    prefix: str = "",
+) -> tuple[str, int, list[Any], str | None]:
+    """Generate a completion using the local llama-cpp model.
+    Only the latest request will actually run inference. If a newer request
+    arrives while this one is waiting for the model lock, this request is
+    cancelled (raises RequestCancelled).
+    Returns (completion_text, elapsed_ms, logprobs, finish_reason)
+    matching the signature of fetch_next_edits_http.
+    """
+    global _latest_request_id
+    model = get_model()
+    full_prompt = prompt + prefix if prefix else prompt
+    # Claim a request ID — always monotonically increasing
+    with _request_lock:
+        _latest_request_id += 1
+        my_id = _latest_request_id
+    # Wait for the model. When we get the lock, check if we're still latest.
+    with _model_lock:
+        if my_id != _latest_request_id:
+            logger.info(f"Request {my_id} cancelled (latest is {_latest_request_id})")
+            raise RequestCancelled()
+        tokens = model.tokenize(full_prompt.encode("utf-8"))
+        logger.info(f"Prompt length: {len(full_prompt)} chars, {len(tokens)} tokens, n_ctx={model.n_ctx()}")
+        start = time.time()
+        result = model.create_completion(
+            prompt=full_prompt,
+            max_tokens=max_tokens,
+            temperature=temperature,
+            stop=stop,
+        )
+        elapsed_ms = int((time.time() - start) * 1000)
+    text = result["choices"][0]["text"]
+    if prefix:
+        text = prefix + text
+    finish_reason = result["choices"][0].get("finish_reason")
+    return text, elapsed_ms, [], finish_reason