sweep-autocomplete 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26) hide show
  1. sweep_autocomplete-0.1.0/PKG-INFO +30 -0
  2. sweep_autocomplete-0.1.0/pyproject.toml +48 -0
  3. sweep_autocomplete-0.1.0/setup.cfg +4 -0
  4. sweep_autocomplete-0.1.0/sweep_autocomplete/__init__.py +0 -0
  5. sweep_autocomplete-0.1.0/sweep_autocomplete/app.py +106 -0
  6. sweep_autocomplete-0.1.0/sweep_autocomplete/autocomplete/__init__.py +0 -0
  7. sweep_autocomplete-0.1.0/sweep_autocomplete/autocomplete/llm_local.py +92 -0
  8. sweep_autocomplete-0.1.0/sweep_autocomplete/autocomplete/next_edit_autocomplete.py +2046 -0
  9. sweep_autocomplete-0.1.0/sweep_autocomplete/autocomplete/next_edit_autocomplete_retrieval.py +335 -0
  10. sweep_autocomplete-0.1.0/sweep_autocomplete/autocomplete/next_edit_autocomplete_service.py +8 -0
  11. sweep_autocomplete-0.1.0/sweep_autocomplete/autocomplete/next_edit_autocomplete_utils.py +631 -0
  12. sweep_autocomplete-0.1.0/sweep_autocomplete/cli.py +15 -0
  13. sweep_autocomplete-0.1.0/sweep_autocomplete/config.py +8 -0
  14. sweep_autocomplete-0.1.0/sweep_autocomplete/core/__init__.py +0 -0
  15. sweep_autocomplete-0.1.0/sweep_autocomplete/dataclasses/__init__.py +0 -0
  16. sweep_autocomplete-0.1.0/sweep_autocomplete/dataclasses/file_chunk_data.py +35 -0
  17. sweep_autocomplete-0.1.0/sweep_autocomplete/utils/__init__.py +0 -0
  18. sweep_autocomplete-0.1.0/sweep_autocomplete/utils/compression_middleware.py +74 -0
  19. sweep_autocomplete-0.1.0/sweep_autocomplete/utils/str_utils.py +40 -0
  20. sweep_autocomplete-0.1.0/sweep_autocomplete/utils/timer.py +59 -0
  21. sweep_autocomplete-0.1.0/sweep_autocomplete.egg-info/PKG-INFO +30 -0
  22. sweep_autocomplete-0.1.0/sweep_autocomplete.egg-info/SOURCES.txt +24 -0
  23. sweep_autocomplete-0.1.0/sweep_autocomplete.egg-info/dependency_links.txt +1 -0
  24. sweep_autocomplete-0.1.0/sweep_autocomplete.egg-info/entry_points.txt +2 -0
  25. sweep_autocomplete-0.1.0/sweep_autocomplete.egg-info/requires.txt +13 -0
  26. sweep_autocomplete-0.1.0/sweep_autocomplete.egg-info/top_level.txt +1 -0
@@ -0,0 +1,30 @@
1
+ Metadata-Version: 2.4
2
+ Name: sweep-autocomplete
3
+ Version: 0.1.0
4
+ Summary: Local next-edit autocomplete server powered by llama.cpp
5
+ Author: Sweep AI
6
+ License-Expression: Apache-2.0
7
+ Project-URL: Homepage, https://github.com/sweepai/sweep-autocomplete
8
+ Project-URL: Repository, https://github.com/sweepai/sweep-autocomplete
9
+ Keywords: autocomplete,code-completion,llama,next-edit
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.10
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Topic :: Software Development :: Libraries
17
+ Requires-Python: >=3.10
18
+ Requires-Dist: fastapi>=0.100.0
19
+ Requires-Dist: uvicorn[standard]>=0.23.0
20
+ Requires-Dist: hypercorn>=0.17.0
21
+ Requires-Dist: python-multipart>=0.0.6
22
+ Requires-Dist: loguru>=0.7.0
23
+ Requires-Dist: requests>=2.31.0
24
+ Requires-Dist: numpy>=1.24.0
25
+ Requires-Dist: scipy>=1.11.0
26
+ Requires-Dist: regex>=2023.0
27
+ Requires-Dist: brotli>=1.1.0
28
+ Requires-Dist: pydantic>=2.0.0
29
+ Requires-Dist: llama-cpp-python>=0.2.0
30
+ Requires-Dist: huggingface-hub>=0.20.0
@@ -0,0 +1,48 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "sweep-autocomplete"
7
+ version = "0.1.0"
8
+ description = "Local next-edit autocomplete server powered by llama.cpp"
9
+ requires-python = ">=3.10"
10
+ license = "Apache-2.0"
11
+ authors = [
12
+ { name = "Sweep AI" },
13
+ ]
14
+ keywords = ["autocomplete", "code-completion", "llama", "next-edit"]
15
+ classifiers = [
16
+ "Development Status :: 3 - Alpha",
17
+ "Intended Audience :: Developers",
18
+ "Programming Language :: Python :: 3",
19
+ "Programming Language :: Python :: 3.10",
20
+ "Programming Language :: Python :: 3.11",
21
+ "Programming Language :: Python :: 3.12",
22
+ "Topic :: Software Development :: Libraries",
23
+ ]
24
+ dependencies = [
25
+ "fastapi>=0.100.0",
26
+ "uvicorn[standard]>=0.23.0",
27
+ "hypercorn>=0.17.0",
28
+ "python-multipart>=0.0.6",
29
+ "loguru>=0.7.0",
30
+ "requests>=2.31.0",
31
+ "numpy>=1.24.0",
32
+ "scipy>=1.11.0",
33
+ "regex>=2023.0",
34
+ "brotli>=1.1.0",
35
+ "pydantic>=2.0.0",
36
+ "llama-cpp-python>=0.2.0",
37
+ "huggingface-hub>=0.20.0",
38
+ ]
39
+
40
+ [project.scripts]
41
+ sweep-autocomplete = "sweep_autocomplete.cli:main"
42
+
43
+ [project.urls]
44
+ Homepage = "https://github.com/sweepai/sweep-autocomplete"
45
+ Repository = "https://github.com/sweepai/sweep-autocomplete"
46
+
47
+ [tool.setuptools.packages.find]
48
+ include = ["sweep_autocomplete*"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,106 @@
1
+ import json
2
+ import time
3
+ import traceback
4
+
5
+ from fastapi import Body
6
+ from fastapi import FastAPI
7
+ from fastapi.middleware.cors import CORSMiddleware
8
+ from fastapi.responses import StreamingResponse
9
+
10
+ from sweep_autocomplete.autocomplete.next_edit_autocomplete import (
11
+ AutocompleteMetadata,
12
+ fetch_next_edits,
13
+ )
14
+ from sweep_autocomplete.dataclasses.file_chunk_data import (
15
+ EditorDiagnostic,
16
+ FileChunkData,
17
+ UserAction,
18
+ )
19
+ from sweep_autocomplete.utils.compression_middleware import RequestCompressionMiddleware
20
+ from loguru import logger
21
+
22
# ASGI application object; the route decorators below register handlers on it.
app = FastAPI()

# CORS is configured with no restrictions on origin, method, or header.
# NOTE(review): allow_origins=["*"] together with allow_credentials=True lets
# any web page issue credentialed requests to this server — confirm that this
# is intended for a locally running service.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Project-local middleware from sweep_autocomplete.utils.compression_middleware.
# Presumably decodes compressed request bodies — TODO confirm against that module.
app.add_middleware(RequestCompressionMiddleware)
32
+
33
+
34
@app.get("/health")
def health():
    """Liveness endpoint: always answers with a fixed ``{"status": "ok"}`` body."""
    payload = {"status": "ok"}
    return payload
37
+
38
+
39
@app.post("/backend/next_edit_autocomplete", include_in_schema=False)
def next_edit_autocomplete(
    file_path: str = Body(...),
    file_contents: str = Body(...),
    original_file_contents: str | None = Body(None),
    recent_changes: str = Body(...),
    cursor_position: int = Body(...),
    file_chunks: list[FileChunkData] = Body([]),
    retrieval_chunks: list[FileChunkData] = Body([]),
    recent_user_actions: list[UserAction] = Body([]),
    multiple_suggestions: bool = Body(False),
    recent_changes_high_res: str = Body(default=""),
    changes_above_cursor: bool = Body(default=True),
    editor_diagnostics: list[EditorDiagnostic] = Body(default=[]),
):
    """Stream next-edit suggestions as newline-delimited JSON.

    The request fields are forwarded to ``fetch_next_edits``. For every item
    that generator produces, one JSON line is emitted containing the result's
    attributes plus ``elapsed_time_ms`` (time since this handler was entered).
    When ``multiple_suggestions`` is true, each line also carries a
    ``completions`` list with the attributes of every completion.

    If ``fetch_next_edits`` raises, a final JSON line with ``status``,
    ``error`` and ``traceback`` keys is emitted and the exception is re-raised.

    Returns:
        A ``StreamingResponse`` with media type ``application/x-ndjson``.
    """
    function_start_time = time.time()

    def stream():
        # Fallback so the ``finally`` block can log even if no item is produced.
        metadata: AutocompleteMetadata = AutocompleteMetadata()

        try:
            for result, completions, _formatted_prompt, metadata in fetch_next_edits(
                file_path=file_path,
                file_contents=file_contents,
                recent_changes=recent_changes,
                cursor_position=cursor_position,
                original_file_contents=original_file_contents,
                file_chunks=file_chunks,
                retrieval_chunks=retrieval_chunks,
                recent_user_actions=recent_user_actions,
                recent_changes_high_res=recent_changes_high_res,
                changes_above_cursor=changes_above_cursor,
                is_new_user=False,
                editor_diagnostics=editor_diagnostics,
            ):
                data = {
                    **result.__dict__,
                    "elapsed_time_ms": int((time.time() - function_start_time) * 1000),
                }
                logger.debug(
                    f"Next edit autocomplete took {data['elapsed_time_ms']}ms"
                )

                if multiple_suggestions:
                    data["completions"] = [
                        completion.__dict__ for completion in completions
                    ]
                yield json.dumps(data) + "\n"

        except GeneratorExit:
            # The consumer closed the stream (e.g. the client disconnected).
            # Yielding while GeneratorExit is in flight makes close() raise
            # RuntimeError("generator ignored GeneratorExit"), so just unwind.
            logger.debug("Next edit autocomplete stream closed by consumer")
            raise
        except BaseException as e:
            error_message = f"Next edit autocomplete error: {e}"
            logger.error(error_message)
            # Report the failure in-band; the trailing newline keeps the
            # record framed like every other NDJSON line.
            yield json.dumps(
                {
                    "status": "error",
                    "error": error_message,
                    "traceback": traceback.format_exc(),
                }
            ) + "\n"
            raise
        finally:
            end_time = time.time()
            latency_ms = (end_time - function_start_time) * 1000
            logger.debug(
                f"Next edit autocomplete took {latency_ms:.2f}ms for finally block:{metadata.convert_to_string()}"
            )

    return StreamingResponse(stream(), media_type="application/x-ndjson")
@@ -0,0 +1,92 @@
1
+ import threading
2
+ import time
3
+ from typing import Any
4
+
5
+ from huggingface_hub import hf_hub_download
6
+ from llama_cpp import Llama
7
+ from llama_cpp.llama_speculative import LlamaPromptLookupDecoding
8
+
9
+ from sweep_autocomplete.config import MODEL_REPO, MODEL_FILENAME
10
+ from loguru import logger
11
+
12
# Cached model instance; stays None until get_model() has loaded it.
_model: Llama | None = None
# Serializes use of the model: generate_completion() holds it while running inference.
_model_lock = threading.Lock()
# Guards the increment of _latest_request_id when a request claims its ID.
_request_lock = threading.Lock()
# ID handed to the most recent request; a waiting request whose own ID no
# longer matches this value cancels itself instead of running inference.
_latest_request_id = 0
16
+
17
+
18
class RequestCancelled(Exception):
    """Signals that a waiting request was superseded by a more recent one."""
21
+
22
+
23
def get_model() -> Llama:
    """Return the shared ``Llama`` instance, downloading and loading it on first use.

    The model file ``MODEL_FILENAME`` is fetched from ``MODEL_REPO`` via
    ``hf_hub_download`` and loaded once; later calls return the cached object.

    Initialization is guarded by ``_model_lock`` so that concurrent first
    calls (request handlers run on worker threads) cannot each download and
    load their own copy of the model. The lock is only touched while the
    model is still unloaded, so callers must not invoke this function while
    already holding ``_model_lock`` before the first load has completed.
    """
    global _model

    # Fast path: once loaded, no locking is needed.
    if _model is not None:
        return _model

    with _model_lock:
        # Re-check under the lock: another thread may have finished loading
        # while this one was waiting.
        if _model is None:
            logger.info(f"Downloading model {MODEL_FILENAME} from {MODEL_REPO}")
            model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILENAME)
            logger.info(f"Loading model from {model_path}")
            _model = Llama(
                model_path=model_path,
                n_ctx=16384,
                n_batch=4096,
                n_gpu_layers=-1,
                flash_attn=True,
                draft_model=LlamaPromptLookupDecoding(num_pred_tokens=32),
                logits_all=True,
            )
            logger.info("Model loaded successfully")
    return _model
40
+
41
+
42
def generate_completion(
    prompt: str,
    stop: list[str],
    max_tokens: int,
    temperature: float,
    prefix: str = "",
) -> tuple[str, int, list[Any], str | None]:
    """Run one completion on the local llama-cpp model, newest request wins.

    Each call claims a monotonically increasing request ID and then waits for
    the model lock. If a newer request has claimed an ID by the time the lock
    is acquired, this call raises ``RequestCancelled`` without running
    inference.

    A non-empty ``prefix`` is appended to ``prompt`` before generation and
    prepended to the generated text in the return value.

    Returns:
        ``(completion_text, elapsed_ms, logprobs, finish_reason)``, where
        ``logprobs`` is always an empty list and ``elapsed_ms`` covers only
        the ``create_completion`` call.

    Raises:
        RequestCancelled: a newer request arrived while this one was waiting.
    """
    global _latest_request_id

    model = get_model()
    if prefix:
        full_prompt = prompt + prefix
    else:
        full_prompt = prompt

    # Take a ticket; IDs only ever grow.
    with _request_lock:
        _latest_request_id += 1
        my_id = _latest_request_id

    # Queue for the model, then bail out if a newer ticket was issued meanwhile.
    with _model_lock:
        if my_id != _latest_request_id:
            logger.info(f"Request {my_id} cancelled (latest is {_latest_request_id})")
            raise RequestCancelled()

        tokens = model.tokenize(full_prompt.encode("utf-8"))
        logger.info(f"Prompt length: {len(full_prompt)} chars, {len(tokens)} tokens, n_ctx={model.n_ctx()}")

        started_at = time.time()
        response = model.create_completion(
            prompt=full_prompt,
            max_tokens=max_tokens,
            temperature=temperature,
            stop=stop,
        )
        elapsed_ms = int((time.time() - started_at) * 1000)

        first_choice = response["choices"][0]
        completion_text = first_choice["text"]
        if prefix:
            completion_text = prefix + completion_text

        return completion_text, elapsed_ms, [], first_choice.get("finish_reason")