tarang 4.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,449 @@
1
+ """
2
+ Context Collector - Gathers local project context for LLM.
3
+
4
+ This module scans the project and collects relevant files based on:
5
+ 1. Project structure (file list)
6
+ 2. Instruction keywords (relevant files)
7
+ 3. Recently modified files
8
+
9
+ The context is sent to the backend with the instruction,
10
+ enabling the LLM to make informed decisions without
11
+ bidirectional communication.
12
+ """
13
+ from __future__ import annotations
14
+
15
+ import fnmatch
16
+ import os
17
+ import re
18
+ from dataclasses import dataclass, field
19
+ from pathlib import Path
20
+ from typing import List, Optional, Set
21
+
22
+
23
@dataclass
class FileContent:
    """A single project file together with its (possibly truncated) text."""
    # Path relative to the project root, as produced by ContextCollector.
    path: str
    # File text; read as UTF-8 and truncated to MAX_CONTENT_LINES by _read_file.
    content: str
    # Number of lines actually included in `content` (post-truncation).
    lines: int = 0
29
+
30
+
31
@dataclass
class ProjectContext:
    """Snapshot of a project that is shipped to the backend with an instruction."""
    cwd: str
    files: List[str] = field(default_factory=list)
    relevant_files: List[FileContent] = field(default_factory=list)
    _indexed_context: Optional[dict] = field(default=None, repr=False)
    _folder_tree: Optional[str] = field(default=None, repr=False)

    def to_dict(self) -> dict:
        """Serialize into the API payload shape.

        Optional sections (folder tree, BM25/KG index) are emitted only
        when they are present and non-empty.
        """
        payload = {
            "cwd": self.cwd,
            "files": self.files,
            "relevant_files": [
                {"path": fc.path, "content": fc.content, "lines": fc.lines}
                for fc in self.relevant_files
            ],
        }
        # Folder tree helps the LLM grasp project layout without tool calls.
        if self._folder_tree:
            payload["folder_tree"] = self._folder_tree
        # Indexed retrieval context (BM25 + knowledge graph), when available.
        if self._indexed_context:
            payload["indexed"] = self._indexed_context
        return payload
60
+
61
+
62
class ContextCollector:
    """
    Collects project context for LLM processing.

    Scans the project tree, always includes root-level "identity" config
    files, then adds files ranked as relevant to the instruction.

    Usage:
        collector = ContextCollector("/path/to/project")
        context = collector.collect("add authentication")
    """

    # Files/directories to ignore. Entries are fnmatch globs, matched
    # against a single path segment (file or directory name) at a time.
    IGNORE_PATTERNS = {
        # Version control
        ".git", ".svn", ".hg",
        # Dependencies
        "node_modules", "venv", ".venv", "env", ".env",
        "__pycache__", ".pytest_cache", ".mypy_cache",
        "vendor", "packages",
        # Build outputs
        "dist", "build", ".next", ".nuxt", "out",
        "target", "bin", "obj",
        # IDE
        ".idea", ".vscode", ".vs",
        # Misc (includes this tool's own backup directory)
        ".tarang", ".tarang_backups",
        "*.pyc", "*.pyo", "*.so", "*.dylib",
        "*.egg-info", "*.egg",
        ".DS_Store", "Thumbs.db",
    }

    # File extensions _read_file is willing to load as text.
    CODE_EXTENSIONS = {
        ".py", ".js", ".ts", ".jsx", ".tsx",
        ".java", ".kt", ".scala",
        ".go", ".rs", ".c", ".cpp", ".h", ".hpp",
        ".rb", ".php", ".swift", ".m",
        ".html", ".css", ".scss", ".sass", ".less",
        ".json", ".yaml", ".yml", ".toml",
        ".md", ".txt", ".rst",
        ".sql", ".sh", ".bash", ".zsh",
        ".vue", ".svelte",
        ".xml", ".gradle",
    }

    # Max file size to read (100KB); larger files are skipped entirely.
    MAX_FILE_SIZE = 100 * 1024

    # Max files to list in the context payload.
    MAX_FILES = 500

    # Max relevant files (identity + instruction-matched) to include.
    MAX_RELEVANT_FILES = 15

    # Max content lines per file before truncation.
    MAX_CONTENT_LINES = 300

    # Config file extensions to auto-include from root (reveals project type).
    # NOTE(review): compound entries like ".config.js" can never equal
    # Path.suffix (which would be ".js") — confirm whether matching on the
    # filename tail was intended.
    CONFIG_EXTENSIONS = {
        ".json", ".toml", ".yaml", ".yml", ".lock",
        ".config.js", ".config.ts",
    }

    # Config filenames (no extension or special names).
    CONFIG_NAMES = {
        "Dockerfile", "Makefile", "Gemfile", "Procfile",
        "requirements.txt", "setup.py", "setup.cfg",
        ".gitignore", ".env.example",
    }

    # Skip these even if they match (too large or not useful).
    SKIP_CONFIG_FILES = {
        "package-lock.json", "yarn.lock", "pnpm-lock.yaml",
        "poetry.lock", "Cargo.lock", "composer.lock",
    }

    def __init__(self, project_root: str):
        # Resolve once so relative_to() comparisons use a stable absolute root.
        self.project_root = Path(project_root).resolve()
138
+
139
+ def collect(self, instruction: str) -> ProjectContext:
140
+ """
141
+ Collect project context based on instruction.
142
+
143
+ Args:
144
+ instruction: User instruction to inform file selection
145
+
146
+ Returns:
147
+ ProjectContext with file list and relevant file contents
148
+ """
149
+ # Get all files
150
+ all_files = self._scan_files()
151
+
152
+ # Build folder structure tree (helps LLM understand project layout)
153
+ folder_tree = self._build_folder_tree(all_files)
154
+
155
+ # ALWAYS include project identity files first (reduces tool calls!)
156
+ identity_files = self._collect_identity_files()
157
+
158
+ # Find relevant files based on instruction
159
+ relevant_paths = self._find_relevant_files(instruction, all_files)
160
+
161
+ # For small projects, include all files if we didn't find specific matches
162
+ if len(all_files) <= 10 and len(relevant_paths) < 3:
163
+ # Small project - read all code files
164
+ relevant_paths = all_files
165
+
166
+ # Combine: identity files first, then instruction-relevant files
167
+ # (avoiding duplicates)
168
+ identity_paths = {f.path for f in identity_files}
169
+ additional_files = []
170
+ for path in relevant_paths:
171
+ if path not in identity_paths:
172
+ content = self._read_file(path)
173
+ if content:
174
+ additional_files.append(content)
175
+ if len(identity_files) + len(additional_files) >= self.MAX_RELEVANT_FILES:
176
+ break
177
+
178
+ relevant_files = identity_files + additional_files
179
+
180
+ context = ProjectContext(
181
+ cwd=str(self.project_root),
182
+ files=all_files[:self.MAX_FILES],
183
+ relevant_files=relevant_files,
184
+ )
185
+
186
+ # Add folder tree as metadata
187
+ context._folder_tree = folder_tree
188
+
189
+ return context
190
+
191
+ def _collect_identity_files(self) -> List[FileContent]:
192
+ """
193
+ Collect root-level config files that reveal project type.
194
+
195
+ Reads actual files in root directory (not a hardcoded list).
196
+ Skips .md files, lock files, and other non-config files.
197
+ """
198
+ identity_files = []
199
+
200
+ # Scan root directory only (not recursive)
201
+ try:
202
+ for item in self.project_root.iterdir():
203
+ if not item.is_file():
204
+ continue
205
+
206
+ filename = item.name
207
+
208
+ # Skip ignored files
209
+ if self._should_ignore(filename):
210
+ continue
211
+
212
+ # Skip lock files and other large files
213
+ if filename in self.SKIP_CONFIG_FILES:
214
+ continue
215
+
216
+ # Skip markdown files (user specified: non .md)
217
+ if filename.endswith(".md"):
218
+ continue
219
+
220
+ # Include if it's a config file (by extension or name)
221
+ is_config = (
222
+ item.suffix.lower() in self.CONFIG_EXTENSIONS
223
+ or filename in self.CONFIG_NAMES
224
+ or filename.startswith(".") # dotfiles like .eslintrc
225
+ )
226
+
227
+ if is_config:
228
+ content = self._read_file(filename)
229
+ if content:
230
+ identity_files.append(content)
231
+
232
+ except OSError:
233
+ pass
234
+
235
+ return identity_files
236
+
237
+ def _build_folder_tree(self, files: List[str], max_depth: int = 3) -> str:
238
+ """
239
+ Build a folder structure tree string.
240
+
241
+ This helps LLM understand project layout without calling list_files.
242
+ """
243
+ # Build directory structure
244
+ dirs: Set[str] = set()
245
+ root_files: List[str] = []
246
+
247
+ for f in files:
248
+ parts = Path(f).parts
249
+ if len(parts) == 1:
250
+ root_files.append(f)
251
+ else:
252
+ # Add all parent directories up to max_depth
253
+ for i in range(1, min(len(parts), max_depth + 1)):
254
+ dirs.add("/".join(parts[:i]))
255
+
256
+ # Build tree string
257
+ lines = ["."]
258
+
259
+ # Root files first
260
+ for f in sorted(root_files)[:10]:
261
+ lines.append(f"├── {f}")
262
+ if len(root_files) > 10:
263
+ lines.append(f"├── ... ({len(root_files) - 10} more files)")
264
+
265
+ # Then directories
266
+ sorted_dirs = sorted(dirs)
267
+ for d in sorted_dirs[:20]:
268
+ depth = d.count("/")
269
+ indent = "│ " * depth
270
+ name = d.split("/")[-1]
271
+ lines.append(f"{indent}├── {name}/")
272
+
273
+ if len(sorted_dirs) > 20:
274
+ lines.append(f"... ({len(sorted_dirs) - 20} more directories)")
275
+
276
+ return "\n".join(lines)
277
+
278
+ def _scan_files(self) -> List[str]:
279
+ """Scan project for all files."""
280
+ files = []
281
+
282
+ for root, dirs, filenames in os.walk(self.project_root):
283
+ # Filter directories
284
+ dirs[:] = [d for d in dirs if not self._should_ignore(d)]
285
+
286
+ for filename in filenames:
287
+ if self._should_ignore(filename):
288
+ continue
289
+
290
+ full_path = Path(root) / filename
291
+ try:
292
+ rel_path = str(full_path.relative_to(self.project_root))
293
+ files.append(rel_path)
294
+ except ValueError:
295
+ continue
296
+
297
+ if len(files) >= self.MAX_FILES:
298
+ break
299
+
300
+ if len(files) >= self.MAX_FILES:
301
+ break
302
+
303
+ return sorted(files)
304
+
305
+ def _should_ignore(self, name: str) -> bool:
306
+ """Check if file/directory should be ignored."""
307
+ for pattern in self.IGNORE_PATTERNS:
308
+ if fnmatch.fnmatch(name, pattern):
309
+ return True
310
+ return False
311
+
312
+ def _find_relevant_files(
313
+ self,
314
+ instruction: str,
315
+ all_files: List[str],
316
+ ) -> List[str]:
317
+ """Find files relevant to the instruction."""
318
+ relevant: Set[str] = set()
319
+
320
+ # Extract keywords from instruction
321
+ keywords = self._extract_keywords(instruction)
322
+
323
+ # Score files by relevance
324
+ scored_files = []
325
+ for file_path in all_files:
326
+ score = self._score_file(file_path, keywords, instruction)
327
+ if score > 0:
328
+ scored_files.append((file_path, score))
329
+
330
+ # Sort by score and return top files
331
+ scored_files.sort(key=lambda x: x[1], reverse=True)
332
+ return [f[0] for f in scored_files[:self.MAX_RELEVANT_FILES]]
333
+
334
+ def _extract_keywords(self, instruction: str) -> List[str]:
335
+ """Extract keywords from instruction."""
336
+ # Remove common words
337
+ stopwords = {
338
+ "the", "a", "an", "is", "are", "was", "were", "be", "been",
339
+ "have", "has", "had", "do", "does", "did", "will", "would",
340
+ "could", "should", "may", "might", "must", "can", "need",
341
+ "to", "of", "in", "for", "on", "with", "at", "by", "from",
342
+ "as", "into", "through", "during", "before", "after",
343
+ "and", "but", "if", "or", "because", "until", "while",
344
+ "this", "that", "these", "those", "i", "me", "my", "we",
345
+ "you", "your", "it", "its", "they", "them", "their",
346
+ "what", "which", "who", "how", "where", "when", "why",
347
+ "add", "create", "make", "build", "implement", "write",
348
+ "fix", "update", "change", "modify", "remove", "delete",
349
+ "please", "help", "want", "like", "using", "use",
350
+ }
351
+
352
+ # Split and filter
353
+ words = re.findall(r'\b\w+\b', instruction.lower())
354
+ keywords = [w for w in words if w not in stopwords and len(w) > 2]
355
+
356
+ return keywords
357
+
358
+ def _score_file(
359
+ self,
360
+ file_path: str,
361
+ keywords: List[str],
362
+ instruction: str,
363
+ ) -> int:
364
+ """Score a file's relevance."""
365
+ score = 0
366
+ file_lower = file_path.lower()
367
+ filename = Path(file_path).name.lower()
368
+ stem = Path(file_path).stem.lower()
369
+
370
+ # Check if file is explicitly mentioned
371
+ if filename in instruction.lower() or stem in instruction.lower():
372
+ score += 100
373
+
374
+ # Check file path for keywords
375
+ for keyword in keywords:
376
+ if keyword in file_lower:
377
+ score += 10
378
+ if keyword in filename:
379
+ score += 20
380
+ if keyword == stem:
381
+ score += 50
382
+
383
+ # Boost common entry points
384
+ entry_points = ["main", "app", "index", "server", "cli", "__init__"]
385
+ if stem in entry_points:
386
+ score += 5
387
+
388
+ # Boost by extension relevance
389
+ ext = Path(file_path).suffix.lower()
390
+ if ext in {".py", ".js", ".ts", ".tsx", ".jsx"}:
391
+ score += 2
392
+ if ext in {".json", ".yaml", ".yml", ".toml"}:
393
+ score += 1
394
+
395
+ return score
396
+
397
+ def _read_file(self, rel_path: str) -> Optional[FileContent]:
398
+ """Read file content."""
399
+ full_path = self.project_root / rel_path
400
+
401
+ # Check if readable
402
+ if not full_path.exists() or not full_path.is_file():
403
+ return None
404
+
405
+ # Check extension
406
+ if full_path.suffix.lower() not in self.CODE_EXTENSIONS:
407
+ return None
408
+
409
+ # Check size
410
+ try:
411
+ size = full_path.stat().st_size
412
+ if size > self.MAX_FILE_SIZE:
413
+ return None
414
+ except OSError:
415
+ return None
416
+
417
+ # Read content
418
+ try:
419
+ content = full_path.read_text(encoding="utf-8", errors="replace")
420
+ lines = content.splitlines()
421
+
422
+ # Truncate if too long
423
+ if len(lines) > self.MAX_CONTENT_LINES:
424
+ lines = lines[:self.MAX_CONTENT_LINES]
425
+ content = "\n".join(lines) + "\n... (truncated)"
426
+
427
+ return FileContent(
428
+ path=rel_path,
429
+ content=content,
430
+ lines=len(lines),
431
+ )
432
+
433
+ except Exception:
434
+ return None
435
+
436
+
437
def collect_context(project_root: str, instruction: str) -> ProjectContext:
    """
    Convenience wrapper: build a collector and gather context in one call.

    Args:
        project_root: Path to project
        instruction: User instruction

    Returns:
        ProjectContext
    """
    return ContextCollector(project_root).collect(instruction)
@@ -0,0 +1,6 @@
1
+ """Tarang Executor - Local file and shell operations."""
2
+
3
+ from tarang.executor.diff_apply import DiffApplicator, DiffResult
4
+ from tarang.executor.linter import ShadowLinter, LintResult
5
+
6
+ __all__ = ["DiffApplicator", "DiffResult", "ShadowLinter", "LintResult"]
@@ -0,0 +1,246 @@
1
+ """
2
+ Diff Applicator - Apply edits from backend to local files.
3
+
4
+ Supports unified diffs, search/replace, and full content replacement.
5
+ """
6
+ from __future__ import annotations
7
+
8
+ import shutil
9
+ import subprocess
10
+ import time
11
+ from dataclasses import dataclass
12
+ from pathlib import Path
13
+ from typing import Optional
14
+
15
+
16
@dataclass
class DiffResult:
    """Outcome of a single edit operation (diff, search/replace, or write)."""
    # True when the edit was applied and left on disk.
    success: bool
    # File path relative to the project root, as passed by the caller.
    path: str
    # Human-readable failure reason; None on success.
    error: Optional[str] = None
    # Location of the pre-edit backup (string path), when one was created.
    backup_path: Optional[str] = None
23
+
24
+
25
class DiffApplicator:
    """
    Apply edits from backend to local files.

    Supports:
    - Unified diffs (delegated to the external ``patch`` command)
    - Search/replace edits
    - Full content replacement

    Every destructive operation snapshots the file into ``.tarang_backups``
    first, so any edit can be rolled back.
    """

    def __init__(self, project_root: Path):
        # Root against which all relative edit paths are resolved.
        self.project_root = project_root
        # Backup directory inside the project (the context collector
        # lists ".tarang_backups" in its ignore patterns).
        self.backup_dir = project_root / ".tarang_backups"
40
+
41
+ def apply_diff(self, path: str, diff: str) -> DiffResult:
42
+ """
43
+ Apply a unified diff to a file.
44
+
45
+ Args:
46
+ path: File path relative to project root
47
+ diff: Unified diff content
48
+
49
+ Returns:
50
+ DiffResult with success/error info
51
+ """
52
+ file_path = self.project_root / path
53
+
54
+ # Create backup first
55
+ backup_path = self._create_backup(file_path)
56
+
57
+ try:
58
+ # Try using patch command
59
+ result = subprocess.run(
60
+ ["patch", "-u", str(file_path)],
61
+ input=diff.encode(),
62
+ capture_output=True,
63
+ timeout=30
64
+ )
65
+
66
+ if result.returncode != 0:
67
+ # Restore from backup
68
+ self._restore_backup(file_path, backup_path)
69
+ return DiffResult(
70
+ success=False,
71
+ path=path,
72
+ error=result.stderr.decode() or "Patch failed",
73
+ )
74
+
75
+ return DiffResult(
76
+ success=True,
77
+ path=path,
78
+ backup_path=str(backup_path) if backup_path else None,
79
+ )
80
+
81
+ except FileNotFoundError:
82
+ # patch command not available, restore and fail
83
+ self._restore_backup(file_path, backup_path)
84
+ return DiffResult(
85
+ success=False,
86
+ path=path,
87
+ error="patch command not available",
88
+ )
89
+ except subprocess.TimeoutExpired:
90
+ self._restore_backup(file_path, backup_path)
91
+ return DiffResult(
92
+ success=False,
93
+ path=path,
94
+ error="Patch timed out",
95
+ )
96
+
97
+ def apply_search_replace(
98
+ self,
99
+ path: str,
100
+ search: str,
101
+ replace: str,
102
+ ) -> DiffResult:
103
+ """
104
+ Apply a search/replace edit.
105
+
106
+ Args:
107
+ path: File path relative to project root
108
+ search: Text to find
109
+ replace: Text to replace with
110
+
111
+ Returns:
112
+ DiffResult with success/error info
113
+ """
114
+ file_path = self.project_root / path
115
+
116
+ if not file_path.exists():
117
+ return DiffResult(
118
+ success=False,
119
+ path=path,
120
+ error=f"File not found: {path}",
121
+ )
122
+
123
+ try:
124
+ content = file_path.read_text()
125
+
126
+ if search not in content:
127
+ return DiffResult(
128
+ success=False,
129
+ path=path,
130
+ error=f"Search text not found in {path}",
131
+ )
132
+
133
+ # Create backup
134
+ backup_path = self._create_backup(file_path)
135
+
136
+ # Apply replacement
137
+ new_content = content.replace(search, replace, 1)
138
+ file_path.write_text(new_content)
139
+
140
+ return DiffResult(
141
+ success=True,
142
+ path=path,
143
+ backup_path=str(backup_path) if backup_path else None,
144
+ )
145
+
146
+ except Exception as e:
147
+ return DiffResult(
148
+ success=False,
149
+ path=path,
150
+ error=str(e),
151
+ )
152
+
153
+ def apply_content(self, path: str, content: str) -> DiffResult:
154
+ """
155
+ Write full content to a file.
156
+
157
+ Args:
158
+ path: File path relative to project root
159
+ content: Full file content
160
+
161
+ Returns:
162
+ DiffResult with success/error info
163
+ """
164
+ file_path = self.project_root / path
165
+
166
+ try:
167
+ # Create backup if file exists
168
+ backup_path = self._create_backup(file_path) if file_path.exists() else None
169
+
170
+ # Ensure parent directory exists
171
+ file_path.parent.mkdir(parents=True, exist_ok=True)
172
+
173
+ # Write content
174
+ file_path.write_text(content)
175
+
176
+ return DiffResult(
177
+ success=True,
178
+ path=path,
179
+ backup_path=str(backup_path) if backup_path else None,
180
+ )
181
+
182
+ except Exception as e:
183
+ return DiffResult(
184
+ success=False,
185
+ path=path,
186
+ error=str(e),
187
+ )
188
+
189
+ def rollback(self, result: DiffResult) -> bool:
190
+ """
191
+ Rollback a change using backup.
192
+
193
+ Args:
194
+ result: DiffResult with backup_path
195
+
196
+ Returns:
197
+ True if rollback succeeded
198
+ """
199
+ if not result.backup_path:
200
+ return False
201
+
202
+ return self._restore_backup(
203
+ self.project_root / result.path,
204
+ Path(result.backup_path)
205
+ )
206
+
207
+ def cleanup_backups(self, max_age_hours: int = 24) -> int:
208
+ """
209
+ Clean up old backup files.
210
+
211
+ Args:
212
+ max_age_hours: Maximum age of backups to keep
213
+
214
+ Returns:
215
+ Number of files cleaned up
216
+ """
217
+ if not self.backup_dir.exists():
218
+ return 0
219
+
220
+ cleaned = 0
221
+ cutoff = time.time() - (max_age_hours * 3600)
222
+
223
+ for backup_file in self.backup_dir.glob("*.bak"):
224
+ if backup_file.stat().st_mtime < cutoff:
225
+ backup_file.unlink()
226
+ cleaned += 1
227
+
228
+ return cleaned
229
+
230
+ def _create_backup(self, file_path: Path) -> Optional[Path]:
231
+ """Create a backup of a file."""
232
+ if not file_path.exists():
233
+ return None
234
+
235
+ self.backup_dir.mkdir(exist_ok=True)
236
+ timestamp = int(time.time() * 1000)
237
+ backup_path = self.backup_dir / f"{file_path.name}.{timestamp}.bak"
238
+ shutil.copy2(file_path, backup_path)
239
+ return backup_path
240
+
241
+ def _restore_backup(self, file_path: Path, backup_path: Optional[Path]) -> bool:
242
+ """Restore a file from backup."""
243
+ if backup_path and backup_path.exists():
244
+ shutil.copy2(backup_path, file_path)
245
+ return True
246
+ return False