televault-0.1.0-py3-none-any.whl
- televault/__init__.py +16 -0
- televault/chunker.py +189 -0
- televault/cli.py +445 -0
- televault/compress.py +138 -0
- televault/config.py +81 -0
- televault/core.py +479 -0
- televault/crypto.py +170 -0
- televault/models.py +149 -0
- televault/telegram.py +375 -0
- televault-0.1.0.dist-info/METADATA +242 -0
- televault-0.1.0.dist-info/RECORD +13 -0
- televault-0.1.0.dist-info/WHEEL +4 -0
- televault-0.1.0.dist-info/entry_points.txt +3 -0
televault/compress.py
ADDED
@@ -0,0 +1,138 @@
"""Compression utilities for TeleVault - zstd for speed and ratio."""

import io
from pathlib import Path
from typing import BinaryIO

import zstandard as zstd

# Compression level: 3 is a good balance (default)
# Level 1-3: fast, decent compression
# Level 10-15: slower, better compression
# Level 19-22: very slow, best compression
DEFAULT_LEVEL = 3

# File extensions that are already compressed (skip compression)
INCOMPRESSIBLE_EXTENSIONS = {
    # Images
    ".jpg", ".jpeg", ".png", ".gif", ".webp", ".heic", ".heif", ".avif",
    # Video
    ".mp4", ".mkv", ".avi", ".mov", ".webm", ".m4v", ".wmv", ".flv",
    # Audio
    ".mp3", ".aac", ".ogg", ".opus", ".flac", ".m4a", ".wma",
    # Archives
    ".zip", ".gz", ".bz2", ".xz", ".7z", ".rar", ".zst", ".lz4", ".lzma",
    # Documents (already compressed)
    ".pdf", ".docx", ".xlsx", ".pptx", ".odt",
    # Other
    ".woff", ".woff2", ".br",
}


def should_compress(filename: str) -> bool:
    """Check if file should be compressed based on extension."""
    suffix = Path(filename).suffix.lower()
    return suffix not in INCOMPRESSIBLE_EXTENSIONS


def compress_data(data: bytes, level: int = DEFAULT_LEVEL) -> bytes:
    """Compress data using zstd."""
    cctx = zstd.ZstdCompressor(level=level)
    return cctx.compress(data)


def decompress_data(data: bytes, max_output_size: int = 0) -> bytes:
    """Decompress zstd data."""
    dctx = zstd.ZstdDecompressor()
    # max_output_size=0 means use content size from frame header
    # For streaming data without content size, caller must provide max_output_size
    return dctx.decompress(data, max_output_size=max_output_size)


def compress_file(input_path: str | Path, output_path: str | Path, level: int = DEFAULT_LEVEL) -> float:
    """
    Compress a file using zstd.

    Returns compression ratio (compressed_size / original_size).
    """
    cctx = zstd.ZstdCompressor(level=level)

    with open(input_path, "rb") as fin, open(output_path, "wb") as fout:
        cctx.copy_stream(fin, fout)

    original_size = Path(input_path).stat().st_size
    compressed_size = Path(output_path).stat().st_size

    return compressed_size / original_size if original_size > 0 else 1.0


def decompress_file(input_path: str | Path, output_path: str | Path) -> None:
    """Decompress a zstd file."""
    dctx = zstd.ZstdDecompressor()

    with open(input_path, "rb") as fin, open(output_path, "wb") as fout:
        dctx.copy_stream(fin, fout)


class StreamingCompressor:
    """Streaming compressor for pipeline integration."""

    def __init__(self, level: int = DEFAULT_LEVEL):
        self.cctx = zstd.ZstdCompressor(level=level)
        self.compressor = self.cctx.compressobj()
        self.total_in = 0
        self.total_out = 0

    def compress(self, data: bytes) -> bytes:
        """Compress a chunk of data."""
        self.total_in += len(data)
        compressed = self.compressor.compress(data)
        self.total_out += len(compressed)
        return compressed

    def flush(self) -> bytes:
        """Flush remaining data and finalize compression."""
        final = self.compressor.flush()
        self.total_out += len(final)
        return final

    @property
    def ratio(self) -> float:
        """Current compression ratio."""
        if self.total_in == 0:
            return 1.0
        return self.total_out / self.total_in


class StreamingDecompressor:
    """Streaming decompressor for pipeline integration."""

    def __init__(self):
        self.dctx = zstd.ZstdDecompressor()
        self.decompressor = self.dctx.decompressobj()

    def decompress(self, data: bytes) -> bytes:
        """Decompress a chunk of data."""
        return self.decompressor.decompress(data)


def estimate_compressed_size(original_size: int, filename: str) -> int:
    """
    Estimate compressed size based on file type.

    Returns estimated size in bytes.
    """
    if not should_compress(filename):
        return original_size

    # Typical compression ratios by type
    suffix = Path(filename).suffix.lower()

    if suffix in {".txt", ".log", ".csv", ".json", ".xml", ".html", ".md"}:
        return int(original_size * 0.2)  # Text compresses well
    elif suffix in {".sql", ".py", ".js", ".ts", ".go", ".rs", ".c", ".cpp", ".h"}:
        return int(original_size * 0.25)  # Code compresses well
    elif suffix in {".tar", ".iso", ".img"}:
        return int(original_size * 0.6)  # Containers vary
    else:
        return int(original_size * 0.5)  # Default estimate
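
A quick round-trip sketch of the module above (illustrative only, not part of the package; assumes televault and its zstandard dependency are installed):

from televault.compress import (
    StreamingCompressor,
    compress_data,
    decompress_data,
    should_compress,
)

# Extension gate: text compresses, JPEGs are stored as-is.
assert should_compress("notes.md")
assert not should_compress("photo.jpg")

# One-shot round trip; the zstd frame header records the content size,
# so decompress_data() needs no explicit max_output_size here.
blob = b"hello televault " * 4096
assert decompress_data(compress_data(blob)) == blob

# Streaming path: feed pieces, flush to finalize the frame, read the ratio.
sc = StreamingCompressor(level=3)
parts = [sc.compress(blob[i:i + 8192]) for i in range(0, len(blob), 8192)]
parts.append(sc.flush())
print(f"compressed to {sc.ratio:.2%} of original")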
televault/config.py
ADDED
@@ -0,0 +1,81 @@
"""Configuration management for TeleVault."""

import json
from pathlib import Path
from dataclasses import dataclass, field, asdict
from typing import Optional
import os


def get_config_dir() -> Path:
    """Get TeleVault config directory."""
    if os.name == "nt":  # Windows
        base = Path(os.environ.get("APPDATA", "~"))
    else:  # Unix
        base = Path(os.environ.get("XDG_CONFIG_HOME", "~/.config"))

    config_dir = base.expanduser() / "televault"
    config_dir.mkdir(parents=True, exist_ok=True)
    return config_dir


def get_data_dir() -> Path:
    """Get TeleVault data directory (for temp files, cache)."""
    if os.name == "nt":
        base = Path(os.environ.get("LOCALAPPDATA", "~"))
    else:
        base = Path(os.environ.get("XDG_DATA_HOME", "~/.local/share"))

    data_dir = base.expanduser() / "televault"
    data_dir.mkdir(parents=True, exist_ok=True)
    return data_dir


@dataclass
class Config:
    """TeleVault configuration."""

    # Telegram settings
    channel_id: Optional[int] = None

    # Chunking
    chunk_size: int = 100 * 1024 * 1024  # 100MB

    # Processing options
    compression: bool = True
    encryption: bool = True

    # Concurrency
    parallel_uploads: int = 3
    parallel_downloads: int = 5

    # Retry settings
    max_retries: int = 3
    retry_delay: float = 1.0

    def save(self) -> None:
        """Save config to file."""
        config_path = get_config_dir() / "config.json"
        with open(config_path, "w") as f:
            json.dump(asdict(self), f, indent=2)

    @classmethod
    def load(cls) -> "Config":
        """Load config from file."""
        config_path = get_config_dir() / "config.json"

        if not config_path.exists():
            return cls()

        with open(config_path) as f:
            data = json.load(f)

        return cls(**data)

    @classmethod
    def load_or_create(cls) -> "Config":
        """Load config or create default."""
        config = cls.load()
        if not (get_config_dir() / "config.json").exists():
            config.save()
        return config
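
A minimal sketch of the Config lifecycle (illustrative, not shipped with the package; the values below are placeholders):

from televault.config import Config, get_config_dir

cfg = Config.load_or_create()        # reads config.json, or writes the defaults
cfg.chunk_size = 50 * 1024 * 1024    # e.g. shrink chunks to 50MB
cfg.parallel_uploads = 5
cfg.save()                           # serialized as indented JSON via asdict()

# Resolves under $XDG_CONFIG_HOME (default ~/.config) on Unix,
# under %APPDATA% on Windows.
print(get_config_dir() / "config.json")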
televault/core.py
ADDED
@@ -0,0 +1,479 @@
"""Core TeleVault operations - upload, download, list."""

import asyncio
import os
import tempfile
from pathlib import Path
from typing import Optional, Callable
from dataclasses import dataclass
import hashlib

from .config import Config, get_data_dir
from .models import FileMetadata, ChunkInfo, VaultIndex
from .telegram import TelegramVault, TelegramConfig
from .chunker import iter_chunks, hash_file, hash_data, ChunkWriter, DEFAULT_CHUNK_SIZE
from .crypto import encrypt_chunk, decrypt_chunk
from .compress import compress_data, decompress_data, should_compress


def generate_file_id(name: str, size: int) -> str:
    """Generate short unique file ID."""
    data = f"{name}:{size}:{os.urandom(8).hex()}"
    return hashlib.sha256(data.encode()).hexdigest()[:12]


@dataclass
class UploadProgress:
    """Progress information for upload."""
    file_name: str
    total_size: int
    uploaded_size: int
    total_chunks: int
    uploaded_chunks: int
    current_chunk: int

    @property
    def percent(self) -> float:
        if self.total_chunks == 0:
            return 100.0
        return (self.uploaded_chunks / self.total_chunks) * 100


@dataclass
class DownloadProgress:
    """Progress information for download."""
    file_name: str
    total_size: int
    downloaded_size: int
    total_chunks: int
    downloaded_chunks: int
    current_chunk: int

    @property
    def percent(self) -> float:
        if self.total_chunks == 0:
            return 100.0
        return (self.downloaded_chunks / self.total_chunks) * 100


ProgressCallback = Callable[[UploadProgress | DownloadProgress], None]


class TeleVault:
    """
    Main TeleVault interface.

    Handles file upload, download, listing with compression and encryption.
    """

    def __init__(
        self,
        config: Optional[Config] = None,
        telegram_config: Optional[TelegramConfig] = None,
        password: Optional[str] = None,
    ):
        self.config = config or Config.load_or_create()
        self.telegram = TelegramVault(telegram_config)
        self.password = password
        self._connected = False

    async def connect(self, skip_channel: bool = False) -> None:
        """Connect to Telegram."""
        await self.telegram.connect()

        if not skip_channel and self.config.channel_id:
            # Only set channel if we're already authenticated
            if await self.telegram._client.is_user_authorized():
                await self.telegram.set_channel(self.config.channel_id)

        self._connected = True

    async def disconnect(self) -> None:
        """Disconnect from Telegram."""
        await self.telegram.disconnect()
        self._connected = False

    async def login(self, phone: Optional[str] = None) -> str:
        """Interactive login flow."""
        return await self.telegram.login(phone)

    async def setup_channel(self, channel_id: Optional[int] = None) -> int:
        """Set up storage channel."""
        if channel_id:
            await self.telegram.set_channel(channel_id)
            self.config.channel_id = channel_id
        else:
            channel_id = await self.telegram.create_channel()
            self.config.channel_id = channel_id

        self.config.save()
        return channel_id

    async def upload(
        self,
        file_path: str | Path,
        password: Optional[str] = None,
        progress_callback: Optional[ProgressCallback] = None,
        preserve_path: bool = False,
    ) -> FileMetadata:
        """
        Upload a file to TeleVault with parallel chunk uploads.

        Args:
            file_path: Path to file to upload
            password: Encryption password (uses instance password if not provided)
            progress_callback: Optional progress callback
            preserve_path: If True, include full path in filename (for directory uploads)

        Returns:
            FileMetadata of uploaded file
        """
        if not self._connected:
            raise RuntimeError("Not connected. Call connect() first.")

        file_path = Path(file_path)
        if not file_path.exists():
            raise FileNotFoundError(f"File not found: {file_path}")

        password = password or self.password

        # Get file info
        file_name = file_path.name
        if preserve_path:
            # Use full path relative to upload root (replace / with _ for safety)
            # For now, just use the full path
            file_name = str(file_path)
            file_name = file_name.replace("/", "_")

        file_size = file_path.stat().st_size
        file_hash = hash_file(file_path)
        file_id = generate_file_id(file_name, file_size)

        # Count chunks
        chunk_size = self.config.chunk_size
        total_chunks = (file_size + chunk_size - 1) // chunk_size
        if total_chunks == 0:
            total_chunks = 1  # Empty file = 1 empty chunk

        # Create initial metadata
        metadata = FileMetadata(
            id=file_id,
            name=file_name,
            size=file_size,
            hash=file_hash,
            encrypted=self.config.encryption and password is not None,
            compressed=self.config.compression and should_compress(file_name),
        )

        # Upload metadata message first
        metadata_msg_id = await self.telegram.upload_metadata(metadata)
        metadata.message_id = metadata_msg_id

        # Prepare chunks for parallel upload
        chunk_results: dict[int, ChunkInfo] = {}
        uploaded_count = 0
        lock = asyncio.Lock()

        async def upload_single_chunk(chunk):
            nonlocal uploaded_count

            data = chunk.data

            # Compress if enabled
            if metadata.compressed:
                data = compress_data(data)

            # Encrypt if enabled
            if metadata.encrypted and password:
                data = encrypt_chunk(data, password)

            # Upload chunk
            chunk_msg_id = await self.telegram.upload_chunk(
                data=data,
                filename=f"{file_id}_{chunk.index:04d}.chunk",
                reply_to=metadata_msg_id,
            )

            # Track chunk info
            chunk_info = ChunkInfo(
                index=chunk.index,
                message_id=chunk_msg_id,
                size=len(data),
                hash=hash_data(data),
            )

            async with lock:
                chunk_results[chunk.index] = chunk_info
                uploaded_count += 1

                # Progress callback
                if progress_callback:
                    progress_callback(UploadProgress(
                        file_name=file_name,
                        total_size=file_size,
                        uploaded_size=int(file_size * uploaded_count / total_chunks),
                        total_chunks=total_chunks,
                        uploaded_chunks=uploaded_count,
                        current_chunk=chunk.index,
                    ))

        # Upload chunks in parallel (limited concurrency)
        semaphore = asyncio.Semaphore(self.config.parallel_uploads)

        async def upload_with_limit(chunk):
            async with semaphore:
                await upload_single_chunk(chunk)

        # Collect all chunks first for parallel processing
        chunks = list(iter_chunks(file_path, chunk_size))

        if chunks:
            await asyncio.gather(*[upload_with_limit(c) for c in chunks])

        # Sort chunks by index
        metadata.chunks = [chunk_results[i] for i in sorted(chunk_results.keys())]

        # Update metadata with chunk info
        await self.telegram.update_metadata(metadata_msg_id, metadata)

        # Update index
        index = await self.telegram.get_index()
        index.add_file(file_id, metadata_msg_id)
        await self.telegram.save_index(index)

        return metadata

    async def download(
        self,
        file_id_or_name: str,
        output_path: Optional[str | Path] = None,
        password: Optional[str] = None,
        progress_callback: Optional[ProgressCallback] = None,
    ) -> Path:
        """
        Download a file from TeleVault.

        Args:
            file_id_or_name: File ID or name to download
            output_path: Output path (uses original filename in current dir if not provided)
            password: Decryption password
            progress_callback: Optional progress callback

        Returns:
            Path to downloaded file
        """
        if not self._connected:
            raise RuntimeError("Not connected. Call connect() first.")

        password = password or self.password

        # Find file
        index = await self.telegram.get_index()

        # Try as file ID first
        if file_id_or_name in index.files:
            metadata_msg_id = index.files[file_id_or_name]
        else:
            # Search by name
            files = await self.telegram.list_files()
            matches = [f for f in files if f.name == file_id_or_name or file_id_or_name in f.name]

            if not matches:
                raise FileNotFoundError(f"File not found: {file_id_or_name}")
            if len(matches) > 1:
                raise ValueError(f"Multiple files match '{file_id_or_name}': {[f.name for f in matches]}")

            metadata_msg_id = matches[0].message_id

        # Get metadata
        metadata = await self.telegram.get_metadata(metadata_msg_id)

        # Determine output path
        if output_path:
            output_path = Path(output_path)
        else:
            output_path = Path.cwd() / metadata.name

        # Create chunk writer
        writer = ChunkWriter(output_path, metadata.size, self.config.chunk_size)

        downloaded_size = 0

        # Download chunks in order
        for chunk_info in sorted(metadata.chunks, key=lambda c: c.index):
            # Download chunk
            data = await self.telegram.download_chunk(chunk_info.message_id)

            # Verify hash
            if hash_data(data) != chunk_info.hash:
                raise ValueError(f"Chunk {chunk_info.index} hash mismatch - data corrupted")

            # Decrypt if needed
            if metadata.encrypted:
                if not password:
                    raise ValueError("File is encrypted but no password provided")
                data = decrypt_chunk(data, password)

            # Decompress if needed
            if metadata.compressed:
                data = decompress_data(data)

            # Write chunk
            from .chunker import Chunk
            writer.write_chunk(Chunk(
                index=chunk_info.index,
                data=data,
                hash="",  # Already verified
                size=len(data),
            ))

            downloaded_size += len(data)

            # Progress callback
            if progress_callback:
                progress_callback(DownloadProgress(
                    file_name=metadata.name,
                    total_size=metadata.size,
                    downloaded_size=downloaded_size,
                    total_chunks=len(metadata.chunks),
                    downloaded_chunks=chunk_info.index + 1,
                    current_chunk=chunk_info.index,
                ))

        # Verify final hash
        if hash_file(output_path) != metadata.hash:
            output_path.unlink()  # Delete corrupted file
            raise ValueError("Downloaded file hash mismatch - file corrupted")

        return output_path

    async def list_files(self) -> list[FileMetadata]:
        """List all files in the vault."""
        if not self._connected:
            raise RuntimeError("Not connected. Call connect() first.")

        return await self.telegram.list_files()

    async def search(self, query: str) -> list[FileMetadata]:
        """Search files by name."""
        if not self._connected:
            raise RuntimeError("Not connected. Call connect() first.")

        return await self.telegram.search_files(query)

    async def delete(self, file_id_or_name: str) -> bool:
        """Delete a file."""
        if not self._connected:
            raise RuntimeError("Not connected. Call connect() first.")

        index = await self.telegram.get_index()

        # Try as file ID first
        if file_id_or_name in index.files:
            return await self.telegram.delete_file(file_id_or_name)

        # Search by name
        files = await self.telegram.list_files()
        matches = [f for f in files if f.name == file_id_or_name]

        if not matches:
            return False
        if len(matches) > 1:
            raise ValueError(f"Multiple files match '{file_id_or_name}'")

        return await self.telegram.delete_file(matches[0].id)

    async def get_status(self) -> dict:
        """Get vault status."""
        if not self._connected:
            raise RuntimeError("Not connected. Call connect() first.")

        files = await self.list_files()
        total_size = sum(f.size for f in files)
        stored_size = sum(f.total_stored_size for f in files)

        return {
            "channel_id": self.config.channel_id,
            "file_count": len(files),
            "total_size": total_size,
            "stored_size": stored_size,
            "compression_ratio": stored_size / total_size if total_size > 0 else 1.0,
        }
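
An end-to-end sketch tying the class above together (illustrative, not package code; it assumes an already-authorized Telegram session and a configured channel, and the file names, password, and print-based progress handler are placeholders):

import asyncio

from televault.core import TeleVault


async def main() -> None:
    vault = TeleVault(password="correct-horse")  # placeholder password
    await vault.connect()
    try:
        meta = await vault.upload(
            "backup.tar",  # placeholder path
            progress_callback=lambda p: print(f"{p.file_name}: {p.percent:.0f}%"),
        )
        print(f"uploaded {meta.name} as {meta.id} ({len(meta.chunks)} chunk(s))")

        out = await vault.download(meta.id, output_path="restored.tar")
        print(f"restored to {out}")

        print(await vault.get_status())
    finally:
        await vault.disconnect()


asyncio.run(main())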