vacancies-parser-kit 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,28 @@
1
+ Metadata-Version: 2.4
2
+ Name: vacancies-parser-kit
3
+ Version: 0.1.0
4
+ Summary: Multi-source vacancy parser with pluggable storage backends
5
+ License: MIT
6
+ Requires-Python: >=3.11
7
+ Requires-Dist: click>=8.1
8
+ Requires-Dist: pyyaml>=6.0
9
+ Requires-Dist: pydantic>=2.0
10
+ Requires-Dist: jinja2>=3.1
11
+ Requires-Dist: rich>=13.0
12
+ Provides-Extra: hh
13
+ Requires-Dist: httpx>=0.27; extra == "hh"
14
+ Provides-Extra: telegram
15
+ Requires-Dist: telethon>=1.36; extra == "telegram"
16
+ Provides-Extra: linkedin
17
+ Requires-Dist: httpx>=0.27; extra == "linkedin"
18
+ Provides-Extra: clickhouse
19
+ Requires-Dist: clickhouse-connect>=0.7; extra == "clickhouse"
20
+ Provides-Extra: postgres
21
+ Requires-Dist: asyncpg>=0.29; extra == "postgres"
22
+ Requires-Dist: psycopg[binary]>=3.1; extra == "postgres"
23
+ Provides-Extra: all
24
+ Requires-Dist: vacancies-parser-kit[clickhouse,hh,linkedin,postgres,telegram]; extra == "all"
25
+ Provides-Extra: dev
26
+ Requires-Dist: pytest>=8.0; extra == "dev"
27
+ Requires-Dist: pytest-asyncio>=0.23; extra == "dev"
28
+ Requires-Dist: ruff>=0.4; extra == "dev"
@@ -0,0 +1,36 @@
1
+ vpr/__init__.py,sha256=-8BZGB5_iPPXXECpGVGmU9uYVVlft3YVyeULLyn_gbQ,66
2
+ vpr/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
+ vpr/cli/main.py,sha256=Oxruo2Lgez8aL8phbiqSK9u7-mxbTYW_0Puu0fMcwX8,438
4
+ vpr/cli/commands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ vpr/cli/commands/init.py,sha256=df2mxEm7DvQVj95gVTH8r94gaXc9lmqNq2AxTMhMMnM,898
6
+ vpr/cli/commands/list.py,sha256=rNgp6r0Y89FzBX14dM7FH2Oqv4-YvntrbknqXPn80oU,1648
7
+ vpr/cli/commands/run.py,sha256=6axJ2LkOpsqSAylCn4OV4qvf8xy-0shLJ7cAt_vm0kw,3424
8
+ vpr/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
+ vpr/config/profile.py,sha256=YdTQ7-oSX_zaA6M-FvrFY_YkXA_qLUKUTUeE_3FxTyg,1299
10
+ vpr/config/project.py,sha256=zaNcWI8IQun1SDEpbsICSFcTLIo3WkNUHmgwsw-vze0,1130
11
+ vpr/config/source_config.py,sha256=yh2kxSXLKRB6hPlrWe9mKRncsq5D3HKhBsvtWm-iDdg,1278
12
+ vpr/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
+ vpr/core/models.py,sha256=g8y32wqDOwDM9VEKEKs4cgUzFFie_MYB0MavM7kHmd0,1663
14
+ vpr/core/pipeline.py,sha256=Kfb6ruXIGCpb-75kyUrqXjqMx8ow8CuDspz_8Uf5Gz8,1990
15
+ vpr/core/selector.py,sha256=qeH45-y08eR2q0aoDSUeAF2A-j9GksScxnKYAczL5xc,1068
16
+ vpr/sources/__init__.py,sha256=JIbkAFKHpwgVTiTgN8uC7JPAUUilVDNSkl952Kf4UEg,119
17
+ vpr/sources/base.py,sha256=A7gqRKYTBoMgFvVBqRqpWRKVN1zM2rAsSO_A7N-nGAU,802
18
+ vpr/sources/headhunter.py,sha256=cRip32dXf3qVKe3aqbG1kFj9CqWiQL5LovjzMMnTguA,9606
19
+ vpr/sources/hh_dictionaries.py,sha256=wrCPGM8aA9IpOtjCTV1u9PHSKjDRRhciiVzHp3qvHdM,2405
20
+ vpr/sources/registry.py,sha256=oKusJkHrSlgT5aym9eDcbHv9W9q65HcR55ZWuljxZE0,757
21
+ vpr/templates/__init__.py,sha256=HoBzfwDVFdQ3x3cBTBsWjmC3-vfV2aN92rAGklgICnU,98
22
+ vpr/templates/scaffold.py,sha256=Z2vyN9s5Nv0NaIHxj9sRIQpwnySYww7jEB7r7hNfyHA,2856
23
+ vpr/transforms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
24
+ vpr/transforms/normalize.py,sha256=licHq8Hb74Pyt0Je2HAg4qnM_4yw2b59jFr0zyC5rts,519
25
+ vpr/writers/__init__.py,sha256=GxHSSrh1OUAJ5NMpSCOTWC-ldSHAYv0LWvlUy42xFXA,398
26
+ vpr/writers/base.py,sha256=PtdQzlYvOc49NoiGPUP8jecvcYit_zUR9wQQRgGtOCs,743
27
+ vpr/writers/clickhouse.py,sha256=E8gJhC7OiSvUcs40XuOyDSN-g5krhoeqRj6PCGG89Rc,3471
28
+ vpr/writers/jsonl.py,sha256=F6G3Lb94juXXqOOenxoxYLX7EohNiIoqTn_-7noAjQ4,821
29
+ vpr/writers/postgres.py,sha256=WscPr5d4uINUVvfAxfxETfpVcB3iO1Ymxbb3GHBLRgk,4132
30
+ vpr/writers/registry.py,sha256=T0MtCNvgsyxryIiIqFbcfOLskM_hoiIseD2l1jGWAS4,757
31
+ vpr/writers/sqlite.py,sha256=rwxVvAX6iF75v3L7XadrPaYvHCFboiENJD23JE6VmfE,3673
32
+ vacancies_parser_kit-0.1.0.dist-info/METADATA,sha256=WtYAcSdMfbYnNDHBhzcIPelW_WZ7-REYk9ilcjKs7i8,989
33
+ vacancies_parser_kit-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
34
+ vacancies_parser_kit-0.1.0.dist-info/entry_points.txt,sha256=egwVIIn97VkYCJ9ZgKTyk_8nIeuTCQe-tSEIKywo1Y8,41
35
+ vacancies_parser_kit-0.1.0.dist-info/top_level.txt,sha256=YtP4jygNOfY9lRC2fgxBLU-n1WVbODLp75tLRznx_0M,4
36
+ vacancies_parser_kit-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ vpr = vpr.cli.main:cli
vpr/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ """vpr — multi-source vacancy parser."""
2
+
3
+ __version__ = "0.1.0"
vpr/cli/__init__.py ADDED
File without changes
File without changes
@@ -0,0 +1,29 @@
1
+ """vpr init — scaffold a new project."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+
7
+ import click
8
+ from rich.console import Console
9
+
10
+ from vpr.templates import render_project_scaffold
11
+
12
# Module-wide rich console used for all `vpr init` output.
console = Console()


# NOTE: the function docstring doubles as the `vpr init --help` text, so it is
# kept short; implementation notes live in comments instead.
@click.command("init")
@click.argument("project_name")
def init_cmd(project_name: str) -> None:
    """Create a new vpr project."""
    # The project is always created as a child of the current directory.
    destination = Path.cwd().joinpath(project_name)
    if destination.exists():
        # Refuse to scaffold into anything that is already there.
        raise click.ClickException(f"Directory '{project_name}' already exists")

    render_project_scaffold(destination, project_name)

    console.print(f"[green]Project '{project_name}' created at {destination}[/green]")
    # Follow-up hints, printed one console line each.
    follow_up = (
        "Next steps:",
        f" cd {project_name}",
        " # edit profiles.yml with your database credentials",
        " # add source definitions to sources/",
        " vpr run --select <source_name>",
    )
    for hint in follow_up:
        console.print(hint)
@@ -0,0 +1,59 @@
1
+ """vpr list — show all configured sources."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import click
6
+ from rich.console import Console
7
+ from rich.table import Table
8
+
9
+ from vpr.config.project import find_project_root, load_project_config
10
+ from vpr.config.source_config import load_source_configs
11
+
12
# Module-wide rich console used for all `vpr list` output.
console = Console()


# NOTE: the function docstring doubles as the `vpr list --help` text.
@click.command("list")
@click.option("--tags", is_flag=True, help="Group sources by tag")
def list_cmd(tags: bool) -> None:
    """List all configured sources."""
    # A missing project file (FileNotFoundError from find_project_root) or an
    # invalid config (ValueError from the loaders) is a user error: report it
    # as a regular CLI error instead of letting a traceback escape.
    try:
        project_root = find_project_root()
        project_cfg = load_project_config(project_root)
        sources = load_source_configs(project_root / project_cfg.paths.sources)
    except (OSError, ValueError) as exc:
        raise click.ClickException(str(exc)) from exc

    if not sources:
        console.print("[yellow]No sources found.[/yellow]")
        return

    # --tags switches from the flat table to a per-tag grouping.
    if tags:
        _print_by_tags(sources)
    else:
        _print_table(sources)
31
+
32
+
33
def _print_table(sources) -> None:
    """Print *sources* as one rich table, one row per source, ordered by name."""
    table = Table(title="Sources")
    table.add_column("Name", style="cyan")
    table.add_column("Type", style="green")
    table.add_column("Tags")
    table.add_column("Targets")

    ordered = sorted(sources, key=lambda item: item.name)
    for entry in ordered:
        tags_cell = ", ".join(entry.tags)
        # An em dash marks sources that declare no explicit targets.
        targets_cell = ", ".join(entry.targets) if entry.targets else "—"
        table.add_row(entry.name, entry.type, tags_cell, targets_cell)

    console.print(table)
48
+
49
+
50
def _print_by_tags(sources) -> None:
    """Print source names grouped under each tag, tags and names sorted.

    Sources that carry no tags do not appear in this view.
    """
    names_by_tag: dict[str, list[str]] = {}
    for source in sources:
        for label in source.tags:
            bucket = names_by_tag.get(label)
            if bucket is None:
                bucket = names_by_tag[label] = []
            bucket.append(source.name)

    for label in sorted(names_by_tag):
        console.print(f"[bold]tag:{label}[/bold]")
        for source_name in sorted(names_by_tag[label]):
            console.print(f" - {source_name}")
@@ -0,0 +1,92 @@
1
+ """vpr run — fetch, transform, and write vacancies."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ from typing import TYPE_CHECKING
7
+
8
+ import click
9
+ from rich.console import Console
10
+
11
+ from vpr.config.project import find_project_root, load_project_config
12
+ from vpr.config.profile import load_profiles
13
+ from vpr.config.source_config import load_source_configs
14
+ from vpr.core.selector import select_sources
15
+
16
+ if TYPE_CHECKING:
17
+ pass
18
+
19
# Module-wide rich console used for all `vpr run` output.
console = Console()

# Pipeline stage names accepted by the --steps option; when --steps is
# omitted, all of them are run.
VALID_STEPS = ("fetch", "transform", "write")
22
+
23
+
24
# NOTE: the function docstring doubles as the `vpr run --help` text, so it is
# kept short; implementation notes live in comments instead.
@click.command("run")
@click.option("--select", "selector", required=True, help="Source selector: name, glob pattern, or tag:<tag>")
@click.option("--exclude", "excludes", multiple=True, help="Exclude sources by name")
@click.option("--steps", default=None, help=f"Comma-separated pipeline steps: {', '.join(VALID_STEPS)}")
@click.option("--profile", default=None, help="Override default profile from vpr_project.yml")
@click.option("--full-refresh", is_flag=True, help="Drop existing data and re-fetch from scratch")
def run_cmd(
    selector: str,
    excludes: tuple[str, ...],
    steps: str | None,
    profile: str | None,
    full_refresh: bool,
) -> None:
    """Run the vacancy pipeline for selected sources."""
    active_steps = _parse_steps(steps)

    # A missing project/profiles file (FileNotFoundError) or an invalid config
    # (ValueError from the loaders) is a user error: report it as a regular
    # CLI error instead of letting a traceback escape.
    try:
        project_root = find_project_root()
        project_cfg = load_project_config(project_root)
        profiles_cfg = load_profiles(project_root)
        all_sources = load_source_configs(project_root / project_cfg.paths.sources)
    except (OSError, ValueError) as exc:
        raise click.ClickException(str(exc)) from exc

    if not all_sources:
        raise click.ClickException(f"No source configs found in {project_cfg.paths.sources}/")

    matched = select_sources(all_sources, selector, excludes=set(excludes))
    if not matched:
        raise click.ClickException(f"No sources matched selector '{selector}'")

    # --profile wins over the project's default profile.
    profile_name = profile or project_cfg.default_profile
    if profile_name not in profiles_cfg.profiles:
        raise click.ClickException(f"Profile '{profile_name}' not found in profiles.yml")

    console.print(f"[bold]Profile:[/bold] {profile_name}")
    console.print(f"[bold]Steps:[/bold] {', '.join(active_steps)}")
    console.print(f"[bold]Sources ({len(matched)}):[/bold]")
    for src in matched:
        console.print(f" - {src.name} [dim]({src.type})[/dim]")
    console.print()

    failed = asyncio.run(_run_pipeline(matched, profiles_cfg, profile_name, active_steps, full_refresh))
    if failed:
        # Every source was still attempted; the non-zero exit status lets
        # schedulers and shell scripts notice that the run was not clean.
        raise click.ClickException(
            f"{len(failed)} of {len(matched)} source(s) failed: {', '.join(failed)}"
        )


async def _run_pipeline(sources, profiles_cfg, profile_name, steps, full_refresh) -> list[str]:
    """Execute the pipeline for each matched source.

    A failure in one source is reported and does not stop the remaining
    sources from running.

    Returns:
        Names of the sources whose pipeline raised, in processing order
        (empty when every source succeeded).
    """
    from rich.markup import escape

    from vpr.core.pipeline import run_source_pipeline

    profile = profiles_cfg.profiles[profile_name]
    failed: list[str] = []
    for src_cfg in sources:
        console.print(f"[bold cyan]>>> {src_cfg.name}[/bold cyan]")
        try:
            await run_source_pipeline(
                source_config=src_cfg,
                profile=profile,
                steps=steps,
                full_refresh=full_refresh,
            )
        except Exception as exc:
            # Exception text routinely contains square brackets; escape it so
            # rich prints it verbatim instead of parsing it as markup tags.
            console.print(f"[red] ✗ {src_cfg.name} failed: {escape(str(exc))}[/red]")
            failed.append(src_cfg.name)
        else:
            console.print(f"[green] ✓ {src_cfg.name} done[/green]")
    return failed
83
+
84
+
85
def _parse_steps(raw: str | None) -> list[str]:
    """Turn the raw --steps value into a list of step names.

    ``None`` selects every step in VALID_STEPS. Otherwise the value is split
    on commas and each piece is stripped; the first piece that is not a known
    step aborts with a ``click.ClickException``.
    """
    if raw is None:
        return list(VALID_STEPS)

    requested = [chunk.strip() for chunk in raw.split(",")]
    unknown = next((name for name in requested if name not in VALID_STEPS), None)
    if unknown is not None:
        raise click.ClickException(f"Unknown step '{unknown}'. Valid: {', '.join(VALID_STEPS)}")
    return requested
vpr/cli/main.py ADDED
@@ -0,0 +1,19 @@
1
+ """CLI entry point for vpr."""
2
+
3
+ import click
4
+
5
+ from vpr import __version__
6
+ from vpr.cli.commands.init import init_cmd
7
+ from vpr.cli.commands.list import list_cmd
8
+ from vpr.cli.commands.run import run_cmd
9
+
10
+
11
# Root command group; the docstring below is what `vpr --help` displays.
@click.group()
@click.version_option(version=__version__, prog_name="vpr")
def cli() -> None:
    """vpr — multi-source vacancy parser."""


# Register the sub-commands under explicit names.
cli.add_command(init_cmd, name="init")
cli.add_command(run_cmd, name="run")
cli.add_command(list_cmd, name="list")
vpr/config/__init__.py ADDED
File without changes
vpr/config/profile.py ADDED
@@ -0,0 +1,50 @@
1
+ """Profiles configuration (profiles.yml)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ import re
7
+ from pathlib import Path
8
+ from typing import Any
9
+
10
+ import yaml
11
+ from pydantic import BaseModel
12
+
13
# Matches a `{{ env_var('NAME') }}` placeholder: optional whitespace inside the
# braces, NAME wrapped in single or double quotes. Group 1 captures NAME.
_ENV_VAR_RE = re.compile(r"\{\{\s*env_var\(['\"](\w+)['\"]\)\s*\}\}")
14
+
15
+
16
class TargetConfig(BaseModel):
    """A single write target declared inside a profile."""

    # Writer kind; the pipeline resolves the writer class from this value.
    type: str
    # Free-form settings for the target.
    params: dict[str, Any] = {}

    # Keys other than `type`/`params` are accepted rather than rejected.
    model_config = {"extra": "allow"}
21
+
22
+
23
class ProfileConfig(BaseModel):
    """One named profile: a set of write targets."""

    # Target name -> target definition.
    targets: dict[str, TargetConfig]
25
+
26
+
27
class ProfilesConfig(BaseModel):
    """Top-level structure of profiles.yml."""

    # NOTE(review): `vpr run` takes its fallback profile from the project
    # config's `default_profile`, not from this field — confirm it is needed.
    default_profile: str = "dev"
    # Profile name -> profile definition.
    profiles: dict[str, ProfileConfig]
30
+
31
+
32
+ def _resolve_env_vars(obj: Any) -> Any:
33
+ """Recursively resolve {{ env_var('NAME') }} placeholders."""
34
+ if isinstance(obj, str):
35
+ return _ENV_VAR_RE.sub(lambda m: os.environ.get(m.group(1), ""), obj)
36
+ if isinstance(obj, dict):
37
+ return {k: _resolve_env_vars(v) for k, v in obj.items()}
38
+ if isinstance(obj, list):
39
+ return [_resolve_env_vars(v) for v in obj]
40
+ return obj
41
+
42
+
43
def load_profiles(project_root: Path) -> ProfilesConfig:
    """Load and validate ``profiles.yml`` from *project_root*.

    Placeholders of the form ``{{ env_var('NAME') }}`` are resolved from the
    environment before the data is validated.

    Raises:
        FileNotFoundError: if ``profiles.yml`` does not exist in *project_root*.
        ValueError: if the file is empty or its top level is not a mapping.
    """
    path = project_root / "profiles.yml"
    if not path.is_file():
        raise FileNotFoundError(f"profiles.yml not found in {project_root}")
    # Read as UTF-8 explicitly so the result does not depend on the platform's
    # locale encoding.
    with open(path, encoding="utf-8") as f:
        data = yaml.safe_load(f)
    # An empty document loads as None, which cannot be unpacked into the model.
    if not data:
        raise ValueError(f"Empty profiles config: {path}")
    if not isinstance(data, dict):
        raise ValueError(f"Profiles config must be a mapping: {path}")
    data = _resolve_env_vars(data)
    return ProfilesConfig(**data)
vpr/config/project.py ADDED
@@ -0,0 +1,41 @@
1
+ """Project-level configuration (vpr_project.yml)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+
7
+ import yaml
8
+ from pydantic import BaseModel, Field
9
+
10
# File name that marks the root directory of a vpr project.
PROJECT_FILE = "vpr_project.yml"
11
+
12
+
13
class ProjectPaths(BaseModel):
    """Directory names configured for a project."""

    # Directory holding source definitions; the CLI joins it onto the project root.
    sources: str = "sources"
    # NOTE(review): presumably also relative to the project root — no reader
    # of this field is visible here; confirm.
    dictionaries: str = "dictionaries"
16
+
17
+
18
class ProjectConfig(BaseModel):
    """Contents of vpr_project.yml."""

    # Project name (the only required key).
    name: str
    version: str = "1.0"
    # Profile used by `vpr run` when --profile is not given.
    default_profile: str = "dev"
    # Directory layout; falls back to the ProjectPaths defaults.
    paths: ProjectPaths = Field(default_factory=ProjectPaths)
23
+
24
+
25
def find_project_root(start: Path | None = None) -> Path:
    """Locate the nearest directory at or above *start* that holds vpr_project.yml.

    *start* defaults to the current working directory and is resolved to an
    absolute path before the search. Raises ``FileNotFoundError`` when neither
    it nor any of its ancestors contains the project file.
    """
    origin = (start or Path.cwd()).resolve()
    candidates = (origin, *origin.parents)
    found = next((folder for folder in candidates if (folder / PROJECT_FILE).is_file()), None)
    if found is None:
        raise FileNotFoundError(
            f"Could not find {PROJECT_FILE}. "
            "Are you inside a vpr project? Run 'vpr init <name>' to create one."
        )
    return found
35
+
36
+
37
def load_project_config(project_root: Path) -> ProjectConfig:
    """Load and validate ``vpr_project.yml`` from *project_root*.

    Raises:
        ValueError: if the file is empty or its top level is not a mapping.
    """
    path = project_root / PROJECT_FILE
    # Read as UTF-8 explicitly so the result does not depend on the platform's
    # locale encoding.
    with open(path, encoding="utf-8") as f:
        data = yaml.safe_load(f)
    # An empty document loads as None, which cannot be unpacked into the model.
    if not data:
        raise ValueError(f"Empty project config: {path}")
    if not isinstance(data, dict):
        raise ValueError(f"Project config must be a mapping: {path}")
    return ProjectConfig(**data)
@@ -0,0 +1,46 @@
1
+ """Source instance configuration (sources/*.yml)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+ from typing import Any
7
+
8
+ import yaml
9
+ from pydantic import BaseModel, Field
10
+
11
+
12
class SourceConfig(BaseModel):
    """A single source instance loaded from YAML."""

    # Name that --select / --exclude match against.
    name: str
    # Source kind; the pipeline resolves the source class from this value.
    type: str
    # Labels matched by the `tag:<tag>` selector form.
    tags: list[str] = Field(default_factory=list)
    profile: str | None = None
    # Free-form settings handed to the source implementation.
    params: dict[str, Any] = Field(default_factory=dict)
    # Target names to write to; when empty, the pipeline writes to every
    # target of the active profile.
    targets: list[str] = Field(default_factory=list)
    # Disabled configs are dropped by load_source_configs.
    enabled: bool = True

    # metadata — filled at load time
    config_path: Path | None = Field(default=None, exclude=True)
25
+
26
+
27
def load_source_config(path: Path) -> SourceConfig:
    """Load one source definition from the YAML file at *path*.

    The returned config has ``config_path`` set to *path*.

    Raises:
        ValueError: if the file is empty or its top level is not a mapping.
    """
    # Read as UTF-8 explicitly so the result does not depend on the platform's
    # locale encoding.
    with open(path, encoding="utf-8") as f:
        data = yaml.safe_load(f)
    if not data:
        raise ValueError(f"Empty source config: {path}")
    # A list or scalar document cannot be unpacked into the model.
    if not isinstance(data, dict):
        raise ValueError(f"Source config must be a mapping: {path}")
    cfg = SourceConfig(**data)
    cfg.config_path = path
    return cfg
35
+
36
+
37
def load_source_configs(sources_dir: Path) -> list[SourceConfig]:
    """Load all *.yml files from *sources_dir* recursively.

    Files are processed in sorted path order and configs with
    ``enabled: false`` are left out. A missing directory yields an empty list.
    """
    if not sources_dir.is_dir():
        return []

    loaded = (load_source_config(yaml_file) for yaml_file in sorted(sources_dir.rglob("*.yml")))
    return [config for config in loaded if config.enabled]
vpr/core/__init__.py ADDED
File without changes
vpr/core/models.py ADDED
@@ -0,0 +1,66 @@
1
+ """Canonical vacancy model — unified schema all sources map into."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from datetime import UTC, datetime
6
+ from enum import Enum
7
+ from typing import Any
8
+
9
+ from pydantic import BaseModel, Field
10
+
11
+
12
class VacancyStatus(str, Enum):
    """Lifecycle state of a vacancy record; members are also plain strings."""

    CREATED = "created"
    UPDATED = "updated"
    DELETED = "deleted"
16
+
17
+
18
class Salary(BaseModel):
    """Compensation details; every part is optional."""

    # Lower / upper bound of the offered amount.
    min: float | None = None
    max: float | None = None
    currency: str | None = None
    # NOTE(review): presumably True when the amounts are before tax — confirm
    # against the source mappings.
    gross: bool | None = None
23
+
24
+
25
class Vacancy(BaseModel):
    """Canonical vacancy record.

    Composite identity: (source, source_id).
    Only source/source_id are required. Descriptive fields default to None —
    sources may provide incomplete data and that's OK — while skills,
    fetched_at, status and raw always carry a non-null default.
    """

    # --- identity ---
    source: str
    source_id: str

    # --- core ---
    title: str | None = None
    company: str | None = None
    description: str | None = None
    url: str | None = None

    # --- location ---
    city: str | None = None
    region: str | None = None
    country: str | None = None
    is_remote: bool | None = None

    # --- compensation ---
    salary: Salary | None = None

    # --- classification ---
    experience: str | None = None
    employment_type: str | None = None  # full-time, part-time, contract …
    schedule: str | None = None  # remote, office, hybrid …
    skills: list[str] = Field(default_factory=list)

    # --- timestamps ---
    published_at: datetime | None = None
    # Defaults to the (timezone-aware, UTC) moment the record is created.
    fetched_at: datetime = Field(default_factory=lambda: datetime.now(UTC))

    # --- lifecycle ---
    status: VacancyStatus = VacancyStatus.CREATED

    # --- extra ---
    # NOTE(review): presumably the untouched payload from the source — confirm.
    raw: dict[str, Any] = Field(default_factory=dict)
vpr/core/pipeline.py ADDED
@@ -0,0 +1,59 @@
1
+ """Pipeline: fetch → transform → write."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from rich.console import Console
6
+
7
+ from vpr.config.profile import ProfileConfig
8
+ from vpr.config.source_config import SourceConfig
9
+ from vpr.core.models import Vacancy
10
+ from vpr.sources.registry import get_source_class
11
+ from vpr.writers.registry import get_writer_class
12
+
13
# Module-wide rich console used for pipeline progress output.
console = Console()


async def run_source_pipeline(
    *,
    source_config: SourceConfig,
    profile: ProfileConfig,
    steps: list[str],
    full_refresh: bool = False,
) -> None:
    """Run the requested stages (fetch → transform → write) for one source.

    Vacancies only come from the fetch stage, so without ``"fetch"`` in
    *steps* the later stages have nothing to work on and are skipped.
    Records are written to the targets named by the source config, or to
    every target of *profile* when the source names none; names missing from
    the profile are skipped with a warning.

    NOTE(review): ``full_refresh`` is accepted but never read in this function.
    """
    collected: list[Vacancy] = []

    # --- FETCH ---
    if "fetch" in steps:
        console.print(" [dim]fetching…[/dim]")
        fetcher = get_source_class(source_config.type)(source_config)
        collected = [item async for item in fetcher.fetch()]
        console.print(f" [dim]fetched {len(collected)} vacancies[/dim]")

    # --- TRANSFORM ---
    if collected and "transform" in steps:
        console.print(" [dim]transforming…[/dim]")
        from vpr.transforms.normalize import normalize

        collected = list(map(normalize, collected))

    # --- WRITE ---
    if not (collected and "write" in steps):
        return

    # Explicit per-source targets win; otherwise use every target of the profile.
    target_names = source_config.targets or list(profile.targets)
    for target_name in target_names:
        target_cfg = profile.targets.get(target_name)
        if target_cfg is None:
            console.print(f" [yellow]target '{target_name}' not found in profile, skipping[/yellow]")
            continue

        sink = get_writer_class(target_cfg.type)(target_cfg)
        try:
            written = await sink.write(collected)
            console.print(f" [dim]wrote {written} rows → {target_name}[/dim]")
        finally:
            # Always release the writer, even when the write fails.
            await sink.close()
vpr/core/selector.py ADDED
@@ -0,0 +1,35 @@
1
+ """Source selector: resolve --select / --exclude to a list of SourceConfig."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from fnmatch import fnmatch
6
+
7
+ from vpr.config.source_config import SourceConfig
8
+
9
# Selectors starting with this prefix match on tags instead of names.
TAG_PREFIX = "tag:"


def select_sources(
    all_sources: list[SourceConfig],
    selector: str,
    *,
    excludes: set[str] | None = None,
) -> list[SourceConfig]:
    """Pick the sources addressed by *selector*, dropping any named in *excludes*.

    Supported selector forms:
    - ``tag:<tag>`` — every source carrying that tag
    - a pattern containing ``*``, ``?`` or ``[`` — glob match on the source name
    - anything else — exact match on the source name

    The result keeps the order of *all_sources*; *excludes* is compared
    against exact source names.
    """
    skipped = excludes or set()

    if selector.startswith(TAG_PREFIX):
        wanted_tag = selector.removeprefix(TAG_PREFIX)

        def accepts(candidate: SourceConfig) -> bool:
            return wanted_tag in candidate.tags

    elif "*" in selector or "?" in selector or "[" in selector:

        def accepts(candidate: SourceConfig) -> bool:
            return fnmatch(candidate.name, selector)

    else:

        def accepts(candidate: SourceConfig) -> bool:
            return candidate.name == selector

    return [
        candidate
        for candidate in all_sources
        if accepts(candidate) and candidate.name not in skipped
    ]
@@ -0,0 +1,3 @@
1
+ """Source plugins — import here so @register_source decorators fire."""
2
+
3
+ import vpr.sources.headhunter # noqa: F401
vpr/sources/base.py ADDED
@@ -0,0 +1,28 @@
1
+ """Base class for vacancy sources."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from abc import ABC, abstractmethod
6
+ from typing import AsyncIterator
7
+
8
+ from vpr.config.source_config import SourceConfig
9
+ from vpr.core.models import Vacancy
10
+
11
+
12
class BaseSource(ABC):
    """Every source plugin extends this class.

    A source receives its YAML-defined ``params`` and yields raw Vacancy
    objects (with at least ``source`` and ``source_id`` filled).
    """

    source_type: str  # must match the ``type`` field in source YAML

    def __init__(self, config: SourceConfig) -> None:
        """Keep the full source config and expose its ``params`` directly."""
        self.config = config
        self.params = config.params

    @abstractmethod
    async def fetch(self) -> AsyncIterator[Vacancy]:
        """Yield vacancies from the external source."""
        # The pipeline consumes this with ``async for`` over ``fetch()``, so
        # concrete sources are expected to implement it as an async generator.
        ...  # pragma: no cover