vscode-ark 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
vscode_ark/__init__.py ADDED
@@ -0,0 +1,3 @@
"""VS Code Ark - VS Code/Copilot Chat session intelligence analysis system."""

# Version string of the vscode_ark package.
__version__ = "0.1.0"
vscode_ark/audit.py ADDED
@@ -0,0 +1,98 @@
1
+ #!/usr/bin/env python3
2
+ import sqlite3, os
3
+
4
# Open the audit database (hard-coded location) and return rows as sqlite3.Row.
# NOTE(review): sqlite3.connect() creates an empty database file if the path
# does not exist; the first query below would then fail with "no such table".
conn = sqlite3.connect('/Volumes/intel/vscode-ark/vscode-ark.db')
conn.row_factory = sqlite3.Row

print('=== Sessions per workspace in DB (top 20) ===')
for r in conn.execute(
    'SELECT workspace_id, COUNT(*) n FROM sessions '
    'GROUP BY workspace_id ORDER BY n DESC LIMIT 20'
).fetchall():
    # str() keeps the slice safe when workspace_id is NULL (None) or not text;
    # for ordinary string ids the output is unchanged.
    print(f' {str(r[0])[:16]} {r[1]} sessions')

total = conn.execute('SELECT COUNT(*) FROM sessions').fetchone()[0]
ws_with_sessions = conn.execute(
    'SELECT COUNT(DISTINCT workspace_id) FROM sessions'
).fetchone()[0]
# NULLs are filtered out of the subquery: `x NOT IN (..., NULL)` is never true
# in SQL, so a single session row with a NULL workspace_id would otherwise
# force this count to 0.
ws_no_sessions = conn.execute(
    'SELECT COUNT(*) FROM workspaces WHERE workspace_id NOT IN '
    '(SELECT workspace_id FROM sessions WHERE workspace_id IS NOT NULL)'
).fetchone()[0]
print(f'\n Total sessions in DB: {total} across {ws_with_sessions} workspaces ({ws_no_sessions} workspaces have 0 sessions)')
15
+
16
print()
print('=== session_storage coverage ===')

# Rows of session_storage with at least one has_* flag equal to 1.
(has_any,) = conn.execute(
    'SELECT COUNT(*) FROM session_storage '
    'WHERE has_transcript=1 OR has_chat_session=1 OR has_debug_log=1 '
    'OR has_tool_outputs=1 OR has_edit_session=1'
).fetchone()

# Rows of session_storage where every has_* flag equals 0.
(has_none,) = conn.execute(
    'SELECT COUNT(*) FROM session_storage '
    'WHERE has_transcript=0 AND has_chat_session=0 AND has_debug_log=0 '
    'AND has_tool_outputs=0 AND has_edit_session=0'
).fetchone()

print(f' Sessions with at least 1 real file: {has_any}')
print(f' Sessions with NO files (index-only): {has_none}')
22
+
23
print()
print('=== VFS counts by type ===')

# One output line per distinct source_type in the vfs table, largest group first.
vfs_by_type = conn.execute(
    'SELECT source_type, COUNT(*) n FROM vfs GROUP BY source_type ORDER BY n DESC'
)
for source_type, n_entries in vfs_by_type:
    print(f' {source_type:<22} {n_entries}')
27
+
28
def vfs_count(t):
    """Return the number of ``vfs`` rows whose ``source_type`` equals *t*.

    The query runs on the module-level ``conn`` and binds *t* as a parameter.
    """
    cursor = conn.execute('SELECT COUNT(*) FROM vfs WHERE source_type=?', (t,))
    row = cursor.fetchone()
    return row[0]
30
+
31
# Count memory_files rows whose scope is anything other than 'global'.
# The value is bound as a parameter: a double-quoted "global" is an identifier
# in SQL and is only treated as a string by SQLite's legacy fallback, which
# can be disabled at build time.
ws_mem_in_db = conn.execute(
    'SELECT COUNT(*) FROM memory_files WHERE scope != ?', ('global',)
).fetchone()[0]
32
+
33
print()
print('=== On-disk counts vs DB ===')

# VS Code per-workspace storage root under the current user's home directory
# (macOS-style "Library/Application Support" layout).
home = os.path.expanduser('~')
vs_root = os.path.join(home, 'Library/Application Support/Code/User/workspaceStorage')

# Names of the immediate sub-directories of vs_root. A missing root is reported
# and treated as "no workspaces" instead of raising FileNotFoundError.
if os.path.isdir(vs_root):
    ws_dirs = [d for d in os.listdir(vs_root) if os.path.isdir(os.path.join(vs_root, d))]
else:
    print(f' workspaceStorage directory not found: {vs_root}')
    ws_dirs = []

# Running on-disk totals, filled in by the per-workspace scan.
cs_disk = 0      # chatSessions/*.jsonl files
tr_disk = 0      # transcripts/*.jsonl files
edit_disk = 0    # directories under chatEditingSessions/
ws_mem_disk = 0  # files under memory-tool/memories/
sem_disk = 0     # workspaces that have workspace-chunks.db
ft_disk = 0      # workspaces that have a local-index* entry
tool_disk = 0    # tool output directories containing content.txt
46
+
47
# Visit each workspace storage directory and add its artifacts to the totals.
for ws in ws_dirs:
    ws_path = os.path.join(vs_root, ws)
    copilot_path = os.path.join(ws_path, 'GitHub.copilot-chat')

    # chatSessions: *.jsonl entries directly inside the directory.
    cs_dir = os.path.join(ws_path, 'chatSessions')
    if os.path.isdir(cs_dir):
        cs_disk += sum(1 for name in os.listdir(cs_dir) if name.endswith('.jsonl'))

    # transcripts: *.jsonl entries directly inside the directory.
    tr_dir = os.path.join(copilot_path, 'transcripts')
    if os.path.isdir(tr_dir):
        tr_disk += sum(1 for name in os.listdir(tr_dir) if name.endswith('.jsonl'))

    # editSessions: one per sub-directory of chatEditingSessions.
    edit_dir = os.path.join(ws_path, 'chatEditingSessions')
    if os.path.isdir(edit_dir):
        edit_disk += sum(
            1 for name in os.listdir(edit_dir)
            if os.path.isdir(os.path.join(edit_dir, name))
        )

    # tool outputs: directories two levels below chat-session-resources that
    # contain a content.txt entry.
    tool_dir = os.path.join(copilot_path, 'chat-session-resources')
    if os.path.isdir(tool_dir):
        for outer_name in os.listdir(tool_dir):
            outer_path = os.path.join(tool_dir, outer_name)
            if not os.path.isdir(outer_path):
                continue
            for inner_name in os.listdir(outer_path):
                inner_path = os.path.join(outer_path, inner_name)
                if not os.path.isdir(inner_path):
                    continue
                if os.path.exists(os.path.join(inner_path, 'content.txt')):
                    tool_disk += 1

    # workspace memory files: every file at any depth under memory-tool/memories.
    mem_dir = os.path.join(copilot_path, 'memory-tool', 'memories')
    if os.path.isdir(mem_dir):
        for _dirpath, _dirnames, filenames in os.walk(mem_dir):
            ws_mem_disk += len(filenames)

    # semantic index: counted once per workspace that has workspace-chunks.db.
    chunks_db = os.path.join(copilot_path, 'workspace-chunks.db')
    if os.path.exists(chunks_db):
        sem_disk += 1

    # full-text index: counted once per workspace with any local-index* entry.
    if any(name.startswith('local-index') for name in os.listdir(ws_path)):
        ft_disk += 1
90
+
91
# Query each VFS count once so the printed "VFS" value and the "gap" derived
# from it always agree (the original ran every COUNT(*) twice per line).
cs_vfs = vfs_count("chat_session")
tr_vfs = vfs_count("transcript")
edit_vfs = vfs_count("edit_state")
tool_vfs = vfs_count("tool_output")

# gap = on-disk count minus the count recorded in the database.
print(f' chatSessions .jsonl disk: {cs_disk:>5} VFS: {cs_vfs:>5} gap: {cs_disk - cs_vfs}')
print(f' transcripts .jsonl disk: {tr_disk:>5} VFS: {tr_vfs:>5} gap: {tr_disk - tr_vfs}')
print(f' editSession dirs disk: {edit_disk:>5} VFS: {edit_vfs:>5} gap: {edit_disk - edit_vfs}')
print(f' tool output files disk: {tool_disk:>5} VFS: {tool_vfs:>5} gap: {tool_disk - tool_vfs}')
print(f' workspace memory files disk: {ws_mem_disk:>5} DB: {ws_mem_in_db:>5} gap: {ws_mem_disk - ws_mem_in_db}')
# These two are reported from disk only; no database count is compared.
print(f' workspace-chunks.db disk: {sem_disk:>5} (intentionally excluded — path-only)')
print(f' local-index* DBs disk: {ft_disk:>5} (intentionally excluded — path-only)')
conn.close()