feat(memory): add SQLite+FTS5 database layer
New memory/database.py with memory_meta table (path, category, size_bytes, modified, summary, keywords, embedding BLOB reserved for Stage 8) and memory_fts virtual table for full-text search. Public API: init_db, upsert, remove, search, list_all, migrate_from_files. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,194 @@
|
||||
import datetime
|
||||
import json
|
||||
import sqlite3
|
||||
from contextlib import contextmanager
|
||||
from pathlib import Path
|
||||
|
||||
from pyra.memory import _MEMORY_ROOT
|
||||
from pyra.utils.paths import safe_chmod
|
||||
|
||||
# Location of the SQLite database file, directly under the memory root.
_DB_PATH = _MEMORY_ROOT / "memory.db"
|
||||
# File names that migrate_from_files() skips when scanning for memory files.
_EXCLUDED = {"MEMORY_INDEX.md", "memory_index.json"}
|
||||
|
||||
|
||||
@contextmanager
def _connect():
    """Yield a connection to the memory database, wrapped as one transaction.

    Rows are returned as ``sqlite3.Row`` so columns can be read by name.
    The transaction is committed when the ``with`` body finishes normally.
    If the body (or the commit) raises an ``Exception``, the transaction is
    rolled back and the exception re-raised. The connection is closed in
    every case.
    """
    db = sqlite3.connect(str(_DB_PATH))
    db.row_factory = sqlite3.Row
    try:
        try:
            yield db
            db.commit()
        except Exception:
            # Undo partial writes before letting the error propagate.
            db.rollback()
            raise
    finally:
        db.close()
|
||||
|
||||
|
||||
def init_db() -> None:
    """Create the database schema if it is not there yet.

    Two objects are created, both with ``IF NOT EXISTS`` so repeated calls
    leave existing data alone:

    * ``memory_meta`` - one row per memory file (unique ``path``) holding
      category, size, modification stamp, summary, a JSON keyword list and
      an ``embedding`` BLOB column that defaults to NULL.
    * ``memory_fts`` - an FTS5 virtual table indexing ``body``, ``summary``
      and ``keywords``; ``path`` is stored but not indexed.

    Afterwards ``safe_chmod`` is asked to set mode ``0o600`` on the
    database file.
    """
    create_meta_table = """
        CREATE TABLE IF NOT EXISTS memory_meta (
            id INTEGER PRIMARY KEY,
            path TEXT UNIQUE NOT NULL,
            category TEXT NOT NULL DEFAULT 'root',
            size_bytes INTEGER NOT NULL DEFAULT 0,
            modified TEXT NOT NULL DEFAULT '',
            summary TEXT NOT NULL DEFAULT '',
            keywords TEXT NOT NULL DEFAULT '[]',
            embedding BLOB DEFAULT NULL
        )
    """
    create_fts_table = """
        CREATE VIRTUAL TABLE IF NOT EXISTS memory_fts USING fts5(
            path UNINDEXED,
            body,
            summary,
            keywords
        )
    """
    with _connect() as conn:
        for ddl in (create_meta_table, create_fts_table):
            conn.execute(ddl)
    safe_chmod(_DB_PATH, 0o600)
|
||||
|
||||
|
||||
def upsert(
    path: str,
    *,
    content: str,
    category: str = "root",
    size_bytes: int = 0,
    modified: str = "",
    summary: str = "",
    keywords: list[str] | None = None,
) -> None:
    """Insert or refresh the metadata row and full-text entry for ``path``.

    Args:
        path: Key identifying the memory file; unique in ``memory_meta``.
        content: Text stored in the ``body`` column of the FTS index.
        category: Value for the ``category`` column.
        size_bytes: Value for the ``size_bytes`` column.
        modified: Value for the ``modified`` column.
        summary: Summary stored in both tables.
        keywords: Keyword list; ``None`` or empty is treated as ``[]``.
            Stored as JSON in ``memory_meta`` and space-joined in the FTS
            index.
    """
    kw_list = keywords or []
    meta_params = (path, category, size_bytes, modified, summary, json.dumps(kw_list))
    fts_params = (path, content, summary, " ".join(kw_list))

    meta_sql = """INSERT INTO memory_meta (path, category, size_bytes, modified, summary, keywords)
               VALUES (?, ?, ?, ?, ?, ?)
               ON CONFLICT(path) DO UPDATE SET
                   category=excluded.category,
                   size_bytes=excluded.size_bytes,
                   modified=excluded.modified,
                   summary=excluded.summary,
                   keywords=excluded.keywords"""

    with _connect() as conn:
        conn.execute(meta_sql, meta_params)
        # The FTS table has no uniqueness constraint on path, so the old
        # entry is deleted before the fresh one is inserted.
        conn.execute("DELETE FROM memory_fts WHERE path = ?", (path,))
        conn.execute(
            "INSERT INTO memory_fts (path, body, summary, keywords) VALUES (?, ?, ?, ?)",
            fts_params,
        )
|
||||
|
||||
|
||||
def remove(path: str) -> None:
    """Delete the rows for ``path`` from both tables in one transaction.

    The metadata row is deleted first, then the full-text entry. Removing
    a path that is not present is a no-op.
    """
    delete_statements = (
        "DELETE FROM memory_meta WHERE path = ?",
        "DELETE FROM memory_fts WHERE path = ?",
    )
    with _connect() as conn:
        for sql in delete_statements:
            conn.execute(sql, (path,))
|
||||
|
||||
|
||||
def search(query: str, limit: int = 20) -> list[dict]:
    """FTS5 full-text search; returns [{file, summary, keywords, snippet}].

    The full-text index and the metadata table are joined in a single
    query. ``limit`` therefore counts only entries that are actually
    returned, and no variable-length ``IN (...)`` list is needed (a large
    ``limit`` could otherwise exceed SQLite's bound-parameter cap).

    Args:
        query: FTS5 ``MATCH`` expression, passed to SQLite as a bound
            parameter.
        limit: Maximum number of results to return.

    Returns:
        One dict per match, best rank first, with keys ``file`` (the stored
        path), ``summary``, ``keywords`` (decoded from JSON) and ``snippet``
        (an excerpt of the body with matches wrapped in ``[``/``]``).
        An empty list is returned when the database file does not exist,
        nothing matches, or SQLite raises ``OperationalError`` (for example
        a malformed MATCH expression or missing tables).
    """
    if not _DB_PATH.exists():
        return []

    sql = """SELECT memory_fts.path AS path,
                    snippet(memory_fts, 1, '[', ']', '…', 32) AS snip,
                    memory_meta.summary AS summary,
                    memory_meta.keywords AS keywords
             FROM memory_fts
             JOIN memory_meta ON memory_meta.path = memory_fts.path
             WHERE memory_fts MATCH ?
             ORDER BY memory_fts.rank
             LIMIT ?"""
    try:
        with _connect() as conn:
            rows = conn.execute(sql, (query, limit)).fetchall()
    except sqlite3.OperationalError:
        # Deliberate best-effort: a bad query string or an uninitialised
        # database yields "no results" rather than an error for the caller.
        return []

    return [
        {
            "file": row["path"],
            "summary": row["summary"],
            "keywords": json.loads(row["keywords"] or "[]"),
            "snippet": row["snip"],
        }
        for row in rows
    ]
|
||||
|
||||
|
||||
def list_all() -> list[dict]:
    """Return every ``memory_meta`` row as a dict, sorted by path.

    Each dict has the keys ``path``, ``category``, ``size_bytes``,
    ``modified``, ``summary`` and ``keywords`` (decoded from its JSON
    text). An empty list is returned when the database file is missing.
    """
    if not _DB_PATH.exists():
        return []

    plain_columns = ("path", "category", "size_bytes", "modified", "summary")
    with _connect() as conn:
        cursor = conn.execute(
            "SELECT path, category, size_bytes, modified, summary, keywords "
            "FROM memory_meta ORDER BY path"
        )
        records = cursor.fetchall()

    entries = []
    for record in records:
        entry = {column: record[column] for column in plain_columns}
        # Keywords are stored as a JSON array; empty text counts as "[]".
        entry["keywords"] = json.loads(record["keywords"] or "[]")
        entries.append(entry)
    return entries
|
||||
|
||||
|
||||
def migrate_from_files() -> None:
    """Populate DB from existing .md files on first run; no-op if DB already has entries.

    Does nothing when the database file does not exist or ``memory_meta``
    already contains rows. Otherwise every ``*.md`` file under the memory
    root (except the names in ``_EXCLUDED``) is passed to ``upsert``.
    Summaries and keywords are taken from ``memory_index.json`` when that
    file exists and holds an entry for the file.

    Files are read as UTF-8 with undecodable bytes replaced, so a single
    badly encoded file cannot abort the migration. A legacy index that is
    unreadable, not valid JSON, or not a JSON object is ignored. Paths that
    match ``*.md`` but are not regular files are skipped.
    """
    if not _DB_PATH.exists():
        return
    with _connect() as conn:
        count = conn.execute("SELECT COUNT(*) FROM memory_meta").fetchone()[0]
    if count > 0:
        return

    json_index_path = _MEMORY_ROOT / "memory_index.json"
    existing: dict = {}
    if json_index_path.exists():
        try:
            loaded = json.loads(json_index_path.read_text(encoding="utf-8"))
        except (ValueError, OSError):
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            loaded = None
        if isinstance(loaded, dict):
            existing = loaded

    for f in sorted(_MEMORY_ROOT.rglob("*.md")):
        # is_file() also filters directories and broken symlinks whose
        # names happen to end in ".md".
        if f.name in _EXCLUDED or not f.is_file():
            continue
        rel = f.relative_to(_MEMORY_ROOT)
        rel_key = rel.as_posix()
        # Files directly under the memory root get the "root" category;
        # otherwise the first directory component is the category.
        category = rel.parts[0] if len(rel.parts) > 1 else "root"
        stat = f.stat()
        mtime = datetime.datetime.fromtimestamp(stat.st_mtime).isoformat(timespec="seconds")
        prev = existing.get(rel_key)
        if not isinstance(prev, dict):
            prev = {}
        try:
            content = f.read_text(encoding="utf-8", errors="replace")
        except OSError:
            # Unreadable file: still record its metadata, with an empty body.
            content = ""
        upsert(
            rel_key,
            content=content,
            category=category,
            size_bytes=stat.st_size,
            modified=mtime,
            # "or" guards against explicit nulls in the legacy index, which
            # would violate the NOT NULL constraint on summary.
            summary=prev.get("summary") or "",
            keywords=prev.get("keywords") or [],
        )
|
||||
Reference in New Issue
Block a user