feat(memory): add SQLite+FTS5 database layer

New memory/database.py with memory_meta table (path, category, size_bytes,
modified, summary, keywords, embedding BLOB reserved for Stage 8) and
memory_fts virtual table for full-text search. Public API: init_db, upsert,
remove, search, list_all, migrate_from_files.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
curo1305
2026-05-18 15:23:43 +02:00
parent 84785967c3
commit 45e6ec32ec
+194
View File
@@ -0,0 +1,194 @@
import datetime
import json
import sqlite3
from contextlib import contextmanager
from pathlib import Path
from pyra.memory import _MEMORY_ROOT
from pyra.utils.paths import safe_chmod
# Location of the SQLite database file, directly under the memory root.
_DB_PATH = _MEMORY_ROOT / "memory.db"
# File names that migrate_from_files() skips while scanning the memory root.
_EXCLUDED = {"MEMORY_INDEX.md", "memory_index.json"}
@contextmanager
def _connect():
    """Yield a connection to the memory database inside a transaction scope.

    Rows are returned as ``sqlite3.Row`` so columns can be read by name.
    The transaction is committed when the ``with`` body finishes normally;
    if the body (or the commit itself) raises an ``Exception`` the
    transaction is rolled back and the error is re-raised. The connection
    is closed in every case.
    """
    db = sqlite3.connect(str(_DB_PATH))
    db.row_factory = sqlite3.Row
    try:
        try:
            yield db
            # Commit stays inside the guarded region so a failing commit
            # is rolled back as well.
            db.commit()
        except Exception:
            db.rollback()
            raise
    finally:
        db.close()
def init_db() -> None:
    """Create the schema if it is missing and restrict the DB file mode.

    Two objects are created with ``IF NOT EXISTS`` so the call is safe to
    repeat: the ``memory_meta`` table (one row per memory file) and the
    ``memory_fts`` FTS5 virtual table used for full-text search. Afterwards
    ``safe_chmod`` is asked to apply mode ``0o600`` to the database file.
    """
    # ``embedding`` is declared here but no code in this module writes it.
    meta_ddl = """
        CREATE TABLE IF NOT EXISTS memory_meta (
        id INTEGER PRIMARY KEY,
        path TEXT UNIQUE NOT NULL,
        category TEXT NOT NULL DEFAULT 'root',
        size_bytes INTEGER NOT NULL DEFAULT 0,
        modified TEXT NOT NULL DEFAULT '',
        summary TEXT NOT NULL DEFAULT '',
        keywords TEXT NOT NULL DEFAULT '[]',
        embedding BLOB DEFAULT NULL
        )
        """
    # ``path`` is stored in the FTS table but excluded from the text index.
    fts_ddl = """
        CREATE VIRTUAL TABLE IF NOT EXISTS memory_fts USING fts5(
        path UNINDEXED,
        body,
        summary,
        keywords
        )
        """
    with _connect() as db:
        for ddl in (meta_ddl, fts_ddl):
            db.execute(ddl)
    safe_chmod(_DB_PATH, 0o600)
def upsert(
    path: str,
    *,
    content: str,
    category: str = "root",
    size_bytes: int = 0,
    modified: str = "",
    summary: str = "",
    keywords: list[str] | None = None,
) -> None:
    """Insert or refresh the metadata row and the search row for *path*.

    Args:
        path: Unique key of the entry in both tables.
        content: Text stored in the ``body`` column of the FTS table.
        category: Value for ``memory_meta.category``.
        size_bytes: Value for ``memory_meta.size_bytes``.
        modified: Value for ``memory_meta.modified``.
        summary: Stored in both the metadata row and the FTS row.
        keywords: Stored as a JSON array in ``memory_meta`` and as
            space-separated text in ``memory_fts``; ``None`` means no
            keywords.
    """
    kw_list = keywords or []
    meta_params = (path, category, size_bytes, modified, summary, json.dumps(kw_list))
    fts_params = (path, content, summary, " ".join(kw_list))

    meta_sql = """INSERT INTO memory_meta (path, category, size_bytes, modified, summary, keywords)
        VALUES (?, ?, ?, ?, ?, ?)
        ON CONFLICT(path) DO UPDATE SET
        category=excluded.category,
        size_bytes=excluded.size_bytes,
        modified=excluded.modified,
        summary=excluded.summary,
        keywords=excluded.keywords"""
    fts_insert_sql = (
        "INSERT INTO memory_fts (path, body, summary, keywords) VALUES (?, ?, ?, ?)"
    )

    with _connect() as db:
        db.execute(meta_sql, meta_params)
        # The FTS table has no uniqueness constraint on path, so the
        # previous row is dropped explicitly before the new one is added.
        db.execute("DELETE FROM memory_fts WHERE path = ?", (path,))
        db.execute(fts_insert_sql, fts_params)
def remove(path: str) -> None:
    """Delete the entry for *path* from the metadata table and the FTS table.

    Both deletions run in the same transaction; removing a path that is not
    present simply affects no rows.
    """
    statements = (
        "DELETE FROM memory_meta WHERE path = ?",
        "DELETE FROM memory_fts WHERE path = ?",
    )
    with _connect() as db:
        for sql in statements:
            db.execute(sql, (path,))
def search(query: str, limit: int = 20) -> list[dict]:
    """FTS5 full-text search; returns [{file, summary, keywords, snippet}].

    Args:
        query: Expression handed to FTS5 ``MATCH`` unchanged, so FTS5 query
            syntax applies.
        limit: Maximum number of results to return.

    Returns:
        Matches ordered by FTS5 ``rank``. Each item holds the entry path
        under ``"file"``, the stored ``"summary"``, the decoded
        ``"keywords"`` list, and a ``"snippet"`` taken from the body column
        with matches wrapped in ``[``/``]``. An empty list is returned when
        the database file does not exist or SQLite raises
        ``OperationalError`` (for example, a query that is not valid FTS5
        syntax).
    """
    if not _DB_PATH.exists():
        return []

    # One joined query instead of an FTS query followed by an
    # ``IN (?, ?, ...)`` lookup: the placeholder list grew with ``limit`` and
    # could exceed SQLite's bound-parameter cap, which the handler below
    # would have turned into a silent empty result. Joining also means FTS
    # rows without a metadata row no longer use up LIMIT slots.
    sql = """
        SELECT memory_fts.path AS path,
               snippet(memory_fts, 1, '[', ']', '', 32) AS snip,
               memory_meta.summary AS summary,
               memory_meta.keywords AS keywords
        FROM memory_fts
        JOIN memory_meta ON memory_meta.path = memory_fts.path
        WHERE memory_fts MATCH ?
        ORDER BY rank
        LIMIT ?
    """
    try:
        with _connect() as conn:
            rows = conn.execute(sql, (query, limit)).fetchall()
    except sqlite3.OperationalError:
        # Deliberate best effort: search failures degrade to "no results".
        return []

    return [
        {
            "file": row["path"],
            "summary": row["summary"],
            "keywords": json.loads(row["keywords"] or "[]"),
            "snippet": row["snip"],
        }
        for row in rows
    ]
def list_all() -> list[dict]:
    """Return every ``memory_meta`` row as a dict, sorted by path.

    Each dict carries ``path``, ``category``, ``size_bytes``, ``modified``,
    ``summary`` and ``keywords`` (decoded from its JSON column). Returns an
    empty list when the database file does not exist.
    """
    if not _DB_PATH.exists():
        return []

    sql = (
        "SELECT path, category, size_bytes, modified, summary, keywords "
        "FROM memory_meta ORDER BY path"
    )
    with _connect() as db:
        records = db.execute(sql).fetchall()

    plain_columns = ("path", "category", "size_bytes", "modified", "summary")
    entries = []
    for rec in records:
        entry = {col: rec[col] for col in plain_columns}
        # keywords is stored as JSON text; an empty value means "no keywords".
        entry["keywords"] = json.loads(rec["keywords"] or "[]")
        entries.append(entry)
    return entries
def migrate_from_files() -> None:
    """Populate DB from existing .md files on first run; no-op if DB already has entries.

    Does nothing when the database file is missing or ``memory_meta``
    already contains rows. Otherwise every ``*.md`` file below the memory
    root (except names in ``_EXCLUDED``) is upserted, keyed by its POSIX
    path relative to the root. Files directly in the root get category
    ``"root"``; others use their first path component. Summary and keywords
    are taken from ``memory_index.json`` when that file holds a usable entry
    for the path.

    The scan is deliberately tolerant: because a single committed row makes
    later calls a no-op, one bad file must not abort the run halfway.
    Unreadable or vanished files and malformed index data are therefore
    skipped or defaulted rather than raised.
    """
    if not _DB_PATH.exists():
        return
    with _connect() as conn:
        count = conn.execute("SELECT COUNT(*) FROM memory_meta").fetchone()[0]
    if count > 0:
        return

    json_index_path = _MEMORY_ROOT / "memory_index.json"
    existing: dict = {}
    if json_index_path.exists():
        try:
            loaded = json.loads(json_index_path.read_text(encoding="utf-8"))
        except (json.JSONDecodeError, UnicodeDecodeError, OSError):
            loaded = None
        # Anything other than a JSON object cannot be looked up by path.
        if isinstance(loaded, dict):
            existing = loaded

    for f in sorted(_MEMORY_ROOT.rglob("*.md")):
        if f.name in _EXCLUDED:
            continue
        try:
            # rglob also yields directories whose name ends in ".md".
            if not f.is_file():
                continue
            stat = f.stat()
        except OSError:
            # The file disappeared or is inaccessible; leave it out.
            continue

        rel = f.relative_to(_MEMORY_ROOT)
        rel_key = rel.as_posix()
        category = rel.parts[0] if len(rel.parts) > 1 else "root"
        mtime = datetime.datetime.fromtimestamp(stat.st_mtime).isoformat(timespec="seconds")

        prev = existing.get(rel_key)
        if not isinstance(prev, dict):
            prev = {}
        summary = prev.get("summary", "")
        if not isinstance(summary, str):
            # e.g. JSON null, which would violate the NOT NULL column.
            summary = ""
        keywords = prev.get("keywords", [])
        if not isinstance(keywords, list):
            keywords = []

        try:
            # Explicit UTF-8 with replacement so an undecodable byte cannot
            # abort the migration and the result is locale independent.
            content = f.read_text(encoding="utf-8", errors="replace")
        except OSError:
            content = ""

        upsert(
            rel_key,
            content=content,
            category=category,
            size_bytes=stat.st_size,
            modified=mtime,
            summary=summary,
            keywords=[str(k) for k in keywords],
        )