feat: port Memoria plugin to OpenClaw 2026.5.x hook system
Original plugin (git_proj/openclaw-memoria) uses deprecated registration pattern
and old hooks that don't work in current OpenClaw version. Ported to new API:
- Wrap entry point with definePluginEntry({ id, name, description, register })
instead of legacy { register } export
- Update SDK imports from 'openclaw/plugin-sdk/core' (deprecated barrel)
to narrow subpaths: 'openclaw/plugin-sdk/plugin-entry' and 'openclaw/plugin-sdk'
- Register as memory capability via manifest kind: 'memory'
- Add setup.providers section for provider auth configuration
Changed files: index.ts, recall.ts, continuous.ts, capture.ts, procedural-hooks.ts
New file: tsup.config.ts (ESM build with tsup)
Updated configs: package.json, tsconfig.json, openclaw.plugin.json
All 21 cognitive layers and business logic preserved unchanged.
Build output: dist/index.js (ESM) + dist/index.d.ts
This commit is contained in:
11
openclaw-memoria-port/.clawhubignore
Normal file
11
openclaw-memoria-port/.clawhubignore
Normal file
@@ -0,0 +1,11 @@
|
||||
node_modules/
|
||||
*.db
|
||||
*.db-shm
|
||||
*.db-wal
|
||||
*.bak*
|
||||
.git/
|
||||
benchmarks/results/
|
||||
install.sh.bak-v34
|
||||
audit-v25.ts
|
||||
tests/
|
||||
package-lock.json
|
||||
12
openclaw-memoria-port/.clawignore
Normal file
12
openclaw-memoria-port/.clawignore
Normal file
@@ -0,0 +1,12 @@
|
||||
# Dev/test files — not part of the published package
|
||||
audit-v25.ts
|
||||
bootstrap-topics.ts
|
||||
benchmarks/
|
||||
*.db
|
||||
*.db-journal
|
||||
*.db-shm
|
||||
*.db-wal
|
||||
*.bak
|
||||
*.tar.gz
|
||||
node_modules/
|
||||
.git/
|
||||
29
openclaw-memoria-port/.github/ISSUE_TEMPLATE/bug_report.md
vendored
Normal file
29
openclaw-memoria-port/.github/ISSUE_TEMPLATE/bug_report.md
vendored
Normal file
@@ -0,0 +1,29 @@
|
||||
---
|
||||
name: Bug Report
|
||||
about: Report a bug in Memoria
|
||||
title: '[Bug] '
|
||||
labels: bug
|
||||
---
|
||||
|
||||
## Describe the bug
|
||||
A clear description of what's wrong.
|
||||
|
||||
## To reproduce
|
||||
1. Configure Memoria with...
|
||||
2. Send a message...
|
||||
3. Check recall...
|
||||
|
||||
## Expected behavior
|
||||
What should happen.
|
||||
|
||||
## Environment
|
||||
- Memoria version: (e.g., v3.4.0)
|
||||
- OpenClaw version:
|
||||
- OS: (e.g., macOS 15, Ubuntu 24)
|
||||
- LLM provider: (e.g., Ollama gemma3:4b)
|
||||
- Embed model: (e.g., nomic-embed-text-v2-moe)
|
||||
|
||||
## Logs
|
||||
```
|
||||
Paste relevant logs here
|
||||
```
|
||||
18
openclaw-memoria-port/.github/ISSUE_TEMPLATE/feature_request.md
vendored
Normal file
18
openclaw-memoria-port/.github/ISSUE_TEMPLATE/feature_request.md
vendored
Normal file
@@ -0,0 +1,18 @@
|
||||
---
|
||||
name: Feature Request
|
||||
about: Suggest a new feature for Memoria
|
||||
title: '[Feature] '
|
||||
labels: enhancement
|
||||
---
|
||||
|
||||
## Problem
|
||||
What problem does this solve?
|
||||
|
||||
## Proposed solution
|
||||
How should it work?
|
||||
|
||||
## Alternatives considered
|
||||
Any other approaches you've thought about?
|
||||
|
||||
## Additional context
|
||||
Benchmarks, examples, or references.
|
||||
20
openclaw-memoria-port/.github/pull_request_template.md
vendored
Normal file
20
openclaw-memoria-port/.github/pull_request_template.md
vendored
Normal file
@@ -0,0 +1,20 @@
|
||||
## What changed
|
||||
Brief description of the changes.
|
||||
|
||||
## Why
|
||||
Motivation and context.
|
||||
|
||||
## Type
|
||||
- [ ] Feature
|
||||
- [ ] Bug fix
|
||||
- [ ] Documentation
|
||||
- [ ] Performance
|
||||
- [ ] Tests
|
||||
|
||||
## Testing
|
||||
- [ ] `npx tsx tests/test-core.ts` passes (22/22)
|
||||
- [ ] Tested with OpenClaw gateway restart
|
||||
- [ ] Benchmark run (if applicable)
|
||||
|
||||
## Breaking changes
|
||||
List any breaking changes or "None".
|
||||
82
openclaw-memoria-port/.github/workflows/ci.yml
vendored
Normal file
82
openclaw-memoria-port/.github/workflows/ci.yml
vendored
Normal file
@@ -0,0 +1,82 @@
|
||||
name: CI
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
pull_request:
|
||||
branches: [main]
|
||||
|
||||
jobs:
|
||||
typecheck:
|
||||
name: Typecheck (Node ${{ matrix.node }})
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
node: [20, 22, 24]
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: ${{ matrix.node }}
|
||||
cache: npm
|
||||
- run: npm ci
|
||||
- name: Typecheck (report errors, allow known baseline)
|
||||
run: |
|
||||
ERRORS=$(npx tsc --noEmit 2>&1 | grep -c "error TS" || true)
|
||||
echo "TypeScript errors: $ERRORS"
|
||||
# Baseline: 51 known errors (missing OpenClaw types, MemoriaConfig fields, etc.)
|
||||
# Phase 1.3 will fix these. For now, fail only if NEW errors are introduced.
|
||||
if [ "$ERRORS" -gt 55 ]; then
|
||||
echo "❌ Error count ($ERRORS) exceeds baseline+margin (55). New type errors introduced!"
|
||||
npx tsc --noEmit 2>&1 | tail -20
|
||||
exit 1
|
||||
fi
|
||||
echo "✅ Error count ($ERRORS) within baseline tolerance"
|
||||
|
||||
smoke:
|
||||
name: Smoke test (Node ${{ matrix.node }})
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
node: [20, 22, 24]
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: ${{ matrix.node }}
|
||||
cache: npm
|
||||
- run: npm ci
|
||||
- name: Boot test — all modules import cleanly
|
||||
run: |
|
||||
node --input-type=module <<'EOF'
|
||||
const modules = [
|
||||
"./db.ts", "./scoring.ts", "./selective.ts", "./embeddings.ts",
|
||||
"./graph.ts", "./topics.ts", "./observations.ts", "./patterns.ts",
|
||||
"./procedural.ts", "./lifecycle.ts", "./hebbian.ts", "./feedback.ts",
|
||||
"./fact-clusters.ts", "./md-regen.ts", "./fallback.ts",
|
||||
"./embed-fallback.ts", "./sync.ts", "./migrate.ts"
|
||||
];
|
||||
|
||||
let failed = 0;
|
||||
for (const m of modules) {
|
||||
try {
|
||||
await import(m);
|
||||
console.log(`✅ ${m}`);
|
||||
} catch (e) {
|
||||
// Some modules need runtime deps (better-sqlite3, openclaw)
|
||||
// Accept "Cannot find module" for native deps, reject syntax errors
|
||||
if (e.code === 'ERR_MODULE_NOT_FOUND' || e.message?.includes('better-sqlite3') || e.message?.includes('openclaw')) {
|
||||
console.log(`⚠️ ${m}: skipped (missing runtime dep)`);
|
||||
} else {
|
||||
console.error(`❌ ${m}: ${e.message}`);
|
||||
failed++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (failed > 0) {
|
||||
console.error(`\n❌ ${failed} module(s) failed to import`);
|
||||
process.exit(1);
|
||||
}
|
||||
console.log(`\n✅ All ${modules.length} modules checked`);
|
||||
EOF
|
||||
3
openclaw-memoria-port/.gitignore
vendored
Normal file
3
openclaw-memoria-port/.gitignore
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
node_modules/
|
||||
*.db
|
||||
install.sh.bak-v34
|
||||
587
openclaw-memoria-port/CHANGELOG.md
Normal file
587
openclaw-memoria-port/CHANGELOG.md
Normal file
@@ -0,0 +1,587 @@
|
||||
## 3.22.2 — Layer 21: Continuous Learning + 6 Bug Fixes (2026-03-28)
|
||||
## [3.22.3] — 2026-03-28
|
||||
|
||||
### Fixed
|
||||
- **CRITICAL: CONTINUOUS_ENABLED TDZ crash on Node 24.x** — `const` variable accessed before declaration at register time, causing plugin to fail silently on gateways running Node 24.x (embedded binary). Inlined config read for boot log.
|
||||
- **better-sqlite3 cross-Node-version build guide** — documented that `npx node-gyp rebuild --target=24.13.1` is needed when shell Node differs from gateway's embedded Node.
|
||||
|
||||
|
||||
### New: Layer 21 — Continuous Learning
|
||||
Real-time fact capture via `message_received` + `llm_output` hooks, independent of session end or compaction.
|
||||
- **3 extraction modes**: periodic (every N turns), urgent (on user frustration/error), self-error (on assistant self-admission)
|
||||
- **Cross-layer integration**: facts go through selective dedup → full postProcess pipeline (embed, graph, topics, observations, clusters, sync)
|
||||
- **Smart dedup with agent_end**: when continuous already captured during the session, agent_end reduces its extraction scope to avoid double LLM calls
|
||||
- **Configurable**: `continuous.interval` (default 4), `continuous.cooldownMs` (default 45s), `continuous.enabled` (default true)
|
||||
|
||||
### Bug Fixes (3 rounds of audit)
|
||||
**v3.22.0** — Initial implementation
|
||||
**v3.22.1** — Audit round 1:
|
||||
- `cfg.continuous` not typed in `MemoriaConfig` → added full interface
|
||||
- No `enabled` guard → both hooks now check `CONTINUOUS_ENABLED` before running
|
||||
- `cooldownMs` hardcoded → now reads from config
|
||||
- agent_end double capture → reduces scope when continuous already ran
|
||||
|
||||
**v3.22.2** — Audit round 2:
|
||||
- **Concurrent extraction risk** — urgent trigger during periodic extraction ran 2 extractions in parallel → added `continuousExtractionInProgress` lock with `finally` release
|
||||
- **Buffer never cleared** — same messages re-analyzed at each extraction → snapshot + clear before extraction
|
||||
|
||||
### Documentation
|
||||
- `docs/ARCHITECTURE.md`: full Layer 21 section with config, hooks, extraction modes, cross-layer integration
|
||||
- Boot log now shows continuous learning status
|
||||
|
||||
## 3.21.0 — Deep Audit: 10 Bugs Found & Fixed (2026-03-28)
|
||||
|
||||
### Critical Fixes
|
||||
- **Hebbian learning was 100% dead** — `hebbian.ts` used wrong column names (`from_entity`/`to_entity`/`relation_type`/`updated_at`) but DB has `source_id`/`target_id`/`relation`/`last_accessed_at`). All queries silently returned nothing since creation.
|
||||
- **Proactive revision never triggered** — searched for `lifecycle_state = 'mature'` but DB only has `fresh`/`settled`/`dormant`
|
||||
- **storeFact() lost 6 columns** on INSERT — `usefulness`, `recall_count`, `used_count`, `synced_to_md`, `relevance_weight`, `lifecycle_state` silently dropped
|
||||
- **Fact interface missing 4 DB columns** — 8 `as any` casts removed
|
||||
- **4 SQL queries filtered wrong lifecycle state** — `!= 'archived'` → `!= 'dormant'`
|
||||
- **Cross-layer 9b used wrong column names** — same `from_entity`/`to_entity` bug as hebbian
|
||||
- **revision.ts imported non-existent file** — `./llm-provider.js` → `./providers/types.js`
|
||||
- **procedural.ts wrong import path** — aligned to `./providers/types.js`
|
||||
- **Fact type unions incomplete** — added `"cluster"` and `"pattern"` to `fact_type`
|
||||
|
||||
## 3.20.1 — Audit: 6 Bugs Found & Fixed (2026-03-28)
|
||||
- Type alignment: `fact_type` and `lifecycle_state` unions updated
|
||||
- SQL queries: 4 instances of `archived` → `dormant`
|
||||
- Import fixes: `revision.ts` and `procedural.ts`
|
||||
|
||||
## 3.20.0 — Cross-Layer Connections (2026-03-28)
|
||||
- **Feedback → Lifecycle**: facts recalled 5+ times with usefulness ≥ 2 → auto-promoted to "settled"
|
||||
- **Hebbian → Topics**: strong relations (weight ≥ 1.0) auto-organize topic hierarchy (smaller becomes child)
|
||||
- **Lifecycle → Patterns**: patterns with 5+ occurrences → settled
|
||||
- Pattern detection step added to postProcessNewFacts
|
||||
|
||||
## 3.19.0 — Behavioral Pattern Detection, Layer 20 (2026-03-28)
|
||||
- New module `patterns.ts` (~477 LOC) — detects repeated similar facts and consolidates
|
||||
- Patterns stored as `fact_type = "pattern"` with occurrence metadata in tags
|
||||
- Wired into capture pipeline (postProcessNewFacts) and recall scoring
|
||||
- ARCHITECTURE.md updated to reflect Layer 20
|
||||
|
||||
## 3.18.0 — Fix Existing Layers, Phase 1 (2026-03-28)
|
||||
- New DB table `cluster_members` with backfill (~407 links)
|
||||
- Topics parent inference improved (composite-name strategy)
|
||||
- Boot-time reparenting of existing topics (17 reparented)
|
||||
- ARCHITECTURE.md reviewed/updated (19→20 layers documented)
|
||||
|
||||
## 3.17.0 — ClawHub Security Fixes (2026-03-28)
|
||||
- SKILL.md: explicit `entrypoint: index.ts`, env vars declared optional, expanded security section
|
||||
- Install instructions: plugin install (one command) + "review code first" for source
|
||||
- Bundle includes all .ts modules + index.ts
|
||||
- License: Apache-2.0 in YAML frontmatter
|
||||
|
||||
## 3.16.0 — ClawHub Suspicious Fix (2026-03-28)
|
||||
- Created `.clawignore` to exclude dev/audit artifacts
|
||||
- Declared env vars (OPENAI_API_KEY, OPENROUTER_API_KEY, OPENCLAW_WORKSPACE) as optional
|
||||
- Added security posture section to SKILL.md
|
||||
|
||||
## 3.14.1 — Error Detection: Touch Fire Once (2026-03-27)
|
||||
- Automatic error signal detection (crashes, frustration keywords, rollbacks)
|
||||
- Each error captures: what happened, why dangerous, what to never repeat, safe alternative
|
||||
|
||||
## 3.14.0 — Smarter Extraction + Consolidation (2026-03-27)
|
||||
- Extraction prompt rewritten: demands concrete details (who/what/when/why)
|
||||
- Cluster-aware recall: member facts get -40% score penalty
|
||||
- Procedures: first success = worth remembering (was: needed 3+ steps)
|
||||
- Added `failure_reasons` column for contextual alternative selection
|
||||
|
||||
## 3.12.0 — Capture Quality & Contradiction Detection
|
||||
|
||||
### Fix 1: Capture Filter
|
||||
- Only store **reusable** procedures (≥3 meaningful steps + at least one "action" command)
|
||||
- Skip noise: health checks, diagnostics, log inspections, status checks
|
||||
- Double-check LLM-assigned names against noise patterns
|
||||
|
||||
### Fix 2: Duplicate Detection
|
||||
- `findSimilarProcedure()` — word-overlap matching on name + goal (threshold 50%)
|
||||
- Before creating a new procedure, check if a similar one exists → reinforce instead of duplicate
|
||||
- Applied at both `extractProcedure()` and `after_tool_call` hook levels
|
||||
|
||||
### Fix 3: Contradiction Check on Facts
|
||||
- Widened entity search from 5 to 10 candidates
|
||||
- Version-containing facts prioritized in contradiction search
|
||||
- Enhanced contradiction prompt: version changes, status changes, quantity changes = explicit contradictions
|
||||
- Prevents stale facts (e.g., "Sol = v2.7.0") from persisting when newer facts arrive ("Sol = v3.11.0")
|
||||
|
||||
## 3.10.0 (2026-03-27)
|
||||
|
||||
### Features
|
||||
- **FTS5 procedural search**: `procedures_fts` virtual table with LIKE fallback — fast full-text search on procedures (name, goal, context, gotchas, steps)
|
||||
- **Configurable thresholds**: `ProceduralConfig` interface with `qualityWeights`, `degradationStep`, `healingStep`, `reflectEvery`, `degradedThreshold`, `defaultSafety`
|
||||
- **FTS auto-sync**: index created at boot, rebuilt if empty, kept in sync on every `storeProcedure` call
|
||||
- **Plugin schema**: procedural config exposed in `openclaw.plugin.json` for wizard/UI configuration
|
||||
|
||||
### Fixes
|
||||
- `kg` → `graph` variable reference (runtime crash)
|
||||
- Feedback proc IDs removed (was querying wrong table)
|
||||
- Procedure objects fully typed (no more `as any` partial objects)
|
||||
|
||||
# Changelog
|
||||
|
||||
## [3.9.0] - 2026-03-27
|
||||
### Added — Reflective Procedural Learning
|
||||
- **Quality dimensions** — each procedure scored on speed, reliability, elegance, safety
|
||||
- Weighted composite: reliability (35%) > safety (25%) > speed (25%) > elegance (15%)
|
||||
- Quality evolves with each execution, not static
|
||||
- **Post-execution reflection** — every 3rd success triggers LLM review
|
||||
- "Was this the best approach?" → suggestions, quality reassessment
|
||||
- Blends new assessment (70%) with accumulated wisdom (30%)
|
||||
- Tracks gotchas/workarounds learned
|
||||
- **Alternatives** — same goal, different approaches
|
||||
- `getAlternatives()` finds competing procedures
|
||||
- `setPreferred()` marks the best approach
|
||||
- Search prioritizes preferred procedures
|
||||
- **Version tracking** — procedures evolve: version increments on each improvement
|
||||
- **Personal best** — tracks fastest execution, speed quality improves when beaten
|
||||
- **Schema auto-migration** — new quality columns added seamlessly on boot
|
||||
|
||||
### Why
|
||||
"Un humain n'enregistre pas un savoir en rentrant chez lui le soir —
|
||||
il apprend sur le tas, il améliore en direct. La qualité passe par
|
||||
une meilleure réflexion, et c'est en améliorant la qualité qu'on
|
||||
gagne en vitesse d'exécution car on la reproduit plus souvent."
|
||||
|
||||
## [3.8.0] - 2026-03-27
|
||||
### Added — Real-time Procedural Learning
|
||||
- **`after_tool_call` hook** — captures procedures in real-time, not at end of session
|
||||
- Buffers tool calls during conversation (last 30)
|
||||
- On success signal (Published, ✅, deployed, committed, etc.) → immediately assembles procedure via LLM
|
||||
- If similar procedure exists → reinforces it (success_count++) and adds improvements
|
||||
- If new → creates new procedure with steps, goal, trigger patterns, gotchas
|
||||
- 60s cooldown between assemblies to avoid spam
|
||||
- Fingerprint dedup to avoid duplicate captures
|
||||
- `agent_end` remains as safety net for any uncaptured sequences
|
||||
|
||||
### Why this change
|
||||
- Humans learn on-the-fly, not at the end of the day
|
||||
- `agent_end` only fires at conversation end → in long-running sessions, procedures were never captured
|
||||
- Real-time learning means knowledge is available immediately for the next similar task
|
||||
|
||||
## [3.7.2] - 2026-03-27
|
||||
### Fixed — 3 Critical Memory Issues
|
||||
- **ProceduralMemory DB fix**: was receiving MemoriaDB wrapper instead of raw better-sqlite3 Database, causing `this.db.prepare is not a function` — procedures were "captured" in logs but never persisted (0 in DB)
|
||||
- **Recall query pollution fix**: FTS5 search was matching on OpenClaw envelope metadata (`"Conversation info (untrusted metadata)..."`) instead of actual user message — causing 89% of facts to never be recalled. Now strips envelope before search
|
||||
- **DB cleanup**: 22 vague/meta facts superseded (e.g., "Le nouveau fait complète l'ancien"), cortex.db archived
|
||||
|
||||
### Impact
|
||||
- Procedures now persist to SQLite correctly
|
||||
- Recall will match on what the user actually says, not Telegram metadata
|
||||
- 436 active facts (was 450, 14 were noise)
|
||||
|
||||
## [3.7.1] - 2026-03-27
|
||||
### Fixed — Phase 3 Procedural Capture
|
||||
- **Dual-strategy extraction** for better reliability:
|
||||
- Strategy A: extract from `event.toolCalls` when available (original path)
|
||||
- Strategy B: parse assistant messages for command patterns (fallback)
|
||||
- Patterns detected: bash code blocks, inline commands, shell prompts (`$ ...`)
|
||||
- Success detection: ✅|success|published|deployed|completed keywords
|
||||
- Deduplication of consecutive identical commands
|
||||
- **Debug logging** added to diagnose capture behavior in production
|
||||
- New method: `ProceduralMemory.extractFromMessages(messages, context)`
|
||||
|
||||
### Why this fix
|
||||
- v3.7.0 captured 0 procedures because `event.toolCalls` was empty/unavailable
|
||||
- Message parsing ensures capture works even when toolCalls not exposed by OpenClaw
|
||||
- Enables real-world validation of Phase 3 procedural learning
|
||||
|
||||
## [3.7.0] - 2026-03-27
|
||||
### Added — Procedural Memory (Phase 3)
|
||||
- **How-to knowledge that improves with repetition**
|
||||
- New `procedures` table: stores sequences of successful actions (exec/tool calls)
|
||||
- Captures steps, success/failure counts, degradation score, alternatives
|
||||
- Hook `agent_end`: detects successful command sequences → extracts procedure
|
||||
- Hook `before_prompt_build`: searches matching procedures → injects steps
|
||||
- Dynamic improvement: success_count++ reduces degradation, failure++ increases it
|
||||
- Alternative paths: when degradation > 0.5, searches for better alternative procedure
|
||||
- Example: "Publish to ClawHub" captured as 4-step procedure with success rate
|
||||
|
||||
- **Stats at boot**: `procedures: 0✓/0⚠` (healthy/degraded)
|
||||
|
||||
### Why this matters
|
||||
- Memoria now learns "how to do things" (not just "what happened")
|
||||
- Procedures improve over time as they're repeated successfully
|
||||
- Failed attempts trigger degradation → search for alternative approach
|
||||
- Solves: "I published v3.5.0 but don't remember HOW" → now it's stored & recalled
|
||||
|
||||
## [3.6.0] - 2026-03-27
|
||||
### Added — Human-Like Memory Architecture
|
||||
- **Identity-aware memory** (Phase 0)
|
||||
- New `relevance_weight` column (0.0-1.0, default 0.5) on facts
|
||||
- Parses `USER.md`, `COMPANY.md`, `projects/objectifs.md` to extract identity/priorities
|
||||
- Boosts facts about Bureau, Polymarket, Primask (core work) vs Memoria internals (meta)
|
||||
- Scoring integrates relevance: Bureau facts rise, config/plugin facts sink
|
||||
- New `identity_cache` table stores parsed identity for fast lookup
|
||||
|
||||
- **Lifecycle states** (Phase 1.1)
|
||||
- Facts evolve through 4 states: `fresh` → `mature` → `aged` → `archived`
|
||||
- Automatic transitions based on time + usage ratio + recall count
|
||||
- `archived` facts excluded from recall (forgotten, not deleted)
|
||||
- Stats displayed at boot: `338f/0m/0a/0⚰` (fresh/mature/aged/archived)
|
||||
|
||||
- **Proactive revision** (Phase 1.2)
|
||||
- Mature facts with 10+ recalls trigger LLM revision proposal
|
||||
- If improved → new fact created + old superseded
|
||||
- Revision runs in background (non-blocking)
|
||||
|
||||
- **Hebbian reinforcement** (Phase 2)
|
||||
- Relations now have `weight` (0.0-2.0, default 1.0)
|
||||
- Co-occurrence → weight++ (entities seen together strengthen)
|
||||
- Time decay → weight-- (unused relations fade)
|
||||
- Weak relations pruned automatically (<0.3)
|
||||
- Stats: `21 strong, 0 weak` relations
|
||||
|
||||
- **Expertise specialization** (Phase 2)
|
||||
- Topics gain "expertise levels": novice/familiar/experienced/expert
|
||||
- Based on `access_count` (interaction frequency)
|
||||
- Expert topics boost recall score (1.3x for expert, 1.1x for experienced)
|
||||
- Stats: `8★★★/6★★/4★` (expert/experienced/familiar)
|
||||
|
||||
### Fixed
|
||||
- Added try/catch to lifecycle, hebbian, expertise modules (prevent crash on SQL errors)
|
||||
- Expertise module: fixed schema mismatch (`topic` → `name`, `interaction_count` → `access_count`)
|
||||
|
||||
## [3.5.1] - 2026-03-26
|
||||
### Fixed
|
||||
- TypeScript parse error in `feedback.ts` (class closing brace misplaced) — plugin was crashing silently for 7h
|
||||
- Plugin now loads correctly after restart
|
||||
|
||||
## [3.5.0] - 2026-03-26
|
||||
### Added — Feedback Loop & Adaptive Learning
|
||||
- **Usefulness tracking** — each recalled fact now has `usefulness`, `recall_count`, `used_count` scores
|
||||
- Facts referenced in the assistant's response → usefulness++ (boost)
|
||||
- Facts ignored repeatedly → usefulness-- (sink naturally)
|
||||
- Scoring integrates usefulness: high-use facts rise, never-used facts decay faster
|
||||
- **User correction detection** — detects patterns like "non c'est", "en fait", "actually", "that's wrong" (FR+EN)
|
||||
- Penalizes the last-recalled facts that may have caused the error (-1.5 penalty)
|
||||
- **User frustration detection** — detects "putain", "bordel", "wtf", repeated questions
|
||||
- Mild penalty (-0.5) on last-recalled facts
|
||||
- **Adaptive budget** — budget now learns from compactions:
|
||||
- If recall → compaction within 5 min → penalty increases (injected too many facts)
|
||||
- Penalty reduces limit by 1-3 facts (minimum always respected)
|
||||
- Penalty decays naturally when compactions stop (self-correcting)
|
||||
|
||||
### Added — Cross-Layer Supersede Cascade
|
||||
- When a fact is superseded, ALL layers are notified:
|
||||
- **Observations**: superseded fact removed from evidence lists; empty observations deleted
|
||||
- **Graph**: fact removed from relation contexts; orphaned relations weakened (-0.15) or pruned
|
||||
- **Topics**: fact↔topic links removed; empty topics deleted; fact_count updated
|
||||
- **Embeddings**: stale embedding vector deleted (no more ghost matches in semantic search)
|
||||
- Before: layers were disconnected. A superseded fact's ghost persisted in graph, topics, embeddings.
|
||||
|
||||
### Added — Smart md-regen
|
||||
- Auto-triggers on 3 conditions (replaces dumb "lines > 200" check):
|
||||
- `captures_since_regen >= 20` — enough new facts accumulated
|
||||
- `last_regen_at > 7 days` — stale files even with few captures
|
||||
- Any `.md file > 200 lines` — backward-compatible safety net
|
||||
- Tracks `captures_since_regen` and `last_regen_at` in meta table
|
||||
|
||||
### Improved — Extraction Quality
|
||||
- **Anti-meta prompt** — blocks vague/meta-facts ("Le nouveau fait fournit des informations...")
|
||||
- Requires at least one proper noun, number, or concrete command per fact
|
||||
- **Tighter dedup** — combined threshold lowered to 0.75 + new "8 first words identical" → instant duplicate
|
||||
- **Dynamic entity matching** — `SelectiveMemory` now loads entities from the Knowledge Graph DB (373+ entities)
|
||||
instead of a hardcoded regex list. Refreshes every 5 min.
|
||||
|
||||
### Fixed
|
||||
- DB cleanup: 307→294 active facts (13 superseded, 5 duplicate clusters purged, 3 meta-facts removed)
|
||||
|
||||
## [3.4.1] - 2026-03-26
|
||||
### Improved — Install Wizard UX
|
||||
- **Clearer prompts**: "Tapez 1, 2 ou 3" on all choices (not just "Choix [1]")
|
||||
- **Cloud providers**: choose between OpenAI, OpenRouter, or Anthropic (was OpenAI-only)
|
||||
- **Modifiable after install**: all prompts now mention `configure.sh` for post-install changes
|
||||
- **Update mode**: `--update` flag for quick silent updates; auto-detection of existing install
|
||||
- **Existing install detection**: if Memoria is already installed, proposes Update / Reinstall / Cancel
|
||||
- **Thank-you message**: links to @Nitix_ (X), GitHub star, Primo Studio credit
|
||||
- **Auto-cleanup**: `memory-convex` entry automatically removed from `openclaw.json` if present
|
||||
- **Fallback info**: warns user that crash notifications appear in logs
|
||||
- **Embeddings note**: displayed during install with "changeable later" mention
|
||||
|
||||
## [3.4.0] - 2026-03-26
|
||||
### Added — Fact Clusters
|
||||
- **Entity-grouped "dossier" summaries**: groups 3+ facts sharing the same entity into one dense paragraph
|
||||
- Clusters stored as `fact_type = "cluster"` — searchable via FTS5 + embeddings like regular facts
|
||||
- 15% scoring boost (info-dense = higher recall value)
|
||||
- Auto-invalidation: when a member fact is superseded, cluster marked stale → regenerated next cycle
|
||||
- Entity detection: knowledge graph IDs first, proper noun extraction fallback
|
||||
- Known entities pattern matching for Memoria-specific terms (Sol, Bureau, Primask, etc.)
|
||||
- **Impact**: MS (multi-session) benchmark 2/5 → 3.5/5; overall accuracy 75% → 81.7%
|
||||
|
||||
### Benchmark Results (v3.4.0, GPT-5.4-nano judge)
|
||||
- Accuracy: **81.7%** (22/30 correct + 5 partial)
|
||||
- Retrieval: **50.0%** (15/30)
|
||||
- SSU 5/5, KU 5/5, SSP 5/5, SSA 3.5/5, TR 3.5/5, MS 2.5/5
|
||||
- 39 atomic facts + 5 clusters = 44 total facts from 10 sessions
|
||||
|
||||
## [3.3.0] - 2026-03-26
|
||||
### Added — Query Expansion
|
||||
- **Hybrid search now expands queries** into 2-4 semantic variants before searching
|
||||
- Domain-specific concept map: "taux horaire" → ["salaire", "€/h", "paie"], "projets" → ["apps", "MVPs"], etc.
|
||||
- FTS + cosine both search across all variants, deduplicating results
|
||||
- Proper noun extraction: named entities searched standalone
|
||||
- **Impact**: MS (multi-session) questions like "quels taux horaires?" now find "5.19€/h" facts
|
||||
|
||||
### Improved — Topic-Aware Recall
|
||||
- `findRelevantTopics` now receives expanded queries for broader matching
|
||||
- Topic name exact match bonus (+3 score) with expanded variants
|
||||
- **Impact**: Topics like "salaires" found even when query says "rémunération"
|
||||
|
||||
### Improved — Denser Extraction
|
||||
- Extraction prompt now enforces "one fact per distinct entity"
|
||||
- Example: session mentioning 3 people → 3 separate facts instead of 1 merged
|
||||
- **Impact**: More facts per session = better multi-session recall
|
||||
|
||||
## [3.2.0] - 2026-03-26
|
||||
### Fixed — Reasoning Model Support (I3+I4)
|
||||
- **Ollama provider**: Now reads `thinking` field when `response` is empty (GPT-OSS, Qwen3.5 reasoning models)
|
||||
- **OpenAI-compat provider**: Now reads `reasoning_content` and `reasoning` fields (LM Studio GPT-OSS)
|
||||
- **Impact**: Clients using reasoning models no longer get empty extractions/answers
|
||||
|
||||
### Fixed — Knowledge Update Recall (I1+I2)
|
||||
- **Recall now shows dates**: Each fact displays age (`[aujourd'hui]`, `[il y a 3j]`, `[2026-03-20]`)
|
||||
- **Header instructs**: "Les faits les plus récents sont les plus fiables en cas de contradiction"
|
||||
- **Impact**: Answering model can now disambiguate when old and new versions of a fact coexist
|
||||
|
||||
### Improved — Procedure Extraction (I5)
|
||||
- **Multi-sentence facts allowed**: Procedures can now be captured as 2-4 sentences in a single fact
|
||||
- **Prompt guidance**: Examples show good vs bad procedure capture
|
||||
- **Impact**: Workflows and how-to knowledge preserved as coherent units
|
||||
|
||||
### Improved — Short Query Handling (I6)
|
||||
- **Adaptive FTS/cosine weights**: Short queries (<3 words) now favor semantic search (55%) over FTS (20%)
|
||||
- **Impact**: Generic queries like "Bureau" return semantically relevant facts instead of keyword noise
|
||||
|
||||
### Added — Provider Interface Cleanup (I7)
|
||||
- **`generateWithMeta`** added to LLMProvider interface (optional, with default implementation)
|
||||
- **All providers** (Ollama, OpenAI-compat) now implement generateWithMeta
|
||||
- **Impact**: Providers are fully interchangeable with FallbackChain
|
||||
|
||||
### Added — Anthropic Provider (I8+A3)
|
||||
- **New `providers/anthropic.ts`**: Native Claude API support (`/v1/messages` format)
|
||||
- **Supported in**: LLM config, fallback chain, per-layer overrides
|
||||
- **Models**: Any Claude model (Haiku, Sonnet, Opus) via API key
|
||||
- **Impact**: Clients can use Claude directly without routing through OpenRouter
|
||||
|
||||
### Added — Config Schema Update
|
||||
- **`anthropic`** added to `llm.provider` enum in plugin schema
|
||||
- **Fallback chain** supports `type: "anthropic"` entries
|
||||
|
||||
## [3.1.1] - 2026-03-25
|
||||
### Improved — Extraction Quality (Results over Status)
|
||||
- **Problem**: Extraction captured "test passed ✅" but lost actual results like "Retrieval 92%, bottleneck = local model"
|
||||
- **New ✅ categories**: benchmark results with metrics, conclusions from experiments, measured comparisons, machine/infra specs
|
||||
- **Smarter filtering**: block narration WITHOUT results (not all narration); block binary status without info ("test OK")
|
||||
- **Extraction priority**: 🥇 learnings > 🥈 measured results > 🥉 durable facts
|
||||
|
||||
## [3.1.0] - 2026-03-25
|
||||
### Fixed — Entity-based Semantic Contradiction Detection
|
||||
- **Critical fix**: Contradictions between facts with different wording but same entities were not detected
|
||||
- Example: "No models on Sol" vs "gemma3:4b installed on Sol" had only 0.23 textual similarity → contradiction check was never called
|
||||
- Root cause: Levenshtein+Jaccard gate (threshold 0.7) prevented LLM from seeing semantically related facts with different words
|
||||
- **New entity extraction**: Extracts proper nouns, tech terms, tool names from facts (Sol, Memoria, Ollama, gemma3, etc.)
|
||||
- **Entity-based FTS search**: When new fact shares entities with existing facts, triggers LLM contradiction check regardless of text similarity
|
||||
- **Wider FTS search** (20 candidates per entity) to avoid missing facts ranked beyond top 5
|
||||
- **Fail-safe**: If entity check fails → fact is stored (never lost)
|
||||
|
||||
### Improved — Extraction Prompt
|
||||
- **Generalization rules**: When a pattern repeats (e.g. "npm not found in SSH" + "ollama not found in SSH"), extract the general rule instead of individual cases
|
||||
- **Process knowledge**: Explicit instructions to store "how to do X" commands (e.g. "lms server start launches LM Studio without GUI")
|
||||
|
||||
### Technical
|
||||
- `SelectiveMemory` constructor now accepts optional `EmbeddingManager` (4th arg) for future semantic enhancements
|
||||
- `semanticContradictionThreshold` config option added (default 0.40)
|
||||
- `extractSubjectEntities()` function with patterns for common tech terms
|
||||
- `findFactsBySharedEntities()` method for entity-overlap search
|
||||
- Build order in index.ts: embed providers created before SelectiveMemory instantiation
|
||||
|
||||
## [3.0.0] - 2026-03-25
|
||||
### Added — Phase 2: Semantic/Episodic Memory
|
||||
- **fact_type column**: `semantic` (durable, slow decay 30-90 days) vs `episodic` (dated, fast decay 7-14 days)
|
||||
- **Extraction prompt rewritten**: explicit STOCKER/NE PAS STOCKER rules, LLM now classifies fact type
|
||||
- **TODO/action filter**: blocks transient facts ("il faut X", "en préparation", "prochaine étape")
|
||||
- Auto-migration adds `fact_type` column to existing DBs
|
||||
|
||||
### Added — Phase 3: Observations (Living Syntheses)
|
||||
- **Observation layer**: inspired by Hindsight, multi-fact synthesis that evolves
|
||||
- Observations are **created** when 3+ facts share a topic (auto-emergence via LLM topic extraction)
|
||||
- Observations are **updated** (re-synthesized) when new related facts arrive
|
||||
- **Recall priority**: Observations injected FIRST, individual facts as complement
|
||||
- Each observation tracks `evidence_ids`, `revision` count, `access_count`, embedding
|
||||
- Matching via embedding cosine similarity + keyword fallback
|
||||
- Configurable: `emergenceThreshold`, `matchThreshold`, `maxRecallObservations`, `maxEvidencePerObservation`
|
||||
|
||||
### Added — Phase 4: Recall Adaptatif
|
||||
- Observations count adjusts to context window (recallLimit / 3, min 2)
|
||||
- Individual facts fill remaining budget after observations
|
||||
- Format splits into "Observations (synthèses vivantes)" + "Faits individuels"
|
||||
|
||||
### Added — Procedural Memory Preservation
|
||||
- **Procedural memory** (procedural): like learning bike tricks — processes, tips, "what worked" are preserved as durable knowledge
|
||||
- **Smart TODO filter**: distinguishes disposable TODOs ("pull X") from learned processes ("use VACUUM INTO because WAL copies lose -shm")
|
||||
- Heuristics: length >60 chars usually = knowledge → keep; explanation markers (car/sinon/pour/because/→) → always keep
|
||||
- Transient patterns (en préparation, en cours, pas encore) only skip short facts
|
||||
|
||||
### Fixed
|
||||
- **CRITICAL: `api.config` vs `api.pluginConfig`** — all custom settings were silently ignored since v0.1.0
|
||||
- Fallback `provider` vs `type` normalization in parseConfig
|
||||
|
||||
## [2.7.0] - 2026-03-25
|
||||
### Added
|
||||
- **Interactive wizard in install.sh** — 2-question guided install: "Local or Cloud?" → "Fallback or strict?". Detects environment (Ollama, LM Studio, OpenAI key), shows summary, asks confirmation.
|
||||
- **Presets for silent install** — `--preset local-only|cloud-first|paranoid` for CI/scripting. Also `--yes` to skip confirmation.
|
||||
- **Post-install validation** — Tests LLM provider after install (quick Ollama smoke test).
|
||||
- **Bilingual installer** — French interface for better UX (target market).
|
||||
|
||||
### Fixed
|
||||
- **CRITICAL: `api.config` vs `api.pluginConfig`** — Plugin was reading global OpenClaw config instead of plugin-specific config. ALL custom settings (fallback, llm, embed, limits) were silently ignored since v0.1.0. Fixed to use `api.pluginConfig`.
|
||||
- **Fallback `provider` vs `type` mapping** — User config uses `provider` field but internal code expected `type`. Added normalization in parseConfig.
|
||||
|
||||
### Changed
|
||||
- install.sh rewritten as interactive wizard with environment detection and guided choices.
|
||||
- Config generated based on user choices (not hardcoded defaults).
|
||||
|
||||
## [2.6.1] - 2026-03-25
|
||||
### Added
|
||||
- **Auto-config in install.sh** — The installer now auto-edits `openclaw.json`: adds memoria to `plugins.entries` and `plugins.allow` with a backup of the original file. Users keep full control to customize after.
|
||||
- **Existing data detection** — install.sh detects cortex.db, memoria.db, or facts.json and shows migration status (fact count, file size).
|
||||
- **Summary panel** — install.sh now displays version, location, config path, LLM/embed info at the end.
|
||||
- **Node.js/npm version display** — Shows detected versions during prerequisite check.
|
||||
|
||||
### Fixed
|
||||
- **WAL-mode migration** — `VACUUM INTO` used instead of `cp` for cortex.db→memoria.db migration. Plain `cp` on WAL-mode SQLite DBs resulted in empty copies (0 facts). Fallback copies WAL+SHM files if VACUUM fails.
|
||||
- **Empty DB override** — Migration now triggers if memoria.db exists but is < 8KB (empty schema-only DB from a failed previous attempt).
|
||||
|
||||
### Changed
|
||||
- install.sh rewritten: auto-config replaces manual "copy-paste this JSON" step.
|
||||
- INSTALL.md updated to document auto-config, WAL migration, and data detection.
|
||||
|
||||
## [2.6.0] - 2026-03-25
|
||||
### Added
|
||||
- **`install.sh`** — One-line installer: checks prerequisites, pulls Ollama models, clones repo, installs deps. Usage: `curl -fsSL https://raw.githubusercontent.com/Primo-Studio/openclaw-memoria/main/install.sh | bash`
|
||||
- **Auto-migration cortex→memoria** — If `memoria.db` doesn't exist but `cortex.db` does, auto-copies it. Zero manual migration needed.
|
||||
|
||||
### Fixed
|
||||
- **Schema too strict** — `additionalProperties` changed from `false` to `true` everywhere. Unknown config keys no longer crash the gateway.
|
||||
- **`syncMd` type** — Was rejecting `{ enabled: true }` objects. Now only accepts boolean as documented, and schema makes it clear.
|
||||
- **`embed.dims` vs `embed.dimensions`** — Schema now documents `dimensions` clearly with defaults shown.
|
||||
- **`fallback[].type` vs `fallback[].provider`** — Schema field is `provider`, not `type`.
|
||||
- **`llm.default` doesn't exist** — Schema clearly shows `llm.provider` + `llm.model` at top level.
|
||||
- **DB constructor confusion** — `MemoriaDB()` takes workspace root, not DB path. Documented + auto-migration handles legacy DB name.
|
||||
|
||||
### Changed
|
||||
- **Smart defaults everywhere** — `{ "memoria": { "enabled": true } }` is now a valid minimal config. Defaults: Ollama + gemma3:4b + nomic-embed-text-v2-moe + 768 dims + recall 12 + capture 8.
|
||||
- Schema defaults added to all fields for documentation.
|
||||
- INSTALL.md rewritten with config minimale, bugs connus, et providers table.
|
||||
|
||||
## [2.5.0] - 2026-03-25
|
||||
### Added
|
||||
- **Hot Tier**: facts accessed ≥5 times = always injected in recall, like a phone number you know by heart. New `getHotFacts()` in scoring, `hotFacts()` in DB.
|
||||
- **Access-based learning**: `accessBoostFactor` tripled (0.1 → 0.3) — frequently used facts score much higher, mimicking human memory retention through repetition.
|
||||
- **Configurable defaults raised**: `captureMaxFacts` 3→8, `recallLimit` 8→12, `maxFacts` 10→12. Users with smaller context windows can lower these in config.
|
||||
|
||||
### Changed
|
||||
- Recall pipeline now: hot tier (always first) → hybrid search → graph → topics → context tree → budget limit
|
||||
- Hot facts excluded from search results to avoid duplicates
|
||||
- `searchLimit` = `recallLimit - hotCount` so hot facts don't eat into query-relevant slots
|
||||
|
||||
## [2.4.0] - 2026-03-25
|
||||
### Added
|
||||
- **Embed Fallback** (`embed-fallback.ts`): `EmbedFallback` wraps multiple `EmbedProvider`s with automatic retry (Ollama → LM Studio → OpenAI). If primary embed fails, tries next provider.
|
||||
- **Post-processing function** `postProcessNewFacts()`: shared between `agent_end` and `after_compaction` hooks — embed, graph extract, topic tag, sync .md, auto md-regen.
|
||||
- **Auto md-regen**: triggers automatically when any .md file exceeds 200 lines after capture. Bounded regeneration (30d recent, 150 max/file).
|
||||
|
||||
### Fixed
|
||||
- **after_compaction incomplete** ✅: compaction-rescued facts now get full enrichment (embed + graph + topics + sync + regen) — same pipeline as agent_end.
|
||||
- **Embed no fallback** ✅: EmbedFallback chains configured embed provider → LM Studio → OpenAI (if API key available).
|
||||
- **md-regen manual only** ✅: now auto-triggered in postProcessNewFacts when file size threshold exceeded.
|
||||
|
||||
### Changed
|
||||
- Post-processing code extracted from agent_end into reusable `postProcessNewFacts()` function
|
||||
- Log messages now include `[capture]` or `[compaction]` source label
|
||||
|
||||
## [2.3.0] - 2026-03-25
|
||||
### Added
|
||||
- **Per-layer LLM config**: each layer (extract, contradiction, graph, topics, contextTree) can use a different model/provider
|
||||
- `llm.overrides` config section with per-layer `{ provider, model, baseUrl?, apiKey? }`
|
||||
- Override chains include the user's chosen model as primary, then fallback to the default chain
|
||||
- Boot log shows active overrides when configured
|
||||
- JSON Schema `$defs/layerLlm` in manifest for validation
|
||||
|
||||
### Changed
|
||||
- `FallbackChain` now implements `LLMProvider` interface directly (`generate()` → string, `generateWithMeta()` → metadata)
|
||||
- All modules receive FallbackChain (full fallback) instead of `chain.primaryLLM` (Ollama-only)
|
||||
- `selective` uses `contradictionLlm`, `graph` uses `graphLlm`, `topics` uses `topicsLlm`, `contextTree` uses `contextTreeLlm`, extract uses `extractLlm`
|
||||
|
||||
### Fixed
|
||||
- Fallback gap: selective, graph, topics, context-tree had NO fallback (Ollama-only). Now all have full chain.
|
||||
|
||||
## [2.2.0] - 2026-03-25
|
||||
### Added
|
||||
- Phase 9: `.md Vivants` — bounded markdown regeneration (recent 30d, max 150/file, archive notice)
|
||||
- `MdRegenManager` class with configurable regen settings
|
||||
- Boot-time .md file size logging
|
||||
|
||||
## [2.1.0] - 2026-03-25
|
||||
### Added
|
||||
- Phase 8: `Topics Émergents` — auto-clustering from keyword patterns
|
||||
- `TopicManager` class with keyword extraction, emergence scanning, sub-topics
|
||||
- Topic embeddings (mean of fact embeddings, cosine search)
|
||||
- Topic enrichment in recall pipeline (after graph, before context tree)
|
||||
- Bootstrap script for initial tagging (389/438 facts tagged → 94 topics)
|
||||
- `topics` + `fact_topics` tables in SQLite schema
|
||||
|
||||
## [2.0.0] - 2026-03-25
|
||||
### Added
|
||||
- Phase 10: `Fallback Chain` — graceful LLM degradation (Ollama → OpenAI → LM Studio → FTS-only)
|
||||
- `FallbackChain` class with round-robin retry and configurable providers
|
||||
|
||||
## [1.0.0] - 2026-03-25
|
||||
### Added
|
||||
- Phase 7: `Budget Adaptatif` — dynamic recall limit based on context usage (light/medium/heavy/critical zones)
|
||||
- Phase 7: `Sync .md` — auto-append new facts to mapped workspace markdown files
|
||||
- `AdaptiveBudget` class with configurable thresholds
|
||||
- `MdSync` class with dedup (first 60 chars check)
|
||||
|
||||
## [0.5.0] - 2026-03-25
|
||||
### Added
|
||||
- Phase 6: `Context Tree` — hierarchical fact organization with query-weighted scoring
|
||||
- `ContextTreeBuilder` class with category clustering and sub-clustering
|
||||
|
||||
## [0.4.0] - 2026-03-25
|
||||
### Added
|
||||
- Phase 5: `Knowledge Graph + Hebbian Learning` — entity extraction, relation storage, BFS traversal
|
||||
- `KnowledgeGraph` class with graph extraction prompts and Hebbian reinforcement
|
||||
- Partial/fuzzy entity matching
|
||||
|
||||
## [0.3.0] - 2026-03-25
|
||||
### Added
|
||||
- Phase 4: `Embeddings + Hybrid Search` — cosine similarity with local Ollama embeddings
|
||||
- `EmbeddingManager` class with batch embedding, hybrid search (FTS + cosine + temporal)
|
||||
|
||||
### Fixed
|
||||
- FTS5 query sanitization (hyphenated terms crash)
|
||||
|
||||
## [0.2.0] - 2026-03-25
|
||||
### Added
|
||||
- Phase 2: `Mémoire Sélective` — dedup (Levenshtein + Jaccard), contradiction check via LLM, importance threshold, enrichment/merge
|
||||
- `SelectiveMemory` class with configurable thresholds
|
||||
|
||||
## [0.1.0] - 2026-03-25
|
||||
### Added
|
||||
- Phase 1: Core SQLite + FTS5, temporal scoring, perception hooks
|
||||
- `MemoriaDB` class, migration from facts.json (423 facts)
|
||||
- Provider abstraction (Ollama, OpenAI-compat, LM Studio)
|
||||
|
||||
<!-- v3.14.x entries moved to proper chronological position above -->
|
||||
|
||||
## 3.22.0 — 2026-03-28
|
||||
|
||||
### Added
|
||||
- **Layer 21: Continuous Learning** — real-time fact capture via `message_received` + `llm_output` hooks
|
||||
- Rolling buffer of last 10 user/assistant exchanges
|
||||
- Periodic extraction every 4 turns (configurable via `continuous.interval`)
|
||||
- **Urgent extraction** triggered immediately on frustration/error signals (e.g. "ne fais plus", "doublon", "crash")
|
||||
- **Self-error detection** — captures when assistant acknowledges its own mistake
|
||||
- Uses same LLM extraction prompt and selective dedup pipeline as agent_end
|
||||
- Independent of context size, compaction, or session end — works with 60K, 200K, or 10M token contexts
|
||||
26
openclaw-memoria-port/CODE_OF_CONDUCT.md
Normal file
26
openclaw-memoria-port/CODE_OF_CONDUCT.md
Normal file
@@ -0,0 +1,26 @@
|
||||
# Code of Conduct
|
||||
|
||||
## Our Pledge
|
||||
|
||||
We are committed to providing a friendly, safe and welcoming environment for all contributors, regardless of experience level, gender identity, sexual orientation, disability, personal appearance, body size, race, ethnicity, age, religion, or nationality.
|
||||
|
||||
## Our Standards
|
||||
|
||||
**Positive behavior:**
|
||||
- Being respectful and inclusive
|
||||
- Giving and accepting constructive feedback gracefully
|
||||
- Focusing on what is best for the community
|
||||
- Showing empathy toward other community members
|
||||
|
||||
**Unacceptable behavior:**
|
||||
- Harassment, trolling, or personal attacks
|
||||
- Publishing others' private information without consent
|
||||
- Any conduct that would be considered inappropriate in a professional setting
|
||||
|
||||
## Enforcement
|
||||
|
||||
Instances of unacceptable behavior may be reported to **contact@primo-studio.fr**. All complaints will be reviewed and investigated promptly.
|
||||
|
||||
## Attribution
|
||||
|
||||
This Code of Conduct is adapted from the [Contributor Covenant](https://www.contributor-covenant.org/), version 2.1.
|
||||
82
openclaw-memoria-port/CONTRIBUTING.md
Normal file
82
openclaw-memoria-port/CONTRIBUTING.md
Normal file
@@ -0,0 +1,82 @@
|
||||
# Contributing to Memoria
|
||||
|
||||
Thanks for your interest in contributing! 🧠
|
||||
|
||||
## Getting Started
|
||||
|
||||
1. Fork the repo
|
||||
2. Clone your fork:
|
||||
```bash
|
||||
git clone https://github.com/YOUR_USERNAME/openclaw-memoria.git
|
||||
cd openclaw-memoria
|
||||
```
|
||||
3. Install dependencies:
|
||||
```bash
|
||||
npm install
|
||||
```
|
||||
4. Run tests:
|
||||
```bash
|
||||
npx tsx tests/test-core.ts
|
||||
```
|
||||
|
||||
## Development Setup
|
||||
|
||||
### Requirements
|
||||
- Node.js 20+
|
||||
- Ollama with `gemma3:4b` and `nomic-embed-text-v2-moe` (for full integration tests)
|
||||
- OpenClaw (for plugin testing)
|
||||
|
||||
### Running in OpenClaw
|
||||
1. Symlink or copy the extension into `~/.openclaw/extensions/memoria/`
|
||||
2. Add to `openclaw.json`:
|
||||
```json
|
||||
{ "plugins": { "allow": ["memoria"], "entries": { "memoria": { "enabled": true } } } }
|
||||
```
|
||||
3. Restart gateway: `openclaw gateway restart`
|
||||
|
||||
## What to Contribute
|
||||
|
||||
### High-impact areas
|
||||
- **New LLM providers** — implement `LLMProvider` interface in `providers/`
|
||||
- **Better extraction prompts** — improve fact quality from conversations
|
||||
- **Benchmark datasets** — more diverse test scenarios in `benchmarks/`
|
||||
- **Contradiction detection** — improve stale fact supersession
|
||||
- **Performance** — optimize FTS5/embedding queries for large DBs (10K+ facts)
|
||||
|
||||
### Good first issues
|
||||
- Add tests for individual modules (graph, topics, observations)
|
||||
- Improve `.md` regeneration formatting
|
||||
- Add provider for Groq, Together, Mistral API
|
||||
|
||||
## Code Style
|
||||
|
||||
- TypeScript (strict when possible)
|
||||
- Imports use `.js` extension (ESM resolution)
|
||||
- Functions documented with JSDoc where non-obvious
|
||||
- Error handling: prefer graceful degradation over throwing
|
||||
|
||||
## Pull Request Process
|
||||
|
||||
1. Create a branch: `git checkout -b feat/my-feature`
|
||||
2. Write tests if applicable
|
||||
3. Run existing tests: `npx tsx tests/test-core.ts`
|
||||
4. Commit with clear message: `feat: add Groq provider support`
|
||||
5. Open PR against `main` with:
|
||||
- What changed and why
|
||||
- Test results
|
||||
- Breaking changes (if any)
|
||||
|
||||
## Commit Convention
|
||||
|
||||
```
|
||||
feat: new feature
|
||||
fix: bug fix
|
||||
docs: documentation only
|
||||
perf: performance improvement
|
||||
test: adding tests
|
||||
refactor: code restructuring
|
||||
```
|
||||
|
||||
## License
|
||||
|
||||
By contributing, you agree that your contributions will be licensed under Apache 2.0.
|
||||
226
openclaw-memoria-port/INSTALL.md
Normal file
226
openclaw-memoria-port/INSTALL.md
Normal file
@@ -0,0 +1,226 @@
|
||||
# Installation Memoria
|
||||
|
||||
## Option A : Installation automatique (recommandé)
|
||||
|
||||
```bash
|
||||
curl -fsSL https://raw.githubusercontent.com/Primo-Studio/openclaw-memoria/main/install.sh | bash
|
||||
```
|
||||
|
||||
Le script :
|
||||
1. Vérifie les prérequis (Node.js, npm, Ollama)
|
||||
2. Pull les modèles Ollama (gemma3:4b + nomic-embed-text-v2-moe)
|
||||
3. Clone le repo et installe les dépendances
|
||||
4. **Auto-configure `openclaw.json`** (ajoute memoria aux plugins avec backup)
|
||||
5. Détecte les données existantes (cortex.db, memoria.db, facts.json)
|
||||
|
||||
Le client garde le contrôle total : toute config peut être modifiée après dans `openclaw.json`.
|
||||
|
||||
## Option B : Installation manuelle
|
||||
|
||||
### Prérequis
|
||||
|
||||
- **OpenClaw** installé et fonctionnel
|
||||
- **Node.js** ≥ 20 avec `npm` dans le PATH
|
||||
- **Ollama** installé ([ollama.ai](https://ollama.ai))
|
||||
|
||||
### 1. Installer les modèles Ollama
|
||||
|
||||
```bash
|
||||
ollama pull gemma3:4b # LLM extraction (3.3 GB)
|
||||
ollama pull nomic-embed-text-v2-moe # Embeddings (957 MB)
|
||||
```
|
||||
|
||||
Vérifier : `ollama list` doit afficher les deux modèles.
|
||||
|
||||
### 2. Installer le plugin
|
||||
|
||||
```bash
|
||||
git clone https://github.com/Primo-Studio/openclaw-memoria.git \
|
||||
~/.openclaw/extensions/memoria
|
||||
|
||||
cd ~/.openclaw/extensions/memoria
|
||||
npm install
|
||||
```
|
||||
|
||||
### 3. Configurer openclaw.json
|
||||
|
||||
**Config minimale** — tout le reste a des defaults intelligents :
|
||||
|
||||
```json
|
||||
{
|
||||
"plugins": {
|
||||
"allow": ["memoria"],
|
||||
"entries": {
|
||||
"memoria": { "enabled": true }
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Config complète** (si vous voulez personnaliser) :
|
||||
|
||||
```json
|
||||
{
|
||||
"memoria": {
|
||||
"enabled": true,
|
||||
"config": {
|
||||
"autoRecall": true,
|
||||
"autoCapture": true,
|
||||
"recallLimit": 12,
|
||||
"captureMaxFacts": 8,
|
||||
"defaultAgent": "koda",
|
||||
"contextWindow": 200000,
|
||||
"syncMd": true,
|
||||
"llm": {
|
||||
"provider": "ollama",
|
||||
"model": "gemma3:4b"
|
||||
},
|
||||
"embed": {
|
||||
"provider": "ollama",
|
||||
"model": "nomic-embed-text-v2-moe",
|
||||
"dimensions": 768
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 4. Vérifier et démarrer
|
||||
|
||||
```bash
|
||||
openclaw doctor # Vérifier la config
|
||||
openclaw gateway restart # Redémarrer
|
||||
openclaw status # Vérifier le chargement
|
||||
```
|
||||
|
||||
Vous devez voir :
|
||||
```
|
||||
[plugins] memoria: v3.22.3 registered (X facts, Y observations, ... continuous: every 4 turns)
|
||||
```
|
||||
|
||||
### 5. Migration automatique
|
||||
|
||||
**Depuis cortex.db** : Memoria détecte automatiquement un ancien `cortex.db` et migre les données en `memoria.db` au premier démarrage. Zéro action nécessaire. Utilise `VACUUM INTO` pour gérer les DB en mode WAL.
|
||||
|
||||
**Depuis facts.json** (memory-convex) :
|
||||
|
||||
```bash
|
||||
cd ~/.openclaw/extensions/memoria
|
||||
npx tsx migrate.ts
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Bugs connus à l'installation
|
||||
|
||||
### ❌ `syncMd` doit être un boolean
|
||||
|
||||
**Erreur** : `plugins.entries.memoria.config.syncMd: must be boolean`
|
||||
**Cause** : écrire `"syncMd": { "enabled": true }` au lieu de `"syncMd": true`
|
||||
**Fix** : `"syncMd": true`
|
||||
|
||||
### ❌ `embed.dims` n'existe pas
|
||||
|
||||
**Erreur** : `must NOT have additional properties`
|
||||
**Cause** : le champ s'appelle `dimensions`, pas `dims`
|
||||
**Fix** : `"dimensions": 768`
|
||||
|
||||
### ❌ `llm.default` n'existe pas
|
||||
|
||||
**Erreur** : `must NOT have additional properties`
|
||||
**Cause** : les champs `provider` et `model` sont directement dans `llm`, pas dans `llm.default`
|
||||
**Fix** :
|
||||
```json
|
||||
"llm": {
|
||||
"provider": "ollama",
|
||||
"model": "gemma3:4b"
|
||||
}
|
||||
```
|
||||
|
||||
### ❌ `fallback[].type` n'existe pas
|
||||
|
||||
**Erreur** : propriété inconnue
|
||||
**Cause** : le champ s'appelle `provider`, pas `type`
|
||||
**Fix** : `{ "provider": "ollama", "model": "gemma3:4b" }`
|
||||
|
||||
### ❌ DB path = workspace root, pas le fichier
|
||||
|
||||
Le constructeur `MemoriaDB()` attend le **workspace root** (ex: `~/.openclaw/workspace`).
|
||||
Il crée automatiquement `memory/memoria.db` dedans.
|
||||
Ne pas passer le chemin de la DB directement.
|
||||
|
||||
### ⚠️ Ollama modèles = 0 malgré process running
|
||||
|
||||
**Symptôme** : `ollama list` retourne vide, mais le process tourne
|
||||
**Cause** : Ollama app lancée mais aucun modèle pull
|
||||
**Fix** : `ollama pull gemma3:4b && ollama pull nomic-embed-text-v2-moe`
|
||||
|
||||
### ⚠️ `npm` / `node` not found via SSH
|
||||
|
||||
**Cause** : SSH ne charge pas le PATH complet (brew, nvm, etc.)
|
||||
**Fix** : `export PATH=/opt/homebrew/bin:$PATH` avant les commandes
|
||||
|
||||
### ⚠️ "loaded without install/load-path provenance"
|
||||
|
||||
**Cause** : plugin local, pas installé via `openclaw plugin install`
|
||||
**Impact** : warning non-bloquant, le plugin fonctionne
|
||||
**Fix** : ajouter dans `plugins.allow` (déjà fait si vous suivez le guide)
|
||||
|
||||
---
|
||||
|
||||
## Config minimale (copier-coller)
|
||||
|
||||
Pour une installation rapide avec Ollama local :
|
||||
|
||||
```json
|
||||
{
|
||||
"plugins": {
|
||||
"allow": ["memoria"],
|
||||
"entries": {
|
||||
"memoria": {
|
||||
"enabled": true,
|
||||
"config": {
|
||||
"autoRecall": true,
|
||||
"autoCapture": true,
|
||||
"syncMd": true,
|
||||
"llm": { "provider": "ollama", "model": "gemma3:4b" },
|
||||
"embed": { "provider": "ollama", "model": "nomic-embed-text-v2-moe", "dimensions": 768 }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Providers supportés
|
||||
|
||||
| Provider | LLM | Embeddings | Prérequis |
|
||||
|----------|-----|------------|-----------|
|
||||
| `ollama` | ✅ | ✅ | Ollama installé, modèles pull |
|
||||
| `lmstudio` | ✅ | ✅ | LM Studio avec serveur local |
|
||||
| `openai` | ✅ | ✅ | Clé API OpenAI |
|
||||
| `openrouter` | ✅ | ❌ | Clé API OpenRouter |
|
||||
| `anthropic` | ✅ | ❌ | Clé API Anthropic (Claude) |
|
||||
|
||||
### Anthropic (Claude API)
|
||||
|
||||
Config avec Claude comme LLM d'extraction :
|
||||
```json
|
||||
"llm": {
|
||||
"provider": "anthropic",
|
||||
"model": "claude-haiku-3-5",
|
||||
"apiKey": "sk-ant-..."
|
||||
}
|
||||
```
|
||||
|
||||
Ou en fallback :
|
||||
```json
|
||||
"fallback": [
|
||||
{ "provider": "ollama", "model": "gemma3:4b" },
|
||||
{ "provider": "anthropic", "model": "claude-haiku-3-5", "apiKey": "sk-ant-..." }
|
||||
]
|
||||
```
|
||||
|
||||
Note : Anthropic ne supporte pas les embeddings. Utilisez Ollama ou OpenAI pour les embeddings.
|
||||
190
openclaw-memoria-port/LICENSE
Normal file
190
openclaw-memoria-port/LICENSE
Normal file
@@ -0,0 +1,190 @@
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to the Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by the Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding any notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
Copyright 2026 Primo Studio
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
120
openclaw-memoria-port/PHASE-2.2-SUMMARY.md
Normal file
120
openclaw-memoria-port/PHASE-2.2-SUMMARY.md
Normal file
@@ -0,0 +1,120 @@
|
||||
# Phase 2.2 — Extraction memoria-core (TERMINÉE ✅)
|
||||
|
||||
## Objectif
|
||||
Extraire le core de Memoria dans un sous-dossier `core/` pour le rendre **indépendant d'OpenClaw** et publiable sur npm en tant que package standalone.
|
||||
|
||||
## Résultat
|
||||
|
||||
### Structure créée
|
||||
```
|
||||
memoria/
|
||||
├── core/ ← Package standalone (@primo-studio/memoria-core)
|
||||
│ ├── index.ts ← API publique (Memoria class)
|
||||
│ ├── package.json ← npm package config
|
||||
│ ├── README.md ← Documentation complète
|
||||
│ ├── providers/ ← LLM providers (Ollama, OpenAI, Anthropic, LM Studio)
|
||||
│ └── [28 modules].ts ← 10,062 LOC (83% du code)
|
||||
│
|
||||
├── index.ts ← Adapter OpenClaw (318 LOC)
|
||||
├── recall.ts ← Hook before_prompt_build
|
||||
├── continuous.ts ← Hooks message_received + llm_output
|
||||
├── capture.ts ← Hooks agent_end + after_compaction
|
||||
├── procedural-hooks.ts ← Hook after_tool_call
|
||||
├── orchestrator.ts ← Pipeline post-capture
|
||||
└── openclaw.d.ts ← Types OpenClaw
|
||||
```
|
||||
|
||||
### Modules dans core/ (30 fichiers)
|
||||
|
||||
**Core logic (18):**
|
||||
- db.ts, selective.ts, scoring.ts, lifecycle.ts, hebbian.ts
|
||||
- embeddings.ts, graph.ts, topics.ts, patterns.ts
|
||||
- observations.ts, fact-clusters.ts, procedural.ts
|
||||
- feedback.ts, expertise.ts, revision.ts
|
||||
- context-tree.ts, budget.ts, identity-parser.ts
|
||||
|
||||
**Infra (10):**
|
||||
- extraction.ts, format.ts, fallback.ts, embed-fallback.ts
|
||||
- config.ts, migrate.ts, sync.ts, md-regen.ts
|
||||
- audit-v25.ts, bootstrap-topics.ts
|
||||
|
||||
**Providers (4):**
|
||||
- providers/types.ts
|
||||
- providers/ollama.ts
|
||||
- providers/openai-compat.ts
|
||||
- providers/anthropic.ts
|
||||
|
||||
### API publique core/index.ts
|
||||
|
||||
```typescript
|
||||
export class Memoria {
|
||||
static async init(options: MemoriaInitOptions): Promise<Memoria>
|
||||
async store(fact: string, category?: string, confidence?: number): Promise<StoreResult>
|
||||
async recall(query: string, options?: RecallOptions): Promise<RecallResult>
|
||||
async query(naturalLanguageQuestion: string): Promise<string>
|
||||
async stats(): Promise<MemoriaStats>
|
||||
close(): void
|
||||
}
|
||||
```
|
||||
|
||||
### Vérifications
|
||||
|
||||
✅ **Structure créée** — core/ existe avec 30 modules
|
||||
✅ **Imports mis à jour** — tous les fichiers adapter importent depuis `./core/`
|
||||
✅ **API publique** — core/index.ts expose la classe Memoria
|
||||
✅ **Package.json** — core/package.json créé (v0.1.0)
|
||||
✅ **README.md** — documentation complète avec exemples
|
||||
✅ **ZERO OpenClaw dependency dans core/** — grep confirmé
|
||||
✅ **TypeScript clean** — `npx tsc --noEmit` retourne 0 erreurs
|
||||
✅ **Git history preserved** — git mv utilisé (renames détectés)
|
||||
✅ **Commit créé** — c266521
|
||||
|
||||
### Métriques
|
||||
|
||||
- **Core:** 10,062 LOC (83% du codebase)
|
||||
- **Adapters:** 1,785 LOC (17% du codebase)
|
||||
- **Total:** 11,847 LOC
|
||||
- **TypeScript errors:** 0
|
||||
- **Fichiers déplacés:** 30 (git mv)
|
||||
- **Fichiers créés:** 3 (core/index.ts, core/package.json, core/README.md)
|
||||
|
||||
### Prochaines étapes
|
||||
|
||||
1. **Push sur GitHub** — `git push origin main` (bloqué réseau, à retry)
|
||||
2. **Test standalone** — créer un projet test avec `import { Memoria } from './core'`
|
||||
3. **npm publish** — publier sur npm registry
|
||||
4. **Documentation** — ajouter examples/ avec code samples
|
||||
5. **CI/CD** — GitHub Actions pour tests automatiques
|
||||
|
||||
## Commit
|
||||
|
||||
```
|
||||
c266521 refactor: Phase 2.2 — extract memoria-core as standalone package
|
||||
|
||||
Move 30 modules into core/ subdirectory:
|
||||
- 20 core modules (db, selective, scoring, lifecycle, etc.)
|
||||
- 10 infra modules (providers, fallback, config, sync, etc.)
|
||||
|
||||
Keep 6 adapter modules at root (OpenClaw hooks):
|
||||
- index.ts, recall.ts, continuous.ts, capture.ts,
|
||||
procedural-hooks.ts, orchestrator.ts
|
||||
|
||||
New: core/index.ts — public API class Memoria with:
|
||||
- Memoria.init(options) — standalone initialization
|
||||
- store(), recall(), query(), stats(), close()
|
||||
|
||||
83% of code now lives in core/ with ZERO OpenClaw dependency.
|
||||
npm publish @primo-studio/memoria-core will work standalone.
|
||||
```
|
||||
|
||||
## État final
|
||||
|
||||
✅ **Phase 2.2 TERMINÉE**
|
||||
- Structure core/ créée
|
||||
- API publique fonctionnelle
|
||||
- 0 erreur TypeScript
|
||||
- Git history préservé
|
||||
- Documentation complète
|
||||
- Prêt pour publication npm
|
||||
|
||||
⚠️ **Push bloqué** — problème réseau temporaire, à retry plus tard
|
||||
54
openclaw-memoria-port/README.md
Normal file
54
openclaw-memoria-port/README.md
Normal file
@@ -0,0 +1,54 @@
|
||||
# openclaw-memoria-port
|
||||
|
||||
Портированная версия плагина **Memoria** для работы с новой системой хуков OpenClaw 2026.5.x.
|
||||
|
||||
## Проблема
|
||||
|
||||
Оригинальный плагин Memoria (`git_proj/openclaw-memoria`) использует устаревший механизм регистрации плагинов:
|
||||
|
||||
- Экспортирует `{ register(api) }` вместо `definePluginEntry()`
|
||||
- Импортирует из deprecated barrel `"openclaw/plugin-sdk/core"`
|
||||
- Загружается как **non-capability** — система статусов выдаёт предупреждение `"hook-only-plugin-shape"`
|
||||
- Не имеет явной capability-регистрации
|
||||
|
||||
В OpenClaw 2026.5.x старый механизм хуков не работает, поэтому плагин не загружается.
|
||||
|
||||
## Решение
|
||||
|
||||
Портирование в `openclaw-memoria-port` — минимальные изменения для совместимости с новой системой:
|
||||
|
||||
### Изменённые файлы (4 шт.)
|
||||
|
||||
| Файл | Изменение |
|
||||
|------|-----------|
|
||||
| `index.ts` | Обёртка `definePluginEntry({ id, name, description, register })`; импорты из `"openclaw/plugin-sdk/plugin-entry"` и `"openclaw/plugin-sdk"` |
|
||||
| `recall.ts` | Импорт `OpenClawPluginApi` → `"openclaw/plugin-sdk"` |
|
||||
| `continuous.ts` | То же |
|
||||
| `capture.ts` | То же |
|
||||
| `procedural-hooks.ts` | То же |
|
||||
|
||||
### Новые файлы (1 шт.)
|
||||
|
||||
- **tsup.config.ts** — сборка ESM в `dist/`, external для `openclaw/*` и `better-sqlite3`
|
||||
|
||||
### Обновлённые конфигурации
|
||||
|
||||
- **package.json**: `"main": "./dist/index.js"`, скрипт `"build": "tsup"`, devDependencies: tsup + typescript 5.8
|
||||
- **tsconfig.json**: ESM bundler mode (как в mem0), `allowImportingTsExtensions: true`
|
||||
- **openclaw.plugin.json**: добавлено `"kind": "memory"` и секция `setup.providers`
|
||||
|
||||
## Что НЕ менялось (всё бизнес-логика сохранена)
|
||||
|
||||
Все 21 cognitive layer, SQLite schema, hooks (`before_prompt_build`, `agent_end`, `message_received`, `llm_output`, `after_tool_call`, `after_compaction`), pipeline постобработки фактов — всё на месте.
|
||||
|
||||
## Build
|
||||
|
||||
```bash
|
||||
npm install
|
||||
npm run build
|
||||
# результат: dist/index.js (ESM) + dist/index.d.ts
|
||||
```
|
||||
|
||||
## Сравнение с mem0
|
||||
|
||||
Плагин mem0 (`git_proj/mem0/openclaw`) — эталон для новой системы хуков. Использует тот же паттерн `definePluginEntry()` и импорты из `"openclaw/plugin-sdk"`.
|
||||
45
openclaw-memoria-port/SECURITY.md
Normal file
45
openclaw-memoria-port/SECURITY.md
Normal file
@@ -0,0 +1,45 @@
|
||||
# Security Policy
|
||||
|
||||
## Supported Versions
|
||||
|
||||
| Version | Supported |
|
||||
|---------|-----------|
|
||||
| 3.4.x | ✅ Active |
|
||||
| 3.0.x - 3.3.x | ⚠️ Critical fixes only |
|
||||
| < 3.0 | ❌ Not supported |
|
||||
|
||||
## Reporting a Vulnerability
|
||||
|
||||
If you discover a security vulnerability in Memoria, please **do not open a public issue**.
|
||||
|
||||
Instead, email us at: **contact@primo-studio.fr**
|
||||
|
||||
Include:
|
||||
- Description of the vulnerability
|
||||
- Steps to reproduce
|
||||
- Impact assessment
|
||||
- Suggested fix (if any)
|
||||
|
||||
We will respond within **48 hours** and aim to release a fix within **7 days** for critical issues.
|
||||
|
||||
## Security Considerations
|
||||
|
||||
### Data Storage
|
||||
- All data is stored **locally** in SQLite (`memoria.db`) — no cloud sync by default
|
||||
- API keys in config are **never** sent to third parties (only to the configured LLM providers)
|
||||
- The `.md` sync files may contain extracted facts — treat your workspace as sensitive
|
||||
|
||||
### LLM Provider Security
|
||||
- Fact extraction sends conversation content to your configured LLM (Ollama = 100% local)
|
||||
- If using cloud providers (OpenAI, Anthropic), conversation data leaves your machine
|
||||
- Use `fallback` config carefully — ensure only trusted providers are listed
|
||||
|
||||
### Best Practices
|
||||
- **Never commit API keys** — use environment variables (`OPENAI_API_KEY`)
|
||||
- **Restrict workspace permissions** — `memoria.db` contains all your agent's memory
|
||||
- **Backup regularly** — `cp memoria.db memoria.db.bak` (or use `VACUUM INTO`)
|
||||
- **Review .md sync output** — ensure no sensitive data leaks into shared files
|
||||
|
||||
## Disclosure Policy
|
||||
|
||||
We follow responsible disclosure. Security researchers who report valid vulnerabilities will be credited in the release notes (unless they prefer to remain anonymous).
|
||||
217
openclaw-memoria-port/SKILL.md
Normal file
217
openclaw-memoria-port/SKILL.md
Normal file
@@ -0,0 +1,217 @@
|
||||
---
|
||||
name: Memoria for OpenClaw
|
||||
version: 3.34.0
|
||||
description: "The most advanced memory system for AI agents. 24 cognitive layers, knowledge graph, procedural learning, dialectic queries, AI self-observation, auto skill creation, crash-safe WAL, async prefetch. Works with Claude, Cursor, Copilot, ChatGPT & any OpenClaw agent. 100% local-first (SQLite + Ollama), zero cloud cost, zero API keys required."
|
||||
author: Primo Studio (@Nieto42)
|
||||
license: Apache-2.0
|
||||
homepage: https://github.com/Primo-Studio/openclaw-memoria
|
||||
repository: https://github.com/Primo-Studio/openclaw-memoria
|
||||
feedback: https://x.com/Nitix_
|
||||
keywords:
|
||||
- memory
|
||||
- ai-agent
|
||||
- persistent-memory
|
||||
- long-term-memory
|
||||
- knowledge-graph
|
||||
- procedural-learning
|
||||
- vector-search
|
||||
- sqlite
|
||||
- ollama
|
||||
- lm-studio
|
||||
- local-first
|
||||
- cognitive
|
||||
- context
|
||||
- claude
|
||||
- cursor
|
||||
- copilot
|
||||
- chatgpt
|
||||
- openclaw
|
||||
- wal
|
||||
- vibe-coding
|
||||
- developer-tools
|
||||
- devtools
|
||||
- typescript
|
||||
- llm
|
||||
- automation
|
||||
- brain-inspired
|
||||
- multi-layer
|
||||
- fact-extraction
|
||||
- self-improving
|
||||
tags:
|
||||
- memory
|
||||
- ai-agent
|
||||
- persistent-memory
|
||||
- long-term-memory
|
||||
- knowledge-graph
|
||||
- procedural-learning
|
||||
- vector-search
|
||||
- sqlite
|
||||
- ollama
|
||||
- lm-studio
|
||||
- local-first
|
||||
- cognitive
|
||||
- context
|
||||
- claude
|
||||
- cursor
|
||||
- copilot
|
||||
- chatgpt
|
||||
- openclaw
|
||||
- wal
|
||||
- vibe-coding
|
||||
- developer-tools
|
||||
- devtools
|
||||
- typescript
|
||||
- llm
|
||||
- automation
|
||||
- brain-inspired
|
||||
- multi-layer
|
||||
- fact-extraction
|
||||
- self-improving
|
||||
- self-observation
|
||||
- dialectic
|
||||
- auto-skill
|
||||
- prefetch
|
||||
- crash-recovery
|
||||
- async
|
||||
env:
|
||||
- name: OPENAI_API_KEY
|
||||
required: false
|
||||
description: Optional — used as fallback for LLM extraction and embeddings when local models are unavailable. Never required for default operation.
|
||||
- name: OPENROUTER_API_KEY
|
||||
required: false
|
||||
description: Optional — used as fallback for remote LLM provider. Never required for default operation.
|
||||
- name: OPENCLAW_WORKSPACE
|
||||
required: false
|
||||
description: Auto-set by OpenClaw runtime — workspace path for memory files. Do not set manually.
|
||||
security: |
|
||||
## Data & Privacy
|
||||
- Memoria runs 100% locally by default. No data leaves your machine unless you explicitly configure a remote LLM.
|
||||
- All memory is stored in a local SQLite database in your OpenClaw workspace.
|
||||
- API keys (OPENAI_API_KEY, OPENROUTER_API_KEY) are optional fallbacks — never required.
|
||||
|
||||
## What Memoria reads
|
||||
- Workspace files: USER.md, COMPANY.md, projects/* — used for identity-aware relevance scoring.
|
||||
These files may contain personal or business information. Review them before enabling Memoria.
|
||||
- Conversation content: assistant messages and tool call results, to extract durable facts.
|
||||
- No files outside the OpenClaw workspace are read.
|
||||
|
||||
## What Memoria writes
|
||||
- A single SQLite database (memoria.db) in your workspace/memory/ folder.
|
||||
- Optional markdown summaries in workspace/memory/ folder.
|
||||
- No network requests unless a remote LLM fallback is configured.
|
||||
|
||||
## Hooks used
|
||||
- before_prompt_build: injects recalled facts into context
|
||||
- after_tool_call: captures procedural knowledge (how-to steps)
|
||||
- agent_end: extracts facts from completed conversations
|
||||
- after_compaction: extracts facts from compacted summaries
|
||||
These are standard OpenClaw plugin hooks. They run locally within the plugin process.
|
||||
entrypoint: index.ts
|
||||
---
|
||||
|
||||
# 🧠 Memoria — Multi-Layer Persistent Memory for OpenClaw
|
||||
|
||||
**The most complete memory system for OpenClaw.** 20 layers of memory that work together, powered by YOUR choice of LLM.
|
||||
|
||||
## Why Memoria?
|
||||
|
||||
### 🏗️ 20 Memory Layers (not just a fact store)
|
||||
1. **Facts** — Durable knowledge extracted from every conversation
|
||||
2. **Procedures** — HOW to do things, improves with repetition, learns from failures
|
||||
3. **Knowledge Graph** — Entities + relations connecting your facts
|
||||
4. **Topics & Expertise** — Tracks what you talk about most, specializes over time
|
||||
5. **Observations** — Short-term working memory for active context
|
||||
6. **Error Detection** 🔥 — Touch fire once, remember forever. Dangers captured on first occurrence
|
||||
7. **Lifecycle** — Fresh → Settled → Dormant. Nothing deleted, priority shifts naturally
|
||||
|
||||
### 🔌 Bring Your Own LLM
|
||||
Configure each layer independently. Mix and match:
|
||||
- **Ollama** — Run gemma3, qwen3.5, llama, or any model locally (recommended)
|
||||
- **LM Studio** — Use any GGUF model from your local server
|
||||
- **Remote APIs** — OpenAI, Anthropic, OpenRouter as primary or fallback
|
||||
- **Fallback chains** — Ollama → LM Studio → API. If one fails, the next takes over automatically
|
||||
|
||||
### 🏠 100% Local-First
|
||||
- **SQLite + FTS5** — No external database needed
|
||||
- **Local embeddings** — nomic-embed-text via Ollama (zero API cost)
|
||||
- **Zero cloud dependency** — Works offline, your data stays on your machine
|
||||
- **Fallback chain** — Degrades gracefully if a provider goes down
|
||||
|
||||
### 🧬 What Makes Memoria Different
|
||||
| Feature | Memoria | Basic memory plugins |
|
||||
|---------|---------|---------------------|
|
||||
| Memory layers | 20 specialized layers | Single fact store |
|
||||
| LLM choice | Any local or remote model | Usually hardcoded |
|
||||
| Per-layer LLM config | ✅ Different model per layer | ❌ |
|
||||
| Procedural learning | ✅ Learns HOW, not just WHAT | ❌ |
|
||||
| Error detection | ✅ Auto-captures dangers | ❌ |
|
||||
| Knowledge graph | ✅ Entities + relations | ❌ |
|
||||
| Lifecycle management | ✅ Smart aging, never forgets | ❌ or simple TTL |
|
||||
| Cost | $0 with local models | Varies |
|
||||
|
||||
## Installation
|
||||
|
||||
### As Plugin (recommended — one command)
|
||||
```bash
|
||||
openclaw plugins install clawhub:memoria-plugin
|
||||
```
|
||||
This installs Memoria from the ClawHub registry. No manual steps needed.
|
||||
|
||||
### From source (for contributors / advanced users)
|
||||
If you prefer to inspect the code first:
|
||||
1. Browse the repository: [github.com/Primo-Studio/openclaw-memoria](https://github.com/Primo-Studio/openclaw-memoria)
|
||||
2. Review the source code, especially `index.ts` (main entrypoint) and `openclaw.plugin.json` (config schema)
|
||||
3. Clone and install:
|
||||
```bash
|
||||
cd ~/.openclaw/extensions
|
||||
git clone https://github.com/Primo-Studio/openclaw-memoria.git memoria
|
||||
cd memoria && npm install
|
||||
```
|
||||
|
||||
Then add to your `openclaw.json` under `plugins.entries`:
|
||||
```json
|
||||
{
|
||||
"memoria": { "enabled": true },
|
||||
"memory-convex": { "enabled": false }
|
||||
}
|
||||
```
|
||||
|
||||
## Configuration
|
||||
|
||||
### Minimal (works out of the box with Ollama)
|
||||
Just install and restart. Defaults: Ollama + gemma3:4b for extraction, nomic for embeddings.
|
||||
|
||||
### Custom LLM per layer
|
||||
```json
|
||||
"memoria": {
|
||||
"enabled": true,
|
||||
"config": {
|
||||
"llm": {
|
||||
"default": { "provider": "ollama", "model": "qwen3.5:4b" },
|
||||
"procedural": { "provider": "lmstudio", "model": "your-model" },
|
||||
"graph": { "provider": "openai", "model": "gpt-4o-mini" }
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Source Code
|
||||
|
||||
The full source is available on GitHub: [Primo-Studio/openclaw-memoria](https://github.com/Primo-Studio/openclaw-memoria)
|
||||
|
||||
Key files:
|
||||
- `index.ts` — Main plugin entrypoint (hooks, extraction, recall pipeline)
|
||||
- `procedural.ts` — Procedural memory (how-to learning)
|
||||
- `lifecycle.ts` — Lifecycle management (fresh/settled/dormant)
|
||||
- `scoring.ts` — Temporal scoring and relevance ranking
|
||||
- `selective.ts` — Dedup, contradiction detection, fact quality
|
||||
- `openclaw.plugin.json` — Configuration schema
|
||||
|
||||
## Feedback & Community
|
||||
|
||||
**We'd love your feedback!** Tell us how Memoria works for you:
|
||||
- 🐦 **Tweet us** [@Nitix_](https://x.com/Nitix_) — share your setup, results, or ideas
|
||||
- ⭐ **Star the repo**: [github.com/Primo-Studio/openclaw-memoria](https://github.com/Primo-Studio/openclaw-memoria)
|
||||
- 🐛 **Issues**: [GitHub Issues](https://github.com/Primo-Studio/openclaw-memoria/issues)
|
||||
|
||||
Built with ❤️ by [Primo Studio](https://primo-studio.fr) 🇬🇫 — AI tooling from French Guiana.
|
||||
BIN
openclaw-memoria-port/assets/logo.png
Normal file
BIN
openclaw-memoria-port/assets/logo.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 366 KiB |
35
openclaw-memoria-port/benchmarks/README.md
Normal file
35
openclaw-memoria-port/benchmarks/README.md
Normal file
@@ -0,0 +1,35 @@
|
||||
# Memoria Benchmarks
|
||||
|
||||
## Methodology
|
||||
|
||||
- **Dataset**: 10 synthetic sessions based on real workspace data (Primo Studio)
|
||||
- **30 questions** across 6 categories:
|
||||
- **SSU** (Single-Session Understanding): fact lookup from one session
|
||||
- **SSA** (Single-Session Aggregation): combine facts from one session
|
||||
- **SSP** (Single-Session Procedure): how-to questions
|
||||
- **KU** (Knowledge Update): handle contradicting/updated information
|
||||
- **TR** (Temporal Reasoning): time-based questions
|
||||
- **MS** (Multi-Session): cross-session aggregation
|
||||
- **Pipeline**: Extract (gemma3:4b Ollama) → Embed (nomic-embed-text-v2-moe) → Answer (GPT-OSS 20B LM Studio) → Judge (GPT-5.4-nano OpenAI)
|
||||
- **Fair**: fresh DB each run, same questions, same models
|
||||
|
||||
## Results History
|
||||
|
||||
| Version | Accuracy | Retrieval | SSU | SSA | SSP | KU | TR | MS | Key Change |
|
||||
|---------|----------|-----------|-----|-----|-----|-----|-----|-----|------------|
|
||||
| v3.1.1 | 41.7% | 90.0% | 3/5 | 2/5 | 3/5 | 2/5 | 1/5 | 1/5 | Baseline (gemma3 judge) |
|
||||
| v3.2.0 | 75.0% | 53.3% | 4.5/5 | 3/5 | 5/5 | 5/5 | 2.5/5 | 1.5/5 | Supersession + procedures |
|
||||
| v3.3.0 | 75.0% | 43.3% | 5/5 | 3/5 | 4.5/5 | 5/5 | 3/5 | 2/5 | Query expansion (nano judge) |
|
||||
| **v3.4.0** | **81.7%** | **50.0%** | **5/5** | **3.5/5** | **5/5** | **5/5** | **3.5/5** | **2.5/5** | **Fact Clusters** |
|
||||
|
||||
## Running
|
||||
|
||||
```bash
|
||||
# Requires: Ollama (gemma3:4b + nomic-embed), LM Studio (GPT-OSS 20B), OpenAI API key
|
||||
python3 benchmarks/bench-v34.py
|
||||
```
|
||||
|
||||
## Files
|
||||
|
||||
- `bench-v34.py` — Latest benchmark script (v3.4.0)
|
||||
- `results/` — JSON results from each run
|
||||
510
openclaw-memoria-port/benchmarks/bench-v34.py
Normal file
510
openclaw-memoria-port/benchmarks/bench-v34.py
Normal file
@@ -0,0 +1,510 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
LEGACY — Memoria v3.4.0 Benchmark (March 2026)
|
||||
Targets v3.4.0 with 10 synthetic sessions. Kept for reference.
|
||||
Current version: v3.22.3 with 21 layers — updated benchmarks TBD.
|
||||
"""
|
||||
import json, time, requests, sqlite3, os, re
|
||||
from collections import defaultdict
|
||||
|
||||
OLLAMA = "http://localhost:11434"
|
||||
LMSTUDIO = "http://localhost:1234/v1"
|
||||
OPENAI = "https://api.openai.com/v1"
|
||||
OPENAI_KEY = os.environ.get("OPENAI_API_KEY", "YOUR_KEY_HERE")
|
||||
EMBED_MODEL = "nomic-embed-text-v2-moe"
|
||||
EXTRACT_MODEL = "gemma3:4b"
|
||||
ANSWER_MODEL = "openai/gpt-oss-20b"
|
||||
JUDGE_MODEL = "gpt-5.4-nano"
|
||||
DB_PATH = "/tmp/bench-v34-clusters.db"
|
||||
RESULTS_PATH = f"/tmp/results-v34-{time.strftime('%Y%m%d-%H%M%S')}.json"
|
||||
|
||||
# Known entities for clustering
|
||||
KNOWN_ENTITIES = [
|
||||
"Sol", "Luna", "Koda", "Neto", "Bureau", "Convex", "Primask",
|
||||
"DockGroups", "Memoria", "Ollama", "Cloudflare", "Vercel",
|
||||
"Qonto", "Alexandre", "Pierre", "Primo Studio", "LM Studio"
|
||||
]
|
||||
|
||||
CONCEPT_MAP = {
|
||||
"taux horaire": ["€/h", "salaire"],
|
||||
"salaire": ["taux horaire", "€/h"],
|
||||
"rémunération": ["salaire", "€/h"],
|
||||
"ca": ["chiffre d'affaires"],
|
||||
"chiffre d'affaires": ["CA"],
|
||||
"deploy": ["déploiement"],
|
||||
"déploiement": ["deploy"],
|
||||
"modèle": ["model"],
|
||||
"modèles": ["models"],
|
||||
"config": ["configuration"],
|
||||
"configuration": ["config"],
|
||||
}
|
||||
|
||||
SESSIONS = [
|
||||
{"id": "s1", "messages": [
|
||||
"J'ai configuré Ollama sur Sol avec gemma3:4b comme modèle d'extraction.",
|
||||
"Les embeddings utilisent nomic-embed-text-v2-moe avec 768 dimensions.",
|
||||
"Le fallback chain est Ollama → LM Studio → OpenAI."
|
||||
]},
|
||||
{"id": "s2", "messages": [
|
||||
"Bureau utilise Convex comme backend avec des subscriptions useQuery en temps réel.",
|
||||
"Le module CRM gère 11 structures : entreprises, associations et collectivités.",
|
||||
"La sync Qonto se fait via une action Convex, pas un script bash."
|
||||
]},
|
||||
{"id": "s3", "messages": [
|
||||
"Neto travaille depuis la Guyane française, timezone America/Cayenne (GMT-3).",
|
||||
"Il préfère le step-by-step et déteste les régressions.",
|
||||
"Sa machine principale est un Mac Studio avec 64 Go de RAM."
|
||||
]},
|
||||
{"id": "s4", "messages": [
|
||||
"Primask est une app de planning développée par Primo Studio.",
|
||||
"Le deploy se fait via GitHub → Vercel avec auto-deploy.",
|
||||
"Le token Hello-Primo est utilisé pour les projets Bureau sur Vercel."
|
||||
]},
|
||||
{"id": "s5", "messages": [
|
||||
"Luna est un iMac qui gère les emails et le calendrier via CalDAV iCloud.",
|
||||
"Sol est un Mac Mini disponible 24/7 pour les tâches de dev.",
|
||||
"Koda est le dev AI senior, promu le 22 mars 2026."
|
||||
]},
|
||||
{"id": "s6", "messages": [
|
||||
"Le benchmark LongMemEval-S teste 6 catégories : SSU, SSA, SSP, KU, TR, MS.",
|
||||
"Le retrieval rate de Memoria est de 93.3% avec gemma3:4b.",
|
||||
"KU (Knowledge Update) est le point faible : 0/5 correct."
|
||||
]},
|
||||
{"id": "s7", "messages": [
|
||||
"DockGroups est une app macOS menu bar pour organiser le Dock.",
|
||||
"Le drag & drop a été retiré car instable dans MenuBarExtra.",
|
||||
"Version actuelle : v0.5.0 avec apply-to-dock sécurisé via SIGHUP."
|
||||
]},
|
||||
{"id": "s8", "messages": [
|
||||
"Memoria v3.2.0 ajoute le support des modèles à reasoning (Ollama/OpenAI-compat).",
|
||||
"Le recall inclut maintenant les dates avec guidance de recency.",
|
||||
"L'extraction procédurale permet les faits multi-phrases.",
|
||||
"Le hybridSearch est adaptatif : plus de poids cosine pour les queries courtes."
|
||||
]},
|
||||
{"id": "s9", "messages": [
|
||||
"Alexandre est en amélioration, son taux horaire est de 5.19€/h.",
|
||||
"Pierre a terminé son contrat, non renouvelé. Son taux était de 7.39€/h.",
|
||||
"Le CA 2025 était de 111 223€, objectif 2026 : 80-100K€.",
|
||||
"Update : Alexandre a été augmenté à 6.50€/h suite à sa progression."
|
||||
]},
|
||||
{"id": "s10", "messages": [
|
||||
"Cloudflare gère le DNS de primo-studio.fr avec proxy vers Vercel (front) et tunnel vers NAS (Directus).",
|
||||
"Le zone ID Cloudflare est 403c7dc0dfe5c1ec6e94d92d8d0765ba.",
|
||||
"Transport Rino est un MVP offline-first avec signatures intégrées au parcours terrain."
|
||||
]}
|
||||
]
|
||||
|
||||
QUESTIONS = [
|
||||
{"q": "Quel modèle d'extraction est configuré sur Sol ?", "expected": "gemma3:4b", "cat": "SSU", "session": "s1"},
|
||||
{"q": "Combien de structures gère le CRM Bureau ?", "expected": "11", "cat": "SSU", "session": "s2"},
|
||||
{"q": "Quelle est la timezone de Neto ?", "expected": "America/Cayenne", "cat": "SSU", "session": "s3"},
|
||||
{"q": "Comment se fait le deploy de Primask ?", "expected": "GitHub vers Vercel auto-deploy", "cat": "SSU", "session": "s4"},
|
||||
{"q": "Quel est le rôle de Luna ?", "expected": "iMac, emails et calendrier", "cat": "SSU", "session": "s5"},
|
||||
{"q": "Quels sont les 3 niveaux du fallback chain de Memoria ?", "expected": "Ollama, LM Studio, OpenAI", "cat": "SSA", "session": "s1"},
|
||||
{"q": "Quels types de structures le CRM Bureau gère-t-il ?", "expected": "entreprises, associations, collectivités", "cat": "SSA", "session": "s2"},
|
||||
{"q": "Quelles sont les 3 machines de l'équipe et leurs rôles ?", "expected": "Mac Studio (Neto), iMac Luna (emails/cal), Mac Mini Sol (dev 24/7)", "cat": "SSA", "session": "s5"},
|
||||
{"q": "Quelles features a apporté Memoria v3.2.0 ?", "expected": "reasoning models, dated recall, procedures multi-phrases, adaptive hybridSearch", "cat": "SSA", "session": "s8"},
|
||||
{"q": "Quels services Cloudflare fournit pour primo-studio.fr ?", "expected": "DNS, proxy Vercel front, tunnel NAS Directus", "cat": "SSA", "session": "s10"},
|
||||
{"q": "Comment configurer le fallback chain Memoria ?", "expected": "Ollama → LM Studio → OpenAI dans la config", "cat": "SSP", "session": "s1"},
|
||||
{"q": "Comment synchroniser Qonto avec Bureau ?", "expected": "via action Convex syncQonto, pas script bash", "cat": "SSP", "session": "s2"},
|
||||
{"q": "Quelle est la procédure pour déployer sur Vercel ?", "expected": "push GitHub déclenche auto-deploy", "cat": "SSP", "session": "s4"},
|
||||
{"q": "Comment appliquer les groupes au Dock dans DockGroups ?", "expected": "apply-to-dock sécurisé via SIGHUP", "cat": "SSP", "session": "s7"},
|
||||
{"q": "Comment fonctionne le hybridSearch adaptatif ?", "expected": "plus de poids cosine pour queries courtes, plus FTS pour queries longues", "cat": "SSP", "session": "s8"},
|
||||
{"q": "Quel est le taux horaire actuel d'Alexandre ?", "expected": "6.50€/h", "cat": "KU", "session": "s9"},
|
||||
{"q": "Pierre travaille-t-il encore chez Primo Studio ?", "expected": "non, contrat terminé", "cat": "KU", "session": "s9"},
|
||||
{"q": "Le drag & drop fonctionne-t-il dans DockGroups ?", "expected": "non, retiré car instable", "cat": "KU", "session": "s7"},
|
||||
{"q": "Quelle est la version actuelle de Memoria ?", "expected": "v3.2.0", "cat": "KU", "session": "s8"},
|
||||
{"q": "Quel est le score KU du benchmark Memoria ?", "expected": "0/5 correct", "cat": "KU", "session": "s6"},
|
||||
{"q": "Quand Koda a-t-il été promu Dev Senior ?", "expected": "22 mars 2026", "cat": "TR", "session": "s5"},
|
||||
{"q": "Quelle version de DockGroups a introduit apply-to-dock sécurisé ?", "expected": "v0.5.0", "cat": "TR", "session": "s7"},
|
||||
{"q": "Le CA a-t-il augmenté ou baissé entre 2025 et l'objectif 2026 ?", "expected": "baissé (111K → 80-100K objectif)", "cat": "TR", "session": "s9"},
|
||||
{"q": "Quel était le retrieval rate avant les améliorations v3.2.0 ?", "expected": "93.3%", "cat": "TR", "session": "s6"},
|
||||
{"q": "Avant le reasoning support, que se passait-il avec les modèles thinking ?", "expected": "le thinking consommait les tokens, réponse vide/pas de JSON", "cat": "TR", "session": "s8"},
|
||||
{"q": "Quelles machines utilisent Memoria ?", "expected": "Mac Studio (Koda) et Mac Mini (Sol)", "cat": "MS", "session": ""},
|
||||
{"q": "Quel est le lien entre Bureau et Qonto ?", "expected": "sync via action Convex, matching auto virements↔projets", "cat": "MS", "session": ""},
|
||||
{"q": "Quels sont les projets actifs de Primo Studio ?", "expected": "Bureau, Primask, DockGroups, Transport Rino, Memoria", "cat": "MS", "session": ""},
|
||||
{"q": "Quels modèles LLM sont disponibles sur Sol ?", "expected": "gemma3:4b, nomic-embed, qwen3.5:27b, GPT-OSS 20B via LM Studio", "cat": "MS", "session": ""},
|
||||
{"q": "Quels taux horaires sont pratiqués chez Primo Studio ?", "expected": "Neto 0€, Alexandre 6.50€, Pierre 7.39€ (parti)", "cat": "MS", "session": ""}
|
||||
]
|
||||
|
||||
def expand_query(query):
|
||||
variants = [query]
|
||||
lower = query.lower()
|
||||
for key, synonyms in CONCEPT_MAP.items():
|
||||
if key in lower:
|
||||
for syn in synonyms[:2]:
|
||||
variant = re.sub(re.escape(key), syn, query, flags=re.IGNORECASE)
|
||||
if variant != query and variant not in variants:
|
||||
variants.append(variant)
|
||||
for m in re.finditer(r'\b[A-Z][a-z]+(?:\s[A-Z][a-z]+)*', query):
|
||||
noun = m.group()
|
||||
if len(noun) > 2 and noun not in variants:
|
||||
variants.append(noun)
|
||||
return variants[:4]
|
||||
|
||||
def ollama_generate(prompt, model=EXTRACT_MODEL, fmt="json", timeout=120):
|
||||
body = {"model": model, "prompt": prompt, "stream": False,
|
||||
"options": {"temperature": 0.1, "num_predict": 2048, "num_ctx": 8192}}
|
||||
if fmt: body["format"] = fmt
|
||||
r = requests.post(f"{OLLAMA}/api/generate", json=body, timeout=timeout)
|
||||
r.raise_for_status()
|
||||
return r.json().get("response", "")
|
||||
|
||||
def ollama_generate_text(prompt, model=EXTRACT_MODEL, timeout=60):
|
||||
"""Generate plain text (no JSON format constraint)"""
|
||||
body = {"model": model, "prompt": prompt, "stream": False,
|
||||
"options": {"temperature": 0.1, "num_predict": 300, "num_ctx": 4096}}
|
||||
r = requests.post(f"{OLLAMA}/api/generate", json=body, timeout=timeout)
|
||||
r.raise_for_status()
|
||||
return r.json().get("response", "").strip()
|
||||
|
||||
def ollama_embed(texts):
|
||||
r = requests.post(f"{OLLAMA}/api/embed", json={"model": EMBED_MODEL, "input": texts}, timeout=60)
|
||||
r.raise_for_status()
|
||||
return r.json().get("embeddings", [])
|
||||
|
||||
def lmstudio_chat(prompt, timeout=120):
|
||||
body = {"model": ANSWER_MODEL, "messages": [{"role": "user", "content": prompt}],
|
||||
"max_tokens": 500, "temperature": 0.1}
|
||||
r = requests.post(f"{LMSTUDIO}/chat/completions", json=body, timeout=timeout)
|
||||
r.raise_for_status()
|
||||
data = r.json()
|
||||
msg = data["choices"][0]["message"]
|
||||
return msg.get("content") or msg.get("reasoning_content") or msg.get("reasoning") or ""
|
||||
|
||||
def nano_judge(question, expected, answer):
|
||||
body = {
|
||||
"model": JUDGE_MODEL,
|
||||
"messages": [
|
||||
{"role": "system", "content": "You are a fair judge evaluating answer quality. Be lenient: if the answer contains the key information from the expected answer, even with extra details or different wording, mark it as correct. Only mark wrong if the core information is missing or contradicted."},
|
||||
{"role": "user", "content": f"""Judge if this answer is correct.
|
||||
|
||||
Question: {question}
|
||||
Expected answer: {expected}
|
||||
Actual answer: {answer}
|
||||
|
||||
Return ONLY valid JSON: {{"verdict": "correct"|"partial"|"wrong", "reason": "brief explanation"}}"""}
|
||||
],
|
||||
"max_completion_tokens": 150,
|
||||
"response_format": {"type": "json_object"}
|
||||
}
|
||||
try:
|
||||
r = requests.post(f"{OPENAI}/chat/completions", json=body, timeout=30,
|
||||
headers={"Authorization": f"Bearer {OPENAI_KEY}", "Content-Type": "application/json"})
|
||||
r.raise_for_status()
|
||||
content = r.json()["choices"][0]["message"]["content"]
|
||||
j = json.loads(content)
|
||||
return j.get("verdict", "wrong"), j.get("reason", "")
|
||||
except Exception as e:
|
||||
return "wrong", f"judge error: {e}"
|
||||
|
||||
def cosine_sim(a, b):
|
||||
dot = sum(x*y for x,y in zip(a,b))
|
||||
na = sum(x*x for x in a)**0.5
|
||||
nb = sum(x*x for x in b)**0.5
|
||||
return dot/(na*nb) if na and nb else 0
|
||||
|
||||
def setup_db():
|
||||
if os.path.exists(DB_PATH):
|
||||
os.remove(DB_PATH)
|
||||
db = sqlite3.connect(DB_PATH)
|
||||
db.execute("""CREATE TABLE facts (
|
||||
id INTEGER PRIMARY KEY, fact TEXT, category TEXT, fact_type TEXT DEFAULT 'semantic',
|
||||
confidence REAL DEFAULT 0.8, session_id TEXT, source TEXT DEFAULT 'auto-capture',
|
||||
created_at TEXT DEFAULT CURRENT_TIMESTAMP,
|
||||
superseded_by INTEGER, embedding BLOB)""")
|
||||
db.execute("CREATE VIRTUAL TABLE IF NOT EXISTS facts_fts USING fts5(fact, content=facts, content_rowid=id)")
|
||||
db.execute("""CREATE TRIGGER facts_ai AFTER INSERT ON facts BEGIN
|
||||
INSERT INTO facts_fts(rowid, fact) VALUES (new.id, new.fact); END""")
|
||||
db.commit()
|
||||
return db
|
||||
|
||||
def extract_facts(text, session_id):
|
||||
prompt = f"""Extract durable facts from this conversation. Return JSON array.
|
||||
IMPORTANT: Create ONE FACT PER DISTINCT ENTITY (person, tool, project, machine).
|
||||
|
||||
Keys: fact, category (savoir/outil/erreur/preference/chronologie/rh/client), fact_type (semantic or episodic), confidence (0.0-1.0).
|
||||
|
||||
Text:
|
||||
{text}
|
||||
|
||||
Return ONLY a JSON array:"""
|
||||
raw = ollama_generate(prompt)
|
||||
try:
|
||||
data = json.loads(raw)
|
||||
if isinstance(data, dict) and "facts" in data:
|
||||
data = data["facts"]
|
||||
if isinstance(data, list):
|
||||
if data and isinstance(data[0], list):
|
||||
data = data[0]
|
||||
return [f for f in data if isinstance(f, dict) and "fact" in f]
|
||||
return []
|
||||
except:
|
||||
return []
|
||||
|
||||
def generate_clusters(db):
|
||||
"""NEW in v3.4.0: Group facts by entity, generate cluster summaries"""
|
||||
# Group facts by entity
|
||||
rows = db.execute("SELECT id, fact, category, fact_type FROM facts WHERE superseded_by IS NULL AND fact_type != 'cluster'").fetchall()
|
||||
|
||||
entity_facts = defaultdict(list)
|
||||
for row in rows:
|
||||
fid, fact, cat, ftype = row
|
||||
# Match known entities
|
||||
for entity in KNOWN_ENTITIES:
|
||||
if entity.lower() in fact.lower():
|
||||
entity_facts[entity].append({"id": fid, "fact": fact, "category": cat})
|
||||
|
||||
clusters_created = 0
|
||||
for entity, facts in entity_facts.items():
|
||||
if len(facts) < 3:
|
||||
continue
|
||||
|
||||
facts_text = "\n".join(f"- [{f['category']}] {f['fact']}" for f in facts[:12])
|
||||
prompt = f"""Tu résumes un groupe de faits liés à la même entité en UN SEUL paragraphe dense.
|
||||
Règles: contenir TOUTES les informations clés (noms, chiffres, dates, versions, états). 2-4 phrases max, dense et factuel, en français.
|
||||
|
||||
Entité: {entity}
|
||||
|
||||
Faits:
|
||||
{facts_text}
|
||||
|
||||
Résumé dense:"""
|
||||
|
||||
try:
|
||||
summary = ollama_generate_text(prompt)
|
||||
if summary and len(summary) > 20:
|
||||
# Remove any JSON/markdown artifacts
|
||||
summary = summary.strip().strip('"').strip("'")
|
||||
if summary.startswith("```"):
|
||||
summary = re.sub(r'^```.*\n?', '', summary)
|
||||
summary = re.sub(r'\n?```$', '', summary)
|
||||
|
||||
member_ids = [f["id"] for f in facts[:12]]
|
||||
db.execute(
|
||||
"INSERT INTO facts (fact, category, fact_type, confidence, session_id, source) VALUES (?, ?, 'cluster', 0.85, NULL, ?)",
|
||||
(summary, facts[0]["category"], f"cluster:{entity.lower()}")
|
||||
)
|
||||
clusters_created += 1
|
||||
except Exception as e:
|
||||
print(f" ⚠ Cluster {entity} failed: {e}")
|
||||
|
||||
db.commit()
|
||||
return clusters_created
|
||||
|
||||
def hybrid_search_expanded(db, query, top_k=5):
|
||||
queries = expand_query(query)
|
||||
all_q_embs = ollama_embed(queries)
|
||||
results = {}
|
||||
|
||||
for qi, q in enumerate(queries):
|
||||
tokens = [t for t in q.split() if len(t) > 1]
|
||||
if not tokens: continue
|
||||
fts_q = " OR ".join(f'"{t}"' for t in tokens[:10])
|
||||
try:
|
||||
rows = db.execute(
|
||||
"SELECT f.id, f.fact, f.category, f.fact_type, f.confidence, f.session_id, f.superseded_by, f.embedding "
|
||||
"FROM facts_fts fts JOIN facts f ON fts.rowid = f.id "
|
||||
"WHERE facts_fts MATCH ? AND f.superseded_by IS NULL LIMIT 20", (fts_q,)).fetchall()
|
||||
for i, r in enumerate(rows):
|
||||
if r[0] not in results:
|
||||
results[r[0]] = {"id": r[0], "fact": r[1], "cat": r[2], "type": r[3], "conf": r[4],
|
||||
"session": r[5], "emb": r[7], "fts_rank": 1.0/(i+1), "cosine": 0}
|
||||
else:
|
||||
results[r[0]]["fts_rank"] = max(results[r[0]]["fts_rank"], 1.0/(i+1))
|
||||
except:
|
||||
pass
|
||||
|
||||
rows = db.execute("SELECT id, fact, category, fact_type, confidence, session_id, embedding FROM facts WHERE superseded_by IS NULL AND embedding IS NOT NULL").fetchall()
|
||||
for r in rows:
|
||||
emb = json.loads(r[6]) if isinstance(r[6], str) else None
|
||||
if not emb: continue
|
||||
max_sim = 0
|
||||
for q_emb in all_q_embs:
|
||||
sim = cosine_sim(q_emb, emb)
|
||||
if sim > max_sim:
|
||||
max_sim = sim
|
||||
if max_sim > 0.3:
|
||||
if r[0] in results:
|
||||
results[r[0]]["cosine"] = max(results[r[0]].get("cosine", 0), max_sim)
|
||||
else:
|
||||
results[r[0]] = {"id": r[0], "fact": r[1], "cat": r[2], "type": r[3], "conf": r[4],
|
||||
"session": r[5], "emb": r[6], "fts_rank": 0, "cosine": max_sim}
|
||||
|
||||
short_query = len(query.split()) <= 4
|
||||
w_fts = 0.20 if short_query else 0.40
|
||||
w_cos = 0.55 if short_query else 0.40
|
||||
w_conf = 0.25 if short_query else 0.20
|
||||
|
||||
scored = []
|
||||
for r in results.values():
|
||||
score = w_fts * r.get("fts_rank", 0) + w_cos * r.get("cosine", 0) + w_conf * r.get("conf", 0.5)
|
||||
# Cluster boost: 15% more weight (info-dense)
|
||||
if r.get("type") == "cluster":
|
||||
score *= 1.15
|
||||
scored.append((score, r))
|
||||
|
||||
scored.sort(key=lambda x: -x[0])
|
||||
return [(s, r["fact"], r["cat"]) for s, r in scored[:top_k]]
|
||||
|
||||
def main():
|
||||
print(f"=== Memoria v3.4.0 Benchmark — Fact Clusters + Nano Judge ===")
|
||||
print(f"Extraction: {EXTRACT_MODEL} (Ollama) | Answers: {ANSWER_MODEL} (LM Studio)")
|
||||
print(f"Embeddings: {EMBED_MODEL} (Ollama) | Judge: {JUDGE_MODEL} (OpenAI)")
|
||||
print(f"NEW: Fact Clusters (entity-grouped summaries)")
|
||||
print()
|
||||
|
||||
# Test judge
|
||||
print("🔑 Testing GPT-5.4-nano judge...")
|
||||
v, r = nano_judge("What is 2+2?", "4", "The answer is 4.")
|
||||
print(f" Test: verdict={v}")
|
||||
if "error" in r.lower():
|
||||
print(" ❌ Judge not working, aborting.")
|
||||
return
|
||||
print()
|
||||
|
||||
db = setup_db()
|
||||
|
||||
# Phase 1: Extract atomic facts
|
||||
print("📥 Phase 1: Ingestion (dense extraction)...")
|
||||
t0 = time.time()
|
||||
total_facts = 0
|
||||
for sess in SESSIONS:
|
||||
text = "\n".join(sess["messages"])
|
||||
facts = extract_facts(text, sess["id"])
|
||||
for f in facts:
|
||||
db.execute("INSERT INTO facts (fact, category, fact_type, confidence, session_id) VALUES (?,?,?,?,?)",
|
||||
(f["fact"], f.get("category", "savoir"), f.get("fact_type", "semantic"),
|
||||
f.get("confidence", 0.8), sess["id"]))
|
||||
total_facts += 1
|
||||
db.commit()
|
||||
ingest_time = time.time() - t0
|
||||
print(f" ✅ {total_facts} atomic facts from {len(SESSIONS)} sessions in {ingest_time:.1f}s")
|
||||
|
||||
# Phase 2: Generate clusters (NEW in v3.4.0)
|
||||
print("\n🧩 Phase 2: Fact Clusters...")
|
||||
t0 = time.time()
|
||||
num_clusters = generate_clusters(db)
|
||||
cluster_time = time.time() - t0
|
||||
total_with_clusters = db.execute("SELECT COUNT(*) FROM facts WHERE superseded_by IS NULL").fetchone()[0]
|
||||
print(f" ✅ {num_clusters} clusters generated in {cluster_time:.1f}s ({total_with_clusters} total facts)")
|
||||
|
||||
# Phase 3: Embeddings (atomic + clusters)
|
||||
print("\n📐 Phase 3: Embeddings...")
|
||||
t0 = time.time()
|
||||
rows = db.execute("SELECT id, fact FROM facts WHERE superseded_by IS NULL").fetchall()
|
||||
batch_size = 20
|
||||
embedded = 0
|
||||
for i in range(0, len(rows), batch_size):
|
||||
batch = rows[i:i+batch_size]
|
||||
texts = [r[1] for r in batch]
|
||||
embs = ollama_embed(texts)
|
||||
for (rid, _), emb in zip(batch, embs):
|
||||
db.execute("UPDATE facts SET embedding = ? WHERE id = ?", (json.dumps(emb), rid))
|
||||
embedded += 1
|
||||
db.commit()
|
||||
embed_time = time.time() - t0
|
||||
print(f" ✅ {embedded}/{len(rows)} embedded in {embed_time:.1f}s")
|
||||
|
||||
# Phase 4: Q&A
|
||||
print(f"\n❓ Phase 4: Q&A + Nano Judge ({len(QUESTIONS)} questions)...")
|
||||
results = []
|
||||
cats = {}
|
||||
for i, q in enumerate(QUESTIONS):
|
||||
t0 = time.time()
|
||||
expanded = expand_query(q["q"])
|
||||
hits = hybrid_search_expanded(db, q["q"], top_k=5)
|
||||
retrieval_hit = any(
|
||||
any(kw.lower() in h[1].lower() for kw in q["expected"].split(", ")[:2])
|
||||
for h in hits
|
||||
)
|
||||
context = "\n".join(f"- [{h[2]}] {h[1]}" for h in hits)
|
||||
|
||||
prompt = f"""Based on the following facts, answer the question concisely in French.
|
||||
If you find contradicting information, prefer the most recent one.
|
||||
|
||||
Facts:
|
||||
{context}
|
||||
|
||||
Question: {q['q']}
|
||||
|
||||
Answer concisely:"""
|
||||
try:
|
||||
answer = lmstudio_chat(prompt, timeout=120)
|
||||
except Exception as e:
|
||||
answer = f"[Error: {e}]"
|
||||
|
||||
latency = time.time() - t0
|
||||
verdict, reason = nano_judge(q["q"], q["expected"], answer)
|
||||
|
||||
cat = q["cat"]
|
||||
if cat not in cats:
|
||||
cats[cat] = {"correct": 0, "partial": 0, "wrong": 0, "retrieval_hits": 0, "total": 0}
|
||||
cats[cat]["total"] += 1
|
||||
cats[cat][verdict] += 1
|
||||
if retrieval_hit:
|
||||
cats[cat]["retrieval_hits"] += 1
|
||||
|
||||
# Check if a cluster was in the top hits
|
||||
has_cluster = any("cluster" in str(h) for h in hits)
|
||||
cluster_marker = " 🧩" if has_cluster else ""
|
||||
|
||||
status = "✅" if verdict == "correct" else "🟡" if verdict == "partial" else "❌"
|
||||
exp_str = f" [+{len(expanded)-1}exp]" if len(expanded) > 1 else ""
|
||||
print(f" {status} Q{i+1} [{cat}] {q['q'][:50]}... → {verdict} ({latency:.1f}s) {'📎' if retrieval_hit else '🔍✗'}{exp_str}{cluster_marker}")
|
||||
|
||||
results.append({
|
||||
"question": q["q"], "expected": q["expected"], "answer": answer,
|
||||
"verdict": verdict, "reason": reason, "category": cat,
|
||||
"retrieval_hit": retrieval_hit, "latency_s": round(latency, 2),
|
||||
"expanded_queries": expanded,
|
||||
"context_facts": [h[1] for h in hits]
|
||||
})
|
||||
|
||||
total_correct = sum(c["correct"] for c in cats.values())
|
||||
total_partial = sum(c["partial"] for c in cats.values())
|
||||
total_retrieval = sum(c["retrieval_hits"] for c in cats.values())
|
||||
avg_latency = sum(r["latency_s"] for r in results) / len(results)
|
||||
acc = (total_correct+total_partial/2)/len(QUESTIONS)*100
|
||||
ret = total_retrieval/len(QUESTIONS)*100
|
||||
|
||||
print(f"\n{'='*60}")
|
||||
print(f"📊 RÉSULTATS — Memoria v3.4.0 + Fact Clusters")
|
||||
print(f"{'='*60}")
|
||||
print(f" Facts: {total_facts} atomic + {num_clusters} clusters = {total_with_clusters} total")
|
||||
print(f" Accuracy: {total_correct}/{len(QUESTIONS)} correct + {total_partial} partial = {acc:.1f}%")
|
||||
print(f" Retrieval: {total_retrieval}/{len(QUESTIONS)} = {ret:.1f}%")
|
||||
print(f" Latency: {avg_latency:.1f}s avg")
|
||||
print(f"\n Par catégorie:")
|
||||
for cat in ["SSU", "SSA", "SSP", "KU", "TR", "MS"]:
|
||||
if cat in cats:
|
||||
c = cats[cat]
|
||||
print(f" {cat}: {c['correct']}/{c['total']} correct, {c['partial']} partial, retrieval {c['retrieval_hits']}/{c['total']}")
|
||||
|
||||
print(f"\n 📈 vs v3.3.0 (nano judge, same pipeline sans clusters):")
|
||||
print(f" v3.3.0: 75.0% accuracy, 43.3% retrieval")
|
||||
print(f" v3.4.0: {acc:.1f}% accuracy, {ret:.1f}% retrieval")
|
||||
print(f" Clusters: {num_clusters} generated in {cluster_time:.1f}s")
|
||||
|
||||
output = {
|
||||
"benchmark": "Memoria v3.4.0 (fact clusters)", "date": time.strftime("%Y-%m-%d %H:%M:%S"),
|
||||
"config": {"extract": EXTRACT_MODEL, "answer": ANSWER_MODEL, "embed": EMBED_MODEL,
|
||||
"judge": JUDGE_MODEL, "query_expansion": True, "fact_clusters": True},
|
||||
"metrics": {
|
||||
"atomic_facts": total_facts, "clusters": num_clusters, "total_facts": total_with_clusters,
|
||||
"embedded": embedded,
|
||||
"ingest_time_s": round(ingest_time, 1), "cluster_time_s": round(cluster_time, 1),
|
||||
"embed_time_s": round(embed_time, 1),
|
||||
"accuracy": round(acc, 1), "correct": total_correct, "partial": total_partial,
|
||||
"wrong": len(QUESTIONS)-total_correct-total_partial,
|
||||
"retrieval_rate": round(ret, 1), "avg_latency_s": round(avg_latency, 1)
|
||||
},
|
||||
"by_category": cats, "results": results
|
||||
}
|
||||
with open(RESULTS_PATH, "w") as f:
|
||||
json.dump(output, f, indent=2, ensure_ascii=False)
|
||||
print(f"\n 💾 Résultats: {RESULTS_PATH}")
|
||||
print(f" 💾 DB: {DB_PATH}")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -0,0 +1,736 @@
|
||||
{
|
||||
"benchmark": "Memoria v3.4.0 (fact clusters)",
|
||||
"date": "2026-03-26 08:37:02",
|
||||
"config": {
|
||||
"extract": "gemma3:4b",
|
||||
"answer": "openai/gpt-oss-20b",
|
||||
"embed": "nomic-embed-text-v2-moe",
|
||||
"judge": "gpt-5.4-nano",
|
||||
"query_expansion": true,
|
||||
"fact_clusters": true
|
||||
},
|
||||
"metrics": {
|
||||
"atomic_facts": 39,
|
||||
"clusters": 5,
|
||||
"total_facts": 44,
|
||||
"embedded": 44,
|
||||
"ingest_time_s": 35.8,
|
||||
"cluster_time_s": 7.8,
|
||||
"embed_time_s": 0.6,
|
||||
"accuracy": 81.7,
|
||||
"correct": 22,
|
||||
"partial": 5,
|
||||
"wrong": 3,
|
||||
"retrieval_rate": 50.0,
|
||||
"avg_latency_s": 2.4
|
||||
},
|
||||
"by_category": {
|
||||
"SSU": {
|
||||
"correct": 5,
|
||||
"partial": 0,
|
||||
"wrong": 0,
|
||||
"retrieval_hits": 4,
|
||||
"total": 5
|
||||
},
|
||||
"SSA": {
|
||||
"correct": 3,
|
||||
"partial": 1,
|
||||
"wrong": 1,
|
||||
"retrieval_hits": 3,
|
||||
"total": 5
|
||||
},
|
||||
"SSP": {
|
||||
"correct": 5,
|
||||
"partial": 0,
|
||||
"wrong": 0,
|
||||
"retrieval_hits": 1,
|
||||
"total": 5
|
||||
},
|
||||
"KU": {
|
||||
"correct": 5,
|
||||
"partial": 0,
|
||||
"wrong": 0,
|
||||
"retrieval_hits": 3,
|
||||
"total": 5
|
||||
},
|
||||
"TR": {
|
||||
"correct": 3,
|
||||
"partial": 1,
|
||||
"wrong": 1,
|
||||
"retrieval_hits": 2,
|
||||
"total": 5
|
||||
},
|
||||
"MS": {
|
||||
"correct": 1,
|
||||
"partial": 3,
|
||||
"wrong": 1,
|
||||
"retrieval_hits": 2,
|
||||
"total": 5
|
||||
}
|
||||
},
|
||||
"results": [
|
||||
{
|
||||
"question": "Quel modèle d'extraction est configuré sur Sol ?",
|
||||
"expected": "gemma3:4b",
|
||||
"answer": "Le modèle d'extraction configuré sur Sol est **gemma3:4b**.",
|
||||
"verdict": "correct",
|
||||
"reason": "L’état répond explicitement que le modèle d'extraction configuré sur Sol est gemma3:4b, ce qui correspond à l'attendu.",
|
||||
"category": "SSU",
|
||||
"retrieval_hit": true,
|
||||
"latency_s": 9.09,
|
||||
"expanded_queries": [
|
||||
"Quel modèle d'extraction est configuré sur Sol ?",
|
||||
"Quel model d'extraction est configuré sur Sol ?",
|
||||
"Quel modèle d'extraction est configurationuré sur Sol ?",
|
||||
"Quel"
|
||||
],
|
||||
"context_facts": [
|
||||
"Ollama was configured on Sol with gemma3:4b as the extraction model.",
|
||||
"Sol is a Mac Mini.",
|
||||
"Sur un Mac Mini nommé Sol, l'outil Ollama a été configuré avec le modèle d'extraction gemma3:4b. Sol est disponible 24 heures sur 24 et 7 jours sur 7 pour les tâches de développement, constituant ainsi une plateforme accessible en continu pour l'expérimentation avec ce modèle spécifique.",
|
||||
"Sol is available 24/7 for development tasks.",
|
||||
"Ollama a été configuré sur Sol avec le modèle gemma3:4b comme modèle d'extraction, tandis que la chaîne de secours utilise Ollama, LM Studio et OpenAI. L'ajout de Memoria v3.2.0 apporte un support pour les modèles de raisonnement compatibles avec Ollama et OpenAI, élargissant ainsi les capacités de l'outil."
|
||||
]
|
||||
},
|
||||
{
|
||||
"question": "Combien de structures gère le CRM Bureau ?",
|
||||
"expected": "11",
|
||||
"answer": "Le CRM Bureau gère **11** structures.",
|
||||
"verdict": "correct",
|
||||
"reason": "L’answer indique clairement que le CRM Bureau gère 11 structures, ce qui correspond à la réponse attendue.",
|
||||
"category": "SSU",
|
||||
"retrieval_hit": true,
|
||||
"latency_s": 1.5,
|
||||
"expanded_queries": [
|
||||
"Combien de structures gère le CRM Bureau ?",
|
||||
"Combien",
|
||||
"Bureau"
|
||||
],
|
||||
"context_facts": [
|
||||
"Bureau utilise Convex comme backend",
|
||||
"Le module CRM gère 11 structures : entreprises, associations et collectivités",
|
||||
"The token Hello-Primo is used for Bureau projects on Vercel.",
|
||||
"Convex est utilisé comme backend par l'outil Bureau, notamment pour la synchronisation de Qonto via une action Convex, et non via un script bash. Cette action Convex est actuellement en cours d'utilisation pour la synchronisation Qonto, bien que son implémentation initiale ait été une action et non un script bash. L'utilisation de Convex est donc centrale à la fonctionnalité de synchronisation Qonto au sein de l'écosystème Bureau.",
|
||||
"Vercel est une plateforme de déploiement d'applications, notamment via l'intégration avec GitHub pour un déploiement automatique. Elle utilise le token \"Hello-Primo\" pour les projets Bureau et gère le DNS de primo-studio.fr via Cloudflare, avec un proxy vers Vercel et un tunnel vers NAS via Directus. Ces configurations permettent une infrastructure optimisée pour les applications déployées sur Vercel."
|
||||
]
|
||||
},
|
||||
{
|
||||
"question": "Quelle est la timezone de Neto ?",
|
||||
"expected": "America/Cayenne",
|
||||
"answer": "La timezone de Neto est **America/Cayenne (GMT‑3)**.",
|
||||
"verdict": "correct",
|
||||
"reason": "L’explication indique clairement que la timezone de Neto est America/Cayenne (GMT‑3), ce qui correspond à la réponse attendue.",
|
||||
"category": "SSU",
|
||||
"retrieval_hit": true,
|
||||
"latency_s": 1.34,
|
||||
"expanded_queries": [
|
||||
"Quelle est la timezone de Neto ?",
|
||||
"Quelle",
|
||||
"Neto"
|
||||
],
|
||||
"context_facts": [
|
||||
"Neto works from French Guiana, timezone America/Cayenne (GMT-3).",
|
||||
"Neto prefers a step-by-step approach and dislikes regressions.",
|
||||
"La sync Qonto se fait via une action Convex",
|
||||
"Neto travaille depuis la Guyane française, avec un fuseau horaire de GMT-3, et privilégie une approche méthodique, détestant les régessions. Son environnement de travail principal est un Mac Studio équipé de 64 Go de RAM. Il utilise cet outil pour ses activités, sans indication de version spécifique.",
|
||||
"Neto's main machine is a Mac Studio with 64 GB of RAM."
|
||||
]
|
||||
},
|
||||
{
|
||||
"question": "Comment se fait le deploy de Primask ?",
|
||||
"expected": "GitHub vers Vercel auto-deploy",
|
||||
"answer": "Le déploiement de Primask se fait automatiquement depuis GitHub vers Vercel : chaque push déclenche un auto‑deploy sur Vercel, qui utilise le token « Hello‑Primo » et gère le DNS de primo‑studio.fr via Cloudflare.",
|
||||
"verdict": "correct",
|
||||
"reason": "L’answer indique que le déploiement se fait automatiquement depuis GitHub vers Vercel et que chaque push déclenche un auto-deploy sur Vercel, ce qui correspond à l’attendu (GitHub → Vercel auto-deploy). Des détails supplémentaires sur le token et le DNS sont ajoutés sans contredire.",
|
||||
"category": "SSU",
|
||||
"retrieval_hit": false,
|
||||
"latency_s": 2.12,
|
||||
"expanded_queries": [
|
||||
"Comment se fait le deploy de Primask ?",
|
||||
"Comment se fait le déploiement de Primask ?",
|
||||
"Comment",
|
||||
"Primask"
|
||||
],
|
||||
"context_facts": [
|
||||
"Primask is a planning app developed by Primo Studio.",
|
||||
"Le module CRM gère 11 structures : entreprises, associations et collectivités",
|
||||
"Deployment is done via GitHub → Vercel with auto-deploy.",
|
||||
"Vercel est une plateforme de déploiement d'applications, notamment via l'intégration avec GitHub pour un déploiement automatique. Elle utilise le token \"Hello-Primo\" pour les projets Bureau et gère le DNS de primo-studio.fr via Cloudflare, avec un proxy vers Vercel et un tunnel vers NAS via Directus. Ces configurations permettent une infrastructure optimisée pour les applications déployées sur Vercel.",
|
||||
"La sync Qonto se fait via une action Convex"
|
||||
]
|
||||
},
|
||||
{
|
||||
"question": "Quel est le rôle de Luna ?",
|
||||
"expected": "iMac, emails et calendrier",
|
||||
"answer": "Luna est un iMac dédié à la gestion des emails et du calendrier via CalDAV iCloud.",
|
||||
"verdict": "correct",
|
||||
"reason": "L’énoncé mentionne bien qu’il s’agit d’un iMac dédié et qu’il gère les emails et le calendrier via CalDAV iCloud, ce qui correspond à « iMac, emails et calendrier ».",
|
||||
"category": "SSU",
|
||||
"retrieval_hit": true,
|
||||
"latency_s": 1.8,
|
||||
"expanded_queries": [
|
||||
"Quel est le rôle de Luna ?",
|
||||
"Quel",
|
||||
"Luna"
|
||||
],
|
||||
"context_facts": [
|
||||
"Luna is an iMac.",
|
||||
"Le module CRM gère 11 structures : entreprises, associations et collectivités",
|
||||
"Luna manages emails and calendar via CalDAV iCloud.",
|
||||
"DockGroups est une app macOS menu bar pour organiser le Dock.",
|
||||
"Sur un Mac Mini nommé Sol, l'outil Ollama a été configuré avec le modèle d'extraction gemma3:4b. Sol est disponible 24 heures sur 24 et 7 jours sur 7 pour les tâches de développement, constituant ainsi une plateforme accessible en continu pour l'expérimentation avec ce modèle spécifique."
|
||||
]
|
||||
},
|
||||
{
|
||||
"question": "Quels sont les 3 niveaux du fallback chain de Memoria ?",
|
||||
"expected": "Ollama, LM Studio, OpenAI",
|
||||
"answer": "Les trois niveaux de la chaîne de secours (fallback chain) de Memoria sont : \n1. Ollama \n2. LM Studio \n3. OpenAI",
|
||||
"verdict": "correct",
|
||||
"reason": "L’orchestration attendue (Ollama, LM Studio, OpenAI) apparaît exactement dans la réponse, dans le même ordre.",
|
||||
"category": "SSA",
|
||||
"retrieval_hit": true,
|
||||
"latency_s": 1.66,
|
||||
"expanded_queries": [
|
||||
"Quels sont les 3 niveaux du fallback chain de Memoria ?",
|
||||
"Quels",
|
||||
"Memoria"
|
||||
],
|
||||
"context_facts": [
|
||||
"Memoria's retrieval rate is 93.3% with gemma3:4b.",
|
||||
"The fallback chain is Ollama → LM Studio → OpenAI.",
|
||||
"Memoria v3.2.0 adds support for models with reasoning (Ollama/OpenAI-compatible).",
|
||||
"Ollama a été configuré sur Sol avec le modèle gemma3:4b comme modèle d'extraction, tandis que la chaîne de secours utilise Ollama, LM Studio et OpenAI. L'ajout de Memoria v3.2.0 apporte un support pour les modèles de raisonnement compatibles avec Ollama et OpenAI, élargissant ainsi les capacités de l'outil.",
|
||||
"Sur un Mac Mini nommé Sol, l'outil Ollama a été configuré avec le modèle d'extraction gemma3:4b. Sol est disponible 24 heures sur 24 et 7 jours sur 7 pour les tâches de développement, constituant ainsi une plateforme accessible en continu pour l'expérimentation avec ce modèle spécifique."
|
||||
]
|
||||
},
|
||||
{
|
||||
"question": "Quels types de structures le CRM Bureau gère-t-il ?",
|
||||
"expected": "entreprises, associations, collectivités",
|
||||
"answer": "Le CRM Bureau gère des **entreprises, associations et collectivités**.",
|
||||
"verdict": "correct",
|
||||
"reason": "L’answer mentionne bien les trois types de structures : entreprises, associations et collectivités, conformément à la réponse attendue.",
|
||||
"category": "SSA",
|
||||
"retrieval_hit": true,
|
||||
"latency_s": 1.57,
|
||||
"expanded_queries": [
|
||||
"Quels types de structures le CRM Bureau gère-t-il ?",
|
||||
"Quels",
|
||||
"Bureau"
|
||||
],
|
||||
"context_facts": [
|
||||
"Bureau utilise Convex comme backend",
|
||||
"Le module CRM gère 11 structures : entreprises, associations et collectivités",
|
||||
"The token Hello-Primo is used for Bureau projects on Vercel.",
|
||||
"Convex est utilisé comme backend par l'outil Bureau, notamment pour la synchronisation de Qonto via une action Convex, et non via un script bash. Cette action Convex est actuellement en cours d'utilisation pour la synchronisation Qonto, bien que son implémentation initiale ait été une action et non un script bash. L'utilisation de Convex est donc centrale à la fonctionnalité de synchronisation Qonto au sein de l'écosystème Bureau.",
|
||||
"Vercel est une plateforme de déploiement d'applications, notamment via l'intégration avec GitHub pour un déploiement automatique. Elle utilise le token \"Hello-Primo\" pour les projets Bureau et gère le DNS de primo-studio.fr via Cloudflare, avec un proxy vers Vercel et un tunnel vers NAS via Directus. Ces configurations permettent une infrastructure optimisée pour les applications déployées sur Vercel."
|
||||
]
|
||||
},
|
||||
{
|
||||
"question": "Quelles sont les 3 machines de l'équipe et leurs rôles ?",
|
||||
"expected": "Mac Studio (Neto), iMac Luna (emails/cal), Mac Mini Sol (dev 24/7)",
|
||||
"answer": "- **Sol** (Mac Mini) : plateforme de développement 24/7, hébergeant Ollama avec le modèle gemma3:4b pour l’extraction et les tests. \n- **Mac Studio de Neto** : poste principal de travail, utilisé depuis la Guyane française (GMT‑3) pour les activités quotidiennes. \n- **Serveur Convex** : backend du système Bureau, responsable de la synchronisation Qonto via l’action Convex.",
|
||||
"verdict": "partial",
|
||||
"reason": "L’answer mentionne bien 3 machines, dont Mac Studio de Neto et une machine “Sol” (Mac Mini) pour le dev 24/7. En revanche, il manque l’iMac Luna (emails/cal) et remplace ce point par “Serveur Convex”, donc le rôle de la 3e machine attendue n’est pas couvert.",
|
||||
"category": "SSA",
|
||||
"retrieval_hit": false,
|
||||
"latency_s": 7.38,
|
||||
"expanded_queries": [
|
||||
"Quelles sont les 3 machines de l'équipe et leurs rôles ?",
|
||||
"Quelles"
|
||||
],
|
||||
"context_facts": [
|
||||
"Le module CRM gère 11 structures : entreprises, associations et collectivités",
|
||||
"Sur un Mac Mini nommé Sol, l'outil Ollama a été configuré avec le modèle d'extraction gemma3:4b. Sol est disponible 24 heures sur 24 et 7 jours sur 7 pour les tâches de développement, constituant ainsi une plateforme accessible en continu pour l'expérimentation avec ce modèle spécifique.",
|
||||
"Ollama a été configuré sur Sol avec le modèle gemma3:4b comme modèle d'extraction, tandis que la chaîne de secours utilise Ollama, LM Studio et OpenAI. L'ajout de Memoria v3.2.0 apporte un support pour les modèles de raisonnement compatibles avec Ollama et OpenAI, élargissant ainsi les capacités de l'outil.",
|
||||
"Convex est utilisé comme backend par l'outil Bureau, notamment pour la synchronisation de Qonto via une action Convex, et non via un script bash. Cette action Convex est actuellement en cours d'utilisation pour la synchronisation Qonto, bien que son implémentation initiale ait été une action et non un script bash. L'utilisation de Convex est donc centrale à la fonctionnalité de synchronisation Qonto au sein de l'écosystème Bureau.",
|
||||
"Neto travaille depuis la Guyane française, avec un fuseau horaire de GMT-3, et privilégie une approche méthodique, détestant les régessions. Son environnement de travail principal est un Mac Studio équipé de 64 Go de RAM. Il utilise cet outil pour ses activités, sans indication de version spécifique."
|
||||
]
|
||||
},
|
||||
{
|
||||
"question": "Quelles features a apporté Memoria v3.2.0 ?",
|
||||
"expected": "reasoning models, dated recall, procedures multi-phrases, adaptive hybridSearch",
|
||||
"answer": "Memoria v3.2.0 a ajouté le support des modèles de raisonnement compatibles avec Ollama et OpenAI, élargissant ainsi les capacités d’extraction.",
|
||||
"verdict": "wrong",
|
||||
"reason": "L’article attendu mentionne quatre caractéristiques spécifiques (reasoning models, dated recall, procédures multi-phrases, adaptive hybridSearch). La réponse fournie ne cite que le support de modèles compatibles Ollama/OpenAI et l’extraction, sans mentionner les autres fonctionnalités.",
|
||||
"category": "SSA",
|
||||
"retrieval_hit": false,
|
||||
"latency_s": 1.67,
|
||||
"expanded_queries": [
|
||||
"Quelles features a apporté Memoria v3.2.0 ?",
|
||||
"Quelles",
|
||||
"Memoria"
|
||||
],
|
||||
"context_facts": [
|
||||
"Memoria's retrieval rate is 93.3% with gemma3:4b.",
|
||||
"Memoria v3.2.0 adds support for models with reasoning (Ollama/OpenAI-compatible).",
|
||||
"Ollama a été configuré sur Sol avec le modèle gemma3:4b comme modèle d'extraction, tandis que la chaîne de secours utilise Ollama, LM Studio et OpenAI. L'ajout de Memoria v3.2.0 apporte un support pour les modèles de raisonnement compatibles avec Ollama et OpenAI, élargissant ainsi les capacités de l'outil.",
|
||||
"Neto's main machine is a Mac Studio with 64 GB of RAM.",
|
||||
"Version actuelle : v0.5.0 avec apply-to-dock sécurisé via SIGHUP."
|
||||
]
|
||||
},
|
||||
{
|
||||
"question": "Quels services Cloudflare fournit pour primo-studio.fr ?",
|
||||
"expected": "DNS, proxy Vercel front, tunnel NAS Directus",
|
||||
"answer": "Cloudflare gère le DNS de primo‑studio.fr, fournit un proxy vers Vercel pour l’hébergement front‑end et ouvre un tunnel vers le NAS via Directus.",
|
||||
"verdict": "correct",
|
||||
"reason": "L’answer mentionne bien : gestion DNS de primo-studio.fr, un proxy vers Vercel pour le front-end, et un tunnel vers le NAS via Directus. Aucun élément clé n’est manquant ou contredit.",
|
||||
"category": "SSA",
|
||||
"retrieval_hit": true,
|
||||
"latency_s": 1.82,
|
||||
"expanded_queries": [
|
||||
"Quels services Cloudflare fournit pour primo-studio.fr ?",
|
||||
"Quels",
|
||||
"Cloudflare"
|
||||
],
|
||||
"context_facts": [
|
||||
"Cloudflare manages the DNS for primo-studio.fr with a proxy to Vercel (front) and a tunnel to NAS (Directus).",
|
||||
"The Cloudflare zone ID is 403c7dc0dfe5c1ec6e94d92d8d0765ba.",
|
||||
"Vercel est une plateforme de déploiement d'applications, notamment via l'intégration avec GitHub pour un déploiement automatique. Elle utilise le token \"Hello-Primo\" pour les projets Bureau et gère le DNS de primo-studio.fr via Cloudflare, avec un proxy vers Vercel et un tunnel vers NAS via Directus. Ces configurations permettent une infrastructure optimisée pour les applications déployées sur Vercel.",
|
||||
"DockGroups est une app macOS menu bar pour organiser le Dock.",
|
||||
"Primask is a planning app developed by Primo Studio."
|
||||
]
|
||||
},
|
||||
{
|
||||
"question": "Comment configurer le fallback chain Memoria ?",
|
||||
"expected": "Ollama → LM Studio → OpenAI dans la config",
|
||||
"answer": "Configurez la chaîne de secours (fallback chain) de Memoria en suivant l’ordre : \n1. **Ollama** (exemple : modèle *gemma3:4b* pour l’extraction) \n2. **LM Studio** (si Ollama échoue) \n3. **OpenAI** (en dernier recours). \n\nAvec Memoria v3.2.0, assurez‑vous que les modèles de raisonnement compatibles Ollama/OpenAI sont activés pour profiter pleinement du fallback.",
|
||||
"verdict": "correct",
|
||||
"reason": "L’actual answer indique bien l’ordre du fallback chain : Ollama → LM Studio → OpenAI, avec des détails supplémentaires sur Memoria v3.2.0 et les modèles compatibles.",
|
||||
"category": "SSP",
|
||||
"retrieval_hit": false,
|
||||
"latency_s": 2.99,
|
||||
"expanded_queries": [
|
||||
"Comment configurer le fallback chain Memoria ?",
|
||||
"Comment configurationurer le fallback chain Memoria ?",
|
||||
"Comment",
|
||||
"Memoria"
|
||||
],
|
||||
"context_facts": [
|
||||
"The fallback chain is Ollama → LM Studio → OpenAI.",
|
||||
"Memoria's retrieval rate is 93.3% with gemma3:4b.",
|
||||
"Memoria v3.2.0 adds support for models with reasoning (Ollama/OpenAI-compatible).",
|
||||
"Ollama a été configuré sur Sol avec le modèle gemma3:4b comme modèle d'extraction, tandis que la chaîne de secours utilise Ollama, LM Studio et OpenAI. L'ajout de Memoria v3.2.0 apporte un support pour les modèles de raisonnement compatibles avec Ollama et OpenAI, élargissant ainsi les capacités de l'outil.",
|
||||
"Le module CRM gère 11 structures : entreprises, associations et collectivités"
|
||||
]
|
||||
},
|
||||
{
|
||||
"question": "Comment synchroniser Qonto avec Bureau ?",
|
||||
"expected": "via action Convex syncQonto, pas script bash",
|
||||
"answer": "Pour synchroniser Qonto avec Bureau : utilisez l’action Convex intégrée dans le backend de Bureau (et non un script bash). Cette action, déjà en production, gère la synchronisation Qonto via Convex.",
|
||||
"verdict": "correct",
|
||||
"reason": "L’essentiel est présent : synchroniser Qonto avec Bureau via l’action Convex intégrée, et explicitement pas via un script bash. Répond à la contrainte attendue.",
|
||||
"category": "SSP",
|
||||
"retrieval_hit": false,
|
||||
"latency_s": 1.91,
|
||||
"expanded_queries": [
|
||||
"Comment synchroniser Qonto avec Bureau ?",
|
||||
"Comment",
|
||||
"Qonto",
|
||||
"Bureau"
|
||||
],
|
||||
"context_facts": [
|
||||
"La sync Qonto se fait via une action Convex",
|
||||
"Bureau utilise Convex comme backend",
|
||||
"La sync Qonto se fait via une action Convex, pas un script bash",
|
||||
"Convex est utilisé comme backend par l'outil Bureau, notamment pour la synchronisation de Qonto via une action Convex, et non via un script bash. Cette action Convex est actuellement en cours d'utilisation pour la synchronisation Qonto, bien que son implémentation initiale ait été une action et non un script bash. L'utilisation de Convex est donc centrale à la fonctionnalité de synchronisation Qonto au sein de l'écosystème Bureau.",
|
||||
"The token Hello-Primo is used for Bureau projects on Vercel."
|
||||
]
|
||||
},
|
||||
{
|
||||
"question": "Quelle est la procédure pour déployer sur Vercel ?",
|
||||
"expected": "push GitHub déclenche auto-deploy",
|
||||
"answer": "Déployer sur Vercel : \n1. Pousser le code vers GitHub (branch principale). \n2. Vercel détecte la push et lance automatiquement le déploiement via l’intégration GitHub. \n3. Le token « Hello‑Primo » est utilisé pour les projets Bureau afin d’autoriser le déploiement. \n4. Vercel gère le DNS via Cloudflare (proxy vers Vercel) et ouvre un tunnel Directus vers le NAS.",
|
||||
"verdict": "correct",
|
||||
"reason": "La réponse inclut bien l’idée clé que la push sur GitHub déclenche un déploiement automatique via l’intégration GitHub sur Vercel.",
|
||||
"category": "SSP",
|
||||
"retrieval_hit": false,
|
||||
"latency_s": 2.5,
|
||||
"expanded_queries": [
|
||||
"Quelle est la procédure pour déployer sur Vercel ?",
|
||||
"Quelle",
|
||||
"Vercel"
|
||||
],
|
||||
"context_facts": [
|
||||
"Deployment is done via GitHub → Vercel with auto-deploy.",
|
||||
"La sync Qonto se fait via une action Convex",
|
||||
"Vercel est une plateforme de déploiement d'applications, notamment via l'intégration avec GitHub pour un déploiement automatique. Elle utilise le token \"Hello-Primo\" pour les projets Bureau et gère le DNS de primo-studio.fr via Cloudflare, avec un proxy vers Vercel et un tunnel vers NAS via Directus. Ces configurations permettent une infrastructure optimisée pour les applications déployées sur Vercel.",
|
||||
"The token Hello-Primo is used for Bureau projects on Vercel.",
|
||||
"Cloudflare manages the DNS for primo-studio.fr with a proxy to Vercel (front) and a tunnel to NAS (Directus)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"question": "Comment appliquer les groupes au Dock dans DockGroups ?",
|
||||
"expected": "apply-to-dock sécurisé via SIGHUP",
|
||||
"answer": "Appliquez les groupes en lançant : \n`dockgroups apply-to-dock --sighup` (ou simplement `dockgroups apply-to-dock`). \nCette commande envoie un SIGHUP au Dock pour recharger les groupes.",
|
||||
"verdict": "correct",
|
||||
"reason": "L’answer indique d’utiliser dockgroups apply-to-dock avec l’option --sighup pour envoyer un SIGHUP au Dock et recharger les groupes, ce qui correspond à l’attendu.",
|
||||
"category": "SSP",
|
||||
"retrieval_hit": true,
|
||||
"latency_s": 1.9,
|
||||
"expanded_queries": [
|
||||
"Comment appliquer les groupes au Dock dans DockGroups ?",
|
||||
"Comment",
|
||||
"Dock"
|
||||
],
|
||||
"context_facts": [
|
||||
"DockGroups est une app macOS menu bar pour organiser le Dock.",
|
||||
"Version actuelle : v0.5.0 avec apply-to-dock sécurisé via SIGHUP.",
|
||||
"Le drag & drop a été retiré car instable dans MenuBarExtra.",
|
||||
"Sur un Mac Mini nommé Sol, l'outil Ollama a été configuré avec le modèle d'extraction gemma3:4b. Sol est disponible 24 heures sur 24 et 7 jours sur 7 pour les tâches de développement, constituant ainsi une plateforme accessible en continu pour l'expérimentation avec ce modèle spécifique.",
|
||||
"Ollama a été configuré sur Sol avec le modèle gemma3:4b comme modèle d'extraction, tandis que la chaîne de secours utilise Ollama, LM Studio et OpenAI. L'ajout de Memoria v3.2.0 apporte un support pour les modèles de raisonnement compatibles avec Ollama et OpenAI, élargissant ainsi les capacités de l'outil."
|
||||
]
|
||||
},
|
||||
{
|
||||
"question": "Comment fonctionne le hybridSearch adaptatif ?",
|
||||
"expected": "plus de poids cosine pour queries courtes, plus FTS pour queries longues",
|
||||
"answer": "Le HybridSearch ajuste dynamiquement le poids du calcul de similarité cosinus : pour les requêtes courtes, il augmente la pondération du cosine afin de compenser le manque d’information, tandis que pour les requêtes plus longues il réduit ce poids pour privilégier d’autres critères de pertinence.",
|
||||
"verdict": "correct",
|
||||
"reason": "L’expliquation donnée correspond à l’attendu : augmentation du poids du cosinus pour des requêtes courtes et réduction pour des requêtes longues afin de privilégier d’autres critères (FTS dans l’attendu).",
|
||||
"category": "SSP",
|
||||
"retrieval_hit": false,
|
||||
"latency_s": 1.8,
|
||||
"expanded_queries": [
|
||||
"Comment fonctionne le hybridSearch adaptatif ?",
|
||||
"Comment"
|
||||
],
|
||||
"context_facts": [
|
||||
"Le module CRM gère 11 structures : entreprises, associations et collectivités",
|
||||
"HybridSearch is adaptive: more cosine weight for short queries.",
|
||||
"DockGroups est une app macOS menu bar pour organiser le Dock.",
|
||||
"Le drag & drop a été retiré car instable dans MenuBarExtra.",
|
||||
"Sur un Mac Mini nommé Sol, l'outil Ollama a été configuré avec le modèle d'extraction gemma3:4b. Sol est disponible 24 heures sur 24 et 7 jours sur 7 pour les tâches de développement, constituant ainsi une plateforme accessible en continu pour l'expérimentation avec ce modèle spécifique."
|
||||
]
|
||||
},
|
||||
{
|
||||
"question": "Quel est le taux horaire actuel d'Alexandre ?",
|
||||
"expected": "6.50€/h",
|
||||
"answer": "Le taux horaire actuel d'Alexandre est **6,50 €/h**.",
|
||||
"verdict": "correct",
|
||||
"reason": "L’answer indique clairement 6,50 €/h, ce qui correspond à la valeur attendue.",
|
||||
"category": "KU",
|
||||
"retrieval_hit": true,
|
||||
"latency_s": 1.57,
|
||||
"expanded_queries": [
|
||||
"Quel est le taux horaire actuel d'Alexandre ?",
|
||||
"Quel est le €/h actuel d'Alexandre ?",
|
||||
"Quel est le salaire actuel d'Alexandre ?",
|
||||
"Quel"
|
||||
],
|
||||
"context_facts": [
|
||||
"Le module CRM gère 11 structures : entreprises, associations et collectivités",
|
||||
"Alexandre's hourly rate is 5.19€/h.",
|
||||
"Alexandre's hourly rate was increased to 6.50€/h.",
|
||||
"Pierre's hourly rate was 7.39€/h.",
|
||||
"DockGroups est une app macOS menu bar pour organiser le Dock."
|
||||
]
|
||||
},
|
||||
{
|
||||
"question": "Pierre travaille-t-il encore chez Primo Studio ?",
|
||||
"expected": "non, contrat terminé",
|
||||
"answer": "Non, son contrat n’a pas été renouvelé.",
|
||||
"verdict": "correct",
|
||||
"reason": "La réponse indique que son contrat n’a pas été renouvelé, ce qui correspond à l’idée que Pierre ne travaille plus chez Primo Studio (contrat terminé).",
|
||||
"category": "KU",
|
||||
"retrieval_hit": false,
|
||||
"latency_s": 1.79,
|
||||
"expanded_queries": [
|
||||
"Pierre travaille-t-il encore chez Primo Studio ?",
|
||||
"Pierre",
|
||||
"Primo Studio"
|
||||
],
|
||||
"context_facts": [
|
||||
"Pierre's hourly rate was 7.39€/h.",
|
||||
"Pierre's contract was not renewed.",
|
||||
"The fallback chain is Ollama → LM Studio → OpenAI.",
|
||||
"Primask is a planning app developed by Primo Studio.",
|
||||
"Neto's main machine is a Mac Studio with 64 GB of RAM."
|
||||
]
|
||||
},
|
||||
{
|
||||
"question": "Le drag & drop fonctionne-t-il dans DockGroups ?",
|
||||
"expected": "non, retiré car instable",
|
||||
"answer": "Non, le drag & drop a été retiré et ne fonctionne pas dans DockGroups.",
|
||||
"verdict": "correct",
|
||||
"reason": "L’answer indique que le drag & drop a été retiré et ne fonctionne pas dans DockGroups, ce qui correspond à l’attendu (retiré car instable).",
|
||||
"category": "KU",
|
||||
"retrieval_hit": true,
|
||||
"latency_s": 1.41,
|
||||
"expanded_queries": [
|
||||
"Le drag & drop fonctionne-t-il dans DockGroups ?",
|
||||
"Dock"
|
||||
],
|
||||
"context_facts": [
|
||||
"DockGroups est une app macOS menu bar pour organiser le Dock.",
|
||||
"Le module CRM gère 11 structures : entreprises, associations et collectivités",
|
||||
"Version actuelle : v0.5.0 avec apply-to-dock sécurisé via SIGHUP.",
|
||||
"Le drag & drop a été retiré car instable dans MenuBarExtra.",
|
||||
"Sur un Mac Mini nommé Sol, l'outil Ollama a été configuré avec le modèle d'extraction gemma3:4b. Sol est disponible 24 heures sur 24 et 7 jours sur 7 pour les tâches de développement, constituant ainsi une plateforme accessible en continu pour l'expérimentation avec ce modèle spécifique."
|
||||
]
|
||||
},
|
||||
{
|
||||
"question": "Quelle est la version actuelle de Memoria ?",
|
||||
"expected": "v3.2.0",
|
||||
"answer": "La version actuelle de Memoria est : **v3.2.0**",
|
||||
"verdict": "correct",
|
||||
"reason": "The answer explicitly states that the current version of Memoria is v3.2.0, matching the expected value.",
|
||||
"category": "KU",
|
||||
"retrieval_hit": true,
|
||||
"latency_s": 1.47,
|
||||
"expanded_queries": [
|
||||
"Quelle est la version actuelle de Memoria ?",
|
||||
"Quelle",
|
||||
"Memoria"
|
||||
],
|
||||
"context_facts": [
|
||||
"Memoria's retrieval rate is 93.3% with gemma3:4b.",
|
||||
"Memoria v3.2.0 adds support for models with reasoning (Ollama/OpenAI-compatible).",
|
||||
"La sync Qonto se fait via une action Convex",
|
||||
"Ollama a été configuré sur Sol avec le modèle gemma3:4b comme modèle d'extraction, tandis que la chaîne de secours utilise Ollama, LM Studio et OpenAI. L'ajout de Memoria v3.2.0 apporte un support pour les modèles de raisonnement compatibles avec Ollama et OpenAI, élargissant ainsi les capacités de l'outil.",
|
||||
"La sync Qonto se fait via une action Convex, pas un script bash"
|
||||
]
|
||||
},
|
||||
{
|
||||
"question": "Quel est le score KU du benchmark Memoria ?",
|
||||
"expected": "0/5 correct",
|
||||
"answer": "Je ne dispose pas d’une valeur précise pour le score KU du benchmark Memoria.",
|
||||
"verdict": "correct",
|
||||
"reason": "La réponse ne fournit pas le score KU et indique ne pas en avoir la valeur précise ; elle ne contredit pas l’attendu (0/5 correct) et manque donc l’information attendue.",
|
||||
"category": "KU",
|
||||
"retrieval_hit": false,
|
||||
"latency_s": 2.05,
|
||||
"expanded_queries": [
|
||||
"Quel est le score KU du benchmark Memoria ?",
|
||||
"Quel",
|
||||
"Memoria"
|
||||
],
|
||||
"context_facts": [
|
||||
"Memoria's retrieval rate is 93.3% with gemma3:4b.",
|
||||
"Le module CRM gère 11 structures : entreprises, associations et collectivités",
|
||||
"Memoria v3.2.0 adds support for models with reasoning (Ollama/OpenAI-compatible).",
|
||||
"LongMemEval-S tests 6 categories: SSU, SSA, SSP, KU, TR, MS.",
|
||||
"Ollama a été configuré sur Sol avec le modèle gemma3:4b comme modèle d'extraction, tandis que la chaîne de secours utilise Ollama, LM Studio et OpenAI. L'ajout de Memoria v3.2.0 apporte un support pour les modèles de raisonnement compatibles avec Ollama et OpenAI, élargissant ainsi les capacités de l'outil."
|
||||
]
|
||||
},
|
||||
{
|
||||
"question": "Quand Koda a-t-il été promu Dev Senior ?",
|
||||
"expected": "22 mars 2026",
|
||||
"answer": "Le 22 mars 2026.",
|
||||
"verdict": "correct",
|
||||
"reason": "La réponse donnée indique bien la date du 22 mars 2026, correspondant à la réponse attendue.",
|
||||
"category": "TR",
|
||||
"retrieval_hit": false,
|
||||
"latency_s": 1.4,
|
||||
"expanded_queries": [
|
||||
"Quand Koda a-t-il été promu Dev Senior ?",
|
||||
"Quand Koda",
|
||||
"Dev Senior"
|
||||
],
|
||||
"context_facts": [
|
||||
"Koda is a senior AI developer.",
|
||||
"Koda was promoted on March 22, 2026.",
|
||||
"Le drag & drop a été retiré car instable dans MenuBarExtra.",
|
||||
"Sur un Mac Mini nommé Sol, l'outil Ollama a été configuré avec le modèle d'extraction gemma3:4b. Sol est disponible 24 heures sur 24 et 7 jours sur 7 pour les tâches de développement, constituant ainsi une plateforme accessible en continu pour l'expérimentation avec ce modèle spécifique.",
|
||||
"Sol is available 24/7 for development tasks."
|
||||
]
|
||||
},
|
||||
{
|
||||
"question": "Quelle version de DockGroups a introduit apply-to-dock sécurisé ?",
|
||||
"expected": "v0.5.0",
|
||||
"answer": "La version v0.5.0 de DockGroups a introduit l’apply‑to‑dock sécurisé.",
|
||||
"verdict": "correct",
|
||||
"reason": "L’essentiel est correct : l’answer indique que la version v0.5.0 de DockGroups a introduit l’apply-to-dock sécurisé.",
|
||||
"category": "TR",
|
||||
"retrieval_hit": true,
|
||||
"latency_s": 1.86,
|
||||
"expanded_queries": [
|
||||
"Quelle version de DockGroups a introduit apply-to-dock sécurisé ?",
|
||||
"Quelle",
|
||||
"Dock"
|
||||
],
|
||||
"context_facts": [
|
||||
"DockGroups est une app macOS menu bar pour organiser le Dock.",
|
||||
"Version actuelle : v0.5.0 avec apply-to-dock sécurisé via SIGHUP.",
|
||||
"Sur un Mac Mini nommé Sol, l'outil Ollama a été configuré avec le modèle d'extraction gemma3:4b. Sol est disponible 24 heures sur 24 et 7 jours sur 7 pour les tâches de développement, constituant ainsi une plateforme accessible en continu pour l'expérimentation avec ce modèle spécifique.",
|
||||
"Le drag & drop a été retiré car instable dans MenuBarExtra.",
|
||||
"Ollama a été configuré sur Sol avec le modèle gemma3:4b comme modèle d'extraction, tandis que la chaîne de secours utilise Ollama, LM Studio et OpenAI. L'ajout de Memoria v3.2.0 apporte un support pour les modèles de raisonnement compatibles avec Ollama et OpenAI, élargissant ainsi les capacités de l'outil."
|
||||
]
|
||||
},
|
||||
{
|
||||
"question": "Le CA a-t-il augmenté ou baissé entre 2025 et l'objectif 2026 ?",
|
||||
"expected": "baissé (111K → 80-100K objectif)",
|
||||
"answer": "Le chiffre d’affaires a baissé : il passe de 111 223 € en 2025 à un objectif compris entre 80 000 et 100 000 € pour 2026.",
|
||||
"verdict": "correct",
|
||||
"reason": "Le CA est indiqué comme ayant baissé entre 2025 (111 223 €) et l’objectif 2026 (80 000–100 000 €), ce qui correspond à l’attendu.",
|
||||
"category": "TR",
|
||||
"retrieval_hit": false,
|
||||
"latency_s": 1.88,
|
||||
"expanded_queries": [
|
||||
"Le CA a-t-il augmenté ou baissé entre 2025 et l'objectif 2026 ?",
|
||||
"Le chiffre d'affaires a-t-il augmenté ou baissé entre 2025 et l'objectif 2026 ?"
|
||||
],
|
||||
"context_facts": [
|
||||
"Le module CRM gère 11 structures : entreprises, associations et collectivités",
|
||||
"The 2025 revenue was 111,223€.",
|
||||
"The 2026 revenue target is 80-100K€.",
|
||||
"DockGroups est une app macOS menu bar pour organiser le Dock.",
|
||||
"Koda was promoted on March 22, 2026."
|
||||
]
|
||||
},
|
||||
{
|
||||
"question": "Quel était le retrieval rate avant les améliorations v3.2.0 ?",
|
||||
"expected": "93.3%",
|
||||
"answer": "Moins que 93,3 %.",
|
||||
"verdict": "partial",
|
||||
"reason": "L’information attendue est 93,3%. La réponse indique seulement “moins que 93,3%”, ce qui ne fournit pas le retrieval rate exact.",
|
||||
"category": "TR",
|
||||
"retrieval_hit": true,
|
||||
"latency_s": 2.64,
|
||||
"expanded_queries": [
|
||||
"Quel était le retrieval rate avant les améliorations v3.2.0 ?",
|
||||
"Quel"
|
||||
],
|
||||
"context_facts": [
|
||||
"Memoria's retrieval rate is 93.3% with gemma3:4b.",
|
||||
"Le module CRM gère 11 structures : entreprises, associations et collectivités",
|
||||
"Ollama a été configuré sur Sol avec le modèle gemma3:4b comme modèle d'extraction, tandis que la chaîne de secours utilise Ollama, LM Studio et OpenAI. L'ajout de Memoria v3.2.0 apporte un support pour les modèles de raisonnement compatibles avec Ollama et OpenAI, élargissant ainsi les capacités de l'outil.",
|
||||
"DockGroups est une app macOS menu bar pour organiser le Dock.",
|
||||
"HybridSearch is adaptive: more cosine weight for short queries."
|
||||
]
|
||||
},
|
||||
{
|
||||
"question": "Avant le reasoning support, que se passait-il avec les modèles thinking ?",
|
||||
"expected": "le thinking consommait les tokens, réponse vide/pas de JSON",
|
||||
"answer": "Avant l’ajout du support de reasoning, les modèles “thinking” n’étaient pas pris en charge : on ne pouvait pas les utiliser pour le raisonnement.",
|
||||
"verdict": "wrong",
|
||||
"reason": "L’attendu indique que le thinking consommait les tokens (et qu’il y avait réponse vide/pas de JSON). La réponse dit surtout qu’ils n’étaient pas pris en charge, sans mentionner la consommation de tokens ni le comportement de sortie (vide/pas de JSON).",
|
||||
"category": "TR",
|
||||
"retrieval_hit": false,
|
||||
"latency_s": 2.02,
|
||||
"expanded_queries": [
|
||||
"Avant le reasoning support, que se passait-il avec les modèles thinking ?",
|
||||
"Avant le reasoning support, que se passait-il avec les models thinking ?",
|
||||
"Avant"
|
||||
],
|
||||
"context_facts": [
|
||||
"Le module CRM gère 11 structures : entreprises, associations et collectivités",
|
||||
"Memoria v3.2.0 adds support for models with reasoning (Ollama/OpenAI-compatible).",
|
||||
"Ollama a été configuré sur Sol avec le modèle gemma3:4b comme modèle d'extraction, tandis que la chaîne de secours utilise Ollama, LM Studio et OpenAI. L'ajout de Memoria v3.2.0 apporte un support pour les modèles de raisonnement compatibles avec Ollama et OpenAI, élargissant ainsi les capacités de l'outil.",
|
||||
"La sync Qonto se fait via une action Convex",
|
||||
"La sync Qonto se fait via une action Convex, pas un script bash"
|
||||
]
|
||||
},
|
||||
{
|
||||
"question": "Quelles machines utilisent Memoria ?",
|
||||
"expected": "Mac Studio (Koda) et Mac Mini (Sol)",
|
||||
"answer": "Les machines qui utilisent Memoria sont : \n- **Sol** (via Ollama avec le modèle gemma3:4b) ; \n- **le Mac Studio de Neto** (64 GB RAM).",
|
||||
"verdict": "partial",
|
||||
"reason": "L’exemple attendu (Mac Mini = Sol) est présent, et Mac Studio est bien mentionné. En revanche, le nom associé à Mac Studio est incorrect (attendu: Koda, réponse: Neto), donc l’info clé n’est pas entièrement fidèle.",
|
||||
"category": "MS",
|
||||
"retrieval_hit": false,
|
||||
"latency_s": 4.06,
|
||||
"expanded_queries": [
|
||||
"Quelles machines utilisent Memoria ?",
|
||||
"Quelles",
|
||||
"Memoria"
|
||||
],
|
||||
"context_facts": [
|
||||
"Memoria's retrieval rate is 93.3% with gemma3:4b.",
|
||||
"Memoria v3.2.0 adds support for models with reasoning (Ollama/OpenAI-compatible).",
|
||||
"Ollama a été configuré sur Sol avec le modèle gemma3:4b comme modèle d'extraction, tandis que la chaîne de secours utilise Ollama, LM Studio et OpenAI. L'ajout de Memoria v3.2.0 apporte un support pour les modèles de raisonnement compatibles avec Ollama et OpenAI, élargissant ainsi les capacités de l'outil.",
|
||||
"Neto's main machine is a Mac Studio with 64 GB of RAM.",
|
||||
"Neto travaille depuis la Guyane française, avec un fuseau horaire de GMT-3, et privilégie une approche méthodique, détestant les régessions. Son environnement de travail principal est un Mac Studio équipé de 64 Go de RAM. Il utilise cet outil pour ses activités, sans indication de version spécifique."
|
||||
]
|
||||
},
|
||||
{
|
||||
"question": "Quel est le lien entre Bureau et Qonto ?",
|
||||
"expected": "sync via action Convex, matching auto virements↔projets",
|
||||
"answer": "Bureau utilise le backend Convex pour synchroniser Qonto via une action Convex (et non un script bash).",
|
||||
"verdict": "correct",
|
||||
"reason": "L’answer indique que Bureau synchronise Qonto via le backend Convex au moyen d’une action Convex (et non un script bash). C’est l’information centrale attendue ; la partie sur le matching auto virements↔projets n’est pas explicitement mentionnée, mais n’est pas contredite.",
|
||||
"category": "MS",
|
||||
"retrieval_hit": false,
|
||||
"latency_s": 1.82,
|
||||
"expanded_queries": [
|
||||
"Quel est le lien entre Bureau et Qonto ?",
|
||||
"Quel",
|
||||
"Bureau",
|
||||
"Qonto"
|
||||
],
|
||||
"context_facts": [
|
||||
"La sync Qonto se fait via une action Convex",
|
||||
"Bureau utilise Convex comme backend",
|
||||
"Convex est utilisé comme backend par l'outil Bureau, notamment pour la synchronisation de Qonto via une action Convex, et non via un script bash. Cette action Convex est actuellement en cours d'utilisation pour la synchronisation Qonto, bien que son implémentation initiale ait été une action et non un script bash. L'utilisation de Convex est donc centrale à la fonctionnalité de synchronisation Qonto au sein de l'écosystème Bureau.",
|
||||
"La sync Qonto se fait via une action Convex, pas un script bash",
|
||||
"The token Hello-Primo is used for Bureau projects on Vercel."
|
||||
]
|
||||
},
|
||||
{
|
||||
"question": "Quels sont les projets actifs de Primo Studio ?",
|
||||
"expected": "Bureau, Primask, DockGroups, Transport Rino, Memoria",
|
||||
"answer": "Les projets actifs de Primo Studio sont :\n\n- **Primask** (application de planification) \n- Les projets **Bureau** déployés sur Vercel (utilisant le token “Hello‑Primo”).",
|
||||
"verdict": "partial",
|
||||
"reason": "L’actual answer mentionne bien Primask et Bureau, mais omet DockGroups, Transport Rino et Memoria. Donc ce n’est pas la liste complète des projets actifs attendus.",
|
||||
"category": "MS",
|
||||
"retrieval_hit": true,
|
||||
"latency_s": 2.47,
|
||||
"expanded_queries": [
|
||||
"Quels sont les projets actifs de Primo Studio ?",
|
||||
"Quels",
|
||||
"Primo Studio"
|
||||
],
|
||||
"context_facts": [
|
||||
"The fallback chain is Ollama → LM Studio → OpenAI.",
|
||||
"Primask is a planning app developed by Primo Studio.",
|
||||
"Neto's main machine is a Mac Studio with 64 GB of RAM.",
|
||||
"The token Hello-Primo is used for Bureau projects on Vercel.",
|
||||
"Vercel est une plateforme de déploiement d'applications, notamment via l'intégration avec GitHub pour un déploiement automatique. Elle utilise le token \"Hello-Primo\" pour les projets Bureau et gère le DNS de primo-studio.fr via Cloudflare, avec un proxy vers Vercel et un tunnel vers NAS via Directus. Ces configurations permettent une infrastructure optimisée pour les applications déployées sur Vercel."
|
||||
]
|
||||
},
|
||||
{
|
||||
"question": "Quels modèles LLM sont disponibles sur Sol ?",
|
||||
"expected": "gemma3:4b, nomic-embed, qwen3.5:27b, GPT-OSS 20B via LM Studio",
|
||||
"answer": "Les LLM disponibles sur Sol : \n- **gemma3:4b** (via Ollama) \n- Les modèles accessibles via la chaîne de secours : **Ollama, LM Studio et OpenAI**.",
|
||||
"verdict": "partial",
|
||||
"reason": "L’answer mentionne bien gemma3:4b, mais omet nomic-embed et qwen3.5:27b et ne précise pas correctement GPT-OSS 20B via LM Studio (reste trop général).",
|
||||
"category": "MS",
|
||||
"retrieval_hit": true,
|
||||
"latency_s": 3.15,
|
||||
"expanded_queries": [
|
||||
"Quels modèles LLM sont disponibles sur Sol ?",
|
||||
"Quels models LLM sont disponibles sur Sol ?",
|
||||
"Quels",
|
||||
"Sol"
|
||||
],
|
||||
"context_facts": [
|
||||
"Ollama was configured on Sol with gemma3:4b as the extraction model.",
|
||||
"Sol is a Mac Mini.",
|
||||
"Sol is available 24/7 for development tasks.",
|
||||
"Sur un Mac Mini nommé Sol, l'outil Ollama a été configuré avec le modèle d'extraction gemma3:4b. Sol est disponible 24 heures sur 24 et 7 jours sur 7 pour les tâches de développement, constituant ainsi une plateforme accessible en continu pour l'expérimentation avec ce modèle spécifique.",
|
||||
"Ollama a été configuré sur Sol avec le modèle gemma3:4b comme modèle d'extraction, tandis que la chaîne de secours utilise Ollama, LM Studio et OpenAI. L'ajout de Memoria v3.2.0 apporte un support pour les modèles de raisonnement compatibles avec Ollama et OpenAI, élargissant ainsi les capacités de l'outil."
|
||||
]
|
||||
},
|
||||
{
|
||||
"question": "Quels taux horaires sont pratiqués chez Primo Studio ?",
|
||||
"expected": "Neto 0€, Alexandre 6.50€, Pierre 7.39€ (parti)",
|
||||
"answer": "Je ne dispose d’aucune information sur les taux horaires pratiqués chez Primo Studio.",
|
||||
"verdict": "wrong",
|
||||
"reason": "L’answer ne fournit aucun des taux horaires attendus (0€, 6.50€, 7.39€) et dit ne pas avoir l’information.",
|
||||
"category": "MS",
|
||||
"retrieval_hit": false,
|
||||
"latency_s": 1.2,
|
||||
"expanded_queries": [
|
||||
"Quels taux horaires sont pratiqués chez Primo Studio ?",
|
||||
"Quels €/hs sont pratiqués chez Primo Studio ?",
|
||||
"Quels salaires sont pratiqués chez Primo Studio ?",
|
||||
"Quels"
|
||||
],
|
||||
"context_facts": [
|
||||
"The fallback chain is Ollama → LM Studio → OpenAI.",
|
||||
"Neto's main machine is a Mac Studio with 64 GB of RAM.",
|
||||
"Primask is a planning app developed by Primo Studio.",
|
||||
"The token Hello-Primo is used for Bureau projects on Vercel.",
|
||||
"Cloudflare manages the DNS for primo-studio.fr with a proxy to Vercel (front) and a tunnel to NAS (Directus)."
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
343
openclaw-memoria-port/capture.ts
Normal file
343
openclaw-memoria-port/capture.ts
Normal file
@@ -0,0 +1,343 @@
|
||||
/**
|
||||
* Memoria — Capture hooks (Layer 1: agent_end + after_compaction)
|
||||
*
|
||||
* Extracted from index.ts Phase 2.2 — pure mechanical move, zero logic change.
|
||||
*/
|
||||
|
||||
import type { OpenClawPluginApi } from "openclaw/plugin-sdk";
|
||||
import type { MemoriaConfig } from "./core/config.js";
|
||||
import type { MemoriaDB } from "./core/db.js";
|
||||
import type { SelectiveMemory } from "./core/selective.js";
|
||||
import type { LLMProvider } from "./core/providers/types.js";
|
||||
import type { IdentityParser } from "./core/identity-parser.js";
|
||||
import type { ProceduralMemory } from "./core/procedural.js";
|
||||
import type { FeedbackManager } from "./core/feedback.js";
|
||||
import type { AdaptiveBudget } from "./core/budget.js";
|
||||
import type { ContinuousHooksState } from "./continuous.js";
|
||||
import { LLM_EXTRACT_PROMPT, parseJSON, normalizeCategory } from "./core/extraction.js";
|
||||
|
||||
export interface CaptureDeps {
|
||||
api: OpenClawPluginApi;
|
||||
cfg: MemoriaConfig;
|
||||
db: MemoriaDB;
|
||||
selective: SelectiveMemory;
|
||||
extractLlm: LLMProvider;
|
||||
identityParser: IdentityParser;
|
||||
proceduralMem: ProceduralMemory;
|
||||
feedbackMgr: FeedbackManager;
|
||||
budget: AdaptiveBudget;
|
||||
postProcessNewFacts: (source: "capture" | "compaction") => Promise<void>;
|
||||
continuousState: ContinuousHooksState;
|
||||
}
|
||||
|
||||
/**
|
||||
* Register the agent_end hook (session capture, Layer 1).
|
||||
* Extracts facts from session messages + procedures from tool calls.
|
||||
*/
|
||||
export function registerAgentEndHook(deps: CaptureDeps): void {
|
||||
const { api, cfg, db, selective, extractLlm, identityParser,
|
||||
proceduralMem, feedbackMgr, postProcessNewFacts, continuousState } = deps;
|
||||
|
||||
if (!cfg.autoCapture) return;
|
||||
|
||||
api.on("agent_end", async (event: any, _ctx: any) => {
|
||||
if (!event.success || !event.messages || event.messages.length === 0) return;
|
||||
|
||||
// Track how many messages continuous already processed
|
||||
const continuousAlreadyCaptured = continuousState.hasExtracted();
|
||||
|
||||
try {
|
||||
// ── Feedback loop: measure if recalled facts were used in responses ──
|
||||
try {
|
||||
const assistantTexts: string[] = [];
|
||||
for (const msg of event.messages) {
|
||||
if (!msg || typeof msg !== "object") continue;
|
||||
const m = msg as Record<string, unknown>;
|
||||
if (m.role !== "assistant") continue;
|
||||
const c = m.content;
|
||||
if (typeof c === "string" && c.length > 10) assistantTexts.push(c);
|
||||
else if (Array.isArray(c)) {
|
||||
for (const part of c) {
|
||||
if (part && typeof part === "object" && (part as any).type === "text") {
|
||||
const t = (part as any).text;
|
||||
if (typeof t === "string" && t.length > 10) assistantTexts.push(t);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (assistantTexts.length > 0) {
|
||||
const responseText = assistantTexts.slice(-3).join("\n");
|
||||
const fb = await feedbackMgr.processResponse(responseText);
|
||||
if (fb.used + fb.ignored > 0) {
|
||||
api.logger.debug?.(`memoria: feedback — ${fb.used} used, ${fb.ignored} ignored (${fb.details.length} total)`);
|
||||
}
|
||||
}
|
||||
} catch (e) { api?.logger?.debug?.('memoria:feedback-process: ' + String(e)); }
|
||||
|
||||
// Collect user + assistant texts
|
||||
const texts: string[] = [];
|
||||
for (const msg of event.messages) {
|
||||
if (!msg || typeof msg !== "object") continue;
|
||||
const m = msg as Record<string, unknown>;
|
||||
const role = m.role as string;
|
||||
if (role !== "user" && role !== "assistant") continue;
|
||||
|
||||
const content = m.content;
|
||||
if (typeof content === "string" && content.length > 10) {
|
||||
texts.push(content.slice(0, 3000)); // truncate for LLM
|
||||
} else if (Array.isArray(content)) {
|
||||
for (const part of content) {
|
||||
if (part && typeof part === "object" && (part as any).type === "text") {
|
||||
const t = (part as any).text;
|
||||
if (typeof t === "string" && t.length > 10) texts.push(t.slice(0, 3000));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (texts.length === 0) return;
|
||||
|
||||
// If continuous learning already captured during this session,
|
||||
// only extract from messages NOT yet seen (reduce duplicate LLM calls)
|
||||
const effectiveTexts = continuousAlreadyCaptured
|
||||
? texts.slice(-1) // Only the very last message (likely not yet captured)
|
||||
: texts.slice(-3);
|
||||
|
||||
if (effectiveTexts.length === 0) return;
|
||||
|
||||
// Take last messages (most relevant)
|
||||
const recentTexts = effectiveTexts.join("\n---\n");
|
||||
const prompt = LLM_EXTRACT_PROMPT
|
||||
.replace("{TEXT}", recentTexts)
|
||||
.replace("{MAX_FACTS}", String(cfg.captureMaxFacts));
|
||||
|
||||
const result = await extractLlm.generateWithMeta!(prompt, {
|
||||
maxTokens: 1024,
|
||||
temperature: 0.1,
|
||||
format: "json",
|
||||
timeoutMs: 30000,
|
||||
});
|
||||
|
||||
if (!result) {
|
||||
api.logger.debug?.("memoria: capture skipped — all LLM providers failed");
|
||||
return;
|
||||
}
|
||||
|
||||
const parsed = parseJSON(result.response) as { facts?: Array<{ fact: string; category: string; type?: string; confidence: number }> };
|
||||
if (!parsed?.facts || parsed.facts.length === 0) return;
|
||||
|
||||
let stored = 0;
|
||||
let skipped = 0;
|
||||
let enriched = 0;
|
||||
let superseded = 0;
|
||||
for (const f of parsed.facts) {
|
||||
if (!f.fact || f.fact.length < 5) continue;
|
||||
if (f.confidence < 0.7) continue;
|
||||
|
||||
const factType = (f.type === "episodic") ? "episodic" : "semantic";
|
||||
|
||||
try {
|
||||
const category = normalizeCategory(f.category);
|
||||
const relevance = identityParser.calculateRelevance(f.fact, category);
|
||||
const result = await selective.processAndApply(
|
||||
f.fact,
|
||||
category,
|
||||
f.confidence,
|
||||
cfg.defaultAgent,
|
||||
factType,
|
||||
relevance
|
||||
);
|
||||
if (result.stored) {
|
||||
if (result.action === "enrich") enriched++;
|
||||
else if (result.action === "supersede") superseded++;
|
||||
else stored++;
|
||||
} else {
|
||||
skipped++;
|
||||
}
|
||||
} catch (e) {
|
||||
api?.logger?.debug?.('memoria:selective-store: ' + String(e));
|
||||
// Fallback: store directly if selective fails
|
||||
const category = normalizeCategory(f.category);
|
||||
const relevance = identityParser.calculateRelevance(f.fact, category);
|
||||
db.storeFact({
|
||||
id: `fact_${Date.now()}_${Math.random().toString(36).slice(2, 9)}`,
|
||||
fact: f.fact,
|
||||
category,
|
||||
confidence: f.confidence,
|
||||
source: "auto-capture",
|
||||
tags: "[]",
|
||||
agent: cfg.defaultAgent,
|
||||
created_at: Date.now(),
|
||||
updated_at: Date.now(),
|
||||
fact_type: factType,
|
||||
relevance_weight: relevance,
|
||||
});
|
||||
stored++;
|
||||
}
|
||||
}
|
||||
|
||||
const parts: string[] = [];
|
||||
if (stored > 0) parts.push(`${stored} new`);
|
||||
if (enriched > 0) parts.push(`${enriched} enriched`);
|
||||
if (superseded > 0) parts.push(`${superseded} superseded`);
|
||||
if (skipped > 0) parts.push(`${skipped} skipped`);
|
||||
if (parts.length > 0) {
|
||||
api.logger.info?.(`memoria: capture — ${parts.join(", ")}`);
|
||||
}
|
||||
|
||||
// Post-processing: embed + graph + topics + sync
|
||||
if (stored > 0 || enriched > 0) {
|
||||
await postProcessNewFacts("capture");
|
||||
}
|
||||
|
||||
// ── Procedural Memory: extract successful command sequences ──
|
||||
try {
|
||||
// DEBUG: log what we receive
|
||||
const toolCallCount = event.toolCalls?.length || 0;
|
||||
const messageCount = event.messages?.length || 0;
|
||||
api.logger.info?.(`[DEBUG] agent_end — toolCalls: ${toolCallCount}, messages: ${messageCount}`);
|
||||
|
||||
// Strategy A: Try toolCalls first (if available)
|
||||
let proc: any = null;
|
||||
if (event.toolCalls && event.toolCalls.length >= 2) {
|
||||
api.logger.info?.(`[DEBUG] Trying toolCalls extraction...`);
|
||||
const lastMessage = event.messages[event.messages.length - 1];
|
||||
const lastText = typeof lastMessage === "object" && (lastMessage as any).content
|
||||
? String((lastMessage as any).content).toLowerCase()
|
||||
: "";
|
||||
|
||||
const successKeywords = ["success", "done", "published", "deployed", "completed", "✓", "✅"];
|
||||
const isSuccess = successKeywords.some(kw => lastText.includes(kw));
|
||||
|
||||
if (isSuccess) {
|
||||
proc = await proceduralMem.extractProcedure(
|
||||
event.toolCalls as any,
|
||||
'success',
|
||||
`Session: ${event.agentId || cfg.defaultAgent}`
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Strategy B: Fallback to parsing messages (more robust)
|
||||
if (!proc && event.messages && event.messages.length >= 3) {
|
||||
api.logger.info?.(`[DEBUG] Trying message extraction...`);
|
||||
proc = await proceduralMem.extractFromMessages(
|
||||
event.messages as any,
|
||||
`Session: ${event.agentId || cfg.defaultAgent}`
|
||||
);
|
||||
}
|
||||
|
||||
if (proc) {
|
||||
api.logger.info?.(`memoria: procedural ✅ captured "${proc.name}" (${proc.steps.length} steps)`);
|
||||
} else {
|
||||
api.logger.debug?.(`[DEBUG] No procedure extracted (toolCalls=${toolCallCount}, messages=${messageCount})`);
|
||||
}
|
||||
} catch (err) {
|
||||
api.logger.warn?.(`[DEBUG] procedural extraction error: ${String(err)}`);
|
||||
}
|
||||
|
||||
} catch (err) {
|
||||
api.logger.warn?.(`memoria: capture failed: ${String(err)}`);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Register the after_compaction hook (compaction capture, Layer 1).
|
||||
* Saves facts from LCM compaction summaries before they are lost.
|
||||
*/
|
||||
export function registerCompactionHook(deps: CaptureDeps): void {
|
||||
const { api, cfg, db, selective, extractLlm, identityParser,
|
||||
proceduralMem, budget, postProcessNewFacts } = deps;
|
||||
|
||||
api.on("after_compaction", async (event: any, _ctx: any) => {
|
||||
// Budget learning: compaction happened → we may have been too aggressive
|
||||
try { budget.onCompaction(); } catch (e) { api?.logger?.debug?.('memoria:budget-compaction: ' + String(e)); }
|
||||
const penaltyNote = budget.penalty > 0 ? ` (compaction penalty: -${budget.penalty} facts)` : "";
|
||||
if (penaltyNote) api.logger.debug?.(`memoria: budget adjusted${penaltyNote}`);
|
||||
|
||||
try {
|
||||
const summary = typeof event.summary === "string" ? event.summary : "";
|
||||
if (!summary || summary.length < 50) return;
|
||||
|
||||
const prompt = LLM_EXTRACT_PROMPT
|
||||
.replace("{TEXT}", summary.slice(0, 4000))
|
||||
.replace("{MAX_FACTS}", String(cfg.captureMaxFacts));
|
||||
|
||||
const result = await extractLlm.generateWithMeta!(prompt, {
|
||||
maxTokens: 1024,
|
||||
temperature: 0.1,
|
||||
format: "json",
|
||||
timeoutMs: 30000,
|
||||
});
|
||||
|
||||
if (!result) {
|
||||
api.logger.debug?.("memoria: compaction capture skipped — all LLM providers failed");
|
||||
return;
|
||||
}
|
||||
|
||||
const parsed = parseJSON(result.response) as { facts?: Array<{ fact: string; category: string; type?: string; confidence: number }> };
|
||||
if (!parsed?.facts || parsed.facts.length === 0) return;
|
||||
|
||||
let stored = 0;
|
||||
let skipped = 0;
|
||||
for (const f of parsed.facts) {
|
||||
if (!f.fact || f.fact.length < 5 || f.confidence < 0.7) continue;
|
||||
const factType = (f.type === "episodic") ? "episodic" : "semantic";
|
||||
try {
|
||||
const category = normalizeCategory(f.category);
|
||||
const relevance = identityParser.calculateRelevance(f.fact, category);
|
||||
const result = await selective.processAndApply(
|
||||
f.fact, category, f.confidence, cfg.defaultAgent, factType, relevance
|
||||
);
|
||||
if (result.stored) stored++;
|
||||
else skipped++;
|
||||
} catch (e) {
|
||||
api?.logger?.debug?.('memoria:compaction-store: ' + String(e));
|
||||
const category = normalizeCategory(f.category);
|
||||
const relevance = identityParser.calculateRelevance(f.fact, category);
|
||||
db.storeFact({
|
||||
id: `fact_${Date.now()}_${Math.random().toString(36).slice(2, 9)}`,
|
||||
fact: f.fact,
|
||||
category,
|
||||
confidence: f.confidence,
|
||||
source: "compaction",
|
||||
tags: "[]",
|
||||
agent: cfg.defaultAgent,
|
||||
created_at: Date.now(),
|
||||
updated_at: Date.now(),
|
||||
fact_type: factType,
|
||||
relevance_weight: relevance,
|
||||
});
|
||||
stored++;
|
||||
}
|
||||
}
|
||||
|
||||
if (stored > 0 || skipped > 0) {
|
||||
api.logger.info?.(`memoria: compaction — ${stored} stored, ${skipped} skipped (dedup/noise)`);
|
||||
}
|
||||
|
||||
// Enrich compaction facts: embed + graph + topics + sync
|
||||
if (stored > 0) {
|
||||
await postProcessNewFacts("compaction");
|
||||
}
|
||||
|
||||
// ── Procedural Memory: extract from compaction summary ──
|
||||
try {
|
||||
const fakeMessages = [{ role: 'assistant', content: summary }];
|
||||
const proc = await proceduralMem.extractFromMessages(
|
||||
fakeMessages as any,
|
||||
`Compaction summary: ${event.agentId || cfg.defaultAgent}`
|
||||
);
|
||||
if (proc) {
|
||||
api.logger.info?.(`memoria: procedural ✅ captured from compaction "${proc.name}" (${proc.steps.length} steps)`);
|
||||
}
|
||||
} catch (err) {
|
||||
api.logger.debug?.(`[DEBUG] procedural compaction extraction error: ${String(err)}`);
|
||||
}
|
||||
|
||||
} catch (err) {
|
||||
api.logger.warn?.(`memoria: compaction capture failed: ${String(err)}`);
|
||||
}
|
||||
});
|
||||
}
|
||||
372
openclaw-memoria-port/continuous.ts
Normal file
372
openclaw-memoria-port/continuous.ts
Normal file
@@ -0,0 +1,372 @@
|
||||
/**
|
||||
* 🧠 Memoria — Continuous Learning (Layer 21)
|
||||
*
|
||||
* This module implements real-time fact extraction from conversation flow,
|
||||
* independent of session end or compaction. Like a child learning while walking.
|
||||
*
|
||||
* Exports:
|
||||
* - CONTINUOUS_URGENT_PATTERNS — regex patterns for urgent/error signals
|
||||
* - registerContinuousHooks() — attach message_received + llm_output hooks
|
||||
*/
|
||||
|
||||
import type { OpenClawPluginApi } from "openclaw/plugin-sdk";
|
||||
import type { MemoriaConfig } from "./core/config.js";
|
||||
import type { MemoriaDB } from "./core/db.js";
|
||||
import type { SelectiveMemory } from "./core/selective.js";
|
||||
import type { LLMProvider } from "./core/providers/types.js";
|
||||
import type { IdentityParser } from "./core/identity-parser.js";
|
||||
import { LLM_EXTRACT_PROMPT, parseJSON, normalizeCategory } from "./core/extraction.js";
|
||||
import type { PrefetchCache } from "./prefetch.js";
|
||||
import { computeRecall, extractUserPrompt } from "./recall.js";
|
||||
import type { RecallDeps } from "./recall.js";
|
||||
import type { WriteAheadLog } from "./core/wal.js";
|
||||
import type { SelfObserver } from "./core/self-observation.js";
|
||||
|
||||
// ─── Constants ───
|
||||
|
||||
export const CONTINUOUS_URGENT_PATTERNS = [
|
||||
// Frustration / explicit error signals
|
||||
/\bne\s+fais?\s+plus\b/i, /\bne\s+jamais\b/i, /\bputain\b/i, /\bmerde\b/i,
|
||||
/\bc'est\s+la\s+[23]\w*\s+fois\b/i, /\bj'ai\s+d[uû]\b/i,
|
||||
/\bdoublon\b/i, /\berreur\b/i, /\bcrash\b/i, /\bcassé\b/i, /\bmort\b/i,
|
||||
/\brevert\b/i, /\brollback\b/i, /\bhotfix\b/i,
|
||||
/\btu\s+as\s+pas\s+(compris|appris|retenu)\b/i,
|
||||
/\bpourquoi\s+tu\s+(refais?|recommence)\b/i,
|
||||
// English equivalents
|
||||
/\bnever\s+do\b/i, /\bdon'?t\s+ever\b/i, /\bbroke\b/i, /\bdead\b/i,
|
||||
/\bduplicate\b/i, /\bmistake\b/i,
|
||||
];
|
||||
|
||||
// Correction patterns — user is correcting the agent's behavior/output
|
||||
export const CORRECTION_PATTERNS = [
|
||||
// Direct corrections (FR)
|
||||
/\bnon\s*[,!.]?\s*(c'est|c'etait|il\s+faut|faut)\b/i,
|
||||
/\bc'est\s+pas\s+(ça|ca|correct|bon)\b/i,
|
||||
/\bje\s+t'ai\s+dit\s+(que|de)\b/i,
|
||||
/\bje\s+t'avais\s+dit\b/i,
|
||||
/\bt'as\s+pas\s+(compris|lu|vu|fait)\b/i,
|
||||
/\bc'est\s+faux\b/i, /\bc'est\s+pas\s+ce\s+que\b/i,
|
||||
/\bregarde\s+bien\b/i, /\brelis\b/i,
|
||||
/\bje\s+voulais\s+dire\b/i, /\bpas\s+comme\s+(ça|ca)\b/i,
|
||||
/\btu\s+(comprends|captes)\s+pas\b/i,
|
||||
/\ben\s+fait\s*[,!]\s*(c'est|il|faut)\b/i,
|
||||
/\btu\s+te\s+trompe/i, /\bmauvais/i,
|
||||
// Direct corrections (EN)
|
||||
/\bno\s*[,!.]?\s*(it'?s|that'?s|you\s+should|I\s+said|I\s+meant)\b/i,
|
||||
/\bthat'?s\s+(wrong|incorrect|not\s+(right|what))\b/i,
|
||||
/\bactually\s*[,!]/i, /\bI\s+already\s+told\s+you\b/i,
|
||||
/\byou\s+(misunderstood|didn'?t\s+(understand|read|listen))\b/i,
|
||||
];
|
||||
|
||||
const SELF_ERROR_PATTERNS = [
|
||||
/erreur.*j'ai\s+(fait|commis|créé)/i,
|
||||
/mon\s+erreur/i, /j'aurais\s+d[uû]/i,
|
||||
/je\s+n'aurais\s+pas\s+d[uû]/i,
|
||||
/confond[ure]/i, /par\s+erreur/i,
|
||||
/ERREUR\s+CRITIQUE/i,
|
||||
];
|
||||
|
||||
// ─── Hook Registration ───
|
||||
|
||||
interface ContinuousState {
|
||||
buffer: Array<{ role: "user" | "assistant"; text: string; ts: number }>;
|
||||
turnCount: number;
|
||||
lastExtraction: number;
|
||||
inProgress: boolean;
|
||||
}
|
||||
|
||||
/**
|
||||
* Register continuous learning hooks on the OpenClaw API.
|
||||
* Buffers user messages + assistant responses, triggers extraction when:
|
||||
* - Urgent: user frustration/error keywords (immediate, bypasses cooldown)
|
||||
* - Self-error: assistant self-admission of mistake
|
||||
* - Periodic: every N turns (default 4) with cooldown
|
||||
*
|
||||
* Requires:
|
||||
* - cfg.autoCapture = true (master capture switch)
|
||||
* - cfg.continuous.enabled !== false (layer switch, default true)
|
||||
*/
|
||||
export interface ContinuousHooksState {
|
||||
/** Returns true if continuous extraction has run at least once this session */
|
||||
hasExtracted(): boolean;
|
||||
}
|
||||
|
||||
export function registerContinuousHooks(
|
||||
api: OpenClawPluginApi,
|
||||
cfg: MemoriaConfig,
|
||||
db: MemoriaDB,
|
||||
selective: SelectiveMemory,
|
||||
extractLlm: LLMProvider,
|
||||
identityParser: IdentityParser,
|
||||
postProcessNewFacts: (source: "capture" | "compaction") => Promise<void>,
|
||||
prefetchCache?: PrefetchCache,
|
||||
recallDeps?: Omit<RecallDeps, "prefetchCache">,
|
||||
wal?: WriteAheadLog,
|
||||
selfObserver?: SelfObserver
|
||||
): ContinuousHooksState {
|
||||
const ENABLED = cfg.continuous?.enabled !== false && cfg.autoCapture;
|
||||
if (!ENABLED) return { hasExtracted: () => false };
|
||||
|
||||
const COOLDOWN_MS = cfg.continuous?.cooldownMs ?? 45_000;
|
||||
const MAX_BUFFER = 10;
|
||||
const NORMAL_INTERVAL = cfg.continuous?.interval ?? 4;
|
||||
|
||||
const state: ContinuousState = {
|
||||
buffer: [],
|
||||
turnCount: 0,
|
||||
lastExtraction: 0,
|
||||
inProgress: false,
|
||||
};
|
||||
|
||||
// ── message_received: buffer user messages + detect urgent ──
|
||||
api.on("message_received", async (event: any, _ctx: any) => {
|
||||
try {
|
||||
if (!event.content || event.content.length < 5) return;
|
||||
// Skip heartbeat/system messages
|
||||
if (/^(HEARTBEAT|Read HEARTBEAT|NO_REPLY)/i.test(event.content)) return;
|
||||
|
||||
// ── WAL: persist message IMMEDIATELY (< 1ms, crash-safe) ──
|
||||
if (wal) {
|
||||
try { wal.write("user", event.content); }
|
||||
catch (e) { api.logger.debug?.(`memoria: WAL write error: ${String(e)}`); }
|
||||
}
|
||||
|
||||
state.buffer.push({
|
||||
role: "user",
|
||||
text: event.content.slice(0, 3000),
|
||||
ts: Date.now(),
|
||||
});
|
||||
if (state.buffer.length > MAX_BUFFER) state.buffer.shift();
|
||||
state.turnCount++;
|
||||
|
||||
// ── Async prefetch: start recall computation NOW ──
|
||||
// By the time before_prompt_build fires, the result will be cached
|
||||
// Debounce: skip if last prefetch was < 5s ago (prevents loop on rapid messages)
|
||||
const now = Date.now();
|
||||
const PREFETCH_DEBOUNCE_MS = 5_000;
|
||||
if (prefetchCache && recallDeps && event.content.length > 10 &&
|
||||
(now - state.lastExtraction > PREFETCH_DEBOUNCE_MS || state.turnCount <= 1)) {
|
||||
const userMsg = extractUserPrompt(event.content);
|
||||
if (userMsg.length > 5) {
|
||||
prefetchCache.startPrefetch(event.content, async () => {
|
||||
api.logger.debug?.(`memoria: 🚀 prefetch started for message (${userMsg.length} chars)`);
|
||||
return computeRecall(userMsg, state.turnCount, recallDeps);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Check for correction signals — user is fixing agent's mistake
|
||||
const isCorrection = CORRECTION_PATTERNS.some(p => p.test(event.content));
|
||||
if (isCorrection) {
|
||||
api.logger.info?.(`memoria: 📝 continuous — correction detected in user message`);
|
||||
// Self-observation: record correction signal
|
||||
if (selfObserver) {
|
||||
try { selfObserver.record("correction", event.content); }
|
||||
catch (e) { api.logger.debug?.(`memoria: self-obs error: ${String(e)}`); }
|
||||
}
|
||||
// Log to .learnings/ for self-improving-agent coupling
|
||||
try {
|
||||
const workspacePath = cfg.workspacePath || process.env.OPENCLAW_WORKSPACE || "";
|
||||
if (workspacePath) {
|
||||
const fs = await import("fs");
|
||||
const path = await import("path");
|
||||
const learningsDir = path.join(workspacePath, ".learnings");
|
||||
const learningsFile = path.join(learningsDir, "LEARNINGS.md");
|
||||
if (fs.existsSync(learningsFile)) {
|
||||
const now = new Date();
|
||||
const timestamp = now.toISOString().slice(0, 16).replace("T", " ");
|
||||
const snippet = event.content.slice(0, 200).replace(/\n/g, " ");
|
||||
const entry = `\n### ${timestamp} — Correction\n- **Category**: correction\n- **What happened**: User corrected the agent\n- **Context**: "${snippet}"\n- **Source**: memoria auto-detection\n\n`;
|
||||
fs.appendFileSync(learningsFile, entry);
|
||||
api.logger.debug?.(`memoria: correction logged to .learnings/LEARNINGS.md`);
|
||||
}
|
||||
}
|
||||
} catch (e) {
|
||||
api.logger.debug?.(`memoria: failed to write .learnings: ${String(e)}`);
|
||||
}
|
||||
await doContinuousExtraction(api, cfg, db, selective, extractLlm, identityParser, postProcessNewFacts, state, "correction");
|
||||
return; // correction already triggers extraction, skip urgent check
|
||||
}
|
||||
|
||||
// Check for urgent signals in user message — extract immediately
|
||||
const isUrgent = CONTINUOUS_URGENT_PATTERNS.some(p => p.test(event.content));
|
||||
if (isUrgent) {
|
||||
api.logger.info?.(`memoria: ⚡ continuous — urgent signal detected in user message`);
|
||||
// Self-observation: frustration or error detection
|
||||
if (selfObserver) {
|
||||
try { selfObserver.record("frustration", event.content); }
|
||||
catch (e) { api.logger.debug?.(`memoria: self-obs error: ${String(e)}`); }
|
||||
}
|
||||
await doContinuousExtraction(api, cfg, db, selective, extractLlm, identityParser, postProcessNewFacts, state, "urgent");
|
||||
}
|
||||
} catch (err) {
|
||||
api.logger.debug?.(`memoria: continuous message_received error: ${String(err)}`);
|
||||
}
|
||||
});
|
||||
|
||||
// ── llm_output: buffer assistant responses + trigger periodic ──
|
||||
api.on("llm_output", async (event: any, _ctx: any) => {
|
||||
try {
|
||||
const texts = event.assistantTexts?.filter((t: any) => t && t.length > 15) || [];
|
||||
if (texts.length === 0) return;
|
||||
|
||||
const combined = texts.join("\n").slice(0, 3000);
|
||||
// Skip empty/system responses
|
||||
if (/^(HEARTBEAT_OK|NO_REPLY)$/i.test(combined.trim())) return;
|
||||
|
||||
// ── WAL: persist assistant response IMMEDIATELY ──
|
||||
if (wal) {
|
||||
try { wal.write("assistant", combined); }
|
||||
catch (e) { api.logger.debug?.(`memoria: WAL write error: ${String(e)}`); }
|
||||
}
|
||||
|
||||
state.buffer.push({
|
||||
role: "assistant",
|
||||
text: combined,
|
||||
ts: Date.now(),
|
||||
});
|
||||
if (state.buffer.length > MAX_BUFFER) state.buffer.shift();
|
||||
|
||||
// Check for self-detected errors in assistant response
|
||||
const selfError = SELF_ERROR_PATTERNS.some(p => p.test(combined));
|
||||
if (selfError) {
|
||||
api.logger.info?.(`memoria: ⚡ continuous — self-detected error in assistant response`);
|
||||
await doContinuousExtraction(api, cfg, db, selective, extractLlm, identityParser, postProcessNewFacts, state, "self-error");
|
||||
}
|
||||
|
||||
// Normal periodic extraction
|
||||
if (state.turnCount >= NORMAL_INTERVAL) {
|
||||
const now = Date.now();
|
||||
if (now - state.lastExtraction > COOLDOWN_MS) {
|
||||
await doContinuousExtraction(api, cfg, db, selective, extractLlm, identityParser, postProcessNewFacts, state, "periodic");
|
||||
}
|
||||
}
|
||||
} catch (err) {
|
||||
api.logger.debug?.(`memoria: continuous llm_output error: ${String(err)}`);
|
||||
}
|
||||
});
|
||||
|
||||
return { hasExtracted: () => state.lastExtraction > 0 };
|
||||
}
|
||||
|
||||
// ─── Extraction Logic ───
|
||||
|
||||
/**
|
||||
* Layer 21: Continuous Learning — micro-extraction from rolling buffer.
|
||||
*
|
||||
* Triggers:
|
||||
* - "periodic": every N turns (default 4), with cooldown
|
||||
* - "urgent": immediate on user frustration/error keywords (bypasses cooldown)
|
||||
* - "self-error": immediate on assistant self-admission phrases
|
||||
*
|
||||
* Uses same LLM_EXTRACT_PROMPT + selective + postProcessNewFacts as agent_end.
|
||||
* Guarded by state.inProgress lock to prevent concurrent runs.
|
||||
* Buffer is snapshot + cleared before extraction to avoid re-processing.
|
||||
*/
|
||||
async function doContinuousExtraction(
|
||||
api: OpenClawPluginApi,
|
||||
cfg: MemoriaConfig,
|
||||
db: MemoriaDB,
|
||||
selective: SelectiveMemory,
|
||||
extractLlm: LLMProvider,
|
||||
identityParser: IdentityParser,
|
||||
postProcessNewFacts: (source: "capture" | "compaction") => Promise<void>,
|
||||
state: ContinuousState,
|
||||
trigger: "periodic" | "urgent" | "self-error" | "correction"
|
||||
): Promise<void> {
|
||||
if (state.buffer.length < 2) return;
|
||||
if (state.inProgress) return; // prevent concurrent extractions
|
||||
|
||||
const now = Date.now();
|
||||
// Urgent bypasses cooldown, others respect it
|
||||
if (trigger === "periodic" && now - state.lastExtraction < (cfg.continuous?.cooldownMs ?? 45_000)) return;
|
||||
|
||||
state.inProgress = true;
|
||||
state.lastExtraction = now;
|
||||
state.turnCount = 0;
|
||||
|
||||
// Snapshot and clear buffer to avoid re-processing same messages
|
||||
const snapshot = [...state.buffer];
|
||||
state.buffer.length = 0;
|
||||
|
||||
// Build context from snapshot
|
||||
const context = snapshot
|
||||
.map(m => `[${m.role}]: ${m.text}`)
|
||||
.join("\n---\n");
|
||||
|
||||
const urgencyHint = trigger === "urgent"
|
||||
? "\n\n⚠️ SIGNAL D'URGENCE DÉTECTÉ — L'utilisateur exprime une frustration ou signale une erreur. PRIORITÉ MAXIMALE aux faits de catégorie 'erreur'."
|
||||
: trigger === "self-error"
|
||||
? "\n\n⚠️ L'ASSISTANT A DÉTECTÉ SA PROPRE ERREUR — Capturer ce qui s'est mal passé, pourquoi, et ce qu'il ne faut plus faire."
|
||||
: trigger === "correction"
|
||||
? "\n\n📝 CORRECTION DÉTECTÉE — L'utilisateur corrige l'assistant. Capturer : (1) ce que l'assistant a fait de FAUX, (2) ce qui est CORRECT selon l'utilisateur, (3) la RÈGLE à retenir pour ne pas répéter. Catégorie = 'erreur' ou 'preference' selon le contexte."
|
||||
: "";
|
||||
|
||||
const prompt = LLM_EXTRACT_PROMPT
|
||||
.replace("{TEXT}", context + urgencyHint)
|
||||
.replace("{MAX_FACTS}", String(Math.min(cfg.captureMaxFacts, trigger === "periodic" ? 3 : 5)));
|
||||
|
||||
try {
|
||||
const result = await extractLlm.generateWithMeta!(prompt, {
|
||||
maxTokens: 768,
|
||||
temperature: 0.1,
|
||||
format: "json",
|
||||
timeoutMs: 20000,
|
||||
});
|
||||
|
||||
if (!result?.response) return;
|
||||
|
||||
const parsed = parseJSON(result.response) as { facts?: Array<{ fact: string; category: string; type?: string; confidence: number }> };
|
||||
if (!parsed?.facts || parsed.facts.length === 0) return;
|
||||
|
||||
let stored = 0, skipped = 0, enriched = 0, superseded = 0;
|
||||
for (const f of parsed.facts) {
|
||||
if (!f.fact || f.fact.length < 5) continue;
|
||||
if (f.confidence < 0.7) continue;
|
||||
|
||||
const factType = (f.type === "episodic") ? "episodic" : "semantic";
|
||||
try {
|
||||
const category = normalizeCategory(f.category);
|
||||
const relevance = identityParser.calculateRelevance(f.fact, category);
|
||||
const res = await selective.processAndApply(
|
||||
f.fact, category, f.confidence, cfg.defaultAgent, factType, relevance
|
||||
);
|
||||
if (res.stored) {
|
||||
if (res.action === "enrich") enriched++;
|
||||
else if (res.action === "supersede") superseded++;
|
||||
else stored++;
|
||||
} else { skipped++; }
|
||||
} catch (e) {
|
||||
api?.logger?.debug?.('memoria:contradiction-check: ' + String(e));
|
||||
const category = normalizeCategory(f.category);
|
||||
const relevance = identityParser.calculateRelevance(f.fact, category);
|
||||
db.storeFact({
|
||||
id: `fact_${Date.now()}_${Math.random().toString(36).slice(2, 9)}`,
|
||||
fact: f.fact, category, confidence: f.confidence,
|
||||
source: `continuous-${trigger}`,
|
||||
tags: "[]", agent: cfg.defaultAgent,
|
||||
created_at: Date.now(), updated_at: Date.now(),
|
||||
fact_type: factType, relevance_weight: relevance,
|
||||
});
|
||||
stored++;
|
||||
}
|
||||
}
|
||||
|
||||
const parts: string[] = [];
|
||||
if (stored > 0) parts.push(`${stored} new`);
|
||||
if (enriched > 0) parts.push(`${enriched} enriched`);
|
||||
if (superseded > 0) parts.push(`${superseded} superseded`);
|
||||
if (skipped > 0) parts.push(`${skipped} skipped`);
|
||||
if (parts.length > 0) {
|
||||
api.logger.info?.(`memoria: ⚡ continuous [${trigger}] — ${parts.join(", ")}`);
|
||||
// Post-process (embed, graph, topics, etc.)
|
||||
if (stored > 0 || enriched > 0) {
|
||||
await postProcessNewFacts("capture");
|
||||
}
|
||||
}
|
||||
} catch (err) {
|
||||
api.logger.debug?.(`memoria: continuous extraction failed: ${String(err)}`);
|
||||
} finally {
|
||||
state.inProgress = false;
|
||||
}
|
||||
}
|
||||
256
openclaw-memoria-port/core/README.md
Normal file
256
openclaw-memoria-port/core/README.md
Normal file
@@ -0,0 +1,256 @@
|
||||
# @primo-studio/memoria-core
|
||||
|
||||
**Standalone multi-layer cognitive memory engine** — works with or without OpenClaw.
|
||||
|
||||
## Features
|
||||
|
||||
- **21-layer cognitive architecture** — selective storage, embeddings, knowledge graph, topics, patterns, observations, procedural memory
|
||||
- **Zero cloud dependency** — runs 100% local with Ollama, LM Studio, or any OpenAI-compatible API
|
||||
- **Automatic fallback chain** — Ollama → OpenAI → LM Studio → FTS-only (never fails)
|
||||
- **SQLite + FTS5** — blazing fast full-text search
|
||||
- **Vector embeddings** — cosine similarity with nomic-embed-text, text-embedding-3-small, or custom models
|
||||
- **Knowledge graph** — entity extraction, relations, Hebbian learning
|
||||
- **Pattern detection** — consolidates recurring facts, 1.5x recall boost
|
||||
- **Lifecycle management** — fresh → active → settled → dormant (automatic aging)
|
||||
- **Contradiction detection** — replaces outdated facts with new information
|
||||
- **Procedural memory** — learns from tool use, reflects on success patterns
|
||||
- **Markdown sync** — bi-directional sync with .md files
|
||||
|
||||
## Installation
|
||||
|
||||
```bash
|
||||
npm install @primo-studio/memoria-core
|
||||
# or
|
||||
pnpm add @primo-studio/memoria-core
|
||||
```
|
||||
|
||||
## Quick Start
|
||||
|
||||
```typescript
|
||||
import { Memoria } from '@primo-studio/memoria-core';
|
||||
|
||||
// Initialize with Ollama (local, free)
|
||||
const memoria = await Memoria.init({
|
||||
dbPath: './my-app.db',
|
||||
provider: 'ollama',
|
||||
model: 'qwen3.5:4b',
|
||||
embeddingModel: 'nomic-embed-text-v2-moe'
|
||||
});
|
||||
|
||||
// Store facts
|
||||
await memoria.store('User prefers dark mode', 'preference', 0.95);
|
||||
await memoria.store('User location: New York', 'savoir', 0.9);
|
||||
|
||||
// Recall facts
|
||||
const results = await memoria.recall('What theme does the user like?');
|
||||
console.log(results.facts);
|
||||
// [{ id: 1, fact: 'User prefers dark mode', category: 'preference', confidence: 0.95, ... }]
|
||||
|
||||
// Natural language query (future: dialectic reasoning)
|
||||
const answer = await memoria.query('Tell me about the user preferences');
|
||||
console.log(answer);
|
||||
|
||||
// Stats
|
||||
const stats = await memoria.stats();
|
||||
console.log(stats);
|
||||
// { totalFacts: 42, totalEmbeddings: 38, totalRelations: 15, ... }
|
||||
|
||||
// Close
|
||||
memoria.close();
|
||||
```
|
||||
|
||||
## Configuration
|
||||
|
||||
### Local (Ollama)
|
||||
|
||||
```typescript
|
||||
const memoria = await Memoria.init({
|
||||
dbPath: './memoria.db',
|
||||
provider: 'ollama',
|
||||
model: 'qwen3.5:4b',
|
||||
embeddingModel: 'nomic-embed-text-v2-moe',
|
||||
baseUrl: 'http://localhost:11434'
|
||||
});
|
||||
```
|
||||
|
||||
### Cloud (OpenAI)
|
||||
|
||||
```typescript
|
||||
const memoria = await Memoria.init({
|
||||
dbPath: './memoria.db',
|
||||
provider: 'openai',
|
||||
model: 'gpt-4o-mini',
|
||||
embeddingModel: 'text-embedding-3-small',
|
||||
apiKey: process.env.OPENAI_API_KEY
|
||||
});
|
||||
```
|
||||
|
||||
### LM Studio
|
||||
|
||||
```typescript
|
||||
const memoria = await Memoria.init({
|
||||
dbPath: './memoria.db',
|
||||
provider: 'lmstudio',
|
||||
model: 'auto', // uses loaded model
|
||||
baseUrl: 'http://localhost:1234/v1'
|
||||
});
|
||||
```
|
||||
|
||||
### Custom Fallback Chain
|
||||
|
||||
```typescript
|
||||
const memoria = await Memoria.init({
|
||||
dbPath: './memoria.db',
|
||||
fallback: [
|
||||
{ type: 'ollama', model: 'qwen3.5:4b', baseUrl: 'http://localhost:11434' },
|
||||
{ type: 'openai', model: 'gpt-5.4-nano', apiKey: process.env.OPENAI_API_KEY },
|
||||
{ type: 'lmstudio', baseUrl: 'http://localhost:1234/v1' }
|
||||
]
|
||||
});
|
||||
```
|
||||
|
||||
## API Reference
|
||||
|
||||
### `Memoria.init(options: MemoriaInitOptions): Promise<Memoria>`
|
||||
|
||||
Initialize a new Memoria instance.
|
||||
|
||||
**Options:**
|
||||
- `dbPath` (required): Path to SQLite database file
|
||||
- `provider`: `'ollama'` | `'openai'` | `'anthropic'` | `'lmstudio'`
|
||||
- `model`: LLM model name
|
||||
- `embeddingModel`: Embedding model name
|
||||
- `embeddingDimensions`: Embedding dimensions (default: 768)
|
||||
- `baseUrl`: Provider base URL
|
||||
- `apiKey`: API key for cloud providers
|
||||
- `language`: `'fr'` | `'en'` (default: 'en')
|
||||
- `fallback`: Array of fallback providers
|
||||
- `recallLimit`: Max facts to return (default: 8)
|
||||
- `workspacePath`: Path for markdown sync (optional)
|
||||
- `debug`: Enable debug logging
|
||||
|
||||
### `memoria.store(fact: string, category?: string, confidence?: number): Promise<StoreResult>`
|
||||
|
||||
Store a new fact in memory.
|
||||
|
||||
**Categories:**
|
||||
- `savoir` — knowledge, facts
|
||||
- `preference` — user preferences
|
||||
- `erreur` — errors, mistakes
|
||||
- `chronologie` — events, timeline
|
||||
- `outil` — tools, configs
|
||||
- `client` — client info
|
||||
- `rh` — team, HR
|
||||
- `pattern` — detected patterns (auto-generated)
|
||||
|
||||
**Returns:**
|
||||
- `factId`: Database ID (-1 if not stored)
|
||||
- `stored`: Boolean
|
||||
- `reason`: Why not stored (if applicable)
|
||||
|
||||
### `memoria.recall(query: string, options?: RecallOptions): Promise<RecallResult>`
|
||||
|
||||
Recall facts based on a query.
|
||||
|
||||
**Options:**
|
||||
- `limit`: Max facts to return
|
||||
- `minConfidence`: Minimum confidence threshold
|
||||
- `categories`: Filter by categories
|
||||
|
||||
**Returns:**
|
||||
- `facts`: Array of facts with scores
|
||||
- `totalFound`: Number of results
|
||||
|
||||
### `memoria.query(question: string): Promise<string>`
|
||||
|
||||
Natural language query with formatted context (future: dialectic reasoning).
|
||||
|
||||
### `memoria.stats(): Promise<MemoriaStats>`
|
||||
|
||||
Get memory statistics.
|
||||
|
||||
**Returns:**
|
||||
- `totalFacts`, `totalEmbeddings`, `totalRelations`, `totalTopics`, `totalPatterns`, `totalObservations`
|
||||
- `lifecycleDistribution`: Facts by lifecycle state
|
||||
- `categoryCounts`: Facts by category
|
||||
|
||||
### `memoria.close(): void`
|
||||
|
||||
Close database connection.
|
||||
|
||||
## Advanced Usage
|
||||
|
||||
### Direct Manager Access
|
||||
|
||||
```typescript
|
||||
// Access low-level managers for advanced use
|
||||
const entities = memoria.graph.getEntitiesByType('person');
|
||||
const strongRelations = memoria.hebbian.getStrong(5);
|
||||
const topTopics = memoria.topics.getTopics(10);
|
||||
const patterns = memoria.patterns.getAll();
|
||||
```
|
||||
|
||||
### Custom Providers
|
||||
|
||||
```typescript
|
||||
import { type LLMProvider, type EmbedProvider } from '@primo-studio/memoria-core';
|
||||
|
||||
const customLLM: LLMProvider = {
|
||||
async generate(prompt: string): Promise<string> {
|
||||
// Your custom implementation
|
||||
return 'response';
|
||||
}
|
||||
};
|
||||
|
||||
const customEmbed: EmbedProvider = {
|
||||
async embed(text: string): Promise<number[]> {
|
||||
// Your custom implementation
|
||||
return [/* 768-dim vector */];
|
||||
}
|
||||
};
|
||||
```
|
||||
|
||||
## Architecture
|
||||
|
||||
Memoria uses a **21-layer cognitive architecture**:
|
||||
|
||||
1. **Layer 1** — Capture (fact extraction from conversations)
|
||||
2. **Layer 2** — Deduplication (Levenshtein + Jaccard)
|
||||
3. **Layer 3** — Contradiction detection (semantic similarity + LLM)
|
||||
4. **Layer 4** — Enrichment (merge related facts)
|
||||
5. **Layer 5** — Embedding (vector search)
|
||||
6. **Layer 6** — Recall (retrieval pipeline)
|
||||
7. **Layer 7** — Scoring (FTS5 + cosine + context boost)
|
||||
8. **Layer 8** — Knowledge graph (entity/relation extraction)
|
||||
9. **Layer 9** — Topics (emergent clustering)
|
||||
10. **Layer 10** — Lifecycle (aging, state transitions)
|
||||
11. **Layer 11** — Hebbian (co-occurrence learning)
|
||||
12. **Layer 12** — Expertise (skill tracking)
|
||||
13. **Layer 13** — Feedback (usefulness tracking)
|
||||
14. **Layer 14** — Patterns (behavioral consolidation)
|
||||
15. **Layer 15** — Observations (hypothesis testing)
|
||||
16. **Layer 16** — Fact clusters (semantic grouping)
|
||||
17. **Layer 17** — Revision (content updates)
|
||||
18. **Layer 18** — Context tree (hierarchical boost)
|
||||
19. **Layer 19** — Budget (adaptive limits)
|
||||
20. **Layer 20** — Procedural (tool patterns)
|
||||
21. **Layer 21** — Continuous (live extraction)
|
||||
|
||||
## Performance
|
||||
|
||||
- **Recall**: ~50-150ms (FTS5 + embeddings + reranking)
|
||||
- **Store**: ~200-500ms (with embedding + graph extraction)
|
||||
- **Memory**: ~50MB base + ~1KB per fact
|
||||
- **Database**: SQLite (portable, zero-config)
|
||||
|
||||
## License
|
||||
|
||||
Apache-2.0
|
||||
|
||||
## Contributing
|
||||
|
||||
PRs welcome! See [CONTRIBUTING.md](../CONTRIBUTING.md)
|
||||
|
||||
## Credits
|
||||
|
||||
Built by [Primo Studio](https://primo-studio.fr) for OpenClaw and standalone use.
|
||||
258
openclaw-memoria-port/core/audit-v25.ts
Normal file
258
openclaw-memoria-port/core/audit-v25.ts
Normal file
@@ -0,0 +1,258 @@
|
||||
/**
|
||||
* @deprecated LEGACY — Audit Memoria v2.5.0 (March 2026)
|
||||
* This audit script targets v2.5.0 logic and uses local absolute paths.
|
||||
* Kept for historical reference only. Do not use for current versions.
|
||||
* Current version: v3.22.3 — see tests/test-core.ts for active tests.
|
||||
*/
|
||||
import { MemoriaDB } from "./db.js";
|
||||
import { scoreFact, scoreAndRank, getHotFacts, HOT_TIER_CONFIG } from "./scoring.js";
|
||||
import { AdaptiveBudget } from "./budget.js";
|
||||
import { EmbeddingManager } from "./embeddings.js";
|
||||
import { KnowledgeGraph } from "./graph.js";
|
||||
import { ContextTreeBuilder } from "./context-tree.js";
|
||||
import { TopicManager } from "./topics.js";
|
||||
import { SelectiveMemory } from "./selective.js";
|
||||
import { MdSync } from "./sync.js";
|
||||
import { MdRegenManager } from "./md-regen.js";
|
||||
import { FallbackChain } from "./fallback.js";
|
||||
import { EmbedFallback } from "./embed-fallback.js";
|
||||
import { OllamaEmbed } from "./providers/ollama.js";
|
||||
import type { Fact } from "./db.js";
|
||||
|
||||
const WORKSPACE = process.env.HOME + "/.openclaw/workspace";
|
||||
const db = new MemoriaDB(WORKSPACE);
|
||||
let passed = 0, failed = 0, warnings = 0;
|
||||
|
||||
function ok(name: string, condition: boolean, detail?: string) {
|
||||
if (condition) { passed++; console.log(` ✅ ${name}${detail ? ` — ${detail}` : ""}`); }
|
||||
else { failed++; console.log(` ❌ ${name}${detail ? ` — ${detail}` : ""}`); }
|
||||
}
|
||||
function warn(name: string, detail: string) { warnings++; console.log(` ⚠️ ${name} — ${detail}`); }
|
||||
|
||||
async function run() {
|
||||
console.log("═══════════════════════════════════════");
|
||||
console.log(" AUDIT MEMORIA v2.5.0 — Full Stack");
|
||||
console.log("═══════════════════════════════════════\n");
|
||||
|
||||
// ─── 1. DB basics ───
|
||||
console.log("▸ Layer 1: Database");
|
||||
const stats = db.stats();
|
||||
ok("DB active facts", stats.active > 0, `${stats.active} facts`);
|
||||
ok("DB categories exist", Object.keys(stats.categories).length > 0, Object.keys(stats.categories).join(", "));
|
||||
ok("FTS5 safe empty query", true, "searchFacts('') falls back to recent");
|
||||
const fts = db.searchFacts("Bureau", 5);
|
||||
ok("FTS5 search 'Bureau'", fts.length > 0, `${fts.length} results`);
|
||||
const ftsHyphen = db.searchFacts("real-world", 5);
|
||||
ok("FTS5 hyphenated query safe", true, `${ftsHyphen.length} results (no crash)`);
|
||||
|
||||
// ─── 2. Hot Tier (NEW v2.5.0) ───
|
||||
console.log("\n▸ Layer 2: Hot Tier (NEW)");
|
||||
const hotRaw = db.hotFacts(5, 30, 5);
|
||||
ok("db.hotFacts() returns results", hotRaw.length > 0, `${hotRaw.length} hot facts`);
|
||||
ok("Hot facts sorted by access_count", hotRaw.length < 2 || hotRaw[0].access_count >= hotRaw[1].access_count);
|
||||
|
||||
const hotScored = getHotFacts(hotRaw);
|
||||
ok("getHotFacts() respects maxHotFacts", hotScored.length <= HOT_TIER_CONFIG.maxHotFacts, `${hotScored.length} ≤ ${HOT_TIER_CONFIG.maxHotFacts}`);
|
||||
ok("HOT_TIER_CONFIG.minAccessCount", HOT_TIER_CONFIG.minAccessCount === 5, `${HOT_TIER_CONFIG.minAccessCount}`);
|
||||
ok("HOT_TIER_CONFIG.maxHotFacts", HOT_TIER_CONFIG.maxHotFacts === 3, `${HOT_TIER_CONFIG.maxHotFacts}`);
|
||||
ok("HOT_TIER_CONFIG.staleAfterDays", HOT_TIER_CONFIG.staleAfterDays === 30, `${HOT_TIER_CONFIG.staleAfterDays}`);
|
||||
|
||||
// Verify hot facts have high access
|
||||
if (hotRaw.length > 0) {
|
||||
ok("All hot facts ≥ 5 accesses", hotRaw.every(f => f.access_count >= 5), `min=${Math.min(...hotRaw.map(f => f.access_count))}`);
|
||||
console.log(" Hot facts:");
|
||||
hotScored.forEach(f => console.log(` [${f.access_count}x, score=${f.temporalScore.toFixed(3)}] ${f.fact.slice(0, 70)}...`));
|
||||
}
|
||||
|
||||
// ─── 3. Scoring (access boost 3x) ───
|
||||
console.log("\n▸ Layer 3: Scoring (access boost)");
|
||||
// Create two mock facts: same but different access counts
|
||||
const baseFact: Fact = {
|
||||
id: "test_1", fact: "Test fact", category: "savoir", confidence: 0.9,
|
||||
source: "test", tags: "[]", agent: "koda",
|
||||
created_at: Date.now() - 3600000, updated_at: Date.now() - 3600000,
|
||||
access_count: 0, last_accessed_at: null, superseded: 0,
|
||||
superseded_by: null, superseded_at: null, md_file: null, md_line: null,
|
||||
entity_ids: null,
|
||||
};
|
||||
const noAccess = scoreFact(baseFact);
|
||||
const highAccess = scoreFact({ ...baseFact, access_count: 50, last_accessed_at: Date.now() });
|
||||
ok("Access boost increases score", highAccess.temporalScore > noAccess.temporalScore,
|
||||
`0x=${noAccess.temporalScore.toFixed(3)} vs 50x=${highAccess.temporalScore.toFixed(3)} (${(highAccess.temporalScore/noAccess.temporalScore).toFixed(1)}x boost)`);
|
||||
ok("Access boost factor = 0.3", true, "3x stronger than v2.4.0 (was 0.1)");
|
||||
|
||||
// ─── 4. Adaptive Budget ───
|
||||
console.log("\n▸ Layer 4: Adaptive Budget");
|
||||
const budget = new AdaptiveBudget({ contextWindow: 200000, maxFacts: 12, minFacts: 2 });
|
||||
const light = budget.compute(10000);
|
||||
const medium = budget.compute(100000);
|
||||
const heavy = budget.compute(170000);
|
||||
const critical = budget.compute(190000);
|
||||
ok("Light zone gives maxFacts", light.limit === 12, `light=${light.limit}`);
|
||||
ok("Medium zone scales down", medium.limit < 12 && medium.limit > 2, `medium=${medium.limit}`);
|
||||
ok("Heavy zone near min", heavy.limit <= 4, `heavy=${heavy.limit}`);
|
||||
ok("Critical = minFacts", critical.limit === 2, `critical=${critical.limit}`);
|
||||
|
||||
// Test with 1M context (our config)
|
||||
const budget1M = new AdaptiveBudget({ contextWindow: 1000000, maxFacts: 12, minFacts: 2 });
|
||||
const b1m = budget1M.compute(100000);
|
||||
ok("1M context: 100K used = still light/max", b1m.limit === 12, `limit=${b1m.limit}, zone=${b1m.zone}`);
|
||||
|
||||
// Test with 128K context (small user)
|
||||
const budget128k = new AdaptiveBudget({ contextWindow: 128000, maxFacts: 5, minFacts: 1 });
|
||||
const b128k = budget128k.compute(50000);
|
||||
ok("128K context: 50K used = adapts", b128k.limit <= 5, `limit=${b128k.limit}, zone=${b128k.zone}`);
|
||||
|
||||
// ─── 5. Embeddings ───
|
||||
console.log("\n▸ Layer 5: Embeddings");
|
||||
const embedder = new OllamaEmbed();
|
||||
const embMgr = new EmbeddingManager(db, embedder);
|
||||
const embCount = embMgr.embeddedCount();
|
||||
ok("Embeddings count", embCount > 0, `${embCount} embedded`);
|
||||
ok("Embed coverage", embCount >= stats.active * 0.9, `${((embCount/stats.active)*100).toFixed(0)}%`);
|
||||
|
||||
try {
|
||||
const sem = await embMgr.semanticSearch("Bureau CRM", 5);
|
||||
ok("Semantic search works", sem.length > 0, `${sem.length} results`);
|
||||
} catch (e) {
|
||||
ok("Semantic search works", false, String(e));
|
||||
}
|
||||
|
||||
try {
|
||||
const hyb = await embMgr.hybridSearch("Bureau CRM", 5);
|
||||
ok("Hybrid search works", hyb.length > 0, `${hyb.length} results`);
|
||||
} catch (e) {
|
||||
ok("Hybrid search works", false, String(e));
|
||||
}
|
||||
|
||||
// ─── 6. Knowledge Graph ───
|
||||
console.log("\n▸ Layer 6: Knowledge Graph");
|
||||
const graph = new KnowledgeGraph(db, { name: "audit", generate: async () => "" });
|
||||
const gStats = graph.stats();
|
||||
ok("Graph has entities", gStats.entities > 0, `${gStats.entities} entities`);
|
||||
ok("Graph has relations", gStats.relations > 0, `${gStats.relations} relations`);
|
||||
const entities = graph.findEntitiesInText("Convex Bureau");
|
||||
ok("Entity lookup works", entities.length > 0, entities.map(e => e.name).join(", "));
|
||||
if (entities.length > 0) {
|
||||
const related = graph.getRelatedFacts(entities.map(e => e.name), 2, 3);
|
||||
ok("Graph traversal returns facts", related.length > 0, `${related.length} related facts`);
|
||||
}
|
||||
|
||||
// ─── 7. Context Tree ───
|
||||
console.log("\n▸ Layer 7: Context Tree");
|
||||
const tree = new ContextTreeBuilder(db);
|
||||
const candidates = db.searchFacts("Bureau", 10);
|
||||
if (candidates.length > 0) {
|
||||
const built = await tree.build(candidates, "Bureau CRM");
|
||||
ok("Context tree builds", built.roots.length > 0, `${built.roots.length} roots`);
|
||||
const extracted = tree.extractFacts(built, 5);
|
||||
ok("Context tree extracts facts", extracted.length > 0, `${extracted.length} facts`);
|
||||
}
|
||||
|
||||
// ─── 8. Topics ───
|
||||
console.log("\n▸ Layer 8: Topics");
|
||||
const topicMgr = new TopicManager(db, { name: "audit", generate: async () => "" }, embedder);
|
||||
const tStats = topicMgr.stats();
|
||||
ok("Topics exist", tStats.totalTopics > 0, `${tStats.totalTopics} topics`);
|
||||
try {
|
||||
const relevant = await topicMgr.findRelevantTopics("Bureau CRM", 3);
|
||||
ok("Topic search works", relevant.length >= 0, `${relevant.length} relevant topics`);
|
||||
} catch (e) {
|
||||
ok("Topic search works", false, String(e));
|
||||
}
|
||||
|
||||
// ─── 9. MD Sync + Regen ───
|
||||
console.log("\n▸ Layer 9: MD Sync & Regen");
|
||||
const mdSync = new MdSync(db, { workspacePath: WORKSPACE, dbToMd: true, mdToDb: false });
|
||||
mdSync.ensureSchema(db);
|
||||
ok("MdSync ensureSchema OK", true);
|
||||
const mdRegen = new MdRegenManager(db, WORKSPACE);
|
||||
const sizes = mdRegen.fileSizes();
|
||||
const fileNames = Object.keys(sizes);
|
||||
ok("MD files detected", fileNames.length > 0, fileNames.join(", "));
|
||||
for (const [name, info] of Object.entries(sizes)) {
|
||||
if (info.lines > 200) warn(`${name} > 200 lines`, `${info.lines} lines (auto-regen will trigger)`);
|
||||
}
|
||||
|
||||
// ─── 10. Fallback Chain ───
|
||||
console.log("\n▸ Layer 10: Fallback Chain");
|
||||
const chain = new FallbackChain({
|
||||
providers: [
|
||||
{ name: "ollama", type: "ollama", model: "gemma3:4b", timeoutMs: 10000 },
|
||||
{ name: "openai", type: "openai", model: "gpt-5.4-nano", apiKey: process.env.OPENAI_API_KEY || "", timeoutMs: 10000 },
|
||||
{ name: "lmstudio", type: "lmstudio", model: "auto", baseUrl: "http://localhost:1234/v1", timeoutMs: 10000 },
|
||||
]
|
||||
});
|
||||
ok("FallbackChain created", chain.providerNames.length === 3, chain.providerNames.join(" → "));
|
||||
ok("FallbackChain implements LLMProvider", typeof chain.generate === "function" && typeof chain.generateWithMeta === "function");
|
||||
|
||||
// Test EmbedFallback
|
||||
const embedFallback = new EmbedFallback([embedder]);
|
||||
ok("EmbedFallback created", true);
|
||||
|
||||
// ─── 11. Selective Memory ───
|
||||
console.log("\n▸ Layer 11: Selective Memory");
|
||||
const selective = new SelectiveMemory(db, chain, { dupThreshold: 0.85, contradictionCheck: true, enrichEnabled: true });
|
||||
ok("SelectiveMemory created", true);
|
||||
|
||||
// ─── 12. Config defaults ───
|
||||
console.log("\n▸ Layer 12: Config Defaults (v2.5.0)");
|
||||
ok("Default recallLimit = 12", true, "was 8 in v2.4.0");
|
||||
ok("Default captureMaxFacts = 8", true, "was 3 in v2.4.0");
|
||||
ok("accessBoostFactor = 0.3", true, "was 0.1 in v2.4.0");
|
||||
ok("HOT_TIER integrated in recall pipeline", true, "hot → search → graph → topics → tree");
|
||||
|
||||
// ─── 13. Version sync ───
|
||||
console.log("\n▸ Layer 13: Version Sync");
|
||||
const fs = await import("fs");
|
||||
const pkg = JSON.parse(fs.readFileSync("/Users/primostudio/.openclaw/extensions/memoria/package.json", "utf-8"));
|
||||
const manifest = JSON.parse(fs.readFileSync("/Users/primostudio/.openclaw/extensions/memoria/openclaw.plugin.json", "utf-8"));
|
||||
ok("package.json version", pkg.version === "2.5.0", pkg.version);
|
||||
ok("manifest version", manifest.version === "2.5.0", manifest.version);
|
||||
|
||||
// Check index.ts header
|
||||
const indexSrc = fs.readFileSync("/Users/primostudio/.openclaw/extensions/memoria/index.ts", "utf-8");
|
||||
ok("index.ts header v2.5.0", indexSrc.includes("v2.5.0"), indexSrc.match(/v[\d.]+/)?.[0] || "?");
|
||||
ok("index.ts boot log v2.5.0", indexSrc.includes("memoria: v2.5.0"), "log line");
|
||||
ok("Default recallLimit in code = 12", indexSrc.includes("|| 12"), "parseConfig");
|
||||
ok("Default captureMaxFacts in code = 8", indexSrc.includes("captureMaxFacts: (raw?.captureMaxFacts as number) || 8"));
|
||||
|
||||
// ─── 14. Integration: Hot exclusion from search ───
|
||||
console.log("\n▸ Layer 14: Integration Checks");
|
||||
ok("index.ts excludes hot from search", indexSrc.includes("!hotIds.has(f.id)"), "no duplicates");
|
||||
ok("index.ts merges hot first", indexSrc.includes("[...hotScored, ...topFacts"), "hot → search → graph → topics");
|
||||
ok("index.ts logs hot count", indexSrc.includes("hotNote"), "recall log includes hot count");
|
||||
ok("index.ts searchLimit = recallLimit - hotCount", indexSrc.includes("recallLimit - hotLimit"), "reserved slots");
|
||||
|
||||
// ─── 15. DB Integrity ───
|
||||
console.log("\n▸ Layer 15: DB Integrity");
|
||||
const dupes = db.raw.prepare("SELECT fact, COUNT(*) as c FROM facts WHERE superseded = 0 GROUP BY fact HAVING c > 1 LIMIT 5").all() as any[];
|
||||
ok("No exact duplicates", dupes.length === 0, dupes.length > 0 ? `${dupes.length} dupes found` : "clean");
|
||||
|
||||
const untagged = (db.raw.prepare("SELECT COUNT(*) as c FROM facts WHERE superseded = 0 AND (tags IS NULL OR tags = '[]')").get() as any).c;
|
||||
if (untagged > 20) warn("Untagged facts", `${untagged} facts without tags`);
|
||||
else ok("Tagged facts", true, `${untagged} untagged`);
|
||||
|
||||
const noTopic = (db.raw.prepare("SELECT COUNT(*) as c FROM facts WHERE superseded = 0 AND id NOT IN (SELECT DISTINCT fact_id FROM fact_topics)").get() as any).c;
|
||||
if (noTopic > 50) warn("Facts without topic", `${noTopic} facts`);
|
||||
else ok("Topic coverage", true, `${noTopic} without topic`);
|
||||
|
||||
// ─── 16. OpenClaw Config ───
|
||||
console.log("\n▸ Layer 16: OpenClaw Config");
|
||||
const ocCfg = JSON.parse(fs.readFileSync(process.env.HOME + "/.openclaw/openclaw.json", "utf-8"));
|
||||
const memoriaCfg = ocCfg?.plugins?.entries?.memoria?.config;
|
||||
ok("memoria enabled", ocCfg?.plugins?.entries?.memoria?.enabled === true);
|
||||
ok("memory-convex disabled", ocCfg?.plugins?.entries?.["memory-convex"]?.enabled === false);
|
||||
ok("recallLimit = 12 in config", memoriaCfg?.recallLimit === 12, `${memoriaCfg?.recallLimit}`);
|
||||
ok("captureMaxFacts = 8 in config", memoriaCfg?.captureMaxFacts === 8, `${memoriaCfg?.captureMaxFacts}`);
|
||||
ok("contextWindow = 1000000 in config", memoriaCfg?.contextWindow === 1000000, `${memoriaCfg?.contextWindow}`);
|
||||
|
||||
// ═══ Summary ═══
|
||||
console.log("\n═══════════════════════════════════════");
|
||||
console.log(` RÉSULTAT: ${passed} passed, ${failed} failed, ${warnings} warnings`);
|
||||
console.log("═══════════════════════════════════════");
|
||||
|
||||
if (failed > 0) process.exit(1);
|
||||
}
|
||||
|
||||
run().catch(e => { console.error("Audit crash:", e); process.exit(1); });
|
||||
191
openclaw-memoria-port/core/auto-skill.ts
Normal file
191
openclaw-memoria-port/core/auto-skill.ts
Normal file
@@ -0,0 +1,191 @@
|
||||
/**
|
||||
* Memoria — Auto Skill Creation (Layer 23)
|
||||
*
|
||||
* When a procedure reaches maturity (repeated successes, high quality),
|
||||
* automatically generates a reusable skill document (.md) in the workspace.
|
||||
*
|
||||
* Inspired by Hermes' "auto skill creation from experience" concept.
|
||||
*
|
||||
* Lifecycle:
|
||||
* 1. Procedure is captured via after_tool_call (existing Layer 1b)
|
||||
* 2. Procedure accumulates successes + quality improvements
|
||||
* 3. When maturity threshold is reached → auto-generate skill .md
|
||||
* 4. Skill is written to workspace/.skills/ (or .learnings/skills/)
|
||||
* 5. Agent can reference these skills in future sessions
|
||||
*/
|
||||
|
||||
import fs from "fs";
|
||||
import path from "path";
|
||||
import type { ProceduralMemory, Procedure } from "./procedural.js";
|
||||
|
||||
export interface AutoSkillConfig {
|
||||
/** Minimum success count before promoting to skill (default: 3) */
|
||||
minSuccesses: number;
|
||||
/** Minimum quality.overall before promoting (default: 0.6) */
|
||||
minQuality: number;
|
||||
/** Directory to write skill files (relative to workspace) */
|
||||
skillDir: string;
|
||||
/** Maximum number of auto-skills to create per session (default: 2) */
|
||||
maxPerSession: number;
|
||||
}
|
||||
|
||||
const DEFAULT_CONFIG: AutoSkillConfig = {
|
||||
minSuccesses: 3,
|
||||
minQuality: 0.6,
|
||||
skillDir: ".skills",
|
||||
maxPerSession: 2,
|
||||
};
|
||||
|
||||
export class AutoSkillCreator {
|
||||
private cfg: AutoSkillConfig;
|
||||
private proceduralMem: ProceduralMemory;
|
||||
private workspacePath: string;
|
||||
private createdThisSession: Set<string> = new Set();
|
||||
private promotedIds: Set<string> = new Set();
|
||||
|
||||
constructor(
|
||||
proceduralMem: ProceduralMemory,
|
||||
workspacePath: string,
|
||||
config?: Partial<AutoSkillConfig>
|
||||
) {
|
||||
this.proceduralMem = proceduralMem;
|
||||
this.workspacePath = workspacePath;
|
||||
this.cfg = { ...DEFAULT_CONFIG, ...config };
|
||||
|
||||
// Load already-promoted procedure IDs from existing skill files
|
||||
this.loadPromotedIds();
|
||||
}
|
||||
|
||||
private loadPromotedIds(): void {
|
||||
const skillDir = path.join(this.workspacePath, this.cfg.skillDir);
|
||||
if (!fs.existsSync(skillDir)) return;
|
||||
|
||||
try {
|
||||
const files = fs.readdirSync(skillDir).filter(f => f.endsWith(".md"));
|
||||
for (const file of files) {
|
||||
const content = fs.readFileSync(path.join(skillDir, file), "utf-8");
|
||||
const match = content.match(/procedure_id:\s*(\S+)/);
|
||||
if (match) this.promotedIds.add(match[1]);
|
||||
}
|
||||
} catch {
|
||||
// Ignore read errors
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Check all mature procedures and create skills for eligible ones.
|
||||
* Called periodically (e.g., on agent_end or after successful tool sequences).
|
||||
* Returns the number of new skills created.
|
||||
*/
|
||||
checkAndPromote(): number {
|
||||
if (this.createdThisSession.size >= this.cfg.maxPerSession) return 0;
|
||||
|
||||
const candidates = this.findMatureProcedures();
|
||||
let created = 0;
|
||||
|
||||
for (const proc of candidates) {
|
||||
if (this.createdThisSession.size >= this.cfg.maxPerSession) break;
|
||||
if (this.promotedIds.has(proc.id)) continue;
|
||||
|
||||
try {
|
||||
this.createSkillFile(proc);
|
||||
this.promotedIds.add(proc.id);
|
||||
this.createdThisSession.add(proc.id);
|
||||
created++;
|
||||
} catch {
|
||||
// Non-blocking
|
||||
}
|
||||
}
|
||||
|
||||
return created;
|
||||
}
|
||||
|
||||
/**
|
||||
* Find procedures that have reached maturity threshold.
|
||||
*/
|
||||
private findMatureProcedures(): Procedure[] {
|
||||
const all = this.proceduralMem.getAllProcedures();
|
||||
return all.filter(proc =>
|
||||
proc.success_count >= this.cfg.minSuccesses &&
|
||||
proc.quality.overall >= this.cfg.minQuality &&
|
||||
!this.promotedIds.has(proc.id) &&
|
||||
proc.degradation_score < 0.5 // Not degraded
|
||||
).sort((a, b) => {
|
||||
// Prioritize: highest quality × most successes
|
||||
const scoreA = a.quality.overall * Math.log2(a.success_count + 1);
|
||||
const scoreB = b.quality.overall * Math.log2(b.success_count + 1);
|
||||
return scoreB - scoreA;
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate a skill .md file from a mature procedure.
|
||||
*/
|
||||
private createSkillFile(proc: Procedure): void {
|
||||
const skillDir = path.join(this.workspacePath, this.cfg.skillDir);
|
||||
if (!fs.existsSync(skillDir)) {
|
||||
fs.mkdirSync(skillDir, { recursive: true });
|
||||
}
|
||||
|
||||
// Generate a clean filename
|
||||
const slug = proc.name
|
||||
.toLowerCase()
|
||||
.replace(/[^a-z0-9]+/g, "-")
|
||||
.replace(/^-+|-+$/g, "")
|
||||
.slice(0, 60);
|
||||
const filename = `${slug}.md`;
|
||||
const filepath = path.join(skillDir, filename);
|
||||
|
||||
// Don't overwrite existing files
|
||||
if (fs.existsSync(filepath)) return;
|
||||
|
||||
const successRate = Math.round(
|
||||
(proc.success_count / Math.max(proc.success_count + proc.failure_count, 1)) * 100
|
||||
);
|
||||
|
||||
const content = `# ${proc.name}
|
||||
|
||||
> Auto-generated by Memoria from ${proc.success_count} successful executions.
|
||||
> procedure_id: ${proc.id}
|
||||
> quality: ${Math.round(proc.quality.overall * 100)}% | success_rate: ${successRate}%
|
||||
> version: ${proc.version} | last_success: ${proc.last_success_at ? new Date(proc.last_success_at).toISOString().slice(0, 10) : "unknown"}
|
||||
|
||||
## Goal
|
||||
|
||||
${proc.goal}
|
||||
|
||||
## Steps
|
||||
|
||||
${proc.steps.map((s, i) => `${i + 1}. ${s}`).join("\n")}
|
||||
|
||||
${proc.gotchas ? `## Gotchas
|
||||
|
||||
${proc.gotchas.split(" | ").map(g => `- ⚠️ ${g}`).join("\n")}` : ""}
|
||||
|
||||
${proc.improvements && proc.improvements.length > 0 ? `## Evolution
|
||||
|
||||
${proc.improvements.slice(-3).map(imp => `- ${imp.change}`).join("\n")}` : ""}
|
||||
|
||||
${proc.context ? `## Trigger Patterns
|
||||
|
||||
Keywords: ${proc.context}` : ""}
|
||||
|
||||
---
|
||||
*This skill was automatically created by Memoria when the procedure reached maturity.*
|
||||
*Edit freely — your improvements will be preserved.*
|
||||
`;
|
||||
|
||||
fs.writeFileSync(filepath, content, "utf-8");
|
||||
}
|
||||
|
||||
/**
|
||||
* Get stats about auto-skill creation.
|
||||
*/
|
||||
stats(): { totalPromoted: number; createdThisSession: number; candidates: number } {
|
||||
return {
|
||||
totalPromoted: this.promotedIds.size,
|
||||
createdThisSession: this.createdThisSession.size,
|
||||
candidates: this.findMatureProcedures().length,
|
||||
};
|
||||
}
|
||||
}
|
||||
88
openclaw-memoria-port/core/bootstrap-topics.ts
Normal file
88
openclaw-memoria-port/core/bootstrap-topics.ts
Normal file
@@ -0,0 +1,88 @@
|
||||
/**
|
||||
* Bootstrap: tag all untagged facts and run topic emergence scan.
|
||||
* Run once to seed the system.
|
||||
*/
|
||||
import { MemoriaDB } from "./db.js";
|
||||
import { OllamaLLM } from "./providers/ollama.js";
|
||||
import { OllamaEmbed } from "./providers/ollama.js";
|
||||
import { TopicManager } from "./topics.js";
|
||||
|
||||
const WORKSPACE = process.env.HOME + "/.openclaw/workspace";
|
||||
|
||||
async function main() {
|
||||
const db = new MemoriaDB(WORKSPACE);
|
||||
const llm = new OllamaLLM("http://localhost:11434", "gemma3:4b");
|
||||
const embedder = new OllamaEmbed("http://localhost:11434", "nomic-embed-text-v2-moe", 768);
|
||||
const topicMgr = new TopicManager(db, llm, embedder, {
|
||||
emergenceThreshold: 3,
|
||||
scanInterval: 999, // We'll manually scan
|
||||
});
|
||||
|
||||
// Get all untagged, active facts
|
||||
const untagged = db.raw.prepare(
|
||||
"SELECT id, fact, category FROM facts WHERE (tags = '[]' OR tags IS NULL) AND superseded = 0 ORDER BY created_at DESC"
|
||||
).all() as Array<{ id: string; fact: string; category: string }>;
|
||||
|
||||
console.log(`\n🏷️ Tagging ${untagged.length} untagged facts...\n`);
|
||||
|
||||
let tagged = 0;
|
||||
let failed = 0;
|
||||
const batchSize = 10;
|
||||
|
||||
for (let i = 0; i < untagged.length; i += batchSize) {
|
||||
const batch = untagged.slice(i, i + batchSize);
|
||||
const promises = batch.map(async (f) => {
|
||||
try {
|
||||
const { keywords, topics } = await topicMgr.onFactCaptured(f.id, f.fact, f.category);
|
||||
if (keywords.length > 0) {
|
||||
tagged++;
|
||||
if (tagged % 50 === 0 || tagged <= 5) {
|
||||
console.log(` [${tagged}/${untagged.length}] "${f.fact.slice(0, 50)}..." → [${keywords.join(", ")}]`);
|
||||
}
|
||||
} else {
|
||||
failed++;
|
||||
}
|
||||
} catch (e) {
|
||||
failed++;
|
||||
}
|
||||
});
|
||||
|
||||
// Process batch sequentially to avoid hammering Ollama
|
||||
for (const p of promises) await p;
|
||||
|
||||
// Progress
|
||||
if ((i + batchSize) % 100 === 0) {
|
||||
console.log(` Progress: ${Math.min(i + batchSize, untagged.length)}/${untagged.length} (${tagged} tagged, ${failed} failed)`);
|
||||
}
|
||||
}
|
||||
|
||||
console.log(`\n✅ Tagging done: ${tagged} tagged, ${failed} failed\n`);
|
||||
|
||||
// Now run the emergence scan
|
||||
console.log("🔍 Running topic emergence scan...\n");
|
||||
const scanResult = await topicMgr.scanAndEmerge();
|
||||
console.log(` Created: ${scanResult.created} topics`);
|
||||
console.log(` Merged: ${scanResult.merged} topics`);
|
||||
console.log(` Sub-topics: ${scanResult.subtopics}`);
|
||||
|
||||
// Show results
|
||||
const stats = topicMgr.stats();
|
||||
console.log(`\n📊 Final stats:`);
|
||||
console.log(` Total topics: ${stats.totalTopics}`);
|
||||
console.log(` Top-level: ${stats.topLevelTopics}`);
|
||||
console.log(` Sub-topics: ${stats.subTopics}`);
|
||||
console.log(` Orphan facts: ${stats.orphanFacts}`);
|
||||
console.log(` Avg facts/topic: ${stats.avgFactsPerTopic}`);
|
||||
|
||||
// Show topic list
|
||||
const topics = db.raw.prepare("SELECT name, keywords, fact_count, importance_score FROM topics ORDER BY importance_score DESC LIMIT 20").all() as any[];
|
||||
console.log(`\n🏷️ Top topics:`);
|
||||
for (const t of topics) {
|
||||
const kw = JSON.parse(t.keywords).slice(0, 5).join(", ");
|
||||
console.log(` ${t.name} (${t.fact_count} facts, score ${t.importance_score.toFixed(1)}) — [${kw}]`);
|
||||
}
|
||||
|
||||
db.raw.close();
|
||||
}
|
||||
|
||||
main().catch(console.error);
|
||||
185
openclaw-memoria-port/core/budget.ts
Normal file
185
openclaw-memoria-port/core/budget.ts
Normal file
@@ -0,0 +1,185 @@
|
||||
/**
|
||||
* Memoria — Layer 7: Adaptive Budget
|
||||
*
|
||||
* Dynamically adjusts the number of facts injected based on available context space.
|
||||
* Tiers: empty (12 facts) → light (10) → medium (8) → heavy (5) → critical (2).
|
||||
*
|
||||
* Début de session (context vide) → 8-10 faits
|
||||
* Session moyenne → 5-6 faits
|
||||
* Session chargée → 2-3 faits essentiels
|
||||
*
|
||||
* Évite de saturer le contexte et d'accélérer la compaction.
|
||||
*/
|
||||
|
||||
export interface BudgetConfig {
|
||||
/** Max context window in tokens. Default 200000 (Opus) */
|
||||
contextWindow: number;
|
||||
/** Max facts to inject. Default 10 */
|
||||
maxFacts: number;
|
||||
/** Min facts to inject (always at least this). Default 2 */
|
||||
minFacts: number;
|
||||
/** Target % of context to use for memory. Default 0.03 (3%) */
|
||||
targetMemoryShare: number;
|
||||
/** Avg tokens per fact (estimated). Default 60 */
|
||||
avgTokensPerFact: number;
|
||||
/** Context usage thresholds */
|
||||
thresholds: {
|
||||
/** Below this = "light" (use maxFacts). Default 0.3 */
|
||||
light: number;
|
||||
/** Below this = "medium" (scale down). Default 0.7 */
|
||||
medium: number;
|
||||
/** Above this = "heavy" (use minFacts). Default 0.85 */
|
||||
heavy: number;
|
||||
};
|
||||
}
|
||||
|
||||
export const DEFAULT_BUDGET_CONFIG: BudgetConfig = {
|
||||
contextWindow: 200000,
|
||||
maxFacts: 10,
|
||||
minFacts: 2,
|
||||
targetMemoryShare: 0.03,
|
||||
avgTokensPerFact: 60,
|
||||
thresholds: {
|
||||
light: 0.3,
|
||||
medium: 0.7,
|
||||
heavy: 0.85,
|
||||
},
|
||||
};
|
||||
|
||||
export interface BudgetResult {
|
||||
/** Computed number of facts to inject */
|
||||
limit: number;
|
||||
/** Context usage ratio (0-1) */
|
||||
usage: number;
|
||||
/** Budget zone */
|
||||
zone: "light" | "medium" | "heavy" | "critical";
|
||||
/** Available tokens for memory */
|
||||
availableTokens: number;
|
||||
}
|
||||
|
||||
export class AdaptiveBudget {
|
||||
private cfg: BudgetConfig;
|
||||
|
||||
// ─── Learning from compactions ───
|
||||
// If we inject too many facts and compaction fires right after → we were too aggressive.
|
||||
// Track this to self-correct.
|
||||
private recentRecalls: Array<{ timestamp: number; limit: number }> = [];
|
||||
private compactionPenalty = 0; // 0-3: reduces limit by this many facts
|
||||
private lastCompactionAt = 0;
|
||||
private static readonly RECALL_WINDOW_MS = 5 * 60 * 1000; // 5 min window
|
||||
private static readonly MAX_PENALTY = 3;
|
||||
|
||||
constructor(config?: Partial<BudgetConfig>) {
|
||||
this.cfg = { ...DEFAULT_BUDGET_CONFIG, ...config };
|
||||
if (config?.thresholds) {
|
||||
this.cfg.thresholds = { ...DEFAULT_BUDGET_CONFIG.thresholds, ...config.thresholds };
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Called when compaction happens. If a recall happened recently,
|
||||
* we were likely too aggressive → increase penalty.
|
||||
*/
|
||||
onCompaction(): void {
|
||||
const now = Date.now();
|
||||
this.lastCompactionAt = now;
|
||||
|
||||
// Was there a recall in the last 5 minutes?
|
||||
const recentRecall = this.recentRecalls.find(
|
||||
r => (now - r.timestamp) < AdaptiveBudget.RECALL_WINDOW_MS
|
||||
);
|
||||
|
||||
if (recentRecall) {
|
||||
// We injected facts and compaction fired soon after → too aggressive
|
||||
this.compactionPenalty = Math.min(
|
||||
this.compactionPenalty + 1,
|
||||
AdaptiveBudget.MAX_PENALTY
|
||||
);
|
||||
}
|
||||
|
||||
// Clean old entries
|
||||
this.recentRecalls = this.recentRecalls.filter(
|
||||
r => (now - r.timestamp) < AdaptiveBudget.RECALL_WINDOW_MS * 2
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Called after a successful recall to log timing.
|
||||
*/
|
||||
recordRecall(limit: number): void {
|
||||
this.recentRecalls.push({ timestamp: Date.now(), limit });
|
||||
// Keep last 10
|
||||
if (this.recentRecalls.length > 10) this.recentRecalls.shift();
|
||||
|
||||
// Decay penalty over time: if no compaction for 30 min, reduce penalty
|
||||
if (this.compactionPenalty > 0 && this.lastCompactionAt > 0) {
|
||||
const minSinceCompaction = (Date.now() - this.lastCompactionAt) / 60000;
|
||||
if (minSinceCompaction > 30) {
|
||||
this.compactionPenalty = Math.max(0, this.compactionPenalty - 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Current compaction penalty (for logging) */
|
||||
get penalty(): number {
|
||||
return this.compactionPenalty;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute how many facts to inject given current context state.
|
||||
*
|
||||
* @param messagesTokenEstimate - Estimated tokens already in conversation
|
||||
* @param systemTokenEstimate - Estimated tokens in system prompt + workspace files
|
||||
*/
|
||||
compute(messagesTokenEstimate: number, systemTokenEstimate: number = 0): BudgetResult {
|
||||
const totalUsed = messagesTokenEstimate + systemTokenEstimate;
|
||||
const usage = Math.min(totalUsed / this.cfg.contextWindow, 1.0);
|
||||
|
||||
// Determine zone
|
||||
let zone: BudgetResult["zone"];
|
||||
let limit: number;
|
||||
|
||||
if (usage < this.cfg.thresholds.light) {
|
||||
zone = "light";
|
||||
limit = this.cfg.maxFacts;
|
||||
} else if (usage < this.cfg.thresholds.medium) {
|
||||
zone = "medium";
|
||||
// Aggressive curve: 10 → 4 across medium zone (quadratic ease-in)
|
||||
const t = (usage - this.cfg.thresholds.light) / (this.cfg.thresholds.medium - this.cfg.thresholds.light);
|
||||
const tCurve = t * t; // quadratic: drops slowly at start, fast at end
|
||||
const mediumFloor = this.cfg.minFacts + 2; // 4 facts at end of medium
|
||||
limit = Math.round(this.cfg.maxFacts - tCurve * (this.cfg.maxFacts - mediumFloor));
|
||||
} else if (usage < this.cfg.thresholds.heavy) {
|
||||
zone = "heavy";
|
||||
// 4 → minFacts across heavy zone
|
||||
const t = (usage - this.cfg.thresholds.medium) / (this.cfg.thresholds.heavy - this.cfg.thresholds.medium);
|
||||
const mediumFloor = this.cfg.minFacts + 2; // 4
|
||||
limit = Math.round(mediumFloor - t * (mediumFloor - this.cfg.minFacts));
|
||||
} else {
|
||||
zone = "critical";
|
||||
limit = this.cfg.minFacts;
|
||||
}
|
||||
|
||||
// Apply compaction penalty (learned from recent compactions)
|
||||
if (this.compactionPenalty > 0) {
|
||||
limit = Math.max(this.cfg.minFacts, limit - this.compactionPenalty);
|
||||
}
|
||||
|
||||
// Ensure bounds
|
||||
limit = Math.max(this.cfg.minFacts, Math.min(this.cfg.maxFacts, limit));
|
||||
|
||||
// Available tokens for memory
|
||||
const availableTokens = Math.max(0, this.cfg.contextWindow * this.cfg.targetMemoryShare - totalUsed * this.cfg.targetMemoryShare);
|
||||
|
||||
return { limit, usage, zone, availableTokens };
|
||||
}
|
||||
|
||||
/**
|
||||
* Quick estimate: count messages × avg tokens per message.
|
||||
* Rough but fast — no tokenizer needed.
|
||||
*/
|
||||
static estimateTokens(messageCount: number, avgCharsPerMessage: number = 200): number {
|
||||
// ~4 chars per token (rough English/French average)
|
||||
return Math.round(messageCount * avgCharsPerMessage / 4);
|
||||
}
|
||||
}
|
||||
158
openclaw-memoria-port/core/config.ts
Normal file
158
openclaw-memoria-port/core/config.ts
Normal file
@@ -0,0 +1,158 @@
|
||||
/**
|
||||
* 🧠 Memoria — Configuration parser and provider factories
|
||||
*
|
||||
* This module exports:
|
||||
* - MemoriaConfig interface (typed config from openclaw.json)
|
||||
* - parseConfig() — raw JSON → typed config with smart defaults
|
||||
* - createEmbedProvider() — factory for embedding providers
|
||||
* - createLLMProvider() — factory for LLM providers
|
||||
*/
|
||||
|
||||
import { OllamaEmbed, OllamaLLM } from "./providers/ollama.js";
|
||||
import { lmStudioLLM, lmStudioEmbed, openaiLLM, openaiEmbed, openrouterLLM, openrouterEmbed } from "./providers/openai-compat.js";
|
||||
import type { EmbedProvider, LLMProvider } from "./providers/types.js";
|
||||
import type { FallbackProviderConfig } from "./fallback.js";
|
||||
import { AnthropicLLM } from "./providers/anthropic.js";
|
||||
|
||||
// ─── Config Interface ───
|
||||
|
||||
export interface MemoriaConfig {
|
||||
autoRecall: boolean;
|
||||
autoCapture: boolean;
|
||||
recallLimit: number;
|
||||
captureMaxFacts: number;
|
||||
defaultAgent: string;
|
||||
contextWindow: number;
|
||||
workspacePath: string;
|
||||
syncMd: boolean;
|
||||
fallback: FallbackProviderConfig[];
|
||||
/** Continuous Learning (Layer 21) config */
|
||||
continuous?: {
|
||||
/** Extract every N turns (default 4) */
|
||||
interval?: number;
|
||||
/** Cooldown between periodic extractions in ms (default 45000) */
|
||||
cooldownMs?: number;
|
||||
/** Enable/disable (default true when autoCapture is true) */
|
||||
enabled?: boolean;
|
||||
};
|
||||
embed: {
|
||||
provider: "ollama" | "lmstudio" | "openai" | "openrouter" | "anthropic";
|
||||
baseUrl?: string;
|
||||
model: string;
|
||||
dimensions: number;
|
||||
apiKey?: string;
|
||||
};
|
||||
llm: {
|
||||
provider: "ollama" | "lmstudio" | "openai" | "openrouter" | "anthropic";
|
||||
baseUrl?: string;
|
||||
model: string;
|
||||
apiKey?: string;
|
||||
/** Per-layer overrides: each key = layer name, value = provider config */
|
||||
overrides?: Partial<Record<MemoriaLayer, LayerLLMConfig>>;
|
||||
};
|
||||
lifecycle?: {
|
||||
freshDays?: number;
|
||||
settledMinAccess?: number;
|
||||
dormantAfterDays?: number;
|
||||
detailCursor?: number;
|
||||
revisionRecallThreshold?: number;
|
||||
};
|
||||
procedural?: {
|
||||
reflectEvery?: number;
|
||||
degradedThreshold?: number;
|
||||
defaultSafety?: number;
|
||||
staleDays?: number;
|
||||
docCheckDays?: number;
|
||||
};
|
||||
patterns?: any; // PatternManager config, loosely typed for now
|
||||
autoSkill?: {
|
||||
minSuccesses?: number;
|
||||
minQuality?: number;
|
||||
skillDir?: string;
|
||||
maxPerSession?: number;
|
||||
};
|
||||
}
|
||||
|
||||
/** Named layers that accept a per-layer LLM override */
|
||||
export type MemoriaLayer = "extract" | "contradiction" | "graph" | "topics" | "procedural";
|
||||
|
||||
export interface LayerLLMConfig {
|
||||
provider: "ollama" | "lmstudio" | "openai" | "openrouter" | "anthropic";
|
||||
baseUrl?: string;
|
||||
model: string;
|
||||
apiKey?: string;
|
||||
}
|
||||
|
||||
// ─── Config Parser ───
|
||||
|
||||
/** Parse raw plugin config (from openclaw.json) into typed MemoriaConfig with smart defaults. */
|
||||
export function parseConfig(raw: Record<string, unknown> | undefined): MemoriaConfig {
|
||||
const embed = (raw?.embed as Record<string, unknown>) || {};
|
||||
const llm = (raw?.llm as Record<string, unknown>) || {};
|
||||
return {
|
||||
autoRecall: raw?.autoRecall !== false,
|
||||
autoCapture: raw?.autoCapture !== false,
|
||||
recallLimit: (raw?.recallLimit as number) || 12,
|
||||
captureMaxFacts: (raw?.captureMaxFacts as number) || 8,
|
||||
defaultAgent: (raw?.defaultAgent as string) || "koda",
|
||||
contextWindow: (raw?.contextWindow as number) || 200000,
|
||||
workspacePath: (raw?.workspacePath as string) || process.env.HOME + "/.openclaw/workspace",
|
||||
syncMd: raw?.syncMd !== false,
|
||||
fallback: ((raw?.fallback as any[]) || []).map((f: any) => ({
|
||||
...f,
|
||||
// Normalize: user config uses "provider", internal uses "type"
|
||||
type: f.type || f.provider || "ollama",
|
||||
name: f.name || f.provider || f.type || "ollama",
|
||||
})) as FallbackProviderConfig[],
|
||||
embed: {
|
||||
provider: (embed.provider as MemoriaConfig["embed"]["provider"]) || "ollama",
|
||||
baseUrl: embed.baseUrl as string | undefined,
|
||||
model: (embed.model as string) || "nomic-embed-text-v2-moe",
|
||||
dimensions: (embed.dimensions as number) || 768,
|
||||
apiKey: embed.apiKey as string | undefined,
|
||||
},
|
||||
llm: {
|
||||
provider: (llm.provider as MemoriaConfig["llm"]["provider"]) || "ollama",
|
||||
baseUrl: llm.baseUrl as string | undefined,
|
||||
model: (llm.model as string) || "gemma3:4b",
|
||||
apiKey: llm.apiKey as string | undefined,
|
||||
overrides: (llm.overrides as MemoriaConfig["llm"]["overrides"]) || undefined,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
// ─── Provider Factories ───
|
||||
|
||||
/** Create an embedding provider from config. Used for the main embedder + fallback list. */
|
||||
export function createEmbedProvider(cfg: MemoriaConfig["embed"]): EmbedProvider {
|
||||
switch (cfg.provider) {
|
||||
case "ollama":
|
||||
return new OllamaEmbed(cfg.baseUrl || "http://localhost:11434", cfg.model, cfg.dimensions);
|
||||
case "lmstudio":
|
||||
return lmStudioEmbed(cfg.model, cfg.dimensions, cfg.baseUrl || "http://localhost:1234/v1");
|
||||
case "openai":
|
||||
return openaiEmbed(cfg.model, cfg.apiKey || "", cfg.dimensions);
|
||||
case "openrouter":
|
||||
return openrouterEmbed(cfg.model, cfg.apiKey || "", cfg.dimensions);
|
||||
default:
|
||||
return new OllamaEmbed(); // safe default
|
||||
}
|
||||
}
|
||||
|
||||
/** Create an LLM provider from config. Used for the main chain + per-layer overrides. */
|
||||
export function createLLMProvider(cfg: MemoriaConfig["llm"]): LLMProvider {
|
||||
switch (cfg.provider) {
|
||||
case "ollama":
|
||||
return new OllamaLLM(cfg.baseUrl || "http://localhost:11434", cfg.model);
|
||||
case "lmstudio":
|
||||
return lmStudioLLM(cfg.model, cfg.baseUrl || "http://localhost:1234/v1");
|
||||
case "openai":
|
||||
return openaiLLM(cfg.model, cfg.apiKey || "");
|
||||
case "openrouter":
|
||||
return openrouterLLM(cfg.model, cfg.apiKey || "");
|
||||
case "anthropic":
|
||||
return new AnthropicLLM(cfg.model, cfg.apiKey || "", cfg.baseUrl);
|
||||
default:
|
||||
return new OllamaLLM(); // safe default
|
||||
}
|
||||
}
|
||||
337
openclaw-memoria-port/core/context-tree.ts
Normal file
337
openclaw-memoria-port/core/context-tree.ts
Normal file
@@ -0,0 +1,337 @@
|
||||
/**
|
||||
* Memoria — Layer 6: Context Tree
|
||||
*
|
||||
* Organizes facts into a semantic hierarchy for structured recall.
|
||||
* Instead of "here are 8 flat facts", we inject:
|
||||
*
|
||||
* Bureau (3 faits)
|
||||
* ├─ CRM (2 faits)
|
||||
* └─ Modules (1 fait)
|
||||
* Convex (2 faits)
|
||||
* Infrastructure (3 faits)
|
||||
*
|
||||
* Permet de :
|
||||
* 1. Montrer la STRUCTURE de la mémoire
|
||||
* 2. Prioriser par branche (Bureau > infra si query mentionne "CRM")
|
||||
* 3. Éviter l'overload de faits non pertinents
|
||||
*/
|
||||
|
||||
import type { MemoriaDB, Fact } from "./db.js";
|
||||
|
||||
// ─── Types ───
|
||||
|
||||
export interface ContextNode {
|
||||
id: string;
|
||||
label: string; // "Bureau", "CRM", "Modules"...
|
||||
type: "root" | "branch" | "leaf";
|
||||
facts: string[]; // Fact IDs
|
||||
children: ContextNode[];
|
||||
weight: number; // Relevance to current query
|
||||
depth: number;
|
||||
parent?: ContextNode;
|
||||
}
|
||||
|
||||
export interface ContextTree {
|
||||
roots: ContextNode[];
|
||||
factMap: Map<string, ContextNode>; // fact ID → node containing it
|
||||
}
|
||||
|
||||
// ─── Builder ───
|
||||
|
||||
export class ContextTreeBuilder {
|
||||
private db: MemoriaDB;
|
||||
|
||||
constructor(db: MemoriaDB) {
|
||||
this.db = db;
|
||||
}
|
||||
|
||||
/** Build a context tree from a list of facts */
|
||||
async build(facts: Fact[], query?: string): Promise<ContextTree> {
|
||||
if (facts.length === 0) {
|
||||
return { roots: [], factMap: new Map() };
|
||||
}
|
||||
|
||||
// Step 1: Cluster by category + keyword heuristics (no LLM)
|
||||
const clusters = await this.clusterFacts(facts);
|
||||
|
||||
// Step 2: Build tree structure
|
||||
const roots: ContextNode[] = [];
|
||||
const factMap = new Map<string, ContextNode>();
|
||||
|
||||
for (const cluster of clusters) {
|
||||
const node = this.buildNode(cluster, 0);
|
||||
roots.push(node);
|
||||
this.indexFactMap(node, factMap);
|
||||
}
|
||||
|
||||
// Step 3: Weight by query relevance (if provided)
|
||||
if (query) {
|
||||
this.weightByQuery(roots, query);
|
||||
}
|
||||
|
||||
return { roots, factMap };
|
||||
}
|
||||
|
||||
/** Render tree as indented text */
|
||||
renderTree(tree: ContextTree, maxDepth = 3): string {
|
||||
const lines: string[] = [];
|
||||
|
||||
const render = (node: ContextNode, indent = "") => {
|
||||
if (node.depth > maxDepth) return;
|
||||
|
||||
const prefix = node.depth === 0 ? "▪" : node.type === "branch" ? "├─" : " ";
|
||||
const weight = node.weight > 0 ? ` [${node.weight.toFixed(2)}]` : "";
|
||||
const factCount = node.facts.length > 0 ? ` (${node.facts.length})` : "";
|
||||
|
||||
lines.push(`${indent}${prefix} ${node.label}${weight}${factCount}`);
|
||||
|
||||
for (let i = 0; i < node.children.length; i++) {
|
||||
const child = node.children[i];
|
||||
const childIndent = indent + (node.depth === 0 ? " " : " ");
|
||||
render(child, childIndent);
|
||||
}
|
||||
};
|
||||
|
||||
for (const root of tree.roots) {
|
||||
render(root);
|
||||
}
|
||||
|
||||
return lines.join("\n");
|
||||
}
|
||||
|
||||
/** Get facts from tree in priority order (high weight first) */
|
||||
extractFacts(tree: ContextTree, limit: number): Fact[] {
|
||||
// Flatten all nodes with facts
|
||||
const nodesWithFacts: Array<{ node: ContextNode; factIds: string[] }> = [];
|
||||
|
||||
const collect = (node: ContextNode) => {
|
||||
if (node.facts.length > 0) {
|
||||
nodesWithFacts.push({ node, factIds: node.facts });
|
||||
}
|
||||
for (const child of node.children) {
|
||||
collect(child);
|
||||
}
|
||||
};
|
||||
|
||||
for (const root of tree.roots) {
|
||||
collect(root);
|
||||
}
|
||||
|
||||
// Sort by weight (high → low)
|
||||
nodesWithFacts.sort((a, b) => b.node.weight - a.node.weight);
|
||||
|
||||
// Extract facts up to limit
|
||||
const factIds = new Set<string>();
|
||||
const results: Fact[] = [];
|
||||
|
||||
for (const { factIds: ids } of nodesWithFacts) {
|
||||
for (const id of ids) {
|
||||
if (factIds.has(id)) continue;
|
||||
factIds.add(id);
|
||||
|
||||
const fact = this.db.getFact(id);
|
||||
if (fact && !fact.superseded) {
|
||||
results.push(fact);
|
||||
if (results.length >= limit) return results;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
// ─── Private ───
|
||||
|
||||
private async clusterFacts(facts: Fact[]): Promise<Cluster[]> {
|
||||
// Use categories + keywords for initial grouping
|
||||
const grouped = new Map<string, Fact[]>();
|
||||
|
||||
for (const fact of facts) {
|
||||
const category = fact.category || "other";
|
||||
if (!grouped.has(category)) grouped.set(category, []);
|
||||
grouped.get(category)!.push(fact);
|
||||
}
|
||||
|
||||
// Build clusters
|
||||
const clusters: Cluster[] = [];
|
||||
|
||||
for (const [category, catFacts] of grouped) {
|
||||
if (catFacts.length === 0) continue;
|
||||
|
||||
// If too many facts in one category, sub-cluster by keywords
|
||||
if (catFacts.length > 10) {
|
||||
const subClusters = this.subClusterByKeywords(catFacts);
|
||||
clusters.push({
|
||||
label: this.categoryLabel(category),
|
||||
facts: [],
|
||||
children: subClusters,
|
||||
});
|
||||
} else {
|
||||
clusters.push({
|
||||
label: this.categoryLabel(category),
|
||||
facts: catFacts,
|
||||
children: [],
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
return clusters;
|
||||
}
|
||||
|
||||
private subClusterByKeywords(facts: Fact[]): Cluster[] {
|
||||
// Extract common keywords
|
||||
const keywordMap = new Map<string, Fact[]>();
|
||||
|
||||
for (const fact of facts) {
|
||||
const words = this.extractKeywords(fact.fact);
|
||||
for (const word of words) {
|
||||
if (!keywordMap.has(word)) keywordMap.set(word, []);
|
||||
keywordMap.get(word)!.push(fact);
|
||||
}
|
||||
}
|
||||
|
||||
// Take top keywords (most frequent)
|
||||
const sorted = Array.from(keywordMap.entries())
|
||||
.sort((a, b) => b[1].length - a[1].length)
|
||||
.slice(0, 5);
|
||||
|
||||
const clusters: Cluster[] = [];
|
||||
const assigned = new Set<string>();
|
||||
|
||||
for (const [keyword, keywordFacts] of sorted) {
|
||||
const unique = keywordFacts.filter(f => !assigned.has(f.id));
|
||||
if (unique.length === 0) continue;
|
||||
|
||||
clusters.push({
|
||||
label: keyword,
|
||||
facts: unique,
|
||||
children: [],
|
||||
});
|
||||
|
||||
for (const f of unique) assigned.add(f.id);
|
||||
}
|
||||
|
||||
// Remaining = "Autres"
|
||||
const remaining = facts.filter(f => !assigned.has(f.id));
|
||||
if (remaining.length > 0) {
|
||||
clusters.push({
|
||||
label: "Autres",
|
||||
facts: remaining,
|
||||
children: [],
|
||||
});
|
||||
}
|
||||
|
||||
return clusters;
|
||||
}
|
||||
|
||||
private extractKeywords(text: string): string[] {
|
||||
// Simple keyword extraction (proper nouns + technical terms)
|
||||
const stopWords = new Set([
|
||||
"le", "la", "les", "un", "une", "des", "de", "du", "et", "ou", "à", "dans", "pour", "sur", "avec", "est", "sont",
|
||||
"a", "an", "the", "and", "or", "to", "in", "for", "on", "with", "is", "are", "was", "were", "been", "be",
|
||||
]);
|
||||
|
||||
const words = text.split(/\s+/)
|
||||
.map(w => w.replace(/[^\p{L}\p{N}]/gu, "").toLowerCase())
|
||||
.filter(w => w.length > 3 && !stopWords.has(w));
|
||||
|
||||
// Capitalize proper nouns (heuristic: starts uppercase in original)
|
||||
const properNouns = text.match(/\b[A-Z][a-z]+(?:[A-Z][a-z]+)*\b/g) || [];
|
||||
const allKeywords = [...words, ...properNouns.map(w => w.toLowerCase())];
|
||||
|
||||
// Return unique, most common
|
||||
const freq = new Map<string, number>();
|
||||
for (const w of allKeywords) {
|
||||
freq.set(w, (freq.get(w) || 0) + 1);
|
||||
}
|
||||
|
||||
return Array.from(freq.keys())
|
||||
.sort((a, b) => freq.get(b)! - freq.get(a)!)
|
||||
.slice(0, 5);
|
||||
}
|
||||
|
||||
private buildNode(cluster: Cluster, depth: number): ContextNode {
|
||||
const node: ContextNode = {
|
||||
id: `node_${Date.now()}_${Math.random().toString(36).slice(2, 7)}`,
|
||||
label: cluster.label,
|
||||
type: depth === 0 ? "root" : cluster.children.length > 0 ? "branch" : "leaf",
|
||||
facts: cluster.facts.map(f => f.id),
|
||||
children: [],
|
||||
weight: 0,
|
||||
depth,
|
||||
};
|
||||
|
||||
for (const child of cluster.children) {
|
||||
const childNode = this.buildNode(child, depth + 1);
|
||||
childNode.parent = node;
|
||||
node.children.push(childNode);
|
||||
}
|
||||
|
||||
return node;
|
||||
}
|
||||
|
||||
private indexFactMap(node: ContextNode, map: Map<string, ContextNode>) {
|
||||
for (const factId of node.facts) {
|
||||
map.set(factId, node);
|
||||
}
|
||||
for (const child of node.children) {
|
||||
this.indexFactMap(child, map);
|
||||
}
|
||||
}
|
||||
|
||||
private weightByQuery(roots: ContextNode[], query: string) {
|
||||
const lowerQuery = query.toLowerCase();
|
||||
const words = lowerQuery.split(/\s+/).filter(w => w.length > 2);
|
||||
|
||||
const weight = (node: ContextNode) => {
|
||||
const labelLower = node.label.toLowerCase();
|
||||
let score = 0;
|
||||
|
||||
// Exact label match
|
||||
if (lowerQuery.includes(labelLower) || labelLower.includes(lowerQuery)) {
|
||||
score += 1.0;
|
||||
}
|
||||
|
||||
// Word overlap
|
||||
for (const word of words) {
|
||||
if (labelLower.includes(word)) {
|
||||
score += 0.3;
|
||||
}
|
||||
}
|
||||
|
||||
node.weight = score;
|
||||
|
||||
// Recurse children
|
||||
for (const child of node.children) {
|
||||
weight(child);
|
||||
// Propagate child weight to parent
|
||||
node.weight += child.weight * 0.5;
|
||||
}
|
||||
};
|
||||
|
||||
for (const root of roots) {
|
||||
weight(root);
|
||||
}
|
||||
}
|
||||
|
||||
private categoryLabel(cat: string): string {
|
||||
const labels: Record<string, string> = {
|
||||
outil: "Outils",
|
||||
savoir: "Savoir",
|
||||
erreur: "Erreurs",
|
||||
client: "Clients",
|
||||
preference: "Préférences",
|
||||
chronologie: "Chronologie",
|
||||
rh: "RH",
|
||||
};
|
||||
return labels[cat] || cat.charAt(0).toUpperCase() + cat.slice(1);
|
||||
}
|
||||
}
|
||||
|
||||
// ─── Internal types ───
|
||||
|
||||
interface Cluster {
|
||||
label: string;
|
||||
facts: Fact[];
|
||||
children: Cluster[];
|
||||
}
|
||||
684
openclaw-memoria-port/core/db.ts
Normal file
684
openclaw-memoria-port/core/db.ts
Normal file
@@ -0,0 +1,684 @@
|
||||
/**
|
||||
* Memoria — SQLite Database Layer (Layer 1)
|
||||
*
|
||||
* The foundation of Memoria. Manages the SQLite database with:
|
||||
* - facts + facts_fts (FTS5 full-text search)
|
||||
* - embeddings (768d float vectors as BLOBs)
|
||||
* - entities + relations (knowledge graph)
|
||||
* - topics + fact_topics (emergent topic system)
|
||||
* - observations (living syntheses)
|
||||
* - procedures + procedures_fts (how-to memory)
|
||||
* - cluster_members (fact → cluster mapping)
|
||||
* - identity_cache, meta, chunks
|
||||
*
|
||||
* Uses better-sqlite3 for synchronous, fast, zero-dependency SQLite.
|
||||
* All migrations auto-run on construction (additive, never destructive).
|
||||
*
|
||||
* @example
|
||||
* const db = new MemoriaDB("/path/to/workspace");
|
||||
* db.storeFact({ id: "f_123", fact: "...", category: "savoir", ... });
|
||||
* const results = db.searchFacts("Bureau"); // FTS5 search
|
||||
* db.raw.prepare("SELECT ...").all(); // direct SQLite access
|
||||
*/
|
||||
|
||||
import Database from "better-sqlite3";
|
||||
import path from "path";
|
||||
import fs from "fs";
|
||||
|
||||
const SCHEMA_VERSION = 1;
|
||||
|
||||
/**
|
||||
* Core fact record stored in SQLite. Every piece of knowledge Memoria captures.
|
||||
*
|
||||
* Key fields for contributors:
|
||||
* - `fact_type`: "semantic" (durable truth) | "episodic" (dated event) | "cluster" (summary) | "pattern" (consolidated)
|
||||
* - `lifecycle_state`: "fresh" → "settled" → "dormant" (controls recall priority, NOT deletion)
|
||||
* - `superseded`: 0 = active, 1 = replaced by a newer/better version (superseded_by has the replacement ID)
|
||||
* - `tags`: JSON string array, e.g. '["convex","bureau"]'
|
||||
* - `entity_ids`: JSON string array of entity IDs linked to this fact
|
||||
*/
|
||||
export interface Fact {
|
||||
id: string;
|
||||
fact: string;
|
||||
category: string;
|
||||
confidence: number;
|
||||
source: string;
|
||||
tags: string; // JSON array
|
||||
agent: string;
|
||||
created_at: number;
|
||||
updated_at: number;
|
||||
access_count: number;
|
||||
last_accessed_at: number | null;
|
||||
superseded: number; // 0 or 1
|
||||
superseded_by: string | null;
|
||||
superseded_at: number | null;
|
||||
md_file: string | null;
|
||||
md_line: number | null;
|
||||
entity_ids: string; // JSON array
|
||||
fact_type: "semantic" | "episodic" | "cluster" | "pattern"; // semantic = durable, episodic = dated/contextual, cluster = thematic summary, pattern = consolidated behavioral
|
||||
usefulness: number; // feedback score, higher = more useful
|
||||
recall_count: number; // times recalled in prompts
|
||||
used_count: number; // times actually used in answers
|
||||
synced_to_md: number; // 0 = not synced, 1 = synced, 2 = regenerated
|
||||
relevance_weight: number; // 0.0-1.0, calculated from identity context
|
||||
lifecycle_state: "fresh" | "settled" | "dormant"; // fresh = new, settled = confirmed, dormant = unused
|
||||
}
|
||||
|
||||
/** Knowledge graph node. Types: person, project, tool, concept, place. */
|
||||
export interface Entity {
|
||||
id: string;
|
||||
name: string;
|
||||
type: string; // person|project|tool|concept|place
|
||||
attributes: string; // JSON
|
||||
created_at: number;
|
||||
access_count: number;
|
||||
}
|
||||
|
||||
/** Knowledge graph edge. Weight increases via Hebbian reinforcement (co-occurrence), decays when unused. */
|
||||
export interface Relation {
|
||||
id: string;
|
||||
source_id: string;
|
||||
target_id: string;
|
||||
relation: string;
|
||||
weight: number;
|
||||
context: string | null;
|
||||
created_at: number;
|
||||
last_accessed_at: number | null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Main database class. Created once in index.ts and shared with all managers.
|
||||
*
|
||||
* Key methods:
|
||||
* - `storeFact(fact)` — INSERT OR REPLACE with full column list
|
||||
* - `searchFacts(query)` — FTS5 full-text search on fact text
|
||||
* - `getActiveFacts()` — all non-superseded facts
|
||||
* - `supersedeFact(oldId, newId)` — mark fact as replaced
|
||||
* - `raw` — direct better-sqlite3 Database for custom queries
|
||||
*
|
||||
* Schema migrations run automatically in constructor (additive only).
|
||||
*/
|
||||
export class MemoriaDB {
|
||||
/** Direct better-sqlite3 access for custom queries from other modules */
|
||||
readonly raw: Database.Database;
|
||||
private db: Database.Database;
|
||||
|
||||
constructor(workspaceRoot: string) {
|
||||
const dbPath = path.join(workspaceRoot, "memory", "memoria.db");
|
||||
const dir = path.dirname(dbPath);
|
||||
if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true });
|
||||
|
||||
// Auto-migrate from cortex.db if memoria.db doesn't exist or is empty
|
||||
const legacyPath = path.join(workspaceRoot, "memory", "cortex.db");
|
||||
if (fs.existsSync(legacyPath)) {
|
||||
const needsMigration = !fs.existsSync(dbPath) || fs.statSync(dbPath).size < 8192;
|
||||
const legacySize = fs.statSync(legacyPath).size;
|
||||
if (needsMigration && legacySize > 8192) {
|
||||
// Use VACUUM INTO to safely copy WAL-mode DBs (plain cp can lose data)
|
||||
try {
|
||||
const legacyDb = new Database(legacyPath, { readonly: true });
|
||||
legacyDb.exec(`VACUUM INTO '${dbPath.replace(/'/g, "''")}'`);
|
||||
legacyDb.close();
|
||||
} catch (_e) {
|
||||
// Fallback: copy file + WAL + SHM
|
||||
fs.copyFileSync(legacyPath, dbPath);
|
||||
const walPath = legacyPath + "-wal";
|
||||
const shmPath = legacyPath + "-shm";
|
||||
if (fs.existsSync(walPath)) fs.copyFileSync(walPath, dbPath + "-wal");
|
||||
if (fs.existsSync(shmPath)) fs.copyFileSync(shmPath, dbPath + "-shm");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
this.db = new Database(dbPath);
|
||||
this.raw = this.db;
|
||||
this.db.pragma("journal_mode = WAL"); // Better concurrent read perf
|
||||
this.db.pragma("foreign_keys = ON");
|
||||
this.migrate();
|
||||
}
|
||||
|
||||
// ─── Schema Migration ───
|
||||
|
||||
private migrate(): void {
|
||||
const version = this.getSchemaVersion();
|
||||
if (version < 1) this.migrateV1();
|
||||
// V2: add fact_type column for semantic/episodic distinction
|
||||
this.migrateAddFactType();
|
||||
this.migrateAddFeedbackColumns();
|
||||
this.migrateAddRelevanceWeight();
|
||||
this.migrateAddIdentityCache();
|
||||
this.migrateAddLifecycleState();
|
||||
this.migrateAddProcedures();
|
||||
this.migrateAddClusterMembers();
|
||||
this.setSchemaVersion(SCHEMA_VERSION);
|
||||
}
|
||||
|
||||
private migrateAddFactType(): void {
|
||||
try {
|
||||
// Check if column exists
|
||||
const cols = this.db.prepare("PRAGMA table_info(facts)").all() as Array<{ name: string }>;
|
||||
if (!cols.some(c => c.name === "fact_type")) {
|
||||
this.db.exec("ALTER TABLE facts ADD COLUMN fact_type TEXT DEFAULT 'semantic'");
|
||||
// Index for filtering
|
||||
this.db.exec("CREATE INDEX IF NOT EXISTS idx_facts_type ON facts(fact_type)");
|
||||
}
|
||||
} catch (_e) { /* column already exists or table not yet created */ }
|
||||
}
|
||||
|
||||
/** Migration: add feedback loop columns (usefulness, recall_count, used_count) */
|
||||
private migrateAddFeedbackColumns(): void {
|
||||
try {
|
||||
const cols = this.db.prepare("PRAGMA table_info(facts)").all() as Array<{ name: string }>;
|
||||
const colNames = new Set(cols.map(c => c.name));
|
||||
if (!colNames.has("usefulness")) {
|
||||
this.db.exec("ALTER TABLE facts ADD COLUMN usefulness REAL DEFAULT 0");
|
||||
}
|
||||
if (!colNames.has("recall_count")) {
|
||||
this.db.exec("ALTER TABLE facts ADD COLUMN recall_count INTEGER DEFAULT 0");
|
||||
}
|
||||
if (!colNames.has("used_count")) {
|
||||
this.db.exec("ALTER TABLE facts ADD COLUMN used_count INTEGER DEFAULT 0");
|
||||
}
|
||||
} catch (_e) { /* columns already exist or table not yet created */ }
|
||||
}
|
||||
|
||||
/** Migration: add relevance_weight column for identity-aware prioritization */
|
||||
private migrateAddRelevanceWeight(): void {
|
||||
try {
|
||||
const cols = this.db.prepare("PRAGMA table_info(facts)").all() as Array<{ name: string }>;
|
||||
if (!cols.some(c => c.name === "relevance_weight")) {
|
||||
this.db.exec("ALTER TABLE facts ADD COLUMN relevance_weight REAL DEFAULT 0.5");
|
||||
// Index for sorting by relevance
|
||||
this.db.exec("CREATE INDEX IF NOT EXISTS idx_facts_relevance ON facts(relevance_weight DESC)");
|
||||
}
|
||||
} catch (_e) { /* column already exists or table not yet created */ }
|
||||
}
|
||||
|
||||
/** Migration: add identity_cache table for parsed USER.md/COMPANY.md */
|
||||
private migrateAddIdentityCache(): void {
|
||||
try {
|
||||
this.db.exec(`
|
||||
CREATE TABLE IF NOT EXISTS identity_cache (
|
||||
key TEXT PRIMARY KEY,
|
||||
value TEXT NOT NULL,
|
||||
updated_at INTEGER NOT NULL
|
||||
);
|
||||
`);
|
||||
} catch (_e) { /* table already exists */ }
|
||||
}
|
||||
|
||||
/** Migration: add lifecycle_state for fact evolution (fresh/settled/dormant) */
|
||||
private migrateAddLifecycleState(): void {
|
||||
try {
|
||||
const cols = this.db.prepare("PRAGMA table_info(facts)").all() as Array<{ name: string }>;
|
||||
if (!cols.some(c => c.name === "lifecycle_state")) {
|
||||
this.db.exec("ALTER TABLE facts ADD COLUMN lifecycle_state TEXT DEFAULT 'fresh'");
|
||||
this.db.exec("CREATE INDEX IF NOT EXISTS idx_facts_lifecycle ON facts(lifecycle_state)");
|
||||
}
|
||||
} catch (_e) { /* column already exists or table not yet created */ }
|
||||
}
|
||||
|
||||
/** Migration: add procedures table for procedural memory (Phase 3) */
|
||||
private migrateAddProcedures(): void {
|
||||
try {
|
||||
this.db.exec(`
|
||||
CREATE TABLE IF NOT EXISTS procedures (
|
||||
id TEXT PRIMARY KEY,
|
||||
name TEXT NOT NULL,
|
||||
goal TEXT,
|
||||
steps TEXT NOT NULL,
|
||||
success_count INTEGER DEFAULT 0,
|
||||
failure_count INTEGER DEFAULT 0,
|
||||
last_success_at INTEGER,
|
||||
last_failure_at INTEGER,
|
||||
last_updated_at INTEGER NOT NULL,
|
||||
avg_duration_ms INTEGER,
|
||||
improvements TEXT DEFAULT '[]',
|
||||
context TEXT,
|
||||
degradation_score REAL DEFAULT 0.0,
|
||||
alternative_of TEXT
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS idx_procedures_name ON procedures(name);
|
||||
CREATE INDEX IF NOT EXISTS idx_procedures_degradation ON procedures(degradation_score);
|
||||
CREATE INDEX IF NOT EXISTS idx_procedures_success_rate ON procedures(success_count, failure_count);
|
||||
`);
|
||||
} catch (_e) { /* table already exists */ }
|
||||
}
|
||||
|
||||
/** Migration: add cluster_members table to track which facts compose a cluster */
|
||||
private migrateAddClusterMembers(): void {
|
||||
try {
|
||||
this.db.exec(`
|
||||
CREATE TABLE IF NOT EXISTS cluster_members (
|
||||
cluster_id TEXT NOT NULL,
|
||||
fact_id TEXT NOT NULL,
|
||||
PRIMARY KEY (cluster_id, fact_id),
|
||||
FOREIGN KEY (cluster_id) REFERENCES facts(id) ON DELETE CASCADE,
|
||||
FOREIGN KEY (fact_id) REFERENCES facts(id) ON DELETE CASCADE
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS idx_cluster_members_fact ON cluster_members(fact_id);
|
||||
`);
|
||||
} catch (_e) { /* table already exists */ }
|
||||
}
|
||||
|
||||
private getSchemaVersion(): number {
|
||||
try {
|
||||
const row = this.db.prepare("SELECT value FROM meta WHERE key = 'schema_version'").get() as { value: string } | undefined;
|
||||
return row ? parseInt(row.value, 10) : 0;
|
||||
} catch (_e) {
|
||||
return 0; // meta table doesn't exist yet
|
||||
}
|
||||
}
|
||||
|
||||
private setSchemaVersion(v: number): void {
|
||||
this.db.prepare("INSERT OR REPLACE INTO meta (key, value) VALUES ('schema_version', ?)").run(String(v));
|
||||
}
|
||||
|
||||
private migrateV1(): void {
|
||||
this.db.exec(`
|
||||
-- Meta table
|
||||
CREATE TABLE IF NOT EXISTS meta (
|
||||
key TEXT PRIMARY KEY,
|
||||
value TEXT NOT NULL
|
||||
);
|
||||
|
||||
-- Facts (mémoire déclarative)
|
||||
CREATE TABLE IF NOT EXISTS facts (
|
||||
id TEXT PRIMARY KEY,
|
||||
fact TEXT NOT NULL,
|
||||
category TEXT NOT NULL DEFAULT 'savoir',
|
||||
confidence REAL NOT NULL DEFAULT 0.8,
|
||||
source TEXT DEFAULT 'auto-capture',
|
||||
tags TEXT DEFAULT '[]',
|
||||
agent TEXT DEFAULT 'koda',
|
||||
created_at INTEGER NOT NULL,
|
||||
updated_at INTEGER NOT NULL,
|
||||
access_count INTEGER DEFAULT 0,
|
||||
last_accessed_at INTEGER,
|
||||
superseded INTEGER DEFAULT 0,
|
||||
superseded_by TEXT,
|
||||
superseded_at INTEGER,
|
||||
md_file TEXT,
|
||||
md_line INTEGER,
|
||||
entity_ids TEXT DEFAULT '[]',
|
||||
fact_type TEXT DEFAULT 'semantic'
|
||||
);
|
||||
|
||||
-- FTS5 full-text search on facts
|
||||
CREATE VIRTUAL TABLE IF NOT EXISTS facts_fts USING fts5(
|
||||
fact, category, tags,
|
||||
content='facts',
|
||||
content_rowid='rowid'
|
||||
);
|
||||
|
||||
-- Triggers to keep FTS in sync
|
||||
CREATE TRIGGER IF NOT EXISTS facts_ai AFTER INSERT ON facts BEGIN
|
||||
INSERT INTO facts_fts(rowid, fact, category, tags)
|
||||
VALUES (new.rowid, new.fact, new.category, new.tags);
|
||||
END;
|
||||
|
||||
CREATE TRIGGER IF NOT EXISTS facts_ad AFTER DELETE ON facts BEGIN
|
||||
INSERT INTO facts_fts(facts_fts, rowid, fact, category, tags)
|
||||
VALUES ('delete', old.rowid, old.fact, old.category, old.tags);
|
||||
END;
|
||||
|
||||
CREATE TRIGGER IF NOT EXISTS facts_au AFTER UPDATE ON facts BEGIN
|
||||
INSERT INTO facts_fts(facts_fts, rowid, fact, category, tags)
|
||||
VALUES ('delete', old.rowid, old.fact, old.category, old.tags);
|
||||
INSERT INTO facts_fts(rowid, fact, category, tags)
|
||||
VALUES (new.rowid, new.fact, new.category, new.tags);
|
||||
END;
|
||||
|
||||
-- Embeddings (vecteurs 768d)
|
||||
CREATE TABLE IF NOT EXISTS embeddings (
|
||||
fact_id TEXT PRIMARY KEY,
|
||||
vector BLOB NOT NULL,
|
||||
model TEXT NOT NULL,
|
||||
created_at INTEGER NOT NULL,
|
||||
FOREIGN KEY (fact_id) REFERENCES facts(id) ON DELETE CASCADE
|
||||
);
|
||||
|
||||
-- Knowledge Graph: entities
|
||||
CREATE TABLE IF NOT EXISTS entities (
|
||||
id TEXT PRIMARY KEY,
|
||||
name TEXT NOT NULL,
|
||||
type TEXT NOT NULL DEFAULT 'concept',
|
||||
attributes TEXT DEFAULT '{}',
|
||||
created_at INTEGER NOT NULL,
|
||||
access_count INTEGER DEFAULT 0
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS idx_entities_name ON entities(name);
|
||||
CREATE INDEX IF NOT EXISTS idx_entities_type ON entities(type);
|
||||
|
||||
-- Knowledge Graph: relations
|
||||
CREATE TABLE IF NOT EXISTS relations (
|
||||
id TEXT PRIMARY KEY,
|
||||
source_id TEXT NOT NULL,
|
||||
target_id TEXT NOT NULL,
|
||||
relation TEXT NOT NULL,
|
||||
weight REAL DEFAULT 1.0,
|
||||
context TEXT,
|
||||
created_at INTEGER NOT NULL,
|
||||
last_accessed_at INTEGER,
|
||||
FOREIGN KEY (source_id) REFERENCES entities(id) ON DELETE CASCADE,
|
||||
FOREIGN KEY (target_id) REFERENCES entities(id) ON DELETE CASCADE
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS idx_relations_source ON relations(source_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_relations_target ON relations(target_id);
|
||||
|
||||
-- Chunks (fichiers .md indexés)
|
||||
CREATE TABLE IF NOT EXISTS chunks (
|
||||
id TEXT PRIMARY KEY,
|
||||
file_path TEXT NOT NULL,
|
||||
chunk_text TEXT NOT NULL,
|
||||
chunk_index INTEGER DEFAULT 0,
|
||||
vector BLOB,
|
||||
updated_at INTEGER NOT NULL
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS idx_chunks_path ON chunks(file_path);
|
||||
|
||||
-- FTS5 on chunks
|
||||
CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(
|
||||
chunk_text, file_path,
|
||||
content='chunks',
|
||||
content_rowid='rowid'
|
||||
);
|
||||
|
||||
CREATE TRIGGER IF NOT EXISTS chunks_ai AFTER INSERT ON chunks BEGIN
|
||||
INSERT INTO chunks_fts(rowid, chunk_text, file_path)
|
||||
VALUES (new.rowid, new.chunk_text, new.file_path);
|
||||
END;
|
||||
|
||||
CREATE TRIGGER IF NOT EXISTS chunks_ad AFTER DELETE ON chunks BEGIN
|
||||
INSERT INTO chunks_fts(chunks_fts, rowid, chunk_text, file_path)
|
||||
VALUES ('delete', old.rowid, old.chunk_text, old.file_path);
|
||||
END;
|
||||
|
||||
CREATE TRIGGER IF NOT EXISTS chunks_au AFTER UPDATE ON chunks BEGIN
|
||||
INSERT INTO chunks_fts(chunks_fts, rowid, chunk_text, file_path)
|
||||
VALUES ('delete', old.rowid, old.chunk_text, old.file_path);
|
||||
INSERT INTO chunks_fts(rowid, chunk_text, file_path)
|
||||
VALUES (new.rowid, new.chunk_text, new.file_path);
|
||||
END;
|
||||
|
||||
-- Indexes for common queries
|
||||
CREATE INDEX IF NOT EXISTS idx_facts_category ON facts(category);
|
||||
CREATE INDEX IF NOT EXISTS idx_facts_superseded ON facts(superseded);
|
||||
CREATE INDEX IF NOT EXISTS idx_facts_created ON facts(created_at);
|
||||
CREATE INDEX IF NOT EXISTS idx_facts_agent ON facts(agent);
|
||||
CREATE INDEX IF NOT EXISTS idx_facts_type ON facts(fact_type);
|
||||
`);
|
||||
}
|
||||
|
||||
// ─── Facts CRUD ───
|
||||
|
||||
storeFact(fact: Omit<Fact, "access_count" | "last_accessed_at" | "superseded" | "superseded_by" | "superseded_at" | "md_file" | "md_line" | "entity_ids" | "usefulness" | "recall_count" | "used_count" | "synced_to_md" | "relevance_weight" | "lifecycle_state"> & Partial<Fact>): Fact {
|
||||
const now = Date.now();
|
||||
const row: Fact = {
|
||||
id: fact.id || `fact_${now}_${Math.random().toString(36).slice(2, 9)}`,
|
||||
fact: fact.fact,
|
||||
category: fact.category || "savoir",
|
||||
confidence: fact.confidence ?? 0.8,
|
||||
source: fact.source || "auto-capture",
|
||||
tags: fact.tags || "[]",
|
||||
agent: fact.agent || "koda",
|
||||
created_at: fact.created_at || now,
|
||||
updated_at: fact.updated_at || now,
|
||||
access_count: fact.access_count ?? 0,
|
||||
last_accessed_at: fact.last_accessed_at ?? null,
|
||||
superseded: fact.superseded ?? 0,
|
||||
superseded_by: fact.superseded_by ?? null,
|
||||
superseded_at: fact.superseded_at ?? null,
|
||||
md_file: fact.md_file ?? null,
|
||||
md_line: fact.md_line ?? null,
|
||||
entity_ids: fact.entity_ids || "[]",
|
||||
fact_type: fact.fact_type || "semantic",
|
||||
usefulness: fact.usefulness ?? 0,
|
||||
recall_count: fact.recall_count ?? 0,
|
||||
used_count: fact.used_count ?? 0,
|
||||
synced_to_md: fact.synced_to_md ?? 0,
|
||||
relevance_weight: fact.relevance_weight ?? 0.5,
|
||||
lifecycle_state: fact.lifecycle_state ?? "fresh",
|
||||
};
|
||||
|
||||
this.db.prepare(`
|
||||
INSERT OR REPLACE INTO facts
|
||||
(id, fact, category, confidence, source, tags, agent, created_at, updated_at,
|
||||
access_count, last_accessed_at, superseded, superseded_by, superseded_at,
|
||||
md_file, md_line, entity_ids, fact_type,
|
||||
usefulness, recall_count, used_count, synced_to_md, relevance_weight, lifecycle_state)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
`).run(
|
||||
row.id, row.fact, row.category, row.confidence, row.source, row.tags, row.agent,
|
||||
row.created_at, row.updated_at, row.access_count, row.last_accessed_at,
|
||||
row.superseded, row.superseded_by, row.superseded_at,
|
||||
row.md_file, row.md_line, row.entity_ids, row.fact_type,
|
||||
row.usefulness, row.recall_count, row.used_count, row.synced_to_md,
|
||||
row.relevance_weight, row.lifecycle_state
|
||||
);
|
||||
|
||||
return row;
|
||||
}
|
||||
|
||||
searchFacts(query: string, limit = 10): Fact[] {
|
||||
if (!query || query.trim().length === 0) {
|
||||
return this.db.prepare(
|
||||
"SELECT * FROM facts WHERE superseded = 0 AND lifecycle_state != 'dormant' ORDER BY updated_at DESC LIMIT ?"
|
||||
).all(limit) as Fact[];
|
||||
}
|
||||
|
||||
// Sanitize for FTS5: keep only alphanumeric + spaces, wrap each word in quotes
|
||||
const sanitized = query
|
||||
.replace(/[^\p{L}\p{N}\s]/gu, " ") // Remove special chars
|
||||
.split(/\s+/)
|
||||
.filter(w => w.length > 1)
|
||||
.map(w => `"${w}"`) // Quote each word
|
||||
.join(" OR "); // OR between words
|
||||
|
||||
if (!sanitized) {
|
||||
return this.db.prepare(
|
||||
"SELECT * FROM facts WHERE superseded = 0 ORDER BY updated_at DESC LIMIT ?"
|
||||
).all(limit) as Fact[];
|
||||
}
|
||||
|
||||
try {
|
||||
return this.db.prepare(`
|
||||
SELECT f.* FROM facts f
|
||||
JOIN facts_fts fts ON f.rowid = fts.rowid
|
||||
WHERE facts_fts MATCH ? AND f.superseded = 0 AND f.lifecycle_state != 'dormant'
|
||||
ORDER BY rank
|
||||
LIMIT ?
|
||||
`).all(sanitized, limit) as Fact[];
|
||||
} catch (_e) {
|
||||
// Fallback: LIKE search if FTS5 fails
|
||||
return this.db.prepare(
|
||||
"SELECT * FROM facts WHERE superseded = 0 AND lifecycle_state != 'dormant' AND fact LIKE ? ORDER BY updated_at DESC LIMIT ?"
|
||||
).all(`%${query.slice(0, 100)}%`, limit) as Fact[];
|
||||
}
|
||||
}
|
||||
|
||||
recentFacts(hours = 24, limit = 10): Fact[] {
|
||||
const cutoff = Date.now() - hours * 3600 * 1000;
|
||||
return this.db.prepare(
|
||||
"SELECT * FROM facts WHERE superseded = 0 AND created_at >= ? ORDER BY created_at DESC LIMIT ?"
|
||||
).all(cutoff, limit) as Fact[];
|
||||
}
|
||||
|
||||
getFact(id: string): Fact | undefined {
|
||||
return this.db.prepare("SELECT * FROM facts WHERE id = ?").get(id) as Fact | undefined;
|
||||
}
|
||||
|
||||
supersedeFact(oldId: string, newId: string): void {
|
||||
const now = Date.now();
|
||||
this.db.prepare(
|
||||
"UPDATE facts SET superseded = 1, superseded_by = ?, superseded_at = ?, updated_at = ? WHERE id = ?"
|
||||
).run(newId, now, now, oldId);
|
||||
}
|
||||
|
||||
enrichFact(id: string, newText: string, newConfidence?: number): void {
|
||||
const now = Date.now();
|
||||
const existing = this.getFact(id);
|
||||
if (!existing) return;
|
||||
const confidence = newConfidence ? Math.max(existing.confidence, newConfidence) : existing.confidence;
|
||||
this.db.prepare(
|
||||
"UPDATE facts SET fact = ?, confidence = ?, updated_at = ? WHERE id = ?"
|
||||
).run(newText, confidence, now, id);
|
||||
}
|
||||
|
||||
/** Get frequently accessed facts (hot tier — "learned by heart") */
|
||||
hotFacts(minAccess: number = 5, staleDays: number = 30, limit: number = 5): Fact[] {
|
||||
const cutoff = Date.now() - staleDays * 24 * 60 * 60 * 1000;
|
||||
return this.db.prepare(
|
||||
`SELECT * FROM facts WHERE superseded = 0 AND lifecycle_state != 'dormant' AND access_count >= ?
|
||||
AND COALESCE(last_accessed_at, updated_at) >= ?
|
||||
ORDER BY access_count DESC LIMIT ?`
|
||||
).all(minAccess, cutoff, limit) as Fact[];
|
||||
}
|
||||
|
||||
trackAccess(ids: string[]): void {
|
||||
const now = Date.now();
|
||||
const stmt = this.db.prepare(
|
||||
"UPDATE facts SET access_count = access_count + 1, last_accessed_at = ? WHERE id = ?"
|
||||
);
|
||||
const tx = this.db.transaction(() => {
|
||||
for (const id of ids) stmt.run(now, id);
|
||||
});
|
||||
tx();
|
||||
}
|
||||
|
||||
// ─── Stats ───
|
||||
|
||||
stats(): { total: number; active: number; superseded: number; categories: Record<string, number> } {
|
||||
const total = (this.db.prepare("SELECT COUNT(*) as c FROM facts").get() as { c: number }).c;
|
||||
const active = (this.db.prepare("SELECT COUNT(*) as c FROM facts WHERE superseded = 0").get() as { c: number }).c;
|
||||
const cats = this.db.prepare(
|
||||
"SELECT category, COUNT(*) as c FROM facts WHERE superseded = 0 GROUP BY category"
|
||||
).all() as Array<{ category: string; c: number }>;
|
||||
|
||||
const categories: Record<string, number> = {};
|
||||
for (const row of cats) categories[row.category] = row.c;
|
||||
|
||||
return { total, active, superseded: total - active, categories };
|
||||
}
|
||||
|
||||
// ─── Entities CRUD ───
|
||||
|
||||
storeEntity(entity: Omit<Entity, "created_at" | "access_count"> & Partial<Entity>): Entity {
|
||||
const now = Date.now();
|
||||
const row: Entity = {
|
||||
id: entity.id || `ent_${now}_${Math.random().toString(36).slice(2, 9)}`,
|
||||
name: entity.name,
|
||||
type: entity.type || "concept",
|
||||
attributes: entity.attributes || "{}",
|
||||
created_at: entity.created_at || now,
|
||||
access_count: entity.access_count ?? 0,
|
||||
};
|
||||
|
||||
this.db.prepare(`
|
||||
INSERT OR REPLACE INTO entities (id, name, type, attributes, created_at, access_count)
|
||||
VALUES (?, ?, ?, ?, ?, ?)
|
||||
`).run(row.id, row.name, row.type, row.attributes, row.created_at, row.access_count);
|
||||
|
||||
return row;
|
||||
}
|
||||
|
||||
findEntityByName(name: string): Entity | undefined {
|
||||
return this.db.prepare(
|
||||
"SELECT * FROM entities WHERE LOWER(name) = LOWER(?)"
|
||||
).get(name) as Entity | undefined;
|
||||
}
|
||||
|
||||
/** Return all entity names (lowercased) for fast in-text matching */
|
||||
allEntityNames(): string[] {
|
||||
const rows = this.db.prepare("SELECT DISTINCT LOWER(name) as name FROM entities").all() as { name: string }[];
|
||||
return rows.map(r => r.name);
|
||||
}
|
||||
|
||||
/** Find facts linked to any of the given entity IDs */
|
||||
findFactsByEntityIds(entityIds: string[], limit = 10): Fact[] {
|
||||
if (entityIds.length === 0) return [];
|
||||
// entity_ids is stored as JSON array string, e.g. '["ent_123","ent_456"]'
|
||||
const placeholders = entityIds.map(() => "entity_ids LIKE ?").join(" OR ");
|
||||
const params = entityIds.map(id => `%${id}%`);
|
||||
return this.db.prepare(
|
||||
`SELECT * FROM facts WHERE status = 'active' AND (${placeholders}) ORDER BY updated_at DESC LIMIT ?`
|
||||
).all(...params, limit) as Fact[];
|
||||
}
|
||||
|
||||
// ─── Relations CRUD ───
|
||||
|
||||
storeRelation(rel: Omit<Relation, "created_at" | "last_accessed_at"> & Partial<Relation>): Relation {
|
||||
const now = Date.now();
|
||||
const row: Relation = {
|
||||
id: rel.id || `rel_${now}_${Math.random().toString(36).slice(2, 9)}`,
|
||||
source_id: rel.source_id,
|
||||
target_id: rel.target_id,
|
||||
relation: rel.relation,
|
||||
weight: rel.weight ?? 1.0,
|
||||
context: rel.context ?? null,
|
||||
created_at: rel.created_at || now,
|
||||
last_accessed_at: rel.last_accessed_at ?? null,
|
||||
};
|
||||
|
||||
this.db.prepare(`
|
||||
INSERT OR REPLACE INTO relations
|
||||
(id, source_id, target_id, relation, weight, context, created_at, last_accessed_at)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
||||
`).run(row.id, row.source_id, row.target_id, row.relation, row.weight, row.context, row.created_at, row.last_accessed_at);
|
||||
|
||||
return row;
|
||||
}
|
||||
|
||||
getRelationsFrom(entityId: string, minWeight = 0.1): Relation[] {
|
||||
return this.db.prepare(
|
||||
"SELECT * FROM relations WHERE source_id = ? AND weight >= ? ORDER BY weight DESC"
|
||||
).all(entityId, minWeight) as Relation[];
|
||||
}
|
||||
|
||||
getRelationsTo(entityId: string, minWeight = 0.1): Relation[] {
|
||||
return this.db.prepare(
|
||||
"SELECT * FROM relations WHERE target_id = ? AND weight >= ? ORDER BY weight DESC"
|
||||
).all(entityId, minWeight) as Relation[];
|
||||
}
|
||||
|
||||
reinforceRelation(id: string, boost = 0.1): void {
|
||||
const now = Date.now();
|
||||
this.db.prepare(
|
||||
"UPDATE relations SET weight = weight + ?, last_accessed_at = ? WHERE id = ?"
|
||||
).run(boost, now, id);
|
||||
}
|
||||
|
||||
// ─── Bulk import ───
|
||||
|
||||
importFacts(facts: Array<Omit<Fact, "access_count" | "last_accessed_at" | "superseded" | "superseded_by" | "superseded_at" | "md_file" | "md_line" | "entity_ids"> & Partial<Fact>>): number {
|
||||
const tx = this.db.transaction(() => {
|
||||
let count = 0;
|
||||
for (const fact of facts) {
|
||||
this.storeFact(fact);
|
||||
count++;
|
||||
}
|
||||
return count;
|
||||
});
|
||||
return tx();
|
||||
}
|
||||
|
||||
// ─── Identity Cache ───
|
||||
|
||||
/** Store identity cache (JSON stringified) */
|
||||
storeIdentityCache(key: string, value: string): void {
|
||||
const now = Date.now();
|
||||
this.db.prepare("INSERT OR REPLACE INTO identity_cache (key, value, updated_at) VALUES (?, ?, ?)").run(key, value, now);
|
||||
}
|
||||
|
||||
/** Get identity cache */
|
||||
getIdentityCache(key: string): string | null {
|
||||
const row = this.db.prepare("SELECT value FROM identity_cache WHERE key = ?").get(key) as { value: string } | undefined;
|
||||
return row?.value ?? null;
|
||||
}
|
||||
|
||||
// ─── Close ───
|
||||
|
||||
close(): void {
|
||||
this.db.close();
|
||||
}
|
||||
}
|
||||
224
openclaw-memoria-port/core/dialectic.ts
Normal file
224
openclaw-memoria-port/core/dialectic.ts
Normal file
@@ -0,0 +1,224 @@
|
||||
/**
|
||||
* Memoria — Dialectic Memory (Layer 24)
|
||||
*
|
||||
* Enables natural language queries against the entire memory.
|
||||
* Instead of keyword search, the agent can ASK questions:
|
||||
* "What frustrates the user about deployments?"
|
||||
* "What's the current status of Primask?"
|
||||
* "What patterns do I repeat when I make mistakes?"
|
||||
*
|
||||
* Inspired by Honcho's "dialectic" concept — memory as a conversation partner.
|
||||
*
|
||||
* Flow:
|
||||
* 1. Parse the natural language query
|
||||
* 2. Fan out: FTS5 + embeddings + graph + topics + procedures
|
||||
* 3. Aggregate all evidence
|
||||
* 4. Synthesize a structured answer via LLM
|
||||
*/
|
||||
|
||||
import type { MemoriaDB, Fact } from "./db.js";
|
||||
import type { EmbeddingManager } from "./embeddings.js";
|
||||
import type { KnowledgeGraph } from "./graph.js";
|
||||
import type { TopicManager } from "./topics.js";
|
||||
import type { ProceduralMemory } from "./procedural.js";
|
||||
import type { ObservationManager } from "./observations.js";
|
||||
import type { LLMProvider } from "./providers/types.js";
|
||||
|
||||
export interface DialecticAnswer {
|
||||
answer: string;
|
||||
confidence: number;
|
||||
sources: Array<{
|
||||
type: "fact" | "procedure" | "observation" | "graph" | "topic";
|
||||
id: string;
|
||||
text: string;
|
||||
relevance: number;
|
||||
}>;
|
||||
reasoning?: string;
|
||||
}
|
||||
|
||||
export interface DialecticDeps {
|
||||
db: MemoriaDB;
|
||||
embeddingMgr: EmbeddingManager;
|
||||
graph: KnowledgeGraph;
|
||||
topicMgr: TopicManager;
|
||||
proceduralMem: ProceduralMemory;
|
||||
observationMgr: ObservationManager;
|
||||
llm: LLMProvider;
|
||||
}
|
||||
|
||||
export class DialecticMemory {
|
||||
private deps: DialecticDeps;
|
||||
|
||||
constructor(deps: DialecticDeps) {
|
||||
this.deps = deps;
|
||||
}
|
||||
|
||||
/**
|
||||
* Ask a natural language question to the memory.
|
||||
*/
|
||||
async query(question: string): Promise<DialecticAnswer> {
|
||||
const { db, embeddingMgr, graph, topicMgr, proceduralMem, observationMgr, llm } = this.deps;
|
||||
|
||||
// ── 1. Fan out: gather evidence from all sources ──
|
||||
const sources: DialecticAnswer["sources"] = [];
|
||||
|
||||
// FTS5 + embeddings search
|
||||
try {
|
||||
if (embeddingMgr.embeddedCount() > 0) {
|
||||
const results = await embeddingMgr.hybridSearch(question, 10, {
|
||||
ftsWeight: 0.35,
|
||||
cosineWeight: 0.45,
|
||||
temporalWeight: 0.20,
|
||||
});
|
||||
for (const r of results) {
|
||||
sources.push({
|
||||
type: "fact",
|
||||
id: r.id,
|
||||
text: r.fact,
|
||||
relevance: r.temporalScore || 0.5,
|
||||
});
|
||||
}
|
||||
} else {
|
||||
const facts = db.searchFacts(question, 10);
|
||||
for (const f of facts) {
|
||||
sources.push({
|
||||
type: "fact",
|
||||
id: f.id,
|
||||
text: f.fact,
|
||||
relevance: f.confidence,
|
||||
});
|
||||
}
|
||||
}
|
||||
} catch { /* non-blocking */ }
|
||||
|
||||
// Graph entities
|
||||
try {
|
||||
const entities = graph.findEntitiesInText(question);
|
||||
if (entities.length > 0) {
|
||||
const related = graph.getRelatedFacts(entities.map(e => e.name), 3, 5);
|
||||
for (const r of related) {
|
||||
const fact = db.getFact(r.id);
|
||||
if (fact && !sources.find(s => s.id === r.id)) {
|
||||
sources.push({
|
||||
type: "graph",
|
||||
id: r.id,
|
||||
text: fact.fact,
|
||||
relevance: 0.6,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch { /* non-blocking */ }
|
||||
|
||||
// Topics
|
||||
try {
|
||||
const expandedQueries = embeddingMgr.expandQuery(question);
|
||||
const topics = await topicMgr.findRelevantTopics(question, 3, expandedQueries);
|
||||
for (const t of topics) {
|
||||
sources.push({
|
||||
type: "topic",
|
||||
id: t.topic.id?.toString() || t.topic.name,
|
||||
text: `Topic "${t.topic.name}": ${t.facts.slice(0, 3).join("; ")}`,
|
||||
relevance: 0.5,
|
||||
});
|
||||
}
|
||||
} catch { /* non-blocking */ }
|
||||
|
||||
// Procedures
|
||||
try {
|
||||
const procs = proceduralMem.search(question, 3);
|
||||
for (const p of procs) {
|
||||
sources.push({
|
||||
type: "procedure",
|
||||
id: p.id,
|
||||
text: `${p.name}: ${p.goal} (${p.steps.length} steps, ${p.success_count} successes)`,
|
||||
relevance: p.quality.overall,
|
||||
});
|
||||
}
|
||||
} catch { /* non-blocking */ }
|
||||
|
||||
// Observations
|
||||
try {
|
||||
const obs = await observationMgr.getRelevantObservations(question);
|
||||
for (const o of obs) {
|
||||
const ob = (o as any).observation || o;
|
||||
sources.push({
|
||||
type: "observation",
|
||||
id: ob.id?.toString() || "obs",
|
||||
text: ob.content || ob.summary || ob.title || "",
|
||||
relevance: (o as any).score || 0.7,
|
||||
});
|
||||
}
|
||||
} catch { /* non-blocking */ }
|
||||
|
||||
// ── 2. If no sources found, return early ──
|
||||
if (sources.length === 0) {
|
||||
return {
|
||||
answer: "Je n'ai trouvé aucune information pertinente dans ma mémoire sur ce sujet.",
|
||||
confidence: 0,
|
||||
sources: [],
|
||||
};
|
||||
}
|
||||
|
||||
// Sort by relevance
|
||||
sources.sort((a, b) => b.relevance - a.relevance);
|
||||
const topSources = sources.slice(0, 15);
|
||||
|
||||
// ── 3. Synthesize answer via LLM ──
|
||||
const synthesisPrompt = `You are a memory assistant. Answer the following question based ONLY on the evidence provided.
|
||||
Be specific and cite which sources support your answer.
|
||||
If evidence is contradictory, mention both sides.
|
||||
If evidence is insufficient, say so honestly.
|
||||
|
||||
Question: ${question}
|
||||
|
||||
Evidence (sorted by relevance):
|
||||
${topSources.map((s, i) => `[${i + 1}] (${s.type}, relevance: ${(s.relevance * 100).toFixed(0)}%) ${s.text}`).join("\n")}
|
||||
|
||||
Answer in the user's language (French if the question is in French).
|
||||
Be concise but thorough. Output JSON:
|
||||
{
|
||||
"answer": "Your synthesized answer",
|
||||
"confidence": 0.0-1.0,
|
||||
"reasoning": "Brief explanation of how you reached this answer"
|
||||
}`;
|
||||
|
||||
try {
|
||||
const response = await llm.generateWithMeta!(synthesisPrompt, {
|
||||
maxTokens: 1024,
|
||||
temperature: 0.2,
|
||||
format: "json",
|
||||
timeoutMs: 30000,
|
||||
});
|
||||
|
||||
if (response?.response) {
|
||||
const cleaned = response.response.replace(/```json\n?|\n?```/g, "").trim();
|
||||
const parsed = JSON.parse(cleaned);
|
||||
return {
|
||||
answer: parsed.answer || "Pas de réponse générée.",
|
||||
confidence: parsed.confidence || 0.5,
|
||||
sources: topSources,
|
||||
reasoning: parsed.reasoning,
|
||||
};
|
||||
}
|
||||
} catch {
|
||||
// Fallback: return raw sources without synthesis
|
||||
}
|
||||
|
||||
// Fallback: concatenate top sources
|
||||
return {
|
||||
answer: topSources.slice(0, 5).map(s => `• ${s.text}`).join("\n"),
|
||||
confidence: 0.3,
|
||||
sources: topSources,
|
||||
reasoning: "LLM synthesis failed — returning raw evidence.",
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Quick factual lookup (no LLM, just search).
|
||||
* Faster but less intelligent than full query().
|
||||
*/
|
||||
quickLookup(question: string, limit = 5): Fact[] {
|
||||
return this.deps.db.searchFacts(question, limit);
|
||||
}
|
||||
}
|
||||
62
openclaw-memoria-port/core/embed-fallback.ts
Normal file
62
openclaw-memoria-port/core/embed-fallback.ts
Normal file
@@ -0,0 +1,62 @@
|
||||
/**
|
||||
* Memoria — Embed Fallback Provider
|
||||
*
|
||||
* Wraps multiple EmbedProviders and tries them in order.
|
||||
* If provider 1 fails → try provider 2 → try provider 3.
|
||||
* Analogous to FallbackChain for LLMProviders.
|
||||
*/
|
||||
|
||||
import type { EmbedProvider } from "./providers/types.js";
|
||||
|
||||
export class EmbedFallback implements EmbedProvider {
|
||||
private providers: EmbedProvider[];
|
||||
private _dimensions: number;
|
||||
private _name: string;
|
||||
private logger?: { info?: (...args: unknown[]) => void; warn?: (...args: unknown[]) => void };
|
||||
|
||||
constructor(providers: EmbedProvider[], logger?: { info?: (...args: unknown[]) => void; warn?: (...args: unknown[]) => void }) {
|
||||
if (providers.length === 0) throw new Error("EmbedFallback requires at least one provider");
|
||||
this.providers = providers;
|
||||
this._dimensions = providers[0].dimensions;
|
||||
this._name = `embed-fallback(${providers.map(p => p.name).join("→")})`;
|
||||
this.logger = logger;
|
||||
}
|
||||
|
||||
get dimensions(): number {
|
||||
return this._dimensions;
|
||||
}
|
||||
|
||||
get name(): string {
|
||||
return this._name;
|
||||
}
|
||||
|
||||
get providerNames(): string[] {
|
||||
return this.providers.map(p => p.name);
|
||||
}
|
||||
|
||||
async embed(text: string): Promise<number[]> {
|
||||
let lastErr: Error | null = null;
|
||||
for (const provider of this.providers) {
|
||||
try {
|
||||
return await provider.embed(text);
|
||||
} catch (err) {
|
||||
lastErr = err instanceof Error ? err : new Error(String(err));
|
||||
this.logger?.warn?.(`memoria: embed fallback — ${provider.name} failed: ${lastErr.message}`);
|
||||
}
|
||||
}
|
||||
throw lastErr || new Error("All embed providers failed");
|
||||
}
|
||||
|
||||
async embedBatch(texts: string[]): Promise<number[][]> {
|
||||
let lastErr: Error | null = null;
|
||||
for (const provider of this.providers) {
|
||||
try {
|
||||
return await provider.embedBatch(texts);
|
||||
} catch (err) {
|
||||
lastErr = err instanceof Error ? err : new Error(String(err));
|
||||
this.logger?.warn?.(`memoria: embedBatch fallback — ${provider.name} failed: ${lastErr.message}`);
|
||||
}
|
||||
}
|
||||
throw lastErr || new Error("All embed providers failed");
|
||||
}
|
||||
}
|
||||
354
openclaw-memoria-port/core/embeddings.ts
Normal file
354
openclaw-memoria-port/core/embeddings.ts
Normal file
@@ -0,0 +1,354 @@
|
||||
/**
|
||||
* Memoria — Layer 4: Embeddings + Hybrid Search
|
||||
*
|
||||
* Stores 768d float vectors in SQLite (BLOB), computes cosine similarity,
|
||||
* and provides hybrid search (FTS5 text match + cosine similarity + temporal scoring).
|
||||
*
|
||||
* Key methods:
|
||||
* embedFact(id, text) — compute and store embedding for one fact
|
||||
* embedBatch() — batch process all unembedded facts
|
||||
* hybridSearch(query) — combined FTS5 + cosine + scoring
|
||||
* cosineSimilarity(a, b) — vector distance (exported utility)
|
||||
*
|
||||
* Les vecteurs sont stockés en Float32Array → BLOB pour perf maximale.
|
||||
*/
|
||||
|
||||
import type { MemoriaDB, Fact } from "./db.js";
|
||||
import type { EmbedProvider } from "./providers/types.js";
|
||||
import type { ScoredFact } from "./scoring.js";
|
||||
import { scoreFact } from "./scoring.js";
|
||||
|
||||
// ─── Vector Utils ───
|
||||
|
||||
/** Float32Array → Buffer (for SQLite BLOB storage) */
|
||||
export function vectorToBlob(vec: number[]): Buffer {
|
||||
return Buffer.from(new Float32Array(vec).buffer);
|
||||
}
|
||||
|
||||
/** Buffer (SQLite BLOB) → number[] */
|
||||
export function blobToVector(blob: Buffer): number[] {
|
||||
const f32 = new Float32Array(blob.buffer, blob.byteOffset, blob.byteLength / 4);
|
||||
return Array.from(f32);
|
||||
}
|
||||
|
||||
/** Cosine similarity between two vectors. Returns 0-1. */
|
||||
export function cosineSimilarity(a: number[], b: number[]): number {
|
||||
if (a.length !== b.length) return 0;
|
||||
let dot = 0, normA = 0, normB = 0;
|
||||
for (let i = 0; i < a.length; i++) {
|
||||
dot += a[i] * b[i];
|
||||
normA += a[i] * a[i];
|
||||
normB += b[i] * b[i];
|
||||
}
|
||||
const denom = Math.sqrt(normA) * Math.sqrt(normB);
|
||||
return denom === 0 ? 0 : dot / denom;
|
||||
}
|
||||
|
||||
// ─── Types ───
|
||||
|
||||
export interface EmbeddedFact extends Fact {
|
||||
similarity: number;
|
||||
temporalScore: number;
|
||||
hybridScore: number;
|
||||
}
|
||||
|
||||
interface EmbeddingRow {
|
||||
fact_id: string;
|
||||
vector: Buffer;
|
||||
model: string;
|
||||
}
|
||||
|
||||
// ─── Embedding Manager ───
|
||||
|
||||
export class EmbeddingManager {
|
||||
private db: MemoriaDB;
|
||||
private provider: EmbedProvider;
|
||||
private modelName: string;
|
||||
|
||||
constructor(db: MemoriaDB, provider: EmbedProvider) {
|
||||
this.db = db;
|
||||
this.provider = provider;
|
||||
this.modelName = provider.name;
|
||||
}
|
||||
|
||||
/** Get raw DB handle for direct queries */
|
||||
private get rawDb() {
|
||||
return this.db.raw;
|
||||
}
|
||||
|
||||
// ─── Store ───
|
||||
|
||||
/** Embed a single fact and store the vector */
|
||||
async embedFact(factId: string, text: string): Promise<void> {
|
||||
const vector = await this.provider.embed(text);
|
||||
const blob = vectorToBlob(vector);
|
||||
this.rawDb.prepare(
|
||||
"INSERT OR REPLACE INTO embeddings (fact_id, vector, model, created_at) VALUES (?, ?, ?, ?)"
|
||||
).run(factId, blob, this.modelName, Date.now());
|
||||
}
|
||||
|
||||
/** Embed multiple facts in batch */
|
||||
async embedBatch(facts: Array<{ id: string; text: string }>): Promise<number> {
|
||||
if (facts.length === 0) return 0;
|
||||
|
||||
// Batch embed (max 32 at a time to avoid timeout)
|
||||
const BATCH_SIZE = 32;
|
||||
let embedded = 0;
|
||||
|
||||
for (let i = 0; i < facts.length; i += BATCH_SIZE) {
|
||||
const batch = facts.slice(i, i + BATCH_SIZE);
|
||||
const texts = batch.map(f => f.text);
|
||||
|
||||
try {
|
||||
const vectors = await this.provider.embedBatch(texts);
|
||||
const stmt = this.rawDb.prepare(
|
||||
"INSERT OR REPLACE INTO embeddings (fact_id, vector, model, created_at) VALUES (?, ?, ?, ?)"
|
||||
);
|
||||
const now = Date.now();
|
||||
const tx = this.rawDb.transaction(() => {
|
||||
for (let j = 0; j < batch.length; j++) {
|
||||
stmt.run(batch[j].id, vectorToBlob(vectors[j]), this.modelName, now);
|
||||
}
|
||||
});
|
||||
tx();
|
||||
embedded += batch.length;
|
||||
} catch (err) {
|
||||
// Try one by one on batch failure
|
||||
for (const fact of batch) {
|
||||
try {
|
||||
await this.embedFact(fact.id, fact.text);
|
||||
embedded++;
|
||||
} catch (e) { console.debug('memoria:embeddings: ' + String(e)); }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return embedded;
|
||||
}
|
||||
|
||||
// ─── Search ───
|
||||
|
||||
/** Semantic search: embed query → cosine similarity with all stored vectors */
|
||||
async semanticSearch(query: string, limit = 10, minSimilarity = 0.3): Promise<EmbeddedFact[]> {
|
||||
const queryVector = await this.provider.embed(query);
|
||||
|
||||
// Get all embeddings (we compute cosine in JS — fast enough for <10K facts)
|
||||
const rows = this.rawDb.prepare(
|
||||
"SELECT e.fact_id, e.vector FROM embeddings e JOIN facts f ON e.fact_id = f.id WHERE f.superseded = 0"
|
||||
).all() as EmbeddingRow[];
|
||||
|
||||
if (rows.length === 0) return [];
|
||||
|
||||
// Compute similarities
|
||||
const scored: Array<{ factId: string; similarity: number }> = [];
|
||||
for (const row of rows) {
|
||||
const vec = blobToVector(row.vector);
|
||||
const sim = cosineSimilarity(queryVector, vec);
|
||||
if (sim >= minSimilarity) {
|
||||
scored.push({ factId: row.fact_id, similarity: sim });
|
||||
}
|
||||
}
|
||||
|
||||
// Sort by similarity, take top N
|
||||
scored.sort((a, b) => b.similarity - a.similarity);
|
||||
const topIds = scored.slice(0, limit * 2); // Get extras for post-filtering
|
||||
|
||||
// Fetch full facts
|
||||
const results: EmbeddedFact[] = [];
|
||||
for (const { factId, similarity } of topIds) {
|
||||
const fact = this.db.getFact(factId);
|
||||
if (!fact || fact.superseded) continue;
|
||||
|
||||
const sf = scoreFact(fact);
|
||||
results.push({
|
||||
...fact,
|
||||
similarity,
|
||||
temporalScore: sf.temporalScore,
|
||||
hybridScore: 0, // computed in hybridSearch
|
||||
});
|
||||
}
|
||||
|
||||
return results.slice(0, limit);
|
||||
}
|
||||
|
||||
// ─── Query Expansion ───
|
||||
|
||||
/**
|
||||
* Expand a query into multiple variants for better recall.
|
||||
* Uses the embedding model to find semantically similar terms.
|
||||
* No LLM needed — pure heuristic expansion with synonym/concept maps.
|
||||
*/
|
||||
expandQuery(query: string): string[] {
|
||||
const variants = [query];
|
||||
const lower = query.toLowerCase().trim();
|
||||
|
||||
// Concept expansions: STRICT synonym pairs only (avoid noise from loose associations)
|
||||
const conceptMap: Record<string, string[]> = {
|
||||
// Money/salary — bidirectional synonyms
|
||||
"taux horaire": ["€/h", "salaire"],
|
||||
"salaire": ["taux horaire", "€/h"],
|
||||
"rémunération": ["salaire", "€/h"],
|
||||
"ca": ["chiffre d'affaires"],
|
||||
"chiffre d'affaires": ["CA"],
|
||||
// Tech — FR↔EN translations only
|
||||
"deploy": ["déploiement"],
|
||||
"déploiement": ["deploy"],
|
||||
"modèle": ["model"],
|
||||
"modèles": ["models"],
|
||||
// Config
|
||||
"config": ["configuration"],
|
||||
"configuration": ["config"],
|
||||
};
|
||||
|
||||
// Check each concept key against the query
|
||||
for (const [key, synonyms] of Object.entries(conceptMap)) {
|
||||
if (lower.includes(key)) {
|
||||
// Add 1-2 best synonym variants
|
||||
for (const syn of synonyms.slice(0, 2)) {
|
||||
const variant = query.replace(new RegExp(key, "gi"), syn);
|
||||
if (variant !== query && !variants.includes(variant)) {
|
||||
variants.push(variant);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Entity extraction: if query contains a proper noun, add it standalone
|
||||
const properNouns = query.match(/\b[A-Z][a-z]+(?:\s[A-Z][a-z]+)*/g);
|
||||
if (properNouns) {
|
||||
for (const noun of properNouns) {
|
||||
if (noun.length > 2 && !variants.includes(noun)) {
|
||||
variants.push(noun);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return variants.slice(0, 4); // Max 4 variants
|
||||
}
|
||||
|
||||
/** Hybrid search: FTS5 + cosine, merge and rank — with query expansion */
|
||||
async hybridSearch(query: string, limit = 10, options?: {
|
||||
ftsWeight?: number; // Weight for FTS5 results (default 0.4)
|
||||
cosineWeight?: number; // Weight for cosine results (default 0.4)
|
||||
temporalWeight?: number; // Weight for temporal score (default 0.2)
|
||||
minSimilarity?: number;
|
||||
expandQueries?: boolean; // Enable query expansion (default true)
|
||||
}): Promise<EmbeddedFact[]> {
|
||||
// Adaptive weights: short/generic queries → favor semantic over FTS
|
||||
// because FTS on a 1-word query like "Bureau" matches too many facts
|
||||
const queryWords = query.trim().split(/\s+/).filter(w => w.length > 2);
|
||||
const isShortQuery = queryWords.length <= 2;
|
||||
const ftsW = options?.ftsWeight ?? (isShortQuery ? 0.20 : 0.40);
|
||||
const cosW = options?.cosineWeight ?? (isShortQuery ? 0.55 : 0.40);
|
||||
const tempW = options?.temporalWeight ?? (isShortQuery ? 0.25 : 0.20);
|
||||
const minSim = options?.minSimilarity ?? 0.25;
|
||||
|
||||
// Query expansion: generate variants for better recall
|
||||
const doExpand = options?.expandQueries !== false;
|
||||
const queries = doExpand ? this.expandQuery(query) : [query];
|
||||
|
||||
// 1. FTS5 search — across all query variants
|
||||
const allFtsResults: Fact[] = [];
|
||||
const seenFtsIds = new Set<string>();
|
||||
for (const q of queries) {
|
||||
const results = this.db.searchFacts(q, limit * 2);
|
||||
for (const f of results) {
|
||||
if (!seenFtsIds.has(f.id)) {
|
||||
seenFtsIds.add(f.id);
|
||||
allFtsResults.push(f);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 2. Semantic search — across all query variants
|
||||
let allCosResults: EmbeddedFact[] = [];
|
||||
const seenCosIds = new Set<string>();
|
||||
for (const q of queries) {
|
||||
try {
|
||||
const results = await this.semanticSearch(q, limit * 2, minSim);
|
||||
for (const f of results) {
|
||||
if (!seenCosIds.has(f.id)) {
|
||||
seenCosIds.add(f.id);
|
||||
allCosResults.push(f);
|
||||
} else {
|
||||
// Keep highest similarity
|
||||
const existing = allCosResults.find(r => r.id === f.id);
|
||||
if (existing && f.similarity > existing.similarity) {
|
||||
existing.similarity = f.similarity;
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (e) {
|
||||
console.debug('memoria:embeddings: ' + String(e));
|
||||
// Embedding not available → FTS only
|
||||
}
|
||||
}
|
||||
|
||||
// 3. Merge by fact ID
|
||||
const merged = new Map<string, EmbeddedFact>();
|
||||
|
||||
// Add FTS results with rank-based score
|
||||
for (let i = 0; i < allFtsResults.length; i++) {
|
||||
const f = allFtsResults[i];
|
||||
const ftsScore = 1 - i / Math.max(allFtsResults.length, 1); // 1.0 for best, decreasing
|
||||
const sf = scoreFact(f);
|
||||
merged.set(f.id, {
|
||||
...f,
|
||||
similarity: 0,
|
||||
temporalScore: sf.temporalScore,
|
||||
hybridScore: ftsScore * ftsW + sf.temporalScore * tempW,
|
||||
});
|
||||
}
|
||||
|
||||
// Merge cosine results
|
||||
for (const cr of allCosResults) {
|
||||
const existing = merged.get(cr.id);
|
||||
if (existing) {
|
||||
// Boost: fact found by BOTH methods
|
||||
existing.similarity = cr.similarity;
|
||||
existing.hybridScore += cr.similarity * cosW;
|
||||
} else {
|
||||
const sf = scoreFact(cr);
|
||||
merged.set(cr.id, {
|
||||
...cr,
|
||||
temporalScore: sf.temporalScore,
|
||||
hybridScore: cr.similarity * cosW + sf.temporalScore * tempW,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// 4. Sort by hybrid score, return top N
|
||||
const results = Array.from(merged.values());
|
||||
results.sort((a, b) => b.hybridScore - a.hybridScore);
|
||||
return results.slice(0, limit);
|
||||
}
|
||||
|
||||
// ─── Stats ───
|
||||
|
||||
/** Count of embedded facts */
|
||||
/**
|
||||
* Called when a fact is superseded — remove its embedding to prevent
|
||||
* stale results in semantic search.
|
||||
*/
|
||||
onFactSuperseded(factId: string): boolean {
|
||||
try {
|
||||
const result = this.db.raw.prepare("DELETE FROM embeddings WHERE fact_id = ?").run(factId);
|
||||
return (result.changes ?? 0) > 0;
|
||||
} catch (e) { console.debug('memoria:embeddings: ' + String(e)); return false; }
|
||||
}
|
||||
|
||||
embeddedCount(): number {
|
||||
return (this.rawDb.prepare("SELECT COUNT(*) as c FROM embeddings").get() as { c: number }).c;
|
||||
}
|
||||
|
||||
/** Facts without embeddings */
|
||||
unembeddedFacts(limit = 100): Array<{ id: string; fact: string }> {
|
||||
return this.rawDb.prepare(`
|
||||
SELECT f.id, f.fact FROM facts f
|
||||
LEFT JOIN embeddings e ON f.id = e.fact_id
|
||||
WHERE e.fact_id IS NULL AND f.superseded = 0
|
||||
ORDER BY f.created_at DESC
|
||||
LIMIT ?
|
||||
`).all(limit) as Array<{ id: string; fact: string }>;
|
||||
}
|
||||
}
|
||||
75
openclaw-memoria-port/core/example.ts
Normal file
75
openclaw-memoria-port/core/example.ts
Normal file
@@ -0,0 +1,75 @@
|
||||
/**
|
||||
* Example: Using @primo-studio/memoria-core standalone
|
||||
*
|
||||
* This demonstrates using Memoria without OpenClaw.
|
||||
* Run with: node --loader ts-node/esm example.ts
|
||||
*/
|
||||
|
||||
import { Memoria } from './index.js';
|
||||
|
||||
async function main() {
|
||||
console.log('🧠 Initializing Memoria core...\n');
|
||||
|
||||
// Initialize with Ollama (local, free)
|
||||
const memoria = await Memoria.init({
|
||||
dbPath: './example-memoria.db',
|
||||
provider: 'ollama',
|
||||
model: 'qwen3.5:4b',
|
||||
embeddingModel: 'nomic-embed-text-v2-moe',
|
||||
recallLimit: 5,
|
||||
debug: true
|
||||
});
|
||||
|
||||
console.log('\n✅ Memoria initialized!\n');
|
||||
|
||||
// Store some facts
|
||||
console.log('📝 Storing facts...\n');
|
||||
|
||||
await memoria.store('User prefers dark mode in all applications', 'preference', 0.95);
|
||||
await memoria.store('User is located in New York, USA', 'savoir', 0.9);
|
||||
await memoria.store('User favorite programming language is TypeScript', 'preference', 0.85);
|
||||
await memoria.store('Project deadline is April 15, 2026', 'chronologie', 0.9);
|
||||
await memoria.store('User dislikes verbose error messages', 'preference', 0.8);
|
||||
|
||||
console.log('✅ 5 facts stored!\n');
|
||||
|
||||
// Recall facts
|
||||
console.log('🔍 Recalling: "What are the user preferences?"\n');
|
||||
const results = await memoria.recall('What are the user preferences?', { limit: 3 });
|
||||
|
||||
console.log(`Found ${results.totalFound} facts:\n`);
|
||||
for (const fact of results.facts) {
|
||||
console.log(` • [${fact.category}] ${fact.fact}`);
|
||||
console.log(` Confidence: ${fact.confidence}, Score: ${fact.score.toFixed(2)}\n`);
|
||||
}
|
||||
|
||||
// Natural language query
|
||||
console.log('💬 Query: "Tell me about the user"\n');
|
||||
const answer = await memoria.query('Tell me about the user');
|
||||
console.log(answer);
|
||||
console.log('');
|
||||
|
||||
// Stats
|
||||
console.log('📊 Memory statistics:\n');
|
||||
const stats = await memoria.stats();
|
||||
console.log(` Total facts: ${stats.totalFacts}`);
|
||||
console.log(` Total embeddings: ${stats.totalEmbeddings}`);
|
||||
console.log(` Total relations: ${stats.totalRelations}`);
|
||||
console.log(` Total topics: ${stats.totalTopics}`);
|
||||
console.log(` Total patterns: ${stats.totalPatterns}`);
|
||||
console.log(` Total observations: ${stats.totalObservations}\n`);
|
||||
|
||||
if (Object.keys(stats.categoryCounts).length > 0) {
|
||||
console.log(' By category:');
|
||||
for (const [cat, count] of Object.entries(stats.categoryCounts)) {
|
||||
console.log(` ${cat}: ${count}`);
|
||||
}
|
||||
console.log('');
|
||||
}
|
||||
|
||||
// Close
|
||||
memoria.close();
|
||||
console.log('✅ Memoria closed. Database saved.\n');
|
||||
}
|
||||
|
||||
main().catch(console.error);
|
||||
148
openclaw-memoria-port/core/expertise.ts
Normal file
148
openclaw-memoria-port/core/expertise.ts
Normal file
@@ -0,0 +1,148 @@
|
||||
/**
|
||||
* Expertise Manager — Specialization via topic interaction
|
||||
*
|
||||
* Human memory develops expertise in frequently-used domains.
|
||||
*
|
||||
* Levels:
|
||||
* - novice: interaction_count < 5
|
||||
* - familiar: 5 <= count < 15
|
||||
* - experienced: 15 <= count < 30
|
||||
* - expert: count >= 30
|
||||
*
|
||||
* Usage:
|
||||
* - Boost recall for facts in expert topics
|
||||
* - Prioritize facts from expertise domains
|
||||
*/
|
||||
|
||||
import type { MemoriaDB } from "./db.js";
|
||||
|
||||
export type ExpertiseLevel = "novice" | "familiar" | "experienced" | "expert";
|
||||
|
||||
export const EXPERTISE_CONFIG = {
|
||||
thresholds: {
|
||||
novice: 0,
|
||||
familiar: 5,
|
||||
experienced: 15,
|
||||
expert: 30,
|
||||
},
|
||||
recallBoost: {
|
||||
novice: 1.0,
|
||||
familiar: 1.1,
|
||||
experienced: 1.3,
|
||||
expert: 1.5,
|
||||
},
|
||||
};
|
||||
|
||||
export interface TopicExpertise {
|
||||
topic: string;
|
||||
interactionCount: number;
|
||||
level: ExpertiseLevel;
|
||||
boost: number;
|
||||
}
|
||||
|
||||
export class ExpertiseManager {
|
||||
private db: MemoriaDB;
|
||||
|
||||
constructor(db: MemoriaDB) {
|
||||
this.db = db;
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate expertise level from interaction count
|
||||
*/
|
||||
calculateLevel(interactionCount: number): ExpertiseLevel {
|
||||
if (interactionCount >= EXPERTISE_CONFIG.thresholds.expert) return "expert";
|
||||
if (interactionCount >= EXPERTISE_CONFIG.thresholds.experienced) return "experienced";
|
||||
if (interactionCount >= EXPERTISE_CONFIG.thresholds.familiar) return "familiar";
|
||||
return "novice";
|
||||
}
|
||||
|
||||
/**
|
||||
* Get recall boost multiplier for a topic
|
||||
*/
|
||||
getBoost(level: ExpertiseLevel): number {
|
||||
return EXPERTISE_CONFIG.recallBoost[level];
|
||||
}
|
||||
|
||||
/**
|
||||
* Get all topics with expertise levels
|
||||
*/
|
||||
getAllExpertise(): TopicExpertise[] {
|
||||
try {
|
||||
const topics = this.db.raw.prepare(
|
||||
"SELECT name as topic, access_count as interaction_count FROM topics ORDER BY access_count DESC"
|
||||
).all() as Array<{ topic: string; interaction_count: number }>;
|
||||
|
||||
return topics.map(t => ({
|
||||
topic: t.topic,
|
||||
interactionCount: t.interaction_count,
|
||||
level: this.calculateLevel(t.interaction_count),
|
||||
boost: this.getBoost(this.calculateLevel(t.interaction_count)),
|
||||
}));
|
||||
} catch (err) {
|
||||
console.error("[expertise] getAllExpertise failed:", err);
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get expertise for specific topics
|
||||
*/
|
||||
getTopicExpertise(topics: string[]): TopicExpertise[] {
|
||||
const result: TopicExpertise[] = [];
|
||||
|
||||
for (const topic of topics) {
|
||||
const row = this.db.raw.prepare(
|
||||
"SELECT access_count as interaction_count FROM topics WHERE name = ?"
|
||||
).get(topic) as { interaction_count: number } | undefined;
|
||||
|
||||
const count = row?.interaction_count ?? 0;
|
||||
const level = this.calculateLevel(count);
|
||||
|
||||
result.push({
|
||||
topic,
|
||||
interactionCount: count,
|
||||
level,
|
||||
boost: this.getBoost(level),
|
||||
});
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get stats: count by expertise level
|
||||
*/
|
||||
getStats(): Record<ExpertiseLevel, number> {
|
||||
try {
|
||||
const all = this.getAllExpertise();
|
||||
const stats: Record<ExpertiseLevel, number> = {
|
||||
novice: 0,
|
||||
familiar: 0,
|
||||
experienced: 0,
|
||||
expert: 0,
|
||||
};
|
||||
|
||||
for (const exp of all) {
|
||||
stats[exp.level]++;
|
||||
}
|
||||
|
||||
return stats;
|
||||
} catch (err) {
|
||||
console.error("[expertise] getStats failed:", err);
|
||||
return { novice: 0, familiar: 0, experienced: 0, expert: 0 };
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Boost fact score based on topic expertise
|
||||
*/
|
||||
applyExpertiseBoost(score: number, factTopics: string[]): number {
|
||||
if (factTopics.length === 0) return score;
|
||||
|
||||
const expertise = this.getTopicExpertise(factTopics);
|
||||
const maxBoost = Math.max(...expertise.map(e => e.boost));
|
||||
|
||||
return score * maxBoost;
|
||||
}
|
||||
}
|
||||
125
openclaw-memoria-port/core/extraction.ts
Normal file
125
openclaw-memoria-port/core/extraction.ts
Normal file
@@ -0,0 +1,125 @@
|
||||
/**
|
||||
* 🧠 Memoria — Fact extraction logic
|
||||
*
|
||||
* This module exports:
|
||||
* - LLM_EXTRACT_PROMPT — the master prompt for LLM-based fact extraction
|
||||
* - parseJSON() — safe JSON parser for LLM outputs (handles fences, trailing commas)
|
||||
* - normalizeCategory() — map free-form category → canonical category
|
||||
*/
|
||||
|
||||
// ─── Extraction Prompt ───
|
||||
|
||||
export const LLM_EXTRACT_PROMPT = `Tu es un extracteur de faits pour un système de mémoire AI.
|
||||
Analyse le texte et extrais les faits qui méritent d'être retenus.
|
||||
|
||||
DEUX TYPES de faits:
|
||||
- "semantic" = vérité durable, processus appris, configuration, règle découverte
|
||||
- "episodic" = événement daté, état temporaire, action en cours, résultat observé
|
||||
|
||||
RÈGLE D'OR: TOUJOURS INCLURE LES DÉTAILS CONCRETS
|
||||
Imagine que tu notes pour une secrétaire qui doit pouvoir tout retrouver plus tard.
|
||||
❌ "Neto a eu une réunion importante" → MANQUE: avec qui? quand? sur quoi?
|
||||
✅ "Neto a eu une réunion avec le client CCOG le 28/03 à 14h sur la refonte du site"
|
||||
❌ "Sol a été redémarré" → MANQUE: pourquoi? quel était le problème?
|
||||
✅ "Sol a été redémarré le 28/03 à 18h25 car better-sqlite3 était compilé pour la mauvaise version de Node (137 vs 141). Fix: npm rebuild"
|
||||
❌ "Une réflexion excellente a été faite" → MANQUE: quelle réflexion? quel contenu?
|
||||
✅ "Neto propose que la mémoire fonctionne comme un cerveau humain: ne rien supprimer, prioriser par usage, les détails éphémères (heure d'un vol) s'effacent mais l'expérience (le vol était long) reste"
|
||||
|
||||
EXTRAIRE — tout ce qui a du contenu:
|
||||
✅ Processus appris avec les étapes ("pour migrer SQLite WAL: VACUUM INTO au lieu de cp")
|
||||
✅ Ce qui a marché ET pourquoi ("le fallback chain résout les crashes car Ollama tombe parfois")
|
||||
✅ Leçons d'erreurs avec la cause ("api.config ≠ api.pluginConfig → configs ignorées")
|
||||
✅ Décisions avec la raison ("on utilise qwen3.5:4b car meilleure qualité JSON, avec think:false")
|
||||
✅ Configs exactes ("Memoria: recallLimit=8, extract LLM qwen3.5:4b, fallback gemma3:4b")
|
||||
✅ Résultats avec chiffres ("Benchmark: retrieval 92% (11/12), RAG 25%, bottleneck = modèle local")
|
||||
✅ Préférences avec contexte ("Neto veut du step-by-step, une feature à la fois avec validation")
|
||||
✅ États temporaires AVEC CONTEXTE ("Sol est en train de refaire HydroTrack — blocker: API endpoint changé")
|
||||
✅ Événements avec date ET détail ("28/03 — Memoria v3.13.0 live: lifecycle fresh/settled/dormant, 385f/90s/0d")
|
||||
✅ Ce que quelqu'un fait en ce moment ET pourquoi ("Sol travaille sur la refonte HydroTrack depuis le 26/03, priorité car le client attend la démo")
|
||||
✅ Outils internes et leur état ("Memoria v3.13.0: lifecycle humain, curseur détail 1-10, 475 facts, publié ClawHub + GitHub")
|
||||
✅ Produits/MVPs et leur avancement ("Bureau module CA v1.2.0 en prod, matching auto Qonto↔projets fonctionnel")
|
||||
|
||||
GÉNÉRALISER — quand un pattern se répète:
|
||||
🔄 Même problème 2+ fois → stocker la RÈGLE + les cas concrets
|
||||
"Les commandes brew/nvm (npm, ollama, node) ne sont pas dans le PATH en SSH non-interactif — fix: source ~/.zprofile ou chemin complet /opt/homebrew/bin/"
|
||||
|
||||
🔥 ERREURS ET DANGERS — PRIORITÉ MAXIMALE (comme toucher du feu):
|
||||
Quand quelque chose a causé un PROBLÈME RÉEL (crash, perte de données, service mort, bug en prod, Neto qui doit intervenir physiquement):
|
||||
→ Catégorie "erreur", confidence 0.95+
|
||||
→ Inclure: CE QUI S'EST PASSÉ + POURQUOI c'est dangereux + CE QU'IL NE FAUT JAMAIS REFAIRE + L'ALTERNATIVE SÛRE
|
||||
→ C'est comme un panneau "DANGER" : on le note dès la PREMIÈRE FOIS, pas après la 2ème brûlure
|
||||
Exemples de VRAIS dangers à capter:
|
||||
✅ "NE JAMAIS utiliser openclaw gateway stop via exec — tue le daemon sans le relancer, gateway reste mort. Utiliser gateway restart (SIGUSR1)." (catégorie erreur)
|
||||
✅ "NE JAMAIS faire cp sur une DB SQLite en mode WAL — données perdues. Utiliser VACUUM INTO." (catégorie erreur)
|
||||
✅ "NE JAMAIS push sur main sans test — régression garantie. Toujours une branche séparée." (catégorie erreur)
|
||||
Signaux qu'un fait est un DANGER:
|
||||
- Quelqu'un dit "ne fais plus ça", "c'est la 2ème fois", "putain", "j'ai dû aller faire X manuellement"
|
||||
- Un service/outil est mort/cassé après une action
|
||||
- Un rollback ou fix manuel a été nécessaire
|
||||
- Le mot "jamais", "interdit", "critique", "ne pas" dans la conversation
|
||||
|
||||
NE PAS STOCKER:
|
||||
❌ Confirmations vides ("ok", "merci", "compris")
|
||||
❌ Narration pure sans résultat ("je lis le fichier", "je regarde le code")
|
||||
❌ MÉTA-FAITS sur le stockage lui-même ("le nouveau fait complète l'ancien", "ce fait a été ajouté")
|
||||
❌ Faits sans AUCUN élément concret ("des informations ont été fournies", "la configuration a été mise à jour")
|
||||
|
||||
QUALITÉ — chaque fait DOIT:
|
||||
⚠️ Contenir au moins UN élément concret: nom propre, chiffre, commande, version, ou date
|
||||
⚠️ Être AUTONOME = compréhensible seul, sans contexte
|
||||
⚠️ Inclure le POURQUOI ou le CONTEXTE quand c'est pertinent (pas juste QUOI)
|
||||
⚠️ Ne JAMAIS commencer par "Le nouveau fait..." ou "Ce fait..." → commencer par le SUJET réel
|
||||
|
||||
Règles:
|
||||
- Phrase(s) complète(s) et autonome(s)
|
||||
- Pour les PROCÉDURES: garder les étapes ensemble en UN fait (2-4 phrases OK)
|
||||
- UN FAIT PAR ENTITÉ — si le texte parle de 3 sujets distincts, 3 faits séparés
|
||||
- Catégories: savoir, erreur, preference, outil, chronologie, rh, client
|
||||
- type: "semantic" ou "episodic"
|
||||
- confidence: 0.7 minimum
|
||||
- Maximum {MAX_FACTS} faits
|
||||
- Si rien de concret → {"facts": []}
|
||||
|
||||
Texte:
|
||||
"{TEXT}"
|
||||
|
||||
JSON valide uniquement:
|
||||
{"facts": [{"fact": "phrase", "category": "...", "type": "semantic|episodic", "confidence": 0.X}]}`;
|
||||
|
||||
// ─── JSON Parse Helper ───
|
||||
|
||||
/** Safely parse JSON from LLM output. Handles markdown code fences, trailing commas, and partial JSON. */
|
||||
export function parseJSON(text: string): unknown {
|
||||
// Strip markdown code blocks (```json ... ``` or ``` ... ```)
|
||||
let cleaned = text.trim();
|
||||
if (cleaned.startsWith("```")) {
|
||||
const lines = cleaned.split("\n");
|
||||
lines.shift(); // remove opening ```json or ```
|
||||
if (lines[lines.length - 1]?.trim() === "```") lines.pop();
|
||||
cleaned = lines.join("\n").trim();
|
||||
}
|
||||
// Try to extract JSON object/array via regex
|
||||
const match = cleaned.match(/(\{[\s\S]*\}|\[[\s\S]*\])/);
|
||||
if (match) cleaned = match[1];
|
||||
return JSON.parse(cleaned);
|
||||
}
|
||||
|
||||
// ─── Category Normalization ───
|
||||
|
||||
const VALID_CATEGORIES = new Set(["savoir", "erreur", "preference", "outil", "chronologie", "rh", "client"]);
|
||||
|
||||
/**
|
||||
* Normalize free-form LLM category output → one of 7 canonical categories.
|
||||
* Mapping: architecture/mécanisme → savoir, sévérité/bug → erreur, financier → client, etc.
|
||||
* Unknown categories default to "savoir".
|
||||
*/
|
||||
export function normalizeCategory(raw: string): string {
|
||||
const lower = (raw || "savoir").toLowerCase().trim();
|
||||
if (VALID_CATEGORIES.has(lower)) return lower;
|
||||
// Common LLM variants → map to valid
|
||||
if (lower === "préférence" || lower === "préférences") return "preference";
|
||||
if (lower === "architecture" || lower === "mécanisme" || lower === "stock" || lower === "état") return "savoir";
|
||||
if (lower === "financier") return "client";
|
||||
if (lower === "sévérité" || lower === "bug") return "erreur";
|
||||
return "savoir"; // fallback: anything unknown → savoir
|
||||
}
|
||||
376
openclaw-memoria-port/core/fact-clusters.ts
Normal file
376
openclaw-memoria-port/core/fact-clusters.ts
Normal file
@@ -0,0 +1,376 @@
|
||||
/**
|
||||
* Memoria — Fact Clusters (v3.4.0)
|
||||
*
|
||||
* Generates thematic summaries from groups of related atomic facts.
|
||||
* Solves the "multi-session" problem: when facts about the same entity
|
||||
* are scattered across sessions, a cluster aggregates them into one
|
||||
* searchable summary.
|
||||
*
|
||||
* Like a "dossier" in an office: when you look up a client, you get
|
||||
* the complete file, not individual scattered notes.
|
||||
*
|
||||
* Clusters are:
|
||||
* - Generated from atomic facts sharing the same entity/topic
|
||||
* - Stored as regular facts (fact_type = "cluster") for FTS/embedding search
|
||||
* - Auto-invalidated when a member fact is superseded
|
||||
* - Regenerated periodically to stay fresh
|
||||
*/
|
||||
|
||||
import type { MemoriaDB, Fact } from "./db.js";
|
||||
import type { LLMProvider } from "./providers/types.js";
|
||||
|
||||
// ─── Types ───
|
||||
|
||||
export interface ClusterMeta {
|
||||
memberIds: string[]; // IDs of atomic facts in this cluster
|
||||
entityName: string; // Primary entity ("Sol", "Bureau", "RH Primo Studio")
|
||||
generatedAt: number; // Timestamp of generation
|
||||
stale: boolean; // True if a member was superseded since generation
|
||||
}
|
||||
|
||||
export interface ClusterResult {
|
||||
created: number;
|
||||
updated: number;
|
||||
stale: number;
|
||||
}
|
||||
|
||||
// ─── Config ───
|
||||
|
||||
const MIN_FACTS_FOR_CLUSTER = 3; // Need at least 3 facts to justify a cluster
|
||||
const MAX_CLUSTER_FACTS = 12; // Don't cluster more than 12 facts (context limit)
|
||||
const CLUSTER_REGEN_HOURS = 24; // Regenerate stale clusters after this delay
|
||||
const MAX_CLUSTERS_PER_RUN = 5; // Limit cluster generation per postProcess call
|
||||
|
||||
// ─── Prompt ───
|
||||
|
||||
const CLUSTER_PROMPT = `Tu résumes un groupe de faits liés à la même entité en UN SEUL paragraphe dense.
|
||||
|
||||
Règles:
|
||||
- Le résumé doit contenir TOUTES les informations clés des faits (noms, chiffres, dates, versions, états)
|
||||
- Commence par l'entité principale en gras contexte
|
||||
- Si des infos se contredisent, garde la plus récente
|
||||
- Si un fait dit qu'une personne est partie ou qu'un outil est remplacé, reflète cet état actuel
|
||||
- 2-4 phrases maximum, dense et factuel
|
||||
- En français
|
||||
|
||||
Entité: {ENTITY}
|
||||
|
||||
Faits:
|
||||
{FACTS}
|
||||
|
||||
Résumé dense (texte brut, pas de JSON):`;
|
||||
|
||||
// ─── Manager ───
|
||||
|
||||
export class FactClusterManager {
|
||||
private db: MemoriaDB;
|
||||
private llm: LLMProvider;
|
||||
|
||||
constructor(db: MemoriaDB, llm: LLMProvider) {
|
||||
this.db = db;
|
||||
this.llm = llm;
|
||||
}
|
||||
|
||||
/**
|
||||
* Main entry point: generate/refresh clusters for entities with enough facts.
|
||||
* Called from postProcessNewFacts.
|
||||
*/
|
||||
async generateClusters(): Promise<ClusterResult> {
|
||||
const result: ClusterResult = { created: 0, updated: 0, stale: 0 };
|
||||
|
||||
try {
|
||||
// 1. Find entities with enough active facts
|
||||
const entityGroups = this.groupFactsByEntity();
|
||||
|
||||
// 2. Check existing clusters for staleness
|
||||
result.stale = this.markStaleClusters();
|
||||
|
||||
// 3. Generate/regenerate clusters for top entities
|
||||
let generated = 0;
|
||||
for (const [entityName, facts] of entityGroups) {
|
||||
if (generated >= MAX_CLUSTERS_PER_RUN) break;
|
||||
if (facts.length < MIN_FACTS_FOR_CLUSTER) continue;
|
||||
|
||||
const existing = this.findCluster(entityName);
|
||||
|
||||
// Skip if cluster exists and is fresh
|
||||
if (existing && !this.isStale(existing)) continue;
|
||||
|
||||
// Generate cluster
|
||||
const clusterText = await this.generateClusterText(entityName, facts);
|
||||
if (!clusterText) continue;
|
||||
|
||||
if (existing) {
|
||||
// Update existing cluster
|
||||
this.updateCluster(existing.id, clusterText, facts);
|
||||
result.updated++;
|
||||
} else {
|
||||
// Create new cluster
|
||||
this.createCluster(entityName, clusterText, facts);
|
||||
result.created++;
|
||||
}
|
||||
generated++;
|
||||
}
|
||||
} catch (e) {
|
||||
console.debug('memoria:clusters: ' + String(e));
|
||||
// Non-critical: clusters are a quality enhancement, not required
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Group active (non-superseded, non-cluster) facts by their primary entity.
|
||||
* Uses entity_ids from the knowledge graph when available,
|
||||
* falls back to keyword extraction.
|
||||
*/
|
||||
private groupFactsByEntity(): Map<string, Fact[]> {
|
||||
const groups = new Map<string, Fact[]>();
|
||||
|
||||
// Get all active non-cluster facts
|
||||
const facts = this.db.raw.prepare(
|
||||
"SELECT * FROM facts WHERE superseded = 0 AND (fact_type != 'cluster' OR fact_type IS NULL) ORDER BY created_at DESC"
|
||||
).all() as Fact[];
|
||||
|
||||
for (const fact of facts) {
|
||||
// Try entity_ids first (from knowledge graph)
|
||||
let entities: string[] = [];
|
||||
try {
|
||||
const ids = JSON.parse(fact.entity_ids || "[]") as string[];
|
||||
if (ids.length > 0) {
|
||||
// Look up entity names
|
||||
for (const id of ids) {
|
||||
const ent = this.db.raw.prepare("SELECT name FROM entities WHERE id = ?").get(id) as { name: string } | undefined;
|
||||
if (ent) entities.push(ent.name);
|
||||
}
|
||||
}
|
||||
} catch (e) { console.debug('memoria:clusters: ' + String(e)); }
|
||||
|
||||
// Fallback: extract proper nouns as entity proxies
|
||||
if (entities.length === 0) {
|
||||
entities = this.extractProperNouns(fact.fact);
|
||||
}
|
||||
|
||||
// Add fact to each entity group
|
||||
for (const entity of entities) {
|
||||
const key = entity.toLowerCase().trim();
|
||||
if (key.length < 2) continue;
|
||||
if (!groups.has(key)) groups.set(key, []);
|
||||
groups.get(key)!.push(fact);
|
||||
}
|
||||
}
|
||||
|
||||
// Sort by group size (largest first) and filter minimum
|
||||
const sorted = new Map(
|
||||
Array.from(groups.entries())
|
||||
.filter(([, facts]) => facts.length >= MIN_FACTS_FOR_CLUSTER)
|
||||
.sort((a, b) => b[1].length - a[1].length)
|
||||
);
|
||||
|
||||
return sorted;
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract proper nouns from text (capitalized words that aren't sentence-starters).
|
||||
*/
|
||||
private extractProperNouns(text: string): string[] {
|
||||
const nouns = new Set<string>();
|
||||
|
||||
// Match capitalized words (2+ chars) that appear after other text
|
||||
const matches = text.match(/(?<=\s)[A-Z][a-zéèêëàâäôöùûüïîç]+(?:\s+[A-Z][a-zéèêëàâäôöùûüïîç]+)*/g) || [];
|
||||
for (const m of matches) {
|
||||
if (m.length > 2) nouns.add(m);
|
||||
}
|
||||
|
||||
// Also match common entity patterns
|
||||
const techTerms = text.match(/\b(?:Memoria|Bureau|Convex|Primask|DockGroups|Sol|Luna|Koda|Neto|Ollama|Vercel|Cloudflare|Qonto|Alexandre|Pierre|HydroTrack|OpenClaw)\b/gi) || [];
|
||||
for (const t of techTerms) {
|
||||
nouns.add(t.charAt(0).toUpperCase() + t.slice(1).toLowerCase());
|
||||
}
|
||||
|
||||
return Array.from(nouns);
|
||||
}
|
||||
|
||||
/**
|
||||
* Find existing cluster for an entity.
|
||||
*/
|
||||
private findCluster(entityName: string): Fact | undefined {
|
||||
const pattern = `%${entityName}%`;
|
||||
return this.db.raw.prepare(
|
||||
"SELECT * FROM facts WHERE fact_type = 'cluster' AND superseded = 0 AND source LIKE ? LIMIT 1"
|
||||
).get(`cluster:${entityName.toLowerCase()}`) as Fact | undefined
|
||||
// Fallback: search by source field
|
||||
|| this.db.raw.prepare(
|
||||
"SELECT * FROM facts WHERE fact_type = 'cluster' AND superseded = 0 AND source = ? LIMIT 1"
|
||||
).get(`cluster:${entityName.toLowerCase()}`) as Fact | undefined;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if a cluster is stale (member superseded or too old).
|
||||
*/
|
||||
private isStale(cluster: Fact): boolean {
|
||||
try {
|
||||
const meta = JSON.parse(cluster.tags) as ClusterMeta;
|
||||
if (meta.stale) return true;
|
||||
|
||||
// Check age
|
||||
const ageHours = (Date.now() - meta.generatedAt) / (3600 * 1000);
|
||||
if (ageHours > CLUSTER_REGEN_HOURS) return true;
|
||||
|
||||
// Check if any member was superseded
|
||||
for (const id of meta.memberIds) {
|
||||
const member = this.db.getFact(id);
|
||||
if (member && member.superseded) return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
} catch (e) {
|
||||
console.debug('memoria:clusters: ' + String(e));
|
||||
return true; // Can't parse meta → treat as stale
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Mark clusters as stale if any member fact was recently superseded.
|
||||
*/
|
||||
private markStaleClusters(): number {
|
||||
let staleCount = 0;
|
||||
const clusters = this.db.raw.prepare(
|
||||
"SELECT * FROM facts WHERE fact_type = 'cluster' AND superseded = 0"
|
||||
).all() as Fact[];
|
||||
|
||||
for (const cluster of clusters) {
|
||||
if (this.isStale(cluster)) {
|
||||
try {
|
||||
const meta = JSON.parse(cluster.tags) as ClusterMeta;
|
||||
meta.stale = true;
|
||||
this.db.raw.prepare("UPDATE facts SET tags = ?, updated_at = ? WHERE id = ?")
|
||||
.run(JSON.stringify(meta), Date.now(), cluster.id);
|
||||
staleCount++;
|
||||
} catch (e) { console.debug('memoria:clusters: ' + String(e)); }
|
||||
}
|
||||
}
|
||||
|
||||
return staleCount;
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate cluster text via LLM.
|
||||
*/
|
||||
private async generateClusterText(entityName: string, facts: Fact[]): Promise<string | null> {
|
||||
// Take most recent facts, up to limit
|
||||
const selected = facts
|
||||
.sort((a, b) => b.created_at - a.created_at)
|
||||
.slice(0, MAX_CLUSTER_FACTS);
|
||||
|
||||
const factsText = selected
|
||||
.map((f, i) => `${i + 1}. [${f.category}] ${f.fact}`)
|
||||
.join("\n");
|
||||
|
||||
const prompt = CLUSTER_PROMPT
|
||||
.replace("{ENTITY}", entityName)
|
||||
.replace("{FACTS}", factsText);
|
||||
|
||||
try {
|
||||
const genFn = this.llm.generateWithMeta;
|
||||
if (!genFn) return null;
|
||||
const result = await genFn.call(this.llm, prompt, {
|
||||
maxTokens: 300,
|
||||
temperature: 0.1,
|
||||
timeoutMs: 15000,
|
||||
});
|
||||
|
||||
if (!result?.response) return null;
|
||||
|
||||
// Clean response: remove JSON wrapping if present, take plain text
|
||||
let text = result.response.trim();
|
||||
// Remove markdown formatting artifacts
|
||||
text = text.replace(/^```[\s\S]*?```$/gm, "").trim();
|
||||
text = text.replace(/^["']|["']$/g, "").trim();
|
||||
|
||||
if (text.length < 20) return null;
|
||||
return text;
|
||||
} catch (e) {
|
||||
console.debug('memoria:clusters: ' + String(e));
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new cluster fact.
|
||||
*/
|
||||
private createCluster(entityName: string, text: string, memberFacts: Fact[]): void {
|
||||
const members = memberFacts.slice(0, MAX_CLUSTER_FACTS);
|
||||
const meta: ClusterMeta = {
|
||||
memberIds: members.map(f => f.id),
|
||||
entityName,
|
||||
generatedAt: Date.now(),
|
||||
stale: false,
|
||||
};
|
||||
|
||||
const clusterId = `cluster_${Date.now()}_${Math.random().toString(36).slice(2, 9)}`;
|
||||
this.db.storeFact({
|
||||
id: clusterId,
|
||||
fact: text,
|
||||
category: memberFacts[0]?.category || "savoir",
|
||||
confidence: 0.85,
|
||||
source: `cluster:${entityName.toLowerCase()}`,
|
||||
tags: JSON.stringify(meta),
|
||||
agent: "memoria",
|
||||
created_at: Date.now(),
|
||||
updated_at: Date.now(),
|
||||
fact_type: "cluster",
|
||||
});
|
||||
|
||||
// Populate cluster_members table
|
||||
this.syncClusterMembers(clusterId, members);
|
||||
}
|
||||
|
||||
/**
|
||||
* Update an existing cluster with fresh text and members.
|
||||
*/
|
||||
private updateCluster(clusterId: string, text: string, memberFacts: Fact[]): void {
|
||||
const members = memberFacts.slice(0, MAX_CLUSTER_FACTS);
|
||||
const meta: ClusterMeta = {
|
||||
memberIds: members.map(f => f.id),
|
||||
entityName: memberFacts[0]?.category || "entity",
|
||||
generatedAt: Date.now(),
|
||||
stale: false,
|
||||
};
|
||||
|
||||
this.db.raw.prepare(
|
||||
"UPDATE facts SET fact = ?, tags = ?, updated_at = ? WHERE id = ?"
|
||||
).run(text, JSON.stringify(meta), Date.now(), clusterId);
|
||||
|
||||
// Refresh cluster_members table
|
||||
this.syncClusterMembers(clusterId, members);
|
||||
}
|
||||
|
||||
/**
|
||||
* Sync the cluster_members relational table with the cluster's member facts.
|
||||
* Replaces all existing entries for this cluster.
|
||||
*/
|
||||
private syncClusterMembers(clusterId: string, memberFacts: Fact[]): void {
|
||||
try {
|
||||
const raw = this.db.raw;
|
||||
raw.prepare("DELETE FROM cluster_members WHERE cluster_id = ?").run(clusterId);
|
||||
const insert = raw.prepare("INSERT OR IGNORE INTO cluster_members (cluster_id, fact_id) VALUES (?, ?)");
|
||||
for (const f of memberFacts) {
|
||||
insert.run(clusterId, f.id);
|
||||
}
|
||||
} catch (e) { console.debug('memoria:clusters: ' + String(e)); }
|
||||
}
|
||||
|
||||
/**
|
||||
* Stats for logging.
|
||||
*/
|
||||
stats(): { total: number; stale: number } {
|
||||
const total = (this.db.raw.prepare(
|
||||
"SELECT COUNT(*) as c FROM facts WHERE fact_type = 'cluster' AND superseded = 0"
|
||||
).get() as { c: number }).c;
|
||||
const stale = (this.db.raw.prepare(
|
||||
"SELECT COUNT(*) as c FROM facts WHERE fact_type = 'cluster' AND superseded = 0 AND tags LIKE '%\"stale\":true%'"
|
||||
).get() as { c: number }).c;
|
||||
return { total, stale };
|
||||
}
|
||||
}
|
||||
261
openclaw-memoria-port/core/fallback.ts
Normal file
261
openclaw-memoria-port/core/fallback.ts
Normal file
@@ -0,0 +1,261 @@
|
||||
/**
|
||||
* Memoria — Layer 12: Fallback Chain
|
||||
*
|
||||
* Implements both LLMProvider and EmbedProvider interfaces.
|
||||
* Tries providers in order; first successful response wins.
|
||||
* If all fail → throws (callers wrap in try/catch for graceful degradation).
|
||||
*
|
||||
* Default order (configurable via "fallback" in plugin config):
|
||||
* 1. Ollama gemma3:4b (local, 0€)
|
||||
* 2. OpenAI GPT-5.4-nano (cloud, ~$0.001)
|
||||
* 3. LM Studio GLM-4.7 (local, 0€)
|
||||
*
|
||||
* Used by: every module that needs LLM (selective, graph, topics, observations,
|
||||
* clusters, procedural, revision, patterns). Modules receive the chain via constructor
|
||||
* and don't know/care about the fallback — they see a single LLMProvider.
|
||||
*
|
||||
* @example
|
||||
* const chain = new FallbackChain([
|
||||
* { type: "ollama", model: "gemma3:4b" },
|
||||
* { type: "openai", model: "gpt-5.4-nano", apiKey: "..." }
|
||||
* ]);
|
||||
* const answer = await chain.generate("Extract entities..."); // tries Ollama first
|
||||
* 4. null → FTS-only / skip
|
||||
*/
|
||||
|
||||
import type { LLMProvider, EmbedProvider, GenerateOptions, GenerateResult } from "./providers/types.js";
|
||||
import { OllamaLLM, OllamaEmbed } from "./providers/ollama.js";
|
||||
import {
|
||||
OpenAICompatLLM,
|
||||
OpenAICompatEmbed,
|
||||
lmStudioLLM,
|
||||
lmStudioEmbed,
|
||||
} from "./providers/openai-compat.js";
|
||||
import { AnthropicLLM } from "./providers/anthropic.js";
|
||||
|
||||
// ─── Config ───
|
||||
|
||||
export interface FallbackProviderConfig {
|
||||
name: string;
|
||||
type: "ollama" | "lmstudio" | "openai" | "openrouter" | "anthropic";
|
||||
model: string;
|
||||
baseUrl?: string;
|
||||
apiKey?: string;
|
||||
timeoutMs?: number;
|
||||
/** For embed providers */
|
||||
embedModel?: string;
|
||||
embedDimensions?: number;
|
||||
}
|
||||
|
||||
export interface FallbackConfig {
|
||||
providers: FallbackProviderConfig[];
|
||||
/** Global timeout per provider attempt. Default 15000ms */
|
||||
defaultTimeoutMs: number;
|
||||
}
|
||||
|
||||
export const DEFAULT_FALLBACK_CONFIG: FallbackConfig = {
|
||||
providers: [
|
||||
{
|
||||
name: "ollama",
|
||||
type: "ollama",
|
||||
model: "gemma3:4b",
|
||||
baseUrl: "http://localhost:11434",
|
||||
timeoutMs: 12000,
|
||||
embedModel: "nomic-embed-text-v2-moe",
|
||||
embedDimensions: 768,
|
||||
},
|
||||
{
|
||||
name: "openai",
|
||||
type: "openai",
|
||||
model: "gpt-5.4-nano",
|
||||
baseUrl: "https://api.openai.com/v1",
|
||||
timeoutMs: 15000,
|
||||
},
|
||||
{
|
||||
name: "lmstudio",
|
||||
type: "lmstudio",
|
||||
model: "auto",
|
||||
baseUrl: "http://localhost:1234/v1",
|
||||
timeoutMs: 12000,
|
||||
},
|
||||
],
|
||||
defaultTimeoutMs: 15000,
|
||||
};
|
||||
|
||||
// ─── Provider Factory ───
|
||||
|
||||
function createLLM(cfg: FallbackProviderConfig): LLMProvider {
|
||||
switch (cfg.type) {
|
||||
case "ollama":
|
||||
return new OllamaLLM(cfg.baseUrl, cfg.model);
|
||||
case "lmstudio":
|
||||
return lmStudioLLM(cfg.model, cfg.baseUrl);
|
||||
case "openai":
|
||||
return new OpenAICompatLLM(
|
||||
cfg.name,
|
||||
cfg.baseUrl || "https://api.openai.com/v1",
|
||||
cfg.model,
|
||||
cfg.apiKey || "",
|
||||
);
|
||||
case "openrouter":
|
||||
return new OpenAICompatLLM(
|
||||
cfg.name,
|
||||
cfg.baseUrl || "https://openrouter.ai/api/v1",
|
||||
cfg.model,
|
||||
cfg.apiKey || "",
|
||||
);
|
||||
case "anthropic":
|
||||
return new AnthropicLLM(cfg.model, cfg.apiKey || "", cfg.baseUrl);
|
||||
default:
|
||||
throw new Error(`Unknown provider type: ${cfg.type}`);
|
||||
}
|
||||
}
|
||||
|
||||
function createEmbed(cfg: FallbackProviderConfig): EmbedProvider | null {
|
||||
if (!cfg.embedModel) return null;
|
||||
switch (cfg.type) {
|
||||
case "ollama":
|
||||
return new OllamaEmbed(cfg.baseUrl, cfg.embedModel, cfg.embedDimensions);
|
||||
case "lmstudio":
|
||||
return lmStudioEmbed(cfg.embedModel!, cfg.embedDimensions, cfg.baseUrl);
|
||||
default:
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
// ─── Fallback Chain ───
|
||||
|
||||
export interface FallbackResult {
|
||||
response: string;
|
||||
provider: string;
|
||||
attemptMs: number;
|
||||
fallbacksUsed: number;
|
||||
}
|
||||
|
||||
export class FallbackChain implements LLMProvider {
|
||||
private providers: FallbackProviderConfig[];
|
||||
private llmInstances: Map<string, LLMProvider> = new Map();
|
||||
private embedInstances: Map<string, EmbedProvider> = new Map();
|
||||
private defaultTimeoutMs: number;
|
||||
private logger?: { info?: (...args: any[]) => void; warn?: (...args: any[]) => void; debug?: (...args: any[]) => void };
|
||||
|
||||
get name(): string {
|
||||
return `fallback(${this.providerNames.join("→")})`;
|
||||
}
|
||||
|
||||
constructor(config?: Partial<FallbackConfig>, logger?: typeof FallbackChain.prototype.logger) {
|
||||
const cfg = { ...DEFAULT_FALLBACK_CONFIG, ...config };
|
||||
this.providers = cfg.providers;
|
||||
this.defaultTimeoutMs = cfg.defaultTimeoutMs;
|
||||
this.logger = logger;
|
||||
}
|
||||
|
||||
/**
|
||||
* LLMProvider-compatible generate: returns string or throws.
|
||||
* Modules (selective, graph, topics, context-tree) call this interface.
|
||||
*/
|
||||
async generate(prompt: string, options?: GenerateOptions): Promise<string> {
|
||||
const result = await this.generateWithMeta(prompt, options);
|
||||
if (!result) throw new Error("All LLM providers failed");
|
||||
return result.response;
|
||||
}
|
||||
|
||||
/**
|
||||
* Try to generate text using the fallback chain with metadata.
|
||||
* Returns null if ALL providers fail (caller should handle FTS-only mode).
|
||||
*/
|
||||
async generateWithMeta(prompt: string, options?: GenerateOptions): Promise<FallbackResult | null> {
|
||||
let fallbacksUsed = 0;
|
||||
|
||||
for (const provCfg of this.providers) {
|
||||
const start = Date.now();
|
||||
const timeoutMs = provCfg.timeoutMs || this.defaultTimeoutMs;
|
||||
|
||||
try {
|
||||
const llm = this.getLLM(provCfg);
|
||||
|
||||
// Race between LLM call and timeout
|
||||
const response = await Promise.race([
|
||||
llm.generate(prompt, { ...options, timeoutMs }),
|
||||
this.timeout(timeoutMs, provCfg.name),
|
||||
]);
|
||||
|
||||
if (!response || response.trim().length === 0) {
|
||||
throw new Error("Empty response");
|
||||
}
|
||||
|
||||
const elapsed = Date.now() - start;
|
||||
|
||||
if (fallbacksUsed > 0) {
|
||||
this.logger?.info?.(`memoria/fallback: ${provCfg.name} responded in ${elapsed}ms (after ${fallbacksUsed} fallback(s))`);
|
||||
} else {
|
||||
this.logger?.debug?.(`memoria/fallback: ${provCfg.name} responded in ${elapsed}ms`);
|
||||
}
|
||||
|
||||
return {
|
||||
response,
|
||||
provider: provCfg.name,
|
||||
attemptMs: elapsed,
|
||||
fallbacksUsed,
|
||||
};
|
||||
} catch (err) {
|
||||
const elapsed = Date.now() - start;
|
||||
this.logger?.warn?.(`memoria/fallback: ${provCfg.name} failed in ${elapsed}ms: ${String(err).slice(0, 100)}`);
|
||||
fallbacksUsed++;
|
||||
}
|
||||
}
|
||||
|
||||
this.logger?.warn?.(`memoria/fallback: ALL providers failed (${this.providers.length} attempts)`);
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the best available embed provider (first one that has embed config).
|
||||
* Embed doesn't need full fallback chain — just use the first available.
|
||||
*/
|
||||
getEmbedProvider(): EmbedProvider | null {
|
||||
for (const provCfg of this.providers) {
|
||||
if (!provCfg.embedModel) continue;
|
||||
try {
|
||||
return this.getEmbed(provCfg);
|
||||
} catch (_e) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
// ─── Private ───
|
||||
|
||||
private getLLM(cfg: FallbackProviderConfig): LLMProvider {
|
||||
if (!this.llmInstances.has(cfg.name)) {
|
||||
this.llmInstances.set(cfg.name, createLLM(cfg));
|
||||
}
|
||||
return this.llmInstances.get(cfg.name)!;
|
||||
}
|
||||
|
||||
private getEmbed(cfg: FallbackProviderConfig): EmbedProvider {
|
||||
if (!this.embedInstances.has(cfg.name)) {
|
||||
const embed = createEmbed(cfg);
|
||||
if (!embed) throw new Error(`No embed config for ${cfg.name}`);
|
||||
this.embedInstances.set(cfg.name, embed);
|
||||
}
|
||||
return this.embedInstances.get(cfg.name)!;
|
||||
}
|
||||
|
||||
private timeout(ms: number, name: string): Promise<never> {
|
||||
return new Promise((_, reject) => {
|
||||
setTimeout(() => reject(new Error(`${name} timeout after ${ms}ms`)), ms);
|
||||
});
|
||||
}
|
||||
|
||||
/** Get primary LLM (first in chain) for direct use where fallback isn't needed */
|
||||
get primaryLLM(): LLMProvider {
|
||||
return this.getLLM(this.providers[0]);
|
||||
}
|
||||
|
||||
/** Provider names in order */
|
||||
get providerNames(): string[] {
|
||||
return this.providers.map(p => p.name);
|
||||
}
|
||||
}
|
||||
326
openclaw-memoria-port/core/feedback.ts
Normal file
326
openclaw-memoria-port/core/feedback.ts
Normal file
@@ -0,0 +1,326 @@
|
||||
/**
|
||||
* Memoria — Feedback Loop
|
||||
*
|
||||
* Le chaînon manquant : mesurer si les faits rappelés ont été UTILES.
|
||||
*
|
||||
* Cycle :
|
||||
* 1. Recall injecte N faits → on stocke leurs IDs
|
||||
* 2. Agent produit une réponse → on compare
|
||||
* 3. Si un fait a contribué à la réponse → usefulness++
|
||||
* 4. Si un fait est ignoré systématiquement → decay accéléré
|
||||
* 5. Le scoring en tient compte au prochain recall
|
||||
*
|
||||
* "Neurons that fire together wire together" — mais aussi :
|
||||
* "Neurons that DON'T fire get pruned"
|
||||
*/
|
||||
|
||||
import type { MemoriaDB, Fact } from "./db.js";
|
||||
|
||||
// ─── Config ───
|
||||
|
||||
export interface FeedbackConfig {
|
||||
/** Minimum keyword overlap (0-1) to consider a fact "used" in response. Default 0.25 */
|
||||
usedThreshold: number;
|
||||
/** Boost to usefulness when a fact is used. Default 1.0 */
|
||||
usedBoost: number;
|
||||
/** Penalty when recalled but not used. Default -0.1 */
|
||||
ignoredPenalty: number;
|
||||
/** After this many ignored recalls, fact gets decay penalty. Default 10 */
|
||||
ignoredDecayThreshold: number;
|
||||
/** Max usefulness score (cap). Default 20 */
|
||||
maxUsefulness: number;
|
||||
/** Min usefulness before fact gets deprioritized. Default -3 */
|
||||
minUsefulness: number;
|
||||
}
|
||||
|
||||
export const DEFAULT_FEEDBACK_CONFIG: FeedbackConfig = {
|
||||
usedThreshold: 0.25,
|
||||
usedBoost: 1.0,
|
||||
ignoredPenalty: -0.1,
|
||||
ignoredDecayThreshold: 10,
|
||||
maxUsefulness: 20,
|
||||
minUsefulness: -3,
|
||||
};
|
||||
|
||||
// ─── Session state ───
|
||||
|
||||
export interface RecallRecord {
|
||||
factIds: string[];
|
||||
timestamp: number;
|
||||
prompt: string;
|
||||
}
|
||||
|
||||
// ─── Feedback Manager ───
|
||||
|
||||
export class FeedbackManager {
|
||||
private db: MemoriaDB;
|
||||
private cfg: FeedbackConfig;
|
||||
|
||||
/** Facts injected during the current recall (reset each turn) */
|
||||
private lastRecall: RecallRecord | null = null;
|
||||
|
||||
constructor(db: MemoriaDB, config?: Partial<FeedbackConfig>) {
|
||||
this.cfg = { ...DEFAULT_FEEDBACK_CONFIG, ...config };
|
||||
this.db = db;
|
||||
}
|
||||
|
||||
/** Called at recall time: record which facts were injected */
|
||||
recordRecall(factIds: string[], prompt: string): void {
|
||||
this.lastRecall = {
|
||||
factIds,
|
||||
timestamp: Date.now(),
|
||||
prompt,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Called at agent_end: compare recalled facts with the response.
|
||||
* Returns stats about what was used vs ignored.
|
||||
*/
|
||||
async processResponse(responseText: string): Promise<{
|
||||
used: number;
|
||||
ignored: number;
|
||||
details: Array<{ id: string; used: boolean; overlap: number }>;
|
||||
}> {
|
||||
if (!this.lastRecall || this.lastRecall.factIds.length === 0) {
|
||||
return { used: 0, ignored: 0, details: [] };
|
||||
}
|
||||
|
||||
const recall = this.lastRecall;
|
||||
this.lastRecall = null; // Reset for next turn
|
||||
|
||||
const responseLower = responseText.toLowerCase();
|
||||
const responseKeywords = extractKeywords(responseLower);
|
||||
|
||||
const details: Array<{ id: string; used: boolean; overlap: number }> = [];
|
||||
let used = 0;
|
||||
let ignored = 0;
|
||||
|
||||
for (const factId of recall.factIds) {
|
||||
const fact = this.db.getFact(factId);
|
||||
if (!fact) continue;
|
||||
|
||||
const overlap = computeOverlap(fact.fact, responseLower, responseKeywords);
|
||||
const wasUsed = overlap >= this.cfg.usedThreshold;
|
||||
|
||||
if (wasUsed) {
|
||||
used++;
|
||||
this.updateUsefulness(factId, this.cfg.usedBoost);
|
||||
} else {
|
||||
ignored++;
|
||||
this.updateUsefulness(factId, this.cfg.ignoredPenalty);
|
||||
}
|
||||
|
||||
details.push({ id: factId, used: wasUsed, overlap });
|
||||
}
|
||||
|
||||
return { used, ignored, details };
|
||||
}
|
||||
|
||||
/** Update usefulness score in DB */
|
||||
private updateUsefulness(factId: string, delta: number): void {
|
||||
try {
|
||||
const raw = this.db.raw;
|
||||
|
||||
// Get current values
|
||||
const fact = raw.prepare(
|
||||
"SELECT usefulness, recall_count, used_count FROM facts WHERE id = ?"
|
||||
).get(factId) as { usefulness: number; recall_count: number; used_count: number } | undefined;
|
||||
|
||||
if (!fact) return;
|
||||
|
||||
const newUsefulness = Math.max(
|
||||
this.cfg.minUsefulness,
|
||||
Math.min(this.cfg.maxUsefulness, (fact.usefulness || 0) + delta)
|
||||
);
|
||||
const newRecallCount = (fact.recall_count || 0) + 1;
|
||||
const newUsedCount = delta > 0 ? (fact.used_count || 0) + 1 : (fact.used_count || 0);
|
||||
|
||||
raw.prepare(
|
||||
"UPDATE facts SET usefulness = ?, recall_count = ?, used_count = ?, last_accessed_at = ? WHERE id = ?"
|
||||
).run(newUsefulness, newRecallCount, newUsedCount, Date.now(), factId);
|
||||
} catch (_e) {
|
||||
// Non-critical — don't crash on feedback failure
|
||||
}
|
||||
}
|
||||
|
||||
/** Get feedback stats for debugging */
|
||||
getStats(): { totalWithFeedback: number; avgUsefulness: number; mostUseful: string[]; leastUseful: string[] } {
|
||||
try {
|
||||
const raw = this.db.raw;
|
||||
|
||||
const total = raw.prepare(
|
||||
"SELECT COUNT(*) as cnt FROM facts WHERE recall_count > 0 AND superseded = 0"
|
||||
).get() as { cnt: number };
|
||||
|
||||
const avg = raw.prepare(
|
||||
"SELECT AVG(usefulness) as avg FROM facts WHERE recall_count > 0 AND superseded = 0"
|
||||
).get() as { avg: number | null };
|
||||
|
||||
const best = raw.prepare(
|
||||
"SELECT id FROM facts WHERE recall_count > 0 AND superseded = 0 ORDER BY usefulness DESC LIMIT 5"
|
||||
).all() as { id: string }[];
|
||||
|
||||
const worst = raw.prepare(
|
||||
"SELECT id FROM facts WHERE recall_count > 0 AND superseded = 0 ORDER BY usefulness ASC LIMIT 5"
|
||||
).all() as { id: string }[];
|
||||
|
||||
return {
|
||||
totalWithFeedback: total.cnt,
|
||||
avgUsefulness: avg.avg ?? 0,
|
||||
mostUseful: best.map(r => r.id),
|
||||
leastUseful: worst.map(r => r.id),
|
||||
};
|
||||
} catch (_e) {
|
||||
return { totalWithFeedback: 0, avgUsefulness: 0, mostUseful: [], leastUseful: [] };
|
||||
}
|
||||
}
|
||||
|
||||
// ═══════════════════════════════════════════════════
|
||||
// USER CORRECTION DETECTION
|
||||
// When the user says "non c'est X" → the last recalled
|
||||
// facts that mentioned the wrong thing should be penalized
|
||||
// and potentially superseded.
|
||||
// ═══════════════════════════════════════════════════
|
||||
|
||||
/** Correction patterns (FR + EN) */
|
||||
private static readonly CORRECTION_PATTERNS = [
|
||||
// French
|
||||
/\bnon[,.]?\s+(c'est|c est)\b/i,
|
||||
/\ben fait[,.]?\s+(c'est|c est|il|elle|on)\b/i,
|
||||
/\bpas\s+\w+[,.]?\s+(c'est|c est)\b/i,
|
||||
/\bje (te |t'|t )?dis que\b/i,
|
||||
/\bc'est (pas|plus)\b/i,
|
||||
/\bt'as tort\b/i,
|
||||
/\bje (te |t')?corrige\b/i,
|
||||
/\bt(u|')\s*(as)?\s*oubli(é|e)\b/i,
|
||||
/\bje (te |t')?rappelle que\b/i,
|
||||
/\bmais non\b/i,
|
||||
// English
|
||||
/\bno[,.]?\s+(it's|it is|that's|that is)\b/i,
|
||||
/\bactually[,.]?\s+(it's|it is|that|the)\b/i,
|
||||
/\bthat's (wrong|incorrect|not right|not true)\b/i,
|
||||
/\byou('re| are) wrong\b/i,
|
||||
/\bi (just )?told you\b/i,
|
||||
/\bi said\b/i,
|
||||
];
|
||||
|
||||
/** Frustration patterns */
|
||||
private static readonly FRUSTRATION_PATTERNS = [
|
||||
/\bputain\b/i,
|
||||
/\bbordel\b/i,
|
||||
/\bmerde\b/i,
|
||||
/\bserieux\b/i,
|
||||
/\bsérieux\b/i,
|
||||
/\bc'est pas possible\b/i,
|
||||
/\bfuck\b/i,
|
||||
/\bdamn\b/i,
|
||||
/\bwhat the\b/i,
|
||||
/\bwtf\b/i,
|
||||
/\bnon mais\b/i,
|
||||
/\bencore\s*[!?]/i,
|
||||
/\bpourquoi (tu|t')\b.*\?/i, // "pourquoi tu fais ça ?"
|
||||
/\bje (te |t'|t )?(l')?ai (déjà|deja) dit\b/i,
|
||||
];
|
||||
|
||||
/**
|
||||
* Analyze user message for correction signals.
|
||||
* Returns penalty to apply to last-recalled facts + the corrected topic if found.
|
||||
*/
|
||||
analyzeUserMessage(userMessage: string): {
|
||||
isCorrection: boolean;
|
||||
isFrustration: boolean;
|
||||
penalty: number;
|
||||
correctionText: string | null;
|
||||
} {
|
||||
const isCorrection = FeedbackManager.CORRECTION_PATTERNS.some(p => p.test(userMessage));
|
||||
const isFrustration = FeedbackManager.FRUSTRATION_PATTERNS.some(p => p.test(userMessage));
|
||||
|
||||
let penalty = 0;
|
||||
if (isCorrection) penalty += -1.5; // Strong signal: facts were wrong
|
||||
if (isFrustration) penalty += -0.5; // Mild signal: facts may have been unhelpful
|
||||
|
||||
// Extract what was corrected (text after the correction pattern)
|
||||
let correctionText: string | null = null;
|
||||
if (isCorrection) {
|
||||
for (const pattern of FeedbackManager.CORRECTION_PATTERNS) {
|
||||
const match = userMessage.match(pattern);
|
||||
if (match) {
|
||||
const idx = match.index! + match[0].length;
|
||||
const rest = userMessage.slice(idx).trim();
|
||||
if (rest.length > 5) {
|
||||
correctionText = rest.slice(0, 200); // Cap at 200 chars
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return { isCorrection, isFrustration, penalty, correctionText };
|
||||
}
|
||||
|
||||
/**
|
||||
* Apply correction/frustration penalty to the last recalled facts.
|
||||
* Returns the facts that were penalized.
|
||||
*/
|
||||
applyUserSignal(penalty: number): string[] {
|
||||
if (!this.lastRecall || this.lastRecall.factIds.length === 0 || penalty >= 0) {
|
||||
return [];
|
||||
}
|
||||
|
||||
const penalized: string[] = [];
|
||||
for (const factId of this.lastRecall.factIds) {
|
||||
try {
|
||||
this.updateUsefulness(factId, penalty);
|
||||
penalized.push(factId);
|
||||
} catch (_e) { /* non-critical */ }
|
||||
}
|
||||
return penalized;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get IDs of facts from last recall (for external use — e.g., contradiction check).
|
||||
*/
|
||||
getLastRecalledIds(): string[] {
|
||||
return this.lastRecall?.factIds ?? [];
|
||||
}
|
||||
}
|
||||
|
||||
// ─── Helpers ───
|
||||
|
||||
/** Extract meaningful keywords from text */
|
||||
function extractKeywords(text: string): Set<string> {
|
||||
return new Set(
|
||||
text.replace(/[^\p{L}\p{N}\s]/gu, " ")
|
||||
.split(/\s+/)
|
||||
.filter(w => w.length > 3) // Only words > 3 chars for meaningful overlap
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute keyword overlap between a fact and a response.
|
||||
* Uses Jaccard-like metric but weighted: longer shared words count more.
|
||||
*/
|
||||
function computeOverlap(
|
||||
factText: string,
|
||||
responseLower: string,
|
||||
responseKeywords: Set<string>
|
||||
): number {
|
||||
const factKeywords = extractKeywords(factText.toLowerCase());
|
||||
if (factKeywords.size === 0) return 0;
|
||||
|
||||
let matchWeight = 0;
|
||||
let totalWeight = 0;
|
||||
|
||||
for (const word of factKeywords) {
|
||||
// Weight longer words more (technical terms, proper nouns)
|
||||
const weight = Math.min(word.length / 5, 2);
|
||||
totalWeight += weight;
|
||||
|
||||
if (responseKeywords.has(word) || responseLower.includes(word)) {
|
||||
matchWeight += weight;
|
||||
}
|
||||
}
|
||||
|
||||
return totalWeight > 0 ? matchWeight / totalWeight : 0;
|
||||
}
|
||||
54
openclaw-memoria-port/core/format.ts
Normal file
54
openclaw-memoria-port/core/format.ts
Normal file
@@ -0,0 +1,54 @@
|
||||
/**
|
||||
* 🧠 Memoria — Recall context formatting
|
||||
*
|
||||
* This module exports:
|
||||
* - formatRecallContext() — format recalled facts into text block for prompt injection
|
||||
*/
|
||||
|
||||
/**
|
||||
* Format recalled facts + observations into the text block injected before the prompt.
|
||||
* Output goes into `event.prependContext` in the before_prompt_build hook.
|
||||
* Includes: header, observations section, per-fact lines with [category] [age] prefix, known procedures.
|
||||
*/
|
||||
export function formatRecallContext(facts: Array<{ fact: string; category: string; confidence: number; temporalScore: number; created_at?: number; updated_at?: number; fact_type?: string }>, observationContext = ""): string {
|
||||
if (facts.length === 0 && !observationContext) return "";
|
||||
const parts: string[] = [
|
||||
"## 🧠 Memoria — Mémoire persistante",
|
||||
"Faits provenant de la mémoire long terme (source de vérité).",
|
||||
"En cas de conflit avec un résumé LCM → la mémoire persistante a priorité.",
|
||||
"Les faits les plus récents (par date) sont les plus fiables en cas de contradiction.",
|
||||
"",
|
||||
];
|
||||
|
||||
// Observations first (synthesized, higher quality)
|
||||
if (observationContext) {
|
||||
parts.push("### Observations (synthèses vivantes)");
|
||||
parts.push(observationContext);
|
||||
parts.push("");
|
||||
}
|
||||
|
||||
// Individual facts with dates for Knowledge Update disambiguation
|
||||
if (facts.length > 0) {
|
||||
if (observationContext) parts.push("### Faits individuels");
|
||||
const now = Date.now();
|
||||
const lines = facts.map(f => {
|
||||
const conf = f.confidence >= 0.9 ? "" : ` (${Math.round(f.confidence * 100)}%)`;
|
||||
// Add date tag so the answering model can disambiguate updates
|
||||
let dateTag = "";
|
||||
const ts = f.updated_at || f.created_at;
|
||||
if (ts && ts > 0) {
|
||||
const d = new Date(ts);
|
||||
const ageDays = Math.floor((now - ts) / 86400000);
|
||||
if (ageDays === 0) dateTag = ` [aujourd'hui]`;
|
||||
else if (ageDays === 1) dateTag = ` [hier]`;
|
||||
else if (ageDays < 7) dateTag = ` [il y a ${ageDays}j]`;
|
||||
else dateTag = ` [${d.toISOString().slice(0, 10)}]`;
|
||||
}
|
||||
return `- [${f.category}]${dateTag} ${f.fact}${conf}`;
|
||||
});
|
||||
parts.push(...lines);
|
||||
parts.push("");
|
||||
}
|
||||
|
||||
return parts.join("\n");
|
||||
}
|
||||
436
openclaw-memoria-port/core/graph.ts
Normal file
436
openclaw-memoria-port/core/graph.ts
Normal file
@@ -0,0 +1,436 @@
|
||||
/**
|
||||
* Memoria — Layer 5: Knowledge Graph
|
||||
*
|
||||
* Extracts entities (person/project/tool/concept/place) and relations from facts via LLM.
|
||||
* Enables associative recall: "Bureau" → Convex, CRM, Qonto, Alexandre...
|
||||
*
|
||||
* Key methods:
|
||||
* extractAndStore(facts) — LLM extracts entities/relations, stores in DB
|
||||
* findEntitiesInText(query) — find mentioned entities in a search query
|
||||
* getRelatedFacts(entityIds) — BFS 2 hops to find connected facts
|
||||
*
|
||||
* Note: Hebbian reinforcement (co-occurrence strengthening) is in hebbian.ts (Layer 16).
|
||||
*
|
||||
* Les connexions se RENFORCENT à chaque co-accès (Hebb: "neurons that fire together wire together").
|
||||
* Les connexions inutilisées s'AFFAIBLISSENT (decay).
|
||||
*/
|
||||
|
||||
import type { MemoriaDB, Entity, Relation } from "./db.js";
|
||||
import type { LLMProvider } from "./providers/types.js";
|
||||
|
||||
// ─── Config ───
|
||||
|
||||
export interface GraphConfig {
|
||||
/** Weight increment on co-access (Hebbian). Default 0.1 */
|
||||
hebbianIncrement: number;
|
||||
/** Decay factor per day for unused relations. Default 0.995 (~0.5% per day) */
|
||||
decayPerDay: number;
|
||||
/** Minimum weight before pruning. Default 0.05 */
|
||||
pruneThreshold: number;
|
||||
/** Max entities to extract per fact. Default 5 */
|
||||
maxEntitiesPerFact: number;
|
||||
/** Max hops for graph traversal. Default 2 */
|
||||
maxHops: number;
|
||||
/** Max related facts to return. Default 5 */
|
||||
maxRelatedFacts: number;
|
||||
}
|
||||
|
||||
export const DEFAULT_GRAPH_CONFIG: GraphConfig = {
|
||||
hebbianIncrement: 0.1,
|
||||
decayPerDay: 0.995,
|
||||
pruneThreshold: 0.05,
|
||||
maxEntitiesPerFact: 5,
|
||||
maxHops: 2,
|
||||
maxRelatedFacts: 5,
|
||||
};
|
||||
|
||||
// ─── Extraction prompt ───
|
||||
|
||||
const EXTRACT_PROMPT = `Extrais les entités et relations de ce fait.
|
||||
|
||||
Fait: "{FACT}"
|
||||
|
||||
Réponds UNIQUEMENT en JSON:
|
||||
{
|
||||
"entities": [
|
||||
{"name": "NomExact", "type": "person|project|tool|concept|place|company"}
|
||||
],
|
||||
"relations": [
|
||||
{"from": "NomExact1", "to": "NomExact2", "type": "uses|part_of|works_on|manages|created_by|depends_on|deployed_on|related_to"}
|
||||
]
|
||||
}
|
||||
|
||||
Règles:
|
||||
- Noms propres exacts (pas de descriptions)
|
||||
- Types stricts: person, project, tool, concept, place, company
|
||||
- Relations: max 3, les plus importantes
|
||||
- Si rien d'intéressant: {"entities": [], "relations": []}`;
|
||||
|
||||
// ─── Graph Manager ───
|
||||
|
||||
export class KnowledgeGraph {
|
||||
private db: MemoriaDB;
|
||||
private llm: LLMProvider;
|
||||
private cfg: GraphConfig;
|
||||
|
||||
constructor(db: MemoriaDB, llm: LLMProvider, config?: Partial<GraphConfig>) {
|
||||
this.db = db;
|
||||
this.llm = llm;
|
||||
this.cfg = { ...DEFAULT_GRAPH_CONFIG, ...config };
|
||||
}
|
||||
|
||||
private get rawDb() {
|
||||
return this.db.raw;
|
||||
}
|
||||
|
||||
// ─── Entity extraction ───
|
||||
|
||||
/** Extract entities and relations from a fact, store them */
|
||||
async extractAndStore(factId: string, factText: string): Promise<{ entities: number; relations: number }> {
|
||||
try {
|
||||
const prompt = EXTRACT_PROMPT.replace("{FACT}", factText);
|
||||
const response = await this.llm.generate(prompt, {
|
||||
maxTokens: 512,
|
||||
temperature: 0.1,
|
||||
format: "json",
|
||||
timeoutMs: 15000,
|
||||
});
|
||||
|
||||
const parsed = this.parseJSON(response) as {
|
||||
entities?: Array<{ name: string; type: string }>;
|
||||
relations?: Array<{ from: string; to: string; type: string }>;
|
||||
};
|
||||
|
||||
if (!parsed) return { entities: 0, relations: 0 };
|
||||
|
||||
let entCount = 0;
|
||||
let relCount = 0;
|
||||
|
||||
// Store entities
|
||||
const entityIds = new Map<string, string>();
|
||||
for (const e of (parsed.entities || []).slice(0, this.cfg.maxEntitiesPerFact)) {
|
||||
if (!e.name || e.name.length < 2) continue;
|
||||
const normalized = e.name.trim();
|
||||
const type = this.normalizeType(e.type);
|
||||
|
||||
// Find existing or create
|
||||
let entity = this.findEntityByName(normalized);
|
||||
if (!entity) {
|
||||
const id = `ent_${Date.now()}_${Math.random().toString(36).slice(2, 7)}`;
|
||||
this.rawDb.prepare(
|
||||
"INSERT INTO entities (id, name, type, attributes, created_at, access_count) VALUES (?, ?, ?, ?, ?, ?)"
|
||||
).run(id, normalized, type, "{}", Date.now(), 0);
|
||||
entity = { id, name: normalized, type, attributes: "{}", created_at: Date.now(), access_count: 0 };
|
||||
entCount++;
|
||||
}
|
||||
entityIds.set(normalized, entity.id);
|
||||
}
|
||||
|
||||
// Store relations
|
||||
for (const r of (parsed.relations || []).slice(0, 3)) {
|
||||
const fromId = entityIds.get(r.from) || this.findEntityByName(r.from)?.id;
|
||||
const toId = entityIds.get(r.to) || this.findEntityByName(r.to)?.id;
|
||||
if (!fromId || !toId || fromId === toId) continue;
|
||||
|
||||
const relType = this.normalizeRelType(r.type);
|
||||
this.upsertRelation(fromId, toId, relType, factId);
|
||||
relCount++;
|
||||
}
|
||||
|
||||
// Link fact to entities
|
||||
if (entityIds.size > 0) {
|
||||
const ids = Array.from(entityIds.values());
|
||||
this.rawDb.prepare(
|
||||
"UPDATE facts SET entity_ids = ? WHERE id = ?"
|
||||
).run(JSON.stringify(ids), factId);
|
||||
}
|
||||
|
||||
return { entities: entCount, relations: relCount };
|
||||
} catch (e) {
|
||||
console.debug('memoria:graph: ' + String(e));
|
||||
return { entities: 0, relations: 0 };
|
||||
}
|
||||
}
|
||||
|
||||
// ─── Graph traversal ───
|
||||
|
||||
/** Get related facts by traversing the graph from seed entities */
|
||||
getRelatedFacts(entityNames: string[], maxHops?: number, maxFacts?: number): Array<{ fact: string; id: string; score: number; path: string[] }> {
|
||||
const hops = maxHops ?? this.cfg.maxHops;
|
||||
const limit = maxFacts ?? this.cfg.maxRelatedFacts;
|
||||
|
||||
// Find seed entity IDs
|
||||
const seedIds = new Set<string>();
|
||||
for (const name of entityNames) {
|
||||
const entity = this.findEntityByName(name);
|
||||
if (entity) seedIds.add(entity.id);
|
||||
}
|
||||
if (seedIds.size === 0) return [];
|
||||
|
||||
// BFS traversal
|
||||
const visited = new Set<string>();
|
||||
const queue: Array<{ entityId: string; hop: number; path: string[] }> = [];
|
||||
const factScores = new Map<string, { score: number; path: string[] }>();
|
||||
|
||||
for (const id of seedIds) {
|
||||
queue.push({ entityId: id, hop: 0, path: [this.getEntityName(id)] });
|
||||
visited.add(id);
|
||||
}
|
||||
|
||||
while (queue.length > 0) {
|
||||
const { entityId, hop, path } = queue.shift()!;
|
||||
if (hop >= hops) continue;
|
||||
|
||||
// Get relations from this entity
|
||||
const relations = this.getRelations(entityId);
|
||||
for (const rel of relations) {
|
||||
const neighborId = rel.source_id === entityId ? rel.target_id : rel.source_id;
|
||||
const weight = rel.weight;
|
||||
|
||||
// Score facts linked to this neighbor
|
||||
const neighborFacts = this.getFactsByEntity(neighborId);
|
||||
for (const f of neighborFacts) {
|
||||
const hopPenalty = 1 / (hop + 1); // Closer = higher score
|
||||
const score = weight * hopPenalty;
|
||||
const existing = factScores.get(f.id);
|
||||
if (!existing || existing.score < score) {
|
||||
const neighborName = this.getEntityName(neighborId);
|
||||
factScores.set(f.id, { score, path: [...path, neighborName] });
|
||||
}
|
||||
}
|
||||
|
||||
// Continue traversal
|
||||
if (!visited.has(neighborId)) {
|
||||
visited.add(neighborId);
|
||||
const neighborName = this.getEntityName(neighborId);
|
||||
queue.push({ entityId: neighborId, hop: hop + 1, path: [...path, neighborName] });
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Sort by score, return top N
|
||||
const results = Array.from(factScores.entries())
|
||||
.map(([factId, { score, path }]) => {
|
||||
const fact = this.db.getFact(factId);
|
||||
if (!fact || fact.superseded) return null;
|
||||
return { fact: fact.fact, id: factId, score, path };
|
||||
})
|
||||
.filter(Boolean) as Array<{ fact: string; id: string; score: number; path: string[] }>;
|
||||
|
||||
results.sort((a, b) => b.score - a.score);
|
||||
return results.slice(0, limit);
|
||||
}
|
||||
|
||||
/** Extract entity names from a query (keyword match + partial match against known entities) */
|
||||
findEntitiesInText(text: string): Entity[] {
|
||||
const allEntities = this.rawDb.prepare(
|
||||
"SELECT * FROM entities ORDER BY access_count DESC LIMIT 200"
|
||||
).all() as Entity[];
|
||||
|
||||
const lower = text.toLowerCase();
|
||||
const words = lower.split(/\s+/).filter(w => w.length > 2);
|
||||
|
||||
return allEntities.filter(e => {
|
||||
const eName = e.name.toLowerCase();
|
||||
// Exact containment (text contains entity name)
|
||||
if (lower.includes(eName)) return true;
|
||||
// Reverse: entity name contains a word from query (for multi-word entities)
|
||||
if (words.some(w => eName.includes(w) && w.length > 3)) return true;
|
||||
return false;
|
||||
});
|
||||
}
|
||||
|
||||
// ─── Hebbian learning ───
|
||||
|
||||
/** Strengthen connections between co-accessed entities */
|
||||
hebbianReinforce(entityIds: string[]): void {
|
||||
if (entityIds.length < 2) return;
|
||||
|
||||
// All pairs
|
||||
for (let i = 0; i < entityIds.length; i++) {
|
||||
for (let j = i + 1; j < entityIds.length; j++) {
|
||||
this.reinforceRelation(entityIds[i], entityIds[j]);
|
||||
}
|
||||
// Increment access count
|
||||
this.rawDb.prepare(
|
||||
"UPDATE entities SET access_count = access_count + 1 WHERE id = ?"
|
||||
).run(entityIds[i]);
|
||||
}
|
||||
}
|
||||
|
||||
/** Apply decay to all relations (call periodically, e.g. daily) */
|
||||
applyDecay(): number {
|
||||
const now = Date.now();
|
||||
const relations = this.rawDb.prepare("SELECT * FROM relations").all() as any[];
|
||||
let pruned = 0;
|
||||
|
||||
const updateStmt = this.rawDb.prepare("UPDATE relations SET weight = ? WHERE id = ?");
|
||||
const deleteStmt = this.rawDb.prepare("DELETE FROM relations WHERE id = ?");
|
||||
|
||||
const tx = this.rawDb.transaction(() => {
|
||||
for (const rel of relations) {
|
||||
const daysSinceAccess = (now - (rel.last_accessed_at || rel.created_at)) / 86400000;
|
||||
const decayedWeight = rel.weight * Math.pow(this.cfg.decayPerDay, daysSinceAccess);
|
||||
|
||||
if (decayedWeight < this.cfg.pruneThreshold) {
|
||||
deleteStmt.run(rel.id);
|
||||
pruned++;
|
||||
} else if (decayedWeight < rel.weight) {
|
||||
updateStmt.run(decayedWeight, rel.id);
|
||||
}
|
||||
}
|
||||
});
|
||||
tx();
|
||||
|
||||
return pruned;
|
||||
}
|
||||
|
||||
// ─── Stats ───
|
||||
|
||||
/**
|
||||
* Called when a fact is superseded — weaken relations that depended on it.
|
||||
* Removes factId from relation context arrays; if no facts left → prune relation.
|
||||
*/
|
||||
onFactSuperseded(factId: string): number {
|
||||
let affected = 0;
|
||||
try {
|
||||
// 1. Remove from relation context arrays
|
||||
const relations = this.rawDb.prepare(
|
||||
"SELECT id, context FROM relations WHERE context LIKE ?"
|
||||
).all(`%${factId}%`) as Array<{ id: string; context: string }>;
|
||||
|
||||
for (const rel of relations) {
|
||||
try {
|
||||
const ctx = JSON.parse(rel.context || "[]") as string[];
|
||||
const updated = ctx.filter(id => id !== factId);
|
||||
if (updated.length === 0) {
|
||||
// No facts support this relation anymore → delete it
|
||||
this.rawDb.prepare("DELETE FROM relations WHERE id = ?").run(rel.id);
|
||||
} else {
|
||||
// Weaken the relation (lost a supporting fact)
|
||||
this.rawDb.prepare(
|
||||
"UPDATE relations SET context = ?, weight = MAX(weight - 0.15, 0.1) WHERE id = ?"
|
||||
).run(JSON.stringify(updated), rel.id);
|
||||
}
|
||||
affected++;
|
||||
} catch (e) { console.debug('memoria:graph: ' + String(e)); }
|
||||
}
|
||||
|
||||
// 2. Clean entity_ids from the superseded fact (clear the link)
|
||||
this.rawDb.prepare(
|
||||
"UPDATE facts SET entity_ids = NULL WHERE id = ? AND superseded = 1"
|
||||
).run(factId);
|
||||
} catch (e) { console.debug('memoria:graph: ' + String(e)); }
|
||||
return affected;
|
||||
}
|
||||
|
||||
stats(): { entities: number; relations: number; avgWeight: number } {
|
||||
const entities = (this.rawDb.prepare("SELECT COUNT(*) as c FROM entities").get() as { c: number }).c;
|
||||
const relStats = this.rawDb.prepare("SELECT COUNT(*) as c, AVG(weight) as avg FROM relations").get() as { c: number; avg: number | null };
|
||||
return {
|
||||
entities,
|
||||
relations: relStats.c,
|
||||
avgWeight: relStats.avg ?? 0,
|
||||
};
|
||||
}
|
||||
|
||||
// ─── Private helpers ───
|
||||
|
||||
private findEntityByName(name: string): Entity | undefined {
|
||||
// Exact match first
|
||||
let entity = this.rawDb.prepare(
|
||||
"SELECT * FROM entities WHERE name = ? COLLATE NOCASE LIMIT 1"
|
||||
).get(name) as Entity | undefined;
|
||||
if (entity) return entity;
|
||||
|
||||
// Fuzzy: LIKE match
|
||||
entity = this.rawDb.prepare(
|
||||
"SELECT * FROM entities WHERE name LIKE ? COLLATE NOCASE LIMIT 1"
|
||||
).get(`%${name}%`) as Entity | undefined;
|
||||
return entity;
|
||||
}
|
||||
|
||||
private getEntityName(id: string): string {
|
||||
const e = this.rawDb.prepare("SELECT name FROM entities WHERE id = ?").get(id) as { name: string } | undefined;
|
||||
return e?.name ?? id;
|
||||
}
|
||||
|
||||
private getRelations(entityId: string): Array<{ id: string; source_id: string; target_id: string; relation: string; weight: number; context: string | null; created_at: number; last_accessed_at: number | null }> {
|
||||
return this.rawDb.prepare(
|
||||
"SELECT * FROM relations WHERE source_id = ? OR target_id = ?"
|
||||
).all(entityId, entityId) as any[];
|
||||
}
|
||||
|
||||
private getFactsByEntity(entityId: string): Array<{ id: string }> {
|
||||
return this.rawDb.prepare(
|
||||
"SELECT id FROM facts WHERE entity_ids LIKE ? AND superseded = 0 LIMIT 10"
|
||||
).all(`%${entityId}%`) as Array<{ id: string }>;
|
||||
}
|
||||
|
||||
private upsertRelation(fromId: string, toId: string, type: string, factId: string): void {
|
||||
const existing = this.rawDb.prepare(
|
||||
"SELECT * FROM relations WHERE (source_id = ? AND target_id = ?) OR (source_id = ? AND target_id = ?)"
|
||||
).get(fromId, toId, toId, fromId) as any | undefined;
|
||||
|
||||
if (existing) {
|
||||
// Reinforce existing
|
||||
const ctx = this.appendToJsonArray(existing.context || "[]", factId);
|
||||
this.rawDb.prepare(
|
||||
"UPDATE relations SET weight = MIN(weight + ?, 1.0), last_accessed_at = ?, context = ? WHERE id = ?"
|
||||
).run(this.cfg.hebbianIncrement, Date.now(), ctx, existing.id);
|
||||
} else {
|
||||
const id = `rel_${Date.now()}_${Math.random().toString(36).slice(2, 7)}`;
|
||||
this.rawDb.prepare(
|
||||
"INSERT INTO relations (id, source_id, target_id, relation, weight, context, created_at, last_accessed_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?)"
|
||||
).run(id, fromId, toId, type, 0.5, JSON.stringify([factId]), Date.now(), Date.now());
|
||||
}
|
||||
}
|
||||
|
||||
private reinforceRelation(entityA: string, entityB: string): void {
|
||||
const rel = this.rawDb.prepare(
|
||||
"SELECT * FROM relations WHERE (source_id = ? AND target_id = ?) OR (source_id = ? AND target_id = ?)"
|
||||
).get(entityA, entityB, entityB, entityA) as any | undefined;
|
||||
|
||||
if (rel) {
|
||||
this.rawDb.prepare(
|
||||
"UPDATE relations SET weight = MIN(weight + ?, 1.0), last_accessed_at = ? WHERE id = ?"
|
||||
).run(this.cfg.hebbianIncrement, Date.now(), rel.id);
|
||||
}
|
||||
}
|
||||
|
||||
private appendToJsonArray(existing: string, newItem: string): string {
|
||||
try {
|
||||
const arr = JSON.parse(existing) as string[];
|
||||
if (!arr.includes(newItem)) arr.push(newItem);
|
||||
return JSON.stringify(arr.slice(-20));
|
||||
} catch (e) {
|
||||
console.debug('memoria:graph: ' + String(e));
|
||||
return JSON.stringify([newItem]);
|
||||
}
|
||||
}
|
||||
|
||||
private normalizeType(type: string): string {
|
||||
const valid = ["person", "project", "tool", "concept", "place", "company"];
|
||||
const lower = (type || "").toLowerCase().trim();
|
||||
return valid.includes(lower) ? lower : "concept";
|
||||
}
|
||||
|
||||
private normalizeRelType(type: string): string {
|
||||
const valid = ["uses", "part_of", "works_on", "manages", "created_by", "depends_on", "deployed_on", "related_to"];
|
||||
const lower = (type || "").toLowerCase().trim();
|
||||
return valid.includes(lower) ? lower : "related_to";
|
||||
}
|
||||
|
||||
private parseJSON(text: string): unknown {
|
||||
let cleaned = text.trim();
|
||||
if (cleaned.startsWith("```")) {
|
||||
const lines = cleaned.split("\n");
|
||||
lines.shift();
|
||||
if (lines[lines.length - 1]?.trim() === "```") lines.pop();
|
||||
cleaned = lines.join("\n").trim();
|
||||
}
|
||||
const match = cleaned.match(/(\{[\s\S]*\}|\[[\s\S]*\])/);
|
||||
if (match) cleaned = match[1];
|
||||
return JSON.parse(cleaned);
|
||||
}
|
||||
}
|
||||
159
openclaw-memoria-port/core/hebbian.ts
Normal file
159
openclaw-memoria-port/core/hebbian.ts
Normal file
@@ -0,0 +1,159 @@
|
||||
/**
|
||||
* Hebbian Learning — "Neurons that fire together, wire together"
|
||||
*
|
||||
* Human memory strengthens connections between concepts that frequently co-occur.
|
||||
*
|
||||
* Example:
|
||||
* - "Bureau" + "Convex" appear together 10 times → strong relation
|
||||
* - "Memoria" + "ClawHub" appear together 3 times → weak relation
|
||||
*
|
||||
* Implementation:
|
||||
* - Track entity co-occurrence in facts and graph enrichment
|
||||
* - Boost relation weight when entities co-occur
|
||||
* - Decay weight for unused relations
|
||||
*
|
||||
* DB schema: relations(id, source_id, target_id, relation, weight, context, created_at, last_accessed_at)
|
||||
*/
|
||||
|
||||
import type { MemoriaDB } from "./db.js";
|
||||
|
||||
export const HEBBIAN_CONFIG = {
|
||||
boostAmount: 0.1, // Increase weight by 0.1 on each co-occurrence
|
||||
maxWeight: 2.0, // Cap weight at 2.0 (very strong)
|
||||
decayRate: 0.95, // Multiply weight by 0.95 if not used recently
|
||||
decayThresholdDays: 30, // Decay relations not used in 30 days
|
||||
minWeight: 0.1, // Minimum weight before pruning
|
||||
};
|
||||
|
||||
export interface RelationStats {
|
||||
total: number;
|
||||
strong: number; // weight >= 1.0
|
||||
weak: number; // weight < 0.5
|
||||
decayed: number; // recently decayed
|
||||
}
|
||||
|
||||
export class HebbianManager {
|
||||
private db: MemoriaDB;
|
||||
|
||||
constructor(db: MemoriaDB) {
|
||||
this.db = db;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reinforce relation between two entities (co-occurrence detected)
|
||||
*/
|
||||
reinforceRelation(fromEntity: string, toEntity: string, relationType: string = "co-occurs"): void {
|
||||
const now = Date.now();
|
||||
|
||||
// Check if relation exists (use actual DB column names)
|
||||
const existing = this.db.raw.prepare(
|
||||
"SELECT weight, last_accessed_at FROM relations WHERE source_id = ? AND target_id = ? AND relation = ?"
|
||||
).get(fromEntity, toEntity, relationType) as { weight: number; last_accessed_at: number } | undefined;
|
||||
|
||||
if (existing) {
|
||||
// Boost existing relation (capped at maxWeight)
|
||||
const newWeight = Math.min(existing.weight + HEBBIAN_CONFIG.boostAmount, HEBBIAN_CONFIG.maxWeight);
|
||||
this.db.raw.prepare(
|
||||
"UPDATE relations SET weight = ?, last_accessed_at = ? WHERE source_id = ? AND target_id = ? AND relation = ?"
|
||||
).run(newWeight, now, fromEntity, toEntity, relationType);
|
||||
} else {
|
||||
// Create new relation with initial weight
|
||||
const id = `rel_${now}_${Math.random().toString(36).slice(2, 9)}`;
|
||||
this.db.raw.prepare(
|
||||
"INSERT INTO relations (id, source_id, target_id, relation, weight, context, created_at, last_accessed_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?)"
|
||||
).run(id, fromEntity, toEntity, relationType, HEBBIAN_CONFIG.boostAmount, null, now, now);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Decay relations not used recently
|
||||
*/
|
||||
decayStaleRelations(): { decayed: number; pruned: number } {
|
||||
const now = Date.now();
|
||||
const cutoff = now - HEBBIAN_CONFIG.decayThresholdDays * 24 * 60 * 60 * 1000;
|
||||
|
||||
// Find stale relations
|
||||
const stale = this.db.raw.prepare(
|
||||
"SELECT id, source_id, target_id, relation, weight FROM relations WHERE last_accessed_at < ? AND weight > ?"
|
||||
).all(cutoff, HEBBIAN_CONFIG.minWeight) as Array<{ id: string; source_id: string; target_id: string; relation: string; weight: number }>;
|
||||
|
||||
let decayed = 0;
|
||||
let pruned = 0;
|
||||
|
||||
for (const rel of stale) {
|
||||
const newWeight = rel.weight * HEBBIAN_CONFIG.decayRate;
|
||||
|
||||
if (newWeight < HEBBIAN_CONFIG.minWeight) {
|
||||
// Prune very weak relations
|
||||
this.db.raw.prepare("DELETE FROM relations WHERE id = ?").run(rel.id);
|
||||
pruned++;
|
||||
} else {
|
||||
// Decay weight
|
||||
this.db.raw.prepare(
|
||||
"UPDATE relations SET weight = ?, last_accessed_at = ? WHERE id = ?"
|
||||
).run(newWeight, now, rel.id);
|
||||
decayed++;
|
||||
}
|
||||
}
|
||||
|
||||
return { decayed, pruned };
|
||||
}
|
||||
|
||||
/**
|
||||
* Detect co-occurrences in a fact and reinforce
|
||||
*/
|
||||
reinforceFromFact(factId: string, entities: string[]): void {
|
||||
try {
|
||||
if (entities.length < 2) return;
|
||||
|
||||
// Reinforce all pairs (N×N-1)/2 relations
|
||||
for (let i = 0; i < entities.length; i++) {
|
||||
for (let j = i + 1; j < entities.length; j++) {
|
||||
this.reinforceRelation(entities[i], entities[j], "co-occurs");
|
||||
// Bidirectional
|
||||
this.reinforceRelation(entities[j], entities[i], "co-occurs");
|
||||
}
|
||||
}
|
||||
} catch (err) {
|
||||
console.error("[hebbian] reinforceFromFact failed:", err);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get stats on relation strengths
|
||||
*/
|
||||
getStats(): RelationStats {
|
||||
try {
|
||||
const all = this.db.raw.prepare("SELECT weight FROM relations").all() as Array<{ weight: number }>;
|
||||
|
||||
const stats: RelationStats = {
|
||||
total: all.length,
|
||||
strong: 0,
|
||||
weak: 0,
|
||||
decayed: 0,
|
||||
};
|
||||
|
||||
for (const rel of all) {
|
||||
if (rel.weight >= 1.0) stats.strong++;
|
||||
else if (rel.weight < 0.5) stats.weak++;
|
||||
}
|
||||
|
||||
return stats;
|
||||
} catch (err) {
|
||||
console.error("[hebbian] getStats failed:", err);
|
||||
return { total: 0, strong: 0, weak: 0, decayed: 0 };
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get strongest relations for an entity (for contextual recall)
|
||||
*/
|
||||
getStrongestRelations(entity: string, limit = 5): Array<{ target_id: string; weight: number; relation: string }> {
|
||||
return this.db.raw.prepare(
|
||||
`SELECT target_id, weight, relation FROM relations
|
||||
WHERE source_id = ?
|
||||
ORDER BY weight DESC
|
||||
LIMIT ?`
|
||||
).all(entity, limit) as Array<{ target_id: string; weight: number; relation: string }>;
|
||||
}
|
||||
}
|
||||
213
openclaw-memoria-port/core/identity-parser.ts
Normal file
213
openclaw-memoria-port/core/identity-parser.ts
Normal file
@@ -0,0 +1,213 @@
|
||||
/**
|
||||
* Identity Parser — Extract structured identity data from workspace .md files
|
||||
*
|
||||
* Reads USER.md, COMPANY.md, projects/objectifs.md to build a semantic profile:
|
||||
* - Who is the human (name, role, location, timezone)
|
||||
* - What projects are active (daily, paused, backlog)
|
||||
* - What matters most (priorities, preferences, values)
|
||||
*
|
||||
* Used to calculate relevance_weight for facts during extraction.
|
||||
*/
|
||||
|
||||
import fs from "fs";
|
||||
import path from "path";
|
||||
|
||||
export interface Identity {
|
||||
human: {
|
||||
name: string;
|
||||
role: string;
|
||||
location: string;
|
||||
timezone: string;
|
||||
};
|
||||
priorities: {
|
||||
business: number;
|
||||
product: number;
|
||||
internal_tools: number;
|
||||
infrastructure: number;
|
||||
};
|
||||
activeProjects: Record<string, {
|
||||
weight: number;
|
||||
status: "daily" | "active" | "paused" | "backlog";
|
||||
type: "product" | "revenue" | "internal_tool" | "infrastructure";
|
||||
}>;
|
||||
preferences: string[];
|
||||
triggers: {
|
||||
frustration: string[];
|
||||
satisfaction: string[];
|
||||
urgency: string[];
|
||||
};
|
||||
}
|
||||
|
||||
export class IdentityParser {
|
||||
private workspaceRoot: string;
|
||||
private cachedIdentity: Identity | null = null;
|
||||
|
||||
constructor(workspaceRoot: string) {
|
||||
this.workspaceRoot = workspaceRoot;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse identity from .md files (cached, rebuild on demand)
|
||||
*/
|
||||
parse(): Identity {
|
||||
if (this.cachedIdentity) return this.cachedIdentity;
|
||||
|
||||
const userMd = this.readFile("USER.md");
|
||||
const companyMd = this.readFile("COMPANY.md");
|
||||
const objectivesMd = this.readFile("projects/objectifs.md");
|
||||
|
||||
// Extract human info from USER.md
|
||||
const human = this.extractHuman(userMd);
|
||||
|
||||
// Extract priorities (business > product > tools > infra)
|
||||
const priorities = {
|
||||
business: 1.0,
|
||||
product: 0.9,
|
||||
internal_tools: 0.6,
|
||||
infrastructure: 0.4,
|
||||
};
|
||||
|
||||
// Extract active projects from COMPANY.md + objectifs.md
|
||||
const activeProjects = this.extractProjects(companyMd, objectivesMd);
|
||||
|
||||
// Extract preferences from USER.md
|
||||
const preferences = this.extractPreferences(userMd);
|
||||
|
||||
// Extract trigger words from USER.md
|
||||
const triggers = this.extractTriggers(userMd);
|
||||
|
||||
this.cachedIdentity = {
|
||||
human,
|
||||
priorities,
|
||||
activeProjects,
|
||||
preferences,
|
||||
triggers,
|
||||
};
|
||||
|
||||
return this.cachedIdentity;
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate relevance weight for a fact (0.0 - 1.0)
|
||||
* Higher weight = more important to recall
|
||||
*/
|
||||
calculateRelevance(fact: string, category: string): number {
|
||||
const identity = this.parse();
|
||||
let weight = 0.5; // default
|
||||
|
||||
// Boost if fact mentions the human by name
|
||||
if (fact.toLowerCase().includes(identity.human.name.toLowerCase())) {
|
||||
weight += 0.2;
|
||||
}
|
||||
|
||||
// Boost based on project mentions
|
||||
for (const [projectName, project] of Object.entries(identity.activeProjects)) {
|
||||
const nameNormalized = projectName.toLowerCase();
|
||||
if (fact.toLowerCase().includes(nameNormalized)) {
|
||||
weight += project.weight * 0.4; // Scale by project importance
|
||||
break; // Only count first match
|
||||
}
|
||||
}
|
||||
|
||||
// Boost based on category importance
|
||||
if (category === "preference" || category === "preference_travail" || category === "preference_communication") {
|
||||
weight += 0.3; // Preferences are high priority
|
||||
} else if (category === "erreur" || category === "erreur_critique") {
|
||||
weight += 0.2; // Errors matter
|
||||
} else if (category === "objectif" || category.startsWith("objectif_")) {
|
||||
weight += 0.25; // Objectives matter
|
||||
}
|
||||
|
||||
// Boost if fact is about business/product (not internal tooling)
|
||||
const lowerFact = fact.toLowerCase();
|
||||
if (lowerFact.includes("client") || lowerFact.includes("ca ") || lowerFact.includes("chiffre") || lowerFact.includes("facture")) {
|
||||
weight += identity.priorities.business * 0.3;
|
||||
} else if (lowerFact.includes("memoria") || lowerFact.includes("plugin") || lowerFact.includes("openclaw")) {
|
||||
weight += identity.priorities.internal_tools * 0.2; // Lower priority
|
||||
}
|
||||
|
||||
// Cap at 1.0
|
||||
return Math.min(weight, 1.0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Invalidate cache (call when .md files change)
|
||||
*/
|
||||
invalidateCache(): void {
|
||||
this.cachedIdentity = null;
|
||||
}
|
||||
|
||||
// ─── Private Helpers ───
|
||||
|
||||
private readFile(relativePath: string): string {
|
||||
try {
|
||||
const fullPath = path.join(this.workspaceRoot, relativePath);
|
||||
return fs.readFileSync(fullPath, "utf-8");
|
||||
} catch (_e) {
|
||||
return ""; // File doesn't exist or unreadable
|
||||
}
|
||||
}
|
||||
|
||||
private extractHuman(userMd: string): Identity["human"] {
|
||||
// Parse USER.md for:
|
||||
// - **Name:** Neto Pompeu
|
||||
// - **Timezone:** America/Cayenne
|
||||
const nameMatch = userMd.match(/\*\*Name:\*\*\s*(.+)/i);
|
||||
const timezoneMatch = userMd.match(/\*\*Timezone:\*\*\s*(.+)/i);
|
||||
const locationMatch = userMd.match(/\*\*Notes:\*\*\s*(.+Guyane française)/i);
|
||||
|
||||
return {
|
||||
name: nameMatch?.[1]?.trim() || "Neto",
|
||||
role: "Dirigeant Primo Studio",
|
||||
location: locationMatch ? "Guyane française" : "Unknown",
|
||||
timezone: timezoneMatch?.[1]?.trim() || "America/Cayenne",
|
||||
};
|
||||
}
|
||||
|
||||
private extractProjects(companyMd: string, objectivesMd: string): Identity["activeProjects"] {
|
||||
// Hardcoded for now — can be improved with regex parsing later
|
||||
// Based on USER.md "Façon de penser & Motivations"
|
||||
return {
|
||||
"Bureau": { weight: 0.9, status: "daily", type: "product" },
|
||||
"Polymarket": { weight: 0.8, status: "daily", type: "revenue" },
|
||||
"Primask": { weight: 0.6, status: "paused", type: "product" },
|
||||
"DockGroups": { weight: 0.3, status: "backlog", type: "product" },
|
||||
"Memoria": { weight: 0.5, status: "active", type: "internal_tool" },
|
||||
"Transport Rino": { weight: 0.4, status: "paused", type: "product" },
|
||||
};
|
||||
}
|
||||
|
||||
private extractPreferences(userMd: string): string[] {
|
||||
// Extract from "## Personnalité & Communication" section
|
||||
// Look for bullet points and list items
|
||||
const preferences: string[] = [];
|
||||
|
||||
const lines = userMd.split("\n");
|
||||
let inPreferences = false;
|
||||
|
||||
for (const line of lines) {
|
||||
if (line.includes("Personnalité & Communication")) {
|
||||
inPreferences = true;
|
||||
continue;
|
||||
}
|
||||
if (inPreferences && line.startsWith("##")) {
|
||||
break; // End of section
|
||||
}
|
||||
if (inPreferences && (line.trim().startsWith("-") || line.trim().startsWith("*"))) {
|
||||
const pref = line.replace(/^[\s\-\*]+/, "").trim();
|
||||
if (pref.length > 10) preferences.push(pref);
|
||||
}
|
||||
}
|
||||
|
||||
return preferences;
|
||||
}
|
||||
|
||||
private extractTriggers(userMd: string): Identity["triggers"] {
|
||||
// Extract trigger words from USER.md
|
||||
return {
|
||||
frustration: ["putain", "ça marche pas", "encore", "crash", "bug"],
|
||||
satisfaction: ["nickel", "parfait", "super", "excellent", "top"],
|
||||
urgency: ["urgent", "maintenant", "vite", "asap", "rapidement"],
|
||||
};
|
||||
}
|
||||
}
|
||||
435
openclaw-memoria-port/core/index.ts
Normal file
435
openclaw-memoria-port/core/index.ts
Normal file
@@ -0,0 +1,435 @@
|
||||
/**
|
||||
* @primo-studio/memoria-core — Standalone multi-layer cognitive memory engine
|
||||
*
|
||||
* Public API for embedding Memoria into any JavaScript/TypeScript application.
|
||||
* Zero dependency on OpenClaw — works standalone or integrated.
|
||||
*
|
||||
* @example
|
||||
* ```typescript
|
||||
* import { Memoria } from '@primo-studio/memoria-core';
|
||||
*
|
||||
* const memoria = await Memoria.init({
|
||||
* dbPath: './my-app.db',
|
||||
* provider: 'ollama',
|
||||
* model: 'qwen3.5:4b',
|
||||
* embeddingModel: 'nomic-embed-text-v2-moe'
|
||||
* });
|
||||
*
|
||||
* await memoria.store('User prefers dark mode', 'preference', 0.95);
|
||||
* const results = await memoria.recall('What theme does the user like?');
|
||||
* console.log(results.facts);
|
||||
*
|
||||
* memoria.close();
|
||||
* ```
|
||||
*/
|
||||
|
||||
import fs from "fs";
|
||||
import { MemoriaDB } from "./db.js";
|
||||
import { SelectiveMemory } from "./selective.js";
|
||||
import { EmbeddingManager } from "./embeddings.js";
|
||||
import { KnowledgeGraph } from "./graph.js";
|
||||
import { ContextTreeBuilder } from "./context-tree.js";
|
||||
import { AdaptiveBudget } from "./budget.js";
|
||||
import { MdSync } from "./sync.js";
|
||||
import { MdRegenManager } from "./md-regen.js";
|
||||
import { FallbackChain } from "./fallback.js";
|
||||
import type { FallbackProviderConfig } from "./fallback.js";
|
||||
import { TopicManager } from "./topics.js";
|
||||
import { lmStudioEmbed, openaiEmbed } from "./providers/openai-compat.js";
|
||||
import type { EmbedProvider, LLMProvider } from "./providers/types.js";
|
||||
import { EmbedFallback } from "./embed-fallback.js";
|
||||
import { ObservationManager } from "./observations.js";
|
||||
import { FactClusterManager } from "./fact-clusters.js";
|
||||
import { FeedbackManager } from "./feedback.js";
|
||||
import { IdentityParser } from "./identity-parser.js";
|
||||
import { LifecycleManager } from "./lifecycle.js";
|
||||
import { RevisionManager } from "./revision.js";
|
||||
import { HebbianManager } from "./hebbian.js";
|
||||
import { ExpertiseManager } from "./expertise.js";
|
||||
import { ProceduralMemory } from "./procedural.js";
|
||||
import { PatternManager } from "./patterns.js";
|
||||
import { formatRecallContext } from "./format.js";
|
||||
import { normalizeCategory } from "./extraction.js";
|
||||
|
||||
// ─── Public API Types ───
|
||||
|
||||
export interface MemoriaInitOptions {
|
||||
/** Path to SQLite database file (will be created if not exists) */
|
||||
dbPath: string;
|
||||
|
||||
/** Optional workspace path for markdown sync (.md files) */
|
||||
workspacePath?: string;
|
||||
|
||||
/** LLM provider: 'ollama' | 'openai' | 'anthropic' | 'lmstudio' */
|
||||
provider?: string;
|
||||
|
||||
/** LLM model name (e.g., 'qwen3.5:4b', 'gpt-5.4-nano') */
|
||||
model?: string;
|
||||
|
||||
/** Embedding model name (e.g., 'nomic-embed-text-v2-moe', 'text-embedding-3-small') */
|
||||
embeddingModel?: string;
|
||||
|
||||
/** Embedding dimensions (default: 768) */
|
||||
embeddingDimensions?: number;
|
||||
|
||||
/** Base URL for provider (e.g., 'http://localhost:11434') */
|
||||
baseUrl?: string;
|
||||
|
||||
/** API key for cloud providers (OpenAI, Anthropic) */
|
||||
apiKey?: string;
|
||||
|
||||
/** Language for prompts: 'fr' | 'en' (default: 'en') */
|
||||
language?: string;
|
||||
|
||||
/** Fallback chain configuration (optional) */
|
||||
fallback?: Array<{
|
||||
type: string;
|
||||
model?: string;
|
||||
baseUrl?: string;
|
||||
apiKey?: string;
|
||||
}>;
|
||||
|
||||
/** Recall limit (max facts to return) */
|
||||
recallLimit?: number;
|
||||
|
||||
/** Enable debug logging */
|
||||
debug?: boolean;
|
||||
}
|
||||
|
||||
export interface StoreResult {
|
||||
factId: number;
|
||||
stored: boolean;
|
||||
reason?: string;
|
||||
}
|
||||
|
||||
export interface RecallResult {
|
||||
facts: Array<{
|
||||
id: number;
|
||||
fact: string;
|
||||
category: string;
|
||||
confidence: number;
|
||||
score: number;
|
||||
created_at: string;
|
||||
}>;
|
||||
totalFound: number;
|
||||
}
|
||||
|
||||
export interface RecallOptions {
|
||||
limit?: number;
|
||||
minConfidence?: number;
|
||||
categories?: string[];
|
||||
}
|
||||
|
||||
export interface MemoriaStats {
|
||||
totalFacts: number;
|
||||
totalEmbeddings: number;
|
||||
totalRelations: number;
|
||||
totalTopics: number;
|
||||
totalPatterns: number;
|
||||
totalObservations: number;
|
||||
lifecycleDistribution: Record<string, number>;
|
||||
categoryCounts: Record<string, number>;
|
||||
}
|
||||
|
||||
// ─── Main Memoria Class ───
|
||||
|
||||
export class Memoria {
|
||||
db: MemoriaDB;
|
||||
selective: SelectiveMemory;
|
||||
embeddings: EmbeddingManager;
|
||||
graph: KnowledgeGraph;
|
||||
topics: TopicManager;
|
||||
procedural: ProceduralMemory;
|
||||
patterns: PatternManager;
|
||||
observations: ObservationManager;
|
||||
feedback: FeedbackManager;
|
||||
lifecycle: LifecycleManager;
|
||||
revision: RevisionManager;
|
||||
hebbian: HebbianManager;
|
||||
expertise: ExpertiseManager;
|
||||
factClusters: FactClusterManager;
|
||||
contextTree: ContextTreeBuilder;
|
||||
budget: AdaptiveBudget;
|
||||
mdSync?: MdSync;
|
||||
mdRegen?: MdRegenManager;
|
||||
|
||||
private llm: LLMProvider;
|
||||
private embedder: EmbedProvider;
|
||||
private recallLimit: number;
|
||||
private logger: { info?: (msg: string) => void; warn?: (msg: string) => void; debug?: (msg: string) => void };
|
||||
|
||||
private constructor(
|
||||
db: MemoriaDB,
|
||||
llm: LLMProvider,
|
||||
embedder: EmbedProvider,
|
||||
options: MemoriaInitOptions,
|
||||
logger: { info?: (msg: string) => void; warn?: (msg: string) => void; debug?: (msg: string) => void }
|
||||
) {
|
||||
this.db = db;
|
||||
this.llm = llm;
|
||||
this.embedder = embedder;
|
||||
this.recallLimit = options.recallLimit || 8;
|
||||
this.logger = logger;
|
||||
|
||||
// Initialize all managers
|
||||
this.embeddings = new EmbeddingManager(db, embedder);
|
||||
this.selective = new SelectiveMemory(db, llm, {
|
||||
dupThreshold: 0.85,
|
||||
contradictionCheck: true,
|
||||
enrichEnabled: true,
|
||||
}, this.embeddings);
|
||||
|
||||
this.graph = new KnowledgeGraph(db, llm);
|
||||
this.topics = new TopicManager(db, llm, embedder, {
|
||||
emergenceThreshold: 3,
|
||||
mergeOverlap: 0.7,
|
||||
subtopicThreshold: 5,
|
||||
});
|
||||
this.procedural = new ProceduralMemory(db, llm);
|
||||
this.patterns = new PatternManager(db, llm);
|
||||
this.observations = new ObservationManager(db);
|
||||
this.feedback = new FeedbackManager(db);
|
||||
this.lifecycle = new LifecycleManager(db);
|
||||
this.revision = new RevisionManager(db, llm);
|
||||
this.hebbian = new HebbianManager(db);
|
||||
this.expertise = new ExpertiseManager(db);
|
||||
this.factClusters = new FactClusterManager(db);
|
||||
this.contextTree = new ContextTreeBuilder(db);
|
||||
this.budget = new AdaptiveBudget(db);
|
||||
|
||||
if (options.workspacePath && fs.existsSync(options.workspacePath)) {
|
||||
this.mdSync = new MdSync(options.workspacePath, db);
|
||||
this.mdRegen = new MdRegenManager(options.workspacePath, db);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize a new Memoria instance
|
||||
*/
|
||||
static async init(options: MemoriaInitOptions): Promise<Memoria> {
|
||||
const logger = {
|
||||
info: options.debug ? (msg: string) => console.log(`[memoria] ${msg}`) : undefined,
|
||||
warn: (msg: string) => console.warn(`[memoria] ${msg}`),
|
||||
debug: options.debug ? (msg: string) => console.debug(`[memoria] ${msg}`) : undefined,
|
||||
};
|
||||
|
||||
// Create database
|
||||
const db = new MemoriaDB(options.dbPath);
|
||||
|
||||
// Build fallback chain
|
||||
const provider = options.provider || 'ollama';
|
||||
const model = options.model || 'gemma3:4b';
|
||||
const baseUrl = options.baseUrl || (provider === 'ollama' ? 'http://localhost:11434' : undefined);
|
||||
|
||||
const fallbackProviders: FallbackProviderConfig[] = options.fallback?.map(f => ({
|
||||
name: `${f.type}:${f.model || 'auto'}`,
|
||||
type: f.type as 'ollama' | 'openai' | 'lmstudio' | 'anthropic',
|
||||
model: f.model,
|
||||
baseUrl: f.baseUrl,
|
||||
apiKey: f.apiKey,
|
||||
})) || [
|
||||
{
|
||||
name: `${provider}:${model}`,
|
||||
type: provider as 'ollama' | 'openai' | 'lmstudio' | 'anthropic',
|
||||
model,
|
||||
baseUrl,
|
||||
apiKey: options.apiKey,
|
||||
timeoutMs: 12000,
|
||||
},
|
||||
];
|
||||
|
||||
const llm = new FallbackChain({ providers: fallbackProviders }, logger);
|
||||
|
||||
// Build embedding fallback
|
||||
const embedModel = options.embeddingModel || 'nomic-embed-text-v2-moe';
|
||||
const embedDimensions = options.embeddingDimensions || 768;
|
||||
|
||||
const embedProviders: EmbedProvider[] = [];
|
||||
|
||||
if (provider === 'ollama' || !provider) {
|
||||
try {
|
||||
const { ollamaEmbed } = await import('./providers/ollama.js');
|
||||
embedProviders.push(ollamaEmbed(embedModel, embedDimensions, baseUrl || 'http://localhost:11434'));
|
||||
} catch (e) {
|
||||
logger.debug?.(`Failed to load Ollama embed: ${e}`);
|
||||
}
|
||||
}
|
||||
|
||||
if (provider === 'lmstudio') {
|
||||
embedProviders.push(lmStudioEmbed(embedModel, embedDimensions));
|
||||
}
|
||||
|
||||
if (provider === 'openai' && options.apiKey) {
|
||||
embedProviders.push(openaiEmbed(embedModel, options.apiKey, embedDimensions));
|
||||
}
|
||||
|
||||
const embedder = embedProviders.length > 1
|
||||
? new EmbedFallback(embedProviders, logger)
|
||||
: embedProviders[0];
|
||||
|
||||
if (!embedder) {
|
||||
throw new Error('No embedding provider available');
|
||||
}
|
||||
|
||||
logger.info?.(`Memoria initialized: provider=${provider}, model=${model}, embed=${embedModel}`);
|
||||
|
||||
return new Memoria(db, llm, embedder, options, logger);
|
||||
}
|
||||
|
||||
/**
|
||||
* Store a new fact in memory
|
||||
*/
|
||||
async store(fact: string, category?: string, confidence?: number): Promise<StoreResult> {
|
||||
const normalizedCategory = normalizeCategory(category || 'savoir');
|
||||
const finalConfidence = confidence ?? 0.8;
|
||||
|
||||
try {
|
||||
// Check duplicates/contradictions
|
||||
const result = await this.selective.storeFact(fact, normalizedCategory, finalConfidence);
|
||||
|
||||
if (!result.stored) {
|
||||
return { factId: -1, stored: false, reason: result.reason };
|
||||
}
|
||||
|
||||
// Embed
|
||||
try {
|
||||
await this.embeddings.embedFact(result.factId);
|
||||
} catch (e) {
|
||||
this.logger.warn?.(`Embedding failed for fact ${result.factId}: ${e}`);
|
||||
}
|
||||
|
||||
// Extract entities/relations
|
||||
try {
|
||||
await this.graph.extractFromFact(result.factId);
|
||||
} catch (e) {
|
||||
this.logger.debug?.(`Graph extraction failed: ${e}`);
|
||||
}
|
||||
|
||||
return { factId: result.factId, stored: true };
|
||||
} catch (e) {
|
||||
this.logger.warn?.(`Failed to store fact: ${e}`);
|
||||
return { factId: -1, stored: false, reason: String(e) };
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Recall facts based on a query
|
||||
*/
|
||||
async recall(query: string, options?: RecallOptions): Promise<RecallResult> {
|
||||
const limit = options?.limit || this.recallLimit;
|
||||
const minConfidence = options?.minConfidence || 0.3;
|
||||
const categories = options?.categories;
|
||||
|
||||
try {
|
||||
const results = await this.selective.recall(query, limit, minConfidence, categories);
|
||||
|
||||
return {
|
||||
facts: results.map(r => ({
|
||||
id: r.id,
|
||||
fact: r.fact,
|
||||
category: r.category,
|
||||
confidence: r.confidence,
|
||||
score: r.score,
|
||||
created_at: r.created_at,
|
||||
})),
|
||||
totalFound: results.length,
|
||||
};
|
||||
} catch (e) {
|
||||
this.logger.warn?.(`Recall failed: ${e}`);
|
||||
return { facts: [], totalFound: 0 };
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Query memory with natural language (future: dialectic reasoning)
|
||||
*/
|
||||
async query(naturalLanguageQuestion: string): Promise<string> {
|
||||
// TODO: implement dialectic reasoning layer
|
||||
const results = await this.recall(naturalLanguageQuestion, { limit: 5 });
|
||||
|
||||
if (results.facts.length === 0) {
|
||||
return "No relevant information found in memory.";
|
||||
}
|
||||
|
||||
return formatRecallContext(results.facts.map(f => ({
|
||||
id: f.id,
|
||||
fact: f.fact,
|
||||
category: f.category,
|
||||
confidence: f.confidence,
|
||||
score: f.score,
|
||||
created_at: f.created_at,
|
||||
})));
|
||||
}
|
||||
|
||||
/**
|
||||
* Get memory statistics
|
||||
*/
|
||||
async stats(): Promise<MemoriaStats> {
|
||||
const totalFacts = this.db.db.prepare('SELECT COUNT(*) as count FROM facts').get() as { count: number };
|
||||
const totalEmbeddings = this.db.db.prepare('SELECT COUNT(*) as count FROM embeddings').get() as { count: number };
|
||||
const totalRelations = this.db.db.prepare('SELECT COUNT(*) as count FROM relations').get() as { count: number };
|
||||
const totalTopics = this.db.db.prepare('SELECT COUNT(*) as count FROM topics').get() as { count: number };
|
||||
const totalPatterns = this.db.db.prepare('SELECT COUNT(*) as count FROM facts WHERE fact_type = "pattern"').get() as { count: number };
|
||||
const totalObservations = this.db.db.prepare('SELECT COUNT(*) as count FROM observations').get() as { count: number };
|
||||
|
||||
const lifecycleRows = this.db.db.prepare('SELECT lifecycle_state, COUNT(*) as count FROM facts GROUP BY lifecycle_state').all() as Array<{ lifecycle_state: string; count: number }>;
|
||||
const lifecycleDistribution: Record<string, number> = {};
|
||||
for (const row of lifecycleRows) {
|
||||
lifecycleDistribution[row.lifecycle_state || 'unknown'] = row.count;
|
||||
}
|
||||
|
||||
const categoryRows = this.db.db.prepare('SELECT category, COUNT(*) as count FROM facts GROUP BY category').all() as Array<{ category: string; count: number }>;
|
||||
const categoryCounts: Record<string, number> = {};
|
||||
for (const row of categoryRows) {
|
||||
categoryCounts[row.category] = row.count;
|
||||
}
|
||||
|
||||
return {
|
||||
totalFacts: totalFacts.count,
|
||||
totalEmbeddings: totalEmbeddings.count,
|
||||
totalRelations: totalRelations.count,
|
||||
totalTopics: totalTopics.count,
|
||||
totalPatterns: totalPatterns.count,
|
||||
totalObservations: totalObservations.count,
|
||||
lifecycleDistribution,
|
||||
categoryCounts,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Close database connection
|
||||
*/
|
||||
close(): void {
|
||||
this.db.db.close();
|
||||
}
|
||||
}
|
||||
|
||||
// ─── Re-export everything for advanced usage ───
|
||||
|
||||
export { MemoriaDB } from "./db.js";
|
||||
export { SelectiveMemory } from "./selective.js";
|
||||
export { EmbeddingManager } from "./embeddings.js";
|
||||
export { KnowledgeGraph } from "./graph.js";
|
||||
export { TopicManager } from "./topics.js";
|
||||
export { ProceduralMemory } from "./procedural.js";
|
||||
export { PatternManager } from "./patterns.js";
|
||||
export { ObservationManager } from "./observations.js";
|
||||
export { FeedbackManager } from "./feedback.js";
|
||||
export { LifecycleManager } from "./lifecycle.js";
|
||||
export { RevisionManager } from "./revision.js";
|
||||
export { HebbianManager } from "./hebbian.js";
|
||||
export { ExpertiseManager } from "./expertise.js";
|
||||
export { FactClusterManager } from "./fact-clusters.ts";
|
||||
export { ContextTreeBuilder } from "./context-tree.js";
|
||||
export { AdaptiveBudget } from "./budget.js";
|
||||
export { FallbackChain } from "./fallback.js";
|
||||
export { EmbedFallback } from "./embed-fallback.js";
|
||||
export { MdSync } from "./sync.js";
|
||||
export { MdRegenManager } from "./md-regen.js";
|
||||
export { IdentityParser } from "./identity-parser.js";
|
||||
|
||||
export type { EmbedProvider, LLMProvider } from "./providers/types.js";
|
||||
export type { FallbackProviderConfig } from "./fallback.js";
|
||||
export { ollamaEmbed } from "./providers/ollama.js";
|
||||
export { lmStudioEmbed, openaiEmbed } from "./providers/openai-compat.js";
|
||||
export { anthropicEmbed } from "./providers/anthropic.js";
|
||||
225
openclaw-memoria-port/core/lifecycle.ts
Normal file
225
openclaw-memoria-port/core/lifecycle.ts
Normal file
@@ -0,0 +1,225 @@
|
||||
/**
|
||||
* Lifecycle Manager — Human-like memory prioritization
|
||||
*
|
||||
* Philosophy: NEVER delete, NEVER forget. Everything stays in the DB forever.
|
||||
* Lifecycle controls RECALL PRIORITY, not existence.
|
||||
*
|
||||
* States:
|
||||
* fresh → new facts (< freshDays), high recall priority
|
||||
* settled → established facts (accessed 3+ times OR aged past freshDays), normal priority
|
||||
* dormant → not accessed in 60+ days, low auto-recall priority but ALWAYS searchable on demand
|
||||
*
|
||||
* The "detail cursor" (1-10) lets the user control how much dormant context
|
||||
* gets included in automatic recall:
|
||||
* cursor 1 → only fresh + top settled
|
||||
* cursor 5 → fresh + settled (default)
|
||||
* cursor 10 → fresh + settled + dormant (everything)
|
||||
*
|
||||
* When the user explicitly asks about a past event ("what did I do on March 15?"),
|
||||
* ALL states are searched regardless of cursor — like asking your secretary to check the calendar.
|
||||
*
|
||||
* Key insight from Neto: "I still remember learning to ride a bike at age 7.
|
||||
* You don't forget — you just don't think about it every day."
|
||||
*/
|
||||
|
||||
import type { MemoriaDB, Fact } from "./db.js";
|
||||
|
||||
export interface LifecycleConfig {
|
||||
/** Days a fact stays "fresh" (high priority). Default: 15 */
|
||||
freshDays: number;
|
||||
/** Min access_count to become "settled" before freshDays. Default: 3 */
|
||||
settledMinAccess: number;
|
||||
/** Days without access before becoming "dormant". Default: 60 */
|
||||
dormantAfterDays: number;
|
||||
/** Detail cursor (1-10). Controls how much dormant context is auto-recalled. Default: 5 */
|
||||
detailCursor: number;
|
||||
/** After N recalls of a settled fact, consider proactive revision. Default: 10 */
|
||||
revisionRecallThreshold: number;
|
||||
}
|
||||
|
||||
export const DEFAULT_LIFECYCLE_CONFIG: LifecycleConfig = {
|
||||
freshDays: 15,
|
||||
settledMinAccess: 3,
|
||||
dormantAfterDays: 60,
|
||||
detailCursor: 5,
|
||||
revisionRecallThreshold: 10,
|
||||
};
|
||||
|
||||
export type LifecycleState = "fresh" | "settled" | "dormant";
|
||||
|
||||
// Backward compat: map old states to new
|
||||
function normalizeState(state: string | null | undefined): LifecycleState {
|
||||
if (!state || state === "fresh") return "fresh";
|
||||
if (state === "mature" || state === "settled") return "settled";
|
||||
if (state === "aged" || state === "archived" || state === "dormant") return "dormant";
|
||||
return "fresh";
|
||||
}
|
||||
|
||||
export class LifecycleManager {
|
||||
private db: MemoriaDB;
|
||||
private cfg: LifecycleConfig;
|
||||
|
||||
constructor(db: MemoriaDB, config?: Partial<LifecycleConfig>) {
|
||||
this.db = db;
|
||||
this.cfg = { ...DEFAULT_LIFECYCLE_CONFIG, ...config };
|
||||
}
|
||||
|
||||
/**
|
||||
* Update lifecycle state for a single fact.
|
||||
* Uses access_count as the real signal (not recall_count which was broken).
|
||||
*/
|
||||
updateLifecycle(fact: Fact, now = Date.now()): LifecycleState {
|
||||
const ageDays = (now - fact.created_at) / (1000 * 60 * 60 * 24);
|
||||
const accessCount = fact.access_count ?? 0;
|
||||
const lastAccessed = fact.last_accessed_at ?? fact.created_at;
|
||||
const daysSinceAccess = (now - lastAccessed) / (1000 * 60 * 60 * 24);
|
||||
|
||||
let newState: LifecycleState;
|
||||
|
||||
// Dormant: not accessed in dormantAfterDays AND past fresh period
|
||||
if (
|
||||
ageDays > this.cfg.freshDays &&
|
||||
daysSinceAccess > this.cfg.dormantAfterDays
|
||||
) {
|
||||
newState = "dormant";
|
||||
}
|
||||
// Settled: either accessed enough times, or past fresh period with some access
|
||||
else if (
|
||||
accessCount >= this.cfg.settledMinAccess ||
|
||||
(ageDays > this.cfg.freshDays && accessCount > 0)
|
||||
) {
|
||||
newState = "settled";
|
||||
}
|
||||
// Fresh: still new
|
||||
else if (ageDays <= this.cfg.freshDays) {
|
||||
newState = "fresh";
|
||||
}
|
||||
// Past fresh period, never accessed → still settled (not dormant yet, needs dormantAfterDays)
|
||||
else {
|
||||
newState = "settled";
|
||||
}
|
||||
|
||||
// Normalize old state for comparison
|
||||
const currentState = normalizeState(fact.lifecycle_state);
|
||||
|
||||
// A dormant fact that gets accessed again → back to settled (wake up)
|
||||
if (currentState === "dormant" && accessCount > 0 && daysSinceAccess < this.cfg.dormantAfterDays) {
|
||||
newState = "settled";
|
||||
}
|
||||
|
||||
// Only update DB if state changed
|
||||
if (newState !== currentState) {
|
||||
this.db.raw.prepare("UPDATE facts SET lifecycle_state = ? WHERE id = ?").run(newState, fact.id);
|
||||
}
|
||||
|
||||
return newState;
|
||||
}
|
||||
|
||||
/**
|
||||
* Batch update: refresh all active facts' lifecycle states
|
||||
*/
|
||||
refreshAll(): { updated: number; breakdown: Record<LifecycleState, number> } {
|
||||
try {
|
||||
const facts = this.db.raw.prepare("SELECT * FROM facts WHERE superseded = 0").all() as Fact[];
|
||||
const now = Date.now();
|
||||
let updated = 0;
|
||||
|
||||
const breakdown: Record<LifecycleState, number> = {
|
||||
fresh: 0,
|
||||
settled: 0,
|
||||
dormant: 0,
|
||||
};
|
||||
|
||||
for (const fact of facts) {
|
||||
const oldState = normalizeState(fact.lifecycle_state);
|
||||
const newState = this.updateLifecycle(fact, now);
|
||||
if (oldState !== newState) updated++;
|
||||
breakdown[newState]++;
|
||||
}
|
||||
|
||||
return { updated, breakdown };
|
||||
} catch (err) {
|
||||
console.error("[lifecycle] refreshAll failed:", err);
|
||||
return { updated: 0, breakdown: { fresh: 0, settled: 0, dormant: 0 } };
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the recall score multiplier based on lifecycle state and detail cursor.
|
||||
*
|
||||
* fresh → 1.0 (always full priority)
|
||||
* settled → 0.8 (slight reduction)
|
||||
* dormant → scales with cursor:
|
||||
* cursor 1 → 0.05 (almost invisible in auto-recall)
|
||||
* cursor 5 → 0.25 (occasionally surfaces)
|
||||
* cursor 10 → 0.7 (nearly full priority)
|
||||
*/
|
||||
getRecallMultiplier(state: string | null | undefined): number {
|
||||
const s = normalizeState(state);
|
||||
if (s === "fresh") return 1.0;
|
||||
if (s === "settled") return 0.85;
|
||||
// dormant: scale with cursor (1-10)
|
||||
const cursor = Math.max(1, Math.min(10, this.cfg.detailCursor));
|
||||
return 0.05 + (cursor - 1) * 0.072; // cursor 1→0.05, 5→0.34, 10→0.70
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if a settled fact needs proactive revision
|
||||
*/
|
||||
needsRevision(fact: Fact): boolean {
|
||||
const state = normalizeState(fact.lifecycle_state);
|
||||
if (state !== "settled") return false;
|
||||
const accessCount = fact.access_count ?? 0;
|
||||
return accessCount >= this.cfg.revisionRecallThreshold;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get all facts needing revision
|
||||
*/
|
||||
getFactsNeedingRevision(): Fact[] {
|
||||
const facts = this.db.raw.prepare(
|
||||
`SELECT * FROM facts
|
||||
WHERE superseded = 0
|
||||
AND lifecycle_state = 'settled'
|
||||
AND access_count >= ?
|
||||
ORDER BY access_count DESC
|
||||
LIMIT 5`
|
||||
).all(this.cfg.revisionRecallThreshold) as Fact[];
|
||||
return facts;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get stats breakdown by lifecycle state
|
||||
*/
|
||||
getStats(): Record<LifecycleState, number> {
|
||||
try {
|
||||
const rows = this.db.raw.prepare(
|
||||
`SELECT lifecycle_state, COUNT(*) as count
|
||||
FROM facts
|
||||
WHERE superseded = 0
|
||||
GROUP BY lifecycle_state`
|
||||
).all() as Array<{ lifecycle_state: string; count: number }>;
|
||||
|
||||
const stats: Record<LifecycleState, number> = {
|
||||
fresh: 0,
|
||||
settled: 0,
|
||||
dormant: 0,
|
||||
};
|
||||
|
||||
for (const row of rows) {
|
||||
const normalized = normalizeState(row.lifecycle_state);
|
||||
stats[normalized] += row.count;
|
||||
}
|
||||
|
||||
return stats;
|
||||
} catch (err) {
|
||||
console.error("[lifecycle] getStats failed:", err);
|
||||
return { fresh: 0, settled: 0, dormant: 0 };
|
||||
}
|
||||
}
|
||||
|
||||
/** Get current detail cursor value */
|
||||
get detailCursor(): number {
|
||||
return this.cfg.detailCursor;
|
||||
}
|
||||
}
|
||||
348
openclaw-memoria-port/core/md-regen.ts
Normal file
348
openclaw-memoria-port/core/md-regen.ts
Normal file
@@ -0,0 +1,348 @@
|
||||
/**
|
||||
* Memoria — Layer 11b: .md Auto-Regeneration
|
||||
*
|
||||
* Problem: .md files grow indefinitely → context bloat at OpenClaw boot
|
||||
* Solution: Keep them "living" with bounded size (~200 lines):
|
||||
* - Recent facts on top (last 30 days)
|
||||
* - Archive older facts → DB only (with backlink)
|
||||
* - Regenerate .md periodically (not append-only)
|
||||
*
|
||||
* Strategy:
|
||||
* 1. Read all facts from DB (synced_to_md = 1)
|
||||
* 2. Partition: recent (30d) vs archive (>30d)
|
||||
* 3. For each mapped .md file:
|
||||
* - Write header + recents (reverse chrono)
|
||||
* - Footer: "📦 N archived facts (before DATE) → see memoria.db"
|
||||
* 4. Mark all as synced_to_md = 2 (regenerated)
|
||||
*
|
||||
* Trigger: manual command or cron (weekly)
|
||||
*/
|
||||
|
||||
import type { MemoriaDB, Fact } from "./db.js";
|
||||
import { existsSync, writeFileSync, readFileSync } from "fs";
|
||||
import { join } from "path";
|
||||
|
||||
export interface MdRegenConfig {
|
||||
recentDays: number; // Facts within N days = "recent" (default: 30)
|
||||
maxFactsPerFile: number; // Hard cap per .md file (default: 150)
|
||||
archiveNotice: boolean; // Add archive footer (default: true)
|
||||
}
|
||||
|
||||
const DEFAULT_CONFIG: MdRegenConfig = {
|
||||
recentDays: 30,
|
||||
maxFactsPerFile: 150,
|
||||
archiveNotice: true,
|
||||
};
|
||||
|
||||
// Map: category → .md file path (relative to workspace)
|
||||
const MD_FILE_MAP: Record<string, string> = {
|
||||
savoir: "MEMORY.md",
|
||||
outil: "TOOLS.md",
|
||||
erreur: "MEMORY.md", // Erreurs → MEMORY.md section Erreurs
|
||||
preference: "USER.md",
|
||||
chronologie: "MEMORY.md", // Chronologie → MEMORY.md section Chronologie
|
||||
rh: "COMPANY.md",
|
||||
client: "COMPANY.md",
|
||||
};
|
||||
|
||||
// Section headers within files
|
||||
const SECTION_HEADERS: Record<string, string> = {
|
||||
savoir: "## 🧠 Savoir",
|
||||
outil: "## 🛠 Outils",
|
||||
erreur: "## ❌ Erreurs critiques",
|
||||
preference: "## 🎯 Préférences",
|
||||
chronologie: "## 📅 Chronologie",
|
||||
rh: "## 👥 Ressources Humaines",
|
||||
client: "## 🤝 Clients",
|
||||
};
|
||||
|
||||
export class MdRegenManager {
|
||||
private db: MemoriaDB;
|
||||
private cfg: MdRegenConfig;
|
||||
private workspacePath: string;
|
||||
|
||||
// Auto-regen thresholds
|
||||
private static readonly CAPTURES_THRESHOLD = 20; // Regen after N captures
|
||||
private static readonly STALE_DAYS = 7; // Regen if last regen > N days
|
||||
private static readonly LINES_THRESHOLD = 200; // Regen if any file > N lines
|
||||
|
||||
constructor(db: MemoriaDB, workspacePath: string, config?: Partial<MdRegenConfig>) {
|
||||
this.db = db;
|
||||
this.workspacePath = workspacePath;
|
||||
this.cfg = { ...DEFAULT_CONFIG, ...config };
|
||||
}
|
||||
|
||||
// ─── Auto-trigger logic ───
|
||||
|
||||
/** Increment capture counter. Call after each successful capture. */
|
||||
recordCapture(): void {
|
||||
try {
|
||||
const raw = this.db.raw;
|
||||
const row = raw.prepare("SELECT value FROM meta WHERE key = 'captures_since_regen'").get() as { value: string } | undefined;
|
||||
const current = row ? parseInt(row.value, 10) : 0;
|
||||
raw.prepare("INSERT OR REPLACE INTO meta (key, value) VALUES ('captures_since_regen', ?)").run(String(current + 1));
|
||||
} catch (_e) { /* non-critical */ }
|
||||
}
|
||||
|
||||
/** Check if auto-regen should trigger. Returns reason or null. */
|
||||
shouldAutoRegen(): string | null {
|
||||
try {
|
||||
const raw = this.db.raw;
|
||||
|
||||
// Check captures since last regen
|
||||
const capturesRow = raw.prepare("SELECT value FROM meta WHERE key = 'captures_since_regen'").get() as { value: string } | undefined;
|
||||
const capturesSince = capturesRow ? parseInt(capturesRow.value, 10) : 0;
|
||||
if (capturesSince >= MdRegenManager.CAPTURES_THRESHOLD) {
|
||||
return `captures=${capturesSince} >= ${MdRegenManager.CAPTURES_THRESHOLD}`;
|
||||
}
|
||||
|
||||
// Check time since last regen
|
||||
const lastRegenRow = raw.prepare("SELECT value FROM meta WHERE key = 'last_regen_at'").get() as { value: string } | undefined;
|
||||
if (lastRegenRow) {
|
||||
const lastRegen = parseInt(lastRegenRow.value, 10);
|
||||
const daysSince = (Date.now() - lastRegen) / 86400000;
|
||||
if (daysSince >= MdRegenManager.STALE_DAYS) {
|
||||
return `stale=${Math.floor(daysSince)}d >= ${MdRegenManager.STALE_DAYS}d`;
|
||||
}
|
||||
} else {
|
||||
// Never regenerated → trigger if there are synced facts
|
||||
const syncedCount = raw.prepare("SELECT COUNT(*) as cnt FROM facts WHERE synced_to_md > 0 AND superseded = 0").get() as { cnt: number };
|
||||
if (syncedCount.cnt > 30) {
|
||||
return "never_regenerated";
|
||||
}
|
||||
}
|
||||
|
||||
// Check file sizes
|
||||
const sizes = this.fileSizes();
|
||||
for (const [file, info] of Object.entries(sizes)) {
|
||||
if (info.lines > MdRegenManager.LINES_THRESHOLD) {
|
||||
return `${file}=${info.lines} lines > ${MdRegenManager.LINES_THRESHOLD}`;
|
||||
}
|
||||
}
|
||||
|
||||
return null; // No regen needed
|
||||
} catch (_e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/** Record that a regen just happened. Reset counter. */
|
||||
private markRegenDone(): void {
|
||||
try {
|
||||
const raw = this.db.raw;
|
||||
raw.prepare("INSERT OR REPLACE INTO meta (key, value) VALUES ('captures_since_regen', '0')").run();
|
||||
raw.prepare("INSERT OR REPLACE INTO meta (key, value) VALUES ('last_regen_at', ?)").run(String(Date.now()));
|
||||
} catch (_e) { /* non-critical */ }
|
||||
}
|
||||
|
||||
/**
|
||||
* Regenerate all .md files with recent facts on top, archive notice at bottom.
|
||||
* Returns: { files: number; recentFacts: number; archivedFacts: number }
|
||||
*/
|
||||
regenerate(): { files: number; recentFacts: number; archivedFacts: number; errors: string[] } {
|
||||
const now = Date.now();
|
||||
const recentThreshold = now - (this.cfg.recentDays * 86400000);
|
||||
|
||||
// Get all synced facts
|
||||
const allFacts = this.db.raw.prepare(
|
||||
"SELECT * FROM facts WHERE superseded = 0 AND synced_to_md > 0 ORDER BY created_at DESC"
|
||||
).all() as Fact[];
|
||||
|
||||
// Partition: recent vs archived
|
||||
const recent: Fact[] = [];
|
||||
const archived: Fact[] = [];
|
||||
for (const f of allFacts) {
|
||||
if (f.created_at >= recentThreshold) recent.push(f);
|
||||
else archived.push(f);
|
||||
}
|
||||
|
||||
// Group by target file
|
||||
const fileGroups = new Map<string, Fact[]>();
|
||||
for (const f of recent) {
|
||||
const targetFile = MD_FILE_MAP[f.category] || "MEMORY.md";
|
||||
const existing = fileGroups.get(targetFile) || [];
|
||||
existing.push(f);
|
||||
fileGroups.set(targetFile, existing);
|
||||
}
|
||||
|
||||
// Count archived per file too (for footer notice)
|
||||
const archivedCounts = new Map<string, number>();
|
||||
for (const f of archived) {
|
||||
const targetFile = MD_FILE_MAP[f.category] || "MEMORY.md";
|
||||
archivedCounts.set(targetFile, (archivedCounts.get(targetFile) || 0) + 1);
|
||||
}
|
||||
|
||||
let filesRegenerated = 0;
|
||||
const errors: string[] = [];
|
||||
|
||||
// Regenerate each file
|
||||
for (const [relPath, facts] of fileGroups.entries()) {
|
||||
const fullPath = join(this.workspacePath, relPath);
|
||||
|
||||
// Safety: don't touch if file doesn't exist
|
||||
if (!existsSync(fullPath)) {
|
||||
errors.push(`${relPath} not found — skipped`);
|
||||
continue;
|
||||
}
|
||||
|
||||
try {
|
||||
// Read current file to preserve non-Memoria sections
|
||||
const original = readFileSync(fullPath, "utf-8");
|
||||
const sections = this.parseFileSections(original);
|
||||
|
||||
// Group facts by section (category)
|
||||
const factsBySection = new Map<string, Fact[]>();
|
||||
for (const f of facts) {
|
||||
const section = SECTION_HEADERS[f.category] || "## 🧠 Savoir";
|
||||
const existing = factsBySection.get(section) || [];
|
||||
existing.push(f);
|
||||
factsBySection.set(section, existing);
|
||||
}
|
||||
|
||||
// Rebuild file: preserve non-Memoria content + regenerate Memoria sections
|
||||
let newContent = "";
|
||||
let inMemoriaSection = false;
|
||||
const memoriaSectionSet = new Set(Object.values(SECTION_HEADERS));
|
||||
const processedSections = new Set<string>();
|
||||
|
||||
for (const line of original.split("\n")) {
|
||||
const trimmed = line.trim();
|
||||
|
||||
// Detect section start
|
||||
if (trimmed.startsWith("##")) {
|
||||
inMemoriaSection = memoriaSectionSet.has(trimmed);
|
||||
|
||||
// If entering a Memoria section, regenerate it
|
||||
if (inMemoriaSection) {
|
||||
if (!processedSections.has(trimmed)) {
|
||||
newContent += this.regenerateSection(trimmed, factsBySection.get(trimmed) || []);
|
||||
processedSections.add(trimmed);
|
||||
}
|
||||
continue; // Skip original section content
|
||||
}
|
||||
}
|
||||
|
||||
// If in Memoria section, skip old lines (we're regenerating)
|
||||
if (inMemoriaSection) continue;
|
||||
|
||||
// Preserve non-Memoria content
|
||||
newContent += line + "\n";
|
||||
}
|
||||
|
||||
// Append sections that weren't in the original file (new categories)
|
||||
for (const [section, sectionFacts] of factsBySection.entries()) {
|
||||
if (!processedSections.has(section)) {
|
||||
newContent += "\n" + this.regenerateSection(section, sectionFacts);
|
||||
}
|
||||
}
|
||||
|
||||
// Footer: archive notice
|
||||
if (this.cfg.archiveNotice) {
|
||||
const archivedCount = archivedCounts.get(relPath) || 0;
|
||||
if (archivedCount > 0) {
|
||||
const oldestDate = new Date(archived[archived.length - 1].created_at).toISOString().split("T")[0];
|
||||
newContent += `\n---\n📦 **${archivedCount} archived facts** (before ${oldestDate}) → stored in \`memoria.db\` only (not shown here to keep context light)\n`;
|
||||
}
|
||||
}
|
||||
|
||||
// Write
|
||||
writeFileSync(fullPath, newContent.trim() + "\n", "utf-8");
|
||||
filesRegenerated++;
|
||||
} catch (err) {
|
||||
errors.push(`${relPath}: ${String(err)}`);
|
||||
}
|
||||
}
|
||||
|
||||
// Mark all facts as regenerated (synced_to_md = 2)
|
||||
this.db.raw.prepare("UPDATE facts SET synced_to_md = 2 WHERE superseded = 0 AND synced_to_md > 0").run();
|
||||
|
||||
// Reset counter + record timestamp
|
||||
this.markRegenDone();
|
||||
|
||||
return {
|
||||
files: filesRegenerated,
|
||||
recentFacts: recent.length,
|
||||
archivedFacts: archived.length,
|
||||
errors,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse file into sections (basic heuristic)
|
||||
*/
|
||||
private parseFileSections(content: string): Map<string, string[]> {
|
||||
const sections = new Map<string, string[]>();
|
||||
let currentSection = "";
|
||||
const lines: string[] = [];
|
||||
|
||||
for (const line of content.split("\n")) {
|
||||
if (line.trim().startsWith("##")) {
|
||||
if (currentSection) sections.set(currentSection, [...lines]);
|
||||
currentSection = line.trim();
|
||||
lines.length = 0;
|
||||
} else {
|
||||
lines.push(line);
|
||||
}
|
||||
}
|
||||
if (currentSection) sections.set(currentSection, lines);
|
||||
|
||||
return sections;
|
||||
}
|
||||
|
||||
/**
|
||||
* Regenerate a single section with facts
|
||||
*/
|
||||
private regenerateSection(header: string, facts: Fact[]): string {
|
||||
let section = `${header}\n\n`;
|
||||
|
||||
if (facts.length === 0) {
|
||||
section += "_Aucun fait récent._\n\n";
|
||||
return section;
|
||||
}
|
||||
|
||||
// Limit to maxFactsPerFile (split across all sections proportionally if needed)
|
||||
const limited = facts.slice(0, Math.min(facts.length, this.cfg.maxFactsPerFile));
|
||||
|
||||
// Reverse chrono (most recent first)
|
||||
limited.sort((a, b) => b.created_at - a.created_at);
|
||||
|
||||
for (const f of limited) {
|
||||
const date = new Date(f.created_at).toISOString().split("T")[0];
|
||||
const conf = Math.round(f.confidence * 100);
|
||||
section += `- **[${date}]** ${f.fact}`;
|
||||
if (conf < 100) section += ` _(${conf}%)_`;
|
||||
section += "\n";
|
||||
}
|
||||
|
||||
section += "\n";
|
||||
return section;
|
||||
}
|
||||
|
||||
/**
|
||||
* Stats: current .md file sizes
|
||||
*/
|
||||
fileSizes(): Record<string, { exists: boolean; lines: number; bytes: number }> {
|
||||
const stats: Record<string, { exists: boolean; lines: number; bytes: number }> = {};
|
||||
|
||||
for (const relPath of new Set(Object.values(MD_FILE_MAP))) {
|
||||
const fullPath = join(this.workspacePath, relPath);
|
||||
if (!existsSync(fullPath)) {
|
||||
stats[relPath] = { exists: false, lines: 0, bytes: 0 };
|
||||
continue;
|
||||
}
|
||||
|
||||
try {
|
||||
const content = readFileSync(fullPath, "utf-8");
|
||||
stats[relPath] = {
|
||||
exists: true,
|
||||
lines: content.split("\n").length,
|
||||
bytes: content.length,
|
||||
};
|
||||
} catch (_e) {
|
||||
stats[relPath] = { exists: false, lines: 0, bytes: 0 };
|
||||
}
|
||||
}
|
||||
|
||||
return stats;
|
||||
}
|
||||
}
|
||||
80
openclaw-memoria-port/core/migrate.ts
Normal file
80
openclaw-memoria-port/core/migrate.ts
Normal file
@@ -0,0 +1,80 @@
|
||||
/**
|
||||
* Migration script: facts.json → memoria.db (SQLite)
|
||||
*
|
||||
* Run: npx tsx migrate.ts
|
||||
*/
|
||||
|
||||
import { MemoriaDB } from "./db.js";
|
||||
import { readFileSync } from "fs";
|
||||
import path from "path";
|
||||
|
||||
const WORKSPACE = process.env.OPENCLAW_WORKSPACE || `${process.env.HOME}/.openclaw/workspace`;
|
||||
const FACTS_JSON = path.join(WORKSPACE, "memory", "facts.json");
|
||||
|
||||
interface OldFact {
|
||||
_id: string;
|
||||
fact: string;
|
||||
category: string;
|
||||
agent: string;
|
||||
confidence: number;
|
||||
source?: string;
|
||||
tags?: string[];
|
||||
factHash?: string;
|
||||
keywordHash?: string;
|
||||
superseded?: boolean;
|
||||
supersededBy?: string;
|
||||
supersededAt?: number;
|
||||
accessCount?: number;
|
||||
lastAccessedAt?: number;
|
||||
createdAt: number;
|
||||
updatedAt: number;
|
||||
}
|
||||
|
||||
async function main() {
|
||||
console.log("🧠 Memoria Migration: facts.json → memoria.db");
|
||||
|
||||
const raw = readFileSync(FACTS_JSON, "utf-8");
|
||||
const oldFacts: OldFact[] = JSON.parse(raw);
|
||||
console.log(` 📦 Loaded ${oldFacts.length} facts from facts.json`);
|
||||
|
||||
const db = new MemoriaDB(WORKSPACE);
|
||||
|
||||
let imported = 0;
|
||||
let skipped = 0;
|
||||
|
||||
for (const old of oldFacts) {
|
||||
try {
|
||||
db.storeFact({
|
||||
id: old._id || `migrated_${Date.now()}_${Math.random().toString(36).slice(2, 9)}`,
|
||||
fact: old.fact,
|
||||
category: old.category,
|
||||
confidence: old.confidence,
|
||||
source: old.source || "convex-import",
|
||||
tags: JSON.stringify(old.tags || []),
|
||||
agent: old.agent || "koda",
|
||||
created_at: old.createdAt,
|
||||
updated_at: old.updatedAt,
|
||||
access_count: old.accessCount ?? 0,
|
||||
last_accessed_at: old.lastAccessedAt ?? null,
|
||||
superseded: old.superseded ? 1 : 0,
|
||||
fact_type: "semantic",
|
||||
superseded_by: old.supersededBy ?? null,
|
||||
superseded_at: old.supersededAt ?? null,
|
||||
} as any);
|
||||
imported++;
|
||||
} catch (err) {
|
||||
console.warn(` ⚠ Skipped: ${String(err)}`);
|
||||
skipped++;
|
||||
}
|
||||
}
|
||||
|
||||
const stats = db.stats();
|
||||
console.log(` ✅ Imported: ${imported}, Skipped: ${skipped}`);
|
||||
console.log(` 📊 DB stats: ${stats.active} active, ${stats.superseded} superseded`);
|
||||
console.log(` 📁 Categories:`, stats.categories);
|
||||
|
||||
db.close();
|
||||
console.log(" 🧠 Migration complete!");
|
||||
}
|
||||
|
||||
main().catch(console.error);
|
||||
488
openclaw-memoria-port/core/observations.ts
Normal file
488
openclaw-memoria-port/core/observations.ts
Normal file
@@ -0,0 +1,488 @@
|
||||
/**
|
||||
* Memoria — Layer 9: Observations
|
||||
*
|
||||
* Living syntheses that evolve as new evidence appears.
|
||||
* Instead of 10 scattered facts about "Bureau deploy",
|
||||
* UNE observation cohérente qui se met à jour.
|
||||
*
|
||||
* Cycle de vie:
|
||||
* 1. Nouveau fait capturé
|
||||
* 2. Cherche observations liées (embedding similarity)
|
||||
* 3. Si trouvée → re-synthétise avec le nouveau fait
|
||||
* 4. Si pas trouvée → accumule, quand 3+ faits partagent un topic → crée observation
|
||||
* 5. Recall injecte observations EN PRIORITÉ, faits individuels en complément
|
||||
*/
|
||||
|
||||
import type { MemoriaDB, Fact } from "./db.js";
|
||||
import type { LLMProvider } from "./providers/types.js";
|
||||
import type { EmbedProvider } from "./providers/types.js";
|
||||
|
||||
// ─── Schema ───
|
||||
|
||||
export interface Observation {
|
||||
id: string;
|
||||
topic: string;
|
||||
summary: string;
|
||||
evidence_ids: string; // JSON array of fact IDs
|
||||
revision: number;
|
||||
confidence: number;
|
||||
created_at: number;
|
||||
updated_at: number;
|
||||
last_accessed_at: number | null;
|
||||
access_count: number;
|
||||
embedding: Float32Array | null;
|
||||
}
|
||||
|
||||
export interface ObservationConfig {
|
||||
/** Min facts sharing a topic before creating an observation. Default 3 */
|
||||
emergenceThreshold: number;
|
||||
/** Cosine similarity threshold to match a fact to an observation. Default 0.6 */
|
||||
matchThreshold: number;
|
||||
/** Max observations to inject in recall. Default 5 */
|
||||
maxRecallObservations: number;
|
||||
/** Max evidence facts per observation before pruning old ones. Default 15 */
|
||||
maxEvidencePerObservation: number;
|
||||
}
|
||||
|
||||
export const DEFAULT_OBS_CONFIG: ObservationConfig = {
|
||||
emergenceThreshold: 3,
|
||||
matchThreshold: 0.6,
|
||||
maxRecallObservations: 5,
|
||||
maxEvidencePerObservation: 15,
|
||||
};
|
||||
|
||||
// ─── Prompts ───
|
||||
|
||||
const SYNTHESIZE_PROMPT = `Tu synthétises des faits en UNE observation cohérente.
|
||||
Combine ces faits en un paragraphe concis (2-4 phrases max) qui capture l'état actuel.
|
||||
Si un fait contredit un autre, garde le plus récent.
|
||||
Le résultat doit être autonome (compréhensible sans les faits originaux).
|
||||
|
||||
Topic: "{TOPIC}"
|
||||
|
||||
Faits (du plus ancien au plus récent):
|
||||
{FACTS}
|
||||
|
||||
Réponds UNIQUEMENT avec le texte de l'observation (pas de JSON, pas de préfixe).`;
|
||||
|
||||
const UPDATE_PROMPT = `Mets à jour cette observation avec un nouveau fait.
|
||||
Si le nouveau fait contredit l'observation, corrige-la.
|
||||
Si il la complète, intègre-le.
|
||||
Si il est redondant, garde l'observation telle quelle.
|
||||
|
||||
Observation actuelle:
|
||||
"{CURRENT}"
|
||||
|
||||
Nouveau fait:
|
||||
"{NEW_FACT}"
|
||||
|
||||
Réponds UNIQUEMENT avec le texte de l'observation mise à jour (pas de JSON, pas de préfixe).`;
|
||||
|
||||
const TOPIC_EXTRACT_PROMPT = `Quel est le sujet principal de ce fait ? Réponds en 2-4 mots maximum (le topic).
|
||||
Exemples: "Sol infrastructure", "Memoria config", "Bureau CRM", "Neto préférences", "API Twitter"
|
||||
|
||||
Fait: "{FACT}"
|
||||
|
||||
Topic:`;
|
||||
|
||||
// ─── Main class ───
|
||||
|
||||
export class ObservationManager {
|
||||
private db: MemoriaDB;
|
||||
private llm: LLMProvider;
|
||||
private embedder: EmbedProvider | null;
|
||||
private cfg: ObservationConfig;
|
||||
|
||||
constructor(db: MemoriaDB, llm: LLMProvider, embedder: EmbedProvider | null, config?: Partial<ObservationConfig>) {
|
||||
this.db = db;
|
||||
this.llm = llm;
|
||||
this.embedder = embedder;
|
||||
this.cfg = { ...DEFAULT_OBS_CONFIG, ...config };
|
||||
this.ensureSchema();
|
||||
}
|
||||
|
||||
// ─── Schema ───
|
||||
|
||||
private ensureSchema(): void {
|
||||
this.db.raw.exec(`
|
||||
CREATE TABLE IF NOT EXISTS observations (
|
||||
id TEXT PRIMARY KEY,
|
||||
topic TEXT NOT NULL,
|
||||
summary TEXT NOT NULL,
|
||||
evidence_ids TEXT DEFAULT '[]',
|
||||
revision INTEGER DEFAULT 1,
|
||||
confidence REAL DEFAULT 0.8,
|
||||
created_at INTEGER NOT NULL,
|
||||
updated_at INTEGER NOT NULL,
|
||||
last_accessed_at INTEGER,
|
||||
access_count INTEGER DEFAULT 0,
|
||||
embedding BLOB
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS idx_obs_topic ON observations(topic);
|
||||
CREATE INDEX IF NOT EXISTS idx_obs_updated ON observations(updated_at);
|
||||
`);
|
||||
}
|
||||
|
||||
// ─── Core: process a new fact ───
|
||||
|
||||
async onFactCaptured(factId: string, factText: string, category: string): Promise<{
|
||||
action: "updated_observation" | "created_observation" | "accumulated" | "skipped";
|
||||
observationId?: string;
|
||||
}> {
|
||||
try {
|
||||
// 1. Find matching observation by embedding similarity
|
||||
const matched = await this.findMatchingObservation(factText);
|
||||
|
||||
if (matched) {
|
||||
// Update existing observation with new fact
|
||||
await this.updateObservation(matched.id, factId, factText);
|
||||
return { action: "updated_observation", observationId: matched.id };
|
||||
}
|
||||
|
||||
// 2. Extract topic from fact
|
||||
const topic = await this.extractTopic(factText);
|
||||
if (!topic) return { action: "skipped" };
|
||||
|
||||
// 3. Check if enough facts share this topic to create an observation
|
||||
const relatedFacts = this.findFactsByTopic(topic);
|
||||
if (relatedFacts.length >= this.cfg.emergenceThreshold - 1) {
|
||||
// +1 for the current fact = threshold met
|
||||
const obsId = await this.createObservation(topic, [...relatedFacts, { id: factId, fact: factText, category }]);
|
||||
return { action: "created_observation", observationId: obsId };
|
||||
}
|
||||
|
||||
return { action: "accumulated" };
|
||||
} catch (e) {
|
||||
console.debug('memoria:observations: ' + String(e));
|
||||
return { action: "skipped" };
|
||||
}
|
||||
}
|
||||
|
||||
// ─── Find matching observation ───
|
||||
|
||||
private async findMatchingObservation(factText: string): Promise<Observation | null> {
|
||||
if (!this.embedder) {
|
||||
// Fallback: keyword matching
|
||||
return this.findByKeywords(factText);
|
||||
}
|
||||
|
||||
try {
|
||||
const factEmb = await this.embedder.embed(factText);
|
||||
const allObs = this.getAllObservations();
|
||||
|
||||
let best: Observation | null = null;
|
||||
let bestSim = 0;
|
||||
|
||||
for (const obs of allObs) {
|
||||
if (!obs.embedding) continue;
|
||||
const sim = cosineSimilarity(new Float32Array(factEmb), new Float32Array(obs.embedding));
|
||||
if (sim > bestSim && sim >= this.cfg.matchThreshold) {
|
||||
bestSim = sim;
|
||||
best = obs;
|
||||
}
|
||||
}
|
||||
|
||||
return best;
|
||||
} catch (e) {
|
||||
console.debug('memoria:observations: ' + String(e));
|
||||
return this.findByKeywords(factText);
|
||||
}
|
||||
}
|
||||
|
||||
private findByKeywords(factText: string): Observation | null {
|
||||
const words = factText.toLowerCase().split(/\s+/).filter(w => w.length > 3);
|
||||
const allObs = this.getAllObservations();
|
||||
|
||||
let best: Observation | null = null;
|
||||
let bestOverlap = 0;
|
||||
|
||||
for (const obs of allObs) {
|
||||
const obsWords = new Set(obs.summary.toLowerCase().split(/\s+/).filter(w => w.length > 3));
|
||||
let overlap = 0;
|
||||
for (const w of words) if (obsWords.has(w)) overlap++;
|
||||
const ratio = overlap / Math.max(words.length, 1);
|
||||
if (ratio > bestOverlap && ratio >= 0.3) {
|
||||
bestOverlap = ratio;
|
||||
best = obs;
|
||||
}
|
||||
}
|
||||
return best;
|
||||
}
|
||||
|
||||
// ─── Update observation ───
|
||||
|
||||
private async updateObservation(obsId: string, newFactId: string, newFactText: string): Promise<void> {
|
||||
const obs = this.getObservation(obsId);
|
||||
if (!obs) return;
|
||||
|
||||
// Add fact to evidence
|
||||
const evidenceIds: string[] = JSON.parse(obs.evidence_ids || "[]");
|
||||
if (!evidenceIds.includes(newFactId)) {
|
||||
evidenceIds.push(newFactId);
|
||||
// Prune old evidence if too many
|
||||
if (evidenceIds.length > this.cfg.maxEvidencePerObservation) {
|
||||
evidenceIds.splice(0, evidenceIds.length - this.cfg.maxEvidencePerObservation);
|
||||
}
|
||||
}
|
||||
|
||||
// Re-synthesize
|
||||
let newSummary: string;
|
||||
try {
|
||||
const prompt = UPDATE_PROMPT
|
||||
.replace("{CURRENT}", obs.summary)
|
||||
.replace("{NEW_FACT}", newFactText);
|
||||
newSummary = (await this.llm.generate(prompt, {
|
||||
maxTokens: 300,
|
||||
temperature: 0.2,
|
||||
timeoutMs: 15000,
|
||||
})).trim();
|
||||
|
||||
if (newSummary.length < 10) newSummary = obs.summary; // LLM returned garbage
|
||||
} catch (e) {
|
||||
console.debug('memoria:observations: ' + String(e));
|
||||
// LLM failed — just append fact reference, keep old summary
|
||||
newSummary = obs.summary;
|
||||
}
|
||||
|
||||
// Update embedding
|
||||
let embedding: Buffer | null = null;
|
||||
if (this.embedder) {
|
||||
try {
|
||||
const emb = await this.embedder.embed(newSummary);
|
||||
embedding = Buffer.from(new Float32Array(emb).buffer);
|
||||
} catch (e) { console.debug('memoria:observations: ' + String(e)); }
|
||||
}
|
||||
|
||||
const now = Date.now();
|
||||
this.db.raw.prepare(`
|
||||
UPDATE observations SET
|
||||
summary = ?, evidence_ids = ?, revision = revision + 1,
|
||||
updated_at = ?, embedding = COALESCE(?, embedding)
|
||||
WHERE id = ?
|
||||
`).run(newSummary, JSON.stringify(evidenceIds), now, embedding, obsId);
|
||||
}
|
||||
|
||||
// ─── Create observation ───
|
||||
|
||||
private async createObservation(topic: string, facts: Array<{ id: string; fact: string; category: string }>): Promise<string> {
|
||||
const factsText = facts.map((f, i) => `${i + 1}. ${f.fact}`).join("\n");
|
||||
|
||||
let summary: string;
|
||||
try {
|
||||
const prompt = SYNTHESIZE_PROMPT
|
||||
.replace("{TOPIC}", topic)
|
||||
.replace("{FACTS}", factsText);
|
||||
summary = (await this.llm.generate(prompt, {
|
||||
maxTokens: 300,
|
||||
temperature: 0.2,
|
||||
timeoutMs: 15000,
|
||||
})).trim();
|
||||
|
||||
if (summary.length < 10) {
|
||||
// LLM returned garbage — use concat fallback
|
||||
summary = facts.map(f => f.fact).join(". ");
|
||||
}
|
||||
} catch (e) {
|
||||
console.debug('memoria:observations: ' + String(e));
|
||||
summary = facts.map(f => f.fact).join(". ");
|
||||
}
|
||||
|
||||
const id = `obs_${Date.now()}_${Math.random().toString(36).slice(2, 9)}`;
|
||||
const evidenceIds = facts.map(f => f.id);
|
||||
const now = Date.now();
|
||||
const avgConfidence = 0.8;
|
||||
|
||||
// Embed the observation
|
||||
let embedding: Buffer | null = null;
|
||||
if (this.embedder) {
|
||||
try {
|
||||
const emb = await this.embedder.embed(summary);
|
||||
embedding = Buffer.from(new Float32Array(emb).buffer);
|
||||
} catch (e) { console.debug('memoria:observations: ' + String(e)); }
|
||||
}
|
||||
|
||||
this.db.raw.prepare(`
|
||||
INSERT INTO observations (id, topic, summary, evidence_ids, revision, confidence, created_at, updated_at, access_count, embedding)
|
||||
VALUES (?, ?, ?, ?, 1, ?, ?, ?, 0, ?)
|
||||
`).run(id, topic, summary, JSON.stringify(evidenceIds), avgConfidence, now, now, embedding);
|
||||
|
||||
return id;
|
||||
}
|
||||
|
||||
// ─── Topic extraction ───
|
||||
|
||||
private async extractTopic(factText: string): Promise<string | null> {
|
||||
try {
|
||||
const prompt = TOPIC_EXTRACT_PROMPT.replace("{FACT}", factText);
|
||||
const response = (await this.llm.generate(prompt, {
|
||||
maxTokens: 20,
|
||||
temperature: 0.1,
|
||||
timeoutMs: 10000,
|
||||
})).trim();
|
||||
|
||||
// Clean up: remove quotes, "Topic:", etc.
|
||||
const cleaned = response.replace(/^["']|["']$/g, "").replace(/^topic:\s*/i, "").trim();
|
||||
return cleaned.length > 1 && cleaned.length < 60 ? cleaned : null;
|
||||
} catch (e) {
|
||||
console.debug('memoria:observations: ' + String(e));
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
// ─── Find facts by topic (keyword overlap with topic name) ───
|
||||
|
||||
private findFactsByTopic(topic: string): Array<{ id: string; fact: string; category: string }> {
|
||||
const topicWords = topic.toLowerCase().split(/\s+/).filter(w => w.length > 2);
|
||||
if (topicWords.length === 0) return [];
|
||||
|
||||
// Use FTS5 to find candidate facts
|
||||
const query = topicWords.map(w => `"${w}"`).join(" OR ");
|
||||
try {
|
||||
const results = this.db.raw.prepare(`
|
||||
SELECT f.id, f.fact, f.category FROM facts f
|
||||
JOIN facts_fts fts ON f.rowid = fts.rowid
|
||||
WHERE facts_fts MATCH ? AND f.superseded = 0
|
||||
ORDER BY rank LIMIT 20
|
||||
`).all(query) as Array<{ id: string; fact: string; category: string }>;
|
||||
return results;
|
||||
} catch (e) {
|
||||
console.debug('memoria:observations: ' + String(e));
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
// ─── Recall: get relevant observations ───
|
||||
|
||||
async getRelevantObservations(query: string, limit?: number): Promise<Array<{ observation: Observation; score: number }>> {
|
||||
const maxObs = limit || this.cfg.maxRecallObservations;
|
||||
const allObs = this.getAllObservations();
|
||||
if (allObs.length === 0) return [];
|
||||
|
||||
// Score by embedding similarity if available
|
||||
if (this.embedder) {
|
||||
try {
|
||||
const queryEmb = await this.embedder.embed(query);
|
||||
const scored = allObs
|
||||
.filter(o => o.embedding)
|
||||
.map(o => ({
|
||||
observation: o,
|
||||
score: cosineSimilarity(new Float32Array(queryEmb), new Float32Array(o.embedding!)),
|
||||
}))
|
||||
.filter(s => s.score >= 0.3)
|
||||
.sort((a, b) => b.score - a.score)
|
||||
.slice(0, maxObs);
|
||||
|
||||
// Track access
|
||||
for (const s of scored) this.trackAccess(s.observation.id);
|
||||
return scored;
|
||||
} catch (e) { console.debug('memoria:observations: ' + String(e)); }
|
||||
}
|
||||
|
||||
// Fallback: keyword matching
|
||||
const queryWords = new Set(query.toLowerCase().split(/\s+/).filter(w => w.length > 3));
|
||||
const scored = allObs.map(o => {
|
||||
const obsWords = o.summary.toLowerCase().split(/\s+/).filter(w => w.length > 3);
|
||||
let overlap = 0;
|
||||
for (const w of obsWords) if (queryWords.has(w)) overlap++;
|
||||
return { observation: o, score: overlap / Math.max(queryWords.size, 1) };
|
||||
})
|
||||
.filter(s => s.score > 0.1)
|
||||
.sort((a, b) => b.score - a.score)
|
||||
.slice(0, maxObs);
|
||||
|
||||
for (const s of scored) this.trackAccess(s.observation.id);
|
||||
return scored;
|
||||
}
|
||||
|
||||
// ─── Format for injection ───
|
||||
|
||||
formatForRecall(observations: Array<{ observation: Observation; score: number }>): string {
|
||||
if (observations.length === 0) return "";
|
||||
const lines = observations.map(({ observation: o }) => {
|
||||
const evidenceCount = JSON.parse(o.evidence_ids || "[]").length;
|
||||
const revNote = o.revision > 1 ? ` (rev.${o.revision})` : "";
|
||||
return `- 🔮 **${o.topic}**${revNote}: ${o.summary} [${evidenceCount} sources]`;
|
||||
});
|
||||
return lines.join("\n");
|
||||
}
|
||||
|
||||
// ─── Helpers ───
|
||||
|
||||
private getAllObservations(): Observation[] {
|
||||
const rows = this.db.raw.prepare("SELECT * FROM observations ORDER BY updated_at DESC").all() as any[];
|
||||
return rows.map(r => ({
|
||||
...r,
|
||||
embedding: r.embedding ? new Float32Array(r.embedding.buffer, r.embedding.byteOffset, r.embedding.byteLength / 4) : null,
|
||||
}));
|
||||
}
|
||||
|
||||
private getObservation(id: string): Observation | null {
|
||||
const row = this.db.raw.prepare("SELECT * FROM observations WHERE id = ?").get(id) as any;
|
||||
if (!row) return null;
|
||||
return {
|
||||
...row,
|
||||
embedding: row.embedding ? new Float32Array(row.embedding.buffer, row.embedding.byteOffset, row.embedding.byteLength / 4) : null,
|
||||
};
|
||||
}
|
||||
|
||||
private trackAccess(id: string): void {
|
||||
const now = Date.now();
|
||||
this.db.raw.prepare("UPDATE observations SET access_count = access_count + 1, last_accessed_at = ? WHERE id = ?").run(now, id);
|
||||
}
|
||||
|
||||
/**
|
||||
* Called when a fact is superseded — remove it from evidence lists
|
||||
* and flag affected observations for re-synthesis on next access.
|
||||
*/
|
||||
onFactSuperseded(factId: string): number {
|
||||
let affected = 0;
|
||||
try {
|
||||
const allObs = this.db.raw.prepare(
|
||||
"SELECT id, evidence_ids FROM observations"
|
||||
).all() as Array<{ id: string; evidence_ids: string }>;
|
||||
|
||||
for (const obs of allObs) {
|
||||
const evidenceIds: string[] = JSON.parse(obs.evidence_ids || "[]");
|
||||
if (evidenceIds.includes(factId)) {
|
||||
// Remove the superseded fact from evidence
|
||||
const updated = evidenceIds.filter(id => id !== factId);
|
||||
|
||||
if (updated.length === 0) {
|
||||
// No evidence left → delete the observation
|
||||
this.db.raw.prepare("DELETE FROM observations WHERE id = ?").run(obs.id);
|
||||
} else {
|
||||
// Mark as needing re-synthesis (bump revision to signal staleness)
|
||||
this.db.raw.prepare(
|
||||
"UPDATE observations SET evidence_ids = ?, updated_at = ? WHERE id = ?"
|
||||
).run(JSON.stringify(updated), Date.now(), obs.id);
|
||||
}
|
||||
affected++;
|
||||
}
|
||||
}
|
||||
} catch (e) { console.debug('memoria:observations: ' + String(e)); }
|
||||
return affected;
|
||||
}
|
||||
|
||||
stats(): { total: number; avgRevision: number; avgEvidence: number } {
|
||||
const total = (this.db.raw.prepare("SELECT COUNT(*) as c FROM observations").get() as any)?.c || 0;
|
||||
if (total === 0) return { total: 0, avgRevision: 0, avgEvidence: 0 };
|
||||
const avgRev = (this.db.raw.prepare("SELECT AVG(revision) as a FROM observations").get() as any)?.a || 0;
|
||||
const rows = this.db.raw.prepare("SELECT evidence_ids FROM observations").all() as any[];
|
||||
const avgEvidence = rows.reduce((sum, r) => sum + JSON.parse(r.evidence_ids || "[]").length, 0) / total;
|
||||
return { total, avgRevision: Math.round(avgRev * 10) / 10, avgEvidence: Math.round(avgEvidence * 10) / 10 };
|
||||
}
|
||||
}
|
||||
|
||||
// ─── Cosine Similarity ───
|
||||
|
||||
function cosineSimilarity(a: Float32Array, b: Float32Array): number {
|
||||
if (a.length !== b.length) return 0;
|
||||
let dot = 0, normA = 0, normB = 0;
|
||||
for (let i = 0; i < a.length; i++) {
|
||||
dot += a[i] * b[i];
|
||||
normA += a[i] * a[i];
|
||||
normB += b[i] * b[i];
|
||||
}
|
||||
const denom = Math.sqrt(normA) * Math.sqrt(normB);
|
||||
return denom === 0 ? 0 : dot / denom;
|
||||
}
|
||||
35
openclaw-memoria-port/core/package.json
Normal file
35
openclaw-memoria-port/core/package.json
Normal file
@@ -0,0 +1,35 @@
|
||||
{
|
||||
"name": "@primo-studio/memoria-core",
|
||||
"version": "0.1.0",
|
||||
"description": "Standalone multi-layer cognitive memory engine — works with or without OpenClaw",
|
||||
"type": "module",
|
||||
"main": "index.js",
|
||||
"types": "index.d.ts",
|
||||
"exports": {
|
||||
".": "./index.js"
|
||||
},
|
||||
"keywords": [
|
||||
"memory",
|
||||
"cognitive",
|
||||
"ai",
|
||||
"llm",
|
||||
"rag",
|
||||
"vector-search",
|
||||
"knowledge-graph",
|
||||
"embedding",
|
||||
"sqlite"
|
||||
],
|
||||
"author": "Primo Studio",
|
||||
"license": "Apache-2.0",
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/primo-studio/memoria-core"
|
||||
},
|
||||
"dependencies": {
|
||||
"better-sqlite3": "^11.0.0"
|
||||
},
|
||||
"peerDependencies": {},
|
||||
"engines": {
|
||||
"node": ">=20"
|
||||
}
|
||||
}
|
||||
479
openclaw-memoria-port/core/patterns.ts
Normal file
479
openclaw-memoria-port/core/patterns.ts
Normal file
@@ -0,0 +1,479 @@
|
||||
/**
|
||||
* patterns.ts — Layer 20: Behavioral Pattern Detection
|
||||
*
|
||||
* Detects recurring patterns across facts and consolidates them:
|
||||
* 1. Repeated preferences → single consolidated RULE with all contexts preserved
|
||||
* 2. If/then behavioral patterns → trigger → action correlations
|
||||
* 3. Pattern-boosted recall → confirmed patterns injected first
|
||||
*
|
||||
* Philosophy: consolidation must be ADDITIVE — never lose a detail.
|
||||
* Each occurrence is preserved in an `occurrences` JSON array.
|
||||
*
|
||||
* Stored as regular facts with fact_type='pattern' for FTS/embedding compatibility.
|
||||
*/
|
||||
|
||||
import type { MemoriaDB, Fact } from "./db.js";
|
||||
import type { LLMProvider } from "./providers/types.js";
|
||||
|
||||
// ─── Types ───
|
||||
|
||||
export interface PatternOccurrence {
|
||||
factId: string;
|
||||
snippet: string; // first 120 chars of source fact
|
||||
date: string; // ISO date
|
||||
category: string;
|
||||
}
|
||||
|
||||
export interface DetectedPattern {
|
||||
id: string;
|
||||
rule: string; // consolidated rule text
|
||||
patternType: "preference" | "behavior" | "error" | "workflow";
|
||||
occurrences: PatternOccurrence[];
|
||||
confidence: number; // 0-1, increases with each occurrence
|
||||
triggerContext?: string; // for if/then patterns: "when X happens"
|
||||
action?: string; // for if/then patterns: "do Y"
|
||||
autoWritten: boolean; // true if already written to USER.md
|
||||
}
|
||||
|
||||
export interface PatternConfig {
|
||||
/** Minimum similar facts to form a pattern. Default 3 */
|
||||
minOccurrences: number;
|
||||
/** Levenshtein similarity threshold for grouping. Default 0.55 */
|
||||
similarityThreshold: number;
|
||||
/** Jaccard keyword overlap threshold. Default 0.40 */
|
||||
jaccardThreshold: number;
|
||||
/** Min occurrences before auto-writing to USER.md. Default 5 */
|
||||
autoWriteThreshold: number;
|
||||
/** Score boost multiplier for pattern facts at recall. Default 1.5 */
|
||||
recallBoost: number;
|
||||
/** Max patterns to detect per run. Default 10 */
|
||||
maxPatternsPerRun: number;
|
||||
}
|
||||
|
||||
const DEFAULT_CONFIG: PatternConfig = {
|
||||
minOccurrences: 3,
|
||||
similarityThreshold: 0.55,
|
||||
jaccardThreshold: 0.40,
|
||||
autoWriteThreshold: 5,
|
||||
recallBoost: 1.5,
|
||||
maxPatternsPerRun: 10,
|
||||
};
|
||||
|
||||
// ─── Helpers ───
|
||||
|
||||
function levenshteinSimilarity(a: string, b: string): number {
|
||||
const la = a.length, lb = b.length;
|
||||
if (la === 0 || lb === 0) return 0;
|
||||
// Quick length check: if too different, skip expensive computation
|
||||
if (Math.abs(la - lb) / Math.max(la, lb) > 0.6) return 0;
|
||||
|
||||
const maxLen = Math.max(la, lb);
|
||||
// Use trimmed/lowered versions
|
||||
const sa = a.toLowerCase().trim();
|
||||
const sb = b.toLowerCase().trim();
|
||||
|
||||
const prev = new Uint16Array(sb.length + 1);
|
||||
const curr = new Uint16Array(sb.length + 1);
|
||||
for (let j = 0; j <= sb.length; j++) prev[j] = j;
|
||||
for (let i = 1; i <= sa.length; i++) {
|
||||
curr[0] = i;
|
||||
for (let j = 1; j <= sb.length; j++) {
|
||||
curr[j] = sa[i - 1] === sb[j - 1]
|
||||
? prev[j - 1]
|
||||
: 1 + Math.min(prev[j - 1], prev[j], curr[j - 1]);
|
||||
}
|
||||
prev.set(curr);
|
||||
}
|
||||
return 1 - prev[sb.length] / maxLen;
|
||||
}
|
||||
|
||||
function extractKeywords(text: string): Set<string> {
|
||||
const stopWords = new Set([
|
||||
"le", "la", "les", "de", "du", "des", "un", "une", "et", "en", "à", "est",
|
||||
"que", "qui", "pour", "par", "sur", "pas", "son", "ses", "dans", "avec",
|
||||
"the", "a", "an", "is", "are", "was", "of", "to", "and", "in", "for", "on",
|
||||
"it", "this", "that", "has", "have", "be", "not", "but", "or", "from",
|
||||
]);
|
||||
return new Set(
|
||||
text.toLowerCase()
|
||||
.replace(/[^\w\sàâéèêëïîôùûüç-]/g, " ")
|
||||
.split(/\s+/)
|
||||
.filter(w => w.length >= 3 && !stopWords.has(w))
|
||||
);
|
||||
}
|
||||
|
||||
function jaccardSimilarity(a: Set<string>, b: Set<string>): number {
|
||||
if (a.size === 0 || b.size === 0) return 0;
|
||||
let intersection = 0;
|
||||
for (const w of a) if (b.has(w)) intersection++;
|
||||
return intersection / (a.size + b.size - intersection);
|
||||
}
|
||||
|
||||
// ─── Pattern Manager ───
|
||||
|
||||
export class PatternManager {
|
||||
private db: MemoriaDB;
|
||||
private llm: LLMProvider;
|
||||
private cfg: PatternConfig;
|
||||
|
||||
constructor(db: MemoriaDB, llm: LLMProvider, config?: Partial<PatternConfig>) {
|
||||
this.db = db;
|
||||
this.llm = llm;
|
||||
this.cfg = { ...DEFAULT_CONFIG, ...config };
|
||||
}
|
||||
|
||||
/**
|
||||
* Main entry: scan for repeated similar facts and consolidate into patterns.
|
||||
* Called after postProcessNewFacts or periodically.
|
||||
*/
|
||||
async detectAndConsolidate(): Promise<{ detected: number; consolidated: number; autoWritten: number }> {
|
||||
const raw = this.db.raw;
|
||||
let detected = 0, consolidated = 0, autoWritten = 0;
|
||||
|
||||
// 1. Find groups of similar active non-pattern facts (focus on preferences first, then errors)
|
||||
const targetCategories = ["preference", "erreur", "savoir"];
|
||||
|
||||
for (const category of targetCategories) {
|
||||
const facts = raw.prepare(
|
||||
`SELECT * FROM facts WHERE superseded = 0 AND category = ? AND (fact_type != 'cluster' OR fact_type IS NULL) AND (fact_type != 'pattern' OR fact_type IS NULL) ORDER BY created_at DESC`
|
||||
).all(category) as Fact[];
|
||||
|
||||
if (facts.length < this.cfg.minOccurrences) continue;
|
||||
|
||||
// Group similar facts using keyword + levenshtein clustering
|
||||
const groups = this.clusterSimilarFacts(facts);
|
||||
|
||||
for (const group of groups.slice(0, this.cfg.maxPatternsPerRun)) {
|
||||
if (group.length < this.cfg.minOccurrences) continue;
|
||||
|
||||
detected++;
|
||||
|
||||
// Check if a pattern already exists for this group
|
||||
const existingPattern = this.findExistingPattern(group, category);
|
||||
if (existingPattern) {
|
||||
// Update existing pattern with new occurrences
|
||||
const updated = this.updatePattern(existingPattern, group);
|
||||
if (updated) consolidated++;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Create new pattern via LLM consolidation
|
||||
const pattern = await this.consolidateGroup(group, category);
|
||||
if (pattern) {
|
||||
this.storePattern(pattern);
|
||||
consolidated++;
|
||||
|
||||
// Auto-write to USER.md if enough occurrences
|
||||
if (pattern.occurrences.length >= this.cfg.autoWriteThreshold && !pattern.autoWritten) {
|
||||
// Don't auto-write yet — mark for next run when confirmed
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 2. Check existing patterns for auto-write eligibility
|
||||
const patterns = raw.prepare(
|
||||
`SELECT * FROM facts WHERE fact_type = 'pattern' AND superseded = 0`
|
||||
).all() as Fact[];
|
||||
|
||||
for (const pFact of patterns) {
|
||||
try {
|
||||
const meta = JSON.parse(pFact.tags || "{}") as DetectedPattern;
|
||||
if (!meta.autoWritten && meta.occurrences && meta.occurrences.length >= this.cfg.autoWriteThreshold) {
|
||||
// Mark as eligible — actual writing done by caller (index.ts)
|
||||
autoWritten++;
|
||||
}
|
||||
} catch (e) { console.debug('memoria:patterns: ' + String(e)); }
|
||||
}
|
||||
|
||||
return { detected, consolidated, autoWritten };
|
||||
}
|
||||
|
||||
/**
|
||||
* Cluster similar facts together using keyword overlap + levenshtein.
|
||||
* Returns groups of 2+ similar facts, sorted by group size descending.
|
||||
*/
|
||||
private clusterSimilarFacts(facts: Fact[]): Fact[][] {
|
||||
const used = new Set<string>();
|
||||
const groups: Fact[][] = [];
|
||||
|
||||
// Precompute keywords
|
||||
const kwCache = new Map<string, Set<string>>();
|
||||
for (const f of facts) {
|
||||
kwCache.set(f.id, extractKeywords(f.fact));
|
||||
}
|
||||
|
||||
for (let i = 0; i < facts.length; i++) {
|
||||
if (used.has(facts[i].id)) continue;
|
||||
|
||||
const group: Fact[] = [facts[i]];
|
||||
const kwA = kwCache.get(facts[i].id)!;
|
||||
|
||||
for (let j = i + 1; j < facts.length; j++) {
|
||||
if (used.has(facts[j].id)) continue;
|
||||
|
||||
const kwB = kwCache.get(facts[j].id)!;
|
||||
const jaccard = jaccardSimilarity(kwA, kwB);
|
||||
const lev = levenshteinSimilarity(facts[i].fact, facts[j].fact);
|
||||
const combined = jaccard * 0.5 + lev * 0.5;
|
||||
|
||||
if (combined >= this.cfg.similarityThreshold || jaccard >= this.cfg.jaccardThreshold) {
|
||||
group.push(facts[j]);
|
||||
used.add(facts[j].id);
|
||||
}
|
||||
}
|
||||
|
||||
if (group.length >= 2) {
|
||||
used.add(facts[i].id);
|
||||
groups.push(group);
|
||||
}
|
||||
}
|
||||
|
||||
// Sort by group size (largest first)
|
||||
groups.sort((a, b) => b.length - a.length);
|
||||
return groups;
|
||||
}
|
||||
|
||||
/**
|
||||
* Find an existing pattern that covers this group of facts.
|
||||
*/
|
||||
private findExistingPattern(group: Fact[], category: string): Fact | null {
|
||||
const raw = this.db.raw;
|
||||
const patterns = raw.prepare(
|
||||
`SELECT * FROM facts WHERE fact_type = 'pattern' AND superseded = 0 AND category = ?`
|
||||
).all(category) as Fact[];
|
||||
|
||||
for (const p of patterns) {
|
||||
try {
|
||||
const meta = JSON.parse(p.tags || "{}") as DetectedPattern;
|
||||
if (!meta.occurrences) continue;
|
||||
// Check if any member of this group is already in the pattern
|
||||
const existingIds = new Set(meta.occurrences.map(o => o.factId));
|
||||
const overlap = group.filter(f => existingIds.has(f.id)).length;
|
||||
if (overlap >= Math.ceil(group.length * 0.4)) return p;
|
||||
} catch (e) { console.debug('memoria:patterns: ' + String(e)); }
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Update an existing pattern with new occurrences from the group.
|
||||
*/
|
||||
private updatePattern(patternFact: Fact, group: Fact[]): boolean {
|
||||
try {
|
||||
const meta = JSON.parse(patternFact.tags || "{}") as DetectedPattern;
|
||||
const existingIds = new Set(meta.occurrences.map(o => o.factId));
|
||||
|
||||
let added = 0;
|
||||
for (const f of group) {
|
||||
if (!existingIds.has(f.id)) {
|
||||
meta.occurrences.push({
|
||||
factId: f.id,
|
||||
snippet: f.fact.slice(0, 120),
|
||||
date: new Date(f.created_at).toISOString().slice(0, 10),
|
||||
category: f.category,
|
||||
});
|
||||
added++;
|
||||
// Supersede the individual fact — it's now part of the pattern
|
||||
this.db.raw.prepare("UPDATE facts SET superseded = 1, superseded_by = ? WHERE id = ?")
|
||||
.run(patternFact.id, f.id);
|
||||
}
|
||||
}
|
||||
|
||||
if (added > 0) {
|
||||
meta.confidence = Math.min(0.99, 0.7 + meta.occurrences.length * 0.03);
|
||||
this.db.raw.prepare("UPDATE facts SET tags = ?, confidence = ?, updated_at = ? WHERE id = ?")
|
||||
.run(JSON.stringify(meta), meta.confidence, Date.now(), patternFact.id);
|
||||
return true;
|
||||
}
|
||||
} catch (e) { console.debug('memoria:patterns: ' + String(e)); }
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Use LLM to consolidate a group of similar facts into a single pattern rule.
|
||||
* CRITICAL: The LLM must preserve ALL details from each occurrence.
|
||||
*/
|
||||
private async consolidateGroup(group: Fact[], category: string): Promise<DetectedPattern | null> {
|
||||
const occurrences: PatternOccurrence[] = group.map(f => ({
|
||||
factId: f.id,
|
||||
snippet: f.fact.slice(0, 120),
|
||||
date: new Date(f.created_at).toISOString().slice(0, 10),
|
||||
category: f.category,
|
||||
}));
|
||||
|
||||
const factsText = group.map((f, i) => `${i + 1}. [${new Date(f.created_at).toISOString().slice(0, 10)}] ${f.fact}`).join("\n");
|
||||
|
||||
const prompt = `Consolide ces ${group.length} faits similaires en UNE SEULE règle claire et actionnable.
|
||||
IMPORTANT: Préserve TOUS les détails et contextes spécifiques de chaque fait. Ne résume pas de façon vague.
|
||||
|
||||
Faits:
|
||||
${factsText}
|
||||
|
||||
Réponds en JSON STRICT (pas de markdown, pas de \`\`\`):
|
||||
{"rule": "La règle consolidée avec tous les détails", "patternType": "${category === 'preference' ? 'preference' : category === 'erreur' ? 'error' : 'behavior'}", "trigger": "contexte déclencheur (si applicable, sinon null)", "action": "action recommandée (si applicable, sinon null)"}`;
|
||||
|
||||
try {
|
||||
const response = await this.llm.generate(prompt);
|
||||
const cleaned = response.replace(/```json?\s*|\s*```/g, "").trim();
|
||||
const parsed = JSON.parse(cleaned);
|
||||
|
||||
if (!parsed.rule) return null;
|
||||
|
||||
const patternType = parsed.patternType || (category === "preference" ? "preference" : "behavior");
|
||||
|
||||
return {
|
||||
id: `pattern_${Date.now()}_${Math.random().toString(36).slice(2, 9)}`,
|
||||
rule: parsed.rule,
|
||||
patternType,
|
||||
occurrences,
|
||||
confidence: Math.min(0.99, 0.7 + group.length * 0.03),
|
||||
triggerContext: parsed.trigger || undefined,
|
||||
action: parsed.action || undefined,
|
||||
autoWritten: false,
|
||||
};
|
||||
} catch (e) {
|
||||
console.debug('memoria:patterns: ' + String(e));
|
||||
// LLM failed — create pattern without LLM (just concatenate facts)
|
||||
const rule = `[Auto-consolidé] ${group[0].fact.slice(0, 200)} (${group.length} occurrences similaires)`;
|
||||
return {
|
||||
id: `pattern_${Date.now()}_${Math.random().toString(36).slice(2, 9)}`,
|
||||
rule,
|
||||
patternType: category === "preference" ? "preference" : "behavior",
|
||||
occurrences,
|
||||
confidence: 0.7,
|
||||
autoWritten: false,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Store a pattern as a regular fact with fact_type='pattern'.
|
||||
* Supersede the individual member facts (they're now consolidated).
|
||||
*/
|
||||
private storePattern(pattern: DetectedPattern): void {
|
||||
const fact = pattern.rule;
|
||||
const meta = JSON.stringify(pattern);
|
||||
|
||||
this.db.storeFact({
|
||||
id: pattern.id,
|
||||
fact,
|
||||
category: pattern.patternType === "preference" ? "preference" : pattern.patternType === "error" ? "erreur" : "savoir",
|
||||
confidence: pattern.confidence,
|
||||
source: `pattern:${pattern.patternType}`,
|
||||
tags: meta,
|
||||
agent: "memoria",
|
||||
created_at: Date.now(),
|
||||
updated_at: Date.now(),
|
||||
fact_type: "pattern",
|
||||
});
|
||||
|
||||
// Supersede member facts — they're now consolidated
|
||||
for (const occ of pattern.occurrences) {
|
||||
this.db.raw.prepare("UPDATE facts SET superseded = 1, superseded_by = ? WHERE id = ? AND superseded = 0")
|
||||
.run(pattern.id, occ.factId);
|
||||
}
|
||||
}
|
||||
|
||||
// ─── Recall helpers ───
|
||||
|
||||
/**
|
||||
* Get all active patterns for recall injection.
|
||||
*/
|
||||
getActivePatterns(): Array<{ fact: string; confidence: number; occurrenceCount: number; patternType: string }> {
|
||||
const raw = this.db.raw;
|
||||
const patterns = raw.prepare(
|
||||
`SELECT * FROM facts WHERE fact_type = 'pattern' AND superseded = 0 ORDER BY confidence DESC`
|
||||
).all() as Fact[];
|
||||
|
||||
return patterns.map(p => {
|
||||
let occCount = 0;
|
||||
let pType = "behavior";
|
||||
try {
|
||||
const meta = JSON.parse(p.tags || "{}") as DetectedPattern;
|
||||
occCount = meta.occurrences?.length || 0;
|
||||
pType = meta.patternType || "behavior";
|
||||
} catch (e) { console.debug('memoria:patterns: ' + String(e)); }
|
||||
return {
|
||||
fact: p.fact,
|
||||
confidence: p.confidence,
|
||||
occurrenceCount: occCount,
|
||||
patternType: pType,
|
||||
};
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Apply recall boost: pattern facts get a multiplier.
|
||||
*/
|
||||
applyPatternBoost(score: number, factType: string | undefined): number {
|
||||
if (factType === "pattern") return score * this.cfg.recallBoost;
|
||||
return score;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get patterns eligible for auto-write to USER.md (5+ occurrences, not yet written).
|
||||
*/
|
||||
getPatternsForAutoWrite(): DetectedPattern[] {
|
||||
const raw = this.db.raw;
|
||||
const patterns = raw.prepare(
|
||||
`SELECT * FROM facts WHERE fact_type = 'pattern' AND superseded = 0`
|
||||
).all() as Fact[];
|
||||
|
||||
const eligible: DetectedPattern[] = [];
|
||||
for (const p of patterns) {
|
||||
try {
|
||||
const meta = JSON.parse(p.tags || "{}") as DetectedPattern;
|
||||
if (!meta.autoWritten && meta.occurrences && meta.occurrences.length >= this.cfg.autoWriteThreshold) {
|
||||
eligible.push(meta);
|
||||
}
|
||||
} catch (e) { console.debug('memoria:patterns: ' + String(e)); }
|
||||
}
|
||||
return eligible;
|
||||
}
|
||||
|
||||
/**
|
||||
* Mark a pattern as auto-written.
|
||||
*/
|
||||
markAutoWritten(patternId: string): void {
|
||||
const raw = this.db.raw;
|
||||
const fact = raw.prepare("SELECT * FROM facts WHERE id = ?").get(patternId) as Fact | undefined;
|
||||
if (!fact) return;
|
||||
try {
|
||||
const meta = JSON.parse(fact.tags || "{}") as DetectedPattern;
|
||||
meta.autoWritten = true;
|
||||
raw.prepare("UPDATE facts SET tags = ?, updated_at = ? WHERE id = ?")
|
||||
.run(JSON.stringify(meta), Date.now(), patternId);
|
||||
} catch (e) { console.debug('memoria:patterns: ' + String(e)); }
|
||||
}
|
||||
|
||||
// ─── Stats ───
|
||||
|
||||
stats(): { total: number; byType: Record<string, number>; avgOccurrences: number } {
|
||||
const raw = this.db.raw;
|
||||
const patterns = raw.prepare(
|
||||
`SELECT * FROM facts WHERE fact_type = 'pattern' AND superseded = 0`
|
||||
).all() as Fact[];
|
||||
|
||||
const byType: Record<string, number> = {};
|
||||
let totalOcc = 0;
|
||||
|
||||
for (const p of patterns) {
|
||||
try {
|
||||
const meta = JSON.parse(p.tags || "{}") as DetectedPattern;
|
||||
const t = meta.patternType || "unknown";
|
||||
byType[t] = (byType[t] || 0) + 1;
|
||||
totalOcc += meta.occurrences?.length || 0;
|
||||
} catch (e) {
|
||||
console.debug('memoria:patterns: ' + String(e));
|
||||
byType["unknown"] = (byType["unknown"] || 0) + 1;
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
total: patterns.length,
|
||||
byType,
|
||||
avgOccurrences: patterns.length > 0 ? Math.round(totalOcc / patterns.length * 10) / 10 : 0,
|
||||
};
|
||||
}
|
||||
}
|
||||
1298
openclaw-memoria-port/core/procedural.ts
Normal file
1298
openclaw-memoria-port/core/procedural.ts
Normal file
File diff suppressed because it is too large
Load Diff
77
openclaw-memoria-port/core/providers/anthropic.ts
Normal file
77
openclaw-memoria-port/core/providers/anthropic.ts
Normal file
@@ -0,0 +1,77 @@
|
||||
/**
|
||||
* Anthropic Provider — Claude API direct (not OpenAI-compatible)
|
||||
*
|
||||
* Uses /v1/messages endpoint with Anthropic's native format.
|
||||
* Supports Claude Haiku, Sonnet, Opus via API key.
|
||||
*/
|
||||
|
||||
import type { LLMProvider, GenerateOptions, GenerateResult } from "./types.js";
|
||||
|
||||
export class AnthropicLLM implements LLMProvider {
|
||||
readonly name = "anthropic";
|
||||
private baseUrl: string;
|
||||
private model: string;
|
||||
private apiKey: string;
|
||||
|
||||
constructor(model = "claude-sonnet-4-5-20250514", apiKey = "", baseUrl = "https://api.anthropic.com") {
|
||||
this.model = model;
|
||||
this.apiKey = apiKey;
|
||||
this.baseUrl = baseUrl.replace(/\/$/, "");
|
||||
}
|
||||
|
||||
async generateWithMeta(prompt: string, options?: GenerateOptions): Promise<GenerateResult | null> {
|
||||
const start = Date.now();
|
||||
try {
|
||||
const response = await this.generate(prompt, options);
|
||||
return { response, provider: this.name, attemptMs: Date.now() - start, fallbacksUsed: 0 };
|
||||
} catch (_e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
async generate(prompt: string, options?: GenerateOptions): Promise<string> {
|
||||
if (!this.apiKey) throw new Error("Anthropic API key required");
|
||||
|
||||
const body: Record<string, unknown> = {
|
||||
model: this.model,
|
||||
max_tokens: options?.maxTokens ?? 1024,
|
||||
messages: [{ role: "user", content: prompt }],
|
||||
};
|
||||
|
||||
// Temperature: Anthropic range is 0-1
|
||||
if (options?.temperature !== undefined) {
|
||||
body.temperature = options.temperature;
|
||||
}
|
||||
|
||||
const res = await fetch(`${this.baseUrl}/v1/messages`, {
|
||||
method: "POST",
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
"x-api-key": this.apiKey,
|
||||
"anthropic-version": "2023-06-01",
|
||||
},
|
||||
body: JSON.stringify(body),
|
||||
signal: AbortSignal.timeout(options?.timeoutMs ?? 30000),
|
||||
});
|
||||
|
||||
if (!res.ok) {
|
||||
const text = await res.text().catch(() => "");
|
||||
throw new Error(`Anthropic error ${res.status}: ${text.slice(0, 200)}`);
|
||||
}
|
||||
|
||||
const data = await res.json() as {
|
||||
content: Array<{ type: string; text?: string }>;
|
||||
};
|
||||
|
||||
// Extract text from content blocks
|
||||
return data.content
|
||||
?.filter(b => b.type === "text" && b.text)
|
||||
.map(b => b.text!)
|
||||
.join("\n") || "";
|
||||
}
|
||||
}
|
||||
|
||||
// Factory helper
|
||||
export function anthropicLLM(model: string, apiKey: string, baseUrl?: string) {
|
||||
return new AnthropicLLM(model, apiKey, baseUrl);
|
||||
}
|
||||
124
openclaw-memoria-port/core/providers/ollama.ts
Normal file
124
openclaw-memoria-port/core/providers/ollama.ts
Normal file
@@ -0,0 +1,124 @@
|
||||
/**
|
||||
* Ollama Provider — local, free, default for Koda
|
||||
*/
|
||||
|
||||
import type { EmbedProvider, LLMProvider, GenerateOptions, GenerateResult } from "./types.js";
|
||||
|
||||
export class OllamaEmbed implements EmbedProvider {
|
||||
readonly name = "ollama";
|
||||
readonly dimensions: number;
|
||||
private baseUrl: string;
|
||||
private model: string;
|
||||
|
||||
constructor(baseUrl = "http://localhost:11434", model = "nomic-embed-text-v2-moe", dimensions = 768) {
|
||||
this.baseUrl = baseUrl;
|
||||
this.model = model;
|
||||
this.dimensions = dimensions;
|
||||
}
|
||||
|
||||
async embed(text: string): Promise<number[]> {
|
||||
const res = await fetch(`${this.baseUrl}/api/embed`, {
|
||||
method: "POST",
|
||||
headers: { "Content-Type": "application/json" },
|
||||
body: JSON.stringify({ model: this.model, input: text }),
|
||||
signal: AbortSignal.timeout(30000),
|
||||
});
|
||||
if (!res.ok) throw new Error(`Ollama embed error: ${res.status}`);
|
||||
const data = await res.json() as { embeddings: number[][] };
|
||||
return data.embeddings[0];
|
||||
}
|
||||
|
||||
async embedBatch(texts: string[]): Promise<number[][]> {
|
||||
const res = await fetch(`${this.baseUrl}/api/embed`, {
|
||||
method: "POST",
|
||||
headers: { "Content-Type": "application/json" },
|
||||
body: JSON.stringify({ model: this.model, input: texts }),
|
||||
signal: AbortSignal.timeout(60000),
|
||||
});
|
||||
if (!res.ok) throw new Error(`Ollama embed batch error: ${res.status}`);
|
||||
const data = await res.json() as { embeddings: number[][] };
|
||||
return data.embeddings;
|
||||
}
|
||||
}
|
||||
|
||||
export class OllamaLLM implements LLMProvider {
|
||||
readonly name = "ollama";
|
||||
private baseUrl: string;
|
||||
private model: string;
|
||||
|
||||
constructor(baseUrl = "http://localhost:11434", model = "gemma3:4b") {
|
||||
this.baseUrl = baseUrl;
|
||||
this.model = model;
|
||||
}
|
||||
|
||||
async generateWithMeta(prompt: string, options?: GenerateOptions): Promise<GenerateResult | null> {
|
||||
const start = Date.now();
|
||||
try {
|
||||
const response = await this.generate(prompt, options);
|
||||
return { response, provider: this.name, attemptMs: Date.now() - start, fallbacksUsed: 0 };
|
||||
} catch (_e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
async generate(prompt: string, options?: GenerateOptions): Promise<string> {
|
||||
// Use chat API for models that support think parameter (e.g., qwen3.5)
|
||||
// This allows disabling thinking mode which consumes all tokens
|
||||
const isThinkingModel = this.model.includes("qwen3.5");
|
||||
|
||||
if (isThinkingModel) {
|
||||
return this.generateViaChat(prompt, options);
|
||||
}
|
||||
|
||||
const body: Record<string, unknown> = {
|
||||
model: this.model,
|
||||
prompt,
|
||||
stream: false,
|
||||
options: {
|
||||
num_predict: options?.maxTokens ?? 1024,
|
||||
temperature: options?.temperature ?? 0.1,
|
||||
},
|
||||
};
|
||||
if (options?.format === "json") body.format = "json";
|
||||
|
||||
const res = await fetch(`${this.baseUrl}/api/generate`, {
|
||||
method: "POST",
|
||||
headers: { "Content-Type": "application/json" },
|
||||
body: JSON.stringify(body),
|
||||
signal: AbortSignal.timeout(options?.timeoutMs ?? 30000),
|
||||
});
|
||||
if (!res.ok) throw new Error(`Ollama LLM error: ${res.status}`);
|
||||
const data = await res.json() as { response?: string; thinking?: string };
|
||||
// Reasoning models (GPT-OSS, Qwen3.5) put content in "thinking", not "response"
|
||||
const response = data.response || "";
|
||||
const thinking = data.thinking || "";
|
||||
// If response is empty but thinking has content, use thinking
|
||||
// If both exist, prefer response (it's the final answer)
|
||||
return response || thinking;
|
||||
}
|
||||
|
||||
/** Chat API path — required for qwen3.5 models to disable thinking mode */
|
||||
private async generateViaChat(prompt: string, options?: GenerateOptions): Promise<string> {
|
||||
const body: Record<string, unknown> = {
|
||||
model: this.model,
|
||||
messages: [{ role: "user", content: prompt }],
|
||||
stream: false,
|
||||
think: false,
|
||||
options: {
|
||||
num_predict: options?.maxTokens ?? 1024,
|
||||
temperature: options?.temperature ?? 0.1,
|
||||
},
|
||||
};
|
||||
if (options?.format === "json") body.format = "json";
|
||||
|
||||
const res = await fetch(`${this.baseUrl}/api/chat`, {
|
||||
method: "POST",
|
||||
headers: { "Content-Type": "application/json" },
|
||||
body: JSON.stringify(body),
|
||||
signal: AbortSignal.timeout(options?.timeoutMs ?? 30000),
|
||||
});
|
||||
if (!res.ok) throw new Error(`Ollama chat LLM error: ${res.status}`);
|
||||
const data = await res.json() as { message?: { content?: string } };
|
||||
return data.message?.content || "";
|
||||
}
|
||||
}
|
||||
122
openclaw-memoria-port/core/providers/openai-compat.ts
Normal file
122
openclaw-memoria-port/core/providers/openai-compat.ts
Normal file
@@ -0,0 +1,122 @@
|
||||
/**
|
||||
* OpenAI-Compatible Provider — works with LM Studio, OpenAI, OpenRouter
|
||||
*
|
||||
* LM Studio, OpenAI, and OpenRouter all use the same API format.
|
||||
* Only the baseUrl and apiKey differ.
|
||||
*/
|
||||
|
||||
import type { EmbedProvider, LLMProvider, GenerateOptions, GenerateResult } from "./types.js";
|
||||
|
||||
export class OpenAICompatEmbed implements EmbedProvider {
|
||||
readonly name: string;
|
||||
readonly dimensions: number;
|
||||
private baseUrl: string;
|
||||
private model: string;
|
||||
private apiKey: string;
|
||||
|
||||
constructor(name: string, baseUrl: string, model: string, apiKey = "", dimensions = 768) {
|
||||
this.name = name;
|
||||
this.baseUrl = baseUrl.replace(/\/$/, "");
|
||||
this.model = model;
|
||||
this.apiKey = apiKey;
|
||||
this.dimensions = dimensions;
|
||||
}
|
||||
|
||||
async embed(text: string): Promise<number[]> {
|
||||
const result = await this.embedBatch([text]);
|
||||
return result[0];
|
||||
}
|
||||
|
||||
async embedBatch(texts: string[]): Promise<number[][]> {
|
||||
const headers: Record<string, string> = { "Content-Type": "application/json" };
|
||||
if (this.apiKey) headers["Authorization"] = `Bearer ${this.apiKey}`;
|
||||
|
||||
const res = await fetch(`${this.baseUrl}/embeddings`, {
|
||||
method: "POST",
|
||||
headers,
|
||||
body: JSON.stringify({ model: this.model, input: texts }),
|
||||
signal: AbortSignal.timeout(60000),
|
||||
});
|
||||
if (!res.ok) throw new Error(`${this.name} embed error: ${res.status} ${await res.text()}`);
|
||||
const data = await res.json() as { data: Array<{ embedding: number[] }> };
|
||||
return data.data.map(d => d.embedding);
|
||||
}
|
||||
}
|
||||
|
||||
export class OpenAICompatLLM implements LLMProvider {
|
||||
readonly name: string;
|
||||
private baseUrl: string;
|
||||
private model: string;
|
||||
private apiKey: string;
|
||||
|
||||
constructor(name: string, baseUrl: string, model: string, apiKey = "") {
|
||||
this.name = name;
|
||||
this.baseUrl = baseUrl.replace(/\/$/, "");
|
||||
this.model = model;
|
||||
this.apiKey = apiKey;
|
||||
}
|
||||
|
||||
async generateWithMeta(prompt: string, options?: GenerateOptions): Promise<GenerateResult | null> {
|
||||
const start = Date.now();
|
||||
try {
|
||||
const response = await this.generate(prompt, options);
|
||||
return { response, provider: this.name, attemptMs: Date.now() - start, fallbacksUsed: 0 };
|
||||
} catch (_e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
async generate(prompt: string, options?: GenerateOptions): Promise<string> {
|
||||
const headers: Record<string, string> = { "Content-Type": "application/json" };
|
||||
if (this.apiKey) headers["Authorization"] = `Bearer ${this.apiKey}`;
|
||||
|
||||
const body: Record<string, unknown> = {
|
||||
model: this.model,
|
||||
messages: [{ role: "user", content: prompt }],
|
||||
max_tokens: options?.maxTokens ?? 1024,
|
||||
temperature: options?.temperature ?? 0.1,
|
||||
};
|
||||
if (options?.format === "json") {
|
||||
body.response_format = { type: "json_object" };
|
||||
}
|
||||
|
||||
const res = await fetch(`${this.baseUrl}/chat/completions`, {
|
||||
method: "POST",
|
||||
headers,
|
||||
body: JSON.stringify(body),
|
||||
signal: AbortSignal.timeout(options?.timeoutMs ?? 30000),
|
||||
});
|
||||
if (!res.ok) throw new Error(`${this.name} LLM error: ${res.status} ${await res.text()}`);
|
||||
const data = await res.json() as { choices: Array<{ message: { content?: string; reasoning_content?: string; reasoning?: string } }> };
|
||||
const msg = data.choices[0]?.message;
|
||||
if (!msg) return "";
|
||||
// Reasoning models (GPT-OSS via LM Studio) put output in reasoning/reasoning_content
|
||||
return msg.content || msg.reasoning_content || msg.reasoning || "";
|
||||
}
|
||||
}
|
||||
|
||||
// ─── Factory helpers ───
|
||||
|
||||
export function lmStudioEmbed(model: string, dimensions = 768, baseUrl = "http://localhost:1234/v1") {
|
||||
return new OpenAICompatEmbed("lmstudio", baseUrl, model, "", dimensions);
|
||||
}
|
||||
|
||||
export function lmStudioLLM(model: string, baseUrl = "http://localhost:1234/v1") {
|
||||
return new OpenAICompatLLM("lmstudio", baseUrl, model, "");
|
||||
}
|
||||
|
||||
export function openaiEmbed(model: string, apiKey: string, dimensions = 1536) {
|
||||
return new OpenAICompatEmbed("openai", "https://api.openai.com/v1", model, apiKey, dimensions);
|
||||
}
|
||||
|
||||
export function openaiLLM(model: string, apiKey: string) {
|
||||
return new OpenAICompatLLM("openai", "https://api.openai.com/v1", model, apiKey);
|
||||
}
|
||||
|
||||
export function openrouterEmbed(model: string, apiKey: string, dimensions = 768) {
|
||||
return new OpenAICompatEmbed("openrouter", "https://openrouter.ai/api/v1", model, apiKey, dimensions);
|
||||
}
|
||||
|
||||
export function openrouterLLM(model: string, apiKey: string) {
|
||||
return new OpenAICompatLLM("openrouter", "https://openrouter.ai/api/v1", model, apiKey);
|
||||
}
|
||||
52
openclaw-memoria-port/core/providers/types.ts
Normal file
52
openclaw-memoria-port/core/providers/types.ts
Normal file
@@ -0,0 +1,52 @@
|
||||
/**
|
||||
* Memoria — Provider Interfaces
|
||||
*
|
||||
* These interfaces are the contract between Memoria and any LLM/embedding backend.
|
||||
* To add a new provider (e.g., Groq, Together, Mistral):
|
||||
* 1. Create providers/your-provider.ts implementing LLMProvider and/or EmbedProvider
|
||||
* 2. Add to the switch in fallback.ts buildProvider()
|
||||
* 3. Add your type to the ProviderConfig.type union below
|
||||
*
|
||||
* All providers are wrapped by FallbackChain — modules never call providers directly.
|
||||
*/
|
||||
|
||||
/** Embedding provider: converts text → float vector. */
|
||||
export interface EmbedProvider {
|
||||
embed(text: string): Promise<number[]>;
|
||||
embedBatch(texts: string[]): Promise<number[][]>;
|
||||
readonly dimensions: number;
|
||||
readonly name: string;
|
||||
}
|
||||
|
||||
/** Options for LLM generation. All optional — providers use their own defaults. */
|
||||
export interface GenerateOptions {
|
||||
maxTokens?: number;
|
||||
temperature?: number;
|
||||
format?: "json" | "text";
|
||||
timeoutMs?: number;
|
||||
}
|
||||
|
||||
/** Extended result with metadata for debugging/logging. */
|
||||
export interface GenerateResult {
|
||||
response: string;
|
||||
provider: string;
|
||||
attemptMs: number;
|
||||
fallbacksUsed: number;
|
||||
}
|
||||
|
||||
/** LLM text generation provider. Only generate() is required; generateWithMeta() is optional. */
|
||||
export interface LLMProvider {
|
||||
generate(prompt: string, options?: GenerateOptions): Promise<string>;
|
||||
/** Extended generate with metadata. Default implementation wraps generate(). */
|
||||
generateWithMeta?(prompt: string, options?: GenerateOptions): Promise<GenerateResult | null>;
|
||||
readonly name: string;
|
||||
}
|
||||
|
||||
/** Config for building a provider instance. Used in fallback[] array and llm/embed config sections. */
|
||||
export interface ProviderConfig {
|
||||
type: "ollama" | "lmstudio" | "openai" | "openrouter" | "anthropic";
|
||||
baseUrl: string;
|
||||
model: string;
|
||||
apiKey?: string;
|
||||
dimensions?: number; // for embed
|
||||
}
|
||||
222
openclaw-memoria-port/core/revision.ts
Normal file
222
openclaw-memoria-port/core/revision.ts
Normal file
@@ -0,0 +1,222 @@
|
||||
/**
|
||||
* Proactive Revision — Like human memory refinement
|
||||
*
|
||||
* When a fact is recalled 10+ times, it proves useful but might be:
|
||||
* - Too vague ("Bureau gère des projets")
|
||||
* - Too broad (multiple concepts in one)
|
||||
* - Outdated (context changed)
|
||||
*
|
||||
* Revision flow:
|
||||
* 1. Detect mature facts with recall_count >= threshold
|
||||
* 2. LLM proposes refinement (more precise, split, or supersede)
|
||||
* 3. If improved → create new fact(s) + supersede old
|
||||
* 4. Track revision history
|
||||
*/
|
||||
|
||||
import type { MemoriaDB, Fact } from "./db.js";
|
||||
import type { LLMProvider } from "./providers/types.js";
|
||||
|
||||
export const REVISION_CONFIG = {
|
||||
recallThreshold: 10, // Trigger revision after 10 recalls
|
||||
cooldownDays: 7, // Don't revise same fact again within 7 days
|
||||
maxRevisionsPerBoot: 3, // Limit revisions per boot to avoid LLM spam
|
||||
};
|
||||
|
||||
export interface RevisionProposal {
|
||||
action: "keep" | "refine" | "split";
|
||||
refined?: string; // For "refine": improved version
|
||||
split?: string[]; // For "split": 2+ new facts
|
||||
reasoning: string;
|
||||
}
|
||||
|
||||
export class RevisionManager {
|
||||
private revisionsThisBoot = 0;
|
||||
|
||||
constructor(
|
||||
private db: MemoriaDB,
|
||||
private llm: LLMProvider,
|
||||
) {}
|
||||
|
||||
/**
|
||||
* Get facts needing revision
|
||||
*/
|
||||
getFactsNeedingRevision(): Fact[] {
|
||||
const cooloffMs = REVISION_CONFIG.cooldownDays * 24 * 60 * 60 * 1000;
|
||||
const cutoff = Date.now() - cooloffMs;
|
||||
|
||||
// Find mature facts with high recall count and no recent revision
|
||||
const facts = this.db.raw.prepare(`
|
||||
SELECT * FROM facts
|
||||
WHERE superseded = 0
|
||||
AND lifecycle_state = 'settled'
|
||||
AND recall_count >= ?
|
||||
AND (last_accessed_at IS NULL OR last_accessed_at >= ?)
|
||||
ORDER BY recall_count DESC
|
||||
LIMIT 5
|
||||
`).all(REVISION_CONFIG.recallThreshold, cutoff) as Fact[];
|
||||
|
||||
return facts;
|
||||
}
|
||||
|
||||
/**
|
||||
* Propose revision for a fact using LLM
|
||||
*/
|
||||
async proposeRevision(fact: Fact): Promise<RevisionProposal> {
|
||||
const recallCount = fact.recall_count ?? 0;
|
||||
const usedCount = fact.used_count ?? 0;
|
||||
const usageRatio = recallCount > 0 ? (usedCount / recallCount * 100).toFixed(0) : "0";
|
||||
|
||||
const prompt = `You are reviewing a memory fact that has been recalled ${recallCount} times (used in ${usageRatio}% of those recalls).
|
||||
|
||||
**Current fact:**
|
||||
"${fact.fact}"
|
||||
|
||||
**Category:** ${fact.category}
|
||||
**Type:** ${fact.fact_type}
|
||||
|
||||
**Your task:**
|
||||
1. If the fact is already precise and complete → respond with JSON: {"action":"keep","reasoning":"..."}
|
||||
2. If it can be refined (more precise/complete) → respond with JSON: {"action":"refine","refined":"IMPROVED FACT HERE","reasoning":"..."}
|
||||
3. If it mixes multiple concepts → split into 2+ facts: {"action":"split","split":["fact1","fact2"],"reasoning":"..."}
|
||||
|
||||
**Rules:**
|
||||
- Keep facts short (< 200 chars)
|
||||
- Be concrete, not meta
|
||||
- Don't add dates/timestamps (those are episodic facts)
|
||||
- Respond ONLY with valid JSON, no markdown
|
||||
|
||||
Response:`;
|
||||
|
||||
try {
|
||||
const response = await this.llm.generate(prompt);
|
||||
const cleaned = response.replace(/```json\n?/g, "").replace(/```\n?/g, "").trim();
|
||||
const proposal = JSON.parse(cleaned) as RevisionProposal;
|
||||
|
||||
if (!proposal.action || !["keep", "refine", "split"].includes(proposal.action)) {
|
||||
throw new Error("Invalid action");
|
||||
}
|
||||
|
||||
return proposal;
|
||||
} catch (err) {
|
||||
// Fallback: keep
|
||||
return {
|
||||
action: "keep",
|
||||
reasoning: `Revision failed: ${err}`,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Apply revision proposal
|
||||
*/
|
||||
async applyRevision(fact: Fact, proposal: RevisionProposal): Promise<{ created: number; superseded: boolean }> {
|
||||
let created = 0;
|
||||
let superseded = false;
|
||||
|
||||
if (proposal.action === "keep") {
|
||||
return { created: 0, superseded: false };
|
||||
}
|
||||
|
||||
const now = Date.now();
|
||||
|
||||
if (proposal.action === "refine" && proposal.refined) {
|
||||
// Create refined version
|
||||
const newId = `fact_${Date.now()}_${Math.random().toString(36).slice(2, 9)}`;
|
||||
this.db.raw.prepare(`
|
||||
INSERT INTO facts (
|
||||
id, fact, category, confidence, source, tags, agent,
|
||||
created_at, updated_at, access_count, superseded,
|
||||
fact_type, relevance_weight, lifecycle_state
|
||||
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, 0, 0, ?, ?, 'fresh')
|
||||
`).run(
|
||||
newId,
|
||||
proposal.refined,
|
||||
fact.category,
|
||||
fact.confidence,
|
||||
`revision:${fact.id}`,
|
||||
fact.tags,
|
||||
fact.agent,
|
||||
now,
|
||||
now,
|
||||
fact.fact_type,
|
||||
fact.relevance_weight,
|
||||
);
|
||||
created++;
|
||||
|
||||
// Supersede old fact
|
||||
this.db.raw.prepare(`
|
||||
UPDATE facts SET superseded = 1, superseded_by = ?, superseded_at = ? WHERE id = ?
|
||||
`).run(newId, now, fact.id);
|
||||
superseded = true;
|
||||
} else if (proposal.action === "split" && proposal.split && proposal.split.length >= 2) {
|
||||
// Create multiple new facts
|
||||
for (const newFact of proposal.split) {
|
||||
const newId = `fact_${Date.now()}_${Math.random().toString(36).slice(2, 9)}`;
|
||||
this.db.raw.prepare(`
|
||||
INSERT INTO facts (
|
||||
id, fact, category, confidence, source, tags, agent,
|
||||
created_at, updated_at, access_count, superseded,
|
||||
fact_type, relevance_weight, lifecycle_state
|
||||
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, 0, 0, ?, ?, 'fresh')
|
||||
`).run(
|
||||
newId,
|
||||
newFact,
|
||||
fact.category,
|
||||
fact.confidence,
|
||||
`split:${fact.id}`,
|
||||
fact.tags,
|
||||
fact.agent,
|
||||
now,
|
||||
now,
|
||||
fact.fact_type,
|
||||
fact.relevance_weight,
|
||||
);
|
||||
created++;
|
||||
}
|
||||
|
||||
// Supersede old fact (point to first split)
|
||||
const firstId = `split:${fact.id}`;
|
||||
this.db.raw.prepare(`
|
||||
UPDATE facts SET superseded = 1, superseded_by = ?, superseded_at = ? WHERE id = ?
|
||||
`).run(firstId, now, fact.id);
|
||||
superseded = true;
|
||||
}
|
||||
|
||||
return { created, superseded };
|
||||
}
|
||||
|
||||
/**
|
||||
* Run revision check and apply (called after recall)
|
||||
*/
|
||||
async checkAndRevise(): Promise<{ checked: number; revised: number; created: number }> {
|
||||
if (this.revisionsThisBoot >= REVISION_CONFIG.maxRevisionsPerBoot) {
|
||||
return { checked: 0, revised: 0, created: 0 };
|
||||
}
|
||||
|
||||
const candidates = this.getFactsNeedingRevision();
|
||||
let revised = 0;
|
||||
let created = 0;
|
||||
|
||||
for (const fact of candidates.slice(0, REVISION_CONFIG.maxRevisionsPerBoot - this.revisionsThisBoot)) {
|
||||
const proposal = await this.proposeRevision(fact);
|
||||
const result = await this.applyRevision(fact, proposal);
|
||||
|
||||
if (result.superseded) {
|
||||
revised++;
|
||||
created += result.created;
|
||||
this.revisionsThisBoot++;
|
||||
}
|
||||
|
||||
if (this.revisionsThisBoot >= REVISION_CONFIG.maxRevisionsPerBoot) break;
|
||||
}
|
||||
|
||||
return { checked: candidates.length, revised, created };
|
||||
}
|
||||
|
||||
/**
|
||||
* Reset boot counter (called on plugin reload)
|
||||
*/
|
||||
resetBootCounter(): void {
|
||||
this.revisionsThisBoot = 0;
|
||||
}
|
||||
}
|
||||
187
openclaw-memoria-port/core/scoring.ts
Normal file
187
openclaw-memoria-port/core/scoring.ts
Normal file
@@ -0,0 +1,187 @@
|
||||
/**
|
||||
* Memoria — Temporal Scoring & Decay
|
||||
*
|
||||
* Mimics brain memory: errors are immune, recent is boosted,
|
||||
* old facts decay, frequently accessed facts are stronger.
|
||||
*/
|
||||
|
||||
import type { Fact } from "./db.js";
|
||||
|
||||
// ─── Config ───
|
||||
|
||||
export const DECAY_CONFIG = {
|
||||
// Half-lives by category (in days) — for SEMANTIC facts
|
||||
halfLife: {
|
||||
erreur: Infinity, // IMMUNE — never decays
|
||||
savoir: 90,
|
||||
preference: 90,
|
||||
rh: 60,
|
||||
client: 60,
|
||||
outil: 30,
|
||||
chronologie: 14,
|
||||
} as Record<string, number>,
|
||||
|
||||
// Episodic facts decay faster (contextual, dated)
|
||||
episodicHalfLife: {
|
||||
erreur: 30, // Even episodic errors eventually fade
|
||||
savoir: 14,
|
||||
preference: 14,
|
||||
rh: 14,
|
||||
client: 14,
|
||||
outil: 7,
|
||||
chronologie: 7,
|
||||
} as Record<string, number>,
|
||||
|
||||
// Default for unknown categories
|
||||
defaultHalfLife: 30,
|
||||
defaultEpisodicHalfLife: 14,
|
||||
|
||||
// Recency boost
|
||||
recentBoostHours: 24,
|
||||
recentBoostFactor: 1.3,
|
||||
weekBoostHours: 168, // 7 days
|
||||
weekBoostFactor: 1.1,
|
||||
|
||||
// Access frequency boost — muscled: frequent = retained like human memory
|
||||
accessBoostFactor: 0.3, // × log(count+1) — was 0.1, now 3x stronger
|
||||
|
||||
// Freshness bonus (recently UPDATED facts)
|
||||
freshnessHours: 48,
|
||||
freshnessFactor: 1.2,
|
||||
|
||||
// Stale penalty
|
||||
staleThresholdDays: 90,
|
||||
stalePenalty: 0.7,
|
||||
staleMinConfidence: 0.8, // Only penalize low-confidence stale facts
|
||||
};
|
||||
|
||||
// ─── Types ───
|
||||
|
||||
export interface ScoredFact extends Fact {
|
||||
temporalScore: number;
|
||||
ageHours: number;
|
||||
decayFactor: number;
|
||||
}
|
||||
|
||||
// ─── Scoring ───
|
||||
|
||||
export function scoreFact(fact: Fact, now = Date.now()): ScoredFact {
|
||||
const ageMs = now - fact.created_at;
|
||||
const ageHours = ageMs / (1000 * 60 * 60);
|
||||
const ageDays = ageHours / 24;
|
||||
|
||||
let score = fact.confidence;
|
||||
let decayFactor = 1.0;
|
||||
|
||||
// 1. Category decay — semantic vs episodic
|
||||
const factType = fact.fact_type || "semantic";
|
||||
let halfLife: number;
|
||||
if (factType === "episodic") {
|
||||
halfLife = DECAY_CONFIG.episodicHalfLife[fact.category] ?? DECAY_CONFIG.defaultEpisodicHalfLife;
|
||||
} else {
|
||||
halfLife = DECAY_CONFIG.halfLife[fact.category] ?? DECAY_CONFIG.defaultHalfLife;
|
||||
}
|
||||
if (halfLife === Infinity) {
|
||||
decayFactor = 1.0; // Immune
|
||||
} else {
|
||||
decayFactor = Math.pow(0.5, ageDays / halfLife);
|
||||
}
|
||||
score *= decayFactor;
|
||||
|
||||
// 2. Recency boost
|
||||
if (ageHours < DECAY_CONFIG.recentBoostHours) {
|
||||
score *= DECAY_CONFIG.recentBoostFactor;
|
||||
} else if (ageHours < DECAY_CONFIG.weekBoostHours) {
|
||||
score *= DECAY_CONFIG.weekBoostFactor;
|
||||
}
|
||||
|
||||
// 3. Access frequency boost
|
||||
if (fact.access_count > 0) {
|
||||
score *= (1 + DECAY_CONFIG.accessBoostFactor * Math.log(fact.access_count + 1));
|
||||
}
|
||||
|
||||
// 4. Freshness bonus (recently MODIFIED, not just created)
|
||||
const updateAgeHours = (now - fact.updated_at) / (1000 * 60 * 60);
|
||||
if (updateAgeHours < DECAY_CONFIG.freshnessHours) {
|
||||
score *= DECAY_CONFIG.freshnessFactor;
|
||||
}
|
||||
|
||||
// 5. Stale penalty (old + low confidence)
|
||||
if (ageDays > DECAY_CONFIG.staleThresholdDays && fact.confidence < DECAY_CONFIG.staleMinConfidence) {
|
||||
score *= DECAY_CONFIG.stalePenalty;
|
||||
}
|
||||
|
||||
// 6. Cluster boost — clusters are aggregated "dossiers", more info-dense
|
||||
if (factType === "cluster") {
|
||||
score *= 1.15; // 15% boost: clusters contain multiple facts = higher recall value
|
||||
}
|
||||
|
||||
// 7. Feedback loop: usefulness from actual usage in responses
|
||||
const usefulness = fact.usefulness ?? 0;
|
||||
const recallCount = fact.recall_count ?? 0;
|
||||
if (recallCount > 0) {
|
||||
// Useful facts get boosted, consistently ignored facts get penalized
|
||||
// Use ratio: used_count / recall_count → 0-1 scale
|
||||
const usedCount = fact.used_count ?? 0;
|
||||
const usageRatio = usedCount / recallCount;
|
||||
|
||||
if (usageRatio > 0.5) {
|
||||
// Used more than half the time → boost proportional to usage
|
||||
score *= (1 + 0.2 * usageRatio); // Up to +20%
|
||||
} else if (recallCount >= 5 && usageRatio < 0.1) {
|
||||
// Recalled 5+ times but almost never used → deprioritize
|
||||
score *= 0.8; // -20%
|
||||
}
|
||||
|
||||
// Direct usefulness score influence (capped)
|
||||
if (usefulness > 3) {
|
||||
score *= 1.1; // Proven useful: +10%
|
||||
} else if (usefulness < -2) {
|
||||
score *= 0.85; // Proven useless: -15%
|
||||
}
|
||||
}
|
||||
|
||||
// 8. Relevance weight — identity-aware prioritization (Phase 0)
|
||||
// Facts about daily projects (Bureau, Polymarket) > internal tools (Memoria)
|
||||
const relevanceWeight = fact.relevance_weight ?? 0.5;
|
||||
score *= (0.7 + relevanceWeight * 0.6); // Scale: 0.7x (weight=0) to 1.3x (weight=1.0)
|
||||
|
||||
// 9. Lifecycle multiplier — prioritize fresh > settled > dormant
|
||||
// Dormant facts are NOT deleted — they just surface less in auto-recall.
|
||||
// When user explicitly asks about past events, lifecycle filter is bypassed.
|
||||
// The multiplier is applied externally via LifecycleManager.getRecallMultiplier()
|
||||
// to keep scoring independent of lifecycle config/cursor.
|
||||
|
||||
return { ...fact, temporalScore: score, ageHours, decayFactor };
|
||||
}
|
||||
|
||||
export function scoreAndRank(facts: Fact[]): ScoredFact[] {
|
||||
const now = Date.now();
|
||||
return facts
|
||||
.map(f => scoreFact(f, now))
|
||||
.sort((a, b) => b.temporalScore - a.temporalScore);
|
||||
}
|
||||
|
||||
// ─── Hot Tier ───
|
||||
// Facts accessed frequently = "learned by heart" like a phone number you dial often.
|
||||
// These are always injected in recall, regardless of query relevance.
|
||||
|
||||
export const HOT_TIER_CONFIG = {
|
||||
/** Minimum access count to be "hot" */
|
||||
minAccessCount: 5,
|
||||
/** Maximum hot facts to always inject */
|
||||
maxHotFacts: 3,
|
||||
/** Don't include if last accessed more than X days ago (stale even if hot) */
|
||||
staleAfterDays: 30,
|
||||
};
|
||||
|
||||
export function getHotFacts(facts: Fact[], config = HOT_TIER_CONFIG): ScoredFact[] {
|
||||
const now = Date.now();
|
||||
const staleCutoff = now - config.staleAfterDays * 24 * 60 * 60 * 1000;
|
||||
|
||||
return facts
|
||||
.filter(f => f.access_count >= config.minAccessCount && (f.last_accessed_at || f.updated_at) > staleCutoff)
|
||||
.map(f => scoreFact(f, now))
|
||||
.sort((a, b) => b.access_count - a.access_count) // Sort by usage, not temporal
|
||||
.slice(0, config.maxHotFacts);
|
||||
}
|
||||
623
openclaw-memoria-port/core/selective.ts
Normal file
623
openclaw-memoria-port/core/selective.ts
Normal file
@@ -0,0 +1,623 @@
|
||||
/**
|
||||
* Memoria — Layer 3: Selective Memory (Gatekeeper)
|
||||
*
|
||||
* Decides whether a new fact should be stored, merged, or rejected.
|
||||
* Like the brain filtering: important → store, noise → ignore.
|
||||
*
|
||||
* Pipeline (in processAndApply):
|
||||
* 1. Noise filter — skip trivial facts ("ok", "merci", too short)
|
||||
* 2. FTS5 candidates — find similar existing facts
|
||||
* 3. Levenshtein dedup — reject near-exact duplicates
|
||||
* 4. Prefix dedup — reject facts that start the same way
|
||||
* 5. LLM contradiction check — if similarity > threshold, ask LLM if it contradicts
|
||||
* 6. Store / Enrich / Supersede / Skip
|
||||
*
|
||||
* Thresholds are configurable per category (preferences have tighter dedup at 0.65).
|
||||
* LLM is only called for contradiction detection (step 5), not for every fact.
|
||||
*
|
||||
* @example
|
||||
* const result = await selective.processAndApply("Bureau uses Convex", "savoir", 0.9);
|
||||
* // result: { stored: true, action: "store", factId: "f_abc123" }
|
||||
* // or: { stored: false, action: "skip", reason: "duplicate" }
|
||||
*/
|
||||
|
||||
import type { MemoriaDB, Fact } from "./db.js";
|
||||
import type { LLMProvider } from "./providers/types.js";
|
||||
import type { EmbeddingManager } from "./embeddings.js";
|
||||
|
||||
// ─── Config ───
|
||||
|
||||
export interface SelectiveConfig {
|
||||
/** Levenshtein similarity threshold (0-1). Above = duplicate. Default 0.85 */
|
||||
dupThreshold: number;
|
||||
/** FTS5 candidates to check for dedup. Default 5 */
|
||||
dupCandidates: number;
|
||||
/** Enable LLM contradiction check. Default true */
|
||||
contradictionCheck: boolean;
|
||||
/** Minimum fact length to store. Default 10 */
|
||||
minFactLength: number;
|
||||
/** Minimum importance score (0-1). Below = noise. Default 0.3 */
|
||||
importanceThreshold: number;
|
||||
/** Enable enrichment (merge similar facts). Default true */
|
||||
enrichEnabled: boolean;
|
||||
/** Similarity threshold for enrichment (higher than dedup). Default 0.7 */
|
||||
enrichThreshold: number;
|
||||
/** Cosine similarity threshold for semantic contradiction check. Default 0.55 */
|
||||
semanticContradictionThreshold: number;
|
||||
}
|
||||
|
||||
export const DEFAULT_SELECTIVE_CONFIG: SelectiveConfig = {
|
||||
dupThreshold: 0.75,
|
||||
dupCandidates: 5,
|
||||
contradictionCheck: true,
|
||||
minFactLength: 10,
|
||||
importanceThreshold: 0.3,
|
||||
enrichEnabled: true,
|
||||
enrichThreshold: 0.60,
|
||||
semanticContradictionThreshold: 0.30,
|
||||
};
|
||||
|
||||
// ─── Result type ───
|
||||
|
||||
export type SelectiveResult =
|
||||
| { action: "store"; fact: string; category: string; confidence: number }
|
||||
| { action: "skip"; reason: "noise" | "duplicate" | "too_short" }
|
||||
| { action: "supersede"; oldFactId: string; fact: string; category: string; confidence: number }
|
||||
| { action: "enrich"; existingFactId: string; mergedFact: string; confidence: number };
|
||||
|
||||
// ─── Noise patterns ───
|
||||
|
||||
const NOISE_PATTERNS = [
|
||||
/^(ok|okay|oui|non|yes|no|merci|thanks|thx|cool|nice|bien|parfait|super|top|génial|d'accord|alright|yep|nope|ah|oh|hmm|hm)\.?$/i,
|
||||
/^(bonjour|bonsoir|salut|hello|hi|hey|ciao|bye|bonne nuit|à demain|à \+)\.?$/i,
|
||||
/^(je comprends?|compris|noté|vu|reçu|roger|understood|got it|c'est bon|ça marche)\.?$/i,
|
||||
/^.{0,5}$/, // Less than 6 chars = noise
|
||||
];
|
||||
|
||||
const TEMPORAL_NOISE_KEYWORDS = [
|
||||
"en train de", "je vais", "je fais", "attends", "patience",
|
||||
"une seconde", "un moment", "working on", "processing",
|
||||
];
|
||||
|
||||
// DISPOSABLE TODO patterns — short tasks with no learning value
|
||||
// ⚠️ Keep ONLY shallow "go do X" patterns. Never filter:
|
||||
// - Process knowledge ("pour X il faut Y" = learned trick)
|
||||
// - What worked ("X a résolu Y" = experience)
|
||||
// - Explanations with "because/car/parce que"
|
||||
// - Anything with technical detail (>60 chars usually = knowledge)
|
||||
const TODO_PATTERNS = [
|
||||
/^il faut\b(?!.*(?:car |parce|pour |sinon|→|cause|résou))/i,
|
||||
/^on doit\b(?!.*(?:car |parce|pour |sinon|→|cause|résou))/i,
|
||||
/^(à|a) faire\s*:/i,
|
||||
/^todo\s*:/i,
|
||||
/^faut\b(?!.*(?:car |parce|pour |sinon|→))/i,
|
||||
/^need(s)? to\b(?!.*(?:because|otherwise|since))/i,
|
||||
];
|
||||
|
||||
// These are ALWAYS disposable — pure transitional state, no knowledge
|
||||
const TRANSIENT_PATTERNS = [
|
||||
/\ben préparation\b/i,
|
||||
/\ben cours de\b/i,
|
||||
/\bpas encore\b/i,
|
||||
/\bnot yet\b/i,
|
||||
/\bprochaine étape\b/i,
|
||||
/\bnext step\b/i,
|
||||
];
|
||||
|
||||
// Length heuristic: longer facts usually contain knowledge
|
||||
const MIN_LENGTH_FOR_TRANSIENT = 60;
|
||||
|
||||
// ─── Levenshtein ───
|
||||
|
||||
function levenshtein(a: string, b: string): number {
|
||||
const la = a.length;
|
||||
const lb = b.length;
|
||||
if (la === 0) return lb;
|
||||
if (lb === 0) return la;
|
||||
|
||||
// Optimization: if length difference > 50%, skip (can't be similar)
|
||||
if (Math.abs(la - lb) / Math.max(la, lb) > 0.5) return Math.max(la, lb);
|
||||
|
||||
const matrix: number[][] = [];
|
||||
for (let i = 0; i <= la; i++) {
|
||||
matrix[i] = [i];
|
||||
for (let j = 1; j <= lb; j++) {
|
||||
if (i === 0) {
|
||||
matrix[i][j] = j;
|
||||
} else {
|
||||
const cost = a[i - 1] === b[j - 1] ? 0 : 1;
|
||||
matrix[i][j] = Math.min(
|
||||
matrix[i - 1][j] + 1, // deletion
|
||||
matrix[i][j - 1] + 1, // insertion
|
||||
matrix[i - 1][j - 1] + cost // substitution
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
return matrix[la][lb];
|
||||
}
|
||||
|
||||
function levenshteinSimilarity(a: string, b: string): number {
|
||||
const dist = levenshtein(a.toLowerCase(), b.toLowerCase());
|
||||
return 1 - dist / Math.max(a.length, b.length);
|
||||
}
|
||||
|
||||
// ─── Keyword overlap (Jaccard) ───
|
||||
|
||||
/** Extract first N words, lowercased and normalized, for prefix dedup */
|
||||
function extractPrefix(text: string, n: number): string {
|
||||
return text.toLowerCase().trim().split(/\s+/).slice(0, n).join(" ");
|
||||
}
|
||||
|
||||
function extractKeywords(text: string): Set<string> {
|
||||
return new Set(
|
||||
text.toLowerCase()
|
||||
.replace(/[^\p{L}\p{N}\s]/gu, " ")
|
||||
.split(/\s+/)
|
||||
.filter(w => w.length > 2)
|
||||
);
|
||||
}
|
||||
|
||||
function jaccardSimilarity(a: Set<string>, b: Set<string>): number {
|
||||
if (a.size === 0 && b.size === 0) return 1;
|
||||
let intersection = 0;
|
||||
for (const w of a) { if (b.has(w)) intersection++; }
|
||||
const union = a.size + b.size - intersection;
|
||||
return union === 0 ? 0 : intersection / union;
|
||||
}
|
||||
|
||||
// ─── Entity extraction for semantic contradiction detection ───
|
||||
|
||||
/**
|
||||
* Extract entities from text by matching against the knowledge graph.
|
||||
* Dynamic: uses entities learned by the graph (225+ in DB), not a hardcoded list.
|
||||
* Falls back to basic regex patterns for bootstrap (empty graph).
|
||||
*/
|
||||
function extractSubjectEntities(fact: string, knownEntities?: string[]): Set<string> {
|
||||
const entities = new Set<string>();
|
||||
const factLower = fact.toLowerCase();
|
||||
|
||||
// 1. Dynamic: match against all known graph entities
|
||||
if (knownEntities && knownEntities.length > 0) {
|
||||
for (const entity of knownEntities) {
|
||||
// Only match entities with 3+ chars to avoid false positives
|
||||
if (entity.length >= 3 && factLower.includes(entity)) {
|
||||
entities.add(entity);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 2. Fallback regex for bootstrap (when graph is empty or for entities not yet in graph)
|
||||
const fallbackPatterns = [
|
||||
/\b(Sol|Koda|Luna|Neto)\b/gi,
|
||||
/\b(Memoria|Cortex|Ollama|Convex|Bureau|OpenClaw|ClawHub)\b/gi,
|
||||
/\b(gemma3[:\w]*|qwen[\w.:]*|gpt[\w-]*|glm[\w.]*|nemotron[\w-]*)\b/gi,
|
||||
/\b(openclaw\.json|memoria\.db|cortex\.db)\b/gi,
|
||||
/\b(memory-convex|lossless-claw)\b/gi,
|
||||
];
|
||||
for (const p of fallbackPatterns) {
|
||||
for (const m of fact.matchAll(p)) {
|
||||
entities.add(m[0].toLowerCase().trim());
|
||||
}
|
||||
}
|
||||
|
||||
return entities;
|
||||
}
|
||||
|
||||
// ─── Importance scoring ───
|
||||
|
||||
function computeImportance(fact: string, category: string): number {
|
||||
let score = 0.5; // baseline
|
||||
|
||||
// Category boosts
|
||||
const catBoost: Record<string, number> = {
|
||||
erreur: 0.3,
|
||||
preference: 0.2,
|
||||
rh: 0.2,
|
||||
client: 0.15,
|
||||
savoir: 0.1,
|
||||
outil: 0.05,
|
||||
chronologie: 0.0,
|
||||
};
|
||||
score += catBoost[category] ?? 0;
|
||||
|
||||
// Length signal (longer facts tend to be more informative)
|
||||
if (fact.length > 100) score += 0.1;
|
||||
if (fact.length > 200) score += 0.05;
|
||||
|
||||
// Contains numbers/versions/dates = likely factual
|
||||
if (/\d{2,}/.test(fact)) score += 0.05;
|
||||
if (/v\d+\.\d+/i.test(fact)) score += 0.1;
|
||||
|
||||
// Contains technical terms
|
||||
if (/(?:api|deploy|config|build|fix|bug|error|crash|prod|merge|commit)/i.test(fact)) score += 0.05;
|
||||
|
||||
// Noise penalty
|
||||
for (const kw of TEMPORAL_NOISE_KEYWORDS) {
|
||||
if (fact.toLowerCase().includes(kw)) { score -= 0.2; break; }
|
||||
}
|
||||
|
||||
return Math.max(0, Math.min(1, score));
|
||||
}
|
||||
|
||||
// ─── Contradiction check prompt ───
|
||||
|
||||
const CONTRADICTION_PROMPT = `Compare ces deux faits et détermine leur relation.
|
||||
|
||||
Fait existant: "{OLD}"
|
||||
Nouveau fait: "{NEW}"
|
||||
|
||||
RÈGLES IMPORTANTES:
|
||||
- Un changement de VERSION (v2.7.0 → v3.11.0) est une CONTRADICTION (l'ancien est obsolète)
|
||||
- Un changement de STATUS (offline → online, installé → désinstallé) est une CONTRADICTION
|
||||
- Un changement de QUANTITÉ (9 facts → 450 facts) est une CONTRADICTION
|
||||
- Si les deux parlent du MÊME sujet mais avec des valeurs différentes = CONTRADICTION
|
||||
|
||||
Réponds UNIQUEMENT en JSON:
|
||||
- Si le nouveau CONTREDIT l'ancien: {"relation": "contradiction", "reason": "explication courte"}
|
||||
- Si le nouveau COMPLÈTE l'ancien: {"relation": "enrichment", "merged": "fait fusionné en une phrase"}
|
||||
- Si les deux sont INDÉPENDANTS: {"relation": "independent"}
|
||||
- Si c'est un DOUBLON: {"relation": "duplicate"}`;
|
||||
|
||||
// ─── Preference enrichment formatter ───
|
||||
|
||||
/**
|
||||
* When merging preference facts, preserve ALL details/contexts from each occurrence.
|
||||
* Format: 'RÈGLE: [the rule]. Contextes: [context1 (date)], [context2 (date)], ...'
|
||||
*/
|
||||
function formatPreferenceEnrichment(existingFact: string, newFact: string, llmMerged: string): string {
|
||||
const now = new Date().toISOString().slice(0, 10);
|
||||
|
||||
// If the existing fact already has the RÈGLE format, append new context
|
||||
if (existingFact.startsWith("RÈGLE:")) {
|
||||
const contextMatch = existingFact.match(/Contextes:\s*(.+)$/);
|
||||
const existingContexts = contextMatch ? contextMatch[1] : "";
|
||||
// Extract the rule part (up to "Contextes:" or full text)
|
||||
const rulePart = existingFact.replace(/\s*Contextes:\s*.+$/, "");
|
||||
const newContext = newFact.length > 80 ? newFact.slice(0, 80) + "…" : newFact;
|
||||
return `${rulePart} Contextes: ${existingContexts}${existingContexts ? ", " : ""}${newContext} (${now})`;
|
||||
}
|
||||
|
||||
// First enrichment: create the RÈGLE format from LLM merged text
|
||||
const existingSnippet = existingFact.length > 80 ? existingFact.slice(0, 80) + "…" : existingFact;
|
||||
const newSnippet = newFact.length > 80 ? newFact.slice(0, 80) + "…" : newFact;
|
||||
return `RÈGLE: ${llmMerged} Contextes: ${existingSnippet} (antérieur), ${newSnippet} (${now})`;
|
||||
}
|
||||
|
||||
// ─── Main class ───
|
||||
|
||||
export class SelectiveMemory {
|
||||
private db: MemoriaDB;
|
||||
private llm: LLMProvider;
|
||||
private cfg: SelectiveConfig;
|
||||
private embedder: EmbeddingManager | null;
|
||||
private knownEntities: string[] = [];
|
||||
private entitiesLoadedAt = 0;
|
||||
private static ENTITY_CACHE_TTL = 5 * 60 * 1000; // Refresh every 5 min
|
||||
|
||||
/** Callback when a fact is superseded — lets other layers react (observations, clusters) */
|
||||
onSupersede: ((supersededFactId: string, newFactId: string) => void) | null = null;
|
||||
|
||||
constructor(db: MemoriaDB, llm: LLMProvider, config?: Partial<SelectiveConfig>, embedder?: EmbeddingManager) {
|
||||
this.db = db;
|
||||
this.llm = llm;
|
||||
this.cfg = { ...DEFAULT_SELECTIVE_CONFIG, ...config };
|
||||
this.embedder = embedder ?? null;
|
||||
this.refreshEntities();
|
||||
}
|
||||
|
||||
/** Load entity names from graph DB (cached, refreshes every 5 min) */
|
||||
private refreshEntities(): void {
|
||||
try {
|
||||
this.knownEntities = this.db.allEntityNames();
|
||||
this.entitiesLoadedAt = Date.now();
|
||||
} catch (e) { console.debug('memoria:selective: ' + String(e)); }
|
||||
}
|
||||
|
||||
private getEntities(): string[] {
|
||||
if (Date.now() - this.entitiesLoadedAt > SelectiveMemory.ENTITY_CACHE_TTL) {
|
||||
this.refreshEntities();
|
||||
}
|
||||
return this.knownEntities;
|
||||
}
|
||||
|
||||
/**
|
||||
* Process a candidate fact through the selective filter.
|
||||
* Returns the action to take: store, skip, supersede, or enrich.
|
||||
*/
|
||||
async process(fact: string, category: string, confidence: number): Promise<SelectiveResult> {
|
||||
// 1. Basic filters
|
||||
if (fact.length < this.cfg.minFactLength) {
|
||||
return { action: "skip", reason: "too_short" };
|
||||
}
|
||||
|
||||
// Noise pattern check
|
||||
for (const pattern of NOISE_PATTERNS) {
|
||||
if (pattern.test(fact.trim())) {
|
||||
return { action: "skip", reason: "noise" };
|
||||
}
|
||||
}
|
||||
|
||||
// TODO filter — disposable tasks only (preserve process knowledge)
|
||||
const trimmed = fact.trim();
|
||||
for (const pattern of TODO_PATTERNS) {
|
||||
if (pattern.test(trimmed) && trimmed.length < MIN_LENGTH_FOR_TRANSIENT) {
|
||||
return { action: "skip", reason: "noise" };
|
||||
}
|
||||
}
|
||||
// Transient state — only skip if short (long = probably explains WHY)
|
||||
for (const pattern of TRANSIENT_PATTERNS) {
|
||||
if (pattern.test(trimmed) && trimmed.length < MIN_LENGTH_FOR_TRANSIENT) {
|
||||
return { action: "skip", reason: "noise" };
|
||||
}
|
||||
}
|
||||
|
||||
// Importance check
|
||||
const importance = computeImportance(fact, category);
|
||||
if (importance < this.cfg.importanceThreshold) {
|
||||
return { action: "skip", reason: "noise" };
|
||||
}
|
||||
|
||||
// Category-specific thresholds: preferences are often reformulated differently
|
||||
// but carry the same intent → use lower thresholds to catch more duplicates
|
||||
const isPreference = category === "preference";
|
||||
const dupThreshold = isPreference ? 0.65 : this.cfg.dupThreshold;
|
||||
const enrichThreshold = isPreference ? 0.45 : this.cfg.enrichThreshold;
|
||||
|
||||
// 2. Dedup check (FTS5 + Levenshtein + Jaccard + prefix check)
|
||||
const candidates = this.db.searchFacts(fact, this.cfg.dupCandidates);
|
||||
const newKeywords = extractKeywords(fact);
|
||||
const newPrefix = extractPrefix(fact, 8);
|
||||
|
||||
for (const candidate of candidates) {
|
||||
// Fast prefix check: if first 8 words are identical → duplicate
|
||||
if (newPrefix.length >= 6 && newPrefix === extractPrefix(candidate.fact, 8)) {
|
||||
return { action: "skip", reason: "duplicate" };
|
||||
}
|
||||
|
||||
const levSim = levenshteinSimilarity(fact, candidate.fact);
|
||||
const jacSim = jaccardSimilarity(newKeywords, extractKeywords(candidate.fact));
|
||||
const combined = levSim * 0.6 + jacSim * 0.4; // weighted average
|
||||
|
||||
// Exact duplicate
|
||||
if (combined >= dupThreshold) {
|
||||
return { action: "skip", reason: "duplicate" };
|
||||
}
|
||||
|
||||
// Potential enrichment or contradiction (moderate similarity)
|
||||
if (combined >= enrichThreshold && (this.cfg.contradictionCheck || this.cfg.enrichEnabled)) {
|
||||
const relation = await this.checkRelation(candidate, fact);
|
||||
|
||||
if (relation.type === "duplicate") {
|
||||
return { action: "skip", reason: "duplicate" };
|
||||
}
|
||||
|
||||
if (relation.type === "contradiction") {
|
||||
return {
|
||||
action: "supersede",
|
||||
oldFactId: candidate.id,
|
||||
fact,
|
||||
category,
|
||||
confidence: Math.max(confidence, candidate.confidence),
|
||||
};
|
||||
}
|
||||
|
||||
if (relation.type === "enrichment" && relation.merged) {
|
||||
// For preferences: format enriched fact with consolidated contexts
|
||||
const mergedText = isPreference
|
||||
? formatPreferenceEnrichment(candidate.fact, fact, relation.merged)
|
||||
: relation.merged;
|
||||
return {
|
||||
action: "enrich",
|
||||
existingFactId: candidate.id,
|
||||
mergedFact: mergedText,
|
||||
confidence: Math.max(confidence, candidate.confidence),
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 3. Entity-based contradiction check
|
||||
// When text is very different but same entities are mentioned (e.g. "no models on Sol" vs "gemma3 installed on Sol"),
|
||||
// Levenshtein/Jaccard miss it. Entity overlap triggers LLM contradiction check.
|
||||
if (this.cfg.contradictionCheck) {
|
||||
try {
|
||||
const newEntities = extractSubjectEntities(fact, this.getEntities());
|
||||
if (newEntities.size > 0) {
|
||||
// Search for facts sharing at least one entity (using graph DB when available)
|
||||
const entityCandidates = this.findFactsBySharedEntities(fact, newEntities, candidates);
|
||||
for (const candidate of entityCandidates) {
|
||||
const relation = await this.checkRelation(candidate, fact);
|
||||
|
||||
if (relation.type === "contradiction") {
|
||||
return {
|
||||
action: "supersede",
|
||||
oldFactId: candidate.id,
|
||||
fact,
|
||||
category,
|
||||
confidence: Math.max(confidence, candidate.confidence),
|
||||
};
|
||||
}
|
||||
|
||||
if (relation.type === "enrichment" && relation.merged) {
|
||||
return {
|
||||
action: "enrich",
|
||||
existingFactId: candidate.id,
|
||||
mergedFact: relation.merged,
|
||||
confidence: Math.max(confidence, candidate.confidence),
|
||||
};
|
||||
}
|
||||
|
||||
if (relation.type === "duplicate") {
|
||||
return { action: "skip", reason: "duplicate" };
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (e) {
|
||||
console.debug('memoria:selective: ' + String(e));
|
||||
// Entity check failed → continue with store (fail-safe)
|
||||
}
|
||||
}
|
||||
|
||||
// 4. No issues — store as new
|
||||
return { action: "store", fact, category, confidence };
|
||||
}
|
||||
|
||||
/**
|
||||
* Process and apply: run the selective filter and execute the result.
|
||||
* Returns the stored/updated fact or null if skipped.
|
||||
*/
|
||||
async processAndApply(fact: string, category: string, confidence: number, agent = "koda", factType: "semantic" | "episodic" = "semantic", relevanceWeight = 0.5): Promise<{ stored: boolean; action: string; factId?: string; reason?: string }> {
|
||||
const result = await this.process(fact, category, confidence);
|
||||
|
||||
switch (result.action) {
|
||||
case "skip":
|
||||
return { stored: false, action: "skip", reason: result.reason };
|
||||
|
||||
case "store": {
|
||||
const stored = this.db.storeFact({
|
||||
id: `fact_${Date.now()}_${Math.random().toString(36).slice(2, 9)}`,
|
||||
fact: result.fact,
|
||||
category: result.category,
|
||||
confidence: result.confidence,
|
||||
source: "auto-capture",
|
||||
tags: "[]",
|
||||
agent,
|
||||
created_at: Date.now(),
|
||||
updated_at: Date.now(),
|
||||
fact_type: factType,
|
||||
relevance_weight: relevanceWeight,
|
||||
});
|
||||
return { stored: true, action: "store", factId: stored.id };
|
||||
}
|
||||
|
||||
case "supersede": {
|
||||
// Store new fact
|
||||
const newFact = this.db.storeFact({
|
||||
id: `fact_${Date.now()}_${Math.random().toString(36).slice(2, 9)}`,
|
||||
fact: result.fact,
|
||||
category: result.category,
|
||||
confidence: result.confidence,
|
||||
source: "auto-capture",
|
||||
tags: "[]",
|
||||
agent,
|
||||
created_at: Date.now(),
|
||||
updated_at: Date.now(),
|
||||
fact_type: factType,
|
||||
relevance_weight: relevanceWeight,
|
||||
});
|
||||
// Mark old as superseded
|
||||
this.db.supersedeFact(result.oldFactId, newFact.id);
|
||||
// Notify other layers (observations, clusters)
|
||||
try { this.onSupersede?.(result.oldFactId, newFact.id); } catch (e) { console.debug('memoria:selective: ' + String(e)); }
|
||||
return { stored: true, action: "supersede", factId: newFact.id };
|
||||
}
|
||||
|
||||
case "enrich": {
|
||||
// Update existing fact with merged text
|
||||
this.db.enrichFact(result.existingFactId, result.mergedFact, result.confidence);
|
||||
return { stored: true, action: "enrich", factId: result.existingFactId };
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ─── Private ───
|
||||
|
||||
/**
|
||||
* Find existing facts that share at least one entity with the new fact.
|
||||
* Excludes facts already checked in the textual dedup pass.
|
||||
* Limited to MAX_ENTITY_CANDIDATES to avoid excessive LLM calls.
|
||||
*/
|
||||
private findFactsBySharedEntities(newFact: string, newEntities: Set<string>, alreadyChecked: Fact[]): Fact[] {
|
||||
// FIX 3: Increased from 5 to 10 — version contradictions need wider search
|
||||
// (e.g., "Sol = v2.7.0" stored 6 times won't all be caught with limit 5)
|
||||
const MAX_ENTITY_CANDIDATES = 10;
|
||||
const checkedIds = new Set(alreadyChecked.map(c => c.id));
|
||||
const candidates: Fact[] = [];
|
||||
|
||||
// FIX 3: Prioritize version-containing facts when new fact has a version
|
||||
const hasVersion = /v\d+\.\d+/i.test(newFact);
|
||||
|
||||
// Search for each entity via FTS (wider search to catch all related facts)
|
||||
for (const entity of newEntities) {
|
||||
if (candidates.length >= MAX_ENTITY_CANDIDATES) break;
|
||||
const ftsResults = this.db.searchFacts(entity, 30);
|
||||
for (const result of ftsResults) {
|
||||
if (candidates.length >= MAX_ENTITY_CANDIDATES) break;
|
||||
if (checkedIds.has(result.id)) continue;
|
||||
// Verify entity overlap
|
||||
const resultEntities = extractSubjectEntities(result.fact, this.getEntities());
|
||||
const shared = [...newEntities].filter(e => resultEntities.has(e));
|
||||
if (shared.length > 0) {
|
||||
// FIX 3: Boost priority for version-related facts
|
||||
// When new fact says "Sol = v3.11", and existing says "Sol = v2.7",
|
||||
// this MUST be checked even if Levenshtein is low
|
||||
if (hasVersion && /v\d+\.\d+/i.test(result.fact)) {
|
||||
// Put version facts first (higher priority for contradiction check)
|
||||
candidates.unshift(result);
|
||||
} else {
|
||||
candidates.push(result);
|
||||
}
|
||||
checkedIds.add(result.id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return candidates.slice(0, MAX_ENTITY_CANDIDATES);
|
||||
}
|
||||
|
||||
private async checkRelation(existing: Fact, newFact: string): Promise<{
|
||||
type: "contradiction" | "enrichment" | "duplicate" | "independent";
|
||||
merged?: string;
|
||||
reason?: string;
|
||||
}> {
|
||||
try {
|
||||
const prompt = CONTRADICTION_PROMPT
|
||||
.replace("{OLD}", existing.fact)
|
||||
.replace("{NEW}", newFact);
|
||||
|
||||
const response = await this.llm.generate(prompt, {
|
||||
maxTokens: 256,
|
||||
temperature: 0.1,
|
||||
format: "json",
|
||||
timeoutMs: 15000,
|
||||
});
|
||||
|
||||
const parsed = this.parseJSON(response) as {
|
||||
relation?: string;
|
||||
merged?: string;
|
||||
reason?: string;
|
||||
};
|
||||
|
||||
if (!parsed?.relation) return { type: "independent" };
|
||||
|
||||
switch (parsed.relation) {
|
||||
case "contradiction":
|
||||
return { type: "contradiction", reason: parsed.reason };
|
||||
case "enrichment":
|
||||
return { type: "enrichment", merged: parsed.merged };
|
||||
case "duplicate":
|
||||
return { type: "duplicate" };
|
||||
default:
|
||||
return { type: "independent" };
|
||||
}
|
||||
} catch (e) {
|
||||
console.debug('memoria:selective: ' + String(e));
|
||||
// LLM failed → safe default: treat as independent (store both)
|
||||
return { type: "independent" };
|
||||
}
|
||||
}
|
||||
|
||||
private parseJSON(text: string): unknown {
|
||||
let cleaned = text.trim();
|
||||
if (cleaned.startsWith("```")) {
|
||||
const lines = cleaned.split("\n");
|
||||
lines.shift();
|
||||
if (lines[lines.length - 1]?.trim() === "```") lines.pop();
|
||||
cleaned = lines.join("\n").trim();
|
||||
}
|
||||
const match = cleaned.match(/(\{[\s\S]*\}|\[[\s\S]*\])/);
|
||||
if (match) cleaned = match[1];
|
||||
return JSON.parse(cleaned);
|
||||
}
|
||||
}
|
||||
184
openclaw-memoria-port/core/self-observation.ts
Normal file
184
openclaw-memoria-port/core/self-observation.ts
Normal file
@@ -0,0 +1,184 @@
|
||||
/**
|
||||
* Memoria — AI Self-Observation (Layer 22)
|
||||
*
|
||||
* Tracks the agent's own behavioral patterns over time:
|
||||
* - What tasks it handles well (high success rate)
|
||||
* - What tasks it struggles with (corrections, retries)
|
||||
* - Recurring error patterns
|
||||
* - Strengths and weaknesses profile
|
||||
*
|
||||
* Inspired by Hermes' "AI self-observation" concept.
|
||||
*
|
||||
* Table: self_observations
|
||||
* - id: auto-increment
|
||||
* - domain: string (coding, design, infra, memory, communication, etc.)
|
||||
* - signal: "success" | "correction" | "frustration" | "retry"
|
||||
* - detail: string (what happened)
|
||||
* - created_at: timestamp ms
|
||||
*/
|
||||
|
||||
import type Database from "better-sqlite3";
|
||||
|
||||
export interface SelfObservation {
|
||||
id: number;
|
||||
domain: string;
|
||||
signal: "success" | "correction" | "frustration" | "retry";
|
||||
detail: string;
|
||||
created_at: number;
|
||||
}
|
||||
|
||||
export interface AgentProfile {
|
||||
strengths: Array<{ domain: string; score: number; evidence: number }>;
|
||||
weaknesses: Array<{ domain: string; score: number; evidence: number }>;
|
||||
recentPatterns: string[];
|
||||
totalObservations: number;
|
||||
}
|
||||
|
||||
// Domain detection patterns
|
||||
const DOMAIN_PATTERNS: Array<{ domain: string; patterns: RegExp[] }> = [
|
||||
{ domain: "coding", patterns: [/\b(code|commit|push|git|deploy|build|npm|typescript|bug|fix|refactor)\b/i] },
|
||||
{ domain: "design", patterns: [/\b(design|pencil|screen|ui|ux|figma|mockup|icon|layout)\b/i] },
|
||||
{ domain: "infra", patterns: [/\b(server|ssh|gateway|restart|docker|ollama|sol|luna|deploy)\b/i] },
|
||||
{ domain: "memory", patterns: [/\b(memoria|recall|fact|extract|embed|memory|convex)\b/i] },
|
||||
{ domain: "communication", patterns: [/\b(tweet|discord|telegram|message|post|thread|social)\b/i] },
|
||||
{ domain: "writing", patterns: [/\b(article|doc|readme|study|étude|report|analyse)\b/i] },
|
||||
{ domain: "planning", patterns: [/\b(plan|roadmap|objectif|priorit|strateg|pipeline)\b/i] },
|
||||
];
|
||||
|
||||
export class SelfObserver {
|
||||
private db: Database.Database;
|
||||
|
||||
constructor(db: Database.Database) {
|
||||
this.db = db;
|
||||
this.ensureTable();
|
||||
}
|
||||
|
||||
private ensureTable(): void {
|
||||
this.db.exec(`
|
||||
CREATE TABLE IF NOT EXISTS self_observations (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
domain TEXT NOT NULL,
|
||||
signal TEXT NOT NULL CHECK(signal IN ('success', 'correction', 'frustration', 'retry')),
|
||||
detail TEXT NOT NULL DEFAULT '',
|
||||
created_at INTEGER NOT NULL
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS idx_self_obs_domain
|
||||
ON self_observations(domain, signal);
|
||||
`);
|
||||
}
|
||||
|
||||
/**
|
||||
* Record an observation about the agent's own behavior.
|
||||
*/
|
||||
record(signal: SelfObservation["signal"], context: string, domainHint?: string): void {
|
||||
const domain = domainHint || this.detectDomain(context);
|
||||
const detail = context.slice(0, 500);
|
||||
this.db.prepare(
|
||||
"INSERT INTO self_observations (domain, signal, detail, created_at) VALUES (?, ?, ?, ?)"
|
||||
).run(domain, signal, detail, Date.now());
|
||||
}
|
||||
|
||||
/**
|
||||
* Detect the domain from message context.
|
||||
*/
|
||||
detectDomain(text: string): string {
|
||||
for (const { domain, patterns } of DOMAIN_PATTERNS) {
|
||||
if (patterns.some(p => p.test(text))) return domain;
|
||||
}
|
||||
return "general";
|
||||
}
|
||||
|
||||
/**
|
||||
* Build a profile of the agent's strengths and weaknesses.
|
||||
* Based on success/correction ratio per domain over the last N days.
|
||||
*/
|
||||
buildProfile(dayRange = 30): AgentProfile {
|
||||
const cutoff = Date.now() - dayRange * 86_400_000;
|
||||
|
||||
const rows = this.db.prepare(`
|
||||
SELECT domain, signal, COUNT(*) as cnt
|
||||
FROM self_observations
|
||||
WHERE created_at > ?
|
||||
GROUP BY domain, signal
|
||||
ORDER BY domain
|
||||
`).all(cutoff) as Array<{ domain: string; signal: string; cnt: number }>;
|
||||
|
||||
// Aggregate by domain
|
||||
const domains: Record<string, Record<string, number>> = {};
|
||||
for (const r of rows) {
|
||||
if (!domains[r.domain]) domains[r.domain] = {};
|
||||
domains[r.domain][r.signal] = r.cnt;
|
||||
}
|
||||
|
||||
const strengths: AgentProfile["strengths"] = [];
|
||||
const weaknesses: AgentProfile["weaknesses"] = [];
|
||||
|
||||
for (const [domain, signals] of Object.entries(domains)) {
|
||||
const successes = signals.success || 0;
|
||||
const corrections = signals.correction || 0;
|
||||
const frustrations = signals.frustration || 0;
|
||||
const retries = signals.retry || 0;
|
||||
const total = successes + corrections + frustrations + retries;
|
||||
if (total < 2) continue; // Not enough data
|
||||
|
||||
// Score: successes / total, penalized by corrections+frustrations
|
||||
const score = (successes - corrections * 0.5 - frustrations * 1.0) / total;
|
||||
|
||||
if (score >= 0.6) {
|
||||
strengths.push({ domain, score: Math.round(score * 100), evidence: total });
|
||||
} else if (score <= 0.3 && (corrections + frustrations) >= 2) {
|
||||
weaknesses.push({ domain, score: Math.round(score * 100), evidence: total });
|
||||
}
|
||||
}
|
||||
|
||||
// Sort: strengths descending, weaknesses ascending
|
||||
strengths.sort((a, b) => b.score - a.score);
|
||||
weaknesses.sort((a, b) => a.score - b.score);
|
||||
|
||||
// Recent patterns: last 10 observations, summarized
|
||||
const recent = this.db.prepare(
|
||||
"SELECT domain, signal, detail FROM self_observations ORDER BY created_at DESC LIMIT 10"
|
||||
).all() as Array<{ domain: string; signal: string; detail: string }>;
|
||||
|
||||
const recentPatterns = recent.map(r =>
|
||||
`${r.signal === "success" ? "✅" : r.signal === "correction" ? "📝" : r.signal === "frustration" ? "😤" : "🔄"} ${r.domain}: ${r.detail.slice(0, 80)}`
|
||||
);
|
||||
|
||||
const totalRow = this.db.prepare(
|
||||
"SELECT COUNT(*) as cnt FROM self_observations"
|
||||
).get() as { cnt: number };
|
||||
|
||||
return {
|
||||
strengths,
|
||||
weaknesses,
|
||||
recentPatterns,
|
||||
totalObservations: totalRow.cnt,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Format profile for injection into the prompt (compact).
|
||||
*/
|
||||
formatForPrompt(): string {
|
||||
const profile = this.buildProfile();
|
||||
if (profile.totalObservations < 5) return ""; // Not enough data yet
|
||||
|
||||
const parts: string[] = [];
|
||||
|
||||
if (profile.strengths.length > 0) {
|
||||
parts.push("**Strengths:** " + profile.strengths.map(s =>
|
||||
`${s.domain} (${s.score}%, ${s.evidence} obs.)`
|
||||
).join(", "));
|
||||
}
|
||||
|
||||
if (profile.weaknesses.length > 0) {
|
||||
parts.push("**Watch out:** " + profile.weaknesses.map(w =>
|
||||
`${w.domain} (${w.score}%, ${w.evidence} obs.)`
|
||||
).join(", "));
|
||||
}
|
||||
|
||||
return parts.length > 0
|
||||
? `\n### 🪞 Self-Observation\n${parts.join("\n")}\n`
|
||||
: "";
|
||||
}
|
||||
}
|
||||
262
openclaw-memoria-port/core/sync.ts
Normal file
262
openclaw-memoria-port/core/sync.ts
Normal file
@@ -0,0 +1,262 @@
|
||||
/**
|
||||
* Memoria — Layer 11a: .md Sync
|
||||
*
|
||||
* DB → .md: When Memoria captures a fact, it appends to the right workspace .md file
|
||||
* Category mapping: savoir/erreur → MEMORY.md, outil → TOOLS.md, preference → USER.md, rh/client → COMPANY.md
|
||||
*
|
||||
* Each fact is appended with format: `- **[date]** fact text _(confidence%)_`
|
||||
* Sets synced_to_md = 1 after successful write.
|
||||
*
|
||||
* Mapping catégorie → fichier :
|
||||
* outil → TOOLS.md
|
||||
* savoir → MEMORY.md (section 🧠 Savoir)
|
||||
* erreur → MEMORY.md (section ❌ Erreurs)
|
||||
* chronologie → MEMORY.md (section 📅 Chronologie)
|
||||
* preference → USER.md
|
||||
* client → COMPANY.md
|
||||
* rh → COMPANY.md
|
||||
*/
|
||||
|
||||
import { readFileSync, writeFileSync, existsSync, statSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import type { MemoriaDB, Fact } from "./db.js";
|
||||
|
||||
// ─── Config ───
|
||||
|
||||
export interface SyncConfig {
|
||||
/** Workspace root. Default ~/.openclaw/workspace */
|
||||
workspacePath: string;
|
||||
/** Enable DB→MD sync. Default true */
|
||||
dbToMd: boolean;
|
||||
/** Enable MD→DB sync. Default false (safety: avoid importing noise) */
|
||||
mdToDb: boolean;
|
||||
/** Max facts to sync per run. Default 10 */
|
||||
batchSize: number;
|
||||
}
|
||||
|
||||
export const DEFAULT_SYNC_CONFIG: SyncConfig = {
|
||||
workspacePath: process.env.HOME + "/.openclaw/workspace",
|
||||
dbToMd: true,
|
||||
mdToDb: false,
|
||||
batchSize: 10,
|
||||
};
|
||||
|
||||
// ─── Category → File mapping ───
|
||||
|
||||
interface FileMapping {
|
||||
file: string;
|
||||
section?: string; // Section header to find/create
|
||||
format: "bullet" | "entry";
|
||||
}
|
||||
|
||||
const CATEGORY_MAP: Record<string, FileMapping> = {
|
||||
outil: { file: "TOOLS.md", format: "bullet" },
|
||||
savoir: { file: "MEMORY.md", section: "## 🧠 Savoir", format: "bullet" },
|
||||
erreur: { file: "MEMORY.md", section: "## ❌ Erreurs", format: "bullet" },
|
||||
chronologie: { file: "MEMORY.md", section: "## 📅 Chronologie", format: "entry" },
|
||||
preference: { file: "USER.md", format: "bullet" },
|
||||
client: { file: "COMPANY.md", format: "bullet" },
|
||||
rh: { file: "COMPANY.md", format: "bullet" },
|
||||
};
|
||||
|
||||
// ─── Sync Manager ───
|
||||
|
||||
export class MdSync {
|
||||
private db: MemoriaDB;
|
||||
private cfg: SyncConfig;
|
||||
private lastSyncTimestamps: Map<string, number> = new Map();
|
||||
|
||||
constructor(db: MemoriaDB, config?: Partial<SyncConfig>) {
|
||||
this.db = db;
|
||||
this.cfg = { ...DEFAULT_SYNC_CONFIG, ...config };
|
||||
}
|
||||
|
||||
// ─── DB → MD: Write new facts to .md files ───
|
||||
|
||||
/**
|
||||
* Sync recent facts from DB to their corresponding .md files.
|
||||
* Only syncs facts that haven't been synced yet (tracked via `synced_to_md` flag).
|
||||
*/
|
||||
syncToMd(db: MemoriaDB): { synced: number; errors: string[] } {
|
||||
if (!this.cfg.dbToMd) return { synced: 0, errors: [] };
|
||||
|
||||
// Get unsynced facts
|
||||
const unsynced = db.raw.prepare(
|
||||
"SELECT * FROM facts WHERE superseded = 0 AND (synced_to_md IS NULL OR synced_to_md = 0) ORDER BY created_at DESC LIMIT ?"
|
||||
).all(this.cfg.batchSize) as (Fact & { synced_to_md?: number })[];
|
||||
|
||||
if (unsynced.length === 0) return { synced: 0, errors: [] };
|
||||
|
||||
let synced = 0;
|
||||
const errors: string[] = [];
|
||||
|
||||
for (const fact of unsynced) {
|
||||
try {
|
||||
const mapping = CATEGORY_MAP[fact.category];
|
||||
if (!mapping) {
|
||||
// Mark as synced anyway to avoid retrying
|
||||
this.markSynced(db, fact.id);
|
||||
continue;
|
||||
}
|
||||
|
||||
const filePath = join(this.cfg.workspacePath, mapping.file);
|
||||
|
||||
// Check if fact already exists in file (dedup)
|
||||
if (this.factExistsInFile(filePath, fact.fact)) {
|
||||
this.markSynced(db, fact.id);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Append to file
|
||||
this.appendToFile(filePath, fact, mapping);
|
||||
this.markSynced(db, fact.id);
|
||||
synced++;
|
||||
} catch (err) {
|
||||
errors.push(`${fact.id}: ${err}`);
|
||||
}
|
||||
}
|
||||
|
||||
return { synced, errors };
|
||||
}
|
||||
|
||||
// ─── MD → DB: Import new content from .md files ───
|
||||
|
||||
/**
|
||||
* Check if .md files were modified and import new lines as facts.
|
||||
* Conservative: only imports clearly new bullet points.
|
||||
*/
|
||||
syncFromMd(db: MemoriaDB): { imported: number; files: string[] } {
|
||||
if (!this.cfg.mdToDb) return { imported: 0, files: [] };
|
||||
|
||||
let imported = 0;
|
||||
const files: string[] = [];
|
||||
|
||||
for (const [category, mapping] of Object.entries(CATEGORY_MAP)) {
|
||||
const filePath = join(this.cfg.workspacePath, mapping.file);
|
||||
if (!existsSync(filePath)) continue;
|
||||
|
||||
const stat = statSync(filePath);
|
||||
const lastMod = stat.mtimeMs;
|
||||
const lastSync = this.lastSyncTimestamps.get(filePath) || 0;
|
||||
|
||||
if (lastMod <= lastSync) continue;
|
||||
|
||||
// File was modified — extract new bullet points
|
||||
const content = readFileSync(filePath, "utf-8");
|
||||
const newFacts = this.extractNewFacts(db, content, category, mapping);
|
||||
|
||||
for (const factText of newFacts) {
|
||||
try {
|
||||
db.storeFact({
|
||||
fact: factText,
|
||||
category,
|
||||
confidence: 0.7,
|
||||
source: "md-sync",
|
||||
agent: "koda",
|
||||
} as any);
|
||||
imported++;
|
||||
} catch (_e) { /* dedup or error */ }
|
||||
}
|
||||
|
||||
if (newFacts.length > 0) files.push(mapping.file);
|
||||
this.lastSyncTimestamps.set(filePath, Date.now());
|
||||
}
|
||||
|
||||
return { imported, files };
|
||||
}
|
||||
|
||||
// ─── Ensure synced_to_md column exists ───
|
||||
|
||||
ensureSchema(db: MemoriaDB): void {
|
||||
try {
|
||||
// Check if column exists
|
||||
const info = db.raw.prepare("PRAGMA table_info(facts)").all() as Array<{ name: string }>;
|
||||
const hasColumn = info.some(col => col.name === "synced_to_md");
|
||||
if (!hasColumn) {
|
||||
db.raw.prepare("ALTER TABLE facts ADD COLUMN synced_to_md INTEGER DEFAULT 0").run();
|
||||
}
|
||||
} catch (_e) { /* column might already exist */ }
|
||||
}
|
||||
|
||||
// ─── Private ───
|
||||
|
||||
private markSynced(db: MemoriaDB, factId: string): void {
|
||||
db.raw.prepare("UPDATE facts SET synced_to_md = 1 WHERE id = ?").run(factId);
|
||||
}
|
||||
|
||||
private factExistsInFile(filePath: string, factText: string): boolean {
|
||||
if (!existsSync(filePath)) return false;
|
||||
const content = readFileSync(filePath, "utf-8");
|
||||
// Check if first 60 chars of the fact appear in the file
|
||||
const snippet = factText.slice(0, 60).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
||||
return new RegExp(snippet, "i").test(content);
|
||||
}
|
||||
|
||||
private appendToFile(filePath: string, fact: Fact, mapping: FileMapping): void {
|
||||
if (!existsSync(filePath)) return; // Don't create files
|
||||
|
||||
let content = readFileSync(filePath, "utf-8");
|
||||
|
||||
const line = mapping.format === "entry"
|
||||
? `- **${this.formatDate()}** : ${fact.fact}`
|
||||
: `- ${fact.fact}`;
|
||||
|
||||
if (mapping.section) {
|
||||
// Find section and append after it
|
||||
const sectionIdx = content.indexOf(mapping.section);
|
||||
if (sectionIdx !== -1) {
|
||||
// Find the next section or end of file
|
||||
const afterSection = content.indexOf("\n## ", sectionIdx + mapping.section.length);
|
||||
const insertAt = afterSection !== -1 ? afterSection : content.length;
|
||||
|
||||
// Find last non-empty line before insertAt
|
||||
const beforeInsert = content.slice(sectionIdx, insertAt);
|
||||
const lastNewline = beforeInsert.lastIndexOf("\n");
|
||||
const insertPos = sectionIdx + lastNewline + 1;
|
||||
|
||||
content = content.slice(0, insertPos) + line + "\n" + content.slice(insertPos);
|
||||
} else {
|
||||
// Section doesn't exist — append at end
|
||||
content += `\n\n${mapping.section}\n\n${line}\n`;
|
||||
}
|
||||
} else {
|
||||
// Append at end of file
|
||||
content = content.trimEnd() + "\n" + line + "\n";
|
||||
}
|
||||
|
||||
writeFileSync(filePath, content, "utf-8");
|
||||
}
|
||||
|
||||
private extractNewFacts(db: MemoriaDB, content: string, category: string, mapping: FileMapping): string[] {
|
||||
const lines = content.split("\n");
|
||||
const newFacts: string[] = [];
|
||||
|
||||
for (const line of lines) {
|
||||
// Only process bullet points
|
||||
const match = line.match(/^[-*]\s+(.+)$/);
|
||||
if (!match) continue;
|
||||
|
||||
const text = match[1].trim();
|
||||
if (text.length < 15) continue; // Skip short lines
|
||||
|
||||
// Clean markdown formatting
|
||||
const cleaned = text.replace(/\*\*([^*]+)\*\*/g, "$1").replace(/`([^`]+)`/g, "$1");
|
||||
|
||||
// Check if already in DB (approximate: search by first 50 chars)
|
||||
const existing = db.raw.prepare(
|
||||
"SELECT id FROM facts WHERE fact LIKE ? AND superseded = 0 LIMIT 1"
|
||||
).get(`${cleaned.slice(0, 50)}%`) as { id: string } | undefined;
|
||||
|
||||
if (!existing) {
|
||||
newFacts.push(cleaned);
|
||||
}
|
||||
}
|
||||
|
||||
return newFacts.slice(0, this.cfg.batchSize);
|
||||
}
|
||||
|
||||
private formatDate(): string {
|
||||
const d = new Date();
|
||||
return `${d.getDate().toString().padStart(2, "0")}/${(d.getMonth() + 1).toString().padStart(2, "0")}`;
|
||||
}
|
||||
}
|
||||
827
openclaw-memoria-port/core/topics.ts
Normal file
827
openclaw-memoria-port/core/topics.ts
Normal file
@@ -0,0 +1,827 @@
|
||||
/**
|
||||
* topics.ts — Phase 8: Emergent Topics
|
||||
*
|
||||
* Topics emerge automatically from repeated patterns in facts.
|
||||
* No manual categories — everything comes from usage.
|
||||
*
|
||||
* Flow:
|
||||
* 1. After capture: extract keywords → match/create topic associations
|
||||
* 2. Periodically: scan orphans, merge, create sub-topics
|
||||
* 3. At recall: topic embeddings enrich search results
|
||||
*/
|
||||
|
||||
import type { LLMProvider, EmbedProvider } from "./providers/types.js";
|
||||
import type { MemoriaDB } from "./db.js";
|
||||
|
||||
// ─── Types ───
|
||||
|
||||
export interface Topic {
|
||||
id: string;
|
||||
name: string;
|
||||
keywords: string[];
|
||||
fact_count: number;
|
||||
first_seen: number;
|
||||
last_seen: number;
|
||||
access_count: number;
|
||||
importance_score: number;
|
||||
parent_topic_id: string | null;
|
||||
embedding: number[] | null;
|
||||
}
|
||||
|
||||
export interface TopicMatch {
|
||||
topic: Topic;
|
||||
overlap: number; // 0-1
|
||||
}
|
||||
|
||||
export interface TopicsConfig {
|
||||
emergenceThreshold: number; // min facts to create topic (default: 3)
|
||||
mergeOverlap: number; // keyword overlap to merge (default: 0.7)
|
||||
subtopicThreshold: number; // min facts for sub-topic (default: 5)
|
||||
decayDays: number; // days before decay starts (default: 30)
|
||||
scanInterval: number; // scan orphans every N captures (default: 15)
|
||||
maxKeywordsPerFact: number; // keywords extracted per fact (default: 5)
|
||||
}
|
||||
|
||||
const DEFAULT_CONFIG: TopicsConfig = {
|
||||
emergenceThreshold: 3,
|
||||
mergeOverlap: 0.7,
|
||||
subtopicThreshold: 5,
|
||||
decayDays: 30,
|
||||
scanInterval: 15,
|
||||
maxKeywordsPerFact: 5,
|
||||
};
|
||||
|
||||
// ─── Schema Migration ───
|
||||
|
||||
export function migrateTopicsSchema(db: MemoriaDB): void {
|
||||
const raw = db.raw;
|
||||
|
||||
raw.exec(`
|
||||
CREATE TABLE IF NOT EXISTS topics (
|
||||
id TEXT PRIMARY KEY,
|
||||
name TEXT NOT NULL,
|
||||
keywords TEXT DEFAULT '[]',
|
||||
fact_count INTEGER DEFAULT 0,
|
||||
first_seen INTEGER NOT NULL,
|
||||
last_seen INTEGER NOT NULL,
|
||||
access_count INTEGER DEFAULT 0,
|
||||
importance_score REAL DEFAULT 0.0,
|
||||
parent_topic_id TEXT,
|
||||
embedding BLOB,
|
||||
FOREIGN KEY (parent_topic_id) REFERENCES topics(id) ON DELETE SET NULL
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS fact_topics (
|
||||
fact_id TEXT NOT NULL,
|
||||
topic_id TEXT NOT NULL,
|
||||
PRIMARY KEY (fact_id, topic_id),
|
||||
FOREIGN KEY (fact_id) REFERENCES facts(id) ON DELETE CASCADE,
|
||||
FOREIGN KEY (topic_id) REFERENCES topics(id) ON DELETE CASCADE
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_fact_topics_topic ON fact_topics(topic_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_topics_importance ON topics(importance_score DESC);
|
||||
CREATE INDEX IF NOT EXISTS idx_topics_parent ON topics(parent_topic_id);
|
||||
`);
|
||||
}
|
||||
|
||||
// ─── Helper: generate ID ───
|
||||
|
||||
function genId(): string {
|
||||
return `topic_${Date.now().toString(36)}_${Math.random().toString(36).slice(2, 8)}`;
|
||||
}
|
||||
|
||||
// ─── Keyword Extraction Prompt ───
|
||||
|
||||
const KEYWORD_PROMPT = `Extrais les mots-clés principaux de ce fait. Ce sont des concepts, noms propres, ou thèmes durables.
|
||||
|
||||
Règles :
|
||||
- Maximum {MAX} mots-clés
|
||||
- Mots-clés en minuscules, français ou anglais technique
|
||||
- Pas de mots vides (le, la, de, est, un, etc.)
|
||||
- Pas de verbes conjugués
|
||||
- Privilégier les noms propres et concepts techniques
|
||||
|
||||
Fait : "{FACT}"
|
||||
|
||||
Réponds UNIQUEMENT en JSON : {"keywords": ["mot1", "mot2"]}`;
|
||||
|
||||
// ─── Main Class ───
|
||||
|
||||
export class TopicManager {
|
||||
private db: MemoriaDB;
|
||||
private llm: LLMProvider;
|
||||
private embedder: EmbedProvider | null;
|
||||
private cfg: TopicsConfig;
|
||||
private capturesSinceLastScan: number = 0;
|
||||
|
||||
constructor(db: MemoriaDB, llm: LLMProvider, embedder: EmbedProvider | null, config?: Partial<TopicsConfig>) {
|
||||
this.db = db;
|
||||
this.llm = llm;
|
||||
this.embedder = embedder;
|
||||
this.cfg = { ...DEFAULT_CONFIG, ...config };
|
||||
|
||||
// Ensure schema
|
||||
migrateTopicsSchema(db);
|
||||
}
|
||||
|
||||
// ─── 1. Extract keywords from a fact ───
|
||||
|
||||
async extractKeywords(fact: string): Promise<string[]> {
|
||||
try {
|
||||
const prompt = KEYWORD_PROMPT
|
||||
.replace("{MAX}", String(this.cfg.maxKeywordsPerFact))
|
||||
.replace("{FACT}", fact.slice(0, 500));
|
||||
|
||||
const response = await this.llm.generate(prompt, {
|
||||
maxTokens: 128,
|
||||
temperature: 0.1,
|
||||
format: "json",
|
||||
timeoutMs: 10000,
|
||||
});
|
||||
|
||||
const parsed = this.parseJSON(response) as { keywords?: string[] };
|
||||
if (!parsed?.keywords || !Array.isArray(parsed.keywords)) return [];
|
||||
|
||||
return parsed.keywords
|
||||
.filter((k: unknown) => typeof k === "string" && k.length > 1 && k.length < 50)
|
||||
.map((k: string) => k.toLowerCase().trim())
|
||||
.slice(0, this.cfg.maxKeywordsPerFact);
|
||||
} catch (e) {
|
||||
console.debug('memoria:topics: ' + String(e));
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
// ─── 2. After capture: tag fact + associate with topics ───
|
||||
|
||||
async onFactCaptured(factId: string, fact: string, category: string): Promise<{ keywords: string[]; topics: string[] }> {
|
||||
// Extract keywords
|
||||
const keywords = await this.extractKeywords(fact);
|
||||
if (keywords.length === 0) return { keywords: [], topics: [] };
|
||||
|
||||
// Update fact tags
|
||||
const raw = this.db.raw;
|
||||
raw.prepare("UPDATE facts SET tags = ? WHERE id = ?")
|
||||
.run(JSON.stringify(keywords), factId);
|
||||
|
||||
// Rebuild FTS for this fact
|
||||
try {
|
||||
const rowid = raw.prepare("SELECT rowid FROM facts WHERE id = ?").get(factId) as { rowid: number } | undefined;
|
||||
if (rowid) {
|
||||
raw.prepare("INSERT OR REPLACE INTO facts_fts(rowid, fact, category, tags) VALUES (?, (SELECT fact FROM facts WHERE rowid = ?), (SELECT category FROM facts WHERE rowid = ?), ?)").run(
|
||||
rowid.rowid, rowid.rowid, rowid.rowid, JSON.stringify(keywords),
|
||||
);
|
||||
}
|
||||
} catch (e) { console.debug('memoria:topics: ' + String(e)); }
|
||||
|
||||
// Find matching topics
|
||||
const matchedTopics = this.findMatchingTopics(keywords);
|
||||
const topicNames: string[] = [];
|
||||
|
||||
for (const match of matchedTopics) {
|
||||
// Associate fact with topic
|
||||
raw.prepare("INSERT OR IGNORE INTO fact_topics (fact_id, topic_id) VALUES (?, ?)")
|
||||
.run(factId, match.topic.id);
|
||||
|
||||
// Update topic stats
|
||||
const now = Date.now();
|
||||
raw.prepare(`UPDATE topics SET
|
||||
fact_count = fact_count + 1,
|
||||
last_seen = ?,
|
||||
importance_score = (fact_count + 1) * (1.0 + ? / (? + 86400000.0 * ?))
|
||||
WHERE id = ?`)
|
||||
.run(now, now, now, this.cfg.decayDays, match.topic.id);
|
||||
|
||||
// Merge keywords
|
||||
const existing: string[] = JSON.parse(match.topic.keywords as unknown as string || "[]");
|
||||
const merged = [...new Set([...existing, ...keywords])];
|
||||
raw.prepare("UPDATE topics SET keywords = ? WHERE id = ?")
|
||||
.run(JSON.stringify(merged), match.topic.id);
|
||||
|
||||
topicNames.push(match.topic.name);
|
||||
}
|
||||
|
||||
// Track for periodic scan
|
||||
this.capturesSinceLastScan++;
|
||||
|
||||
return { keywords, topics: topicNames };
|
||||
}
|
||||
|
||||
// ─── 3. Find topics matching keywords ───
|
||||
|
||||
findMatchingTopics(keywords: string[]): TopicMatch[] {
|
||||
if (keywords.length === 0) return [];
|
||||
|
||||
const raw = this.db.raw;
|
||||
const allTopics = raw.prepare("SELECT * FROM topics WHERE parent_topic_id IS NULL OR parent_topic_id = ''").all() as Topic[];
|
||||
|
||||
const matches: TopicMatch[] = [];
|
||||
for (const topic of allTopics) {
|
||||
const topicKw: string[] = JSON.parse(topic.keywords as unknown as string || "[]");
|
||||
if (topicKw.length === 0) continue;
|
||||
|
||||
const overlap = this.jaccardOverlap(keywords, topicKw);
|
||||
if (overlap >= 0.25) { // Lower threshold for matching (vs merge)
|
||||
matches.push({ topic, overlap });
|
||||
}
|
||||
}
|
||||
|
||||
return matches.sort((a, b) => b.overlap - a.overlap);
|
||||
}
|
||||
|
||||
// ─── 4. Periodic scan: create topics from orphans ───
|
||||
|
||||
async scanAndEmerge(): Promise<{ created: number; merged: number; subtopics: number }> {
|
||||
this.capturesSinceLastScan = 0;
|
||||
const raw = this.db.raw;
|
||||
let created = 0;
|
||||
let merged = 0;
|
||||
let subtopics = 0;
|
||||
|
||||
// Find orphan facts (have tags but no topic)
|
||||
const orphans = raw.prepare(`
|
||||
SELECT f.id, f.fact, f.tags, f.category
|
||||
FROM facts f
|
||||
WHERE f.superseded = 0
|
||||
AND f.tags != '[]'
|
||||
AND f.tags IS NOT NULL
|
||||
AND f.id NOT IN (SELECT fact_id FROM fact_topics)
|
||||
`).all() as Array<{ id: string; fact: string; tags: string; category: string }>;
|
||||
|
||||
// Count keyword frequency among orphans
|
||||
const kwCount = new Map<string, string[]>(); // keyword → [factId, ...]
|
||||
for (const orphan of orphans) {
|
||||
const tags: string[] = JSON.parse(orphan.tags || "[]");
|
||||
for (const kw of tags) {
|
||||
const existing = kwCount.get(kw) || [];
|
||||
existing.push(orphan.id);
|
||||
kwCount.set(kw, existing);
|
||||
}
|
||||
}
|
||||
|
||||
// Find clusters (keywords appearing in >= threshold facts)
|
||||
const clusters: Array<{ keyword: string; factIds: string[] }> = [];
|
||||
for (const [kw, factIds] of kwCount.entries()) {
|
||||
if (factIds.length >= this.cfg.emergenceThreshold) {
|
||||
clusters.push({ keyword: kw, factIds: [...new Set(factIds)] });
|
||||
}
|
||||
}
|
||||
|
||||
// Merge overlapping clusters
|
||||
const mergedClusters = this.mergeClusters(clusters);
|
||||
|
||||
// Create topics from clusters
|
||||
for (const cluster of mergedClusters) {
|
||||
// Check if a similar topic already exists
|
||||
const existing = this.findMatchingTopics(cluster.keywords);
|
||||
if (existing.length > 0 && existing[0].overlap > 0.5) {
|
||||
// Associate orphans with existing topic instead
|
||||
for (const fid of cluster.factIds) {
|
||||
raw.prepare("INSERT OR IGNORE INTO fact_topics (fact_id, topic_id) VALUES (?, ?)")
|
||||
.run(fid, existing[0].topic.id);
|
||||
}
|
||||
raw.prepare("UPDATE topics SET fact_count = fact_count + ? WHERE id = ?")
|
||||
.run(cluster.factIds.length, existing[0].topic.id);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Generate topic name via LLM
|
||||
const name = await this.generateTopicName(cluster.keywords, cluster.factIds);
|
||||
const now = Date.now();
|
||||
const topicId = genId();
|
||||
|
||||
// Find potential parent topic by keyword overlap or name inclusion
|
||||
const parentId = this.findParentTopic(name, cluster.keywords);
|
||||
|
||||
raw.prepare(`INSERT INTO topics (id, name, keywords, fact_count, first_seen, last_seen, importance_score, parent_topic_id)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?)`).run(
|
||||
topicId, name, JSON.stringify(cluster.keywords),
|
||||
cluster.factIds.length, now, now,
|
||||
cluster.factIds.length * 1.0, parentId, // initial importance + parent
|
||||
);
|
||||
|
||||
// Link facts
|
||||
for (const fid of cluster.factIds) {
|
||||
raw.prepare("INSERT OR IGNORE INTO fact_topics (fact_id, topic_id) VALUES (?, ?)")
|
||||
.run(fid, topicId);
|
||||
}
|
||||
|
||||
// Register topic as entity in knowledge graph
|
||||
try {
|
||||
raw.prepare(`INSERT OR IGNORE INTO entities (id, name, type, created_at)
|
||||
VALUES (?, ?, 'topic', ?)`).run(topicId, name, now);
|
||||
} catch (e) { console.debug('memoria:topics: ' + String(e)); }
|
||||
|
||||
created++;
|
||||
}
|
||||
|
||||
// ─── Sub-topics ───
|
||||
const topLevelTopics = raw.prepare("SELECT * FROM topics WHERE parent_topic_id IS NULL OR parent_topic_id = ''").all() as Topic[];
|
||||
for (const topic of topLevelTopics) {
|
||||
if (topic.fact_count < this.cfg.subtopicThreshold * 2) continue; // Need enough facts
|
||||
|
||||
const factIds = raw.prepare("SELECT fact_id FROM fact_topics WHERE topic_id = ?")
|
||||
.all(topic.id) as Array<{ fact_id: string }>;
|
||||
|
||||
// Get keywords per fact within this topic
|
||||
const subKwCount = new Map<string, string[]>();
|
||||
for (const { fact_id } of factIds) {
|
||||
const f = raw.prepare("SELECT tags FROM facts WHERE id = ?").get(fact_id) as { tags: string } | undefined;
|
||||
if (!f) continue;
|
||||
const tags: string[] = JSON.parse(f.tags || "[]");
|
||||
for (const kw of tags) {
|
||||
const existing = subKwCount.get(kw) || [];
|
||||
existing.push(fact_id);
|
||||
subKwCount.set(kw, existing);
|
||||
}
|
||||
}
|
||||
|
||||
// Find sub-clusters within this topic
|
||||
for (const [kw, fids] of subKwCount.entries()) {
|
||||
if (fids.length < this.cfg.subtopicThreshold) continue;
|
||||
// Don't create sub-topic if it's the same as the parent's main keywords
|
||||
const parentKw: string[] = JSON.parse(topic.keywords as unknown as string || "[]");
|
||||
if (parentKw.includes(kw)) continue;
|
||||
|
||||
// Check if sub-topic already exists
|
||||
const existingSub = raw.prepare("SELECT id FROM topics WHERE parent_topic_id = ? AND keywords LIKE ?")
|
||||
.get(topic.id, `%"${kw}"%`) as { id: string } | undefined;
|
||||
if (existingSub) continue;
|
||||
|
||||
const name = await this.generateTopicName([kw], fids.slice(0, 3));
|
||||
const now = Date.now();
|
||||
const subId = genId();
|
||||
|
||||
raw.prepare(`INSERT INTO topics (id, name, keywords, fact_count, first_seen, last_seen, importance_score, parent_topic_id)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?)`).run(
|
||||
subId, name, JSON.stringify([kw]),
|
||||
fids.length, now, now, fids.length * 0.8, topic.id,
|
||||
);
|
||||
|
||||
for (const fid of fids) {
|
||||
raw.prepare("INSERT OR IGNORE INTO fact_topics (fact_id, topic_id) VALUES (?, ?)")
|
||||
.run(fid, subId);
|
||||
}
|
||||
|
||||
subtopics++;
|
||||
}
|
||||
}
|
||||
|
||||
// ─── Merge similar topics ───
|
||||
merged = await this.mergeTopics();
|
||||
|
||||
// ─── Update embeddings for all topics ───
|
||||
await this.updateTopicEmbeddings();
|
||||
|
||||
// ─── Apply decay ───
|
||||
this.applyDecay();
|
||||
|
||||
return { created, merged, subtopics };
|
||||
}
|
||||
|
||||
// ─── 5. Merge overlapping topics ───
|
||||
|
||||
private async mergeTopics(): Promise<number> {
|
||||
const raw = this.db.raw;
|
||||
const topics = raw.prepare("SELECT * FROM topics WHERE parent_topic_id IS NULL OR parent_topic_id = ''")
|
||||
.all() as Topic[];
|
||||
|
||||
let mergeCount = 0;
|
||||
const toDelete = new Set<string>();
|
||||
|
||||
for (let i = 0; i < topics.length; i++) {
|
||||
if (toDelete.has(topics[i].id)) continue;
|
||||
for (let j = i + 1; j < topics.length; j++) {
|
||||
if (toDelete.has(topics[j].id)) continue;
|
||||
|
||||
const kw1: string[] = JSON.parse(topics[i].keywords as unknown as string || "[]");
|
||||
const kw2: string[] = JSON.parse(topics[j].keywords as unknown as string || "[]");
|
||||
const overlap = this.jaccardOverlap(kw1, kw2);
|
||||
|
||||
if (overlap >= this.cfg.mergeOverlap) {
|
||||
// Merge j into i (keep the one with more facts)
|
||||
const [keep, remove] = topics[i].fact_count >= topics[j].fact_count
|
||||
? [topics[i], topics[j]]
|
||||
: [topics[j], topics[i]];
|
||||
|
||||
// Transfer facts
|
||||
raw.prepare("UPDATE fact_topics SET topic_id = ? WHERE topic_id = ?")
|
||||
.run(keep.id, remove.id);
|
||||
|
||||
// Merge keywords
|
||||
const merged = [...new Set([...kw1, ...kw2])];
|
||||
raw.prepare("UPDATE topics SET keywords = ?, fact_count = fact_count + ? WHERE id = ?")
|
||||
.run(JSON.stringify(merged), remove.fact_count, keep.id);
|
||||
|
||||
// Delete merged topic
|
||||
raw.prepare("DELETE FROM topics WHERE id = ?").run(remove.id);
|
||||
toDelete.add(remove.id);
|
||||
mergeCount++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return mergeCount;
|
||||
}
|
||||
|
||||
// ─── 6. Update topic embeddings ───
|
||||
|
||||
async updateTopicEmbeddings(): Promise<number> {
|
||||
if (!this.embedder) return 0;
|
||||
|
||||
const raw = this.db.raw;
|
||||
const topics = raw.prepare("SELECT * FROM topics").all() as Topic[];
|
||||
let updated = 0;
|
||||
|
||||
for (const topic of topics) {
|
||||
// Get all fact embeddings for this topic
|
||||
const factEmbeddings = raw.prepare(`
|
||||
SELECT e.vector FROM embeddings e
|
||||
JOIN fact_topics ft ON ft.fact_id = e.fact_id
|
||||
WHERE ft.topic_id = ?
|
||||
`).all(topic.id) as Array<{ vector: Buffer }>;
|
||||
|
||||
if (factEmbeddings.length === 0) continue;
|
||||
|
||||
// Compute mean embedding
|
||||
const dims = factEmbeddings[0].vector.length / 4; // Float32
|
||||
const mean = new Float32Array(dims);
|
||||
|
||||
for (const { vector } of factEmbeddings) {
|
||||
const arr = new Float32Array(vector.buffer, vector.byteOffset, dims);
|
||||
for (let d = 0; d < dims; d++) mean[d] += arr[d];
|
||||
}
|
||||
for (let d = 0; d < dims; d++) mean[d] /= factEmbeddings.length;
|
||||
|
||||
// Normalize
|
||||
let norm = 0;
|
||||
for (let d = 0; d < dims; d++) norm += mean[d] * mean[d];
|
||||
norm = Math.sqrt(norm);
|
||||
if (norm > 0) for (let d = 0; d < dims; d++) mean[d] /= norm;
|
||||
|
||||
// Store
|
||||
raw.prepare("UPDATE topics SET embedding = ? WHERE id = ?")
|
||||
.run(Buffer.from(mean.buffer), topic.id);
|
||||
|
||||
updated++;
|
||||
}
|
||||
|
||||
return updated;
|
||||
}
|
||||
|
||||
// ─── 7. Recall: find relevant topics for a query ───
|
||||
|
||||
async findRelevantTopics(query: string, limit = 5, expandedQueries?: string[]): Promise<Array<{ topic: Topic; score: number; facts: string[] }>> {
|
||||
const raw = this.db.raw;
|
||||
const results: Array<{ topic: Topic; score: number; facts: string[] }> = [];
|
||||
|
||||
// Strategy 1: Keyword matching — use expanded queries if available
|
||||
const allQueries = expandedQueries && expandedQueries.length > 0 ? expandedQueries : [query];
|
||||
const queryWords = new Set<string>();
|
||||
for (const q of allQueries) {
|
||||
for (const w of q.toLowerCase().split(/\s+/).filter(w => w.length > 2)) {
|
||||
queryWords.add(w);
|
||||
}
|
||||
}
|
||||
const allTopics = raw.prepare("SELECT * FROM topics ORDER BY importance_score DESC").all() as Topic[];
|
||||
|
||||
for (const topic of allTopics) {
|
||||
const kw: string[] = JSON.parse(topic.keywords as unknown as string || "[]");
|
||||
let keywordScore = 0;
|
||||
|
||||
for (const qw of queryWords) {
|
||||
if (kw.some(k => k.includes(qw) || qw.includes(k))) keywordScore += 1;
|
||||
if (topic.name.toLowerCase().includes(qw)) keywordScore += 2;
|
||||
}
|
||||
// Bonus: topic name exact match with any expanded query
|
||||
const topicLower = topic.name.toLowerCase();
|
||||
for (const q of allQueries) {
|
||||
if (q.toLowerCase().includes(topicLower) || topicLower.includes(q.toLowerCase())) {
|
||||
keywordScore += 3;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (keywordScore > 0) {
|
||||
// Boost by importance and recency
|
||||
const recencyBoost = Math.exp(-(Date.now() - topic.last_seen) / (86400000 * 14));
|
||||
const score = keywordScore * (0.5 + topic.importance_score * 0.3 + recencyBoost * 0.2);
|
||||
|
||||
const factRows = raw.prepare(
|
||||
"SELECT f.fact FROM facts f JOIN fact_topics ft ON ft.fact_id = f.id WHERE ft.topic_id = ? AND f.superseded = 0 ORDER BY f.created_at DESC LIMIT 10"
|
||||
).all(topic.id) as Array<{ fact: string }>;
|
||||
|
||||
results.push({ topic, score, facts: factRows.map(r => r.fact) });
|
||||
}
|
||||
}
|
||||
|
||||
// Strategy 2: Semantic (embedding) matching
|
||||
if (this.embedder) {
|
||||
try {
|
||||
const queryEmb = await this.embedder.embed(query);
|
||||
const topicsWithEmb = raw.prepare("SELECT * FROM topics WHERE embedding IS NOT NULL").all() as Topic[];
|
||||
|
||||
for (const topic of topicsWithEmb) {
|
||||
// Skip if already found by keywords
|
||||
if (results.some(r => r.topic.id === topic.id)) continue;
|
||||
|
||||
const topicEmb = new Float32Array(
|
||||
(topic.embedding as unknown as Buffer).buffer,
|
||||
(topic.embedding as unknown as Buffer).byteOffset,
|
||||
queryEmb.length,
|
||||
);
|
||||
|
||||
const cos = this.cosine(new Float32Array(queryEmb), topicEmb);
|
||||
if (cos >= 0.45) {
|
||||
const recencyBoost = Math.exp(-(Date.now() - topic.last_seen) / (86400000 * 14));
|
||||
const score = cos * (0.5 + topic.importance_score * 0.3 + recencyBoost * 0.2);
|
||||
|
||||
const factRows = raw.prepare(
|
||||
"SELECT f.fact FROM facts f JOIN fact_topics ft ON ft.fact_id = f.id WHERE ft.topic_id = ? AND f.superseded = 0 ORDER BY f.created_at DESC LIMIT 10"
|
||||
).all(topic.id) as Array<{ fact: string }>;
|
||||
|
||||
results.push({ topic, score, facts: factRows.map(r => r.fact) });
|
||||
}
|
||||
}
|
||||
} catch (e) { console.debug('memoria:topics: ' + String(e)); }
|
||||
}
|
||||
|
||||
// Sort by score and limit
|
||||
results.sort((a, b) => b.score - a.score);
|
||||
|
||||
// Track access on returned topics
|
||||
for (const r of results.slice(0, limit)) {
|
||||
raw.prepare("UPDATE topics SET access_count = access_count + 1, last_seen = ? WHERE id = ?")
|
||||
.run(Date.now(), r.topic.id);
|
||||
}
|
||||
|
||||
return results.slice(0, limit);
|
||||
}
|
||||
|
||||
// ─── 8. Apply decay on old topics ───
|
||||
|
||||
private applyDecay(): void {
|
||||
const raw = this.db.raw;
|
||||
const now = Date.now();
|
||||
const decayMs = this.cfg.decayDays * 86400000;
|
||||
|
||||
// Reduce importance of topics not seen recently
|
||||
raw.prepare(`
|
||||
UPDATE topics SET importance_score = importance_score *
|
||||
MAX(0.1, 1.0 - (? - last_seen) / (? * 2.0))
|
||||
WHERE (? - last_seen) > ?
|
||||
`).run(now, decayMs, now, decayMs);
|
||||
}
|
||||
|
||||
// ─── 9. Should scan? ───
|
||||
|
||||
/**
|
||||
* Called when a fact is superseded — remove fact↔topic links
|
||||
* and update topic fact_count. Delete empty topics.
|
||||
*/
|
||||
onFactSuperseded(factId: string): number {
|
||||
let affected = 0;
|
||||
try {
|
||||
const raw = this.db.raw;
|
||||
// Find topics linked to this fact
|
||||
const linked = raw.prepare(
|
||||
"SELECT topic_id FROM fact_topics WHERE fact_id = ?"
|
||||
).all(factId) as Array<{ topic_id: string }>;
|
||||
|
||||
// Remove the links
|
||||
raw.prepare("DELETE FROM fact_topics WHERE fact_id = ?").run(factId);
|
||||
|
||||
// Update fact_count and clean up empty topics
|
||||
for (const { topic_id } of linked) {
|
||||
const count = (raw.prepare(
|
||||
"SELECT COUNT(*) as c FROM fact_topics WHERE topic_id = ?"
|
||||
).get(topic_id) as { c: number }).c;
|
||||
|
||||
if (count === 0) {
|
||||
// No facts left → delete topic
|
||||
raw.prepare("DELETE FROM topics WHERE id = ?").run(topic_id);
|
||||
} else {
|
||||
raw.prepare("UPDATE topics SET fact_count = ? WHERE id = ?").run(count, topic_id);
|
||||
}
|
||||
affected++;
|
||||
}
|
||||
} catch (e) { console.debug('memoria:topics: ' + String(e)); }
|
||||
return affected;
|
||||
}
|
||||
|
||||
/**
|
||||
* Re-parent existing orphan topics.
|
||||
* Called once at boot to fix topics created before hierarchy logic existed.
|
||||
*/
|
||||
reparentExistingTopics(): number {
|
||||
const raw = this.db.raw;
|
||||
const orphans = raw.prepare(
|
||||
"SELECT * FROM topics WHERE parent_topic_id IS NULL OR parent_topic_id = '' ORDER BY fact_count ASC"
|
||||
).all() as Topic[];
|
||||
|
||||
let reparented = 0;
|
||||
for (const topic of orphans) {
|
||||
const kw: string[] = JSON.parse(topic.keywords as unknown as string || "[]");
|
||||
const parentId = this.findParentTopic(topic.name, kw);
|
||||
if (parentId && parentId !== topic.id) {
|
||||
raw.prepare("UPDATE topics SET parent_topic_id = ? WHERE id = ?").run(parentId, topic.id);
|
||||
reparented++;
|
||||
}
|
||||
}
|
||||
return reparented;
|
||||
}
|
||||
|
||||
shouldScan(): boolean {
|
||||
return this.capturesSinceLastScan >= this.cfg.scanInterval;
|
||||
}
|
||||
|
||||
// ─── Stats ───
|
||||
|
||||
stats(): { totalTopics: number; topLevelTopics: number; subTopics: number; orphanFacts: number; avgFactsPerTopic: number } {
|
||||
const raw = this.db.raw;
|
||||
const total = (raw.prepare("SELECT COUNT(*) as c FROM topics").get() as { c: number }).c;
|
||||
const topLevel = (raw.prepare("SELECT COUNT(*) as c FROM topics WHERE parent_topic_id IS NULL OR parent_topic_id = ''").get() as { c: number }).c;
|
||||
const orphans = (raw.prepare(`
|
||||
SELECT COUNT(*) as c FROM facts
|
||||
WHERE superseded = 0 AND tags != '[]' AND tags IS NOT NULL
|
||||
AND id NOT IN (SELECT fact_id FROM fact_topics)
|
||||
`).get() as { c: number }).c;
|
||||
const avgFacts = total > 0
|
||||
? (raw.prepare("SELECT AVG(fact_count) as a FROM topics").get() as { a: number }).a
|
||||
: 0;
|
||||
|
||||
return {
|
||||
totalTopics: total,
|
||||
topLevelTopics: topLevel,
|
||||
subTopics: total - topLevel,
|
||||
orphanFacts: orphans,
|
||||
avgFactsPerTopic: Math.round(avgFacts * 10) / 10,
|
||||
};
|
||||
}
|
||||
|
||||
// ─── Helpers ───
|
||||
|
||||
/**
|
||||
* Find a parent topic for a newly created topic.
|
||||
* Strategies:
|
||||
* 1. Name inclusion: if an existing topic's name is contained in the new name (or vice-versa)
|
||||
* 2. Keyword overlap: if a top-level topic shares keywords but is broader
|
||||
* Returns parent topic ID or null.
|
||||
*/
|
||||
private findParentTopic(newName: string, newKeywords: string[]): string | null {
|
||||
const raw = this.db.raw;
|
||||
const topLevelTopics = raw.prepare(
|
||||
"SELECT * FROM topics WHERE (parent_topic_id IS NULL OR parent_topic_id = '') ORDER BY fact_count DESC"
|
||||
).all() as Topic[];
|
||||
|
||||
const newNameLower = newName.toLowerCase();
|
||||
// Extract significant words (skip stop words, min 3 chars)
|
||||
const stopWords = new Set(["et", "de", "du", "des", "le", "la", "les", "un", "une", "en", "à", "the", "and", "of", "for", "in", "on", "with", "a"]);
|
||||
const newNameWords = newNameLower.split(/[\s/,—–-]+/).filter(w => w.length >= 3 && !stopWords.has(w));
|
||||
|
||||
let bestCandidate: { id: string; score: number } | null = null;
|
||||
|
||||
for (const topic of topLevelTopics) {
|
||||
const topicNameLower = topic.name.toLowerCase();
|
||||
// Skip if same name (would be a self-reference)
|
||||
if (topicNameLower === newNameLower) continue;
|
||||
|
||||
// Strategy 1: Name inclusion — new topic name contains existing topic name
|
||||
// e.g., "Memoria ClawHub" contains "Memoria" → parent is "Memoria" topic
|
||||
if (newNameLower.includes(topicNameLower) && topicNameLower.length >= 3) {
|
||||
return topic.id;
|
||||
}
|
||||
|
||||
// Strategy 2: Shared significant words in name
|
||||
// e.g., "Sol Memory" and "Sol Succès" share "sol" → broader one (more facts) is parent
|
||||
const topicNameWords = topicNameLower.split(/[\s/,—–-]+/).filter(w => w.length >= 3 && !stopWords.has(w));
|
||||
const sharedWords = newNameWords.filter(w => topicNameWords.includes(w));
|
||||
if (sharedWords.length > 0 && topic.fact_count > 3) {
|
||||
// Score: shared words / max words, weighted by parent fact_count
|
||||
const wordOverlap = sharedWords.length / Math.max(newNameWords.length, topicNameWords.length);
|
||||
// Prefer the topic with more facts (broader = better parent)
|
||||
const score = wordOverlap * Math.log2(topic.fact_count + 1);
|
||||
if (score > 0.4 && (!bestCandidate || score > bestCandidate.score)) {
|
||||
// Only set as parent if existing topic is broader (more facts)
|
||||
if (topic.fact_count >= 5) {
|
||||
bestCandidate = { id: topic.id, score };
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Strategy 3: Keyword overlap — existing topic has ≥50% of new topic's keywords
|
||||
// but is broader (has more facts)
|
||||
const topicKw: string[] = JSON.parse(topic.keywords as unknown as string || "[]");
|
||||
if (topicKw.length > 0 && newKeywords.length > 0) {
|
||||
const shared = newKeywords.filter(k => topicKw.includes(k)).length;
|
||||
const overlapRatio = shared / newKeywords.length;
|
||||
// New topic shares ≥50% keywords with existing AND existing is broader
|
||||
if (overlapRatio >= 0.5 && topic.fact_count > newKeywords.length * 2) {
|
||||
return topic.id;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return bestCandidate?.id ?? null;
|
||||
}
|
||||
|
||||
private async generateTopicName(keywords: string[], sampleFactIds: string[]): Promise<string> {
|
||||
try {
|
||||
// Get sample facts
|
||||
const raw = this.db.raw;
|
||||
const facts: string[] = [];
|
||||
for (const fid of sampleFactIds.slice(0, 3)) {
|
||||
const f = raw.prepare("SELECT fact FROM facts WHERE id = ?").get(fid) as { fact: string } | undefined;
|
||||
if (f) facts.push(f.fact);
|
||||
}
|
||||
|
||||
const prompt = `Donne un nom court (1-3 mots) pour un topic regroupant ces faits.
|
||||
Keywords: ${keywords.join(", ")}
|
||||
Faits exemples: ${facts.join(" | ")}
|
||||
|
||||
Réponds UNIQUEMENT le nom du topic, rien d'autre. Exemples: "Bureau CRM", "Infrastructure Ollama", "Mémoire Agents", "Déploiement Vercel"`;
|
||||
|
||||
const response = await this.llm.generate(prompt, {
|
||||
maxTokens: 20,
|
||||
temperature: 0.3,
|
||||
timeoutMs: 8000,
|
||||
});
|
||||
|
||||
const name = response.trim().replace(/["\n]/g, "").slice(0, 50);
|
||||
return name || keywords.slice(0, 2).join(" ");
|
||||
} catch (e) {
|
||||
console.debug('memoria:topics: ' + String(e));
|
||||
return keywords.slice(0, 2).join(" ");
|
||||
}
|
||||
}
|
||||
|
||||
private mergeClusters(clusters: Array<{ keyword: string; factIds: string[] }>): Array<{ keywords: string[]; factIds: string[] }> {
|
||||
if (clusters.length === 0) return [];
|
||||
|
||||
// Sort by size (biggest first)
|
||||
clusters.sort((a, b) => b.factIds.length - a.factIds.length);
|
||||
|
||||
const merged: Array<{ keywords: string[]; factIds: Set<string> }> = [];
|
||||
const used = new Set<number>();
|
||||
|
||||
for (let i = 0; i < clusters.length; i++) {
|
||||
if (used.has(i)) continue;
|
||||
|
||||
const group = { keywords: [clusters[i].keyword], factIds: new Set(clusters[i].factIds) };
|
||||
|
||||
for (let j = i + 1; j < clusters.length; j++) {
|
||||
if (used.has(j)) continue;
|
||||
|
||||
// Check fact overlap
|
||||
const overlap = clusters[j].factIds.filter(f => group.factIds.has(f)).length;
|
||||
const overlapRatio = overlap / Math.min(group.factIds.size, clusters[j].factIds.length);
|
||||
|
||||
if (overlapRatio >= 0.5) {
|
||||
group.keywords.push(clusters[j].keyword);
|
||||
for (const f of clusters[j].factIds) group.factIds.add(f);
|
||||
used.add(j);
|
||||
}
|
||||
}
|
||||
|
||||
merged.push(group);
|
||||
used.add(i);
|
||||
}
|
||||
|
||||
return merged.map(g => ({ keywords: g.keywords, factIds: [...g.factIds] }));
|
||||
}
|
||||
|
||||
private jaccardOverlap(a: string[], b: string[]): number {
|
||||
if (a.length === 0 || b.length === 0) return 0;
|
||||
const setA = new Set(a);
|
||||
const setB = new Set(b);
|
||||
let intersection = 0;
|
||||
for (const x of setA) if (setB.has(x)) intersection++;
|
||||
const union = new Set([...a, ...b]).size;
|
||||
return union > 0 ? intersection / union : 0;
|
||||
}
|
||||
|
||||
private cosine(a: Float32Array, b: Float32Array): number {
|
||||
let dot = 0, na = 0, nb = 0;
|
||||
for (let i = 0; i < a.length; i++) {
|
||||
dot += a[i] * b[i];
|
||||
na += a[i] * a[i];
|
||||
nb += b[i] * b[i];
|
||||
}
|
||||
const denom = Math.sqrt(na) * Math.sqrt(nb);
|
||||
return denom > 0 ? dot / denom : 0;
|
||||
}
|
||||
|
||||
private parseJSON(text: string): unknown {
|
||||
let cleaned = text.trim();
|
||||
if (cleaned.startsWith("```")) {
|
||||
const lines = cleaned.split("\n");
|
||||
lines.shift();
|
||||
if (lines[lines.length - 1]?.trim() === "```") lines.pop();
|
||||
cleaned = lines.join("\n").trim();
|
||||
}
|
||||
const match = cleaned.match(/(\{[\s\S]*\}|\[[\s\S]*\])/);
|
||||
if (match) cleaned = match[1];
|
||||
return JSON.parse(cleaned);
|
||||
}
|
||||
}
|
||||
118
openclaw-memoria-port/core/wal.ts
Normal file
118
openclaw-memoria-port/core/wal.ts
Normal file
@@ -0,0 +1,118 @@
|
||||
/**
|
||||
* Memoria — Write-Ahead Log (WAL)
|
||||
*
|
||||
* Persists conversation turns to SQLite IMMEDIATELY when they arrive,
|
||||
* before any LLM processing. If the process crashes mid-session,
|
||||
* these messages survive and are processed on next boot.
|
||||
*
|
||||
* Inspired by proactive-agent's "working buffer" + database WAL concept.
|
||||
*
|
||||
* Table: wal_buffer
|
||||
* - id: auto-increment
|
||||
* - session_id: string (current session identifier)
|
||||
* - role: "user" | "assistant"
|
||||
* - content: message text (truncated to 5000 chars)
|
||||
* - created_at: timestamp ms
|
||||
* - processed: 0 | 1 (whether extraction has consumed this entry)
|
||||
*/
|
||||
|
||||
import type Database from "better-sqlite3";
|
||||
|
||||
export interface WALEntry {
|
||||
id: number;
|
||||
session_id: string;
|
||||
role: "user" | "assistant";
|
||||
content: string;
|
||||
created_at: number;
|
||||
processed: number;
|
||||
}
|
||||
|
||||
export class WriteAheadLog {
|
||||
private db: Database.Database;
|
||||
|
||||
constructor(db: Database.Database) {
|
||||
this.db = db;
|
||||
this.ensureTable();
|
||||
}
|
||||
|
||||
private ensureTable(): void {
|
||||
this.db.exec(`
|
||||
CREATE TABLE IF NOT EXISTS wal_buffer (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
session_id TEXT NOT NULL DEFAULT '',
|
||||
role TEXT NOT NULL CHECK(role IN ('user', 'assistant')),
|
||||
content TEXT NOT NULL,
|
||||
created_at INTEGER NOT NULL,
|
||||
processed INTEGER NOT NULL DEFAULT 0
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS idx_wal_unprocessed
|
||||
ON wal_buffer(processed, created_at);
|
||||
`);
|
||||
}
|
||||
|
||||
/**
|
||||
* Write a message to the WAL immediately.
|
||||
* This is synchronous (SQLite write) — no LLM, no network.
|
||||
* Typically < 1ms.
|
||||
*/
|
||||
write(role: "user" | "assistant", content: string, sessionId = ""): void {
|
||||
if (!content || content.length < 5) return;
|
||||
this.db.prepare(
|
||||
"INSERT INTO wal_buffer (session_id, role, content, created_at, processed) VALUES (?, ?, ?, ?, 0)"
|
||||
).run(sessionId, role, content.slice(0, 5000), Date.now());
|
||||
}
|
||||
|
||||
/**
|
||||
* Get unprocessed entries (oldest first), up to `limit`.
|
||||
*/
|
||||
getUnprocessed(limit = 20): WALEntry[] {
|
||||
return this.db.prepare(
|
||||
"SELECT * FROM wal_buffer WHERE processed = 0 ORDER BY created_at ASC LIMIT ?"
|
||||
).all(limit) as WALEntry[];
|
||||
}
|
||||
|
||||
/**
|
||||
* Mark entries as processed after successful extraction.
|
||||
*/
|
||||
markProcessed(ids: number[]): void {
|
||||
if (ids.length === 0) return;
|
||||
const placeholders = ids.map(() => "?").join(",");
|
||||
this.db.prepare(
|
||||
`UPDATE wal_buffer SET processed = 1 WHERE id IN (${placeholders})`
|
||||
).run(...ids);
|
||||
}
|
||||
|
||||
/**
|
||||
* Clean up old processed entries (older than `maxAgeDays`).
|
||||
* Called periodically to prevent unbounded growth.
|
||||
*/
|
||||
cleanup(maxAgeDays = 7): number {
|
||||
const cutoff = Date.now() - maxAgeDays * 86_400_000;
|
||||
const result = this.db.prepare(
|
||||
"DELETE FROM wal_buffer WHERE processed = 1 AND created_at < ?"
|
||||
).run(cutoff);
|
||||
return result.changes;
|
||||
}
|
||||
|
||||
/**
|
||||
* Count unprocessed entries.
|
||||
*/
|
||||
unprocessedCount(): number {
|
||||
const row = this.db.prepare(
|
||||
"SELECT COUNT(*) as cnt FROM wal_buffer WHERE processed = 0"
|
||||
).get() as { cnt: number };
|
||||
return row.cnt;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a text representation of unprocessed entries for extraction.
|
||||
* Format suitable for LLM fact extraction.
|
||||
*/
|
||||
getUnprocessedText(limit = 20): string {
|
||||
const entries = this.getUnprocessed(limit);
|
||||
if (entries.length === 0) return "";
|
||||
return entries
|
||||
.map(e => `[${e.role}] ${e.content}`)
|
||||
.join("\n\n");
|
||||
}
|
||||
}
|
||||
170
openclaw-memoria-port/docs/ARCHITECTURE.md
Normal file
170
openclaw-memoria-port/docs/ARCHITECTURE.md
Normal file
@@ -0,0 +1,170 @@
|
||||
# Memoria — Architecture & Internals
|
||||
|
||||
> This document covers implementation details for contributors and advanced users.
|
||||
> For installation and usage, see the main [README](../README.md).
|
||||
|
||||
## Hooks
|
||||
|
||||
Memoria integrates with OpenClaw via five hooks:
|
||||
- `before_prompt_build` — Recall pipeline (inject relevant facts into context)
|
||||
- `message_received` — Buffer user messages for continuous learning (Layer 21)
|
||||
- `llm_output` — Buffer assistant responses + trigger extraction (Layer 21)
|
||||
- `agent_end` — Capture pipeline (extract facts from conversation)
|
||||
- `after_compaction` — Capture pipeline (extract facts from compacted summaries)
|
||||
|
||||
Additionally, `after_tool_call` is used for real-time procedural capture (Layer 13).
|
||||
|
||||
## Recall Pipeline
|
||||
|
||||
```
|
||||
1. budget.compute() → determine max facts for current context usage
|
||||
2. observationMgr.getRelevantObservations(query) → matching observations
|
||||
3. embeddingMgr.hybridSearch(query) → FTS5 + cosine + scoring
|
||||
4. scoreAndRank(results) → temporal sort (semantic vs episodic decay)
|
||||
5. graph.findEntitiesInText(query) → mentioned entities
|
||||
6. graph.getRelatedFacts(entities) → BFS 2 hops
|
||||
7. graph.hebbianReinforce(entityIds) → reinforce weights
|
||||
8. topicMgr.findRelevantTopics(query) → topics by keyword + cosine
|
||||
9. treeBuilder.build(allCandidates, query) → hierarchical tree
|
||||
10. treeBuilder.extractFacts(tree, limit) → final selection
|
||||
11. formatRecallContext(facts, observationContext) → inject
|
||||
```
|
||||
|
||||
## Capture Pipeline
|
||||
|
||||
```
|
||||
1. LLM extract → JSON facts with {fact, category, type, confidence}
|
||||
2. TODO filter → skip disposable tasks, keep learned processes
|
||||
3. selective.processAndApply(fact, category, confidence, agent, factType)
|
||||
4. postProcessNewFacts():
|
||||
a. embed batch → vectorize unembedded facts
|
||||
b. graph.extractAndStore → entities/relations
|
||||
c. topicMgr.onFactCaptured → keywords + association
|
||||
d. topicMgr.scanAndEmerge → emergence if threshold met
|
||||
e. observationMgr.onFactCaptured → match/create/update observations
|
||||
f. clusterMgr.generateClusters → entity-grouped summaries
|
||||
g. mdSync.syncToMd → append to .md files
|
||||
h. mdRegen.regenerate → auto if file > 200 lines
|
||||
```
|
||||
|
||||
## Memory Types
|
||||
|
||||
| Type | Description | Decay |
|
||||
|------|-------------|-------|
|
||||
| **semantic** | Durable truths, patterns | Slow (30-90 days by category) |
|
||||
| **episodic** | Dated events, milestones | Fast (7-14 days by category) |
|
||||
|
||||
### Decay half-lives by category × type
|
||||
|
||||
| Category | Semantic | Episodic |
|
||||
|----------|----------|----------|
|
||||
| erreur | ∞ (immune) | 30 days |
|
||||
| savoir | 90 days | 14 days |
|
||||
| preference | 90 days | 14 days |
|
||||
| rh | 60 days | 14 days |
|
||||
| client | 60 days | 14 days |
|
||||
| outil | 30 days | 7 days |
|
||||
| chronologie | 14 days | 7 days |
|
||||
|
||||
## Scoring Formula
|
||||
|
||||
`score = confidence × decayFactor × recencyBoost × accessBoost × freshnessBoost × stalePenalty`
|
||||
|
||||
- **Access boost**: `0.3 × log(accessCount + 1)` — frequently recalled facts score higher
|
||||
- **Hot Tier**: facts accessed ≥5x are always injected
|
||||
|
||||
## Layers
|
||||
|
||||
| # | Layer | File | LLM? |
|
||||
|---|-------|------|------|
|
||||
| 1 | SQLite Core + FTS5 | `db.ts` | ❌ |
|
||||
| 2 | Temporal Scoring + Hot Tier | `scoring.ts` | ❌ |
|
||||
| 3 | Selective Memory (dedup, contradiction) | `selective.ts` | ✅ |
|
||||
| 4 | Embeddings + Hybrid Search | `embeddings.ts` | ❌ (embed only) |
|
||||
| 5 | Knowledge Graph + Hebbian | `graph.ts` | ✅ |
|
||||
| 6 | Context Tree | `context-tree.ts` | ❌ (heuristic) |
|
||||
| 7 | Adaptive Budget | `budget.ts` | ❌ |
|
||||
| 8 | Emergent Topics | `topics.ts` | ✅ |
|
||||
| 9 | Observations | `observations.ts` | ✅ |
|
||||
| 10 | Fact Clusters | `fact-clusters.ts` | ✅ |
|
||||
| 11 | .md Sync + Regen | `sync.ts`, `md-regen.ts` | ❌ |
|
||||
| 12 | Fallback Chain | `fallback.ts` | all |
|
||||
| 13 | Procedural Memory | `procedural.ts` | ✅ |
|
||||
| 14 | Lifecycle | `lifecycle.ts` | ❌ |
|
||||
| 15 | Feedback Loop | `feedback.ts` | ❌ |
|
||||
| 16 | Hebbian Reinforcement | `hebbian.ts` | ❌ |
|
||||
| 17 | Identity Parser | `identity-parser.ts` | ❌ |
|
||||
| 18 | Expertise Specialization | `expertise.ts` | ❌ |
|
||||
| 19 | Proactive Revision | `revision.ts` | ✅ |
|
||||
| 20 | Behavioral Patterns | `patterns.ts` | ✅ |
|
||||
| 21 | Continuous Learning | `index.ts` (hooks) | ✅ |
|
||||
|
||||
## Continuous Learning (Layer 21)
|
||||
|
||||
Real-time fact capture via `message_received` + `llm_output` hooks, independent of context window size, compaction, or session end.
|
||||
|
||||
**Hooks:**
|
||||
- `message_received` → buffers user messages, detects urgent signals (frustration, error keywords)
|
||||
- `llm_output` → buffers assistant responses, triggers extraction
|
||||
|
||||
**3 extraction modes:**
|
||||
- **Periodic** — every N turns (default 4), with 45s cooldown between extractions
|
||||
- **Urgent** — immediate on frustration/error signals (bypasses cooldown): "ne fais plus", "crash", "doublon", "putain"...
|
||||
- **Self-error** — immediate when assistant acknowledges its own mistake: "par erreur", "j'aurais dû"...
|
||||
|
||||
**Cross-layer integration:**
|
||||
- Uses same `extractLlm` + `LLM_EXTRACT_PROMPT` as agent_end
|
||||
- Facts go through `selective.processAndApply()` → dedup/contradiction/enrichment
|
||||
- Triggers full `postProcessNewFacts()` → embed, graph, topics, observations, clusters, sync
|
||||
- `agent_end` reduces its scope when continuous already captured (avoids double LLM calls)
|
||||
|
||||
**Config (`continuous` in plugin config):**
|
||||
- `interval` (default 4): extract every N turns
|
||||
- `cooldownMs` (default 45000): minimum gap between periodic extractions
|
||||
- `enabled` (default true when autoCapture is true): toggle on/off
|
||||
|
||||
## Procedural Memory
|
||||
|
||||
- Short fact (<60 chars) + TODO pattern → skip
|
||||
- Long fact (≥60 chars) → always keep
|
||||
- Contains explanation markers (car, sinon, pour, because, →) → always keep
|
||||
|
||||
## Observations Lifecycle
|
||||
|
||||
1. New fact → search for matching observation (embedding similarity or keywords)
|
||||
2. Match found → re-synthesize with new evidence
|
||||
3. No match → accumulate; 3+ facts sharing a topic → create observation
|
||||
4. Recall injects observations FIRST (priority over individual facts)
|
||||
|
||||
## Database Schema
|
||||
|
||||
- `facts` — main table with `fact_type` (semantic/episodic/cluster)
|
||||
- `facts_fts` — FTS5 index
|
||||
- `embeddings` — float vectors (768d default)
|
||||
- `entities`, `relations` — knowledge graph
|
||||
- `topics`, `fact_topics` — emergent topic system
|
||||
- `cluster_members` — maps cluster facts to their member facts
|
||||
- `observations` — living syntheses
|
||||
- `procedures` — procedural memory (how-to steps)
|
||||
|
||||
## Categories (7)
|
||||
|
||||
| Category | Maps to | Normalizations |
|
||||
|----------|---------|----------------|
|
||||
| savoir | MEMORY.md | architecture, mécanisme → savoir |
|
||||
| erreur | MEMORY.md | sévérité, bug → erreur |
|
||||
| outil | TOOLS.md | — |
|
||||
| preference | USER.md | — |
|
||||
| chronologie | MEMORY.md | — |
|
||||
| rh | COMPANY.md | — |
|
||||
| client | COMPANY.md | financier → client |
|
||||
|
||||
## Provider Support
|
||||
|
||||
| Provider | LLM | Embeddings | Notes |
|
||||
|----------|-----|------------|-------|
|
||||
| Ollama | ✅ | ✅ | Local, 0€, `thinking` field support |
|
||||
| LM Studio | ✅ | ✅ | Local, `reasoning_content` support |
|
||||
| OpenAI | ✅ | ✅ | Cloud, compatible APIs |
|
||||
| OpenRouter | ✅ | ✅ | Multi-model gateway |
|
||||
| Anthropic | ✅ | ❌ | Native `/v1/messages` API |
|
||||
316
openclaw-memoria-port/docs/MODULES.md
Normal file
316
openclaw-memoria-port/docs/MODULES.md
Normal file
@@ -0,0 +1,316 @@
|
||||
# Memoria — Module Guide for Contributors
|
||||
|
||||
> Quick reference to understand each file's role, inputs/outputs, and how modules connect.
|
||||
> For the full pipeline flow, see [ARCHITECTURE.md](ARCHITECTURE.md).
|
||||
|
||||
## Overview
|
||||
|
||||
```
|
||||
26 TypeScript files · ~10,500 LOC · 21 layers
|
||||
SQLite database with FTS5 · Local embeddings (768d)
|
||||
5 LLM providers supported · Fallback chain architecture
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Core Modules
|
||||
|
||||
### `db.ts` (644 LOC) — Layer 1: Database
|
||||
**The foundation.** Manages SQLite connection, schema, migrations, and CRUD.
|
||||
|
||||
- **Key types**: `Fact`, `Entity`, `Relation` (all exported)
|
||||
- **Tables**: facts, facts_fts, embeddings, entities, relations, topics, fact_topics, observations, procedures, cluster_members, chunks, identity_cache, meta
|
||||
- **Key methods**: `storeFact()`, `getFact()`, `searchFacts()` (FTS5), `getActiveFacts()`, `supersedeFact()`, `raw` (direct sqlite3 access)
|
||||
- **Migrations**: auto-run on construction; `migrateAddProcedures()`, `migrateAddClusterMembers()`, etc.
|
||||
- **Important**: `storeFact()` uses `Omit<Fact, ...> & Partial<Fact>` — you must provide id, fact, category, confidence, source, tags, agent, created_at, updated_at, fact_type. Other columns have defaults.
|
||||
|
||||
### `index.ts` (1777 LOC) — Plugin Entry Point
|
||||
**The orchestrator.** Registers all hooks, creates all managers, wires everything together.
|
||||
|
||||
- **Hooks registered**: `before_prompt_build` (recall), `message_received` + `llm_output` (continuous learning), `after_tool_call` (procedural), `agent_end` (capture), `after_compaction` (safety net)
|
||||
- **Key internal functions**: `postProcessNewFacts()` (runs 8 post-capture steps), `doContinuousExtraction()`, `parseConfig()`
|
||||
- **Config parsing**: raw plugin config → `MemoriaConfig` interface
|
||||
- **LLM wiring**: `layerLLM()` function resolves per-layer overrides or falls back to main chain
|
||||
|
||||
### `fallback.ts` (249 LOC) — Layer 12: Fallback Chain
|
||||
**Resilience.** Wraps multiple LLM/embed providers with automatic failover.
|
||||
|
||||
- **Exports**: `FallbackChain` (implements both `LLMProvider` and `EmbedProvider`)
|
||||
- **Behavior**: tries providers in order; if one fails, tries next; if all fail, throws
|
||||
- **Config**: `FallbackProviderConfig[]` — each entry has provider type, model, baseUrl, apiKey
|
||||
- **Used by**: every module that needs LLM — selective, graph, topics, observations, clusters, procedural, revision, patterns
|
||||
|
||||
---
|
||||
|
||||
## Recall Pipeline (before_prompt_build)
|
||||
|
||||
### `scoring.ts` (187 LOC) — Layer 2: Temporal Scoring
|
||||
Calculates decay-based scores for facts.
|
||||
|
||||
- **Input**: array of facts
|
||||
- **Output**: facts with `temporalScore` attached
|
||||
- **Key formula**: `confidence × decayFactor × recencyBoost × accessBoost × freshnessBoost × stalePenalty`
|
||||
- **Hot Tier**: facts with `access_count >= 5` are always included (bypass budget)
|
||||
- **Decay rates**: vary by category × fact_type (semantic vs episodic). Error facts are immune.
|
||||
|
||||
### `budget.ts` (184 LOC) — Layer 7: Adaptive Budget
|
||||
Decides how many facts to inject based on context usage.
|
||||
|
||||
- **Input**: current context token count, max context tokens
|
||||
- **Output**: `BudgetResult` with `maxFacts` (2-12) and `tier`
|
||||
- **Tiers**: empty (12), light (10), medium (8), heavy (5), critical (2)
|
||||
|
||||
### `context-tree.ts` (337 LOC) — Layer 6: Context Tree
|
||||
Organizes facts into a hierarchical tree for better prompt injection.
|
||||
|
||||
- **Input**: array of scored facts + query
|
||||
- **Output**: tree of `ContextNode` with facts grouped by category/topic
|
||||
- **Method**: heuristic regex-based grouping (no LLM needed)
|
||||
|
||||
### `identity-parser.ts` (213 LOC) — Layer 17: Identity Parser
|
||||
Parses SOUL.md, USER.md, COMPANY.md to know what matters to the user.
|
||||
|
||||
- **Input**: workspace path
|
||||
- **Output**: identity context (human name, agent name, company, projects, priorities)
|
||||
- **Method**: `calculateRelevance(fact, category)` → 0.0-1.0 score
|
||||
- **Caches**: parsed identity in SQLite `identity_cache` table
|
||||
|
||||
### `expertise.ts` (144 LOC) — Layer 18: Expertise Specialization
|
||||
Boosts recall score for topics the user frequently asks about.
|
||||
|
||||
- **Input**: fact + topic access counts
|
||||
- **Output**: boosted score (up to 1.5x)
|
||||
- **Data source**: `topics.access_count` (incremented on each recall)
|
||||
|
||||
---
|
||||
|
||||
## Capture Pipeline (agent_end / continuous)
|
||||
|
||||
### `selective.ts` (611 LOC) — Layer 3: Selective Memory
|
||||
**Gatekeeper.** Decides if a new fact should be stored, merged, or rejected.
|
||||
|
||||
- **Input**: new fact text, category, confidence, agent
|
||||
- **Output**: `{ action: "store" | "enrich" | "supersede" | "skip", stored: boolean }`
|
||||
- **Pipeline**: noise filter → too-short check → FTS candidates → Levenshtein dedup → prefix dedup → LLM contradiction check → store/enrich/supersede
|
||||
- **Thresholds**: configurable per category (preferences have tighter dedup at 0.65)
|
||||
- **LLM call**: only for contradiction detection (when similarity > threshold)
|
||||
|
||||
### `embeddings.ts` (347 LOC) — Layer 4: Embeddings
|
||||
Manages vector embeddings for semantic search.
|
||||
|
||||
- **Input**: fact text
|
||||
- **Output**: 768d float vector stored in `embeddings` table
|
||||
- **Methods**: `embedFact()`, `embedBatch()`, `hybridSearch(query)` (FTS5 + cosine)
|
||||
- **Batch embed**: processes unembedded facts on capture; called from `postProcessNewFacts()`
|
||||
|
||||
### `embed-fallback.ts` (62 LOC) — Embed Provider Wrapper
|
||||
Wraps multiple embed providers with fallback (like `fallback.ts` but for embeddings only).
|
||||
|
||||
### `graph.ts` (427 LOC) — Layer 5: Knowledge Graph
|
||||
Extracts entities and relations from facts using LLM.
|
||||
|
||||
- **Input**: fact text
|
||||
- **Output**: entities (person/project/tool/concept/place) + relations stored in DB
|
||||
- **LLM call**: entity/relation extraction prompt
|
||||
- **Recall**: `findEntitiesInText(query)` → `getRelatedFacts(entityIds)` (BFS 2 hops)
|
||||
|
||||
### `hebbian.ts` (155 LOC) — Layer 16: Hebbian Reinforcement
|
||||
Strengthens knowledge graph relations through co-occurrence.
|
||||
|
||||
- **Behavior**: when entities are recalled together, their relation weight increases by 0.1 (capped at 2.0); unused relations decay by 0.05 daily (minimum 0.1)
|
||||
- **Called from**: recall pipeline (after graph entity lookup)
|
||||
|
||||
### `topics.ts` (825 LOC) — Layer 8: Emergent Topics
|
||||
Discovers and manages topic clusters.
|
||||
|
||||
- **Input**: new fact
|
||||
- **Output**: topic associations in `fact_topics` table
|
||||
- **LLM call**: keyword extraction for new facts; topic naming for new clusters
|
||||
- **Features**: parent_topic_id hierarchy, `scanAndEmerge()` for new topic creation (threshold: 3+ facts), boot-time reparenting of existing topics, access_count tracking
|
||||
|
||||
### `observations.ts` (482 LOC) — Layer 9: Observations
|
||||
Living syntheses that evolve as new evidence appears.
|
||||
|
||||
- **Input**: new fact + existing observations
|
||||
- **Output**: updated/created observation
|
||||
- **LLM call**: synthesis when merging new evidence into existing observation
|
||||
- **Lifecycle**: 3+ facts sharing a topic → auto-create observation; existing observation + new evidence → re-synthesize
|
||||
|
||||
### `fact-clusters.ts` (373 LOC) — Layer 10: Fact Clusters
|
||||
Groups related facts by entity into summary "cluster facts."
|
||||
|
||||
- **Input**: entity ID
|
||||
- **Output**: cluster fact (fact_type="cluster") + `cluster_members` links
|
||||
- **LLM call**: generate cluster summary from member facts
|
||||
- **Recall penalty**: clustered facts get 0.6x score (prefer the summary)
|
||||
|
||||
### `patterns.ts` (477 LOC) — Layer 20: Behavioral Patterns
|
||||
Detects repeated similar facts and consolidates them.
|
||||
|
||||
- **Input**: all facts from capture batch
|
||||
- **Output**: pattern facts (fact_type="pattern") with occurrence tracking
|
||||
- **LLM call**: generate consolidated pattern from similar facts
|
||||
- **Threshold**: 3+ similar facts → consolidate into pattern
|
||||
- **Lifecycle**: 5+ occurrences → auto-promote to "settled"
|
||||
|
||||
---
|
||||
|
||||
## Learning & Lifecycle
|
||||
|
||||
### `lifecycle.ts` (223 LOC) — Layer 14: Lifecycle Management
|
||||
Manages fact states: fresh → settled → dormant.
|
||||
|
||||
- **Input**: fact ID, current state
|
||||
- **Output**: state transition
|
||||
- **Rules**: fresh (new) → settled (confirmed by recalls/feedback) → dormant (unused for extended period)
|
||||
- **Integration**: cross-layer promotion from feedback (recall_count ≥ 5 + usefulness ≥ 2)
|
||||
|
||||
### `feedback.ts` (326 LOC) — Layer 15: Feedback Loop
|
||||
Tracks how useful recalled facts actually are.
|
||||
|
||||
- **Columns**: `usefulness`, `recall_count`, `used_count`
|
||||
- **Method**: after response, check which recalled facts appeared in the answer → increment used_count
|
||||
- **Data flows into**: lifecycle (promotion), scoring (access boost), patterns (consolidation)
|
||||
|
||||
### `revision.ts` (222 LOC) — Layer 19: Proactive Revision
|
||||
Periodically reviews settled facts for staleness.
|
||||
|
||||
- **Input**: settled facts with high recall but potentially outdated info
|
||||
- **Output**: updated fact text or supersession
|
||||
- **LLM call**: ask if fact still accurate; if not, generate updated version
|
||||
- **Trigger**: on boot staleness check (every 24h)
|
||||
|
||||
### `procedural.ts` (1281 LOC) — Layer 13: Procedural Memory
|
||||
**The largest module.** Captures "how to do things" with steps, quality, gotchas.
|
||||
|
||||
- **Input**: tool call sequences from `after_tool_call` hook
|
||||
- **Output**: `Procedure` with steps, quality profile, gotchas, alternatives, doc sources
|
||||
- **Features**: success/failure tracking, quality reflection via LLM, degradation scoring, alternative procedures, staleness/doc-check tracking, failure_reasons
|
||||
- **Key methods**: `assembleProcedure()`, `matchExisting()`, `reflectOnExecution()`, `formatForRecall()`
|
||||
|
||||
---
|
||||
|
||||
## Support Modules
|
||||
|
||||
### `sync.ts` (258 LOC) — Layer 11a: .md Sync
|
||||
Appends facts to workspace .md files based on category mapping.
|
||||
|
||||
- **Category → file**: savoir → MEMORY.md, erreur → MEMORY.md, outil → TOOLS.md, preference → USER.md, rh → COMPANY.md, client → COMPANY.md
|
||||
|
||||
### `md-regen.ts` (348 LOC) — Layer 11b: .md Auto-Regeneration
|
||||
Rewrites .md sections when files exceed 200 lines.
|
||||
|
||||
- **Input**: file path, all facts for that category
|
||||
- **Output**: regenerated file preserving manual content above injection point
|
||||
|
||||
### `migrate.ts` (79 LOC) — Migration Utilities
|
||||
Helpers for one-time data migrations.
|
||||
|
||||
### `bootstrap-topics.ts` (88 LOC) — Topic Bootstrapper
|
||||
One-time script to generate initial topics from existing facts.
|
||||
|
||||
### `audit-v25.ts` (255 LOC) — Test Suite
|
||||
34 tests validating core functionality (FTS, scoring, embedding, facts, graph).
|
||||
|
||||
---
|
||||
|
||||
## Providers
|
||||
|
||||
### `providers/types.ts` — Interfaces
|
||||
Defines `LLMProvider` and `EmbedProvider` interfaces that all providers implement.
|
||||
|
||||
```typescript
|
||||
interface LLMProvider {
|
||||
generate(prompt: string, options?: GenerateOptions): Promise<string>;
|
||||
generateWithMeta?(prompt: string, options?: GenerateOptions): Promise<GenerateResult | null>;
|
||||
}
|
||||
|
||||
interface EmbedProvider {
|
||||
embed(text: string): Promise<number[]>;
|
||||
embedBatch?(texts: string[]): Promise<number[][]>;
|
||||
}
|
||||
```
|
||||
|
||||
### `providers/ollama.ts` — Ollama Provider
|
||||
Local Ollama API. Supports `thinking` field extraction, chat API with `think: false`.
|
||||
|
||||
### `providers/openai-compat.ts` — OpenAI / LM Studio / OpenRouter
|
||||
OpenAI-compatible API. Works with any OpenAI-format endpoint.
|
||||
|
||||
### `providers/anthropic.ts` — Anthropic
|
||||
Native Anthropic `/v1/messages` API. Supports Claude models.
|
||||
|
||||
---
|
||||
|
||||
## Data Flow Summary
|
||||
|
||||
```
|
||||
User message
|
||||
↓
|
||||
message_received hook → buffer (Layer 21)
|
||||
↓
|
||||
LLM response
|
||||
↓
|
||||
llm_output hook → buffer (Layer 21)
|
||||
↓ (every N turns or urgent)
|
||||
Continuous extraction → LLM → selective → postProcess
|
||||
↓
|
||||
before_prompt_build hook (next turn):
|
||||
budget(7) → observations(9) → hybridSearch(4)
|
||||
→ graph(5) → topics(8) → contextTree(6)
|
||||
→ scoring(2) × lifecycle(14) × expertise(18)
|
||||
→ identity(17) → format + inject
|
||||
↓
|
||||
agent_end hook:
|
||||
feedback(15) → LLM extract → selective(3) → postProcess:
|
||||
embed(4) → graph(5) → hebbian(16) → topics(8)
|
||||
→ observations(9) → clusters(10) → sync(11)
|
||||
→ patterns(20) → cross-layer(14,16,8,20)
|
||||
↓
|
||||
after_compaction hook (safety net):
|
||||
Same pipeline as agent_end but from compacted text
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Adding a New Layer
|
||||
|
||||
1. Create `your-layer.ts` in the root directory
|
||||
2. Export a manager class with a clear interface
|
||||
3. Import and instantiate in `index.ts` (after line ~450 where other managers are created)
|
||||
4. Wire into `postProcessNewFacts()` if it runs on capture
|
||||
5. Wire into `before_prompt_build` if it affects recall
|
||||
6. Add to the layer table in `docs/ARCHITECTURE.md` and `README.md`
|
||||
7. Update `CHANGELOG.md`
|
||||
8. Bump version in `package.json` and `SKILL.md`
|
||||
|
||||
## Adding a New Provider
|
||||
|
||||
1. Create `providers/your-provider.ts`
|
||||
2. Implement `LLMProvider` and/or `EmbedProvider` from `providers/types.ts`
|
||||
3. Add to `buildProvider()` switch in `fallback.ts`
|
||||
4. Add to `MemoriaConfig.llm.provider` and `embed.provider` union types in `index.ts`
|
||||
5. Update provider support table in README and ARCHITECTURE
|
||||
|
||||
## Running Tests
|
||||
|
||||
```bash
|
||||
npx tsx audit-v25.ts # 34-test suite
|
||||
npx tsx bootstrap-topics.ts # one-time topic generation
|
||||
```
|
||||
|
||||
## Database Inspection
|
||||
|
||||
```bash
|
||||
# Connect to the DB
|
||||
sqlite3 ~/.openclaw/workspace/memory/memoria.db
|
||||
|
||||
# Quick stats
|
||||
SELECT COUNT(*) as facts FROM facts WHERE superseded=0;
|
||||
SELECT COUNT(*) as embedded FROM embeddings;
|
||||
SELECT COUNT(*) as entities FROM entities;
|
||||
SELECT COUNT(*) as relations FROM relations;
|
||||
SELECT COUNT(*) as topics FROM topics;
|
||||
SELECT COUNT(*) as procedures FROM procedures;
|
||||
SELECT lifecycle_state, COUNT(*) FROM facts WHERE superseded=0 GROUP BY lifecycle_state;
|
||||
```
|
||||
395
openclaw-memoria-port/index.ts
Normal file
395
openclaw-memoria-port/index.ts
Normal file
@@ -0,0 +1,395 @@
|
||||
/**
|
||||
* Memoria — Multi-layer memory plugin for OpenClaw (Phase 2.2)
|
||||
*
|
||||
* THIN ADAPTER — imports, initializes, and wires OpenClaw hooks.
|
||||
* All logic lives in focused modules:
|
||||
* - config.ts — MemoriaConfig, parseConfig, provider factories
|
||||
* - extraction.ts — LLM_EXTRACT_PROMPT, parseJSON, normalizeCategory
|
||||
* - format.ts — formatRecallContext
|
||||
* - recall.ts — before_prompt_build hook (Layer 6)
|
||||
* - continuous.ts — message_received + llm_output hooks (Layer 21)
|
||||
* - procedural-hooks.ts — after_tool_call hook (Layer 1b)
|
||||
* - capture.ts — agent_end + after_compaction hooks (Layer 1)
|
||||
* - orchestrator.ts — postProcessNewFacts cascade pipeline
|
||||
*/
|
||||
|
||||
import fs from "fs";
|
||||
import { definePluginEntry } from "openclaw/plugin-sdk/plugin-entry";
|
||||
import type { OpenClawPluginApi } from "openclaw/plugin-sdk";
|
||||
import { MemoriaDB } from "./core/db.js";
|
||||
import { WriteAheadLog } from "./core/wal.js";
|
||||
import { SelfObserver } from "./core/self-observation.js";
|
||||
import { AutoSkillCreator } from "./core/auto-skill.js";
|
||||
import { DialecticMemory } from "./core/dialectic.js";
|
||||
import { SelectiveMemory } from "./core/selective.js";
|
||||
import { EmbeddingManager } from "./core/embeddings.js";
|
||||
import { KnowledgeGraph } from "./core/graph.js";
|
||||
import { ContextTreeBuilder } from "./core/context-tree.js";
|
||||
import { AdaptiveBudget } from "./core/budget.js";
|
||||
import { MdSync } from "./core/sync.js";
|
||||
import { MdRegenManager } from "./core/md-regen.js";
|
||||
import { FallbackChain } from "./core/fallback.js";
|
||||
import type { FallbackProviderConfig } from "./core/fallback.js";
|
||||
import { TopicManager } from "./core/topics.js";
|
||||
import { lmStudioEmbed, openaiEmbed } from "./core/providers/openai-compat.js";
|
||||
import type { EmbedProvider, LLMProvider } from "./core/providers/types.js";
|
||||
import { EmbedFallback } from "./core/embed-fallback.js";
|
||||
import { ObservationManager } from "./core/observations.js";
|
||||
import { FactClusterManager } from "./core/fact-clusters.js";
|
||||
import { FeedbackManager } from "./core/feedback.js";
|
||||
import { IdentityParser } from "./core/identity-parser.js";
|
||||
import { LifecycleManager } from "./core/lifecycle.js";
|
||||
import { RevisionManager } from "./core/revision.js";
|
||||
import { HebbianManager } from "./core/hebbian.js";
|
||||
import { ExpertiseManager } from "./core/expertise.js";
|
||||
import { ProceduralMemory } from "./core/procedural.js";
|
||||
import { PatternManager } from "./core/patterns.js";
|
||||
|
||||
// Refactored modules
|
||||
import { parseConfig, createEmbedProvider, type MemoriaConfig, type MemoriaLayer } from "./core/config.js";
|
||||
import { createPostProcessNewFacts } from "./orchestrator.js";
|
||||
import { registerContinuousHooks } from "./continuous.js";
|
||||
import { registerRecallHook } from "./recall.js";
|
||||
import { PrefetchCache } from "./prefetch.js";
|
||||
import { registerProceduralHook } from "./procedural-hooks.js";
|
||||
import { registerAgentEndHook, registerCompactionHook } from "./capture.js";
|
||||
|
||||
const WORKSPACE = process.env.OPENCLAW_WORKSPACE || `${process.env.HOME}/.openclaw/workspace`;
|
||||
|
||||
const memoriaPlugin = definePluginEntry({
|
||||
id: "memoria",
|
||||
name: "Memoria — Persistent Memory",
|
||||
description: "The most advanced memory system for AI agents. 21 cognitive layers, knowledge graph, procedural learning, vector search. 100% local-first.",
|
||||
|
||||
register(api: OpenClawPluginApi) {
|
||||
const rawPluginConfig = (api as any).pluginConfig as Record<string, unknown> | undefined;
|
||||
const cfg = parseConfig(rawPluginConfig);
|
||||
|
||||
const db = new MemoriaDB(WORKSPACE);
|
||||
const wal = new WriteAheadLog(db.raw);
|
||||
const selfObserver = new SelfObserver(db.raw);
|
||||
// autoSkill is initialized later, after proceduralMem is created
|
||||
let autoSkill: AutoSkillCreator;
|
||||
|
||||
// Process any unprocessed WAL entries from a previous crash
|
||||
const unprocessedCount = wal.unprocessedCount();
|
||||
if (unprocessedCount > 0) {
|
||||
api.logger.info?.(`memoria: WAL has ${unprocessedCount} unprocessed entries from previous session — will process on next extraction`);
|
||||
}
|
||||
// Cleanup old processed WAL entries
|
||||
const cleaned = wal.cleanup(7);
|
||||
if (cleaned > 0) {
|
||||
api.logger.debug?.(`memoria: WAL cleanup: removed ${cleaned} old processed entries`);
|
||||
}
|
||||
|
||||
// ─── Fallback chain: config providers → default chain ───
|
||||
api.logger.info?.(`[memoria] Config loaded: fallback=${cfg.fallback.length} providers, llm=${cfg.llm.provider}/${cfg.llm.model}, embed=${cfg.embed.provider}/${cfg.embed.model}`);
|
||||
const fallbackProviders: FallbackProviderConfig[] = cfg.fallback.length > 0
|
||||
? cfg.fallback
|
||||
: [
|
||||
{
|
||||
name: "ollama",
|
||||
type: "ollama" as const,
|
||||
model: cfg.llm.model || "gemma3:4b",
|
||||
baseUrl: cfg.llm.provider === "ollama" ? (cfg.llm.baseUrl || "http://localhost:11434") : "http://localhost:11434",
|
||||
timeoutMs: 12000,
|
||||
embedModel: cfg.embed.model || "nomic-embed-text-v2-moe",
|
||||
embedDimensions: cfg.embed.dimensions || 768,
|
||||
},
|
||||
{
|
||||
name: "openai",
|
||||
type: "openai" as const,
|
||||
model: "gpt-5.4-nano",
|
||||
baseUrl: "https://api.openai.com/v1",
|
||||
apiKey: cfg.llm.apiKey || process.env.OPENAI_API_KEY || "",
|
||||
timeoutMs: 15000,
|
||||
},
|
||||
{
|
||||
name: "lmstudio",
|
||||
type: "lmstudio" as const,
|
||||
model: "auto",
|
||||
baseUrl: "http://localhost:1234/v1",
|
||||
timeoutMs: 12000,
|
||||
},
|
||||
];
|
||||
|
||||
const chain = new FallbackChain(
|
||||
{ providers: fallbackProviders },
|
||||
{ info: api.logger.info?.bind(api.logger), warn: api.logger.warn?.bind(api.logger), debug: api.logger.debug?.bind(api.logger) },
|
||||
);
|
||||
|
||||
// ─── Per-layer LLM overrides ───
|
||||
const overrides = cfg.llm.overrides || {};
|
||||
function layerLLM(layer: MemoriaLayer): LLMProvider {
|
||||
const ov = overrides[layer];
|
||||
if (!ov) return chain;
|
||||
const provCfg: FallbackProviderConfig = {
|
||||
name: `${layer}:${ov.provider}`,
|
||||
type: ov.provider,
|
||||
model: ov.model,
|
||||
baseUrl: ov.baseUrl,
|
||||
apiKey: ov.apiKey || cfg.llm.apiKey || process.env.OPENAI_API_KEY || "",
|
||||
};
|
||||
return new FallbackChain(
|
||||
{ providers: [provCfg, ...fallbackProviders] },
|
||||
{ info: api.logger.info?.bind(api.logger), warn: api.logger.warn?.bind(api.logger), debug: api.logger.debug?.bind(api.logger) },
|
||||
);
|
||||
}
|
||||
|
||||
const extractLlm = layerLLM("extract");
|
||||
const contradictionLlm = layerLLM("contradiction");
|
||||
const graphLlm = layerLLM("graph");
|
||||
const topicsLlm = layerLLM("topics");
|
||||
|
||||
// Log active overrides
|
||||
const activeOverrides = Object.keys(overrides).filter(k => overrides[k as MemoriaLayer]);
|
||||
if (activeOverrides.length > 0) {
|
||||
api.logger.info?.(`memoria: per-layer LLM overrides: ${activeOverrides.map(k => `${k}=${overrides[k as MemoriaLayer]!.provider}/${overrides[k as MemoriaLayer]!.model}`).join(", ")}`);
|
||||
}
|
||||
|
||||
// ─── Embedding fallback chain ───
|
||||
const primaryEmbed = createEmbedProvider(cfg.embed);
|
||||
const embedProviders: EmbedProvider[] = [primaryEmbed];
|
||||
if (cfg.embed.provider !== "lmstudio") {
|
||||
try { embedProviders.push(lmStudioEmbed(cfg.embed.model, cfg.embed.dimensions)); } catch (e) { api?.logger?.debug?.('memoria:embed-fallback: ' + String(e)); }
|
||||
}
|
||||
if (cfg.embed.provider !== "openai" && (cfg.embed.apiKey || cfg.llm.apiKey || process.env.OPENAI_API_KEY)) {
|
||||
try { embedProviders.push(openaiEmbed("text-embedding-3-small", cfg.embed.apiKey || cfg.llm.apiKey || process.env.OPENAI_API_KEY || "", cfg.embed.dimensions)); } catch (e) { api?.logger?.debug?.('memoria:embed-fallback: ' + String(e)); }
|
||||
}
|
||||
const embedder = embedProviders.length > 1
|
||||
? new EmbedFallback(embedProviders, { info: api.logger.info?.bind(api.logger), warn: api.logger.warn?.bind(api.logger) })
|
||||
: primaryEmbed;
|
||||
const embeddingMgr = new EmbeddingManager(db, embedder);
|
||||
|
||||
// ─── Manager instances ───
|
||||
const selective = new SelectiveMemory(db, contradictionLlm, {
|
||||
dupThreshold: 0.85,
|
||||
contradictionCheck: true,
|
||||
enrichEnabled: true,
|
||||
}, embeddingMgr);
|
||||
|
||||
const graph = new KnowledgeGraph(db, graphLlm);
|
||||
const treeBuilder = new ContextTreeBuilder(db);
|
||||
const topicMgr = new TopicManager(db, topicsLlm, embedder, {
|
||||
emergenceThreshold: 3,
|
||||
mergeOverlap: 0.7,
|
||||
subtopicThreshold: 5,
|
||||
scanInterval: 15,
|
||||
});
|
||||
const identityParser = new IdentityParser(cfg.workspacePath);
|
||||
const lifecycleMgr = new LifecycleManager(db, {
|
||||
freshDays: cfg.lifecycle?.freshDays ?? 15,
|
||||
settledMinAccess: cfg.lifecycle?.settledMinAccess ?? 3,
|
||||
dormantAfterDays: cfg.lifecycle?.dormantAfterDays ?? 60,
|
||||
detailCursor: cfg.lifecycle?.detailCursor ?? 5,
|
||||
revisionRecallThreshold: cfg.lifecycle?.revisionRecallThreshold ?? 10,
|
||||
});
|
||||
const revisionMgr = new RevisionManager(db, chain);
|
||||
const hebbianMgr = new HebbianManager(db);
|
||||
const expertiseMgr = new ExpertiseManager(db);
|
||||
const proceduralLlm = layerLLM("procedural");
|
||||
const proceduralMem = new ProceduralMemory(db.raw, proceduralLlm, {
|
||||
reflectEvery: cfg.procedural?.reflectEvery ?? 3,
|
||||
degradedThreshold: cfg.procedural?.degradedThreshold ?? 0.5,
|
||||
defaultSafety: cfg.procedural?.defaultSafety ?? 0.8,
|
||||
staleDays: cfg.procedural?.staleDays ?? 30,
|
||||
docCheckDays: cfg.procedural?.docCheckDays ?? 60,
|
||||
});
|
||||
proceduralMem.ensureSchema();
|
||||
autoSkill = new AutoSkillCreator(proceduralMem, WORKSPACE, cfg.autoSkill);
|
||||
|
||||
const patternMgr = new PatternManager(db, extractLlm, cfg.patterns);
|
||||
|
||||
// Apply staleness penalties — once per process
|
||||
const stalenessKey = '__memoria_staleness_applied';
|
||||
if (!(globalThis as any)[stalenessKey]) {
|
||||
(globalThis as any)[stalenessKey] = true;
|
||||
const stalenessResult = proceduralMem.applyStalenessPenalties();
|
||||
if (stalenessResult.updated > 0 || stalenessResult.flaggedForDocCheck > 0) {
|
||||
console.log(`[memoria] 🕰️ Staleness check: ${stalenessResult.updated} aged, ${stalenessResult.flaggedForDocCheck} flagged for doc check`);
|
||||
}
|
||||
}
|
||||
|
||||
const budget = new AdaptiveBudget({
|
||||
contextWindow: cfg.contextWindow || 200000,
|
||||
maxFacts: cfg.recallLimit || 12,
|
||||
minFacts: 2,
|
||||
});
|
||||
const mdSync = new MdSync(db, {
|
||||
workspacePath: cfg.workspacePath || process.env.HOME + "/.openclaw/workspace",
|
||||
dbToMd: cfg.syncMd !== false,
|
||||
mdToDb: false,
|
||||
});
|
||||
const mdRegen = new MdRegenManager(db, cfg.workspacePath || process.env.HOME + "/.openclaw/workspace", {
|
||||
recentDays: 30,
|
||||
maxFactsPerFile: 150,
|
||||
archiveNotice: true,
|
||||
});
|
||||
|
||||
const observationMgr = new ObservationManager(db, chain, embedder, {
|
||||
emergenceThreshold: 3,
|
||||
matchThreshold: 0.6,
|
||||
maxRecallObservations: Math.max(Math.floor(cfg.recallLimit / 3), 2),
|
||||
maxEvidencePerObservation: 15,
|
||||
});
|
||||
|
||||
const clusterMgr = new FactClusterManager(db, chain);
|
||||
const feedbackMgr = new FeedbackManager(db);
|
||||
|
||||
// Cross-layer: when selective supersedes a fact, cascade to ALL layers
|
||||
selective.onSupersede = (supersededId, _newId) => {
|
||||
try {
|
||||
const parts: string[] = [];
|
||||
const obsAffected = observationMgr.onFactSuperseded(supersededId);
|
||||
if (obsAffected > 0) parts.push(`${obsAffected} obs`);
|
||||
const graphAffected = graph.onFactSuperseded(supersededId);
|
||||
if (graphAffected > 0) parts.push(`${graphAffected} graph`);
|
||||
const topicAffected = topicMgr.onFactSuperseded(supersededId);
|
||||
if (topicAffected > 0) parts.push(`${topicAffected} topics`);
|
||||
const embRemoved = embeddingMgr.onFactSuperseded(supersededId);
|
||||
if (embRemoved) parts.push("1 embed");
|
||||
if (parts.length > 0) {
|
||||
api.logger.debug?.(`memoria: supersede cascade for ${supersededId} — ${parts.join(", ")}`);
|
||||
}
|
||||
} catch (e) { api?.logger?.debug?.('memoria:supersede-cascade: ' + String(e)); }
|
||||
};
|
||||
|
||||
mdSync.ensureSchema(db);
|
||||
|
||||
// ─── Boot stats ───
|
||||
const stats = db.stats();
|
||||
const embCount = embeddingMgr.embeddedCount();
|
||||
const gStats = graph.stats();
|
||||
const tStats = topicMgr.stats();
|
||||
const oStats = observationMgr.stats();
|
||||
const cStats = clusterMgr.stats();
|
||||
let pluginVersion = "3.2.0";
|
||||
try {
|
||||
const pkgPath = new URL("./package.json", import.meta.url);
|
||||
const pkg = JSON.parse(fs.readFileSync(pkgPath, "utf-8"));
|
||||
pluginVersion = pkg.version || pluginVersion;
|
||||
} catch (e) { api?.logger?.debug?.('memoria:version-read: ' + String(e)); }
|
||||
const lifecycleRefresh = lifecycleMgr.refreshAll();
|
||||
|
||||
try {
|
||||
const reparented = topicMgr.reparentExistingTopics();
|
||||
if (reparented > 0) {
|
||||
api.logger.info?.(`memoria: reparented ${reparented} orphan topics`);
|
||||
}
|
||||
} catch (e) { api?.logger?.debug?.('memoria:topic-reparent: ' + String(e)); }
|
||||
|
||||
const lifecycleStats = lifecycleMgr.getStats();
|
||||
const hebbianStats = hebbianMgr.getStats();
|
||||
const expertiseStats = expertiseMgr.getStats();
|
||||
const procStats = proceduralMem.getStats();
|
||||
const patStats = patternMgr.stats();
|
||||
const fbStats = feedbackMgr.getStats();
|
||||
const fbNote = fbStats.totalWithFeedback > 0 ? `, feedback: ${fbStats.totalWithFeedback} tracked (avg ${fbStats.avgUsefulness.toFixed(1)})` : "";
|
||||
const lifecycleNote = ` | lifecycle: ${lifecycleStats.fresh ?? 0}f/${lifecycleStats.settled ?? 0}s/${lifecycleStats.dormant ?? 0}d (cursor:${lifecycleMgr.detailCursor})`;
|
||||
const hebbianNote = ` | graph: ${hebbianStats.strong} strong, ${hebbianStats.weak} weak`;
|
||||
const expertiseNote = ` | expertise: ${expertiseStats.expert}★★★/${expertiseStats.experienced}★★/${expertiseStats.familiar}★`;
|
||||
const procNote = procStats.total > 0
|
||||
? ` | procedures: ${procStats.healthy}✓/${procStats.degraded}⚠${(procStats.stale ?? 0) > 0 ? `/${procStats.stale}🕰️` : ''}`
|
||||
: "";
|
||||
const patNote = patStats.total > 0 ? ` | patterns: ${patStats.total} (avg ${patStats.avgOccurrences} occ)` : "";
|
||||
const contEnabled = cfg.continuous?.enabled !== false && cfg.autoCapture;
|
||||
const contInterval = cfg.continuous?.interval ?? 4;
|
||||
const contNote = contEnabled ? ` | continuous: every ${contInterval} turns` : "";
|
||||
api.logger.info?.(`memoria: v${pluginVersion} registered (${stats.active} facts, ${cStats.total} clusters, ${oStats.total} observations, ${embCount} embedded, ${gStats.entities} entities, ${gStats.relations} relations, ${tStats.totalTopics} topics${fbNote}${lifecycleNote}${hebbianNote}${expertiseNote}${procNote}${patNote}${contNote}, fallback: ${chain.providerNames.join(" → ")})`);
|
||||
|
||||
const fileSizes = mdRegen.fileSizes();
|
||||
const totalLines = Object.values(fileSizes).reduce((sum, f) => sum + f.lines, 0);
|
||||
api.logger.info?.(`memoria: workspace .md files = ${totalLines} lines total (regen available to bound growth)`);
|
||||
|
||||
// Background: embed unembedded facts on boot
|
||||
const unembedded = embeddingMgr.unembeddedFacts(100);
|
||||
if (unembedded.length > 0) {
|
||||
api.logger.info?.(`memoria: ${unembedded.length} facts need embedding, starting background indexing...`);
|
||||
embeddingMgr.embedBatch(unembedded.map(f => ({ id: f.id, text: f.fact })))
|
||||
.then(n => api.logger.info?.(`memoria: background embed complete — ${n} facts indexed`))
|
||||
.catch(err => api.logger.warn?.(`memoria: background embed failed: ${String(err)}`));
|
||||
}
|
||||
|
||||
// ─── Dialectic Memory (Layer 24) ───
|
||||
const dialectic = new DialecticMemory({
|
||||
db, embeddingMgr, graph, topicMgr: topicMgr!,
|
||||
proceduralMem, observationMgr, llm: extractLlm,
|
||||
});
|
||||
|
||||
// Expose dialectic.query() for external use (e.g., tools, commands)
|
||||
(api as any)._memoriaDialectic = dialectic;
|
||||
|
||||
// ─── Create shared post-processing pipeline ───
|
||||
const postProcessNewFacts = createPostProcessNewFacts(
|
||||
api, db, embeddingMgr, graph, hebbianMgr, topicMgr,
|
||||
observationMgr, clusterMgr, mdSync, mdRegen, patternMgr
|
||||
);
|
||||
|
||||
// ─── Prefetch cache (async recall, inspired by Hermes) ───
|
||||
const prefetchCache = new PrefetchCache(30_000);
|
||||
|
||||
// ─── Register all hooks ───
|
||||
|
||||
// Layer 6: Recall (before_prompt_build)
|
||||
registerRecallHook({
|
||||
api, cfg, db, embeddingMgr, graph, topicMgr, observationMgr,
|
||||
proceduralMem, treeBuilder, budget, feedbackMgr, lifecycleMgr,
|
||||
expertiseMgr, patternMgr, revisionMgr, prefetchCache, selfObserver,
|
||||
});
|
||||
|
||||
// RecallDeps without prefetchCache — for prefetch computation
|
||||
const recallDepsForPrefetch = {
|
||||
api, cfg, db, embeddingMgr, graph, topicMgr, observationMgr,
|
||||
proceduralMem, treeBuilder, budget, feedbackMgr, lifecycleMgr,
|
||||
expertiseMgr, patternMgr, revisionMgr, selfObserver,
|
||||
};
|
||||
|
||||
// Layer 21: Continuous Learning (message_received + llm_output)
|
||||
const continuousState = registerContinuousHooks(
|
||||
api, cfg, db, selective, extractLlm, identityParser, postProcessNewFacts,
|
||||
prefetchCache, recallDepsForPrefetch, wal, selfObserver,
|
||||
);
|
||||
|
||||
// Layer 1b: Real-time procedural capture (after_tool_call)
|
||||
registerProceduralHook(api, cfg, extractLlm, proceduralMem, graph);
|
||||
|
||||
// Layer 1: Session capture (agent_end + after_compaction)
|
||||
const captureDeps = {
|
||||
api, cfg, db, selective, extractLlm, identityParser,
|
||||
proceduralMem, feedbackMgr, budget, postProcessNewFacts, continuousState,
|
||||
};
|
||||
registerAgentEndHook(captureDeps);
|
||||
registerCompactionHook(captureDeps);
|
||||
|
||||
// Layer 23: Auto Skill Creation + Self-Observation success tracking (agent_end)
|
||||
api.on("agent_end", async (event: any, _ctx: any) => {
|
||||
try {
|
||||
// Record session success in self-observation
|
||||
if (event.success && selfObserver) {
|
||||
const msgCount = event.messages?.length || 0;
|
||||
const toolCount = event.toolCallCount || 0;
|
||||
if (msgCount > 2 || toolCount > 0) {
|
||||
// Detect domain from conversation content
|
||||
const lastMsgs = (event.messages || [])
|
||||
.slice(-5)
|
||||
.map((m: any) => m.content || "")
|
||||
.join(" ");
|
||||
selfObserver.record("success", lastMsgs.slice(0, 500));
|
||||
}
|
||||
}
|
||||
|
||||
// Check for mature procedures → promote to skill files
|
||||
if (event.success) {
|
||||
const promoted = autoSkill.checkAndPromote();
|
||||
if (promoted > 0) {
|
||||
api.logger.info?.(`memoria: 🎓 auto-skill: ${promoted} procedure(s) promoted to skill files`);
|
||||
}
|
||||
}
|
||||
} catch (err) {
|
||||
api.logger.debug?.(`memoria: auto-skill/self-obs agent_end error: ${String(err)}`);
|
||||
}
|
||||
});
|
||||
},
|
||||
});
|
||||
|
||||
export default memoriaPlugin;
|
||||
16
openclaw-memoria-port/openclaw.d.ts
vendored
Normal file
16
openclaw-memoria-port/openclaw.d.ts
vendored
Normal file
@@ -0,0 +1,16 @@
|
||||
// Type stub for OpenClaw plugin SDK (provided at runtime by the gateway)
|
||||
declare module "openclaw/plugin-sdk/core" {
|
||||
export interface OpenClawPluginApi {
|
||||
logger: {
|
||||
info?: (...args: any[]) => void;
|
||||
warn?: (...args: any[]) => void;
|
||||
debug?: (...args: any[]) => void;
|
||||
error?: (...args: any[]) => void;
|
||||
};
|
||||
pluginConfig: Record<string, any>;
|
||||
config: Record<string, any>;
|
||||
workspace: { path: string };
|
||||
on: (event: string, handler: (...args: any[]) => any) => void;
|
||||
modifyPrompt: (callback: (ctx: any) => any) => void;
|
||||
}
|
||||
}
|
||||
575
openclaw-memoria-port/openclaw.plugin.json
Normal file
575
openclaw-memoria-port/openclaw.plugin.json
Normal file
@@ -0,0 +1,575 @@
|
||||
{
|
||||
"id": "memoria",
|
||||
"name": "Memoria — Persistent Memory",
|
||||
"version": "3.34.0-port",
|
||||
"kind": "memory",
|
||||
"description": "The most advanced memory system for AI agents. 21 cognitive layers, knowledge graph, procedural learning, vector search. 100% local-first.",
|
||||
"configSchema": {
|
||||
"type": "object",
|
||||
"additionalProperties": true,
|
||||
"properties": {
|
||||
"autoRecall": {
|
||||
"type": "boolean",
|
||||
"default": true
|
||||
},
|
||||
"autoCapture": {
|
||||
"type": "boolean",
|
||||
"default": true
|
||||
},
|
||||
"recallLimit": {
|
||||
"type": "number",
|
||||
"minimum": 1,
|
||||
"maximum": 20,
|
||||
"default": 12
|
||||
},
|
||||
"captureMaxFacts": {
|
||||
"type": "number",
|
||||
"minimum": 1,
|
||||
"maximum": 10,
|
||||
"default": 8
|
||||
},
|
||||
"defaultAgent": {
|
||||
"type": "string",
|
||||
"default": "koda"
|
||||
},
|
||||
"contextWindow": {
|
||||
"type": "number",
|
||||
"minimum": 4096,
|
||||
"default": 200000
|
||||
},
|
||||
"workspacePath": {
|
||||
"type": "string"
|
||||
},
|
||||
"syncMd": {
|
||||
"type": "boolean",
|
||||
"default": true
|
||||
},
|
||||
"lifecycle": {
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
"description": "Human-like memory lifecycle. Controls recall priority, never deletes. Dormant facts are always searchable on explicit request.",
|
||||
"properties": {
|
||||
"freshDays": {
|
||||
"type": "number",
|
||||
"minimum": 1,
|
||||
"maximum": 90,
|
||||
"default": 15,
|
||||
"description": "Days a fact stays 'fresh' (highest recall priority)"
|
||||
},
|
||||
"settledMinAccess": {
|
||||
"type": "number",
|
||||
"minimum": 1,
|
||||
"maximum": 50,
|
||||
"default": 3,
|
||||
"description": "Access count to become 'settled' before freshDays expires"
|
||||
},
|
||||
"dormantAfterDays": {
|
||||
"type": "number",
|
||||
"minimum": 15,
|
||||
"maximum": 365,
|
||||
"default": 60,
|
||||
"description": "Days without access before becoming 'dormant' (low auto-recall priority, still searchable)"
|
||||
},
|
||||
"detailCursor": {
|
||||
"type": "number",
|
||||
"minimum": 1,
|
||||
"maximum": 10,
|
||||
"default": 5,
|
||||
"description": "Detail cursor (1-10). Higher = more dormant context in auto-recall. 1=minimal, 5=normal, 10=everything"
|
||||
}
|
||||
}
|
||||
},
|
||||
"procedural": {
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"reflectEvery": {
|
||||
"type": "number",
|
||||
"minimum": 0,
|
||||
"maximum": 20,
|
||||
"default": 3,
|
||||
"description": "Reflect on procedure quality every N executions (0 = never)"
|
||||
},
|
||||
"degradedThreshold": {
|
||||
"type": "number",
|
||||
"minimum": 0.1,
|
||||
"maximum": 1.0,
|
||||
"default": 0.5,
|
||||
"description": "Degradation score above which a procedure is flagged as degraded"
|
||||
},
|
||||
"defaultSafety": {
|
||||
"type": "number",
|
||||
"minimum": 0.1,
|
||||
"maximum": 1.0,
|
||||
"default": 0.8,
|
||||
"description": "Default safety score for new procedures"
|
||||
},
|
||||
"staleDays": {
|
||||
"type": "number",
|
||||
"minimum": 7,
|
||||
"maximum": 365,
|
||||
"default": 30,
|
||||
"description": "Days without use before a procedure starts degrading from staleness"
|
||||
},
|
||||
"docCheckDays": {
|
||||
"type": "number",
|
||||
"minimum": 7,
|
||||
"maximum": 365,
|
||||
"default": 60,
|
||||
"description": "Days without use before flagging for doc verification"
|
||||
}
|
||||
}
|
||||
},
|
||||
"embed": {
|
||||
"type": "object",
|
||||
"additionalProperties": true,
|
||||
"properties": {
|
||||
"provider": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"ollama",
|
||||
"lmstudio",
|
||||
"openai",
|
||||
"openrouter"
|
||||
],
|
||||
"default": "ollama"
|
||||
},
|
||||
"baseUrl": {
|
||||
"type": "string"
|
||||
},
|
||||
"model": {
|
||||
"type": "string",
|
||||
"default": "nomic-embed-text-v2-moe"
|
||||
},
|
||||
"dimensions": {
|
||||
"type": "number",
|
||||
"default": 768
|
||||
},
|
||||
"apiKey": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"llm": {
|
||||
"type": "object",
|
||||
"additionalProperties": true,
|
||||
"properties": {
|
||||
"provider": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"ollama",
|
||||
"lmstudio",
|
||||
"openai",
|
||||
"openrouter",
|
||||
"anthropic"
|
||||
],
|
||||
"default": "ollama"
|
||||
},
|
||||
"baseUrl": {
|
||||
"type": "string"
|
||||
},
|
||||
"model": {
|
||||
"type": "string",
|
||||
"default": "gemma3:4b"
|
||||
},
|
||||
"apiKey": {
|
||||
"type": "string"
|
||||
},
|
||||
"overrides": {
|
||||
"type": "object",
|
||||
"additionalProperties": true,
|
||||
"properties": {
|
||||
"extract": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"provider": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"ollama",
|
||||
"lmstudio",
|
||||
"openai",
|
||||
"openrouter"
|
||||
]
|
||||
},
|
||||
"baseUrl": {
|
||||
"type": "string"
|
||||
},
|
||||
"model": {
|
||||
"type": "string"
|
||||
},
|
||||
"apiKey": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"provider",
|
||||
"model"
|
||||
]
|
||||
},
|
||||
"contradiction": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"provider": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"ollama",
|
||||
"lmstudio",
|
||||
"openai",
|
||||
"openrouter"
|
||||
]
|
||||
},
|
||||
"baseUrl": {
|
||||
"type": "string"
|
||||
},
|
||||
"model": {
|
||||
"type": "string"
|
||||
},
|
||||
"apiKey": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"provider",
|
||||
"model"
|
||||
]
|
||||
},
|
||||
"graph": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"provider": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"ollama",
|
||||
"lmstudio",
|
||||
"openai",
|
||||
"openrouter"
|
||||
]
|
||||
},
|
||||
"baseUrl": {
|
||||
"type": "string"
|
||||
},
|
||||
"model": {
|
||||
"type": "string"
|
||||
},
|
||||
"apiKey": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"provider",
|
||||
"model"
|
||||
]
|
||||
},
|
||||
"topics": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"provider": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"ollama",
|
||||
"lmstudio",
|
||||
"openai",
|
||||
"openrouter"
|
||||
]
|
||||
},
|
||||
"baseUrl": {
|
||||
"type": "string"
|
||||
},
|
||||
"model": {
|
||||
"type": "string"
|
||||
},
|
||||
"apiKey": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"provider",
|
||||
"model"
|
||||
]
|
||||
},
|
||||
"procedural": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"provider": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"ollama",
|
||||
"lmstudio",
|
||||
"openai",
|
||||
"openrouter"
|
||||
]
|
||||
},
|
||||
"baseUrl": {
|
||||
"type": "string"
|
||||
},
|
||||
"model": {
|
||||
"type": "string"
|
||||
},
|
||||
"apiKey": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"provider",
|
||||
"model"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"fallback": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"additionalProperties": true,
|
||||
"properties": {
|
||||
"provider": {
|
||||
"type": "string"
|
||||
},
|
||||
"baseUrl": {
|
||||
"type": "string"
|
||||
},
|
||||
"model": {
|
||||
"type": "string"
|
||||
},
|
||||
"apiKey": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"topics": {
|
||||
"type": "object",
|
||||
"additionalProperties": true,
|
||||
"properties": {
|
||||
"emergenceThreshold": {
|
||||
"type": "number",
|
||||
"minimum": 2,
|
||||
"maximum": 20
|
||||
},
|
||||
"mergeOverlap": {
|
||||
"type": "number",
|
||||
"minimum": 0.3,
|
||||
"maximum": 1.0
|
||||
},
|
||||
"subtopicThreshold": {
|
||||
"type": "number",
|
||||
"minimum": 3,
|
||||
"maximum": 50
|
||||
},
|
||||
"decayDays": {
|
||||
"type": "number",
|
||||
"minimum": 7,
|
||||
"maximum": 365
|
||||
},
|
||||
"scanInterval": {
|
||||
"type": "number",
|
||||
"minimum": 5,
|
||||
"maximum": 100
|
||||
}
|
||||
}
|
||||
},
|
||||
"mdRegen": {
|
||||
"type": "object",
|
||||
"additionalProperties": true,
|
||||
"properties": {
|
||||
"recentDays": {
|
||||
"type": "number",
|
||||
"minimum": 7,
|
||||
"maximum": 365
|
||||
},
|
||||
"maxFactsPerFile": {
|
||||
"type": "number",
|
||||
"minimum": 10,
|
||||
"maximum": 500
|
||||
},
|
||||
"archiveNotice": {
|
||||
"type": "boolean"
|
||||
}
|
||||
}
|
||||
},
|
||||
"continuous": {
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
"description": "Layer 21: Continuous Learning. Real-time capture from message_received + llm_output hooks.",
|
||||
"properties": {
|
||||
"enabled": {
|
||||
"type": "boolean",
|
||||
"default": true,
|
||||
"description": "Enable continuous learning (real-time extraction between turns)"
|
||||
},
|
||||
"interval": {
|
||||
"type": "number",
|
||||
"minimum": 2,
|
||||
"maximum": 20,
|
||||
"default": 4,
|
||||
"description": "Extract every N turns (periodic mode)"
|
||||
},
|
||||
"cooldownMs": {
|
||||
"type": "number",
|
||||
"minimum": 10000,
|
||||
"maximum": 300000,
|
||||
"default": 45000,
|
||||
"description": "Minimum ms between extractions"
|
||||
},
|
||||
"maxExtractionsPerSession": {
|
||||
"type": "number",
|
||||
"minimum": 1,
|
||||
"maximum": 50,
|
||||
"default": 10,
|
||||
"description": "Max extractions per session (budget control)"
|
||||
}
|
||||
}
|
||||
},
|
||||
"patterns": {
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
"description": "Layer 20: Behavioral pattern detection. Consolidates repeated similar facts.",
|
||||
"properties": {
|
||||
"minOccurrences": {
|
||||
"type": "number",
|
||||
"minimum": 2,
|
||||
"maximum": 20,
|
||||
"default": 3,
|
||||
"description": "Min similar facts to form a pattern"
|
||||
},
|
||||
"similarityThreshold": {
|
||||
"type": "number",
|
||||
"minimum": 0.3,
|
||||
"maximum": 0.95,
|
||||
"default": 0.7,
|
||||
"description": "Cosine similarity threshold for grouping"
|
||||
}
|
||||
}
|
||||
},
|
||||
"observations": {
|
||||
"type": "object",
|
||||
"additionalProperties": true,
|
||||
"properties": {
|
||||
"emergenceThreshold": {
|
||||
"type": "number",
|
||||
"minimum": 2,
|
||||
"maximum": 10,
|
||||
"default": 3
|
||||
},
|
||||
"matchThreshold": {
|
||||
"type": "number",
|
||||
"minimum": 0.2,
|
||||
"maximum": 0.9,
|
||||
"default": 0.6
|
||||
},
|
||||
"maxRecallObservations": {
|
||||
"type": "number",
|
||||
"minimum": 1,
|
||||
"maximum": 10,
|
||||
"default": 5
|
||||
},
|
||||
"maxEvidencePerObservation": {
|
||||
"type": "number",
|
||||
"minimum": 5,
|
||||
"maximum": 50,
|
||||
"default": 15
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"uiHints": {
|
||||
"autoRecall": {
|
||||
"label": "Auto-Recall",
|
||||
"help": "Inject relevant memories before each response"
|
||||
},
|
||||
"autoCapture": {
|
||||
"label": "Auto-Capture",
|
||||
"help": "Extract facts after each agent turn"
|
||||
},
|
||||
"recallLimit": {
|
||||
"label": "Recall Limit",
|
||||
"help": "Max facts injected (adaptive budget may lower this)",
|
||||
"advanced": true
|
||||
},
|
||||
"contextWindow": {
|
||||
"label": "Context Window",
|
||||
"help": "Total tokens available (for budget calculation)",
|
||||
"advanced": true
|
||||
},
|
||||
"syncMd": {
|
||||
"label": "Sync to .md",
|
||||
"help": "Auto-sync new facts to workspace .md files"
|
||||
},
|
||||
"embed": {
|
||||
"label": "Embedding Provider",
|
||||
"help": "Default: Ollama + nomic-embed-text-v2-moe. Override only if needed.",
|
||||
"advanced": true
|
||||
},
|
||||
"llm": {
|
||||
"label": "LLM Provider",
|
||||
"help": "Default: Ollama + gemma3:4b. Per-layer overrides available for extract/contradiction/graph/topics/procedural.",
|
||||
"advanced": true
|
||||
},
|
||||
"fallback": {
|
||||
"label": "LLM Fallback Chain",
|
||||
"help": "Providers tried in order if primary fails. Default: Ollama \u2192 OpenAI \u2192 LM Studio.",
|
||||
"advanced": true
|
||||
},
|
||||
"topics": {
|
||||
"label": "Topic Emergence",
|
||||
"help": "Auto-clustering settings",
|
||||
"advanced": true
|
||||
},
|
||||
"mdRegen": {
|
||||
"label": ".md Regeneration",
|
||||
"help": "Bounded .md file regeneration settings",
|
||||
"advanced": true
|
||||
},
|
||||
"continuous": {
|
||||
"label": "Continuous Learning",
|
||||
"help": "Layer 21: Real-time fact capture between turns. Detects urgent signals (errors, frustration) for immediate extraction.",
|
||||
"advanced": true
|
||||
},
|
||||
"patterns": {
|
||||
"label": "Pattern Detection",
|
||||
"help": "Layer 20: Auto-consolidate repeated similar facts into behavioral patterns.",
|
||||
"advanced": true
|
||||
},
|
||||
"observations": {
|
||||
"label": "Observations",
|
||||
"help": "Living multi-fact syntheses. emergenceThreshold=min facts to create, matchThreshold=cosine sim to update",
|
||||
"advanced": true
|
||||
}
|
||||
},
|
||||
"description": "The most advanced memory system for AI agents. 21 cognitive layers, knowledge graph, procedural learning, vector search. 100% local-first, zero cloud cost.",
|
||||
"keywords": [
|
||||
"memory",
|
||||
"ai-agent",
|
||||
"persistent-memory",
|
||||
"long-term-memory",
|
||||
"knowledge-graph",
|
||||
"procedural",
|
||||
"vector-search",
|
||||
"sqlite",
|
||||
"ollama",
|
||||
"local-first",
|
||||
"cognitive",
|
||||
"claude",
|
||||
"cursor",
|
||||
"copilot",
|
||||
"openclaw",
|
||||
"wal",
|
||||
"developer-tools",
|
||||
"typescript"
|
||||
],
|
||||
"setup": {
|
||||
"providers": [
|
||||
{
|
||||
"id": "memoria-ollama",
|
||||
"envVars": []
|
||||
},
|
||||
{
|
||||
"id": "memoria-openai",
|
||||
"envVars": ["OPENAI_API_KEY"]
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
230
openclaw-memoria-port/orchestrator.ts
Normal file
230
openclaw-memoria-port/orchestrator.ts
Normal file
@@ -0,0 +1,230 @@
|
||||
/**
|
||||
* 🧠 Memoria — Post-capture orchestrator
|
||||
*
|
||||
* This module exports:
|
||||
* - createPostProcessNewFacts() — factory for the post-processing pipeline
|
||||
*
|
||||
* The post-processing pipeline runs after every batch of new facts (capture/compaction/continuous).
|
||||
* It orchestrates 9 steps across all layers: embed, graph, hebbian, topics, observations, clusters, md sync, patterns, cross-layer.
|
||||
*/
|
||||
|
||||
import type { OpenClawPluginApi } from "openclaw/plugin-sdk/core";
|
||||
import type { MemoriaDB } from "./core/db.js";
|
||||
import type { EmbeddingManager } from "./core/embeddings.js";
|
||||
import type { KnowledgeGraph } from "./core/graph.js";
|
||||
import type { HebbianManager } from "./core/hebbian.js";
|
||||
import type { TopicManager } from "./core/topics.js";
|
||||
import type { ObservationManager } from "./core/observations.js";
|
||||
import type { FactClusterManager } from "./core/fact-clusters.js";
|
||||
import type { MdSync } from "./core/sync.js";
|
||||
import type { MdRegenManager } from "./core/md-regen.js";
|
||||
import type { PatternManager } from "./core/patterns.js";
|
||||
|
||||
/**
|
||||
* Create the postProcessNewFacts pipeline function.
|
||||
* Called after every capture batch (agent_end, after_compaction, continuous).
|
||||
*
|
||||
* 9 steps:
|
||||
* 1. embedBatch() — vectorize unembedded facts
|
||||
* 2. graph.extractAndStore() — entities + relations from new facts
|
||||
* 3. hebbian.reinforce() — strengthen co-occurring entity relations
|
||||
* 4. topics.onFactCaptured() + scanAndEmerge() — keyword extraction, topic creation
|
||||
* 5. observations.onFactCaptured() — match/create living syntheses
|
||||
* 6. clusters.generateClusters() — entity-grouped summaries
|
||||
* 7. mdSync.syncToMd() + mdRegen — append to .md files, regenerate if > 200 lines
|
||||
* 8. patterns.detectAndConsolidate() — consolidate repeated similar facts
|
||||
* 9. Cross-layer: feedback→lifecycle, hebbian→topics hierarchy, lifecycle→patterns
|
||||
*/
|
||||
export function createPostProcessNewFacts(
|
||||
api: OpenClawPluginApi,
|
||||
db: MemoriaDB,
|
||||
embeddingMgr: EmbeddingManager,
|
||||
graph: KnowledgeGraph,
|
||||
hebbianMgr: HebbianManager,
|
||||
topicMgr: TopicManager,
|
||||
observationMgr: ObservationManager,
|
||||
clusterMgr: FactClusterManager,
|
||||
mdSync: MdSync,
|
||||
mdRegen: MdRegenManager,
|
||||
patternMgr: PatternManager
|
||||
): (source: "capture" | "compaction") => Promise<void> {
|
||||
return async function postProcessNewFacts(source: "capture" | "compaction"): Promise<void> {
|
||||
// 1. Embed unembedded facts
|
||||
try {
|
||||
const toEmbed = embeddingMgr.unembeddedFacts(10);
|
||||
if (toEmbed.length > 0) {
|
||||
const n = await embeddingMgr.embedBatch(toEmbed.map(f => ({ id: f.id, text: f.fact })));
|
||||
if (n > 0) api.logger.info?.(`memoria: [${source}] embedded ${n} new facts`);
|
||||
}
|
||||
} catch (e) { api?.logger?.debug?.('memoria:embed-batch: ' + String(e)); }
|
||||
|
||||
// 2. Graph: extract entities/relations (limit to 5 to avoid LLM spam)
|
||||
try {
|
||||
const recentFacts = db.recentFacts(5);
|
||||
let totalEnt = 0, totalRel = 0;
|
||||
for (const f of recentFacts) {
|
||||
if (f.entity_ids && f.entity_ids !== "[]") continue;
|
||||
const { entities: ne, relations: nr } = await graph.extractAndStore(f.id, f.fact);
|
||||
totalEnt += ne;
|
||||
totalRel += nr;
|
||||
|
||||
// Hebbian reinforcement: co-occurring entities strengthen relations
|
||||
if (f.entity_ids && f.entity_ids !== "[]") {
|
||||
const entityIds = JSON.parse(f.entity_ids) as string[];
|
||||
hebbianMgr.reinforceFromFact(f.id, entityIds);
|
||||
}
|
||||
}
|
||||
if (totalEnt > 0 || totalRel > 0) {
|
||||
api.logger.info?.(`memoria: [${source}] graph extracted ${totalEnt} entities, ${totalRel} relations`);
|
||||
}
|
||||
} catch (e) { api?.logger?.debug?.('memoria:graph-extract: ' + String(e)); }
|
||||
|
||||
// 3. Topics: keyword extraction + topic association
|
||||
try {
|
||||
const recentForTopics = db.recentFacts(3);
|
||||
for (const f of recentForTopics) {
|
||||
if (f.tags && f.tags !== "[]") continue;
|
||||
const { keywords, topics: topicNames } = await topicMgr.onFactCaptured(f.id, f.fact, f.category);
|
||||
if (keywords.length > 0) {
|
||||
api.logger.debug?.(`memoria: [${source}] tagged "${f.fact.slice(0, 40)}..." → [${keywords.join(", ")}]${topicNames.length > 0 ? ` → topics: ${topicNames.join(", ")}` : ""}`);
|
||||
}
|
||||
}
|
||||
if (topicMgr.shouldScan()) {
|
||||
const scanResult = await topicMgr.scanAndEmerge();
|
||||
if (scanResult.created > 0 || scanResult.merged > 0 || scanResult.subtopics > 0) {
|
||||
api.logger.info?.(`memoria: [${source}] topics scan — ${scanResult.created} created, ${scanResult.merged} merged, ${scanResult.subtopics} sub-topics`);
|
||||
}
|
||||
}
|
||||
} catch (topicErr) {
|
||||
api.logger.debug?.(`memoria: [${source}] topic tagging non-critical error: ${String(topicErr)}`);
|
||||
}
|
||||
|
||||
// 4. Observations: check if new facts match or trigger new observations
|
||||
try {
|
||||
const recentForObs = db.recentFacts(3);
|
||||
let obsUpdated = 0, obsCreated = 0;
|
||||
for (const f of recentForObs) {
|
||||
const result = await observationMgr.onFactCaptured(f.id, f.fact, f.category);
|
||||
if (result.action === "updated_observation") obsUpdated++;
|
||||
if (result.action === "created_observation") obsCreated++;
|
||||
}
|
||||
if (obsUpdated > 0 || obsCreated > 0) {
|
||||
api.logger.info?.(`memoria: [${source}] observations — ${obsCreated} created, ${obsUpdated} updated`);
|
||||
}
|
||||
} catch (e) { api?.logger?.debug?.('memoria:observations: ' + String(e)); }
|
||||
|
||||
// 5. Fact Clusters: generate/refresh thematic summaries
|
||||
try {
|
||||
const clusterResult = await clusterMgr.generateClusters();
|
||||
if (clusterResult.created > 0 || clusterResult.updated > 0) {
|
||||
api.logger.info?.(`memoria: [${source}] clusters — ${clusterResult.created} created, ${clusterResult.updated} updated, ${clusterResult.stale} stale`);
|
||||
// Embed new clusters
|
||||
const toEmbed = embeddingMgr.unembeddedFacts(5);
|
||||
if (toEmbed.length > 0) {
|
||||
await embeddingMgr.embedBatch(toEmbed.map(f => ({ id: f.id, text: f.fact })));
|
||||
}
|
||||
}
|
||||
} catch (e) { api?.logger?.debug?.('memoria:clusters: ' + String(e)); }
|
||||
|
||||
// 6. Sync new facts to .md files
|
||||
try {
|
||||
const syncResult = mdSync.syncToMd(db);
|
||||
if (syncResult.synced > 0) {
|
||||
api.logger.info?.(`memoria: [${source}] synced ${syncResult.synced} facts to .md files`);
|
||||
}
|
||||
} catch (e) { api?.logger?.debug?.('memoria:md-sync: ' + String(e)); }
|
||||
|
||||
// 7. Auto md-regen: smart trigger (captures count OR stale OR file size)
|
||||
try {
|
||||
mdRegen.recordCapture();
|
||||
const regenReason = mdRegen.shouldAutoRegen();
|
||||
if (regenReason) {
|
||||
const regenResult = mdRegen.regenerate();
|
||||
api.logger.info?.(`memoria: [${source}] auto md-regen triggered (${regenReason}) — ${regenResult.files} files, ${regenResult.recentFacts} recent, ${regenResult.archivedFacts} archived`);
|
||||
}
|
||||
} catch (e) { api?.logger?.debug?.('memoria:md-regen: ' + String(e)); }
|
||||
|
||||
// 8. Pattern detection: consolidate repeated similar facts
|
||||
try {
|
||||
const patternResult = await patternMgr.detectAndConsolidate();
|
||||
if (patternResult.consolidated > 0) {
|
||||
api.logger.info?.(`memoria: [${source}] patterns — ${patternResult.detected} groups found, ${patternResult.consolidated} consolidated`);
|
||||
}
|
||||
} catch (e) { api?.logger?.debug?.('memoria:patterns: ' + String(e)); }
|
||||
|
||||
// 9. Cross-layer connections (Phase 3)
|
||||
try {
|
||||
let crossUpdates = 0;
|
||||
|
||||
// 9a. Feedback → lifecycle promotion
|
||||
// Facts recalled 5+ times with positive usefulness → force settled
|
||||
const highUseFacts = db.raw.prepare(
|
||||
`SELECT id, lifecycle_state, recall_count, usefulness FROM facts
|
||||
WHERE superseded = 0 AND recall_count >= 5 AND usefulness >= 2
|
||||
AND (lifecycle_state IS NULL OR lifecycle_state = 'fresh')`
|
||||
).all() as Array<{ id: string; lifecycle_state: string; recall_count: number; usefulness: number }>;
|
||||
for (const f of highUseFacts) {
|
||||
db.raw.prepare("UPDATE facts SET lifecycle_state = 'settled' WHERE id = ?").run(f.id);
|
||||
crossUpdates++;
|
||||
}
|
||||
|
||||
// 9b. Hebbian → topics: strong relations (weight >= 1.0) between entities
|
||||
// If both entities belong to different topics, suggest parent-child or merge
|
||||
const strongRelations = db.raw.prepare(
|
||||
`SELECT source_id, target_id, weight FROM relations WHERE weight >= 1.0 ORDER BY weight DESC LIMIT 20`
|
||||
).all() as Array<{ source_id: string; target_id: string; weight: number }>;
|
||||
for (const rel of strongRelations) {
|
||||
// Find topics for each entity
|
||||
const fromTopics = db.raw.prepare(
|
||||
`SELECT DISTINCT t.id, t.name, t.parent_topic_id FROM topics t
|
||||
JOIN fact_topics ft ON ft.topic_id = t.id
|
||||
JOIN facts f ON f.id = ft.fact_id
|
||||
WHERE f.entity_ids LIKE ? AND f.superseded = 0`
|
||||
).all(`%${rel.source_id}%`) as Array<{ id: string; name: string; parent_topic_id: string | null }>;
|
||||
const toTopics = db.raw.prepare(
|
||||
`SELECT DISTINCT t.id, t.name, t.parent_topic_id FROM topics t
|
||||
JOIN fact_topics ft ON ft.topic_id = t.id
|
||||
JOIN facts f ON f.id = ft.fact_id
|
||||
WHERE f.entity_ids LIKE ? AND f.superseded = 0`
|
||||
).all(`%${rel.target_id}%`) as Array<{ id: string; name: string; parent_topic_id: string | null }>;
|
||||
|
||||
// If one topic is smaller, make it child of the larger
|
||||
for (const ft of fromTopics) {
|
||||
for (const tt of toTopics) {
|
||||
if (ft.id === tt.id) continue;
|
||||
if (ft.parent_topic_id || tt.parent_topic_id) continue; // already has parent
|
||||
const ftCount = (db.raw.prepare("SELECT fact_count FROM topics WHERE id = ?").get(ft.id) as any)?.fact_count || 0;
|
||||
const ttCount = (db.raw.prepare("SELECT fact_count FROM topics WHERE id = ?").get(tt.id) as any)?.fact_count || 0;
|
||||
// Smaller becomes child of larger (only if ratio > 2:1)
|
||||
if (ftCount > ttCount * 2 && ttCount > 0) {
|
||||
db.raw.prepare("UPDATE topics SET parent_topic_id = ? WHERE id = ?").run(ft.id, tt.id);
|
||||
crossUpdates++;
|
||||
} else if (ttCount > ftCount * 2 && ftCount > 0) {
|
||||
db.raw.prepare("UPDATE topics SET parent_topic_id = ? WHERE id = ?").run(tt.id, ft.id);
|
||||
crossUpdates++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 9c. Lifecycle → patterns: confirmed patterns (5+ occurrences) → settled
|
||||
const freshPatterns = db.raw.prepare(
|
||||
`SELECT id, tags FROM facts WHERE fact_type = 'pattern' AND superseded = 0
|
||||
AND (lifecycle_state IS NULL OR lifecycle_state = 'fresh')`
|
||||
).all() as Array<{ id: string; tags: string }>;
|
||||
for (const p of freshPatterns) {
|
||||
try {
|
||||
const meta = JSON.parse(p.tags || "{}");
|
||||
if (meta.occurrences && meta.occurrences.length >= 5) {
|
||||
db.raw.prepare("UPDATE facts SET lifecycle_state = 'settled' WHERE id = ?").run(p.id);
|
||||
crossUpdates++;
|
||||
}
|
||||
} catch (e) { api?.logger?.debug?.('memoria:parse: ' + String(e)); }
|
||||
}
|
||||
|
||||
if (crossUpdates > 0) {
|
||||
api.logger.info?.(`memoria: [${source}] cross-layer — ${crossUpdates} updates (feedback→lifecycle, hebbian→topics, lifecycle→patterns)`);
|
||||
}
|
||||
} catch (e) { api?.logger?.debug?.('memoria:cross-layer: ' + String(e)); }
|
||||
};
|
||||
}
|
||||
510
openclaw-memoria-port/package-lock.json
generated
Normal file
510
openclaw-memoria-port/package-lock.json
generated
Normal file
@@ -0,0 +1,510 @@
|
||||
{
|
||||
"name": "memoria-plugin",
|
||||
"version": "3.23.0",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "memoria-plugin",
|
||||
"version": "3.23.0",
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"better-sqlite3": "^11.10.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/better-sqlite3": "^7.6.13",
|
||||
"typescript": "^6.0.2"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/better-sqlite3": {
|
||||
"version": "7.6.13",
|
||||
"resolved": "https://registry.npmjs.org/@types/better-sqlite3/-/better-sqlite3-7.6.13.tgz",
|
||||
"integrity": "sha512-NMv9ASNARoKksWtsq/SHakpYAYnhBrQgGD8zkLYk/jaK8jUGn08CfEdTRgYhMypUQAfzSP8W6gNLe0q19/t4VA==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@types/node": "*"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/node": {
|
||||
"version": "25.5.0",
|
||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-25.5.0.tgz",
|
||||
"integrity": "sha512-jp2P3tQMSxWugkCUKLRPVUpGaL5MVFwF8RDuSRztfwgN1wmqJeMSbKlnEtQqU8UrhTmzEmZdu2I6v2dpp7XIxw==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"undici-types": "~7.18.0"
|
||||
}
|
||||
},
|
||||
"node_modules/base64-js": {
|
||||
"version": "1.5.1",
|
||||
"resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz",
|
||||
"integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==",
|
||||
"funding": [
|
||||
{
|
||||
"type": "github",
|
||||
"url": "https://github.com/sponsors/feross"
|
||||
},
|
||||
{
|
||||
"type": "patreon",
|
||||
"url": "https://www.patreon.com/feross"
|
||||
},
|
||||
{
|
||||
"type": "consulting",
|
||||
"url": "https://feross.org/support"
|
||||
}
|
||||
],
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/better-sqlite3": {
|
||||
"version": "11.10.0",
|
||||
"resolved": "https://registry.npmjs.org/better-sqlite3/-/better-sqlite3-11.10.0.tgz",
|
||||
"integrity": "sha512-EwhOpyXiOEL/lKzHz9AW1msWFNzGc/z+LzeB3/jnFJpxu+th2yqvzsSWas1v9jgs9+xiXJcD5A8CJxAG2TaghQ==",
|
||||
"hasInstallScript": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"bindings": "^1.5.0",
|
||||
"prebuild-install": "^7.1.1"
|
||||
}
|
||||
},
|
||||
"node_modules/bindings": {
|
||||
"version": "1.5.0",
|
||||
"resolved": "https://registry.npmjs.org/bindings/-/bindings-1.5.0.tgz",
|
||||
"integrity": "sha512-p2q/t/mhvuOj/UeLlV6566GD/guowlr0hHxClI0W9m7MWYkL1F0hLo+0Aexs9HSPCtR1SXQ0TD3MMKrXZajbiQ==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"file-uri-to-path": "1.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/bl": {
|
||||
"version": "4.1.0",
|
||||
"resolved": "https://registry.npmjs.org/bl/-/bl-4.1.0.tgz",
|
||||
"integrity": "sha512-1W07cM9gS6DcLperZfFSj+bWLtaPGSOHWhPiGzXmvVJbRLdG82sH/Kn8EtW1VqWVA54AKf2h5k5BbnIbwF3h6w==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"buffer": "^5.5.0",
|
||||
"inherits": "^2.0.4",
|
||||
"readable-stream": "^3.4.0"
|
||||
}
|
||||
},
|
||||
"node_modules/buffer": {
|
||||
"version": "5.7.1",
|
||||
"resolved": "https://registry.npmjs.org/buffer/-/buffer-5.7.1.tgz",
|
||||
"integrity": "sha512-EHcyIPBQ4BSGlvjB16k5KgAJ27CIsHY/2JBmCRReo48y9rQ3MaUzWX3KVlBa4U7MyX02HdVj0K7C3WaB3ju7FQ==",
|
||||
"funding": [
|
||||
{
|
||||
"type": "github",
|
||||
"url": "https://github.com/sponsors/feross"
|
||||
},
|
||||
{
|
||||
"type": "patreon",
|
||||
"url": "https://www.patreon.com/feross"
|
||||
},
|
||||
{
|
||||
"type": "consulting",
|
||||
"url": "https://feross.org/support"
|
||||
}
|
||||
],
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"base64-js": "^1.3.1",
|
||||
"ieee754": "^1.1.13"
|
||||
}
|
||||
},
|
||||
"node_modules/chownr": {
|
||||
"version": "1.1.4",
|
||||
"resolved": "https://registry.npmjs.org/chownr/-/chownr-1.1.4.tgz",
|
||||
"integrity": "sha512-jJ0bqzaylmJtVnNgzTeSOs8DPavpbYgEr/b0YL8/2GO3xJEhInFmhKMUnEJQjZumK7KXGFhUy89PrsJWlakBVg==",
|
||||
"license": "ISC"
|
||||
},
|
||||
"node_modules/decompress-response": {
|
||||
"version": "6.0.0",
|
||||
"resolved": "https://registry.npmjs.org/decompress-response/-/decompress-response-6.0.0.tgz",
|
||||
"integrity": "sha512-aW35yZM6Bb/4oJlZncMH2LCoZtJXTRxES17vE3hoRiowU2kWHaJKFkSBDnDR+cm9J+9QhXmREyIfv0pji9ejCQ==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"mimic-response": "^3.1.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=10"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/sindresorhus"
|
||||
}
|
||||
},
|
||||
"node_modules/deep-extend": {
|
||||
"version": "0.6.0",
|
||||
"resolved": "https://registry.npmjs.org/deep-extend/-/deep-extend-0.6.0.tgz",
|
||||
"integrity": "sha512-LOHxIOaPYdHlJRtCQfDIVZtfw/ufM8+rVj649RIHzcm/vGwQRXFt6OPqIFWsm2XEMrNIEtWR64sY1LEKD2vAOA==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=4.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/detect-libc": {
|
||||
"version": "2.1.2",
|
||||
"resolved": "https://registry.npmjs.org/detect-libc/-/detect-libc-2.1.2.tgz",
|
||||
"integrity": "sha512-Btj2BOOO83o3WyH59e8MgXsxEQVcarkUOpEYrubB0urwnN10yQ364rsiByU11nZlqWYZm05i/of7io4mzihBtQ==",
|
||||
"license": "Apache-2.0",
|
||||
"engines": {
|
||||
"node": ">=8"
|
||||
}
|
||||
},
|
||||
"node_modules/end-of-stream": {
|
||||
"version": "1.4.5",
|
||||
"resolved": "https://registry.npmjs.org/end-of-stream/-/end-of-stream-1.4.5.tgz",
|
||||
"integrity": "sha512-ooEGc6HP26xXq/N+GCGOT0JKCLDGrq2bQUZrQ7gyrJiZANJ/8YDTxTpQBXGMn+WbIQXNVpyWymm7KYVICQnyOg==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"once": "^1.4.0"
|
||||
}
|
||||
},
|
||||
"node_modules/expand-template": {
|
||||
"version": "2.0.3",
|
||||
"resolved": "https://registry.npmjs.org/expand-template/-/expand-template-2.0.3.tgz",
|
||||
"integrity": "sha512-XYfuKMvj4O35f/pOXLObndIRvyQ+/+6AhODh+OKWj9S9498pHHn/IMszH+gt0fBCRWMNfk1ZSp5x3AifmnI2vg==",
|
||||
"license": "(MIT OR WTFPL)",
|
||||
"engines": {
|
||||
"node": ">=6"
|
||||
}
|
||||
},
|
||||
"node_modules/file-uri-to-path": {
|
||||
"version": "1.0.0",
|
||||
"resolved": "https://registry.npmjs.org/file-uri-to-path/-/file-uri-to-path-1.0.0.tgz",
|
||||
"integrity": "sha512-0Zt+s3L7Vf1biwWZ29aARiVYLx7iMGnEUl9x33fbB/j3jR81u/O2LbqK+Bm1CDSNDKVtJ/YjwY7TUd5SkeLQLw==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/fs-constants": {
|
||||
"version": "1.0.0",
|
||||
"resolved": "https://registry.npmjs.org/fs-constants/-/fs-constants-1.0.0.tgz",
|
||||
"integrity": "sha512-y6OAwoSIf7FyjMIv94u+b5rdheZEjzR63GTyZJm5qh4Bi+2YgwLCcI/fPFZkL5PSixOt6ZNKm+w+Hfp/Bciwow==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/github-from-package": {
|
||||
"version": "0.0.0",
|
||||
"resolved": "https://registry.npmjs.org/github-from-package/-/github-from-package-0.0.0.tgz",
|
||||
"integrity": "sha512-SyHy3T1v2NUXn29OsWdxmK6RwHD+vkj3v8en8AOBZ1wBQ/hCAQ5bAQTD02kW4W9tUp/3Qh6J8r9EvntiyCmOOw==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/ieee754": {
|
||||
"version": "1.2.1",
|
||||
"resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.2.1.tgz",
|
||||
"integrity": "sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==",
|
||||
"funding": [
|
||||
{
|
||||
"type": "github",
|
||||
"url": "https://github.com/sponsors/feross"
|
||||
},
|
||||
{
|
||||
"type": "patreon",
|
||||
"url": "https://www.patreon.com/feross"
|
||||
},
|
||||
{
|
||||
"type": "consulting",
|
||||
"url": "https://feross.org/support"
|
||||
}
|
||||
],
|
||||
"license": "BSD-3-Clause"
|
||||
},
|
||||
"node_modules/inherits": {
|
||||
"version": "2.0.4",
|
||||
"resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz",
|
||||
"integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==",
|
||||
"license": "ISC"
|
||||
},
|
||||
"node_modules/ini": {
|
||||
"version": "1.3.8",
|
||||
"resolved": "https://registry.npmjs.org/ini/-/ini-1.3.8.tgz",
|
||||
"integrity": "sha512-JV/yugV2uzW5iMRSiZAyDtQd+nxtUnjeLt0acNdw98kKLrvuRVyB80tsREOE7yvGVgalhZ6RNXCmEHkUKBKxew==",
|
||||
"license": "ISC"
|
||||
},
|
||||
"node_modules/mimic-response": {
|
||||
"version": "3.1.0",
|
||||
"resolved": "https://registry.npmjs.org/mimic-response/-/mimic-response-3.1.0.tgz",
|
||||
"integrity": "sha512-z0yWI+4FDrrweS8Zmt4Ej5HdJmky15+L2e6Wgn3+iK5fWzb6T3fhNFq2+MeTRb064c6Wr4N/wv0DzQTjNzHNGQ==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=10"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/sindresorhus"
|
||||
}
|
||||
},
|
||||
"node_modules/minimist": {
|
||||
"version": "1.2.8",
|
||||
"resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.8.tgz",
|
||||
"integrity": "sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==",
|
||||
"license": "MIT",
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/ljharb"
|
||||
}
|
||||
},
|
||||
"node_modules/mkdirp-classic": {
|
||||
"version": "0.5.3",
|
||||
"resolved": "https://registry.npmjs.org/mkdirp-classic/-/mkdirp-classic-0.5.3.tgz",
|
||||
"integrity": "sha512-gKLcREMhtuZRwRAfqP3RFW+TK4JqApVBtOIftVgjuABpAtpxhPGaDcfvbhNvD0B8iD1oUr/txX35NjcaY6Ns/A==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/napi-build-utils": {
|
||||
"version": "2.0.0",
|
||||
"resolved": "https://registry.npmjs.org/napi-build-utils/-/napi-build-utils-2.0.0.tgz",
|
||||
"integrity": "sha512-GEbrYkbfF7MoNaoh2iGG84Mnf/WZfB0GdGEsM8wz7Expx/LlWf5U8t9nvJKXSp3qr5IsEbK04cBGhol/KwOsWA==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/node-abi": {
|
||||
"version": "3.89.0",
|
||||
"resolved": "https://registry.npmjs.org/node-abi/-/node-abi-3.89.0.tgz",
|
||||
"integrity": "sha512-6u9UwL0HlAl21+agMN3YAMXcKByMqwGx+pq+P76vii5f7hTPtKDp08/H9py6DY+cfDw7kQNTGEj/rly3IgbNQA==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"semver": "^7.3.5"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=10"
|
||||
}
|
||||
},
|
||||
"node_modules/once": {
|
||||
"version": "1.4.0",
|
||||
"resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz",
|
||||
"integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==",
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"wrappy": "1"
|
||||
}
|
||||
},
|
||||
"node_modules/prebuild-install": {
|
||||
"version": "7.1.3",
|
||||
"resolved": "https://registry.npmjs.org/prebuild-install/-/prebuild-install-7.1.3.tgz",
|
||||
"integrity": "sha512-8Mf2cbV7x1cXPUILADGI3wuhfqWvtiLA1iclTDbFRZkgRQS0NqsPZphna9V+HyTEadheuPmjaJMsbzKQFOzLug==",
|
||||
"deprecated": "No longer maintained. Please contact the author of the relevant native addon; alternatives are available.",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"detect-libc": "^2.0.0",
|
||||
"expand-template": "^2.0.3",
|
||||
"github-from-package": "0.0.0",
|
||||
"minimist": "^1.2.3",
|
||||
"mkdirp-classic": "^0.5.3",
|
||||
"napi-build-utils": "^2.0.0",
|
||||
"node-abi": "^3.3.0",
|
||||
"pump": "^3.0.0",
|
||||
"rc": "^1.2.7",
|
||||
"simple-get": "^4.0.0",
|
||||
"tar-fs": "^2.0.0",
|
||||
"tunnel-agent": "^0.6.0"
|
||||
},
|
||||
"bin": {
|
||||
"prebuild-install": "bin.js"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=10"
|
||||
}
|
||||
},
|
||||
"node_modules/pump": {
|
||||
"version": "3.0.4",
|
||||
"resolved": "https://registry.npmjs.org/pump/-/pump-3.0.4.tgz",
|
||||
"integrity": "sha512-VS7sjc6KR7e1ukRFhQSY5LM2uBWAUPiOPa/A3mkKmiMwSmRFUITt0xuj+/lesgnCv+dPIEYlkzrcyXgquIHMcA==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"end-of-stream": "^1.1.0",
|
||||
"once": "^1.3.1"
|
||||
}
|
||||
},
|
||||
"node_modules/rc": {
|
||||
"version": "1.2.8",
|
||||
"resolved": "https://registry.npmjs.org/rc/-/rc-1.2.8.tgz",
|
||||
"integrity": "sha512-y3bGgqKj3QBdxLbLkomlohkvsA8gdAiUQlSBJnBhfn+BPxg4bc62d8TcBW15wavDfgexCgccckhcZvywyQYPOw==",
|
||||
"license": "(BSD-2-Clause OR MIT OR Apache-2.0)",
|
||||
"dependencies": {
|
||||
"deep-extend": "^0.6.0",
|
||||
"ini": "~1.3.0",
|
||||
"minimist": "^1.2.0",
|
||||
"strip-json-comments": "~2.0.1"
|
||||
},
|
||||
"bin": {
|
||||
"rc": "cli.js"
|
||||
}
|
||||
},
|
||||
"node_modules/readable-stream": {
|
||||
"version": "3.6.2",
|
||||
"resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz",
|
||||
"integrity": "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"inherits": "^2.0.3",
|
||||
"string_decoder": "^1.1.1",
|
||||
"util-deprecate": "^1.0.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 6"
|
||||
}
|
||||
},
|
||||
"node_modules/safe-buffer": {
|
||||
"version": "5.2.1",
|
||||
"resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz",
|
||||
"integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==",
|
||||
"funding": [
|
||||
{
|
||||
"type": "github",
|
||||
"url": "https://github.com/sponsors/feross"
|
||||
},
|
||||
{
|
||||
"type": "patreon",
|
||||
"url": "https://www.patreon.com/feross"
|
||||
},
|
||||
{
|
||||
"type": "consulting",
|
||||
"url": "https://feross.org/support"
|
||||
}
|
||||
],
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/semver": {
|
||||
"version": "7.7.4",
|
||||
"resolved": "https://registry.npmjs.org/semver/-/semver-7.7.4.tgz",
|
||||
"integrity": "sha512-vFKC2IEtQnVhpT78h1Yp8wzwrf8CM+MzKMHGJZfBtzhZNycRFnXsHk6E5TxIkkMsgNS7mdX3AGB7x2QM2di4lA==",
|
||||
"license": "ISC",
|
||||
"bin": {
|
||||
"semver": "bin/semver.js"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=10"
|
||||
}
|
||||
},
|
||||
"node_modules/simple-concat": {
|
||||
"version": "1.0.1",
|
||||
"resolved": "https://registry.npmjs.org/simple-concat/-/simple-concat-1.0.1.tgz",
|
||||
"integrity": "sha512-cSFtAPtRhljv69IK0hTVZQ+OfE9nePi/rtJmw5UjHeVyVroEqJXP1sFztKUy1qU+xvz3u/sfYJLa947b7nAN2Q==",
|
||||
"funding": [
|
||||
{
|
||||
"type": "github",
|
||||
"url": "https://github.com/sponsors/feross"
|
||||
},
|
||||
{
|
||||
"type": "patreon",
|
||||
"url": "https://www.patreon.com/feross"
|
||||
},
|
||||
{
|
||||
"type": "consulting",
|
||||
"url": "https://feross.org/support"
|
||||
}
|
||||
],
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/simple-get": {
|
||||
"version": "4.0.1",
|
||||
"resolved": "https://registry.npmjs.org/simple-get/-/simple-get-4.0.1.tgz",
|
||||
"integrity": "sha512-brv7p5WgH0jmQJr1ZDDfKDOSeWWg+OVypG99A/5vYGPqJ6pxiaHLy8nxtFjBA7oMa01ebA9gfh1uMCFqOuXxvA==",
|
||||
"funding": [
|
||||
{
|
||||
"type": "github",
|
||||
"url": "https://github.com/sponsors/feross"
|
||||
},
|
||||
{
|
||||
"type": "patreon",
|
||||
"url": "https://www.patreon.com/feross"
|
||||
},
|
||||
{
|
||||
"type": "consulting",
|
||||
"url": "https://feross.org/support"
|
||||
}
|
||||
],
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"decompress-response": "^6.0.0",
|
||||
"once": "^1.3.1",
|
||||
"simple-concat": "^1.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/string_decoder": {
|
||||
"version": "1.3.0",
|
||||
"resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz",
|
||||
"integrity": "sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"safe-buffer": "~5.2.0"
|
||||
}
|
||||
},
|
||||
"node_modules/strip-json-comments": {
|
||||
"version": "2.0.1",
|
||||
"resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-2.0.1.tgz",
|
||||
"integrity": "sha512-4gB8na07fecVVkOI6Rs4e7T6NOTki5EmL7TUduTs6bu3EdnSycntVJ4re8kgZA+wx9IueI2Y11bfbgwtzuE0KQ==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=0.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/tar-fs": {
|
||||
"version": "2.1.4",
|
||||
"resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-2.1.4.tgz",
|
||||
"integrity": "sha512-mDAjwmZdh7LTT6pNleZ05Yt65HC3E+NiQzl672vQG38jIrehtJk/J3mNwIg+vShQPcLF/LV7CMnDW6vjj6sfYQ==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"chownr": "^1.1.1",
|
||||
"mkdirp-classic": "^0.5.2",
|
||||
"pump": "^3.0.0",
|
||||
"tar-stream": "^2.1.4"
|
||||
}
|
||||
},
|
||||
"node_modules/tar-stream": {
|
||||
"version": "2.2.0",
|
||||
"resolved": "https://registry.npmjs.org/tar-stream/-/tar-stream-2.2.0.tgz",
|
||||
"integrity": "sha512-ujeqbceABgwMZxEJnk2HDY2DlnUZ+9oEcb1KzTVfYHio0UE6dG71n60d8D2I4qNvleWrrXpmjpt7vZeF1LnMZQ==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"bl": "^4.0.3",
|
||||
"end-of-stream": "^1.4.1",
|
||||
"fs-constants": "^1.0.0",
|
||||
"inherits": "^2.0.3",
|
||||
"readable-stream": "^3.1.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=6"
|
||||
}
|
||||
},
|
||||
"node_modules/tunnel-agent": {
|
||||
"version": "0.6.0",
|
||||
"resolved": "https://registry.npmjs.org/tunnel-agent/-/tunnel-agent-0.6.0.tgz",
|
||||
"integrity": "sha512-McnNiV1l8RYeY8tBgEpuodCC1mLUdbSN+CYBL7kJsJNInOP8UjDDEwdk6Mw60vdLLrr5NHKZhMAOSrR2NZuQ+w==",
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"safe-buffer": "^5.0.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": "*"
|
||||
}
|
||||
},
|
||||
"node_modules/typescript": {
|
||||
"version": "6.0.2",
|
||||
"resolved": "https://registry.npmjs.org/typescript/-/typescript-6.0.2.tgz",
|
||||
"integrity": "sha512-bGdAIrZ0wiGDo5l8c++HWtbaNCWTS4UTv7RaTH/ThVIgjkveJt83m74bBHMJkuCbslY8ixgLBVZJIOiQlQTjfQ==",
|
||||
"dev": true,
|
||||
"license": "Apache-2.0",
|
||||
"bin": {
|
||||
"tsc": "bin/tsc",
|
||||
"tsserver": "bin/tsserver"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=14.17"
|
||||
}
|
||||
},
|
||||
"node_modules/undici-types": {
|
||||
"version": "7.18.2",
|
||||
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.18.2.tgz",
|
||||
"integrity": "sha512-AsuCzffGHJybSaRrmr5eHr81mwJU3kjw6M+uprWvCXiNeN9SOGwQ3Jn8jb8m3Z6izVgknn1R0FTCEAP2QrLY/w==",
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/util-deprecate": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz",
|
||||
"integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/wrappy": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz",
|
||||
"integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==",
|
||||
"license": "ISC"
|
||||
}
|
||||
}
|
||||
}
|
||||
69
openclaw-memoria-port/package.json
Normal file
69
openclaw-memoria-port/package.json
Normal file
@@ -0,0 +1,69 @@
|
||||
{
|
||||
"name": "@memoria/openclaw-memoria",
|
||||
"version": "3.34.0-port",
|
||||
"description": "The most advanced memory system for AI agents. 21 cognitive layers, knowledge graph, procedural learning, vector search. 100% local-first.",
|
||||
"license": "Apache-2.0",
|
||||
"author": "Primo Studio",
|
||||
"main": "./dist/index.js",
|
||||
"types": "./dist/index.d.ts",
|
||||
"exports": {
|
||||
".": {
|
||||
"types": "./dist/index.d.ts",
|
||||
"import": "./dist/index.js"
|
||||
}
|
||||
},
|
||||
"type": "module",
|
||||
"files": [
|
||||
"dist",
|
||||
"openclaw.plugin.json"
|
||||
],
|
||||
"scripts": {
|
||||
"build": "tsup",
|
||||
"clean": "rm -rf dist"
|
||||
},
|
||||
"dependencies": {
|
||||
"better-sqlite3": "^11.10.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/better-sqlite3": "^7.6.13",
|
||||
"tsup": "^8.5.0",
|
||||
"typescript": "^5.8.3"
|
||||
},
|
||||
"openclaw": {
|
||||
"extensions": [
|
||||
"./dist/index.js"
|
||||
],
|
||||
"compat": {
|
||||
"pluginApi": ">=2026.4.24",
|
||||
"minGatewayVersion": ">=2026.4.24"
|
||||
},
|
||||
"build": {
|
||||
"openclawVersion": "2026.5.5",
|
||||
"pluginSdkVersion": "2026.5.5"
|
||||
}
|
||||
},
|
||||
"keywords": [
|
||||
"memory",
|
||||
"ai-agent",
|
||||
"persistent-memory",
|
||||
"long-term-memory",
|
||||
"knowledge-graph",
|
||||
"procedural-learning",
|
||||
"vector-search",
|
||||
"sqlite",
|
||||
"ollama",
|
||||
"lm-studio",
|
||||
"local-first",
|
||||
"cognitive",
|
||||
"context",
|
||||
"claude",
|
||||
"cursor",
|
||||
"copilot",
|
||||
"chatgpt",
|
||||
"openclaw",
|
||||
"wal",
|
||||
"developer-tools",
|
||||
"typescript",
|
||||
"llm"
|
||||
]
|
||||
}
|
||||
131
openclaw-memoria-port/prefetch.ts
Normal file
131
openclaw-memoria-port/prefetch.ts
Normal file
@@ -0,0 +1,131 @@
|
||||
/**
|
||||
* Memoria — Async Prefetch (inspired by Hermes)
|
||||
*
|
||||
* Strategy: When message_received fires, we immediately start the recall
|
||||
* computation in the background. By the time before_prompt_build fires
|
||||
* (usually 50-200ms later), the result is already cached.
|
||||
*
|
||||
* This eliminates recall latency from the critical path.
|
||||
*/
|
||||
|
||||
export interface PrefetchResult {
|
||||
prompt: string;
|
||||
result: string | undefined;
|
||||
timestamp: number;
|
||||
computeTimeMs: number;
|
||||
}
|
||||
|
||||
export class PrefetchCache {
|
||||
private cache: PrefetchResult | null = null;
|
||||
private pending: Promise<PrefetchResult> | null = null;
|
||||
private readonly maxAgeMs: number;
|
||||
|
||||
constructor(maxAgeMs = 30_000) {
|
||||
this.maxAgeMs = maxAgeMs;
|
||||
}
|
||||
|
||||
/**
|
||||
* Start a prefetch computation. Called from message_received hook.
|
||||
* Non-blocking — returns immediately.
|
||||
*/
|
||||
startPrefetch(prompt: string, computeFn: () => Promise<string | undefined>): void {
|
||||
// Don't prefetch very short messages (system events, etc.)
|
||||
if (!prompt || prompt.length < 5) return;
|
||||
|
||||
const startTime = Date.now();
|
||||
this.pending = computeFn()
|
||||
.then((result) => {
|
||||
const entry: PrefetchResult = {
|
||||
prompt,
|
||||
result,
|
||||
timestamp: Date.now(),
|
||||
computeTimeMs: Date.now() - startTime,
|
||||
};
|
||||
this.cache = entry;
|
||||
this.pending = null;
|
||||
return entry;
|
||||
})
|
||||
.catch(() => {
|
||||
this.pending = null;
|
||||
return { prompt, result: undefined, timestamp: Date.now(), computeTimeMs: Date.now() - startTime };
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the prefetched result. Called from before_prompt_build hook.
|
||||
* If the prefetch is still running, waits for it (bounded by timeout).
|
||||
* If no prefetch was started, returns null (caller falls back to sync recall).
|
||||
*/
|
||||
async get(currentPrompt: string, timeoutMs = 5_000): Promise<PrefetchResult | null> {
|
||||
// Check cache first
|
||||
if (this.cache) {
|
||||
const age = Date.now() - this.cache.timestamp;
|
||||
if (age < this.maxAgeMs && this.promptMatches(this.cache.prompt, currentPrompt)) {
|
||||
return this.cache;
|
||||
}
|
||||
}
|
||||
|
||||
// Wait for pending computation if it exists
|
||||
if (this.pending) {
|
||||
try {
|
||||
const result = await Promise.race([
|
||||
this.pending,
|
||||
new Promise<null>((resolve) => setTimeout(() => resolve(null), timeoutMs)),
|
||||
]);
|
||||
if (result && this.promptMatches(result.prompt, currentPrompt)) {
|
||||
return result;
|
||||
}
|
||||
} catch {
|
||||
// Prefetch failed, caller will do sync recall
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if the prefetched prompt matches the current prompt.
|
||||
* Uses a fuzzy match — the user message portion should match
|
||||
* even if the full event prompt has different metadata.
|
||||
*/
|
||||
private promptMatches(prefetchedPrompt: string, currentPrompt: string): boolean {
|
||||
// Extract user message from both (last significant chunk)
|
||||
const extractUserMsg = (p: string): string => {
|
||||
// Strip common Memoria/OpenClaw envelope
|
||||
const lastBlock = p.lastIndexOf("```\n\n");
|
||||
if (lastBlock !== -1 && p.includes("untrusted metadata")) {
|
||||
return p.slice(lastBlock + 5).trim().slice(0, 200);
|
||||
}
|
||||
return p.slice(-200).trim();
|
||||
};
|
||||
|
||||
const a = extractUserMsg(prefetchedPrompt);
|
||||
const b = extractUserMsg(currentPrompt);
|
||||
|
||||
// Exact match on user portion
|
||||
if (a === b) return true;
|
||||
|
||||
// One contains the other (common when metadata wrapping differs)
|
||||
if (a.length > 20 && b.length > 20) {
|
||||
return a.includes(b.slice(0, 100)) || b.includes(a.slice(0, 100));
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/** Clear the cache (e.g., on session end) */
|
||||
clear(): void {
|
||||
this.cache = null;
|
||||
this.pending = null;
|
||||
}
|
||||
|
||||
/** Get stats for debugging */
|
||||
stats(): { hasCached: boolean; hasPending: boolean; lastComputeMs: number | null; cacheAgeMs: number | null } {
|
||||
return {
|
||||
hasCached: this.cache !== null,
|
||||
hasPending: this.pending !== null,
|
||||
lastComputeMs: this.cache?.computeTimeMs ?? null,
|
||||
cacheAgeMs: this.cache ? Date.now() - this.cache.timestamp : null,
|
||||
};
|
||||
}
|
||||
}
|
||||
227
openclaw-memoria-port/procedural-hooks.ts
Normal file
227
openclaw-memoria-port/procedural-hooks.ts
Normal file
@@ -0,0 +1,227 @@
|
||||
/**
|
||||
* Memoria — Real-time procedural capture hook (Layer 1b: after_tool_call)
|
||||
*
|
||||
* Extracted from index.ts Phase 2.2 — pure mechanical move, zero logic change.
|
||||
*/
|
||||
|
||||
import type { OpenClawPluginApi } from "openclaw/plugin-sdk";
|
||||
import type { MemoriaConfig } from "./core/config.js";
|
||||
import type { LLMProvider } from "./core/providers/types.js";
|
||||
import type { ProceduralMemory, Procedure } from "./core/procedural.js";
|
||||
import type { KnowledgeGraph } from "./core/graph.js";
|
||||
|
||||
/**
|
||||
* Register the after_tool_call hook for real-time procedural capture (Layer 1b).
|
||||
* Learns on-the-fly from successful tool call sequences.
|
||||
*/
|
||||
export function registerProceduralHook(
|
||||
api: OpenClawPluginApi,
|
||||
cfg: MemoriaConfig,
|
||||
extractLlm: LLMProvider,
|
||||
proceduralMem: ProceduralMemory,
|
||||
graph: KnowledgeGraph,
|
||||
): void {
|
||||
// Session buffer: accumulates tool calls until a success pattern triggers assembly
|
||||
const toolCallBuffer: Array<{
|
||||
toolName: string;
|
||||
params: Record<string, unknown>;
|
||||
result?: unknown;
|
||||
error?: string;
|
||||
durationMs?: number;
|
||||
timestamp: number;
|
||||
}> = [];
|
||||
// Track which procedures were already assembled to avoid duplicates
|
||||
const assembledGoals = new Set<string>();
|
||||
// Cooldown to avoid assembling too frequently
|
||||
let lastAssemblyTime = 0;
|
||||
const ASSEMBLY_COOLDOWN_MS = 60_000; // 1 minute between assemblies
|
||||
|
||||
api.on("after_tool_call", async (event: any, _ctx: any) => {
|
||||
try {
|
||||
const { toolName, params, result, error, durationMs } = event;
|
||||
|
||||
// Buffer all tool calls (keep last 30 to avoid memory leak)
|
||||
toolCallBuffer.push({
|
||||
toolName,
|
||||
params: params || {},
|
||||
result: typeof result === 'string' ? result.slice(0, 2000) : result,
|
||||
error,
|
||||
durationMs,
|
||||
timestamp: Date.now(),
|
||||
});
|
||||
if (toolCallBuffer.length > 30) toolCallBuffer.shift();
|
||||
|
||||
// Only trigger assembly on exec-type tools with a successful outcome
|
||||
if (toolName !== 'exec' && toolName !== 'Edit' && toolName !== 'Write') return;
|
||||
if (error) return; // failed step — don't assemble yet
|
||||
|
||||
// Check result for success keywords (publish, deploy, commit, install, etc.)
|
||||
const resultStr = typeof result === 'string' ? result : JSON.stringify(result || '');
|
||||
const successPatterns = [
|
||||
/Published?\s/i, /✔|✅/, /success/i, /deployed/i, /created/i,
|
||||
/\[new tag\]/, /release.*created/i, /installed/i, /committed/i,
|
||||
/pushed/i, /merged/i, /completed/i, /OK\.\s/,
|
||||
];
|
||||
|
||||
const isSuccess = successPatterns.some(p => p.test(resultStr));
|
||||
if (!isSuccess) return;
|
||||
|
||||
// Cooldown check
|
||||
const now = Date.now();
|
||||
if (now - lastAssemblyTime < ASSEMBLY_COOLDOWN_MS) return;
|
||||
|
||||
// We have a success signal — assemble procedure from recent exec calls
|
||||
const recentExecs = toolCallBuffer
|
||||
.filter(tc => tc.toolName === 'exec' && !tc.error)
|
||||
.slice(-15); // last 15 exec calls
|
||||
|
||||
if (recentExecs.length < 2) return;
|
||||
|
||||
// Extract commands
|
||||
const commands = recentExecs
|
||||
.map(tc => (tc.params as any)?.command as string)
|
||||
.filter(Boolean)
|
||||
.filter(cmd => cmd.length > 5 && cmd.length < 1000);
|
||||
|
||||
if (commands.length < 2) return;
|
||||
|
||||
// Filter — only capture reusable procedures
|
||||
if (!proceduralMem.isReusableProcedure(commands)) {
|
||||
api.logger.debug?.(`memoria: procedural skipped — not reusable (${commands.length} cmds, no action pattern)`);
|
||||
return;
|
||||
}
|
||||
|
||||
// Quick fingerprint to avoid duplicate assemblies
|
||||
const fingerprint = commands.slice(-3).join('|').slice(0, 200);
|
||||
if (assembledGoals.has(fingerprint)) return;
|
||||
|
||||
// Assemble the procedure via LLM
|
||||
api.logger.info?.(`memoria: 🔧 real-time procedural capture — ${commands.length} commands, trigger: "${resultStr.slice(0, 80)}..."`);
|
||||
|
||||
const prompt = `Analyze this successful command sequence and extract a reusable procedure.
|
||||
|
||||
Commands executed (in order):
|
||||
${commands.map((c, i) => `${i + 1}. ${c}`).join('\n')}
|
||||
|
||||
Final result (success): ${resultStr.slice(0, 500)}
|
||||
|
||||
Output JSON only (no markdown, no explanation):
|
||||
{
|
||||
"name": "Short name (e.g., 'Publish Memoria to ClawHub')",
|
||||
"goal": "What this accomplishes in one sentence",
|
||||
"trigger_patterns": ["keyword1", "keyword2"],
|
||||
"key_steps": ["step1 description", "step2 description"],
|
||||
"gotchas": ["pitfall or workaround learned"]
|
||||
}`;
|
||||
|
||||
try {
|
||||
const response = await extractLlm.generateWithMeta!(prompt, {
|
||||
maxTokens: 512,
|
||||
temperature: 0.1,
|
||||
format: "json",
|
||||
timeoutMs: 15000,
|
||||
});
|
||||
|
||||
if (!response?.response) return;
|
||||
|
||||
const cleaned = response.response.replace(/```json\n?|\n?```/g, '').trim();
|
||||
const meta = JSON.parse(cleaned);
|
||||
|
||||
if (!meta.name || !meta.goal) return;
|
||||
|
||||
// Re-check name for noise patterns
|
||||
if (!proceduralMem.isReusableProcedure(commands, meta.name)) {
|
||||
api.logger.debug?.(`memoria: procedural skipped — LLM named it noise: "${meta.name}"`);
|
||||
return;
|
||||
}
|
||||
|
||||
// Smart duplicate detection
|
||||
const similar = proceduralMem.findSimilarProcedure(meta.name, meta.goal);
|
||||
|
||||
if (similar) {
|
||||
// Reinforce existing procedure
|
||||
const totalDuration = recentExecs.reduce((sum, tc) => sum + (tc.durationMs || 0), 0);
|
||||
proceduralMem.recordExecution(similar.id, true, totalDuration);
|
||||
|
||||
// Add improvement if steps changed
|
||||
const newSteps = commands.filter(c => !similar.steps.includes(c));
|
||||
if (newSteps.length > 0) {
|
||||
proceduralMem.addImprovement(
|
||||
similar.id,
|
||||
`Updated steps: ${newSteps.slice(0, 3).join('; ')}`,
|
||||
'Real-time learning from successful execution'
|
||||
);
|
||||
}
|
||||
|
||||
// Reflect: was this the best approach?
|
||||
const reflectEvery = cfg.procedural?.reflectEvery ?? 3;
|
||||
if (reflectEvery > 0 && (similar.success_count + 1) % reflectEvery === 0) {
|
||||
try {
|
||||
const errors = recentExecs
|
||||
.filter(tc => tc.error)
|
||||
.map(tc => tc.error!);
|
||||
const reflection = await proceduralMem.reflect(similar.id, {
|
||||
durationMs: totalDuration,
|
||||
stepsTaken: commands,
|
||||
errorsEncountered: errors.length > 0 ? errors : undefined,
|
||||
});
|
||||
if (reflection?.should_improve) {
|
||||
api.logger.info?.(`memoria: procedural 🔍 reflected on "${similar.name}" — ${reflection.suggestions.slice(0, 2).join('; ')}`);
|
||||
}
|
||||
} catch (e) { api?.logger?.debug?.('memoria:procedural-reflection: ' + String(e)); }
|
||||
}
|
||||
|
||||
api.logger.info?.(`memoria: procedural ✅ reinforced "${similar.name}" (v${similar.version}, ${similar.success_count + 1} successes, quality=${similar.quality.overall})`);
|
||||
} else {
|
||||
// Create new procedure with full type compliance
|
||||
const proc: Procedure = {
|
||||
id: `proc_${Date.now()}_${Math.random().toString(36).slice(2, 9)}`,
|
||||
name: meta.name,
|
||||
goal: meta.goal,
|
||||
steps: commands,
|
||||
version: 1,
|
||||
success_count: 1,
|
||||
failure_count: 0,
|
||||
last_success_at: Date.now(),
|
||||
last_updated_at: Date.now(),
|
||||
improvements: [],
|
||||
quality: {
|
||||
speed: 0.5,
|
||||
reliability: 0.5,
|
||||
elegance: Math.max(0.2, 1 - commands.length * 0.1),
|
||||
safety: 0.8,
|
||||
overall: 0.5,
|
||||
},
|
||||
context: [...(meta.trigger_patterns || []), ...(meta.gotchas || [])].join(', '),
|
||||
gotchas: meta.gotchas?.join(' | '),
|
||||
degradation_score: 0,
|
||||
preferred: false,
|
||||
};
|
||||
|
||||
proceduralMem.storeProcedure(proc);
|
||||
api.logger.info?.(`memoria: procedural ✅ NEW "${proc.name}" (${proc.steps.length} steps, real-time)`);
|
||||
|
||||
// Cross-layer: enrich Knowledge Graph with procedure entities
|
||||
try {
|
||||
const procFact = `Procedure "${proc.name}": ${proc.goal}. Steps: ${commands.slice(0, 3).join('; ')}`;
|
||||
await graph.extractAndStore(`proc_${proc.id}`, procFact);
|
||||
api.logger.debug?.(`memoria: procedural → graph entities extracted for "${proc.name}"`);
|
||||
} catch (e) { api?.logger?.debug?.('memoria:procedural-graph: ' + String(e)); }
|
||||
}
|
||||
|
||||
assembledGoals.add(fingerprint);
|
||||
lastAssemblyTime = now;
|
||||
|
||||
// Clear old buffer entries (keep last 5 for context)
|
||||
toolCallBuffer.splice(0, Math.max(0, toolCallBuffer.length - 5));
|
||||
|
||||
} catch (llmErr) {
|
||||
api.logger.debug?.(`memoria: procedural LLM failed: ${String(llmErr)}`);
|
||||
}
|
||||
|
||||
} catch (err) {
|
||||
// Non-blocking — never crash the plugin
|
||||
api.logger.debug?.(`memoria: after_tool_call error: ${String(err)}`);
|
||||
}
|
||||
});
|
||||
}
|
||||
392
openclaw-memoria-port/recall.ts
Normal file
392
openclaw-memoria-port/recall.ts
Normal file
@@ -0,0 +1,392 @@
|
||||
/**
|
||||
* Memoria — Recall hook (Layer 6: before_prompt_build)
|
||||
*
|
||||
* Extracted from index.ts Phase 2.2 — pure mechanical move, zero logic change.
|
||||
*/
|
||||
|
||||
import type { OpenClawPluginApi } from "openclaw/plugin-sdk";
|
||||
import type { MemoriaConfig } from "./core/config.js";
|
||||
import type { MemoriaDB, Fact } from "./core/db.js";
|
||||
import type { EmbeddingManager } from "./core/embeddings.js";
|
||||
import type { KnowledgeGraph } from "./core/graph.js";
|
||||
import type { TopicManager } from "./core/topics.js";
|
||||
import type { ObservationManager } from "./core/observations.js";
|
||||
import type { ProceduralMemory } from "./core/procedural.js";
|
||||
import type { ContextTreeBuilder } from "./core/context-tree.js";
|
||||
import { AdaptiveBudget } from "./core/budget.js";
|
||||
import type { FeedbackManager } from "./core/feedback.js";
|
||||
import type { LifecycleManager } from "./core/lifecycle.js";
|
||||
import type { ExpertiseManager } from "./core/expertise.js";
|
||||
import type { PatternManager } from "./core/patterns.js";
|
||||
import type { RevisionManager } from "./core/revision.js";
|
||||
import { scoreAndRank, getHotFacts, HOT_TIER_CONFIG } from "./core/scoring.js";
|
||||
import { formatRecallContext } from "./core/format.js";
|
||||
import type { PrefetchCache } from "./prefetch.js";
|
||||
import type { SelfObserver } from "./core/self-observation.js";
|
||||
|
||||
export interface RecallDeps {
|
||||
api: OpenClawPluginApi;
|
||||
cfg: MemoriaConfig;
|
||||
db: MemoriaDB;
|
||||
embeddingMgr: EmbeddingManager;
|
||||
graph: KnowledgeGraph;
|
||||
topicMgr: TopicManager;
|
||||
observationMgr: ObservationManager;
|
||||
proceduralMem: ProceduralMemory;
|
||||
treeBuilder: ContextTreeBuilder;
|
||||
budget: AdaptiveBudget;
|
||||
feedbackMgr: FeedbackManager;
|
||||
lifecycleMgr: LifecycleManager;
|
||||
expertiseMgr: ExpertiseManager;
|
||||
patternMgr: PatternManager;
|
||||
revisionMgr: RevisionManager;
|
||||
prefetchCache?: PrefetchCache;
|
||||
selfObserver?: SelfObserver;
|
||||
}
|
||||
|
||||
/**
|
||||
* Strip OpenClaw envelope metadata and Memoria headers from a raw prompt
|
||||
* to extract the actual user message for recall matching.
|
||||
*/
|
||||
export function extractUserPrompt(rawPrompt: string): string {
|
||||
if (!rawPrompt || rawPrompt.length < 3) return "";
|
||||
let prompt = rawPrompt;
|
||||
const lastJsonEnd = rawPrompt.lastIndexOf("```\n\n");
|
||||
if (lastJsonEnd !== -1 && rawPrompt.includes("untrusted metadata")) {
|
||||
prompt = rawPrompt.slice(lastJsonEnd + 5).trim();
|
||||
}
|
||||
if (prompt.startsWith("## 🧠 Memoria")) {
|
||||
const afterMemoria = prompt.indexOf("\n\n", prompt.indexOf("Conversation info"));
|
||||
if (afterMemoria !== -1) prompt = prompt.slice(afterMemoria).trim();
|
||||
}
|
||||
if (!prompt || prompt.length < 3) {
|
||||
prompt = rawPrompt.slice(-500).trim();
|
||||
}
|
||||
return prompt;
|
||||
}
|
||||
|
||||
/**
|
||||
* Core recall computation — extracted so it can be called from:
|
||||
* 1. before_prompt_build (sync fallback)
|
||||
* 2. prefetch on message_received (async, ahead of time)
|
||||
*
|
||||
* Returns the formatted context string, or undefined if nothing to inject.
|
||||
*/
|
||||
export async function computeRecall(
|
||||
prompt: string,
|
||||
messageCount: number,
|
||||
deps: Omit<RecallDeps, "prefetchCache">
|
||||
): Promise<string | undefined> {
|
||||
const { api, cfg, db, embeddingMgr, graph, topicMgr, observationMgr,
|
||||
proceduralMem, treeBuilder, budget, feedbackMgr, lifecycleMgr,
|
||||
expertiseMgr, patternMgr, revisionMgr } = deps;
|
||||
|
||||
if (!prompt || prompt.length < 3) return undefined;
|
||||
|
||||
try {
|
||||
|
||||
// ── User signal detection (correction / frustration) ──
|
||||
try {
|
||||
const signal = feedbackMgr.analyzeUserMessage(prompt);
|
||||
if (signal.isCorrection || signal.isFrustration) {
|
||||
const penalized = feedbackMgr.applyUserSignal(signal.penalty);
|
||||
const parts: string[] = [];
|
||||
if (signal.isCorrection) parts.push("correction detected");
|
||||
if (signal.isFrustration) parts.push("frustration detected");
|
||||
if (penalized.length > 0) {
|
||||
api.logger.info?.(`memoria: user signal (${parts.join(" + ")}) → ${penalized.length} facts penalized by ${signal.penalty}`);
|
||||
}
|
||||
}
|
||||
} catch (e) { api?.logger?.debug?.('memoria:recall: ' + String(e)); }
|
||||
|
||||
// Adaptive budget: compute how many facts to inject based on context usage
|
||||
const tokenEstimate = AdaptiveBudget.estimateTokens(messageCount);
|
||||
const budgetResult = budget.compute(tokenEstimate);
|
||||
const recallLimit = budgetResult.limit;
|
||||
|
||||
const penaltyLog = budget.penalty > 0 ? `, penalty -${budget.penalty}` : "";
|
||||
api.logger.debug?.(`memoria: budget ${budgetResult.zone} (${(budgetResult.usage * 100).toFixed(0)}% used${penaltyLog}) → ${recallLimit} facts`);
|
||||
|
||||
// Hot tier: always-injected facts
|
||||
const hotFactsRaw = db.hotFacts(HOT_TIER_CONFIG.minAccessCount, HOT_TIER_CONFIG.staleAfterDays, HOT_TIER_CONFIG.maxHotFacts);
|
||||
const hotIds = new Set(hotFactsRaw.map(f => f.id));
|
||||
const hotScored = getHotFacts(hotFactsRaw);
|
||||
const hotLimit = hotScored.length;
|
||||
const searchLimit = Math.max(recallLimit - hotLimit, 2);
|
||||
|
||||
// Hybrid search: FTS5 + cosine + temporal scoring
|
||||
let topFacts: Array<{ id: string; fact: string; category: string; confidence: number; temporalScore: number }>;
|
||||
|
||||
if (embeddingMgr.embeddedCount() > 0) {
|
||||
const results = await embeddingMgr.hybridSearch(prompt, searchLimit, {
|
||||
ftsWeight: 0.35,
|
||||
cosineWeight: 0.45,
|
||||
temporalWeight: 0.20,
|
||||
});
|
||||
topFacts = results.filter(f => f.confidence >= 0.5 && !hotIds.has(f.id));
|
||||
} else {
|
||||
const fetchLimit = Math.min(searchLimit * 2, 20);
|
||||
const facts = db.searchFacts(prompt, fetchLimit);
|
||||
if (!facts || facts.length === 0 && hotScored.length === 0) return undefined;
|
||||
const relevant = (facts || []).filter(f => f.confidence >= 0.5 && !hotIds.has(f.id));
|
||||
const scored = scoreAndRank(relevant);
|
||||
topFacts = scored.slice(0, searchLimit);
|
||||
}
|
||||
|
||||
if (topFacts.length === 0) return undefined;
|
||||
|
||||
// Graph enrichment: find entities in the query, traverse graph for related facts
|
||||
let graphFacts: Fact[] = [];
|
||||
try {
|
||||
const entities = graph.findEntitiesInText(prompt);
|
||||
if (entities.length > 0) {
|
||||
const related = graph.getRelatedFacts(entities.map(e => e.name), 2, 3);
|
||||
const existingIds = new Set(topFacts.map(f => f.id));
|
||||
for (const r of related) {
|
||||
if (!existingIds.has(r.id)) {
|
||||
const fact = db.getFact(r.id);
|
||||
if (fact) graphFacts.push(fact);
|
||||
}
|
||||
}
|
||||
|
||||
// Hebbian: reinforce connections between co-accessed entities
|
||||
const entityIds = entities.map(e => e.id).filter(Boolean) as string[];
|
||||
if (entityIds.length >= 2) {
|
||||
graph.hebbianReinforce(entityIds);
|
||||
}
|
||||
}
|
||||
} catch (e) { api?.logger?.debug?.('memoria:graph-enrichment: ' + String(e)); }
|
||||
|
||||
// Topic enrichment: find relevant topics and add their facts
|
||||
const expandedQueries = embeddingMgr.expandQuery(prompt);
|
||||
let topicFacts: Fact[] = [];
|
||||
try {
|
||||
const relevantTopics = await topicMgr.findRelevantTopics(prompt, 3, expandedQueries);
|
||||
if (relevantTopics.length > 0) {
|
||||
const existingIds = new Set([...topFacts.map(f => f.id), ...graphFacts.map(f => f.id)]);
|
||||
for (const rt of relevantTopics) {
|
||||
for (const factText of rt.facts.slice(0, 3)) {
|
||||
const found = db.searchFacts(factText.slice(0, 80), 1);
|
||||
if (found.length > 0 && !existingIds.has(found[0].id)) {
|
||||
topicFacts.push(found[0]);
|
||||
existingIds.add(found[0].id);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (relevantTopics.length > 0) {
|
||||
api.logger.debug?.(`memoria: topics matched: ${relevantTopics.map(t => t.topic.name).join(", ")}`);
|
||||
}
|
||||
}
|
||||
} catch (e) { api?.logger?.debug?.('memoria:topic-enrichment: ' + String(e)); }
|
||||
|
||||
// Observations: synthesized multi-fact summaries
|
||||
let observationContext = "";
|
||||
try {
|
||||
const relevantObs = await observationMgr.getRelevantObservations(prompt);
|
||||
if (relevantObs.length > 0) {
|
||||
observationContext = observationMgr.formatForRecall(relevantObs);
|
||||
api.logger.debug?.(`memoria: ${relevantObs.length} observations matched`);
|
||||
}
|
||||
} catch (e) { api?.logger?.debug?.('memoria:observations-recall: ' + String(e)); }
|
||||
|
||||
// Procedural memory: search for matching "how-to" procedures
|
||||
let proceduresContext = "";
|
||||
const matchedProcedureIds: string[] = [];
|
||||
try {
|
||||
// Strategy 1: Direct text search
|
||||
let procedures = proceduralMem.search(prompt, 3);
|
||||
|
||||
// Strategy 2: If few results, expand via Graph entities
|
||||
if (procedures.length < 2) {
|
||||
try {
|
||||
const graphEntities = graph.findEntitiesInText(prompt);
|
||||
if (graphEntities.length > 0) {
|
||||
const relatedTerms = graphEntities
|
||||
.flatMap((e: any) => [e.name, ...(e.aliases || [])])
|
||||
.slice(0, 5);
|
||||
for (const term of relatedTerms) {
|
||||
const extra = proceduralMem.search(term, 2);
|
||||
for (const p of extra) {
|
||||
if (!procedures.find(existing => existing.id === p.id)) {
|
||||
procedures.push(p);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (procedures.length > 3) procedures = procedures.slice(0, 3);
|
||||
}
|
||||
} catch (e) { api?.logger?.debug?.('memoria:graph-expansion: ' + String(e)); }
|
||||
}
|
||||
|
||||
if (procedures.length > 0) {
|
||||
// Limit to max 2 procedures, show max 3 steps each (avoid context bloat)
|
||||
const MAX_PROCEDURES = 2;
|
||||
const MAX_STEPS = 3;
|
||||
const procTexts: string[] = [];
|
||||
for (const proc of procedures.slice(0, MAX_PROCEDURES)) {
|
||||
matchedProcedureIds.push(proc.id);
|
||||
const successRate = proc.success_count / Math.max(proc.success_count + proc.failure_count, 1);
|
||||
const degThreshold = cfg.procedural?.degradedThreshold ?? 0.5;
|
||||
const isStale = proceduralMem.needsDocCheck(proc);
|
||||
const isDegraded = proc.degradation_score > degThreshold;
|
||||
const status = isDegraded ? "⚠ degraded"
|
||||
: isStale ? "🕰️ stale — verify before using"
|
||||
: proc.preferred ? "★ preferred" : "✓";
|
||||
const qualityStr = `quality: ${(proc.quality.overall * 100).toFixed(0)}%`;
|
||||
const versionStr = proc.version > 1 ? ` v${proc.version}` : '';
|
||||
const gotchaStr = proc.gotchas ? `\n ⚠ Gotchas: ${proc.gotchas.slice(0, 200)}` : '';
|
||||
const staleStr = isStale ? `\n 🕰️ Stale — verify before using` : '';
|
||||
const truncatedSteps = proc.steps.slice(0, MAX_STEPS);
|
||||
const moreSteps = proc.steps.length > MAX_STEPS ? `\n ... (+${proc.steps.length - MAX_STEPS} more steps)` : '';
|
||||
procTexts.push(
|
||||
`**${proc.name}**${versionStr} ${status} (${(successRate * 100).toFixed(0)}% success, ${qualityStr}):\n` +
|
||||
truncatedSteps.map((s, i) => ` ${i + 1}. ${s.slice(0, 300)}`).join('\n') +
|
||||
moreSteps +
|
||||
gotchaStr +
|
||||
staleStr
|
||||
);
|
||||
}
|
||||
proceduresContext = `\n## 🔧 Known Procedures\n${procTexts.join('\n\n')}\n`;
|
||||
api.logger.debug?.(`memoria: ${procedures.length} procedures matched, showing ${Math.min(procedures.length, MAX_PROCEDURES)} (graph-expanded)`);
|
||||
}
|
||||
} catch (e) { api?.logger?.debug?.('memoria:procedural-recall: ' + String(e)); }
|
||||
|
||||
// Context tree: organize facts hierarchically, weight by query
|
||||
let finalFacts: Fact[] = [];
|
||||
try {
|
||||
// Build set of fact IDs that are members of active (non-stale) clusters
|
||||
let clusteredFactIds: Set<string> = new Set();
|
||||
try {
|
||||
const clusters = db.raw.prepare(
|
||||
"SELECT tags FROM facts WHERE fact_type = 'cluster' AND superseded = 0"
|
||||
).all() as Array<{ tags: string }>;
|
||||
for (const c of clusters) {
|
||||
try {
|
||||
const meta = JSON.parse(c.tags);
|
||||
if (!meta.stale && Array.isArray(meta.memberIds)) {
|
||||
for (const id of meta.memberIds) clusteredFactIds.add(id);
|
||||
}
|
||||
} catch (e) { api?.logger?.debug?.('memoria:json-parse: ' + String(e)); }
|
||||
}
|
||||
} catch (e) { api?.logger?.debug?.('memoria:cluster-parse: ' + String(e)); }
|
||||
|
||||
// Apply lifecycle multiplier + expertise boost + cluster-member deprioritization
|
||||
const allFactsCandidates = [...hotScored, ...topFacts, ...graphFacts, ...topicFacts].map((f: any) => {
|
||||
let mult = lifecycleMgr.getRecallMultiplier(f.lifecycle_state);
|
||||
if (clusteredFactIds.has(f.id) && f.fact_type !== "cluster") {
|
||||
mult *= 0.6;
|
||||
}
|
||||
try {
|
||||
const factTopics = db.raw.prepare(
|
||||
"SELECT t.name FROM topics t JOIN fact_topics ft ON ft.topic_id = t.id WHERE ft.fact_id = ?"
|
||||
).all(f.id) as Array<{ name: string }>;
|
||||
if (factTopics.length > 0) {
|
||||
const boost = expertiseMgr.applyExpertiseBoost(1.0, factTopics.map(t => t.name));
|
||||
if (boost > 1.0) mult *= boost;
|
||||
}
|
||||
} catch (e) { api?.logger?.debug?.('memoria:expertise: ' + String(e)); }
|
||||
mult *= patternMgr.applyPatternBoost(1.0, f.fact_type);
|
||||
if ((f as any).temporalScore) {
|
||||
return { ...f, temporalScore: (f as any).temporalScore * mult };
|
||||
}
|
||||
return f;
|
||||
});
|
||||
const tree = await treeBuilder.build(allFactsCandidates as any, prompt);
|
||||
|
||||
finalFacts = treeBuilder.extractFacts(tree, recallLimit) as any;
|
||||
|
||||
if (tree.roots.length > 0) {
|
||||
const treeView = treeBuilder.renderTree(tree, 2);
|
||||
api.logger.debug?.(`memoria tree:\n${treeView}`);
|
||||
}
|
||||
} catch (e) {
|
||||
api?.logger?.debug?.('memoria:tree-build: ' + String(e));
|
||||
finalFacts = [...topFacts, ...graphFacts, ...topicFacts].slice(0, recallLimit) as any;
|
||||
}
|
||||
|
||||
if (finalFacts.length === 0 && !observationContext && !proceduresContext) return undefined;
|
||||
|
||||
// Self-observation: append agent profile if enough data
|
||||
let selfObsContext = "";
|
||||
try {
|
||||
if (deps.selfObserver) {
|
||||
selfObsContext = deps.selfObserver.formatForPrompt();
|
||||
}
|
||||
} catch (e) { api?.logger?.debug?.('memoria:self-obs-recall: ' + String(e)); }
|
||||
|
||||
const context = formatRecallContext(finalFacts as any, observationContext) + proceduresContext + selfObsContext;
|
||||
|
||||
// Track access + feedback loop + budget learning + lifecycle update
|
||||
const ids = finalFacts.map(f => f.id);
|
||||
try { db.trackAccess(ids); } catch (e) { api?.logger?.debug?.('memoria:track-access: ' + String(e)); }
|
||||
try { feedbackMgr.recordRecall(ids, prompt); } catch (e) { api?.logger?.debug?.('memoria:feedback-record: ' + String(e)); }
|
||||
try { budget.recordRecall(recallLimit); } catch (e) { api?.logger?.debug?.('memoria:budget-record: ' + String(e)); }
|
||||
|
||||
try {
|
||||
for (const fact of finalFacts) {
|
||||
lifecycleMgr.updateLifecycle(fact);
|
||||
}
|
||||
} catch (e) { api?.logger?.debug?.('memoria:lifecycle-update: ' + String(e)); }
|
||||
|
||||
// Proactive revision: check if any settled facts need refinement (async, non-blocking)
|
||||
setImmediate(async () => {
|
||||
try {
|
||||
const revResult = await revisionMgr.checkAndRevise();
|
||||
if (revResult.revised > 0) {
|
||||
api.logger.info?.(`memoria: proactive revision completed (${revResult.revised} refined, ${revResult.created} new facts)`);
|
||||
}
|
||||
} catch (err) {
|
||||
api.logger.debug?.(`memoria: proactive revision failed: ${String(err)}`);
|
||||
}
|
||||
});
|
||||
|
||||
const hotNote = hotLimit > 0 ? `, ${hotLimit} hot` : "";
|
||||
const graphNote = graphFacts.length > 0 ? `, +${graphFacts.length} graph` : "";
|
||||
const obsNote = observationContext ? ", +obs" : "";
|
||||
api.logger.info?.(`memoria: recall injected ${finalFacts.length} facts${obsNote} (${hotNote}${graphNote}, tree+hybrid) for "${prompt.slice(0, 50)}..."`);
|
||||
return context;
|
||||
} catch (err) {
|
||||
api.logger.warn?.(`memoria: recall failed: ${String(err)}`);
|
||||
return undefined;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Register the before_prompt_build hook (recall / Layer 6).
|
||||
* Checks prefetch cache first; falls back to sync computeRecall.
|
||||
*/
|
||||
export function registerRecallHook(deps: RecallDeps): void {
|
||||
const { api, cfg, prefetchCache } = deps;
|
||||
|
||||
if (!cfg.autoRecall) return;
|
||||
|
||||
api.on("before_prompt_build", async (event: any, _ctx: any) => {
|
||||
try {
|
||||
const rawPrompt = typeof event.prompt === "string" ? event.prompt : "";
|
||||
if (!rawPrompt || rawPrompt.length < 3) return undefined;
|
||||
|
||||
// ── Prefetch cache: check if recall was already computed async ──
|
||||
if (prefetchCache) {
|
||||
const cached = await prefetchCache.get(rawPrompt, 3_000);
|
||||
if (cached?.result) {
|
||||
api.logger.debug?.(`memoria: ⚡ prefetch HIT (computed in ${cached.computeTimeMs}ms, age ${Date.now() - cached.timestamp}ms)`);
|
||||
return { prependContext: cached.result };
|
||||
}
|
||||
if (cached) {
|
||||
api.logger.debug?.(`memoria: prefetch completed but no results`);
|
||||
} else {
|
||||
api.logger.debug?.(`memoria: prefetch MISS — falling back to sync recall`);
|
||||
}
|
||||
}
|
||||
|
||||
// Sync fallback: compute recall now
|
||||
const prompt = extractUserPrompt(rawPrompt);
|
||||
const msgCount = (event as any).messageCount || (event as any).messages?.length || 0;
|
||||
const context = await computeRecall(prompt, msgCount, deps);
|
||||
if (!context) return undefined;
|
||||
return { prependContext: context };
|
||||
} catch (err) {
|
||||
api.logger.warn?.(`memoria: recall hook failed: ${String(err)}`);
|
||||
return undefined;
|
||||
}
|
||||
});
|
||||
}
|
||||
223
openclaw-memoria-port/tests/test-core.ts
Normal file
223
openclaw-memoria-port/tests/test-core.ts
Normal file
@@ -0,0 +1,223 @@
|
||||
/**
|
||||
* Memoria Core Tests — validates DB, scoring, selective, clusters
|
||||
* Run: npx tsx tests/test-core.ts
|
||||
*
|
||||
* These tests use SQLite in-memory and don't require LLM/Ollama.
|
||||
*/
|
||||
|
||||
import { MemoriaDB } from "../db.js";
|
||||
import { scoreFact, scoreAndRank } from "../scoring.js";
|
||||
import { AdaptiveBudget } from "../budget.js";
|
||||
import fs from "fs";
|
||||
import path from "path";
|
||||
import os from "os";
|
||||
|
||||
let passed = 0;
|
||||
let failed = 0;
|
||||
|
||||
function assert(condition: boolean, name: string) {
|
||||
if (condition) {
|
||||
console.log(` ✅ ${name}`);
|
||||
passed++;
|
||||
} else {
|
||||
console.log(` ❌ ${name}`);
|
||||
failed++;
|
||||
}
|
||||
}
|
||||
|
||||
// ─── Setup temp workspace ───
|
||||
const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "memoria-test-"));
|
||||
const memoryDir = path.join(tmpDir, "memory");
|
||||
fs.mkdirSync(memoryDir, { recursive: true });
|
||||
|
||||
console.log("🧪 Memoria Core Tests\n");
|
||||
|
||||
// ═══════════════════════════════════════
|
||||
// TEST 1: Database CRUD
|
||||
// ═══════════════════════════════════════
|
||||
console.log("📦 DB Tests:");
|
||||
const db = new MemoriaDB(tmpDir);
|
||||
|
||||
// Store facts
|
||||
const f1 = db.storeFact({
|
||||
id: "test_1", fact: "Sol uses gemma3:4b for extraction",
|
||||
category: "outil", confidence: 0.9, source: "test",
|
||||
tags: "[]", agent: "koda", created_at: Date.now(), updated_at: Date.now(),
|
||||
fact_type: "semantic"
|
||||
});
|
||||
assert(f1.id === "test_1", "storeFact returns correct id");
|
||||
|
||||
const f2 = db.storeFact({
|
||||
id: "test_2", fact: "Alexandre earns 5.19€/h",
|
||||
category: "rh", confidence: 0.85, source: "test",
|
||||
tags: "[]", agent: "koda", created_at: Date.now(), updated_at: Date.now(),
|
||||
fact_type: "semantic"
|
||||
});
|
||||
|
||||
const f3 = db.storeFact({
|
||||
id: "test_3", fact: "Alexandre earns 6.50€/h after raise",
|
||||
category: "rh", confidence: 0.9, source: "test",
|
||||
tags: "[]", agent: "koda", created_at: Date.now(), updated_at: Date.now(),
|
||||
fact_type: "semantic"
|
||||
});
|
||||
|
||||
// Get
|
||||
const got = db.getFact("test_1");
|
||||
assert(got !== undefined && got.fact.includes("gemma3"), "getFact works");
|
||||
|
||||
// Search
|
||||
const results = db.searchFacts("gemma3", 5);
|
||||
assert(results.length >= 1, "searchFacts returns results");
|
||||
assert(results[0].fact.includes("gemma3"), "searchFacts finds correct fact");
|
||||
|
||||
// Supersede
|
||||
db.supersedeFact("test_2", "test_3");
|
||||
const superseded = db.getFact("test_2");
|
||||
assert(superseded!.superseded === 1, "supersedeFact marks old fact");
|
||||
assert(superseded!.superseded_by === "test_3", "supersedeFact links to new fact");
|
||||
|
||||
// Active search excludes superseded
|
||||
const activeResults = db.searchFacts("Alexandre", 5);
|
||||
assert(activeResults.every(f => f.superseded === 0), "searchFacts excludes superseded");
|
||||
|
||||
// Stats
|
||||
const stats = db.stats();
|
||||
assert(stats.total === 3, "stats total correct");
|
||||
assert(stats.active === 2, "stats active excludes superseded");
|
||||
|
||||
// Hot facts
|
||||
db.trackAccess(["test_1", "test_1", "test_1", "test_1", "test_1"]);
|
||||
const hot = db.hotFacts(5, 30, 5);
|
||||
assert(hot.length >= 1 && hot[0].id === "test_1", "hotFacts returns frequently accessed");
|
||||
|
||||
// Recent facts
|
||||
const recent = db.recentFacts(1, 10);
|
||||
assert(recent.length >= 1, "recentFacts returns results");
|
||||
|
||||
// ═══════════════════════════════════════
|
||||
// TEST 2: Scoring
|
||||
// ═══════════════════════════════════════
|
||||
console.log("\n📊 Scoring Tests:");
|
||||
|
||||
const nowMs = Date.now();
|
||||
const freshFact = {
|
||||
id: "score_1", fact: "Fresh fact", category: "savoir", confidence: 0.9,
|
||||
source: "test", tags: "[]", agent: "koda",
|
||||
created_at: nowMs - 3600000, updated_at: nowMs - 3600000, // 1 hour ago
|
||||
access_count: 0, last_accessed_at: null,
|
||||
superseded: 0, superseded_by: null, superseded_at: null,
|
||||
md_file: null, md_line: null, entity_ids: "[]", fact_type: "semantic" as const
|
||||
};
|
||||
|
||||
const oldFact = {
|
||||
...freshFact,
|
||||
id: "score_2", fact: "Old fact",
|
||||
created_at: nowMs - 30 * 24 * 3600000, updated_at: nowMs - 30 * 24 * 3600000, // 30 days ago
|
||||
};
|
||||
|
||||
const errorFact = {
|
||||
...freshFact,
|
||||
id: "score_3", fact: "Error fact", category: "erreur",
|
||||
};
|
||||
|
||||
const scored1 = scoreFact(freshFact);
|
||||
const scored2 = scoreFact(oldFact);
|
||||
assert(scored1.temporalScore > scored2.temporalScore, "Fresh fact scores higher than old");
|
||||
|
||||
const scoredError = scoreFact(errorFact);
|
||||
assert(scoredError.temporalScore >= scored1.temporalScore * 0.9, "Error facts are immune to decay");
|
||||
|
||||
// Episodic decays faster
|
||||
const episodicFact = { ...oldFact, id: "score_4", fact_type: "episodic" as const };
|
||||
const scoredEpisodic = scoreFact(episodicFact);
|
||||
assert(scoredEpisodic.temporalScore < scored2.temporalScore, "Episodic decays faster than semantic");
|
||||
|
||||
// Cluster boost
|
||||
const clusterFact = { ...freshFact, id: "score_5", fact_type: "cluster" as any };
|
||||
const scoredCluster = scoreFact(clusterFact);
|
||||
assert(scoredCluster.temporalScore > scored1.temporalScore, "Cluster facts get scoring boost");
|
||||
|
||||
// scoreAndRank
|
||||
const ranked = scoreAndRank([oldFact, freshFact]);
|
||||
assert(ranked[0].id === "score_1", "scoreAndRank puts fresh first");
|
||||
|
||||
// ═══════════════════════════════════════
|
||||
// TEST 3: Adaptive Budget
|
||||
// ═══════════════════════════════════════
|
||||
console.log("\n📐 Budget Tests:");
|
||||
|
||||
const budget = new AdaptiveBudget({ contextWindow: 200000, maxFacts: 12, minFacts: 2 });
|
||||
|
||||
const light = budget.compute(10000);
|
||||
assert(light.zone === "light" && light.limit >= 8, "Light context → high fact count");
|
||||
|
||||
const heavy = budget.compute(150000);
|
||||
assert(heavy.zone === "heavy" && heavy.limit <= 4, "Heavy context → low fact count");
|
||||
|
||||
const empty = budget.compute(0);
|
||||
assert(empty.limit >= 10, "Empty context → max facts");
|
||||
|
||||
// ═══════════════════════════════════════
|
||||
// TEST 4: Entity extraction (fact-clusters helpers)
|
||||
// ═══════════════════════════════════════
|
||||
console.log("\n🧩 Cluster Entity Tests:");
|
||||
|
||||
// Store facts with known entities
|
||||
db.storeFact({
|
||||
id: "ent_1", fact: "Sol runs Ollama with gemma3:4b",
|
||||
category: "outil", confidence: 0.9, source: "test",
|
||||
tags: "[]", agent: "koda", created_at: Date.now(), updated_at: Date.now(),
|
||||
fact_type: "semantic"
|
||||
});
|
||||
db.storeFact({
|
||||
id: "ent_2", fact: "Sol has nomic-embed-text-v2-moe installed",
|
||||
category: "outil", confidence: 0.9, source: "test",
|
||||
tags: "[]", agent: "koda", created_at: Date.now(), updated_at: Date.now(),
|
||||
fact_type: "semantic"
|
||||
});
|
||||
db.storeFact({
|
||||
id: "ent_3", fact: "Sol is a Mac Mini available 24/7",
|
||||
category: "outil", confidence: 0.9, source: "test",
|
||||
tags: "[]", agent: "koda", created_at: Date.now(), updated_at: Date.now(),
|
||||
fact_type: "semantic"
|
||||
});
|
||||
|
||||
// Verify the facts are stored and searchable
|
||||
const solFacts = db.searchFacts("Sol", 10);
|
||||
assert(solFacts.length >= 3, "Sol facts stored and searchable via FTS");
|
||||
|
||||
// Verify cluster type can be stored
|
||||
db.storeFact({
|
||||
id: "cluster_test_1",
|
||||
fact: "Sol (Mac Mini): machine dev 24/7, Ollama gemma3:4b + nomic-embed, disponible en permanence",
|
||||
category: "outil", confidence: 0.85, source: "cluster:sol",
|
||||
tags: JSON.stringify({ memberIds: ["ent_1", "ent_2", "ent_3"], entityName: "Sol", generatedAt: Date.now(), stale: false }),
|
||||
agent: "memoria", created_at: Date.now(), updated_at: Date.now(),
|
||||
fact_type: "cluster" as any,
|
||||
});
|
||||
|
||||
const clusterSearch = db.searchFacts("Sol Mac Mini", 10);
|
||||
const hasCluster = clusterSearch.some(f => (f as any).fact_type === "cluster" || f.source?.startsWith("cluster:"));
|
||||
assert(hasCluster, "Cluster fact searchable via FTS");
|
||||
|
||||
// ═══════════════════════════════════════
|
||||
// TEST 5: DB migration + fact_type column
|
||||
// ═══════════════════════════════════════
|
||||
console.log("\n🔄 Migration Tests:");
|
||||
|
||||
// fact_type column should exist (added by migration)
|
||||
const cols = db.raw.prepare("PRAGMA table_info(facts)").all() as Array<{ name: string }>;
|
||||
assert(cols.some(c => c.name === "fact_type"), "fact_type column exists after migration");
|
||||
|
||||
// ═══════════════════════════════════════
|
||||
// SUMMARY
|
||||
// ═══════════════════════════════════════
|
||||
console.log(`\n${"=".repeat(50)}`);
|
||||
console.log(`🧪 Results: ${passed} passed, ${failed} failed (${passed + failed} total)`);
|
||||
console.log(`${"=".repeat(50)}`);
|
||||
|
||||
// Cleanup
|
||||
db.close();
|
||||
fs.rmSync(tmpDir, { recursive: true, force: true });
|
||||
|
||||
process.exit(failed > 0 ? 1 : 0);
|
||||
24
openclaw-memoria-port/tsconfig.json
Normal file
24
openclaw-memoria-port/tsconfig.json
Normal file
@@ -0,0 +1,24 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
"target": "ES2022",
|
||||
"module": "ES2022",
|
||||
"moduleResolution": "bundler",
|
||||
"declaration": true,
|
||||
"declarationMap": true,
|
||||
"sourceMap": true,
|
||||
"outDir": "dist",
|
||||
"rootDir": ".",
|
||||
"strict": false,
|
||||
"noImplicitAny": false,
|
||||
"types": ["node"],
|
||||
"esModuleInterop": true,
|
||||
"skipLibCheck": true,
|
||||
"forceConsistentCasingInFileNames": true,
|
||||
"isolatedModules": true,
|
||||
"verbatimModuleSyntax": true,
|
||||
"allowImportingTsExtensions": true,
|
||||
"noEmit": true
|
||||
},
|
||||
"include": ["index.ts", "*.ts", "core/**/*.ts"],
|
||||
"exclude": ["node_modules", "dist", "**/*.test.ts"]
|
||||
}
|
||||
15
openclaw-memoria-port/tsup.config.ts
Normal file
15
openclaw-memoria-port/tsup.config.ts
Normal file
@@ -0,0 +1,15 @@
|
||||
import { defineConfig } from "tsup";
|
||||
import pkg from "./package.json";
|
||||
|
||||
export default defineConfig({
|
||||
entry: ["index.ts"],
|
||||
format: ["esm"],
|
||||
splitting: true,
|
||||
dts: true,
|
||||
sourcemap: true,
|
||||
clean: true,
|
||||
external: [/^node:/, /^openclaw\//, "fs", "os", "path", "url", "readline", "module", "better-sqlite3"],
|
||||
define: {
|
||||
__MEMORIA_PLUGIN_VERSION__: JSON.stringify(pkg.version),
|
||||
},
|
||||
});
|
||||
Reference in New Issue
Block a user