fix(security): 添加VITE_PAYMENT_URL环境变量配置
This commit is contained in:
140
.qoder/skills/understand-domain/SKILL.md
Normal file
140
.qoder/skills/understand-domain/SKILL.md
Normal file
@@ -0,0 +1,140 @@
|
||||
---
name: understand-domain
description: Extract business domain knowledge from a codebase and generate an interactive domain flow graph. Works standalone (lightweight scan) or derives from an existing /understand knowledge graph.
argument-hint: [--full]
---
|
||||
|
||||
# /understand-domain
|
||||
|
||||
Extracts business domain knowledge — domains, business flows, and process steps — from a codebase and produces an interactive horizontal flow graph in the dashboard.
|
||||
|
||||
## How It Works
|
||||
|
||||
- If a knowledge graph already exists (`.understand-anything/knowledge-graph.json`), derives domain knowledge from it (cheap, no file scanning)
|
||||
- If no knowledge graph exists, performs a lightweight scan: file tree + entry point detection + sampled files
|
||||
- Use `--full` flag to force a fresh scan even if a knowledge graph exists
|
||||
|
||||
## Instructions
|
||||
|
||||
### Phase 0: Resolve `PROJECT_ROOT`
|
||||
|
||||
Set `PROJECT_ROOT` to the current working directory.
|
||||
|
||||
**Worktree redirect.** If `PROJECT_ROOT` is inside a git worktree (not the main checkout), redirect output to the main repository root. Worktrees managed by Claude Code are ephemeral — `.understand-anything/` written there is destroyed when the session ends, taking the domain graph with it (issue #133). Detect a worktree by comparing `git rev-parse --git-dir` against `git rev-parse --git-common-dir`; in a normal checkout or submodule they resolve to the same path, in a worktree they differ and the parent of `--git-common-dir` is the main repo root.
|
||||
|
||||
```bash
|
||||
# Ask git for the shared (common) git dir and the per-checkout git dir.
# stderr is discarded, so outside a git repository both variables stay empty
# and the whole redirect is skipped.
COMMON_DIR=$(git -C "$PROJECT_ROOT" rev-parse --git-common-dir 2>/dev/null)
GIT_DIR=$(git -C "$PROJECT_ROOT" rev-parse --git-dir 2>/dev/null)
if [ -n "$COMMON_DIR" ] && [ -n "$GIT_DIR" ]; then
  # Canonicalise both paths (relative to PROJECT_ROOT, symlinks resolved by
  # `pwd -P`) so the comparison below is not fooled by different spellings.
  COMMON_ABS=$(cd "$PROJECT_ROOT" && cd "$COMMON_DIR" 2>/dev/null && pwd -P)
  GIT_ABS=$(cd "$PROJECT_ROOT" && cd "$GIT_DIR" 2>/dev/null && pwd -P)
  # Different directories => PROJECT_ROOT is a linked worktree.
  if [ -n "$COMMON_ABS" ] && [ "$COMMON_ABS" != "$GIT_ABS" ]; then
    # The main checkout is the parent of the common git dir.
    MAIN_ROOT=$(dirname "$COMMON_ABS")
    # UNDERSTAND_NO_WORKTREE_REDIRECT=1 is the opt-out switch.
    if [ -d "$MAIN_ROOT" ] && [ "${UNDERSTAND_NO_WORKTREE_REDIRECT:-0}" != "1" ]; then
      echo "[understand-domain] Detected git worktree at $PROJECT_ROOT"
      echo "[understand-domain] Redirecting output to main repo root: $MAIN_ROOT"
      echo "[understand-domain] (Set UNDERSTAND_NO_WORKTREE_REDIRECT=1 to keep PROJECT_ROOT as the worktree.)"
      PROJECT_ROOT="$MAIN_ROOT"
    fi
  fi
fi
|
||||
```
|
||||
|
||||
Use `$PROJECT_ROOT` (not the bare CWD) for every reference to "the current project" / `<project-root>` in subsequent phases.
|
||||
|
||||
**Important:** do **not** assume the plugin root is simply two directories above the skill path string. In many installations `~/.agents/skills/understand-domain` is a symlink into the real plugin checkout. Prefer runtime-provided plugin roots first (for Claude), then fall back to universal symlinks, skill symlink resolution, and common clone-based install paths.
|
||||
|
||||
Resolve the plugin root like this:
|
||||
|
||||
```bash
|
||||
# Resolve the skill directory through any symlink (realpath first, then
# `readlink -f` as a fallback); empty string when neither succeeds.
SKILL_REAL=$(realpath ~/.agents/skills/understand-domain 2>/dev/null || readlink -f ~/.agents/skills/understand-domain 2>/dev/null || echo "")
# Candidate plugin root: two levels above the *resolved* skill directory.
SELF_RELATIVE=$([ -n "$SKILL_REAL" ] && cd "$SKILL_REAL/../.." 2>/dev/null && pwd || echo "")
# Same resolution for the ~/.copilot skills location.
COPILOT_SKILL_REAL=$(realpath ~/.copilot/skills/understand-domain 2>/dev/null || readlink -f ~/.copilot/skills/understand-domain 2>/dev/null || echo "")
COPILOT_SELF_RELATIVE=$([ -n "$COPILOT_SKILL_REAL" ] && cd "$COPILOT_SKILL_REAL/../.." 2>/dev/null && pwd || echo "")

# Try the candidates in priority order; the first non-empty one that contains
# both package.json and pnpm-workspace.yaml is taken as the plugin root.
PLUGIN_ROOT=""
for candidate in \
  "${CLAUDE_PLUGIN_ROOT}" \
  "$HOME/.understand-anything-plugin" \
  "$SELF_RELATIVE" \
  "$COPILOT_SELF_RELATIVE" \
  "$HOME/.codex/understand-anything/understand-anything-plugin" \
  "$HOME/.opencode/understand-anything/understand-anything-plugin" \
  "$HOME/.pi/understand-anything/understand-anything-plugin" \
  "$HOME/understand-anything/understand-anything-plugin"; do
  if [ -n "$candidate" ] && [ -f "$candidate/package.json" ] && [ -f "$candidate/pnpm-workspace.yaml" ]; then
    PLUGIN_ROOT="$candidate"
    break
  fi
done

# No candidate matched: list every location that was checked, then abort.
if [ -z "$PLUGIN_ROOT" ]; then
  echo "Error: Cannot find the understand-anything plugin root."
  echo "Checked:"
  echo " - ${CLAUDE_PLUGIN_ROOT:-<unset CLAUDE_PLUGIN_ROOT>}"
  echo " - $HOME/.understand-anything-plugin"
  echo " - ${SELF_RELATIVE:-<unresolved path derived from ~/.agents/skills/understand-domain>}"
  echo " - ${COPILOT_SELF_RELATIVE:-<unresolved path derived from ~/.copilot/skills/understand-domain>}"
  echo " - $HOME/.codex/understand-anything/understand-anything-plugin"
  echo " - $HOME/.opencode/understand-anything/understand-anything-plugin"
  echo " - $HOME/.pi/understand-anything/understand-anything-plugin"
  echo " - $HOME/understand-anything/understand-anything-plugin"
  echo "Make sure the plugin is installed correctly."
  exit 1
fi
|
||||
```
|
||||
|
||||
Use `$PLUGIN_ROOT` for every reference to agent definitions in subsequent phases.
|
||||
|
||||
### Phase 1: Detect Existing Graph
|
||||
|
||||
1. Check if `$PROJECT_ROOT/.understand-anything/knowledge-graph.json` exists
|
||||
2. If it exists AND `--full` was NOT passed → proceed to Phase 3 (derive from graph)
|
||||
3. Otherwise → proceed to Phase 2 (lightweight scan)
|
||||
|
||||
### Phase 2: Lightweight Scan (Path 1)
|
||||
|
||||
The preprocessing script does NOT produce a domain graph — it produces **raw material** (file tree, entry points, exports/imports) so the domain-analyzer agent can focus on the actual domain analysis instead of spending dozens of tool calls exploring the codebase. Think of it as a cheat sheet: cheap Python preprocessing → expensive LLM gets a clean, small input → better results for less cost.
|
||||
|
||||
1. Run the preprocessing script bundled with this skill, passing `$PROJECT_ROOT` from Phase 0:

   ```
   python ./extract-domain-context.py "$PROJECT_ROOT"
   ```

   This outputs `$PROJECT_ROOT/.understand-anything/intermediate/domain-context.json` containing:
   - File tree (respecting `.gitignore`)
   - Detected entry points (HTTP routes, CLI commands, event handlers, cron jobs, exported handlers)
   - File signatures (exports, imports per file)
   - Code snippets for each entry point (signature + first few lines)
   - Project metadata (package.json, README, etc.)
2. Read the generated `domain-context.json` as context for Phase 4
3. Proceed to Phase 4
|
||||
|
||||
### Phase 3: Derive from Existing Graph (Path 2)
|
||||
|
||||
1. Read `$PROJECT_ROOT/.understand-anything/knowledge-graph.json`
2. Format the graph data as structured context:
   - All nodes with their types, names, summaries, and tags
   - All edges with their types (especially `calls`, `imports`, `contains`)
   - All layers with their descriptions
   - Tour steps if available
3. This is the context for the domain analyzer — no file reading needed
4. Proceed to Phase 4
|
||||
|
||||
### Phase 4: Domain Analysis
|
||||
|
||||
1. Read the domain-analyzer agent prompt from `$PLUGIN_ROOT/agents/domain-analyzer.md`
|
||||
2. Dispatch a subagent with the domain-analyzer prompt + the context from Phase 2 or 3
|
||||
3. The agent writes its output to `$PROJECT_ROOT/.understand-anything/intermediate/domain-analysis.json`
|
||||
|
||||
### Phase 5: Validate and Save
|
||||
|
||||
1. Read the domain analysis output
|
||||
2. Validate using the standard graph validation pipeline (the schema now supports domain/flow/step types)
|
||||
3. If validation fails, log warnings but save what's valid (error tolerance)
|
||||
4. Save to `$PROJECT_ROOT/.understand-anything/domain-graph.json`
|
||||
5. Clean up `$PROJECT_ROOT/.understand-anything/intermediate/domain-analysis.json` and `$PROJECT_ROOT/.understand-anything/intermediate/domain-context.json`
|
||||
|
||||
### Phase 6: Launch Dashboard
|
||||
|
||||
1. Auto-trigger `/understand-dashboard` to visualize the domain graph
|
||||
2. The dashboard will detect `domain-graph.json` and show the domain view by default
|
||||
428
.qoder/skills/understand-domain/extract-domain-context.py
Normal file
428
.qoder/skills/understand-domain/extract-domain-context.py
Normal file
@@ -0,0 +1,428 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
extract-domain-context.py — Lightweight codebase scanner for domain knowledge extraction.
|
||||
|
||||
Scans a project directory and produces a structured JSON context file that the
|
||||
domain-analyzer agent uses to identify business domains, flows, and steps.
|
||||
|
||||
Usage:
|
||||
python extract-domain-context.py <project-root>
|
||||
|
||||
Output:
|
||||
<project-root>/.understand-anything/intermediate/domain-context.json
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
# ── Configuration ──────────────────────────────────────────────────────────
|
||||
|
||||
# Deepest directory level (project root is depth 0) that scan_file_tree descends into.
MAX_FILE_TREE_DEPTH = 6
# Maximum number of source files collected from any single directory.
MAX_FILES_PER_DIR = 50
# Hard cap on the total number of source files collected in one scan.
MAX_FILES_TOTAL = 5000
# Number of files for which export/import signatures are extracted.
MAX_SAMPLED_FILES = 40
# Only this many leading lines of a sampled file are inspected.
MAX_LINES_PER_FILE = 80
# Entry-point detection stops once this many matches have been recorded.
MAX_ENTRY_POINTS = 200
MAX_OUTPUT_BYTES = 512 * 1024  # 512 KB — keeps output within agent context limits

# File extensions we care about for domain analysis
# (compared against Path.suffix, so the match is case-sensitive).
SOURCE_EXTENSIONS = {
    ".ts", ".tsx", ".js", ".jsx", ".mjs", ".cjs",
    ".py", ".pyi",
    ".go",
    ".rs",
    ".java", ".kt", ".scala",
    ".rb",
    ".cs",
    ".php",
    ".swift",
    ".c", ".cpp", ".h", ".hpp",
    ".ex", ".exs",
    ".hs",
    ".lua",
    ".r", ".R",
}

# Directories to always skip (matched by directory name at any depth)
SKIP_DIRS = {
    "node_modules", ".git", ".svn", ".hg", "__pycache__", ".tox",
    "venv", ".venv", "env", ".env", "dist", "build", "out", ".next",
    ".nuxt", "target", "vendor", ".idea", ".vscode", "coverage",
    ".understand-anything", ".pytest_cache", ".mypy_cache",
    "Pods", "DerivedData", ".gradle", "bin", "obj",
}

# Files that reveal project metadata (looked up directly under the project root)
METADATA_FILES = [
    "package.json", "Cargo.toml", "go.mod", "pyproject.toml",
    "setup.py", "setup.cfg", "pom.xml", "build.gradle",
    "Gemfile", "composer.json", "mix.exs", "Makefile",
    "docker-compose.yml", "docker-compose.yaml",
    "README.md", "README.rst", "README.txt", "README",
]
|
||||
|
||||
# ── Entry point detection patterns ─────────────────────────────────────────
|
||||
|
||||
# Each item is (entry type, human-readable description, compiled regex).
# The type and description are copied verbatim into every detected entry
# point; the regex is run over the full text of each scanned file.
ENTRY_POINT_PATTERNS: list[tuple[str, str, re.Pattern[str]]] = [
    # HTTP routes
    ("http", "Express/Koa route", re.compile(
        r"""(?:app|router|server)\s*\.\s*(?:get|post|put|patch|delete|all|use)\s*\(\s*['"](/[^'"]*?)['"]""",
        re.IGNORECASE,
    )),
    ("http", "Decorator route (Flask/FastAPI/NestJS)", re.compile(
        r"""@(?:app\.)?(?:route|get|post|put|patch|delete|api_view|RequestMapping|GetMapping|PostMapping)\s*\(\s*['"](/[^'"]*?)['"]""",
        re.IGNORECASE,
    )),
    ("http", "Next.js/Remix route handler", re.compile(
        r"""export\s+(?:async\s+)?function\s+(GET|POST|PUT|PATCH|DELETE|HEAD|OPTIONS)\b""",
    )),
    # CLI
    ("cli", "CLI command", re.compile(
        r"""\.command\s*\(\s*['"]([\w\-:]+)['"]""",
    )),
    ("cli", "argparse subparser", re.compile(
        r"""add_parser\s*\(\s*['"]([\w\-]+)['"]""",
    )),
    # Event handlers
    ("event", "Event listener", re.compile(
        r"""\.on\s*\(\s*['"]([\w\-:.]+)['"]""",
    )),
    ("event", "Event subscriber decorator", re.compile(
        r"""@(?:EventHandler|Subscribe|Listener|on_event)\s*\(\s*['"]([\w\-:.]+)['"]""",
    )),
    # Cron / scheduled
    ("cron", "Cron schedule", re.compile(
        r"""@?(?:Cron|Schedule|Scheduled|crontab)\s*\(\s*['"]([^'"]+)['"]""",
        re.IGNORECASE,
    )),
    # GraphQL
    ("http", "GraphQL resolver", re.compile(
        r"""@(?:Query|Mutation|Subscription|Resolver)\s*\(""",
    )),
    # gRPC service declarations (proto syntax; meant for .proto files only).
    # Matches a `service Name {` that starts a line.
    ("http", "gRPC service", re.compile(
        r"""^service\s+(\w+)\s*\{""", re.MULTILINE,
    )),
    # Exported handlers (generic): exported functions named handle*/process*/on*
    ("manual", "Exported handler", re.compile(
        r"""export\s+(?:async\s+)?function\s+(handle\w+|process\w+|on\w+)\b""",
    )),
]
|
||||
|
||||
|
||||
# ── Gitignore support ──────────────────────────────────────────────────────
|
||||
|
||||
def parse_gitignore(project_root: Path) -> list[re.Pattern[str]]:
    """Parse the root ``.gitignore`` into compiled regex patterns.

    Each returned pattern is meant to be used with ``pattern.search(path)``
    on a ``/``-separated path relative to *project_root*; directories are
    expected to carry a trailing ``/``.

    Supported gitignore syntax:

    * blank lines and ``#`` comments are skipped;
    * ``*`` and ``?`` match within a single path segment, ``**`` spans
      segments, ``[...]`` character classes are kept;
    * a pattern containing ``/`` (other than a trailing one) is anchored to
      the project root, otherwise it may match at any depth;
    * a trailing ``/`` restricts the pattern to directories;
    * ``!negation`` lines are skipped, because a flat list of "ignore"
      regexes cannot express re-inclusion and treating them as ignore rules
      would hide exactly the files the author wanted kept.

    Args:
        project_root: Directory that contains the ``.gitignore`` file.

    Returns:
        Compiled patterns, in file order. Empty when there is no
        ``.gitignore`` file.
    """
    gitignore = project_root / ".gitignore"
    patterns: list[re.Pattern[str]] = []
    if not gitignore.is_file():
        return patterns

    def _glob_to_regex(glob: str) -> str:
        """Translate one gitignore glob body into regex source, token by token."""
        out: list[str] = []
        i, n = 0, len(glob)
        while i < n:
            ch = glob[i]
            if glob.startswith("**/", i):
                # Zero or more whole directory levels.
                out.append("(?:.*/)?")
                i += 3
            elif glob.startswith("**", i):
                out.append(".*")
                i += 2
            elif ch == "*":
                out.append("[^/]*")
                i += 1
            elif ch == "?":
                out.append("[^/]")
                i += 1
            elif ch == "\\" and i + 1 < n:
                # Backslash escapes the next character (e.g. "\#", "\!").
                out.append(re.escape(glob[i + 1]))
                i += 2
            elif ch == "[":
                # A "]" directly after "[" is a literal member, so start the
                # search for the closing bracket one character later.
                close = glob.find("]", i + 2)
                if close == -1:
                    out.append(re.escape(ch))
                    i += 1
                else:
                    members = glob[i + 1:close]
                    if members[0] in "!^":
                        members = "^" + members[1:]
                    out.append("[" + members.replace("\\", "\\\\") + "]")
                    i = close + 1
            else:
                out.append(re.escape(ch))
                i += 1
        return "".join(out)

    text = gitignore.read_text(encoding="utf-8", errors="replace")
    for raw_line in text.splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#"):
            continue
        if line.startswith("!"):
            continue

        dir_only = line.endswith("/")
        body = line.rstrip("/")
        if not body:
            continue
        # A separator at the start or in the middle pins the pattern to the root.
        anchored = "/" in body
        body = body.lstrip("/")

        prefix = "^" if anchored else "(?:^|/)"
        # Directory-only rules must be followed by "/"; other rules match a
        # whole segment, whether it is the last one or a parent directory.
        suffix = "/" if dir_only else "(?:/|$)"
        try:
            patterns.append(re.compile(prefix + _glob_to_regex(body) + suffix))
        except re.error as e:
            print(f"Warning: skipping invalid gitignore pattern '{line}': {e}", file=sys.stderr)
    return patterns
|
||||
|
||||
|
||||
def is_ignored(rel_path: str, gitignore_patterns: list[re.Pattern[str]]) -> bool:
    """Tell whether *rel_path* is matched by at least one ignore pattern.

    Args:
        rel_path: Path relative to the project root.
        gitignore_patterns: Compiled patterns, each tried with ``search``.

    Returns:
        ``True`` as soon as any pattern matches, ``False`` otherwise
        (including when the pattern list is empty).
    """
    return any(compiled.search(rel_path) is not None for compiled in gitignore_patterns)
|
||||
|
||||
|
||||
# ── File tree scanner ──────────────────────────────────────────────────────
|
||||
|
||||
def scan_file_tree(
    root: Path,
    gitignore_patterns: list[re.Pattern[str]],
    max_depth: int = MAX_FILE_TREE_DEPTH,
) -> list[str]:
    """Return a flat list of relative source-file paths under *root*.

    Directories are visited depth-first, sub-directories before files, both
    in case-insensitive name order. Symlinks, directories named in
    ``SKIP_DIRS``, and anything matched by *gitignore_patterns* are skipped.
    Only files whose suffix is in ``SOURCE_EXTENSIONS`` are returned.

    Args:
        root: Project root; returned paths are relative to it.
        gitignore_patterns: Compiled ignore patterns, applied to the
            ``/``-separated relative path (directories get a trailing ``/``).
        max_depth: Deepest directory level to descend into (root is 0).

    Returns:
        At most ``MAX_FILES_TOTAL`` paths, with at most
        ``MAX_FILES_PER_DIR`` taken from any single directory. Paths always
        use ``/`` as separator, regardless of platform.
    """
    result: list[str] = []

    def _walk(dir_path: Path, depth: int) -> None:
        if depth > max_depth or len(result) >= MAX_FILES_TOTAL:
            return
        try:
            entries = sorted(dir_path.iterdir(), key=lambda e: (not e.is_dir(), e.name.lower()))
        except OSError:
            # Unreadable or vanished directory (PermissionError included):
            # skip it instead of aborting the whole scan.
            return

        file_count = 0
        for entry in entries:
            if len(result) >= MAX_FILES_TOTAL:
                break
            # Skip symlinks to avoid infinite loops
            if entry.is_symlink():
                continue
            # Ignore patterns are written with "/", so normalise the
            # separator; str() would yield "\\" on Windows and never match.
            rel = entry.relative_to(root).as_posix()
            if entry.is_dir():
                if entry.name in SKIP_DIRS:
                    continue
                if is_ignored(rel + "/", gitignore_patterns):
                    continue
                _walk(entry, depth + 1)
            elif entry.is_file():
                # Files sort after directories, so once the per-directory
                # budget is spent nothing else in this directory is needed.
                if file_count >= MAX_FILES_PER_DIR:
                    break
                if entry.suffix not in SOURCE_EXTENSIONS:
                    continue
                if is_ignored(rel, gitignore_patterns):
                    continue
                result.append(rel)
                file_count += 1

    _walk(root, 0)
    return result
|
||||
|
||||
|
||||
# ── Entry point detection ──────────────────────────────────────────────────
|
||||
|
||||
def detect_entry_points(root: Path, file_paths: list[str]) -> list[dict[str, Any]]:
    """Scan source files for entry point patterns.

    Every regex in ``ENTRY_POINT_PATTERNS`` is run over each file's text.
    The "gRPC service" pattern is applied to ``.proto`` files only, since
    it describes proto syntax. Test files and this extraction script are
    skipped, as are files that cannot be read.

    Args:
        root: Project root that *file_paths* are relative to.
        file_paths: Relative paths of the files to inspect.

    Returns:
        At most ``MAX_ENTRY_POINTS`` dicts with the keys ``file``, ``line``
        (1-based), ``type``, ``description``, ``match`` (matched text, up to
        120 characters) and ``snippet`` (the matching line plus up to four
        following lines, capped at 300 characters).
    """
    entry_points: list[dict[str, Any]] = []

    # Skip test files and the extraction script itself
    test_patterns = re.compile(r"(?:\.test\.|\.spec\.|__tests__|_test\.py|test_\w+\.py|extract-domain-context\.py)")

    for rel_path in file_paths:
        if len(entry_points) >= MAX_ENTRY_POINTS:
            break
        if test_patterns.search(rel_path):
            continue
        full_path = root / rel_path
        try:
            content = full_path.read_text(errors="replace")
        except (OSError, UnicodeDecodeError):
            continue

        # Split on "\n" only, the same character used for line numbering
        # below. splitlines() also breaks on form feeds and similar, which
        # would shift the snippet away from the reported line.
        lines = content.split("\n")
        if lines and lines[-1] == "":
            lines.pop()
        is_proto = rel_path.endswith(".proto")

        for entry_type, description, pattern in ENTRY_POINT_PATTERNS:
            if description == "gRPC service" and not is_proto:
                continue
            for match in pattern.finditer(content):
                # Count newlines in place instead of copying the prefix.
                line_no = content.count("\n", 0, match.start()) + 1
                # Extract a snippet (signature + a few lines)
                start = line_no - 1
                snippet = "\n".join(lines[start:start + 5])

                entry_points.append({
                    "file": rel_path,
                    "line": line_no,
                    "type": entry_type,
                    "description": description,
                    "match": match.group(0)[:120],
                    "snippet": snippet[:300],
                })

                if len(entry_points) >= MAX_ENTRY_POINTS:
                    break
            if len(entry_points) >= MAX_ENTRY_POINTS:
                break

    return entry_points
|
||||
|
||||
|
||||
# ── File signatures ────────────────────────────────────────────────────────
|
||||
|
||||
def extract_file_signatures(root: Path, file_paths: list[str]) -> list[dict[str, Any]]:
    """Extract exports and imports from a prioritised sample of files.

    Files are ranked by how many business-logic keywords appear in their
    path (ties keep their input order) and the first ``MAX_SAMPLED_FILES``
    are read. Only the first ``MAX_LINES_PER_FILE`` lines of each file are
    inspected. Unreadable files are skipped.

    Args:
        root: Project root that *file_paths* are relative to.
        file_paths: Relative paths of candidate files.

    Returns:
        One dict per sampled file with the keys ``file``, ``exports`` (up to
        20 names), ``imports`` (up to 20 module specifiers), ``lines`` (total
        line count of the file) and ``preview`` (first 500 characters of the
        inspected lines).
    """
    signatures: list[dict[str, Any]] = []

    # Prioritize files likely to contain business logic
    priority_keywords = (
        "controller", "service", "handler", "router", "route", "api",
        "model", "entity", "repository", "usecase", "use_case",
        "command", "query", "event", "subscriber", "listener",
        "middleware", "guard", "interceptor", "resolver",
        "workflow", "flow", "process", "pipeline", "job", "task",
    )

    def priority_score(path: str) -> int:
        lower = path.lower()
        return sum(1 for kw in priority_keywords if kw in lower)

    sorted_paths = sorted(file_paths, key=priority_score, reverse=True)

    for rel_path in sorted_paths[:MAX_SAMPLED_FILES]:
        full_path = root / rel_path
        try:
            content = full_path.read_text(errors="replace")
        except (OSError, UnicodeDecodeError):
            continue

        all_lines = content.splitlines()
        truncated = "\n".join(all_lines[:MAX_LINES_PER_FILE])

        # Extract exports (JS/TS)
        exports = re.findall(
            r"export\s+(?:default\s+)?(?:async\s+)?(?:function|class|const|let|var|interface|type|enum)\s+(\w+)",
            truncated,
        )
        # Extract exports (Python): top-level def / async def / class
        if not exports:
            exports = re.findall(r"^(?:async\s+def|def|class)\s+(\w+)", truncated, re.MULTILINE)

        # Extract imports (first 20): JS `import ... from '<module>'`
        # or Python `from <module> import`
        imports = re.findall(
            r"""(?:import\s+.*?from\s+['"]([^'"]+)['"]|from\s+([\w.]+)\s+import)""",
            truncated,
        )
        import_list = [m[0] or m[1] for m in imports][:20]

        signatures.append({
            "file": rel_path,
            "exports": exports[:20],
            "imports": import_list,
            "lines": len(all_lines),
            "preview": truncated[:500],
        })

    return signatures
|
||||
|
||||
|
||||
# ── Metadata extraction ────────────────────────────────────────────────────
|
||||
|
||||
def _package_json_summary(content: str) -> Any:
    """Condense package.json text; fall back to a 500-char excerpt if it is not a JSON object."""
    try:
        pkg = json.loads(content)
    except json.JSONDecodeError:
        return content[:500]
    if not isinstance(pkg, dict):
        # Valid JSON but not an object (e.g. a list): nothing to summarise.
        return content[:500]

    def _keys(field: str) -> list[str]:
        value = pkg.get(field)
        return list(value) if isinstance(value, dict) else []

    return {
        "name": pkg.get("name"),
        "description": pkg.get("description"),
        "scripts": _keys("scripts"),
        "dependencies": _keys("dependencies"),
        "devDependencies": _keys("devDependencies"),
    }


def extract_metadata(root: Path) -> dict[str, Any]:
    """Read the project metadata files found directly under *root*.

    Every name in ``METADATA_FILES`` that exists and can be read is added to
    the result:

    * ``package.json`` becomes a dict with ``name``, ``description`` and the
      key lists of ``scripts``, ``dependencies`` and ``devDependencies``
      (or the first 500 characters when it is not a JSON object);
    * README-style files (``.md``, ``.rst``, ``.txt``, ``README``) keep
      their first 2000 characters;
    * every other file keeps its first 1000 characters. This includes files
      such as ``Makefile``, ``Gemfile``, ``setup.py`` and ``mix.exs``, whose
      names match none of the extension groups.

    Args:
        root: Project root directory.

    Returns:
        Mapping from metadata file name to its summary or excerpt.
    """
    metadata: dict[str, Any] = {}

    for filename in METADATA_FILES:
        filepath = root / filename
        if not filepath.exists():
            continue
        try:
            content = filepath.read_text(errors="replace")
        except (OSError, UnicodeDecodeError):
            continue

        if filename == "package.json":
            metadata[filename] = _package_json_summary(content)
        elif filename.endswith((".md", ".rst", ".txt")) or filename == "README":
            metadata[filename] = content[:2000]
        else:
            metadata[filename] = content[:1000]

    return metadata
|
||||
|
||||
|
||||
# ── Main ───────────────────────────────────────────────────────────────────
|
||||
|
||||
def _truncate_to_fit(context: dict[str, Any]) -> dict[str, Any]:
    """Shrink *context* in stages until its JSON form fits ``MAX_OUTPUT_BYTES``.

    The dict is modified in place and also returned. After each of the first
    three stages the serialized size (``indent=2``) is re-measured and the
    function returns as soon as it fits. The fourth stage is applied without
    a further size check.
    """

    def _within_budget() -> bool:
        serialized = json.dumps(context, indent=2)
        return len(serialized.encode()) <= MAX_OUTPUT_BYTES

    if _within_budget():
        return context

    # Stage 1: keep only the first 200 file-tree entries.
    context["fileTree"] = context["fileTree"][:200]
    if _within_budget():
        return context

    # Stage 2: shorten every signature preview to 200 characters.
    for signature in context.get("fileSignatures", []):
        signature["preview"] = signature["preview"][:200]
    if _within_budget():
        return context

    # Stage 3: shorten every entry-point snippet to 100 characters.
    for entry_point in context.get("entryPoints", []):
        entry_point["snippet"] = entry_point["snippet"][:100]
    if _within_budget():
        return context

    # Stage 4: cut the lists themselves (20 signatures, 100 entry points).
    context["fileSignatures"] = context["fileSignatures"][:20]
    context["entryPoints"] = context["entryPoints"][:100]
    return context
|
||||
|
||||
|
||||
def main() -> None:
    """Command-line entry point: scan a project and write ``domain-context.json``.

    Expects the project root as the first argument. Progress goes to stderr.
    The process exits with status 1 on a missing argument, a path that is
    not a directory, or any exception raised while scanning or writing.
    """
    args = sys.argv
    if len(args) < 2:
        print("Usage: python extract-domain-context.py <project-root>", file=sys.stderr)
        sys.exit(1)

    project_root = Path(args[1]).resolve()
    if not project_root.is_dir():
        print(f"Error: {project_root} is not a directory", file=sys.stderr)
        sys.exit(1)

    def _log(message: str) -> None:
        print(message, file=sys.stderr)

    try:
        # Ensure output directory exists
        target_dir = project_root / ".understand-anything" / "intermediate"
        target_dir.mkdir(parents=True, exist_ok=True)
        output_path = target_dir / "domain-context.json"

        _log(f"Scanning {project_root} ...")

        ignore_rules = parse_gitignore(project_root)
        source_files = scan_file_tree(project_root, ignore_rules)
        _log(f" Found {len(source_files)} source files")

        found_entry_points = detect_entry_points(project_root, source_files)
        _log(f" Detected {len(found_entry_points)} entry points")

        file_signatures = extract_file_signatures(project_root, source_files)
        _log(f" Extracted {len(file_signatures)} file signatures")

        project_metadata = extract_metadata(project_root)
        _log(f" Read {len(project_metadata)} metadata files")

        context = _truncate_to_fit({
            "projectRoot": str(project_root),
            "fileCount": len(source_files),
            "fileTree": source_files,
            "entryPoints": found_entry_points,
            "fileSignatures": file_signatures,
            "metadata": project_metadata,
        })

        serialized = json.dumps(context, indent=2)
        output_path.write_text(serialized)
        kilobytes = len(serialized.encode()) / 1024
        _log(f" Wrote {output_path} ({kilobytes:.0f} KB)")

    except Exception as e:
        # Top-level boundary: report the failure and signal it via exit code.
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user