fix(security): 添加VITE_PAYMENT_URL环境变量配置

2026-06-18 21:29:41 +08:00
parent 3d977d0a2d
commit 8afeb2e4d9
160 changed files with 21893 additions and 0 deletions
--- a/.qoder/skills/understand-knowledge/parse-knowledge-base.py
+++ b/.qoder/skills/understand-knowledge/parse-knowledge-base.py
@@ -0,0 +1,509 @@
+#!/usr/bin/env python3
+"""
+Deterministic parser for Karpathy-pattern LLM wikis.
+
+Detects the three-layer pattern (raw sources + wiki markdown + schema),
+extracts structure from markdown files, resolves wikilinks, and derives
+categories from index.md section headings.
+
+Usage:
+    python parse-knowledge-base.py <wiki-directory>
+
+Output:
+    Writes scan-manifest.json to <wiki-directory>/.understand-anything/intermediate/
+"""
+
+import json
+import os
+import re
+import sys
+from pathlib import Path
+
+# ---------------------------------------------------------------------------
+# Regex patterns
+# ---------------------------------------------------------------------------
+# [[target]] or [[target|display]]: group 1 is the target, group 2 the
+# optional display text after the pipe.
+WIKILINK_RE = re.compile(r"\[\[([^\]|]+)(?:\|([^\]]+))?\]\]")
+# A "---" delimited block at the very start of the text; group 1 is the body
+# between the delimiters. No MULTILINE flag, so "^" only matches at offset 0.
+FRONTMATTER_RE = re.compile(r"^---\s*\n(.*?)\n---\s*\n", re.DOTALL)
+# Triple backticks followed by an optional run of word characters (the
+# language tag). Closing fences match too, with an empty group 1.
+CODE_BLOCK_RE = re.compile(r"```(\w*)")
+# ATX heading of level 1-6: group 1 is the "#" run, group 2 the heading text.
+HEADING_RE = re.compile(r"^(#{1,6})\s+(.+)$", re.MULTILINE)
+# Level-2 ("## ") heading: group 1 is the heading text.
+INDEX_SECTION_RE = re.compile(r"^##\s+(.+)$", re.MULTILINE)
+
+# Files that are part of wiki infrastructure, not content articles.
+# Names are lower-case; parse_wiki compares them against a lower-cased filename.
+INFRA_FILES = {"index.md", "log.md", "claude.md", "agents.md", "soul.md"}
+
+# ---------------------------------------------------------------------------
+# Detection: is this a Karpathy-pattern wiki?
+# ---------------------------------------------------------------------------
+
+def detect_format(root: Path) -> dict:
+    """Report whether ``root`` looks like a Karpathy-style three-layer LLM wiki.
+
+    Returns a dict of signals: ``has_index``, ``has_log``, ``has_raw``,
+    ``has_schema``, ``md_count``, ``wiki_root``, ``detected`` and ``format``.
+    A directory is detected when an index.md exists (in ``root`` or
+    ``root/wiki``) and the wiki root holds at least three markdown files.
+    """
+    wiki_dir = root / "wiki"
+
+    def _present(name: str) -> bool:
+        # A marker file counts whether it sits in root or in root/wiki.
+        return (root / name).is_file() or (wiki_dir / name).is_file()
+
+    # Markdown content lives in wiki/ when that directory exists, else in root.
+    wiki_root = wiki_dir if wiki_dir.is_dir() else root
+    md_count = sum(1 for _ in wiki_root.rglob("*.md"))
+
+    has_index = _present("index.md")
+    # Primary signal: an index plus a meaningful number of markdown files.
+    detected = has_index and md_count >= 3
+
+    return {
+        "has_index": has_index,
+        "has_log": _present("log.md"),
+        "has_raw": (root / "raw").is_dir(),
+        "has_schema": any(_present(name) for name in ("CLAUDE.md", "AGENTS.md")),
+        "md_count": md_count,
+        "wiki_root": str(wiki_root),
+        "detected": detected,
+        "format": "karpathy" if detected else "unknown",
+    }
+
+
+# ---------------------------------------------------------------------------
+# Markdown extraction helpers
+# ---------------------------------------------------------------------------
+
+def extract_frontmatter(text: str) -> dict:
+    """Return the leading frontmatter block as a flat ``{key: value}`` dict.
+
+    This is a line-based split, not a YAML parser: every frontmatter line that
+    contains a colon is cut at its first colon; the value is trimmed and has
+    surrounding double, then single, quotes removed. Later duplicate keys
+    overwrite earlier ones. Returns ``{}`` when there is no frontmatter.
+    """
+    found = FRONTMATTER_RE.match(text)
+    if found is None:
+        return {}
+    pairs = (
+        raw.partition(":")
+        for raw in found.group(1).split("\n")
+        if ":" in raw
+    )
+    return {
+        name.strip(): value.strip().strip('"').strip("'")
+        for name, _sep, value in pairs
+    }
+
+
+def extract_wikilinks(text: str) -> list[dict]:
+    """Collect every ``[[target]]`` / ``[[target|display]]`` link in ``text``.
+
+    Each entry is ``{"target": ..., "display": ...}`` with both values
+    whitespace-trimmed; ``display`` is ``None`` when the link has no pipe part.
+    """
+    # findall yields "" for an unmatched optional group, which maps to None.
+    return [
+        {"target": target.strip(), "display": display.strip() if display else None}
+        for target, display in WIKILINK_RE.findall(text)
+    ]
+
+
+def extract_headings(text: str) -> list[dict]:
+    """List every markdown heading as ``{"level": int, "text": str}``, in order."""
+    found = []
+    for hashes, title in HEADING_RE.findall(text):
+        # The heading level is simply the number of leading "#" characters.
+        found.append({"level": len(hashes), "text": title.strip()})
+    return found
+
+
+def extract_code_blocks(text: str) -> list[str]:
+    """Return the language tag of each code fence that carries one, in order."""
+    # Fences without a tag (including closing fences) yield "" and are dropped.
+    return [tag for tag in CODE_BLOCK_RE.findall(text) if tag]
+
+
+def extract_first_paragraph(text: str) -> str:
+    """Return a short summary: the first paragraph after frontmatter and H1.
+
+    The paragraph following the first "# " heading that has one is preferred;
+    otherwise the first paragraph of the document is used. Blockquote lines,
+    horizontal rules and headings are never part of the paragraph. Results
+    longer than 200 characters are cut to 197 characters plus "...".
+    """
+    body = FRONTMATTER_RE.sub("", text).strip()
+    if not body:
+        return ""
+    rows = body.split("\n")
+
+    def _first_paragraph(candidates: list[str]) -> str:
+        """Join the first run of paragraph lines found in ``candidates``."""
+        collected: list[str] = []
+        for raw in candidates:
+            piece = raw.strip()
+            if not piece:
+                if collected:
+                    break  # A blank line closes the paragraph
+                continue  # Still skipping leading blank lines
+            if piece.startswith(">") or re.match(r"^[-*_]{3,}\s*$", piece):
+                continue  # Blockquotes and horizontal rules are ignored
+            if piece.startswith("#"):
+                if collected:
+                    break  # The next heading closes the paragraph
+                continue  # Headings before the paragraph are skipped
+            collected.append(piece)
+        return " ".join(collected)
+
+    def _clip(summary: str) -> str:
+        """Limit ``summary`` to 200 characters, marking the cut with "..."."""
+        return summary[:197] + "..." if len(summary) > 200 else summary
+
+    # Preferred: the paragraph right after an H1 line.
+    for pos, row in enumerate(rows):
+        if not row.strip().startswith("# "):
+            continue
+        after_h1 = _first_paragraph(rows[pos + 1:])
+        if after_h1:
+            return _clip(after_h1)
+
+    # Fallback: no usable H1, so take the first paragraph from the top.
+    return _clip(_first_paragraph(rows))
+
+
+def extract_h1(text: str) -> str:
+    """Return the text of the first level-1 heading, or "" when there is none."""
+    # The heading text is returned whole; only surrounding whitespace is
+    # trimmed (subtitles such as " — subtitle" are kept).
+    return next(
+        (
+            hit.group(2).strip()
+            for hit in HEADING_RE.finditer(text)
+            if len(hit.group(1)) == 1
+        ),
+        "",
+    )
+
+
+# ---------------------------------------------------------------------------
+# Index.md parsing — categories come from section headings
+# ---------------------------------------------------------------------------
+
+def parse_index(index_path: Path) -> list[dict]:
+    """Parse index.md into categories taken from its ``##`` section headings.
+
+    Args:
+        index_path: Path to the index file. A missing file yields ``[]``.
+
+    Returns:
+        One ``{"name": str, "articles": list[str]}`` dict per ``##`` heading,
+        in document order. ``articles`` holds the trimmed targets of every
+        wikilink found between that heading and the next one. Wikilinks that
+        appear before the first ``##`` heading are ignored.
+    """
+    if not index_path.is_file():
+        return []
+    text = index_path.read_text(encoding="utf-8", errors="replace")
+    categories: list[dict] = []
+    current: dict | None = None
+
+    for line in text.split("\n"):
+        # Use the shared module-level pattern rather than a duplicate literal.
+        heading = INDEX_SECTION_RE.match(line)
+        if heading:
+            current = {"name": heading.group(1).strip(), "articles": []}
+            categories.append(current)
+            continue
+
+        if current is None:
+            continue  # No section opened yet
+
+        current["articles"].extend(
+            link.group(1).strip() for link in WIKILINK_RE.finditer(line)
+        )
+
+    return categories
+
+
+# ---------------------------------------------------------------------------
+# Log.md parsing — extract operation timeline
+# ---------------------------------------------------------------------------
+
+def parse_log(log_path: Path) -> list[dict]:
+    """Read log.md and return its entries in file order.
+
+    An entry is a line shaped like ``## [YYYY-MM-DD] operation | title``.
+    Each one becomes ``{"date", "operation", "title"}``; lines that do not fit
+    that shape are ignored. A missing file yields ``[]``.
+    """
+    if not log_path.is_file():
+        return []
+    entry_pattern = re.compile(
+        r"^##\s+\[(\d{4}-\d{2}-\d{2})\]\s+(\w+)\s*\|\s*(.+)$", re.MULTILINE
+    )
+    contents = log_path.read_text(encoding="utf-8", errors="replace")
+    return [
+        {"date": day, "operation": operation, "title": title.strip()}
+        for day, operation, title in entry_pattern.findall(contents)
+    ]
+
+
+# ---------------------------------------------------------------------------
+# Main pipeline
+# ---------------------------------------------------------------------------
+
+def build_name_to_stem_map(wiki_root: Path) -> dict[str, str]:
+    """Build a case-insensitive map from page name to relative stem path.
+
+    Two kinds of keys are produced, both lower-cased:
+
+    * the full relative path without suffix (e.g. ``"decisions/decision-foo"``),
+      which is always present for every markdown file;
+    * the bare basename (e.g. ``"decision-foo"``), present only when exactly
+      one file has that basename, so a short link never silently resolves to
+      the wrong page.
+
+    When a bare basename equals some file's full relative path (a page at the
+    wiki root), the full-path mapping wins and is never dropped as ambiguous.
+
+    Args:
+        wiki_root: Directory that is scanned recursively for ``*.md`` files.
+
+    Returns:
+        Mapping of lower-cased key to the original-case relative stem.
+    """
+    full_paths: dict[str, str] = {}
+    by_basename: dict[str, list[str]] = {}
+    # Sorted so that any "last one wins" collision is deterministic.
+    for md_file in sorted(wiki_root.rglob("*.md")):
+        stem = md_file.relative_to(wiki_root).with_suffix("").as_posix()
+        full_paths[stem.lower()] = stem
+        by_basename.setdefault(md_file.stem.lower(), []).append(stem)
+
+    # Bare basenames are only usable when they identify a single page.
+    name_map: dict[str, str] = {
+        key: stems[0] for key, stems in by_basename.items() if len(stems) == 1
+    }
+    # Apply full paths last: they take precedence over any basename entry and
+    # are kept even when the same key is an ambiguous basename.
+    name_map.update(full_paths)
+    return name_map
+
+
+def resolve_wikilink(target: str, name_map: dict[str, str], node_ids: set[str] | None = None) -> str | None:
+    """Resolve a wikilink target to an article node ID.
+
+    Resolution order:
+
+    1. Exact (case-insensitive) lookup of the target in ``name_map``.
+    2. Partial-path lookup: keys that end with ``"/" + target``. This resolves
+       only when exactly one distinct page matches, so an ambiguous name is
+       reported as unresolved instead of picking an arbitrary page.
+    3. If the target contains ``#`` (a heading anchor such as
+       ``page#section``) and nothing matched, steps 1-2 are retried with the
+       part before the ``#``.
+
+    Args:
+        target: Raw link target as written inside ``[[...]]``.
+        name_map: Lower-cased name -> relative stem.
+        node_ids: If provided, only IDs contained in it may be returned.
+
+    Returns:
+        ``"article:<stem>"`` or ``None`` when the target cannot be resolved.
+    """
+    key = target.lower().strip()
+    # Skip targets that are clearly not page names (shell flags, etc.)
+    if key.startswith("-"):
+        return None
+
+    def _lookup(name: str) -> str | None:
+        """Resolve one normalized name via exact, then unique-suffix match."""
+        if not name:
+            return None
+        stem = name_map.get(name)
+        if stem:
+            candidate = f"article:{stem}"
+            # If we have a node set, verify the target exists
+            if node_ids is not None and candidate not in node_ids:
+                return None
+            return candidate
+        # Try without directory prefix; collect distinct candidates so that
+        # ambiguity can be detected rather than resolved by iteration order.
+        suffix = "/" + name
+        matches = {
+            f"article:{stored_stem}"
+            for stored_key, stored_stem in name_map.items()
+            if stored_key.endswith(suffix)
+        }
+        if node_ids is not None:
+            matches = {candidate for candidate in matches if candidate in node_ids}
+        if len(matches) == 1:
+            return matches.pop()
+        return None
+
+    resolved = _lookup(key)
+    if resolved is None and "#" in key:
+        # Heading anchor: link to the page itself.
+        resolved = _lookup(key.partition("#")[0].strip())
+    return resolved
+
+
+def parse_wiki(root: Path) -> dict:
+    """Parse a Karpathy-pattern wiki and produce the scan manifest.
+
+    Steps: detect the layout, parse index.md (categories) and log.md, turn
+    every non-infrastructure markdown file into an ``article`` node with
+    ``related`` edges for its resolvable wikilinks, add one ``topic`` node per
+    index category with ``categorized_under`` edges, add one ``source`` node
+    per non-hidden file under ``raw/``, then de-duplicate edges and attach
+    backlinks to the article nodes.
+
+    Args:
+        root: Directory passed on the command line (wiki root or its parent).
+
+    Returns:
+        Manifest dict with keys ``format``, ``stats``, ``categories``,
+        ``logEntries``, ``nodes``, ``edges`` and ``warnings`` (capped at 50).
+
+    Raises:
+        SystemExit: with status 1, after printing a JSON error to stderr,
+            when ``root`` is not detected as a Karpathy-pattern wiki.
+    """
+    detection = detect_format(root)
+    if not detection["detected"]:
+        print(json.dumps({"error": "Not a Karpathy-pattern wiki", "detection": detection}),
+              file=sys.stderr)
+        sys.exit(1)
+
+    wiki_root = Path(detection["wiki_root"])
+    raw_root = root / "raw"
+
+    # Build name resolution map
+    name_map = build_name_to_stem_map(wiki_root)
+
+    # Find index.md and log.md (wiki root first, then the top-level directory)
+    index_path = wiki_root / "index.md"
+    if not index_path.is_file():
+        index_path = root / "index.md"
+    log_path = wiki_root / "log.md"
+    if not log_path.is_file():
+        log_path = root / "log.md"
+
+    # Parse index for categories
+    categories = parse_index(index_path)
+    log_entries = parse_log(log_path)
+
+    # Build category lookup: wikilink target → category name
+    category_lookup: dict[str, str] = {}
+    for cat in categories:
+        for article_target in cat["articles"]:
+            category_lookup[article_target.lower()] = cat["name"]
+
+    # --- Collect article files once (also used for edge validation) ---
+    # Only skip infra files at the wiki root level, not in subdirectories
+    # (e.g., wiki/index.md is infra, but wiki/concepts/index.md is content)
+    article_files: list[tuple[Path, Path]] = []
+    for md_file in sorted(wiki_root.rglob("*.md")):
+        rel = md_file.relative_to(wiki_root)
+        if rel.parent == Path(".") and rel.name.lower() in INFRA_FILES:
+            continue
+        article_files.append((md_file, rel))
+    article_ids: set[str] = {
+        f"article:{rel.with_suffix('').as_posix()}" for _, rel in article_files
+    }
+
+    # --- Build article nodes ---
+    nodes = []
+    edges = []
+    warnings = []
+    stats = {"articles": 0, "sources": 0, "topics": 0, "wikilinks": 0, "unresolved": 0}
+
+    for md_file, rel in article_files:
+        stem = rel.with_suffix("").as_posix()
+        basename = md_file.stem
+
+        text = md_file.read_text(encoding="utf-8", errors="replace")
+        h1 = extract_h1(text)
+        frontmatter = extract_frontmatter(text)
+        wikilinks = extract_wikilinks(text)
+        summary = extract_first_paragraph(text)
+
+        # Derive category from index.md lookup: bare name first, then full stem
+        category = category_lookup.get(basename.lower(), "")
+        if not category:
+            category = category_lookup.get(stem.lower(), "")
+
+        # Derive tags (deduplicated)
+        tag_set: set[str] = set()
+        if category:
+            tag_set.add(category.lower())
+        if rel.parent != Path("."):
+            # POSIX separators, consistent with the article id
+            tag_set.add(rel.parent.as_posix())
+        fm_tags = frontmatter.get("tags", "")
+        if fm_tags:
+            # Accept both "a, b" and the YAML inline list form "[a, 'b']"
+            for raw_tag in fm_tags.strip().strip("[]").split(","):
+                tag = raw_tag.strip().strip("\"'").strip()
+                if tag:
+                    tag_set.add(tag)
+        tags = sorted(tag_set)
+
+        # Complexity from wikilink density
+        wl_count = len(wikilinks)
+        if wl_count > 15:
+            complexity = "complex"
+        elif wl_count > 5:
+            complexity = "moderate"
+        else:
+            complexity = "simple"
+
+        node_id = f"article:{stem}"
+        nodes.append({
+            "id": node_id,
+            "type": "article",
+            "name": h1 or basename,
+            "filePath": rel.as_posix(),
+            "summary": summary or f"Wiki article: {h1 or basename}",
+            "tags": tags,
+            "complexity": complexity,
+            "knowledgeMeta": {
+                "wikilinks": [wl["target"] for wl in wikilinks],
+                **({"category": category} if category else {}),
+                "content": text[:3000],  # First 3000 chars for LLM analysis
+            },
+        })
+        stats["articles"] += 1
+        stats["wikilinks"] += wl_count
+
+        # Build edges from wikilinks (resolve against known article IDs)
+        for wl in wikilinks:
+            target_id = resolve_wikilink(wl["target"], name_map, article_ids)
+            if target_id and target_id != node_id:
+                edges.append({
+                    "source": node_id,
+                    "target": target_id,
+                    "type": "related",
+                    "direction": "forward",
+                    "weight": 0.7,
+                })
+            elif not target_id:
+                warnings.append(f"Unresolved wikilink: [[{wl['target']}]] in {rel}")
+                stats["unresolved"] += 1
+
+    # --- Build topic nodes from index.md categories ---
+    for cat in categories:
+        topic_id = f"topic:{cat['name'].lower().replace(' ', '-')}"
+        nodes.append({
+            "id": topic_id,
+            "type": "topic",
+            "name": cat["name"],
+            "summary": f"Category from index: {cat['name']} ({len(cat['articles'])} articles)",
+            "tags": ["category"],
+            "complexity": "simple",
+        })
+        stats["topics"] += 1
+
+        # categorized_under edges (only resolve to known article nodes)
+        for article_target in cat["articles"]:
+            article_id = resolve_wikilink(article_target, name_map, article_ids)
+            if article_id:
+                edges.append({
+                    "source": article_id,
+                    "target": topic_id,
+                    "type": "categorized_under",
+                    "direction": "forward",
+                    "weight": 0.6,
+                })
+
+    # --- Build source nodes from raw/ ---
+    if raw_root.is_dir():
+        for raw_file in sorted(raw_root.rglob("*")):
+            if raw_file.is_file() and not raw_file.name.startswith("."):
+                rel_raw = raw_file.relative_to(root)
+                ext = raw_file.suffix.lower()
+                size_kb = raw_file.stat().st_size / 1024
+                # NOTE(review): the id drops the extension, so raw files that
+                # differ only by suffix (paper.pdf / paper.md) share one id.
+                source_stem = raw_file.relative_to(raw_root).with_suffix("").as_posix()
+                source_id = f"source:{source_stem}"
+                nodes.append({
+                    "id": source_id,
+                    "type": "source",
+                    "name": raw_file.name,
+                    "filePath": rel_raw.as_posix(),
+                    "summary": f"Raw source ({ext or 'unknown'}, {size_kb:.0f} KB)",
+                    "tags": ["raw", ext.lstrip(".") or "unknown"],
+                    "complexity": "simple",
+                })
+                stats["sources"] += 1
+
+    # --- Deduplicate edges (first occurrence wins, order preserved) ---
+    seen_edges: set[tuple[str, str, str]] = set()
+    deduped_edges = []
+    for edge in edges:
+        key = (edge["source"], edge["target"], edge["type"])
+        if key not in seen_edges:
+            seen_edges.add(key)
+            deduped_edges.append(edge)
+
+    # --- Compute backlinks ---
+    # Built from the de-duplicated edges so that a page linking to the same
+    # target several times is listed as a backlink only once.
+    backlink_map: dict[str, list[str]] = {}
+    for edge in deduped_edges:
+        if edge["type"] == "related":
+            backlink_map.setdefault(edge["target"], []).append(edge["source"])
+    for node in nodes:
+        if node["type"] == "article" and "knowledgeMeta" in node:
+            node["knowledgeMeta"]["backlinks"] = backlink_map.get(node["id"], [])
+
+    return {
+        "format": "karpathy",
+        "stats": stats,
+        "categories": [{"name": c["name"], "count": len(c["articles"])} for c in categories],
+        "logEntries": len(log_entries),
+        "nodes": nodes,
+        "edges": deduped_edges,
+        "warnings": warnings[:50],  # Cap warnings
+    }
+
+
+def main():
+    """CLI entry point: parse the wiki named on the command line.
+
+    Writes ``scan-manifest.json`` under
+    ``<wiki-directory>/.understand-anything/intermediate/`` and prints a short
+    report to stderr. Exits with status 1 on a missing argument or when the
+    argument is not a directory.
+    """
+    args = sys.argv[1:]
+    if not args:
+        print("Usage: parse-knowledge-base.py <wiki-directory>", file=sys.stderr)
+        sys.exit(1)
+
+    wiki_dir = Path(args[0]).resolve()
+    if not wiki_dir.is_dir():
+        print(f"Error: {wiki_dir} is not a directory", file=sys.stderr)
+        sys.exit(1)
+
+    manifest = parse_wiki(wiki_dir)
+
+    # Write output, creating the intermediate directory on first use
+    target = wiki_dir / ".understand-anything" / "intermediate" / "scan-manifest.json"
+    target.parent.mkdir(parents=True, exist_ok=True)
+    target.write_text(json.dumps(manifest, indent=2), encoding="utf-8")
+
+    # Report to stderr
+    counts = manifest["stats"]
+    report = (
+        f"[parse] Karpathy wiki: {counts['articles']} articles, {counts['sources']} sources, "
+        f"{counts['topics']} topics, {counts['wikilinks']} wikilinks "
+        f"({counts['unresolved']} unresolved)"
+    )
+    print(report, file=sys.stderr)
+    print(f"[parse] Output: {target}", file=sys.stderr)
+
+
+if __name__ == "__main__":
+    main()